diff --git a/.github/actions/nf-test/action.yml b/.github/actions/nf-test/action.yml
index 243e7823..bf44d961 100644
--- a/.github/actions/nf-test/action.yml
+++ b/.github/actions/nf-test/action.yml
@@ -54,13 +54,9 @@ runs:
conda-solver: libmamba
conda-remove-defaults: true
- # TODO Skip failing conda tests and document their failures
- # https://github.com/nf-core/modules/issues/7017
- name: Run nf-test
shell: bash
env:
- NFT_DIFF: ${{ env.NFT_DIFF }}
- NFT_DIFF_ARGS: ${{ env.NFT_DIFF_ARGS }}
NFT_WORKDIR: ${{ env.NFT_WORKDIR }}
run: |
nf-test test \
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index f2d7d1dd..8b0f88c3 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -13,7 +13,7 @@ jobs:
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
- - name: Set up Python 3.12
+ - name: Set up Python 3.13
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
with:
python-version: "3.13"
diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml
index 7e8050fb..d43797d9 100644
--- a/.github/workflows/linting_comment.yml
+++ b/.github/workflows/linting_comment.yml
@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Download lint results
- uses: dawidd6/action-download-artifact@4c1e823582f43b179e2cbb49c3eade4e41f992e2 # v10
+ uses: dawidd6/action-download-artifact@ac66b43f0e6a346234dd65d4d0c8fbb31cb316e5 # v11
with:
workflow: linting.yml
workflow_conclusion: completed
diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml
index 55b6e759..afcd1fd0 100644
--- a/.github/workflows/nf-test.yml
+++ b/.github/workflows/nf-test.yml
@@ -1,12 +1,5 @@
name: Run nf-test
on:
- push:
- paths-ignore:
- - "docs/**"
- - "**/meta.yml"
- - "**/*.md"
- - "**/*.png"
- - "**/*.svg"
pull_request:
paths-ignore:
- "docs/**"
@@ -25,6 +18,7 @@ concurrency:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ NFT_TAGS: "cicd"
NFT_VER: "0.9.2"
NFT_WORKDIR: "~"
NXF_ANSI_LOG: false
@@ -35,7 +29,7 @@ jobs:
nf-test-changes:
name: nf-test-changes
runs-on: # use self-hosted runners
- - runs-on=$-nf-test-changes
+ - runs-on=${{ github.run_id }}-nf-test-changes
- runner=4cpu-linux-x64
outputs:
shard: ${{ steps.set-shards.outputs.shard }}
@@ -58,6 +52,7 @@ jobs:
NFT_VER: ${{ env.NFT_VER }}
with:
max_shards: 7
+ tags: ${{ env.NFT_TAGS }}
- name: debug
run: |
@@ -69,7 +64,7 @@ jobs:
needs: [nf-test-changes]
if: ${{ needs.nf-test-changes.outputs.total_shards != '0' }}
runs-on: # use self-hosted runners
- - runs-on=$-nf-test
+ - runs-on=${{ github.run_id }}-nf-test
- runner=4cpu-linux-x64
- disk=large
strategy:
@@ -86,7 +81,7 @@ jobs:
- isMain: false
profile: "singularity"
NXF_VER:
- - "24.04.2"
+ - "24.10.5"
- "latest-everything"
env:
NXF_ANSI_LOG: false
@@ -98,23 +93,40 @@ jobs:
fetch-depth: 0
- name: Run nf-test
+ id: run_nf_test
uses: ./.github/actions/nf-test
+ continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }}
env:
- NFT_DIFF: ${{ env.NFT_DIFF }}
- NFT_DIFF_ARGS: ${{ env.NFT_DIFF_ARGS }}
NFT_WORKDIR: ${{ env.NFT_WORKDIR }}
with:
profile: ${{ matrix.profile }}
shard: ${{ matrix.shard }}
total_shards: ${{ env.TOTAL_SHARDS }}
+ tags: ${{ env.NFT_TAGS }}
+
+ - name: Report test status
+ if: ${{ always() }}
+ run: |
+ if [[ "${{ steps.run_nf_test.outcome }}" == "failure" ]]; then
+ echo "::error::Test with ${{ matrix.NXF_VER }} failed"
+ # Add to workflow summary
+ echo "## ❌ Test failed: ${{ matrix.profile }} | ${{ matrix.NXF_VER }} | Shard ${{ matrix.shard }}/${{ env.TOTAL_SHARDS }}" >> $GITHUB_STEP_SUMMARY
+ if [[ "${{ matrix.NXF_VER }}" == "latest-everything" ]]; then
+ echo "::warning::Test with latest-everything failed but will not cause workflow failure. Please check if the error is expected or if it needs fixing."
+ fi
+ if [[ "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then
+ exit 1
+ fi
+ fi
+
confirm-pass:
needs: [nf-test]
if: always()
runs-on: # use self-hosted runners
- - runs-on=$-confirm-pass
+ - runs-on=${{ github.run_id }}-confirm-pass
- runner=2cpu-linux-x64
steps:
- - name: One or more tests failed
+ - name: One or more tests failed (excluding latest-everything)
if: ${{ contains(needs.*.result, 'failure') }}
run: exit 1
@@ -133,11 +145,3 @@ jobs:
echo "DEBUG: toJSON(needs) = ${{ toJSON(needs) }}"
echo "DEBUG: toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}"
echo "::endgroup::"
-
- - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner
- if: always()
- run: |
- ls -la ./
- rm -rf ./* || true
- rm -rf ./.??* || true
- ls -la ./
diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml
index 4abaf484..0f732495 100644
--- a/.github/workflows/release-announcements.yml
+++ b/.github/workflows/release-announcements.yml
@@ -30,7 +30,7 @@ jobs:
bsky-post:
runs-on: ubuntu-latest
steps:
- - uses: zentered/bluesky-post-action@4aa83560bb3eac05dbad1e5f221ee339118abdd2 # v0.2.0
+ - uses: zentered/bluesky-post-action@6461056ea355ea43b977e149f7bf76aaa572e5e8 # v0.3.0
with:
post: |
Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
diff --git a/.nf-core.yml b/.nf-core.yml
index 6e85cf9a..d7927b18 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -1,5 +1,5 @@
repository_type: pipeline
-nf_core_version: 3.3.1
+nf_core_version: 3.3.2
template:
author: Stephen Watts
description: A comprehensive cancer DNA/RNA analysis and reporting pipeline
@@ -8,13 +8,12 @@ template:
name: oncoanalyser
org: nf-core
outdir: .
- version: 2.1.0
skip_features:
+ - gpu
- nf-test
+ version: 2.2.0
lint:
actions_ci: false
- multiqc_config: false
- nf_test_content: false
files_exist:
- lib/Utils.groovy
- lib/WorkflowMain.groovy
@@ -23,7 +22,9 @@ lint:
- assets/nf-core-oncoanalyser_logo_light.png
- docs/images/nf-core-oncoanalyser_logo_dark.png
- docs/images/nf-core-oncoanalyser_logo_light.png
+ multiqc_config: false
nextflow_config:
- config_defaults:
- params.fastp_umi_length
- params.fastp_umi_skip
+ nf_test_content: false
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9d0b248d..bb41beec 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,7 +4,7 @@ repos:
hooks:
- id: prettier
additional_dependencies:
- - prettier@3.5.0
+ - prettier@3.6.2
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8a6cd2a4..94543a53 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,67 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project mostly adheres to
[Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [[2.2.0](https://github.com/nf-core/oncoanalyser/releases/tag/2.2.0)] Royal Spoonbill - 2025-08-02
+
+- [241](https://github.com/nf-core/oncoanalyser/pull/241) - Apply minor fixes and updates
+ - Allow 'prepare reference' feature to be driven by samplesheet
+ - Set minimum stringr / stringi version for CUPPA environment
+ - Reintroduce decoy sequences for ESVEE with GRCh37 genomes
+ - Update WiGiTS reference data paths
+ - Improve FASTQ and longitudinal sample input handling
+ - Fix REDUX TSV collection in SAGE append subworkflow
+ - Update CHANGELOG.md
+- [235](https://github.com/nf-core/oncoanalyser/pull/235) - Publish selected command / log files
+- [234](https://github.com/nf-core/oncoanalyser/pull/234) - Update WiGiTS tools and reference data
+- [233](https://github.com/nf-core/oncoanalyser/pull/233) - Update documentation
+- [232](https://github.com/nf-core/oncoanalyser/pull/232) - Extend the 'prepare reference' functionality
+- [231](https://github.com/nf-core/oncoanalyser/pull/231) - Implement 'purity estimate' (WISP) workflow
+- [230](https://github.com/nf-core/oncoanalyser/pull/230) - Implement 'panel resource creation' workflow
+- [220](https://github.com/nf-core/oncoanalyser/pull/220) - Add reports to tower.yml
+- [222](https://github.com/nf-core/oncoanalyser/pull/222) - Post-release bump
+
+### Software dependencies
+
+| Dependency | Old version | New version |
+| ------------------ | ----------- | ----------- |
+| `AMBER` | 4.1.1 | 4.2 |
+| `BamTools` | 1.3 | 1.4.2 |
+| `bwa-plus` | 1.0.0 | - |
+| `bwa-mem2` | - | 2.3 |
+| `CHORD` | 2.1.0 | 2.1.2 |
+| `COBALT` | 2.0 | 2.1 |
+| `ESVEE` | 1.0.3 | 1.1.2 |
+| `ISOFOX` | 1.7.1 | 1.7.2 |
+| `LILAC` | 1.6 | 1.7.1 |
+| `LINX` | 2.0.2 | 2.1 |
+| `NEO` | 1.2 | 1.2.1 |
+| `ORANGE` | 3.8.1 | 4.1 |
+| `PAVE` | 1.7.1 | 1.8 |
+| `PURPLE` | 4.1 | 4.2 |
+| `REDUX` | 1.1.2 | 1.2 |
+| `SAGE` | 4.0 | 4.1 |
+| `VirusInterpreter` | 1.7 | 1.7.1 |
+| `WISP` | - | 1.2 |
+
+### Reference data
+
+| Name | Old version | New version |
+| ------------------------ | ----------- | ----------- |
+| `HMF pipeline resources` | `2.1.0--1` | `2.2.0--3` |
+| `HMF TSO500 resources` | `2.0.0--3` | `2.2.0--3` |
+
+### Parameters
+
+| Old name | New name |
+| ----------- | ---------------------- |
+| `fastp_umi` | `fastp_umi_enabled` |
+| `redux_umi` | `redux_umi_enabled` |
+| - | `purity_estimate_mode` |
+| - | `ref_data_types` |
+| - | `driver_gene_panel` |
+| - | `target_regions_bed` |
+| - | `hmftools_log_level` |
+
## [[2.1.0](https://github.com/nf-core/oncoanalyser/releases/tag/2.1.0)] Peruvian Pelican - 2025-06-30
- [219](https://github.com/nf-core/oncoanalyser/pull/219) - Add metromap-style diagram for pipeline overview
diff --git a/README.md b/README.md
index e4d7f621..6cd186ca 100644
--- a/README.md
+++ b/README.md
@@ -5,14 +5,14 @@
-[](https://github.com/nf-core/oncoanalyser/actions/workflows/ci.yml)
+[](https://github.com/nf-core/oncoanalyser/actions/workflows/nf-test.yml)
[](https://github.com/nf-core/oncoanalyser/actions/workflows/linting.yml)
[](https://nf-co.re/oncoanalyser/results)
[](https://doi.org/10.5281/zenodo.15189386)
[](https://www.nf-test.com)
-[](https://www.nextflow.io/)
-[](https://github.com/nf-core/tools/releases/tag/3.3.1)
+[](https://www.nextflow.io/)
+[](https://github.com/nf-core/tools/releases/tag/3.3.2)
[](https://docs.conda.io/en/latest/)
[](https://www.docker.com/)
[](https://sylabs.io/docs/)
@@ -25,27 +25,35 @@
## Introduction
-**nf-core/oncoanalyser** is a Nextflow pipeline for the comprehensive analysis of cancer genomes and transcriptomes
+**nf-core/oncoanalyser** is a Nextflow pipeline for the comprehensive analysis of cancer DNA and RNA sequencing data
using the [WiGiTS](https://github.com/hartwigmedical/hmftools) toolkit from the Hartwig Medical Foundation. The pipeline
supports a wide range of experimental setups:
-- FASTQ, BAM, or CRAM input files
-- WGS (whole genome sequencing), WTS (whole transcriptome sequencing), and targeted / panel sequencing (built-in support
- for the [TSO500
- panel](https://sapac.illumina.com/products/by-type/clinical-research-products/trusight-oncology-500.html) with other
- panels and exome requiring [panel reference data
- generation](https://github.com/hartwigmedical/hmftools/blob/master/pipeline/README_TARGETED.md))
-- Paired tumor / normal and tumor-only sample setups, donor sample support for further normal subtraction (e.g. for
- patients with bone marrow transplants or other contaminants in the tumor)
+- FASTQ, BAM, and / or CRAM input files
+- WGS (whole genome sequencing), WTS (whole transcriptome sequencing), and targeted / panel sequencing<sup>1</sup>
+- Paired tumor / normal and tumor-only samples, and support for donor samples for further normal subtraction
+- Purity estimate for longitudinal samples using genomic features of the primary sample from the same patient<sup>2</sup>
- UMI (unique molecular identifier) processing supported for DNA sequencing data
- Most GRCh37 and GRCh38 reference genome builds
+<sup>1</sup> built-in support for the [TSO500
+panel](https://www.illumina.com/products/by-type/clinical-research-products/trusight-oncology-500.html) with other
+panels and exomes requiring [creation of custom panel reference
+data](https://nf-co.re/oncoanalyser/usage#custom-panels)
+
+<sup>2</sup> for example a primary WGS tissue biopsy and longitudinal low-pass WGS ccfDNA sample taken from the
+same patient
+
## Pipeline overview
-The pipeline mainly uses tools from [WiGiTS](https://github.com/hartwigmedical/hmftools), as well as some external
-tools. Due to the limitations of panel data, certain tools (indicated with `*` below) do not run in `targeted` mode.
+The pipeline mainly uses tools from [WiGiTS](https://github.com/hartwigmedical/hmftools), as well as some other external
+tools. There are [several workflows available](https://nf-co.re/oncoanalyser/usage#introduction) in `oncoanalyser` and
+the tool information below primarily relates to the `wgts` and `targeted` analysis modes.
+
+> [!NOTE]
+> Due to the limitations of panel data, certain tools (indicated with `*` below) do not run in `targeted` mode.
- Read alignment: [BWA-MEM2](https://github.com/bwa-mem2/bwa-mem2) (DNA), [STAR](https://github.com/alexdobin/STAR) (RNA)
- Read post-processing: [REDUX](https://github.com/hartwigmedical/hmftools/tree/master/redux) (DNA), [Picard MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard) (RNA)
@@ -63,6 +71,10 @@ tools. Due to the limitations of panel data, certain tools (indicated with `*` b
- Pharmacogenomics: [PEACH](https://github.com/hartwigmedical/hmftools/tree/master/peach)
- Summary report: [ORANGE](https://github.com/hartwigmedical/hmftools/tree/master/orange), [linxreport](https://github.com/umccr/linxreport)
+For the `purity_estimate` mode, several of the above tools are run with adjusted configuration in addition to the following.
+
+- Tumor fraction estimation: [WISP](https://github.com/hartwigmedical/hmftools/tree/master/wisp)
+
## Usage
> [!NOTE]
@@ -82,7 +94,7 @@ Launch `oncoanalyser`:
```bash
nextflow run nf-core/oncoanalyser \
-profile \
- -revision 2.1.0 \
+ -revision 2.2.0 \
--mode \
--genome \
--input samplesheet.csv \
diff --git a/conf/base.config b/conf/base.config
index 0c51cce6..27564110 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -63,5 +63,6 @@ process {
}
withLabel: process_gpu {
ext.use_gpu = { workflow.profile.contains('gpu') }
+ accelerator = { workflow.profile.contains('gpu') ? 1 : null }
}
}
diff --git a/conf/hmf_data.config b/conf/hmf_data.config
index fbd9a445..4cbbadf0 100644
--- a/conf/hmf_data.config
+++ b/conf/hmf_data.config
@@ -9,23 +9,21 @@ params {
// CUPPA
cuppa_alt_sj = 'misc/cuppa/alt_sj.selected_loci.37.tsv.gz'
cuppa_classifier = 'misc/cuppa/cuppa_classifier.37.pickle.gz'
- // SV Prep
- sv_prep_blocklist = 'dna/sv/sv_prep_blacklist.37.bed'
// ESVEE
decoy_sequences_image = 'dna/sv/hg38_decoys.fa.img'
gridss_pon_breakends = 'dna/sv/sgl_pon.37.bed.gz'
gridss_pon_breakpoints = 'dna/sv/sv_pon.37.bedpe.gz'
repeatmasker_annotations = 'dna/sv/repeat_mask_data.37.fa.gz'
// Isofox
- alt_sj_distribution = 'rna/isofox.hmf_3444.alt_sj_cohort.37.csv'
- gene_exp_distribution = 'rna/isofox.hmf_3444.gene_distribution.37.csv'
+ alt_sj_distribution = 'rna/isofox.hmf_3444.alt_sj_cohort.37.csv.gz'
+ gene_exp_distribution = 'rna/isofox.hmf_3444.gene_distribution.37.csv.gz'
isofox_counts = 'rna/read_151_exp_counts.37.csv'
isofox_gc_ratios = 'rna/read_100_exp_gc_ratios.37.csv'
// LILAC
lilac_resources = 'misc/lilac/'
// Neo
neo_resources = 'misc/neo/binding/'
- cohort_tpm_medians = 'misc/neo/tpm_cohort/isofox.hmf_3444.transcript_medians.37.csv'
+ cohort_tpm_medians = 'misc/neo/tpm_cohort/isofox.hmf_3444.transcript_medians.37.csv.gz'
// CIDER
cider_blastdb = 'misc/cider/blastdb/'
// PEACH
@@ -76,23 +74,21 @@ params {
// CUPPA
cuppa_alt_sj = 'misc/cuppa/alt_sj.selected_loci.38.tsv.gz'
cuppa_classifier = 'misc/cuppa/cuppa_classifier.38.pickle.gz'
- // SV Prep
- sv_prep_blocklist = 'dna/sv/sv_prep_blacklist.38.bed'
// ESVEE
decoy_sequences_image = []
gridss_pon_breakends = 'dna/sv/sgl_pon.38.bed.gz'
gridss_pon_breakpoints = 'dna/sv/sv_pon.38.bedpe.gz'
repeatmasker_annotations = 'dna/sv/repeat_mask_data.38.fa.gz'
// Isofox
- alt_sj_distribution = 'rna/isofox.hmf_3444.alt_sj_cohort.38.csv'
- gene_exp_distribution = 'rna/isofox.hmf_3444.gene_distribution.38.csv'
+ alt_sj_distribution = 'rna/isofox.hmf_38_151_2600.alt_sj_cohort.csv.gz'
+ gene_exp_distribution = 'rna/isofox.hmf_38_151_2600.gene_distribution.csv.gz'
isofox_counts = 'rna/read_151_exp_counts.38.csv'
isofox_gc_ratios = 'rna/read_100_exp_gc_ratios.38.csv'
// LILAC
lilac_resources = 'misc/lilac/'
// Neo
neo_resources = 'misc/neo/binding/'
- cohort_tpm_medians = 'misc/neo/tpm_cohort/isofox.hmf_3444.transcript_medians.38.csv'
+ cohort_tpm_medians = 'misc/neo/tpm_cohort/isofox.hmf_38_151_2600.transcript_medians.csv.gz'
// CIDER
cider_blastdb = 'misc/cider/blastdb/'
// PEACH
diff --git a/conf/modules.config b/conf/modules.config
index 3293e4bf..aa148a85 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -13,9 +13,8 @@
process {
withName: 'WRITE_REFERENCE_DATA' {
- def date = new java.util.Date().format('yyyyMMdd_HHmmss');
publishDir = [
- path: { "${params.outdir}/reference_data/${workflow_version}/${date}" },
+ path: { "${params.outdir}/reference_data/${workflow.manifest.version}" },
mode: params.publish_dir_mode,
]
}
@@ -32,103 +31,124 @@ process {
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/alignments/rna/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/alignments/rna/${filename}") },
]
}
withName: 'REDUX' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/alignments/dna/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/alignments/dna/${filename}") },
]
}
withName: 'AMBER' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") },
]
}
withName: 'COBALT' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") },
]
}
- withName: '.*:ESVEE_CALLING:ESVEE_(PREP|ASSEMBLE|DEPTH_ANNOTATOR|CALL)' {
+ withName: '.*:ESVEE_CALLING:ESVEE' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/esvee/${filename}" }
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") },
]
}
withName: '.*:SAGE_CALLING:SAGE_GERMLINE' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/sage/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/sage_calling/${filename}") },
]
}
withName: '.*:SAGE_CALLING:SAGE_SOMATIC' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/sage/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/sage_calling/${filename}") },
]
}
- withName: '.*:SAGE_APPEND:SAGE_APPEND_(?:GERMLINE|SOMATIC)' {
+ withName: '.*:SAGE_APPEND:SAGE_APPEND_GERMLINE' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/sage/append/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/sage_append/germline") },
+ ]
+ }
+
+ withName: '.*:SAGE_APPEND:SAGE_APPEND_SOMATIC' {
+ ext.log_level = { "${params.hmftools_log_level}" }
+ publishDir = [
+ path: { "${params.outdir}" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/sage_append/somatic") },
]
}
withName: '.*:PAVE_ANNOTATION:PAVE_(?:GERMLINE|SOMATIC)' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/pave/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/pave/${filename}") },
]
}
withName: 'PURPLE' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") },
]
}
withName: '.*:LINX_ANNOTATION:LINX_GERMLINE' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/linx/germline_annotations/" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/linx/germline_annotations/") },
]
}
withName: '.*:LINX_ANNOTATION:LINX_SOMATIC' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/linx/somatic_annotations/" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/linx/somatic_annotations/") },
]
}
withName: '.*:LINX_PLOTTING:LINX_VISUALISER' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/linx/somatic_plots/" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/linx/somatic_plots/") },
]
}
@@ -136,31 +156,34 @@ process {
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/linx/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/linx/${filename}") },
]
}
withName: 'CIDER' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") },
]
}
withName: 'BAMTOOLS' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/bamtools/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/bamtools/${filename}") },
]
}
withName: 'CHORD' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") },
]
}
@@ -171,18 +194,20 @@ process {
}
withName: 'LILAC' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" }
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") },
]
}
withName: 'SIGS' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") },
]
}
@@ -190,7 +215,7 @@ process {
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.id}/teal/${new File(filename).name}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.id}/teal/${new File(filename).name}") },
]
}
@@ -198,80 +223,138 @@ process {
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/virusbreakend/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/virusbreakend/${filename}") },
]
}
withName: 'VIRUSINTERPRETER' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") },
]
}
withName: 'ISOFOX' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") },
]
}
withName: 'NEO_SCORER' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/neo/scorer/" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/neo/scorer/") },
]
}
withName: '.*:NEO_PREDICTION:NEO_ANNOTATE_FUSIONS' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/neo/annotated_fusions/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/neo/annotated_fusions/${filename}") },
]
}
withName: 'NEO_FINDER' {
+ ext.log_level = { "${params.hmftools_log_level}" }
+ publishDir = [
+ path: { "${params.outdir}" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/neo/finder/") },
+ ]
+ }
+
+ withName: 'WISP' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/neo/finder/" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") },
]
}
withName: 'CUPPA' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") },
]
}
withName: 'PEACH' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" },
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") },
]
}
withName: 'ORANGE' {
+ ext.log_level = { "${params.hmftools_log_level}" }
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
// NOTE(SW): java.io.File and Nextflow's file do not work here, resorting to string splitting
- saveAs: { filename ->
- if (filename.equals('versions.yml')) {
- return null
- } else {
- def tokens = filename.split('[/]')
- return "${meta.key}/orange/${tokens[-1]}"
- }
- }
+ saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/orange/${filename.split('[/]')[-1]}") },
+ ]
+ }
+
+    withName: 'COBALT_PANEL_NORMALISATION' {
+        ext.log_level = { "${params.hmftools_log_level}" }
+        publishDir = [
+            path: { "${params.outdir}" },
+            mode: params.publish_dir_mode,
+            // The trailing `true` is get_saveas_path's positional panel_resource_creation flag; Groovy has no
+            // `name = value` keyword arguments, so writing it that way would be an assignment expression
+            saveAs: { filename -> get_saveas_path(meta, task, filename, "panel_resources/${filename}", true) },
+        ]
+    }
+
+    withName: 'PAVE_PON_PANEL_CREATION' {
+        ext.log_level = { "${params.hmftools_log_level}" }
+        publishDir = [
+            path: { "${params.outdir}" },
+            mode: params.publish_dir_mode,
+            // The trailing `true` is get_saveas_path's positional panel_resource_creation flag; Groovy has no
+            // `name = value` keyword arguments, so writing it that way would be an assignment expression
+            saveAs: { filename -> get_saveas_path(meta, task, filename, "panel_resources/${filename}", true) },
+        ]
+    }
+
+    withName: 'ISOFOX_PANEL_NORMALISATION' {
+        ext.log_level = { "${params.hmftools_log_level}" }
+        publishDir = [
+            path: { "${params.outdir}" },
+            mode: params.publish_dir_mode,
+            // The trailing `true` is get_saveas_path's positional panel_resource_creation flag; Groovy has no
+            // `name = value` keyword arguments, so writing it that way would be an assignment expression
+            saveAs: { filename -> get_saveas_path(meta, task, filename, "panel_resources/${filename}", true) },
+        ]
+    }
}
+
+// Resolve the publish destination for one task output file; a null result means the file is not published
+def get_saveas_path(meta, task, filename, path, panel_resource_creation=false) {
+    // versions.yml is never published
+    if (filename.equals('versions.yml')) {
+        return null
+    }
+    // Anything that is not a '.command.' file is published to the caller-supplied path
+    if (!filename.contains('.command.')) {
+        return path
+    }
+    // Of the '.command.' files only the sh / out / err / log / run ones are published
+    if (!(filename ==~ /.*\.command\.(sh|out|err|log|run)/)) {
+        return null
+    }
+    // Lowercase the process name and strip everything up to and including its last ':'
+    def simple_name = task.process.toLowerCase().replaceFirst(/^.+:/, '')
+    return panel_resource_creation ?
+        "panel_resources/logs/${simple_name}${filename}" :
+        "${meta.key}/logs/${meta.id}.${simple_name}${filename}"
+}
diff --git a/conf/panel_data.config b/conf/panel_data.config
index 352f5dd2..80de6843 100644
--- a/conf/panel_data.config
+++ b/conf/panel_data.config
@@ -5,33 +5,29 @@ params {
tso500 {
'37' {
- driver_gene_panel = 'common/DriverGenePanel.tso500.37.tsv'
- sage_actionable_panel = 'variants/ActionableCodingPanel.tso500.37.bed.gz'
- sage_coverage_panel = 'variants/CoverageCodingPanel.tso500.37.bed.gz'
- pon_artefacts = 'variants/pon_artefacts.tso500.37.tsv.gz'
- target_region_bed = 'copy_number/target_regions_definition.tso500.37.bed.gz'
- target_region_normalisation = 'copy_number/cobalt_normalisation.tso500.37.tsv'
- target_region_ratios = 'copy_number/target_regions_ratios.tso500.37.tsv'
- target_region_msi_indels = 'copy_number/target_regions_msi_indels.tso500.37.tsv'
- isofox_tpm_norm = 'rna_resources/isofox.gene_normalisation.tso500.37.csv'
- isofox_gene_ids = 'rna_resources/tso500_rna_gene_ids.csv'
- isofox_counts = 'rna_resources/read_93_exp_counts.37.csv'
- isofox_gc_ratios = 'rna_resources/read_93_exp_gc_ratios.37.csv'
+ driver_gene_panel = 'driver_genes.tso500.37.tsv'
+ pon_artefacts = 'pon_artefacts.tso500.37.tsv.gz'
+ target_region_bed = 'panel_definition.tso500.37.bed.gz'
+ target_region_normalisation = 'cobalt_normalisation.tso500.37.tsv'
+ target_region_ratios = 'tmb_ratio.tso500.37.tsv'
+ target_region_msi_indels = 'msi_indels.tso500.37.tsv'
+ isofox_tpm_norm = 'isofox.gene_normalisation.tso500.37.csv'
+ isofox_gene_ids = 'tso500_rna_gene_ids.csv'
+ isofox_counts = 'read_93_exp_counts.37.csv'
+ isofox_gc_ratios = 'read_93_exp_gc_ratios.37.csv'
}
'38' {
- driver_gene_panel = 'common/DriverGenePanel.tso500.38.tsv'
- sage_actionable_panel = 'variants/ActionableCodingPanel.tso500.38.bed.gz'
- sage_coverage_panel = 'variants/CoverageCodingPanel.tso500.38.bed.gz'
- pon_artefacts = 'variants/pon_artefacts.tso500.38.tsv.gz'
- target_region_bed = 'copy_number/target_regions_definition.tso500.38.bed.gz'
- target_region_normalisation = 'copy_number/cobalt_normalisation.tso500.38.tsv'
- target_region_ratios = 'copy_number/target_regions_ratios.tso500.38.tsv'
- target_region_msi_indels = 'copy_number/target_regions_msi_indels.tso500.38.tsv'
- isofox_tpm_norm = 'rna_resources/isofox.gene_normalisation.tso500.38.csv'
- isofox_gene_ids = 'rna_resources/tso500_rna_gene_ids.csv'
- isofox_counts = 'rna_resources/read_93_exp_counts.38.csv'
- isofox_gc_ratios = 'rna_resources/read_93_exp_gc_ratios.38.csv'
+ driver_gene_panel = 'driver_genes.tso500.38.tsv'
+ pon_artefacts = 'pon_artefacts.tso500.38.tsv.gz'
+ target_region_bed = 'panel_definition.tso500.38.bed.gz'
+ target_region_normalisation = 'cobalt_normalisation.tso500.38.tsv'
+ target_region_ratios = 'tmb_ratio.tso500.38.tsv'
+ target_region_msi_indels = 'msi_indels.tso500.38.tsv'
+ isofox_tpm_norm = 'isofox.gene_normalisation.tso500.38.csv'
+ isofox_gene_ids = 'tso500_rna_gene_ids.csv'
+ isofox_counts = 'read_93_exp_counts.38.csv'
+ isofox_gc_ratios = 'read_93_exp_gc_ratios.38.csv'
}
}
diff --git a/docs/output.md b/docs/output.md
index c2d12e45..cb67d80d 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -3,7 +3,8 @@
## Introduction
This document describes the output produced by the pipeline. The directories listed below will be created in the results
-directory after the pipeline has finished. All paths are relative to the top-level results directory.
+directory for a typical WGTS analysis after the pipeline has finished. All paths are relative to the top-level results
+directory.
```tree
output/
@@ -20,6 +21,7 @@ output/
│ ├── isofox/
│ ├── lilac/
│ ├── linx/
+│ ├── logs/
│ ├── orange/
│ ├── pave/
│ ├── peach/
@@ -76,9 +78,12 @@ output/
- [CUPPA](#cuppa) - Tissue of origin prediction
- [Pharmacogenomics](#pharmacogenomics)
- [PEACH](#peach) - Pharmacogenomic assessment
+- [Tumor fraction estimate](#tumor-fraction-estimate)
+ - [WISP](#wisp) - Tumor fraction estimate for longitudinal samples
- [Report generation](#report-generation)
- [ORANGE](#orange) - Summary report
- [linxreport](#linxreport) - Interactive LINX report
+- [Logs](#logs) - Run command and log files per tool/process
- [Pipeline information](#pipeline-information) - Workflow execution metrics
### Read alignment
@@ -154,12 +159,10 @@ _Picard MarkDuplicates is only run on RNA alignments_
Output files
- `/sage/append/`
-
- `.sage.append.vcf.gz`: Tumor DNA sample small variant VCF with RNA data appended.
- `.sage.append.vcf.gz`: Normal DNA sample small variant VCF with RNA data appended.
- `/sage/somatic/`
-
- `.sage.bqr.png`: Normal DNA sample base quality recalibration metrics plot.
- `.sage.bqr.tsv`: Normal DNA sample base quality recalibration metrics.
- `.sage.bqr.png`: Tumor DNA sample base quality recalibration metrics plot.
@@ -209,7 +212,6 @@ information with regards to transcript and coding effects.
Output files
- `/esvee/prep/`
-
- `.esvee.prep.bam`: Tumor DNA sample BAM with candidate SV reads.
- `.esvee.prep.bam.bai`: Tumor DNA sample BAM index.
- `.esvee.prep.disc_stats.tsv`: Tumor DNA sample discordant reads stats.
@@ -219,7 +221,6 @@ information with regards to transcript and coding effects.
- `.esvee.prep.bam.bai`: Tumor DNA sample BAM index.
- `/esvee/assemble/`
-
- `.esvee.assembly.tsv`: Tumor DNA sample breakend assemblies.
- `.esvee.alignment.tsv`: Tumor DNA sample assemblies realigned to the reference genome.
- `.esvee.breakend.tsv`: Tumor DNA sample breakends.
@@ -228,7 +229,6 @@ information with regards to transcript and coding effects.
- `.esvee.raw.vcf.gz.tbi`: Tumor DNA sample VCF with candidate breakends.
- `/esvee/depth_annotation/`
-
- `.esvee.ref_depth.vcf.gz`: Tumor DNA sample VCF annotated with normal sample read depths.
- `.esvee.ref_depth.vcf.gz.tbi`: Tumor DNA sample VCF index.
@@ -330,7 +330,6 @@ purity/ploidy and annotates both small and structural variant calls with copy-nu
Output files
- `/linx/germline_annotations/`
-
- `linx.version`: LINX version file.
- `.linx.germline.breakend.tsv`: Normal DNA sample breakend data.
- `.linx.germline.clusters.tsv`: Normal DNA sample clustered events.
@@ -340,7 +339,6 @@ purity/ploidy and annotates both small and structural variant calls with copy-nu
- `.linx.germline.svs.tsv`: Normal DNA sample structural variants.
- `/linx/somatic_annotations/`
-
- `linx.version`: LINX version file.
- `.linx.breakend.tsv`: Tumor DNA sample breakend data.
- `.linx.clusters.tsv`: Tumor DNA sample clustered events.
@@ -474,11 +472,9 @@ for each of the IG and TCR loci including an abundance estimate.
Output files
- `/neo/finder/`
-
- `.neo_data.tsv`: Tumor sample neoepitope candidates.
- `/neo/annotated_fusions/`
-
- `.isf.neoepitope.tsv`: Tumor sample annotated Isofox fusions.
- `/neo/scorer/`
@@ -551,7 +547,7 @@ using DNA and/or RNA features generated by upstream WiGiTS components.
- `.peach.events.tsv`: Normal DNA sample variant events.
- `.peach.gene.events.tsv`: Normal DNA sample variant events (linked by gene).
- `.peach.haplotypes.all.tsv`: Normal DNA sample all haplotypes.
- - `.peach.haplotypes.best.tsv`: Normal DNA sample best haplotypes..
+ - `.peach.haplotypes.best.tsv`: Normal DNA sample best haplotypes.
- `.peach.qc.tsv`: PEACH QC file.
@@ -559,6 +555,29 @@ using DNA and/or RNA features generated by upstream WiGiTS components.
[PEACH](https://github.com/hartwigmedical/hmftools/tree/master/peach) infers haplotypes for interpretation in a
pharmacogenomic context.
+### Tumor fraction estimate
+
+#### WISP
+
+
+Output files
+
+- `/wisp/`
+ - `.cn_gc_ratio_fit.png`: Longitudinal sample copy number GC ratio fit.
+ - `_.wisp.amber_loh.tsv`: Longitudinal sample LOH sites used for tumor fraction estimation.
+ - `_.wisp.cn_plot_calcs.tsv`: Longitudinal sample copy number fit coefficients.
+ - `_.wisp.cn_segments.tsv`: Longitudinal sample copy number segments.
+ - `_.wisp.fragment_length.tsv`: Longitudinal sample fragment length stats.
+ - `_.wisp.somatic_peak.tsv`: Longitudinal sample implied tumor fraction per somatic variant.
+ - `_.wisp.somatic_variants.tsv`: Longitudinal sample counts and filtering rules per variant used in the SNV tumor fraction estimate.
+ - `_.wisp.summary.tsv`: Longitudinal sample summary.
+
+
+
+[WISP](https://github.com/hartwigmedical/hmftools/tree/master/wisp) estimates tumor fraction in
+a given sample (typically ctDNA), guided by the biomarkers identified in a prior analysis of the same
+patient (typically a primary tissue biopsy).
+
### Report generation
#### ORANGE
@@ -587,6 +606,20 @@ hmftool components into a single static PDF report.
[linxreport](https://github.com/umccr/linxreport) generates an interactive report containing LINX annotations and plots.
+### Logs
+
+
+Output files
+
+- `/logs/`
+ - `..command.sh`: Run command with tool arguments
+ - `..command.out`: Standard output
+ - `..command.err`: Standard error
+ - `..command.log`: Combined standard output and error (may not exist for some executors)
+
+
+The logs directory stores the `.command.*` files for each tool from the Nextflow work directory.
+
### Pipeline information
diff --git a/docs/usage.md b/docs/usage.md
index 2815cffc..1b9e4178 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -10,53 +10,67 @@ The `oncoanalyser` pipeline typically runs from FASTQ, BAM, or CRAM [input files
most GRCh37 and GRCh38 human [reference genome builds](#custom-genomes), and provides UMI ([unique molecular
identifier](#umi-processing)) processing for DNA sequencing data.
-The pipeline supports two workflow modes: (1) whole genome and/or transcriptome, and (2) targeted panel. Both modes
-accept DNA and RNA sequencing data from matched tumor / normal (with optional
-[donor](#paired-tumor-and-normal-dna-with-donor-sample) sample) and tumor-only samples. The below table shows the
-supported [sample setups](#sample-setups):
-
-| Data Type | Tumor DNA | Normal DNA | Donor DNA | Tumor RNA |
-| --------- | ------------------ | ------------------ | ------------------ | ------------------ |
-| DNA | :white_check_mark: | - | - | - |
-| DNA | :white_check_mark: | :white_check_mark: | - | - |
-| DNA | :white_check_mark: | :white_check_mark: | :white_check_mark: | - |
-| DNA + RNA | :white_check_mark: | - | - | :white_check_mark: |
-| DNA + RNA | :white_check_mark: | :white_check_mark: | - | :white_check_mark: |
-| DNA + RNA | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
-| RNA | - | - | - | :white_check_mark: |
+Two main analysis modes are supported by `oncoanalyser`:
-## Running the pipeline
+- [**wgts**](#whole-genome--transcriptome-sequencing-wgts): whole genome and/or transcriptome sequencing
+- [**targeted**](#targeted-sequencing): targeted/panel sequencing
-:::tip
+Both modes accept various combinations of DNA and/or RNA sequencing data from tumor-only or matched tumor / normal samples (with an optional
+[donor](#paired-tumor-and-normal-dna-with-donor-sample) sample). The below table shows the supported [sample setups](#sample-setups):
-Jump to [FAQ and troubleshooting](/oncoanalyser/2.1.0/docs/usage/faq_and_troubleshooting)
+| DNA samples | RNA samples |
+| ------------------------ | ----------- |
+| `tumor` | - |
+| `tumor`+`normal` | - |
+| `tumor`+`normal`+`donor` | - |
+| `tumor` | `tumor` |
+| `tumor`+`normal` | `tumor` |
+| `tumor`+`normal`+`donor` | `tumor` |
+| - | `tumor` |
-:::
+Besides the main analysis modes, several other modes are also available:
+
+- [**purity_estimate**](#purity-estimate): tumor fraction estimation in longitudinal samples (e.g. for MRD)
+- [**prepare_reference**](#automatic-staging): staging genomes and WiGiTS tool reference data
+- [**panel_resource_creation**](#custom-panels): creating reference data for custom panels
+
+## Running the pipeline
+
+If you intend to run `oncoanalyser` on more than one sample, we recommend first [staging](#staging-reference-data) and
+[configuring](#configuring-reference-data) reference data (genome and tool specific data). Otherwise, reference data is
+automatically staged every run resulting in unnecessary disk/network usage.
A typical command for running `oncoanalyser` is shown below:
```bash
+# Providing -config is optional but recommended
nextflow run nf-core/oncoanalyser \
+ -revision 2.2.0 \
+ -config reference_data.config \
-profile docker \
- -revision 2.1.0 \
--mode wgts \
--genome GRCh38_hmf \
--input samplesheet.csv \
+ --outdir output/
+ --outdir output/ \
```
-The [samplesheet](#samplesheet) provided to `--input` argument contains input sample details and corresponding files to
-be analysed.
+Below is a brief description of each argument:
-Additionally, various features of `oncoanalyser` can be configured by using a file provided to the `-config` argument.
-This is generally recommended and it can be used to customise a number of settings or resources including:
+- `-profile`: [configuration presets](#-profile) for different compute environments
+- `-revision`: `oncoanalyser` version to run (can be a git [tag](https://github.com/nf-core/oncoanalyser/tags), [branch](https://github.com/nf-core/oncoanalyser/branches), or commit hash)
+- `--mode`: [run mode](#run-modes)
+- `--genome`: genome version, typically `GRCh38_hmf` or `GRCh37_hmf`
+- `--input`: the [samplesheet](#samplesheet) containing sample details and corresponding files to be analysed
+- `--outdir`: output directory
+- `-config`: one or more configuration files for customising e.g. genome and tool specific data (as mentioned above),
+ normalisation data for [custom panels](#custom-panels) (TSO500 panel supported by default), [compute resources](#compute-resources), or
+ [other configuration](#custom-configuration)
+
+:::tip
-- Reference genome and tool specific data: it is strongly recommended to [stage](#staging-reference-data) these files.
- Otherwise, `oncoanalyser` automatically stages them every run resulting in unnecessary disk/network usage
-- Panel normalisation data: all panels except the built-in TSO500 panel require [additional
- setup](#panel-reference-data) of reference data
-- [Other configuration](#custom-configuration): this may include [compute resources](#compute-resources) or [UMI
- settings](#umi-processing)
+If you encounter any issues setting up or running `oncoanalyser`, please see
+[FAQ and troubleshooting](/oncoanalyser/2.2.0/docs/usage/faq_and_troubleshooting)
+
+:::
### Outputs
@@ -87,7 +101,7 @@ outdir: 'output/'
and be run using this command:
```bash
-nextflow run nf-core/oncoanalyser -revision 2.1.0 -profile docker -params-file params.yaml
+nextflow run nf-core/oncoanalyser -revision 2.2.0 -profile docker -params-file params.yaml
```
You can also generate such `yaml`/`json` files via [nf-core/launch](https://nf-co.re/launch).
@@ -104,7 +118,7 @@ nextflow pull nf-core/oncoanalyser
It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since.
-First, go to the [nf-core/oncoanalyser releases page](https://github.com/nf-core/oncoanalyser/releases) and find the latest pipeline version - numeric only (e.g. `2.1.0`). Then specify this when running the pipeline with `-revision` (one hyphen) - e.g. `-revision 2.1.0`. Of course, you can switch to another version by changing the number after the `-revision` flag.
+First, go to the [nf-core/oncoanalyser releases page](https://github.com/nf-core/oncoanalyser/releases) and find the latest pipeline version - numeric only (e.g. `2.2.0`). Then specify this when running the pipeline with `-revision` (one hyphen) - e.g. `-revision 2.2.0`. Of course, you can switch to another version by changing the number after the `-revision` flag.
This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, in the `/pipeline_info/software_versions.yml` file.
@@ -128,7 +142,7 @@ row as the first line with the below columns:
| `sample_id` | Sample identifier |
| `sample_type` | Sample type: `tumor`, `normal` |
| `sequence_type` | Sequence type: `dna`, `rna` |
-| `filetype` | File type: e.g. `fastq`, `bam`, `bai`; a full list of valid values can be found [here](https://github.com/nf-core/oncoanalyser/blob/2.1.0/lib/Constants.groovy#L56) |
+| `filetype` | File type: e.g. `fastq`, `bam`, `bai`; a full list of valid values can be found [here](https://github.com/nf-core/oncoanalyser/blob/2.2.0/lib/Constants.groovy#L80) |
| `info` | Additional sample information such as sequencing library and lane for [FASTQ](#fastq) files, this column is only required when running an analysis from FASTQ |
| `filepath` | Absolute filepath to input file, which can be a local filepath or supported protocol (http, https, ftp, s3, az, gz) |
@@ -164,7 +178,7 @@ Currently only gzip compressed, non-interleaved paired-end FASTQ files are curre
:::
-#### BAM and CRAM
+#### BAM
To run from BAM, specify `bam` in the `filetype` field:
@@ -183,27 +197,74 @@ PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bam,/path/to/PATIENT1-T.dna.bam
PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bai,/other/dir/PATIENT1-T.dna.bam.bai
```
-To run from CRAM, simply provide the CRAM and optionally the CRAM index with `bam` or `bai` in the `filetype` field:
+#### CRAM
-```csv title="samplesheet.cram_crai.csv"
+:::info
+
+To run analyses starting from CRAM, you must use the CRAM format version ≤3.0 with the reference fully embedded. An
+example command converting to the appropriate CRAM format is shown:
+
+```bash
+samtools view \
+ --cram \
+ --output-fmt-option version=3.0 \
+ --output-fmt-option embed_ref=1 \
+ --output-fmt-option reference=/path/to/reference.fasta \
+ --output sample.cram \
+ --threads 4 \
+ --write-index \
+ sample.bam
+```
+
+:::
+
+To run from CRAM, use `cram` and `crai` in the `filetype` field. `crai` only needs to be provided if the CRAM index is
+not in the same directory as the CRAM file:
+
+```csv title="samplesheet.cram.csv"
+group_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath
+PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,cram,/path/to/PATIENT1-T.dna.cram
+PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,crai,/other/dir/PATIENT1-T.dna.cram.crai
+```
+
+Similarly, for REDUX CRAMs, provide `cram_redux` and optionally `crai`:
+
+```csv title="samplesheet.redux_cram.csv"
group_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath
-PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bam,/path/to/PATIENT1-T.dna.cram
-PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bai,/other/dir/PATIENT1-T.dna.cram.crai
+PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,cram_redux,/path/to/PATIENT1-T.dna.redux.cram
+PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,crai,/other/dir/PATIENT1-T.dna.redux.cram.crai
```
-#### REDUX BAM
+:::warning
+
+There is a fixed performance cost associated with reading CRAM files. This means the time it takes to read large CRAMs
+vs BAMs is similar, whereas reading small CRAMs can take significantly longer (>10x) than reading small BAMs. If you
+have small CRAMs (e.g. <10GB), it will be faster to decompress the CRAM into a BAM, and then run `oncoanalyser` with
+this BAM.
+
+This performance issue is due to how CRAM reading is implemented in
+[htsjdk](https://github.com/samtools/htsjdk/blob/master/src/main/java/htsjdk/samtools/CRAMFileReader.java) (which is
+used throughout the WiGiTS tools). We plan to address this issue in future releases of `oncoanalyser`.
+
+:::
+
+#### REDUX BAM / CRAM
When running an analysis with DNA data from FASTQ, two of the most time consuming and resource intensive pipeline steps
are [BWA-MEM2](https://github.com/bwa-mem2/bwa-mem2) read alignment and
-[REDUX](https://github.com/hartwigmedical/hmftools/tree/master/redux) alignment processing. Where the REDUX output BAM
-already exists for a given sample from a prior analysis, these read alignment and processing steps can be skipped by
-providing the REDUX BAM as `bam_redux` in the `filetype` field. The REDUX BAM index can also optionally be provided with
-`filetype` as `bai` if required.
+[REDUX](https://github.com/hartwigmedical/hmftools/tree/master/redux) alignment processing.
+
+`oncoanalyser` can be run starting from REDUX BAMs or CRAMs if they already exist from a prior analysis.
+
+For REDUX BAMs/CRAMs, provide `bam_redux`/`cram_redux` in the `filetype` field, and optionally provide the BAM/CRAM index as `bai`/`crai` (only required
+if indexes are not in the same directory as the BAM/CRAM):
```csv title="samplesheet.redux_bam_bai.csv"
group_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath
PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bam_redux,/path/to/PATIENT1-T.dna.redux.bam
PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bai,/other/dir/PATIENT1-T.dna.redux.bam.bai
+PATIENT2,PATIENT2,PATIENT2-T,tumor,dna,cram_redux,/path/to/PATIENT2-T.dna.redux.cram
+PATIENT2,PATIENT2,PATIENT2-T,tumor,dna,crai,/other/dir/PATIENT2-T.dna.redux.cram.crai
```
The `*.jitter_params.tsv` and `*.ms_table.tsv.gz` REDUX output files are expected to be in the same directory as the
@@ -226,10 +287,10 @@ You can also [start from existing inputs](#starting-from-existing-inputs) other
:::warning
-When starting from REDUX BAM, the filenames must have the format:
+When starting from REDUX BAM/CRAM, the filenames must have the format:
-- `.redux.bam`
-- `.redux.bam.bai`
+- `.redux.bam` or `.redux.cram`
+- `.redux.bam.bai` or `.redux.cram.crai`
- `.jitter_params.tsv`
- `.ms_table.tsv.gz`
@@ -312,43 +373,46 @@ documentation](https://github.com/hartwigmedical/hmftools/blob/master/pipeline/R
### Staging reference data
By default `oncoanalyser` will download the required pre-configured reference data (based on the provided samplesheet
-and CLI arguments) to the Nextflow work directory during every run before proceeding with the analysis. It is therefore
-strongly recommended to first stage and configure reference data to avoid repeated retrieval when performing multiple
-`oncoanalyser` analyses.
+and CLI arguments) to the Nextflow work directory during every run before proceeding with the analysis.
-#### Automatic staging
+However, it is strongly recommended to first stage and configure reference data to avoid repeated retrieval when
+performing multiple `oncoanalyser` analyses. See below for instructions.
-All reference data required for an analysis can be staged and prepared automatically by `oncoanalyser`. This is done by
-configuring the desired analysis and then including the `--prepare_reference_only` argument, which causes `oncoanalyser`
-to write reference data to the specified output directory without running the full pipeline.
+#### Automatic staging
-For example the below samplesheet and command for analysing DNA data in `wgts` mode will stage the required `GRCh38_hmf`
-genome (and indexes) and [WiGiTS](https://github.com/hartwigmedical/hmftools) resources files. As this analysis only
-involves WGS data, no reference data files related to RNA or the `panel` mode will be retrieved.
-
-```csv title="samplesheet.tn_dna.csv"
-group_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath
-PATIENT1,PATIENT1,PATIENT1-N,normal,dna,bam,/path/to/PATIENT1-N.dna.bam
-PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bam,/path/to/PATIENT1-T.dna.bam
-```
+The reference data required for running `oncoanalyser` can be staged automatically using
+`--mode prepare_reference` and specifying `--ref_data_types`. The below example command will stage the required
+`GRCh38_hmf` genome (and indexes) and [WiGiTS](https://github.com/hartwigmedical/hmftools) resources files for WGS
+analysis from BAM.
```bash
nextflow run nf-core/oncoanalyser \
- -revision 2.1.0 \
+ -revision 2.2.0 \
-profile docker \
- --mode wgts \
+ --mode prepare_reference \
+ --ref_data_types wgs \
--genome GRCh38_hmf \
- --input samplesheet.csv \
- --outdir output/ \
- --prepare_reference_only
+ --outdir output/
```
-Executing the above command will download and prepare default reference data without running any analysis, and once
-complete the prepared reference files can be found in `./prepare_reference/reference_data/2.1.0//`. You can then provide
-a config file that points to these reference files (see [Configuring reference data](#configuring-reference-data)) which can
-be used for subsequent `oncoanalyser` runs.
-
-It is recommended to remove the Nextflow work directory once reference data staging is complete to free disk space.
+Once the above command completes, the staged reference data can be found in `/reference_data/2.2.0`. You will
+then need to provide a config file that points to these reference files (see [Configuring reference data](#configuring-reference-data))
+which can be used for subsequent `oncoanalyser` runs. The Nextflow work directory can also be removed to free up disk
+space.
+
+The below table shows the possible values for `--ref_data_types`. Note that multiple values can be provided as a comma-separated
+list, e.g. `--ref_data_types wgs,dna_alignment`
+
+| Value | Description | Combination of |
+| :--------------------------------- | :---------------------------------------------------------------------------------------- | :-------------------------------------------------------- |
+| `wgs` | Ref data for WGS analysis from BAM | `fasta`, `fai`, `dict`, `img`, `hmftools`, `gridss_index` |
+| `wts` | Ref data for WTS analysis from BAM | `fasta`, `fai`, `dict`, `img`, `hmftools` |
+| `targeted` | Ref data for targeted analysis from BAM | `fasta`, `fai`, `dict`, `img`, `hmftools`, `panel` |
+| `bwamem2_index` or `dna_alignment` | BWA-MEM2 index. Required if aligning DNA FASTQs | |
+| `star_index` or `rna_alignment` | STAR index. Required if aligning RNA FASTQs | |
+| `gridss_index` | GRIDSS index. Required if running Virusbreakend/Virusinterpreter | |
+| `hmftools` | [WiGiTS](https://github.com/hartwigmedical/hmftools) resources files | |
+| `panel` | Panel ref data. Only TSO500 currently supported. Please also specify arg `--panel tso500` | |
#### Manual staging
@@ -385,85 +449,11 @@ The configuration file can then be supplied to `oncoanalyser` via the `-config <
```bash
nextflow run nf-core/oncoanalyser \
- -revision 2.1.0 \
- -config refdata.config \
+ -revision 2.2.0 \
+ -config reference_data.config \
<...>
```
-### Panel reference data
-
-Analysis of panel / targeted sequencing data requires additional panel-specific reference data (e.g. region / gene
-definitions, copy number and transcript normalisation data, known artefacts). This data is included and pre-configured
-for the TSO500 panel, and can be used to analyse TSO500 sequence data by setting `--panel tso500` when running in
-`targeted` mode:
-
-```bash
-nextflow run nf-core/oncoanalyser \
- -revision 2.1.0 \
- -config refdata.config \
- -profile docker \
- --genome GRCh38_hmf \
- --mode targeted \
- --panel tso500 \
- --input samplesheet.csv \
- --outdir output/
-```
-
-For panels other than TSO500 (including whole exome), the panel-specific reference data must first be generated using a
-training procedure detailed [here](https://github.com/hartwigmedical/hmftools/blob/master/pipeline/README_TARGETED.md).
-The resulting panel-specific reference data must then be defined in a configuration file:
-
-```groovy title="panel.config"
-params {
- ref_data_panel_data_path = "/path/to/my_custom_panel_resources/"
-
- // These are relative paths within the dir provided by `ref_data_panel_data_path` above
- panel_data_paths {
-
- mycustompanel { // This is the name that should be passed to the `--panel` argument
-
- // Genome version: '37' or '38'
- '38' {
- driver_gene_panel = 'common/DriverGenePanel.custom_panel.38.tsv'
- sage_actionable_panel = 'variants/ActionableCodingPanel.custom_panel.38.bed.gz'
- sage_coverage_panel = 'variants/CoverageCodingPanel.custom_panel.38.bed.gz'
- pon_artefacts = 'variants/pon_artefacts.custom_panel.38.tsv.gz'
- target_region_bed = 'copy_number/target_regions_definition.custom_panel.38.bed.gz'
- target_region_normalisation = 'copy_number/cobalt_normalisation.custom_panel.38.tsv'
- target_region_ratios = 'copy_number/target_regions_ratios.custom_panel.38.tsv'
- target_region_msi_indels = 'copy_number/target_regions_msi_indels.custom_panel.38.tsv'
-
- // The below are optional and filepaths can be omitted for non-RNA panels by providing an empty list, e.g.:
- // isofox_tpm_norm = []
- isofox_tpm_norm = 'rna_resources/isofox.gene_normalisation.custom_panel.38.csv'
- isofox_gene_ids = 'rna_resources/custom_panel.rna_gene_ids.csv'
- isofox_counts = 'rna_resources/read_93_exp_counts.38.csv'
- isofox_gc_ratios = 'rna_resources/read_93_exp_gc_ratios.38.csv'
- }
- }
- }
-}
-```
-
-To run an analysis of panel sequence data:
-
-- provide both the panel-specific reference data configuration file via the `-config ` argument
-- set the panel name in the `--panel ` argument, this must match the name defined in the configuration file
-- set the `--force_panel` argument, which is required when not using the built-in `tso500` panel
-
-```bash
-nextflow run nf-core/oncoanalyser \
- -revision 2.1.0 \
- -config panel.config \
- -profile docker \
- --genome GRCh38_hmf \
- --mode targeted \
- --panel mycustompanel \
- --force_panel \
- --input samplesheet.csv \
- --outdir output/
-```
-
### Custom genomes
It is strongly recommended to use a Hartwig-distributed reference genome for alignments and subsequent analysis
@@ -491,38 +481,29 @@ params {
}
```
-Each index required for the analysis will first be created before running the rest of `oncoanalyser` with the following
-command:
-
-:::tip
-
-In a process similar to [staging reference data](#automatic-staging), you can first generate the required indexes by
-setting `--prepare_reference_only` and then provide the prepared reference files to `oncoanalyser` through a custom
-config file. This avoids having to regenerate indexes for each new analysis.
-
-:::
+Each index can then be created by using `--mode prepare_reference` and `--ref_data_types`
+(see section [staging reference data](#automatic-staging)). The below example command would create the indexes for WGS analysis:
```bash
nextflow run nf-core/oncoanalyser \
- -revision 2.1.0 \
- -profile docker \
+ -revision 2.2.0 \
-config genome.custom.config \
- --mode wgts \
+ -profile docker \
+ --mode prepare_reference \
+ --ref_data_types wgs,bwamem2_index,gridss_index \
--genome CustomGenome \
--genome_version <37|38> \
--genome_type \
--force_genome \
- --input samplesheet.csv \
--outdir output/
```
-Creation of a STAR index also requires transcript annotations, please provide either of the following GTF files via the
-`--ref_data_genome_gtf` option after decompressing:
+If aligning FASTQs from RNA seq data for WTS analysis, you should also provide `star_index` to `--ref_data_types`. Creating the
+STAR index also requires transcript annotations; please provide either of the following GTF files via the `--ref_data_genome_gtf` option
+after decompressing:
-- GRCh37: [GENCODE v37 (Ensembl v74)
- annotations](https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz)
-- GRCh38: [GENCODE v38 (Ensembl v104)
- annotations](https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_38/gencode.v38.annotation.gtf.gz)
+- GRCh37: [GENCODE v19 (Ensembl v74) annotations](https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz)
+- GRCh38: [GENCODE v38 (Ensembl v104) annotations](https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_38/gencode.v38.annotation.gtf.gz)
:::warning
@@ -548,8 +529,8 @@ _GRCh37 genome (Hartwig): `GRCh37_hmf`_
| BWA-MEM2 index | [bwa-mem2_index-2.2.1.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/25.1/bwa-mem2_index-2.2.1.tar.gz) |
| GRIDSS index | [gridss_index-2.13.2.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/25.1/gridss_index-2.13.2.tar.gz) |
| STAR index | [star_index-gencode_19-2.7.3a.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/25.1/star_index-gencode_19-2.7.3a.tar.gz) |
-| WiGiTS data | [hmf_pipeline_resources.37_v2.1.0--1.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/hmf_pipeline_resources.37_v2.1.0--1.tar.gz) |
-| TSO500 panel data | [hmf_panel_resources.tso500.37_v2.0.0--3.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/hmf_panel_resources.tso500.37_v2.0.0--3.tar.gz) |
+| WiGiTS data | [hmf_pipeline_resources.37_v2.2.0--3.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/hmf_pipeline_resources.37_v2.2.0--3.tar.gz) |
+| TSO500 panel data | [hmf_panel_resources.tso500.37_v2.2.0--3.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/hmf_panel_resources.tso500.37_v2.2.0--3.tar.gz) |
_GRCh38 genome (Hartwig): `GRCh38_hmf`_
@@ -562,67 +543,208 @@ _GRCh38 genome (Hartwig): `GRCh38_hmf`_
| BWA-MEM2 index | [bwa-mem2_index-2.2.1.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/25.1/bwa-mem2_index-2.2.1.tar.gz) |
| GRIDSS index | [gridss_index-2.13.2.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/25.1/gridss_index-2.13.2.tar.gz) |
| STAR index | [star_index-gencode_38-2.7.3a.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/25.1/star_index-gencode_38-2.7.3a.tar.gz) |
-| WiGiTS data | [hmf_pipeline_resources.38_v2.1.0--1.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/hmf_pipeline_resources.38_v2.1.0--1.tar.gz) |
-| TSO500 panel data | [hmf_panel_resources.tso500.38_v2.0.0--3.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/hmf_panel_resources.tso500.38_v2.0.0--3.tar.gz) |
+| WiGiTS data | [hmf_pipeline_resources.38_v2.2.0--3.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/hmf_pipeline_resources.38_v2.2.0--3.tar.gz) |
+| TSO500 panel data | [hmf_panel_resources.tso500.38_v2.2.0--3.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/hmf_panel_resources.tso500.38_v2.2.0--3.tar.gz) |
-## Process selection
+## Run modes
-It is possible to exclude or include specific processes when running `oncoanalyser`. The full list of processes that can
-be selected is available [here](https://github.com/nf-core/oncoanalyser/blob/2.1.0/lib/Constants.groovy#L32).
+### Whole genome / transcriptome sequencing (WGTS)
-### Excluding processes
-
-Most of the major components in `oncoanalyser` can be skipped using the `--processes_exclude` argument. There are
-circumstances where it is desirable to skip resource intensive processes like VIRUSBreakend or where you have no use for
-the outputs from some process such as the ORANGE report. In the example of skipping the VIRUSBreakend and ORANGE
-processes, the `oncoanalyser` command would take the following form:
+`--mode wgts` is used for analysing whole genome (WGS) and/or whole transcriptome (WTS) sequencing data, and can be run like so:
```bash
nextflow run nf-core/oncoanalyser \
- -revision 2.1.0 \
+ -revision 2.2.0 \
+ -config reference_data.config \
-profile docker \
--mode wgts \
- --processes_exclude virusinterpreter,orange \
--genome GRCh38_hmf \
--input samplesheet.csv \
--outdir output/
```
-:::warning
+### Targeted sequencing
-When skipping components no checks are done to identify orphan processes in the execution DAG or for redundant
-processes.
+`--mode targeted` is used for analysing targeted or panel sequencing samples. The TSO500 panel has in-built support by setting
+`--panel tso500`. A typical run command for TSO500 panels would be:
-:::
+```bash
+nextflow run nf-core/oncoanalyser \
+ -revision 2.2.0 \
+ -config reference_data.config \
+ -profile docker \
+ --mode targeted \
+ --panel tso500 \
+ --genome GRCh38_hmf \
+ --input samplesheet.csv \
+ --outdir output/
+```
-### Manual process selection
+Panels other than TSO500 require additional arguments, as well as custom reference data to be created.
+Please see [Custom panels](#custom-panels).
+
+### Custom panels
+
+`--mode panel_resource_creation` assists with creating custom panel reference data files (for panels other than TSO500), which fit and
+normalise the biases inherent to that specific panel.
+
+The below table summarises the required reference data files. Some panel reference data files must first be manually created - instructions
+can be found on the [**WiGiTS targeted analysis readme**](https://github.com/hartwigmedical/hmftools/blob/master/pipeline/README_TARGETED.md).
+Some of these files are used with `--mode panel_resource_creation` to create the remaining required reference data files.
+
+| Data type | File / config name | Comment |
+| :-------- | :---------------------------- | :---------------------------------------------------------------------------------------------------------------------- |
+| DNA | `driver_gene_panel` | Manually created |
+| DNA | `target_region_bed` | Manually created |
+| DNA | `target_region_msi_indels` | Manually created |
+| DNA | `target_region_ratios` | Manually created |
+| DNA | `target_region_normalisation` | Output from `--mode panel_resource_creation` |
+| DNA | `pon_artefacts` | Output from `--mode panel_resource_creation` |
+| RNA | `isofox_gene_ids` | Manually created |
+| RNA | `isofox_tpm_norm` | Output from `--mode panel_resource_creation` |
+| RNA | `isofox_counts` | Recommended to use `read_151_exp_counts.<genome_version>.csv` from [WiGiTS reference data](#reference-data-urls) |
+| RNA | `isofox_gc_ratios` | Recommended to use `read_100_exp_gc_ratios.<genome_version>.csv` from [WiGiTS reference data](#reference-data-urls) |
+
+:::note
+
+RNA reference data is only required if your panel supports RNA sequencing data.
+
+:::
-The `--processes_manual` argument can be used to enable manual process selection and `--processes_include
-` to configure individual processes to execute. One use case would be to run processes which are
-not run by default, such as neoepitope calling with [NEO](https://github.com/hartwigmedical/hmftools/tree/master/neo).
-To do this, provide the below example samplesheet:
+Once your manually created files are ready, create a samplesheet with a representative set of panel sequencing samples
+(**≥20 recommended**). The below example samplesheet provides BAM files, but [FASTQ files](#fastq) can also be provided.
-```csv title='samplesheet.manual.csv'
+```csv title="samplesheet.panel_resource_creation.csv"
group_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath
-PATIENT1,PATIENT1,PATIENT1-N,normal,dna,bam,/path/to/PATIENT1-N.dna.wgs.bam
-PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bam,/path/to/PATIENT1-T.dna.wgs.bam
-PATIENT1,PATIENT1,PATIENT1-T-RNA,tumor,rna,bam,/path/to/PATIENT1-T.rna.wgs.bam
+PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bam,/path/to/PATIENT1-T.dna.bam
+PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bai,/path/to/PATIENT1-T.dna.bam.bai
+PATIENT2,PATIENT2,PATIENT2-T,tumor,dna,bam,/path/to/PATIENT2-T.dna.bam
+PATIENT2,PATIENT2,PATIENT2-T,tumor,dna,bai,/path/to/PATIENT2-T.dna.bam.bai
```
-Then, run `oncoanalyser` with the `neo` process selected as well as all required upstream processes:
+Then, run `oncoanalyser` with `--mode panel_resource_creation` providing the samplesheet, as well as the relevant manually created files
+to arguments `--driver_gene_panel`, `--target_regions_bed`, and `--isofox_gene_ids`:
```bash
nextflow run nf-core/oncoanalyser \
- -revision 2.1.0 \
+ -revision 2.2.0 \
+ -config reference_data.config \
-profile docker \
- --mode wgts \
- --processes_manual \
- --processes_include isofox,redux,amber,cobalt,sage,pave,esvee,purple,linx,lilac,neo \
+ --mode panel_resource_creation \
--genome GRCh38_hmf \
- --input samplesheet.neo_inputs.csv \
+ --input samplesheet.panel_resource_creation.csv \
+ --driver_gene_panel DriverGenePanel.38.tsv \
+ --target_regions_bed target_regions_definition.38.bed.gz \
+ --outdir output/ \
+ --isofox_gene_ids rna_gene_ids.csv  # Optional, only provide if panel supports RNA sequencing data
+```
+
+Place all the custom panel reference data files in a directory, and define the paths / file names in a configuration file:
+
+```groovy title="panel.config"
+params {
+ ref_data_panel_data_path = "/directory/containing/my_custom_panel_resources/"
+
+ // These are relative paths within the dir provided by `ref_data_panel_data_path` above
+ panel_data_paths {
+
+ my_custom_panel { // This is the name that should be passed to the `--panel` argument
+
+ // Genome version: '37' or '38'
+ '38' {
+ driver_gene_panel = 'DriverGenePanel.38.tsv'
+ pon_artefacts = 'pave.somatic_artefacts.38.tsv'
+ target_region_bed = 'target_regions_definition.38.bed.gz'
+ target_region_normalisation = 'cobalt.region_normalisation.38.tsv'
+ target_region_ratios = 'target_regions_ratios.38.tsv'
+ target_region_msi_indels = 'target_regions_msi_indels.38.tsv'
+
+ // RNA. Optional, only provide if panel supports RNA data.
+ isofox_gene_ids = 'rna_gene_ids.csv'
+ isofox_tpm_norm = 'isofox.gene_normalisation.38.csv'
+ isofox_counts = 'read_151_exp_counts.38.csv'
+ isofox_gc_ratios = 'read_100_exp_gc_ratios.38.csv'
+ }
+ }
+ }
+}
+```
+
+Lastly, run `oncoanalyser` with `--mode targeted` to analyse your panel sequencing sample. You will also need to:
+
+- provide the custom panel reference data configuration file to the `-config <file>` argument
+- set the panel name in the `--panel <name>` argument as defined in the configuration file (e.g. `my_custom_panel`)
+- set the `--force_panel` argument to enable non-built-in panels
+
+```bash
+nextflow run nf-core/oncoanalyser \
+ -revision 2.2.0 \
+ -config reference_data.config \
+ -config panel.config \
+ -profile docker \
+ --mode targeted \
+ --panel my_custom_panel \
+ --genome GRCh38_hmf \
+ --force_panel \
+ --input samplesheet.csv \
+ --outdir output/
+```
+
+### Purity estimate
+
+`--mode purity_estimate` uses [WISP](https://github.com/hartwigmedical/hmftools/tree/master/wisp) to estimate the tumor fraction
+(aka purity) for a longitudinal sample (typically a ctDNA sample) guided by variants identified in a primary sample of the same patient
+(typically a primary tissue biopsy). This can be used for example for detecting minimal residual disease (MRD).
+
+The primary sample must first have been run in either [**WGTS**](#whole-genome--transcriptome-sequencing-wgts) or
+[**targeted**](#targeted-sequencing) mode.
+
+A samplesheet with the paths to the primary and longitudinal sample data is then created. Specifically:
+
+- The BAM from the longitudinal tumor sample
+- The AMBER and PURPLE directories from the **primary tumor** sample
+- (Optional) The REDUX BAM of the normal sample, if the normal sample was provided in the primary sample run (i.e. was run in tumor/normal mode)
+
+```csv title="samplesheet.purity_estimate.csv"
+group_id,subject_id,sample_id,sample_type,sequence_type,filetype,info,filepath
+PATIENT1,PATIENT1,PATIENT1-L,tumor,dna,bam,longitudinal_sample,/path/to/PATIENT1-T.dna.longitudinal.bam
+PATIENT1,PATIENT1,PATIENT1-N,normal,dna,bam_redux,,/path/to/PATIENT1-N.dna.redux.bam
+PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,amber_dir,,/path/to/PATIENT1-T/amber/
+PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,purple_dir,,/path/to/PATIENT1-T/purple/
+```
+
+Then run `oncoanalyser` providing `--mode purity_estimate` and `--purity_estimate_mode <mode>` (how the **longitudinal sample**
+was sequenced):
+
+```bash
+nextflow run nf-core/oncoanalyser \
+ -revision 2.2.0 \
+ -config reference_data.config \
+ -profile docker \
+ --mode purity_estimate \
+ --purity_estimate_mode targeted \
+ --genome GRCh38_hmf \
+ --input samplesheet.purity_estimate.csv \
--outdir output/
```
+:::note
+
+`--purity_estimate_mode` simply sets different arguments for certain tools (e.g. SAGE). When running with `--purity_estimate_mode targeted`,
+you do not need to configure panel ref data paths as you would with `--mode targeted`.
+
+:::
+
+### Prepare reference data
+
+`--mode prepare_reference` assists with staging all the reference data required to run `oncoanalyser`.
+Please see: [Staging reference data: Automatic staging](#automatic-staging)
+
+## Process selection
+
+It is possible to exclude or manually select specific processes when running `oncoanalyser`. The full list of processes that can
+be selected is available [here](https://github.com/nf-core/oncoanalyser/blob/2.2.0/lib/Constants.groovy#L53).
+
:::warning
It is the user's responsibility to select the required upstream processes for a downstream process to run. If not all
@@ -631,6 +753,38 @@ process running.
:::
+### Excluding processes
+
+Most of the major components in `oncoanalyser` can be skipped using the `--processes_exclude` argument. You may want to
+skip resource-intensive processes like VIRUSBreakend, or skip ORANGE because you do not require the report, for example:
+
+```bash
+nextflow run nf-core/oncoanalyser \
+ -revision 2.2.0 \
+ -profile docker \
+ --mode wgts \
+ --processes_exclude virusinterpreter,orange \
+ --genome GRCh38_hmf \
+ --input samplesheet.csv \
+ --outdir output/
+```
+
+### Manual process selection
+
+The `--processes_manual` argument can be used to select the exact processes that `oncoanalyser` will run. For example,
+you may only want to run alignment and SNV/indel, SV and CNV calling from DNA FASTQs, like so:
+
+```bash
+nextflow run nf-core/oncoanalyser \
+ -revision 2.2.0 \
+ -profile docker \
+ --mode wgts \
+ --processes_manual alignment,redux,amber,cobalt,sage,pave,esvee,purple \
+ --genome GRCh38_hmf \
+ --input samplesheet.csv \
+ --outdir output/
+```
+
### Starting from existing inputs
An `oncoanalyser` analysis can start at arbitrary points as long as the required inputs are provided. For example,
@@ -662,23 +816,15 @@ Then, run `oncoanalyser` skipping all processes except for `neo`:
```bash
nextflow run nf-core/oncoanalyser \
- -revision 2.1.0 \
+ -revision 2.2.0 \
-profile docker \
--mode wgts \
- --processes_manual \
- --processes_include neo \
+ --processes_manual neo \
--genome GRCh38_hmf \
--input samplesheet.neo_inputs.csv \
--outdir output/
```
-:::warning
-
-Providing existing inputs will cause `oncoanalyser` to skip the corresponding process but none of the upstream
-processes. It is the responsibility of the user to skip all relevant processes.
-
-:::
-
## Core Nextflow arguments
:::note
@@ -753,51 +899,58 @@ Syntax and examples of config items are described in the [Nextflow documentation
### Compute resources
The default compute resources (e.g. CPUs, RAM, disk space) configured in `oncoanalyser` may not be sufficient for one or
-more processes. To change the resource requests, please see the [tuning workflow
-resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) and [max
-resources](https://nf-co.re/docs/usage/configuration#max-resources) sections of the nf-core website.
+more processes (nf-core documentation: [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources)).
+For example, for high depth samples (e.g. panel samples), you may need to increase the memory for alignment, read processing (REDUX),
+small variant calling (SAGE), or structural variant calling (ESVEE) steps.
-Below are the settings per WiGiTS tool that Hartwig uses internally and recommends. For high depth samples (e.g. panel
-samples), you may need increase the memory for alignment, read processing (REDUX) and/or variant calling (SAGE or ESVEE)
-steps.
+Below are the settings per tool that Hartwig Medical Foundation uses when running `oncoanalyser` in Google Cloud:
```groovy
process {
- withName: '.*ALIGN' { cpus = 12; memory = 72.GB; }
- withName: AMBER { cpus = 16; memory = 24.GB; }
- withName: BAMTOOLS { cpus = 16; memory = 24.GB; }
- withName: CHORD { cpus = 4; memory = 12.GB; }
- withName: COBALT { cpus = 16; memory = 24.GB; }
- withName: CUPPA { cpus = 4; memory = 16.GB; }
- withName: 'ESVEE.*' { cpus = 32; memory = 64.GB; }
- withName: LILAC { cpus = 16; memory = 24.GB; }
- withName: 'LINX.*' { cpus = 16; memory = 16.GB; }
- withName: REDUX { cpus = 32; memory = 64.GB; }
- withName: ORANGE { cpus = 4; memory = 16.GB; }
- withName: 'PAVE.*' { cpus = 8; memory = 32.GB; }
- withName: PURPLE { cpus = 8; memory = 40.GB; }
- withName: 'SAGE.*' { cpus = 32; memory = 64.GB; }
- withName: VIRUSBREAKEND { cpus = 8; memory = 64.GB; }
- withName: VIRUSINTERPRETER { cpus = 2; memory = 8.GB; }
+ withName: '.*ALIGN' { memory = 72.GB; cpus = 12; disk = 750.GB }
+ withName: 'AMBER' { memory = 24.GB; cpus = 16; disk = 375.GB }
+ withName: 'BAMTOOLS' { memory = 24.GB; cpus = 16; disk = 375.GB }
+ withName: 'CHORD' { memory = 12.GB; cpus = 4 ; disk = 375.GB }
+ withName: 'CIDER' { memory = 24.GB; cpus = 16; disk = 375.GB }
+ withName: 'COBALT' { memory = 24.GB; cpus = 16; disk = 375.GB }
+ withName: 'CUPPA' { memory = 16.GB; cpus = 4 ; disk = 375.GB }
+ withName: 'ESVEE' { memory = 96.GB; cpus = 32; disk = 375.GB }
+ withName: 'ISOFOX' { memory = 24.GB; cpus = 16; disk = 375.GB }
+ withName: 'LILAC' { memory = 24.GB; cpus = 16; disk = 375.GB }
+ withName: 'LINX_.*' { memory = 16.GB; cpus = 8 ; disk = 375.GB }
+ withName: 'REDUX' { memory = 64.GB; cpus = 32; disk = 750.GB }
+ withName: 'ORANGE' { memory = 16.GB; cpus = 4 ; disk = 375.GB }
+ withName: 'PAVE.*' { memory = 32.GB; cpus = 8 ; disk = 375.GB }
+ withName: 'PEACH' { memory = 4.GB ; cpus = 2 ; disk = 375.GB }
+ withName: 'PURPLE' { memory = 40.GB; cpus = 8 ; disk = 375.GB }
+ withName: 'SAGE.*' { memory = 64.GB; cpus = 32; disk = 375.GB }
+ withName: 'TEAL.*' { memory = 32.GB; cpus = 32; disk = 375.GB }
+ withName: 'VIRUSBREAKEND' { memory = 64.GB; cpus = 16; disk = 375.GB }
+ withName: 'VIRUSINTERPRETER' { memory = 8.GB ; cpus = 2 ; disk = 375.GB }
+ withName: 'WISP' { memory = 16.GB; cpus = 4 ; disk = 375.GB }
}
```
-Lastly, we recommend setting an upper limit on total resources that `oncoanalyser` is allowed to use. This will
-typically be the max resources available to the VM / compute job. Below are the settings that Hartwig Medical Foundation
-uses internally. When running multiple steps and/or samples in parallel, this will prevent `oncoanalyser` from
-requesting more resources than available on the machine.
+We recommend setting an upper limit on total resources that `oncoanalyser` is allowed to use (nf-core
+documentation: [max resources](https://nf-co.re/docs/usage/configuration#max-resources)). Otherwise, `oncoanalyser` may
+crash when it tries to request more resources than available on a machine or compute job.
+Below are some recommended resource limit settings:
```groovy
process {
resourceLimits = [
cpus: 64,
- memory: 124.GB, // = 0.97 * 128.GB
+ memory: 120.GB, // Provides leeway on a 128.GB machine
disk: 1500.GB,
time: 48.h
]
}
```
+The total runtime of `oncoanalyser` is ~3h for a paired 100x/30x tumor/normal WGS run starting from BAMs with parallel job execution via
+Google Batch. However, your runtime will vary depending on several factors such as sequencing depth, number of small/structural variants, or
+parallel vs. non-parallel job execution.
+
### Container images
#### Custom containers
@@ -905,13 +1058,13 @@ on the presence/format of your UMI strings, you may need to configure one or mor
```groovy title='umi.config'
params {
// For FASTQ files
- fastp_umi = true // Enable UMI processing by fastp
+ fastp_umi_enabled = true // Enable UMI processing by fastp
fastp_umi_location = "per_read" // --umi_loc fastp arg
fastp_umi_length = 7 // --umi_len fastp arg
fastp_umi_skip = 0 // --umi_skip fastp arg
// For BAM files
- redux_umi = true // Enable UMI processing by REDUX
+ redux_umi_enabled = true // Enable UMI processing by REDUX
redux_umi_duplex_delim = "_" // Duplex UMI delimiter
}
```
diff --git a/docs/usage/faq_and_troubleshooting.md b/docs/usage/faq_and_troubleshooting.md
index 768f4f4a..0e773a4a 100644
--- a/docs/usage/faq_and_troubleshooting.md
+++ b/docs/usage/faq_and_troubleshooting.md
@@ -19,13 +19,12 @@
- [Placing `oncoanalyser` CLI arguments into a configuration
file](#placing-oncoanalyser-cli-arguments-into-a-configuration-file)
- [Errors and navigating the `work/` directory](#errors-and-navigating-the-work-directory)
-- [Saving logs from the `work/` directory](#saving-logs-from-the-work-directory)
- [Resuming runs in Google Batch](#resuming-runs-in-google-batch)
## How to start from CRAM?
-Simply specify a CRAM path instead of a BAM path in the sample sheet. See section [Input starting points: BAM /
-CRAM](./#bam-and-cram).
+Simply provide a CRAM path under filetype `cram` in the sample sheet. See section [Input starting points: CRAM](./#cram)
+for details.
## How to handle UMIs?
@@ -94,14 +93,13 @@ example, you would run `oncoanalyser` with the below command (assuming starting
```bash
nextflow run nf-core/oncoanalyser \
- -revision 2.1.0 \
+ -revision 2.2.0 \
-profile docker \
--mode wgts \
+ --processes_manual alignment,redux,amber,cobalt,sage,pave,esvee,purple \
--genome GRCh38_hmf \
- --input samplesheet.neo_inputs.csv \
- --outdir output/ \
- --processes_manual \
- --processes_include alignment,redux,amber,cobalt,sage,pave,esvee,purple
+ --input samplesheet.csv \
+ --outdir output/
```
## Why does `oncoanalyser` call too many / too few variants than another pipeline?
@@ -213,13 +211,13 @@ For example, the `oncoanalyser` arguments which start with `--` in this command:
```shell
nextflow run nf-core/oncoanalyser \
- -revision 2.1.0 \
- -config refdata.config \
+ -revision 2.2.0 \
-profile docker \
+ -config refdata.config \
--mode wgts \
--genome GRCh38_hmf \
- --input /path/to/samplesheet.csv \
- --outdir /path/to/outdir/
+ --input samplesheet.csv \
+ --outdir output/
```
can be specified in a config file by stripping the `--` like so:
@@ -228,8 +226,8 @@ can be specified in a config file by stripping the `--` like so:
params {
mode = "wgts"
genome = "GRCh38_hmf"
- input = "/path/to/samplesheet.csv"
- outdir = "/path/to/outdir/"
+ input = "samplesheet.csv"
+ outdir = "output/"
}
```
@@ -237,9 +235,9 @@ and provided as a config file when running `oncoanalyser`:
```shell
nextflow run nf-core/oncoanalyser \
+ -revision 2.2.0 \
-config refdata.config \
-config params.config \
- -revision 2.1.0 \
-profile docker \
<...>
```
@@ -271,7 +269,7 @@ work/
│ ├── .command.sh # Bash script used to run the process *within the container*
│ ├── .command.run # Bash script used to run the process in the host machine
│ ├── .command.begin
-│ ├── .command.log # All log messages (combination of stdout and stderr)
+│ ├── .command.log # All log messages (combination of stdout and stderr). Might not exist for some executors
│ ├── .command.err # stderr log messages
│ ├── .command.out # stdout log messages
│ ├── .command.trace # Compute resource usage stats
@@ -291,32 +289,6 @@ The `work/` directory can be hard to navigate due to the `/) to show the directory
structure, which allows you to manually find the target process directory.
-## Saving logs from the `work/` directory
-
-To save logs to the final output directory (i.e. path provided to `--outdir`), we can provide the below
-[afterScript](https://www.nextflow.io/docs/latest/reference/process.html#afterscript) directive in a config file:
-
-```groovy
-// Adapted from this GitHub issue: https://github.com/nextflow-io/nextflow/issues/1166
-process.afterScript = {
- // params.outdir: --outdir arg
- // meta.key: sample_id from the sample sheet
- log_dir = "${params.outdir}/${meta.key}/logs"
-
- // task.process: name of the process
- // meta.id: concatenation of the group_id and sample_id from the sample sheet
- dest_file_prefix = "${log_dir}/${task.process}.${meta.id}"
-
- // The value of afterScript is simply a bash command as a string
- cmd = "mkdir -p ${log_dir}; "
- cmd += "for file in .command.{sh,log}; do cp \$file ${dest_file_prefix}\${file}; done"
- cmd
-}
-```
-
-The above afterScript directive will copy `.sh` and `.log` files from the `work/` directory for every process. Each
-destination file will have the below example path:
-
```shell
outdir/coloMini/logs/NFCORE_ONCOANALYSER:WGTS:REDUX_PROCESSING:REDUX.coloMini_coloMiniT.command.log
```
diff --git a/lib/Constants.groovy b/lib/Constants.groovy
index 32decfee..8fa6eeb5 100644
--- a/lib/Constants.groovy
+++ b/lib/Constants.groovy
@@ -3,19 +3,19 @@ class Constants {
// NOTE(SW): the HMF reference data files are incompatible with hg19 due to different contig naming
static List GENOMES_VERSION_37 = ['GRCh37_hmf', 'GRCh37']
static List GENOMES_VERSION_38 = ['GRCh38_hmf', 'GRCh38', 'hg38']
- static List GENOMES_ALT = ['GRCh38', 'hg38']
+ static List GENOMES_ALT = ['GRCh38', 'hg38']
- static List GENOMES_SUPPORTED = ['GRCh37_hmf', 'GRCh38_hmf']
- static List GENOMES_DEFINED = Constants.GENOMES_VERSION_37 + Constants.GENOMES_VERSION_38
+ static List GENOMES_SUPPORTED = ['GRCh37_hmf', 'GRCh38_hmf']
+ static List GENOMES_DEFINED = Constants.GENOMES_VERSION_37 + Constants.GENOMES_VERSION_38
- static List PANELS_DEFINED = ['tso500']
+ static List PANELS_DEFINED = ['tso500']
- static String HMF_DATA_37_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/hmf_pipeline_resources.37_v2.1.0--1.tar.gz'
- static String HMF_DATA_38_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/hmf_pipeline_resources.38_v2.1.0--1.tar.gz'
+ static String HMF_DATA_37_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/hmf_pipeline_resources.37_v2.2.0--3.tar.gz'
+ static String HMF_DATA_38_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/hmf_pipeline_resources.38_v2.2.0--3.tar.gz'
- static String TSO500_PANEL_37_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/hmf_panel_resources.tso500.37_v2.0.0--3.tar.gz'
- static String TSO500_PANEL_38_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/hmf_panel_resources.tso500.38_v2.0.0--3.tar.gz'
+ static String TSO500_PANEL_37_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/hmf_panel_resources.tso500.37_v2.2.0--3.tar.gz'
+ static String TSO500_PANEL_38_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/hmf_panel_resources.tso500.38_v2.2.0--3.tar.gz'
static String HLA_SLICE_BED_GRCH38_ALT_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/other/hla_slice/grch38_alt.plus_homologous.bed'
@@ -25,10 +25,33 @@ class Constants {
static enum RunMode {
+ PANEL_RESOURCE_CREATION,
+ PREPARE_REFERENCE,
+ PURITY_ESTIMATE,
TARGETED,
WGTS,
}
+ static enum RefDataType {
+ // Compound types
+ TARGETED,
+ WGS,
+ WTS,
+
+ // Individual types
+ BWAMEM2_INDEX,
+ DICT,
+ DNA_ALIGNMENT,
+ FAI,
+ FASTA,
+ GRIDSS_INDEX,
+ HMFTOOLS,
+ IMG,
+ PANEL,
+ RNA_ALIGNMENT,
+ STAR_INDEX,
+ }
+
static enum Process {
ALIGNMENT,
AMBER,
@@ -51,18 +74,25 @@ class Constants {
SIGS,
TEAL,
VIRUSINTERPRETER,
+ WISP,
}
+ static List DEFAULT_EXCLUDED_PROCESSES = [] // For experimental tools
+
static enum FileType {
// Generic
- BAM,
BAI,
+ BAM,
+ CRAI,
+ CRAM,
FASTQ,
- // Redux
+
+ // REDUX
BAM_REDUX,
- REDUX_DUP_FREQ_TSV,
+ CRAM_REDUX,
REDUX_JITTER_TSV,
REDUX_MS_TSV,
+
// Process
AMBER_DIR,
BAMTOOLS_DIR,
@@ -76,34 +106,36 @@ class Constants {
PURPLE_DIR,
SAGE_VCF,
SAGE_VCF_TBI,
- SAGE_APPEND_VCF,
+ SAGE_APPEND_DIR,
VIRUSINTERPRETER_DIR,
+
// ORANGE specific
CHORD_DIR,
- SIGS_DIR,
CUPPA_DIR,
LINX_PLOT_DIR,
- SAGE_DIR,
PEACH_DIR,
+ SAGE_DIR,
+ SIGS_DIR,
}
static enum SampleType {
- TUMOR,
+ DONOR,
NORMAL,
+ TUMOR,
TUMOR_NORMAL,
- DONOR,
}
static enum SequenceType {
DNA,
- RNA,
DNA_RNA,
+ RNA,
}
static enum InfoField {
CANCER_TYPE,
LANE,
LIBRARY_ID,
+ LONGITUDINAL_SAMPLE,
}
static Map PLACEHOLDER_META = [meta_placeholder: null]
@@ -168,12 +200,6 @@ class Constants {
SequenceType.DNA,
],
- REDUX_DUP_FREQ_TSV_TUMOR: [
- FileType.REDUX_DUP_FREQ_TSV,
- SampleType.TUMOR,
- SequenceType.DNA,
- ],
-
REDUX_JITTER_TSV_TUMOR: [
FileType.REDUX_JITTER_TSV,
SampleType.TUMOR,
@@ -192,12 +218,6 @@ class Constants {
SequenceType.DNA,
],
- REDUX_DUP_FREQ_TSV_NORMAL: [
- FileType.REDUX_DUP_FREQ_TSV,
- SampleType.NORMAL,
- SequenceType.DNA,
- ],
-
REDUX_JITTER_TSV_NORMAL: [
FileType.REDUX_JITTER_TSV,
SampleType.NORMAL,
@@ -216,12 +236,6 @@ class Constants {
SequenceType.DNA,
],
- REDUX_DUP_FREQ_TSV_DONOR: [
- FileType.REDUX_DUP_FREQ_TSV,
- SampleType.DONOR,
- SequenceType.DNA,
- ],
-
REDUX_JITTER_TSV_DONOR: [
FileType.REDUX_JITTER_TSV,
SampleType.DONOR,
@@ -294,13 +308,13 @@ class Constants {
SampleType.NORMAL,
SequenceType.DNA,
],
- SAGE_APPEND_VCF_TUMOR: [
- FileType.SAGE_APPEND_VCF,
+ SAGE_APPEND_DIR_TUMOR: [
+ FileType.SAGE_APPEND_DIR,
SampleType.TUMOR,
SequenceType.DNA_RNA,
],
- SAGE_APPEND_VCF_NORMAL: [
- FileType.SAGE_APPEND_VCF,
+ SAGE_APPEND_DIR_NORMAL: [
+ FileType.SAGE_APPEND_DIR,
SampleType.NORMAL,
SequenceType.DNA_RNA,
],
diff --git a/lib/Processes.groovy b/lib/Processes.groovy
index f4770838..9bdd032f 100644
--- a/lib/Processes.groovy
+++ b/lib/Processes.groovy
@@ -8,22 +8,30 @@ class Processes {
public static getRunStages(include, exclude, manual_select, log) {
- // Get default processes
- // NOTE(SW): currently set all except Neo to run by default; Process.NEO excluded to be more concise in code
def processes
+
if (manual_select) {
- processes = []
+ processes = this.getProcessList(manual_select, log)
+
+ if (include || exclude) {
+ log.warning "When manually selecting processes, including/excluding processes is ignored"
+ }
+
} else {
+
+ // Get default processes
processes = Constants.Process.values().toList()
- processes.remove(Constants.Process.NEO)
- }
- def include_list = this.getProcessList(include, log)
- def exclude_list = this.getProcessList(exclude, log)
- this.checkIncludeExcludeList(include_list, exclude_list, log)
+ // NOTE(LN): Disable some processes from running by default
+ Constants.DEFAULT_EXCLUDED_PROCESSES.each {it -> processes.remove(it) }
- processes.addAll(include_list)
- processes.removeAll(exclude_list)
+ def include_list = this.getProcessList(include, log)
+ def exclude_list = this.getProcessList(exclude, log)
+ this.checkIncludeExcludeList(include_list, exclude_list, log)
+
+ processes.addAll(include_list)
+ processes.removeAll(exclude_list)
+ }
return Constants.Process
.values()
diff --git a/lib/Utils.groovy b/lib/Utils.groovy
index e2ef6d4b..cfe40aad 100644
--- a/lib/Utils.groovy
+++ b/lib/Utils.groovy
@@ -8,6 +8,11 @@ class Utils {
public static parseInput(input_fp_str, stub_run, log) {
+ if (!input_fp_str) {
+ log.error "Missing required --input argument"
+ Nextflow.exit(1)
+ }
+
// NOTE(SW): using NF .splitCsv channel operator, hence should be easily interchangable with NF syntax
def input_fp = Utils.getFileObject(input_fp_str)
@@ -28,6 +33,37 @@ class Utils {
meta.subject_id = it.subject_id
}
+ // Info data
+ def info_data = [:]
+ if (it.containsKey('info')) {
+ // Parse
+ it.info
+ .tokenize(';')
+ .each { e ->
+ def (k, v) = e.tokenize(':')
+ def info_field_enum = Utils.getEnumFromString(k, Constants.InfoField)
+
+ if (!info_field_enum) {
+ def info_field_str = Utils.getEnumNames(Constants.InfoField).join('\n - ')
+ log.error "received invalid info field: '${k}'. Valid options are:\n - ${info_field_str}"
+ Nextflow.exit(1)
+ }
+
+ if (info_data.containsKey(info_field_enum)) {
+ log.error "got duplicate info field for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${info_field_enum}"
+ Nextflow.exit(1)
+ }
+
+ info_data[info_field_enum] = v
+ }
+
+ // Process
+ if (info_data.containsKey(Constants.InfoField.CANCER_TYPE)) {
+ meta[Constants.InfoField.CANCER_TYPE] = info_data[Constants.InfoField.CANCER_TYPE]
+ }
+
+ }
+
// Sample type
def sample_type_enum = Utils.getEnumFromString(it.sample_type, Constants.SampleType)
if (!sample_type_enum) {
@@ -53,49 +89,33 @@ class Utils {
}
def sample_key = [sample_type_enum, sequence_type_enum]
- def meta_sample = meta.get(sample_key, [sample_id: it.sample_id])
+ def meta_sample = meta.get(sample_key, [:])
- if (meta_sample.sample_id != it.sample_id) {
- log.error "got unexpected sample name for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${it.sample_id}"
- Nextflow.exit(1)
- }
+ if (info_data.containsKey(Constants.InfoField.LONGITUDINAL_SAMPLE)) {
- if (meta_sample.containsKey(filetype_enum) & filetype_enum != Constants.FileType.FASTQ) {
- log.error "got duplicate file for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${filetype_enum}"
- Nextflow.exit(1)
- }
+ if (meta_sample.containsKey('longitudinal_sample_id') && meta_sample.longitudinal_sample_id != it.sample_id) {
+ log.error "got multiple longitudinal samples for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${it.sample_id}"
+ Nextflow.exit(1)
+ }
- // Info data
- def info_data = [:]
- if (it.containsKey('info')) {
- // Parse
- it.info
- .tokenize(';')
- .each { e ->
- def (k, v) = e.tokenize(':')
- def info_field_enum = Utils.getEnumFromString(k, Constants.InfoField)
+ meta_sample.longitudinal_sample_id = it.sample_id
- if (!info_field_enum) {
- def info_field_str = Utils.getEnumNames(Constants.InfoField).join('\n - ')
- log.error "received invalid info field: '${k}'. Valid options are:\n - ${info_field_str}"
- Nextflow.exit(1)
- }
+ } else if (meta_sample.containsKey('sample_id') && meta_sample.sample_id != it.sample_id) {
- if (info_data.containsKey(info_field_enum)) {
- log.error "got duplicate info field for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${info_field_enum}"
- Nextflow.exit(1)
- }
+ log.error "got unexpected sample name for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${it.sample_id}"
+ Nextflow.exit(1)
- info_data[info_field_enum] = v
- }
+ } else {
- // Process
- if (info_data.containsKey(Constants.InfoField.CANCER_TYPE)) {
- meta[Constants.InfoField.CANCER_TYPE] = info_data[Constants.InfoField.CANCER_TYPE]
- }
+ meta_sample.sample_id = it.sample_id
}
+ // Filetype uniqueness; FASTQ is exempt as a sample may list several FASTQ entries (keyed by library/lane below)
+ if (meta_sample.containsKey(filetype_enum) && filetype_enum != Constants.FileType.FASTQ) {
+ log.error "got duplicate file for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${filetype_enum}"
+ Nextflow.exit(1)
+ }
// Handle inputs appropriately
if (filetype_enum === Constants.FileType.FASTQ) {
@@ -110,7 +130,15 @@ class Utils {
Nextflow.exit(1)
}
- def (fwd, rev) = it.filepath.tokenize(';')
+ def fastq_entries = it.filepath.tokenize(';')
+ // A FASTQ entry must hold exactly one forward and one reverse path; call size() explicitly (not the .size property)
+ if (fastq_entries.size() != 2) {
+ log.error "expected exactly 2 FASTQ files delimited by ';' (i.e. '<fwd>;<rev>') but found ${fastq_entries.size()} " +
+ "files for ${group_id} ${sample_type_enum}/${sequence_type_enum}"
+ Nextflow.exit(1)
+ }
+
+ def (fwd, rev) = fastq_entries
def fastq_key = [info_data[Constants.InfoField.LIBRARY_ID], info_data[Constants.InfoField.LANE]]
if (meta_sample.containsKey(fastq_key)) {
@@ -122,7 +150,7 @@ class Utils {
meta_sample[filetype_enum] = [:]
}
- meta_sample[filetype_enum][fastq_key] = ['fwd': fwd, 'rev': rev]
+ meta_sample[filetype_enum][fastq_key] = ['fwd': Utils.getFileObject(fwd), 'rev': Utils.getFileObject(rev)]
} else {
@@ -144,12 +172,12 @@ class Utils {
def index_enum
def index_str
- if (key === Constants.FileType.BAM) {
- index_enum = Constants.FileType.BAI
- index_str = (meta[sample_key][key].toString().endsWith('cram')) ? 'crai' : 'bai'
- } else if (key === Constants.FileType.BAM_REDUX) {
+ if (key === Constants.FileType.BAM || key === Constants.FileType.BAM_REDUX) {
index_enum = Constants.FileType.BAI
index_str = 'bai'
+ } else if (key === Constants.FileType.CRAM || key === Constants.FileType.CRAM_REDUX) {
+ index_enum = Constants.FileType.CRAI
+ index_str = 'crai'
} else if (key === Constants.FileType.ESVEE_VCF) {
index_enum = Constants.FileType.ESVEE_VCF_TBI
index_str = 'tbi'
@@ -174,46 +202,67 @@ class Utils {
}
meta[sample_key][index_enum] = index_fp
+ }
+ }
+
+ // CRAMs are passed to hmftools as if they were BAMs, e.g. `-bam_file /path/to/tumor.cram`
+ // We therefore set the BAM/BAI path to be the CRAM/CRAI path
+ sample_keys.each { sample_key ->
+
+ def meta_sample = meta[sample_key]
+ if (meta_sample.containsKey(Constants.FileType.CRAM_REDUX)) {
+ meta_sample[Constants.FileType.BAM_REDUX] = meta_sample.remove(Constants.FileType.CRAM_REDUX)
}
+
+ if (meta_sample.containsKey(Constants.FileType.CRAM)) {
+ meta_sample[Constants.FileType.BAM] = meta_sample.remove(Constants.FileType.CRAM)
+ }
+
+ // The BAI key is used to store the index for both regular/REDUX CRAMs/BAMs
+ if (meta_sample.containsKey(Constants.FileType.CRAI)) {
+ meta_sample[Constants.FileType.BAI] = meta_sample.remove(Constants.FileType.CRAI)
+ }
+
}
// Check that REDUX TSVs are present
sample_keys.each { sample_key ->
- if(stub_run)
+ if (stub_run) {
return
+ }
def meta_sample = meta[sample_key]
- def sample_id = meta_sample.sample_id
- if(!meta_sample.containsKey(Constants.FileType.BAM_REDUX))
+ if (!meta_sample.containsKey(Constants.FileType.BAM_REDUX)) {
return
-
- if(meta_sample.containsKey(Constants.FileType.BAM)) {
- log.error "${Constants.FileType.BAM} and ${Constants.FileType.BAM_REDUX} provided for sample ${sample_id}. Please only provide one or the other"
- Nextflow.exit(1)
}
def bam_path = meta_sample[Constants.FileType.BAM_REDUX]
def bam_dir = bam_path.getParent().toUriString()
// Get user specified TSV paths
- def jitter_tsv = meta_sample[Constants.FileType.REDUX_JITTER_TSV]
- def ms_tsv = meta_sample[Constants.FileType.REDUX_MS_TSV]
+ def jitter_tsv = meta_sample[Constants.FileType.REDUX_JITTER_TSV]
+ def ms_tsv = meta_sample[Constants.FileType.REDUX_MS_TSV]
// If TSV paths not provided, default to TSV paths in the same dir as the BAM
- jitter_tsv = jitter_tsv ?: "${bam_dir}/${sample_id}.jitter_params.tsv"
- ms_tsv = ms_tsv ?: "${bam_dir}/${sample_id}.ms_table.tsv.gz"
+ def sample_id = meta_sample.getOrDefault('longitudinal_sample_id', meta_sample['sample_id'])
+ jitter_tsv = jitter_tsv ?: "${bam_dir}/${sample_id}.jitter_params.tsv"
+ ms_tsv = ms_tsv ?: "${bam_dir}/${sample_id}.ms_table.tsv.gz"
- jitter_tsv = nextflow.Nextflow.file(jitter_tsv)
- ms_tsv = nextflow.Nextflow.file(ms_tsv)
+ jitter_tsv = nextflow.Nextflow.file(jitter_tsv)
+ ms_tsv = nextflow.Nextflow.file(ms_tsv)
def missing_tsvs = [:]
- if(!jitter_tsv.exists()) missing_tsvs[Constants.FileType.REDUX_JITTER_TSV] = jitter_tsv
- if(!ms_tsv.exists()) missing_tsvs[Constants.FileType.REDUX_MS_TSV] = ms_tsv
+ if (!jitter_tsv.exists()) {
+ missing_tsvs[Constants.FileType.REDUX_JITTER_TSV] = jitter_tsv
+ }
+ if (!ms_tsv.exists()) {
+ missing_tsvs[Constants.FileType.REDUX_MS_TSV] = ms_tsv
+ }
- if(missing_tsvs.size() > 0){
+ if (missing_tsvs.size() > 0) {
def error_message = []
@@ -221,7 +270,8 @@ class Utils {
error_message.add("${bam_path.toUriString()} (${Constants.FileType.BAM_REDUX})")
missing_tsvs.each { error_message.add("${it.value} (missing expected ${it.key})") }
error_message.add("")
- error_message.add("Alternatively, provide the TSV paths in the sample sheet using filetype values: " +
+ error_message.add(
+ "Alternatively, provide the TSV paths in the sample sheet using filetype values: " +
"${Constants.FileType.REDUX_JITTER_TSV}, " +
"${Constants.FileType.REDUX_MS_TSV}"
)
@@ -233,6 +283,18 @@ class Utils {
// Set parsed REDUX TSV paths in metadata object
meta_sample[Constants.FileType.REDUX_JITTER_TSV] = jitter_tsv
meta_sample[Constants.FileType.REDUX_MS_TSV] = ms_tsv
+
+ }
+
+ // For purity estimation with WISP, require primary normal DNA BAM when an AMBER directory is provided
+ def meta_tumor_dna = meta.getOrDefault([Constants.SampleType.TUMOR, Constants.SequenceType.DNA], [:])
+ def longitudinal = meta_tumor_dna.containsKey('longitudinal_sample_id')
+ def has_amber_dir = meta_tumor_dna.containsKey(Constants.FileType.AMBER_DIR)
+ def has_normal_dna_bam = Utils.hasNormalDnaBam(meta) || Utils.hasNormalDnaReduxBam(meta)
+
+ if (longitudinal && has_amber_dir && !has_normal_dna_bam) {
+ log.error "AMBER input was provided without the required primary normal DNA BAM for ${meta.group_id}"
+ Nextflow.exit(1)
}
return meta
@@ -259,7 +321,7 @@ class Utils {
fps << "${params.ref_data_hmf_data_path.replaceAll('/$', '')}/${v}"
}
- if(params.panel !== null) {
+ if (params.panel !== null) {
params.panel_data_paths[params.panel][params.genome_version.toString()]
.each { k, v ->
fps << "${params.ref_data_panel_data_path.replaceAll('/$', '')}/${v}"
@@ -267,11 +329,15 @@ class Utils {
}
fps.each { fp_str ->
- if (fp_str === null) return
+ if (fp_str === null) {
+ return
+ }
def fp = Utils.getFileObject(fp_str)
- if (!fp_str || fp.exists()) return
+ if (!fp_str || fp.exists()) {
+ return
+ }
if (fp_str.endsWith('/')) {
fp.mkdirs()
@@ -303,19 +369,23 @@ class Utils {
def (sample_type, sequence_type) = key
- if (!meta[key].containsKey(Constants.FileType.BAM) &&
+ if (
+ !meta[key].containsKey(Constants.FileType.BAM) &&
!meta[key].containsKey(Constants.FileType.BAM_REDUX) &&
- !meta[key].containsKey(Constants.FileType.FASTQ)) {
+ !meta[key].containsKey(Constants.FileType.CRAM) &&
+ !meta[key].containsKey(Constants.FileType.CRAM_REDUX) &&
+ !meta[key].containsKey(Constants.FileType.FASTQ)
+ ) {
- log.error "no BAMs nor BAM_MARKDUPs nor FASTQs provided for ${meta.group_id} ${sample_type}/${sequence_type}\n\n" +
- "NB: BAMs or BAM_MARKDUPs or FASTQs are always required as they are the basis to determine input sample type."
+ log.error "no BAM/CRAM nor BAM_REDUX/CRAM_REDUX nor FASTQ files provided for ${meta.group_id} ${sample_type}/${sequence_type}\n\n" +
+ "NB: At least one of these files is required as they are the basis to determine input sample type."
Nextflow.exit(1)
}
}
// Do not allow donor sample without normal sample
- if (Utils.hasDonorDna(meta) && ! Utils.hasNormalDna(meta)) {
+ if (Utils.hasDonorDna(meta) && !Utils.hasNormalDna(meta)) {
log.error "a donor sample but not normal sample was found for ${meta.group_id}\n\n" +
"Analysis with a donor sample requires a normal sample."
Nextflow.exit(1)
@@ -326,8 +396,8 @@ class Utils {
// Do not allow donor DNA
if (Utils.hasDonorDna(meta)) {
- log.error "targeted mode is not compatible with the donor DNA BAM provided for ${meta.group_id}\n\n" +
- "The targeted workflow supports only tumor and normal DNA BAMs (and tumor RNA BAMs for TSO500)"
+ log.error "targeted mode is not compatible with the donor DNA BAM/CRAM provided for ${meta.group_id}\n\n" +
+ "The targeted workflow supports only tumor and normal DNA BAM/CRAMs (and tumor RNA BAM/CRAMs for TSO500)"
Nextflow.exit(1)
}
@@ -408,6 +478,15 @@ class Utils {
Nextflow.exit(1)
}
+ // Require --isofox_gene_ids argument to be provided in PANEL_RESOURCE_CREATION when RNA inputs are present
+ if (run_config.mode === Constants.RunMode.PANEL_RESOURCE_CREATION && run_config.has_rna && !params.isofox_gene_ids) {
+ log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+ " Running the panel resource creation workflow with RNA requires that the\n" +
+ " --isofox_gene_ids argument is set with an appropriate input file.\n" +
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+ Nextflow.exit(1)
+ }
+
}
static public getEnumFromString(s, e) {
@@ -459,8 +538,21 @@ class Utils {
}
// Sample names
+ static public getTumorDnaSampleName(Map named_args, meta) {
+ // The primary name lives under 'sample_id'; a longitudinal name, when present, takes
+ // precedence unless the caller passes `primary: true`
+ def meta_sample = getTumorDnaSample(meta)
+ def primary_id = meta_sample['sample_id']
+
+ if (named_args.getOrDefault('primary', false)) {
+ return primary_id
+ }
+
+ return meta_sample.getOrDefault('longitudinal_sample_id', primary_id)
+ }
+
static public getTumorDnaSampleName(meta) {
- return getTumorDnaSample(meta)['sample_id']
+ getTumorDnaSampleName([:], meta)
}
static public getTumorRnaSampleName(meta) {
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
index 8cf99e57..dba90270 100755
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@@ -16,7 +16,6 @@ class WorkflowMain {
def default_invalid = false
// Set defaults common to all run configuration
-
if (!params.containsKey('genome_version')) {
if (Constants.GENOMES_VERSION_37.contains(params.genome)) {
params.genome_version = '37'
@@ -62,30 +61,39 @@ class WorkflowMain {
return
}
- if (run_mode === Constants.RunMode.TARGETED) {
+ // Attempt to set default panel data path; make no assumption on valid 'panel' value
+ if (run_mode === Constants.RunMode.TARGETED || run_mode === Constants.RunMode.PREPARE_REFERENCE) {
- // Attempt to set default panel data path; make no assumption on valid 'panel' value
if (params.containsKey('panel')) {
- if (params.panel == 'tso500' && params.genome_version.toString() == '37') {
- params.ref_data_panel_data_path = Constants.TSO500_PANEL_37_PATH
- } else if (params.panel == 'tso500' && params.genome_version.toString() == '38') {
- params.ref_data_panel_data_path = Constants.TSO500_PANEL_38_PATH
+
+ if (params.panel == 'tso500') {
+ if (params.genome_version.toString() == '37') {
+ params.ref_data_panel_data_path = Constants.TSO500_PANEL_37_PATH
+ } else if (params.genome_version.toString() == '38') {
+ params.ref_data_panel_data_path = Constants.TSO500_PANEL_38_PATH
+ }
}
+
}
+ }
+
+
+ if (run_mode === Constants.RunMode.TARGETED) {
+
// When fastp UMI is enabled, REDUX UMI should be as well
- if (params.fastp_umi && (!params.containsKey('redux_umi') || !params.redux_umi)) {
- params.redux_umi = true
+ if (params.fastp_umi_enabled && (!params.containsKey('redux_umi_enabled') || !params.redux_umi_enabled)) {
+ params.redux_umi_enabled = true
}
// Set the REDUX UMI duplex delimiter to '_' when the following conditions are met:
// - both fastp and REDUX UMI processing enabled
// - fastp is using a duplex UMI location type (per_index or per_read)
// - no REDUX duplex delimiter has been set
- def fastp_and_redux_umi = params.fastp_umi && params.redux_umi
+ def fastp_and_redux_umi_enabled = params.fastp_umi_enabled && params.redux_umi_enabled
def fastp_duplex_location = params.containsKey('fastp_umi_location') && (params.fastp_umi_location == 'per_index' || params.fastp_umi_location == 'per_read')
def no_umi_duplex_delim = !params.containsKey('redux_umi_duplex_delim') || !params.redux_umi_duplex_delim
- if (fastp_and_redux_umi && fastp_duplex_location && no_umi_duplex_delim) {
+ if (fastp_and_redux_umi_enabled && fastp_duplex_location && no_umi_duplex_delim) {
params.redux_umi_duplex_delim = '_'
}
@@ -193,6 +201,18 @@ class WorkflowMain {
def run_mode = Utils.getRunMode(params.mode, log)
+ if (run_mode === Constants.RunMode.PREPARE_REFERENCE && params.ref_data_types == null) {
+
+ def ref_data_types = Utils.getEnumNames(Constants.RefDataType).join('\n - ')
+
+ log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+ " CLI argument --ref_data_types is required for mode prepare_reference.\n" +
+ " Please specify one or more of the below valid values (separated by commas)\n" +
+ " - ${ref_data_types}\n" +
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+ Nextflow.exit(1)
+ }
+
if (run_mode === Constants.RunMode.TARGETED) {
if (!params.containsKey('panel') || params.panel === null) {
@@ -200,7 +220,7 @@ class WorkflowMain {
def panels = Constants.PANELS_DEFINED.join('\n - ')
log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" A panel is required to be set using the --panel CLI argument or in a\n" +
- " configuration file when running in targeted mode.\n" +
+ " configuration file when running in targeted mode or panel resource creation mode.\n" +
" Currently, the available built-in panels are:\n" +
" - ${panels}\n" +
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
@@ -224,6 +244,30 @@ class WorkflowMain {
}
}
+ if (run_mode === Constants.RunMode.PURITY_ESTIMATE) {
+
+ def purity_estimate_modes = [Constants.RunMode.WGTS, Constants.RunMode.TARGETED]
+
+ def purity_mode_enum = !params.purity_estimate_mode
+ ? null
+ : Utils.getEnumFromString(params.purity_estimate_mode, Constants.RunMode)
+
+ if (!purity_mode_enum || !purity_estimate_modes.contains(purity_mode_enum)) {
+
+ def purity_estimate_modes_str = purity_estimate_modes
+ .collect { e -> e.name().toLowerCase() }
+ .join('\n - ')
+ // NOTE: every segment but the last must end with '+', otherwise the closing banner is dropped from the message
+ log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+ " A valid purity estimate run mode must be set using the --purity_estimate_mode\n" +
+ " CLI argument or in a configuration file.\n" +
+ " Currently, the available run modes are:\n" +
+ " - ${purity_estimate_modes_str}\n" +
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+ Nextflow.exit(1)
+ }
+ }
+
if (params.ref_data_genome_alt !== null) {
if (params.genome_type != 'alt') {
log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
@@ -247,17 +291,17 @@ class WorkflowMain {
// UMI parameters
def fastp_umi_args_set_any = params.fastp_umi_location || params.fastp_umi_length || params.fastp_umi_skip >= 0
- if (fastp_umi_args_set_any && !params.fastp_umi) {
+ if (fastp_umi_args_set_any && !params.fastp_umi_enabled) {
log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" Detected use of fastp UMI parameters but fastp UMI processing has not been enabled.\n" +
- " Please review your configuration and set the fastp_umi flag or otherwise adjust\n" +
- " accordingly.\n" +
+ " Please review your configuration and set the fastp_umi_enabled flag or\n" +
+ " otherwise adjust accordingly.\n" +
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
Nextflow.exit(1)
}
def fastp_umi_args_set_all = params.fastp_umi_location && params.fastp_umi_length && params.fastp_umi_skip >= 0
- if (params.fastp_umi && !fastp_umi_args_set_all) {
+ if (params.fastp_umi_enabled && !fastp_umi_args_set_all) {
log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" Refusing to run fastp UMI processing without having any UMI params configured.\n" +
" Please review your configuration and appropriately set all fastp_umi_* parameters.\n" +
@@ -265,10 +309,10 @@ class WorkflowMain {
Nextflow.exit(1)
}
- if (params.redux_umi_duplex_delim && params.redux_umi === false) {
+ if (params.redux_umi_duplex_delim && params.redux_umi_enabled === false) {
log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" Detected use of REDUX UMI parameters but REDUX UMI processing has not been\n" +
- " enabled. Please review your configuration and set the redux_umi flag or\n" +
+ " enabled. Please review your configuration and set the redux_umi_enabled flag or\n" +
" otherwise adjust accordingly.\n" +
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
Nextflow.exit(1)
@@ -289,7 +333,6 @@ class WorkflowMain {
return [
mode: run_mode,
- panel: run_mode === Constants.RunMode.TARGETED ? params.panel : null,
stages: stages,
has_dna: inputs.any { Utils.hasTumorDna(it) },
has_rna: inputs.any { Utils.hasTumorRna(it) },
@@ -297,4 +340,98 @@ class WorkflowMain {
has_dna_fastq: inputs.any { Utils.hasTumorDnaFastq(it) || Utils.hasNormalDnaFastq(it) },
]
}
+
+ // Build the reference data preparation config for an analysis run from the given run configuration
+ public static getPrepConfigFromSamplesheet(run_config) {
+ return [
+ prepare_ref_data_only: false,
+ require_fasta: true,
+ require_fai: true,
+ require_dict: true,
+ require_img: true,
+ // Aligner indexes are gated on the matching has_*_fastq flag and the alignment stage being enabled
+ require_bwamem2_index: run_config.has_dna_fastq && run_config.stages.alignment,
+ require_star_index: run_config.has_rna_fastq && run_config.stages.alignment,
+ // GRIDSS index: WGTS mode with DNA and the virusinterpreter stage; panel data: targeted mode only
+ require_gridss_index: run_config.has_dna && run_config.mode === Constants.RunMode.WGTS && run_config.stages.virusinterpreter,
+ require_hmftools_data: true,
+ require_panel_data: run_config.mode === Constants.RunMode.TARGETED,
+ ]
+ }
+
+ // Build the reference data preparation config from the comma-separated params.ref_data_types value
+ public static getPrepConfigFromCli(params, log) {
+ def ref_data_types = params.ref_data_types
+ .tokenize(',')
+ .collect {
+ def ref_data_type_enum = Utils.getEnumFromString(it, Constants.RefDataType)
+ if (!ref_data_type_enum) {
+ def ref_data_type_str = Utils.getEnumNames(Constants.RefDataType).join('\n - ')
+ log.error "received invalid ref data type: '${it}'. Valid options are:\n - ${ref_data_type_str}"
+ Nextflow.exit(1)
+ }
+
+ return ref_data_type_enum
+ }
+ // Workflow-level types (WGS, WTS, TARGETED) imply the genome files and the hmftools data
+ if (
+ ref_data_types.contains(Constants.RefDataType.WGS) ||
+ ref_data_types.contains(Constants.RefDataType.WTS) ||
+ ref_data_types.contains(Constants.RefDataType.TARGETED)
+ ) {
+ ref_data_types += [
+ Constants.RefDataType.FASTA,
+ Constants.RefDataType.FAI,
+ Constants.RefDataType.DICT,
+ Constants.RefDataType.IMG,
+ Constants.RefDataType.HMFTOOLS
+ ]
+ }
+ // WGS additionally implies the GRIDSS index
+ if (ref_data_types.contains(Constants.RefDataType.WGS)) {
+ ref_data_types += [Constants.RefDataType.GRIDSS_INDEX]
+ }
+ // TARGETED additionally implies the panel data
+ if (ref_data_types.contains(Constants.RefDataType.TARGETED)) {
+ ref_data_types += [Constants.RefDataType.PANEL]
+ }
+ // Translate the expanded type list into individual requirement flags
+ def require_fasta = ref_data_types.contains(Constants.RefDataType.FASTA)
+ def require_fai = ref_data_types.contains(Constants.RefDataType.FAI)
+ def require_dict = ref_data_types.contains(Constants.RefDataType.DICT)
+ def require_img = ref_data_types.contains(Constants.RefDataType.IMG)
+
+ def require_bwamem2_index = ref_data_types.contains(Constants.RefDataType.BWAMEM2_INDEX) || ref_data_types.contains(Constants.RefDataType.DNA_ALIGNMENT)
+ def require_star_index = ref_data_types.contains(Constants.RefDataType.STAR_INDEX) || ref_data_types.contains(Constants.RefDataType.RNA_ALIGNMENT)
+
+ def require_gridss_index = ref_data_types.contains(Constants.RefDataType.GRIDSS_INDEX)
+ def require_hmftools_data = ref_data_types.contains(Constants.RefDataType.HMFTOOLS)
+ def require_panel_data = ref_data_types.contains(Constants.RefDataType.PANEL)
+ // Panel data is only prepared when --panel names a panel in Constants.PANELS_DEFINED; otherwise warn and skip
+ if (require_panel_data) {
+ if (params.panel == null) {
+ require_panel_data = false
+ log.warn "Skipping preparing panel specific reference data as --panel CLI argument was not provided"
+ } else if (!Constants.PANELS_DEFINED.contains(params.panel)) {
+ require_panel_data = false
+ log.warn "Skipping preparing panel specific reference data for custom panel: ${params.panel}"
+ }
+ }
+
+ return [
+ prepare_ref_data_only: true,
+
+ require_fasta: require_fasta,
+ require_fai: require_fai,
+ require_dict: require_dict,
+ require_img: require_img,
+
+ require_bwamem2_index: require_bwamem2_index,
+ require_star_index: require_star_index,
+
+ require_gridss_index: require_gridss_index,
+ require_hmftools_data: require_hmftools_data,
+ require_panel_data: require_panel_data,
+ ]
+ }
}
diff --git a/main.nf b/main.nf
index facb872d..e1e528c1 100644
--- a/main.nf
+++ b/main.nf
@@ -58,8 +58,11 @@ if (workflow.stubRun && params.create_stub_placeholders) {
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-include { TARGETED } from './workflows/targeted'
-include { WGTS } from './workflows/wgts'
+include { PANEL_RESOURCE_CREATION } from './workflows/panel_resource_creation'
+include { PREPARE_REFERENCE } from './workflows/prepare_reference'
+include { PURITY_ESTIMATE } from './workflows/purity_estimate'
+include { TARGETED } from './workflows/targeted'
+include { WGTS } from './workflows/wgts'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -70,17 +73,35 @@ include { WGTS } from './workflows/wgts'
//
// WORKFLOW: Run main analysis pipeline depending on type of input
//
-run_mode = Utils.getRunMode(params.mode, log)
workflow NFCORE_ONCOANALYSER {
- if (run_mode === Constants.RunMode.WGTS) {
- WGTS()
- } else if (run_mode === Constants.RunMode.TARGETED) {
- TARGETED()
+ // Get run mode
+ run_mode = Utils.getRunMode(params.mode, log)
+
+ // Run selected workflow
+ // NOTE(SW): prepare reference is checked early as params.input is not required
+ if (run_mode === Constants.RunMode.PREPARE_REFERENCE) {
+ PREPARE_REFERENCE()
} else {
- log.error("received bad run mode: ${run_mode}")
- Nextflow.exit(1)
+ // Parse and validate inputs
+ inputs = Utils.parseInput(params.input, workflow.stubRun, log)
+ run_config = WorkflowMain.getRunConfig(params, inputs, log)
+ Utils.validateInput(inputs, run_config, params, log)
+
+ // Run requested workflow
+ if (run_mode === Constants.RunMode.WGTS) {
+ WGTS(inputs, run_config)
+ } else if (run_mode === Constants.RunMode.TARGETED) {
+ TARGETED(inputs, run_config)
+ } else if (run_mode === Constants.RunMode.PURITY_ESTIMATE) {
+ PURITY_ESTIMATE(inputs, run_config)
+ } else if (run_mode === Constants.RunMode.PANEL_RESOURCE_CREATION) {
+ PANEL_RESOURCE_CREATION(inputs, run_config)
+ } else {
+ log.error("received bad run mode: ${run_mode}")
+ Nextflow.exit(1)
+ }
}
}
diff --git a/modules/local/amber/environment.yml b/modules/local/amber/environment.yml
index 7a77f054..42a3e5d5 100644
--- a/modules/local/amber/environment.yml
+++ b/modules/local/amber/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-amber=4.1.1
+ - bioconda::hmftools-amber=4.2
diff --git a/modules/local/amber/main.nf b/modules/local/amber/main.nf
index aa2e314c..b75b047c 100644
--- a/modules/local/amber/main.nf
+++ b/modules/local/amber/main.nf
@@ -4,18 +4,20 @@ process AMBER {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-amber:4.1.1--hdfd78af_0' :
- 'biocontainers/hmftools-amber:4.1.1--hdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-amber:4.2--hdfd78af_0' :
+ 'biocontainers/hmftools-amber:4.2--hdfd78af_0' }"
input:
tuple val(meta), path(tumor_bam), path(normal_bam), path(donor_bam), path(tumor_bai), path(normal_bai), path(donor_bai)
val genome_ver
path heterozygous_sites
- path target_region_bed
+ path target_regions_bed
+ val tumor_min_depth
output:
tuple val(meta), path('amber/'), emit: amber_dir
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -25,17 +27,21 @@ process AMBER {
def xmx_mod = task.ext.xmx_mod ?: 0.75
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
def reference_ids = []
- if (meta.normal_id != null) reference_ids.add(meta.normal_id)
- if (meta.donor_id != null) reference_ids.add(meta.donor_id)
+ if (meta.normal_id != null) { reference_ids.add(meta.normal_id) }
+ if (meta.donor_id != null) { reference_ids.add(meta.donor_id) }
def reference_arg = reference_ids.size() > 0 ? "-reference ${String.join(",", reference_ids)}" : ''
def reference_bams = []
- if (normal_bam) reference_bams.add(normal_bam.toString())
- if (donor_bam) reference_bams.add(donor_bam.toString())
+ if (normal_bam) { reference_bams.add(normal_bam.toString()) }
+ if (donor_bam) { reference_bams.add(donor_bam.toString()) }
def reference_bam_arg = reference_bams.size() > 0 ? "-reference_bam ${String.join(",", reference_bams)}" : ''
- def target_regions_bed_arg = target_region_bed ? "-target_regions_bed ${target_region_bed}" : ''
+ def target_regions_bed_arg = target_regions_bed ? "-target_regions_bed ${target_regions_bed}" : ''
+
+ def tumor_min_depth_arg = tumor_min_depth ? "-tumor_min_depth ${tumor_min_depth}" : ''
"""
amber \\
@@ -45,9 +51,11 @@ process AMBER {
-tumor_bam ${tumor_bam} \\
${reference_arg} \\
${reference_bam_arg} \\
- ${target_regions_bed_arg} \\
-ref_genome_version ${genome_ver} \\
+ ${target_regions_bed_arg} \\
-loci ${heterozygous_sites} \\
+ ${tumor_min_depth_arg} \\
+ ${log_level_arg} \\
-threads ${task.cpus} \\
-output_dir amber/
@@ -60,6 +68,7 @@ process AMBER {
stub:
"""
mkdir -p amber/
+
touch amber/placeholder
echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
diff --git a/modules/local/amber/meta.yml b/modules/local/amber/meta.yml
index 476550ea..72601e98 100644
--- a/modules/local/amber/meta.yml
+++ b/modules/local/amber/meta.yml
@@ -14,7 +14,7 @@ input:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- tumor_bam:
type: file
description: Tumor BAM file
@@ -46,22 +46,29 @@ input:
type: file
description: AMBER heterozygous sites file
pattern: "*.{vcf.gz}"
- - target_region_bed:
+ - target_regions_bed:
type: file
- description: Target region BED file (optional)
+ description: Target regions BED file (optional)
pattern: "*.{bed}"
+ - tumor_min_depth:
+ type: string
+ description: Minimum depth for a site to be considered
output:
- meta:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- amber_dir:
- type: directory
+ type: directory
description: AMBER output directory
+ pattern: "amber/"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/bamtools/environment.yml b/modules/local/bamtools/environment.yml
index f340a260..bb739b9c 100644
--- a/modules/local/bamtools/environment.yml
+++ b/modules/local/bamtools/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-bam-tools=1.3
+ - bioconda::hmftools-bam-tools=1.4.2
diff --git a/modules/local/bamtools/main.nf b/modules/local/bamtools/main.nf
index 02e9c9dc..226f0c9a 100644
--- a/modules/local/bamtools/main.nf
+++ b/modules/local/bamtools/main.nf
@@ -4,17 +4,20 @@ process BAMTOOLS {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-bam-tools:1.3--hdfd78af_0' :
- 'biocontainers/hmftools-bam-tools:1.3--hdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-bam-tools:1.4.2--hdfd78af_0' :
+ 'biocontainers/hmftools-bam-tools:1.4.2--hdfd78af_0' }"
input:
tuple val(meta), path(bam), path(bai)
path genome_fasta
val genome_ver
+ path driver_gene_panel
+ path ensembl_data_resources
output:
tuple val(meta), path("${meta.id}_bamtools/"), emit: metrics_dir
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -24,6 +27,8 @@ process BAMTOOLS {
def xmx_mod = task.ext.xmx_mod ?: 0.75
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
"""
mkdir -p ${meta.id}_bamtools/
@@ -35,8 +40,10 @@ process BAMTOOLS {
-bam_file ${bam} \\
-ref_genome ${genome_fasta} \\
-ref_genome_version ${genome_ver} \\
+ -driver_gene_panel ${driver_gene_panel} \\
+ -ensembl_data_dir ${ensembl_data_resources} \\
+ ${log_level_arg} \\
-threads ${task.cpus} \\
- -log_level INFO \\
-output_dir ${meta.id}_bamtools/
cat <<-END_VERSIONS > versions.yml
diff --git a/modules/local/bamtools/meta.yml b/modules/local/bamtools/meta.yml
index 69dda01b..63c9368b 100644
--- a/modules/local/bamtools/meta.yml
+++ b/modules/local/bamtools/meta.yml
@@ -29,6 +29,13 @@ input:
- genome_ver:
type: string
description: Reference genome version
+ - driver_gene_panel:
+ type: file
+ description: Driver gene panel file
+ pattern: "*.{tsv}"
+ - ensembl_data_resources:
+ type: directory
+ description: HMF ensembl data resources directory
output:
- meta:
type: map
@@ -42,5 +49,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/bwa-mem2/mem/environment.yml b/modules/local/bwa-mem2/mem/environment.yml
index 7fc9ecd7..e6e35644 100644
--- a/modules/local/bwa-mem2/mem/environment.yml
+++ b/modules/local/bwa-mem2/mem/environment.yml
@@ -4,6 +4,6 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-bwa-plus=1.0.0
+ - bioconda::bwa-mem2=2.3
- bioconda::samtools=1.21
- bioconda::sambamba=1.0.1
diff --git a/modules/local/bwa-mem2/mem/main.nf b/modules/local/bwa-mem2/mem/main.nf
index ae70e0b5..bd6a30f3 100644
--- a/modules/local/bwa-mem2/mem/main.nf
+++ b/modules/local/bwa-mem2/mem/main.nf
@@ -4,8 +4,8 @@ process BWAMEM2_ALIGN {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-bwa-plus:1.0.0--h077b44d_0' :
- 'biocontainers/hmftools-bwa-plus:1.0.0--h077b44d_0' }"
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:596c0d6a494faa218562f2be03af2714d454da4f-0' :
+ 'biocontainers/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:596c0d6a494faa218562f2be03af2714d454da4f-0' }"
input:
tuple val(meta), path(reads_fwd), path(reads_rev)
@@ -15,6 +15,7 @@ process BWAMEM2_ALIGN {
output:
tuple val(meta), path('*.bam'), path('*.bai'), emit: bam
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -30,7 +31,7 @@ process BWAMEM2_ALIGN {
"""
ln -fs \$(find -L ${genome_bwamem2_index} -type f) ./
- bwa-plus mem \\
+ bwa-mem2 mem \\
${args} \\
-Y \\
-K 100000000 \\
@@ -54,9 +55,10 @@ process BWAMEM2_ALIGN {
--out ${output_fn} \\
/dev/stdin
+    # NOTE(SW): the bwa-mem2 version is hardcoded because release 2.3 reports the wrong version, see https://github.com/bwa-mem2/bwa-mem2/issues/276
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- bwa-plus: \$(bwa-plus version 2>/dev/null)
+ bwa-mem2: 2.3
sambamba: \$(sambamba --version 2>&1 | sed -n '/^sambamba / { s/^.* //p }' | head -n1)
END_VERSIONS
"""
diff --git a/modules/local/bwa-mem2/mem/meta.yml b/modules/local/bwa-mem2/mem/meta.yml
index da61d816..305a01a8 100644
--- a/modules/local/bwa-mem2/mem/meta.yml
+++ b/modules/local/bwa-mem2/mem/meta.yml
@@ -16,7 +16,7 @@ input:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- reads_fwd:
type: file
description: Forward reads FASTQ file
@@ -37,7 +37,7 @@ output:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- bam:
type: list
description: BAM and BAI file
@@ -46,6 +46,9 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
- "@mkcmkc"
diff --git a/modules/local/chord/environment.yml b/modules/local/chord/environment.yml
index cd5b4cb1..6e71ad2b 100644
--- a/modules/local/chord/environment.yml
+++ b/modules/local/chord/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-chord=2.1.0
+ - bioconda::hmftools-chord=2.1.2
diff --git a/modules/local/chord/main.nf b/modules/local/chord/main.nf
index 2f13c91f..0f6ab506 100644
--- a/modules/local/chord/main.nf
+++ b/modules/local/chord/main.nf
@@ -4,8 +4,8 @@ process CHORD {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-chord:2.1.0--hdfd78af_0' :
- 'biocontainers/hmftools-chord:2.1.0--hdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-chord:2.1.2--hdfd78af_0' :
+ 'biocontainers/hmftools-chord:2.1.2--hdfd78af_0' }"
input:
tuple val(meta), path(smlv_vcf), path(sv_vcf)
@@ -16,6 +16,7 @@ process CHORD {
output:
tuple val(meta), path('chord/'), emit: chord_dir
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -25,6 +26,8 @@ process CHORD {
def xmx_mod = task.ext.xmx_mod ?: 0.95
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
"""
## NOTE(LN): The CHORD jar runs an embedded R script using 'com.hartwig.hmftools.common.utils.r.RExecutor' which requires absolute
## paths. Relative paths don't work because RExecutor executes from a tmp dir, and not the working dir of this nextflow process
@@ -37,20 +40,20 @@ process CHORD {
-sample ${meta.sample_id} \\
-snv_indel_vcf_file \$(realpath ${smlv_vcf}) \\
-sv_vcf_file \$(realpath ${sv_vcf}) \\
- -output_dir \$(realpath chord/) \\
-ref_genome ${genome_fasta} \\
- -log_level DEBUG
+ ${log_level_arg} \\
+ -output_dir \$(realpath chord/)
cat <<-END_VERSIONS > versions.yml
"${task.process}":
chord: \$(chord -version | sed -n '/^CHORD version/ { s/^.* //p }')
END_VERSIONS
-
"""
stub:
"""
mkdir -p chord/
+
touch chord/${meta.sample_id}.chord.mutation_contexts.tsv
touch chord/${meta.sample_id}.chord.prediction.tsv
diff --git a/modules/local/chord/meta.yml b/modules/local/chord/meta.yml
index 6e92da8f..49457be0 100644
--- a/modules/local/chord/meta.yml
+++ b/modules/local/chord/meta.yml
@@ -35,9 +35,6 @@ input:
type: file
description: Reference genome assembly dict file
pattern: "*.{dict}"
- - genome_ver:
- type: string
- description: Reference genome version
output:
- meta:
type: map
@@ -51,5 +48,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/cider/main.nf b/modules/local/cider/main.nf
index 7c169b57..c51d24ee 100644
--- a/modules/local/cider/main.nf
+++ b/modules/local/cider/main.nf
@@ -15,6 +15,7 @@ process CIDER {
output:
tuple val(meta), path('cider/*'), emit: cider_dir
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -24,6 +25,8 @@ process CIDER {
def xmx_mod = task.ext.xmx_mod ?: 0.75
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
"""
cider \\
-Xmx${Math.round(task.memory.bytes * xmx_mod)} \\
@@ -31,11 +34,12 @@ process CIDER {
${args} \\
-sample ${meta.sample_id} \\
-bam ${bam} \\
+ -ref_genome_version ${genome_ver} \\
-blast \$(which blastn | sed 's#/bin/blastn##') \\
-blast_db ${human_blastdb} \\
- -ref_genome_version ${genome_ver} \\
- -threads ${task.cpus} \\
-write_cider_bam \\
+ -threads ${task.cpus} \\
+ ${log_level_arg} \\
-output_dir cider/
cat <<-END_VERSIONS > versions.yml
diff --git a/modules/local/cider/meta.yml b/modules/local/cider/meta.yml
index 8c0dc0d5..08967c8e 100644
--- a/modules/local/cider/meta.yml
+++ b/modules/local/cider/meta.yml
@@ -15,7 +15,7 @@ input:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- bam:
type: file
description: BAM file
@@ -35,7 +35,7 @@ output:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- cider_dir:
type: directory
description: CIDER output directory
@@ -43,5 +43,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/cobalt/panel_normalisation/environment.yml b/modules/local/cobalt/panel_normalisation/environment.yml
new file mode 100644
index 00000000..c7a860fb
--- /dev/null
+++ b/modules/local/cobalt/panel_normalisation/environment.yml
@@ -0,0 +1,7 @@
+name: cobalt_panel_normalisation
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::hmftools-cobalt=2.1
diff --git a/modules/local/cobalt/panel_normalisation/main.nf b/modules/local/cobalt/panel_normalisation/main.nf
new file mode 100644
index 00000000..659d9490
--- /dev/null
+++ b/modules/local/cobalt/panel_normalisation/main.nf
@@ -0,0 +1,83 @@
+// Runs COBALT's NormalisationFileBuilder over the staged AMBER and COBALT inputs to write a
+// target region normalisation file (cobalt.region_normalisation.<genome_ver>.tsv).
+//
+// Inputs:
+//   amber_dir.* / cobalt_dir.*: staged AMBER and COBALT input paths, searched recursively for files
+//   genome_ver: value passed to -ref_genome_version and embedded in the output filename
+//   gc_profile: file passed to -gc_profile
+//   target_regions_bed: file passed to -target_regions_bed
+//
+// Configurable via task.ext: args, xmx_mod (fraction of task memory used for -Xmx, default 0.95),
+// log_level and when.
+process COBALT_PANEL_NORMALISATION {
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/hmftools-cobalt:2.1--hdfd78af_1' :
+        'biocontainers/hmftools-cobalt:2.1--hdfd78af_1' }"
+
+    input:
+    tuple path('amber_dir.*'), path('cobalt_dir.*')
+    val genome_ver
+    path gc_profile
+    path target_regions_bed
+
+    output:
+    path 'cobalt.region_normalisation.*.tsv', emit: cobalt_normalisation
+    path 'versions.yml'                     , emit: versions
+    path '.command.*'                       , emit: command_files
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+
+    // Fraction of the task memory handed to the JVM heap; overridable via task.ext.xmx_mod
+    def xmx_mod = task.ext.xmx_mod ?: 0.95
+
+    def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
+    """
+    # Symlink every input file (except *.version files) into one flat directory that is passed as
+    # both -amber_dir and -cobalt_dir below
+    mkdir -p inputs/
+
+    for fp in \$(find -L amber_dir.* cobalt_dir.* -type f ! -name '*.version'); do
+        ln -sf ../\${fp} inputs/\${fp##*/};
+    done
+
+    # Sample ID file: a 'SampleId' header followed by one ID per AMBER BAF file (filename minus suffix)
+    (
+        echo SampleId
+        basename -s .amber.baf.tsv.gz -a inputs/*.amber.baf.tsv.gz
+    ) > sample_ids.txt
+
+    cobalt \\
+        -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\
+        com.hartwig.hmftools.cobalt.norm.NormalisationFileBuilder \\
+        ${args} \\
+        -sample_id_file sample_ids.txt \\
+        -amber_dir inputs/ \\
+        -cobalt_dir inputs/ \\
+        -ref_genome_version ${genome_ver} \\
+        -gc_profile ${gc_profile} \\
+        -target_regions_bed ${target_regions_bed} \\
+        ${log_level_arg} \\
+        -output_file cobalt.region_normalisation.${genome_ver}.tsv
+
+    # Match only the 'Cobalt version' line so that any other output cannot leak into versions.yml
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        cobalt_panel_normalisation: \$(cobalt -version | sed -n '/^Cobalt version/ { s/^.* //p }')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    touch cobalt.region_normalisation.${genome_ver}.tsv
+
+    echo -e '${task.process}:\\n    stub: noversions\\n' > versions.yml
+    """
+}
diff --git a/modules/local/cobalt/panel_normalisation/meta.yml b/modules/local/cobalt/panel_normalisation/meta.yml
new file mode 100644
index 00000000..166085fc
--- /dev/null
+++ b/modules/local/cobalt/panel_normalisation/meta.yml
@@ -0,0 +1,44 @@
+name: cobalt_panel_normalisation
+description: Build a COBALT target region normalisation file from a set of AMBER and COBALT output directories
+keywords:
+  - cobalt
+  - read depth ratios
+  - cnv
+tools:
+  - cobalt:
+      description: Count bam lines determines the read depth ratios of the supplied tumor and reference genomes.
+      homepage: https://github.com/hartwigmedical/hmftools/tree/master/cobalt
+      documentation: https://github.com/hartwigmedical/hmftools/tree/master/cobalt
+      licence: ["GPL v3"]
+input:
+  - amber_dirs:
+      type: directory
+      description: List of AMBER output directories
+  - cobalt_dirs:
+      type: directory
+      description: List of COBALT output directories
+  - genome_ver:
+      type: string
+      description: Reference genome version
+  - gc_profile:
+      type: file
+      description: GC profile file
+      pattern: "*.{cnp}"
+  - target_regions_bed:
+      type: file
+      description: Target regions BED file
+      pattern: "*.{bed}"
+output:
+  - cobalt_normalisation:
+      type: file
+      description: COBALT normalisation file
+      pattern: "*.{tsv}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - command_files:
+      type: list
+      description: List of command files
+authors:
+  - "@scwatts"
diff --git a/modules/local/esvee/call/environment.yml b/modules/local/cobalt/run/environment.yml
similarity index 55%
rename from modules/local/esvee/call/environment.yml
rename to modules/local/cobalt/run/environment.yml
index 01355c11..80e8b458 100644
--- a/modules/local/esvee/call/environment.yml
+++ b/modules/local/cobalt/run/environment.yml
@@ -1,7 +1,7 @@
-name: esvee_call
+name: cobalt_run
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-esvee=1.0.3
+ - bioconda::hmftools-cobalt=2.1
diff --git a/modules/local/cobalt/main.nf b/modules/local/cobalt/run/main.nf
similarity index 65%
rename from modules/local/cobalt/main.nf
rename to modules/local/cobalt/run/main.nf
index c4bbc8e6..03541aa0 100644
--- a/modules/local/cobalt/main.nf
+++ b/modules/local/cobalt/run/main.nf
@@ -4,18 +4,20 @@ process COBALT {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-cobalt:2.0--hdfd78af_0' :
- 'biocontainers/hmftools-cobalt:2.0--hdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-cobalt:2.1--hdfd78af_1' :
+ 'biocontainers/hmftools-cobalt:2.1--hdfd78af_1' }"
input:
tuple val(meta), path(tumor_bam), path(normal_bam), path(tumor_bai), path(normal_bai)
path gc_profile
path diploid_regions
path target_region_normalisation
+ val targeted_mode
output:
tuple val(meta), path('cobalt/'), emit: cobalt_dir
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -25,15 +27,18 @@ process COBALT {
def xmx_mod = task.ext.xmx_mod ?: 0.75
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
def reference_arg = meta.containsKey('normal_id') ? "-reference ${meta.normal_id}" : ''
def reference_bam_arg = normal_bam ? "-reference_bam ${normal_bam}" : ''
- def diploid_regions_arg = diploid_regions ? "-tumor_only_diploid_bed ${diploid_regions}" : ''
- def target_region_arg = target_region_normalisation ? "-target_region ${target_region_normalisation}" : ''
+ def target_region_norm_file_arg = target_region_normalisation ? "-target_region_norm_file ${target_region_normalisation}" : ''
+
+ def tumor_only_mode = !meta.containsKey('normal_id')
- def run_mode = Utils.getEnumFromString(params.mode, Constants.RunMode)
- def pcf_gamma_arg = run_mode === Constants.RunMode.TARGETED && !meta.containsKey('normal_id')
- ? "-pcf_gamma 50" : ""
+ def pcf_gamma_arg = targeted_mode && tumor_only_mode ? '-pcf_gamma 50' : ''
+
+ def diploid_regions_arg = !targeted_mode && tumor_only_mode ? "-tumor_only_diploid_bed ${diploid_regions}" : ''
"""
cobalt \\
@@ -41,24 +46,26 @@ process COBALT {
${args} \\
-tumor ${meta.tumor_id} \\
-tumor_bam ${tumor_bam} \\
+ ${pcf_gamma_arg} \\
${reference_arg} \\
${reference_bam_arg} \\
- -threads ${task.cpus} \\
-gc_profile ${gc_profile} \\
${diploid_regions_arg} \\
- ${target_region_arg} \\
- ${pcf_gamma_arg} \\
+ ${target_region_norm_file_arg} \\
+ ${log_level_arg} \\
+ -threads ${task.cpus} \\
-output_dir cobalt/
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- cobalt: \$(cobalt -version | sed -n '/^Cobalt version/ { s/^.* //p }')
+ cobalt_run: \$(cobalt -version | sed -n '/^Cobalt version/ { s/^.* //p }')
END_VERSIONS
"""
stub:
"""
mkdir -p cobalt/
+
touch cobalt/placeholder
echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
diff --git a/modules/local/cobalt/meta.yml b/modules/local/cobalt/run/meta.yml
similarity index 86%
rename from modules/local/cobalt/meta.yml
rename to modules/local/cobalt/run/meta.yml
index 61812410..d9fe3b76 100644
--- a/modules/local/cobalt/meta.yml
+++ b/modules/local/cobalt/run/meta.yml
@@ -1,4 +1,4 @@
-name: cobalt
+name: cobalt_run
description: Count bam lines determines the read depth ratios of the supplied tumor and reference genomes
keywords:
- cobalt
@@ -15,7 +15,7 @@ input:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- tumor_bam:
type: file
description: Tumor BAM file
@@ -44,12 +44,15 @@ input:
type: file
description: Normalisation file (optional)
pattern: "*.{tsv}"
+ - targeted_mode:
+ type: boolean
+ description: Flag indicating whether targeted mode is set
output:
- meta:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- cobalt_dir:
type: directory
description: COBALT output directory
@@ -57,5 +60,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/cuppa/environment.yml b/modules/local/cuppa/environment.yml
index d6280c19..68842773 100644
--- a/modules/local/cuppa/environment.yml
+++ b/modules/local/cuppa/environment.yml
@@ -5,3 +5,5 @@ channels:
- defaults
dependencies:
- bioconda::hmftools-cuppa=2.3.2
+ - conda-forge::r-stringr>=1.5
+ - conda-forge::r-stringi>=1.8
diff --git a/modules/local/cuppa/main.nf b/modules/local/cuppa/main.nf
index 3a4ab7ec..c21fd7bc 100644
--- a/modules/local/cuppa/main.nf
+++ b/modules/local/cuppa/main.nf
@@ -17,6 +17,7 @@ process CUPPA {
output:
tuple val(meta), path('cuppa/'), emit: cuppa_dir
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -27,6 +28,8 @@ process CUPPA {
def xmx_mod = task.ext.xmx_mod ?: 0.75
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
def isofox_dir_name = categories == 'ALL' ? 'isofox_dir__prepared' : isofox_dir
def isofox_dir_arg = isofox_dir ? "-isofox_dir ${isofox_dir_name}" : ''
def ref_alt_sj_sites_arg = isofox_dir ? "-ref_alt_sj_sites ${cuppa_alt_sj}" : ''
@@ -57,8 +60,9 @@ process CUPPA {
${linx_dir_arg} \\
${virusinterpreter_dir_arg} \\
${isofox_dir_arg} \\
- ${ref_alt_sj_sites_arg} \\
-ref_genome_version ${genome_ver} \\
+ ${ref_alt_sj_sites_arg} \\
+ ${log_level_arg} \\
-output_dir cuppa/
# Make predictions
diff --git a/modules/local/cuppa/meta.yml b/modules/local/cuppa/meta.yml
index 58e6549a..281b1d6e 100644
--- a/modules/local/cuppa/meta.yml
+++ b/modules/local/cuppa/meta.yml
@@ -53,5 +53,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/custom/extract_tarball/main.nf b/modules/local/custom/extract_tarball/main.nf
index e956b0ad..cbe743e1 100644
--- a/modules/local/custom/extract_tarball/main.nf
+++ b/modules/local/custom/extract_tarball/main.nf
@@ -11,6 +11,7 @@ process CUSTOM_EXTRACTTARBALL {
output:
path "${meta.id}/", emit: extracted_dir
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
diff --git a/modules/local/custom/lilac_extract_and_index_contig/main.nf b/modules/local/custom/lilac_extract_and_index_contig/main.nf
index 986fe6cf..128140bf 100644
--- a/modules/local/custom/lilac_extract_and_index_contig/main.nf
+++ b/modules/local/custom/lilac_extract_and_index_contig/main.nf
@@ -14,9 +14,10 @@ process CUSTOM_EXTRACTCONTIG {
val run
output:
- path "*extracted.fa" , emit: contig
- path "*extracted.fa.*", emit: bwamem2_index
+ path '*extracted.fa' , emit: contig
+ path '*extracted.fa.*', emit: bwamem2_index
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
diff --git a/modules/local/custom/lilac_realign_reads_lilac/main.nf b/modules/local/custom/lilac_realign_reads_lilac/main.nf
index e5667fb3..df9a45b8 100644
--- a/modules/local/custom/lilac_realign_reads_lilac/main.nf
+++ b/modules/local/custom/lilac_realign_reads_lilac/main.nf
@@ -13,8 +13,9 @@ process CUSTOM_REALIGNREADS {
path reference_indices
output:
- tuple val(meta), path("*realigned.bam"), path("*realigned.bam.bai"), emit: bam
+ tuple val(meta), path('*realigned.bam'), path('*realigned.bam.bai'), emit: bam
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
diff --git a/modules/local/custom/lilac_slice/main.nf b/modules/local/custom/lilac_slice/main.nf
index 56498143..93a7978d 100644
--- a/modules/local/custom/lilac_slice/main.nf
+++ b/modules/local/custom/lilac_slice/main.nf
@@ -12,8 +12,9 @@ process CUSTOM_SLICE {
path bed
output:
- tuple val(meta), path("*sliced.bam"), path("*sliced.bam.bai"), emit: bam
+ tuple val(meta), path('*sliced.bam'), path('*sliced.bam.bai'), emit: bam
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
diff --git a/modules/local/custom/write_reference_data/main.nf b/modules/local/custom/write_reference_data/main.nf
index 04a1151e..c8fe6185 100644
--- a/modules/local/custom/write_reference_data/main.nf
+++ b/modules/local/custom/write_reference_data/main.nf
@@ -8,7 +8,6 @@ process WRITE_REFERENCE_DATA {
input:
path fp
- val workflow_version
output:
path fp, includeInputs: true
diff --git a/modules/local/esvee/assemble/main.nf b/modules/local/esvee/assemble/main.nf
deleted file mode 100644
index 7b043e80..00000000
--- a/modules/local/esvee/assemble/main.nf
+++ /dev/null
@@ -1,76 +0,0 @@
-process ESVEE_ASSEMBLE {
- tag "${meta.id}"
- label 'process_high'
-
- conda "${moduleDir}/environment.yml"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-esvee:1.0.3--hdfd78af_0' :
- 'biocontainers/hmftools-esvee:1.0.3--hdfd78af_0' }"
-
- input:
- tuple val(meta), path(tumor_prep_bam), path(tumor_prep_bai), path(normal_prep_bam), path(normal_prep_bai), path(prep_dir)
- path genome_fasta
- path genome_fai
- path genome_dict
- path genome_img
- val genome_ver
- path decoy_sequences_image
-
- output:
- tuple val(meta), path('assemble/') , emit: assemble_dir
- tuple val(meta), path("assemble/${meta.tumor_id}.esvee.raw.vcf.gz"), emit: raw_vcf
- path 'versions.yml' , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def args = task.ext.args ?: ''
-
- def xmx_mod = task.ext.xmx_mod ?: 0.95
-
- def reference_arg = meta.normal_id != null ? "-reference ${meta.normal_id}" : ''
- def reference_bam_arg = meta.normal_id != null ? "-reference_bam ${normal_prep_bam}" : ''
-
- def decoy_genome_arg = decoy_sequences_image ? "-decoy_genome ${decoy_sequences_image}" : ''
-
- """
- mkdir -p assemble/
-
- esvee com.hartwig.hmftools.esvee.assembly.AssemblyApplication \\
- -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\
- ${args} \\
- -tumor ${meta.tumor_id} \\
- -tumor_bam ${tumor_prep_bam} \\
- ${reference_arg} \\
- ${reference_bam_arg} \\
- -esvee_prep_dir ${prep_dir}/ \\
- -ref_genome ${genome_fasta} \\
- -ref_genome_version ${genome_ver} \\
- ${decoy_genome_arg} \\
- -write_types 'JUNC_ASSEMBLY;PHASED_ASSEMBLY;ALIGNMENT;BREAKEND;VCF' \\
- -output_dir assemble/ \\
- -threads ${task.cpus} \\
- -perf_log_time 10 \\
- -log_level DEBUG
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- esvee: \$(esvee -version | sed -n '/^Esvee version/ { s/^.* //p }')
- END_VERSIONS
- """
-
- stub:
- """
- mkdir -p assemble/
-
- touch assemble/${meta.tumor_id}.esvee.raw.vcf.gz
- touch assemble/${meta.tumor_id}.esvee.raw.vcf.gz.tbi
- touch assemble/${meta.tumor_id}.esvee.alignment.tsv
- touch assemble/${meta.tumor_id}.esvee.assembly.tsv
- touch assemble/${meta.tumor_id}.esvee.phased_assembly.tsv
- touch assemble/${meta.tumor_id}.esvee.breakend.tsv
-
- echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
- """
-}
diff --git a/modules/local/esvee/assemble/meta.yml b/modules/local/esvee/assemble/meta.yml
deleted file mode 100644
index e4dffb15..00000000
--- a/modules/local/esvee/assemble/meta.yml
+++ /dev/null
@@ -1,75 +0,0 @@
-name: esvee_assemble
-description: Assemble SVs with ESVEE
-keywords:
- - assemble
- - sv
-tools:
- - esvee:
- description: Structural variant (SV) calling
- homepage: https://github.com/hartwigmedical/hmftools/tree/master/esvee
- documentation: https://github.com/hartwigmedical/hmftools/tree/master/esvee
- licence: ["GPL >=3"]
-input:
- - meta:
- type: map
- description: |
- Groovy Map containing sample information
- e.g. [id: 'sample_id']
- - tumor_prep_bam:
- type: file
- description: Tumor prep BAM file
- pattern: "*.{bam}"
- - tumor_prep_bai:
- type: file
- description: Tumor prep BAI file
- pattern: "*.{bai}"
- - normal_prep_bam:
- type: file
- description: Normal prep BAM file
- pattern: "*.{bam}"
- - normal_prep_bai:
- type: file
- description: Normal prep BAI file
- pattern: "*.{bai}"
- - prep_dir:
- type: directory
- description: ESVEE prep output directory
- - genome_fasta:
- type: file
- description: Reference genome assembly FASTA file
- pattern: "*.{fa,fasta}"
- - genome_fai:
- type: file
- description: Reference genome assembly fai file
- pattern: "*.{fai}"
- - genome_dict:
- type: file
- description: Reference genome assembly dict file
- pattern: "*.{dict}"
- - genome_img:
- type: file
- description: Reference genome assembly img file
- pattern: "*.{img}"
- - decouy_sequences_image:
- type: file
- description: ESVEE decoy sequences images file
- pattern: "*.{img}"
-output:
- - meta:
- type: map
- description: |
- Groovy Map containing sample information
- e.g. [id: 'sample_id']
- - assembly_dir:
- type: directory
- description: ESVEE assemble output directory
- - raw_vcf:
- type: file
- description: Raw VCF output file
- pattern: "*.{vcf.gz}"
- - versions:
- type: file
- description: File containing software versions
- pattern: "versions.yml"
-authors:
- - "@scwatts"
diff --git a/modules/local/esvee/call/main.nf b/modules/local/esvee/call/main.nf
deleted file mode 100644
index a6480a18..00000000
--- a/modules/local/esvee/call/main.nf
+++ /dev/null
@@ -1,81 +0,0 @@
-process ESVEE_CALL {
- tag "${meta.id}"
- label 'process_high'
-
- conda "${moduleDir}/environment.yml"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-esvee:1.0.3--hdfd78af_0' :
- 'biocontainers/hmftools-esvee:1.0.3--hdfd78af_0' }"
-
- input:
- tuple val(meta), path(ref_depth_vcf), path(prep_dir)
- path genome_fasta
- val genome_ver
- path pon_breakends
- path pon_breakpoints
- path known_fusions
- path repeatmasker_annotations
-
- output:
- tuple val(meta), path("caller/") , emit: caller_dir
- tuple val(meta), path("caller/${meta.tumor_id}.esvee.unfiltered.vcf.gz"), path("caller/${meta.tumor_id}.esvee.unfiltered.vcf.gz.tbi"), emit: unfiltered_vcf
- tuple val(meta), path("caller/${meta.tumor_id}.esvee.somatic.vcf.gz"), path("caller/${meta.tumor_id}.esvee.somatic.vcf.gz.tbi") , emit: somatic_vcf
- tuple val(meta), path("caller/${meta.tumor_id}.esvee.germline.vcf.gz"), path("caller/${meta.tumor_id}.esvee.germline.vcf.gz.tbi") , emit: germline_vcf, optional: true
- path 'versions.yml' , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def args = task.ext.args ?: ''
-
- def xmx_mod = task.ext.xmx_mod ?: 0.95
-
- def reference_arg = meta.normal_id != null ? "-reference ${meta.normal_id}" : ''
-
- """
- mkdir -p caller/
-
- esvee com.hartwig.hmftools.esvee.caller.CallerApplication \\
- -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\
- ${args} \\
- -sample ${meta.tumor_id} \\
- ${reference_arg} \\
- -input_vcf ${ref_depth_vcf} \\
- -esvee_prep_dir ${prep_dir}/ \\
- -ref_genome_version ${genome_ver} \\
- -known_hotspot_file ${known_fusions} \\
- -pon_sgl_file ${pon_breakends} \\
- -pon_sv_file ${pon_breakpoints} \\
- -repeat_mask_file ${repeatmasker_annotations} \\
- -output_dir caller/ \\
- -log_level DEBUG
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- esvee: \$(esvee -version | sed -n '/^Esvee version/ { s/^.* //p }')
- END_VERSIONS
- """
-
- stub:
- """
- mkdir -p caller/
-
- vcf_template='##fileformat=VCFv4.1
- ##contig=
- #CHROM POS ID REF ALT QUAL FILTER INFO
- . . . . . . .
- '
-
- echo \${vcf_template} | gzip -c > caller/${meta.tumor_id}.esvee.unfiltered.vcf.gz
- echo \${vcf_template} | gzip -c > caller/${meta.tumor_id}.esvee.somatic.vcf.gz
-
- touch caller/${meta.tumor_id}.esvee.unfiltered.vcf.gz.tbi
- touch caller/${meta.tumor_id}.esvee.somatic.vcf.gz.tbi
-
- ${ (meta.normal_id != null) ? "touch caller/${meta.tumor_id}.esvee.germline.vcf.gz" : '' }
- ${ (meta.normal_id != null) ? "touch caller/${meta.tumor_id}.esvee.germline.vcf.gz.tbi" : '' }
-
- echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
- """
-}
diff --git a/modules/local/esvee/depth_annotator/environment.yml b/modules/local/esvee/depth_annotator/environment.yml
deleted file mode 100644
index 2a0425a6..00000000
--- a/modules/local/esvee/depth_annotator/environment.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: esvee_depth_annotator
-channels:
- - conda-forge
- - bioconda
- - defaults
-dependencies:
- - bioconda::hmftools-esvee=1.0.3
diff --git a/modules/local/esvee/depth_annotator/main.nf b/modules/local/esvee/depth_annotator/main.nf
deleted file mode 100644
index 4086179a..00000000
--- a/modules/local/esvee/depth_annotator/main.nf
+++ /dev/null
@@ -1,71 +0,0 @@
-process ESVEE_DEPTH_ANNOTATOR {
- tag "${meta.id}"
- label 'process_high'
-
- conda "${moduleDir}/environment.yml"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-esvee:1.0.3--hdfd78af_0' :
- 'biocontainers/hmftools-esvee:1.0.3--hdfd78af_0' }"
-
- input:
- tuple val(meta), path(tumor_bam), path(tumor_bai), path(normal_bam), path(normal_bai), path(raw_vcf)
- path genome_fasta
- val genome_ver
- path unmap_regions
-
- output:
- tuple val(meta), path("depth_annotation/") , emit: depth_annotation_dir
- tuple val(meta), path("depth_annotation/${meta.tumor_id}.esvee.ref_depth.vcf.gz"), emit: ref_depth_vcf
- path 'versions.yml' , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def args = task.ext.args ?: ''
-
- def xmx_mod = task.ext.xmx_mod ?: 0.75
-
- def sample_ids = [meta.tumor_id]
- def bam_files = [tumor_bam.toString()]
-
- if(meta.normal_id != null){
- sample_ids.add(meta.normal_id)
- bam_files.add(normal_bam.toString())
- }
-
- def sample_ids_string = String.join(',', sample_ids)
- def bam_files_string = String.join(',', bam_files)
-
- """
- mkdir -p depth_annotation/
-
- esvee com.hartwig.hmftools.esvee.depth.DepthAnnotator \\
- -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\
- ${args} \\
- -sample ${sample_ids_string} \\
- -bam_file ${bam_files_string} \\
- -input_vcf ${raw_vcf} \\
- -ref_genome ${genome_fasta} \\
- -ref_genome_version ${genome_ver} \\
- -unmap_regions ${unmap_regions} \\
- -output_dir depth_annotation/ \\
- -threads ${task.cpus} \\
- -log_level DEBUG
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- esvee: \$(esvee -version | sed -n '/^Esvee version/ { s/^.* //p }')
- END_VERSIONS
- """
-
- stub:
- """
- mkdir -p depth_annotation/
-
- touch depth_annotation/${meta.tumor_id}.esvee.ref_depth.vcf.gz
- touch depth_annotation/${meta.tumor_id}.esvee.ref_depth.vcf.gz.tbi
-
- echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
- """
-}
diff --git a/modules/local/esvee/depth_annotator/meta.yml b/modules/local/esvee/depth_annotator/meta.yml
deleted file mode 100644
index ab349fa7..00000000
--- a/modules/local/esvee/depth_annotator/meta.yml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: esvee_depth_annotator
-description: Annotate ESVEE VCFs with depth information
-keywords:
- - depth
- - annotation
- - sv
-tools:
- - esvee:
- description: Structural variant (SV) calling
- homepage: https://github.com/hartwigmedical/hmftools/tree/master/esvee
- documentation: https://github.com/hartwigmedical/hmftools/tree/master/esvee
- licence: ["GPL >=3"]
-input:
- - meta:
- type: map
- description: |
- Groovy Map containing sample information
- e.g. [id: 'sample_id']
- - tumor_bam:
- type: file
- description: Tumor BAM file
- pattern: "*.{bam}"
- - tumor_bai:
- type: file
- description: Tumor BAI file
- pattern: "*.{bai}"
- - normal_bam:
- type: file
- description: Normal BAM file
- pattern: "*.{bam}"
- - normal_bai:
- type: file
- description: Normal BAI file
- pattern: "*.{bai}"
- - raw_vcf:
- type: file
- description: ESVEE assemble raw VCF file
- pattern: "*.{vcf.gz}"
- - genome_fasta:
- type: file
- description: Reference genome assembly FASTA file
- pattern: "*.{fa,fasta}"
- - genome_ver:
- type: string
- description: Reference genome version
- - unmap_regions:
- type: file
- description: Hartwig unmap regions file
- pattern: "*.{tsv}"
-output:
- - meta:
- type: map
- description: |
- Groovy Map containing sample information
- e.g. [id: 'sample_id']
- - depth_annotation_dir:
- type: directory
- description: ESVEE depth annotation output directory
- - ref_depth_vcf:
- type: file
- description: Depth annotated VCF file
- pattern: "*.{vcf.gz}"
- - versions:
- type: file
- description: File containing software versions
- pattern: "versions.yml"
-authors:
- - "@scwatts"
diff --git a/modules/local/cobalt/environment.yml b/modules/local/esvee/environment.yml
similarity index 58%
rename from modules/local/cobalt/environment.yml
rename to modules/local/esvee/environment.yml
index 8b4e2bc1..1e56a788 100644
--- a/modules/local/cobalt/environment.yml
+++ b/modules/local/esvee/environment.yml
@@ -1,7 +1,7 @@
-name: cobalt
+name: esvee
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-cobalt=2.0
+ - bioconda::hmftools-esvee=1.1.2
diff --git a/modules/local/esvee/main.nf b/modules/local/esvee/main.nf
new file mode 100644
index 00000000..abb63cc9
--- /dev/null
+++ b/modules/local/esvee/main.nf
@@ -0,0 +1,89 @@
+process ESVEE { // Runs the esvee SV caller on a tumor BAM (plus a normal BAM when meta.normal_id is set); all outputs are written to esvee/
+ tag "${meta.id}"
+ label 'process_high'
+
+ conda "${moduleDir}/environment.yml" // environment.yml pins hmftools-esvee=1.1.2, matching the container tags below
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/hmftools-esvee:1.1.2--hdfd78af_0' :
+ 'biocontainers/hmftools-esvee:1.1.2--hdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(tumor_bam), path(tumor_bai), path(normal_bam), path(normal_bai) // BAI files are staged only; normal_bam is passed only when meta.normal_id is set
+ path genome_fasta
+ path genome_fai // staged only, not referenced in the script
+ path genome_dict // staged only, not referenced in the script
+ path genome_img // staged only, not referenced in the script
+ val genome_ver
+ path pon_breakends // passed as -pon_sgl_file
+ path pon_breakpoints // passed as -pon_sv_file
+ path decoy_sequences_image // NOTE(review): staged but never passed to esvee in the script - confirm this is intended
+ path known_fusions // passed as -known_hotspot_file
+ path repeatmasker_annotations // passed as -repeat_mask_file
+ path unmap_regions
+
+ output:
+ tuple val(meta), path('esvee/') , emit: esvee_dir
+ tuple val(meta), path("esvee/${meta.tumor_id}.esvee.unfiltered.vcf.gz"), path("esvee/${meta.tumor_id}.esvee.unfiltered.vcf.gz.tbi"), emit: unfiltered_vcf
+ tuple val(meta), path("esvee/${meta.tumor_id}.esvee.somatic.vcf.gz"), path("esvee/${meta.tumor_id}.esvee.somatic.vcf.gz.tbi") , emit: somatic_vcf
+ tuple val(meta), path("esvee/${meta.tumor_id}.esvee.germline.vcf.gz"), path("esvee/${meta.tumor_id}.esvee.germline.vcf.gz.tbi") , emit: germline_vcf, optional: true // optional: the task does not fail when the germline VCF is absent
+ path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files // glob over the task's .command.* files
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script: // JVM heap (-Xmx) is set to 95% of task.memory; prep and calling outputs share the esvee/ directory
+ def args = task.ext.args ?: '' // extra esvee arguments from task.ext.args, empty by default
+
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' // -log_level is only passed when task.ext.log_level is set
+
+ def reference_arg = meta.normal_id ? "-reference ${meta.normal_id}" : '' // reference (normal) arguments are omitted when meta.normal_id is unset
+ def reference_bam_arg = meta.normal_id ? "-reference_bam ${normal_bam}" : ''
+
+ """
+ mkdir -p esvee/
+
+ esvee \\
+ -Xmx${Math.round(task.memory.bytes * 0.95)} \\
+ ${args} \\
+ -tumor ${meta.tumor_id} \\
+ -tumor_bam ${tumor_bam} \\
+ ${reference_arg} \\
+ ${reference_bam_arg} \\
+ -esvee_prep_dir esvee/ \\
+ -ref_genome ${genome_fasta} \\
+ -ref_genome_version ${genome_ver} \\
+ -known_hotspot_file ${known_fusions} \\
+ -pon_sgl_file ${pon_breakends} \\
+ -pon_sv_file ${pon_breakpoints} \\
+ -repeat_mask_file ${repeatmasker_annotations} \\
+ -unmap_regions ${unmap_regions} \\
+ -bamtool \$(which sambamba) \\
+ -write_types 'PREP_JUNCTION;PREP_BAM;FRAGMENT_LENGTH_DIST;JUNC_ASSEMBLY;PHASED_ASSEMBLY;ALIGNMENT;BREAKEND;VCF' \\
+ -threads ${task.cpus} \\
+ ${log_level_arg} \\
+ -output_dir esvee/
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ esvee: \$(java -jar \${ESVEE_JAR} -version | sed 's/^.*Esvee version: //')
+ END_VERSIONS
+ """
+
+ stub: // creates empty placeholders for every declared VCF/TBI output; NOTE(review): the stub reports version "1.0-beta" while the module pins 1.1.2 - confirm intended
+ """
+ mkdir -p esvee/
+
+ touch esvee/${meta.tumor_id}.esvee.unfiltered.vcf.gz
+ touch esvee/${meta.tumor_id}.esvee.unfiltered.vcf.gz.tbi
+ touch esvee/${meta.tumor_id}.esvee.somatic.vcf.gz
+ touch esvee/${meta.tumor_id}.esvee.somatic.vcf.gz.tbi
+ touch esvee/${meta.tumor_id}.esvee.germline.vcf.gz
+ touch esvee/${meta.tumor_id}.esvee.germline.vcf.gz.tbi
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ esvee: \$(echo "1.0-beta")
+ END_VERSIONS
+ """
+}
diff --git a/modules/local/esvee/call/meta.yml b/modules/local/esvee/meta.yml
similarity index 60%
rename from modules/local/esvee/call/meta.yml
rename to modules/local/esvee/meta.yml
index 38979eb8..5bdb20cb 100644
--- a/modules/local/esvee/call/meta.yml
+++ b/modules/local/esvee/meta.yml
@@ -1,4 +1,4 @@
-name: esvee_call
+name: esvee
description: Call somatic SVs with ESVEE
keywords:
- calling
@@ -8,28 +8,50 @@ tools:
description: Structural variant (SV) calling
homepage: https://github.com/hartwigmedical/hmftools/tree/master/esvee
documentation: https://github.com/hartwigmedical/hmftools/tree/master/esvee
- licence: ["GPL >=3"]
+ licence: ["GPL v3"]
input:
+ - meta:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [id: 'sample_id']
- - ref_depth_vcf:
+ - tumor_bam:
type: file
- description: ESVEE depth annotated VCF file
- pattern: "*.{vcf.gz}"
- - prep_dir:
- type: directory
- description: ESVEE prep output directory
+ description: Tumor BAM file
+ pattern: "*.{bam}"
+ - tumor_bai:
+ type: file
+ description: Tumor BAI file
+ pattern: "*.{bai}"
+ - normal_bam:
+ type: file
+ description: Normal BAM file
+ pattern: "*.{bam}"
+ - normal_bai:
+ type: file
+ description: Normal BAI file
+ pattern: "*.{bai}"
- genome_fasta:
type: file
description: Reference genome assembly FASTA file
pattern: "*.{fa,fasta}"
+ - genome_fai:
+ type: file
+ description: Reference genome assembly fai file
+ pattern: "*.{fai}"
+ - genome_dict:
+ type: file
+ description: Reference genome assembly dict file
+ pattern: "*.{dict}"
+ - genome_img:
+ type: file
+ description: Reference genome assembly img file
+ pattern: "*.{img}"
- genome_ver:
type: string
description: Reference genome version
- - pon_breakend:
+ - pon_breakends:
type: file
description: GRIDSS breakend PON file
pattern: "*.{bed.gz}"
@@ -37,6 +59,10 @@ input:
type: file
description: GRIDSS breakpoint PON file
pattern: "*.{bedpe.gz}"
+ - decoy_sequences_image:
+ type: file
+ description: ESVEE decoy sequences images file
+ pattern: "*.{img}"
- known_fusions:
type: file
description: HMF Known Fusions file
@@ -44,15 +70,19 @@ input:
- repeatmasker_annotations:
type: file
description: RepeatMasker annotations file
+ - unmap_regions:
+ type: file
+ description: Hartwig unmap regions file
+ pattern: "*.{tsv}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [id: 'sample_id']
- - caller_dir:
+ - esvee_dir:
type: directory
- description: ESVEE call output directory
+ description: ESVEE output directory
- unfiltered_vcf:
type: list
description: Unfiltered VCF file
@@ -69,5 +99,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/esvee/prep/main.nf b/modules/local/esvee/prep/main.nf
deleted file mode 100644
index f1c6145e..00000000
--- a/modules/local/esvee/prep/main.nf
+++ /dev/null
@@ -1,81 +0,0 @@
-process ESVEE_PREP {
- tag "${meta.id}"
- label 'process_high'
-
- conda "${moduleDir}/environment.yml"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-esvee:1.0.3--hdfd78af_0' :
- 'biocontainers/hmftools-esvee:1.0.3--hdfd78af_0' }"
-
- input:
- tuple val(meta), path(tumor_bam), path(tumor_bai), path(normal_bam), path(normal_bai)
- path genome_fasta
- val genome_ver
- path sv_blocklist
- path known_fusions
-
- output:
- tuple val(meta), path("prep/") , emit: prep_dir
- tuple val(meta), path("prep/${meta.tumor_id}.*.bam"), path("prep/${meta.tumor_id}.*.bam.bai") , emit: tumor_prep_bam
- tuple val(meta), path("prep/${meta.normal_id}.*.bam"), path("prep/${meta.normal_id}.*.bam.bai"), emit: normal_prep_bam, optional: true
- path 'versions.yml' , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def args = task.ext.args ?: ''
-
- def xmx_mod = task.ext.xmx_mod ?: 0.75
-
- def sample_ids = [meta.tumor_id]
- def bam_files = [tumor_bam.toString()]
-
- if(meta.normal_id != null){
- sample_ids.add(meta.normal_id)
- bam_files.add(normal_bam.toString())
- }
-
- def sample_ids_string = String.join(',', sample_ids)
- def bam_files_string = String.join(',', bam_files)
-
- """
- mkdir -p prep/
-
- SAMBAMBA_PATH=\$(which sambamba)
-
- esvee com.hartwig.hmftools.esvee.prep.PrepApplication \\
- -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\
- ${args} \\
- -sample "${sample_ids_string}" \\
- -bam_file "${bam_files_string}" \\
- -ref_genome ${genome_fasta} \\
- -ref_genome_version ${genome_ver} \\
- -blacklist_bed ${sv_blocklist} \\
- -known_fusion_bed ${known_fusions} \\
- -bamtool \$SAMBAMBA_PATH \\
- -write_types 'JUNCTIONS;BAM;FRAGMENT_LENGTH_DIST' \\
- -output_dir prep/ \\
- -threads ${task.cpus} \\
- -log_level DEBUG \\
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- esvee: \$(esvee -version | sed -n '/^Esvee version/ { s/^.* //p }')
- END_VERSIONS
- """
-
- stub:
- """
- mkdir -p prep/
-
- ${ (meta.normal_id != null) ? "touch prep/${meta.normal_id}.esvee.prep.bam" : '' }
- ${ (meta.normal_id != null) ? "touch prep/${meta.normal_id}.esvee.prep.bam.bai" : '' }
- touch "prep/${meta.tumor_id}.esvee.prep.bam"
- touch "prep/${meta.tumor_id}.esvee.prep.bam.bai"
- touch "prep/${meta.tumor_id}.esvee.prep.fragment_length.tsv"
- touch "prep/${meta.tumor_id}.esvee.prep.junction.tsv"
-
- echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
- """
-}
diff --git a/modules/local/esvee/prep/meta.yml b/modules/local/esvee/prep/meta.yml
deleted file mode 100644
index 4ca71e03..00000000
--- a/modules/local/esvee/prep/meta.yml
+++ /dev/null
@@ -1,72 +0,0 @@
-name: esvee_prep
-description: Select reads associated with SV events
-keywords:
- - filtering
- - reads
- - sv
-tools:
- - esvee:
- description: Structural variant (SV) calling
- homepage: https://github.com/hartwigmedical/hmftools/tree/master/esvee
- documentation: https://github.com/hartwigmedical/hmftools/tree/master/esvee
- licence: ["GPL >=3"]
-input:
- - meta:
- type: map
- description: |
- Groovy Map containing sample information
- e.g. [id: 'sample_id']
- - tumor_bam:
- type: file
- description: Tumor BAM file
- pattern: "*.{bam}"
- - tumor_bai:
- type: file
- description: Tumor BAI file
- pattern: "*.{bai}"
- - normal_bam:
- type: file
- description: Normal BAM file
- pattern: "*.{bam}"
- - normal_bai:
- type: file
- description: Normal BAI file
- pattern: "*.{bai}"
- - genome_fasta:
- type: file
- description: Reference genome assembly FASTA file
- pattern: "*.{fa,fasta}"
- - genome_ver:
- type: string
- description: Reference genome version
- - sv_blocklist:
- type: file
- description: SV Prep blocklist file
- pattern: "*.{bed}"
- - known_fusions:
- type: file
- description: Known fusions file
- pattern: "*.{bedpe}"
-output:
- - meta:
- type: map
- description: |
- Groovy Map containing sample information
- e.g. [id: 'sample_id']
- - prep_dir:
- type: directory
- description: ESVEE prep output directory
- - tumor_prep_bam:
- type: list
- description: Tumor prep BAM and BAI file
- pattern: "*.{bam,bam.bai}"
- - normal_prep_bam:
- type: list
- description: Normal prep BAM and BAI file (optional)
- pattern: "*.{bam,bam.bai}"
- - versions:
- type: file
- description: File containing software versions
- pattern: "versions.yml"
-authors:
- - "@scwatts"
diff --git a/modules/local/fastp/main.nf b/modules/local/fastp/main.nf
index 8b197978..0ca9d241 100644
--- a/modules/local/fastp/main.nf
+++ b/modules/local/fastp/main.nf
@@ -17,6 +17,7 @@ process FASTP {
output:
tuple val(meta), path('*_R1.fastp.fastq.gz'), path('*_R2.fastp.fastq.gz'), emit: fastq
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -27,9 +28,9 @@ process FASTP {
def split_by_lines_arg = max_fastq_records > 0 ? "--split_by_lines ${4 * max_fastq_records.toLong()}" : ''
def umi_args_list = []
- if (umi_location) umi_args_list.add("--umi_loc ${umi_location}")
- if (umi_length) umi_args_list.add("--umi_len ${umi_length}")
- if (umi_skip >= 0) umi_args_list.add("--umi_skip ${umi_skip}")
+ if (umi_location) { umi_args_list.add("--umi_loc ${umi_location}") }
+ if (umi_length) { umi_args_list.add("--umi_len ${umi_length}") }
+ if (umi_skip >= 0) { umi_args_list.add("--umi_skip ${umi_skip}") }
def umi_args = umi_args_list ? '--umi ' + umi_args_list.join(' ') : ''
"""
diff --git a/modules/local/fastp/meta.yml b/modules/local/fastp/meta.yml
index 434d3cc4..a8584b8c 100644
--- a/modules/local/fastp/meta.yml
+++ b/modules/local/fastp/meta.yml
@@ -16,7 +16,7 @@ input:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- reads_fwd:
type: file
description: Forward reads FASTQ file
@@ -42,7 +42,7 @@ output:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- fastq:
type: list
description: Forward and reverse FASTQ files
@@ -51,6 +51,9 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
- "@mkcmkc"
diff --git a/modules/local/gatk4/bwaindeximage/main.nf b/modules/local/gatk4/bwaindeximage/main.nf
index 448963af..1231ba74 100644
--- a/modules/local/gatk4/bwaindeximage/main.nf
+++ b/modules/local/gatk4/bwaindeximage/main.nf
@@ -13,6 +13,7 @@ process GATK4_BWA_INDEX_IMAGE {
output:
path "${genome_fasta}.img", emit: img
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
diff --git a/modules/local/gatk4/bwaindeximage/meta.yml b/modules/local/gatk4/bwaindeximage/meta.yml
index 56140919..b81582e7 100644
--- a/modules/local/gatk4/bwaindeximage/meta.yml
+++ b/modules/local/gatk4/bwaindeximage/meta.yml
@@ -33,5 +33,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/gridss/index/main.nf b/modules/local/gridss/index/main.nf
index dfc32cc5..7dbffb7d 100644
--- a/modules/local/gridss/index/main.nf
+++ b/modules/local/gridss/index/main.nf
@@ -17,6 +17,7 @@ process GRIDSS_INDEX {
output:
path 'gridss_index/', emit: index
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
diff --git a/modules/local/gridss/index/meta.yml b/modules/local/gridss/index/meta.yml
index 94aa572a..2e045b2b 100644
--- a/modules/local/gridss/index/meta.yml
+++ b/modules/local/gridss/index/meta.yml
@@ -33,5 +33,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/isofox/panel_normalisation/environment.yml b/modules/local/isofox/panel_normalisation/environment.yml
new file mode 100644
index 00000000..832224f5
--- /dev/null
+++ b/modules/local/isofox/panel_normalisation/environment.yml
@@ -0,0 +1,7 @@
+name: isofox_panel_normalisation # same name as this module's meta.yml
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::hmftools-isofox=1.7.2 # same version as the container tags in this module's main.nf
diff --git a/modules/local/isofox/panel_normalisation/main.nf b/modules/local/isofox/panel_normalisation/main.nf
new file mode 100644
index 00000000..3b8fb1eb
--- /dev/null
+++ b/modules/local/isofox/panel_normalisation/main.nf
@@ -0,0 +1,63 @@
+process ISOFOX_PANEL_NORMALISATION { // Runs Isofox CohortAnalyser with -analyses PANEL_TPM_NORMALISATION over the staged Isofox directories
+ label 'process_medium'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/hmftools-isofox:1.7.2--hdfd78af_1' :
+ 'biocontainers/hmftools-isofox:1.7.2--hdfd78af_1' }"
+
+ input:
+ path 'isofox_dirs.*' // inputs are staged under names matching isofox_dirs.*; the script searches them for *.gene_data.csv
+ val genome_ver // only used to name the output CSV
+ path gene_ids // always passed as -gene_id_file
+ path gene_distribution // always passed as -gene_distribution_file
+
+ output:
+ path 'isofox.gene_normalisation.*.csv', emit: isofox_normalisation // the script renames the Isofox output to isofox.gene_normalisation.<genome_ver>.csv
+ path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files // glob over the task's .command.* files
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script: // symlinks every *.gene_data.csv into inputs/, derives sample_ids.txt from the *.isf.gene_data.csv basenames, then runs the cohort analysis; JVM heap is 95% of task.memory
+ def args = task.ext.args ?: '' // extra isofox arguments from task.ext.args, empty by default
+
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' // -log_level is only passed when task.ext.log_level is set
+
+ """
+ mkdir -p inputs/
+ for fp in \$(find -L isofox_dirs.* -name '*.gene_data.csv'); do ln -sf ../\${fp} inputs/; done
+
+ (
+ echo SampleId
+ basename -s .isf.gene_data.csv -a inputs/*.isf.gene_data.csv
+ ) > sample_ids.txt
+
+ isofox \\
+ -Xmx${Math.round(task.memory.bytes * 0.95)} \\
+ com.hartwig.hmftools.isofox.cohort.CohortAnalyser \\
+ ${args} \\
+ -sample_data_file sample_ids.txt \\
+ -root_data_dir inputs/ \\
+ -analyses PANEL_TPM_NORMALISATION \\
+ -gene_id_file ${gene_ids} \\
+ -gene_distribution_file ${gene_distribution} \\
+ ${log_level_arg} \\
+ -output_dir ./
+
+ mv isofox.panel_gene_normalisation.csv isofox.gene_normalisation.${genome_ver}.csv
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ isofox: \$(isofox -version | sed 's/^.* //')
+ END_VERSIONS
+ """
+
+ stub: // creates an empty output CSV and a placeholder versions.yml
+ """
+ touch isofox.gene_normalisation.${genome_ver}.csv
+
+ echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
+ """
+}
diff --git a/modules/local/isofox/panel_normalisation/meta.yml b/modules/local/isofox/panel_normalisation/meta.yml
new file mode 100644
index 00000000..8ad69273
--- /dev/null
+++ b/modules/local/isofox/panel_normalisation/meta.yml
@@ -0,0 +1,40 @@
+name: isofox_panel_normalisation
+description: Generate a panel gene TPM normalisation file from a cohort of Isofox outputs
+keywords:
+ - rna
+ - rnaseq
+tools:
+ - isofox:
+ description: Characterises and counts gene, transcript features
+ homepage: https://github.com/hartwigmedical/hmftools/tree/master/isofox
+ documentation: https://github.com/hartwigmedical/hmftools/tree/master/isofox
+ licence: ["GPL v3"]
+input:
+ - isofox_dirs: # staged as 'isofox_dirs.*' in main.nf
+ type: directory
+ description: List of Isofox directories
+ - genome_ver: # only used to name the output CSV in main.nf
+ type: string
+ description: Reference genome version
+ - gene_ids: # main.nf always passes this as -gene_id_file, so it is not optional here
+ type: file
+ description: Isofox gene ID file
+ pattern: "*.{csv}"
+ - gene_distribution:
+ type: file
+ description: Isofox cohort gene expression file
+ pattern: "*.{csv}"
+output:
+ - isofox_normalisation: # main.nf emits isofox.gene_normalisation.<genome_ver>.csv
+ type: file
+ description: Isofox normalisation file
+ pattern: "*.{csv}"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
+authors:
+ - "@scwatts"
diff --git a/modules/local/esvee/prep/environment.yml b/modules/local/isofox/run/environment.yml
similarity index 55%
rename from modules/local/esvee/prep/environment.yml
rename to modules/local/isofox/run/environment.yml
index 6750d5c4..2b59e126 100644
--- a/modules/local/esvee/prep/environment.yml
+++ b/modules/local/isofox/run/environment.yml
@@ -1,7 +1,7 @@
-name: esvee_prep
+name: isofox_run
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-esvee=1.0.3
+ - bioconda::hmftools-isofox=1.7.2
diff --git a/modules/local/isofox/main.nf b/modules/local/isofox/run/main.nf
similarity index 89%
rename from modules/local/isofox/main.nf
rename to modules/local/isofox/run/main.nf
index 6ddfa0a7..88f7c438 100644
--- a/modules/local/isofox/main.nf
+++ b/modules/local/isofox/run/main.nf
@@ -4,8 +4,8 @@ process ISOFOX {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-isofox:1.7.1--hdfd78af_1' :
- 'biocontainers/hmftools-isofox:1.7.1--hdfd78af_1' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-isofox:1.7.2--hdfd78af_1' :
+ 'biocontainers/hmftools-isofox:1.7.2--hdfd78af_1' }"
input:
tuple val(meta), path(bam), path(bai)
@@ -24,6 +24,7 @@ process ISOFOX {
output:
tuple val(meta), path('isofox/'), emit: isofox_dir
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -33,6 +34,8 @@ process ISOFOX {
def xmx_mod = task.ext.xmx_mod ?: 0.75
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
def functions_arg = functions ? "-functions \'${functions}\'" : ''
def exp_counts_arg = exp_counts ? "-exp_counts_file ${exp_counts}" : ''
@@ -48,9 +51,9 @@ process ISOFOX {
-Xmx${Math.round(task.memory.bytes * xmx_mod)} \\
${args} \\
-sample ${meta.sample_id} \\
- -bam_file ${bam} \\
${functions_arg} \\
-read_length ${read_length} \\
+ -bam_file ${bam} \\
-ref_genome ${genome_fasta} \\
-ref_genome_version ${genome_ver} \\
-ensembl_data_dir ${ensembl_data_resources} \\
@@ -60,6 +63,7 @@ process ISOFOX {
${gene_ids_arg} \\
${tpm_norm_arg} \\
-threads ${task.cpus} \\
+ ${log_level_arg} \\
-output_dir isofox/
cat <<-END_VERSIONS > versions.yml
@@ -71,6 +75,7 @@ process ISOFOX {
stub:
"""
mkdir -p isofox/
+
touch isofox/placeholder
echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
diff --git a/modules/local/isofox/meta.yml b/modules/local/isofox/run/meta.yml
similarity index 95%
rename from modules/local/isofox/meta.yml
rename to modules/local/isofox/run/meta.yml
index 5ccf224e..5a8aa8ee 100644
--- a/modules/local/isofox/meta.yml
+++ b/modules/local/isofox/run/meta.yml
@@ -1,4 +1,4 @@
-name: isofox
+name: isofox_run
description: Characterise and count gene, transcript features
keywords:
- rna
@@ -76,5 +76,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/lilac/environment.yml b/modules/local/lilac/environment.yml
index 8832e639..b91fd31b 100644
--- a/modules/local/lilac/environment.yml
+++ b/modules/local/lilac/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-lilac=1.6
+ - bioconda::hmftools-lilac=1.7.1
diff --git a/modules/local/lilac/main.nf b/modules/local/lilac/main.nf
index 49e22520..f553c7ff 100644
--- a/modules/local/lilac/main.nf
+++ b/modules/local/lilac/main.nf
@@ -4,8 +4,8 @@ process LILAC {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-lilac:1.6--hdfd78af_1' :
- 'biocontainers/hmftools-lilac:1.6--hdfd78af_1' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-lilac:1.7.1--hdfd78af_0' :
+ 'biocontainers/hmftools-lilac:1.7.1--hdfd78af_0' }"
input:
tuple val(meta), path(normal_dna_bam), path(normal_dna_bai), path(tumor_dna_bam), path(tumor_dna_bai), path(tumor_rna_bam), path(tumor_rna_bai), path(purple_dir)
@@ -13,10 +13,12 @@ process LILAC {
path genome_fai
val genome_ver
path lilac_resources, stageAs: 'lilac_resources'
+ val targeted_mode
output:
tuple val(meta), path('lilac/'), emit: lilac_dir
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -26,6 +28,8 @@ process LILAC {
def xmx_mod = task.ext.xmx_mod ?: 0.75
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
def sample_name = getSampleName(meta, tumor_dna_bam, normal_dna_bam)
def normal_bam_arg = normal_dna_bam ? "-reference_bam ${normal_dna_bam}" : ''
@@ -34,6 +38,8 @@ process LILAC {
def purple_dir_arg = purple_dir ? "-purple_dir ${purple_dir}" : ''
+ def freq_score_penalty = targeted_mode ? '0.0018' : '0.0009'
+
"""
lilac \\
-Xmx${Math.round(task.memory.bytes * xmx_mod)} \\
@@ -46,7 +52,9 @@ process LILAC {
-ref_genome ${genome_fasta} \\
-ref_genome_version ${genome_ver} \\
-resource_dir ${lilac_resources} \\
+ -freq_score_penalty ${freq_score_penalty} \\
-threads ${task.cpus} \\
+ ${log_level_arg} \\
-output_dir lilac/
cat <<-END_VERSIONS > versions.yml
@@ -58,6 +66,7 @@ process LILAC {
stub:
"""
mkdir -p lilac/
+
touch lilac/placeholder
echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
diff --git a/modules/local/lilac/meta.yml b/modules/local/lilac/meta.yml
index ae7eaaf6..5e8e63aa 100644
--- a/modules/local/lilac/meta.yml
+++ b/modules/local/lilac/meta.yml
@@ -15,7 +15,7 @@ input:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- normal_dna_bam:
type: file
description: Normal DNA BAM file (optional)
@@ -57,12 +57,15 @@ input:
- lilac_resources:
type: directory
description: LILAC resources directory
+ - targeted_mode:
+ type: boolean
+ description: Flag indicating whether targeted mode is set
output:
- meta:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- lilac_dir:
type: file
description: LILAC output directory
@@ -70,5 +73,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/linx/germline/environment.yml b/modules/local/linx/germline/environment.yml
index 0305a9c6..b5b61d59 100644
--- a/modules/local/linx/germline/environment.yml
+++ b/modules/local/linx/germline/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-linx=2.0.2
+ - bioconda::hmftools-linx=2.1
diff --git a/modules/local/linx/germline/main.nf b/modules/local/linx/germline/main.nf
index eb0d1682..c6c65074 100644
--- a/modules/local/linx/germline/main.nf
+++ b/modules/local/linx/germline/main.nf
@@ -4,8 +4,8 @@ process LINX_GERMLINE {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-linx:2.0.2--hdfd78af_0' :
- 'biocontainers/hmftools-linx:2.0.2--hdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-linx:2.1--hdfd78af_0' :
+ 'biocontainers/hmftools-linx:2.1--hdfd78af_0' }"
input:
tuple val(meta), path(sv_vcf)
@@ -16,6 +16,7 @@ process LINX_GERMLINE {
output:
tuple val(meta), path('linx_germline/'), emit: annotation_dir
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -25,6 +26,8 @@ process LINX_GERMLINE {
def xmx_mod = task.ext.xmx_mod ?: 0.75
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
"""
linx \\
-Xmx${Math.round(task.memory.bytes * xmx_mod)} \\
@@ -35,6 +38,7 @@ process LINX_GERMLINE {
-ref_genome_version ${genome_ver} \\
-ensembl_data_dir ${ensembl_data_resources} \\
-driver_gene_panel ${driver_gene_panel} \\
+ ${log_level_arg} \\
-output_dir linx_germline/
cat <<-END_VERSIONS > versions.yml
@@ -46,6 +50,7 @@ process LINX_GERMLINE {
stub:
"""
mkdir linx_germline/
+
touch linx_germline/placeholder
echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
diff --git a/modules/local/linx/germline/meta.yml b/modules/local/linx/germline/meta.yml
index bf3befa8..1a80a796 100644
--- a/modules/local/linx/germline/meta.yml
+++ b/modules/local/linx/germline/meta.yml
@@ -43,5 +43,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/linx/somatic/environment.yml b/modules/local/linx/somatic/environment.yml
index 331323b5..2b047417 100644
--- a/modules/local/linx/somatic/environment.yml
+++ b/modules/local/linx/somatic/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-linx=2.0.2
+ - bioconda::hmftools-linx=2.1
diff --git a/modules/local/linx/somatic/main.nf b/modules/local/linx/somatic/main.nf
index dcc27268..cccfc9c8 100644
--- a/modules/local/linx/somatic/main.nf
+++ b/modules/local/linx/somatic/main.nf
@@ -4,8 +4,8 @@ process LINX_SOMATIC {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-linx:2.0.2--hdfd78af_0' :
- 'biocontainers/hmftools-linx:2.0.2--hdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-linx:2.1--hdfd78af_0' :
+ 'biocontainers/hmftools-linx:2.1--hdfd78af_0' }"
input:
tuple val(meta), path(purple_dir)
@@ -17,6 +17,7 @@ process LINX_SOMATIC {
output:
tuple val(meta), path('linx_somatic/'), emit: annotation_dir
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -26,6 +27,8 @@ process LINX_SOMATIC {
def xmx_mod = task.ext.xmx_mod ?: 0.75
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
"""
linx \\
-Xmx${Math.round(task.memory.bytes * xmx_mod)} \\
@@ -39,6 +42,7 @@ process LINX_SOMATIC {
-driver_gene_panel ${driver_gene_panel} \\
-write_vis_data \\
-write_neo_epitopes \\
+ ${log_level_arg} \\
-output_dir linx_somatic/
cat <<-END_VERSIONS > versions.yml
@@ -50,6 +54,7 @@ process LINX_SOMATIC {
stub:
"""
mkdir linx_somatic/
+
touch linx_somatic/placeholder
echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
diff --git a/modules/local/linx/somatic/meta.yml b/modules/local/linx/somatic/meta.yml
index d838d2c8..7d760cda 100644
--- a/modules/local/linx/somatic/meta.yml
+++ b/modules/local/linx/somatic/meta.yml
@@ -46,5 +46,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/linx/visualiser/environment.yml b/modules/local/linx/visualiser/environment.yml
index 0dbcd0fe..e9b1bfdd 100644
--- a/modules/local/linx/visualiser/environment.yml
+++ b/modules/local/linx/visualiser/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-linx=2.0.2
+ - bioconda::hmftools-linx=2.1
diff --git a/modules/local/linx/visualiser/main.nf b/modules/local/linx/visualiser/main.nf
index df10802a..0c59f47c 100644
--- a/modules/local/linx/visualiser/main.nf
+++ b/modules/local/linx/visualiser/main.nf
@@ -4,8 +4,8 @@ process LINX_VISUALISER {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-linx:2.0.2--hdfd78af_0' :
- 'biocontainers/hmftools-linx:2.0.2--hdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-linx:2.1--hdfd78af_0' :
+ 'biocontainers/hmftools-linx:2.1--hdfd78af_0' }"
input:
tuple val(meta), path(linx_annotation_dir)
@@ -15,6 +15,7 @@ process LINX_VISUALISER {
output:
tuple val(meta), path('plots/'), emit: plots
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -25,6 +26,8 @@ process LINX_VISUALISER {
def xmx_mod = task.ext.xmx_mod ?: 0.75
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
"""
# NOTE(SW): the output plot directories are always required for ORANGE, which is straightfoward to handle with POSIX
# fs but more involved with FusionFS since it will not write empty directories to S3. A placeholder file can't be
@@ -54,6 +57,7 @@ process LINX_VISUALISER {
-ensembl_data_dir ${ensembl_data_resources} \\
-circos \$(which circos) \\
-threads ${task.cpus} \\
+ ${log_level_arg} \\
-plot_out plots/all/ \\
-data_out data/all/
@@ -78,6 +82,7 @@ process LINX_VISUALISER {
-circos \$(which circos) \\
-plot_reportable \\
-threads ${task.cpus} \\
+ ${log_level_arg} \\
-plot_out plots/reportable/ \\
-data_out data/reportable/
@@ -95,6 +100,7 @@ process LINX_VISUALISER {
stub:
"""
mkdir -p plots/{all,reportable}/
+
touch plots/{all,reportable}/placeholder
echo -e '${task.process}:\n stub: noversions\n' > versions.yml
diff --git a/modules/local/linx/visualiser/meta.yml b/modules/local/linx/visualiser/meta.yml
index d48c1fac..abeee967 100644
--- a/modules/local/linx/visualiser/meta.yml
+++ b/modules/local/linx/visualiser/meta.yml
@@ -39,5 +39,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/linxreport/main.nf b/modules/local/linxreport/main.nf
index 9e9c7caf..1cced3e7 100644
--- a/modules/local/linxreport/main.nf
+++ b/modules/local/linxreport/main.nf
@@ -13,6 +13,7 @@ process LINXREPORT {
output:
tuple val(meta), path('*_linx.html'), emit: html
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
diff --git a/modules/local/linxreport/meta.yml b/modules/local/linxreport/meta.yml
index 7bf2e0ab..6e3decef 100644
--- a/modules/local/linxreport/meta.yml
+++ b/modules/local/linxreport/meta.yml
@@ -37,5 +37,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/neo/annotate_fusions/environment.yml b/modules/local/neo/annotate_fusions/environment.yml
index 79150b43..d33f2bae 100644
--- a/modules/local/neo/annotate_fusions/environment.yml
+++ b/modules/local/neo/annotate_fusions/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-isofox=1.7.1
+ - bioconda::hmftools-isofox=1.7.2
diff --git a/modules/local/neo/annotate_fusions/main.nf b/modules/local/neo/annotate_fusions/main.nf
index ffb2a459..d7c1e858 100644
--- a/modules/local/neo/annotate_fusions/main.nf
+++ b/modules/local/neo/annotate_fusions/main.nf
@@ -4,8 +4,8 @@ process NEO_ANNOTATE_FUSIONS {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-isofox:1.7.1--hdfd78af_1' :
- 'biocontainers/hmftools-isofox:1.7.1--hdfd78af_1' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-isofox:1.7.2--hdfd78af_1' :
+ 'biocontainers/hmftools-isofox:1.7.2--hdfd78af_1' }"
input:
tuple val(meta), path(neo_finder_dir), path(bam), path(bai)
@@ -18,6 +18,7 @@ process NEO_ANNOTATE_FUSIONS {
output:
tuple val(meta), path('*isf.neoepitope.tsv'), emit: annotated_fusions
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -27,6 +28,8 @@ process NEO_ANNOTATE_FUSIONS {
def xmx_mod = task.ext.xmx_mod ?: 0.95
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
"""
mkdir -p isofox/
@@ -36,12 +39,13 @@ process NEO_ANNOTATE_FUSIONS {
-sample ${meta.sample_id} \\
-bam_file ${bam} \\
-functions NEO_EPITOPES \\
- -neo_dir ${neo_finder_dir} \\
-read_length ${read_length} \\
+ -neo_dir ${neo_finder_dir} \\
-ref_genome ${genome_fasta} \\
-ref_genome_version ${genome_ver} \\
-ensembl_data_dir ${ensembl_data_resources} \\
-threads ${task.cpus} \\
+ ${log_level_arg} \\
-output_dir ./
cat <<-END_VERSIONS > versions.yml
@@ -53,6 +57,7 @@ process NEO_ANNOTATE_FUSIONS {
stub:
"""
touch ${meta.sample_id}.isf.neoepitope.tsv
+
echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
"""
}
diff --git a/modules/local/neo/annotate_fusions/meta.yml b/modules/local/neo/annotate_fusions/meta.yml
index f9573815..c0ba191f 100644
--- a/modules/local/neo/annotate_fusions/meta.yml
+++ b/modules/local/neo/annotate_fusions/meta.yml
@@ -58,6 +58,9 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
- "@charlesshale"
diff --git a/modules/local/neo/finder/environment.yml b/modules/local/neo/finder/environment.yml
index 2e973cb8..2e92c519 100644
--- a/modules/local/neo/finder/environment.yml
+++ b/modules/local/neo/finder/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-neo=1.2
+ - bioconda::hmftools-neo=1.2.1
diff --git a/modules/local/neo/finder/main.nf b/modules/local/neo/finder/main.nf
index 2e46bfd6..af9751c1 100644
--- a/modules/local/neo/finder/main.nf
+++ b/modules/local/neo/finder/main.nf
@@ -4,8 +4,8 @@ process NEO_FINDER {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-neo:1.2--hdfd78af_1' :
- 'biocontainers/hmftools-neo:1.2--hdfd78af_1' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-neo:1.2.1--hdfd78af_0' :
+ 'biocontainers/hmftools-neo:1.2.1--hdfd78af_0' }"
input:
tuple val(meta), path(purple_dir), path(linx_annotation_dir)
@@ -17,6 +17,7 @@ process NEO_FINDER {
output:
tuple val(meta), path('neo_finder/'), emit: neo_finder_dir
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -26,6 +27,8 @@ process NEO_FINDER {
def xmx_mod = task.ext.xmx_mod ?: 0.95
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
"""
mkdir -p neo_finder/
@@ -38,7 +41,7 @@ process NEO_FINDER {
-ref_genome ${genome_fasta} \\
-ref_genome_version ${genome_ver} \\
-ensembl_data_dir ${ensembl_data_resources} \\
- -log_debug \\
+ ${log_level_arg} \\
-output_dir neo_finder/
cat <<-END_VERSIONS > versions.yml
@@ -50,6 +53,7 @@ process NEO_FINDER {
stub:
"""
mkdir -p neo_finder/
+
echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
"""
}
diff --git a/modules/local/neo/finder/meta.yml b/modules/local/neo/finder/meta.yml
index 01dc2fcb..9171f4dd 100644
--- a/modules/local/neo/finder/meta.yml
+++ b/modules/local/neo/finder/meta.yml
@@ -47,6 +47,9 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
- "@charlesshale"
diff --git a/modules/local/neo/scorer/environment.yml b/modules/local/neo/scorer/environment.yml
index 284c1335..f8a309f8 100644
--- a/modules/local/neo/scorer/environment.yml
+++ b/modules/local/neo/scorer/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-neo=1.2
+ - bioconda::hmftools-neo=1.2.1
diff --git a/modules/local/neo/scorer/main.nf b/modules/local/neo/scorer/main.nf
index c8367255..f3a817d0 100644
--- a/modules/local/neo/scorer/main.nf
+++ b/modules/local/neo/scorer/main.nf
@@ -4,8 +4,8 @@ process NEO_SCORER {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-neo:1.2--hdfd78af_1' :
- 'biocontainers/hmftools-neo:1.2--hdfd78af_1' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-neo:1.2.1--hdfd78af_0' :
+ 'biocontainers/hmftools-neo:1.2.1--hdfd78af_0' }"
input:
tuple val(meta), path(isofox_dir), path(purple_dir), path(sage_vcf), path(lilac_dir), path(neo_finder_dir), path(annotated_fusions)
@@ -16,6 +16,7 @@ process NEO_SCORER {
output:
tuple val(meta), path('neo_scorer/'), emit: neo_scorer_dir
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -25,6 +26,8 @@ process NEO_SCORER {
def xmx_mod = task.ext.xmx_mod ?: 0.95
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
def rna_sample_arg = meta.containsKey('sample_rna_id') ? "-rna_sample ${meta.sample_rna_id}" : ''
def rna_somatic_vcf_arg = meta.containsKey('sample_rna_id') ? "-rna_somatic_vcf ${sage_vcf}" : ''
@@ -49,16 +52,16 @@ process NEO_SCORER {
${args} \\
-sample ${meta.sample_id} \\
${cancer_type_arg} \\
+ -purple_dir ${purple_dir} \\
${rna_sample_arg} \\
\${isofox_dir_arg} \\
- -purple_dir ${purple_dir} \\
${rna_somatic_vcf_arg} \\
-lilac_dir ${lilac_dir} \\
-neo_dir ${neo_finder_dir} \\
-ensembl_data_dir ${ensembl_data_resources} \\
-score_file_dir ${neo_resources} \\
-cancer_tpm_medians_file ${cohort_tpm_medians} \\
- -log_debug \\
+ ${log_level_arg} \\
-output_dir neo_scorer/
cat <<-END_VERSIONS > versions.yml
@@ -70,6 +73,7 @@ process NEO_SCORER {
stub:
"""
mkdir -p neo_scorer/
+
echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
"""
}
diff --git a/modules/local/neo/scorer/meta.yml b/modules/local/neo/scorer/meta.yml
index c870da4d..5ce8f853 100644
--- a/modules/local/neo/scorer/meta.yml
+++ b/modules/local/neo/scorer/meta.yml
@@ -57,6 +57,9 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
- "@charlesshale"
diff --git a/modules/local/orange/environment.yml b/modules/local/orange/environment.yml
index dc3f3020..45b29182 100644
--- a/modules/local/orange/environment.yml
+++ b/modules/local/orange/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-orange=3.8.1
+ - bioconda::hmftools-orange=4.1
diff --git a/modules/local/orange/main.nf b/modules/local/orange/main.nf
index 708dd3ce..a2539487 100644
--- a/modules/local/orange/main.nf
+++ b/modules/local/orange/main.nf
@@ -4,8 +4,8 @@ process ORANGE {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-orange:3.8.1--hdfd78af_0' :
- 'biocontainers/hmftools-orange:3.8.1--hdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-orange:4.1--hdfd78af_0' :
+ 'biocontainers/hmftools-orange:4.1--hdfd78af_0' }"
input:
tuple val(meta),
@@ -42,6 +42,7 @@ process ORANGE {
tuple val(meta), path('output/*.orange.pdf') , emit: pdf, optional: true
tuple val(meta), path('output/*.orange.json'), emit: json, optional: true
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -51,6 +52,8 @@ process ORANGE {
def xmx_mod = task.ext.xmx_mod ?: 0.95
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
def pipeline_version_str = pipeline_version ?: 'not specified'
def run_mode = Utils.getEnumFromString(params.mode, Constants.RunMode);
@@ -162,6 +165,7 @@ process ORANGE {
-ensembl_data_dir ${ensembl_data_resources} \\
${isofox_gene_distribution_arg} \\
${isofox_alt_sj_arg} \\
+ ${log_level_arg} \\
-output_dir output/
cat <<-END_VERSIONS > versions.yml
@@ -173,6 +177,7 @@ process ORANGE {
stub:
"""
mkdir -p output/
+
touch output/${meta.tumor_id}.orange.json
touch output/${meta.tumor_id}.orange.pdf
diff --git a/modules/local/orange/meta.yml b/modules/local/orange/meta.yml
index c4e45850..33b038de 100644
--- a/modules/local/orange/meta.yml
+++ b/modules/local/orange/meta.yml
@@ -13,7 +13,7 @@ input:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- bamtools_somatic_dir:
type: directory
description: BamTools somatic output directory
@@ -124,5 +124,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/pave/germline/environment.yml b/modules/local/pave/germline/environment.yml
index 946ac0e8..6bd33a54 100644
--- a/modules/local/pave/germline/environment.yml
+++ b/modules/local/pave/germline/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-pave=1.7.1
+ - bioconda::hmftools-pave=1.8
diff --git a/modules/local/pave/germline/main.nf b/modules/local/pave/germline/main.nf
index e8baf26d..054922f5 100644
--- a/modules/local/pave/germline/main.nf
+++ b/modules/local/pave/germline/main.nf
@@ -1,15 +1,11 @@
-// NOTE(SW): use of tumor sample name here is consistent with Pipeline5
-// - https://github.com/hartwigmedical/pipeline5/blob/v5.33/cluster/src/main/java/com/hartwig/pipeline/tertiary/pave/PaveGermline.java#L36-L41
-// - https://github.com/hartwigmedical/pipeline5/blob/v5.33/cluster/src/main/java/com/hartwig/pipeline/tertiary/pave/PaveArguments.java#L31-L43
-
process PAVE_GERMLINE {
tag "${meta.id}"
label 'process_medium'
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-pave:1.7.1--hdfd78af_0' :
- 'biocontainers/hmftools-pave:1.7.1--hdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-pave:1.8--hdfd78af_1' :
+ 'biocontainers/hmftools-pave:1.8--hdfd78af_1' }"
input:
tuple val(meta), path(sage_vcf), path(sage_tbi)
@@ -22,12 +18,12 @@ process PAVE_GERMLINE {
path segment_mappability
path driver_gene_panel
path ensembl_data_resources
- path gnomad_resource
output:
- tuple val(meta), path("*.vcf.gz") , emit: vcf
- tuple val(meta), path("*.vcf.gz.tbi"), emit: index
+ tuple val(meta), path('*.vcf.gz') , emit: vcf
+ tuple val(meta), path('*.vcf.gz.tbi'), emit: index
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -35,23 +31,15 @@ process PAVE_GERMLINE {
script:
def args = task.ext.args ?: ''
- def xmx_mod = task.ext.xmx_mod ?: 0.75
-
- def gnomad_args
- if (genome_ver.toString() == '37') {
- gnomad_args = "-gnomad_freq_file ${gnomad_resource}"
- } else if (genome_ver.toString() == '38') {
- gnomad_args = "-gnomad_freq_dir ${gnomad_resource}"
- } else {
- error "got bad genome version: ${genome_ver}"
- }
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
"""
pave \\
- -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\
+ -Xmx${Math.round(task.memory.bytes * 0.95)} \\
${args} \\
-sample ${meta.sample_id} \\
- -vcf_file ${sage_vcf} \\
+ -input_vcf ${sage_vcf} \\
+ -output_vcf ${meta.sample_id}.pave.germline.vcf.gz \\
-ref_genome ${genome_fasta} \\
-ref_genome_version ${genome_ver} \\
-clinvar_vcf ${clinvar_annotations} \\
@@ -60,21 +48,20 @@ process PAVE_GERMLINE {
-ensembl_data_dir ${ensembl_data_resources} \\
-blacklist_bed ${sage_blocklist_regions} \\
-blacklist_vcf ${sage_blocklist_sites} \\
- ${gnomad_args} \\
-gnomad_no_filter \\
- -read_pass_only \\
-threads ${task.cpus} \\
+ ${log_level_arg} \\
-output_dir ./
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- pave: \$(pave -version | sed -n '/^Pave version / { s/^.* //p }')
+ pave: \$(pave -version | sed 's/^.* //')
END_VERSIONS
"""
stub:
"""
- touch ${meta.sample_id}.sage.pave_germline.vcf.gz{,.tbi}
+ touch ${meta.sample_id}.pave.germline.vcf.gz{,.tbi}
echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
"""
diff --git a/modules/local/pave/germline/meta.yml b/modules/local/pave/germline/meta.yml
index 5d209cec..2981f457 100644
--- a/modules/local/pave/germline/meta.yml
+++ b/modules/local/pave/germline/meta.yml
@@ -60,8 +60,6 @@ input:
- ensembl_data_resources:
type: directory
description: HMF ensembl data resources directory
- - gnomad_resource:
- description: gnomAD resource
output:
- meta:
type: map
@@ -80,5 +78,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/esvee/assemble/environment.yml b/modules/local/pave/pon_creation/environment.yml
similarity index 54%
rename from modules/local/esvee/assemble/environment.yml
rename to modules/local/pave/pon_creation/environment.yml
index 96a1ad40..c92d09fa 100644
--- a/modules/local/esvee/assemble/environment.yml
+++ b/modules/local/pave/pon_creation/environment.yml
@@ -1,7 +1,7 @@
-name: esvee_assemble
+name: pave_pon_creation
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-esvee=1.0.3
+ - bioconda::hmftools-pave=1.8
diff --git a/modules/local/pave/pon_creation/main.nf b/modules/local/pave/pon_creation/main.nf
new file mode 100644
index 00000000..def1768b
--- /dev/null
+++ b/modules/local/pave/pon_creation/main.nf
@@ -0,0 +1,54 @@
+process PAVE_PON_PANEL_CREATION { // Runs PAVE's PonBuilder class over the staged SAGE somatic VCFs and writes a PON artefacts TSV
+ label 'process_medium'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/hmftools-pave:1.8--hdfd78af_1' :
+ 'biocontainers/hmftools-pave:1.8--hdfd78af_1' }"
+
+ input:
+ tuple path(sage_vcf), path(sage_tbi) // staged into the task directory; the script globs *.sage.somatic.vcf.gz instead of referencing these variables
+ val genome_ver // passed to -ref_genome_version and embedded in the output file name
+
+ output:
+ path 'pave.somatic_artefacts.*.tsv', emit: pave_artefacts // the file named by -output_pon_file in the script
+ path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files // every file matching .command.* in the task directory
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: '' // extra CLI arguments from task.ext.args; empty string when unset
+
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' // -log_level is only added when task.ext.log_level is set
+
+ """
+ (
+ echo SampleId
+ basename -s .sage.somatic.vcf.gz -a *.sage.somatic.vcf.gz
+ ) > sample_ids.txt
+
+ pave \\
+ -Xmx${Math.round(task.memory.bytes * 0.95)} \\
+ com.hartwig.hmftools.pave.pon_gen.PonBuilder \\
+ ${args} \\
+ -sample_id_file sample_ids.txt \\
+ -vcf_path '*.sage.somatic.vcf.gz' \\
+ -ref_genome_version ${genome_ver} \\
+ ${log_level_arg} \\
+ -output_pon_file pave.somatic_artefacts.${genome_ver}.tsv
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ pave: \$(pave -version | sed 's/^.* //')
+ END_VERSIONS
+ """
+
+ stub: // creates an empty output TSV and a placeholder versions.yml; PAVE is not invoked
+ """
+ touch pave.somatic_artefacts.${genome_ver}.tsv
+
+ echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
+ """
+}
diff --git a/modules/local/pave/pon_creation/meta.yml b/modules/local/pave/pon_creation/meta.yml
new file mode 100644
index 00000000..c3cd28c6
--- /dev/null
+++ b/modules/local/pave/pon_creation/meta.yml
@@ -0,0 +1,41 @@
+name: pave_pon_creation
+description: Build a PAVE panel of normals (PON) artefacts file from a set of SAGE somatic VCFs
+keywords:
+ - pave
+ - annotation
+ - gene
+ - transcript
+ - protein
+ - vcf
+tools:
+ - pave:
+ description: Annotates small variant VCF with gene, transcript coding and protein effects.
+ homepage: https://github.com/hartwigmedical/hmftools/tree/master/pave
+ documentation: https://github.com/hartwigmedical/hmftools/tree/master/pave
+ licence: ["GPL v3"]
+input:
+ - sage_vcf:
+ type: file
+ description: SAGE VCF file
+ pattern: "*.{vcf.gz}"
+ - sage_tbi:
+ type: file
+ description: SAGE VCF index file
+ pattern: "*.{tbi}"
+ - genome_ver:
+ type: string
+ description: Reference genome version
+output:
+ - pave_artefacts:
+ type: file
+ description: PAVE artefacts file
+ pattern: "*.{tsv}"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
+authors:
+ - "@scwatts"
diff --git a/modules/local/pave/somatic/environment.yml b/modules/local/pave/somatic/environment.yml
index 7fa3bb0b..736cc619 100644
--- a/modules/local/pave/somatic/environment.yml
+++ b/modules/local/pave/somatic/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-pave=1.7.1
+ - bioconda::hmftools-pave=1.8
diff --git a/modules/local/pave/somatic/main.nf b/modules/local/pave/somatic/main.nf
index 7298e39a..46c59a33 100644
--- a/modules/local/pave/somatic/main.nf
+++ b/modules/local/pave/somatic/main.nf
@@ -4,16 +4,16 @@ process PAVE_SOMATIC {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-pave:1.7.1--hdfd78af_0' :
- 'biocontainers/hmftools-pave:1.7.1--hdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-pave:1.8--hdfd78af_1' :
+ 'biocontainers/hmftools-pave:1.8--hdfd78af_1' }"
input:
tuple val(meta), path(sage_vcf), path(sage_tbi)
path genome_fasta
val genome_ver
path genome_fai
- path sage_pon
path pon_artefacts
+ path sage_pon
path clinvar_annotations
path segment_mappability
path driver_gene_panel
@@ -21,9 +21,10 @@ process PAVE_SOMATIC {
path gnomad_resource
output:
- tuple val(meta), path("*.vcf.gz") , emit: vcf
- tuple val(meta), path("*.vcf.gz.tbi"), emit: index
+ tuple val(meta), path('*.vcf.gz') , emit: vcf
+ tuple val(meta), path('*.vcf.gz.tbi'), emit: index
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -33,13 +34,12 @@ process PAVE_SOMATIC {
def xmx_mod = task.ext.xmx_mod ?: 0.75
- def pon_filters
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
def gnomad_args
if (genome_ver.toString() == '37') {
- pon_filters = 'HOTSPOT:10:5;PANEL:6:5;UNKNOWN:6:0'
gnomad_args = "-gnomad_freq_file ${gnomad_resource}"
} else if (genome_ver.toString() == '38') {
- pon_filters = 'HOTSPOT:6:5;PANEL:3:3;UNKNOWN:3:0'
gnomad_args = "-gnomad_freq_dir ${gnomad_resource}"
} else {
error "got bad genome version: ${genome_ver}"
@@ -53,19 +53,19 @@ process PAVE_SOMATIC {
-Xmx${Math.round(task.memory.bytes * xmx_mod)} \\
${args} \\
-sample ${meta.sample_id} \\
- -vcf_file ${sage_vcf} \\
+ -input_vcf ${sage_vcf} \\
+ -output_vcf ${meta.sample_id}.pave.somatic.vcf.gz \\
-ref_genome ${genome_fasta} \\
-ref_genome_version ${genome_ver} \\
- -pon_file ${sage_pon} \\
- -pon_filters "${pon_filters}" \\
${pon_artefact_arg} \\
+ -pon_file ${sage_pon} \\
+ ${gnomad_args} \\
-clinvar_vcf ${clinvar_annotations} \\
-driver_gene_panel ${driver_gene_panel} \\
-mappability_bed ${segment_mappability} \\
-ensembl_data_dir ${ensembl_data_resources} \\
- ${gnomad_args} \\
- -read_pass_only \\
-threads ${task.cpus} \\
+ ${log_level_arg} \\
-output_dir ./
cat <<-END_VERSIONS > versions.yml
@@ -76,7 +76,7 @@ process PAVE_SOMATIC {
stub:
"""
- touch ${meta.sample_id}.sage.pave_somatic.vcf.gz{,.tbi}
+ touch ${meta.sample_id}.pave.somatic.vcf.gz{,.tbi}
echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
"""
diff --git a/modules/local/pave/somatic/meta.yml b/modules/local/pave/somatic/meta.yml
index af814d51..8734eb2f 100644
--- a/modules/local/pave/somatic/meta.yml
+++ b/modules/local/pave/somatic/meta.yml
@@ -38,13 +38,13 @@ input:
type: file
description: Reference genome assembly fai file
pattern: "*.{fai}"
+ - pon_artefacts:
+ type: file
+ description: Targeted sequencing PON artefacts file (optional)
- sage_pon:
type: file
description: SAGE PON file
pattern: "*.{tsv.gz}"
- - pon_artefacts:
- type: file
- description: Taregeted sequencing PON artefacts file (optional)
- clinvar_annotations:
type: file
description: ClinVar annotations VCF file
@@ -80,5 +80,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/peach/main.nf b/modules/local/peach/main.nf
index 6497803f..4bcb0047 100644
--- a/modules/local/peach/main.nf
+++ b/modules/local/peach/main.nf
@@ -16,6 +16,7 @@ process PEACH {
output:
tuple val(meta), path('peach/'), emit: peach_dir
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -25,6 +26,8 @@ process PEACH {
def xmx_mod = task.ext.xmx_mod ?: 0.75
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
"""
peach \\
-Xmx${Math.round(task.memory.bytes * xmx_mod)} \\
@@ -34,8 +37,10 @@ process PEACH {
-haplotypes_file ${haplotypes} \\
-function_file ${haplotype_functions} \\
-drugs_file ${drug_info} \\
+ ${log_level_arg} \\
-output_dir peach/
+
cat <<-END_VERSIONS > versions.yml
"${task.process}":
peach: \$(peach -version | sed -n '/Peach version/ { s/^.* //p }')
diff --git a/modules/local/peach/meta.yml b/modules/local/peach/meta.yml
index 8bb98e2f..8367bf48 100644
--- a/modules/local/peach/meta.yml
+++ b/modules/local/peach/meta.yml
@@ -15,7 +15,7 @@ input:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- germline_vcf:
type: file
description: PURPLE germline small variant VCF file
@@ -37,7 +37,7 @@ output:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- peach_dir:
type: directory
description: PEACH output directory
@@ -45,5 +45,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/purple/environment.yml b/modules/local/purple/environment.yml
index fb18f1e6..cf79e7a6 100644
--- a/modules/local/purple/environment.yml
+++ b/modules/local/purple/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-purple=4.1
+ - bioconda::hmftools-purple=4.2
diff --git a/modules/local/purple/main.nf b/modules/local/purple/main.nf
index 79460730..1ee95b5b 100644
--- a/modules/local/purple/main.nf
+++ b/modules/local/purple/main.nf
@@ -4,8 +4,8 @@ process PURPLE {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-purple:4.1--hdfd78af_0' :
- 'biocontainers/hmftools-purple:4.1--hdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-purple:4.2--hdfd78af_0' :
+ 'biocontainers/hmftools-purple:4.2--hdfd78af_0' }"
input:
tuple val(meta), path(amber_dir), path(cobalt_dir), path(sv_tumor_vcf), path(sv_tumor_tbi), path(sv_normal_vcf), path(sv_normal_tbi), path(smlv_tumor_vcf), path(smlv_normal_vcf)
@@ -26,6 +26,7 @@ process PURPLE {
output:
tuple val(meta), path('purple/'), emit: purple_dir
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -35,6 +36,8 @@ process PURPLE {
def xmx_mod = task.ext.xmx_mod ?: 0.75
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
def reference_arg = meta.containsKey('normal_id') ? "-reference ${meta.normal_id}" : ''
def sv_tumor_vcf_arg = sv_tumor_vcf ? "-somatic_sv_vcf ${sv_tumor_vcf}" : ''
@@ -75,6 +78,7 @@ process PURPLE {
-gc_profile ${gc_profile} \\
-circos \$(which circos) \\
-threads ${task.cpus} \\
+ ${log_level_arg} \\
-output_dir purple/
cat <<-END_VERSIONS > versions.yml
@@ -86,6 +90,7 @@ process PURPLE {
stub:
"""
mkdir purple/
+
touch purple/${meta.tumor_id}.purple.cnv.gene.tsv
touch purple/${meta.tumor_id}.purple.cnv.somatic.tsv
touch purple/${meta.tumor_id}.purple.driver.catalog.germline.tsv
diff --git a/modules/local/purple/meta.yml b/modules/local/purple/meta.yml
index 075721e1..c8602339 100644
--- a/modules/local/purple/meta.yml
+++ b/modules/local/purple/meta.yml
@@ -17,7 +17,7 @@ input:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- amber_dir:
type: directory
description: AMBER output directory
@@ -103,7 +103,7 @@ output:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- purple_dir:
type: directory
description: PURPLE output directory
@@ -111,5 +111,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/redux/environment.yml b/modules/local/redux/environment.yml
index 81143c4f..4a631aa6 100644
--- a/modules/local/redux/environment.yml
+++ b/modules/local/redux/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-redux=1.1.2
+ - bioconda::hmftools-redux=1.2
diff --git a/modules/local/redux/main.nf b/modules/local/redux/main.nf
index 12123ccb..cc342e2d 100644
--- a/modules/local/redux/main.nf
+++ b/modules/local/redux/main.nf
@@ -4,8 +4,8 @@ process REDUX {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-redux:1.1.2--hdfd78af_0' :
- 'biocontainers/hmftools-redux:1.1.2--hdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-redux:1.2--hdfd78af_0' :
+ 'biocontainers/hmftools-redux:1.2--hdfd78af_0' }"
input:
tuple val(meta), path(bams), path(bais)
@@ -24,6 +24,7 @@ process REDUX {
tuple val(meta), path('*.jitter_params.tsv') , emit: jitter_tsv
tuple val(meta), path('*.ms_table.tsv.gz') , emit: ms_tsv
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -33,6 +34,8 @@ process REDUX {
def xmx_mod = task.ext.xmx_mod ?: 0.95
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
def form_consensus_arg = umi_enable ? '' : '-form_consensus'
def umi_args_list = []
@@ -45,19 +48,19 @@ process REDUX {
-Xmx${Math.round(task.memory.bytes * xmx_mod)} \\
${args} \\
-sample ${meta.sample_id} \\
+ ${form_consensus_arg} \\
+ ${umi_args} \\
-input_bam ${bams.join(',')} \\
- -output_dir ./ \\
-output_bam ./${meta.sample_id}.redux.bam \\
-ref_genome ${genome_fasta} \\
-ref_genome_version ${genome_ver} \\
- -unmap_regions ${unmap_regions} \\
-ref_genome_msi_file ${msi_jitter_sites} \\
+ -unmap_regions ${unmap_regions} \\
-bamtool \$(which samtools) \\
- ${form_consensus_arg} \\
- ${umi_args} \\
-write_stats \\
-threads ${task.cpus} \\
- -log_level DEBUG
+ ${log_level_arg} \\
+ -output_dir ./
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/local/redux/meta.yml b/modules/local/redux/meta.yml
index 062634cc..44f137e9 100644
--- a/modules/local/redux/meta.yml
+++ b/modules/local/redux/meta.yml
@@ -14,7 +14,7 @@ input:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- bams:
type: list
description: List BAM files
@@ -55,7 +55,7 @@ output:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- bam:
type: list
description: BAM and BAI file
@@ -76,6 +76,9 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
- "@mkcmkc"
diff --git a/modules/local/sage/append/environment.yml b/modules/local/sage/append/environment.yml
index afd7f6ad..59e9d98a 100644
--- a/modules/local/sage/append/environment.yml
+++ b/modules/local/sage/append/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-sage=4.0
+ - bioconda::hmftools-sage=4.1
diff --git a/modules/local/sage/append/main.nf b/modules/local/sage/append/main.nf
index 5062c564..f0a9c799 100644
--- a/modules/local/sage/append/main.nf
+++ b/modules/local/sage/append/main.nf
@@ -4,19 +4,21 @@ process SAGE_APPEND {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-sage:4.0--hdfd78af_0' :
- 'biocontainers/hmftools-sage:4.0--hdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-sage:4.1--hdfd78af_0' :
+ 'biocontainers/hmftools-sage:4.1--hdfd78af_0' }"
input:
- tuple val(meta), path(vcf), path(bam), path(bai)
+ tuple val(meta), path(vcf), path(bams), path(bais), path(redux_tsvs)
path genome_fasta
val genome_ver
path genome_fai
path genome_dict
+ val targeted_mode
output:
- tuple val(meta), path('*.append.vcf.gz'), emit: vcf
- path 'versions.yml' , emit: versions
+ tuple val(meta), path('sage_append'), emit: sage_append_dir
+ path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -26,24 +28,30 @@ process SAGE_APPEND {
def xmx_mod = task.ext.xmx_mod ?: 0.75
- def run_mode = Utils.getEnumFromString(params.mode, Constants.RunMode)
- def high_depth_mode_arg = (run_mode === Constants.RunMode.TARGETED) ? '-high_depth_mode' : ''
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
+ def skip_msi_jitter_arg = !redux_tsvs ? '-skip_msi_jitter' : ''
+ def high_depth_mode_arg = targeted_mode ? '-high_depth_mode' : ''
"""
+ mkdir -p sage_append/
+
sage \\
-Xmx${Math.round(task.memory.bytes * xmx_mod)} \\
com.hartwig.hmftools.sage.append.SageAppendApplication \\
${args} \\
-input_vcf ${vcf} \\
- -reference ${meta.tumor_rna_id} \\
- -reference_bam ${bam} \\
+ -max_read_depth 100000 \\
+ -reference ${meta.reference_ids.join(',')} \\
+ -reference_bam ${bams.join(',')} \\
-ref_genome ${genome_fasta} \\
-ref_genome_version ${genome_ver} \\
- -skip_msi_jitter \\
-write_frag_lengths \\
${high_depth_mode_arg} \\
+ ${skip_msi_jitter_arg} \\
-threads ${task.cpus} \\
- -output_vcf ${meta.dna_id}.sage.append.vcf.gz
+ ${log_level_arg} \\
+ -output_vcf sage_append/${meta.output_file_id}.sage.append.vcf.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
@@ -53,7 +61,12 @@ process SAGE_APPEND {
stub:
"""
- touch "${meta.dna_id}.sage.append.vcf.gz"
+ mkdir -p sage_append/
+
+ touch sage_append/${meta.output_file_id}.frag_lengths.tsv.gz
+ touch sage_append/${meta.output_file_id}.sage.append.vcf.gz
+ touch sage_append/${meta.output_file_id}.sage.append.vcf.gz.tbi
+ touch sage_append/${meta.output_file_id}_query.sage.bqr.tsv
echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
"""
diff --git a/modules/local/sage/append/meta.yml b/modules/local/sage/append/meta.yml
index 970d2539..b5a41e42 100644
--- a/modules/local/sage/append/meta.yml
+++ b/modules/local/sage/append/meta.yml
@@ -15,19 +15,20 @@ input:
type: map
description: |
Groovy Map containing sample informatio
- e.g. [id: 'sample_id', append_id: 'sample_id_append']
+ e.g. [id: 'sample_id']
- vcf:
type: file
description: VCF file
pattern: "*.{vcf.gz}"
- - bam:
- type: file
- description: BAM file
- pattern: "*.{bam}"
- - bai:
- type: file
- description: BAI file
- pattern: "*.{bai}"
+ - bams:
+ type: list
+ description: BAM files
+ - bais:
+ type: list
+ description: BAI files
+ - redux_tsvs:
+ type: list
+ description: REDUX MS TSV files (optional; MSI jitter is skipped when none are provided)
- genome_fasta:
type: file
description: Reference genome assembly FASTA file
@@ -43,19 +44,24 @@ input:
type: file
description: Reference genome assembly dict file
pattern: "*.{dict}"
+ - targeted_mode:
+ type: boolean
+ description: Flag indicating whether targeted mode is set
output:
- meta:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', append_id: 'sample_id_append']
- - vcf:
- type: file
- description: VCF file
- pattern: "*.{vcf.gz}"
+ e.g. [id: 'sample_id']
+ - sage_append_dir:
+ type: directory
+ description: SAGE append output directory
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/sage/germline/environment.yml b/modules/local/sage/germline/environment.yml
index 8cac1e92..3f3782e1 100644
--- a/modules/local/sage/germline/environment.yml
+++ b/modules/local/sage/germline/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-sage=4.0
+ - bioconda::hmftools-sage=4.1
diff --git a/modules/local/sage/germline/main.nf b/modules/local/sage/germline/main.nf
index 2c618061..7e35ec75 100644
--- a/modules/local/sage/germline/main.nf
+++ b/modules/local/sage/germline/main.nf
@@ -1,11 +1,11 @@
process SAGE_GERMLINE {
tag "${meta.id}"
- label 'process_high'
+ label 'process_medium'
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-sage:4.0--hdfd78af_0' :
- 'biocontainers/hmftools-sage:4.0--hdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-sage:4.1--hdfd78af_0' :
+ 'biocontainers/hmftools-sage:4.1--hdfd78af_0' }"
input:
tuple val(meta), path(tumor_bam), path(normal_bam), path(tumor_bai), path(normal_bai), path(redux_tsvs)
@@ -14,18 +14,16 @@ process SAGE_GERMLINE {
path genome_fai
path genome_dict
path sage_known_hotspots_germline
- path sage_actionable_panel
- path sage_coverage_panel
path sage_highconf_regions
+ path driver_gene_panel
path ensembl_data_resources
+ val targeted_mode
output:
tuple val(meta), path('germline/*.sage.germline.vcf.gz'), path('germline/*.sage.germline.vcf.gz.tbi'), emit: vcf
tuple val(meta), path('germline/') , emit: sage_dir
path 'versions.yml' , emit: versions
-
- def run_mode = Utils.getEnumFromString(params.mode, Constants.RunMode)
- def high_depth_mode_arg = (run_mode === Constants.RunMode.TARGETED) ? '-high_depth_mode' : ''
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -33,43 +31,46 @@ process SAGE_GERMLINE {
script:
def args = task.ext.args ?: ''
- def xmx_mod = task.ext.xmx_mod ?: 0.95
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
+ def high_depth_mode_arg = targeted_mode ? '-high_depth_mode' : ''
"""
mkdir -p germline/
sage \\
- -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\
+ -Xmx${Math.round(task.memory.bytes * 0.95)} \\
${args} \\
-tumor ${meta.normal_id} \\
-tumor_bam ${normal_bam} \\
-reference ${meta.tumor_id} \\
-reference_bam ${tumor_bam} \\
-jitter_param_dir ./ \\
+ -ref_sample_count 0 \\
-ref_genome ${genome_fasta} \\
-ref_genome_version ${genome_ver} \\
-hotspots ${sage_known_hotspots_germline} \\
- -panel_bed ${sage_actionable_panel} \\
- -coverage_bed ${sage_coverage_panel} \\
+ -driver_gene_panel ${driver_gene_panel} \\
-high_confidence_bed ${sage_highconf_regions} \\
-ensembl_data_dir ${ensembl_data_resources} \\
-germline \\
-panel_only \\
- -ref_sample_count 0 \\
${high_depth_mode_arg} \\
-bqr_write_plot \\
-threads ${task.cpus} \\
+ ${log_level_arg} \\
-output_vcf germline/${meta.tumor_id}.sage.germline.vcf.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- sage: \$(sage -version | sed -n '/^Sage version / { s/^.* //p }')
+ sage: \$(sage -version | sed 's/^.* //')
END_VERSIONS
"""
stub:
"""
mkdir -p germline/
+
touch germline/${meta.tumor_id}.sage.germline.vcf.gz
touch germline/${meta.tumor_id}.sage.germline.vcf.gz.tbi
touch germline/${meta.tumor_id}.sage.bqr.png
diff --git a/modules/local/sage/germline/meta.yml b/modules/local/sage/germline/meta.yml
index 942d3a6a..5fb156d4 100644
--- a/modules/local/sage/germline/meta.yml
+++ b/modules/local/sage/germline/meta.yml
@@ -16,7 +16,7 @@ input:
type: map
description: |
Groovy Map containing sample informatio
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- tumor_bam:
type: file
description: Tumor BAM file
@@ -55,18 +55,14 @@ input:
type: file
description: SAGE germline known hotspots file
pattern: "*.{vcf.gz}"
- - sage_actionable_panel:
- type: file
- description: SAGE actionable panel file
- pattern: "*.{bed.gz}"
- - sage_coverage_panel:
- type: file
- description: SAGE coverage gene panel file
- pattern: "*.{bed.gz}"
- sage_highconf_regions:
type: file
description: SAGE high confidence regions file
pattern: "*.{bed.gz}"
+ - driver_gene_panel:
+ type: file
+ description: Driver gene panel file
+ pattern: "*.{tsv}"
- ensembl_data_resources:
type: directory
description: HMF ensembl data resources directory
@@ -75,7 +71,7 @@ output:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- vcf:
type: file
description: VCF file
@@ -87,5 +83,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/sage/somatic/environment.yml b/modules/local/sage/somatic/environment.yml
index 3e75984b..1dbb5f2f 100644
--- a/modules/local/sage/somatic/environment.yml
+++ b/modules/local/sage/somatic/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-sage=4.0
+ - bioconda::hmftools-sage=4.1
diff --git a/modules/local/sage/somatic/main.nf b/modules/local/sage/somatic/main.nf
index 07667eae..84fc55de 100644
--- a/modules/local/sage/somatic/main.nf
+++ b/modules/local/sage/somatic/main.nf
@@ -6,8 +6,8 @@ process SAGE_SOMATIC {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-sage:4.0--hdfd78af_0' :
- 'biocontainers/hmftools-sage:4.0--hdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-sage:4.1--hdfd78af_0' :
+ 'biocontainers/hmftools-sage:4.1--hdfd78af_0' }"
input:
tuple val(meta), path(tumor_bam), path(normal_bam), path(donor_bam), path(tumor_bai), path(normal_bai), path(donor_bai), path(redux_tsvs)
@@ -15,16 +15,19 @@ process SAGE_SOMATIC {
val genome_ver
path genome_fai
path genome_dict
+ path sage_pon
path sage_known_hotspots_somatic
- path sage_actionable_panel
- path sage_coverage_panel
path sage_highconf_regions
+ path driver_gene_panel
path ensembl_data_resources
+ path gnomad_resource
+ val targeted_mode
output:
tuple val(meta), path('somatic/*.sage.somatic.vcf.gz'), path('somatic/*.sage.somatic.vcf.gz.tbi'), emit: vcf
tuple val(meta), path('somatic/') , emit: sage_dir
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -32,28 +35,53 @@ process SAGE_SOMATIC {
script:
def args = task.ext.args ?: ''
- def xmx_mod = task.ext.xmx_mod ?: 0.95
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+ // Sample IDs
def reference_ids = []
- if (meta.normal_id != null) reference_ids.add(meta.normal_id)
- if (meta.donor_id != null) reference_ids.add(meta.donor_id)
+ if (meta.normal_id != null) { reference_ids.add(meta.normal_id) }
+ if (meta.donor_id != null) { reference_ids.add(meta.donor_id) }
def reference_arg = reference_ids.size() > 0 ? "-reference ${String.join(',', reference_ids)}" : ''
+ def ref_sample_count_arg = reference_ids.size() > 0 ? "-ref_sample_count ${reference_ids.size()}" : ''
+ // BAMs
def reference_bams = []
- if (normal_bam) reference_bams.add(normal_bam.toString())
- if (donor_bam) reference_bams.add(donor_bam.toString())
+ if (normal_bam) { reference_bams.add(normal_bam.toString()) }
+ if (donor_bam) { reference_bams.add(donor_bam.toString()) }
def reference_bam_arg = reference_bams.size() > 0 ? "-reference_bam ${String.join(',', reference_bams)}" : ''
- def ref_sample_count_arg = "-ref_sample_count ${reference_ids.size()}"
-
- def run_mode = Utils.getEnumFromString(params.mode, Constants.RunMode)
- def high_depth_mode_arg = (run_mode === Constants.RunMode.TARGETED) ? '-high_depth_mode' : ''
+ // Tumor in normal contamination (TINC): only for WGS tumor/normal samples
+ def run_tinc_arg = ''
+ def write_fit_variants_arg = ''
+ def gnomad_arg = ''
+ def pon_file_arg = ''
+
+ if (!targeted_mode && tumor_bam && normal_bam) {
+ run_tinc_arg = '-run_tinc'
+ pon_file_arg = "-pon_file ${sage_pon}"
+ write_fit_variants_arg = '-write_fit_variants'
+
+ if (genome_ver.toString() == '37') {
+ gnomad_arg = "-gnomad_freq_file ${gnomad_resource}"
+ } else if (genome_ver.toString() == '38') {
+ gnomad_arg = "-gnomad_freq_dir ${gnomad_resource}"
+ } else {
+ error "got bad genome version: ${genome_ver}"
+ }
+ }
+
+ // NOTE(SW): use of a ternary inexplicably causes a 'variable already defined in scope' error
+ if (targeted_mode) {
+ high_depth_mode_arg = '-high_depth_mode'
+ } else {
+ high_depth_mode_arg = ''
+ }
"""
mkdir -p somatic/
sage \\
- -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\
+ -Xmx${Math.round(task.memory.bytes * 0.95)} \\
${args} \\
${reference_arg} \\
${reference_bam_arg} \\
@@ -64,18 +92,22 @@ process SAGE_SOMATIC {
-ref_genome ${genome_fasta} \\
-ref_genome_version ${genome_ver} \\
-hotspots ${sage_known_hotspots_somatic} \\
- -panel_bed ${sage_actionable_panel} \\
- -coverage_bed ${sage_coverage_panel} \\
+ -driver_gene_panel ${driver_gene_panel} \\
-high_confidence_bed ${sage_highconf_regions} \\
-ensembl_data_dir ${ensembl_data_resources} \\
+ ${pon_file_arg} \\
+ ${gnomad_arg} \\
+ ${run_tinc_arg} \\
${high_depth_mode_arg} \\
-bqr_write_plot \\
+ ${write_fit_variants_arg} \\
-threads ${task.cpus} \\
+ ${log_level_arg} \\
-output_vcf somatic/${meta.tumor_id}.sage.somatic.vcf.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- sage: \$(sage -version | sed -n '/^Sage version / { s/^.* //p }')
+ sage: \$(sage -version | sed 's/^.* //')
END_VERSIONS
"""
diff --git a/modules/local/sage/somatic/meta.yml b/modules/local/sage/somatic/meta.yml
index a70d6129..9fd096b1 100644
--- a/modules/local/sage/somatic/meta.yml
+++ b/modules/local/sage/somatic/meta.yml
@@ -16,7 +16,7 @@ input:
type: map
description: |
Groovy Map containing sample informatio
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- tumor_bam:
type: file
description: Tumor BAM file
@@ -59,31 +59,34 @@ input:
type: file
description: Reference genome assembly dict file
pattern: "*.{dict}"
+ - sage_pon:
+ type: file
+ description: SAGE PON file (optional)
+ pattern: "*.{tsv.gz}"
- sage_known_hotspots_somatic:
type: file
description: SAGE somatic known hotspots file
pattern: "*.{vcf.gz}"
- - sage_actionable_panel:
- type: file
- description: SAGE actionable gene panel file
- pattern: "*.{bed.gz}"
- - sage_coverage_panel:
- type: file
- description: SAGE coverage gene panel file
- pattern: "*.{bed.gz}"
- sage_highconf_regions:
type: file
description: SAGE high confidence regions file
pattern: "*.{bed.gz}"
+ - driver_gene_panel:
+ type: file
+ description: Driver gene panel file
+ pattern: "*.{tsv}"
- ensembl_data_resources:
type: directory
description: HMF ensembl data resources directory
+ - targeted_mode:
+ type: boolean
+ description: Flag indicating whether targeted mode is set
output:
- meta:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- vcf:
type: file
description: SAGE VCF file
@@ -95,5 +98,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/sambamba/merge/main.nf b/modules/local/sambamba/merge/main.nf
index d67b9743..31b29013 100644
--- a/modules/local/sambamba/merge/main.nf
+++ b/modules/local/sambamba/merge/main.nf
@@ -13,6 +13,7 @@ process SAMBAMBA_MERGE {
output:
tuple val(meta), path('*bam'), emit: bam
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
diff --git a/modules/local/sambamba/merge/meta.yml b/modules/local/sambamba/merge/meta.yml
index c4424055..849ef025 100644
--- a/modules/local/sambamba/merge/meta.yml
+++ b/modules/local/sambamba/merge/meta.yml
@@ -15,7 +15,7 @@ input:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- bams:
type: list
description: List BAM files
@@ -24,7 +24,7 @@ output:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- bam:
type: file
description: BAM file
@@ -33,6 +33,9 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
- "@mkcmkc"
diff --git a/modules/local/sigs/main.nf b/modules/local/sigs/main.nf
index 96a210f9..fd95383d 100644
--- a/modules/local/sigs/main.nf
+++ b/modules/local/sigs/main.nf
@@ -14,6 +14,7 @@ process SIGS {
output:
tuple val(meta), path('sigs/'), emit: sigs_dir
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -23,6 +24,8 @@ process SIGS {
def xmx_mod = task.ext.xmx_mod ?: 0.75
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
"""
mkdir -p sigs/
@@ -32,6 +35,7 @@ process SIGS {
-sample ${meta.sample_id} \\
-somatic_vcf_file ${smlv_vcf} \\
-signatures_file ${signatures} \\
+ ${log_level_arg} \\
-output_dir sigs/
cat <<-END_VERSIONS > versions.yml
@@ -43,6 +47,7 @@ process SIGS {
stub:
"""
mkdir -p sigs/
+
touch sigs/placeholder
echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
diff --git a/modules/local/sigs/meta.yml b/modules/local/sigs/meta.yml
index 70dd85b6..767f5b2b 100644
--- a/modules/local/sigs/meta.yml
+++ b/modules/local/sigs/meta.yml
@@ -14,7 +14,7 @@ input:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name']
+ e.g. [id: 'sample_id']
- smlv_vcf:
type: file
description: Small somatic variant VCF file
@@ -28,7 +28,7 @@ output:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- sigs_dir:
type: directory
description: Sigs output directory
@@ -36,5 +36,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/star/align/main.nf b/modules/local/star/align/main.nf
index 66d7c392..68c3906e 100644
--- a/modules/local/star/align/main.nf
+++ b/modules/local/star/align/main.nf
@@ -14,6 +14,7 @@ process STAR_ALIGN {
output:
tuple val(meta), path('*bam'), emit: bam
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
diff --git a/modules/local/star/align/meta.yml b/modules/local/star/align/meta.yml
index 19bb83c5..4f7283a7 100644
--- a/modules/local/star/align/meta.yml
+++ b/modules/local/star/align/meta.yml
@@ -16,7 +16,7 @@ input:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- reads_fwd:
type: file
description: Forward reads FASTQ file
@@ -33,7 +33,7 @@ output:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- bam:
type: file
description: BAM file
@@ -42,5 +42,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/teal/pipeline/environment.yml b/modules/local/teal/pipeline/environment.yml
index c9466094..9810b8a1 100644
--- a/modules/local/teal/pipeline/environment.yml
+++ b/modules/local/teal/pipeline/environment.yml
@@ -1,4 +1,4 @@
-name: teal
+name: teal_pipeline
channels:
- conda-forge
- bioconda
diff --git a/modules/local/teal/pipeline/main.nf b/modules/local/teal/pipeline/main.nf
index 5712455e..4aeea3eb 100644
--- a/modules/local/teal/pipeline/main.nf
+++ b/modules/local/teal/pipeline/main.nf
@@ -9,14 +9,20 @@ process TEAL_PIPELINE {
input:
tuple val(meta),
- path(tumor_teal_bam), path(tumor_teal_bai),
- path(normal_teal_bam), path(normal_teal_bai),
- path(tumor_metrics_dir), path(normal_metrics_dir), path(cobalt_dir), path(purple_dir)
+ path(tumor_teal_bam),
+ path(tumor_teal_bai),
+ path(normal_teal_bam),
+ path(normal_teal_bai),
+ path(tumor_metrics_dir),
+ path(normal_metrics_dir),
+ path(cobalt_dir),
+ path(purple_dir)
val genome_ver
output:
tuple val(meta), path('teal/*.tsv*'), emit: teal_tsvs
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -35,8 +41,13 @@ process TEAL_PIPELINE {
def reference_bam_arg = normal_teal_bam ? "-reference_bam ${normal_teal_bam}" : ''
def reference_wgs_metrics_arg = normal_metrics_dir ? "-reference_wgs_metrics ${normal_metrics_dir}/${meta.normal_id}.bam_metric.summary.tsv" : ''
- if (tumor_arg && ! purple_arg) error "TEAL requires PURPLE inputs when analysing tumor data"
- if (! tumor_arg && ! reference_arg) error "TEAL at least tumor or normal data for analyses"
+ if (tumor_arg && !purple_arg) {
+ error 'TEAL requires PURPLE inputs when analysing tumor data'
+ }
+
+ if (!tumor_arg && !reference_arg) {
+ error 'TEAL requires at least tumor or normal data for analyses'
+ }
"""
teal \\
diff --git a/modules/local/teal/pipeline/meta.yml b/modules/local/teal/pipeline/meta.yml
index 81e5171c..57bced64 100644
--- a/modules/local/teal/pipeline/meta.yml
+++ b/modules/local/teal/pipeline/meta.yml
@@ -16,22 +16,22 @@ input:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
- - tumor_bam:
+ e.g. [id: 'sample_id']
+ - tumor_teal_bam:
type: file
- description: Tumor BAM file (optional)
+ description: Tumor TEAL BAM file (optional)
pattern: "*.{bam}"
- - normal_bam:
+ - tumor_teal_bai:
type: file
- description: Normal BAM file (optional)
- pattern: "*.{bam}"
- - tumor_bai:
- type: file
- description: Tumor BAI file (optional)
+ description: Tumor TEAL BAI file (optional)
pattern: "*.{bai}"
- - normal_bai:
+ - normal_teal_bam:
type: file
- description: Normal BAI file (optional)
+ description: Normal TEAL BAM file (optional)
+ pattern: "*.{bam}"
+ - normal_teal_bai:
+ type: file
+ description: Normal TEAL BAI file (optional)
pattern: "*.{bai}"
- tumor_wgs_metrics_dir:
type: directory
@@ -53,13 +53,17 @@ output:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
- - teal_dir:
- type: directory
+ e.g. [id: 'sample_id']
+ - teal_tsvs:
+ type: list
description: TEAL output directory
+ pattern: "*.{tsv,tsv.*}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/teal/prep/environment.yml b/modules/local/teal/prep/environment.yml
index c9466094..4a826d86 100644
--- a/modules/local/teal/prep/environment.yml
+++ b/modules/local/teal/prep/environment.yml
@@ -1,4 +1,4 @@
-name: teal
+name: teal_prep
channels:
- conda-forge
- bioconda
diff --git a/modules/local/teal/prep/main.nf b/modules/local/teal/prep/main.nf
index 0845571e..47910b58 100644
--- a/modules/local/teal/prep/main.nf
+++ b/modules/local/teal/prep/main.nf
@@ -12,9 +12,10 @@ process TEAL_PREP {
val genome_ver
output:
- tuple val(meta), path("teal_bam/${meta.tumor_id}.teal.telbam{.bam,.bam.bai}") , emit: tumor_bam
- tuple val(meta), path("teal_bam/${meta.normal_id}.teal.telbam{.bam,.bam.bai}"), emit: normal_bam, optional: true
- path 'versions.yml', emit: versions
+ tuple val(meta), path("teal_bam/${meta.tumor_id}.teal.telbam{.bam,.bam.bai}") , emit: tumor_teal_bam
+ tuple val(meta), path("teal_bam/${meta.normal_id}.teal.telbam{.bam,.bam.bai}"), emit: normal_teal_bam, optional: true
+ path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -24,21 +25,19 @@ process TEAL_PREP {
def xmx_mod = task.ext.xmx_mod ?: 0.95
- def tumor_arg = ""
- def tumor_bam_arg = ""
- def tumor_bam_index_command = ""
-
- if(tumor_bam) {
+ def tumor_arg = ''
+ def tumor_bam_arg = ''
+ def tumor_bam_index_command = ''
+ if (tumor_bam) {
tumor_arg = "-tumor ${meta.tumor_id}"
tumor_bam_arg = "-tumor_bam ${tumor_bam}"
tumor_bam_index_command = "samtools index teal_bam/${meta.tumor_id}.teal.telbam.bam"
}
- def reference_arg = ""
- def reference_bam_arg = ""
- def reference_bam_index_command = ""
-
- if(normal_bam) {
+ def reference_arg = ''
+ def reference_bam_arg = ''
+ def reference_bam_index_command = ''
+ if (normal_bam) {
reference_arg = "-reference ${meta.normal_id}"
reference_bam_arg = "-reference_bam ${normal_bam}"
reference_bam_index_command = "samtools index teal_bam/${meta.normal_id}.teal.telbam.bam"
diff --git a/modules/local/teal/prep/meta.yml b/modules/local/teal/prep/meta.yml
index 81e5171c..f0cbf94f 100644
--- a/modules/local/teal/prep/meta.yml
+++ b/modules/local/teal/prep/meta.yml
@@ -16,50 +16,43 @@ input:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
+ e.g. [id: 'sample_id']
- tumor_bam:
type: file
description: Tumor BAM file (optional)
pattern: "*.{bam}"
- - normal_bam:
- type: file
- description: Normal BAM file (optional)
- pattern: "*.{bam}"
- tumor_bai:
type: file
description: Tumor BAI file (optional)
pattern: "*.{bai}"
+ - normal_bam:
+ type: file
+ description: Normal BAM file (optional)
+ pattern: "*.{bam}"
- normal_bai:
type: file
description: Normal BAI file (optional)
pattern: "*.{bai}"
- - tumor_wgs_metrics_dir:
- type: directory
- description: Tumor WGS metrics directory (optional)
- - normal_wgs_metrics:
- type: directory
- description: Normal WGS metrics directory (optional)
- - cobalt_dir:
- type: directory
- description: COBALT output directory
- - purple_dir:
- type: directory
- description: PURPLE output directory (optional)
- - genome_ver:
- type: string
- description: Reference genome version
output:
- meta:
type: map
description: |
Groovy Map containing sample information
- e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
- - teal_dir:
- type: directory
- description: TEAL output directory
+ e.g. [id: 'sample_id']
+ - tumor_teal_bam:
+ type: list
+ description: Tumor TEAL BAM and BAI file
+ pattern: "*.{bam,bai}"
+ - normal_teal_bam:
+ type: list
+ description: Normal TEAL BAM and BAI file
+ pattern: "*.{bam,bai}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/virusbreakend/main.nf b/modules/local/virusbreakend/main.nf
index 973c02cd..b78272ba 100644
--- a/modules/local/virusbreakend/main.nf
+++ b/modules/local/virusbreakend/main.nf
@@ -17,9 +17,10 @@ process VIRUSBREAKEND {
path gridss_config
output:
- tuple val(meta), path("*.summary.tsv"), emit: tsv
- path "*.virusbreakend.vcf" , emit: vcf
+ tuple val(meta), path('*.summary.tsv'), emit: tsv
+ path '*.virusbreakend.vcf' , emit: vcf
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
diff --git a/modules/local/virusbreakend/meta.yml b/modules/local/virusbreakend/meta.yml
index a61ff589..35224d48 100644
--- a/modules/local/virusbreakend/meta.yml
+++ b/modules/local/virusbreakend/meta.yml
@@ -59,5 +59,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/virusinterpreter/environment.yml b/modules/local/virusinterpreter/environment.yml
index 8bc81120..36f605a8 100644
--- a/modules/local/virusinterpreter/environment.yml
+++ b/modules/local/virusinterpreter/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-virus-interpreter=1.7
+ - bioconda::hmftools-virus-interpreter=1.7.1
diff --git a/modules/local/virusinterpreter/main.nf b/modules/local/virusinterpreter/main.nf
index c15b9b1e..64c6d159 100644
--- a/modules/local/virusinterpreter/main.nf
+++ b/modules/local/virusinterpreter/main.nf
@@ -4,8 +4,8 @@ process VIRUSINTERPRETER {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/hmftools-virus-interpreter:1.7--hdfd78af_0' :
- 'biocontainers/hmftools-virus-interpreter:1.7--hdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/hmftools-virus-interpreter:1.7.1--hdfd78af_0' :
+ 'biocontainers/hmftools-virus-interpreter:1.7.1--hdfd78af_0' }"
input:
tuple val(meta), path(virus_tsv), path(purple_dir), path(bamtools_somatic_dir)
@@ -16,6 +16,7 @@ process VIRUSINTERPRETER {
output:
tuple val(meta), path('virusinterpreter/'), emit: virusinterpreter_dir
path 'versions.yml' , emit: versions
+ path '.command.*' , emit: command_files
when:
task.ext.when == null || task.ext.when
@@ -25,6 +26,8 @@ process VIRUSINTERPRETER {
def xmx_mod = task.ext.xmx_mod ?: 0.95
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''
+
"""
mkdir -p virusinterpreter/
@@ -38,6 +41,7 @@ process VIRUSINTERPRETER {
-taxonomy_db_tsv ${taxonomy_db} \\
-virus_reporting_db_tsv ${reporting_db} \\
-virus_blacklisting_db_tsv ${blocklist_db} \\
+ ${log_level_arg} \\
-output_dir virusinterpreter/
cat <<-END_VERSIONS > versions.yml
@@ -49,6 +53,7 @@ process VIRUSINTERPRETER {
stub:
"""
mkdir -p virusinterpreter/
+
touch virusinterpreter/${meta.sample_id}.virus.annotated.tsv
echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
diff --git a/modules/local/virusinterpreter/meta.yml b/modules/local/virusinterpreter/meta.yml
index 6cf0ffb2..dd6f0938 100644
--- a/modules/local/virusinterpreter/meta.yml
+++ b/modules/local/virusinterpreter/meta.yml
@@ -35,7 +35,7 @@ input:
type: file
description: Virus Interpreter reporting database file
pattern: "*.{tsv}"
- - blacklist_db:
+ - blocklist_db:
type: file
description: Virus Interpreter blocklist database file
pattern: "*.{tsv}"
@@ -52,5 +52,8 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
authors:
- "@scwatts"
diff --git a/modules/local/isofox/environment.yml b/modules/local/wisp/environment.yml
similarity index 57%
rename from modules/local/isofox/environment.yml
rename to modules/local/wisp/environment.yml
index d4251c57..1d35d2ac 100644
--- a/modules/local/isofox/environment.yml
+++ b/modules/local/wisp/environment.yml
@@ -1,7 +1,7 @@
-name: isofox
+name: wisp
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- - bioconda::hmftools-isofox=1.7.1
+ - bioconda::hmftools-wisp=1.2
diff --git a/modules/local/wisp/main.nf b/modules/local/wisp/main.nf
new file mode 100644
index 00000000..447c73af
--- /dev/null
+++ b/modules/local/wisp/main.nf
@@ -0,0 +1,105 @@
+process WISP {  // Runs the WISP PurityEstimator and emits its output directory (wisp/), versions.yml and the .command.* files
+ tag "${meta.id}"
+ label 'process_low'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/hmftools-wisp:1.2--hdfd78af_0' :
+ 'biocontainers/hmftools-wisp:1.2--hdfd78af_0' }"
+
+ input:
+ tuple val(meta),
+ path(primary_purple_dir),
+ path('primary_amber_dir'),  // NOTE(review): quoted name, unlike the sibling inputs; presumably stages the dir under this fixed name - confirm
+ path('sample_amber_dir'),
+ path(cobalt_dir),
+ path(sage_append_dir)
+ path genome_fasta
+ path genome_fai  // NOTE(review): not referenced by name in either script; presumably staged so the index sits next to the FASTA - confirm
+ val targeted_mode  // truthy value selects the targeted argument set in the script block
+
+ output:
+ path 'wisp/' , emit: wisp_dir
+ path 'versions.yml', emit: versions
+ path '.command.*' , emit: command_files
+
+ when:
+ task.ext.when == null || task.ext.when  // run unless task.ext.when is set to a falsy value
+
+ script:
+ def args = task.ext.args ?: ''  // extra CLI arguments, inserted ahead of the fixed options in the wisp call
+
+ def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : ''  // empty string when task.ext.log_level is unset or falsy
+
+ def purity_estimate_mode = Utils.getEnumFromString(params.purity_estimate_mode, Constants.RunMode)  // NOTE(review): not referenced again in this process; confirm whether the call is kept only for its validation side effect
+
+ def purity_methods  // this and the four variables below are assigned in the targeted_mode branch that follows
+ def amber_dir_arg
+ def cobalt_dir_arg
+ def gc_ratio_min_arg
+ def write_types_arg
+
+ if (targeted_mode) {  // targeted: somatic variants only, no AMBER/COBALT directories passed to WISP
+ purity_methods = 'SOMATIC_VARIANT'
+ amber_dir_arg = ''
+ cobalt_dir_arg = ''
+ gc_ratio_min_arg = '-gc_ratio_min 0.4'
+ write_types_arg = "-write_types 'SOMATIC_DATA;SOMATIC_PLOT'"
+ } else {  // otherwise: all three purity methods, with AMBER and COBALT directories supplied
+ purity_methods = "'SOMATIC_VARIANT;AMBER_LOH;COPY_NUMBER'"
+ amber_dir_arg = '-amber_dir amber_dir__prepared/'  // directory is populated by the shell preamble of the script below
+ cobalt_dir_arg = "-cobalt_dir ${cobalt_dir}"
+ gc_ratio_min_arg = ''
+ write_types_arg = '-write_types ALL'
+ }
+
+ """
+ # Put AMBER outputs from all samples into the same dir
+ if [[ -n "${amber_dir_arg}" ]]; then
+ mkdir -p amber_dir__prepared/;
+ for fp in ${primary_amber_dir}/*.amber.*; do ln -sf ../\$fp amber_dir__prepared/; done
+ for fp in ${sample_amber_dir}/*.amber.*; do ln -sf ../\$fp amber_dir__prepared/; done
+ fi;
+
+ # Run WISP
+ mkdir -p wisp/
+
+ wisp \\
+ -Xmx${Math.round(task.memory.bytes * 0.95)} \\
+ com.hartwig.hmftools.wisp.purity.PurityEstimator \\
+ ${args} \\
+ -patient_id ${meta.subject_id} \\
+ -tumor_id ${meta.primary_id} \\
+ -samples ${meta.longitudinal_id} \\
+ -purity_methods ${purity_methods} \\
+ -somatic_vcf ${sage_append_dir}/${meta.longitudinal_id}.sage.append.vcf.gz \\
+ -purple_dir ${primary_purple_dir} \\
+ ${amber_dir_arg} \\
+ ${cobalt_dir_arg} \\
+ -ref_genome ${genome_fasta} \\
+ ${gc_ratio_min_arg} \\
+ ${write_types_arg} \\
+ ${log_level_arg} \\
+ -output_dir wisp/
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ wisp: \$(wisp -version | sed 's/^.* //')
+ END_VERSIONS
+ """
+
+ stub:  // creates empty placeholder outputs and a stub versions.yml; WISP itself is not invoked
+ """
+ mkdir -p wisp/
+
+ touch wisp/${meta.subject_id}_${meta.longitudinal_id}.wisp.cn_plot_calcs.tsv
+ touch wisp/${meta.subject_id}_${meta.longitudinal_id}.wisp.cn_segments.tsv
+ touch wisp/${meta.subject_id}_${meta.longitudinal_id}.wisp.somatic_peak.tsv
+ touch wisp/${meta.subject_id}_${meta.longitudinal_id}.wisp.somatic_variants.tsv
+ touch wisp/${meta.subject_id}_${meta.longitudinal_id}.wisp.summary.tsv
+ touch wisp/${meta.longitudinal_id}.cn_gc_ratio_fit.png
+ touch wisp/${meta.longitudinal_id}.somatic_vaf.png
+
+ echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
+ """
+}
diff --git a/modules/local/wisp/meta.yml b/modules/local/wisp/meta.yml
new file mode 100644
index 00000000..105d7944
--- /dev/null
+++ b/modules/local/wisp/meta.yml
@@ -0,0 +1,62 @@
+name: wisp
+description: Estimates TF in a given sample using biomarkers of an existing sample from the same patient
+keywords:
+ - tumor_fraction
+ - purity
+ - ccfdna
+tools:
+ - wisp:
+ description: Estimates TF in a given sample using biomarkers of an existing sample from the same patient.
+ homepage: https://github.com/hartwigmedical/hmftools/tree/master/wisp
+ documentation: https://github.com/hartwigmedical/hmftools/tree/master/wisp
+ licence: ["GPL v3"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [id: 'sample_id']
+ - primary_purple_dir:
+ type: directory
+ description: PURPLE directory of the primary sample
+ - primary_amber_dir:
+ type: directory
+ description: AMBER directory of the primary sample
+ - sample_amber_dir:
+ type: directory
+ description: AMBER directory of the query sample
+ - cobalt_dir:
+ type: directory
+ description: COBALT directory
+ - sage_append_dir:
+ type: directory
+ description: SAGE append directory
+ - genome_fasta:
+ type: file
+ description: Reference genome assembly FASTA file
+ pattern: "*.{fa,fasta}"
+ - genome_fai:
+ type: file
+ description: Reference genome assembly fai file
+ pattern: "*.{fai}"
+ - targeted_mode:
+ type: boolean
+ description: Flag indicating whether targeted mode is set
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [id: 'sample_id']
+ - wisp_dir:
+ type: directory
+ description: WISP output directory
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - command_files:
+ type: list
+ description: List of command files
+authors:
+ - "@scwatts"
diff --git a/modules/nf-core/gatk4/markduplicates/gatk4-markduplicates.diff b/modules/nf-core/gatk4/markduplicates/gatk4-markduplicates.diff
index a2eac783..6bb68a97 100644
--- a/modules/nf-core/gatk4/markduplicates/gatk4-markduplicates.diff
+++ b/modules/nf-core/gatk4/markduplicates/gatk4-markduplicates.diff
@@ -1,4 +1,4 @@
-Changes in module 'nf-core/gatk4/markduplicates'
+Changes in component 'nf-core/gatk4/markduplicates'
'modules/nf-core/gatk4/markduplicates/meta.yml' is unchanged
Changes in 'gatk4/markduplicates/main.nf':
--- modules/nf-core/gatk4/markduplicates/main.nf
@@ -17,7 +17,14 @@ Changes in 'gatk4/markduplicates/main.nf':
input:
tuple val(meta), path(bam)
-@@ -25,7 +25,7 @@
+@@ -19,13 +19,14 @@
+ tuple val(meta), path("*.bai"), emit: bai, optional: true
+ tuple val(meta), path("*.metrics"), emit: metrics
+ path "versions.yml", emit: versions
++ path '.command.{sh,log}', emit: command_files
+
+ when:
+ task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
@@ -26,7 +33,7 @@ Changes in 'gatk4/markduplicates/main.nf':
def input_list = bam.collect{"--INPUT $it"}.join(' ')
def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : ""
-@@ -38,20 +38,29 @@
+@@ -38,20 +39,29 @@
"""
gatk --java-options "-Xmx${avail_mem}M" MarkDuplicates \\
$input_list \\
diff --git a/modules/nf-core/gatk4/markduplicates/main.nf b/modules/nf-core/gatk4/markduplicates/main.nf
index 35e3a59e..91b8c3b3 100644
--- a/modules/nf-core/gatk4/markduplicates/main.nf
+++ b/modules/nf-core/gatk4/markduplicates/main.nf
@@ -19,6 +19,7 @@ process GATK4_MARKDUPLICATES {
tuple val(meta), path("*.bai"), emit: bai, optional: true
tuple val(meta), path("*.metrics"), emit: metrics
path "versions.yml", emit: versions
+ path '.command.{sh,log}', emit: command_files
when:
task.ext.when == null || task.ext.when
diff --git a/nextflow.config b/nextflow.config
index 6faf1e38..32615124 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -14,6 +14,7 @@ params {
// Workflow mode
mode = null
+ purity_estimate_mode = null
// Force options
force_genome = false
@@ -21,13 +22,15 @@ params {
// Read processing and alignment options
max_fastq_records = 10000000
- fastp_umi = false
- redux_umi = false
+ fastp_umi_enabled = false
+ redux_umi_enabled = false
// Process configuration
- processes_manual = false
- processes_include = null
- processes_exclude = null
+ processes_manual = ''
+ processes_include = null
+ processes_exclude = null
+
+ hmftools_log_level = 'DEBUG'
// Reference genome information; iGenomes is effectively disabled but retained for linting
genome = null
@@ -43,8 +46,13 @@ params {
isofox_read_length = null
isofox_functions = 'TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS;RETAINED_INTRONS'
+ // NOTE(SW): used only for panel resource creation
+ driver_gene_panel = null
+ target_regions_bed = null
+
gridss_config = null
+ ref_data_types = null
prepare_reference_only = false
create_stub_placeholders = false
@@ -271,7 +279,7 @@ manifest {
affiliation: '',
email: '',
github: '',
- contribution: ['author'],
+ contribution: ['author', 'maintainer'],
orcid: ''
],
[
@@ -287,14 +295,14 @@ manifest {
description = """A comprehensive cancer DNA/RNA analysis and reporting pipeline"""
mainScript = 'main.nf'
defaultBranch = 'master'
- nextflowVersion = '!>=24.04.2'
- version = '2.1.0'
+ nextflowVersion = '!>=24.10.5'
+ version = '2.2.0'
doi = ''
}
// Nextflow plugins
plugins {
- id 'nf-schema@2.3.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet
+ id 'nf-schema@2.4.2' // Validation of pipeline parameters and creation of an input channel from a sample sheet
}
validation {
@@ -326,6 +334,7 @@ validation {
'fastp_umi_location',
'fastp_umi_skip',
'redux_umi_duplex_delim',
+ 'prepare_reference_only',
]
lenientMode = true
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 82eed696..e58e06c5 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -10,7 +10,7 @@
"type": "object",
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
- "required": ["input", "outdir"],
+ "required": ["outdir"],
"properties": {
"input": {
"type": "string",
@@ -48,6 +48,12 @@
"type": "string",
"description": "Workflow run mode.",
"fa_icon": "fas fa-diagram-project",
+ "pattern": "^(wgts|targeted|purity_estimate|panel_resource_creation|prepare_reference)"
+ },
+ "purity_estimate_mode": {
+ "type": "string",
+ "description": "Purity estimate workflow run mode.",
+ "fa_icon": "fas fa-diagram-project",
"pattern": "^(wgts|targeted)"
},
"panel": {
@@ -68,24 +74,23 @@
"fa_icon": "fas fa-palette"
},
"processes_manual": {
- "type": "boolean",
- "description": "Run only processes manually provided in processes_include.",
+ "type": "string",
+ "description": "Manually run processes provided as a comma separated list.",
"fa_icon": "fas fa-diagram-project"
},
"processes_exclude": {
"type": "string",
- "description": "Pipeline processes to exclude.",
+ "description": "Exclude processes provided as a comma separated list.",
"fa_icon": "fas fa-diagram-project"
},
"processes_include": {
"type": "string",
- "description": "Pipeline processes to include.",
+ "description": "Include processes that are excluded by default, provided as a comma separated list.",
"fa_icon": "fas fa-diagram-project"
},
- "prepare_reference_only": {
- "type": "boolean",
- "description": "Prepare and write reference output only.",
- "default": false,
+ "ref_data_types": {
+ "type": "string",
+ "description": "Which reference data types to download and extract.",
"fa_icon": "fas fa-diagram-project"
},
"create_stub_placeholders": {
@@ -100,7 +105,7 @@
"default": 10000000,
"fa_icon": "fas fa-cog"
},
- "fastp_umi": {
+ "fastp_umi_enabled": {
"type": "boolean",
"description": "Enable fastp UMI processing.",
"default": false,
@@ -123,7 +128,7 @@
"default": -1,
"fa_icon": "fas fa-cog"
},
- "redux_umi": {
+ "redux_umi_enabled": {
"type": "boolean",
"description": "Enable REDUX UMI processing.",
"default": false,
@@ -169,6 +174,23 @@
"description": "Semicolon-separated list of Isofox functions to run",
"default": "TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS;RETAINED_INTRONS",
"fa_icon": "fas fa-cog"
+ },
+ "driver_gene_panel": {
+ "type": "string",
+ "description": "User defined driver gene panel used in panel resource creation, or for overriding the default file located in ref_data_hmf_data_path",
+ "fa_icon": "fas fa-cog"
+ },
+ "target_regions_bed": {
+ "type": "string",
+ "description": "User defined target regions BED used in panel resource creation.",
+ "fa_icon": "fas fa-cog"
+ },
+ "hmftools_log_level": {
+ "type": "string",
+ "enum": ["ALL", "TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL"],
+ "description": "Log level filter for WiGiTS modules",
+ "default": "DEBUG",
+ "fa_icon": "fas fa-cog"
}
}
},
@@ -277,6 +299,12 @@
"pattern": "^\\S+\\.bed$",
"description": "Path to HLA slice BED file.",
"fa_icon": "far fa-file-code"
+ },
+ "prepare_reference_only": {
+ "type": "boolean",
+ "default": false,
+ "fa_icon": "fas fa-cog",
+ "hidden": true
}
}
},
diff --git a/nf-test.config b/nf-test.config
index 889df760..3a1fff59 100644
--- a/nf-test.config
+++ b/nf-test.config
@@ -9,7 +9,7 @@ config {
configFile "tests/nextflow.config"
// ignore tests coming from the nf-core/modules repo
- ignore 'modules/nf-core/**/*', 'subworkflows/nf-core/**/*'
+ ignore 'modules/nf-core/**/tests/*', 'subworkflows/nf-core/**/tests/*'
// run all test with defined profile(s) from the main nextflow.config
profile "test"
diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json
index 97cbed4e..13465d0b 100644
--- a/ro-crate-metadata.json
+++ b/ro-crate-metadata.json
@@ -22,8 +22,8 @@
"@id": "./",
"@type": "Dataset",
"creativeWorkStatus": "InProgress",
- "datePublished": "2025-06-03T11:02:02+00:00",
- "description": "
\n \n \n \n \n
\n\n[](https://github.com/nf-core/oncoanalyser/actions/workflows/ci.yml)\n[](https://github.com/nf-core/oncoanalyser/actions/workflows/linting.yml)\n[](https://nf-co.re/oncoanalyser/results)\n[](https://doi.org/10.5281/zenodo.15189386)\n[](https://www.nf-test.com)\n\n[](https://www.nextflow.io/)\n[](https://github.com/nf-core/tools/releases/tag/3.3.1)\n[](https://docs.conda.io/en/latest/)\n[](https://www.docker.com/)\n[](https://sylabs.io/docs/)\n[](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/oncoanalyser)\n\n[](https://nfcore.slack.com/channels/oncoanalyser)\n[](https://bsky.app/profile/nf-co.re)\n[](https://mstdn.science/@nf_core)\n[](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/oncoanalyser** is a Nextflow pipeline for the comprehensive analysis of cancer genomes and transcriptomes\nusing the [WiGiTS](https://github.com/hartwigmedical/hmftools) toolkit from the Hartwig Medical Foundation. The pipeline\nsupports a wide range of experimental setups:\n\n- FASTQ, BAM, or CRAM input files\n- WGS (whole genome sequencing), WTS (whole transcriptome sequencing), and targeted / panel sequencing (built-in support\n for the [TSO500\n panel](https://sapac.illumina.com/products/by-type/clinical-research-products/trusight-oncology-500.html) with other\n panels and exome requiring [panel reference data\n generation](https://github.com/hartwigmedical/hmftools/blob/master/pipeline/README_TARGETED.md))\n- Paired tumor / normal and tumor-only sample setups, donor sample support for further normal subtraction (e.g. for\n patients with bone marrow transplants or other contaminants in the tumor)\n- UMI (unique molecular identifier) processing supported for DNA sequencing data\n- Most GRCh37 and GRCh38 reference genome builds\n\n## Pipeline overview\n\n
\n\nThe pipeline mainly uses tools from [WiGiTS](https://github.com/hartwigmedical/hmftools), as well as some external\ntools. Due to the limitations of panel data, certain tools (indicated with `*` below) do not run in `targeted` mode.\n\n- Read alignment: [BWA-MEM2](https://github.com/bwa-mem2/bwa-mem2) (DNA), [STAR](https://github.com/alexdobin/STAR) (RNA)\n- Read post-processing: [REDUX](https://github.com/hartwigmedical/hmftools/tree/master/redux) (DNA), [Picard MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard) (RNA)\n- SNV, MNV, INDEL calling: [SAGE](https://github.com/hartwigmedical/hmftools/tree/master/sage), [PAVE](https://github.com/hartwigmedical/hmftools/tree/master/pave)\n- SV calling: [ESVEE](https://github.com/hartwigmedical/hmftools/tree/master/esvee)\n- CNV calling: [AMBER](https://github.com/hartwigmedical/hmftools/tree/master/amber), [COBALT](https://github.com/hartwigmedical/hmftools/tree/master/cobalt), [PURPLE](https://github.com/hartwigmedical/hmftools/tree/master/purple)\n- SV and driver event interpretation: [LINX](https://github.com/hartwigmedical/hmftools/tree/master/linx)\n- RNA transcript analysis: [ISOFOX](https://github.com/hartwigmedical/hmftools/tree/master/isofox)\n- Oncoviral detection: [VIRUSbreakend](https://github.com/PapenfussLab/gridss)\\*, [VirusInterpreter](https://github.com/hartwigmedical/hmftools/tree/master/virus-interpreter)\\*\n- Telomere characterisation: [TEAL](https://github.com/hartwigmedical/hmftools/tree/master/teal)\\*\n- Immune analysis: [LILAC](https://github.com/hartwigmedical/hmftools/tree/master/lilac), [CIDER](https://github.com/hartwigmedical/hmftools/tree/master/cider), [NEO](https://github.com/hartwigmedical/hmftools/tree/master/neo)\\*\n- Mutational signature fitting: [SIGS](https://github.com/hartwigmedical/hmftools/tree/master/sigs)\\*\n- HRD prediction: [CHORD](https://github.com/hartwigmedical/hmftools/tree/master/chord)\\*\n- Tissue of origin 
prediction: [CUPPA](https://github.com/hartwigmedical/hmftools/tree/master/cuppa)\\*\n- Pharmacogenomics: [PEACH](https://github.com/hartwigmedical/hmftools/tree/master/peach)\n- Summary report: [ORANGE](https://github.com/hartwigmedical/hmftools/tree/master/orange), [linxreport](https://github.com/umccr/linxreport)\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCreate a samplesheet with your inputs (WGS/WTS BAMs in this example):\n\n```csv\ngroup_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath\nPATIENT1_WGTS,PATIENT1,PATIENT1-N,normal,dna,bam,/path/to/PATIENT1-N.dna.bam\nPATIENT1_WGTS,PATIENT1,PATIENT1-T,tumor,dna,bam,/path/to/PATIENT1-T.dna.bam\nPATIENT1_WGTS,PATIENT1,PATIENT1-T-RNA,tumor,rna,bam,/path/to/PATIENT1-T.rna.bam\n```\n\nLaunch `oncoanalyser`:\n\n```bash\nnextflow run nf-core/oncoanalyser \\\n -profile \\\n -revision 2.1.0 \\\n --mode \\\n --genome \\\n --input samplesheet.csv \\\n --outdir output/\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/oncoanalyser/usage) and the [parameter documentation](https://nf-co.re/oncoanalyser/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/oncoanalyser/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/oncoanalyser/output).\n\n## Version information\n\n### Extended support\n\nAs `oncoanalyser` is used in clinical settings and subject to accreditation standards in some instances, there is a need\nfor long-term stability and reliability for feature releases in order to meet operational requirements. This is\naccomplished through long-term support of several nominated feature releases, which all receive bug fixes and security\nfixes during the period of extended support.\n\nEach release that is given extended support is allocated a separate long-lived git branch with the 'stable' prefix, e.g.\n`stable/1.2.x`, `stable/1.5.x`. 
Feature development otherwise occurs on the `dev` branch with stable releases pushed to\n`master`.\n\nVersions nominated to have current long-term support:\n\n- TBD\n\n## Known issues\n\nPlease refer to [this page](https://github.com/nf-core/oncoanalyser/issues/177) for details regarding any known issues.\n\n## Credits\n\nThe `oncoanalyser` pipeline was written and is maintained by Stephen Watts ([@scwatts](https://github.com/scwatts)) from\nthe [Genomics Platform\nGroup](https://mdhs.unimelb.edu.au/centre-for-cancer-research/our-research/genomics-platform-group) at the [University\nof Melbourne Centre for Cancer Research](https://mdhs.unimelb.edu.au/centre-for-cancer-research).\n\nWe thank the following organisations and people for their extensive assistance in the development of this pipeline,\nlisted in alphabetical order:\n\n- [Hartwig Medical Foundation\n Australia](https://www.hartwigmedicalfoundation.nl/en/partnerships/hartwig-medical-foundation-australia/)\n- Oliver Hofmann\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#oncoanalyser`\nchannel](https://nfcore.slack.com/channels/oncoanalyser) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nYou can cite the `oncoanalyser` Zenodo record for a specific version using the following DOI:\n[10.5281/zenodo.15189386](https://doi.org/10.5281/zenodo.15189386)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md)\nfile.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia,\n> Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 
Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
+ "datePublished": "2025-08-12T00:13:00+00:00",
+ "description": "
\n \n \n \n \n
\n\n[](https://github.com/nf-core/oncoanalyser/actions/workflows/nf-test.yml)\n[](https://github.com/nf-core/oncoanalyser/actions/workflows/linting.yml)\n[](https://nf-co.re/oncoanalyser/results)\n[](https://doi.org/10.5281/zenodo.15189386)\n[](https://www.nf-test.com)\n\n[](https://www.nextflow.io/)\n[](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[](https://docs.conda.io/en/latest/)\n[](https://www.docker.com/)\n[](https://sylabs.io/docs/)\n[](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/oncoanalyser)\n\n[](https://nfcore.slack.com/channels/oncoanalyser)\n[](https://bsky.app/profile/nf-co.re)\n[](https://mstdn.science/@nf_core)\n[](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/oncoanalyser** is a Nextflow pipeline for the comprehensive analysis of cancer DNA and RNA sequencing data\nusing the [WiGiTS](https://github.com/hartwigmedical/hmftools) toolkit from the Hartwig Medical Foundation. The pipeline\nsupports a wide range of experimental setups:\n\n- FASTQ, BAM, and / or CRAM input files\n- WGS (whole genome sequencing), WTS (whole transcriptome sequencing), and targeted / panel sequencing1\n- Paired tumor / normal and tumor-only samples, and support for donor samples for further normal subtraction\n- Purity estimate for longitudinal samples using genomic features of the primary sample from the same patient2\n- UMI (unique molecular identifier) processing supported for DNA sequencing data\n- Most GRCh37 and GRCh38 reference genome builds\n\n1 built-in support for the [TSO500\npanel](https://www.illumina.com/products/by-type/clinical-research-products/trusight-oncology-500.html) with other\npanels and exomes requiring [creation of custom panel reference\ndata](https://nf-co.re/oncoanalyser/usage#custom-panels)\n \n2 for example a primary WGS tissue biospy and longitudinal low-pass WGS ccfDNA sample taken from the\nsame patient\n\n## Pipeline overview\n\n