From 58613448dbe059896e997ba29f5109691ab45f68 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 26 Jul 2022 16:54:37 +0200 Subject: [PATCH 01/75] update some tool versions --- gridss.wdl | 21 ++++---- hmftools.wdl | 137 +++++++-------------------------------------------- 2 files changed, 30 insertions(+), 128 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 35e41d21..27dedf32 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -34,7 +34,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" String memory = "9G" - String dockerImage = "quay.io/biowdl/gridss:2.12.2" + String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image Int timeMinutes = 120 } @@ -165,7 +165,7 @@ task GRIDSS { Int nonJvmMemoryGb = 50 Int threads = 4 Int timeMinutes = ceil(7200 / threads) + 1800 - String dockerImage = "quay.io/biowdl/gridss:2.12.2" + String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image } command { @@ -241,14 +241,14 @@ task GridssAnnotateVcfRepeatmasker { String memory = "25G" Int threads = 8 - String dockerImage = "quay.io/biowdl/gridss:2.12.2" + String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image Int timeMinutes = 1440 } command { gridss_annotate_vcf_repeatmasker \ --output ~{outputPath} \ - --jar /usr/local/share/gridss-2.12.2-0/gridss.jar \ + --jar /usr/local/share/gridss-2.13.2-1/gridss.jar \ -w . 
\ -t ~{threads} \ ~{gridssVcf} @@ -289,9 +289,10 @@ task Virusbreakend { File virusbreakendDB String outputPath = "./virusbreakend.vcf" - String memory = "75G" + String extraMemoryGB = 10 + Int gridssMemoryGB = 60 Int threads = 8 - String dockerImage = "quay.io/biowdl/gridss:2.12.2" + String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image Int timeMinutes = 180 } @@ -304,8 +305,9 @@ task Virusbreakend { --workingdir . \ --reference ~{referenceFasta} \ --db virusbreakenddb \ - --jar /usr/local/share/gridss-2.12.2-0/gridss.jar \ + --jar /usr/local/share/gridss-2.13.2-1/gridss.jar \ -t ~{threads} \ + --gridssargs '--jvmheap ~{gridssMemoryGB}G' \ ~{bam} } @@ -316,7 +318,7 @@ task Virusbreakend { runtime { cpu: threads - memory: memory + memory: "~{gridssMemoryGB + extraMemoryGB}G" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -328,7 +330,8 @@ task Virusbreakend { referenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the reference.", category: "required"} virusbreakendDB: {description: "A .tar.gz containing the virusbreakend database.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} + extraMemoryGB: {description: "Extra memory needed for the job in GB.", category: "advanced"} + gridssMemoryGB: {description: "Memory assigned to GRIDSS in GB.", category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", diff --git a/hmftools.wdl b/hmftools.wdl index 628e2f9b..7d87a982 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -38,7 +38,7 @@ task Amber { String memory = "70G" String javaXmx = "64G" Int timeMinutes = 240 - String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" + String dockerImage = "quay.io/biocontainers/hmftools-amber:3.9--hdfd78af_0" } command { @@ -115,7 +115,7 @@ task Cobalt { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 240 - String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0" + String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.13--hdfd78af_0" } command { @@ -342,16 +342,17 @@ task Gripss { File knownFusionPairBedpe File breakendPon File breakpointPon - String referenceName - String tumorName + String? referenceName + String sampleName File vcf File vcfIndex + String outputId String outputDir = "./" String memory = "17G" String javaXmx = "16G" Int timeMinutes = 50 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.0--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.1--hdfd78af_0" } command { @@ -362,11 +363,11 @@ task Gripss { -known_hotspot_file ~{knownFusionPairBedpe} \ -pon_sgl_file ~{breakendPon} \ -pon_sv_file ~{breakpointPon} \ - -reference ~{referenceName} \ + ~{"-reference " + referenceName} \ -sample ~{tumorName} \ -vcf ~{vcf} \ -output_dir ~{outputDir} \ - -output_id somatic + -output_id ~{outputId} } output { @@ -405,116 +406,6 @@ task Gripss { } } -task GripssApplicationKt { - # Obsolete - input { - File inputVcf - String outputPath = "gripss.vcf.gz" - String tumorName - String referenceName - File referenceFasta - File referenceFastaFai - File referenceFastaDict - File breakpointHotspot - File breakendPon - File breakpointPon - - String memory = "32G" - String javaXmx = "31G" - Int timeMinutes = 45 - String dockerImage = 
"quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" - } - - command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ - com.hartwig.hmftools.gripss.GripssApplicationKt \ - -tumor ~{tumorName} \ - -reference ~{referenceName} \ - -ref_genome ~{referenceFasta} \ - -breakpoint_hotspot ~{breakpointHotspot} \ - -breakend_pon ~{breakendPon} \ - -breakpoint_pon ~{breakpointPon} \ - -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} \ - -paired_normal_tumor_ordinals - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - } - - runtime { - memory: memory - time_minutes: timeMinutes # !UnknownRuntimeKey - docker: dockerImage - } - - parameter_meta { - inputVcf: {description: "The input VCF.", category: "required"} - outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} - referenceName: {description: "The name of the normal sample.", category: "required"} - tumorName: {description: "The name of the tumor sample.", category: "required"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} - breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} - breakpointPon: {description: "Equivalent to the `-breakpoint_pon` option.", category: "required"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} - -task GripssHardFilterApplicationKt { - # Obsolete - input { - File inputVcf - String outputPath = "gripss_hard_filter.vcf.gz" - - String memory = "3G" - String javaXmx = "2G" - Int timeMinutes = 15 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" - } - - command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ - com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ - -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - } - - runtime { - memory: memory - time_minutes: timeMinutes # !UnknownRuntimeKey - docker: dockerImage - } - - parameter_meta { - inputVcf: {description: "The input VCF.", category: "required"} - outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} - task HealthChecker { input { String outputDir = "." 
@@ -1212,6 +1103,11 @@ task Sage { Boolean hg38 = false Boolean panelOnly = false String outputPath = "./sage.vcf.gz" + #The following should be in the same directory. + File geneDataCsv + File proteinFeaturesCsv + File transExonDataCsv + File transSpliceDataCsv String? referenceName File? referenceBam @@ -1229,7 +1125,7 @@ task Sage { String javaXmx = "50G" String memory = "51G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads) - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" + String dockerImage = "quay.io/biocontainers/hmftools-sage:3.0.3--hdfd78af_0" } command { @@ -1242,7 +1138,10 @@ task Sage { -hotspots ~{hotspots} \ -panel_bed ~{panelBed} \ -high_confidence_bed ~{highConfidenceBed} \ - -assembly ~{true="hg38" false="hg19" hg38} \ + -ref_genome_version ~{true="hg38" false="hg19" hg38} \ + -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -write_bqr_data \ + -write_bqr_plot \ ~{"-hotspot_min_tumor_qual " + hotspotMinTumorQual} \ ~{"-panel_min_tumor_qual " + panelMinTumorQual} \ ~{"-hotspot_max_germline_vaf " + hotspotMaxGermlineVaf} \ From 814184efd50fd87dc1d478d0a88d7af04816130b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 28 Jul 2022 16:14:48 +0200 Subject: [PATCH 02/75] start adjusting pave command --- hmftools.wdl | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 7d87a982..9d84f253 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -790,11 +790,14 @@ task Pave { File proteinFeaturesCsv File transExonDataCsv File transSpliceDataCsv + File mappabilityBed + File? ponFile + File? 
ponArtefactFile Int timeMinutes = 50 String javaXmx = "8G" String memory = "9G" - String dockerImage = "quay.io/biowdl/pave:v1.0" + String dockerImage = "quay.io/biowdl/pave:v1.2.2" } command { @@ -807,7 +810,11 @@ task Pave { -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ -ref_genome ~{referenceFasta} \ -ref_genome_version ~{refGenomeVersion} \ - -driver_gene_panel ~{driverGenePanel} + -driver_gene_panel ~{driverGenePanel} \ + -read_pass_only \ + -mappability_bed ~{mappabilityBed} \ + ~{"-pon_file " + ponFile} \ + ~{"-pon_artefact_file " + ponArtefactFile} \ } output { From 24bb770c2330afc53f1ddcbe01b43e2998904683 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 28 Jul 2022 16:17:29 +0200 Subject: [PATCH 03/75] start adjusting pave command --- hmftools.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 9d84f253..ddbc890e 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -793,6 +793,8 @@ task Pave { File mappabilityBed File? ponFile File? ponArtefactFile + String? ponFilters + Array[File]+? Int timeMinutes = 50 String javaXmx = "8G" @@ -815,6 +817,9 @@ task Pave { -mappability_bed ~{mappabilityBed} \ ~{"-pon_file " + ponFile} \ ~{"-pon_artefact_file " + ponArtefactFile} \ + ~{if defined(ponFilters) then ("-pon_filters '" + ponFilters + "'") else ""} \ + ~{if defined(gnomadFreqDir) then "-gnomad_freq_dir " + sub(gnomadFreqDir[0], basename(gnomadFreqDir[0]), "") else ""} \ + ~{if defined(gnomadFreqDir) then "-gnomad_load_chr_on_demand" else ""} } output { From 67cffb398345d1e7c84d9bd5edead18ea1239c81 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 29 Jul 2022 15:25:51 +0200 Subject: [PATCH 04/75] add more optinal inputs to Pave --- hmftools.wdl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index ddbc890e..5307489b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -794,7 +794,12 @@ task Pave { File? ponFile File? ponArtefactFile String? 
ponFilters - Array[File]+? + Array[File]+? gnomadFreqDir + File? clinvarVcf + File? clinvarVcfIndex + File? blacklistVcf + File? blacklistBed + File? blacklistVcfIndex Int timeMinutes = 50 String javaXmx = "8G" @@ -819,7 +824,10 @@ task Pave { ~{"-pon_artefact_file " + ponArtefactFile} \ ~{if defined(ponFilters) then ("-pon_filters '" + ponFilters + "'") else ""} \ ~{if defined(gnomadFreqDir) then "-gnomad_freq_dir " + sub(gnomadFreqDir[0], basename(gnomadFreqDir[0]), "") else ""} \ - ~{if defined(gnomadFreqDir) then "-gnomad_load_chr_on_demand" else ""} + ~{if defined(gnomadFreqDir) then "-gnomad_load_chr_on_demand" else ""} \ + ~{"-clinvar_vcf " + clinvarVcf} \ + ~{"-blacklist_bed " + blacklistBed} \ + ~{"-blacklist_vcf " + blacklistVcf} } output { From 7462967c94534014fe0125cc26d97f3c41fbb624 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 29 Jul 2022 15:30:20 +0200 Subject: [PATCH 05/75] adjust purple command --- hmftools.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 5307489b..03cbae3d 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -978,9 +978,11 @@ task Purple { File referenceFasta File referenceFastaFai File referenceFastaDict + String refGenomeVersion File driverGenePanel File somaticHotspots File germlineHotspots + File germlineDelFreqFile #The following should be in the same directory. 
File geneDataCsv File proteinFeaturesCsv @@ -1000,6 +1002,7 @@ task Purple { -reference ~{referenceName} \ -germline_vcf ~{germlineVcf} \ -germline_hotspots ~{germlineHotspots} \ + -germline_del_freq_file ~{germlineDelFreqFile} \ -tumor ~{tumorName} \ -output_dir ~{outputDir} \ -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ @@ -1010,6 +1013,7 @@ task Purple { -sv_recovery_vcf ~{fullSvVcf} \ -circos /usr/local/bin/circos \ -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{refGenomeVersion} \ -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ -run_drivers \ -somatic_hotspots ~{somaticHotspots} \ From 09b9db706d7e344c2a0b7e6e2d7a9949c96837ec Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 5 Aug 2022 12:07:21 +0200 Subject: [PATCH 06/75] update pave parameter_meta --- hmftools.wdl | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 90256ef6..6d6ffec1 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -785,12 +785,14 @@ task Pave { File referenceFastaDict String refGenomeVersion File driverGenePanel + File mappabilityBed + #The following should be in the same directory. File geneDataCsv File proteinFeaturesCsv File transExonDataCsv File transSpliceDataCsv - File mappabilityBed + File? ponFile File? ponArtefactFile String? 
ponFilters @@ -856,7 +858,16 @@ task Pave { proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} - + mappabilityBed: {description: "A bed file with mappability information.", category: "required"} + ponFile: {description: "A panel of normals files.", category: "common"} + ponArtefactFile: {description: "A panel of normals artefact file.", category: "common"} + ponFilters: {description: "Filters to be applied based on the panel of normals.", category: "common"} + gnomadFreqDir: {description: "A directory with gnomad frequency information.", category: "common"} + clinvarVcf: {description: "A clinvar VCF file.", category: "common"} + clinvarVcfIndex: {description: "The index for the clinvar VCF file.", category: "common"} + blacklistVcf: {description: "A blacklist VCF file.", category: "common"} + blacklistBed: {description: "A blacklist bed file.", category: "common"} + blacklistVcfIndex: {description: "The index for the blacklist vcf file.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} From 89c709b1aa222c51dd33470186540b4099edd3eb Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 5 Aug 2022 15:29:43 +0200 Subject: [PATCH 07/75] changes for WGSinCancerDiagnostics --- hmftools.wdl | 71 ++++++++++++++++++++++++++++++++++------------------ sambamba.wdl | 45 +++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 25 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 6d6ffec1..07b708db 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -364,17 +364,17 @@ task Gripss { -pon_sgl_file ~{breakendPon} \ -pon_sv_file ~{breakpointPon} \ ~{"-reference " + referenceName} \ - -sample ~{tumorName} \ + -sample ~{sampleName} \ -vcf ~{vcf} \ -output_dir ~{outputDir} \ -output_id ~{outputId} } output { - File fullVcf = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz" - File fullVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz.tbi" - File filteredVcf = "~{outputDir}/~{tumorName}.gripss.filtered.somatic.vcf.gz" - File filteredVcfIndex = "~{outputDir}/~{tumorName}.gripss.filtered.somatic.vcf.gz.tbi" + File fullVcf = "~{outputDir}/~{sampleName}.gripss.somatic.vcf.gz" + File fullVcfIndex = "~{outputDir}/~{sampleName}.gripss.somatic.vcf.gz.tbi" + File filteredVcf = "~{outputDir}/~{sampleName}.gripss.filtered.somatic.vcf.gz" + File filteredVcfIndex = "~{outputDir}/~{sampleName}.gripss.filtered.somatic.vcf.gz.tbi" } runtime { @@ -391,7 +391,7 @@ task Gripss { knownFusionPairBedpe: {description: "Equivalent to the `-known_hotspot_file` option.", category: "required"} breakendPon: {description: "Equivalent to the `-pon_sgl_file` option.", category: "required"} breakpointPon: {description: "Equivalent to the `-pon_sv_file` option.", category: "required"} - tumorName: {description: "The name of the tumor sample.", category: "required"} + sampleName: {description: "The name of the tumor sample.", category: "required"} referenceName: {description: "The name of the 
normal sample.", category: "required"} vcf: {description: "The input VCF.", category: "required"} vcfIndex: {description: "The index for the input VCF.", category: "required"} @@ -420,7 +420,7 @@ task HealthChecker { String javaXmx = "2G" String memory = "1G" Int timeMinutes = 1 - String dockerImage = "quay.io/biowdl/health-checker:3.2" + String dockerImage = "quay.io/biowdl/health-checker:3.4" } command { @@ -481,14 +481,20 @@ task Linx { String sampleName File svVcf File svVcfIndex - Array[File]+ purpleOutput + Array[File] purpleOutput = [] String refGenomeVersion String outputDir = "./linx" - File fragileSiteCsv + File? fragileSiteCsv File lineElementCsv - File knownFusionCsv + File? knownFusionCsv File driverGenePanel Boolean writeAllVisFusions = false + Boolean germline = false + Boolean checkFusions = true + Boolean checkDrivers = true + Boolean writeVisData = true + File? germlinePonSvFile + File? germlinePonSglFile #The following should be in the same directory. File geneDataCsv File proteinFeaturesCsv @@ -498,26 +504,34 @@ task Linx { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" + String dockerImage = "quay.io/biowdl/linx:1.19.1" #patched version of biocontainer + + String? DONOTDEFINE } + String? 
purpleDir = if length(purpleOutput) > 0 + then sub(purpleOutput[0], basename(purpleOutput[0]), "") + else DONOTDEFINE + command { linx -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -sample ~{sampleName} \ -sv_vcf ~{svVcf} \ - -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ + ~{"-purple_dir " + purpleDir} \ -ref_genome_version ~{refGenomeVersion} \ -output_dir ~{outputDir} \ - -fragile_site_file ~{fragileSiteCsv} \ + ~{"-fragile_site_file " + fragileSiteCsv} \ -line_element_file ~{lineElementCsv} \ -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ - -check_fusions \ - -known_fusion_file ~{knownFusionCsv} \ - -check_drivers \ + ~{if checkFusions then "-check_fusions" else ""} \ + ~{"-known_fusion_file " + knownFusionCsv} \ + ~{if checkDrivers then "-check_drivers" else ""} \ -driver_gene_panel ~{driverGenePanel} \ - -chaining_sv_limit 0 \ - -write_vis_data \ - ~{if writeAllVisFusions then "-write_all_vis_fusions" else ""} + ~{if writeVisData then "-write_vis_data" else ""} \ + ~{if writeAllVisFusions then "-write_all_vis_fusions" else ""} \ + ~{if germline then "-germline" else ""} \ + ~{"-germline_pon_sv_file " + germlinePonSvFile} \ + ~{"-germline_pon_sgl_file " + germlinePonSglFile} } output { @@ -584,14 +598,14 @@ task LinxVisualisations { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" + String dockerImage = "quay.io/biowdl/linx:1.19.1" #patched version of biocontainer } command { set -e mkdir -p ~{outputDir} java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-linx-1.18-0/sv-linx.jar \ + -cp /usr/local/share/hmftools-linx-1.19-0/linx.jar \ com.hartwig.hmftools.linx.visualiser.SvVisualiser \ -sample ~{sample} \ -ref_genome_version ~{refGenomeVersion} \ @@ -786,6 +800,7 @@ task Pave { String refGenomeVersion File driverGenePanel File mappabilityBed + Array[File] gnomadFreqFiles = [] #The following should be in 
the same directory. File geneDataCsv @@ -796,7 +811,6 @@ task Pave { File? ponFile File? ponArtefactFile String? ponFilters - Array[File]+? gnomadFreqDir File? clinvarVcf File? clinvarVcfIndex File? blacklistVcf @@ -807,8 +821,14 @@ task Pave { String javaXmx = "8G" String memory = "9G" String dockerImage = "quay.io/biowdl/pave:v1.2.2" + + String? DONOTDEFINE } + String? gnomadFreqDir = if length(gnomadFreqFiles) > 0 + then sub(gnomadFreqFiles[0], basename(gnomadFreqFiles[0]), "") + else DONOTDEFINE + command { set -e mkdir -p ~{outputDir} @@ -825,7 +845,7 @@ task Pave { ~{"-pon_file " + ponFile} \ ~{"-pon_artefact_file " + ponArtefactFile} \ ~{if defined(ponFilters) then ("-pon_filters '" + ponFilters + "'") else ""} \ - ~{if defined(gnomadFreqDir) then "-gnomad_freq_dir " + sub(gnomadFreqDir[0], basename(gnomadFreqDir[0]), "") else ""} \ + ~{"-gnomad_freq_dir " + gnomadFreqDir} \ ~{if defined(gnomadFreqDir) then "-gnomad_load_chr_on_demand" else ""} \ ~{"-clinvar_vcf " + clinvarVcf} \ ~{"-blacklist_bed " + blacklistBed} \ @@ -862,7 +882,7 @@ task Pave { ponFile: {description: "A panel of normals files.", category: "common"} ponArtefactFile: {description: "A panel of normals artefact file.", category: "common"} ponFilters: {description: "Filters to be applied based on the panel of normals.", category: "common"} - gnomadFreqDir: {description: "A directory with gnomad frequency information.", category: "common"} + gnomadFreqFiles: {description: "A directory with gnomad frequency information.", category: "common"} clinvarVcf: {description: "A clinvar VCF file.", category: "common"} clinvarVcfIndex: {description: "The index for the clinvar VCF file.", category: "common"} blacklistVcf: {description: "A blacklist VCF file.", category: "common"} @@ -1005,7 +1025,8 @@ task Purple { String memory = "9G" String javaXmx = "8G" # clone of quay.io/biocontainers/hmftools-purple:3.2--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' - String 
dockerImage = "quay.io/biowdl/hmftools-purple:3.2" + #String dockerImage = "quay.io/biowdl/hmftools-purple:3.2" FIXME see if biocontainer works + String dockerImage = "quay.io/biocontainers/hmftools-purple:3.5--hdfd78af_0" } command { diff --git a/sambamba.wdl b/sambamba.wdl index 6696668a..e3c3fa38 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -139,6 +139,51 @@ task Markdup { } } +task Slice { + input { + File bamFile + File bamIndex + String outputPath = "./sliced.bam" + File regions + + String memory = "8G" + Int timeMinutes = 720 + String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + + sambamba slice \ + -L ~{regions} \ + -o ~{outputPath} \ + ~{bamFile} + } + + output { + File slicedBam = outputPath + File slicedBamIndex = sub(outputPath, "\.bam$", ".bai") + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + bamIndex: {description: "The input BAM files.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "required"} + regions: {description: "Regiosn to get sliced.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Sort { input { File inputBam From 8308cc95eec0675374b3e09d1b165f3f93fed199 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 11 Aug 2022 15:19:50 +0200 Subject: [PATCH 08/75] start adding lilac --- hmftools.wdl | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 07b708db..b87fcedd 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -476,6 +476,65 @@ task HealthChecker { } } +task Lilac { + input { + String tumorName + File referenceBam + File referenceBamIndex + File tumorBam + File tumorBamIndex + String refGenomeVersion + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File geneCopyNumberFile + File somaticVariantsFile + File somaticVariantsFileIndex + String outputDir = "./lilac" + + #The following need to be in the same directory + File hlaRefAminoacidSequencesCsv + File hlaRefNucleotideSequencesCsv + File lilacAlleleFrequenciesCsv + + String javaXmx = "15G" + String memory = "16G" + Int timeMinutes = 1440 #FIXME + Int threads = 1 + String dockerImage = "" #TODO + } + + command { + LILAC -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -sample ~{tumorName} \ + -reference_bam ~{referenceBam} \ + -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{refGenomeVersion} \ + -resource_dir ~{sub(hlaRefAminoacidSequencesCsv, basename(hlaRefAminoacidSequencesCsv), "")} \ + -outputDir ~{outputDir} \ + -threads ~{threads} \ + -tumor_bam ~{tumorBam} \ + -gene_copy_number_file ~{geneCopyNumberFile} \ + -somatic_variants_file ~{somaticVariantsFile} + } + + output { + #TODO + } + + runtime { + memory: memory + cpu: threads + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + + } +} + + task Linx { input { String sampleName From 82e876c886a2df217125fa0163f0a637eb9c8d24 Mon Sep 17 00:00:00 2001 From: DavyCats 
Date: Thu, 18 Aug 2022 16:08:07 +0200 Subject: [PATCH 09/75] fix some issues --- gridss.wdl | 4 ++-- hmftools.wdl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 95b081f9..cd310244 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -34,7 +34,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" String memory = "9G" - String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image + String dockerImage = "quay.io/biowdl/gridss:2.13.2" #TODO check if we still need our own patched image Int timeMinutes = 120 } @@ -289,7 +289,7 @@ task Virusbreakend { File virusbreakendDB String outputPath = "./virusbreakend.vcf" - String extraMemoryGB = 10 + Int extraMemoryGB = 10 Int gridssMemoryGB = 60 Int threads = 12 String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image diff --git a/hmftools.wdl b/hmftools.wdl index b87fcedd..c381c321 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -114,7 +114,7 @@ task Cobalt { Int threads = 1 String memory = "5G" String javaXmx = "4G" - Int timeMinutes = 480 + Int timeMinutes = 960 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.13--hdfd78af_0" } From 1ea966ca383831448627ada50e8c1eaeaafcbc3e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 19 Aug 2022 12:08:14 +0200 Subject: [PATCH 10/75] add docker for lilac, increase memory for cobalt --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index c381c321..443d2da5 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -501,7 +501,7 @@ task Lilac { String memory = "16G" Int timeMinutes = 1440 #FIXME Int threads = 1 - String dockerImage = "" #TODO + String dockerImage = "quay.io/biocontainers/hmftools-lilac:1.1--hdfd78af_0" #TODO } command { From 7fe51ebbb0011f498a11f75a7379279d6fc4be70 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 19 Aug 
2022 15:47:18 +0200 Subject: [PATCH 11/75] fix sage ref genome version --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 443d2da5..b7611ee8 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1253,7 +1253,7 @@ task Sage { -hotspots ~{hotspots} \ -panel_bed ~{panelBed} \ -high_confidence_bed ~{highConfidenceBed} \ - -ref_genome_version ~{true="hg38" false="hg19" hg38} \ + -ref_genome_version ~{true="38" false="37" hg38} \ -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ -write_bqr_data \ -write_bqr_plot \ From 48f8245ab28bda543fe139e21fb91f88049032ac Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 22 Aug 2022 15:47:01 +0200 Subject: [PATCH 12/75] add missing required input for amber --- hmftools.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index b7611ee8..f5342ab4 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -33,6 +33,7 @@ task Amber { File referenceFasta File referenceFastaFai File referenceFastaDict + String refGenomeVersion Int threads = 2 String memory = "70G" @@ -50,6 +51,7 @@ task Amber { -output_dir ~{outputDir} \ -threads ~{threads} \ -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{refGenomeVersion} \ -loci ~{loci} } @@ -90,6 +92,7 @@ task Amber { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + refGenomeVersion: {description: "The version of the reference genome: 37 or 38.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", From 66394c7f93597ee2fbb26405d98925e4e65e9504 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 23 Aug 2022 13:37:59 +0200 Subject: [PATCH 13/75] fix some issues --- gridss.wdl | 4 ++-- sambamba.wdl | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index cd310244..ddc08fd9 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -29,12 +29,12 @@ task AnnotateInsertedSequence { File viralReference File viralReferenceFai File viralReferenceDict - File viralReferenceImg + Array[File]+ viralReferenceBwaIndex Int threads = 8 String javaXmx = "8G" String memory = "9G" - String dockerImage = "quay.io/biowdl/gridss:2.13.2" #TODO check if we still need our own patched image + String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image Int timeMinutes = 120 } diff --git a/sambamba.wdl b/sambamba.wdl index e3c3fa38..87cfaeb2 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -159,11 +159,12 @@ task Slice { -L ~{regions} \ -o ~{outputPath} \ ~{bamFile} + sambamba index ~{outputPath} } output { File slicedBam = outputPath - File slicedBamIndex = sub(outputPath, "\.bam$", ".bai") + File slicedBamIndex = "~{outputPath}.bai" } runtime { From 1372195c722e3658322c9442af9a3717f5aa0565 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 23 Aug 2022 13:42:19 +0200 Subject: [PATCH 14/75] fix parameter_meta --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index ddc08fd9..64b8a3f4 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -68,7 +68,7 @@ task AnnotateInsertedSequence { viralReference: {description: "A fasta file with viral sequences.", category: "required"} viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"} viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"} - viralReferenceImg: {description: 
"The BWA index image (generated with GATK BwaMemIndexImageCreator) of the viral reference.", category: "required"} + viralReferenceBwaIndex: {description: "The BWA index files of the viral reference.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", From b016041643e37ec6ed4277e7e8a5bc2369831747 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Fri, 26 Aug 2022 14:36:10 +0200 Subject: [PATCH 15/75] fix output names gripss --- hmftools.wdl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index f5342ab4..087002a6 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -373,11 +373,13 @@ task Gripss { -output_id ~{outputId} } + String suffix = if defined(referenceName) then "somatic" else "germline" + output { - File fullVcf = "~{outputDir}/~{sampleName}.gripss.somatic.vcf.gz" - File fullVcfIndex = "~{outputDir}/~{sampleName}.gripss.somatic.vcf.gz.tbi" - File filteredVcf = "~{outputDir}/~{sampleName}.gripss.filtered.somatic.vcf.gz" - File filteredVcfIndex = "~{outputDir}/~{sampleName}.gripss.filtered.somatic.vcf.gz.tbi" + File fullVcf = "~{outputDir}/~{sampleName}.gripss.~{suffix}.vcf.gz" + File fullVcfIndex = "~{outputDir}/~{sampleName}.gripss.~{suffix}.vcf.gz.tbi" + File filteredVcf = "~{outputDir}/~{sampleName}.gripss.filtered.~{suffix}.vcf.gz" + File filteredVcfIndex = "~{outputDir}/~{sampleName}.gripss.filtered.~{suffix}.vcf.gz.tbi" } runtime { From f84a0aee0705b1eadaaf8e738808daacf8b216cd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 12 Sep 2022 12:51:46 +0200 Subject: [PATCH 16/75] update docker image for hmftools amber and cobalt --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index a34decb7..aca5f18b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -39,7 
+39,7 @@ task Amber { String memory = "70GiB" String javaXmx = "64G" Int timeMinutes = 240 - String dockerImage = "quay.io/biocontainers/hmftools-amber:3.9--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-amber:3.9--hdfd78af_1" } command { @@ -118,7 +118,7 @@ task Cobalt { String memory = "5GiB" String javaXmx = "4G" Int timeMinutes = 960 - String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.13--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.13--hdfd78af_1" } command { From 1e627055d122a3a306c1464c2c417041eee87633 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 23 Sep 2022 11:46:34 +0200 Subject: [PATCH 17/75] update outputs for amber and cobalt --- hmftools.wdl | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index aca5f18b..8d6c1bca 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -58,18 +58,16 @@ task Amber { output { File version = "~{outputDir}/amber.version" File tumorBafPcf = "~{outputDir}/~{tumorName}.amber.baf.pcf" - File tumorBafTsv = "~{outputDir}/~{tumorName}.amber.baf.tsv" - File tumorBafVcf = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz" - File tumorBafVcfIndex = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz.tbi" + File tumorBafTsv = "~{outputDir}/~{tumorName}.amber.baf.tsv.gz" File tumorContaminationVcf = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz" File tumorContaminationVcfIndex = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz.tbi" File tumorContaminationTsv = "~{outputDir}/~{tumorName}.amber.contamination.tsv" File tumorQc = "~{outputDir}/~{tumorName}.amber.qc" + File normalHomozygousregionsTsv = "~{outputDir}/~{referenceName}.amber.homozygousregion.tsv" File normalSnpVcf = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz" File normalSnpVcfIndex = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz.tbi" - Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex, - tumorContaminationVcf, 
tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, - normalSnpVcf, normalSnpVcfIndex] + Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorContaminationVcf, + tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, normalSnpVcf, normalSnpVcfIndex] } runtime { @@ -139,10 +137,9 @@ task Cobalt { File normalRationPcf = "~{outputDir}/~{referenceName}.cobalt.ratio.pcf" File tumorGcMedianTsv = "~{outputDir}/~{tumorName}.cobalt.gc.median.tsv" File tumorRatioPcf = "~{outputDir}/~{tumorName}.cobalt.ratio.pcf" - File tumorRatioTsv = "~{outputDir}/~{tumorName}.cobalt.ratio.tsv" - File tumorChrLen = "~{outputDir}/~{tumorName}.chr.len" + File tumorRatioTsv = "~{outputDir}/~{tumorName}.cobalt.ratio.tsv.gz" Array[File] outputs = [version, normalGcMedianTsv, normalRationMedianTsv, - normalRationPcf, tumorGcMedianTsv, tumorRatioPcf, tumorRatioTsv, tumorChrLen] + normalRationPcf, tumorGcMedianTsv, tumorRatioPcf, tumorRatioTsv] } runtime { From a47d21a3e2c2477aa76727465348fe5b0eb40af2 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 23 Sep 2022 12:51:35 +0200 Subject: [PATCH 18/75] typo --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 8d6c1bca..987d4d85 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -562,7 +562,7 @@ task Linx { File transExonDataCsv File transSpliceDataCsv - String memory = "9iB" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/linx:1.19.1" #patched version of biocontainer From c748f839fa8a01175c04a4b41297193486a08387 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 17 Feb 2023 12:16:00 +0100 Subject: [PATCH 19/75] add some missing options to hmftools sage and cobalt --- hmftools.wdl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 90236a64..14ced8fe 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -111,6 +111,7 @@ task Cobalt { File tumorBamIndex String 
outputDir = "./cobalt" File gcProfile + File refGenomeFile Int threads = 1 String memory = "5GiB" @@ -127,7 +128,8 @@ task Cobalt { -tumor_bam ~{tumorBam} \ -output_dir ~{outputDir} \ -threads ~{threads} \ - -gc_profile ~{gcProfile} + -gc_profile ~{gcProfile} \ + -ref_genome ~{refGenomeFile} } output { @@ -158,6 +160,7 @@ task Cobalt { tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"} outputDir: {description: "The path to the output directory.", category: "common"} gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"} + refGenomeFile: {description: "The reference genome fasta file.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", @@ -1237,6 +1240,8 @@ task Sage { Int? panelMaxGermlineRelRawBaseQual String? mnvFilterEnabled File? coverageBed + Int? 
refSampleCount + Int threads = 32 String javaXmx = "16G" @@ -1268,6 +1273,7 @@ task Sage { ~{"-mnv_filter_enabled " + mnvFilterEnabled} \ ~{"-coverage_bed " + coverageBed} \ ~{true="-panel_only" false="" panelOnly} \ + ~{"-ref_sample_count " + refSampleCount} \ -threads ~{threads} \ -out ~{outputPath} } @@ -1310,6 +1316,7 @@ task Sage { panelMaxGermlineVaf: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"} panelMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"} mnvFilterEnabled: {description: "Equivalent to sage's `mnv_filter_enabled` option.", category: "advanced"} + refSampleCount: {description: "Equivalent to sage's `ref_sample_count` option.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", From 01cc5f76115bce536127e146e2c13d0ef1f93427 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 31 Mar 2023 16:57:07 +0200 Subject: [PATCH 20/75] make reference optional in amber and cobalt --- hmftools.wdl | 34 ++++++++++++++++++++++------------ picard.wdl | 4 +++- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index d6d65484..cf98cde8 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -22,9 +22,9 @@ version 1.0 task Amber { input { - String referenceName - File referenceBam - File referenceBamIndex + String? referenceName + File? referenceBam + File? referenceBamIndex String tumorName File tumorBam File tumorBamIndex @@ -35,6 +35,8 @@ task Amber { File referenceFastaDict String refGenomeVersion + Int? 
tumorOnlyMinDepth + Int threads = 2 String memory = "85GiB" String javaXmx = "80G" @@ -44,15 +46,16 @@ task Amber { command { AMBER -Xmx~{javaXmx} \ - -reference ~{referenceName} \ - -reference_bam ~{referenceBam} \ + ~{"-reference " + referenceName} \ + ~{"-reference_bam " + referenceBam} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ -output_dir ~{outputDir} \ -threads ~{threads} \ -ref_genome ~{referenceFasta} \ -ref_genome_version ~{refGenomeVersion} \ - -loci ~{loci} + -loci ~{loci} \ + ~{"-tumor-only-min-depth " + tumorOnlyMinDepth} } output { @@ -103,9 +106,9 @@ task Amber { task Cobalt { input { - String referenceName - File referenceBam - File referenceBamIndex + String? referenceName + File? referenceBam + File? referenceBamIndex String tumorName File tumorBam File tumorBamIndex @@ -113,6 +116,10 @@ task Cobalt { File gcProfile File refGenomeFile + File? tumorOnlyDiploidBed + File? targetRegionsNormalisationTsv + Int? pcfGamma + Int threads = 1 String memory = "5GiB" String javaXmx = "4G" @@ -122,14 +129,17 @@ task Cobalt { command { COBALT -Xmx~{javaXmx} \ - -reference ~{referenceName} \ - -reference_bam ~{referenceBam} \ + ~{"-reference " + referenceName} \ + ~{"-reference_bam " + referenceBam} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ -output_dir ~{outputDir} \ -threads ~{threads} \ -gc_profile ~{gcProfile} \ - -ref_genome ~{refGenomeFile} + -ref_genome ~{refGenomeFile} \ + ~{"-tumor_only_diploid_bed " + tumorOnlyDiploidBed} \ + ~{"-target_region " + targetRegionsNormalisationTsv} \ + ~{"-pcf_gamma" + pcfGamma} } output { diff --git a/picard.wdl b/picard.wdl index 6628cf0e..314e0a7b 100644 --- a/picard.wdl +++ b/picard.wdl @@ -510,6 +510,7 @@ task CollectWgsMetrics { Int? minimumMappingQuality Int? minimumBaseQuality Int? coverageCap + File? 
intervals String memory = "5GiB" String javaXmx = "4G" @@ -528,7 +529,8 @@ task CollectWgsMetrics { OUTPUT=~{outputPath} \ ~{"MINIMUM_MAPPING_QUALITY=" + minimumMappingQuality} \ ~{"MINIMUM_BASE_QUALITY=" + minimumBaseQuality} \ - ~{"COVERAGE_CAP=" + coverageCap} + ~{"COVERAGE_CAP=" + coverageCap} \ + ~{"INTERVALS=" + intervals} } output { From 20fffe3c090648550363f354fcefd13e670ee7e2 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 14 Apr 2023 16:27:05 +0200 Subject: [PATCH 21/75] update various hmftools, add task for svprep gridss --- gridss.wdl | 83 ++++++++++++++++- hmftools.wdl | 253 ++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 292 insertions(+), 44 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 2066899d..57515750 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -34,7 +34,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" String memory = "9GiB" - String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image + String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" Int timeMinutes = 120 } @@ -269,7 +269,7 @@ task GRIDSS { Int nonJvmMemoryGb = 10 Int threads = 12 Int timeMinutes = ceil(7200 / threads) + 1800 - String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image + String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" } command { @@ -382,6 +382,83 @@ task GridssAnnotateVcfRepeatmasker { } } +task GridssSvPrep { + input { + Array[String]+ tumorLabel + Array[File]+ tumorBam + Array[File]+ tumorBai + Array[File]+ tumorFilteredBam + Array[File]+ tumorFilteredBai + BwaIndex reference + File blacklistBed + File gridssProperties + + String? normalLabel + File? normalBam + File? normalBai + File? normalFilteredBam + File? 
normalFilteredBai + String outputPath = "gridss.vcf.gz" + + Int jvmHeapSizeGb = 48 + Int nonJvmMemoryGb = 10 + Int threads = 10 + Int timeMinutes = ceil(7200 / threads) + 1800 + String dockerImage = "quay.io/biowdl/gridss:2.13.2_1" + } + + command { + gridss_sv-prep \ + --steps all \ + --output ~{outputPath} \ + --wirkingdir . \ + --reference ~{reference.fastaFile} \ + --jar /usr/local/share/gridss-2.13.2-1/gridss.jar \ + --blacklist ~{blacklistBed} \ + --configuration ~{gridssProperties} \ + --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{sep="," tumorLabel} \ + --bams ~{normalBam}~{true="," false="" defined(normalBam)}~{sep="," tumorBam} \ + --filtered_bams ~{normalFilteredBam}~{true="," false="" defined(normalFilteredBam)}~{sep="," tumorFilteredBam} \ + --jvmheap ~{jvmHeapSizeGb}G \ + --threads ~{threads} + } + + output { + File vcf = outputPath + File vcfIndex = outputPath + ".tbi" + } + + runtime { + cpu: threads + memory: "~{jvmHeapSizeGb + nonJvmMemoryGb}GiB" + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + tumorBam: {description: "The input BAM file. 
This should be the tumor/case sample in case of a paired analysis.", category: "required"} + tumorBai: {description: "The index for tumorBam.", category: "required"} + tumorFilteredBam: {description: "The input BAM file preprocessed by hmftools' sv-prep.", category: "required"} + tumorFilteredBai: {description: "The index for tumorFilteredBam.", category: "required"} + tumorLabel: {description: "The name of the (tumor) sample.", category: "required"} + reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"} + outputPath: {description: "The path for the output VCf file.", category: "common"} + normalBam: {description: "The BAM file for the normal/control sample.", category: "advanced"} + normalBai: {description: "The index for normalBam.", category: "advanced"} + normalFilteredBam: {description: "The BAM file for the normal control sample preprocessed by hmftools' sv-prep.", category: "required"} + normalFilteredBai: {description: "The index for normalFilteredBam.", category: "required"} + normalLabel: {description: "The name of the normal sample.", category: "advanced"} + blacklistBed: {description: "A bed file with blaclisted regins.", category: "advanced"} + gridssProperties: {description: "A properties file for gridss.", category: "advanced"} + + threads: {description: "The number of the threads to use.", category: "advanced"} + jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling", category: "advanced"} + nonJvmMemoryGb: {description: "The amount of memory in Gb to be requested besides JVM memory.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + task SomaticFilter { input { File vcfFile @@ -451,7 +528,7 @@ task Virusbreakend { Int extraMemoryGB = 10 Int gridssMemoryGB = 60 Int threads = 12 - String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image + String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" Int timeMinutes = 320 } diff --git a/hmftools.wdl b/hmftools.wdl index cf98cde8..9e133723 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -355,17 +355,24 @@ task Gripss { File knownFusionPairBedpe File breakendPon File breakpointPon + File repeatMaskFile String? referenceName String sampleName File vcf File vcfIndex String outputId String outputDir = "./" + Boolean hg38 = false + Int? hardMinTumorQual + Int? minQualBreakPoint + Int? minQualBreakEnd + Boolean filterSgls = false + Boolean germline = false String memory = "17GiB" String javaXmx = "16G" Int timeMinutes = 50 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.1--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.3.2--hdfd78af_0" } command { @@ -373,14 +380,21 @@ task Gripss { mkdir -p ~{outputDir} gripss -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{if hg38 then "38" else "37"} \ -known_hotspot_file ~{knownFusionPairBedpe} \ -pon_sgl_file ~{breakendPon} \ -pon_sv_file ~{breakpointPon} \ + -repeat_mask_file ~{repeatMaskFile} \ ~{"-reference " + referenceName} \ -sample ~{sampleName} \ -vcf ~{vcf} \ -output_dir ~{outputDir} \ - -output_id ~{outputId} + -output_id ~{outputId} \ + ~{if filterSgls then "-filter_sgls" else ""} \ + ~{"-hard_min_tumor_qual " + hardMinTumorQual} \ + ~{"-min_qual_break_point " + minQualBreakPoint} \ + ~{"-min_qual_break_end " + minQualBreakEnd} \ + ~{if germline then "-germline" else ""} } String suffix = if defined(referenceName) then 
"somatic" else "germline" @@ -890,11 +904,12 @@ task Pave { File? blacklistVcf File? blacklistBed File? blacklistVcfIndex + Boolean writePassOnly = false Int timeMinutes = 50 String javaXmx = "8G" String memory = "9GiB" - String dockerImage = "quay.io/biowdl/pave:v1.2.2" + String dockerImage = "quay.io/biocontainers/hmftools-pave:1.4.1--hdfd78af_0" String? DONOTDEFINE } @@ -923,7 +938,8 @@ task Pave { ~{if defined(gnomadFreqDir) then "-gnomad_load_chr_on_demand" else ""} \ ~{"-clinvar_vcf " + clinvarVcf} \ ~{"-blacklist_bed " + blacklistBed} \ - ~{"-blacklist_vcf " + blacklistVcf} + ~{"-blacklist_vcf " + blacklistVcf} \ + ~{if writePassOnly then "-write_pass_only" else ""} } output { @@ -1068,14 +1084,14 @@ task Protect { task Purple { input { - String referenceName + String? referenceName String tumorName String outputDir = "./purple" Array[File]+ amberOutput Array[File]+ cobaltOutput File gcProfile File somaticVcf - File germlineVcf + File? germlineVcf File filteredSvVcf File filteredSvVcfIndex File fullSvVcf @@ -1086,10 +1102,15 @@ task Purple { String refGenomeVersion File driverGenePanel File somaticHotspots - File germlineHotspots - File germlineDelFreqFile + File? germlineHotspots + File? germlineDelFreqFile Float? highlyDiploidPercentage Float? somaticMinPuritySpread + File? targetRegionsBed + File? targetRegionsRatios + File? targetRegionsMsiIndels + Int? minDiploidTumorRatioCount + Int? minDiploidTumorRatioCountCentromere #The following should be in the same directory. 
File geneDataCsv File proteinFeaturesCsv @@ -1102,15 +1123,15 @@ task Purple { String javaXmx = "8G" # clone of quay.io/biocontainers/hmftools-purple:3.2--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' #String dockerImage = "quay.io/biowdl/hmftools-purple:3.2" FIXME see if biocontainer works - String dockerImage = "quay.io/biocontainers/hmftools-purple:3.5--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-purple:3.7.1--hdfd78af_0" } command { PURPLE -Xmx~{javaXmx} \ - -reference ~{referenceName} \ - -germline_vcf ~{germlineVcf} \ - -germline_hotspots ~{germlineHotspots} \ - -germline_del_freq_file ~{germlineDelFreqFile} \ + ~{"-reference " + referenceName} \ + ~{"-germline_vcf " + germlineVcf} \ + ~{"-germline_hotspots " + germlineHotspots} \ + ~{"-germline_del_freq_file " + germlineDelFreqFile} \ -tumor ~{tumorName} \ -output_dir ~{outputDir} \ -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ @@ -1128,6 +1149,11 @@ task Purple { -driver_gene_panel ~{driverGenePanel} \ ~{"-highly_diploid_percentage " + highlyDiploidPercentage} \ ~{"-somatic_min_purity_spread " + somaticMinPuritySpread} \ + ~{"-target_regions_bed " + targetRegionsBed} \ + ~{"-target_regions_ratios " + targetRegionsRatios} \ + ~{"-target_regions_msi_indels " + targetRegionsMsiIndels} \ + ~{"-min_diploid_tumor_ratio_count " + minDiploidTumorRatioCount} \ + ~{"-min_diploid_tumor_ratio_count_centromere" + minDiploidTumorRatioCountCentromere} \ -threads ~{threads} } @@ -1227,15 +1253,16 @@ task Purple { task Sage { input { - String tumorName - File tumorBam - File tumorBamIndex + Array[String]+ tumorName + Array[File]+ tumorBam + Array[File]+ tumorBamIndex File referenceFasta File referenceFastaDict File referenceFastaFai File hotspots File panelBed File highConfidenceBed + File coverageBed Boolean hg38 = false Boolean panelOnly = false String outputPath = "./sage.vcf.gz" @@ -1245,63 +1272,65 @@ task Sage { File transExonDataCsv File 
transSpliceDataCsv - String? referenceName - File? referenceBam - File? referenceBamIndex + Array[String] referenceName = [] + Array[File] referenceBam = [] + Array[File] referenceBamIndex = [] Int? hotspotMinTumorQual Int? panelMinTumorQual Int? hotspotMaxGermlineVaf Int? hotspotMaxGermlineRelRawBaseQual Int? panelMaxGermlineVaf Int? panelMaxGermlineRelRawBaseQual - String? mnvFilterEnabled - File? coverageBed Int? refSampleCount - + Float? hotspotMinTumorVaf + Int? highConfidenceMinTumorQual + Int? lowConfidenceMinTumorQual Int threads = 32 String javaXmx = "16G" String memory = "20GiB" Int timeMinutes = 720 - String dockerImage = "quay.io/biocontainers/hmftools-sage:3.0.3--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-sage:3.2.3--hdfd78af_0" } command { SAGE -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -tumor ~{tumorName} \ - -tumor_bam ~{tumorBam} \ - ~{"-reference " + referenceName} \ - ~{"-reference_bam " + referenceBam} \ - -ref_genome ~{referenceFasta} \ + -tumor ~{sep="," tumorName} \ + -tumor_bam ~{sep="," tumorBam} \ + ~{if length(referenceName) > 0 then "-reference" else ""} ~{sep="," referenceName} \ + ~{if length(referenceBam) > 0 then "-reference_bam" else ""} ~{sep="," referenceBam} \ -hotspots ~{hotspots} \ - -panel_bed ~{panelBed} \ + ~{"-hotspot_min_tumor_qual " + hotspotMinTumorQual} \ -high_confidence_bed ~{highConfidenceBed} \ + -panel_bed ~{panelBed} \ + -coverage_bed ~{coverageBed} \ + -ref_genome ~{referenceFasta} \ -ref_genome_version ~{true="38" false="37" hg38} \ -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ -write_bqr_data \ -write_bqr_plot \ - ~{"-hotspot_min_tumor_qual " + hotspotMinTumorQual} \ + -out ~{outputPath} \ + -threads ~{threads} \ ~{"-panel_min_tumor_qual " + panelMinTumorQual} \ ~{"-hotspot_max_germline_vaf " + hotspotMaxGermlineVaf} \ ~{"-hotspot_max_germline_rel_raw_base_qual " + hotspotMaxGermlineRelRawBaseQual} \ ~{"-panel_max_germline_vaf " + panelMaxGermlineVaf} \ 
~{"-panel_max_germline_rel_raw_base_qual " + panelMaxGermlineRelRawBaseQual} \ - ~{"-mnv_filter_enabled " + mnvFilterEnabled} \ - ~{"-coverage_bed " + coverageBed} \ ~{true="-panel_only" false="" panelOnly} \ ~{"-ref_sample_count " + refSampleCount} \ - -threads ~{threads} \ - -out ~{outputPath} + ~{"-hotspot_min_tumor_vaf " + hotspotMinTumorVaf} \ + ~{"-high_confidence_min_tumor_qual " + highConfidenceMinTumorQual} \ + ~{"-low_confidence_min_tumor_qual " + lowConfidenceMinTumorQual} } - output { + output { #FIXME does it produce multiple plots/tsvs if multiple samples are given? File outputVcf = outputPath File outputVcfIndex = outputPath + ".tbi" - File? referenceSageBqrPng = "~{referenceName}.sage.bqr.png" - File? referenceSageBqrTsv = "~{referenceName}.sage.bqr.tsv" - File tumorSageBqrPng = "~{tumorName}.sage.bqr.png" - File tumorSageBqrTsv = "~{tumorName}.sage.bqr.tsv" - File sageGeneCoverageTsv = "~{tumorName}.sage.gene.coverage.tsv" + File? referenceSageBqrPng = "~{referenceName[0]}.sage.bqr.png" + File? 
referenceSageBqrTsv = "~{referenceName[0]}.sage.bqr.tsv" + File tumorSageBqrPng = "~{tumorName[0]}.sage.bqr.png" + File tumorSageBqrTsv = "~{tumorName[0]}.sage.bqr.tsv" + File sageGeneCoverageTsv = "~{tumorName[0]}.sage.gene.coverage.tsv" } runtime { @@ -1331,9 +1360,151 @@ task Sage { hotspotMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `hotspot_max_germline_rel_raw_base_qual` option.", category: "advanced"} panelMaxGermlineVaf: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"} panelMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"} - mnvFilterEnabled: {description: "Equivalent to sage's `mnv_filter_enabled` option.", category: "advanced"} refSampleCount: {description: "Equivalent to sage's `ref_sample_count` option.", category: "advanced"} + hg38: {description: "Whether or not the refernce genome is HG18, if false HG19 is assumed.", category: "common"} + + threads: {description: "The numve of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task SvPrep { + # for ref also add tumorJunctionFile + input { + String sampleName + File bamFile + File bamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai + File blacklistBed + File knownFusionBed + String outputDir = "." + + File? 
existingJunctionFile + Boolean hg38 = false + + Int threads = 10 + String javaXmx = "48G" + String memory = "50GiB" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/hmftools-sv-prep:1.1--hdfd78af_0" + } + + command { + set -e + SvPrep -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -sample ~{sampleName} \ + -bam_file ~{bamFile} \ + -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{true="38" false="37" hg38} \ + -blacklist_bed ~{blacklistBed} \ + -known_fusion_bed ~{knownFusionBed} \ + ~{"-existing_junction_file " + existingJunctionFile} \ + -write_types "JUNCTIONS;BAM;FRAGMENT_LENGTH_DIST" \ + -output_dir ~{outputDir} \ + -threads ~{threads} + samtools sort -O bam ~{outputDir}/~{sampleName}.sv_prep.bam -o ~{outputDir}/~{sampleName}.sv_prep.sorted.bam + samtools index ~{outputDir}/~{sampleName}.sv_prep.sorted.bam + } + + output { + File preppedBam = "~{outputDir}/~{sampleName}.sv_prep.sorted.bam" + File preppedBamIndex = "~{outputDir}/~{sampleName}.sv_prep.sorted.bam.bai" + File junctions = "~{outputDir}/~{sampleName}.sv_prep.junctions.csv" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + sampleName: {description: "The name of the sample.", category: "required"} + bamFile: {description: "The BAM file to prepare for SV calling with GRIDSS.", category: "required"} + bamIndex: {description: "The index for the BAM file.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + blacklistBed: {description: "Blacklist bed file.", category: "required"} + knownFusionBed: {description: "Bed file with known fusion sites", category: "required"} + outputDir: {description: "Path to the 
output directory.", category: "common"} + existingJunctionFile: {description: "Junctions file generated by an earlier run of this tool, eg. from a paired sample.", category: "common"} + hg38: {description: "Whether or not the refernce genome is HG18, if false HG19 is assumed.", category: "common"} + + threads: {description: "The numve of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task SvPrepDepthAnnotator { + input { + File inputVcf + File inputVcfIndex + Array[File]+ bamFiles + Array[File]+ bamIndexes + Array[String]+ samples + File referenceFasta + File referenceFastaDict + File referenceFastaFai + Boolean hg38 = false + String outputVcf = "gridss.depth_annotated.vcf.gz" + + Int threads = 10 + String javaXmx = "48G" + String memory = "50GiB" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/hmftools-sv-prep:1.1--hdfd78af_0" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-sv-prep-1.1-0/sv-prep.jar \ + com.hartwig.hmftools.svprep.depth.DepthAnnotator \ + -input_vcf ~{inputVcf} \ + -output_vcf ~{outputVcf} \ + -samples ~{sep="," samples} \ + -bam_files ~{sep="," bamFiles} \ + -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{if hg38 then "38" else "37"} \ + -threads ~{threads} + } + + output { + File vcf = outputVcf + File vcfIndex = outputVcf + ".tbi" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + cpu: threads + docker: dockerImage + memory: 
memory + } + + parameter_meta { + samples: {description: "The names of the samples.", category: "required"} + bamFiles: {description: "The BAM files.", category: "required"} + bamIndexes: {description: "The indexes for the BAM files.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + hg38: {description: "Whether or not the refernce genome is HG18, if false HG19 is assumed.", category: "common"} + outputVcf: {description: "The path for the output VCF.", category: "common"} + threads: {description: "The numve of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} From e2e52b7407252915ae16a5850ed03f916dfcf818 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 19 Apr 2023 16:37:37 +0200 Subject: [PATCH 22/75] update samtools view --- samtools.wdl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index fbb445e7..875b9ddb 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -566,7 +566,9 @@ task View { File inFile String outputFileName = "view.bam" Boolean uncompressedBamOutput = false + Boolean useIndex = false + File? inFileIndex File? referenceFasta Int? includeFilter Int? 
excludeFilter @@ -589,7 +591,8 @@ task View { samtools view -b \ ~{"-T " + referenceFasta} \ ~{"-o " + outputFileName} \ - ~{true="-u " false="" uncompressedBamOutput} \ + ~{if uncompressedBamOutput then "-u" else ""} \ + ~{if useIndex then "-M" else ""} \ ~{"-f " + includeFilter} \ ~{"-F " + excludeFilter} \ ~{"-G " + excludeSpecificFilter} \ @@ -617,6 +620,8 @@ task View { inFile: {description: "A BAM, SAM or CRAM file.", category: "required"} outputFileName: {description: "The location the output BAM file should be written.", category: "common"} uncompressedBamOutput: {description: "Equivalent to samtools view's `-u` flag.", category: "advanced"} + useIndex: {description: "Equivalent to samtools view's `-M` flag.", category: "advanced"} + inFileIndex: {description: "An index for the inFile.", category: "common"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} includeFilter: {description: "Equivalent to samtools view's `-f` option.", category: "advanced"} excludeFilter: {description: "Equivalent to samtools view's `-F` option.", category: "advanced"} From f8f3efed77e1c4cf1ce093f09fefb43f92a6f087 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 20 Apr 2023 16:05:13 +0200 Subject: [PATCH 23/75] update linx and lilac --- hmftools.wdl | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 9e133723..cacfac7c 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -507,18 +507,18 @@ task HealthChecker { task Lilac { input { - String tumorName + String sampleName File referenceBam File referenceBamIndex - File tumorBam - File tumorBamIndex + File? tumorBam + File? tumorBamIndex String refGenomeVersion File referenceFasta File referenceFastaFai File referenceFastaDict - File geneCopyNumberFile - File somaticVariantsFile - File somaticVariantsFileIndex + File? geneCopyNumberFile + File? somaticVariantsFile + File? 
somaticVariantsFileIndex String outputDir = "./lilac" #The following need to be in the same directory @@ -530,21 +530,21 @@ task Lilac { String memory = "16GiB" Int timeMinutes = 1440 #FIXME Int threads = 1 - String dockerImage = "quay.io/biocontainers/hmftools-lilac:1.1--hdfd78af_0" #TODO + String dockerImage = "quay.io/biocontainers/hmftools-lilac:1.4.2--hdfd78af_0" } command { LILAC -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -sample ~{tumorName} \ + -sample ~{sampleName} \ -reference_bam ~{referenceBam} \ -ref_genome ~{referenceFasta} \ -ref_genome_version ~{refGenomeVersion} \ -resource_dir ~{sub(hlaRefAminoacidSequencesCsv, basename(hlaRefAminoacidSequencesCsv), "")} \ -outputDir ~{outputDir} \ -threads ~{threads} \ - -tumor_bam ~{tumorBam} \ - -gene_copy_number_file ~{geneCopyNumberFile} \ - -somatic_variants_file ~{somaticVariantsFile} + ~{"-tumor_bam " + tumorBam} \ + ~{"-gene_copy_number " + geneCopyNumberFile} \ + ~{"-somatic_vcf " + somaticVariantsFile} } output { @@ -581,8 +581,6 @@ task Linx { Boolean checkFusions = true Boolean checkDrivers = true Boolean writeVisData = true - File? germlinePonSvFile - File? germlinePonSglFile #The following should be in the same directory. File geneDataCsv File proteinFeaturesCsv @@ -592,7 +590,7 @@ task Linx { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 10 - String dockerImage = "quay.io/biowdl/linx:1.19.1" #patched version of biocontainer + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.22.1--hdfd78af_0" String? 
DONOTDEFINE } @@ -617,9 +615,7 @@ task Linx { -driver_gene_panel ~{driverGenePanel} \ ~{if writeVisData then "-write_vis_data" else ""} \ ~{if writeAllVisFusions then "-write_all_vis_fusions" else ""} \ - ~{if germline then "-germline" else ""} \ - ~{"-germline_pon_sv_file " + germlinePonSvFile} \ - ~{"-germline_pon_sgl_file " + germlinePonSglFile} + ~{if germline then "-germline" else ""} } output { @@ -686,14 +682,14 @@ task LinxVisualisations { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biowdl/linx:1.19.1" #patched version of biocontainer + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.22.1--hdfd78af_0" } command { set -e mkdir -p ~{outputDir} java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-linx-1.19-0/linx.jar \ + -cp /usr/local/share/hmftools-linx-1.22.1-0/linx.jar \ com.hartwig.hmftools.linx.visualiser.SvVisualiser \ -sample ~{sample} \ -ref_genome_version ~{refGenomeVersion} \ From 1673a014d2f84347cf8a52fc9bb10a57e62b6a45 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 21 Apr 2023 15:38:35 +0200 Subject: [PATCH 24/75] update peach, add rose, etc. 
--- gridss.wdl | 1 - hmftools.wdl | 186 +++++++++++++++++++++++++++++++++++++++++++-------- peach.wdl | 4 +- 3 files changed, 159 insertions(+), 32 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 57515750..789335a4 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -521,7 +521,6 @@ task Virusbreakend { File referenceFasta File referenceFastaFai File referenceFastaDict - File referenceImg File virusbreakendDB String outputPath = "./virusbreakend.vcf" diff --git a/hmftools.wdl b/hmftools.wdl index cacfac7c..6d8a494b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -189,7 +189,7 @@ task CupGenerateReport { String memory = "5GiB" Int timeMinutes = 10 - String dockerImage = "quay.io/biowdl/cuppa:1.6" + String dockerImage = "quay.io/biowdl/cuppa:1.7.1" } # This script writes to the directory that the input is located in. @@ -245,34 +245,29 @@ task Cuppa { input { Array[File]+ linxOutput Array[File]+ purpleOutput + File virusInterpreterOutput String sampleName Array[String]+ categories = ["DNA"] Array[File]+ referenceData - File purpleSvVcf - File purpleSvVcfIndex - File purpleSomaticVcf - File purpleSomaticVcfIndex String outputDir = "./cuppa" String javaXmx = "4G" String memory = "5GiB" Int timeMinutes = 10 - String dockerImage = "quay.io/biowdl/cuppa:1.6" + String dockerImage = "quay.io/biowdl/cuppa:1.7.1" } command { set -e mkdir -p sampleData ~{outputDir} ln -s -t sampleData ~{sep=" " linxOutput} ~{sep=" " purpleOutput} + ln -s -t sampleData ~{virusInterpreterOutput} cuppa -Xmx~{javaXmx} \ -output_dir ~{outputDir} \ - -output_id ~{sampleName} \ -categories '~{sep="," categories}' \ -ref_data_dir ~{sub(referenceData[0], basename(referenceData[0]), "")} \ -sample_data_dir sampleData \ - -sample_data ~{sampleName} \ - -sample_sv_file ~{purpleSvVcf} \ - -sample_somatic_vcf ~{purpleSomaticVcf} + -sample_data ~{sampleName} } output { @@ -291,10 +286,6 @@ task Cuppa { sampleName: {description: "The name of the sample.", category: "required"} categories: {description: "The 
classifiers to use.", category: "advanced"} referenceData : {description: "The reference data.", category: "required"} - purpleSvVcf: {description: "The VCF file produced by purple which contains structural variants.", category: "required"} - purpleSvVcfIndex: {description: "The index of the structural variants VCF file produced by purple.", category: "required"} - purpleSomaticVcf: {description: "The VCF file produced by purple which contains somatic variants.", category: "required"} - purpleSomaticVcfIndex: {description: "The index of the somatic VCF file produced by purple.", category: "required"} outputDir: {description: "The directory the ouput will be placed in.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", @@ -313,7 +304,7 @@ task CuppaChart { String memory = "4GiB" Int timeMinutes = 5 - String dockerImage = "quay.io/biowdl/cuppa:1.6" + String dockerImage = "quay.io/biowdl/cuppa:1.7.1" } command { @@ -548,7 +539,8 @@ task Lilac { } output { - #TODO + File lilacCsv = "~{outputDir}/~{sampleName}.lilac.csv" + File lilacQcCsv = "~{outputDir}/~{sampleName}.lilac.qc.csv" } runtime { @@ -627,16 +619,18 @@ task Linx { File linxLinks = "~{outputDir}/~{sampleName}.linx.links.tsv" File linxSvs = "~{outputDir}/~{sampleName}.linx.svs.tsv" File linxVisCopyNumber = "~{outputDir}/~{sampleName}.linx.vis_copy_number.tsv" - File linxVisFusion = "~{outputDir}/~{sampleName}.linx.vis_fusion.tsv" - File linxVisGeneExon = "~{outputDir}/~{sampleName}.linx.vis_gene_exon.tsv" - File linxVisProteinDomain = "~{outputDir}/~{sampleName}.linx.vis_protein_domain.tsv" - File linxVisSegments = "~{outputDir}/~{sampleName}.linx.vis_segments.tsv" - File linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv" + File? linxVisFusion = "~{outputDir}/~{sampleName}.linx.vis_fusion.tsv" + File? 
linxVisGeneExon = "~{outputDir}/~{sampleName}.linx.vis_gene_exon.tsv" + File? linxVisProteinDomain = "~{outputDir}/~{sampleName}.linx.vis_protein_domain.tsv" + File? linxVisSegments = "~{outputDir}/~{sampleName}.linx.vis_segments.tsv" + File? linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv" + File? linxGermlineDriverCatalogTsv = "~{outputDir}/~{sampleName}.linx.germline.driver.catalog.tsv" + File? linxGermlineDisruptionTsv = "~{outputDir}/~{sampleName}.linx.germline.disruption.tsv" File linxVersion = "~{outputDir}/linx.version" - Array[File] outputs = [driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion, + Array[File] outputs = select_all([driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion, linxLinks, linxSvs, linxVisCopyNumber, linxVisFusion, linxVisGeneExon, linxVisProteinDomain, linxVisSegments, linxVisSvData, - linxVersion] + linxGermlineDriverCatalogTsv, linxGermlineDisruptionTsv, linxVersion]) } runtime { @@ -743,19 +737,25 @@ task Orange { File sageSomaticTumorSampleBqrPlot File purpleGeneCopyNumberTsv File purpleGermlineDriverCatalogTsv + File purpleGermlineDeletionTsv File purpleGermlineVariantVcf File purpleGermlineVariantVcfIndex Array[File]+ purplePlots File purplePurityTsv File purpleQcFile + File purpleSomaticCopyNumberFile File purpleSomaticDriverCatalogTsv File purpleSomaticVariantVcf File purpleSomaticVariantVcfIndex + File lilacQcCsv + File lilacResultCsv File linxFusionTsv File linxBreakendTsv File linxDriverCatalogTsv File linxDriverTsv + File linxGermlineDisruptionTsv Array[File]+ linxPlots + File linxStructuralVariantTsv File cuppaResultCsv File cuppaSummaryPlot File? 
cuppaFeaturePlot @@ -766,11 +766,14 @@ task Orange { #File pipelineVersionFile File cohortMappingTsv File cohortPercentilesTsv + Boolean hg38 = false + File driverGenePanel + File knownFusionFile String memory = "17GiB" String javaXmx = "16G" Int timeMinutes = 10 - String dockerImage = "quay.io/biowdl/orange:v1.6" + String dockerImage = "quay.io/biocontainers/hmftools-orange:1.10.2--hdfd78af_0" } command { @@ -778,6 +781,7 @@ task Orange { mkdir -p ~{outputDir} orange -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -output_dir ~{outputDir} \ + -ref_genome_version ~{if hg38 then "38" else "37"} \ -doid_json ~{doidJson} \ -primary_tumor_doids '~{sep=";" sampleDoids}' \ -max_evidence_level C \ @@ -792,17 +796,23 @@ task Orange { -sage_somatic_tumor_sample_bqr_plot ~{sageSomaticTumorSampleBqrPlot} \ -purple_gene_copy_number_tsv ~{purpleGeneCopyNumberTsv} \ -purple_germline_driver_catalog_tsv ~{purpleGermlineDriverCatalogTsv} \ + -purple_germline_deletion_tsv ~{purpleGermlineDeletionTsv} \ -purple_germline_variant_vcf ~{purpleGermlineVariantVcf} \ -purple_plot_directory ~{sub(purplePlots[0], basename(purplePlots[0]), "")} \ -purple_purity_tsv ~{purplePurityTsv} \ -purple_qc_file ~{purpleQcFile} \ + -purple_somatic_copy_number_tsv ~{purpleSomaticCopyNumberFile} \ -purple_somatic_driver_catalog_tsv ~{purpleSomaticDriverCatalogTsv} \ -purple_somatic_variant_vcf ~{purpleSomaticVariantVcf} \ + -lilac_qc_csv ~{lilacQcCsv} \ + -lilac_result_csv ~{lilacResultCsv} \ -linx_fusion_tsv ~{linxFusionTsv} \ -linx_breakend_tsv ~{linxBreakendTsv} \ -linx_driver_catalog_tsv ~{linxDriverCatalogTsv} \ -linx_driver_tsv ~{linxDriverTsv} \ + -linx_germline_disruption_tsv ~{linxGermlineDisruptionTsv} \ -linx_plot_directory ~{sub(linxPlots[0], basename(linxPlots[0]), "")} \ + -linx_structural_variant_tsv ~{linxStructuralVariantTsv} \ -cuppa_result_csv ~{cuppaResultCsv} \ -cuppa_summary_plot ~{cuppaSummaryPlot} \ ~{"-cuppa_feature_plot " + cuppaFeaturePlot} \ @@ -811,7 +821,9 @@ task Orange { 
-protect_evidence_tsv ~{protectEvidenceTsv} \ -annotated_virus_tsv ~{annotatedVirusTsv} \ -cohort_mapping_tsv ~{cohortMappingTsv} \ - -cohort_percentiles_tsv ~{cohortPercentilesTsv} + -cohort_percentiles_tsv ~{cohortPercentilesTsv} \ + -driver_gene_panel_tsv ~{driverGenePanel} \ + -known_fusion_file ~{knownFusionFile} } #TODO may need to be added: -pipeline_version_file ~{pipelineVersionFile} @@ -1006,21 +1018,25 @@ task Protect { File linxDriversCatalog File chordPrediction File annotatedVirus + File lilacResultCsv + File lilacQcCsv + File driverGeneTsv String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biowdl/protect:v2.0" + String dockerImage = "quay.io/biocontainers/hmftools-protect:2.3--hdfd78af_0" } command { - protect -Xmx~{javaXmx} \ + protect -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -ref_genome_version ~{refGenomeVersion} \ -tumor_sample_id ~{tumorName} \ -reference_sample_id ~{referenceName} \ -primary_tumor_doids '~{sep=";" sampleDoids}' \ -output_dir ~{outputDir} \ -serve_actionability_dir ~{sub(serveActionability[0], basename(serveActionability[0]), "")} \ + -driver_gene_tsv ~{driverGeneTsv} \ -doid_json ~{doidJson} \ -purple_purity_tsv ~{purplePurity} \ -purple_qc_file ~{purpleQc} \ @@ -1033,7 +1049,9 @@ task Protect { -linx_breakend_tsv ~{linxBreakend} \ -linx_driver_catalog_tsv ~{linxDriversCatalog} \ -chord_prediction_txt ~{chordPrediction} \ - -annotated_virus_tsv ~{annotatedVirus} + -annotated_virus_tsv ~{annotatedVirus} \ + -lilac_result_csv ~{lilacResultCsv} \ + -lilac_qc_csv ~{lilacQcCsv} } output { @@ -1123,7 +1141,7 @@ task Purple { } command { - PURPLE -Xmx~{javaXmx} \ + PURPLE -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ ~{"-reference " + referenceName} \ ~{"-germline_vcf " + germlineVcf} \ ~{"-germline_hotspots " + germlineHotspots} \ @@ -1247,6 +1265,77 @@ task Purple { } } +task Rose { + input { + File actionabilityDatabaseTsv + Boolean hg38 = false + File driverGeneTsv + File purplePurityTsv + 
File purpleQc + File purpleGeneCopyNumberTsv + File purpleSomaticDriverCatalogTsv + File purpleGermlineDriverCatalogTsv + File purpleSomaticVcf + File purpleSomaticVcfIndex + File purpleGermlineVcf + File purpleGermlineVcfIndex + File linxFusionTsv + File linxBreakendTsv + File linxDriverCatalogTsv + File annotatedVirusTsv + File chordPredictionTxt + File cuppaResultCsv + String outputDir = "./rose" + String tumorName + String referenceName + + String memory = "9GiB" + String javaXmx = "8G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-rose:1.3--hdfd78af_0" + } + + command { + set -e + mkdir -p ~{outputDir} + rose -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -actionability_database_tsv ~{actionabilityDatabaseTsv} \ + -ref_genome_version ~{if hg38 then "38" else "37"} \ + -driver_gene_tsv ~{driverGeneTsv} \ + -purple_purity_tsv ~{purplePurityTsv} \ + -purple_qc_file ~{purpleQc} \ + -purple_gene_copy_number_tsv ~{purpleGeneCopyNumberTsv} \ + -purple_somatic_driver_catalog_tsv ~{purpleSomaticDriverCatalogTsv} \ + -purple_germline_driver_catalog_tsv ~{purpleGermlineDriverCatalogTsv} \ + -purple_somatic_variant_vcf ~{purpleSomaticVcf} \ + -purple_germline_variant_vcf ~{purpleGermlineVcf} \ + -linx_fusion_tsv ~{linxFusionTsv} \ + -linx_breakend_tsv ~{linxBreakendTsv} \ + -linx_driver_catalog_tsv ~{linxDriverCatalogTsv} \ + -annotated_virus_tsv ~{annotatedVirusTsv} \ + -chord_prediction_txt ~{chordPredictionTxt} \ + -cuppa_result_csv ~{cuppaResultCsv} \ + -output_dir ~{outputDir} \ + -tumor_sample_id ~{tumorName} \ + -ref_sample_id ~{referenceName} \ + -patient_id not_used_because_primary_tumor_tsv_has_only_headers + } + + output { + #TODO + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + + } +} + task Sage { input { Array[String]+ tumorName @@ -1369,6 +1458,45 @@ task Sage { } } +task Sigs { + input { + String sampleName + File signaturesFile + File 
somaticVcfFile + File somaticVcfIndex + String outputDir = "./sigs" + + String javaXmx = "4G" + String memory = "5GiB" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-sigs:1.1--hdfd78af_0" + } + + command { + set -e + mkdir -p ~{outputDir} + sigs -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -sample ~{sampleName} \ + -signatures_file ~{signaturesFile} \ + -somatic_vcf_file ~{somaticVcfFile} \ + -output_dir ~{outputDir} + } + + output { + File sigAllocationTsv = "~{outputDir}/~{sampleName}.sig.allocation.tsv" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + + } +} + task SvPrep { # for ref also add tumorJunctionFile input { diff --git a/peach.wdl b/peach.wdl index 7da029d0..39db7e08 100644 --- a/peach.wdl +++ b/peach.wdl @@ -30,7 +30,7 @@ task Peach { File panelJson String memory = "2GiB" - String dockerImage = "quay.io/biowdl/peach:v1.5" + String dockerImage = "quay.io/biowdl/peach:v1.7" Int timeMinutes = 5 } @@ -41,7 +41,7 @@ task Peach { --vcf ~{germlineVcf} \ --sample_t_id ~{tumorName} \ --sample_r_id ~{normalName} \ - --tool_version 1.5 \ + --tool_version 1.7 \ --outputdir ~{outputDir} \ --panel ~{panelJson} } From 8c661fe8a7b2eeb9821b8d57cdc3c394727ba9ae Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 21 Apr 2023 16:03:25 +0200 Subject: [PATCH 25/75] fix lint issue --- gridss.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 789335a4..ee060251 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -562,7 +562,6 @@ task Virusbreakend { bam: {description: "A BAM file.", category: "required"} bamIndex: {description: "The index for the BAM file.", category: "required"} referenceFasta: {description: "The fasta of the reference genome.", category: "required"} - referenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the reference.", category: "required"} virusbreakendDB: {description: "A 
.tar.gz containing the virusbreakend database.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} extraMemoryGB: {description: "Extra memory needed for the job in GB.", category: "advanced"} From a41beefcaff763620da78a53646bd4e29751f351 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 25 Apr 2023 12:24:07 +0200 Subject: [PATCH 26/75] adjust AnnotateInsertedSequence inputs --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index ee060251..067aa2f0 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -29,7 +29,7 @@ task AnnotateInsertedSequence { File viralReference File viralReferenceFai File viralReferenceDict - Array[File]+ viralReferenceBwaIndex + File viralReferenceBwaIndex Int threads = 8 String javaXmx = "8G" @@ -68,7 +68,7 @@ task AnnotateInsertedSequence { viralReference: {description: "A fasta file with viral sequences.", category: "required"} viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"} viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"} - viralReferenceBwaIndex: {description: "The BWA index files of the viral reference.", category: "required"} + viralReferenceBwaIndex: {description: "The BWA index img file of the viral reference.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", From 9bf0c753a42c455eb9a95f96af930fe21829ce6f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 28 Apr 2023 14:58:25 +0200 Subject: [PATCH 27/75] update svprep docker image --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 6d8a494b..2e8cdec9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1517,7 +1517,7 @@ task SvPrep { String javaXmx = "48G" String memory = "50GiB" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/hmftools-sv-prep:1.1--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-sv-prep:1.1--hdfd78af_1" } command { From 65fb7209b3d26913b4a1520198c682cfc312f2ba Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 2 May 2023 11:29:16 +0200 Subject: [PATCH 28/75] typo --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 067aa2f0..9b6596b5 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -411,7 +411,7 @@ task GridssSvPrep { gridss_sv-prep \ --steps all \ --output ~{outputPath} \ - --wirkingdir . \ + --workingdir . 
\ --reference ~{reference.fastaFile} \ --jar /usr/local/share/gridss-2.13.2-1/gridss.jar \ --blacklist ~{blacklistBed} \ From ae173ef9146d1759ba218774f19062bf6abed9bd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 2 May 2023 11:55:11 +0200 Subject: [PATCH 29/75] try using digest instead of tag --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 9b6596b5..0ffd4acf 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -404,7 +404,7 @@ task GridssSvPrep { Int nonJvmMemoryGb = 10 Int threads = 10 Int timeMinutes = ceil(7200 / threads) + 1800 - String dockerImage = "quay.io/biowdl/gridss:2.13.2_1" + String dockerImage = "quay.io/biowdl/gridss@sha256:f70696fda4b6f2612b21539d49986cf31bee7542a9eb0269a9f718f99df3fb2a" } command { From fb65c53c74446d89228cdbfa741b40cdbd16d857 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 2 May 2023 16:03:45 +0200 Subject: [PATCH 30/75] give SvPrepDepthAnnotator more time --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 2e8cdec9..45b132f4 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1589,7 +1589,7 @@ task SvPrepDepthAnnotator { Int threads = 10 String javaXmx = "48G" String memory = "50GiB" - Int timeMinutes = 120 + Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/hmftools-sv-prep:1.1--hdfd78af_0" } From 5ee9d8e49761212a5d22f77e7738a58eda1cd6ff Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 4 May 2023 12:01:07 +0200 Subject: [PATCH 31/75] update inputs for annotateinsertedsequences --- gridss.wdl | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 0ffd4acf..6f4f3660 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -26,10 +26,7 @@ task AnnotateInsertedSequence { input { File inputVcf String outputPath = "gridss.annotated.vcf.gz" - File viralReference - File viralReferenceFai - File viralReferenceDict - File viralReferenceBwaIndex + BwaIndex 
viralReferenceBwaIndex Int threads = 8 String javaXmx = "8G" @@ -42,7 +39,7 @@ task AnnotateInsertedSequence { set -e _JAVA_OPTIONS="$_JAVA_OPTIONS -Xmx~{javaXmx}" AnnotateInsertedSequence \ - REFERENCE_SEQUENCE=~{viralReference} \ + REFERENCE_SEQUENCE=~{viralReferenceBwaIndex.fastaFile} \ INPUT=~{inputVcf} \ OUTPUT=~{outputPath} \ ALIGNMENT=APPEND \ @@ -65,10 +62,7 @@ task AnnotateInsertedSequence { parameter_meta { inputVcf: {description: "The input VCF file.", category: "required"} outputPath: {description: "The path the output will be written to.", category: "common"} - viralReference: {description: "A fasta file with viral sequences.", category: "required"} - viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"} - viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"} - viralReferenceBwaIndex: {description: "The BWA index img file of the viral reference.", category: "required"} + viralReferenceBwaIndex: {description: "The BWA index of the viral reference.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", From 05270fe76c18f24288ddc42f6bcff67dc9eb3d10 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 5 May 2023 10:23:47 +0200 Subject: [PATCH 32/75] typo --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 45b132f4..1f919356 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1167,7 +1167,7 @@ task Purple { ~{"-target_regions_ratios " + targetRegionsRatios} \ ~{"-target_regions_msi_indels " + targetRegionsMsiIndels} \ ~{"-min_diploid_tumor_ratio_count " + minDiploidTumorRatioCount} \ - ~{"-min_diploid_tumor_ratio_count_centromere" + minDiploidTumorRatioCountCentromere} \ + ~{"-min_diploid_tumor_ratio_count_centromere" + minDiploidTumorRatioCountCentromere} \ -threads ~{threads} } From 646ccc523ff98278d206dd7f89f267e2f305af17 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 5 May 2023 12:23:03 +0200 Subject: [PATCH 33/75] fix linx outputs --- hmftools.wdl | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1f919356..143bdc00 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -610,27 +610,28 @@ task Linx { ~{if germline then "-germline" else ""} } + String prefix = if germline then "~{sampleName}.linx.germline" else "~{sampleName}.linx" + output { - File driverCatalog = "~{outputDir}/~{sampleName}.linx.driver.catalog.tsv" - File linxBreakend = "~{outputDir}/~{sampleName}.linx.breakend.tsv" - File linxClusters = "~{outputDir}/~{sampleName}.linx.clusters.tsv" - File linxDrivers = "~{outputDir}/~{sampleName}.linx.drivers.tsv" - File linxFusion = "~{outputDir}/~{sampleName}.linx.fusion.tsv" - File linxLinks = "~{outputDir}/~{sampleName}.linx.links.tsv" - File linxSvs = "~{outputDir}/~{sampleName}.linx.svs.tsv" - File linxVisCopyNumber = "~{outputDir}/~{sampleName}.linx.vis_copy_number.tsv" - File? linxVisFusion = "~{outputDir}/~{sampleName}.linx.vis_fusion.tsv" - File? 
linxVisGeneExon = "~{outputDir}/~{sampleName}.linx.vis_gene_exon.tsv" - File? linxVisProteinDomain = "~{outputDir}/~{sampleName}.linx.vis_protein_domain.tsv" - File? linxVisSegments = "~{outputDir}/~{sampleName}.linx.vis_segments.tsv" - File? linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv" - File? linxGermlineDriverCatalogTsv = "~{outputDir}/~{sampleName}.linx.germline.driver.catalog.tsv" - File? linxGermlineDisruptionTsv = "~{outputDir}/~{sampleName}.linx.germline.disruption.tsv" + File driverCatalog = "~{outputDir}/~{prefix}.driver.catalog.tsv" + File linxClusters = "~{outputDir}/~{prefix}.clusters.tsv" + File linxLinks = "~{outputDir}/~{prefix}.links.tsv" + File linxSvs = "~{outputDir}/~{prefix}.svs.tsv" + File? linxBreakend = "~{outputDir}/~{prefix}.breakend.tsv" + File? linxDrivers = "~{outputDir}/~{prefix}.drivers.tsv" + File? linxFusion = "~{outputDir}/~{prefix}.fusion.tsv" + File? linxVisCopyNumber = "~{outputDir}/~{prefix}.vis_copy_number.tsv" + File? linxVisFusion = "~{outputDir}/~{prefix}.vis_fusion.tsv" + File? linxVisGeneExon = "~{outputDir}/~{prefix}.vis_gene_exon.tsv" + File? linxVisProteinDomain = "~{outputDir}/~{prefix}.vis_protein_domain.tsv" + File? linxVisSegments = "~{outputDir}/~{prefix}.vis_segments.tsv" + File? linxVisSvData = "~{outputDir}/~{prefix}.vis_sv_data.tsv" + File? 
linxDisruptionTsv = "~{outputDir}/~{prefix}.disruption.tsv" File linxVersion = "~{outputDir}/linx.version" Array[File] outputs = select_all([driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion, linxLinks, linxSvs, linxVisCopyNumber, linxVisFusion, linxVisGeneExon, linxVisProteinDomain, linxVisSegments, linxVisSvData, - linxGermlineDriverCatalogTsv, linxGermlineDisruptionTsv, linxVersion]) + linxDisruptionTsv, linxVersion]) } runtime { From 0260a7a0eb72b82bd615cc4e598bb4a4adf1dbe9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 12 May 2023 14:05:09 +0200 Subject: [PATCH 34/75] cleanup --- gridss.wdl | 2 +- hmftools.wdl | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 6f4f3660..c9ba9bac 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -339,7 +339,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "25GiB" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image + String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" Int timeMinutes = 1440 } diff --git a/hmftools.wdl b/hmftools.wdl index 143bdc00..f6fb261b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -826,7 +826,6 @@ task Orange { -driver_gene_panel_tsv ~{driverGenePanel} \ -known_fusion_file ~{knownFusionFile} } - #TODO may need to be added: -pipeline_version_file ~{pipelineVersionFile} output { File orangeJson = "~{outputDir}/~{tumorName}.orange.json" From 1b4467c982a1ff06f434679bf9eb65d3edac62fc Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 16 May 2023 10:30:21 +0200 Subject: [PATCH 35/75] typo --- hmftools.wdl | 2 +- peach.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index f6fb261b..1f6eb8f9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -531,7 +531,7 @@ task Lilac { -ref_genome ~{referenceFasta} \ -ref_genome_version ~{refGenomeVersion} \ -resource_dir 
~{sub(hlaRefAminoacidSequencesCsv, basename(hlaRefAminoacidSequencesCsv), "")} \ - -outputDir ~{outputDir} \ + -output_dir ~{outputDir} \ -threads ~{threads} \ ~{"-tumor_bam " + tumorBam} \ ~{"-gene_copy_number " + geneCopyNumberFile} \ diff --git a/peach.wdl b/peach.wdl index 39db7e08..9ace8958 100644 --- a/peach.wdl +++ b/peach.wdl @@ -30,7 +30,7 @@ task Peach { File panelJson String memory = "2GiB" - String dockerImage = "quay.io/biowdl/peach:v1.7" + String dockerImage = "quay.io/biowdl/peach@sha256:025dc28fe448256729a6022d4d30deaee8105ab83d123dab9640251985240748" Int timeMinutes = 5 } From 143d5132e7f3b25137c6b1533586a4aa1436c3ca Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 16 May 2023 12:55:10 +0200 Subject: [PATCH 36/75] use digest instead of tag for cuppa docker --- hmftools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1f6eb8f9..b25ce28f 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -189,7 +189,7 @@ task CupGenerateReport { String memory = "5GiB" Int timeMinutes = 10 - String dockerImage = "quay.io/biowdl/cuppa:1.7.1" + String dockerImage = "quay.io/biowdl/cuppa@sha256:e76d367a3226068967fb64ad6adaa889cbdcc01397075b0cbc382bbba4350b98" } # This script writes to the directory that the input is located in. 
@@ -254,7 +254,7 @@ task Cuppa { String javaXmx = "4G" String memory = "5GiB" Int timeMinutes = 10 - String dockerImage = "quay.io/biowdl/cuppa:1.7.1" + String dockerImage = "quay.io/biowdl/cuppa@sha256:e76d367a3226068967fb64ad6adaa889cbdcc01397075b0cbc382bbba4350b98" } command { @@ -304,7 +304,7 @@ task CuppaChart { String memory = "4GiB" Int timeMinutes = 5 - String dockerImage = "quay.io/biowdl/cuppa:1.7.1" + String dockerImage = "quay.io/biowdl/cuppa@sha256:e76d367a3226068967fb64ad6adaa889cbdcc01397075b0cbc382bbba4350b98" } command { From b570e7ce3c42e3c95c2f1affd402367802c54e3f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 16 May 2023 14:19:44 +0200 Subject: [PATCH 37/75] fix issue with java permissions in orange --- hmftools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/hmftools.wdl b/hmftools.wdl index b25ce28f..f5d690a2 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -780,6 +780,7 @@ task Orange { command { set -e mkdir -p ~{outputDir} + export JAVA_TOOL_OPTIONS='--add-opens=java.base/java.time=ALL-UNNAMED' orange -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -output_dir ~{outputDir} \ -ref_genome_version ~{if hg38 then "38" else "37"} \ From 5bc6e31955c58755149b6b5231b45ed5edf6c7fc Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 22 May 2023 15:15:12 +0200 Subject: [PATCH 38/75] add output for hmftools rose --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index f5d690a2..90d6828e 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1323,7 +1323,7 @@ task Rose { } output { - #TODO + File roseTsv = "~{outputDir}/~{tumorName}.rose.tsv" } runtime { From c9f9172de1eca1a52d87f7a881b157df2611e945 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 26 May 2023 15:38:38 +0200 Subject: [PATCH 39/75] fix timeMinutes for fastp --- fastp.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index 9849738b..7df0a8f3 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -42,7 
+42,7 @@ task Fastp { Int threads = 4 String memory = "50GiB" - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 6.0 / threads) + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / select_first([effectiveSplit, threads])) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" Int? noneInt From 99a299981c7dbb0ade363d04c56ad063dda83265 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 30 May 2023 09:58:38 +0200 Subject: [PATCH 40/75] increase sambamaba markdup time --- sambamba.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sambamba.wdl b/sambamba.wdl index fb6c83de..51077ec0 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -84,7 +84,7 @@ task Markdup { # Added 8192 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 8192 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. - Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 25) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 30) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } From 1a2ad6810445d70386422f0df120a3a02e149687 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 31 May 2023 11:42:08 +0200 Subject: [PATCH 41/75] increase time sambamaba markdup --- sambamba.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sambamba.wdl b/sambamba.wdl index 51077ec0..acf40278 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -84,7 +84,7 @@ task Markdup { # Added 8192 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 8192 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
- Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 30) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 35) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } From 4fe8370fb723a0a1449abbe6fb30f677c12d3f42 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jun 2023 14:27:29 +0200 Subject: [PATCH 42/75] add missing purple output --- hmftools.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 90d6828e..2c043dd8 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1215,7 +1215,8 @@ task Purple { purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, - purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv] + purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv, + purpleGermlineDeletionTsv] Array[File] plots = select_all([circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot]) Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink, From 52cfccef8c352659e281382309b9eef5c3c9bad5 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 6 Jun 2023 14:19:15 +0200 Subject: [PATCH 43/75] adjust outputs for hmftools --- extractSigPredictHRD.wdl | 2 +- hmftools.wdl | 25 +++++++++++++++++-------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl index 1520b608..c9e4c67f 100644 --- a/extractSigPredictHRD.wdl +++ b/extractSigPredictHRD.wdl @@ -22,7 +22,7 @@ version 1.0 task ExtractSigPredictHRD { input { - String outputDir = "." 
+ String outputDir = "./chord" String sampleName File snvIndelVcf File snvIndelVcfIndex diff --git a/hmftools.wdl b/hmftools.wdl index 2c043dd8..23efa1ae 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -70,7 +70,8 @@ task Amber { File normalSnpVcf = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz" File normalSnpVcfIndex = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz.tbi" Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorContaminationVcf, - tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, normalSnpVcf, normalSnpVcfIndex] + tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, normalHomozygousregionsTsv, + normalSnpVcf, normalSnpVcfIndex] } runtime { @@ -541,6 +542,7 @@ task Lilac { output { File lilacCsv = "~{outputDir}/~{sampleName}.lilac.csv" File lilacQcCsv = "~{outputDir}/~{sampleName}.lilac.qc.csv" + File candidatesCoverageCsv = "~{outputDir}/~{sampleName}.candidates.coverage.csv" } runtime { @@ -668,7 +670,7 @@ task Linx { task LinxVisualisations { input { - String outputDir = "./linx_visualisation" + String outputDir = "./linx" String sample String refGenomeVersion Array[File]+ linxOutput @@ -1002,7 +1004,7 @@ task Protect { String tumorName String referenceName Array[String]+ sampleDoids - String outputDir = "." + String outputDir = "./protect" Array[File]+ serveActionability File doidJson File purplePurity @@ -1410,14 +1412,20 @@ task Sage { ~{"-low_confidence_min_tumor_qual " + lowConfidenceMinTumorQual} } + String outputDir = sub(outputPath, basename(outputPath), "") + output { #FIXME does it produce multiple plots/tsvs if multiple samples are given? File outputVcf = outputPath File outputVcfIndex = outputPath + ".tbi" - File? referenceSageBqrPng = "~{referenceName[0]}.sage.bqr.png" - File? 
referenceSageBqrTsv = "~{referenceName[0]}.sage.bqr.tsv" - File tumorSageBqrPng = "~{tumorName[0]}.sage.bqr.png" - File tumorSageBqrTsv = "~{tumorName[0]}.sage.bqr.tsv" - File sageGeneCoverageTsv = "~{tumorName[0]}.sage.gene.coverage.tsv" + File? referenceSageBqrPng = "~{outputDir}/~{referenceName[0]}.sage.bqr.png" + File? referenceSageBqrTsv = "~{outputDir}/~{referenceName[0]}.sage.bqr.tsv" + File tumorSageBqrPng = "~{outputDir}/~{tumorName[0]}.sage.bqr.png" + File tumorSageBqrTsv = "~{outputDir}/~{tumorName[0]}.sage.bqr.tsv" + File sageGeneCoverageTsv = "~{outputDir}/~{tumorName[0]}.sage.gene.coverage.tsv" + File referenceSageExonMediansTsv = "~{outputDir}/~{tumorName[0]}.sage.exon.medians.tsv" + Array[File] outputs = select_all([outputVcf, outputVcfIndex, referenceSageBqrPng, + referenceSageBqrTsv, tumorSageBqrPng, tumorSageBqrTsv, + sageGeneCoverageTsv, referenceSageExonMediansTsv]) } runtime { @@ -1486,6 +1494,7 @@ task Sigs { output { File sigAllocationTsv = "~{outputDir}/~{sampleName}.sig.allocation.tsv" + File sigSnvCountsCsv = "~{outputDir}/~{sampleName}.sig.snv_counts.csv" } runtime { From 6194b2d1352e2020d3f42982ebc7c9d2f4d21108 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 13 Jul 2023 14:37:33 +0200 Subject: [PATCH 44/75] add writeNeoEpitopes option to linx --- hmftools.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 23efa1ae..d494a5eb 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -575,6 +575,7 @@ task Linx { Boolean checkFusions = true Boolean checkDrivers = true Boolean writeVisData = true + Boolean writeNeoEpitopes = false #The following should be in the same directory. 
File geneDataCsv File proteinFeaturesCsv @@ -609,6 +610,7 @@ task Linx { -driver_gene_panel ~{driverGenePanel} \ ~{if writeVisData then "-write_vis_data" else ""} \ ~{if writeAllVisFusions then "-write_all_vis_fusions" else ""} \ + ~{if writeNeoEpitopes then "-write_neo_epitopes" else ""} \ ~{if germline then "-germline" else ""} } @@ -654,6 +656,7 @@ task Linx { knownFusionCsv: {description: "A CSV file describing known fusions.", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} writeAllVisFusions: {description: "Equivalent to the -write_all_vis_fusions flag.", category: "advanced"} + writeNeoEpitopes: {description: "Equivalent to the -write_neo_epitopes flag.", category: "advanced"} geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} From c3e29085e54becb14ac5b492164b1eb090417e04 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 18 Jul 2023 11:30:57 +0200 Subject: [PATCH 45/75] add task for hmftools neo --- hmftools.wdl | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index d494a5eb..7e82ecc8 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -727,6 +727,76 @@ task LinxVisualisations { } } +task Neo { + input { + String sampleId + File somaticVcf + File somaticVcfIndex + Array[File]+ linxOutput + String refGenomeVersion + File referenceFasta + File referenceFastaFai + File 
referenceFastaDict + String outputDir = "./neo" + #The following should be in the same directory. + File geneDataCsv + File proteinFeaturesCsv + File transExonDataCsv + File transSpliceDataCsv + + String memory = "9GiB" + String javaXmx = "8G" + Int timeMinutes = 1440 + String dockerImage = "quay.io/biocontainers/hmftools-neo:1.0.1--hdfd78af_0" + } + + command { + set -e + mkdir -p ~{outputDir} + neo -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -sample ~{sampleId} \ + -ref_genome_version ~{refGenomeVersion} \ + -ref_genome ~{referenceFasta} \ + -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -linx_dir ~{sub(linxOutput[0], basename(linxOutput[0]), "")} \ + -somatic_vcf ~{somaticVcf} \ + -output_dir ~{outputDir} + } + + output { + File neoData = "~{outputDir}/~{sampleId}.neo.neo_data.tsv" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + sampleId: {description: "The name/id of the sample.", category: "required"} + somaticVcf: {description: "The vcf containing the sample's somatic variants.", category: "required"} + somaticVcfIndex: {description: "The index for the vcf containing the sample's somatic variants.", category: "required"} + linxOutput: {description: "The directory containing the linx output.", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment.
Either \"37\" or \"38\".", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} + transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Orange { input { String outputDir = "./orange" From 0a7cfc98bf197d031acb2a19c2fa6d73f9ceca33 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 18 Jul 2023 11:43:39 +0200 Subject: [PATCH 46/75] add parameter_meta to lilac --- hmftools.wdl | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 7e82ecc8..38f5124b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -553,7 +553,32 @@ task Lilac { } parameter_meta { + sampleName: {description: "The name of the sample.", category: "required"} + referenceBam: {description: "The bam file for the reference sample.", category: "required"} + referenceBamIndex: {description: "The index for the reference sample's bam file.", category: "required"} + tumorBam: {description: "The bam file for the tumor sample.", category: "common"} + tumorBamIndex: {description: "The index for the tumor sample's bam file.", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. 
Either \"37\" or \"38\".", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + geneCopyNumberFile: {description: "Gene copy number file produced by purple.", category: "common"} + somaticVariantsFile: {description: "Somatic variant VCF produced by purple.", category: "common"} + somaticVariantsFileIndex: {description: "Index for the somatic variant VCf produced by purple.", category: "common"} + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + + #The following need to be in the same directory + hlaRefAminoacidSequencesCsv: {description: "LILAC reference file.", category: "required"} + hlaRefNucleotideSequencesCsv: {description: "LILAC reference file.", category: "required"} + lilacAlleleFrequenciesCsv: {description: "LILAC reference file.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + threads: {description: "The number of threads to use", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } From 290a3fc2232f0508fdcbe080442c2b4b8e142ec1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 18 Jul 2023 14:35:40 +0200 Subject: [PATCH 47/75] add task for hmftools NeoScorer --- hmftools.wdl | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 38f5124b..537e2bfa 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -566,8 +566,6 @@ task Lilac { somaticVariantsFile: {description: "Somatic variant VCF produced by purple.", category: "common"} somaticVariantsFileIndex: {description: "Index for the somatic variant VCf produced by purple.", category: "common"} outputDir: {description: "The directory the outputs will be written to.", category: "required"} - - #The following need to be in the same directory hlaRefAminoacidSequencesCsv: {description: "LILAC reference file.", category: "required"} hlaRefNucleotideSequencesCsv: {description: "LILAC reference file.", category: "required"} lilacAlleleFrequenciesCsv: {description: "LILAC reference file.", category: "required"} @@ -822,6 +820,102 @@ task Neo { } } +task neoScorer { + input { + String sampleId + String refGenomeVersion + File referenceFasta + File referenceFastaFai + File referenceFastaDict + Array[File]+ neoBindingFiles + String neoBindingFileId = "cmb_02" + File cancerTpmMedians + File neoData + Array[File]+ lilacOutput + Array[File]+ purpleOutput + String outputDir = "./neo" + + #The following should be in the same directory. + File geneDataCsv + File proteinFeaturesCsv + File transExonDataCsv + File transSpliceDataCsv + + String? cancerType + Array[File]? isofoxOutput + File? rnaSomaticVcf + File? 
rnaSomaticVcfIndex + + String memory = "9GiB" + String javaXmx = "8G" + Int timeMinutes = 1440 + String dockerImage = "quay.io/biocontainers/hmftools-neo:1.0.1--hdfd78af_0" + } + + String isofoxDir = sub(select_first([isofoxOutput, [""]])[0], basename(select_first([isofoxOutput, [""]])[0]), "") + + command { + set -e + mkdir -p ~{outputDir} + neo com.hartwig.hmftools.neo.scorer.NeoScorer Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -sample ~{sampleId} \ + ~{"-cancer_type " + cancerType} \ + -ref_genome_version ~{refGenomeVersion} \ + -ref_genome ~{referenceFasta} \ + -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -score_file_dir ~{sub(neoBindingFiles[0], basename(neoBindingFiles[0]), "")} \ + -score_file_id ~{neoBindingFileId} \ + -cancer_tpm_medians_file ~{cancerTpmMedians} \ + -neo_dir ~{sub(neoData, basename(neoData), "")} \ + ~{if defined(isofoxOutput) then "-isofox_dir " + isofoxDir else ""} \ + -lilac_dir ~{sub(lilacOutput[0], basename(lilacOutput[0]), "")} \ + -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ + ~{"-rna_somatic_vcf " + rnaSomaticVcf} \ + -output_dir ~{outputDir} + } + + output { + File neoepitopes = "~{outputDir}/~{sampleId}.neo.neoepitope.tsv" + File peptideScores = "~{outputDir}/~{sampleId}.neo.peptide_scores.tsv" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + sampleId: {description: "The name/id of the sample.", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. 
Either \"37\" or \"38\".", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + neoBindingFiles: {description: "The neo binding reference files.", category: "required"} + neoBindingFileId: {description: "The neo binding reference file version id.", category: "required"} + cancerTpmMedians: {description: "HMF RNA cohort transcript median TPM file.", category: "required"} + neoData: {description: "Data file produced by neo.", category: "required"} + lilacOutput: {description: "The output produced by lilac.", category: "required"} + purpleOutput: {description: "The output produced by purple.", category: "required"} + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} + transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} + cancerType: {description: "The cancer type.", category: "common"} + isofoxOutput: {description: "The output produced by isofox.", category: "common"} + rnaSomaticVcf: {description: 
"SageAppend produced rna somatic VCF file.", category: "common"} + rnaSomaticVcfIndex: {description: "Index for the rna somatic VCF file.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Orange { input { String outputDir = "./orange" From 2f11393a46e201bae32703fa4d2dc66222e8982b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 19 Jul 2023 12:49:45 +0200 Subject: [PATCH 48/75] fix some outputs and typo --- hmftools.wdl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 537e2bfa..ca6aee78 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -543,6 +543,7 @@ task Lilac { File lilacCsv = "~{outputDir}/~{sampleName}.lilac.csv" File lilacQcCsv = "~{outputDir}/~{sampleName}.lilac.qc.csv" File candidatesCoverageCsv = "~{outputDir}/~{sampleName}.candidates.coverage.csv" + Array[File] outputs = [lilacCsv, lilacQcCsv, candidatesCoverageCsv] } runtime { @@ -654,11 +655,12 @@ task Linx { File? linxVisSegments = "~{outputDir}/~{prefix}.vis_segments.tsv" File? linxVisSvData = "~{outputDir}/~{prefix}.vis_sv_data.tsv" File? linxDisruptionTsv = "~{outputDir}/~{prefix}.disruption.tsv" + File? 
linxNeoepitopeTsv = "~{outputDir}/~{prefix}.neoepitope.tsv" File linxVersion = "~{outputDir}/linx.version" Array[File] outputs = select_all([driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion, linxLinks, linxSvs, linxVisCopyNumber, linxVisFusion, linxVisGeneExon, linxVisProteinDomain, linxVisSegments, linxVisSvData, - linxDisruptionTsv, linxVersion]) + linxDisruptionTsv, linxNeoepitopeTsv, linxVersion]) } runtime { @@ -820,7 +822,7 @@ task Neo { } } -task neoScorer { +task NeoScorer { input { String sampleId String refGenomeVersion @@ -828,7 +830,7 @@ task neoScorer { File referenceFastaFai File referenceFastaDict Array[File]+ neoBindingFiles - String neoBindingFileId = "cmb_02" + String neoBindingFileId File cancerTpmMedians File neoData Array[File]+ lilacOutput From 318eca8f907fc78b9658e0f67849e9a137f5ec90 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 20 Jul 2023 13:02:51 +0200 Subject: [PATCH 49/75] add mkdir to SvPrepDepthAnnotator --- hmftools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 23efa1ae..05a3b634 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1605,6 +1605,8 @@ task SvPrepDepthAnnotator { } command { + set -e + mkdir -p "$(dirname ~{outputVcf})" java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -cp /usr/local/share/hmftools-sv-prep-1.1-0/sv-prep.jar \ com.hartwig.hmftools.svprep.depth.DepthAnnotator \ From 7e987c575262400698cd525fbb178ef0cf960eea Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 20 Jul 2023 14:14:48 +0200 Subject: [PATCH 50/75] fix Neo/NeoScorer inputs --- hmftools.wdl | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index a48a28d8..aae544c1 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -769,6 +769,8 @@ task Neo { File transExonDataCsv File transSpliceDataCsv + Int reqAminoAcids = 15 + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 1440 @@ -785,6 +787,7 @@ task Neo { -ensembl_data_dir 
~{sub(geneDataCsv, basename(geneDataCsv), "")} \ -linx_dir ~{sub(linxOutput[0], basename(linxOutput[0]), "")} \ -somatic_vcf ~{somaticVcf} \ + -req_amino_acids ~{reqAminoAcids} \ -output_dir ~{outputDir} } @@ -807,11 +810,12 @@ task Neo { referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - outputDir: {description: "The directory the outputs will be written to.", category: "required"} + outputDir: {description: "The directory the outputs will be written to.", category: "common"} geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} + reqAminoAcids: {description: "Equivalent to neo's -req_amino_acids option.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", @@ -825,10 +829,6 @@ task Neo { task NeoScorer { input { String sampleId - String refGenomeVersion - File referenceFasta - File referenceFastaFai - File referenceFastaDict Array[File]+ neoBindingFiles String neoBindingFileId File cancerTpmMedians @@ -862,8 +862,6 @@ task NeoScorer { neo com.hartwig.hmftools.neo.scorer.NeoScorer Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -sample ~{sampleId} \ ~{"-cancer_type " + cancerType} \ - -ref_genome_version ~{refGenomeVersion} \ - -ref_genome ~{referenceFasta} \ -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ -score_file_dir ~{sub(neoBindingFiles[0], basename(neoBindingFiles[0]), "")} \ -score_file_id ~{neoBindingFileId} \ @@ -889,10 +887,6 @@ task NeoScorer { parameter_meta { sampleId: {description: "The name/id of the sample.", category: "required"} - refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} neoBindingFiles: {description: "The neo binding reference files.", category: "required"} neoBindingFileId: {description: "The neo binding reference file version id.", category: "required"} cancerTpmMedians: {description: "HMF RNA cohort transcript median TPM file.", category: "required"} From c10a063d488268e59c4100b81c5870f1c2d4714c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 21 Jul 2023 09:48:52 +0200 Subject: [PATCH 51/75] add mkdir to ExtractSigPredictHRD --- extractSigPredictHRD.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl index c9e4c67f..86f298c6 100644 --- a/extractSigPredictHRD.wdl +++ 
b/extractSigPredictHRD.wdl @@ -36,6 +36,8 @@ task ExtractSigPredictHRD { } command { + set -e + mkdir -p ~{outputDir} extractSigPredictHRD.R \ ~{outputDir} \ ~{sampleName} \ From 4991c67e4e12f7101d0b24c53ddf78e4e6ff3105 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 21 Jul 2023 09:50:19 +0200 Subject: [PATCH 52/75] add mkdir to VirusInterpreter --- hmftools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 05a3b634..46422f15 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1669,6 +1669,8 @@ task VirusInterpreter { } command { + set -e + mkdir -p ~{outputDir} virus-interpreter -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -sample_id ~{sampleId} \ -purple_purity_tsv ~{purplePurityTsv} \ From fe131980526ff8703424a2b9cb0becf7a5b53cee Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 21 Jul 2023 14:05:47 +0200 Subject: [PATCH 53/75] fix ExtractSigPredictHRD --- extractSigPredictHRD.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl index 86f298c6..b43d8daf 100644 --- a/extractSigPredictHRD.wdl +++ b/extractSigPredictHRD.wdl @@ -38,8 +38,9 @@ task ExtractSigPredictHRD { command { set -e mkdir -p ~{outputDir} + cd ~{outputDir} extractSigPredictHRD.R \ - ~{outputDir} \ + . 
\ ~{sampleName} \ ~{snvIndelVcf} \ ~{svVcf} \ From fd29caaf7fc7a595e848d247fe70ff8b7a21a68a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 22 Aug 2023 11:37:45 +0200 Subject: [PATCH 54/75] add optional inputs to star, add isofox task --- hmftools.wdl | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++ star.wdl | 30 +++++++++++++++++++ 2 files changed, 114 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index bfdf0817..7ef6f596 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -497,6 +497,90 @@ task HealthChecker { } } +task Isofox { + input { + String sampleName + File neoepitopeFile + File bamFile + File bamIndex + File referenceFasta + File referenceFastaFai + File referenceFastaDict + String refGenomeVersion + File expCountsFile + File expGcRatiosFile + + String outputDir = "./isofox" + Int readLength = 151 + + #The following should be in the same directory. + File geneDataCsv + File proteinFeaturesCsv + File transExonDataCsv + File transSpliceDataCsv + + Int threads = 10 + String javaXmx = "12G" + String memory = "13GiB" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/hmftools-isofox:1.6.2--hdfd78af_0" + } + + command { + set -e + mkdir -p ~{outputDir} + isofox -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -sample ~{sampleName} \ + -functions 'NEO_EPITOPES;TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS' \ + -neoepitope_file ~{neoepitopeFile} \ + -bam_file ~{bamFile} \ + -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{refGenomeVersion} \ + -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -output_dir ~{outputDir} \ + -log_debug \ + -threads ~{threads} + } + + output { + File neoepitopeTsv = "~{outputDir}/~{sampleName}.isf.neoepitope.tsv" + Array[File] outputs = [neoepitopeTsv] + #TODO + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + sampleName: {description: "The name of the sample.", category: "required"} +
neoepitopeFile: {description: "Neo's data file.", category: "required"} + bamFile: {description: "Input rna BAM file.", category: "required"} + bamIndex: {description: "Index for the rna BAM file.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} + expCountsFile: {description: "Isofox reference file.", category: "required"} + expGcRatiosFile: {description: "Isofox reference file.", category: "required"} + outputDir: {description: "The directory the outputs will be written to.", category: "common"} + geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} + transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program.
Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Lilac { input { String sampleName diff --git a/star.wdl b/star.wdl index 88d3c838..5af12236 100644 --- a/star.wdl +++ b/star.wdl @@ -121,6 +121,21 @@ task Star { Float? outFilterScoreMinOverLread Int? outFilterMatchNmin Float? outFilterMatchNminOverLread + String? outSAMattributes + Int? outFilterMultimapNmax + Int? outFilterMismatchNmax + Int? limitOutSJcollapsed + Int? chimSegmentMin + String? chimOutType + Int? chimJunctionOverhangMin + Int? chimSegmentReadGapMax + Int? chimScoreMin + Int? chimScoreDropMax + Int? chimScoreJunctionNonGTAG + Int? chimScoreSeparation + Int? alignSplicedMateMapLminOverLmate + Int? alignSplicedMateMapLmin + String? alignSJstitchMismatchNmax String? outStd String? twopassMode = "Basic" Array[String]?
outSAMattrRGline @@ -158,6 +173,21 @@ task Star { ~{"--outFilterMatchNmin " + outFilterMatchNmin} \ ~{"--outFilterMatchNminOverLread " + outFilterMatchNminOverLread} \ ~{"--outSAMunmapped " + outSAMunmapped} \ + ~{"--outSAMattributes " + outSAMattributes} \ + ~{"--outFilterMultimapNmax " + outFilterMultimapNmax} \ + ~{"--outFilterMismatchNmax " + outFilterMismatchNmax} \ + ~{"--limitOutSJcollapsed " + limitOutSJcollapsed} \ + ~{"--chimSegmentMin " + chimSegmentMin} \ + ~{"--chimOutType " + chimOutType} \ + ~{"--chimJunctionOverhangMin " + chimJunctionOverhangMin} \ + ~{"--chimSegmentReadGapMax " + chimSegmentReadGapMax} \ + ~{"--chimScoreMin " + chimScoreMin} \ + ~{"--chimScoreDropMax " + chimScoreDropMax} \ + ~{"--chimScoreJunctionNonGTAG " + chimScoreJunctionNonGTAG} \ + ~{"--chimScoreSeparation " + chimScoreSeparation} \ + ~{"--alignSplicedMateMapLminOverLmate " + alignSplicedMateMapLminOverLmate} \ + ~{"--alignSplicedMateMapLmin " + alignSplicedMateMapLmin} \ + ~{"--alignSJstitchMismatchNmax " + alignSJstitchMismatchNmax} \ ~{"--runThreadN " + runThreadN} \ ~{"--outStd " + outStd} \ ~{"--twopassMode " + twopassMode} \ From 7e142caf9edbef8d1a4f18fed20091b6a1e05f8e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 22 Aug 2023 11:47:44 +0200 Subject: [PATCH 55/75] Fix lint issues --- hmftools.wdl | 2 +- star.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 7ef6f596..0d98580f 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -544,7 +544,7 @@ task Isofox { output { File neoepitopeTsv = "~{outputDir}/~{sampleName}.isf.neoepitope.tsv" - File outputs = [neoepitopeTsv] + Array[File] outputs = [neoepitopeTsv] #TODO } diff --git a/star.wdl b/star.wdl index 5af12236..5b651970 100644 --- a/star.wdl +++ b/star.wdl @@ -133,7 +133,7 @@ task Star { Int? chimScoreDropMax Int? chimScoreJunctionNonGTAG Int? chimScoreSeparation - Int? alignSplicedMateMapLminOverLmate + Float? alignSplicedMateMapLminOverLmate Int? 
alignSplicedMateMapLmin String? alignSJstitchMismatchNmax String? outStd From 67ca5b69a9565ada8de5df99e6dfe9ebfbfaf0a7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 22 Aug 2023 13:34:44 +0200 Subject: [PATCH 56/75] add task for sage append --- hmftools.wdl | 68 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 0d98580f..93be2cfc 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1730,7 +1730,69 @@ task Sage { refSampleCount: {description: "Equivalent to sage's `ref_sample_count` option.", category: "advanced"} hg38: {description: "Whether or not the refernce genome is HG18, if false HG19 is assumed.", category: "common"} - threads: {description: "The numve of threads to use.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task SageAppend { + input { + String sampleName + File bamFile + File bamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai + File sageVcf + String outPath = "./sage_append.vcf" + + Int threads = 2 + String javaXmx = "32G" + String memory = "33GiB" + Int timeMinutes = 720 + String dockerImage = "quay.io/biocontainers/hmftools-sage:3.2.3--hdfd78af_0" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-sage-3.2.3-0/sage.jar \ + com.hartwig.hmftools.sage.append.SageAppendApplication \ + -reference ~{sampleName} \ + -reference_bam ~{bamFile} \ + -ref_genome ~{referenceFasta} \ + -input_vcf ~{sageVcf} \ + -out ~{outPath} \ + -threads ~{threads} + } + + output { + File vcf = outPath + File index = "~{outPath}.tbi" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + sampleName: {description: "The sample id.", category: "required"} + bamFile: {description: "The input BAM file.", category: "required"} + bamIndex: {description: "Index for the input BAM file", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + sageVcf: {description: "A VCF file from Sage or Purple.", category: "required"} + outPath: {description: "Location to write the output to.", category: "required"} + + threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} @@ -1846,7 +1908,7 @@ task SvPrep { existingJunctionFile: {description: "Junctions file generated by an earlier run of this tool, eg. from a paired sample.", category: "common"} hg38: {description: "Whether or not the refernce genome is HG18, if false HG19 is assumed.", category: "common"} - threads: {description: "The numve of threads to use.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} @@ -1913,7 +1975,7 @@ task SvPrepDepthAnnotator { hg38: {description: "Whether or not the refernce genome is HG18, if false HG19 is assumed.", category: "common"} outputVcf: {description: "The path for the output VCF.", category: "common"} - threads: {description: "The numve of threads to use.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} From 2d1156ab01d091a5048718aec2e82b019cc3338d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 28 Aug 2023 10:02:24 +0200 Subject: [PATCH 57/75] add support for unsorted BAM file in STAR task --- star.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/star.wdl b/star.wdl index 5b651970..5462672e 100644 --- a/star.wdl +++ b/star.wdl @@ -156,7 +156,8 @@ task Star { # in the runtime section. #TODO: Could be extended for all possible output extensions. 
- Map[String, String] samOutputNames = {"BAM SortedByCoordinate": "sortedByCoord.out.bam"} + Map[String, String] samOutputNames = {"BAM SortedByCoordinate": "sortedByCoord.out.bam", + "BAM Unsorted": "out.bam"} command { set -e From e4e98c72431c14ee7d578497aafd8a393c9f07b1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 29 Aug 2023 11:17:23 +0200 Subject: [PATCH 58/75] make index output optional in sambamba markdup, remove index input from isofox and sageappend --- hmftools.wdl | 4 +--- sambamba.wdl | 7 +++++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 93be2cfc..6921a980 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -502,7 +502,6 @@ task Isofox { String sampleName File neoepitopeFile File bamFile - File bamIndex File referenceFasta File referenceFastaFai File referenceFastaDict @@ -559,7 +558,6 @@ task Isofox { sampleName: {description: "The name of the sample.", category: "required"} neoepitopeFile: {description: "Neo's data file.", category: "required"} bamFile: {description: "Input rna BAM file.", category: "required"} - bamIndex: {description: "Index for the rna BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} @@ -1744,7 +1742,7 @@ task SageAppend { input { String sampleName File bamFile - File bamIndex + File? bamIndex File referenceFasta File referenceFastaDict File referenceFastaFai diff --git a/sambamba.wdl b/sambamba.wdl index acf40278..3052c8af 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -103,12 +103,15 @@ task Markdup { ~{"--io-buffer-size " + ioBufferSize} \ ~{sep=' ' inputBams} ~{outputPath} # sambamba creates an index for us. 
- mv ~{outputPath}.bai ~{bamIndexPath} + if [ -f ~{outputPath}.bai ] + then + mv ~{outputPath}.bai ~{bamIndexPath} + fi } output { File outputBam = outputPath - File outputBamIndex = bamIndexPath + File? outputBamIndex = bamIndexPath } runtime { From 6210b3740be6f3de2f7f01d78de6d164022fdc65 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 5 Sep 2023 15:33:48 +0200 Subject: [PATCH 59/75] fix semi-colon issue --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 6921a980..e75feb09 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -530,7 +530,7 @@ task Isofox { mkdir -p ~{outputDir} isofox -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -sample ~{sampleName} \ - -functions NEO_EPITOPES;TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS \ + -functions 'NEO_EPITOPES;TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS' \ -neoepitope_file ~{neoepitopeFile} \ -bam_file ~{bamFile} \ -ref_genome ~{referenceFasta} \ From a7fa3b13715502af2ed48e96286db71ba81fc948 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 6 Sep 2023 13:43:45 +0200 Subject: [PATCH 60/75] turn neo data tsv into csv for isofox --- hmftools.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index e75feb09..f5a13449 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -528,10 +528,11 @@ task Isofox { command { set -e mkdir -p ~{outputDir} + sed 's/\t/,/g' ~{neoepitopeFile} > tmp.neo_data.csv isofox -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -sample ~{sampleName} \ -functions 'NEO_EPITOPES;TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS' \ - -neoepitope_file ~{neoepitopeFile} \ + -neoepitope_file tmp.neo_data.csv \ -bam_file ~{bamFile} \ -ref_genome ~{referenceFasta} \ -ref_genome_version ~{refGenomeVersion} \ From 9667c6ac49f5b486661e6e8e90ea025ea0f713f1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 7 Sep 2023 13:23:59 +0200 Subject: [PATCH 61/75] add isofox outputs --- hmftools.wdl | 10 +++++++--- 1 file changed, 7 
insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index f5a13449..75eec8b9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -543,9 +543,13 @@ task Isofox { } output { - File neoepitopeTsv = "~{outputDir}/~{sampleName}.isf.neoepitope.tsv" - Array[File] outputs = [neoepitopeTsv] - #TODO + File neoepitope = "~{outputDir}/~{sampleName}.isf.neoepitope.csv" + File altSpliceJunc = "~{outputDir}/~{sampleName}.isf.alt_splice_junc.csv" + File geneCollection = "~{outputDir}/~{sampleName}.isf.gene_collection.csv" + File passFusions = "~{outputDir}/~{sampleName}.isf.pass_fusions.csv" + File fusions = "~{outputDir}/~{sampleName}.isf.fusions.csv" + Array[File] outputs = [neoepitopeTsv, altSpliceJunc, geneCollection, + passFusions, fusions] } runtime { From 37a23566001b8cc4e6703218277cb5bec909ea81 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 7 Sep 2023 13:48:10 +0200 Subject: [PATCH 62/75] fix output name --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 75eec8b9..46116561 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -548,7 +548,7 @@ task Isofox { File geneCollection = "~{outputDir}/~{sampleName}.isf.gene_collection.csv" File passFusions = "~{outputDir}/~{sampleName}.isf.pass_fusions.csv" File fusions = "~{outputDir}/~{sampleName}.isf.fusions.csv" - Array[File] outputs = [neoepitopeTsv, altSpliceJunc, geneCollection, + Array[File] outputs = [neoepitope, altSpliceJunc, geneCollection, passFusions, fusions] } From c3c74a8406da01943e45e2a46961d32d6d959e72 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 12 Sep 2023 14:51:23 +0200 Subject: [PATCH 63/75] split isofox neoepitope function into separate task --- hmftools.wdl | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 83 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 46116561..1e5b6b21 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -500,7 +500,6 @@ task HealthChecker { task Isofox { 
input { String sampleName - File neoepitopeFile File bamFile File referenceFasta File referenceFastaFai @@ -528,10 +527,90 @@ task Isofox { command { set -e mkdir -p ~{outputDir} + isofox -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -sample ~{sampleName} \ + -functions 'TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS' \ + -bam_file ~{bamFile} \ + -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{refGenomeVersion} \ + -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -output_dir ~{outputDir} \ + -log_debug \ + -threads ~{threads} + } + + output { + #TODO + Array[File] outputs = [] + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + sampleName: {description: "The name of the sample.", category: "required"} + bamFile: {description: "Input rna BAM file.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. 
Either \"37\" or \"38\".", category: "required"} + expCountsFile: {description: "Isofox reference file.", category: "required"} + expGcRatiosFile: {description: "Isofox reference file.", category: "required"} + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} + transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task IsofoxNeoEpitopes { + input { + String sampleName + File neoepitopeFile + File bamFile + File referenceFasta + File referenceFastaFai + File referenceFastaDict + String refGenomeVersion + File expCountsFile + File expGcRatiosFile + Array[File]+ isofoxOutput + + String outputDir = "./isofox" + Int readLength = 151 + + #The following should be in the same directory. + File geneDataCsv + File proteinFeaturesCsv + File transExonDataCsv + File transSpliceDataCsv + + Int threads = 10 + String javaXmx = "12G" + String memory = "13GiB" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/hmftools-isofox:1.6.2--hdfd78af_0" + } + + command { + set -e + cp -r $(dirname ~{isofoxOutput[0]}) ~{outputDir} sed 's/\t/,/g' ~{neoepitopeFile} > tmp.neo_data.csv isofox -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -sample ~{sampleName} \ - -functions 'NEO_EPITOPES;TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS' \ + -functions 'NEO_EPITOPES' \ -neoepitope_file tmp.neo_data.csv \ -bam_file ~{bamFile} \ -ref_genome ~{referenceFasta} \ @@ -548,6 +627,7 @@ task Isofox { File geneCollection = "~{outputDir}/~{sampleName}.isf.gene_collection.csv" File passFusions = "~{outputDir}/~{sampleName}.isf.pass_fusions.csv" File fusions = "~{outputDir}/~{sampleName}.isf.fusions.csv" + #TODO Array[File] outputs = [neoepitope, altSpliceJunc, geneCollection, passFusions, fusions] } @@ -569,6 +649,7 @@ task Isofox { refGenomeVersion: {description: "The version of the genome assembly used for alignment. 
Either \"37\" or \"38\".", category: "required"} expCountsFile: {description: "Isofox reference file.", category: "required"} expGcRatiosFile: {description: "Isofox reference file.", category: "required"} + isofoxOutput: {description: "Isofox output files.", category: "required"} outputDir: {description: "The directory the outputs will be written to.", category: "required"} geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} From b025103ca44ed04795329ab1eaac52476533b5ea Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 12 Sep 2023 16:02:41 +0200 Subject: [PATCH 64/75] add inputs to isofox for bam index --- hmftools.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 1e5b6b21..23d77f4f 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -501,6 +501,7 @@ task Isofox { input { String sampleName File bamFile + File bamIndex File referenceFasta File referenceFastaFai File referenceFastaDict @@ -554,6 +555,7 @@ task Isofox { parameter_meta { sampleName: {description: "The name of the sample.", category: "required"} bamFile: {description: "Input rna BAM file.", category: "required"} + bamIndex: {description: "Index of the BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} @@ -580,6 +582,7 @@ task IsofoxNeoEpitopes { String sampleName File neoepitopeFile File bamFile + File bamIndex File referenceFasta File 
referenceFastaFai File referenceFastaDict @@ -643,6 +646,7 @@ task IsofoxNeoEpitopes { sampleName: {description: "The name of the sample.", category: "required"} neoepitopeFile: {description: "Neo's data file.", category: "required"} bamFile: {description: "Input rna BAM file.", category: "required"} + bamIndex: {description: "Index of the BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} From c1a9308d593d9813bc8d0c1ede5fe66d883fc69d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 14 Sep 2023 14:45:30 +0200 Subject: [PATCH 65/75] increase isofox memory --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 23d77f4f..ec261020 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -519,8 +519,8 @@ task Isofox { File transSpliceDataCsv Int threads = 10 - String javaXmx = "12G" - String memory = "13GiB" + String javaXmx = "24G" + String memory = "25GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/hmftools-isofox:1.6.2--hdfd78af_0" } From 6433047a45cb9a0ea1f14f9dece90c8ae7de4209 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 14 Sep 2023 15:43:20 +0200 Subject: [PATCH 66/75] increase memory for isofox --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index ec261020..773ecb8f 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -519,8 +519,8 @@ task Isofox { File transSpliceDataCsv Int threads = 10 - String javaXmx = "24G" - String memory = "25GiB" + String javaXmx = "48G" + String memory = "50GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/hmftools-isofox:1.6.2--hdfd78af_0" } From 24037d1f7aa7f6ab7b8aaa78897f79c3498ae31a 
Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 15 Sep 2023 13:45:53 +0200 Subject: [PATCH 67/75] add isofox outputs --- hmftools.wdl | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 773ecb8f..38f48137 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -541,8 +541,15 @@ task Isofox { } output { - #TODO - Array[File] outputs = [] + File altSpliceJunc = "~{outputDir}/~{sampleName}.isf.alt_splice_junc.csv" + File fusions = "~{outputDir}/~{sampleName}.isf.fusions.csv" + File geneCollection = "~{outputDir}/~{sampleName}.isf.gene_collection.csv" + File geneData = "~{outputDir}/~{sampleName}.isf.gene_data.csv " + File passFusions = "~{outputDir}/~{sampleName}.isf.pass_fusions.csv" + File summary = "~{outputDir}/~{sampleName}.isf.summary.csv" + File transcriptData = "~{outputDir}/~{sampleName}.isf.transcript_data.csv" + Array[File] outputs = [altSpliceJunc, fusions, geneCollection, geneData, + passFusions, summary, transcriptData] } runtime { @@ -625,14 +632,16 @@ task IsofoxNeoEpitopes { } output { - File neoepitope = "~{outputDir}/~{sampleName}.isf.neoepitope.csv" File altSpliceJunc = "~{outputDir}/~{sampleName}.isf.alt_splice_junc.csv" + File fusions = "~{outputDir}/~{sampleName}.isf.fusions.csv" File geneCollection = "~{outputDir}/~{sampleName}.isf.gene_collection.csv" + File geneData = "~{outputDir}/~{sampleName}.isf.gene_data.csv " File passFusions = "~{outputDir}/~{sampleName}.isf.pass_fusions.csv" - File fusions = "~{outputDir}/~{sampleName}.isf.fusions.csv" - #TODO - Array[File] outputs = [neoepitope, altSpliceJunc, geneCollection, - passFusions, fusions] + File summary = "~{outputDir}/~{sampleName}.isf.summary.csv" + File transcriptData = "~{outputDir}/~{sampleName}.isf.transcript_data.csv" + File neoepitope = "~{outputDir}/~{sampleName}.isf.neoepitope.csv" + Array[File] outputs = [altSpliceJunc, fusions, geneCollection, geneData, + passFusions, summary, transcriptData, neoepitope] } 
runtime { From f28727ce1b6212918a1a38044850546f624bffed Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 15 Sep 2023 13:48:31 +0200 Subject: [PATCH 68/75] adjust isofox memory --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 38f48137..7710722a 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -520,7 +520,7 @@ task Isofox { Int threads = 10 String javaXmx = "48G" - String memory = "50GiB" + String memory = "60GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/hmftools-isofox:1.6.2--hdfd78af_0" } From 6097d3db940364ba3d65abee4a420d18adb9be84 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 15 Sep 2023 17:26:17 +0200 Subject: [PATCH 69/75] increase time for isofox --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 7710722a..f3640aa3 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -521,7 +521,7 @@ task Isofox { Int threads = 10 String javaXmx = "48G" String memory = "60GiB" - Int timeMinutes = 120 + Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/hmftools-isofox:1.6.2--hdfd78af_0" } From 0d283c64f051653041e8b7164d022a879e0db0a5 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 26 Sep 2023 15:40:47 +0200 Subject: [PATCH 70/75] convert neoepitope csv into tsv for neoscorer --- hmftools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index f3640aa3..62e95e00 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1040,6 +1040,8 @@ task NeoScorer { command { set -e mkdir -p ~{outputDir} + mkdir isofox + sed 's/,/\t/g' ~{isofoxDir}/~{sampleName}.isf.neoepitope.csv > isofox/~{sampleName}.isf.neoepitope.tsv neo com.hartwig.hmftools.neo.scorer.NeoScorer Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -sample ~{sampleId} \ ~{"-cancer_type " + cancerType} \ From 063dc44c8794d78c077b82fe95a5d9ee25ffef74 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 26 Sep 2023 15:43:30 +0200 
Subject: [PATCH 71/75] make the sed command depend on the input --- hmftools.wdl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 62e95e00..1240c586 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1036,12 +1036,15 @@ task NeoScorer { } String isofoxDir = sub(select_first([isofoxOutput, [""]])[0], basename(select_first([isofoxOutput, [""]])[0]), "") + String sedCommand = if defined(isofoxOutput) + then "sed 's/,/\t/g' ~{isofoxDir}/~{sampleName}.isf.neoepitope.csv > isofox/~{sampleName}.isf.neoepitope.tsv" + else "" command { set -e mkdir -p ~{outputDir} mkdir isofox - sed 's/,/\t/g' ~{isofoxDir}/~{sampleName}.isf.neoepitope.csv > isofox/~{sampleName}.isf.neoepitope.tsv + ~{sedCommand} neo com.hartwig.hmftools.neo.scorer.NeoScorer Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -sample ~{sampleId} \ ~{"-cancer_type " + cancerType} \ @@ -1050,7 +1053,7 @@ task NeoScorer { -score_file_id ~{neoBindingFileId} \ -cancer_tpm_medians_file ~{cancerTpmMedians} \ -neo_dir ~{sub(neoData, basename(neoData), "")} \ - ~{if defined(isofoxOutput) then "-isofox_dir " + isofoxDir else ""} \ + ~{if defined(isofoxOutput) then "-isofox_dir isofox" else ""} \ -lilac_dir ~{sub(lilacOutput[0], basename(lilacOutput[0]), "")} \ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ ~{"-rna_somatic_vcf " + rnaSomaticVcf} \ From 20a8563f389aa827a20a8d8d9f20a94923ea2b0c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 26 Sep 2023 15:58:44 +0200 Subject: [PATCH 72/75] sampleName should be sampleId --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1240c586..10e7ae62 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1037,7 +1037,7 @@ task NeoScorer { String isofoxDir = sub(select_first([isofoxOutput, [""]])[0], basename(select_first([isofoxOutput, [""]])[0]), "") String sedCommand = if defined(isofoxOutput) - then "sed 's/,/\t/g' 
~{isofoxDir}/~{sampleName}.isf.neoepitope.csv > isofox/~{sampleName}.isf.neoepitope.tsv" + then "sed 's/,/\t/g' ~{isofoxDir}/~{sampleId}.isf.neoepitope.csv > isofox/~{sampleId}.isf.neoepitope.tsv" else "" command { From 19c2993a08a46aa7eb22ef892b81ebb71430f542 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 29 Sep 2023 10:53:48 +0200 Subject: [PATCH 73/75] fix copying of isofox data --- hmftools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/hmftools.wdl b/hmftools.wdl index 10e7ae62..39a518ed 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1044,6 +1044,7 @@ task NeoScorer { set -e mkdir -p ~{outputDir} mkdir isofox + cp isofoxDir/* -t isofox ~{sedCommand} neo com.hartwig.hmftools.neo.scorer.NeoScorer Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -sample ~{sampleId} \ From c3da2d570f653cf64c1632d064fb30c8554ccee7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 29 Sep 2023 11:16:08 +0200 Subject: [PATCH 74/75] fix cp command --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 39a518ed..367668ac 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1044,7 +1044,7 @@ task NeoScorer { set -e mkdir -p ~{outputDir} mkdir isofox - cp isofoxDir/* -t isofox + cp isofoxDir/* ./isofox/ ~{sedCommand} neo com.hartwig.hmftools.neo.scorer.NeoScorer Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -sample ~{sampleId} \ From 5a5bc99aeccc812ba666c308306cc7390e5933fe Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 29 Sep 2023 11:42:20 +0200 Subject: [PATCH 75/75] fix missing placholder syntax --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 367668ac..15f89e64 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1044,7 +1044,7 @@ task NeoScorer { set -e mkdir -p ~{outputDir} mkdir isofox - cp isofoxDir/* ./isofox/ + cp ~{isofoxDir}/* ./isofox/ ~{sedCommand} neo com.hartwig.hmftools.neo.scorer.NeoScorer Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -sample ~{sampleId} 
\