From 58613448dbe059896e997ba29f5109691ab45f68 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 26 Jul 2022 16:54:37 +0200
Subject: [PATCH 01/75] update some tool versions

---
 gridss.wdl   |  21 ++++----
 hmftools.wdl | 137 +++++++--------------------------------------------
 2 files changed, 30 insertions(+), 128 deletions(-)

diff --git a/gridss.wdl b/gridss.wdl
index 35e41d21..27dedf32 100644
--- a/gridss.wdl
+++ b/gridss.wdl
@@ -34,7 +34,7 @@ task AnnotateInsertedSequence {
         Int threads = 8
         String javaXmx = "8G"
         String memory = "9G"
-        String dockerImage = "quay.io/biowdl/gridss:2.12.2"
+        String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image
         Int timeMinutes = 120
     }
 
@@ -165,7 +165,7 @@ task GRIDSS {
         Int nonJvmMemoryGb = 50
         Int threads = 4
         Int timeMinutes = ceil(7200 / threads) + 1800
-        String dockerImage = "quay.io/biowdl/gridss:2.12.2"
+        String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image
     }
 
     command {
@@ -241,14 +241,14 @@ task GridssAnnotateVcfRepeatmasker {
 
         String memory = "25G"
         Int threads = 8
-        String dockerImage = "quay.io/biowdl/gridss:2.12.2"
+        String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image
         Int timeMinutes = 1440
     }
 
     command {
         gridss_annotate_vcf_repeatmasker \
         --output ~{outputPath} \
-        --jar /usr/local/share/gridss-2.12.2-0/gridss.jar \
+        --jar /usr/local/share/gridss-2.13.2-1/gridss.jar \
         -w . \
         -t ~{threads} \
         ~{gridssVcf}
@@ -289,9 +289,10 @@ task Virusbreakend {
         File virusbreakendDB
         String outputPath = "./virusbreakend.vcf"
 
-        String memory = "75G"
+        String extraMemoryGB = 10
+        Int gridssMemoryGB = 60
         Int threads = 8
-        String dockerImage = "quay.io/biowdl/gridss:2.12.2"
+        String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image
         Int timeMinutes = 180
     }
 
@@ -304,8 +305,9 @@ task Virusbreakend {
         --workingdir . \
         --reference ~{referenceFasta} \
         --db virusbreakenddb \
-        --jar /usr/local/share/gridss-2.12.2-0/gridss.jar \
+        --jar /usr/local/share/gridss-2.13.2-1/gridss.jar \
         -t ~{threads} \
+        --gridssargs '--jvmheap ~{gridssMemoryGB}G' \
         ~{bam}
     }
 
@@ -316,7 +318,7 @@ task Virusbreakend {
 
     runtime {
         cpu: threads
-        memory: memory
+        memory: "~{gridssMemoryGB + extraMemoryGB}G"
         time_minutes: timeMinutes # !UnknownRuntimeKey
         docker: dockerImage
     }
@@ -328,7 +330,8 @@ task Virusbreakend {
         referenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the reference.", category: "required"}
         virusbreakendDB: {description: "A .tar.gz containing the virusbreakend database.", category: "required"}
         outputPath: {description: "The path the output should be written to.", category: "common"}
-        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        extraMemoryGB: {description: "Extra memory needed for the job in GB.", category: "advanced"}
+        gridssMemoryGB: {description: "Memory assigned to GRIDSS in GB.", category: "advanced"}
         threads: {description: "The number of the threads to use.", category: "advanced"}
         timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
         dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
diff --git a/hmftools.wdl b/hmftools.wdl
index 628e2f9b..7d87a982 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -38,7 +38,7 @@ task Amber {
         String memory = "70G"
         String javaXmx = "64G"
         Int timeMinutes = 240
-        String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0"
+        String dockerImage = "quay.io/biocontainers/hmftools-amber:3.9--hdfd78af_0"
     }
 
     command {
@@ -115,7 +115,7 @@ task Cobalt {
         String memory = "5G"
         String javaXmx = "4G"
         Int timeMinutes = 240
-        String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0"
+        String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.13--hdfd78af_0"
     }
 
     command {
@@ -342,16 +342,17 @@ task Gripss {
         File knownFusionPairBedpe
         File breakendPon
         File breakpointPon
-        String referenceName
-        String tumorName
+        String? referenceName
+        String sampleName
         File vcf
         File vcfIndex
+        String outputId
         String outputDir = "./"
 
         String memory = "17G"
         String javaXmx = "16G"
         Int timeMinutes = 50
-        String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.0--hdfd78af_0"
+        String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.1--hdfd78af_0"
     }
 
     command {
@@ -362,11 +363,11 @@ task Gripss {
         -known_hotspot_file ~{knownFusionPairBedpe} \
         -pon_sgl_file ~{breakendPon} \
         -pon_sv_file ~{breakpointPon} \
-        -reference ~{referenceName} \
+        ~{"-reference " + referenceName} \
         -sample ~{tumorName} \
         -vcf ~{vcf} \
         -output_dir ~{outputDir} \
-        -output_id somatic
+        -output_id ~{outputId}
     }
 
     output {
@@ -405,116 +406,6 @@ task Gripss {
     }
 }
 
-task GripssApplicationKt {
-    # Obsolete
-    input {
-        File inputVcf
-        String outputPath = "gripss.vcf.gz"
-        String tumorName
-        String referenceName
-        File referenceFasta
-        File referenceFastaFai
-        File referenceFastaDict
-        File breakpointHotspot
-        File breakendPon
-        File breakpointPon
-
-        String memory = "32G"
-        String javaXmx = "31G"
-        Int timeMinutes = 45
-        String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0"
-    }
-
-    command {
-        java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
-        -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \
-        com.hartwig.hmftools.gripss.GripssApplicationKt \
-        -tumor ~{tumorName} \
-        -reference ~{referenceName} \
-        -ref_genome ~{referenceFasta} \
-        -breakpoint_hotspot ~{breakpointHotspot} \
-        -breakend_pon ~{breakendPon} \
-        -breakpoint_pon ~{breakpointPon} \
-        -input_vcf ~{inputVcf} \
-        -output_vcf ~{outputPath} \
-        -paired_normal_tumor_ordinals
-    }
-
-    output {
-        File outputVcf = outputPath
-        File outputVcfIndex = outputPath + ".tbi"
-    }
-
-    runtime {
-        memory: memory
-        time_minutes: timeMinutes # !UnknownRuntimeKey
-        docker: dockerImage
-    }
-
-    parameter_meta {
-        inputVcf: {description: "The input VCF.", category: "required"}
-        outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"}
-        referenceName: {description: "The name of the normal sample.", category: "required"}
-        tumorName: {description: "The name of the tumor sample.", category: "required"}
-        referenceFasta: {description: "The reference fasta file.", category: "required"}
-        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
-                             category: "required"}
-        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
-        breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"}
-        breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"}
-        breakpointPon: {description: "Equivalent to the `-breakpoint_pon` option.", category: "required"}
-        memory: {description: "The amount of memory this job will use.", category: "advanced"}
-        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
-                  category: "advanced"}
-        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
-        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
-                      category: "advanced"}
-    }
-}
-
-task GripssHardFilterApplicationKt {
-    # Obsolete
-    input {
-        File inputVcf
-        String outputPath = "gripss_hard_filter.vcf.gz"
-
-        String memory = "3G"
-        String javaXmx = "2G"
-        Int timeMinutes = 15
-        String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0"
-    }
-
-    command {
-        java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
-        -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \
-        com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \
-        -input_vcf ~{inputVcf} \
-        -output_vcf ~{outputPath}
-    }
-
-    output {
-        File outputVcf = outputPath
-        File outputVcfIndex = outputPath + ".tbi"
-    }
-
-    runtime {
-        memory: memory
-        time_minutes: timeMinutes # !UnknownRuntimeKey
-        docker: dockerImage
-    }
-
-    parameter_meta {
-        inputVcf: {description: "The input VCF.", category: "required"}
-        outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"}
-        memory: {description: "The amount of memory this job will use.", category: "advanced"}
-        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
-                  category: "advanced"}
-        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
-        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
-                      category: "advanced"}
-    }
-}
-
 task HealthChecker {
     input {
         String outputDir = "."
@@ -1212,6 +1103,11 @@ task Sage {
         Boolean hg38 = false
         Boolean panelOnly = false
         String outputPath = "./sage.vcf.gz"
+        #The following should be in the same directory.
+        File geneDataCsv
+        File proteinFeaturesCsv
+        File transExonDataCsv
+        File transSpliceDataCsv
 
         String? referenceName
         File? referenceBam
@@ -1229,7 +1125,7 @@ task Sage {
         String javaXmx = "50G"
         String memory = "51G"
         Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads)
-        String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1"
+        String dockerImage = "quay.io/biocontainers/hmftools-sage:3.0.3--hdfd78af_0"
     }
 
     command {
@@ -1242,7 +1138,10 @@ task Sage {
         -hotspots ~{hotspots} \
         -panel_bed ~{panelBed} \
         -high_confidence_bed ~{highConfidenceBed} \
-        -assembly ~{true="hg38" false="hg19" hg38} \
+        -ref_genome_version ~{true="hg38" false="hg19" hg38} \
+        -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \
+        -write_bqr_data \
+        -write_bqr_plot \
         ~{"-hotspot_min_tumor_qual " + hotspotMinTumorQual} \
         ~{"-panel_min_tumor_qual " + panelMinTumorQual} \
         ~{"-hotspot_max_germline_vaf " + hotspotMaxGermlineVaf} \

From 814184efd50fd87dc1d478d0a88d7af04816130b Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Thu, 28 Jul 2022 16:14:48 +0200
Subject: [PATCH 02/75] start adjusting pave command

---
 hmftools.wdl | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 7d87a982..9d84f253 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -790,11 +790,14 @@ task Pave {
         File proteinFeaturesCsv
         File transExonDataCsv
         File transSpliceDataCsv
+        File mappabilityBed
+        File? ponFile
+        File? ponArtefactFile
 
         Int timeMinutes = 50
         String javaXmx = "8G"
         String memory = "9G"
-        String dockerImage = "quay.io/biowdl/pave:v1.0"
+        String dockerImage = "quay.io/biowdl/pave:v1.2.2"
     }
 
     command {
@@ -807,7 +810,11 @@ task Pave {
         -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \
         -ref_genome ~{referenceFasta} \
         -ref_genome_version ~{refGenomeVersion} \
-        -driver_gene_panel ~{driverGenePanel}
+        -driver_gene_panel ~{driverGenePanel} \
+        -read_pass_only \
+        -mappability_bed ~{mappabilityBed} \
+        ~{"-pon_file " + ponFile} \
+        ~{"-pon_artefact_file " + ponArtefactFile} \
     }
 
     output {

From 24bb770c2330afc53f1ddcbe01b43e2998904683 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Thu, 28 Jul 2022 16:17:29 +0200
Subject: [PATCH 03/75] start adjusting pave command

---
 hmftools.wdl | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/hmftools.wdl b/hmftools.wdl
index 9d84f253..ddbc890e 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -793,6 +793,8 @@ task Pave {
         File mappabilityBed
         File? ponFile
         File? ponArtefactFile
+        String? ponFilters
+        Array[File]+?
 
         Int timeMinutes = 50
         String javaXmx = "8G"
@@ -815,6 +817,9 @@ task Pave {
         -mappability_bed ~{mappabilityBed} \
         ~{"-pon_file " + ponFile} \
         ~{"-pon_artefact_file " + ponArtefactFile} \
+        ~{if defined(ponFilters) then ("-pon_filters '" + ponFilters + "'") else ""} \
+        ~{if defined(gnomadFreqDir) then "-gnomad_freq_dir " + sub(gnomadFreqDir[0], basename(gnomadFreqDir[0]), "") else ""} \
+        ~{if defined(gnomadFreqDir) then "-gnomad_load_chr_on_demand" else ""}
     }
 
     output {

From 67cffb398345d1e7c84d9bd5edead18ea1239c81 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 29 Jul 2022 15:25:51 +0200
Subject: [PATCH 04/75] add more optional inputs to Pave

---
 hmftools.wdl | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index ddbc890e..5307489b 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -794,7 +794,12 @@ task Pave {
         File? ponFile
         File? ponArtefactFile
         String? ponFilters
-        Array[File]+?
+        Array[File]+? gnomadFreqDir
+        File? clinvarVcf
+        File? clinvarVcfIndex
+        File? blacklistVcf
+        File? blacklistBed
+        File? blacklistVcfIndex
 
         Int timeMinutes = 50
         String javaXmx = "8G"
@@ -819,7 +824,10 @@ task Pave {
         ~{"-pon_artefact_file " + ponArtefactFile} \
         ~{if defined(ponFilters) then ("-pon_filters '" + ponFilters + "'") else ""} \
         ~{if defined(gnomadFreqDir) then "-gnomad_freq_dir " + sub(gnomadFreqDir[0], basename(gnomadFreqDir[0]), "") else ""} \
-        ~{if defined(gnomadFreqDir) then "-gnomad_load_chr_on_demand" else ""}
+        ~{if defined(gnomadFreqDir) then "-gnomad_load_chr_on_demand" else ""} \
+        ~{"-clinvar_vcf " + clinvarVcf} \
+        ~{"-blacklist_bed " + blacklistBed} \
+        ~{"-blacklist_vcf " + blacklistVcf}
     }
 
     output {

From 7462967c94534014fe0125cc26d97f3c41fbb624 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 29 Jul 2022 15:30:20 +0200
Subject: [PATCH 05/75] adjust purple command

---
 hmftools.wdl | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/hmftools.wdl b/hmftools.wdl
index 5307489b..03cbae3d 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -978,9 +978,11 @@ task Purple {
         File referenceFasta
         File referenceFastaFai
         File referenceFastaDict
+        String refGenomeVersion
         File driverGenePanel
         File somaticHotspots
         File germlineHotspots
+        File germlineDelFreqFile
         #The following should be in the same directory.
         File geneDataCsv
         File proteinFeaturesCsv
@@ -1000,6 +1002,7 @@ task Purple {
         -reference ~{referenceName} \
         -germline_vcf ~{germlineVcf} \
         -germline_hotspots ~{germlineHotspots} \
+        -germline_del_freq_file ~{germlineDelFreqFile} \
         -tumor ~{tumorName} \
         -output_dir ~{outputDir} \
         -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \
@@ -1010,6 +1013,7 @@ task Purple {
         -sv_recovery_vcf ~{fullSvVcf} \
         -circos /usr/local/bin/circos \
         -ref_genome ~{referenceFasta} \
+        -ref_genome_version ~{refGenomeVersion} \
         -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \
         -run_drivers \
         -somatic_hotspots ~{somaticHotspots} \

From 09b9db706d7e344c2a0b7e6e2d7a9949c96837ec Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 5 Aug 2022 12:07:21 +0200
Subject: [PATCH 06/75] update pave parameter_meta

---
 hmftools.wdl | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 90256ef6..6d6ffec1 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -785,12 +785,14 @@ task Pave {
         File referenceFastaDict
         String refGenomeVersion
         File driverGenePanel
+        File mappabilityBed
+
         #The following should be in the same directory.
         File geneDataCsv
         File proteinFeaturesCsv
         File transExonDataCsv
         File transSpliceDataCsv
-        File mappabilityBed
+
         File? ponFile
         File? ponArtefactFile
         String? ponFilters
@@ -856,7 +858,16 @@ task Pave {
         proteinFeaturesCsv: {description: "A  CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
         transExonDataCsv: {description: "A  CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"}
         transSpliceDataCsv: {description: "A  CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"}
-
+        mappabilityBed: {description: "A bed file with mappability information.", category: "required"}
+        ponFile: {description: "A panel of normals files.", category: "common"}
+        ponArtefactFile: {description: "A panel of normals artefact file.", category: "common"}
+        ponFilters: {description: "Filters to be applied based on the panel of normals.", category: "common"}
+        gnomadFreqDir: {description: "A directory with gnomad frequency information.", category: "common"}
+        clinvarVcf: {description: "A clinvar VCF file.", category: "common"}
+        clinvarVcfIndex: {description: "The index for the clinvar VCF file.", category: "common"}
+        blacklistVcf: {description: "A blacklist VCF file.", category: "common"}
+        blacklistBed: {description: "A blacklist bed file.", category: "common"}
+        blacklistVcfIndex: {description: "The index for the blacklist vcf file.", category: "common"}
         memory: {description: "The amount of memory this job will use.", category: "advanced"}
         javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                   category: "advanced"}

From 89c709b1aa222c51dd33470186540b4099edd3eb Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 5 Aug 2022 15:29:43 +0200
Subject: [PATCH 07/75] changes for WGSinCancerDiagnostics

---
 hmftools.wdl | 71 ++++++++++++++++++++++++++++++++++------------------
 sambamba.wdl | 45 +++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+), 25 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 6d6ffec1..07b708db 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -364,17 +364,17 @@ task Gripss {
         -pon_sgl_file ~{breakendPon} \
         -pon_sv_file ~{breakpointPon} \
         ~{"-reference " + referenceName} \
-        -sample ~{tumorName} \
+        -sample ~{sampleName} \
         -vcf ~{vcf} \
         -output_dir ~{outputDir} \
         -output_id ~{outputId}
     }
 
     output {
-        File fullVcf = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz"
-        File fullVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz.tbi"
-        File filteredVcf = "~{outputDir}/~{tumorName}.gripss.filtered.somatic.vcf.gz"
-        File filteredVcfIndex = "~{outputDir}/~{tumorName}.gripss.filtered.somatic.vcf.gz.tbi"
+        File fullVcf = "~{outputDir}/~{sampleName}.gripss.somatic.vcf.gz"
+        File fullVcfIndex = "~{outputDir}/~{sampleName}.gripss.somatic.vcf.gz.tbi"
+        File filteredVcf = "~{outputDir}/~{sampleName}.gripss.filtered.somatic.vcf.gz"
+        File filteredVcfIndex = "~{outputDir}/~{sampleName}.gripss.filtered.somatic.vcf.gz.tbi"
     }
 
     runtime {
@@ -391,7 +391,7 @@ task Gripss {
         knownFusionPairBedpe: {description: "Equivalent to the `-known_hotspot_file` option.", category: "required"}
         breakendPon: {description: "Equivalent to the `-pon_sgl_file` option.", category: "required"}
         breakpointPon: {description: "Equivalent to the `-pon_sv_file` option.", category: "required"}
-        tumorName: {description: "The name of the tumor sample.", category: "required"}
+        sampleName: {description: "The name of the tumor sample.", category: "required"}
         referenceName: {description: "The name of the normal sample.", category: "required"}
         vcf: {description: "The input VCF.", category: "required"}
         vcfIndex: {description: "The index for the input VCF.", category: "required"}
@@ -420,7 +420,7 @@ task HealthChecker {
         String javaXmx = "2G"
         String memory = "1G"
         Int timeMinutes = 1
-        String dockerImage = "quay.io/biowdl/health-checker:3.2"
+        String dockerImage = "quay.io/biowdl/health-checker:3.4"
     }
 
     command {
@@ -481,14 +481,20 @@ task Linx {
         String sampleName
         File svVcf
         File svVcfIndex
-        Array[File]+ purpleOutput
+        Array[File] purpleOutput = []
         String refGenomeVersion
         String outputDir = "./linx"
-        File fragileSiteCsv
+        File? fragileSiteCsv
         File lineElementCsv
-        File knownFusionCsv
+        File? knownFusionCsv
         File driverGenePanel
         Boolean writeAllVisFusions = false
+        Boolean germline = false
+        Boolean checkFusions = true
+        Boolean checkDrivers = true
+        Boolean writeVisData = true
+        File? germlinePonSvFile
+        File? germlinePonSglFile
         #The following should be in the same directory.
         File geneDataCsv
         File proteinFeaturesCsv
@@ -498,26 +504,34 @@ task Linx {
         String memory = "9G"
         String javaXmx = "8G"
         Int timeMinutes = 10
-        String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0"
+        String dockerImage = "quay.io/biowdl/linx:1.19.1" #patched version of biocontainer
+
+        String? DONOTDEFINE
     }
 
+    String? purpleDir = if length(purpleOutput) > 0
+        then sub(purpleOutput[0], basename(purpleOutput[0]), "")
+        else DONOTDEFINE
+
     command {
         linx -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
         -sample ~{sampleName} \
         -sv_vcf ~{svVcf} \
-        -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \
+        ~{"-purple_dir " + purpleDir} \
         -ref_genome_version ~{refGenomeVersion} \
         -output_dir ~{outputDir} \
-        -fragile_site_file ~{fragileSiteCsv} \
+        ~{"-fragile_site_file " + fragileSiteCsv} \
         -line_element_file ~{lineElementCsv} \
         -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \
-        -check_fusions \
-        -known_fusion_file ~{knownFusionCsv} \
-        -check_drivers \
+        ~{if checkFusions then "-check_fusions" else ""} \
+        ~{"-known_fusion_file " + knownFusionCsv} \
+        ~{if checkDrivers then "-check_drivers" else ""} \
         -driver_gene_panel ~{driverGenePanel} \
-        -chaining_sv_limit 0 \
-        -write_vis_data \
-        ~{if writeAllVisFusions then "-write_all_vis_fusions" else ""}
+        ~{if writeVisData then "-write_vis_data" else ""} \
+        ~{if writeAllVisFusions then "-write_all_vis_fusions" else ""} \
+        ~{if germline then "-germline" else ""} \
+        ~{"-germline_pon_sv_file " + germlinePonSvFile} \
+        ~{"-germline_pon_sgl_file " + germlinePonSglFile}
     }
 
     output {
@@ -584,14 +598,14 @@ task LinxVisualisations {
         String memory = "9G"
         String javaXmx = "8G"
         Int timeMinutes = 1440
-        String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0"
+        String dockerImage = "quay.io/biowdl/linx:1.19.1" #patched version of biocontainer
     }
 
     command {
         set -e
         mkdir -p ~{outputDir}
         java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
-        -cp /usr/local/share/hmftools-linx-1.18-0/sv-linx.jar \
+        -cp /usr/local/share/hmftools-linx-1.19-0/linx.jar \
         com.hartwig.hmftools.linx.visualiser.SvVisualiser \
         -sample ~{sample} \
         -ref_genome_version ~{refGenomeVersion} \
@@ -786,6 +800,7 @@ task Pave {
         String refGenomeVersion
         File driverGenePanel
         File mappabilityBed
+        Array[File] gnomadFreqFiles = []
 
         #The following should be in the same directory.
         File geneDataCsv
@@ -796,7 +811,6 @@ task Pave {
         File? ponFile
         File? ponArtefactFile
         String? ponFilters
-        Array[File]+? gnomadFreqDir
         File? clinvarVcf
         File? clinvarVcfIndex
         File? blacklistVcf
@@ -807,8 +821,14 @@ task Pave {
         String javaXmx = "8G"
         String memory = "9G"
         String dockerImage = "quay.io/biowdl/pave:v1.2.2"
+
+        String? DONOTDEFINE
     }
 
+    String? gnomadFreqDir = if length(gnomadFreqFiles) > 0
+        then sub(gnomadFreqFiles[0], basename(gnomadFreqFiles[0]), "")
+        else DONOTDEFINE
+
     command {
         set -e
         mkdir -p ~{outputDir}
@@ -825,7 +845,7 @@ task Pave {
         ~{"-pon_file " + ponFile} \
         ~{"-pon_artefact_file " + ponArtefactFile} \
         ~{if defined(ponFilters) then ("-pon_filters '" + ponFilters + "'") else ""} \
-        ~{if defined(gnomadFreqDir) then "-gnomad_freq_dir " + sub(gnomadFreqDir[0], basename(gnomadFreqDir[0]), "") else ""} \
+        ~{"-gnomad_freq_dir " + gnomadFreqDir} \
         ~{if defined(gnomadFreqDir) then "-gnomad_load_chr_on_demand" else ""} \
         ~{"-clinvar_vcf " + clinvarVcf} \
         ~{"-blacklist_bed " + blacklistBed} \
@@ -862,7 +882,7 @@ task Pave {
         ponFile: {description: "A panel of normals files.", category: "common"}
         ponArtefactFile: {description: "A panel of normals artefact file.", category: "common"}
         ponFilters: {description: "Filters to be applied based on the panel of normals.", category: "common"}
-        gnomadFreqDir: {description: "A directory with gnomad frequency information.", category: "common"}
+        gnomadFreqFiles: {description: "A directory with gnomad frequency information.", category: "common"}
         clinvarVcf: {description: "A clinvar VCF file.", category: "common"}
         clinvarVcfIndex: {description: "The index for the clinvar VCF file.", category: "common"}
         blacklistVcf: {description: "A blacklist VCF file.", category: "common"}
@@ -1005,7 +1025,8 @@ task Purple {
         String memory = "9G"
         String javaXmx = "8G"
         # clone of quay.io/biocontainers/hmftools-purple:3.2--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6'
-        String dockerImage = "quay.io/biowdl/hmftools-purple:3.2"
+        #String dockerImage = "quay.io/biowdl/hmftools-purple:3.2" FIXME see if biocontainer works
+        String dockerImage = "quay.io/biocontainers/hmftools-purple:3.5--hdfd78af_0"
     }
 
     command {
diff --git a/sambamba.wdl b/sambamba.wdl
index 6696668a..e3c3fa38 100644
--- a/sambamba.wdl
+++ b/sambamba.wdl
@@ -139,6 +139,51 @@ task Markdup {
     }
 }
 
+task Slice {
+    input {
+        File bamFile
+        File bamIndex
+        String outputPath = "./sliced.bam"
+        File regions
+
+        String memory = "8G"
+        Int timeMinutes = 720
+        String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2"
+    }
+
+    command {
+        set -e
+        mkdir -p "$(dirname ~{outputPath})"
+
+        sambamba slice \
+        -L ~{regions} \
+        -o ~{outputPath} \
+        ~{bamFile}
+    }
+
+    output {
+        File slicedBam = outputPath
+        File slicedBamIndex = sub(outputPath, "\.bam$", ".bai")
+    }
+
+    runtime {
+        memory: memory
+        time_minutes: timeMinutes
+        docker: dockerImage
+    }
+
+    parameter_meta {
+        bamIndex: {description: "The input BAM files.", category: "required"}
+        outputPath: {description: "Output directory path + output file.", category: "required"}
+        regions: {description: "Regiosn to get sliced.", category: "required"}
+
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+}
+
 task Sort {
     input {
         File inputBam

From 8308cc95eec0675374b3e09d1b165f3f93fed199 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Thu, 11 Aug 2022 15:19:50 +0200
Subject: [PATCH 08/75] start adding lilac

---
 hmftools.wdl | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/hmftools.wdl b/hmftools.wdl
index 07b708db..b87fcedd 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -476,6 +476,65 @@ task HealthChecker {
     }
 }
 
+task Lilac {  # Runs LILAC on a reference/tumor BAM pair; outputs and parameter_meta are not declared yet.
+    input {
+        String tumorName
+        File referenceBam
+        File referenceBamIndex
+        File tumorBam
+        File tumorBamIndex
+        String refGenomeVersion
+        File referenceFasta
+        File referenceFastaFai
+        File referenceFastaDict
+        File geneCopyNumberFile
+        File somaticVariantsFile
+        File somaticVariantsFileIndex
+        String outputDir = "./lilac"
+
+        # The three files below must share one directory: the directory of the first one is passed as -resource_dir.
+        File hlaRefAminoacidSequencesCsv
+        File hlaRefNucleotideSequencesCsv
+        File lilacAlleleFrequenciesCsv
+
+        String javaXmx = "15G"
+        String memory = "16G"
+        Int timeMinutes = 1440 #FIXME
+        Int threads = 1
+        String dockerImage = "" #TODO
+    }
+
+    command {
+        LILAC -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+        -sample ~{tumorName} \
+        -reference_bam ~{referenceBam} \
+        -ref_genome ~{referenceFasta} \
+        -ref_genome_version ~{refGenomeVersion} \
+        -resource_dir ~{sub(hlaRefAminoacidSequencesCsv, basename(hlaRefAminoacidSequencesCsv), "")} \
+        -output_dir ~{outputDir} \
+        -threads ~{threads} \
+        -tumor_bam ~{tumorBam} \
+        -gene_copy_number_file ~{geneCopyNumberFile} \
+        -somatic_variants_file ~{somaticVariantsFile}
+    }
+
+    output {
+        #TODO declare the files LILAC writes to outputDir
+    }
+
+    runtime {
+        memory: memory
+        cpu: threads
+        time_minutes: timeMinutes # !UnknownRuntimeKey
+        docker: dockerImage
+    }
+
+    parameter_meta {
+
+    }
+}
+
+
 task Linx {
     input {
         String sampleName

From 82e876c886a2df217125fa0163f0a637eb9c8d24 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Thu, 18 Aug 2022 16:08:07 +0200
Subject: [PATCH 09/75] fix some issues

---
 gridss.wdl   | 4 ++--
 hmftools.wdl | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gridss.wdl b/gridss.wdl
index 95b081f9..cd310244 100644
--- a/gridss.wdl
+++ b/gridss.wdl
@@ -34,7 +34,7 @@ task AnnotateInsertedSequence {
         Int threads = 8
         String javaXmx = "8G"
         String memory = "9G"
-        String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image
+        String dockerImage = "quay.io/biowdl/gridss:2.13.2" #TODO check if we still need our own patched image
         Int timeMinutes = 120
     }
 
@@ -289,7 +289,7 @@ task Virusbreakend {
         File virusbreakendDB
         String outputPath = "./virusbreakend.vcf"
 
-        String extraMemoryGB = 10
+        Int extraMemoryGB = 10
         Int gridssMemoryGB = 60
         Int threads = 12
         String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image
diff --git a/hmftools.wdl b/hmftools.wdl
index b87fcedd..c381c321 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -114,7 +114,7 @@ task Cobalt {
         Int threads = 1
         String memory = "5G"
         String javaXmx = "4G"
-        Int timeMinutes = 480
+        Int timeMinutes = 960
         String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.13--hdfd78af_0"
     }
 

From 1ea966ca383831448627ada50e8c1eaeaafcbc3e Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 19 Aug 2022 12:08:14 +0200
Subject: [PATCH 10/75] add docker for lilac, increase memory for cobalt

---
 hmftools.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index c381c321..443d2da5 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -501,7 +501,7 @@ task Lilac {
         String memory = "16G"
         Int timeMinutes = 1440 #FIXME
         Int threads = 1
-        String dockerImage = "" #TODO
+        String dockerImage = "quay.io/biocontainers/hmftools-lilac:1.1--hdfd78af_0" #TODO
     }
 
     command {

From 7fe51ebbb0011f498a11f75a7379279d6fc4be70 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 19 Aug 2022 15:47:18 +0200
Subject: [PATCH 11/75] fix sage ref genome version

---
 hmftools.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 443d2da5..b7611ee8 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -1253,7 +1253,7 @@ task Sage {
         -hotspots ~{hotspots} \
         -panel_bed ~{panelBed} \
         -high_confidence_bed ~{highConfidenceBed} \
-        -ref_genome_version ~{true="hg38" false="hg19" hg38} \
+        -ref_genome_version ~{true="38" false="37" hg38} \
         -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \
         -write_bqr_data \
         -write_bqr_plot \

From 48f8245ab28bda543fe139e21fb91f88049032ac Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Mon, 22 Aug 2022 15:47:01 +0200
Subject: [PATCH 12/75] add missing required input for amber

---
 hmftools.wdl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hmftools.wdl b/hmftools.wdl
index b7611ee8..f5342ab4 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -33,6 +33,7 @@ task Amber {
         File referenceFasta
         File referenceFastaFai
         File referenceFastaDict
+        String refGenomeVersion
 
         Int threads = 2
         String memory = "70G"
@@ -50,6 +51,7 @@ task Amber {
         -output_dir ~{outputDir} \
         -threads ~{threads} \
         -ref_genome ~{referenceFasta} \
+        -ref_genome_version ~{refGenomeVersion} \
         -loci ~{loci}
     }
 
@@ -90,6 +92,7 @@ task Amber {
         referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
                              category: "required"}
         referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+        refGenomeVersion: {description: "The version of the reference genome: 37 or 38.", category: "required"}
         threads: {description: "The number of threads the program will use.", category: "advanced"}
         memory: {description: "The amount of memory this job will use.", category: "advanced"}
         javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",

From 66394c7f93597ee2fbb26405d98925e4e65e9504 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 23 Aug 2022 13:37:59 +0200
Subject: [PATCH 13/75] fix some issues

---
 gridss.wdl   | 4 ++--
 sambamba.wdl | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/gridss.wdl b/gridss.wdl
index cd310244..ddc08fd9 100644
--- a/gridss.wdl
+++ b/gridss.wdl
@@ -29,12 +29,12 @@ task AnnotateInsertedSequence {
         File viralReference
         File viralReferenceFai
         File viralReferenceDict
-        File viralReferenceImg
+        Array[File]+ viralReferenceBwaIndex
 
         Int threads = 8
         String javaXmx = "8G"
         String memory = "9G"
-        String dockerImage = "quay.io/biowdl/gridss:2.13.2" #TODO check if we still need our own patched image
+        String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image
         Int timeMinutes = 120
     }
 
diff --git a/sambamba.wdl b/sambamba.wdl
index e3c3fa38..87cfaeb2 100644
--- a/sambamba.wdl
+++ b/sambamba.wdl
@@ -159,11 +159,12 @@ task Slice {
         -L ~{regions} \
         -o ~{outputPath} \
         ~{bamFile}
+        sambamba index ~{outputPath}
     }
 
     output {
         File slicedBam = outputPath
-        File slicedBamIndex = sub(outputPath, "\.bam$", ".bai")
+        File slicedBamIndex = "~{outputPath}.bai"
     }
 
     runtime {

From 1372195c722e3658322c9442af9a3717f5aa0565 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 23 Aug 2022 13:42:19 +0200
Subject: [PATCH 14/75] fix parameter_meta

---
 gridss.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gridss.wdl b/gridss.wdl
index ddc08fd9..64b8a3f4 100644
--- a/gridss.wdl
+++ b/gridss.wdl
@@ -68,7 +68,7 @@ task AnnotateInsertedSequence {
         viralReference: {description: "A fasta file with viral sequences.", category: "required"}
         viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"}
         viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"}
-        viralReferenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the viral reference.", category: "required"}
+        viralReferenceBwaIndex: {description: "The BWA index files of the viral reference.", category: "required"}
 
         memory: {description: "The amount of memory this job will use.", category: "advanced"}
         javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",

From b016041643e37ec6ed4277e7e8a5bc2369831747 Mon Sep 17 00:00:00 2001
From: Davy Cats <davycats.dc@gmail.com>
Date: Fri, 26 Aug 2022 14:36:10 +0200
Subject: [PATCH 15/75] fix output names gripss

---
 hmftools.wdl | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index f5342ab4..087002a6 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -373,11 +373,13 @@ task Gripss {
         -output_id ~{outputId}
     }
 
+    String suffix = if defined(referenceName) then "somatic" else "germline"
+
     output {
-        File fullVcf = "~{outputDir}/~{sampleName}.gripss.somatic.vcf.gz"
-        File fullVcfIndex = "~{outputDir}/~{sampleName}.gripss.somatic.vcf.gz.tbi"
-        File filteredVcf = "~{outputDir}/~{sampleName}.gripss.filtered.somatic.vcf.gz"
-        File filteredVcfIndex = "~{outputDir}/~{sampleName}.gripss.filtered.somatic.vcf.gz.tbi"
+        File fullVcf = "~{outputDir}/~{sampleName}.gripss.~{suffix}.vcf.gz"
+        File fullVcfIndex = "~{outputDir}/~{sampleName}.gripss.~{suffix}.vcf.gz.tbi"
+        File filteredVcf = "~{outputDir}/~{sampleName}.gripss.filtered.~{suffix}.vcf.gz"
+        File filteredVcfIndex = "~{outputDir}/~{sampleName}.gripss.filtered.~{suffix}.vcf.gz.tbi"
     }
 
     runtime {

From f84a0aee0705b1eadaaf8e738808daacf8b216cd Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Mon, 12 Sep 2022 12:51:46 +0200
Subject: [PATCH 16/75] update docker image for hmftools amber and cobalt

---
 hmftools.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index a34decb7..aca5f18b 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -39,7 +39,7 @@ task Amber {
         String memory = "70GiB"
         String javaXmx = "64G"
         Int timeMinutes = 240
-        String dockerImage = "quay.io/biocontainers/hmftools-amber:3.9--hdfd78af_0"
+        String dockerImage = "quay.io/biocontainers/hmftools-amber:3.9--hdfd78af_1"
     }
 
     command {
@@ -118,7 +118,7 @@ task Cobalt {
         String memory = "5GiB"
         String javaXmx = "4G"
         Int timeMinutes = 960
-        String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.13--hdfd78af_0"
+        String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.13--hdfd78af_1"
     }
 
     command {

From 1e627055d122a3a306c1464c2c417041eee87633 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 23 Sep 2022 11:46:34 +0200
Subject: [PATCH 17/75] update outputs for amber and cobalt

---
 hmftools.wdl | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index aca5f18b..8d6c1bca 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -58,18 +58,16 @@ task Amber {
     output {
         File version = "~{outputDir}/amber.version"
         File tumorBafPcf = "~{outputDir}/~{tumorName}.amber.baf.pcf"
-        File tumorBafTsv = "~{outputDir}/~{tumorName}.amber.baf.tsv"
-        File tumorBafVcf = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz"
-        File tumorBafVcfIndex = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz.tbi"
+        File tumorBafTsv = "~{outputDir}/~{tumorName}.amber.baf.tsv.gz"
         File tumorContaminationVcf = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz"
         File tumorContaminationVcfIndex = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz.tbi"
         File tumorContaminationTsv = "~{outputDir}/~{tumorName}.amber.contamination.tsv"
         File tumorQc = "~{outputDir}/~{tumorName}.amber.qc"
+        File normalHomozygousregionsTsv = "~{outputDir}/~{referenceName}.amber.homozygousregion.tsv"
         File normalSnpVcf = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz"
         File normalSnpVcfIndex = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz.tbi"
-        Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex,
-            tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc,
-            normalSnpVcf, normalSnpVcfIndex]
+        Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorContaminationVcf,
+            tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, normalSnpVcf, normalSnpVcfIndex]
     }
 
     runtime {
@@ -139,10 +137,9 @@ task Cobalt {
         File normalRationPcf = "~{outputDir}/~{referenceName}.cobalt.ratio.pcf"
         File tumorGcMedianTsv = "~{outputDir}/~{tumorName}.cobalt.gc.median.tsv"
         File tumorRatioPcf = "~{outputDir}/~{tumorName}.cobalt.ratio.pcf"
-        File tumorRatioTsv = "~{outputDir}/~{tumorName}.cobalt.ratio.tsv"
-        File tumorChrLen = "~{outputDir}/~{tumorName}.chr.len"
+        File tumorRatioTsv = "~{outputDir}/~{tumorName}.cobalt.ratio.tsv.gz"
         Array[File] outputs = [version, normalGcMedianTsv, normalRationMedianTsv,
-            normalRationPcf, tumorGcMedianTsv, tumorRatioPcf, tumorRatioTsv, tumorChrLen]
+            normalRationPcf, tumorGcMedianTsv, tumorRatioPcf, tumorRatioTsv]
     }
 
     runtime {

From a47d21a3e2c2477aa76727465348fe5b0eb40af2 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 23 Sep 2022 12:51:35 +0200
Subject: [PATCH 18/75] typo

---
 hmftools.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 8d6c1bca..987d4d85 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -562,7 +562,7 @@ task Linx {
         File transExonDataCsv
         File transSpliceDataCsv
 
-        String memory = "9iB"
+        String memory = "9GiB"
         String javaXmx = "8G"
         Int timeMinutes = 10
         String dockerImage = "quay.io/biowdl/linx:1.19.1" #patched version of biocontainer

From c748f839fa8a01175c04a4b41297193486a08387 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 17 Feb 2023 12:16:00 +0100
Subject: [PATCH 19/75] add some missing options to hmftools sage and cobalt

---
 hmftools.wdl | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 90236a64..14ced8fe 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -111,6 +111,7 @@ task Cobalt {
         File tumorBamIndex
         String outputDir = "./cobalt"
         File gcProfile
+        File refGenomeFile
 
         Int threads = 1
         String memory = "5GiB"
@@ -127,7 +128,8 @@ task Cobalt {
         -tumor_bam ~{tumorBam} \
         -output_dir ~{outputDir} \
         -threads ~{threads} \
-        -gc_profile ~{gcProfile}
+        -gc_profile ~{gcProfile} \
+        -ref_genome ~{refGenomeFile}
     }
 
     output {
@@ -158,6 +160,7 @@ task Cobalt {
         tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"}
         outputDir: {description: "The path to the output directory.", category: "common"}
         gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"}
+        refGenomeFile: {description: "The reference genome fasta file.", category: "required"}
         threads: {description: "The number of threads the program will use.", category: "advanced"}
         memory: {description: "The amount of memory this job will use.", category: "advanced"}
         javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
@@ -1237,6 +1240,8 @@ task Sage {
         Int? panelMaxGermlineRelRawBaseQual
         String? mnvFilterEnabled
         File? coverageBed
+        Int? refSampleCount
+
 
         Int threads = 32
         String javaXmx = "16G"
@@ -1268,6 +1273,7 @@ task Sage {
         ~{"-mnv_filter_enabled " + mnvFilterEnabled} \
         ~{"-coverage_bed " + coverageBed} \
         ~{true="-panel_only" false="" panelOnly} \
+        ~{"-ref_sample_count " + refSampleCount} \
         -threads ~{threads} \
         -out ~{outputPath}
     }
@@ -1310,6 +1316,7 @@ task Sage {
         panelMaxGermlineVaf: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"}
         panelMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"}
         mnvFilterEnabled: {description: "Equivalent to sage's `mnv_filter_enabled` option.", category: "advanced"}
+        refSampleCount: {description: "Equivalent to sage's `ref_sample_count` option.", category: "advanced"}
 
         memory: {description: "The amount of memory this job will use.", category: "advanced"}
         javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",

From 01cc5f76115bce536127e146e2c13d0ef1f93427 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 31 Mar 2023 16:57:07 +0200
Subject: [PATCH 20/75] make reference optional in amber and cobalt

---
 hmftools.wdl | 34 ++++++++++++++++++++++------------
 picard.wdl   |  4 +++-
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index d6d65484..cf98cde8 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -22,9 +22,9 @@ version 1.0
 
 task Amber {
     input {
-        String referenceName
-        File referenceBam
-        File referenceBamIndex
+        String? referenceName
+        File? referenceBam
+        File? referenceBamIndex
         String tumorName
         File tumorBam
         File tumorBamIndex
@@ -35,6 +35,8 @@ task Amber {
         File referenceFastaDict
         String refGenomeVersion
 
+        Int? tumorOnlyMinDepth
+
         Int threads = 2
         String memory = "85GiB"
         String javaXmx = "80G"
@@ -44,15 +46,16 @@ task Amber {
 
     command {
         AMBER -Xmx~{javaXmx} \
-        -reference ~{referenceName} \
-        -reference_bam ~{referenceBam} \
+        ~{"-reference " + referenceName} \
+        ~{"-reference_bam " + referenceBam} \
         -tumor ~{tumorName} \
         -tumor_bam ~{tumorBam} \
         -output_dir ~{outputDir} \
         -threads ~{threads} \
         -ref_genome ~{referenceFasta} \
         -ref_genome_version ~{refGenomeVersion} \
-        -loci ~{loci}
+        -loci ~{loci} \
+        ~{"-tumor_only_min_depth " + tumorOnlyMinDepth}
     }
 
     output {
@@ -103,9 +106,9 @@ task Amber {
 
 task Cobalt {
     input {
-        String referenceName
-        File referenceBam
-        File referenceBamIndex
+        String? referenceName
+        File? referenceBam
+        File? referenceBamIndex
         String tumorName
         File tumorBam
         File tumorBamIndex
@@ -113,6 +116,10 @@ task Cobalt {
         File gcProfile
         File refGenomeFile
 
+        File? tumorOnlyDiploidBed
+        File? targetRegionsNormalisationTsv
+        Int? pcfGamma
+
         Int threads = 1
         String memory = "5GiB"
         String javaXmx = "4G"
@@ -122,14 +129,17 @@ task Cobalt {
 
     command {
         COBALT -Xmx~{javaXmx} \
-        -reference ~{referenceName} \
-        -reference_bam ~{referenceBam} \
+        ~{"-reference " + referenceName} \
+        ~{"-reference_bam " + referenceBam} \
         -tumor ~{tumorName} \
         -tumor_bam ~{tumorBam} \
         -output_dir ~{outputDir} \
         -threads ~{threads} \
         -gc_profile ~{gcProfile} \
-        -ref_genome ~{refGenomeFile}
+        -ref_genome ~{refGenomeFile} \
+        ~{"-tumor_only_diploid_bed " + tumorOnlyDiploidBed} \
+        ~{"-target_region " + targetRegionsNormalisationTsv} \
+        ~{"-pcf_gamma " + pcfGamma}
     }
 
     output {
diff --git a/picard.wdl b/picard.wdl
index 6628cf0e..314e0a7b 100644
--- a/picard.wdl
+++ b/picard.wdl
@@ -510,6 +510,7 @@ task CollectWgsMetrics {
         Int? minimumMappingQuality
         Int? minimumBaseQuality
         Int? coverageCap
+        File? intervals
 
         String memory = "5GiB"
         String javaXmx = "4G"
@@ -528,7 +529,8 @@ task CollectWgsMetrics {
         OUTPUT=~{outputPath} \
         ~{"MINIMUM_MAPPING_QUALITY=" + minimumMappingQuality} \
         ~{"MINIMUM_BASE_QUALITY=" + minimumBaseQuality} \
-        ~{"COVERAGE_CAP=" + coverageCap}
+        ~{"COVERAGE_CAP=" + coverageCap} \
+        ~{"INTERVALS=" + intervals}
     }
 
     output {

From 20fffe3c090648550363f354fcefd13e670ee7e2 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 14 Apr 2023 16:27:05 +0200
Subject: [PATCH 21/75] update various hmftools, add task for svprep gridss

---
 gridss.wdl   |  83 ++++++++++++++++-
 hmftools.wdl | 253 ++++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 292 insertions(+), 44 deletions(-)

diff --git a/gridss.wdl b/gridss.wdl
index 2066899d..57515750 100644
--- a/gridss.wdl
+++ b/gridss.wdl
@@ -34,7 +34,7 @@ task AnnotateInsertedSequence {
         Int threads = 8
         String javaXmx = "8G"
         String memory = "9GiB"
-        String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image
+        String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1"
         Int timeMinutes = 120
     }
 
@@ -269,7 +269,7 @@ task GRIDSS {
         Int nonJvmMemoryGb = 10
         Int threads = 12
         Int timeMinutes = ceil(7200 / threads) + 1800
-        String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image
+        String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1"
     }
 
     command {
@@ -382,6 +382,83 @@ task GridssAnnotateVcfRepeatmasker {
     }
 }
 
+task GridssSvPrep {  # Runs gridss_sv-prep on the full BAMs together with their sv-prep filtered counterparts.
+    input {
+        Array[String]+ tumorLabel
+        Array[File]+ tumorBam
+        Array[File]+ tumorBai
+        Array[File]+ tumorFilteredBam
+        Array[File]+ tumorFilteredBai
+        BwaIndex reference
+        File blacklistBed
+        File gridssProperties
+
+        String? normalLabel
+        File? normalBam
+        File? normalBai
+        File? normalFilteredBam
+        File? normalFilteredBai
+        String outputPath = "gridss.vcf.gz"
+
+        Int jvmHeapSizeGb = 48
+        Int nonJvmMemoryGb = 10
+        Int threads = 10
+        Int timeMinutes = ceil(7200 / threads) + 1800
+        String dockerImage = "quay.io/biowdl/gridss:2.13.2_1"
+    }
+
+    command {
+        gridss_sv-prep \
+        --steps all \
+        --output ~{outputPath} \
+        --workingdir . \
+        --reference ~{reference.fastaFile} \
+        --jar /usr/local/share/gridss-2.13.2-1/gridss.jar \
+        --blacklist ~{blacklistBed} \
+        --configuration ~{gridssProperties} \
+        --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{sep="," tumorLabel} \
+        --bams ~{normalBam}~{true="," false="" defined(normalBam)}~{sep="," tumorBam} \
+        --filtered_bams ~{normalFilteredBam}~{true="," false="" defined(normalFilteredBam)}~{sep="," tumorFilteredBam} \
+        --jvmheap ~{jvmHeapSizeGb}G \
+        --threads ~{threads}
+    }
+
+    output {
+        File vcf = outputPath
+        File vcfIndex = outputPath + ".tbi"
+    }
+
+    runtime {
+        cpu: threads
+        memory: "~{jvmHeapSizeGb + nonJvmMemoryGb}GiB"
+        time_minutes: timeMinutes # !UnknownRuntimeKey
+        docker: dockerImage
+    }
+
+    parameter_meta {
+        tumorBam: {description: "The input BAM file. This should be the tumor/case sample in case of a paired analysis.", category: "required"}
+        tumorBai: {description: "The index for tumorBam.", category: "required"}
+        tumorFilteredBam: {description: "The input BAM file preprocessed by hmftools' sv-prep.", category: "required"}
+        tumorFilteredBai: {description: "The index for tumorFilteredBam.", category: "required"}
+        tumorLabel: {description: "The name of the (tumor) sample.", category: "required"}
+        reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"}
+        outputPath: {description: "The path for the output VCF file.", category: "common"}
+        normalBam: {description: "The BAM file for the normal/control sample.", category: "advanced"}
+        normalBai: {description: "The index for normalBam.", category: "advanced"}
+        normalFilteredBam: {description: "The BAM file for the normal control sample preprocessed by hmftools' sv-prep.", category: "advanced"}
+        normalFilteredBai: {description: "The index for normalFilteredBam.", category: "advanced"}
+        normalLabel: {description: "The name of the normal sample.", category: "advanced"}
+        blacklistBed: {description: "A bed file with blacklisted regions.", category: "required"}
+        gridssProperties: {description: "A properties file for gridss.", category: "required"}
+
+        threads: {description: "The number of the threads to use.", category: "advanced"}
+        jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling", category: "advanced"}
+        nonJvmMemoryGb: {description: "The amount of memory in Gb to be requested besides JVM memory.", category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+    }
+}
+
 task SomaticFilter {
     input {
         File vcfFile
@@ -451,7 +528,7 @@ task Virusbreakend {
         Int extraMemoryGB = 10
         Int gridssMemoryGB = 60
         Int threads = 12
-        String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image
+        String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1"
         Int timeMinutes = 320
     }
 
diff --git a/hmftools.wdl b/hmftools.wdl
index cf98cde8..9e133723 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -355,17 +355,24 @@ task Gripss {
         File knownFusionPairBedpe
         File breakendPon
         File breakpointPon
+        File repeatMaskFile
         String? referenceName
         String sampleName
         File vcf
         File vcfIndex
         String outputId
         String outputDir = "./"
+        Boolean hg38 = false
+        Int? hardMinTumorQual
+        Int? minQualBreakPoint
+        Int? minQualBreakEnd
+        Boolean filterSgls = false
+        Boolean germline = false
 
         String memory = "17GiB"
         String javaXmx = "16G"
         Int timeMinutes = 50
-        String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.1--hdfd78af_0"
+        String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.3.2--hdfd78af_0"
     }
 
     command {
@@ -373,14 +380,21 @@ task Gripss {
         mkdir -p ~{outputDir}
         gripss -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
         -ref_genome ~{referenceFasta} \
+        -ref_genome_version ~{if hg38 then "38" else "37"} \
         -known_hotspot_file ~{knownFusionPairBedpe} \
         -pon_sgl_file ~{breakendPon} \
         -pon_sv_file ~{breakpointPon} \
+        -repeat_mask_file ~{repeatMaskFile} \
         ~{"-reference " + referenceName} \
         -sample ~{sampleName} \
         -vcf ~{vcf} \
         -output_dir ~{outputDir} \
-        -output_id ~{outputId}
+        -output_id ~{outputId} \
+        ~{if filterSgls then "-filter_sgls" else ""} \
+        ~{"-hard_min_tumor_qual " + hardMinTumorQual} \
+        ~{"-min_qual_break_point " + minQualBreakPoint} \
+        ~{"-min_qual_break_end " + minQualBreakEnd} \
+        ~{if germline then "-germline" else ""}
     }
 
     String suffix = if defined(referenceName) then "somatic" else "germline"
@@ -890,11 +904,12 @@ task Pave {
         File? blacklistVcf
         File? blacklistBed
         File? blacklistVcfIndex
+        Boolean writePassOnly = false
 
         Int timeMinutes = 50
         String javaXmx = "8G"
         String memory = "9GiB"
-        String dockerImage = "quay.io/biowdl/pave:v1.2.2"
+        String dockerImage = "quay.io/biocontainers/hmftools-pave:1.4.1--hdfd78af_0"
 
         String? DONOTDEFINE
     }
@@ -923,7 +938,8 @@ task Pave {
         ~{if defined(gnomadFreqDir) then "-gnomad_load_chr_on_demand" else ""} \
         ~{"-clinvar_vcf " + clinvarVcf} \
         ~{"-blacklist_bed " + blacklistBed} \
-        ~{"-blacklist_vcf " + blacklistVcf}
+        ~{"-blacklist_vcf " + blacklistVcf} \
+        ~{if writePassOnly then "-write_pass_only" else ""}
     }
 
     output {
@@ -1068,14 +1084,14 @@ task Protect {
 
 task Purple {
     input {
-        String referenceName
+        String? referenceName
         String tumorName
         String outputDir = "./purple"
         Array[File]+ amberOutput
         Array[File]+ cobaltOutput
         File gcProfile
         File somaticVcf
-        File germlineVcf
+        File? germlineVcf
         File filteredSvVcf
         File filteredSvVcfIndex
         File fullSvVcf
@@ -1086,10 +1102,15 @@ task Purple {
         String refGenomeVersion
         File driverGenePanel
         File somaticHotspots
-        File germlineHotspots
-        File germlineDelFreqFile
+        File? germlineHotspots
+        File? germlineDelFreqFile
         Float? highlyDiploidPercentage
         Float? somaticMinPuritySpread
+        File? targetRegionsBed
+        File? targetRegionsRatios
+        File? targetRegionsMsiIndels
+        Int? minDiploidTumorRatioCount
+        Int? minDiploidTumorRatioCountCentromere
         #The following should be in the same directory.
         File geneDataCsv
         File proteinFeaturesCsv
@@ -1102,15 +1123,15 @@ task Purple {
         String javaXmx = "8G"
         # clone of quay.io/biocontainers/hmftools-purple:3.2--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6'
         #String dockerImage = "quay.io/biowdl/hmftools-purple:3.2" FIXME see if biocontainer works
-        String dockerImage = "quay.io/biocontainers/hmftools-purple:3.5--hdfd78af_0"
+        String dockerImage = "quay.io/biocontainers/hmftools-purple:3.7.1--hdfd78af_0"
     }
 
     command {
         PURPLE -Xmx~{javaXmx} \
-        -reference ~{referenceName} \
-        -germline_vcf ~{germlineVcf} \
-        -germline_hotspots ~{germlineHotspots} \
-        -germline_del_freq_file ~{germlineDelFreqFile} \
+        ~{"-reference " + referenceName} \
+        ~{"-germline_vcf " + germlineVcf} \
+        ~{"-germline_hotspots " + germlineHotspots} \
+        ~{"-germline_del_freq_file " + germlineDelFreqFile} \
         -tumor ~{tumorName} \
         -output_dir ~{outputDir} \
         -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \
@@ -1128,6 +1149,11 @@ task Purple {
         -driver_gene_panel ~{driverGenePanel} \
         ~{"-highly_diploid_percentage " + highlyDiploidPercentage} \
         ~{"-somatic_min_purity_spread " + somaticMinPuritySpread} \
+        ~{"-target_regions_bed " + targetRegionsBed} \
+        ~{"-target_regions_ratios " + targetRegionsRatios} \
+        ~{"-target_regions_msi_indels " + targetRegionsMsiIndels} \
+        ~{"-min_diploid_tumor_ratio_count " + minDiploidTumorRatioCount} \
+        ~{"-min_diploid_tumor_ratio_count_centromere " + minDiploidTumorRatioCountCentromere} \
         -threads ~{threads}
     }
 
@@ -1227,15 +1253,16 @@ task Purple {
 
 task Sage {
     input {
-        String tumorName
-        File tumorBam
-        File tumorBamIndex
+        Array[String]+ tumorName
+        Array[File]+ tumorBam
+        Array[File]+ tumorBamIndex
         File referenceFasta
         File referenceFastaDict
         File referenceFastaFai
         File hotspots
         File panelBed
         File highConfidenceBed
+        File coverageBed
         Boolean hg38 = false
         Boolean panelOnly = false
         String outputPath = "./sage.vcf.gz"
@@ -1245,63 +1272,65 @@ task Sage {
         File transExonDataCsv
         File transSpliceDataCsv
 
-        String? referenceName
-        File? referenceBam
-        File? referenceBamIndex
+        Array[String] referenceName = []
+        Array[File] referenceBam = []
+        Array[File] referenceBamIndex = []
         Int? hotspotMinTumorQual
         Int? panelMinTumorQual
         Int? hotspotMaxGermlineVaf
         Int? hotspotMaxGermlineRelRawBaseQual
         Int? panelMaxGermlineVaf
         Int? panelMaxGermlineRelRawBaseQual
-        String? mnvFilterEnabled
-        File? coverageBed
         Int? refSampleCount
-
+        Float? hotspotMinTumorVaf
+        Int? highConfidenceMinTumorQual
+        Int? lowConfidenceMinTumorQual
 
         Int threads = 32
         String javaXmx = "16G"
         String memory = "20GiB"
         Int timeMinutes = 720
-        String dockerImage = "quay.io/biocontainers/hmftools-sage:3.0.3--hdfd78af_0"
+        String dockerImage = "quay.io/biocontainers/hmftools-sage:3.2.3--hdfd78af_0"
     }
 
     command {
         SAGE -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
-        -tumor ~{tumorName} \
-        -tumor_bam ~{tumorBam} \
-        ~{"-reference " + referenceName} \
-        ~{"-reference_bam " + referenceBam} \
-        -ref_genome ~{referenceFasta} \
+        -tumor ~{sep="," tumorName} \
+        -tumor_bam ~{sep="," tumorBam} \
+        ~{if length(referenceName) > 0 then "-reference" else ""} ~{sep="," referenceName} \
+        ~{if length(referenceBam) > 0 then "-reference_bam" else ""}  ~{sep="," referenceBam} \
         -hotspots ~{hotspots} \
-        -panel_bed ~{panelBed} \
+        ~{"-hotspot_min_tumor_qual " + hotspotMinTumorQual} \
         -high_confidence_bed ~{highConfidenceBed} \
+        -panel_bed ~{panelBed} \
+        -coverage_bed ~{coverageBed} \
+        -ref_genome ~{referenceFasta} \
         -ref_genome_version ~{true="38" false="37" hg38} \
         -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \
         -write_bqr_data \
         -write_bqr_plot \
-        ~{"-hotspot_min_tumor_qual " + hotspotMinTumorQual} \
+        -out ~{outputPath} \
+        -threads ~{threads} \
         ~{"-panel_min_tumor_qual " + panelMinTumorQual} \
         ~{"-hotspot_max_germline_vaf " + hotspotMaxGermlineVaf} \
         ~{"-hotspot_max_germline_rel_raw_base_qual " + hotspotMaxGermlineRelRawBaseQual} \
         ~{"-panel_max_germline_vaf " + panelMaxGermlineVaf} \
         ~{"-panel_max_germline_rel_raw_base_qual " + panelMaxGermlineRelRawBaseQual} \
-        ~{"-mnv_filter_enabled " + mnvFilterEnabled} \
-        ~{"-coverage_bed " + coverageBed} \
         ~{true="-panel_only" false="" panelOnly} \
         ~{"-ref_sample_count " + refSampleCount} \
-        -threads ~{threads} \
-        -out ~{outputPath}
+        ~{"-hotspot_min_tumor_vaf " + hotspotMinTumorVaf} \
+        ~{"-high_confidence_min_tumor_qual " + highConfidenceMinTumorQual} \
+        ~{"-low_confidence_min_tumor_qual " + lowConfidenceMinTumorQual}
     }
 
-    output {
+    output { #FIXME does it produce multiple plots/tsvs if multiple samples are given?
         File outputVcf = outputPath
         File outputVcfIndex = outputPath + ".tbi"
-        File? referenceSageBqrPng = "~{referenceName}.sage.bqr.png"
-        File? referenceSageBqrTsv = "~{referenceName}.sage.bqr.tsv"
-        File tumorSageBqrPng = "~{tumorName}.sage.bqr.png"
-        File tumorSageBqrTsv = "~{tumorName}.sage.bqr.tsv"
-        File sageGeneCoverageTsv = "~{tumorName}.sage.gene.coverage.tsv"
+        File? referenceSageBqrPng = if length(referenceName) > 0 then "~{referenceName[0]}.sage.bqr.png" else "NO_REFERENCE.sage.bqr.png" # referenceName defaults to [], so only index it when non-empty
+        File? referenceSageBqrTsv = if length(referenceName) > 0 then "~{referenceName[0]}.sage.bqr.tsv" else "NO_REFERENCE.sage.bqr.tsv" # referenceName defaults to [], so only index it when non-empty
+        File tumorSageBqrPng = "~{tumorName[0]}.sage.bqr.png"
+        File tumorSageBqrTsv = "~{tumorName[0]}.sage.bqr.tsv"
+        File sageGeneCoverageTsv = "~{tumorName[0]}.sage.gene.coverage.tsv"
     }
 
     runtime {
@@ -1331,9 +1360,151 @@ task Sage {
         hotspotMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `hotspot_max_germline_rel_raw_base_qual` option.", category: "advanced"}
         panelMaxGermlineVaf: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"}
         panelMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"}
-        mnvFilterEnabled: {description: "Equivalent to sage's `mnv_filter_enabled` option.", category: "advanced"}
         refSampleCount: {description: "Equivalent to sage's `ref_sample_count` option.", category: "advanced"}
+        hg38: {description: "Whether or not the reference genome is HG38, if false HG19 is assumed.", category: "common"}
+
+        threads: {description: "The number of threads to use.", category: "advanced"}
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+                  category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+}
+
+task SvPrep {
+    # for ref also add tumorJunctionFile
+    input {
+        String sampleName
+        File bamFile
+        File bamIndex
+        File referenceFasta
+        File referenceFastaDict
+        File referenceFastaFai
+        File blacklistBed
+        File knownFusionBed
+        String outputDir = "."
+
+        File? existingJunctionFile
+        Boolean hg38 = false
+
+        Int threads = 10
+        String javaXmx = "48G"
+        String memory = "50GiB"
+        Int timeMinutes = 120
+        String dockerImage = "quay.io/biocontainers/hmftools-sv-prep:1.1--hdfd78af_0"
+    }
+
+    command {
+        set -e
+        SvPrep -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+        -sample ~{sampleName} \
+        -bam_file ~{bamFile} \
+        -ref_genome ~{referenceFasta} \
+        -ref_genome_version ~{true="38" false="37" hg38} \
+        -blacklist_bed ~{blacklistBed} \
+        -known_fusion_bed ~{knownFusionBed} \
+        ~{"-existing_junction_file " + existingJunctionFile} \
+        -write_types "JUNCTIONS;BAM;FRAGMENT_LENGTH_DIST" \
+        -output_dir ~{outputDir} \
+        -threads ~{threads}
+        samtools sort -O bam ~{outputDir}/~{sampleName}.sv_prep.bam -o ~{outputDir}/~{sampleName}.sv_prep.sorted.bam
+        samtools index ~{outputDir}/~{sampleName}.sv_prep.sorted.bam
+    }
+
+    output {
+        File preppedBam = "~{outputDir}/~{sampleName}.sv_prep.sorted.bam"
+        File preppedBamIndex = "~{outputDir}/~{sampleName}.sv_prep.sorted.bam.bai"
+        File junctions = "~{outputDir}/~{sampleName}.sv_prep.junctions.csv"
+    }
+
+    runtime {
+        time_minutes: timeMinutes # !UnknownRuntimeKey
+        cpu: threads
+        docker: dockerImage
+        memory: memory
+    }
+
+    parameter_meta {
+        sampleName: {description: "The name of the sample.", category: "required"}
+        bamFile: {description: "The BAM file to prepare for SV calling with GRIDSS.", category: "required"}
+        bamIndex: {description: "The index for the BAM file.", category: "required"}
+        referenceFasta: {description: "The reference fasta file.", category: "required"}
+        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
+        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+        blacklistBed: {description: "Blacklist bed file.", category: "required"}
+        knownFusionBed: {description: "Bed file with known fusion sites", category: "required"}
+        outputDir: {description: "Path to the output directory.", category: "common"}
+        existingJunctionFile: {description: "Junctions file generated by an earlier run of this tool, eg. from a paired sample.", category: "common"}
+        hg38: {description: "Whether or not the reference genome is HG38, if false HG19 is assumed.", category: "common"}
+
+        threads: {description: "The number of threads to use.", category: "advanced"}
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+                  category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+}
+
+task SvPrepDepthAnnotator {
+    input {
+        File inputVcf
+        File inputVcfIndex
+        Array[File]+ bamFiles
+        Array[File]+ bamIndexes
+        Array[String]+ samples
+        File referenceFasta
+        File referenceFastaDict
+        File referenceFastaFai
+        Boolean hg38 = false
+        String outputVcf = "gridss.depth_annotated.vcf.gz"
+
+        Int threads = 10
+        String javaXmx = "48G"
+        String memory = "50GiB"
+        Int timeMinutes = 120
+        String dockerImage = "quay.io/biocontainers/hmftools-sv-prep:1.1--hdfd78af_0"
+    }
+
+    command {
+        java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+        -cp /usr/local/share/hmftools-sv-prep-1.1-0/sv-prep.jar \
+        com.hartwig.hmftools.svprep.depth.DepthAnnotator \
+        -input_vcf ~{inputVcf} \
+        -output_vcf ~{outputVcf} \
+        -samples ~{sep="," samples} \
+        -bam_files ~{sep="," bamFiles} \
+        -ref_genome ~{referenceFasta} \
+        -ref_genome_version ~{if hg38 then "38" else "37"} \
+        -threads ~{threads}
+    }
+
+    output {
+        File vcf = outputVcf
+        File vcfIndex = outputVcf + ".tbi"
+    }
+
+    runtime {
+        time_minutes: timeMinutes # !UnknownRuntimeKey
+        cpu: threads
+        docker: dockerImage
+        memory: memory
+    }
+
+    parameter_meta {
+        samples: {description: "The names of the samples.", category: "required"}
+        bamFiles: {description: "The BAM files.", category: "required"}
+        bamIndexes: {description: "The indexes for the BAM files.", category: "required"}
+        referenceFasta: {description: "The reference fasta file.", category: "required"}
+        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
+        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+        hg38: {description: "Whether or not the reference genome is HG38, if false HG19 is assumed.", category: "common"}
+        outputVcf: {description: "The path for the output VCF.", category: "common"}
 
+        threads: {description: "The number of threads to use.", category: "advanced"}
         memory: {description: "The amount of memory this job will use.", category: "advanced"}
         javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                   category: "advanced"}

From e2e52b7407252915ae16a5850ed03f916dfcf818 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Wed, 19 Apr 2023 16:37:37 +0200
Subject: [PATCH 22/75] update samtools view

---
 samtools.wdl | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/samtools.wdl b/samtools.wdl
index fbb445e7..875b9ddb 100644
--- a/samtools.wdl
+++ b/samtools.wdl
@@ -566,7 +566,9 @@ task View {
         File inFile
         String outputFileName = "view.bam"
         Boolean uncompressedBamOutput = false
+        Boolean useIndex = false
 
+        File? inFileIndex
         File? referenceFasta
         Int? includeFilter
         Int? excludeFilter
@@ -589,7 +591,8 @@ task View {
         samtools view -b \
         ~{"-T " + referenceFasta} \
         ~{"-o " + outputFileName} \
-        ~{true="-u " false="" uncompressedBamOutput} \
+        ~{if uncompressedBamOutput then "-u" else ""} \
+        ~{if useIndex then "-M" else ""} \
         ~{"-f " + includeFilter} \
         ~{"-F " + excludeFilter} \
         ~{"-G " + excludeSpecificFilter} \
@@ -617,6 +620,8 @@ task View {
         inFile: {description: "A BAM, SAM or CRAM file.", category: "required"}
         outputFileName: {description: "The location the output BAM file should be written.", category: "common"}
         uncompressedBamOutput: {description: "Equivalent to samtools view's `-u` flag.", category: "advanced"}
+        useIndex: {description: "Equivalent to samtools view's `-M` flag.", category: "advanced"}
+        inFileIndex: {description: "An index for the inFile.", category: "common"}
         referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"}
         includeFilter: {description: "Equivalent to samtools view's `-f` option.", category: "advanced"}
         excludeFilter: {description: "Equivalent to samtools view's `-F` option.", category: "advanced"}

From f8f3efed77e1c4cf1ce093f09fefb43f92a6f087 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Thu, 20 Apr 2023 16:05:13 +0200
Subject: [PATCH 23/75] update linx and lilac

---
 hmftools.wdl | 34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 9e133723..cacfac7c 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -507,18 +507,18 @@ task HealthChecker {
 
 task Lilac {
     input {
-        String tumorName
+        String sampleName
         File referenceBam
         File referenceBamIndex
-        File tumorBam
-        File tumorBamIndex
+        File? tumorBam
+        File? tumorBamIndex
         String refGenomeVersion
         File referenceFasta
         File referenceFastaFai
         File referenceFastaDict
-        File geneCopyNumberFile
-        File somaticVariantsFile
-        File somaticVariantsFileIndex
+        File? geneCopyNumberFile
+        File? somaticVariantsFile
+        File? somaticVariantsFileIndex
         String outputDir = "./lilac"
 
         #The following need to be in the same directory
@@ -530,21 +530,21 @@ task Lilac {
         String memory = "16GiB"
         Int timeMinutes = 1440 #FIXME
         Int threads = 1
-        String dockerImage = "quay.io/biocontainers/hmftools-lilac:1.1--hdfd78af_0" #TODO
+        String dockerImage = "quay.io/biocontainers/hmftools-lilac:1.4.2--hdfd78af_0"
     }
 
     command {
         LILAC -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
-        -sample ~{tumorName} \
+        -sample ~{sampleName} \
         -reference_bam ~{referenceBam} \
         -ref_genome ~{referenceFasta} \
         -ref_genome_version ~{refGenomeVersion} \
         -resource_dir ~{sub(hlaRefAminoacidSequencesCsv, basename(hlaRefAminoacidSequencesCsv), "")} \
         -outputDir ~{outputDir} \
         -threads ~{threads} \
-        -tumor_bam ~{tumorBam} \
-        -gene_copy_number_file ~{geneCopyNumberFile} \
-        -somatic_variants_file ~{somaticVariantsFile}
+        ~{"-tumor_bam " + tumorBam} \
+        ~{"-gene_copy_number " + geneCopyNumberFile} \
+        ~{"-somatic_vcf " + somaticVariantsFile}
     }
 
     output {
@@ -581,8 +581,6 @@ task Linx {
         Boolean checkFusions = true
         Boolean checkDrivers = true
         Boolean writeVisData = true
-        File? germlinePonSvFile
-        File? germlinePonSglFile
         #The following should be in the same directory.
         File geneDataCsv
         File proteinFeaturesCsv
@@ -592,7 +590,7 @@ task Linx {
         String memory = "9GiB"
         String javaXmx = "8G"
         Int timeMinutes = 10
-        String dockerImage = "quay.io/biowdl/linx:1.19.1" #patched version of biocontainer
+        String dockerImage = "quay.io/biocontainers/hmftools-linx:1.22.1--hdfd78af_0"
 
         String? DONOTDEFINE
     }
@@ -617,9 +615,7 @@ task Linx {
         -driver_gene_panel ~{driverGenePanel} \
         ~{if writeVisData then "-write_vis_data" else ""} \
         ~{if writeAllVisFusions then "-write_all_vis_fusions" else ""} \
-        ~{if germline then "-germline" else ""} \
-        ~{"-germline_pon_sv_file " + germlinePonSvFile} \
-        ~{"-germline_pon_sgl_file " + germlinePonSglFile}
+        ~{if germline then "-germline" else ""}
     }
 
     output {
@@ -686,14 +682,14 @@ task LinxVisualisations {
         String memory = "9GiB"
         String javaXmx = "8G"
         Int timeMinutes = 1440
-        String dockerImage = "quay.io/biowdl/linx:1.19.1" #patched version of biocontainer
+        String dockerImage = "quay.io/biocontainers/hmftools-linx:1.22.1--hdfd78af_0"
     }
 
     command {
         set -e
         mkdir -p ~{outputDir}
         java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
-        -cp /usr/local/share/hmftools-linx-1.19-0/linx.jar \
+        -cp /usr/local/share/hmftools-linx-1.22.1-0/linx.jar \
         com.hartwig.hmftools.linx.visualiser.SvVisualiser \
         -sample ~{sample} \
         -ref_genome_version ~{refGenomeVersion} \

From 1673a014d2f84347cf8a52fc9bb10a57e62b6a45 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 21 Apr 2023 15:38:35 +0200
Subject: [PATCH 24/75] update peach, add rose, etc.

---
 gridss.wdl   |   1 -
 hmftools.wdl | 186 +++++++++++++++++++++++++++++++++++++++++++--------
 peach.wdl    |   4 +-
 3 files changed, 159 insertions(+), 32 deletions(-)

diff --git a/gridss.wdl b/gridss.wdl
index 57515750..789335a4 100644
--- a/gridss.wdl
+++ b/gridss.wdl
@@ -521,7 +521,6 @@ task Virusbreakend {
         File referenceFasta
         File referenceFastaFai
         File referenceFastaDict
-        File referenceImg
         File virusbreakendDB
         String outputPath = "./virusbreakend.vcf"
 
diff --git a/hmftools.wdl b/hmftools.wdl
index cacfac7c..6d8a494b 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -189,7 +189,7 @@ task CupGenerateReport {
 
         String memory = "5GiB"
         Int timeMinutes = 10
-        String dockerImage = "quay.io/biowdl/cuppa:1.6"
+        String dockerImage = "quay.io/biowdl/cuppa:1.7.1"
     }
 
     # This script writes to the directory that the input is located in.
@@ -245,34 +245,29 @@ task Cuppa {
     input {
         Array[File]+ linxOutput
         Array[File]+ purpleOutput
+        File virusInterpreterOutput
         String sampleName
         Array[String]+ categories = ["DNA"]
         Array[File]+ referenceData
-        File purpleSvVcf
-        File purpleSvVcfIndex
-        File purpleSomaticVcf
-        File purpleSomaticVcfIndex
         String outputDir = "./cuppa"
 
         String javaXmx = "4G"
         String memory = "5GiB"
         Int timeMinutes = 10
-        String dockerImage = "quay.io/biowdl/cuppa:1.6"
+        String dockerImage = "quay.io/biowdl/cuppa:1.7.1"
     }
 
     command {
         set -e
         mkdir -p sampleData ~{outputDir}
         ln -s -t sampleData ~{sep=" " linxOutput} ~{sep=" " purpleOutput}
+        ln -s -t sampleData ~{virusInterpreterOutput}
         cuppa -Xmx~{javaXmx} \
         -output_dir ~{outputDir} \
-        -output_id ~{sampleName} \
         -categories '~{sep="," categories}' \
         -ref_data_dir ~{sub(referenceData[0], basename(referenceData[0]), "")} \
         -sample_data_dir sampleData \
-        -sample_data ~{sampleName} \
-        -sample_sv_file ~{purpleSvVcf} \
-        -sample_somatic_vcf ~{purpleSomaticVcf}
+        -sample_data ~{sampleName}
     }
 
     output {
@@ -291,10 +286,6 @@ task Cuppa {
         sampleName: {description: "The name of the sample.", category: "required"}
         categories: {description: "The classifiers to use.", category: "advanced"}
         referenceData : {description: "The reference data.", category: "required"}
-        purpleSvVcf: {description: "The VCF file produced by purple which contains structural variants.", category: "required"}
-        purpleSvVcfIndex: {description: "The index of the structural variants VCF file produced by purple.", category: "required"}
-        purpleSomaticVcf: {description: "The VCF file produced by purple which contains somatic variants.", category: "required"}
-        purpleSomaticVcfIndex: {description: "The index of the somatic VCF file produced by purple.", category: "required"}
         outputDir: {description: "The directory the ouput will be placed in.", category: "common"}
         memory: {description: "The amount of memory this job will use.", category: "advanced"}
         javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
@@ -313,7 +304,7 @@ task CuppaChart {
 
         String memory = "4GiB"
         Int timeMinutes = 5
-        String dockerImage = "quay.io/biowdl/cuppa:1.6"
+        String dockerImage = "quay.io/biowdl/cuppa:1.7.1"
     }
 
     command {
@@ -548,7 +539,8 @@ task Lilac {
     }
 
     output {
-        #TODO
+        File lilacCsv = "~{outputDir}/~{sampleName}.lilac.csv"
+        File lilacQcCsv = "~{outputDir}/~{sampleName}.lilac.qc.csv"
     }
 
     runtime {
@@ -627,16 +619,18 @@ task Linx {
         File linxLinks = "~{outputDir}/~{sampleName}.linx.links.tsv"
         File linxSvs = "~{outputDir}/~{sampleName}.linx.svs.tsv"
         File linxVisCopyNumber = "~{outputDir}/~{sampleName}.linx.vis_copy_number.tsv"
-        File linxVisFusion = "~{outputDir}/~{sampleName}.linx.vis_fusion.tsv"
-        File linxVisGeneExon = "~{outputDir}/~{sampleName}.linx.vis_gene_exon.tsv"
-        File linxVisProteinDomain = "~{outputDir}/~{sampleName}.linx.vis_protein_domain.tsv"
-        File linxVisSegments = "~{outputDir}/~{sampleName}.linx.vis_segments.tsv"
-        File linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv"
+        File? linxVisFusion = "~{outputDir}/~{sampleName}.linx.vis_fusion.tsv"
+        File? linxVisGeneExon = "~{outputDir}/~{sampleName}.linx.vis_gene_exon.tsv"
+        File? linxVisProteinDomain = "~{outputDir}/~{sampleName}.linx.vis_protein_domain.tsv"
+        File? linxVisSegments = "~{outputDir}/~{sampleName}.linx.vis_segments.tsv"
+        File? linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv"
+        File? linxGermlineDriverCatalogTsv = "~{outputDir}/~{sampleName}.linx.germline.driver.catalog.tsv"
+        File? linxGermlineDisruptionTsv = "~{outputDir}/~{sampleName}.linx.germline.disruption.tsv"
         File linxVersion = "~{outputDir}/linx.version"
-        Array[File] outputs = [driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion,
+        Array[File] outputs = select_all([driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion,
                                linxLinks, linxSvs, linxVisCopyNumber, linxVisFusion,
                                linxVisGeneExon, linxVisProteinDomain, linxVisSegments, linxVisSvData,
-                               linxVersion]
+                               linxGermlineDriverCatalogTsv, linxGermlineDisruptionTsv, linxVersion])
     }
 
     runtime {
@@ -743,19 +737,25 @@ task Orange {
         File sageSomaticTumorSampleBqrPlot
         File purpleGeneCopyNumberTsv
         File purpleGermlineDriverCatalogTsv
+        File purpleGermlineDeletionTsv
         File purpleGermlineVariantVcf
         File purpleGermlineVariantVcfIndex
         Array[File]+ purplePlots
         File purplePurityTsv
         File purpleQcFile
+        File purpleSomaticCopyNumberFile
         File purpleSomaticDriverCatalogTsv
         File purpleSomaticVariantVcf
         File purpleSomaticVariantVcfIndex
+        File lilacQcCsv
+        File lilacResultCsv
         File linxFusionTsv
         File linxBreakendTsv
         File linxDriverCatalogTsv
         File linxDriverTsv
+        File linxGermlineDisruptionTsv
         Array[File]+ linxPlots
+        File linxStructuralVariantTsv
         File cuppaResultCsv
         File cuppaSummaryPlot
         File? cuppaFeaturePlot
@@ -766,11 +766,14 @@ task Orange {
         #File pipelineVersionFile
         File cohortMappingTsv
         File cohortPercentilesTsv
+        Boolean hg38 = false
+        File driverGenePanel
+        File knownFusionFile
 
         String memory = "17GiB"
         String javaXmx = "16G"
         Int timeMinutes = 10
-        String dockerImage = "quay.io/biowdl/orange:v1.6"
+        String dockerImage = "quay.io/biocontainers/hmftools-orange:1.10.2--hdfd78af_0"
     }
 
     command {
@@ -778,6 +781,7 @@ task Orange {
         mkdir -p ~{outputDir}
         orange -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
         -output_dir ~{outputDir} \
+        -ref_genome_version ~{if hg38 then "38" else "37"} \
         -doid_json ~{doidJson} \
         -primary_tumor_doids '~{sep=";" sampleDoids}' \
         -max_evidence_level C \
@@ -792,17 +796,23 @@ task Orange {
         -sage_somatic_tumor_sample_bqr_plot ~{sageSomaticTumorSampleBqrPlot} \
         -purple_gene_copy_number_tsv ~{purpleGeneCopyNumberTsv} \
         -purple_germline_driver_catalog_tsv ~{purpleGermlineDriverCatalogTsv} \
+        -purple_germline_deletion_tsv ~{purpleGermlineDeletionTsv} \
         -purple_germline_variant_vcf ~{purpleGermlineVariantVcf} \
         -purple_plot_directory ~{sub(purplePlots[0], basename(purplePlots[0]), "")} \
         -purple_purity_tsv ~{purplePurityTsv} \
         -purple_qc_file ~{purpleQcFile} \
+        -purple_somatic_copy_number_tsv ~{purpleSomaticCopyNumberFile} \
         -purple_somatic_driver_catalog_tsv ~{purpleSomaticDriverCatalogTsv} \
         -purple_somatic_variant_vcf ~{purpleSomaticVariantVcf} \
+        -lilac_qc_csv ~{lilacQcCsv} \
+        -lilac_result_csv ~{lilacResultCsv} \
         -linx_fusion_tsv ~{linxFusionTsv} \
         -linx_breakend_tsv ~{linxBreakendTsv} \
         -linx_driver_catalog_tsv ~{linxDriverCatalogTsv} \
         -linx_driver_tsv ~{linxDriverTsv} \
+        -linx_germline_disruption_tsv ~{linxGermlineDisruptionTsv} \
         -linx_plot_directory ~{sub(linxPlots[0], basename(linxPlots[0]), "")} \
+        -linx_structural_variant_tsv ~{linxStructuralVariantTsv} \
         -cuppa_result_csv ~{cuppaResultCsv} \
         -cuppa_summary_plot ~{cuppaSummaryPlot} \
         ~{"-cuppa_feature_plot " + cuppaFeaturePlot} \
@@ -811,7 +821,9 @@ task Orange {
         -protect_evidence_tsv ~{protectEvidenceTsv} \
         -annotated_virus_tsv ~{annotatedVirusTsv} \
         -cohort_mapping_tsv ~{cohortMappingTsv} \
-        -cohort_percentiles_tsv ~{cohortPercentilesTsv}
+        -cohort_percentiles_tsv ~{cohortPercentilesTsv} \
+        -driver_gene_panel_tsv ~{driverGenePanel} \
+        -known_fusion_file ~{knownFusionFile}
     }
     #TODO may need to be added: -pipeline_version_file ~{pipelineVersionFile}
 
@@ -1006,21 +1018,25 @@ task Protect {
         File linxDriversCatalog
         File chordPrediction
         File annotatedVirus
+        File lilacResultCsv
+        File lilacQcCsv
+        File driverGeneTsv
 
         String memory = "9GiB"
         String javaXmx = "8G"
         Int timeMinutes = 60
-        String dockerImage = "quay.io/biowdl/protect:v2.0"
+        String dockerImage = "quay.io/biocontainers/hmftools-protect:2.3--hdfd78af_0"
     }
 
     command {
-        protect -Xmx~{javaXmx} \
+        protect -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
         -ref_genome_version ~{refGenomeVersion} \
         -tumor_sample_id ~{tumorName} \
         -reference_sample_id ~{referenceName} \
         -primary_tumor_doids '~{sep=";" sampleDoids}' \
         -output_dir ~{outputDir} \
         -serve_actionability_dir ~{sub(serveActionability[0], basename(serveActionability[0]), "")} \
+        -driver_gene_tsv ~{driverGeneTsv} \
         -doid_json ~{doidJson} \
         -purple_purity_tsv ~{purplePurity} \
         -purple_qc_file ~{purpleQc} \
@@ -1033,7 +1049,9 @@ task Protect {
         -linx_breakend_tsv ~{linxBreakend} \
         -linx_driver_catalog_tsv ~{linxDriversCatalog} \
         -chord_prediction_txt ~{chordPrediction} \
-        -annotated_virus_tsv ~{annotatedVirus}
+        -annotated_virus_tsv ~{annotatedVirus} \
+        -lilac_result_csv ~{lilacResultCsv} \
+        -lilac_qc_csv ~{lilacQcCsv}
     }
 
     output {
@@ -1123,7 +1141,7 @@ task Purple {
     }
 
     command {
-        PURPLE -Xmx~{javaXmx} \
+        PURPLE -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
         ~{"-reference " + referenceName} \
         ~{"-germline_vcf " + germlineVcf} \
         ~{"-germline_hotspots " + germlineHotspots} \
@@ -1247,6 +1265,77 @@ task Purple {
     }
 }
 
+task Rose {
+    input {
+        File actionabilityDatabaseTsv
+        Boolean hg38 = false # selects -ref_genome_version: "38" when true, "37" otherwise
+        File driverGeneTsv
+        File purplePurityTsv
+        File purpleQc
+        File purpleGeneCopyNumberTsv
+        File purpleSomaticDriverCatalogTsv
+        File purpleGermlineDriverCatalogTsv
+        File purpleSomaticVcf
+        File purpleSomaticVcfIndex # not referenced in the command; presumably only localized alongside the VCF
+        File purpleGermlineVcf
+        File purpleGermlineVcfIndex # not referenced in the command; presumably only localized alongside the VCF
+        File linxFusionTsv
+        File linxBreakendTsv
+        File linxDriverCatalogTsv
+        File annotatedVirusTsv
+        File chordPredictionTxt
+        File cuppaResultCsv
+        String outputDir = "./rose" # created with mkdir -p before rose is started
+        String tumorName
+        String referenceName
+
+        String memory = "9GiB"
+        String javaXmx = "8G" # passed to the JVM as -Xmx
+        Int timeMinutes = 60
+        String dockerImage = "quay.io/biocontainers/hmftools-rose:1.3--hdfd78af_0"
+    }
+
+    command {
+        set -e
+        mkdir -p ~{outputDir}
+        rose -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+        -actionability_database_tsv ~{actionabilityDatabaseTsv} \
+        -ref_genome_version ~{if hg38 then "38" else "37"} \
+        -driver_gene_tsv ~{driverGeneTsv} \
+        -purple_purity_tsv ~{purplePurityTsv} \
+        -purple_qc_file ~{purpleQc} \
+        -purple_gene_copy_number_tsv ~{purpleGeneCopyNumberTsv} \
+        -purple_somatic_driver_catalog_tsv ~{purpleSomaticDriverCatalogTsv} \
+        -purple_germline_driver_catalog_tsv ~{purpleGermlineDriverCatalogTsv} \
+        -purple_somatic_variant_vcf ~{purpleSomaticVcf} \
+        -purple_germline_variant_vcf ~{purpleGermlineVcf} \
+        -linx_fusion_tsv ~{linxFusionTsv} \
+        -linx_breakend_tsv ~{linxBreakendTsv} \
+        -linx_driver_catalog_tsv ~{linxDriverCatalogTsv} \
+        -annotated_virus_tsv ~{annotatedVirusTsv} \
+        -chord_prediction_txt ~{chordPredictionTxt} \
+        -cuppa_result_csv ~{cuppaResultCsv} \
+        -output_dir ~{outputDir} \
+        -tumor_sample_id ~{tumorName} \
+        -ref_sample_id ~{referenceName} \
+        -patient_id not_used_because_primary_tumor_tsv_has_only_headers
+    }
+
+    output {
+        #TODO
+    }
+
+    runtime {
+        time_minutes: timeMinutes # !UnknownRuntimeKey
+        docker: dockerImage
+        memory: memory
+    }
+
+    parameter_meta {
+
+    }
+}
+
 task Sage {
     input {
         Array[String]+ tumorName
@@ -1369,6 +1458,45 @@ task Sage {
     }
 }
 
+task Sigs {
+    input {
+        String sampleName
+        File signaturesFile
+        File somaticVcfFile
+        File somaticVcfIndex
+        String outputDir = "./sigs"
+
+        String javaXmx = "4G"
+        String memory = "5GiB"
+        Int timeMinutes = 60
+        String dockerImage = "quay.io/biocontainers/hmftools-sigs:1.1--hdfd78af_0"
+    }
+
+    command {
+        set -e
+        mkdir -p ~{outputDir}
+        sigs -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+        -sample ~{sampleName} \
+        -signatures_file ~{signaturesFile} \
+        -somatic_vcf_file ~{somaticVcfFile} \
+        -output_dir ~{outputDir}
+    }
+
+    output {
+        File sigAllocationTsv = "~{outputDir}/~{sampleName}.sig.allocation.tsv"
+    }
+
+    runtime {
+        time_minutes: timeMinutes # !UnknownRuntimeKey
+        docker: dockerImage
+        memory: memory
+    }
+
+    parameter_meta {
+
+    }
+}
+
 task SvPrep {
     # for ref also add tumorJunctionFile
     input {
diff --git a/peach.wdl b/peach.wdl
index 7da029d0..39db7e08 100644
--- a/peach.wdl
+++ b/peach.wdl
@@ -30,7 +30,7 @@ task Peach {
         File panelJson
 
         String memory = "2GiB"
-        String dockerImage = "quay.io/biowdl/peach:v1.5"
+        String dockerImage = "quay.io/biowdl/peach:v1.7"
         Int timeMinutes = 5
     }
 
@@ -41,7 +41,7 @@ task Peach {
         --vcf ~{germlineVcf} \
         --sample_t_id ~{tumorName} \
         --sample_r_id ~{normalName} \
-        --tool_version 1.5 \
+        --tool_version 1.7 \
         --outputdir ~{outputDir} \
         --panel ~{panelJson}
     }

From 8c661fe8a7b2eeb9821b8d57cdc3c394727ba9ae Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 21 Apr 2023 16:03:25 +0200
Subject: [PATCH 25/75] fix lint issue

---
 gridss.wdl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gridss.wdl b/gridss.wdl
index 789335a4..ee060251 100644
--- a/gridss.wdl
+++ b/gridss.wdl
@@ -562,7 +562,6 @@ task Virusbreakend {
         bam: {description: "A BAM file.", category: "required"}
         bamIndex: {description: "The index for the BAM file.", category: "required"}
         referenceFasta: {description: "The fasta of the reference genome.", category: "required"}
-        referenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the reference.", category: "required"}
         virusbreakendDB: {description: "A .tar.gz containing the virusbreakend database.", category: "required"}
         outputPath: {description: "The path the output should be written to.", category: "common"}
         extraMemoryGB: {description: "Extra memory needed for the job in GB.", category: "advanced"}

From a41beefcaff763620da78a53646bd4e29751f351 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 25 Apr 2023 12:24:07 +0200
Subject: [PATCH 26/75] adjust AnnotateInsertedSequence inputs

---
 gridss.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gridss.wdl b/gridss.wdl
index ee060251..067aa2f0 100644
--- a/gridss.wdl
+++ b/gridss.wdl
@@ -29,7 +29,7 @@ task AnnotateInsertedSequence {
         File viralReference
         File viralReferenceFai
         File viralReferenceDict
-        Array[File]+ viralReferenceBwaIndex
+        File viralReferenceBwaIndex
 
         Int threads = 8
         String javaXmx = "8G"
@@ -68,7 +68,7 @@ task AnnotateInsertedSequence {
         viralReference: {description: "A fasta file with viral sequences.", category: "required"}
         viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"}
         viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"}
-        viralReferenceBwaIndex: {description: "The BWA index files of the viral reference.", category: "required"}
+        viralReferenceBwaIndex: {description: "The BWA index img file of the viral reference.", category: "required"}
 
         memory: {description: "The amount of memory this job will use.", category: "advanced"}
         javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",

From 9bf0c753a42c455eb9a95f96af930fe21829ce6f Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 28 Apr 2023 14:58:25 +0200
Subject: [PATCH 27/75] update svprep docker image

---
 hmftools.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 6d8a494b..2e8cdec9 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -1517,7 +1517,7 @@ task SvPrep {
         String javaXmx = "48G"
         String memory = "50GiB"
         Int timeMinutes = 120
-        String dockerImage = "quay.io/biocontainers/hmftools-sv-prep:1.1--hdfd78af_0"
+        String dockerImage = "quay.io/biocontainers/hmftools-sv-prep:1.1--hdfd78af_1"
     }
 
     command {

From 65fb7209b3d26913b4a1520198c682cfc312f2ba Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 2 May 2023 11:29:16 +0200
Subject: [PATCH 28/75] typo

---
 gridss.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gridss.wdl b/gridss.wdl
index 067aa2f0..9b6596b5 100644
--- a/gridss.wdl
+++ b/gridss.wdl
@@ -411,7 +411,7 @@ task GridssSvPrep {
         gridss_sv-prep \
         --steps all \
         --output ~{outputPath} \
-        --wirkingdir . \
+        --workingdir . \
         --reference ~{reference.fastaFile} \
         --jar /usr/local/share/gridss-2.13.2-1/gridss.jar \
         --blacklist ~{blacklistBed} \

From ae173ef9146d1759ba218774f19062bf6abed9bd Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 2 May 2023 11:55:11 +0200
Subject: [PATCH 29/75] try using digest instead of tag

---
 gridss.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gridss.wdl b/gridss.wdl
index 9b6596b5..0ffd4acf 100644
--- a/gridss.wdl
+++ b/gridss.wdl
@@ -404,7 +404,7 @@ task GridssSvPrep {
         Int nonJvmMemoryGb = 10
         Int threads = 10
         Int timeMinutes = ceil(7200 / threads) + 1800
-        String dockerImage = "quay.io/biowdl/gridss:2.13.2_1"
+        String dockerImage = "quay.io/biowdl/gridss@sha256:f70696fda4b6f2612b21539d49986cf31bee7542a9eb0269a9f718f99df3fb2a"
     }
 
     command {

From fb65c53c74446d89228cdbfa741b40cdbd16d857 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 2 May 2023 16:03:45 +0200
Subject: [PATCH 30/75] give SvPrepDepthAnnotator more time

---
 hmftools.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 2e8cdec9..45b132f4 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -1589,7 +1589,7 @@ task SvPrepDepthAnnotator {
         Int threads = 10
         String javaXmx = "48G"
         String memory = "50GiB"
-        Int timeMinutes = 120
+        Int timeMinutes = 240
         String dockerImage = "quay.io/biocontainers/hmftools-sv-prep:1.1--hdfd78af_0"
     }
 

From 5ee9d8e49761212a5d22f77e7738a58eda1cd6ff Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Thu, 4 May 2023 12:01:07 +0200
Subject: [PATCH 31/75] update inputs for annotateinsertedsequences

---
 gridss.wdl | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/gridss.wdl b/gridss.wdl
index 0ffd4acf..6f4f3660 100644
--- a/gridss.wdl
+++ b/gridss.wdl
@@ -26,10 +26,7 @@ task AnnotateInsertedSequence {
     input {
         File inputVcf
         String outputPath = "gridss.annotated.vcf.gz"
-        File viralReference
-        File viralReferenceFai
-        File viralReferenceDict
-        File viralReferenceBwaIndex
+        BwaIndex viralReferenceBwaIndex
 
         Int threads = 8
         String javaXmx = "8G"
@@ -42,7 +39,7 @@ task AnnotateInsertedSequence {
         set -e
         _JAVA_OPTIONS="$_JAVA_OPTIONS -Xmx~{javaXmx}"
         AnnotateInsertedSequence \
-        REFERENCE_SEQUENCE=~{viralReference} \
+        REFERENCE_SEQUENCE=~{viralReferenceBwaIndex.fastaFile} \
         INPUT=~{inputVcf} \
         OUTPUT=~{outputPath} \
         ALIGNMENT=APPEND \
@@ -65,10 +62,7 @@ task AnnotateInsertedSequence {
     parameter_meta {
         inputVcf: {description: "The input VCF file.", category: "required"}
         outputPath: {description: "The path the output will be written to.", category: "common"}
-        viralReference: {description: "A fasta file with viral sequences.", category: "required"}
-        viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"}
-        viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"}
-        viralReferenceBwaIndex: {description: "The BWA index img file of the viral reference.", category: "required"}
+        viralReferenceBwaIndex: {description: "The BWA index of the viral reference.", category: "required"}
 
         memory: {description: "The amount of memory this job will use.", category: "advanced"}
         javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",

From 05270fe76c18f24288ddc42f6bcff67dc9eb3d10 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 5 May 2023 10:23:47 +0200
Subject: [PATCH 32/75] typo

---
 hmftools.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 45b132f4..1f919356 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -1167,7 +1167,7 @@ task Purple {
         ~{"-target_regions_ratios " + targetRegionsRatios} \
         ~{"-target_regions_msi_indels " + targetRegionsMsiIndels} \
         ~{"-min_diploid_tumor_ratio_count " + minDiploidTumorRatioCount} \
-        ~{"-min_diploid_tumor_ratio_count_centromere" + minDiploidTumorRatioCountCentromere} \ 
+        ~{"-min_diploid_tumor_ratio_count_centromere " + minDiploidTumorRatioCountCentromere} \
         -threads ~{threads}
     }
 

From 646ccc523ff98278d206dd7f89f267e2f305af17 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 5 May 2023 12:23:03 +0200
Subject: [PATCH 33/75] fix linx outputs

---
 hmftools.wdl | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 1f919356..143bdc00 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -610,27 +610,28 @@ task Linx {
         ~{if germline then "-germline" else ""}
     }
 
+    String prefix = if germline then "~{sampleName}.linx.germline" else "~{sampleName}.linx"
+
     output {
-        File driverCatalog = "~{outputDir}/~{sampleName}.linx.driver.catalog.tsv"
-        File linxBreakend = "~{outputDir}/~{sampleName}.linx.breakend.tsv"
-        File linxClusters = "~{outputDir}/~{sampleName}.linx.clusters.tsv"
-        File linxDrivers = "~{outputDir}/~{sampleName}.linx.drivers.tsv"
-        File linxFusion = "~{outputDir}/~{sampleName}.linx.fusion.tsv"
-        File linxLinks = "~{outputDir}/~{sampleName}.linx.links.tsv"
-        File linxSvs = "~{outputDir}/~{sampleName}.linx.svs.tsv"
-        File linxVisCopyNumber = "~{outputDir}/~{sampleName}.linx.vis_copy_number.tsv"
-        File? linxVisFusion = "~{outputDir}/~{sampleName}.linx.vis_fusion.tsv"
-        File? linxVisGeneExon = "~{outputDir}/~{sampleName}.linx.vis_gene_exon.tsv"
-        File? linxVisProteinDomain = "~{outputDir}/~{sampleName}.linx.vis_protein_domain.tsv"
-        File? linxVisSegments = "~{outputDir}/~{sampleName}.linx.vis_segments.tsv"
-        File? linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv"
-        File? linxGermlineDriverCatalogTsv = "~{outputDir}/~{sampleName}.linx.germline.driver.catalog.tsv"
-        File? linxGermlineDisruptionTsv = "~{outputDir}/~{sampleName}.linx.germline.disruption.tsv"
+        File driverCatalog = "~{outputDir}/~{prefix}.driver.catalog.tsv"
+        File linxClusters = "~{outputDir}/~{prefix}.clusters.tsv"
+        File linxLinks = "~{outputDir}/~{prefix}.links.tsv"
+        File linxSvs = "~{outputDir}/~{prefix}.svs.tsv"
+        File? linxBreakend = "~{outputDir}/~{prefix}.breakend.tsv"
+        File? linxDrivers = "~{outputDir}/~{prefix}.drivers.tsv"
+        File? linxFusion = "~{outputDir}/~{prefix}.fusion.tsv"
+        File? linxVisCopyNumber = "~{outputDir}/~{prefix}.vis_copy_number.tsv"
+        File? linxVisFusion = "~{outputDir}/~{prefix}.vis_fusion.tsv"
+        File? linxVisGeneExon = "~{outputDir}/~{prefix}.vis_gene_exon.tsv"
+        File? linxVisProteinDomain = "~{outputDir}/~{prefix}.vis_protein_domain.tsv"
+        File? linxVisSegments = "~{outputDir}/~{prefix}.vis_segments.tsv"
+        File? linxVisSvData = "~{outputDir}/~{prefix}.vis_sv_data.tsv"
+        File? linxDisruptionTsv = "~{outputDir}/~{prefix}.disruption.tsv"
         File linxVersion = "~{outputDir}/linx.version"
         Array[File] outputs = select_all([driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion,
                                linxLinks, linxSvs, linxVisCopyNumber, linxVisFusion,
                                linxVisGeneExon, linxVisProteinDomain, linxVisSegments, linxVisSvData,
-                               linxGermlineDriverCatalogTsv, linxGermlineDisruptionTsv, linxVersion])
+                               linxDisruptionTsv, linxVersion])
     }
 
     runtime {

From 0260a7a0eb72b82bd615cc4e598bb4a4adf1dbe9 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 12 May 2023 14:05:09 +0200
Subject: [PATCH 34/75] cleanup

---
 gridss.wdl   | 2 +-
 hmftools.wdl | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/gridss.wdl b/gridss.wdl
index 6f4f3660..c9ba9bac 100644
--- a/gridss.wdl
+++ b/gridss.wdl
@@ -339,7 +339,7 @@ task GridssAnnotateVcfRepeatmasker {
 
         String memory = "25GiB"
         Int threads = 8
-        String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1" #TODO check if we still need our own patched image
+        String dockerImage = "quay.io/biocontainers/gridss:2.13.2--h20b1175_1"
         Int timeMinutes = 1440
     }
 
diff --git a/hmftools.wdl b/hmftools.wdl
index 143bdc00..f6fb261b 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -826,7 +826,6 @@ task Orange {
         -driver_gene_panel_tsv ~{driverGenePanel} \
         -known_fusion_file ~{knownFusionFile}
     }
-    #TODO may need to be added: -pipeline_version_file ~{pipelineVersionFile}
 
     output {
         File orangeJson = "~{outputDir}/~{tumorName}.orange.json"

From 1b4467c982a1ff06f434679bf9eb65d3edac62fc Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 16 May 2023 10:30:21 +0200
Subject: [PATCH 35/75] typo

---
 hmftools.wdl | 2 +-
 peach.wdl    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index f6fb261b..1f6eb8f9 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -531,7 +531,7 @@ task Lilac {
         -ref_genome ~{referenceFasta} \
         -ref_genome_version ~{refGenomeVersion} \
         -resource_dir ~{sub(hlaRefAminoacidSequencesCsv, basename(hlaRefAminoacidSequencesCsv), "")} \
-        -outputDir ~{outputDir} \
+        -output_dir ~{outputDir} \
         -threads ~{threads} \
         ~{"-tumor_bam " + tumorBam} \
         ~{"-gene_copy_number " + geneCopyNumberFile} \
diff --git a/peach.wdl b/peach.wdl
index 39db7e08..9ace8958 100644
--- a/peach.wdl
+++ b/peach.wdl
@@ -30,7 +30,7 @@ task Peach {
         File panelJson
 
         String memory = "2GiB"
-        String dockerImage = "quay.io/biowdl/peach:v1.7"
+        String dockerImage = "quay.io/biowdl/peach@sha256:025dc28fe448256729a6022d4d30deaee8105ab83d123dab9640251985240748"
         Int timeMinutes = 5
     }
 

From 143d5132e7f3b25137c6b1533586a4aa1436c3ca Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 16 May 2023 12:55:10 +0200
Subject: [PATCH 36/75] use digest instead of tag for cuppa docker

---
 hmftools.wdl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 1f6eb8f9..b25ce28f 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -189,7 +189,7 @@ task CupGenerateReport {
 
         String memory = "5GiB"
         Int timeMinutes = 10
-        String dockerImage = "quay.io/biowdl/cuppa:1.7.1"
+        String dockerImage = "quay.io/biowdl/cuppa@sha256:e76d367a3226068967fb64ad6adaa889cbdcc01397075b0cbc382bbba4350b98"
     }
 
     # This script writes to the directory that the input is located in.
@@ -254,7 +254,7 @@ task Cuppa {
         String javaXmx = "4G"
         String memory = "5GiB"
         Int timeMinutes = 10
-        String dockerImage = "quay.io/biowdl/cuppa:1.7.1"
+        String dockerImage = "quay.io/biowdl/cuppa@sha256:e76d367a3226068967fb64ad6adaa889cbdcc01397075b0cbc382bbba4350b98"
     }
 
     command {
@@ -304,7 +304,7 @@ task CuppaChart {
 
         String memory = "4GiB"
         Int timeMinutes = 5
-        String dockerImage = "quay.io/biowdl/cuppa:1.7.1"
+        String dockerImage = "quay.io/biowdl/cuppa@sha256:e76d367a3226068967fb64ad6adaa889cbdcc01397075b0cbc382bbba4350b98"
     }
 
     command {

From b570e7ce3c42e3c95c2f1affd402367802c54e3f Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 16 May 2023 14:19:44 +0200
Subject: [PATCH 37/75] fix issue with java permissions in orange

---
 hmftools.wdl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hmftools.wdl b/hmftools.wdl
index b25ce28f..f5d690a2 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -780,6 +780,7 @@ task Orange {
     command {
         set -e
         mkdir -p ~{outputDir}
+        export JAVA_TOOL_OPTIONS='--add-opens=java.base/java.time=ALL-UNNAMED'
         orange -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
         -output_dir ~{outputDir} \
         -ref_genome_version ~{if hg38 then "38" else "37"} \

From 5bc6e31955c58755149b6b5231b45ed5edf6c7fc Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Mon, 22 May 2023 15:15:12 +0200
Subject: [PATCH 38/75] add output for hmftools rose

---
 hmftools.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index f5d690a2..90d6828e 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -1323,7 +1323,7 @@ task Rose {
     }
 
     output {
-        #TODO
+        File roseTsv = "~{outputDir}/~{tumorName}.rose.tsv"
     }
 
     runtime {

From c9f9172de1eca1a52d87f7a881b157df2611e945 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 26 May 2023 15:38:38 +0200
Subject: [PATCH 39/75] fix timeMinutes for fastp

---
 fastp.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fastp.wdl b/fastp.wdl
index 9849738b..7df0a8f3 100644
--- a/fastp.wdl
+++ b/fastp.wdl
@@ -42,7 +42,7 @@ task Fastp {
         
         Int threads = 4
         String memory = "50GiB"
-        Int timeMinutes = 1 + ceil(size([read1, read2], "G")  * 6.0 / threads)
+        Int timeMinutes = 1 + ceil(size([read1, read2], "G")  * 7.0 / select_first([effectiveSplit, threads]))
         String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3"
 
         Int? noneInt

From 99a299981c7dbb0ade363d04c56ad063dda83265 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 30 May 2023 09:58:38 +0200
Subject: [PATCH 40/75] increase sambamba markdup time

---
 sambamba.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sambamba.wdl b/sambamba.wdl
index fb6c83de..51077ec0 100644
--- a/sambamba.wdl
+++ b/sambamba.wdl
@@ -84,7 +84,7 @@ task Markdup {
         # Added 8192 mb as a margin of safety. Real life use with this setting uses 2.7 GiB.
         Int memoryMb = 8192 + sortBufferSize + 2 * ioBufferSize
         # Time minute calculation does not work well for higher number of threads.
-        Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 25) / threads
+        Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 30) / threads
         String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2"
     }
 

From 1a2ad6810445d70386422f0df120a3a02e149687 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Wed, 31 May 2023 11:42:08 +0200
Subject: [PATCH 41/75] increase time sambamba markdup

---
 sambamba.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sambamba.wdl b/sambamba.wdl
index 51077ec0..acf40278 100644
--- a/sambamba.wdl
+++ b/sambamba.wdl
@@ -84,7 +84,7 @@ task Markdup {
         # Added 8192 mb as a margin of safety. Real life use with this setting uses 2.7 GiB.
         Int memoryMb = 8192 + sortBufferSize + 2 * ioBufferSize
         # Time minute calculation does not work well for higher number of threads.
-        Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 30) / threads
+        Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 35) / threads
         String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2"
     }
 

From 4fe8370fb723a0a1449abbe6fb30f677c12d3f42 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 2 Jun 2023 14:27:29 +0200
Subject: [PATCH 42/75] add missing purple output

---
 hmftools.wdl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 90d6828e..2c043dd8 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -1215,7 +1215,8 @@ task Purple {
             purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc,
             purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv,
             purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex,
-            purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv]
+            purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv,
+            purpleGermlineDeletionTsv]
         Array[File] plots = select_all([circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot,
             segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot])
         Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink,

From 52cfccef8c352659e281382309b9eef5c3c9bad5 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 6 Jun 2023 14:19:15 +0200
Subject: [PATCH 43/75] adjust outputs for hmftools

---
 extractSigPredictHRD.wdl |  2 +-
 hmftools.wdl             | 25 +++++++++++++++++--------
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl
index 1520b608..c9e4c67f 100644
--- a/extractSigPredictHRD.wdl
+++ b/extractSigPredictHRD.wdl
@@ -22,7 +22,7 @@ version 1.0
 
 task ExtractSigPredictHRD {
     input {
-        String outputDir = "."
+        String outputDir = "./chord"
         String sampleName
         File snvIndelVcf
         File snvIndelVcfIndex
diff --git a/hmftools.wdl b/hmftools.wdl
index 2c043dd8..23efa1ae 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -70,7 +70,8 @@ task Amber {
         File normalSnpVcf = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz"
         File normalSnpVcfIndex = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz.tbi"
         Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorContaminationVcf,
-            tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, normalSnpVcf, normalSnpVcfIndex]
+            tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, normalHomozygousregionsTsv,
+            normalSnpVcf, normalSnpVcfIndex]
     }
 
     runtime {
@@ -541,6 +542,7 @@ task Lilac {
     output {
         File lilacCsv = "~{outputDir}/~{sampleName}.lilac.csv"
         File lilacQcCsv = "~{outputDir}/~{sampleName}.lilac.qc.csv"
+        File candidatesCoverageCsv = "~{outputDir}/~{sampleName}.candidates.coverage.csv"
     }
 
     runtime {
@@ -668,7 +670,7 @@ task Linx {
 
 task LinxVisualisations {
     input {
-        String outputDir = "./linx_visualisation"
+        String outputDir = "./linx"
         String sample
         String refGenomeVersion
         Array[File]+ linxOutput
@@ -1002,7 +1004,7 @@ task Protect {
         String tumorName
         String referenceName
         Array[String]+ sampleDoids
-        String outputDir = "."
+        String outputDir = "./protect"
         Array[File]+ serveActionability
         File doidJson
         File purplePurity
@@ -1410,14 +1412,20 @@ task Sage {
         ~{"-low_confidence_min_tumor_qual " + lowConfidenceMinTumorQual}
     }
 
+    String outputDir = sub(outputPath, basename(outputPath), "")
+
     output { #FIXME does it produce multiple plots/tsvs if multiple samples are given?
         File outputVcf = outputPath
         File outputVcfIndex = outputPath + ".tbi"
-        File? referenceSageBqrPng = "~{referenceName[0]}.sage.bqr.png"
-        File? referenceSageBqrTsv = "~{referenceName[0]}.sage.bqr.tsv"
-        File tumorSageBqrPng = "~{tumorName[0]}.sage.bqr.png"
-        File tumorSageBqrTsv = "~{tumorName[0]}.sage.bqr.tsv"
-        File sageGeneCoverageTsv = "~{tumorName[0]}.sage.gene.coverage.tsv"
+        File? referenceSageBqrPng = "~{outputDir}/~{referenceName[0]}.sage.bqr.png"
+        File? referenceSageBqrTsv = "~{outputDir}/~{referenceName[0]}.sage.bqr.tsv"
+        File tumorSageBqrPng = "~{outputDir}/~{tumorName[0]}.sage.bqr.png"
+        File tumorSageBqrTsv = "~{outputDir}/~{tumorName[0]}.sage.bqr.tsv"
+        File sageGeneCoverageTsv = "~{outputDir}/~{tumorName[0]}.sage.gene.coverage.tsv"
+        File referenceSageExonMediansTsv = "~{outputDir}/~{tumorName[0]}.sage.exon.medians.tsv"
+        Array[File] outputs = select_all([outputVcf, outputVcfIndex, referenceSageBqrPng,
+                                          referenceSageBqrTsv, tumorSageBqrPng, tumorSageBqrTsv,
+                                          sageGeneCoverageTsv, referenceSageExonMediansTsv])
     }
 
     runtime {
@@ -1486,6 +1494,7 @@ task Sigs {
 
     output {
         File sigAllocationTsv = "~{outputDir}/~{sampleName}.sig.allocation.tsv"
+        File sigSnvCountsCsv = "~{outputDir}/~{sampleName}.sig.snv_counts.csv"
     }
 
     runtime {

From 6194b2d1352e2020d3f42982ebc7c9d2f4d21108 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Thu, 13 Jul 2023 14:37:33 +0200
Subject: [PATCH 44/75] add writeNeoEpitopes option to linx

---
 hmftools.wdl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hmftools.wdl b/hmftools.wdl
index 23efa1ae..d494a5eb 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -575,6 +575,7 @@ task Linx {
         Boolean checkFusions = true
         Boolean checkDrivers = true
         Boolean writeVisData = true
+        Boolean writeNeoEpitopes = false
         #The following should be in the same directory.
         File geneDataCsv
         File proteinFeaturesCsv
@@ -609,6 +610,7 @@ task Linx {
         -driver_gene_panel ~{driverGenePanel} \
         ~{if writeVisData then "-write_vis_data" else ""} \
         ~{if writeAllVisFusions then "-write_all_vis_fusions" else ""} \
+        ~{if writeNeoEpitopes then "-write_neo_epitopes" else ""} \
         ~{if germline then "-germline" else ""}
     }
 
@@ -654,6 +656,7 @@ task Linx {
         knownFusionCsv: {description: "A CSV file describing known fusions.", category: "required"}
         driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"}
         writeAllVisFusions: {description: "Equivalent to the -write_all_vis_fusions flag.", category: "advanced"}
+        writeNeoEpitopes: {description: "Equivalent to the -write_neo_epitopes flag.", category: "advanced"}
         geneDataCsv: {description: "A  CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
         proteinFeaturesCsv: {description: "A  CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
         transExonDataCsv: {description: "A  CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"}

From c3e29085e54becb14ac5b492164b1eb090417e04 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 18 Jul 2023 11:30:57 +0200
Subject: [PATCH 45/75] add task for hmftools neo

---
 hmftools.wdl | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/hmftools.wdl b/hmftools.wdl
index d494a5eb..7e82ecc8 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -727,6 +727,76 @@ task LinxVisualisations {
     }
 }
 
+task Neo {
+    input {
+        String sampleId
+        File somaticVcf
+        File somaticVcfIndex
+        Array[File]+ linxOutput
+        String refGenomeVersion
+        File referenceFasta
+        File referenceFastaFai
+        File referenceFastaDict
+        String outputDir = "./neo"
+        #The following should be in the same directory.
+        File geneDataCsv
+        File proteinFeaturesCsv
+        File transExonDataCsv
+        File transSpliceDataCsv
+
+        String memory = "9GiB"
+        String javaXmx = "8G"
+        Int timeMinutes = 1440
+        String dockerImage = "quay.io/biocontainers/hmftools-neo:1.0.1--hdfd78af_0"
+    }
+
+    command {
+        set -e
+        mkdir -p ~{outputDir}
+        neo -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+        -sample ~{sampleId} \
+        -ref_genome_version ~{refGenomeVersion} \
+        -ref_genome ~{referenceFasta} \
+        -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \
+        -linx_dir ~{sub(linxOutput[0], basename(linxOutput[0]), "")} \
+        -somatic_vcf ~{somaticVcf} \
+        -output_dir ~{outputDir}
+    }
+
+    output {
+        File neoData = "~{outputDir}/~{sampleId}.neo.neo_data.tsv"
+    }
+
+    runtime {
+        time_minutes: timeMinutes # !UnknownRuntimeKey
+        docker: dockerImage
+        memory: memory
+    }
+
+    parameter_meta {
+        sampleId: {description: "The name/id of the sample.", category: "required"}
+        somaticVcf: {description: "The vcf containing the sample's somatic variants.", category: "required"}
+        somaticVcfIndex: {description: "The index for the vcf containing the sample's somatic variants.", category: "required"}
+        linxOutput: {description: "The output files produced by linx. The directory containing the first file is passed to neo as the linx directory.", category: "required"}
+        refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"}
+        referenceFasta: {description: "The reference fasta file.", category: "required"}
+        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
+        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+        outputDir: {description: "The directory the outputs will be written to.", category: "required"}
+        geneDataCsv: {description: "A  CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
+        proteinFeaturesCsv: {description: "A  CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
+        transExonDataCsv: {description: "A  CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"}
+        transSpliceDataCsv: {description: "A  CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"}
+
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+                  category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+}
+
 task Orange {
     input {
         String outputDir = "./orange"

From 0a7cfc98bf197d031acb2a19c2fa6d73f9ceca33 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 18 Jul 2023 11:43:39 +0200
Subject: [PATCH 46/75] add parameter_meta to lilac

---
 hmftools.wdl | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/hmftools.wdl b/hmftools.wdl
index 7e82ecc8..38f5124b 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -553,7 +553,32 @@ task Lilac {
     }
 
     parameter_meta {
+        sampleName: {description: "The name of the sample.", category: "required"}
+        referenceBam: {description: "The bam file for the reference sample.", category: "required"}
+        referenceBamIndex: {description: "The index for the reference sample's bam file.", category: "required"}
+        tumorBam: {description: "The bam file for the tumor sample.", category: "common"}
+        tumorBamIndex: {description: "The index for the tumor sample's bam file.", category: "required"}
+        refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"}
+        referenceFasta: {description: "The reference fasta file.", category: "required"}
+        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
+        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+        geneCopyNumberFile: {description: "Gene copy number file produced by purple.", category: "common"}
+        somaticVariantsFile: {description: "Somatic variant VCF produced by purple.", category: "common"}
+        somaticVariantsFileIndex: {description: "Index for the somatic variant VCf produced by purple.", category: "common"}
+        outputDir: {description: "The directory the outputs will be written to.", category: "required"}
+
+        #The following need to be in the same directory
+        hlaRefAminoacidSequencesCsv: {description: "LILAC reference file.", category: "required"}
+        hlaRefNucleotideSequencesCsv: {description: "LILAC reference file.", category: "required"}
+        lilacAlleleFrequenciesCsv: {description: "LILAC reference file.", category: "required"}
 
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+                  category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        threads: {description: "The number of threads to use", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
     }
 }
 

From 290a3fc2232f0508fdcbe080442c2b4b8e142ec1 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 18 Jul 2023 14:35:40 +0200
Subject: [PATCH 47/75] add task for hmftools NeoScorer

---
 hmftools.wdl | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 96 insertions(+), 2 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 38f5124b..537e2bfa 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -566,8 +566,6 @@ task Lilac {
         somaticVariantsFile: {description: "Somatic variant VCF produced by purple.", category: "common"}
         somaticVariantsFileIndex: {description: "Index for the somatic variant VCf produced by purple.", category: "common"}
         outputDir: {description: "The directory the outputs will be written to.", category: "required"}
-
-        #The following need to be in the same directory
         hlaRefAminoacidSequencesCsv: {description: "LILAC reference file.", category: "required"}
         hlaRefNucleotideSequencesCsv: {description: "LILAC reference file.", category: "required"}
         lilacAlleleFrequenciesCsv: {description: "LILAC reference file.", category: "required"}
@@ -822,6 +820,102 @@ task Neo {
     }
 }
 
+task neoScorer {
+    input {
+        String sampleId
+        String refGenomeVersion
+        File referenceFasta
+        File referenceFastaFai
+        File referenceFastaDict
+        Array[File]+ neoBindingFiles
+        String neoBindingFileId = "cmb_02"
+        File cancerTpmMedians
+        File neoData
+        Array[File]+ lilacOutput
+        Array[File]+ purpleOutput
+        String outputDir = "./neo"
+
+        #The following should be in the same directory.
+        File geneDataCsv
+        File proteinFeaturesCsv
+        File transExonDataCsv
+        File transSpliceDataCsv
+
+        String? cancerType
+        Array[File]? isofoxOutput
+        File? rnaSomaticVcf
+        File? rnaSomaticVcfIndex
+
+        String memory = "9GiB"
+        String javaXmx = "8G"
+        Int timeMinutes = 1440
+        String dockerImage = "quay.io/biocontainers/hmftools-neo:1.0.1--hdfd78af_0"
+    }
+
+    String isofoxDir = sub(select_first([isofoxOutput, [""]])[0], basename(select_first([isofoxOutput, [""]])[0]), "")
+
+    command {
+        set -e
+        mkdir -p ~{outputDir}
+        neo com.hartwig.hmftools.neo.scorer.NeoScorer Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+        -sample ~{sampleId} \
+        ~{"-cancer_type " + cancerType} \
+        -ref_genome_version ~{refGenomeVersion} \
+        -ref_genome ~{referenceFasta} \
+        -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \
+        -score_file_dir ~{sub(neoBindingFiles[0], basename(neoBindingFiles[0]), "")} \
+        -score_file_id ~{neoBindingFileId} \
+        -cancer_tpm_medians_file ~{cancerTpmMedians} \
+        -neo_dir ~{sub(neoData, basename(neoData), "")} \
+        ~{if defined(isofoxOutput) then "-isofox_dir " + isofoxDir else ""} \
+        -lilac_dir ~{sub(lilacOutput[0], basename(lilacOutput[0]), "")} \
+        -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \
+        ~{"-rna_somatic_vcf " + rnaSomaticVcf} \
+        -output_dir ~{outputDir}
+    }
+
+    output {
+        File neoepitopes = "~{outputDir}/~{sampleId}.neo.neoepitope.tsv"
+        File peptideScores = "~{outputDir}/~{sampleId}.neo.peptide_scores.tsv"
+    }
+
+    runtime {
+        time_minutes: timeMinutes # !UnknownRuntimeKey
+        docker: dockerImage
+        memory: memory
+    }
+
+    parameter_meta {
+        sampleId: {description: "The name/id of the sample.", category: "required"}
+        refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"}
+        referenceFasta: {description: "The reference fasta file.", category: "required"}
+        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
+        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+        neoBindingFiles: {description: "The neo binding reference files.", category: "required"}
+        neoBindingFileId: {description: "The neo binding reference file version id.", category: "required"}
+        cancerTpmMedians: {description: "HMF RNA cohort transcript median TPM file.", category: "required"}
+        neoData: {description: "Data file produced by neo.", category: "required"}
+        lilacOutput: {description: "The output produced by lilac.", category: "required"}
+        purpleOutput: {description: "The output produced by purple.", category: "required"}
+        outputDir: {description: "The directory the outputs will be written to.", category: "required"}
+        geneDataCsv: {description: "A  CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
+        proteinFeaturesCsv: {description: "A  CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
+        transExonDataCsv: {description: "A  CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"}
+        transSpliceDataCsv: {description: "A  CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"}
+        cancerType: {description: "The cancer type.", category: "common"}
+        isofoxOutput: {description: "The output produced by isofox.", category: "common"}
+        rnaSomaticVcf: {description: "SageAppend produced rna somatic VCF file.", category: "common"}
+        rnaSomaticVcfIndex: {description: "Index for the rna somatic VCF file.", category: "common"}
+
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+                  category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+}
+
 task Orange {
     input {
         String outputDir = "./orange"

From 2f11393a46e201bae32703fa4d2dc66222e8982b Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Wed, 19 Jul 2023 12:49:45 +0200
Subject: [PATCH 48/75] fix some outputs and typo

---
 hmftools.wdl | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 537e2bfa..ca6aee78 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -543,6 +543,7 @@ task Lilac {
         File lilacCsv = "~{outputDir}/~{sampleName}.lilac.csv"
         File lilacQcCsv = "~{outputDir}/~{sampleName}.lilac.qc.csv"
         File candidatesCoverageCsv = "~{outputDir}/~{sampleName}.candidates.coverage.csv"
+        Array[File] outputs = [lilacCsv, lilacQcCsv, candidatesCoverageCsv]
     }
 
     runtime {
@@ -654,11 +655,12 @@ task Linx {
         File? linxVisSegments = "~{outputDir}/~{prefix}.vis_segments.tsv"
         File? linxVisSvData = "~{outputDir}/~{prefix}.vis_sv_data.tsv"
         File? linxDisruptionTsv = "~{outputDir}/~{prefix}.disruption.tsv"
+        File? linxNeoepitopeTsv = "~{outputDir}/~{prefix}.neoepitope.tsv"
         File linxVersion = "~{outputDir}/linx.version"
         Array[File] outputs = select_all([driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion,
                                linxLinks, linxSvs, linxVisCopyNumber, linxVisFusion,
                                linxVisGeneExon, linxVisProteinDomain, linxVisSegments, linxVisSvData,
-                               linxDisruptionTsv, linxVersion])
+                               linxDisruptionTsv, linxNeoepitopeTsv, linxVersion])
     }
 
     runtime {
@@ -820,7 +822,7 @@ task Neo {
     }
 }
 
-task neoScorer {
+task NeoScorer {
     input {
         String sampleId
         String refGenomeVersion
@@ -828,7 +830,7 @@ task neoScorer {
         File referenceFastaFai
         File referenceFastaDict
         Array[File]+ neoBindingFiles
-        String neoBindingFileId = "cmb_02"
+        String neoBindingFileId
         File cancerTpmMedians
         File neoData
         Array[File]+ lilacOutput

From 318eca8f907fc78b9658e0f67849e9a137f5ec90 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Thu, 20 Jul 2023 13:02:51 +0200
Subject: [PATCH 49/75] add mkdir to SvPrepDepthAnnotator

---
 hmftools.wdl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hmftools.wdl b/hmftools.wdl
index 23efa1ae..05a3b634 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -1605,6 +1605,8 @@ task SvPrepDepthAnnotator {
     }
 
     command {
+        set -e
+        mkdir -p "$(dirname ~{outputVcf})"
         java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
         -cp /usr/local/share/hmftools-sv-prep-1.1-0/sv-prep.jar \
         com.hartwig.hmftools.svprep.depth.DepthAnnotator \

From 7e987c575262400698cd525fbb178ef0cf960eea Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Thu, 20 Jul 2023 14:14:48 +0200
Subject: [PATCH 50/75] fix Neo/NeoScorer inputs

---
 hmftools.wdl | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index a48a28d8..aae544c1 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -769,6 +769,8 @@ task Neo {
         File transExonDataCsv
         File transSpliceDataCsv
 
+        Int reqAminoAcids = 15
+
         String memory = "9GiB"
         String javaXmx = "8G"
         Int timeMinutes = 1440
@@ -785,6 +787,7 @@ task Neo {
         -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \
         -linx_dir ~{sub(linxOutput[0], basename(linxOutput[0]), "")} \
         -somatic_vcf ~{somaticVcf} \
+        -req_amino_acids ~{reqAminoAcids} \
         -output_dir ~{outputDir}
     }
 
@@ -807,11 +810,12 @@ task Neo {
         referenceFasta: {description: "The reference fasta file.", category: "required"}
         referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
         referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
-        outputDir: {description: "The directory the outputs will be written to.", category: "required"}
+        outputDir: {description: "The directory the outputs will be written to.", category: "common"}
         geneDataCsv: {description: "A  CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
         proteinFeaturesCsv: {description: "A  CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
         transExonDataCsv: {description: "A  CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"}
         transSpliceDataCsv: {description: "A  CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"}
+        reqAminoAcids: {description: "Equivalent to neo's -req_amino_acids option.", category: "required"}
 
         memory: {description: "The amount of memory this job will use.", category: "advanced"}
         javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
@@ -825,10 +829,6 @@ task Neo {
 task NeoScorer {
     input {
         String sampleId
-        String refGenomeVersion
-        File referenceFasta
-        File referenceFastaFai
-        File referenceFastaDict
         Array[File]+ neoBindingFiles
         String neoBindingFileId
         File cancerTpmMedians
@@ -862,8 +862,6 @@ task NeoScorer {
         neo com.hartwig.hmftools.neo.scorer.NeoScorer Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
         -sample ~{sampleId} \
         ~{"-cancer_type " + cancerType} \
-        -ref_genome_version ~{refGenomeVersion} \
-        -ref_genome ~{referenceFasta} \
         -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \
         -score_file_dir ~{sub(neoBindingFiles[0], basename(neoBindingFiles[0]), "")} \
         -score_file_id ~{neoBindingFileId} \
@@ -889,10 +887,6 @@ task NeoScorer {
 
     parameter_meta {
         sampleId: {description: "The name/id of the sample.", category: "required"}
-        refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"}
-        referenceFasta: {description: "The reference fasta file.", category: "required"}
-        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
-        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
         neoBindingFiles: {description: "The neo binding reference files.", category: "required"}
         neoBindingFileId: {description: "The neo binding reference file version id.", category: "required"}
         cancerTpmMedians: {description: "HMF RNA cohort transcript median TPM file.", category: "required"}

From c10a063d488268e59c4100b81c5870f1c2d4714c Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 21 Jul 2023 09:48:52 +0200
Subject: [PATCH 51/75] add mkdir to ExtractSigPredictHRD

---
 extractSigPredictHRD.wdl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl
index c9e4c67f..86f298c6 100644
--- a/extractSigPredictHRD.wdl
+++ b/extractSigPredictHRD.wdl
@@ -36,6 +36,8 @@ task ExtractSigPredictHRD {
     }
 
     command {
+        set -e
+        mkdir -p ~{outputDir}
         extractSigPredictHRD.R \
         ~{outputDir} \
         ~{sampleName} \

From 4991c67e4e12f7101d0b24c53ddf78e4e6ff3105 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 21 Jul 2023 09:50:19 +0200
Subject: [PATCH 52/75] add mkdir to VirusInterpreter

---
 hmftools.wdl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hmftools.wdl b/hmftools.wdl
index 05a3b634..46422f15 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -1669,6 +1669,8 @@ task VirusInterpreter {
     }
 
     command {
+        set -e
+        mkdir -p ~{outputDir}
         virus-interpreter -Xmx~{javaXmx} -XX:ParallelGCThreads=1  \
         -sample_id ~{sampleId} \
         -purple_purity_tsv ~{purplePurityTsv} \

From fe131980526ff8703424a2b9cb0becf7a5b53cee Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 21 Jul 2023 14:05:47 +0200
Subject: [PATCH 53/75] fix ExtractSigPredictHRD

---
 extractSigPredictHRD.wdl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl
index 86f298c6..b43d8daf 100644
--- a/extractSigPredictHRD.wdl
+++ b/extractSigPredictHRD.wdl
@@ -38,8 +38,9 @@ task ExtractSigPredictHRD {
     command {
         set -e
         mkdir -p ~{outputDir}
+        cd ~{outputDir}
         extractSigPredictHRD.R \
-        ~{outputDir} \
+        . \
         ~{sampleName} \
         ~{snvIndelVcf} \
         ~{svVcf} \

From fd29caaf7fc7a595e848d247fe70ff8b7a21a68a Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 22 Aug 2023 11:37:45 +0200
Subject: [PATCH 54/75] add optional inputs to star, add isofox task

---
 hmftools.wdl | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 star.wdl     | 30 +++++++++++++++++++
 2 files changed, 114 insertions(+)

diff --git a/hmftools.wdl b/hmftools.wdl
index bfdf0817..7ef6f596 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -497,6 +497,90 @@ task HealthChecker {
     }
 }
 
+task Isofox {
+    input {
+        String sampleName
+        File neoepitopeFile
+        File bamFile
+        File bamIndex
+        File referenceFasta
+        File referenceFastaFai
+        File referenceFastaDict
+        String refGenomeVersion
+        File expCountsFile
+        File expGcRatiosFile
+
+        String outputDir = "./isofox"
+        Int readLength = 151
+
+        #The following should be in the same directory.
+        File geneDataCsv
+        File proteinFeaturesCsv
+        File transExonDataCsv
+        File transSpliceDataCsv
+
+        Int threads = 10
+        String javaXmx = "12G"
+        String memory = "13GiB"
+        Int timeMinutes = 120
+        String dockerImage = "quay.io/biocontainers/hmftools-isofox:1.6.2--hdfd78af_0"
+    }
+
+    command {
+        set -e
+        mkdir -p ~{outputDir}
+        isofox -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+        -sample ~{sampleName} \
+        -functions "NEO_EPITOPES;TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS" \
+        -neoepitope_file ~{neoepitopeFile} \
+        -bam_file ~{bamFile} \
+        -ref_genome ~{referenceFasta} \
+        -ref_genome_version ~{refGenomeVersion} \
+        -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \
+        -output_dir ~{outputDir} \
+        -log_debug \
+        -threads ~{threads}
+    }
+
+    output {
+        File neoepitopeTsv = "~{outputDir}/~{sampleName}.isf.neoepitope.tsv"
+        File outputs = [neoepitopeTsv]
+        #TODO
+    }
+
+    runtime {
+        cpu: threads
+        memory: memory
+        time_minutes: timeMinutes # !UnknownRuntimeKey
+        docker: dockerImage
+    }
+
+    parameter_meta {
+        sampleName: {description: "The name of the sample.", category: "required"}
+        neoepitopeFile: {description: "Neo's data file.", category: "required"}
+        bamFile: {description: "Input rna BAM file.", category: "required"}
+        bamIndex: {description: "Index for the rna BAM file.", category: "required"}
+        referenceFasta: {description: "The reference fasta file.", category: "required"}
+        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
+        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}  
+        refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"}
+        expCountsFile: {description: "Isofox reference file.", category: "required"}
+        expGcRatiosFile: {description: "Isofox reference file.", category: "required"}
+        outputDir: {description: "The directory the outputs will be written to.", category: "required"}
+        geneDataCsv: {description: "A  CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
+        proteinFeaturesCsv: {description: "A  CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
+        transExonDataCsv: {description: "A  CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"}
+        transSpliceDataCsv: {description: "A  CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"}
+
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+                  category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+}
+
 task Lilac {
     input {
         String sampleName
diff --git a/star.wdl b/star.wdl
index 88d3c838..5af12236 100644
--- a/star.wdl
+++ b/star.wdl
@@ -121,6 +121,21 @@ task Star {
         Float? outFilterScoreMinOverLread
         Int? outFilterMatchNmin
         Float? outFilterMatchNminOverLread
+        String? outSAMattributes
+        Int? outFilterMultimapNmax
+        Int? outFilterMismatchNmax
+        Int? limitOutSJcollapsed
+        Int? chimSegmentMin
+        String? chimOutType
+        Int? chimJunctionOverhangMin
+        Int? chimSegmentReadGapMax
+        Int? chimScoreMin
+        Int? chimScoreDropMax
+        Int? chimScoreJunctionNonGTAG
+        Int? chimScoreSeparation
+        Int? alignSplicedMateMapLminOverLmate
+        Int? alignSplicedMateMapLmin
+        String? alignSJstitchMismatchNmax
         String? outStd
         String? twopassMode = "Basic"
         Array[String]? outSAMattrRGline
@@ -158,6 +173,21 @@ task Star {
         ~{"--outFilterMatchNmin " + outFilterMatchNmin} \
         ~{"--outFilterMatchNminOverLread " + outFilterMatchNminOverLread} \
         ~{"--outSAMunmapped " + outSAMunmapped} \
+        ~{"--outSAMattributes " + outSAMattributes} \
+        ~{"--outFilterMultimapNmax " + outFilterMultimapNmax} \
+        ~{"--outFilterMismatchNmax " + outFilterMismatchNmax} \
+        ~{"--limitOutSJcollapsed " + limitOutSJcollapsed} \
+        ~{"--chimSegmentMin " + chimSegmentMin} \
+        ~{"--chimOutType " + chimOutType} \
+        ~{"--chimJunctionOverhangMin " + chimJunctionOverhangMin} \
+        ~{"--chimSegmentReadGapMax " + chimSegmentReadGapMax} \
+        ~{"--chimScoreMin " + chimScoreMin} \
+        ~{"--chimScoreDropMax " + chimScoreDropMax} \
+        ~{"--chimScoreJunctionNonGTAG " + chimScoreJunctionNonGTAG} \
+        ~{"--chimScoreSeparation " + chimScoreSeparation} \
+        ~{"--alignSplicedMateMapLminOverLmate " + alignSplicedMateMapLminOverLmate} \
+        ~{"--alignSplicedMateMapLmin " + alignSplicedMateMapLmin} \
+        ~{"--alignSJstitchMismatchNmax " + alignSJstitchMismatchNmax} \
         ~{"--runThreadN " + runThreadN} \
         ~{"--outStd " + outStd} \
         ~{"--twopassMode " + twopassMode} \

From 7e142caf9edbef8d1a4f18fed20091b6a1e05f8e Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 22 Aug 2023 11:47:44 +0200
Subject: [PATCH 55/75] Fix lint issues

---
 hmftools.wdl | 2 +-
 star.wdl     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 7ef6f596..0d98580f 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -544,7 +544,7 @@ task Isofox {
 
     output {
         File neoepitopeTsv = "~{outputDir}/~{sampleName}.isf.neoepitope.tsv"
-        File outputs = [neoepitopeTsv]
+        Array[File] outputs = [neoepitopeTsv]
         #TODO
     }
 
diff --git a/star.wdl b/star.wdl
index 5af12236..5b651970 100644
--- a/star.wdl
+++ b/star.wdl
@@ -133,7 +133,7 @@ task Star {
         Int? chimScoreDropMax
         Int? chimScoreJunctionNonGTAG
         Int? chimScoreSeparation
-        Int? alignSplicedMateMapLminOverLmate
+        Float? alignSplicedMateMapLminOverLmate
         Int? alignSplicedMateMapLmin
         String? alignSJstitchMismatchNmax
         String? outStd

From 67ca5b69a9565ada8de5df99e6dfe9ebfbfaf0a7 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 22 Aug 2023 13:34:44 +0200
Subject: [PATCH 56/75] add task for sage append

---
 hmftools.wdl | 68 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 65 insertions(+), 3 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 0d98580f..93be2cfc 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -1730,7 +1730,69 @@ task Sage {
         refSampleCount: {description: "Equivalent to sage's `ref_sample_count` option.", category: "advanced"}
         hg38: {description: "Whether or not the refernce genome is HG18, if false HG19 is assumed.", category: "common"}
 
-        threads: {description: "The numve of threads to use.", category: "advanced"}
+        threads: {description: "The number of threads to use.", category: "advanced"}
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+                  category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+}
+
+task SageAppend {
+    input {
+        String sampleName
+        File bamFile
+        File bamIndex
+        File referenceFasta
+        File referenceFastaDict
+        File referenceFastaFai
+        File sageVcf
+        String outPath = "./sage_append.vcf"
+
+        Int threads = 2
+        String javaXmx = "32G"
+        String memory = "33GiB"
+        Int timeMinutes = 720
+        String dockerImage = "quay.io/biocontainers/hmftools-sage:3.2.3--hdfd78af_0"
+    }
+
+    command {
+        java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+        -cp /usr/local/share/hmftools-sage-3.2.3-0/sage.jar \
+        com.hartwig.hmftools.sage.append.SageAppendApplication \
+        -reference ~{sampleName} \
+        -reference_bam ~{bamFile} \
+        -ref_genome ~{referenceFasta} \
+        -input_vcf ~{sageVcf} \
+        -out ~{outPath} \
+        -threads ~{threads}
+    }
+
+    output {
+        File vcf = outPath
+        File index = "~{outPath}.tbi"
+    }
+
+    runtime {
+        time_minutes: timeMinutes # !UnknownRuntimeKey
+        cpu: threads
+        docker: dockerImage
+        memory: memory
+    }
+
+    parameter_meta {
+        sampleName: {description: "The sample id.", category: "required"}
+        bamFile: {description: "The input BAM file.", category: "required"}
+        bamIndex: {description: "Index for the input BAM file", category: "required"}
+        referenceFasta: {description: "The reference fasta file.", category: "required"}
+        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
+        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+        sageVcf: {description: "A VCF file from Sage or Purple.", category: "required"}
+        outPath: {description: "Location to write the output to.", category: "required"}
+
+        threads: {description: "The number of threads to use.", category: "advanced"}
         memory: {description: "The amount of memory this job will use.", category: "advanced"}
         javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                   category: "advanced"}
@@ -1846,7 +1908,7 @@ task SvPrep {
         existingJunctionFile: {description: "Junctions file generated by an earlier run of this tool, eg. from a paired sample.", category: "common"}
         hg38: {description: "Whether or not the refernce genome is HG18, if false HG19 is assumed.", category: "common"}
 
-        threads: {description: "The numve of threads to use.", category: "advanced"}
+        threads: {description: "The number of threads to use.", category: "advanced"}
         memory: {description: "The amount of memory this job will use.", category: "advanced"}
         javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                   category: "advanced"}
@@ -1913,7 +1975,7 @@ task SvPrepDepthAnnotator {
         hg38: {description: "Whether or not the refernce genome is HG18, if false HG19 is assumed.", category: "common"}
         outputVcf: {description: "The path for the output VCF.", category: "common"}
 
-        threads: {description: "The numve of threads to use.", category: "advanced"}
+        threads: {description: "The number of threads to use.", category: "advanced"}
         memory: {description: "The amount of memory this job will use.", category: "advanced"}
         javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                   category: "advanced"}

From 2d1156ab01d091a5048718aec2e82b019cc3338d Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Mon, 28 Aug 2023 10:02:24 +0200
Subject: [PATCH 57/75] add support for unsorted BAM file in STAR task

---
 star.wdl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/star.wdl b/star.wdl
index 5b651970..5462672e 100644
--- a/star.wdl
+++ b/star.wdl
@@ -156,7 +156,8 @@ task Star {
     # in the runtime section.
 
     #TODO: Could be extended for all possible output extensions.
-    Map[String, String] samOutputNames = {"BAM SortedByCoordinate": "sortedByCoord.out.bam"}
+    Map[String, String] samOutputNames = {"BAM SortedByCoordinate": "sortedByCoord.out.bam",
+                                          "BAM Unsorted": "out.bam"}
 
     command {
         set -e

From e4e98c72431c14ee7d578497aafd8a393c9f07b1 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 29 Aug 2023 11:17:23 +0200
Subject: [PATCH 58/75] make index output optional in sambamba markdup, remove
 index input from isofox and sageappend

---
 hmftools.wdl | 4 +---
 sambamba.wdl | 7 +++++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 93be2cfc..6921a980 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -502,7 +502,6 @@ task Isofox {
         String sampleName
         File neoepitopeFile
         File bamFile
-        File bamIndex
         File referenceFasta
         File referenceFastaFai
         File referenceFastaDict
@@ -559,7 +558,6 @@ task Isofox {
         sampleName: {description: "The name of the sample.", category: "required"}
         neoepitopeFile: {description: "Neo's data file.", category: "required"}
         bamFile: {description: "Input rna BAM file.", category: "required"}
-        bamIndex: {description: "Index for the rna BAM file.", category: "required"}
         referenceFasta: {description: "The reference fasta file.", category: "required"}
         referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
         referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}  
@@ -1744,7 +1742,7 @@ task SageAppend {
     input {
         String sampleName
         File bamFile
-        File bamIndex
+        File? bamIndex
         File referenceFasta
         File referenceFastaDict
         File referenceFastaFai
diff --git a/sambamba.wdl b/sambamba.wdl
index acf40278..3052c8af 100644
--- a/sambamba.wdl
+++ b/sambamba.wdl
@@ -103,12 +103,15 @@ task Markdup {
         ~{"--io-buffer-size " + ioBufferSize} \
         ~{sep=' ' inputBams} ~{outputPath}
         # sambamba creates an index for us.
-        mv ~{outputPath}.bai ~{bamIndexPath}
+        if [ -f ~{outputPath}.bai ]
+          then
+            mv ~{outputPath}.bai ~{bamIndexPath}
+        fi
     }
 
     output {
         File outputBam = outputPath
-        File outputBamIndex = bamIndexPath
+        File? outputBamIndex = bamIndexPath
     }
 
     runtime {

From 6210b3740be6f3de2f7f01d78de6d164022fdc65 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 5 Sep 2023 15:33:48 +0200
Subject: [PATCH 59/75] fix semi-colon issue

---
 hmftools.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 6921a980..e75feb09 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -530,7 +530,7 @@ task Isofox {
         mkdir -p ~{outputDir}
         isofox -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
         -sample ~{sampleName} \
-        -functions NEO_EPITOPES;TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS \
+        -functions 'NEO_EPITOPES;TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS' \
         -neoepitope_file ~{neoepitopeFile} \
         -bam_file ~{bamFile} \
         -ref_genome ~{referenceFasta} \

From a7fa3b13715502af2ed48e96286db71ba81fc948 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Wed, 6 Sep 2023 13:43:45 +0200
Subject: [PATCH 60/75] turn neo data tsv into csv for isofox

---
 hmftools.wdl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index e75feb09..f5a13449 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -528,10 +528,11 @@ task Isofox {
     command {
         set -e
         mkdir -p ~{outputDir}
+        sed 's/\t/,/g' ~{neoepitopeFile} > tmp.neo_data.csv
         isofox -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
         -sample ~{sampleName} \
         -functions 'NEO_EPITOPES;TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS' \
-        -neoepitope_file ~{neoepitopeFile} \
+        -neoepitope_file tmp.neo_data.csv \
         -bam_file ~{bamFile} \
         -ref_genome ~{referenceFasta} \
         -ref_genome_version ~{refGenomeVersion} \

From 9667c6ac49f5b486661e6e8e90ea025ea0f713f1 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Thu, 7 Sep 2023 13:23:59 +0200
Subject: [PATCH 61/75] add isofox outputs

---
 hmftools.wdl | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index f5a13449..75eec8b9 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -543,9 +543,13 @@ task Isofox {
     }
 
     output {
-        File neoepitopeTsv = "~{outputDir}/~{sampleName}.isf.neoepitope.tsv"
-        Array[File] outputs = [neoepitopeTsv]
-        #TODO
+        File neoepitope = "~{outputDir}/~{sampleName}.isf.neoepitope.csv"
+        File altSpliceJunc = "~{outputDir}/~{sampleName}.isf.alt_splice_junc.csv"
+        File geneCollection = "~{outputDir}/~{sampleName}.isf.gene_collection.csv"
+        File passFusions = "~{outputDir}/~{sampleName}.isf.pass_fusions.csv"
+        File fusions = "~{outputDir}/~{sampleName}.isf.fusions.csv"
+        Array[File] outputs = [neoepitopeTsv, altSpliceJunc, geneCollection,
+                               passFusions, fusions]
     }
 
     runtime {

From 37a23566001b8cc4e6703218277cb5bec909ea81 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Thu, 7 Sep 2023 13:48:10 +0200
Subject: [PATCH 62/75] fix output name

---
 hmftools.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 75eec8b9..46116561 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -548,7 +548,7 @@ task Isofox {
         File geneCollection = "~{outputDir}/~{sampleName}.isf.gene_collection.csv"
         File passFusions = "~{outputDir}/~{sampleName}.isf.pass_fusions.csv"
         File fusions = "~{outputDir}/~{sampleName}.isf.fusions.csv"
-        Array[File] outputs = [neoepitopeTsv, altSpliceJunc, geneCollection,
+        Array[File] outputs = [neoepitope, altSpliceJunc, geneCollection,
                                passFusions, fusions]
     }
 

From c3c74a8406da01943e45e2a46961d32d6d959e72 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 12 Sep 2023 14:51:23 +0200
Subject: [PATCH 63/75] split isofox neoepitope function into separate task

---
 hmftools.wdl | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 83 insertions(+), 2 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 46116561..1e5b6b21 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -500,7 +500,6 @@ task HealthChecker {
 task Isofox {
     input {
         String sampleName
-        File neoepitopeFile
         File bamFile
         File referenceFasta
         File referenceFastaFai
@@ -528,10 +527,90 @@ task Isofox {
     command {
         set -e
         mkdir -p ~{outputDir}
+        isofox -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+        -sample ~{sampleName} \
+        -functions 'TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS' \
+        -bam_file ~{bamFile} \
+        -ref_genome ~{referenceFasta} \
+        -ref_genome_version ~{refGenomeVersion} \
+        -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \
+        -output_dir ~{outputDir} \
+        -log_debug \
+        -threads ~{threads}
+    }
+
+    output {
+        #TODO
+        Array[File] outputs = []
+    }
+
+    runtime {
+        cpu: threads
+        memory: memory
+        time_minutes: timeMinutes # !UnknownRuntimeKey
+        docker: dockerImage
+    }
+
+    parameter_meta {
+        sampleName: {description: "The name of the sample.", category: "required"}
+        bamFile: {description: "Input rna BAM file.", category: "required"}
+        referenceFasta: {description: "The reference fasta file.", category: "required"}
+        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
+        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}  
+        refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"}
+        expCountsFile: {description: "Isofox reference file.", category: "required"}
+        expGcRatiosFile: {description: "Isofox reference file.", category: "required"}
+        outputDir: {description: "The directory the outputs will be written to.", category: "required"}
+        geneDataCsv: {description: "A  CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
+        proteinFeaturesCsv: {description: "A  CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
+        transExonDataCsv: {description: "A  CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"}
+        transSpliceDataCsv: {description: "A  CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"}
+
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+                  category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+}
+
+task IsofoxNeoEpitopes {
+    input {
+        String sampleName
+        File neoepitopeFile
+        File bamFile
+        File referenceFasta
+        File referenceFastaFai
+        File referenceFastaDict
+        String refGenomeVersion
+        File expCountsFile
+        File expGcRatiosFile
+        Array[File]+ isofoxOutput
+
+        String outputDir = "./isofox"
+        Int readLength = 151
+
+        #The following should be in the same directory.
+        File geneDataCsv
+        File proteinFeaturesCsv
+        File transExonDataCsv
+        File transSpliceDataCsv
+
+        Int threads = 10
+        String javaXmx = "12G"
+        String memory = "13GiB"
+        Int timeMinutes = 120
+        String dockerImage = "quay.io/biocontainers/hmftools-isofox:1.6.2--hdfd78af_0"
+    }
+
+    command {
+        set -e
+        cp -r $(dirname ~{isofoxOutput[0]}) ~{outputDir}
         sed 's/\t/,/g' ~{neoepitopeFile} > tmp.neo_data.csv
         isofox -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
         -sample ~{sampleName} \
-        -functions 'NEO_EPITOPES;TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS' \
+        -functions 'NEO_EPITOPES' \
         -neoepitope_file tmp.neo_data.csv \
         -bam_file ~{bamFile} \
         -ref_genome ~{referenceFasta} \
@@ -548,6 +627,7 @@ task Isofox {
         File geneCollection = "~{outputDir}/~{sampleName}.isf.gene_collection.csv"
         File passFusions = "~{outputDir}/~{sampleName}.isf.pass_fusions.csv"
         File fusions = "~{outputDir}/~{sampleName}.isf.fusions.csv"
+        #TODO
         Array[File] outputs = [neoepitope, altSpliceJunc, geneCollection,
                                passFusions, fusions]
     }
@@ -569,6 +649,7 @@ task Isofox {
         refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"}
         expCountsFile: {description: "Isofox reference file.", category: "required"}
         expGcRatiosFile: {description: "Isofox reference file.", category: "required"}
+        isofoxOutput: {description: "Isofox output files.", category: "required"}
         outputDir: {description: "The directory the outputs will be written to.", category: "required"}
         geneDataCsv: {description: "A  CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
         proteinFeaturesCsv: {description: "A  CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}

From b025103ca44ed04795329ab1eaac52476533b5ea Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 12 Sep 2023 16:02:41 +0200
Subject: [PATCH 64/75] add inputs to isofox for bam index

---
 hmftools.wdl | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/hmftools.wdl b/hmftools.wdl
index 1e5b6b21..23d77f4f 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -501,6 +501,7 @@ task Isofox {
     input {
         String sampleName
         File bamFile
+        File bamIndex
         File referenceFasta
         File referenceFastaFai
         File referenceFastaDict
@@ -554,6 +555,7 @@ task Isofox {
     parameter_meta {
         sampleName: {description: "The name of the sample.", category: "required"}
         bamFile: {description: "Input rna BAM file.", category: "required"}
+        bamIndex: {description: "Index of the BAM file.", category: "required"}
         referenceFasta: {description: "The reference fasta file.", category: "required"}
         referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
         referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}  
@@ -580,6 +582,7 @@ task IsofoxNeoEpitopes {
         String sampleName
         File neoepitopeFile
         File bamFile
+        File bamIndex
         File referenceFasta
         File referenceFastaFai
         File referenceFastaDict
@@ -643,6 +646,7 @@ task IsofoxNeoEpitopes {
         sampleName: {description: "The name of the sample.", category: "required"}
         neoepitopeFile: {description: "Neo's data file.", category: "required"}
         bamFile: {description: "Input rna BAM file.", category: "required"}
+        bamIndex: {description: "Index of the BAM file.", category: "required"}
         referenceFasta: {description: "The reference fasta file.", category: "required"}
         referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
         referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}  

From c1a9308d593d9813bc8d0c1ede5fe66d883fc69d Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Thu, 14 Sep 2023 14:45:30 +0200
Subject: [PATCH 65/75] increase isofox memory

---
 hmftools.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 23d77f4f..ec261020 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -519,8 +519,8 @@ task Isofox {
         File transSpliceDataCsv
 
         Int threads = 10
-        String javaXmx = "12G"
-        String memory = "13GiB"
+        String javaXmx = "24G"
+        String memory = "25GiB"
         Int timeMinutes = 120
         String dockerImage = "quay.io/biocontainers/hmftools-isofox:1.6.2--hdfd78af_0"
     }

From 6433047a45cb9a0ea1f14f9dece90c8ae7de4209 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Thu, 14 Sep 2023 15:43:20 +0200
Subject: [PATCH 66/75] increase memory for isofox

---
 hmftools.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index ec261020..773ecb8f 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -519,8 +519,8 @@ task Isofox {
         File transSpliceDataCsv
 
         Int threads = 10
-        String javaXmx = "24G"
-        String memory = "25GiB"
+        String javaXmx = "48G"
+        String memory = "50GiB"
         Int timeMinutes = 120
         String dockerImage = "quay.io/biocontainers/hmftools-isofox:1.6.2--hdfd78af_0"
     }

From 24037d1f7aa7f6ab7b8aaa78897f79c3498ae31a Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 15 Sep 2023 13:45:53 +0200
Subject: [PATCH 67/75] add isofox outputs

---
 hmftools.wdl | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 773ecb8f..38f48137 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -541,8 +541,15 @@ task Isofox {
     }
 
     output {
-        #TODO
-        Array[File] outputs = []
+        File altSpliceJunc = "~{outputDir}/~{sampleName}.isf.alt_splice_junc.csv"
+        File fusions = "~{outputDir}/~{sampleName}.isf.fusions.csv"
+        File geneCollection = "~{outputDir}/~{sampleName}.isf.gene_collection.csv"
+        File geneData = "~{outputDir}/~{sampleName}.isf.gene_data.csv"
+        File passFusions = "~{outputDir}/~{sampleName}.isf.pass_fusions.csv"
+        File summary = "~{outputDir}/~{sampleName}.isf.summary.csv"
+        File transcriptData = "~{outputDir}/~{sampleName}.isf.transcript_data.csv"
+        Array[File] outputs = [altSpliceJunc, fusions, geneCollection, geneData,
+                               passFusions, summary, transcriptData]
     }
 
     runtime {
@@ -625,14 +632,16 @@ task IsofoxNeoEpitopes {
     }
 
     output {
-        File neoepitope = "~{outputDir}/~{sampleName}.isf.neoepitope.csv"
         File altSpliceJunc = "~{outputDir}/~{sampleName}.isf.alt_splice_junc.csv"
+        File fusions = "~{outputDir}/~{sampleName}.isf.fusions.csv"
         File geneCollection = "~{outputDir}/~{sampleName}.isf.gene_collection.csv"
+        File geneData = "~{outputDir}/~{sampleName}.isf.gene_data.csv"
         File passFusions = "~{outputDir}/~{sampleName}.isf.pass_fusions.csv"
-        File fusions = "~{outputDir}/~{sampleName}.isf.fusions.csv"
-        #TODO
-        Array[File] outputs = [neoepitope, altSpliceJunc, geneCollection,
-                               passFusions, fusions]
+        File summary = "~{outputDir}/~{sampleName}.isf.summary.csv"
+        File transcriptData = "~{outputDir}/~{sampleName}.isf.transcript_data.csv"
+        File neoepitope = "~{outputDir}/~{sampleName}.isf.neoepitope.csv"
+        Array[File] outputs = [altSpliceJunc, fusions, geneCollection, geneData,
+                               passFusions, summary, transcriptData, neoepitope]
     }
 
     runtime {

From f28727ce1b6212918a1a38044850546f624bffed Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 15 Sep 2023 13:48:31 +0200
Subject: [PATCH 68/75] adjust isofox memory

---
 hmftools.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 38f48137..7710722a 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -520,7 +520,7 @@ task Isofox {
 
         Int threads = 10
         String javaXmx = "48G"
-        String memory = "50GiB"
+        String memory = "60GiB"
         Int timeMinutes = 120
         String dockerImage = "quay.io/biocontainers/hmftools-isofox:1.6.2--hdfd78af_0"
     }

From 6097d3db940364ba3d65abee4a420d18adb9be84 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 15 Sep 2023 17:26:17 +0200
Subject: [PATCH 69/75] increase time for isofox

---
 hmftools.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 7710722a..f3640aa3 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -521,7 +521,7 @@ task Isofox {
         Int threads = 10
         String javaXmx = "48G"
         String memory = "60GiB"
-        Int timeMinutes = 120
+        Int timeMinutes = 240
         String dockerImage = "quay.io/biocontainers/hmftools-isofox:1.6.2--hdfd78af_0"
     }
 

From 0d283c64f051653041e8b7164d022a879e0db0a5 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 26 Sep 2023 15:40:47 +0200
Subject: [PATCH 70/75] convert neoepitope csv into tsv for neoscorer

---
 hmftools.wdl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hmftools.wdl b/hmftools.wdl
index f3640aa3..62e95e00 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -1040,6 +1040,8 @@ task NeoScorer {
     command {
         set -e
         mkdir -p ~{outputDir}
+        mkdir isofox
+        sed 's/,/\t/g' ~{isofoxDir}/~{sampleName}.isf.neoepitope.csv > isofox/~{sampleName}.isf.neoepitope.tsv
         neo com.hartwig.hmftools.neo.scorer.NeoScorer Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
         -sample ~{sampleId} \
         ~{"-cancer_type " + cancerType} \

From 063dc44c8794d78c077b82fe95a5d9ee25ffef74 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 26 Sep 2023 15:43:30 +0200
Subject: [PATCH 71/75] make the sed command depend on the input

---
 hmftools.wdl | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 62e95e00..1240c586 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -1036,12 +1036,15 @@ task NeoScorer {
     }
 
     String isofoxDir = sub(select_first([isofoxOutput, [""]])[0], basename(select_first([isofoxOutput, [""]])[0]), "")
+    String sedCommand = if defined(isofoxOutput) 
+        then "sed 's/,/\t/g' ~{isofoxDir}/~{sampleName}.isf.neoepitope.csv > isofox/~{sampleName}.isf.neoepitope.tsv"
+        else ""
 
     command {
         set -e
         mkdir -p ~{outputDir}
         mkdir isofox
-        sed 's/,/\t/g' ~{isofoxDir}/~{sampleName}.isf.neoepitope.csv > isofox/~{sampleName}.isf.neoepitope.tsv
+        ~{sedCommand}
         neo com.hartwig.hmftools.neo.scorer.NeoScorer Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
         -sample ~{sampleId} \
         ~{"-cancer_type " + cancerType} \
@@ -1050,7 +1053,7 @@ task NeoScorer {
         -score_file_id ~{neoBindingFileId} \
         -cancer_tpm_medians_file ~{cancerTpmMedians} \
         -neo_dir ~{sub(neoData, basename(neoData), "")} \
-        ~{if defined(isofoxOutput) then "-isofox_dir " + isofoxDir else ""} \
+        ~{if defined(isofoxOutput) then "-isofox_dir isofox" else ""} \
         -lilac_dir ~{sub(lilacOutput[0], basename(lilacOutput[0]), "")} \
         -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \
         ~{"-rna_somatic_vcf " + rnaSomaticVcf} \

From 20a8563f389aa827a20a8d8d9f20a94923ea2b0c Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 26 Sep 2023 15:58:44 +0200
Subject: [PATCH 72/75] sampleName should be sampleId

---
 hmftools.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 1240c586..10e7ae62 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -1037,7 +1037,7 @@ task NeoScorer {
 
     String isofoxDir = sub(select_first([isofoxOutput, [""]])[0], basename(select_first([isofoxOutput, [""]])[0]), "")
     String sedCommand = if defined(isofoxOutput) 
-        then "sed 's/,/\t/g' ~{isofoxDir}/~{sampleName}.isf.neoepitope.csv > isofox/~{sampleName}.isf.neoepitope.tsv"
+        then "sed 's/,/\t/g' ~{isofoxDir}/~{sampleId}.isf.neoepitope.csv > isofox/~{sampleId}.isf.neoepitope.tsv"
         else ""
 
     command {

From 19c2993a08a46aa7eb22ef892b81ebb71430f542 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 29 Sep 2023 10:53:48 +0200
Subject: [PATCH 73/75] fix copying of isofox data

---
 hmftools.wdl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hmftools.wdl b/hmftools.wdl
index 10e7ae62..39a518ed 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -1044,6 +1044,7 @@ task NeoScorer {
         set -e
         mkdir -p ~{outputDir}
         mkdir isofox
+        cp isofoxDir/* -t isofox
         ~{sedCommand}
         neo com.hartwig.hmftools.neo.scorer.NeoScorer Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
         -sample ~{sampleId} \

From c3da2d570f653cf64c1632d064fb30c8554ccee7 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 29 Sep 2023 11:16:08 +0200
Subject: [PATCH 74/75] fix cp command

---
 hmftools.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 39a518ed..367668ac 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -1044,7 +1044,7 @@ task NeoScorer {
         set -e
         mkdir -p ~{outputDir}
         mkdir isofox
-        cp isofoxDir/* -t isofox
+        cp isofoxDir/* ./isofox/
         ~{sedCommand}
         neo com.hartwig.hmftools.neo.scorer.NeoScorer Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
         -sample ~{sampleId} \

From 5a5bc99aeccc812ba666c308306cc7390e5933fe Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Fri, 29 Sep 2023 11:42:20 +0200
Subject: [PATCH 75/75] fix missing placeholder syntax

---
 hmftools.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hmftools.wdl b/hmftools.wdl
index 367668ac..15f89e64 100644
--- a/hmftools.wdl
+++ b/hmftools.wdl
@@ -1044,7 +1044,7 @@ task NeoScorer {
         set -e
         mkdir -p ~{outputDir}
         mkdir isofox
-        cp isofoxDir/* ./isofox/
+        cp ~{isofoxDir}/* ./isofox/
         ~{sedCommand}
         neo com.hartwig.hmftools.neo.scorer.NeoScorer Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
         -sample ~{sampleId} \