Merged
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#592](https://github.com/nf-core/taxprofiler/pull/592) improved JSON schema for better validation (by @microlei)
- [#604](https://github.com/nf-core/taxprofiler/pull/604) KrakenUniq no longer fails due to pathname splitting during batching when only single sample supplied (by @AlexHoratio)
- [#612](https://github.com/nf-core/taxprofiler/pull/616) Fix bug with bracken results being shown in MultiQC for long-read data even if bracken is not supported for long reads (added by @jfy133 and @sofstam)
- [#630](https://github.com/nf-core/taxprofiler/pull/630) Fix a bug in KRAKENUNIQ reads channel where a single-element list containing a / in the path was being split incorrectly, causing unexpected multiple list elements.
- [#630](https://github.com/nf-core/taxprofiler/pull/630) Fix a bug in KRAKENUNIQ reads channel where a single-element list containing a / in the path was being split incorrectly, causing unexpected multiple list elements (❤️ to @sysbiocoder for reporting and fixed by @LilyAnderssonLee)
- [#631](https://github.com/nf-core/taxprofiler/pull/631) For paired-end reads, DIAMOND will use only the forward read file (read 1) unless reads are merged (❤️ to @sysbiocoder for reporting and fixed by @LilyAnderssonLee)
- [#635](https://github.com/nf-core/taxprofiler/pull/635) Fix faulty parsing of memory request in BBDuk (❤️ to @Proton-NTA for reporting, fix by @mahesh-panchal)

### `Changed`
Expand Down
9 changes: 8 additions & 1 deletion docs/usage.md
Expand Up @@ -404,7 +404,14 @@ Centrifuge currently does not accept FASTA files as input, therefore no output w

##### DIAMOND

DIAMOND can only accept a single input read file. To run DIAMOND on paired-end reads, please merge the reads (e.g., using `--shortread_qc_mergepairs`).
DIAMOND can only accept a single input read file. When running DIAMOND on paired-end reads without merging, only the forward read file (`read1`) will be used.
Alternatively, you can merge the reads using `--shortread_qc_mergepairs`.

:::warning
Note, however, that the merging approach only works when the vast majority of read pairs do actually merge.
If your DNA fragments are too long, read pairs will not overlap and therefore will not merge; unmerged pairs are discarded by default.
While you can retain unmerged reads as well (with `--shortread_qc_includeunmerged`), be aware that unmerged reads are kept as _independent_ reads in the FASTQ file, so a single DNA fragment may be counted twice towards a taxon.
:::
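
The caveat above can be sketched numerically. The following is an illustrative Python sketch (not pipeline code; the function name and numbers are hypothetical) of why retaining unmerged reads can inflate counts:

```python
def fastq_record_count(n_pairs_merged: int, n_pairs_unmerged: int, include_unmerged: bool) -> int:
    """Number of FASTQ records DIAMOND would see after pair merging.

    A merged pair collapses to one record, while an unmerged pair kept via
    --shortread_qc_includeunmerged contributes two independent records,
    so one DNA fragment can be counted twice towards a taxon.
    """
    merged = n_pairs_merged                                   # one record per merged pair
    unmerged = 2 * n_pairs_unmerged if include_unmerged else 0  # two records per retained pair
    return merged + unmerged

# 100 original fragments: 80 pairs merge, 20 do not.
print(fastq_record_count(80, 20, include_unmerged=False))  # 80 records
print(fastq_record_count(80, 20, include_unmerged=True))   # 120 records from 100 fragments
```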

DIAMOND only allows output of a single file format at a time; therefore, supplying parameters such as `--diamond_save_reads` will result in only aligned reads in SAM format being produced, and no taxonomic profiles will be available. Be aware of this when setting up your pipeline runs, depending on your particular use case.
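
The interaction between `--diamond_save_reads` and `--diamond_output_format` can be paraphrased as follows (an illustrative Python sketch mirroring the format-selection ternary in `subworkflows/local/profiling.nf`, not the pipeline's own Groovy):

```python
def resolve_diamond_format(save_reads: bool, requested_format: str) -> str:
    # DIAMOND emits one output format at a time, so saving aligned reads
    # forces SAM and overrides any requested taxonomic-profile format.
    return "sam" if save_reads else requested_format

print(resolve_diamond_format(True, "tsv"))   # sam
print(resolve_diamond_format(False, "tsv"))  # tsv
```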

Expand Down
3 changes: 2 additions & 1 deletion nextflow_schema.json
Expand Up @@ -423,7 +423,8 @@
"run_diamond": {
"type": "boolean",
"fa_icon": "fas fa-toggle-on",
"description": "Turn on profiling with DIAMOND. Requires database to be present CSV file passed to --databases"
"description": "Turn on profiling with DIAMOND. For unmerged paired-end libraries, only read 1 will be used. Requires a database to be present in the CSV file passed to --databases.",
"help_text": "DIAMOND does not support paired-end input. By default, if read pairs are unmerged, only read 1 will be profiled.\n\nIf your reads are short enough, you can also merge reads to obtain longer sequences. Be careful if you retain remaining unmerged reads after merging, as this may result in two read counts for a taxon where a merged pair would give a single count."
},
"diamond_output_format": {
"type": "string",
Expand Down
23 changes: 10 additions & 13 deletions subworkflows/local/profiling.nf
Expand Up @@ -323,18 +323,15 @@ workflow PROFILING {
}

if (params.run_diamond) {

ch_input_for_diamond = ch_input_for_profiling.diamond
.filter { meta, reads, meta_db, db ->
.multiMap { meta, reads, meta_db, db ->
if (!meta.single_end) {
log.warn("[nf-core/taxprofiler] DIAMOND does not accept paired-end files as input. To run DIAMOND on this sample, please merge reads (e.g. with --shortread_qc_mergepairs). Skipping DIAMOND for sample ${meta.id}.")
log.warn("[nf-core/taxprofiler] DIAMOND does not accept paired-end files as input. Only read 1 will be used for profiling. Running DIAMOND for sample ${meta.id} using only read 1.")
}
meta.single_end
}
.multiMap { it ->
reads: [it[0] + it[2], it[1]]
db: [it[2], it[3]]
reads: [meta + meta_db, meta.single_end ? reads : reads[0]]
db: [meta_db, db]
> **Contributor (author):** @jfy133 I spotted the error. The `filter` condition actually only allowed single-end reads to proceed to DIAMOND analysis. I have tested this one and it should work now.

> **Member:** Ooops, yes, good catch! I think the change makes sense, as we no longer have to distinguish between single- and paired-end (we just take R1 from paired-end), so no filtering is necessary 👍

}

// diamond only accepts single output file specification, therefore
// this will replace output file!
ch_diamond_reads_format = params.diamond_save_reads ? 'sam' : params.diamond_output_format
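
The new `multiMap` read-selection logic above can be paraphrased in Python (an illustrative sketch of the Groovy closure's behaviour, not runnable pipeline code):

```python
def select_diamond_reads(meta: dict, reads):
    """Return the single file DIAMOND receives for this sample."""
    if meta["single_end"]:
        return reads       # single-end: already a single file
    return reads[0]        # paired-end: forward read (read 1) only

print(select_diamond_reads({"single_end": True}, "sample.fastq.gz"))
print(select_diamond_reads({"single_end": False}, ["s_R1.fastq.gz", "s_R2.fastq.gz"]))
```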
Expand Down Expand Up @@ -394,11 +391,11 @@ workflow PROFILING {
seqtype: meta.seqtype
}
// Hardcode to _always_ produce the report file (which is our basic output, and goes into)
KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.seqtype, ch_input_for_krakenuniq.db, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications )
ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report )
ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() )
ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment.map{meta, profiles -> [meta - meta.subMap('seqtype'), profiles]} )
ch_raw_profiles = ch_raw_profiles.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report.map{meta, profiles -> [meta - meta.subMap('seqtype'), profiles]} )
KRAKENUNIQ_PRELOADEDKRAKENUNIQ(ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.seqtype, ch_input_for_krakenuniq.db, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications)
ch_multiqc_files = ch_multiqc_files.mix(KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report)
ch_versions = ch_versions.mix(KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first())
ch_raw_classifications = ch_raw_classifications.mix(KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment.map { meta, profiles -> [meta - meta.subMap('seqtype'), profiles] })
ch_raw_profiles = ch_raw_profiles.mix(KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report.map { meta, profiles -> [meta - meta.subMap('seqtype'), profiles] })
}
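
The `meta - meta.subMap('seqtype')` idiom used when mixing the KRAKENUNIQ outputs above strips the temporary `seqtype` key from the sample metadata before downstream channel joins. A rough Python equivalent (illustrative only, not the pipeline's Groovy):

```python
def drop_seqtype(meta: dict) -> dict:
    # Groovy's `meta - meta.subMap('seqtype')` subtracts the 'seqtype'
    # entry, restoring a seqtype-free sample identity for channel mixing.
    return {k: v for k, v in meta.items() if k != "seqtype"}

print(drop_seqtype({"id": "sample1", "seqtype": "dna"}))  # {'id': 'sample1'}
```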

if (params.run_kmcp) {
Expand Down