From 4fcd0e17a80f95c0fef69c70cbf2dbdb04ec083b Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Tue, 7 Apr 2026 15:25:45 +0200 Subject: [PATCH 01/36] Initial commit fgumi implementation --- assets/schema_input.json | 2 +- assets/schema_sampleinfo.json | 2 +- conf/modules.config | 118 ++++++++++ main.nf | 27 +++ modules/local/fgumi/duplexmetrics/main.nf | 35 +++ modules/local/fgumi/extract/main.nf | 40 ++++ modules/local/fgumi/filter/main.nf | 48 ++++ modules/local/fgumi/group/main.nf | 39 ++++ modules/local/fgumi/simplex/main.nf | 37 ++++ modules/local/fgumi/snapzippersort/main.nf | 66 ++++++ nextflow.config | 29 +++ nextflow_schema.json | 77 +++++++ .../local/fastq_to_aligned_cram/main.nf | 79 ++++++- tests/inputs/fgumi/R1.fastq.gz | 4 + tests/inputs/fgumi/R2.fastq.gz | 4 + tests/inputs/fgumi/consensus.bam | 1 + tests/inputs/fgumi/grouped.bam | 1 + tests/inputs/fgumi/ref.dict | 2 + tests/inputs/fgumi/ref.fa | 2 + tests/inputs/fgumi/ref.fa.fai | 1 + .../inputs/fgumi/snap_index/OverflowTable.txt | 1 + tests/inputs/fgumi/template.bam | 1 + tests/inputs/fgumi/unmapped.bam | 1 + tests/inputs/test.yml | 15 ++ .../local/fgumi/duplexmetrics/main.nf.test | 33 +++ .../fgumi/duplexmetrics/main.nf.test.snap | 43 ++++ .../modules/local/fgumi/extract/main.nf.test | 35 +++ .../local/fgumi/extract/main.nf.test.snap | 47 ++++ tests/modules/local/fgumi/filter/main.nf.test | 33 +++ .../local/fgumi/filter/main.nf.test.snap | 75 +++++++ tests/modules/local/fgumi/group/main.nf.test | 32 +++ .../local/fgumi/group/main.nf.test.snap | 75 +++++++ .../modules/local/fgumi/simplex/main.nf.test | 32 +++ .../local/fgumi/simplex/main.nf.test.snap | 59 +++++ .../local/fgumi/snapzippersort/main.nf.test | 35 +++ .../fgumi/snapzippersort/main.nf.test.snap | 45 ++++ .../fgumi_umi_stub.nf.test | 58 +++++ .../fgumi_umi_stub.nf.test.snap | 205 ++++++++++++++++++ .../local/fastq_to_aligned_cram/main.nf.test | 46 ++++ workflows/preprocessing.nf | 8 + 40 files changed, 1488 insertions(+), 5 deletions(-) 
create mode 100644 modules/local/fgumi/duplexmetrics/main.nf create mode 100644 modules/local/fgumi/extract/main.nf create mode 100644 modules/local/fgumi/filter/main.nf create mode 100644 modules/local/fgumi/group/main.nf create mode 100644 modules/local/fgumi/simplex/main.nf create mode 100644 modules/local/fgumi/snapzippersort/main.nf create mode 100644 tests/inputs/fgumi/R1.fastq.gz create mode 100644 tests/inputs/fgumi/R2.fastq.gz create mode 100644 tests/inputs/fgumi/consensus.bam create mode 100644 tests/inputs/fgumi/grouped.bam create mode 100644 tests/inputs/fgumi/ref.dict create mode 100644 tests/inputs/fgumi/ref.fa create mode 100644 tests/inputs/fgumi/ref.fa.fai create mode 100644 tests/inputs/fgumi/snap_index/OverflowTable.txt create mode 100644 tests/inputs/fgumi/template.bam create mode 100644 tests/inputs/fgumi/unmapped.bam create mode 100644 tests/modules/local/fgumi/duplexmetrics/main.nf.test create mode 100644 tests/modules/local/fgumi/duplexmetrics/main.nf.test.snap create mode 100644 tests/modules/local/fgumi/extract/main.nf.test create mode 100644 tests/modules/local/fgumi/extract/main.nf.test.snap create mode 100644 tests/modules/local/fgumi/filter/main.nf.test create mode 100644 tests/modules/local/fgumi/filter/main.nf.test.snap create mode 100644 tests/modules/local/fgumi/group/main.nf.test create mode 100644 tests/modules/local/fgumi/group/main.nf.test.snap create mode 100644 tests/modules/local/fgumi/simplex/main.nf.test create mode 100644 tests/modules/local/fgumi/simplex/main.nf.test.snap create mode 100644 tests/modules/local/fgumi/snapzippersort/main.nf.test create mode 100644 tests/modules/local/fgumi/snapzippersort/main.nf.test.snap create mode 100644 tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test create mode 100644 tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap diff --git a/assets/schema_input.json b/assets/schema_input.json index 8f666f27..00af1491 100644 --- 
a/assets/schema_input.json +++ b/assets/schema_input.json @@ -50,7 +50,7 @@ "umi_aware": { "meta": ["umi_aware"], "type": "boolean", - "description": "Run markdup in UMI-aware mode. This applies to Samtools only and requires the UMI to be in the read name.", + "description": "Enable UMI-aware processing through the fgumi consensus branch.", "default": false }, "skip_trimming": { diff --git a/assets/schema_sampleinfo.json b/assets/schema_sampleinfo.json index 092cc130..9da1b2c8 100644 --- a/assets/schema_sampleinfo.json +++ b/assets/schema_sampleinfo.json @@ -90,7 +90,7 @@ "umi_aware": { "meta": ["umi_aware"], "type": "boolean", - "description": "Run markdup in UMI-aware mode. This applies to Samtools only and requires the UMI to be in the read name.", + "description": "Enable UMI-aware processing through the fgumi consensus branch.", "default": false }, "skip_trimming": { diff --git a/conf/modules.config b/conf/modules.config index 95978df0..ebf686ff 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -231,6 +231,124 @@ process { } } + //// FGUMI extract (step 1) + withName: '.*FASTQ_TO_CRAM:FGUMI_EXTRACT' { + cpus = 8 + memory = 32.GB + ext.prefix = { "${meta.id}.fgumi.unmapped" } + ext.args = { + [ + "--read-structures ${params.fgumi_read_structures}", + params.fgumi_extract_umis_from_read_names ? "--extract-umis-from-read-names" : "", + "--threads ${task.cpus}", + "--queue-memory ${params.fgumi_queue_memory}", + params.fgumi_queue_memory_per_thread ? "--queue-memory-per-thread" : "", + "--compression-level ${params.fgumi_compression_level}", + ].join(" ").trim() + } + } + + //// FGUMI fastq | SNAP | zipper | template sort (step 3) + withName: '.*FASTQ_TO_CRAM:FGUMI_SNAP_ZIPPER_SORT' { + cpus = 16 + memory = 64.GB + ext.prefix = { "${meta.id}.fgumi" } + ext.args = { + [ + params.fgumi_snap_ignore_mismatched_pairs ? 
"-I" : "",
+                "${params.fgumi_snap_extra_args}",
+            ].join(" ").trim()
+        }
+        // ext.args (above) is forwarded to `snap-aligner paired`.
+        // ext.args2 is forwarded to `fgumi zipper`.
+        ext.args2 = {
+            [
+                "--threads ${task.cpus}",
+            ].join(" ").trim()
+        }
+        // ext.args3 is forwarded to the final `fgumi sort --order template-coordinate`.
+        ext.args3 = {
+            [
+                "--threads ${task.cpus}",
+                "--max-memory ${params.fgumi_sort_max_memory}",
+                "--memory-per-thread=${params.fgumi_sort_memory_per_thread ? 'true' : 'false'}",
+            ].join(" ").trim()
+        }
+        // NOTE: FGUMI_SNAP_ZIPPER_SORT reads only ext.args, ext.args2 and ext.args3.
+        // The former ext.args4 (a verbatim copy of ext.args3) and ext.args5 ("-@ <cpus>")
+        // were never consumed by the module and have been removed; the two `samtools sort`
+        // calls in the module set their own `-@ ${task.cpus} -m 1G` flags.
+    }
+
+    //// FGUMI group (step 4)
+    withName: '.*FASTQ_TO_CRAM:FGUMI_GROUP' {
+        cpus = 8
+        memory = 32.GB
+        ext.prefix = { "${meta.id}.fgumi.group" }
+        // The metrics file names below must stay in sync with ext.prefix: the module
+        // declares them as optional outputs named "${prefix}.grouping_metrics.txt" and
+        // "${prefix}.family_size_histogram.txt".
+        ext.args = {
+            [
+                "--strategy ${params.fgumi_group_strategy}",
+                "--edits ${params.fgumi_group_edits}",
+                "--threads ${task.cpus}",
+                "--queue-memory ${params.fgumi_queue_memory}",
+                params.fgumi_queue_memory_per_thread ? "--queue-memory-per-thread" : "",
+                "--compression-level ${params.fgumi_compression_level}",
+                "--grouping-metrics ${meta.id}.fgumi.group.grouping_metrics.txt",
+                "--family-size-histogram ${meta.id}.fgumi.group.family_size_histogram.txt",
+            ].join(" ").trim()
+        }
+    }
+
+    //// FGUMI simplex (step 5)
+    withName: '.*FASTQ_TO_CRAM:FGUMI_SIMPLEX' {
+        cpus = 8
+        memory = 32.GB
+        ext.prefix = { "${meta.id}.fgumi.simplex" }
+        ext.args = {
+            [
+                "--min-reads ${params.fgumi_simplex_min_reads}",
+                "--threads ${task.cpus}",
+                "--queue-memory ${params.fgumi_queue_memory}",
+                params.fgumi_queue_memory_per_thread ? 
"--queue-memory-per-thread" : "", + "--compression-level ${params.fgumi_compression_level}", + "--stats ${meta.id}.fgumi.simplex.consensus_metrics.txt", + ].join(" ").trim() + } + } + + //// FGUMI duplex-metrics (step 6) + withName: '.*FASTQ_TO_CRAM:FGUMI_DUPLEX_METRICS' { + cpus = 2 + memory = 8.GB + ext.prefix = { "${meta.id}.fgumi" } + ext.args = "${params.fgumi_duplex_metrics_extra_args}" + } + + //// FGUMI filter + coordinate sort/index (step 7) + withName: '.*FASTQ_TO_CRAM:FGUMI_FILTER' { + cpus = 8 + memory = 32.GB + ext.prefix = { "${meta.id}.fgumi.filter" } + ext.args = { + [ + "--min-reads ${params.fgumi_filter_min_reads}", + "--stats ${meta.id}.fgumi.filter.filtering_metrics.txt", + ].join(" ").trim() + } + ext.args2 = { + [ + "--threads ${task.cpus}", + "--max-memory ${params.fgumi_sort_max_memory}", + "--memory-per-thread=${params.fgumi_sort_memory_per_thread ? 'true' : 'false'}", + ].join(" ").trim() + } + } + // coverage //// Mosdepth withName: '.*COVERAGE:MOSDEPTH' { diff --git a/main.nf b/main.nf index 8d7667e2..1c8db700 100644 --- a/main.nf +++ b/main.nf @@ -178,6 +178,11 @@ workflow { rna_junctions = PREPROCESSING.out.rna_junctions align_reports = PREPROCESSING.out.align_reports sormadup_metrics = PREPROCESSING.out.sormadup_metrics + // Additional UMI consensus outputs. + family_size_histogram = PREPROCESSING.out.family_size_histogram + umi_filtered_consensus_bam = PREPROCESSING.out.umi_filtered_consensus_bam + umi_duplex_metrics = PREPROCESSING.out.umi_duplex_metrics + umi_crams = PREPROCESSING.out.umi_crams mosdepth_global = PREPROCESSING.out.mosdepth_global mosdepth_summary = PREPROCESSING.out.mosdepth_summary mosdepth_regions = PREPROCESSING.out.mosdepth_regions @@ -275,6 +280,28 @@ output { metrics >> (meta.library ? "${meta.library}/${meta.samplename}/${meta.samplename}.duplicate_metrics.txt" : "${meta.samplename}/${meta.samplename}.duplicate_metrics.txt") } } + // UMI consensus artefacts are published per sample next to CRAM outputs. 
+ family_size_histogram { + path { meta, histogram -> + histogram >> (meta.library ? "${meta.library}/${meta.samplename}/${histogram.name}" : "${meta.samplename}/${histogram.name}") + } + } + umi_filtered_consensus_bam { + path { meta, bam -> + bam >> (meta.library ? "${meta.library}/${meta.samplename}/${bam.name}" : "${meta.samplename}/${bam.name}") + } + } + umi_duplex_metrics { + path { meta, _file -> + return (meta.library ? "${meta.library}/${meta.samplename}/" : "${meta.samplename}/") + } + } + umi_crams { + path { meta, cram, crai -> + cram >> (meta.library ? "${meta.library}/${meta.samplename}/${meta.samplename}.umi.cram" : "${meta.samplename}/${meta.samplename}.umi.cram") + crai >> (meta.library ? "${meta.library}/${meta.samplename}/${meta.samplename}.umi.cram.crai" : "${meta.samplename}/${meta.samplename}.umi.cram.crai") + } + } mosdepth_global { path { meta, _file -> return (meta.library ? "${meta.library}/${meta.samplename}/" : "${meta.samplename}/") diff --git a/modules/local/fgumi/duplexmetrics/main.nf b/modules/local/fgumi/duplexmetrics/main.nf new file mode 100644 index 00000000..14954864 --- /dev/null +++ b/modules/local/fgumi/duplexmetrics/main.nf @@ -0,0 +1,35 @@ +process FGUMI_DUPLEX_METRICS { + tag "$meta.id" + label 'process_low' + + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data' + : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("${prefix}.duplex_metrics*"), emit: duplex_metrics + tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.fgumi" + + """ + fgumi duplex-metrics \ + --input ${bam} \ + --output ${prefix}.duplex_metrics \ + ${args} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.fgumi" + """ + touch ${prefix}.duplex_metrics.txt + """ +} diff --git a/modules/local/fgumi/extract/main.nf b/modules/local/fgumi/extract/main.nf new file mode 100644 index 00000000..bd4294fc --- /dev/null +++ b/modules/local/fgumi/extract/main.nf @@ -0,0 +1,40 @@ +process FGUMI_EXTRACT { + tag "$meta.id" + label 'process_medium' + + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data' + : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam + tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.fgumi.unmapped" + def sample_name = meta.samplename ?: meta.id + def library_name = meta.library ?: meta.id + def input_files = (reads instanceof List ? 
reads : [reads]).collect { read -> "${read}" }.join(' ') + + """ + fgumi extract \ + --inputs ${input_files} \ + --output ${prefix}.bam \ + --sample "${sample_name}" \ + --library "${library_name}" \ + ${args} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.fgumi.unmapped" + """ + touch ${prefix}.bam + """ +} diff --git a/modules/local/fgumi/filter/main.nf b/modules/local/fgumi/filter/main.nf new file mode 100644 index 00000000..c35bd7cc --- /dev/null +++ b/modules/local/fgumi/filter/main.nf @@ -0,0 +1,48 @@ +process FGUMI_FILTER { + tag "$meta.id" + label 'process_medium' + + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data' + : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}" + + input: + tuple val(meta), path(bam), path(fasta) + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam + tuple val(meta), path("${prefix}.bam.bai"), emit: bai + tuple val(meta), path("${prefix}.filtering_metrics.txt"), optional: true, emit: filtering_metrics + tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def sort_args = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}.fgumi.filter" + + """ + fgumi filter \ + --input ${bam} \ + --output ${prefix}.filtered.bam \ + --ref ${fasta} \ + ${args} + + fgumi sort \ + --input ${prefix}.filtered.bam \ + --output ${prefix}.bam \ + --order coordinate \ + --write-index \ + ${sort_args} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.fgumi.filter" + """ + touch ${prefix}.bam + touch ${prefix}.bam.bai + touch ${prefix}.filtering_metrics.txt + """ +} diff --git a/modules/local/fgumi/group/main.nf 
b/modules/local/fgumi/group/main.nf new file mode 100644 index 00000000..6779351f --- /dev/null +++ b/modules/local/fgumi/group/main.nf @@ -0,0 +1,39 @@ +process FGUMI_GROUP { + tag "$meta.id" + label 'process_medium' + + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data' + : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam + tuple val(meta), path("${prefix}.grouping_metrics.txt"), optional: true, emit: grouping_metrics + tuple val(meta), path("${prefix}.family_size_histogram.txt"), optional: true, emit: family_size_histogram + tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.fgumi.group" + + """ + fgumi group \ + --input ${bam} \ + --output ${prefix}.bam \ + ${args} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.fgumi.group" + """ + touch ${prefix}.bam + touch ${prefix}.grouping_metrics.txt + touch ${prefix}.family_size_histogram.txt + """ +} diff --git a/modules/local/fgumi/simplex/main.nf b/modules/local/fgumi/simplex/main.nf new file mode 100644 index 00000000..81dfc29a --- /dev/null +++ b/modules/local/fgumi/simplex/main.nf @@ -0,0 +1,37 @@ +process FGUMI_SIMPLEX { + tag "$meta.id" + label 'process_medium' + + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data' + : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam + tuple val(meta), path("${prefix}.consensus_metrics.txt"), optional: true, emit: consensus_metrics + tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.fgumi.simplex" + + """ + fgumi simplex \ + --input ${bam} \ + --output ${prefix}.bam \ + ${args} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.fgumi.simplex" + """ + touch ${prefix}.bam + touch ${prefix}.consensus_metrics.txt + """ +} diff --git a/modules/local/fgumi/snapzippersort/main.nf b/modules/local/fgumi/snapzippersort/main.nf new file mode 100644 index 00000000..0c6ec63b --- /dev/null +++ b/modules/local/fgumi/snapzippersort/main.nf @@ -0,0 +1,66 @@ +process FGUMI_SNAP_ZIPPER_SORT { + tag "$meta.id" + label 'process_high' + + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/24/2466270633749543330f352e34588f142de4988585ce63e7f22ee5ed1ff57450/data' + : 'community.wave.seqera.io/library/fgumi_samtools_snap-aligner:c9ba911435350668'}" + + input: + tuple val(meta), path(unmapped_bam), path(index, stageAs: "index/*"), path(fasta), path(dict) + + output: + tuple val(meta), path("${prefix}.template.bam"), emit: bam + tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def snap_args = task.ext.args ?: '' + def zipper_args = task.ext.args2 ?: '' + def sort_args = task.ext.args3 ?: '' + prefix = task.ext.prefix ?: "${meta.id}.fgumi" + + """ + INDEX=`dirname \$(find -L ./ -name "OverflowTable*" | head -n1)` + [ -z "\$INDEX" ] && echo "Snap index files not found" 1>&2 && exit 1 + + # Ensure zipper and fastq read exactly the same queryname-ordered unmapped stream. 
+ samtools sort \ + -n \ + -@ ${task.cpus} \ + -m 1G \ + -o ${prefix}.unmapped.queryname.bam \ + ${unmapped_bam} + + fgumi fastq --input ${prefix}.unmapped.queryname.bam \ + | snap-aligner paired \ + \$INDEX \ + -pairedInterleavedFastq - \ + -o -sam - \ + -t ${task.cpus} \ + ${snap_args} \ + | samtools sort \ + -n \ + -@ ${task.cpus} \ + -m 1G \ + -O SAM \ + - \ + | fgumi zipper \ + --unmapped ${prefix}.unmapped.queryname.bam \ + --reference ${fasta} \ + ${zipper_args} \ + | fgumi sort \ + --input /dev/stdin \ + --output ${prefix}.template.bam \ + --order template-coordinate \ + ${sort_args} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.fgumi" + """ + touch ${prefix}.template.bam + """ +} diff --git a/nextflow.config b/nextflow.config index 20e156f9..e2f2872c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,6 +11,35 @@ params { igenomes_base = '/references/' igenomes_ignore = false + // Analysis options + split_fastq = 100000000 + genelists = null + + // UMI consensus (fgumi) options + fgumi_read_structures = '+T +T' + fgumi_extract_umis_from_read_names = true + fgumi_group_strategy = 'adjacency' + fgumi_group_edits = 1 + fgumi_simplex_min_reads = 1 + fgumi_filter_min_reads = '1,1,1' + fgumi_queue_memory = 768 + fgumi_queue_memory_per_thread= true + fgumi_compression_level = 1 + fgumi_sort_max_memory = '2G' + fgumi_sort_memory_per_thread = true + fgumi_snap_ignore_mismatched_pairs = true + fgumi_snap_extra_args = '' + fgumi_duplex_metrics_extra_args = '' + + // MultiQC options + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null + + // Boilerplate options + outdir = null publish_dir_mode = 'copy' monochrome_logs = false hook_url = System.getenv('HOOK_URL') diff --git a/nextflow_schema.json b/nextflow_schema.json index 36936165..1c4c2c9d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -73,6 +73,83 @@ "exists": true, "format": "directory-path", 
"description": "Directory containing gene list bed files for granular coverage analysis" + }, + "fgumi_read_structures": { + "type": "string", + "default": "+T +T", + "description": "Read structures passed to fgumi extract for UMI extraction from FASTQ." + }, + "fgumi_extract_umis_from_read_names": { + "type": "boolean", + "default": true, + "description": "Enable fgumi extract --extract-umis-from-read-names for read name encoded UMIs." + }, + "fgumi_group_strategy": { + "type": "string", + "default": "adjacency", + "description": "UMI grouping strategy for fgumi group.", + "enum": ["identity", "edit", "adjacency", "paired"] + }, + "fgumi_group_edits": { + "type": "integer", + "default": 1, + "minimum": 0, + "description": "Maximum UMI edit distance used by fgumi group." + }, + "fgumi_simplex_min_reads": { + "type": "integer", + "default": 1, + "minimum": 1, + "description": "Minimum number of reads required per UMI family for fgumi simplex consensus generation." + }, + "fgumi_filter_min_reads": { + "type": "string", + "default": "1,1,1", + "description": "Minimum reads triplet passed to fgumi filter --min-reads." + }, + "fgumi_queue_memory": { + "type": "integer", + "default": 768, + "minimum": 64, + "description": "fgumi queue-memory budget in MB." + }, + "fgumi_queue_memory_per_thread": { + "type": "boolean", + "default": true, + "description": "Scale fgumi queue-memory by allocated thread count." + }, + "fgumi_compression_level": { + "type": "integer", + "default": 1, + "minimum": 0, + "maximum": 12, + "description": "Compression level for fgumi BAM outputs." + }, + "fgumi_sort_max_memory": { + "type": "string", + "default": "2G", + "pattern": "^\\d+(\\.\\d+)?[KMG]$", + "description": "Memory budget for fgumi sort, for example 2G or 768M." + }, + "fgumi_sort_memory_per_thread": { + "type": "boolean", + "default": true, + "description": "Scale fgumi sort memory by allocated thread count." 
+ }, + "fgumi_snap_ignore_mismatched_pairs": { + "type": "boolean", + "default": true, + "description": "Pass -I to SNAP to ignore mismatched read IDs in paired-end input." + }, + "fgumi_snap_extra_args": { + "type": "string", + "default": "", + "description": "Additional SNAP options for the fgumi fastq|snap|zipper pipeline." + }, + "fgumi_duplex_metrics_extra_args": { + "type": "string", + "default": "", + "description": "Additional fgumi duplex-metrics options." } } }, diff --git a/subworkflows/local/fastq_to_aligned_cram/main.nf b/subworkflows/local/fastq_to_aligned_cram/main.nf index bf87f087..1d420fdd 100644 --- a/subworkflows/local/fastq_to_aligned_cram/main.nf +++ b/subworkflows/local/fastq_to_aligned_cram/main.nf @@ -6,6 +6,12 @@ // MODULES include { BIOBAMBAM_BAMSORMADUP } from "../../../modules/nf-core/biobambam/bamsormadup/main.nf" +include { FGUMI_DUPLEX_METRICS } from "../../../modules/local/fgumi/duplexmetrics/main.nf" +include { FGUMI_EXTRACT } from "../../../modules/local/fgumi/extract/main.nf" +include { FGUMI_FILTER } from "../../../modules/local/fgumi/filter/main.nf" +include { FGUMI_GROUP } from "../../../modules/local/fgumi/group/main.nf" +include { FGUMI_SIMPLEX } from "../../../modules/local/fgumi/simplex/main.nf" +include { FGUMI_SNAP_ZIPPER_SORT } from "../../../modules/local/fgumi/snapzippersort/main.nf" include { SAMTOOLS_CONVERT } from "../../../modules/nf-core/samtools/convert/main" include { SAMTOOLS_SORMADUP } from "../../../modules/nf-core/samtools/sormadup/main.nf" include { SAMTOOLS_SORT } from "../../../modules/nf-core/samtools/sort/main" @@ -42,12 +48,59 @@ workflow FASTQ_TO_CRAM { } .set { ch_meta_reads_aligner_index_fasta_datatype } - // align fastq files per sample - // ALIGNMENT([meta,fastq], index, sort) + ch_meta_reads_aligner_index_fasta_datatype.dna + .branch { meta, reads, aligner, index, fasta -> + umi: meta.umi_aware == true + return [meta, reads, aligner, index, fasta] + non_umi: true + return [meta, reads, aligner, 
index, fasta] + } + .set { ch_dna_to_align } + + // Align non-UMI DNA fastq files per sample FASTQ_ALIGN_DNA( - ch_meta_reads_aligner_index_fasta_datatype.dna, + ch_dna_to_align.non_umi, false, ) + + // UMI-aware fgumi branch (steps 1, 3, 4, 5, 6, 7 in fgumi Basic Workflow) + FGUMI_EXTRACT( + ch_dna_to_align.umi + .map { meta, reads, _aligner, _index, _fasta -> [meta, reads] } + ) + + FGUMI_SNAP_ZIPPER_SORT( + FGUMI_EXTRACT.out.bam + .join( + ch_dna_to_align.umi.map { meta, _reads, _aligner, _index, fasta -> + [meta, getGenomeAttribute(meta.genome_data, 'snap'), fasta, getGenomeAttribute(meta.genome_data, 'dict')] + }, + by: 0, + ) + .map { meta, unmapped_bam, snap_index, fasta, dict -> [meta, unmapped_bam, snap_index, fasta, dict] } + ) + + FGUMI_GROUP( + FGUMI_SNAP_ZIPPER_SORT.out.bam + ) + + FGUMI_SIMPLEX( + FGUMI_GROUP.out.bam + ) + + FGUMI_DUPLEX_METRICS( + FGUMI_GROUP.out.bam + ) + + FGUMI_FILTER( + FGUMI_SIMPLEX.out.bam + .join( + ch_dna_to_align.umi.map { meta, _reads, _aligner, _index, fasta -> [meta, fasta] }, + by: 0, + ) + .map { meta, bam, fasta -> [meta, bam, fasta] } + ) + FASTQ_ALIGN_RNA( ch_meta_reads_aligner_index_fasta_datatype.rna ) @@ -100,6 +153,18 @@ workflow FASTQ_TO_CRAM { ch_markdup_index = channel.empty() + // UMI branch outputs are mixed into the common markdup/metrics streams. 
+    ch_markdup_index = ch_markdup_index.mix(
+        FGUMI_FILTER.out.bam.join(FGUMI_FILTER.out.bai, failOnMismatch: true, failOnDuplicate: true)
+    )
+    // fgumi metrics get their own channel. NOTE(review): the duplicate-metrics output in main.nf appears to publish every item as "<samplename>.duplicate_metrics.txt", so mixing these per-sample files into ch_sormadup_metrics would make them overwrite each other - confirm.
+    ch_umi_metrics = FGUMI_GROUP.out.grouping_metrics
+        .mix(FGUMI_SIMPLEX.out.consensus_metrics)
+        .mix(FGUMI_FILTER.out.filtering_metrics)
+    ch_duplex_metrics = FGUMI_DUPLEX_METRICS.out.duplex_metrics
+    ch_family_size_histogram = FGUMI_GROUP.out.family_size_histogram
+    ch_filtered_consensus_bam = FGUMI_FILTER.out.bam
+
     // BIOBAMBAM_BAMSORMADUP([meta, [bam, bam]], fasta, fai)
     BIOBAMBAM_BAMSORMADUP(ch_bam_fasta_fai.bamsormadup)
     ch_markdup_index = ch_markdup_index.mix(BIOBAMBAM_BAMSORMADUP.out.bam.join(BIOBAMBAM_BAMSORMADUP.out.bam_index, failOnMismatch: true, failOnDuplicate: true))
@@ -147,10 +212,19 @@ workflow FASTQ_TO_CRAM {
         .set { ch_cram_crai }
     ch_cram_crai.dump(tag: "FASTQ_TO_CRAM: cram and crai", pretty: true)
 
+    // Keep a dedicated channel for UMI-aware sample CRAM outputs.
+    ch_umi_cram_crai = ch_cram_crai.filter { meta, _cram, _crai -> meta.umi_aware == true }
+
     emit:
     cram_crai = ch_cram_crai
+    // UMI-specific output channels for downstream reporting and publishing.
+    umi_cram_crai = ch_umi_cram_crai
+    filtered_consensus_bam = ch_filtered_consensus_bam
     rna_splice_junctions = FASTQ_ALIGN_RNA.out.splice_junctions
     rna_junctions = FASTQ_ALIGN_RNA.out.junctions
     sormadup_metrics = ch_sormadup_metrics
+    family_size_histogram = ch_family_size_histogram
+    umi_metrics = ch_umi_metrics
+    duplex_metrics = ch_duplex_metrics
     align_reports = FASTQ_ALIGN_DNA.out.reports
 }
diff --git a/tests/inputs/fgumi/R1.fastq.gz b/tests/inputs/fgumi/R1.fastq.gz
new file mode 100644
index 00000000..3b41ea25
--- /dev/null
+++ b/tests/inputs/fgumi/R1.fastq.gz
@@ -0,0 +1,4 @@
+@r1
+ACGT
++
+!!!!
diff --git a/tests/inputs/fgumi/R2.fastq.gz b/tests/inputs/fgumi/R2.fastq.gz new file mode 100644 index 00000000..c5b0a999 --- /dev/null +++ b/tests/inputs/fgumi/R2.fastq.gz @@ -0,0 +1,4 @@ +@r2 +TGCA ++ +!!!! diff --git a/tests/inputs/fgumi/consensus.bam b/tests/inputs/fgumi/consensus.bam new file mode 100644 index 00000000..48cdce85 --- /dev/null +++ b/tests/inputs/fgumi/consensus.bam @@ -0,0 +1 @@ +placeholder diff --git a/tests/inputs/fgumi/grouped.bam b/tests/inputs/fgumi/grouped.bam new file mode 100644 index 00000000..48cdce85 --- /dev/null +++ b/tests/inputs/fgumi/grouped.bam @@ -0,0 +1 @@ +placeholder diff --git a/tests/inputs/fgumi/ref.dict b/tests/inputs/fgumi/ref.dict new file mode 100644 index 00000000..c7a324e0 --- /dev/null +++ b/tests/inputs/fgumi/ref.dict @@ -0,0 +1,2 @@ +@HD VN:1.6 SO:unsorted +@SQ SN:chr1 LN:12 diff --git a/tests/inputs/fgumi/ref.fa b/tests/inputs/fgumi/ref.fa new file mode 100644 index 00000000..41884804 --- /dev/null +++ b/tests/inputs/fgumi/ref.fa @@ -0,0 +1,2 @@ +>chr1 +ACGTACGTACGT diff --git a/tests/inputs/fgumi/ref.fa.fai b/tests/inputs/fgumi/ref.fa.fai new file mode 100644 index 00000000..795162b9 --- /dev/null +++ b/tests/inputs/fgumi/ref.fa.fai @@ -0,0 +1 @@ +chr1 12 6 12 13 diff --git a/tests/inputs/fgumi/snap_index/OverflowTable.txt b/tests/inputs/fgumi/snap_index/OverflowTable.txt new file mode 100644 index 00000000..48cdce85 --- /dev/null +++ b/tests/inputs/fgumi/snap_index/OverflowTable.txt @@ -0,0 +1 @@ +placeholder diff --git a/tests/inputs/fgumi/template.bam b/tests/inputs/fgumi/template.bam new file mode 100644 index 00000000..48cdce85 --- /dev/null +++ b/tests/inputs/fgumi/template.bam @@ -0,0 +1 @@ +placeholder diff --git a/tests/inputs/fgumi/unmapped.bam b/tests/inputs/fgumi/unmapped.bam new file mode 100644 index 00000000..48cdce85 --- /dev/null +++ b/tests/inputs/fgumi/unmapped.bam @@ -0,0 +1 @@ +placeholder diff --git a/tests/inputs/test.yml b/tests/inputs/test.yml index 3a432b4b..f4a89c9e 100644 --- 
a/tests/inputs/test.yml +++ b/tests/inputs/test.yml @@ -49,3 +49,18 @@ run_coverage: true fastq_1: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R1.fastq.gz fastq_2: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R2.fastq.gz +# UMI consensus (fgumi) inputs +# Example DNA sample with umi_aware enabled for fgumi processing. +- id: UMI_consensus1 + samplename: HT1080-chr20 + library: test_library + organism: Homo sapiens + tag: WES + sample_type: DNA + aligner: snap + markdup: bamsormadup + umi_aware: true + run_coverage: true + fastq_1: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R1_001.fastq.gz + fastq_2: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R2_001.fastq.gz + diff --git a/tests/modules/local/fgumi/duplexmetrics/main.nf.test b/tests/modules/local/fgumi/duplexmetrics/main.nf.test new file mode 100644 index 00000000..665fb839 --- /dev/null +++ b/tests/modules/local/fgumi/duplexmetrics/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process FGUMI_DUPLEX_METRICS" + script "modules/local/fgumi/duplexmetrics/main.nf" + process "FGUMI_DUPLEX_METRICS" + + tag "modules" + tag "modules/local" + tag "modules/local/fgumi" + tag "modules/local/fgumi/duplexmetrics" + + test("test - stub") { + // Stub-mode contract test: verifies duplex-metrics output wiring. + options "-stub" + + when { + process { + """ + input[0] = [ + [id: "test"], + file("${projectDir}/tests/inputs/fgumi/grouped.bam", checkIfExists: true) + ] + """ + } + } + + then { + // Keep assertions minimal and snapshot-driven, consistent with other module tests. 
+ assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/tests/modules/local/fgumi/duplexmetrics/main.nf.test.snap b/tests/modules/local/fgumi/duplexmetrics/main.nf.test.snap new file mode 100644 index 00000000..c2514d51 --- /dev/null +++ b/tests/modules/local/fgumi/duplexmetrics/main.nf.test.snap @@ -0,0 +1,43 @@ +{ + "test - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "FGUMI_DUPLEX_METRICS", + "fgumi", + "0.1.2" + ] + ], + "duplex_metrics": [ + [ + { + "id": "test" + }, + "test.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_DUPLEX_METRICS", + "fgumi", + "0.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-02T16:04:44.383973526" + } +} \ No newline at end of file diff --git a/tests/modules/local/fgumi/extract/main.nf.test b/tests/modules/local/fgumi/extract/main.nf.test new file mode 100644 index 00000000..ad1088a5 --- /dev/null +++ b/tests/modules/local/fgumi/extract/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process FGUMI_EXTRACT" + script "modules/local/fgumi/extract/main.nf" + process "FGUMI_EXTRACT" + + tag "modules" + tag "modules/local" + tag "modules/local/fgumi" + tag "modules/local/fgumi/extract" + + test("test - stub") { + // Stub-mode contract test: verifies extract output channel and versions tuple. 
+ options "-stub" + + when { + process { + """ + input[0] = [ + [id: "test", samplename: "test", library: "lib1"], + [ + file("${projectDir}/tests/inputs/fgumi/R1.fastq.gz", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/R2.fastq.gz", checkIfExists: true) + ] + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/tests/modules/local/fgumi/extract/main.nf.test.snap b/tests/modules/local/fgumi/extract/main.nf.test.snap new file mode 100644 index 00000000..82b09e4b --- /dev/null +++ b/tests/modules/local/fgumi/extract/main.nf.test.snap @@ -0,0 +1,47 @@ +{ + "test - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "samplename": "test", + "library": "lib1" + }, + "test.fgumi.unmapped.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "FGUMI_EXTRACT", + "fgumi", + "0.1.2" + ] + ], + "bam": [ + [ + { + "id": "test", + "samplename": "test", + "library": "lib1" + }, + "test.fgumi.unmapped.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_EXTRACT", + "fgumi", + "0.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-02T15:59:46.342116682" + } +} \ No newline at end of file diff --git a/tests/modules/local/fgumi/filter/main.nf.test b/tests/modules/local/fgumi/filter/main.nf.test new file mode 100644 index 00000000..9c068a67 --- /dev/null +++ b/tests/modules/local/fgumi/filter/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process FGUMI_FILTER" + script "modules/local/fgumi/filter/main.nf" + process "FGUMI_FILTER" + + tag "modules" + tag "modules/local" + tag "modules/local/fgumi" + tag "modules/local/fgumi/filter" + + test("test - stub") { + // Stub-mode contract test: verifies filtered BAM/index and metrics outputs. 
+ options "-stub" + + when { + process { + """ + input[0] = [ + [id: "test"], + file("${projectDir}/tests/inputs/fgumi/consensus.bam", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/tests/modules/local/fgumi/filter/main.nf.test.snap b/tests/modules/local/fgumi/filter/main.nf.test.snap new file mode 100644 index 00000000..28c1a053 --- /dev/null +++ b/tests/modules/local/fgumi/filter/main.nf.test.snap @@ -0,0 +1,75 @@ +{ + "test - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.filtering_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "FGUMI_FILTER", + "fgumi", + "0.1.2" + ] + ], + "bai": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "filtering_metrics": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.filtering_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_FILTER", + "fgumi", + "0.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-02T16:05:00.831097499" + } +} \ No newline at end of file diff --git a/tests/modules/local/fgumi/group/main.nf.test b/tests/modules/local/fgumi/group/main.nf.test new file mode 100644 index 00000000..8fb9b79e --- /dev/null +++ b/tests/modules/local/fgumi/group/main.nf.test @@ -0,0 +1,32 @@ +nextflow_process { + + name "Test Process FGUMI_GROUP" + script "modules/local/fgumi/group/main.nf" + process "FGUMI_GROUP" + + tag 
"modules" + tag "modules/local" + tag "modules/local/fgumi" + tag "modules/local/fgumi/group" + + test("test - stub") { + // Stub-mode contract test: verifies grouping and family-size outputs. + options "-stub" + + when { + process { + """ + input[0] = [ + [id: "test"], + file("${projectDir}/tests/inputs/fgumi/template.bam", checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/tests/modules/local/fgumi/group/main.nf.test.snap b/tests/modules/local/fgumi/group/main.nf.test.snap new file mode 100644 index 00000000..d242fb4a --- /dev/null +++ b/tests/modules/local/fgumi/group/main.nf.test.snap @@ -0,0 +1,75 @@ +{ + "test - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fgumi.group.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.fgumi.group.grouping_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "FGUMI_GROUP", + "fgumi", + "0.1.2" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.fgumi.group.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "family_size_histogram": [ + [ + { + "id": "test" + }, + "test.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "grouping_metrics": [ + [ + { + "id": "test" + }, + "test.fgumi.group.grouping_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_GROUP", + "fgumi", + "0.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-02T16:05:08.118138918" + } +} \ No newline at end of file diff --git a/tests/modules/local/fgumi/simplex/main.nf.test b/tests/modules/local/fgumi/simplex/main.nf.test new file mode 100644 index 00000000..62b9fc5f --- /dev/null +++ 
b/tests/modules/local/fgumi/simplex/main.nf.test @@ -0,0 +1,32 @@ +nextflow_process { + + name "Test Process FGUMI_SIMPLEX" + script "modules/local/fgumi/simplex/main.nf" + process "FGUMI_SIMPLEX" + + tag "modules" + tag "modules/local" + tag "modules/local/fgumi" + tag "modules/local/fgumi/simplex" + + test("test - stub") { + // Stub-mode contract test: verifies simplex BAM and consensus metrics. + options "-stub" + + when { + process { + """ + input[0] = [ + [id: "test"], + file("${projectDir}/tests/inputs/fgumi/grouped.bam", checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/tests/modules/local/fgumi/simplex/main.nf.test.snap b/tests/modules/local/fgumi/simplex/main.nf.test.snap new file mode 100644 index 00000000..14ecba3d --- /dev/null +++ b/tests/modules/local/fgumi/simplex/main.nf.test.snap @@ -0,0 +1,59 @@ +{ + "test - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fgumi.simplex.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.fgumi.simplex.consensus_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "FGUMI_SIMPLEX", + "fgumi", + "0.1.2" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.fgumi.simplex.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "consensus_metrics": [ + [ + { + "id": "test" + }, + "test.fgumi.simplex.consensus_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_SIMPLEX", + "fgumi", + "0.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-02T16:05:15.268242503" + } +} \ No newline at end of file diff --git a/tests/modules/local/fgumi/snapzippersort/main.nf.test b/tests/modules/local/fgumi/snapzippersort/main.nf.test new file mode 100644 index 00000000..e27f388e --- /dev/null +++ b/tests/modules/local/fgumi/snapzippersort/main.nf.test @@ -0,0 +1,35 @@ 
+nextflow_process { + + name "Test Process FGUMI_SNAP_ZIPPER_SORT" + script "modules/local/fgumi/snapzippersort/main.nf" + process "FGUMI_SNAP_ZIPPER_SORT" + + tag "modules" + tag "modules/local" + tag "modules/local/fgumi" + tag "modules/local/fgumi/snapzippersort" + + test("test - stub") { + // Stub-mode contract test: verifies template BAM output wiring. + options "-stub" + + when { + process { + """ + input[0] = [ + [id: "test", samplename: "test"], + file("${projectDir}/tests/inputs/fgumi/unmapped.bam", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/ref.dict", checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/tests/modules/local/fgumi/snapzippersort/main.nf.test.snap b/tests/modules/local/fgumi/snapzippersort/main.nf.test.snap new file mode 100644 index 00000000..d575f1ef --- /dev/null +++ b/tests/modules/local/fgumi/snapzippersort/main.nf.test.snap @@ -0,0 +1,45 @@ +{ + "test - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "samplename": "test" + }, + "test.fgumi.template.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "FGUMI_SNAP_ZIPPER_SORT", + "fgumi", + "0.1.2" + ] + ], + "bam": [ + [ + { + "id": "test", + "samplename": "test" + }, + "test.fgumi.template.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_SNAP_ZIPPER_SORT", + "fgumi", + "0.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-02T16:05:22.614091027" + } +} \ No newline at end of file diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test new file mode 100644 index 00000000..5f192e63 --- /dev/null +++ 
b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test @@ -0,0 +1,58 @@ +nextflow_workflow { + + name "Test Workflow FASTQ_TO_CRAM UMI fgumi stub" + script "subworkflows/local/fastq_to_aligned_cram/main.nf" + workflow "FASTQ_TO_CRAM" + + tag "subworkflows" + tag "subworkflows/local" + tag "subworkflows/local/fastq_to_aligned_cram" + tag "fgumi" + + test("fastq to cram - fgumi umi-aware - stub") { + // End-to-end UMI branch contract test in stub mode. + options "-stub" + when { + workflow { + """ + input[0] = Channel.of([ + [ + id: "UMI_consensus1", + samplename: "HT1080_chr20", + single_end: false, + sample_type: "DNA", + markdup: "bamsormadup", + umi_aware: true, + genome_data: [ + fasta: "${projectDir}/tests/inputs/fgumi/ref.fa", + fai: "${projectDir}/tests/inputs/fgumi/ref.fa.fai", + dict: "${projectDir}/tests/inputs/fgumi/ref.dict", + snap: "${projectDir}/tests/inputs/fgumi/snap_index" + ] + ], + [ + file("${projectDir}/tests/inputs/fgumi/R1.fastq.gz", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/R2.fastq.gz", checkIfExists: true) + ], + "snap", + file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true), + [] + ]) + """ + } + } + + then { + assert workflow.success + // Explicitly assert newly exposed UMI channels. 
+ assert workflow.out.duplex_metrics.size() == 1 + assert workflow.out.family_size_histogram.size() == 1 + assert workflow.out.filtered_consensus_bam.size() == 1 + assert workflow.out.umi_cram_crai.size() == 1 + assert snapshot( + sanitizeOutput(workflow.out, unstableKeys:["cram_crai"]) + ).match() + } + } +} diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap new file mode 100644 index 00000000..ceffdb37 --- /dev/null +++ b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap @@ -0,0 +1,205 @@ +{ + "fastq to cram - fgumi umi-aware - stub": { + "content": [ + { + "align_reports": [ + + ], + "cram_crai": [ + [ + { + "genome_data": { + "dict": "ref.dict", + "fai": "ref.fa.fai", + "fasta": "ref.fa", + "snap": "snap_index" + }, + "id": "UMI_consensus1", + "markdup": "bamsormadup", + "sample_type": "DNA", + "samplename": "HT1080_chr20", + "single_end": false, + "umi_aware": true + }, + "UMI_consensus1.cram", + "UMI_consensus1.cram.crai" + ] + ], + "duplex_metrics": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "umi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "family_size_histogram": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "umi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": 
"ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "filtered_consensus_bam": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "umi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "rna_junctions": [ + + ], + "rna_splice_junctions": [ + + ], + "sormadup_metrics": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "umi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.filter.filtering_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "umi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "UMI_consensus1", + 
"samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "umi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.group.grouping_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "umi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.simplex.consensus_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "umi_cram_crai": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "umi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.cram:md5,d41d8cd98f00b204e9800998ecf8427e", + "UMI_consensus1.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-07T14:33:07.208059007" + } +} \ No newline at end of file diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/main.nf.test b/tests/subworkflows/local/fastq_to_aligned_cram/main.nf.test index f3082fc3..49b29e89 100644 --- 
a/tests/subworkflows/local/fastq_to_aligned_cram/main.nf.test +++ b/tests/subworkflows/local/fastq_to_aligned_cram/main.nf.test @@ -224,4 +224,50 @@ nextflow_workflow { ) } } + + test("fastq to cram - fgumi umi-aware - stub") { + options "-stub" + when { + workflow { + """ + // [meta, [fq_1,fq_2], aligner, index, fasta] + input[0] = Channel.of([ + [ + id: "UMI_consensus1", + samplename: "HT1080_chr20", + single_end: false, + sample_type: "DNA", + markdup: "bamsormadup", + umi_aware: true, + genome_data: [ + fasta: "${projectDir}/tests/inputs/fgumi/ref.fa", + fai: "${projectDir}/tests/inputs/fgumi/ref.fa.fai", + dict: "${projectDir}/tests/inputs/fgumi/ref.dict", + snap: "${projectDir}/tests/inputs/fgumi/snap_index" + ] + ], + [ + file("${projectDir}/tests/inputs/fgumi/R1.fastq.gz", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/R2.fastq.gz", checkIfExists: true) + ], + "snap", + file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true), + [] + ]) + """ + } + } + + then { + assertAll( + { + assert workflow.success + assert snapshot( + sanitizeOutput(workflow.out, unstableKeys:["cram_crai"]) + ).match() + } + ) + } + } } diff --git a/workflows/preprocessing.nf b/workflows/preprocessing.nf index 46c93597..9f17ef8f 100644 --- a/workflows/preprocessing.nf +++ b/workflows/preprocessing.nf @@ -271,7 +271,10 @@ workflow PREPROCESSING { FASTQ_TO_CRAM( ch_meta_reads_aligner_index_fasta_gtf ) + // Collect both standard and UMI-specific metrics for MultiQC.
ch_multiqc_files = ch_multiqc_files.mix(FASTQ_TO_CRAM.out.sormadup_metrics) + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_TO_CRAM.out.duplex_metrics) + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_TO_CRAM.out.family_size_histogram) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -432,6 +435,11 @@ workflow PREPROCESSING { rna_junctions = FASTQ_TO_CRAM.out.rna_junctions align_reports = FASTQ_TO_CRAM.out.align_reports sormadup_metrics = FASTQ_TO_CRAM.out.sormadup_metrics + // UMI-specific outputs exposed at workflow level. + family_size_histogram = FASTQ_TO_CRAM.out.family_size_histogram + umi_filtered_consensus_bam = FASTQ_TO_CRAM.out.filtered_consensus_bam + umi_duplex_metrics = FASTQ_TO_CRAM.out.duplex_metrics + umi_crams = FASTQ_TO_CRAM.out.umi_cram_crai mosdepth_global = COVERAGE.out.mosdepth_global mosdepth_summary = COVERAGE.out.mosdepth_summary mosdepth_regions = COVERAGE.out.mosdepth_regions From a110d131aba97dd03e3877ef214815e3f413d7d3 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 09:38:31 +0200 Subject: [PATCH 02/36] Fixed assets. Changed to fgumi_aware parameter. --- assets/schema_input.json | 8 +++++++- assets/schema_sampleinfo.json | 8 +++++++- docs/usage.md | 6 ++++++ subworkflows/local/fastq_to_aligned_cram/main.nf | 4 ++-- tests/inputs/test.yml | 4 ++-- 5 files changed, 24 insertions(+), 6 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 00af1491..e76d82cb 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -50,7 +50,13 @@ "umi_aware": { "meta": ["umi_aware"], "type": "boolean", - "description": "Enable UMI-aware processing through the fgumi consensus branch.", + "description": "Run markdup in UMI-aware mode. 
This applies to Samtools only and requires the UMI to be in the read name.", + "default": false + }, + "fgumi_aware": { + "meta": ["fgumi_aware"], + "type": "boolean", + "description": "Enable UMI-aware consensus processing through the fgumi branch.", "default": false }, "skip_trimming": { diff --git a/assets/schema_sampleinfo.json b/assets/schema_sampleinfo.json index 9da1b2c8..ccd66222 100644 --- a/assets/schema_sampleinfo.json +++ b/assets/schema_sampleinfo.json @@ -90,7 +90,13 @@ "umi_aware": { "meta": ["umi_aware"], "type": "boolean", - "description": "Enable UMI-aware processing through the fgumi consensus branch.", + "description": "Run markdup in UMI-aware mode. This applies to Samtools only and requires the UMI to be in the read name.", + "default": false + }, + "fgumi_aware": { + "meta": ["fgumi_aware"], + "type": "boolean", + "description": "Enable UMI-aware consensus processing through the fgumi branch.", "default": false }, "skip_trimming": { diff --git a/docs/usage.md b/docs/usage.md index f59c809a..7f3055af 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -26,6 +26,7 @@ A `fastq` samplesheet file consisting of paired-end data may look something like aligner: bwamem markdup: bamsormadup umi_aware: false + fgumi_aware: false skip_trimming: false trim_front: 0 trim_tail: 0 @@ -67,6 +68,10 @@ Following table shows the fields that are used by the `fastq` samplesheet: An [example samplesheet](../tests/inputs/test.yml) has been provided with the pipeline. +> [!NOTE] +> `umi_aware` and `fgumi_aware` are independent options. +> Use `umi_aware` for samtools markdup UMI mode, and `fgumi_aware` to run the fgumi consensus branch. + ### Flowcell samplesheet A `flowcell` samplesheet file consisting of one sequencing run may look something like the one below. 
@@ -102,6 +107,7 @@ A `flowcell` sample info JSON/YML file consisting for one sequencing run may loo aligner: bwamem markdup: bamsormadup umi_aware: false + fgumi_aware: false skip_trimming: false trim_front: 0 trim_tail: 0 diff --git a/subworkflows/local/fastq_to_aligned_cram/main.nf b/subworkflows/local/fastq_to_aligned_cram/main.nf index 1d420fdd..a517ac8d 100644 --- a/subworkflows/local/fastq_to_aligned_cram/main.nf +++ b/subworkflows/local/fastq_to_aligned_cram/main.nf @@ -50,7 +50,7 @@ workflow FASTQ_TO_CRAM { ch_meta_reads_aligner_index_fasta_datatype.dna .branch { meta, reads, aligner, index, fasta -> - umi: meta.umi_aware == true + umi: (meta.fgumi_aware == true) || (meta.umi_aware == true) return [meta, reads, aligner, index, fasta] non_umi: true return [meta, reads, aligner, index, fasta] @@ -213,7 +213,7 @@ workflow FASTQ_TO_CRAM { ch_cram_crai.dump(tag: "FASTQ_TO_CRAM: cram and crai", pretty: true) // Keep a dedicated channel for UMI-aware sample CRAM outputs. - ch_umi_cram_crai = ch_cram_crai.filter { meta, _cram, _crai -> meta.umi_aware == true } + ch_umi_cram_crai = ch_cram_crai.filter { meta, _cram, _crai -> (meta.fgumi_aware == true) || (meta.umi_aware == true) } emit: cram_crai = ch_cram_crai diff --git a/tests/inputs/test.yml b/tests/inputs/test.yml index f4a89c9e..051ac1ca 100644 --- a/tests/inputs/test.yml +++ b/tests/inputs/test.yml @@ -50,7 +50,7 @@ fastq_1: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R1.fastq.gz fastq_2: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R2.fastq.gz # UMI consensus (fgumi) inputs -# Example DNA sample with umi_aware enabled for fgumi processing. +# Example DNA sample with fgumi_aware enabled for fgumi processing. 
- id: UMI_consensus1 samplename: HT1080-chr20 library: test_library @@ -59,7 +59,7 @@ sample_type: DNA aligner: snap markdup: bamsormadup - umi_aware: true + fgumi_aware: true run_coverage: true fastq_1: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R1_001.fastq.gz fastq_2: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R2_001.fastq.gz From 535d39c6ecb0ed7bb74b88791a0a32c171e9638b Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 09:42:26 +0200 Subject: [PATCH 03/36] Changed pipeline wide read-structures to meta values --- assets/schema_input.json | 12 ++++++++++++ assets/schema_sampleinfo.json | 12 ++++++++++++ conf/modules.config | 4 ++-- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index e76d82cb..74a0a44a 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -59,6 +59,18 @@ "description": "Enable UMI-aware consensus processing through the fgumi branch.", "default": false }, + "fgumi_read_structures": { + "meta": ["fgumi_read_structures"], + "type": "string", + "description": "Read structures passed to fgumi extract for this sample.", + "default": null + }, + "fgumi_extract_umis_from_read_names": { + "meta": ["fgumi_extract_umis_from_read_names"], + "type": "boolean", + "description": "Override fgumi extraction from read names for this sample.", + "default": null + }, "skip_trimming": { "meta": ["skip_trimming"], "type": "boolean", diff --git a/assets/schema_sampleinfo.json b/assets/schema_sampleinfo.json index ccd66222..ab6dfda3 100644 --- a/assets/schema_sampleinfo.json +++ b/assets/schema_sampleinfo.json @@ -99,6 +99,18 @@ "description": "Enable UMI-aware consensus processing through the fgumi branch.", "default": false }, + "fgumi_read_structures": { + "meta": ["fgumi_read_structures"], + "type": "string", + "description": "Read 
structures passed to fgumi extract for this sample.", + "default": null + }, + "fgumi_extract_umis_from_read_names": { + "meta": ["fgumi_extract_umis_from_read_names"], + "type": "boolean", + "description": "Override fgumi extraction from read names for this sample.", + "default": null + }, "skip_trimming": { "meta": ["skip_trimming"], "type": "boolean", diff --git a/conf/modules.config b/conf/modules.config index ebf686ff..90a84107 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -238,8 +238,8 @@ process { ext.prefix = { "${meta.id}.fgumi.unmapped" } ext.args = { [ - "--read-structures ${params.fgumi_read_structures}", - params.fgumi_extract_umis_from_read_names ? "--extract-umis-from-read-names" : "", + "--read-structures ${meta.fgumi_read_structures ?: params.fgumi_read_structures}", + ((meta.fgumi_extract_umis_from_read_names != null ? meta.fgumi_extract_umis_from_read_names : params.fgumi_extract_umis_from_read_names) ? "--extract-umis-from-read-names" : ""), "--threads ${task.cpus}", "--queue-memory ${params.fgumi_queue_memory}", params.fgumi_queue_memory_per_thread ? "--queue-memory-per-thread" : "", From 2dfd9c5a19ef2a84da63b3366b66eb2a7bb60101 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 09:46:36 +0200 Subject: [PATCH 04/36] Added usage of default SNAP parameters in config --- conf/modules.config | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index 90a84107..604a4bb1 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -255,7 +255,14 @@ process { ext.prefix = { "${meta.id}.fgumi" } ext.args = { [ + "-b-", + "-sm 20", params.fgumi_snap_ignore_mismatched_pairs ? "-I" : "", + "-hc-", + "-S id", + "-sa", + "-xf 2", + meta.readgroup ? "-R \"@RG\\t" + meta.readgroup.findResults { rg -> rg.value?.trim() ? 
"${rg.key}:${rg.value}" : null }.join("\\t") + "\"" : "", "${params.fgumi_snap_extra_args}", ].join(" ").trim() } From 3bd6dbbf4a7662654612c53e558770a72ad8a1f2 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 09:52:21 +0200 Subject: [PATCH 05/36] Process memory now handled per nf-core style --- conf/modules.config | 15 +++------------ modules/local/fgumi/group/main.nf | 12 ++++++++++++ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 604a4bb1..e77ffc7f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -297,18 +297,9 @@ process { cpus = 8 memory = 32.GB ext.prefix = { "${meta.id}.fgumi.group" } - ext.args = { - [ - "--strategy ${params.fgumi_group_strategy}", - "--edits ${params.fgumi_group_edits}", - "--threads ${task.cpus}", - "--queue-memory ${params.fgumi_queue_memory}", - params.fgumi_queue_memory_per_thread ? "--queue-memory-per-thread" : "", - "--compression-level ${params.fgumi_compression_level}", - "--grouping-metrics ${meta.id}.fgumi.group.grouping_metrics.txt", - "--family-size-histogram ${meta.id}.fgumi.group.family_size_histogram.txt", - ].join(" ").trim() - } + ext.strategy = { params.fgumi_group_strategy } + ext.edits = { params.fgumi_group_edits } + ext.compression_level = { params.fgumi_compression_level } } //// FGUMI simplex (step 5) diff --git a/modules/local/fgumi/group/main.nf b/modules/local/fgumi/group/main.nf index 6779351f..cecc9d5d 100644 --- a/modules/local/fgumi/group/main.nf +++ b/modules/local/fgumi/group/main.nf @@ -20,12 +20,24 @@ process FGUMI_GROUP { script: def args = task.ext.args ?: '' + def strategy = task.ext.strategy ?: 'adjacency' + def edits = task.ext.edits != null ? task.ext.edits : 1 + def compression_level = task.ext.compression_level != null ? 
task.ext.compression_level : 1 + def queue_memory_mb = (task.memory.mega / task.cpus * 0.75).intValue() prefix = task.ext.prefix ?: "${meta.id}.fgumi.group" """ fgumi group \ --input ${bam} \ --output ${prefix}.bam \ + --strategy ${strategy} \ + --edits ${edits} \ + --threads ${task.cpus} \ + --queue-memory ${queue_memory_mb} \ + --queue-memory-per-thread \ + --compression-level ${compression_level} \ + --grouping-metrics ${prefix}.grouping_metrics.txt \ + --family-size-histogram ${prefix}.family_size_histogram.txt \ ${args} """ From 2496e2810f3c45c1dea13d3a300f111d1f35f7bd Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 10:00:00 +0200 Subject: [PATCH 06/36] Changes to fgumi extract to make it portable (nf-core) --- modules/local/fgumi/extract/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/fgumi/extract/main.nf b/modules/local/fgumi/extract/main.nf index bd4294fc..b855ef5d 100644 --- a/modules/local/fgumi/extract/main.nf +++ b/modules/local/fgumi/extract/main.nf @@ -19,8 +19,8 @@ process FGUMI_EXTRACT { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}.fgumi.unmapped" - def sample_name = meta.samplename ?: meta.id - def library_name = meta.library ?: meta.id + def sample_name = task.ext.sample_name ?: meta.id + def library_name = task.ext.library_name ?: meta.id def input_files = (reads instanceof List ? 
reads : [reads]).collect { read -> "${read}" }.join(' ') """ From a04fe638f622090a40337b80d0259a0cbf223a40 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 10:02:34 +0200 Subject: [PATCH 07/36] List input instead of own conversion --- modules/local/fgumi/extract/main.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/local/fgumi/extract/main.nf b/modules/local/fgumi/extract/main.nf index b855ef5d..9b341def 100644 --- a/modules/local/fgumi/extract/main.nf +++ b/modules/local/fgumi/extract/main.nf @@ -21,11 +21,10 @@ process FGUMI_EXTRACT { prefix = task.ext.prefix ?: "${meta.id}.fgumi.unmapped" def sample_name = task.ext.sample_name ?: meta.id def library_name = task.ext.library_name ?: meta.id - def input_files = (reads instanceof List ? reads : [reads]).collect { read -> "${read}" }.join(' ') """ fgumi extract \ - --inputs ${input_files} \ + --inputs ${reads} \ --output ${prefix}.bam \ --sample "${sample_name}" \ --library "${library_name}" \ From 79e9ffd20f3bd292d23faa33480816851e783f11 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 10:09:26 +0200 Subject: [PATCH 08/36] Split FGUMI_FILTER into separate filter and sort module and updated tests --- conf/modules.config | 9 ++- modules/local/fgumi/filter/main.nf | 14 +---- modules/local/fgumi/sort/main.nf | 39 ++++++++++++ .../local/fastq_to_aligned_cram/main.nf | 9 ++- tests/modules/local/fgumi/filter/main.nf.test | 2 +- .../local/fgumi/filter/main.nf.test.snap | 24 ++------ tests/modules/local/fgumi/sort/main.nf.test | 32 ++++++++++ .../local/fgumi/sort/main.nf.test.snap | 59 +++++++++++++++++++ 8 files changed, 152 insertions(+), 36 deletions(-) create mode 100644 modules/local/fgumi/sort/main.nf create mode 100644 tests/modules/local/fgumi/sort/main.nf.test create mode 100644 tests/modules/local/fgumi/sort/main.nf.test.snap diff --git a/conf/modules.config b/conf/modules.config index e77ffc7f..66565c56 100644 --- a/conf/modules.config +++
b/conf/modules.config @@ -338,7 +338,14 @@ process { "--stats ${meta.id}.fgumi.filter.filtering_metrics.txt", ].join(" ").trim() } - ext.args2 = { + } + + //// FGUMI coordinate sort/index after filter (step 7) + withName: '.*FASTQ_TO_CRAM:FGUMI_SORT' { + cpus = 8 + memory = 32.GB + ext.prefix = { "${meta.id}.fgumi.filter" } + ext.args = { [ "--threads ${task.cpus}", "--max-memory ${params.fgumi_sort_max_memory}", diff --git a/modules/local/fgumi/filter/main.nf b/modules/local/fgumi/filter/main.nf index c35bd7cc..b63752da 100644 --- a/modules/local/fgumi/filter/main.nf +++ b/modules/local/fgumi/filter/main.nf @@ -10,8 +10,7 @@ process FGUMI_FILTER { tuple val(meta), path(bam), path(fasta) output: - tuple val(meta), path("${prefix}.bam"), emit: bam - tuple val(meta), path("${prefix}.bam.bai"), emit: bai + tuple val(meta), path("${prefix}.filtered.bam"), emit: bam tuple val(meta), path("${prefix}.filtering_metrics.txt"), optional: true, emit: filtering_metrics tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi @@ -20,7 +19,6 @@ process FGUMI_FILTER { script: def args = task.ext.args ?: '' - def sort_args = task.ext.args2 ?: '' prefix = task.ext.prefix ?: "${meta.id}.fgumi.filter" """ @@ -29,20 +27,12 @@ process FGUMI_FILTER { --output ${prefix}.filtered.bam \ --ref ${fasta} \ ${args} - - fgumi sort \ - --input ${prefix}.filtered.bam \ - --output ${prefix}.bam \ - --order coordinate \ - --write-index \ - ${sort_args} """ stub: prefix = task.ext.prefix ?: "${meta.id}.fgumi.filter" """ - touch ${prefix}.bam - touch ${prefix}.bam.bai + touch ${prefix}.filtered.bam touch ${prefix}.filtering_metrics.txt """ } diff --git a/modules/local/fgumi/sort/main.nf b/modules/local/fgumi/sort/main.nf new file mode 100644 index 00000000..a1b8de7c --- /dev/null +++ b/modules/local/fgumi/sort/main.nf @@ -0,0 +1,39 @@ +process FGUMI_SORT { + tag "$meta.id" + label 'process_medium' + + container 
"${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data' + : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam + tuple val(meta), path("${prefix}.bam.bai"), emit: bai + tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.fgumi.filter" + + """ + fgumi sort \ + --input ${bam} \ + --output ${prefix}.bam \ + --order coordinate \ + --write-index \ + ${args} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.fgumi.filter" + """ + touch ${prefix}.bam + touch ${prefix}.bam.bai + """ +} diff --git a/subworkflows/local/fastq_to_aligned_cram/main.nf b/subworkflows/local/fastq_to_aligned_cram/main.nf index a517ac8d..a87fcc88 100644 --- a/subworkflows/local/fastq_to_aligned_cram/main.nf +++ b/subworkflows/local/fastq_to_aligned_cram/main.nf @@ -12,6 +12,7 @@ include { FGUMI_FILTER } from "../../../modules/local/fgumi/filter/main include { FGUMI_GROUP } from "../../../modules/local/fgumi/group/main.nf" include { FGUMI_SIMPLEX } from "../../../modules/local/fgumi/simplex/main.nf" include { FGUMI_SNAP_ZIPPER_SORT } from "../../../modules/local/fgumi/snapzippersort/main.nf" +include { FGUMI_SORT } from "../../../modules/local/fgumi/sort/main.nf" include { SAMTOOLS_CONVERT } from "../../../modules/nf-core/samtools/convert/main" include { SAMTOOLS_SORMADUP } from "../../../modules/nf-core/samtools/sormadup/main.nf" include { SAMTOOLS_SORT } from "../../../modules/nf-core/samtools/sort/main" @@ -101,6 +102,10 @@ workflow FASTQ_TO_CRAM { .map { meta, 
bam, fasta -> [meta, bam, fasta] } ) + FGUMI_SORT( + FGUMI_FILTER.out.bam + ) + FASTQ_ALIGN_RNA( ch_meta_reads_aligner_index_fasta_datatype.rna ) @@ -155,7 +160,7 @@ workflow FASTQ_TO_CRAM { // UMI branch outputs are mixed into the common markdup/metrics streams. ch_markdup_index = ch_markdup_index.mix( - FGUMI_FILTER.out.bam.join(FGUMI_FILTER.out.bai, failOnMismatch: true, failOnDuplicate: true) + FGUMI_SORT.out.bam.join(FGUMI_SORT.out.bai, failOnMismatch: true, failOnDuplicate: true) ) ch_sormadup_metrics = ch_sormadup_metrics.mix(FGUMI_GROUP.out.grouping_metrics) ch_sormadup_metrics = ch_sormadup_metrics.mix(FGUMI_GROUP.out.family_size_histogram) @@ -163,7 +168,7 @@ workflow FASTQ_TO_CRAM { ch_sormadup_metrics = ch_sormadup_metrics.mix(FGUMI_FILTER.out.filtering_metrics) ch_duplex_metrics = FGUMI_DUPLEX_METRICS.out.duplex_metrics ch_family_size_histogram = FGUMI_GROUP.out.family_size_histogram - ch_filtered_consensus_bam = FGUMI_FILTER.out.bam + ch_filtered_consensus_bam = FGUMI_SORT.out.bam // BIOBAMBAM_BAMSORMADUP([meta, [bam, bam]], fasta, fai) BIOBAMBAM_BAMSORMADUP(ch_bam_fasta_fai.bamsormadup) diff --git a/tests/modules/local/fgumi/filter/main.nf.test b/tests/modules/local/fgumi/filter/main.nf.test index 9c068a67..bec1f2b3 100644 --- a/tests/modules/local/fgumi/filter/main.nf.test +++ b/tests/modules/local/fgumi/filter/main.nf.test @@ -10,7 +10,7 @@ nextflow_process { tag "modules/local/fgumi/filter" test("test - stub") { - // Stub-mode contract test: verifies filtered BAM/index and metrics outputs. + // Stub-mode contract test: verifies filtered BAM and metrics outputs. 
options "-stub" when { diff --git a/tests/modules/local/fgumi/filter/main.nf.test.snap b/tests/modules/local/fgumi/filter/main.nf.test.snap index 28c1a053..7f59c001 100644 --- a/tests/modules/local/fgumi/filter/main.nf.test.snap +++ b/tests/modules/local/fgumi/filter/main.nf.test.snap @@ -7,18 +7,10 @@ { "id": "test" }, - "test.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.fgumi.filter.filtered.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ - [ - { - "id": "test" - }, - "test.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ [ { "id": "test" @@ -26,27 +18,19 @@ "test.fgumi.filter.filtering_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ + "2": [ [ "FGUMI_FILTER", "fgumi", "0.1.2" ] ], - "bai": [ - [ - { - "id": "test" - }, - "test.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], "bam": [ [ { "id": "test" }, - "test.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.fgumi.filter.filtered.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "filtering_metrics": [ @@ -70,6 +54,6 @@ "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-04-02T16:05:00.831097499" + "timestamp": "2026-04-08T10:07:36.721154372" } } \ No newline at end of file diff --git a/tests/modules/local/fgumi/sort/main.nf.test b/tests/modules/local/fgumi/sort/main.nf.test new file mode 100644 index 00000000..7879f414 --- /dev/null +++ b/tests/modules/local/fgumi/sort/main.nf.test @@ -0,0 +1,32 @@ +nextflow_process { + + name "Test Process FGUMI_SORT" + script "modules/local/fgumi/sort/main.nf" + process "FGUMI_SORT" + + tag "modules" + tag "modules/local" + tag "modules/local/fgumi" + tag "modules/local/fgumi/sort" + + test("test - stub") { + // Stub-mode contract test: verifies coordinate-sorted BAM/index outputs. 
+ options "-stub" + + when { + process { + """ + input[0] = [ + [id: "test"], + file("${projectDir}/tests/inputs/fgumi/consensus.bam", checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/tests/modules/local/fgumi/sort/main.nf.test.snap b/tests/modules/local/fgumi/sort/main.nf.test.snap new file mode 100644 index 00000000..c597cb17 --- /dev/null +++ b/tests/modules/local/fgumi/sort/main.nf.test.snap @@ -0,0 +1,59 @@ +{ + "test - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "FGUMI_SORT", + "fgumi", + "0.1.2" + ] + ], + "bai": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_SORT", + "fgumi", + "0.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-08T10:07:49.850827528" + } +} \ No newline at end of file From 5ff181b649c59363b67fab8b122a1403d63499a8 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 10:12:16 +0200 Subject: [PATCH 09/36] decoupled umi_aware and fgumi_aware --- .../local/fastq_to_aligned_cram/main.nf | 4 ++-- .../fgumi_umi_stub.nf.test | 2 +- .../fgumi_umi_stub.nf.test.snap | 22 +++++++++---------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/subworkflows/local/fastq_to_aligned_cram/main.nf b/subworkflows/local/fastq_to_aligned_cram/main.nf index a87fcc88..99d028ca 100644 --- a/subworkflows/local/fastq_to_aligned_cram/main.nf +++ b/subworkflows/local/fastq_to_aligned_cram/main.nf @@ -51,7 +51,7 @@ workflow FASTQ_TO_CRAM { 
ch_meta_reads_aligner_index_fasta_datatype.dna .branch { meta, reads, aligner, index, fasta -> - umi: (meta.fgumi_aware == true) || (meta.umi_aware == true) + umi: meta.fgumi_aware == true return [meta, reads, aligner, index, fasta] non_umi: true return [meta, reads, aligner, index, fasta] @@ -218,7 +218,7 @@ workflow FASTQ_TO_CRAM { ch_cram_crai.dump(tag: "FASTQ_TO_CRAM: cram and crai", pretty: true) // Keep a dedicated channel for UMI-aware sample CRAM outputs. - ch_umi_cram_crai = ch_cram_crai.filter { meta, _cram, _crai -> (meta.fgumi_aware == true) || (meta.umi_aware == true) } + ch_umi_cram_crai = ch_cram_crai.filter { meta, _cram, _crai -> meta.fgumi_aware == true } emit: cram_crai = ch_cram_crai diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test index 5f192e63..8a724b0d 100644 --- a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test +++ b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test @@ -22,7 +22,7 @@ nextflow_workflow { single_end: false, sample_type: "DNA", markdup: "bamsormadup", - umi_aware: true, + fgumi_aware: true, genome_data: [ fasta: "${projectDir}/tests/inputs/fgumi/ref.fa", fai: "${projectDir}/tests/inputs/fgumi/ref.fa.fai", diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap index ceffdb37..491cd976 100644 --- a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap +++ b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap @@ -8,6 +8,7 @@ "cram_crai": [ [ { + "fgumi_aware": true, "genome_data": { "dict": "ref.dict", "fai": "ref.fa.fai", @@ -18,8 +19,7 @@ "markdup": "bamsormadup", "sample_type": "DNA", "samplename": "HT1080_chr20", - "single_end": false, - "umi_aware": true + "single_end": false }, "UMI_consensus1.cram", "UMI_consensus1.cram.crai" 
@@ -33,7 +33,7 @@ "single_end": false, "sample_type": "DNA", "markdup": "bamsormadup", - "umi_aware": true, + "fgumi_aware": true, "genome_data": { "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", @@ -54,7 +54,7 @@ "single_end": false, "sample_type": "DNA", "markdup": "bamsormadup", - "umi_aware": true, + "fgumi_aware": true, "genome_data": { "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", @@ -75,7 +75,7 @@ "single_end": false, "sample_type": "DNA", "markdup": "bamsormadup", - "umi_aware": true, + "fgumi_aware": true, "genome_data": { "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", @@ -102,7 +102,7 @@ "single_end": false, "sample_type": "DNA", "markdup": "bamsormadup", - "umi_aware": true, + "fgumi_aware": true, "genome_data": { "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", @@ -121,7 +121,7 @@ "single_end": false, "sample_type": "DNA", "markdup": "bamsormadup", - "umi_aware": true, + "fgumi_aware": true, "genome_data": { "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", @@ -140,7 +140,7 @@ "single_end": false, "sample_type": "DNA", "markdup": "bamsormadup", - "umi_aware": true, + "fgumi_aware": true, "genome_data": { "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", @@ -159,7 +159,7 @@ "single_end": false, "sample_type": "DNA", "markdup": "bamsormadup", - "umi_aware": true, + "fgumi_aware": true, "genome_data": { "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", @@ -180,7 +180,7 @@ "single_end": false, "sample_type": "DNA", "markdup": "bamsormadup", - "umi_aware": true, + "fgumi_aware": true, "genome_data": { 
"fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", @@ -200,6 +200,6 @@ "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-04-07T14:33:07.208059007" + "timestamp": "2026-04-08T10:11:21.73767029" } } \ No newline at end of file From 2840b7338e3e6ad01eda154038eee45fe0962c58 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 10:18:02 +0200 Subject: [PATCH 10/36] Refactor: fgumi modules to dedicated umi_consensus subworkflow --- .../local/fastq_to_aligned_cram/main.nf | 63 +--- subworkflows/local/umi_consensus/main.nf | 69 ++++ .../local/umi_consensus/main.nf.test | 56 ++++ .../local/umi_consensus/main.nf.test.snap | 309 ++++++++++++++++++ 4 files changed, 444 insertions(+), 53 deletions(-) create mode 100644 subworkflows/local/umi_consensus/main.nf create mode 100644 tests/subworkflows/local/umi_consensus/main.nf.test create mode 100644 tests/subworkflows/local/umi_consensus/main.nf.test.snap diff --git a/subworkflows/local/fastq_to_aligned_cram/main.nf b/subworkflows/local/fastq_to_aligned_cram/main.nf index 99d028ca..f5a89a94 100644 --- a/subworkflows/local/fastq_to_aligned_cram/main.nf +++ b/subworkflows/local/fastq_to_aligned_cram/main.nf @@ -6,13 +6,6 @@ // MODULES include { BIOBAMBAM_BAMSORMADUP } from "../../../modules/nf-core/biobambam/bamsormadup/main.nf" -include { FGUMI_DUPLEX_METRICS } from "../../../modules/local/fgumi/duplexmetrics/main.nf" -include { FGUMI_EXTRACT } from "../../../modules/local/fgumi/extract/main.nf" -include { FGUMI_FILTER } from "../../../modules/local/fgumi/filter/main.nf" -include { FGUMI_GROUP } from "../../../modules/local/fgumi/group/main.nf" -include { FGUMI_SIMPLEX } from "../../../modules/local/fgumi/simplex/main.nf" -include { FGUMI_SNAP_ZIPPER_SORT } from "../../../modules/local/fgumi/snapzippersort/main.nf" -include { FGUMI_SORT } from "../../../modules/local/fgumi/sort/main.nf" include { SAMTOOLS_CONVERT } from 
"../../../modules/nf-core/samtools/convert/main" include { SAMTOOLS_SORMADUP } from "../../../modules/nf-core/samtools/sormadup/main.nf" include { SAMTOOLS_SORT } from "../../../modules/nf-core/samtools/sort/main" @@ -20,6 +13,7 @@ include { SAMTOOLS_SORT } from "../../../modules/nf-core/samtools/sort/m // SUBWORKFLOWS include { FASTQ_ALIGN_DNA } from '../../nf-core/fastq_align_dna/main' include { FASTQ_ALIGN_RNA } from '../../local/fastq_align_rna/main' +include { UMI_CONSENSUS_FGUMI } from '../../local/umi_consensus/main.nf' // FUNCTIONS include { getGenomeAttribute } from '../../local/utils_nfcore_preprocessing_pipeline' @@ -65,45 +59,8 @@ workflow FASTQ_TO_CRAM { ) // UMI-aware fgumi branch (steps 1, 3, 4, 5, 6, 7 in fgumi Basic Workflow) - FGUMI_EXTRACT( + UMI_CONSENSUS_FGUMI( ch_dna_to_align.umi - .map { meta, reads, _aligner, _index, _fasta -> [meta, reads] } - ) - - FGUMI_SNAP_ZIPPER_SORT( - FGUMI_EXTRACT.out.bam - .join( - ch_dna_to_align.umi.map { meta, _reads, _aligner, _index, fasta -> - [meta, getGenomeAttribute(meta.genome_data, 'snap'), fasta, getGenomeAttribute(meta.genome_data, 'dict')] - }, - by: 0, - ) - .map { meta, unmapped_bam, snap_index, fasta, dict -> [meta, unmapped_bam, snap_index, fasta, dict] } - ) - - FGUMI_GROUP( - FGUMI_SNAP_ZIPPER_SORT.out.bam - ) - - FGUMI_SIMPLEX( - FGUMI_GROUP.out.bam - ) - - FGUMI_DUPLEX_METRICS( - FGUMI_GROUP.out.bam - ) - - FGUMI_FILTER( - FGUMI_SIMPLEX.out.bam - .join( - ch_dna_to_align.umi.map { meta, _reads, _aligner, _index, fasta -> [meta, fasta] }, - by: 0, - ) - .map { meta, bam, fasta -> [meta, bam, fasta] } - ) - - FGUMI_SORT( - FGUMI_FILTER.out.bam ) FASTQ_ALIGN_RNA( @@ -160,15 +117,15 @@ workflow FASTQ_TO_CRAM { // UMI branch outputs are mixed into the common markdup/metrics streams. 
ch_markdup_index = ch_markdup_index.mix( - FGUMI_SORT.out.bam.join(FGUMI_SORT.out.bai, failOnMismatch: true, failOnDuplicate: true) + UMI_CONSENSUS_FGUMI.out.bam_bai ) - ch_sormadup_metrics = ch_sormadup_metrics.mix(FGUMI_GROUP.out.grouping_metrics) - ch_sormadup_metrics = ch_sormadup_metrics.mix(FGUMI_GROUP.out.family_size_histogram) - ch_sormadup_metrics = ch_sormadup_metrics.mix(FGUMI_SIMPLEX.out.consensus_metrics) - ch_sormadup_metrics = ch_sormadup_metrics.mix(FGUMI_FILTER.out.filtering_metrics) - ch_duplex_metrics = FGUMI_DUPLEX_METRICS.out.duplex_metrics - ch_family_size_histogram = FGUMI_GROUP.out.family_size_histogram - ch_filtered_consensus_bam = FGUMI_SORT.out.bam + ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.grouping_metrics) + ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.family_size_histogram) + ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.consensus_metrics) + ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.filtering_metrics) + ch_duplex_metrics = UMI_CONSENSUS_FGUMI.out.duplex_metrics + ch_family_size_histogram = UMI_CONSENSUS_FGUMI.out.family_size_histogram + ch_filtered_consensus_bam = UMI_CONSENSUS_FGUMI.out.filtered_consensus_bam // BIOBAMBAM_BAMSORMADUP([meta, [bam, bam]], fasta, fai) BIOBAMBAM_BAMSORMADUP(ch_bam_fasta_fai.bamsormadup) diff --git a/subworkflows/local/umi_consensus/main.nf b/subworkflows/local/umi_consensus/main.nf new file mode 100644 index 00000000..a29d1920 --- /dev/null +++ b/subworkflows/local/umi_consensus/main.nf @@ -0,0 +1,69 @@ +#!/usr/bin/env nextflow + +// MODULES +include { FGUMI_DUPLEX_METRICS } from "../../../modules/local/fgumi/duplexmetrics/main.nf" +include { FGUMI_EXTRACT } from "../../../modules/local/fgumi/extract/main.nf" +include { FGUMI_FILTER } from "../../../modules/local/fgumi/filter/main.nf" +include { FGUMI_GROUP } from "../../../modules/local/fgumi/group/main.nf" +include { FGUMI_SIMPLEX } from 
"../../../modules/local/fgumi/simplex/main.nf" +include { FGUMI_SNAP_ZIPPER_SORT } from "../../../modules/local/fgumi/snapzippersort/main.nf" +include { FGUMI_SORT } from "../../../modules/local/fgumi/sort/main.nf" + +// FUNCTIONS +include { getGenomeAttribute } from '../../local/utils_nfcore_preprocessing_pipeline' + +workflow UMI_CONSENSUS_FGUMI { + take: + ch_meta_reads_aligner_index_fasta // channel: [mandatory] [meta, reads, aligner, index, fasta] + + main: + FGUMI_EXTRACT( + ch_meta_reads_aligner_index_fasta + .map { meta, reads, _aligner, _index, _fasta -> [meta, reads] } + ) + + FGUMI_SNAP_ZIPPER_SORT( + FGUMI_EXTRACT.out.bam + .join( + ch_meta_reads_aligner_index_fasta.map { meta, _reads, _aligner, _index, fasta -> + [meta, getGenomeAttribute(meta.genome_data, 'snap'), fasta, getGenomeAttribute(meta.genome_data, 'dict')] + }, + by: 0, + ) + .map { meta, unmapped_bam, snap_index, fasta, dict -> [meta, unmapped_bam, snap_index, fasta, dict] } + ) + + FGUMI_GROUP( + FGUMI_SNAP_ZIPPER_SORT.out.bam + ) + + FGUMI_SIMPLEX( + FGUMI_GROUP.out.bam + ) + + FGUMI_DUPLEX_METRICS( + FGUMI_GROUP.out.bam + ) + + FGUMI_FILTER( + FGUMI_SIMPLEX.out.bam + .join( + ch_meta_reads_aligner_index_fasta.map { meta, _reads, _aligner, _index, fasta -> [meta, fasta] }, + by: 0, + ) + .map { meta, bam, fasta -> [meta, bam, fasta] } + ) + + FGUMI_SORT( + FGUMI_FILTER.out.bam + ) + + emit: + bam_bai = FGUMI_SORT.out.bam.join(FGUMI_SORT.out.bai, failOnMismatch: true, failOnDuplicate: true) + grouping_metrics = FGUMI_GROUP.out.grouping_metrics + family_size_histogram = FGUMI_GROUP.out.family_size_histogram + consensus_metrics = FGUMI_SIMPLEX.out.consensus_metrics + filtering_metrics = FGUMI_FILTER.out.filtering_metrics + duplex_metrics = FGUMI_DUPLEX_METRICS.out.duplex_metrics + filtered_consensus_bam = FGUMI_SORT.out.bam +} diff --git a/tests/subworkflows/local/umi_consensus/main.nf.test b/tests/subworkflows/local/umi_consensus/main.nf.test new file mode 100644 index 00000000..a365bb28 
--- /dev/null +++ b/tests/subworkflows/local/umi_consensus/main.nf.test @@ -0,0 +1,56 @@ +nextflow_workflow { + + name "Test Workflow UMI_CONSENSUS_FGUMI" + script "subworkflows/local/umi_consensus/main.nf" + workflow "UMI_CONSENSUS_FGUMI" + + tag "subworkflows" + tag "subworkflows/local" + tag "subworkflows/local/umi_consensus" + tag "fgumi" + + test("umi consensus fgumi - stub") { + options "-stub" + when { + workflow { + """ + input[0] = Channel.of([ + [ + id: "UMI_consensus1", + samplename: "HT1080_chr20", + single_end: false, + sample_type: "DNA", + markdup: "bamsormadup", + fgumi_aware: true, + genome_data: [ + fasta: "${projectDir}/tests/inputs/fgumi/ref.fa", + fai: "${projectDir}/tests/inputs/fgumi/ref.fa.fai", + dict: "${projectDir}/tests/inputs/fgumi/ref.dict", + snap: "${projectDir}/tests/inputs/fgumi/snap_index" + ] + ], + [ + file("${projectDir}/tests/inputs/fgumi/R1.fastq.gz", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/R2.fastq.gz", checkIfExists: true) + ], + "snap", + file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true) + ]) + """ + } + } + + then { + assert workflow.success + assert workflow.out.bam_bai.size() == 1 + assert workflow.out.grouping_metrics.size() == 1 + assert workflow.out.family_size_histogram.size() == 1 + assert workflow.out.consensus_metrics.size() == 1 + assert workflow.out.filtering_metrics.size() == 1 + assert workflow.out.duplex_metrics.size() == 1 + assert workflow.out.filtered_consensus_bam.size() == 1 + assert snapshot(workflow.out).match() + } + } +} diff --git a/tests/subworkflows/local/umi_consensus/main.nf.test.snap b/tests/subworkflows/local/umi_consensus/main.nf.test.snap new file mode 100644 index 00000000..9fc3df12 --- /dev/null +++ b/tests/subworkflows/local/umi_consensus/main.nf.test.snap @@ -0,0 +1,309 @@ +{ + "umi consensus fgumi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "UMI_consensus1", 
+ "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "UMI_consensus1.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.group.grouping_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": 
"ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.simplex.consensus_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.filter.filtering_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_bai": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + 
"markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "UMI_consensus1.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "consensus_metrics": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.simplex.consensus_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "duplex_metrics": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "family_size_histogram": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": 
"ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "filtered_consensus_bam": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "filtering_metrics": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.filter.filtering_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "grouping_metrics": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.group.grouping_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + 
"timestamp": "2026-04-08T10:16:16.364166786" + } +} \ No newline at end of file From 31d174cdab50190609cd61e9418877f7a50fab52 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 10:30:48 +0200 Subject: [PATCH 11/36] removed output channel --- .../local/fastq_to_aligned_cram/main.nf | 4 ---- .../fgumi_umi_stub.nf.test | 1 - .../fgumi_umi_stub.nf.test.snap | 24 +------------------ workflows/preprocessing.nf | 2 +- 4 files changed, 2 insertions(+), 29 deletions(-) diff --git a/subworkflows/local/fastq_to_aligned_cram/main.nf b/subworkflows/local/fastq_to_aligned_cram/main.nf index f5a89a94..9e61b35f 100644 --- a/subworkflows/local/fastq_to_aligned_cram/main.nf +++ b/subworkflows/local/fastq_to_aligned_cram/main.nf @@ -174,13 +174,9 @@ workflow FASTQ_TO_CRAM { .set { ch_cram_crai } ch_cram_crai.dump(tag: "FASTQ_TO_CRAM: cram and crai", pretty: true) - // Keep a dedicated channel for UMI-aware sample CRAM outputs. - ch_umi_cram_crai = ch_cram_crai.filter { meta, _cram, _crai -> meta.fgumi_aware == true } - emit: cram_crai = ch_cram_crai // UMI-specific output channels for downstream reporting and publishing. 
- umi_cram_crai = ch_umi_cram_crai filtered_consensus_bam = ch_filtered_consensus_bam rna_splice_junctions = FASTQ_ALIGN_RNA.out.splice_junctions rna_junctions = FASTQ_ALIGN_RNA.out.junctions diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test index 8a724b0d..405fb1d7 100644 --- a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test +++ b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test @@ -49,7 +49,6 @@ nextflow_workflow { assert workflow.out.duplex_metrics.size() == 1 assert workflow.out.family_size_histogram.size() == 1 assert workflow.out.filtered_consensus_bam.size() == 1 - assert workflow.out.umi_cram_crai.size() == 1 assert snapshot( sanitizeOutput(workflow.out, unstableKeys:["cram_crai"]) ).match() diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap index 491cd976..d699ff72 100644 --- a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap +++ b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap @@ -171,28 +171,6 @@ }, "UMI_consensus1.fgumi.simplex.consensus_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] - ], - "umi_cram_crai": [ - [ - { - "id": "UMI_consensus1", - "samplename": "HT1080_chr20", - "single_end": false, - "sample_type": "DNA", - "markdup": "bamsormadup", - "fgumi_aware": true, - "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] - } - }, - "UMI_consensus1.cram:md5,d41d8cd98f00b204e9800998ecf8427e", - "UMI_consensus1.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] ] } ], @@ -200,6 +178,6 @@ "nf-test": "0.9.2", "nextflow": 
"25.10.4" }, - "timestamp": "2026-04-08T10:11:21.73767029" + "timestamp": "2026-04-08T10:29:40.567401171" } } \ No newline at end of file diff --git a/workflows/preprocessing.nf b/workflows/preprocessing.nf index 9f17ef8f..bca3a175 100644 --- a/workflows/preprocessing.nf +++ b/workflows/preprocessing.nf @@ -439,7 +439,7 @@ workflow PREPROCESSING { family_size_histogram = FASTQ_TO_CRAM.out.family_size_histogram umi_filtered_consensus_bam = FASTQ_TO_CRAM.out.filtered_consensus_bam umi_duplex_metrics = FASTQ_TO_CRAM.out.duplex_metrics - umi_crams = FASTQ_TO_CRAM.out.umi_cram_crai + umi_crams = FASTQ_TO_CRAM.out.cram_crai.filter { meta, _cram, _crai -> meta.fgumi_aware == true } mosdepth_global = COVERAGE.out.mosdepth_global mosdepth_summary = COVERAGE.out.mosdepth_summary mosdepth_regions = COVERAGE.out.mosdepth_regions From 65b983b4712abb9f8f0309d9642d87b995e4e4b9 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 10:36:30 +0200 Subject: [PATCH 12/36] Changed tests to use template.bam and removed grouped.bam --- tests/inputs/fgumi/grouped.bam | 1 - tests/modules/local/fgumi/duplexmetrics/main.nf.test | 2 +- tests/modules/local/fgumi/simplex/main.nf.test | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) delete mode 100644 tests/inputs/fgumi/grouped.bam diff --git a/tests/inputs/fgumi/grouped.bam b/tests/inputs/fgumi/grouped.bam deleted file mode 100644 index 48cdce85..00000000 --- a/tests/inputs/fgumi/grouped.bam +++ /dev/null @@ -1 +0,0 @@ -placeholder diff --git a/tests/modules/local/fgumi/duplexmetrics/main.nf.test b/tests/modules/local/fgumi/duplexmetrics/main.nf.test index 665fb839..2266eaa9 100644 --- a/tests/modules/local/fgumi/duplexmetrics/main.nf.test +++ b/tests/modules/local/fgumi/duplexmetrics/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { """ input[0] = [ [id: "test"], - file("${projectDir}/tests/inputs/fgumi/grouped.bam", checkIfExists: true) + file("${projectDir}/tests/inputs/fgumi/template.bam", checkIfExists: true) ] """ } 
diff --git a/tests/modules/local/fgumi/simplex/main.nf.test b/tests/modules/local/fgumi/simplex/main.nf.test index 62b9fc5f..f30cbe76 100644 --- a/tests/modules/local/fgumi/simplex/main.nf.test +++ b/tests/modules/local/fgumi/simplex/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { """ input[0] = [ [id: "test"], - file("${projectDir}/tests/inputs/fgumi/grouped.bam", checkIfExists: true) + file("${projectDir}/tests/inputs/fgumi/template.bam", checkIfExists: true) ] """ } From f126d728a7a63126ba354334163bfdb4781d0f43 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 10:41:17 +0200 Subject: [PATCH 13/36] Changed some default resource requirements. Needs benchmarking though --- conf/modules.config | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 66565c56..cc963908 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -233,8 +233,8 @@ process { //// FGUMI extract (step 1) withName: '.*FASTQ_TO_CRAM:FGUMI_EXTRACT' { - cpus = 8 - memory = 32.GB + cpus = 4 + memory = 16.GB ext.prefix = { "${meta.id}.fgumi.unmapped" } ext.args = { [ @@ -329,8 +329,8 @@ process { //// FGUMI filter + coordinate sort/index (step 7) withName: '.*FASTQ_TO_CRAM:FGUMI_FILTER' { - cpus = 8 - memory = 32.GB + cpus = 4 + memory = 16.GB ext.prefix = { "${meta.id}.fgumi.filter" } ext.args = { [ From 122b80f6acc68ebd0d2be0f7561ce5201ab79db4 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 10:50:54 +0200 Subject: [PATCH 14/36] Added some comments + fixed fgumi simplex parameter bug --- conf/modules.config | 14 +++++++------- modules/local/fgumi/extract/main.nf | 1 + modules/local/fgumi/group/main.nf | 1 + subworkflows/local/fastq_to_aligned_cram/main.nf | 1 + subworkflows/local/umi_consensus/main.nf | 3 +++ 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index cc963908..1100146c 100644 --- a/conf/modules.config +++ 
b/conf/modules.config @@ -232,7 +232,7 @@ process { } //// FGUMI extract (step 1) - withName: '.*FASTQ_TO_CRAM:FGUMI_EXTRACT' { + withName: '.*FGUMI_EXTRACT' { cpus = 4 memory = 16.GB ext.prefix = { "${meta.id}.fgumi.unmapped" } @@ -249,7 +249,7 @@ process { } //// FGUMI fastq | SNAP | zipper | template sort (step 3) - withName: '.*FASTQ_TO_CRAM:FGUMI_SNAP_ZIPPER_SORT' { + withName: '.*FGUMI_SNAP_ZIPPER_SORT' { cpus = 16 memory = 64.GB ext.prefix = { "${meta.id}.fgumi" } @@ -293,7 +293,7 @@ process { } //// FGUMI group (step 4) - withName: '.*FASTQ_TO_CRAM:FGUMI_GROUP' { + withName: '.*FGUMI_GROUP' { cpus = 8 memory = 32.GB ext.prefix = { "${meta.id}.fgumi.group" } @@ -303,7 +303,7 @@ process { } //// FGUMI simplex (step 5) - withName: '.*FASTQ_TO_CRAM:FGUMI_SIMPLEX' { + withName: '.*FGUMI_SIMPLEX' { cpus = 8 memory = 32.GB ext.prefix = { "${meta.id}.fgumi.simplex" } @@ -320,7 +320,7 @@ process { } //// FGUMI duplex-metrics (step 6) - withName: '.*FASTQ_TO_CRAM:FGUMI_DUPLEX_METRICS' { + withName: '.*FGUMI_DUPLEX_METRICS' { cpus = 2 memory = 8.GB ext.prefix = { "${meta.id}.fgumi" } @@ -328,7 +328,7 @@ process { } //// FGUMI filter + coordinate sort/index (step 7) - withName: '.*FASTQ_TO_CRAM:FGUMI_FILTER' { + withName: '.*FGUMI_FILTER' { cpus = 4 memory = 16.GB ext.prefix = { "${meta.id}.fgumi.filter" } @@ -341,7 +341,7 @@ process { } //// FGUMI coordinate sort/index after filter (step 7) - withName: '.*FASTQ_TO_CRAM:FGUMI_SORT' { + withName: '.*FGUMI_SORT' { cpus = 8 memory = 32.GB ext.prefix = { "${meta.id}.fgumi.filter" } diff --git a/modules/local/fgumi/extract/main.nf b/modules/local/fgumi/extract/main.nf index 9b341def..996c198e 100644 --- a/modules/local/fgumi/extract/main.nf +++ b/modules/local/fgumi/extract/main.nf @@ -19,6 +19,7 @@ process FGUMI_EXTRACT { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}.fgumi.unmapped" + // Keep module portable: only meta.id is assumed, with optional task.ext overrides. 
def sample_name = task.ext.sample_name ?: meta.id def library_name = task.ext.library_name ?: meta.id diff --git a/modules/local/fgumi/group/main.nf b/modules/local/fgumi/group/main.nf index cecc9d5d..9460ff86 100644 --- a/modules/local/fgumi/group/main.nf +++ b/modules/local/fgumi/group/main.nf @@ -23,6 +23,7 @@ process FGUMI_GROUP { def strategy = task.ext.strategy ?: 'adjacency' def edits = task.ext.edits != null ? task.ext.edits : 1 def compression_level = task.ext.compression_level != null ? task.ext.compression_level : 1 + // Derive per-thread queue memory from requested process resources. def queue_memory_mb = (task.memory.mega / task.cpus * 0.75).intValue() prefix = task.ext.prefix ?: "${meta.id}.fgumi.group" diff --git a/subworkflows/local/fastq_to_aligned_cram/main.nf b/subworkflows/local/fastq_to_aligned_cram/main.nf index 9e61b35f..adc8ccbf 100644 --- a/subworkflows/local/fastq_to_aligned_cram/main.nf +++ b/subworkflows/local/fastq_to_aligned_cram/main.nf @@ -45,6 +45,7 @@ workflow FASTQ_TO_CRAM { ch_meta_reads_aligner_index_fasta_datatype.dna .branch { meta, reads, aligner, index, fasta -> + // fgumi consensus is opt-in via fgumi_aware to avoid changing samtools umi_aware semantics. umi: meta.fgumi_aware == true return [meta, reads, aligner, index, fasta] non_umi: true diff --git a/subworkflows/local/umi_consensus/main.nf b/subworkflows/local/umi_consensus/main.nf index a29d1920..4cf602ab 100644 --- a/subworkflows/local/umi_consensus/main.nf +++ b/subworkflows/local/umi_consensus/main.nf @@ -17,11 +17,13 @@ workflow UMI_CONSENSUS_FGUMI { ch_meta_reads_aligner_index_fasta // channel: [mandatory] [meta, reads, aligner, index, fasta] main: + // Step 1: build an unmapped BAM with UMI tags from input FASTQ. FGUMI_EXTRACT( ch_meta_reads_aligner_index_fasta .map { meta, reads, _aligner, _index, _fasta -> [meta, reads] } ) + // Step 3: align with SNAP, zipper tags back, then template-coordinate sort. 
FGUMI_SNAP_ZIPPER_SORT( FGUMI_EXTRACT.out.bam .join( @@ -45,6 +47,7 @@ workflow UMI_CONSENSUS_FGUMI { FGUMI_GROUP.out.bam ) + // Step 7: filter consensus reads, then coordinate-sort/index for downstream CRAM conversion. FGUMI_FILTER( FGUMI_SIMPLEX.out.bam .join( From 88d043f772518df3312fa6d0c542443cbdd61a06 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Thu, 9 Apr 2026 09:43:03 +0200 Subject: [PATCH 15/36] Dropped params --- conf/modules.config | 7 +++---- nextflow.config | 4 ---- nextflow_schema.json | 20 -------------------- 3 files changed, 3 insertions(+), 28 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 1100146c..2f040e77 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -238,8 +238,8 @@ process { ext.prefix = { "${meta.id}.fgumi.unmapped" } ext.args = { [ - "--read-structures ${meta.fgumi_read_structures ?: params.fgumi_read_structures}", - ((meta.fgumi_extract_umis_from_read_names != null ? meta.fgumi_extract_umis_from_read_names : params.fgumi_extract_umis_from_read_names) ? "--extract-umis-from-read-names" : ""), + "--read-structures ${meta.fgumi_read_structures ?: '+T +T'}", + ((meta.fgumi_extract_umis_from_read_names != null ? meta.fgumi_extract_umis_from_read_names : true) ? "--extract-umis-from-read-names" : ""), "--threads ${task.cpus}", "--queue-memory ${params.fgumi_queue_memory}", params.fgumi_queue_memory_per_thread ? "--queue-memory-per-thread" : "", @@ -263,7 +263,6 @@ process { "-sa", "-xf 2", meta.readgroup ? "-R \"@RG\\t" + meta.readgroup.findResults { rg -> rg.value?.trim() ? 
"${rg.key}:${rg.value}" : null }.join("\\t") + "\"" : "", - "${params.fgumi_snap_extra_args}", ].join(" ").trim() } ext.args2 = { @@ -334,7 +333,7 @@ process { ext.prefix = { "${meta.id}.fgumi.filter" } ext.args = { [ - "--min-reads ${params.fgumi_filter_min_reads}", + "--min-reads 1,1,1", "--stats ${meta.id}.fgumi.filter.filtering_metrics.txt", ].join(" ").trim() } diff --git a/nextflow.config b/nextflow.config index e2f2872c..f1ff8bf3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,19 +16,15 @@ params { genelists = null // UMI consensus (fgumi) options - fgumi_read_structures = '+T +T' - fgumi_extract_umis_from_read_names = true fgumi_group_strategy = 'adjacency' fgumi_group_edits = 1 fgumi_simplex_min_reads = 1 - fgumi_filter_min_reads = '1,1,1' fgumi_queue_memory = 768 fgumi_queue_memory_per_thread= true fgumi_compression_level = 1 fgumi_sort_max_memory = '2G' fgumi_sort_memory_per_thread = true fgumi_snap_ignore_mismatched_pairs = true - fgumi_snap_extra_args = '' fgumi_duplex_metrics_extra_args = '' // MultiQC options diff --git a/nextflow_schema.json b/nextflow_schema.json index 1c4c2c9d..7e323c01 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -74,16 +74,6 @@ "format": "directory-path", "description": "Directory containing gene list bed files for granular coverage analysis" }, - "fgumi_read_structures": { - "type": "string", - "default": "+T +T", - "description": "Read structures passed to fgumi extract for UMI extraction from FASTQ." - }, - "fgumi_extract_umis_from_read_names": { - "type": "boolean", - "default": true, - "description": "Enable fgumi extract --extract-umis-from-read-names for read name encoded UMIs." - }, "fgumi_group_strategy": { "type": "string", "default": "adjacency", @@ -102,11 +92,6 @@ "minimum": 1, "description": "Minimum number of reads required per UMI family for fgumi simplex consensus generation." 
}, - "fgumi_filter_min_reads": { - "type": "string", - "default": "1,1,1", - "description": "Minimum reads triplet passed to fgumi filter --min-reads." - }, "fgumi_queue_memory": { "type": "integer", "default": 768, @@ -141,11 +126,6 @@ "default": true, "description": "Pass -I to SNAP to ignore mismatched read IDs in paired-end input." }, - "fgumi_snap_extra_args": { - "type": "string", - "default": "", - "description": "Additional SNAP options for the fgumi fastq|snap|zipper pipeline." - }, "fgumi_duplex_metrics_extra_args": { "type": "string", "default": "", From 624fa90a55ed04e406204f9d0f23bd454ec5fb47 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Thu, 9 Apr 2026 09:49:51 +0200 Subject: [PATCH 16/36] Fixed ext.args --- conf/modules.config | 12 +++++++++--- modules/local/fgumi/extract/main.nf | 5 ----- modules/local/fgumi/group/main.nf | 6 ------ 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 2f040e77..dfd1cfcf 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -238,6 +238,8 @@ process { ext.prefix = { "${meta.id}.fgumi.unmapped" } ext.args = { [ + "--sample \"${meta.id}\"", + "--library \"${meta.library ?: meta.id}\"", "--read-structures ${meta.fgumi_read_structures ?: '+T +T'}", ((meta.fgumi_extract_umis_from_read_names != null ? meta.fgumi_extract_umis_from_read_names : true) ? 
"--extract-umis-from-read-names" : ""), "--threads ${task.cpus}", @@ -296,9 +298,13 @@ process { cpus = 8 memory = 32.GB ext.prefix = { "${meta.id}.fgumi.group" } - ext.strategy = { params.fgumi_group_strategy } - ext.edits = { params.fgumi_group_edits } - ext.compression_level = { params.fgumi_compression_level } + ext.args = { + [ + "--strategy ${params.fgumi_group_strategy}", + "--edits ${params.fgumi_group_edits}", + "--compression-level ${params.fgumi_compression_level}", + ].join(" ").trim() + } } //// FGUMI simplex (step 5) diff --git a/modules/local/fgumi/extract/main.nf b/modules/local/fgumi/extract/main.nf index 996c198e..ffabc4b4 100644 --- a/modules/local/fgumi/extract/main.nf +++ b/modules/local/fgumi/extract/main.nf @@ -19,16 +19,11 @@ process FGUMI_EXTRACT { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}.fgumi.unmapped" - // Keep module portable: only meta.id is assumed, with optional task.ext overrides. - def sample_name = task.ext.sample_name ?: meta.id - def library_name = task.ext.library_name ?: meta.id """ fgumi extract \ --inputs ${reads} \ --output ${prefix}.bam \ - --sample "${sample_name}" \ - --library "${library_name}" \ ${args} """ diff --git a/modules/local/fgumi/group/main.nf b/modules/local/fgumi/group/main.nf index 9460ff86..52040a12 100644 --- a/modules/local/fgumi/group/main.nf +++ b/modules/local/fgumi/group/main.nf @@ -20,9 +20,6 @@ process FGUMI_GROUP { script: def args = task.ext.args ?: '' - def strategy = task.ext.strategy ?: 'adjacency' - def edits = task.ext.edits != null ? task.ext.edits : 1 - def compression_level = task.ext.compression_level != null ? task.ext.compression_level : 1 // Derive per-thread queue memory from requested process resources. 
def queue_memory_mb = (task.memory.mega / task.cpus * 0.75).intValue() prefix = task.ext.prefix ?: "${meta.id}.fgumi.group" @@ -31,12 +28,9 @@ process FGUMI_GROUP { fgumi group \ --input ${bam} \ --output ${prefix}.bam \ - --strategy ${strategy} \ - --edits ${edits} \ --threads ${task.cpus} \ --queue-memory ${queue_memory_mb} \ --queue-memory-per-thread \ - --compression-level ${compression_level} \ --grouping-metrics ${prefix}.grouping_metrics.txt \ --family-size-histogram ${prefix}.family_size_histogram.txt \ ${args} From aafb2543fed970d65e9f214830c4d33f6094fa47 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Thu, 9 Apr 2026 10:08:47 +0200 Subject: [PATCH 17/36] Moved hard coded options --- conf/modules.config | 4 ++++ modules/local/fgumi/group/main.nf | 2 -- modules/local/fgumi/sort/main.nf | 2 -- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index dfd1cfcf..d14cc4c5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -303,6 +303,8 @@ process { "--strategy ${params.fgumi_group_strategy}", "--edits ${params.fgumi_group_edits}", "--compression-level ${params.fgumi_compression_level}", + "--grouping-metrics ${meta.id}.fgumi.group.grouping_metrics.txt", + "--family-size-histogram ${meta.id}.fgumi.group.family_size_histogram.txt", ].join(" ").trim() } } @@ -352,6 +354,8 @@ process { ext.prefix = { "${meta.id}.fgumi.filter" } ext.args = { [ + "--order coordinate", + "--write-index", "--threads ${task.cpus}", "--max-memory ${params.fgumi_sort_max_memory}", "--memory-per-thread=${params.fgumi_sort_memory_per_thread ? 
'true' : 'false'}", diff --git a/modules/local/fgumi/group/main.nf b/modules/local/fgumi/group/main.nf index 52040a12..d079db7f 100644 --- a/modules/local/fgumi/group/main.nf +++ b/modules/local/fgumi/group/main.nf @@ -31,8 +31,6 @@ process FGUMI_GROUP { --threads ${task.cpus} \ --queue-memory ${queue_memory_mb} \ --queue-memory-per-thread \ - --grouping-metrics ${prefix}.grouping_metrics.txt \ - --family-size-histogram ${prefix}.family_size_histogram.txt \ ${args} """ diff --git a/modules/local/fgumi/sort/main.nf b/modules/local/fgumi/sort/main.nf index a1b8de7c..abfbf633 100644 --- a/modules/local/fgumi/sort/main.nf +++ b/modules/local/fgumi/sort/main.nf @@ -25,8 +25,6 @@ process FGUMI_SORT { fgumi sort \ --input ${bam} \ --output ${prefix}.bam \ - --order coordinate \ - --write-index \ ${args} """ From 54703e084db79575c35d7485256931edf644535e Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Thu, 9 Apr 2026 10:22:06 +0200 Subject: [PATCH 18/36] Removed duplex metrics from pipeline --- conf/modules.config | 8 ---- main.nf | 6 --- modules/local/fgumi/duplexmetrics/main.nf | 35 --------------- nextflow.config | 1 - nextflow_schema.json | 5 --- .../local/fastq_to_aligned_cram/main.nf | 2 - subworkflows/local/umi_consensus/main.nf | 6 --- .../local/fgumi/duplexmetrics/main.nf.test | 33 -------------- .../fgumi/duplexmetrics/main.nf.test.snap | 43 ------------------ .../fgumi_umi_stub.nf.test | 1 - .../fgumi_umi_stub.nf.test.snap | 23 +--------- .../local/umi_consensus/main.nf.test | 1 - .../local/umi_consensus/main.nf.test.snap | 44 +------------------ workflows/preprocessing.nf | 2 - 14 files changed, 2 insertions(+), 208 deletions(-) delete mode 100644 modules/local/fgumi/duplexmetrics/main.nf delete mode 100644 tests/modules/local/fgumi/duplexmetrics/main.nf.test delete mode 100644 tests/modules/local/fgumi/duplexmetrics/main.nf.test.snap diff --git a/conf/modules.config b/conf/modules.config index d14cc4c5..55baae7d 100644 --- a/conf/modules.config +++ 
b/conf/modules.config @@ -326,14 +326,6 @@ process { } } - //// FGUMI duplex-metrics (step 6) - withName: '.*FGUMI_DUPLEX_METRICS' { - cpus = 2 - memory = 8.GB - ext.prefix = { "${meta.id}.fgumi" } - ext.args = "${params.fgumi_duplex_metrics_extra_args}" - } - //// FGUMI filter + coordinate sort/index (step 7) withName: '.*FGUMI_FILTER' { cpus = 4 diff --git a/main.nf b/main.nf index 1c8db700..34af1923 100644 --- a/main.nf +++ b/main.nf @@ -181,7 +181,6 @@ workflow { // Additional UMI consensus outputs. family_size_histogram = PREPROCESSING.out.family_size_histogram umi_filtered_consensus_bam = PREPROCESSING.out.umi_filtered_consensus_bam - umi_duplex_metrics = PREPROCESSING.out.umi_duplex_metrics umi_crams = PREPROCESSING.out.umi_crams mosdepth_global = PREPROCESSING.out.mosdepth_global mosdepth_summary = PREPROCESSING.out.mosdepth_summary @@ -291,11 +290,6 @@ output { bam >> (meta.library ? "${meta.library}/${meta.samplename}/${bam.name}" : "${meta.samplename}/${bam.name}") } } - umi_duplex_metrics { - path { meta, _file -> - return (meta.library ? "${meta.library}/${meta.samplename}/" : "${meta.samplename}/") - } - } umi_crams { path { meta, cram, crai -> cram >> (meta.library ? "${meta.library}/${meta.samplename}/${meta.samplename}.umi.cram" : "${meta.samplename}/${meta.samplename}.umi.cram") diff --git a/modules/local/fgumi/duplexmetrics/main.nf b/modules/local/fgumi/duplexmetrics/main.nf deleted file mode 100644 index 14954864..00000000 --- a/modules/local/fgumi/duplexmetrics/main.nf +++ /dev/null @@ -1,35 +0,0 @@ -process FGUMI_DUPLEX_METRICS { - tag "$meta.id" - label 'process_low' - - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data' - : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}" - - input: - tuple val(meta), path(bam) - - output: - tuple val(meta), path("${prefix}.duplex_metrics*"), emit: duplex_metrics - tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}.fgumi" - - """ - fgumi duplex-metrics \ - --input ${bam} \ - --output ${prefix}.duplex_metrics \ - ${args} - """ - - stub: - prefix = task.ext.prefix ?: "${meta.id}.fgumi" - """ - touch ${prefix}.duplex_metrics.txt - """ -} diff --git a/nextflow.config b/nextflow.config index f1ff8bf3..3af8b3db 100644 --- a/nextflow.config +++ b/nextflow.config @@ -25,7 +25,6 @@ params { fgumi_sort_max_memory = '2G' fgumi_sort_memory_per_thread = true fgumi_snap_ignore_mismatched_pairs = true - fgumi_duplex_metrics_extra_args = '' // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 7e323c01..d3c99338 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -125,11 +125,6 @@ "type": "boolean", "default": true, "description": "Pass -I to SNAP to ignore mismatched read IDs in paired-end input." - }, - "fgumi_duplex_metrics_extra_args": { - "type": "string", - "default": "", - "description": "Additional fgumi duplex-metrics options." 
} } }, diff --git a/subworkflows/local/fastq_to_aligned_cram/main.nf b/subworkflows/local/fastq_to_aligned_cram/main.nf index adc8ccbf..5a05e476 100644 --- a/subworkflows/local/fastq_to_aligned_cram/main.nf +++ b/subworkflows/local/fastq_to_aligned_cram/main.nf @@ -124,7 +124,6 @@ workflow FASTQ_TO_CRAM { ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.family_size_histogram) ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.consensus_metrics) ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.filtering_metrics) - ch_duplex_metrics = UMI_CONSENSUS_FGUMI.out.duplex_metrics ch_family_size_histogram = UMI_CONSENSUS_FGUMI.out.family_size_histogram ch_filtered_consensus_bam = UMI_CONSENSUS_FGUMI.out.filtered_consensus_bam @@ -183,6 +182,5 @@ workflow FASTQ_TO_CRAM { rna_junctions = FASTQ_ALIGN_RNA.out.junctions sormadup_metrics = ch_sormadup_metrics family_size_histogram = ch_family_size_histogram - duplex_metrics = ch_duplex_metrics align_reports = FASTQ_ALIGN_DNA.out.reports } diff --git a/subworkflows/local/umi_consensus/main.nf b/subworkflows/local/umi_consensus/main.nf index 4cf602ab..476df3e2 100644 --- a/subworkflows/local/umi_consensus/main.nf +++ b/subworkflows/local/umi_consensus/main.nf @@ -1,7 +1,6 @@ #!/usr/bin/env nextflow // MODULES -include { FGUMI_DUPLEX_METRICS } from "../../../modules/local/fgumi/duplexmetrics/main.nf" include { FGUMI_EXTRACT } from "../../../modules/local/fgumi/extract/main.nf" include { FGUMI_FILTER } from "../../../modules/local/fgumi/filter/main.nf" include { FGUMI_GROUP } from "../../../modules/local/fgumi/group/main.nf" @@ -43,10 +42,6 @@ workflow UMI_CONSENSUS_FGUMI { FGUMI_GROUP.out.bam ) - FGUMI_DUPLEX_METRICS( - FGUMI_GROUP.out.bam - ) - // Step 7: filter consensus reads, then coordinate-sort/index for downstream CRAM conversion. 
FGUMI_FILTER( FGUMI_SIMPLEX.out.bam @@ -67,6 +62,5 @@ workflow UMI_CONSENSUS_FGUMI { family_size_histogram = FGUMI_GROUP.out.family_size_histogram consensus_metrics = FGUMI_SIMPLEX.out.consensus_metrics filtering_metrics = FGUMI_FILTER.out.filtering_metrics - duplex_metrics = FGUMI_DUPLEX_METRICS.out.duplex_metrics filtered_consensus_bam = FGUMI_SORT.out.bam } diff --git a/tests/modules/local/fgumi/duplexmetrics/main.nf.test b/tests/modules/local/fgumi/duplexmetrics/main.nf.test deleted file mode 100644 index 2266eaa9..00000000 --- a/tests/modules/local/fgumi/duplexmetrics/main.nf.test +++ /dev/null @@ -1,33 +0,0 @@ -nextflow_process { - - name "Test Process FGUMI_DUPLEX_METRICS" - script "modules/local/fgumi/duplexmetrics/main.nf" - process "FGUMI_DUPLEX_METRICS" - - tag "modules" - tag "modules/local" - tag "modules/local/fgumi" - tag "modules/local/fgumi/duplexmetrics" - - test("test - stub") { - // Stub-mode contract test: verifies duplex-metrics output wiring. - options "-stub" - - when { - process { - """ - input[0] = [ - [id: "test"], - file("${projectDir}/tests/inputs/fgumi/template.bam", checkIfExists: true) - ] - """ - } - } - - then { - // Keep assertions minimal and snapshot-driven, consistent with other module tests. 
- assert process.success - assert snapshot(process.out).match() - } - } -} diff --git a/tests/modules/local/fgumi/duplexmetrics/main.nf.test.snap b/tests/modules/local/fgumi/duplexmetrics/main.nf.test.snap deleted file mode 100644 index c2514d51..00000000 --- a/tests/modules/local/fgumi/duplexmetrics/main.nf.test.snap +++ /dev/null @@ -1,43 +0,0 @@ -{ - "test - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - "FGUMI_DUPLEX_METRICS", - "fgumi", - "0.1.2" - ] - ], - "duplex_metrics": [ - [ - { - "id": "test" - }, - "test.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions_fgumi": [ - [ - "FGUMI_DUPLEX_METRICS", - "fgumi", - "0.1.2" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.4" - }, - "timestamp": "2026-04-02T16:04:44.383973526" - } -} \ No newline at end of file diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test index 405fb1d7..2c553192 100644 --- a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test +++ b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test @@ -46,7 +46,6 @@ nextflow_workflow { then { assert workflow.success // Explicitly assert newly exposed UMI channels. 
- assert workflow.out.duplex_metrics.size() == 1 assert workflow.out.family_size_histogram.size() == 1 assert workflow.out.filtered_consensus_bam.size() == 1 assert snapshot( diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap index d699ff72..55035830 100644 --- a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap +++ b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap @@ -25,27 +25,6 @@ "UMI_consensus1.cram.crai" ] ], - "duplex_metrics": [ - [ - { - "id": "UMI_consensus1", - "samplename": "HT1080_chr20", - "single_end": false, - "sample_type": "DNA", - "markdup": "bamsormadup", - "fgumi_aware": true, - "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] - } - }, - "UMI_consensus1.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], "family_size_histogram": [ [ { @@ -178,6 +157,6 @@ "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-04-08T10:29:40.567401171" + "timestamp": "2026-04-09T10:20:21.479979029" } } \ No newline at end of file diff --git a/tests/subworkflows/local/umi_consensus/main.nf.test b/tests/subworkflows/local/umi_consensus/main.nf.test index a365bb28..0ca457d6 100644 --- a/tests/subworkflows/local/umi_consensus/main.nf.test +++ b/tests/subworkflows/local/umi_consensus/main.nf.test @@ -48,7 +48,6 @@ nextflow_workflow { assert workflow.out.family_size_histogram.size() == 1 assert workflow.out.consensus_metrics.size() == 1 assert workflow.out.filtering_metrics.size() == 1 - assert workflow.out.duplex_metrics.size() == 1 assert workflow.out.filtered_consensus_bam.size() == 1 assert snapshot(workflow.out).match() } diff --git 
a/tests/subworkflows/local/umi_consensus/main.nf.test.snap b/tests/subworkflows/local/umi_consensus/main.nf.test.snap index 9fc3df12..9dbc33d6 100644 --- a/tests/subworkflows/local/umi_consensus/main.nf.test.snap +++ b/tests/subworkflows/local/umi_consensus/main.nf.test.snap @@ -109,27 +109,6 @@ ] ], "5": [ - [ - { - "id": "UMI_consensus1", - "samplename": "HT1080_chr20", - "single_end": false, - "sample_type": "DNA", - "markdup": "bamsormadup", - "fgumi_aware": true, - "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] - } - }, - "UMI_consensus1.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "6": [ [ { "id": "UMI_consensus1", @@ -193,27 +172,6 @@ "UMI_consensus1.fgumi.simplex.consensus_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "duplex_metrics": [ - [ - { - "id": "UMI_consensus1", - "samplename": "HT1080_chr20", - "single_end": false, - "sample_type": "DNA", - "markdup": "bamsormadup", - "fgumi_aware": true, - "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] - } - }, - "UMI_consensus1.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], "family_size_histogram": [ [ { @@ -304,6 +262,6 @@ "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-04-08T10:16:16.364166786" + "timestamp": "2026-04-09T10:19:55.500577314" } } \ No newline at end of file diff --git a/workflows/preprocessing.nf b/workflows/preprocessing.nf index bca3a175..78768cf6 100644 --- a/workflows/preprocessing.nf +++ b/workflows/preprocessing.nf @@ -273,7 +273,6 @@ workflow PREPROCESSING { ) // Collect 
both standard and UMI-specific metrics for MultiQC. ch_multiqc_files = ch_multiqc_files.mix(FASTQ_TO_CRAM.out.sormadup_metrics) - ch_multiqc_files = ch_multiqc_files.mix(FASTQ_TO_CRAM.out.duplex_metrics) ch_multiqc_files = ch_multiqc_files.mix(FASTQ_TO_CRAM.out.family_size_histogram) /* @@ -438,7 +437,6 @@ workflow PREPROCESSING { // UMI-specific outputs exposed at workflow level. family_size_histogram = FASTQ_TO_CRAM.out.family_size_histogram umi_filtered_consensus_bam = FASTQ_TO_CRAM.out.filtered_consensus_bam - umi_duplex_metrics = FASTQ_TO_CRAM.out.duplex_metrics umi_crams = FASTQ_TO_CRAM.out.cram_crai.filter { meta, _cram, _crai -> meta.fgumi_aware == true } mosdepth_global = COVERAGE.out.mosdepth_global mosdepth_summary = COVERAGE.out.mosdepth_summary From c7bd8547fc1319f2839bdde96c837050c0aa5d8c Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Thu, 9 Apr 2026 10:32:05 +0200 Subject: [PATCH 19/36] Removed output redundancy --- main.nf | 7 ------- tests/modules/local/fgumi/sort/main.nf.test | 1 + workflows/preprocessing.nf | 1 - 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/main.nf b/main.nf index 34af1923..76c0f2ac 100644 --- a/main.nf +++ b/main.nf @@ -181,7 +181,6 @@ workflow { // Additional UMI consensus outputs. family_size_histogram = PREPROCESSING.out.family_size_histogram umi_filtered_consensus_bam = PREPROCESSING.out.umi_filtered_consensus_bam - umi_crams = PREPROCESSING.out.umi_crams mosdepth_global = PREPROCESSING.out.mosdepth_global mosdepth_summary = PREPROCESSING.out.mosdepth_summary mosdepth_regions = PREPROCESSING.out.mosdepth_regions @@ -290,12 +289,6 @@ output { bam >> (meta.library ? "${meta.library}/${meta.samplename}/${bam.name}" : "${meta.samplename}/${bam.name}") } } - umi_crams { - path { meta, cram, crai -> - cram >> (meta.library ? "${meta.library}/${meta.samplename}/${meta.samplename}.umi.cram" : "${meta.samplename}/${meta.samplename}.umi.cram") - crai >> (meta.library ? 
"${meta.library}/${meta.samplename}/${meta.samplename}.umi.cram.crai" : "${meta.samplename}/${meta.samplename}.umi.cram.crai") - } - } mosdepth_global { path { meta, _file -> return (meta.library ? "${meta.library}/${meta.samplename}/" : "${meta.samplename}/") diff --git a/tests/modules/local/fgumi/sort/main.nf.test b/tests/modules/local/fgumi/sort/main.nf.test index 7879f414..045b4bbb 100644 --- a/tests/modules/local/fgumi/sort/main.nf.test +++ b/tests/modules/local/fgumi/sort/main.nf.test @@ -29,4 +29,5 @@ nextflow_process { assert snapshot(process.out).match() } } + } diff --git a/workflows/preprocessing.nf b/workflows/preprocessing.nf index 78768cf6..5a335908 100644 --- a/workflows/preprocessing.nf +++ b/workflows/preprocessing.nf @@ -437,7 +437,6 @@ workflow PREPROCESSING { // UMI-specific outputs exposed at workflow level. family_size_histogram = FASTQ_TO_CRAM.out.family_size_histogram umi_filtered_consensus_bam = FASTQ_TO_CRAM.out.filtered_consensus_bam - umi_crams = FASTQ_TO_CRAM.out.cram_crai.filter { meta, _cram, _crai -> meta.fgumi_aware == true } mosdepth_global = COVERAGE.out.mosdepth_global mosdepth_summary = COVERAGE.out.mosdepth_summary mosdepth_regions = COVERAGE.out.mosdepth_regions From 0dfc684a96ccef8d62e15b9bfea52c73bbdd6b0d Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Thu, 9 Apr 2026 11:43:21 +0200 Subject: [PATCH 20/36] Removed test BAMs + added paths to github repo test BAMs --- tests/inputs/fgumi/consensus.bam | 1 - tests/inputs/fgumi/template.bam | 1 - tests/inputs/fgumi/unmapped.bam | 1 - tests/modules/local/fgumi/extract/main.nf.test | 4 ++-- tests/modules/local/fgumi/filter/main.nf.test | 2 +- tests/modules/local/fgumi/group/main.nf.test | 2 +- tests/modules/local/fgumi/simplex/main.nf.test | 2 +- tests/modules/local/fgumi/snapzippersort/main.nf.test | 2 +- tests/modules/local/fgumi/sort/main.nf.test | 2 +- 9 files changed, 7 insertions(+), 10 deletions(-) delete mode 100644 tests/inputs/fgumi/consensus.bam delete mode 100644 
tests/inputs/fgumi/template.bam delete mode 100644 tests/inputs/fgumi/unmapped.bam diff --git a/tests/inputs/fgumi/consensus.bam b/tests/inputs/fgumi/consensus.bam deleted file mode 100644 index 48cdce85..00000000 --- a/tests/inputs/fgumi/consensus.bam +++ /dev/null @@ -1 +0,0 @@ -placeholder diff --git a/tests/inputs/fgumi/template.bam b/tests/inputs/fgumi/template.bam deleted file mode 100644 index 48cdce85..00000000 --- a/tests/inputs/fgumi/template.bam +++ /dev/null @@ -1 +0,0 @@ -placeholder diff --git a/tests/inputs/fgumi/unmapped.bam b/tests/inputs/fgumi/unmapped.bam deleted file mode 100644 index 48cdce85..00000000 --- a/tests/inputs/fgumi/unmapped.bam +++ /dev/null @@ -1 +0,0 @@ -placeholder diff --git a/tests/modules/local/fgumi/extract/main.nf.test b/tests/modules/local/fgumi/extract/main.nf.test index ad1088a5..a92c75ce 100644 --- a/tests/modules/local/fgumi/extract/main.nf.test +++ b/tests/modules/local/fgumi/extract/main.nf.test @@ -19,8 +19,8 @@ nextflow_process { input[0] = [ [id: "test", samplename: "test", library: "lib1"], [ - file("${projectDir}/tests/inputs/fgumi/R1.fastq.gz", checkIfExists: true), - file("${projectDir}/tests/inputs/fgumi/R2.fastq.gz", checkIfExists: true) + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R1_001.fastq.gz", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R2_001.fastq.gz", checkIfExists: true) ] ] """ diff --git a/tests/modules/local/fgumi/filter/main.nf.test b/tests/modules/local/fgumi/filter/main.nf.test index bec1f2b3..1c7f7c69 100644 --- a/tests/modules/local/fgumi/filter/main.nf.test +++ b/tests/modules/local/fgumi/filter/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { """ input[0] = [ [id: "test"], - file("${projectDir}/tests/inputs/fgumi/consensus.bam", checkIfExists: true), + 
file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/consensus.bam", checkIfExists: true), file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true) ] """ diff --git a/tests/modules/local/fgumi/group/main.nf.test b/tests/modules/local/fgumi/group/main.nf.test index 8fb9b79e..cd54ae2f 100644 --- a/tests/modules/local/fgumi/group/main.nf.test +++ b/tests/modules/local/fgumi/group/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { """ input[0] = [ [id: "test"], - file("${projectDir}/tests/inputs/fgumi/template.bam", checkIfExists: true) + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/template.bam", checkIfExists: true) ] """ } diff --git a/tests/modules/local/fgumi/simplex/main.nf.test b/tests/modules/local/fgumi/simplex/main.nf.test index f30cbe76..9ff5c017 100644 --- a/tests/modules/local/fgumi/simplex/main.nf.test +++ b/tests/modules/local/fgumi/simplex/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { """ input[0] = [ [id: "test"], - file("${projectDir}/tests/inputs/fgumi/template.bam", checkIfExists: true) + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/grouped.bam", checkIfExists: true) ] """ } diff --git a/tests/modules/local/fgumi/snapzippersort/main.nf.test b/tests/modules/local/fgumi/snapzippersort/main.nf.test index e27f388e..88ca0500 100644 --- a/tests/modules/local/fgumi/snapzippersort/main.nf.test +++ b/tests/modules/local/fgumi/snapzippersort/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { """ input[0] = [ [id: "test", samplename: "test"], - file("${projectDir}/tests/inputs/fgumi/unmapped.bam", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/unmapped.bam", checkIfExists: true), file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true), 
file("${projectDir}/tests/inputs/fgumi/ref.dict", checkIfExists: true) diff --git a/tests/modules/local/fgumi/sort/main.nf.test b/tests/modules/local/fgumi/sort/main.nf.test index 045b4bbb..380727d0 100644 --- a/tests/modules/local/fgumi/sort/main.nf.test +++ b/tests/modules/local/fgumi/sort/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { """ input[0] = [ [id: "test"], - file("${projectDir}/tests/inputs/fgumi/consensus.bam", checkIfExists: true) + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/consensus.bam", checkIfExists: true) ] """ } From fb498ecf35a124fc878d9f336d7678811a628640 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Fri, 10 Apr 2026 15:07:59 +0200 Subject: [PATCH 21/36] Updated filepaths in tests --- tests/inputs/fgumi/R1.fastq.gz | 4 ---- tests/inputs/fgumi/R2.fastq.gz | 4 ---- tests/inputs/fgumi/ref.dict | 2 -- tests/inputs/fgumi/ref.fa | 2 -- tests/inputs/fgumi/ref.fa.fai | 1 - tests/inputs/test.yml | 8 ++++---- tests/modules/local/fgumi/extract/main.nf.test | 4 ++-- tests/modules/local/fgumi/filter/main.nf.test | 2 +- tests/modules/local/fgumi/snapzippersort/main.nf.test | 4 ++-- 9 files changed, 9 insertions(+), 22 deletions(-) delete mode 100644 tests/inputs/fgumi/R1.fastq.gz delete mode 100644 tests/inputs/fgumi/R2.fastq.gz delete mode 100644 tests/inputs/fgumi/ref.dict delete mode 100644 tests/inputs/fgumi/ref.fa delete mode 100644 tests/inputs/fgumi/ref.fa.fai diff --git a/tests/inputs/fgumi/R1.fastq.gz b/tests/inputs/fgumi/R1.fastq.gz deleted file mode 100644 index 3b41ea25..00000000 --- a/tests/inputs/fgumi/R1.fastq.gz +++ /dev/null @@ -1,4 +0,0 @@ -@r1 -ACGT -+ -!!!! diff --git a/tests/inputs/fgumi/R2.fastq.gz b/tests/inputs/fgumi/R2.fastq.gz deleted file mode 100644 index c5b0a999..00000000 --- a/tests/inputs/fgumi/R2.fastq.gz +++ /dev/null @@ -1,4 +0,0 @@ -@r2 -TGCA -+ -!!!! 
diff --git a/tests/inputs/fgumi/ref.dict b/tests/inputs/fgumi/ref.dict deleted file mode 100644 index c7a324e0..00000000 --- a/tests/inputs/fgumi/ref.dict +++ /dev/null @@ -1,2 +0,0 @@ -@HD VN:1.6 SO:unsorted -@SQ SN:chr1 LN:12 diff --git a/tests/inputs/fgumi/ref.fa b/tests/inputs/fgumi/ref.fa deleted file mode 100644 index 41884804..00000000 --- a/tests/inputs/fgumi/ref.fa +++ /dev/null @@ -1,2 +0,0 @@ ->chr1 -ACGTACGTACGT diff --git a/tests/inputs/fgumi/ref.fa.fai b/tests/inputs/fgumi/ref.fa.fai deleted file mode 100644 index 795162b9..00000000 --- a/tests/inputs/fgumi/ref.fa.fai +++ /dev/null @@ -1 +0,0 @@ -chr1 12 6 12 13 diff --git a/tests/inputs/test.yml b/tests/inputs/test.yml index 051ac1ca..c09566ea 100644 --- a/tests/inputs/test.yml +++ b/tests/inputs/test.yml @@ -51,8 +51,8 @@ fastq_2: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R2.fastq.gz # UMI consensus (fgumi) inputs # Example DNA sample with fgumi_aware enabled for fgumi processing. 
-- id: UMI_consensus1 - samplename: HT1080-chr20 +- id: sample1 + samplename: sample1-chr21 library: test_library organism: Homo sapiens tag: WES @@ -61,6 +61,6 @@ markdup: bamsormadup fgumi_aware: true run_coverage: true - fastq_1: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R1_001.fastq.gz - fastq_2: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R2_001.fastq.gz + fastq_1: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/sample1_S31_R1_001.fastq.gz + fastq_2: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/sample1_S31_R2_001.fastq.gz diff --git a/tests/modules/local/fgumi/extract/main.nf.test b/tests/modules/local/fgumi/extract/main.nf.test index a92c75ce..40cf1af8 100644 --- a/tests/modules/local/fgumi/extract/main.nf.test +++ b/tests/modules/local/fgumi/extract/main.nf.test @@ -19,8 +19,8 @@ nextflow_process { input[0] = [ [id: "test", samplename: "test", library: "lib1"], [ - file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R1_001.fastq.gz", checkIfExists: true), - file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R2_001.fastq.gz", checkIfExists: true) + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/sample1_S31_R1_001.fastq.gz", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/sample1_S31_R2_001.fastq.gz", checkIfExists: true) ] ] """ diff --git a/tests/modules/local/fgumi/filter/main.nf.test b/tests/modules/local/fgumi/filter/main.nf.test index 1c7f7c69..51c6f8c8 100644 --- a/tests/modules/local/fgumi/filter/main.nf.test +++ b/tests/modules/local/fgumi/filter/main.nf.test @@ -19,7 +19,7 @@ 
nextflow_process { input[0] = [ [id: "test"], file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/consensus.bam", checkIfExists: true), - file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true) + file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", checkIfExists: true) ] """ } diff --git a/tests/modules/local/fgumi/snapzippersort/main.nf.test b/tests/modules/local/fgumi/snapzippersort/main.nf.test index 88ca0500..c3a34cbe 100644 --- a/tests/modules/local/fgumi/snapzippersort/main.nf.test +++ b/tests/modules/local/fgumi/snapzippersort/main.nf.test @@ -20,8 +20,8 @@ nextflow_process { [id: "test", samplename: "test"], file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/unmapped.bam", checkIfExists: true), file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), - file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true), - file("${projectDir}/tests/inputs/fgumi/ref.dict", checkIfExists: true) + file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", checkIfExists: true) ] """ } From 866e38a20fb710e4217cbcd25152cf4416e6aa83 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Fri, 10 Apr 2026 17:24:48 +0200 Subject: [PATCH 22/36] removed thread and mem args from config --- conf/modules.config | 3 --- modules/local/fgumi/extract/main.nf | 5 +++++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 55baae7d..43b5ba09 100644 --- a/conf/modules.config +++ 
b/conf/modules.config @@ -242,9 +242,6 @@ process { "--library \"${meta.library ?: meta.id}\"", "--read-structures ${meta.fgumi_read_structures ?: '+T +T'}", ((meta.fgumi_extract_umis_from_read_names != null ? meta.fgumi_extract_umis_from_read_names : true) ? "--extract-umis-from-read-names" : ""), - "--threads ${task.cpus}", - "--queue-memory ${params.fgumi_queue_memory}", - params.fgumi_queue_memory_per_thread ? "--queue-memory-per-thread" : "", "--compression-level ${params.fgumi_compression_level}", ].join(" ").trim() } diff --git a/modules/local/fgumi/extract/main.nf b/modules/local/fgumi/extract/main.nf index ffabc4b4..89002f13 100644 --- a/modules/local/fgumi/extract/main.nf +++ b/modules/local/fgumi/extract/main.nf @@ -18,12 +18,17 @@ process FGUMI_EXTRACT { script: def args = task.ext.args ?: '' + // Derive per-thread queue memory from requested process resources. + def queue_memory_mb = (task.memory.mega / task.cpus * 0.75).intValue() prefix = task.ext.prefix ?: "${meta.id}.fgumi.unmapped" """ fgumi extract \ --inputs ${reads} \ --output ${prefix}.bam \ + --threads ${task.cpus} \ + --queue-memory ${queue_memory_mb} \ + --queue-memory-per-thread \ ${args} """ From 8f770e09cd3e620ba0640db945f49b119b180bb9 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Fri, 10 Apr 2026 17:28:27 +0200 Subject: [PATCH 23/36] params to meta --- conf/modules.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 43b5ba09..66309529 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -297,9 +297,9 @@ process { ext.prefix = { "${meta.id}.fgumi.group" } ext.args = { [ - "--strategy ${params.fgumi_group_strategy}", - "--edits ${params.fgumi_group_edits}", - "--compression-level ${params.fgumi_compression_level}", + "--strategy ${meta.fgumi_group_strategy ?: 'adjacency'}", + "--edits ${meta.fgumi_group_edits != null ? 
meta.fgumi_group_edits : 1}", + "--compression-level ${meta.fgumi_compression_level != null ? meta.fgumi_compression_level : 1}", "--grouping-metrics ${meta.id}.fgumi.group.grouping_metrics.txt", "--family-size-histogram ${meta.id}.fgumi.group.family_size_histogram.txt", ].join(" ").trim() From 9feaf250ca5fd449ca7f4d46c55334980f6039a7 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Fri, 10 Apr 2026 17:35:38 +0200 Subject: [PATCH 24/36] Fixed paths in test --- .../fastq_to_aligned_cram/fgumi_umi_stub.nf.test | 16 ++++++++-------- .../local/umi_consensus/main.nf.test | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test index 2c553192..c03f41e3 100644 --- a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test +++ b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test @@ -24,19 +24,19 @@ nextflow_workflow { markdup: "bamsormadup", fgumi_aware: true, genome_data: [ - fasta: "${projectDir}/tests/inputs/fgumi/ref.fa", - fai: "${projectDir}/tests/inputs/fgumi/ref.fa.fai", - dict: "${projectDir}/tests/inputs/fgumi/ref.dict", - snap: "${projectDir}/tests/inputs/fgumi/snap_index" + fasta: "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", + fai: "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai", + dict: "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", + snap: "s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/" ] ], [ - file("${projectDir}/tests/inputs/fgumi/R1.fastq.gz", checkIfExists: true), - file("${projectDir}/tests/inputs/fgumi/R2.fastq.gz", checkIfExists: true) + 
file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/illumina/fastq/sample1_R1.fastq.gz", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/illumina/fastq/sample1_R2.fastq.gz", checkIfExists: true) ], "snap", - file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), - file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true), + file("s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/", checkIfExists: true), + file("s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", checkIfExists: true), [] ]) """ diff --git a/tests/subworkflows/local/umi_consensus/main.nf.test b/tests/subworkflows/local/umi_consensus/main.nf.test index 0ca457d6..7135f5db 100644 --- a/tests/subworkflows/local/umi_consensus/main.nf.test +++ b/tests/subworkflows/local/umi_consensus/main.nf.test @@ -23,19 +23,19 @@ nextflow_workflow { markdup: "bamsormadup", fgumi_aware: true, genome_data: [ - fasta: "${projectDir}/tests/inputs/fgumi/ref.fa", - fai: "${projectDir}/tests/inputs/fgumi/ref.fa.fai", - dict: "${projectDir}/tests/inputs/fgumi/ref.dict", - snap: "${projectDir}/tests/inputs/fgumi/snap_index" + fasta: "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", + fai: "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai", + dict: "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", + snap: "s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/" ] ], [ - file("${projectDir}/tests/inputs/fgumi/R1.fastq.gz", checkIfExists: true), - file("${projectDir}/tests/inputs/fgumi/R2.fastq.gz", checkIfExists: true) + 
file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/illumina/fastq/sample1_R1.fastq.gz", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/illumina/fastq/sample1_R2.fastq.gz", checkIfExists: true) ], "snap", - file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), - file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true) + file("s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/", checkIfExists: true), + file("s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", checkIfExists: true) ]) """ } From 74d5c8496408296086679014acd40e12168c5688 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Thu, 16 Apr 2026 09:22:50 +0200 Subject: [PATCH 25/36] Updated fgumi to version 0.1.3 --- modules/local/fgumi/snapzippersort/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/fgumi/snapzippersort/main.nf b/modules/local/fgumi/snapzippersort/main.nf index 0c6ec63b..0af50978 100644 --- a/modules/local/fgumi/snapzippersort/main.nf +++ b/modules/local/fgumi/snapzippersort/main.nf @@ -3,8 +3,8 @@ process FGUMI_SNAP_ZIPPER_SORT { label 'process_high' container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/24/2466270633749543330f352e34588f142de4988585ce63e7f22ee5ed1ff57450/data' - : 'community.wave.seqera.io/library/fgumi_samtools_snap-aligner:c9ba911435350668'}" + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/260799863489814407983695270f20538a7c28a25c1a14f4477c44e9955743b1/data' + : 'community.wave.seqera.io/library/fgumi_samtools_snap-aligner:fe040922c66ac98d'}" input: tuple val(meta), path(unmapped_bam), path(index, stageAs: "index/*"), path(fasta), path(dict) From 16edc5be5ecfc730a9f0d0b485f92ced5870a50b Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Thu, 23 Apr 2026 15:15:55 +0200 Subject: [PATCH 26/36] Updated docs + fgumi v0.2.0 --- docs/README.md | 2 + docs/analysis_flow.md | 68 ++++++++++++++++++++++ docs/usage.md | 4 +- modules/local/fgumi/filter/main.nf | 4 +- modules/local/fgumi/group/main.nf | 4 +- modules/local/fgumi/simplex/main.nf | 4 +- modules/local/fgumi/snapzippersort/main.nf | 4 +- modules/local/fgumi/sort/main.nf | 4 +- 8 files changed, 83 insertions(+), 11 deletions(-) create mode 100644 docs/analysis_flow.md diff --git a/docs/README.md b/docs/README.md index 78d3e752..f255a916 100644 --- a/docs/README.md +++ b/docs/README.md @@ -4,6 +4,8 @@ The nf-cmgg/preprocessing documentation is split into the following pages: - [Usage](usage.md) - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. +- [Analysis flow](analysis_flow.md) + - Mermaid overview of the current fgumi-aware analysis path and integration into the FASTQ to CRAM workflow. - [Output](output.md) - An overview of the different results produced by the pipeline and how to interpret them. - [Parameters](parameters.md) diff --git a/docs/analysis_flow.md b/docs/analysis_flow.md new file mode 100644 index 00000000..bc311173 --- /dev/null +++ b/docs/analysis_flow.md @@ -0,0 +1,68 @@ +# nf-cmgg/preprocessing: Analysis Flow + +## FGUMI-aware DNA flow (new branch) + +The diagram below summarizes the current fgumi-aware branch and how it joins the common FASTQ to CRAM path. 
+It reflects the current wiring in `FASTQ_TO_CRAM` and `UMI_CONSENSUS_FGUMI`, including the early branching and data joins. + +```mermaid +flowchart TD + A[Input channel: meta + reads + aligner + index + fasta + gtf] --> B{sample_type} + B -->|RNA| R1[FASTQ_ALIGN_RNA] + B -->|DNA| C{meta.fgumi_aware == true} + + C -->|false| D1[FASTQ_ALIGN_DNA non-UMI] + C -->|true| U0[Enter UMI_CONSENSUS_FGUMI] + + subgraph UMI_CONSENSUS_FGUMI + U1[Step 1: FGUMI_EXTRACT\n(reads -> unmapped BAM with UMI tags)] + U1J[Join with reference assets\nSNAP index + fasta + dict from meta.genome_data] + U2[Step 3: FGUMI_SNAP_ZIPPER_SORT] + U2a[samtools sort -n\nunmapped BAM] + U2b[fgumi fastq] + U2c[snap-aligner paired] + U2d[samtools sort -n\npost-SNAP] + U2e[fgumi zipper] + U2f[fgumi sort --order template-coordinate] + U3[Step 4: FGUMI_GROUP] + U4[Step 5: FGUMI_SIMPLEX] + U5J[Join simplex BAM with fasta] + U5[Step 7a: FGUMI_FILTER] + U6[Step 7b: FGUMI_SORT\ncoordinate sort + index] + + U1 --> U1J --> U2 + U2 --> U2a --> U2b --> U2c --> U2d --> U2e --> U2f + U2f --> U3 --> U4 --> U5J --> U5 --> U6 + end + + D1 --> M1[Markdup branch selector\nbamsormadup | samtools | sort] + R1 --> M1 + + U6 --> MIX1[Mix UMI BAM/BAI into common postprocess stream] + U3 --> MET1[grouping_metrics] + U3 --> MET2[family_size_histogram] + U4 --> MET3[consensus_metrics] + U5 --> MET4[filtering_metrics] + U6 --> MET5[filtered_consensus_bam] + + M1 --> P1[BIOBAMBAM_BAMSORMADUP or SAMTOOLS_SORMADUP or SAMTOOLS_SORT] + P1 --> COMP{bam or cram} + MIX1 --> COMP + + COMP -->|bam| CVT[SAMTOOLS_CONVERT to CRAM] + COMP -->|cram| OUT1[cram + crai] + CVT --> OUT1 + + OUT1 --> E1[emit: cram_crai] + MET1 --> E2[emit: sormadup_metrics] + MET2 --> E3[emit: family_size_histogram] + MET5 --> E4[emit: filtered_consensus_bam] +``` + +## Notes + +- The fgumi branch is opt-in per sample via fgumi_aware. + - Step numbering mirrors the implementation comments: steps 1, 3, 4, 5, and 7 are executed in this branch. 
+ - The "step 2" nomenclature from upstream fgumi Basic Workflow is intentionally absent in this pipeline path. +- UMI-specific metrics are emitted and mixed into the common reporting stream. +- The output still converges to the shared CRAM/crai downstream path. diff --git a/docs/usage.md b/docs/usage.md index 7f3055af..61755d15 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,6 +1,8 @@ # nf-cmgg/preprocessing: Usage -Parameter documentation can be found [here](parameters.md) +Parameter documentation can be found in the [pipeline parameters reference](parameters.md). + +The current fgumi-aware branch and overall integration into the FASTQ to CRAM flow are documented in the [analysis flow diagram](analysis_flow.md). ## Introduction diff --git a/modules/local/fgumi/filter/main.nf b/modules/local/fgumi/filter/main.nf index b63752da..206e1f6a 100644 --- a/modules/local/fgumi/filter/main.nf +++ b/modules/local/fgumi/filter/main.nf @@ -3,8 +3,8 @@ process FGUMI_FILTER { label 'process_medium' container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data' - : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}" + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f7267104b209869695781a5f4585c490a61250269f8c6f14068535a3962b865a/data' + : 'community.wave.seqera.io/library/fgumi:0.2.0--fe028e7a64e5da27'}" input: tuple val(meta), path(bam), path(fasta) diff --git a/modules/local/fgumi/group/main.nf b/modules/local/fgumi/group/main.nf index d079db7f..dd93464b 100644 --- a/modules/local/fgumi/group/main.nf +++ b/modules/local/fgumi/group/main.nf @@ -3,8 +3,8 @@ process FGUMI_GROUP { label 'process_medium' container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data' - : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}" + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f7267104b209869695781a5f4585c490a61250269f8c6f14068535a3962b865a/data' + : 'community.wave.seqera.io/library/fgumi:0.2.0--fe028e7a64e5da27'}" input: tuple val(meta), path(bam) diff --git a/modules/local/fgumi/simplex/main.nf b/modules/local/fgumi/simplex/main.nf index 81dfc29a..fd1db692 100644 --- a/modules/local/fgumi/simplex/main.nf +++ b/modules/local/fgumi/simplex/main.nf @@ -3,8 +3,8 @@ process FGUMI_SIMPLEX { label 'process_medium' container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data' - : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}" + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f7267104b209869695781a5f4585c490a61250269f8c6f14068535a3962b865a/data' + : 'community.wave.seqera.io/library/fgumi:0.2.0--fe028e7a64e5da27'}" input: tuple val(meta), path(bam) diff --git a/modules/local/fgumi/snapzippersort/main.nf b/modules/local/fgumi/snapzippersort/main.nf index 0af50978..bdfe9bed 100644 --- a/modules/local/fgumi/snapzippersort/main.nf +++ b/modules/local/fgumi/snapzippersort/main.nf @@ -3,8 +3,8 @@ process FGUMI_SNAP_ZIPPER_SORT { label 'process_high' container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/260799863489814407983695270f20538a7c28a25c1a14f4477c44e9955743b1/data' - : 'community.wave.seqera.io/library/fgumi_samtools_snap-aligner:fe040922c66ac98d'}" + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/763a833519c23555be888065f492215f57344155106972e272a0f8df78c57659/data' + : 'community.wave.seqera.io/library/fgumi_samtools_snap-aligner:c985f9394623a414'}" input: tuple val(meta), path(unmapped_bam), path(index, stageAs: "index/*"), path(fasta), path(dict) diff --git a/modules/local/fgumi/sort/main.nf b/modules/local/fgumi/sort/main.nf index abfbf633..a7963fbf 100644 --- a/modules/local/fgumi/sort/main.nf +++ b/modules/local/fgumi/sort/main.nf @@ -3,8 +3,8 @@ process FGUMI_SORT { label 'process_medium' container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data' - : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}" + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f7267104b209869695781a5f4585c490a61250269f8c6f14068535a3962b865a/data' + : 'community.wave.seqera.io/library/fgumi:0.2.0--fe028e7a64e5da27'}" input: tuple val(meta), path(bam) From 9be90133002e076386dc87349656685b49a33b26 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Tue, 12 May 2026 15:03:29 +0200 Subject: [PATCH 27/36] Changed fgumi sort to samtools sort CRAM output --- conf/modules.config | 26 +++++++++++++++++-- docs/analysis_flow.md | 8 +++--- main.nf | 8 +++--- .../local/fastq_to_aligned_cram/main.nf | 6 ++--- subworkflows/local/umi_consensus/main.nf | 14 +++++++--- .../fgumi_umi_stub.nf.test | 2 +- .../fgumi_umi_stub.nf.test.snap | 4 +-- .../local/umi_consensus/main.nf.test | 4 +-- .../local/umi_consensus/main.nf.test.snap | 10 +++---- workflows/preprocessing.nf | 2 +- 10 files changed, 56 insertions(+), 28 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 66309529..fc9d7703 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -238,8 +238,16 @@ 
process { ext.prefix = { "${meta.id}.fgumi.unmapped" } ext.args = { [ - "--sample \"${meta.id}\"", - "--library \"${meta.library ?: meta.id}\"", + "--read-group-id ${meta.readgroup?.ID ?: meta.id}", + "--sample \"${meta.readgroup?.SM ?: meta.id}\"", + "--library \"${meta.readgroup?.LB ?: meta.library ?: meta.id}\"", + meta.readgroup?.PL ? "--platform ${meta.readgroup.PL}" : "", + meta.readgroup?.PU ? "--platform-unit \"${meta.readgroup.PU}\"" : "", + meta.readgroup?.PM ? "--platform-model \"${meta.readgroup.PM}\"" : "", + meta.readgroup?.CN ? "--sequencing-center \"${meta.readgroup.CN}\"" : "", + meta.readgroup?.PI ? "--predicted-insert-size ${meta.readgroup.PI}" : "", + meta.readgroup?.DS ? "--description \"${meta.readgroup.DS}\"" : "", + meta.readgroup?.DT ? "--run-date \"${meta.readgroup.DT}\"" : "", "--read-structures ${meta.fgumi_read_structures ?: '+T +T'}", ((meta.fgumi_extract_umis_from_read_names != null ? meta.fgumi_extract_umis_from_read_names : true) ? "--extract-umis-from-read-names" : ""), "--compression-level ${params.fgumi_compression_level}", @@ -352,6 +360,20 @@ process { } } + //// UMI step 7b now uses samtools sort to produce CRAM directly. 
+ withName: '.*UMI_CONSENSUS_FGUMI:SAMTOOLS_SORT' { + cpus = 8 + memory = 32.GB + ext.prefix = { "${meta.id}.fgumi.filter" } + ext.args = { + [ + "--write-index", + "--output-fmt cram,version=3.0", + "--output-fmt-option archive", + ].join(" ").trim() + } + } + // coverage //// Mosdepth withName: '.*COVERAGE:MOSDEPTH' { diff --git a/docs/analysis_flow.md b/docs/analysis_flow.md index bc311173..1aea1fb7 100644 --- a/docs/analysis_flow.md +++ b/docs/analysis_flow.md @@ -28,7 +28,7 @@ flowchart TD U4[Step 5: FGUMI_SIMPLEX] U5J[Join simplex BAM with fasta] U5[Step 7a: FGUMI_FILTER] - U6[Step 7b: FGUMI_SORT\ncoordinate sort + index] + U6[Step 7b: SAMTOOLS_SORT\ncoordinate sort + CRAM index] U1 --> U1J --> U2 U2 --> U2a --> U2b --> U2c --> U2d --> U2e --> U2f @@ -38,12 +38,12 @@ flowchart TD D1 --> M1[Markdup branch selector\nbamsormadup | samtools | sort] R1 --> M1 - U6 --> MIX1[Mix UMI BAM/BAI into common postprocess stream] + U6 --> MIX1[Mix UMI CRAM/CRAI into common postprocess stream] U3 --> MET1[grouping_metrics] U3 --> MET2[family_size_histogram] U4 --> MET3[consensus_metrics] U5 --> MET4[filtering_metrics] - U6 --> MET5[filtered_consensus_bam] + U6 --> MET5[filtered_consensus_cram] M1 --> P1[BIOBAMBAM_BAMSORMADUP or SAMTOOLS_SORMADUP or SAMTOOLS_SORT] P1 --> COMP{bam or cram} @@ -56,7 +56,7 @@ flowchart TD OUT1 --> E1[emit: cram_crai] MET1 --> E2[emit: sormadup_metrics] MET2 --> E3[emit: family_size_histogram] - MET5 --> E4[emit: filtered_consensus_bam] + MET5 --> E4[emit: filtered_consensus_cram] ``` ## Notes diff --git a/main.nf b/main.nf index 76c0f2ac..7f9d4e24 100644 --- a/main.nf +++ b/main.nf @@ -180,7 +180,7 @@ workflow { sormadup_metrics = PREPROCESSING.out.sormadup_metrics // Additional UMI consensus outputs. 
family_size_histogram = PREPROCESSING.out.family_size_histogram - umi_filtered_consensus_bam = PREPROCESSING.out.umi_filtered_consensus_bam + umi_filtered_consensus_cram = PREPROCESSING.out.umi_filtered_consensus_cram mosdepth_global = PREPROCESSING.out.mosdepth_global mosdepth_summary = PREPROCESSING.out.mosdepth_summary mosdepth_regions = PREPROCESSING.out.mosdepth_regions @@ -284,9 +284,9 @@ output { histogram >> (meta.library ? "${meta.library}/${meta.samplename}/${histogram.name}" : "${meta.samplename}/${histogram.name}") } } - umi_filtered_consensus_bam { - path { meta, bam -> - bam >> (meta.library ? "${meta.library}/${meta.samplename}/${bam.name}" : "${meta.samplename}/${bam.name}") + umi_filtered_consensus_cram { + path { meta, cram -> + cram >> (meta.library ? "${meta.library}/${meta.samplename}/${cram.name}" : "${meta.samplename}/${cram.name}") } } mosdepth_global { diff --git a/subworkflows/local/fastq_to_aligned_cram/main.nf b/subworkflows/local/fastq_to_aligned_cram/main.nf index 5a05e476..e3d0fc9e 100644 --- a/subworkflows/local/fastq_to_aligned_cram/main.nf +++ b/subworkflows/local/fastq_to_aligned_cram/main.nf @@ -118,14 +118,14 @@ workflow FASTQ_TO_CRAM { // UMI branch outputs are mixed into the common markdup/metrics streams. 
ch_markdup_index = ch_markdup_index.mix( - UMI_CONSENSUS_FGUMI.out.bam_bai + UMI_CONSENSUS_FGUMI.out.cram_crai ) ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.grouping_metrics) ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.family_size_histogram) ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.consensus_metrics) ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.filtering_metrics) ch_family_size_histogram = UMI_CONSENSUS_FGUMI.out.family_size_histogram - ch_filtered_consensus_bam = UMI_CONSENSUS_FGUMI.out.filtered_consensus_bam + ch_filtered_consensus_cram = UMI_CONSENSUS_FGUMI.out.filtered_consensus_cram // BIOBAMBAM_BAMSORMADUP([meta, [bam, bam]], fasta, fai) BIOBAMBAM_BAMSORMADUP(ch_bam_fasta_fai.bamsormadup) @@ -177,7 +177,7 @@ workflow FASTQ_TO_CRAM { emit: cram_crai = ch_cram_crai // UMI-specific output channels for downstream reporting and publishing. - filtered_consensus_bam = ch_filtered_consensus_bam + filtered_consensus_cram = ch_filtered_consensus_cram rna_splice_junctions = FASTQ_ALIGN_RNA.out.splice_junctions rna_junctions = FASTQ_ALIGN_RNA.out.junctions sormadup_metrics = ch_sormadup_metrics diff --git a/subworkflows/local/umi_consensus/main.nf b/subworkflows/local/umi_consensus/main.nf index 476df3e2..b26fcf79 100644 --- a/subworkflows/local/umi_consensus/main.nf +++ b/subworkflows/local/umi_consensus/main.nf @@ -6,7 +6,7 @@ include { FGUMI_FILTER } from "../../../modules/local/fgumi/filter/mai include { FGUMI_GROUP } from "../../../modules/local/fgumi/group/main.nf" include { FGUMI_SIMPLEX } from "../../../modules/local/fgumi/simplex/main.nf" include { FGUMI_SNAP_ZIPPER_SORT } from "../../../modules/local/fgumi/snapzippersort/main.nf" -include { FGUMI_SORT } from "../../../modules/local/fgumi/sort/main.nf" +include { SAMTOOLS_SORT } from "../../../modules/nf-core/samtools/sort/main.nf" // FUNCTIONS include { getGenomeAttribute } from 
'../../local/utils_nfcore_preprocessing_pipeline' @@ -52,15 +52,21 @@ workflow UMI_CONSENSUS_FGUMI { .map { meta, bam, fasta -> [meta, bam, fasta] } ) - FGUMI_SORT( + SAMTOOLS_SORT( FGUMI_FILTER.out.bam + .join( + ch_meta_reads_aligner_index_fasta.map { meta, _reads, _aligner, _index, fasta -> [meta, fasta] }, + by: 0, + ) + .map { meta, bam, fasta -> [meta, bam, fasta] }, + "crai" ) emit: - bam_bai = FGUMI_SORT.out.bam.join(FGUMI_SORT.out.bai, failOnMismatch: true, failOnDuplicate: true) + cram_crai = SAMTOOLS_SORT.out.cram.join(SAMTOOLS_SORT.out.crai, failOnMismatch: true, failOnDuplicate: true) grouping_metrics = FGUMI_GROUP.out.grouping_metrics family_size_histogram = FGUMI_GROUP.out.family_size_histogram consensus_metrics = FGUMI_SIMPLEX.out.consensus_metrics filtering_metrics = FGUMI_FILTER.out.filtering_metrics - filtered_consensus_bam = FGUMI_SORT.out.bam + filtered_consensus_cram = SAMTOOLS_SORT.out.cram } diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test index c03f41e3..53aa2463 100644 --- a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test +++ b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test @@ -47,7 +47,7 @@ nextflow_workflow { assert workflow.success // Explicitly assert newly exposed UMI channels. 
assert workflow.out.family_size_histogram.size() == 1 - assert workflow.out.filtered_consensus_bam.size() == 1 + assert workflow.out.filtered_consensus_cram.size() == 1 assert snapshot( sanitizeOutput(workflow.out, unstableKeys:["cram_crai"]) ).match() diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap index 55035830..a247c372 100644 --- a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap +++ b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap @@ -46,7 +46,7 @@ "UMI_consensus1.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "filtered_consensus_bam": [ + "filtered_consensus_cram": [ [ { "id": "UMI_consensus1", @@ -64,7 +64,7 @@ ] } }, - "UMI_consensus1.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + "UMI_consensus1.fgumi.filter.cram:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "rna_junctions": [ diff --git a/tests/subworkflows/local/umi_consensus/main.nf.test b/tests/subworkflows/local/umi_consensus/main.nf.test index 7135f5db..f0648c8f 100644 --- a/tests/subworkflows/local/umi_consensus/main.nf.test +++ b/tests/subworkflows/local/umi_consensus/main.nf.test @@ -43,12 +43,12 @@ nextflow_workflow { then { assert workflow.success - assert workflow.out.bam_bai.size() == 1 + assert workflow.out.cram_crai.size() == 1 assert workflow.out.grouping_metrics.size() == 1 assert workflow.out.family_size_histogram.size() == 1 assert workflow.out.consensus_metrics.size() == 1 assert workflow.out.filtering_metrics.size() == 1 - assert workflow.out.filtered_consensus_bam.size() == 1 + assert workflow.out.filtered_consensus_cram.size() == 1 assert snapshot(workflow.out).match() } } diff --git a/tests/subworkflows/local/umi_consensus/main.nf.test.snap b/tests/subworkflows/local/umi_consensus/main.nf.test.snap index 9dbc33d6..171cbd2c 100644 --- 
a/tests/subworkflows/local/umi_consensus/main.nf.test.snap +++ b/tests/subworkflows/local/umi_consensus/main.nf.test.snap @@ -129,7 +129,7 @@ "UMI_consensus1.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "bam_bai": [ + "cram_crai": [ [ { "id": "UMI_consensus1", @@ -147,8 +147,8 @@ ] } }, - "UMI_consensus1.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e", - "UMI_consensus1.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + "UMI_consensus1.fgumi.filter.cram:md5,d41d8cd98f00b204e9800998ecf8427e", + "UMI_consensus1.fgumi.filter.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "consensus_metrics": [ @@ -193,7 +193,7 @@ "UMI_consensus1.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "filtered_consensus_bam": [ + "filtered_consensus_cram": [ [ { "id": "UMI_consensus1", @@ -211,7 +211,7 @@ ] } }, - "UMI_consensus1.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + "UMI_consensus1.fgumi.filter.cram:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "filtering_metrics": [ diff --git a/workflows/preprocessing.nf b/workflows/preprocessing.nf index 5a335908..2065eff2 100644 --- a/workflows/preprocessing.nf +++ b/workflows/preprocessing.nf @@ -436,7 +436,7 @@ workflow PREPROCESSING { sormadup_metrics = FASTQ_TO_CRAM.out.sormadup_metrics // UMI-specific outputs exposed at workflow level. 
family_size_histogram = FASTQ_TO_CRAM.out.family_size_histogram - umi_filtered_consensus_bam = FASTQ_TO_CRAM.out.filtered_consensus_bam + umi_filtered_consensus_cram = FASTQ_TO_CRAM.out.filtered_consensus_cram mosdepth_global = COVERAGE.out.mosdepth_global mosdepth_summary = COVERAGE.out.mosdepth_summary mosdepth_regions = COVERAGE.out.mosdepth_regions From 9d6690fd76280b12127ac982fb6c2682a2360723 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 17 Jun 2026 16:14:56 +0200 Subject: [PATCH 28/36] Changed local fgumi modules to nf-core defaults --- conf/modules.config | 15 +- modules.json | 25 +++ modules/local/fgumi/snapzippersort/main.nf | 34 ++-- modules/nf-core/fgumi/extract/environment.yml | 7 + modules/nf-core/fgumi/extract/main.nf | 38 +++++ modules/nf-core/fgumi/extract/meta.yml | 70 ++++++++ .../nf-core/fgumi/extract/tests/main.nf.test | 73 +++++++++ .../fgumi/extract/tests/main.nf.test.snap | 54 ++++++ .../fgumi/extract/tests/nextflow.config | 5 + modules/nf-core/fgumi/filter/environment.yml | 7 + modules/nf-core/fgumi/filter/main.nf | 56 +++++++ modules/nf-core/fgumi/filter/meta.yml | 108 ++++++++++++ .../nf-core/fgumi/filter/tests/main.nf.test | 103 ++++++++++++ .../fgumi/filter/tests/main.nf.test.snap | 76 +++++++++ .../fgumi/filter/tests/nextflow.config | 5 + modules/nf-core/fgumi/group/environment.yml | 7 + modules/nf-core/fgumi/group/main.nf | 51 ++++++ modules/nf-core/fgumi/group/meta.yml | 100 ++++++++++++ .../nf-core/fgumi/group/tests/main.nf.test | 74 +++++++++ .../fgumi/group/tests/main.nf.test.snap | 86 ++++++++++ .../nf-core/fgumi/group/tests/nextflow.config | 5 + modules/nf-core/fgumi/simplex/environment.yml | 7 + modules/nf-core/fgumi/simplex/main.nf | 54 ++++++ modules/nf-core/fgumi/simplex/meta.yml | 98 +++++++++++ .../nf-core/fgumi/simplex/tests/main.nf.test | 108 ++++++++++++ .../fgumi/simplex/tests/main.nf.test.snap | 154 ++++++++++++++++++ .../fgumi/simplex/tests/nextflow.config | 5 + modules/nf-core/fgumi/sort/environment.yml | 7 
+ modules/nf-core/fgumi/sort/main.nf | 45 +++++ modules/nf-core/fgumi/sort/meta.yml | 82 ++++++++++ modules/nf-core/fgumi/sort/tests/main.nf.test | 89 ++++++++++ .../fgumi/sort/tests/main.nf.test.snap | 87 ++++++++++ .../nf-core/fgumi/sort/tests/nextflow.config | 5 + .../local/fastq_to_aligned_cram/main.nf | 2 + subworkflows/local/umi_consensus/main.nf | 30 ++-- workflows/preprocessing.nf | 1 + 36 files changed, 1726 insertions(+), 47 deletions(-) create mode 100644 modules/nf-core/fgumi/extract/environment.yml create mode 100644 modules/nf-core/fgumi/extract/main.nf create mode 100644 modules/nf-core/fgumi/extract/meta.yml create mode 100644 modules/nf-core/fgumi/extract/tests/main.nf.test create mode 100644 modules/nf-core/fgumi/extract/tests/main.nf.test.snap create mode 100644 modules/nf-core/fgumi/extract/tests/nextflow.config create mode 100644 modules/nf-core/fgumi/filter/environment.yml create mode 100644 modules/nf-core/fgumi/filter/main.nf create mode 100644 modules/nf-core/fgumi/filter/meta.yml create mode 100644 modules/nf-core/fgumi/filter/tests/main.nf.test create mode 100644 modules/nf-core/fgumi/filter/tests/main.nf.test.snap create mode 100644 modules/nf-core/fgumi/filter/tests/nextflow.config create mode 100644 modules/nf-core/fgumi/group/environment.yml create mode 100644 modules/nf-core/fgumi/group/main.nf create mode 100644 modules/nf-core/fgumi/group/meta.yml create mode 100644 modules/nf-core/fgumi/group/tests/main.nf.test create mode 100644 modules/nf-core/fgumi/group/tests/main.nf.test.snap create mode 100644 modules/nf-core/fgumi/group/tests/nextflow.config create mode 100644 modules/nf-core/fgumi/simplex/environment.yml create mode 100644 modules/nf-core/fgumi/simplex/main.nf create mode 100644 modules/nf-core/fgumi/simplex/meta.yml create mode 100644 modules/nf-core/fgumi/simplex/tests/main.nf.test create mode 100644 modules/nf-core/fgumi/simplex/tests/main.nf.test.snap create mode 100644 
modules/nf-core/fgumi/simplex/tests/nextflow.config create mode 100644 modules/nf-core/fgumi/sort/environment.yml create mode 100644 modules/nf-core/fgumi/sort/main.nf create mode 100644 modules/nf-core/fgumi/sort/meta.yml create mode 100644 modules/nf-core/fgumi/sort/tests/main.nf.test create mode 100644 modules/nf-core/fgumi/sort/tests/main.nf.test.snap create mode 100644 modules/nf-core/fgumi/sort/tests/nextflow.config diff --git a/conf/modules.config b/conf/modules.config index fc9d7703..ec37a9c3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -239,8 +239,6 @@ process { ext.args = { [ "--read-group-id ${meta.readgroup?.ID ?: meta.id}", - "--sample \"${meta.readgroup?.SM ?: meta.id}\"", - "--library \"${meta.readgroup?.LB ?: meta.library ?: meta.id}\"", meta.readgroup?.PL ? "--platform ${meta.readgroup.PL}" : "", meta.readgroup?.PU ? "--platform-unit \"${meta.readgroup.PU}\"" : "", meta.readgroup?.PM ? "--platform-model \"${meta.readgroup.PM}\"" : "", @@ -305,11 +303,8 @@ process { ext.prefix = { "${meta.id}.fgumi.group" } ext.args = { [ - "--strategy ${meta.fgumi_group_strategy ?: 'adjacency'}", "--edits ${meta.fgumi_group_edits != null ? meta.fgumi_group_edits : 1}", "--compression-level ${meta.fgumi_compression_level != null ? meta.fgumi_compression_level : 1}", - "--grouping-metrics ${meta.id}.fgumi.group.grouping_metrics.txt", - "--family-size-histogram ${meta.id}.fgumi.group.family_size_histogram.txt", ].join(" ").trim() } } @@ -321,12 +316,9 @@ process { ext.prefix = { "${meta.id}.fgumi.simplex" } ext.args = { [ - "--min-reads ${params.fgumi_simplex_min_reads}", - "--threads ${task.cpus}", "--queue-memory ${params.fgumi_queue_memory}", params.fgumi_queue_memory_per_thread ? 
"--queue-memory-per-thread" : "", "--compression-level ${params.fgumi_compression_level}", - "--stats ${meta.id}.fgumi.simplex.consensus_metrics.txt", ].join(" ").trim() } } @@ -336,12 +328,7 @@ process { cpus = 4 memory = 16.GB ext.prefix = { "${meta.id}.fgumi.filter" } - ext.args = { - [ - "--min-reads 1,1,1", - "--stats ${meta.id}.fgumi.filter.filtering_metrics.txt", - ].join(" ").trim() - } + ext.args = '' } //// FGUMI coordinate sort/index after filter (step 7) diff --git a/modules.json b/modules.json index 328d7a78..691ef5e7 100644 --- a/modules.json +++ b/modules.json @@ -50,6 +50,31 @@ "git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120", "installed_by": ["modules"] }, + "fgumi/extract": { + "branch": "master", + "git_sha": "52e5a02f344a873b1dc133fa1c4602f639e252c4", + "installed_by": ["modules"] + }, + "fgumi/filter": { + "branch": "master", + "git_sha": "4d10fe71e9f35f6cad3649a77ab58c59cc3bfc4d", + "installed_by": ["modules"] + }, + "fgumi/group": { + "branch": "master", + "git_sha": "4d10fe71e9f35f6cad3649a77ab58c59cc3bfc4d", + "installed_by": ["modules"] + }, + "fgumi/simplex": { + "branch": "master", + "git_sha": "4d10fe71e9f35f6cad3649a77ab58c59cc3bfc4d", + "installed_by": ["modules"] + }, + "fgumi/sort": { + "branch": "master", + "git_sha": "b5bd7ec03fe7cad8f8ab319c2f9881b256d055c6", + "installed_by": ["modules"] + }, "gnu/sort": { "branch": "master", "git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120", diff --git a/modules/local/fgumi/snapzippersort/main.nf b/modules/local/fgumi/snapzippersort/main.nf index bdfe9bed..efc05276 100644 --- a/modules/local/fgumi/snapzippersort/main.nf +++ b/modules/local/fgumi/snapzippersort/main.nf @@ -23,36 +23,28 @@ process FGUMI_SNAP_ZIPPER_SORT { prefix = task.ext.prefix ?: "${meta.id}.fgumi" """ - INDEX=`dirname \$(find -L ./ -name "OverflowTable*" | head -n1)` - [ -z "\$INDEX" ] && echo "Snap index files not found" 1>&2 && exit 1 + INDEX_FILE=\$(find -L ./ -name "OverflowTable*" -print -quit) + [ -z 
"\$INDEX_FILE" ] && echo "Snap index files not found" 1>&2 && exit 1 + INDEX=\$(dirname "\$INDEX_FILE") - # Ensure zipper and fastq read exactly the same queryname-ordered unmapped stream. - samtools sort \ - -n \ - -@ ${task.cpus} \ - -m 1G \ - -o ${prefix}.unmapped.queryname.bam \ - ${unmapped_bam} - - fgumi fastq --input ${prefix}.unmapped.queryname.bam \ + fgumi fastq --input ${unmapped_bam} \ | snap-aligner paired \ \$INDEX \ -pairedInterleavedFastq - \ -o -sam - \ -t ${task.cpus} \ ${snap_args} \ - | samtools sort \ - -n \ - -@ ${task.cpus} \ - -m 1G \ - -O SAM \ - - \ - | fgumi zipper \ - --unmapped ${prefix}.unmapped.queryname.bam \ + > ${prefix}.snap.sam + + fgumi zipper \ + --unmapped ${unmapped_bam} \ --reference ${fasta} \ ${zipper_args} \ - | fgumi sort \ - --input /dev/stdin \ + --output ${prefix}.zipper.bam \ + < ${prefix}.snap.sam + + fgumi sort \ + --input ${prefix}.zipper.bam \ --output ${prefix}.template.bam \ --order template-coordinate \ ${sort_args} diff --git a/modules/nf-core/fgumi/extract/environment.yml b/modules/nf-core/fgumi/extract/environment.yml new file mode 100644 index 00000000..7960b0e5 --- /dev/null +++ b/modules/nf-core/fgumi/extract/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::fgumi=0.2.0" diff --git a/modules/nf-core/fgumi/extract/main.nf b/modules/nf-core/fgumi/extract/main.nf new file mode 100644 index 00000000..fb17d5ad --- /dev/null +++ b/modules/nf-core/fgumi/extract/main.nf @@ -0,0 +1,38 @@ +process FGUMI_EXTRACT { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a5/a510706f3481fae12ff6100d6e4ad298b8bf464a2d93a6afe35e9cf26542d080/data' + : 'community.wave.seqera.io/library/fgumi:0.2.0--fe028e7a64e5da27'}" + + input: + tuple val(meta), path(reads), val(library) + + output: + tuple val(meta), path("*.bam"), emit: bam + tuple val("${task.process}"), val('fgumi'), eval('fgumi --version | sed "s/^fgumi //"'), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + fgumi extract \\ + --inputs ${reads.join(' ')} \\ + --output ${prefix}.bam \\ + ${args} \\ + --sample ${prefix} \\ + --library "${library}" + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + """ +} diff --git a/modules/nf-core/fgumi/extract/meta.yml b/modules/nf-core/fgumi/extract/meta.yml new file mode 100644 index 00000000..7d62b590 --- /dev/null +++ b/modules/nf-core/fgumi/extract/meta.yml @@ -0,0 +1,70 @@ +name: fgumi_extract +description: Extract unique molecular indices (UMIs) from FASTQ files and write + an unaligned BAM file. +keywords: + - umi + - extract + - fastq + - bam +tools: + - fgumi: + description: High-performance tools for working with UMI-tagged sequencing + data. + homepage: https://github.com/fulcrumgenomics/fgumi + documentation: https://docs.rs/fgumi + tool_dev_url: https://github.com/fulcrumgenomics/fgumi + licence: + - "MIT" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: Input FASTQ files used for UMI extraction. + pattern: "*.fastq.gz" + ontologies: + - edam: http://edamontology.org/format_3989 + - library: + type: string + description: Library name to store in the output BAM read group. 
+output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Unaligned BAM with extracted UMIs in SAM tags. + pattern: "*.bam" + ontologies: [] + versions_fgumi: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fgumi: + type: string + description: The tool name + - 'fgumi --version | sed "s/^fgumi //"': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fgumi: + type: string + description: The tool name + - 'fgumi --version | sed "s/^fgumi //"': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@atrigila" +maintainers: + - "@atrigila" diff --git a/modules/nf-core/fgumi/extract/tests/main.nf.test b/modules/nf-core/fgumi/extract/tests/main.nf.test new file mode 100644 index 00000000..44ce2572 --- /dev/null +++ b/modules/nf-core/fgumi/extract/tests/main.nf.test @@ -0,0 +1,73 @@ +nextflow_process { + + name "Test Process FGUMI_EXTRACT" + script "../main.nf" + process "FGUMI_EXTRACT" + + tag "modules" + tag "modules_nfcore" + tag "fgumi" + tag "fgumi/extract" + + config "./nextflow.config" + + test("homo_sapiens - [fastq1, fastq2]") { + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test.umi_2.fastq.gz', checkIfExists: true) + ], + 'illumina', + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + + } + + test("homo_sapiens - [fastq1, fastq2] - stub") { + + options "-stub" + + 
when { + params { + module_args = "--read-structures +T +M" + } + process { + """ + input[0] = [ + [ id:'test' ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test.umi_2.fastq.gz', checkIfExists: true) + ], + 'test', + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + + } +} diff --git a/modules/nf-core/fgumi/extract/tests/main.nf.test.snap b/modules/nf-core/fgumi/extract/tests/main.nf.test.snap new file mode 100644 index 00000000..8c054caf --- /dev/null +++ b/modules/nf-core/fgumi/extract/tests/main.nf.test.snap @@ -0,0 +1,54 @@ +{ + "homo_sapiens - [fastq1, fastq2]": { + "content": [ + { + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,9c2476e5c354f57bed109e582b8953be" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_EXTRACT", + "fgumi", + "0.2.0" + ] + ] + } + ], + "timestamp": "2026-05-07T12:48:51.477812468", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "homo_sapiens - [fastq1, fastq2] - stub": { + "content": [ + { + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_EXTRACT", + "fgumi", + "0.2.0" + ] + ] + } + ], + "timestamp": "2026-05-07T12:49:21.767581495", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/fgumi/extract/tests/nextflow.config b/modules/nf-core/fgumi/extract/tests/nextflow.config new file mode 100644 index 00000000..54ef8845 --- /dev/null +++ b/modules/nf-core/fgumi/extract/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: FGUMI_EXTRACT { + ext.args = { "${params.module_args}" } + } +} diff --git a/modules/nf-core/fgumi/filter/environment.yml b/modules/nf-core/fgumi/filter/environment.yml new file mode 
100644 index 00000000..7960b0e5 --- /dev/null +++ b/modules/nf-core/fgumi/filter/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::fgumi=0.2.0" diff --git a/modules/nf-core/fgumi/filter/main.nf b/modules/nf-core/fgumi/filter/main.nf new file mode 100644 index 00000000..5f449c34 --- /dev/null +++ b/modules/nf-core/fgumi/filter/main.nf @@ -0,0 +1,56 @@ +process FGUMI_FILTER { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a5/a510706f3481fae12ff6100d6e4ad298b8bf464a2d93a6afe35e9cf26542d080/data' + : 'community.wave.seqera.io/library/fgumi:0.2.0--fe028e7a64e5da27'}" + + input: + tuple val(meta), path(bam) + tuple val(meta2), path(fasta) + val min_reads + val keep_rejected + + output: + tuple val(meta), path("${prefix}.bam") , emit: bam + tuple val(meta), path("${prefix}.rejects.bam"), emit: rejects, optional: true + tuple val(meta), path("${prefix}.stats.txt") , emit: stats + tuple val("${task.process}"), val('fgumi'), eval('fgumi --version | sed "s/^fgumi //"'), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}_consensus_filtered" + def rejects_command = keep_rejected ? 
"--rejects ${prefix}.rejects.bam" : '' + if ("${bam}" == "${prefix}.bam") { + error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!") + } + + """ + fgumi filter \\ + --input ${bam} \\ + --output ${prefix}.bam \\ + --ref ${fasta} \\ + --min-reads ${min_reads} \\ + --threads ${task.cpus} \\ + --stats ${prefix}.stats.txt \\ + ${rejects_command} \\ + ${args} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}_consensus_filtered" + def rejects_command = keep_rejected ? "touch ${prefix}.rejects.bam" : '' + if ("${bam}" == "${prefix}.bam") { + error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!") + } + """ + touch ${prefix}.bam + ${rejects_command} + touch ${prefix}.stats.txt + """ +} diff --git a/modules/nf-core/fgumi/filter/meta.yml b/modules/nf-core/fgumi/filter/meta.yml new file mode 100644 index 00000000..b6af0220 --- /dev/null +++ b/modules/nf-core/fgumi/filter/meta.yml @@ -0,0 +1,108 @@ +name: "fgumi_filter" +description: | + Filters consensus reads generated by simplex or duplex consensus calling. + This is a high-performance replacement for fgbio FilterConsensusReads. +keywords: + - umi + - filter + - consensus + - bam +tools: + - "fgumi": + description: "High-performance tools for working with UMI-tagged sequencing data." + homepage: "https://github.com/fulcrumgenomics/fgumi" + documentation: "https://docs.rs/fgumi" + tool_dev_url: "https://github.com/fulcrumgenomics/fgumi" + licence: + - "MIT" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Consensus BAM file to be filtered + pattern: "*.bam" + ontologies: + - edam: "http://edamontology.org/format_2572" + - - meta2: + type: map + description: | + Groovy Map containing genome information + e.g. 
[ id:'genome' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta,fna}" + ontologies: + - edam: "http://edamontology.org/format_1929" + - min_reads: + type: integer + description: Minimum number of reads required to keep a consensus read + - keep_rejected: + type: boolean + description: Whether to keep rejected reads in a separate BAM file +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.bam": + type: file + description: Filtered consensus BAM file + pattern: "*.bam" + ontologies: + - edam: "http://edamontology.org/format_2572" + rejects: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.rejects.bam": + type: file + description: Optional BAM file containing reads that were filtered out + pattern: "*.rejects.bam" + ontologies: + - edam: "http://edamontology.org/format_2572" + stats: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "${prefix}.stats.txt": + type: file + description: Optional text file containing filtering statistics + pattern: "*.stats.txt" + ontologies: [] + versions_fgumi: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fgumi: + type: string + description: The tool name + - 'fgumi --version | sed "s/^fgumi //"': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fgumi: + type: string + description: The tool name + - 'fgumi --version | sed "s/^fgumi //"': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@sppearce" +maintainers: + - "@sppearce" diff --git a/modules/nf-core/fgumi/filter/tests/main.nf.test b/modules/nf-core/fgumi/filter/tests/main.nf.test new file mode 100644 index 00000000..26fd52cf --- /dev/null +++ b/modules/nf-core/fgumi/filter/tests/main.nf.test @@ -0,0 +1,103 @@ +nextflow_process { + + name "Test Process FGUMI_FILTER" + script "../main.nf" + process "FGUMI_FILTER" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "fgumi" + tag "fgumi/filter" + tag "fgumi/sort" + tag "fgumi/group" + tag "fgumi/simplex" + + setup { + run("FGUMI_SORT") { + script "../../sort/main.nf" + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.unsorted_tagged.bam', checkIfExists: true) + ] + """ + } + + } + run("FGUMI_GROUP") { + script "../../group/main.nf" + process { + """ + input[0] = FGUMI_SORT.out.bam + input[1] = 'adjacency' + """ + } + } + run("FGUMI_SIMPLEX") { + script "../../simplex/main.nf" + process { + """ + input[0] = FGUMI_GROUP.out.bam + input[1] = 1 + input[2] = false + """ + } + } + } + + test("homo_sapiens - bam") { + + when { + process { + """ + input[0] = 
FGUMI_SIMPLEX.out.bam + input[1] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = 1 + input[3] = false + """ + } + } + + then { + assert process.success + assertAll( + // bam file is non deterministic in its output order + { assert snapshot(sanitizeOutput(process.out, unstableKeys: ['bam'])).match() } + ) + } + + } + + test("homo_sapiens - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = FGUMI_SIMPLEX.out.bam + input[1] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = 1 + input[3] = false + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + + } + +} diff --git a/modules/nf-core/fgumi/filter/tests/main.nf.test.snap b/modules/nf-core/fgumi/filter/tests/main.nf.test.snap new file mode 100644 index 00000000..a658d249 --- /dev/null +++ b/modules/nf-core/fgumi/filter/tests/main.nf.test.snap @@ -0,0 +1,76 @@ +{ + "homo_sapiens - bam": { + "content": [ + { + "bam": [ + [ + { + "id": "test" + }, + "test_consensus_filtered.bam" + ] + ], + "rejects": [ + + ], + "stats": [ + [ + { + "id": "test" + }, + "test_consensus_filtered.stats.txt:md5,88f50c970ee78378013d32614dde9b47" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_FILTER", + "fgumi", + "0.2.0" + ] + ] + } + ], + "timestamp": "2026-06-05T09:14:15.950832036", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "homo_sapiens - bam - stub": { + "content": [ + { + "bam": [ + [ + { + "id": "test" + }, + "test_consensus_filtered.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "rejects": [ + + ], + "stats": [ + [ + { + "id": "test" + }, + "test_consensus_filtered.stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_FILTER", + "fgumi", + "0.2.0" + ] + ] + } + ], + 
"timestamp": "2026-06-05T09:15:57.252685521", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/fgumi/filter/tests/nextflow.config b/modules/nf-core/fgumi/filter/tests/nextflow.config new file mode 100644 index 00000000..28103edf --- /dev/null +++ b/modules/nf-core/fgumi/filter/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: FGUMI_SORT { + ext.args = '--order template-coordinate' + } +} diff --git a/modules/nf-core/fgumi/group/environment.yml b/modules/nf-core/fgumi/group/environment.yml new file mode 100644 index 00000000..7960b0e5 --- /dev/null +++ b/modules/nf-core/fgumi/group/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::fgumi=0.2.0" diff --git a/modules/nf-core/fgumi/group/main.nf b/modules/nf-core/fgumi/group/main.nf new file mode 100644 index 00000000..0c40b671 --- /dev/null +++ b/modules/nf-core/fgumi/group/main.nf @@ -0,0 +1,51 @@ +process FGUMI_GROUP { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a5/a510706f3481fae12ff6100d6e4ad298b8bf464a2d93a6afe35e9cf26542d080/data' + : 'community.wave.seqera.io/library/fgumi:0.2.0--fe028e7a64e5da27'}" + + input: + tuple val(meta), path(bam) + val strategy + + output: + tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*.family_size_histogram.txt"), emit: histogram + tuple val(meta), path("*.grouping_metrics.txt") , emit: metrics + tuple val("${task.process}"), val('fgumi'), eval('fgumi --version | sed "s/^fgumi //"'), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}_umi-grouped" + + if ("${bam}" == "${prefix}.bam") { + error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!") + } + + """ + fgumi group \\ + --input ${bam} \\ + --output ${prefix}.bam \\ + --strategy ${strategy} \\ + --family-size-histogram ${prefix}.family_size_histogram.txt \\ + --grouping-metrics ${prefix}.grouping_metrics.txt \\ + ${args} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}_umi-grouped" + if ("${bam}" == "${prefix}.bam") { + error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!") + } + """ + touch ${prefix}.bam + touch ${prefix}.family_size_histogram.txt + touch ${prefix}.grouping_metrics.txt + """ +} diff --git a/modules/nf-core/fgumi/group/meta.yml b/modules/nf-core/fgumi/group/meta.yml new file mode 100644 index 00000000..4a3a3c3c --- /dev/null +++ b/modules/nf-core/fgumi/group/meta.yml @@ -0,0 +1,100 @@ +name: "fgumi_group" +description: | + Groups reads together that appear to have come from the same original molecule. + Reads are grouped by template, and then templates are sorted by the 5' mapping positions + of the reads from the template. Reads that have the same end positions are then sub-grouped + by UMI sequence. 
This is a high-performance replacement for fgbio GroupReadsByUmi. +keywords: + - umi + - groupreads + - bam +tools: + - "fgumi": + description: "High-performance tools for working with UMI-tagged sequencing data." + homepage: "https://github.com/fulcrumgenomics/fgumi" + documentation: "https://docs.rs/fgumi" + tool_dev_url: "https://github.com/fulcrumgenomics/fgumi" + licence: + - "MIT" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: | + BAM file containing reads with UMI tags. The file must be coordinate sorted. + pattern: "*.bam" + ontologies: + - edam: "http://edamontology.org/format_2572" + - strategy: + type: string + enum: + - "Identity" + - "Edit" + - "Adjacency" + - "Paired" + description: | + Required argument: defines the UMI assignment strategy. + Must be chosen among: Identity, Edit, Adjacency, Paired. +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: UMI-grouped BAM file + pattern: "*.bam" + ontologies: + - edam: "http://edamontology.org/format_2572" + histogram: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.family_size_histogram.txt": + type: file + description: Optional output of tag family size counts + pattern: "*.family_size_histogram.txt" + metrics: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.grouping_metrics.txt": + type: file + description: Optional output of UMI grouping metrics + pattern: "*.grouping_metrics.txt" + versions_fgumi: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fgumi: + type: string + description: The tool name + - fgumi --version | sed "s/^fgumi //": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fgumi: + type: string + description: The tool name + - fgumi --version | sed "s/^fgumi //": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@sppearce" +maintainers: + - "@sppearce" diff --git a/modules/nf-core/fgumi/group/tests/main.nf.test b/modules/nf-core/fgumi/group/tests/main.nf.test new file mode 100644 index 00000000..6ed0f513 --- /dev/null +++ b/modules/nf-core/fgumi/group/tests/main.nf.test @@ -0,0 +1,74 @@ +nextflow_process { + + name "Test Process FGUMI_GROUP" + script "../main.nf" + process "FGUMI_GROUP" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "fgumi" + tag "fgumi/group" + tag "fgumi/sort" + + test("sarscov2 - bam") { + + setup { + run("FGUMI_SORT") { + script "../../sort/main.nf" + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.unsorted_tagged.bam', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = FGUMI_SORT.out.bam + input[1] = 'adjacency' + """ + } + } + + then { + assert process.success + assertAll( + // bam file is non deterministic in its output order + { assert snapshot(sanitizeOutput(process.out, unstableKeys: ['bam'])).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.unsorted_tagged.bam', checkIfExists: true) + ] + input[1] = 'adjacency' + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + + } + +} diff --git a/modules/nf-core/fgumi/group/tests/main.nf.test.snap b/modules/nf-core/fgumi/group/tests/main.nf.test.snap new file mode 100644 index 00000000..7595f16c --- /dev/null +++ b/modules/nf-core/fgumi/group/tests/main.nf.test.snap @@ -0,0 +1,86 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "bam": [ + [ + { + "id": "test" + }, + "test_umi-grouped.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "histogram": [ + [ + { + "id": "test" + }, + "test_umi-grouped.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "metrics": [ + [ + { + "id": "test" + }, + "test_umi-grouped.grouping_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_GROUP", + "fgumi", + "0.2.0" + ] + ] + } + ], + "timestamp": "2026-06-05T14:26:33.524685932", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - bam": { + "content": [ + { + "bam": [ + [ + { + "id": "test" + }, + "test_umi-grouped.bam" + ] + ], + "histogram": [ + [ + { + "id": "test" + }, + "test_umi-grouped.family_size_histogram.txt:md5,4f680ae7e7413c4b88f7ee82fd237162" + ] + ], + "metrics": [ + [ + { + "id": "test" + }, + "test_umi-grouped.grouping_metrics.txt:md5,6d32f1f0d9277fe6f07d5e6ff56e70ac" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_GROUP", + "fgumi", + "0.2.0" + ] + ] + } + ], + "timestamp": "2026-06-05T14:26:27.388945914", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/fgumi/group/tests/nextflow.config b/modules/nf-core/fgumi/group/tests/nextflow.config new file mode 100644 index 00000000..28103edf --- /dev/null +++ 
b/modules/nf-core/fgumi/group/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: FGUMI_SORT { + ext.args = '--order template-coordinate' + } +} diff --git a/modules/nf-core/fgumi/simplex/environment.yml b/modules/nf-core/fgumi/simplex/environment.yml new file mode 100644 index 00000000..7960b0e5 --- /dev/null +++ b/modules/nf-core/fgumi/simplex/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::fgumi=0.2.0" diff --git a/modules/nf-core/fgumi/simplex/main.nf b/modules/nf-core/fgumi/simplex/main.nf new file mode 100644 index 00000000..9c02c7cb --- /dev/null +++ b/modules/nf-core/fgumi/simplex/main.nf @@ -0,0 +1,54 @@ +process FGUMI_SIMPLEX { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a5/a510706f3481fae12ff6100d6e4ad298b8bf464a2d93a6afe35e9cf26542d080/data' + : 'community.wave.seqera.io/library/fgumi:0.2.0--fe028e7a64e5da27'}" + + input: + tuple val(meta), path(grouped_bam) + val min_reads + val keep_rejected + + output: + tuple val(meta), path("${prefix}.bam") , emit: bam + tuple val(meta), path("${prefix}.rejects.bam"), emit: rejects, optional: true + tuple val(meta), path("${prefix}.stats.txt") , emit: stats + tuple val("${task.process}"), val('fgumi'), eval('fgumi --version | sed "s/^fgumi //"'), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}_simplex_unmapped" + def rejects_command = keep_rejected ? 
"--rejects ${prefix}.rejects.bam" : '' + + if ("${grouped_bam}" == "${prefix}.bam") { + error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!") + } + + """ + fgumi simplex \\ + --input ${grouped_bam} \\ + --output ${prefix}.bam \\ + --min-reads ${min_reads} \\ + --threads ${task.cpus} \\ + --stats ${prefix}.stats.txt \\ + ${rejects_command} \\ + ${args} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}_simplex_unmapped" + if ("${grouped_bam}" == "${prefix}.bam") { + error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!") + } + """ + touch ${prefix}.bam + touch ${prefix}.rejects.bam + touch ${prefix}.stats.txt + """ +} diff --git a/modules/nf-core/fgumi/simplex/meta.yml b/modules/nf-core/fgumi/simplex/meta.yml new file mode 100644 index 00000000..0f7a751a --- /dev/null +++ b/modules/nf-core/fgumi/simplex/meta.yml @@ -0,0 +1,98 @@ +name: "fgumi_simplex" +description: | + Calls simplex consensus sequences from reads with the same unique molecular tag. + This is a high-performance replacement for fgbio CallMolecularConsensusReads. +keywords: + - umi + - consensus + - simplex + - bam +tools: + - "fgumi": + description: "High-performance tools for working with UMI-tagged sequencing data." + homepage: "https://github.com/fulcrumgenomics/fgumi" + documentation: "https://docs.rs/fgumi" + tool_dev_url: "https://github.com/fulcrumgenomics/fgumi" + licence: + - "MIT" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - grouped_bam: + type: file + description: | + The input SAM or BAM file, grouped by UMIs + pattern: "*.{bam,sam}" + ontologies: + - edam: "http://edamontology.org/format_2572" + - min_reads: + type: integer + description: Minimum number of original reads to build each consensus read. 
+ - keep_rejected: + type: boolean + description: If true, output rejected reads to a separate BAM file +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.bam": + type: file + description: | + Output SAM or BAM file with simplex consensus reads. + pattern: "*.{bam,sam}" + ontologies: + - edam: "http://edamontology.org/format_2572" + rejects: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.rejects.bam": + type: file + description: Optional BAM file containing reads that were rejected + pattern: "*.rejects.bam" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + stats: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.stats.txt": + type: file + description: Optional text file containing consensus statistics + pattern: "*.stats.txt" + versions_fgumi: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fgumi: + type: string + description: The tool name + - fgumi --version | sed "s/^fgumi //": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fgumi: + type: string + description: The tool name + - fgumi --version | sed "s/^fgumi //": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@sppearce" +maintainers: + - "@sppearce" diff --git a/modules/nf-core/fgumi/simplex/tests/main.nf.test b/modules/nf-core/fgumi/simplex/tests/main.nf.test new file mode 100644 index 00000000..2442b881 --- /dev/null +++ b/modules/nf-core/fgumi/simplex/tests/main.nf.test @@ -0,0 +1,108 @@ +nextflow_process { + + name "Test Process FGUMI_SIMPLEX" + script 
"../main.nf" + process "FGUMI_SIMPLEX" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "fgumi" + tag "fgumi/simplex" + tag "fgumi/sort" + tag "fgumi/group" + + setup { + run("FGUMI_SORT") { + script "../../sort/main.nf" + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.unsorted_tagged.bam', checkIfExists: true) + ] + """ + } + } + run("FGUMI_GROUP") { + script "../../group/main.nf" + process { + """ + input[0] = FGUMI_SORT.out.bam + input[1] = 'adjacency' + """ + } + } + } + + test("homo_sapiens - bam") { + + when { + process { + """ + input[0] = FGUMI_GROUP.out.bam + input[1] = 1 + input[2] = false + """ + } + } + + then { + assert process.success + assertAll( + // bam file is non deterministic in its output order + { assert snapshot(sanitizeOutput(process.out, unstableKeys: ['bam'])).match() } + ) + } + + } + + test("homo_sapiens - bam - with rejects") { + + when { + process { + """ + input[0] = FGUMI_GROUP.out.bam + input[1] = 1 + input[2] = true + """ + } + } + + then { + assert process.success + assertAll( + // bam file is non deterministic in its output order + { assert snapshot(sanitizeOutput(process.out, unstableKeys: ['bam', 'rejects'])).match() } + ) + } + + } + + test("homo_sapiens - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_grouped.bam', checkIfExists: true) + ] + input[1] = 1 + input[2] = false + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/fgumi/simplex/tests/main.nf.test.snap b/modules/nf-core/fgumi/simplex/tests/main.nf.test.snap new file mode 100644 index 00000000..80a6adfa --- /dev/null +++ b/modules/nf-core/fgumi/simplex/tests/main.nf.test.snap @@ -0,0 +1,154 @@ 
+{ + "homo_sapiens - bam - with rejects": { + "content": [ + { + "bam": [ + [ + { + "id": "test" + }, + "test_simplex_unmapped.bam" + ] + ], + "rejects": [ + [ + { + "id": "test" + }, + "test_simplex_unmapped.rejects.bam" + ] + ], + "stats": [ + [ + { + "id": "test" + }, + "test_simplex_unmapped.stats.txt:md5,61bfbca538c809387368c351412732ee" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_SIMPLEX", + "fgumi", + "0.2.0" + ] + ] + } + ], + "timestamp": "2026-06-05T14:26:50.835328059", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "homo_sapiens - bam": { + "content": [ + { + "bam": [ + [ + { + "id": "test" + }, + "test_simplex_unmapped.bam" + ] + ], + "rejects": [ + + ], + "stats": [ + [ + { + "id": "test" + }, + "test_simplex_unmapped.stats.txt:md5,61bfbca538c809387368c351412732ee" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_SIMPLEX", + "fgumi", + "0.2.0" + ] + ] + } + ], + "timestamp": "2026-06-05T09:30:50.970124786", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "homo_sapiens - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_simplex_unmapped.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_simplex_unmapped.rejects.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test_simplex_unmapped.stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "FGUMI_SIMPLEX", + "fgumi", + "0.2.0" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test_simplex_unmapped.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "rejects": [ + [ + { + "id": "test" + }, + "test_simplex_unmapped.rejects.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test" + }, + "test_simplex_unmapped.stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_SIMPLEX", + "fgumi", + "0.2.0" + ] + ] + } + ], + "timestamp": "2026-06-05T14:26:58.463534187", + "meta": { + 
"nf-test": "0.9.5", + "nextflow": "26.04.0" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/fgumi/simplex/tests/nextflow.config b/modules/nf-core/fgumi/simplex/tests/nextflow.config new file mode 100644 index 00000000..28103edf --- /dev/null +++ b/modules/nf-core/fgumi/simplex/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: FGUMI_SORT { + ext.args = '--order template-coordinate' + } +} diff --git a/modules/nf-core/fgumi/sort/environment.yml b/modules/nf-core/fgumi/sort/environment.yml new file mode 100644 index 00000000..7960b0e5 --- /dev/null +++ b/modules/nf-core/fgumi/sort/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::fgumi=0.2.0" diff --git a/modules/nf-core/fgumi/sort/main.nf b/modules/nf-core/fgumi/sort/main.nf new file mode 100644 index 00000000..6255810a --- /dev/null +++ b/modules/nf-core/fgumi/sort/main.nf @@ -0,0 +1,45 @@ +process FGUMI_SORT { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a5/a510706f3481fae12ff6100d6e4ad298b8bf464a2d93a6afe35e9cf26542d080/data' + : 'community.wave.seqera.io/library/fgumi:0.2.0--fe028e7a64e5da27'}" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.{csi,bai}"), emit: index, optional: true + tuple val("${task.process}"), val('fgumi'), eval('fgumi --version | sed "s/^fgumi //"'), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}_sorted" + + if ("${bam}" == "${prefix}.bam") { + error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!") + } + + """ + fgumi sort \\ + --input ${bam} \\ + --output ${prefix}.bam \\ + --threads ${task.cpus} \\ + ${args} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}_sorted" + if ("${bam}" == "${prefix}.bam") { + error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!") + } + """ + touch ${prefix}.bam + """ +} diff --git a/modules/nf-core/fgumi/sort/meta.yml b/modules/nf-core/fgumi/sort/meta.yml new file mode 100644 index 00000000..1dc25228 --- /dev/null +++ b/modules/nf-core/fgumi/sort/meta.yml @@ -0,0 +1,82 @@ +name: "fgumi_sort" +description: | + Sorts a SAM or BAM file. Several sort orders are available, including coordinate, + queryname, and template-coordinate. This is a high-performance replacement for fgbio SortBam. +keywords: + - sort + - bam + - sam +tools: + - "fgumi": + description: "High-performance tools for working with UMI-tagged sequencing data." + homepage: "https://github.com/fulcrumgenomics/fgumi" + documentation: "https://docs.rs/fgumi" + tool_dev_url: "https://github.com/fulcrumgenomics/fgumi" + licence: + - "MIT" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: | + The input SAM or BAM file to be sorted. + pattern: "*.{bam,sam}" + ontologies: + - edam: "http://edamontology.org/format_2572" +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: | + Sorted output BAM file. + pattern: "*.bam" + ontologies: + - edam: "http://edamontology.org/format_2572" + index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{csi,bai}": + type: file + description: | + Index file if the bam file is coordinate sorted. + pattern: "*.{csi,bai}" + ontologies: + - edam: "http://edamontology.org/format_3327" + versions_fgumi: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fgumi: + type: string + description: The tool name + - 'fgumi --version | sed "s/^fgumi //"': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fgumi: + type: string + description: The tool name + - 'fgumi --version | sed "s/^fgumi //"': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@sppearce" +maintainers: + - "@sppearce" diff --git a/modules/nf-core/fgumi/sort/tests/main.nf.test b/modules/nf-core/fgumi/sort/tests/main.nf.test new file mode 100644 index 00000000..fc549bf5 --- /dev/null +++ b/modules/nf-core/fgumi/sort/tests/main.nf.test @@ -0,0 +1,89 @@ +nextflow_process { + + name "Test Process FGUMI_SORT" + script "../main.nf" + process "FGUMI_SORT" + + tag "modules" + tag "modules_nfcore" + tag "fgumi" + tag "fgumi/sort" + + test("sarscov2 - bam") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + bam(process.out.bam.get(0).get(1)).getReadsMD5(), + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + + } + + test("sarscov2 - bam - template-coordinate") { + + when { + params { + module_args = '--order template-coordinate' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + bam(process.out.bam.get(0).get(1)).getReadsMD5(), + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/fgumi/sort/tests/main.nf.test.snap b/modules/nf-core/fgumi/sort/tests/main.nf.test.snap new file mode 100644 index 00000000..07a9c7ee --- /dev/null +++ b/modules/nf-core/fgumi/sort/tests/main.nf.test.snap @@ -0,0 +1,87 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + [ + "FGUMI_SORT", + "fgumi", + "0.2.0" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test_sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "versions_fgumi": [ + [ + "FGUMI_SORT", + "fgumi", + "0.2.0" + ] + ] + } + ], + "timestamp": "2026-06-04T13:47:11.834557157", + "meta": { + "nf-test": "0.9.5", + 
"nextflow": "26.04.0" + } + }, + "sarscov2 - bam": { + "content": [ + "461d8083b03a321eb1902ad544fd7d2f", + { + "versions_fgumi": [ + [ + "FGUMI_SORT", + "fgumi", + "0.2.0" + ] + ] + } + ], + "timestamp": "2026-06-04T13:47:00.183383916", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - bam - template-coordinate": { + "content": [ + "461d8083b03a321eb1902ad544fd7d2f", + { + "versions_fgumi": [ + [ + "FGUMI_SORT", + "fgumi", + "0.2.0" + ] + ] + } + ], + "timestamp": "2026-06-04T13:47:05.881895696", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/fgumi/sort/tests/nextflow.config b/modules/nf-core/fgumi/sort/tests/nextflow.config new file mode 100644 index 00000000..4ad67e9b --- /dev/null +++ b/modules/nf-core/fgumi/sort/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: FGUMI_SORT { + ext.args = { params.module_args } + } +} diff --git a/subworkflows/local/fastq_to_aligned_cram/main.nf b/subworkflows/local/fastq_to_aligned_cram/main.nf index e3d0fc9e..18be9689 100644 --- a/subworkflows/local/fastq_to_aligned_cram/main.nf +++ b/subworkflows/local/fastq_to_aligned_cram/main.nf @@ -126,6 +126,7 @@ workflow FASTQ_TO_CRAM { ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.filtering_metrics) ch_family_size_histogram = UMI_CONSENSUS_FGUMI.out.family_size_histogram ch_filtered_consensus_cram = UMI_CONSENSUS_FGUMI.out.filtered_consensus_cram + ch_zipper_diagnostics = UMI_CONSENSUS_FGUMI.out.zipper_diagnostics // BIOBAMBAM_BAMSORMADUP([meta, [bam, bam]], fasta, fai) BIOBAMBAM_BAMSORMADUP(ch_bam_fasta_fai.bamsormadup) @@ -178,6 +179,7 @@ workflow FASTQ_TO_CRAM { cram_crai = ch_cram_crai // UMI-specific output channels for downstream reporting and publishing. 
filtered_consensus_cram = ch_filtered_consensus_cram + zipper_diagnostics = ch_zipper_diagnostics rna_splice_junctions = FASTQ_ALIGN_RNA.out.splice_junctions rna_junctions = FASTQ_ALIGN_RNA.out.junctions sormadup_metrics = ch_sormadup_metrics diff --git a/subworkflows/local/umi_consensus/main.nf b/subworkflows/local/umi_consensus/main.nf index b26fcf79..a6e2c758 100644 --- a/subworkflows/local/umi_consensus/main.nf +++ b/subworkflows/local/umi_consensus/main.nf @@ -1,10 +1,10 @@ #!/usr/bin/env nextflow // MODULES -include { FGUMI_EXTRACT } from "../../../modules/local/fgumi/extract/main.nf" -include { FGUMI_FILTER } from "../../../modules/local/fgumi/filter/main.nf" -include { FGUMI_GROUP } from "../../../modules/local/fgumi/group/main.nf" -include { FGUMI_SIMPLEX } from "../../../modules/local/fgumi/simplex/main.nf" +include { FGUMI_EXTRACT } from "../../../modules/nf-core/fgumi/extract/main.nf" +include { FGUMI_FILTER } from "../../../modules/nf-core/fgumi/filter/main.nf" +include { FGUMI_GROUP } from "../../../modules/nf-core/fgumi/group/main.nf" +include { FGUMI_SIMPLEX } from "../../../modules/nf-core/fgumi/simplex/main.nf" include { FGUMI_SNAP_ZIPPER_SORT } from "../../../modules/local/fgumi/snapzippersort/main.nf" include { SAMTOOLS_SORT } from "../../../modules/nf-core/samtools/sort/main.nf" @@ -19,7 +19,7 @@ workflow UMI_CONSENSUS_FGUMI { // Step 1: build an unmapped BAM with UMI tags from input FASTQ. FGUMI_EXTRACT( ch_meta_reads_aligner_index_fasta - .map { meta, reads, _aligner, _index, _fasta -> [meta, reads] } + .map { meta, reads, _aligner, _index, _fasta -> [meta, reads, (meta.readgroup?.LB ?: meta.library ?: meta.id)] } ) // Step 3: align with SNAP, zipper tags back, then template-coordinate sort. 
@@ -35,21 +35,27 @@ workflow UMI_CONSENSUS_FGUMI { ) FGUMI_GROUP( - FGUMI_SNAP_ZIPPER_SORT.out.bam + FGUMI_SNAP_ZIPPER_SORT.out.bam, + (params.fgumi_group_strategy ?: 'adjacency') ) FGUMI_SIMPLEX( - FGUMI_GROUP.out.bam + FGUMI_GROUP.out.bam, + (params.fgumi_simplex_min_reads ?: 1), + false ) // Step 7: filter consensus reads, then coordinate-sort/index for downstream CRAM conversion. FGUMI_FILTER( + FGUMI_SIMPLEX.out.bam, FGUMI_SIMPLEX.out.bam .join( ch_meta_reads_aligner_index_fasta.map { meta, _reads, _aligner, _index, fasta -> [meta, fasta] }, by: 0, ) - .map { meta, bam, fasta -> [meta, bam, fasta] } + .map { meta, _bam, fasta -> [meta, fasta] }, + "1,1,1", + false ) SAMTOOLS_SORT( @@ -64,9 +70,9 @@ workflow UMI_CONSENSUS_FGUMI { emit: cram_crai = SAMTOOLS_SORT.out.cram.join(SAMTOOLS_SORT.out.crai, failOnMismatch: true, failOnDuplicate: true) - grouping_metrics = FGUMI_GROUP.out.grouping_metrics - family_size_histogram = FGUMI_GROUP.out.family_size_histogram - consensus_metrics = FGUMI_SIMPLEX.out.consensus_metrics - filtering_metrics = FGUMI_FILTER.out.filtering_metrics + grouping_metrics = FGUMI_GROUP.out.metrics + family_size_histogram = FGUMI_GROUP.out.histogram + consensus_metrics = FGUMI_SIMPLEX.out.stats + filtering_metrics = FGUMI_FILTER.out.stats filtered_consensus_cram = SAMTOOLS_SORT.out.cram } diff --git a/workflows/preprocessing.nf b/workflows/preprocessing.nf index 2065eff2..c9ae0092 100644 --- a/workflows/preprocessing.nf +++ b/workflows/preprocessing.nf @@ -437,6 +437,7 @@ workflow PREPROCESSING { // UMI-specific outputs exposed at workflow level. 
family_size_histogram = FASTQ_TO_CRAM.out.family_size_histogram umi_filtered_consensus_cram = FASTQ_TO_CRAM.out.filtered_consensus_cram + umi_zipper_diagnostics = FASTQ_TO_CRAM.out.zipper_diagnostics mosdepth_global = COVERAGE.out.mosdepth_global mosdepth_summary = COVERAGE.out.mosdepth_summary mosdepth_regions = COVERAGE.out.mosdepth_regions From 6578b77f828c4110f40f03e0bbe613f26a524c4b Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 17 Jun 2026 16:25:13 +0200 Subject: [PATCH 29/36] Refactor snapzippersort to snapzipper --- conf/modules.config | 27 ++++++++---------- docs/analysis_flow.md | 2 +- .../{snapzippersort => snapzipper}/main.nf | 28 ++++++++++++------- subworkflows/local/umi_consensus/main.nf | 6 ++-- .../main.nf.test | 10 +++---- .../main.nf.test.snap | 8 +++--- 6 files changed, 42 insertions(+), 39 deletions(-) rename modules/local/fgumi/{snapzippersort => snapzipper}/main.nf (72%) rename tests/modules/local/fgumi/{snapzippersort => snapzipper}/main.nf.test (81%) rename tests/modules/local/fgumi/{snapzippersort => snapzipper}/main.nf.test.snap (78%) diff --git a/conf/modules.config b/conf/modules.config index ec37a9c3..8c051a19 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -253,8 +253,8 @@ process { } } - //// FGUMI fastq | SNAP | zipper | template sort (step 3) - withName: '.*FGUMI_SNAP_ZIPPER_SORT' { + //// FGUMI fastq | SNAP | zipper (step 3a) + withName: '.*FGUMI_SNAP_ZIPPER:FGUMI_SNAP_ZIPPER_RUN' { cpus = 16 memory = 64.GB ext.prefix = { "${meta.id}.fgumi" } @@ -275,25 +275,20 @@ process { "--threads ${task.cpus}", ].join(" ").trim() } - ext.args3 = { - [ - "--threads ${task.cpus}", - "--max-memory ${params.fgumi_sort_max_memory}", - "--memory-per-thread=${params.fgumi_sort_memory_per_thread ? 
'true' : 'false'}", - ].join(" ").trim() - } - ext.args4 = { + } + + //// FGUMI template-coordinate sort after zipper (step 3b) + withName: '.*FGUMI_SNAP_ZIPPER:FGUMI_TEMPLATE_SORT' { + cpus = 16 + memory = 64.GB + ext.prefix = { "${meta.id}.fgumi.template" } + ext.args = { [ - "--threads ${task.cpus}", + "--order template-coordinate", "--max-memory ${params.fgumi_sort_max_memory}", "--memory-per-thread=${params.fgumi_sort_memory_per_thread ? 'true' : 'false'}", ].join(" ").trim() } - ext.args5 = { - [ - "-@ ${task.cpus}", - ].join(" ").trim() - } } //// FGUMI group (step 4) diff --git a/docs/analysis_flow.md b/docs/analysis_flow.md index 1aea1fb7..951e905b 100644 --- a/docs/analysis_flow.md +++ b/docs/analysis_flow.md @@ -17,7 +17,7 @@ flowchart TD subgraph UMI_CONSENSUS_FGUMI U1[Step 1: FGUMI_EXTRACT\n(reads -> unmapped BAM with UMI tags)] U1J[Join with reference assets\nSNAP index + fasta + dict from meta.genome_data] - U2[Step 3: FGUMI_SNAP_ZIPPER_SORT] + U2[Step 3: FGUMI_SNAP_ZIPPER] U2a[samtools sort -n\nunmapped BAM] U2b[fgumi fastq] U2c[snap-aligner paired] diff --git a/modules/local/fgumi/snapzippersort/main.nf b/modules/local/fgumi/snapzipper/main.nf similarity index 72% rename from modules/local/fgumi/snapzippersort/main.nf rename to modules/local/fgumi/snapzipper/main.nf index efc05276..c36028ff 100644 --- a/modules/local/fgumi/snapzippersort/main.nf +++ b/modules/local/fgumi/snapzipper/main.nf @@ -1,4 +1,6 @@ -process FGUMI_SNAP_ZIPPER_SORT { +include { FGUMI_SORT as FGUMI_TEMPLATE_SORT } from "../../../nf-core/fgumi/sort/main.nf" + +process FGUMI_SNAP_ZIPPER_RUN { tag "$meta.id" label 'process_high' @@ -10,7 +12,7 @@ process FGUMI_SNAP_ZIPPER_SORT { tuple val(meta), path(unmapped_bam), path(index, stageAs: "index/*"), path(fasta), path(dict) output: - tuple val(meta), path("${prefix}.template.bam"), emit: bam + tuple val(meta), path("${prefix}.zipper.bam"), emit: bam tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi 
//;q'"), topic: versions, emit: versions_fgumi when: @@ -19,7 +21,6 @@ process FGUMI_SNAP_ZIPPER_SORT { script: def snap_args = task.ext.args ?: '' def zipper_args = task.ext.args2 ?: '' - def sort_args = task.ext.args3 ?: '' prefix = task.ext.prefix ?: "${meta.id}.fgumi" """ @@ -42,17 +43,24 @@ process FGUMI_SNAP_ZIPPER_SORT { ${zipper_args} \ --output ${prefix}.zipper.bam \ < ${prefix}.snap.sam - - fgumi sort \ - --input ${prefix}.zipper.bam \ - --output ${prefix}.template.bam \ - --order template-coordinate \ - ${sort_args} """ stub: prefix = task.ext.prefix ?: "${meta.id}.fgumi" """ - touch ${prefix}.template.bam + touch ${prefix}.zipper.bam """ } + +workflow FGUMI_SNAP_ZIPPER { + take: + ch_meta_unmapped_index_fasta_dict + + main: + FGUMI_SNAP_ZIPPER_RUN(ch_meta_unmapped_index_fasta_dict) + FGUMI_TEMPLATE_SORT(FGUMI_SNAP_ZIPPER_RUN.out.bam) + + emit: + bam = FGUMI_TEMPLATE_SORT.out.bam + versions_fgumi = FGUMI_SNAP_ZIPPER_RUN.out.versions_fgumi.mix(FGUMI_TEMPLATE_SORT.out.versions_fgumi) +} diff --git a/subworkflows/local/umi_consensus/main.nf b/subworkflows/local/umi_consensus/main.nf index a6e2c758..078226cf 100644 --- a/subworkflows/local/umi_consensus/main.nf +++ b/subworkflows/local/umi_consensus/main.nf @@ -5,7 +5,7 @@ include { FGUMI_EXTRACT } from "../../../modules/nf-core/fgumi/extract/ include { FGUMI_FILTER } from "../../../modules/nf-core/fgumi/filter/main.nf" include { FGUMI_GROUP } from "../../../modules/nf-core/fgumi/group/main.nf" include { FGUMI_SIMPLEX } from "../../../modules/nf-core/fgumi/simplex/main.nf" -include { FGUMI_SNAP_ZIPPER_SORT } from "../../../modules/local/fgumi/snapzippersort/main.nf" +include { FGUMI_SNAP_ZIPPER } from "../../../modules/local/fgumi/snapzipper/main.nf" include { SAMTOOLS_SORT } from "../../../modules/nf-core/samtools/sort/main.nf" // FUNCTIONS @@ -23,7 +23,7 @@ workflow UMI_CONSENSUS_FGUMI { ) // Step 3: align with SNAP, zipper tags back, then template-coordinate sort. 
- FGUMI_SNAP_ZIPPER_SORT( + FGUMI_SNAP_ZIPPER( FGUMI_EXTRACT.out.bam .join( ch_meta_reads_aligner_index_fasta.map { meta, _reads, _aligner, _index, fasta -> @@ -35,7 +35,7 @@ workflow UMI_CONSENSUS_FGUMI { ) FGUMI_GROUP( - FGUMI_SNAP_ZIPPER_SORT.out.bam, + FGUMI_SNAP_ZIPPER.out.bam, (params.fgumi_group_strategy ?: 'adjacency') ) diff --git a/tests/modules/local/fgumi/snapzippersort/main.nf.test b/tests/modules/local/fgumi/snapzipper/main.nf.test similarity index 81% rename from tests/modules/local/fgumi/snapzippersort/main.nf.test rename to tests/modules/local/fgumi/snapzipper/main.nf.test index c3a34cbe..be4d31cd 100644 --- a/tests/modules/local/fgumi/snapzippersort/main.nf.test +++ b/tests/modules/local/fgumi/snapzipper/main.nf.test @@ -1,16 +1,16 @@ nextflow_process { - name "Test Process FGUMI_SNAP_ZIPPER_SORT" - script "modules/local/fgumi/snapzippersort/main.nf" - process "FGUMI_SNAP_ZIPPER_SORT" + name "Test Process FGUMI_SNAP_ZIPPER_RUN" + script "modules/local/fgumi/snapzipper/main.nf" + process "FGUMI_SNAP_ZIPPER_RUN" tag "modules" tag "modules/local" tag "modules/local/fgumi" - tag "modules/local/fgumi/snapzippersort" + tag "modules/local/fgumi/snapzipper" test("test - stub") { - // Stub-mode contract test: verifies template BAM output wiring. + // Stub-mode contract test: verifies zipper BAM output wiring. 
options "-stub" when { diff --git a/tests/modules/local/fgumi/snapzippersort/main.nf.test.snap b/tests/modules/local/fgumi/snapzipper/main.nf.test.snap similarity index 78% rename from tests/modules/local/fgumi/snapzippersort/main.nf.test.snap rename to tests/modules/local/fgumi/snapzipper/main.nf.test.snap index d575f1ef..97745a48 100644 --- a/tests/modules/local/fgumi/snapzippersort/main.nf.test.snap +++ b/tests/modules/local/fgumi/snapzipper/main.nf.test.snap @@ -8,12 +8,12 @@ "id": "test", "samplename": "test" }, - "test.fgumi.template.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.fgumi.zipper.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ [ - "FGUMI_SNAP_ZIPPER_SORT", + "FGUMI_SNAP_ZIPPER_RUN", "fgumi", "0.1.2" ] @@ -24,12 +24,12 @@ "id": "test", "samplename": "test" }, - "test.fgumi.template.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.fgumi.zipper.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions_fgumi": [ [ - "FGUMI_SNAP_ZIPPER_SORT", + "FGUMI_SNAP_ZIPPER_RUN", "fgumi", "0.1.2" ] From 6ccb7e76e3b64f45a9515d24316629bfa2bd5e32 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 17 Jun 2026 16:27:34 +0200 Subject: [PATCH 30/36] Removed all old local modules replaced by nf-core default --- modules/local/fgumi/extract/main.nf | 40 ---------- modules/local/fgumi/filter/main.nf | 38 ---------- modules/local/fgumi/group/main.nf | 44 ----------- modules/local/fgumi/simplex/main.nf | 37 --------- modules/local/fgumi/sort/main.nf | 37 --------- .../modules/local/fgumi/extract/main.nf.test | 35 --------- .../local/fgumi/extract/main.nf.test.snap | 47 ------------ tests/modules/local/fgumi/filter/main.nf.test | 33 -------- .../local/fgumi/filter/main.nf.test.snap | 59 --------------- tests/modules/local/fgumi/group/main.nf.test | 32 -------- .../local/fgumi/group/main.nf.test.snap | 75 ------------------- .../modules/local/fgumi/simplex/main.nf.test | 32 -------- .../local/fgumi/simplex/main.nf.test.snap | 59 --------------- 
tests/modules/local/fgumi/sort/main.nf.test | 33 -------- .../local/fgumi/sort/main.nf.test.snap | 59 --------------- 15 files changed, 660 deletions(-) delete mode 100644 modules/local/fgumi/extract/main.nf delete mode 100644 modules/local/fgumi/filter/main.nf delete mode 100644 modules/local/fgumi/group/main.nf delete mode 100644 modules/local/fgumi/simplex/main.nf delete mode 100644 modules/local/fgumi/sort/main.nf delete mode 100644 tests/modules/local/fgumi/extract/main.nf.test delete mode 100644 tests/modules/local/fgumi/extract/main.nf.test.snap delete mode 100644 tests/modules/local/fgumi/filter/main.nf.test delete mode 100644 tests/modules/local/fgumi/filter/main.nf.test.snap delete mode 100644 tests/modules/local/fgumi/group/main.nf.test delete mode 100644 tests/modules/local/fgumi/group/main.nf.test.snap delete mode 100644 tests/modules/local/fgumi/simplex/main.nf.test delete mode 100644 tests/modules/local/fgumi/simplex/main.nf.test.snap delete mode 100644 tests/modules/local/fgumi/sort/main.nf.test delete mode 100644 tests/modules/local/fgumi/sort/main.nf.test.snap diff --git a/modules/local/fgumi/extract/main.nf b/modules/local/fgumi/extract/main.nf deleted file mode 100644 index 89002f13..00000000 --- a/modules/local/fgumi/extract/main.nf +++ /dev/null @@ -1,40 +0,0 @@ -process FGUMI_EXTRACT { - tag "$meta.id" - label 'process_medium' - - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data' - : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("${prefix}.bam"), emit: bam - tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - // Derive per-thread queue memory from requested process resources. - def queue_memory_mb = (task.memory.mega / task.cpus * 0.75).intValue() - prefix = task.ext.prefix ?: "${meta.id}.fgumi.unmapped" - - """ - fgumi extract \ - --inputs ${reads} \ - --output ${prefix}.bam \ - --threads ${task.cpus} \ - --queue-memory ${queue_memory_mb} \ - --queue-memory-per-thread \ - ${args} - """ - - stub: - prefix = task.ext.prefix ?: "${meta.id}.fgumi.unmapped" - """ - touch ${prefix}.bam - """ -} diff --git a/modules/local/fgumi/filter/main.nf b/modules/local/fgumi/filter/main.nf deleted file mode 100644 index 206e1f6a..00000000 --- a/modules/local/fgumi/filter/main.nf +++ /dev/null @@ -1,38 +0,0 @@ -process FGUMI_FILTER { - tag "$meta.id" - label 'process_medium' - - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f7267104b209869695781a5f4585c490a61250269f8c6f14068535a3962b865a/data' - : 'community.wave.seqera.io/library/fgumi:0.2.0--fe028e7a64e5da27'}" - - input: - tuple val(meta), path(bam), path(fasta) - - output: - tuple val(meta), path("${prefix}.filtered.bam"), emit: bam - tuple val(meta), path("${prefix}.filtering_metrics.txt"), optional: true, emit: filtering_metrics - tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}.fgumi.filter" - - """ - fgumi filter \ - --input ${bam} \ - --output ${prefix}.filtered.bam \ - --ref ${fasta} \ - ${args} - """ - - stub: - prefix = task.ext.prefix ?: "${meta.id}.fgumi.filter" - """ - touch ${prefix}.filtered.bam - touch ${prefix}.filtering_metrics.txt - """ -} diff --git a/modules/local/fgumi/group/main.nf b/modules/local/fgumi/group/main.nf deleted file mode 100644 index dd93464b..00000000 --- a/modules/local/fgumi/group/main.nf +++ /dev/null @@ -1,44 +0,0 @@ -process FGUMI_GROUP { - tag "$meta.id" - label 'process_medium' - - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f7267104b209869695781a5f4585c490a61250269f8c6f14068535a3962b865a/data' - : 'community.wave.seqera.io/library/fgumi:0.2.0--fe028e7a64e5da27'}" - - input: - tuple val(meta), path(bam) - - output: - tuple val(meta), path("${prefix}.bam"), emit: bam - tuple val(meta), path("${prefix}.grouping_metrics.txt"), optional: true, emit: grouping_metrics - tuple val(meta), path("${prefix}.family_size_histogram.txt"), optional: true, emit: family_size_histogram - tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - // Derive per-thread queue memory from requested process resources. - def queue_memory_mb = (task.memory.mega / task.cpus * 0.75).intValue() - prefix = task.ext.prefix ?: "${meta.id}.fgumi.group" - - """ - fgumi group \ - --input ${bam} \ - --output ${prefix}.bam \ - --threads ${task.cpus} \ - --queue-memory ${queue_memory_mb} \ - --queue-memory-per-thread \ - ${args} - """ - - stub: - prefix = task.ext.prefix ?: "${meta.id}.fgumi.group" - """ - touch ${prefix}.bam - touch ${prefix}.grouping_metrics.txt - touch ${prefix}.family_size_histogram.txt - """ -} diff --git a/modules/local/fgumi/simplex/main.nf b/modules/local/fgumi/simplex/main.nf deleted file mode 100644 index fd1db692..00000000 --- a/modules/local/fgumi/simplex/main.nf +++ /dev/null @@ -1,37 +0,0 @@ -process FGUMI_SIMPLEX { - tag "$meta.id" - label 'process_medium' - - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f7267104b209869695781a5f4585c490a61250269f8c6f14068535a3962b865a/data' - : 'community.wave.seqera.io/library/fgumi:0.2.0--fe028e7a64e5da27'}" - - input: - tuple val(meta), path(bam) - - output: - tuple val(meta), path("${prefix}.bam"), emit: bam - tuple val(meta), path("${prefix}.consensus_metrics.txt"), optional: true, emit: consensus_metrics - tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}.fgumi.simplex" - - """ - fgumi simplex \ - --input ${bam} \ - --output ${prefix}.bam \ - ${args} - """ - - stub: - prefix = task.ext.prefix ?: "${meta.id}.fgumi.simplex" - """ - touch ${prefix}.bam - touch ${prefix}.consensus_metrics.txt - """ -} diff --git a/modules/local/fgumi/sort/main.nf b/modules/local/fgumi/sort/main.nf deleted file mode 100644 index a7963fbf..00000000 --- a/modules/local/fgumi/sort/main.nf +++ /dev/null @@ -1,37 +0,0 @@ -process FGUMI_SORT { - tag "$meta.id" - label 'process_medium' - - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f7267104b209869695781a5f4585c490a61250269f8c6f14068535a3962b865a/data' - : 'community.wave.seqera.io/library/fgumi:0.2.0--fe028e7a64e5da27'}" - - input: - tuple val(meta), path(bam) - - output: - tuple val(meta), path("${prefix}.bam"), emit: bam - tuple val(meta), path("${prefix}.bam.bai"), emit: bai - tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}.fgumi.filter" - - """ - fgumi sort \ - --input ${bam} \ - --output ${prefix}.bam \ - ${args} - """ - - stub: - prefix = task.ext.prefix ?: "${meta.id}.fgumi.filter" - """ - touch ${prefix}.bam - touch ${prefix}.bam.bai - """ -} diff --git a/tests/modules/local/fgumi/extract/main.nf.test b/tests/modules/local/fgumi/extract/main.nf.test deleted file mode 100644 index 40cf1af8..00000000 --- a/tests/modules/local/fgumi/extract/main.nf.test +++ /dev/null @@ -1,35 +0,0 @@ -nextflow_process { - - name "Test Process FGUMI_EXTRACT" - script "modules/local/fgumi/extract/main.nf" - process "FGUMI_EXTRACT" - - tag "modules" - tag "modules/local" - tag "modules/local/fgumi" - tag "modules/local/fgumi/extract" - - test("test - stub") { - // Stub-mode contract test: verifies extract output channel and versions tuple. 
- options "-stub" - - when { - process { - """ - input[0] = [ - [id: "test", samplename: "test", library: "lib1"], - [ - file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/sample1_S31_R1_001.fastq.gz", checkIfExists: true), - file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/sample1_S31_R2_001.fastq.gz", checkIfExists: true) - ] - ] - """ - } - } - - then { - assert process.success - assert snapshot(process.out).match() - } - } -} diff --git a/tests/modules/local/fgumi/extract/main.nf.test.snap b/tests/modules/local/fgumi/extract/main.nf.test.snap deleted file mode 100644 index 82b09e4b..00000000 --- a/tests/modules/local/fgumi/extract/main.nf.test.snap +++ /dev/null @@ -1,47 +0,0 @@ -{ - "test - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "samplename": "test", - "library": "lib1" - }, - "test.fgumi.unmapped.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - "FGUMI_EXTRACT", - "fgumi", - "0.1.2" - ] - ], - "bam": [ - [ - { - "id": "test", - "samplename": "test", - "library": "lib1" - }, - "test.fgumi.unmapped.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions_fgumi": [ - [ - "FGUMI_EXTRACT", - "fgumi", - "0.1.2" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.4" - }, - "timestamp": "2026-04-02T15:59:46.342116682" - } -} \ No newline at end of file diff --git a/tests/modules/local/fgumi/filter/main.nf.test b/tests/modules/local/fgumi/filter/main.nf.test deleted file mode 100644 index 51c6f8c8..00000000 --- a/tests/modules/local/fgumi/filter/main.nf.test +++ /dev/null @@ -1,33 +0,0 @@ -nextflow_process { - - name "Test Process FGUMI_FILTER" - script "modules/local/fgumi/filter/main.nf" - process "FGUMI_FILTER" - - tag "modules" - tag "modules/local" - tag "modules/local/fgumi" - tag "modules/local/fgumi/filter" - - test("test - stub") { - // Stub-mode contract test: verifies filtered BAM and metrics 
outputs. - options "-stub" - - when { - process { - """ - input[0] = [ - [id: "test"], - file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/consensus.bam", checkIfExists: true), - file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", checkIfExists: true) - ] - """ - } - } - - then { - assert process.success - assert snapshot(process.out).match() - } - } -} diff --git a/tests/modules/local/fgumi/filter/main.nf.test.snap b/tests/modules/local/fgumi/filter/main.nf.test.snap deleted file mode 100644 index 7f59c001..00000000 --- a/tests/modules/local/fgumi/filter/main.nf.test.snap +++ /dev/null @@ -1,59 +0,0 @@ -{ - "test - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.fgumi.filter.filtered.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test.fgumi.filter.filtering_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - "FGUMI_FILTER", - "fgumi", - "0.1.2" - ] - ], - "bam": [ - [ - { - "id": "test" - }, - "test.fgumi.filter.filtered.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "filtering_metrics": [ - [ - { - "id": "test" - }, - "test.fgumi.filter.filtering_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions_fgumi": [ - [ - "FGUMI_FILTER", - "fgumi", - "0.1.2" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.4" - }, - "timestamp": "2026-04-08T10:07:36.721154372" - } -} \ No newline at end of file diff --git a/tests/modules/local/fgumi/group/main.nf.test b/tests/modules/local/fgumi/group/main.nf.test deleted file mode 100644 index cd54ae2f..00000000 --- a/tests/modules/local/fgumi/group/main.nf.test +++ /dev/null @@ -1,32 +0,0 @@ -nextflow_process { - - name "Test Process FGUMI_GROUP" - script "modules/local/fgumi/group/main.nf" - process "FGUMI_GROUP" - - tag "modules" - tag 
"modules/local" - tag "modules/local/fgumi" - tag "modules/local/fgumi/group" - - test("test - stub") { - // Stub-mode contract test: verifies grouping and family-size outputs. - options "-stub" - - when { - process { - """ - input[0] = [ - [id: "test"], - file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/template.bam", checkIfExists: true) - ] - """ - } - } - - then { - assert process.success - assert snapshot(process.out).match() - } - } -} diff --git a/tests/modules/local/fgumi/group/main.nf.test.snap b/tests/modules/local/fgumi/group/main.nf.test.snap deleted file mode 100644 index d242fb4a..00000000 --- a/tests/modules/local/fgumi/group/main.nf.test.snap +++ /dev/null @@ -1,75 +0,0 @@ -{ - "test - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.fgumi.group.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test.fgumi.group.grouping_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "test.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - [ - "FGUMI_GROUP", - "fgumi", - "0.1.2" - ] - ], - "bam": [ - [ - { - "id": "test" - }, - "test.fgumi.group.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "family_size_histogram": [ - [ - { - "id": "test" - }, - "test.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "grouping_metrics": [ - [ - { - "id": "test" - }, - "test.fgumi.group.grouping_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions_fgumi": [ - [ - "FGUMI_GROUP", - "fgumi", - "0.1.2" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.4" - }, - "timestamp": "2026-04-02T16:05:08.118138918" - } -} \ No newline at end of file diff --git a/tests/modules/local/fgumi/simplex/main.nf.test b/tests/modules/local/fgumi/simplex/main.nf.test deleted file mode 100644 index 9ff5c017..00000000 --- 
a/tests/modules/local/fgumi/simplex/main.nf.test +++ /dev/null @@ -1,32 +0,0 @@ -nextflow_process { - - name "Test Process FGUMI_SIMPLEX" - script "modules/local/fgumi/simplex/main.nf" - process "FGUMI_SIMPLEX" - - tag "modules" - tag "modules/local" - tag "modules/local/fgumi" - tag "modules/local/fgumi/simplex" - - test("test - stub") { - // Stub-mode contract test: verifies simplex BAM and consensus metrics. - options "-stub" - - when { - process { - """ - input[0] = [ - [id: "test"], - file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/grouped.bam", checkIfExists: true) - ] - """ - } - } - - then { - assert process.success - assert snapshot(process.out).match() - } - } -} diff --git a/tests/modules/local/fgumi/simplex/main.nf.test.snap b/tests/modules/local/fgumi/simplex/main.nf.test.snap deleted file mode 100644 index 14ecba3d..00000000 --- a/tests/modules/local/fgumi/simplex/main.nf.test.snap +++ /dev/null @@ -1,59 +0,0 @@ -{ - "test - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.fgumi.simplex.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test.fgumi.simplex.consensus_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - "FGUMI_SIMPLEX", - "fgumi", - "0.1.2" - ] - ], - "bam": [ - [ - { - "id": "test" - }, - "test.fgumi.simplex.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "consensus_metrics": [ - [ - { - "id": "test" - }, - "test.fgumi.simplex.consensus_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions_fgumi": [ - [ - "FGUMI_SIMPLEX", - "fgumi", - "0.1.2" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.4" - }, - "timestamp": "2026-04-02T16:05:15.268242503" - } -} \ No newline at end of file diff --git a/tests/modules/local/fgumi/sort/main.nf.test b/tests/modules/local/fgumi/sort/main.nf.test deleted file mode 100644 index 380727d0..00000000 --- 
a/tests/modules/local/fgumi/sort/main.nf.test +++ /dev/null @@ -1,33 +0,0 @@ -nextflow_process { - - name "Test Process FGUMI_SORT" - script "modules/local/fgumi/sort/main.nf" - process "FGUMI_SORT" - - tag "modules" - tag "modules/local" - tag "modules/local/fgumi" - tag "modules/local/fgumi/sort" - - test("test - stub") { - // Stub-mode contract test: verifies coordinate-sorted BAM/index outputs. - options "-stub" - - when { - process { - """ - input[0] = [ - [id: "test"], - file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/consensus.bam", checkIfExists: true) - ] - """ - } - } - - then { - assert process.success - assert snapshot(process.out).match() - } - } - -} diff --git a/tests/modules/local/fgumi/sort/main.nf.test.snap b/tests/modules/local/fgumi/sort/main.nf.test.snap deleted file mode 100644 index c597cb17..00000000 --- a/tests/modules/local/fgumi/sort/main.nf.test.snap +++ /dev/null @@ -1,59 +0,0 @@ -{ - "test - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - "FGUMI_SORT", - "fgumi", - "0.1.2" - ] - ], - "bai": [ - [ - { - "id": "test" - }, - "test.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "bam": [ - [ - { - "id": "test" - }, - "test.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions_fgumi": [ - [ - "FGUMI_SORT", - "fgumi", - "0.1.2" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.4" - }, - "timestamp": "2026-04-08T10:07:49.850827528" - } -} \ No newline at end of file From fa6e6902e417f0a08b1da344c1327c516c38bc92 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 17 Jun 2026 16:30:24 +0200 Subject: [PATCH 31/36] Updated fgumi to version 0.3.0 --- modules/nf-core/fgumi/extract/main.nf | 4 ++-- 
modules/nf-core/fgumi/filter/main.nf | 4 ++-- modules/nf-core/fgumi/group/main.nf | 4 ++-- modules/nf-core/fgumi/sort/main.nf | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/nf-core/fgumi/extract/main.nf b/modules/nf-core/fgumi/extract/main.nf index fb17d5ad..374b1a1b 100644 --- a/modules/nf-core/fgumi/extract/main.nf +++ b/modules/nf-core/fgumi/extract/main.nf @@ -4,8 +4,8 @@ process FGUMI_EXTRACT { conda "${moduleDir}/environment.yml" container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a5/a510706f3481fae12ff6100d6e4ad298b8bf464a2d93a6afe35e9cf26542d080/data' - : 'community.wave.seqera.io/library/fgumi:0.2.0--fe028e7a64e5da27'}" + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/394ef34259ba03c393d25ce2559530fe6df2c6f125a27be69da89ca7a1c70e30/data' : + 'community.wave.seqera.io/library/fgumi:0.3.0--bcbb552cbefcbda1' }" input: tuple val(meta), path(reads), val(library) diff --git a/modules/nf-core/fgumi/filter/main.nf b/modules/nf-core/fgumi/filter/main.nf index 5f449c34..01d68133 100644 --- a/modules/nf-core/fgumi/filter/main.nf +++ b/modules/nf-core/fgumi/filter/main.nf @@ -4,8 +4,8 @@ process FGUMI_FILTER { conda "${moduleDir}/environment.yml" container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a5/a510706f3481fae12ff6100d6e4ad298b8bf464a2d93a6afe35e9cf26542d080/data' - : 'community.wave.seqera.io/library/fgumi:0.2.0--fe028e7a64e5da27'}" + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/394ef34259ba03c393d25ce2559530fe6df2c6f125a27be69da89ca7a1c70e30/data' : + 'community.wave.seqera.io/library/fgumi:0.3.0--bcbb552cbefcbda1' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/fgumi/group/main.nf b/modules/nf-core/fgumi/group/main.nf index 0c40b671..44876751 100644 --- a/modules/nf-core/fgumi/group/main.nf +++ b/modules/nf-core/fgumi/group/main.nf @@ -4,8 +4,8 @@ process FGUMI_GROUP { conda "${moduleDir}/environment.yml" container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a5/a510706f3481fae12ff6100d6e4ad298b8bf464a2d93a6afe35e9cf26542d080/data' - : 'community.wave.seqera.io/library/fgumi:0.2.0--fe028e7a64e5da27'}" + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/394ef34259ba03c393d25ce2559530fe6df2c6f125a27be69da89ca7a1c70e30/data' : + 'community.wave.seqera.io/library/fgumi:0.3.0--bcbb552cbefcbda1' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/fgumi/sort/main.nf b/modules/nf-core/fgumi/sort/main.nf index 6255810a..00679c9f 100644 --- a/modules/nf-core/fgumi/sort/main.nf +++ b/modules/nf-core/fgumi/sort/main.nf @@ -4,8 +4,8 @@ process FGUMI_SORT { conda "${moduleDir}/environment.yml" container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a5/a510706f3481fae12ff6100d6e4ad298b8bf464a2d93a6afe35e9cf26542d080/data' - : 'community.wave.seqera.io/library/fgumi:0.2.0--fe028e7a64e5da27'}" + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/394ef34259ba03c393d25ce2559530fe6df2c6f125a27be69da89ca7a1c70e30/data' : + 'community.wave.seqera.io/library/fgumi:0.3.0--bcbb552cbefcbda1' }" input: tuple val(meta), path(bam) From 749de984c1321cece78e752e98fb85cb98353693 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 17 Jun 2026 16:43:36 +0200 Subject: [PATCH 32/36] output fix --- subworkflows/local/umi_consensus/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/umi_consensus/main.nf b/subworkflows/local/umi_consensus/main.nf index 078226cf..bef790e5 100644 --- a/subworkflows/local/umi_consensus/main.nf +++ b/subworkflows/local/umi_consensus/main.nf @@ -70,6 +70,7 @@ workflow UMI_CONSENSUS_FGUMI { emit: cram_crai = SAMTOOLS_SORT.out.cram.join(SAMTOOLS_SORT.out.crai, failOnMismatch: true, failOnDuplicate: true) + zipper_diagnostics = Channel.empty() grouping_metrics = FGUMI_GROUP.out.metrics family_size_histogram = FGUMI_GROUP.out.histogram consensus_metrics = FGUMI_SIMPLEX.out.stats From 15b59bf0b84e47a9dec9a0cf57c2fecba93191da Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 17 Jun 2026 17:49:49 +0200 Subject: [PATCH 33/36] Refactor snapzipper; sorting --- conf/modules.config | 38 +++++++++++- modules/local/fgumi/snapzipper/main.nf | 58 +++++++++++++++---- modules/local/fgumi/zipper/main.nf | 37 ++++++++++++ .../local/fgumi/snapzipper/main.nf.test | 2 +- .../local/fgumi/snapzipper/main.nf.test.snap | 24 +++++++- tests/modules/local/fgumi/zipper/main.nf.test | 34 +++++++++++ .../local/fgumi/zipper/main.nf.test.snap | 45 ++++++++++++++ 7 files changed, 220 insertions(+), 18 deletions(-) create mode 100644 modules/local/fgumi/zipper/main.nf create mode 100644 tests/modules/local/fgumi/zipper/main.nf.test create mode 100644 tests/modules/local/fgumi/zipper/main.nf.test.snap diff --git a/conf/modules.config b/conf/modules.config index 8c051a19..f8873de7 100644 --- a/conf/modules.config +++ 
b/conf/modules.config @@ -270,14 +270,46 @@ process { meta.readgroup ? "-R \"@RG\\t" + meta.readgroup.findResults { rg -> rg.value?.trim() ? "${rg.key}:${rg.value}" : null }.join("\\t") + "\"" : "", ].join(" ").trim() } - ext.args2 = { + } + + //// Queryname sort unmapped BAM before zipper (step 3b) + withName: '.*FGUMI_SNAP_ZIPPER:SAMTOOLS_QNAME_SORT_UNMAPPED' { + cpus = 16 + memory = 64.GB + ext.prefix = { "${meta.id}.fgumi.unmapped.queryname" } + ext.args = { + [ + "-n", + ].join(" ").trim() + } + } + + //// Queryname sort mapped stream before zipper (step 3c) + withName: '.*FGUMI_SNAP_ZIPPER:SAMTOOLS_QNAME_SORT_MAPPED' { + cpus = 16 + memory = 64.GB + ext.prefix = { "${meta.id}.fgumi.snap.queryname" } + ext.args = { + [ + "-n", + "--output-fmt sam", + ].join(" ").trim() + } + } + + //// FGUMI zipper between queryname-sorted streams (step 3d) + withName: '.*FGUMI_SNAP_ZIPPER:FGUMI_ZIPPER' { + cpus = 16 + memory = 64.GB + ext.prefix = { "${meta.id}.fgumi" } + ext.args = { [ "--threads ${task.cpus}", ].join(" ").trim() } } - //// FGUMI template-coordinate sort after zipper (step 3b) + //// FGUMI template-coordinate sort after zipper (step 3e) withName: '.*FGUMI_SNAP_ZIPPER:FGUMI_TEMPLATE_SORT' { cpus = 16 memory = 64.GB @@ -346,7 +378,7 @@ process { withName: '.*UMI_CONSENSUS_FGUMI:SAMTOOLS_SORT' { cpus = 8 memory = 32.GB - ext.prefix = { "${meta.id}.fgumi.filter" } + ext.prefix = { "${meta.id}.fgumi.filter.sorted" } ext.args = { [ "--write-index", diff --git a/modules/local/fgumi/snapzipper/main.nf b/modules/local/fgumi/snapzipper/main.nf index c36028ff..76b89991 100644 --- a/modules/local/fgumi/snapzipper/main.nf +++ b/modules/local/fgumi/snapzipper/main.nf @@ -1,4 +1,7 @@ include { FGUMI_SORT as FGUMI_TEMPLATE_SORT } from "../../../nf-core/fgumi/sort/main.nf" +include { FGUMI_ZIPPER } from "../zipper/main.nf" +include { SAMTOOLS_SORT as SAMTOOLS_QNAME_SORT_UNMAPPED } from "../../../nf-core/samtools/sort/main.nf" +include { SAMTOOLS_SORT as 
SAMTOOLS_QNAME_SORT_MAPPED } from "../../../nf-core/samtools/sort/main.nf" process FGUMI_SNAP_ZIPPER_RUN { tag "$meta.id" @@ -12,7 +15,8 @@ process FGUMI_SNAP_ZIPPER_RUN { tuple val(meta), path(unmapped_bam), path(index, stageAs: "index/*"), path(fasta), path(dict) output: - tuple val(meta), path("${prefix}.zipper.bam"), emit: bam + tuple val(meta), path("${prefix}.snap.bam"), emit: mapped_bam + tuple val(meta), path(unmapped_bam), emit: unmapped_bam tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi when: @@ -20,7 +24,6 @@ process FGUMI_SNAP_ZIPPER_RUN { script: def snap_args = task.ext.args ?: '' - def zipper_args = task.ext.args2 ?: '' prefix = task.ext.prefix ?: "${meta.id}.fgumi" """ @@ -37,18 +40,18 @@ process FGUMI_SNAP_ZIPPER_RUN { ${snap_args} \ > ${prefix}.snap.sam - fgumi zipper \ - --unmapped ${unmapped_bam} \ - --reference ${fasta} \ - ${zipper_args} \ - --output ${prefix}.zipper.bam \ - < ${prefix}.snap.sam + samtools view \ + -@ ${task.cpus} \ + -b \ + -o ${prefix}.snap.bam \ + ${prefix}.snap.sam """ stub: prefix = task.ext.prefix ?: "${meta.id}.fgumi" """ - touch ${prefix}.zipper.bam + touch ${prefix}.snap.bam + touch ${unmapped_bam} """ } @@ -58,9 +61,42 @@ workflow FGUMI_SNAP_ZIPPER { main: FGUMI_SNAP_ZIPPER_RUN(ch_meta_unmapped_index_fasta_dict) - FGUMI_TEMPLATE_SORT(FGUMI_SNAP_ZIPPER_RUN.out.bam) + + // Queryname sort the unmapped BAM in parallel with mapped BAM sort. + SAMTOOLS_QNAME_SORT_UNMAPPED( + FGUMI_SNAP_ZIPPER_RUN.out.unmapped_bam + .join( + ch_meta_unmapped_index_fasta_dict.map { meta, _unmapped_bam, _index, fasta, _dict -> [meta, fasta] }, + by: 0, + ) + .map { meta, unmapped_bam, fasta -> [meta, unmapped_bam, fasta] }, + null + ) + + // Sort mapped alignments by queryname and emit SAM for zipper stdin. 
+ SAMTOOLS_QNAME_SORT_MAPPED( + FGUMI_SNAP_ZIPPER_RUN.out.mapped_bam + .join( + ch_meta_unmapped_index_fasta_dict.map { meta, _unmapped_bam, _index, fasta, _dict -> [meta, fasta] }, + by: 0, + ) + .map { meta, mapped_bam, fasta -> [meta, mapped_bam, fasta] }, + null + ) + + FGUMI_ZIPPER( + SAMTOOLS_QNAME_SORT_MAPPED.out.sam + .join(SAMTOOLS_QNAME_SORT_UNMAPPED.out.bam, by: 0) + .join( + ch_meta_unmapped_index_fasta_dict.map { meta, _unmapped_bam, _index, fasta, dict -> [meta, fasta, dict] }, + by: 0, + ) + .map { meta, mapped_sam, unmapped_qname_bam, fasta, dict -> [meta, mapped_sam, unmapped_qname_bam, fasta, dict] } + ) + + FGUMI_TEMPLATE_SORT(FGUMI_ZIPPER.out.bam) emit: bam = FGUMI_TEMPLATE_SORT.out.bam - versions_fgumi = FGUMI_SNAP_ZIPPER_RUN.out.versions_fgumi.mix(FGUMI_TEMPLATE_SORT.out.versions_fgumi) + versions_fgumi = FGUMI_SNAP_ZIPPER_RUN.out.versions_fgumi.mix(FGUMI_ZIPPER.out.versions_fgumi).mix(FGUMI_TEMPLATE_SORT.out.versions_fgumi) } diff --git a/modules/local/fgumi/zipper/main.nf b/modules/local/fgumi/zipper/main.nf new file mode 100644 index 00000000..473e6ad3 --- /dev/null +++ b/modules/local/fgumi/zipper/main.nf @@ -0,0 +1,37 @@ +process FGUMI_ZIPPER { + tag "$meta.id" + label 'process_high' + + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/763a833519c23555be888065f492215f57344155106972e272a0f8df78c57659/data' + : 'community.wave.seqera.io/library/fgumi_samtools_snap-aligner:c985f9394623a414'}" + + input: + tuple val(meta), path(mapped_sam), path(unmapped_qname_bam), path(fasta), path(dict) + + output: + tuple val(meta), path("${prefix}.zipper.bam"), emit: bam + tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.fgumi" + + """ + fgumi zipper \ + --unmapped ${unmapped_qname_bam} \ + --reference ${fasta} \ + ${args} \ + --output ${prefix}.zipper.bam \ + < ${mapped_sam} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.fgumi" + """ + touch ${prefix}.zipper.bam + """ +} diff --git a/tests/modules/local/fgumi/snapzipper/main.nf.test b/tests/modules/local/fgumi/snapzipper/main.nf.test index be4d31cd..904ed08c 100644 --- a/tests/modules/local/fgumi/snapzipper/main.nf.test +++ b/tests/modules/local/fgumi/snapzipper/main.nf.test @@ -10,7 +10,7 @@ nextflow_process { tag "modules/local/fgumi/snapzipper" test("test - stub") { - // Stub-mode contract test: verifies zipper BAM output wiring. + // Stub-mode contract test: verifies qname-sorted prep outputs. 
options "-stub" when { diff --git a/tests/modules/local/fgumi/snapzipper/main.nf.test.snap b/tests/modules/local/fgumi/snapzipper/main.nf.test.snap index 97745a48..bacb50b6 100644 --- a/tests/modules/local/fgumi/snapzipper/main.nf.test.snap +++ b/tests/modules/local/fgumi/snapzipper/main.nf.test.snap @@ -8,23 +8,41 @@ "id": "test", "samplename": "test" }, - "test.fgumi.zipper.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.fgumi.snap.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ + [ + { + "id": "test", + "samplename": "test" + }, + "unmapped.bam:md5,329308690118ca17867c78cc61a0fab0" + ] + ], + "2": [ [ "FGUMI_SNAP_ZIPPER_RUN", "fgumi", "0.1.2" ] ], - "bam": [ + "mapped_bam": [ + [ + { + "id": "test", + "samplename": "test" + }, + "test.fgumi.snap.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "unmapped_bam": [ [ { "id": "test", "samplename": "test" }, - "test.fgumi.zipper.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + "unmapped.bam:md5,329308690118ca17867c78cc61a0fab0" ] ], "versions_fgumi": [ diff --git a/tests/modules/local/fgumi/zipper/main.nf.test b/tests/modules/local/fgumi/zipper/main.nf.test new file mode 100644 index 00000000..0fe82acc --- /dev/null +++ b/tests/modules/local/fgumi/zipper/main.nf.test @@ -0,0 +1,34 @@ +nextflow_process { + + name "Test Process FGUMI_ZIPPER" + script "modules/local/fgumi/zipper/main.nf" + process "FGUMI_ZIPPER" + + tag "modules" + tag "modules/local" + tag "modules/local/fgumi" + tag "modules/local/fgumi/zipper" + + test("test - stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [id: "test", samplename: "test"], + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/unmapped.bam", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/unmapped.bam", checkIfExists: true), + 
file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/tests/modules/local/fgumi/zipper/main.nf.test.snap b/tests/modules/local/fgumi/zipper/main.nf.test.snap new file mode 100644 index 00000000..dd22e5d2 --- /dev/null +++ b/tests/modules/local/fgumi/zipper/main.nf.test.snap @@ -0,0 +1,45 @@ +{ + "test - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "samplename": "test" + }, + "test.fgumi.zipper.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "FGUMI_ZIPPER", + "fgumi", + "0.1.2" + ] + ], + "bam": [ + [ + { + "id": "test", + "samplename": "test" + }, + "test.fgumi.zipper.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_ZIPPER", + "fgumi", + "0.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-06-17T00:00:00.000000000" + } +} From cb512951b5104e90e3957d80b0e26966ede2d585 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Fri, 19 Jun 2026 14:55:11 +0200 Subject: [PATCH 34/36] Some refactoring + tests added --- conf/modules.config | 2 +- modules/local/fgumi/snapalign/main.nf | 44 +++++ modules/local/fgumi/snapzipper/main.nf | 102 ----------- subworkflows/local/fgumi_snapzipper/main.nf | 54 ++++++ subworkflows/local/umi_consensus/main.nf | 4 +- tests/inputs/test.yml | 2 +- .../local/fgumi/snapalign/main.nf.test | 58 ++++++ .../local/fgumi/snapalign/main.nf.test.snap | 124 +++++++++++++ .../local/fgumi/snapzipper/main.nf.test | 35 ---- .../local/fgumi/snapzipper/main.nf.test.snap | 63 ------- 
.../local/umi_consensus/main.nf.test.snap | 168 +++++++++--------- workflows/preprocessing.nf | 1 - 12 files changed, 367 insertions(+), 290 deletions(-) create mode 100644 modules/local/fgumi/snapalign/main.nf delete mode 100644 modules/local/fgumi/snapzipper/main.nf create mode 100644 subworkflows/local/fgumi_snapzipper/main.nf create mode 100644 tests/modules/local/fgumi/snapalign/main.nf.test create mode 100644 tests/modules/local/fgumi/snapalign/main.nf.test.snap delete mode 100644 tests/modules/local/fgumi/snapzipper/main.nf.test delete mode 100644 tests/modules/local/fgumi/snapzipper/main.nf.test.snap diff --git a/conf/modules.config b/conf/modules.config index f8873de7..28ca1730 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -254,7 +254,7 @@ process { } //// FGUMI fastq | SNAP | zipper (step 3a) - withName: '.*FGUMI_SNAP_ZIPPER:FGUMI_SNAP_ZIPPER_RUN' { + withName: '.*FGUMI_SNAP_ZIPPER:FGUMI_SNAP_ALIGN' { cpus = 16 memory = 64.GB ext.prefix = { "${meta.id}.fgumi" } diff --git a/modules/local/fgumi/snapalign/main.nf b/modules/local/fgumi/snapalign/main.nf new file mode 100644 index 00000000..e79af27c --- /dev/null +++ b/modules/local/fgumi/snapalign/main.nf @@ -0,0 +1,44 @@ +process FGUMI_SNAP_ALIGN { + tag "$meta.id" + label 'process_high' + + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/763a833519c23555be888065f492215f57344155106972e272a0f8df78c57659/data' + : 'community.wave.seqera.io/library/fgumi_samtools_snap-aligner:c985f9394623a414'}" + + input: + tuple val(meta), path(unmapped_bam), path(index, stageAs: "index/*"), path(fasta), path(dict) + + output: + tuple val(meta), path("${prefix}.snap.bam"), emit: mapped_bam + tuple val(meta), path(unmapped_bam), emit: unmapped_bam + tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def snap_args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.fgumi" + + """ + INDEX_FILE=\$(find -L ./ -name "OverflowTable*" -print -quit) + [ -z "\$INDEX_FILE" ] && echo "Snap index files not found" 1>&2 && exit 1 + INDEX=\$(dirname "\$INDEX_FILE") + + fgumi fastq --input ${unmapped_bam} \ + | snap-aligner paired \ + \$INDEX \ + -pairedInterleavedFastq - \ + -o ${prefix}.snap.bam \ + -t ${task.cpus} \ + ${snap_args} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.fgumi" + """ + touch ${prefix}.snap.bam + touch ${unmapped_bam} + """ +} diff --git a/modules/local/fgumi/snapzipper/main.nf b/modules/local/fgumi/snapzipper/main.nf deleted file mode 100644 index 76b89991..00000000 --- a/modules/local/fgumi/snapzipper/main.nf +++ /dev/null @@ -1,102 +0,0 @@ -include { FGUMI_SORT as FGUMI_TEMPLATE_SORT } from "../../../nf-core/fgumi/sort/main.nf" -include { FGUMI_ZIPPER } from "../zipper/main.nf" -include { SAMTOOLS_SORT as SAMTOOLS_QNAME_SORT_UNMAPPED } from "../../../nf-core/samtools/sort/main.nf" -include { SAMTOOLS_SORT as SAMTOOLS_QNAME_SORT_MAPPED } from "../../../nf-core/samtools/sort/main.nf" - -process FGUMI_SNAP_ZIPPER_RUN { - tag "$meta.id" - label 'process_high' - - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/763a833519c23555be888065f492215f57344155106972e272a0f8df78c57659/data' - : 'community.wave.seqera.io/library/fgumi_samtools_snap-aligner:c985f9394623a414'}" - - input: - tuple val(meta), path(unmapped_bam), path(index, stageAs: "index/*"), path(fasta), path(dict) - - output: - tuple val(meta), path("${prefix}.snap.bam"), emit: mapped_bam - tuple val(meta), path(unmapped_bam), emit: unmapped_bam - tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi - - when: - task.ext.when == null || task.ext.when - - script: - def snap_args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}.fgumi" - - """ - INDEX_FILE=\$(find -L ./ -name "OverflowTable*" -print -quit) - [ -z "\$INDEX_FILE" ] && echo "Snap index files not found" 1>&2 && exit 1 - INDEX=\$(dirname "\$INDEX_FILE") - - fgumi fastq --input ${unmapped_bam} \ - | snap-aligner paired \ - \$INDEX \ - -pairedInterleavedFastq - \ - -o -sam - \ - -t ${task.cpus} \ - ${snap_args} \ - > ${prefix}.snap.sam - - samtools view \ - -@ ${task.cpus} \ - -b \ - -o ${prefix}.snap.bam \ - ${prefix}.snap.sam - """ - - stub: - prefix = task.ext.prefix ?: "${meta.id}.fgumi" - """ - touch ${prefix}.snap.bam - touch ${unmapped_bam} - """ -} - -workflow FGUMI_SNAP_ZIPPER { - take: - ch_meta_unmapped_index_fasta_dict - - main: - FGUMI_SNAP_ZIPPER_RUN(ch_meta_unmapped_index_fasta_dict) - - // Queryname sort the unmapped BAM in parallel with mapped BAM sort. - SAMTOOLS_QNAME_SORT_UNMAPPED( - FGUMI_SNAP_ZIPPER_RUN.out.unmapped_bam - .join( - ch_meta_unmapped_index_fasta_dict.map { meta, _unmapped_bam, _index, fasta, _dict -> [meta, fasta] }, - by: 0, - ) - .map { meta, unmapped_bam, fasta -> [meta, unmapped_bam, fasta] }, - null - ) - - // Sort mapped alignments by queryname and emit SAM for zipper stdin. 
- SAMTOOLS_QNAME_SORT_MAPPED( - FGUMI_SNAP_ZIPPER_RUN.out.mapped_bam - .join( - ch_meta_unmapped_index_fasta_dict.map { meta, _unmapped_bam, _index, fasta, _dict -> [meta, fasta] }, - by: 0, - ) - .map { meta, mapped_bam, fasta -> [meta, mapped_bam, fasta] }, - null - ) - - FGUMI_ZIPPER( - SAMTOOLS_QNAME_SORT_MAPPED.out.sam - .join(SAMTOOLS_QNAME_SORT_UNMAPPED.out.bam, by: 0) - .join( - ch_meta_unmapped_index_fasta_dict.map { meta, _unmapped_bam, _index, fasta, dict -> [meta, fasta, dict] }, - by: 0, - ) - .map { meta, mapped_sam, unmapped_qname_bam, fasta, dict -> [meta, mapped_sam, unmapped_qname_bam, fasta, dict] } - ) - - FGUMI_TEMPLATE_SORT(FGUMI_ZIPPER.out.bam) - - emit: - bam = FGUMI_TEMPLATE_SORT.out.bam - versions_fgumi = FGUMI_SNAP_ZIPPER_RUN.out.versions_fgumi.mix(FGUMI_ZIPPER.out.versions_fgumi).mix(FGUMI_TEMPLATE_SORT.out.versions_fgumi) -} diff --git a/subworkflows/local/fgumi_snapzipper/main.nf b/subworkflows/local/fgumi_snapzipper/main.nf new file mode 100644 index 00000000..efc96188 --- /dev/null +++ b/subworkflows/local/fgumi_snapzipper/main.nf @@ -0,0 +1,54 @@ +#!/usr/bin/env nextflow + +// MODULES +include { FGUMI_SNAP_ALIGN } from "../../../modules/local/fgumi/snapalign/main.nf" +include { FGUMI_ZIPPER } from "../../../modules/local/fgumi/zipper/main.nf" +include { FGUMI_SORT as FGUMI_TEMPLATE_SORT } from "../../../modules/nf-core/fgumi/sort/main.nf" +include { SAMTOOLS_SORT as SAMTOOLS_QNAME_SORT_UNMAPPED } from "../../../modules/nf-core/samtools/sort/main.nf" +include { SAMTOOLS_SORT as SAMTOOLS_QNAME_SORT_MAPPED } from "../../../modules/nf-core/samtools/sort/main.nf" + +workflow FGUMI_SNAP_ZIPPER { + take: + ch_meta_unmapped_index_fasta_dict + + main: + FGUMI_SNAP_ALIGN(ch_meta_unmapped_index_fasta_dict) + + // Queryname sort the unmapped BAM in parallel with mapped BAM sort. 
+ SAMTOOLS_QNAME_SORT_UNMAPPED( + FGUMI_SNAP_ALIGN.out.unmapped_bam + .join( + ch_meta_unmapped_index_fasta_dict.map { meta, _unmapped_bam, _index, fasta, _dict -> [meta, fasta] }, + by: 0, + ) + .map { meta, unmapped_bam, fasta -> [meta, unmapped_bam, fasta] }, + '' + ) + + // Sort mapped alignments by queryname and emit SAM for zipper stdin. + SAMTOOLS_QNAME_SORT_MAPPED( + FGUMI_SNAP_ALIGN.out.mapped_bam + .join( + ch_meta_unmapped_index_fasta_dict.map { meta, _unmapped_bam, _index, fasta, _dict -> [meta, fasta] }, + by: 0, + ) + .map { meta, mapped_bam, fasta -> [meta, mapped_bam, fasta] }, + '' + ) + + FGUMI_ZIPPER( + SAMTOOLS_QNAME_SORT_MAPPED.out.sam + .join(SAMTOOLS_QNAME_SORT_UNMAPPED.out.bam, by: 0) + .join( + ch_meta_unmapped_index_fasta_dict.map { meta, _unmapped_bam, _index, fasta, dict -> [meta, fasta, dict] }, + by: 0, + ) + .map { meta, mapped_sam, unmapped_qname_bam, fasta, dict -> [meta, mapped_sam, unmapped_qname_bam, fasta, dict] } + ) + + FGUMI_TEMPLATE_SORT(FGUMI_ZIPPER.out.bam) + + emit: + bam = FGUMI_TEMPLATE_SORT.out.bam + versions_fgumi = FGUMI_SNAP_ALIGN.out.versions_fgumi.mix(FGUMI_ZIPPER.out.versions_fgumi).mix(FGUMI_TEMPLATE_SORT.out.versions_fgumi) +} diff --git a/subworkflows/local/umi_consensus/main.nf b/subworkflows/local/umi_consensus/main.nf index bef790e5..7f0c983d 100644 --- a/subworkflows/local/umi_consensus/main.nf +++ b/subworkflows/local/umi_consensus/main.nf @@ -5,7 +5,7 @@ include { FGUMI_EXTRACT } from "../../../modules/nf-core/fgumi/extract/ include { FGUMI_FILTER } from "../../../modules/nf-core/fgumi/filter/main.nf" include { FGUMI_GROUP } from "../../../modules/nf-core/fgumi/group/main.nf" include { FGUMI_SIMPLEX } from "../../../modules/nf-core/fgumi/simplex/main.nf" -include { FGUMI_SNAP_ZIPPER } from "../../../modules/local/fgumi/snapzipper/main.nf" +include { FGUMI_SNAP_ZIPPER } from "../fgumi_snapzipper/main.nf" include { SAMTOOLS_SORT } from "../../../modules/nf-core/samtools/sort/main.nf" // FUNCTIONS @@ -70,7 
+70,7 @@ workflow UMI_CONSENSUS_FGUMI { emit: cram_crai = SAMTOOLS_SORT.out.cram.join(SAMTOOLS_SORT.out.crai, failOnMismatch: true, failOnDuplicate: true) - zipper_diagnostics = Channel.empty() + zipper_diagnostics = channel.empty() grouping_metrics = FGUMI_GROUP.out.metrics family_size_histogram = FGUMI_GROUP.out.histogram consensus_metrics = FGUMI_SIMPLEX.out.stats diff --git a/tests/inputs/test.yml b/tests/inputs/test.yml index c09566ea..9f2ca977 100644 --- a/tests/inputs/test.yml +++ b/tests/inputs/test.yml @@ -52,7 +52,7 @@ # UMI consensus (fgumi) inputs # Example DNA sample with fgumi_aware enabled for fgumi processing. - id: sample1 - samplename: sample1-chr21 + samplename: sample1_chr21 library: test_library organism: Homo sapiens tag: WES diff --git a/tests/modules/local/fgumi/snapalign/main.nf.test b/tests/modules/local/fgumi/snapalign/main.nf.test new file mode 100644 index 00000000..87c43d66 --- /dev/null +++ b/tests/modules/local/fgumi/snapalign/main.nf.test @@ -0,0 +1,58 @@ +nextflow_process { + + name "Test Process FGUMI_SNAP_ALIGN" + script "modules/local/fgumi/snapalign/main.nf" + process "FGUMI_SNAP_ALIGN" + + tag "modules" + tag "modules/local" + tag "modules/local/fgumi" + tag "modules/local/fgumi/snapalign" + + test("test - stub") { + // Stub-mode contract test: verifies qname-sorted prep outputs. 
+ options "-stub" + + when { + process { + """ + input[0] = [ + [id: "test", samplename: "test"], + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/unmapped.bam", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } + + test("test - stub alt meta") { + options "-stub" + + when { + process { + """ + input[0] = [ + [id: "sample_alt", samplename: "sample_alt"], + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/unmapped.bam", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/tests/modules/local/fgumi/snapalign/main.nf.test.snap b/tests/modules/local/fgumi/snapalign/main.nf.test.snap new file mode 100644 index 00000000..e0d95bbd --- /dev/null +++ b/tests/modules/local/fgumi/snapalign/main.nf.test.snap @@ -0,0 +1,124 @@ +{ + "test - stub alt meta": { + "content": [ + { + "0": [ + [ + { + "id": "sample_alt", + 
"samplename": "sample_alt" + }, + "sample_alt.fgumi.snap.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "sample_alt", + "samplename": "sample_alt" + }, + "unmapped.bam:md5,329308690118ca17867c78cc61a0fab0" + ] + ], + "2": [ + [ + "FGUMI_SNAP_ALIGN", + "fgumi", + "0.1.2" + ] + ], + "mapped_bam": [ + [ + { + "id": "sample_alt", + "samplename": "sample_alt" + }, + "sample_alt.fgumi.snap.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "unmapped_bam": [ + [ + { + "id": "sample_alt", + "samplename": "sample_alt" + }, + "unmapped.bam:md5,329308690118ca17867c78cc61a0fab0" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_SNAP_ALIGN", + "fgumi", + "0.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-06-19T14:48:47.467294268" + }, + "test - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "samplename": "test" + }, + "test.fgumi.snap.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "samplename": "test" + }, + "unmapped.bam:md5,329308690118ca17867c78cc61a0fab0" + ] + ], + "2": [ + [ + "FGUMI_SNAP_ALIGN", + "fgumi", + "0.1.2" + ] + ], + "mapped_bam": [ + [ + { + "id": "test", + "samplename": "test" + }, + "test.fgumi.snap.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "unmapped_bam": [ + [ + { + "id": "test", + "samplename": "test" + }, + "unmapped.bam:md5,329308690118ca17867c78cc61a0fab0" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_SNAP_ALIGN", + "fgumi", + "0.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-02T16:05:22.614091027" + } +} \ No newline at end of file diff --git a/tests/modules/local/fgumi/snapzipper/main.nf.test b/tests/modules/local/fgumi/snapzipper/main.nf.test deleted file mode 100644 index 904ed08c..00000000 --- a/tests/modules/local/fgumi/snapzipper/main.nf.test +++ /dev/null @@ -1,35 +0,0 @@ -nextflow_process { - - name "Test Process FGUMI_SNAP_ZIPPER_RUN" - 
script "modules/local/fgumi/snapzipper/main.nf" - process "FGUMI_SNAP_ZIPPER_RUN" - - tag "modules" - tag "modules/local" - tag "modules/local/fgumi" - tag "modules/local/fgumi/snapzipper" - - test("test - stub") { - // Stub-mode contract test: verifies qname-sorted prep outputs. - options "-stub" - - when { - process { - """ - input[0] = [ - [id: "test", samplename: "test"], - file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/unmapped.bam", checkIfExists: true), - file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), - file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", checkIfExists: true), - file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", checkIfExists: true) - ] - """ - } - } - - then { - assert process.success - assert snapshot(process.out).match() - } - } -} diff --git a/tests/modules/local/fgumi/snapzipper/main.nf.test.snap b/tests/modules/local/fgumi/snapzipper/main.nf.test.snap deleted file mode 100644 index bacb50b6..00000000 --- a/tests/modules/local/fgumi/snapzipper/main.nf.test.snap +++ /dev/null @@ -1,63 +0,0 @@ -{ - "test - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "samplename": "test" - }, - "test.fgumi.snap.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "samplename": "test" - }, - "unmapped.bam:md5,329308690118ca17867c78cc61a0fab0" - ] - ], - "2": [ - [ - "FGUMI_SNAP_ZIPPER_RUN", - "fgumi", - "0.1.2" - ] - ], - "mapped_bam": [ - [ - { - "id": "test", - "samplename": "test" - }, - "test.fgumi.snap.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "unmapped_bam": [ - [ - { - "id": "test", - "samplename": "test" - }, - "unmapped.bam:md5,329308690118ca17867c78cc61a0fab0" - ] - ], - 
"versions_fgumi": [ - [ - "FGUMI_SNAP_ZIPPER_RUN", - "fgumi", - "0.1.2" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.4" - }, - "timestamp": "2026-04-02T16:05:22.614091027" - } -} \ No newline at end of file diff --git a/tests/subworkflows/local/umi_consensus/main.nf.test.snap b/tests/subworkflows/local/umi_consensus/main.nf.test.snap index 171cbd2c..cdb95716 100644 --- a/tests/subworkflows/local/umi_consensus/main.nf.test.snap +++ b/tests/subworkflows/local/umi_consensus/main.nf.test.snap @@ -12,16 +12,14 @@ "markdup": "bamsormadup", "fgumi_aware": true, "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] + "fasta": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", + "fai": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai", + "dict": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", + "snap": "s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/" } }, - "UMI_consensus1.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e", - "UMI_consensus1.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + "UMI_consensus1.fgumi.filter.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e", + "UMI_consensus1.fgumi.filter.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -34,12 +32,10 @@ "markdup": "bamsormadup", "fgumi_aware": true, "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] + "fasta": 
"s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", + "fai": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai", + "dict": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", + "snap": "s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/" } }, "UMI_consensus1.fgumi.group.grouping_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" @@ -55,15 +51,13 @@ "markdup": "bamsormadup", "fgumi_aware": true, "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] + "fasta": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", + "fai": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai", + "dict": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", + "snap": "s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/" } }, - "UMI_consensus1.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + "UMI_consensus1.fgumi.group.grouping_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "3": [ @@ -76,15 +70,13 @@ "markdup": "bamsormadup", "fgumi_aware": true, "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] + "fasta": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", + "fai": 
"s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai", + "dict": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", + "snap": "s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/" } }, - "UMI_consensus1.fgumi.simplex.consensus_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + "UMI_consensus1.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "4": [ @@ -97,15 +89,13 @@ "markdup": "bamsormadup", "fgumi_aware": true, "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] + "fasta": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", + "fai": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai", + "dict": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", + "snap": "s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/" } }, - "UMI_consensus1.fgumi.filter.filtering_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + "UMI_consensus1.fgumi.simplex.stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "5": [ @@ -118,18 +108,16 @@ "markdup": "bamsormadup", "fgumi_aware": true, "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] + "fasta": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", + "fai": 
"s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai", + "dict": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", + "snap": "s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/" } }, - "UMI_consensus1.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + "UMI_consensus1.fgumi.filter.stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "cram_crai": [ + "6": [ [ { "id": "UMI_consensus1", @@ -139,16 +127,13 @@ "markdup": "bamsormadup", "fgumi_aware": true, "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] + "fasta": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", + "fai": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai", + "dict": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", + "snap": "s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/" } }, - "UMI_consensus1.fgumi.filter.cram:md5,d41d8cd98f00b204e9800998ecf8427e", - "UMI_consensus1.fgumi.filter.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + "UMI_consensus1.fgumi.filter.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "consensus_metrics": [ @@ -161,15 +146,33 @@ "markdup": "bamsormadup", "fgumi_aware": true, "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] + "fasta": 
"s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", + "fai": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai", + "dict": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", + "snap": "s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/" } }, - "UMI_consensus1.fgumi.simplex.consensus_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + "UMI_consensus1.fgumi.simplex.stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram_crai": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", + "fai": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai", + "dict": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", + "snap": "s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/" + } + }, + "UMI_consensus1.fgumi.filter.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e", + "UMI_consensus1.fgumi.filter.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "family_size_histogram": [ @@ -182,12 +185,10 @@ "markdup": "bamsormadup", "fgumi_aware": true, "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] + "fasta": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", + "fai": 
"s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai", + "dict": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", + "snap": "s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/" } }, "UMI_consensus1.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" @@ -203,15 +204,13 @@ "markdup": "bamsormadup", "fgumi_aware": true, "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] + "fasta": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", + "fai": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai", + "dict": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", + "snap": "s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/" } }, - "UMI_consensus1.fgumi.filter.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + "UMI_consensus1.fgumi.filter.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "filtering_metrics": [ @@ -224,15 +223,13 @@ "markdup": "bamsormadup", "fgumi_aware": true, "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] + "fasta": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", + "fai": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai", + "dict": 
"s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", + "snap": "s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/" } }, - "UMI_consensus1.fgumi.filter.filtering_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + "UMI_consensus1.fgumi.filter.stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "grouping_metrics": [ @@ -245,16 +242,17 @@ "markdup": "bamsormadup", "fgumi_aware": true, "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] + "fasta": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", + "fai": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai", + "dict": "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", + "snap": "s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/" } }, "UMI_consensus1.fgumi.group.grouping_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] + ], + "zipper_diagnostics": [ + ] } ], @@ -262,6 +260,6 @@ "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-04-09T10:19:55.500577314" + "timestamp": "2026-06-19T09:47:14.672939014" } } \ No newline at end of file diff --git a/workflows/preprocessing.nf b/workflows/preprocessing.nf index c9ae0092..fc8549f0 100644 --- a/workflows/preprocessing.nf +++ b/workflows/preprocessing.nf @@ -330,7 +330,6 @@ workflow PREPROCESSING { BAM_QC.out.samtools_idxstats, BAM_QC.out.picard_multiplemetrics, BAM_QC.out.picard_wgsmetrics, - BAM_QC.out.picard_wgsmetrics, BAM_QC.out.picard_hsmetrics, ) From 01acd2dff154bda4ccab5cd1f5efe3c0d2b20b5b Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Fri, 19 Jun 2026 15:03:00 +0200 
Subject: [PATCH 35/36] Added snapalign tests --- tests/config/snapalign_custom_prefix.config | 5 ++ .../local/fgumi/snapalign/main.nf.test | 23 +++++++ .../local/fgumi/snapalign/main.nf.test.snap | 61 +++++++++++++++++++ 3 files changed, 89 insertions(+) create mode 100644 tests/config/snapalign_custom_prefix.config diff --git a/tests/config/snapalign_custom_prefix.config b/tests/config/snapalign_custom_prefix.config new file mode 100644 index 00000000..21d053f1 --- /dev/null +++ b/tests/config/snapalign_custom_prefix.config @@ -0,0 +1,5 @@ +process { + withName: 'FGUMI_SNAP_ALIGN' { + ext.prefix = 'snapcustom' + } +} diff --git a/tests/modules/local/fgumi/snapalign/main.nf.test b/tests/modules/local/fgumi/snapalign/main.nf.test index 87c43d66..c3f47619 100644 --- a/tests/modules/local/fgumi/snapalign/main.nf.test +++ b/tests/modules/local/fgumi/snapalign/main.nf.test @@ -55,4 +55,27 @@ nextflow_process { assert snapshot(process.out).match() } } + + test("test - stub custom prefix") { + options "-stub -c ${projectDir}/tests/config/snapalign_custom_prefix.config" + + when { + process { + """ + input[0] = [ + [id: "prefix_case", samplename: "prefix_case"], + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/unmapped.bam", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } } diff --git a/tests/modules/local/fgumi/snapalign/main.nf.test.snap b/tests/modules/local/fgumi/snapalign/main.nf.test.snap 
index e0d95bbd..e9314c16 100644 --- a/tests/modules/local/fgumi/snapalign/main.nf.test.snap +++ b/tests/modules/local/fgumi/snapalign/main.nf.test.snap @@ -120,5 +120,66 @@ "nextflow": "25.10.4" }, "timestamp": "2026-04-02T16:05:22.614091027" + }, + "test - stub custom prefix": { + "content": [ + { + "0": [ + [ + { + "id": "prefix_case", + "samplename": "prefix_case" + }, + "snapcustom.snap.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "prefix_case", + "samplename": "prefix_case" + }, + "unmapped.bam:md5,329308690118ca17867c78cc61a0fab0" + ] + ], + "2": [ + [ + "FGUMI_SNAP_ALIGN", + "fgumi", + "0.1.2" + ] + ], + "mapped_bam": [ + [ + { + "id": "prefix_case", + "samplename": "prefix_case" + }, + "snapcustom.snap.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "unmapped_bam": [ + [ + { + "id": "prefix_case", + "samplename": "prefix_case" + }, + "unmapped.bam:md5,329308690118ca17867c78cc61a0fab0" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_SNAP_ALIGN", + "fgumi", + "0.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-06-19T14:59:18.039374313" } } \ No newline at end of file From dcfcdcb82f63c9f8bb209e24a8e292b150c7fd04 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Fri, 19 Jun 2026 16:07:04 +0200 Subject: [PATCH 36/36] Updated README and added inline code comments --- README.md | 2 ++ modules/local/fgumi/snapalign/main.nf | 1 + modules/local/fgumi/zipper/main.nf | 1 + subworkflows/local/umi_consensus/main.nf | 2 ++ 4 files changed, 6 insertions(+) diff --git a/README.md b/README.md index d6aca64a..93f08552 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ **nf-cmgg/preprocessing** is a bioinformatics pipeline that demultiplexes and aligns raw sequencing data. It also performs basic QC and coverage analysis. +The pipeline also includes an optional per-sample FGUMI consensus branch for UMI-aware DNA processing. 
The pipeline is built using Nextflow, a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. @@ -24,6 +25,7 @@ Steps include: - Run QC using [`MultiQC SAV`](https://github.com/MultiQC/MultiQC_SAV) - Read QC and trimming using [`fastp`](https://github.com/OpenGene/fastp) or [`falco`](https://github.com/smithlabcode/falco) - Alignment using either [`bwa`](https://github.com/lh3/bwa), [`bwa-mem2`](https://github.com/bwa-mem2/bwa-mem2), [`bowtie2`](https://github.com/BenLangmead/bowtie2), [`dragmap`](https://github.com/Illumina/DRAGMAP), [`snap`](https://github.com/amplab/snap) or [`strobe`](https://github.com/ksahlin/strobealign) for DNA-seq and [`STAR`](https://github.com/alexdobin/STAR) for RNA-seq +- Optional FGUMI consensus branch for `fgumi_aware` DNA samples (`extract -> snap/zipper -> group -> simplex -> filter -> sort`) - Duplicate marking using [`bamsormadup`](https://gitlab.com/german.tischler/biobambam2) or [`samtools markdup`](http://www.htslib.org/doc/samtools-markdup.html) - Coverage analysis using [`mosdepth`](https://github.com/brentp/mosdepth) and [`samtools coverage`](http://www.htslib.org/doc/samtools-coverage.html) - Alignment QC using [`samtools flagstat`](http://www.htslib.org/doc/samtools-flagstat.html), [`samtools stats`](http://www.htslib.org/doc/samtools-stats.html), [`samtools idxstats`](http://www.htslib.org/doc/samtools-idxstats.html) and [`picard CollectHsMetrics`](https://broadinstitute.github.io/picard/command-line-overview.html#CollectHsMetrics), [`picard CollectWgsMetrics`](https://broadinstitute.github.io/picard/command-line-overview.html#CollectWgsMetrics), [`picard CollectMultipleMetrics`](https://broadinstitute.github.io/picard/command-line-overview.html#CollectMultipleMetrics) diff --git a/modules/local/fgumi/snapalign/main.nf b/modules/local/fgumi/snapalign/main.nf index e79af27c..7520e846 100644 
--- a/modules/local/fgumi/snapalign/main.nf +++ b/modules/local/fgumi/snapalign/main.nf @@ -22,6 +22,7 @@ process FGUMI_SNAP_ALIGN { prefix = task.ext.prefix ?: "${meta.id}.fgumi" """ + # SNAP index directory is resolved from staged index content. INDEX_FILE=\$(find -L ./ -name "OverflowTable*" -print -quit) [ -z "\$INDEX_FILE" ] && echo "Snap index files not found" 1>&2 && exit 1 INDEX=\$(dirname "\$INDEX_FILE") diff --git a/modules/local/fgumi/zipper/main.nf b/modules/local/fgumi/zipper/main.nf index 473e6ad3..ad25f337 100644 --- a/modules/local/fgumi/zipper/main.nf +++ b/modules/local/fgumi/zipper/main.nf @@ -21,6 +21,7 @@ process FGUMI_ZIPPER { prefix = task.ext.prefix ?: "${meta.id}.fgumi" """ + # mapped_sam and unmapped_qname_bam must be queryname-sorted in the same order. fgumi zipper \ --unmapped ${unmapped_qname_bam} \ --reference ${fasta} \ diff --git a/subworkflows/local/umi_consensus/main.nf b/subworkflows/local/umi_consensus/main.nf index 7f0c983d..2fa24401 100644 --- a/subworkflows/local/umi_consensus/main.nf +++ b/subworkflows/local/umi_consensus/main.nf @@ -16,6 +16,7 @@ workflow UMI_CONSENSUS_FGUMI { ch_meta_reads_aligner_index_fasta // channel: [mandatory] [meta, reads, aligner, index, fasta] main: + // Step numbers follow the fgumi basic workflow terminology (this path executes steps 1, 3, 4, 5, and 7). // Step 1: build an unmapped BAM with UMI tags from input FASTQ. FGUMI_EXTRACT( ch_meta_reads_aligner_index_fasta @@ -70,6 +71,7 @@ workflow UMI_CONSENSUS_FGUMI { emit: cram_crai = SAMTOOLS_SORT.out.cram.join(SAMTOOLS_SORT.out.crai, failOnMismatch: true, failOnDuplicate: true) + // Compatibility output kept for downstream interfaces; currently not produced by this branch. zipper_diagnostics = channel.empty() grouping_metrics = FGUMI_GROUP.out.metrics family_size_histogram = FGUMI_GROUP.out.histogram