nf-core · ningyuxin1999 · Sep 2, 2025 · Sep 2, 2025 · Sep 3, 2025 · Sep 3, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -19,6 +19,7 @@
 
 - Support modules with `exec:` blocks ([#3633](https://github.com/nf-core/tools/pull/3633))
 - feat: nf-core modules bump-version supports specifying the toolkit ([#3608](https://github.com/nf-core/tools/pull/3608))
+- Test Reftrace as a replacement for regex-based parsing of module `main.nf` files ([#3745](https://github.com/nf-core/tools/pull/3745))
 
 ### Subworkflows
 

diff --git a/nf_core/components/nfcore_component.py b/nf_core/components/nfcore_component.py
@@ -7,6 +7,8 @@
 from pathlib import Path
 from typing import Any, Optional, Union
 
+from reftrace import Module
+
 log = logging.getLogger(__name__)
 
 
@@ -179,12 +181,11 @@ def _get_included_components_in_chained_tests(self, main_nf_test: Union[Path, st
         return included_components
 
     def _get_process_name(self):
-        with open(self.main_nf) as fh:
-            for line in fh:
-                if re.search(r"^\s*process\s*\w*\s*{", line):
-                    return re.search(r"^\s*process\s*(\w*)\s*{.*", line).group(1) or ""
-        return ""
-
+        try:
+            return Module.from_file(str(self.main_nf)).processes[0].name or ""
+        except (AttributeError, IndexError):  # parse failure (ParseError result) or no process block
+            return ""
+    
     def get_inputs_from_main_nf(self) -> None:
         """Collect all inputs from the main.nf file."""
         inputs: Any = []  # Can be 'list[list[dict[str, dict[str, str]]]]' or 'list[str]'

diff --git a/nf_core/modules/lint/main_nf.py b/nf_core/modules/lint/main_nf.py
@@ -268,8 +268,16 @@ def check_process_section(self, lines, registry, fix_version, progress_bar):
     else:
         self.failed.append(("process_capitals", "Process name is not in capital letters", self.main_nf))
 
-    # Check that process labels are correct
-    check_process_labels(self, lines)
+    # Check that process labels are correct using Reftrace
+    from reftrace import Module, ParseError
+
+    reftrace_mod = Module.from_file(str(self.main_nf))
+    if not isinstance(reftrace_mod, ParseError):
+        check_process_labels(self, reftrace_mod)
+    else:
+        self.failed.append(
+            ("process_standard_label", f"Failed to parse module with Reftrace: {reftrace_mod.error}", self.main_nf)
+        )
 
     # Deprecated enable_conda
     for i, raw_line in enumerate(lines):
@@ -448,7 +456,17 @@ def check_process_section(self, lines, registry, fix_version, progress_bar):
         return docker_tag == singularity_tag
 
 
-def check_process_labels(self, lines):
+def check_process_labels(self, mod):
+    """
+    Check process labels using Reftrace parsing.
+
+    This function validates that process labels conform to nf-core standards using
+    structured parsing via the Reftrace library.
+
+    Args:
+        self: ModuleLint object with passed/warned/failed lists and main_nf path
+        mod: Reftrace Module object containing parsed Nextflow processes
+    """
     correct_process_labels = [
         "process_single",
         "process_low",
@@ -457,26 +475,44 @@ def check_process_labels(self, lines):
         "process_long",
         "process_high_memory",
     ]
-    all_labels = [line.strip() for line in lines if line.lstrip().startswith("label ")]
+
+    # Defensive checks for Reftrace module structure
+    if not mod.processes:
+        self.warned.append(("process_standard_label", "No processes found in module", self.main_nf))
+        return
+
+    process = mod.processes[0]
+    if not process.labels:
+        self.warned.append(("process_standard_label", "No label found for process", self.main_nf))
+        return
+
+    # Extract label values from all label directives
+    all_labels = []
+    for label_directive in process.labels:
+        # Get the label value from Reftrace Label objects
+        if hasattr(label_directive, "value") and label_directive.value:
+            label_value = label_directive.value
+            all_labels.append(label_value)
     bad_labels = []
     good_labels = []
+    invalid_labels_count = 0
     if len(all_labels) > 0:
         for label in all_labels:
-            try:
-                label = re.match(r"^label\s+'?\"?([a-zA-Z0-9_-]+)'?\"?$", label).group(1)
-            except AttributeError:
+            if not label.replace("_", "").isalnum():
                 self.warned.append(
                     (
                         "process_standard_label",
                         f"Specified label appears to contain non-alphanumerics: {label}",
                         self.main_nf,
                     )
                 )
+                invalid_labels_count += 1
                 continue
             if label not in correct_process_labels:
                 bad_labels.append(label)
             else:
                 good_labels.append(label)
+
         if len(good_labels) > 1:
             self.warned.append(
                 (

diff --git a/requirements.txt b/requirements.txt
@@ -24,3 +24,4 @@ textual==5.1.1
 trogon
 pdiff
 ruamel.yaml
+reftrace
diff --git a/tests/modules/lint/test_lint_utils.py b/tests/modules/lint/test_lint_utils.py
@@ -1,3 +1,6 @@
+import os
+import tempfile
+
 import nf_core.modules.lint
 
 from ...test_modules import TestModules
@@ -12,7 +15,32 @@ def __init__(self):
         self.warned = []
         self.failed = []
 
-        self.main_nf = "main_nf"
+        # Create a temporary file with basic Nextflow process structure
+        # that Reftrace can parse
+        self._temp_file = tempfile.NamedTemporaryFile(mode="w", suffix=".nf", delete=False)
+        basic_process = """process TEST_PROCESS {
+    label 'process_high'
+
+    input:
+    path input_file
+
+    output:
+    path "output.txt"
+
+    script:
+    '''
+    echo "test" > output.txt
+    '''
+}
+"""
+        self._temp_file.write(basic_process)
+        self._temp_file.close()
+        self.main_nf = self._temp_file.name
+
+    def cleanup(self):
+        """Delete the temporary .nf file written by __init__, if it still exists on disk."""
+        if hasattr(self, "_temp_file") and os.path.exists(self._temp_file.name):
+            os.unlink(self._temp_file.name)
 
 
 class TestModulesLint(TestModules):
@@ -32,4 +60,4 @@ def test_mock_module_lint(self):
         assert isinstance(mock_lint.passed, list)
         assert isinstance(mock_lint.warned, list)
         assert isinstance(mock_lint.failed, list)
-        assert mock_lint.main_nf == "main_nf"
+        assert mock_lint.main_nf == mock_lint._temp_file.name
diff --git a/tests/modules/lint/test_main_nf.py b/tests/modules/lint/test_main_nf.py
@@ -1,4 +1,5 @@
 import pytest
+from reftrace import Module, ParseError
 
 import nf_core.modules.lint
 import nf_core.modules.patch
@@ -7,36 +8,104 @@
 from ...test_modules import TestModules
 from .test_lint_utils import MockModuleLint
 
+# @pytest.mark.parametrize(
+#     "content,passed,warned,failed",
+#     [
+#         # Valid process label
+#         ("label 'process_high'\ncpus 12", 1, 0, 0),
+#         # Non-alphanumeric characters in label
+#         ("label 'a:label:with:colons'\ncpus 12", 0, 2, 0),
+#         # Conflicting labels
+#         ("label 'process_high'\nlabel 'process_low'\ncpus 12", 0, 1, 0),
+#         # Duplicate labels
+#         ("label 'process_high'\nlabel 'process_high'\ncpus 12", 0, 2, 0),
+#         # Valid and non-standard labels
+#         ("label 'process_high'\nlabel 'process_extra_label'\ncpus 12", 1, 1, 0),
+#         # Non-standard label only
+#         ("label 'process_extra_label'\ncpus 12", 0, 2, 0),
+#         # Non-standard duplicates without quotes
+#         ("label process_extra_label\nlabel process_extra_label\ncpus 12", 0, 3, 0),
+#         # No label found
+#         ("cpus 12", 0, 1, 0),
+#     ],
+# )
+
 
 @pytest.mark.parametrize(
-    "content,passed,warned,failed",
+    "label_content,passed,warned,failed",
     [
         # Valid process label
-        ("label 'process_high'\ncpus 12", 1, 0, 0),
+        ("label 'process_high'", 1, 0, 0),
         # Non-alphanumeric characters in label
-        ("label 'a:label:with:colons'\ncpus 12", 0, 2, 0),
-        # Conflicting labels
-        ("label 'process_high'\nlabel 'process_low'\ncpus 12", 0, 1, 0),
+        ("label 'a:label:with:colons'", 0, 2, 0),
+        # Conflicting labels (multiple label lines)
+        ("label 'process_low'\nlabel 'process_high'", 0, 1, 0),
         # Duplicate labels
-        ("label 'process_high'\nlabel 'process_high'\ncpus 12", 0, 2, 0),
+        ("label 'process_high'\nlabel 'process_high'", 0, 2, 0),
         # Valid and non-standard labels
-        ("label 'process_high'\nlabel 'process_extra_label'\ncpus 12", 1, 1, 0),
+        ("label 'process_high'\nlabel 'process_extra_label'", 1, 1, 0),
         # Non-standard label only
-        ("label 'process_extra_label'\ncpus 12", 0, 2, 0),
-        # Non-standard duplicates without quotes
-        ("label process_extra_label\nlabel process_extra_label\ncpus 12", 0, 3, 0),
+        ("label 'process_extra_label'", 0, 2, 0),
+        # Duplicate non-standard labels
+        ("label 'process_extra_label'\nlabel 'process_extra_label'", 0, 3, 0),
         # No label found
-        ("cpus 12", 0, 1, 0),
+        ("cpus 2", 0, 1, 0),
     ],
 )
-def test_process_labels(content, passed, warned, failed):
+def test_process_labels(label_content, passed, warned, failed):
     """Test process label validation"""
-    mock_lint = MockModuleLint()
-    check_process_labels(mock_lint, content.splitlines())
+    # Function-scope imports used to write and remove the temporary .nf file below
+    import os
+    import tempfile
+
+    # Wrap the parametrized directive line(s) in a minimal process definition for Reftrace to parse
+    process_content = f"""process TEST_PROCESS {{
+    {label_content}
+
+    input:
+    path input_file
+
+    output:
+    path "output.txt"
+
+    script:
+    '''
+    echo "test" > output.txt
+    '''
+}}
+"""
+
+    temp_file = tempfile.NamedTemporaryFile(mode="w", suffix=".nf", delete=False)
+    temp_file.write(process_content)
+    temp_file.close()
+
+    try:
+        # MockModuleLint writes its own default temp file; drop it and point main_nf at ours instead
+        mock_lint = MockModuleLint()
+        mock_lint.cleanup()  # Clean up the default temp file
+        mock_lint.main_nf = temp_file.name
+
+        # Parse with Reftrace
+        module = Module.from_file(temp_file.name)
+        assert not isinstance(module, ParseError), f"Failed to parse test file: {module}"
+        # Run the check_process_labels function
+        check_process_labels(mock_lint, module)
+
+        # Compare the number of results recorded in each category against the expected counts
+        assert len(mock_lint.passed) == passed, (
+            f"Expected {passed} passed tests, got {len(mock_lint.passed)}: {mock_lint.passed}"
+        )
+        assert len(mock_lint.warned) == warned, (
+            f"Expected {warned} warned tests, got {len(mock_lint.warned)}: {mock_lint.warned}"
+        )
+        assert len(mock_lint.failed) == failed, (
+            f"Expected {failed} failed tests, got {len(mock_lint.failed)}: {mock_lint.failed}"
+        )
 
-    assert len(mock_lint.passed) == passed
-    assert len(mock_lint.warned) == warned
-    assert len(mock_lint.failed) == failed
+    finally:
+        # Always remove the temporary file, even if an assertion above failed
+        if os.path.exists(temp_file.name):
+            os.unlink(temp_file.name)
 
 
 @pytest.mark.parametrize(
-Original file line number
+Diff line change
@@ Expand Up / @@ -24,3 +24,4 @@ textual==5.1.1 @@
     trogon
     pdiff
     ruamel.yaml
+    reftrace