From f14d1456723af2db51f95b0fc03e156440adc928 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 21 Jan 2025 16:26:21 +0100 Subject: [PATCH 01/26] first implementation of retrieving module information of input and output and EDAM from bio.tools --- nf_core/components/components_utils.py | 72 +++++++++++++++++++++----- nf_core/components/create.py | 7 ++- nf_core/module-template/meta.yml | 45 +++++++++++++++- 3 files changed, 109 insertions(+), 15 deletions(-) diff --git a/nf_core/components/components_utils.py b/nf_core/components/components_utils.py index 23bf08bbd6..823e77b138 100644 --- a/nf_core/components/components_utils.py +++ b/nf_core/components/components_utils.py @@ -165,9 +165,9 @@ def get_components_to_install(subworkflow_dir: Union[str, Path]) -> Tuple[List[s return modules, subworkflows -def get_biotools_id(tool_name) -> str: +def get_biotools_response(tool_name: str) -> Optional[dict]: """ - Try to find a bio.tools ID for 'tool' + Try to get bio.tools information for 'tool' """ url = f"https://bio.tools/api/t/?q={tool_name}&format=json" try: @@ -176,16 +176,64 @@ def get_biotools_id(tool_name) -> str: response.raise_for_status() # Raise an error for bad status codes # Parse the JSON response data = response.json() + return data - # Iterate through the tools in the response to find the tool name - for tool in data["list"]: - if tool["name"].lower() == tool_name: - return tool["biotoolsCURIE"] + except requests.exceptions.RequestException as e: + log.warning(f"Could not find bio.tools information for '{tool_name}': {e}") + return None - # If the tool name was not found in the response - log.warning(f"Could not find a bio.tools ID for '{tool_name}'") - return "" - except requests.exceptions.RequestException as e: - log.warning(f"Could not find a bio.tools ID for '{tool_name}': {e}") - return "" +def get_biotools_id(data: dict, tool_name: str) -> str: + """ + Try to find a bio.tools ID for 'tool' + """ + # Iterate through the tools in the response to 
find the tool name + for tool in data["list"]: + if tool["name"].lower() == tool_name: + return tool["biotoolsCURIE"] + + # If the tool name was not found in the response + log.warning(f"Could not find a bio.tools ID for '{tool_name}'") + return "" + + +def get_channel_info_from_biotools(data: dict, tool_name: str) -> List[str]: + """ + Try to find input and output channels and the respective EDAM ontology terms + + Args: + data (dict): The bio.tools API response + tool_name (str): The name of the tool + """ + inputs = {} + outputs = {} + + def _iterate_input_output(type): + type_info = {} + if type in funct: + for element in funct[type]: + if "data" in element: + element_name = "_".join(element["data"]["term"].lower().split(" ")) + type_info[element_name] = [[element["data"]["uri"]], ""] + if "format" in element: + for format in element["format"]: + # Append the EDAM URI + type_info[element_name][0].append(format["uri"]) + # Append the EDAM term, getting the first word in case of complicated strings. i.e. 
"FASTA format" + type_info[element_name][1] += format["term"].lower().split(" ")[0] + "," + type_info[element_name][1] = type_info[element_name][1][:-1] # Remove the last comma + return type_info + + # Iterate through the tools in the response to find the tool name + for tool in data["list"]: + if tool["name"].lower() == tool_name: + if "function" in tool: + # Parese all tool functions + for funct in tool["function"]: + inputs.update(_iterate_input_output("input")) + outputs.update(_iterate_input_output("output")) + return inputs, outputs + + # If the tool name was not found in the response + log.warning(f"Could not find an EDAM ontology term for '{tool_name}'") + return [] diff --git a/nf_core/components/create.py b/nf_core/components/create.py index c781905618..c0b7b9157d 100644 --- a/nf_core/components/create.py +++ b/nf_core/components/create.py @@ -21,7 +21,7 @@ import nf_core import nf_core.utils from nf_core.components.components_command import ComponentCommand -from nf_core.components.components_utils import get_biotools_id +from nf_core.components.components_utils import get_biotools_id, get_biotools_response, get_channel_info_from_biotools from nf_core.pipelines.lint_utils import run_prettier_on_file log = logging.getLogger(__name__) @@ -152,7 +152,10 @@ def create(self) -> bool: # Try to find a bioconda package for 'component' self._get_bioconda_tool() # Try to find a biotools entry for 'component' - self.tool_identifier = get_biotools_id(self.component) + biotools_data = get_biotools_response(self.tool_conda_name) + self.tool_identifier = get_biotools_id(biotools_data, self.tool_conda_name) + # Obtain EDAM ontologies for inputs and outputs + self.inputs, self.outputs = get_channel_info_from_biotools(biotools_data, self.tool_conda_name) # Prompt for GitHub username self._get_username() diff --git a/nf_core/module-template/meta.yml b/nf_core/module-template/meta.yml index d9d1cc8ae8..9118972bc4 100644 --- a/nf_core/module-template/meta.yml +++ 
b/nf_core/module-template/meta.yml @@ -26,6 +26,26 @@ tools: ## TODO nf-core: Add a description of all of the variables used as input {% endif -%} input: + {% if inputs -%} + {% for input_name, ontologies in inputs.items() -%} + {% if has_meta %} + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + {% endif %} + - {{ input_name }}: + # TODO nf-core: Update the information obtained form bio.tools and make sure that it is correct + type: file + description: {{ input_name }} file + pattern: {{ "\"*.{" + ontologies[1] + "}\"" }} + ontologies: + {% for ontology in ontologies[0] -%} + - edam: "{{ ontology }}" + {% endfor -%} + {% endfor -%} + {% else -%} #{% if has_meta %} Only when we have meta - - meta: type: map @@ -45,14 +65,36 @@ input: - edam: "http://edamontology.org/format_25722" - edam: "http://edamontology.org/format_2573" - edam: "http://edamontology.org/format_3462" - {% else %} + {% else -%} - edam: "" {%- endif %} + {%- endif %} {% if not_empty_template -%} ## TODO nf-core: Add a description of all of the variables used as output {% endif -%} output: + {% if outputs -%} + {% for output_name, ontologies in outputs.items() -%} + - {{ output_name }}: + {% if has_meta -%} + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + {%- endif %} + - {{ output_name }}: + # TODO nf-core: Update the information obtained form bio.tools and make sure that it is correct + type: file + description: {{ output_name }} file + pattern: {{ "\"*.{" + ontologies[1] + "}\"" }} + ontologies: + {% for ontology in ontologies[0] -%} + - edam: "{{ ontology }}" + {% endfor -%} + {% endfor -%} + {% else -%} - {{ 'bam:' if not_empty_template else "output:" }} #{% if has_meta -%} Only when we have meta - meta: @@ -76,6 +118,7 @@ output: {% else -%} - edam: "" {%- endif %} + {%- endif %} - versions: - "versions.yml": type: file From 8f3d2dc2933e9ea1aa3b78e05660fd3738310048 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 21 Jan 2025 16:26:21 +0100 Subject: [PATCH 02/26] first implementation of retrieving module information of input and output and EDAM from bio.tools --- nf_core/components/components_utils.py | 24 ++++++++++++++++-------- nf_core/components/create.py | 15 ++++++++++----- nf_core/modules/lint/__init__.py | 7 +++---- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/nf_core/components/components_utils.py b/nf_core/components/components_utils.py index 823e77b138..30b767bcfd 100644 --- a/nf_core/components/components_utils.py +++ b/nf_core/components/components_utils.py @@ -165,9 +165,11 @@ def get_components_to_install(subworkflow_dir: Union[str, Path]) -> Tuple[List[s return modules, subworkflows +def get_biotools_response(tool_name: str) -> Optional[dict]: def get_biotools_response(tool_name: str) -> Optional[dict]: """ Try to get bio.tools information for 'tool' + Try to get bio.tools information for 'tool' """ url = f"https://bio.tools/api/t/?q={tool_name}&format=json" try: @@ -197,7 +199,9 @@ def get_biotools_id(data: dict, tool_name: str) -> str: return "" -def get_channel_info_from_biotools(data: dict, tool_name: str) -> List[str]: +def get_channel_info_from_biotools( + data: dict, tool_name: str +) -> Optional[tuple[dict[str, Tuple[List[str], 
str]], dict[str, Tuple[List[str], str]]]]: """ Try to find input and output channels and the respective EDAM ontology terms @@ -208,20 +212,24 @@ def get_channel_info_from_biotools(data: dict, tool_name: str) -> List[str]: inputs = {} outputs = {} - def _iterate_input_output(type): - type_info = {} + def _iterate_input_output(type) -> dict[str, Tuple[List[str], str]]: + type_info: dict[str, Tuple[List[str], str]] = {} if type in funct: for element in funct[type]: if "data" in element: element_name = "_".join(element["data"]["term"].lower().split(" ")) - type_info[element_name] = [[element["data"]["uri"]], ""] + uris = [element["data"]["uri"]] + terms = "" if "format" in element: for format in element["format"]: # Append the EDAM URI - type_info[element_name][0].append(format["uri"]) + uris.append(format["uri"]) # Append the EDAM term, getting the first word in case of complicated strings. i.e. "FASTA format" - type_info[element_name][1] += format["term"].lower().split(" ")[0] + "," - type_info[element_name][1] = type_info[element_name][1][:-1] # Remove the last comma + terms = terms + format["term"].lower().split(" ")[0] + "," + type_info[element_name] = ( + uris, + terms[:-1], # Remove the last comma + ) return type_info # Iterate through the tools in the response to find the tool name @@ -236,4 +244,4 @@ def _iterate_input_output(type): # If the tool name was not found in the response log.warning(f"Could not find an EDAM ontology term for '{tool_name}'") - return [] + return None diff --git a/nf_core/components/create.py b/nf_core/components/create.py index c0b7b9157d..498afaa97c 100644 --- a/nf_core/components/create.py +++ b/nf_core/components/create.py @@ -22,6 +22,7 @@ import nf_core.utils from nf_core.components.components_command import ComponentCommand from nf_core.components.components_utils import get_biotools_id, get_biotools_response, get_channel_info_from_biotools +from nf_core.components.components_utils import get_biotools_id, get_biotools_response, 
get_channel_info_from_biotools from nf_core.pipelines.lint_utils import run_prettier_on_file log = logging.getLogger(__name__) @@ -151,11 +152,15 @@ def create(self) -> bool: if self.component_type == "modules": # Try to find a bioconda package for 'component' self._get_bioconda_tool() - # Try to find a biotools entry for 'component' - biotools_data = get_biotools_response(self.tool_conda_name) - self.tool_identifier = get_biotools_id(biotools_data, self.tool_conda_name) - # Obtain EDAM ontologies for inputs and outputs - self.inputs, self.outputs = get_channel_info_from_biotools(biotools_data, self.tool_conda_name) + if self.tool_conda_name: + # Try to find a biotools entry for 'component' + biotools_data = get_biotools_response(self.tool_conda_name) + if biotools_data: + self.tool_identifier = get_biotools_id(biotools_data, self.tool_conda_name) + # Obtain EDAM ontologies for inputs and outputs + channel_info = get_channel_info_from_biotools(biotools_data, self.tool_conda_name) + if channel_info: + self.inputs, self.outputs = channel_info # Prompt for GitHub username self._get_username() diff --git a/nf_core/modules/lint/__init__.py b/nf_core/modules/lint/__init__.py index 49012cff40..ad010cd994 100644 --- a/nf_core/modules/lint/__init__.py +++ b/nf_core/modules/lint/__init__.py @@ -20,7 +20,7 @@ import nf_core.components.nfcore_component import nf_core.modules.modules_utils import nf_core.utils -from nf_core.components.components_utils import get_biotools_id +from nf_core.components.components_utils import get_biotools_id, get_biotools_response from nf_core.components.lint import ComponentLint, LintExceptionError, LintResult from nf_core.components.nfcore_component import NFCoreComponent from nf_core.pipelines.lint_utils import console, run_prettier_on_file @@ -362,9 +362,8 @@ def update_meta_yml_file(self, mod): for i, tool in enumerate(corrected_meta_yml["tools"]): tool_name = list(tool.keys())[0] if "identifier" not in tool[tool_name]: - 
corrected_meta_yml["tools"][i][tool_name]["identifier"] = get_biotools_id( - mod.component_name if "/" not in mod.component_name else mod.component_name.split("/")[0] - ) + biotools_data = get_biotools_response(tool_name) + corrected_meta_yml["tools"][i][tool_name]["identifier"] = get_biotools_id(biotools_data, tool_name) with open(mod.meta_yml, "w") as fh: log.info(f"Updating {mod.meta_yml}") From 56f37f81bed6548abba2c856873f8fe6d603470c Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 28 Jan 2025 15:54:16 +0100 Subject: [PATCH 03/26] run pre-commit --- nf_core/components/components_utils.py | 1 - nf_core/components/create.py | 1 - 2 files changed, 2 deletions(-) diff --git a/nf_core/components/components_utils.py b/nf_core/components/components_utils.py index 30b767bcfd..55dbfbf576 100644 --- a/nf_core/components/components_utils.py +++ b/nf_core/components/components_utils.py @@ -165,7 +165,6 @@ def get_components_to_install(subworkflow_dir: Union[str, Path]) -> Tuple[List[s return modules, subworkflows -def get_biotools_response(tool_name: str) -> Optional[dict]: def get_biotools_response(tool_name: str) -> Optional[dict]: """ Try to get bio.tools information for 'tool' diff --git a/nf_core/components/create.py b/nf_core/components/create.py index 498afaa97c..fe98c7c1ef 100644 --- a/nf_core/components/create.py +++ b/nf_core/components/create.py @@ -22,7 +22,6 @@ import nf_core.utils from nf_core.components.components_command import ComponentCommand from nf_core.components.components_utils import get_biotools_id, get_biotools_response, get_channel_info_from_biotools -from nf_core.components.components_utils import get_biotools_id, get_biotools_response, get_channel_info_from_biotools from nf_core.pipelines.lint_utils import run_prettier_on_file log = logging.getLogger(__name__) From 91c67f1cc2f416cf5da55ba20d33577d2b520371 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 28 Jan 2025 16:01:44 +0100 Subject: [PATCH 04/26] run prettier after creating the 
module --- nf_core/components/create.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nf_core/components/create.py b/nf_core/components/create.py index fe98c7c1ef..dd5fc2290e 100644 --- a/nf_core/components/create.py +++ b/nf_core/components/create.py @@ -183,6 +183,8 @@ def create(self) -> bool: new_files = [str(path) for path in self.file_paths.values()] + run_prettier_on_file(new_files) + log.info("Created following files:\n " + "\n ".join(new_files)) return True From 38e7784b3e61dd7e0928aca1be88a13515dde043 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 28 Jan 2025 16:39:08 +0100 Subject: [PATCH 05/26] fix pytests --- nf_core/components/create.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/nf_core/components/create.py b/nf_core/components/create.py index dd5fc2290e..9f3dd3f969 100644 --- a/nf_core/components/create.py +++ b/nf_core/components/create.py @@ -151,15 +151,15 @@ def create(self) -> bool: if self.component_type == "modules": # Try to find a bioconda package for 'component' self._get_bioconda_tool() - if self.tool_conda_name: - # Try to find a biotools entry for 'component' - biotools_data = get_biotools_response(self.tool_conda_name) - if biotools_data: - self.tool_identifier = get_biotools_id(biotools_data, self.tool_conda_name) - # Obtain EDAM ontologies for inputs and outputs - channel_info = get_channel_info_from_biotools(biotools_data, self.tool_conda_name) - if channel_info: - self.inputs, self.outputs = channel_info + name = self.tool_conda_name if self.tool_conda_name else self.component + # Try to find a biotools entry for 'component' + biotools_data = get_biotools_response(name) + if biotools_data: + self.tool_identifier = get_biotools_id(biotools_data, name) + # Obtain EDAM ontologies for inputs and outputs + channel_info = get_channel_info_from_biotools(biotools_data, name) + if channel_info: + self.inputs, self.outputs = channel_info # Prompt for GitHub username self._get_username() 
From 3edceccc84d18cee1d4bc246b7fb9fcb6ff28b3c Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Wed, 29 Jan 2025 11:14:11 +0100 Subject: [PATCH 06/26] fix typing --- nf_core/components/components_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nf_core/components/components_utils.py b/nf_core/components/components_utils.py index 55dbfbf576..26086bb163 100644 --- a/nf_core/components/components_utils.py +++ b/nf_core/components/components_utils.py @@ -200,7 +200,7 @@ def get_biotools_id(data: dict, tool_name: str) -> str: def get_channel_info_from_biotools( data: dict, tool_name: str -) -> Optional[tuple[dict[str, Tuple[List[str], str]], dict[str, Tuple[List[str], str]]]]: +) -> Optional[tuple[dict[str, tuple[list[str], str]], dict[str, tuple[list[str], str]]]]: """ Try to find input and output channels and the respective EDAM ontology terms @@ -211,8 +211,8 @@ def get_channel_info_from_biotools( inputs = {} outputs = {} - def _iterate_input_output(type) -> dict[str, Tuple[List[str], str]]: - type_info: dict[str, Tuple[List[str], str]] = {} + def _iterate_input_output(type) -> dict[str, tuple[list[str], str]]: + type_info: dict[str, tuple[list[str], str]] = {} if type in funct: for element in funct[type]: if "data" in element: From 135f5da0fb41799be66a9c8bdfe8bbc24f68b930 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlia=20Mir=20Pedrol?= Date: Wed, 29 Jan 2025 11:03:17 +0000 Subject: [PATCH 07/26] more typing fixes --- nf_core/components/components_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nf_core/components/components_utils.py b/nf_core/components/components_utils.py index 26086bb163..d2e07caa1e 100644 --- a/nf_core/components/components_utils.py +++ b/nf_core/components/components_utils.py @@ -1,7 +1,7 @@ import logging import re from pathlib import Path -from typing import TYPE_CHECKING, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union import 
questionary import requests @@ -200,7 +200,7 @@ def get_biotools_id(data: dict, tool_name: str) -> str: def get_channel_info_from_biotools( data: dict, tool_name: str -) -> Optional[tuple[dict[str, tuple[list[str], str]], dict[str, tuple[list[str], str]]]]: +) -> Optional[Tuple[Dict[str, Tuple[List[str], str]], Dict[str, Tuple[List[str], str]]]]: """ Try to find input and output channels and the respective EDAM ontology terms @@ -211,8 +211,8 @@ def get_channel_info_from_biotools( inputs = {} outputs = {} - def _iterate_input_output(type) -> dict[str, tuple[list[str], str]]: - type_info: dict[str, tuple[list[str], str]] = {} + def _iterate_input_output(type) -> Dict[str, Tuple[List[str], str]]: + type_info = {} if type in funct: for element in funct[type]: if "data" in element: From 284bfa6e0449cd9e574724f9475218fb2f57ca76 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Wed, 29 Jan 2025 14:37:55 +0100 Subject: [PATCH 08/26] add pytests for components utils --- tests/components/__init__.py | 0 tests/components/test_components_utils.py | 55 +++++++++++++++++++++++ tests/test_components.py | 5 +++ tests/utils.py | 26 ++++++++++- 4 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 tests/components/__init__.py create mode 100644 tests/components/test_components_utils.py diff --git a/tests/components/__init__.py b/tests/components/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/components/test_components_utils.py b/tests/components/test_components_utils.py new file mode 100644 index 0000000000..dce9eb0e4d --- /dev/null +++ b/tests/components/test_components_utils.py @@ -0,0 +1,55 @@ +import responses + +import nf_core.components.components_utils + +from ..test_components import TestComponents +from ..utils import mock_biotools_api_calls + + +class TestTestComponentsUtils(TestComponents): + def test_get_biotools_id(self): + """Test getting the bio.tools ID for a tool""" + with responses.RequestsMock() as rsps: + 
mock_biotools_api_calls(rsps, "bpipe") + response = nf_core.components.components_utils.get_biotools_response("bpipe") + id = nf_core.components.components_utils.get_biotools_id(response, "bpipe") + assert id == "biotools:bpipe" + + def test_get_biotools_id_warn(self): + """Test getting the bio.tools ID for a tool and failing""" + with responses.RequestsMock() as rsps: + mock_biotools_api_calls(rsps, "bpipe") + response = nf_core.components.components_utils.get_biotools_response("bpipe") + nf_core.components.components_utils.get_biotools_id(response, "test") + assert "Could not find a bio.tools ID for 'test'" in self.caplog.text + + def test_get_biotools_ch_info(self): + """Test getting the bio.tools channel information for a tool""" + with responses.RequestsMock() as rsps: + mock_biotools_api_calls(rsps, "bpipe") + response = nf_core.components.components_utils.get_biotools_response("bpipe") + inputs, outputs = nf_core.components.components_utils.get_channel_info_from_biotools(response, "bpipe") + assert inputs == { + "raw_sequence": ( + [ + "http://edamontology.org/data_0848", + "http://edamontology.org/format_2182", + "http://edamontology.org/format_2573", + ], + "fastq-like,sam", + ) + } + assert outputs == { + "sequence_report": ( + ["http://edamontology.org/data_2955", "http://edamontology.org/format_2331"], + "html", + ) + } + + def test_get_biotools_ch_info_warn(self): + """Test getting the bio.tools channel information for a tool and failing""" + with responses.RequestsMock() as rsps: + mock_biotools_api_calls(rsps, "bpipe") + response = nf_core.components.components_utils.get_biotools_response("bpipe") + nf_core.components.components_utils.get_channel_info_from_biotools(response, "test") + assert "Could not find an EDAM ontology term for 'test'" in self.caplog.text diff --git a/tests/test_components.py b/tests/test_components.py index eaf999c3c3..b77046df2b 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -6,6 +6,7 @@ import 
unittest from pathlib import Path +import pytest from git.repo import Repo from .utils import GITLAB_NFTEST_BRANCH, GITLAB_URL @@ -32,6 +33,10 @@ def tearDown(self): if self.tmp_dir.is_dir(): shutil.rmtree(self.tmp_dir) + @pytest.fixture(autouse=True) + def _use_caplog(self, caplog): + self.caplog = caplog + ############################################ # Test of the individual components commands. # ############################################ diff --git a/tests/utils.py b/tests/utils.py index 4c1c620adb..b09e991318 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -102,7 +102,31 @@ def mock_biotools_api_calls(rsps: responses.RequestsMock, module: str) -> None: """Mock biotools api calls for module""" biotools_api_url = f"https://bio.tools/api/t/?q={module}&format=json" biotools_mock = { - "list": [{"name": "Bpipe", "biotoolsCURIE": "biotools:bpipe"}], + "list": [ + { + "name": "Bpipe", + "biotoolsCURIE": "biotools:bpipe", + "function": [ + { + "input": [ + { + "data": {"uri": "http://edamontology.org/data_0848", "term": "Raw sequence"}, + "format": [ + {"uri": "http://edamontology.org/format_2182", "term": "FASTQ-like format (text)"}, + {"uri": "http://edamontology.org/format_2573", "term": "SAM"}, + ], + } + ], + "output": [ + { + "data": {"uri": "http://edamontology.org/data_2955", "term": "Sequence report"}, + "format": [{"uri": "http://edamontology.org/format_2331", "term": "HTML"}], + } + ], + } + ], + } + ], } rsps.get(biotools_api_url, json=biotools_mock, status=200) From fbe857ea34113a9c59522e5ad32fc188fb74cc33 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Wed, 29 Jan 2025 16:11:18 +0100 Subject: [PATCH 09/26] restructure components tests --- tests/components/generate_snapshot.py | 143 ------------------ tests/components/snapshot_test.py | 40 ----- .../test_components_generate_snapshot.py | 140 +++++++++++++++++ .../test_components_snapshot_test.py | 41 +++++ tests/test_components.py | 18 --- 5 files changed, 181 insertions(+), 201 deletions(-) delete 
mode 100644 tests/components/generate_snapshot.py delete mode 100644 tests/components/snapshot_test.py create mode 100644 tests/components/test_components_generate_snapshot.py create mode 100644 tests/components/test_components_snapshot_test.py diff --git a/tests/components/generate_snapshot.py b/tests/components/generate_snapshot.py deleted file mode 100644 index a5a8eaba39..0000000000 --- a/tests/components/generate_snapshot.py +++ /dev/null @@ -1,143 +0,0 @@ -"""Test generate a snapshot""" - -import json -from pathlib import Path -from unittest.mock import MagicMock - -import pytest - -from nf_core.components.components_test import ComponentsTest -from nf_core.utils import set_wd - -from ..utils import GITLAB_NFTEST_BRANCH, GITLAB_URL - - -def test_generate_snapshot_module(self): - """Generate the snapshot for a module in nf-core/modules clone""" - with set_wd(self.nfcore_modules): - snap_generator = ComponentsTest( - component_type="modules", - component_name="fastqc", - no_prompts=True, - remote_url=GITLAB_URL, - branch=GITLAB_NFTEST_BRANCH, - ) - snap_generator.run() - - snap_path = Path("modules", "nf-core-test", "fastqc", "tests", "main.nf.test.snap") - assert snap_path.exists() - - with open(snap_path) as fh: - snap_content = json.load(fh) - assert "versions" in snap_content - assert "content" in snap_content["versions"] - assert "versions.yml:md5,e1cc25ca8af856014824abd842e93978" in snap_content["versions"]["content"][0] - - -def test_generate_snapshot_subworkflow(self): - """Generate the snapshot for a subworkflows in nf-core/modules clone""" - with set_wd(self.nfcore_modules): - snap_generator = ComponentsTest( - component_type="subworkflows", - component_name="bam_sort_stats_samtools", - no_prompts=True, - remote_url=GITLAB_URL, - branch=GITLAB_NFTEST_BRANCH, - ) - snap_generator.run() - - snap_path = Path("subworkflows", "nf-core-test", "bam_sort_stats_samtools", "tests", "main.nf.test.snap") - assert snap_path.exists() - - with open(snap_path) as fh: 
- snap_content = json.load(fh) - assert "test_bam_sort_stats_samtools_paired_end_flagstats" in snap_content - assert ( - "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" - in snap_content["test_bam_sort_stats_samtools_paired_end_flagstats"]["content"][0][0] - ) - assert "test_bam_sort_stats_samtools_paired_end_idxstats" in snap_content - assert ( - "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" - in snap_content["test_bam_sort_stats_samtools_paired_end_idxstats"]["content"][0][0] - ) - - -def test_generate_snapshot_once( - self, -): - """Generate the snapshot for a module in nf-core/modules clone only once""" - with set_wd(self.nfcore_modules): - snap_generator = ComponentsTest( - component_type="modules", - component_name="fastqc", - once=True, - no_prompts=True, - remote_url=GITLAB_URL, - branch=GITLAB_NFTEST_BRANCH, - ) - snap_generator.repo_type = "modules" - snap_generator.generate_snapshot = MagicMock() - snap_generator.run() - snap_generator.generate_snapshot.assert_called_once() - - -def test_update_snapshot_module(self): - """Update the snapshot of a module in nf-core/modules clone""" - - with set_wd(self.nfcore_modules): - snap_path = Path("modules", "nf-core-test", "bwa", "mem", "tests", "main.nf.test.snap") - with open(snap_path) as fh: - snap_content = json.load(fh) - original_timestamp = snap_content["Single-End"]["timestamp"] - # delete the timestamp in json - snap_content["Single-End"]["timestamp"] = "" - with open(snap_path, "w") as fh: - json.dump(snap_content, fh) - snap_generator = ComponentsTest( - component_type="modules", - component_name="bwa/mem", - no_prompts=True, - remote_url=GITLAB_URL, - branch=GITLAB_NFTEST_BRANCH, - update=True, - ) - snap_generator.run() - - with open(snap_path) as fh: - snap_content = json.load(fh) - assert "Single-End" in snap_content - assert snap_content["Single-End"]["timestamp"] != original_timestamp - - -def test_test_not_found(self): - """Generate the snapshot for a module in nf-core/modules clone 
which doesn't contain tests""" - with set_wd(self.nfcore_modules): - snap_generator = ComponentsTest( - component_type="modules", - component_name="fastp", - no_prompts=True, - remote_url=GITLAB_URL, - branch=GITLAB_NFTEST_BRANCH, - ) - test_file = Path("modules", "nf-core-test", "fastp", "tests", "main.nf.test") - test_file.rename(test_file.parent / "main.nf.test.bak") - with pytest.raises(UserWarning) as e: - snap_generator.run() - assert "Test file 'main.nf.test' not found" in str(e.value) - Path(test_file.parent / "main.nf.test.bak").rename(test_file) - - -def test_unstable_snapshot(self): - """Generate the snapshot for a module in nf-core/modules clone with unstable snapshots""" - with set_wd(self.nfcore_modules): - snap_generator = ComponentsTest( - component_type="modules", - component_name="kallisto/quant", - no_prompts=True, - remote_url=GITLAB_URL, - branch=GITLAB_NFTEST_BRANCH, - ) - with pytest.raises(UserWarning) as e: - snap_generator.run() - assert "nf-test snapshot is not stable" in str(e.value) diff --git a/tests/components/snapshot_test.py b/tests/components/snapshot_test.py deleted file mode 100644 index b3fc259770..0000000000 --- a/tests/components/snapshot_test.py +++ /dev/null @@ -1,40 +0,0 @@ -"""Test the 'modules test' or 'subworkflows test' command which runs nf-test test.""" - -import shutil -from pathlib import Path - -import pytest - -from nf_core.components.components_test import ComponentsTest -from nf_core.utils import set_wd - - -def test_components_test_check_inputs(self): - """Test the check_inputs() function - raise UserWarning because module doesn't exist""" - with set_wd(self.nfcore_modules): - meta_builder = ComponentsTest(component_type="modules", component_name="none", no_prompts=True) - with pytest.raises(UserWarning) as excinfo: - meta_builder.check_inputs() - assert "Cannot find directory" in str(excinfo.value) - - -def test_components_test_no_name_no_prompts(self): - """Test the check_inputs() function - raise UserWarning 
prompts are deactivated and module name is not provided.""" - with set_wd(self.nfcore_modules): - meta_builder = ComponentsTest(component_type="modules", component_name=None, no_prompts=True) - with pytest.raises(UserWarning) as excinfo: - meta_builder.check_inputs() - assert "Module name not provided and prompts deactivated." in str(excinfo.value) - - -def test_components_test_no_installed_modules(self): - """Test the check_inputs() function - raise UserWarning because installed modules were not found""" - with set_wd(self.nfcore_modules): - module_dir = Path(self.nfcore_modules, "modules") - shutil.rmtree(module_dir) - module_dir.mkdir() - meta_builder = ComponentsTest(component_type="modules", component_name=None, no_prompts=False) - meta_builder.repo_type = "modules" - with pytest.raises(LookupError) as excinfo: - meta_builder.check_inputs() - assert "Nothing installed from" in str(excinfo.value) diff --git a/tests/components/test_components_generate_snapshot.py b/tests/components/test_components_generate_snapshot.py new file mode 100644 index 0000000000..265842f5d0 --- /dev/null +++ b/tests/components/test_components_generate_snapshot.py @@ -0,0 +1,140 @@ +"""Test generate a snapshot""" + +import json +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from nf_core.components.components_test import ComponentsTest +from nf_core.utils import set_wd + +from ..test_components import TestComponents +from ..utils import GITLAB_NFTEST_BRANCH, GITLAB_URL + + +class TestTestComponentsUtils(TestComponents): + def test_generate_snapshot_module(self): + """Generate the snapshot for a module in nf-core/modules clone""" + with set_wd(self.nfcore_modules): + snap_generator = ComponentsTest( + component_type="modules", + component_name="fastqc", + no_prompts=True, + remote_url=GITLAB_URL, + branch=GITLAB_NFTEST_BRANCH, + ) + snap_generator.run() + + snap_path = Path("modules", "nf-core-test", "fastqc", "tests", "main.nf.test.snap") + assert 
snap_path.exists() + + with open(snap_path) as fh: + snap_content = json.load(fh) + assert "versions" in snap_content + assert "content" in snap_content["versions"] + assert "versions.yml:md5,e1cc25ca8af856014824abd842e93978" in snap_content["versions"]["content"][0] + + def test_generate_snapshot_subworkflow(self): + """Generate the snapshot for a subworkflows in nf-core/modules clone""" + with set_wd(self.nfcore_modules): + snap_generator = ComponentsTest( + component_type="subworkflows", + component_name="bam_sort_stats_samtools", + no_prompts=True, + remote_url=GITLAB_URL, + branch=GITLAB_NFTEST_BRANCH, + ) + snap_generator.run() + + snap_path = Path("subworkflows", "nf-core-test", "bam_sort_stats_samtools", "tests", "main.nf.test.snap") + assert snap_path.exists() + + with open(snap_path) as fh: + snap_content = json.load(fh) + assert "test_bam_sort_stats_samtools_paired_end_flagstats" in snap_content + assert ( + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + in snap_content["test_bam_sort_stats_samtools_paired_end_flagstats"]["content"][0][0] + ) + assert "test_bam_sort_stats_samtools_paired_end_idxstats" in snap_content + assert ( + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + in snap_content["test_bam_sort_stats_samtools_paired_end_idxstats"]["content"][0][0] + ) + + def test_generate_snapshot_once( + self, + ): + """Generate the snapshot for a module in nf-core/modules clone only once""" + with set_wd(self.nfcore_modules): + snap_generator = ComponentsTest( + component_type="modules", + component_name="fastqc", + once=True, + no_prompts=True, + remote_url=GITLAB_URL, + branch=GITLAB_NFTEST_BRANCH, + ) + snap_generator.repo_type = "modules" + snap_generator.generate_snapshot = MagicMock() + snap_generator.run() + snap_generator.generate_snapshot.assert_called_once() + + def test_update_snapshot_module(self): + """Update the snapshot of a module in nf-core/modules clone""" + + with set_wd(self.nfcore_modules): + snap_path = Path("modules", 
"nf-core-test", "bwa", "mem", "tests", "main.nf.test.snap") + with open(snap_path) as fh: + snap_content = json.load(fh) + original_timestamp = snap_content["Single-End"]["timestamp"] + # delete the timestamp in json + snap_content["Single-End"]["timestamp"] = "" + with open(snap_path, "w") as fh: + json.dump(snap_content, fh) + snap_generator = ComponentsTest( + component_type="modules", + component_name="bwa/mem", + no_prompts=True, + remote_url=GITLAB_URL, + branch=GITLAB_NFTEST_BRANCH, + update=True, + ) + snap_generator.run() + + with open(snap_path) as fh: + snap_content = json.load(fh) + assert "Single-End" in snap_content + assert snap_content["Single-End"]["timestamp"] != original_timestamp + + def test_test_not_found(self): + """Generate the snapshot for a module in nf-core/modules clone which doesn't contain tests""" + with set_wd(self.nfcore_modules): + snap_generator = ComponentsTest( + component_type="modules", + component_name="fastp", + no_prompts=True, + remote_url=GITLAB_URL, + branch=GITLAB_NFTEST_BRANCH, + ) + test_file = Path("modules", "nf-core-test", "fastp", "tests", "main.nf.test") + test_file.rename(test_file.parent / "main.nf.test.bak") + with pytest.raises(UserWarning) as e: + snap_generator.run() + assert "Test file 'main.nf.test' not found" in str(e.value) + Path(test_file.parent / "main.nf.test.bak").rename(test_file) + + def test_unstable_snapshot(self): + """Generate the snapshot for a module in nf-core/modules clone with unstable snapshots""" + with set_wd(self.nfcore_modules): + snap_generator = ComponentsTest( + component_type="modules", + component_name="kallisto/quant", + no_prompts=True, + remote_url=GITLAB_URL, + branch=GITLAB_NFTEST_BRANCH, + ) + with pytest.raises(UserWarning) as e: + snap_generator.run() + assert "nf-test snapshot is not stable" in str(e.value) diff --git a/tests/components/test_components_snapshot_test.py b/tests/components/test_components_snapshot_test.py new file mode 100644 index 0000000000..8f0f2c0bd1 
--- /dev/null +++ b/tests/components/test_components_snapshot_test.py @@ -0,0 +1,41 @@ +"""Test the 'modules test' or 'subworkflows test' command which runs nf-test test.""" + +import shutil +from pathlib import Path + +import pytest + +from nf_core.components.components_test import ComponentsTest +from nf_core.utils import set_wd + +from ..test_components import TestComponents + + +class TestTestComponentsUtils(TestComponents): + def test_components_test_check_inputs(self): + """Test the check_inputs() function - raise UserWarning because module doesn't exist""" + with set_wd(self.nfcore_modules): + meta_builder = ComponentsTest(component_type="modules", component_name="none", no_prompts=True) + with pytest.raises(UserWarning) as excinfo: + meta_builder.check_inputs() + assert "Cannot find directory" in str(excinfo.value) + + def test_components_test_no_name_no_prompts(self): + """Test the check_inputs() function - raise UserWarning prompts are deactivated and module name is not provided.""" + with set_wd(self.nfcore_modules): + meta_builder = ComponentsTest(component_type="modules", component_name=None, no_prompts=True) + with pytest.raises(UserWarning) as excinfo: + meta_builder.check_inputs() + assert "Module name not provided and prompts deactivated." 
in str(excinfo.value) + + def test_components_test_no_installed_modules(self): + """Test the check_inputs() function - raise UserWarning because installed modules were not found""" + with set_wd(self.nfcore_modules): + module_dir = Path(self.nfcore_modules, "modules") + shutil.rmtree(module_dir) + module_dir.mkdir() + meta_builder = ComponentsTest(component_type="modules", component_name=None, no_prompts=False) + meta_builder.repo_type = "modules" + with pytest.raises(LookupError) as excinfo: + meta_builder.check_inputs() + assert "Nothing installed from" in str(excinfo.value) diff --git a/tests/test_components.py b/tests/test_components.py index b77046df2b..a6b22f6686 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -36,21 +36,3 @@ def tearDown(self): @pytest.fixture(autouse=True) def _use_caplog(self, caplog): self.caplog = caplog - - ############################################ - # Test of the individual components commands. # - ############################################ - - from .components.generate_snapshot import ( # type: ignore[misc] - test_generate_snapshot_module, - test_generate_snapshot_once, - test_generate_snapshot_subworkflow, - test_test_not_found, - test_unstable_snapshot, - test_update_snapshot_module, - ) - from .components.snapshot_test import ( # type: ignore[misc] - test_components_test_check_inputs, - test_components_test_no_installed_modules, - test_components_test_no_name_no_prompts, - ) From 27d19962ee098d17be824f1700dd1047b43c991b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlia=20Mir=20Pedrol?= Date: Thu, 30 Jan 2025 12:34:00 +0100 Subject: [PATCH 10/26] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Matthias Hörtenhuber --- nf_core/components/components_utils.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/nf_core/components/components_utils.py b/nf_core/components/components_utils.py 
index d2e07caa1e..0ef66147d9 100644 --- a/nf_core/components/components_utils.py +++ b/nf_core/components/components_utils.py @@ -165,10 +165,9 @@ def get_components_to_install(subworkflow_dir: Union[str, Path]) -> Tuple[List[s return modules, subworkflows -def get_biotools_response(tool_name: str) -> Optional[dict]: +def get_biotools_response(tool_name: str) -> Optional[Dict]: """ Try to get bio.tools information for 'tool' - Try to get bio.tools information for 'tool' """ url = f"https://bio.tools/api/t/?q={tool_name}&format=json" try: @@ -197,10 +196,10 @@ def get_biotools_id(data: dict, tool_name: str) -> str: log.warning(f"Could not find a bio.tools ID for '{tool_name}'") return "" - +type DictWithListAndStr = Dict[str, Tuple[List[str], str]] def get_channel_info_from_biotools( data: dict, tool_name: str -) -> Optional[Tuple[Dict[str, Tuple[List[str], str]], Dict[str, Tuple[List[str], str]]]]: +) -> Optional[Tuple[DictWithListAndStr, DictWithListAndStr]] : """ Try to find input and output channels and the respective EDAM ontology terms @@ -211,7 +210,7 @@ def get_channel_info_from_biotools( inputs = {} outputs = {} - def _iterate_input_output(type) -> Dict[str, Tuple[List[str], str]]: + def _iterate_input_output(type) -> DictWithListAndStr: type_info = {} if type in funct: for element in funct[type]: @@ -235,7 +234,7 @@ def _iterate_input_output(type) -> Dict[str, Tuple[List[str], str]]: for tool in data["list"]: if tool["name"].lower() == tool_name: if "function" in tool: - # Parese all tool functions + # Parse all tool functions for funct in tool["function"]: inputs.update(_iterate_input_output("input")) outputs.update(_iterate_input_output("output")) From bde7a3b49ce1565681b586ca2a749ba01fe5e3b1 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 4 Feb 2025 15:47:33 +0100 Subject: [PATCH 11/26] use bio.tools info to create main.nf for modules template --- nf_core/module-template/main.nf | 48 +++++++++++++++++++++++++++----- nf_core/module-template/meta.yml 
| 4 +-- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/nf_core/module-template/main.nf b/nf_core/module-template/main.nf index 5258403e8f..163be77690 100644 --- a/nf_core/module-template/main.nf +++ b/nf_core/module-template/main.nf @@ -33,6 +33,12 @@ process {{ component_name_underscore|upper }} { '{{ docker_container if docker_container else 'biocontainers/YOUR-TOOL-HERE' }}' }" input: + {%- if inputs %} + // TODO nf-core: Update the information obtained form bio.tools and make sure that it is correct + {%- for input_name, ontologies in inputs.items() %} + {{ 'tuple val(meta), path(' + input_name + ')' if has_meta else 'path ' + input_name }} + {%- endfor %} + {%- else -%} {% if not_empty_template -%} // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" // MUST be provided as an input via a Groovy Map called "meta". @@ -44,16 +50,22 @@ process {{ component_name_underscore|upper }} { {%- else -%} {{ 'tuple val(meta), path(input)' if has_meta else 'path input' }} {%- endif %} + {%- endif %} output: + {%- if outputs %} + // TODO nf-core: Update the information obtained form bio.tools and make sure that it is correct + {%- for output_name, ontologies in outputs.items() %} + {{ 'tuple val(meta), path("*{' + ontologies[1] + '}")' if has_meta else 'path ' + output_name }}, emit: {{ output_name }} + {%- endfor %} + {%- else %} {% if not_empty_template -%} // TODO nf-core: Named file extensions MUST be emitted for ALL output channels {{ 'tuple val(meta), path("*.bam")' if has_meta else 'path "*.bam"' }}, emit: bam + // TODO nf-core: List additional required output channels/values here {%- else -%} {{ 'tuple val(meta), path("*")' if has_meta else 'path "*"' }}, emit: output {%- endif %} - {% if not_empty_template -%} - // TODO nf-core: List additional required output channels/values here {%- endif %} path "versions.yml" , emit: versions @@ -78,20 +90,33 @@ process {{ component_name_underscore|upper }} { {%- 
endif %} """ {% if not_empty_template -%} - samtools \\ - sort \\ + {{ component }} \\ $args \\ -@ $task.cpus \\ {%- if has_meta %} + {%- if inputs %} + {%- for input_name, ontologies in inputs.items() %} + {%- set extensions = ontologies[1].split(',') %} + {%- for ext in extensions %} + -o ${prefix}.{{ ext }} \\ + {%- endfor %} + {%- endfor %} + {%- else %} -o ${prefix}.bam \\ - -T $prefix \\ {%- endif %} + {%- endif %} + {%- if inputs %} + {%- for input_name, ontologies in inputs.items() %} + ${{ input_name }} \\ + {%- endfor %} + {%- else %} $bam + {%- endif %} {%- endif %} cat <<-END_VERSIONS > versions.yml "${task.process}": - {{ component }}: \$(samtools --version |& sed '1!d ; s/samtools //') + {{ component }}: \$({{ component }} --version) END_VERSIONS """ @@ -108,12 +133,21 @@ process {{ component_name_underscore|upper }} { {%- endif %} """ {% if not_empty_template -%} + {%- if inputs %} + {%- for input_name, ontologies in inputs.items() %} + {%- set extensions = ontologies[1].split(',') %} + {%- for ext in extensions %} + touch ${prefix}.{{ ext }} + {%- endfor %} + {%- endfor %} + {%- else %} touch ${prefix}.bam {%- endif %} + {%- endif %} cat <<-END_VERSIONS > versions.yml "${task.process}": - {{ component }}: \$(samtools --version |& sed '1!d ; s/samtools //') + {{ component }}: \$({{ component }} --version) END_VERSIONS """ } diff --git a/nf_core/module-template/meta.yml b/nf_core/module-template/meta.yml index 9118972bc4..a50d4c501b 100644 --- a/nf_core/module-template/meta.yml +++ b/nf_core/module-template/meta.yml @@ -90,9 +90,9 @@ output: description: {{ output_name }} file pattern: {{ "\"*.{" + ontologies[1] + "}\"" }} ontologies: - {% for ontology in ontologies[0] -%} + {%- for ontology in ontologies[0] %} - edam: "{{ ontology }}" - {% endfor -%} + {%- endfor %} {% endfor -%} {% else -%} - {{ 'bam:' if not_empty_template else "output:" }} From d2631b16efdd63252fc05a78b9ceaa82fc47f1db Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Fri, 7 Feb 2025 
14:32:22 +0100 Subject: [PATCH 12/26] add EDAM comments and suggestions from review --- nf_core/components/components_utils.py | 18 +++++++++--------- nf_core/module-template/environment.yml | 3 +++ nf_core/module-template/main.nf | 13 +++++-------- nf_core/module-template/meta.yml | 20 ++++++++++---------- tests/components/test_components_utils.py | 4 ++-- 5 files changed, 29 insertions(+), 29 deletions(-) diff --git a/nf_core/components/components_utils.py b/nf_core/components/components_utils.py index 0ef66147d9..64aaa53786 100644 --- a/nf_core/components/components_utils.py +++ b/nf_core/components/components_utils.py @@ -196,10 +196,13 @@ def get_biotools_id(data: dict, tool_name: str) -> str: log.warning(f"Could not find a bio.tools ID for '{tool_name}'") return "" -type DictWithListAndStr = Dict[str, Tuple[List[str], str]] + +type DictWithStrAndTuple = Dict[str, Tuple[List[str], List[str]]] + + def get_channel_info_from_biotools( data: dict, tool_name: str -) -> Optional[Tuple[DictWithListAndStr, DictWithListAndStr]] : +) -> Optional[Tuple[DictWithStrAndTuple, DictWithStrAndTuple]]: """ Try to find input and output channels and the respective EDAM ontology terms @@ -210,24 +213,21 @@ def get_channel_info_from_biotools( inputs = {} outputs = {} - def _iterate_input_output(type) -> DictWithListAndStr: + def _iterate_input_output(type) -> DictWithStrAndTuple: type_info = {} if type in funct: for element in funct[type]: if "data" in element: element_name = "_".join(element["data"]["term"].lower().split(" ")) uris = [element["data"]["uri"]] - terms = "" + terms = [""] if "format" in element: for format in element["format"]: # Append the EDAM URI uris.append(format["uri"]) # Append the EDAM term, getting the first word in case of complicated strings. i.e. 
"FASTA format" - terms = terms + format["term"].lower().split(" ")[0] + "," - type_info[element_name] = ( - uris, - terms[:-1], # Remove the last comma - ) + terms.append(format["term"].lower().split(" ")[0]) + type_info[element_name] = (uris, terms) return type_info # Iterate through the tools in the response to find the tool name diff --git a/nf_core/module-template/environment.yml b/nf_core/module-template/environment.yml index a8a40a8e03..4e74077572 100644 --- a/nf_core/module-template/environment.yml +++ b/nf_core/module-template/environment.yml @@ -4,4 +4,7 @@ channels: - conda-forge - bioconda dependencies: + # TODO nf-core: List required Conda package(s). + # Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + # For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - "{{ bioconda if bioconda else 'YOUR-TOOL-HERE' }}" diff --git a/nf_core/module-template/main.nf b/nf_core/module-template/main.nf index 163be77690..b2383178a2 100644 --- a/nf_core/module-template/main.nf +++ b/nf_core/module-template/main.nf @@ -22,9 +22,6 @@ process {{ component_name_underscore|upper }} { label '{{ process_label }}' {% if not_empty_template -%} - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. 
{% endif -%} conda "${moduleDir}/environment.yml" @@ -34,7 +31,7 @@ process {{ component_name_underscore|upper }} { input: {%- if inputs %} - // TODO nf-core: Update the information obtained form bio.tools and make sure that it is correct + // TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct {%- for input_name, ontologies in inputs.items() %} {{ 'tuple val(meta), path(' + input_name + ')' if has_meta else 'path ' + input_name }} {%- endfor %} @@ -54,9 +51,9 @@ process {{ component_name_underscore|upper }} { output: {%- if outputs %} - // TODO nf-core: Update the information obtained form bio.tools and make sure that it is correct + // TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct {%- for output_name, ontologies in outputs.items() %} - {{ 'tuple val(meta), path("*{' + ontologies[1] + '}")' if has_meta else 'path ' + output_name }}, emit: {{ output_name }} + {{ 'tuple val(meta), path("*{' + ontologies[1]|join(',') + '}")' if has_meta else 'path ' + output_name }}, emit: {{ output_name }} {%- endfor %} {%- else %} {% if not_empty_template -%} @@ -96,7 +93,7 @@ process {{ component_name_underscore|upper }} { {%- if has_meta %} {%- if inputs %} {%- for input_name, ontologies in inputs.items() %} - {%- set extensions = ontologies[1].split(',') %} + {%- set extensions = ontologies[1] %} {%- for ext in extensions %} -o ${prefix}.{{ ext }} \\ {%- endfor %} @@ -135,7 +132,7 @@ process {{ component_name_underscore|upper }} { {% if not_empty_template -%} {%- if inputs %} {%- for input_name, ontologies in inputs.items() %} - {%- set extensions = ontologies[1].split(',') %} + {%- set extensions = ontologies[1] %} {%- for ext in extensions %} touch ${prefix}.{{ ext }} {%- endfor %} diff --git a/nf_core/module-template/meta.yml b/nf_core/module-template/meta.yml index a50d4c501b..749ef57bee 100644 --- a/nf_core/module-template/meta.yml +++ b/nf_core/module-template/meta.yml @@ -33,16 
+33,16 @@ input: type: map description: | Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` + e.g. `[ id:'sample1' ]` {% endif %} - {{ input_name }}: - # TODO nf-core: Update the information obtained form bio.tools and make sure that it is correct + # TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct type: file description: {{ input_name }} file - pattern: {{ "\"*.{" + ontologies[1] + "}\"" }} + pattern: {{ "\"*.{" + ontologies[1]|join(",") + "}\"" }} ontologies: {% for ontology in ontologies[0] -%} - - edam: "{{ ontology }}" + - edam: "{{ ontology }}" # {{ ontologies[1][loop.index0] }} {% endfor -%} {% endfor -%} {% else -%} @@ -51,7 +51,7 @@ input: type: map description: | Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` + e.g. `[ id:'sample1' ]` {% endif %} {% if not_empty_template -%} ## TODO nf-core: Delete / customise this example input @@ -82,16 +82,16 @@ output: type: map description: | Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` + e.g. `[ id:'sample1' ]` {%- endif %} - {{ output_name }}: - # TODO nf-core: Update the information obtained form bio.tools and make sure that it is correct + # TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct type: file description: {{ output_name }} file - pattern: {{ "\"*.{" + ontologies[1] + "}\"" }} + pattern: {{ "\"*.{" + ontologies[1]|join(",") + "}\"" }} ontologies: {%- for ontology in ontologies[0] %} - - edam: "{{ ontology }}" + - edam: "{{ ontology }}" # {{ ontologies[1][loop.index0] }} {%- endfor %} {% endfor -%} {% else -%} @@ -101,7 +101,7 @@ output: type: map description: | Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` + e.g. 
`[ id:'sample1' ]` {%- endif %} {% if not_empty_template -%} ## TODO nf-core: Delete / customise this example output diff --git a/tests/components/test_components_utils.py b/tests/components/test_components_utils.py index dce9eb0e4d..ce03f17860 100644 --- a/tests/components/test_components_utils.py +++ b/tests/components/test_components_utils.py @@ -36,13 +36,13 @@ def test_get_biotools_ch_info(self): "http://edamontology.org/format_2182", "http://edamontology.org/format_2573", ], - "fastq-like,sam", + ["fastq-like", "sam"], ) } assert outputs == { "sequence_report": ( ["http://edamontology.org/data_2955", "http://edamontology.org/format_2331"], - "html", + ["html"], ) } From 7bae22eba1f512f280e0437b8a2c8391c435aca3 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 10 Feb 2025 10:31:26 +0100 Subject: [PATCH 13/26] add missing ontologies to meta.yml when using nf-core lint --fix --- nf_core/modules/lint/__init__.py | 54 ++++++++++++++++++++++++++++++++ nf_core/modules/modules_utils.py | 14 +++++++++ requirements.txt | 1 + 3 files changed, 69 insertions(+) diff --git a/nf_core/modules/lint/__init__.py b/nf_core/modules/lint/__init__.py index ad010cd994..bd23960850 100644 --- a/nf_core/modules/lint/__init__.py +++ b/nf_core/modules/lint/__init__.py @@ -8,6 +8,7 @@ import logging import os +import re from pathlib import Path from typing import List, Optional, Union @@ -276,6 +277,9 @@ def update_meta_yml_file(self, mod): """ meta_yml = self.read_meta_yml(mod) corrected_meta_yml = meta_yml.copy() + ruamel.yaml.representer.RoundTripRepresenter.ignore_aliases = ( + lambda x, y: True + ) # Fix to not print aliases. 
https://stackoverflow.com/a/64717341 yaml = ruamel.yaml.YAML() yaml.preserve_quotes = True yaml.indent(mapping=2, sequence=2, offset=0) @@ -358,6 +362,56 @@ def update_meta_yml_file(self, mod): ) break + # EDAM ontologies + edam_formats = nf_core.modules.modules_utils.load_edam() + if "input" in meta_yml: + for i, channel in enumerate(corrected_meta_yml["input"]): + for j, element in enumerate(channel): + element_name = list(element.keys())[0] + expected_ontologies_i = [] + current_ontologies_i = [] + if "pattern" in corrected_meta_yml["input"][i][j][element_name]: + pattern = corrected_meta_yml["input"][i][j][element_name]["pattern"] + for extension in re.split(r",|{|}", pattern): + if extension in edam_formats: + expected_ontologies_i.append((edam_formats[extension][0], extension)) + if "ontologies" in corrected_meta_yml["input"][i][j][element_name]: + for ontology in corrected_meta_yml["input"][i][j][element_name]["ontologies"]: + current_ontologies_i.append(ontology["edam"]) + log.debug(f"expected ontologies for input: {expected_ontologies_i}") + log.debug(f"current ontologies for input: {current_ontologies_i}") + for ontology, ext in expected_ontologies_i: + if ontology not in current_ontologies_i: + corrected_meta_yml["input"][i][j][element_name]["ontologies"].append({"edam": ontology}) + corrected_meta_yml["input"][i][j][element_name]["ontologies"].yaml_add_eol_comment( + f"{edam_formats[ext][1]}", -1 + ) + if "output" in meta_yml: + for i, channel in enumerate(corrected_meta_yml["output"]): + ch_name = list(channel.keys())[0] + for j, element in enumerate(channel[ch_name]): + element_name = list(element.keys())[0] + expected_ontologies_o = [] + current_ontologies_o = [] + if "pattern" in corrected_meta_yml["output"][i][ch_name][j][element_name]: + pattern = corrected_meta_yml["output"][i][ch_name][j][element_name]["pattern"] + for extension in re.split(r",|{|}", pattern): + if extension in edam_formats: + 
expected_ontologies_o.append((edam_formats[extension][0], extension)) + if "ontologies" in corrected_meta_yml["output"][i][ch_name][j][element_name]: + for ontology in corrected_meta_yml["output"][i][ch_name][j][element_name]["ontologies"]: + current_ontologies_o.append(ontology["edam"]) + log.debug(f"expected ontologies for output: {expected_ontologies_o}") + log.debug(f"current ontologies for output: {current_ontologies_o}") + for ontology, ext in expected_ontologies_o: + if ontology not in current_ontologies_o: + corrected_meta_yml["output"][i][ch_name][j][element_name]["ontologies"].append( + {"edam": ontology} + ) + corrected_meta_yml["output"][i][ch_name][j][element_name][ + "ontologies" + ].yaml_add_eol_comment(f"{edam_formats[ext][1]}", -1) + # Add bio.tools identifier for i, tool in enumerate(corrected_meta_yml["tools"]): tool_name = list(tool.keys())[0] diff --git a/nf_core/modules/modules_utils.py b/nf_core/modules/modules_utils.py index 0f42d1bcea..84e34980aa 100644 --- a/nf_core/modules/modules_utils.py +++ b/nf_core/modules/modules_utils.py @@ -4,6 +4,8 @@ from typing import List, Optional, Tuple from urllib.parse import urlparse +from edam_ontology.streams import tabular_stream + from ..components.nfcore_component import NFCoreComponent log = logging.getLogger(__name__) @@ -97,3 +99,15 @@ def get_installed_modules(directory: Path, repo_type="modules") -> Tuple[List[st ] return local_modules, nfcore_modules + + +def load_edam(): + """Load the EDAM ontology from the nf-core repository""" + edam_formats = {} + with tabular_stream() as handle: + for line in handle.readlines(): + fields = line.split("\t") + if fields[0].split("/")[-1].startswith("format"): + extension = fields[1].lower().split(" ")[0] + edam_formats[extension] = (fields[0], fields[1]) # URL, name + return edam_formats diff --git a/requirements.txt b/requirements.txt index 51259938a8..78a4265a81 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,3 +25,4 @@ textual==0.71.0 trogon 
pdiff ruamel.yaml +edam-ontology From 1735c9e80b5b8b80479350f8270aa86afc7a2d87 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 10 Feb 2025 10:33:13 +0100 Subject: [PATCH 14/26] use pattern for output channel element in meta.yml --- nf_core/module-template/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nf_core/module-template/meta.yml b/nf_core/module-template/meta.yml index 749ef57bee..e4476acb42 100644 --- a/nf_core/module-template/meta.yml +++ b/nf_core/module-template/meta.yml @@ -84,7 +84,7 @@ output: Groovy Map containing sample information e.g. `[ id:'sample1' ]` {%- endif %} - - {{ output_name }}: + - {{ "\"*.{" + ontologies[1]|join(",") + "}\"" }}: # TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct type: file description: {{ output_name }} file From d730b08c67068eff3b4aa515cf26067bccc7fa37 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 10 Feb 2025 10:42:10 +0100 Subject: [PATCH 15/26] update pytest --- tests/components/test_components_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/components/test_components_utils.py b/tests/components/test_components_utils.py index ce03f17860..b5859562e6 100644 --- a/tests/components/test_components_utils.py +++ b/tests/components/test_components_utils.py @@ -36,13 +36,13 @@ def test_get_biotools_ch_info(self): "http://edamontology.org/format_2182", "http://edamontology.org/format_2573", ], - ["fastq-like", "sam"], + ["", "fastq-like", "sam"], ) } assert outputs == { "sequence_report": ( ["http://edamontology.org/data_2955", "http://edamontology.org/format_2331"], - ["html"], + ["", "html"], ) } From 5cef9f68b1da07a06159681c5a3fa5868f8e1a90 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 10 Feb 2025 11:10:37 +0100 Subject: [PATCH 16/26] remove 'type' assignment for backwards python compatibility --- nf_core/components/components_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/nf_core/components/components_utils.py b/nf_core/components/components_utils.py index 64aaa53786..d18cac8738 100644 --- a/nf_core/components/components_utils.py +++ b/nf_core/components/components_utils.py @@ -197,7 +197,7 @@ def get_biotools_id(data: dict, tool_name: str) -> str: return "" -type DictWithStrAndTuple = Dict[str, Tuple[List[str], List[str]]] +DictWithStrAndTuple = Dict[str, Tuple[List[str], List[str]]] def get_channel_info_from_biotools( From 76e56c5cebfadc8702a62b6bc03433eb3e2748ef Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 10 Feb 2025 11:54:27 +0100 Subject: [PATCH 17/26] try ignoring errors when deleting a modules work directory created with root --- tests/test_components.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_components.py b/tests/test_components.py index a6b22f6686..2184319a20 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -31,7 +31,7 @@ def tearDown(self): # Clean up temporary files if self.tmp_dir.is_dir(): - shutil.rmtree(self.tmp_dir) + shutil.rmtree(self.tmp_dir, ignore_errors=True) @pytest.fixture(autouse=True) def _use_caplog(self, caplog): From 6f442c25f1469e5b197a2e75b9d0a3854a5cfa77 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 10 Feb 2025 12:12:37 +0100 Subject: [PATCH 18/26] more tryes to fix pytest --- tests/test_components.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_components.py b/tests/test_components.py index 2184319a20..f5c174566f 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -2,6 +2,7 @@ import os import shutil +import stat import tempfile import unittest from pathlib import Path @@ -12,6 +13,12 @@ from .utils import GITLAB_NFTEST_BRANCH, GITLAB_URL +def remove_readonly(func, path, _): + "Clear the readonly bit and reattempt the removal" + os.chmod(path, stat.S_IWRITE) + func(path) + + class TestComponents(unittest.TestCase): """Class for components tests""" @@ -31,7 +38,7 @@ 
def tearDown(self): # Clean up temporary files if self.tmp_dir.is_dir(): - shutil.rmtree(self.tmp_dir, ignore_errors=True) + shutil.rmtree(self.tmp_dir, onexc=remove_readonly) @pytest.fixture(autouse=True) def _use_caplog(self, caplog): From 5301e1b0112e76d866ef356485f781f732fe8f35 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 10 Feb 2025 12:46:37 +0100 Subject: [PATCH 19/26] run test_unstable_snapshot first and don't remove_readonly --- .../test_components_generate_snapshot.py | 28 +++++++++---------- tests/test_components.py | 9 +----- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/tests/components/test_components_generate_snapshot.py b/tests/components/test_components_generate_snapshot.py index 265842f5d0..ebdc38d016 100644 --- a/tests/components/test_components_generate_snapshot.py +++ b/tests/components/test_components_generate_snapshot.py @@ -14,6 +14,20 @@ class TestTestComponentsUtils(TestComponents): + def test_unstable_snapshot(self): + """Generate the snapshot for a module in nf-core/modules clone with unstable snapshots""" + with set_wd(self.nfcore_modules): + snap_generator = ComponentsTest( + component_type="modules", + component_name="kallisto/quant", + no_prompts=True, + remote_url=GITLAB_URL, + branch=GITLAB_NFTEST_BRANCH, + ) + with pytest.raises(UserWarning) as e: + snap_generator.run() + assert "nf-test snapshot is not stable" in str(e.value) + def test_generate_snapshot_module(self): """Generate the snapshot for a module in nf-core/modules clone""" with set_wd(self.nfcore_modules): @@ -124,17 +138,3 @@ def test_test_not_found(self): snap_generator.run() assert "Test file 'main.nf.test' not found" in str(e.value) Path(test_file.parent / "main.nf.test.bak").rename(test_file) - - def test_unstable_snapshot(self): - """Generate the snapshot for a module in nf-core/modules clone with unstable snapshots""" - with set_wd(self.nfcore_modules): - snap_generator = ComponentsTest( - component_type="modules", - 
component_name="kallisto/quant", - no_prompts=True, - remote_url=GITLAB_URL, - branch=GITLAB_NFTEST_BRANCH, - ) - with pytest.raises(UserWarning) as e: - snap_generator.run() - assert "nf-test snapshot is not stable" in str(e.value) diff --git a/tests/test_components.py b/tests/test_components.py index f5c174566f..2184319a20 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -2,7 +2,6 @@ import os import shutil -import stat import tempfile import unittest from pathlib import Path @@ -13,12 +12,6 @@ from .utils import GITLAB_NFTEST_BRANCH, GITLAB_URL -def remove_readonly(func, path, _): - "Clear the readonly bit and reattempt the removal" - os.chmod(path, stat.S_IWRITE) - func(path) - - class TestComponents(unittest.TestCase): """Class for components tests""" @@ -38,7 +31,7 @@ def tearDown(self): # Clean up temporary files if self.tmp_dir.is_dir(): - shutil.rmtree(self.tmp_dir, onexc=remove_readonly) + shutil.rmtree(self.tmp_dir, ignore_errors=True) @pytest.fixture(autouse=True) def _use_caplog(self, caplog): From 2ed1eb93387b6df6017d62101ac1c66ed6ee82f4 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 10 Feb 2025 16:09:24 +0100 Subject: [PATCH 20/26] use EDAM tsv instead of python library --- nf_core/modules/modules_utils.py | 14 +++++++------- requirements.txt | 1 - 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/nf_core/modules/modules_utils.py b/nf_core/modules/modules_utils.py index 84e34980aa..4541cd965c 100644 --- a/nf_core/modules/modules_utils.py +++ b/nf_core/modules/modules_utils.py @@ -4,7 +4,7 @@ from typing import List, Optional, Tuple from urllib.parse import urlparse -from edam_ontology.streams import tabular_stream +import requests from ..components.nfcore_component import NFCoreComponent @@ -104,10 +104,10 @@ def get_installed_modules(directory: Path, repo_type="modules") -> Tuple[List[st def load_edam(): """Load the EDAM ontology from the nf-core repository""" edam_formats = {} - with tabular_stream() as 
handle: - for line in handle.readlines(): - fields = line.split("\t") - if fields[0].split("/")[-1].startswith("format"): - extension = fields[1].lower().split(" ")[0] - edam_formats[extension] = (fields[0], fields[1]) # URL, name + response = requests.get("https://edamontology.org/EDAM.tsv") + for line in response.content.splitlines(): + fields = line.decode("utf-8").split("\t") + if fields[0].split("/")[-1].startswith("format"): + extension = fields[1].lower().split(" ")[0] + edam_formats[extension] = (fields[0], fields[1]) # URL, name return edam_formats diff --git a/requirements.txt b/requirements.txt index 78a4265a81..51259938a8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,4 +25,3 @@ textual==0.71.0 trogon pdiff ruamel.yaml -edam-ontology From 8a6d45fd6985caec2cc2a1f0167e39170f2e3a61 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 10 Feb 2025 16:10:47 +0100 Subject: [PATCH 21/26] add edam comments to hardcoded template example and when linting --- nf_core/module-template/meta.yml | 12 ++++++------ nf_core/modules/lint/__init__.py | 9 ++++++--- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/nf_core/module-template/meta.yml b/nf_core/module-template/meta.yml index e4476acb42..4384cf7be4 100644 --- a/nf_core/module-template/meta.yml +++ b/nf_core/module-template/meta.yml @@ -62,9 +62,9 @@ input: pattern: {{ '"*.{bam,cram,sam}"' if not_empty_template else "" }} ontologies: {% if not_empty_template -%} - - edam: "http://edamontology.org/format_25722" - - edam: "http://edamontology.org/format_2573" - - edam: "http://edamontology.org/format_3462" + - edam: "http://edamontology.org/format_25722" # BAM + - edam: "http://edamontology.org/format_2573" # SAM + - edam: "http://edamontology.org/format_3462" # CRAM {% else -%} - edam: "" {%- endif %} @@ -112,9 +112,9 @@ output: pattern: {{ '"*.{bam,cram,sam}"' if not_empty_template else "" }} ontologies: {% if not_empty_template -%} - - edam: "http://edamontology.org/format_25722" - - edam: 
"http://edamontology.org/format_2573" - - edam: "http://edamontology.org/format_3462" + - edam: "http://edamontology.org/format_25722" # BAM + - edam: "http://edamontology.org/format_2573" # SAM + - edam: "http://edamontology.org/format_3462" # CRAM {% else -%} - edam: "" {%- endif %} diff --git a/nf_core/modules/lint/__init__.py b/nf_core/modules/lint/__init__.py index 19cb1330f0..6c1c273416 100644 --- a/nf_core/modules/lint/__init__.py +++ b/nf_core/modules/lint/__init__.py @@ -398,10 +398,13 @@ def update_meta_yml_file(self, mod): log.debug(f"current ontologies for input: {current_ontologies_i}") for ontology, ext in expected_ontologies_i: if ontology not in current_ontologies_i: - corrected_meta_yml["input"][i][j][element_name]["ontologies"].append({"edam": ontology}) - corrected_meta_yml["input"][i][j][element_name]["ontologies"].yaml_add_eol_comment( - f"{edam_formats[ext][1]}", -1 + corrected_meta_yml["input"][i][j][element_name]["ontologies"].append( + ruamel.yaml.comments.CommentedMap({"edam": ontology}) ) + corrected_meta_yml["input"][i][j][element_name]["ontologies"][-1].yaml_add_eol_comment( + f"{edam_formats[ext][1]}", "edam" + ) + print(f"added comment {edam_formats[ext][1]}") if "output" in meta_yml: for i, channel in enumerate(corrected_meta_yml["output"]): ch_name = list(channel.keys())[0] From ca937553f5330cb2082735db88e197644ce4c7e0 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 10 Feb 2025 16:30:48 +0100 Subject: [PATCH 22/26] don't add empty comment and comma to meta.yml --- nf_core/components/components_utils.py | 10 ++++++---- nf_core/module-template/main.nf | 6 +++--- nf_core/module-template/meta.yml | 6 +++--- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/nf_core/components/components_utils.py b/nf_core/components/components_utils.py index d18cac8738..b1c600c27f 100644 --- a/nf_core/components/components_utils.py +++ b/nf_core/components/components_utils.py @@ -197,7 +197,7 @@ def get_biotools_id(data: dict, 
tool_name: str) -> str: return "" -DictWithStrAndTuple = Dict[str, Tuple[List[str], List[str]]] +DictWithStrAndTuple = Dict[str, Tuple[List[str], List[str], List[str]]] def get_channel_info_from_biotools( @@ -220,14 +220,16 @@ def _iterate_input_output(type) -> DictWithStrAndTuple: if "data" in element: element_name = "_".join(element["data"]["term"].lower().split(" ")) uris = [element["data"]["uri"]] - terms = [""] + terms = [element["data"]["term"]] + patterns = [] if "format" in element: for format in element["format"]: # Append the EDAM URI uris.append(format["uri"]) # Append the EDAM term, getting the first word in case of complicated strings. i.e. "FASTA format" - terms.append(format["term"].lower().split(" ")[0]) - type_info[element_name] = (uris, terms) + patterns.append(format["term"].lower().split(" ")[0]) + terms.append(format["term"]) + type_info[element_name] = (uris, terms, patterns) return type_info # Iterate through the tools in the response to find the tool name diff --git a/nf_core/module-template/main.nf b/nf_core/module-template/main.nf index b2383178a2..194e1da31c 100644 --- a/nf_core/module-template/main.nf +++ b/nf_core/module-template/main.nf @@ -53,7 +53,7 @@ process {{ component_name_underscore|upper }} { {%- if outputs %} // TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct {%- for output_name, ontologies in outputs.items() %} - {{ 'tuple val(meta), path("*{' + ontologies[1]|join(',') + '}")' if has_meta else 'path ' + output_name }}, emit: {{ output_name }} + {{ 'tuple val(meta), path("*{' + ontologies[2]|join(',') + '}")' if has_meta else 'path ' + output_name }}, emit: {{ output_name }} {%- endfor %} {%- else %} {% if not_empty_template -%} @@ -93,7 +93,7 @@ process {{ component_name_underscore|upper }} { {%- if has_meta %} {%- if inputs %} {%- for input_name, ontologies in inputs.items() %} - {%- set extensions = ontologies[1] %} + {%- set extensions = ontologies[2] %} {%- for ext in 
extensions %} -o ${prefix}.{{ ext }} \\ {%- endfor %} @@ -132,7 +132,7 @@ process {{ component_name_underscore|upper }} { {% if not_empty_template -%} {%- if inputs %} {%- for input_name, ontologies in inputs.items() %} - {%- set extensions = ontologies[1] %} + {%- set extensions = ontologies[2] %} {%- for ext in extensions %} touch ${prefix}.{{ ext }} {%- endfor %} diff --git a/nf_core/module-template/meta.yml b/nf_core/module-template/meta.yml index 4384cf7be4..1227530ef7 100644 --- a/nf_core/module-template/meta.yml +++ b/nf_core/module-template/meta.yml @@ -39,7 +39,7 @@ input: # TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct type: file description: {{ input_name }} file - pattern: {{ "\"*.{" + ontologies[1]|join(",") + "}\"" }} + pattern: {{ "\"*.{" + ontologies[2]|join(",") + "}\"" }} ontologies: {% for ontology in ontologies[0] -%} - edam: "{{ ontology }}" # {{ ontologies[1][loop.index0] }} @@ -84,11 +84,11 @@ output: Groovy Map containing sample information e.g. 
`[ id:'sample1' ]` {%- endif %} - - {{ "\"*.{" + ontologies[1]|join(",") + "}\"" }}: + - {{ "\"*.{" + ontologies[2]|join(",") + "}\"" }}: # TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct type: file description: {{ output_name }} file - pattern: {{ "\"*.{" + ontologies[1]|join(",") + "}\"" }} + pattern: {{ "\"*.{" + ontologies[2]|join(",") + "}\"" }} ontologies: {%- for ontology in ontologies[0] %} - edam: "{{ ontology }}" # {{ ontologies[1][loop.index0] }} From 2795d5838e31bb307980d09d8ce5006ea4856198 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 10 Feb 2025 16:32:43 +0100 Subject: [PATCH 23/26] add log.info messages --- nf_core/components/components_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nf_core/components/components_utils.py b/nf_core/components/components_utils.py index b1c600c27f..3882b7aa1b 100644 --- a/nf_core/components/components_utils.py +++ b/nf_core/components/components_utils.py @@ -176,6 +176,7 @@ def get_biotools_response(tool_name: str) -> Optional[Dict]: response.raise_for_status() # Raise an error for bad status codes # Parse the JSON response data = response.json() + log.info(f"Found bio.tools information for '{tool_name}'") return data except requests.exceptions.RequestException as e: @@ -190,6 +191,7 @@ def get_biotools_id(data: dict, tool_name: str) -> str: # Iterate through the tools in the response to find the tool name for tool in data["list"]: if tool["name"].lower() == tool_name: + log.info(f"Found bio.tools ID: '{tool['biotoolsCURIE']}'") return tool["biotoolsCURIE"] # If the tool name was not found in the response From 3dead1efc00b8fd6ff18f0e8f3ad6615d449c4c6 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 10 Feb 2025 16:45:16 +0100 Subject: [PATCH 24/26] fix nf-test components test --- .github/workflows/pytest.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 
76d5d710c0..8466c132e7 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -75,8 +75,8 @@ jobs: name: Run ${{matrix.test}} with Python ${{ needs.setup.outputs.python-version }} on ${{ needs.setup.outputs.runner }} needs: [setup, list_tests] if: ${{ needs.setup.outputs.run-tests }} - # run on self-hosted runners for test_components.py (because of the gitlab branch), based on the input if it is dispatched manually, on github if it is a rerun or on self-hosted by default - runs-on: ${{ matrix.test == 'test_components.py' && 'self-hosted' || (github.event.inputs.runners || github.run_number > 1 && 'ubuntu-latest' || 'self-hosted') }} + # run on self-hosted runners for test_components_generate_snapshot.py (because of the gitlab branch), based on the input if it is dispatched manually, on github if it is a rerun or on self-hosted by default + runs-on: ${{ matrix.test == 'test_components_generate_snapshot.py' && 'self-hosted' || (github.event.inputs.runners || github.run_number > 1 && 'ubuntu-latest' || 'self-hosted') }} strategy: matrix: ${{ fromJson(needs.list_tests.outputs.tests) }} fail-fast: false # run all tests even if one fails From 47036ff47fc04192ba93799ddb8acd07001173f3 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 11 Feb 2025 11:36:31 +0100 Subject: [PATCH 25/26] fix more pytests --- nf_core/module-template/main.nf | 2 +- tests/components/test_components_utils.py | 6 ++++-- tests/modules/test_lint.py | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/nf_core/module-template/main.nf b/nf_core/module-template/main.nf index 194e1da31c..fe135a19a7 100644 --- a/nf_core/module-template/main.nf +++ b/nf_core/module-template/main.nf @@ -53,7 +53,7 @@ process {{ component_name_underscore|upper }} { {%- if outputs %} // TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct {%- for output_name, ontologies in outputs.items() %} - {{ 'tuple val(meta), path("*{' + 
ontologies[2]|join(',') + '}")' if has_meta else 'path ' + output_name }}, emit: {{ output_name }} + {{ 'tuple val(meta), path("*.{' + ontologies[2]|join(',') + '}")' if has_meta else 'path ' + output_name }}, emit: {{ output_name }} {%- endfor %} {%- else %} {% if not_empty_template -%} diff --git a/tests/components/test_components_utils.py b/tests/components/test_components_utils.py index b5859562e6..19fa388d79 100644 --- a/tests/components/test_components_utils.py +++ b/tests/components/test_components_utils.py @@ -36,13 +36,15 @@ def test_get_biotools_ch_info(self): "http://edamontology.org/format_2182", "http://edamontology.org/format_2573", ], - ["", "fastq-like", "sam"], + ["Raw sequence", "FASTQ-like format (text)", "SAM"], + ["fastq-like", "sam"], ) } assert outputs == { "sequence_report": ( ["http://edamontology.org/data_2955", "http://edamontology.org/format_2331"], - ["", "html"], + ["Sequence report", "HTML"], + ["html"], ) } diff --git a/tests/modules/test_lint.py b/tests/modules/test_lint.py index c641b6dc97..cc8dc39611 100644 --- a/tests/modules/test_lint.py +++ b/tests/modules/test_lint.py @@ -526,7 +526,7 @@ def test_modules_meta_yml_output_mismatch(self): """Test linting a module with an extra entry in output fields in meta.yml compared to module.output""" with open(Path(self.nfcore_modules, "modules", "nf-core", "bpipe", "test", "main.nf")) as fh: main_nf = fh.read() - main_nf_new = main_nf.replace("emit: bam", "emit: bai") + main_nf_new = main_nf.replace("emit: sequence_report", "emit: bai") with open(Path(self.nfcore_modules, "modules", "nf-core", "bpipe", "test", "main.nf"), "w") as fh: fh.write(main_nf_new) module_lint = nf_core.modules.lint.ModuleLint(directory=self.nfcore_modules) From 1e574febec63be45b14b3073ca24444037350478 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Wed, 12 Feb 2025 11:45:39 +0100 Subject: [PATCH 26/26] fix test path --- .github/workflows/pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 8466c132e7..c230bda0c2 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -76,7 +76,7 @@ jobs: needs: [setup, list_tests] if: ${{ needs.setup.outputs.run-tests }} # run on self-hosted runners for test_components_generate_snapshot.py (because of the gitlab branch), based on the input if it is dispatched manually, on github if it is a rerun or on self-hosted by default - runs-on: ${{ matrix.test == 'test_components_generate_snapshot.py' && 'self-hosted' || (github.event.inputs.runners || github.run_number > 1 && 'ubuntu-latest' || 'self-hosted') }} + runs-on: ${{ matrix.test == 'components/test_components_generate_snapshot.py' && 'self-hosted' || (github.event.inputs.runners || github.run_number > 1 && 'ubuntu-latest' || 'self-hosted') }} strategy: matrix: ${{ fromJson(needs.list_tests.outputs.tests) }} fail-fast: false # run all tests even if one fails