+
Skip to content

Fix modules meta.yml file structure #3532

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Jul 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
### Modules

- Remove args stub from module template to satisfy language server ([#3403](https://github.com/nf-core/tools/pull/3403))
- Fix modules meta.yml file structure ([#3532](https://github.com/nf-core/tools/pull/3532))

### Subworkflows

Expand Down
192 changes: 189 additions & 3 deletions nf_core/components/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
import shutil
import subprocess
from pathlib import Path
from typing import Optional
from typing import Optional, Union

import jinja2
import questionary
import rich
import rich.prompt
import yaml
import ruamel.yaml
from packaging.version import parse as parse_version

import nf_core
Expand All @@ -26,6 +26,14 @@

log = logging.getLogger(__name__)

# Set yaml options for meta.yml files.
# The assignment below patches `ignore_aliases` on the RoundTripRepresenter
# *class* (not on an instance), so it applies to every round-trip dump in this
# process. The replacement ignores both of its arguments and always returns True.
ruamel.yaml.representer.RoundTripRepresenter.ignore_aliases = (
    lambda x, y: True
)  # Fix to not print aliases. https://stackoverflow.com/a/64717341
# Module-level ruamel YAML handler, used by this module via `yaml.load(...)` / `yaml.dump(...)`.
yaml = ruamel.yaml.YAML()
# Keep the quoting style found in loaded files when they are dumped again.
yaml.preserve_quotes = True
# 2-space indentation for mappings and sequences, with no extra offset for the sequence dash.
yaml.indent(mapping=2, sequence=2, offset=0)


class ComponentCreate(ComponentCommand):
def __init__(
Expand Down Expand Up @@ -175,6 +183,10 @@ def create(self) -> bool:
assert self._render_template()
log.info(f"Created component template: '{self.component_name}'")

if self.component_type == "modules":
# Generate meta.yml inputs and outputs
self.generate_meta_yml_file()

if self.migrate_pytest:
self._copy_old_files(component_old_path)
log.info("Migrate pytest tests: Copied original module files to new module")
Expand Down Expand Up @@ -501,10 +513,184 @@ def _print_and_delete_pytest_files(self):
# Delete tags from pytest_modules.yml
modules_yml = Path(self.directory, "tests", "config", "pytest_modules.yml")
with open(modules_yml) as fh:
yml_file = yaml.safe_load(fh)
yml_file = yaml.load(fh)
yml_key = str(self.component_dir) if self.component_type == "modules" else f"subworkflows/{self.component_dir}"
if yml_key in yml_file:
del yml_file[yml_key]
with open(modules_yml, "w") as fh:
yaml.dump(yml_file, fh)
run_prettier_on_file(modules_yml)

def generate_meta_yml_file(self) -> None:
    """
    Generate the meta.yml file.

    Loads the file at ``self.file_paths["meta.yml"]`` with the module-level
    ``yaml`` handler, rewrites its ``input`` and ``output`` sections in
    memory, and dumps the result back to the same path.

    Behaviour depends on these attributes of ``self``:

    - ``not_empty_template``: when truthy, TODO comments are attached to the
      loaded document and the sections are filled from ``self.inputs`` /
      ``self.outputs`` if those attributes exist. When they do not exist, a
      BAM/CRAM/SAM example is written, but only if ``has_meta`` is falsy;
      otherwise the section loaded from the file is left untouched.
      When ``not_empty_template`` is falsy, placeholder entries with empty
      strings are written instead.
    - ``has_meta``: when truthy, file entries are paired with a ``meta`` map
      entry.
    - ``inputs`` / ``outputs`` (optional): mappings of channel name to a
      sequence that is indexed as ``[0]`` (values written under ``edam``),
      ``[1]`` (strings attached as end-of-line comments to those ``edam``
      entries) and ``[2]`` (strings joined with commas into the glob
      pattern ``*.{...}``).

    A ``versions`` output entry describing ``versions.yml`` is added to the
    output section in every branch that rebuilds that section.
    """
    # TODO: The meta.yml could be handled with a Pydantic model. The reason it is not implemented is because we want to maintain comments in the meta.yml file.
    with open(self.file_paths["meta.yml"]) as fh:
        meta_yml: ruamel.yaml.comments.CommentedMap = yaml.load(fh)

    # Output entry for versions.yml, shared by all branches below. The innermost
    # mapping is a CommentedMap so that an end-of-line comment can be attached to it.
    versions: dict[str, list[dict[str, dict]]] = {
        "versions": [
            {
                "versions.yml": {
                    "type": "file",
                    "description": "File containing software versions",
                    "pattern": "versions.yml",
                    "ontologies": [
                        ruamel.yaml.comments.CommentedMap({"edam": "http://edamontology.org/format_3750"})
                    ],
                }
            }
        ]
    }
    versions["versions"][0]["versions.yml"]["ontologies"][0].yaml_add_eol_comment("YAML", "edam")

    if self.not_empty_template:
        # Attach TODO hints to the document as loaded from disk. These calls assume the
        # loaded file already has "name", "tools" (non-empty), "input" and "output" keys
        # holding ruamel comment-capable containers.
        meta_yml.yaml_set_comment_before_after_key(
            "name", before="# TODO nf-core: Add a description of the module and list keywords"
        )
        meta_yml["tools"][0].yaml_set_start_comment(
            "## TODO nf-core: Add a description and other details for the software below"
        )
        meta_yml["input"].yaml_set_start_comment(
            "### TODO nf-core: Add a description of all of the variables used as input", indent=2
        )
        meta_yml["output"].yaml_set_start_comment(
            "### TODO nf-core: Add a description of all of the variables used as output", indent=2
        )

        if hasattr(self, "inputs"):
            # Build one input channel per entry of self.inputs.
            inputs_array: list[Union[dict, list[dict]]] = []
            for i, (input_name, ontologies) in enumerate(self.inputs.items()):
                channel_entry: dict[str, dict] = {
                    input_name: {
                        "type": "file",
                        "description": f"{input_name} file",
                        "pattern": f"*.{{{','.join(ontologies[2])}}}",
                        "ontologies": [
                            ruamel.yaml.comments.CommentedMap({"edam": f"{ont_id}"}) for ont_id in ontologies[0]
                        ],
                    }
                }
                # NOTE(review): indexes the list built from ontologies[0] by positions of
                # ontologies[1]; assumes ontologies[1] is not longer than ontologies[0] - confirm.
                for j, ont_desc in enumerate(ontologies[1]):
                    channel_entry[input_name]["ontologies"][j].yaml_add_eol_comment(ont_desc, "edam")
                if self.has_meta:
                    # First channel uses "meta"; later ones use "meta2", "meta3", ...
                    meta_suffix = str(i + 1) if i > 0 else ""
                    meta_entry: dict[str, dict] = {
                        f"meta{meta_suffix}": {
                            "type": "map",
                            "description": "Groovy Map containing sample information. e.g. `[ id:'sample1' ]`",
                        }
                    }
                    # With meta, the channel is a list pairing the meta map with the file entry.
                    inputs_array.append([meta_entry, channel_entry])
                else:
                    inputs_array.append(channel_entry)
            # Replacing the section discards the start comment set above, so a new one is set.
            meta_yml["input"] = ruamel.yaml.comments.CommentedSeq(inputs_array)
            meta_yml["input"].yaml_set_start_comment(
                "# TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct"
            )
        elif not self.has_meta:
            # No self.inputs and no meta map: write a single BAM/CRAM/SAM example input.
            meta_yml["input"] = [
                {
                    "bam": {
                        "type": "file",
                        "description": "Sorted BAM/CRAM/SAM file",
                        "pattern": "*.{bam,cram,sam}",
                        "ontologies": [
                            ruamel.yaml.comments.CommentedMap({"edam": "http://edamontology.org/format_2572"}),
                            ruamel.yaml.comments.CommentedMap({"edam": "http://edamontology.org/format_2573"}),
                            ruamel.yaml.comments.CommentedMap({"edam": "http://edamontology.org/format_3462"}),
                        ],
                    }
                }
            ]
            meta_yml["input"][0]["bam"]["ontologies"][0].yaml_add_eol_comment("BAM", "edam")
            meta_yml["input"][0]["bam"]["ontologies"][1].yaml_add_eol_comment("CRAM", "edam")
            meta_yml["input"][0]["bam"]["ontologies"][2].yaml_add_eol_comment("SAM", "edam")

        if hasattr(self, "outputs"):
            # Build one output channel per entry of self.outputs, keyed by channel name.
            outputs_dict: dict[str, Union[list, dict]] = {}
            # NOTE(review): `i` is not used in this loop, and the meta key is always "meta"
            # here (no numeric suffix, unlike the inputs above) - confirm this is intended.
            for i, (output_name, ontologies) in enumerate(self.outputs.items()):
                channel_contents: list[Union[list[dict], dict]] = []
                if self.has_meta:
                    # Start a nested list holding the meta map; the file entry is appended to it below.
                    channel_contents.append(
                        [
                            {
                                "meta": {
                                    "type": "map",
                                    "description": "Groovy Map containing sample information. e.g. `[ id:'sample1' ]`",
                                }
                            }
                        ]
                    )
                # The glob pattern doubles as the key of the file entry.
                pattern = f"*.{{{','.join(ontologies[2])}}}"
                file_entry: dict[str, dict] = {
                    pattern: {
                        "type": "file",
                        "description": f"{output_name} file",
                        "pattern": pattern,
                        "ontologies": [
                            ruamel.yaml.comments.CommentedMap({"edam": f"{ont_id}"}) for ont_id in ontologies[0]
                        ],
                    }
                }
                # NOTE(review): same length assumption between ontologies[1] and ontologies[0] as for inputs.
                for j, ont_desc in enumerate(ontologies[1]):
                    file_entry[pattern]["ontologies"][j].yaml_add_eol_comment(ont_desc, "edam")
                if self.has_meta:
                    if isinstance(channel_contents[0], list):  # for mypy
                        channel_contents[0].append(file_entry)
                else:
                    channel_contents.append(file_entry)
                outputs_dict[output_name] = channel_contents
            # The versions entry is added after the per-channel entries.
            outputs_dict.update(versions)
            # Replacing the section discards the start comment set above, so a new one is set.
            meta_yml["output"] = ruamel.yaml.comments.CommentedMap(outputs_dict)
            meta_yml["output"].yaml_set_start_comment(
                "# TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct"
            )
        elif not self.has_meta:
            # No self.outputs and no meta map: write a single BAM example output plus versions.
            meta_yml["output"] = {
                "bam": [
                    {
                        "*.bam": {
                            "type": "file",
                            "description": "Sorted BAM/CRAM/SAM file",
                            "pattern": "*.{bam,cram,sam}",
                            "ontologies": [
                                ruamel.yaml.comments.CommentedMap({"edam": "http://edamontology.org/format_2572"}),
                                ruamel.yaml.comments.CommentedMap({"edam": "http://edamontology.org/format_2573"}),
                                ruamel.yaml.comments.CommentedMap({"edam": "http://edamontology.org/format_3462"}),
                            ],
                        }
                    }
                ]
            }
            meta_yml["output"]["bam"][0]["*.bam"]["ontologies"][0].yaml_add_eol_comment("BAM", "edam")
            meta_yml["output"]["bam"][0]["*.bam"]["ontologies"][1].yaml_add_eol_comment("CRAM", "edam")
            meta_yml["output"]["bam"][0]["*.bam"]["ontologies"][2].yaml_add_eol_comment("SAM", "edam")
            meta_yml["output"].update(versions)

    else:
        # Empty template: write placeholder entries whose description, pattern and
        # edam values are empty strings.
        input_entry: list[dict] = [
            {"input": {"type": "file", "description": "", "pattern": "", "ontologies": [{"edam": ""}]}}
        ]
        output_entry: list[dict] = [
            {"*": {"type": "file", "description": "", "pattern": "", "ontologies": [{"edam": ""}]}}
        ]
        if self.has_meta:
            empty_meta_entry: list[dict] = [
                {
                    "meta": {
                        "type": "map",
                        "description": "Groovy Map containing sample information. e.g. `[ id:'sample1' ]`",
                    }
                }
            ]
            # List concatenation yields [meta, file]; the outer list wraps it as one channel.
            meta_yml["input"] = [empty_meta_entry + input_entry]
            meta_yml["output"] = {"output": [empty_meta_entry + output_entry]}
        else:
            meta_yml["input"] = input_entry
            meta_yml["output"] = {"output": output_entry}
        meta_yml["output"].update(versions)

    # Write the modified document back to the same path it was read from.
    with open(self.file_paths["meta.yml"], "w") as fh:
        yaml.dump(meta_yml, fh)
19 changes: 13 additions & 6 deletions nf_core/components/nfcore_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,9 @@ def get_inputs_from_main_nf(self) -> None:
] # Takes only first part, avoid commas in quotes
input_val = input_val.strip().strip("'").strip('"') # remove quotes and whitespaces
channel_elements.append({input_val: {}})
if len(channel_elements) > 0:
if len(channel_elements) == 1:
inputs.append(channel_elements[0])
elif len(channel_elements) > 1:
inputs.append(channel_elements)
log.debug(f"Found {len(inputs)} inputs in {self.main_nf}")
self.inputs = inputs
Expand All @@ -241,10 +243,10 @@ def get_inputs_from_main_nf(self) -> None:
self.inputs = inputs

def get_outputs_from_main_nf(self):
outputs = []
with open(self.main_nf) as f:
data = f.read()
if self.component_type == "modules":
outputs = {}
# get output values from main.nf after "output:". the names are always after "emit:"
if "output:" not in data:
log.debug(f"Could not find any outputs in {self.main_nf}")
Expand All @@ -257,7 +259,8 @@ def get_outputs_from_main_nf(self):
matches_elements = re.finditer(regex_elements, line)
if not match_emit:
continue
output_channel = {match_emit.group(1): []}
channel_elements = []
outputs[match_emit.group(1)] = []
for _, match_element in enumerate(matches_elements, start=1):
output_val = None
if match_element.group(3):
Expand All @@ -269,11 +272,15 @@ def get_outputs_from_main_nf(self):
0
] # Takes only first part, avoid commas in quotes
output_val = output_val.strip().strip("'").strip('"') # remove quotes and whitespaces
output_channel[match_emit.group(1)].append({output_val: {}})
outputs.append(output_channel)
log.debug(f"Found {len(outputs)} outputs in {self.main_nf}")
channel_elements.append({output_val: {}})
if len(channel_elements) == 1:
outputs[match_emit.group(1)].append(channel_elements[0])
elif len(channel_elements) > 1:
outputs[match_emit.group(1)].append(channel_elements)
log.debug(f"Found {len(list(outputs.keys()))} outputs in {self.main_nf}")
self.outputs = outputs
elif self.component_type == "subworkflows":
outputs = []
# get output values from main.nf after "emit:". Can be named outputs or not.
if "emit:" not in data:
log.debug(f"Could not find any outputs in {self.main_nf}")
Expand Down
Loading
Loading
点击 这是indexloc提供的php浏览器服务,不要输入任何密码和下载