+
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
- Add support for downloading docker images into tar archives
- Change long flag `--parallel-downloads` to `--parallel`. Short flag remains `-d`.
- Add pipeline to test data to be compatible with `nextflow inspect`
- Move `gather_registries` function to `ContainerFetcher` subclasses (#3634 follow-up) ([#3696](https://github.com/nf-core/tools/pull/3696))
- Add container load scripts for Docker and Podman (#3634 follow-up) ([#3706](https://github.com/nf-core/tools/pull/3706))
- Replace arm profile with arm64 and emulate_amd64 profiles ([#3689](https://github.com/nf-core/tools/pull/3689))
- Update test-datasets list subcommand to output plain text URLs and paths for easy copying ([#3720](https://github.com/nf-core/tools/pull/3720))
Expand Down
52 changes: 51 additions & 1 deletion nf_core/pipelines/download/container_fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import rich.progress

import nf_core.utils
from nf_core.pipelines.download.utils import intermediate_file

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -210,7 +211,9 @@ def __init__(
) -> None:
self._container_output_dir = container_output_dir
self.container_library = list(container_library)
self.registry_set = registry_set
self.base_registry_set: set[str] = set(registry_set)
self._registry_set: Optional[set[str]] = None

self.kill_with_fire = False
self.implementation: Optional[str] = None
self.name = None
Expand All @@ -232,6 +235,18 @@ def progress(self) -> rich.progress.Progress:
def progress(self, progress: Optional[ContainerProgress]) -> None:
self._progress = progress

@property
def registry_set(self) -> set[str]:
    """
    The set of registries to use for the container download.

    The value must have been assigned through the setter beforehand;
    reading it while it is still unset fails the assertion below.
    """
    registries = self._registry_set
    assert registries is not None  # mypy
    return registries

@registry_set.setter
def registry_set(self, registry_set: set[str]) -> None:
    """Store the set of registries to use for the container download."""
    self._registry_set = registry_set

def get_container_output_dir(self) -> Path:
"""
Get the output directory for the container images.
Expand All @@ -250,6 +265,37 @@ def check_and_set_implementation(self) -> None:
"""
pass

@abstractmethod
def gather_registries(self, workflow_directory: Path) -> set[str]:
    """
    Collect the registries from the pipeline config and CLI arguments into a set.

    Every concrete fetcher has to provide its own implementation.

    Args:
        workflow_directory (Path): The directory containing the pipeline files we are currently processing

    Returns:
        set[str]: The set of registries.
    """

def gather_config_registries(
    self, workflow_directory: Path, registry_keys: Optional[list[str]] = None
) -> set[str]:
    """
    Gather the registries from the pipeline config and store them in a set.

    Args:
        workflow_directory (Path): The directory containing the pipeline files we are currently processing
        registry_keys (Optional[list[str]]): The registry keys to look up in the pipeline config.
            ``None`` (the default) is treated like an empty list.

    Returns:
        set[str]: The values of all requested keys that are present in the pipeline config
    """
    # Fetch the pipeline config
    nf_config = nf_core.utils.fetch_wf_config(workflow_directory)

    # `None` replaces the former mutable `[]` default, which would have been
    # shared between all calls of this method.
    return {nf_config[key] for key in (registry_keys or []) if key in nf_config}

@abstractmethod
def clean_container_file_extension(self, container_fn: str) -> str:
"""
Expand Down Expand Up @@ -319,6 +365,7 @@ def fetch_containers(
self,
containers: Collection[str],
exclude_list: Container[str],
workflow_directory: Path,
):
"""
This is the main entrypoint of the container fetcher. It goes through
Expand All @@ -329,6 +376,9 @@ def fetch_containers(
# Create a new progress bar
self.progress = self.progress_factory(self.hide_progress)

# Collect registries defined in the workflow directory
self.registry_set = self.gather_registries(workflow_directory)

with self.progress:
# Check each container in the list and defer actions
containers_remote_fetch: list[tuple[str, Path]] = []
Expand Down
50 changes: 36 additions & 14 deletions nf_core/pipelines/download/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import nf_core.utils
from nf_core.pipelines.download.container_fetcher import ContainerFetcher, ContainerProgress
from nf_core.pipelines.download.utils import copy_container_load_scripts
from nf_core.pipelines.download.utils import ContainerRegistryUrls, copy_container_load_scripts

log = logging.getLogger(__name__)
stderr = rich.console.Console(
Expand Down Expand Up @@ -81,6 +81,29 @@ def check_and_set_implementation(self) -> None:
raise OSError("Docker is needed to pull images, but it is not installed or not in $PATH")
self.implementation = "docker"

def gather_registries(self, workflow_directory: Path) -> set[str]:
    """
    Build the set of registries to use when downloading Docker images.

    The result combines the base registry set, the `docker.registry` and
    `podman.registry` entries of the pipeline config (when present) and the
    Seqera Docker container registry.

    Args:
        workflow_directory (Path): The directory containing the pipeline files we are currently processing

    Returns:
        set[str]: The set of registries to use for the container download
    """
    # Work on a copy so that the base registry set itself stays untouched
    registries = set(self.base_registry_set)

    # Registries defined in the workflow config
    registries.update(
        self.gather_config_registries(workflow_directory, ["docker.registry", "podman.registry"])
    )

    # The new Seqera Docker container registry is always part of the set
    registries.add(ContainerRegistryUrls.SEQERA_DOCKER)
    return registries

def clean_container_file_extension(self, container_fn):
"""
This makes sure that the Docker container filename has a .tar extension
Expand Down Expand Up @@ -155,25 +178,24 @@ def pull_and_save_image(self, container: str, output_path: Path) -> None:

try:
self.pull_image(container, task)
# Update progress bar
self.progress.advance(task)
self.progress.update(task, status="Saving")
# self.progress.update(task, description=f"Saving '{container_short_name}'")

# Save the image
self.save_image(container, output_path, task)

# Update progress bar
self.progress.advance(task)
self.progress.remove_task(task)

except (DockerError.InvalidTagError, DockerError.ImageNotFoundError) as e:
log.error(e.message)
except DockerError.OtherError as e:
# Try other registries
log.error(e.message)
log.error(e.helpmessage)

# Update progress bar
self.progress.advance(task)
self.progress.update(task, status="Saving")
# self.progress.update(task, description=f"Saving '{container_short_name}'")

# Save the image
self.save_image(container, output_path, task)

# Update progress bar
self.progress.advance(task)
self.progress.remove_task(task)

# Task should advance in any case. Failure to pull will not kill the pulling process.
self.progress.advance_remote_fetch_task()

Expand Down
49 changes: 8 additions & 41 deletions nf_core/pipelines/download/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,11 +322,11 @@ def download_workflow_static(self) -> None:

# Collect all required container images
if self.container_system in {"singularity", "docker"}:
self.find_container_images(self.outdir / revision_dirname, revision)
self.gather_registries(self.outdir / revision_dirname)
workflow_directory = self.outdir / revision_dirname
self.find_container_images(workflow_directory, revision)

try:
self.download_container_images(current_revision=revision)
self.download_container_images(workflow_directory, revision)
except OSError as e:
raise DownloadError(f"[red]{e}[/]") from e

Expand Down Expand Up @@ -362,11 +362,11 @@ def download_workflow_platform(self, location: Optional[Path] = None) -> None:
# Checkout the repo in the current revision
self.workflow_repo.checkout(commit)
# Collect all required singularity images
self.find_container_images(self.workflow_repo.access(), revision)
self.gather_registries(self.workflow_repo.access())
workflow_directory = self.workflow_repo.access()
self.find_container_images(workflow_directory, revision)

try:
self.download_container_images(current_revision=revision)
self.download_container_images(workflow_directory, revision)
except OSError as e:
raise DownloadError(f"[red]{e}[/]") from e

Expand Down Expand Up @@ -672,41 +672,11 @@ def find_container_images(
log.error("Failed to parse output of 'nextflow inspect' to extract containers")
raise DownloadError(e)

def gather_registries(self, workflow_directory: Path) -> None:
    """Add the registries from the pipeline config and the default registries to `self.registry_set`.
    This is needed to symlink downloaded container images so Nextflow will find them.
    """

    # should exist, because find_container_images() is always called before
    if not self.nf_config:
        self.nf_config = nf_core.utils.fetch_wf_config(workflow_directory)

    # Registries that may be defined in the pipeline config
    config = self.nf_config
    for key in ("apptainer.registry", "docker.registry", "podman.registry", "singularity.registry"):
        if key in config:
            self.registry_set.add(config[key])

    self.registry_set.update(
        (
            # default registry for singularity, hardcoded in modules
            "depot.galaxyproject.org/singularity",
            # new Seqera Docker container registry
            "community.wave.seqera.io/library",
            # new Seqera Singularity container registry
            # (https://community-cr-prod.seqera.io/docker/registry/v2/)
            "community-cr-prod.seqera.io/docker/registry/v2",
        )
    )

def get_container_output_dir(self) -> Path:
    """Return the `<container_system>-images` directory inside the output directory."""
    base_dir = self.outdir
    assert base_dir is not None  # mypy
    return base_dir / f"{self.container_system}-images"

def download_container_images(self, current_revision: str = "") -> None:
def download_container_images(self, workflow_directory: Path, current_revision: str = "") -> None:
"""
Fetch the container images with the appropriate ContainerFetcher

Expand All @@ -730,10 +700,7 @@ def download_container_images(self, current_revision: str = "") -> None:
out_path_dir.mkdir(parents=True)

if self.container_fetcher is not None:
self.container_fetcher.fetch_containers(
self.containers,
self.containers_remote,
)
self.container_fetcher.fetch_containers(self.containers, self.containers_remote, workflow_directory)

def compress_download(self) -> None:
"""Take the downloaded files and make a compressed .tar.gz archive."""
Expand Down
44 changes: 43 additions & 1 deletion nf_core/pipelines/download/singularity.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@

import nf_core.utils
from nf_core.pipelines.download.container_fetcher import ContainerFetcher, ContainerProgress
from nf_core.pipelines.download.utils import DownloadError, intermediate_file, intermediate_file_no_creation
from nf_core.pipelines.download.utils import (
ContainerRegistryUrls,
DownloadError,
intermediate_file,
intermediate_file_no_creation,
)

log = logging.getLogger(__name__)
stderr = rich.console.Console(
Expand Down Expand Up @@ -161,6 +166,43 @@ def check_and_set_implementation(self) -> None:
else:
raise OSError("Singularity/Apptainer is needed to pull images, but it is not installed or not in $PATH")

def gather_registries(self, workflow_directory: Path) -> set[str]:
    """
    Build the set of registries to use when fetching Singularity/Apptainer images.
    This is needed to symlink downloaded container images so Nextflow will find them.

    The result combines the base registry set, the registries defined in the
    pipeline config and three registries that are always included.

    Args:
        workflow_directory (Path): The directory containing the pipeline files we are currently processing

    Returns:
        set[str]: The set of registries to use for the container fetching
    """
    # Work on a copy so that the base registry set itself stays untouched
    registries = set(self.base_registry_set)

    # Registry settings that may be defined in the pipeline config
    config_keys = [
        "apptainer.registry",
        "docker.registry",
        "podman.registry",
        "singularity.registry",
    ]
    registries.update(self.gather_config_registries(workflow_directory, config_keys))

    registries.update(
        (
            # default Galaxy registry for singularity (hardcoded in modules)
            ContainerRegistryUrls.GALAXY_SINGULARITY,
            # new Seqera Docker container registry
            ContainerRegistryUrls.SEQERA_DOCKER,
            # new Seqera Singularity container registry
            ContainerRegistryUrls.SEQERA_SINGULARITY,
        )
    )
    return registries

def get_cache_dir(self) -> Path:
"""
Get the cache Singularity cache directory
Expand Down
7 changes: 7 additions & 0 deletions nf_core/pipelines/download/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,18 @@
import shutil
import tempfile
from collections.abc import Generator
from enum import StrEnum
from pathlib import Path

log = logging.getLogger(__name__)


class ContainerRegistryUrls(StrEnum):
SEQERA_DOCKER = "community.wave.seqera.io/library"
SEQERA_SINGULARITY = "community-cr-prod.seqera.io/docker/registry/v2"
GALAXY_SINGULARITY = "depot.galaxyproject.org/singularity"


def copy_container_load_scripts(container_system: str, dest_dir: Path, make_exec: bool = True) -> tuple[str, Path]:
container_load_scripts_subpackage = "nf_core.pipelines.download.load_scripts"
script_name = f"{container_system}-load.sh"
Expand Down
Loading
点击 这是indexloc提供的php浏览器服务,不要输入任何密码和下载