From 72dd8489e89c34dac5c8312fc37273ece7735502 Mon Sep 17 00:00:00 2001 From: Eddie Bergman Date: Mon, 22 Jan 2024 11:50:14 +0100 Subject: [PATCH 1/4] fix(Pynisher): Ensure system supports limit (#223) --- src/amltk/scheduling/plugins/pynisher.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/amltk/scheduling/plugins/pynisher.py b/src/amltk/scheduling/plugins/pynisher.py index 65cd43ef..e00af348 100644 --- a/src/amltk/scheduling/plugins/pynisher.py +++ b/src/amltk/scheduling/plugins/pynisher.py @@ -378,6 +378,19 @@ def trial_evaluator_two(..., trial: Trial) -> int: ``` """ super().__init__() + + for limit, name in [ + (memory_limit, "memory"), + (cputime_limit, "cpu_time"), + (walltime_limit, "wall_time"), + ]: + if limit is not None and not self.supports(name): # type: ignore + raise RuntimeError( + f"Your platform does not support {name} limits." + " Please see pynisher documentation for more:" + "\nhttps://github.com/automl/pynisher#features", + ) + self.memory_limit = memory_limit self.cputime_limit = cputime_limit self.walltime_limit = walltime_limit From 4c2f7a9fcab9bf2401634320512edd294d049854 Mon Sep 17 00:00:00 2001 From: Eddie Bergman Date: Mon, 22 Jan 2024 22:59:52 +0100 Subject: [PATCH 2/4] feat(Optimizer): Allow for batch ask requests (#224) --- src/amltk/optimization/optimizer.py | 27 ++++++++++++++++++--- src/amltk/optimization/optimizers/neps.py | 21 +++++++++++++--- src/amltk/optimization/optimizers/optuna.py | 20 ++++++++++++--- src/amltk/optimization/optimizers/smac.py | 22 +++++++++++++++-- tests/optimizers/test_optimizers.py | 14 ++++++++++- 5 files changed, 92 insertions(+), 12 deletions(-) diff --git a/src/amltk/optimization/optimizer.py b/src/amltk/optimization/optimizer.py index 309a182f..795c4238 100644 --- a/src/amltk/optimization/optimizer.py +++ b/src/amltk/optimization/optimizer.py @@ -16,9 +16,17 @@ from __future__ import annotations from abc import abstractmethod -from collections.abc import Callable, Sequence +from collections.abc import Callable, Iterable, Sequence from datetime import datetime -from typing import TYPE_CHECKING, Any, Concatenate, Generic, ParamSpec, TypeVar +from typing import ( + TYPE_CHECKING, + Any, + Concatenate, + Generic, + ParamSpec, + TypeVar, + overload, +) from more_itertools import all_unique @@ -81,10 +89,23 @@ def tell(self, report: Trial.Report[I]) -> None: report: The report for a trial """ + @overload @abstractmethod - def ask(self) -> Trial[I]: + def ask(self, n: int) -> Iterable[Trial[I]]: + ... + + @overload + @abstractmethod + def ask(self, n: None = None) -> Trial[I]: + ... + + @abstractmethod + def ask(self, n: int | None = None) -> Trial[I] | Iterable[Trial[I]]: """Ask the optimizer for a trial to evaluate. + Args: + n: The number of trials to ask for. If `None`, ask for a single trial. + Returns: A config to sample. 
""" diff --git a/src/amltk/optimization/optimizers/neps.py b/src/amltk/optimization/optimizers/neps.py index b735712d..3d468d7d 100644 --- a/src/amltk/optimization/optimizers/neps.py +++ b/src/amltk/optimization/optimizers/neps.py @@ -119,13 +119,13 @@ def add_to_history(_, report: Trial.Report): import logging import shutil -from collections.abc import Mapping, Sequence +from collections.abc import Iterable, Mapping, Sequence from copy import deepcopy from dataclasses import dataclass from datetime import datetime from functools import partial from pathlib import Path -from typing import TYPE_CHECKING, Any, Protocol +from typing import TYPE_CHECKING, Any, Protocol, overload from typing_extensions import override import metahyper.api @@ -391,13 +391,28 @@ def create( # noqa: PLR0913 working_dir=working_dir, ) + @overload + def ask(self, n: int) -> Iterable[Trial[NEPSTrialInfo]]: + ... + + @overload + def ask(self, n: None = None) -> Trial[NEPSTrialInfo]: + ... + @override - def ask(self) -> Trial[NEPSTrialInfo]: + def ask( + self, + n: int | None = None, + ) -> Trial[NEPSTrialInfo] | Iterable[Trial[NEPSTrialInfo]]: """Ask the optimizer for a new config. Returns: The trial info for the new config. """ + # TODO: Ask neps people if there's a good way to batch sample rather than 1 by 1 + if n is not None: + return (self.ask(n=None) for _ in range(n)) + with self.optimizer.using_state(self.optimizer_state_file, self.serializer): ( config_id, diff --git a/src/amltk/optimization/optimizers/optuna.py b/src/amltk/optimization/optimizers/optuna.py index 011317a8..887cf528 100644 --- a/src/amltk/optimization/optimizers/optuna.py +++ b/src/amltk/optimization/optimizers/optuna.py @@ -101,10 +101,10 @@ def add_to_history(_, report: Trial.Report): """ # noqa: E501 from __future__ import annotations -from collections.abc import Sequence +from collections.abc import Iterable, Sequence from datetime import datetime from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, overload from typing_extensions import Self, override import optuna @@ -265,13 +265,27 @@ def create( return cls(study=study, metrics=metrics, space=space, bucket=bucket, seed=seed) + @overload + def ask(self, n: int) -> Iterable[Trial[OptunaTrial]]: + ... + + @overload + def ask(self, n: None = None) -> Trial[OptunaTrial]: + ... + @override - def ask(self) -> Trial[OptunaTrial]: + def ask( + self, + n: int | None = None, + ) -> Trial[OptunaTrial] | Iterable[Trial[OptunaTrial]]: """Ask the optimizer for a new config. Returns: The trial info for the new config. """ + if n is not None: + return (self.ask(n=None) for _ in range(n)) + optuna_trial: optuna.Trial = self.study.ask(self.space) config = optuna_trial.params trial_number = optuna_trial.number diff --git a/src/amltk/optimization/optimizers/smac.py b/src/amltk/optimization/optimizers/smac.py index b2ac2b8c..d3bb3060 100644 --- a/src/amltk/optimization/optimizers/smac.py +++ b/src/amltk/optimization/optimizers/smac.py @@ -91,7 +91,7 @@ def add_to_history(_, report: Trial.Report): from __future__ import annotations import logging -from collections.abc import Mapping, Sequence +from collections.abc import Iterable, Mapping, Sequence from datetime import datetime from pathlib import Path from typing import TYPE_CHECKING, Any, Literal, overload @@ -249,13 +249,31 @@ def create( ) return cls(facade=facade, fidelities=fidelities, bucket=bucket, metrics=metrics) + @overload + def ask(self, n: int) -> Iterable[Trial[SMACTrialInfo]]: + ... 
+ + @overload + def ask(self, n: None = None) -> Trial[SMACTrialInfo]: + ... + @override - def ask(self) -> Trial[SMACTrialInfo]: + def ask( + self, + n: int | None = None, + ) -> Trial[SMACTrialInfo] | Iterable[Trial[SMACTrialInfo]]: """Ask the optimizer for a new config. + Args: + n: The number of configs to ask for. If `None`, ask for a single config. + + Returns: The trial info for the new config. """ + if n is not None: + return (self.ask(n=None) for _ in range(n)) + smac_trial_info = self.facade.ask() config = smac_trial_info.config budget = smac_trial_info.budget diff --git a/tests/optimizers/test_optimizers.py b/tests/optimizers/test_optimizers.py index 52588aa5..0953ce4c 100644 --- a/tests/optimizers/test_optimizers.py +++ b/tests/optimizers/test_optimizers.py @@ -5,6 +5,7 @@ from typing import TYPE_CHECKING import pytest +from more_itertools import all_unique from pytest_cases import case, parametrize, parametrize_with_cases from amltk.optimization import Metric, Optimizer, Trial @@ -60,7 +61,7 @@ def opt_smac_hpo(metric: Metric, tmp_path: Path) -> SMACOptimizer: except ImportError: pytest.skip("SMAC is not installed") - pipeline = Component(_A, name="hi", space={"a": (1, 10)}) + pipeline = Component(_A, name="hi", space={"a": (1.0, 10.0)}) return SMACOptimizer.create( space=pipeline, bucket=tmp_path, @@ -128,3 +129,14 @@ def test_report_failure(optimizer: Optimizer): assert isinstance(report.exception, ValueError) assert isinstance(report.traceback, str) assert report.metric_values == tuple(metric.worst for metric in optimizer.metrics) + + +@parametrize_with_cases("optimizer", cases=".", prefix="opt_") +def test_batched_ask_generates_unique_configs(optimizer: Optimizer): + """Test that batched ask generates unique configs.""" + # NOTE: This was tested with up to 100, at least from SMAC and Optuna. + # It was quite slow for smac so I've reduced it to 10. + # This is not a hard requirement of optimizers (maybe it should be?) + batch = list(optimizer.ask(10)) + assert len(batch) == 10 + assert all_unique(batch) From 6c8edcbdd805bfa5104aefb9bc5ab450283194d5 Mon Sep 17 00:00:00 2001 From: Eddie Bergman Date: Fri, 26 Jan 2024 08:41:59 +0100 Subject: [PATCH 3/4] doc: Cleaner API docs, cleanup pipeline reference, faster rendering options, less noise in doc build output, changelog (#234) PR #234 Introduced a much cleaner doc layout with the latest version of the mkdocs stack we are using! Along with this, the reference for pipelines was made more streamlined and clear, while the API docs for pipelines was also cleaned up. Also, a viewable change log! For developers, we defaulted `just docs` to not run any code cells for faster iteration, with the `just docs-code` to now run the code and `just docs-full` to run code and examples. 
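For reference, the batched `ask()` added in the optimizer patch above can be consumed as in the sketch below. It mirrors the new test case: the `Metric("loss", minimize=True)` construction and the toy `_A` model are assumptions filled in for illustration, not lines taken from this diff.

```python
# Rough sketch of the batched ask() API introduced in PATCH 2/4.
from pathlib import Path

from more_itertools import all_unique

from amltk.optimization import Metric
from amltk.optimization.optimizers.smac import SMACOptimizer
from amltk.pipeline import Component


class _A:  # stand-in model, mirroring the `_A` fixture used in the tests
    def __init__(self, a: float):
        self.a = a


metric = Metric("loss", minimize=True)  # assumed Metric signature
pipeline = Component(_A, name="hi", space={"a": (1.0, 10.0)})
optimizer = SMACOptimizer.create(space=pipeline, metrics=metric, bucket=Path("smac-output"))

# ask(n) now returns an iterable of n trials; the new test also checks that the
# configs are unique, though the patch notes this is not a hard requirement.
batch = list(optimizer.ask(10))
assert len(batch) == 10 and all_unique(batch)

# ask() with no argument keeps the old single-trial behaviour.
trial = optimizer.ask()
```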
--- CONTRIBUTING.md | 5 + .../_templates/python/material/attribute.html | 70 - docs/_templates/python/material/class.html | 115 -- docs/_templates/python/material/function.html | 74 - docs/_templates/python/material/labels.html | 9 - .../_templates/python/material/signature.html | 48 - docs/changelog.md | 1 + docs/example_runner.py | 16 +- docs/guides/optimization.md | 2 +- docs/hooks/cleanup_log_output.py | 45 + .../debug_which_page_is_being_rendered.py | 28 + docs/hooks/disable_markdown_exec.py | 46 + docs/reference/pipelines/pipeline.md | 165 ++- docs/stylesheets/custom.css | 123 +- justfile | 27 +- mkdocs.yml | 63 +- pyproject.toml | 1 + src/amltk/pipeline/components.py | 1275 +++++++---------- src/amltk/pipeline/node.py | 18 +- 19 files changed, 923 insertions(+), 1208 deletions(-) delete mode 100644 docs/_templates/python/material/attribute.html delete mode 100644 docs/_templates/python/material/class.html delete mode 100644 docs/_templates/python/material/function.html delete mode 100644 docs/_templates/python/material/labels.html delete mode 100644 docs/_templates/python/material/signature.html create mode 100644 docs/changelog.md create mode 100644 docs/hooks/cleanup_log_output.py create mode 100644 docs/hooks/debug_which_page_is_being_rendered.py create mode 100644 docs/hooks/disable_markdown_exec.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9b38fdec..9018a063 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -49,6 +49,8 @@ just test # Run the documentation, fix any warnings just docs +# just docs-code # Run code and display output (slower) +# just docs-full # Run examples and code (slowest) # Run pre-commit checks just check @@ -297,6 +299,9 @@ You can find a collection of features for custom documentation [here](https://squidfunk.github.io/mkdocs-material/reference/) as well as code reference documentation [here](https://mkdocstrings.github.io/usage/) +You can find the entry point for the documentation infrastructure of `mkdocs` +in `mkdocs.yml`. + ### Viewing Documentation You can live view documentation changes by running `just docs`, diff --git a/docs/_templates/python/material/attribute.html b/docs/_templates/python/material/attribute.html deleted file mode 100644 index 4f234307..00000000 --- a/docs/_templates/python/material/attribute.html +++ /dev/null @@ -1,70 +0,0 @@ -{{ log.debug("Rendering " + attribute.path) }} - -
-{% with html_id = attribute.path %} - - {% if root %} - {% set show_full_path = config.show_root_full_path %} - {% set root_members = True %} - {% elif root_members %} - {% set show_full_path = config.show_root_members_full_path or config.show_object_full_path %} - {% set root_members = False %} - {% else %} - {% set show_full_path = config.show_object_full_path %} - {% endif %} - - {% if not root or config.show_root_heading %} - - {% filter heading(heading_level, - role="data" if attribute.parent.kind.value == "module" else "attr", - id=html_id, - class="doc doc-heading", - toc_label=attribute.name, - ) - %} - - {% if config.separate_signature %} - {% if show_full_path %}{{ attribute.path }}{% else %}{{ attribute.name }}{% endif %} - {% else %} - {% filter highlight(language="python", inline=True) %} - {% if show_full_path %}{{ attribute.path }}{% else %}{{ attribute.name }}{% endif %} - {% if attribute.annotation %}: {{ attribute.annotation }}{% endif %} - {% endfilter %} - {% endif %} - - {% with labels = attribute.labels %} - {% include "labels.html" with context %} - {% endwith %} - - {% endfilter %} - - {% if config.separate_signature %} - {% filter highlight(language="python", inline=False) %} - {% filter format_code(config.line_length) %} - {% if show_full_path %}{{ attribute.path }}{% else %}{{ attribute.name }}{% endif %} - {% if attribute.annotation %}: {{ attribute.annotation|safe }}{% endif %} - # {% if attribute.value %} = {{ attribute.value|safe }}{% endif %} - {% endfilter %} - {% endfilter %} - {% endif %} - - {% else %} - {% if config.show_root_toc_entry %} - {% filter heading(heading_level, - role="data" if attribute.parent.kind.value == "module" else "attr", - id=html_id, - toc_label=attribute.path if config.show_root_full_path else attribute.name, - hidden=True) %} - {% endfilter %} - {% endif %} - {% set heading_level = heading_level - 1 %} - {% endif %} - -
- {% with docstring_sections = attribute.docstring.parsed %} - {% include "docstring.html" with context %} - {% endwith %} -
- -{% endwith %} -
diff --git a/docs/_templates/python/material/class.html b/docs/_templates/python/material/class.html deleted file mode 100644 index 0b5c8124..00000000 --- a/docs/_templates/python/material/class.html +++ /dev/null @@ -1,115 +0,0 @@ -{{ log.debug("Rendering " + class.path) }} - -
-{% with html_id = class.path %} - - {% if root %} - {% set show_full_path = config.show_root_full_path %} - {% set root_members = True %} - {% elif root_members %} - {% set show_full_path = config.show_root_members_full_path or config.show_object_full_path %} - {% set root_members = False %} - {% else %} - {% set show_full_path = config.show_object_full_path %} - {% endif %} - - {% if not root or config.show_root_heading %} - - {% filter heading(heading_level, - role="class", - id=html_id, - class="doc doc-heading", - toc_label=class.name) %} - - {% if config.separate_signature %} - {% if show_full_path %}{{ class.path }}{% else %}{{ class.name }}{% endif %} - {% elif config.merge_init_into_class and "__init__" in class.members -%} - {%- with function = class.members["__init__"] -%} - {%- filter highlight(language="python", inline=True) -%} - {% if show_full_path %}{{ class.path }}{% else %}class {{ class.name }}{% endif %} - {%- include "signature.html" with context -%} - {%- endfilter -%} - {%- endwith -%} - {% else %} - {% filter highlight(language="python", inline=True) %} - {% if show_full_path %}{{ class.path }}{% else %}class {{ class.name }}{% endif %} - {% endfilter %} - {% endif %} - - {% with labels = class.labels %} - {% include "labels.html" with context %} - {% endwith %} - - {% endfilter %} - - {% if config.separate_signature and config.merge_init_into_class %} - {% if "__init__" in class.members %} - {% with function = class.members["__init__"] %} - {% filter highlight(language="python", inline=False) %} - {% filter format_signature(config.line_length) %} - {% if show_full_path %}{{ class.path }}{% else %}{{ class.name }}{% endif %} - {% include "signature.html" with context %} - {% endfilter %} - {% endfilter %} - {% endwith %} - {% endif %} - {% endif %} - - {% else %} - {% if config.show_root_toc_entry %} - {% filter heading(heading_level, - role="class", - id=html_id, - toc_label=class.path if config.show_root_full_path else class.name, - hidden=True) %} - {% endfilter %} - {% endif %} - {% set heading_level = heading_level - 1 %} - {% endif %} - -
- {% if config.show_bases and class.bases %} -

- Bases: {% for expression in class.bases -%} - {% include "expression.html" with context %}{% if not loop.last %}, {% endif %} - {% endfor -%} -

- {% endif %} - - {% with docstring_sections = class.docstring.parsed %} - {% include "docstring.html" with context %} - {% endwith %} - - {% if config.merge_init_into_class %} - {% if "__init__" in class.members and class.members["__init__"].has_docstring %} - {% with docstring_sections = class.members["__init__"].docstring.parsed %} - {% include "docstring.html" with context %} - {% endwith %} - {% endif %} - {% endif %} - - {% if config.show_source %} - {% if config.merge_init_into_class %} - {% if "__init__" in class.members and class.members["__init__"].source %} -
- Source code in {{ class.relative_filepath }} - {{ class.members["__init__"].source|highlight(language="python", linestart=class.members["__init__"].lineno, linenums=True) }} -
- {% endif %} - {% elif class.source %} -
- Source code in {{ class.relative_filepath }} - {{ class.source|highlight(language="python", linestart=class.lineno, linenums=True) }} -
- {% endif %} - {% endif %} - - {% with obj = class %} - {% set root = False %} - {% set heading_level = heading_level + 1 %} - {% include "children.html" with context %} - {% endwith %} -
- -{% endwith %} -
diff --git a/docs/_templates/python/material/function.html b/docs/_templates/python/material/function.html deleted file mode 100644 index 171721ee..00000000 --- a/docs/_templates/python/material/function.html +++ /dev/null @@ -1,74 +0,0 @@ -{{ log.debug("Rendering " + function.path) }} - -
-{% with html_id = function.path %} - - {% if root %} - {% set show_full_path = config.show_root_full_path %} - {% set root_members = True %} - {% elif root_members %} - {% set show_full_path = config.show_root_members_full_path or config.show_object_full_path %} - {% set root_members = False %} - {% else %} - {% set show_full_path = config.show_object_full_path %} - {% endif %} - - {% if not root or config.show_root_heading %} - - {% filter heading(heading_level, - role="function", - id=html_id, - class="doc doc-heading", - toc_label=function.name ~ "()") %} - - {% if config.separate_signature %} - {% if show_full_path %}{{ function.path }}{% else %}{{ function.name }}{% endif %} - {% else %} - {% filter highlight(language="python", inline=True) %} - {% if show_full_path %}{{ function.path }}{% else %}def {{ function.name }}{% endif %} - {% include "signature.html" with context %} - {% endfilter %} - {% endif %} - - {% with labels = function.labels %} - {% include "labels.html" with context %} - {% endwith %} - - {% endfilter %} - - {% if config.separate_signature %} - {% filter highlight(language="python", inline=False) %} - {% filter format_signature(config.line_length) %} - {% if show_full_path %}{{ function.path }}{% else %}{{ function.name }}{% endif %} - {% include "signature.html" with context %} - {% endfilter %} - {% endfilter %} - {% endif %} - - {% else %} - {% if config.show_root_toc_entry %} - {% filter heading(heading_level, - role="function", - id=html_id, - toc_label=function.path if config.show_root_full_path else function.name, - hidden=True) %} - {% endfilter %} - {% endif %} - {% set heading_level = heading_level - 1 %} - {% endif %} - -
- {% with docstring_sections = function.docstring.parsed %} - {% include "docstring.html" with context %} - {% endwith %} - - {% if config.show_source and function.source %} -
- Source code in {{ function.relative_filepath }} - {{ function.source|highlight(language="python", linestart=function.lineno, linenums=True) }} -
- {% endif %} -
- -{% endwith %} -
diff --git a/docs/_templates/python/material/labels.html b/docs/_templates/python/material/labels.html deleted file mode 100644 index 27fd24c9..00000000 --- a/docs/_templates/python/material/labels.html +++ /dev/null @@ -1,9 +0,0 @@ -{% if labels %} - {{ log.debug("Rendering labels") }} - -
- {% for label in labels|sort %} - {{ label|replace("property", "prop")|replace("instance-attribute", "attr")|replace("dataclass", "dataclass")|replace("class-attribute", "classvar") }} - {% endfor %} -
-{% endif %} diff --git a/docs/_templates/python/material/signature.html b/docs/_templates/python/material/signature.html deleted file mode 100644 index ae267f5f..00000000 --- a/docs/_templates/python/material/signature.html +++ /dev/null @@ -1,48 +0,0 @@ -{%- if config.show_signature -%} - {{ log.debug("Rendering signature") }} - {%- with -%} - - {%- set ns = namespace(has_pos_only=False, render_pos_only_separator=True, render_kw_only_separator=True, equal="=") -%} - - {%- if config.show_signature_annotations -%} - {%- set ns.equal = " = " -%} - {%- endif -%} - - ( - {%- for parameter in function.parameters -%} - {%- if parameter.name not in ("self", "cls") or loop.index0 > 0 or not (function.parent and function.parent.is_class) -%} - - {%- if parameter.kind.value == "positional-only" -%} - {%- set ns.has_pos_only = True -%} - {%- else -%} - {%- if ns.has_pos_only and ns.render_pos_only_separator -%} - {%- set ns.render_pos_only_separator = False %}/, {% endif -%} - {%- if parameter.kind.value == "keyword-only" -%} - {%- if ns.render_kw_only_separator -%} - {%- set ns.render_kw_only_separator = False %}*, {% endif -%} - {%- endif -%} - {%- endif -%} - - {%- if config.show_signature_annotations and parameter.annotation is not none -%} - {%- set annotation = ": " + parameter.annotation|safe -%} - {%- endif -%} - - {%- if parameter.default is not none and parameter.kind.value != "variadic positional" and parameter.kind.value != "variadic keyword" -%} - {%- set default = ns.equal + parameter.default|safe -%} - {%- endif -%} - - {%- if parameter.kind.value == "variadic positional" -%} - {%- set ns.render_kw_only_separator = False -%} - {%- endif -%} - - {% if parameter.kind.value == "variadic positional" %}*{% elif parameter.kind.value == "variadic keyword" %}**{% endif -%} - {{ parameter.name }}{{ annotation }}{{ default }} - {%- if not loop.last %}, {% endif -%} - - {%- endif -%} - {%- endfor -%} - ) - {%- if config.show_signature_annotations and function.annotation %} -> {{ function.annotation|safe }}{%- endif -%} - - {%- endwith -%} -{%- endif -%} diff --git a/docs/changelog.md b/docs/changelog.md new file mode 100644 index 00000000..786b75d5 --- /dev/null +++ b/docs/changelog.md @@ -0,0 +1 @@ +--8<-- "CHANGELOG.md" diff --git a/docs/example_runner.py b/docs/example_runner.py index 03c24c86..ef0c84f0 100644 --- a/docs/example_runner.py +++ b/docs/example_runner.py @@ -13,10 +13,9 @@ import mkdocs_gen_files from more_itertools import first_true, peekable -logger = logging.getLogger(__name__) -logging.basicConfig(level=logging.WARNING) +logger = logging.getLogger("mkdocs") -ENV_VAR = "AMLTK_DOC_RENDER_EXAMPLES" +RUN_EXAMPLES_ENV_VAR = "AMLTK_DOC_RENDER_EXAMPLES" @dataclass @@ -144,9 +143,10 @@ def should_execute(cls, *, name: str, runnable: bool) -> bool: if not runnable: return False - env_var = os.environ.get(ENV_VAR, None) - if env_var is None: + env_var = os.environ.get(RUN_EXAMPLES_ENV_VAR, "all") + if env_var in ("false", "", "0", "no", "off"): return False + if env_var == "all": return True @@ -270,6 +270,12 @@ def copy_section(self) -> str: ) +if os.environ.get(RUN_EXAMPLES_ENV_VAR, "all") in ("false", "", "0", "no", "off"): + logger.warning( + f"Env variable {RUN_EXAMPLES_ENV_VAR} not set - not running examples." 
+ " Use `just docs-full` to run and render examples.", + ) + for path in sorted(Path("examples").rglob("*.py")): module_path = path.relative_to("examples").with_suffix("") doc_path = path.relative_to("examples").with_suffix(".md") diff --git a/docs/guides/optimization.md b/docs/guides/optimization.md index 31fb7ca3..47896682 100644 --- a/docs/guides/optimization.md +++ b/docs/guides/optimization.md @@ -52,7 +52,7 @@ s = Searchable( {"x": (-10.0, 10.0)}, name="my-searchable" ) -from amltk._doc import doc_print; doc_print(print, s) +from amltk._doc import doc_print; doc_print(print, s) # markdown-exec: hide ``` diff --git a/docs/hooks/cleanup_log_output.py b/docs/hooks/cleanup_log_output.py new file mode 100644 index 00000000..b7aab8de --- /dev/null +++ b/docs/hooks/cleanup_log_output.py @@ -0,0 +1,45 @@ +"""The module is a hook which disables warnings and log messages which pollute the +doc build output. + +One possible downside is if one of these modules ends up giving an actual +error, such as OpenML failing to retrieve a dataset. I tried to make sure ERROR +log message are still allowed through. +""" +import logging +import warnings +from typing import Any + +import mkdocs +import mkdocs.plugins +import mkdocs.structure.pages + +log = logging.getLogger("mkdocs") + + +@mkdocs.plugins.event_priority(-50) +def on_startup(**kwargs: Any): + # We get a load of deprecation warnings from SMAC + warnings.filterwarnings("ignore", category=DeprecationWarning) + + # ConvergenceWarning from sklearn + warnings.filterwarnings("ignore", module="sklearn") + + +def on_pre_page( + page: mkdocs.structure.pages.Page, + config: Any, + files: Any, +) -> mkdocs.structure.pages.Page | None: + # NOTE: mkdocs says they're always normalized to be '/' seperated + # which means this should work on windows as well. + + # This error is actually demonstrated to the user which causes amltk + # to log the error. I don't know how to disable it for that one code cell + # put I can at least limit it to the file in which it's in. + if page.file.src_uri == "guides/scheduling.md": + scheduling_logger = logging.getLogger("amltk.scheduling.task") + scheduling_logger.setLevel(logging.CRITICAL) + + logging.getLogger("smac").setLevel(logging.ERROR) + logging.getLogger("openml").setLevel(logging.ERROR) + return page diff --git a/docs/hooks/debug_which_page_is_being_rendered.py b/docs/hooks/debug_which_page_is_being_rendered.py new file mode 100644 index 00000000..69bf86f7 --- /dev/null +++ b/docs/hooks/debug_which_page_is_being_rendered.py @@ -0,0 +1,28 @@ +"""This module is a hook that when any code is being rendered, it will +print the path to the file being rendered. 
+ +This makes it easier to identify which file is being rendered when an error happens.""" +import logging +from typing import Any + +import mkdocs +import mkdocs.plugins +import mkdocs.structure.pages +import os + +log = logging.getLogger("mkdocs") + +RENDER_EXAMPLES_ENV_VAR = "AMLTK_DOC_RENDER_EXAMPLES" +EXEC_DOCS_ENV_VAR = "AMLTK_EXEC_DOCS" + +truthy_values = {"yes", "on", "true", "1", "all"} + +def on_pre_page( + page: mkdocs.structure.pages.Page, + config: Any, + files: Any, +) -> mkdocs.structure.pages.Page | None: + render_examples = os.environ.get(RENDER_EXAMPLES_ENV_VAR, "true") + render_code = os.environ.get(EXEC_DOCS_ENV_VAR, "true") + if render_examples.lower() in truthy_values or render_code.lower() in truthy_values: + log.info(f"{page.file.src_path}") diff --git a/docs/hooks/disable_markdown_exec.py b/docs/hooks/disable_markdown_exec.py new file mode 100644 index 00000000..9962b9cd --- /dev/null +++ b/docs/hooks/disable_markdown_exec.py @@ -0,0 +1,46 @@ +"""This disable markdown_exec based on an environment variable. +This speeds up the build of the docs for faster iteration. + +This is done by overwriting the module responsible for compiling and executing the code +by overriding the `exec(...)` global variable that is used to run the code. +We hijack it and print a helpful message about how to run the code cell instead. + +https://github.com/pawamoy/markdown-exec/blob/adff40b2928dbb2d22f27684e085f02d39a07291/src/markdown_exec/formatters/python.py#L42-L70 +""" +from __future__ import annotations + +import logging +import os +from typing import Any + +import mkdocs +import mkdocs.plugins +import mkdocs.structure.pages + +RUN_CODE_BLOCKS_ENV_VAR = "AMLTK_EXEC_DOCS" + +logger = logging.getLogger("mkdocs") + + +def _print_msg(compiled_code: Any, exec_globals: dict) -> None: + _print = exec_globals["print"] + _print( + f"Env variable {RUN_CODE_BLOCKS_ENV_VAR}=0 - No code to display." + "\nUse `just docs-code` (or `just docs-full` for examples) to run" + " the code block and display output." + ) + +truthy_values = {"yes", "on", "true", "1"} + +@mkdocs.plugins.event_priority(100) +def on_startup(**kwargs: Any): + run_code_blocks = os.environ.get(RUN_CODE_BLOCKS_ENV_VAR, "true") + if run_code_blocks.lower() not in truthy_values: + logger.warning( + f"Disabling markdown-exec due to {RUN_CODE_BLOCKS_ENV_VAR}={run_code_blocks}" + "\n.Use `just docs-full` to run and render examples.", + ) + from markdown_exec.formatters import python + + setattr(python, "exec", _print_msg) + diff --git a/docs/reference/pipelines/pipeline.md b/docs/reference/pipelines/pipeline.md index 533917c4..f19edd19 100644 --- a/docs/reference/pipelines/pipeline.md +++ b/docs/reference/pipelines/pipeline.md @@ -1,15 +1,9 @@ -## Pieces of a Pipeline A pipeline is a collection of [`Node`][amltk.pipeline.node.Node]s that are connected together to form a directed acylic graph, where the nodes follow a parent-child relation ship. The purpose of these is to form some _abstract_ representation of what you want to search over/optimize and then build into a concrete object. -These [`Node`][amltk.pipeline.node.Node]s allow you to specific the function/object that -will be used there, it's search space and any configuration you want to explicitly apply. -There are various components listed below which gives these nodes extract syntatic meaning, -e.g. a [`Choice`](#choice) which represents some choice between it's children while -a [`Sequential`](#sequential) indicates that each child follows one after the other. 
- +## Key Operations Once a pipeline is created, you can perform 3 very critical operations on it: * [`search_space(parser=...)`][amltk.pipeline.node.Node.search_space] - This will return the @@ -21,14 +15,155 @@ Once a pipeline is created, you can perform 3 very critical operations on it: concrete object from a configured pipeline. You can find the reference to the [available builders here](../pipelines/builders.md). -### Components +## Node +A [`Node`][amltk.pipeline.node.Node] is the basic building block of a pipeline. +It contains various attributes, such as a + +- [`.name`][amltk.pipeline.node.Node.name] - The name of the node, which is used + to identify it in the pipeline. +- [`.item`][amltk.pipeline.node.Node.item] - The concrete object or some function to construct one +- [`.space`][amltk.pipeline.node.Node.space] - A search space to consider for this node +- [`.config`][amltk.pipeline.node.Node.config] - The specific configuration to use for this + node once `build` is called. +- [`.nodes`][amltk.pipeline.node.Node.nodes] - Other nodes that this node links to. + +To give syntactic meaning to these nodes, we have various subclasses. For example, +[`Sequential`][amltk.pipeline.components.Sequential] is a node where the order of the +`nodes` it contains matter, while a [`Component`][amltk.pipeline.components.Component] is a node +that can be used to parametrize and construct a concrete object, but does not lead to anything else. + +Each node type here is either a _leaf_ or a _branch_, where a _branch_ has children, while +while a _leaf_ does not. + +There various components are listed here: + +### [`Component`][amltk.pipeline.Component] - `leaf` +A parametrizable node type with some way to build an object, given a configuration. + +```python exec="true" source="material-block" html="true" +from amltk.pipeline import Component +from dataclasses import dataclass + +@dataclass +class Model: + x: float + +c = Component(Model, space={"x": (0.0, 1.0)}, name="model") +from amltk._doc import doc_print; doc_print(print, c) # markdown-exec: hide +``` + +### [`Searchable`][amltk.pipeline.Searchable] - `leaf` +A parametrizable node type that contains a search space that should be searched over, +but does not provide a concrete object. + +```python exec="true" source="material-block" html="true" +from amltk.pipeline import Searchable + +def run_script(mode, n): + # ... run some actual script + pass + +script_space = Searchable({"mode": ["orange", "blue", "red"], "n": (10, 100)}) +from amltk._doc import doc_print; doc_print(print, script_space) # markdown-exec: hide +``` + +### [`Fixed`][amltk.pipeline.Fixed] - `leaf` +A _non-parametrizable_ node type that contains an object that should be used as is. + +```python exec="true" source="material-block" html="true" +from amltk.pipeline import Component, Fixed, Sequential +from sklearn.ensemble import RandomForestClassifier + +estimator = RandomForestClassifier() +# ... pretend it was fit +fitted_estimator = Fixed(estimator) +from amltk._doc import doc_print; doc_print(print, fitted_estimator) # markdown-exec: hide +``` + +### [`Sequential`][amltk.pipeline.Sequential] - `branch` +A node type which signifies an order between its children, such as a sequential +set of preprocessing and estimator through which the data should flow. 
+ +```python exec="true" source="material-block" html="true" +from amltk.pipeline import Component, Sequential +from sklearn.decomposition import PCA +from sklearn.ensemble import RandomForestClassifier + +pipeline = Sequential( + PCA(n_components=3), + Component(RandomForestClassifier, space={"n_estimators": (10, 100)}), + name="my_pipeline" +) +from amltk._doc import doc_print; doc_print(print, pipeline) # markdown-exec: hide +``` + +### [`Choice`][amltk.pipeline.Choice] - `branch` +A node type that signifies a choice between multiple children, usually chosen during configuration. + +```python exec="true" source="material-block" html="true" +from amltk.pipeline import Choice, Component +from sklearn.ensemble import RandomForestClassifier +from sklearn.neural_network import MLPClassifier + +rf = Component(RandomForestClassifier, space={"n_estimators": (10, 100)}) +mlp = Component(MLPClassifier, space={"activation": ["logistic", "relu", "tanh"]}) + +estimator_choice = Choice(rf, mlp, name="estimator") +from amltk._doc import doc_print; doc_print(print, estimator_choice) # markdown-exec: hide +``` + +### [`Split`][amltk.pipeline.Split] - `branch` +A node where the output of the previous node is split amongst its children, +according to it's configuration. + +```python exec="true" source="material-block" html="true" +from amltk.pipeline import Component, Split +from sklearn.impute import SimpleImputer +from sklearn.preprocessing import OneHotEncoder +from sklearn.compose import make_column_selector + +categorical_pipeline = [ + SimpleImputer(strategy="constant", fill_value="missing"), + OneHotEncoder(drop="first"), +] +numerical_pipeline = Component(SimpleImputer, space={"strategy": ["mean", "median"]}) + +preprocessor = Split( + {"categories": categorical_pipeline, "numerical": numerical_pipeline}, + name="my_split" +) +from amltk._doc import doc_print; doc_print(print, preprocessor) # markdown-exec: hide +``` + +### [`Join`][amltk.pipeline.Join] - `branch` +A node where the output of the previous node is sent all of its children. + +```python exec="true" source="material-block" html="true" +from amltk.pipeline import Join, Component +from sklearn.decomposition import PCA +from sklearn.feature_selection import SelectKBest + +pca = Component(PCA, space={"n_components": (1, 3)}) +kbest = Component(SelectKBest, space={"k": (1, 3)}) + +join = Join(pca, kbest, name="my_feature_union") +from amltk._doc import doc_print; doc_print(print, join) # markdown-exec: hide +``` + +## Syntax Sugar +You can connect these nodes together using either the constructors explicitly, +as shown in the examples. We also provide some index operators: + +* `>>` - Connect nodes together to form a [`Sequential`][amltk.pipeline.components.Sequential] +* `&` - Connect nodes together to form a [`Join`][amltk.pipeline.components.Join] +* `|` - Connect nodes together to form a [`Choice`][amltk.pipeline.components.Choice] -::: amltk.pipeline.components - options: - members: false +There is also another short-hand that you may find useful to know: -### Node +* `{comp1, comp2, comp3}` - This will automatically be converted into a + [`Choice`][amltk.pipeline.Choice] between the given components. +* `(comp1, comp2, comp3)` - This will automatically be converted into a + [`Join`][amltk.pipeline.Join] between the given components. +* `[comp1, comp2, comp3]` - This will automatically be converted into a + [`Sequential`][amltk.pipeline.Sequential] between the given components. 
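As a rough sketch of how these operators compose (a plain snippet rather than one of the executed `exec="true"` docs blocks; the exact auto-generated names of operator-built nodes are an assumption):

```python
from amltk.pipeline import Choice, Component, Join, Sequential
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.impute import SimpleImputer
from sklearn.neural_network import MLPClassifier

pca = Component(PCA, space={"n_components": (1, 3)})
kbest = Component(SelectKBest, space={"k": (1, 3)})
rf = Component(RandomForestClassifier, space={"n_estimators": (10, 100)})
mlp = Component(MLPClassifier, space={"activation": ["logistic", "relu", "tanh"]})

features = Join(name="features") & pca & kbest    # `&` builds a Join
estimator = Choice(name="estimator") | rf | mlp   # `|` builds a Choice
pipeline = Sequential(                            # explicit, named Sequential
    SimpleImputer(fill_value=0),
    features,
    estimator,
    name="my_pipeline",
)

# `>>` chains nodes into a Sequential as well, with a generated name.
chained = features >> estimator
```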
-::: amltk.pipeline.node - options: - members: false diff --git a/docs/stylesheets/custom.css b/docs/stylesheets/custom.css index 7da3de53..a7782f17 100644 --- a/docs/stylesheets/custom.css +++ b/docs/stylesheets/custom.css @@ -1,110 +1,25 @@ -[data-md-color-scheme="default"] { - --doc-label-instance-attribute-fg-color: #0079ff; - --doc-label-property-fg-color: #00dfa2; - --doc-label-class-attribute-fg-color: #d1b619; - --doc-label-dataclass-fg-color: #ff0060; - - --doc-label-instance-attribute-bg-color: #0079ff1a; - --doc-label-property-bg-color: #00dfa21a; - --doc-label-class-attribute-bg-color: #d1b6191a; - --doc-label-dataclass-bg-color: #ff00601a; -} - -[data-md-color-scheme="slate"] { - --doc-label-instance-attribute-fg-color: #963fb8; - --doc-label-property-fg-color: #6d67e4; - --doc-label-class-attribute-fg-color: #46c2cb; - --doc-label-dataclass-fg-color: #f2f7a1; - - --doc-label-instance-attribute-bg-color: #963fb81a; - --doc-label-property-bg-color: #6d67e41a; - --doc-label-class-attribute-bg-color: #46c2cb1a; - --doc-label-dataclass-bg-color: #f2f7a11a; -} -:root { - --md-tooltip-width: 500px; -} - -.doc.doc-label.doc-label-instance-attribute code { - background-color: var(--doc-label-instance-attribute-bg-color); - color: var(--doc-label-instance-attribute-fg-color); -} -.doc.doc-label.doc-label-class-attribute code { - background-color: var(--doc-label-class-attribute-bg-color); - color: var(--doc-label-class-attribute-fg-color); -} -.doc.doc-label.doc-label-classmethod code { - background-color: var(--doc-label-class-attribute-bg-color); - color: var(--doc-label-classattribute-fg-color); -} -.doc.doc-label.doc-label-property code { - background-color: var(--doc-label-property-bg-color); - color: var(--doc-label-property-fg-color); -} -.doc.doc-label.doc-label-dataclass code { - background-color: var(--doc-label-dataclass-bg-color); - color: var(--doc-label-dataclass-fg-color); -} -.doc.doc-label code { - font-weight: bold; -} -.doc.doc-labels { - margin-left: 10px; - border-radius: 10px; - padding-top: 1px; - padding-bottom: 1px; - padding-right: 10px; - padding-left: 10px; -} - - -body[data-md-color-primary="black"] .excalidraw svg { - filter: invert(100%) hue-rotate(180deg); -} - -body[data-md-color-primary="black"] .excalidraw svg rect { - fill: transparent; -} - -h2.doc.doc-heading { - border-top: 2px solid rgba(0, 0, 0); - padding-top: 48px; -} - +/* If anything is breaking from this css, please feel free to +* remove it. 
+*/ -/* Highlight None inside code blocks */ +/* Highlight None with color inside code blocks */ code.highlight.language-python span.kc { color: var(--md-code-hl-keyword-color); } - - -h3.doc.doc-heading { - border-top: 1px solid rgba(0, 0, 0, 0.2); - padding-top: 48px; -} - -a.md-nav__link--passed { - padding-left: 10px; -} - -a.md-nav__link--active { - padding-left: 10px; - font-weight: bold; - border-left: .05rem solid var(--md-accent-fg-color); -} - -h3 .doc-heading code { - font-size: 16px; -} - -/* -.doc.doc-object.doc-class h2.doc.doc-heading { - text-align: center; +/* Make tool tip annotations wider */ +:root { + --md-tooltip-width: 500px; } -*/ - -.doc-heading code { - font-weight: normal; - font-family: "Roboto Mono", "SFMono-Regular", Consolas, "Courier New", Courier, - monospace; +/* api doc attribute cards */ +div.doc-class > div.doc-contents > div.doc-children > div.doc-object { + padding-right: 20px; + padding-left: 20px; + border-radius: 15px; + margin-top: 20px; + margin-bottom: 20px; + box-shadow: 2px 2px 2px rgba(0, 0, 0, 0.2); + margin-right: 0px; + border-color: rgba(0, 0, 0, 0.2); + border-width: 1px; + border-style: solid; } diff --git a/justfile b/justfile index 7a821b0e..3941c9b6 100644 --- a/justfile +++ b/justfile @@ -25,12 +25,29 @@ check: check-types: mypy src -# Launch the docs server locally and open the webpage -docs exec_doc_code="true" example="None" offline="false": +# Launch the docs, executing code blocks and examples +docs-full: python -m webbrowser -t "http://127.0.0.1:8000/" - AMLTK_DOC_RENDER_EXAMPLES={{example}} \ - AMLTK_DOCS_OFFLINNE={{offline}} \ - AMLTK_EXEC_DOCS={{exec_doc_code}} mkdocs serve --watch-theme --dirtyreload + AMLTK_DOC_RENDER_EXAMPLES=all \ + AMLTK_DOCS_OFFLINE=true \ + AMLTK_EXEC_DOCS=true \ + mkdocs serve --watch-theme + +# Launch the docs and execute code blocks +docs-code: + python -m webbrowser -t "http://127.0.0.1:8000/" + AMLTK_DOCS_OFFLINE=true \ + AMLTK_EXEC_DOCS=true \ + AMLTK_DOC_RENDER_EXAMPLES=false \ + mkdocs serve --watch-theme + +# Launch the docs but dont run code examples +docs: + python -m webbrowser -t "http://127.0.0.1:8000/" + AMLTK_DOCS_OFFLINE=true \ + AMLTK_EXEC_DOCS=false \ + AMLTK_DOC_RENDER_EXAMPLES=false \ + mkdocs serve --watch-theme # https://github.com/pawamoy/markdown-exec/issues/19 action: diff --git a/mkdocs.yml b/mkdocs.yml index f052af9e..37d6b2cc 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,3 +1,17 @@ +# This project uses mkdocs to generate the documentation. +# Specifically it uses the mkdocs-material theme, which provides a whole +# host of nice features and customization +# +# mkdocs: https://www.mkdocs.org/getting-started/#getting-started-with-mkdocs +# mkdocs-material: https://squidfunk.github.io/mkdocs-material/ +# +# Please refer to these links for more information on how to use mkdocs +# +# For serving the docs locally, you can take a look at the `justfile` at +# the root of this repository, it contains a few commands for generating the docs +# with different levels of execution. 
+# +# Please refer to individual sections for any additional notes site_name: "AutoML-Toolkit" repo_url: https://github.com/automl/amltk/ repo_name: automl/amltk @@ -12,10 +26,11 @@ theme: - content.code.annotate - content.code.copy - navigation.footer + - navigation.sections + - toc.follow + - toc.integrate - navigation.tabs - navigation.tabs.sticky - - navigation.expand - - toc.follow - header.autohide - search.suggest - search.highlight @@ -42,6 +57,10 @@ theme: name: Switch to dark mode +# The `mike` versioning provider +# https://github.com/jimporter/mike +# +# This is what allows us to create versioned docs in the github cli extra: version: provider: mike @@ -51,6 +70,9 @@ extra: - icon: fontawesome/brands/twitter link: https://twitter.com/automl_org +# We do have some extra custom css +# If for whatever reason you think this is breaking something, +# please feel free to remove it. extra_css: - stylesheets/custom.css @@ -90,6 +112,18 @@ markdown_extensions: emoji_index: !!python/name:material.extensions.emoji.twemoji emoji_generator: !!python/name:material.extensions.emoji.to_svg +# These are files that are run when serving the docs. +hooks: + # This prevents logging messages from polluting the doc build + - docs/hooks/cleanup_log_output.py + # This prevents markdown_exec (plugin) from executing code blocks + # dependant on environment variables. These env variables are + # automatically set with the `justfile` commands to build docs + - docs/hooks/disable_markdown_exec.py + # This hook simply prints the page being rendered for an easier time debugging + # any issues with code in docs + - docs/hooks/debug_which_page_is_being_rendered.py + plugins: - search - autorefs @@ -111,10 +145,10 @@ plugins: - mkdocstrings: default_handler: python enable_inventory: true - custom_templates: docs/_templates handlers: python: paths: [src] + # Extra objects which allow for linking to external docs import: - 'https://docs.python.org/3/objects.inv' - 'https://numpy.org/doc/stable/objects.inv' @@ -123,31 +157,37 @@ plugins: - 'https://scikit-learn.org/stable/objects.inv' - 'https://pytorch.org/docs/stable/objects.inv' - 'https://jobqueue.dask.org/en/latest/objects.inv' + # Please do not try to change these without having + # looked at all of the documentation and seeing if it + # causes the API docs to look weird anywhere. 
options: # https://mkdocstrings.github.io/python/usage/ docstring_section_style: spacy docstring_options: ignore_init_summary: true trim_doctest_flags: true + returns_multiple_items: false show_docstring_attributes: true show_docstring_description: true - show_root_heading: false - show_root_toc_entry: false + show_root_heading: true + show_root_toc_entry: true show_object_full_path: false + show_root_members_full_path: false + signature_crossrefs: true merge_init_into_class: true + show_symbol_type_heading: true + show_symbol_type_toc: true docstring_style: google + inherited_members: true show_if_no_docstring: false show_bases: true show_source: true - members_order: "source" - # Would like to set `group_by_category` to false - # https://github.com/mkdocstrings/mkdocstrings/issues/579 + members_order: "alphabetical" group_by_category: true show_signature: true - separate_signature: false - show_signature_annotations: false + separate_signature: true + show_signature_annotations: true filters: - "!^_[^_]" - - "_sample" # Kind of a hack to have this render a private method nav: - Home: "index.md" @@ -187,4 +227,5 @@ nav: # Auto generated with docs/api_generator.py - API: "api/" - Contributing: "contributing.md" + - What's New?: "changelog.md" diff --git a/pyproject.toml b/pyproject.toml index ac4b17b3..71860579 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ doc = [ "mike", "pillow", "cairosvg", + "black", # This allows mkdocstrings to format signatures in the docs ] # --- Optional user dependancies sklearn = ["scikit-learn", "threadpoolctl"] diff --git a/src/amltk/pipeline/components.py b/src/amltk/pipeline/components.py index 45555acf..cc730178 100644 --- a/src/amltk/pipeline/components.py +++ b/src/amltk/pipeline/components.py @@ -1,73 +1,11 @@ -"""You can use the various different node types to build a pipeline. - -You can connect these nodes together using either the constructors explicitly, -as shown in the examples. We also provide some index operators: - -* `>>` - Connect nodes together to form a [`Sequential`][amltk.pipeline.components.Sequential] -* `&` - Connect nodes together to form a [`Join`][amltk.pipeline.components.Join] -* `|` - Connect nodes together to form a [`Choice`][amltk.pipeline.components.Choice] - -There is also another short-hand that you may find useful to know: - -* `{comp1, comp2, comp3}` - This will automatically be converted into a - [`Choice`][amltk.pipeline.Choice] between the given components. -* `(comp1, comp2, comp3)` - This will automatically be converted into a - [`Join`][amltk.pipeline.Join] between the given components. -* `[comp1, comp2, comp3]` - This will automatically be converted into a - [`Sequential`][amltk.pipeline.Sequential] between the given components. - -For each of these components we will show examples using -the [`#! 
"sklearn"` builder][amltk.pipeline.builders.sklearn.build] - -The components are: - -### Component - -::: amltk.pipeline.components.Component - options: - members: false - -### Sequential - -::: amltk.pipeline.components.Sequential - options: - members: false - -### Choice - -::: amltk.pipeline.components.Choice - options: - members: false - -### Split - -::: amltk.pipeline.components.Split - options: - members: false - -### Join - -::: amltk.pipeline.components.Join - options: - members: false - -### Fixed - -::: amltk.pipeline.components.Fixed - options: - members: false - -### Searchable - -::: amltk.pipeline.components.Searchable - options: - members: false -""" # noqa: E501 +"""The provided subclasses of a [`Node`][amltk.pipeline.node.Node] +that can be used can be assembled into a pipeline. +""" from __future__ import annotations import inspect from collections.abc import Callable, Iterator, Mapping, Sequence -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import TYPE_CHECKING, Any, ClassVar, TypeVar, overload from typing_extensions import Self, override @@ -159,120 +97,84 @@ def as_node( # noqa: PLR0911 @dataclass(init=False, frozen=True, eq=True) -class Join(Node[Item, Space]): - """[`Join`][amltk.pipeline.Join] together different parts of the pipeline. - - This indicates the different children in - [`.nodes`][amltk.pipeline.Node.nodes] should act in tandem with one - another, for example, concatenating the outputs of the various members of the - `Join`. - - ```python exec="true" source="material-block" html="true" - from amltk.pipeline import Join, Component - from sklearn.decomposition import PCA - from sklearn.feature_selection import SelectKBest - - pca = Component(PCA, space={"n_components": (1, 3)}) - kbest = Component(SelectKBest, space={"k": (1, 3)}) - - join = Join(pca, kbest, name="my_feature_union") - from amltk._doc import doc_print; doc_print(print, join) # markdown-exec: hide +class Component(Node[Item, Space]): + """A [`Component`][amltk.pipeline.Component] of the pipeline with + a possible item and **no children**. - space = join.search_space("configspace") - from amltk._doc import doc_print; doc_print(print, space) # markdown-exec: hide + This is the basic building block of most pipelines, it accepts + as it's [`item=`][amltk.pipeline.node.Node.item] some function that will be + called with [`build_item()`][amltk.pipeline.components.Component.build_item] to + build that one part of the pipeline. - pipeline = join.build("sklearn") - from amltk._doc import doc_print; doc_print(print, pipeline) # markdown-exec: hide - ``` + When [`build_item()`][amltk.pipeline.Component.build_item] is called, whatever + the config of the component is at that time, will be used to construct the item. - You may also just join together nodes using an infix operator `&` if you prefer: + A common pattern is to use a [`Component`][amltk.pipeline.Component] to + wrap a constructor, specifying the [`space=`][amltk.pipeline.node.Node.space] + and [`config=`][amltk.pipeline.node.Node.config] to be used when building the + item. 
```python exec="true" source="material-block" html="true" - from amltk.pipeline import Join, Component - from sklearn.decomposition import PCA - from sklearn.feature_selection import SelectKBest - - pca = Component(PCA, space={"n_components": (1, 3)}) - kbest = Component(SelectKBest, space={"k": (1, 3)}) - - # Can not parametrize or name the join - join = pca & kbest - from amltk._doc import doc_print; doc_print(print, join) # markdown-exec: hide + from amltk.pipeline import Component + from sklearn.ensemble import RandomForestClassifier - # With a parametrized join - join = ( - Join(name="my_feature_union") & pca & kbest + rf = Component( + RandomForestClassifier, + config={"max_depth": 3}, + space={"n_estimators": (10, 100)} ) - item = join.build("sklearn") - print(item._repr_html_()) # markdown-exec: hide - ``` - - Whenever some other node sees a tuple, i.e. `(comp1, comp2, comp3)`, this - will automatically be converted into a `Join`. - - ```python exec="true" source="material-block" html="true" - from amltk.pipeline import Sequential, Component - from sklearn.decomposition import PCA - from sklearn.feature_selection import SelectKBest - from sklearn.ensemble import RandomForestClassifier + from amltk._doc import doc_print; doc_print(print, rf) # markdown-exec: hide - pca = Component(PCA, space={"n_components": (1, 3)}) - kbest = Component(SelectKBest, space={"k": (1, 3)}) + config = {"n_estimators": 50} # Sample from some space or something + configured_rf = rf.configure(config) - # Can not parametrize or name the join - join = Sequential( - (pca, kbest), - RandomForestClassifier(n_estimators=5), - name="my_feature_union", - ) - from amltk._doc import doc_print; doc_print(print, join) # markdown-exec: hide + estimator = configured_rf.build_item() + from amltk._doc import doc_print; doc_print(print, estimator) # markdown-exec: hide ``` - Like all [`Node`][amltk.pipeline.node.Node]s, a `Join` accepts an explicit - [`name=`][amltk.pipeline.node.Node.name], - [`item=`][amltk.pipeline.node.Node.item], - [`config=`][amltk.pipeline.node.Node.config], - [`space=`][amltk.pipeline.node.Node.space], - [`fidelities=`][amltk.pipeline.node.Node.fidelities], - [`config_transform=`][amltk.pipeline.node.Node.config_transform] and - [`meta=`][amltk.pipeline.node.Node.meta]. - See Also: * [`Node`][amltk.pipeline.node.Node] """ - nodes: tuple[Node, ...] - """The nodes that this node leads to.""" + item: Callable[..., Item] + """A node which constructs an item in the pipeline.""" - RICH_OPTIONS: ClassVar[RichOptions] = RichOptions(panel_color="#7E6B8F") + nodes: tuple[()] + """A component has no children.""" - _NODES_INIT: ClassVar = "args" + RICH_OPTIONS: ClassVar[RichOptions] = RichOptions(panel_color="#E6AF2E") + + _NODES_INIT: ClassVar = None def __init__( self, - *nodes: Node | NodeLike, + item: Callable[..., Item], + *, name: str | None = None, - item: Item | Callable[[Item], Item] | None = None, config: Config | None = None, space: Space | None = None, fidelities: Mapping[str, Any] | None = None, config_transform: Callable[[Config, Any], Config] | None = None, meta: Mapping[str, Any] | None = None, ): - """See [`Node`][amltk.pipeline.node.Node] for details.""" - _nodes = tuple(as_node(n) for n in nodes) - if not all_unique(_nodes, key=lambda node: node.name): - raise ValueError( - f"Can't handle nodes they do not all contain unique names, {nodes=}." - "\nAll nodes must have a unique name. 
Please provide a `name=` to them", - ) - - if name is None: - name = f"Join-{randuid(8)}" + """Initialize a component. + Args: + item: The item attached to this node. + name: The name of the node. If not specified, the name will be + generated from the item. + config: The configuration for this node. + space: The search space for this node. This will be used when + [`search_space()`][amltk.pipeline.node.Node.search_space] is called. + fidelities: The fidelities for this node. + config_transform: A function that transforms the `config=` parameter + during [`configure(config)`][amltk.pipeline.node.Node.configure] + before return the new configured node. Useful for times where + you need to combine multiple parameters into one. + meta: Any meta information about this node. + """ super().__init__( - *_nodes, - name=name, + name=name if name is not None else entity_name(item), item=item, config=config, space=space, @@ -281,146 +183,98 @@ def __init__( meta=meta, ) - @override - def __and__(self, other: Node | NodeLike) -> Join: - other_node = as_node(other) - if any(other_node.name == node.name for node in self.nodes): - raise ValueError( - f"Can't handle node with name '{other_node.name} as" - f" there is already a node called '{other_node.name}' in {self.name}", - ) - - nodes = (*tuple(as_node(n) for n in self.nodes), other_node) - return self.mutate(name=self.name, nodes=nodes) - - -@dataclass(init=False, frozen=True, eq=True) -class Choice(Node[Item, Space]): - """A [`Choice`][amltk.pipeline.Choice] between different subcomponents. - - This indicates that a choice should be made between the different children in - [`.nodes`][amltk.pipeline.Node.nodes], usually done when you - [`configure()`][amltk.pipeline.node.Node.configure] with some `config` from - a [`search_space()`][amltk.pipeline.node.Node.search_space]. - - ```python exec="true" source="material-block" html="true" - from amltk.pipeline import Choice, Component - from sklearn.ensemble import RandomForestClassifier - from sklearn.neural_network import MLPClassifier - - rf = Component(RandomForestClassifier, space={"n_estimators": (10, 100)}) - mlp = Component(MLPClassifier, space={"activation": ["logistic", "relu", "tanh"]}) - - estimator_choice = Choice(rf, mlp, name="estimator") - from amltk._doc import doc_print; doc_print(print, estimator_choice) # markdown-exec: hide - - space = estimator_choice.search_space("configspace") - from amltk._doc import doc_print; doc_print(print, space) # markdown-exec: hide - - config = space.sample_configuration() - from amltk._doc import doc_print; doc_print(print, config) # markdown-exec: hide - - configured_choice = estimator_choice.configure(config) - from amltk._doc import doc_print; doc_print(print, configured_choice) # markdown-exec: hide - - chosen_estimator = configured_choice.chosen() - from amltk._doc import doc_print; doc_print(print, chosen_estimator) # markdown-exec: hide - - estimator = chosen_estimator.build_item() - from amltk._doc import doc_print; doc_print(print, estimator) # markdown-exec: hide - ``` + def build_item(self, **kwargs: Any) -> Item: + """Build the item attached to this component. 
- You may also just add nodes to a `Choice` using an infix operator `|` if you prefer: + Args: + **kwargs: Any additional arguments to pass to the item - ```python exec="true" source="material-block" html="true" - from amltk.pipeline import Choice, Component - from sklearn.ensemble import RandomForestClassifier - from sklearn.neural_network import MLPClassifier + Returns: + Item + The built item + """ + config = self.config or {} + try: + return self.item(**{**config, **kwargs}) + except TypeError as e: + new_msg = f"Failed to build `{self.item=}` with `{self.config=}`.\n" + if any(kwargs): + new_msg += f"Extra {kwargs=} were also provided.\n" + new_msg += ( + "If the item failed to initialize, a common reason can be forgetting" + " to call `configure()` on the `Component` or the pipeline it is in or" + " not calling `build()`/`build_item()` on the **returned** value of" + " `configure()`.\n" + "Reasons may also include not having fully specified the `config`" + " initially, it having not being configured fully from `configure()`" + " or from misspecfying parameters in the `space`." + ) + raise ComponentBuildError(new_msg) from e - rf = Component(RandomForestClassifier, space={"n_estimators": (10, 100)}) - mlp = Component(MLPClassifier, space={"activation": ["logistic", "relu", "tanh"]}) - estimator_choice = ( - Choice(name="estimator") | mlp | rf - ) - from amltk._doc import doc_print; doc_print(print, estimator_choice) # markdown-exec: hide - ``` +@dataclass(init=False, frozen=True, eq=True) +class Searchable(Node[None, Space]): # type: ignore + """A [`Searchable`][amltk.pipeline.Searchable] + node of the pipeline which just represents a search space, no item attached. - Whenever some other node sees a set, i.e. `{comp1, comp2, comp3}`, this - will automatically be converted into a `Choice`. + While not usually applicable to pipelines you want to build, this node + is useful for creating a search space, especially if the real pipeline you + want to optimize can not be built directly. For example, if you are optimize + a script, you may wish to use a `Searchable` to represent the search space + of that script. ```python exec="true" source="material-block" html="true" - from amltk.pipeline import Choice, Component, Sequential - from sklearn.ensemble import RandomForestClassifier - from sklearn.neural_network import MLPClassifier - from sklearn.impute import SimpleImputer - - rf = Component(RandomForestClassifier, space={"n_estimators": (10, 100)}) - mlp = Component(MLPClassifier, space={"activation": ["logistic", "relu", "tanh"]}) + from amltk.pipeline import Searchable - pipeline = Sequential( - SimpleImputer(fill_value=0), - {mlp, rf}, - name="my_pipeline", - ) - from amltk._doc import doc_print; doc_print(print, pipeline) # markdown-exec: hide + script_space = Searchable({"mode": ["orange", "blue", "red"], "n": (10, 100)}) + from amltk._doc import doc_print; doc_print(print, script_space) # markdown-exec: hide ``` - Like all [`Node`][amltk.pipeline.node.Node]s, a `Choice` accepts an explicit - [`name=`][amltk.pipeline.node.Node.name], - [`item=`][amltk.pipeline.node.Node.item], - [`config=`][amltk.pipeline.node.Node.config], - [`space=`][amltk.pipeline.node.Node.space], - [`fidelities=`][amltk.pipeline.node.Node.fidelities], - [`config_transform=`][amltk.pipeline.node.Node.config_transform] and - [`meta=`][amltk.pipeline.node.Node.meta]. - - !!! 
warning "Order of nodes" - - The given nodes of a choice are always ordered according - to their name, so indexing `choice.nodes` may not be reliable - if modifying the choice dynamically. - - Please use `choice["name"]` to access the nodes instead. - See Also: * [`Node`][amltk.pipeline.node.Node] """ # noqa: E501 - nodes: tuple[Node, ...] - """The nodes that this node leads to.""" + item: None = None + """A searchable has no item.""" - RICH_OPTIONS: ClassVar[RichOptions] = RichOptions(panel_color="#FF4500") - _NODES_INIT: ClassVar = "args" + nodes: tuple[()] = () + """A searchable has no children.""" + + RICH_OPTIONS: ClassVar[RichOptions] = RichOptions(panel_color="light_steel_blue") + + _NODES_INIT: ClassVar = None def __init__( self, - *nodes: Node | NodeLike, + space: Space | None = None, + *, name: str | None = None, - item: Item | Callable[[Item], Item] | None = None, config: Config | None = None, - space: Space | None = None, fidelities: Mapping[str, Any] | None = None, config_transform: Callable[[Config, Any], Config] | None = None, meta: Mapping[str, Any] | None = None, ): - """See [`Node`][amltk.pipeline.node.Node] for details.""" - _nodes: tuple[Node, ...] = tuple( - sorted((as_node(n) for n in nodes), key=lambda n: n.name), - ) - if not all_unique(_nodes, key=lambda node: node.name): - raise ValueError( - f"Can't handle nodes as we can not generate a __choice__ for {nodes=}." - "\nAll nodes must have a unique name. Please provide a `name=` to them", - ) + """Initialize a choice. + Args: + space: The search space for this node. This will be used when + [`search_space()`][amltk.pipeline.node.Node.search_space] is called. + name: The name of the node. If not specified, a random one will + be generated. + config: The configuration for this node. Useful for setting some + default values. + fidelities: The fidelities for this node. + config_transform: A function that transforms the `config=` parameter + during [`configure(config)`][amltk.pipeline.node.Node.configure] + before return the new configured node. Useful for times where + you need to combine multiple parameters into one. + meta: Any meta information about this node. + """ if name is None: - name = f"Choice-{randuid(8)}" + name = f"Searchable-{randuid(8)}" super().__init__( - *_nodes, name=name, - item=item, config=config, space=space, fidelities=fidelities, @@ -428,158 +282,87 @@ def __init__( meta=meta, ) - @override - def __or__(self, other: Node | NodeLike) -> Choice: - other_node = as_node(other) - if any(other_node.name == node.name for node in self.nodes): - raise ValueError( - f"Can't handle node with name '{other_node.name} as" - f" there is already a node called '{other_node.name}' in {self.name}", - ) - nodes = tuple( - sorted( - [as_node(n) for n in self.nodes] + [other_node], - key=lambda n: n.name, - ), - ) - return self.mutate(name=self.name, nodes=nodes) +@dataclass(init=False, frozen=True, eq=True) +class Fixed(Node[Item, None]): # type: ignore + """A [`Fixed`][amltk.pipeline.Fixed] part of the pipeline that + represents something that can not be configured and used directly as is. - def chosen(self) -> Node: - """The chosen branch. + It consists of an [`.item`][amltk.pipeline.node.Node.item] that is fixed, + non-configurable and non-searchable. It also has no children. 
- Returns: - The chosen branch - """ - match self.config: - case {"__choice__": choice}: - chosen = first_true( - self.nodes, - pred=lambda node: node.name == choice, - default=None, - ) - if chosen is None: - raise NodeNotFoundError(choice, self.name) + This is useful for representing parts of the pipeline that are fixed, for example + if you have a pipeline that is a `Sequential` of nodes, but you want to + fix the first component to be a `PCA` with `n_components=3`, you can use a `Fixed` + to represent that. - return chosen - case _: - raise NoChoiceMadeError(self.name) + ```python exec="true" source="material-block" html="true" + from amltk.pipeline import Component, Fixed, Sequential + from sklearn.ensemble import RandomForestClassifier + from sklearn.decomposition import PCA - @override - def configure( - self, - config: Config, - *, - prefixed_name: bool | None = None, - transform_context: Any | None = None, - params: Mapping[str, Any] | None = None, - ) -> Self: - """Configure this node and anything following it with the given config. + rf = Component(RandomForestClassifier, space={"n_estimators": (10, 100)}) + pca = Fixed(PCA(n_components=3)) - !!! note "Configuring a choice" + pipeline = Sequential(pca, rf, name="my_pipeline") + from amltk._doc import doc_print; doc_print(print, pipeline) # markdown-exec: hide + ``` - For a Choice, if the config has a `__choice__` key, then only the node - chosen will be configured. The others will not be configured at all and - their config will be discarded. + See Also: + * [`Node`][amltk.pipeline.node.Node] + """ - Args: - config: The configuration to apply - prefixed_name: Whether items in the config are prefixed by the names - of the nodes. - * If `None`, the default, then `prefixed_name` will be assumed to - be `True` if this node has a next node or if the config has - keys that begin with this nodes name. - * If `True`, then the config will be searched for items prefixed - by the name of the node (and subsequent chained nodes). - * If `False`, then the config will be searched for items without - the prefix, i.e. the config keys are exactly those matching - this nodes search space. - transform_context: Any context to give to `config_transform=` of individual - nodes. - params: The params to match any requests when configuring this node. - These will match against any ParamRequests in the config and will - be used to fill in any missing values. + item: Item + """The fixed item that this node represents.""" - Returns: - The configured node - """ - # Get the config for this node - match prefixed_name: - case True: - config = mapping_select(config, f"{self.name}:") - case False: - pass - case None if any(k.startswith(f"{self.name}:") for k in config): - config = mapping_select(config, f"{self.name}:") - case None: - pass + space: None = None + """A fixed node has no search space.""" - _kwargs: dict[str, Any] = {} + fidelities: None = None + """A fixed node has no search space.""" - # Configure all the branches if exists - # This part is what differs for a Choice - if len(self.nodes) > 0: - choice_made = config.get("__choice__", None) - if choice_made is not None: - matching_child = first_true( - self.nodes, - pred=lambda node: node.name == choice_made, - default=None, - ) - if matching_child is None: - raise ValueError( - f"Can not find matching child for choice {self.name} with child" - f" {choice_made}." - "\nPlease check the config and ensure that the choice is one of" - f" {[n.name for n in self.nodes]}." 
- f"\nThe config recieved at this choice node was {config=}.", - ) + config: None = None + """A fixed node has no config.""" - # We still iterate over all of them just to ensure correct ordering - nodes = tuple( - node.copy() - if node.name != choice_made - else matching_child.configure( - config, - prefixed_name=True, - transform_context=transform_context, - params=params, - ) - for node in self.nodes - ) - _kwargs["nodes"] = nodes - else: - nodes = tuple( - node.configure( - config, - prefixed_name=True, - transform_context=transform_context, - params=params, - ) - for node in self.nodes - ) - _kwargs["nodes"] = nodes + config_transform: None = None + """A fixed node has no config so no transform.""" - this_config = { - hp: v - for hp, v in config.items() - if ( - ":" not in hp - and not any(hp.startswith(f"{node.name}") for node in self.nodes) - ) - } - if self.config is not None: - this_config = {**self.config, **this_config} + nodes: tuple[()] = () + """A fixed node has no children.""" - this_config = dict(self._fufill_param_requests(this_config, params=params)) + RICH_OPTIONS: ClassVar[RichOptions] = RichOptions(panel_color="#56351E") - if self.config_transform is not None: - this_config = dict(self.config_transform(this_config, transform_context)) + _NODES_INIT: ClassVar = None - if len(this_config) > 0: - _kwargs["config"] = dict(this_config) + def __init__( # noqa: D417 + self, + item: Item, + *, + name: str | None = None, + config: None = None, + space: None = None, + fidelities: None = None, + config_transform: None = None, + meta: Mapping[str, Any] | None = None, + ): + """Initialize a fixed node. - return self.mutate(**_kwargs) + Args: + item: The item attached to this node. Will be fixed and can not + be configured. + name: The name of the node. If not specified, the name will be + generated from the item. + meta: Any meta information about this node. + """ + super().__init__( + name=name if name is not None else entity_name(item), + item=item, + config=config, + space=space, + fidelities=fidelities, + config_transform=config_transform, + meta=meta, + ) @dataclass(init=False, frozen=True, eq=True) @@ -601,68 +384,14 @@ class Sequential(Node[Item, Space]): name="my_pipeline" ) from amltk._doc import doc_print; doc_print(print, pipeline) # markdown-exec: hide - - space = pipeline.search_space("configspace") - from amltk._doc import doc_print; doc_print(print, space) # markdown-exec: hide - - configuration = space.sample_configuration() - from amltk._doc import doc_print; doc_print(print, configuration) # markdown-exec: hide - - configured_pipeline = pipeline.configure(configuration) - from amltk._doc import doc_print; doc_print(print, configured_pipeline) # markdown-exec: hide - - sklearn_pipeline = pipeline.build("sklearn") - print(sklearn_pipeline._repr_html_()) # markdown-exec: hide - ``` - - You may also just chain together nodes using an infix operator `>>` if you prefer: - - ```python exec="true" source="material-block" html="true" - from amltk.pipeline import Join, Component, Sequential - from sklearn.decomposition import PCA - from sklearn.ensemble import RandomForestClassifier - - pipeline = ( - Sequential(name="my_pipeline") - >> PCA(n_components=3) - >> Component(RandomForestClassifier, space={"n_estimators": (10, 100)}) - ) - from amltk._doc import doc_print; doc_print(print, pipeline) # markdown-exec: hide - ``` - - Whenever some other node sees a list, i.e. `[comp1, comp2, comp3]`, this - will automatically be converted into a `Sequential`. 
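To make the list-to-`Sequential` rule above concrete, here is a minimal sketch (an editorial illustration, not part of the patch): a plain list nested inside a `Sequential` should be converted into an inner `Sequential` node. The estimators and the `outer_pipeline` name are arbitrary, and the nesting shown is inferred from the docstrings in this diff rather than verified against the library.

```python
from amltk.pipeline import Component, Sequential
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# The plain list should become a nested `Sequential` automatically,
# per the conversion rule described in the docstring above.
pipeline = Sequential(
    [SimpleImputer(fill_value=0), StandardScaler()],
    Component(RandomForestClassifier, space={"n_estimators": (10, 100)}),
    name="outer_pipeline",
)
print(pipeline)
```

The (removed) example just below shows the same conversion with lists given to a `Choice`.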
- - ```python exec="true" source="material-block" html="true" - from amltk.pipeline import Choice - from sklearn.impute import SimpleImputer - from sklearn.preprocessing import StandardScaler - from sklearn.ensemble import RandomForestClassifier - from sklearn.neural_network import MLPClassifier - - pipeline_choice = Choice( - [SimpleImputer(), RandomForestClassifier()], - [StandardScaler(), MLPClassifier()], - name="pipeline_choice" - ) - from amltk._doc import doc_print; doc_print(print, pipeline_choice) # markdown-exec: hide ``` - Like all [`Node`][amltk.pipeline.node.Node]s, a `Sequential` accepts an explicit - [`name=`][amltk.pipeline.node.Node.name], - [`item=`][amltk.pipeline.node.Node.item], - [`config=`][amltk.pipeline.node.Node.config], - [`space=`][amltk.pipeline.node.Node.space], - [`fidelities=`][amltk.pipeline.node.Node.fidelities], - [`config_transform=`][amltk.pipeline.node.Node.config_transform] and - [`meta=`][amltk.pipeline.node.Node.meta]. - See Also: * [`Node`][amltk.pipeline.node.Node] - """ # noqa: E501 + """ nodes: tuple[Node, ...] - """The nodes in series.""" + """The nodes ordered in series.""" RICH_OPTIONS: ClassVar[RichOptions] = RichOptions( panel_color="#7E6B8F", @@ -681,7 +410,25 @@ def __init__( config_transform: Callable[[Config, Any], Config] | None = None, meta: Mapping[str, Any] | None = None, ): - """See [`Node`][amltk.pipeline.node.Node] for details.""" + """Initialize a sequential node. + + Args: + nodes: The nodes that this node leads to. In the case of a `Sequential`, + the order here matters and it signifies that data should first + be passed through the first node, then the second, etc. + item: The item attached to this node (if any). + name: The name of the node. If not specified, the name will be + randomly generated. + config: The configuration for this node. + space: The search space for this node. This will be used when + [`search_space()`][amltk.pipeline.node.Node.search_space] is called. + fidelities: The fidelities for this node. + config_transform: A function that transforms the `config=` parameter + during [`configure(config)`][amltk.pipeline.node.Node.configure] + before return the new configured node. Useful for times where + you need to combine multiple parameters into one. + meta: Any meta information about this node. + """ _nodes = tuple(as_node(n) for n in nodes) # Perhaps we need to do a deeper check on this... @@ -749,88 +496,47 @@ def walk( @dataclass(init=False, frozen=True, eq=True) -class Split(Node[Item, Space]): - """A [`Split`][amltk.pipeline.Split] of data in a pipeline. +class Choice(Node[Item, Space]): + """A [`Choice`][amltk.pipeline.Choice] between different subcomponents. - This indicates the different children in - [`.nodes`][amltk.pipeline.Node.nodes] should - act in parallel but on different subsets of data. + This indicates that a choice should be made between the different children in + [`.nodes`][amltk.pipeline.Node.nodes], usually done when you + [`configure()`][amltk.pipeline.node.Node.configure] with some `config` from + a [`search_space()`][amltk.pipeline.node.Node.search_space]. 
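In practice the `config` is usually sampled from `search_space()`, but the `__choice__` key handled by `Choice.configure()` further down in this diff can also be written by hand. A hedged sketch, assuming the default class-derived node names and the `name:`-prefixed key format used elsewhere in these docstrings:

```python
from amltk.pipeline import Choice, Component
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

rf = Component(RandomForestClassifier, space={"n_estimators": (10, 100)})
mlp = Component(MLPClassifier, space={"activation": ["logistic", "relu", "tanh"]})
estimator_choice = Choice(rf, mlp, name="estimator")

# `__choice__` selects the branch; only that branch receives its config keys.
config = {
    "estimator:__choice__": "RandomForestClassifier",
    "estimator:RandomForestClassifier:n_estimators": 50,
}
configured = estimator_choice.configure(config)
estimator = configured.chosen().build_item()
print(estimator)
```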
```python exec="true" source="material-block" html="true" - from amltk.pipeline import Component, Split - from sklearn.impute import SimpleImputer - from sklearn.preprocessing import OneHotEncoder - from sklearn.compose import make_column_selector - - categorical_pipeline = [ - SimpleImputer(strategy="constant", fill_value="missing"), - OneHotEncoder(drop="first"), - ] - numerical_pipeline = Component(SimpleImputer, space={"strategy": ["mean", "median"]}) - - preprocessor = Split( - { - "categories": categorical_pipeline, - "numerical": numerical_pipeline, - }, - config={ - # This is how you would configure the split for the sklearn builder in particular - "categories": make_column_selector(dtype_include="category"), - "numerical": make_column_selector(dtype_exclude="category"), - }, - name="my_split" - ) - from amltk._doc import doc_print; doc_print(print, preprocessor) # markdown-exec: hide - - space = preprocessor.search_space("configspace") - from amltk._doc import doc_print; doc_print(print, space) # markdown-exec: hide - - configuration = space.sample_configuration() - from amltk._doc import doc_print; doc_print(print, configuration) # markdown-exec: hide + from amltk.pipeline import Choice, Component + from sklearn.ensemble import RandomForestClassifier + from sklearn.neural_network import MLPClassifier - configured_preprocessor = preprocessor.configure(configuration) - from amltk._doc import doc_print; doc_print(print, configured_preprocessor) # markdown-exec: hide + rf = Component(RandomForestClassifier, space={"n_estimators": (10, 100)}) + mlp = Component(MLPClassifier, space={"activation": ["logistic", "relu", "tanh"]}) - built_preprocessor = configured_preprocessor.build("sklearn") - print(built_preprocessor._repr_html_()) # markdown-exec: hide + estimator_choice = Choice(rf, mlp, name="estimator") + from amltk._doc import doc_print; doc_print(print, estimator_choice) # markdown-exec: hide ``` - The split is a slight oddity when compared to the other kinds of components in that - it allows a `dict` as it's first argument, where the keys are the names of the - different paths through which data will go and the values are the actual nodes that - will receive the data. - - If nodes are passed in as they are for all other components, usually the name of the - first node will be important for any builder trying to make sense of how - to use the `Split` + !!! warning "Order of nodes" + The given nodes of a choice are always ordered according + to their name, so indexing `choice.nodes` may not be reliable + if modifying the choice dynamically. - Like all [`Node`][amltk.pipeline.node.Node]s, a `Split` accepts an explicit - [`name=`][amltk.pipeline.node.Node.name], - [`item=`][amltk.pipeline.node.Node.item], - [`config=`][amltk.pipeline.node.Node.config], - [`space=`][amltk.pipeline.node.Node.space], - [`fidelities=`][amltk.pipeline.node.Node.fidelities], - [`config_transform=`][amltk.pipeline.node.Node.config_transform] and - [`meta=`][amltk.pipeline.node.Node.meta]. + Please use `choice["name"]` to access the nodes instead. See Also: * [`Node`][amltk.pipeline.node.Node] """ # noqa: E501 nodes: tuple[Node, ...] 
- """The nodes that this node leads to.""" - - RICH_OPTIONS: ClassVar[RichOptions] = RichOptions( - panel_color="#777DA7", - node_orientation="horizontal", - ) + """The choice of possible nodes that this choice could take.""" + RICH_OPTIONS: ClassVar[RichOptions] = RichOptions(panel_color="#FF4500") _NODES_INIT: ClassVar = "args" def __init__( self, - *nodes: Node | NodeLike | dict[str, Node | NodeLike], + *nodes: Node | NodeLike, name: str | None = None, item: Item | Callable[[Item], Item] | None = None, config: Config | None = None, @@ -839,37 +545,34 @@ def __init__( config_transform: Callable[[Config, Any], Config] | None = None, meta: Mapping[str, Any] | None = None, ): - """See [`Node`][amltk.pipeline.node.Node] for details.""" - if any(isinstance(n, dict) for n in nodes): - if len(nodes) > 1: - raise ValueError( - "Can't handle multiple nodes with a dictionary as a node.\n" - f"{nodes=}", - ) - _node = nodes[0] - assert isinstance(_node, dict) - - def _construct(key: str, value: Node | NodeLike) -> Node: - match value: - case list(): - return Sequential(*value, name=key) - case set() | tuple(): - return as_node(value, name=key) - case _: - return Sequential(value, name=key) - - _nodes = tuple(_construct(key, value) for key, value in _node.items()) - else: - _nodes = tuple(as_node(n) for n in nodes) + """Initialize a choice node. + Args: + nodes: The nodes that should be chosen between for this node. + item: The item attached to this node (if any). + name: The name of the node. If not specified, the name will be + randomly generated. + config: The configuration for this node. + space: The search space for this node. This will be used when + [`search_space()`][amltk.pipeline.node.Node.search_space] is called. + fidelities: The fidelities for this node. + config_transform: A function that transforms the `config=` parameter + during [`configure(config)`][amltk.pipeline.node.Node.configure] + before return the new configured node. Useful for times where + you need to combine multiple parameters into one. + meta: Any meta information about this node. + """ + _nodes: tuple[Node, ...] = tuple( + sorted((as_node(n) for n in nodes), key=lambda n: n.name), + ) if not all_unique(_nodes, key=lambda node: node.name): raise ValueError( - f"Can't handle nodes they do not all contain unique names, {nodes=}." + f"Can't handle nodes as we can not generate a __choice__ for {nodes=}." "\nAll nodes must have a unique name. Please provide a `name=` to them", ) if name is None: - name = f"Split-{randuid(8)}" + name = f"Choice-{randuid(8)}" super().__init__( *_nodes, @@ -882,190 +585,271 @@ def _construct(key: str, value: Node | NodeLike) -> Node: meta=meta, ) + @override + def __or__(self, other: Node | NodeLike) -> Choice: + other_node = as_node(other) + if any(other_node.name == node.name for node in self.nodes): + raise ValueError( + f"Can't handle node with name '{other_node.name} as" + f" there is already a node called '{other_node.name}' in {self.name}", + ) -@dataclass(init=False, frozen=True, eq=True) -class Component(Node[Item, Space]): - """A [`Component`][amltk.pipeline.Component] of the pipeline with - a possible item and **no children**. - - This is the basic building block of most pipelines, it accepts - as it's [`item=`][amltk.pipeline.node.Node.item] some function that will be - called with [`build_item()`][amltk.pipeline.components.Component.build_item] to - build that one part of the pipeline. 
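As a small illustration of that calling convention, the sketch below builds a `Component` directly with `build_item()`. Judging by the `{**config, **kwargs}` merge in the `build_item()` implementation earlier in this diff, extra keyword arguments are combined with the stored `config` (and win on key clashes); the particular parameter values here are only an example.

```python
from amltk.pipeline import Component
from sklearn.ensemble import RandomForestClassifier

rf = Component(RandomForestClassifier, config={"max_depth": 3})

# `max_depth` comes from the node's config, `n_estimators` from the kwargs.
estimator = rf.build_item(n_estimators=25)
print(estimator)
```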
- - When [`build_item()`][amltk.pipeline.Component.build_item] is called, - The [`.config`][amltk.pipeline.node.Node.config] on this node will be passed - to the function to build the item. - - A common pattern is to use a [`Component`][amltk.pipeline.Component] to - wrap a constructor, specifying the [`space=`][amltk.pipeline.node.Node.space] - and [`config=`][amltk.pipeline.node.Node.config] to be used when building the - item. - - ```python exec="true" source="material-block" html="true" - from amltk.pipeline import Component - from sklearn.ensemble import RandomForestClassifier - - rf = Component( - RandomForestClassifier, - config={"max_depth": 3}, - space={"n_estimators": (10, 100)} - ) - from amltk._doc import doc_print; doc_print(print, rf) # markdown-exec: hide + nodes = tuple( + sorted( + [as_node(n) for n in self.nodes] + [other_node], + key=lambda n: n.name, + ), + ) + return self.mutate(name=self.name, nodes=nodes) - config = {"n_estimators": 50} # Sample from some space or something - configured_rf = rf.configure(config) + def chosen(self) -> Node: + """The chosen branch. - estimator = configured_rf.build_item() - from amltk._doc import doc_print; doc_print(print, estimator) # markdown-exec: hide - ``` + Returns: + The chosen branch + """ + match self.config: + case {"__choice__": choice}: + chosen = first_true( + self.nodes, + pred=lambda node: node.name == choice, + default=None, + ) + if chosen is None: + raise NodeNotFoundError(choice, self.name) - Whenever some other node sees a function/constructor, i.e. `RandomForestClassifier`, - this will automatically be converted into a `Component`. + return chosen + case _: + raise NoChoiceMadeError(self.name) - ```python exec="true" source="material-block" html="true" - from amltk.pipeline import Sequential - from sklearn.ensemble import RandomForestClassifier + @override + def configure( + self, + config: Config, + *, + prefixed_name: bool | None = None, + transform_context: Any | None = None, + params: Mapping[str, Any] | None = None, + ) -> Self: + """Configure this node and anything following it with the given config. - pipeline = Sequential(RandomForestClassifier, name="my_pipeline") - from amltk._doc import doc_print; doc_print(print, pipeline) # markdown-exec: hide - ``` + !!! note "Configuring a choice" - The default `.name` of a component is the name of the class/function that it will - use. You can explicitly set the `name=` if you want to when constructing the - component. + For a Choice, if the config has a `__choice__` key, then only the node + chosen will be configured. The others will not be configured at all and + their config will be discarded. - Like all [`Node`][amltk.pipeline.node.Node]s, a `Component` accepts an explicit - [`name=`][amltk.pipeline.node.Node.name], - [`item=`][amltk.pipeline.node.Node.item], - [`config=`][amltk.pipeline.node.Node.config], - [`space=`][amltk.pipeline.node.Node.space], - [`fidelities=`][amltk.pipeline.node.Node.fidelities], - [`config_transform=`][amltk.pipeline.node.Node.config_transform] and - [`meta=`][amltk.pipeline.node.Node.meta]. + Args: + config: The configuration to apply + prefixed_name: Whether items in the config are prefixed by the names + of the nodes. + * If `None`, the default, then `prefixed_name` will be assumed to + be `True` if this node has a next node or if the config has + keys that begin with this nodes name. + * If `True`, then the config will be searched for items prefixed + by the name of the node (and subsequent chained nodes). 
+ * If `False`, then the config will be searched for items without + the prefix, i.e. the config keys are exactly those matching + this nodes search space. + transform_context: Any context to give to `config_transform=` of individual + nodes. + params: The params to match any requests when configuring this node. + These will match against any ParamRequests in the config and will + be used to fill in any missing values. - See Also: - * [`Node`][amltk.pipeline.node.Node] - """ + Returns: + The configured node + """ + # Get the config for this node + match prefixed_name: + case True: + config = mapping_select(config, f"{self.name}:") + case False: + pass + case None if any(k.startswith(f"{self.name}:") for k in config): + config = mapping_select(config, f"{self.name}:") + case None: + pass - item: Callable[..., Item] - """A node which constructs an item in the pipeline.""" + _kwargs: dict[str, Any] = {} - nodes: tuple[()] - """A component has no children.""" + # Configure all the branches if exists + # This part is what differs for a Choice + if len(self.nodes) > 0: + choice_made = config.get("__choice__", None) + if choice_made is not None: + matching_child = first_true( + self.nodes, + pred=lambda node: node.name == choice_made, + default=None, + ) + if matching_child is None: + raise ValueError( + f"Can not find matching child for choice {self.name} with child" + f" {choice_made}." + "\nPlease check the config and ensure that the choice is one of" + f" {[n.name for n in self.nodes]}." + f"\nThe config recieved at this choice node was {config=}.", + ) - RICH_OPTIONS: ClassVar[RichOptions] = RichOptions(panel_color="#E6AF2E") + # We still iterate over all of them just to ensure correct ordering + nodes = tuple( + node.copy() + if node.name != choice_made + else matching_child.configure( + config, + prefixed_name=True, + transform_context=transform_context, + params=params, + ) + for node in self.nodes + ) + _kwargs["nodes"] = nodes + else: + nodes = tuple( + node.configure( + config, + prefixed_name=True, + transform_context=transform_context, + params=params, + ) + for node in self.nodes + ) + _kwargs["nodes"] = nodes - _NODES_INIT: ClassVar = None + this_config = { + hp: v + for hp, v in config.items() + if ( + ":" not in hp + and not any(hp.startswith(f"{node.name}") for node in self.nodes) + ) + } + if self.config is not None: + this_config = {**self.config, **this_config} - def __init__( - self, - item: Callable[..., Item], - *, - name: str | None = None, - config: Config | None = None, - space: Space | None = None, - fidelities: Mapping[str, Any] | None = None, - config_transform: Callable[[Config, Any], Config] | None = None, - meta: Mapping[str, Any] | None = None, - ): - """See [`Node`][amltk.pipeline.node.Node] for details.""" - super().__init__( - name=name if name is not None else entity_name(item), - item=item, - config=config, - space=space, - fidelities=fidelities, - config_transform=config_transform, - meta=meta, - ) + this_config = dict(self._fufill_param_requests(this_config, params=params)) - def build_item(self, **kwargs: Any) -> Item: - """Build the item attached to this component. 
+ if self.config_transform is not None: + this_config = dict(self.config_transform(this_config, transform_context)) - Args: - **kwargs: Any additional arguments to pass to the item + if len(this_config) > 0: + _kwargs["config"] = dict(this_config) - Returns: - Item - The built item - """ - config = self.config or {} - try: - return self.item(**{**config, **kwargs}) - except TypeError as e: - new_msg = f"Failed to build `{self.item=}` with `{self.config=}`.\n" - if any(kwargs): - new_msg += f"Extra {kwargs=} were also provided.\n" - new_msg += ( - "If the item failed to initialize, a common reason can be forgetting" - " to call `configure()` on the `Component` or the pipeline it is in or" - " not calling `build()`/`build_item()` on the **returned** value of" - " `configure()`.\n" - "Reasons may also include not having fully specified the `config`" - " initially, it having not being configured fully from `configure()`" - " or from misspecfying parameters in the `space`." - ) - raise ComponentBuildError(new_msg) from e + return self.mutate(**_kwargs) @dataclass(init=False, frozen=True, eq=True) -class Searchable(Node[None, Space]): # type: ignore - """A [`Searchable`][amltk.pipeline.Searchable] - node of the pipeline which just represents a search space, no item attached. +class Split(Node[Item, Space]): + """A [`Split`][amltk.pipeline.Split] of data in a pipeline. - While not usually applicable to pipelines you want to build, this component - is useful for creating a search space, especially if the real pipeline you - want to optimize can not be built directly. For example, if you are optimize - a script, you may wish to use a `Searchable` to represent the search space - of that script. + This indicates the different children in + [`.nodes`][amltk.pipeline.Node.nodes] should + act in parallel but on different subsets of data. ```python exec="true" source="material-block" html="true" - from amltk.pipeline import Searchable + from amltk.pipeline import Component, Split + from sklearn.impute import SimpleImputer + from sklearn.preprocessing import OneHotEncoder + from sklearn.compose import make_column_selector - script_space = Searchable({"mode": ["orange", "blue", "red"], "n": (10, 100)}) - from amltk._doc import doc_print; doc_print(print, script_space) # markdown-exec: hide - ``` + categorical_pipeline = [ + SimpleImputer(strategy="constant", fill_value="missing"), + OneHotEncoder(drop="first"), + ] + numerical_pipeline = Component(SimpleImputer, space={"strategy": ["mean", "median"]}) - A `Searchable` explicitly does not allow for `item=` to be set, nor can it have - any children. A `Searchable` accepts an explicit - [`name=`][amltk.pipeline.node.Node.name], - [`config=`][amltk.pipeline.node.Node.config], - [`space=`][amltk.pipeline.node.Node.space], - [`fidelities=`][amltk.pipeline.node.Node.fidelities], - [`config_transform=`][amltk.pipeline.node.Node.config_transform] and - [`meta=`][amltk.pipeline.node.Node.meta]. 
+ preprocessor = Split( + { + "categories": categorical_pipeline, + "numerical": numerical_pipeline, + }, + config={ + "categories": make_column_selector(dtype_include="category"), + "numerical": make_column_selector(dtype_exclude="category"), + }, + name="my_split" + ) + from amltk._doc import doc_print; doc_print(print, preprocessor) # markdown-exec: hide + ``` See Also: * [`Node`][amltk.pipeline.node.Node] """ # noqa: E501 - item: None = None - """A searchable has no item.""" - - nodes: tuple[()] = () - """A component has no children.""" - - RICH_OPTIONS: ClassVar[RichOptions] = RichOptions(panel_color="light_steel_blue") + RICH_OPTIONS: ClassVar[RichOptions] = RichOptions( + panel_color="#777DA7", + node_orientation="horizontal", + ) - _NODES_INIT: ClassVar = None + _NODES_INIT: ClassVar = "args" def __init__( self, - space: Space | None = None, - *, + *nodes: Node | NodeLike | dict[str, Node | NodeLike], name: str | None = None, + item: Item | Callable[[Item], Item] | None = None, config: Config | None = None, + space: Space | None = None, fidelities: Mapping[str, Any] | None = None, config_transform: Callable[[Config, Any], Config] | None = None, meta: Mapping[str, Any] | None = None, ): - """See [`Node`][amltk.pipeline.node.Node] for details.""" + """Initialize a split node. + + Args: + nodes: The nodes that this node leads to. You may also provide + a dictionary where the keys are the names of the nodes and + the values are the nodes or list of nodes themselves. + item: The item attached to this node. The object created by `item` + should be capable of figuring out how to deal with its child nodes. + name: The name of the node. If not specified, the name will be + generated from the item. + config: The configuration for this split. + space: The search space for this node. This will be used when + [`search_space()`][amltk.pipeline.node.Node.search_space] is called. + fidelities: The fidelities for this node. + config_transform: A function that transforms the `config=` parameter + during [`configure(config)`][amltk.pipeline.node.Node.configure] + before return the new configured node. Useful for times where + you need to combine multiple parameters into one. + meta: Any meta information about this node. + """ + if any(isinstance(n, dict) for n in nodes): + if len(nodes) > 1: + raise ValueError( + "Can't handle multiple nodes with a dictionary as a node.\n" + f"{nodes=}", + ) + _node = nodes[0] + assert isinstance(_node, dict) + + def _construct(key: str, value: Node | NodeLike) -> Node: + match value: + case list(): + return Sequential(*value, name=key) + case set() | tuple(): + return as_node(value, name=key) + case _: + return Sequential(value, name=key) + + _nodes = tuple(_construct(key, value) for key, value in _node.items()) + else: + _nodes = tuple(as_node(n) for n in nodes) + + if not all_unique(_nodes, key=lambda node: node.name): + raise ValueError( + f"Can't handle nodes they do not all contain unique names, {nodes=}." + "\nAll nodes must have a unique name. Please provide a `name=` to them", + ) + if name is None: - name = f"Searchable-{randuid(8)}" + name = f"Split-{randuid(8)}" super().__init__( + *_nodes, name=name, + item=item, config=config, space=space, fidelities=fidelities, @@ -1075,94 +859,77 @@ def __init__( @dataclass(init=False, frozen=True, eq=True) -class Fixed(Node[Item, None]): # type: ignore - """A [`Fixed`][amltk.pipeline.Fixed] part of the pipeline that - represents something that can not be configured and used directly as is. 
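A short sketch of that idea: an already-constructed estimator wrapped in `Fixed` is used as-is and contributes nothing to the search space, so only the `Component`'s space should appear. This assumes the `"configspace"` parser used by the other examples in this file is available; the pipeline name is illustrative.

```python
from amltk.pipeline import Component, Fixed, Sequential
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier

pca = Fixed(PCA(n_components=3))  # fixed: no config, no search space
rf = Component(RandomForestClassifier, space={"n_estimators": (10, 100)})

pipeline = Sequential(pca, rf, name="my_fixed_example")

# Only the RandomForestClassifier's `n_estimators` shows up in the space.
space = pipeline.search_space("configspace")
print(space)
```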
- - It consists of an [`.item`][amltk.pipeline.node.Node.item] that is fixed, - non-configurable and non-searchable. It also has no children. +class Join(Node[Item, Space]): + """[`Join`][amltk.pipeline.Join] together different parts of the pipeline. - This is useful for representing parts of the pipeline that are fixed, for example - if you have a pipeline that is a `Sequential` of nodes, but you want to - fix the first component to be a `PCA` with `n_components=3`, you can use a `Fixed` - to represent that. + This indicates the different children in + [`.nodes`][amltk.pipeline.Node.nodes] should act in tandem with one + another, for example, concatenating the outputs of the various members of the + `Join`. ```python exec="true" source="material-block" html="true" - from amltk.pipeline import Component, Fixed, Sequential - from sklearn.ensemble import RandomForestClassifier + from amltk.pipeline import Join, Component from sklearn.decomposition import PCA + from sklearn.feature_selection import SelectKBest - rf = Component(RandomForestClassifier, space={"n_estimators": (10, 100)}) - pca = Fixed(PCA(n_components=3)) - - pipeline = Sequential(pca, rf, name="my_pipeline") - from amltk._doc import doc_print; doc_print(print, pipeline) # markdown-exec: hide - ``` - - Whenever some other node sees an instance of something, i.e. something that can't be - called, this will automatically be converted into a `Fixed`. - - ```python exec="true" source="material-block" html="true" - from amltk.pipeline import Sequential - from sklearn.ensemble import RandomForestClassifier - from sklearn.decomposition import PCA + pca = Component(PCA, space={"n_components": (1, 3)}) + kbest = Component(SelectKBest, space={"k": (1, 3)}) - pipeline = Sequential( - PCA(n_components=3), - RandomForestClassifier(n_estimators=50), - name="my_pipeline", - ) - from amltk._doc import doc_print; doc_print(print, pipeline) # markdown-exec: hide + join = Join(pca, kbest, name="my_feature_union") + from amltk._doc import doc_print; doc_print(print, join) # markdown-exec: hide ``` - The default `.name` of a component is the class name of the item that it will - use. You can explicitly set the `name=` if you want to when constructing the - component. - - A `Fixed` accepts only an explicit [`name=`][amltk.pipeline.node.Node.name], - [`item=`][amltk.pipeline.node.Node.item], - [`meta=`][amltk.pipeline.node.Node.meta]. - See Also: * [`Node`][amltk.pipeline.node.Node] """ - item: Item = field() - """The fixed item that this node represents.""" - - space: None = None - """A frozen node has no search space.""" - - fidelities: None = None - """A frozen node has no search space.""" - - config: None = None - """A frozen node has no config.""" - - config_transform: None = None - """A frozen node has no config so no transform.""" - - nodes: tuple[()] = () - """A component has no children.""" - - RICH_OPTIONS: ClassVar[RichOptions] = RichOptions(panel_color="#56351E") + nodes: tuple[Node, ...] 
+ """The nodes that should be joined together in parallel.""" - _NODES_INIT: ClassVar = None + RICH_OPTIONS: ClassVar[RichOptions] = RichOptions(panel_color="#7E6B8F") + _NODES_INIT: ClassVar = "args" def __init__( self, - item: Item, - *, + *nodes: Node | NodeLike, name: str | None = None, - config: None = None, - space: None = None, - fidelities: None = None, - config_transform: None = None, + item: Item | Callable[[Item], Item] | None = None, + config: Config | None = None, + space: Space | None = None, + fidelities: Mapping[str, Any] | None = None, + config_transform: Callable[[Config, Any], Config] | None = None, meta: Mapping[str, Any] | None = None, ): - """See [`Node`][amltk.pipeline.node.Node] for details.""" + """Initialize a join node. + + Args: + nodes: The nodes that should be joined together in parallel. + item: The item attached to this node (if any). + name: The name of the node. If not specified, the name will be + randomly generated. + config: The configuration for this node. + space: The search space for this node. This will be used when + [`search_space()`][amltk.pipeline.node.Node.search_space] is called. + fidelities: The fidelities for this node. + config_transform: A function that transforms the `config=` parameter + during [`configure(config)`][amltk.pipeline.node.Node.configure] + before return the new configured node. Useful for times where + you need to combine multiple parameters into one. + meta: Any meta information about this node. + """ + _nodes = tuple(as_node(n) for n in nodes) + if not all_unique(_nodes, key=lambda node: node.name): + raise ValueError( + f"Can't handle nodes they do not all contain unique names, {nodes=}." + "\nAll nodes must have a unique name. Please provide a `name=` to them", + ) + + if name is None: + name = f"Join-{randuid(8)}" + super().__init__( - name=name if name is not None else entity_name(item), + *_nodes, + name=name, item=item, config=config, space=space, @@ -1170,3 +937,15 @@ def __init__( config_transform=config_transform, meta=meta, ) + + @override + def __and__(self, other: Node | NodeLike) -> Join: + other_node = as_node(other) + if any(other_node.name == node.name for node in self.nodes): + raise ValueError( + f"Can't handle node with name '{other_node.name} as" + f" there is already a node called '{other_node.name}' in {self.name}", + ) + + nodes = (*tuple(as_node(n) for n in self.nodes), other_node) + return self.mutate(name=self.name, nodes=nodes) diff --git a/src/amltk/pipeline/node.py b/src/amltk/pipeline/node.py index ec1ce55e..4c5e559e 100644 --- a/src/amltk/pipeline/node.py +++ b/src/amltk/pipeline/node.py @@ -160,6 +160,7 @@ class Node(RichRenderable, Generic[Item, Space]): fidelities: Mapping[str, Any] | None = field(hash=False) """The fidelities for this node""" + config_transform: Callable[[Config, Any], Config] | None = field(hash=False) """A function that transforms the configuration of this node""" @@ -173,7 +174,7 @@ class Node(RichRenderable, Generic[Item, Space]): panel_color="default", node_orientation="horizontal", ) - """Options for rich printing""" + """How to display this node in rich.""" def __init__( self, @@ -186,7 +187,18 @@ def __init__( config_transform: Callable[[Config, Any], Config] | None = None, meta: Mapping[str, Any] | None = None, ): - """Initialize a choice.""" + """Initialize a choice. 
+ + Args: + nodes: The nodes that this node leads to + name: The name of the node + item: The item attached to this node + config: The configuration for this node + space: The search space for this node + fidelities: The fidelities for this node + config_transform: A function that transforms the configuration of this node + meta: Any meta information about this node + """ super().__init__() object.__setattr__(self, "name", name) object.__setattr__(self, "item", item) @@ -198,7 +210,7 @@ def __init__( object.__setattr__(self, "nodes", nodes) def __getitem__(self, key: str) -> Node: - """Get the node with the given name.""" + """Get the first from [`.nodes`][amltk.pipeline.node.Node.nodes] with `key`.""" found = first_true( self.nodes, None, From 4198de7b424879bd906140d6c963824b95e3a761 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 26 Jan 2024 07:48:56 +0000 Subject: [PATCH 4/4] =?UTF-8?q?bump:=20version=201.8.0=20=E2=86=92=201.9.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 10 ++++++++++ pyproject.toml | 4 ++-- src/amltk/__version__.py | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb172db2..9fbb521e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +## 1.9.0 (2024-01-26) + +### Feat + +- **Optimizer**: Allow for batch ask requests (#224) + +### Fix + +- **Pynisher**: Ensure system supports limit (#223) + ## 1.8.0 (2024-01-22) ### Feat diff --git a/pyproject.toml b/pyproject.toml index 71860579..181ce8d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "amltk" -version = "1.8.0" +version = "1.9.0" dependencies = [ "typing_extensions", # Better typing "more_itertools", # Better iteration @@ -98,7 +98,7 @@ exclude_lines = [ [tool.commitizen] name = "cz_conventional_commits" -version = "1.8.0" +version = "1.9.0" update_changelog_on_bump = true version_files = ["pyproject.toml:version", "src/amltk/__version__.py"] changelog_start_rev = "1.0.0" diff --git a/src/amltk/__version__.py b/src/amltk/__version__.py index fc1822ec..0c4a0249 100644 --- a/src/amltk/__version__.py +++ b/src/amltk/__version__.py @@ -1,3 +1,3 @@ from __future__ import annotations -version = "1.8.0" +version = "1.9.0"