diff --git a/seaborn/_core/moves.py b/seaborn/_core/moves.py index b5b9593de9..14469be83d 100644 --- a/seaborn/_core/moves.py +++ b/seaborn/_core/moves.py @@ -1,19 +1,18 @@ from __future__ import annotations from dataclasses import dataclass +from typing import ClassVar, Callable, Optional, Union import numpy as np +from pandas import DataFrame from seaborn._core.groupby import GroupBy -from typing import TYPE_CHECKING -if TYPE_CHECKING: - from typing import Optional - from pandas import DataFrame - @dataclass class Move: + group_by_orient: ClassVar[bool] = True + def __call__(self, data: DataFrame, groupby: GroupBy, orient: str) -> DataFrame: raise NotImplementedError @@ -61,10 +60,12 @@ class Dodge(Move): """ Displacement and narrowing of overlapping marks along orientation axis. """ - empty: str = "keep" # keep, drop, fill + empty: str = "keep" # Options: keep, drop, fill gap: float = 0 # TODO accept just a str here? + # TODO should this always be present? + # TODO should the default be an "all" singleton? by: Optional[list[str]] = None def __call__(self, data: DataFrame, groupby: GroupBy, orient: str) -> DataFrame: @@ -117,7 +118,7 @@ class Stack(Move): """ Displacement of overlapping bar or area marks along the value axis. """ - # TODO center? (or should this be a different move?) + # TODO center? (or should this be a different move, e.g. Stream()?) def _stack(self, df, orient): @@ -140,6 +141,7 @@ def _stack(self, df, orient): def __call__(self, data: DataFrame, groupby: GroupBy, orient: str) -> DataFrame: # TODO where to ensure that other semantic variables are sorted properly? + # TODO why are we not using the passed-in groupby here? 
groupers = ["col", "row", orient] return GroupBy(groupers).apply(data, self._stack, orient) @@ -158,3 +160,41 @@ def __call__(self, data: DataFrame, groupby: GroupBy, orient: str) -> DataFrame: data["x"] = data["x"] + self.x data["y"] = data["y"] + self.y return data + + +@dataclass +class Norm(Move): + """ + Divisive scaling on the value axis after aggregating within groups. + """ + + func: Union[Callable, str] = "max" + where: Optional[str] = None + by: Optional[list[str]] = None + percent: bool = False + + group_by_orient: ClassVar[bool] = False + + def _norm(self, df, var): + + if self.where is None: + denom_data = df[var] + else: + denom_data = df.query(self.where)[var] + df[var] = df[var] / denom_data.agg(self.func) + + if self.percent: + df[var] = df[var] * 100 + + return df + + def __call__(self, data: DataFrame, groupby: GroupBy, orient: str) -> DataFrame: + + other = {"x": "y", "y": "x"}[orient] + return groupby.apply(data, self._norm, other) + + +# TODO +# @dataclass +# class Ridge(Move): +# ... 
diff --git a/seaborn/_core/plot.py b/seaborn/_core/plot.py index 6935c4d43e..56d933cb52 100644 --- a/seaborn/_core/plot.py +++ b/seaborn/_core/plot.py @@ -1117,11 +1117,12 @@ def get_order(var): if move is not None: moves = move if isinstance(move, list) else [move] for move_step in moves: - move_groupers = [ - orient, - *(getattr(move_step, "by", None) or grouping_properties), - *default_grouping_vars, - ] + move_by = getattr(move_step, "by", None) + if move_by is None: + move_by = grouping_properties + move_groupers = [*move_by, *default_grouping_vars] + if move_step.group_by_orient: + move_groupers.insert(0, orient) order = {var: get_order(var) for var in move_groupers} groupby = GroupBy(order) df = move_step(df, groupby, orient) diff --git a/seaborn/objects.py b/seaborn/objects.py index b33243fb76..72d8cdf2de 100644 --- a/seaborn/objects.py +++ b/seaborn/objects.py @@ -14,6 +14,6 @@ from seaborn._stats.regression import OLSFit, PolyFit # noqa: F401 from seaborn._stats.histograms import Hist # noqa: F401 -from seaborn._core.moves import Dodge, Jitter, Shift, Stack # noqa: F401 +from seaborn._core.moves import Dodge, Jitter, Norm, Shift, Stack # noqa: F401 from seaborn._core.scales import Nominal, Continuous, Temporal # noqa: F401 diff --git a/seaborn/tests/_core/test_moves.py b/seaborn/tests/_core/test_moves.py index 1a55789a02..a9d221da19 100644 --- a/seaborn/tests/_core/test_moves.py +++ b/seaborn/tests/_core/test_moves.py @@ -6,7 +6,7 @@ from pandas.testing import assert_series_equal from numpy.testing import assert_array_equal, assert_array_almost_equal -from seaborn._core.moves import Dodge, Jitter, Shift, Stack +from seaborn._core.moves import Dodge, Jitter, Shift, Stack, Norm from seaborn._core.rules import categorical_order from seaborn._core.groupby import GroupBy @@ -318,3 +318,41 @@ def test_moves(self, toy_df, x, y): res = Shift(x=x, y=y)(toy_df, gb, "x") assert_array_equal(res["x"], toy_df["x"] + x) assert_array_equal(res["y"], toy_df["y"] + y) + + 
+class TestNorm(MoveFixtures): + + @pytest.mark.parametrize("orient", ["x", "y"]) + def test_default_no_groups(self, df, orient): + + other = {"x": "y", "y": "x"}[orient] + gb = GroupBy(["null"]) + res = Norm()(df, gb, orient) + assert res[other].max() == pytest.approx(1) + + @pytest.mark.parametrize("orient", ["x", "y"]) + def test_default_groups(self, df, orient): + + other = {"x": "y", "y": "x"}[orient] + gb = GroupBy(["grp2"]) + res = Norm()(df, gb, orient) + for _, grp in res.groupby("grp2"): + assert grp[other].max() == pytest.approx(1) + + def test_sum(self, df): + + gb = GroupBy(["null"]) + res = Norm("sum")(df, gb, "x") + assert res["y"].sum() == pytest.approx(1) + + def test_where(self, df): + + gb = GroupBy(["null"]) + res = Norm(where="x == 2")(df, gb, "x") + assert res.loc[res["x"] == 2, "y"].max() == pytest.approx(1) + + def test_percent(self, df): + + gb = GroupBy(["null"]) + res = Norm(percent=True)(df, gb, "x") + assert res["y"].max() == pytest.approx(100)