From a6b7f013eca77961b53df88afcf45e050376f2d0 Mon Sep 17 00:00:00 2001 From: Tom Magerlein Date: Sun, 9 Nov 2025 20:01:26 -0500 Subject: [PATCH 1/5] Replace pylint, pydocstyle with ruff check Replaces the pylint and pydocstyle linters with `ruff check`, and configures Ruff's lint rules. All lints that cannot be auto-fixed or trivially fixed by hand are temporarily disabled to keep the diff smaller. --- doc/conf.py | 2 - noxfile.py | 3 +- pyproject.toml | 119 ++++++++---------- src/tmlt/analytics/__init__.py | 2 - src/tmlt/analytics/_neighboring_relation.py | 16 ++- src/tmlt/analytics/_noise_info.py | 2 +- src/tmlt/analytics/_query_expr.py | 22 ++-- .../_base_measurement_visitor.py | 53 ++++---- .../_base_transformation_visitor.py | 5 +- .../_measurement_visitor.py | 4 +- src/tmlt/analytics/_transformation_utils.py | 2 - src/tmlt/analytics/binning_spec.py | 25 ++-- src/tmlt/analytics/config.py | 2 +- src/tmlt/analytics/keyset/_keyset.py | 17 +-- src/tmlt/analytics/privacy_budget.py | 5 +- src/tmlt/analytics/query_builder.py | 24 ++-- src/tmlt/analytics/session.py | 32 ++--- src/tmlt/analytics/utils.py | 10 +- test/conftest.py | 2 +- .../ids/test_constraint_propagation.py | 2 - .../ids/test_count_distinct_optimization.py | 6 +- .../session/ids/test_id_col_operations.py | 3 +- .../session/ids/test_l0_linf_truncation.py | 10 -- test/system/session/ids/test_l1_truncation.py | 4 - test/system/session/ids/test_partition.py | 6 +- .../session/mixed/test_mixed_session.py | 9 +- test/system/session/rows/test_add_max_rows.py | 27 ++-- .../rows/test_add_max_rows_in_max_groups.py | 8 +- test/system/session/rows/test_invalid.py | 4 +- test/unit/keysets/test_cross_join.py | 2 - test/unit/keysets/test_decomposition.py | 1 - test/unit/keysets/test_detect.py | 6 +- test/unit/keysets/test_equivalence.py | 5 +- test/unit/keysets/test_filter.py | 2 - test/unit/keysets/test_join.py | 2 - test/unit/keysets/test_keyset.py | 2 +- test/unit/keysets/test_product_keyset.py | 4 +- test/unit/keysets/test_project.py | 4 +- test/unit/keysets/test_rewrite.py | 1 - test/unit/keysets/test_subtract.py | 2 - .../test_measurement_visitor.py | 4 - .../test_constraints.py | 4 +- test/unit/test_binning_spec.py | 2 - test/unit/test_config.py | 9 +- test/unit/test_privacy_budget.py | 8 -- .../test_privacy_budget_rounding_helper.py | 3 - test/unit/test_query_builder.py | 8 +- test/unit/test_query_expr_compiler.py | 3 +- test/unit/test_query_expression.py | 5 +- test/unit/test_schema.py | 4 +- test/unit/test_session.py | 38 +++--- test/unit/test_table_identifiers.py | 2 +- test/unit/test_utils.py | 2 +- uv.lock | 94 +++++--------- 54 files changed, 230 insertions(+), 413 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 0c9a899e..bde158c7 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,5 +1,3 @@ -# pylint: skip-file - # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 diff --git a/noxfile.py b/noxfile.py index 7da191f2..86de375d 100644 --- a/noxfile.py +++ b/noxfile.py @@ -143,9 +143,8 @@ def is_mac(): sm.black() sm.isort() +sm.ruff_check() sm.mypy() -sm.pylint() -sm.pydocstyle() sm.smoketest() sm.release_smoketest() diff --git a/pyproject.toml b/pyproject.toml index 7322cfed..c6e488cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,11 +56,10 @@ required-version = ">=0.7.0" default-groups = "all" [dependency-groups] +ruff = ["ruff >=0.14.3,<1"] black = ["black >=23.3,<24"] isort = ["isort >=5.11,<6"] mypy = ["mypy >=1.14.0"] -pylint = ["pylint >=3.2.5"] -pydocstyle = ["pydocstyle[toml] >=6.3"]
test = [ "pytest", "pytest-cov >=5.0,<6", @@ -135,6 +134,53 @@ packages = ["src/tmlt"] ################################################################################ # Linter configuration +[tool.ruff.lint] +# A list of all of Ruff's rules can be found at https://docs.astral.sh/ruff/rules/ +select = [ + # Enable Ruff-specific lints plus Pylint, pydocstyle, pyflakes, and pycodestyle. + # The latter two cover many lints that we previously relied on pylint for; + # where rule sets overlap, Ruff implements each lint in only one of them. + "RUF", "PL", "D", "F", "E", "W", + # Also enable a subset of flake8 rules, for similar reasons to pyflakes/pycodestyle. + "ISC", "SLF" ] +ignore = [ + "PLR09", # too-many-* + "PLR2004", # magic-value-comparison + + # TODO: This disables every lint that is currently failing; go through and + # either fix/individually disable each instance, or choose to permanently + # ignore each one. + "PLW1641", # eq-without-hash + "PLC0206", # dict-index-missing-items + "RUF005", # collection-literal-concatenation + "RUF015", # unnecessary-iterable-allocation-for-first-element + "D415", # missing-terminal-punctuation + "RUF043", # pytest-raises-ambiguous-pattern + "D205", # missing-blank-line-after-summary + "D210", # surrounding-whitespace + "D102", # undocumented-public-method + "E501", # line-too-long + "E731", # lambda-assignment + "E741", # ambiguous-variable-name + "SLF001", # private-member-access + "RET504", # unnecessary-assign + "F401", # unused-import + "RUF009", # function-call-in-dataclass-default-argument + "E721", # type-comparison + "D103", # undocumented-public-function + "PLR0124", # comparison-with-itself +] + +# Ruff's RUF001-003 rules disallow certain Unicode characters that are easily +# confused with ASCII characters; this makes sense for the most part, but some +# of our docstrings use Greek letters that fall into that category. This allows +# those characters. +allowed-confusables = ['α', 'ρ', '𝝆'] + +[tool.ruff.lint.pydocstyle] +convention = "google" + [tool.black] force-exclude = "noxfile.py" @@ -162,70 +208,8 @@ module = "test.*" disallow_untyped_defs = false check_untyped_defs = true -[tool.pylint.master] -# See https://github.com/PyCQA/pylint/issues/1975#issuecomment-387924981 -extension-pkg-whitelist = ['numpy'] -load-plugins = ['pylint.extensions.docparams'] -# Only check param docs in docstrings that contain an Args: section. -# Set to "no" to show docstrings missing argument documentation. -accept-no-param-doc = true - -[tool.pylint.'MESSAGES CONTROL'] -enable = [ - # Note: there is a false positive on 'useless-suppression' when you - # use 'disable=line-too-long' at the end of a docstring. - # See: https://github.com/pylint-dev/pylint/issues/8301 - 'useless-suppression' ] -# By default, informational rules like useless-suppression don't cause PyLint to -# produce an error.
-fail-on = ['useless-suppression'] -disable = [ - 'arguments-differ', - 'duplicate-code', - 'fixme', - 'invalid-name', - 'logging-format-interpolation', - 'logging-fstring-interpolation', - 'missing-function-docstring', # Redundant with pydocstyle - 'missing-raises-doc', - 'missing-return-doc', - 'no-else-return', - 'super-init-not-called', - 'too-few-public-methods', - 'too-many-ancestors', - 'too-many-arguments', - 'too-many-branches', - 'too-many-instance-attributes', - 'too-many-lines', - 'too-many-locals', - 'too-many-positional-arguments', - 'too-many-public-methods', - 'too-many-return-statements', - 'too-many-statements', - 'unbalanced-tuple-unpacking', - 'unnecessary-lambda-assignment', - 'unsubscriptable-object', - 'use-dict-literal', - # There are a lot of false positives for unsupported-binary-operation - # on Python 3.9: https://github.com/pylint-dev/pylint/issues/7381 - 'unsupported-binary-operation', - # black and isort group tmlt.core separately from tmlt.analytics, - # but pylint thinks they should both be grouped as 'tmlt'. - 'ungrouped-imports', - 'wrong-import-order', -] - -[tool.pylint.FORMAT] -max-line-length = 88 - -[tool.pydocstyle] -convention = "google" -add-ignore = [ - # `D200: One-line docstring should fit on one line with quotes` - # conflicts with pylint's `max-line-length`. - "D200", -] +################################################################################ +# Test configuration [tool.pytest.ini_options] markers = [ @@ -236,8 +220,5 @@ markers = [ # more information and a better future fix. addopts = ["--import-mode=importlib"] -################################################################################ -# Test configuration - [tool.coverage.run] relative_files = true diff --git a/src/tmlt/analytics/__init__.py b/src/tmlt/analytics/__init__.py index 7e86f4d4..eea8640a 100644 --- a/src/tmlt/analytics/__init__.py +++ b/src/tmlt/analytics/__init__.py @@ -38,8 +38,6 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 -from typing import List - from tmlt.analytics._utils import AnalyticsInternalError from tmlt.analytics.binning_spec import BinningSpec, BinT from tmlt.analytics.config import Config, FeatureFlag diff --git a/src/tmlt/analytics/_neighboring_relation.py b/src/tmlt/analytics/_neighboring_relation.py index 6e28b7e3..29b1b65f 100644 --- a/src/tmlt/analytics/_neighboring_relation.py +++ b/src/tmlt/analytics/_neighboring_relation.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 -# pylint: disable=protected-access from abc import ABC, abstractmethod from dataclasses import dataclass, field @@ -284,14 +283,13 @@ def _validate(self, dfs: Dict[str, DataFrame]) -> List[str]: f" has type {df_field.dataType}." ) key_type = df_field.dataType - else: - if not df_field.dataType == key_type: - raise ValueError( - f"Key column '{key_column}' has type " - f"{df_field.dataType}, but in another" - f" table it has type {key_type}. Key types" - " must match across tables" - ) + elif not df_field.dataType == key_type: + raise ValueError( + f"Key column '{key_column}' has type " + f"{df_field.dataType}, but in another" + f" table it has type {key_type}. 
Key types" + " must match across tables" + ) return list(self.table_to_key_column.keys()) diff --git a/src/tmlt/analytics/_noise_info.py b/src/tmlt/analytics/_noise_info.py index 2c86cb5c..cafd03a5 100644 --- a/src/tmlt/analytics/_noise_info.py +++ b/src/tmlt/analytics/_noise_info.py @@ -150,7 +150,7 @@ def _inverse_cdf(noise_info: Dict[str, Any], p: float) -> float: @singledispatch def _noise_from_info( - info: Any, # pylint: disable=unused-argument + info: Any, ) -> List[Dict[str, Any]]: """Get noise information from info (for a measurement). diff --git a/src/tmlt/analytics/_query_expr.py b/src/tmlt/analytics/_query_expr.py index f5a4213e..9d24f91f 100644 --- a/src/tmlt/analytics/_query_expr.py +++ b/src/tmlt/analytics/_query_expr.py @@ -1132,7 +1132,7 @@ def _validate(self, input_schema: Schema): if not (isinstance(val, int) and pytypes[col] == float): raise ValueError( f"Column '{col}' cannot have nulls replaced with " - f"{repr(val)}, as that value's type does not match the " + f"{val!r}, as that value's type does not match the " f"column type {input_schema[col].column_type.name}" ) @@ -1147,14 +1147,14 @@ def schema(self, catalog: Catalog) -> Schema: name for name, cd in input_schema.column_descs.items() if (cd.allow_null or cd.allow_nan) - and not (name in [input_schema.grouping_column, input_schema.id_column]) + and name not in [input_schema.grouping_column, input_schema.id_column] ] return Schema( { name: ColumnDescriptor( column_type=cd.column_type, - allow_null=(cd.allow_null and not name in columns_to_change), - allow_nan=(cd.allow_nan and not name in columns_to_change), + allow_null=(cd.allow_null and name not in columns_to_change), + allow_nan=(cd.allow_nan and name not in columns_to_change), allow_inf=cd.allow_inf, ) for name, cd in input_schema.column_descs.items() @@ -1239,7 +1239,7 @@ def schema(self, catalog: Catalog) -> Schema: for name, cd in input_schema.column_descs.items() if cd.column_type == ColumnType.DECIMAL and cd.allow_inf - and not (name in [input_schema.grouping_column, input_schema.id_column]) + and name not in [input_schema.grouping_column, input_schema.id_column] ] return Schema( { @@ -1247,7 +1247,7 @@ def schema(self, catalog: Catalog) -> Schema: column_type=cd.column_type, allow_null=cd.allow_null, allow_nan=cd.allow_nan, - allow_inf=(cd.allow_inf and not name in columns_to_change), + allow_inf=(cd.allow_inf and name not in columns_to_change), ) for name, cd in input_schema.column_descs.items() }, @@ -1322,15 +1322,15 @@ def schema(self, catalog: Catalog) -> Schema: name for name, cd in input_schema.column_descs.items() if (cd.allow_null or cd.allow_nan) - and not name in [input_schema.grouping_column, input_schema.id_column] + and name not in [input_schema.grouping_column, input_schema.id_column] ) return Schema( { name: ColumnDescriptor( column_type=cd.column_type, - allow_null=(cd.allow_null and not name in columns), - allow_nan=(cd.allow_nan and not name in columns), + allow_null=(cd.allow_null and name not in columns), + allow_nan=(cd.allow_nan and name not in columns), allow_inf=(cd.allow_inf), ) for name, cd in input_schema.column_descs.items() @@ -1403,7 +1403,7 @@ def schema(self, catalog: Catalog) -> Schema: for name, cd in input_schema.column_descs.items() if cd.column_type == ColumnType.DECIMAL and cd.allow_inf - and not name in (input_schema.grouping_column, input_schema.id_column) + and name not in (input_schema.grouping_column, input_schema.id_column) ) return Schema( @@ -1412,7 +1412,7 @@ def schema(self, catalog: Catalog) -> Schema: 
column_type=cd.column_type, allow_null=cd.allow_null, allow_nan=cd.allow_nan, - allow_inf=(cd.allow_inf and not name in columns), + allow_inf=(cd.allow_inf and name not in columns), ) for name, cd in input_schema.column_descs.items() }, diff --git a/src/tmlt/analytics/_query_expr_compiler/_base_measurement_visitor.py b/src/tmlt/analytics/_query_expr_compiler/_base_measurement_visitor.py index 554c1e26..9e0c90e2 100644 --- a/src/tmlt/analytics/_query_expr_compiler/_base_measurement_visitor.py +++ b/src/tmlt/analytics/_query_expr_compiler/_base_measurement_visitor.py @@ -645,34 +645,33 @@ def _validate_approxDP_and_adjust_budget( f"The budget provided was {self.budget}." ) return + elif mechanism in ( + AverageMechanism.LAPLACE, + CountDistinctMechanism.LAPLACE, + CountMechanism.LAPLACE, + StdevMechanism.LAPLACE, + SumMechanism.LAPLACE, + VarianceMechanism.LAPLACE, + ): + warnings.warn( + "When using LAPLACE with an ApproxDPBudget, the delta value of " + "the budget will be replaced with zero." + ) + self.adjusted_budget = ApproxDPBudget(epsilon, 0) + elif mechanism in ( + AverageMechanism.DEFAULT, + CountDistinctMechanism.DEFAULT, + CountMechanism.DEFAULT, + StdevMechanism.DEFAULT, + SumMechanism.DEFAULT, + VarianceMechanism.DEFAULT, + ): + self.adjusted_budget = ApproxDPBudget(epsilon, 0) + elif mechanism is None: + # Quantile has no mechanism + self.adjusted_budget = ApproxDPBudget(epsilon, 0) else: - if mechanism in ( - AverageMechanism.LAPLACE, - CountDistinctMechanism.LAPLACE, - CountMechanism.LAPLACE, - StdevMechanism.LAPLACE, - SumMechanism.LAPLACE, - VarianceMechanism.LAPLACE, - ): - warnings.warn( - "When using LAPLACE with an ApproxDPBudget, the delta value of " - "the budget will be replaced with zero." - ) - self.adjusted_budget = ApproxDPBudget(epsilon, 0) - elif mechanism in ( - AverageMechanism.DEFAULT, - CountDistinctMechanism.DEFAULT, - CountMechanism.DEFAULT, - StdevMechanism.DEFAULT, - SumMechanism.DEFAULT, - VarianceMechanism.DEFAULT, - ): - self.adjusted_budget = ApproxDPBudget(epsilon, 0) - elif mechanism is None: - # Quantile has no mechanism - self.adjusted_budget = ApproxDPBudget(epsilon, 0) - else: - raise AnalyticsInternalError(f"Unknown mechanism {mechanism}.") + raise AnalyticsInternalError(f"Unknown mechanism {mechanism}.") def _validate_measurement(self, measurement: Measurement, mid_stability: sp.Expr): """Validate a measurement.""" diff --git a/src/tmlt/analytics/_query_expr_compiler/_base_transformation_visitor.py b/src/tmlt/analytics/_query_expr_compiler/_base_transformation_visitor.py index 84b41bb8..74733c20 100644 --- a/src/tmlt/analytics/_query_expr_compiler/_base_transformation_visitor.py +++ b/src/tmlt/analytics/_query_expr_compiler/_base_transformation_visitor.py @@ -1082,7 +1082,7 @@ def _get_replace_with( else: # Check that all columns exist for col in replace_with: - if not col in analytics_schema: + if col not in analytics_schema: raise ValueError( f"Cannot replace values in column {col}, because it is not in" " the schema" @@ -1555,9 +1555,8 @@ def visit_enforce_constraint(self, expr: EnforceConstraint) -> Output: child_transformation, child_ref, child_constraints = self._visit_child( expr.child ) - # pylint: disable=protected-access transformation, ref = expr.constraint._enforce(child_transformation, child_ref) - # pylint: enable=protected-access + return self.Output( transformation, ref, diff --git a/src/tmlt/analytics/_query_expr_compiler/_measurement_visitor.py b/src/tmlt/analytics/_query_expr_compiler/_measurement_visitor.py index 
43e6c60e..2ca65086 100644 --- a/src/tmlt/analytics/_query_expr_compiler/_measurement_visitor.py +++ b/src/tmlt/analytics/_query_expr_compiler/_measurement_visitor.py @@ -63,9 +63,7 @@ def _handle_enforce( **kwargs, ) -> Tuple[Transformation, TableReference]: """Enforce a constraint after a child transformation.""" - return constraint._enforce( # pylint: disable=protected-access - child_transformation, child_ref, **kwargs - ) + return constraint._enforce(child_transformation, child_ref, **kwargs) def visit_get_groups(self, expr: GetGroups) -> Tuple[Measurement, NoiseInfo]: """Create a measurement from a GetGroups query expression.""" diff --git a/src/tmlt/analytics/_transformation_utils.py b/src/tmlt/analytics/_transformation_utils.py index bd65b8aa..8db813ec 100644 --- a/src/tmlt/analytics/_transformation_utils.py +++ b/src/tmlt/analytics/_transformation_utils.py @@ -3,8 +3,6 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 -# pylint: disable=unused-argument - from typing import Callable, Dict, Optional, Tuple, Type, cast from tmlt.core.domains.base import Domain diff --git a/src/tmlt/analytics/binning_spec.py b/src/tmlt/analytics/binning_spec.py index 53b8b0e9..8d46e850 100644 --- a/src/tmlt/analytics/binning_spec.py +++ b/src/tmlt/analytics/binning_spec.py @@ -105,17 +105,16 @@ def _default_bin_names( f"({bin_edge_strs[i]}, {bin_edge_strs[i+1]}]" for i in range(len(bin_edges) - 1) ] + elif include_edges: + return [ + f"[{bin_edge_strs[i]}, {bin_edge_strs[i+1]})" + for i in range(len(bin_edges) - 2) + ] + [f"[{bin_edge_strs[-2]}, {bin_edge_strs[-1]}]"] else: - if include_edges: - return [ - f"[{bin_edge_strs[i]}, {bin_edge_strs[i+1]})" - for i in range(len(bin_edges) - 2) - ] + [f"[{bin_edge_strs[-2]}, {bin_edge_strs[-1]}]"] - else: - return [ - f"[{bin_edge_strs[i]}, {bin_edge_strs[i+1]})" - for i in range(len(bin_edges) - 1) - ] + return [ + f"[{bin_edge_strs[i]}, {bin_edge_strs[i+1]})" + for i in range(len(bin_edges) - 1) + ] @dataclass(frozen=True, init=False, eq=False, repr=False) @@ -234,10 +233,8 @@ def __init__( raise ValueError(f"Invalid bin names: {e}") from e # This typecheck cannot be done safely with isinstance because datetime # is a subclass of date. - if ( - # pylint: disable=unidiomatic-typecheck - nan_bin is not None - and type(nan_bin) != column_type_to_py_type(column_descriptor.column_type) + if nan_bin is not None and type(nan_bin) != column_type_to_py_type( + column_descriptor.column_type ): raise ValueError("NaN bin name must have the same type as other bin names") diff --git a/src/tmlt/analytics/config.py b/src/tmlt/analytics/config.py index 6b62fde9..ee805250 100644 --- a/src/tmlt/analytics/config.py +++ b/src/tmlt/analytics/config.py @@ -114,7 +114,7 @@ class Config: _instance = None - def __new__(cls, *args, **kwargs): # noqa: D102 + def __new__(cls, *args, **kwargs): # Enforces that Config is a singleton. # No docstring to prevent this from showing up in docs. if not cls._instance: diff --git a/src/tmlt/analytics/keyset/_keyset.py b/src/tmlt/analytics/keyset/_keyset.py index e7c39022..93c89e26 100644 --- a/src/tmlt/analytics/keyset/_keyset.py +++ b/src/tmlt/analytics/keyset/_keyset.py @@ -207,11 +207,11 @@ def _detect(columns: Sequence[str]) -> KeySetPlan: # Pydocstyle doesn't seem to understand overloads, so we need to disable the # check that a docstring exists for them. @overload - def __mul__(self, other: KeySet) -> KeySet: # noqa: D105 + def __mul__(self, other: KeySet) -> KeySet: ... 
@overload - def __mul__(self, other: KeySetPlan) -> KeySetPlan: # noqa: D105 + def __mul__(self, other: KeySetPlan) -> KeySetPlan: ... def __mul__(self, other): @@ -320,11 +320,11 @@ def __getitem__(self, desired_columns: Union[str, Sequence[str]]) -> KeySet: # Pydocstyle doesn't seem to understand overloads, so we need to disable the # check that a docstring exists for them. @overload - def join(self, other: KeySet) -> KeySet: # noqa: D105 + def join(self, other: KeySet) -> KeySet: ... @overload - def join(self, other: KeySetPlan) -> KeySetPlan: # noqa: D105 + def join(self, other: KeySetPlan) -> KeySetPlan: ... def join(self, other): @@ -353,13 +353,11 @@ def join(self, other): ) if isinstance(other, KeySet): return KeySet( - # pylint: disable-next=protected-access Join(self._op_tree, other._op_tree), columns=list(dict.fromkeys(self.columns() + other.columns())), ) else: return KeySetPlan( - # pylint: disable-next=protected-access Join(self._op_tree, other._op_tree), columns=list(dict.fromkeys(self.columns() + other.columns())), ) @@ -413,7 +411,6 @@ def columns(self) -> list[str]: return list(self._columns) def schema(self) -> dict[str, ColumnDescriptor]: - # pylint: disable=line-too-long """Returns the KeySet's schema. Example: @@ -427,7 +424,6 @@ def schema(self) -> dict[str, ColumnDescriptor]: {'A': ColumnDescriptor(column_type=ColumnType.VARCHAR, allow_null=False, allow_nan=False, allow_inf=False), 'B': ColumnDescriptor(column_type=ColumnType.INTEGER, allow_null=True, allow_nan=False, allow_inf=False)} """ - # pylint: enable=line-too-long schema = self._op_tree.schema() return {c: schema[c] for c in self.columns()} # Reorder to match self.columns() @@ -491,7 +487,7 @@ def is_equivalent(self, other: Union[KeySet, KeySetPlan]) -> Optional[bool]: if not isinstance(other, KeySet): return False - if self._op_tree == other._op_tree: # pylint: disable=protected-access + if self._op_tree == other._op_tree: return True # Differing column nullability doesn't necessarily mean that two KeySets @@ -729,7 +725,6 @@ def join(self, other: Union[KeySet, KeySetPlan]) -> KeySetPlan: ) return KeySetPlan( - # pylint: disable-next=protected-access Join(self._op_tree, other._op_tree), columns=list(dict.fromkeys(self.columns() + other.columns())), ) @@ -777,7 +772,7 @@ def is_equivalent(self, other: Union[KeySet, KeySetPlan]) -> Optional[bool]: if not isinstance(other, KeySetPlan): return False - if self._op_tree == other._op_tree: # pylint: disable=protected-access + if self._op_tree == other._op_tree: return True if self.columns() != other.columns(): diff --git a/src/tmlt/analytics/privacy_budget.py b/src/tmlt/analytics/privacy_budget.py index a8d32d4d..2b9680b2 100644 --- a/src/tmlt/analytics/privacy_budget.py +++ b/src/tmlt/analytics/privacy_budget.py @@ -231,7 +231,7 @@ class ApproxDPBudget(PrivacyBudget): This privacy definition is also known as (ε, δ)-differential privacy, and the associated privacy parameters are epsilon and delta. The formal definition can be found `here `__. - """ # pylint: disable=line-too-long + """ _epsilon: ExactNumber _delta: ExactNumber @@ -567,7 +567,6 @@ def _get_adjusted_budget( requested_privacy_budget: The requested privacy budget. remaining_privacy_budget: How much privacy budget we have left. 
""" - # pylint: disable=protected-access if isinstance(requested_privacy_budget, PureDPBudget) and isinstance( remaining_privacy_budget, PureDPBudget ): @@ -594,7 +593,7 @@ def _get_adjusted_budget( requested_privacy_budget._rho, remaining_privacy_budget._rho ) return RhoZCDPBudget(adjusted_rho) - # pylint: enable=protected-access + else: raise ValueError( "Unable to compute a privacy budget with the requested budget " diff --git a/src/tmlt/analytics/query_builder.py b/src/tmlt/analytics/query_builder.py index c879bd23..b33d1974 100644 --- a/src/tmlt/analytics/query_builder.py +++ b/src/tmlt/analytics/query_builder.py @@ -69,20 +69,20 @@ # Override exported names to include ColumnType and ColumnDescriptor, as well as # types from _query_expr. __all__ = [ - "Row", - "QueryBuilder", - "GroupedQueryBuilder", - "ColumnDescriptor", - "ColumnType", "AnalyticsDefault", "AverageMechanism", - "CountMechanism", + "ColumnDescriptor", + "ColumnType", "CountDistinctMechanism", + "CountMechanism", + "GroupbyCountQuery", + "GroupedQueryBuilder", + "Query", + "QueryBuilder", + "Row", "StdevMechanism", "SumMechanism", "VarianceMechanism", - "Query", - "GroupbyCountQuery", ] Row = Dict[str, Any] @@ -145,7 +145,7 @@ def _is_equivalent(self, other: Any) -> bool: return False query = self._query_expr - other_query = other._query_expr # pylint: disable=protected-access + other_query = other._query_expr return _query_expr_recursive_equivalence(query, other_query) @@ -290,11 +290,11 @@ def __init__(self, source_id: str): self._source_id: str = source_id self._query_expr: QueryExpr = PrivateSource(source_id) - def clone(self) -> QueryBuilder: # noqa: D102 + def clone(self) -> QueryBuilder: # Returns a new QueryBuilder with the same partial query as the current one. # No docstring to prevent this from showing in docs. builder = QueryBuilder(self._source_id) - builder._query_expr = self._query_expr # pylint: disable=protected-access + builder._query_expr = self._query_expr return builder def join_public( @@ -467,7 +467,6 @@ def join_private( truncation_strategy_right: Optional[TruncationStrategy.Type] = None, join_columns: Optional[Sequence[str]] = None, ) -> "QueryBuilder": - # pylint: disable=protected-access """Join the table with another :class:`QueryBuilder`. The current query can also be joined with a named private table @@ -2810,7 +2809,6 @@ def __init__(self, source_id, query_expr, groupby_keys) -> None: Do not construct directly; use :func:`~QueryBuilder.groupby`. """ - # pylint: disable=pointless-string-statement """ Args: source_id: The source id used in the query_expr. 
diff --git a/src/tmlt/analytics/session.py b/src/tmlt/analytics/session.py index ff34208f..3f193784 100644 --- a/src/tmlt/analytics/session.py +++ b/src/tmlt/analytics/session.py @@ -7,9 +7,7 @@ from typing import Any, Dict, List, Optional, Tuple, Type, Union, cast from warnings import warn -import pandas as pd # pylint: disable=unused-import import sympy as sp -from pyspark.sql import SparkSession # pylint: disable=unused-import from pyspark.sql import DataFrame from tabulate import tabulate from tmlt.core.domains.collections import DictDomain @@ -89,10 +87,9 @@ RhoZCDPBudget, _get_adjusted_budget, ) -from tmlt.analytics.protected_change import ( # pylint: disable=unused-import +from tmlt.analytics.protected_change import ( AddMaxRows, AddMaxRowsInMaxGroups, - AddOneRow, AddRowsWithID, ProtectedChange, ) @@ -269,7 +266,7 @@ def build(self) -> "Session": source_id: dataframe for source_id, (dataframe, _) in self._private_dataframes.items() } - sess = self.get_class_type()._from_neighboring_relation( # pylint: disable=protected-access + sess = self.get_class_type()._from_neighboring_relation( self._privacy_budget, tables, neighboring_relation ) # check list of ARK identifiers against session's ID spaces @@ -305,7 +302,6 @@ def __init__( @nodoc """ - # pylint: disable=pointless-string-statement """ Args documented for internal use. accountant: A PrivacyAccountant. @@ -328,7 +324,6 @@ def __init__( NamedTable(t): [] for t in self.private_sources } - # pylint: disable=line-too-long @classmethod @typechecked def from_dataframe( @@ -387,7 +382,6 @@ def from_dataframe( specifying what changes to the input data the resulting :class:`Session` should protect. """ - # pylint: enable=line-too-long session_builder = ( cls.Builder() .with_privacy_budget(privacy_budget=privacy_budget) @@ -407,7 +401,6 @@ def _create_accountant_from_neighboring_relation( private_sources: Dict[str, DataFrame], relation: NeighboringRelation, ) -> Tuple[PrivacyAccountant, Any]: - # pylint: disable=protected-access output_measure: Union[PureDP, ApproxDP, RhoZCDP] sympy_budget: Union[sp.Expr, Tuple[sp.Expr, sp.Expr]] if isinstance(privacy_budget, PureDPBudget): @@ -428,7 +421,6 @@ def _create_accountant_from_neighboring_relation( elif isinstance(privacy_budget, RhoZCDPBudget): output_measure = RhoZCDP() sympy_budget = privacy_budget._rho.expr - # pylint: enable=protected-access else: raise ValueError( f"Unsupported PrivacyBudget variant: {type(privacy_budget)}" @@ -620,7 +612,6 @@ def describe( obj: The table or query to be described, or None to describe the whole Session. """ - # pylint: disable=protected-access if obj is None: print(self._describe_self()) elif isinstance(obj, GroupedQueryBuilder): @@ -634,7 +625,6 @@ def describe( print(self._describe_query_obj(QueryBuilder(obj)._query_expr)) else: assert_never(obj) - # pylint: enable=protected-access def _describe_self(self) -> str: """Describes the current state of this session.""" @@ -922,7 +912,6 @@ def _catalog(self) -> Catalog: ) return catalog - # pylint: disable=line-too-long @typechecked def add_public_dataframe(self, source_id: str, dataframe: DataFrame): """Adds a public data source to the session. @@ -972,7 +961,6 @@ def add_public_dataframe(self, source_id: str, dataframe: DataFrame): source_id: The name of the public data source. dataframe: The public data source corresponding to the ``source_id``. 
""" - # pylint: enable=line-too-long assert_is_identifier(source_id) if source_id in self.public_sources or source_id in self.private_sources: raise ValueError(f"This session already has a table named '{source_id}'.") @@ -1057,11 +1045,10 @@ def _noise_info( [{'noise_mechanism': <_NoiseMechanism.GEOMETRIC: 2>, 'noise_parameter': 2}] """ if isinstance(query_expr, Query): - query_expr = query_expr._query_expr # pylint: disable=protected-access + query_expr = query_expr._query_expr _, _, noise_info = self._compile_and_get_info(query_expr, privacy_budget) return list(iter(noise_info)) - # pylint: disable=line-too-long def evaluate( self, query_expr: Query, @@ -1118,9 +1105,8 @@ def evaluate( query_expr: One query expression to answer. privacy_budget: The privacy budget used for the query. """ - # pylint: enable=line-too-long check_type(query_expr, Query) - query = query_expr._query_expr # pylint: disable=protected-access + query = query_expr._query_expr measurement, adjusted_budget, _ = self._compile_and_get_info( query, privacy_budget ) @@ -1166,7 +1152,6 @@ def evaluate( "for more information." ) from e - # pylint: disable=line-too-long @typechecked def create_view( self, @@ -1234,13 +1219,12 @@ def create_view( source_id: The name, or unique identifier, of the view. cache: Whether or not to cache the view. """ - # pylint: enable=line-too-long assert_is_identifier(source_id) self._activate_accountant() if source_id in self.private_sources or source_id in self.public_sources: raise ValueError(f"Table '{source_id}' already exists.") - query = query_expr._query_expr # pylint: disable=protected-access + query = query_expr._query_expr transformation, ref, constraints = QueryExprCompiler( self._output_measure @@ -1315,7 +1299,7 @@ def _create_partition_constraint( behavior of constraints, not for code maintainability. """ if isinstance(constraint, MaxGroupsPerID): - return constraint._enforce( # pylint: disable=protected-access + return constraint._enforce( child_transformation=child_transformation, child_ref=child_ref, update_metric=True, @@ -1326,7 +1310,7 @@ def _create_partition_constraint( raise AnalyticsInternalError( f"Expected MaxGroupsPerID or MaxRowsPerID constraints, but got {constraint} instead." ) - return constraint._enforce( # pylint: disable=protected-access + return constraint._enforce( child_transformation=child_transformation, child_ref=child_ref, update_metric=True, @@ -1482,7 +1466,6 @@ def _create_partition_transformation( ) return transformation - # pylint: disable=line-too-long @typechecked def partition_and_create( self, @@ -1576,7 +1559,6 @@ def partition_and_create( splits: Mapping of split name to value of partition. Split name is ``source_id`` in new session. """ - # pylint: enable=line-too-long # If you remove this if-block, mypy will complain if not ( isinstance(self._accountant.privacy_budget, ExactNumber) diff --git a/src/tmlt/analytics/utils.py b/src/tmlt/analytics/utils.py index 0658d62d..722e0f7c 100644 --- a/src/tmlt/analytics/utils.py +++ b/src/tmlt/analytics/utils.py @@ -65,9 +65,7 @@ def check_installation(): # If Spark is broken, the Core cleanup atexit hook will fail, which # produces some additional output the user doesn't need to see in # this case. 
- atexit.unregister( - core_cleanup._cleanup_temp # pylint: disable=protected-access - ) + atexit.unregister(core_cleanup._cleanup_temp) if ( e.args and isinstance(e.args[0], str) @@ -124,8 +122,8 @@ def check_installation(): ) if ( len(result.columns) != 2 - or not "A" in result.columns - or not "count" in result.columns + or "A" not in result.columns + or "count" not in result.columns ): raise RuntimeError( "Expected output to have columns 'A' and 'count', but instead it had" @@ -153,7 +151,7 @@ def check_installation(): "Installation check complete. Tumult Analytics appears to be properly" " installed." ) - except Exception as e: # pylint: disable=broad-except + except Exception as e: print(" FAILED\n") if not str(e).startswith("It looks like the analytics session"): raise RuntimeError( diff --git a/test/conftest.py b/test/conftest.py index 9df58735..3b989208 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -392,7 +392,7 @@ def create_empty_input(domain: SparkDataFrameDomain) -> DataFrame: ... -def create_empty_input(domain): # pylint: disable=missing-type-doc +def create_empty_input(domain): """Returns an empty input for a given domain. Args: diff --git a/test/system/session/ids/test_constraint_propagation.py b/test/system/session/ids/test_constraint_propagation.py index 266af448..ec34b19b 100644 --- a/test/system/session/ids/test_constraint_propagation.py +++ b/test/system/session/ids/test_constraint_propagation.py @@ -32,11 +32,9 @@ def _test_propagation(query, expected_constraints, session): """Verify that the table resulting from a query has the expected constraints.""" session.create_view(query, "view", cache=False) - # pylint: disable=protected-access assert set(session._table_constraints[NamedTable("view")]) == set( expected_constraints ) - # pylint: enable=protected-access @pytest.mark.parametrize( diff --git a/test/system/session/ids/test_count_distinct_optimization.py b/test/system/session/ids/test_count_distinct_optimization.py index 0fa36d88..e7df7dc4 100644 --- a/test/system/session/ids/test_count_distinct_optimization.py +++ b/test/system/session/ids/test_count_distinct_optimization.py @@ -144,9 +144,8 @@ def test_insufficient_constraints(query: QueryBuilder, session): ) def test_noise_scale_puredp(query: QueryExpr, expected_noise: List[float], session): """Noise scales are adjusted correctly for different truncations with pure DP.""" - # pylint: disable=protected-access noise_info = session._noise_info(query, PureDPBudget(1)) - # pylint: enable=protected-access + noise = [info["noise_parameter"] for info in noise_info] assert noise == expected_noise @@ -190,8 +189,7 @@ def test_noise_scale_puredp(query: QueryExpr, expected_noise: List[float], sessi ) def test_noise_scale_zcdp(query: QueryExpr, expected_noise: List[float], session): """Noise scales are adjusted correctly for different truncations with zCDP.""" - # pylint: disable=protected-access noise_info = session._noise_info(query, RhoZCDPBudget(1)) - # pylint: enable=protected-access + noise = [info["noise_parameter"] for info in noise_info] assert noise == expected_noise diff --git a/test/system/session/ids/test_id_col_operations.py b/test/system/session/ids/test_id_col_operations.py index a0069deb..d5efb410 100644 --- a/test/system/session/ids/test_id_col_operations.py +++ b/test/system/session/ids/test_id_col_operations.py @@ -238,7 +238,7 @@ def test_various_session_builds( "id_b2", _session_data[df4], protected_change=AddRowsWithID("id", "b") ) ).build() - # pylint: disable=protected-access + for 
table_collection, ark_metric in sess._input_metric.key_to_metric.items(): dict_domain = sess._input_domain.key_to_domain[table_collection] assert isinstance(ark_metric, AddRemoveKeys) @@ -264,4 +264,3 @@ def test_various_session_builds( .allow_null for table_id, key_column in ark_metric.df_to_key_column.items() ) - # pylint: enable=protected-access diff --git a/test/system/session/ids/test_l0_linf_truncation.py b/test/system/session/ids/test_l0_linf_truncation.py index acd5db74..24850b97 100644 --- a/test/system/session/ids/test_l0_linf_truncation.py +++ b/test/system/session/ids/test_l0_linf_truncation.py @@ -410,9 +410,7 @@ def test_mismatched_grouping_columns(session): ) def test_noise_scale_puredp(query: QueryExpr, expected_noise: List[float], session): """Noise scales are adjusted correctly for different truncations with pure DP.""" - # pylint: disable=protected-access noise_info = session._noise_info(query, PureDPBudget(1)) - # pylint: enable=protected-access noise = [info["noise_parameter"] for info in noise_info] assert noise == expected_noise @@ -485,9 +483,7 @@ def test_noise_scale_puredp(query: QueryExpr, expected_noise: List[float], sessi ) def test_noise_scale_zcdp(query: QueryExpr, expected_noise: List[float], session): """Noise scales are adjusted correctly for different truncations with zCDP.""" - # pylint: disable=protected-access noise_info = session._noise_info(query, RhoZCDPBudget(1)) - # pylint: enable=protected-access noise = [info["noise_parameter"] for info in noise_info] assert noise == expected_noise @@ -556,10 +552,7 @@ def test_constraint_selection_puredp( for c in constraints: base_query.enforce(c) query = base_query.groupby(group).count() if group else base_query.count() - - # pylint: disable=protected-access noise_info = session._noise_info(query, PureDPBudget(1)) - # pylint: enable=protected-access noise = [info["noise_parameter"] for info in noise_info] assert noise == expected_noise @@ -641,9 +634,6 @@ def test_constraint_selection_zcdp( for c in constraints: base_query.enforce(c) query = base_query.groupby(group).count() if group else base_query.count() - - # pylint: disable=protected-access noise_info = session._noise_info(query, RhoZCDPBudget(1)) - # pylint: enable=protected-access noise = [info["noise_parameter"] for info in noise_info] assert noise == expected_noise diff --git a/test/system/session/ids/test_l1_truncation.py b/test/system/session/ids/test_l1_truncation.py index a1d8aa73..2f9be2dc 100644 --- a/test/system/session/ids/test_l1_truncation.py +++ b/test/system/session/ids/test_l1_truncation.py @@ -304,9 +304,7 @@ def test_stdev_grouped( ) def test_noise_scale_puredp(query: QueryExpr, expected_noise: List[float], session): """Noise scales are adjusted correctly for different truncations with pure DP.""" - # pylint: disable=protected-access noise_info = session._noise_info(query, PureDPBudget(1)) - # pylint: enable=protected-access noise = [info["noise_parameter"] for info in noise_info] assert noise == expected_noise @@ -338,8 +336,6 @@ def test_noise_scale_puredp(query: QueryExpr, expected_noise: List[float], sessi ) def test_noise_scale_zcdp(query: QueryExpr, expected_noise: List[float], session): """Noise scales are adjusted correctly for different truncations with zCDP.""" - # pylint: disable=protected-access noise_info = session._noise_info(query, RhoZCDPBudget(1)) - # pylint: enable=protected-access noise = [info["noise_parameter"] for info in noise_info] assert noise == expected_noise diff --git 
a/test/system/session/ids/test_partition.py b/test/system/session/ids/test_partition.py index 1a34c649..ee9049b2 100644 --- a/test/system/session/ids/test_partition.py +++ b/test/system/session/ids/test_partition.py @@ -137,7 +137,7 @@ def test_partition_and_create_with_MaxRowsPerID(session, table_stability): assert_frame_equal_with_sort( answer_session3.toPandas(), pd.DataFrame({"count": [1]}) ) - # pylint: disable=protected-access + assert session2._input_metric == DictMetric( {NamedTable("part0"): SymmetricDifference()} ) @@ -146,7 +146,6 @@ def test_partition_and_create_with_MaxRowsPerID(session, table_stability): ) assert session2._accountant.d_in == {NamedTable("part0"): table_stability} assert session3._accountant.d_in == {NamedTable("part1"): table_stability} - # pylint: enable=protected-access @pytest.mark.parametrize( @@ -193,7 +192,7 @@ def test_partition_and_create_with_MaxGroupsPerID(session, table_stability): assert_frame_equal_with_sort( answer_session3.toPandas(), pd.DataFrame({"count": [1]}) ) - # pylint: disable=protected-access + assert session2._input_metric == DictMetric( {TableCollection("a"): CoreAddRemoveKeys({NamedTable("part0"): "id"})} ) @@ -202,4 +201,3 @@ def test_partition_and_create_with_MaxGroupsPerID(session, table_stability): ) assert session2._accountant.d_in == {TableCollection("a"): table_stability} assert session3._accountant.d_in == {TableCollection("a"): table_stability} - # pylint: enable=protected-access diff --git a/test/system/session/mixed/test_mixed_session.py b/test/system/session/mixed/test_mixed_session.py index 74362d73..7a5a2c66 100644 --- a/test/system/session/mixed/test_mixed_session.py +++ b/test/system/session/mixed/test_mixed_session.py @@ -1,7 +1,8 @@ """Tests for Sessions that employ a mixture of IDs and non-IDs features. These are not meant to be exhaustive, but rather to ensure that the Session -functions properly when used with a mixture of IDs and non-IDs protected changes.""" +functions properly when used with a mixture of IDs and non-IDs protected changes. +""" # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 @@ -33,18 +34,16 @@ def test_view_constraint(session): .enforce(MaxRowsPerGroupPerID("group", 1)) ) session.create_view(query, "view", cache=False) - # pylint: disable=protected-access + assert session._table_constraints[NamedTable("view")] == [ MaxRowsPerID(1), MaxGroupsPerID("group", 1), MaxRowsPerGroupPerID("group", 1), ] - # pylint: enable=protected-access session.delete_view("view") - # pylint: disable=protected-access + assert NamedTable("view") not in session._table_constraints - # pylint: enable=protected-access # Test creating view, then doing (1) immediate aggregation and diff --git a/test/system/session/rows/test_add_max_rows.py b/test/system/session/rows/test_add_max_rows.py index 3b7cbb2c..ceff9f25 100644 --- a/test/system/session/rows/test_add_max_rows.py +++ b/test/system/session/rows/test_add_max_rows.py @@ -79,9 +79,7 @@ def test_queries_privacy_budget_infinity_puredp( expected_df: The expected answer. """ if expected_expr is not None: - # pylint: disable=protected-access query_expr = query_expr_or_builder._query_expr - # pylint: enable=protected-access assert query_expr == expected_expr session = Session.from_dataframe( privacy_budget=PureDPBudget(float("inf")), @@ -159,9 +157,8 @@ def test_queries_privacy_budget_infinity_rhozcdp( expected_df: The expected answer. 
""" if expected_expr is not None: - # pylint: disable=protected-access query_expr = query_expr_or_builder._query_expr - # pylint: enable=protected-access + assert query_expr == expected_expr session = Session.from_dataframe( @@ -257,9 +254,9 @@ def test_noise_info( dataframe=self.sdf, protected_change=AddOneRow(), ) - # pylint: disable=protected-access + info = session._noise_info(query_expr, query_budget) - # pylint: enable=protected-access + assert info == expected @pytest.mark.parametrize( @@ -442,7 +439,6 @@ def duplicate_rows(_: Row) -> List[Row]: ) def test_get_bounds_inf_budget(self, spark, data): """Test that the get_bounds produces reasonable bounds.""" - sdf = spark.createDataFrame(data) session = Session.from_dataframe( privacy_budget=PureDPBudget(float("inf")), @@ -492,7 +488,6 @@ def test_get_bounds_inf_budget(self, spark, data): ) def test_get_bounds_inf_budget_sum(self, spark, data): """Test that the bounds from get_bounds produce a reasonable sum.""" - sdf = spark.createDataFrame(data) session = Session.from_dataframe( privacy_budget=PureDPBudget(float("inf")), @@ -559,7 +554,6 @@ def test_get_bounds_invalid_columns( self, spark, data, column, error_type, message, protected_change ): """Test that get_bounds throws appropriate errors.""" - sdf = spark.createDataFrame(data) session = Session.from_dataframe( privacy_budget=PureDPBudget(float("inf")), @@ -776,8 +770,8 @@ def test_partition_and_create_approxDP_session_pureDP_partition( remaining_budget: PrivacyBudget, ): """Tests using :func:`partition_and_create` to create a new ApproxDP session - that supports PureDP partitions.""" - + that supports PureDP partitions. + """ is_approxDP_starting_budget = isinstance(starting_budget, ApproxDPBudget) if is_approxDP_starting_budget and isinstance(partition_budget, PureDPBudget): partition_budget = ApproxDPBudget(partition_budget.value, 0) @@ -985,7 +979,6 @@ def test_partition_execution_order( "X": ColumnDescriptor(ColumnType.INTEGER), } - # pylint: disable=protected-access assert session1._accountant.state == PrivacyAccountantState.WAITING_FOR_CHILDREN assert session2._accountant.state == PrivacyAccountantState.ACTIVE assert session3._accountant.state == PrivacyAccountantState.WAITING_FOR_SIBLING @@ -1019,8 +1012,6 @@ def test_partition_execution_order( ): session3.create_view(select_query3, "select_view_again", cache=False) - # pylint: enable=protected-access - @pytest.mark.parametrize( "budget", [(PureDPBudget(20)), (ApproxDPBudget(20, 0.5)), (RhoZCDPBudget(20))] ) @@ -1085,7 +1076,6 @@ def test_partition_on_nongrouping_column(self, budget: PrivacyBudget): ) def test_create_view_composed(self, budget: PrivacyBudget): """Composing views with :func:`create_view` works.""" - session = Session.from_dataframe( privacy_budget=budget, source_id="private", @@ -1099,9 +1089,8 @@ def test_create_view_composed(self, budget: PrivacyBudget): max_rows=2, ) session.create_view(transformation_query1, "flatmap1", cache=False) - # pylint: disable=protected-access + assert session._accountant.d_in[NamedTable("flatmap1")] == 2 - # pylint: enable=protected-access transformation_query2 = QueryBuilder("flatmap1").flat_map( f=lambda row: [{}, {}], @@ -1110,9 +1099,8 @@ def test_create_view_composed(self, budget: PrivacyBudget): max_rows=3, ) session.create_view(transformation_query2, "flatmap2", cache=False) - # pylint: disable=protected-access + assert session._accountant.d_in[NamedTable("flatmap2")] == 6 - # pylint: enable=protected-access @pytest.mark.parametrize( "budget", [(PureDPBudget(10)), 
(ApproxDPBudget(10, 0.5)), (RhoZCDPBudget(10))] @@ -1199,7 +1187,6 @@ def test_create_view_composed_correct_answer( def test_caching(self, spark): """Tests that caching works as expected.""" - # pylint: disable=protected-access session = Session.from_dataframe( privacy_budget=PureDPBudget(float("inf")), source_id="private", diff --git a/test/system/session/rows/test_add_max_rows_in_max_groups.py b/test/system/session/rows/test_add_max_rows_in_max_groups.py index 9a67b71e..8476b5ef 100644 --- a/test/system/session/rows/test_add_max_rows_in_max_groups.py +++ b/test/system/session/rows/test_add_max_rows_in_max_groups.py @@ -1,10 +1,10 @@ """Tests for Session with the AddMaxRowsInMaxGroups ProtectedChange. - Note that these tests are not intended to be exhaustive. They are intended to be a sanity check that the Session is working correctly with AddMaxRowsInMaxGroups. More thorough tests for Session are in -test/system/session/rows/test_add_max_rows.py.""" +test/system/session/rows/test_add_max_rows.py. +""" # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 @@ -162,7 +162,7 @@ def test_noise_info( "B", max_groups=1, max_rows_per_group=1 ), ) - # pylint: disable=protected-access + info = session._noise_info(query_expr, query_budget) - # pylint: enable=protected-access + assert info == expected diff --git a/test/system/session/rows/test_invalid.py b/test/system/session/rows/test_invalid.py index 8cb810b9..98a95228 100644 --- a/test/system/session/rows/test_invalid.py +++ b/test/system/session/rows/test_invalid.py @@ -55,7 +55,7 @@ def test_invalid_queries_evaluate( error_type: Type[Exception], expected_error_msg: str, ): - """evaluate raises error on invalid queries.""" + """Evaluate raises error on invalid queries.""" mock_accountant = Mock() mock_accountant.output_measure = PureDP() mock_accountant.input_metric = DictMetric( @@ -145,7 +145,7 @@ def test_format_insufficient_budget_msg( def test_invalid_privacy_budget_evaluate_and_create( self, output_measure: Union[PureDP, RhoZCDP] ): - """evaluate and create functions raise error on invalid privacy_budget.""" + """Evaluate and create functions raise error on invalid privacy_budget.""" one_budget: Union[PureDPBudget, ApproxDPBudget, RhoZCDPBudget] two_budget: Union[PureDPBudget, ApproxDPBudget, RhoZCDPBudget] if output_measure == PureDP(): diff --git a/test/unit/keysets/test_cross_join.py b/test/unit/keysets/test_cross_join.py index 022e4e85..8184af07 100644 --- a/test/unit/keysets/test_cross_join.py +++ b/test/unit/keysets/test_cross_join.py @@ -202,7 +202,6 @@ def test_valid( assert_dataframe_equal(ks.dataframe(), expected_df) -# pylint: disable=protected-access @parametrize( Case("left_plan")( left=KeySet._detect(["A"]), @@ -235,7 +234,6 @@ def test_valid( expected_columns=["B", "A"], ), ) -# pylint: enable=protected-access def test_valid_plan( left: Union[KeySet, KeySetPlan], right: Union[KeySet, KeySetPlan], diff --git a/test/unit/keysets/test_decomposition.py b/test/unit/keysets/test_decomposition.py index f247015f..1ea8d020 100644 --- a/test/unit/keysets/test_decomposition.py +++ b/test/unit/keysets/test_decomposition.py @@ -213,7 +213,6 @@ def test_valid( expected_factors: list[KeySet], expected_subtracted_values: list[KeySet], ): - # pylint: disable-next=protected-access factors, subtracted_values = ks._decompose(split_columns) _assert_keyset_sequence_equivalent( diff --git a/test/unit/keysets/test_detect.py b/test/unit/keysets/test_detect.py index c101f618..e7724ba1 100644 --- a/test/unit/keysets/test_detect.py +++ 
b/test/unit/keysets/test_detect.py @@ -14,11 +14,11 @@ def test_detect(): """KeySet.detect works as expected.""" - ks = KeySet._detect(["A", "B"]) # pylint: disable=protected-access + ks = KeySet._detect(["A", "B"]) assert isinstance(ks, KeySetPlan) assert ks.columns() == ["A", "B"] - ks = KeySet._detect(["B", "A"]) # pylint: disable=protected-access + ks = KeySet._detect(["B", "A"]) assert isinstance(ks, KeySetPlan) assert ks.columns() == ["B", "A"] @@ -44,4 +44,4 @@ def test_detect(): def test_invalid(columns: Any, expectation: ContextManager[None]): """Invalid domains are rejected.""" with expectation: - KeySet._detect(columns) # pylint: disable=protected-access + KeySet._detect(columns) diff --git a/test/unit/keysets/test_equivalence.py b/test/unit/keysets/test_equivalence.py index cbac75f3..911860f1 100644 --- a/test/unit/keysets/test_equivalence.py +++ b/test/unit/keysets/test_equivalence.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 -# pylint: disable=comparison-with-itself from typing import Optional, Union @@ -258,7 +257,6 @@ def test_equivalence_different_schemas(): assert ks1.is_equivalent(ks3) is False -# pylint: disable=protected-access @parametrize( Case("detect_eq")( ks1=KeySet._detect(["A", "B"]), @@ -328,7 +326,6 @@ def test_equivalence_different_schemas(): equivalent={False, None}, ), ) -# pylint: enable=protected-access def test_plan_equivalence( ks1: KeySetPlan, ks2: KeySetPlan, equivalent: Union[None, bool, set[Optional[bool]]] ): @@ -338,7 +335,7 @@ def test_plan_equivalence( assert ks1.is_equivalent(ks1) assert ks2.is_equivalent(ks2) if isinstance(equivalent, set): - assert (ks1 == ks2) in equivalent # pylint: disable=superfluous-parens + assert (ks1 == ks2) in equivalent assert isinstance(ks1 == ks2, bool) assert ks1.is_equivalent(ks2) in equivalent assert ks2.is_equivalent(ks1) in equivalent diff --git a/test/unit/keysets/test_filter.py b/test/unit/keysets/test_filter.py index 482928a6..965a07e4 100644 --- a/test/unit/keysets/test_filter.py +++ b/test/unit/keysets/test_filter.py @@ -111,7 +111,6 @@ def test_valid( assert_dataframe_equal(ks.dataframe(), expected_df) -# pylint: disable=protected-access @parametrize( Case("one_col_str")( base=KeySet._detect(["A"]), @@ -144,7 +143,6 @@ def test_valid( expected_columns=["A", "B"], ), ) -# pylint: enable=protected-access def test_valid_plan( base: KeySetPlan, condition: Callable[[], Union[str, Column]], diff --git a/test/unit/keysets/test_join.py b/test/unit/keysets/test_join.py index 4e82206b..6a4bcc18 100644 --- a/test/unit/keysets/test_join.py +++ b/test/unit/keysets/test_join.py @@ -178,7 +178,6 @@ def test_valid( assert_dataframe_equal(ks.dataframe(), expected_df) -# pylint: disable=protected-access @parametrize( Case("left_plan")( left=KeySet._detect(["A"]), @@ -211,7 +210,6 @@ def test_valid( expected_columns=["B", "C", "A"], ), ) -# pylint: enable=protected-access def test_valid_plan( left: Union[KeySet, KeySetPlan], right: Union[KeySet, KeySetPlan], diff --git a/test/unit/keysets/test_keyset.py b/test/unit/keysets/test_keyset.py index 2a716bf9..63825e80 100644 --- a/test/unit/keysets/test_keyset.py +++ b/test/unit/keysets/test_keyset.py @@ -814,7 +814,7 @@ def test_size_from_df(_, spark, pd_df, expected_size, schema): @pytest.fixture(scope="module") def _eq_hashing_test_data(spark): - "Set up test data." 
+ """Set up test data.""" pdf_ab = pd.DataFrame({"A": ["a1", "a2"], "B": [0, 1]}) df_ab = spark.createDataFrame(pdf_ab) pdf_ac = pd.DataFrame({"A": ["a1", "a2"], "C": [0, 1]}) diff --git a/test/unit/keysets/test_product_keyset.py b/test/unit/keysets/test_product_keyset.py index 8916e6e9..24020fbb 100644 --- a/test/unit/keysets/test_product_keyset.py +++ b/test/unit/keysets/test_product_keyset.py @@ -17,8 +17,6 @@ from ...conftest import assert_frame_equal_with_sort -# pylint: disable=unused-argument - def test_init_with_product_keyset( spark: SparkSession, @@ -392,7 +390,7 @@ def test_size(_, keyset: KeySet, expected: int): @pytest.fixture(scope="module") def _eq_hashing_test_data(spark): - "Set up test data." + """Set up test data.""" df_ab = spark.createDataFrame(pd.DataFrame({"A": ["a1", "a2"], "B": [0, 1]})) df_ij = spark.createDataFrame(pd.DataFrame({"I": ["i1", "i2"], "J": [0, 1]})) df_dc = spark.createDataFrame(pd.DataFrame({"D": ["d1", "d2"], "C": [0, 1]})) diff --git a/test/unit/keysets/test_project.py b/test/unit/keysets/test_project.py index 61019a2f..7b9f59be 100644 --- a/test/unit/keysets/test_project.py +++ b/test/unit/keysets/test_project.py @@ -69,7 +69,7 @@ ), Case("remove_detect_columns")( base=KeySet.from_tuples([(1, 2, 3)], ["A", "B", "C"]) - * KeySet._detect(["D", "E", "F"]), # pylint: disable=protected-access + * KeySet._detect(["D", "E", "F"]), columns=["A", "B"], expected_df=pd.DataFrame({"A": [1], "B": [2]}), expected_schema={ @@ -118,7 +118,6 @@ def test_valid( assert_dataframe_equal(ks.dataframe(), expected_df) -# pylint: disable=protected-access @parametrize( Case("one_column_str")( base=KeySet._detect(["A", "B"]), @@ -147,7 +146,6 @@ def test_valid( columns=["C", "A", "E"], ), ) -# pylint: enable=protected-access def test_valid_plan( base: KeySetPlan, columns: Union[str, Sequence[str]], diff --git a/test/unit/keysets/test_rewrite.py b/test/unit/keysets/test_rewrite.py index 83cd15f4..e0e57e87 100644 --- a/test/unit/keysets/test_rewrite.py +++ b/test/unit/keysets/test_rewrite.py @@ -200,7 +200,6 @@ def test_rewrite_equality( if not allow_unchanged: # Ensure that rewriting actually happened - # pylint: disable-next=protected-access assert ks_rewritten._op_tree != ks_original._op_tree assert ks_rewritten.columns() == ks_original.columns() diff --git a/test/unit/keysets/test_subtract.py b/test/unit/keysets/test_subtract.py index dfc68239..a449e7fc 100644 --- a/test/unit/keysets/test_subtract.py +++ b/test/unit/keysets/test_subtract.py @@ -151,7 +151,6 @@ def test_valid( assert ks.schema() == expected_schema -# pylint: disable=protected-access @parametrize( Case("single_column")( left=KeySet._detect(["A"]), @@ -177,7 +176,6 @@ def test_valid( expected_columns=["A", "B"], ), ) -# pylint: enable=protected-access def test_valid_plan( left: KeySetPlan, right: KeySet, diff --git a/test/unit/query_expr_compiler/test_measurement_visitor.py b/test/unit/query_expr_compiler/test_measurement_visitor.py index 7be38fa6..790b3351 100644 --- a/test/unit/query_expr_compiler/test_measurement_visitor.py +++ b/test/unit/query_expr_compiler/test_measurement_visitor.py @@ -354,7 +354,6 @@ def test_validate_measurement(self): mock_measurement.privacy_function.return_value = self.visitor.budget.value mid_stability = ExactNumber(2).expr # This should finish without raising an error - # pylint: disable=protected-access self.visitor._validate_measurement(mock_measurement, mid_stability) # Change it so that the privacy function returns something else @@ -364,7 +363,6 @@ def 
test_validate_measurement(self): match="Privacy function does not match per-query privacy budget.", ): self.visitor._validate_measurement(mock_measurement, mid_stability) - # pylint: enable=protected-access def _check_measurement(self, measurement: Measurement): """Check the basic attributes of a measurement (for all query exprs). @@ -692,10 +690,8 @@ def test_visit_groupby_quantile( noise_info: NoiseInfo, ) -> None: """Test visit_groupby_quantile.""" - # pylint: disable=protected-access self.run_with_empty_data_and_check_schema(query._query_expr, output_measure) self.check_noise_info(query._query_expr, output_measure, noise_info) - # pylint: enable=protected-access @pytest.mark.parametrize( "query,output_measure,noise_info", diff --git a/test/unit/query_expr_compiler/transformation_visitor/test_constraints.py b/test/unit/query_expr_compiler/transformation_visitor/test_constraints.py index e52a7868..c6b417d9 100644 --- a/test/unit/query_expr_compiler/transformation_visitor/test_constraints.py +++ b/test/unit/query_expr_compiler/transformation_visitor/test_constraints.py @@ -56,7 +56,7 @@ def test_max_rows_per_id(self, constraint_max: int): rows_per_id = result_df.groupby("id")["id"].count() assert all( rows_per_id <= constraint_max - ), f"MaxRowsPerID constraint violated, counts were:\n{str(rows_per_id)}" + ), f"MaxRowsPerID constraint violated, counts were:\n{rows_per_id!s}" self._test_is_subset(input_df, result_df) @@ -79,7 +79,7 @@ def test_max_groups_per_id(self, grouping_col: str, constraint_max: int): groups_per_id = result_df.groupby("id").nunique()[grouping_col] assert all( groups_per_id <= constraint_max - ), f"MaxGroupsPerID constraint violated, counts were:\n{str(groups_per_id)}" + ), f"MaxGroupsPerID constraint violated, counts were:\n{groups_per_id!s}" self._test_is_subset(input_df, result_df) diff --git a/test/unit/test_binning_spec.py b/test/unit/test_binning_spec.py index c5ad4045..dde28eb9 100644 --- a/test/unit/test_binning_spec.py +++ b/test/unit/test_binning_spec.py @@ -533,7 +533,6 @@ def test_immutable(): """Checks that each binning spec attribute is immutable.""" binspec = BinningSpec([0, 1, 2]) - # pylint: disable=protected-access with pytest.raises(FrozenInstanceError): binspec.bin_edges = [0, 1, 2, 3] # type: ignore @@ -571,7 +570,6 @@ def test_immutable(): with pytest.raises(FrozenInstanceError): binspec._both_endpoints = True # type: ignore - # pylint: enable=protected-access def test_repr(): diff --git a/test/unit/test_config.py b/test/unit/test_config.py index 35bb62b0..6a6e10af 100644 --- a/test/unit/test_config.py +++ b/test/unit/test_config.py @@ -20,16 +20,14 @@ def test_config_singleton(): # Adding feature flags for use in the tests is necessary because the collection # of feature flags existing at any given time is not stable. Unfortunately doing -# so makes mypy and pylint very unhappy, so we're ignoring errors related to the -# existence of an attribute on a class for the rest of this file. +# so makes mypy very unhappy, so we're ignoring errors related to the existence +# of an attribute on a class for the rest of this file. 
# mypy: disable-error-code=attr-defined -# pylint: disable=no-member @pytest.fixture def _with_example_features(): - # pylint: disable=protected-access """Add some example feature flags for testing.""" class _Features(Config.Features): @@ -112,7 +110,6 @@ def test_config_feature_flag_raise_if_disabled(): @pytest.mark.usefixtures("_with_example_features") def test_config_feature_flag_raise_if_disabled_snippet(): - # pylint: disable=protected-access """Feature flags' raise_if_disabled produces example code that enables flag.""" cfg = Config() @@ -130,6 +127,6 @@ def test_config_feature_flag_raise_if_disabled_snippet(): ), "No snippet to enable flag found in exception message" enable_snippet = error_message[enable_snippet_idx:] with patch("tmlt.analytics.config.config", cfg): - exec(enable_snippet, {}, {}) # pylint: disable=exec-used + exec(enable_snippet, {}, {}) assert ff, f"Flag {ff._name} did not get set by snippet from exception message" ff.disable() diff --git a/test/unit/test_privacy_budget.py b/test/unit/test_privacy_budget.py index 36b49641..8ecb2734 100644 --- a/test/unit/test_privacy_budget.py +++ b/test/unit/test_privacy_budget.py @@ -2,7 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 -# pylint: disable=pointless-string-statement import math from typing import List, Type, Union @@ -265,17 +264,14 @@ def test_budget_hashing(budgets: List[PrivacyBudget], equal: bool): assert budget0_hash != budget1_hash -# pylint: disable=protected-access def test_PureDPBudget_immutability(): """Tests that the PureDPBudget is immutable.""" - with pytest.raises(AttributeError): PureDPBudget(1)._epsilon = 2 # type: ignore def test_ApproxDPBudget_immutability(): """Tests that the ApproxDPBudget is immutable.""" - with pytest.raises(AttributeError): ApproxDPBudget(1, 0.1)._epsilon = 2 # type: ignore with pytest.raises(AttributeError): @@ -284,14 +280,10 @@ def test_ApproxDPBudget_immutability(): def test_RhoZCDPBudget_immutability(): """Tests that the RhoZCDPBudget is immutable.""" - with pytest.raises(AttributeError): RhoZCDPBudget(1)._rho = 2 # type: ignore -# pylint: enable=protected-access - - @pytest.mark.parametrize( "budget_a, budget_b, equal", [ diff --git a/test/unit/test_privacy_budget_rounding_helper.py b/test/unit/test_privacy_budget_rounding_helper.py index cdde7227..3da5de4c 100644 --- a/test/unit/test_privacy_budget_rounding_helper.py +++ b/test/unit/test_privacy_budget_rounding_helper.py @@ -2,8 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 -# pylint: disable=pointless-string-statement - from tmlt.core.utils.exact_number import ExactNumber from typeguard import typechecked @@ -31,7 +29,6 @@ def test_int_request(): we should never run into the tolerance threshold issue. This means the requested budget should be returned in all cases. """ - adjusted = _get_adjusted_budget(PURE_DP_99, PURE_DP_100) assert adjusted == PURE_DP_99 adjusted = _get_adjusted_budget(PURE_DP_101, PURE_DP_100) diff --git a/test/unit/test_query_builder.py b/test/unit/test_query_builder.py index 8abeabfc..2ee72f67 100644 --- a/test/unit/test_query_builder.py +++ b/test/unit/test_query_builder.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 -# pylint: disable=no-member, protected-access + # Disabling no-member because attributes of specific query types need to be referenced, # and the general QueryExpr type doesn't have the attribute. # Disabling protected-access to access the _query_expr attribute of Query regularly. 
@@ -73,7 +73,6 @@ def root_builder(): @pytest.mark.parametrize("join_columns", [(None), (["B"])]) def test_join_public(join_columns: Optional[List[str]]): """QueryBuilder.join_public works as expected with a public source ID.""" - join_table = "public" query = ( root_builder() @@ -105,7 +104,6 @@ def test_join_public(join_columns: Optional[List[str]]): @pytest.mark.parametrize("join_columns", [(None), (["B"])]) def test_join_public_dataframe(spark, join_columns: Optional[List[str]]): """QueryBuilder.join_public works as expected when used with a dataframe.""" - join_table = spark.createDataFrame(pd.DataFrame({"A": [1, 2]})) query = ( root_builder() @@ -628,7 +626,6 @@ def test_histogram(): def test_histogram_options(): """QueryBuilder.histogram works as expected, with options.""" - query = root_builder().histogram("A", [0, 5, 10], name="New") assert isinstance(query, Query) query_expr = query._query_expr @@ -1380,7 +1377,6 @@ def test_suppress_aggregates( ) def test_query_immutability(query: Query): """Tests that Query objects are immutable.""" - with pytest.raises(FrozenInstanceError): query._query_expr = QueryBuilder("testdf").count()._query_expr # type: ignore @@ -1438,9 +1434,7 @@ def test_query_immutability(query: Query): ) def test_query_fast_equality_check(query1: Query, query2: Query, equal: bool): """Tests that Query objects are equal when they should be.""" - # pylint: disable=protected-access assert query1._is_equivalent(query2) == equal - # pylint: enable=protected-access def root_grouped_builder(): diff --git a/test/unit/test_query_expr_compiler.py b/test/unit/test_query_expr_compiler.py index 839bee34..80968329 100644 --- a/test/unit/test_query_expr_compiler.py +++ b/test/unit/test_query_expr_compiler.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 -# pylint: disable=protected-access import datetime from typing import Dict, Union @@ -404,7 +403,7 @@ @pytest.fixture(name="test_data", scope="class") def setup(spark, request) -> None: - "Set up test data." 
+ """Set up test data.""" sdf = spark.createDataFrame( pd.DataFrame( [["0", 0, 0.0], ["0", 0, 1.0], ["0", 1, 2.0], ["1", 0, 3.0]], diff --git a/test/unit/test_query_expression.py b/test/unit/test_query_expression.py index 5c68ab39..398e0ca4 100644 --- a/test/unit/test_query_expression.py +++ b/test/unit/test_query_expression.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 -# pylint: disable=too-many-arguments, pointless-string-statement import datetime import re @@ -579,7 +578,7 @@ def test_join_public_dataframe_validation_column_type(spark): ], ) def test_invalid_suppress_aggregates( - spark: SparkSession, # pylint: disable=unused-argument + spark: SparkSession, child: GroupByCount, column: str, threshold: int, @@ -604,4 +603,4 @@ def test_queryexpr_hashing(queryexpr): """Tests that each query expression has enabled hashing and eq.""" test_dict = {queryexpr: 1} assert test_dict[queryexpr] == 1 - assert queryexpr == queryexpr # pylint: disable=comparison-with-itself + assert queryexpr == queryexpr diff --git a/test/unit/test_schema.py b/test/unit/test_schema.py index 56c087d4..f52de2bf 100644 --- a/test/unit/test_schema.py +++ b/test/unit/test_schema.py @@ -68,7 +68,6 @@ def test_schema_equality() -> None: def test_schema_hash() -> None: """Makes sure that schema hash is consistent.""" - columns_1 = {"a": "VARCHAR", "b": "INTEGER"} columns_2 = {"a": "VARCHAR", "b": "INTEGER"} columns_3 = {"y": "VARCHAR", "z": "INTEGER"} @@ -87,7 +86,6 @@ def test_schema_hash() -> None: def test_frozen_dict(): """FrozenDict works like an immutable dict.""" - a = FrozenDict.from_dict({"a": 1, "b": 2}) assert a["a"] == 1 assert a["b"] == 2 @@ -151,7 +149,7 @@ def test_frozen_dict_order_comparison(): fd3 = FrozenDict.from_dict({3: 4, 1: 2}) fd4 = FrozenDict.from_dict({1: 2, 3: 5}) - assert fd1 == fd1 # pylint: disable=comparison-with-itself + assert fd1 == fd1 assert fd1 == fd2 assert fd1 != fd3 assert fd1 != fd4 diff --git a/test/unit/test_session.py b/test/unit/test_session.py index f1333583..3a1dd9c7 100644 --- a/test/unit/test_session.py +++ b/test/unit/test_session.py @@ -3,8 +3,6 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 -# pylint: disable=protected-access - import re from typing import Any, Dict, List, Tuple, Type, Union from unittest.mock import ANY, Mock, patch @@ -93,7 +91,6 @@ # Disable redefined-outer-name because spark is used to create dataframes as test # inputs and within tests to check outputs and run queries. -# pylint: disable=redefined-outer-name def _privacy_budget_to_exact_number( @@ -453,7 +450,6 @@ def test_from_neighboring_relation_single( """Tests that :func:`Session._from_neighboring_relation` works as expected with a single relation. """ - sess = Session._from_neighboring_relation( privacy_budget=budget, private_sources={"private": self.sdf}, @@ -494,7 +490,6 @@ def test_from_neighboring_relation_add_remove_keys( """Tests that :func:`Session._from_neighboring_relation` works as expected with a single AddRemoveKeys relation. """ - sess = Session._from_neighboring_relation( privacy_budget=budget, private_sources={"private": self.sdf}, @@ -585,7 +580,8 @@ def test_add_public_dataframe(self): @pytest.mark.parametrize("d_in", [(sp.Integer(1)), (sp.sqrt(sp.Integer(2)))]) def test_evaluate_puredp_session_approxdp_query(self, spark, d_in): """Confirm that using an approxdp query on a puredp accountant raises an - error.""" + error. 
+        """
         with patch.object(
             QueryExprCompiler, "__call__", autospec=True
         ) as mock_compiler, patch(
@@ -1166,7 +1162,7 @@ def test_describe(self, spark):
                 ],
             )
         )
-        # pylint: enable=line-too-long
+
         session.describe()
         mock_print.assert_called_with(expected)
 
@@ -1239,12 +1235,11 @@ def test_describe_with_constraints(self, spark):
             )
         )
         session.describe()
-        # pylint: enable=line-too-long
+
         mock_print.assert_called_with(expected)
 
     def test_describe_with_id_column(self, spark):
         """Test :func:`_describe` with a table with an ID column."""
-
         with patch("builtins.print") as mock_print, patch(
             "tmlt.core.measurements.interactive_measurements.PrivacyAccountant"
         ) as mock_accountant:
@@ -1312,7 +1307,7 @@ def test_describe_with_id_column(self, spark):
                 ],
             )
         )
-        # pylint: enable=line-too-long
+
         session.describe()
         mock_print.assert_called_with(expected)
 
@@ -1509,7 +1504,7 @@ def test_describe_table_with_constraints(
             + expected_output
         )
         session.describe("private")
-        # pylint: enable=line-too-long
+
         mock_print.assert_called_with(expected)
 
     def test_supported_spark_types(self, spark):
@@ -1653,7 +1648,7 @@ def _setup_accountant(self, mock_accountant) -> None:
         mock_accountant.d_in = {NamedTable("private"): sp.Integer(1)}
 
     def test_invalid_dataframe_initialization(self):
-        """session raises error on invalid dataframe type"""
+        """Session raises error on invalid dataframe type."""
         with patch(
             "tmlt.core.measurements.interactive_measurements.PrivacyAccountant"
         ) as mock_accountant:
@@ -1679,7 +1674,7 @@ def test_invalid_dataframe_initialization(self):
             session.add_public_dataframe(source_id="public", dataframe=self.pdf)
 
     def test_invalid_data_properties(self, spark):
-        """session raises error on invalid data properties"""
+        """Session raises error on invalid data properties."""
         with patch(
             "tmlt.core.measurements.interactive_measurements.PrivacyAccountant"
         ) as mock_accountant:
@@ -1815,7 +1810,7 @@ def test_invalid_key_column(self) -> None:
     def test_invalid_source_id(
         self, source_id: str, exception_type: Type[Exception], expected_error_msg: str
    ):
-        """session raises error on invalid source_id."""
+        """Session raises error on invalid source_id."""
         with patch(
             "tmlt.core.measurements.interactive_measurements.PrivacyAccountant"
         ) as mock_accountant:
@@ -1874,7 +1869,7 @@ def test_invalid_public_source(self):
         "query_expr", [(["filter private A == 0"]), ([QueryBuilder("private")])]
     )
     def test_invalid_queries_evaluate(self, query_expr: Any):
-        """evaluate raises error on invalid queries."""
+        """Evaluate raises error on invalid queries."""
         with patch(
             "tmlt.core.measurements.interactive_measurements.PrivacyAccountant"
         ) as mock_accountant:
@@ -1904,7 +1899,7 @@ def test_invalid_queries_create(
         exception_type: Type[Exception],
         expected_error_msg: str,
     ):
-        """create functions raise error on invalid input queries."""
+        """Create functions raise error on invalid input queries."""
         with patch(
             "tmlt.core.measurements.interactive_measurements.PrivacyAccountant"
         ) as mock_accountant:
@@ -2138,7 +2133,8 @@ def test_duplicate_source_id(self):
 
     def test_build_invalid_identifier(self):
         """Tests that build fails if protected change does
-            not have associated ID space."""
+        not have an associated ID space.
+        """
         builder = (
             Session.Builder()
             .with_private_dataframe(
@@ -2244,7 +2240,7 @@ def test_build_with_id_and_only_one_df(self) -> None:
 
     @pytest.mark.parametrize(
         "builder,expected_sympy_budget,expected_output_measure,"
-        + "private_dataframes,public_dataframes",
+        "private_dataframes,public_dataframes",
         [
             (
                 Session.Builder().with_privacy_budget(PureDPBudget(10)),
@@ -2448,7 +2444,6 @@ def test_automatic_partitions(
     protected_change: ProtectedChange,
 ):
     """Tests that partition selection is automatically called with correct queries."""
-
     # Turning on experimental features for this test.
     with config.features.auto_partition_selection.enabled():
         spark = SparkSession.builder.getOrCreate()
@@ -2561,7 +2556,6 @@ def test_automatic_partition_selection_invalid_budget(
     expected_error: str,
 ):
     """Test that Automatic Partition Selection queries with an invalid budget error."""
-
    with config.features.auto_partition_selection.enabled():
         spark = SparkSession.builder.getOrCreate()
         test_df = spark.createDataFrame(input_data)
@@ -2596,8 +2590,8 @@ def test_automatic_partition_selection_invalid_budget(
 )
 def test_automatic_partition_null_keyset(query_expr: Query, expected_columns: List):
     """Tests that automatic partition selection with null keyset raises a warning and
-    completes with an output dataframe with len(0) but the correct schema."""
-
+    completes with a length-0 output dataframe that has the correct schema.
+    """
     with config.features.auto_partition_selection.enabled():
         spark = SparkSession.builder.getOrCreate()
         # An empty DF ensures that automatic partition selection returns a null keyset.
diff --git a/test/unit/test_table_identifiers.py b/test/unit/test_table_identifiers.py
index 72396170..84a24464 100644
--- a/test/unit/test_table_identifiers.py
+++ b/test/unit/test_table_identifiers.py
@@ -20,7 +20,7 @@ def test_table_equality():
     assert TableCollection(name="private1") != TableCollection(name="private2")
 
     temp_table = TemporaryTable()
-    assert temp_table == temp_table  # pylint: disable=comparison-with-itself
+    assert temp_table == temp_table
     assert temp_table != TemporaryTable()
 
 
diff --git a/test/unit/test_utils.py b/test/unit/test_utils.py
index be4b0f01..baa0785e 100644
--- a/test/unit/test_utils.py
+++ b/test/unit/test_utils.py
@@ -8,6 +8,6 @@
 ### Test for tmlt.analytics.utils.check_installation()
 # We want the `spark` argument here so that the test will use the
 # (session-wide, pytest-provided) spark session.
-def test_check_installation(spark) -> None: # pylint: disable=unused-argument +def test_check_installation(spark) -> None: """Test that check_installation works (doesn't raise an error).""" check_installation() diff --git a/uv.lock b/uv.lock index 2180c3e0..99176380 100644 --- a/uv.lock +++ b/uv.lock @@ -379,15 +379,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/99/c7/d1ec24fb280caa5a79b6b950db565dab30210a66259d17d5bb2b3a9f878d/dependency_groups-1.3.1-py3-none-any.whl", hash = "sha256:51aeaa0dfad72430fcfb7bcdbefbd75f3792e5919563077f30bc0d73f4493030", size = 8664, upload-time = "2025-05-02T00:34:27.085Z" }, ] -[[package]] -name = "dill" -version = "0.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/50/3d/9373ad9c56321fdab5b41197068e1d8c25883b3fea29dd361f9b55116869/dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049", size = 119668, upload-time = "2025-04-16T00:41:47.671Z" }, -] - [[package]] name = "distlib" version = "0.3.9" @@ -739,15 +730,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/b9/59e120d24a2ec5fc2d30646adb2efb4621aab3c6d83d66fb2a7a182db032/matplotlib-3.10.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb73d8aa75a237457988f9765e4dfe1c0d2453c5ca4eabc897d4309672c8e014", size = 8594298, upload-time = "2025-05-08T19:10:51.738Z" }, ] -[[package]] -name = "mccabe" -version = "0.7.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e7/ff/0ffefdcac38932a54d2b5eed4e0ba8a408f215002cd178ad1df0f2806ff8/mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325", size = 9658, upload-time = "2022-01-24T01:14:51.113Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/27/1a/1f68f9ba0c207934b35b86a8ca3aad8395a3d6dd7921c0686e23853ff5a9/mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e", size = 7350, upload-time = "2022-01-24T01:14:49.62Z" }, -] - [[package]] name = "mdurl" version = "0.1.2" @@ -1308,23 +1290,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e2/0d/8ba33fa83a7dcde13eb3c1c2a0c1cc29950a048bfed6d9b0d8b6bd710b4c/pydata_sphinx_theme-0.16.1-py3-none-any.whl", hash = "sha256:225331e8ac4b32682c18fcac5a57a6f717c4e632cea5dd0e247b55155faeccde", size = 6723264, upload-time = "2024-12-17T10:53:35.645Z" }, ] -[[package]] -name = "pydocstyle" -version = "6.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "snowballstemmer" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e9/5c/d5385ca59fd065e3c6a5fe19f9bc9d5ea7f2509fa8c9c22fb6b2031dd953/pydocstyle-6.3.0.tar.gz", hash = "sha256:7ce43f0c0ac87b07494eb9c0b462c0b73e6ff276807f204d6b53edc72b7e44e1", size = 36796, upload-time = "2023-01-17T20:29:19.838Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/36/ea/99ddefac41971acad68f14114f38261c1f27dac0b3ec529824ebc739bdaa/pydocstyle-6.3.0-py3-none-any.whl", hash = "sha256:118762d452a49d6b05e194ef344a55822987a462831ade91ec5c06fd2169d019", size = 38038, upload-time = 
"2023-01-17T20:29:18.094Z" }, -] - -[package.optional-dependencies] -toml = [ - { name = "tomli", marker = "python_full_version < '3.11'" }, -] - [[package]] name = "pygments" version = "2.19.2" @@ -1334,25 +1299,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] -[[package]] -name = "pylint" -version = "3.3.7" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "astroid" }, - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "dill" }, - { name = "isort" }, - { name = "mccabe" }, - { name = "platformdirs" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, - { name = "tomlkit" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/1c/e4/83e487d3ddd64ab27749b66137b26dc0c5b5c161be680e6beffdc99070b3/pylint-3.3.7.tar.gz", hash = "sha256:2b11de8bde49f9c5059452e0c310c079c746a0a8eeaa789e5aa966ecc23e4559", size = 1520709, upload-time = "2025-05-04T17:07:51.089Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e8/83/bff755d09e31b5d25cc7fdc4bf3915d1a404e181f1abf0359af376845c24/pylint-3.3.7-py3-none-any.whl", hash = "sha256:43860aafefce92fca4cf6b61fe199cdc5ae54ea28f9bf4cd49de267b5195803d", size = 522565, upload-time = "2025-05-04T17:07:48.714Z" }, -] - [[package]] name = "pyparsing" version = "3.2.3" @@ -1575,6 +1521,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229, upload-time = "2025-03-30T14:15:12.283Z" }, ] +[[package]] +name = "ruff" +version = "0.14.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/55/cccfca45157a2031dcbb5a462a67f7cf27f8b37d4b3b1cd7438f0f5c1df6/ruff-0.14.4.tar.gz", hash = "sha256:f459a49fe1085a749f15414ca76f61595f1a2cc8778ed7c279b6ca2e1fd19df3", size = 5587844, upload-time = "2025-11-06T22:07:45.033Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/b9/67240254166ae1eaa38dec32265e9153ac53645a6c6670ed36ad00722af8/ruff-0.14.4-py3-none-linux_armv6l.whl", hash = "sha256:e6604613ffbcf2297cd5dcba0e0ac9bd0c11dc026442dfbb614504e87c349518", size = 12606781, upload-time = "2025-11-06T22:07:01.841Z" }, + { url = "https://files.pythonhosted.org/packages/46/c8/09b3ab245d8652eafe5256ab59718641429f68681ee713ff06c5c549f156/ruff-0.14.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d99c0b52b6f0598acede45ee78288e5e9b4409d1ce7f661f0fa36d4cbeadf9a4", size = 12946765, upload-time = "2025-11-06T22:07:05.858Z" }, + { url = "https://files.pythonhosted.org/packages/14/bb/1564b000219144bf5eed2359edc94c3590dd49d510751dad26202c18a17d/ruff-0.14.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9358d490ec030f1b51d048a7fd6ead418ed0826daf6149e95e30aa67c168af33", size = 11928120, upload-time = "2025-11-06T22:07:08.023Z" }, + { url = "https://files.pythonhosted.org/packages/a3/92/d5f1770e9988cc0742fefaa351e840d9aef04ec24ae1be36f333f96d5704/ruff-0.14.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81b40d27924f1f02dfa827b9c0712a13c0e4b108421665322218fc38caf615c2", size = 12370877, upload-time = "2025-11-06T22:07:10.015Z" }, + { url = 
"https://files.pythonhosted.org/packages/e2/29/e9282efa55f1973d109faf839a63235575519c8ad278cc87a182a366810e/ruff-0.14.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f5e649052a294fe00818650712083cddc6cc02744afaf37202c65df9ea52efa5", size = 12408538, upload-time = "2025-11-06T22:07:13.085Z" }, + { url = "https://files.pythonhosted.org/packages/8e/01/930ed6ecfce130144b32d77d8d69f5c610e6d23e6857927150adf5d7379a/ruff-0.14.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa082a8f878deeba955531f975881828fd6afd90dfa757c2b0808aadb437136e", size = 13141942, upload-time = "2025-11-06T22:07:15.386Z" }, + { url = "https://files.pythonhosted.org/packages/6a/46/a9c89b42b231a9f487233f17a89cbef9d5acd538d9488687a02ad288fa6b/ruff-0.14.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:1043c6811c2419e39011890f14d0a30470f19d47d197c4858b2787dfa698f6c8", size = 14544306, upload-time = "2025-11-06T22:07:17.631Z" }, + { url = "https://files.pythonhosted.org/packages/78/96/9c6cf86491f2a6d52758b830b89b78c2ae61e8ca66b86bf5a20af73d20e6/ruff-0.14.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a9f3a936ac27fb7c2a93e4f4b943a662775879ac579a433291a6f69428722649", size = 14210427, upload-time = "2025-11-06T22:07:19.832Z" }, + { url = "https://files.pythonhosted.org/packages/71/f4/0666fe7769a54f63e66404e8ff698de1dcde733e12e2fd1c9c6efb689cb5/ruff-0.14.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:95643ffd209ce78bc113266b88fba3d39e0461f0cbc8b55fb92505030fb4a850", size = 13658488, upload-time = "2025-11-06T22:07:22.32Z" }, + { url = "https://files.pythonhosted.org/packages/ee/79/6ad4dda2cfd55e41ac9ed6d73ef9ab9475b1eef69f3a85957210c74ba12c/ruff-0.14.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:456daa2fa1021bc86ca857f43fe29d5d8b3f0e55e9f90c58c317c1dcc2afc7b5", size = 13354908, upload-time = "2025-11-06T22:07:24.347Z" }, + { url = "https://files.pythonhosted.org/packages/b5/60/f0b6990f740bb15c1588601d19d21bcc1bd5de4330a07222041678a8e04f/ruff-0.14.4-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:f911bba769e4a9f51af6e70037bb72b70b45a16db5ce73e1f72aefe6f6d62132", size = 13587803, upload-time = "2025-11-06T22:07:26.327Z" }, + { url = "https://files.pythonhosted.org/packages/c9/da/eaaada586f80068728338e0ef7f29ab3e4a08a692f92eb901a4f06bbff24/ruff-0.14.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:76158a7369b3979fa878612c623a7e5430c18b2fd1c73b214945c2d06337db67", size = 12279654, upload-time = "2025-11-06T22:07:28.46Z" }, + { url = "https://files.pythonhosted.org/packages/66/d4/b1d0e82cf9bf8aed10a6d45be47b3f402730aa2c438164424783ac88c0ed/ruff-0.14.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f3b8f3b442d2b14c246e7aeca2e75915159e06a3540e2f4bed9f50d062d24469", size = 12357520, upload-time = "2025-11-06T22:07:31.468Z" }, + { url = "https://files.pythonhosted.org/packages/04/f4/53e2b42cc82804617e5c7950b7079d79996c27e99c4652131c6a1100657f/ruff-0.14.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c62da9a06779deecf4d17ed04939ae8b31b517643b26370c3be1d26f3ef7dbde", size = 12719431, upload-time = "2025-11-06T22:07:33.831Z" }, + { url = "https://files.pythonhosted.org/packages/a2/94/80e3d74ed9a72d64e94a7b7706b1c1ebaa315ef2076fd33581f6a1cd2f95/ruff-0.14.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5a443a83a1506c684e98acb8cb55abaf3ef725078be40237463dae4463366349", size = 13464394, upload-time = "2025-11-06T22:07:35.905Z" }, + { url = 
"https://files.pythonhosted.org/packages/54/1a/a49f071f04c42345c793d22f6cf5e0920095e286119ee53a64a3a3004825/ruff-0.14.4-py3-none-win32.whl", hash = "sha256:643b69cb63cd996f1fc7229da726d07ac307eae442dd8974dbc7cf22c1e18fff", size = 12493429, upload-time = "2025-11-06T22:07:38.43Z" }, + { url = "https://files.pythonhosted.org/packages/bc/22/e58c43e641145a2b670328fb98bc384e20679b5774258b1e540207580266/ruff-0.14.4-py3-none-win_amd64.whl", hash = "sha256:26673da283b96fe35fa0c939bf8411abec47111644aa9f7cfbd3c573fb125d2c", size = 13635380, upload-time = "2025-11-06T22:07:40.496Z" }, + { url = "https://files.pythonhosted.org/packages/30/bd/4168a751ddbbf43e86544b4de8b5c3b7be8d7167a2a5cb977d274e04f0a1/ruff-0.14.4-py3-none-win_arm64.whl", hash = "sha256:dd09c292479596b0e6fec8cd95c65c3a6dc68e9ad17b8f2382130f87ff6a75bb", size = 12663065, upload-time = "2025-11-06T22:07:42.603Z" }, +] + [[package]] name = "scipy" version = "1.15.3" @@ -1975,11 +1947,8 @@ isort = [ mypy = [ { name = "mypy" }, ] -pydocstyle = [ - { name = "pydocstyle", extra = ["toml"] }, -] -pylint = [ - { name = "pylint" }, +ruff = [ + { name = "ruff" }, ] scripting = [ { name = "nox" }, @@ -2031,8 +2000,7 @@ docs-examples = [ ] isort = [{ name = "isort", specifier = ">=5.11,<6" }] mypy = [{ name = "mypy", specifier = ">=1.14.0" }] -pydocstyle = [{ name = "pydocstyle", extras = ["toml"], specifier = ">=6.3" }] -pylint = [{ name = "pylint", specifier = ">=3.2.5" }] +ruff = [{ name = "ruff", specifier = ">=0.14.3,<1" }] scripting = [ { name = "nox", specifier = ">=2024.3.2" }, { name = "tmlt-nox-utils", git = "https://github.com/opendp/tumult-tools.git?subdirectory=nox-utils" }, @@ -2078,8 +2046,8 @@ wheels = [ [[package]] name = "tmlt-nox-utils" -version = "0.0.0.post23+df6aa1c0" -source = { git = "https://github.com/opendp/tumult-tools.git?subdirectory=nox-utils#df6aa1c070f189fc6aad44fdde812781951300a9" } +version = "0.0.0.post30+8504968" +source = { git = "https://github.com/opendp/tumult-tools.git?subdirectory=nox-utils#0850496832ff017c00046a8bb8ae331945812879" } dependencies = [ { name = "gitpython" }, { name = "nox" }, From 7bbc80f4683970b6d6feb960a04e1aae0a685ca1 Mon Sep 17 00:00:00 2001 From: Tom Magerlein Date: Mon, 10 Nov 2025 18:34:22 -0500 Subject: [PATCH 2/5] Disable RUF010 and revert changes it introduced --- pyproject.toml | 11 +++++++++-- src/tmlt/analytics/_query_expr.py | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c6e488cb..ac527435 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -145,8 +145,15 @@ select = [ "ISC", "SLF" ] ignore = [ - "PLR09", # too-many-* - "PLR2004", # magic-value-comparison + # too-many-*: These rules are too context-dependent to be generally useful, + # we can evaluate this during code reviews. + "PLR09", + # magic-value-comparison: This rule flags a lot of constants that don't + # really make sense, we can make this call during code reviews. + "PLR2004", + # explicit-f-string-type-conversion: we don't generally use the !r syntax in + # f-strings, which this rule enforces. 
+ "RUF010", # TODO: This disables every lint that is currently failing; go through and # either fix/individually disable each instance, or choose to permanently diff --git a/src/tmlt/analytics/_query_expr.py b/src/tmlt/analytics/_query_expr.py index 9d24f91f..4dcec824 100644 --- a/src/tmlt/analytics/_query_expr.py +++ b/src/tmlt/analytics/_query_expr.py @@ -1132,7 +1132,7 @@ def _validate(self, input_schema: Schema): if not (isinstance(val, int) and pytypes[col] == float): raise ValueError( f"Column '{col}' cannot have nulls replaced with " - f"{val!r}, as that value's type does not match the " + f"{repr(val)}, as that value's type does not match the " f"column type {input_schema[col].column_type.name}" ) From a2e01efe76478adac3d5f36a10d75d0d216e8033 Mon Sep 17 00:00:00 2001 From: Tom Magerlein Date: Mon, 10 Nov 2025 20:05:45 -0500 Subject: [PATCH 3/5] Remove unneeded whitespace where pylint control comments were removed --- test/system/session/ids/test_count_distinct_optimization.py | 2 -- test/system/session/ids/test_partition.py | 2 -- test/system/session/rows/test_add_max_rows.py | 5 ----- test/system/session/rows/test_add_max_rows_in_max_groups.py | 2 -- .../transformation_visitor/test_constraints.py | 4 ++-- test/unit/test_query_builder.py | 4 ---- test/unit/test_session.py | 4 ++-- 7 files changed, 4 insertions(+), 19 deletions(-) diff --git a/test/system/session/ids/test_count_distinct_optimization.py b/test/system/session/ids/test_count_distinct_optimization.py index e7df7dc4..657aade4 100644 --- a/test/system/session/ids/test_count_distinct_optimization.py +++ b/test/system/session/ids/test_count_distinct_optimization.py @@ -145,7 +145,6 @@ def test_insufficient_constraints(query: QueryBuilder, session): def test_noise_scale_puredp(query: QueryExpr, expected_noise: List[float], session): """Noise scales are adjusted correctly for different truncations with pure DP.""" noise_info = session._noise_info(query, PureDPBudget(1)) - noise = [info["noise_parameter"] for info in noise_info] assert noise == expected_noise @@ -190,6 +189,5 @@ def test_noise_scale_puredp(query: QueryExpr, expected_noise: List[float], sessi def test_noise_scale_zcdp(query: QueryExpr, expected_noise: List[float], session): """Noise scales are adjusted correctly for different truncations with zCDP.""" noise_info = session._noise_info(query, RhoZCDPBudget(1)) - noise = [info["noise_parameter"] for info in noise_info] assert noise == expected_noise diff --git a/test/system/session/ids/test_partition.py b/test/system/session/ids/test_partition.py index ee9049b2..1f29371e 100644 --- a/test/system/session/ids/test_partition.py +++ b/test/system/session/ids/test_partition.py @@ -137,7 +137,6 @@ def test_partition_and_create_with_MaxRowsPerID(session, table_stability): assert_frame_equal_with_sort( answer_session3.toPandas(), pd.DataFrame({"count": [1]}) ) - assert session2._input_metric == DictMetric( {NamedTable("part0"): SymmetricDifference()} ) @@ -192,7 +191,6 @@ def test_partition_and_create_with_MaxGroupsPerID(session, table_stability): assert_frame_equal_with_sort( answer_session3.toPandas(), pd.DataFrame({"count": [1]}) ) - assert session2._input_metric == DictMetric( {TableCollection("a"): CoreAddRemoveKeys({NamedTable("part0"): "id"})} ) diff --git a/test/system/session/rows/test_add_max_rows.py b/test/system/session/rows/test_add_max_rows.py index ceff9f25..10860ca9 100644 --- a/test/system/session/rows/test_add_max_rows.py +++ b/test/system/session/rows/test_add_max_rows.py @@ -158,7 +158,6 @@ def 
test_queries_privacy_budget_infinity_rhozcdp( """ if expected_expr is not None: query_expr = query_expr_or_builder._query_expr - assert query_expr == expected_expr session = Session.from_dataframe( @@ -254,9 +253,7 @@ def test_noise_info( dataframe=self.sdf, protected_change=AddOneRow(), ) - info = session._noise_info(query_expr, query_budget) - assert info == expected @pytest.mark.parametrize( @@ -1089,7 +1086,6 @@ def test_create_view_composed(self, budget: PrivacyBudget): max_rows=2, ) session.create_view(transformation_query1, "flatmap1", cache=False) - assert session._accountant.d_in[NamedTable("flatmap1")] == 2 transformation_query2 = QueryBuilder("flatmap1").flat_map( @@ -1099,7 +1095,6 @@ def test_create_view_composed(self, budget: PrivacyBudget): max_rows=3, ) session.create_view(transformation_query2, "flatmap2", cache=False) - assert session._accountant.d_in[NamedTable("flatmap2")] == 6 @pytest.mark.parametrize( diff --git a/test/system/session/rows/test_add_max_rows_in_max_groups.py b/test/system/session/rows/test_add_max_rows_in_max_groups.py index 8476b5ef..4a335b9d 100644 --- a/test/system/session/rows/test_add_max_rows_in_max_groups.py +++ b/test/system/session/rows/test_add_max_rows_in_max_groups.py @@ -162,7 +162,5 @@ def test_noise_info( "B", max_groups=1, max_rows_per_group=1 ), ) - info = session._noise_info(query_expr, query_budget) - assert info == expected diff --git a/test/unit/query_expr_compiler/transformation_visitor/test_constraints.py b/test/unit/query_expr_compiler/transformation_visitor/test_constraints.py index c6b417d9..02e8f44d 100644 --- a/test/unit/query_expr_compiler/transformation_visitor/test_constraints.py +++ b/test/unit/query_expr_compiler/transformation_visitor/test_constraints.py @@ -56,7 +56,7 @@ def test_max_rows_per_id(self, constraint_max: int): rows_per_id = result_df.groupby("id")["id"].count() assert all( rows_per_id <= constraint_max - ), f"MaxRowsPerID constraint violated, counts were:\n{rows_per_id!s}" + ), f"MaxRowsPerID constraint violated, counts were:\n{rows_per_id}" self._test_is_subset(input_df, result_df) @@ -79,7 +79,7 @@ def test_max_groups_per_id(self, grouping_col: str, constraint_max: int): groups_per_id = result_df.groupby("id").nunique()[grouping_col] assert all( groups_per_id <= constraint_max - ), f"MaxGroupsPerID constraint violated, counts were:\n{groups_per_id!s}" + ), f"MaxGroupsPerID constraint violated, counts were:\n{groups_per_id}" self._test_is_subset(input_df, result_df) diff --git a/test/unit/test_query_builder.py b/test/unit/test_query_builder.py index 2ee72f67..8dda2fa2 100644 --- a/test/unit/test_query_builder.py +++ b/test/unit/test_query_builder.py @@ -3,10 +3,6 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 -# Disabling no-member because attributes of specific query types need to be referenced, -# and the general QueryExpr type doesn't have the attribute. -# Disabling protected-access to access the _query_expr attribute of Query regularly. 
- import datetime import re from dataclasses import FrozenInstanceError diff --git a/test/unit/test_session.py b/test/unit/test_session.py index 3a1dd9c7..543faee2 100644 --- a/test/unit/test_session.py +++ b/test/unit/test_session.py @@ -1234,8 +1234,8 @@ def test_describe_with_constraints(self, spark): ], ) ) - session.describe() + session.describe() mock_print.assert_called_with(expected) def test_describe_with_id_column(self, spark): @@ -1503,8 +1503,8 @@ def test_describe_table_with_constraints( + """\n\tConstraints:\n""" + expected_output ) - session.describe("private") + session.describe("private") mock_print.assert_called_with(expected) def test_supported_spark_types(self, spark): From 5f77abc44dc0e42c1bfb405d63781d003f582417 Mon Sep 17 00:00:00 2001 From: Tom Magerlein Date: Wed, 12 Nov 2025 17:48:08 -0500 Subject: [PATCH 4/5] Update pyspark intersphinx location to match Core --- doc/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index bde158c7..5cdee9de 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -176,7 +176,7 @@ "numpy": ("https://numpy.org/doc/1.18/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/version/1.2.0/", None), "sympy": ("https://docs.sympy.org/latest/", None), - "pyspark": ("https://archive.apache.org/dist/spark/docs/3.1.1/api/python/", None), + "pyspark": ("https://downloads.apache.org/spark/docs/3.5.1/api/python/", None), } # Substitutions From 83bd45c08f1376bd50b3fbc99ddb3244b5f5e64a Mon Sep 17 00:00:00 2001 From: Tom Magerlein Date: Fri, 14 Nov 2025 18:56:01 -0500 Subject: [PATCH 5/5] Fix doctests, pull in changes from opendp/tumult-core#47 --- doc/conf.py | 2 +- doc/deployment/spark.rst | 9 +++------ src/tmlt/analytics/session.py | 5 ++++- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 5cdee9de..72bf3599 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -176,7 +176,7 @@ "numpy": ("https://numpy.org/doc/1.18/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/version/1.2.0/", None), "sympy": ("https://docs.sympy.org/latest/", None), - "pyspark": ("https://downloads.apache.org/spark/docs/3.5.1/api/python/", None), + "pyspark": ("https://downloads.apache.org/spark/docs/3.5.7/api/python/", None), } # Substitutions diff --git a/doc/deployment/spark.rst b/doc/deployment/spark.rst index 24953460..fb3b70f9 100644 --- a/doc/deployment/spark.rst +++ b/doc/deployment/spark.rst @@ -44,12 +44,9 @@ database, you should use the following options when creating a Spark session: .enableHiveSupport() .getOrCreate() -To see where Hive's warehouse directory is, you can use the -`Hive CLI `_ -(or its replacement, -`Beehive `_) -to view the -`relevant configuration parameter `_: +To see where Hive's warehouse directory is, you can use the `Hive CLI `_ +(or its replacement, `Beeline `_) +to view the `relevant configuration parameter `_: .. 
code-block:: diff --git a/src/tmlt/analytics/session.py b/src/tmlt/analytics/session.py index 3f193784..05bc025f 100644 --- a/src/tmlt/analytics/session.py +++ b/src/tmlt/analytics/session.py @@ -7,7 +7,9 @@ from typing import Any, Dict, List, Optional, Tuple, Type, Union, cast from warnings import warn +import pandas as pd # needed for doctests import sympy as sp +from pyspark.sql import SparkSession # needed for doctests from pyspark.sql import DataFrame from tabulate import tabulate from tmlt.core.domains.collections import DictDomain @@ -87,9 +89,10 @@ RhoZCDPBudget, _get_adjusted_budget, ) -from tmlt.analytics.protected_change import ( +from tmlt.analytics.protected_change import ( # AddOneRow needed for doctests AddMaxRows, AddMaxRowsInMaxGroups, + AddOneRow, AddRowsWithID, ProtectedChange, )
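
A note for reviewers on the RUF010 suppression in PATCH 2/5: RUF010
(explicit-f-string-type-conversion) rewrites explicit str()/repr()/ascii()
calls inside f-string replacement fields into the equivalent !s/!r/!a
conversion flags, which is why PATCH 1/5 briefly turned {repr(val)} in
_query_expr.py into {val!r} before PATCH 2/5 reverted it. The two spellings
render identically; a minimal sketch (illustrative only, not code from these
patches) of the pattern the rule enforces:

    val = None
    # The form RUF010 flags: an explicit repr() call in the replacement field...
    explicit = f"cannot have nulls replaced with {repr(val)}"
    # ...and the conversion-flag form its autofix would produce:
    flagged = f"cannot have nulls replaced with {val!r}"
    assert explicit == flagged  # both evaluate to "... replaced with None"

Because the output is identical either way, ignoring RUF010 and keeping the
repr() spelling is purely a style choice.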
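
On the imports marked "needed for doctests" in PATCH 5/5: doctest normally
executes a docstring's examples against the enclosing module's global
namespace, so any name an example uses without importing it must be resolvable
at module level; otherwise the doctest run fails with a NameError. A
hypothetical example of the shape those imports support (the real examples
live in session.py's docstrings; the Session.from_dataframe call here mirrors
the ones used in the tests above):

    >>> spark = SparkSession.builder.getOrCreate()
    >>> df = spark.createDataFrame(pd.DataFrame({"a": [1, 2, 3]}))
    >>> session = Session.from_dataframe(
    ...     privacy_budget=PureDPBudget(1),
    ...     source_id="private",
    ...     dataframe=df,
    ...     protected_change=AddOneRow(),
    ... )

Here SparkSession, pd, and AddOneRow all resolve through the module-level
imports the patch adds, rather than through the example itself.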