From 607c6f0331f1811831f1ad30b5be641ec7dfc522 Mon Sep 17 00:00:00 2001 From: Tom Magerlein Date: Thu, 6 Nov 2025 20:42:23 -0500 Subject: [PATCH 1/9] Add ruff-check nox session and base ruff config --- noxfile.py | 1 + pyproject.toml | 13 +++++++++++++ uv.lock | 36 +++++++++++++++++++++++++++++++++--- 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/noxfile.py b/noxfile.py index e8a9360..fdbec99 100644 --- a/noxfile.py +++ b/noxfile.py @@ -178,6 +178,7 @@ def build(session): sm.black() sm.isort() +sm.ruff_check() sm.mypy() sm.pylint() sm.pydocstyle() diff --git a/pyproject.toml b/pyproject.toml index e0d92af..34bec24 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,6 +68,7 @@ default-groups = "all" [dependency-groups] build = ["cibuildwheel >=2,<3"] +ruff = ["ruff >=0.14.3,<1"] black = ["black >=23.3,<24"] isort = ["isort >=5.11,<6"] mypy = ["mypy >=1.14.0"] @@ -194,6 +195,18 @@ source = ["src/tmlt/core", ".nox/**/site-packages/tmlt/core"] ################################################################################ # Linter configuration +[tool.ruff.lint] +# Enable Ruff-specific lints plus Pylint and pydocstyle +select = ["RUF", "PL", "D"] +ignore = [ + "RUF002", # ambiguous-unicode-character-docstring -- doesn't like unicode Greek letters + "PLR09", # too-many-* + "PLR2004", # magic-value-comparison +] + +[tool.ruff.lint.pydocstyle] +convention = "google" + [tool.black] force-exclude = "noxfile.py" diff --git a/uv.lock b/uv.lock index 9de6836..cfc6f5c 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.10, <3.13" resolution-markers = [ "python_full_version >= '3.12' and sys_platform != 'darwin'", @@ -1616,6 +1616,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229, upload-time = "2025-03-30T14:15:12.283Z" }, ] +[[package]] +name = "ruff" +version = "0.14.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/55/cccfca45157a2031dcbb5a462a67f7cf27f8b37d4b3b1cd7438f0f5c1df6/ruff-0.14.4.tar.gz", hash = "sha256:f459a49fe1085a749f15414ca76f61595f1a2cc8778ed7c279b6ca2e1fd19df3", size = 5587844, upload-time = "2025-11-06T22:07:45.033Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/b9/67240254166ae1eaa38dec32265e9153ac53645a6c6670ed36ad00722af8/ruff-0.14.4-py3-none-linux_armv6l.whl", hash = "sha256:e6604613ffbcf2297cd5dcba0e0ac9bd0c11dc026442dfbb614504e87c349518", size = 12606781, upload-time = "2025-11-06T22:07:01.841Z" }, + { url = "https://files.pythonhosted.org/packages/46/c8/09b3ab245d8652eafe5256ab59718641429f68681ee713ff06c5c549f156/ruff-0.14.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d99c0b52b6f0598acede45ee78288e5e9b4409d1ce7f661f0fa36d4cbeadf9a4", size = 12946765, upload-time = "2025-11-06T22:07:05.858Z" }, + { url = "https://files.pythonhosted.org/packages/14/bb/1564b000219144bf5eed2359edc94c3590dd49d510751dad26202c18a17d/ruff-0.14.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9358d490ec030f1b51d048a7fd6ead418ed0826daf6149e95e30aa67c168af33", size = 11928120, upload-time = "2025-11-06T22:07:08.023Z" }, + { url = "https://files.pythonhosted.org/packages/a3/92/d5f1770e9988cc0742fefaa351e840d9aef04ec24ae1be36f333f96d5704/ruff-0.14.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:81b40d27924f1f02dfa827b9c0712a13c0e4b108421665322218fc38caf615c2", size = 12370877, upload-time = "2025-11-06T22:07:10.015Z" }, + { url = "https://files.pythonhosted.org/packages/e2/29/e9282efa55f1973d109faf839a63235575519c8ad278cc87a182a366810e/ruff-0.14.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f5e649052a294fe00818650712083cddc6cc02744afaf37202c65df9ea52efa5", size = 12408538, upload-time = "2025-11-06T22:07:13.085Z" }, + { url = "https://files.pythonhosted.org/packages/8e/01/930ed6ecfce130144b32d77d8d69f5c610e6d23e6857927150adf5d7379a/ruff-0.14.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa082a8f878deeba955531f975881828fd6afd90dfa757c2b0808aadb437136e", size = 13141942, upload-time = "2025-11-06T22:07:15.386Z" }, + { url = "https://files.pythonhosted.org/packages/6a/46/a9c89b42b231a9f487233f17a89cbef9d5acd538d9488687a02ad288fa6b/ruff-0.14.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:1043c6811c2419e39011890f14d0a30470f19d47d197c4858b2787dfa698f6c8", size = 14544306, upload-time = "2025-11-06T22:07:17.631Z" }, + { url = "https://files.pythonhosted.org/packages/78/96/9c6cf86491f2a6d52758b830b89b78c2ae61e8ca66b86bf5a20af73d20e6/ruff-0.14.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a9f3a936ac27fb7c2a93e4f4b943a662775879ac579a433291a6f69428722649", size = 14210427, upload-time = "2025-11-06T22:07:19.832Z" }, + { url = "https://files.pythonhosted.org/packages/71/f4/0666fe7769a54f63e66404e8ff698de1dcde733e12e2fd1c9c6efb689cb5/ruff-0.14.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:95643ffd209ce78bc113266b88fba3d39e0461f0cbc8b55fb92505030fb4a850", size = 13658488, upload-time = "2025-11-06T22:07:22.32Z" }, + { url = "https://files.pythonhosted.org/packages/ee/79/6ad4dda2cfd55e41ac9ed6d73ef9ab9475b1eef69f3a85957210c74ba12c/ruff-0.14.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:456daa2fa1021bc86ca857f43fe29d5d8b3f0e55e9f90c58c317c1dcc2afc7b5", size = 13354908, upload-time = "2025-11-06T22:07:24.347Z" }, + { url = "https://files.pythonhosted.org/packages/b5/60/f0b6990f740bb15c1588601d19d21bcc1bd5de4330a07222041678a8e04f/ruff-0.14.4-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:f911bba769e4a9f51af6e70037bb72b70b45a16db5ce73e1f72aefe6f6d62132", size = 13587803, upload-time = "2025-11-06T22:07:26.327Z" }, + { url = "https://files.pythonhosted.org/packages/c9/da/eaaada586f80068728338e0ef7f29ab3e4a08a692f92eb901a4f06bbff24/ruff-0.14.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:76158a7369b3979fa878612c623a7e5430c18b2fd1c73b214945c2d06337db67", size = 12279654, upload-time = "2025-11-06T22:07:28.46Z" }, + { url = "https://files.pythonhosted.org/packages/66/d4/b1d0e82cf9bf8aed10a6d45be47b3f402730aa2c438164424783ac88c0ed/ruff-0.14.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f3b8f3b442d2b14c246e7aeca2e75915159e06a3540e2f4bed9f50d062d24469", size = 12357520, upload-time = "2025-11-06T22:07:31.468Z" }, + { url = "https://files.pythonhosted.org/packages/04/f4/53e2b42cc82804617e5c7950b7079d79996c27e99c4652131c6a1100657f/ruff-0.14.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c62da9a06779deecf4d17ed04939ae8b31b517643b26370c3be1d26f3ef7dbde", size = 12719431, upload-time = "2025-11-06T22:07:33.831Z" }, + { url = "https://files.pythonhosted.org/packages/a2/94/80e3d74ed9a72d64e94a7b7706b1c1ebaa315ef2076fd33581f6a1cd2f95/ruff-0.14.4-py3-none-musllinux_1_2_x86_64.whl", hash = 
"sha256:5a443a83a1506c684e98acb8cb55abaf3ef725078be40237463dae4463366349", size = 13464394, upload-time = "2025-11-06T22:07:35.905Z" }, + { url = "https://files.pythonhosted.org/packages/54/1a/a49f071f04c42345c793d22f6cf5e0920095e286119ee53a64a3a3004825/ruff-0.14.4-py3-none-win32.whl", hash = "sha256:643b69cb63cd996f1fc7229da726d07ac307eae442dd8974dbc7cf22c1e18fff", size = 12493429, upload-time = "2025-11-06T22:07:38.43Z" }, + { url = "https://files.pythonhosted.org/packages/bc/22/e58c43e641145a2b670328fb98bc384e20679b5774258b1e540207580266/ruff-0.14.4-py3-none-win_amd64.whl", hash = "sha256:26673da283b96fe35fa0c939bf8411abec47111644aa9f7cfbd3c573fb125d2c", size = 13635380, upload-time = "2025-11-06T22:07:40.496Z" }, + { url = "https://files.pythonhosted.org/packages/30/bd/4168a751ddbbf43e86544b4de8b5c3b7be8d7167a2a5cb977d274e04f0a1/ruff-0.14.4-py3-none-win_arm64.whl", hash = "sha256:dd09c292479596b0e6fec8cd95c65c3a6dc68e9ad17b8f2382130f87ff6a75bb", size = 12663065, upload-time = "2025-11-06T22:07:42.603Z" }, +] + [[package]] name = "scipy" version = "1.15.3" @@ -1987,6 +2013,9 @@ pylint = [ { name = "pylint" }, { name = "pytest" }, ] +ruff = [ + { name = "ruff" }, +] scripting = [ { name = "nox" }, { name = "tmlt-nox-utils" }, @@ -2054,6 +2083,7 @@ pylint = [ { name = "pylint", specifier = ">=3.2.5" }, { name = "pytest" }, ] +ruff = [{ name = "ruff", specifier = ">=0.14.3,<1" }] scripting = [ { name = "nox", specifier = ">=2024.3.2" }, { name = "tmlt-nox-utils", git = "https://github.com/opendp/tumult-tools.git?subdirectory=nox-utils" }, @@ -2067,8 +2097,8 @@ test = [ [[package]] name = "tmlt-nox-utils" -version = "0.0.0.post25+eabe1054" -source = { git = "https://github.com/opendp/tumult-tools.git?subdirectory=nox-utils#eabe1054863f0916a0087ad180fd83719049c094" } +version = "0.0.0.post30+8504968" +source = { git = "https://github.com/opendp/tumult-tools.git?subdirectory=nox-utils#0850496832ff017c00046a8bb8ae331945812879" } dependencies = [ { name = "gitpython" }, { name = "nox" }, From f96a903b96ba844c03588abf561a871fce346b28 Mon Sep 17 00:00:00 2001 From: Tom Magerlein Date: Thu, 6 Nov 2025 20:51:47 -0500 Subject: [PATCH 2/9] Auto-fix ruff lints and disable remaining ones for now --- pyproject.toml | 22 ++++++++++++ src/tmlt/core/measures.py | 2 +- src/tmlt/core/metrics.py | 13 +++---- src/tmlt/core/transformations/base.py | 8 ++--- src/tmlt/core/transformations/dictionary.py | 2 +- .../spark_transformations/add_remove_keys.py | 6 ++-- .../spark_transformations/map.py | 5 ++- .../spark_transformations/nan.py | 36 ++++++++++++------- .../spark_transformations/partition.py | 3 +- src/tmlt/core/utils/validation.py | 10 +++--- test/unit/domains/test_spark_domains.py | 3 +- test/unit/measurements/test_aggregations.py | 1 - .../test_interactive_measurements.py | 5 ++- .../measurements/test_spark_measurements.py | 2 -- test/unit/test_measures.py | 1 - test/unit/test_metrics.py | 4 +-- .../map/test_flat_map.py | 1 - .../map/test_flat_map_by_key.py | 1 - .../map/test_grouping_flat_map.py | 1 - .../spark_transformations/map/test_map.py | 1 - .../spark_transformations/test_partition.py | 7 ++-- test/unit/utils/test_grouped_dataframe.py | 1 - test/unit/utils/test_truncation.py | 1 - 23 files changed, 73 insertions(+), 63 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 34bec24..6fe1669 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -202,6 +202,28 @@ ignore = [ "RUF002", # ambiguous-unicode-character-docstring -- doesn't like unicode Greek letters "PLR09", # too-many-* "PLR2004", 
# magic-value-comparison
+
+    # TODO: This disables every lint that is currently failing; go through and
+    # either fix/individually disable each instance, or choose to permanently
+    # ignore each one.
+    "PLW1641", # eq-without-hash
+    "PLC0206", # dict-index-missing-items
+    "RUF012", # mutable-class-default
+    "PLC0415", # import-outside-top-level
+    "PLW0177", # nan-comparison
+    "RUF005", # collection-literal-concatenation
+    "PLR1730", # if-stmt-min-max
+    "PLW0127", # self-assigning-variable
+    "PLW0128", # redeclared-assigned-name
+    "RUF015", # unnecessary-iterable-allocation-for-first-element
+    "D417", # undocumented-param
+    "RUF007", # zip-instead-of-pairwise
+    "D415", # missing-terminal-punctuation
+    "RUF043", # pytest-raises-ambiguous-pattern
+    "D205", # missing-blank-line-after-summary
+    "D210", # surrounding-whitespace
+    "D102", # undocumented-public-method
+    "PLW2901", # redefined-loop-name
 ]
 
 [tool.ruff.lint.pydocstyle]
diff --git a/src/tmlt/core/measures.py b/src/tmlt/core/measures.py
index 21f8bba..c484420 100644
--- a/src/tmlt/core/measures.py
+++ b/src/tmlt/core/measures.py
@@ -184,7 +184,7 @@ def compare(
         epsilon2 = ExactNumber(value2[0])
         delta2 = ExactNumber(value2[1])
         value2_is_infinite = not epsilon2.is_finite or delta2 == 1
-        return value2_is_infinite or epsilon1 <= epsilon2 and delta1 <= delta2
+        return value2_is_infinite or (epsilon1 <= epsilon2 and delta1 <= delta2)
 
 
 class RhoZCDP(Measure):
diff --git a/src/tmlt/core/metrics.py b/src/tmlt/core/metrics.py
index 9001c9d..5a4a108 100644
--- a/src/tmlt/core/metrics.py
+++ b/src/tmlt/core/metrics.py
@@ -61,7 +61,7 @@ def _validate_distance_arguments(
         """Raise an exception if the arguments to a distance method aren't valid."""
         if not self.supports_domain(domain):
             raise UnsupportedCombinationError(
-                (self, domain), f"{repr(self)} does not support domain {repr(domain)}."
+                (self, domain), f"{self!r} does not support domain {domain!r}."
             )
         try:
             domain.validate(value1)
@@ -868,7 +868,7 @@ def distance(self, value1: Any, value2: Any, domain: Domain) -> ExactNumber:
     def __repr__(self) -> str:
         """Returns string representation."""
         return (
-            f"{self.__class__.__name__}(column={repr(self.column)},"
+            f"{self.__class__.__name__}(column={self.column!r},"
             f" metric={self.metric})"
         )
@@ -1426,9 +1426,8 @@ def supports_domain(self, domain: Domain) -> bool:
                     return False
                 if column_descriptor is None:
                     column_descriptor = element_domain.schema[id_column]
-                else:
-                    if element_domain.schema[id_column] != column_descriptor:
-                        return False
+                elif element_domain.schema[id_column] != column_descriptor:
+                    return False
             return True
         return False
@@ -1488,6 +1487,4 @@ def distance(self, value1: Any, value2: Any, domain: Domain) -> ExactNumber:
 
     def __repr__(self) -> str:
         """Returns string representation."""
-        return (
-            f"{self.__class__.__name__}(df_to_key_column={repr(self.df_to_key_column)})"
-        )
+        return f"{self.__class__.__name__}(df_to_key_column={self.df_to_key_column!r})"
diff --git a/src/tmlt/core/transformations/base.py b/src/tmlt/core/transformations/base.py
index d4ea0a5..07af294 100644
--- a/src/tmlt/core/transformations/base.py
+++ b/src/tmlt/core/transformations/base.py
@@ -102,15 +102,11 @@ def stability_relation(self, d_in: Any, d_out: Any) -> bool:
         return self.output_metric.compare(min_d_out, d_out)
 
     @overload
-    def __or__(
-        self, other: "Transformation"
-    ) -> "Transformation": # noqa: D105 https://github.com/PyCQA/pydocstyle/issues/525
+    def __or__(self, other: "Transformation") -> "Transformation":
         ...
 
@overload - def __or__( - self, other: Measurement - ) -> Measurement: # noqa: D105 https://github.com/PyCQA/pydocstyle/issues/525 + def __or__(self, other: Measurement) -> Measurement: ... def __or__(self, other: Any) -> Union[Measurement, Transformation]: diff --git a/src/tmlt/core/transformations/dictionary.py b/src/tmlt/core/transformations/dictionary.py index 94e817a..2bbb854 100644 --- a/src/tmlt/core/transformations/dictionary.py +++ b/src/tmlt/core/transformations/dictionary.py @@ -314,7 +314,7 @@ def __init__( """ if key not in input_domain.key_to_domain: raise DomainKeyError( - input_domain, key, f"{repr(key)} is not one of the input domain's keys" + input_domain, key, f"{key!r} is not one of the input domain's keys" ) # Below is the check in base class, but needs to happen before so # output_metric = input_metric[key] won't get a KeyError diff --git a/src/tmlt/core/transformations/spark_transformations/add_remove_keys.py b/src/tmlt/core/transformations/spark_transformations/add_remove_keys.py index 1e27fc0..f75b0af 100644 --- a/src/tmlt/core/transformations/spark_transformations/add_remove_keys.py +++ b/src/tmlt/core/transformations/spark_transformations/add_remove_keys.py @@ -197,15 +197,15 @@ def __init__( ) if key not in input_domain.key_to_domain: raise DomainKeyError( - input_domain, key, f"{repr(key)} is not one of the input domain's keys" + input_domain, key, f"{key!r} is not one of the input domain's keys" ) if new_key in input_domain.key_to_domain: - raise ValueError(f"{repr(new_key)} is already a key in the input domain") + raise ValueError(f"{new_key!r} is already a key in the input domain") if transformation.input_domain != input_domain.key_to_domain[key]: raise DomainMismatchError( (transformation.input_domain, input_domain), ( - f"Input domain's value for {repr(key)} does not match" + f"Input domain's value for {key!r} does not match" " transformation's input domain" ), ) diff --git a/src/tmlt/core/transformations/spark_transformations/map.py b/src/tmlt/core/transformations/spark_transformations/map.py index 4700982..6685b35 100644 --- a/src/tmlt/core/transformations/spark_transformations/map.py +++ b/src/tmlt/core/transformations/spark_transformations/map.py @@ -788,9 +788,8 @@ def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: self.input_metric.inner_metric, SymmetricDifference ): return ExactNumber(d_in) - else: - if self.max_num_rows is None: - return ExactNumber(float("inf")) + elif self.max_num_rows is None: + return ExactNumber(float("inf")) # help mypy assert self.max_num_rows is not None return ExactNumber(d_in) * self.max_num_rows diff --git a/src/tmlt/core/transformations/spark_transformations/nan.py b/src/tmlt/core/transformations/spark_transformations/nan.py index 7ae2b66..731732b 100644 --- a/src/tmlt/core/transformations/spark_transformations/nan.py +++ b/src/tmlt/core/transformations/spark_transformations/nan.py @@ -124,8 +124,10 @@ def __init__( columns: Columns to drop +inf and -inf from. """ if isinstance(metric, IfGroupedBy) and not ( - isinstance(metric.inner_metric, (SumOf, RootSumOfSquared)) - and isinstance(metric.inner_metric.inner_metric, SymmetricDifference) + ( + isinstance(metric.inner_metric, (SumOf, RootSumOfSquared)) + and isinstance(metric.inner_metric.inner_metric, SymmetricDifference) + ) or isinstance(metric.inner_metric, SymmetricDifference) ): raise UnsupportedMetricError( @@ -300,8 +302,10 @@ def __init__( columns: Columns to drop NaNs from. 
""" if isinstance(metric, IfGroupedBy) and not ( - isinstance(metric.inner_metric, (SumOf, RootSumOfSquared)) - and isinstance(metric.inner_metric.inner_metric, SymmetricDifference) + ( + isinstance(metric.inner_metric, (SumOf, RootSumOfSquared)) + and isinstance(metric.inner_metric.inner_metric, SymmetricDifference) + ) or isinstance(metric.inner_metric, SymmetricDifference) ): raise UnsupportedMetricError( @@ -476,8 +480,10 @@ def __init__( columns: Columns to drop nulls from. """ if isinstance(metric, IfGroupedBy) and not ( - isinstance(metric.inner_metric, (SumOf, RootSumOfSquared)) - and isinstance(metric.inner_metric.inner_metric, SymmetricDifference) + ( + isinstance(metric.inner_metric, (SumOf, RootSumOfSquared)) + and isinstance(metric.inner_metric.inner_metric, SymmetricDifference) + ) or isinstance(metric.inner_metric, SymmetricDifference) ): raise UnsupportedMetricError( @@ -649,8 +655,10 @@ def __init__( in that column. """ if isinstance(metric, IfGroupedBy) and not ( - isinstance(metric.inner_metric, (SumOf, RootSumOfSquared)) - and isinstance(metric.inner_metric.inner_metric, SymmetricDifference) + ( + isinstance(metric.inner_metric, (SumOf, RootSumOfSquared)) + and isinstance(metric.inner_metric.inner_metric, SymmetricDifference) + ) or isinstance(metric.inner_metric, SymmetricDifference) ): raise UnsupportedMetricError( @@ -839,8 +847,10 @@ def __init__( replacing NaNs in that column. """ if isinstance(metric, IfGroupedBy) and not ( - isinstance(metric.inner_metric, (SumOf, RootSumOfSquared)) - and isinstance(metric.inner_metric.inner_metric, SymmetricDifference) + ( + isinstance(metric.inner_metric, (SumOf, RootSumOfSquared)) + and isinstance(metric.inner_metric.inner_metric, SymmetricDifference) + ) or isinstance(metric.inner_metric, SymmetricDifference) ): raise UnsupportedMetricError( @@ -1019,8 +1029,10 @@ def __init__( replacing nulls in that column. 
""" if isinstance(metric, IfGroupedBy) and not ( - isinstance(metric.inner_metric, (SumOf, RootSumOfSquared)) - and isinstance(metric.inner_metric.inner_metric, SymmetricDifference) + ( + isinstance(metric.inner_metric, (SumOf, RootSumOfSquared)) + and isinstance(metric.inner_metric.inner_metric, SymmetricDifference) + ) or isinstance(metric.inner_metric, SymmetricDifference) ): raise UnsupportedMetricError( diff --git a/src/tmlt/core/transformations/spark_transformations/partition.py b/src/tmlt/core/transformations/spark_transformations/partition.py index c8806b3..7824932 100644 --- a/src/tmlt/core/transformations/spark_transformations/partition.py +++ b/src/tmlt/core/transformations/spark_transformations/partition.py @@ -216,8 +216,7 @@ def __init__( if isinstance(input_metric, IfGroupedBy): if not ( (isinstance(input_metric.inner_metric, RootSumOfSquared) and use_l2) - or isinstance(input_metric.inner_metric, SumOf) - and not use_l2 + or (isinstance(input_metric.inner_metric, SumOf) and not use_l2) ): raise UnsupportedMetricError( input_metric, "IfGroupedBy inner metric must match use_l2" diff --git a/src/tmlt/core/utils/validation.py b/src/tmlt/core/utils/validation.py index ba93483..7cbd50d 100644 --- a/src/tmlt/core/utils/validation.py +++ b/src/tmlt/core/utils/validation.py @@ -165,14 +165,12 @@ def validate_exact_number( if minimum_is_inclusive: if exact_value < exact_minimum: raise ValueError(f"{value} is not greater than or equal to {minimum}") - else: - if exact_value <= exact_minimum: - raise ValueError(f"{value} is not strictly greater than {minimum}") + elif exact_value <= exact_minimum: + raise ValueError(f"{value} is not strictly greater than {minimum}") if maximum is not None: exact_maximum = ExactNumber(maximum) if maximum_is_inclusive: if exact_value > exact_maximum: raise ValueError(f"{value} is not less than or equal to {maximum}") - else: - if exact_value >= exact_maximum: - raise ValueError(f"{value} is not strictly less than {maximum}") + elif exact_value >= exact_maximum: + raise ValueError(f"{value} is not strictly less than {maximum}") diff --git a/test/unit/domains/test_spark_domains.py b/test/unit/domains/test_spark_domains.py index a5df249..5f30ee5 100644 --- a/test/unit/domains/test_spark_domains.py +++ b/test/unit/domains/test_spark_domains.py @@ -1228,7 +1228,8 @@ class TestSparkColumnDescriptors: r"""Tests for subclasses of class SparkColumnDescriptor. See subclasses of - :class:`~tmlt.core.domains.spark_domains.SparkColumnDescriptor`\ s.""" + :class:`~tmlt.core.domains.spark_domains.SparkColumnDescriptor`\ s. 
+ """ spark: SparkSession diff --git a/test/unit/measurements/test_aggregations.py b/test/unit/measurements/test_aggregations.py index f563194..94202d5 100644 --- a/test/unit/measurements/test_aggregations.py +++ b/test/unit/measurements/test_aggregations.py @@ -925,7 +925,6 @@ def test_create_count_measurement_without_groupby( noise_mechanism: NoiseMechanism, ): """Tests that create_count_measurement works correctly without groupby.""" - if ( isinstance(input_metric, IfGroupedBy) and input_metric.column not in self.groupby_columns # type: ignore diff --git a/test/unit/measurements/test_interactive_measurements.py b/test/unit/measurements/test_interactive_measurements.py index f23d4cd..84cb665 100644 --- a/test/unit/measurements/test_interactive_measurements.py +++ b/test/unit/measurements/test_interactive_measurements.py @@ -1070,7 +1070,6 @@ def test_init_invalid_arguments( @patch.object(PrivacyAccountant, "__init__", autospec=True, return_value=None) def test_launch(self, mock_accountant_init): """PrivacyAccountant.launch works as expected.""" - mock_queryable = Mock(spec=SequentialQueryable) mock_sequential_composition = Mock( spec=SequentialComposition, return_value=mock_queryable @@ -2074,8 +2073,8 @@ def test_insufficient_budget(self): requested_budget = self.budget_type(self.budget_quarters[3]) error_message = re.escape( ( - f"The remaining privacy budget is {str(remaining_budget)}, which " - f"is insufficient given the requested budget {str(requested_budget)}." + f"The remaining privacy budget is {remaining_budget!s}, which " + f"is insufficient given the requested budget {requested_budget!s}." ) ) with self.assertRaisesRegex(ValueError, error_message): diff --git a/test/unit/measurements/test_spark_measurements.py b/test/unit/measurements/test_spark_measurements.py index 1d83371..322aed2 100644 --- a/test/unit/measurements/test_spark_measurements.py +++ b/test/unit/measurements/test_spark_measurements.py @@ -187,7 +187,6 @@ def test_correctness_test_measure( def test_privacy_function_and_relation(self): """Test that the privacy function and relation are computed correctly.""" - quantile_measurement = NoisyQuantile( PandasSeriesDomain(NumpyIntegerDomain()), output_measure=PureDP(), @@ -664,7 +663,6 @@ def test_correctness( self, input_df, threshold_fraction, grouping_columns, expected ): """Tests that SparseVectorPrefixSums works correctly for various inputs.""" - domain = SparkDataFrameDomain( { "grouping1": SparkStringColumnDescriptor(allow_null=True), diff --git a/test/unit/test_measures.py b/test/unit/test_measures.py index e777fa4..79ab16c 100644 --- a/test/unit/test_measures.py +++ b/test/unit/test_measures.py @@ -457,7 +457,6 @@ def test_InsufficientBudgetError( high_budget: Union[ExactNumber, Tuple[ExactNumber, ExactNumber]], ): """Tests that the error message related to InsufficientBudgetError is useful.""" - low = budget_type(low_budget) # type: ignore high = budget_type(high_budget) # type: ignore diff --git a/test/unit/test_metrics.py b/test/unit/test_metrics.py index 6d93179..5ec9ea4 100644 --- a/test/unit/test_metrics.py +++ b/test/unit/test_metrics.py @@ -59,12 +59,12 @@ class TestNullMetric(TestCase): """TestCase for NullMetric.""" def test_valid(self): - """validate is not implemented""" + """Validate is not implemented""" with self.assertRaises(NotImplementedError): NullMetric().validate(3) def test_compare(self): - """compare is not implemented""" + """Compare is not implemented""" with self.assertRaises(NotImplementedError): NullMetric().compare(3, 2) diff 
--git a/test/unit/transformations/spark_transformations/map/test_flat_map.py b/test/unit/transformations/spark_transformations/map/test_flat_map.py index 8b04053..07039bb 100644 --- a/test/unit/transformations/spark_transformations/map/test_flat_map.py +++ b/test/unit/transformations/spark_transformations/map/test_flat_map.py @@ -277,7 +277,6 @@ def test_transformation_correctness_keys( def test_null_nan_inf(spark): """Transformation handles null/NaN/inf inputs and outputs correctly.""" - # Do not use Pandas in this test! Anything passing through a Pandas # dataframe could silently modify the NaNs/nulls and invalidate the # test. diff --git a/test/unit/transformations/spark_transformations/map/test_flat_map_by_key.py b/test/unit/transformations/spark_transformations/map/test_flat_map_by_key.py index a6a3ae4..802ecfa 100644 --- a/test/unit/transformations/spark_transformations/map/test_flat_map_by_key.py +++ b/test/unit/transformations/spark_transformations/map/test_flat_map_by_key.py @@ -236,7 +236,6 @@ def test_transformation_correctness( def test_null_nan_inf(spark): """Transformation handles null/NaN/inf inputs and outputs correctly.""" - # Do not use Pandas in this test! Anything passing through a Pandas # dataframe could silently modify the NaNs/nulls and invalidate the # test. diff --git a/test/unit/transformations/spark_transformations/map/test_grouping_flat_map.py b/test/unit/transformations/spark_transformations/map/test_grouping_flat_map.py index 4406672..b22cad4 100644 --- a/test/unit/transformations/spark_transformations/map/test_grouping_flat_map.py +++ b/test/unit/transformations/spark_transformations/map/test_grouping_flat_map.py @@ -242,7 +242,6 @@ def test_stability( def test_null_nan_inf(spark): """Transformation handles null/NaN/inf inputs and outputs correctly.""" - # Do not use Pandas in this test! Anything passing through a Pandas # dataframe could silently modify the NaNs/nulls and invalidate the # test. diff --git a/test/unit/transformations/spark_transformations/map/test_map.py b/test/unit/transformations/spark_transformations/map/test_map.py index e22c21d..4c34bb2 100644 --- a/test/unit/transformations/spark_transformations/map/test_map.py +++ b/test/unit/transformations/spark_transformations/map/test_map.py @@ -163,7 +163,6 @@ def test_transformation_correctness( def test_null_nan_inf(spark): """Transformation handles null/NaN/inf inputs and outputs correctly.""" - # Do not use Pandas in this test! Anything passing through a Pandas # dataframe could silently modify the NaNs/nulls and invalidate the # test. 
diff --git a/test/unit/transformations/spark_transformations/test_partition.py b/test/unit/transformations/spark_transformations/test_partition.py index 2f61a8b..030c6bb 100644 --- a/test/unit/transformations/spark_transformations/test_partition.py +++ b/test/unit/transformations/spark_transformations/test_partition.py @@ -197,11 +197,8 @@ def test_partition_by_special_value_keys(self): for key, partition in zip(key_values, partitions): actual_rows = partition.collect() self.assertEqual(len(actual_rows), 1) - assert ( - actual_rows[0].A == key - or key is not None - and math.isnan(actual_rows[0].A) - and math.isnan(key) + assert actual_rows[0].A == key or ( + key is not None and math.isnan(actual_rows[0].A) and math.isnan(key) ) assert actual_rows[0].B == 1 diff --git a/test/unit/utils/test_grouped_dataframe.py b/test/unit/utils/test_grouped_dataframe.py index 5fe3d59..8c61f41 100644 --- a/test/unit/utils/test_grouped_dataframe.py +++ b/test/unit/utils/test_grouped_dataframe.py @@ -329,7 +329,6 @@ def test_agg_with_special_chars(self): def test_agg_with_rows_no_cols(self): """Tests that a groupby errors when there are keys but no columns.""" - with pytest.raises(ValueError) as exp_info: GroupedDataFrame( dataframe=self.spark.createDataFrame( diff --git a/test/unit/utils/test_truncation.py b/test/unit/utils/test_truncation.py index 9f65488..8307f01 100644 --- a/test/unit/utils/test_truncation.py +++ b/test/unit/utils/test_truncation.py @@ -145,7 +145,6 @@ def test_hash_collisions(self): :func:`~.limit_keys_per_group`. See https://gitlab.com/tumult-labs/tumult/-/issues/2455 for more details. """ - df = self.spark.createDataFrame( pd.DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [1, 1, 2, 2, 1, 2, 3, 4]}) ) From e473d4f2d5f477ff1bd2999c34b21edb29cd4b70 Mon Sep 17 00:00:00 2001 From: Tom Magerlein Date: Thu, 6 Nov 2025 20:56:08 -0500 Subject: [PATCH 3/9] Disable pylint, pydocstyle, and remove lint directives for them --- benchmark/benchmarking_utils.py | 2 - benchmark/count_sum.py | 12 ++-- benchmark/noise_mechanism.py | 6 +- benchmark/private_join.py | 2 +- benchmark/public_join.py | 12 ++-- benchmark/quantile.py | 4 +- benchmark/sparkflatmap.py | 8 +-- benchmark/sparkmap.py | 8 +-- doc/conf.py | 2 - noxfile.py | 2 - pyproject.toml | 63 ----------------- src/tmlt/core/domains/pandas_domains.py | 2 +- src/tmlt/core/domains/spark_domains.py | 12 +--- .../measurements/interactive_measurements.py | 11 +-- .../pandas_measurements/series.py | 9 +-- .../core/measurements/spark_measurements.py | 15 ++-- src/tmlt/core/metrics.py | 4 +- src/tmlt/core/random/discrete_gaussian.py | 2 +- src/tmlt/core/random/rng.py | 4 +- src/tmlt/core/transformations/base.py | 3 +- .../spark_transformations/agg.py | 19 ++---- .../spark_transformations/filter.py | 7 +- .../spark_transformations/groupby.py | 4 +- .../spark_transformations/id.py | 8 +-- .../spark_transformations/join.py | 18 ++--- .../spark_transformations/map.py | 17 +++-- .../spark_transformations/nan.py | 30 ++------ .../spark_transformations/partition.py | 9 +-- .../spark_transformations/persist.py | 3 +- .../spark_transformations/rename.py | 9 +-- .../spark_transformations/select.py | 7 +- .../spark_transformations/truncation.py | 12 +--- src/tmlt/core/utils/arb.py | 14 ++-- src/tmlt/core/utils/exact_number.py | 2 +- src/tmlt/core/utils/join.py | 4 +- src/tmlt/core/utils/prdp.py | 4 +- src/tmlt/core/utils/testing.py | 1 - src/tmlt/core/utils/type_utils.py | 2 - .../test_interactive_measurements.py | 6 +- test/unit/budget_abstract.py | 2 - 
test/unit/domains/abstract.py | 2 - test/unit/domains/test_numpy_domains.py | 2 - test/unit/domains/test_pandas_domains.py | 4 -- test/unit/domains/test_spark_domains.py | 8 +-- test/unit/measurements/abstract.py | 2 - test/unit/measurements/test_aggregations.py | 8 +-- .../test_interactive_measurements.py | 48 ++++--------- test/unit/measures_abstract.py | 2 - test/unit/metric_abstract.py | 2 - test/unit/random/test_rng.py | 2 - test/unit/transformations/abstract.py | 2 - .../test_add_remove_keys.py | 2 - .../spark_transformations/test_agg.py | 3 +- .../spark_transformations/test_groupby.py | 4 +- .../spark_transformations/test_join.py | 2 +- .../spark_transformations/test_persist.py | 1 - test/unit/utils/test_arb.py | 3 - test/unit/utils/test_grouped_dataframe.py | 4 +- tutorials/FirstSteps.ipynb | 2 +- uv.lock | 68 ------------------- 60 files changed, 120 insertions(+), 412 deletions(-) diff --git a/benchmark/benchmarking_utils.py b/benchmark/benchmarking_utils.py index ddc6d92..ccf75a3 100644 --- a/benchmark/benchmarking_utils.py +++ b/benchmark/benchmarking_utils.py @@ -3,8 +3,6 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 -# pylint: disable=attribute-defined-outside-init - import time import pandas as pd from pathlib import Path diff --git a/benchmark/count_sum.py b/benchmark/count_sum.py index 1fe632d..dd33958 100644 --- a/benchmark/count_sum.py +++ b/benchmark/count_sum.py @@ -113,7 +113,7 @@ def main(): # Single Groupby Column of varying domain sizes (1 row/group) for domain_size in [100, 400, 10000, 40000, 160000, 640000]: - df = spark.createDataFrame( # pylint: disable=no-member + df = spark.createDataFrame( spark.sparkContext.parallelize( [(i, randint(0, 1)) for i in range(domain_size)] ), @@ -140,7 +140,7 @@ def main(): # Single groupby column, group size = 1M for size in [100000, 900000, 10000000]: - df = spark.createDataFrame( # pylint: disable=no-member + df = spark.createDataFrame( spark.sparkContext.parallelize( [ (i, randint(0, 1)) @@ -171,7 +171,7 @@ def main(): # Group size = 10K for size in [10000, 100000, 1000000, 10000000]: - df = spark.createDataFrame( # pylint: disable=no-member + df = spark.createDataFrame( spark.sparkContext.parallelize( [ (i, randint(0, 1)) @@ -202,7 +202,7 @@ def main(): # Group size = 100 for size in [10000, 40000, 160000, 640000, 2560000]: - df = spark.createDataFrame( # pylint: disable=no-member + df = spark.createDataFrame( spark.sparkContext.parallelize( [(i, randint(0, 1)) for j in range(100) for i in range(int(size / 100))] ), @@ -244,7 +244,7 @@ def main(): for i in range(num_cols) ] ) - sdf = spark.createDataFrame( # pylint: disable=no-member + sdf = spark.createDataFrame( spark.sparkContext.parallelize( np.repeat( np.transpose( @@ -296,7 +296,7 @@ def main(): for i in range(num_cols) ] ) - sdf = spark.createDataFrame( # pylint: disable=no-member + sdf = spark.createDataFrame( spark.sparkContext.parallelize( np.repeat( np.transpose( diff --git a/benchmark/noise_mechanism.py b/benchmark/noise_mechanism.py index d75b4b5..2576a10 100644 --- a/benchmark/noise_mechanism.py +++ b/benchmark/noise_mechanism.py @@ -56,7 +56,7 @@ def main(): for size in [100, 400, 10000, 40000, 160000, 640000]: df = pd.DataFrame({"count": [0] * size}) - sdf = spark.createDataFrame(df) # pylint: disable=no-member + sdf = spark.createDataFrame(df) running_time = evaluate_runtime( input_domain=input_domain, measure_column="count", @@ -72,7 +72,7 @@ def main(): for size in [100, 400, 10000, 40000, 160000, 640000]: df = 
pd.DataFrame({"count": [0] * size}) - sdf = spark.createDataFrame(df) # pylint: disable=no-member + sdf = spark.createDataFrame(df) running_time = evaluate_runtime( input_domain=input_domain, measure_column="count", @@ -90,7 +90,7 @@ def main(): for size in [100, 400, 10000, 40000, 160000, 640000]: df = pd.DataFrame({"count": [0] * size}) - sdf = spark.createDataFrame(df) # pylint: disable=no-member + sdf = spark.createDataFrame(df) running_time = evaluate_runtime( input_domain=input_domain, measure_column="count", diff --git a/benchmark/private_join.py b/benchmark/private_join.py index ef74151..e56eec3 100644 --- a/benchmark/private_join.py +++ b/benchmark/private_join.py @@ -189,7 +189,7 @@ def generate_dataframe( for i in range(group_count) for _ in range(next(group_size_factory) + randint(-fuzz, fuzz)) ] - df = spark.createDataFrame( # pylint: disable=no-member + df = spark.createDataFrame( spark.sparkContext.parallelize(data), schema=list(dom.schema) ) return df, dom, len(data) diff --git a/benchmark/public_join.py b/benchmark/public_join.py index fc4813a..b2ed187 100644 --- a/benchmark/public_join.py +++ b/benchmark/public_join.py @@ -75,7 +75,7 @@ def __call__( domain_size = 2 rows_in_private = 100 input_domain = SparkDataFrameDomain(schema=self.schema) - private_df = self.spark.createDataFrame( # pylint: disable=no-member + private_df = self.spark.createDataFrame( pd.DataFrame( [[1.2, i] for i in range(domain_size)] * int(rows_in_private / domain_size), @@ -121,7 +121,7 @@ def __call__( columns=["B", "C"], ) for rows in rows_private: - private_df = self.spark.createDataFrame( # pylint: disable=no-member + private_df = self.spark.createDataFrame( pd.DataFrame( [[1.2, i] for i in range(domain_size)] * int(rows / domain_size), @@ -157,7 +157,7 @@ def __call__( rows_in_public = 10000 rows_in_private = 100 input_domain = SparkDataFrameDomain(schema=self.schema) - private_df = self.spark.createDataFrame( # pylint: disable=no-member + private_df = self.spark.createDataFrame( pd.DataFrame( [[10.0, i] for i in range(domain_size)] * int(rows_in_private / domain_size), @@ -205,7 +205,7 @@ def __call__( ) for cols in columns_private: schema = {f"Col_{i}": SparkFloatColumnDescriptor() for i in range(cols)} - private_df = self.spark.createDataFrame( # pylint: disable=no-member + private_df = self.spark.createDataFrame( pd.DataFrame( [tuple(range(cols))] * rows_in_private, columns=schema.keys() ) @@ -248,7 +248,7 @@ def __call__( schema = { f"Col_{i}": SparkStringColumnDescriptor() for i in range(num_cols) } - private_df = self.spark.createDataFrame( # pylint: disable=no-member + private_df = self.spark.createDataFrame( pd.DataFrame(data, columns=columns) ) private_df = private_df.withColumn("B", lit("B")) @@ -281,7 +281,7 @@ def __call__( rows, join_columns = 4000, 1 input_domain = SparkDataFrameDomain(schema=self.schema) for size in domain_sizes: - private_df = self.spark.createDataFrame( # pylint: disable=no-member + private_df = self.spark.createDataFrame( pd.DataFrame( [[10.0, i] for i in range(size)] * int(rows / size), columns=["A", "B"], diff --git a/benchmark/quantile.py b/benchmark/quantile.py index d2bc092..b62a6c2 100644 --- a/benchmark/quantile.py +++ b/benchmark/quantile.py @@ -86,7 +86,6 @@ def wrap_evaluation_multiple_group_counts( group_counts: List[int], benchmark_result: pd.DataFrame, ) -> pd.DataFrame: - # pylint: disable=unused-variable """Evaluate quantile runtime over multiple sizes = group_counts. Returns the resulting benchmarking information as a pandas dataframe. 
@@ -119,7 +118,7 @@ def wrap_evaluation_multiple_group_counts( _ = df.collect() # Help spark warm up. else: groupby_domains = {"A": list(range(int(size / group_size)))} - df = spark.createDataFrame( # pylint: disable=no-member + df = spark.createDataFrame( spark.sparkContext.parallelize( [ (i, randint(lower, upper)) @@ -154,7 +153,6 @@ def wrap_evaluation_multiple_group_counts( def benchmark_groupby_quantile( spark: SparkSession, quantile: float, epsilon: ExactNumberInput ) -> pd.DataFrame: - # pylint: disable=unused-variable """Evaluate quantile runtime with various params. Return the resulting pandas dataframe. diff --git a/benchmark/sparkflatmap.py b/benchmark/sparkflatmap.py index 873f114..f41fa88 100644 --- a/benchmark/sparkflatmap.py +++ b/benchmark/sparkflatmap.py @@ -84,7 +84,7 @@ def main(): # Various rows max_num_row = 10 for size in [100, 10000, 100000]: - sdf = spark.createDataFrame( # pylint: disable=no-member + sdf = spark.createDataFrame( spark.sparkContext.parallelize( [(i, choice(["X", "Y"])) for i in range(size)] ), @@ -146,7 +146,7 @@ def main(): schema = StructType( [StructField("Col_{}".format(i), IntegerType(), True) for i in range(cols)] ) - sdf = spark.createDataFrame( # pylint: disable=no-member + sdf = spark.createDataFrame( spark.sparkContext.parallelize([tuple(range(cols))] * rows), schema=schema ) augment = False @@ -201,7 +201,7 @@ def main(): schema = StructType( [StructField("Col_{}".format(i), IntegerType(), True) for i in range(cols)] ) - sdf = spark.createDataFrame( # pylint: disable=no-member + sdf = spark.createDataFrame( spark.sparkContext.parallelize([tuple(range(cols))] * rows), schema=schema ) for max_num_rows in [1, 10, 50]: @@ -269,7 +269,7 @@ def my_map(row): StructField("B", IntegerType(), True), ] ) - sdf = spark.createDataFrame( # pylint: disable=no-member + sdf = spark.createDataFrame( spark.sparkContext.parallelize([(i, randint(0, 1)) for i in range(10000)]), schema=schema, ) diff --git a/benchmark/sparkmap.py b/benchmark/sparkmap.py index 0ef2b0c..21bc127 100644 --- a/benchmark/sparkmap.py +++ b/benchmark/sparkmap.py @@ -94,7 +94,7 @@ def main(): schema = StructType( [StructField("A", IntegerType(), True), StructField("B", IntegerType(), True)] ) - sdf = spark.createDataFrame( # pylint: disable=no-member + sdf = spark.createDataFrame( spark.sparkContext.parallelize([(i, randint(0, 1)) for i in range(1250000)]), schema=schema, ) @@ -102,7 +102,7 @@ def main(): # various rows for size in [100, 400, 10000, 40000, 160000, 320000]: - sdf = spark.createDataFrame( # pylint: disable=no-member + sdf = spark.createDataFrame( spark.sparkContext.parallelize([(i, randint(0, 1)) for i in range(size)]), schema=schema, ) @@ -161,7 +161,7 @@ def main(): schema = StructType( [StructField(f"Col_{i}", IntegerType(), True) for i in range(size)] ) - sdf = spark.createDataFrame( # pylint: disable=no-member + sdf = spark.createDataFrame( spark.sparkContext.parallelize([tuple(range(size))] * 10000), schema=schema ) augment = False @@ -237,7 +237,7 @@ def my_map(row): StructField("B", IntegerType(), True), ] ) - sdf = spark.createDataFrame( # pylint: disable=no-member + sdf = spark.createDataFrame( spark.sparkContext.parallelize([(i, randint(0, 1)) for i in range(10000)]), schema=schema, ) diff --git a/doc/conf.py b/doc/conf.py index f9c801e..2826a1e 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,5 +1,3 @@ -# pylint: skip-file - # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 diff --git a/noxfile.py b/noxfile.py index fdbec99..9af95d0 100644 --- 
a/noxfile.py +++ b/noxfile.py @@ -180,8 +180,6 @@ def build(session): sm.isort() sm.ruff_check() sm.mypy() -sm.pylint() -sm.pydocstyle() sm.smoketest() sm.release_smoketest() diff --git a/pyproject.toml b/pyproject.toml index 6fe1669..ad42fd2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,8 +72,6 @@ ruff = ["ruff >=0.14.3,<1"] black = ["black >=23.3,<24"] isort = ["isort >=5.11,<6"] mypy = ["mypy >=1.14.0"] -pylint = ["pylint >=3.2.5", "parameterized", "pytest"] -pydocstyle = ["pydocstyle[toml] >=6.3"] test = [ "parameterized >=0.7.4,<0.8", "pytest", @@ -261,64 +259,3 @@ check_untyped_defs = true module = "tmlt.core.utils.arb" disallow_untyped_defs = false check_untyped_defs = true - -[tool.pylint.master] -# See https://github.com/PyCQA/pylint/issues/1975#issuecomment-387924981 -extension-pkg-whitelist = ['numpy'] -load-plugins = ['pylint.extensions.docparams'] -# Only check param docs in docstrings that contain an Args: section. -# Set to "no" to show docstrings missing argument documentation. -accept-no-param-doc = true - -[tool.pylint.'MESSAGES CONTROL'] -enable = [ - # Note: there is a false positive on 'useless-suppression' when you - # use 'disable=line-too-long' at the end of a docstring. - # See: https://github.com/pylint-dev/pylint/issues/8301 - 'useless-suppression' -] -# By default, informational rules like useless-suppression don't cause PyLint to -# produce an error. -fail-on = ['useless-suppression'] -disable = [ - 'arguments-differ', - 'duplicate-code', - 'fixme', - 'invalid-name', - 'logging-format-interpolation', - 'logging-fstring-interpolation', - 'missing-raises-doc', - 'missing-return-doc', - 'no-else-return', - 'super-init-not-called', - 'too-few-public-methods', - 'too-many-ancestors', - 'too-many-arguments', - 'too-many-branches', - 'too-many-instance-attributes', - 'too-many-lines', - 'too-many-locals', - 'too-many-positional-arguments', - 'too-many-public-methods', - 'too-many-return-statements', - 'too-many-statements', - 'unbalanced-tuple-unpacking', - 'unnecessary-lambda-assignment', - 'unsubscriptable-object', - 'use-dict-literal', - # black and isort group tmlt.core separately from tmlt.analytics, - # but pylint thinks they should both be grouped as 'tmlt'. - 'ungrouped-imports', - 'wrong-import-order', -] - -[tool.pylint.FORMAT] -max-line-length = 88 - -[tool.pydocstyle] -convention = "google" -add-ignore = [ - # `D200: One-line docstring should fit on one line with quotes` - # conflicts with pylint's `max-line-length`. 
- "D200", -] diff --git a/src/tmlt/core/domains/pandas_domains.py b/src/tmlt/core/domains/pandas_domains.py index 5cce4af..a432dd0 100644 --- a/src/tmlt/core/domains/pandas_domains.py +++ b/src/tmlt/core/domains/pandas_domains.py @@ -41,7 +41,7 @@ def validate(self, value: Any) -> None: # iterating over a Series implicitly calls item() on the NumPy values # retrieving the corresponding python object super().validate(value) - for i in range(len(value)): # pylint: disable=consider-using-enumerate + for i in range(len(value)): try: self.element_domain.validate(value[i]) except OutOfDomainError as exception: diff --git a/src/tmlt/core/domains/spark_domains.py b/src/tmlt/core/domains/spark_domains.py index 47c36ca..f1bd684 100644 --- a/src/tmlt/core/domains/spark_domains.py +++ b/src/tmlt/core/domains/spark_domains.py @@ -544,9 +544,7 @@ def __repr__(self) -> str: def carrier_type(self) -> type: """Returns carrier type for the domain.""" # avoid circular import - from tmlt.core.utils.grouped_dataframe import ( # pylint: disable=import-outside-toplevel - GroupedDataFrame, - ) + from tmlt.core.utils.grouped_dataframe import GroupedDataFrame return GroupedDataFrame @@ -571,17 +569,13 @@ def spark_schema(self) -> StructType: def validate(self, value: Any) -> None: """Raises error if value is not a GroupedDataFrame with matching group_keys.""" # avoid circular import - from tmlt.core.utils.grouped_dataframe import ( # pylint: disable=import-outside-toplevel - GroupedDataFrame, - ) + from tmlt.core.utils.grouped_dataframe import GroupedDataFrame super().validate(value) assert isinstance(value, GroupedDataFrame) inner_df_domain = SparkDataFrameDomain(self.schema) try: - inner_df_domain.validate( - value._dataframe # pylint: disable=protected-access - ) + inner_df_domain.validate(value._dataframe) except OutOfDomainError as exception: raise OutOfDomainError( self, value, f"Invalid inner DataFrame: {exception}" diff --git a/src/tmlt/core/measurements/interactive_measurements.py b/src/tmlt/core/measurements/interactive_measurements.py index 4ac4abc..720ba9d 100644 --- a/src/tmlt/core/measurements/interactive_measurements.py +++ b/src/tmlt/core/measurements/interactive_measurements.py @@ -877,8 +877,6 @@ class PrivacyAccountant: :class:`~.PrivacyAccountantState` for more information. """ - # pylint: disable=protected-access - @typechecked def __init__( self, @@ -1137,7 +1135,7 @@ def transform_in_place( Raises: :exc:`InactiveAccountantError`: If this :class:`~.PrivacyAccountant` is not ACTIVE. - """ # pylint: disable=line-too-long + """ if self.state != PrivacyAccountantState.ACTIVE: raise InactiveAccountantError( f"PrivacyAccountant must be ACTIVE not {self.state}. To queue a" @@ -1269,7 +1267,7 @@ def measure( Raises: :exc:`InactiveAccountantError`: If this :class:`~.PrivacyAccountant` is not ACTIVE. - """ # pylint: disable=line-too-long + """ if self.state != PrivacyAccountantState.ACTIVE: raise InactiveAccountantError( f"PrivacyAccountant must be ACTIVE not {(self.state)}." @@ -1511,7 +1509,7 @@ def split( Raises: :exc:`InactiveAccountantError`: If this :class:`~.PrivacyAccountant` is not ACTIVE. 
- """ # pylint: disable=line-too-long + """ if self.state != PrivacyAccountantState.ACTIVE: raise InactiveAccountantError("PrivacyAccountant must be ACTIVE") if self._queryable is None: @@ -1700,7 +1698,6 @@ def retire(self, force: bool = False) -> None: def queue_transformation( self, transformation: Transformation, d_out: Optional[Any] = None ) -> None: - # pylint: disable=line-too-long """Queue ``transformation`` to be executed when this :class:`~.PrivacyAccountant` becomes ACTIVE. If this :class:`~.PrivacyAccountant` is ACTIVE, this has @@ -1721,8 +1718,6 @@ def queue_transformation( only used if ``transformation`` does not implement a :meth:`~.Transformation.stability_function`. """ - # pylint: enable=line-too-long - if self.state == PrivacyAccountantState.RETIRED: raise RuntimeError( "You cannot queue transformations on a " diff --git a/src/tmlt/core/measurements/pandas_measurements/series.py b/src/tmlt/core/measurements/pandas_measurements/series.py index d578bff..e8a20a4 100644 --- a/src/tmlt/core/measurements/pandas_measurements/series.py +++ b/src/tmlt/core/measurements/pandas_measurements/series.py @@ -303,9 +303,7 @@ def privacy_function(self, d_in: ExactNumberInput) -> ExactNumber: def __call__(self, values: pd.Series) -> pd.Series: """Adds noise to each number in the input Series.""" - return values.apply( - lambda x: self.noise_measurement(x) # pylint: disable=unnecessary-lambda - ) + return values.apply(lambda x: self.noise_measurement(x)) class _RankedInterval(NamedTuple): @@ -383,7 +381,7 @@ def _select_quantile_interval( :math:`log(x_j - x_i) - |rank - target| * \frac{epsilon}{2 \cdot \Delta U} + G` where :math:`G` is a sampled from the standard Gumbel distribution. - Returns the interval with the highest noisy score. - """ # pylint:disable=line-too-long + """ arb_q = Arb.from_float(float(q)) prec = 53 # target_rank = arb_q * len(values) @@ -456,13 +454,12 @@ def _select_quantile_interval( # try to get a noisy score which is above most others approx_max = Arb.from_float(float("-inf")) - # pylint: disable=consider-using-max-builtin + # Unclear if max works correctly with Arb for noisy_score in noisy_scores: if noisy_score > approx_max: # only if noisy_score.lower > approx_max.upper approx_max = noisy_score - # pylint: enable=consider-using-max-builtin # do another pass to eliminate other intervals new_gumbel_p_bits = [] diff --git a/src/tmlt/core/measurements/spark_measurements.py b/src/tmlt/core/measurements/spark_measurements.py index 42a0eff..0a3dea7 100644 --- a/src/tmlt/core/measurements/spark_measurements.py +++ b/src/tmlt/core/measurements/spark_measurements.py @@ -1,10 +1,9 @@ -# pylint: disable=line-too-long """Measurements on Spark DataFrames. See `the architecture guide `_ for more information. 
""" -# pylint: enable=line-too-long + # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 @@ -20,7 +19,7 @@ from typeguard import typechecked # cleanup is imported just so its cleanup function runs at exit -import tmlt.core.utils.cleanup # pylint: disable=unused-import +import tmlt.core.utils.cleanup from tmlt.core.domains.spark_domains import ( SparkDataFrameDomain, SparkFloatColumnDescriptor, @@ -162,7 +161,7 @@ class AddNoiseToColumn(SparkMeasurement): >>> add_laplace_noise_to_column.privacy_function(1) 2 - """ # pylint: disable=line-too-long + """ @typechecked def __init__( @@ -334,7 +333,6 @@ def input_domain(self) -> SparkGroupedDataFrameDomain: """Returns input domain.""" return cast(SparkGroupedDataFrameDomain, super().input_domain) - # pylint: disable=line-too-long @typechecked def privacy_function(self, d_in: ExactNumberInput) -> ExactNumber: """Returns the smallest d_out satisfied by the measurement. @@ -349,7 +347,6 @@ def privacy_function(self, d_in: ExactNumberInput) -> ExactNumber: NotImplementedError: If self.aggregation_function.privacy_function(d_in) raises :class:`NotImplementedError`. """ - # pylint: enable=line-too-long return self.aggregation_function.privacy_function(d_in) def call(self, val: GroupedDataFrame) -> DataFrame: @@ -464,7 +461,7 @@ class GeometricPartitionSelection(SparkMeasurement): 2 >>> delta.to_float(round_up=True) 5.664238400088129e-21 - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -533,7 +530,6 @@ def count_column(self) -> str: """Returns the count column name.""" return self._count_column - # pylint: disable=line-too-long @typechecked def privacy_function( self, d_in: ExactNumberInput @@ -546,7 +542,6 @@ def privacy_function( Args: d_in: Distance between inputs under input_metric. """ - # pylint: enable=line-too-long self.input_metric.validate(d_in) d_in = ExactNumber(d_in) if d_in == 0: @@ -686,7 +681,7 @@ class SparseVectorPrefixSums(SparkMeasurement): 4 >>> measurement.privacy_function(2) 8 - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( diff --git a/src/tmlt/core/metrics.py b/src/tmlt/core/metrics.py index 5a4a108..892b0ec 100644 --- a/src/tmlt/core/metrics.py +++ b/src/tmlt/core/metrics.py @@ -10,11 +10,11 @@ from functools import reduce from typing import Any, Dict, Iterable, List, Mapping, Sequence, Tuple, Union -import numpy as np # pylint: disable=unused-import +import numpy as np import pandas as pd import sympy as sp from pyspark.sql import functions as sf -from pyspark.sql.session import SparkSession # pylint: disable=unused-import +from pyspark.sql.session import SparkSession from typeguard import typechecked from tmlt.core.domains.base import Domain diff --git a/src/tmlt/core/random/discrete_gaussian.py b/src/tmlt/core/random/discrete_gaussian.py index 0aa41d9..e1adc91 100644 --- a/src/tmlt/core/random/discrete_gaussian.py +++ b/src/tmlt/core/random/discrete_gaussian.py @@ -118,7 +118,7 @@ def _sample_dlaplace(scale: Union[float, Fraction]) -> int: In particular, this returns an integer :math:`x` with .. math:: - Pr(x) = exp(-\frac{|x|}{scale}) \cdot \frac{exp(\frac{1}{scale}) - 1}{exp(\frac{1}{xcale}) +1} # pylint: disable=line-too-long + Pr(x) = exp(-\frac{|x|}{scale}) \cdot \frac{exp(\frac{1}{scale}) - 1}{exp(\frac{1}{xcale}) +1} Args: scale: Desired noise scale (>=0). 
diff --git a/src/tmlt/core/random/rng.py b/src/tmlt/core/random/rng.py index 1d07264..070950d 100644 --- a/src/tmlt/core/random/rng.py +++ b/src/tmlt/core/random/rng.py @@ -7,8 +7,8 @@ from typing import Any import numpy as np -from randomgen.rdrand import RDRAND # pylint: disable=no-name-in-module -from randomgen.wrapper import UserBitGenerator # pylint: disable=no-name-in-module +from randomgen.rdrand import RDRAND +from randomgen.wrapper import UserBitGenerator try: _core_privacy_prng = np.random.Generator(RDRAND()) diff --git a/src/tmlt/core/transformations/base.py b/src/tmlt/core/transformations/base.py index 07af294..cdc7c47 100644 --- a/src/tmlt/core/transformations/base.py +++ b/src/tmlt/core/transformations/base.py @@ -84,7 +84,7 @@ def stability_function(self, d_in: Any) -> Any: raise NotImplementedError( f"{self.__class__.__name__} does not have a stability function" ) - return d_in # pylint: disable=unreachable + return d_in @typechecked def stability_relation(self, d_in: Any, d_out: Any) -> bool: @@ -111,7 +111,6 @@ def __or__(self, other: Measurement) -> Measurement: def __or__(self, other: Any) -> Union[Measurement, Transformation]: """Return this transformation chained with another component.""" - # pylint: disable=import-outside-toplevel check_type(other, Union[Measurement, Transformation]) if isinstance(other, Measurement): from tmlt.core.measurements.chaining import ChainTM diff --git a/src/tmlt/core/transformations/spark_transformations/agg.py b/src/tmlt/core/transformations/spark_transformations/agg.py index 4e89026..fd4e825 100644 --- a/src/tmlt/core/transformations/spark_transformations/agg.py +++ b/src/tmlt/core/transformations/spark_transformations/agg.py @@ -1,10 +1,9 @@ -# pylint: disable=line-too-long """Transformations for grouping and aggregating Spark DataFrames. See `the architecture overview `_ for more information on transformations. 
""" -# pylint: enable=line-too-long + # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 @@ -111,7 +110,7 @@ class Count(Transformation): >>> count_dataframe.stability_function(1) 1 - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -216,7 +215,7 @@ class CountDistinct(Transformation): >>> count_distinct_dataframe.stability_function(1) 1 - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -346,7 +345,7 @@ class CountGrouped(Transformation): >>> count_by_A.stability_function(1) 1 - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -425,7 +424,6 @@ def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: def __call__(self, grouped_data: GroupedDataFrame) -> DataFrame: """Returns a DataFrame containing counts for each group.""" - # pylint: disable=no-member return grouped_data.agg( func=sf.count("*").alias(self.count_column), fill_value=0 ) @@ -522,7 +520,7 @@ class CountDistinctGrouped(Transformation): >>> count_distinct_by_A.stability_function(1) 1 - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -695,7 +693,7 @@ class Sum(Transformation): :class:`~.SymmetricDifference` * :math:`h - \ell` if the input metric is :class:`~.HammingDistance` - """ # pylint: disable=line-too-long + """ @typechecked def __init__( @@ -916,7 +914,7 @@ class SumGrouped(Transformation): The sensitivity of the sum is: * :math:`\max(|h|, |\ell|)` - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -1049,7 +1047,6 @@ def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: def __call__(self, grouped_dataframe: GroupedDataFrame) -> DataFrame: """Returns DataFrame containing sum of specified column for each group.""" - # pylint: disable=no-member lower_ceil = self.lower.to_float(round_up=True) upper_floor = ( lower_ceil @@ -1153,7 +1150,6 @@ def create_count_distinct_aggregation( ... -# pylint: disable=line-too-long def create_count_distinct_aggregation( input_domain: Union[SparkDataFrameDomain, SparkGroupedDataFrameDomain], input_metric: Union[SymmetricDifference, HammingDistance, SumOf, RootSumOfSquared], @@ -1167,7 +1163,6 @@ def create_count_distinct_aggregation( count_column: If ``input_domain`` is a SparkGroupedDataFrameDomain, this is the name of the output count column. """ - # pylint: enable=line-too-long if isinstance(input_domain, SparkDataFrameDomain): assert isinstance(input_metric, (SymmetricDifference, HammingDistance)) return CountDistinct(input_domain=input_domain, input_metric=input_metric) diff --git a/src/tmlt/core/transformations/spark_transformations/filter.py b/src/tmlt/core/transformations/spark_transformations/filter.py index 44ce126..963c9a8 100644 --- a/src/tmlt/core/transformations/spark_transformations/filter.py +++ b/src/tmlt/core/transformations/spark_transformations/filter.py @@ -1,10 +1,9 @@ -# pylint: disable=line-too-long """Transformations for filtering Spark DataFrames. See `the architecture overview `_ for more information on transformations. 
""" -# pylint: enable=line-too-long + # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 @@ -88,7 +87,7 @@ class Filter(Transformation): 1 >>> filter_transformation.stability_function(123) 123 - """ # pylint: disable=line-too-long + """ @typechecked def __init__( @@ -147,7 +146,6 @@ def filter_expr(self) -> str: """Returns the filter expression.""" return self._filter_expr - # pylint: disable=line-too-long @typechecked def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: """Returns the smallest d_out satisfied by the transformation. @@ -158,7 +156,6 @@ def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: Args: d_in: Distance between inputs under input_metric. """ - # pylint: enable=line-too-long self.input_metric.validate(d_in) return ExactNumber(d_in) diff --git a/src/tmlt/core/transformations/spark_transformations/groupby.py b/src/tmlt/core/transformations/spark_transformations/groupby.py index f173e28..1712607 100644 --- a/src/tmlt/core/transformations/spark_transformations/groupby.py +++ b/src/tmlt/core/transformations/spark_transformations/groupby.py @@ -116,9 +116,7 @@ class GroupBy(Transformation): >>> groupby_B.stability_function(1) 1 - """ # pylint: disable=line-too-long,useless-suppression - - # pylint: enable=line-too-long,useless-suppression + """ @typechecked def __init__( diff --git a/src/tmlt/core/transformations/spark_transformations/id.py b/src/tmlt/core/transformations/spark_transformations/id.py index 3bc5b67..b4d533c 100644 --- a/src/tmlt/core/transformations/spark_transformations/id.py +++ b/src/tmlt/core/transformations/spark_transformations/id.py @@ -1,10 +1,8 @@ -# pylint: disable=line-too-long """Add a column containing a unique id for each row in a Spark DataFrame. See `the architecture overview `_ for more information on transformations. """ -# pylint: enable=line-too-long # SPDX-License-Identifier: Apache-2.0 @@ -96,9 +94,7 @@ class AddUniqueColumn(Transformation): 1 >>> add_unique_column.stability_function(2) 2 - """ # pylint: disable=line-too-long,useless-suppression - - # pylint: enable=line-too-long,useless-suppression + """ @typechecked def __init__(self, input_domain: SparkDataFrameDomain, column: str): @@ -126,7 +122,6 @@ def column(self) -> str: """Returns name of ID column to add.""" return self._column - # pylint: disable=line-too-long @typechecked def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: """Returns the smallest d_out satisfied by the transformation. @@ -137,7 +132,6 @@ def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: Args: d_in: Distance between inputs under input_metric. """ - # pylint: enable=line-too-long self.input_metric.validate(d_in) return ExactNumber(d_in) diff --git a/src/tmlt/core/transformations/spark_transformations/join.py b/src/tmlt/core/transformations/spark_transformations/join.py index 1a6a14a..6a7d8ad 100644 --- a/src/tmlt/core/transformations/spark_transformations/join.py +++ b/src/tmlt/core/transformations/spark_transformations/join.py @@ -1,10 +1,9 @@ -# pylint: disable=line-too-long """Transformations for joining Spark DataFrames. See `the architecture overview `_ for more information on transformations. """ -# pylint: enable=line-too-long + # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 @@ -244,9 +243,7 @@ class PublicJoin(Transformation): ... metric=IfGroupedBy("A", SymmetricDifference()), ... 
).stability_function(2) 2 - """ # pylint: disable=line-too-long,useless-suppression - - # pylint: enable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -276,7 +273,7 @@ def __init__( public and private DataFrames will be considered to be equal. how: Type of join to perform. Defaults to "inner". Note that only "inner" and "left" joins are supported. - """ # pylint: disable=line-too-long,useless-suppression + """ if isinstance(metric, IfGroupedBy): if metric.inner_metric not in ( SymmetricDifference(), @@ -584,7 +581,7 @@ class PrivateJoin(Transformation): 8 >>> private_join.stability_function({"left": 1, "right": 1}) 8 - """ # pylint: disable=line-too-long + """ @typechecked def __init__( @@ -748,7 +745,6 @@ def truncation_strategy_stability( TruncationStrategy.NO_TRUNCATION: float("inf"), }[truncation_strategy] - # pylint: disable=line-too-long @typechecked def stability_function(self, d_in: Dict[Any, ExactNumberInput]) -> ExactNumber: """Returns the smallest d_out satisfied by the transformation. @@ -759,7 +755,6 @@ def stability_function(self, d_in: Dict[Any, ExactNumberInput]) -> ExactNumber: Args: d_in: Distance between inputs under input_metric. """ - # pylint: enable=line-too-long self.input_metric.validate(d_in) tau_l = self.left_truncation_threshold tau_r = self.right_truncation_threshold @@ -960,8 +955,7 @@ class PrivateJoinOnKey(Transformation): 1 >>> private_join.stability_function(2) 2 - """ # pylint: disable=line-too-long,useless-suppression - # pylint: enable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -1076,7 +1070,6 @@ def join_on_nulls(self) -> bool: """Returns whether to consider null equal to null.""" return self._join_on_nulls - # pylint: disable=line-too-long @typechecked def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: """Returns the smallest d_out satisfied by the transformation. @@ -1087,7 +1080,6 @@ def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: Args: d_in: Distance between inputs under input_metric. """ - # pylint: enable=line-too-long self.input_metric.validate(d_in) return ExactNumber(d_in) diff --git a/src/tmlt/core/transformations/spark_transformations/map.py b/src/tmlt/core/transformations/spark_transformations/map.py index 6685b35..0d54eb3 100644 --- a/src/tmlt/core/transformations/spark_transformations/map.py +++ b/src/tmlt/core/transformations/spark_transformations/map.py @@ -1,10 +1,9 @@ -# pylint: disable=line-too-long """Transformations for applying user defined maps to Spark DataFrames. See `the architecture overview `_ for more information on transformations. """ -# pylint: enable=line-too-long + # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 @@ -150,7 +149,7 @@ class RowToRowTransformation(Transformation): :class:`~.RowToRowsTransformation` is not stable! Its :meth:`~.stability_relation` always returns False, and its :meth:`~.stability_function` always raises :class:`NotImplementedError`. - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -340,7 +339,7 @@ class RowToRowsTransformation(Transformation): :class:`~.RowToRowsTransformation` is not stable! Its :meth:`~.stability_relation` always returns False, and its :meth:`~.stability_function` always raises :class:`NotImplementedError`. - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -514,7 +513,7 @@ class RowsToRowsTransformation(Transformation): :class:`~.RowsToRowsTransformation` is not stable! 
Its :meth:`~.stability_relation` always returns False, and its :meth:`~.stability_function` always raises :class:`NotImplementedError`. - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -706,7 +705,7 @@ class FlatMap(Transformation): - IfGroupedBy(column, SymmetricDifference()) :class:`~.FlatMap`'s :meth:`~.stability_function` returns ``d_in``. - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -947,7 +946,7 @@ class GroupingFlatMap(Transformation): sqrt(3) >>> add_i_flat_map.stability_function(2) 2*sqrt(3) - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -1152,7 +1151,7 @@ class Map(Transformation): 1 >>> rename_b_to_c_map.stability_function(2) 2 - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -1307,7 +1306,7 @@ class FlatMapByKey(Transformation): Stability Guarantee: :class:`~.FlatMapByKey`'s :meth:`~.stability_function` returns ``d_in``. - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( diff --git a/src/tmlt/core/transformations/spark_transformations/nan.py b/src/tmlt/core/transformations/spark_transformations/nan.py index 731732b..be902a2 100644 --- a/src/tmlt/core/transformations/spark_transformations/nan.py +++ b/src/tmlt/core/transformations/spark_transformations/nan.py @@ -1,10 +1,9 @@ -# pylint: disable=line-too-long """Transformations to drop or replace NaNs, nulls, and infs in Spark DataFrames. See `the architecture overview `_ for more information on transformations. """ -# pylint: enable=line-too-long + # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 @@ -35,7 +34,6 @@ class DropInfs(Transformation): - # pylint: disable=line-too-long """Drops rows containing +inf or -inf in one or more specified columns. Examples: @@ -105,7 +103,6 @@ class DropInfs(Transformation): >>> drop_b_infs.stability_function(2) 2 """ - # pylint: enable=line-too-long @typechecked def __init__( @@ -182,7 +179,6 @@ def columns(self) -> List[str]: """Returns the columns to check for +inf and -inf.""" return self._columns.copy() - # pylint: disable=line-too-long @typechecked def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: """Returns the smallest d_out satisfied by the transformation. @@ -193,7 +189,6 @@ def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: Args: d_in: Distance between inputs under input_metric. """ - # pylint: enable=line-too-long self.input_metric.validate(d_in) return ExactNumber(d_in) @@ -282,7 +277,7 @@ class DropNaNs(Transformation): 1 >>> drop_b_nans.stability_function(2) 2 - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -362,7 +357,6 @@ def columns(self) -> List[str]: """Returns the columns to check for NaNs.""" return self._columns.copy() - # pylint: disable=line-too-long @typechecked def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: """Returns the smallest d_out satisfied by the transformation. @@ -373,13 +367,11 @@ def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: Args: d_in: Distance between inputs under input_metric. 
""" - # pylint: enable=line-too-long self.input_metric.validate(d_in) return ExactNumber(d_in) def __call__(self, sdf: DataFrame) -> DataFrame: """Drops rows containing NaNs in ``self.columns``.""" - # pylint: disable=no-member return sdf.filter( reduce( lambda exp, column: exp & ~sf.isnan(sf.col(column)), @@ -460,7 +452,7 @@ class DropNulls(Transformation): 1 >>> drop_b_nulls.stability_function(2) 2 - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -533,7 +525,6 @@ def columns(self) -> List[str]: """Returns the columns to check for nulls.""" return self._columns.copy() - # pylint: disable=line-too-long @typechecked def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: """Returns the smallest d_out satisfied by the transformation. @@ -544,7 +535,6 @@ def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: Args: d_in: Distance between inputs under input_metric. """ - # pylint: enable=line-too-long self.input_metric.validate(d_in) return ExactNumber(d_in) @@ -560,7 +550,6 @@ def __call__(self, sdf: DataFrame) -> DataFrame: class ReplaceInfs(Transformation): - # pylint: disable=line-too-long """Replaces +inf and -inf in one or more specified columns. Examples: @@ -635,7 +624,6 @@ class ReplaceInfs(Transformation): >>> replace_infs.stability_function(2) 2 """ - # pylint: enable=line-too-long @typechecked def __init__( @@ -723,7 +711,6 @@ def replace_map(self) -> Dict[str, Tuple[float, float]]: """Returns mapping used to replace infinite values.""" return self._replace_map.copy() - # pylint: disable=line-too-long @typechecked def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: """Returns the smallest d_out satisfied by the transformation. @@ -734,7 +721,6 @@ def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: Args: d_in: Distance between inputs under input_metric. """ - # pylint: enable=line-too-long self.input_metric.validate(d_in) return ExactNumber(d_in) @@ -829,7 +815,7 @@ class ReplaceNaNs(Transformation): 1 >>> replace_nans.stability_function(2) 2 - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -911,7 +897,6 @@ def replace_map(self) -> Dict[str, Any]: """Returns mapping used to replace NaNs and nulls.""" return self._replace_map.copy() - # pylint: disable=line-too-long @typechecked def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: """Returns the smallest d_out satisfied by the transformation. @@ -922,13 +907,11 @@ def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: Args: d_in: Distance between inputs under input_metric. """ - # pylint: enable=line-too-long self.input_metric.validate(d_in) return ExactNumber(d_in) def __call__(self, sdf: DataFrame) -> DataFrame: """Returns DataFrame with NaNs replaced in specified columns.""" - # pylint: disable=no-member for column, replacement in self.replace_map.items(): sdf = sdf.withColumn( column, @@ -1011,7 +994,7 @@ class ReplaceNulls(Transformation): 1 >>> replace_nulls.stability_function(2) 2 - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -1093,7 +1076,6 @@ def replace_map(self) -> Dict[str, Any]: """Returns mapping used to replace nulls.""" return self._replace_map.copy() - # pylint: disable=line-too-long @typechecked def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: """Returns the smallest d_out satisfied by the transformation. 
@@ -1104,13 +1086,11 @@ def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: Args: d_in: Distance between inputs under input_metric. """ - # pylint: enable=line-too-long self.input_metric.validate(d_in) return ExactNumber(d_in) def __call__(self, sdf: DataFrame) -> DataFrame: """Returns DataFrame with nulls replaced in specified columns.""" - # pylint: disable=no-member for column, replacement in self.replace_map.items(): sdf = sdf.withColumn( column, diff --git a/src/tmlt/core/transformations/spark_transformations/partition.py b/src/tmlt/core/transformations/spark_transformations/partition.py index 7824932..f573f7d 100644 --- a/src/tmlt/core/transformations/spark_transformations/partition.py +++ b/src/tmlt/core/transformations/spark_transformations/partition.py @@ -1,10 +1,9 @@ -# pylint: disable=line-too-long """Transformations for partitioning Spark DataFrames. See `the architecture overview `_ for more information on transformations. """ -# pylint: enable=line-too-long + # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 @@ -59,7 +58,6 @@ def num_partitions(self) -> Optional[int]: """ return self._num_partitions - # pylint: disable=line-too-long @typechecked def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: """Returns the smallest d_out satisfied by the transformation. @@ -70,7 +68,6 @@ def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: Args: d_in: Distance between inputs under input_metric. """ - # pylint: enable=line-too-long self.input_metric.validate(d_in) return ExactNumber(d_in) @@ -167,9 +164,7 @@ class PartitionByKeys(Partition): 1 >>> partition.stability_function(2) 2 - """ # pylint: disable=line-too-long,useless-suppression - - # pylint: enable=line-too-long, useless-suppression + """ @typechecked def __init__( diff --git a/src/tmlt/core/transformations/spark_transformations/persist.py b/src/tmlt/core/transformations/spark_transformations/persist.py index 3c80adc..c9e1edd 100644 --- a/src/tmlt/core/transformations/spark_transformations/persist.py +++ b/src/tmlt/core/transformations/spark_transformations/persist.py @@ -1,10 +1,9 @@ -# pylint: disable=line-too-long """Transformations for persisting and un-persisting Spark DataFrames. See `the architecture overview `_ for more information. """ -# pylint: enable=line-too-long + # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 diff --git a/src/tmlt/core/transformations/spark_transformations/rename.py b/src/tmlt/core/transformations/spark_transformations/rename.py index 7856b25..a7c1ec5 100644 --- a/src/tmlt/core/transformations/spark_transformations/rename.py +++ b/src/tmlt/core/transformations/spark_transformations/rename.py @@ -1,10 +1,9 @@ -# pylint: disable=line-too-long """Transformations for renaming Spark DataFrame columns. See `the architecture overview `_ for more information. """ -# pylint: enable=line-too-long + # TODO: Open question regarding "switching" column names. 
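[Editorial aside on the TODO above about "switching" column names: a mapping-based rename resolves a swap atomically, which appears to be the open question. A minimal sketch using pandas as a stand-in for the Spark DataFrames this module actually handles:]

import pandas as pd

# The mapping is applied to every label simultaneously, so {"A": "B", "B": "A"}
# swaps the two names instead of colliding like two sequential renames would.
df = pd.DataFrame({"A": [1], "B": [2]})
swapped = df.rename(columns={"A": "B", "B": "A"})
assert list(swapped.columns) == ["B", "A"]
assert swapped["B"].iloc[0] == 1  # the old "A" data now lives under "B"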
# SPDX-License-Identifier: Apache-2.0 @@ -103,9 +102,7 @@ class Rename(Transformation): 1 >>> rename_b_to_c.stability_function(2) 2 - """ # pylint: disable=line-too-long,useless-suppression - - # pylint: enable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -172,7 +169,6 @@ def rename_mapping(self) -> Dict[str, str]: """Returns mapping from old column names to new column names.""" return self._rename_mapping.copy() - # pylint: disable=line-too-long @typechecked def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: """Returns the smallest d_out satisfied by the transformation. @@ -183,7 +179,6 @@ def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: Args: d_in: Distance between inputs under input_metric. """ - # pylint: enable=line-too-long self.input_metric.validate(d_in) return ExactNumber(d_in) diff --git a/src/tmlt/core/transformations/spark_transformations/select.py b/src/tmlt/core/transformations/spark_transformations/select.py index 744375e..5d44b06 100644 --- a/src/tmlt/core/transformations/spark_transformations/select.py +++ b/src/tmlt/core/transformations/spark_transformations/select.py @@ -1,10 +1,9 @@ -# pylint: disable=line-too-long """Transformations for selecting columns from Spark DataFrames. See `the architecture overview `_ for more information. """ -# pylint: enable=line-too-long + # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 @@ -101,7 +100,7 @@ class Select(Transformation): 1 >>> drop_b.stability_function(2) 2 - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -159,7 +158,6 @@ def columns(self) -> List[str]: """Returns columns being selected.""" return self._columns.copy() - # pylint: disable=line-too-long @typechecked def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: """Returns the smallest d_out satisfied by the transformation. @@ -170,7 +168,6 @@ def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: Args: d_in: Distance between inputs under input_metric. """ - # pylint: enable=line-too-long self.input_metric.validate(d_in) return ExactNumber(d_in) diff --git a/src/tmlt/core/transformations/spark_transformations/truncation.py b/src/tmlt/core/transformations/spark_transformations/truncation.py index 591e2fb..992f61a 100644 --- a/src/tmlt/core/transformations/spark_transformations/truncation.py +++ b/src/tmlt/core/transformations/spark_transformations/truncation.py @@ -100,7 +100,7 @@ class LimitRowsPerGroup(Transformation): 2 >>> truncate.stability_function(2) 4 - """ # pylint: disable=line-too-long,useless-suppression + """ @typechecked def __init__( @@ -154,7 +154,6 @@ def threshold(self) -> int: """Returns the maximum number of rows per group after truncation.""" return self._threshold - # pylint: disable=line-too-long @typechecked def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: """Returns the smallest d_out satisfied by the transformation. @@ -165,7 +164,6 @@ def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: Args: d_in: Distance between inputs under input_metric. 
""" - # pylint: enable=line-too-long self.input_metric.validate(d_in) if self.output_metric == SymmetricDifference(): return ExactNumber(d_in) * self.threshold @@ -268,7 +266,7 @@ class LimitKeysPerGroup(Transformation): 2 >>> truncate.stability_function(2) 4 - """ # pylint: disable=line-too-long + """ @typechecked def __init__( @@ -343,7 +341,6 @@ def threshold(self) -> int: """Returns the maximum number of keys per group after truncation.""" return self._threshold - # pylint: disable=line-too-long @typechecked def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: """Returns the smallest d_out satisfied by the transformation. @@ -354,7 +351,6 @@ def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: Args: d_in: Distance between inputs under input_metric. """ - # pylint: enable=line-too-long d_in = ExactNumber(d_in) self.input_metric.validate(d_in) if self.output_metric == IfGroupedBy( @@ -468,7 +464,7 @@ class LimitRowsPerKeyPerGroup(Transformation): 2 >>> truncate.stability_function(2) 4 - """ # pylint: disable=line-too-long + """ @typechecked def __init__( @@ -549,7 +545,6 @@ def threshold(self) -> int: """Returns the maximum number of rows each unique (key, grouping column value) pair may appear in after truncation.""" return self._threshold - # pylint: disable=line-too-long @typechecked def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: """Returns the smallest d_out satisfied by the transformation. @@ -560,7 +555,6 @@ def stability_function(self, d_in: ExactNumberInput) -> ExactNumber: Args: d_in: Distance between inputs under input_metric. """ - # pylint: enable=line-too-long d_in = ExactNumber(d_in) self.input_metric.validate(d_in) if self.input_metric == IfGroupedBy( diff --git a/src/tmlt/core/utils/arb.py b/src/tmlt/core/utils/arb.py index 3f06dea..e2c0c66 100644 --- a/src/tmlt/core/utils/arb.py +++ b/src/tmlt/core/utils/arb.py @@ -9,14 +9,12 @@ import platform from typing import Any, List, Tuple, Union -# pylint: disable=protected-access - # importlib.resources.path was deprecated in Python 3.11, and then un-deprecated # in 3.13, so there's not actually a problem here. It's possible this code will # need to be tweaked slightly for 3.13 support, as there were some changes to # the API, but they don't obviously affect this code. -# pylint: disable=deprecated-method + if platform.system() == "Windows": with ( importlib.resources.path("tmlt.core.ext.lib", "libarb.dll") as _arb_path, @@ -59,7 +57,6 @@ "Unrecognized platform. Expected platform.system() to be one of" f" 'Windows', 'Linux', or 'Darwin' not ({platform.system()})." ) -# pylint: enable=deprecated-method class _PtrStruct(ctypes.Structure): @@ -372,10 +369,10 @@ def man_exp(self) -> Tuple[int, int]: x = self._ptr.contents.mid # Per the docs, the initializer for ctypes.c_long is optional, but # pylint thinks it is required. 
- # pylint: disable=no-value-for-parameter, useless-suppression + man_ptr = ctypes.pointer(ctypes.c_long()) exp_ptr = ctypes.pointer(ctypes.c_long()) - # pylint: enable=no-value-for-parameter, useless-suppression + arblib.arf_get_fmpz_2exp(man_ptr, exp_ptr, ctypes.byref(x)) return _fmpz_t_to_int(man_ptr), _fmpz_t_to_int(exp_ptr) @@ -413,10 +410,8 @@ def radius(self) -> "Arb": arblib.arb_get_rad_arb(x, self._ptr) return Arb(x) - # pylint: disable=line-too-long def __contains__(self, value: Any) -> bool: """Returns True if value is contained in the interval represented by ``self``.""" - # pylint: enable=line-too-long if isinstance(value, Arb): return arblib.arb_contains(self._ptr, value._ptr) != 0 return False @@ -632,9 +627,8 @@ def _int_to_fmpz_t(val: int) -> "ctypes._PointerLike": Args: val: Integer to convert. """ - # pylint: disable-next=no-value-for-parameter, useless-suppression fmpz_pointer = ctypes.pointer(ctypes.c_long()) - s = "%x" % int(val) # pylint: disable=consider-using-f-string + s = "%x" % int(val) val_c_string = ctypes.c_char_p(s.encode("ascii")) flintlib.fmpz_set_str(fmpz_pointer, val_c_string, 16) return fmpz_pointer diff --git a/src/tmlt/core/utils/exact_number.py b/src/tmlt/core/utils/exact_number.py index c5ad35b..a714f21 100644 --- a/src/tmlt/core/utils/exact_number.py +++ b/src/tmlt/core/utils/exact_number.py @@ -130,7 +130,7 @@ -sqrt(2) >>> 2 / ExactNumber(6) 1/3 -""" # pylint: disable=line-too-long,useless-suppression +""" # SPDX-License-Identifier: Apache-2.0 # Copyright Tumult Labs 2025 diff --git a/src/tmlt/core/utils/join.py b/src/tmlt/core/utils/join.py index 7820e8d..2e5ea63 100644 --- a/src/tmlt/core/utils/join.py +++ b/src/tmlt/core/utils/join.py @@ -91,7 +91,7 @@ def columns_after_join( name. how: Join type. Must be one of "left", "right", "inner", "outer", "left_anti". This defaults to "inner". - """ # pylint: disable=line-too-long,useless-suppression + """ if on is None: on = natural_join_columns(left_columns, right_columns) @@ -442,7 +442,7 @@ def _rename_columns( * Right dataframe with renamed columns. * Mapping from output column name to (left column name, right column name). See :func:`columns_after_join`. - """ # pylint: disable=line-too-long,useless-suppression + """ output_columns = columns_after_join( left_columns=left.columns, right_columns=right.columns, on=on, how=how ) diff --git a/src/tmlt/core/utils/prdp.py b/src/tmlt/core/utils/prdp.py index f69d1a7..deac0d0 100644 --- a/src/tmlt/core/utils/prdp.py +++ b/src/tmlt/core/utils/prdp.py @@ -106,7 +106,7 @@ def square_root_gaussian_inverse_cdf(x: Arb, sigma: Arb, prec: int) -> Arb: \end{cases} \end{equation} - """ # pylint: disable=line-too-long,useless-suppression + """ if x == Arb.from_float(0.5): return Arb.from_int(0) @@ -197,7 +197,7 @@ def exponential_polylogarithmic_inverse_cdf( 0 & x = \frac{1}{2} \end{cases} - """ # pylint: disable=line-too-long,useless-suppression + """ if x == Arb.from_float(0.5): return Arb.from_int(0) diff --git a/src/tmlt/core/utils/testing.py b/src/tmlt/core/utils/testing.py index f00794f..bc8057b 100644 --- a/src/tmlt/core/utils/testing.py +++ b/src/tmlt/core/utils/testing.py @@ -582,7 +582,6 @@ class Case: For usage information, see :func:`~tmlt.core.utils.testing.parametrize`. """ - # pylint: disable-next=redefined-builtin def __init__(self, id: Optional[str] = None, **kwargs: Any): """Constructor. 
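[Editorial aside on the pragma dropped just above: `Case`'s constructor takes a keyword argument named `id`, which shadows the builtin. Pylint's redefined-builtin flagged that; in Ruff the equivalent check lives in the flake8-builtins rules (A002), which are not in the `select` list this series sets up, so no replacement pragma is needed. A hypothetical reduction of the pattern -- the real Case class lives in tmlt.core.utils.testing:]

class Case:
    def __init__(self, id=None, **kwargs):  # `id` shadows the builtin
        self.id = id
        self.kwargs = kwargs

assert Case(id="edge-case").id == "edge-case"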
diff --git a/src/tmlt/core/utils/type_utils.py b/src/tmlt/core/utils/type_utils.py index 7df1399..8bbfdab 100644 --- a/src/tmlt/core/utils/type_utils.py +++ b/src/tmlt/core/utils/type_utils.py @@ -7,8 +7,6 @@ from types import FunctionType from typing import Any, NoReturn, Sequence, Tuple, Type -# pylint: disable=cyclic-import, import-outside-toplevel - def assert_never(x: NoReturn) -> NoReturn: """Assertion for statically checking exhaustive pattern matches. diff --git a/test/system/measurements/test_interactive_measurements.py b/test/system/measurements/test_interactive_measurements.py index f1c20e2..c6d2903 100644 --- a/test/system/measurements/test_interactive_measurements.py +++ b/test/system/measurements/test_interactive_measurements.py @@ -79,9 +79,7 @@ def test_queue_transformation_on_inactive_accountant(self) -> None: self.assertEqual(self.accountant.input_domain, transformed_domain) self.assertEqual(self.accountant.input_metric, transformed_metric) self.assertEqual(self.accountant.d_in, transformed_d_in) - self.assertIsNotNone( - self.accountant._pending_transformation # pylint: disable=protected-access - ) + self.assertIsNotNone(self.accountant._pending_transformation) for c in child_accountants: c.retire() @@ -89,9 +87,7 @@ def test_queue_transformation_on_inactive_accountant(self) -> None: # Once the accountant is active again, the transformation should have # been run self.assertEqual(self.accountant.state, PrivacyAccountantState.ACTIVE) - # pylint: disable=protected-access self.assertEqual(self.accountant._input_domain, transformed_domain) self.assertEqual(self.accountant._input_metric, transformed_metric) self.assertEqual(self.accountant._d_in, transformed_d_in) self.assertIsNone(self.accountant._pending_transformation) - # pylint: enable=protected-access diff --git a/test/unit/budget_abstract.py b/test/unit/budget_abstract.py index 24a6c80..7616722 100644 --- a/test/unit/budget_abstract.py +++ b/test/unit/budget_abstract.py @@ -4,8 +4,6 @@ # Copyright Tumult Labs 2025 import copy - -# pylint: disable=unused-import from abc import ABC, abstractmethod from contextlib import nullcontext as does_not_raise from typing import Any, Callable, ContextManager, Dict, Optional, Type diff --git a/test/unit/domains/abstract.py b/test/unit/domains/abstract.py index 566649a..163f413 100644 --- a/test/unit/domains/abstract.py +++ b/test/unit/domains/abstract.py @@ -4,8 +4,6 @@ # Copyright Tumult Labs 2025 import copy - -# pylint: disable=unused-import from abc import ABC, abstractmethod from contextlib import nullcontext as does_not_raise from typing import Any, Callable, ContextManager, Dict, Optional, Type diff --git a/test/unit/domains/test_numpy_domains.py b/test/unit/domains/test_numpy_domains.py index 85d454f..decbd06 100644 --- a/test/unit/domains/test_numpy_domains.py +++ b/test/unit/domains/test_numpy_domains.py @@ -801,9 +801,7 @@ def test_validate( @pytest.mark.parametrize( "dtype, expected, expectation", [ - # pylint: disable=line-too-long (np.dtype(np.object0), NumpyStringDomain(), does_not_raise()), # type: ignore[attr-defined] - # pylint: enable=line-too-long ( np.dtype([("f1", np.int64)]), None, diff --git a/test/unit/domains/test_pandas_domains.py b/test/unit/domains/test_pandas_domains.py index 851b91f..1bcd078 100644 --- a/test/unit/domains/test_pandas_domains.py +++ b/test/unit/domains/test_pandas_domains.py @@ -259,7 +259,6 @@ def test_validate( @pytest.mark.parametrize( "dtype, expected, expectation", - # pylint: disable=line-too-long [ ( np.dtype(dtype), @@ -268,7 
+267,6 @@ def test_validate( ) for dtype in [np.int8, np.int16, np.int32, np.bool8] # type: ignore[attr-defined] ] - # pylint: enable=line-too-long + [ ( np.dtype(np.float32), @@ -290,9 +288,7 @@ def test_validate( PandasSeriesDomain(NumpyIntegerDomain(size=64)), does_not_raise(), ), - # pylint: disable=line-too-long (np.dtype([("f1", np.int16)]), None, pytest.raises(KeyError)), # type: ignore - # pylint: enable=line-too-long ], ) def test_from_numpy_type( diff --git a/test/unit/domains/test_spark_domains.py b/test/unit/domains/test_spark_domains.py index 5f30ee5..fdf568f 100644 --- a/test/unit/domains/test_spark_domains.py +++ b/test/unit/domains/test_spark_domains.py @@ -921,10 +921,8 @@ def test_validate( # Separate asserts for the frames are required since GroupedDataFrame does not # implement __eq__. assert_frame_equal_with_sort( - exception_properties[ # pylint: disable=protected-access - "value" - ]._dataframe, - core_exception.value._dataframe, # pylint: disable=protected-access + exception_properties["value"]._dataframe, + core_exception.value._dataframe, ) assert_frame_equal_with_sort( exception_properties["value"].group_keys, core_exception.value.group_keys @@ -1342,7 +1340,7 @@ def test_to_numpy_domain_invalid( ValueError, match="Column must be " f"{get_fullname(_type_to_spark_type[col_type])}; got " - f"{get_fullname(_type_to_spark_type[_col_name_to_type[col_name]])} " # pylint: disable=line-too-long + f"{get_fullname(_type_to_spark_type[_col_name_to_type[col_name]])} " "instead", ) ), diff --git a/test/unit/measurements/abstract.py b/test/unit/measurements/abstract.py index 437edc5..281fcc5 100644 --- a/test/unit/measurements/abstract.py +++ b/test/unit/measurements/abstract.py @@ -4,8 +4,6 @@ # Copyright Tumult Labs 2025 import copy - -# pylint: disable=unused-import from abc import ABC, abstractmethod from contextlib import nullcontext as does_not_raise from typing import Any, Callable, ContextManager, Dict, Optional, Type diff --git a/test/unit/measurements/test_aggregations.py b/test/unit/measurements/test_aggregations.py index 94202d5..ee9fd63 100644 --- a/test/unit/measurements/test_aggregations.py +++ b/test/unit/measurements/test_aggregations.py @@ -79,7 +79,7 @@ ), ] -# pylint: disable=no-member + # Disabling no-member because groupby_columns are defined in the setup function. @@ -1634,9 +1634,6 @@ def test_functions_without_noise_mechanism( ] -# pylint: disable=redefined-outer-name - - # request is of class "FixtureRequest", which is imported from _pytest.fixtures # using type "Any" to avoid protected access. 
@pytest.fixture( @@ -1805,9 +1802,6 @@ def test_groupbyvar(spark_data): pd.testing.assert_frame_equal(expected_sorted, output_sorted) -# pylint: enable=redefined-outer-name - - class TestBounds: """Correctness tests for class :func:`~.create_bounds_measurement`.""" diff --git a/test/unit/measurements/test_interactive_measurements.py b/test/unit/measurements/test_interactive_measurements.py index 84cb665..6d72624 100644 --- a/test/unit/measurements/test_interactive_measurements.py +++ b/test/unit/measurements/test_interactive_measurements.py @@ -149,7 +149,6 @@ def test_privacy_function_invalid_d_in(self): def test_correctness(self): """SequentialComposition returns the expected Queryable object.""" - # pylint: disable=protected-access actual = self.measurement(self.data) self.assertIsInstance(actual, SequentialQueryable) self.assertEqual(actual._input_domain, self.measurement.input_domain) @@ -353,7 +352,6 @@ def test_privacy_function(self): def test_correctness(self): """ParallelComposition returns the expected Queryable object.""" - # pylint: disable=protected-access actual = self.measurement(self.data) self.assertIsInstance(actual, ParallelQueryable) self.assertEqual(actual._next_index, 0) @@ -423,7 +421,7 @@ def test_retirable_queryable_returned(self): self.assertIsInstance(actual, RetirableQueryable) self.assertEqual( - actual._inner_queryable, # pylint: disable=protected-access + actual._inner_queryable, returned_queryable, ) @@ -436,9 +434,7 @@ def test_retire_works_recursively(self): ) inner_most_queryable = queryable(None)(None) queryable(RetireQuery()) - self.assertTrue( - inner_most_queryable._is_retired # pylint: disable=protected-access - ) + self.assertTrue(inner_most_queryable._is_retired) def test_retire_works_when_descendant_is_retired(self): """RetirableQueryable can be retired even when a descendant is retired.""" @@ -521,9 +517,7 @@ def test_constructor_mutable_arguments(self): ) d_in["A"] = 1 d_in["B"] = 2 - self.assertDictEqual( - queryable._d_in, {"A": 2} # pylint: disable=protected-access - ) + self.assertDictEqual(queryable._d_in, {"A": 2}) def test_queryable_budget_is_decreased_correctly(self): """SequentialQueryable's internal budget is correctly decreased on query.""" @@ -540,7 +534,7 @@ def test_queryable_budget_is_decreased_correctly(self): ) ) self.assertEqual( - queryable._remaining_budget.value, # pylint: disable=protected-access + queryable._remaining_budget.value, self.budget_quarters[3], ) @@ -672,7 +666,7 @@ def test_measurement_query_with_explicit_d_out( ) ) self.assertEqual( - queryable._remaining_budget.value, # pylint: disable=protected-access + queryable._remaining_budget.value, self.budget_quarters[2], ) @@ -711,7 +705,7 @@ def test_transformation_query_with_explicit_d_out( d_out=3, ) ) - self.assertEqual(queryable._d_in, 3) # pylint: disable=protected-access + self.assertEqual(queryable._d_in, 3) def test_transformation_query_stability_relation_returns_false(self): """SequentialQueryable raises error if stability relation is not True.""" @@ -733,7 +727,6 @@ def test_transformation_query_stability_relation_returns_false(self): def test_transformation_query(self): """SequentialQueryable processes TransformationQuery correctly.""" - # pylint: disable=protected-access queryable = self.construct_queryable() transformation = create_mock_transformation( return_value=np.float64(100.0), @@ -1175,10 +1168,9 @@ def test_transform_in_place(self): self.assertEqual(accountant.input_domain, NumpyIntegerDomain()) self.assertEqual(accountant.input_metric, 
AbsoluteDifference()) self.assertEqual(accountant.d_in, 10) - # pylint: disable=protected-access + self.assertIsNotNone(accountant._queryable) self.assertEqual(accountant._queryable._data, np.int64(2)) # type: ignore - # pylint: enable=protected-access def test_transform_with_explicit_d_out(self): """PrivacyAccountant.transform_in_place works with a d_out provided.""" @@ -1203,7 +1195,7 @@ def test_transform_with_explicit_d_out(self): self.assertEqual(accountant.input_domain, NumpyIntegerDomain()) self.assertEqual(accountant.input_metric, AbsoluteDifference()) self.assertEqual(accountant.d_in, 10) - # pylint: disable=protected-access + self.assertIsNotNone(accountant._queryable) self.assertEqual(accountant._queryable._data, np.int64(2)) # type: ignore @@ -1497,11 +1489,10 @@ def test_queue_transformation_on_active_accountant(self): self.assertEqual(accountant.input_domain, NumpyIntegerDomain()) self.assertEqual(accountant.input_metric, AbsoluteDifference()) self.assertEqual(accountant.d_in, 10) - # pylint: disable=protected-access + self.assertIsNotNone(accountant._queryable) self.assertEqual(accountant._queryable._data, np.int64(2)) # type: ignore self.assertIsNone(accountant._pending_transformation) - # pylint: enable=protected-access def test_queue_transformation_on_inactive_accountant(self): """queue_transformation queues transformations on inactive account""" @@ -1545,9 +1536,7 @@ def test_queue_transformation_on_inactive_accountant(self): self.assertEqual(accountant.input_domain, NumpyIntegerDomain()) self.assertEqual(accountant.input_metric, AbsoluteDifference()) self.assertEqual(accountant.d_in, 10) - self.assertIsNotNone( - accountant._pending_transformation # pylint: disable=protected-access - ) + self.assertIsNotNone(accountant._pending_transformation) for c in child_accountants: c.retire() @@ -1558,11 +1547,10 @@ def test_queue_transformation_on_inactive_accountant(self): self.assertEqual(accountant.input_domain, NumpyIntegerDomain()) self.assertEqual(accountant.input_metric, AbsoluteDifference()) self.assertEqual(accountant.d_in, 10) - # pylint: disable=protected-access + self.assertIsNotNone(accountant._queryable) self.assertEqual(accountant._queryable._data, np.int64(2)) # type: ignore self.assertIsNone(accountant._pending_transformation) - # pylint: enable=protected-access @parameterized.expand( [ @@ -1700,9 +1688,7 @@ def test_queue_invalid_transformation_with_transform_in_queue( stability_function_implemented=True, ) accountant.queue_transformation(identity_transformation) - self.assertIsNotNone( - accountant._pending_transformation # pylint: disable=protected-access - ) + self.assertIsNotNone(accountant._pending_transformation) with self.assertRaisesRegex(ValueError, error_message): accountant.queue_transformation(transformation=transformation, d_out=d_out) @@ -1711,9 +1697,7 @@ def test_force_activate_raises_error_on_invalid_states(self): accountant = PrivacyAccountant.launch( measurement=self.measurement, data=self.data ) - accountant._state = ( # pylint: disable=protected-access - PrivacyAccountantState.RETIRED - ) + accountant._state = PrivacyAccountantState.RETIRED with self.assertRaisesRegex( RuntimeError, "Can not activate RETIRED PrivacyAccountant" ): @@ -1764,7 +1748,6 @@ def test_force_activate_waiting_for_siblings(self): def test_retire_raises_error_appropriately(self): """PrivacyAccountant.retire raises error appropriately.""" - # pylint: disable=protected-access accountant = PrivacyAccountant.launch( measurement=self.measurement, data=self.data ) @@ -1905,7 
+1888,6 @@ def test_privacy_function(self): def test_correctness(self): """SequentialComposition returns the expected Queryable object.""" - # pylint: disable=protected-access actual = self.measurement(np.int64(10)) self.assertIsInstance(actual, DecoratedQueryable) self.assertEqual(actual._preprocess_query, self.measurement.preprocess_query) @@ -2019,11 +2001,11 @@ def test_create_adaptive_composition(self): adaptive_composition.measurement.input_metric, AbsoluteDifference() ) self.assertEqual(adaptive_composition.measurement.d_in, 1) # type: ignore - # pylint: disable=line-too-long + self.assertEqual( adaptive_composition.measurement.privacy_budget, self.privacy_budget # type: ignore ) - # pylint: enable=line-too-long + self.assertEqual( adaptive_composition.measurement.output_measure, self.output_measure ) diff --git a/test/unit/measures_abstract.py b/test/unit/measures_abstract.py index 0f9b017..07fd661 100644 --- a/test/unit/measures_abstract.py +++ b/test/unit/measures_abstract.py @@ -4,8 +4,6 @@ # Copyright Tumult Labs 2025 import copy - -# pylint: disable=unused-import from abc import ABC, abstractmethod from contextlib import nullcontext as does_not_raise from typing import Any, Callable, ContextManager, Dict, Optional, Type diff --git a/test/unit/metric_abstract.py b/test/unit/metric_abstract.py index 2347443..f6396aa 100644 --- a/test/unit/metric_abstract.py +++ b/test/unit/metric_abstract.py @@ -4,8 +4,6 @@ # Copyright Tumult Labs 2025 import copy - -# pylint: disable=unused-import from abc import ABC, abstractmethod from contextlib import nullcontext as does_not_raise from typing import Any, Callable, ContextManager, Dict, Optional, Type diff --git a/test/unit/random/test_rng.py b/test/unit/random/test_rng.py index c48fb65..303049b 100644 --- a/test/unit/random/test_rng.py +++ b/test/unit/random/test_rng.py @@ -11,8 +11,6 @@ import tmlt.core.random.rng -# pylint: disable=import-outside-toplevel, no-name-in-module - class TestRNG(TestCase): """Tests for :func:`~.laplace_inverse_cdf`.""" diff --git a/test/unit/transformations/abstract.py b/test/unit/transformations/abstract.py index a7f2fea..0d4d499 100644 --- a/test/unit/transformations/abstract.py +++ b/test/unit/transformations/abstract.py @@ -4,8 +4,6 @@ # Copyright Tumult Labs 2025 import copy - -# pylint: disable=unused-import from abc import ABC, abstractmethod from contextlib import nullcontext as does_not_raise from test.conftest import assert_frame_equal_with_sort diff --git a/test/unit/transformations/spark_transformations/test_add_remove_keys.py b/test/unit/transformations/spark_transformations/test_add_remove_keys.py index 3c08d29..74f4f7b 100644 --- a/test/unit/transformations/spark_transformations/test_add_remove_keys.py +++ b/test/unit/transformations/spark_transformations/test_add_remove_keys.py @@ -56,8 +56,6 @@ get_all_props, ) -# pylint: disable=no-member - @parameterized_class( [ diff --git a/test/unit/transformations/spark_transformations/test_agg.py b/test/unit/transformations/spark_transformations/test_agg.py index 09eb233..a419d92 100644 --- a/test/unit/transformations/spark_transformations/test_agg.py +++ b/test/unit/transformations/spark_transformations/test_agg.py @@ -1251,11 +1251,10 @@ def test_create_count_distinct_aggregation( ) # the "type: ignore" comment makes the next line 2 characters too long. 
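[Editorial note on the comment above: once "E501" lands in the ignore list later in this series, lines pushed over the limit by a trailing `# type: ignore` need no suppression at all. If E501 were ever re-enabled, the per-line opt-out would be Ruff's `# noqa` rather than a pylint pragma -- a hypothetical snippet, not from the repository:]

# With E501 ignored project-wide this noqa is redundant; it is shown only to
# contrast Ruff's per-line opt-out with the pylint pragmas being deleted here.
numbers = list(range(10))
total = sum(numbers)  # noqa: E501
assert total == 45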
- # pylint: disable=line-too-long + count_distinct_transformation = create_count_distinct_aggregation( # type: ignore input_domain=input_domain, input_metric=input_metric ) - # pylint: enable=line-too-long self.assertTrue( isinstance( diff --git a/test/unit/transformations/spark_transformations/test_groupby.py b/test/unit/transformations/spark_transformations/test_groupby.py index 2a72bb3..b7a3522 100644 --- a/test/unit/transformations/spark_transformations/test_groupby.py +++ b/test/unit/transformations/spark_transformations/test_groupby.py @@ -196,7 +196,7 @@ def test_correctness(self): grouped_dataframe = groupby_transformation(self.df) self.assertTrue(isinstance(grouped_dataframe, GroupedDataFrame)) self.assert_frame_equal_with_sort( - grouped_dataframe._dataframe.toPandas(), # pylint: disable=protected-access + grouped_dataframe._dataframe.toPandas(), self.df.toPandas(), ) self.assert_frame_equal_with_sort( @@ -214,7 +214,7 @@ def test_total(self): grouped_dataframe = groupby_transformation(self.df) self.assertTrue(isinstance(grouped_dataframe, GroupedDataFrame)) self.assert_frame_equal_with_sort( - grouped_dataframe._dataframe.toPandas(), # pylint: disable=protected-access + grouped_dataframe._dataframe.toPandas(), self.df.toPandas(), ) self.assert_frame_equal_with_sort( diff --git a/test/unit/transformations/spark_transformations/test_join.py b/test/unit/transformations/spark_transformations/test_join.py index 59de38b..f7ae50b 100644 --- a/test/unit/transformations/spark_transformations/test_join.py +++ b/test/unit/transformations/spark_transformations/test_join.py @@ -1206,7 +1206,7 @@ class TestPrivateJoinOnKey(PySparkTest): """Tests for class PrivateJoinOnKey. Tests :class:`~tmlt.core.transformations.spark_transformations.join.PrivateJoinOnKey`. - """ # pylint: disable=line-too-long + """ def setUp(self): """Setup.""" diff --git a/test/unit/transformations/spark_transformations/test_persist.py b/test/unit/transformations/spark_transformations/test_persist.py index efa4523..b3411ad 100644 --- a/test/unit/transformations/spark_transformations/test_persist.py +++ b/test/unit/transformations/spark_transformations/test_persist.py @@ -86,7 +86,6 @@ def test_property_immutability(self, prop_name: str): def test_correctness(self): """SparkAction makes Spark evaluate and persist a DataFrame immediately.""" - # pylint: disable=protected-access df = self.spark.createDataFrame([(1,)], schema=["A"]).persist() assert df.is_cached # this will assert that the list is empty diff --git a/test/unit/utils/test_arb.py b/test/unit/utils/test_arb.py index 1c2965e..0ae6e33 100644 --- a/test/unit/utils/test_arb.py +++ b/test/unit/utils/test_arb.py @@ -9,7 +9,6 @@ from parameterized import parameterized -# pylint: disable=no-name-in-module,useless-suppression # This suppression is useless on Python 3.12 but does something on Python 3.8. 
from scipy.special import erf, erfc @@ -35,8 +34,6 @@ arb_union, ) -# pylint: enable=no-name-in-module,useless-suppression - class TestArb(TestCase): """Tests for :class:`tmlt.core.utils.arb.Arb`.""" diff --git a/test/unit/utils/test_grouped_dataframe.py b/test/unit/utils/test_grouped_dataframe.py index 8c61f41..6bb197c 100644 --- a/test/unit/utils/test_grouped_dataframe.py +++ b/test/unit/utils/test_grouped_dataframe.py @@ -69,9 +69,7 @@ def test_select_works_correctly(self): group_keys=self.spark.createDataFrame([(1,), (1,)], schema=["A"]), ) expected = pd.DataFrame({"A": [1], "B": [2]}) - actual = grouped_dataframe.select( # pylint:disable=protected-access - ["A", "B"] - )._dataframe.toPandas() + actual = grouped_dataframe.select(["A", "B"])._dataframe.toPandas() self.assert_frame_equal_with_sort(actual, expected) def test_agg_with_nulls(self) -> None: diff --git a/tutorials/FirstSteps.ipynb b/tutorials/FirstSteps.ipynb index bffe252..7bdbd04 100644 --- a/tutorials/FirstSteps.ipynb +++ b/tutorials/FirstSteps.ipynb @@ -67,7 +67,7 @@ ").getOrCreate()\n", "spark.sparkContext.setLogLevel(\"ERROR\")\n", "\n", - "sdf = spark.createDataFrame( # pylint: disable=no-member\n", + "sdf = spark.createDataFrame(\n", " pd.DataFrame(\n", " [\n", " [\"F\", 28],\n", diff --git a/uv.lock b/uv.lock index cfc6f5c..7238dcb 100644 --- a/uv.lock +++ b/uv.lock @@ -417,15 +417,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/99/c7/d1ec24fb280caa5a79b6b950db565dab30210a66259d17d5bb2b3a9f878d/dependency_groups-1.3.1-py3-none-any.whl", hash = "sha256:51aeaa0dfad72430fcfb7bcdbefbd75f3792e5919563077f30bc0d73f4493030", size = 8664, upload-time = "2025-05-02T00:34:27.085Z" }, ] -[[package]] -name = "dill" -version = "0.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/50/3d/9373ad9c56321fdab5b41197068e1d8c25883b3fea29dd361f9b55116869/dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049", size = 119668, upload-time = "2025-04-16T00:41:47.671Z" }, -] - [[package]] name = "distlib" version = "0.3.9" @@ -777,15 +768,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/b9/59e120d24a2ec5fc2d30646adb2efb4621aab3c6d83d66fb2a7a182db032/matplotlib-3.10.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb73d8aa75a237457988f9765e4dfe1c0d2453c5ca4eabc897d4309672c8e014", size = 8594298, upload-time = "2025-05-08T19:10:51.738Z" }, ] -[[package]] -name = "mccabe" -version = "0.7.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e7/ff/0ffefdcac38932a54d2b5eed4e0ba8a408f215002cd178ad1df0f2806ff8/mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325", size = 9658, upload-time = "2022-01-24T01:14:51.113Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/27/1a/1f68f9ba0c207934b35b86a8ca3aad8395a3d6dd7921c0686e23853ff5a9/mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e", size = 7350, upload-time = "2022-01-24T01:14:49.62Z" }, -] - [[package]] name = "mdurl" version = "0.1.2" @@ 
-1349,23 +1331,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e2/0d/8ba33fa83a7dcde13eb3c1c2a0c1cc29950a048bfed6d9b0d8b6bd710b4c/pydata_sphinx_theme-0.16.1-py3-none-any.whl", hash = "sha256:225331e8ac4b32682c18fcac5a57a6f717c4e632cea5dd0e247b55155faeccde", size = 6723264, upload-time = "2024-12-17T10:53:35.645Z" }, ] -[[package]] -name = "pydocstyle" -version = "6.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "snowballstemmer" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e9/5c/d5385ca59fd065e3c6a5fe19f9bc9d5ea7f2509fa8c9c22fb6b2031dd953/pydocstyle-6.3.0.tar.gz", hash = "sha256:7ce43f0c0ac87b07494eb9c0b462c0b73e6ff276807f204d6b53edc72b7e44e1", size = 36796, upload-time = "2023-01-17T20:29:19.838Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/36/ea/99ddefac41971acad68f14114f38261c1f27dac0b3ec529824ebc739bdaa/pydocstyle-6.3.0-py3-none-any.whl", hash = "sha256:118762d452a49d6b05e194ef344a55822987a462831ade91ec5c06fd2169d019", size = 38038, upload-time = "2023-01-17T20:29:18.094Z" }, -] - -[package.optional-dependencies] -toml = [ - { name = "tomli", marker = "python_full_version < '3.11'" }, -] - [[package]] name = "pygments" version = "2.19.1" @@ -1375,25 +1340,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293, upload-time = "2025-01-06T17:26:25.553Z" }, ] -[[package]] -name = "pylint" -version = "3.3.7" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "astroid" }, - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "dill" }, - { name = "isort" }, - { name = "mccabe" }, - { name = "platformdirs" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, - { name = "tomlkit" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/1c/e4/83e487d3ddd64ab27749b66137b26dc0c5b5c161be680e6beffdc99070b3/pylint-3.3.7.tar.gz", hash = "sha256:2b11de8bde49f9c5059452e0c310c079c746a0a8eeaa789e5aa966ecc23e4559", size = 1520709, upload-time = "2025-05-04T17:07:51.089Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e8/83/bff755d09e31b5d25cc7fdc4bf3915d1a404e181f1abf0359af376845c24/pylint-3.3.7-py3-none-any.whl", hash = "sha256:43860aafefce92fca4cf6b61fe199cdc5ae54ea28f9bf4cd49de267b5195803d", size = 522565, upload-time = "2025-05-04T17:07:48.714Z" }, -] - [[package]] name = "pyparsing" version = "3.2.3" @@ -2005,14 +1951,6 @@ isort = [ mypy = [ { name = "mypy" }, ] -pydocstyle = [ - { name = "pydocstyle", extra = ["toml"] }, -] -pylint = [ - { name = "parameterized" }, - { name = "pylint" }, - { name = "pytest" }, -] ruff = [ { name = "ruff" }, ] @@ -2077,12 +2015,6 @@ docs-examples = [ ] isort = [{ name = "isort", specifier = ">=5.11,<6" }] mypy = [{ name = "mypy", specifier = ">=1.14.0" }] -pydocstyle = [{ name = "pydocstyle", extras = ["toml"], specifier = ">=6.3" }] -pylint = [ - { name = "parameterized" }, - { name = "pylint", specifier = ">=3.2.5" }, - { name = "pytest" }, -] ruff = [{ name = "ruff", specifier = ">=0.14.3,<1" }] scripting = [ { name = "nox", specifier = ">=2024.3.2" }, From 2a3501229c9ad9443779ff5d6c4fb061b0d45d61 Mon Sep 17 00:00:00 2001 From: Tom Magerlein Date: Thu, 6 Nov 2025 21:35:05 -0500 Subject: [PATCH 4/9] Enable pyflakes, pycodestyle lints These lints cover some of what pylint previously 
did for us -- ruff implements each overlapping check in only one of the two
rule sets rather than duplicating it.
---
 pyproject.toml                                            |  9 +++++++--
 src/tmlt/core/__init__.py                                 |  6 +++++-
 src/tmlt/core/measurements/pandas_measurements/series.py  |  2 +-
 src/tmlt/core/measurements/spark_measurements.py          |  1 -
 src/tmlt/core/metrics.py                                  |  2 --
 src/tmlt/core/transformations/converters.py               |  2 +-
 .../system/measurements/test_interactive_measurements.py  |  2 +-
 test/unit/budget_abstract.py                              |  2 --
 test/unit/domains/abstract.py                             |  1 -
 test/unit/measurements/abstract.py                        |  1 -
 test/unit/measures_abstract.py                            |  1 -
 test/unit/metric_abstract.py                              |  1 -
 test/unit/transformations/abstract.py                     |  1 -
 13 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index ad42fd2..9b5cd11 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -194,8 +194,10 @@ source = ["src/tmlt/core", ".nox/**/site-packages/tmlt/core"]
 # Linter configuration
 
 [tool.ruff.lint]
-# Enable Ruff-specific lints plus Pylint and pydocstyle
-select = ["RUF", "PL", "D"]
+# Enable Ruff-specific lints plus Pylint, pydocstyle, pyflakes, and pycodestyle.
+# The latter two cover many lints that we previously used pylint for, but
+# because they are overlapping Ruff only implements them in one set of rules.
+select = ["RUF", "PL", "D", "F", "E", "W"]
 ignore = [
     "RUF002", # ambiguous-unicode-character-docstring -- doesn't like unicode Greek letters
     "PLR09", # too-many-*
     "PLR2004", # magic-value-comparison
@@ -222,6 +224,9 @@ ignore = [
     "D210", # surrounding-whitespace
     "D102", # undocumented-public-method
     "PLW2901", # redefined-loop-name
+    "E501", # line-too-long
+    "E731", # lambda-assignment
+    "E741", # ambiguous-variable-name
 ]
 
 [tool.ruff.lint.pydocstyle]
 convention = "google"
diff --git a/src/tmlt/core/__init__.py b/src/tmlt/core/__init__.py
index f4bdc18..4a9d65f 100644
--- a/src/tmlt/core/__init__.py
+++ b/src/tmlt/core/__init__.py
@@ -6,7 +6,9 @@
 import warnings
 
 import pandas as pd
-import setuptools  # TODO(#3258): This import provides a workaround for a bug in PySpark
+
+# TODO(#3258): This import provides a workaround for a bug in PySpark
+import setuptools  # noqa: F401
 import typeguard
 
 # This version file is populated during build -- do not commit it.
@@ -15,6 +17,8 @@
 except ImportError:
     from tmlt.core._version import __version__
 
+__all__ = ["__version__"]
+
 # By default, typeguard only checks the first element lists, but we want to
 # check the type of every list item.
 typeguard.config.collection_check_strategy = typeguard.CollectionCheckStrategy.ALL_ITEMS
diff --git a/src/tmlt/core/measurements/pandas_measurements/series.py b/src/tmlt/core/measurements/pandas_measurements/series.py
index e8a20a4..6f56e61 100644
--- a/src/tmlt/core/measurements/pandas_measurements/series.py
+++ b/src/tmlt/core/measurements/pandas_measurements/series.py
@@ -256,7 +256,7 @@ def __init__(
             noise_measurement: Noise Measurement to be applied to each element
                 in input pandas Series.
         """
-        if not noise_measurement.output_measure in [PureDP(), RhoZCDP()]:
+        if noise_measurement.output_measure not in [PureDP(), RhoZCDP()]:
             raise AssertionError(
                 "This is probably a bug; please let us know so we can fix it!"
) diff --git a/src/tmlt/core/measurements/spark_measurements.py b/src/tmlt/core/measurements/spark_measurements.py index 0a3dea7..d4c8ead 100644 --- a/src/tmlt/core/measurements/spark_measurements.py +++ b/src/tmlt/core/measurements/spark_measurements.py @@ -19,7 +19,6 @@ from typeguard import typechecked # cleanup is imported just so its cleanup function runs at exit -import tmlt.core.utils.cleanup from tmlt.core.domains.spark_domains import ( SparkDataFrameDomain, SparkFloatColumnDescriptor, diff --git a/src/tmlt/core/metrics.py b/src/tmlt/core/metrics.py index 892b0ec..8180a19 100644 --- a/src/tmlt/core/metrics.py +++ b/src/tmlt/core/metrics.py @@ -10,11 +10,9 @@ from functools import reduce from typing import Any, Dict, Iterable, List, Mapping, Sequence, Tuple, Union -import numpy as np import pandas as pd import sympy as sp from pyspark.sql import functions as sf -from pyspark.sql.session import SparkSession from typeguard import typechecked from tmlt.core.domains.base import Domain diff --git a/src/tmlt/core/transformations/converters.py b/src/tmlt/core/transformations/converters.py index 97cb8d8..fff6444 100644 --- a/src/tmlt/core/transformations/converters.py +++ b/src/tmlt/core/transformations/converters.py @@ -32,7 +32,7 @@ def __init__(self, domain: SparkDataFrameDomain, input_metric: IfGroupedBy): domain: Domain of input DataFrames. input_metric: IfGroupedBy metric on input DataFrames. """ - if not input_metric.column in domain.schema: + if input_metric.column not in domain.schema: raise DomainColumnError( domain, input_metric.column, diff --git a/test/system/measurements/test_interactive_measurements.py b/test/system/measurements/test_interactive_measurements.py index c6d2903..c500938 100644 --- a/test/system/measurements/test_interactive_measurements.py +++ b/test/system/measurements/test_interactive_measurements.py @@ -14,7 +14,7 @@ SequentialComposition, ) from tmlt.core.measures import PureDP -from tmlt.core.metrics import SumOf, SymmetricDifference +from tmlt.core.metrics import SymmetricDifference from tmlt.core.transformations.dictionary import CreateDictFromValue from tmlt.core.transformations.spark_transformations.partition import PartitionByKeys from tmlt.core.utils.testing import PySparkTest diff --git a/test/unit/budget_abstract.py b/test/unit/budget_abstract.py index 7616722..e21db4d 100644 --- a/test/unit/budget_abstract.py +++ b/test/unit/budget_abstract.py @@ -5,13 +5,11 @@ import copy from abc import ABC, abstractmethod -from contextlib import nullcontext as does_not_raise from typing import Any, Callable, ContextManager, Dict, Optional, Type import pytest from tmlt.core.measures import PrivacyBudget -from tmlt.core.utils.exact_number import ExactNumberInput from tmlt.core.utils.testing import assert_property_immutability, get_all_props diff --git a/test/unit/domains/abstract.py b/test/unit/domains/abstract.py index 163f413..1e89460 100644 --- a/test/unit/domains/abstract.py +++ b/test/unit/domains/abstract.py @@ -5,7 +5,6 @@ import copy from abc import ABC, abstractmethod -from contextlib import nullcontext as does_not_raise from typing import Any, Callable, ContextManager, Dict, Optional, Type import pytest diff --git a/test/unit/measurements/abstract.py b/test/unit/measurements/abstract.py index 281fcc5..4b0a5b9 100644 --- a/test/unit/measurements/abstract.py +++ b/test/unit/measurements/abstract.py @@ -5,7 +5,6 @@ import copy from abc import ABC, abstractmethod -from contextlib import nullcontext as does_not_raise from typing import Any, Callable, 
---
 .github/workflows/merge_queue.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml
index b7e2271..0573d4b 100644
--- a/.github/workflows/merge_queue.yml
+++ b/.github/workflows/merge_queue.yml
@@ -37,7 +37,7 @@ jobs:
           name: dist
           path: dist
       - run: uv lock --check
-      - run: uv run --only-group scripting nox -t lint
+      - run: uv run --only-group scripting nox -t lint -- --check
   Test:
     runs-on: ubuntu-latest
     needs: Package

From e62e57664f8bd9d34cd9ebf2fc49ff4125e47cae Mon Sep 17 00:00:00 2001
From: Tom Magerlein
Date: Sat, 8 Nov 2025 13:35:18 -0500
Subject: [PATCH 6/9] Enable ISC, SLF Ruff rule sets and fix ambiguous character rules
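
ISC (flake8-implicit-str-concat) mostly catches a missing comma between
adjacent string literals, which Python concatenates silently; SLF
(flake8-self) flags access to another object's underscore-prefixed members.
A hypothetical example of the bug ISC exists to catch (not taken from this
codebase):

    # Looks like three list items, but the missing comma makes it two --
    # the first element is actually the single string "PLR09PLR2004".
    codes = [
        "PLR09"
        "PLR2004",
        "RUF005",
    ]

This also replaces the blanket RUF002 ignore with allowed-confusables, so
the ambiguous-character lints stay on except for the specific Greek letters
our docstrings use.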
+ "ISC", "SLF" +] ignore = [ - "RUF002", # ambiguous-unicode-character-docstring -- doesn't like unicode Greek letters "PLR09", # too-many-* "PLR2004", # magic-value-comparison @@ -213,7 +217,7 @@ ignore = [ "PLW0177", # nan-comparison "RUF005", # collection-literal-concatenation "PLR1730", # if-stmt-min-max - "PLW0127", # selt-assigning-variable + "PLW0127", # self-assigning-variable "PLW0128", # redeclared-assigned-name "RUF015", # unnecessary-iterable-allocation-for-first-element "D417", # undocumented-param @@ -227,8 +231,16 @@ ignore = [ "E501", # line-too-long "E731", # lambda-assignment "E741", # ambiguous-variable-name + "SLF001", # private-member-access + "RET504", # unnecessary-assign ] +# Ruff's RUF001-003 rules disallow certain Unicode characters that are easily +# confused with ASCII characters; this makes sense for the most part, but some +# of our docstrings use Greek letters that fall into that category. This allows +# those characters. +allowed-confusables = ['α', 'ρ', '𝝆'] + [tool.ruff.lint.pydocstyle] convention = "google" From ea0bb60026b2ef3d515ca748e19ed21bae2bb864 Mon Sep 17 00:00:00 2001 From: Tom Magerlein Date: Mon, 10 Nov 2025 20:14:44 -0500 Subject: [PATCH 7/9] Apply changes from review of corresponding MR in Analytics --- pyproject.toml | 11 +++++++++-- src/tmlt/core/metrics.py | 8 +++++--- src/tmlt/core/transformations/dictionary.py | 2 +- .../spark_transformations/add_remove_keys.py | 6 +++--- .../measurements/test_interactive_measurements.py | 4 ++-- 5 files changed, 20 insertions(+), 11 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7b0e594..055e6b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -204,8 +204,15 @@ select = [ "ISC", "SLF" ] ignore = [ - "PLR09", # too-many-* - "PLR2004", # magic-value-comparison + # too-many-*: These rules are too context-dependent to be generally useful, + # we can evaluate this during code reviews. + "PLR09", + # magic-value-comparison: This rule flags a lot of constants that don't + # really make sense, we can make this call during code reviews. + "PLR2004", + # explicit-f-string-type-conversion: we don't generally use the !r syntax in + # f-strings, which this rule enforces. + "RUF010", # TODO: This disables every lint that is currently failing; go through and # either fix/individually disable each instance, or choose to permanently diff --git a/src/tmlt/core/metrics.py b/src/tmlt/core/metrics.py index 8180a19..ea0c377 100644 --- a/src/tmlt/core/metrics.py +++ b/src/tmlt/core/metrics.py @@ -59,7 +59,7 @@ def _validate_distance_arguments( """Raise an exception if the arguments to a distance method aren't valid.""" if not self.supports_domain(domain): raise UnsupportedCombinationError( - (self, domain), f"{self!r} does not support domain {domain!r}." + (self, domain), f"{repr(self)} does not support domain {repr(domain)}." 
---
 pyproject.toml                                            | 11 +++++++++--
 src/tmlt/core/metrics.py                                  |  8 +++++---
 src/tmlt/core/transformations/dictionary.py               |  2 +-
 .../spark_transformations/add_remove_keys.py              |  6 +++---
 .../measurements/test_interactive_measurements.py         |  4 ++--
 5 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 7b0e594..055e6b8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -204,8 +204,15 @@ select = [
     "ISC", "SLF"
 ]
 ignore = [
-    "PLR09", # too-many-*
-    "PLR2004", # magic-value-comparison
+    # too-many-*: These rules are too context-dependent to be generally
+    # useful; we can evaluate this during code reviews.
+    "PLR09",
+    # magic-value-comparison: This rule flags a lot of comparisons where a
+    # named constant wouldn't really help; we can make this call during code reviews.
+    "PLR2004",
+    # explicit-f-string-type-conversion: we don't generally use the !r syntax
+    # in f-strings, which this rule enforces.
+    "RUF010",
 
     # TODO: This disables every lint that is currently failing; go through and
     # either fix/individually disable each instance, or choose to permanently
diff --git a/src/tmlt/core/metrics.py b/src/tmlt/core/metrics.py
index 8180a19..ea0c377 100644
--- a/src/tmlt/core/metrics.py
+++ b/src/tmlt/core/metrics.py
@@ -59,7 +59,7 @@ def _validate_distance_arguments(
         """Raise an exception if the arguments to a distance method aren't valid."""
         if not self.supports_domain(domain):
             raise UnsupportedCombinationError(
-                (self, domain), f"{self!r} does not support domain {domain!r}."
+                (self, domain), f"{repr(self)} does not support domain {repr(domain)}."
             )
         try:
             domain.validate(value1)
@@ -866,7 +866,7 @@ def distance(self, value1: Any, value2: Any, domain: Domain) -> ExactNumber:
 
     def __repr__(self) -> str:
         """Returns string representation."""
         return (
-            f"{self.__class__.__name__}(column={self.column!r},"
+            f"{self.__class__.__name__}(column={repr(self.column)},"
             f" metric={self.metric})"
         )
@@ -1485,4 +1485,6 @@ def distance(self, value1: Any, value2: Any, domain: Domain) -> ExactNumber:
 
     def __repr__(self) -> str:
         """Returns string representation."""
-        return f"{self.__class__.__name__}(df_to_key_column={self.df_to_key_column!r})"
+        return (
+            f"{self.__class__.__name__}(df_to_key_column={repr(self.df_to_key_column)})"
+        )
diff --git a/src/tmlt/core/transformations/dictionary.py b/src/tmlt/core/transformations/dictionary.py
index 2bbb854..94e817a 100644
--- a/src/tmlt/core/transformations/dictionary.py
+++ b/src/tmlt/core/transformations/dictionary.py
@@ -314,7 +314,7 @@ def __init__(
         """
         if key not in input_domain.key_to_domain:
             raise DomainKeyError(
-                input_domain, key, f"{key!r} is not one of the input domain's keys"
+                input_domain, key, f"{repr(key)} is not one of the input domain's keys"
             )
         # Below is the check in base class, but needs to happen before so
         # output_metric = input_metric[key] won't get a KeyError
diff --git a/src/tmlt/core/transformations/spark_transformations/add_remove_keys.py b/src/tmlt/core/transformations/spark_transformations/add_remove_keys.py
index f75b0af..1e27fc0 100644
--- a/src/tmlt/core/transformations/spark_transformations/add_remove_keys.py
+++ b/src/tmlt/core/transformations/spark_transformations/add_remove_keys.py
@@ -197,15 +197,15 @@ def __init__(
             )
         if key not in input_domain.key_to_domain:
             raise DomainKeyError(
-                input_domain, key, f"{key!r} is not one of the input domain's keys"
+                input_domain, key, f"{repr(key)} is not one of the input domain's keys"
             )
         if new_key in input_domain.key_to_domain:
-            raise ValueError(f"{new_key!r} is already a key in the input domain")
+            raise ValueError(f"{repr(new_key)} is already a key in the input domain")
         if transformation.input_domain != input_domain.key_to_domain[key]:
             raise DomainMismatchError(
                 (transformation.input_domain, input_domain),
                 (
-                    f"Input domain's value for {key!r} does not match"
+                    f"Input domain's value for {repr(key)} does not match"
                     " transformation's input domain"
                 ),
             )
diff --git a/test/unit/measurements/test_interactive_measurements.py b/test/unit/measurements/test_interactive_measurements.py
index 6d72624..a5de3e8 100644
--- a/test/unit/measurements/test_interactive_measurements.py
+++ b/test/unit/measurements/test_interactive_measurements.py
@@ -2055,8 +2055,8 @@ def test_insufficient_budget(self):
         requested_budget = self.budget_type(self.budget_quarters[3])
         error_message = re.escape(
             (
-                f"The remaining privacy budget is {remaining_budget!s}, which "
-                f"is insufficient given the requested budget {requested_budget!s}."
+                f"The remaining privacy budget is {remaining_budget}, which "
+                f"is insufficient given the requested budget {requested_budget}."
             )
         )
         with self.assertRaisesRegex(ValueError, error_message):

From 42e41b4827ba233616ddc50585f58856eb49eaa0 Mon Sep 17 00:00:00 2001
From: Tom Magerlein
Date: Wed, 12 Nov 2025 17:32:38 -0500
Subject: [PATCH 8/9] Fix various broken links

Replaces links to projects.iq.harvard.edu with Wayback Machine links, as
that site has shut down. Also updates intersphinx to point to a (hopefully
more stable) location for the Spark docs, and fixes a missing import in
some doctests.
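
The doctest fix relies on doctests executing with the enclosing module's
globals: an example that uses `np` only runs if the module itself imports
numpy, even when nothing outside the docstrings needs it, hence the
noqa-annotated import. A minimal sketch of the pattern (a hypothetical
module, not this codebase):

    import numpy as np  # noqa: F401 -- only the doctests use this

    def double(xs):
        """Double each element.

        Example:
            >>> double(np.array([1, 2]))
            array([2, 4])
        """
        return 2 * xs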
---
 doc/conf.py                       | 2 +-
 doc/index.rst                     | 2 +-
 doc/topic-guides/architecture.rst | 2 +-
 src/tmlt/core/metrics.py          | 1 +
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/doc/conf.py b/doc/conf.py
index 2826a1e..8bd7597 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -171,7 +171,7 @@ def autoapi_prepare_jinja_env(jinja_env):
     "numpy": ("https://numpy.org/doc/1.18/", None),
     "pandas": ("https://pandas.pydata.org/pandas-docs/version/1.2.0/", None),
     "sympy": ("https://docs.sympy.org/latest/", None),
-    "pyspark": ("https://spark.apache.org/docs/3.5.1/api/python/", None),
+    "pyspark": ("https://downloads.apache.org/spark/docs/3.5.7/api/python/", None),
 }
 
 # The ACM website seems to have some sort of protection that foils the linkchecker.
diff --git a/doc/index.rst b/doc/index.rst
index 005b3cc..c8e08c0 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -20,7 +20,7 @@ Introduction to Tumult Core
 
 Tumult Core is a collection of composable components for implementing
 algorithms to perform differentially private computations. The design of Tumult Core
-is based on the design proposed in the `OpenDP White Paper `__,
+is based on the design proposed in the `OpenDP White Paper `_,
 and can automatically verify the privacy properties of algorithms constructed
 from Tumult Core components. Tumult Core is scalable, includes a wide variety
 of components, and supports multiple privacy definitions.
diff --git a/doc/topic-guides/architecture.rst b/doc/topic-guides/architecture.rst
index 10300eb..772e5bf 100644
--- a/doc/topic-guides/architecture.rst
+++ b/doc/topic-guides/architecture.rst
@@ -24,7 +24,7 @@ Tumult Core Architecture
 Tumult Core is a collection of composable components for implementing
 algorithms to perform differentially private computations. The design of
 Tumult Core is based on the design proposed in the
-`OpenDP White Paper `_.
+`OpenDP White Paper `_.
 On this page, we give an overview of this design. Readers who want more
 information should refer to the linked white paper.
diff --git a/src/tmlt/core/metrics.py b/src/tmlt/core/metrics.py
index ea0c377..b3d928b 100644
--- a/src/tmlt/core/metrics.py
+++ b/src/tmlt/core/metrics.py
@@ -10,6 +10,7 @@
 from functools import reduce
 from typing import Any, Dict, Iterable, List, Mapping, Sequence, Tuple, Union
 
+import numpy as np  # noqa: F401 -- needed for doctests
 import pandas as pd
 import sympy as sp
 from pyspark.sql import functions as sf

From 91b6a997676c3701167bcb9c5d00b9f6a06db49c Mon Sep 17 00:00:00 2001
From: Tom Magerlein
Date: Fri, 14 Nov 2025 17:38:31 -0500
Subject: [PATCH 9/9] Update links to Apache wiki
---
 doc/topic-guides/spark.rst | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/doc/topic-guides/spark.rst b/doc/topic-guides/spark.rst
index 657c3dd..c52029c 100644
--- a/doc/topic-guides/spark.rst
+++ b/doc/topic-guides/spark.rst
@@ -42,12 +42,9 @@ database, you should use the following options when creating a Spark session:
         .enableHiveSupport()
         .getOrCreate()
 
-To see where Hive's warehouse directory is, you can use the
-`Hive CLI `_
-(or its replacement,
-`Beehive `_)
-to view the
-`relevant configuration parameter `_:
+To see where Hive's warehouse directory is, you can use the `Hive CLI `_
+(or its replacement, `Beeline `_)
+to view the `relevant configuration parameter `_:
 
 .. code-block::