4 changes: 1 addition & 3 deletions doc/conf.py
@@ -1,5 +1,3 @@
# pylint: skip-file

# SPDX-License-Identifier: Apache-2.0
# Copyright Tumult Labs 2025

@@ -178,7 +176,7 @@
"numpy": ("https://numpy.org/doc/1.18/", None),
"pandas": ("https://pandas.pydata.org/pandas-docs/version/1.2.0/", None),
"sympy": ("https://docs.sympy.org/latest/", None),
"pyspark": ("https://archive.apache.org/dist/spark/docs/3.1.1/api/python/", None),
"pyspark": ("https://downloads.apache.org/spark/docs/3.5.7/api/python/", None),
}

# Substitutions
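The net effect of this hunk is a one-URL swap in the Sphinx intersphinx configuration, pointing the pyspark inventory at the Spark 3.5.7 docs. A sketch of the resulting mapping, with the surrounding entries taken directly from the diff context above:

# Updated intersphinx configuration in doc/conf.py; only the "pyspark"
# entry changes, the rest is unchanged context from the diff.
intersphinx_mapping = {
    "numpy": ("https://numpy.org/doc/1.18/", None),
    "pandas": ("https://pandas.pydata.org/pandas-docs/version/1.2.0/", None),
    "sympy": ("https://docs.sympy.org/latest/", None),
    "pyspark": ("https://downloads.apache.org/spark/docs/3.5.7/api/python/", None),
}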
9 changes: 3 additions & 6 deletions doc/deployment/spark.rst
@@ -44,12 +44,9 @@ database, you should use the following options when creating a Spark session:
.enableHiveSupport()
.getOrCreate()

To see where Hive's warehouse directory is, you can use the
`Hive CLI <https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Cli#LanguageManualCli-HiveInteractiveShellCommands>`_
(or its replacement,
`Beehive <https://cwiki.apache.org/confluence/display/Hive/HiveServer2+Clients#HiveServer2Clients-BeelineHiveCommands>`_)
to view the
`relevant configuration parameter <https://cwiki.apache.org/confluence/display/Hive/AdminManual+Metastore+3.0+Administration#AdminManualMetastore3.0Administration-GeneralConfiguration>`_:
To see where Hive's warehouse directory is, you can use the `Hive CLI <https://hive.apache.org/docs/latest/language/languagemanual-cli/#hive-interactive-shell-commands>`_
(or its replacement, `Beeline <https://hive.apache.org/docs/latest/user/hiveserver2-clients/#beeline-hive-commands>`_)
to view the `relevant configuration parameter <https://hive.apache.org/docs/latest/admin/adminmanual-metastore-3-0-administration/#general-configuration>`_:

.. code-block::

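The body of the `.. code-block::` above is truncated in this view. As a hedged Python alternative to the Hive CLI, the same information can be read from a Hive-enabled Spark session; `SparkSession.sql("SET ...")` and `spark.conf.get` are standard PySpark APIs, and `hive.metastore.warehouse.dir` is the configuration parameter the linked Hive documentation describes:

from pyspark.sql import SparkSession

# Assumes a Hive-enabled session, matching the snippet this page documents.
spark = SparkSession.builder.enableHiveSupport().getOrCreate()

# Spark's own warehouse location:
print(spark.conf.get("spark.sql.warehouse.dir"))

# The Hive-side parameter referenced by the linked Hive documentation:
spark.sql("SET hive.metastore.warehouse.dir").show(truncate=False)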
3 changes: 1 addition & 2 deletions noxfile.py
@@ -143,9 +143,8 @@ def is_mac():

sm.black()
sm.isort()
sm.ruff_check()
sm.mypy()
sm.pylint()
sm.pydocstyle()

sm.smoketest()
sm.release_smoketest()
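The diff replaces the separate pylint and pydocstyle steps with a single `sm.ruff_check()` call; the session-manager helper itself is not shown in this PR. A hedged sketch of what an equivalent standalone nox session could look like (the session name and file targets are assumptions; the version bound is copied from pyproject.toml below):

import nox

@nox.session
def ruff_check(session: nox.Session) -> None:
    """Run Ruff over the project sources (hypothetical standalone session)."""
    session.install("ruff>=0.14.3,<1")
    session.run("ruff", "check", "src", "test")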
126 changes: 57 additions & 69 deletions pyproject.toml
@@ -56,11 +56,10 @@ required-version = ">=0.7.0"
default-groups = "all"

[dependency-groups]
ruff = ["ruff >=0.14.3,<1"]
black = ["black >=23.3,<24"]
isort = ["isort >=5.11,<6"]
mypy = ["mypy >=1.14.0"]
pylint = ["pylint >=3.2.5"]
pydocstyle = ["pydocstyle[toml] >=6.3"]
test = [
"pytest",
"pytest-cov >=5.0,<6",
@@ -135,6 +134,60 @@ packages = ["src/tmlt"]
################################################################################
# Linter configuration

[tool.ruff.lint]
# A list of all of Ruff's rules can be found at https://docs.astral.sh/ruff/rules/
select = [
# Enable Ruff-specific lints plus Pylint, pydocstyle, pyflakes, and pycodestyle.
# The latter two cover many lints that we previously relied on pylint for;
# since the rule sets overlap, Ruff implements each lint in only one of them.
"RUF", "PL", "D", "F", "E", "W",
# Also enable a subset of flake8 rules, for similar reasons to pyflakes/pycodestyle.
"ISC", "SLF"
]
ignore = [
# too-many-*: These rules are too context-dependent to be generally useful;
# we can evaluate such cases during code review instead.
"PLR09",
# magic-value-comparison: This rule flags a lot of constants where extracting
# a named constant doesn't really make sense; we can make this call during
# code review instead.
"PLR2004",
# explicit-f-string-type-conversion: we don't generally use the !r syntax in
# f-strings, which this rule enforces.
"RUF010",

# TODO: This disables every lint that is currently failing; go through and
# either fix/individually disable each instance, or choose to permanently
# ignore each one.
"PLW1641", # eq-without-hash
"PLC0206", # dict-index-missing-items
"RUF005", # collection-literal-concatenation
"RUF015", # unnecessary-iterable-allocation-for-first-element
"D415", # missing-terminal-punctuation
"RUF043", # pytest-raises-ambiguous-pattern
"D205", # missing-blank-line-after-summary
"D210", # surrounding-whitespace
"D102", # undocumented-public-method
"E501", # line-too-long
"E731", # lambda-assignment
"E741", # ambiguous-variable-name
"SLF001", # private-member-access
"RET504", # unnecessary-assign
"F401", # unused-import
"RUF009", # function-call-in-dataclass-default-argument
"E721", # type-comparison
"D103", # undocumented-public-function
"PLR0124", # comparison-with-itself
]

# Ruff's RUF001-003 rules disallow certain Unicode characters that are easily
# confused with ASCII characters; this makes sense for the most part, but some
# of our docstrings use Greek letters that fall into that category. This allows
# those characters.
allowed-confusables = ['α', 'ρ', '𝝆']

[tool.ruff.lint.pydocstyle]
convention = "google"
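
To make the `allowed-confusables` and `convention = "google"` settings concrete, here is a hedged illustration of a docstring these settings accept: the Greek α and ρ would otherwise trip RUF002 (ambiguous character in docstring), and the Args/Returns sections follow the Google convention the D rules check. The function itself is hypothetical:

def renyi_bound(alpha: float) -> float:
    """Return a placeholder Rényi-DP bound.

    Args:
        alpha: The Rényi order α; the Greek letter is permitted by the
            allowed-confusables list above.

    Returns:
        A bound expressed in terms of ρ (also whitelisted).
    """
    return alpha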

[tool.black]
force-exclude = "noxfile.py"

@@ -162,70 +215,8 @@ module = "test.*"
disallow_untyped_defs = false
check_untyped_defs = true

[tool.pylint.master]
# See https://github.com/PyCQA/pylint/issues/1975#issuecomment-387924981
extension-pkg-whitelist = ['numpy']
load-plugins = ['pylint.extensions.docparams']
# Only check param docs in docstrings that contain an Args: section.
# Set to "no" to show docstrings missing argument documentation.
accept-no-param-doc = true

[tool.pylint.'MESSAGES CONTROL']
enable = [
# Note: there is a false positive on 'useless-suppression' when you
# use 'disable=line-too-long' at the end of a docstring.
# See: https://github.com/pylint-dev/pylint/issues/8301
'useless-suppression'
]
# By default, informational rules like useless-suppression don't cause PyLint to
# produce an error.
fail-on = ['useless-suppression']
disable = [
'arguments-differ',
'duplicate-code',
'fixme',
'invalid-name',
'logging-format-interpolation',
'logging-fstring-interpolation',
'missing-function-docstring', # Redundant with pydocstyle
'missing-raises-doc',
'missing-return-doc',
'no-else-return',
'super-init-not-called',
'too-few-public-methods',
'too-many-ancestors',
'too-many-arguments',
'too-many-branches',
'too-many-instance-attributes',
'too-many-lines',
'too-many-locals',
'too-many-positional-arguments',
'too-many-public-methods',
'too-many-return-statements',
'too-many-statements',
'unbalanced-tuple-unpacking',
'unnecessary-lambda-assignment',
'unsubscriptable-object',
'use-dict-literal',
# There are a lot of false positives for unsupported-binary-operation
# on Python 3.9: https://github.com/pylint-dev/pylint/issues/7381
'unsupported-binary-operation',
# black and isort group tmlt.core separately from tmlt.analytics,
# but pylint thinks they should both be grouped as 'tmlt'.
'ungrouped-imports',
'wrong-import-order',
]

[tool.pylint.FORMAT]
max-line-length = 88

[tool.pydocstyle]
convention = "google"
add-ignore = [
# `D200: One-line docstring should fit on one line with quotes`
# conflicts with pylint's `max-line-length`.
"D200",
]
################################################################################
# Test configuration

[tool.pytest.ini_options]
markers = [
@@ -236,8 +227,5 @@ markers = [
# more information and a better future fix.
addopts = ["--import-mode=importlib"]

################################################################################
# Test configuration

[tool.coverage.run]
relative_files = true
2 changes: 0 additions & 2 deletions src/tmlt/analytics/__init__.py
@@ -38,8 +38,6 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright Tumult Labs 2025

from typing import List

from tmlt.analytics._utils import AnalyticsInternalError
from tmlt.analytics.binning_spec import BinningSpec, BinT
from tmlt.analytics.config import Config, FeatureFlag
16 changes: 7 additions & 9 deletions src/tmlt/analytics/_neighboring_relation.py
@@ -3,7 +3,6 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright Tumult Labs 2025

# pylint: disable=protected-access

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
@@ -284,14 +283,13 @@ def _validate(self, dfs: Dict[str, DataFrame]) -> List[str]:
f" has type {df_field.dataType}."
)
key_type = df_field.dataType
else:
if not df_field.dataType == key_type:
raise ValueError(
f"Key column '{key_column}' has type "
f"{df_field.dataType}, but in another"
f" table it has type {key_type}. Key types"
" must match across tables"
)
elif not df_field.dataType == key_type:
raise ValueError(
f"Key column '{key_column}' has type "
f"{df_field.dataType}, but in another"
f" table it has type {key_type}. Key types"
" must match across tables"
)

return list(self.table_to_key_column.keys())

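The change in this file is purely structural: a nested `else: if ...:` block becomes `elif`, dropping one indentation level without altering behavior. The same pattern in miniature, with hypothetical names:

def _check_key_type(current_type, expected_type):
    if expected_type is None:
        expected_type = current_type
    elif current_type != expected_type:  # previously: else: / if ...:
        raise ValueError("Key types must match across tables")
    return expected_type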
2 changes: 1 addition & 1 deletion src/tmlt/analytics/_noise_info.py
@@ -150,7 +150,7 @@ def _inverse_cdf(noise_info: Dict[str, Any], p: float) -> float:

@singledispatch
def _noise_from_info(
info: Any, # pylint: disable=unused-argument
info: Any,
) -> List[Dict[str, Any]]:
"""Get noise information from info (for a measurement).

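`_noise_from_info` is a `functools.singledispatch` base implementation, which is why its `info` argument is deliberately unused and why the `unused-argument` suppression can be dropped: pylint is gone, and no equivalent Ruff rule is enabled in the pyproject.toml above. A hedged, self-contained sketch of the pattern (the registered type and returned fields are invented for illustration):

from functools import singledispatch
from typing import Any, Dict, List

@singledispatch
def _noise_from_info(info: Any) -> List[Dict[str, Any]]:
    """Fallback when no overload is registered for type(info)."""
    raise NotImplementedError(f"No noise information for {type(info).__name__}")

@_noise_from_info.register
def _(info: float) -> List[Dict[str, Any]]:
    # Hypothetical overload: dispatch happens on the runtime type of `info`.
    return [{"distribution": "laplace", "scale": info}]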
20 changes: 10 additions & 10 deletions src/tmlt/analytics/_query_expr.py
@@ -1147,14 +1147,14 @@ def schema(self, catalog: Catalog) -> Schema:
name
for name, cd in input_schema.column_descs.items()
if (cd.allow_null or cd.allow_nan)
and not (name in [input_schema.grouping_column, input_schema.id_column])
and name not in [input_schema.grouping_column, input_schema.id_column]
]
return Schema(
{
name: ColumnDescriptor(
column_type=cd.column_type,
allow_null=(cd.allow_null and not name in columns_to_change),
allow_nan=(cd.allow_nan and not name in columns_to_change),
allow_null=(cd.allow_null and name not in columns_to_change),
allow_nan=(cd.allow_nan and name not in columns_to_change),
allow_inf=cd.allow_inf,
)
for name, cd in input_schema.column_descs.items()
@@ -1239,15 +1239,15 @@ def schema(self, catalog: Catalog) -> Schema:
for name, cd in input_schema.column_descs.items()
if cd.column_type == ColumnType.DECIMAL
and cd.allow_inf
and not (name in [input_schema.grouping_column, input_schema.id_column])
and name not in [input_schema.grouping_column, input_schema.id_column]
]
return Schema(
{
name: ColumnDescriptor(
column_type=cd.column_type,
allow_null=cd.allow_null,
allow_nan=cd.allow_nan,
allow_inf=(cd.allow_inf and not name in columns_to_change),
allow_inf=(cd.allow_inf and name not in columns_to_change),
)
for name, cd in input_schema.column_descs.items()
},
@@ -1322,15 +1322,15 @@ def schema(self, catalog: Catalog) -> Schema:
name
for name, cd in input_schema.column_descs.items()
if (cd.allow_null or cd.allow_nan)
and not name in [input_schema.grouping_column, input_schema.id_column]
and name not in [input_schema.grouping_column, input_schema.id_column]
)

return Schema(
{
name: ColumnDescriptor(
column_type=cd.column_type,
allow_null=(cd.allow_null and not name in columns),
allow_nan=(cd.allow_nan and not name in columns),
allow_null=(cd.allow_null and name not in columns),
allow_nan=(cd.allow_nan and name not in columns),
allow_inf=(cd.allow_inf),
)
for name, cd in input_schema.column_descs.items()
@@ -1403,7 +1403,7 @@ def schema(self, catalog: Catalog) -> Schema:
for name, cd in input_schema.column_descs.items()
if cd.column_type == ColumnType.DECIMAL
and cd.allow_inf
and not name in (input_schema.grouping_column, input_schema.id_column)
and name not in (input_schema.grouping_column, input_schema.id_column)
)

return Schema(
@@ -1412,7 +1412,7 @@
column_type=cd.column_type,
allow_null=cd.allow_null,
allow_nan=cd.allow_nan,
allow_inf=(cd.allow_inf and not name in columns),
allow_inf=(cd.allow_inf and name not in columns),
)
for name, cd in input_schema.column_descs.items()
},
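Every change in this file is the same mechanical fix: `not x in y` becomes `x not in y`, pycodestyle's E713, enabled via the "E" selector in pyproject.toml above. The two spellings compile to the same check, since `not x in y` parses as `not (x in y)`; the rewrite only improves readability. A quick demonstration:

special = ("grouping_column", "id_column")
name = "measure_column"

# `not name in special` parses as `not (name in special)`, so both forms are
# equivalent; the second is the idiomatic spelling E713 enforces.
assert (not (name in special)) == (name not in special)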
@@ -645,34 +645,33 @@ def _validate_approxDP_and_adjust_budget(
f"The budget provided was {self.budget}."
)
return
elif mechanism in (
AverageMechanism.LAPLACE,
CountDistinctMechanism.LAPLACE,
CountMechanism.LAPLACE,
StdevMechanism.LAPLACE,
SumMechanism.LAPLACE,
VarianceMechanism.LAPLACE,
):
warnings.warn(
"When using LAPLACE with an ApproxDPBudget, the delta value of "
"the budget will be replaced with zero."
)
self.adjusted_budget = ApproxDPBudget(epsilon, 0)
elif mechanism in (
AverageMechanism.DEFAULT,
CountDistinctMechanism.DEFAULT,
CountMechanism.DEFAULT,
StdevMechanism.DEFAULT,
SumMechanism.DEFAULT,
VarianceMechanism.DEFAULT,
):
self.adjusted_budget = ApproxDPBudget(epsilon, 0)
elif mechanism is None:
# Quantile has no mechanism
self.adjusted_budget = ApproxDPBudget(epsilon, 0)
else:
if mechanism in (
AverageMechanism.LAPLACE,
CountDistinctMechanism.LAPLACE,
CountMechanism.LAPLACE,
StdevMechanism.LAPLACE,
SumMechanism.LAPLACE,
VarianceMechanism.LAPLACE,
):
warnings.warn(
"When using LAPLACE with an ApproxDPBudget, the delta value of "
"the budget will be replaced with zero."
)
self.adjusted_budget = ApproxDPBudget(epsilon, 0)
elif mechanism in (
AverageMechanism.DEFAULT,
CountDistinctMechanism.DEFAULT,
CountMechanism.DEFAULT,
StdevMechanism.DEFAULT,
SumMechanism.DEFAULT,
VarianceMechanism.DEFAULT,
):
self.adjusted_budget = ApproxDPBudget(epsilon, 0)
elif mechanism is None:
# Quantile has no mechanism
self.adjusted_budget = ApproxDPBudget(epsilon, 0)
else:
raise AnalyticsInternalError(f"Unknown mechanism {mechanism}.")
raise AnalyticsInternalError(f"Unknown mechanism {mechanism}.")

def _validate_measurement(self, measurement: Measurement, mid_stability: sp.Expr):
"""Validate a measurement."""
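As in _neighboring_relation.py, this hunk is a pure de-nesting: the whole `if/elif/else` chain previously lived inside an `else:` branch, and the refactor promotes it one level with unchanged behavior. A compressed, hypothetical sketch of the resulting control flow (mechanism names simplified; the earlier return path from the context above is omitted, and the real code sets `self.adjusted_budget` rather than returning):

import warnings
from typing import Optional, Tuple

def _adjust_approxdp_budget(epsilon: float, mechanism: Optional[str]) -> Tuple[float, int]:
    """Condensed illustration of the flattened if/elif/else chain."""
    if mechanism == "LAPLACE":
        warnings.warn(
            "When using LAPLACE with an ApproxDPBudget, the delta value of "
            "the budget will be replaced with zero."
        )
        return (epsilon, 0)
    elif mechanism in ("DEFAULT", None):  # quantile passes None: no mechanism
        return (epsilon, 0)
    else:
        raise ValueError(f"Unknown mechanism {mechanism}.")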
@@ -1082,7 +1082,7 @@ def _get_replace_with(
else:
# Check that all columns exist
for col in replace_with:
if not col in analytics_schema:
if col not in analytics_schema:
raise ValueError(
f"Cannot replace values in column {col}, because it is not in"
" the schema"
@@ -1555,9 +1555,8 @@ def visit_enforce_constraint(self, expr: EnforceConstraint) -> Output:
child_transformation, child_ref, child_constraints = self._visit_child(
expr.child
)
# pylint: disable=protected-access
transformation, ref = expr.constraint._enforce(child_transformation, child_ref)
# pylint: enable=protected-access

return self.Output(
transformation,
ref,
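The paired `# pylint: disable/enable=protected-access` comments around the `_enforce` call can go because private-member access is now Ruff's SLF001, which the pyproject.toml above ignores project-wide. A tiny illustration (the class and call are hypothetical stand-ins):

class Constraint:
    """Hypothetical stand-in for a tmlt.analytics constraint."""

    def _enforce(self, transformation, ref):
        # Private by convention; with SLF001 in the global ignore list above,
        # callers need no per-line suppression to touch it.
        return transformation, ref

transformation, ref = Constraint()._enforce("child_transformation", "child_ref")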