4 changes: 1 addition & 3 deletions doc/conf.py
@@ -1,5 +1,3 @@
# pylint: skip-file

# SPDX-License-Identifier: Apache-2.0
# Copyright Tumult Labs 2025

@@ -178,7 +176,7 @@
"numpy": ("https://numpy.org/doc/1.18/", None),
"pandas": ("https://pandas.pydata.org/pandas-docs/version/1.2.0/", None),
"sympy": ("https://docs.sympy.org/latest/", None),
"pyspark": ("https://archive.apache.org/dist/spark/docs/3.1.1/api/python/", None),
"pyspark": ("https://downloads.apache.org/spark/docs/3.5.7/api/python/", None),
}

# Substitutions
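The net effect of this hunk is a one-URL swap in the Sphinx intersphinx configuration, pointing the pyspark inventory at the Spark 3.5.7 docs. A sketch of the resulting mapping, with the surrounding entries taken directly from the diff context above:

# Updated intersphinx configuration in doc/conf.py; only the "pyspark"
# entry changes, the rest is unchanged context from the diff.
intersphinx_mapping = {
    "numpy": ("https://numpy.org/doc/1.18/", None),
    "pandas": ("https://pandas.pydata.org/pandas-docs/version/1.2.0/", None),
    "sympy": ("https://docs.sympy.org/latest/", None),
    "pyspark": ("https://downloads.apache.org/spark/docs/3.5.7/api/python/", None),
}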
9 changes: 3 additions & 6 deletions doc/deployment/spark.rst
@@ -44,12 +44,9 @@ database, you should use the following options when creating a Spark session:
.enableHiveSupport()
.getOrCreate()

To see where Hive's warehouse directory is, you can use the
`Hive CLI <https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Cli#LanguageManualCli-HiveInteractiveShellCommands>`_
(or its replacement,
`Beehive <https://cwiki.apache.org/confluence/display/Hive/HiveServer2+Clients#HiveServer2Clients-BeelineHiveCommands>`_)
to view the
`relevant configuration parameter <https://cwiki.apache.org/confluence/display/Hive/AdminManual+Metastore+3.0+Administration#AdminManualMetastore3.0Administration-GeneralConfiguration>`_:
To see where Hive's warehouse directory is, you can use the `Hive CLI <https://hive.apache.org/docs/latest/language/languagemanual-cli/#hive-interactive-shell-commands>`_
(or its replacement, `Beeline <https://hive.apache.org/docs/latest/user/hiveserver2-clients/#beeline-hive-commands>`_)
to view the `relevant configuration parameter <https://hive.apache.org/docs/latest/admin/adminmanual-metastore-3-0-administration/#general-configuration>`_:

.. code-block::

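The body of the `.. code-block::` above is truncated in this view. As a hedged Python alternative to the Hive CLI, the same information can be read from a Hive-enabled Spark session; `SparkSession.sql("SET ...")` and `spark.conf.get` are standard PySpark APIs, and `hive.metastore.warehouse.dir` is the configuration parameter the linked Hive documentation describes:

from pyspark.sql import SparkSession

# Assumes a Hive-enabled session, matching the snippet this page documents.
spark = SparkSession.builder.enableHiveSupport().getOrCreate()

# Spark's own warehouse location:
print(spark.conf.get("spark.sql.warehouse.dir"))

# The Hive-side parameter referenced by the linked Hive documentation:
spark.sql("SET hive.metastore.warehouse.dir").show(truncate=False)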
3 changes: 1 addition & 2 deletions noxfile.py
@@ -143,9 +143,8 @@ def is_mac():

sm.black()
sm.isort()
sm.ruff_check()
sm.mypy()
sm.pylint()
sm.pydocstyle()

sm.smoketest()
sm.release_smoketest()
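The diff replaces the separate pylint and pydocstyle steps with a single `sm.ruff_check()` call; the session-manager helper itself is not shown in this PR. A hedged sketch of what an equivalent standalone nox session could look like (the session name and file targets are assumptions; the version bound is copied from pyproject.toml below):

import nox

@nox.session
def ruff_check(session: nox.Session) -> None:
    """Run Ruff over the project sources (hypothetical standalone session)."""
    session.install("ruff>=0.14.3,<1")
    session.run("ruff", "check", "src", "test")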
126 changes: 57 additions & 69 deletions pyproject.toml
@@ -56,11 +56,10 @@ required-version = ">=0.7.0"
default-groups = "all"

[dependency-groups]
ruff = ["ruff >=0.14.3,<1"]
black = ["black >=23.3,<24"]
isort = ["isort >=5.11,<6"]
mypy = ["mypy >=1.14.0"]
pylint = ["pylint >=3.2.5"]
pydocstyle = ["pydocstyle[toml] >=6.3"]
test = [
"pytest",
"pytest-cov >=5.0,<6",
@@ -135,6 +134,60 @@ packages = ["src/tmlt"]
################################################################################
# Linter configuration

[tool.ruff.lint]
# A list of all of Ruff's rules can be found at https://docs.astral.sh/ruff/rules/
select = [
# Enable Ruff-specific lints plus Pylint, pydocstyle, pyflakes, and pycodestyle.
# The latter two cover many lints that we previously relied on pylint for;
# since the rule sets overlap, Ruff implements each lint in only one of them.
"RUF", "PL", "D", "F", "E", "W",
# Also enable a subset of flake8 rules, for similar reasons to pyflakes/pycodestyle.
"ISC", "SLF"
]
ignore = [
# too-many-*: These rules are too context-dependent to be generally useful;
# we can evaluate such cases during code review instead.
"PLR09",
# magic-value-comparison: This rule flags a lot of constants where extracting
# a named constant doesn't really make sense; we can make this call during
# code review instead.
"PLR2004",
# explicit-f-string-type-conversion: we don't generally use the !r syntax in
# f-strings, which this rule enforces.
"RUF010",

# TODO: This disables every lint that is currently failing; go through and
# either fix/individually disable each instance, or choose to permanently
# ignore each one.
"PLW1641", # eq-without-hash
"PLC0206", # dict-index-missing-items
"RUF005", # collection-literal-concatenation
"RUF015", # unnecessary-iterable-allocation-for-first-element
"D415", # missing-terminal-punctuation
"RUF043", # pytest-raises-ambiguous-pattern
"D205", # missing-blank-line-after-summary
"D210", # surrounding-whitespace
"D102", # undocumented-public-method
"E501", # line-too-long
"E731", # lambda-assignment
"E741", # ambiguous-variable-name
"SLF001", # private-member-access
"RET504", # unnecessary-assign
"F401", # unused-import
"RUF009", # function-call-in-dataclass-default-argument
"E721", # type-comparison
"D103", # undocumented-public-function
"PLR0124", # comparison-with-itself
]

# Ruff's RUF001-003 rules disallow certain Unicode characters that are easily
# confused with ASCII characters; this makes sense for the most part, but some
# of our docstrings use Greek letters that fall into that category. This allows
# those characters.
allowed-confusables = ['α', 'ρ', '𝝆']

[tool.ruff.lint.pydocstyle]
convention = "google"
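
To make the `allowed-confusables` and `convention = "google"` settings concrete, here is a hedged illustration of a docstring these settings accept: the Greek α and ρ would otherwise trip RUF002 (ambiguous character in docstring), and the Args/Returns sections follow the Google convention the D rules check. The function itself is hypothetical:

def renyi_bound(alpha: float) -> float:
    """Return a placeholder Rényi-DP bound.

    Args:
        alpha: The Rényi order α; the Greek letter is permitted by the
            allowed-confusables list above.

    Returns:
        A bound expressed in terms of ρ (also whitelisted).
    """
    return alpha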

[tool.black]
force-exclude = "noxfile.py"

@@ -162,70 +215,8 @@ module = "test.*"
disallow_untyped_defs = false
check_untyped_defs = true

[tool.pylint.master]
# See https://github.com/PyCQA/pylint/issues/1975#issuecomment-387924981
extension-pkg-whitelist = ['numpy']
load-plugins = ['pylint.extensions.docparams']
# Only check param docs in docstrings that contain an Args: section.
# Set to "no" to show docstrings missing argument documentation.
accept-no-param-doc = true

[tool.pylint.'MESSAGES CONTROL']
enable = [
# Note: there is a false positive on 'useless-suppression' when you
# use 'disable=line-too-long' at the end of a docstring.
# See: https://github.com/pylint-dev/pylint/issues/8301
'useless-suppression'
]
# By default, informational rules like useless-suppression don't cause PyLint to
# produce an error.
fail-on = ['useless-suppression']
disable = [
'arguments-differ',
'duplicate-code',
'fixme',
'invalid-name',
'logging-format-interpolation',
'logging-fstring-interpolation',
'missing-function-docstring', # Redundant with pydocstyle
'missing-raises-doc',
'missing-return-doc',
'no-else-return',
'super-init-not-called',
'too-few-public-methods',
'too-many-ancestors',
'too-many-arguments',
'too-many-branches',
'too-many-instance-attributes',
'too-many-lines',
'too-many-locals',
'too-many-positional-arguments',
'too-many-public-methods',
'too-many-return-statements',
'too-many-statements',
'unbalanced-tuple-unpacking',
'unnecessary-lambda-assignment',
'unsubscriptable-object',
'use-dict-literal',
# There are a lot of false positives for unsupported-binary-operation
# on Python 3.9: https://github.com/pylint-dev/pylint/issues/7381
'unsupported-binary-operation',
# black and isort group tmlt.core separately from tmlt.analytics,
# but pylint thinks they should both be grouped as 'tmlt'.
'ungrouped-imports',
'wrong-import-order',
]

[tool.pylint.FORMAT]
max-line-length = 88

[tool.pydocstyle]
convention = "google"
add-ignore = [
# `D200: One-line docstring should fit on one line with quotes`
# conflicts with pylint's `max-line-length`.
"D200",
]
################################################################################
# Test configuration

[tool.pytest.ini_options]
markers = [
@@ -236,8 +227,5 @@ markers = [
# more information and a better future fix.
addopts = ["--import-mode=importlib"]

################################################################################
# Test configuration

[tool.coverage.run]
relative_files = true
2 changes: 0 additions & 2 deletions src/tmlt/analytics/__init__.py
@@ -38,8 +38,6 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright Tumult Labs 2025

from typing import List

from tmlt.analytics._utils import AnalyticsInternalError
from tmlt.analytics.binning_spec import BinningSpec, BinT
from tmlt.analytics.config import Config, FeatureFlag
16 changes: 7 additions & 9 deletions src/tmlt/analytics/_neighboring_relation.py
@@ -3,7 +3,6 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright Tumult Labs 2025

# pylint: disable=protected-access

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
@@ -284,14 +283,13 @@ def _validate(self, dfs: Dict[str, DataFrame]) -> List[str]:
f" has type {df_field.dataType}."
)
key_type = df_field.dataType
else:
if not df_field.dataType == key_type:
raise ValueError(
f"Key column '{key_column}' has type "
f"{df_field.dataType}, but in another"
f" table it has type {key_type}. Key types"
" must match across tables"
)
elif not df_field.dataType == key_type:
raise ValueError(
f"Key column '{key_column}' has type "
f"{df_field.dataType}, but in another"
f" table it has type {key_type}. Key types"
" must match across tables"
)

return list(self.table_to_key_column.keys())

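The change in this file is purely structural: a nested `else: if ...:` block becomes `elif`, dropping one indentation level without altering behavior. The same pattern in miniature, with hypothetical names:

def _check_key_type(current_type, expected_type):
    if expected_type is None:
        expected_type = current_type
    elif current_type != expected_type:  # previously: else: / if ...:
        raise ValueError("Key types must match across tables")
    return expected_type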
2 changes: 1 addition & 1 deletion src/tmlt/analytics/_noise_info.py
@@ -150,7 +150,7 @@ def _inverse_cdf(noise_info: Dict[str, Any], p: float) -> float:

@singledispatch
def _noise_from_info(
info: Any, # pylint: disable=unused-argument
info: Any,
) -> List[Dict[str, Any]]:
"""Get noise information from info (for a measurement).

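`_noise_from_info` is a `functools.singledispatch` base implementation, which is why its `info` argument is deliberately unused and why the `unused-argument` suppression can be dropped: pylint is gone, and no equivalent Ruff rule is enabled in the pyproject.toml above. A hedged, self-contained sketch of the pattern (the registered type and returned fields are invented for illustration):

from functools import singledispatch
from typing import Any, Dict, List

@singledispatch
def _noise_from_info(info: Any) -> List[Dict[str, Any]]:
    """Fallback when no overload is registered for type(info)."""
    raise NotImplementedError(f"No noise information for {type(info).__name__}")

@_noise_from_info.register
def _(info: float) -> List[Dict[str, Any]]:
    # Hypothetical overload: dispatch happens on the runtime type of `info`.
    return [{"distribution": "laplace", "scale": info}]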
20 changes: 10 additions & 10 deletions src/tmlt/analytics/_query_expr.py
@@ -1147,14 +1147,14 @@ def schema(self, catalog: Catalog) -> Schema:
name
for name, cd in input_schema.column_descs.items()
if (cd.allow_null or cd.allow_nan)
and not (name in [input_schema.grouping_column, input_schema.id_column])
and name not in [input_schema.grouping_column, input_schema.id_column]
]
return Schema(
{
name: ColumnDescriptor(
column_type=cd.column_type,
allow_null=(cd.allow_null and not name in columns_to_change),
allow_nan=(cd.allow_nan and not name in columns_to_change),
allow_null=(cd.allow_null and name not in columns_to_change),
allow_nan=(cd.allow_nan and name not in columns_to_change),
allow_inf=cd.allow_inf,
)
for name, cd in input_schema.column_descs.items()
@@ -1239,15 +1239,15 @@ def schema(self, catalog: Catalog) -> Schema:
for name, cd in input_schema.column_descs.items()
if cd.column_type == ColumnType.DECIMAL
and cd.allow_inf
and not (name in [input_schema.grouping_column, input_schema.id_column])
and name not in [input_schema.grouping_column, input_schema.id_column]
]
return Schema(
{
name: ColumnDescriptor(
column_type=cd.column_type,
allow_null=cd.allow_null,
allow_nan=cd.allow_nan,
allow_inf=(cd.allow_inf and not name in columns_to_change),
allow_inf=(cd.allow_inf and name not in columns_to_change),
)
for name, cd in input_schema.column_descs.items()
},
@@ -1322,15 +1322,15 @@ def schema(self, catalog: Catalog) -> Schema:
name
for name, cd in input_schema.column_descs.items()
if (cd.allow_null or cd.allow_nan)
and not name in [input_schema.grouping_column, input_schema.id_column]
and name not in [input_schema.grouping_column, input_schema.id_column]
)

return Schema(
{
name: ColumnDescriptor(
column_type=cd.column_type,
allow_null=(cd.allow_null and not name in columns),
allow_nan=(cd.allow_nan and not name in columns),
allow_null=(cd.allow_null and name not in columns),
allow_nan=(cd.allow_nan and name not in columns),
allow_inf=(cd.allow_inf),
)
for name, cd in input_schema.column_descs.items()
@@ -1403,7 +1403,7 @@ def schema(self, catalog: Catalog) -> Schema:
for name, cd in input_schema.column_descs.items()
if cd.column_type == ColumnType.DECIMAL
and cd.allow_inf
and not name in (input_schema.grouping_column, input_schema.id_column)
and name not in (input_schema.grouping_column, input_schema.id_column)
)

return Schema(
@@ -1412,7 +1412,7 @@
column_type=cd.column_type,
allow_null=cd.allow_null,
allow_nan=cd.allow_nan,
allow_inf=(cd.allow_inf and not name in columns),
allow_inf=(cd.allow_inf and name not in columns),
)
for name, cd in input_schema.column_descs.items()
},
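Every change in this file is the same mechanical fix: `not x in y` becomes `x not in y`, pycodestyle's E713, enabled via the "E" selector in pyproject.toml above. The two spellings compile to the same check, since `not x in y` parses as `not (x in y)`; the rewrite only improves readability. A quick demonstration:

special = ("grouping_column", "id_column")
name = "measure_column"

# `not name in special` parses as `not (name in special)`, so both forms are
# equivalent; the second is the idiomatic spelling E713 enforces.
assert (not (name in special)) == (name not in special)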
@@ -645,34 +645,33 @@ def _validate_approxDP_and_adjust_budget(
f"The budget provided was {self.budget}."
)
return
elif mechanism in (
AverageMechanism.LAPLACE,
CountDistinctMechanism.LAPLACE,
CountMechanism.LAPLACE,
StdevMechanism.LAPLACE,
SumMechanism.LAPLACE,
VarianceMechanism.LAPLACE,
):
warnings.warn(
"When using LAPLACE with an ApproxDPBudget, the delta value of "
"the budget will be replaced with zero."
)
self.adjusted_budget = ApproxDPBudget(epsilon, 0)
elif mechanism in (
AverageMechanism.DEFAULT,
CountDistinctMechanism.DEFAULT,
CountMechanism.DEFAULT,
StdevMechanism.DEFAULT,
SumMechanism.DEFAULT,
VarianceMechanism.DEFAULT,
):
self.adjusted_budget = ApproxDPBudget(epsilon, 0)
elif mechanism is None:
# Quantile has no mechanism
self.adjusted_budget = ApproxDPBudget(epsilon, 0)
else:
if mechanism in (
AverageMechanism.LAPLACE,
CountDistinctMechanism.LAPLACE,
CountMechanism.LAPLACE,
StdevMechanism.LAPLACE,
SumMechanism.LAPLACE,
VarianceMechanism.LAPLACE,
):
warnings.warn(
"When using LAPLACE with an ApproxDPBudget, the delta value of "
"the budget will be replaced with zero."
)
self.adjusted_budget = ApproxDPBudget(epsilon, 0)
elif mechanism in (
AverageMechanism.DEFAULT,
CountDistinctMechanism.DEFAULT,
CountMechanism.DEFAULT,
StdevMechanism.DEFAULT,
SumMechanism.DEFAULT,
VarianceMechanism.DEFAULT,
):
self.adjusted_budget = ApproxDPBudget(epsilon, 0)
elif mechanism is None:
# Quantile has no mechanism
self.adjusted_budget = ApproxDPBudget(epsilon, 0)
else:
raise AnalyticsInternalError(f"Unknown mechanism {mechanism}.")
raise AnalyticsInternalError(f"Unknown mechanism {mechanism}.")

def _validate_measurement(self, measurement: Measurement, mid_stability: sp.Expr):
"""Validate a measurement."""
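As in _neighboring_relation.py, this hunk is a pure de-nesting: the whole `if/elif/else` chain previously lived inside an `else:` branch, and the refactor promotes it one level with unchanged behavior. A compressed, hypothetical sketch of the resulting control flow (mechanism names simplified; the earlier return path from the context above is omitted, and the real code sets `self.adjusted_budget` rather than returning):

import warnings
from typing import Optional, Tuple

def _adjust_approxdp_budget(epsilon: float, mechanism: Optional[str]) -> Tuple[float, int]:
    """Condensed illustration of the flattened if/elif/else chain."""
    if mechanism == "LAPLACE":
        warnings.warn(
            "When using LAPLACE with an ApproxDPBudget, the delta value of "
            "the budget will be replaced with zero."
        )
        return (epsilon, 0)
    elif mechanism in ("DEFAULT", None):  # quantile passes None: no mechanism
        return (epsilon, 0)
    else:
        raise ValueError(f"Unknown mechanism {mechanism}.")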
@@ -1082,7 +1082,7 @@ def _get_replace_with(
else:
# Check that all columns exist
for col in replace_with:
if not col in analytics_schema:
if col not in analytics_schema:
raise ValueError(
f"Cannot replace values in column {col}, because it is not in"
" the schema"
@@ -1555,9 +1555,8 @@ def visit_enforce_constraint(self, expr: EnforceConstraint) -> Output:
child_transformation, child_ref, child_constraints = self._visit_child(
expr.child
)
# pylint: disable=protected-access
transformation, ref = expr.constraint._enforce(child_transformation, child_ref)
# pylint: enable=protected-access

return self.Output(
transformation,
ref,
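The paired `# pylint: disable/enable=protected-access` comments around the `_enforce` call can go because private-member access is now Ruff's SLF001, which the pyproject.toml above ignores project-wide. A tiny illustration (the class and call are hypothetical stand-ins):

class Constraint:
    """Hypothetical stand-in for a tmlt.analytics constraint."""

    def _enforce(self, transformation, ref):
        # Private by convention; with SLF001 in the global ignore list above,
        # callers need no per-line suppression to touch it.
        return transformation, ref

transformation, ref = Constraint()._enforce("child_transformation", "child_ref")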