From 6be80100ee84b1c5ccdfa0ee9dfb7f1a4e6f6a13 Mon Sep 17 00:00:00 2001 From: dasm Date: Thu, 25 Sep 2025 13:21:22 -0700 Subject: [PATCH 1/2] Re-enable older pyspark on linux --- CHANGELOG.rst | 3 +- pyproject.toml | 5 ++- uv.lock | 101 +++++++++++++++++++++++++++++++++---------------- 3 files changed, 73 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b65bf933..33bd5aa5 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -12,8 +12,7 @@ Unreleased Changed ~~~~~~~ -- Dropped support for pyspark <3.5.0 after discovering that it does not work on Macs (this may not work for older versions as well). - +- Dropped support for pyspark <3.5.0 on Macs after discovering that these configurations frequently crash. Older versions of the library may also be affected. .. _v0.20.2: diff --git a/pyproject.toml b/pyproject.toml index f23a756f..296df0f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,10 @@ dependencies = [ "pandas >=1.4.0,<2 ; python_version < '3.11'", "pandas >=1.5.0,<2 ; python_version == '3.11'", "pandas >=2.2.0,<3 ; python_version >= '3.12'", - "pyspark[sql] >=3.5.0,<3.6", + "pyspark[sql] >=3.3.1,<3.6 ; python_version < '3.11' and sys_platform != 'darwin'", + "pyspark[sql] >=3.4.0,<3.6 ; python_version == '3.11' and sys_platform != 'darwin'", + "pyspark[sql] >=3.5.0,<3.6 ; python_version >= '3.12' and sys_platform != 'darwin'", + "pyspark[sql] >=3.5.0,<3.6 ; sys_platform == 'darwin'", "sympy >=1.8,<1.13", "typeguard >=4.0.0,<5", "typing-extensions >=4.1.0,<5", diff --git a/uv.lock b/uv.lock index 63e47920..cb74f831 100644 --- a/uv.lock +++ b/uv.lock @@ -2,10 +2,14 @@ version = 1 revision = 2 requires-python = ">=3.9, <3.13" resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version < '3.10'", + "python_full_version >= '3.12' and sys_platform != 'darwin'", + "python_full_version >= '3.12' and sys_platform == 'darwin'", + "python_full_version == '3.11.*' and sys_platform != 'darwin'", + "python_full_version == '3.11.*' and sys_platform == 'darwin'", + "python_full_version == '3.10.*' and sys_platform != 'darwin'", + "python_full_version == '3.10.*' and sys_platform == 'darwin'", + "python_full_version < '3.10' and sys_platform != 'darwin'", + "python_full_version < '3.10' and sys_platform == 'darwin'", ] [[package]] @@ -227,7 +231,8 @@ name = "click" version = "8.1.8" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version < '3.10'", + "python_full_version < '3.10' and sys_platform != 'darwin'", + "python_full_version < '3.10' and sys_platform == 'darwin'", ] dependencies = [ { name = "colorama", marker = "python_full_version < '3.10' and sys_platform == 'win32'" }, @@ -242,9 +247,12 @@ name = "click" version = "8.2.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", + "python_full_version >= '3.12' and sys_platform != 'darwin'", + "python_full_version >= '3.12' and sys_platform == 'darwin'", + "python_full_version == '3.11.*' and sys_platform != 'darwin'", + "python_full_version == '3.11.*' and sys_platform == 'darwin'", + "python_full_version == '3.10.*' and sys_platform != 'darwin'", + "python_full_version == '3.10.*' and sys_platform == 'darwin'", ] dependencies = [ { name = "colorama", marker = "python_full_version >= '3.10' and sys_platform == 'win32'" }, @@ -280,7 +288,8 @@ name = "contourpy" version = "1.3.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version < '3.10'", + "python_full_version < '3.10' and sys_platform != 'darwin'", + "python_full_version < '3.10' and sys_platform == 'darwin'", ] dependencies = [ { name = "numpy", marker = "python_full_version < '3.10'" }, @@ -340,9 +349,12 @@ name = "contourpy" version = "1.3.2" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", + "python_full_version >= '3.12' and sys_platform != 'darwin'", + "python_full_version >= '3.12' and sys_platform == 'darwin'", + "python_full_version == '3.11.*' and sys_platform != 'darwin'", + "python_full_version == '3.11.*' and sys_platform == 'darwin'", + "python_full_version == '3.10.*' and sys_platform != 'darwin'", + "python_full_version == '3.10.*' and sys_platform == 'darwin'", ] dependencies = [ { name = "numpy", marker = "python_full_version >= '3.10'" }, @@ -726,7 +738,8 @@ name = "kiwisolver" version = "1.4.7" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version < '3.10'", + "python_full_version < '3.10' and sys_platform != 'darwin'", + "python_full_version < '3.10' and sys_platform == 'darwin'", ] sdist = { url = "https://files.pythonhosted.org/packages/85/4d/2255e1c76304cbd60b48cee302b66d1dde4468dc5b1160e4b7cb43778f2a/kiwisolver-1.4.7.tar.gz", hash = "sha256:9893ff81bd7107f7b685d3017cc6583daadb4fc26e4a888350df530e41980a60", size = 97286, upload-time = "2024-09-04T09:39:44.302Z" } wheels = [ @@ -813,9 +826,12 @@ name = "kiwisolver" version = "1.4.8" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", + "python_full_version >= '3.12' and sys_platform != 'darwin'", + "python_full_version >= '3.12' and sys_platform == 'darwin'", + "python_full_version == '3.11.*' and sys_platform != 'darwin'", + "python_full_version == '3.11.*' and sys_platform == 'darwin'", + "python_full_version == '3.10.*' and sys_platform != 'darwin'", + "python_full_version == '3.10.*' and sys_platform == 'darwin'", ] sdist = { url = "https://files.pythonhosted.org/packages/82/59/7c91426a8ac292e1cdd53a63b6d9439abd573c875c3f92c146767dd33faf/kiwisolver-1.4.8.tar.gz", hash = "sha256:23d5f023bdc8c7e54eb65f03ca5d5bb25b601eac4d7f1a042888a1f45237987e", size = 97538, upload-time = "2024-12-24T18:30:51.519Z" } wheels = [ @@ -958,7 +974,8 @@ name = "matplotlib" version = "3.9.4" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version < '3.10'", + "python_full_version < '3.10' and sys_platform != 'darwin'", + "python_full_version < '3.10' and sys_platform == 'darwin'", ] dependencies = [ { name = "contourpy", version = "1.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, @@ -1009,9 +1026,12 @@ name = "matplotlib" version = "3.10.3" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", + "python_full_version >= '3.12' and sys_platform != 'darwin'", + "python_full_version >= '3.12' and sys_platform == 'darwin'", + "python_full_version == '3.11.*' and sys_platform != 'darwin'", + "python_full_version == '3.11.*' and sys_platform == 'darwin'", + "python_full_version == '3.10.*' and sys_platform != 'darwin'", + "python_full_version == '3.10.*' and sys_platform == 'darwin'", ] dependencies = [ { name = "contourpy", version = "1.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, @@ -1256,9 +1276,12 @@ name = "pandas" version = "1.5.3" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version < '3.10'", + "python_full_version == '3.11.*' and sys_platform != 'darwin'", + "python_full_version == '3.11.*' and sys_platform == 'darwin'", + "python_full_version == '3.10.*' and sys_platform != 'darwin'", + "python_full_version == '3.10.*' and sys_platform == 'darwin'", + "python_full_version < '3.10' and sys_platform != 'darwin'", + "python_full_version < '3.10' and sys_platform == 'darwin'", ] dependencies = [ { name = "numpy", marker = "python_full_version < '3.12'" }, @@ -1293,7 +1316,8 @@ name = "pandas" version = "2.3.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.12'", + "python_full_version >= '3.12' and sys_platform != 'darwin'", + "python_full_version >= '3.12' and sys_platform == 'darwin'", ] dependencies = [ { name = "numpy", marker = "python_full_version >= '3.12'" }, @@ -1883,8 +1907,10 @@ name = "randomgen" version = "1.26.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version == '3.10.*'", - "python_full_version < '3.10'", + "python_full_version == '3.10.*' and sys_platform != 'darwin'", + "python_full_version == '3.10.*' and sys_platform == 'darwin'", + "python_full_version < '3.10' and sys_platform != 'darwin'", + "python_full_version < '3.10' and sys_platform == 'darwin'", ] dependencies = [ { name = "numpy", marker = "python_full_version < '3.11'" }, @@ -1921,8 +1947,10 @@ name = "randomgen" version = "1.26.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", + "python_full_version >= '3.12' and sys_platform != 'darwin'", + "python_full_version >= '3.12' and sys_platform == 'darwin'", + "python_full_version == '3.11.*' and sys_platform != 'darwin'", + "python_full_version == '3.11.*' and sys_platform == 'darwin'", ] dependencies = [ { name = "numpy", marker = "python_full_version >= '3.11'" }, @@ -1988,7 +2016,8 @@ name = "scipy" version = "1.13.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version < '3.10'", + "python_full_version < '3.10' and sys_platform != 'darwin'", + "python_full_version < '3.10' and sys_platform == 'darwin'", ] dependencies = [ { name = "numpy", marker = "python_full_version < '3.10'" }, @@ -2026,7 +2055,8 @@ name = "scipy" version = "1.15.3" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version == '3.10.*'", + "python_full_version == '3.10.*' and sys_platform != 'darwin'", + "python_full_version == '3.10.*' and sys_platform == 'darwin'", ] dependencies = [ { name = "numpy", marker = "python_full_version == '3.10.*'" }, @@ -2067,8 +2097,10 @@ name = "scipy" version = "1.16.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", + "python_full_version >= '3.12' and sys_platform != 'darwin'", + "python_full_version >= '3.12' and sys_platform == 'darwin'", + "python_full_version == '3.11.*' and sys_platform != 'darwin'", + "python_full_version == '3.11.*' and sys_platform == 'darwin'", ] dependencies = [ { name = "numpy", marker = "python_full_version >= '3.11'" }, @@ -2453,7 +2485,10 @@ requires-dist = [ { name = "pandas", marker = "python_full_version < '3.11'", specifier = ">=1.4.0,<2" }, { name = "pandas", marker = "python_full_version == '3.11.*'", specifier = ">=1.5.0,<2" }, { name = "pandas", marker = "python_full_version >= '3.12'", specifier = ">=2.2.0,<3" }, - { name = "pyspark", extras = ["sql"], specifier = ">=3.5.0,<3.6" }, + { name = "pyspark", extras = ["sql"], marker = "sys_platform == 'darwin'", specifier = ">=3.5.0,<3.6" }, + { name = "pyspark", extras = ["sql"], marker = "python_full_version < '3.11' and sys_platform != 'darwin'", specifier = ">=3.3.1,<3.6" }, + { name = "pyspark", extras = ["sql"], marker = "python_full_version == '3.11.*' and sys_platform != 'darwin'", specifier = ">=3.4.0,<3.6" }, + { name = "pyspark", extras = ["sql"], marker = "python_full_version >= '3.12' and sys_platform != 'darwin'", specifier = ">=3.5.0,<3.6" }, { name = "sympy", specifier = ">=1.8,<1.13" }, { name = "tabulate", specifier = ">=0.8.9,<0.9" }, { name = "tmlt-core", specifier = ">=0.18.0,<0.19" }, From 5e7a3456f6af8f35affb2aa77d933f3a7ebb0ad7 Mon Sep 17 00:00:00 2001 From: dasm Date: Thu, 25 Sep 2025 13:24:52 -0700 Subject: [PATCH 2/2] Add older pyspark versions to noxfile. --- noxfile.py | 99 ++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 88 insertions(+), 11 deletions(-) diff --git a/noxfile.py b/noxfile.py index adbb6427..990a6e00 100644 --- a/noxfile.py +++ b/noxfile.py @@ -5,6 +5,7 @@ API reference. """ +import sys from pathlib import Path import nox @@ -28,17 +29,93 @@ """For test suites where we track coverage (i.e. the fast tests and the full test suite), fail if test coverage falls below this percentage.""" + +def is_mac(): + """Returns true if the current system is a mac.""" + return sys.platform == "darwin" + + DEPENDENCY_MATRIX = [ - #fmt: off - DependencyConfiguration(id="3.9-oldest", python="3.9", packages={"pyspark[sql]": "==3.5.0", "sympy": "==1.8", "pandas": "==1.4.0", "tmlt.core": "==0.18.0"}), - DependencyConfiguration(id="3.9-newest", python="3.9", packages={"pyspark[sql]": "==3.5.6", "sympy": "==1.9", "pandas": "==1.5.3", "tmlt.core": ">=0.18.0"}), - DependencyConfiguration(id="3.10-oldest", python="3.10", packages={"pyspark[sql]": "==3.5.0", "sympy": "==1.8", "pandas": "==1.4.0", "tmlt.core": "==0.18.0"}), - DependencyConfiguration(id="3.10-newest", python="3.10", packages={"pyspark[sql]": "==3.5.6", "sympy": "==1.9", "pandas": "==1.5.3", "tmlt.core": ">=0.18.0"}), - DependencyConfiguration(id="3.11-oldest", python="3.11", packages={"pyspark[sql]": "==3.5.0", "sympy": "==1.8", "pandas": "==1.5.0", "tmlt.core": "==0.18.0"}), - DependencyConfiguration(id="3.11-newest", python="3.11", packages={"pyspark[sql]": "==3.5.6", "sympy": "==1.9", "pandas": "==1.5.3", "tmlt.core": ">=0.18.0"}), - DependencyConfiguration(id="3.12-oldest", python="3.12", packages={"pyspark[sql]": "==3.5.0", "sympy": "==1.8", "pandas": "==2.2.0", "tmlt.core": "==0.18.0"}), - DependencyConfiguration(id="3.12-newest", python="3.12", packages={"pyspark[sql]": "==3.5.6", "sympy": "==1.9", "pandas": "==2.2.3", "tmlt.core": ">=0.18.0"}), - #fmt: on + DependencyConfiguration( + id="3.9-oldest", + python="3.9", + packages={ + "pyspark[sql]": "==3.3.1" if not is_mac() else "==3.5.0", + "sympy": "==1.8", + "pandas": "==1.4.0", + "tmlt.core": "==0.18.0", + }, + ), + DependencyConfiguration( + id="3.9-newest", + python="3.9", + packages={ + "pyspark[sql]": "==3.5.6", + "sympy": "==1.9", + "pandas": "==1.5.3", + "tmlt.core": ">=0.18.0", + }, + ), + DependencyConfiguration( + id="3.10-oldest", + python="3.10", + packages={ + "pyspark[sql]": "==3.3.1" if not is_mac() else "==3.5.0", + "sympy": "==1.8", + "pandas": "==1.4.0", + "tmlt.core": "==0.18.0", + }, + ), + DependencyConfiguration( + id="3.10-newest", + python="3.10", + packages={ + "pyspark[sql]": "==3.5.6", + "sympy": "==1.9", + "pandas": "==1.5.3", + "tmlt.core": ">=0.18.0", + }, + ), + DependencyConfiguration( + id="3.11-oldest", + python="3.11", + packages={ + "pyspark[sql]": "==3.4.0" if not is_mac() else "==3.5.0", + "sympy": "==1.8", + "pandas": "==1.5.0", + "tmlt.core": "==0.18.0", + }, + ), + DependencyConfiguration( + id="3.11-newest", + python="3.11", + packages={ + "pyspark[sql]": "==3.5.6", + "sympy": "==1.9", + "pandas": "==1.5.3", + "tmlt.core": ">=0.18.0", + }, + ), + DependencyConfiguration( + id="3.12-oldest", + python="3.12", + packages={ + "pyspark[sql]": "==3.5.0", + "sympy": "==1.8", + "pandas": "==2.2.0", + "tmlt.core": "==0.18.0", + }, + ), + DependencyConfiguration( + id="3.12-newest", + python="3.12", + packages={ + "pyspark[sql]": "==3.5.6", + "sympy": "==1.9", + "pandas": "==2.2.3", + "tmlt.core": ">=0.18.0", + }, + ), ] AUDIT_VERSIONS = ["3.9", "3.10", "3.11", "3.12"] @@ -101,7 +178,7 @@ sm.docs() for benchmark_name, timeout in BENCHMARK_TO_TIMEOUT.items(): - sm.benchmark(CWD / benchmark_name, timeout*60) + sm.benchmark(CWD / benchmark_name, timeout * 60) sm.audit()