diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index a58e554b1..f005a593d 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -12,13 +12,10 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - # macos-14 (which is macos-latest) is ARM only. macos-13 is latest intel runner. + # macos-14 (which is macos-latest) is ARM only. macos-13 is the latest intel runner. os: [ ubuntu-latest, macos-13 ] - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] include: - - os: ubuntu-latest - python-version: "3.8" - cibw-build: "cp38-manylinux_x86_64" - os: ubuntu-latest python-version: "3.9" cibw-build: "cp39-manylinux_x86_64" @@ -28,9 +25,12 @@ jobs: - os: ubuntu-latest python-version: "3.11" cibw-build: "cp311-manylinux_x86_64" - - os: macos-13 - python-version: "3.8" - cibw-build: "cp38-macosx_x86_64" + - os: ubuntu-latest + python-version: "3.12" + cibw-build: "cp312-manylinux_x86_64" + - os: ubuntu-latest + python-version: "3.13" + cibw-build: "cp313-manylinux_x86_64" - os: macos-13 python-version: "3.9" cibw-build: "cp39-macosx_x86_64" @@ -40,6 +40,12 @@ jobs: - os: macos-13 python-version: "3.11" cibw-build: "cp311-macosx_x86_64" + - os: macos-13 + python-version: "3.12" + cibw-build: "cp312-macosx_x86_64" + - os: macos-13 + python-version: "3.13" + cibw-build: "cp313-macosx_x86_64" steps: - uses: actions/checkout@v4 diff --git a/README.md b/README.md index 081d0c109..6161b900c 100644 --- a/README.md +++ b/README.md @@ -2,19 +2,15 @@ [![Build Status](https://dev.azure.com/leonhardspiegelberg/Tuplex%20-%20Open%20Source/_apis/build/status/tuplex.tuplex?branchName=master)](https://dev.azure.com/leonhardspiegelberg/Tuplex%20-%20Open%20Source/_build/latest?definitionId=2&branchName=master) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) -![Supported python 
versions](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue) -[![PyPi Downloads](https://img.shields.io/pypi/dm/tuplex)](https://img.shields.io/pypi/dm/tuplex) +![Supported python versions](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue) [Website](https://tuplex.cs.brown.edu/) [Documentation](https://tuplex.cs.brown.edu/python-api.html) Tuplex is a parallel big data processing framework that runs data science pipelines written in Python at the speed of compiled code. Tuplex has similar Python APIs to [Apache Spark](https://spark.apache.org/) or [Dask](https://dask.org/), but rather than invoking the Python interpreter, Tuplex generates optimized LLVM bytecode for the given pipeline and input data set. Under the hood, Tuplex is based on data-driven compilation and dual-mode processing, two key techniques that make it possible for Tuplex to provide speed comparable to a pipeline written in hand-optimized C++. -You can join the discussion on Tuplex on our [Gitter community](https://gitter.im/tuplex/community) or read up more on the background of Tuplex in our [SIGMOD'21 paper](https://dl.acm.org/doi/abs/10.1145/3448016.3457244). - Contributions welcome! 
- ### Contents + [Example](#example) + [Quickstart](#quickstart) diff --git a/scripts/build_linux_wheels.sh b/scripts/build_linux_wheels.sh index 9bf5885fb..28f0cf4e1 100755 --- a/scripts/build_linux_wheels.sh +++ b/scripts/build_linux_wheels.sh @@ -39,16 +39,12 @@ fi export CIBW_ENVIRONMENT="TUPLEX_LAMBDA_ZIP='./tuplex/other/tplxlam.zip' CMAKE_ARGS='-DBUILD_WITH_AWS=ON -DBUILD_WITH_ORC=ON' LD_LIBRARY_PATH=/usr/local/lib:/opt/lib" -# Use the following line to build only python3.7-3.9 wheel -export CIBW_BUILD="cp3{8,9,10,11}-*" +# Build wheels for CPython 3.9 - 3.13 only +export CIBW_BUILD="cp3{9,10,11,12,13}-*" export CIBW_ARCHS_LINUX="x86_64" # do not build musllinux yet export CIBW_SKIP="*-musllinux_*" -# to test the others from 3.7-3.9, use these two lines: -#export CIBW_BUILD="cp3{7,8,9}-*" -#export CIBW_SKIP="cp3{5,6,7,8}-macosx* pp*" - export CIBW_BUILD_VERBOSITY=3 -export CIBW_PROJECT_REQUIRES_PYTHON=">=3.8" +export CIBW_PROJECT_REQUIRES_PYTHON=">=3.9" diff --git a/scripts/build_linux_wheels_with_test.sh b/scripts/build_linux_wheels_with_test.sh index 6830bb0b2..a595de289 100755 --- a/scripts/build_linux_wheels_with_test.sh +++ b/scripts/build_linux_wheels_with_test.sh @@ -39,18 +39,14 @@ fi export CIBW_ENVIRONMENT="TUPLEX_LAMBDA_ZIP='./tuplex/other/tplxlam.zip' CMAKE_ARGS='-DBUILD_WITH_AWS=ON -DBUILD_WITH_ORC=ON' LD_LIBRARY_PATH=/usr/local/lib:/opt/lib" -# Use the following line to build only python3.7-3.9 wheel -export CIBW_BUILD="cp3{8,9,10,11}-*" +# Build wheels for CPython 3.9 - 3.13 only +export CIBW_BUILD="cp3{9,10,11,12,13}-*" export CIBW_ARCHS_LINUX="x86_64" # do not build musllinux yet export CIBW_SKIP="*-musllinux_*" -# to test the others from 3.7-3.9, use these two lines: -#export CIBW_BUILD="cp3{7,8,9}-*" -#export CIBW_SKIP="cp3{5,6,7,8}-macosx* pp*" - export CIBW_BUILD_VERBOSITY=3 -export CIBW_PROJECT_REQUIRES_PYTHON=">=3.8" +export CIBW_PROJECT_REQUIRES_PYTHON=">=3.9" # uncomment to increase verbosity of cibuildwheel # export CIBW_BUILD_VERBOSITY=3 diff --git a/scripts/build_macos_wheels.sh b/scripts/build_macos_wheels.sh index 63246f974..2e19c2a47 100755 --- 
a/scripts/build_macos_wheels.sh +++ b/scripts/build_macos_wheels.sh @@ -50,11 +50,11 @@ echo "-- Detected Xcode ${xcode_version_str}" # if no param is given, use defaults to build all if [ "${arch}" = "arm64" ]; then - # build Python 3.9 - 3.11 - CIBW_BUILD=${CIBW_BUILD-"cp3{9,10,11}-macosx_arm64"} + # build Python 3.9 - 3.13 + CIBW_BUILD=${CIBW_BUILD-"cp3{9,10,11,12,13}-macosx_arm64"} else - # build Python 3.8 - 3.11 - CIBW_BUILD=${CIBW_BUILD-"cp3{8,9,10,11}-macosx_x86_64"} + # build Python 3.9 - 3.13 + CIBW_BUILD=${CIBW_BUILD-"cp3{9,10,11,12,13}-macosx_x86_64"} fi echo "-- Building wheels for ${CIBW_BUILD}" @@ -82,7 +82,7 @@ export CIBW_BEFORE_BUILD_MACOS="brew install protobuf coreutils zstd zlib libmag export CIBW_ENVIRONMENT_MACOS="MACOSX_DEPLOYMENT_TARGET=${MINIMUM_TARGET} CMAKE_ARGS='-DBUILD_WITH_AWS=ON -DBUILD_WITH_ORC=ON -DCMAKE_BUILD_TYPE=Release' CMAKE_BUILD_TYPE=Release" export CIBW_BUILD="${CIBW_BUILD}" -export CIBW_PROJECT_REQUIRES_PYTHON=">=3.8" +export CIBW_PROJECT_REQUIRES_PYTHON=">=3.9" # uncomment to increase verbosity of cibuildwheel export CIBW_BUILD_VERBOSITY=3 diff --git a/scripts/build_macos_wheels_with_test.sh b/scripts/build_macos_wheels_with_test.sh index 6ced7cfcb..c10d55180 100755 --- a/scripts/build_macos_wheels_with_test.sh +++ b/scripts/build_macos_wheels_with_test.sh @@ -51,10 +51,10 @@ echo "-- Detected Xcode ${xcode_version_str}" # if no param is given, use defaults to build all if [ "${arch}" = "arm64" ]; then - # build Python 3.9 - 3.11 - CIBW_BUILD=${CIBW_BUILD-"cp3{9,10,11}-macosx_arm64"} + # build Python 3.9 - 3.13 + CIBW_BUILD=${CIBW_BUILD-"cp3{9,10,11,12,13}-macosx_arm64"} else - # build Python 3.8 - 3.11 - CIBW_BUILD=${CIBW_BUILD-"cp3{8,9,10,11}-macosx_x86_64"} + # build Python 3.9 - 3.13 + CIBW_BUILD=${CIBW_BUILD-"cp3{9,10,11,12,13}-macosx_x86_64"} fi echo "-- Building wheels for ${CIBW_BUILD}" @@ -92,7 +92,7 @@ export CIBW_ENVIRONMENT_MACOS="MACOSX_DEPLOYMENT_TARGET=${MINIMUM_TARGET} CMAKE_ #export CIBW_ENVIRONMENT_MACOS="MACOSX_DEPLOYMENT_TARGET=${MINIMUM_TARGET} 
CMAKE_ARGS='-DBUILD_WITH_AWS=ON -DBUILD_WITH_ORC=ON' TUPLEX_BUILD_TYPE=Debug" export CIBW_BUILD="${CIBW_BUILD}" -export CIBW_PROJECT_REQUIRES_PYTHON=">=3.8" +export CIBW_PROJECT_REQUIRES_PYTHON=">=3.9" # uncomment to increase verbosity of cibuildwheel export CIBW_BUILD_VERBOSITY=3 diff --git a/setup.py b/setup.py index 5d2e86484..3d26dada7 100644 --- a/setup.py +++ b/setup.py @@ -695,7 +695,7 @@ def tplx_package_data(): # The information here can also be placed in setup.cfg - better separation of # logic and declaration, and simpler if you include description/version in a file. setup(name="tuplex", - python_requires='>=3.8.0', + python_requires='>=3.9.0', version="0.3.7", author="Leonhard Spiegelberg", author_email="tuplex@cs.brown.edu", @@ -735,10 +735,11 @@ def tplx_package_data(): # Specify the Python versions you support here. In particular, ensure # that you indicate whether you support Python 2, Python 3 or both. - 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', ], scripts=['tuplex/historyserver/bin/tuplex-webui'], project_urls={ diff --git a/tuplex/adapters/cpython/include/PythonHelpers.h b/tuplex/adapters/cpython/include/PythonHelpers.h index 4ed86197e..58389b98c 100644 --- a/tuplex/adapters/cpython/include/PythonHelpers.h +++ b/tuplex/adapters/cpython/include/PythonHelpers.h @@ -319,6 +319,13 @@ namespace python { */ extern void runGC(); + /*! + * checks whether a python error is set; if so, extracts error and traceback into a string and resets + * the interpreter's error flag. + * @return string describing the error (incl. traceback) if a python error was set, empty string otherwise + */ + extern std::string extract_and_reset_py_error(); + /*! 
* check whether Python interpreter is running in/available to this process * @return bool when is running else false diff --git a/tuplex/adapters/cpython/src/PythonHelpers.cc b/tuplex/adapters/cpython/src/PythonHelpers.cc index 7f37dd615..f26184989 100644 --- a/tuplex/adapters/cpython/src/PythonHelpers.cc +++ b/tuplex/adapters/cpython/src/PythonHelpers.cc @@ -52,7 +52,7 @@ namespace python { Py_SetPythonHome(&vec[0]); } - void handle_and_throw_py_error() { + std::string extract_and_reset_py_error() { if(PyErr_Occurred()) { PyObject *ptype = NULL, *pvalue = NULL, *ptraceback = NULL; PyErr_Fetch(&ptype,&pvalue,&ptraceback); @@ -107,8 +107,15 @@ namespace python { } Py_XDECREF(lines_obj); - throw std::runtime_error(ss.str()); + return ss.str(); } + return ""; + } + + void handle_and_throw_py_error() { + auto err = extract_and_reset_py_error(); + if(!err.empty()) + throw std::runtime_error(err); } diff --git a/tuplex/codegen/tools/.gitignore b/tuplex/codegen/tools/.gitignore new file mode 100644 index 000000000..77185d1d7 --- /dev/null +++ b/tuplex/codegen/tools/.gitignore @@ -0,0 +1 @@ +antlr-*-complete.jar diff --git a/tuplex/core/include/HybridHashTable.h b/tuplex/core/include/HybridHashTable.h index 55472d04e..c26a9c5c1 100644 --- a/tuplex/core/include/HybridHashTable.h +++ b/tuplex/core/include/HybridHashTable.h @@ -16,6 +16,12 @@ #include #include +// Python 3.13 moved internal APIs from modsupport.h to internal/pycore_modsupport.h; PY_VERSION_HEX compares major and minor together +#if PY_VERSION_HEX >= 0x030D0000 +#define Py_BUILD_CORE +#include <internal/pycore_modsupport.h> +#endif + namespace tuplex { /*! 
diff --git a/tuplex/historyserver/thserver/version.py b/tuplex/historyserver/thserver/version.py index 22182bb91..8a14b5846 100644 --- a/tuplex/historyserver/thserver/version.py +++ b/tuplex/historyserver/thserver/version.py @@ -1,2 +1,2 @@ -# (c) L.Spiegelberg 2017 - 2024 +# (c) L.Spiegelberg 2017 - 2025 __version__="0.3.7" \ No newline at end of file diff --git a/tuplex/python/setup.py b/tuplex/python/setup.py index b59fe4c86..cf6b2b100 100644 --- a/tuplex/python/setup.py +++ b/tuplex/python/setup.py @@ -64,12 +64,4 @@ 'iso8601' ], url="https://tuplex.cs.brown.edu" - #, - # project_urls={ - # "Bug Tracker": "https://bugs.example.com/HelloWorld/", - # "Documentation": "https://docs.example.com/HelloWorld/", - # "Source Code": "https://code.example.com/HelloWorld/", - # } - - # could also include long_description, download_url, classifiers, etc. ) diff --git a/tuplex/python/src/PythonContext.cc b/tuplex/python/src/PythonContext.cc index 2d9b11acb..79f9e1ad1 100644 --- a/tuplex/python/src/PythonContext.cc +++ b/tuplex/python/src/PythonContext.cc @@ -256,7 +256,7 @@ namespace tuplex { check = check ? PyTuple_Size(obj) == numTupleElements : false; if(check) { - // it's a tuple with macthing size + // it's a tuple with matching size // first get how many bytes are required size_t requiredBytes = baseRequiredBytes; if(varLenField) { @@ -265,7 +265,21 @@ namespace tuplex { if (typeStr[j] == 's') { auto tupleItem = PyTuple_GET_ITEM(obj, j); if (PyUnicode_Check(tupleItem)) { - requiredBytes += PyUnicode_GET_SIZE(tupleItem) + 1; // +1 for '\0' + // new: + Py_ssize_t utf8str_size = -1; + auto utf8str = PyUnicode_AsUTF8AndSize(tupleItem, &utf8str_size); + requiredBytes += utf8str_size + 1; // +1 for '\0'. + if(utf8str_size == -1 || !utf8str) { + // error happened, translate and create error dataset. 
+ auto err= python::extract_and_reset_py_error(); + if(err.empty()) { + err = "PyUnicode_AsUTF8AndSize error, but not python error set."; + } + return _context->makeError(err); + } + + // old: + // requiredBytes += PyUnicode_GET_SIZE(tupleItem) + 1; // +1 for '\0' } else { nonConforming = true; break; @@ -330,18 +344,31 @@ namespace tuplex { if(!PyUnicode_Check(el)) goto bad_element; - auto utf8ptr = PyUnicode_AsUTF8(el); - auto len = PyUnicode_GET_SIZE(el); + // new: + Py_ssize_t utf8str_size = -1; + auto utf8str = PyUnicode_AsUTF8AndSize(el, &utf8str_size); + if(utf8str_size == -1 || !utf8str) { + // error happened, translate and create error dataset. + auto err= python::extract_and_reset_py_error(); + if(err.empty()) { + err = "PyUnicode_AsUTF8AndSize error, but not python error set."; + } + return _context->makeError(err); + } + + // // old: + // auto utf8ptr = PyUnicode_AsUTF8(el); + // auto len = PyUnicode_GET_SIZE(el); - assert(len == strlen(utf8ptr)); - size_t varFieldSize = len + 1; // + 1 for '\0' char! + assert(utf8str_size == strlen(utf8str)); + size_t varFieldSize = utf8str_size + 1; // + 1 for '\0' char! size_t varLenOffset = (numTupleElements + 1 - j) * sizeof(int64_t) + rowVarFieldSizes; // 16 bytes offset int64_t info_field = varLenOffset | (varFieldSize << 32); *((int64_t*)(ptr)) = info_field; // copy string contents - memcpy(ptr + varLenOffset, utf8ptr, len + 1); // +1 for 0 delimiter + memcpy(ptr + varLenOffset, utf8str, utf8str_size + 1); // +1 for 0 delimiter ptr += sizeof(int64_t); // move to next field rowVarFieldSizes += varFieldSize; @@ -502,11 +529,25 @@ namespace tuplex { // (3) is the actual string content (incl. '\0' delimiter) if(PyUnicode_Check(obj)) { - auto len = PyUnicode_GET_SIZE(obj); + // new: + Py_ssize_t utf8str_size = -1; + auto utf8str = PyUnicode_AsUTF8AndSize(obj, &utf8str_size); + if(utf8str_size == -1 || !utf8str) { + // error happened, translate and create error dataset. 
+ auto err= python::extract_and_reset_py_error(); + if(err.empty()) { + err = "PyUnicode_AsUTF8AndSize error, but not python error set."; + } + return _context->makeError(err); + } + + + // // old: + // auto len = PyUnicode_GET_SIZE(obj); + // auto utf8ptr = PyUnicode_AsUTF8(obj); - auto utf8ptr = PyUnicode_AsUTF8(obj); - size_t requiredBytes = sizeof(int64_t) * 2 + len + 1; + size_t requiredBytes = sizeof(int64_t) * 2 + utf8str_size + 1; // check capacity and realloc if necessary get a new partition if(partition->capacity() < numBytesSerialized + requiredBytes) { @@ -525,9 +566,9 @@ namespace tuplex { numBytesSerialized = 0; } - assert(len == strlen(utf8ptr)); + assert(utf8str_size == strlen(utf8str)); - size_t varFieldSize = len + 1; // + 1 for '\0' char! + size_t varFieldSize = utf8str_size + 1; // + 1 for '\0' char! size_t varLenOffset = 2 * sizeof(int64_t); // 16 bytes offset int64_t info_field = varLenOffset | (varFieldSize << 32); @@ -535,7 +576,7 @@ namespace tuplex { // after fixed length fields comes total varlen info field *((int64_t*)(ptr + sizeof(int64_t))) = varFieldSize; // copy string contents - memcpy(ptr + sizeof(int64_t) * 2, utf8ptr, len + 1); // +1 for 0 delimiter + memcpy(ptr + sizeof(int64_t) * 2, utf8str, utf8str_size + 1); // +1 for 0 delimiter ptr += requiredBytes; *rawPtr = *rawPtr + 1; numBytesSerialized += requiredBytes; diff --git a/tuplex/python/tuplex/utils/version.py b/tuplex/python/tuplex/utils/version.py index 22182bb91..8a14b5846 100644 --- a/tuplex/python/tuplex/utils/version.py +++ b/tuplex/python/tuplex/utils/version.py @@ -1,2 +1,2 @@ -# (c) L.Spiegelberg 2017 - 2024 +# (c) L.Spiegelberg 2017 - 2025 __version__="0.3.7" \ No newline at end of file