From 52698259a9e40dd54329328253edf5241a1ac4ad Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Sun, 23 Feb 2025 21:40:27 -0800 Subject: [PATCH 1/7] update python versions --- .github/workflows/build_wheels.yml | 20 +++++++++++++------- README.md | 6 +----- scripts/build_linux_wheels.sh | 6 +----- scripts/build_linux_wheels_with_test.sh | 8 ++------ scripts/build_macos_wheels.sh | 10 +++++----- scripts/build_macos_wheels_with_test.sh | 6 +++--- setup.py | 5 +++-- tuplex/historyserver/thserver/version.py | 2 +- tuplex/python/setup.py | 8 -------- tuplex/python/tuplex/utils/version.py | 2 +- 10 files changed, 30 insertions(+), 43 deletions(-) diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index a58e554b1..8fed9c163 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -14,11 +14,8 @@ jobs: matrix: # macos-14 (which is macos-latest) is ARM only. macos-13 is latest intel runner. os: [ ubuntu-latest, macos-13 ] - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] include: - - os: ubuntu-latest - python-version: "3.8" - cibw-build: "cp38-manylinux_x86_64" - os: ubuntu-latest python-version: "3.9" cibw-build: "cp39-manylinux_x86_64" @@ -28,9 +25,12 @@ jobs: - os: ubuntu-latest python-version: "3.11" cibw-build: "cp311-manylinux_x86_64" - - os: macos-13 - python-version: "3.8" - cibw-build: "cp38-macosx_x86_64" + - os: ubuntu-latest + python-version: "3.12" + cibw-build: "cp311-manylinux_x86_64" + - os: ubuntu-latest + python-version: "3.13" + cibw-build: "cp311-manylinux_x86_64" - os: macos-13 python-version: "3.9" cibw-build: "cp39-macosx_x86_64" @@ -40,6 +40,12 @@ jobs: - os: macos-13 python-version: "3.11" cibw-build: "cp311-macosx_x86_64" + - os: macos-13 + python-version: "3.12" + cibw-build: "cp310-macosx_x86_64" + - os: macos-13 + python-version: "3.13" + cibw-build: "cp311-macosx_x86_64" steps: - uses: actions/checkout@v4 diff --git a/README.md b/README.md index 081d0c109..6161b900c 100644 --- a/README.md +++ b/README.md @@ -2,19 +2,15 @@ [![Build Status](https://dev.azure.com/leonhardspiegelberg/Tuplex%20-%20Open%20Source/_apis/build/status/tuplex.tuplex?branchName=master)](https://dev.azure.com/leonhardspiegelberg/Tuplex%20-%20Open%20Source/_build/latest?definitionId=2&branchName=master) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) -![Supported python versions](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue) -[![PyPi Downloads](https://img.shields.io/pypi/dm/tuplex)](https://img.shields.io/pypi/dm/tuplex) +![Supported python versions](https://img.shields.io/badge/3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue) [Website](https://tuplex.cs.brown.edu/) [Documentation](https://tuplex.cs.brown.edu/python-api.html) Tuplex is a parallel big data processing framework that runs data science pipelines written in Python at the speed of compiled code. Tuplex has similar Python APIs to [Apache Spark](https://spark.apache.org/) or [Dask](https://dask.org/), but rather than invoking the Python interpreter, Tuplex generates optimized LLVM bytecode for the given pipeline and input data set. Under the hood, Tuplex is based on data-driven compilation and dual-mode processing, two key techniques that make it possible for Tuplex to provide speed comparable to a pipeline written in hand-optimized C++. -You can join the discussion on Tuplex on our [Gitter community](https://gitter.im/tuplex/community) or read up more on the background of Tuplex in our [SIGMOD'21 paper](https://dl.acm.org/doi/abs/10.1145/3448016.3457244). - Contributions welcome! - ### Contents + [Example](#example) + [Quickstart](#quickstart) diff --git a/scripts/build_linux_wheels.sh b/scripts/build_linux_wheels.sh index 9bf5885fb..28f0cf4e1 100755 --- a/scripts/build_linux_wheels.sh +++ b/scripts/build_linux_wheels.sh @@ -39,16 +39,12 @@ fi export CIBW_ENVIRONMENT="TUPLEX_LAMBDA_ZIP='./tuplex/other/tplxlam.zip' CMAKE_ARGS='-DBUILD_WITH_AWS=ON -DBUILD_WITH_ORC=ON' LD_LIBRARY_PATH=/usr/local/lib:/opt/lib" # Use the following line to build only python3.7-3.9 wheel -export CIBW_BUILD="cp3{8,9,10,11}-*" +export CIBW_BUILD="cp3{9,10,11,12,13}-*" export CIBW_ARCHS_LINUX="x86_64" # do not build musllinux yet export CIBW_SKIP="*-musllinux_*" -# to test the others from 3.7-3.9, use these two lines: -#export CIBW_BUILD="cp3{7,8,9}-*" -#export CIBW_SKIP="cp3{5,6,7,8}-macosx* pp*" - export CIBW_BUILD_VERBOSITY=3 export CIBW_PROJECT_REQUIRES_PYTHON=">=3.8" diff --git a/scripts/build_linux_wheels_with_test.sh b/scripts/build_linux_wheels_with_test.sh index 6830bb0b2..a595de289 100755 --- a/scripts/build_linux_wheels_with_test.sh +++ b/scripts/build_linux_wheels_with_test.sh @@ -39,18 +39,14 @@ fi export CIBW_ENVIRONMENT="TUPLEX_LAMBDA_ZIP='./tuplex/other/tplxlam.zip' CMAKE_ARGS='-DBUILD_WITH_AWS=ON -DBUILD_WITH_ORC=ON' LD_LIBRARY_PATH=/usr/local/lib:/opt/lib" # Use the following line to build only python3.7-3.9 wheel -export CIBW_BUILD="cp3{8,9,10,11}-*" +export CIBW_BUILD="cp3{9,10,11,12,13}-*" export CIBW_ARCHS_LINUX="x86_64" # do not build musllinux yet export CIBW_SKIP="*-musllinux_*" -# to test the others from 3.7-3.9, use these two lines: -#export CIBW_BUILD="cp3{7,8,9}-*" -#export CIBW_SKIP="cp3{5,6,7,8}-macosx* pp*" - export CIBW_BUILD_VERBOSITY=3 -export CIBW_PROJECT_REQUIRES_PYTHON=">=3.8" +export CIBW_PROJECT_REQUIRES_PYTHON=">=3.9" # uncomment to increase verbosity of cibuildwheel # export CIBW_BUILD_VERBOSITY=3 diff --git a/scripts/build_macos_wheels.sh b/scripts/build_macos_wheels.sh index 63246f974..2e19c2a47 100755 --- a/scripts/build_macos_wheels.sh +++ b/scripts/build_macos_wheels.sh @@ -50,11 +50,11 @@ echo "-- Detected Xcode ${xcode_version_str}" # if no param is given, use defaults to build all if [ "${arch}" = "arm64" ]; then - # build Python 3.9 - 3.11 - CIBW_BUILD=${CIBW_BUILD-"cp3{9,10,11}-macosx_arm64"} + # build Python 3.9 - 3.13 + CIBW_BUILD=${CIBW_BUILD-"cp3{9,10,11,12,13}-macosx_arm64"} else - # build Python 3.8 - 3.11 - CIBW_BUILD=${CIBW_BUILD-"cp3{8,9,10,11}-macosx_x86_64"} + # build Python 3.9 - 3.13 + CIBW_BUILD=${CIBW_BUILD-"cp3{9,10,11,12,13}-macosx_x86_64"} fi echo "-- Building wheels for ${CIBW_BUILD}" @@ -82,7 +82,7 @@ export CIBW_BEFORE_BUILD_MACOS="brew install protobuf coreutils zstd zlib libmag export CIBW_ENVIRONMENT_MACOS="MACOSX_DEPLOYMENT_TARGET=${MINIMUM_TARGET} CMAKE_ARGS='-DBUILD_WITH_AWS=ON -DBUILD_WITH_ORC=ON -DCMAKE_BUILD_TYPE=Release' CMAKE_BUILD_TYPE=Release" export CIBW_BUILD="${CIBW_BUILD}" -export CIBW_PROJECT_REQUIRES_PYTHON=">=3.8" +export CIBW_PROJECT_REQUIRES_PYTHON=">=3.9" # uncomment to increase verbosity of cibuildwheel export CIBW_BUILD_VERBOSITY=3 diff --git a/scripts/build_macos_wheels_with_test.sh b/scripts/build_macos_wheels_with_test.sh index 6ced7cfcb..c10d55180 100755 --- a/scripts/build_macos_wheels_with_test.sh +++ b/scripts/build_macos_wheels_with_test.sh @@ -51,10 +51,10 @@ echo "-- Detected Xcode ${xcode_version_str}" # if no param is given, use defaults to build all if [ "${arch}" = "arm64" ]; then # build Python 3.9 - 3.11 - CIBW_BUILD=${CIBW_BUILD-"cp3{9,10,11}-macosx_arm64"} + CIBW_BUILD=${CIBW_BUILD-"cp3{9,10,11,12,13}-macosx_arm64"} else # build Python 3.8 - 3.11 - CIBW_BUILD=${CIBW_BUILD-"cp3{8,9,10,11}-macosx_x86_64"} + CIBW_BUILD=${CIBW_BUILD-"cp3{9,10,11,12,13}-macosx_x86_64"} fi echo "-- Building wheels for ${CIBW_BUILD}" @@ -92,7 +92,7 @@ export CIBW_ENVIRONMENT_MACOS="MACOSX_DEPLOYMENT_TARGET=${MINIMUM_TARGET} CMAKE_ #export CIBW_ENVIRONMENT_MACOS="MACOSX_DEPLOYMENT_TARGET=${MINIMUM_TARGET} CMAKE_ARGS='-DBUILD_WITH_AWS=ON -DBUILD_WITH_ORC=ON' TUPLEX_BUILD_TYPE=Debug" export CIBW_BUILD="${CIBW_BUILD}" -export CIBW_PROJECT_REQUIRES_PYTHON=">=3.8" +export CIBW_PROJECT_REQUIRES_PYTHON=">=3.9" # uncomment to increase verbosity of cibuildwheel export CIBW_BUILD_VERBOSITY=3 diff --git a/setup.py b/setup.py index 5d2e86484..3d26dada7 100644 --- a/setup.py +++ b/setup.py @@ -695,7 +695,7 @@ def tplx_package_data(): # The information here can also be placed in setup.cfg - better separation of # logic and declaration, and simpler if you include description/version in a file. setup(name="tuplex", - python_requires='>=3.8.0', + python_requires='>=3.9.0', version="0.3.7", author="Leonhard Spiegelberg", author_email="tuplex@cs.brown.edu", @@ -735,10 +735,11 @@ def tplx_package_data(): # Specify the Python versions you support here. In particular, ensure # that you indicate whether you support Python 2, Python 3 or both. - 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', ], scripts=['tuplex/historyserver/bin/tuplex-webui'], project_urls={ diff --git a/tuplex/historyserver/thserver/version.py b/tuplex/historyserver/thserver/version.py index 22182bb91..8a14b5846 100644 --- a/tuplex/historyserver/thserver/version.py +++ b/tuplex/historyserver/thserver/version.py @@ -1,2 +1,2 @@ -# (c) L.Spiegelberg 2017 - 2024 +# (c) L.Spiegelberg 2017 - 2025 __version__="0.3.7" \ No newline at end of file diff --git a/tuplex/python/setup.py b/tuplex/python/setup.py index b59fe4c86..cf6b2b100 100644 --- a/tuplex/python/setup.py +++ b/tuplex/python/setup.py @@ -64,12 +64,4 @@ 'iso8601' ], url="https://tuplex.cs.brown.edu" - #, - # project_urls={ - # "Bug Tracker": "https://bugs.example.com/HelloWorld/", - # "Documentation": "https://docs.example.com/HelloWorld/", - # "Source Code": "https://code.example.com/HelloWorld/", - # } - - # could also include long_description, download_url, classifiers, etc. ) diff --git a/tuplex/python/tuplex/utils/version.py b/tuplex/python/tuplex/utils/version.py index 22182bb91..8a14b5846 100644 --- a/tuplex/python/tuplex/utils/version.py +++ b/tuplex/python/tuplex/utils/version.py @@ -1,2 +1,2 @@ -# (c) L.Spiegelberg 2017 - 2024 +# (c) L.Spiegelberg 2017 - 2025 __version__="0.3.7" \ No newline at end of file From 5bc588a6ff4686bd1ff60bca093c0430252b5f3f Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Sun, 23 Feb 2025 21:46:11 -0800 Subject: [PATCH 2/7] skip ubuntu 3.13 for now --- .github/workflows/build_wheels.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 8fed9c163..61417d4ef 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -28,9 +28,10 @@ jobs: - os: ubuntu-latest python-version: "3.12" cibw-build: "cp311-manylinux_x86_64" - - os: ubuntu-latest - python-version: "3.13" - cibw-build: "cp311-manylinux_x86_64" +# Need to upload newer image first. +# - os: ubuntu-latest +# python-version: "3.13" +# cibw-build: "cp311-manylinux_x86_64" - os: macos-13 python-version: "3.9" cibw-build: "cp39-macosx_x86_64" From fc5736986cbfab075f8146cde40f674c3f893a3e Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Sun, 23 Feb 2025 21:48:07 -0800 Subject: [PATCH 3/7] comment 3.13 --- .github/workflows/build_wheels.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 61417d4ef..09e23d3bf 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -12,9 +12,9 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - # macos-14 (which is macos-latest) is ARM only. macos-13 is latest intel runner. + # macos-14 (which is macos-latest) is ARM only. macos-13 is the latest intel runner. os: [ ubuntu-latest, macos-13 ] - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.9", "3.10", "3.11", "3.12"] include: - os: ubuntu-latest python-version: "3.9" @@ -44,9 +44,9 @@ jobs: - os: macos-13 python-version: "3.12" cibw-build: "cp310-macosx_x86_64" - - os: macos-13 - python-version: "3.13" - cibw-build: "cp311-macosx_x86_64" +# - os: macos-13 +# python-version: "3.13" +# cibw-build: "cp311-macosx_x86_64" steps: - uses: actions/checkout@v4 From 028652ab853566bb2abcbdea37101e78c9fd6ff3 Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Sun, 23 Feb 2025 21:57:25 -0800 Subject: [PATCH 4/7] tpoy --- .github/workflows/build_wheels.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 09e23d3bf..ffbd34932 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -27,11 +27,11 @@ jobs: cibw-build: "cp311-manylinux_x86_64" - os: ubuntu-latest python-version: "3.12" - cibw-build: "cp311-manylinux_x86_64" + cibw-build: "cp312-manylinux_x86_64" # Need to upload newer image first. # - os: ubuntu-latest # python-version: "3.13" -# cibw-build: "cp311-manylinux_x86_64" +# cibw-build: "cp313-manylinux_x86_64" - os: macos-13 python-version: "3.9" cibw-build: "cp39-macosx_x86_64" @@ -43,10 +43,10 @@ jobs: cibw-build: "cp311-macosx_x86_64" - os: macos-13 python-version: "3.12" - cibw-build: "cp310-macosx_x86_64" + cibw-build: "cp312-macosx_x86_64" # - os: macos-13 # python-version: "3.13" -# cibw-build: "cp311-macosx_x86_64" +# cibw-build: "cp313-macosx_x86_64" steps: - uses: actions/checkout@v4 From d9d6b0303d5fd142d5d05df4ce22f0e8a818e1eb Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Mon, 24 Feb 2025 23:23:33 -0800 Subject: [PATCH 5/7] draft for unicode replacement --- .github/workflows/build_wheels.yml | 13 ++-- .../adapters/cpython/include/PythonHelpers.h | 7 +++ tuplex/adapters/cpython/src/PythonHelpers.cc | 11 +++- tuplex/codegen/tools/.gitignore | 1 + tuplex/python/src/PythonContext.cc | 59 ++++++++++++++++--- 5 files changed, 74 insertions(+), 17 deletions(-) create mode 100644 tuplex/codegen/tools/.gitignore diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index ffbd34932..dd00135d7 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -28,10 +28,9 @@ jobs: - os: ubuntu-latest python-version: "3.12" cibw-build: "cp312-manylinux_x86_64" -# Need to upload newer image first. -# - os: ubuntu-latest -# python-version: "3.13" -# cibw-build: "cp313-manylinux_x86_64" + - os: ubuntu-latest + python-version: "3.13" + cibw-build: "cp313-manylinux_x86_64" - os: macos-13 python-version: "3.9" cibw-build: "cp39-macosx_x86_64" @@ -44,9 +43,9 @@ jobs: - os: macos-13 python-version: "3.12" cibw-build: "cp312-macosx_x86_64" -# - os: macos-13 -# python-version: "3.13" -# cibw-build: "cp313-macosx_x86_64" + - os: macos-13 + python-version: "3.13" + cibw-build: "cp313-macosx_x86_64" steps: - uses: actions/checkout@v4 diff --git a/tuplex/adapters/cpython/include/PythonHelpers.h b/tuplex/adapters/cpython/include/PythonHelpers.h index 4ed86197e..58389b98c 100644 --- a/tuplex/adapters/cpython/include/PythonHelpers.h +++ b/tuplex/adapters/cpython/include/PythonHelpers.h @@ -319,6 +319,13 @@ namespace python { */ extern void runGC(); + /*! + * checks whether python error is set, if so extracts error and traceback into string and resets + * interpreter's error flag. + * @return + */ + extern std::string extract_and_reset_py_error(); + /*! * check whether Python interpreter is running in/available to this process * @return bool when is running else false diff --git a/tuplex/adapters/cpython/src/PythonHelpers.cc b/tuplex/adapters/cpython/src/PythonHelpers.cc index 7f37dd615..f26184989 100644 --- a/tuplex/adapters/cpython/src/PythonHelpers.cc +++ b/tuplex/adapters/cpython/src/PythonHelpers.cc @@ -52,7 +52,7 @@ namespace python { Py_SetPythonHome(&vec[0]); } - void handle_and_throw_py_error() { + std::string extract_and_reset_py_error() { if(PyErr_Occurred()) { PyObject *ptype = NULL, *pvalue = NULL, *ptraceback = NULL; PyErr_Fetch(&ptype,&pvalue,&ptraceback); @@ -107,8 +107,15 @@ namespace python { } Py_XDECREF(lines_obj); - throw std::runtime_error(ss.str()); + return ss.str(); } + return ""; + } + + void handle_and_throw_py_error() { + auto err = extract_and_reset_py_error(); + if(!err.empty()) + throw std::runtime_error(err); } diff --git a/tuplex/codegen/tools/.gitignore b/tuplex/codegen/tools/.gitignore new file mode 100644 index 000000000..77185d1d7 --- /dev/null +++ b/tuplex/codegen/tools/.gitignore @@ -0,0 +1 @@ +antlr-*-complete.jar diff --git a/tuplex/python/src/PythonContext.cc b/tuplex/python/src/PythonContext.cc index 2d9b11acb..368c5574a 100644 --- a/tuplex/python/src/PythonContext.cc +++ b/tuplex/python/src/PythonContext.cc @@ -265,7 +265,21 @@ namespace tuplex { if (typeStr[j] == 's') { auto tupleItem = PyTuple_GET_ITEM(obj, j); if (PyUnicode_Check(tupleItem)) { - requiredBytes += PyUnicode_GET_SIZE(tupleItem) + 1; // +1 for '\0' + // new: + Py_ssize_t utf8str_size = -1; + auto uft8str = PyUnicode_AsUTF8AndSize(tupleItem, &utf8str_size); + requiredBytes += utf8ste_size + 1; // +1 for '\0'. + if(utf8str_size == -1 || !utf8str) { + // error happened, translate and create error dataset. + auto err= extract_and_reset_py_error(); + if(err.empty()) { + err = "PyUnicode_AsUTF8AndSize error, but not python error set."; + } + return _context->makeError(err) + } + + // old: + // requiredBytes += PyUnicode_GET_SIZE(tupleItem) + 1; // +1 for '\0' } else { nonConforming = true; break; @@ -330,8 +344,22 @@ namespace tuplex { if(!PyUnicode_Check(el)) goto bad_element; - auto utf8ptr = PyUnicode_AsUTF8(el); - auto len = PyUnicode_GET_SIZE(el); + // new: + Py_ssize_t utf8str_size = -1; + auto uft8str = PyUnicode_AsUTF8AndSize(tupleItem, &utf8str_size); + requiredBytes += utf8ste_size + 1; // +1 for '\0'. + if(utf8str_size == -1 || !utf8str) { + // error happened, translate and create error dataset. + auto err= extract_and_reset_py_error(); + if(err.empty()) { + err = "PyUnicode_AsUTF8AndSize error, but not python error set."; + } + return _context->makeError(err) + } + + // // old: + // auto utf8ptr = PyUnicode_AsUTF8(el); + // auto len = PyUnicode_GET_SIZE(el); assert(len == strlen(utf8ptr)); size_t varFieldSize = len + 1; // + 1 for '\0' char! @@ -341,7 +369,7 @@ namespace tuplex { *((int64_t*)(ptr)) = info_field; // copy string contents - memcpy(ptr + varLenOffset, utf8ptr, len + 1); // +1 for 0 delimiter + memcpy(ptr + varLenOffset, utf8str, utf8str_size + 1); // +1 for 0 delimiter ptr += sizeof(int64_t); // move to next field rowVarFieldSizes += varFieldSize; @@ -502,9 +530,24 @@ namespace tuplex { // (3) is the actual string content (incl. '\0' delimiter) if(PyUnicode_Check(obj)) { - auto len = PyUnicode_GET_SIZE(obj); + // new: + Py_ssize_t utf8str_size = -1; + auto uft8str = PyUnicode_AsUTF8AndSize(tupleItem, &utf8str_size); + requiredBytes += utf8ste_size + 1; // +1 for '\0'. + if(utf8str_size == -1 || !utf8str) { + // error happened, translate and create error dataset. + auto err= extract_and_reset_py_error(); + if(err.empty()) { + err = "PyUnicode_AsUTF8AndSize error, but not python error set."; + } + return _context->makeError(err) + } + + + // // old: + // auto len = PyUnicode_GET_SIZE(obj); + // auto utf8ptr = PyUnicode_AsUTF8(obj); - auto utf8ptr = PyUnicode_AsUTF8(obj); size_t requiredBytes = sizeof(int64_t) * 2 + len + 1; @@ -527,7 +570,7 @@ namespace tuplex { assert(len == strlen(utf8ptr)); - size_t varFieldSize = len + 1; // + 1 for '\0' char! + size_t varFieldSize = utf8str_size + 1; // + 1 for '\0' char! size_t varLenOffset = 2 * sizeof(int64_t); // 16 bytes offset int64_t info_field = varLenOffset | (varFieldSize << 32); @@ -535,7 +578,7 @@ namespace tuplex { // after fixed length fields comes total varlen info field *((int64_t*)(ptr + sizeof(int64_t))) = varFieldSize; // copy string contents - memcpy(ptr + sizeof(int64_t) * 2, utf8ptr, len + 1); // +1 for 0 delimiter + memcpy(ptr + sizeof(int64_t) * 2, utf8str, utf8str_size + 1); // +1 for 0 delimiter ptr += requiredBytes; *rawPtr = *rawPtr + 1; numBytesSerialized += requiredBytes; From d01bf8a46c6537de38244ca2d1c89926ac6c2c1c Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Mon, 24 Feb 2025 23:48:35 -0800 Subject: [PATCH 6/7] fixes for unicode --- .github/workflows/build_wheels.yml | 2 +- tuplex/python/src/PythonContext.cc | 32 ++++++++++++++---------------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index dd00135d7..f005a593d 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -14,7 +14,7 @@ jobs: matrix: # macos-14 (which is macos-latest) is ARM only. macos-13 is the latest intel runner. os: [ ubuntu-latest, macos-13 ] - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] include: - os: ubuntu-latest python-version: "3.9" diff --git a/tuplex/python/src/PythonContext.cc b/tuplex/python/src/PythonContext.cc index 368c5574a..79f9e1ad1 100644 --- a/tuplex/python/src/PythonContext.cc +++ b/tuplex/python/src/PythonContext.cc @@ -256,7 +256,7 @@ namespace tuplex { check = check ? PyTuple_Size(obj) == numTupleElements : false; if(check) { - // it's a tuple with macthing size + // it's a tuple with matching size // first get how many bytes are required size_t requiredBytes = baseRequiredBytes; if(varLenField) { @@ -267,15 +267,15 @@ namespace tuplex { if (PyUnicode_Check(tupleItem)) { // new: Py_ssize_t utf8str_size = -1; - auto uft8str = PyUnicode_AsUTF8AndSize(tupleItem, &utf8str_size); - requiredBytes += utf8ste_size + 1; // +1 for '\0'. + auto utf8str = PyUnicode_AsUTF8AndSize(tupleItem, &utf8str_size); + requiredBytes += utf8str_size + 1; // +1 for '\0'. if(utf8str_size == -1 || !utf8str) { // error happened, translate and create error dataset. - auto err= extract_and_reset_py_error(); + auto err= python::extract_and_reset_py_error(); if(err.empty()) { err = "PyUnicode_AsUTF8AndSize error, but not python error set."; } - return _context->makeError(err) + return _context->makeError(err); } // old: @@ -346,23 +346,22 @@ namespace tuplex { // new: Py_ssize_t utf8str_size = -1; - auto uft8str = PyUnicode_AsUTF8AndSize(tupleItem, &utf8str_size); - requiredBytes += utf8ste_size + 1; // +1 for '\0'. + auto utf8str = PyUnicode_AsUTF8AndSize(el, &utf8str_size); if(utf8str_size == -1 || !utf8str) { // error happened, translate and create error dataset. - auto err= extract_and_reset_py_error(); + auto err= python::extract_and_reset_py_error(); if(err.empty()) { err = "PyUnicode_AsUTF8AndSize error, but not python error set."; } - return _context->makeError(err) + return _context->makeError(err); } // // old: // auto utf8ptr = PyUnicode_AsUTF8(el); // auto len = PyUnicode_GET_SIZE(el); - assert(len == strlen(utf8ptr)); - size_t varFieldSize = len + 1; // + 1 for '\0' char! + assert(utf8str_size == strlen(utf8str)); + size_t varFieldSize = utf8str_size + 1; // + 1 for '\0' char! size_t varLenOffset = (numTupleElements + 1 - j) * sizeof(int64_t) + rowVarFieldSizes; // 16 bytes offset int64_t info_field = varLenOffset | (varFieldSize << 32); @@ -532,15 +531,14 @@ namespace tuplex { // new: Py_ssize_t utf8str_size = -1; - auto uft8str = PyUnicode_AsUTF8AndSize(tupleItem, &utf8str_size); - requiredBytes += utf8ste_size + 1; // +1 for '\0'. + auto utf8str = PyUnicode_AsUTF8AndSize(obj, &utf8str_size); if(utf8str_size == -1 || !utf8str) { // error happened, translate and create error dataset. - auto err= extract_and_reset_py_error(); + auto err= python::extract_and_reset_py_error(); if(err.empty()) { err = "PyUnicode_AsUTF8AndSize error, but not python error set."; } - return _context->makeError(err) + return _context->makeError(err); } @@ -549,7 +547,7 @@ namespace tuplex { // auto utf8ptr = PyUnicode_AsUTF8(obj); - size_t requiredBytes = sizeof(int64_t) * 2 + len + 1; + size_t requiredBytes = sizeof(int64_t) * 2 + utf8str_size + 1; // check capacity and realloc if necessary get a new partition if(partition->capacity() < numBytesSerialized + requiredBytes) { @@ -568,7 +566,7 @@ namespace tuplex { numBytesSerialized = 0; } - assert(len == strlen(utf8ptr)); + assert(utf8str_size == strlen(utf8str)); size_t varFieldSize = utf8str_size + 1; // + 1 for '\0' char! size_t varLenOffset = 2 * sizeof(int64_t); // 16 bytes offset From 325039ceb08e6745ceae45523c2590a7e132bca5 Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Tue, 25 Feb 2025 18:08:29 -0800 Subject: [PATCH 7/7] fix usage of internal APIs for python 3.13+ --- tuplex/core/include/HybridHashTable.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tuplex/core/include/HybridHashTable.h b/tuplex/core/include/HybridHashTable.h index 55472d04e..c26a9c5c1 100644 --- a/tuplex/core/include/HybridHashTable.h +++ b/tuplex/core/include/HybridHashTable.h @@ -16,6 +16,12 @@ #include #include +// Python 3.13 moved internal APIs from modsupport.h to internal/pycore_modsupport.h +#if PY_MAJOR_VERSION >=3 && PY_MINOR_VERSION >= 13 +#define Py_BUILD_CORE +#include +#endif + namespace tuplex { /*!