这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 14 additions & 8 deletions .github/workflows/build_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,10 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
# macos-14 (which is macos-latest) is ARM only. macos-13 is latest intel runner.
# macos-14 (which is macos-latest) is ARM only. macos-13 is the latest intel runner.
os: [ ubuntu-latest, macos-13 ]
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
include:
- os: ubuntu-latest
python-version: "3.8"
cibw-build: "cp38-manylinux_x86_64"
- os: ubuntu-latest
python-version: "3.9"
cibw-build: "cp39-manylinux_x86_64"
Expand All @@ -28,9 +25,12 @@ jobs:
- os: ubuntu-latest
python-version: "3.11"
cibw-build: "cp311-manylinux_x86_64"
- os: macos-13
python-version: "3.8"
cibw-build: "cp38-macosx_x86_64"
- os: ubuntu-latest
python-version: "3.12"
cibw-build: "cp312-manylinux_x86_64"
- os: ubuntu-latest
python-version: "3.13"
cibw-build: "cp313-manylinux_x86_64"
- os: macos-13
python-version: "3.9"
cibw-build: "cp39-macosx_x86_64"
Expand All @@ -40,6 +40,12 @@ jobs:
- os: macos-13
python-version: "3.11"
cibw-build: "cp311-macosx_x86_64"
- os: macos-13
python-version: "3.12"
cibw-build: "cp312-macosx_x86_64"
- os: macos-13
python-version: "3.13"
cibw-build: "cp313-macosx_x86_64"
steps:
- uses: actions/checkout@v4

Expand Down
6 changes: 1 addition & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,15 @@

[![Build Status](https://dev.azure.com/leonhardspiegelberg/Tuplex%20-%20Open%20Source/_apis/build/status/tuplex.tuplex?branchName=master)](https://dev.azure.com/leonhardspiegelberg/Tuplex%20-%20Open%20Source/_build/latest?definitionId=2&branchName=master)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
![Supported python versions](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue)
[![PyPi Downloads](https://img.shields.io/pypi/dm/tuplex)](https://img.shields.io/pypi/dm/tuplex)
![Supported python versions](https://img.shields.io/badge/3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue)

[Website](https://tuplex.cs.brown.edu/) [Documentation](https://tuplex.cs.brown.edu/python-api.html)

Tuplex is a parallel big data processing framework that runs data science pipelines written in Python at the speed of compiled code.
Tuplex has similar Python APIs to [Apache Spark](https://spark.apache.org/) or [Dask](https://dask.org/), but rather than invoking the Python interpreter, Tuplex generates optimized LLVM bytecode for the given pipeline and input data set. Under the hood, Tuplex is based on data-driven compilation and dual-mode processing, two key techniques that make it possible for Tuplex to provide speed comparable to a pipeline written in hand-optimized C++.

You can join the discussion on Tuplex on our [Gitter community](https://gitter.im/tuplex/community) or read up more on the background of Tuplex in our [SIGMOD'21 paper](https://dl.acm.org/doi/abs/10.1145/3448016.3457244).

Contributions welcome!


### Contents
+ [Example](#example)
+ [Quickstart](#quickstart)
Expand Down
6 changes: 1 addition & 5 deletions scripts/build_linux_wheels.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,12 @@ fi
export CIBW_ENVIRONMENT="TUPLEX_LAMBDA_ZIP='./tuplex/other/tplxlam.zip' CMAKE_ARGS='-DBUILD_WITH_AWS=ON -DBUILD_WITH_ORC=ON' LD_LIBRARY_PATH=/usr/local/lib:/opt/lib"

# Build wheels only for the supported CPython versions (3.9 - 3.13)
export CIBW_BUILD="cp3{8,9,10,11}-*"
export CIBW_BUILD="cp3{9,10,11,12,13}-*"
export CIBW_ARCHS_LINUX="x86_64"

# do not build musllinux yet
export CIBW_SKIP="*-musllinux_*"

# to test the others from 3.7-3.9, use these two lines:
#export CIBW_BUILD="cp3{7,8,9}-*"
#export CIBW_SKIP="cp3{5,6,7,8}-macosx* pp*"

# verbose cibuildwheel output
export CIBW_BUILD_VERBOSITY=3
# minimum supported interpreter; keep in sync with the cp39+ selection in CIBW_BUILD
export CIBW_PROJECT_REQUIRES_PYTHON=">=3.9"

Expand Down
8 changes: 2 additions & 6 deletions scripts/build_linux_wheels_with_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,14 @@ fi
export CIBW_ENVIRONMENT="TUPLEX_LAMBDA_ZIP='./tuplex/other/tplxlam.zip' CMAKE_ARGS='-DBUILD_WITH_AWS=ON -DBUILD_WITH_ORC=ON' LD_LIBRARY_PATH=/usr/local/lib:/opt/lib"

# Build wheels only for the supported CPython versions (3.9 - 3.13)
export CIBW_BUILD="cp3{8,9,10,11}-*"
export CIBW_BUILD="cp3{9,10,11,12,13}-*"
export CIBW_ARCHS_LINUX="x86_64"

# do not build musllinux yet
export CIBW_SKIP="*-musllinux_*"

# to test the others from 3.7-3.9, use these two lines:
#export CIBW_BUILD="cp3{7,8,9}-*"
#export CIBW_SKIP="cp3{5,6,7,8}-macosx* pp*"

export CIBW_BUILD_VERBOSITY=3
export CIBW_PROJECT_REQUIRES_PYTHON=">=3.8"
export CIBW_PROJECT_REQUIRES_PYTHON=">=3.9"

# uncomment to increase verbosity of cibuildwheel
# export CIBW_BUILD_VERBOSITY=3
Expand Down
10 changes: 5 additions & 5 deletions scripts/build_macos_wheels.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,11 @@ echo "-- Detected Xcode ${xcode_version_str}"

# if no param is given, use defaults to build all
if [ "${arch}" = "arm64" ]; then
# build Python 3.9 - 3.11
CIBW_BUILD=${CIBW_BUILD-"cp3{9,10,11}-macosx_arm64"}
# build Python 3.9 - 3.13
CIBW_BUILD=${CIBW_BUILD-"cp3{9,10,11,12,13}-macosx_arm64"}
else
# build Python 3.8 - 3.11
CIBW_BUILD=${CIBW_BUILD-"cp3{8,9,10,11}-macosx_x86_64"}
# build Python 3.9 - 3.13
CIBW_BUILD=${CIBW_BUILD-"cp3{9,10,11,12,13}-macosx_x86_64"}
fi

echo "-- Building wheels for ${CIBW_BUILD}"
Expand Down Expand Up @@ -82,7 +82,7 @@ export CIBW_BEFORE_BUILD_MACOS="brew install protobuf coreutils zstd zlib libmag
export CIBW_ENVIRONMENT_MACOS="MACOSX_DEPLOYMENT_TARGET=${MINIMUM_TARGET} CMAKE_ARGS='-DBUILD_WITH_AWS=ON -DBUILD_WITH_ORC=ON -DCMAKE_BUILD_TYPE=Release' CMAKE_BUILD_TYPE=Release"

export CIBW_BUILD="${CIBW_BUILD}"
export CIBW_PROJECT_REQUIRES_PYTHON=">=3.8"
export CIBW_PROJECT_REQUIRES_PYTHON=">=3.9"

# uncomment to increase verbosity of cibuildwheel
export CIBW_BUILD_VERBOSITY=3
Expand Down
6 changes: 3 additions & 3 deletions scripts/build_macos_wheels_with_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ echo "-- Detected Xcode ${xcode_version_str}"
# if no param is given, use defaults to build all
if [ "${arch}" = "arm64" ]; then
# build Python 3.9 - 3.13
CIBW_BUILD=${CIBW_BUILD-"cp3{9,10,11}-macosx_arm64"}
CIBW_BUILD=${CIBW_BUILD-"cp3{9,10,11,12,13}-macosx_arm64"}
else
# build Python 3.9 - 3.13
CIBW_BUILD=${CIBW_BUILD-"cp3{8,9,10,11}-macosx_x86_64"}
CIBW_BUILD=${CIBW_BUILD-"cp3{9,10,11,12,13}-macosx_x86_64"}
fi

echo "-- Building wheels for ${CIBW_BUILD}"
Expand Down Expand Up @@ -92,7 +92,7 @@ export CIBW_ENVIRONMENT_MACOS="MACOSX_DEPLOYMENT_TARGET=${MINIMUM_TARGET} CMAKE_
#export CIBW_ENVIRONMENT_MACOS="MACOSX_DEPLOYMENT_TARGET=${MINIMUM_TARGET} CMAKE_ARGS='-DBUILD_WITH_AWS=ON -DBUILD_WITH_ORC=ON' TUPLEX_BUILD_TYPE=Debug"

export CIBW_BUILD="${CIBW_BUILD}"
export CIBW_PROJECT_REQUIRES_PYTHON=">=3.8"
export CIBW_PROJECT_REQUIRES_PYTHON=">=3.9"

# uncomment to increase verbosity of cibuildwheel
export CIBW_BUILD_VERBOSITY=3
Expand Down
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -695,7 +695,7 @@ def tplx_package_data():
# The information here can also be placed in setup.cfg - better separation of
# logic and declaration, and simpler if you include description/version in a file.
setup(name="tuplex",
python_requires='>=3.8.0',
python_requires='>=3.9.0',
version="0.3.7",
author="Leonhard Spiegelberg",
author_email="tuplex@cs.brown.edu",
Expand Down Expand Up @@ -735,10 +735,11 @@ def tplx_package_data():

# Specify the Python versions you support here. In particular, ensure
# that you indicate whether you support Python 2, Python 3 or both.
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
'Programming Language :: Python :: 3.13',
],
scripts=['tuplex/historyserver/bin/tuplex-webui'],
project_urls={
Expand Down
7 changes: 7 additions & 0 deletions tuplex/adapters/cpython/include/PythonHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,13 @@ namespace python {
*/
extern void runGC();

/*!
 * checks whether a python error is set; if so, extracts error and traceback into a string and
 * resets the interpreter's error flag. Does not throw; use handle_and_throw_py_error() to turn a
 * pending python error into a C++ exception.
 * @return formatted error message (incl. traceback), or an empty string if no python error was set.
 */
extern std::string extract_and_reset_py_error();

/*!
* check whether Python interpreter is running in/available to this process
* @return bool when is running else false
Expand Down
11 changes: 9 additions & 2 deletions tuplex/adapters/cpython/src/PythonHelpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ namespace python {
Py_SetPythonHome(&vec[0]);
}

void handle_and_throw_py_error() {
std::string extract_and_reset_py_error() {
if(PyErr_Occurred()) {
PyObject *ptype = NULL, *pvalue = NULL, *ptraceback = NULL;
PyErr_Fetch(&ptype,&pvalue,&ptraceback);
Expand Down Expand Up @@ -107,8 +107,15 @@ namespace python {
}
Py_XDECREF(lines_obj);

throw std::runtime_error(ss.str());
return ss.str();
}
return "";
}

/*!
 * converts a pending python error into a C++ exception.
 * The message is obtained via extract_and_reset_py_error(); an empty message means
 * no python error was set, in which case the function simply returns.
 */
void handle_and_throw_py_error() {
    const std::string message = extract_and_reset_py_error();

    // nothing pending -> nothing to throw
    if(message.empty())
        return;

    throw std::runtime_error(message);
}


Expand Down
1 change: 1 addition & 0 deletions tuplex/codegen/tools/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
antlr-*-complete.jar
6 changes: 6 additions & 0 deletions tuplex/core/include/HybridHashTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@
#include <physical/TransformTask.h>
#include <TypeSystem.h>

// Python 3.13 moved internal APIs from modsupport.h to internal/pycore_modsupport.h
#if PY_MAJOR_VERSION >=3 && PY_MINOR_VERSION >= 13
#define Py_BUILD_CORE
#include <internal/pycore_modsupport.h>
#endif

namespace tuplex {

/*!
Expand Down
2 changes: 1 addition & 1 deletion tuplex/historyserver/thserver/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# (c) L.Spiegelberg 2017 - 2024
# (c) L.Spiegelberg 2017 - 2025
__version__="0.3.7"
8 changes: 0 additions & 8 deletions tuplex/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,4 @@
'iso8601'
],
url="https://tuplex.cs.brown.edu"
#,
# project_urls={
# "Bug Tracker": "https://bugs.example.com/HelloWorld/",
# "Documentation": "https://docs.example.com/HelloWorld/",
# "Source Code": "https://code.example.com/HelloWorld/",
# }

# could also include long_description, download_url, classifiers, etc.
)
67 changes: 54 additions & 13 deletions tuplex/python/src/PythonContext.cc
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ namespace tuplex {
check = check ? PyTuple_Size(obj) == numTupleElements : false;
if(check) {

// it's a tuple with macthing size
// it's a tuple with matching size
// first get how many bytes are required
size_t requiredBytes = baseRequiredBytes;
if(varLenField) {
Expand All @@ -265,7 +265,21 @@ namespace tuplex {
if (typeStr[j] == 's') {
auto tupleItem = PyTuple_GET_ITEM(obj, j);
if (PyUnicode_Check(tupleItem)) {
requiredBytes += PyUnicode_GET_SIZE(tupleItem) + 1; // +1 for '\0'
// new:
Py_ssize_t utf8str_size = -1;
auto utf8str = PyUnicode_AsUTF8AndSize(tupleItem, &utf8str_size);
requiredBytes += utf8str_size + 1; // +1 for '\0'.
if(utf8str_size == -1 || !utf8str) {
// error happened, translate and create error dataset.
auto err= python::extract_and_reset_py_error();
if(err.empty()) {
err = "PyUnicode_AsUTF8AndSize error, but not python error set.";
}
return _context->makeError(err);
}

// old:
// requiredBytes += PyUnicode_GET_SIZE(tupleItem) + 1; // +1 for '\0'
} else {
nonConforming = true;
break;
Expand Down Expand Up @@ -330,18 +344,31 @@ namespace tuplex {
if(!PyUnicode_Check(el))
goto bad_element;

auto utf8ptr = PyUnicode_AsUTF8(el);
auto len = PyUnicode_GET_SIZE(el);
// new:
Py_ssize_t utf8str_size = -1;
auto utf8str = PyUnicode_AsUTF8AndSize(el, &utf8str_size);
if(utf8str_size == -1 || !utf8str) {
// error happened, translate and create error dataset.
auto err= python::extract_and_reset_py_error();
if(err.empty()) {
err = "PyUnicode_AsUTF8AndSize error, but not python error set.";
}
return _context->makeError(err);
}

// // old:
// auto utf8ptr = PyUnicode_AsUTF8(el);
// auto len = PyUnicode_GET_SIZE(el);

assert(len == strlen(utf8ptr));
size_t varFieldSize = len + 1; // + 1 for '\0' char!
assert(utf8str_size == strlen(utf8str));
size_t varFieldSize = utf8str_size + 1; // + 1 for '\0' char!
size_t varLenOffset = (numTupleElements + 1 - j) * sizeof(int64_t) + rowVarFieldSizes; // 16 bytes offset
int64_t info_field = varLenOffset | (varFieldSize << 32);

*((int64_t*)(ptr)) = info_field;

// copy string contents
memcpy(ptr + varLenOffset, utf8ptr, len + 1); // +1 for 0 delimiter
memcpy(ptr + varLenOffset, utf8str, utf8str_size + 1); // +1 for 0 delimiter
ptr += sizeof(int64_t); // move to next field
rowVarFieldSizes += varFieldSize;

Expand Down Expand Up @@ -502,11 +529,25 @@ namespace tuplex {
// (3) is the actual string content (incl. '\0' delimiter)
if(PyUnicode_Check(obj)) {

auto len = PyUnicode_GET_SIZE(obj);
// new:
Py_ssize_t utf8str_size = -1;
auto utf8str = PyUnicode_AsUTF8AndSize(obj, &utf8str_size);
if(utf8str_size == -1 || !utf8str) {
// error happened, translate and create error dataset.
auto err= python::extract_and_reset_py_error();
if(err.empty()) {
err = "PyUnicode_AsUTF8AndSize error, but not python error set.";
}
return _context->makeError(err);
}


// // old:
// auto len = PyUnicode_GET_SIZE(obj);
// auto utf8ptr = PyUnicode_AsUTF8(obj);

auto utf8ptr = PyUnicode_AsUTF8(obj);

size_t requiredBytes = sizeof(int64_t) * 2 + len + 1;
size_t requiredBytes = sizeof(int64_t) * 2 + utf8str_size + 1;

// check capacity and realloc if necessary get a new partition
if(partition->capacity() < numBytesSerialized + requiredBytes) {
Expand All @@ -525,17 +566,17 @@ namespace tuplex {
numBytesSerialized = 0;
}

assert(len == strlen(utf8ptr));
assert(utf8str_size == strlen(utf8str));

size_t varFieldSize = len + 1; // + 1 for '\0' char!
size_t varFieldSize = utf8str_size + 1; // + 1 for '\0' char!
size_t varLenOffset = 2 * sizeof(int64_t); // 16 bytes offset
int64_t info_field = varLenOffset | (varFieldSize << 32);

*((int64_t*)(ptr)) = info_field;
// after fixed length fields comes total varlen info field
*((int64_t*)(ptr + sizeof(int64_t))) = varFieldSize;
// copy string contents
memcpy(ptr + sizeof(int64_t) * 2, utf8ptr, len + 1); // +1 for 0 delimiter
memcpy(ptr + sizeof(int64_t) * 2, utf8str, utf8str_size + 1); // +1 for 0 delimiter
ptr += requiredBytes;
*rawPtr = *rawPtr + 1;
numBytesSerialized += requiredBytes;
Expand Down
2 changes: 1 addition & 1 deletion tuplex/python/tuplex/utils/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# (c) L.Spiegelberg 2017 - 2024
# (c) L.Spiegelberg 2017 - 2025
__version__="0.3.7"
Loading