这是indexloc提供的服务,不要输入任何密码
Skip to content
2 changes: 1 addition & 1 deletion scripts/azure/install_azure_ci_reqs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ mkdir -p ${WORKDIR}/antlr && cd ${WORKDIR}/antlr \
&& make -j$(nproc) && make install
mkdir -p ${WORKDIR}/aws && cd ${WORKDIR}/aws \
&& git clone --recurse-submodules https://github.com/aws/aws-sdk-cpp.git \
&& cd aws-sdk-cpp && git checkout tags/1.11.164 && mkdir build && cd build \
&& cd aws-sdk-cpp && git checkout tags/1.11.524 && mkdir build && cd build \
&& cmake -DCMAKE_BUILD_TYPE=Release -DUSE_OPENSSL=ON -DENABLE_TESTING=OFF -DENABLE_UNITY_BUILD=ON -DCPP_STANDARD=17 -DBUILD_SHARED_LIBS=OFF -DBUILD_ONLY="s3;core;lambda;transfer" -DCMAKE_INSTALL_PREFIX=${PREFIX} .. \
&& make -j$(nproc) \
&& make install
Expand Down
2 changes: 1 addition & 1 deletion scripts/docker/ci/install_tuplex_reqs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
set -euxo pipefail

# dependency versions
AWSSDK_CPP_VERSION=1.11.164
AWSSDK_CPP_VERSION=1.11.524
ANTLR4_VERSION=4.13.1
YAML_CPP_VERSION=0.8.0
AWS_LAMBDA_CPP_VERSION=0.2.8
Expand Down
2 changes: 1 addition & 1 deletion scripts/generate_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def configure_versions(osname):
VERSIONS['YAMLCPP_VERSION'] = '0.8.0'
VERSIONS['CELERO_VERSION'] = '2.8.3'
VERSIONS['ANTLR_VERSION'] = '4.13.1'
VERSIONS['AWSSDK_VERSION'] = '1.11.164'
VERSIONS['AWSSDK_VERSION'] = '1.11.524'
VERSIONS['AWSLAMBDACPP_VERSION'] = '0.2.8'
VERSIONS['PCRE2_VERSION'] = '10.42'
VERSIONS['PROTOBUF_VERSION'] = '24.3'
Expand Down
2 changes: 1 addition & 1 deletion scripts/macos/install_antlr4_cpp_runtime.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,6 @@ git clone https://github.com/antlr/antlr4.git \
ls -l $PREFIX/include
ls -l $PREFIX/lib

cp lib/libantlr4-runtime.dylib /Users/runner/work/tuplex/tuplex/libantlr4-runtime.dylib
cp $PREFIX/lib/libantlr4-runtime.dylib /Users/runner/work/tuplex/tuplex/libantlr4-runtime.dylib || echo "cp failed."

exit 0
2 changes: 1 addition & 1 deletion scripts/macos/install_aws-sdk-cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
set -euxo pipefail

PREFIX=${PREFIX:-/usr/local}
AWSSDK_CPP_VERSION=1.11.164
AWSSDK_CPP_VERSION=1.11.524 # need at least 1.11.267 because of pyarrow bugs...

# check if dir exists (i.e. restored from cache, then skip)
if [ -d "${PREFIX}/include/aws" ]; then
Expand Down
2 changes: 1 addition & 1 deletion scripts/ubuntu2004/install_requirements.sh
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ mkdir -p ${WORKDIR}/protobuf && cd ${WORKDIR}/protobuf && git clone -b v24.3 htt
echo ">> Installing AWS SDK"
mkdir -p ${WORKDIR}/aws && cd ${WORKDIR}/aws \
&& git clone --recurse-submodules https://github.com/aws/aws-sdk-cpp.git \
&& cd aws-sdk-cpp && git checkout tags/1.11.164 && mkdir build && cd build \
&& cd aws-sdk-cpp && git checkout tags/1.11.524 && mkdir build && cd build \
&& cmake -DCMAKE_BUILD_TYPE=Release -DUSE_OPENSSL=ON -DENABLE_TESTING=OFF -DENABLE_UNITY_BUILD=ON -DCPP_STANDARD=17 -DBUILD_SHARED_LIBS=OFF -DBUILD_ONLY="s3;core;lambda;transfer" -DCMAKE_INSTALL_PREFIX=${PREFIX} .. \
&& make -j$(nproc) \
&& make install
Expand Down
2 changes: 1 addition & 1 deletion scripts/ubuntu2204/install_requirements.sh
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ mkdir -p ${WORKDIR}/protobuf && cd ${WORKDIR}/protobuf && git clone -b v24.3 htt
echo ">> Installing AWS SDK"
mkdir -p ${WORKDIR}/aws && cd ${WORKDIR}/aws \
&& git clone --recurse-submodules https://github.com/aws/aws-sdk-cpp.git \
&& cd aws-sdk-cpp && git checkout tags/1.11.164 && mkdir build && cd build \
&& cd aws-sdk-cpp && git checkout tags/1.11.524 && mkdir build && cd build \
&& cmake -DCMAKE_BUILD_TYPE=Release -DUSE_OPENSSL=ON -DENABLE_TESTING=OFF -DENABLE_UNITY_BUILD=ON -DCPP_STANDARD=17 -DBUILD_SHARED_LIBS=OFF -DBUILD_ONLY="s3;core;lambda;transfer" -DCMAKE_INSTALL_PREFIX=${PREFIX} .. \
&& make -j$(nproc) \
&& make install
Expand Down
1 change: 1 addition & 0 deletions tuplex/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ if(BUILD_WITH_AWS)
endif()
endif()
find_package(AWSSDK REQUIRED COMPONENTS s3 core lambda transfer)
message(STATUS "AWS SDK version: ${AWSSDK_VERSION}")
message(STATUS "AWS libs: ${AWSSDK_LINK_LIBRARIES}")
message(STATUS "AWS include dirs: ${AWSSDK_INCLUDE_DIR}")
if(AWSSDK_FOUND)
Expand Down
7 changes: 7 additions & 0 deletions tuplex/io/include/AWSCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ namespace tuplex {
* @return true/false.
*/
extern bool isValidAWSZone(const std::string& zone);

/*!
* Tell Tuplex whether the AWS SDK should be treated as already initialized, e.g. because
* another component in the same process has already initialized the AWS SDK in some form.
* @param overrideAwssdkInitializedValue
*/
extern void setExternalAwssdk(bool overrideAwssdkInitializedValue);
}

// Amazon frequently changes the parameters of lambda functions,
Expand Down
6 changes: 6 additions & 0 deletions tuplex/io/src/AWSCommon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ static std::string throw_if_missing_envvar(const std::string &name) {
static bool isAWSInitialized = false;
static Aws::SDKOptions aws_options;

namespace tuplex {
// Overwrites the file-level isAWSInitialized flag with the caller-supplied value.
// This only records the value; it does not itself initialize or shut down the AWS SDK.
// NOTE(review): the code that consumes isAWSInitialized is not visible here -- confirm how
// the init/shutdown paths react to an externally forced value.
void setExternalAwssdk(bool overrideAwssdkInitializedValue) {
isAWSInitialized = overrideAwssdkInitializedValue;
}
}

// for Lambda, check: https://docs.aws.amazon.com/code-samples/latest/catalog/cpp-lambda-lambda_example.cpp.html

// https://sdk.amazonaws.com/cpp/api/LATEST/class_aws_1_1_utils_1_1_logging_1_1_formatted_log_system.html
Expand Down
1 change: 1 addition & 0 deletions tuplex/python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ FILE(COPY ${CMAKE_CURRENT_SOURCE_DIR}/tuplex/utils/__init__.py
${CMAKE_CURRENT_SOURCE_DIR}/tuplex/utils/tracebacks.py
${CMAKE_CURRENT_SOURCE_DIR}/tuplex/utils/version.py
${CMAKE_CURRENT_SOURCE_DIR}/tuplex/utils/globs.py
${CMAKE_CURRENT_SOURCE_DIR}/tuplex/utils/dllist.py
DESTINATION ${PYTHON_DIST_DIR}/tuplex/utils)

FILE(COPY ${CMAKE_CURRENT_SOURCE_DIR}/tests/test_tuples.py
Expand Down
13 changes: 13 additions & 0 deletions tuplex/python/include/PythonCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,19 @@ namespace tuplex {

return py::cast<py::list>(listObj);
}

extern py::object getPythonVersion();

#ifndef BUILD_WITH_AWS
    // if not building with aws, define dummy function, else this function lives in AWSCommon.h/cc
    /*!
     * No-op stand-in for builds without AWS support. It keeps the same signature as the AWS-backed
     * function so callers can reference setExternalAwssdk regardless of BUILD_WITH_AWS.
     * @param overrideAwssdkInitializedValue ignored in this build configuration.
     */
    inline void setExternalAwssdk(bool overrideAwssdkInitializedValue) {
        // [[nodiscard]] was dropped: the function returns void, so there is nothing to discard.
        // Explicitly mark the parameter as unused to keep warning-clean builds.
        (void)overrideAwssdkInitializedValue;
    }
#endif
}

#endif //TUPLEX_PYTHONCOMMON_H
22 changes: 22 additions & 0 deletions tuplex/python/src/PythonBindings.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,24 @@ PYMODULE {
m.attr("__version__") = "dev";
#endif

// Perform cleanup (e.g., AWS SDK shutdown if necessary to avoid an endless loop at exit).
// The callback is wrapped in a capsule stored on the module as "_cleanup", so that it is invoked
// when that capsule object is collected.
// cf. https://pybind11.readthedocs.io/en/stable/advanced/misc.html
auto cleanup_callback = []() {
// perform cleanup here -- this function is called with the GIL held
// std::cout<<"Pybind11 clean up call here."<<std::endl;

// When using the AWS SDK, it is important to explicitly call the SDK shutdown. The AWS SDK creates a
// threadpool; if shutdown is not issued, an endless loop occurs at exit because the SDK waits on mutexes.
#ifdef BUILD_WITH_AWS
// std::cout<<"Shutting down AWS SDK."<<std::endl;
tuplex::shutdownAWS();
// std::cout<<"AWS cleanup done."<<std::endl;
#endif
};

m.add_object("_cleanup", py::capsule(cleanup_callback));

// Note: before constructing any object - call registerWithInterpreter to setup GIL properly!

py::class_<tuplex::PythonDataSet>(m, "_DataSet")
Expand Down Expand Up @@ -94,4 +112,8 @@ PYMODULE {
m.def("registerLoggingCallback", &tuplex::registerPythonLoggingCallback);

m.def("registerWithInterpreter", &python::registerWithInterpreter);

m.def("getPythonVersion", &tuplex::getPythonVersion);

m.def("setExternalAwssdk", &tuplex::setExternalAwssdk);
}
8 changes: 8 additions & 0 deletions tuplex/python/src/PythonCommon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ backward::SignalHandling sh;
#endif

namespace tuplex {

py::object getPythonVersion() {
    // Format the compile-time CPython version macros as "<PY_VERSION> (<PY_VERSION_HEX>)".
    // PY_VERSION_HEX is streamed with the default integer formatting, i.e. it is printed in decimal.
    std::ostringstream formatted;
    formatted << PY_VERSION;
    formatted << " (" << PY_VERSION_HEX << ")";
    return py::str(formatted.str());
}

py::object registerPythonLoggingCallback(py::object callback_functor) {
python::registerWithInterpreter();

Expand Down
5 changes: 5 additions & 0 deletions tuplex/python/tuplex/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

import logging

from .utils.common import pyarrow_aws_sdk_cpp_check

try:
from .libexec.tuplex import _Context, getDefaultOptionsAsJSON
except ModuleNotFoundError as e:
Expand Down Expand Up @@ -231,6 +233,9 @@ def __init__(
options["tuplex.webui.enable"] = options["webui"]
del options["webui"]

# Ensure no crash due to PyArrow potentially being present.
pyarrow_aws_sdk_cpp_check()

# last arg are the options as json string serialized b.c. of boost python problems
self._context = _Context(name, runtime_path, json.dumps(options))
python_metrics = self._context.getMetrics()
Expand Down
30 changes: 30 additions & 0 deletions tuplex/python/tuplex/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
import psutil
import yaml

from tuplex.utils.dllist import dllist

try:
import pwd
except ImportError:
Expand Down Expand Up @@ -1077,3 +1079,31 @@ def ensure_webui(options: dict) -> None:
# log gunicorn errors for local startup
if os.path.isfile(gunicorn_logpath) and "localhost" == webui_url:
log_gunicorn_errors(gunicorn_logpath)


def pyarrow_aws_sdk_cpp_check() -> None:
    """Fail early with an actionable error if an incompatible PyArrow is already loaded on macOS.

    Call this function BEFORE initializing the _Context object from the tuplex C extension object.

    The check only runs on macOS (posix + darwin). It inspects the shared objects reported by
    ``dllist()``; if any path contains ``"pyarrow/lib"``, pyarrow is imported and its major version
    is examined.

    Raises:
        RuntimeError: if pyarrow with major version >= 13 is already loaded in the process.
    """
    # Newer PyArrow versions use a more recent version of the AWS SDK, which leads to pyarrow crashing
    # other libraries under macOS. Raise an explicit error here instead of running into a segfault.
    if not (os.name == "posix" and sys.platform == "darwin"):
        return

    # NOTE(review): dllist() is assumed to return an iterable of path strings; `or []` merely
    # guards against it returning None -- confirm against tuplex.utils.dllist.
    loaded_shared_objects = dllist() or []
    pyarrow_loaded = any("pyarrow/lib" in path for path in loaded_shared_objects)
    if not pyarrow_loaded:
        return

    import pyarrow as pa

    # Only the major version matters. Parsing every component with int() would raise ValueError
    # for pre-release/dev versions such as "14.0.0rc1" or "15.0.0.dev1".
    try:
        pyarrow_major = int(pa.__version__.split(".", 1)[0])
    except ValueError:
        # Unknown version scheme: this is a best-effort check, so do not block startup.
        return

    # PyArrow has since v13+ a bug which crashes other libraries due to bad use of the AWS SDK.
    # cf. https://github.com/aws/aws-sdk-cpp/issues/2699 which has been merged,
    # but whose solution has not been reflected in pyarrow yet.
    # Tell the user what can be done about it.
    if pyarrow_major >= 13:
        raise RuntimeError(
            # f-string so that the actual version is shown instead of the literal placeholder.
            f"PyArrow {pa.__version__} present in process and loaded or imported before tuplex."
            " If you need to import/load pyarrow first, only compatible with pyarrow versions < 13.0.0."
            " If you must use pyarrow >= 13.0.0, import tuplex first and then load pyarrow. "
            "Note that pyarrow < 13.0.0 is not compatible with numpy >= 2.0."
        )
Loading
Loading