diff --git a/pyproject.toml b/pyproject.toml index b0fe0164d..1475264ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ requires = [ "cloudpickle", "numpy", "ninja; sys_platform != 'win32'", - "cmake>=3.12,<3.22", + "cmake>=3.19,<3.22", "delocate; sys.platform == 'darwin'", "auditwheel; sys.platform == 'linux'", "requests" diff --git a/scripts/docker/benchmark/install_cmake.sh b/scripts/docker/benchmark/install_cmake.sh index 979008c89..ae98567c7 100644 --- a/scripts/docker/benchmark/install_cmake.sh +++ b/scripts/docker/benchmark/install_cmake.sh @@ -5,15 +5,16 @@ apt-get install -y curl # fetch recent cmake & install CMAKE_VER_MAJOR=3 -CMAKE_VER_MINOR=19 -CMAKE_VER_PATCH=7 +CMAKE_VER_MINOR=23 +CMAKE_VER_PATCH=2 CMAKE_VER="${CMAKE_VER_MAJOR}.${CMAKE_VER_MINOR}" CMAKE_VERSION="${CMAKE_VER}.${CMAKE_VER_PATCH}" +URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz mkdir -p /tmp/build && cd /tmp/build && - curl -sSL https://cmake.org/files/v${CMAKE_VER}/cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz >cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz && - tar -v -zxf cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz && - rm -f cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz && - cd cmake-${CMAKE_VERSION}-Linux-x86_64 && + curl -sSL $URL -o cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && + tar -v -zxf cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && + rm -f cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && + cd cmake-${CMAKE_VERSION}-linux-x86_64 && cp -rp bin/* /usr/local/bin/ && cp -rp share/* /usr/local/share/ && cd / && rm -rf /tmp/build \ No newline at end of file diff --git a/scripts/docker/ci/install_cmake.sh b/scripts/docker/ci/install_cmake.sh index 979008c89..ae98567c7 100644 --- a/scripts/docker/ci/install_cmake.sh +++ b/scripts/docker/ci/install_cmake.sh @@ -5,15 +5,16 @@ apt-get install -y curl # fetch recent cmake & install CMAKE_VER_MAJOR=3 -CMAKE_VER_MINOR=19 -CMAKE_VER_PATCH=7 +CMAKE_VER_MINOR=23 
+CMAKE_VER_PATCH=2 CMAKE_VER="${CMAKE_VER_MAJOR}.${CMAKE_VER_MINOR}" CMAKE_VERSION="${CMAKE_VER}.${CMAKE_VER_PATCH}" +URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz mkdir -p /tmp/build && cd /tmp/build && - curl -sSL https://cmake.org/files/v${CMAKE_VER}/cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz >cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz && - tar -v -zxf cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz && - rm -f cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz && - cd cmake-${CMAKE_VERSION}-Linux-x86_64 && + curl -sSL $URL -o cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && + tar -v -zxf cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && + rm -f cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && + cd cmake-${CMAKE_VERSION}-linux-x86_64 && cp -rp bin/* /usr/local/bin/ && cp -rp share/* /usr/local/share/ && cd / && rm -rf /tmp/build \ No newline at end of file diff --git a/scripts/set_version.py b/scripts/set_version.py index 6b0fac696..faf8a2424 100755 --- a/scripts/set_version.py +++ b/scripts/set_version.py @@ -20,20 +20,23 @@ def LooseVersion(v): # https://pypi.org/simple/tuplex/ # or https://test.pypi.org/simple/tuplex/ def get_latest_pypi_version(url='https://pypi.org/simple/tuplex/'): - r = requests.get(url) + try: + r = requests.get(url) + + # parse all strings from page + links = re.findall(r'href=[\'"]?([^\'" >]+)', r.text) - # parse all strings from page - links = re.findall(r'href=[\'"]?([^\'" >]+)', r.text) + links = list(filter(lambda s: 'tuplex' in s, map(lambda s: s[s.find('tuplex'):s.rfind('.whl')], links))) - links = list(filter(lambda s: 'tuplex' in s, map(lambda s: s[s.find('tuplex'):s.rfind('.whl')], links))) + # extract version string & sort + links = {link[len('tuplex-'):link.find('-cp')] for link in links} - # extract version string & sort - links = {link[len('tuplex-'):link.find('-cp')] for link in links} + links = sorted(list(links), key=LooseVersion) - links = sorted(list(links), 
key=LooseVersion) - - # what's the latest version? - return links[-1] + # what's the latest version? + return links[-1] + except: + return None if __name__ == '__main__': file_handler = logging.FileHandler(filename='version.log') @@ -65,6 +68,9 @@ def get_latest_pypi_version(url='https://pypi.org/simple/tuplex/'): logging.info('latest pypi.org version of tuplex is: {}'.format(version_pypi)) logging.info('latest test.pypi.org version of tuplex is: {}'.format(version_test)) + if version_test is None and version_pypi is not None: + version_test = version_pypi + if args.dev: # get from testpypi (dynamic renaming basically) major, minor, patch = version_test.split('.') diff --git a/scripts/ubuntu1804/install_reqs.sh b/scripts/ubuntu1804/install_reqs.sh index 90b9c835f..1bb12b720 100644 --- a/scripts/ubuntu1804/install_reqs.sh +++ b/scripts/ubuntu1804/install_reqs.sh @@ -41,15 +41,16 @@ python3.7 -m pip install --upgrade pip # fetch recent cmake & install CMAKE_VER_MAJOR=3 -CMAKE_VER_MINOR=16 -CMAKE_VER_PATCH=4 +CMAKE_VER_MINOR=23 +CMAKE_VER_PATCH=2 CMAKE_VER="${CMAKE_VER_MAJOR}.${CMAKE_VER_MINOR}" CMAKE_VERSION="${CMAKE_VER}.${CMAKE_VER_PATCH}" +URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz mkdir -p /tmp/build && cd /tmp/build && - curl -sSL https://cmake.org/files/v${CMAKE_VER}/cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz >cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz && - tar -v -zxf cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz && - rm -f cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz && - cd cmake-${CMAKE_VERSION}-Linux-x86_64 && + curl -sSL $URL -o cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && + tar -v -zxf cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && + rm -f cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && + cd cmake-${CMAKE_VERSION}-linux-x86_64 && cp -rp bin/* /usr/local/bin/ && cp -rp share/* /usr/local/share/ && cd / && rm -rf /tmp/build diff --git a/scripts/ubuntu2004/install_reqs.sh 
b/scripts/ubuntu2004/install_reqs.sh index 18dae06c0..94d82d123 100644 --- a/scripts/ubuntu2004/install_reqs.sh +++ b/scripts/ubuntu2004/install_reqs.sh @@ -48,15 +48,16 @@ python3 -m pip install --upgrade pip # fetch recent cmake & install CMAKE_VER_MAJOR=3 -CMAKE_VER_MINOR=19 -CMAKE_VER_PATCH=7 +CMAKE_VER_MINOR=23 +CMAKE_VER_PATCH=2 CMAKE_VER="${CMAKE_VER_MAJOR}.${CMAKE_VER_MINOR}" CMAKE_VERSION="${CMAKE_VER}.${CMAKE_VER_PATCH}" +URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz mkdir -p /tmp/build && cd /tmp/build && - curl -sSL https://cmake.org/files/v${CMAKE_VER}/cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz >cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz && - tar -v -zxf cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz && - rm -f cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz && - cd cmake-${CMAKE_VERSION}-Linux-x86_64 && + curl -sSL $URL -o cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && + tar -v -zxf cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && + rm -f cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && + cd cmake-${CMAKE_VERSION}-linux-x86_64 && cp -rp bin/* /usr/local/bin/ && cp -rp share/* /usr/local/share/ && cd / && rm -rf /tmp/build diff --git a/setup.py b/setup.py index d4f0fc210..a769d333d 100644 --- a/setup.py +++ b/setup.py @@ -58,8 +58,9 @@ def in_google_colab(): # TODO: add option to install these test_dependencies = [ -'jupyter', -'nbformat', +'nbocnvert<7.0', +'jupyter<7.0', +'nbformat<7.0', 'prompt_toolkit>=2.0.7', 'pytest>=5.3.2', ] @@ -75,7 +76,8 @@ def in_google_colab(): ] # dependencies for AWS Lambda backend... -aws_lambda_dependencies = ['boto3'] +# boto is broken currently... 
+aws_lambda_dependencies = [] # manual fix for google colab @@ -100,7 +102,6 @@ def in_google_colab(): 'PyYAML>=3.13', 'psutil', 'pymongo', - 'boto3', 'iso8601' ] else: diff --git a/tuplex/CMakeLists.txt b/tuplex/CMakeLists.txt index 52861ffb2..512b74276 100755 --- a/tuplex/CMakeLists.txt +++ b/tuplex/CMakeLists.txt @@ -51,6 +51,7 @@ message(STATUS "Using language versions C++${CMAKE_CXX_STANDARD} and C${CMAKE_C_ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/") message(STATUS "additional cmake module path is ${CMAKE_MODULE_PATH}") include("${CMAKE_SOURCE_DIR}/cmake/ucm.cmake") #handy package to manipulate compiler flags +include("${CMAKE_SOURCE_DIR}/cmake/CPM.cmake") # package manager from https://github.com/cpm-cmake/CPM.cmake # for debug mode export all symbols set(CMAKE_ENABLE_EXPORTS true) include(targetLinkLibrariesWithDynamicLookup) @@ -474,7 +475,87 @@ if(PYTHON3_EXECUTABLE) message(STATUS "Detected Python3 Root dir to be: ${Python3_ROOT_DIR}") endif() + +# Computes the relationship between two version strings. A version +# string is a number delineated by '.'s such as 1.3.2 and 0.99.9.1. +# You can feed version strings with different number of dot versions, +# and the shorter version number will be padded with zeros: 9.2 < +# 9.2.1 will actually compare 9.2.0 < 9.2.1. +# +# Input: a_in - value, not variable +# b_in - value, not variable +# result_out - variable with value: +# -1 : a_in < b_in +# 0 : a_in == b_in +# 1 : a_in > b_in +# +# Written by James Bigler. +MACRO(COMPARE_VERSION_STRINGS a_in b_in result_out) + # Since SEPARATE_ARGUMENTS uses ' ' as the separation token, + # replace '.' with ' ' to allow easy tokenization of the string. + STRING(REPLACE "." " " a ${a_in}) + STRING(REPLACE "." " " b ${b_in}) + SEPARATE_ARGUMENTS(a) + SEPARATE_ARGUMENTS(b) + + # Check the size of each list to see if they are equal. 
+ LIST(LENGTH a a_length) + LIST(LENGTH b b_length) + + # Note that range needs to be one less than the length as the for + # loop is inclusive (silly CMake). + IF(a_length LESS b_length) + # a is shorter + MATH(EXPR range "${a_length} - 1") + ELSE(a_length LESS b_length) + # b is shorter + MATH(EXPR range "${b_length} - 1") + ENDIF(a_length LESS b_length) + + SET(result 0) + FOREACH(index RANGE ${range}) + IF(result EQUAL 0) + # Only continue to compare things as long as they are equal + LIST(GET a ${index} a_version) + LIST(GET b ${index} b_version) + # LESS + IF(a_version LESS b_version) + SET(result -1) + ENDIF(a_version LESS b_version) + # GREATER + IF(a_version GREATER b_version) + SET(result 1) + ENDIF(a_version GREATER b_version) + ENDIF(result EQUAL 0) + ENDFOREACH(index) + + # Copy out the return result + SET(${result_out} ${result}) +ENDMACRO(COMPARE_VERSION_STRINGS) + + + # this is a macro to find python3 depending on version etc. +function(FindPython3Exe NAMES VERSION EXECUTABLE) + # first check if appropriate program can be found by general path search + set(NAMES_TO_SEARCH ${NAMES}) + separate_arguments(NAMES_TO_SEARCH) + message(STATUS "names to search to find python: ${NAMES_TO_SEARCH}") + find_program(TEMP_EXE NAMES ${NAMES_TO_SEARCH}) + message(STATUS "exe: ${TEMP_EXE}") + if(TEMP_EXE) + # check version (must match VERSION) + execute_process(COMMAND "${TEMP_EXE}" -c "import platform;print(platform.python_version())" RESULT_VARIABLE _result OUTPUT_VARIABLE TEMP_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) + # check if version matches + + compare_version_strings(${VERSION} ${TEMP_VERSION} _result) + if(result EQUAL 0) + message(STATUS "Found ${TEMP_EXE} with version ${TEMP_VERSION} matching desired version ${VERSION}") + set(${EXECUTABLE} ${TEMP_EXE} PARENT_SCOPE) # write out + endif() + endif() + +endfunction() # is a python3 version set? 
if(PYTHON3_VERSION STREQUAL "") @@ -498,6 +579,14 @@ else() if(PYTHON3_VERSION MATCHES "^([0-9]+)\\.([0-9]+)(\\.([0-9]+))?$") string(REGEX MATCH "^([0-9]+)\\.([0-9]+)(\\.([0-9]+))?$" VERSION_STRING "${PYTHON3_VERSION}") message(STATUS "Forcing python3 version to ${VERSION_STRING}") + + # get python3 maj.min + string(REPLACE "." ";" VERSION_LIST ${VERSION_STRING}) + list(GET VERSION_LIST 0 PYTHON3_VERSION_MAJOR) + list(GET VERSION_LIST 1 PYTHON3_VERSION_MINOR) + + #if no executable is specified, try to search for python3 executable + FindPython3Exe("python${PYTHON3_VERSION_MAJOR}.${PYTHON3_VERSION_MINOR} python${PYTHON3_VERSION_MAJOR}" "${PYTHON3_VERSION}" Python3_EXECUTABLE) # include/lib folders might not adhere to schema, therefore if manually given give that precedence # try first to find full Python3, if it fails find only Interpreter & Module find_package(Python3 ${VERSION_STRING} EXACT COMPONENTS Interpreter Development QUIET) @@ -505,7 +594,6 @@ else() message(STATUS "Found full python3-dev installation") set(Python3_Embed_FOUND TRUE) else() - set(Python3_FIND_STRATEGY LOCATION) # use more modern approach using findpython3 set(Python3_EXECUTABLE "${PYTHON3_EXECUTABLE}") @@ -536,6 +624,8 @@ if(Python3_FOUND) if(Python3_VERSION_MAJOR LESS 3 OR Python3_VERSION_MINOR LESS 5) message(FATAL_ERROR "Tuplex requires python3.5 at least, found incompatible python ${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}") endif() + + if(UNIX AND NOT APPLE) set(Boost_USE_STATIC_LIBS ON) endif() @@ -567,9 +657,20 @@ else() message(FATAL_ERROR "Python not found, required to compile Tuplex.") endif() +# retrieve Root dir if empty +if(NOT "${Python3_ROOT_DIR}" OR "${Python3_ROOT_DIR}" STREQUAL "") + # python3 -c 'import sys,pathlib; print(pathlib.Path(sys.executable).parent.parent)' + execute_process (COMMAND "${Python3_EXECUTABLE}" -c "import sys,pathlib; print(pathlib.Path(sys.executable).parent.parent)" + RESULT_VARIABLE _result + OUTPUT_VARIABLE Python3_ROOT_DIR + ERROR_QUIET + 
OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() + message(STATUS "Using Python3 executable ${Python3_EXECUTABLE}") message(STATUS "Found Python3 headers in ${Python3_INCLUDE_DIRS}") message(STATUS "Found Python3 libs in ${Python3_LIBRARIES}") +message(STATUS "Python3 Root dir is ${Python3_ROOT_DIR}") find_package(pcre2 REQUIRED) if(pcre2_FOUND) diff --git a/tuplex/adapters/cpython/CMakeLists.txt b/tuplex/adapters/cpython/CMakeLists.txt index 4f514d3bd..eecd507e8 100644 --- a/tuplex/adapters/cpython/CMakeLists.txt +++ b/tuplex/adapters/cpython/CMakeLists.txt @@ -1,7 +1,6 @@ # (c) 2018 Andy Ly # this build file builds the adapters of the Tuplex project CMAKE_MINIMUM_REQUIRED(VERSION 3.12 FATAL_ERROR) - INCLUDE_DIRECTORIES("include") INCLUDE_DIRECTORIES("${Python3_INCLUDE_DIRS}") @@ -21,4 +20,4 @@ target_include_directories(libcpythonadapter PUBLIC ) # Declare the library -target_link_libraries(libcpythonadapter libutils) \ No newline at end of file +target_link_libraries(libcpythonadapter libutils) diff --git a/tuplex/awslambda/CMakeLists.txt b/tuplex/awslambda/CMakeLists.txt index de76a78b8..c275c87a9 100644 --- a/tuplex/awslambda/CMakeLists.txt +++ b/tuplex/awslambda/CMakeLists.txt @@ -61,10 +61,4 @@ else() aws_lambda_package_target(${LAMBDA_NAME}) endif() -# Add all the python dependencies to the zip file -set(PYTHON_RESOURCES_LOC ${CMAKE_CURRENT_SOURCE_DIR}/python38_resources) -set(PYTHON_RESOURCES_ZIP ${PYTHON_RESOURCES_LOC}.zip) -message("PYTHON_RESOURCES_ZIP = ${PYTHON_RESOURCES_ZIP}") -message("PYTHON_RESOURCES_LOC = ${PYTHON_RESOURCES_LOC}") - # To build Lambda runner deployment package, use ./scripts/create_lambda.zip.sh diff --git a/tuplex/cmake/CPM.cmake b/tuplex/cmake/CPM.cmake new file mode 100644 index 000000000..6073550c1 --- /dev/null +++ b/tuplex/cmake/CPM.cmake @@ -0,0 +1,21 @@ +set(CPM_DOWNLOAD_VERSION 0.35.1) + +if(CPM_SOURCE_CACHE) + # Expand relative path. 
This is important if the provided path contains a tilde (~) + get_filename_component(CPM_SOURCE_CACHE ${CPM_SOURCE_CACHE} ABSOLUTE) + set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +elseif(DEFINED ENV{CPM_SOURCE_CACHE}) + set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +else() + set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +endif() + +if(NOT (EXISTS ${CPM_DOWNLOAD_LOCATION})) + message(STATUS "Downloading CPM.cmake to ${CPM_DOWNLOAD_LOCATION}") + file(DOWNLOAD + https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake + ${CPM_DOWNLOAD_LOCATION} + ) +endif() + +include(${CPM_DOWNLOAD_LOCATION}) diff --git a/tuplex/codegen/CMakeLists.txt b/tuplex/codegen/CMakeLists.txt index 2d205ce46..4472dc083 100755 --- a/tuplex/codegen/CMakeLists.txt +++ b/tuplex/codegen/CMakeLists.txt @@ -1,4 +1,4 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 3.12 FATAL_ERROR) +CMAKE_MINIMUM_REQUIRED(VERSION 3.19 FATAL_ERROR) # enable c++14 set(CMAKE_CXX_STANDARD 14) @@ -81,9 +81,15 @@ ENDIF() IF(LLVM_ROOT_DIR) # make cmake find in config mode the right LLVMConfig.cmake file which is located here set(LLVM_DIR "${LLVM_ROOT_DIR}/lib/cmake/llvm") + find_package(LLVM CONFIG REQUIRED) # find with whatever llvm version has been specified +ELSE() + # try to search for LLVM9, then LLVM6 + find_package(LLVM 9 CONFIG) + if(NOT LLVM_FOUND) + find_package(LLVM 6 CONFIG REQUIRED) + endif() ENDIF() -FIND_PACKAGE(LLVM REQUIRED CONFIG) MESSAGE(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") MESSAGE(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") MESSAGE(STATUS "Found llvm include dirs at: " ${LLVM_INCLUDE_DIRS}) @@ -164,4 +170,4 @@ target_link_libraries(libcodegen ${FFI_LIBRARIES} ${ANTLR4Runtime_LIB} ${AWSSDK_LINK_LIBRARIES} - ${PCRE2_LIBRARIES}) \ No newline at end of file + ${PCRE2_LIBRARIES}) diff --git a/tuplex/codegen/include/TGraph.h 
b/tuplex/codegen/include/TGraph.h index 512fcdbd4..e4fa8ca73 100644 --- a/tuplex/codegen/include/TGraph.h +++ b/tuplex/codegen/include/TGraph.h @@ -20,6 +20,8 @@ #include #include +namespace tuplex { + /*! * template class to model a graph. Internally, a graph is represented via adjacency lists * might be a slow implementation for now. @@ -265,4 +267,5 @@ template class TGraph { } }; -#endif //TUPLEX_TGRAPH_H \ No newline at end of file +} +#endif //TUPLEX_TGRAPH_H diff --git a/tuplex/codegen/include/graphviz/GraphVizBuilder.h b/tuplex/codegen/include/graphviz/GraphVizBuilder.h index c0e89bfaf..3f6a523ff 100644 --- a/tuplex/codegen/include/graphviz/GraphVizBuilder.h +++ b/tuplex/codegen/include/graphviz/GraphVizBuilder.h @@ -15,29 +15,31 @@ #include #include -// this class can be used to generate a graphviz dot file +namespace tuplex { + // this class can be used to generate a graphviz dot file // useful for display of graphs -class GraphVizBuilder { -private: - int _id; - std::vector _nodes; - std::vector _edges; + class GraphVizBuilder { + private: + int _id; + std::vector _nodes; + std::vector _edges; - static const std::string nodePrefix; + static const std::string nodePrefix; - bool writeToStream(std::ostream& os); -public: - GraphVizBuilder() :_id(0) {} + bool writeToStream(std::ostream& os); + public: + GraphVizBuilder() :_id(0) {} - int addNode(const std::string& label); - int addHTMLNode(const std::string& label); + int addNode(const std::string& label); + int addHTMLNode(const std::string& label); - void addEdge(const int iFrom, const int iTo, const std::string& subfieldFrom="", const std::string& subfieldTo=""); + void addEdge(const int iFrom, const int iTo, const std::string& subfieldFrom="", const std::string& subfieldTo=""); - bool saveToDotFile(const std::string& path); + bool saveToDotFile(const std::string& path); - bool saveToPDF(const std::string& path); + bool saveToPDF(const std::string& path); + }; +} -}; #endif //TUPLEX_GRAPHVIZBUILDER_H \ No 
newline at end of file diff --git a/tuplex/codegen/src/graphviz/GraphVizBuilder.cc b/tuplex/codegen/src/graphviz/GraphVizBuilder.cc index aab8190f4..bdb906396 100644 --- a/tuplex/codegen/src/graphviz/GraphVizBuilder.cc +++ b/tuplex/codegen/src/graphviz/GraphVizBuilder.cc @@ -18,113 +18,116 @@ #include #include #include - -const std::string GraphVizBuilder::nodePrefix = "n"; - - -std::string escapeDotLabel(const std::string& s) { - std::string res = ""; - res.reserve(s.length()); - for(auto c : s) { - // chars to escape - if(c == '{' || c== '}' || c=='"' || c=='<' || c=='>' || c=='|') { - res.push_back('\\'); +#include + +namespace tuplex { + const std::string GraphVizBuilder::nodePrefix = "n"; + + std::string escapeDotLabel(const std::string& s) { + std::string res = ""; + res.reserve(s.length()); + for(auto c : s) { + // chars to escape + if(c == '{' || c== '}' || c=='"' || c=='<' || c=='>' || c=='|') { + res.push_back('\\'); + } + res.push_back(c); } - res.push_back(c); + + return res; } - return res; -} + int GraphVizBuilder::addNode(const std::string &label) { + int id = _id++; -int GraphVizBuilder::addNode(const std::string &label) { - int id = _id++; + auto escaped_label = escapeDotLabel(label); + _nodes.push_back(nodePrefix + std::to_string(id) + " [shape=record, ordering=out, label=\"" + escaped_label + "\"];"); - auto escaped_label = escapeDotLabel(label); - _nodes.push_back(nodePrefix + std::to_string(id) + " [shape=record, ordering=out, label=\"" + escaped_label + "\"];"); + return id; + } - return id; -} + int GraphVizBuilder::addHTMLNode(const std::string &label) { + int id = _id++; -int GraphVizBuilder::addHTMLNode(const std::string &label) { - int id = _id++; + _nodes.push_back(nodePrefix + std::to_string(id) + " [shape=plaintext, ordering=out, label=<" + label + ">];"); - _nodes.push_back(nodePrefix + std::to_string(id) + " [shape=plaintext, ordering=out, label=<" + label + ">];"); + return id; + } - return id; -} + void 
GraphVizBuilder::addEdge(const int iFrom, const int iTo, const std::string &subfieldFrom, + const std::string &subfieldTo) { + std::string from = nodePrefix + std::to_string(iFrom); + std::string to = nodePrefix + std::to_string(iTo); -void GraphVizBuilder::addEdge(const int iFrom, const int iTo, const std::string &subfieldFrom, - const std::string &subfieldTo) { - std::string from = nodePrefix + std::to_string(iFrom); - std::string to = nodePrefix + std::to_string(iTo); + if(subfieldFrom.length() > 0) + from += ":" + subfieldFrom; - if(subfieldFrom.length() > 0) - from += ":" + subfieldFrom; + if(subfieldTo.length() > 0) + to += ":" + subfieldTo; - if(subfieldTo.length() > 0) - to += ":" + subfieldTo; + _edges.push_back(from + " -> " + to + ";"); + } - _edges.push_back(from + " -> " + to + ";"); -} + bool GraphVizBuilder::writeToStream(std::ostream &os) { + os << "digraph G {" << std::endl; -bool GraphVizBuilder::writeToStream(std::ostream &os) { - os << "digraph G {" << std::endl; + // first node definitions + if(!_nodes.empty()) + for(auto it = _nodes.cbegin(); it != _nodes.cend(); ++it) { + os << "\t" << *it << std::endl; + } - // first node definitions - if(!_nodes.empty()) - for(auto it = _nodes.cbegin(); it != _nodes.cend(); ++it) { - os << "\t" << *it << std::endl; - } + os << std::endl; - os << std::endl; + if(!_edges.empty()) + for(auto it = _edges.cbegin(); it != _edges.cend(); ++it) { + os << "\t" << *it << std::endl; + } - if(!_edges.empty()) - for(auto it = _edges.cbegin(); it != _edges.cend(); ++it) { - os << "\t" << *it << std::endl; - } + os << "}" << std::endl; - os << "}" << std::endl; + return os.good(); + } - return os.good(); -} + bool GraphVizBuilder::saveToDotFile(const std::string &path) { -bool GraphVizBuilder::saveToDotFile(const std::string &path) { + std::ofstream ofs(path, std::ofstream::out); - std::ofstream ofs(path, std::ofstream::out); + writeToStream(ofs); - writeToStream(ofs); + ofs.close(); - ofs.close(); + return ofs.good(); + 
} - return ofs.good(); -} + bool GraphVizBuilder::saveToPDF(const std::string &path) { -bool GraphVizBuilder::saveToPDF(const std::string &path) { + // @TODO: Improve this later via direct streaming! + // @TODO: This is bad design... However, I don't wanna spend time on handling temp files in C++... - // @TODO: Improve this later via direct streaming! - // @TODO: This is bad design... However, I don't wanna spend time on handling temp files in C++... - std::string tempfile = "/tmp/graph.dot"; + std::string tempfile = tempFileName(); - saveToDotFile(tempfile); + saveToDotFile(tempfile); - // @TODO: make this code here signal-safe! - // https://www.oreilly.com/library/view/secure-programming-cookbook/0596003943/ch01s06.html - // i.e. check https://www.oreilly.com/library/view/secure-programming-cookbook/0596003943/ch01s07.html - // => left for now, b.c. saveToPDF will be anyways only handled in debug version... - std::string cmd = "dot -Tpdf " + tempfile + " -o " + path; - std::array buffer; - std::string result; - std::shared_ptr pipe(popen(cmd.c_str(), "r"), pclose); - if (!pipe) throw std::runtime_error("popen() failed!"); - while (!feof(pipe.get())) { - if (fgets(buffer.data(), 128, pipe.get()) != nullptr) - result += buffer.data(); - } + // @TODO: make this code here signal-safe! + // https://www.oreilly.com/library/view/secure-programming-cookbook/0596003943/ch01s06.html + // i.e. check https://www.oreilly.com/library/view/secure-programming-cookbook/0596003943/ch01s07.html + // => left for now, b.c. saveToPDF will be anyways only handled in debug version... + std::string cmd = "dot -Tpdf " + tempfile + " -o " + path; + std::array buffer; + std::string result; + std::shared_ptr pipe(popen(cmd.c_str(), "r"), pclose); + if (!pipe) throw std::runtime_error("popen() failed!"); + while (!feof(pipe.get())) { + if (fgets(buffer.data(), 128, pipe.get()) != nullptr) + result += buffer.data(); + } - // can process result here if necessary... 
+ // can process result here if necessary... - return true; + return true; + } } \ No newline at end of file diff --git a/tuplex/core/include/Environment.h b/tuplex/core/include/Environment.h index 80407c1ca..0ed9bd1ec 100644 --- a/tuplex/core/include/Environment.h +++ b/tuplex/core/include/Environment.h @@ -21,12 +21,6 @@ namespace tuplex { * @return */ extern std::map getTuplexEnvironment(); - - /*! - * get user name who runs the program - * @return - */ - extern std::string getUserName(); } #endif //TUPLEX_ENVIRONMENT_H \ No newline at end of file diff --git a/tuplex/core/include/Partition.h b/tuplex/core/include/Partition.h index 9bc7fc54c..6e3e1a912 100644 --- a/tuplex/core/include/Partition.h +++ b/tuplex/core/include/Partition.h @@ -116,9 +116,10 @@ namespace tuplex { /*! * lock memory belonging to partition for (exclusive) write ownership + * @param allowForeignOwnerAccess this is a dangerous flag, when set to true, no check is carried out when a non-owning thread accesses this partition. * @return memory pointer. Nullptr if there was a recovery error */ - uint8_t* lockWriteRaw(); + uint8_t* lockWriteRaw(bool allowForeignOwnerAccess=false); /*! * unlock write ownership diff --git a/tuplex/core/src/Environment.cc b/tuplex/core/src/Environment.cc index 8cbc02a5c..736013f09 100644 --- a/tuplex/core/src/Environment.cc +++ b/tuplex/core/src/Environment.cc @@ -14,27 +14,10 @@ #include #include #include +#include namespace tuplex { - std::string getUserName() { - // UNIX specific function for username - // check env variables 'LOGNAME', 'USER', 'LNAME', 'USERNAME' - using namespace std; - - std::vector vars = {"LOGNAME", "USER", "LNAME", "USERNAME"}; - - for(auto var : vars) { - auto name = getenv(var.c_str()); - if(name) - return std::string(name); - } - - // no user found above, return "" - // more advanced methods possible... - return ""; - } - // cf. 
https://stackoverflow.com/questions/27914311/get-computer-name-and-logged-user-name std::string getHostName() { char hostname[1024]; diff --git a/tuplex/core/src/Partition.cc b/tuplex/core/src/Partition.cc index a16d1c2eb..ea08a65f1 100644 --- a/tuplex/core/src/Partition.cc +++ b/tuplex/core/src/Partition.cc @@ -53,9 +53,12 @@ namespace tuplex { _locked = false; } - uint8_t* Partition::lockWriteRaw() { + uint8_t* Partition::lockWriteRaw(bool allowForeignOwnerAccess) { // must be the thread who allocated this - assert(_owner->getThreadID() == std::this_thread::get_id()); + if(!allowForeignOwnerAccess) { + _owner->error("non-owner thread accessing partition"); + assert(_owner->getThreadID() == std::this_thread::get_id()); + } TRACE_LOCK("partition " + uuidToString(_uuid)); std::this_thread::yield(); @@ -236,4 +239,4 @@ namespace tuplex { _owner->freePartition(this); } } -} \ No newline at end of file +} diff --git a/tuplex/core/src/TraceVisitor.cc b/tuplex/core/src/TraceVisitor.cc index 11670d649..1a3ad32b0 100644 --- a/tuplex/core/src/TraceVisitor.cc +++ b/tuplex/core/src/TraceVisitor.cc @@ -26,6 +26,10 @@ namespace tuplex { node->accept(*this); } catch(TraceException& exc) { // nothing todo... + } catch(const std::runtime_error& e) { + logger().error(e.what()); + } catch(...) { + // important b.c. of GIL } // inc. counter diff --git a/tuplex/core/src/physical/TransformTask.cc b/tuplex/core/src/physical/TransformTask.cc index ffcb6022a..ecac0dd74 100644 --- a/tuplex/core/src/physical/TransformTask.cc +++ b/tuplex/core/src/physical/TransformTask.cc @@ -536,6 +536,8 @@ namespace tuplex { assert(!_inputPartitions.empty()); assert(_functor); +#warning "there's a temp fix here, better to write partition/thread system and transfer ownership of partitions to write too upfront." 
+ _numInputRowsRead = 0; _numOutputRowsWritten = 0; @@ -550,7 +552,7 @@ namespace tuplex { std::vector generalPartitions(_generalPartitions.size(), nullptr); for (int i = 0; i < _generalPartitions.size(); ++i) - generalPartitions[i] = _generalPartitions[i]->lockWriteRaw(); + generalPartitions[i] = _generalPartitions[i]->lockWriteRaw(true); int64_t numGeneralPartitions = _generalPartitions.size(); int64_t generalIndexOffset = 0; int64_t generalRowOffset = 0; @@ -558,7 +560,7 @@ namespace tuplex { std::vector fallbackPartitions(_fallbackPartitions.size(), nullptr); for (int i = 0; i < _fallbackPartitions.size(); ++i) - fallbackPartitions[i] = _fallbackPartitions[i]->lockWriteRaw(); + fallbackPartitions[i] = _fallbackPartitions[i]->lockWriteRaw(true); int64_t numFallbackPartitions = _fallbackPartitions.size(); int64_t fallbackIndexOffset = 0; int64_t fallbackRowOffset = 0; diff --git a/tuplex/historyserver/requirements.txt b/tuplex/historyserver/requirements.txt index 167fd59e4..aa2fcc311 100644 --- a/tuplex/historyserver/requirements.txt +++ b/tuplex/historyserver/requirements.txt @@ -4,6 +4,7 @@ jedi astor==0.7.1 pandas>=0.23.4 cloudpickle +Werkzeug==2.0.1 flask>=2.0.1 flask_socketio==4.3.1 python-socketio==4.6.0 diff --git a/tuplex/historyserver/thserver/rest.py b/tuplex/historyserver/thserver/rest.py index f083800ca..71f50b9cc 100644 --- a/tuplex/historyserver/thserver/rest.py +++ b/tuplex/historyserver/thserver/rest.py @@ -407,8 +407,22 @@ def update_exception(): @app.route('/api/version', methods=['GET']) def get_info(): - - mongo_info = dict(zip(('host', 'port'), mongo.cx.address)) + client = mongo.cx + # there's a bug in the ubuntu version, if no session has been started then + # the address field is None. 
Therefore, start by retrieving server info a session + info = client.server_info() + nodes = client.nodes + addr = mongo.cx.address + if addr is None: + if nodes is not None and len(nodes) > 0: + nodes = list(nodes) + addr = nodes[0] + if not isinstance(addr, tuple) and len(addr) == 2: + addr = (None, None) + else: + addr = (None, None) + + mongo_info = dict(zip(('host', 'port'), addr)) mongo_info['uri'] = MONGO_URI return jsonify({'version': __version__, diff --git a/tuplex/python/CMakeLists.txt b/tuplex/python/CMakeLists.txt index b0b0e54c5..6ea09725c 100644 --- a/tuplex/python/CMakeLists.txt +++ b/tuplex/python/CMakeLists.txt @@ -11,10 +11,14 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # how should the module be named? SET(MODULE_NAME tuplex) -# fetch pybind11 (external project) +# deprecated include(FetchContent) +#set(PYBIND11_NOPYTHON ON CACHE INTERNAL "") +message(STATUS "Pybind11 uses python version ${Python3_VERSION}") +set(PYBIND11_FINDPYTHON OFF CACHE INTERNAL "") +set(PYBIND11_PYTHON_VERSION "${Python3_VERSION}" CACHE INTERNAL "") FetchContent_Declare(pybind11 GIT_REPOSITORY https://github.com/pybind/pybind11 - GIT_TAG v2.9.1) + GIT_TAG v2.9.1 ) FetchContent_GetProperties(pybind11) if(NOT pybind11_POPULATED) FetchContent_Populate(pybind11) @@ -29,6 +33,28 @@ file(GLOB_RECURSE SOURCES src/*.cc) message(STATUS "libs: ${Python3_LIBRARIES}") message(STATUS "includes: ${Python3_INCLUDE_DIRS}") + +## use e.g. 
cpm https://github.com/cpm-cmake/CPM.cmake +## fetch pybind11 (external project) +#CPMAddPackage( +# NAME pybind11 +# VERSION 2.9.1 +# GITHUB_REPOSITORY pybind/pybind11 +# OPTIONS +# "PYBIND11_NOPYTHON ON" +# "PYBIND11_FINDPYTHON OFF" +#) +###add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR}) +#if(pybind11_ADDED) +# message(STATUS "cpm added pybin11 ${pybind11_SOURCE_DIR}") +#else() +# message(STATUS "not added ${pybind11_SOURCE_DIR}") +#endif() +#include(${pybind11_SOURCE_DIR}/tools/pybind11Common.cmake) +#include(${pybind11_SOURCE_DIR}/tools/pybind11Tools.cmake) +#include(${pybind11_SOURCE_DIR}/tools/pybind11NewTools.cmake) + + # cf. https://pybind11.readthedocs.io/en/stable/cmake/index.html pybind11_add_module(${MODULE_NAME} ${SOURCES}) set(PYMODULE "PYBIND11_MODULE(${MODULE_NAME}, m)") diff --git a/tuplex/python/setup.py b/tuplex/python/setup.py index 549625188..f76592266 100644 --- a/tuplex/python/setup.py +++ b/tuplex/python/setup.py @@ -15,6 +15,12 @@ # i.e. runtime + the .so module need to be copied... import os +import sys +import logging + +logging.basicConfig(level=logging.INFO) +logging.info('installing for {} (python {}.{})'.format(sys.executable, sys.version_info[0], sys.version_info[1])) + # files to copy for install files = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser("tuplex")) for f in fn] @@ -36,8 +42,9 @@ license="Apache 2.0", keywords="ETL BigData Python LLVM UDF", install_requires=[ - 'jupyter', - 'nbformat', + 'nbconvert<7.0', + 'jupyter<7.0', + 'nbformat<7.0', 'attrs>=19.2.0', 'dill>=0.2.7.1', 'pluggy>=0.6.0, <1.0.0', @@ -64,4 +71,4 @@ # } # could also include long_description, download_url, classifiers, etc. 
-) \ No newline at end of file +) diff --git a/tuplex/python/tuplex/utils/interactive_shell.py b/tuplex/python/tuplex/utils/interactive_shell.py index bdcc84210..4d432b4c4 100644 --- a/tuplex/python/tuplex/utils/interactive_shell.py +++ b/tuplex/python/tuplex/utils/interactive_shell.py @@ -13,6 +13,8 @@ import os import sys +import re +import logging from code import InteractiveConsole from prompt_toolkit.history import InMemoryHistory # old version: 1.0 @@ -29,7 +31,7 @@ from pygments.styles import get_style_by_name from tuplex.utils.jedi_completer import JediCompleter from tuplex.utils.source_vault import SourceVault -from types import LambdaType +from types import LambdaType, FunctionType from tuplex.utils.globs import get_globals @@ -125,6 +127,35 @@ def get_lambda_source(self, f): vault.extractAndPutAllLambdas(src_info, f_filename, f_lineno, f_colno, f_globs) return vault.get(f, f_filename, f_lineno, f_colno, f_globs) + def get_function_source(self, f): + + assert self.initialized, 'must call init on TuplexShell object first' + + assert isinstance(f, + FunctionType) and f.__code__.co_name != '', 'object needs to be a function (non-lambda) object' + + # fetch all data + f_globs = get_globals(f) + f_filename = f.__code__.co_filename + f_lineno = f.__code__.co_firstlineno + f_colno = f.__code__.co_firstcolno if hasattr(f.__code__, 'co_firstcolno') else None + + # retrieve func source from historyDict + lines = self.historyDict[f_filename] + + # check whether def is found in here + source = '\n'.join(lines).strip() + + function_name = f.__code__.co_name + regex = r"def\s*{}\(.*\)\s*:[\t ]*\n".format(function_name) + prog = re.compile(regex) + + if not prog.search(source): + logging.error('Could not find function "{}" in source'.format(function_name)) + return None + + return source + # taken from Lib/code.py # overwritten to customize behaviour def interact(self, banner=None, exitmsg=None): diff --git a/tuplex/python/tuplex/utils/reflection.py 
b/tuplex/python/tuplex/utils/reflection.py index bf0de3fb5..fd4e6a295 100644 --- a/tuplex/python/tuplex/utils/reflection.py +++ b/tuplex/python/tuplex/utils/reflection.py @@ -153,7 +153,20 @@ def get_function_code(f): if in_jupyter_notebook() or in_google_colab(): return extract_function_code(function_name, get_jupyter_raw_code(function_name)) else: - return extract_function_code(function_name, dill.source.getsource(f)) + if is_in_interactive_mode(): + # need to extract lines from shell + # import here, avoids also trouble with jupyter notebooks + from tuplex.utils.interactive_shell import TuplexShell + + # for this to work, a dummy shell has to be instantiated + # through which all typing occurs. Thus, the history can + # be properly captured for source code lookup. + # shell is a borg object, i.e. singleton alike behaviour + shell = TuplexShell() + return shell.get_function_source(f) + else: + # extract using dill from file + return extract_function_code(function_name, dill.source.getsource(f)) vault = SourceVault() diff --git a/tuplex/test/CMakeLists.txt b/tuplex/test/CMakeLists.txt index 437bd93c4..3f3721780 100755 --- a/tuplex/test/CMakeLists.txt +++ b/tuplex/test/CMakeLists.txt @@ -1,19 +1,19 @@ find_package(Threads REQUIRED) -# some tests require the cloudpickle package to be installed, hence check for it here -# try first to find full dev version of python, if that fails - interpreter only. 
-find_package(Python3 COMPONENTS Interpreter Development QUIET) -if (Python3_FOUND) - message(STATUS "Found full python3-dev installation") - set(Python3_Embed_FOUND TRUE) -else() - find_package(Python3 COMPONENTS Interpreter REQUIRED) - # python3 -c 'import distutils.sysconfig; print(distutils.sysconfig.get_python_lib(plat_specific=False,standard_lib=True))' - # try to get get module libs at least - - # mark embed lib as not found - unset(Python3_Embed_FOUND) -endif() +## some tests require the cloudpickle package to be installed, hence check for it here +## try first to find full dev version of python, if that fails - interpreter only. +#find_package(Python3 COMPONENTS Interpreter Development QUIET) +#if (Python3_FOUND) +# message(STATUS "Found full python3-dev installation") +# set(Python3_Embed_FOUND TRUE) +#else() +# find_package(Python3 COMPONENTS Interpreter REQUIRED) +# # python3 -c 'import distutils.sysconfig; print(distutils.sysconfig.get_python_lib(plat_specific=False,standard_lib=True))' +# # try to get get module libs at least +# +# # mark embed lib as not found +# unset(Python3_Embed_FOUND) +#endif() if (Python3_FOUND) # check that cloudpickle is installed via import @@ -37,6 +37,7 @@ endif() # check whether googletest is locally installed, if not download and fetch find_package(GTest CONFIG) if(NOT GTest_FOUND) + # new way of including googletest # Download and unpack googletest at configure time configure_file(CMakeLists.txt.in googletest-download/CMakeLists.txt) @@ -59,9 +60,11 @@ if(NOT GTest_FOUND) # Add googletest directly to our build. This defines # the gtest and gtest_main targets. + # this here picks up potentially a different python version, but not used in build. 
add_subdirectory(${CMAKE_BINARY_DIR}/googletest-src ${CMAKE_BINARY_DIR}/googletest-build EXCLUDE_FROM_ALL) + message(STATUS "gtest done (may display different python version before)") set(GTest_LIBRARIES "gtest") else() message(STATUS "Using locally installed GoogleTest") @@ -89,4 +92,4 @@ endif() file(COPY resources DESTINATION ${DIST_DIR}) # copy resources folder one more time (little hack, but this is where ctest needs the files) # it uses build/test as working directory -file(COPY resources DESTINATION ${CMAKE_BINARY_DIR}/test) \ No newline at end of file +file(COPY resources DESTINATION ${CMAKE_BINARY_DIR}/test) diff --git a/tuplex/test/codegen/TGraphTest.cc b/tuplex/test/codegen/TGraphTest.cc index 265fc172e..20ff03651 100644 --- a/tuplex/test/codegen/TGraphTest.cc +++ b/tuplex/test/codegen/TGraphTest.cc @@ -12,6 +12,8 @@ #include #include +using namespace tuplex; + TEST(Graph, BasicConnectivity) { TGraph g; g.addNode(10); @@ -40,4 +42,4 @@ TEST(Graph, PDFExport) { // can run this EXPECT_TRUE(builder.saveToPDF("test.pdf")); -} \ No newline at end of file +} diff --git a/tuplex/test/core/MathFunctionsTest.cc b/tuplex/test/core/MathFunctionsTest.cc index ed81a0eeb..d0816d322 100644 --- a/tuplex/test/core/MathFunctionsTest.cc +++ b/tuplex/test/core/MathFunctionsTest.cc @@ -677,7 +677,7 @@ TEST_F(MathFunctionsTest, MathPow) { Row(2), Row(1), Row(-1), Row(-2), Row(0) }).map(UDF("lambda y: math.pow(y, 5)", "", ce)).collectAsVector(); - EXPECT_EQ(v2.size(), 5); + EXPECT_EQ(v4.size(), 5); EXPECT_DOUBLE_EQ(v4[0].getDouble(0), 32.0); EXPECT_DOUBLE_EQ(v4[1].getDouble(0), 1.0); EXPECT_DOUBLE_EQ(v4[2].getDouble(0), -1.0); diff --git a/tuplex/test/wrappers/CMakeLists.txt b/tuplex/test/wrappers/CMakeLists.txt index 1d95d1fa7..c5e13bfd8 100644 --- a/tuplex/test/wrappers/CMakeLists.txt +++ b/tuplex/test/wrappers/CMakeLists.txt @@ -14,16 +14,25 @@ FILE(GLOB PYSRCS ../../python/src/*.cc) #list(REMOVE_ITEM PYSRCS "../../python/src/PythonBindings.cc") list(FILTER PYSRCS EXCLUDE 
REGEX ".*PythonBindings.cc$") -# use pybind11 +## use pybind11 +#CPMAddPackage( +# NAME pybind11 +# VERSION 2.9.1 +# GITHUB_REPOSITORY pybind/pybind11 +# OPTIONS +# "PYBIND11_NOPYTHON ON" +# "PYBIND11_FINDPYTHON OFF" +#) + # fetch pybind11 (external project) -include(FetchContent) -FetchContent_Declare(pybind11 GIT_REPOSITORY https://github.com/pybind/pybind11 - GIT_TAG v2.9.1) -FetchContent_GetProperties(pybind11) -if(NOT pybind11_POPULATED) - FetchContent_Populate(pybind11) - add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR}) -endif() +#iinclude(FetchContent) +#FetchContent_Declare(pybind11 GIT_REPOSITORY https://github.com/pybind/pybind11 +# GIT_TAG v2.9.1) +#FetchContent_GetProperties(pybind11) +#if(NOT pybind11_POPULATED) +# FetchContent_Populate(pybind11) +# add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR}) +#endif() include(GoogleTest) diff --git a/tuplex/test/wrappers/WrapperTest.cc b/tuplex/test/wrappers/WrapperTest.cc index 52d3fc137..5b51e1aaa 100644 --- a/tuplex/test/wrappers/WrapperTest.cc +++ b/tuplex/test/wrappers/WrapperTest.cc @@ -2570,6 +2570,46 @@ TEST_F(WrapperTest, PartitionRelease) { } +TEST_F(WrapperTest, TracingVisitorError) { + using namespace tuplex; + + auto udf_code = "def count(L):\n" + " d = {}\n" + " for x in L:\n" + " if x not in d.keys():\n" + " d[x] = 0\n" + " d[x] += 1\n" + " return d"; + + auto ctx_opts = "{\"webui.enable\": false," + " \"driverMemory\": \"8MB\"," + " \"partitionSize\": \"256KB\"," + "\"executorCount\": 0," + "\"tuplex.scratchDir\": \"file://" + scratchDir + "\"," + "\"resolveWithInterpreterOnly\": true}"; + + PythonContext ctx("", "", ctx_opts); + { + auto cols_to_select = PyList_New(1); + PyList_SET_ITEM(cols_to_select, 0, python::PyString_FromString("Unique Key")); + + + PyObject *listObj = PyList_New(1); + PyObject *sublistObj1 = PyList_New(2); + PyList_SET_ITEM(sublistObj1, 0, python::PyString_FromString("a")); + PyList_SET_ITEM(sublistObj1, 1, python::PyString_FromString("b")); + 
+ PyList_SetItem(listObj, 0, sublistObj1); + + auto list = py::reinterpret_borrow(listObj); + + auto res = ctx.parallelize(list).map(udf_code, "").collect(); + auto resObj = res.ptr(); + ASSERT_TRUE(PyList_Check(resObj)); + EXPECT_GE(PyList_Size(resObj), 1); + } +} + //// debug any python module... ///** Takes a path and adds it to sys.paths by calling PyRun_SimpleString. diff --git a/tuplex/utils/include/Utils.h b/tuplex/utils/include/Utils.h index ea09b78c7..2a5b30e40 100644 --- a/tuplex/utils/include/Utils.h +++ b/tuplex/utils/include/Utils.h @@ -58,6 +58,8 @@ namespace std { #include #include +#include + static_assert(__cplusplus >= 201402L, "need at least C++ 14 to compile this file"); // check https://blog.galowicz.de/2016/02/20/short_file_macro/ // for another cool macro @@ -199,6 +201,23 @@ namespace tuplex { */ extern uniqueid_t getUniqueID(); + /*! + * get user name who runs the program + * @return + */ + extern std::string getUserName(); + + inline std::string tempFileName() { + // uses boost filesystem to safely crae a temp file. In Unix, creates the file. + boost::filesystem::path temp = boost::filesystem::temp_directory_path() / boost::filesystem::unique_path(); // note: this could yield a race condition + + auto user = getUserName(); + if(!user.empty()) + return temp.native() + "-" + user; + else + return temp.native(); + } + inline std::string uuidToString(const uniqueid_t& uuid) { std::stringstream ss; ss< vars = {"LOGNAME", "USER", "LNAME", "USERNAME"}; + + for(auto var : vars) { + auto name = getenv(var.c_str()); + if(name) + return std::string(name); + } + + // no user found above, return "" + // more advanced methods possible... + return ""; + } + size_t memStringToSize(const std::string& str) { using namespace boost::algorithm;