这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ To install Tuplex, simply install the dependencies first and then build the pack
#### MacOS build from source
To build Tuplex, you need several other packages first which can be easily installed via [brew](https://brew.sh/).
```
brew install llvm@9 boost boost-python3 aws-sdk-cpp pcre2 antlr4-cpp-runtime googletest gflags yaml-cpp celero
brew install llvm@9 boost boost-python3 aws-sdk-cpp pcre2 antlr4-cpp-runtime googletest gflags yaml-cpp celero protobuf libmagic
python3 -m pip install cloudpickle numpy
python3 setup.py install
```
Expand Down
6 changes: 3 additions & 3 deletions doc/source/gettingstarted.rst
Original file line number Diff line number Diff line change
Expand Up @@ -242,13 +242,13 @@ A convenient option to install packages under Mac OS X is `Homebrew <http://brew

brew install git cmake python@3.9 llvm@9 boost \
boost-python3 aws-sdk-cpp pcre2 antlr4-cpp-runtime \
yaml-cpp celero gflags libmagic
googletest gflags yaml-cpp celero protobuf libmagic

Further, you need to install the ``cloudpickle`` python module in order to compile Tuplex.
Further, you need to install the ``cloudpickle`` python module and ``numpy`` in order to compile Tuplex.

.. code:: console

pip3 install cloudpickle # or use python3 -m pip install cloudpickle
pip3 install numpy cloudpickle # or use python3 -m pip install numpy cloudpickle


Note: By default, Tuplex uses static libs. However, brew recently changed the AWS SDK to ship shared libs, i.e. running ``cmake`` might complain about missing ``*.cmake`` files. To deal with this, you have multiple options:
Expand Down
20 changes: 20 additions & 0 deletions tuplex/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,26 @@ endif()

message(STATUS "build type is " "${CMAKE_BUILD_TYPE}")


# Fix for macOS/Homebrew: locate the brewed OpenSSL (required by some
# dependencies) and publish its prefix as OPENSSL_ROOT_DIR, both in the
# environment and as a CMake variable, so that OpenSSL lookups and external
# projects configured later can pick it up.
if(BREW_FOUND)
  if(APPLE)
    # `brew --prefix openssl` prints the formula prefix on stdout. Any output on
    # stderr (or an empty prefix) is treated as "not found".
    execute_process(COMMAND brew --prefix openssl
                    OUTPUT_VARIABLE OSSL_ROOT_DIR
                    ERROR_VARIABLE BREW_OPENSSL_NOTFOUND
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
    if(BREW_OPENSSL_NOTFOUND OR NOT OSSL_ROOT_DIR)
      message("did not find brewed openssl, you might install it via brew install openssl")
    else()
      # Use the prefix brew reported instead of a hard-coded /usr/local path:
      # the Homebrew prefix is not /usr/local on every machine
      # (e.g. /opt/homebrew on Apple Silicon).
      set(ENV{OPENSSL_ROOT_DIR} "${OSSL_ROOT_DIR}")
      # Also expose it as a CMake variable unless the user already provided one,
      # so that ${OPENSSL_ROOT_DIR} expands to something useful when forwarded.
      if(NOT OPENSSL_ROOT_DIR)
        set(OPENSSL_ROOT_DIR "${OSSL_ROOT_DIR}")
      endif()
      message(STATUS "found brewed openssl under: ${OSSL_ROOT_DIR}")
    endif()
  elseif(UNIX)
    # brew on a non-Apple Unix: nothing to configure here
  endif()
endif()


# Options:
# build for CI (disable some tests)
option(BUILD_FOR_CI "Build for the CI - disable tests that require AWS credentials." OFF)
Expand Down
1 change: 0 additions & 1 deletion tuplex/codegen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ ENDIF()
IF(LLVM_ROOT_DIR)
message(STATUS "Given LLVM_ROOT_DIR=${LLVM_ROOT_DIR}")
message(STATUS "CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}")
message(STATUS "LLVM_ROOT=${LLVM_ROOT}")
EXECUTE_PROCESS(COMMAND "ls" "/opt" COMMAND_ECHO STDOUT)
EXECUTE_PROCESS(COMMAND "ls" "${LLVM_ROOT_DIR}" COMMAND_ECHO STDOUT)
# make cmake find in config mode the right LLVMConfig.cmake file which is located here
Expand Down
18 changes: 0 additions & 18 deletions tuplex/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)

find_package(YAMLCPP REQUIRED)

# Fix for macOS/Homebrew: locate the brewed OpenSSL and publish its prefix as
# OPENSSL_ROOT_DIR, both in the environment and as a CMake variable.
if(BREW_FOUND)
  if(APPLE)
    # `brew --prefix openssl` prints the formula prefix on stdout. Any output on
    # stderr (or an empty prefix) is treated as "not found".
    execute_process(COMMAND brew --prefix openssl
                    OUTPUT_VARIABLE OSSL_ROOT_DIR
                    ERROR_VARIABLE BREW_OPENSSL_NOTFOUND
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
    if(BREW_OPENSSL_NOTFOUND OR NOT OSSL_ROOT_DIR)
      message("did not find brewed openssl, you might install it via brew install openssl")
    else()
      # Use the prefix brew reported instead of a hard-coded /usr/local path:
      # the Homebrew prefix is not /usr/local on every machine
      # (e.g. /opt/homebrew on Apple Silicon).
      set(ENV{OPENSSL_ROOT_DIR} "${OSSL_ROOT_DIR}")
      # Keep a user-supplied OPENSSL_ROOT_DIR if there is one.
      if(NOT OPENSSL_ROOT_DIR)
        set(OPENSSL_ROOT_DIR "${OSSL_ROOT_DIR}")
      endif()
      message(STATUS "found brewed openssl under: ${OSSL_ROOT_DIR}")
    endif()
  elseif(UNIX)
    # brew on a non-Apple Unix: nothing to configure here
  endif()
endif()

set(CURL_LIBRARY "-lcurl")
find_package(CURL REQUIRED)

Expand Down
47 changes: 43 additions & 4 deletions tuplex/io/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,56 @@ get_filename_component(Protobuf_HOME "${Protobuf_INCLUDE_DIRS}" DIRECTORY)

include(ExternalProject)
set(EXTERNAL_INSTALL_LOCATION ${CMAKE_BINARY_DIR}/third_party)

# On macOS, check whether snappy is already installed via Homebrew and, if so,
# use that installation instead of the copy produced by the third-party build.
# Result variables:
#   SNAPPY_HOME      - prefix of the brewed snappy, or "" for the third-party build
#   SNAPPY_LIBRARIES - library to link against
if(BREW_FOUND)
  if(APPLE)
    execute_process(COMMAND brew --prefix snappy
                    OUTPUT_VARIABLE BREW_SNAPPY_DIR
                    ERROR_VARIABLE BREW_SNAPPY_NOTFOUND
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
    if(BREW_SNAPPY_NOTFOUND)
      # brew reported an error: use the library built by the external project
      set(SNAPPY_LIBRARIES "${EXTERNAL_INSTALL_LOCATION}/lib/libsnappy.a")
    else()
      set(ENV{SNAPPY_HOME} "${BREW_SNAPPY_DIR}")
      set(SNAPPY_HOME "${BREW_SNAPPY_DIR}")
      message(STATUS "Found locally installed snappy under $ENV{SNAPPY_HOME}")

      file(TO_CMAKE_PATH "${SNAPPY_HOME}" _snappy_path)

      # Any flavour of the library (shared or static) below the brew prefix
      find_library(SNAPPY_LIBRARY
                   NAMES snappy
                   HINTS "${_snappy_path}"
                   PATH_SUFFIXES "lib" "lib64")
      if(SNAPPY_LIBRARY)
        message(STATUS "snappy lib: ${SNAPPY_LIBRARY}")
      endif()

      # SNAPPY_LIB_NAME is not set in the visible part of this file; without a
      # default the static lookup would search for "lib.a". Fall back to
      # "snappy" while still honouring a value defined elsewhere.
      if(SNAPPY_LIB_NAME)
        set(_snappy_lib_name "${SNAPPY_LIB_NAME}")
      else()
        set(_snappy_lib_name "snappy")
      endif()
      find_library(SNAPPY_STATIC_LIB
                   NAMES "${CMAKE_STATIC_LIBRARY_PREFIX}${_snappy_lib_name}${CMAKE_STATIC_LIBRARY_SUFFIX}"
                   HINTS "${_snappy_path}"
                   PATH_SUFFIXES "lib" "lib64")

      # Prefer the generic lookup result, then the explicit static archive
      if(SNAPPY_LIBRARY)
        set(SNAPPY_LIBRARIES "${SNAPPY_LIBRARY}")
      elseif(SNAPPY_STATIC_LIB)
        set(SNAPPY_LIBRARIES "${SNAPPY_STATIC_LIB}")
      endif()
      message(STATUS "Snappy libraries: ${SNAPPY_LIBRARIES}")
    endif()
  endif()
endif()

# Nothing usable was found locally: fall back to the third-party build
if(NOT SNAPPY_LIBRARIES)
  set(SNAPPY_HOME "")
  set(SNAPPY_LIBRARIES "${EXTERNAL_INSTALL_LOCATION}/lib/libsnappy.a")
endif()

# Fetch Apache ORC from its Git repository and build it as an external project
# that installs into ${EXTERNAL_INSTALL_LOCATION}. Java bindings, tools and the
# C++ tests are switched off through the BUILD_* arguments, and the archives
# listed under BUILD_BYPRODUCTS are declared as outputs of this step.
# NOTE(review): GIT_TAG and CMAKE_ARGS each appear twice below (an older and a
# newer variant) -- this looks like both sides of a diff were kept; confirm
# which variant is intended and drop the other.
ExternalProject_Add(orc
GIT_REPOSITORY https://github.com/apache/orc.git
GIT_TAG main
GIT_TAG rel/release-1.7.0
TIMEOUT 5
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_CXX_FLAGS="-Wno-poison-system-directories" -DCMAKE_INSTALL_PREFIX=${EXTERNAL_INSTALL_LOCATION} -DBUILD_JAVA=OFF -DBUILD_TOOLS=OFF -DBUILD_CPP_TESTS=OFF -DBUILD_POSITION_INDEPENDENT_LIB=ON -DPROTOBUF_HOME=${Protobuf_HOME}
CMAKE_ARGS -DBUILD_LIBHDFSPP=OFF -DSNAPPY_HOME=${SNAPPY_HOME} -DOPENSSL_ROOT_DIR=${OPENSSL_ROOT_DIR} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_CXX_FLAGS="-Wno-poison-system-directories" -DCMAKE_INSTALL_PREFIX=${EXTERNAL_INSTALL_LOCATION} -DBUILD_JAVA=OFF -DBUILD_TOOLS=OFF -DBUILD_CPP_TESTS=OFF -DBUILD_POSITION_INDEPENDENT_LIB=ON -DPROTOBUF_HOME=${Protobuf_HOME}
PREFIX "${EXTERNAL_INSTALL_LOCATION}"
UPDATE_COMMAND "" # Disable update step: clones the project only once
BUILD_BYPRODUCTS
${EXTERNAL_INSTALL_LOCATION}/lib/liborc.a
${EXTERNAL_INSTALL_LOCATION}/lib/liblz4.a
${EXTERNAL_INSTALL_LOCATION}/lib/libsnappy.a
${SNAPPY_LIBRARIES}
${EXTERNAL_INSTALL_LOCATION}/lib/libz.a
${EXTERNAL_INSTALL_LOCATION}/lib/libzstd.a
)
Expand Down Expand Up @@ -62,7 +101,7 @@ target_include_directories(libio PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
# Link libio against libutils, the AWS SDK and libmagic libraries, the
# compression archives installed under ${EXTERNAL_INSTALL_LOCATION}, and liborc.
# NOTE(review): snappy is listed twice (the fixed third-party path and
# ${SNAPPY_LIBRARIES}) -- presumably the old and new side of a diff; confirm
# that only ${SNAPPY_LIBRARIES} should remain.
target_link_libraries(libio libutils
${AWSSDK_LINK_LIBRARIES} ${LibMagic_LIBRARIES}
${EXTERNAL_INSTALL_LOCATION}/lib/liblz4.a
${EXTERNAL_INSTALL_LOCATION}/lib/libsnappy.a
${SNAPPY_LIBRARIES}
${EXTERNAL_INSTALL_LOCATION}/lib/libz.a
${EXTERNAL_INSTALL_LOCATION}/lib/libzstd.a
liborc)
Expand Down
4 changes: 3 additions & 1 deletion tuplex/python/tuplex/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import glob
import sys
import cloudpickle
from tuplex.utils.common import flatten_dict, load_conf_yaml, stringify_dict, unflatten_dict, save_conf_yaml, in_jupyter_notebook, is_in_interactive_mode, current_user, host_name
from tuplex.utils.common import flatten_dict, load_conf_yaml, stringify_dict, unflatten_dict, save_conf_yaml, in_jupyter_notebook, in_google_colab, is_in_interactive_mode, current_user, host_name
import uuid
import json
from .metrics import Metrics
Expand Down Expand Up @@ -109,6 +109,8 @@ def __init__(self, conf=None, name="", **kwargs):
mode = 'shell'
if in_jupyter_notebook():
mode = 'jupyter'
if in_google_colab():
mode = 'colab'
host = host_name()

# pass above options as env.user, ...
Expand Down
4 changes: 2 additions & 2 deletions tuplex/python/tuplex/repl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import os
import sys

from tuplex.utils.common import is_in_interactive_mode, in_jupyter_notebook
from tuplex.utils.common import is_in_interactive_mode, in_jupyter_notebook, in_google_colab
from tuplex.utils.interactive_shell import TuplexShell

from tuplex.utils.version import __version__
Expand All @@ -33,7 +33,7 @@ def TuplexBanner():
# if the module is imported in interactive mode, overwrite shell with own shell
# else, provide code-closure functionality through readline module

if is_in_interactive_mode() and not in_jupyter_notebook():
if is_in_interactive_mode() and not in_jupyter_notebook() and not in_google_colab():
os.system('clear')

from tuplex.context import Context
Expand Down
24 changes: 24 additions & 0 deletions tuplex/python/tuplex/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,30 @@ def in_jupyter_notebook():
except NameError:
return False # Probably standard Python interpreter

def in_google_colab():
    """
    check whether framework runs in Google Colab environment

    Detection is best-effort and never raises for ordinary errors: the
    environment counts as Colab when the ``google.colab`` package can be
    imported, or when the string form of the active IPython shell mentions
    ``google.colab``.

    Returns:
        True if Tuplex is running in Google Colab
    """
    # Catch Exception rather than using a bare except, so KeyboardInterrupt and
    # SystemExit are not swallowed while probing.
    try:
        import google.colab  # only importability matters
        return True
    except Exception:
        pass

    try:
        # get_ipython only exists as a builtin inside IPython-based shells;
        # elsewhere the lookup raises NameError, which means "not Colab".
        return 'google.colab' in str(get_ipython())
    except Exception:
        return False

def is_in_interactive_mode():
"""checks whether the module is loaded in an interactive shell session or not

Expand Down
4 changes: 2 additions & 2 deletions tuplex/python/tuplex/utils/reflection.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

from tuplex.utils.globs import get_globals
from tuplex.utils.source_vault import SourceVault, supports_lambda_closure
from tuplex.utils.common import in_jupyter_notebook, is_in_interactive_mode
from tuplex.utils.common import in_jupyter_notebook, in_google_colab, is_in_interactive_mode
from tuplex.utils.interactive_shell import TuplexShell

# only export get_source function, rest shall be private.
Expand Down Expand Up @@ -150,7 +150,7 @@ def get_function_code(f):
function_name = f.__code__.co_name
assert isinstance(function_name, str)

if in_jupyter_notebook():
if in_jupyter_notebook() or in_google_colab():
return extract_function_code(function_name, get_jupyter_raw_code(function_name))
else:
return extract_function_code(function_name, dill.source.getsource(f))
Expand Down