这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ To customize the cmake build, the following options are available to be passed v
| ------ | ------ | ----------- |
| `CMAKE_BUILD_TYPE` | `Release` (default), `Debug`, `RelWithDebInfo`, `tsan`, `asan`, `ubsan` | select compile mode. Tsan/Asan/Ubsan correspond to Google Sanitizers. |
| `BUILD_WITH_AWS` | `ON` (default), `OFF` | build with AWS SDK or not. On Ubuntu this will build the Lambda executor. |
| `BUILD_WITH_ORC` | `ON`, `OFF` (default) | build with ORC file format support. |
| `SKIP_AWS_TESTS` | `ON` (default), `OFF` | skip AWS tests; helpful when no AWS credentials/AWS Tuplex chain is set up. |
| `GENERATE_PDFS` | `ON`, `OFF` (default) | in Debug mode, output PDF files for ASTs of UDFs, query plans, etc. if graphviz is installed (e.g., `brew install graphviz`). |
| `PYTHON3_VERSION` | `3.6`, ... | selects the python3 version to build against, specified as `major.minor`. To specify the python executable, use the options provided by [cmake](https://cmake.org/cmake/help/git-stage/module/FindPython3.html). |
| `LLVM_ROOT_DIR` | e.g. `/usr/lib/llvm-9` | specify which LLVM version to use |
Expand Down
20 changes: 18 additions & 2 deletions tuplex/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ if (CCACHE_FOUND AND CCACHE_SUPPORT)
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK "ccache")
endif()

# Tuplex build options:
# =====================

# Option on whether to use shared libraries or perform a static link.
# Must be identical to how AWS SDK was installed. E.g., when installing brew aws-sdk-cpp the default is
Expand All @@ -37,6 +39,19 @@ endif()
option(BUILD_SHARED_LIBS "Build shared libraries" OFF)
set(CMAKE_MACOSX_RPATH 1) # fix for gtest warning

option(SKIP_AWS_TESTS "Skip AWS tests" ON)
option(BUILD_WITH_ORC "Build with Orc file support" OFF)


# translate to C++ flags
if(SKIP_AWS_TESTS)
add_definitions(-DSKIP_AWS_TESTS)
endif()

if(BUILD_WITH_ORC)
add_definitions(-DBUILD_WITH_ORC)
endif()

# add -Werror=return-type to turn missing returns into errors!!!
macro(append_if list condition var)
if (${condition})
Expand Down Expand Up @@ -79,14 +94,15 @@ enable_testing()

# mainly from https://github.com/AdaCore/z3/blob/master/CMakeLists.txt
message(STATUS "CMake generator: ${CMAKE_GENERATOR}")
set(available_build_types Debug Release RelWithDebInfo MinSizeRel)
set(available_build_types Debug Release RelWithDebInfo MinSizeRel tsan asan)
if(DEFINED CMAKE_CONFIGURATION_TYPES)
# multi-configuration build, i.e. MSVC or Xcode
message(STATUS "Available configurations: ${CMAKE_CONFIGURATION_TYPES}")
else()
# single-configuration build, i.e. Unix Makefiles, Ninja...
if(NOT CMAKE_BUILD_TYPE)
message(STATUS "CMAKE_BUILD_TYPE is not set. Using default")
set(CMAKE_BUILD_TYPE Debug) # use Debug per default?
message(STATUS "CMAKE_BUILD_TYPE is not set. Using ${CMAKE_BUILD_TYPE}")
message(STATUS "Available build types are: ${available_build_types}")

# Provide drop down menu options in cmake-gui
Expand Down
4 changes: 1 addition & 3 deletions tuplex/codegen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ IF(BREW_FOUND)
# could use brew prefix here, but let's leave it like this
EXECUTE_PROCESS(COMMAND bash "-c" "brew info llvm | grep Cellar | cut -d ' ' -f 1" OUTPUT_VARIABLE LLVM_ROOT_DIR RESULT_VARIABLE BREW_LLVM_NOTFOUND OUTPUT_STRIP_TRAILING_WHITESPACE)
IF(NOT BREW_LLVM_NOTFOUND EQUAL "0")
MESSAGE("did not find llvm, you might install it via `brew install llvm@9`")
MESSAGE(WARNING "did not find llvm, you might install it via `brew install llvm@9`")
ELSE()
# check version, needs to be within 5 and 9 incl.
# i.e. execute something like /usr/local/opt/llvm/bin/llvm-config --version
Expand Down Expand Up @@ -79,8 +79,6 @@ ENDIF()

# for brewed llvm, add to cmakemodulepath
IF(LLVM_ROOT_DIR)
message(STATUS "Given LLVM_ROOT_DIR=${LLVM_ROOT_DIR}")
message(STATUS "CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}")
EXECUTE_PROCESS(COMMAND "ls" "/opt" COMMAND_ECHO STDOUT)
EXECUTE_PROCESS(COMMAND "ls" "${LLVM_ROOT_DIR}" COMMAND_ECHO STDOUT)
# make cmake find in config mode the right LLVMConfig.cmake file which is located here
Expand Down
4 changes: 4 additions & 0 deletions tuplex/core/include/DataSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,10 @@ namespace tuplex {
void toorc(const URI &uri,
const std::unordered_map<std::string, std::string> &outputOptions = defaultORCOutputOptions(),
std::ostream &os = std::cout) {
#ifndef BUILD_WITH_ORC
throw std::runtime_error(MISSING_ORC_MESSAGE);
#endif

tofile(FileFormat::OUTFMT_ORC, uri, UDF(""), 0, 0, outputOptions, std::numeric_limits<size_t>::max(),
os);
}
Expand Down
3 changes: 3 additions & 0 deletions tuplex/core/include/physical/OrcReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#ifndef TUPLEX_ORCREADER_H
#define TUPLEX_ORCREADER_H

#ifdef BUILD_WITH_ORC

#include <orc/OrcFile.hh>
#include <PartitionWriter.h>
#include <orc/OrcBatch.h>
Expand Down Expand Up @@ -195,4 +197,5 @@ namespace tuplex {
};
}

#endif
#endif //TUPLEX_ORCREADER_H
5 changes: 5 additions & 0 deletions tuplex/core/include/physical/SimpleOrcWriteTask.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#ifndef TUPLEX_SIMPLEORCWRITETASK_H
#define TUPLEX_SIMPLEORCWRITETASK_H

#ifdef BUILD_WITH_ORC

#include <orc/OrcFile.hh>
#include "IExecutorTask.h"
#include <orc/OrcTypes.h>
Expand Down Expand Up @@ -205,4 +207,7 @@ class SimpleOrcWriteTask : public IExecutorTask {
};

}

#endif

#endif //TUPLEX_SIMPLEORCWRITETASK_H
5 changes: 5 additions & 0 deletions tuplex/core/src/Context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,11 @@ namespace tuplex {
DataSet& Context::orc(const std::string &pattern,
const std::vector<std::string>& columns) {
using namespace std;

#ifndef BUILD_WITH_ORC
return makeError(MISSING_ORC_MESSAGE);
#endif

Schema schema;
int dataSetID = getNextDataSetID();
DataSet *dsptr = createDataSet(schema);
Expand Down
8 changes: 8 additions & 0 deletions tuplex/core/src/ee/local/LocalBackend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1954,7 +1954,11 @@ namespace tuplex {
wtask = new SimpleFileWriteTask(outputURI(udf, uri, partNo++, fmt), header, header_length, partitions);
break;
case FileFormat::OUTFMT_ORC:
#ifdef BUILD_WITH_ORC
wtask = new SimpleOrcWriteTask(outputURI(udf, uri, partNo++, fmt), partitions, tstage->outputSchema(), outOptions["columnNames"]);
#else
throw std::runtime_error(MISSING_ORC_MESSAGE);
#endif
break;
default:
throw std::runtime_error("file output format not supported.");
Expand All @@ -1973,7 +1977,11 @@ namespace tuplex {
break;
}
case FileFormat::OUTFMT_ORC: {
#ifdef BUILD_WITH_ORC
wtask = new SimpleOrcWriteTask(outputURI(udf, uri, partNo++, fmt), partitions, tstage->outputSchema(), outOptions["columnNames"]);
#else
throw std::runtime_error(MISSING_ORC_MESSAGE);
#endif
break;
}
default:
Expand Down
5 changes: 5 additions & 0 deletions tuplex/core/src/logical/FileInputOperator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,8 @@ namespace tuplex {
}

FileInputOperator::FileInputOperator(const std::string &pattern, const ContextOptions &co): _sampling_time_s(0.0) {

#ifdef BUILD_WITH_ORC
auto &logger = Logger::instance().logger("fileinputoperator");
_fmt = FileFormat::OUTFMT_ORC;
Timer timer;
Expand Down Expand Up @@ -376,6 +378,9 @@ namespace tuplex {
setSchema(Schema(Schema::MemoryLayout::ROW, python::Type::EMPTYTUPLE));
}
_sampling_time_s += timer.time();
#else
throw std::runtime_error(MISSING_ORC_MESSAGE);
#endif
}

void FileInputOperator::setProjectionDefaults() {// set optimized schema to current one
Expand Down
5 changes: 5 additions & 0 deletions tuplex/core/src/physical/TransformTask.cc
Original file line number Diff line number Diff line change
Expand Up @@ -769,9 +769,14 @@ namespace tuplex {
break;
}
case FileFormat::OUTFMT_ORC: {

#ifdef BUILD_WITH_ORC
auto orc = new OrcReader(this, reinterpret_cast<codegen::read_block_f>(_functor), operatorID, partitionSize, _inputSchema);
orc->setRange(rangeStart, rangeSize);
_reader.reset(orc);
#else
throw std::runtime_error(MISSING_ORC_MESSAGE);
#endif
break;
}
default:
Expand Down
144 changes: 76 additions & 68 deletions tuplex/io/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,76 +16,87 @@ message(STATUS "Found LibMagic ${LibMagic_INCLUDE_DIR}, ${LibMagic_LIBRARIES}")
include_directories("include")
include_directories(${Boost_INCLUDE_DIR})

# Install and build ORC C++ APIs
# Orc provides builds for the following libraries:
# - liblz4, libsnappy, libz, and libzstd
find_package(Protobuf REQUIRED)
get_filename_component(Protobuf_HOME "${Protobuf_INCLUDE_DIRS}" DIRECTORY)

include(ExternalProject)
set(EXTERNAL_INSTALL_LOCATION ${CMAKE_BINARY_DIR}/third_party)

# For MacOS, check whether certain 3rd party libs are already installed via brew
# check if snappy is already installed under MacOS
if(BREW_FOUND)
if(APPLE)
EXECUTE_PROCESS(COMMAND brew --prefix snappy OUTPUT_VARIABLE BREW_SNAPPY_DIR ERROR_VARIABLE BREW_SNAPPY_NOTFOUND OUTPUT_STRIP_TRAILING_WHITESPACE)
if(BREW_SNAPPY_NOTFOUND)
set(SNAPPY_LIBRARIES "${EXTERNAL_INSTALL_LOCATION}/lib/libsnappy.a")
else()
set(ENV{SNAPPY_HOME} ${BREW_SNAPPY_DIR})
set(SNAPPY_HOME ${BREW_SNAPPY_DIR})
message(STATUS "Found locally installed snappy under $ENV{SNAPPY_HOME}")
# set variables
file (TO_CMAKE_PATH "${SNAPPY_HOME}" _snappy_path)
find_library (SNAPPY_LIBRARY NAMES snappy HINTS
${_snappy_path}
PATH_SUFFIXES "lib" "lib64")
if(SNAPPY_LIBRARY)
message(STATUS "snappy lib: ${SNAPPY_LIBRARY}")
endif()
find_library (SNAPPY_STATIC_LIB NAMES ${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX} HINTS
${_snappy_path}
PATH_SUFFIXES "lib" "lib64")
if(SNAPPY_LIBRARY)
set(SNAPPY_LIBRARIES "${SNAPPY_LIBRARY}")
elseif(SNAPPY_STATIC_LIB)
set(SNAPPY_LIBRARIES "${SNAPPY_STATIC_LIB}")

# Install and build ORC C++ APIs when BUILD_WITH_ORC is active
if(BUILD_WITH_ORC)
message(STATUS "Building Tuplex with ORC support")

# Orc provides builds for the following libraries:
# - liblz4, libsnappy, libz, and libzstd
find_package(Protobuf REQUIRED)
get_filename_component(Protobuf_HOME "${Protobuf_INCLUDE_DIRS}" DIRECTORY)

include(ExternalProject)
set(EXTERNAL_INSTALL_LOCATION ${CMAKE_BINARY_DIR}/third_party)

# For MacOS, check whether certain 3rd party libs are already installed via brew
# check if snappy is already installed under MacOS
if(BREW_FOUND)
if(APPLE)
EXECUTE_PROCESS(COMMAND brew --prefix snappy OUTPUT_VARIABLE BREW_SNAPPY_DIR ERROR_VARIABLE BREW_SNAPPY_NOTFOUND OUTPUT_STRIP_TRAILING_WHITESPACE)
if(BREW_SNAPPY_NOTFOUND)
set(SNAPPY_LIBRARIES "${EXTERNAL_INSTALL_LOCATION}/lib/libsnappy.a")
else()
set(ENV{SNAPPY_HOME} ${BREW_SNAPPY_DIR})
set(SNAPPY_HOME ${BREW_SNAPPY_DIR})
message(STATUS "Found locally installed snappy under $ENV{SNAPPY_HOME}")
# set variables
file (TO_CMAKE_PATH "${SNAPPY_HOME}" _snappy_path)
find_library (SNAPPY_LIBRARY NAMES snappy HINTS
${_snappy_path}
PATH_SUFFIXES "lib" "lib64")
if(SNAPPY_LIBRARY)
message(STATUS "snappy lib: ${SNAPPY_LIBRARY}")
endif()
find_library (SNAPPY_STATIC_LIB NAMES ${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX} HINTS
${_snappy_path}
PATH_SUFFIXES "lib" "lib64")
if(SNAPPY_LIBRARY)
set(SNAPPY_LIBRARIES "${SNAPPY_LIBRARY}")
elseif(SNAPPY_STATIC_LIB)
set(SNAPPY_LIBRARIES "${SNAPPY_STATIC_LIB}")
endif()
message(STATUS "Snappy libraries: ${SNAPPY_LIBRARIES}")
endif()
message(STATUS "Snappy libraries: ${SNAPPY_LIBRARIES}")
endif()
endif()
endif()

# set to third-party build
if(NOT SNAPPY_LIBRARIES)
set(SNAPPY_HOME "")
set(SNAPPY_LIBRARIES ${EXTERNAL_INSTALL_LOCATION}/lib/libsnappy.a)
endif()
# set to third-party build
if(NOT SNAPPY_LIBRARIES)
set(SNAPPY_HOME "")
set(SNAPPY_LIBRARIES ${EXTERNAL_INSTALL_LOCATION}/lib/libsnappy.a)
endif()

ExternalProject_Add(orc
GIT_REPOSITORY https://github.com/apache/orc.git
GIT_TAG rel/release-1.7.0
TIMEOUT 5
CMAKE_ARGS -DBUILD_LIBHDFSPP=OFF -DSNAPPY_HOME=${SNAPPY_HOME} -DOPENSSL_ROOT_DIR=${OPENSSL_ROOT_DIR} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_CXX_FLAGS="-Wno-poison-system-directories" -DCMAKE_INSTALL_PREFIX=${EXTERNAL_INSTALL_LOCATION} -DBUILD_JAVA=OFF -DBUILD_TOOLS=OFF -DBUILD_CPP_TESTS=OFF -DBUILD_POSITION_INDEPENDENT_LIB=ON -DPROTOBUF_HOME=${Protobuf_HOME}
PREFIX "${EXTERNAL_INSTALL_LOCATION}"
UPDATE_COMMAND "" # Disable update step: clones the project only once
BUILD_BYPRODUCTS
${EXTERNAL_INSTALL_LOCATION}/lib/liborc.a
${EXTERNAL_INSTALL_LOCATION}/lib/liblz4.a
${SNAPPY_LIBRARIES}
${EXTERNAL_INSTALL_LOCATION}/lib/libz.a
${EXTERNAL_INSTALL_LOCATION}/lib/libzstd.a
)
set(orc_INCLUDE_DIR ${EXTERNAL_INSTALL_LOCATION}/include)
ExternalProject_Get_Property(orc binary_dir)
set(orc_LIBRARY ${EXTERNAL_INSTALL_LOCATION}/lib/liborc.a)

add_library(liborc STATIC IMPORTED)
set_target_properties(liborc PROPERTIES IMPORTED_LOCATION ${orc_LIBRARY})

add_dependencies(liborc orc)
include_directories(${orc_INCLUDE_DIR})

ExternalProject_Add(orc
GIT_REPOSITORY https://github.com/apache/orc.git
GIT_TAG rel/release-1.7.0
TIMEOUT 5
CMAKE_ARGS -DBUILD_LIBHDFSPP=OFF -DSNAPPY_HOME=${SNAPPY_HOME} -DOPENSSL_ROOT_DIR=${OPENSSL_ROOT_DIR} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_CXX_FLAGS="-Wno-poison-system-directories" -DCMAKE_INSTALL_PREFIX=${EXTERNAL_INSTALL_LOCATION} -DBUILD_JAVA=OFF -DBUILD_TOOLS=OFF -DBUILD_CPP_TESTS=OFF -DBUILD_POSITION_INDEPENDENT_LIB=ON -DPROTOBUF_HOME=${Protobuf_HOME}
PREFIX "${EXTERNAL_INSTALL_LOCATION}"
UPDATE_COMMAND "" # Disable update step: clones the project only once
BUILD_BYPRODUCTS
${EXTERNAL_INSTALL_LOCATION}/lib/liborc.a
${EXTERNAL_INSTALL_LOCATION}/lib/liblz4.a
set(ORC_LIBRARIES ${EXTERNAL_INSTALL_LOCATION}/lib/liblz4.a
${SNAPPY_LIBRARIES}
${EXTERNAL_INSTALL_LOCATION}/lib/libz.a
${EXTERNAL_INSTALL_LOCATION}/lib/libzstd.a
)
set(orc_INCLUDE_DIR ${EXTERNAL_INSTALL_LOCATION}/include)
ExternalProject_Get_Property(orc binary_dir)
set(orc_LIBRARY ${EXTERNAL_INSTALL_LOCATION}/lib/liborc.a)

add_library(liborc STATIC IMPORTED)
set_target_properties(liborc PROPERTIES IMPORTED_LOCATION ${orc_LIBRARY})

add_dependencies(liborc orc)
include_directories(${orc_INCLUDE_DIR})
liborc)
endif()

add_library(libio STATIC
${CMAKE_CURRENT_BINARY_DIR} ${SOURCES} ${INCLUDES})
Expand All @@ -99,11 +110,8 @@ target_include_directories(libio PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
# Note: if the AWS SDK is not found, then AWSSDK_LINK_LIBRARIES is empty...
# Specify here the libraries this program depends on
target_link_libraries(libio libutils
${AWSSDK_LINK_LIBRARIES} ${LibMagic_LIBRARIES}
${EXTERNAL_INSTALL_LOCATION}/lib/liblz4.a
${SNAPPY_LIBRARIES}
${EXTERNAL_INSTALL_LOCATION}/lib/libz.a
${EXTERNAL_INSTALL_LOCATION}/lib/libzstd.a
liborc)
${AWSSDK_LINK_LIBRARIES}
${LibMagic_LIBRARIES}
${ORC_LIBRARIES})

install(TARGETS libio DESTINATION bin)
4 changes: 4 additions & 0 deletions tuplex/io/include/orc/BoolBatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#ifndef TUPLEX_BOOLBATCH_H
#define TUPLEX_BOOLBATCH_H

#ifdef BUILD_WITH_ORC

namespace tuplex { namespace orc {

/*!
Expand Down Expand Up @@ -86,4 +88,6 @@ class BoolBatch : public OrcBatch {

}}

#endif

#endif //TUPLEX_BOOLBATCH_H
4 changes: 4 additions & 0 deletions tuplex/io/include/orc/DictBatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#ifndef TUPLEX_DICTBATCH_H
#define TUPLEX_DICTBATCH_H

#ifdef BUILD_WITH_ORC

namespace tuplex { namespace orc {

/*!
Expand Down Expand Up @@ -180,4 +182,6 @@ class DictBatch : public OrcBatch {

}}

#endif

#endif //TUPLEX_DICTBATCH_H
Loading