From c762151d83f28080628e55be1f4547bf9ffc5ceb Mon Sep 17 00:00:00 2001 From: leonhards Date: Wed, 20 Oct 2021 12:26:05 -0400 Subject: [PATCH 1/2] new flag to disable aws tests --- tuplex/CMakeLists.txt | 20 ++++++++++++++++++-- tuplex/codegen/CMakeLists.txt | 4 +--- tuplex/test/core/AWSLambdaTest.cc | 23 +++++++++++++++++++++-- tuplex/test/core/FullPipelines.cc | 5 +++-- 4 files changed, 43 insertions(+), 9 deletions(-) diff --git a/tuplex/CMakeLists.txt b/tuplex/CMakeLists.txt index 8dd965b9b..f96e12f5e 100755 --- a/tuplex/CMakeLists.txt +++ b/tuplex/CMakeLists.txt @@ -29,6 +29,8 @@ if (CCACHE_FOUND AND CCACHE_SUPPORT) set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK "ccache") endif() +# Tuplex build options: +# ===================== # Option on whether to use shared libraries or perform a static link. # Must be identical to how AWS SDK was installed. E.g., when installing brew aws-sdk-cpp the default is @@ -37,6 +39,19 @@ endif() option(BUILD_SHARED_LIBS "Build shared libraries" OFF) set(CMAKE_MACOSX_RPATH 1) # fix for gtest warning +option(SKIP_AWS_TESTS "Skip AWS tests" ON) +option(BUILD_WITH_ORC "Build with Orc file support" OFF) + + +# translate to C++ flags +if(SKIP_AWS_TESTS) + add_definitions(-DSKIP_AWS_TESTS) +endif() + +if(BUILD_WITH_ORC) + add_definitions(-DBUILD_WITH_ORC) +endif() + # add -Werror=return-type to turn missing returns into errors!!! macro(append_if list condition var) if (${condition}) @@ -79,14 +94,15 @@ enable_testing() # mainly from https://github.com/AdaCore/z3/blob/master/CMakeLists.txt message(STATUS "CMake generator: ${CMAKE_GENERATOR}") -set(available_build_types Debug Release RelWithDebInfo MinSizeRel) +set(available_build_types Debug Release RelWithDebInfo MinSizeRel tsan asan) if(DEFINED CMAKE_CONFIGURATION_TYPES) # multi-configuration build, i.e. MSVC or Xcode message(STATUS "Available configurations: ${CMAKE_CONFIGURATION_TYPES}") else() # single-configuration build, i.e. Unix Makefiles, Ninja... 
if(NOT CMAKE_BUILD_TYPE) - message(STATUS "CMAKE_BUILD_TYPE is not set. Using default") + set(CMAKE_BUILD_TYPE Debug) # default to Debug when no build type is given + message(STATUS "CMAKE_BUILD_TYPE is not set. Using ${CMAKE_BUILD_TYPE}") message(STATUS "Available build types are: ${available_build_types}") # Provide drop down menu options in cmake-gui diff --git a/tuplex/codegen/CMakeLists.txt b/tuplex/codegen/CMakeLists.txt index a5e17e5b8..6171d4d1f 100755 --- a/tuplex/codegen/CMakeLists.txt +++ b/tuplex/codegen/CMakeLists.txt @@ -44,7 +44,7 @@ IF(BREW_FOUND) # could use brew prefix here, but let's leave it like this EXECUTE_PROCESS(COMMAND bash "-c" "brew info llvm | grep Cellar | cut -d ' ' -f 1" OUTPUT_VARIABLE LLVM_ROOT_DIR RESULT_VARIABLE BREW_LLVM_NOTFOUND OUTPUT_STRIP_TRAILING_WHITESPACE) IF(NOT BREW_LLVM_NOTFOUND EQUAL "0") - MESSAGE("did not find llvm, you might install it via `brew install llvm@9`") + MESSAGE(WARNING "did not find llvm, you might install it via `brew install llvm@9`") ELSE() # check version, needs to be within 5 and 9 incl. # i.e. 
execute something like /usr/local/opt/llvm/bin/llvm-config --version @@ -79,8 +79,6 @@ ENDIF() # for brewed llvm, add to cmakemodulepath IF(LLVM_ROOT_DIR) - message(STATUS "Given LLVM_ROOT_DIR=${LLVM_ROOT_DIR}") - message(STATUS "CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}") EXECUTE_PROCESS(COMMAND "ls" "/opt" COMMAND_ECHO STDOUT) EXECUTE_PROCESS(COMMAND "ls" "${LLVM_ROOT_DIR}" COMMAND_ECHO STDOUT) # make cmake find in config mode the right LLVMConfig.cmake file which is located here diff --git a/tuplex/test/core/AWSLambdaTest.cc b/tuplex/test/core/AWSLambdaTest.cc index 37fdf753a..f5ab67ef5 100644 --- a/tuplex/test/core/AWSLambdaTest.cc +++ b/tuplex/test/core/AWSLambdaTest.cc @@ -9,7 +9,6 @@ //--------------------------------------------------------------------------------------------------------------------// #ifdef BUILD_WITH_AWS -#ifndef BUILD_FOR_CI #include "TestUtils.h" #include @@ -24,12 +23,21 @@ class AWSTest : public PyTest { PyTest::SetUp(); using namespace tuplex; + + // to speed up testing: if the tests are skipped anyway, init can be skipped here too. + // !!! Dangerous !!! 
+#ifndef SKIP_AWS_TESTS initAWS(AWSCredentials::get()); VirtualFileSystem::addS3FileSystem(); +#endif } }; TEST_F(AWSTest, BucketOperations) { +#ifdef SKIP_AWS_TESTS + GTEST_SKIP(); +#endif + using namespace tuplex; using namespace std; @@ -49,6 +57,10 @@ TEST_F(AWSTest, BucketOperations) { } TEST_F(AWSTest, FolderCopy) { +#ifdef SKIP_AWS_TESTS + GTEST_SKIP(); +#endif + using namespace tuplex; using namespace std; @@ -124,6 +136,10 @@ TEST_F(AWSTest, FolderCopy) { } TEST_F(AWSTest, FileUploadAndDownload) { +#ifdef SKIP_AWS_TESTS + GTEST_SKIP(); +#endif + using namespace tuplex; using namespace std; @@ -165,6 +181,10 @@ TEST_F(AWSTest, FileUploadAndDownload) { } TEST_F(AWSTest, SimpleLambdaInvoke) { +#ifdef SKIP_AWS_TESTS + GTEST_SKIP(); +#endif + using namespace std; using namespace tuplex; @@ -184,5 +204,4 @@ TEST_F(AWSTest, SimpleLambdaInvoke) { for(int i = 0; i < N; ++i) EXPECT_EQ(v[i].toPythonString(), ref[i].toPythonString()); } -#endif // BUILD_FOR_CI #endif // BUILD_WITH_AWS \ No newline at end of file diff --git a/tuplex/test/core/FullPipelines.cc b/tuplex/test/core/FullPipelines.cc index 5d340213c..b7858d4dc 100644 --- a/tuplex/test/core/FullPipelines.cc +++ b/tuplex/test/core/FullPipelines.cc @@ -603,9 +603,11 @@ namespace tuplex { class PipelinesTest : public PyTest {}; #ifdef BUILD_WITH_AWS -#ifndef BUILD_FOR_CI TEST_F(PipelinesTest, ZillowAWS) { +#ifdef SKIP_AWS_TESTS + GTEST_SKIP(); +#endif using namespace std; using namespace tuplex; auto co = ContextOptions::defaults(); @@ -637,7 +639,6 @@ TEST_F(PipelinesTest, ZillowAWS) { // } } -#endif // BUILD_FOR_CI #endif // BUILD_WITH_AWS TEST_F(PipelinesTest, ZillowConfigHarness) { From c139f85dee98567a44d5bb7cd69f560ce6618eb1 Mon Sep 17 00:00:00 2001 From: leonhards Date: Wed, 20 Oct 2021 14:38:15 -0400 Subject: [PATCH 2/2] new ORC option which is disabled by default until ORC tests are figured out --- README.md | 2 + tuplex/core/include/DataSet.h | 4 + tuplex/core/include/physical/OrcReader.h | 3 + 
.../include/physical/SimpleOrcWriteTask.h | 5 + tuplex/core/src/Context.cc | 5 + tuplex/core/src/ee/local/LocalBackend.cc | 8 + tuplex/core/src/logical/FileInputOperator.cc | 5 + tuplex/core/src/physical/TransformTask.cc | 5 + tuplex/io/CMakeLists.txt | 144 +++++++++--------- tuplex/io/include/orc/BoolBatch.h | 4 + tuplex/io/include/orc/DictBatch.h | 4 + tuplex/io/include/orc/F64Batch.h | 4 + tuplex/io/include/orc/I64Batch.h | 4 + tuplex/io/include/orc/ListBatch.h | 4 + tuplex/io/include/orc/OrcBatch.h | 4 + tuplex/io/include/orc/OrcTypes.h | 4 + tuplex/io/include/orc/StringBatch.h | 4 + tuplex/io/include/orc/TimestampBatch.h | 4 + tuplex/io/include/orc/TupleBatch.h | 4 + tuplex/io/include/orc/VirtualInputStream.h | 4 + tuplex/io/include/orc/VirtualOutputStream.h | 4 + tuplex/io/src/OrcTypes.cc | 2 + tuplex/test/io/OrcReadTest.cc | 6 +- tuplex/test/io/OrcTypesTest.cc | 8 +- tuplex/test/io/OrcWriteTest.cc | 5 + tuplex/utils/include/Utils.h | 4 + 26 files changed, 184 insertions(+), 70 deletions(-) diff --git a/README.md b/README.md index 061109c1b..c9b868615 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,8 @@ To customize the cmake build, the following options are available to be passed v | ------ | ------ | ----------- | | `CMAKE_BUILD_TYPE` | `Release` (default), `Debug`, `RelWithDebInfo`, `tsan`, `asan`, `ubsan` | select compile mode. Tsan/Asan/Ubsan correspond to Google Sanitizers. | | `BUILD_WITH_AWS` | `ON` (default), `OFF` | build with AWS SDK or not. On Ubuntu this will build the Lambda executor. | +| `BUILD_WITH_ORC` | `ON`, `OFF` (default) | build with ORC file format support. | +| `SKIP_AWS_TESTS` | `ON` (default), `OFF` | skip AWS tests; helpful when no AWS credentials or AWS Tuplex chain are set up. | | `GENERATE_PDFS` | `ON`, `OFF` (default) | output in Debug mode PDF files if graphviz is installed (e.g., `brew install graphviz`) for ASTs of UDFs, query plans, ...| | `PYTHON3_VERSION` | `3.6`, ... 
| when trying to select a python3 version to build against, use this by specifying `major.minor`. To specify the python executable, use the options provided by [cmake](https://cmake.org/cmake/help/git-stage/module/FindPython3.html). | | `LLVM_ROOT_DIR` | e.g. `/usr/lib/llvm-9` | specify which LLVM version to use | diff --git a/tuplex/core/include/DataSet.h b/tuplex/core/include/DataSet.h index 3e54714c1..15b52901a 100644 --- a/tuplex/core/include/DataSet.h +++ b/tuplex/core/include/DataSet.h @@ -305,6 +305,10 @@ namespace tuplex { void toorc(const URI &uri, const std::unordered_map &outputOptions = defaultORCOutputOptions(), std::ostream &os = std::cout) { +#ifndef BUILD_WITH_ORC + throw std::runtime_error(MISSING_ORC_MESSAGE); +#endif + tofile(FileFormat::OUTFMT_ORC, uri, UDF(""), 0, 0, outputOptions, std::numeric_limits::max(), os); } diff --git a/tuplex/core/include/physical/OrcReader.h b/tuplex/core/include/physical/OrcReader.h index c394e3d2f..8a3200922 100644 --- a/tuplex/core/include/physical/OrcReader.h +++ b/tuplex/core/include/physical/OrcReader.h @@ -11,6 +11,8 @@ #ifndef TUPLEX_ORCREADER_H #define TUPLEX_ORCREADER_H +#ifdef BUILD_WITH_ORC + #include #include #include @@ -195,4 +197,5 @@ namespace tuplex { }; } +#endif #endif //TUPLEX_ORCREADER_H diff --git a/tuplex/core/include/physical/SimpleOrcWriteTask.h b/tuplex/core/include/physical/SimpleOrcWriteTask.h index 3429f2b03..1d9634bea 100644 --- a/tuplex/core/include/physical/SimpleOrcWriteTask.h +++ b/tuplex/core/include/physical/SimpleOrcWriteTask.h @@ -11,6 +11,8 @@ #ifndef TUPLEX_SIMPLEORCWRITETASK_H #define TUPLEX_SIMPLEORCWRITETASK_H +#ifdef BUILD_WITH_ORC + #include #include "IExecutorTask.h" #include @@ -205,4 +207,7 @@ class SimpleOrcWriteTask : public IExecutorTask { }; } + +#endif + #endif //TUPLEX_SIMPLEORCWRITETASK_H \ No newline at end of file diff --git a/tuplex/core/src/Context.cc b/tuplex/core/src/Context.cc index a8692b205..ba1bc897f 100644 --- a/tuplex/core/src/Context.cc +++ 
b/tuplex/core/src/Context.cc @@ -452,6 +452,11 @@ namespace tuplex { DataSet& Context::orc(const std::string &pattern, const std::vector& columns) { using namespace std; + +#ifndef BUILD_WITH_ORC + return makeError(MISSING_ORC_MESSAGE); +#endif + Schema schema; int dataSetID = getNextDataSetID(); DataSet *dsptr = createDataSet(schema); diff --git a/tuplex/core/src/ee/local/LocalBackend.cc b/tuplex/core/src/ee/local/LocalBackend.cc index ce38ad721..6ef79e0b2 100644 --- a/tuplex/core/src/ee/local/LocalBackend.cc +++ b/tuplex/core/src/ee/local/LocalBackend.cc @@ -1954,7 +1954,11 @@ namespace tuplex { wtask = new SimpleFileWriteTask(outputURI(udf, uri, partNo++, fmt), header, header_length, partitions); break; case FileFormat::OUTFMT_ORC: +#ifdef BUILD_WITH_ORC wtask = new SimpleOrcWriteTask(outputURI(udf, uri, partNo++, fmt), partitions, tstage->outputSchema(), outOptions["columnNames"]); +#else + throw std::runtime_error(MISSING_ORC_MESSAGE); +#endif break; default: throw std::runtime_error("file output format not supported."); @@ -1973,7 +1977,11 @@ namespace tuplex { break; } case FileFormat::OUTFMT_ORC: { +#ifdef BUILD_WITH_ORC wtask = new SimpleOrcWriteTask(outputURI(udf, uri, partNo++, fmt), partitions, tstage->outputSchema(), outOptions["columnNames"]); +#else + throw std::runtime_error(MISSING_ORC_MESSAGE); +#endif break; } default: diff --git a/tuplex/core/src/logical/FileInputOperator.cc b/tuplex/core/src/logical/FileInputOperator.cc index 6d06baa11..d0b699f02 100644 --- a/tuplex/core/src/logical/FileInputOperator.cc +++ b/tuplex/core/src/logical/FileInputOperator.cc @@ -329,6 +329,8 @@ namespace tuplex { } FileInputOperator::FileInputOperator(const std::string &pattern, const ContextOptions &co): _sampling_time_s(0.0) { + +#ifdef BUILD_WITH_ORC auto &logger = Logger::instance().logger("fileinputoperator"); _fmt = FileFormat::OUTFMT_ORC; Timer timer; @@ -376,6 +378,9 @@ namespace tuplex { setSchema(Schema(Schema::MemoryLayout::ROW, 
python::Type::EMPTYTUPLE)); } _sampling_time_s += timer.time(); +#else + throw std::runtime_error(MISSING_ORC_MESSAGE); +#endif } void FileInputOperator::setProjectionDefaults() {// set optimized schema to current one diff --git a/tuplex/core/src/physical/TransformTask.cc b/tuplex/core/src/physical/TransformTask.cc index f0556f462..fded14154 100644 --- a/tuplex/core/src/physical/TransformTask.cc +++ b/tuplex/core/src/physical/TransformTask.cc @@ -769,9 +769,14 @@ namespace tuplex { break; } case FileFormat::OUTFMT_ORC: { + +#ifdef BUILD_WITH_ORC auto orc = new OrcReader(this, reinterpret_cast(_functor), operatorID, partitionSize, _inputSchema); orc->setRange(rangeStart, rangeSize); _reader.reset(orc); +#else + throw std::runtime_error(MISSING_ORC_MESSAGE); +#endif break; } default: diff --git a/tuplex/io/CMakeLists.txt b/tuplex/io/CMakeLists.txt index 5015c31ef..b23840d07 100644 --- a/tuplex/io/CMakeLists.txt +++ b/tuplex/io/CMakeLists.txt @@ -16,76 +16,87 @@ message(STATUS "Found LibMagic ${LibMagic_INCLUDE_DIR}, ${LibMagic_LIBRARIES}") include_directories("include") include_directories(${Boost_INCLUDE_DIR}) -# Install and build ORC C++ APIs -# Orc provides builds for the following libraries: -# - liblz4, libsnappy, libz, and libzstd -find_package(Protobuf REQUIRED) -get_filename_component(Protobuf_HOME "${Protobuf_INCLUDE_DIRS}" DIRECTORY) - -include(ExternalProject) -set(EXTERNAL_INSTALL_LOCATION ${CMAKE_BINARY_DIR}/third_party) - -# For MacOS, check whether certain 3rd party libs are already installed via brew -# check if snappy is already installed under MacOS -if(BREW_FOUND) - if(APPLE) - EXECUTE_PROCESS(COMMAND brew --prefix snappy OUTPUT_VARIABLE BREW_SNAPPY_DIR ERROR_VARIABLE BREW_SNAPPY_NOTFOUND OUTPUT_STRIP_TRAILING_WHITESPACE) - if(BREW_SNAPPY_NOTFOUND) - set(SNAPPY_LIBRARIES "${EXTERNAL_INSTALL_LOCATION}/lib/libsnappy.a") - else() - set(ENV{SNAPPY_HOME} ${BREW_SNAPPY_DIR}) - set(SNAPPY_HOME ${BREW_SNAPPY_DIR}) - message(STATUS "Found locally installed 
snappy under $ENV{SNAPPY_HOME}") - # set variables - file (TO_CMAKE_PATH "${SNAPPY_HOME}" _snappy_path) - find_library (SNAPPY_LIBRARY NAMES snappy HINTS - ${_snappy_path} - PATH_SUFFIXES "lib" "lib64") - if(SNAPPY_LIBRARY) - message(STATUS "snappy lib: ${SNAPPY_LIBRARY}") - endif() - find_library (SNAPPY_STATIC_LIB NAMES ${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX} HINTS - ${_snappy_path} - PATH_SUFFIXES "lib" "lib64") - if(SNAPPY_LIBRARY) - set(SNAPPY_LIBRARIES "${SNAPPY_LIBRARY}") - elseif(SNAPPY_STATIC_LIB) - set(SNAPPY_LIBRARIES "${SNAPPY_STATIC_LIB}") + +# Install and build ORC C++ APIs when BUILD_WITH_ORC is active +if(BUILD_WITH_ORC) + message(STATUS "Building Tuplex with ORC support") + + # Orc provides builds for the following libraries: + # - liblz4, libsnappy, libz, and libzstd + find_package(Protobuf REQUIRED) + get_filename_component(Protobuf_HOME "${Protobuf_INCLUDE_DIRS}" DIRECTORY) + + include(ExternalProject) + set(EXTERNAL_INSTALL_LOCATION ${CMAKE_BINARY_DIR}/third_party) + + # For MacOS, check whether certain 3rd party libs are already installed via brew + # check if snappy is already installed under MacOS + if(BREW_FOUND) + if(APPLE) + EXECUTE_PROCESS(COMMAND brew --prefix snappy OUTPUT_VARIABLE BREW_SNAPPY_DIR ERROR_VARIABLE BREW_SNAPPY_NOTFOUND OUTPUT_STRIP_TRAILING_WHITESPACE) + if(BREW_SNAPPY_NOTFOUND) + set(SNAPPY_LIBRARIES "${EXTERNAL_INSTALL_LOCATION}/lib/libsnappy.a") + else() + set(ENV{SNAPPY_HOME} ${BREW_SNAPPY_DIR}) + set(SNAPPY_HOME ${BREW_SNAPPY_DIR}) + message(STATUS "Found locally installed snappy under $ENV{SNAPPY_HOME}") + # set variables + file (TO_CMAKE_PATH "${SNAPPY_HOME}" _snappy_path) + find_library (SNAPPY_LIBRARY NAMES snappy HINTS + ${_snappy_path} + PATH_SUFFIXES "lib" "lib64") + if(SNAPPY_LIBRARY) + message(STATUS "snappy lib: ${SNAPPY_LIBRARY}") + endif() + find_library (SNAPPY_STATIC_LIB NAMES ${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX} HINTS + 
${_snappy_path} + PATH_SUFFIXES "lib" "lib64") + if(SNAPPY_LIBRARY) + set(SNAPPY_LIBRARIES "${SNAPPY_LIBRARY}") + elseif(SNAPPY_STATIC_LIB) + set(SNAPPY_LIBRARIES "${SNAPPY_STATIC_LIB}") + endif() + message(STATUS "Snappy libraries: ${SNAPPY_LIBRARIES}") endif() - message(STATUS "Snappy libraries: ${SNAPPY_LIBRARIES}") endif() endif() -endif() -# set to third-party build -if(NOT SNAPPY_LIBRARIES) - set(SNAPPY_HOME "") - set(SNAPPY_LIBRARIES ${EXTERNAL_INSTALL_LOCATION}/lib/libsnappy.a) -endif() + # set to third-party build + if(NOT SNAPPY_LIBRARIES) + set(SNAPPY_HOME "") + set(SNAPPY_LIBRARIES ${EXTERNAL_INSTALL_LOCATION}/lib/libsnappy.a) + endif() + + ExternalProject_Add(orc + GIT_REPOSITORY https://github.com/apache/orc.git + GIT_TAG rel/release-1.7.0 + TIMEOUT 5 + CMAKE_ARGS -DBUILD_LIBHDFSPP=OFF -DSNAPPY_HOME=${SNAPPY_HOME} -DOPENSSL_ROOT_DIR=${OPENSSL_ROOT_DIR} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_CXX_FLAGS="-Wno-poison-system-directories" -DCMAKE_INSTALL_PREFIX=${EXTERNAL_INSTALL_LOCATION} -DBUILD_JAVA=OFF -DBUILD_TOOLS=OFF -DBUILD_CPP_TESTS=OFF -DBUILD_POSITION_INDEPENDENT_LIB=ON -DPROTOBUF_HOME=${Protobuf_HOME} + PREFIX "${EXTERNAL_INSTALL_LOCATION}" + UPDATE_COMMAND "" # Disable update step: clones the project only once + BUILD_BYPRODUCTS + ${EXTERNAL_INSTALL_LOCATION}/lib/liborc.a + ${EXTERNAL_INSTALL_LOCATION}/lib/liblz4.a + ${SNAPPY_LIBRARIES} + ${EXTERNAL_INSTALL_LOCATION}/lib/libz.a + ${EXTERNAL_INSTALL_LOCATION}/lib/libzstd.a + ) + set(orc_INCLUDE_DIR ${EXTERNAL_INSTALL_LOCATION}/include) + ExternalProject_Get_Property(orc binary_dir) + set(orc_LIBRARY ${EXTERNAL_INSTALL_LOCATION}/lib/liborc.a) + + add_library(liborc STATIC IMPORTED) + set_target_properties(liborc PROPERTIES IMPORTED_LOCATION ${orc_LIBRARY}) + + add_dependencies(liborc orc) + include_directories(${orc_INCLUDE_DIR}) -ExternalProject_Add(orc - GIT_REPOSITORY https://github.com/apache/orc.git - GIT_TAG rel/release-1.7.0 - TIMEOUT 5 - CMAKE_ARGS -DBUILD_LIBHDFSPP=OFF 
-DSNAPPY_HOME=${SNAPPY_HOME} -DOPENSSL_ROOT_DIR=${OPENSSL_ROOT_DIR} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_CXX_FLAGS="-Wno-poison-system-directories" -DCMAKE_INSTALL_PREFIX=${EXTERNAL_INSTALL_LOCATION} -DBUILD_JAVA=OFF -DBUILD_TOOLS=OFF -DBUILD_CPP_TESTS=OFF -DBUILD_POSITION_INDEPENDENT_LIB=ON -DPROTOBUF_HOME=${Protobuf_HOME} - PREFIX "${EXTERNAL_INSTALL_LOCATION}" - UPDATE_COMMAND "" # Disable update step: clones the project only once - BUILD_BYPRODUCTS - ${EXTERNAL_INSTALL_LOCATION}/lib/liborc.a - ${EXTERNAL_INSTALL_LOCATION}/lib/liblz4.a + set(ORC_LIBRARIES ${EXTERNAL_INSTALL_LOCATION}/lib/liblz4.a ${SNAPPY_LIBRARIES} ${EXTERNAL_INSTALL_LOCATION}/lib/libz.a ${EXTERNAL_INSTALL_LOCATION}/lib/libzstd.a - ) -set(orc_INCLUDE_DIR ${EXTERNAL_INSTALL_LOCATION}/include) -ExternalProject_Get_Property(orc binary_dir) -set(orc_LIBRARY ${EXTERNAL_INSTALL_LOCATION}/lib/liborc.a) - -add_library(liborc STATIC IMPORTED) -set_target_properties(liborc PROPERTIES IMPORTED_LOCATION ${orc_LIBRARY}) - -add_dependencies(liborc orc) -include_directories(${orc_INCLUDE_DIR}) + liborc) +endif() add_library(libio STATIC ${CMAKE_CURRENT_BINARY_DIR} ${SOURCES} ${INCLUDES}) @@ -99,11 +110,8 @@ target_include_directories(libio PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include #Note: If awssdk not found, then awssdk_link_librarires is empty... 
# Specify here the libraries this program depends on target_link_libraries(libio libutils - ${AWSSDK_LINK_LIBRARIES} ${LibMagic_LIBRARIES} - ${EXTERNAL_INSTALL_LOCATION}/lib/liblz4.a - ${SNAPPY_LIBRARIES} - ${EXTERNAL_INSTALL_LOCATION}/lib/libz.a - ${EXTERNAL_INSTALL_LOCATION}/lib/libzstd.a - liborc) + ${AWSSDK_LINK_LIBRARIES} + ${LibMagic_LIBRARIES} + ${ORC_LIBRARIES}) install(TARGETS libio DESTINATION bin) \ No newline at end of file diff --git a/tuplex/io/include/orc/BoolBatch.h b/tuplex/io/include/orc/BoolBatch.h index 1fb1d16e6..0e7935c89 100644 --- a/tuplex/io/include/orc/BoolBatch.h +++ b/tuplex/io/include/orc/BoolBatch.h @@ -11,6 +11,8 @@ #ifndef TUPLEX_BOOLBATCH_H #define TUPLEX_BOOLBATCH_H +#ifdef BUILD_WITH_ORC + namespace tuplex { namespace orc { /*! @@ -86,4 +88,6 @@ class BoolBatch : public OrcBatch { }} +#endif + #endif //TUPLEX_BOOLBATCH_H diff --git a/tuplex/io/include/orc/DictBatch.h b/tuplex/io/include/orc/DictBatch.h index 32003b361..d884a3286 100644 --- a/tuplex/io/include/orc/DictBatch.h +++ b/tuplex/io/include/orc/DictBatch.h @@ -11,6 +11,8 @@ #ifndef TUPLEX_DICTBATCH_H #define TUPLEX_DICTBATCH_H +#ifdef BUILD_WITH_ORC + namespace tuplex { namespace orc { /*! @@ -180,4 +182,6 @@ class DictBatch : public OrcBatch { }} +#endif + #endif //TUPLEX_DICTBATCH_H diff --git a/tuplex/io/include/orc/F64Batch.h b/tuplex/io/include/orc/F64Batch.h index 0e8b55640..c5074bf06 100644 --- a/tuplex/io/include/orc/F64Batch.h +++ b/tuplex/io/include/orc/F64Batch.h @@ -11,6 +11,8 @@ #ifndef TUPLEX_F64BATCH_H #define TUPLEX_F64BATCH_H +#ifdef BUILD_WITH_ORC + namespace tuplex { namespace orc { /*! 
@@ -86,4 +88,6 @@ class F64Batch : public OrcBatch { }} +#endif + #endif //TUPLEX_F64BATCH_H diff --git a/tuplex/io/include/orc/I64Batch.h b/tuplex/io/include/orc/I64Batch.h index 19345fd61..31b996052 100644 --- a/tuplex/io/include/orc/I64Batch.h +++ b/tuplex/io/include/orc/I64Batch.h @@ -11,6 +11,8 @@ #ifndef TUPLEX_I64BATCH_H #define TUPLEX_I64BATCH_H +#ifdef BUILD_WITH_ORC + namespace tuplex { namespace orc { /*! @@ -85,4 +87,6 @@ class I64Batch : public OrcBatch { }} +#endif + #endif //TUPLEX_I64BATCH_H diff --git a/tuplex/io/include/orc/ListBatch.h b/tuplex/io/include/orc/ListBatch.h index 6efae51fc..e45431573 100644 --- a/tuplex/io/include/orc/ListBatch.h +++ b/tuplex/io/include/orc/ListBatch.h @@ -11,6 +11,8 @@ #ifndef TUPLEX_LISTBATCH_H #define TUPLEX_LISTBATCH_H +#ifdef BUILD_WITH_ORC + namespace tuplex { namespace orc { class ListBatch : public OrcBatch { @@ -103,4 +105,6 @@ class ListBatch : public OrcBatch { }} +#endif + #endif //TUPLEX_LISTBATCH_H diff --git a/tuplex/io/include/orc/OrcBatch.h b/tuplex/io/include/orc/OrcBatch.h index 4735ff6f0..5e87b5efe 100644 --- a/tuplex/io/include/orc/OrcBatch.h +++ b/tuplex/io/include/orc/OrcBatch.h @@ -11,6 +11,8 @@ #ifndef TUPLEX_ORCBATCH_H #define TUPLEX_ORCBATCH_H +#ifdef BUILD_WITH_ORC + namespace tuplex { namespace orc { /*! 
@@ -68,4 +70,6 @@ class OrcBatch { }} +#endif + #endif //TUPLEX_ORCBATCH_H diff --git a/tuplex/io/include/orc/OrcTypes.h b/tuplex/io/include/orc/OrcTypes.h index e207844d2..86b85112a 100644 --- a/tuplex/io/include/orc/OrcTypes.h +++ b/tuplex/io/include/orc/OrcTypes.h @@ -12,6 +12,8 @@ #define TUPLEX_ORCTYPES_H #include +#ifdef BUILD_WITH_ORC + #include #include @@ -80,4 +82,6 @@ ORC_UNIQUE_PTR<::orc::Type> tuplexRowTypeToOrcType(const python::Type &rowType, }} +#endif + #endif //TUPLEX_ORCTYPES_H diff --git a/tuplex/io/include/orc/StringBatch.h b/tuplex/io/include/orc/StringBatch.h index 0da5968ae..507ea5273 100644 --- a/tuplex/io/include/orc/StringBatch.h +++ b/tuplex/io/include/orc/StringBatch.h @@ -11,6 +11,8 @@ #ifndef TUPLEX_STRINGBATCH_H #define TUPLEX_STRINGBATCH_H +#ifdef BUILD_WITH_ORC + namespace tuplex { namespace orc { /*! @@ -107,4 +109,6 @@ class StringBatch : public OrcBatch { }} +#endif + #endif //TUPLEX_STRINGBATCH_H diff --git a/tuplex/io/include/orc/TimestampBatch.h b/tuplex/io/include/orc/TimestampBatch.h index f94ff7473..e1de3d0bf 100644 --- a/tuplex/io/include/orc/TimestampBatch.h +++ b/tuplex/io/include/orc/TimestampBatch.h @@ -11,6 +11,8 @@ #ifndef TUPLEX_TIMESTAMP_H #define TUPLEX_TIMESTAMP_H +#ifdef BUILD_WITH_ORC + namespace tuplex { namespace orc { /*! 
@@ -96,4 +98,6 @@ class TimestampBatch : public OrcBatch { }} +#endif + #endif //TUPLEX_TIMESTAMP_H diff --git a/tuplex/io/include/orc/TupleBatch.h b/tuplex/io/include/orc/TupleBatch.h index 71621eb6f..d9547e61e 100644 --- a/tuplex/io/include/orc/TupleBatch.h +++ b/tuplex/io/include/orc/TupleBatch.h @@ -11,6 +11,8 @@ #ifndef TUPLEX_TUPLEBATCH_H #define TUPLEX_TUPLEBATCH_H +#ifdef BUILD_WITH_ORC + namespace tuplex { namespace orc { class TupleBatch : public OrcBatch { @@ -93,4 +95,6 @@ class TupleBatch : public OrcBatch { }} +#endif + #endif //TUPLEX_TUPLEBATCH_H diff --git a/tuplex/io/include/orc/VirtualInputStream.h b/tuplex/io/include/orc/VirtualInputStream.h index f42c069bb..6d4359689 100644 --- a/tuplex/io/include/orc/VirtualInputStream.h +++ b/tuplex/io/include/orc/VirtualInputStream.h @@ -11,6 +11,8 @@ #ifndef TUPLEX_VIRTUALINPUTSTREAM_H #define TUPLEX_VIRTUALINPUTSTREAM_H +#ifdef BUILD_WITH_ORC + #include namespace tuplex { namespace orc { @@ -51,4 +53,6 @@ class VirtualInputStream : public ::orc::InputStream { }} +#endif + #endif //TUPLEX_VIRTUALINPUTSTREAM_H diff --git a/tuplex/io/include/orc/VirtualOutputStream.h b/tuplex/io/include/orc/VirtualOutputStream.h index c90f98995..cf692d75e 100644 --- a/tuplex/io/include/orc/VirtualOutputStream.h +++ b/tuplex/io/include/orc/VirtualOutputStream.h @@ -11,6 +11,8 @@ #ifndef TUPLEX_VIRTUALOUTPUTSTREAM_H #define TUPLEX_VIRTUALOUTPUTSTREAM_H +#ifdef BUILD_WITH_ORC + #include namespace tuplex { namespace orc { @@ -55,4 +57,6 @@ class VirtualOutputStream : public ::orc::OutputStream { }} +#endif + #endif //TUPLEX_VIRTUALOUTPUTSTREAM_H diff --git a/tuplex/io/src/OrcTypes.cc b/tuplex/io/src/OrcTypes.cc index 39febdcff..53bd32f9a 100644 --- a/tuplex/io/src/OrcTypes.cc +++ b/tuplex/io/src/OrcTypes.cc @@ -7,6 +7,7 @@ // Created by Ben Givertz first on 8/31/2021 // // License: Apache 2.0 // //--------------------------------------------------------------------------------------------------------------------// +#ifdef 
BUILD_WITH_ORC #include @@ -139,3 +140,4 @@ python::Type orcTypeToTuplex(const ::orc::Type &type, bool hasNull) { } }} +#endif \ No newline at end of file diff --git a/tuplex/test/io/OrcReadTest.cc b/tuplex/test/io/OrcReadTest.cc index c3fc10a47..d37a16bee 100644 --- a/tuplex/test/io/OrcReadTest.cc +++ b/tuplex/test/io/OrcReadTest.cc @@ -1,6 +1,8 @@ #include #include +#ifdef BUILD_WITH_ORC + void testReadInput(const std::vector &rows); TEST(ORC, ReadOption) { @@ -166,4 +168,6 @@ void testReadInput(const std::vector &rows) { for (int i = 0; i < rows.size(); ++i) { EXPECT_EQ(rows.at(i).toPythonString(), results.at(i).toPythonString()); } -} \ No newline at end of file +} + +#endif \ No newline at end of file diff --git a/tuplex/test/io/OrcTypesTest.cc b/tuplex/test/io/OrcTypesTest.cc index a8c009712..9379d67a3 100644 --- a/tuplex/test/io/OrcTypesTest.cc +++ b/tuplex/test/io/OrcTypesTest.cc @@ -1,4 +1,8 @@ + #include + +#ifdef BUILD_WITH_ORC + #include // Tests for ORCToTuplex @@ -176,4 +180,6 @@ TEST(ORC, TuplexToORCPrimitive) { auto stringType = tuplexRowTypeToOrcType(python::Type::STRING); EXPECT_EQ(orc::STRING, stringType->getKind()); -} \ No newline at end of file +} + +#endif \ No newline at end of file diff --git a/tuplex/test/io/OrcWriteTest.cc b/tuplex/test/io/OrcWriteTest.cc index ee043085e..ba3f37b19 100644 --- a/tuplex/test/io/OrcWriteTest.cc +++ b/tuplex/test/io/OrcWriteTest.cc @@ -1,6 +1,10 @@ #include #include #include + + +#ifdef BUILD_WITH_ORC + #include #include #include "../core/TestUtils.h" @@ -371,3 +375,4 @@ TEST(ORC, RowToStr) { auto row8 = Row(1, option::none, 2); EXPECT_EQ(R"({"": 1, "": null, "": 2})", rowToORCString(row8)); } +#endif \ No newline at end of file diff --git a/tuplex/utils/include/Utils.h b/tuplex/utils/include/Utils.h index 5fd9026cc..95ddf4022 100644 --- a/tuplex/utils/include/Utils.h +++ b/tuplex/utils/include/Utils.h @@ -11,6 +11,10 @@ #ifndef TUPLEX_UTILS_H #define TUPLEX_UTILS_H +// standard message strings +#define 
MISSING_ORC_MESSAGE ("Tuplex was not built with ORC support. To build Tuplex with ORC, set BUILD_WITH_ORC=ON.") + + #include "Base.h" #include "StringUtils.h" #include "StatUtils.h"