diff --git a/.gitignore b/.gitignore index fc895bf44..20506c1f7 100644 --- a/.gitignore +++ b/.gitignore @@ -94,3 +94,6 @@ wheelhouse/ *.zip tuplex/other/tplxlam.zip fixed_wheels/ + +tuplex/.vscode/ +.vscode/ \ No newline at end of file diff --git a/scripts/docker/benchmark/install_tuplex_reqs.sh b/scripts/docker/benchmark/install_tuplex_reqs.sh index 535d80f76..01f14127e 100644 --- a/scripts/docker/benchmark/install_tuplex_reqs.sh +++ b/scripts/docker/benchmark/install_tuplex_reqs.sh @@ -150,7 +150,7 @@ make -j4 && make install && ldconfig && popd # install python packages for tuplex (needs cloudpickle to compile, numpy to run certain tests) -pip3 install cloudpickle numpy +pip3 install 'cloudpickle<2.0.0' numpy # protobuf 3.12 cd /tmp && diff --git a/scripts/docker/ci/install_centos.sh b/scripts/docker/ci/install_centos.sh index 65d7e9d8e..9d0373249 100644 --- a/scripts/docker/ci/install_centos.sh +++ b/scripts/docker/ci/install_centos.sh @@ -31,10 +31,10 @@ bash install_boost.sh /opt/python/cp39-cp39/bin/python3.9 /opt/boost/python3.9 bash install_boost.sh /opt/python/cp310-cp310/bin/python3.10 /opt/boost/python3.10 # matrix? 
-python3.7 -m pip install cloudpickle numpy -python3.8 -m pip install cloudpickle numpy -python3.9 -m pip install cloudpickle numpy -python3.10 -m pip install cloudpickle numpy +python3.7 -m pip install 'cloudpickle<2.0.0' numpy +python3.8 -m pip install 'cloudpickle<2.0.0' numpy +python3.9 -m pip install 'cloudpickle<2.0.0' numpy +python3.10 -m pip install 'cloudpickle<2.0.0' numpy # tuplex requirements bash install_tuplex_reqs.sh \ No newline at end of file diff --git a/scripts/docker/ci/install_lambda_python.sh b/scripts/docker/ci/install_lambda_python.sh index 86b0eab59..20c969407 100644 --- a/scripts/docker/ci/install_lambda_python.sh +++ b/scripts/docker/ci/install_lambda_python.sh @@ -43,4 +43,4 @@ set -ex && cd /tmp && wget https://www.python.org/ftp/python/${PYTHON3_VERSION}/ # install cloudpickle numpy for Lambda python export LD_LIBRARY_PATH=/opt/lambda-python/lib:$LD_LIBRARY_PATH -/opt/lambda-python/bin/python${PYTHON3_MAJMIN} -m pip install cloudpickle numpy tqdm +/opt/lambda-python/bin/python${PYTHON3_MAJMIN} -m pip install 'cloudpickle<2.0.0' numpy tqdm diff --git a/scripts/docker/ubuntu1804/build_and_test.sh b/scripts/docker/ubuntu1804/build_and_test.sh index 1feb83e24..818dbee58 100755 --- a/scripts/docker/ubuntu1804/build_and_test.sh +++ b/scripts/docker/ubuntu1804/build_and_test.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash # tests using fallback solution require cloudpickle, so install it -pip3 install cloudpickle numpy +pip3 install 'cloudpickle<2.0.0' numpy cd /code && mkdir -p build && diff --git a/scripts/macos/setup-macos.sh b/scripts/macos/setup-macos.sh index 33092b285..c890610dc 100755 --- a/scripts/macos/setup-macos.sh +++ b/scripts/macos/setup-macos.sh @@ -91,7 +91,7 @@ for ((i=0; i<${#PY_VERSIONS[@]}; ++i)); do # support. $PIP_CMD install -q numpy=="$NUMPY_VERSION" cython==0.29.26 # Install wheel to avoid the error "invalid command 'bdist_wheel'". 
- $PIP_CMD install -q wheel cloudpickle delocate + $PIP_CMD install -q wheel 'cloudpickle<2.0.0' delocate done # install boost python for this script diff --git a/scripts/ubuntu1804/install_reqs.sh b/scripts/ubuntu1804/install_reqs.sh index 1bb12b720..c192908bd 100644 --- a/scripts/ubuntu1804/install_reqs.sh +++ b/scripts/ubuntu1804/install_reqs.sh @@ -165,7 +165,7 @@ make -j4 && make install && ldconfig && pushd # install python packages for tuplex (needs cloudpickle to compile, numpy to run certain tests) -pip3 install cloudpickle numpy +pip3 install 'cloudpickle<2.0.0' numpy # setup bash aliases echo "alias antlr='java -jar /opt/lib/antlr-4.8-complete.jar'" >>"$HOME/.bashrc" diff --git a/scripts/ubuntu2004/install_reqs.sh b/scripts/ubuntu2004/install_reqs.sh index 94d82d123..217ac79f3 100644 --- a/scripts/ubuntu2004/install_reqs.sh +++ b/scripts/ubuntu2004/install_reqs.sh @@ -159,7 +159,7 @@ make -j4 && make install && ldconfig && popd # install python packages for tuplex (needs cloudpickle to compile, numpy to run certain tests) -pip3 install cloudpickle numpy +pip3 install 'cloudpickle<2.0.0' numpy # protobuf 3.12 cd /tmp && diff --git a/setup.py b/setup.py index a769d333d..a1c9d44ed 100644 --- a/setup.py +++ b/setup.py @@ -62,12 +62,13 @@ def in_google_colab(): 'jupyter<7.0', 'nbformat<7.0', 'prompt_toolkit>=2.0.7', -'pytest>=5.3.2', +'pytest>=5.3.2' ] # Also requires to install MongoDB webui_dependencies = [ 'Flask>=2.0.2', + 'Werkzeug<2.2.0', 'gunicorn', 'eventlet==0.30.0', # newer versions of eventlet have a bug under MacOS 'flask-socketio', @@ -98,7 +99,7 @@ def in_google_colab(): 'astor', 'prompt_toolkit', 'jedi', - 'cloudpickle>=0.6.1', + 'cloudpickle>=0.6.1,<2.0.0', 'PyYAML>=3.13', 'psutil', 'pymongo', @@ -118,7 +119,7 @@ def in_google_colab(): 'astor', 'prompt_toolkit', 'jedi', - 'cloudpickle>=0.6.1', + 'cloudpickle>=0.6.1,<2.0.0', 'PyYAML>=3.13', 'psutil', 'pymongo', @@ -597,7 +598,7 @@ def tplx_package_data(): # logic and declaration, and simpler if 
you include description/version in a file. setup(name="tuplex", python_requires='>=3.7.0', - version="0.3.3rc0", + version="0.3.3", author="Leonhard Spiegelberg", author_email="tuplex@cs.brown.edu", description="Tuplex is a novel big data analytics framework incorporating a Python UDF compiler based on LLVM " diff --git a/tuplex/codegen/include/FunctionRegistry.h b/tuplex/codegen/include/FunctionRegistry.h index 01db27e30..ba38f2f86 100644 --- a/tuplex/codegen/include/FunctionRegistry.h +++ b/tuplex/codegen/include/FunctionRegistry.h @@ -189,6 +189,18 @@ namespace tuplex { const python::Type &retType, const std::vector &args); + SerializableValue createMathIsNanCall(llvm::IRBuilder<>& builder, const python::Type &argsType, + const python::Type &retType, + const std::vector &args); + + SerializableValue createMathIsInfCall(llvm::IRBuilder<>& builder, const python::Type &argsType, + const python::Type &retType, + const std::vector &args); + + SerializableValue createMathIsCloseCall(tuplex::codegen::LambdaFunctionBuilder &lfb, + llvm::IRBuilder<>& builder, const python::Type &argsType, + const std::vector &args); + // math module functions SerializableValue createMathCeilFloorCall(LambdaFunctionBuilder& lfb, llvm::IRBuilder<>& builder, const std::string& qual_name, const SerializableValue& arg); diff --git a/tuplex/codegen/include/LLVMEnvironment.h b/tuplex/codegen/include/LLVMEnvironment.h index e401ad8ce..6ed5cad5e 100644 --- a/tuplex/codegen/include/LLVMEnvironment.h +++ b/tuplex/codegen/include/LLVMEnvironment.h @@ -553,6 +553,13 @@ namespace tuplex { */ void printValue(llvm::IRBuilder<>& builder, llvm::Value*, std::string msg=""); + /*! + * debug print any llvm value as its corresponding hex value + * @param builder + * @param val + */ + void printHexValue(llvm::IRBuilder<> &builder, llvm::Value* val, std::string msg=""); + llvm::Type* pythonToLLVMType(const python::Type &t); /*! 
diff --git a/tuplex/codegen/src/FunctionRegistry.cc b/tuplex/codegen/src/FunctionRegistry.cc index 78bdb37bd..ab4ba1c5d 100644 --- a/tuplex/codegen/src/FunctionRegistry.cc +++ b/tuplex/codegen/src/FunctionRegistry.cc @@ -966,6 +966,353 @@ namespace tuplex { return SerializableValue(resVal, resSize); } + codegen::SerializableValue FunctionRegistry::createMathIsNanCall(llvm::IRBuilder<>& builder, const python::Type &argsType, + const python::Type &retType, + const std::vector &args) { + using namespace llvm; + auto& context = builder.GetInsertBlock()->getContext(); + assert(args.size() >= 1); + auto val = args.front(); + auto type = argsType.parameters().front(); + + if (python::Type::F64 == type) { + /* Note that there are multiple possible ways to represent NAN + + A NAN must be a float/double, where the sign bit is 0 or 1, all exponent bits are set to 1, + and the mantissa is anything except all 0 bits (because that's how infinity is defined) + + According to this: https://www.geeksforgeeks.org/floating-point-representation-basics/ + the canonical quiet NAN (QNAN) has only the most significant bit of the mantissa set to 1. + NOTE(review): under IEEE 754-2008 (x86-64, ARM) a set top mantissa bit means "quiet", so 0x7FFC... is + presumably also a quiet NAN; a signaling NAN (SNAN) would be e.g. 0x7FF4000000000000 - TODO confirm. + + QNAN = 0x7FF8000000000000 + NAN with the two top mantissa bits set = 0x7FFC000000000000 + */ + llvm::Value* i64Val = builder.CreateBitCast(val.val, llvm::Type::getInt64Ty(context)); + /* The below instructions shift the bits of the input value right by 32 bits, + and then compute the result & (bitwise AND) 0x7fffffff = 2147483647. + Effectively: (x >> 32) & 0x7fffffff + + Note that 0x7fffffff has the 31 least significant bits set to 1, and the + most significant bit set to 0. + If the input value was 0x7FF8000000000000, the result would be 0x7FF80000. + If the input value was 0x7FFC000000000000, the result would be 0x7FFC0000. 
+ */ + auto shiftedVal = builder.CreateLShr(i64Val, 32); + auto i32Shift = builder.CreateTrunc(shiftedVal, llvm::Type::getInt32Ty(context)); + auto andRes = builder.CreateAnd(i32Shift, 2147483647); + /* The next instructions check if the low 32 bits of the input value (x truncated to i32) are not equal to 0. + Then, the result of this (0 or 1) is added to the result of (x >> 32) & 0x7fffffff. + Finally, this sum is compared to 0x7ff00000 = 2146435072; if the sum is greater than + 0x7ff00000, isnan returns true, otherwise, false. + */ + auto i32Val = builder.CreateTrunc(i64Val, llvm::Type::getInt32Ty(context)); + auto cmpRes = builder.CreateICmpNE(i32Val, ConstantInt::get(i32Val->getType(), 0)); + auto i32cmp = builder.CreateZExt(cmpRes, llvm::Type::getInt32Ty(context)); + auto added = builder.CreateNUWAdd(andRes, i32cmp); + auto addCmp = builder.CreateICmpUGT(added, ConstantInt::get(i32Val->getType(), 2146435072)); + + auto resVal = _env.upcastToBoolean(builder, addCmp); + auto resSize = _env.i64Const(sizeof(int64_t)); + + return SerializableValue(resVal, resSize); + } else { + // only other valid input types are integer and boolean + assert(python::Type::BOOLEAN == type || python::Type::I64 == type); + + return SerializableValue(_env.boolConst(false), _env.i64Const(sizeof(int64_t))); + } + } + + codegen::SerializableValue FunctionRegistry::createMathIsInfCall(llvm::IRBuilder<>& builder, const python::Type &argsType, + const python::Type &retType, + const std::vector &args) { + using namespace llvm; + auto& context = builder.GetInsertBlock()->getContext(); + assert(args.size() >= 1); + auto val = args.front(); + auto type = argsType.parameters().front(); + + if (python::Type::F64 == type) { + // compare input to positive and negative infinity (check if equal) + auto posCmp = builder.CreateFCmpOEQ(val.val, _env.f64Const(INFINITY)); + auto negCmp = builder.CreateFCmpOEQ(val.val, _env.f64Const(-INFINITY)); + + // if the input is equal to either positive or negative infinity, this 'or' instruction should return 1 + 
auto orRes = builder.CreateOr(negCmp, posCmp); + + auto resVal = _env.upcastToBoolean(builder, orRes); + auto resSize = _env.i64Const(sizeof(int64_t)); + + return SerializableValue(resVal, resSize); + } else { + // only other valid input types are integer and boolean + assert(python::Type::BOOLEAN == type || python::Type::I64 == type); + + return SerializableValue(_env.boolConst(false), _env.i64Const(sizeof(int64_t))); + } + } + + codegen::SerializableValue FunctionRegistry::createMathIsCloseCall(tuplex::codegen::LambdaFunctionBuilder &lfb, + llvm::IRBuilder<>& builder, const python::Type &argsType, + const std::vector &args) { + assert(argsType.isTupleType()); + assert(args.size() == argsType.parameters().size()); + assert(args.size() >= 2); + + using namespace llvm; + auto& context = builder.GetInsertBlock()->getContext(); + Module *M = builder.GetInsertBlock()->getModule(); + std::vector input_types = argsType.parameters(); + + auto x_val = args[0].val; + auto y_val = args[1].val; + llvm::Value* rel_tol_val = _env.f64Const(1e-09); + llvm::Value* abs_tol_val = _env.i64Const(0); + auto x_ty = input_types[0]; + auto y_ty = input_types[1]; + python::Type rel_ty = python::Type::F64; + python::Type abs_ty = python::Type::I64; + + switch(args.size()) { + case 2: + // rel_tol and abs_tol not specified; stick with default values + break; + case 3: + // assume that the third argument is rel_tol + rel_tol_val = args[2].val; + rel_ty = input_types[2]; + break; + default: + assert(args.size() == 4); + // assume that the third argument is rel_tol and the fourth argument is abs_tol + // note: this doesn't support the case where abs_tol is specified but rel_tol isn't + rel_tol_val = args[2].val; + abs_tol_val = args[3].val; + rel_ty = input_types[2]; + abs_ty = input_types[3]; + } + + // error check rel_tol and abs_tol (both must be at least 0) + llvm::Value* rel_tol_check; + if (rel_ty == python::Type::BOOLEAN || rel_ty == python::Type::I64) { + auto upcast_rel = 
_env.upCast(builder, rel_tol_val, _env.i64Type()); + rel_tol_check = builder.CreateICmpSLT(upcast_rel, _env.i64Const(0)); + } else { + assert(rel_ty == python::Type::F64); + rel_tol_check = builder.CreateFCmpOLT(rel_tol_val, _env.f64Const(0)); + } + + llvm::Value* abs_tol_check; + if (abs_ty == python::Type::BOOLEAN || abs_ty == python::Type::I64) { + auto upcast_abs = _env.upCast(builder, abs_tol_val, _env.i64Type()); + abs_tol_check = builder.CreateICmpSLT(upcast_abs, _env.i64Const(0)); + } else { + assert(abs_ty == python::Type::F64); + abs_tol_check = builder.CreateFCmpOLT(abs_tol_val, _env.f64Const(0)); + } + + // if either rel_tol or abs_tol are < 0, throw exception + auto below_zero = builder.CreateOr(rel_tol_check, abs_tol_check); + lfb.addException(builder, ExceptionCode::VALUEERROR, below_zero); + + // check x and y types - bools and ints can be optimized! + if (x_ty == python::Type::BOOLEAN && y_ty == python::Type::BOOLEAN) { + auto xor_xy = builder.CreateXor(x_val, y_val); + // if rel_tol or abs_tol is a bool or int, use ICmp instead of FCmp + llvm::Value* rel_cmp; + if (rel_ty == python::Type::BOOLEAN || rel_ty == python::Type::I64) { + auto rel_tol = _env.upCast(builder, rel_tol_val, _env.i64Type()); + rel_cmp = builder.CreateICmpUGE(rel_tol, _env.i64Const(1)); + } else { + assert(rel_ty == python::Type::F64); + rel_cmp = builder.CreateFCmpOGE(rel_tol_val, _env.f64Const(1)); + } + + llvm::Value* abs_cmp; + if (abs_ty == python::Type::BOOLEAN || abs_ty == python::Type::I64) { + auto abs_tol = _env.upCast(builder, abs_tol_val, _env.i64Type()); + abs_cmp = builder.CreateICmpUGE(abs_tol, _env.i64Const(1)); + } else { + assert(abs_ty == python::Type::F64); + abs_cmp = builder.CreateFCmpOGE(abs_tol_val, _env.f64Const(1)); + } + + auto rel_or_abs = builder.CreateOr(rel_cmp, abs_cmp); + auto eq_check = builder.CreateXor(xor_xy, _env.boolConst(true)); + auto bool_val = _env.upcastToBoolean(builder, rel_or_abs); + auto or_res = builder.CreateOr(bool_val, 
eq_check); + + auto resVal = _env.upcastToBoolean(builder, or_res); + auto resSize = _env.i64Const(sizeof(int64_t)); + + return SerializableValue(resVal, resSize); + } else if (x_ty != python::Type::F64 && y_ty != python::Type::F64) { + // cast x/y to integers + auto x = _env.upCast(builder, x_val, _env.i64Type()); + auto y = _env.upCast(builder, y_val, _env.i64Type()); + auto rel_tol = _env.upCast(builder, rel_tol_val, _env.doubleType()); + + auto cur_block = builder.GetInsertBlock(); + assert(cur_block); + + // create new blocks for each case + BasicBlock *bb_below_one = BasicBlock::Create(builder.getContext(), "opt_lt_one", builder.GetInsertBlock()->getParent()); + BasicBlock *bb_standard = BasicBlock::Create(builder.getContext(), "opt_standard", builder.GetInsertBlock()->getParent()); + BasicBlock *bb_done = BasicBlock::Create(builder.getContext(), "cmp_done", builder.GetInsertBlock()->getParent()); + + // allocate space for return value + auto val = _env.CreateFirstBlockAlloca(builder, _env.getBooleanType()); + + // first block comparison (x ?== y) + auto xy_eq = builder.CreateICmpEQ(x, y); + auto eq_res = _env.upcastToBoolean(builder, xy_eq); + builder.CreateStore(eq_res, val); + builder.CreateCondBr(xy_eq, bb_done, bb_below_one); + + // check if rel_tol * max_val < 1 and abs_tol < 1 (should return false: x != y here, so the integers differ by at least 1) + builder.SetInsertPoint(bb_below_one); + auto x_d = builder.CreateSIToFP(x, _env.doubleType()); + auto y_d = builder.CreateSIToFP(y, _env.doubleType()); + auto x_abs = llvm::createUnaryIntrinsic(builder, llvm::Intrinsic::ID::fabs, x_d); + auto y_abs = llvm::createUnaryIntrinsic(builder, llvm::Intrinsic::ID::fabs, y_d); + auto xy_cmp = builder.CreateFCmpOLT(x_abs, y_abs); + auto max_val = builder.CreateSelect(xy_cmp, y_abs, x_abs); + auto relxmax = builder.CreateFMul(max_val, rel_tol); // use the double-upcast rel_tol: rel_tol_val may be an i64/bool value, which FMul cannot mix with a double + auto relxmax_cmp = builder.CreateFCmpOLT(relxmax, _env.f64Const(1)); + + // if abs_tol is a bool or int, use int instructions + llvm::Value* abs_tol = abs_tol_val; + 
llvm::Value* abs_cmp; + if (abs_ty == python::Type::BOOLEAN || abs_ty == python::Type::I64) { + abs_tol = _env.upCast(builder, abs_tol_val, _env.i64Type()); + abs_cmp = builder.CreateICmpULT(abs_tol, _env.i64Const(1)); + } else { + assert(abs_ty == python::Type::F64); + // so we don't leave abs_tol uninitialized + abs_cmp = builder.CreateFCmpOLT(abs_tol, _env.f64Const(1)); + } + + auto l1_res = builder.CreateAnd(abs_cmp, relxmax_cmp); + builder.CreateStore(_env.boolConst(false), val); // should overwrite value from first block + builder.CreateCondBr(l1_res, bb_done, bb_standard); + + // standard check for isclose + builder.SetInsertPoint(bb_standard); + auto diff = builder.CreateFSub(x_d, y_d); + auto LHS = llvm::createUnaryIntrinsic(builder, llvm::Intrinsic::ID::fabs, diff); + + llvm::Value* d_abs_tol = abs_tol; + if (abs_ty == python::Type::BOOLEAN || abs_ty == python::Type::I64) { + d_abs_tol = _env.upCast(builder, abs_tol, _env.doubleType()); + } else { + assert(abs_ty == python::Type::F64); + } + + auto RHS_cmp = builder.CreateFCmpOLT(relxmax, d_abs_tol); + auto RHS = builder.CreateSelect(RHS_cmp, d_abs_tol, relxmax); + auto standard_cmp = builder.CreateFCmpOLE(LHS, RHS); + auto standard_res = _env.upcastToBoolean(builder, standard_cmp); + builder.CreateStore(standard_res, val); // should overwrite value from bb_below_one + builder.CreateBr(bb_done); + + // return value stored in val + builder.SetInsertPoint(bb_done); + lfb.setLastBlock(bb_done); + auto resVal = _env.upcastToBoolean(builder, builder.CreateLoad(val)); + auto resSize = _env.i64Const(sizeof(int64_t)); + + return SerializableValue(resVal, resSize); + } else { + // case where x or y is a float + // if either is a float, can't optimize since floats can be arbitrarily close to any other value + // cast all values to doubles for comparison + auto x = _env.upCast(builder, x_val, _env.doubleType()); + auto y = _env.upCast(builder, y_val, _env.doubleType()); + auto rel_tol = _env.upCast(builder, 
rel_tol_val, _env.doubleType()); + auto abs_tol = _env.upCast(builder, abs_tol_val, _env.doubleType()); + + auto cur_block = builder.GetInsertBlock(); + assert(cur_block); + + // create new blocks for each case + BasicBlock *bb_nany = BasicBlock::Create(builder.getContext(), "cmp_y_nan", builder.GetInsertBlock()->getParent()); + BasicBlock *bb_isinf = BasicBlock::Create(builder.getContext(), "cmp_inf", builder.GetInsertBlock()->getParent()); + BasicBlock *bb_infres = BasicBlock::Create(builder.getContext(), "opt_isinf", builder.GetInsertBlock()->getParent()); + BasicBlock *bb_standard = BasicBlock::Create(builder.getContext(), "opt_standard", builder.GetInsertBlock()->getParent()); + BasicBlock *bb_done = BasicBlock::Create(builder.getContext(), "cmp_done", builder.GetInsertBlock()->getParent()); + + // allocate space for return value + auto val = _env.CreateFirstBlockAlloca(builder, _env.getBooleanType()); + + // first block + // this block checks if x is NAN - in which case isclose returns 0 (jump to bb_done) + const std::vector isnan_argx{SerializableValue(x, _env.i64Const(sizeof(int64_t)))}; + auto is_x_nan = FunctionRegistry::createMathIsNanCall(builder, python::Type::propagateToTupleType(python::Type::F64), python::Type::BOOLEAN, isnan_argx); + auto x_nan = builder.CreateZExtOrTrunc(is_x_nan.val, _env.i1Type()); + builder.CreateStore(_env.boolConst(false), val); + builder.CreateCondBr(x_nan, bb_done, bb_nany); + + // bb_nany + // this block checks if y is NAN - in which case isclose returns 0 (jump to bb_done) + builder.SetInsertPoint(bb_nany); + const std::vector isnan_argy{SerializableValue(y, _env.i64Const(sizeof(int64_t)))}; + auto is_y_nan = FunctionRegistry::createMathIsNanCall(builder, python::Type::propagateToTupleType(python::Type::F64), python::Type::BOOLEAN, isnan_argy); + auto y_nan = builder.CreateZExtOrTrunc(is_y_nan.val, _env.i1Type()); + builder.CreateStore(_env.boolConst(false), val); // overwrite value from first block + 
builder.CreateCondBr(y_nan, bb_done, bb_isinf); + + // bb_isinf + // this block checks if x or y is positive infinity or negative infinity + builder.SetInsertPoint(bb_isinf); + auto x_pinf = builder.CreateFCmpOEQ(x, ConstantFP::get(llvm::Type::getDoubleTy(context), INFINITY)); + auto y_pinf = builder.CreateFCmpOEQ(y, ConstantFP::get(llvm::Type::getDoubleTy(context), INFINITY)); + auto either_pinf = builder.CreateOr(x_pinf, y_pinf); + auto x_ninf = builder.CreateFCmpOEQ(x, ConstantFP::get(llvm::Type::getDoubleTy(context), -INFINITY)); + auto check_xninf = builder.CreateOr(x_ninf, either_pinf); + auto y_ninf = builder.CreateFCmpOEQ(y, ConstantFP::get(llvm::Type::getDoubleTy(context), -INFINITY)); + auto check_yninf = builder.CreateOr(y_ninf, check_xninf); + builder.CreateCondBr(check_yninf, bb_infres, bb_standard); + + // bb_infres + // if either x or y is +/- infinity, need to check that x == y + // so if x == y is true, isclose returns true, otherwise false + builder.SetInsertPoint(bb_infres); + auto infres = builder.CreateFCmpOEQ(x, y); + auto bool_res = _env.upcastToBoolean(builder, infres); + builder.CreateStore(bool_res, val); // overwrite value from bb_nany + builder.CreateBr(bb_done); + + // bb_standard + // this block computes the result of the standard inequality that isclose uses: + // |x - y| <= max([rel_tol * max(|x|, |y|)], abs_tol) + builder.SetInsertPoint(bb_standard); + auto x_abs = llvm::createUnaryIntrinsic(builder, llvm::Intrinsic::ID::fabs, x); + auto y_abs = llvm::createUnaryIntrinsic(builder, llvm::Intrinsic::ID::fabs, y); + auto xy_cmp = builder.CreateFCmpOLT(x_abs, y_abs); + auto xy_max = builder.CreateSelect(xy_cmp, y_abs, x_abs); + auto diff = builder.CreateFSub(x, y); + auto LHS = llvm::createUnaryIntrinsic(builder, llvm::Intrinsic::ID::fabs, diff); + auto relxmax = builder.CreateFMul(xy_max, rel_tol); + auto RHS_cmp = builder.CreateFCmpOLT(relxmax, abs_tol); + auto RHS = builder.CreateSelect(RHS_cmp, abs_tol, relxmax); + auto standard_cmp 
= builder.CreateFCmpOLE(LHS, RHS); + auto standard_res = _env.upcastToBoolean(builder, standard_cmp); + builder.CreateStore(standard_res, val); // overwrite value from bb_infres + builder.CreateBr(bb_done); + + // bb_done + builder.SetInsertPoint(bb_done); + lfb.setLastBlock(bb_done); + // return the value that was stored in val + auto resVal = builder.CreateLoad(val); + auto resSize = _env.i64Const(sizeof(int64_t)); + + return SerializableValue(resVal, resSize); + } + } + codegen::SerializableValue createMathCosCall(llvm::IRBuilder<>& builder, const python::Type &argsType, const python::Type &retType, const std::vector &args) { @@ -1193,7 +1540,34 @@ namespace tuplex { if (symbol == "math.degrees") return createMathToDegreesCall(builder, argsType, retType, args); + + if (symbol == "math.isnan") + return createMathIsNanCall(builder, argsType, retType, args); + + if (symbol == "math.isinf") + return createMathIsInfCall(builder, argsType, retType, args); + + if (symbol == "math.isclose") { + if (args.size() != 2 && args.size() != 3 && args.size() != 4) { + std::string err = "math.isclose needs 2, 3, or 4 args; got " + std::to_string(args.size()) + " args\n"; + throw std::runtime_error(err); + } + + assert(argsType.isTupleType()); + assert(args.size() == argsType.parameters().size()); + + // check all argument types + std::vector input_types = argsType.parameters(); + int i = 1; + for (const auto& type : input_types) { + if (type != python::Type::BOOLEAN && type != python::Type::I64 && type != python::Type::F64) { + throw std::runtime_error("argument " + std::to_string(i) + " is of type " + type.desc() + " but math.isclose expected a float, integer, or boolean"); + } + i++; + } + return createMathIsCloseCall(lfb, builder, argsType, args); + } // re module if (symbol == "re.search") diff --git a/tuplex/codegen/src/LLVMEnvironment.cc b/tuplex/codegen/src/LLVMEnvironment.cc index f9e76ca02..6d035acb9 100644 --- a/tuplex/codegen/src/LLVMEnvironment.cc +++ 
b/tuplex/codegen/src/LLVMEnvironment.cc @@ -1123,7 +1123,37 @@ namespace tuplex { } else if (val->getType() == Type::getInt64Ty(_context)) { sconst = builder.CreateGlobalStringPtr(msg + " [i64] : %lu\n"); } else if (val->getType() == Type::getDoubleTy(_context)) { - sconst = builder.CreateGlobalStringPtr(msg + " [f64] : %f\n"); + sconst = builder.CreateGlobalStringPtr(msg + " [f64] : %.12f\n"); + } else if (val->getType() == Type::getInt8PtrTy(_context, 0)) { + sconst = builder.CreateGlobalStringPtr(msg + " [i8*] : [%p] %s\n"); + } + auto fmt = builder.CreatePointerCast(sconst, llvm::Type::getInt8PtrTy(_context, 0)); + if (val->getType() != Type::getInt8PtrTy(_context, 0)) + builder.CreateCall(printf_F, {fmt, casted_val}); + else + builder.CreateCall(printf_F, {fmt, casted_val, casted_val}); + } + + void LLVMEnvironment::printHexValue(llvm::IRBuilder<> &builder, llvm::Value *val, std::string msg) { + using namespace llvm; + + auto printf_F = printf_prototype(_context, _module.get()); + llvm::Value *sconst = builder.CreateGlobalStringPtr("unknown type: ??"); + + llvm::Value *casted_val = val; + // check type of value + if (val->getType() == Type::getInt1Ty(_context)) { + sconst = builder.CreateGlobalStringPtr(msg + " [i1] : 0x%" PRIx8 "\n"); + casted_val = builder.CreateSExt(val, i8Type()); + } else if (val->getType() == Type::getInt8Ty(_context)) { + sconst = builder.CreateGlobalStringPtr(msg + " [i8] : 0x%" PRIx8 "\n"); + } else if (val->getType() == Type::getInt32Ty(_context)) { + sconst = builder.CreateGlobalStringPtr(msg + " [i32] : 0x%" PRIx32 "\n"); + } else if (val->getType() == Type::getInt64Ty(_context)) { + sconst = builder.CreateGlobalStringPtr(msg + " [i64] : 0x%" PRIx64 "\n"); + } else if (val->getType() == Type::getDoubleTy(_context)) { + sconst = builder.CreateGlobalStringPtr(msg + " [f64] : 0x%" PRIx64 "\n"); + casted_val = builder.CreateBitCast(val, i64Type()); } else if (val->getType() == Type::getInt8PtrTy(_context, 0)) { sconst = 
builder.CreateGlobalStringPtr(msg + " [i8*] : [%p] %s\n"); } diff --git a/tuplex/codegen/src/StandardModules.cc b/tuplex/codegen/src/StandardModules.cc index 322d9fb61..c12587467 100644 --- a/tuplex/codegen/src/StandardModules.cc +++ b/tuplex/codegen/src/StandardModules.cc @@ -56,6 +56,33 @@ namespace tuplex { m->addAttribute(make_shared(name, name, python::Type::makeFunctionType(python::Type::propagateToTupleType(type), python::Type::F64), SymbolType::FUNCTION)); } } + + // math.isnan + auto isnanSym = make_shared("isnan", "isnan", python::Type::makeFunctionType(python::Type::propagateToTupleType(python::Type::F64), python::Type::BOOLEAN), SymbolType::FUNCTION); + isnanSym->addTypeIfNotExists(python::Type::makeFunctionType(python::Type::propagateToTupleType(python::Type::I64), python::Type::BOOLEAN)); + isnanSym->addTypeIfNotExists(python::Type::makeFunctionType(python::Type::propagateToTupleType(python::Type::BOOLEAN), python::Type::BOOLEAN)); + m->addAttribute(isnanSym); + + // math.isinf + auto isinfSym = make_shared("isinf", "isinf", python::Type::makeFunctionType(python::Type::propagateToTupleType(python::Type::F64), python::Type::BOOLEAN), SymbolType::FUNCTION); + isinfSym->addTypeIfNotExists(python::Type::makeFunctionType(python::Type::propagateToTupleType(python::Type::I64), python::Type::BOOLEAN)); + isinfSym->addTypeIfNotExists(python::Type::makeFunctionType(python::Type::propagateToTupleType(python::Type::BOOLEAN), python::Type::BOOLEAN)); + m->addAttribute(isinfSym); + + // math.isclose + // typing is: + // ({f64, i64, bool}, {f64, i64, bool}[, rel_tol={f64, i64, bool}, abs_tol={f64, i64, bool}]) -> bool + auto iscloseSym = make_shared("isclose", [](const python::Type &params) { + assert(params.isTupleType()); + if (params.parameters().size() != 2 && params.parameters().size() != 3 && params.parameters().size() != 4) { + throw std::runtime_error("isclose needs 2, 3, or 4 arguments"); + return python::Type::UNKNOWN; + } + return 
python::Type::makeFunctionType(params, python::Type::BOOLEAN); + }); + // check that iscloseSym is function symbol + assert(iscloseSym->symbolType == SymbolType::FUNCTION); + m->addAttribute(iscloseSym); // math.ceil/math.floor for(const auto& name : vector{"ceil", "floor"}) { @@ -74,7 +101,7 @@ namespace tuplex { // inf/nan since version 3.5 // tau since version 3.6 // use here C constants. Note, python might have depending on version different constants. - m->addAttribute(Symbol::makeConstant("nan", Field(NAN))); + m->addAttribute(Symbol::makeConstant("nan", Field(D_NAN))); m->addAttribute(Symbol::makeConstant("inf", Field(INFINITY))); m->addAttribute(Symbol::makeConstant("pi", Field(Py_MATH_PI))); // C constant M_PI m->addAttribute(Symbol::makeConstant("e", Field(Py_MATH_E))); // C constant M_E diff --git a/tuplex/python/setup.py b/tuplex/python/setup.py index f76592266..9fdf71798 100644 --- a/tuplex/python/setup.py +++ b/tuplex/python/setup.py @@ -45,6 +45,7 @@ 'nbconvert<7.0', 'jupyter<7.0', 'nbformat<7.0', + 'Werkzeug<2.2.0', 'attrs>=19.2.0', 'dill>=0.2.7.1', 'pluggy>=0.6.0, <1.0.0', @@ -56,7 +57,7 @@ 'astor', 'prompt_toolkit>=2.0.7', 'jedi>=0.13.2', - 'cloudpickle>=0.6.1', + 'cloudpickle>=0.6.1,<2.0.0', 'PyYAML>=3.13', 'psutil', 'pymongo', diff --git a/tuplex/python/src/PythonDataSet.cc b/tuplex/python/src/PythonDataSet.cc index 96b550431..c6c69e76b 100644 --- a/tuplex/python/src/PythonDataSet.cc +++ b/tuplex/python/src/PythonDataSet.cc @@ -1058,14 +1058,8 @@ namespace tuplex { size_t numRows = *((const int64_t *) ptr); ptr += sizeof(int64_t); - logger.info("found partition with " + std::to_string(numRows) + " rows "); - int64_t *dataptr = (int64_t *) ptr; for (unsigned i = 0; i < numRows && pos < maxRowCount; ++i) { - -#ifndef NDEBUG - logger.info("value of row "+ std::to_string(pos) + " is: " + std::to_string(*dataptr)); -#endif if (*dataptr > 0) { Py_INCREF(Py_True); // list needs a ref, so inc ref count PyList_SET_ITEM(listObj, pos++, Py_True); diff 
--git a/tuplex/python/tests/test_math.py b/tuplex/python/tests/test_math.py index f60750d17..81f2f1127 100644 --- a/tuplex/python/tests/test_math.py +++ b/tuplex/python/tests/test_math.py @@ -299,7 +299,6 @@ def testExpm1(self): assert L_bool[1] == math.expm1(False) - def testPow(self): c = tuplex.Context(self.conf) @@ -357,4 +356,174 @@ def testPow(self): assert L_bool[0] == math.pow(True, False) assert L_bool[1] == math.pow(True, True) assert L_bool[2] == math.pow(False, True) - assert L_bool[3] == math.pow(False, False) \ No newline at end of file + assert L_bool[3] == math.pow(False, False) + + + def testIsNan(self): + c = tuplex.Context(self.conf) + + # floats + test = [math.nan, -math.nan, math.inf * 0, math.inf, math.pi, 0.0, 5.0, -128.0] + L = c.parallelize(test).map(lambda x: math.isnan(x)).collect() + assert len(L) == 8, 'wrong length' + self.assertEqual(L[0], True) + self.assertEqual(L[1], True) + self.assertEqual(L[2], True) + self.assertEqual(L[3], False) + self.assertEqual(L[4], False) + self.assertEqual(L[5], False) + self.assertEqual(L[6], False) + self.assertEqual(L[7], False) + + # integers + test1 = [0, -1, 5, math.nan, math.inf * math.inf, 97] + L1 = c.parallelize(test1).map(lambda x: math.isnan(x)).collect() + assert len(L1) == 6, 'wrong length' + self.assertEqual(L1[0], False) + self.assertEqual(L1[1], False) + self.assertEqual(L1[2], False) + self.assertEqual(L1[3], True) + self.assertEqual(L1[4], False) + self.assertEqual(L1[5], False) + + # booleans + test2 = [math.nan, True, False, False, math.nan] + L2 = c.parallelize(test2).map(lambda x: math.isnan(x)).collect() + assert len(L2) == 5, 'wrong length' + self.assertEqual(L2[0], True) + self.assertEqual(L2[1], False) + self.assertEqual(L2[2], False) + self.assertEqual(L2[3], False) + self.assertEqual(L2[4], True) + + # mix + test3 = [True, 128, -50.0, 0, math.inf, math.nan, False, 7] + L3 = c.parallelize(test3).map(lambda x: math.isnan(x)).collect() + assert len(L3) == 8, 'wrong length' + 
self.assertEqual(L3[0], False) + self.assertEqual(L3[1], False) + self.assertEqual(L3[2], False) + self.assertEqual(L3[3], False) + self.assertEqual(L3[4], False) + self.assertEqual(L3[5], True) + self.assertEqual(L3[6], False) + self.assertEqual(L3[7], False) + + + def testIsInf(self): + c = tuplex.Context(self.conf) + + # floats + test = [math.inf, -math.inf, math.inf + math.inf, math.inf * math.inf, math.nan, math.pi, 0.0, 5.0, -128.0] + L = c.parallelize(test).map(lambda x: math.isinf(x)).collect() + assert len(L) == 9, 'wrong length' + self.assertEqual(L[0], True) + self.assertEqual(L[1], True) + self.assertEqual(L[2], True) + self.assertEqual(L[3], True) + self.assertEqual(L[4], False) + self.assertEqual(L[5], False) + self.assertEqual(L[6], False) + self.assertEqual(L[7], False) + self.assertEqual(L[8], False) + + # integers + test1 = [0, -1, 5, math.inf * 0, math.inf, 97] + L1 = c.parallelize(test1).map(lambda x: math.isinf(x)).collect() + assert len(L1) == 6, 'wrong length' + self.assertEqual(L1[0], False) + self.assertEqual(L1[1], False) + self.assertEqual(L1[2], False) + self.assertEqual(L1[3], False) + self.assertEqual(L1[4], True) + self.assertEqual(L1[5], False) + + # booleans + test2 = [math.inf, True, False, False, -math.inf] + L2 = c.parallelize(test2).map(lambda x: math.isinf(x)).collect() + assert len(L2) == 5, 'wrong length' + self.assertEqual(L2[0], True) + self.assertEqual(L2[1], False) + self.assertEqual(L2[2], False) + self.assertEqual(L2[3], False) + self.assertEqual(L2[4], True) + + # mix + test3 = [True, 128, -50.0, 0, -math.inf, math.nan, False, 7] + L3 = c.parallelize(test3).map(lambda x: math.isinf(x)).collect() + assert len(L3) == 8, 'wrong length' + self.assertEqual(L3[0], False) + self.assertEqual(L3[1], False) + self.assertEqual(L3[2], False) + self.assertEqual(L3[3], False) + self.assertEqual(L3[4], True) + self.assertEqual(L3[5], False) + self.assertEqual(L3[6], False) + self.assertEqual(L3[7], False) + + + def testIsClose(self): 
+ c = tuplex.Context(self.conf) + + test = [(-0.5, 0.0), (0.5, 0.50001), (0.5, 0.500000005), (-0.5, -0.5000000001, 0.0), (0.5, 0.50000000005)] + L = c.parallelize(test).map(lambda x, y: math.isclose(x, y)).collect() + assert len(L) == 5, 'wrong length' + self.assertAlmostEqual(L[0], False) + self.assertAlmostEqual(L[1], False) + self.assertAlmostEqual(L[2], False) + self.assertAlmostEqual(L[3], True) + self.assertAlmostEqual(L[3], True) + + test1 = [(0, 0), (0, -1), (5, 128), (-1, -1)] + L1 = c.parallelize(test1).map(lambda x, y: math.isclose(x, y)).collect() + assert len(L1) == 4, 'wrong length' + self.assertAlmostEqual(L1[0], True) + self.assertAlmostEqual(L1[1], False) + self.assertAlmostEqual(L1[2], False) + self.assertAlmostEqual(L1[3], True) + + test2 = [(True, True), (False, True), (True, False), (False, False)] + L2 = c.parallelize(test2).map(lambda x, y: math.isclose(x, y)).collect() + assert len(L2) == 4, 'wrong length' + self.assertAlmostEqual(L2[0], True) + self.assertAlmostEqual(L2[1], False) + self.assertAlmostEqual(L2[2], False) + self.assertAlmostEqual(L2[3], True) + + test3 = [(0.5, 1), (2.0000000009, 2), (1.999999, 2)] + L3 = c.parallelize(test3).map(lambda x, y: math.isclose(x, y)).collect() + assert len(L3) == 3, 'wrong length' + self.assertAlmostEqual(L3[0], False) + self.assertAlmostEqual(L3[1], True) + self.assertAlmostEqual(L3[2], False) + + test4 = [(1, True), (1, False), (0, False), (0, True)] + L4 = c.parallelize(test4).map(lambda x, y: math.isclose(x, y)).collect() + assert len(L4) == 4, 'wrong length' + self.assertAlmostEqual(L4[0], True) + self.assertAlmostEqual(L4[1], False) + self.assertAlmostEqual(L4[2], True) + self.assertAlmostEqual(L4[3], False) + + test5 = [(1.0000000009, True), (0.0000000001, False)] + L5 = c.parallelize(test5).map(lambda x, y: math.isclose(x, y)).collect() + assert len(L5) == 2, 'wrong length' + self.assertAlmostEqual(L5[0], True) + self.assertAlmostEqual(L5[1], False) + + test6 = [(math.inf, math.inf), + 
(math.inf, -math.inf), + (-math.inf, -math.inf), + (math.inf, 5), + (math.nan, math.nan), + (math.pi, math.pi), + (math.pi, 3.14159265)] + L6 = c.parallelize(test6).map(lambda x, y: math.isclose(x, y)).collect() + assert len(L6) == 7, 'wrong length' + self.assertAlmostEqual(L6[0], True) + self.assertAlmostEqual(L6[1], False) + self.assertAlmostEqual(L6[2], True) + self.assertAlmostEqual(L6[3], False) + self.assertAlmostEqual(L6[4], False) + self.assertAlmostEqual(L6[5], True) + self.assertAlmostEqual(L6[6], False) \ No newline at end of file diff --git a/tuplex/test/core/MathFunctionsTest.cc b/tuplex/test/core/MathFunctionsTest.cc index d0816d322..ad7bbb97f 100644 --- a/tuplex/test/core/MathFunctionsTest.cc +++ b/tuplex/test/core/MathFunctionsTest.cc @@ -628,7 +628,6 @@ TEST_F(MathFunctionsTest, MathAsin) { } - TEST_F(MathFunctionsTest, MathPow) { using namespace std; using namespace tuplex; @@ -696,4 +695,234 @@ TEST_F(MathFunctionsTest, MathPow) { python::lockGIL(); python::closeInterpreter(); -} \ No newline at end of file +} + + +TEST_F(MathFunctionsTest, MathIsNan) { + using namespace std; + using namespace tuplex; + + python::initInterpreter(); + python::unlockGIL(); + + Context c(microTestOptions()); + ClosureEnvironment ce; + ce.importModuleAs("math", "math"); + + auto v1 = c.parallelize({ + Row(0.0), Row(D_NAN), Row(INFINITY), Row(-INFINITY) + }).map(UDF("lambda x: math.isnan(x)", "", ce)).collectAsVector(); + + EXPECT_EQ(v1.size(), 4); + EXPECT_EQ(v1[0].getBoolean(0), false); + EXPECT_EQ(v1[1].getBoolean(0), true); + EXPECT_EQ(v1[2].getBoolean(0), false); + EXPECT_EQ(v1[3].getBoolean(0), false); + + auto v2 = c.parallelize({ + Row(0), Row(-1), Row(5), Row(-97) + }).map(UDF("lambda x: math.isnan(x)", "", ce)).collectAsVector(); + EXPECT_EQ(v2.size(), 4); + EXPECT_EQ(v2[0].getBoolean(0), false); + EXPECT_EQ(v2[1].getBoolean(0), false); + EXPECT_EQ(v2[2].getBoolean(0), false); + EXPECT_EQ(v2[3].getBoolean(0), false); + + auto v3 = c.parallelize({ + 
Row(true), Row(false), Row(true) + }).map(UDF("lambda x: math.isnan(x)", "", ce)).collectAsVector(); + EXPECT_EQ(v3.size(), 3); + EXPECT_EQ(v3[0].getBoolean(0), false); + EXPECT_EQ(v3[1].getBoolean(0), false); + EXPECT_EQ(v3[2].getBoolean(0), false); + + auto v4 = c.parallelize({ + Row(-0.89), Row(10.23), Row(-97.484), Row(-D_NAN) + }).map(UDF("lambda x: math.isnan(x)", "", ce)).collectAsVector(); + EXPECT_EQ(v4.size(), 4); + EXPECT_EQ(v4[0].getBoolean(0), false); + EXPECT_EQ(v4[1].getBoolean(0), false); + EXPECT_EQ(v4[2].getBoolean(0), false); + EXPECT_EQ(v4[3].getBoolean(0), true); + + python::lockGIL(); + python::closeInterpreter(); +} + + +TEST_F(MathFunctionsTest, MathIsInf) { + using namespace std; + using namespace tuplex; + + python::initInterpreter(); + python::unlockGIL(); + + Context c(microTestOptions()); + ClosureEnvironment ce; + ce.importModuleAs("math", "math"); + + auto v1 = c.parallelize({ + Row(M_PI), Row(D_NAN), Row(INFINITY), Row(-INFINITY) + }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector(); + EXPECT_EQ(v1.size(), 4); + EXPECT_EQ(v1[0].getBoolean(0), false); + EXPECT_EQ(v1[1].getBoolean(0), false); + EXPECT_EQ(v1[2].getBoolean(0), true); + EXPECT_EQ(v1[3].getBoolean(0), true); + + auto v2 = c.parallelize({ + Row(0), Row(-1), Row(5), Row(-97) + }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector(); + EXPECT_EQ(v2.size(), 4); + EXPECT_EQ(v2[0].getBoolean(0), false); + EXPECT_EQ(v2[1].getBoolean(0), false); + EXPECT_EQ(v2[2].getBoolean(0), false); + EXPECT_EQ(v2[3].getBoolean(0), false); + + auto v3 = c.parallelize({ + Row(1.5), Row(-0.89), Row(10.23), Row(-97.484), Row(-INFINITY) + }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector(); + EXPECT_EQ(v3.size(), 5); + EXPECT_EQ(v3[0].getBoolean(0), false); + EXPECT_EQ(v3[1].getBoolean(0), false); + EXPECT_EQ(v3[2].getBoolean(0), false); + EXPECT_EQ(v3[3].getBoolean(0), false); + EXPECT_EQ(v3[4].getBoolean(0), true); + + auto v4 = c.parallelize({ + Row(true), 
Row(false) + }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector(); + EXPECT_EQ(v4.size(), 2); + EXPECT_EQ(v4[0].getBoolean(0), false); + EXPECT_EQ(v4[1].getBoolean(0), false); + + python::lockGIL(); + python::closeInterpreter(); +} + + +TEST_F(MathFunctionsTest, MathIsClose) { + using namespace std; + using namespace tuplex; + + python::initInterpreter(); + python::unlockGIL(); + + Context c(microTestOptions()); + ClosureEnvironment ce; + ce.importModuleAs("math", "math"); + + auto v1 = c.parallelize({ + Row(-0.5, 0.0), Row(0.5, 0.50001), Row(0.5, 0.500000005), Row(-0.5, -0.5000000001), Row(0.5, 0.50000000005) + }).map(UDF("lambda x, y: math.isclose(x, y)", "", ce)).collectAsVector(); + + EXPECT_EQ(v1.size(), 5); + EXPECT_EQ(v1[0].getBoolean(0), false); + EXPECT_EQ(v1[1].getBoolean(0), false); + EXPECT_EQ(v1[2].getBoolean(0), false); + EXPECT_EQ(v1[3].getBoolean(0), true); + EXPECT_EQ(v1[4].getBoolean(0), true); + + auto v2 = c.parallelize({ + Row(0.5, 0.0, 1e-09, 1e-09), Row(0.5, 0.500000005, 5e-09, 0.5) + }).map(UDF("lambda x, y, r, a: math.isclose(x, y, r, a)", "", ce)).collectAsVector(); + + EXPECT_EQ(v2.size(), 2); + EXPECT_EQ(v2[0].getBoolean(0), false); + EXPECT_EQ(v2[1].getBoolean(0), true); + + auto v2_1 = c.parallelize({ + Row(0.5, 0.50001, 5e-09) + }).map(UDF("lambda x, y, r: math.isclose(x, y, r)", "", ce)).collectAsVector(); + + EXPECT_EQ(v2_1.size(), 1); + EXPECT_EQ(v2_1[0].getBoolean(0), false); + + auto v3 = c.parallelize({ + Row(0, 0), Row(0, -1), Row(5, 128) + }).map(UDF("lambda x, y: math.isclose(x, y)", "", ce)).collectAsVector(); + + EXPECT_EQ(v3.size(), 3); + EXPECT_EQ(v3[0].getBoolean(0), true); + EXPECT_EQ(v3[1].getBoolean(0), false); + EXPECT_EQ(v3[2].getBoolean(0), false); + + auto v4 = c.parallelize({ + Row(0, 0, 1e-09, 1e-09), Row(5, 10, 0.5, 15) + }).map(UDF("lambda x, y, r, a: math.isclose(x, y, r, a)", "", ce)).collectAsVector(); + + EXPECT_EQ(v4.size(), 2); + EXPECT_EQ(v4[0].getBoolean(0), true); + 
EXPECT_EQ(v4[1].getBoolean(0), true); + + auto v5 = c.parallelize({ + Row(0, 1, 5e-09) + }).map(UDF("lambda x, y, r: math.isclose(x, y, r)", "", ce)).collectAsVector(); + + EXPECT_EQ(v5.size(), 1); + EXPECT_EQ(v5[0].getBoolean(0), false); + + auto v6 = c.parallelize({ + Row(true, true, false, false), Row(true, false, true, 1) + }).map(UDF("lambda x, y, r, a: math.isclose(x, y, r, a)", "", ce)).collectAsVector(); + + EXPECT_EQ(v6.size(), 2); + EXPECT_EQ(v6[0].getBoolean(0), true); + EXPECT_EQ(v6[1].getBoolean(0), true); + + auto v7 = c.parallelize({ + Row(false, true, 5e-09), Row(false, true, 1.0), Row(false, true, 0.999999) + }).map(UDF("lambda x, y, r: math.isclose(x, y, r)", "", ce)).collectAsVector(); + + EXPECT_EQ(v7.size(), 3); + EXPECT_EQ(v7[0].getBoolean(0), false); + EXPECT_EQ(v7[1].getBoolean(0), true); + EXPECT_EQ(v7[2].getBoolean(0), false); + + auto v8 = c.parallelize({ + Row(0.5, 1, 1e-09, 1e-09), Row(2.0000000009, 2, 5e-09, 0) + }).map(UDF("lambda x, y, r, a: math.isclose(x, y, r, a)", "", ce)).collectAsVector(); + + EXPECT_EQ(v8.size(), 2); + EXPECT_EQ(v8[0].getBoolean(0), false); + EXPECT_EQ(v8[1].getBoolean(0), true); + + auto v9 = c.parallelize({ + Row(1, true, 5e-09), Row(1, false, 1e-09) + }).map(UDF("lambda x, y, r: math.isclose(x, y, r)", "", ce)).collectAsVector(); + + EXPECT_EQ(v9.size(), 2); + EXPECT_EQ(v9[0].getBoolean(0), true); + EXPECT_EQ(v9[1].getBoolean(0), false); + + auto v10 = c.parallelize({ + Row(1.0000000009, true), Row(0.0000000001, false) + }).map(UDF("lambda x, y: math.isclose(x, y)", "", ce)).collectAsVector(); + + EXPECT_EQ(v10.size(), 2); + EXPECT_EQ(v10[0].getBoolean(0), true); + EXPECT_EQ(v10[1].getBoolean(0), false); + + auto v11 = c.parallelize({ + Row(INFINITY, INFINITY), + Row(INFINITY, -INFINITY), + Row(-INFINITY, -INFINITY), + Row(INFINITY, 5), + Row(D_NAN, D_NAN), + Row(M_PI, M_PI), + Row(M_PI, 3.14159265) + }).map(UDF("lambda x, y: math.isclose(x, y)", "", ce)).collectAsVector(); + + EXPECT_EQ(v11.size(), 7); + 
EXPECT_EQ(v11[0].getBoolean(0), true); + EXPECT_EQ(v11[1].getBoolean(0), false); + EXPECT_EQ(v11[2].getBoolean(0), true); + EXPECT_EQ(v11[3].getBoolean(0), false); + EXPECT_EQ(v11[4].getBoolean(0), false); + EXPECT_EQ(v11[5].getBoolean(0), true); + EXPECT_EQ(v11[6].getBoolean(0), false); + + python::lockGIL(); + python::closeInterpreter(); +} + diff --git a/tuplex/test/wrappers/WrapperTest.cc b/tuplex/test/wrappers/WrapperTest.cc index bc6906a37..d3b602ca8 100644 --- a/tuplex/test/wrappers/WrapperTest.cc +++ b/tuplex/test/wrappers/WrapperTest.cc @@ -51,6 +51,37 @@ TEST_F(WrapperTest, LambdaBackend) { // Important detail: RAII of boost python requires call to all boost::python destructors before closing the interpreter. +/** below is a template for a wrapper test function **/ +// TEST_F(WrapperTest, WrapperTestTemplate) { +// using namespace tuplex; + +// // create Python context (pass options as JSON) +// PythonContext c("c", "", microTestOptions().asJSON()); + +// // list object contains all rows in test (in this test, only one row) +// PyObject *listObj = PyList_New(1); + +// // initialize first row +// PyObject *tupleObj1 = PyTuple_New(2); +// PyTuple_SET_ITEM(tupleObj1, 0, python::PyString_FromString("a")); +// PyTuple_SET_ITEM(tupleObj1, 1, python::PyString_FromString("a")); + +// PyList_SetItem(listObj, 0, tupleObj1); + +// { // need to keep curly braces (for weird memory errors) +// auto list = py::reinterpret_borrow(listObj); +// // add parallelize-map-collect +// auto res = c.parallelize(list).map("lambda x: x", "").collect(); +// auto resObj = res.ptr(); + +// ASSERT_TRUE(PyList_Check(resObj)); +// // check size of resulting list +// ASSERT_EQ(PyList_GET_SIZE(resObj), 4); + +// PyObject_Print(resObj, stdout, 0); +// } +// } + TEST_F(WrapperTest, BasicMergeInOrder) { using namespace tuplex; @@ -87,6 +118,45 @@ TEST_F(WrapperTest, BasicMergeInOrder) { } } +TEST_F(WrapperTest, MathIsInf) { + using namespace tuplex; + + // create Python context (pass options 
as JSON) + PythonContext c("c", "", microTestOptions().asJSON()); + + // list object contains all rows in test (in this test, only one row) + PyObject *listObj = PyList_New(1); + + // initialize listObj + // note that using runAndGet on each individual value, and then setting + // them as an element of the list is buggy (doesn't always return the right value) + listObj = python::runAndGet( + "import math; x = [0, -1, 5, math.inf * 0, math.inf, 97]", + "x"); + + Py_XINCREF(listObj); + PyObject_Print(listObj, stdout, 0); + std::cout << std::endl; + + { + auto list = py::reinterpret_borrow(listObj); + + auto ba_closure = PyDict_New(); + auto math_mod = PyImport_ImportModule("math"); + assert(math_mod); + PyDict_SetItemString(ba_closure, "math", math_mod); + + // write parallelize function + auto res = c.parallelize(list).map("lambda x: math.isinf(x)", "", py::reinterpret_steal(ba_closure)).collect(); + auto resObj = res.ptr(); + + ASSERT_TRUE(PyList_Check(resObj)); + + PyObject_Print(resObj, stdout, 0); + std::cout << std::endl; + } +} + TEST_F(WrapperTest, StringTuple) { using namespace tuplex; diff --git a/tuplex/utils/include/Utils.h b/tuplex/utils/include/Utils.h index 2a5b30e40..e7c105eeb 100644 --- a/tuplex/utils/include/Utils.h +++ b/tuplex/utils/include/Utils.h @@ -73,6 +73,10 @@ constexpr const char* base_file_name(const char* path) { return file; } +// generates a QNAN +// note: not used for direct comparison in isnan, as there are other representations of NAN (e.g. SNAN) +constexpr double D_NAN = nan(""); + // macros to print out filename + line #define FLINESTR (std::string(base_file_name(__FILE__)) + "+" + std::to_string(__LINE__))