这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion tuplex/codegen/src/BlockGeneratorVisitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -961,12 +961,42 @@ namespace tuplex {
assert(R);

if(tt == TokenType::IS || tt == TokenType::ISNOT) {

// special case: left/right is not boolean
// --> Python allows that, it's bad coding style though.
// compile and hint.
if(leftType != python::Type::BOOLEAN || rightType != python::Type::BOOLEAN) {
_logger.warn("SyntaxWarning: UDF contains is comparison between " + leftType.desc() + " and "
+ rightType.desc() + ". Better avoid, is should be only used to test against booleans or None.");

// though upcast may be defined, for is this will be ignored.

// only for integers there's actual code. Else, assume always false due to memory
// address issue
if(leftType == rightType && leftType == python::Type::I64) {
_logger.warn("SyntaxWarning: Emitting code for integer is comparison, i.e. for integers in range [-5, 256] is behaves like ==");

// result is: L == R && -5 <= L <= 256
assert(L && R);
auto equal = builder.CreateICmpEQ(L, R);
auto upperBound = builder.CreateICmpSLE(L, _env->i64Const(256));
auto lowerBound = builder.CreateICmpSGE(L, _env->i64Const(-5));
// could short-circuit here, but & does fine as well...
auto resValue = builder.CreateAnd(equal, builder.CreateAnd(upperBound, lowerBound));
return _env->upcastToBoolean(builder, resValue);
} else {
return _env->boolConst(false);
}
}

// rest of the code is for the boolean case
assert(leftType == python::Type::BOOLEAN || rightType == python::Type::BOOLEAN);

// one of the types must be boolean, otherwise compareInst with _isnull would've taken care.
if((leftType == python::Type::BOOLEAN) != (rightType == python::Type::BOOLEAN)) {
// one of the types is boolean, other isn't. comparison results in false.
return _env->boolConst(tt == TokenType::ISNOT);
}
}

// both must be boolean.
auto cmpPredicate = (tt == TokenType::ISNOT) ? llvm::CmpInst::Predicate::ICMP_NE : llvm::CmpInst::Predicate::ICMP_EQ;
Expand Down
24 changes: 24 additions & 0 deletions tuplex/test/core/DataFrameOperations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -407,4 +407,28 @@ TEST_F(DataFrameTest, RenameColumns) {
// check now fuzzy matching
auto& err_ds = ds3.renameColumn("secund", "+1");
EXPECT_TRUE(err_ds.isError());
}

// Regression test for https://github.com/tuplex/tuplex/issues/54.
// The failing Python program was:
//     c = Context()
//     c.parallelize([1, 2, 3]).filter(lambda x: x is 2).collect()
// Verifies that a filter UDF using the `is` keyword on non-boolean operands
// compiles and yields the expected rows for both integers and floats.
TEST_F(DataFrameTest, IsKeywordAndFilter) {
    using namespace tuplex;
    Context c(microTestOptions());

    // Integer case: for integers in [-5, 256], `is` is expected to act like
    // equality, so exactly the row holding 2 must survive the filter.
    auto& intDs = c.parallelize({Row(1), Row(2), Row(3)}).filter(UDF("lambda x: x is 2"));
    ASSERT_FALSE(intDs.isError());

    auto intRows = intDs.collectAsVector();
    ASSERT_EQ(intRows.size(), 1u);
    EXPECT_EQ(intRows.front().getInt(0), 2);

    // Float case: `is` on floats is expected to never match, so the filter
    // must drop every row. A separate reference is bound here on purpose:
    // assigning to `intDs` would copy-assign over the first dataset object
    // instead of referring to the new one.
    auto& floatDs = c.parallelize({Row(1.0), Row(2.0), Row(3.0)}).filter(UDF("lambda x: x is 2.0"));
    ASSERT_FALSE(floatDs.isError());

    auto floatRows = floatDs.collectAsVector();
    ASSERT_EQ(floatRows.size(), 0u);
}