From 7c0984e4724d613475afc26136fe3ad43ddbc271 Mon Sep 17 00:00:00 2001 From: Rhea Goyal Date: Thu, 2 Jun 2022 12:57:41 -0400 Subject: [PATCH 01/23] added new tests (for already existing functions) --- tuplex/python/tests/test_math.py | 100 +++++++++++++++++++++++++- tuplex/test/core/MathFunctionsTest.cc | 89 +++++++++++++++++++++-- 2 files changed, 183 insertions(+), 6 deletions(-) diff --git a/tuplex/python/tests/test_math.py b/tuplex/python/tests/test_math.py index f60750d17..8763391fc 100644 --- a/tuplex/python/tests/test_math.py +++ b/tuplex/python/tests/test_math.py @@ -299,7 +299,6 @@ def testExpm1(self): assert L_bool[1] == math.expm1(False) - def testPow(self): c = tuplex.Context(self.conf) @@ -357,4 +356,101 @@ def testPow(self): assert L_bool[0] == math.pow(True, False) assert L_bool[1] == math.pow(True, True) assert L_bool[2] == math.pow(False, True) - assert L_bool[3] == math.pow(False, False) \ No newline at end of file + assert L_bool[3] == math.pow(False, False) + + + def testTEST(self): + c = tuplex.Context(self.conf) + + pow_test = [(25, 0.5), (3, -2), (-4.0, 3.0), (-5, -4)] + c.parallelize(pow_test).map(lambda x, y: math.pow(x, y)).collect() + assert len(pow_test) == 4, 'wrong length' + self.assertAlmostEqual(pow_test[0], 5.0) + self.assertAlmostEqual(pow_test[1], -1.0 / 9.0) + self.assertAlmostEqual(pow_test[2], -64.0) + self.assertAlmostEqual(pow_test[3], 1.0 / 625.0) + + sqrt_test = [0, 1.0, 4.0, 16] + c.parallelize(sqrt_test).map(lambda x: math.sqrt(x)).collect() + assert len(sqrt_test) == 4, 'wrong length' + self.assertAlmostEqual(sqrt_test[0], 0.0) + self.assertAlmostEqual(sqrt_test[1], 1.0) + self.assertAlmostEqual(sqrt_test[2], 2.0) + self.assertAlmostEqual(sqrt_test[3], 4.0) + + + # def testIsInf(self): + # c = tuplex.Context(self.conf) + + # float_test = [0.0, 1.0, -1.0, -math.inf, 3.0, math.inf] + # L0 = c.parallelize(float_test).map(lambda x: math.isinf(x)).collect() + # assert len(L0) == 6, 'wrong length' + # self.assertEqual(L0[0], False) + # self.assertEqual(L0[1], False) + # self.assertEqual(L0[2], False) + # self.assertEqual(L0[3], True) + # self.assertEqual(L0[4], False) + # self.assertEqual(L0[5], True) + + # tuple_test = [(1.0, math.inf), (-math.inf, 0.0), (-math.inf, math.inf), (-2.0, 0.0)] + # L1 = c.parallelize(tuple_test).map(lambda x, y: (math.isinf(x), math.isinf(y))).collect() + # assert len(L1) == 4, 'wrong length' + # self.assertEqual(L1[0], (False, True)) + # self.assertEqual(L1[1], (True, False)) + # self.assertEqual(L1[2], (True, True)) + # self.assertEqual(L1[3], (False, False)) + + # int_test = [1, -math.inf, -1, 0, math.inf] + # L2 = c.parallelize(int_test).map(lambda x: math.isinf(x)).collect() + # assert len(L2) == 5, 'wrong length' + # self.assertEqual(L2[0], False) + # self.assertEqual(L2[1], True) + # self.assertEqual(L2[2], False) + # self.assertEqual(L2[3], False) + # self.assertEqual(L2[4], True) + + # mix_test = [-1, math.inf, 1.5, math.nan, -math.inf, 0.0] + # L3 = c.parallelize(mix_test).map(lambda x, y: math.pow(x, y)).collect() + # assert len(L3) == 6 + # self.assertEqual(L3[0], False) + # self.assertEqual(L3[1], True) + # self.assertEqual(L3[2], False) + # self.assertEqual(L3[3], False) + # self.assertEqual(L3[4], True) + # self.assertEqual(L3[5], False) + + + # def testIsNan(self): + # c = tuplex.Context(self.conf) + + # test0 = [0.0, math.nan, -3.5, -math.inf] + # L0 = c.parallelize(test0).map(lambda x: math.isnan(x)).collect() + # assert len(L0) == 4, 'wrong length' + # self.assertEqual(L0[0], False) + # self.assertEqual(L0[1], True) + # self.assertEqual(L0[2], False) + # self.assertEqual(L0[3], False) + + # test1 = [0, -1, math.nan, math.inf, 97] + # L1 = c.parallelize(test1).map(lambda x: math.isnan(x)).collect() + # assert len(L1) == 5, 'wrong length' + # self.assertEqual(L1[0], False) + # self.assertEqual(L1[1], False) + # self.assertEqual(L1[2], True) + # self.assertEqual(L1[3], False) + # self.assertEqual(L1[4], False) + + # test2 = [math.nan, 0, -math.inf, -1.5, math.nan, 97] + # L2 = c.parallelize(test2).map(lambda x: math.isnan(x)).collect() + # assert len(L2) == 6, 'wrong length' + # self.assertEqual(L2[0], True) + # self.assertEqual(L2[1], False) + # self.assertEqual(L2[2], False) + # self.assertEqual(L2[3], False) + # self.assertEqual(L2[4], True) + # self.assertEqual(L2[4], False) + + + # def testIsClose(self): + # c = tuplex.Context(self.conf) + diff --git a/tuplex/test/core/MathFunctionsTest.cc b/tuplex/test/core/MathFunctionsTest.cc index ed81a0eeb..5be4b6ef4 100644 --- a/tuplex/test/core/MathFunctionsTest.cc +++ b/tuplex/test/core/MathFunctionsTest.cc @@ -627,8 +627,6 @@ TEST_F(MathFunctionsTest, MathAsin) { python::closeInterpreter(); } - - TEST_F(MathFunctionsTest, MathPow) { using namespace std; using namespace tuplex; @@ -677,7 +675,7 @@ TEST_F(MathFunctionsTest, MathPow) { Row(2), Row(1), Row(-1), Row(-2), Row(0) }).map(UDF("lambda y: math.pow(y, 5)", "", ce)).collectAsVector(); - EXPECT_EQ(v2.size(), 5); + EXPECT_EQ(v4.size(), 5); EXPECT_DOUBLE_EQ(v4[0].getDouble(0), 32.0); EXPECT_DOUBLE_EQ(v4[1].getDouble(0), 1.0); EXPECT_DOUBLE_EQ(v4[2].getDouble(0), -1.0); @@ -696,4 +694,87 @@ TEST_F(MathFunctionsTest, MathPow) { python::lockGIL(); python::closeInterpreter(); -} \ No newline at end of file +} + +TEST_F(MathFunctionsTest, TEST) { + using namespace std; + using namespace tuplex; + + python::initInterpreter(); + python::unlockGIL(); + + Context c(microTestOptions()); + ClosureEnvironment ce; + ce.importModuleAs("math", "math"); + + auto v1 = c.parallelize({ + Row(25, 0.5), Row(3, -2), Row(-4.0, 3.0), Row(-5, -4) + }).map(UDF("lambda x, y: math.pow(x, y)", "", ce)).collectAsVector(); + EXPECT_EQ(v1.size(), 4); + EXPECT_DOUBLE_EQ(v1[0].getDouble(0), 5.0); + EXPECT_DOUBLE_EQ(v1[1].getDouble(0), pow(3.0, -2.0)); + EXPECT_DOUBLE_EQ(v1[2].getDouble(0), -64.0); + EXPECT_DOUBLE_EQ(v1[3].getDouble(0), pow(-5.0, -4.0)); + + auto v2 = c.parallelize({ + Row(0), Row(1.0), Row(4.0), Row(16) + }).map(UDF("lambda x: math.sqrt(x)", "", ce)).collectAsVector(); + EXPECT_EQ(v2.size(), 4); + EXPECT_DOUBLE_EQ(v2[0].getDouble(0), 0.0); + EXPECT_DOUBLE_EQ(v2[1].getDouble(0), 1.0); + EXPECT_DOUBLE_EQ(v2[2].getDouble(0), 2.0); + EXPECT_DOUBLE_EQ(v2[3].getDouble(0), 4.0); + + python::lockGIL(); + python::closeInterpreter(); +} + +// TEST_F(MathFunctionsTest, MathIsInf) { +// using namespace std; +// using namespace tuplex; + +// python::initInterpreter(); +// python::unlockGIL(); + +// Context c(microTestOptions()); +// ClosureEnvironment ce; +// ce.importModuleAs("math", "math"); + +// auto v1 = c.parallelize({ +// Row(0.0), Row(1.0), Row(-1.0), Row(-INFINITY), Row(3.0), Row(INFINITY) +// }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector(); + +// EXPECT_EQ(v1.size(), 6); +// EXPECT_EQ(v1[0].getBool(0), false); +// EXPECT_EQ(v1[1].getBool(0), false); +// EXPECT_EQ(v1[2].getBool(0), false); +// EXPECT_EQ(v1[3].getBool(0), true); +// EXPECT_EQ(v1[4].getBool(0), false); +// EXPECT_EQ(v1[5].getBool(0), true); + +// auto v2 = c.parallelize({ +// Row(1), Row(-INFINITY), Row(-1), Row(0), Row(INFINITY) +// }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector(); + +// EXPECT_EQ(v2.size(), 5); +// EXPECT_EQ(v2[0].getBool(0), false); +// EXPECT_EQ(v2[1].getBool(0), true); +// EXPECT_EQ(v2[2].getBool(0), false); +// EXPECT_EQ(v2[3].getBool(0), false); +// EXPECT_EQ(v2[4].getBool(0), true); + +// auto v3 = c.parallelize({ +// Row(-1), Row(INFINITY), Row(1.5), Row(NAN), Row(-INFINITY), Row(0.0) +// }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector(); + +// EXPECT_EQ(v3.size(), 6); +// EXPECT_DOUBLE_EQ(v3[0].getBool(0), false); +// EXPECT_DOUBLE_EQ(v3[1].getBool(0), true); +// EXPECT_DOUBLE_EQ(v3[2].getBool(0), false); +// EXPECT_DOUBLE_EQ(v3[3].getBool(0), false); +// EXPECT_DOUBLE_EQ(v3[4].getBool(0), true); +// EXPECT_DOUBLE_EQ(v3[5].getBool(0), false); + +// python::lockGIL(); +// python::closeInterpreter(); +// } From 985b4ddd96bdc3349c82dc67d88cf4471c917f3a Mon Sep 17 00:00:00 2001 From: Rhea Goyal Date: Thu, 2 Jun 2022 14:27:31 -0400 Subject: [PATCH 02/23] whoops --- tuplex/python/tests/test_math.py | 96 --------------------------- tuplex/test/core/MathFunctionsTest.cc | 83 ----------------------- 2 files changed, 179 deletions(-) diff --git a/tuplex/python/tests/test_math.py b/tuplex/python/tests/test_math.py index 8763391fc..eb1aa6fcb 100644 --- a/tuplex/python/tests/test_math.py +++ b/tuplex/python/tests/test_math.py @@ -358,99 +358,3 @@ def testPow(self): assert L_bool[2] == math.pow(False, True) assert L_bool[3] == math.pow(False, False) - - def testTEST(self): - c = tuplex.Context(self.conf) - - pow_test = [(25, 0.5), (3, -2), (-4.0, 3.0), (-5, -4)] - c.parallelize(pow_test).map(lambda x, y: math.pow(x, y)).collect() - assert len(pow_test) == 4, 'wrong length' - self.assertAlmostEqual(pow_test[0], 5.0) - self.assertAlmostEqual(pow_test[1], -1.0 / 9.0) - self.assertAlmostEqual(pow_test[2], -64.0) - self.assertAlmostEqual(pow_test[3], 1.0 / 625.0) - - sqrt_test = [0, 1.0, 4.0, 16] - c.parallelize(sqrt_test).map(lambda x: math.sqrt(x)).collect() - assert len(sqrt_test) == 4, 'wrong length' - self.assertAlmostEqual(sqrt_test[0], 0.0) - self.assertAlmostEqual(sqrt_test[1], 1.0) - self.assertAlmostEqual(sqrt_test[2], 2.0) - self.assertAlmostEqual(sqrt_test[3], 4.0) - - - # def testIsInf(self): - # c = tuplex.Context(self.conf) - - # float_test = [0.0, 1.0, -1.0, -math.inf, 3.0, math.inf] - # L0 = c.parallelize(float_test).map(lambda x: math.isinf(x)).collect() - # assert len(L0) == 6, 'wrong length' - # self.assertEqual(L0[0], False) - # self.assertEqual(L0[1], False) - # self.assertEqual(L0[2], False) - # self.assertEqual(L0[3], True) - # self.assertEqual(L0[4], False) - # self.assertEqual(L0[5], True) - - # tuple_test = [(1.0, math.inf), (-math.inf, 0.0), (-math.inf, math.inf), (-2.0, 0.0)] - # L1 = c.parallelize(tuple_test).map(lambda x, y: (math.isinf(x), math.isinf(y))).collect() - # assert len(L1) == 4, 'wrong length' - # self.assertEqual(L1[0], (False, True)) - # self.assertEqual(L1[1], (True, False)) - # self.assertEqual(L1[2], (True, True)) - # self.assertEqual(L1[3], (False, False)) - - # int_test = [1, -math.inf, -1, 0, math.inf] - # L2 = c.parallelize(int_test).map(lambda x: math.isinf(x)).collect() - # assert len(L2) == 5, 'wrong length' - # self.assertEqual(L2[0], False) - # self.assertEqual(L2[1], True) - # self.assertEqual(L2[2], False) - # self.assertEqual(L2[3], False) - # self.assertEqual(L2[4], True) - - # mix_test = [-1, math.inf, 1.5, math.nan, -math.inf, 0.0] - # L3 = c.parallelize(mix_test).map(lambda x, y: math.pow(x, y)).collect() - # assert len(L3) == 6 - # self.assertEqual(L3[0], False) - # self.assertEqual(L3[1], True) - # self.assertEqual(L3[2], False) - # self.assertEqual(L3[3], False) - # self.assertEqual(L3[4], True) - # self.assertEqual(L3[5], False) - - - # def testIsNan(self): - # c = tuplex.Context(self.conf) - - # test0 = [0.0, math.nan, -3.5, -math.inf] - # L0 = c.parallelize(test0).map(lambda x: math.isnan(x)).collect() - # assert len(L0) == 4, 'wrong length' - # self.assertEqual(L0[0], False) - # self.assertEqual(L0[1], True) - # self.assertEqual(L0[2], False) - # self.assertEqual(L0[3], False) - - # test1 = [0, -1, math.nan, math.inf, 97] - # L1 = c.parallelize(test1).map(lambda x: math.isnan(x)).collect() - # assert len(L1) == 5, 'wrong length' - # self.assertEqual(L1[0], False) - # self.assertEqual(L1[1], False) - # self.assertEqual(L1[2], True) - # self.assertEqual(L1[3], False) - # self.assertEqual(L1[4], False) - - # test2 = [math.nan, 0, -math.inf, -1.5, math.nan, 97] - # L2 = c.parallelize(test2).map(lambda x: math.isnan(x)).collect() - # assert len(L2) == 6, 'wrong length' - # self.assertEqual(L2[0], True) - # self.assertEqual(L2[1], False) - # self.assertEqual(L2[2], False) - # self.assertEqual(L2[3], False) - # self.assertEqual(L2[4], True) - # self.assertEqual(L2[4], False) - - - # def testIsClose(self): - # c = tuplex.Context(self.conf) - diff --git a/tuplex/test/core/MathFunctionsTest.cc b/tuplex/test/core/MathFunctionsTest.cc index 5be4b6ef4..80aed9625 100644 --- a/tuplex/test/core/MathFunctionsTest.cc +++ b/tuplex/test/core/MathFunctionsTest.cc @@ -695,86 +695,3 @@ TEST_F(MathFunctionsTest, MathPow) { python::lockGIL(); python::closeInterpreter(); } - -TEST_F(MathFunctionsTest, TEST) { - using namespace std; - using namespace tuplex; - - python::initInterpreter(); - python::unlockGIL(); - - Context c(microTestOptions()); - ClosureEnvironment ce; - ce.importModuleAs("math", "math"); - - auto v1 = c.parallelize({ - Row(25, 0.5), Row(3, -2), Row(-4.0, 3.0), Row(-5, -4) - }).map(UDF("lambda x, y: math.pow(x, y)", "", ce)).collectAsVector(); - EXPECT_EQ(v1.size(), 4); - EXPECT_DOUBLE_EQ(v1[0].getDouble(0), 5.0); - EXPECT_DOUBLE_EQ(v1[1].getDouble(0), pow(3.0, -2.0)); - EXPECT_DOUBLE_EQ(v1[2].getDouble(0), -64.0); - EXPECT_DOUBLE_EQ(v1[3].getDouble(0), pow(-5.0, -4.0)); - - auto v2 = c.parallelize({ - Row(0), Row(1.0), Row(4.0), Row(16) - }).map(UDF("lambda x: math.sqrt(x)", "", ce)).collectAsVector(); - EXPECT_EQ(v2.size(), 4); - EXPECT_DOUBLE_EQ(v2[0].getDouble(0), 0.0); - EXPECT_DOUBLE_EQ(v2[1].getDouble(0), 1.0); - EXPECT_DOUBLE_EQ(v2[2].getDouble(0), 2.0); - EXPECT_DOUBLE_EQ(v2[3].getDouble(0), 4.0); - - python::lockGIL(); - python::closeInterpreter(); -} - -// TEST_F(MathFunctionsTest, MathIsInf) { -// using namespace std; -// using namespace tuplex; - -// python::initInterpreter(); -// python::unlockGIL(); - -// Context c(microTestOptions()); -// ClosureEnvironment ce; -// ce.importModuleAs("math", "math"); - -// auto v1 = c.parallelize({ -// Row(0.0), Row(1.0), Row(-1.0), Row(-INFINITY), Row(3.0), Row(INFINITY) -// }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector(); - -// EXPECT_EQ(v1.size(), 6); -// EXPECT_EQ(v1[0].getBool(0), false); -// EXPECT_EQ(v1[1].getBool(0), false); -// EXPECT_EQ(v1[2].getBool(0), false); -// EXPECT_EQ(v1[3].getBool(0), true); -// EXPECT_EQ(v1[4].getBool(0), false); -// EXPECT_EQ(v1[5].getBool(0), true); - -// auto v2 = c.parallelize({ -// Row(1), Row(-INFINITY), Row(-1), Row(0), Row(INFINITY) -// }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector(); - -// EXPECT_EQ(v2.size(), 5); -// EXPECT_EQ(v2[0].getBool(0), false); -// EXPECT_EQ(v2[1].getBool(0), true); -// EXPECT_EQ(v2[2].getBool(0), false); -// EXPECT_EQ(v2[3].getBool(0), false); -// EXPECT_EQ(v2[4].getBool(0), true); - -// auto v3 = c.parallelize({ -// Row(-1), Row(INFINITY), Row(1.5), Row(NAN), Row(-INFINITY), Row(0.0) -// }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector(); - -// EXPECT_EQ(v3.size(), 6); -// EXPECT_DOUBLE_EQ(v3[0].getBool(0), false); -// EXPECT_DOUBLE_EQ(v3[1].getBool(0), true); -// EXPECT_DOUBLE_EQ(v3[2].getBool(0), false); -// EXPECT_DOUBLE_EQ(v3[3].getBool(0), false); -// EXPECT_DOUBLE_EQ(v3[4].getBool(0), true); -// EXPECT_DOUBLE_EQ(v3[5].getBool(0), false); - -// python::lockGIL(); -// python::closeInterpreter(); -// } From ed3a1b30df3d501ad2b334038bd4f9b0570033f5 Mon Sep 17 00:00:00 2001 From: Rhea Goyal Date: Thu, 30 Jun 2022 15:26:49 -0400 Subject: [PATCH 03/23] ignore commits to master branch Revert "whoops" This reverts commit 985b4ddd96bdc3349c82dc67d88cf4471c917f3a. --- tuplex/python/tests/test_math.py | 96 +++++++++++++++++++++++++++ tuplex/test/core/MathFunctionsTest.cc | 83 +++++++++++++++++++++++ 2 files changed, 179 insertions(+) diff --git a/tuplex/python/tests/test_math.py b/tuplex/python/tests/test_math.py index eb1aa6fcb..8763391fc 100644 --- a/tuplex/python/tests/test_math.py +++ b/tuplex/python/tests/test_math.py @@ -358,3 +358,99 @@ def testPow(self): assert L_bool[2] == math.pow(False, True) assert L_bool[3] == math.pow(False, False) + + def testTEST(self): + c = tuplex.Context(self.conf) + + pow_test = [(25, 0.5), (3, -2), (-4.0, 3.0), (-5, -4)] + c.parallelize(pow_test).map(lambda x, y: math.pow(x, y)).collect() + assert len(pow_test) == 4, 'wrong length' + self.assertAlmostEqual(pow_test[0], 5.0) + self.assertAlmostEqual(pow_test[1], -1.0 / 9.0) + self.assertAlmostEqual(pow_test[2], -64.0) + self.assertAlmostEqual(pow_test[3], 1.0 / 625.0) + + sqrt_test = [0, 1.0, 4.0, 16] + c.parallelize(sqrt_test).map(lambda x: math.sqrt(x)).collect() + assert len(sqrt_test) == 4, 'wrong length' + self.assertAlmostEqual(sqrt_test[0], 0.0) + self.assertAlmostEqual(sqrt_test[1], 1.0) + self.assertAlmostEqual(sqrt_test[2], 2.0) + self.assertAlmostEqual(sqrt_test[3], 4.0) + + + # def testIsInf(self): + # c = tuplex.Context(self.conf) + + # float_test = [0.0, 1.0, -1.0, -math.inf, 3.0, math.inf] + # L0 = c.parallelize(float_test).map(lambda x: math.isinf(x)).collect() + # assert len(L0) == 6, 'wrong length' + # self.assertEqual(L0[0], False) + # self.assertEqual(L0[1], False) + # self.assertEqual(L0[2], False) + # self.assertEqual(L0[3], True) + # self.assertEqual(L0[4], False) + # self.assertEqual(L0[5], True) + + # tuple_test = [(1.0, math.inf), (-math.inf, 0.0), (-math.inf, math.inf), (-2.0, 0.0)] + # L1 = c.parallelize(tuple_test).map(lambda x, y: (math.isinf(x), math.isinf(y))).collect() + # assert len(L1) == 4, 'wrong length' + # self.assertEqual(L1[0], (False, True)) + # self.assertEqual(L1[1], (True, False)) + # self.assertEqual(L1[2], (True, True)) + # self.assertEqual(L1[3], (False, False)) + + # int_test = [1, -math.inf, -1, 0, math.inf] + # L2 = c.parallelize(int_test).map(lambda x: math.isinf(x)).collect() + # assert len(L2) == 5, 'wrong length' + # self.assertEqual(L2[0], False) + # self.assertEqual(L2[1], True) + # self.assertEqual(L2[2], False) + # self.assertEqual(L2[3], False) + # self.assertEqual(L2[4], True) + + # mix_test = [-1, math.inf, 1.5, math.nan, -math.inf, 0.0] + # L3 = c.parallelize(mix_test).map(lambda x, y: math.pow(x, y)).collect() + # assert len(L3) == 6 + # self.assertEqual(L3[0], False) + # self.assertEqual(L3[1], True) + # self.assertEqual(L3[2], False) + # self.assertEqual(L3[3], False) + # self.assertEqual(L3[4], True) + # self.assertEqual(L3[5], False) + + + # def testIsNan(self): + # c = tuplex.Context(self.conf) + + # test0 = [0.0, math.nan, -3.5, -math.inf] + # L0 = c.parallelize(test0).map(lambda x: math.isnan(x)).collect() + # assert len(L0) == 4, 'wrong length' + # self.assertEqual(L0[0], False) + # self.assertEqual(L0[1], True) + # self.assertEqual(L0[2], False) + # self.assertEqual(L0[3], False) + + # test1 = [0, -1, math.nan, math.inf, 97] + # L1 = c.parallelize(test1).map(lambda x: math.isnan(x)).collect() + # assert len(L1) == 5, 'wrong length' + # self.assertEqual(L1[0], False) + # self.assertEqual(L1[1], False) + # self.assertEqual(L1[2], True) + # self.assertEqual(L1[3], False) + # self.assertEqual(L1[4], False) + + # test2 = [math.nan, 0, -math.inf, -1.5, math.nan, 97] + # L2 = c.parallelize(test2).map(lambda x: math.isnan(x)).collect() + # assert len(L2) == 6, 'wrong length' + # self.assertEqual(L2[0], True) + # self.assertEqual(L2[1], False) + # self.assertEqual(L2[2], False) + # self.assertEqual(L2[3], False) + # self.assertEqual(L2[4], True) + # self.assertEqual(L2[4], False) + + + # def testIsClose(self): + # c = tuplex.Context(self.conf) + diff --git a/tuplex/test/core/MathFunctionsTest.cc b/tuplex/test/core/MathFunctionsTest.cc index 80aed9625..5be4b6ef4 100644 --- a/tuplex/test/core/MathFunctionsTest.cc +++ b/tuplex/test/core/MathFunctionsTest.cc @@ -695,3 +695,86 @@ TEST_F(MathFunctionsTest, MathPow) { python::lockGIL(); python::closeInterpreter(); } + +TEST_F(MathFunctionsTest, TEST) { + using namespace std; + using namespace tuplex; + + python::initInterpreter(); + python::unlockGIL(); + + Context c(microTestOptions()); + ClosureEnvironment ce; + ce.importModuleAs("math", "math"); + + auto v1 = c.parallelize({ + Row(25, 0.5), Row(3, -2), Row(-4.0, 3.0), Row(-5, -4) + }).map(UDF("lambda x, y: math.pow(x, y)", "", ce)).collectAsVector(); + EXPECT_EQ(v1.size(), 4); + EXPECT_DOUBLE_EQ(v1[0].getDouble(0), 5.0); + EXPECT_DOUBLE_EQ(v1[1].getDouble(0), pow(3.0, -2.0)); + EXPECT_DOUBLE_EQ(v1[2].getDouble(0), -64.0); + EXPECT_DOUBLE_EQ(v1[3].getDouble(0), pow(-5.0, -4.0)); + + auto v2 = c.parallelize({ + Row(0), Row(1.0), Row(4.0), Row(16) + }).map(UDF("lambda x: math.sqrt(x)", "", ce)).collectAsVector(); + EXPECT_EQ(v2.size(), 4); + EXPECT_DOUBLE_EQ(v2[0].getDouble(0), 0.0); + EXPECT_DOUBLE_EQ(v2[1].getDouble(0), 1.0); + EXPECT_DOUBLE_EQ(v2[2].getDouble(0), 2.0); + EXPECT_DOUBLE_EQ(v2[3].getDouble(0), 4.0); + + python::lockGIL(); + python::closeInterpreter(); +} + +// TEST_F(MathFunctionsTest, MathIsInf) { +// using namespace std; +// using namespace tuplex; + +// python::initInterpreter(); +// python::unlockGIL(); + +// Context c(microTestOptions()); +// ClosureEnvironment ce; +// ce.importModuleAs("math", "math"); + +// auto v1 = c.parallelize({ +// Row(0.0), Row(1.0), Row(-1.0), Row(-INFINITY), Row(3.0), Row(INFINITY) +// }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector(); + +// EXPECT_EQ(v1.size(), 6); +// EXPECT_EQ(v1[0].getBool(0), false); +// EXPECT_EQ(v1[1].getBool(0), false); +// EXPECT_EQ(v1[2].getBool(0), false); +// EXPECT_EQ(v1[3].getBool(0), true); +// EXPECT_EQ(v1[4].getBool(0), false); +// EXPECT_EQ(v1[5].getBool(0), true); + +// auto v2 = c.parallelize({ +// Row(1), Row(-INFINITY), Row(-1), Row(0), Row(INFINITY) +// }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector(); + +// EXPECT_EQ(v2.size(), 5); +// EXPECT_EQ(v2[0].getBool(0), false); +// EXPECT_EQ(v2[1].getBool(0), true); +// EXPECT_EQ(v2[2].getBool(0), false); +// EXPECT_EQ(v2[3].getBool(0), false); +// EXPECT_EQ(v2[4].getBool(0), true); + +// auto v3 = c.parallelize({ +// Row(-1), Row(INFINITY), Row(1.5), Row(NAN), Row(-INFINITY), Row(0.0) +// }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector(); + +// EXPECT_EQ(v3.size(), 6); +// EXPECT_DOUBLE_EQ(v3[0].getBool(0), false); +// EXPECT_DOUBLE_EQ(v3[1].getBool(0), true); +// EXPECT_DOUBLE_EQ(v3[2].getBool(0), false); +// EXPECT_DOUBLE_EQ(v3[3].getBool(0), false); +// EXPECT_DOUBLE_EQ(v3[4].getBool(0), true); +// EXPECT_DOUBLE_EQ(v3[5].getBool(0), false); + +// python::lockGIL(); +// python::closeInterpreter(); +// } From 8128020f1b44e6ff896fd0df56c84f31749cc6d4 Mon Sep 17 00:00:00 2001 From: Rhea Goyal Date: Thu, 30 Jun 2022 15:36:46 -0400 Subject: [PATCH 04/23] Revert "added new tests (for already existing functions)" This reverts commit 7c0984e4724d613475afc26136fe3ad43ddbc271. --- tuplex/python/tests/test_math.py | 100 +------------------------- tuplex/test/core/MathFunctionsTest.cc | 89 ++--------------------- 2 files changed, 6 insertions(+), 183 deletions(-) diff --git a/tuplex/python/tests/test_math.py b/tuplex/python/tests/test_math.py index 8763391fc..f60750d17 100644 --- a/tuplex/python/tests/test_math.py +++ b/tuplex/python/tests/test_math.py @@ -299,6 +299,7 @@ def testExpm1(self): assert L_bool[1] == math.expm1(False) + def testPow(self): c = tuplex.Context(self.conf) @@ -356,101 +357,4 @@ def testPow(self): assert L_bool[0] == math.pow(True, False) assert L_bool[1] == math.pow(True, True) assert L_bool[2] == math.pow(False, True) - assert L_bool[3] == math.pow(False, False) - - - def testTEST(self): - c = tuplex.Context(self.conf) - - pow_test = [(25, 0.5), (3, -2), (-4.0, 3.0), (-5, -4)] - c.parallelize(pow_test).map(lambda x, y: math.pow(x, y)).collect() - assert len(pow_test) == 4, 'wrong length' - self.assertAlmostEqual(pow_test[0], 5.0) - self.assertAlmostEqual(pow_test[1], -1.0 / 9.0) - self.assertAlmostEqual(pow_test[2], -64.0) - self.assertAlmostEqual(pow_test[3], 1.0 / 625.0) - - sqrt_test = [0, 1.0, 4.0, 16] - c.parallelize(sqrt_test).map(lambda x: math.sqrt(x)).collect() - assert len(sqrt_test) == 4, 'wrong length' - self.assertAlmostEqual(sqrt_test[0], 0.0) - self.assertAlmostEqual(sqrt_test[1], 1.0) - self.assertAlmostEqual(sqrt_test[2], 2.0) - self.assertAlmostEqual(sqrt_test[3], 4.0) - - - # def testIsInf(self): - # c = tuplex.Context(self.conf) - - # float_test = [0.0, 1.0, -1.0, -math.inf, 3.0, math.inf] - # L0 = c.parallelize(float_test).map(lambda x: math.isinf(x)).collect() - # assert len(L0) == 6, 'wrong length' - # self.assertEqual(L0[0], False) - # self.assertEqual(L0[1], False) - # self.assertEqual(L0[2], False) - # self.assertEqual(L0[3], True) - # self.assertEqual(L0[4], False) - # self.assertEqual(L0[5], True) - - # tuple_test = [(1.0, math.inf), (-math.inf, 0.0), (-math.inf, math.inf), (-2.0, 0.0)] - # L1 = c.parallelize(tuple_test).map(lambda x, y: (math.isinf(x), math.isinf(y))).collect() - # assert len(L1) == 4, 'wrong length' - # self.assertEqual(L1[0], (False, True)) - # self.assertEqual(L1[1], (True, False)) - # self.assertEqual(L1[2], (True, True)) - # self.assertEqual(L1[3], (False, False)) - - # int_test = [1, -math.inf, -1, 0, math.inf] - # L2 = c.parallelize(int_test).map(lambda x: math.isinf(x)).collect() - # assert len(L2) == 5, 'wrong length' - # self.assertEqual(L2[0], False) - # self.assertEqual(L2[1], True) - # self.assertEqual(L2[2], False) - # self.assertEqual(L2[3], False) - # self.assertEqual(L2[4], True) - - # mix_test = [-1, math.inf, 1.5, math.nan, -math.inf, 0.0] - # L3 = c.parallelize(mix_test).map(lambda x, y: math.pow(x, y)).collect() - # assert len(L3) == 6 - # self.assertEqual(L3[0], False) - # self.assertEqual(L3[1], True) - # self.assertEqual(L3[2], False) - # self.assertEqual(L3[3], False) - # self.assertEqual(L3[4], True) - # self.assertEqual(L3[5], False) - - - # def testIsNan(self): - # c = tuplex.Context(self.conf) - - # test0 = [0.0, math.nan, -3.5, -math.inf] - # L0 = c.parallelize(test0).map(lambda x: math.isnan(x)).collect() - # assert len(L0) == 4, 'wrong length' - # self.assertEqual(L0[0], False) - # self.assertEqual(L0[1], True) - # self.assertEqual(L0[2], False) - # self.assertEqual(L0[3], False) - - # test1 = [0, -1, math.nan, math.inf, 97] - # L1 = c.parallelize(test1).map(lambda x: math.isnan(x)).collect() - # assert len(L1) == 5, 'wrong length' - # self.assertEqual(L1[0], False) - # self.assertEqual(L1[1], False) - # self.assertEqual(L1[2], True) - # self.assertEqual(L1[3], False) - # self.assertEqual(L1[4], False) - - # test2 = [math.nan, 0, -math.inf, -1.5, math.nan, 97] - # L2 = c.parallelize(test2).map(lambda x: math.isnan(x)).collect() - # assert len(L2) == 6, 'wrong length' - # self.assertEqual(L2[0], True) - # self.assertEqual(L2[1], False) - # self.assertEqual(L2[2], False) - # self.assertEqual(L2[3], False) - # self.assertEqual(L2[4], True) - # self.assertEqual(L2[4], False) - - - # def testIsClose(self): - # c = tuplex.Context(self.conf) - + assert L_bool[3] == math.pow(False, False) \ No newline at end of file diff --git a/tuplex/test/core/MathFunctionsTest.cc b/tuplex/test/core/MathFunctionsTest.cc index 5be4b6ef4..ed81a0eeb 100644 --- a/tuplex/test/core/MathFunctionsTest.cc +++ b/tuplex/test/core/MathFunctionsTest.cc @@ -627,6 +627,8 @@ TEST_F(MathFunctionsTest, MathAsin) { python::closeInterpreter(); } + + TEST_F(MathFunctionsTest, MathPow) { using namespace std; using namespace tuplex; @@ -675,7 +677,7 @@ TEST_F(MathFunctionsTest, MathPow) { Row(2), Row(1), Row(-1), Row(-2), Row(0) }).map(UDF("lambda y: math.pow(y, 5)", "", ce)).collectAsVector(); - EXPECT_EQ(v4.size(), 5); + EXPECT_EQ(v2.size(), 5); EXPECT_DOUBLE_EQ(v4[0].getDouble(0), 32.0); EXPECT_DOUBLE_EQ(v4[1].getDouble(0), 1.0); EXPECT_DOUBLE_EQ(v4[2].getDouble(0), -1.0); @@ -694,87 +696,4 @@ TEST_F(MathFunctionsTest, MathPow) { python::lockGIL(); python::closeInterpreter(); -} - -TEST_F(MathFunctionsTest, TEST) { - using namespace std; - using namespace tuplex; - - python::initInterpreter(); - python::unlockGIL(); - - Context c(microTestOptions()); - ClosureEnvironment ce; - ce.importModuleAs("math", "math"); - - auto v1 = c.parallelize({ - Row(25, 0.5), Row(3, -2), Row(-4.0, 3.0), Row(-5, -4) - }).map(UDF("lambda x, y: math.pow(x, y)", "", ce)).collectAsVector(); - EXPECT_EQ(v1.size(), 4); - EXPECT_DOUBLE_EQ(v1[0].getDouble(0), 5.0); - EXPECT_DOUBLE_EQ(v1[1].getDouble(0), pow(3.0, -2.0)); - EXPECT_DOUBLE_EQ(v1[2].getDouble(0), -64.0); - EXPECT_DOUBLE_EQ(v1[3].getDouble(0), pow(-5.0, -4.0)); - - auto v2 = c.parallelize({ - Row(0), Row(1.0), Row(4.0), Row(16) - }).map(UDF("lambda x: math.sqrt(x)", "", ce)).collectAsVector(); - EXPECT_EQ(v2.size(), 4); - EXPECT_DOUBLE_EQ(v2[0].getDouble(0), 0.0); - EXPECT_DOUBLE_EQ(v2[1].getDouble(0), 1.0); - EXPECT_DOUBLE_EQ(v2[2].getDouble(0), 2.0); - EXPECT_DOUBLE_EQ(v2[3].getDouble(0), 4.0); - - python::lockGIL(); - python::closeInterpreter(); -} - -// TEST_F(MathFunctionsTest, MathIsInf) { -// using namespace std; -// using namespace tuplex; - -// python::initInterpreter(); -// python::unlockGIL(); - -// Context c(microTestOptions()); -// ClosureEnvironment ce; -// ce.importModuleAs("math", "math"); - -// auto v1 = c.parallelize({ -// Row(0.0), Row(1.0), Row(-1.0), Row(-INFINITY), Row(3.0), Row(INFINITY) -// }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector(); - -// EXPECT_EQ(v1.size(), 6); -// EXPECT_EQ(v1[0].getBool(0), false); -// EXPECT_EQ(v1[1].getBool(0), false); -// EXPECT_EQ(v1[2].getBool(0), false); -// EXPECT_EQ(v1[3].getBool(0), true); -// EXPECT_EQ(v1[4].getBool(0), false); -// EXPECT_EQ(v1[5].getBool(0), true); - -// auto v2 = c.parallelize({ -// Row(1), Row(-INFINITY), Row(-1), Row(0), Row(INFINITY) -// }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector(); - -// EXPECT_EQ(v2.size(), 5); -// EXPECT_EQ(v2[0].getBool(0), false); -// EXPECT_EQ(v2[1].getBool(0), true); -// EXPECT_EQ(v2[2].getBool(0), false); -// EXPECT_EQ(v2[3].getBool(0), false); -// EXPECT_EQ(v2[4].getBool(0), true); - -// auto v3 = c.parallelize({ -// Row(-1), Row(INFINITY), Row(1.5), Row(NAN), Row(-INFINITY), Row(0.0) -// }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector(); - -// EXPECT_EQ(v3.size(), 6); -// EXPECT_DOUBLE_EQ(v3[0].getBool(0), false); -// EXPECT_DOUBLE_EQ(v3[1].getBool(0), true); -// EXPECT_DOUBLE_EQ(v3[2].getBool(0), false); -// EXPECT_DOUBLE_EQ(v3[3].getBool(0), false); -// EXPECT_DOUBLE_EQ(v3[4].getBool(0), true); -// EXPECT_DOUBLE_EQ(v3[5].getBool(0), false); - -// python::lockGIL(); -// python::closeInterpreter(); -// } +} \ No newline at end of file From 4967b98d9c70716ca05224dc8473a13765347fca Mon Sep 17 00:00:00 2001 From: Rhea Goyal Date: Wed, 13 Jul 2022 12:18:39 -0400 Subject: [PATCH 05/23] moved changes to new branch --- tuplex/codegen/include/AnnotatedAST.h | 2 +- tuplex/test/core/DictionaryFunctions.cc | 20 ++++++++ tuplex/test/core/DictionaryTyping.cc | 65 +++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 tuplex/test/core/DictionaryTyping.cc diff --git a/tuplex/codegen/include/AnnotatedAST.h b/tuplex/codegen/include/AnnotatedAST.h index 47c85f830..90e574de8 100644 --- a/tuplex/codegen/include/AnnotatedAST.h +++ b/tuplex/codegen/include/AnnotatedAST.h @@ -177,7 +177,7 @@ namespace tuplex { /*! * annotates the tree with final types. If this is not possible, returns false - * @param pokicy compiler policy + * @param policy compiler policy * @param silentMode determines whether the type inference should log out problems or not * @param removeBranches whether to use RemoveDeadBranchesVisitor to prune AST * @return whether types could be successfully annotated/defined for all AST nodes diff --git a/tuplex/test/core/DictionaryFunctions.cc b/tuplex/test/core/DictionaryFunctions.cc index 955014748..01e63533e 100644 --- a/tuplex/test/core/DictionaryFunctions.cc +++ b/tuplex/test/core/DictionaryFunctions.cc @@ -13,6 +13,8 @@ #include "../../utils/include/Utils.h" #include "TestUtils.h" #include "RuntimeInterface.h" +#include +#include // need for these tests a running python interpreter, so spin it up class DictionaryFunctions : public PyTest {}; @@ -496,4 +498,22 @@ TEST_F(DictionaryFunctions, EmptyDict) { // .pop(val) KeyError // ==> left for later testing because it's a bit more complicated... #warning "implement fast, special functions for empty dict..." +} + +TEST_F(DictionaryFunctions, DictCount) { + using namespace tuplex; + auto code = "def count(L):\n" + " d = {}\n" + " for x in L:\n" + " if x not in d.keys():\n" + " d[x] = 0\n" + " d[x] += 1\n" + " return d"; + + auto root = std::unique_ptr(parseToAST(code)); + EXPECT_TRUE(root.get()); + + GraphVizGraph graph; + graph.createFromAST(root.get(), true); + graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dict_count.pdf"); } \ No newline at end of file diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc new file mode 100644 index 000000000..1fdccab1e --- /dev/null +++ b/tuplex/test/core/DictionaryTyping.cc @@ -0,0 +1,65 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 1/1/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// + +#include +#include +#include +#include +#include +#include +#include +#include + +// classes to work with: +// type annotator visitor +// trace visitor + +TEST(DictionaryTyping, Count) { + using namespace tuplex; + using namespace std; + + // // test count UDF + // auto count_c = "def count(L):\n" + // " d = {}\n" + // " for x in L:\n" + // " if x not in d.keys():\n" + // " d[x] = 0\n" + // " d[x] += 1\n" + // " return d"; + + // test simple UDF + auto count_c = "def f(L):\n" + " d = {}\n" + " k = L[0]\n" + " d[k] = 0\n" + " d[k] += 1\n" + " return d"; + + // parse code to AST + auto ast = tuplex::codegen::AnnotatedAST(); + ast.parseString(count_c); + + // make typing + python::Type inputType = python::Type::makeListType(python::Type::I64); + + // create symbol table + ast.addTypeHint("L", inputType); + ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY); + + // print type annotated ast + GraphVizGraph graph; + graph.createFromAST(ast.getFunctionAST(), true); + graph.saveAsPDF("typed_ast.pdf"); + + cout<<"return type of function is: "<getInferredType().getReturnType(), python::Type::makeDictionaryType(python::Type::I64, python::Type::I64)); + +} \ No newline at end of file From 6cee06d177b543bb82ebf77080944d5c294960df Mon Sep 17 00:00:00 2001 From: Rhea Goyal Date: Thu, 14 Jul 2022 19:16:07 -0400 Subject: [PATCH 06/23] added dict case to nassign; need to gdb --- tuplex/codegen/include/TypeAnnotatorVisitor.h | 1 + tuplex/codegen/src/TypeAnnotatorVisitor.cc | 65 ++++++++++++++++++- 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/tuplex/codegen/include/TypeAnnotatorVisitor.h b/tuplex/codegen/include/TypeAnnotatorVisitor.h index 922c5f1e2..8c4d94032 100644 --- a/tuplex/codegen/include/TypeAnnotatorVisitor.h +++ b/tuplex/codegen/include/TypeAnnotatorVisitor.h @@ -53,6 +53,7 @@ namespace tuplex { const TokenType tt, ASTNode* right, const python::Type& b); void assignHelper(NIdentifier *id, python::Type type); + void dictAssign(NSubscription* subscript, python::Type key_type, python::Type value_type); void checkRetType(python::Type t); /*! * Annotate iterator-related NCall with iterator-specific info diff --git a/tuplex/codegen/src/TypeAnnotatorVisitor.cc b/tuplex/codegen/src/TypeAnnotatorVisitor.cc index dd19474e7..ec0ca1dc1 100644 --- a/tuplex/codegen/src/TypeAnnotatorVisitor.cc +++ b/tuplex/codegen/src/TypeAnnotatorVisitor.cc @@ -1220,6 +1220,28 @@ namespace tuplex { _nameTable[id->_name] = type; } + void TypeAnnotatorVisitor::dictAssign(NSubscription* subscript, python::Type key_type, python::Type value_type) { + assert(subscript->_value->getInferredType().isDictionaryType()); + + NDictionary* dict = (NDictionary*)subscript->_value; + // not entirely sure what the below loop is for rn + // if(_ongoingLoopCount != 0 && !_loopTypeChange) { + // // we are now inside a loop; no type change detected yet + // // check potential type change during loops + // if(_nameTable.find(id->_name) != _nameTable.end() && type != _nameTable.at(id->_name)) { + // error("variable " + id->_name + " changed type during loop from " + _nameTable.at(id->_name).desc() + " to " + type.desc() + ", traced typing needed to determine if the type change is stable"); + // _loopTypeChange = true; + // } + // } + + // set dictionary's inferred type to be key_type -> value_type + // should maybe make a helper function for this? or does this count as the helper function... + dict->setInferredType(python::TypeFactory::instance().createOrGetDictionaryType(key_type, value_type)); + + // overwrite entry in nametable with new type (Q: how to do this for dictionaries?) + // _nameTable[dict->] = type; + } + void TypeAnnotatorVisitor::visit(NAssign *assign) { ApatheticVisitor::visit(assign); @@ -1277,8 +1299,49 @@ namespace tuplex { } else { error("bad type annotation in tuple assign"); } + } else if (assign->_target->type() == ASTNodeType::Subscription) { + NSubscription* subscript = (NSubscription*)assign->_target; + + assert(subscript->_value); + assert(subscript->_expression); + + auto type = subscript->_value->getInferredType(); + auto index_type = subscript->_expression->getInferredType(); + + // this is a null check operation. I.e. strip option from either type or index type + if (type.isOptionType()) + type = type.getReturnType(); + if (index_type.isOptionType()) + index_type = index_type.getReturnType(); + + // if object is dict-like, subscript must have a type compatible with mapping's key type + // question: the index is technically an expression: so we need to be able to handle multiple kinds of expressions? + // although, we don't really need to know what kind of expression the index is, we just need the resulting return type. + // is there an easy way to get this without having to check what kind of expression the index is? + + if (type == python::Type::EMPTYDICT) { + // if object is an empty dictionary, upcast empty dictionary to match type of requested subscript and value + // Q: do I need to check if the value being assigned is an iterator here? + dictAssign(subscript, index_type, assign->_value->getInferredType()); + } else if (python::Type::GENERICDICT == type) { + dictAssign(subscript, python::Type::PYOBJECT, python::Type::PYOBJECT); + } else if (type.isDictionaryType()) { + // if object is not an empty dictionary, check if dict's key type matches subscript type + // if they don't match, mark the dictionary as having type [PYOBJECT, PYOBJECT] + // and set a marker in the typeannotator that this function always triggers the interpreter fallback + // Q: how to do ^^ ? + dictAssign(subscript, python::Type::PYOBJECT, python::Type::PYOBJECT); + } else { + error("only assignment to dictionary subscriptions supported yet!"); + // if object is list-like, subscript must be an integer + // if subscript is negative, list-like object's length is added to subscript + // resulting subscript must be in range of object, then ask object to assign value to element/item at the subscript + } + + NDictionary* dict = (NDictionary*)subscript->_value; + } else { - error("only assignment to tuples/identifiers supported yet!!!"); + error("only assignment to tuples/identifiers/subscriptions supported yet!!!"); } // in all cases, set the type of the entire assign // TODO we def want this in the single identifier case, but in general? From 57a36b172229c626f6b5afa4a20f46524e2fc5f4 Mon Sep 17 00:00:00 2001 From: Rhea Goyal Date: Fri, 15 Jul 2022 12:58:26 -0400 Subject: [PATCH 07/23] not sure what value_type needs to be for recursive case --- tuplex/codegen/include/TypeAnnotatorVisitor.h | 5 + tuplex/codegen/src/TypeAnnotatorVisitor.cc | 187 +++++++++--------- tuplex/test/core/DictionaryTyping.cc | 48 +++-- 3 files changed, 137 insertions(+), 103 deletions(-) diff --git a/tuplex/codegen/include/TypeAnnotatorVisitor.h b/tuplex/codegen/include/TypeAnnotatorVisitor.h index 8c4d94032..3c7adeef5 100644 --- a/tuplex/codegen/include/TypeAnnotatorVisitor.h +++ b/tuplex/codegen/include/TypeAnnotatorVisitor.h @@ -53,7 +53,12 @@ namespace tuplex { const TokenType tt, ASTNode* right, const python::Type& b); void assignHelper(NIdentifier *id, python::Type type); + void dictAssign(NSubscription* subscript, python::Type key_type, python::Type value_type); + bool is_nested_subscript_target(ASTNode* target); + void recursive_set_subscript_types(ASTNode* next_target, python::Type value_type); + + void checkRetType(python::Type t); /*! * Annotate iterator-related NCall with iterator-specific info diff --git a/tuplex/codegen/src/TypeAnnotatorVisitor.cc b/tuplex/codegen/src/TypeAnnotatorVisitor.cc index ec0ca1dc1..70c815dbf 100644 --- a/tuplex/codegen/src/TypeAnnotatorVisitor.cc +++ b/tuplex/codegen/src/TypeAnnotatorVisitor.cc @@ -1222,6 +1222,8 @@ namespace tuplex { void TypeAnnotatorVisitor::dictAssign(NSubscription* subscript, python::Type key_type, python::Type value_type) { assert(subscript->_value->getInferredType().isDictionaryType()); + // check what type the _value is + NDictionary* dict = (NDictionary*)subscript->_value; // not entirely sure what the below loop is for rn @@ -1242,110 +1244,111 @@ namespace tuplex { // _nameTable[dict->] = type; } - void TypeAnnotatorVisitor::visit(NAssign *assign) { - ApatheticVisitor::visit(assign); + bool TypeAnnotatorVisitor::is_nested_subscript_target(ASTNode* target) { + // check if target is a subscript target + return assign->_target->type() == ASTNodeType::Subscription; + } + + // note: "target" refers to the LHS of the assign (should be a subscription), and then + // the value of every subsequent subscription + void TypeAnnotatorVisitor::recursive_set_subscript_types(ASTNode* target, python::Type value_type) { + // if the target is not a subscription (should be an identifier/dictionary ?), then + // the next target should be an identifier + // check what type the identifier maps to + // error check if the type of the identifier is something subscriptable (for now, a dictionary) + // if type is subscriptable, then + // set the typing for the identifier to be index_type -> value_type + // if type of identifier is empty_dict, then we can just reset type (i.e. upcast dictionary) + // else if generic dict: type is still generic dict, and need to set flag in annotator? + // else: + // check if index_type matches current index type, if not upcast and set flag + + // otherwise if the target is a subscription + // do recursive_set_subscript_types on the next target, with value_type being ???? + + } + void TypeAnnotatorVisitor::visit(NAssign *assign) { // now interesting part comes // check what left side is - // TODO cases - /** - * id = id - * id, id, ... = id/val - * id, id, ... = id, val, ... (SPECIAL CASE even here for a, b = b, a) - */ - if(assign->_target->type() == ASTNodeType::Identifier) { - // Single identifier case - //@Todo: check that symbol table contains target! - - // then check if identifier is already within symbol table. If not, add! - NIdentifier* id = (NIdentifier*)assign->_target; - assignHelper(id, assign->_value->getInferredType()); - if(assign->_value->getInferredType().isIteratorType()) { - id->annotation().iteratorInfo = assign->_value->annotation().iteratorInfo; - _iteratorInfoTable[id->_name] = assign->_value->annotation().iteratorInfo; - } - } else if(assign->_target->type() == ASTNodeType::Tuple) { - // now we have a tuple assignment! - // the right hand side MUST be some unpackable thing. Currently this is a tuple but later we will - // have lists as well - NTuple *ids = (NTuple *) assign->_target; - auto rhsInferredType = assign->_value->getInferredType(); - // TODO add support for dictionaries, etc. - if (rhsInferredType.isTupleType()) { - // get the types contained in our tuple - std::vector tupleTypes = rhsInferredType.parameters(); - if(ids->_elements.size() != tupleTypes.size()) { - error("Incorrect number of arguments to unpack in assignment"); + // a[x][y][z][w] = b + // a[5 + x * 2] = b + + // could have assign single target helper + + if (is_nested_subscript_target(assign->_target)) { + // visit b's tree + assign->_value->accept(*this); + + auto value_type = assign->_value->getInferredType(); + + // recursively handle each subscription target + recursive_set_subscript_types(assign->_target, value_type); + + // set assign type to value type + assign->setInferredType(value_type); + } else { + ApatheticVisitor::visit(assign); + // TODO cases + /** + * id = id + * id, id, ... = id/val + * id, id, ... = id, val, ... (SPECIAL CASE even here for a, b = b, a) + */ + if(assign->_target->type() == ASTNodeType::Identifier) { + // Single identifier case + //@Todo: check that symbol table contains target! + + // then check if identifier is already within symbol table. If not, add! + NIdentifier* id = (NIdentifier*)assign->_target; + assignHelper(id, assign->_value->getInferredType()); + if(assign->_value->getInferredType().isIteratorType()) { + id->annotation().iteratorInfo = assign->_value->annotation().iteratorInfo; + _iteratorInfoTable[id->_name] = assign->_value->annotation().iteratorInfo; } + } else if(assign->_target->type() == ASTNodeType::Tuple) { + // now we have a tuple assignment! + // the right hand side MUST be some unpackable thing. Currently this is a tuple but later we will + // have lists as well + NTuple *ids = (NTuple *) assign->_target; + auto rhsInferredType = assign->_value->getInferredType(); + // TODO add support for dictionaries, etc. + if (rhsInferredType.isTupleType()) { + // get the types contained in our tuple + std::vector tupleTypes = rhsInferredType.parameters(); + if(ids->_elements.size() != tupleTypes.size()) { + error("Incorrect number of arguments to unpack in assignment"); + } - for(unsigned long i = 0; i < ids->_elements.size(); i ++) { - auto elt = ids->_elements[i]; - if(elt->type() != ASTNodeType::Identifier) { - error("Trying to assign to a non identifier in a tuple"); + for(unsigned long i = 0; i < ids->_elements.size(); i ++) { + auto elt = ids->_elements[i]; + if(elt->type() != ASTNodeType::Identifier) { + error("Trying to assign to a non identifier in a tuple"); + } + NIdentifier *id = (NIdentifier *) elt; + // assign each identifier to the type in the tuple at the corresponding index + assignHelper(id, tupleTypes[i]); } - NIdentifier *id = (NIdentifier *) elt; - // assign each identifier to the type in the tuple at the corresponding index - assignHelper(id, tupleTypes[i]); - } - } else if(rhsInferredType == python::Type::STRING) { - for(const auto& elt : ids->_elements) { - if(elt->type() != ASTNodeType::Identifier) { - error("Trying to assign to a non identifier in a tuple"); + } else if(rhsInferredType == python::Type::STRING) { + for(const auto& elt : ids->_elements) { + if(elt->type() != ASTNodeType::Identifier) { + error("Trying to assign to a non identifier in a tuple"); + } + NIdentifier *id = (NIdentifier *) elt; + assignHelper(id, python::Type::STRING); } - NIdentifier *id = (NIdentifier *) elt; - assignHelper(id, python::Type::STRING); + } else { + error("bad type annotation in tuple assign"); } } else { - error("bad type annotation in tuple assign"); - } - } else if (assign->_target->type() == ASTNodeType::Subscription) { - NSubscription* subscript = (NSubscription*)assign->_target; - - assert(subscript->_value); - assert(subscript->_expression); - - auto type = subscript->_value->getInferredType(); - auto index_type = subscript->_expression->getInferredType(); - - // this is a null check operation. I.e. strip option from either type or index type - if (type.isOptionType()) - type = type.getReturnType(); - if (index_type.isOptionType()) - index_type = index_type.getReturnType(); - - // if object is dict-like, subscript must have a type compatible with mapping's key type - // question: the index is technically an expression: so we need to be able to handle multiple kinds of expressions? - // although, we don't really need to know what kind of expression the index is, we just need the resulting return type. - // is there an easy way to get this without having to check what kind of expression the index is? - - if (type == python::Type::EMPTYDICT) { - // if object is an empty dictionary, upcast empty dictionary to match type of requested subscript and value - // Q: do I need to check if the value being assigned is an iterator here? - dictAssign(subscript, index_type, assign->_value->getInferredType()); - } else if (python::Type::GENERICDICT == type) { - dictAssign(subscript, python::Type::PYOBJECT, python::Type::PYOBJECT); - } else if (type.isDictionaryType()) { - // if object is not an empty dictionary, check if dict's key type matches subscript type - // if they don't match, mark the dictionary as having type [PYOBJECT, PYOBJECT] - // and set a marker in the typeannotator that this function always triggers the interpreter fallback - // Q: how to do ^^ ? - dictAssign(subscript, python::Type::PYOBJECT, python::Type::PYOBJECT); - } else { - error("only assignment to dictionary subscriptions supported yet!"); - // if object is list-like, subscript must be an integer - // if subscript is negative, list-like object's length is added to subscript - // resulting subscript must be in range of object, then ask object to assign value to element/item at the subscript + error("only assignment to tuples/identifiers supported yet!!!"); + // error("only assignment to tuples/identifiers/subscriptions supported yet!!!"); } - - NDictionary* dict = (NDictionary*)subscript->_value; - - } else { - error("only assignment to tuples/identifiers/subscriptions supported yet!!!"); + // in all cases, set the type of the entire assign + // TODO we def want this in the single identifier case, but in general? + assign->setInferredType(assign->_target->getInferredType()); } - // in all cases, set the type of the entire assign - // TODO we def want this in the single identifier case, but in general? - assign->setInferredType(assign->_target->getInferredType()); } void TypeAnnotatorVisitor::resolveNameConflicts(const std::unordered_map &table) { diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc index 1fdccab1e..d076c3a07 100644 --- a/tuplex/test/core/DictionaryTyping.cc +++ b/tuplex/test/core/DictionaryTyping.cc @@ -21,27 +21,53 @@ // type annotator visitor // trace visitor -TEST(DictionaryTyping, Count) { +TEST(DictionaryTyping, Simple) { using namespace tuplex; using namespace std; - // // test count UDF - // auto count_c = "def count(L):\n" - // " d = {}\n" - // " for x in L:\n" - // " if x not in d.keys():\n" - // " d[x] = 0\n" - // " d[x] += 1\n" - // " return d"; - // test simple UDF - auto count_c = "def f(L):\n" + auto simple_c = "def f(L):\n" " d = {}\n" " k = L[0]\n" " d[k] = 0\n" " d[k] += 1\n" " return d"; + // parse code to AST + auto ast = tuplex::codegen::AnnotatedAST(); + ast.parseString(simple_c); + + // make typing + python::Type inputType = python::Type::makeListType(python::Type::I64); + + // create symbol table + ast.addTypeHint("L", inputType); + ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY); + + // print type annotated ast + GraphVizGraph graph; + graph.createFromAST(ast.getFunctionAST(), true); + graph.saveAsPDF("typed_ast.pdf"); + + cout<<"return type of function is: "<getInferredType().getReturnType(), python::Type::makeDictionaryType(python::Type::I64, python::Type::I64)); + +} + +TEST(DictionaryTyping, Count) { + using namespace tuplex; + using namespace std; + + // test count UDF + auto count_c = "def count(L):\n" + " d = {}\n" + " for x in L:\n" + " if x not in d.keys():\n" + " d[x] = 0\n" + " d[x] += 1\n" + " return d"; + // parse code to AST auto ast = tuplex::codegen::AnnotatedAST(); ast.parseString(count_c); From 42a2715c2e7fcba470d09306adb49528141a240d Mon Sep 17 00:00:00 2001 From: Rhea Goyal Date: Mon, 18 Jul 2022 14:51:20 -0400 Subject: [PATCH 08/23] typing working for simple function --- tuplex/codegen/include/TypeAnnotatorVisitor.h | 2 +- tuplex/codegen/src/TypeAnnotatorVisitor.cc | 96 +++++++++++-------- tuplex/test/core/DictionaryTyping.cc | 2 +- 3 files changed, 57 insertions(+), 43 deletions(-) diff --git a/tuplex/codegen/include/TypeAnnotatorVisitor.h b/tuplex/codegen/include/TypeAnnotatorVisitor.h index 3c7adeef5..caa2093d0 100644 --- a/tuplex/codegen/include/TypeAnnotatorVisitor.h +++ b/tuplex/codegen/include/TypeAnnotatorVisitor.h @@ -56,7 +56,7 @@ namespace tuplex { void dictAssign(NSubscription* subscript, python::Type key_type, python::Type value_type); bool is_nested_subscript_target(ASTNode* target); - void recursive_set_subscript_types(ASTNode* next_target, python::Type value_type); + void recursive_set_subscript_types(NSubscription* target, python::Type value_type); void checkRetType(python::Type t); diff --git a/tuplex/codegen/src/TypeAnnotatorVisitor.cc b/tuplex/codegen/src/TypeAnnotatorVisitor.cc index 70c815dbf..0d9028194 100644 --- a/tuplex/codegen/src/TypeAnnotatorVisitor.cc +++ b/tuplex/codegen/src/TypeAnnotatorVisitor.cc @@ -787,7 +787,7 @@ namespace tuplex { auto func_type = python::Type::makeFunctionType(python::Type::makeTupleType(param_types), ret_type); call->_func->setInferredType(func_type); } else { - fatal_error("Could not infer typing for callable " + name); + fatal_error("Could not infer typing for callable " + name); //$$ } } @@ -1220,52 +1220,62 @@ namespace tuplex { _nameTable[id->_name] = type; } - void TypeAnnotatorVisitor::dictAssign(NSubscription* subscript, python::Type key_type, python::Type value_type) { - assert(subscript->_value->getInferredType().isDictionaryType()); - // check what type the _value is - - - NDictionary* dict = (NDictionary*)subscript->_value; - // not entirely sure what the below loop is for rn - // if(_ongoingLoopCount != 0 && !_loopTypeChange) { - // // we are now inside a loop; no type change detected yet - // // check potential type change during loops - // if(_nameTable.find(id->_name) != _nameTable.end() && type != _nameTable.at(id->_name)) { - // error("variable " + id->_name + " changed type during loop from " + _nameTable.at(id->_name).desc() + " to " + type.desc() + ", traced typing needed to determine if the type change is stable"); - // _loopTypeChange = true; - // } - // } - - // set dictionary's inferred type to be key_type -> value_type - // should maybe make a helper function for this? or does this count as the helper function... - dict->setInferredType(python::TypeFactory::instance().createOrGetDictionaryType(key_type, value_type)); - - // overwrite entry in nametable with new type (Q: how to do this for dictionaries?) - // _nameTable[dict->] = type; - } - bool TypeAnnotatorVisitor::is_nested_subscript_target(ASTNode* target) { // check if target is a subscript target - return assign->_target->type() == ASTNodeType::Subscription; + return target->type() == ASTNodeType::Subscription; } // note: "target" refers to the LHS of the assign (should be a subscription), and then // the value of every subsequent subscription - void TypeAnnotatorVisitor::recursive_set_subscript_types(ASTNode* target, python::Type value_type) { - // if the target is not a subscription (should be an identifier/dictionary ?), then - // the next target should be an identifier - // check what type the identifier maps to - // error check if the type of the identifier is something subscriptable (for now, a dictionary) - // if type is subscriptable, then - // set the typing for the identifier to be index_type -> value_type - // if type of identifier is empty_dict, then we can just reset type (i.e. upcast dictionary) - // else if generic dict: type is still generic dict, and need to set flag in annotator? - // else: - // check if index_type matches current index type, if not upcast and set flag - - // otherwise if the target is a subscription - // do recursive_set_subscript_types on the next target, with value_type being ???? + void TypeAnnotatorVisitor::recursive_set_subscript_types(NSubscription* target, python::Type value_type) { + target->_expression->accept(*this); + python::Type index_type = target->_expression->getInferredType(); + python::Type new_value_type = python::TypeFactory::instance().createOrGetDictionaryType(index_type, value_type); + + if (target->_value->type() == ASTNodeType::Subscription) { + /* if the next target is a subscription, do recursive_set_subscript_types + on the next target, with value_type being Dict[index_type, value_type] */ + // Q: do I need to set intermediate types? e.g. for a[x][y][z] do I need to set the type for a[x][y]? (don't think there would be anywhere to rewrite in the nametable...) + recursive_set_subscript_types((NSubscription*)target->_value, new_value_type); + } else if (target->_value->type() == ASTNodeType::Identifier) { + // if the next target is an identifier (e.g. d[0]) + NIdentifier* id = (NIdentifier*)target->_value; + // check if the type the identifier maps to is something subscriptable (for now, a dictionary) + // could use _nameTable[id->_name].isIterableType() ? + // No - tuples can't have element assignment, and each type that can needs to be handled differently + if (_nameTable[id->_name].isDictionaryType()) { + python::Type curr_type = _nameTable[id->_name]; + + if (curr_type == python::Type::EMPTYDICT) { + // we can just upcast type to Dict[index_type, value_type] + assignHelper(id, new_value_type); + } else if (curr_type == python::Type::GENERICDICT) { + // type remains generic dict (and need to set flag in annotator?) + // Q: Do I need to do anything in this branch? + // assignHelper(python::Type::PYOBJECT, python::Type::PYOBJECT); + } else { + // check if index_type and new_value_type match current index type and value type + if (curr_type.keyType() != index_type) { + // upcast index type to PYOBJECT and set flag + index_type = python::Type::PYOBJECT; + } + + if (curr_type.valueType() != value_type) { + // upcast value type to PYOBJECT and set flag + new_value_type = python::TypeFactory::instance().createOrGetDictionaryType(index_type, python::Type::PYOBJECT); + } + assignHelper(id, new_value_type); + } + } else { + // otherwise, raise an error (identifier not subscriptable) + error("cannot index into type " + _nameTable[id->_name].desc()); + } + } else { + // otherwise, need to check if final type of expression is something subscriptable + // TODO: not really sure how to do this case + // else: raise error (can't subscript type) + } } void TypeAnnotatorVisitor::visit(NAssign *assign) { @@ -1278,13 +1288,17 @@ namespace tuplex { // could have assign single target helper if (is_nested_subscript_target(assign->_target)) { + assert(assign->_target->type() == ASTNodeType::Subscription); + + NSubscription* sub_node = (NSubscription*) assign->_target; + // visit b's tree assign->_value->accept(*this); auto value_type = assign->_value->getInferredType(); // recursively handle each subscription target - recursive_set_subscript_types(assign->_target, value_type); + recursive_set_subscript_types(sub_node, value_type); // set assign type to value type assign->setInferredType(value_type); diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc index d076c3a07..d9c4a1108 100644 --- a/tuplex/test/core/DictionaryTyping.cc +++ b/tuplex/test/core/DictionaryTyping.cc @@ -38,7 +38,7 @@ TEST(DictionaryTyping, Simple) { ast.parseString(simple_c); // make typing - python::Type inputType = python::Type::makeListType(python::Type::I64); + python::Type inputType = python::Type::makeListType(python::Type::F64); // create symbol table ast.addTypeHint("L", inputType); From 601ed05d45c636a01c2a7ae3c44c6395d03b7565 Mon Sep 17 00:00:00 2001 From: Rhea Goyal Date: Wed, 20 Jul 2022 10:06:03 -0400 Subject: [PATCH 09/23] AST fully typed for simple case --- tuplex/codegen/include/TypeAnnotatorVisitor.h | 2 - tuplex/codegen/src/TypeAnnotatorVisitor.cc | 46 ++-- tuplex/test/core/DictionaryTyping.cc | 207 +++++++++++++++++- tuplex/utils/include/TypeSystem.h | 4 + 4 files changed, 231 insertions(+), 28 deletions(-) diff --git a/tuplex/codegen/include/TypeAnnotatorVisitor.h b/tuplex/codegen/include/TypeAnnotatorVisitor.h index caa2093d0..298effefb 100644 --- a/tuplex/codegen/include/TypeAnnotatorVisitor.h +++ b/tuplex/codegen/include/TypeAnnotatorVisitor.h @@ -54,11 +54,9 @@ namespace tuplex { const python::Type& b); void assignHelper(NIdentifier *id, python::Type type); - void dictAssign(NSubscription* subscript, python::Type key_type, python::Type value_type); bool is_nested_subscript_target(ASTNode* target); void recursive_set_subscript_types(NSubscription* target, python::Type value_type); - void checkRetType(python::Type t); /*! * Annotate iterator-related NCall with iterator-specific info diff --git a/tuplex/codegen/src/TypeAnnotatorVisitor.cc b/tuplex/codegen/src/TypeAnnotatorVisitor.cc index 0d9028194..f0b1bd212 100644 --- a/tuplex/codegen/src/TypeAnnotatorVisitor.cc +++ b/tuplex/codegen/src/TypeAnnotatorVisitor.cc @@ -787,7 +787,7 @@ namespace tuplex { auto func_type = python::Type::makeFunctionType(python::Type::makeTupleType(param_types), ret_type); call->_func->setInferredType(func_type); } else { - fatal_error("Could not infer typing for callable " + name); //$$ + fatal_error("Could not infer typing for callable " + name); } } @@ -1230,29 +1230,31 @@ namespace tuplex { void TypeAnnotatorVisitor::recursive_set_subscript_types(NSubscription* target, python::Type value_type) { target->_expression->accept(*this); python::Type index_type = target->_expression->getInferredType(); - python::Type new_value_type = python::TypeFactory::instance().createOrGetDictionaryType(index_type, value_type); + python::Type new_value_type = python::Type::makeDictionaryType(index_type, value_type); if (target->_value->type() == ASTNodeType::Subscription) { /* if the next target is a subscription, do recursive_set_subscript_types - on the next target, with value_type being Dict[index_type, value_type] */ - // Q: do I need to set intermediate types? e.g. for a[x][y][z] do I need to set the type for a[x][y]? (don't think there would be anywhere to rewrite in the nametable...) + on the next target, with value_type being Dict[index_type, value_type] */ + // set type of subscription + // target->setInferredType(); recursive_set_subscript_types((NSubscription*)target->_value, new_value_type); } else if (target->_value->type() == ASTNodeType::Identifier) { // if the next target is an identifier (e.g. d[0]) NIdentifier* id = (NIdentifier*)target->_value; - // check if the type the identifier maps to is something subscriptable (for now, a dictionary) - // could use _nameTable[id->_name].isIterableType() ? - // No - tuples can't have element assignment, and each type that can needs to be handled differently + // check if the type the identifier maps to is something subscriptable (for now, just a dictionary) if (_nameTable[id->_name].isDictionaryType()) { python::Type curr_type = _nameTable[id->_name]; if (curr_type == python::Type::EMPTYDICT) { // we can just upcast type to Dict[index_type, value_type] assignHelper(id, new_value_type); + // set type of subscription: value_type + target->setInferredType(value_type); } else if (curr_type == python::Type::GENERICDICT) { // type remains generic dict (and need to set flag in annotator?) // Q: Do I need to do anything in this branch? // assignHelper(python::Type::PYOBJECT, python::Type::PYOBJECT); + target->setInferredType(python::Type::PYOBJECT); } else { // check if index_type and new_value_type match current index type and value type if (curr_type.keyType() != index_type) { @@ -1262,19 +1264,32 @@ namespace tuplex { if (curr_type.valueType() != value_type) { // upcast value type to PYOBJECT and set flag - new_value_type = python::TypeFactory::instance().createOrGetDictionaryType(index_type, python::Type::PYOBJECT); + new_value_type = python::Type::makeDictionaryType(index_type, python::Type::PYOBJECT); } assignHelper(id, new_value_type); + + if (curr_type.valueType() != value_type) { + // set subscript type to PYOBJECT + target->setInferredType(python::Type::PYOBJECT); + } else { + // set subscript type to value_type + target->setInferredType(value_type); + } } } else { - // otherwise, raise an error (identifier not subscriptable) - error("cannot index into type " + _nameTable[id->_name].desc()); + // otherwise, raise an error (identifier type not subscriptable) + error("only dictionary subscription supported; " + _nameTable[id->_name].desc() + " not (yet) supported"); } } else { - // otherwise, need to check if final type of expression is something subscriptable - // TODO: not really sure how to do this case + // otherwise, need to check if final type of expression is something subscriptable (just dictionary for now) // else: raise error (can't subscript type) + target->_value->accept(*this); + if (!target->_value->getInferredType().isDictionaryType()) { + error(target->_value->getInferredType().desc() + " is not (yet) subscriptable; only dictionaries supported"); + } + + // TODO: anything else here? } } @@ -1282,11 +1297,6 @@ namespace tuplex { // now interesting part comes // check what left side is - // a[x][y][z][w] = b - // a[5 + x * 2] = b - - // could have assign single target helper - if (is_nested_subscript_target(assign->_target)) { assert(assign->_target->type() == ASTNodeType::Subscription); @@ -1297,7 +1307,7 @@ namespace tuplex { auto value_type = assign->_value->getInferredType(); - // recursively handle each subscription target + // recursively set types for each subscription target recursive_set_subscript_types(sub_node, value_type); // set assign type to value type diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc index d9c4a1108..b711864c7 100644 --- a/tuplex/test/core/DictionaryTyping.cc +++ b/tuplex/test/core/DictionaryTyping.cc @@ -17,10 +17,6 @@ #include #include -// classes to work with: -// type annotator visitor -// trace visitor - TEST(DictionaryTyping, Simple) { using namespace tuplex; using namespace std; @@ -32,7 +28,7 @@ TEST(DictionaryTyping, Simple) { " d[k] = 0\n" " d[k] += 1\n" " return d"; - + // parse code to AST auto ast = tuplex::codegen::AnnotatedAST(); ast.parseString(simple_c); @@ -47,12 +43,208 @@ TEST(DictionaryTyping, Simple) { // print type annotated ast GraphVizGraph graph; graph.createFromAST(ast.getFunctionAST(), true); - graph.saveAsPDF("typed_ast.pdf"); + graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/typed_ast.pdf"); + + cout<<"return type of function is: "< Dict[i64, Option[Dict[i64, i64]]] +} + +TEST(DictionaryTyping, IndexExpression) { + using namespace tuplex; + using namespace std; + + // a[2 * k + 1] = n + auto code = "def f(L):\n" + " d = {}\n" + " k = L[0]\n" + " d[2 * k + 1] = 0\n" + " return d"; + + // parse code to AST + auto ast = tuplex::codegen::AnnotatedAST(); + ast.parseString(code); + + // make typing + python::Type inputType = python::Type::makeListType(python::Type::I64); + + // create symbol table + ast.addTypeHint("L", inputType); + ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY); + + // print type annotated ast + GraphVizGraph graph; + graph.createFromAST(ast.getFunctionAST(), true); + graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/typed_ast_1.pdf"); cout<<"return type of function is: "<getInferredType().getReturnType(), python::Type::makeDictionaryType(python::Type::I64, python::Type::I64)); +} + +TEST(DictionaryTyping, AttributeSubscripts) { + using namespace tuplex; + using namespace std; + + // how should I write tests that use classes + class attributes? + + // a.b[x] = n + auto code_1 = "def f(L):\n" + " d = {0: {0: 10, 1: 100}, 1: {0: 15, 1: 500}}\n" + " w = L[0]\n" + " x = L[1]\n" + " d[w][x] = 15\n" + " return d"; + // parse code to AST + auto ast_1 = tuplex::codegen::AnnotatedAST(); + ast_1.parseString(code_1); + + // make typing + python::Type inputType_1 = python::Type::makeListType(python::Type::I64); + + // create symbol table + ast_1.addTypeHint("L", inputType_1); + ast_1.defineTypes(codegen::DEFAULT_COMPILE_POLICY); + + // print type annotated ast + GraphVizGraph graph_1; + graph_1.createFromAST(ast_1.getFunctionAST(), true); + graph_1.saveAsPDF("typed_ast.pdf"); + + cout<<"return type of function is: "<getInferredType().getReturnType(), python::Type::makeDictionaryType(python::Type::I64, python::Type::I64)); - + ASSERT_EQ(ast.getReturnType(), python::Type::makeDictionaryType(python::Type::I64, python::Type::I64)); } \ No newline at end of file diff --git a/tuplex/utils/include/TypeSystem.h b/tuplex/utils/include/TypeSystem.h index 6861f24de..1035dff0c 100644 --- a/tuplex/utils/include/TypeSystem.h +++ b/tuplex/utils/include/TypeSystem.h @@ -18,6 +18,9 @@ #include #include +// need to define new type for d.keys (compound type - dictkeys) +// make createdictkeystype and createdictvaluestype + namespace python { class Type { @@ -345,6 +348,7 @@ namespace python { Type createOrGetFunctionType(const Type& param, const Type& ret=Type::EMPTYTUPLE); Type createOrGetDictionaryType(const Type& key, const Type& val); Type createOrGetListType(const Type& val); + // add support for dictkeys and dictvalues types Type createOrGetTupleType(const std::initializer_list args); Type createOrGetTupleType(const TTuple& args); From 891441ae870017348f65a5a48d0207fbe9e338c4 Mon Sep 17 00:00:00 2001 From: Rhea Goyal Date: Wed, 20 Jul 2022 14:51:56 -0400 Subject: [PATCH 10/23] added more tests/examples for dict typing --- tuplex/test/core/DictionaryTyping.cc | 491 ++++++++++++++++++++------- 1 file changed, 367 insertions(+), 124 deletions(-) diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc index b711864c7..281880d32 100644 --- a/tuplex/test/core/DictionaryTyping.cc +++ b/tuplex/test/core/DictionaryTyping.cc @@ -17,21 +17,50 @@ #include #include +// TEST(DictionaryTyping, Template) { +// using namespace tuplex; +// using namespace std; + +// auto code = ""; + +// // parse code to AST +// auto ast = tuplex::codegen::AnnotatedAST(); +// ast.parseString(code); + +// // make input typing +// python::Type inputType = python::Type::PYOBJECT; + +// // create symbol table (add parameters and types) +// ast.addTypeHint("L", inputType); +// ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY); + +// // print type annotated ast +// GraphVizGraph graph; +// graph.createFromAST(ast.getFunctionAST(), true); +// graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/.pdf"); + +// cout<<"return type of function is: "< Dict[i64, Option[Dict[i64, i64]]] } TEST(DictionaryTyping, IndexExpression) { using namespace tuplex; using namespace std; - // a[2 * k + 1] = n auto code = "def f(L):\n" " d = {}\n" " k = L[0]\n" @@ -77,174 +103,391 @@ TEST(DictionaryTyping, IndexExpression) { // print type annotated ast GraphVizGraph graph; graph.createFromAST(ast.getFunctionAST(), true); - graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/typed_ast_1.pdf"); + graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/index_exp_ast.pdf"); cout<<"return type of function is: "<getInferredType().getReturnType(), python::Type::makeDictionaryType(python::Type::I64, python::Type::I64)); + // check return type + ASSERT_EQ(ast.getReturnType(), expected_ret); } -TEST(DictionaryTyping, AttributeSubscripts) { +TEST(DictionaryTyping, NestedSubscriptMultiple) { using namespace tuplex; using namespace std; - // how should I write tests that use classes + class attributes? + auto code = "def f(L):\n" + " d = {0: {0: {0: {0: 10}, 1: {0: 1}}, 1: {0: {0: 15}, 1: {0: 2}}}, 1: {0: {0: {0: 20}, 1: {0: 0}}, 1: {0: {0: 19}, 1: {0: 4}}}}\n" + " w = L[0]\n" + " x = L[1]\n" + " y = L[2]\n" + " z = L[3]\n" + " d[w][x][y][z] = 60\n" + " return d"; + + // parse code to AST + auto ast = tuplex::codegen::AnnotatedAST(); + ast.parseString(code); - // a.b[x] = n - auto code_1 = "def f(L):\n" - " d = {0: {0: 10, 1: 100}, 1: {0: 15, 1: 500}}\n" - " w = L[0]\n" - " x = L[1]\n" - " d[w][x] = 15\n" - " return d"; + // make input typing + python::Type inputType = python::Type::makeListType(python::Type::I64); + // create symbol table (add parameters and types) + ast.addTypeHint("L", inputType); + ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY); + + // print type annotated ast + GraphVizGraph graph; + graph.createFromAST(ast.getFunctionAST(), true); + graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/nested_sub_multiple.pdf"); + + cout<<"return type of function is: "< 5:\n" + " d[i % 2] += 5\n" + " else:\n" + " d[i % 2] += i\n" + " return d"; + + // parse code to AST + auto ast = tuplex::codegen::AnnotatedAST(); + ast.parseString(code); + + // make input typing + python::Type inputType = python::Type::makeListType(python::Type::I64); + + // create symbol table (add parameters and types) + ast.addTypeHint("L", inputType); + ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY); + + // print type annotated ast + GraphVizGraph graph; + graph.createFromAST(ast.getFunctionAST(), true); + graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/control_flow_key_assign.pdf"); + + cout<<"return type of function is: "< Date: Fri, 22 Jul 2022 15:01:00 -0400 Subject: [PATCH 11/23] fixed typing for empty diicts and option case --- tuplex/codegen/src/TypeAnnotatorVisitor.cc | 63 ++++++----- tuplex/test/core/DictionaryTyping.cc | 117 ++++++++++++++++++++- tuplex/utils/include/TypeSystem.h | 23 +++- 3 files changed, 169 insertions(+), 34 deletions(-) diff --git a/tuplex/codegen/src/TypeAnnotatorVisitor.cc b/tuplex/codegen/src/TypeAnnotatorVisitor.cc index f0b1bd212..46c758d93 100644 --- a/tuplex/codegen/src/TypeAnnotatorVisitor.cc +++ b/tuplex/codegen/src/TypeAnnotatorVisitor.cc @@ -826,7 +826,7 @@ namespace tuplex { void TypeAnnotatorVisitor::visit(NDictionary* dict) { ApatheticVisitor::visit(dict); - + // Try to make it Dictionary[Key, Val] type (if every pair has the same key type and val type, respectively) bool is_key_val = true; python::Type keyType, valType; @@ -835,8 +835,20 @@ namespace tuplex { valType = dict->_pairs[0].second->getInferredType(); // save the key type, val type of the first pair for(const auto& p: dict->_pairs) { // check if every pair has the same key type, val type if(p.first->getInferredType() != keyType || p.second->getInferredType() != valType) { - is_key_val = false; // if they are not the same, then it is not of type Dictionary[Key, Val] - break; + // also for None case + if (valType.isDictionaryType() && p.second->getInferredType() == python::Type::EMPTYDICT) { + continue; + } else if (valType == python::Type::EMPTYDICT && p.second->getInferredType().isDictionaryType()) { + // upcast valType + valType = p.second->getInferredType(); + } else if (valType == python::Type::NULLVALUE) { + valType = python::Type::makeOptionType(p.second->getInferredType()); + } else if (p.second->getInferredType() == python::Type::NULLVALUE) { + valType = python::Type::makeOptionType(valType); + } else { + is_key_val = false; // if they are not the same, then it is not of type Dictionary[Key, Val] + break; + } } } @@ -1225,18 +1237,15 @@ namespace tuplex { return target->type() == ASTNodeType::Subscription; } - // note: "target" refers to the LHS of the assign (should be a subscription), and then - // the value of every subsequent subscription void TypeAnnotatorVisitor::recursive_set_subscript_types(NSubscription* target, python::Type value_type) { target->_expression->accept(*this); + python::Type subscription_type = value_type; python::Type index_type = target->_expression->getInferredType(); - python::Type new_value_type = python::Type::makeDictionaryType(index_type, value_type); + python::Type new_value_type = python::Type::makeDictionaryType(index_type, subscription_type); if (target->_value->type() == ASTNodeType::Subscription) { /* if the next target is a subscription, do recursive_set_subscript_types on the next target, with value_type being Dict[index_type, value_type] */ - // set type of subscription - // target->setInferredType(); recursive_set_subscript_types((NSubscription*)target->_value, new_value_type); } else if (target->_value->type() == ASTNodeType::Identifier) { // if the next target is an identifier (e.g. d[0]) @@ -1244,17 +1253,13 @@ namespace tuplex { // check if the type the identifier maps to is something subscriptable (for now, just a dictionary) if (_nameTable[id->_name].isDictionaryType()) { python::Type curr_type = _nameTable[id->_name]; - + if (curr_type == python::Type::EMPTYDICT) { // we can just upcast type to Dict[index_type, value_type] assignHelper(id, new_value_type); - // set type of subscription: value_type - target->setInferredType(value_type); } else if (curr_type == python::Type::GENERICDICT) { // type remains generic dict (and need to set flag in annotator?) - // Q: Do I need to do anything in this branch? - // assignHelper(python::Type::PYOBJECT, python::Type::PYOBJECT); - target->setInferredType(python::Type::PYOBJECT); + subscription_type = python::Type::PYOBJECT; } else { // check if index_type and new_value_type match current index type and value type if (curr_type.keyType() != index_type) { @@ -1263,19 +1268,24 @@ namespace tuplex { } if (curr_type.valueType() != value_type) { - // upcast value type to PYOBJECT and set flag - new_value_type = python::Type::makeDictionaryType(index_type, python::Type::PYOBJECT); + if (curr_type.valueType().isOptionType()) { + // case where dictionary values are nullable + // check if non-null option is the same type as value_type + if (curr_type.valueType().elementType() == value_type) { + // need to make subscription_type an option type instead + subscription_type = python::Type::makeOptionType(subscription_type); + } else { + // upcast value type to PYOBJECT and set flag + subscription_type = python::Type::PYOBJECT; + } + } else { + // upcast value type to PYOBJECT and set flag + subscription_type = python::Type::PYOBJECT; + } } + new_value_type = python::Type::makeDictionaryType(index_type, subscription_type); assignHelper(id, new_value_type); - - if (curr_type.valueType() != value_type) { - // set subscript type to PYOBJECT - target->setInferredType(python::Type::PYOBJECT); - } else { - // set subscript type to value_type - target->setInferredType(value_type); - } } } else { // otherwise, raise an error (identifier type not subscriptable) @@ -1288,9 +1298,10 @@ namespace tuplex { if (!target->_value->getInferredType().isDictionaryType()) { error(target->_value->getInferredType().desc() + " is not (yet) subscriptable; only dictionaries supported"); } - - // TODO: anything else here? } + + // set type of subscription node (target) + target->setInferredType(subscription_type); } void TypeAnnotatorVisitor::visit(NAssign *assign) { diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc index 281880d32..5b6cfeeba 100644 --- a/tuplex/test/core/DictionaryTyping.cc +++ b/tuplex/test/core/DictionaryTyping.cc @@ -79,6 +79,108 @@ TEST(DictionaryTyping, Simple) { ASSERT_EQ(ast.getReturnType(), python::Type::makeDictionaryType(python::Type::F64, python::Type::I64)); } +TEST(DictionaryTyping, KeyTypeChange) { + using namespace tuplex; + using namespace std; + + auto code = "def f(L):\n" + " d = {}\n" + " d['a'] = L[0]\n" + " d[2] = L[1]\n" + " return d"; + + // parse code to AST + auto ast = tuplex::codegen::AnnotatedAST(); + ast.parseString(code); + + // make input typing + python::Type inputType = python::Type::makeListType(python::Type::I64); + + // create symbol table (add parameters and types) + ast.addTypeHint("L", inputType); + ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY); + + // print type annotated ast + GraphVizGraph graph; + graph.createFromAST(ast.getFunctionAST(), true); + graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/key_type_change.pdf"); + + cout<<"return type of function is: "< baseClasses() const; + // TODO: add dict_keys and dict_values cases to below function /*! * retrieves vector of all types which are derived from this type * @return vector of type, may be empty. @@ -219,6 +233,8 @@ namespace python { static Type makeListType(const python::Type &elementType); + // TODO: add make...Type for dict_keys and dict_values + /*! * create iterator type from yieldType. * @param yieldType @@ -285,6 +301,7 @@ namespace python { CLASS, OPTION, // for nullable ITERATOR + // TODO: add dict_keys and dict_values types (?) }; struct TypeEntry { @@ -328,6 +345,7 @@ namespace python { bool isOptionType(const Type& t) const; bool isListType(const Type& t) const; bool isIteratorType(const Type& t) const; + // TODO: add is...Type functions for dict_keys and dict_values std::vector parameters(const Type& t) const; Type returnType(const Type& t) const; @@ -348,8 +366,7 @@ namespace python { Type createOrGetFunctionType(const Type& param, const Type& ret=Type::EMPTYTUPLE); Type createOrGetDictionaryType(const Type& key, const Type& val); Type createOrGetListType(const Type& val); - // add support for dictkeys and dictvalues types - + // TODO: add create...Type functions for dict_keys and dict_values Type createOrGetTupleType(const std::initializer_list args); Type createOrGetTupleType(const TTuple& args); Type createOrGetTupleType(const std::vector& args); From ce3709a571f80655fb98393470b377e0c71e7c42 Mon Sep 17 00:00:00 2001 From: Rhea Goyal Date: Tue, 26 Jul 2022 11:49:31 -0400 Subject: [PATCH 12/23] added dict_keys and dict_values types --- tuplex/codegen/src/SymbolTable.cc | 28 +++++++++++- tuplex/test/core/DictionaryTyping.cc | 2 +- tuplex/utils/include/TypeSystem.h | 37 +++++++--------- tuplex/utils/src/TypeSystem.cc | 64 ++++++++++++++++++++++++++-- 4 files changed, 104 insertions(+), 27 deletions(-) diff --git a/tuplex/codegen/src/SymbolTable.cc b/tuplex/codegen/src/SymbolTable.cc index 99f9bb9d6..9e4a629f1 100644 --- a/tuplex/codegen/src/SymbolTable.cc +++ b/tuplex/codegen/src/SymbolTable.cc @@ -409,6 +409,32 @@ namespace tuplex { // i.e. type depending on input + // typer function for dict.keys() and dict.values() + // this currently doesn't handle empty dicts... + std::vector all_types = {python::Type::BOOLEAN, python::Type::I64, python::Type::F64, + python::Type::STRING, python::Type::PYOBJECT}; + for (const auto &t1 : all_types) { + for (const auto &t2 : all_types) { + + auto dict_type = python::Type::makeDictionaryType(t1, t2); + + // create specialized dict type + auto dict_sym = std::make_shared(dict_type.desc(), "dictionary", t1, SymbolType::TYPE); + // add here symbol so other functions can be easily added. + addSymbol(dict_sym); + + // dict_keys + auto keys_sym = std::make_shared("keys", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, python::Type::makeDictKeysType(dict_type.keyType()))); + dict_sym->addAttribute(keys_sym); + + // dict_keys + auto values_sym = std::make_shared("values", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, python::Type::makeDictValuesType(dict_type.valueType()))); + dict_sym->addAttribute(values_sym); + } + } + + // addBuiltinTypeAttribute(python::Type::EMPTYDICT, "keys", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, ???)); + // for pop/popitem things are actually a bit more complicated... // i.e. the default keyword may introduce an issue... // https://www.programiz.com/python-programming/methods/dictionary/pop @@ -477,7 +503,7 @@ namespace tuplex { python::Type::makeTupleType({dict_type.keyType(), dict_type.valueType()}))); } - + // for the weird case of the default object having different type than the dict value type, use tracing. // another good design for builtin functions could be: diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc index 5b6cfeeba..f7e7aa097 100644 --- a/tuplex/test/core/DictionaryTyping.cc +++ b/tuplex/test/core/DictionaryTyping.cc @@ -624,7 +624,7 @@ TEST(DictionaryTyping, Count) { // print type annotated ast GraphVizGraph graph; graph.createFromAST(ast.getFunctionAST(), true); - graph.saveAsPDF("typed_ast.pdf"); + graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/dict_count.pdf"); cout<<"return type of function is: "< baseClasses() const; - // TODO: add dict_keys and dict_values cases to below function /*! * retrieves vector of all types which are derived from this type * @return vector of type, may be empty. @@ -233,7 +223,8 @@ namespace python { static Type makeListType(const python::Type &elementType); - // TODO: add make...Type for dict_keys and dict_values + static Type makeDictKeysType(const python::Type& keyType); + static Type makeDictValuesType(const python::Type& valType); /*! * create iterator type from yieldType. @@ -297,11 +288,12 @@ namespace python { FUNCTION, TUPLE, DICTIONARY, + DICT_KEYS, + DICT_VALUES, LIST, CLASS, OPTION, // for nullable ITERATOR - // TODO: add dict_keys and dict_values types (?) }; struct TypeEntry { @@ -341,12 +333,13 @@ namespace python { bool isFunctionType(const Type& t) const; bool isDictionaryType(const Type& t) const; + bool isDictKeysType(const Type& t); + bool isDictValuesType(const Type& t); bool isTupleType(const Type& t) const; bool isOptionType(const Type& t) const; bool isListType(const Type& t) const; bool isIteratorType(const Type& t) const; - // TODO: add is...Type functions for dict_keys and dict_values - + std::vector parameters(const Type& t) const; Type returnType(const Type& t) const; @@ -365,15 +358,15 @@ namespace python { // right now, no tuples or other weird types... Type createOrGetFunctionType(const Type& param, const Type& ret=Type::EMPTYTUPLE); Type createOrGetDictionaryType(const Type& key, const Type& val); + Type createOrGetDictKeysType(const Type& key); + Type createOrGetDictValuesType(const Type& val); Type createOrGetListType(const Type& val); - // TODO: add create...Type functions for dict_keys and dict_values Type createOrGetTupleType(const std::initializer_list args); Type createOrGetTupleType(const TTuple& args); Type createOrGetTupleType(const std::vector& args); Type createOrGetOptionType(const Type& type); Type createOrGetIteratorType(const Type& yieldType); - Type getByName(const std::string& name); // helper function to connect type system to codegen diff --git a/tuplex/utils/src/TypeSystem.cc b/tuplex/utils/src/TypeSystem.cc index 2fd3fe064..f38864999 100644 --- a/tuplex/utils/src/TypeSystem.cc +++ b/tuplex/utils/src/TypeSystem.cc @@ -147,6 +147,24 @@ namespace python { return registerOrGetType(name, AbstractType::DICTIONARY, {key, val}); } + Type TypeFactory::createOrGetDictKeysType(const Type& key) { + std::string name; + name += "["; + name += TypeFactory::instance().getDesc(key._hash); + name += "]"; + + return registerOrGetType(name, AbstractType::DICT_KEYS, {key}); + } + + Type TypeFactory::createOrGetDictValuesType(const Type& val) { + std::string name; + name += "["; + name += TypeFactory::instance().getDesc(val._hash); + name += "]"; + + return registerOrGetType(name, AbstractType::DICT_VALUES, {val}); + } + Type TypeFactory::createOrGetListType(const Type &val) { std::string name; name += "["; @@ -275,6 +293,14 @@ namespace python { return TypeFactory::instance().isIteratorType(*this); } + bool Type::isDictKeysType() const { + return TypeFactory::instance().isDictKeysType(*this); + } + + bool Type::isDictValuesType() const { + return TypeFactory::instance().isDictValuesType(*this); + } + Type Type::getReturnType() const { // first make sure this a function type! if( ! (TypeFactory::instance().isFunctionType(*this) || @@ -311,6 +337,22 @@ namespace python { return type == AbstractType::DICTIONARY || t == Type::EMPTYDICT || t == Type::GENERICDICT; } + bool TypeFactory::isDictKeysType(const Type& t) { + auto it = _typeMap.find(t._hash); + if(it == _typeMap.end()) + return false; + + return it->second._type == AbstractType::DICT_KEYS; + } + + bool TypeFactory::isDictValuesType(const Type& t) { + auto it = _typeMap.find(t._hash); + if(it == _typeMap.end()) + return false; + + return it->second._type == AbstractType::DICT_VALUES; + } + bool TypeFactory::isListType(const Type &t) const { auto it = _typeMap.find(t._hash); if(it == _typeMap.end()) @@ -382,8 +424,8 @@ namespace python { } Type Type::elementType() const { - if(isListType()) { - assert(isListType() && _hash != EMPTYLIST._hash); + if(isListType() || isDictKeysType() || isDictValuesType()) { + assert((isListType() && _hash != EMPTYLIST._hash) || isDictKeysType() || isDictValuesType()); auto& factory = TypeFactory::instance(); auto it = factory._typeMap.find(_hash); assert(it != factory._typeMap.end()); @@ -413,7 +455,7 @@ namespace python { } bool Type::isIterableType() const { - return (*this).isIteratorType() || (*this).isListType() || (*this).isTupleType() || *this == python::Type::STRING || *this == python::Type::RANGE || (*this).isDictionaryType(); + return (*this).isIteratorType() || (*this).isListType() || (*this).isTupleType() || *this == python::Type::STRING || *this == python::Type::RANGE || (*this).isDictionaryType() || (*this).isDictKeysType() || (*this).isDictValuesType(); } bool Type::isFixedSizeType() const { @@ -447,6 +489,10 @@ namespace python { // ==> base type decides! if(isOptionType()) return withoutOptions().isFixedSizeType(); + + // dict_keys and dict_values are both immutable + if(isDictKeysType() || isDictValuesType()) + return true; // functions, dictionaries, and lists are never a fixed type return false; @@ -501,6 +547,10 @@ namespace python { if(elementType().isIllDefined()) return true; return false; + } else if (isDictKeysType() || isDictValuesType()) { + if (elementType().isIllDefined()) + return true; + return false; } else { // must be primitive, directly check return *this == Type::UNKNOWN @@ -525,6 +575,14 @@ namespace python { return python::TypeFactory::instance().createOrGetDictionaryType(keyType, valType); } + Type Type::makeDictKeysType(const python::Type& keyType) { + return python::TypeFactory::instance().createOrGetDictKeysType(keyType); + } + + Type Type::makeDictValuesType(const python::Type& valType) { + return python::TypeFactory::instance().createOrGetDictValuesType(valType); + } + Type Type::makeListType(const python::Type &elementType){ #warning "Nested lists are not yet supported!" return python::TypeFactory::instance().createOrGetListType(elementType); From 7be771871dfec570a6c63175391a367148528e25 Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Wed, 27 Jul 2022 12:52:54 -0400 Subject: [PATCH 13/23] compiles now, emptydict promotion --- tuplex/codegen/include/SymbolTable.h | 8 +++ tuplex/codegen/src/SymbolTable.cc | 81 ++++++++++++++++------ tuplex/codegen/src/TypeAnnotatorVisitor.cc | 15 +++- tuplex/test/core/DictionaryTyping.cc | 2 +- tuplex/utils/include/TypeSystem.h | 1 + tuplex/utils/src/TypeSystem.cc | 17 ++++- 6 files changed, 100 insertions(+), 24 deletions(-) diff --git a/tuplex/codegen/include/SymbolTable.h b/tuplex/codegen/include/SymbolTable.h index c6aa32f89..3b14a721d 100644 --- a/tuplex/codegen/include/SymbolTable.h +++ b/tuplex/codegen/include/SymbolTable.h @@ -175,6 +175,14 @@ namespace tuplex { */ void addBuiltinTypeAttribute(const python::Type& builtinType, const std::string& name, const python::Type& type); + /*! + * add an attribute to a builtin type, e.g. dict.keys() + * @param builtinType to which type to add the function + * @param name name of the attribute + * @param typer a dynamic typing function + */ + void addBuiltinTypeAttribute(const python::Type& builtinType, const std::string& name, std::function typer); + /*! * checks whether a symbol can be looked up or not * @param symbol diff --git a/tuplex/codegen/src/SymbolTable.cc b/tuplex/codegen/src/SymbolTable.cc index 9e4a629f1..cfa9e3f84 100644 --- a/tuplex/codegen/src/SymbolTable.cc +++ b/tuplex/codegen/src/SymbolTable.cc @@ -407,31 +407,43 @@ namespace tuplex { // for dict, list, tuple use generic type version! - // i.e. type depending on input + // for keys()/values() use generic dict and let symbol table create specialized type on the fly using + // typer function + { + addBuiltinTypeAttribute(python::Type::GENERICDICT, "keys", [](const python::Type& parameterType) { - // typer function for dict.keys() and dict.values() - // this currently doesn't handle empty dicts... - std::vector all_types = {python::Type::BOOLEAN, python::Type::I64, python::Type::F64, - python::Type::STRING, python::Type::PYOBJECT}; - for (const auto &t1 : all_types) { - for (const auto &t2 : all_types) { + std::cout<<"need to get concrete dict type here!"<(dict_type.desc(), "dictionary", t1, SymbolType::TYPE); - // add here symbol so other functions can be easily added. - addSymbol(dict_sym); - // dict_keys - auto keys_sym = std::make_shared("keys", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, python::Type::makeDictKeysType(dict_type.keyType()))); - dict_sym->addAttribute(keys_sym); + // i.e. type depending on input - // dict_keys - auto values_sym = std::make_shared("values", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, python::Type::makeDictValuesType(dict_type.valueType()))); - dict_sym->addAttribute(values_sym); - } - } +// // typer function for dict.keys() and dict.values() +// // this currently doesn't handle empty dicts... +// std::vector all_types = {python::Type::BOOLEAN, python::Type::I64, python::Type::F64, +// python::Type::STRING, python::Type::PYOBJECT}; +// for (const auto &t1 : all_types) { +// for (const auto &t2 : all_types) { +// +// auto dict_type = python::Type::makeDictionaryType(t1, t2); +// +// // create specialized dict type +// auto dict_sym = std::make_shared(dict_type.desc(), "dictionary", t1, SymbolType::TYPE); +// // add here symbol so other functions can be easily added. +// addSymbol(dict_sym); +// +// // dict_keys +// auto keys_sym = std::make_shared("keys", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, python::Type::makeDictKeysType(dict_type.keyType()))); +// dict_sym->addAttribute(keys_sym); +// +// // dict_keys +// auto values_sym = std::make_shared("values", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, python::Type::makeDictValuesType(dict_type.valueType()))); +// dict_sym->addAttribute(values_sym); +// } +// } // addBuiltinTypeAttribute(python::Type::EMPTYDICT, "keys", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, ???)); @@ -670,6 +682,35 @@ namespace tuplex { return addSymbol(make_shared(name, type)); } + void SymbolTable::addBuiltinTypeAttribute(const python::Type &builtinType, const std::string &name, + std::function typer) { + using namespace std; + // this seems wrong, need to perform the lookup directly... + // use desc as name + auto scope = currentScope(); + auto it = scope->symbols.find(builtinType.desc()); + if(it == scope->symbols.end()) { + scope->symbols[builtinType.desc()] = make_shared(builtinType.desc(), typer); + it = scope->symbols.find(builtinType.desc()); + assert(it != scope->symbols.end()); + } + auto sym_att = it->second->findAttribute(name); + if(!sym_att) { + it->second->addAttribute(make_shared(name, name, builtinType, builtinType.isFunctionType() ? SymbolType::FUNCTION : SymbolType::VARIABLE)); + sym_att = it->second->findAttribute(name); + } else { + // replace symbol, there can be only one symbol with a typer function + auto symbolType = builtinType.isFunctionType() ? SymbolType::FUNCTION : SymbolType::VARIABLE; + if(symbolType != sym_att->symbolType) + throw std::runtime_error("symbol can only have one kind of types associated with it!"); + assert(sym_att->qualifiedName == name); + sym_att->name = name; + } + assert(sym_att); + sym_att->parent = scope->symbols[name]; + sym_att->functionTyper = typer; + } + void SymbolTable::addBuiltinTypeAttribute(const python::Type &builtinType, const std::string &name, const python::Type &type) { // this seems wrong, need to perform the lookup directly... diff --git a/tuplex/codegen/src/TypeAnnotatorVisitor.cc b/tuplex/codegen/src/TypeAnnotatorVisitor.cc index 46c758d93..d70cd0b71 100644 --- a/tuplex/codegen/src/TypeAnnotatorVisitor.cc +++ b/tuplex/codegen/src/TypeAnnotatorVisitor.cc @@ -1220,8 +1220,19 @@ namespace tuplex { // we are now inside a loop; no type change detected yet // check potential type change during loops if(_nameTable.find(id->_name) != _nameTable.end() && type != _nameTable.at(id->_name)) { - error("variable " + id->_name + " changed type during loop from " + _nameTable.at(id->_name).desc() + " to " + type.desc() + ", traced typing needed to determine if the type change is stable"); - _loopTypeChange = true; + + // special case: + // emptylist, emptydict (and emptyset) can get promoted + auto type_of_named = _nameTable.at(id->_name); + if((type_of_named == python::Type::EMPTYLIST && type.isListType()) || + (type_of_named == python::Type::EMPTYDICT && type.isDictionaryType()) ) { + // || (type_of_named == python::Type::EMPTYSET && type.isSetType()) + auto& logger = Logger::instance().logger("codegen"); + logger.debug("promoting " + id->_name + " from " + _nameTable.at(id->_name).desc() + " to " + type.desc()); + } else { + error("variable " + id->_name + " changed type during loop from " + _nameTable.at(id->_name).desc() + " to " + type.desc() + ", traced typing needed to determine if the type change is stable"); + _loopTypeChange = true; + } } } diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc index f7e7aa097..be5d8cfc7 100644 --- a/tuplex/test/core/DictionaryTyping.cc +++ b/tuplex/test/core/DictionaryTyping.cc @@ -624,7 +624,7 @@ TEST(DictionaryTyping, Count) { // print type annotated ast GraphVizGraph graph; graph.createFromAST(ast.getFunctionAST(), true); - graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/dict_count.pdf"); + graph.saveAsPDF("dict_count.pdf"); cout<<"return type of function is: "<()); const Type Type::EMPTYDICT = python::TypeFactory::instance().createOrGetPrimitiveType("{}"); // empty dict const Type Type::EMPTYLIST = python::TypeFactory::instance().createOrGetPrimitiveType("[]"); // empty list: primitive because it can have any type element + const Type Type::EMPTYSET = python::TypeFactory::instance().createOrGetPrimitiveType("empty_set"); // empty list: primitive because it can have any type element const Type Type::NULLVALUE = python::TypeFactory::instance().createOrGetPrimitiveType("null"); const Type Type::PYOBJECT = python::TypeFactory::instance().createOrGetPrimitiveType("pyobject"); const Type Type::GENERICTUPLE = python::TypeFactory::instance().createOrGetPrimitiveType("tuple"); const Type Type::GENERICDICT = python::TypeFactory::instance().createOrGetDictionaryType(python::Type::PYOBJECT, python::Type::PYOBJECT); const Type Type::GENERICLIST = python::TypeFactory::instance().createOrGetListType(python::Type::PYOBJECT); + //const Type Type::GENERICSET = python::TypeFactory::instance().createOrGetSetType(python::Type::PYOBJECT); // @TODO: implement. const Type Type::VOID = python::TypeFactory::instance().createOrGetPrimitiveType("void"); const Type Type::MATCHOBJECT = python::TypeFactory::instance().createOrGetPrimitiveType("matchobject"); const Type Type::RANGE = python::TypeFactory::instance().createOrGetPrimitiveType("range"); @@ -398,6 +400,9 @@ namespace python { } Type Type::keyType() const { + if(_hash == EMPTYDICT._hash || _hash == GENERICDICT._hash) + return PYOBJECT; + assert(isDictionaryType() && _hash != EMPTYDICT._hash && _hash != GENERICDICT._hash); auto& factory = TypeFactory::instance(); auto it = factory._typeMap.find(_hash); @@ -415,6 +420,9 @@ namespace python { } Type Type::valueType() const { + if(_hash == EMPTYDICT._hash || _hash == GENERICDICT._hash) + return PYOBJECT; + assert(isDictionaryType() && _hash != EMPTYDICT._hash && _hash != GENERICDICT._hash); auto& factory = TypeFactory::instance(); auto it = factory._typeMap.find(_hash); @@ -1115,8 +1123,15 @@ namespace python { // dictionary type if(aUnderlyingType.isDictionaryType() && bUnderlyingType.isDictionaryType()) { + + // empty dict can be always upcasted to concrete dict + if(python::Type::EMPTYDICT == aUnderlyingType) + return bUnderlyingType; + if(python::Type::EMPTYDICT == bUnderlyingType) + return aUnderlyingType; + auto key_t = unifyTypes(aUnderlyingType.keyType(), bUnderlyingType.keyType(), autoUpcast); - auto val_t = unifyTypes(aUnderlyingType.elementType(), bUnderlyingType.elementType(), autoUpcast); + auto val_t = unifyTypes(aUnderlyingType.valueType(), bUnderlyingType.valueType(), autoUpcast); if(key_t == python::Type::UNKNOWN || val_t == python::Type::UNKNOWN) { return python::Type::UNKNOWN; } From 64901a1ed44e577437133cf68b55bf83c1051313 Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Wed, 27 Jul 2022 16:15:37 -0400 Subject: [PATCH 14/23] stubs --- tuplex/codegen/include/SymbolTable.h | 12 +++++++--- tuplex/codegen/src/SymbolTable.cc | 21 +++++++++------- tuplex/test/core/DictionaryTyping.cc | 36 ++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 11 deletions(-) diff --git a/tuplex/codegen/include/SymbolTable.h b/tuplex/codegen/include/SymbolTable.h index 3b14a721d..0a6b3854d 100644 --- a/tuplex/codegen/include/SymbolTable.h +++ b/tuplex/codegen/include/SymbolTable.h @@ -171,17 +171,23 @@ namespace tuplex { * add an attribute to a builtin type, e.g. str.lower * @param builtinType * @param name - * @param type + * @param type type of the attribute, i.e. if it is a function type then a function symbol will be added. + * If it is not a function type a variable symbol will be added. */ - void addBuiltinTypeAttribute(const python::Type& builtinType, const std::string& name, const python::Type& type); + void addBuiltinTypeAttribute(const python::Type& builtinType, + const std::string& name, + const python::Type& type); /*! * add an attribute to a builtin type, e.g. dict.keys() * @param builtinType to which type to add the function * @param name name of the attribute * @param typer a dynamic typing function + * @param sym_type what kind of symbol it is (function? variable?), needed because typer works for both. */ - void addBuiltinTypeAttribute(const python::Type& builtinType, const std::string& name, std::function typer); + void addBuiltinTypeAttribute(const python::Type& builtinType, const std::string& name, + std::function typer, + const SymbolType& sym_type); /*! * checks whether a symbol can be looked up or not diff --git a/tuplex/codegen/src/SymbolTable.cc b/tuplex/codegen/src/SymbolTable.cc index cfa9e3f84..5af167db9 100644 --- a/tuplex/codegen/src/SymbolTable.cc +++ b/tuplex/codegen/src/SymbolTable.cc @@ -415,7 +415,7 @@ namespace tuplex { std::cout<<"need to get concrete dict type here!"< typer) { + std::function typer, + const SymbolType& sym_type = SymbolType::VARIABLE) { using namespace std; + assert(sym_type == SymbolType::VARIABLE || sym_type == SymbolType::FUNCTION); + // this seems wrong, need to perform the lookup directly... // use desc as name auto scope = currentScope(); @@ -696,12 +699,11 @@ namespace tuplex { } auto sym_att = it->second->findAttribute(name); if(!sym_att) { - it->second->addAttribute(make_shared(name, name, builtinType, builtinType.isFunctionType() ? SymbolType::FUNCTION : SymbolType::VARIABLE)); + it->second->addAttribute(make_shared(name, name, builtinType, sym_type)); sym_att = it->second->findAttribute(name); } else { // replace symbol, there can be only one symbol with a typer function - auto symbolType = builtinType.isFunctionType() ? SymbolType::FUNCTION : SymbolType::VARIABLE; - if(symbolType != sym_att->symbolType) + if(sym_type != sym_att->symbolType) throw std::runtime_error("symbol can only have one kind of types associated with it!"); assert(sym_att->qualifiedName == name); sym_att->name = name; @@ -796,7 +798,10 @@ namespace tuplex { return python::Type::UNKNOWN; } - static python::Type typeAttribute(std::shared_ptr sym, std::string attribute, python::Type parameterType) { + static python::Type typeAttribute(std::shared_ptr sym, + std::string attribute, + python::Type parameterType, + python::Type objectType) { if(sym) { auto attr_sym = sym->findAttribute(attribute); @@ -855,7 +860,7 @@ namespace tuplex { auto name = type.desc(); auto sym = findSymbol(name); - resultType = typeAttribute(sym, attribute, parameterType); + resultType = typeAttribute(sym, attribute, parameterType, type); if(resultType != python::Type::UNKNOWN) return resultType; @@ -869,7 +874,7 @@ namespace tuplex { if(type.isDictionaryType() || type == python::Type::EMPTYDICT) name = python::Type::GENERICDICT.desc(); sym = findSymbol(name); - resultType = typeAttribute(sym, attribute, parameterType); + resultType = typeAttribute(sym, attribute, parameterType, type); } return resultType; diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc index be5d8cfc7..df8c532f5 100644 --- a/tuplex/test/core/DictionaryTyping.cc +++ b/tuplex/test/core/DictionaryTyping.cc @@ -628,5 +628,41 @@ TEST(DictionaryTyping, Count) { cout<<"return type of function is: "< needs speculation. + + // test count UDF +// auto count_c = "def count_keys(x):\n" +// " d = {'A':10, 'B': 10, x: 20}\n" +// " return list(d.keys())"; + auto count_c = "def count_keys(x):\n" + " d = {'A':10, 'B': 10, x: 20}\n" + " return d.keys()"; + + // parse code to AST + auto ast = tuplex::codegen::AnnotatedAST(); + ast.parseString(count_c); + + // make typing + python::Type inputType = python::Type::STRING; + + // create symbol table + ast.addTypeHint("x", inputType); + ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY); + + // print type annotated ast + GraphVizGraph graph; + graph.createFromAST(ast.getFunctionAST(), true); + graph.saveAsPDF("dict_count_keys.pdf"); + + cout<<"return type of function is: "< Date: Wed, 27 Jul 2022 16:17:45 -0400 Subject: [PATCH 15/23] fix --- tuplex/codegen/src/SymbolTable.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tuplex/codegen/src/SymbolTable.cc b/tuplex/codegen/src/SymbolTable.cc index 5af167db9..8352ba317 100644 --- a/tuplex/codegen/src/SymbolTable.cc +++ b/tuplex/codegen/src/SymbolTable.cc @@ -412,6 +412,8 @@ namespace tuplex { { addBuiltinTypeAttribute(python::Type::GENERICDICT, "keys", [](const python::Type& parameterType) { + // @TODO: @rhea once you changed the signature of the Lambda here, you should be abel to type correctly. + // I can give it a try to refactor everything better than. std::cout<<"need to get concrete dict type here!"<type(); python::Type funcType = python::Type::UNKNOWN; + + // @TODO: @rhea -> change function here to include objectType as well and make typer a two parameter function attr_sym->findFunctionTypeBasedOnParameterType(parameterType, funcType); // ignore ret value. return funcType; } From 3936c34acdc3f4e137c3955b9c709ff3f6a2d6fd Mon Sep 17 00:00:00 2001 From: Rhea Goyal Date: Wed, 27 Jul 2022 18:04:27 -0400 Subject: [PATCH 16/23] pulled dict proxy files --- tuplex/codegen/include/BuiltinDictProxy.h | 90 +++++++++++++ tuplex/codegen/include/BuiltinDictProxyImpl.h | 28 ++++ tuplex/codegen/include/cJSONDictProxyImpl.h | 59 +++++++++ tuplex/core/src/cJSONDictProxyImpl.cc | 69 ++++++++++ tuplex/test/dict/CMakeLists.txt | 17 +++ tuplex/test/dict/DictProxyTest.cc | 121 ++++++++++++++++++ tuplex/test/dict/main.cc | 18 +++ 7 files changed, 402 insertions(+) create mode 100644 tuplex/codegen/include/BuiltinDictProxy.h create mode 100644 tuplex/codegen/include/BuiltinDictProxyImpl.h create mode 100644 tuplex/codegen/include/cJSONDictProxyImpl.h create mode 100644 tuplex/core/src/cJSONDictProxyImpl.cc create mode 100644 tuplex/test/dict/CMakeLists.txt create mode 100644 tuplex/test/dict/DictProxyTest.cc create mode 100644 tuplex/test/dict/main.cc diff --git a/tuplex/codegen/include/BuiltinDictProxy.h b/tuplex/codegen/include/BuiltinDictProxy.h new file mode 100644 index 000000000..cd5fb979d --- /dev/null +++ b/tuplex/codegen/include/BuiltinDictProxy.h @@ -0,0 +1,90 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 8/9/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// +#ifndef TUPLEX_BUILTINDICTPROXY_H +#define TUPLEX_BUILTINDICTPROXY_H + +#include + +#include +#include +#include + +// TODO: Could also use a general object based system which would make things easier... +// -> i.e., sequence protocol strings/lists/... + +// basically for each object we need +// 1.) representation as C++ object (field) +// 2.) code-generated logic (i.e., codegen specialization) +// 3.) to/from python object + +namespace tuplex { + namespace codegen { + class BuiltinDictProxy { + public: + // BuiltinDictProxy (--> specializedDictType) + BuiltinDictProxy(const python::Type& specializedDictType) : _specializedType(specializedDictType) { + // use cJSON as default for now... + _impl = std::make_shared(); + } + + // use both codegen/non-codegen version + // putItem + BuiltinDictProxy& putItem(const Field& key, const Field& value) { assert(_impl); _impl->putItem(key, value); return *this; } + BuiltinDictProxy& putItem(const python::Type& keyType, const SerializableValue& key, const python::Type& valueType, const SerializableValue& value) { assert(_impl); _impl->putItem(keyType, key, valueType, value); return *this; } + +// // getItemß +// BuiltinDictProxy& getItem(const Field& key); +// BuiltinDictProxy& getItem(const python::Type& keyType, const SerializableValue& key); +// +// // delItem +// BuiltinDictProxy& delItem(const Field& key); +// BuiltinDictProxy& delItem(const python::Type& keyType, const SerializableValue& key); +// +// // allocSize() --> helpful when dict size is known upfront, can be used for optimization. +// BuiltinDictProxy& allocSize(llvm::Value* size); + + // getKeyView() --> codegen object + + // getValuesView() --> codegen object + + python::Type dictType() const { + throw std::runtime_error("not yet implemented"); + } + + python::Type specializedDictType() const { + return _specializedType; + } + + // codegenToMemory + + // codegenFromMemory + // static function? + + // codegenSerializedLength + + // toMemory + + // fromMemory + // static function? + + // serializedLength + private: + python::Type _specializedType; + + // implementation... + // -> cJSON + // -> ... + // -> ... + std::shared_ptr _impl; + }; + } +} + +#endif //TUPLEX_BUILTINDICTPROXY_H diff --git a/tuplex/codegen/include/BuiltinDictProxyImpl.h b/tuplex/codegen/include/BuiltinDictProxyImpl.h new file mode 100644 index 000000000..7665f6cca --- /dev/null +++ b/tuplex/codegen/include/BuiltinDictProxyImpl.h @@ -0,0 +1,28 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 8/9/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// + +#ifndef TUPLEX_BUILTINDICTPROXYIMPL_H +#define TUPLEX_BUILTINDICTPROXYIMPL_H + +#include +#include +#include + +namespace tuplex { + namespace codegen { + class BuiltinDictProxyImpl { + public: + virtual void putItem(const Field& key, const Field& value) = 0; + virtual void putItem(const python::Type& keyType, const SerializableValue& key, const python::Type& valueType, const SerializableValue& value) = 0; + }; + } +} + +#endif //TUPLEX_BUILTINDICTPROXYIMPL_H diff --git a/tuplex/codegen/include/cJSONDictProxyImpl.h b/tuplex/codegen/include/cJSONDictProxyImpl.h new file mode 100644 index 000000000..a3218c3e6 --- /dev/null +++ b/tuplex/codegen/include/cJSONDictProxyImpl.h @@ -0,0 +1,59 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 8/9/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// +#ifndef TUPLEX_CJSONDICTPROXYIMPL_H +#define TUPLEX_CJSONDICTPROXYIMPL_H + +#ifdef BUILD_WITH_AWS +#include +#else +#include +#endif +#include "optional.h" +#include + +namespace tuplex { + namespace codegen { + class cJSONDictProxyImpl : public BuiltinDictProxyImpl { + public: + cJSONDictProxyImpl() : _root(nullptr) {} + ~cJSONDictProxyImpl() { + if(_root) { + cJSON_free(_root); + _root = nullptr; + } + } + cJSONDictProxyImpl(const cJSONDictProxyImpl& other) = delete; + cJSONDictProxyImpl& operator = (const cJSONDictProxyImpl& other) = delete; + + void putItem(const Field& key, const Field& value) override; + void putItem(const python::Type& keyType, const SerializableValue& key, const python::Type& valueType, const SerializableValue& value) override; + + + // notes: + // for cJSON subscripting, need to perform + // SerializableValue BlockGeneratorVisitor::subscriptCJSONDictionary(NSubscription *sub, SerializableValue index, + // const python::Type &index_type, + // SerializableValue value) { + + private: + cJSON *_root; // a map of the elements + cJSON *_typeMap; // a map of strings -> types (nested) + + /*! + * returns a string representing a type prefix when storing type information in cJSON object as well. + * @param type + * @return + */ + static std::string typePrefix(const python::Type& type); + }; + } +} + +#endif //TUPLEX_CJSONDICTPROXYIMPL_H diff --git a/tuplex/core/src/cJSONDictProxyImpl.cc b/tuplex/core/src/cJSONDictProxyImpl.cc new file mode 100644 index 000000000..edc701dd4 --- /dev/null +++ b/tuplex/core/src/cJSONDictProxyImpl.cc @@ -0,0 +1,69 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 8/9/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// +#include + +namespace tuplex { + namespace codegen { + + + // in general cJSON supports following data types: + // string + // number + // boolean + // null + // object + // array + // --> yet type info from python might get lost. Hence, store it when possible as well! + + // this is a general helper function to turn a Field into a cJSON object + + /*! + * converts a field into a cJSON object. If not convertible, returns nullptr. + * @param f Field + * @param includeTypePrefix + * @return cJSON* object + */ + cJSON* fieldToCJSON(const Field& f, bool includeTypePrefix=false) { + + return nullptr; + } + + Field cJSONToField(const cJSON* object) { + assert(object); + + return Field::null(); + } + + std::string cJSONDictProxyImpl::typePrefix(const python::Type& type) { + + // init map for a couple common types (int, float, bool, ...) + + // since keys in JSON are always strings, need to store type info in that string! + return ""; + } + + void cJSONDictProxyImpl::putItem(const Field &key, const Field &value) { + // put into cJSON, yet due to both key/type being not necessary type stable, encode type as base64 into values! + // map primitive types directly into cJSON if possible + if(!_root) + _root = cJSON_CreateObject(); + + // type prefix + + throw std::runtime_error("to implement..."); + } + + void cJSONDictProxyImpl::putItem(const python::Type &keyType, const SerializableValue &key, + const python::Type &valueType, const SerializableValue &value) { + + throw std::runtime_error("to implement..."); + } + } +} diff --git a/tuplex/test/dict/CMakeLists.txt b/tuplex/test/dict/CMakeLists.txt new file mode 100644 index 000000000..6f3d18cef --- /dev/null +++ b/tuplex/test/dict/CMakeLists.txt @@ -0,0 +1,17 @@ +CMAKE_MINIMUM_REQUIRED(VERSION 3.12 FATAL_ERROR) + +# enable c++14 +SET(CMAKE_CXX_STANDARD 14) + +FILE(GLOB SRCS *.cc) + +include(GoogleTest) + +ADD_EXECUTABLE(testutils ${SRCS}) + +TARGET_LINK_LIBRARIES(testutils + libutils + ${GTest_LIBRARIES} + ) + +gtest_add_tests(TARGET testutils TEST_PREFIX "") \ No newline at end of file diff --git a/tuplex/test/dict/DictProxyTest.cc b/tuplex/test/dict/DictProxyTest.cc new file mode 100644 index 000000000..ae9b2aad6 --- /dev/null +++ b/tuplex/test/dict/DictProxyTest.cc @@ -0,0 +1,121 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 8/9/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// + +#include "TestUtils.h" +#include + +class DictProxyTest : public PyTest {}; + + +// helper function to generate combinations with repititions +template void combinations_r_recursive(const std::vector &elements, std::size_t combination_length, + std::vector &pos, unsigned long depth, + unsigned long margin, std::vector>& result) { + // Have we selected the number of required elements? + if (depth >= combination_length) { + std::vector combination; + combination.reserve(combination_length); + for(unsigned long ii = 0; ii < pos.size(); ++ii) + combination.push_back(elements[pos[ii]]); + combination.shrink_to_fit(); + result.push_back(combination); + return; + } + + // Try to select new elements to the right of the last selected one. + for (unsigned long ii = margin; ii < elements.size(); ++ii) { + pos[depth] = ii; + combinations_r_recursive(elements, combination_length, pos, depth + 1, ii, result); + } +} + +template std::vector> combinations_with_repetition(const std::vector &elements, size_t combination_length) { + assert(combination_length <= elements.size()); + std::vector positions(combination_length, 0); + std::vector> result; + combinations_r_recursive(elements, combination_length, positions, 0, 0, result); + + return result; +} + + + +TEST_F(DictProxyTest, PutItemTest) { + using namespace tuplex; + using namespace std; + + // testing the non-codegenerated put item test + + + // tests to write: + + // 1. heterogenous dict -> basically use modified JSON as in-memory storage format. + // 2. homogenous keytype dict -> can encode dict directly & serialize it more efficiently. Represent in-memory as hash table specialized depending on type. + // 3. homogenous valuetype -> ignore case, specialize to 1. + // 4. compile-time known keys/restricted keyset, keys do not change. -> struct type with fixed offsets! + + // put and get + auto dict_fun_code = "def f(a, b, c, d):\n" + " M = dict()\n" + " M[a] = b\n" + " M[c] = d\n" + " return M, M[a], M[c]\n"; + + codegen::BuiltinDictProxy dict_proxy(python::Type::UNKNOWN); + + // create test setups (4 values, all combos) + vector test_values{Field((int64_t)0), Field(10.0), Field(false), Field::null(), Field("hello world"), Field(Tuple(10, 20)), Field(Tuple(3.141, 10, false, "test")), Field(List(1.0, 3.0, 4.0))}; + + // NOTE: list/dict is not hashable in python! + // + + // create combos + // 4 ^ len(test_values) + + + // what about nested dicts? + // -> unflatten? + // --> unflatten using combined keys? i.e. a/b/c ? which char to use as separator? + // maybe start with non-nested dicts. + // dicts should be able to store lists etc. + + auto combos = combinations_with_repetition(test_values, 4); + + cout<<"Generated "< can be checked dynamically at runtime. I.e., good for read-only dictionaries, rarely changed ones. etc. --> requires dispatch dictionary for each type for dynamic types. Constants can be translated during compile time. + // -> because dicts support in syntax, need to keep additional bitmap to check whether there's a valid entry or not! + + + // 2. fixed key type/value type dicts -> can be used in dynamic settings. E.g., when accumulating things! + + // 3. other usage should be esoteric... + +} diff --git a/tuplex/test/dict/main.cc b/tuplex/test/dict/main.cc new file mode 100644 index 000000000..af04e4577 --- /dev/null +++ b/tuplex/test/dict/main.cc @@ -0,0 +1,18 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 1/1/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// + +#include "gtest/gtest.h" + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + int ret = RUN_ALL_TESTS(); + return ret; +} \ No newline at end of file From 510977a3fb95e0eca08adc234307ffebbcbad138 Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Thu, 28 Jul 2022 14:37:33 -0400 Subject: [PATCH 17/23] wip --- tuplex/codegen/src/SymbolTable.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tuplex/codegen/src/SymbolTable.cc b/tuplex/codegen/src/SymbolTable.cc index 8352ba317..9f8979b7a 100644 --- a/tuplex/codegen/src/SymbolTable.cc +++ b/tuplex/codegen/src/SymbolTable.cc @@ -413,7 +413,7 @@ namespace tuplex { addBuiltinTypeAttribute(python::Type::GENERICDICT, "keys", [](const python::Type& parameterType) { // @TODO: @rhea once you changed the signature of the Lambda here, you should be abel to type correctly. - // I can give it a try to refactor everything better than. + // I can give it a try to refactor everything better than. std::cout<<"need to get concrete dict type here!"< Date: Sat, 30 Jul 2022 21:02:13 -0400 Subject: [PATCH 18/23] pushing so I can write code locally --- tuplex/codegen/include/BuiltinDictProxy.h | 2 +- tuplex/codegen/src/SymbolTable.cc | 28 +---------------- tuplex/core/src/cJSONDictProxyImpl.cc | 37 +++++++++++++++++++++-- 3 files changed, 37 insertions(+), 30 deletions(-) diff --git a/tuplex/codegen/include/BuiltinDictProxy.h b/tuplex/codegen/include/BuiltinDictProxy.h index cd5fb979d..a37f2a634 100644 --- a/tuplex/codegen/include/BuiltinDictProxy.h +++ b/tuplex/codegen/include/BuiltinDictProxy.h @@ -39,7 +39,7 @@ namespace tuplex { BuiltinDictProxy& putItem(const Field& key, const Field& value) { assert(_impl); _impl->putItem(key, value); return *this; } BuiltinDictProxy& putItem(const python::Type& keyType, const SerializableValue& key, const python::Type& valueType, const SerializableValue& value) { assert(_impl); _impl->putItem(keyType, key, valueType, value); return *this; } -// // getItemß +// // getItem // BuiltinDictProxy& getItem(const Field& key); // BuiltinDictProxy& getItem(const python::Type& keyType, const SerializableValue& key); // diff --git a/tuplex/codegen/src/SymbolTable.cc b/tuplex/codegen/src/SymbolTable.cc index 5af167db9..48b088d59 100644 --- a/tuplex/codegen/src/SymbolTable.cc +++ b/tuplex/codegen/src/SymbolTable.cc @@ -409,6 +409,7 @@ namespace tuplex { // for keys()/values() use generic dict and let symbol table create specialized type on the fly using // typer function + /** TODO: finish implementing! (c++ lambda to get correct result) **/ { addBuiltinTypeAttribute(python::Type::GENERICDICT, "keys", [](const python::Type& parameterType) { @@ -418,35 +419,8 @@ namespace tuplex { }, SymbolType::FUNCTION); } - // i.e. type depending on input -// // typer function for dict.keys() and dict.values() -// // this currently doesn't handle empty dicts... -// std::vector all_types = {python::Type::BOOLEAN, python::Type::I64, python::Type::F64, -// python::Type::STRING, python::Type::PYOBJECT}; -// for (const auto &t1 : all_types) { -// for (const auto &t2 : all_types) { -// -// auto dict_type = python::Type::makeDictionaryType(t1, t2); -// -// // create specialized dict type -// auto dict_sym = std::make_shared(dict_type.desc(), "dictionary", t1, SymbolType::TYPE); -// // add here symbol so other functions can be easily added. -// addSymbol(dict_sym); -// -// // dict_keys -// auto keys_sym = std::make_shared("keys", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, python::Type::makeDictKeysType(dict_type.keyType()))); -// dict_sym->addAttribute(keys_sym); -// -// // dict_keys -// auto values_sym = std::make_shared("values", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, python::Type::makeDictValuesType(dict_type.valueType()))); -// dict_sym->addAttribute(values_sym); -// } -// } - - // addBuiltinTypeAttribute(python::Type::EMPTYDICT, "keys", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, ???)); - // for pop/popitem things are actually a bit more complicated... // i.e. the default keyword may introduce an issue... // https://www.programiz.com/python-programming/methods/dictionary/pop diff --git a/tuplex/core/src/cJSONDictProxyImpl.cc b/tuplex/core/src/cJSONDictProxyImpl.cc index edc701dd4..5ecc9f1ba 100644 --- a/tuplex/core/src/cJSONDictProxyImpl.cc +++ b/tuplex/core/src/cJSONDictProxyImpl.cc @@ -23,7 +23,6 @@ namespace tuplex { // --> yet type info from python might get lost. Hence, store it when possible as well! // this is a general helper function to turn a Field into a cJSON object - /*! * converts a field into a cJSON object. If not convertible, returns nullptr. * @param f Field @@ -54,14 +53,48 @@ namespace tuplex { // map primitive types directly into cJSON if possible if(!_root) _root = cJSON_CreateObject(); + + cJSON* to_add; // = cJSON_CreateNull(); + + // check type of value, create corresponding cJSON type object + if (value.getType() == python::Type::BOOLEAN) { + if (value.getInt() > 0) { + to_add = cJSON_CreateTrue(); + } else { + to_add = cJSON_CreateFalse(); + } + } else if (value.getType() == python::Type::F64) { + to_add = cJSON_CreateNumber(value.getDouble()); + } else if (value.getType() == python::Type::I64) { + // should I be upcasting? + to_add = cJSON_CreateNumber((double)value.getInt()); + } else if (value.getType() == python::Type::STRING) { + to_add = cJSON_CreateString((const char*)value.getPtr()); + } else if (value.getType().isTupleType()) { + assert(value.getPtr()); + + std::tuple* tup = (std::tuple*)value.getPtr(); + to_add = cJSON_CreateArray(); + + for (auto i : tup) { + + } + } else { + throw std::runtime_error("cannot put value with type " + value.getType().desc() + " into cJSON object"); + } + + // add to cJSON object + cJSON_AddItemToObject(_root, key.desc().c_str(), to_add); // type prefix - throw std::runtime_error("to implement..."); + // throw std::runtime_error("to implement..."); } void cJSONDictProxyImpl::putItem(const python::Type &keyType, const SerializableValue &key, const python::Type &valueType, const SerializableValue &value) { + if(!_root) + _root = cJSON_CreateObject(); throw std::runtime_error("to implement..."); } From 607592f5bb1b4773d50d28af5d58489721a0d7cd Mon Sep 17 00:00:00 2001 From: Rhea Goyal Date: Fri, 12 Aug 2022 06:47:23 -0400 Subject: [PATCH 19/23] basic tests passing for cJSON dict proxy --- tuplex/codegen/include/BuiltinDictProxyImpl.h | 15 +- tuplex/codegen/include/cJSONDictProxyImpl.h | 15 +- tuplex/codegen/src/cJSONDictProxyImpl.cc | 206 ++++++++++++++++++ tuplex/core/src/cJSONDictProxyImpl.cc | 102 --------- tuplex/test/CMakeLists.txt | 1 + tuplex/test/dict/CMakeLists.txt | 11 +- tuplex/test/dict/DictProxyTest.cc | 173 ++++++++------- tuplex/test/dict/cJSONTest.cc | 125 +++++++++++ 8 files changed, 454 insertions(+), 194 deletions(-) create mode 100644 tuplex/codegen/src/cJSONDictProxyImpl.cc delete mode 100644 tuplex/core/src/cJSONDictProxyImpl.cc create mode 100644 tuplex/test/dict/cJSONTest.cc diff --git a/tuplex/codegen/include/BuiltinDictProxyImpl.h b/tuplex/codegen/include/BuiltinDictProxyImpl.h index 7665f6cca..6defe6d90 100644 --- a/tuplex/codegen/include/BuiltinDictProxyImpl.h +++ b/tuplex/codegen/include/BuiltinDictProxyImpl.h @@ -19,10 +19,23 @@ namespace tuplex { namespace codegen { class BuiltinDictProxyImpl { public: + // Q: what does virtual do ? virtual void putItem(const Field& key, const Field& value) = 0; virtual void putItem(const python::Type& keyType, const SerializableValue& key, const python::Type& valueType, const SerializableValue& value) = 0; + + virtual bool keyExists(const Field& key) = 0; + + virtual Field getItem(const Field& key) = 0; + + virtual void replaceItem(const Field& key, const Field& value) = 0; + + virtual void deleteItem(const Field& key) = 0; + + // virtual void getKeyView() = 0; + + // virtual void getValuesView() = 0; }; } } -#endif //TUPLEX_BUILTINDICTPROXYIMPL_H +#endif //TUPLEX_BUILTINDICTPROXYIMPL_H \ No newline at end of file diff --git a/tuplex/codegen/include/cJSONDictProxyImpl.h b/tuplex/codegen/include/cJSONDictProxyImpl.h index a3218c3e6..cb461ea7c 100644 --- a/tuplex/codegen/include/cJSONDictProxyImpl.h +++ b/tuplex/codegen/include/cJSONDictProxyImpl.h @@ -22,7 +22,9 @@ namespace tuplex { namespace codegen { class cJSONDictProxyImpl : public BuiltinDictProxyImpl { public: - cJSONDictProxyImpl() : _root(nullptr) {} + // cJSONDictProxyImpl() : _root(nullptr) {} + // is there a reason we want to separate the initialisation of cjsondictproxy objects and the actual cjson object? + cJSONDictProxyImpl() : _root(cJSON_CreateObject()) {} ~cJSONDictProxyImpl() { if(_root) { cJSON_free(_root); @@ -35,6 +37,17 @@ namespace tuplex { void putItem(const Field& key, const Field& value) override; void putItem(const python::Type& keyType, const SerializableValue& key, const python::Type& valueType, const SerializableValue& value) override; + bool keyExists(const Field& key) override; + + Field getItem(const Field& key) override; + + void replaceItem(const Field& key, const Field& value) override; + + void deleteItem(const Field& key) override; + + // void getKeyView() override; + + // void getValuesView() override; // notes: // for cJSON subscripting, need to perform diff --git a/tuplex/codegen/src/cJSONDictProxyImpl.cc b/tuplex/codegen/src/cJSONDictProxyImpl.cc new file mode 100644 index 000000000..d0a2d634b --- /dev/null +++ b/tuplex/codegen/src/cJSONDictProxyImpl.cc @@ -0,0 +1,206 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 8/9/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// +#include + +namespace tuplex { + namespace codegen { + // in general cJSON supports following data types: + // string + // number + // boolean + // null + // object + // array + // --> yet type info from python might get lost. Hence, store it when possible as well! + + // this is a general helper function to turn a Field into a cJSON object + /*! + * converts a field into a cJSON object. If not convertible, returns nullptr. + * @param f Field + * @param includeTypePrefix + * @return cJSON* object + */ + cJSON* fieldToCJSON(const Field& f, bool includeTypePrefix=false) { + // initialise cJSON object + cJSON* cjson_obj = nullptr; + + // check type of Field, create corresponding cJSON type object + if (f.getType() == python::Type::BOOLEAN) { + if (f.getInt() > 0) { + cjson_obj = cJSON_CreateTrue(); + } else { + cjson_obj = cJSON_CreateFalse(); + } + } else if (f.getType() == python::Type::F64) { + cjson_obj = cJSON_CreateNumber(f.getDouble()); + } else if (f.getType() == python::Type::I64) { + // should I be upcasting? + cjson_obj = cJSON_CreateNumber((double)f.getInt()); + } else if (f.getType() == python::Type::STRING) { + assert(f.getPtr()); + cjson_obj = cJSON_CreateString((const char*)f.getPtr()); + } else if (f.getType().isListType()) { + assert(f.getPtr()); + + tuplex::List* lis = (tuplex::List*)f.getPtr(); + cjson_obj = cJSON_CreateArray(); + + for (int i = 0; i < lis->numElements(); i++) { + // retrieve ith element from list + Field element = lis->getField(i); + // convert to cJSON object + cJSON* cjson_elt = fieldToCJSON(element); + + // add element to cJSON array + cJSON_AddItemToArray(cjson_obj, cjson_elt); + } + } else if (f.getType().isTupleType()) { + assert(f.getPtr()); + + tuplex::Tuple* tup = (tuplex::Tuple*)f.getPtr(); + cjson_obj = cJSON_CreateArray(); + + for (int i = 0; i < tup->numElements(); i++) { + // retrieve ith element from tuple + Field element = tup->getField(i); + // convert to cJSON object + cJSON* cjson_elt = fieldToCJSON(element); + + // add element to cJSON array + cJSON_AddItemToArray(cjson_obj, cjson_elt); + } + } else if (f.getType() == python::Type::NULLVALUE) { + cjson_obj = cJSON_CreateNull(); + } else { + // throw std::runtime_error("cannot change value with type " + value.getType().desc() + " into cJSON object"); + } + + return cjson_obj; + } + + Field cJSONToField(const cJSON* object) { + assert(object); + + Field ret = Field::null(); + + if (cJSON_IsNumber(object)) { + ret = Field(cJSON_GetNumberValue(object)); + } else if (cJSON_IsString(object)) { + ret = Field(cJSON_GetStringValue(object)); + } else if (cJSON_IsTrue(object)) { + ret = Field(true); + } else if (cJSON_IsFalse(object)) { + ret = Field(false); + } else if (cJSON_IsNull(object)) { + ret = Field::null(); + } else if (cJSON_IsArray(object)) { + throw std::runtime_error("not yet implemented..."); + } else if (cJSON_IsObject(object)) { + throw std::runtime_error("not yet implemented..."); + } + + return ret; + } + + std::string cJSONDictProxyImpl::typePrefix(const python::Type& type) { + + // init map for a couple common types (int, float, bool, ...) + + // since keys in JSON are always strings, need to store type info in that string! + return ""; + } + + void cJSONDictProxyImpl::putItem(const Field &key, const Field &value) { + // put into cJSON, yet due to both key/type being not necessary type stable, encode type as base64 into values! + // map primitive types directly into cJSON if possible + if(!_root) + // _root = cJSON_CreateObject(); + throw std::runtime_error("cannot use putItem on an uninitialised dictionary"); + + cJSON* to_add = fieldToCJSON(value); + if (!to_add) { + throw std::runtime_error("item to add not convertible to cJSON object"); + } + + // add to cJSON object + // TODO: what's the difference between key.desc and getting the key's ptr value? + // A: key.desc gets the string of the Field regardless of the type of the Field + cJSON_AddItemToObject(_root, key.desc().c_str(), to_add); + + // type prefix + + // throw std::runtime_error("to implement..."); + } + + void cJSONDictProxyImpl::putItem(const python::Type &keyType, const SerializableValue &key, + const python::Type &valueType, const SerializableValue &value) { + if(!_root) + _root = cJSON_CreateObject(); + + throw std::runtime_error("to implement..."); + } + + bool cJSONDictProxyImpl::keyExists(const Field& key) { + if(!_root) + throw std::runtime_error("cannot use keyExists on an uninitialised dictionary"); + + cJSON* res = cJSON_GetObjectItemCaseSensitive(_root, key.desc().c_str()); + + return (res != NULL); + } + + Field cJSONDictProxyImpl::getItem(const Field& key) { + if (!_root) + throw std::runtime_error("cannot use getItem on an uninitialised dictionary"); + + // retrieve value from dict + cJSON* item = cJSON_GetObjectItemCaseSensitive(_root, key.desc().c_str()); + + if (!item) + throw std::runtime_error("error retrieving value from cJSON dictionary"); + + // convert into Field + Field field_item = cJSONToField(item); + + return field_item; + } + + void cJSONDictProxyImpl::replaceItem(const Field& key, const Field& value) { + if (!_root) + throw std::runtime_error("cannot use replaceItem on an uninitialised dictionary"); + + // assert(key.getType() == python::Type::STRING); + + // attempt to retrieve value from dict + cJSON* item = cJSON_GetObjectItemCaseSensitive(_root, key.desc().c_str()); + + if (!item) { + // key doesn't already exist; simply perform putItem instead (?) + putItem(key, value); + } else { + // replace value at key + cJSON* new_item = fieldToCJSON(value); + if (!new_item) { + throw std::runtime_error("new item not convertible to cJSON object"); + } + + cJSON_ReplaceItemInObjectCaseSensitive(_root, key.desc().c_str(), new_item); + } + } + + void cJSONDictProxyImpl::deleteItem(const Field& key) { + if (!_root) + throw std::runtime_error("cannot use deleteItem on an uninitialised dictionary"); + + // delete value from dict + cJSON_DeleteItemFromObjectCaseSensitive(_root, (const char*)key.desc().c_str()); + } + } +} diff --git a/tuplex/core/src/cJSONDictProxyImpl.cc b/tuplex/core/src/cJSONDictProxyImpl.cc deleted file mode 100644 index 5ecc9f1ba..000000000 --- a/tuplex/core/src/cJSONDictProxyImpl.cc +++ /dev/null @@ -1,102 +0,0 @@ -//--------------------------------------------------------------------------------------------------------------------// -// // -// Tuplex: Blazing Fast Python Data Science // -// // -// // -// (c) 2017 - 2021, Tuplex team // -// Created by Leonhard Spiegelberg first on 8/9/2021 // -// License: Apache 2.0 // -//--------------------------------------------------------------------------------------------------------------------// -#include - -namespace tuplex { - namespace codegen { - - - // in general cJSON supports following data types: - // string - // number - // boolean - // null - // object - // array - // --> yet type info from python might get lost. Hence, store it when possible as well! - - // this is a general helper function to turn a Field into a cJSON object - /*! - * converts a field into a cJSON object. If not convertible, returns nullptr. - * @param f Field - * @param includeTypePrefix - * @return cJSON* object - */ - cJSON* fieldToCJSON(const Field& f, bool includeTypePrefix=false) { - - return nullptr; - } - - Field cJSONToField(const cJSON* object) { - assert(object); - - return Field::null(); - } - - std::string cJSONDictProxyImpl::typePrefix(const python::Type& type) { - - // init map for a couple common types (int, float, bool, ...) - - // since keys in JSON are always strings, need to store type info in that string! - return ""; - } - - void cJSONDictProxyImpl::putItem(const Field &key, const Field &value) { - // put into cJSON, yet due to both key/type being not necessary type stable, encode type as base64 into values! - // map primitive types directly into cJSON if possible - if(!_root) - _root = cJSON_CreateObject(); - - cJSON* to_add; // = cJSON_CreateNull(); - - // check type of value, create corresponding cJSON type object - if (value.getType() == python::Type::BOOLEAN) { - if (value.getInt() > 0) { - to_add = cJSON_CreateTrue(); - } else { - to_add = cJSON_CreateFalse(); - } - } else if (value.getType() == python::Type::F64) { - to_add = cJSON_CreateNumber(value.getDouble()); - } else if (value.getType() == python::Type::I64) { - // should I be upcasting? - to_add = cJSON_CreateNumber((double)value.getInt()); - } else if (value.getType() == python::Type::STRING) { - to_add = cJSON_CreateString((const char*)value.getPtr()); - } else if (value.getType().isTupleType()) { - assert(value.getPtr()); - - std::tuple* tup = (std::tuple*)value.getPtr(); - to_add = cJSON_CreateArray(); - - for (auto i : tup) { - - } - } else { - throw std::runtime_error("cannot put value with type " + value.getType().desc() + " into cJSON object"); - } - - // add to cJSON object - cJSON_AddItemToObject(_root, key.desc().c_str(), to_add); - - // type prefix - - // throw std::runtime_error("to implement..."); - } - - void cJSONDictProxyImpl::putItem(const python::Type &keyType, const SerializableValue &key, - const python::Type &valueType, const SerializableValue &value) { - if(!_root) - _root = cJSON_CreateObject(); - - throw std::runtime_error("to implement..."); - } - } -} diff --git a/tuplex/test/CMakeLists.txt b/tuplex/test/CMakeLists.txt index 3f3721780..3e422d54b 100755 --- a/tuplex/test/CMakeLists.txt +++ b/tuplex/test/CMakeLists.txt @@ -78,6 +78,7 @@ add_subdirectory(io) add_subdirectory(runtime) add_subdirectory(adapters) add_subdirectory(utils) +add_subdirectory(dict) # these require python, so only if embed is active! if(Python3_Embed_FOUND) diff --git a/tuplex/test/dict/CMakeLists.txt b/tuplex/test/dict/CMakeLists.txt index 6f3d18cef..9e3c566c9 100644 --- a/tuplex/test/dict/CMakeLists.txt +++ b/tuplex/test/dict/CMakeLists.txt @@ -7,11 +7,16 @@ FILE(GLOB SRCS *.cc) include(GoogleTest) -ADD_EXECUTABLE(testutils ${SRCS}) +ADD_EXECUTABLE(testdict ${SRCS}) -TARGET_LINK_LIBRARIES(testutils +TARGET_LINK_LIBRARIES(testdict libutils + libcodegen + libcpythonadapter + libio ${GTest_LIBRARIES} + ${AWSSDK_LINK_LIBRARIES} + ${Python3_LIBRARIES} ) -gtest_add_tests(TARGET testutils TEST_PREFIX "") \ No newline at end of file +gtest_add_tests(TARGET testdict TEST_PREFIX "") \ No newline at end of file diff --git a/tuplex/test/dict/DictProxyTest.cc b/tuplex/test/dict/DictProxyTest.cc index ae9b2aad6..d74f989cd 100644 --- a/tuplex/test/dict/DictProxyTest.cc +++ b/tuplex/test/dict/DictProxyTest.cc @@ -1,121 +1,120 @@ -//--------------------------------------------------------------------------------------------------------------------// -// // -// Tuplex: Blazing Fast Python Data Science // -// // -// // -// (c) 2017 - 2021, Tuplex team // -// Created by Leonhard Spiegelberg first on 8/9/2021 // -// License: Apache 2.0 // -//--------------------------------------------------------------------------------------------------------------------// +// //--------------------------------------------------------------------------------------------------------------------// +// // // +// // Tuplex: Blazing Fast Python Data Science // +// // // +// // // +// // (c) 2017 - 2021, Tuplex team // +// // Created by Leonhard Spiegelberg first on 8/9/2021 // +// // License: Apache 2.0 // +// //--------------------------------------------------------------------------------------------------------------------// -#include "TestUtils.h" -#include +// #include +// #include "gtest/gtest.h" -class DictProxyTest : public PyTest {}; +// class DictProxyTest : public TuplexTest {}; +// // helper function to generate combinations with repititions +// template void combinations_r_recursive(const std::vector &elements, std::size_t combination_length, +// std::vector &pos, unsigned long depth, +// unsigned long margin, std::vector>& result) { +// // Have we selected the number of required elements? +// if (depth >= combination_length) { +// std::vector combination; +// combination.reserve(combination_length); +// for(unsigned long ii = 0; ii < pos.size(); ++ii) +// combination.push_back(elements[pos[ii]]); +// combination.shrink_to_fit(); +// result.push_back(combination); +// return; +// } -// helper function to generate combinations with repititions -template void combinations_r_recursive(const std::vector &elements, std::size_t combination_length, - std::vector &pos, unsigned long depth, - unsigned long margin, std::vector>& result) { - // Have we selected the number of required elements? - if (depth >= combination_length) { - std::vector combination; - combination.reserve(combination_length); - for(unsigned long ii = 0; ii < pos.size(); ++ii) - combination.push_back(elements[pos[ii]]); - combination.shrink_to_fit(); - result.push_back(combination); - return; - } +// // Try to select new elements to the right of the last selected one. +// for (unsigned long ii = margin; ii < elements.size(); ++ii) { +// pos[depth] = ii; +// combinations_r_recursive(elements, combination_length, pos, depth + 1, ii, result); +// } +// } - // Try to select new elements to the right of the last selected one. - for (unsigned long ii = margin; ii < elements.size(); ++ii) { - pos[depth] = ii; - combinations_r_recursive(elements, combination_length, pos, depth + 1, ii, result); - } -} +// template std::vector> combinations_with_repetition(const std::vector &elements, size_t combination_length) { +// assert(combination_length <= elements.size()); +// std::vector positions(combination_length, 0); +// std::vector> result; +// combinations_r_recursive(elements, combination_length, positions, 0, 0, result); -template std::vector> combinations_with_repetition(const std::vector &elements, size_t combination_length) { - assert(combination_length <= elements.size()); - std::vector positions(combination_length, 0); - std::vector> result; - combinations_r_recursive(elements, combination_length, positions, 0, 0, result); +// return result; +// } - return result; -} +// TEST_F(DictProxyTest, PutItemTest) { +// using namespace tuplex; +// using namespace std; -TEST_F(DictProxyTest, PutItemTest) { - using namespace tuplex; - using namespace std; +// // testing the non-codegenerated put item test - // testing the non-codegenerated put item test +// // tests to write: - // tests to write: +// // 1. heterogenous dict -> basically use modified JSON as in-memory storage format. +// // 2. homogenous keytype dict -> can encode dict directly & serialize it more efficiently. Represent in-memory as hash table specialized depending on type. +// // 3. homogenous valuetype -> ignore case, specialize to 1. +// // 4. compile-time known keys/restricted keyset, keys do not change. -> struct type with fixed offsets! - // 1. heterogenous dict -> basically use modified JSON as in-memory storage format. - // 2. homogenous keytype dict -> can encode dict directly & serialize it more efficiently. Represent in-memory as hash table specialized depending on type. - // 3. homogenous valuetype -> ignore case, specialize to 1. - // 4. compile-time known keys/restricted keyset, keys do not change. -> struct type with fixed offsets! +// // put and get +// auto dict_fun_code = "def f(a, b, c, d):\n" +// " M = dict()\n" +// " M[a] = b\n" +// " M[c] = d\n" +// " return M, M[a], M[c]\n"; - // put and get - auto dict_fun_code = "def f(a, b, c, d):\n" - " M = dict()\n" - " M[a] = b\n" - " M[c] = d\n" - " return M, M[a], M[c]\n"; +// codegen::BuiltinDictProxy dict_proxy(python::Type::UNKNOWN); - codegen::BuiltinDictProxy dict_proxy(python::Type::UNKNOWN); +// // create test setups (4 values, all combos) +// vector test_values{Field((int64_t)0), Field(10.0), Field(false), Field::null(), Field("hello world"), Field(Tuple(10, 20)), Field(Tuple(3.141, 10, false, "test")), Field(List(1.0, 3.0, 4.0))}; - // create test setups (4 values, all combos) - vector test_values{Field((int64_t)0), Field(10.0), Field(false), Field::null(), Field("hello world"), Field(Tuple(10, 20)), Field(Tuple(3.141, 10, false, "test")), Field(List(1.0, 3.0, 4.0))}; +// // NOTE: list/dict is not hashable in python! +// // - // NOTE: list/dict is not hashable in python! - // +// // create combos +// // 4 ^ len(test_values) - // create combos - // 4 ^ len(test_values) +// // what about nested dicts? +// // -> unflatten? +// // --> unflatten using combined keys? i.e. a/b/c ? which char to use as separator? +// // maybe start with non-nested dicts. +// // dicts should be able to store lists etc. - // what about nested dicts? - // -> unflatten? - // --> unflatten using combined keys? i.e. a/b/c ? which char to use as separator? - // maybe start with non-nested dicts. - // dicts should be able to store lists etc. +// auto combos = combinations_with_repetition(test_values, 4); - auto combos = combinations_with_repetition(test_values, 4); +// cout<<"Generated "< can be checked dynamically at runtime. I.e., good for read-only dictionaries, rarely changed ones. etc. --> requires dispatch dictionary for each type for dynamic types. Constants can be translated during compile time. +// // -> because dicts support in syntax, need to keep additional bitmap to check whether there's a valid entry or not! - // 1. fixed set of keys -> can be checked dynamically at runtime. I.e., good for read-only dictionaries, rarely changed ones. etc. --> requires dispatch dictionary for each type for dynamic types. Constants can be translated during compile time. - // -> because dicts support in syntax, need to keep additional bitmap to check whether there's a valid entry or not! +// // 2. fixed key type/value type dicts -> can be used in dynamic settings. E.g., when accumulating things! - // 2. fixed key type/value type dicts -> can be used in dynamic settings. E.g., when accumulating things! +// // 3. other usage should be esoteric... - // 3. other usage should be esoteric... - -} +// } diff --git a/tuplex/test/dict/cJSONTest.cc b/tuplex/test/dict/cJSONTest.cc new file mode 100644 index 000000000..22e0c0a00 --- /dev/null +++ b/tuplex/test/dict/cJSONTest.cc @@ -0,0 +1,125 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 8/9/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// + +#include +#include "gtest/gtest.h" + +TEST(cJSONTest, PutItemTest) { + using namespace tuplex; + using namespace std; + + // testing non-codegenerated put item + // initialise test dict + codegen::cJSONDictProxyImpl dict_proxy; + + EXPECT_EQ(false, dict_proxy.keyExists(Field((int64_t)10))); + + // put test values into test dict + dict_proxy.putItem(Field((int64_t)10), Field("a")); + dict_proxy.putItem(Field((int64_t)20), Field("b")); + + EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)10))); + EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)20))); + EXPECT_EQ(false, dict_proxy.keyExists(Field((int64_t)30))); + + dict_proxy.putItem(Field((int64_t)30), Field("c")); + + EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)20))); + EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)30))); +} + +TEST(cJSONTest, GetItemTest) { + using namespace tuplex; + using namespace std; + + // testing non-codegenerated put item + // initialise test dict + codegen::cJSONDictProxyImpl dict_proxy; + + // put test values into test dict + dict_proxy.putItem(Field((int64_t)10), Field("a")); + dict_proxy.putItem(Field((int64_t)20), Field("b")); + + EXPECT_EQ(Field("a"), dict_proxy.getItem(Field((int64_t)10))); + EXPECT_EQ(Field("b"), dict_proxy.getItem(Field((int64_t)20))); + EXPECT_THROW(dict_proxy.getItem(Field((int64_t)30)), std::runtime_error); + + dict_proxy.putItem(Field((int64_t)30), Field("c")); + + EXPECT_EQ(Field("c"), dict_proxy.getItem(Field((int64_t)30))); +} + +TEST(cJSONTest, DeleteItemTest) { + using namespace tuplex; + using namespace std; + + // testing non-codegenerated put item + // initialise test dict + codegen::cJSONDictProxyImpl dict_proxy; + + // put test values into test dict + dict_proxy.putItem(Field((int64_t)10), Field("a")); + dict_proxy.putItem(Field((int64_t)20), Field("b")); + + EXPECT_EQ(Field("a"), dict_proxy.getItem(Field((int64_t)10))); + EXPECT_EQ(Field("b"), dict_proxy.getItem(Field((int64_t)20))); + + dict_proxy.deleteItem(Field((int64_t)10)); + + EXPECT_EQ(false, dict_proxy.keyExists(Field((int64_t)10))); + EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)20))); + + dict_proxy.deleteItem(Field((int64_t)20)); + dict_proxy.putItem(Field((int64_t)10), Field((int64_t)100)); + + Field res = dict_proxy.getItem(Field((int64_t)10)); + + // NOTE: expected result will be a double, bc I think cJSON stores all numbers as doubles + EXPECT_EQ(Field((double)100), dict_proxy.getItem(Field((int64_t)10))); + EXPECT_EQ(false, dict_proxy.keyExists(Field((int64_t)20))); +} + +TEST(cJSONTest, ReplaceItemTest) { + using namespace tuplex; + using namespace std; + + // testing non-codegenerated put item + // initialise test dict + codegen::cJSONDictProxyImpl dict_proxy; + + // put test values into test dict + dict_proxy.putItem(Field((int64_t)10), Field("a")); + dict_proxy.putItem(Field((int64_t)20), Field("b")); + + EXPECT_EQ(Field("a"), dict_proxy.getItem(Field((int64_t)10))); + EXPECT_EQ(Field("b"), dict_proxy.getItem(Field((int64_t)20))); + + dict_proxy.replaceItem(Field((int64_t)10), Field("c")); + + EXPECT_EQ(Field("c"), dict_proxy.getItem(Field((int64_t)10))); + EXPECT_EQ(Field("b"), dict_proxy.getItem(Field((int64_t)20))); + + dict_proxy.putItem(Field((int64_t)30), Field("c")); + + EXPECT_EQ(Field("c"), dict_proxy.getItem(Field((int64_t)10))); + EXPECT_EQ(Field("c"), dict_proxy.getItem(Field((int64_t)30))); + + dict_proxy.replaceItem(Field((int64_t)30), Field((int64_t)50)); + + // NOTE: expected result will be a double, bc I think cJSON stores all numbers as doubles + EXPECT_EQ(Field((double)50), dict_proxy.getItem(Field((int64_t)30))); +} + +// tests to write: + +// 1. heterogenous dict -> basically use modified JSON as in-memory storage format. +// 2. homogenous keytype dict -> can encode dict directly & serialize it more efficiently. Represent in-memory as hash table specialized depending on type. +// 3. homogenous valuetype -> ignore case, specialize to 1. +// 4. compile-time known keys/restricted keyset, keys do not change. -> struct type with fixed offsets! \ No newline at end of file From 234b59105ed8b92dcfc49b08d0e517044ce454ad Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Mon, 15 Aug 2022 14:25:36 +0200 Subject: [PATCH 20/23] typing fixes --- tuplex/codegen/include/ASTAnnotation.h | 134 +++++++++++++++++-------- tuplex/codegen/include/SymbolTable.h | 15 ++- tuplex/codegen/src/SymbolTable.cc | 60 +++++++++-- tuplex/utils/include/TypeSystem.h | 8 +- tuplex/utils/src/TypeSystem.cc | 16 +-- 5 files changed, 168 insertions(+), 65 deletions(-) diff --git a/tuplex/codegen/include/ASTAnnotation.h b/tuplex/codegen/include/ASTAnnotation.h index 8512f4087..c26a0d7a0 100644 --- a/tuplex/codegen/include/ASTAnnotation.h +++ b/tuplex/codegen/include/ASTAnnotation.h @@ -48,8 +48,13 @@ class Symbol : public std::enable_shared_from_this { std::shared_ptr parent; ///! an optional abstract typer function which can be applied if the symboltype is function + ///! to deliver a concretely typed type based on the paramter type std::function functionTyper; + ///! an optional abstract typer that takes the original type of the caller (e.g., for an attribute) + ///! and provides then together with the parameterType symilar to functionTyper a concrete type for the attribute function + std::function attributeFunctionTyper; + ///! optionally constant data associated with that symbol tuplex::Field constantData; @@ -90,55 +95,43 @@ class Symbol : public std::enable_shared_from_this { auto generic_result = functionTyper(parameterType); if(generic_result != python::Type::UNKNOWN) { specializedFunctionType = generic_result; - assertFunctionDoesNotReturnGeneric(specializedFunctionType); return true; } - for(auto& type : types) { - // found symbol, now check its type - if(!type.isFunctionType()) - continue; - - auto tupleArgType = getTupleArg(type.getParamsType()); + // typer did not yield a result, hence try stored funciton types incl. upcasting + return findStoredTypedFunction(parameterType, specializedFunctionType); + } - // check if there's a direct type match => use that function then! - if(parameterType == tupleArgType) { - specializedFunctionType = type; - assertFunctionDoesNotReturnGeneric(specializedFunctionType); - return true; - } + /*! + * the typing of an attribute which is a function may be based on both the callerType and the parameters. I.e., + * this function helps to type an attribute x.a(p) where callerType = type(x) and parameterType = type(p) for some + * symbol a which is this. + * @param callerType + * @param parameterType + * @param specializedFunctionType where to store the concrete (non-generic!) output type! + * @return true if a specialized function type could be generated, false else. + */ + inline bool findAttributeFunctionType(const python::Type& callerType, + const python::Type& parameterType, + python::Type& specializedFunctionType) { + // fallback based typing: + // 1. check attribute typer + // 2. check general typer + auto typed_result = attributeFunctionTyper(callerType, parameterType); + if(python::Type::UNKNOWN == typed_result) { + typed_result = functionTyper(parameterType); } - // no direct match was found. Check whether casting would work or partial matching. - for(auto& type : types) { - // found symbol, now check its type - if (!type.isFunctionType()) - continue; - - auto tupleArgType = getTupleArg(type.getParamsType()); - - // check if given parameters type is compatible with function type? - // actual invocation is with parameterType - // ==> can we upcast them to fit the defined one OR does is partially work? - // e.g., when the function is defined for NULL, but we have opt? - if (isTypeCompatible(parameterType, tupleArgType)) { - specializedFunctionType = type; - - // specialize according to parameterType if it's a generic function so further typing works - assert(!specializedFunctionType.getReturnType().isGeneric()); - if(specializedFunctionType.getParamsType().isGeneric()) { - auto specializedParams = python::specializeGenerics(parameterType, tupleArgType); - specializedFunctionType = python::Type::makeFunctionType(specializedParams, - specializedFunctionType.getReturnType()); - } - - assertFunctionDoesNotReturnGeneric(specializedFunctionType); - return true; - } + // check if result is valid, then take it + if(typed_result != python::Type::UNKNOWN) { + specializedFunctionType = typed_result; + assertFunctionDoesNotReturnGeneric(specializedFunctionType); + return true; } - return false; + // typer did not yield a result, hence try stored funciton types incl. upcasting + return findStoredTypedFunction(parameterType, specializedFunctionType); } /*! @@ -169,7 +162,8 @@ class Symbol : public std::enable_shared_from_this { return full_name; } - Symbol() {} + Symbol() : functionTyper([](const python::Type&){return python::Type::UNKNOWN;}), + attributeFunctionTyper([](const python::Type&, const python::Type&){return python::Type::UNKNOWN;}) {} virtual ~Symbol() { _attributes.clear(); parent.reset(); @@ -216,17 +210,20 @@ class Symbol : public std::enable_shared_from_this { Symbol(std::string _name, std::function typer) : name(_name), qualifiedName(_name), - functionTyper(std::move(typer)), symbolType(SymbolType::FUNCTION) {} + functionTyper(std::move(typer)), attributeFunctionTyper([](const python::Type&, const python::Type&){return python::Type::UNKNOWN;}), symbolType(SymbolType::FUNCTION) {} Symbol(std::string _name, python::Type _type) : name(_name), qualifiedName(_name), types{_type}, - symbolType(_type.isFunctionType() ? SymbolType::FUNCTION : SymbolType::VARIABLE), functionTyper([](const python::Type&) { return python::Type::UNKNOWN; }) {} + symbolType(_type.isFunctionType() ? SymbolType::FUNCTION : SymbolType::VARIABLE), + functionTyper([](const python::Type&) { return python::Type::UNKNOWN; }), + attributeFunctionTyper([](const python::Type&, const python::Type&){return python::Type::UNKNOWN;}) {} Symbol(std::string _name, std::string _qualifiedName, python::Type _type, SymbolType _symbolType) : name(_name), qualifiedName(_qualifiedName), types{_type}, symbolType(_symbolType), - functionTyper([](const python::Type&) { return python::Type::UNKNOWN; }) {} + functionTyper([](const python::Type&) { return python::Type::UNKNOWN; }), + attributeFunctionTyper([](const python::Type&, const python::Type&){return python::Type::UNKNOWN;}) {} private: ///! i.e. to store something like re.search. re is then of module type. search will have a concrete function type. @@ -234,6 +231,55 @@ class Symbol : public std::enable_shared_from_this { /********* HELPER FUNCTIONS *************/ + inline bool findStoredTypedFunction(const python::Type& parameterType, python::Type& specializedFunctionType) { + + // typing using typer functions above failed, hence now search for concrete stored types. + for(auto& type : types) { + // found symbol, now check its type + if(!type.isFunctionType()) + continue; + + auto tupleArgType = getTupleArg(type.getParamsType()); + + // check if there's a direct type match => use that function then! + if(parameterType == tupleArgType) { + specializedFunctionType = type; + assertFunctionDoesNotReturnGeneric(specializedFunctionType); + return true; + } + } + + // no direct match was found. Check whether casting would work or partial matching. + for(auto& type : types) { + // found symbol, now check its type + if (!type.isFunctionType()) + continue; + + auto tupleArgType = getTupleArg(type.getParamsType()); + + // check if given parameters type is compatible with function type? + // actual invocation is with parameterType + // ==> can one upcast them to fit the defined one OR does is partially work? + // e.g., when the function is defined for NULL, but we have opt? + if (isTypeCompatible(parameterType, tupleArgType)) { + specializedFunctionType = type; + + // specialize according to parameterType if it's a generic function so further typing works + assert(!specializedFunctionType.getReturnType().isGeneric()); + if(specializedFunctionType.getParamsType().isGeneric()) { + auto specializedParams = python::specializeGenerics(parameterType, tupleArgType); + specializedFunctionType = python::Type::makeFunctionType(specializedParams, + specializedFunctionType.getReturnType()); + } + + assertFunctionDoesNotReturnGeneric(specializedFunctionType); + return true; + } + } + + return false; + } + /*! * helper function to check for compatibility, i.e. whether from type can be cast to to type. * @param from source type diff --git a/tuplex/codegen/include/SymbolTable.h b/tuplex/codegen/include/SymbolTable.h index 0a6b3854d..13a4abb76 100644 --- a/tuplex/codegen/include/SymbolTable.h +++ b/tuplex/codegen/include/SymbolTable.h @@ -185,10 +185,23 @@ namespace tuplex { * @param typer a dynamic typing function * @param sym_type what kind of symbol it is (function? variable?), needed because typer works for both. */ - void addBuiltinTypeAttribute(const python::Type& builtinType, const std::string& name, + void addBuiltinTypeAttribute(const python::Type& builtinType, + const std::string& name, std::function typer, const SymbolType& sym_type); + /*! + * add an attribute to a builtin type, e.g. dict.keys() + * @param builtinType to which type to add the function + * @param name name of the attribute + * @param typer a dynamic typing function + * @param sym_type what kind of symbol it is (function? variable?), needed because typer works for both. + */ + void addBuiltinTypeAttribute(const python::Type& builtinType, + const std::string& name, + std::function attributeTyper, + const SymbolType& sym_type=SymbolType::FUNCTION); + /*! * checks whether a symbol can be looked up or not * @param symbol diff --git a/tuplex/codegen/src/SymbolTable.cc b/tuplex/codegen/src/SymbolTable.cc index 9f8979b7a..d08dd4ec4 100644 --- a/tuplex/codegen/src/SymbolTable.cc +++ b/tuplex/codegen/src/SymbolTable.cc @@ -410,13 +410,24 @@ namespace tuplex { // for keys()/values() use generic dict and let symbol table create specialized type on the fly using // typer function { - addBuiltinTypeAttribute(python::Type::GENERICDICT, "keys", [](const python::Type& parameterType) { + addBuiltinTypeAttribute(python::Type::GENERICDICT, "keys", [](const python::Type& callerType, + const python::Type& parameterType) { - // @TODO: @rhea once you changed the signature of the Lambda here, you should be abel to type correctly. - // I can give it a try to refactor everything better than. - std::cout<<"need to get concrete dict type here!"<(name, type)); } + void SymbolTable::addBuiltinTypeAttribute(const python::Type &builtinType, const std::string &name, + std::function attributeTyper, + const SymbolType &sym_type) { + using namespace std; + assert(sym_type == SymbolType::VARIABLE || sym_type == SymbolType::FUNCTION); + + // this seems wrong, need to perform the lookup directly... + // use desc as name + auto scope = currentScope(); + auto it = scope->symbols.find(builtinType.desc()); + if(it == scope->symbols.end()) { + auto sym = make_shared(); + sym->name = sym->qualifiedName = builtinType.desc(); + scope->symbols[builtinType.desc()] = sym; + + it = scope->symbols.find(builtinType.desc()); + assert(it != scope->symbols.end()); + } + auto sym_att = it->second->findAttribute(name); + if(!sym_att) { + it->second->addAttribute(make_shared(name, name, builtinType, sym_type)); + sym_att = it->second->findAttribute(name); + } else { + // replace symbol, there can be only one symbol with a typer function + if(sym_type != sym_att->symbolType) + throw std::runtime_error("symbol can only have one kind of types associated with it!"); + assert(sym_att->qualifiedName == name); + sym_att->name = name; + } + assert(sym_att); + sym_att->parent = scope->symbols[name]; + sym_att->attributeFunctionTyper = attributeTyper; + } + void SymbolTable::addBuiltinTypeAttribute(const python::Type &builtinType, const std::string &name, std::function typer, const SymbolType& sym_type = SymbolType::VARIABLE) { @@ -813,9 +859,7 @@ namespace tuplex { // else, return single type return attr_sym->type(); python::Type funcType = python::Type::UNKNOWN; - - // @TODO: @rhea -> change function here to include objectType as well and make typer a two parameter function - attr_sym->findFunctionTypeBasedOnParameterType(parameterType, funcType); // ignore ret value. + attr_sym->findAttributeFunctionType(objectType, parameterType, funcType); // ignore ret value. return funcType; } } diff --git a/tuplex/utils/include/TypeSystem.h b/tuplex/utils/include/TypeSystem.h index 5fc1f6110..f6698d3bb 100644 --- a/tuplex/utils/include/TypeSystem.h +++ b/tuplex/utils/include/TypeSystem.h @@ -224,8 +224,8 @@ namespace python { static Type makeListType(const python::Type &elementType); - static Type makeDictKeysType(const python::Type& keyType); - static Type makeDictValuesType(const python::Type& valType); + static Type makeDictKeysViewType(const python::Type& dictType); + static Type makeDictValuesViewType(const python::Type& dictType); /*! * create iterator type from yieldType. @@ -359,8 +359,8 @@ namespace python { // right now, no tuples or other weird types... Type createOrGetFunctionType(const Type& param, const Type& ret=Type::EMPTYTUPLE); Type createOrGetDictionaryType(const Type& key, const Type& val); - Type createOrGetDictKeysType(const Type& key); - Type createOrGetDictValuesType(const Type& val); + Type createOrGetDictKeysViewType(const Type& key); + Type createOrGetDictValuesViewType(const Type& val); Type createOrGetListType(const Type& val); Type createOrGetTupleType(const std::initializer_list args); Type createOrGetTupleType(const TTuple& args); diff --git a/tuplex/utils/src/TypeSystem.cc b/tuplex/utils/src/TypeSystem.cc index cf33b751c..bc80963c3 100644 --- a/tuplex/utils/src/TypeSystem.cc +++ b/tuplex/utils/src/TypeSystem.cc @@ -149,18 +149,18 @@ namespace python { return registerOrGetType(name, AbstractType::DICTIONARY, {key, val}); } - Type TypeFactory::createOrGetDictKeysType(const Type& key) { + Type TypeFactory::createOrGetDictKeysViewType(const Type& key) { std::string name; - name += "["; + name += "DictKeysView["; name += TypeFactory::instance().getDesc(key._hash); name += "]"; return registerOrGetType(name, AbstractType::DICT_KEYS, {key}); } - Type TypeFactory::createOrGetDictValuesType(const Type& val) { + Type TypeFactory::createOrGetDictValuesViewType(const Type& val) { std::string name; - name += "["; + name += "DictValuesView["; name += TypeFactory::instance().getDesc(val._hash); name += "]"; @@ -583,12 +583,12 @@ namespace python { return python::TypeFactory::instance().createOrGetDictionaryType(keyType, valType); } - Type Type::makeDictKeysType(const python::Type& keyType) { - return python::TypeFactory::instance().createOrGetDictKeysType(keyType); + Type Type::makeDictKeysViewType(const python::Type& keyType) { + return python::TypeFactory::instance().createOrGetDictKeysViewType(keyType); } - Type Type::makeDictValuesType(const python::Type& valType) { - return python::TypeFactory::instance().createOrGetDictValuesType(valType); + Type Type::makeDictValuesViewType(const python::Type& valType) { + return python::TypeFactory::instance().createOrGetDictValuesViewType(valType); } Type Type::makeListType(const python::Type &elementType){ From 4ecbc9d0b7211c703ab4649d106f8695a8b37724 Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Mon, 15 Aug 2022 15:09:45 +0200 Subject: [PATCH 21/23] adding list conversion --- tuplex/codegen/src/SymbolTable.cc | 49 ++++++++++++++++++++++++++++ tuplex/test/core/DictionaryTyping.cc | 33 ++++++++++++++++--- 2 files changed, 77 insertions(+), 5 deletions(-) diff --git a/tuplex/codegen/src/SymbolTable.cc b/tuplex/codegen/src/SymbolTable.cc index d08dd4ec4..e41232144 100644 --- a/tuplex/codegen/src/SymbolTable.cc +++ b/tuplex/codegen/src/SymbolTable.cc @@ -346,6 +346,55 @@ namespace tuplex { addSymbol(make_shared("enumerate", enumerateFunctionTyper)); addSymbol(make_shared("next", nextFunctionTyper)); + // conversions for list/tuple + + auto list_ret_type = [](const python::Type& type) { + // list? trivial + if(type.isListType()) + return type; + + // what can be converted to/from list? + // -> homogenous tuple + + // TODO iterator... + + // -> string + if(type == python::Type::STRING) { + return python::Type::makeListType(python::Type::STRING); + } + if(type.isOptionType() && type.withoutOptions() == python::Type::STRING) { + return python::Type::makeListType(python::Type::makeOptionType(python::Type::STRING)); + } + + // -> keyview/valueview + if(type.isDictKeysType() || type.isDictValuesType()) { + // get dict type + auto dict_type = type.elementType(); + + if(type.isDictValuesType()) + return python::Type::makeListType(dict_type.valueType()); + if(type.isDictKeysType()) + return python::Type::makeListType(dict_type.keyType()); + } + + return python::Type::UNKNOWN; + }; + + addSymbol(make_shared("list", [&list_ret_type](const python::Type& parameterType) { + + python::Type type = parameterType; + + // param should be single tuple + if(parameterType.isTupleType() && parameterType.parameters().size() == 1) + type = parameterType.parameters().front(); + + auto ret_type = list_ret_type(type); + if(ret_type != python::Type::UNKNOWN) + return python::Type::makeFunctionType(parameterType, ret_type); + return python::Type::UNKNOWN; + })); + // tuple is special case -> need to speculate on list/str/sequence length! + // TODO: other parameters? i.e. step size and Co? // also, boolean, float? etc.? addSymbol("range", python::Type::makeFunctionType(python::Type::I64, python::Type::RANGE)); diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc index df8c532f5..685d884ed 100644 --- a/tuplex/test/core/DictionaryTyping.cc +++ b/tuplex/test/core/DictionaryTyping.cc @@ -637,11 +637,7 @@ TEST(DictionaryTyping, KeyView) { using namespace std; // could also use list((10, 20, 30)) e.g., or tuple(list(...)) -> needs speculation. - // test count UDF -// auto count_c = "def count_keys(x):\n" -// " d = {'A':10, 'B': 10, x: 20}\n" -// " return list(d.keys())"; auto count_c = "def count_keys(x):\n" " d = {'A':10, 'B': 10, x: 20}\n" " return d.keys()"; @@ -663,6 +659,33 @@ TEST(DictionaryTyping, KeyView) { graph.saveAsPDF("dict_count_keys.pdf"); cout<<"return type of function is: "< needs speculation. + + // test count UDF + auto count_c = "def count_keys(x):\n" + " d = {'A':10, 'B': 10, x: 20}\n" + " return list(d.keys())"; + + // parse code to AST + auto ast = tuplex::codegen::AnnotatedAST(); + ast.parseString(count_c); + + // make typing + python::Type inputType = python::Type::STRING; + + // create symbol table + ast.addTypeHint("x", inputType); + ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY); + + cout<<"return type of function is: "< Date: Mon, 15 Aug 2022 15:17:11 +0200 Subject: [PATCH 22/23] all typing tests pass --- tuplex/codegen/src/TypeAnnotatorVisitor.cc | 20 ++++++++++++++++++++ tuplex/test/core/DictionaryTyping.cc | 4 ++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tuplex/codegen/src/TypeAnnotatorVisitor.cc b/tuplex/codegen/src/TypeAnnotatorVisitor.cc index d70cd0b71..ccff1262f 100644 --- a/tuplex/codegen/src/TypeAnnotatorVisitor.cc +++ b/tuplex/codegen/src/TypeAnnotatorVisitor.cc @@ -1763,6 +1763,26 @@ namespace tuplex { } else if(exprType.isIteratorType()) { _nameTable[id->_name] = exprType.yieldType(); id->setInferredType(exprType.yieldType()); + } else if(exprType.isDictValuesType()) { + auto dict_type = exprType.elementType(); + auto yield_type = dict_type.valueType(); + if(yield_type == python::Type::PYOBJECT || yield_type == python::Type::UNKNOWN) { + // might require unrolling & speculation on view length! + addCompileError(CompileError::TYPE_ERROR_UNSUPPORTED_LOOP_TESTLIST_TYPE); + return; + } + _nameTable[id->_name] = yield_type; + id->setInferredType(yield_type); + } else if(exprType.isDictKeysType()) { + auto dict_type = exprType.elementType(); + auto yield_type = dict_type.keyType(); + if(yield_type == python::Type::PYOBJECT || yield_type == python::Type::UNKNOWN) { + // might require unrolling & speculation on view length! + addCompileError(CompileError::TYPE_ERROR_UNSUPPORTED_LOOP_TESTLIST_TYPE); + return; + } + _nameTable[id->_name] = yield_type; + id->setInferredType(yield_type); } else { addCompileError(CompileError::TYPE_ERROR_UNSUPPORTED_LOOP_TESTLIST_TYPE); } diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc index 685d884ed..bb44dee6c 100644 --- a/tuplex/test/core/DictionaryTyping.cc +++ b/tuplex/test/core/DictionaryTyping.cc @@ -542,7 +542,7 @@ TEST(DictionaryTyping, DictionaryInputControlFlow) { // print type annotated ast GraphVizGraph graph; graph.createFromAST(ast.getFunctionAST(), true); - graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/dict_input_control_flow.pdf"); + graph.saveAsPDF("dict_input_control_flow.pdf"); cout<<"return type of function is: "< Date: Sun, 28 Aug 2022 08:35:47 -0400 Subject: [PATCH 23/23] all tests except keys/values view failing --- tuplex/codegen/include/BuiltinDictProxy.h | 2 +- tuplex/codegen/include/BuiltinDictProxyImpl.h | 5 +- tuplex/codegen/include/cJSONDictProxyImpl.h | 23 +- tuplex/codegen/src/cJSONDictProxyImpl.cc | 257 +++++++++++++-- tuplex/test/dict/cJSONTest.cc | 308 +++++++++++++++++- tuplex/utils/CMakeLists.txt | 6 +- tuplex/utils/src/TypeSystem.cc | 3 +- 7 files changed, 540 insertions(+), 64 deletions(-) diff --git a/tuplex/codegen/include/BuiltinDictProxy.h b/tuplex/codegen/include/BuiltinDictProxy.h index a37f2a634..c7110eaec 100644 --- a/tuplex/codegen/include/BuiltinDictProxy.h +++ b/tuplex/codegen/include/BuiltinDictProxy.h @@ -50,7 +50,7 @@ namespace tuplex { // // allocSize() --> helpful when dict size is known upfront, can be used for optimization. // BuiltinDictProxy& allocSize(llvm::Value* size); - // getKeyView() --> codegen object + // getKeysView() --> codegen object // getValuesView() --> codegen object diff --git a/tuplex/codegen/include/BuiltinDictProxyImpl.h b/tuplex/codegen/include/BuiltinDictProxyImpl.h index 6defe6d90..b4f41d08a 100644 --- a/tuplex/codegen/include/BuiltinDictProxyImpl.h +++ b/tuplex/codegen/include/BuiltinDictProxyImpl.h @@ -19,7 +19,6 @@ namespace tuplex { namespace codegen { class BuiltinDictProxyImpl { public: - // Q: what does virtual do ? virtual void putItem(const Field& key, const Field& value) = 0; virtual void putItem(const python::Type& keyType, const SerializableValue& key, const python::Type& valueType, const SerializableValue& value) = 0; @@ -31,9 +30,9 @@ namespace tuplex { virtual void deleteItem(const Field& key) = 0; - // virtual void getKeyView() = 0; + virtual std::vector getKeysView() = 0; - // virtual void getValuesView() = 0; + virtual std::vector getValuesView() = 0; }; } } diff --git a/tuplex/codegen/include/cJSONDictProxyImpl.h b/tuplex/codegen/include/cJSONDictProxyImpl.h index cb461ea7c..8f1570266 100644 --- a/tuplex/codegen/include/cJSONDictProxyImpl.h +++ b/tuplex/codegen/include/cJSONDictProxyImpl.h @@ -22,9 +22,9 @@ namespace tuplex { namespace codegen { class cJSONDictProxyImpl : public BuiltinDictProxyImpl { public: - // cJSONDictProxyImpl() : _root(nullptr) {} - // is there a reason we want to separate the initialisation of cjsondictproxy objects and the actual cjson object? - cJSONDictProxyImpl() : _root(cJSON_CreateObject()) {} + cJSONDictProxyImpl() { + _root = cJSON_CreateObject(); + } ~cJSONDictProxyImpl() { if(_root) { cJSON_free(_root); @@ -45,9 +45,9 @@ namespace tuplex { void deleteItem(const Field& key) override; - // void getKeyView() override; + std::vector getKeysView() override; - // void getValuesView() override; + std::vector getValuesView() override; // notes: // for cJSON subscripting, need to perform @@ -57,14 +57,21 @@ namespace tuplex { private: cJSON *_root; // a map of the elements - cJSON *_typeMap; // a map of strings -> types (nested) /*! - * returns a string representing a type prefix when storing type information in cJSON object as well. + * returns a key (as a string) with the added type prefix + * @param key * @param type * @return */ - static std::string typePrefix(const python::Type& type); + std::string addTypePrefix(std::string key, const python::Type& type); + + /*! + * converts a key (stored as a string in cJSON) to equivalent Field value + * @param prefixed_key + * @return + */ + Field keyToField(std::string prefixed_key); }; } } diff --git a/tuplex/codegen/src/cJSONDictProxyImpl.cc b/tuplex/codegen/src/cJSONDictProxyImpl.cc index d0a2d634b..a0fe425a4 100644 --- a/tuplex/codegen/src/cJSONDictProxyImpl.cc +++ b/tuplex/codegen/src/cJSONDictProxyImpl.cc @@ -33,16 +33,15 @@ namespace tuplex { // check type of Field, create corresponding cJSON type object if (f.getType() == python::Type::BOOLEAN) { - if (f.getInt() > 0) { - cjson_obj = cJSON_CreateTrue(); - } else { + if (f.getInt() == 0) { cjson_obj = cJSON_CreateFalse(); + } else { + cjson_obj = cJSON_CreateTrue(); } } else if (f.getType() == python::Type::F64) { - cjson_obj = cJSON_CreateNumber(f.getDouble()); + cjson_obj = cJSON_CreateNumber(f.getDouble(), 0); } else if (f.getType() == python::Type::I64) { - // should I be upcasting? - cjson_obj = cJSON_CreateNumber((double)f.getInt()); + cjson_obj = cJSON_CreateNumber(f.getInt(), 1); } else if (f.getType() == python::Type::STRING) { assert(f.getPtr()); cjson_obj = cJSON_CreateString((const char*)f.getPtr()); @@ -50,14 +49,15 @@ namespace tuplex { assert(f.getPtr()); tuplex::List* lis = (tuplex::List*)f.getPtr(); - cjson_obj = cJSON_CreateArray(); + cjson_obj = cJSON_CreateArray(1); for (int i = 0; i < lis->numElements(); i++) { // retrieve ith element from list Field element = lis->getField(i); + // convert to cJSON object cJSON* cjson_elt = fieldToCJSON(element); - + // add element to cJSON array cJSON_AddItemToArray(cjson_obj, cjson_elt); } @@ -65,11 +65,12 @@ namespace tuplex { assert(f.getPtr()); tuplex::Tuple* tup = (tuplex::Tuple*)f.getPtr(); - cjson_obj = cJSON_CreateArray(); + cjson_obj = cJSON_CreateArray(0); for (int i = 0; i < tup->numElements(); i++) { // retrieve ith element from tuple Field element = tup->getField(i); + // convert to cJSON object cJSON* cjson_elt = fieldToCJSON(element); @@ -79,7 +80,7 @@ namespace tuplex { } else if (f.getType() == python::Type::NULLVALUE) { cjson_obj = cJSON_CreateNull(); } else { - // throw std::runtime_error("cannot change value with type " + value.getType().desc() + " into cJSON object"); + throw std::runtime_error("cannot change Field with type " + f.getType().desc() + " into cJSON object"); } return cjson_obj; @@ -91,7 +92,15 @@ namespace tuplex { Field ret = Field::null(); if (cJSON_IsNumber(object)) { - ret = Field(cJSON_GetNumberValue(object)); + if (((object->type & ~cJSON_IsReference) & ~cJSON_StringIsConst) == cJSON_Int64) { + // type is int, convert ret to int + double dbl_val = cJSON_GetNumberValue(object); + int64_t int_val = (int64_t) std::round(dbl_val); + ret = Field(int_val); + } else { + assert(((object->type & ~cJSON_IsReference) & ~cJSON_StringIsConst) == cJSON_Double); + ret = Field(cJSON_GetNumberValue(object)); + } } else if (cJSON_IsString(object)) { ret = Field(cJSON_GetStringValue(object)); } else if (cJSON_IsTrue(object)) { @@ -101,27 +110,155 @@ namespace tuplex { } else if (cJSON_IsNull(object)) { ret = Field::null(); } else if (cJSON_IsArray(object)) { - throw std::runtime_error("not yet implemented..."); + std::vector init_vec; + init_vec.reserve(cJSON_GetArraySize(object)); + + for (int i = 0; i < cJSON_GetArraySize(object); i++) { + // retrieve ith element from array + cJSON* cjson_elt = cJSON_GetArrayItem(object, i); + if (!cjson_elt) + throw std::runtime_error("could not retrieve element from cJSON array"); + + // convert to field + Field field_elt = cJSONToField(cjson_elt); + + // add element to init vector + init_vec.push_back(field_elt); + } + + if (((object->type & ~cJSON_IsReference) & ~cJSON_StringIsConst) == cJSON_List) { + List ret_list = List::from_vector(init_vec); + ret = Field(ret_list); + } else { + assert(((object->type & ~cJSON_IsReference) & ~cJSON_StringIsConst) == cJSON_Tuple); + Tuple ret_tup = Tuple::from_vector(init_vec); + ret = Field(ret_tup); + } } else if (cJSON_IsObject(object)) { + /** TODO: what type should nested dictionaries + * (i.e. cjson objects) be converted to as a Field? */ throw std::runtime_error("not yet implemented..."); } return ret; } - std::string cJSONDictProxyImpl::typePrefix(const python::Type& type) { + std::string cJSONDictProxyImpl::addTypePrefix(std::string key, const python::Type& type) { + auto ret = type.desc() + "/" + key; + + return ret; + } + + // general helper function to convert a string into a Field given a python type + /*! + * convert a string into a Field given a python type, if not convertible, returns nullptr + * @param str string + * @param type python type + * @return Field object + */ + Field stringToField(std::string str, python::Type type) { + if (str.empty()) + throw std::runtime_error("cannot pass in empty string"); + + Field ret_val = Field::null(); + + if (type == python::Type::BOOLEAN) { + if (str.compare("True") == 0) { + ret_val = Field(true); + } else if (str.compare("False") == 0) { + ret_val = Field(false); + } else { + throw std::runtime_error("expected bool value, got " + str); + } + } else if (type == python::Type::F64) { + double dbl_val = std::stod(str); + ret_val = Field(dbl_val); + } else if (type == python::Type::I64) { + long long int_val = std::stoll(str); + ret_val = Field((int64_t)int_val); + } else if (type == python::Type::STRING) { + ret_val = Field(str.substr(1, str.length() - 2)); + } else if (type.isListType()) { + throw std::runtime_error("(list) not yet implemented..."); + } else if (type.isTupleType()) { + std::vector init_vec; + init_vec.reserve(type.parameters().size()); + + assert(str[0] == '('); + assert(str[str.length() - 1] == ')'); + int done = 1; + int curr_index = 0; + while (done < (str.length() - 1)) { + std::string curr_elt = ""; + python::Type curr_type = type.parameters().at(curr_index); + Field field_elt = Field::null(); + + if (str[done] == '\'') { + // current item is a string; need to find next ' + assert(curr_type == python::Type::STRING); + + size_t next_quote = str.find('\'', done + 1); + if (next_quote == std::string::npos) + throw std::runtime_error("could not parse tuple string: matching \' not present"); + + curr_elt = str.substr(done + 1, next_quote); + assert(str[next_quote + 1] == ','); + done = next_quote + 2; + } else { + size_t next_comma = str.find(',', done); + + if (next_comma == std::string::npos) { + // last element in tuple + curr_elt = str.substr(done, str.length() - 1); + done = str.length() - 1; + } else { + curr_elt = str.substr(done, next_comma); + done = next_comma + 1; + } + } + + field_elt = stringToField(curr_elt, curr_type); + if (field_elt == nullptr) + throw std::runtime_error("could not parse tuple string: could not convert element into Field"); + // return nullptr; + init_vec.push_back(field_elt); + curr_index++; + } + + assert(type.parameters().size() == curr_index); + ret_val = Field(Tuple::from_vector(init_vec)); + } else if (type == python::Type::NULLVALUE) { + ret_val = Field::null(); + } else { + throw std::runtime_error("conversion from string " + str + " to type " + type.desc() + " not supported"); + } + + return ret_val; + } + + Field cJSONDictProxyImpl::keyToField(std::string prefixed_key) { + std::size_t slash_index = prefixed_key.find("/"); + + std::string key_type = prefixed_key.substr(0, slash_index); + std::string key_str = prefixed_key.substr(slash_index + 1); + + python::Type ret_type = python::Type::NULLVALUE; + + if (key_type.substr(0, 4).compare("bool") == 0) { + ret_type = python::Type::BOOLEAN; + } else { + ret_type = python::decodeType(key_type); + } - // init map for a couple common types (int, float, bool, ...) + Field ret_val = stringToField(key_str, ret_type); + if (ret_val.isNull() && (ret_type != python::Type::NULLVALUE)) + throw std::runtime_error("could not convert key-string to Field object"); - // since keys in JSON are always strings, need to store type info in that string! - return ""; + return ret_val; } void cJSONDictProxyImpl::putItem(const Field &key, const Field &value) { - // put into cJSON, yet due to both key/type being not necessary type stable, encode type as base64 into values! - // map primitive types directly into cJSON if possible if(!_root) - // _root = cJSON_CreateObject(); throw std::runtime_error("cannot use putItem on an uninitialised dictionary"); cJSON* to_add = fieldToCJSON(value); @@ -129,14 +266,17 @@ namespace tuplex { throw std::runtime_error("item to add not convertible to cJSON object"); } - // add to cJSON object - // TODO: what's the difference between key.desc and getting the key's ptr value? - // A: key.desc gets the string of the Field regardless of the type of the Field - cJSON_AddItemToObject(_root, key.desc().c_str(), to_add); + // check if key already exists + if (keyExists(key)) { + // replace existing key + replaceItem(key, value); + } - // type prefix + // add type prefix to key + std::string prefixed = addTypePrefix(key.desc(), key.getType()); - // throw std::runtime_error("to implement..."); + // key doesn't exist; add to cJSON object + cJSON_AddItemToObject(_root, prefixed.c_str(), to_add); } void cJSONDictProxyImpl::putItem(const python::Type &keyType, const SerializableValue &key, @@ -150,18 +290,28 @@ namespace tuplex { bool cJSONDictProxyImpl::keyExists(const Field& key) { if(!_root) throw std::runtime_error("cannot use keyExists on an uninitialised dictionary"); + + // make prefixed key + std::string prefixed = addTypePrefix(key.desc(), key.getType()); - cJSON* res = cJSON_GetObjectItemCaseSensitive(_root, key.desc().c_str()); + cJSON* res = cJSON_GetObjectItemCaseSensitive(_root, prefixed.c_str()); - return (res != NULL); + if (!res) { + return false; + } + + return true; } Field cJSONDictProxyImpl::getItem(const Field& key) { if (!_root) throw std::runtime_error("cannot use getItem on an uninitialised dictionary"); + // make prefixed key + std::string prefixed = addTypePrefix(key.desc(), key.getType()); + // retrieve value from dict - cJSON* item = cJSON_GetObjectItemCaseSensitive(_root, key.desc().c_str()); + cJSON* item = cJSON_GetObjectItemCaseSensitive(_root, prefixed.c_str()); if (!item) throw std::runtime_error("error retrieving value from cJSON dictionary"); @@ -176,22 +326,23 @@ namespace tuplex { if (!_root) throw std::runtime_error("cannot use replaceItem on an uninitialised dictionary"); - // assert(key.getType() == python::Type::STRING); - + // make prefixed key + std::string prefixed = addTypePrefix(key.desc(), key.getType()); + // attempt to retrieve value from dict - cJSON* item = cJSON_GetObjectItemCaseSensitive(_root, key.desc().c_str()); + cJSON* item = cJSON_GetObjectItemCaseSensitive(_root, prefixed.c_str()); if (!item) { - // key doesn't already exist; simply perform putItem instead (?) + // key doesn't already exist; do putItem instead putItem(key, value); } else { - // replace value at key + // make new cJSON item cJSON* new_item = fieldToCJSON(value); if (!new_item) { throw std::runtime_error("new item not convertible to cJSON object"); } - cJSON_ReplaceItemInObjectCaseSensitive(_root, key.desc().c_str(), new_item); + cJSON_ReplaceItemInObjectCaseSensitive(_root, prefixed.c_str(), new_item); } } @@ -199,8 +350,44 @@ namespace tuplex { if (!_root) throw std::runtime_error("cannot use deleteItem on an uninitialised dictionary"); + // make prefixed key + std::string prefixed = addTypePrefix(key.desc(), key.getType()); + // delete value from dict - cJSON_DeleteItemFromObjectCaseSensitive(_root, (const char*)key.desc().c_str()); + cJSON_DeleteItemFromObjectCaseSensitive(_root, prefixed.c_str()); + } + + std::vector cJSONDictProxyImpl::getKeysView() { + std::vector ret; + ret.reserve(cJSON_GetArraySize(_root)); + + cJSON* entry = NULL; + cJSON_ArrayForEach(entry, _root) { + // convert key to Field + std::string key_str = entry->string; + Field field_val = keyToField(key_str); + + // add to end of ret vector + ret.push_back(field_val); + } + + return ret; + } + + std::vector cJSONDictProxyImpl::getValuesView() { + std::vector ret; + ret.reserve(cJSON_GetArraySize(_root)); + + cJSON* entry = NULL; + cJSON_ArrayForEach(entry, _root) { + // convert entry to Field + Field field_val = cJSONToField(entry); + + // add to end of ret vector + ret.push_back(field_val); + } + + return ret; } } } diff --git a/tuplex/test/dict/cJSONTest.cc b/tuplex/test/dict/cJSONTest.cc index 22e0c0a00..0708be732 100644 --- a/tuplex/test/dict/cJSONTest.cc +++ b/tuplex/test/dict/cJSONTest.cc @@ -39,11 +39,10 @@ TEST(cJSONTest, GetItemTest) { using namespace tuplex; using namespace std; - // testing non-codegenerated put item + // testing non-codegenerated get item // initialise test dict codegen::cJSONDictProxyImpl dict_proxy; - // put test values into test dict dict_proxy.putItem(Field((int64_t)10), Field("a")); dict_proxy.putItem(Field((int64_t)20), Field("b")); @@ -60,11 +59,10 @@ TEST(cJSONTest, DeleteItemTest) { using namespace tuplex; using namespace std; - // testing non-codegenerated put item + // testing non-codegenerated delete item // initialise test dict codegen::cJSONDictProxyImpl dict_proxy; - // put test values into test dict dict_proxy.putItem(Field((int64_t)10), Field("a")); dict_proxy.putItem(Field((int64_t)20), Field("b")); @@ -82,7 +80,7 @@ TEST(cJSONTest, DeleteItemTest) { Field res = dict_proxy.getItem(Field((int64_t)10)); // NOTE: expected result will be a double, bc I think cJSON stores all numbers as doubles - EXPECT_EQ(Field((double)100), dict_proxy.getItem(Field((int64_t)10))); + EXPECT_EQ(Field((int64_t)100), dict_proxy.getItem(Field((int64_t)10))); EXPECT_EQ(false, dict_proxy.keyExists(Field((int64_t)20))); } @@ -90,11 +88,10 @@ TEST(cJSONTest, ReplaceItemTest) { using namespace tuplex; using namespace std; - // testing non-codegenerated put item + // testing non-codegenerated replace item // initialise test dict codegen::cJSONDictProxyImpl dict_proxy; - // put test values into test dict dict_proxy.putItem(Field((int64_t)10), Field("a")); dict_proxy.putItem(Field((int64_t)20), Field("b")); @@ -113,13 +110,296 @@ TEST(cJSONTest, ReplaceItemTest) { dict_proxy.replaceItem(Field((int64_t)30), Field((int64_t)50)); - // NOTE: expected result will be a double, bc I think cJSON stores all numbers as doubles - EXPECT_EQ(Field((double)50), dict_proxy.getItem(Field((int64_t)30))); + // NOTE: expected result will be a double, bc cJSON stores all numbers as doubles + EXPECT_EQ(Field((int64_t)50), dict_proxy.getItem(Field((int64_t)30))); +} + +// str -> _ +TEST(cJSONTest, StrKeysTest) { + using namespace tuplex; + using namespace std; + + // initialise test dict + codegen::cJSONDictProxyImpl dict_proxy; + + dict_proxy.putItem(Field("a"), Field((int64_t)1)); + dict_proxy.putItem(Field("b"), Field((int64_t)2)); + + EXPECT_EQ(Field((int64_t)1), dict_proxy.getItem(Field("a"))); + EXPECT_EQ(Field((int64_t)2), dict_proxy.getItem(Field("b"))); + + dict_proxy.putItem(Field("a"), Field("hello")); + dict_proxy.replaceItem(Field("b"), Field(true)); + + EXPECT_EQ(Field("hello"), dict_proxy.getItem(Field("a"))); + EXPECT_EQ(Field(true), dict_proxy.getItem(Field("b"))); + + dict_proxy.deleteItem(Field("b")); + + EXPECT_EQ(true, dict_proxy.keyExists(Field("a"))); + EXPECT_EQ(false, dict_proxy.keyExists(Field("b"))); +} + +// _ -> null +TEST(cJSONTest, NullValsTest) { + using namespace tuplex; + using namespace std; + + // initialise test dict + codegen::cJSONDictProxyImpl dict_proxy; + + dict_proxy.putItem(Field((int64_t)10), Field::null()); + dict_proxy.putItem(Field("a"), Field((int64_t)10)); + + EXPECT_EQ(Field::null(), dict_proxy.getItem(Field((int64_t)10))); + EXPECT_EQ(Field((int64_t)10), dict_proxy.getItem(Field("a"))); + + dict_proxy.replaceItem(Field("a"), Field::null()); + + EXPECT_EQ(Field::null(), dict_proxy.getItem(Field("a"))); +} + +// null -> _ +TEST(cJSONTest, NullKeysTest) { + using namespace tuplex; + using namespace std; + + // initialise test dict + codegen::cJSONDictProxyImpl dict_proxy; + + dict_proxy.putItem(Field::null(), Field((int64_t)10)); + + EXPECT_EQ(Field((int64_t)10), dict_proxy.getItem(Field::null())); + + dict_proxy.putItem(Field::null(), Field("a")); + + EXPECT_EQ(Field("a"), dict_proxy.getItem(Field::null())); + + dict_proxy.replaceItem(Field::null(), Field(true)); + + EXPECT_EQ(Field(true), dict_proxy.getItem(Field::null())); +} + +// mix -> mix +TEST(cJSONTest, FloatTest) { + using namespace tuplex; + using namespace std; + + // initialise test dict + codegen::cJSONDictProxyImpl dict_proxy; + + dict_proxy.putItem(Field((double)3.14), Field("pi")); + dict_proxy.putItem(Field((double)1), Field(true)); + + EXPECT_EQ(Field("pi"), dict_proxy.getItem(Field((double)3.14))); + EXPECT_EQ(Field(true), dict_proxy.getItem(Field((double)1))); + + dict_proxy.putItem(Field("pi"), Field((double)3.14)); + dict_proxy.putItem(Field(true), Field((double)1)); + + EXPECT_EQ(Field((double)3.14), dict_proxy.getItem(Field("pi"))); + EXPECT_EQ(Field((double)1), dict_proxy.getItem(Field(true))); + + dict_proxy.replaceItem(Field((double)3.14), Field((int64_t)3)); + dict_proxy.replaceItem(Field("pi"), Field((int64_t)3)); + + EXPECT_EQ(Field((int64_t)3), dict_proxy.getItem(Field((double)3.14))); + EXPECT_EQ(Field((int64_t)3), dict_proxy.getItem(Field("pi"))); +} + +// _ -> list +TEST(cJSONTest, ListValsTest) { + using namespace tuplex; + using namespace std; + + // initialise test dict + codegen::cJSONDictProxyImpl dict_proxy; + + // init list 1 + vector vec_1{ Field((int64_t)10), Field((int64_t)20), Field((int64_t)30) }; + List list_1 = List::from_vector(vec_1); + + // init list 2 + vector vec_2{ Field("a"), Field("b"), Field("c") }; + List list_2 = List::from_vector(vec_2); + + // init list 3 + vector vec_3{ Field((double)15), Field((double)3.14), Field((double)2.7) }; + List list_3 = List::from_vector(vec_3); + + dict_proxy.putItem(Field((int64_t)10), Field(list_1)); + + EXPECT_EQ(Field(list_1), dict_proxy.getItem(Field((int64_t)10))); + + dict_proxy.putItem(Field("a"), Field(list_2)); + dict_proxy.replaceItem(Field((int64_t)10), Field(list_3)); + + EXPECT_EQ(Field(list_2), dict_proxy.getItem(Field("a"))); + EXPECT_EQ(Field(list_3), dict_proxy.getItem(Field((int64_t)10))); + + dict_proxy.deleteItem(Field("a")); + + EXPECT_EQ(false, dict_proxy.keyExists(Field("a"))); + EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)10))); } -// tests to write: +// _ -> tuple +TEST(cJSONTest, TupleValsTest) { + using namespace tuplex; + using namespace std; + + // initialise test dict + codegen::cJSONDictProxyImpl dict_proxy; + + // init tuple 1 + vector vec_1{ Field((int64_t)10), Field("a"), Field((double)30) }; + Tuple tup_1 = Tuple::from_vector(vec_1); + + // init tuple 2 + vector vec_2{ Field("a"), Field(true), Field((int64_t)30) }; + Tuple tup_2 = Tuple::from_vector(vec_2); + + // init tuple 3 + vector vec_3{ Field((double)3.14), Field((int64_t)2), Field(false) }; + Tuple tup_3 = Tuple::from_vector(vec_3); -// 1. heterogenous dict -> basically use modified JSON as in-memory storage format. -// 2. homogenous keytype dict -> can encode dict directly & serialize it more efficiently. Represent in-memory as hash table specialized depending on type. -// 3. homogenous valuetype -> ignore case, specialize to 1. -// 4. compile-time known keys/restricted keyset, keys do not change. -> struct type with fixed offsets! \ No newline at end of file + dict_proxy.putItem(Field((int64_t)10), Field(tup_1)); + + EXPECT_EQ(Field(tup_1), dict_proxy.getItem(Field((int64_t)10))); + + dict_proxy.putItem(Field("a"), Field(tup_2)); + dict_proxy.replaceItem(Field((int64_t)10), Field(tup_3)); + + EXPECT_EQ(Field(tup_2), dict_proxy.getItem(Field("a"))); + EXPECT_EQ(Field(tup_3), dict_proxy.getItem(Field((int64_t)10))); + + dict_proxy.deleteItem(Field("a")); + + EXPECT_EQ(false, dict_proxy.keyExists(Field("a"))); + EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)10))); +} + +TEST(cJSONTest, FloatIntDifferentiation) { + using namespace tuplex; + using namespace std; + + // initialise test dict + codegen::cJSONDictProxyImpl dict_proxy; + + dict_proxy.putItem(Field((double)10), Field("a")); + + EXPECT_EQ(true, dict_proxy.keyExists(Field((double)10))); + EXPECT_EQ(false, dict_proxy.keyExists(Field((int64_t)10))); + + dict_proxy.putItem(Field((int64_t)10), Field("b")); + + EXPECT_EQ(true, dict_proxy.keyExists(Field((double)10))); + EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)10))); + EXPECT_EQ(Field("a"), dict_proxy.getItem(Field((double)10))); + EXPECT_EQ(Field("b"), dict_proxy.getItem(Field((int64_t)10))); + + dict_proxy.replaceItem(Field((double)10), Field((int64_t)64)); + + EXPECT_EQ(Field((int64_t)64), dict_proxy.getItem(Field((double)10))); + EXPECT_EQ(Field("b"), dict_proxy.getItem(Field((int64_t)10))); + + dict_proxy.deleteItem(Field((double)10)); + + EXPECT_EQ(false, dict_proxy.keyExists(Field((double)10))); + EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)10))); +} + +TEST(cJSONTest, ListTupleDifferentiation) { + using namespace tuplex; + using namespace std; + + // initialise test dict + codegen::cJSONDictProxyImpl dict_proxy; + + vector vec{ Field("a"), Field("a"), Field("a") }; + Tuple tup = Tuple::from_vector(vec); + List lis = List::from_vector(vec); + + dict_proxy.putItem(Field("tup"), Field(tup)); + dict_proxy.putItem(Field("lis"), Field(lis)); + + EXPECT_EQ(Field(tup), dict_proxy.getItem(Field("tup"))); + EXPECT_EQ(Field(lis), dict_proxy.getItem(Field("lis"))); + EXPECT_EQ(true, dict_proxy.getItem(Field("tup")).getType().isTupleType()); + EXPECT_EQ(true, dict_proxy.getItem(Field("lis")).getType().isListType()); + + vector vec_2{ Field((double)3.14), Field((double)2.7), Field((double)1.414) }; + Tuple tup_2 = Tuple::from_vector(vec_2); + List lis_2 = List::from_vector(vec_2); + + dict_proxy.replaceItem(Field("lis"), Field(tup_2)); + dict_proxy.replaceItem(Field("tup"), Field(lis_2)); + + EXPECT_EQ(Field(tup_2), dict_proxy.getItem(Field("lis"))); + EXPECT_EQ(Field(lis_2), dict_proxy.getItem(Field("tup"))); + EXPECT_EQ(true, dict_proxy.getItem(Field("tup")).getType().isListType()); + EXPECT_EQ(true, dict_proxy.getItem(Field("lis")).getType().isTupleType()); +} + +TEST(cJSONTest, KeysValuesView) { + using namespace tuplex; + using namespace std; + + // initialise test dict + codegen::cJSONDictProxyImpl dict_proxy; + + vector vec_1{ Field("a"), Field("b"), Field("c") }; + List lis = List::from_vector(vec_1); + vector vec_2{ Field("a"), Field((int64_t)10), Field(true) }; + Tuple tup = Tuple::from_vector(vec_2); + + std::cout << tup.desc() << "\n"; + + dict_proxy.putItem(Field((int64_t)15), Field("test")); + dict_proxy.putItem(Field((double)3.14), Field((int64_t)15)); + dict_proxy.putItem(Field(false), Field((double)3.14)); + dict_proxy.putItem(Field(tup), Field(false)); + // dict_proxy.putItem(Field(lis), Field(tup)); + dict_proxy.putItem(Field("list"), Field(lis)); + + vector keys{ Field((int64_t)15), + Field((double)3.14), + Field(false), + Field(tup), + // Field(lis), + Field("list") + }; + + vector vals{ Field("test"), + Field((int64_t)15), + Field((double)3.14), + Field(false), + // Field(tup), + Field(lis) + }; + + EXPECT_EQ(keys, dict_proxy.getKeysView()); + EXPECT_EQ(vals, dict_proxy.getValuesView()); + + dict_proxy.replaceItem(Field((int64_t)15), Field((int64_t)150)); + dict_proxy.deleteItem(Field(false)); + dict_proxy.deleteItem(Field("list")); + dict_proxy.putItem(Field("new"), Field(true)); + + vector keys_2{ Field((int64_t)15), + Field((double)3.14), + // Field(lis), + Field("list"), + Field("new") + }; + + vector vals_2{ Field((int64_t)150), + Field((int64_t)15), + // Field(tup), + Field(lis), + Field(true) + }; + + EXPECT_EQ(keys_2, dict_proxy.getKeysView()); + EXPECT_EQ(vals_2, dict_proxy.getValuesView()); +} \ No newline at end of file diff --git a/tuplex/utils/CMakeLists.txt b/tuplex/utils/CMakeLists.txt index 832d90167..e86ce50a8 100644 --- a/tuplex/utils/CMakeLists.txt +++ b/tuplex/utils/CMakeLists.txt @@ -48,8 +48,10 @@ if(NOT BUILD_WITH_AWS) include(FetchContent) set(FETCHCONTENT_BASE_DIR ${CMAKE_BINARY_DIR}/third_party/src) FetchContent_Declare(cJSON - GIT_REPOSITORY https://github.com/DaveGamble/cJSON.git - GIT_TAG v1.7.14 + # GIT_REPOSITORY https://github.com/DaveGamble/cJSON.git + # GIT_TAG v1.7.14 + GIT_REPOSITORY https://github.com/aletheia094/cJSON.git + GIT_TAG ada484892e39329d90c0519fb34a90a340309a60 GIT_CONFIG advice.detachedHead=false ) FetchContent_GetProperties(cJSON) diff --git a/tuplex/utils/src/TypeSystem.cc b/tuplex/utils/src/TypeSystem.cc index bc80963c3..d123f4e65 100644 --- a/tuplex/utils/src/TypeSystem.cc +++ b/tuplex/utils/src/TypeSystem.cc @@ -24,7 +24,8 @@ namespace python { const Type Type::UNKNOWN = TypeFactory::instance().createOrGetPrimitiveType("unknown"); - const Type Type::BOOLEAN = TypeFactory::instance().createOrGetPrimitiveType("boolean"); + // const Type Type::BOOLEAN = TypeFactory::instance().createOrGetPrimitiveType("boolean"); + const Type Type::BOOLEAN = TypeFactory::instance().createOrGetPrimitiveType("bool"); const Type Type::I64 = TypeFactory::instance().createOrGetPrimitiveType("i64", {python::Type::BOOLEAN}); const Type Type::F64 = TypeFactory::instance().createOrGetPrimitiveType("f64", {python::Type::I64}); const Type Type::STRING = TypeFactory::instance().createOrGetPrimitiveType("str");