From 7c0984e4724d613475afc26136fe3ad43ddbc271 Mon Sep 17 00:00:00 2001
From: Rhea Goyal <rhea@thanatos.local>
Date: Thu, 2 Jun 2022 12:57:41 -0400
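Subject: [PATCH 01/23] added new tests (for already existing functions)

Add testTEST (Python) and MathFunctionsTest.TEST (C++) exercising
math.pow and math.sqrt, plus commented-out drafts of isinf/isnan/isclose
tests (Python) and MathIsInf (C++). Also make the MathPow size check use
v4 instead of v2 and drop a few stray blank lines.
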
---
 tuplex/python/tests/test_math.py      | 100 +++++++++++++++++++++++++-
 tuplex/test/core/MathFunctionsTest.cc |  89 +++++++++++++++++++++--
 2 files changed, 183 insertions(+), 6 deletions(-)

diff --git a/tuplex/python/tests/test_math.py b/tuplex/python/tests/test_math.py
index f60750d17..8763391fc 100644
--- a/tuplex/python/tests/test_math.py
+++ b/tuplex/python/tests/test_math.py
@@ -299,7 +299,6 @@ def testExpm1(self):
         assert L_bool[1] == math.expm1(False)
 
 
-
     def testPow(self):
         c = tuplex.Context(self.conf)
 
@@ -357,4 +356,101 @@ def testPow(self):
         assert L_bool[0] == math.pow(True, False)
         assert L_bool[1] == math.pow(True, True)
         assert L_bool[2] == math.pow(False, True)
-        assert L_bool[3] == math.pow(False, False)
\ No newline at end of file
+        assert L_bool[3] == math.pow(False, False)
+
+
+    def testTEST(self):
+        c = tuplex.Context(self.conf)
+
+        pow_test = [(25, 0.5), (3, -2), (-4.0, 3.0), (-5, -4)]
+        c.parallelize(pow_test).map(lambda x, y: math.pow(x, y)).collect()
+        assert len(pow_test) == 4, 'wrong length'
+        self.assertAlmostEqual(pow_test[0], 5.0)
+        self.assertAlmostEqual(pow_test[1], -1.0 / 9.0)
+        self.assertAlmostEqual(pow_test[2], -64.0)
+        self.assertAlmostEqual(pow_test[3], 1.0 / 625.0)
+
+        sqrt_test = [0, 1.0, 4.0, 16]
+        c.parallelize(sqrt_test).map(lambda x: math.sqrt(x)).collect()
+        assert len(sqrt_test) == 4, 'wrong length'
+        self.assertAlmostEqual(sqrt_test[0], 0.0)
+        self.assertAlmostEqual(sqrt_test[1], 1.0)
+        self.assertAlmostEqual(sqrt_test[2], 2.0)
+        self.assertAlmostEqual(sqrt_test[3], 4.0)
+
+    
+    # def testIsInf(self):
+    #     c = tuplex.Context(self.conf)
+
+    #     float_test = [0.0, 1.0, -1.0, -math.inf, 3.0, math.inf]
+    #     L0 = c.parallelize(float_test).map(lambda x: math.isinf(x)).collect()
+    #     assert len(L0) == 6, 'wrong length'
+    #     self.assertEqual(L0[0], False)
+    #     self.assertEqual(L0[1], False)
+    #     self.assertEqual(L0[2], False)
+    #     self.assertEqual(L0[3], True)
+    #     self.assertEqual(L0[4], False)
+    #     self.assertEqual(L0[5], True)
+
+    #     tuple_test = [(1.0, math.inf), (-math.inf, 0.0), (-math.inf, math.inf), (-2.0, 0.0)]
+    #     L1 = c.parallelize(tuple_test).map(lambda x, y: (math.isinf(x), math.isinf(y))).collect()
+    #     assert len(L1) == 4, 'wrong length'
+    #     self.assertEqual(L1[0], (False, True))
+    #     self.assertEqual(L1[1], (True, False))
+    #     self.assertEqual(L1[2], (True, True))
+    #     self.assertEqual(L1[3], (False, False))
+
+    #     int_test = [1, -math.inf, -1, 0, math.inf]
+    #     L2 = c.parallelize(int_test).map(lambda x: math.isinf(x)).collect()
+    #     assert len(L2) == 5, 'wrong length'
+    #     self.assertEqual(L2[0], False)
+    #     self.assertEqual(L2[1], True)
+    #     self.assertEqual(L2[2], False)
+    #     self.assertEqual(L2[3], False)
+    #     self.assertEqual(L2[4], True)
+
+    #     mix_test = [-1, math.inf, 1.5, math.nan, -math.inf, 0.0]
+    #     L3 = c.parallelize(mix_test).map(lambda x, y: math.pow(x, y)).collect()
+    #     assert len(L3) == 6
+    #     self.assertEqual(L3[0], False)
+    #     self.assertEqual(L3[1], True)
+    #     self.assertEqual(L3[2], False)
+    #     self.assertEqual(L3[3], False)
+    #     self.assertEqual(L3[4], True)
+    #     self.assertEqual(L3[5], False)
+    
+
+    # def testIsNan(self):
+    #     c = tuplex.Context(self.conf)
+
+    #     test0 = [0.0, math.nan, -3.5, -math.inf]
+    #     L0 = c.parallelize(test0).map(lambda x: math.isnan(x)).collect()
+    #     assert len(L0) == 4, 'wrong length'
+    #     self.assertEqual(L0[0], False)
+    #     self.assertEqual(L0[1], True)
+    #     self.assertEqual(L0[2], False)
+    #     self.assertEqual(L0[3], False)
+
+    #     test1 = [0, -1, math.nan, math.inf, 97]
+    #     L1 = c.parallelize(test1).map(lambda x: math.isnan(x)).collect()
+    #     assert len(L1) == 5, 'wrong length'
+    #     self.assertEqual(L1[0], False)
+    #     self.assertEqual(L1[1], False)
+    #     self.assertEqual(L1[2], True)
+    #     self.assertEqual(L1[3], False)
+    #     self.assertEqual(L1[4], False)
+        
+    #     test2 = [math.nan, 0, -math.inf, -1.5, math.nan, 97]
+    #     L2 = c.parallelize(test2).map(lambda x: math.isnan(x)).collect()
+    #     assert len(L2) == 6, 'wrong length'
+    #     self.assertEqual(L2[0], True)
+    #     self.assertEqual(L2[1], False)
+    #     self.assertEqual(L2[2], False)
+    #     self.assertEqual(L2[3], False)
+    #     self.assertEqual(L2[4], True)
+    #     self.assertEqual(L2[4], False)
+    
+
+    # def testIsClose(self):
+    #     c = tuplex.Context(self.conf)
+
diff --git a/tuplex/test/core/MathFunctionsTest.cc b/tuplex/test/core/MathFunctionsTest.cc
index ed81a0eeb..5be4b6ef4 100644
--- a/tuplex/test/core/MathFunctionsTest.cc
+++ b/tuplex/test/core/MathFunctionsTest.cc
@@ -627,8 +627,6 @@ TEST_F(MathFunctionsTest, MathAsin) {
     python::closeInterpreter();
 }
 
-
-
 TEST_F(MathFunctionsTest, MathPow) {
     using namespace std;
     using namespace tuplex;
@@ -677,7 +675,7 @@ TEST_F(MathFunctionsTest, MathPow) {
         Row(2), Row(1), Row(-1), Row(-2), Row(0)
     }).map(UDF("lambda y: math.pow(y, 5)", "", ce)).collectAsVector();
 
-    EXPECT_EQ(v2.size(), 5);
+    EXPECT_EQ(v4.size(), 5);
     EXPECT_DOUBLE_EQ(v4[0].getDouble(0), 32.0);
     EXPECT_DOUBLE_EQ(v4[1].getDouble(0), 1.0);
     EXPECT_DOUBLE_EQ(v4[2].getDouble(0), -1.0);
@@ -696,4 +694,87 @@ TEST_F(MathFunctionsTest, MathPow) {
 
     python::lockGIL();
     python::closeInterpreter();
-}
\ No newline at end of file
+}
+
+TEST_F(MathFunctionsTest, TEST) {
+    using namespace std;
+    using namespace tuplex;
+
+    python::initInterpreter();
+    python::unlockGIL();
+
+    Context c(microTestOptions());
+    ClosureEnvironment ce;
+    ce.importModuleAs("math", "math");
+
+    auto v1 = c.parallelize({
+        Row(25, 0.5), Row(3, -2), Row(-4.0, 3.0), Row(-5, -4)
+    }).map(UDF("lambda x, y: math.pow(x, y)", "", ce)).collectAsVector();
+    EXPECT_EQ(v1.size(), 4);
+    EXPECT_DOUBLE_EQ(v1[0].getDouble(0), 5.0);
+    EXPECT_DOUBLE_EQ(v1[1].getDouble(0), pow(3.0, -2.0));
+    EXPECT_DOUBLE_EQ(v1[2].getDouble(0), -64.0);
+    EXPECT_DOUBLE_EQ(v1[3].getDouble(0), pow(-5.0, -4.0));
+
+    auto v2 = c.parallelize({
+        Row(0), Row(1.0), Row(4.0), Row(16)
+    }).map(UDF("lambda x: math.sqrt(x)", "", ce)).collectAsVector();
+    EXPECT_EQ(v2.size(), 4);
+    EXPECT_DOUBLE_EQ(v2[0].getDouble(0), 0.0);
+    EXPECT_DOUBLE_EQ(v2[1].getDouble(0), 1.0);
+    EXPECT_DOUBLE_EQ(v2[2].getDouble(0), 2.0);
+    EXPECT_DOUBLE_EQ(v2[3].getDouble(0), 4.0);
+
+    python::lockGIL();
+    python::closeInterpreter();
+}
+
+// TEST_F(MathFunctionsTest, MathIsInf) {
+//     using namespace std;
+//     using namespace tuplex;
+
+//     python::initInterpreter();
+//     python::unlockGIL();
+
+//     Context c(microTestOptions());
+//     ClosureEnvironment ce;
+//     ce.importModuleAs("math", "math");
+
+//     auto v1 = c.parallelize({
+//         Row(0.0), Row(1.0), Row(-1.0), Row(-INFINITY), Row(3.0), Row(INFINITY)
+//     }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector();
+
+//     EXPECT_EQ(v1.size(), 6);
+//     EXPECT_EQ(v1[0].getBool(0), false);
+//     EXPECT_EQ(v1[1].getBool(0), false);
+//     EXPECT_EQ(v1[2].getBool(0), false);
+//     EXPECT_EQ(v1[3].getBool(0), true);
+//     EXPECT_EQ(v1[4].getBool(0), false);
+//     EXPECT_EQ(v1[5].getBool(0), true);
+    
+//     auto v2 = c.parallelize({
+//         Row(1), Row(-INFINITY), Row(-1), Row(0), Row(INFINITY)
+//     }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector();
+
+//     EXPECT_EQ(v2.size(), 5);
+//     EXPECT_EQ(v2[0].getBool(0), false);
+//     EXPECT_EQ(v2[1].getBool(0), true);
+//     EXPECT_EQ(v2[2].getBool(0), false);
+//     EXPECT_EQ(v2[3].getBool(0), false);
+//     EXPECT_EQ(v2[4].getBool(0), true);
+
+//     auto v3 = c.parallelize({
+//         Row(-1), Row(INFINITY), Row(1.5), Row(NAN), Row(-INFINITY), Row(0.0)
+//     }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector();
+
+//     EXPECT_EQ(v3.size(), 6);
+//     EXPECT_DOUBLE_EQ(v3[0].getBool(0), false);
+//     EXPECT_DOUBLE_EQ(v3[1].getBool(0), true);
+//     EXPECT_DOUBLE_EQ(v3[2].getBool(0), false);
+//     EXPECT_DOUBLE_EQ(v3[3].getBool(0), false);
+//     EXPECT_DOUBLE_EQ(v3[4].getBool(0), true);
+//     EXPECT_DOUBLE_EQ(v3[5].getBool(0), false);
+
+//     python::lockGIL();
+//     python::closeInterpreter();
+// }

From 985b4ddd96bdc3349c82dc67d88cf4471c917f3a Mon Sep 17 00:00:00 2001
From: Rhea Goyal <rhea@thanatos.local>
Date: Thu, 2 Jun 2022 14:27:31 -0400
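Subject: [PATCH 02/23] whoops

Remove testTEST, MathFunctionsTest.TEST and the commented-out test
drafts added in 7c0984e4. The MathPow v4 size-check change and the
blank-line cleanup from that commit are left in place.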
---
 tuplex/python/tests/test_math.py      | 96 ---------------------------
 tuplex/test/core/MathFunctionsTest.cc | 83 -----------------------
 2 files changed, 179 deletions(-)

diff --git a/tuplex/python/tests/test_math.py b/tuplex/python/tests/test_math.py
index 8763391fc..eb1aa6fcb 100644
--- a/tuplex/python/tests/test_math.py
+++ b/tuplex/python/tests/test_math.py
@@ -358,99 +358,3 @@ def testPow(self):
         assert L_bool[2] == math.pow(False, True)
         assert L_bool[3] == math.pow(False, False)
 
-
-    def testTEST(self):
-        c = tuplex.Context(self.conf)
-
-        pow_test = [(25, 0.5), (3, -2), (-4.0, 3.0), (-5, -4)]
-        c.parallelize(pow_test).map(lambda x, y: math.pow(x, y)).collect()
-        assert len(pow_test) == 4, 'wrong length'
-        self.assertAlmostEqual(pow_test[0], 5.0)
-        self.assertAlmostEqual(pow_test[1], -1.0 / 9.0)
-        self.assertAlmostEqual(pow_test[2], -64.0)
-        self.assertAlmostEqual(pow_test[3], 1.0 / 625.0)
-
-        sqrt_test = [0, 1.0, 4.0, 16]
-        c.parallelize(sqrt_test).map(lambda x: math.sqrt(x)).collect()
-        assert len(sqrt_test) == 4, 'wrong length'
-        self.assertAlmostEqual(sqrt_test[0], 0.0)
-        self.assertAlmostEqual(sqrt_test[1], 1.0)
-        self.assertAlmostEqual(sqrt_test[2], 2.0)
-        self.assertAlmostEqual(sqrt_test[3], 4.0)
-
-    
-    # def testIsInf(self):
-    #     c = tuplex.Context(self.conf)
-
-    #     float_test = [0.0, 1.0, -1.0, -math.inf, 3.0, math.inf]
-    #     L0 = c.parallelize(float_test).map(lambda x: math.isinf(x)).collect()
-    #     assert len(L0) == 6, 'wrong length'
-    #     self.assertEqual(L0[0], False)
-    #     self.assertEqual(L0[1], False)
-    #     self.assertEqual(L0[2], False)
-    #     self.assertEqual(L0[3], True)
-    #     self.assertEqual(L0[4], False)
-    #     self.assertEqual(L0[5], True)
-
-    #     tuple_test = [(1.0, math.inf), (-math.inf, 0.0), (-math.inf, math.inf), (-2.0, 0.0)]
-    #     L1 = c.parallelize(tuple_test).map(lambda x, y: (math.isinf(x), math.isinf(y))).collect()
-    #     assert len(L1) == 4, 'wrong length'
-    #     self.assertEqual(L1[0], (False, True))
-    #     self.assertEqual(L1[1], (True, False))
-    #     self.assertEqual(L1[2], (True, True))
-    #     self.assertEqual(L1[3], (False, False))
-
-    #     int_test = [1, -math.inf, -1, 0, math.inf]
-    #     L2 = c.parallelize(int_test).map(lambda x: math.isinf(x)).collect()
-    #     assert len(L2) == 5, 'wrong length'
-    #     self.assertEqual(L2[0], False)
-    #     self.assertEqual(L2[1], True)
-    #     self.assertEqual(L2[2], False)
-    #     self.assertEqual(L2[3], False)
-    #     self.assertEqual(L2[4], True)
-
-    #     mix_test = [-1, math.inf, 1.5, math.nan, -math.inf, 0.0]
-    #     L3 = c.parallelize(mix_test).map(lambda x, y: math.pow(x, y)).collect()
-    #     assert len(L3) == 6
-    #     self.assertEqual(L3[0], False)
-    #     self.assertEqual(L3[1], True)
-    #     self.assertEqual(L3[2], False)
-    #     self.assertEqual(L3[3], False)
-    #     self.assertEqual(L3[4], True)
-    #     self.assertEqual(L3[5], False)
-    
-
-    # def testIsNan(self):
-    #     c = tuplex.Context(self.conf)
-
-    #     test0 = [0.0, math.nan, -3.5, -math.inf]
-    #     L0 = c.parallelize(test0).map(lambda x: math.isnan(x)).collect()
-    #     assert len(L0) == 4, 'wrong length'
-    #     self.assertEqual(L0[0], False)
-    #     self.assertEqual(L0[1], True)
-    #     self.assertEqual(L0[2], False)
-    #     self.assertEqual(L0[3], False)
-
-    #     test1 = [0, -1, math.nan, math.inf, 97]
-    #     L1 = c.parallelize(test1).map(lambda x: math.isnan(x)).collect()
-    #     assert len(L1) == 5, 'wrong length'
-    #     self.assertEqual(L1[0], False)
-    #     self.assertEqual(L1[1], False)
-    #     self.assertEqual(L1[2], True)
-    #     self.assertEqual(L1[3], False)
-    #     self.assertEqual(L1[4], False)
-        
-    #     test2 = [math.nan, 0, -math.inf, -1.5, math.nan, 97]
-    #     L2 = c.parallelize(test2).map(lambda x: math.isnan(x)).collect()
-    #     assert len(L2) == 6, 'wrong length'
-    #     self.assertEqual(L2[0], True)
-    #     self.assertEqual(L2[1], False)
-    #     self.assertEqual(L2[2], False)
-    #     self.assertEqual(L2[3], False)
-    #     self.assertEqual(L2[4], True)
-    #     self.assertEqual(L2[4], False)
-    
-
-    # def testIsClose(self):
-    #     c = tuplex.Context(self.conf)
-
diff --git a/tuplex/test/core/MathFunctionsTest.cc b/tuplex/test/core/MathFunctionsTest.cc
index 5be4b6ef4..80aed9625 100644
--- a/tuplex/test/core/MathFunctionsTest.cc
+++ b/tuplex/test/core/MathFunctionsTest.cc
@@ -695,86 +695,3 @@ TEST_F(MathFunctionsTest, MathPow) {
     python::lockGIL();
     python::closeInterpreter();
 }
-
-TEST_F(MathFunctionsTest, TEST) {
-    using namespace std;
-    using namespace tuplex;
-
-    python::initInterpreter();
-    python::unlockGIL();
-
-    Context c(microTestOptions());
-    ClosureEnvironment ce;
-    ce.importModuleAs("math", "math");
-
-    auto v1 = c.parallelize({
-        Row(25, 0.5), Row(3, -2), Row(-4.0, 3.0), Row(-5, -4)
-    }).map(UDF("lambda x, y: math.pow(x, y)", "", ce)).collectAsVector();
-    EXPECT_EQ(v1.size(), 4);
-    EXPECT_DOUBLE_EQ(v1[0].getDouble(0), 5.0);
-    EXPECT_DOUBLE_EQ(v1[1].getDouble(0), pow(3.0, -2.0));
-    EXPECT_DOUBLE_EQ(v1[2].getDouble(0), -64.0);
-    EXPECT_DOUBLE_EQ(v1[3].getDouble(0), pow(-5.0, -4.0));
-
-    auto v2 = c.parallelize({
-        Row(0), Row(1.0), Row(4.0), Row(16)
-    }).map(UDF("lambda x: math.sqrt(x)", "", ce)).collectAsVector();
-    EXPECT_EQ(v2.size(), 4);
-    EXPECT_DOUBLE_EQ(v2[0].getDouble(0), 0.0);
-    EXPECT_DOUBLE_EQ(v2[1].getDouble(0), 1.0);
-    EXPECT_DOUBLE_EQ(v2[2].getDouble(0), 2.0);
-    EXPECT_DOUBLE_EQ(v2[3].getDouble(0), 4.0);
-
-    python::lockGIL();
-    python::closeInterpreter();
-}
-
-// TEST_F(MathFunctionsTest, MathIsInf) {
-//     using namespace std;
-//     using namespace tuplex;
-
-//     python::initInterpreter();
-//     python::unlockGIL();
-
-//     Context c(microTestOptions());
-//     ClosureEnvironment ce;
-//     ce.importModuleAs("math", "math");
-
-//     auto v1 = c.parallelize({
-//         Row(0.0), Row(1.0), Row(-1.0), Row(-INFINITY), Row(3.0), Row(INFINITY)
-//     }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector();
-
-//     EXPECT_EQ(v1.size(), 6);
-//     EXPECT_EQ(v1[0].getBool(0), false);
-//     EXPECT_EQ(v1[1].getBool(0), false);
-//     EXPECT_EQ(v1[2].getBool(0), false);
-//     EXPECT_EQ(v1[3].getBool(0), true);
-//     EXPECT_EQ(v1[4].getBool(0), false);
-//     EXPECT_EQ(v1[5].getBool(0), true);
-    
-//     auto v2 = c.parallelize({
-//         Row(1), Row(-INFINITY), Row(-1), Row(0), Row(INFINITY)
-//     }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector();
-
-//     EXPECT_EQ(v2.size(), 5);
-//     EXPECT_EQ(v2[0].getBool(0), false);
-//     EXPECT_EQ(v2[1].getBool(0), true);
-//     EXPECT_EQ(v2[2].getBool(0), false);
-//     EXPECT_EQ(v2[3].getBool(0), false);
-//     EXPECT_EQ(v2[4].getBool(0), true);
-
-//     auto v3 = c.parallelize({
-//         Row(-1), Row(INFINITY), Row(1.5), Row(NAN), Row(-INFINITY), Row(0.0)
-//     }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector();
-
-//     EXPECT_EQ(v3.size(), 6);
-//     EXPECT_DOUBLE_EQ(v3[0].getBool(0), false);
-//     EXPECT_DOUBLE_EQ(v3[1].getBool(0), true);
-//     EXPECT_DOUBLE_EQ(v3[2].getBool(0), false);
-//     EXPECT_DOUBLE_EQ(v3[3].getBool(0), false);
-//     EXPECT_DOUBLE_EQ(v3[4].getBool(0), true);
-//     EXPECT_DOUBLE_EQ(v3[5].getBool(0), false);
-
-//     python::lockGIL();
-//     python::closeInterpreter();
-// }

From ed3a1b30df3d501ad2b334038bd4f9b0570033f5 Mon Sep 17 00:00:00 2001
From: Rhea Goyal <rhea_goyal@brown.edu>
Date: Thu, 30 Jun 2022 15:26:49 -0400
Subject: [PATCH 03/23] ignore commits to master branch

Revert "whoops"

This reverts commit 985b4ddd96bdc3349c82dc67d88cf4471c917f3a.
---
 tuplex/python/tests/test_math.py      | 96 +++++++++++++++++++++++++++
 tuplex/test/core/MathFunctionsTest.cc | 83 +++++++++++++++++++++++
 2 files changed, 179 insertions(+)

diff --git a/tuplex/python/tests/test_math.py b/tuplex/python/tests/test_math.py
index eb1aa6fcb..8763391fc 100644
--- a/tuplex/python/tests/test_math.py
+++ b/tuplex/python/tests/test_math.py
@@ -358,3 +358,99 @@ def testPow(self):
         assert L_bool[2] == math.pow(False, True)
         assert L_bool[3] == math.pow(False, False)
 
+
+    def testTEST(self):
+        c = tuplex.Context(self.conf)
+
+        pow_test = [(25, 0.5), (3, -2), (-4.0, 3.0), (-5, -4)]
+        c.parallelize(pow_test).map(lambda x, y: math.pow(x, y)).collect()
+        assert len(pow_test) == 4, 'wrong length'
+        self.assertAlmostEqual(pow_test[0], 5.0)
+        self.assertAlmostEqual(pow_test[1], -1.0 / 9.0)
+        self.assertAlmostEqual(pow_test[2], -64.0)
+        self.assertAlmostEqual(pow_test[3], 1.0 / 625.0)
+
+        sqrt_test = [0, 1.0, 4.0, 16]
+        c.parallelize(sqrt_test).map(lambda x: math.sqrt(x)).collect()
+        assert len(sqrt_test) == 4, 'wrong length'
+        self.assertAlmostEqual(sqrt_test[0], 0.0)
+        self.assertAlmostEqual(sqrt_test[1], 1.0)
+        self.assertAlmostEqual(sqrt_test[2], 2.0)
+        self.assertAlmostEqual(sqrt_test[3], 4.0)
+
+    
+    # def testIsInf(self):
+    #     c = tuplex.Context(self.conf)
+
+    #     float_test = [0.0, 1.0, -1.0, -math.inf, 3.0, math.inf]
+    #     L0 = c.parallelize(float_test).map(lambda x: math.isinf(x)).collect()
+    #     assert len(L0) == 6, 'wrong length'
+    #     self.assertEqual(L0[0], False)
+    #     self.assertEqual(L0[1], False)
+    #     self.assertEqual(L0[2], False)
+    #     self.assertEqual(L0[3], True)
+    #     self.assertEqual(L0[4], False)
+    #     self.assertEqual(L0[5], True)
+
+    #     tuple_test = [(1.0, math.inf), (-math.inf, 0.0), (-math.inf, math.inf), (-2.0, 0.0)]
+    #     L1 = c.parallelize(tuple_test).map(lambda x, y: (math.isinf(x), math.isinf(y))).collect()
+    #     assert len(L1) == 4, 'wrong length'
+    #     self.assertEqual(L1[0], (False, True))
+    #     self.assertEqual(L1[1], (True, False))
+    #     self.assertEqual(L1[2], (True, True))
+    #     self.assertEqual(L1[3], (False, False))
+
+    #     int_test = [1, -math.inf, -1, 0, math.inf]
+    #     L2 = c.parallelize(int_test).map(lambda x: math.isinf(x)).collect()
+    #     assert len(L2) == 5, 'wrong length'
+    #     self.assertEqual(L2[0], False)
+    #     self.assertEqual(L2[1], True)
+    #     self.assertEqual(L2[2], False)
+    #     self.assertEqual(L2[3], False)
+    #     self.assertEqual(L2[4], True)
+
+    #     mix_test = [-1, math.inf, 1.5, math.nan, -math.inf, 0.0]
+    #     L3 = c.parallelize(mix_test).map(lambda x, y: math.pow(x, y)).collect()
+    #     assert len(L3) == 6
+    #     self.assertEqual(L3[0], False)
+    #     self.assertEqual(L3[1], True)
+    #     self.assertEqual(L3[2], False)
+    #     self.assertEqual(L3[3], False)
+    #     self.assertEqual(L3[4], True)
+    #     self.assertEqual(L3[5], False)
+    
+
+    # def testIsNan(self):
+    #     c = tuplex.Context(self.conf)
+
+    #     test0 = [0.0, math.nan, -3.5, -math.inf]
+    #     L0 = c.parallelize(test0).map(lambda x: math.isnan(x)).collect()
+    #     assert len(L0) == 4, 'wrong length'
+    #     self.assertEqual(L0[0], False)
+    #     self.assertEqual(L0[1], True)
+    #     self.assertEqual(L0[2], False)
+    #     self.assertEqual(L0[3], False)
+
+    #     test1 = [0, -1, math.nan, math.inf, 97]
+    #     L1 = c.parallelize(test1).map(lambda x: math.isnan(x)).collect()
+    #     assert len(L1) == 5, 'wrong length'
+    #     self.assertEqual(L1[0], False)
+    #     self.assertEqual(L1[1], False)
+    #     self.assertEqual(L1[2], True)
+    #     self.assertEqual(L1[3], False)
+    #     self.assertEqual(L1[4], False)
+        
+    #     test2 = [math.nan, 0, -math.inf, -1.5, math.nan, 97]
+    #     L2 = c.parallelize(test2).map(lambda x: math.isnan(x)).collect()
+    #     assert len(L2) == 6, 'wrong length'
+    #     self.assertEqual(L2[0], True)
+    #     self.assertEqual(L2[1], False)
+    #     self.assertEqual(L2[2], False)
+    #     self.assertEqual(L2[3], False)
+    #     self.assertEqual(L2[4], True)
+    #     self.assertEqual(L2[4], False)
+    
+
+    # def testIsClose(self):
+    #     c = tuplex.Context(self.conf)
+
diff --git a/tuplex/test/core/MathFunctionsTest.cc b/tuplex/test/core/MathFunctionsTest.cc
index 80aed9625..5be4b6ef4 100644
--- a/tuplex/test/core/MathFunctionsTest.cc
+++ b/tuplex/test/core/MathFunctionsTest.cc
@@ -695,3 +695,86 @@ TEST_F(MathFunctionsTest, MathPow) {
     python::lockGIL();
     python::closeInterpreter();
 }
+
+TEST_F(MathFunctionsTest, TEST) {
+    using namespace std;
+    using namespace tuplex;
+
+    python::initInterpreter();
+    python::unlockGIL();
+
+    Context c(microTestOptions());
+    ClosureEnvironment ce;
+    ce.importModuleAs("math", "math");
+
+    auto v1 = c.parallelize({
+        Row(25, 0.5), Row(3, -2), Row(-4.0, 3.0), Row(-5, -4)
+    }).map(UDF("lambda x, y: math.pow(x, y)", "", ce)).collectAsVector();
+    EXPECT_EQ(v1.size(), 4);
+    EXPECT_DOUBLE_EQ(v1[0].getDouble(0), 5.0);
+    EXPECT_DOUBLE_EQ(v1[1].getDouble(0), pow(3.0, -2.0));
+    EXPECT_DOUBLE_EQ(v1[2].getDouble(0), -64.0);
+    EXPECT_DOUBLE_EQ(v1[3].getDouble(0), pow(-5.0, -4.0));
+
+    auto v2 = c.parallelize({
+        Row(0), Row(1.0), Row(4.0), Row(16)
+    }).map(UDF("lambda x: math.sqrt(x)", "", ce)).collectAsVector();
+    EXPECT_EQ(v2.size(), 4);
+    EXPECT_DOUBLE_EQ(v2[0].getDouble(0), 0.0);
+    EXPECT_DOUBLE_EQ(v2[1].getDouble(0), 1.0);
+    EXPECT_DOUBLE_EQ(v2[2].getDouble(0), 2.0);
+    EXPECT_DOUBLE_EQ(v2[3].getDouble(0), 4.0);
+
+    python::lockGIL();
+    python::closeInterpreter();
+}
+
+// TEST_F(MathFunctionsTest, MathIsInf) {
+//     using namespace std;
+//     using namespace tuplex;
+
+//     python::initInterpreter();
+//     python::unlockGIL();
+
+//     Context c(microTestOptions());
+//     ClosureEnvironment ce;
+//     ce.importModuleAs("math", "math");
+
+//     auto v1 = c.parallelize({
+//         Row(0.0), Row(1.0), Row(-1.0), Row(-INFINITY), Row(3.0), Row(INFINITY)
+//     }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector();
+
+//     EXPECT_EQ(v1.size(), 6);
+//     EXPECT_EQ(v1[0].getBool(0), false);
+//     EXPECT_EQ(v1[1].getBool(0), false);
+//     EXPECT_EQ(v1[2].getBool(0), false);
+//     EXPECT_EQ(v1[3].getBool(0), true);
+//     EXPECT_EQ(v1[4].getBool(0), false);
+//     EXPECT_EQ(v1[5].getBool(0), true);
+    
+//     auto v2 = c.parallelize({
+//         Row(1), Row(-INFINITY), Row(-1), Row(0), Row(INFINITY)
+//     }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector();
+
+//     EXPECT_EQ(v2.size(), 5);
+//     EXPECT_EQ(v2[0].getBool(0), false);
+//     EXPECT_EQ(v2[1].getBool(0), true);
+//     EXPECT_EQ(v2[2].getBool(0), false);
+//     EXPECT_EQ(v2[3].getBool(0), false);
+//     EXPECT_EQ(v2[4].getBool(0), true);
+
+//     auto v3 = c.parallelize({
+//         Row(-1), Row(INFINITY), Row(1.5), Row(NAN), Row(-INFINITY), Row(0.0)
+//     }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector();
+
+//     EXPECT_EQ(v3.size(), 6);
+//     EXPECT_DOUBLE_EQ(v3[0].getBool(0), false);
+//     EXPECT_DOUBLE_EQ(v3[1].getBool(0), true);
+//     EXPECT_DOUBLE_EQ(v3[2].getBool(0), false);
+//     EXPECT_DOUBLE_EQ(v3[3].getBool(0), false);
+//     EXPECT_DOUBLE_EQ(v3[4].getBool(0), true);
+//     EXPECT_DOUBLE_EQ(v3[5].getBool(0), false);
+
+//     python::lockGIL();
+//     python::closeInterpreter();
+// }

From 8128020f1b44e6ff896fd0df56c84f31749cc6d4 Mon Sep 17 00:00:00 2001
From: Rhea Goyal <rhea_goyal@brown.edu>
Date: Thu, 30 Jun 2022 15:36:46 -0400
Subject: [PATCH 04/23] Revert "added new tests (for already existing
 functions)"

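This reverts commit 7c0984e4724d613475afc26136fe3ad43ddbc271.

Note: this also restores the original `EXPECT_EQ(v2.size(), 5);` line in
MathPow, so the size assertion again checks v2 while the expectations
that follow it read from v4.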
---
 tuplex/python/tests/test_math.py      | 100 +-------------------------
 tuplex/test/core/MathFunctionsTest.cc |  89 ++---------------------
 2 files changed, 6 insertions(+), 183 deletions(-)

diff --git a/tuplex/python/tests/test_math.py b/tuplex/python/tests/test_math.py
index 8763391fc..f60750d17 100644
--- a/tuplex/python/tests/test_math.py
+++ b/tuplex/python/tests/test_math.py
@@ -299,6 +299,7 @@ def testExpm1(self):
         assert L_bool[1] == math.expm1(False)
 
 
+
     def testPow(self):
         c = tuplex.Context(self.conf)
 
@@ -356,101 +357,4 @@ def testPow(self):
         assert L_bool[0] == math.pow(True, False)
         assert L_bool[1] == math.pow(True, True)
         assert L_bool[2] == math.pow(False, True)
-        assert L_bool[3] == math.pow(False, False)
-
-
-    def testTEST(self):
-        c = tuplex.Context(self.conf)
-
-        pow_test = [(25, 0.5), (3, -2), (-4.0, 3.0), (-5, -4)]
-        c.parallelize(pow_test).map(lambda x, y: math.pow(x, y)).collect()
-        assert len(pow_test) == 4, 'wrong length'
-        self.assertAlmostEqual(pow_test[0], 5.0)
-        self.assertAlmostEqual(pow_test[1], -1.0 / 9.0)
-        self.assertAlmostEqual(pow_test[2], -64.0)
-        self.assertAlmostEqual(pow_test[3], 1.0 / 625.0)
-
-        sqrt_test = [0, 1.0, 4.0, 16]
-        c.parallelize(sqrt_test).map(lambda x: math.sqrt(x)).collect()
-        assert len(sqrt_test) == 4, 'wrong length'
-        self.assertAlmostEqual(sqrt_test[0], 0.0)
-        self.assertAlmostEqual(sqrt_test[1], 1.0)
-        self.assertAlmostEqual(sqrt_test[2], 2.0)
-        self.assertAlmostEqual(sqrt_test[3], 4.0)
-
-    
-    # def testIsInf(self):
-    #     c = tuplex.Context(self.conf)
-
-    #     float_test = [0.0, 1.0, -1.0, -math.inf, 3.0, math.inf]
-    #     L0 = c.parallelize(float_test).map(lambda x: math.isinf(x)).collect()
-    #     assert len(L0) == 6, 'wrong length'
-    #     self.assertEqual(L0[0], False)
-    #     self.assertEqual(L0[1], False)
-    #     self.assertEqual(L0[2], False)
-    #     self.assertEqual(L0[3], True)
-    #     self.assertEqual(L0[4], False)
-    #     self.assertEqual(L0[5], True)
-
-    #     tuple_test = [(1.0, math.inf), (-math.inf, 0.0), (-math.inf, math.inf), (-2.0, 0.0)]
-    #     L1 = c.parallelize(tuple_test).map(lambda x, y: (math.isinf(x), math.isinf(y))).collect()
-    #     assert len(L1) == 4, 'wrong length'
-    #     self.assertEqual(L1[0], (False, True))
-    #     self.assertEqual(L1[1], (True, False))
-    #     self.assertEqual(L1[2], (True, True))
-    #     self.assertEqual(L1[3], (False, False))
-
-    #     int_test = [1, -math.inf, -1, 0, math.inf]
-    #     L2 = c.parallelize(int_test).map(lambda x: math.isinf(x)).collect()
-    #     assert len(L2) == 5, 'wrong length'
-    #     self.assertEqual(L2[0], False)
-    #     self.assertEqual(L2[1], True)
-    #     self.assertEqual(L2[2], False)
-    #     self.assertEqual(L2[3], False)
-    #     self.assertEqual(L2[4], True)
-
-    #     mix_test = [-1, math.inf, 1.5, math.nan, -math.inf, 0.0]
-    #     L3 = c.parallelize(mix_test).map(lambda x, y: math.pow(x, y)).collect()
-    #     assert len(L3) == 6
-    #     self.assertEqual(L3[0], False)
-    #     self.assertEqual(L3[1], True)
-    #     self.assertEqual(L3[2], False)
-    #     self.assertEqual(L3[3], False)
-    #     self.assertEqual(L3[4], True)
-    #     self.assertEqual(L3[5], False)
-    
-
-    # def testIsNan(self):
-    #     c = tuplex.Context(self.conf)
-
-    #     test0 = [0.0, math.nan, -3.5, -math.inf]
-    #     L0 = c.parallelize(test0).map(lambda x: math.isnan(x)).collect()
-    #     assert len(L0) == 4, 'wrong length'
-    #     self.assertEqual(L0[0], False)
-    #     self.assertEqual(L0[1], True)
-    #     self.assertEqual(L0[2], False)
-    #     self.assertEqual(L0[3], False)
-
-    #     test1 = [0, -1, math.nan, math.inf, 97]
-    #     L1 = c.parallelize(test1).map(lambda x: math.isnan(x)).collect()
-    #     assert len(L1) == 5, 'wrong length'
-    #     self.assertEqual(L1[0], False)
-    #     self.assertEqual(L1[1], False)
-    #     self.assertEqual(L1[2], True)
-    #     self.assertEqual(L1[3], False)
-    #     self.assertEqual(L1[4], False)
-        
-    #     test2 = [math.nan, 0, -math.inf, -1.5, math.nan, 97]
-    #     L2 = c.parallelize(test2).map(lambda x: math.isnan(x)).collect()
-    #     assert len(L2) == 6, 'wrong length'
-    #     self.assertEqual(L2[0], True)
-    #     self.assertEqual(L2[1], False)
-    #     self.assertEqual(L2[2], False)
-    #     self.assertEqual(L2[3], False)
-    #     self.assertEqual(L2[4], True)
-    #     self.assertEqual(L2[4], False)
-    
-
-    # def testIsClose(self):
-    #     c = tuplex.Context(self.conf)
-
+        assert L_bool[3] == math.pow(False, False)
\ No newline at end of file
diff --git a/tuplex/test/core/MathFunctionsTest.cc b/tuplex/test/core/MathFunctionsTest.cc
index 5be4b6ef4..ed81a0eeb 100644
--- a/tuplex/test/core/MathFunctionsTest.cc
+++ b/tuplex/test/core/MathFunctionsTest.cc
@@ -627,6 +627,8 @@ TEST_F(MathFunctionsTest, MathAsin) {
     python::closeInterpreter();
 }
 
+
+
 TEST_F(MathFunctionsTest, MathPow) {
     using namespace std;
     using namespace tuplex;
@@ -675,7 +677,7 @@ TEST_F(MathFunctionsTest, MathPow) {
         Row(2), Row(1), Row(-1), Row(-2), Row(0)
     }).map(UDF("lambda y: math.pow(y, 5)", "", ce)).collectAsVector();
 
-    EXPECT_EQ(v4.size(), 5);
+    EXPECT_EQ(v2.size(), 5);
     EXPECT_DOUBLE_EQ(v4[0].getDouble(0), 32.0);
     EXPECT_DOUBLE_EQ(v4[1].getDouble(0), 1.0);
     EXPECT_DOUBLE_EQ(v4[2].getDouble(0), -1.0);
@@ -694,87 +696,4 @@ TEST_F(MathFunctionsTest, MathPow) {
 
     python::lockGIL();
     python::closeInterpreter();
-}
-
-TEST_F(MathFunctionsTest, TEST) {
-    using namespace std;
-    using namespace tuplex;
-
-    python::initInterpreter();
-    python::unlockGIL();
-
-    Context c(microTestOptions());
-    ClosureEnvironment ce;
-    ce.importModuleAs("math", "math");
-
-    auto v1 = c.parallelize({
-        Row(25, 0.5), Row(3, -2), Row(-4.0, 3.0), Row(-5, -4)
-    }).map(UDF("lambda x, y: math.pow(x, y)", "", ce)).collectAsVector();
-    EXPECT_EQ(v1.size(), 4);
-    EXPECT_DOUBLE_EQ(v1[0].getDouble(0), 5.0);
-    EXPECT_DOUBLE_EQ(v1[1].getDouble(0), pow(3.0, -2.0));
-    EXPECT_DOUBLE_EQ(v1[2].getDouble(0), -64.0);
-    EXPECT_DOUBLE_EQ(v1[3].getDouble(0), pow(-5.0, -4.0));
-
-    auto v2 = c.parallelize({
-        Row(0), Row(1.0), Row(4.0), Row(16)
-    }).map(UDF("lambda x: math.sqrt(x)", "", ce)).collectAsVector();
-    EXPECT_EQ(v2.size(), 4);
-    EXPECT_DOUBLE_EQ(v2[0].getDouble(0), 0.0);
-    EXPECT_DOUBLE_EQ(v2[1].getDouble(0), 1.0);
-    EXPECT_DOUBLE_EQ(v2[2].getDouble(0), 2.0);
-    EXPECT_DOUBLE_EQ(v2[3].getDouble(0), 4.0);
-
-    python::lockGIL();
-    python::closeInterpreter();
-}
-
-// TEST_F(MathFunctionsTest, MathIsInf) {
-//     using namespace std;
-//     using namespace tuplex;
-
-//     python::initInterpreter();
-//     python::unlockGIL();
-
-//     Context c(microTestOptions());
-//     ClosureEnvironment ce;
-//     ce.importModuleAs("math", "math");
-
-//     auto v1 = c.parallelize({
-//         Row(0.0), Row(1.0), Row(-1.0), Row(-INFINITY), Row(3.0), Row(INFINITY)
-//     }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector();
-
-//     EXPECT_EQ(v1.size(), 6);
-//     EXPECT_EQ(v1[0].getBool(0), false);
-//     EXPECT_EQ(v1[1].getBool(0), false);
-//     EXPECT_EQ(v1[2].getBool(0), false);
-//     EXPECT_EQ(v1[3].getBool(0), true);
-//     EXPECT_EQ(v1[4].getBool(0), false);
-//     EXPECT_EQ(v1[5].getBool(0), true);
-    
-//     auto v2 = c.parallelize({
-//         Row(1), Row(-INFINITY), Row(-1), Row(0), Row(INFINITY)
-//     }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector();
-
-//     EXPECT_EQ(v2.size(), 5);
-//     EXPECT_EQ(v2[0].getBool(0), false);
-//     EXPECT_EQ(v2[1].getBool(0), true);
-//     EXPECT_EQ(v2[2].getBool(0), false);
-//     EXPECT_EQ(v2[3].getBool(0), false);
-//     EXPECT_EQ(v2[4].getBool(0), true);
-
-//     auto v3 = c.parallelize({
-//         Row(-1), Row(INFINITY), Row(1.5), Row(NAN), Row(-INFINITY), Row(0.0)
-//     }).map(UDF("lambda x: math.isinf(x)", "", ce)).collectAsVector();
-
-//     EXPECT_EQ(v3.size(), 6);
-//     EXPECT_DOUBLE_EQ(v3[0].getBool(0), false);
-//     EXPECT_DOUBLE_EQ(v3[1].getBool(0), true);
-//     EXPECT_DOUBLE_EQ(v3[2].getBool(0), false);
-//     EXPECT_DOUBLE_EQ(v3[3].getBool(0), false);
-//     EXPECT_DOUBLE_EQ(v3[4].getBool(0), true);
-//     EXPECT_DOUBLE_EQ(v3[5].getBool(0), false);
-
-//     python::lockGIL();
-//     python::closeInterpreter();
-// }
+}
\ No newline at end of file

From 4967b98d9c70716ca05224dc8473a13765347fca Mon Sep 17 00:00:00 2001
From: Rhea Goyal <rhea_goyal@brown.edu>
Date: Wed, 13 Jul 2022 12:18:39 -0400
Subject: [PATCH 05/23] moved changes to new branch

---
 tuplex/codegen/include/AnnotatedAST.h   |  2 +-
 tuplex/test/core/DictionaryFunctions.cc | 20 ++++++++
 tuplex/test/core/DictionaryTyping.cc    | 65 +++++++++++++++++++++++++
 3 files changed, 86 insertions(+), 1 deletion(-)
 create mode 100644 tuplex/test/core/DictionaryTyping.cc

diff --git a/tuplex/codegen/include/AnnotatedAST.h b/tuplex/codegen/include/AnnotatedAST.h
index 47c85f830..90e574de8 100644
--- a/tuplex/codegen/include/AnnotatedAST.h
+++ b/tuplex/codegen/include/AnnotatedAST.h
@@ -177,7 +177,7 @@ namespace tuplex {
 
             /*!
              * annotates the tree with final types. If this is not possible, returns false
-             * @param pokicy compiler policy
+             * @param policy compiler policy
              * @param silentMode determines whether the type inference should log out problems or not
              * @param removeBranches whether to use RemoveDeadBranchesVisitor to prune AST
              * @return whether types could be successfully annotated/defined for all AST nodes
diff --git a/tuplex/test/core/DictionaryFunctions.cc b/tuplex/test/core/DictionaryFunctions.cc
index 955014748..01e63533e 100644
--- a/tuplex/test/core/DictionaryFunctions.cc
+++ b/tuplex/test/core/DictionaryFunctions.cc
@@ -13,6 +13,8 @@
 #include "../../utils/include/Utils.h"
 #include "TestUtils.h"
 #include "RuntimeInterface.h"
+#include <parser/Parser.h>
+#include <graphviz/GraphVizGraph.h>
 
 // need for these tests a running python interpreter, so spin it up
 class DictionaryFunctions : public PyTest {};
@@ -496,4 +498,22 @@ TEST_F(DictionaryFunctions, EmptyDict) {
     // .pop(val) KeyError
     // ==> left for later testing because it's a bit more complicated...
 #warning "implement fast, special functions for empty dict..."
+}
+
+TEST_F(DictionaryFunctions, DictCount) {
+    using namespace tuplex;
+    auto code = "def count(L):\n"
+                "    d = {}\n"
+                "    for x in L:\n"
+                "        if x not in d.keys():\n"
+                "            d[x] = 0\n"
+                "        d[x] += 1\n"
+                "    return d";
+    // Smoke test: parse the counting UDF and render its AST; a failed parse must stop the test here.
+    auto root = std::unique_ptr<ASTNode>(parseToAST(code));
+    ASSERT_TRUE(root.get());
+    // Write the rendering relative to the working directory instead of a machine-specific absolute path.
+    GraphVizGraph graph;
+    graph.createFromAST(root.get(), true);
+    graph.saveAsPDF("dict_count.pdf");
 }
\ No newline at end of file
diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc
new file mode 100644
index 000000000..1fdccab1e
--- /dev/null
+++ b/tuplex/test/core/DictionaryTyping.cc
@@ -0,0 +1,65 @@
+//--------------------------------------------------------------------------------------------------------------------//
+//                                                                                                                    //
+//                                      Tuplex: Blazing Fast Python Data Science                                      //
+//                                                                                                                    //
+//                                                                                                                    //
+//  (c) 2017 - 2021, Tuplex team                                                                                      //
+//  Created by Leonhard Spiegelberg first on 1/1/2021                                                                 //
+//  License: Apache 2.0                                                                                               //
+//--------------------------------------------------------------------------------------------------------------------//
+
+#include <UDF.h>
+#include <TypeAnnotatorVisitor.h>
+#include <SymbolTable.h>
+#include <gtest/gtest.h>
+#include <parser/Parser.h>
+#include <graphviz/GraphVizGraph.h>
+#include <CodegenHelper.h>
+#include <AnnotatedAST.h>
+
+// classes to work with:
+// type annotator visitor
+// trace visitor
+
+TEST(DictionaryTyping, Count) {
+    using namespace tuplex;
+    using namespace std;
+
+    // // test count UDF
+    // auto count_c = "def count(L):\n"
+    //                 "    d = {}\n"
+    //                 "    for x in L:\n"
+    //                 "        if x not in d.keys():\n"
+    //                 "            d[x] = 0\n"
+    //                 "        d[x] += 1\n"
+    //                 "    return d";
+
+    // test simple UDF
+    auto count_c = "def f(L):\n"
+                    "    d = {}\n"
+                    "    k = L[0]\n"
+                    "    d[k] = 0\n"
+                    "    d[k] += 1\n"
+                    "    return d";
+
+    // parse code to AST
+    auto ast = tuplex::codegen::AnnotatedAST();
+    ast.parseString(count_c);
+
+    // make typing
+    python::Type inputType = python::Type::makeListType(python::Type::I64);
+
+    // create symbol table
+    ast.addTypeHint("L", inputType);
+    ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    // print type annotated ast
+    GraphVizGraph graph;
+    graph.createFromAST(ast.getFunctionAST(), true);
+    graph.saveAsPDF("typed_ast.pdf");
+
+    cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
+
+    // ASSERT_EQ(ast->getInferredType().getReturnType(), python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
+
+}
\ No newline at end of file

From 6cee06d177b543bb82ebf77080944d5c294960df Mon Sep 17 00:00:00 2001
From: Rhea Goyal <rhea_goyal@brown.edu>
Date: Thu, 14 Jul 2022 19:16:07 -0400
Subject: [PATCH 06/23] added dict case to nassign; need to gdb

---
 tuplex/codegen/include/TypeAnnotatorVisitor.h |  1 +
 tuplex/codegen/src/TypeAnnotatorVisitor.cc    | 65 ++++++++++++++++++-
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/tuplex/codegen/include/TypeAnnotatorVisitor.h b/tuplex/codegen/include/TypeAnnotatorVisitor.h
index 922c5f1e2..8c4d94032 100644
--- a/tuplex/codegen/include/TypeAnnotatorVisitor.h
+++ b/tuplex/codegen/include/TypeAnnotatorVisitor.h
@@ -53,6 +53,7 @@ namespace tuplex {
                                        const TokenType tt, ASTNode* right,
                                        const python::Type& b);
         void assignHelper(NIdentifier *id, python::Type type);
+        void dictAssign(NSubscription* subscript, python::Type key_type, python::Type value_type);
         void checkRetType(python::Type t);
         /*!
          * Annotate iterator-related NCall with iterator-specific info
diff --git a/tuplex/codegen/src/TypeAnnotatorVisitor.cc b/tuplex/codegen/src/TypeAnnotatorVisitor.cc
index dd19474e7..ec0ca1dc1 100644
--- a/tuplex/codegen/src/TypeAnnotatorVisitor.cc
+++ b/tuplex/codegen/src/TypeAnnotatorVisitor.cc
@@ -1220,6 +1220,28 @@ namespace tuplex {
         _nameTable[id->_name] = type;
     }
 
+    void TypeAnnotatorVisitor::dictAssign(NSubscription* subscript, python::Type key_type, python::Type value_type) {
+        assert(subscript->_value->getInferredType().isDictionaryType());
+
+        NDictionary* dict = (NDictionary*)subscript->_value;
+        // not entirely sure what the below loop is for rn
+        // if(_ongoingLoopCount != 0 && !_loopTypeChange) {
+        //     // we are now inside a loop; no type change detected yet
+        //     // check potential type change during loops
+        //     if(_nameTable.find(id->_name) != _nameTable.end() && type != _nameTable.at(id->_name)) {
+        //         error("variable " + id->_name + " changed type during loop from " + _nameTable.at(id->_name).desc() + " to " + type.desc() + ", traced typing needed to determine if the type change is stable");
+        //         _loopTypeChange = true;
+        //     }
+        // }
+
+        // set dictionary's inferred type to be key_type -> value_type
+        // should maybe make a helper function for this? or does this count as the helper function...
+        dict->setInferredType(python::TypeFactory::instance().createOrGetDictionaryType(key_type, value_type));
+        
+        // overwrite entry in nametable with new type (Q: how to do this for dictionaries?)
+        // _nameTable[dict->] = type;
+    }
+
     void TypeAnnotatorVisitor::visit(NAssign *assign) {
         ApatheticVisitor::visit(assign);
 
@@ -1277,8 +1299,49 @@ namespace tuplex {
             } else {
                 error("bad type annotation in tuple assign");
             }
+        } else if (assign->_target->type() == ASTNodeType::Subscription) {
+            NSubscription* subscript = (NSubscription*)assign->_target;
+
+            assert(subscript->_value);
+            assert(subscript->_expression);
+
+            auto type = subscript->_value->getInferredType();
+            auto index_type = subscript->_expression->getInferredType();
+
+            // this is a null check operation. I.e. strip option from either type or index type
+            if (type.isOptionType())
+                type = type.getReturnType();
+            if (index_type.isOptionType())
+                index_type = index_type.getReturnType();
+
+            // if object is dict-like, subscript must have a type compatible with mapping's key type
+            // question: the index is technically an expression: so we need to be able to handle multiple kinds of expressions?
+            // although, we don't really need to know what kind of expression the index is, we just need the resulting return type.
+            // is there an easy way to get this without having to check what kind of expression the index is?
+
+            if (type == python::Type::EMPTYDICT) {
+                // if object is an empty dictionary, upcast empty dictionary to match type of requested subscript and value
+                // Q: do I need to check if the value being assigned is an iterator here?
+                dictAssign(subscript, index_type, assign->_value->getInferredType()); 
+            } else if (python::Type::GENERICDICT == type) {
+                dictAssign(subscript, python::Type::PYOBJECT, python::Type::PYOBJECT);
+            } else if (type.isDictionaryType()) {
+                // if object is not an empty dictionary, check if dict's key type matches subscript type
+                    // if they don't match, mark the dictionary as having type [PYOBJECT, PYOBJECT]
+                    // and set a marker in the typeannotator that this function always triggers the interpreter fallback
+                    // Q: how to do ^^ ?
+                dictAssign(subscript, python::Type::PYOBJECT, python::Type::PYOBJECT);
+            } else {
+                error("only assignment to dictionary subscriptions supported yet!");
+                // if object is list-like, subscript must be an integer
+                // if subscript is negative, list-like object's length is added to subscript
+                // resulting subscript must be in range of object, then ask object to assign value to element/item at the subscript
+            }
+            
+            NDictionary* dict = (NDictionary*)subscript->_value;
+
         } else {
-            error("only assignment to tuples/identifiers supported yet!!!");
+            error("only assignment to tuples/identifiers/subscriptions supported yet!!!");
         }
         // in all cases, set the type of the entire assign
         // TODO we def want this in the single identifier case, but in general?

From 57a36b172229c626f6b5afa4a20f46524e2fc5f4 Mon Sep 17 00:00:00 2001
From: Rhea Goyal <rhea_goyal@brown.edu>
Date: Fri, 15 Jul 2022 12:58:26 -0400
Subject: [PATCH 07/23] not sure what value_type needs to be for recursive case

---
 tuplex/codegen/include/TypeAnnotatorVisitor.h |   5 +
 tuplex/codegen/src/TypeAnnotatorVisitor.cc    | 187 +++++++++---------
 tuplex/test/core/DictionaryTyping.cc          |  48 +++--
 3 files changed, 137 insertions(+), 103 deletions(-)

diff --git a/tuplex/codegen/include/TypeAnnotatorVisitor.h b/tuplex/codegen/include/TypeAnnotatorVisitor.h
index 8c4d94032..3c7adeef5 100644
--- a/tuplex/codegen/include/TypeAnnotatorVisitor.h
+++ b/tuplex/codegen/include/TypeAnnotatorVisitor.h
@@ -53,7 +53,12 @@ namespace tuplex {
                                        const TokenType tt, ASTNode* right,
                                        const python::Type& b);
         void assignHelper(NIdentifier *id, python::Type type);
+        
         void dictAssign(NSubscription* subscript, python::Type key_type, python::Type value_type);
+        bool is_nested_subscript_target(ASTNode* target);
+        void recursive_set_subscript_types(ASTNode* next_target, python::Type value_type);
+
+        
         void checkRetType(python::Type t);
         /*!
          * Annotate iterator-related NCall with iterator-specific info
diff --git a/tuplex/codegen/src/TypeAnnotatorVisitor.cc b/tuplex/codegen/src/TypeAnnotatorVisitor.cc
index ec0ca1dc1..70c815dbf 100644
--- a/tuplex/codegen/src/TypeAnnotatorVisitor.cc
+++ b/tuplex/codegen/src/TypeAnnotatorVisitor.cc
@@ -1222,6 +1222,8 @@ namespace tuplex {
 
     void TypeAnnotatorVisitor::dictAssign(NSubscription* subscript, python::Type key_type, python::Type value_type) {
         assert(subscript->_value->getInferredType().isDictionaryType());
+        // check what type the _value is 
+
 
         NDictionary* dict = (NDictionary*)subscript->_value;
         // not entirely sure what the below loop is for rn
@@ -1242,110 +1244,111 @@ namespace tuplex {
         // _nameTable[dict->] = type;
     }
 
-    void TypeAnnotatorVisitor::visit(NAssign *assign) {
-        ApatheticVisitor::visit(assign);
+    bool TypeAnnotatorVisitor::is_nested_subscript_target(ASTNode* target) {
+        // check if target is a subscript target
+        return assign->_target->type() == ASTNodeType::Subscription;
+    }
+
+    // note: "target" refers to the LHS of the assign (should be a subscription), and then 
+    //       the value of every subsequent subscription
+    void TypeAnnotatorVisitor::recursive_set_subscript_types(ASTNode* target, python::Type value_type) {
+        // if the target is not a subscription (should be an identifier/dictionary ?), then 
+            // the next target should be an identifier
+            // check what type the identifier maps to
+            // error check if the type of the identifier is something subscriptable (for now, a dictionary)
+            // if type is subscriptable, then
+                // set the typing for the identifier to be index_type -> value_type
+                // if type of identifier is empty_dict, then we can just reset type (i.e. upcast dictionary)
+                // else if generic dict: type is still generic dict, and need to set flag in annotator?
+                // else:
+                    // check if index_type matches current index type, if not upcast and set flag
+            
+        // otherwise if the target is a subscription
+            // do recursive_set_subscript_types on the next target, with value_type being ????
+
+    }
 
+    void TypeAnnotatorVisitor::visit(NAssign *assign) {
         // now interesting part comes
         // check what left side is
 
-        // TODO cases
-        /**
-         * id = id
-         * id, id, ... = id/val
-         * id, id, ... = id, val, ... (SPECIAL CASE even here for a, b = b, a)
-         */
-        if(assign->_target->type() == ASTNodeType::Identifier) {
-            // Single identifier case
-            //@Todo: check that symbol table contains target!
-
-            // then check if identifier is already within symbol table. If not, add!
-            NIdentifier* id = (NIdentifier*)assign->_target;
-            assignHelper(id, assign->_value->getInferredType());
-            if(assign->_value->getInferredType().isIteratorType()) {
-                id->annotation().iteratorInfo = assign->_value->annotation().iteratorInfo;
-                _iteratorInfoTable[id->_name] = assign->_value->annotation().iteratorInfo;
-            }
-        } else if(assign->_target->type() == ASTNodeType::Tuple) {
-            // now we have a tuple assignment!
-            // the right hand side MUST be some unpackable thing. Currently this is a tuple but later we will
-            // have lists as well
-            NTuple *ids = (NTuple *) assign->_target;
-            auto rhsInferredType = assign->_value->getInferredType();
-            // TODO add support for dictionaries, etc.
-            if (rhsInferredType.isTupleType()) {
-                // get the types contained in our tuple
-                std::vector<python::Type> tupleTypes = rhsInferredType.parameters();
-                if(ids->_elements.size() != tupleTypes.size()) {
-                    error("Incorrect number of arguments to unpack in assignment");
+        // a[x][y][z][w] = b
+        // a[5 + x * 2] = b
+
+        // could have assign single target helper
+        
+        if (is_nested_subscript_target(assign->_target)) {
+            // visit b's tree
+            assign->_value->accept(*this);
+
+            auto value_type = assign->_value->getInferredType();
+
+            // recursively handle each subscription target 
+            recursive_set_subscript_types(assign->_target, value_type);
+
+            // set assign type to value type
+            assign->setInferredType(value_type);
+        } else {
+            ApatheticVisitor::visit(assign);
+            // TODO cases
+            /**
+             * id = id
+             * id, id, ... = id/val
+             * id, id, ... = id, val, ... (SPECIAL CASE even here for a, b = b, a)
+             */
+            if(assign->_target->type() == ASTNodeType::Identifier) {
+                // Single identifier case
+                //@Todo: check that symbol table contains target!
+
+                // then check if identifier is already within symbol table. If not, add!
+                NIdentifier* id = (NIdentifier*)assign->_target;
+                assignHelper(id, assign->_value->getInferredType());
+                if(assign->_value->getInferredType().isIteratorType()) {
+                    id->annotation().iteratorInfo = assign->_value->annotation().iteratorInfo;
+                    _iteratorInfoTable[id->_name] = assign->_value->annotation().iteratorInfo;
                 }
+            } else if(assign->_target->type() == ASTNodeType::Tuple) {
+                // now we have a tuple assignment!
+                // the right hand side MUST be some unpackable thing. Currently this is a tuple but later we will
+                // have lists as well
+                NTuple *ids = (NTuple *) assign->_target;
+                auto rhsInferredType = assign->_value->getInferredType();
+                // TODO add support for dictionaries, etc.
+                if (rhsInferredType.isTupleType()) {
+                    // get the types contained in our tuple
+                    std::vector<python::Type> tupleTypes = rhsInferredType.parameters();
+                    if(ids->_elements.size() != tupleTypes.size()) {
+                        error("Incorrect number of arguments to unpack in assignment");
+                    }
 
-                for(unsigned long i = 0; i < ids->_elements.size(); i ++) {
-                    auto elt = ids->_elements[i];
-                    if(elt->type() != ASTNodeType::Identifier) {
-                        error("Trying to assign to a non identifier in a tuple");
+                    for(unsigned long i = 0; i < ids->_elements.size(); i ++) {
+                        auto elt = ids->_elements[i];
+                        if(elt->type() != ASTNodeType::Identifier) {
+                            error("Trying to assign to a non identifier in a tuple");
+                        }
+                        NIdentifier *id = (NIdentifier *) elt;
+                        // assign each identifier to the type in the tuple at the corresponding index
+                        assignHelper(id, tupleTypes[i]);
                     }
-                    NIdentifier *id = (NIdentifier *) elt;
-                    // assign each identifier to the type in the tuple at the corresponding index
-                    assignHelper(id, tupleTypes[i]);
-                }
-            } else if(rhsInferredType == python::Type::STRING) {
-                for(const auto& elt : ids->_elements) {
-                    if(elt->type() != ASTNodeType::Identifier) {
-                        error("Trying to assign to a non identifier in a tuple");
+                } else if(rhsInferredType == python::Type::STRING) {
+                    for(const auto& elt : ids->_elements) {
+                        if(elt->type() != ASTNodeType::Identifier) {
+                            error("Trying to assign to a non identifier in a tuple");
+                        }
+                        NIdentifier *id = (NIdentifier *) elt;
+                        assignHelper(id, python::Type::STRING);
                     }
-                    NIdentifier *id = (NIdentifier *) elt;
-                    assignHelper(id, python::Type::STRING);
+                } else {
+                    error("bad type annotation in tuple assign");
                 }
             } else {
-                error("bad type annotation in tuple assign");
-            }
-        } else if (assign->_target->type() == ASTNodeType::Subscription) {
-            NSubscription* subscript = (NSubscription*)assign->_target;
-
-            assert(subscript->_value);
-            assert(subscript->_expression);
-
-            auto type = subscript->_value->getInferredType();
-            auto index_type = subscript->_expression->getInferredType();
-
-            // this is a null check operation. I.e. strip option from either type or index type
-            if (type.isOptionType())
-                type = type.getReturnType();
-            if (index_type.isOptionType())
-                index_type = index_type.getReturnType();
-
-            // if object is dict-like, subscript must have a type compatible with mapping's key type
-            // question: the index is technically an expression: so we need to be able to handle multiple kinds of expressions?
-            // although, we don't really need to know what kind of expression the index is, we just need the resulting return type.
-            // is there an easy way to get this without having to check what kind of expression the index is?
-
-            if (type == python::Type::EMPTYDICT) {
-                // if object is an empty dictionary, upcast empty dictionary to match type of requested subscript and value
-                // Q: do I need to check if the value being assigned is an iterator here?
-                dictAssign(subscript, index_type, assign->_value->getInferredType()); 
-            } else if (python::Type::GENERICDICT == type) {
-                dictAssign(subscript, python::Type::PYOBJECT, python::Type::PYOBJECT);
-            } else if (type.isDictionaryType()) {
-                // if object is not an empty dictionary, check if dict's key type matches subscript type
-                    // if they don't match, mark the dictionary as having type [PYOBJECT, PYOBJECT]
-                    // and set a marker in the typeannotator that this function always triggers the interpreter fallback
-                    // Q: how to do ^^ ?
-                dictAssign(subscript, python::Type::PYOBJECT, python::Type::PYOBJECT);
-            } else {
-                error("only assignment to dictionary subscriptions supported yet!");
-                // if object is list-like, subscript must be an integer
-                // if subscript is negative, list-like object's length is added to subscript
-                // resulting subscript must be in range of object, then ask object to assign value to element/item at the subscript
+                error("only assignment to tuples/identifiers supported yet!!!");
+                // error("only assignment to tuples/identifiers/subscriptions supported yet!!!");
             }
-            
-            NDictionary* dict = (NDictionary*)subscript->_value;
-
-        } else {
-            error("only assignment to tuples/identifiers/subscriptions supported yet!!!");
+            // in all cases, set the type of the entire assign
+            // TODO we def want this in the single identifier case, but in general?
+            assign->setInferredType(assign->_target->getInferredType());
         }
-        // in all cases, set the type of the entire assign
-        // TODO we def want this in the single identifier case, but in general?
-        assign->setInferredType(assign->_target->getInferredType());
     }
 
     void TypeAnnotatorVisitor::resolveNameConflicts(const std::unordered_map<std::string, python::Type> &table) {
diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc
index 1fdccab1e..d076c3a07 100644
--- a/tuplex/test/core/DictionaryTyping.cc
+++ b/tuplex/test/core/DictionaryTyping.cc
@@ -21,27 +21,53 @@
 // type annotator visitor
 // trace visitor
 
-TEST(DictionaryTyping, Count) {
+TEST(DictionaryTyping, Simple) {
     using namespace tuplex;
     using namespace std;
 
-    // // test count UDF
-    // auto count_c = "def count(L):\n"
-    //                 "    d = {}\n"
-    //                 "    for x in L:\n"
-    //                 "        if x not in d.keys():\n"
-    //                 "            d[x] = 0\n"
-    //                 "        d[x] += 1\n"
-    //                 "    return d";
-
     // test simple UDF
-    auto count_c = "def f(L):\n"
+    auto simple_c = "def f(L):\n"
                     "    d = {}\n"
                     "    k = L[0]\n"
                     "    d[k] = 0\n"
                     "    d[k] += 1\n"
                     "    return d";
 
+    // parse code to AST
+    auto ast = tuplex::codegen::AnnotatedAST();
+    ast.parseString(simple_c);
+
+    // make typing
+    python::Type inputType = python::Type::makeListType(python::Type::I64);
+
+    // create symbol table
+    ast.addTypeHint("L", inputType);
+    ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    // print type annotated ast
+    GraphVizGraph graph;
+    graph.createFromAST(ast.getFunctionAST(), true);
+    graph.saveAsPDF("typed_ast.pdf");
+
+    cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
+
+    // ASSERT_EQ(ast->getInferredType().getReturnType(), python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
+
+}
+
+TEST(DictionaryTyping, Count) {
+    using namespace tuplex;
+    using namespace std;
+
+    // test count UDF
+    auto count_c = "def count(L):\n"
+                    "    d = {}\n"
+                    "    for x in L:\n"
+                    "        if x not in d.keys():\n"
+                    "            d[x] = 0\n"
+                    "        d[x] += 1\n"
+                    "    return d";
+
     // parse code to AST
     auto ast = tuplex::codegen::AnnotatedAST();
     ast.parseString(count_c);

From 42a2715c2e7fcba470d09306adb49528141a240d Mon Sep 17 00:00:00 2001
From: Rhea Goyal <rhea_goyal@brown.edu>
Date: Mon, 18 Jul 2022 14:51:20 -0400
Subject: [PATCH 08/23] typing working for simple function

---
 tuplex/codegen/include/TypeAnnotatorVisitor.h |  2 +-
 tuplex/codegen/src/TypeAnnotatorVisitor.cc    | 96 +++++++++++--------
 tuplex/test/core/DictionaryTyping.cc          |  2 +-
 3 files changed, 57 insertions(+), 43 deletions(-)

diff --git a/tuplex/codegen/include/TypeAnnotatorVisitor.h b/tuplex/codegen/include/TypeAnnotatorVisitor.h
index 3c7adeef5..caa2093d0 100644
--- a/tuplex/codegen/include/TypeAnnotatorVisitor.h
+++ b/tuplex/codegen/include/TypeAnnotatorVisitor.h
@@ -56,7 +56,7 @@ namespace tuplex {
         
         void dictAssign(NSubscription* subscript, python::Type key_type, python::Type value_type);
         bool is_nested_subscript_target(ASTNode* target);
-        void recursive_set_subscript_types(ASTNode* next_target, python::Type value_type);
+        void recursive_set_subscript_types(NSubscription* target, python::Type value_type);
 
         
         void checkRetType(python::Type t);
diff --git a/tuplex/codegen/src/TypeAnnotatorVisitor.cc b/tuplex/codegen/src/TypeAnnotatorVisitor.cc
index 70c815dbf..0d9028194 100644
--- a/tuplex/codegen/src/TypeAnnotatorVisitor.cc
+++ b/tuplex/codegen/src/TypeAnnotatorVisitor.cc
@@ -787,7 +787,7 @@ namespace tuplex {
                 auto func_type = python::Type::makeFunctionType(python::Type::makeTupleType(param_types), ret_type);
                 call->_func->setInferredType(func_type);
             } else {
-                fatal_error("Could not infer typing for callable " + name);
+                fatal_error("Could not infer typing for callable " + name); //$$
             }
         }
 
@@ -1220,52 +1220,62 @@ namespace tuplex {
         _nameTable[id->_name] = type;
     }
 
-    void TypeAnnotatorVisitor::dictAssign(NSubscription* subscript, python::Type key_type, python::Type value_type) {
-        assert(subscript->_value->getInferredType().isDictionaryType());
-        // check what type the _value is 
-
-
-        NDictionary* dict = (NDictionary*)subscript->_value;
-        // not entirely sure what the below loop is for rn
-        // if(_ongoingLoopCount != 0 && !_loopTypeChange) {
-        //     // we are now inside a loop; no type change detected yet
-        //     // check potential type change during loops
-        //     if(_nameTable.find(id->_name) != _nameTable.end() && type != _nameTable.at(id->_name)) {
-        //         error("variable " + id->_name + " changed type during loop from " + _nameTable.at(id->_name).desc() + " to " + type.desc() + ", traced typing needed to determine if the type change is stable");
-        //         _loopTypeChange = true;
-        //     }
-        // }
-
-        // set dictionary's inferred type to be key_type -> value_type
-        // should maybe make a helper function for this? or does this count as the helper function...
-        dict->setInferredType(python::TypeFactory::instance().createOrGetDictionaryType(key_type, value_type));
-        
-        // overwrite entry in nametable with new type (Q: how to do this for dictionaries?)
-        // _nameTable[dict->] = type;
-    }
-
     bool TypeAnnotatorVisitor::is_nested_subscript_target(ASTNode* target) {
         // check if target is a subscript target
-        return assign->_target->type() == ASTNodeType::Subscription;
+        return target->type() == ASTNodeType::Subscription;
     }
 
     // note: "target" refers to the LHS of the assign (should be a subscription), and then 
     //       the value of every subsequent subscription
-    void TypeAnnotatorVisitor::recursive_set_subscript_types(ASTNode* target, python::Type value_type) {
-        // if the target is not a subscription (should be an identifier/dictionary ?), then 
-            // the next target should be an identifier
-            // check what type the identifier maps to
-            // error check if the type of the identifier is something subscriptable (for now, a dictionary)
-            // if type is subscriptable, then
-                // set the typing for the identifier to be index_type -> value_type
-                // if type of identifier is empty_dict, then we can just reset type (i.e. upcast dictionary)
-                // else if generic dict: type is still generic dict, and need to set flag in annotator?
-                // else:
-                    // check if index_type matches current index type, if not upcast and set flag
-            
-        // otherwise if the target is a subscription
-            // do recursive_set_subscript_types on the next target, with value_type being ????
+    void TypeAnnotatorVisitor::recursive_set_subscript_types(NSubscription* target, python::Type value_type) {
+        target->_expression->accept(*this);
+        python::Type index_type = target->_expression->getInferredType();
+        python::Type new_value_type = python::TypeFactory::instance().createOrGetDictionaryType(index_type, value_type);
+
+        if (target->_value->type() == ASTNodeType::Subscription) {
+            /* if the next target is a subscription, do recursive_set_subscript_types 
+               on the next target, with value_type being Dict[index_type, value_type] */
+            // Q: do I need to set intermediate types? e.g. for a[x][y][z] do I need to set the type for a[x][y]? (don't think there would be anywhere to rewrite in the nametable...)
+            recursive_set_subscript_types((NSubscription*)target->_value, new_value_type);
+        } else if (target->_value->type() == ASTNodeType::Identifier) {
+            // if the next target is an identifier (e.g. d[0])
+            NIdentifier* id = (NIdentifier*)target->_value;
+            // check if the type the identifier maps to is something subscriptable (for now, a dictionary)
+            // could use _nameTable[id->_name].isIterableType() ?
+                // No - tuples can't have element assignment, and each type that can needs to be handled differently
+            if (_nameTable[id->_name].isDictionaryType()) {
+                python::Type curr_type = _nameTable[id->_name];
+
+                if (curr_type == python::Type::EMPTYDICT) {
+                    // we can just upcast type to Dict[index_type, value_type]
+                    assignHelper(id, new_value_type);
+                } else if (curr_type == python::Type::GENERICDICT) {
+                    // type remains generic dict (and need to set flag in annotator?)
+                    // Q: Do I need to do anything in this branch?
+                    // assignHelper(python::Type::PYOBJECT, python::Type::PYOBJECT);
+                } else {
+                    // check if index_type and new_value_type match current index type and value type
+                    if (curr_type.keyType() != index_type) {
+                        // upcast index type to PYOBJECT and set flag
+                        index_type = python::Type::PYOBJECT;
+                    }
+
+                    if (curr_type.valueType() != value_type) {
+                        // upcast value type to PYOBJECT and set flag
+                        new_value_type = python::TypeFactory::instance().createOrGetDictionaryType(index_type, python::Type::PYOBJECT);
+                    }
 
+                    assignHelper(id, new_value_type);
+                }
+            } else {
+                // otherwise, raise an error (identifier not subscriptable)
+                error("cannot index into type " + _nameTable[id->_name].desc());
+            }
+        } else {
+            // otherwise, need to check if final type of expression is something subscriptable
+                // TODO: not really sure how to do this case
+            // else: raise error (can't subscript type)
+        }
     }
 
     void TypeAnnotatorVisitor::visit(NAssign *assign) {
@@ -1278,13 +1288,17 @@ namespace tuplex {
         // could have assign single target helper
         
         if (is_nested_subscript_target(assign->_target)) {
+            assert(assign->_target->type() == ASTNodeType::Subscription);
+
+            NSubscription* sub_node = (NSubscription*) assign->_target;
+
             // visit b's tree
             assign->_value->accept(*this);
 
             auto value_type = assign->_value->getInferredType();
 
             // recursively handle each subscription target 
-            recursive_set_subscript_types(assign->_target, value_type);
+            recursive_set_subscript_types(sub_node, value_type);
 
             // set assign type to value type
             assign->setInferredType(value_type);
diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc
index d076c3a07..d9c4a1108 100644
--- a/tuplex/test/core/DictionaryTyping.cc
+++ b/tuplex/test/core/DictionaryTyping.cc
@@ -38,7 +38,7 @@ TEST(DictionaryTyping, Simple) {
     ast.parseString(simple_c);
 
     // make typing
-    python::Type inputType = python::Type::makeListType(python::Type::I64);
+    python::Type inputType = python::Type::makeListType(python::Type::F64);
 
     // create symbol table
     ast.addTypeHint("L", inputType);

From 601ed05d45c636a01c2a7ae3c44c6395d03b7565 Mon Sep 17 00:00:00 2001
From: Rhea Goyal <rhea_goyal@brown.edu>
Date: Wed, 20 Jul 2022 10:06:03 -0400
Subject: [PATCH 09/23] AST fully typed for simple case

---
 tuplex/codegen/include/TypeAnnotatorVisitor.h |   2 -
 tuplex/codegen/src/TypeAnnotatorVisitor.cc    |  46 ++--
 tuplex/test/core/DictionaryTyping.cc          | 207 +++++++++++++++++-
 tuplex/utils/include/TypeSystem.h             |   4 +
 4 files changed, 231 insertions(+), 28 deletions(-)

diff --git a/tuplex/codegen/include/TypeAnnotatorVisitor.h b/tuplex/codegen/include/TypeAnnotatorVisitor.h
index caa2093d0..298effefb 100644
--- a/tuplex/codegen/include/TypeAnnotatorVisitor.h
+++ b/tuplex/codegen/include/TypeAnnotatorVisitor.h
@@ -54,11 +54,9 @@ namespace tuplex {
                                        const python::Type& b);
         void assignHelper(NIdentifier *id, python::Type type);
         
-        void dictAssign(NSubscription* subscript, python::Type key_type, python::Type value_type);
         bool is_nested_subscript_target(ASTNode* target);
         void recursive_set_subscript_types(NSubscription* target, python::Type value_type);
 
-        
         void checkRetType(python::Type t);
         /*!
          * Annotate iterator-related NCall with iterator-specific info
diff --git a/tuplex/codegen/src/TypeAnnotatorVisitor.cc b/tuplex/codegen/src/TypeAnnotatorVisitor.cc
index 0d9028194..f0b1bd212 100644
--- a/tuplex/codegen/src/TypeAnnotatorVisitor.cc
+++ b/tuplex/codegen/src/TypeAnnotatorVisitor.cc
@@ -787,7 +787,7 @@ namespace tuplex {
                 auto func_type = python::Type::makeFunctionType(python::Type::makeTupleType(param_types), ret_type);
                 call->_func->setInferredType(func_type);
             } else {
-                fatal_error("Could not infer typing for callable " + name); //$$
+                fatal_error("Could not infer typing for callable " + name);
             }
         }
 
@@ -1230,29 +1230,31 @@ namespace tuplex {
     void TypeAnnotatorVisitor::recursive_set_subscript_types(NSubscription* target, python::Type value_type) {
         target->_expression->accept(*this);
         python::Type index_type = target->_expression->getInferredType();
-        python::Type new_value_type = python::TypeFactory::instance().createOrGetDictionaryType(index_type, value_type);
+        python::Type new_value_type = python::Type::makeDictionaryType(index_type, value_type);
 
         if (target->_value->type() == ASTNodeType::Subscription) {
             /* if the next target is a subscription, do recursive_set_subscript_types 
-               on the next target, with value_type being Dict[index_type, value_type] */
-            // Q: do I need to set intermediate types? e.g. for a[x][y][z] do I need to set the type for a[x][y]? (don't think there would be anywhere to rewrite in the nametable...)
+               on the next target, with value_type being Dict[index_type, value_type] */            
+            // set type of subscription
+            // target->setInferredType();
             recursive_set_subscript_types((NSubscription*)target->_value, new_value_type);
         } else if (target->_value->type() == ASTNodeType::Identifier) {
             // if the next target is an identifier (e.g. d[0])
             NIdentifier* id = (NIdentifier*)target->_value;
-            // check if the type the identifier maps to is something subscriptable (for now, a dictionary)
-            // could use _nameTable[id->_name].isIterableType() ?
-                // No - tuples can't have element assignment, and each type that can needs to be handled differently
+            // check if the type the identifier maps to is something subscriptable (for now, just a dictionary)
             if (_nameTable[id->_name].isDictionaryType()) {
                 python::Type curr_type = _nameTable[id->_name];
 
                 if (curr_type == python::Type::EMPTYDICT) {
                     // we can just upcast type to Dict[index_type, value_type]
                     assignHelper(id, new_value_type);
+                    // set type of subscription: value_type
+                    target->setInferredType(value_type);
                 } else if (curr_type == python::Type::GENERICDICT) {
                     // type remains generic dict (and need to set flag in annotator?)
                     // Q: Do I need to do anything in this branch?
                     // assignHelper(python::Type::PYOBJECT, python::Type::PYOBJECT);
+                    target->setInferredType(python::Type::PYOBJECT);
                 } else {
                     // check if index_type and new_value_type match current index type and value type
                     if (curr_type.keyType() != index_type) {
@@ -1262,19 +1264,32 @@ namespace tuplex {
 
                     if (curr_type.valueType() != value_type) {
                         // upcast value type to PYOBJECT and set flag
-                        new_value_type = python::TypeFactory::instance().createOrGetDictionaryType(index_type, python::Type::PYOBJECT);
+                        new_value_type = python::Type::makeDictionaryType(index_type, python::Type::PYOBJECT);
                     }
 
                     assignHelper(id, new_value_type);
+
+                    if (curr_type.valueType() != value_type) {
+                        // set subscript type to PYOBJECT
+                        target->setInferredType(python::Type::PYOBJECT);
+                    } else {
+                        // set subscript type to value_type
+                        target->setInferredType(value_type);
+                    }
                 }
             } else {
-                // otherwise, raise an error (identifier not subscriptable)
-                error("cannot index into type " + _nameTable[id->_name].desc());
+                // otherwise, raise an error (identifier type not subscriptable)
+                error("only dictionary subscription supported; " + _nameTable[id->_name].desc() + " not (yet) supported");
             }
         } else {
-            // otherwise, need to check if final type of expression is something subscriptable
-                // TODO: not really sure how to do this case
+            // otherwise, need to check if final type of expression is something subscriptable (just dictionary for now)
             // else: raise error (can't subscript type)
+            target->_value->accept(*this);
+            if (!target->_value->getInferredType().isDictionaryType()) {
+                error(target->_value->getInferredType().desc() + " is not (yet) subscriptable; only dictionaries supported");
+            }
+
+            // TODO: anything else here?
         }
     }
 
@@ -1282,11 +1297,6 @@ namespace tuplex {
         // now interesting part comes
         // check what left side is
 
-        // a[x][y][z][w] = b
-        // a[5 + x * 2] = b
-
-        // could have assign single target helper
-        
         if (is_nested_subscript_target(assign->_target)) {
             assert(assign->_target->type() == ASTNodeType::Subscription);
 
@@ -1297,7 +1307,7 @@ namespace tuplex {
 
             auto value_type = assign->_value->getInferredType();
 
-            // recursively handle each subscription target 
+            // recursively set types for each subscription target 
             recursive_set_subscript_types(sub_node, value_type);
 
             // set assign type to value type
diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc
index d9c4a1108..b711864c7 100644
--- a/tuplex/test/core/DictionaryTyping.cc
+++ b/tuplex/test/core/DictionaryTyping.cc
@@ -17,10 +17,6 @@
 #include <CodegenHelper.h>
 #include <AnnotatedAST.h>
 
-// classes to work with:
-// type annotator visitor
-// trace visitor
-
 TEST(DictionaryTyping, Simple) {
     using namespace tuplex;
     using namespace std;
@@ -32,7 +28,7 @@ TEST(DictionaryTyping, Simple) {
                     "    d[k] = 0\n"
                     "    d[k] += 1\n"
                     "    return d";
-
+    
     // parse code to AST
     auto ast = tuplex::codegen::AnnotatedAST();
     ast.parseString(simple_c);
@@ -47,12 +43,208 @@ TEST(DictionaryTyping, Simple) {
     // print type annotated ast
     GraphVizGraph graph;
     graph.createFromAST(ast.getFunctionAST(), true);
-    graph.saveAsPDF("typed_ast.pdf");
+    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/typed_ast.pdf");
+
+    cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
+
+    ASSERT_EQ(ast.getReturnType(), python::Type::makeDictionaryType(python::Type::F64, python::Type::I64));
+
+    // TODO: case where d = {0: {}, 1: {0: 10, 1: 15}, 2: None} --> Dict[i64, Option[Dict[i64, i64]]]
+}
+
+TEST(DictionaryTyping, IndexExpression) {
+    using namespace tuplex;
+    using namespace std;
+
+    // a[2 * k + 1] = n
+    auto code = "def f(L):\n"
+                "    d = {}\n"
+                "    k = L[0]\n"
+                "    d[2 * k + 1] = 0\n"
+                "    return d";
+
+    // parse code to AST
+    auto ast = tuplex::codegen::AnnotatedAST();
+    ast.parseString(code);
+
+    // make typing
+    python::Type inputType = python::Type::makeListType(python::Type::I64);
+
+    // create symbol table
+    ast.addTypeHint("L", inputType);
+    ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    // print type annotated ast
+    GraphVizGraph graph;
+    graph.createFromAST(ast.getFunctionAST(), true);
+    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/typed_ast_1.pdf");
 
     cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
 
+    ASSERT_EQ(ast.getReturnType(), python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
+}
+
+TEST(DictionaryTyping, NestedSubscripts) {
+    using namespace tuplex;
+    using namespace std;
+
+    // Q: what should I do about the case where dictionaries don't have the same number of entries (is this supported?)
+
+    // a[x][y] = n
+    auto code_1 = "def f(L):\n"
+                  "    d = {0: {0: 10, 1: 100}, 1: {0: 15, 1: 500}}\n"
+                  "    w = L[0]\n"
+                  "    x = L[1]\n"
+                  "    d[w][x] = 15\n"
+                  "    return d";
+
+    // parse code to AST
+    auto ast_1 = tuplex::codegen::AnnotatedAST();
+    ast_1.parseString(code_1);
+
+    // make typing
+    python::Type inputType_1 = python::Type::makeListType(python::Type::I64);
+
+    // create symbol table
+    ast_1.addTypeHint("L", inputType_1);
+    ast_1.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    // print type annotated ast
+    GraphVizGraph graph_1;
+    graph_1.createFromAST(ast_1.getFunctionAST(), true);
+    graph_1.saveAsPDF("typed_ast.pdf");
+
+    cout<<"return type of function is: "<<ast_1.getReturnType().desc()<<endl;
+
+    python::Type expected_ret_1 = python::Type::makeDictionaryType(python::Type::I64, python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
+    ASSERT_EQ(ast_1.getReturnType(), expected_ret_1);
+
+    // a[x][y] = n, with case where empty dictionary type should be upcasted to Dict[i64, i64]
+    auto code_2 = "def f(L):\n"
+                  "    d = {0: {0: 10, 1: 100}, 1: {}}\n"
+                  "    w = L[0]\n"
+                  "    x = L[1]\n"
+                  "    d[w][x] = 15\n"
+                  "    return d";
+
+    // parse code to AST
+    auto ast_2 = tuplex::codegen::AnnotatedAST();
+    ast_2.parseString(code_2);
+
+    // make typing
+    python::Type inputType_2 = python::Type::makeListType(python::Type::I64);
+
+    // create symbol table
+    ast_2.addTypeHint("L", inputType_2);
+    ast_2.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    // print type annotated ast
+    GraphVizGraph graph_2;
+    graph_2.createFromAST(ast_2.getFunctionAST(), true);
+    graph_2.saveAsPDF("typed_ast.pdf");
+
+    cout<<"return type of function is: "<<ast_2.getReturnType().desc()<<endl;
+
+    python::Type expected_ret_2 = python::Type::makeDictionaryType(python::Type::I64, python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
+    ASSERT_EQ(ast_2.getReturnType(), expected_ret_2);
+
+    // a[x][y][z][w] = n
+    // auto code = "def f(L):\n"
+    //             "    d = {0: {0: {0: {0: 10}, 1: {0: 1}}, 1: {0: {0: 15}, 1: {0: 2}}}, 1: {0: {0: {0: 20}, 1: {0: 0}}, 1: {0: {0: 19}, 1: {0: 4}}}}\n"
+    //             "    w = L[0]\n"
+    //             "    x = L[1]\n"
+    //             "    y = L[2]\n"
+    //             "    z = L[3]\n"
+    //             "    d[w][x][y][z] = 60\n"
+    //             "    return d";
+
+    // // parse code to AST
+    // auto ast = tuplex::codegen::AnnotatedAST();
+    // ast.parseString(code);
+
+    // // make typing
+    // python::Type inputType = python::Type::makeListType(python::Type::I64);
+
+    // // create symbol table
+    // ast.addTypeHint("L", inputType);
+    // ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    // // print type annotated ast
+    // GraphVizGraph graph;
+    // graph.createFromAST(ast.getFunctionAST(), true);
+    // graph.saveAsPDF("typed_ast.pdf");
+
+    // cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
+
     // ASSERT_EQ(ast->getInferredType().getReturnType(), python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
+}
+
+TEST(DictionaryTyping, AttributeSubscripts) {
+    using namespace tuplex;
+    using namespace std;
+
+    // how should I write tests that use classes + class attributes?
+
+    // a.b[x] = n
+    auto code_1 = "def f(L):\n"
+                  "    d = {0: {0: 10, 1: 100}, 1: {0: 15, 1: 500}}\n"
+                  "    w = L[0]\n"
+                  "    x = L[1]\n"
+                  "    d[w][x] = 15\n"
+                  "    return d";
 
+    // parse code to AST
+    auto ast_1 = tuplex::codegen::AnnotatedAST();
+    ast_1.parseString(code_1);
+
+    // make typing
+    python::Type inputType_1 = python::Type::makeListType(python::Type::I64);
+
+    // create symbol table
+    ast_1.addTypeHint("L", inputType_1);
+    ast_1.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    // print type annotated ast
+    GraphVizGraph graph_1;
+    graph_1.createFromAST(ast_1.getFunctionAST(), true);
+    graph_1.saveAsPDF("typed_ast.pdf");
+
+    cout<<"return type of function is: "<<ast_1.getReturnType().desc()<<endl;
+
+    python::Type expected_ret_1 = python::Type::makeDictionaryType(python::Type::I64, python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
+    ASSERT_EQ(ast_1.getReturnType(), expected_ret_1);
+
+    // a.b.c[x] = n
+    auto code_2 = "def f(L):\n"
+                  "    d = {0: {0: 10, 1: 100}, 1: {}}\n"
+                  "    w = L[0]\n"
+                  "    x = L[1]\n"
+                  "    d[w][x] = 15\n"
+                  "    return d";
+
+    // parse code to AST
+    auto ast_2 = tuplex::codegen::AnnotatedAST();
+    ast_2.parseString(code_2);
+
+    // make typing
+    python::Type inputType_2 = python::Type::makeListType(python::Type::I64);
+
+    // create symbol table
+    ast_2.addTypeHint("L", inputType_2);
+    ast_2.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    // print type annotated ast
+    GraphVizGraph graph_2;
+    graph_2.createFromAST(ast_2.getFunctionAST(), true);
+    graph_2.saveAsPDF("typed_ast.pdf");
+
+    cout<<"return type of function is: "<<ast_2.getReturnType().desc()<<endl;
+
+    python::Type expected_ret_2 = python::Type::makeDictionaryType(python::Type::I64, python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
+    ASSERT_EQ(ast_2.getReturnType(), expected_ret_2);
+    
+    // a[x].b[y] = n
+    // a.b[x][y][z] = n
 }
 
 TEST(DictionaryTyping, Count) {
@@ -86,6 +278,5 @@ TEST(DictionaryTyping, Count) {
 
     cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
 
-    // ASSERT_EQ(ast->getInferredType().getReturnType(), python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
-
+    ASSERT_EQ(ast.getReturnType(), python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
 }
\ No newline at end of file
diff --git a/tuplex/utils/include/TypeSystem.h b/tuplex/utils/include/TypeSystem.h
index 6861f24de..1035dff0c 100644
--- a/tuplex/utils/include/TypeSystem.h
+++ b/tuplex/utils/include/TypeSystem.h
@@ -18,6 +18,9 @@
 #include <algorithm>
 #include <TTuple.h>
 
+// need to define new type for d.keys (compound type - dictkeys)
+// make createdictkeystype and createdictvaluestype
+
 namespace python {
 
     class Type {
@@ -345,6 +348,7 @@ namespace python {
         Type createOrGetFunctionType(const Type& param, const Type& ret=Type::EMPTYTUPLE);
         Type createOrGetDictionaryType(const Type& key, const Type& val);
         Type createOrGetListType(const Type& val);
+        // add support for dictkeys and dictvalues types
 
         Type createOrGetTupleType(const std::initializer_list<Type> args);
         Type createOrGetTupleType(const TTuple<Type>& args);

From 891441ae870017348f65a5a48d0207fbe9e338c4 Mon Sep 17 00:00:00 2001
From: Rhea Goyal <rhea_goyal@brown.edu>
Date: Wed, 20 Jul 2022 14:51:56 -0400
Subject: [PATCH 10/23] added more tests/examples for dict typing

---
 tuplex/test/core/DictionaryTyping.cc | 491 ++++++++++++++++++++-------
 1 file changed, 367 insertions(+), 124 deletions(-)

diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc
index b711864c7..281880d32 100644
--- a/tuplex/test/core/DictionaryTyping.cc
+++ b/tuplex/test/core/DictionaryTyping.cc
@@ -17,21 +17,50 @@
 #include <CodegenHelper.h>
 #include <AnnotatedAST.h>
 
+// TEST(DictionaryTyping, Template) {
+//     using namespace tuplex;
+//     using namespace std;
+
+//     auto code = "";
+    
+//     // parse code to AST
+//     auto ast = tuplex::codegen::AnnotatedAST();
+//     ast.parseString(code);
+
+//     // make input typing
+//     python::Type inputType = python::Type::PYOBJECT;
+
+//     // create symbol table (add parameters and types)
+//     ast.addTypeHint("L", inputType);
+//     ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+//     // print type annotated ast
+//     GraphVizGraph graph;
+//     graph.createFromAST(ast.getFunctionAST(), true);
+//     graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/<test_name>.pdf");
+
+//     cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
+
+//     python::Type expected_ret = python::Type::PYOBJECT;
+
+//     // check return type
+//     ASSERT_EQ(ast.getReturnType(), expected_ret);
+// }
+
 TEST(DictionaryTyping, Simple) {
     using namespace tuplex;
     using namespace std;
 
-    // test simple UDF
-    auto simple_c = "def f(L):\n"
-                    "    d = {}\n"
-                    "    k = L[0]\n"
-                    "    d[k] = 0\n"
-                    "    d[k] += 1\n"
-                    "    return d";
+    auto code = "def f(L):\n"
+                "    d = {}\n"
+                "    k = L[0]\n"
+                "    d[k] = 0\n"
+                "    d[k] += 1\n"
+                "    return d";
     
     // parse code to AST
     auto ast = tuplex::codegen::AnnotatedAST();
-    ast.parseString(simple_c);
+    ast.parseString(code);
 
     // make typing
     python::Type inputType = python::Type::makeListType(python::Type::F64);
@@ -43,20 +72,17 @@ TEST(DictionaryTyping, Simple) {
     // print type annotated ast
     GraphVizGraph graph;
     graph.createFromAST(ast.getFunctionAST(), true);
-    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/typed_ast.pdf");
+    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/simple_ast.pdf");
 
     cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
 
     ASSERT_EQ(ast.getReturnType(), python::Type::makeDictionaryType(python::Type::F64, python::Type::I64));
-
-    // TODO: case where d = {0: {}, 1: {0: 10, 1: 15}, 2: None} --> Dict[i64, Option[Dict[i64, i64]]]
 }
 
 TEST(DictionaryTyping, IndexExpression) {
     using namespace tuplex;
     using namespace std;
 
-    // a[2 * k + 1] = n
     auto code = "def f(L):\n"
                 "    d = {}\n"
                 "    k = L[0]\n"
@@ -77,174 +103,391 @@ TEST(DictionaryTyping, IndexExpression) {
     // print type annotated ast
     GraphVizGraph graph;
     graph.createFromAST(ast.getFunctionAST(), true);
-    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/typed_ast_1.pdf");
+    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/index_exp_ast.pdf");
 
     cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
 
     ASSERT_EQ(ast.getReturnType(), python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
 }
 
-TEST(DictionaryTyping, NestedSubscripts) {
+TEST(DictionaryTyping, NestedSubscriptSimple) {
     using namespace tuplex;
     using namespace std;
 
-    // Q: what should I do about the case where dictionaries don't have the same number of entries (is this supported?)
-
-    // a[x][y] = n
-    auto code_1 = "def f(L):\n"
-                  "    d = {0: {0: 10, 1: 100}, 1: {0: 15, 1: 500}}\n"
-                  "    w = L[0]\n"
-                  "    x = L[1]\n"
-                  "    d[w][x] = 15\n"
-                  "    return d";
-
+    auto code = "def f(L):\n"
+                "    d = {0: {0: 10, 1: 100}, 1: {0: 15, 1: 500}}\n"
+                "    w = L[0]\n"
+                "    x = L[1]\n"
+                "    d[w][x] = 15\n"
+                "    return d";
+    
     // parse code to AST
-    auto ast_1 = tuplex::codegen::AnnotatedAST();
-    ast_1.parseString(code_1);
+    auto ast = tuplex::codegen::AnnotatedAST();
+    ast.parseString(code);
 
     // make typing
-    python::Type inputType_1 = python::Type::makeListType(python::Type::I64);
+    python::Type inputType = python::Type::makeListType(python::Type::I64);
 
     // create symbol table
-    ast_1.addTypeHint("L", inputType_1);
-    ast_1.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+    ast.addTypeHint("L", inputType);
+    ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
 
     // print type annotated ast
-    GraphVizGraph graph_1;
-    graph_1.createFromAST(ast_1.getFunctionAST(), true);
-    graph_1.saveAsPDF("typed_ast.pdf");
+    GraphVizGraph graph;
+    graph.createFromAST(ast.getFunctionAST(), true);
+    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/nested_sub_simple.pdf");
 
-    cout<<"return type of function is: "<<ast_1.getReturnType().desc()<<endl;
+    cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
 
-    python::Type expected_ret_1 = python::Type::makeDictionaryType(python::Type::I64, python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
-    ASSERT_EQ(ast_1.getReturnType(), expected_ret_1);
+    python::Type expected_ret = python::Type::makeDictionaryType(python::Type::I64, python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
 
-    // a[x][y] = n, with case where empty dictionary type should be upcasted to Dict[i64, i64]
-    auto code_2 = "def f(L):\n"
-                  "    d = {0: {0: 10, 1: 100}, 1: {}}\n"
-                  "    w = L[0]\n"
-                  "    x = L[1]\n"
-                  "    d[w][x] = 15\n"
-                  "    return d";
+    // check return type
+    ASSERT_EQ(ast.getReturnType(), expected_ret);
+}
 
+TEST(DictionaryTyping, NestedSubscriptUpcast) {
+    using namespace tuplex;
+    using namespace std;
+
+    auto code = "def f(L):\n"
+                "    d = {0: {0: 10, 1: 100}, 1: {}}\n"
+                "    w = L[0]\n"
+                "    x = L[1]\n"
+                "    d[w][x] = 60\n"
+                "    return d";
+    
     // parse code to AST
-    auto ast_2 = tuplex::codegen::AnnotatedAST();
-    ast_2.parseString(code_2);
+    auto ast = tuplex::codegen::AnnotatedAST();
+    ast.parseString(code);
 
-    // make typing
-    python::Type inputType_2 = python::Type::makeListType(python::Type::I64);
+    // make input typing
+    python::Type inputType = python::Type::makeListType(python::Type::I64);
 
-    // create symbol table
-    ast_2.addTypeHint("L", inputType_2);
-    ast_2.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+    // create symbol table (add parameters and types)
+    ast.addTypeHint("L", inputType);
+    ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
 
     // print type annotated ast
-    GraphVizGraph graph_2;
-    graph_2.createFromAST(ast_2.getFunctionAST(), true);
-    graph_2.saveAsPDF("typed_ast.pdf");
+    GraphVizGraph graph;
+    graph.createFromAST(ast.getFunctionAST(), true);
+    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/nested_sub_upcast.pdf");
 
-    cout<<"return type of function is: "<<ast_2.getReturnType().desc()<<endl;
+    cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
 
-    python::Type expected_ret_2 = python::Type::makeDictionaryType(python::Type::I64, python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
-    ASSERT_EQ(ast_2.getReturnType(), expected_ret_2);
+    python::Type expected_ret = python::Type::makeDictionaryType(python::Type::I64, python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
 
-    // a[x][y][z][w] = n
-    // auto code = "def f(L):\n"
-    //             "    d = {0: {0: {0: {0: 10}, 1: {0: 1}}, 1: {0: {0: 15}, 1: {0: 2}}}, 1: {0: {0: {0: 20}, 1: {0: 0}}, 1: {0: {0: 19}, 1: {0: 4}}}}\n"
-    //             "    w = L[0]\n"
-    //             "    x = L[1]\n"
-    //             "    y = L[2]\n"
-    //             "    z = L[3]\n"
-    //             "    d[w][x][y][z] = 60\n"
-    //             "    return d";
+    // check return type
+    ASSERT_EQ(ast.getReturnType(), expected_ret);
+}
 
-    // // parse code to AST
-    // auto ast = tuplex::codegen::AnnotatedAST();
-    // ast.parseString(code);
+TEST(DictionaryTyping, NestedSubscriptOption) {
+    using namespace tuplex;
+    using namespace std;
 
-    // // make typing
-    // python::Type inputType = python::Type::makeListType(python::Type::I64);
+    auto code = "def f(L):\n"
+                "    d = {0: {}, 1: {0: 10, 1: 15}, 2: None}\n"
+                "    w = L[0]\n"
+                "    x = L[1]\n"
+                "    d[w][x] = 60\n"
+                "    return d";
+    
+    // parse code to AST
+    auto ast = tuplex::codegen::AnnotatedAST();
+    ast.parseString(code);
 
-    // // create symbol table
-    // ast.addTypeHint("L", inputType);
-    // ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+    // make input typing
+    python::Type inputType = python::Type::makeListType(python::Type::I64);
 
-    // // print type annotated ast
-    // GraphVizGraph graph;
-    // graph.createFromAST(ast.getFunctionAST(), true);
-    // graph.saveAsPDF("typed_ast.pdf");
+    // create symbol table (add parameters and types)
+    ast.addTypeHint("L", inputType);
+    ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    // print type annotated ast
+    GraphVizGraph graph;
+    graph.createFromAST(ast.getFunctionAST(), true);
+    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/nested_sub_option.pdf");
+
+    cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
 
-    // cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
+    python::Type expected_ret = python::Type::makeDictionaryType(python::Type::I64, python::Type::makeOptionType(python::Type::makeDictionaryType(python::Type::I64, python::Type::I64)));
 
-    // ASSERT_EQ(ast->getInferredType().getReturnType(), python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
+    // check return type
+    ASSERT_EQ(ast.getReturnType(), expected_ret);
 }
 
-TEST(DictionaryTyping, AttributeSubscripts) {
+TEST(DictionaryTyping, NestedSubscriptMultiple) {
     using namespace tuplex;
     using namespace std;
 
-    // how should I write tests that use classes + class attributes?
+    auto code = "def f(L):\n"
+                "    d = {0: {0: {0: {0: 10}, 1: {0: 1}}, 1: {0: {0: 15}, 1: {0: 2}}}, 1: {0: {0: {0: 20}, 1: {0: 0}}, 1: {0: {0: 19}, 1: {0: 4}}}}\n"
+                "    w = L[0]\n"
+                "    x = L[1]\n"
+                "    y = L[2]\n"
+                "    z = L[3]\n"
+                "    d[w][x][y][z] = 60\n"
+                "    return d";
+    
+    // parse code to AST
+    auto ast = tuplex::codegen::AnnotatedAST();
+    ast.parseString(code);
 
-    // a.b[x] = n
-    auto code_1 = "def f(L):\n"
-                  "    d = {0: {0: 10, 1: 100}, 1: {0: 15, 1: 500}}\n"
-                  "    w = L[0]\n"
-                  "    x = L[1]\n"
-                  "    d[w][x] = 15\n"
-                  "    return d";
+    // make input typing
+    python::Type inputType = python::Type::makeListType(python::Type::I64);
 
+    // create symbol table (add parameters and types)
+    ast.addTypeHint("L", inputType);
+    ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    // print type annotated ast
+    GraphVizGraph graph;
+    graph.createFromAST(ast.getFunctionAST(), true);
+    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/nested_sub_multiple.pdf");
+
+    cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
+
+    python::Type expected_ret = 
+        python::Type::makeDictionaryType(
+            python::Type::I64, 
+            python::Type::makeDictionaryType(
+                python::Type::I64, 
+                python::Type::makeDictionaryType(
+                    python::Type::I64, 
+                    python::Type::makeDictionaryType(
+                        python::Type::I64, 
+                        python::Type::I64))));
+
+    // check return type
+    ASSERT_EQ(ast.getReturnType(), expected_ret);
+}
+
+TEST(DictionaryTyping, ControlFlowSimple) {
+    using namespace tuplex;
+    using namespace std;
+
+    auto code = "def f(L):\n"
+                "    d = {}\n"
+                "    d[0] = 0\n"
+                "    d[1] = 0\n"
+                "    if L[0] <= 5:\n"
+                "        d[0] += 1\n"
+                "    else:\n"
+                "        d[1] += 1\n"
+                "    return d";
+    
     // parse code to AST
-    auto ast_1 = tuplex::codegen::AnnotatedAST();
-    ast_1.parseString(code_1);
+    auto ast = tuplex::codegen::AnnotatedAST();
+    ast.parseString(code);
 
-    // make typing
-    python::Type inputType_1 = python::Type::makeListType(python::Type::I64);
+    // make input typing
+    python::Type inputType = python::Type::makeListType(python::Type::I64);
 
-    // create symbol table
-    ast_1.addTypeHint("L", inputType_1);
-    ast_1.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+    // create symbol table (add parameters and types)
+    ast.addTypeHint("L", inputType);
+    ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
 
     // print type annotated ast
-    GraphVizGraph graph_1;
-    graph_1.createFromAST(ast_1.getFunctionAST(), true);
-    graph_1.saveAsPDF("typed_ast.pdf");
+    GraphVizGraph graph;
+    graph.createFromAST(ast.getFunctionAST(), true);
+    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/control_flow_simple.pdf");
 
-    cout<<"return type of function is: "<<ast_1.getReturnType().desc()<<endl;
+    cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
 
-    python::Type expected_ret_1 = python::Type::makeDictionaryType(python::Type::I64, python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
-    ASSERT_EQ(ast_1.getReturnType(), expected_ret_1);
+    python::Type expected_ret = python::Type::makeDictionaryType(python::Type::I64, python::Type::I64);
 
-    // a.b.c[x] = n
-    auto code_2 = "def f(L):\n"
-                  "    d = {0: {0: 10, 1: 100}, 1: {}}\n"
-                  "    w = L[0]\n"
-                  "    x = L[1]\n"
-                  "    d[w][x] = 15\n"
-                  "    return d";
+    // check return type
+    ASSERT_EQ(ast.getReturnType(), expected_ret);
+}
 
+TEST(DictionaryTyping, ControlFlowLoop) {
+    using namespace tuplex;
+    using namespace std;
+
+    auto code = "def f(L):\n"
+                "    d = {}\n"
+                "    d[0] = 0\n"
+                "    d[1] = 0\n"
+                "    for i in L:\n"
+                "        if i <= 5:\n"
+                "            d[0] += 1\n"
+                "        else:\n"
+                "            d[1] += 1\n"
+                "    return d";
+    
     // parse code to AST
-    auto ast_2 = tuplex::codegen::AnnotatedAST();
-    ast_2.parseString(code_2);
+    auto ast = tuplex::codegen::AnnotatedAST();
+    ast.parseString(code);
 
-    // make typing
-    python::Type inputType_2 = python::Type::makeListType(python::Type::I64);
+    // make input typing
+    python::Type inputType = python::Type::makeListType(python::Type::I64);
 
-    // create symbol table
-    ast_2.addTypeHint("L", inputType_2);
-    ast_2.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+    // create symbol table (add parameters and types)
+    ast.addTypeHint("L", inputType);
+    ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    // print type annotated ast
+    GraphVizGraph graph;
+    graph.createFromAST(ast.getFunctionAST(), true);
+    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/control_flow_loop.pdf");
+
+    cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
+
+    python::Type expected_ret = python::Type::makeDictionaryType(python::Type::I64, python::Type::I64);
+
+    // check return type
+    ASSERT_EQ(ast.getReturnType(), expected_ret);
+}
+
+TEST(DictionaryTyping, ControlFlowKeyAssignment) {
+    using namespace tuplex;
+    using namespace std;
+
+    auto code = "def f(L):\n"
+                "    d = {}\n"
+                "    for i in L:\n"
+                "        if (i % 2) not in d.keys():\n"
+                "            d[i % 2] = 0\n"
+                "        if i > 5:\n"
+                "            d[i % 2] += 5\n"
+                "        else:\n"
+                "            d[i % 2] += i\n"
+                "    return d";
+    
+    // parse code to AST
+    auto ast = tuplex::codegen::AnnotatedAST();
+    ast.parseString(code);
+
+    // make input typing
+    python::Type inputType = python::Type::makeListType(python::Type::I64);
+
+    // create symbol table (add parameters and types)
+    ast.addTypeHint("L", inputType);
+    ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    // print type annotated ast
+    GraphVizGraph graph;
+    graph.createFromAST(ast.getFunctionAST(), true);
+    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/control_flow_key_assign.pdf");
+
+    cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
+
+    python::Type expected_ret = python::Type::makeDictionaryType(python::Type::I64, python::Type::I64);
+
+    // check return type
+    ASSERT_EQ(ast.getReturnType(), expected_ret);
+}
+
+TEST(DictionaryTyping, DictionaryInputSimple) {
+    using namespace tuplex;
+    using namespace std;
+
+    auto code = "def f(D):\n"
+                "    D[0] += 1\n"
+                "    D[1] += 2\n"
+                "    D[2] = D[0] + D[1]\n"
+                "    return D";
+    
+    // parse code to AST
+    auto ast = tuplex::codegen::AnnotatedAST();
+    ast.parseString(code);
+
+    // make input typing
+    python::Type inputType = python::Type::makeDictionaryType(python::Type::F64, python::Type::F64);
+
+    // create symbol table (add parameters and types)
+    ast.addTypeHint("D", inputType);
+    ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
 
     // print type annotated ast
-    GraphVizGraph graph_2;
-    graph_2.createFromAST(ast_2.getFunctionAST(), true);
-    graph_2.saveAsPDF("typed_ast.pdf");
+    GraphVizGraph graph;
+    graph.createFromAST(ast.getFunctionAST(), true);
+    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/dict_input_simple.pdf");
+
+    cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
+
+    python::Type expected_ret = python::Type::makeDictionaryType(python::Type::F64, python::Type::F64);
+
+    // check return type
+    ASSERT_EQ(ast.getReturnType(), expected_ret);
+}
 
-    cout<<"return type of function is: "<<ast_2.getReturnType().desc()<<endl;
+TEST(DictionaryTyping, DictionaryInputControlFlow) {
+    using namespace tuplex;
+    using namespace std;
 
-    python::Type expected_ret_2 = python::Type::makeDictionaryType(python::Type::I64, python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
-    ASSERT_EQ(ast_2.getReturnType(), expected_ret_2);
+    auto code = "def f(D):\n"
+                "    if D[0] < D[1]:\n"
+                "        D[0] = D[1]\n"
+                "        return D[1]\n"
+                "    else:\n"
+                "        D[0] = D[2]\n"
+                "        return D[2]";
     
-    // a[x].b[y] = n
-    // a.b[x][y][z] = n
+    // parse code to AST
+    auto ast = tuplex::codegen::AnnotatedAST();
+    ast.parseString(code);
+
+    // make input typing
+    python::Type inputType = python::Type::makeDictionaryType(python::Type::I64, python::Type::F64);
+
+    // create symbol table (add parameters and types)
+    ast.addTypeHint("D", inputType);
+    ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    // print type annotated ast
+    GraphVizGraph graph;
+    graph.createFromAST(ast.getFunctionAST(), true);
+    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/dict_input_control_flow.pdf");
+
+    cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
+
+    python::Type expected_ret = python::Type::makeDictionaryType(python::Type::I64, python::Type::F64);
+
+    // check return type
+    ASSERT_EQ(ast.getReturnType(), expected_ret);
+}
+
+TEST(DictionaryTyping, Everything) {
+    using namespace tuplex;
+    using namespace std;
+
+    // example: we have a dictionary mapping countries to the continent they are in
+    auto code = "def f(D):\n"
+                "    continents = {\n"
+                "        'Africa': 0,\n"
+                "        'Asia': 0,\n"
+                "        'Europe': 0,\n"
+                "        'Other': 0\n"
+                "    }\n"
+                "    for for continent in D.values():\n"
+                "        if continent not in continents.keys():\n"
+                "            continents['Other'] += 1\n"
+                "        else:\n"
+                "            continents[continent] += 1\n"
+                "    return continents";
+    
+    // parse code to AST
+    auto ast = tuplex::codegen::AnnotatedAST();
+    ast.parseString(code);
+
+    // make input typing
+    python::Type inputType = python::Type::makeDictionaryType(python::Type::STRING, python::Type::STRING);
+
+    // create symbol table (add parameters and types)
+    ast.addTypeHint("D", inputType);
+    ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    // print type annotated ast
+    GraphVizGraph graph;
+    graph.createFromAST(ast.getFunctionAST(), true);
+    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/everything.pdf");
+
+    cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
+
+    python::Type expected_ret = python::Type::makeDictionaryType(python::Type::STRING, python::Type::I64);
+
+    // check return type
+    ASSERT_EQ(ast.getReturnType(), expected_ret);
 }
 
 TEST(DictionaryTyping, Count) {
@@ -265,7 +508,7 @@ TEST(DictionaryTyping, Count) {
     ast.parseString(count_c);
 
     // make typing
-    python::Type inputType = python::Type::makeListType(python::Type::I64);
+    python::Type inputType = python::Type::makeListType(python::Type::STRING);
 
     // create symbol table
     ast.addTypeHint("L", inputType);
@@ -278,5 +521,5 @@ TEST(DictionaryTyping, Count) {
 
     cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
 
-    ASSERT_EQ(ast.getReturnType(), python::Type::makeDictionaryType(python::Type::I64, python::Type::I64));
+    ASSERT_EQ(ast.getReturnType(), python::Type::makeDictionaryType(python::Type::STRING, python::Type::I64));
 }
\ No newline at end of file

From cf32f5710faf7091bf8fa4919767d757796305f2 Mon Sep 17 00:00:00 2001
From: Rhea Goyal <rhea_goyal@brown.edu>
Date: Fri, 22 Jul 2022 15:01:00 -0400
Subject: [PATCH 11/23] fixed typing for empty dicts and option case

---
 tuplex/codegen/src/TypeAnnotatorVisitor.cc |  63 ++++++-----
 tuplex/test/core/DictionaryTyping.cc       | 117 ++++++++++++++++++++-
 tuplex/utils/include/TypeSystem.h          |  23 +++-
 3 files changed, 169 insertions(+), 34 deletions(-)

diff --git a/tuplex/codegen/src/TypeAnnotatorVisitor.cc b/tuplex/codegen/src/TypeAnnotatorVisitor.cc
index f0b1bd212..46c758d93 100644
--- a/tuplex/codegen/src/TypeAnnotatorVisitor.cc
+++ b/tuplex/codegen/src/TypeAnnotatorVisitor.cc
@@ -826,7 +826,7 @@ namespace tuplex {
 
     void TypeAnnotatorVisitor::visit(NDictionary* dict) {
         ApatheticVisitor::visit(dict);
-
+        
         // Try to make it Dictionary[Key, Val] type (if every pair has the same key type and val type, respectively)
         bool is_key_val = true;
         python::Type keyType, valType;
@@ -835,8 +835,20 @@ namespace tuplex {
             valType = dict->_pairs[0].second->getInferredType(); // save the key type, val type of the first pair
             for(const auto& p: dict->_pairs) { // check if every pair has the same key type, val type
                 if(p.first->getInferredType() != keyType || p.second->getInferredType() != valType) {
-                    is_key_val = false; // if they are not the same, then it is not of type Dictionary[Key, Val]
-                    break;
+                    // also for None case
+                    if (valType.isDictionaryType() && p.second->getInferredType() == python::Type::EMPTYDICT) {
+                        continue;
+                    } else if (valType == python::Type::EMPTYDICT && p.second->getInferredType().isDictionaryType()) {
+                        // upcast valType
+                        valType = p.second->getInferredType();
+                    } else if (valType == python::Type::NULLVALUE) {
+                        valType = python::Type::makeOptionType(p.second->getInferredType());
+                    } else if (p.second->getInferredType() == python::Type::NULLVALUE) {
+                        valType = python::Type::makeOptionType(valType);
+                    } else {
+                        is_key_val = false; // if they are not the same, then it is not of type Dictionary[Key, Val]
+                        break;
+                    }
                 }
             }
 
@@ -1225,18 +1237,15 @@ namespace tuplex {
         return target->type() == ASTNodeType::Subscription;
     }
 
-    // note: "target" refers to the LHS of the assign (should be a subscription), and then 
-    //       the value of every subsequent subscription
     void TypeAnnotatorVisitor::recursive_set_subscript_types(NSubscription* target, python::Type value_type) {
         target->_expression->accept(*this);
+        python::Type subscription_type = value_type;
         python::Type index_type = target->_expression->getInferredType();
-        python::Type new_value_type = python::Type::makeDictionaryType(index_type, value_type);
+        python::Type new_value_type = python::Type::makeDictionaryType(index_type, subscription_type);
 
         if (target->_value->type() == ASTNodeType::Subscription) {
             /* if the next target is a subscription, do recursive_set_subscript_types 
                on the next target, with value_type being Dict[index_type, value_type] */            
-            // set type of subscription
-            // target->setInferredType();
             recursive_set_subscript_types((NSubscription*)target->_value, new_value_type);
         } else if (target->_value->type() == ASTNodeType::Identifier) {
             // if the next target is an identifier (e.g. d[0])
@@ -1244,17 +1253,13 @@ namespace tuplex {
             // check if the type the identifier maps to is something subscriptable (for now, just a dictionary)
             if (_nameTable[id->_name].isDictionaryType()) {
                 python::Type curr_type = _nameTable[id->_name];
-
+                
                 if (curr_type == python::Type::EMPTYDICT) {
                     // we can just upcast type to Dict[index_type, value_type]
                     assignHelper(id, new_value_type);
-                    // set type of subscription: value_type
-                    target->setInferredType(value_type);
                 } else if (curr_type == python::Type::GENERICDICT) {
                     // type remains generic dict (and need to set flag in annotator?)
-                    // Q: Do I need to do anything in this branch?
-                    // assignHelper(python::Type::PYOBJECT, python::Type::PYOBJECT);
-                    target->setInferredType(python::Type::PYOBJECT);
+                    subscription_type = python::Type::PYOBJECT;
                 } else {
                     // check if index_type and new_value_type match current index type and value type
                     if (curr_type.keyType() != index_type) {
@@ -1263,19 +1268,24 @@ namespace tuplex {
                     }
 
                     if (curr_type.valueType() != value_type) {
-                        // upcast value type to PYOBJECT and set flag
-                        new_value_type = python::Type::makeDictionaryType(index_type, python::Type::PYOBJECT);
+                        if (curr_type.valueType().isOptionType()) {
+                            // case where dictionary values are nullable
+                            // check if non-null option is the same type as value_type
+                            if (curr_type.valueType().elementType() == value_type) {
+                                // need to make subscription_type an option type instead
+                                subscription_type = python::Type::makeOptionType(subscription_type);
+                            } else {
+                                // upcast value type to PYOBJECT and set flag
+                                subscription_type = python::Type::PYOBJECT;
+                            }
+                        } else {
+                            // upcast value type to PYOBJECT and set flag
+                            subscription_type = python::Type::PYOBJECT;
+                        }
                     }
 
+                    new_value_type = python::Type::makeDictionaryType(index_type, subscription_type);
                     assignHelper(id, new_value_type);
-
-                    if (curr_type.valueType() != value_type) {
-                        // set subscript type to PYOBJECT
-                        target->setInferredType(python::Type::PYOBJECT);
-                    } else {
-                        // set subscript type to value_type
-                        target->setInferredType(value_type);
-                    }
                 }
             } else {
                 // otherwise, raise an error (identifier type not subscriptable)
@@ -1288,9 +1298,10 @@ namespace tuplex {
             if (!target->_value->getInferredType().isDictionaryType()) {
                 error(target->_value->getInferredType().desc() + " is not (yet) subscriptable; only dictionaries supported");
             }
-
-            // TODO: anything else here?
         }
+
+        // set type of subscription node (target)
+        target->setInferredType(subscription_type);
     }
 
     void TypeAnnotatorVisitor::visit(NAssign *assign) {
diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc
index 281880d32..5b6cfeeba 100644
--- a/tuplex/test/core/DictionaryTyping.cc
+++ b/tuplex/test/core/DictionaryTyping.cc
@@ -79,6 +79,108 @@ TEST(DictionaryTyping, Simple) {
     ASSERT_EQ(ast.getReturnType(), python::Type::makeDictionaryType(python::Type::F64, python::Type::I64));
 }
 
+TEST(DictionaryTyping, KeyTypeChange) {
+    using namespace tuplex;
+    using namespace std;
+
+    auto code = "def f(L):\n"
+                "    d = {}\n"
+                "    d['a'] = L[0]\n"
+                "    d[2] = L[1]\n"
+                "    return d";
+    
+    // parse code to AST
+    auto ast = tuplex::codegen::AnnotatedAST();
+    ast.parseString(code);
+
+    // make input typing
+    python::Type inputType = python::Type::makeListType(python::Type::I64);
+
+    // create symbol table (add parameters and types)
+    ast.addTypeHint("L", inputType);
+    ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    // print type annotated ast
+    GraphVizGraph graph;
+    graph.createFromAST(ast.getFunctionAST(), true);
+    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/key_type_change.pdf");
+
+    cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
+
+    python::Type expected_ret = python::Type::makeDictionaryType(python::Type::PYOBJECT, python::Type::I64);
+
+    // check return type
+    ASSERT_EQ(ast.getReturnType(), expected_ret);
+}
+
+TEST(DictionaryTyping, ValueTypeChange) {
+    using namespace tuplex;
+    using namespace std;
+
+    auto code = "def f(L):\n"
+                "    d = {}\n"
+                "    d[0] = L[0]\n"
+                "    d[1] = L[1]\n"
+                "    return d";
+    
+    // parse code to AST
+    auto ast = tuplex::codegen::AnnotatedAST();
+    ast.parseString(code);
+
+    // make input typing
+    python::Type inputType = python::Type::GENERICLIST;
+
+    // create symbol table (add parameters and types)
+    ast.addTypeHint("L", inputType);
+    ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    // print type annotated ast
+    GraphVizGraph graph;
+    graph.createFromAST(ast.getFunctionAST(), true);
+    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/value_type_change.pdf");
+
+    cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
+
+    python::Type expected_ret = python::Type::makeDictionaryType(python::Type::I64, python::Type::PYOBJECT);
+
+    // check return type
+    ASSERT_EQ(ast.getReturnType(), expected_ret);
+}
+
+TEST(DictionaryTyping, DictTypeChange) {
+    using namespace tuplex;
+    using namespace std;
+
+    auto code = "def f(L):\n"
+                "    d = {}\n"
+                "    d[0] = L[0]\n"
+                "    d['one'] = L[1]\n"
+                "    return d";
+    
+    // parse code to AST
+    auto ast = tuplex::codegen::AnnotatedAST();
+    ast.parseString(code);
+
+    // make input typing
+    python::Type inputType = python::Type::GENERICLIST;
+
+    // create symbol table (add parameters and types)
+    ast.addTypeHint("L", inputType);
+    ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    // print type annotated ast
+    GraphVizGraph graph;
+    graph.createFromAST(ast.getFunctionAST(), true);
+    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/dict_type_change.pdf");
+
+    cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
+
+    python::Type expected_ret = python::Type::makeDictionaryType(python::Type::PYOBJECT, python::Type::PYOBJECT);
+
+    // check return type
+    ASSERT_EQ(ast.getReturnType(), expected_ret);
+}
+
 TEST(DictionaryTyping, IndexExpression) {
     using namespace tuplex;
     using namespace std;
@@ -338,7 +440,10 @@ TEST(DictionaryTyping, ControlFlowLoop) {
     ASSERT_EQ(ast.getReturnType(), expected_ret);
 }
 
+// TODO: need case where value being assigned to the same key is of a different type in each if/else branch
+
 TEST(DictionaryTyping, ControlFlowKeyAssignment) {
+    // currently fails; need to add support for dict_keys
     using namespace tuplex;
     using namespace std;
 
@@ -382,9 +487,9 @@ TEST(DictionaryTyping, DictionaryInputSimple) {
     using namespace std;
 
     auto code = "def f(D):\n"
-                "    D[0] += 1\n"
-                "    D[1] += 2\n"
-                "    D[2] = D[0] + D[1]\n"
+                "    D[0.0] += 1\n"
+                "    D[1.0] += 2\n"
+                "    D[2.0] = D[0] + D[1]\n"
                 "    return D";
     
     // parse code to AST
@@ -441,13 +546,14 @@ TEST(DictionaryTyping, DictionaryInputControlFlow) {
 
     cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
 
-    python::Type expected_ret = python::Type::makeDictionaryType(python::Type::I64, python::Type::F64);
+    python::Type expected_ret = python::Type::F64;
 
     // check return type
     ASSERT_EQ(ast.getReturnType(), expected_ret);
 }
 
 TEST(DictionaryTyping, Everything) {
+    // expected to fail; need to add support for dict_keys and dict_values
     using namespace tuplex;
     using namespace std;
 
@@ -459,7 +565,7 @@ TEST(DictionaryTyping, Everything) {
                 "        'Europe': 0,\n"
                 "        'Other': 0\n"
                 "    }\n"
-                "    for for continent in D.values():\n"
+                "    for continent in D.values():\n"
                 "        if continent not in continents.keys():\n"
                 "            continents['Other'] += 1\n"
                 "        else:\n"
@@ -491,6 +597,7 @@ TEST(DictionaryTyping, Everything) {
 }
 
 TEST(DictionaryTyping, Count) {
+    // expected to fail; need to add support for dict_keys
     using namespace tuplex;
     using namespace std;
 
diff --git a/tuplex/utils/include/TypeSystem.h b/tuplex/utils/include/TypeSystem.h
index 1035dff0c..217d46db7 100644
--- a/tuplex/utils/include/TypeSystem.h
+++ b/tuplex/utils/include/TypeSystem.h
@@ -52,7 +52,10 @@ namespace python {
         static const Type MODULE; //! generic module object, used in symbol table
         static const Type ITERATOR; //! iterator/generator type
         static const Type EMPTYITERATOR; //! special type for empty iterator
-
+        // TODO: add types for dict_keys and dict_values
+        static const Type DICTKEYS; //! special type for list of dictionary keys
+        static const Type DICTVALUES; //! special type for list of dictionary values
+        
         // define two special types, used in the inference to describe bounds
         // any is a subtype of everything
         static const Type ANY;
@@ -95,6 +98,7 @@ namespace python {
         bool isTupleType() const;
         bool isFunctionType() const;
         bool isDictionaryType() const;
+        // TODO: add is...Type for dict_keys and dict_values
         bool isListType() const;
         bool isNumericType() const;
         bool isOptionType() const;
@@ -137,6 +141,8 @@ namespace python {
                 return false;
             }
 
+            // TODO: add case for iterating through dict_keys and dict_values
+
             return false;
         }
         /*!
@@ -160,36 +166,42 @@ namespace python {
          */
         Type yieldType() const;
 
+        // TODO: add dict_keys and dict_values cases to below function
         /*!
          * checks whether type contains one or more of Unknown, Inf, Any.
          * @return
          */
         bool isIllDefined() const;
 
+        // TODO: add dict_keys and dict_values cases to below function
         /*!
          * checks whether type is of fixed size. I.e. also a tuple of fixed size datatypes will yield true
          * @return
          */
         bool isFixedSizeType() const;
 
+        // TODO: add dict_keys and dict_values cases to below function
         /*!
          * if tuple of nulls/empty dict etc.
          * @return
          */
         bool isZeroSerializationSize() const;
 
+        // TODO: add dict_keys and dict_values cases to below function
          /*!
          * checks whether given type is a primtive type. Currently true for bool, i64, double, str
          * @return
          */
         bool isPrimitiveType() const;
 
+        // TODO: add dict_keys and dict_values cases to below function
         /*!
          * check whether a given type is iterable. Currently true for iterator, list, tuple, string, range and dictionary.
          * @return
          */
         bool isIterableType() const;
 
+        // TODO: add dict_keys and dict_values cases to below function
         /*!
          * check whether this is a base class of derived. E.g. int.subclass(float) is true,
          * but float.subclass(int) is false
@@ -198,12 +210,14 @@ namespace python {
          */
         bool isSubclass(const Type& derived) const;
 
+        // TODO: add dict_keys and dict_values cases to below function
         /*!
          * retrieves a vector of all types which are base classes of this type
          * @return all types which are a base class
          */
         std::vector<Type> baseClasses() const;
 
+        // TODO: add dict_keys and dict_values cases to below function
         /*!
          * retrieves vector of all types which are derived from this type
          * @return vector of type, may be empty.
@@ -219,6 +233,8 @@ namespace python {
 
         static Type makeListType(const python::Type &elementType);
 
+        // TODO: add make...Type for dict_keys and dict_values
+
         /*!
          * create iterator type from yieldType.
          * @param yieldType
@@ -285,6 +301,7 @@ namespace python {
             CLASS,
             OPTION, // for nullable
             ITERATOR
+            // TODO: add dict_keys and dict_values types (?)
         };
 
         struct TypeEntry {
@@ -328,6 +345,7 @@ namespace python {
         bool isOptionType(const Type& t) const;
         bool isListType(const Type& t) const;
         bool isIteratorType(const Type& t) const;
+        // TODO: add is...Type functions for dict_keys and dict_values
 
         std::vector<Type> parameters(const Type& t) const;
         Type returnType(const Type& t) const;
@@ -348,8 +366,7 @@ namespace python {
         Type createOrGetFunctionType(const Type& param, const Type& ret=Type::EMPTYTUPLE);
         Type createOrGetDictionaryType(const Type& key, const Type& val);
         Type createOrGetListType(const Type& val);
-        // add support for dictkeys and dictvalues types
-
+        // TODO: add create...Type functions for dict_keys and dict_values
         Type createOrGetTupleType(const std::initializer_list<Type> args);
         Type createOrGetTupleType(const TTuple<Type>& args);
         Type createOrGetTupleType(const std::vector<Type>& args);

From ce3709a571f80655fb98393470b377e0c71e7c42 Mon Sep 17 00:00:00 2001
From: Rhea Goyal <rhea_goyal@brown.edu>
Date: Tue, 26 Jul 2022 11:49:31 -0400
Subject: [PATCH 12/23] added dict_keys and dict_values types

---
 tuplex/codegen/src/SymbolTable.cc    | 28 +++++++++++-
 tuplex/test/core/DictionaryTyping.cc |  2 +-
 tuplex/utils/include/TypeSystem.h    | 37 +++++++---------
 tuplex/utils/src/TypeSystem.cc       | 64 ++++++++++++++++++++++++++--
 4 files changed, 104 insertions(+), 27 deletions(-)

diff --git a/tuplex/codegen/src/SymbolTable.cc b/tuplex/codegen/src/SymbolTable.cc
index 99f9bb9d6..9e4a629f1 100644
--- a/tuplex/codegen/src/SymbolTable.cc
+++ b/tuplex/codegen/src/SymbolTable.cc
@@ -409,6 +409,32 @@ namespace tuplex {
 
         // i.e. type depending on input
 
+        // typer function for dict.keys() and dict.values()
+        // this currently doesn't handle empty dicts...
+        std::vector<python::Type> all_types = {python::Type::BOOLEAN, python::Type::I64, python::Type::F64,
+                                                python::Type::STRING, python::Type::PYOBJECT};
+        for (const auto &t1 : all_types) {
+            for (const auto &t2 : all_types) {
+
+                auto dict_type = python::Type::makeDictionaryType(t1, t2);
+
+                // create specialized dict type
+                auto dict_sym = std::make_shared<Symbol>(dict_type.desc(), "dictionary", t1, SymbolType::TYPE);
+                // add here symbol so other functions can be easily added.
+                addSymbol(dict_sym);
+
+                // dict_keys
+                auto keys_sym = std::make_shared<Symbol>("keys", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, python::Type::makeDictKeysType(dict_type.keyType())));
+                dict_sym->addAttribute(keys_sym);
+
+                // dict_keys
+                auto values_sym = std::make_shared<Symbol>("values", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, python::Type::makeDictValuesType(dict_type.valueType())));
+                dict_sym->addAttribute(values_sym);
+            }
+        }
+
+        // addBuiltinTypeAttribute(python::Type::EMPTYDICT, "keys", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, ???));
+
         // for pop/popitem things are actually a bit more complicated...
         // i.e. the default keyword may introduce an issue...
         // https://www.programiz.com/python-programming/methods/dictionary/pop
@@ -477,7 +503,7 @@ namespace tuplex {
                                                                        python::Type::makeTupleType({dict_type.keyType(), dict_type.valueType()})));
 
             }
-
+        
         // for the weird case of the default object having different type than the dict value type, use tracing.
 
         // another good design for builtin functions could be:
diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc
index 5b6cfeeba..f7e7aa097 100644
--- a/tuplex/test/core/DictionaryTyping.cc
+++ b/tuplex/test/core/DictionaryTyping.cc
@@ -624,7 +624,7 @@ TEST(DictionaryTyping, Count) {
     // print type annotated ast
     GraphVizGraph graph;
     graph.createFromAST(ast.getFunctionAST(), true);
-    graph.saveAsPDF("typed_ast.pdf");
+    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/dict_count.pdf");
 
     cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
 
diff --git a/tuplex/utils/include/TypeSystem.h b/tuplex/utils/include/TypeSystem.h
index 217d46db7..1b0674447 100644
--- a/tuplex/utils/include/TypeSystem.h
+++ b/tuplex/utils/include/TypeSystem.h
@@ -52,9 +52,8 @@ namespace python {
         static const Type MODULE; //! generic module object, used in symbol table
         static const Type ITERATOR; //! iterator/generator type
         static const Type EMPTYITERATOR; //! special type for empty iterator
-        // TODO: add types for dict_keys and dict_values
-        static const Type DICTKEYS; //! special type for list of dictionary keys
-        static const Type DICTVALUES; //! special type for list of dictionary values
+        // static const Type DICTKEYS; //! special type for list of dictionary keys
+        // static const Type DICTVALUES; //! special type for list of dictionary values
         
         // define two special types, used in the inference to describe bounds
         // any is a subtype of everything
@@ -98,7 +97,6 @@ namespace python {
         bool isTupleType() const;
         bool isFunctionType() const;
         bool isDictionaryType() const;
-        // TODO: add is...Type for dict_keys and dict_values
         bool isListType() const;
         bool isNumericType() const;
         bool isOptionType() const;
@@ -107,6 +105,8 @@ namespace python {
         bool hasVariablePositionalArgs() const;
         bool isExceptionType() const;
         bool isIteratorType() const;
+        bool isDictKeysType() const;
+        bool isDictValuesType() const;
 
         inline bool isGeneric() const {
             if(_hash == python::Type::PYOBJECT._hash ||
@@ -121,7 +121,7 @@ namespace python {
                 return false;
             }
 
-            if(isListType() || isOptionType()) {
+            if(isListType() || isOptionType() || isDictKeysType() || isDictValuesType()) {
                 if(elementType().isGeneric())
                     return true;
                 return false;
@@ -141,8 +141,6 @@ namespace python {
                 return false;
             }
 
-            // TODO: add case for iterating through dict_keys and dict_values
-
             return false;
         }
         /*!
@@ -166,42 +164,36 @@ namespace python {
          */
         Type yieldType() const;
 
-        // TODO: add dict_keys and dict_values cases to below function
         /*!
          * checks whether type contains one or more of Unknown, Inf, Any.
          * @return
          */
         bool isIllDefined() const;
 
-        // TODO: add dict_keys and dict_values cases to below function
         /*!
          * checks whether type is of fixed size. I.e. also a tuple of fixed size datatypes will yield true
          * @return
          */
         bool isFixedSizeType() const;
 
-        // TODO: add dict_keys and dict_values cases to below function
         /*!
          * if tuple of nulls/empty dict etc.
          * @return
          */
         bool isZeroSerializationSize() const;
 
-        // TODO: add dict_keys and dict_values cases to below function
          /*!
          * checks whether given type is a primtive type. Currently true for bool, i64, double, str
          * @return
          */
         bool isPrimitiveType() const;
 
-        // TODO: add dict_keys and dict_values cases to below function
         /*!
-         * check whether a given type is iterable. Currently true for iterator, list, tuple, string, range and dictionary.
+         * check whether a given type is iterable. Currently true for iterator, list, tuple, string, range, dictionary, dict_keys, and dict_values.
          * @return
          */
         bool isIterableType() const;
 
-        // TODO: add dict_keys and dict_values cases to below function
         /*!
          * check whether this is a base class of derived. E.g. int.subclass(float) is true,
          * but float.subclass(int) is false
@@ -210,14 +202,12 @@ namespace python {
          */
         bool isSubclass(const Type& derived) const;
 
-        // TODO: add dict_keys and dict_values cases to below function
         /*!
          * retrieves a vector of all types which are base classes of this type
          * @return all types which are a base class
          */
         std::vector<Type> baseClasses() const;
 
-        // TODO: add dict_keys and dict_values cases to below function
         /*!
          * retrieves vector of all types which are derived from this type
          * @return vector of type, may be empty.
@@ -233,7 +223,8 @@ namespace python {
 
         static Type makeListType(const python::Type &elementType);
 
-        // TODO: add make...Type for dict_keys and dict_values
+        static Type makeDictKeysType(const python::Type& keyType);
+        static Type makeDictValuesType(const python::Type& valType);
 
         /*!
          * create iterator type from yieldType.
@@ -297,11 +288,12 @@ namespace python {
             FUNCTION,
             TUPLE,
             DICTIONARY,
+            DICT_KEYS,
+            DICT_VALUES,
             LIST,
             CLASS,
             OPTION, // for nullable
             ITERATOR
-            // TODO: add dict_keys and dict_values types (?)
         };
 
         struct TypeEntry {
@@ -341,12 +333,13 @@ namespace python {
 
         bool isFunctionType(const Type& t) const;
         bool isDictionaryType(const Type& t) const;
+        bool isDictKeysType(const Type& t);
+        bool isDictValuesType(const Type& t);
         bool isTupleType(const Type& t) const;
         bool isOptionType(const Type& t) const;
         bool isListType(const Type& t) const;
         bool isIteratorType(const Type& t) const;
-        // TODO: add is...Type functions for dict_keys and dict_values
-
+        
         std::vector<Type> parameters(const Type& t) const;
         Type returnType(const Type& t) const;
 
@@ -365,15 +358,15 @@ namespace python {
         // right now, no tuples or other weird types...
         Type createOrGetFunctionType(const Type& param, const Type& ret=Type::EMPTYTUPLE);
         Type createOrGetDictionaryType(const Type& key, const Type& val);
+        Type createOrGetDictKeysType(const Type& key);
+        Type createOrGetDictValuesType(const Type& val);
         Type createOrGetListType(const Type& val);
-        // TODO: add create...Type functions for dict_keys and dict_values
         Type createOrGetTupleType(const std::initializer_list<Type> args);
         Type createOrGetTupleType(const TTuple<Type>& args);
         Type createOrGetTupleType(const std::vector<Type>& args);
         Type createOrGetOptionType(const Type& type);
         Type createOrGetIteratorType(const Type& yieldType);
 
-
         Type getByName(const std::string& name);
 
         // helper function to connect type system to codegen
diff --git a/tuplex/utils/src/TypeSystem.cc b/tuplex/utils/src/TypeSystem.cc
index 2fd3fe064..f38864999 100644
--- a/tuplex/utils/src/TypeSystem.cc
+++ b/tuplex/utils/src/TypeSystem.cc
@@ -147,6 +147,24 @@ namespace python {
         return registerOrGetType(name, AbstractType::DICTIONARY, {key, val});
     }
 
+    Type TypeFactory::createOrGetDictKeysType(const Type& key) {
+        // dict_keys view type over `key`. The "dict_keys" prefix keeps the registered name distinct from the
+        // "[..." name createOrGetListType builds (NOTE(review): assumes registerOrGetType matches by name - confirm).
+        std::string name("dict_keys[");
+        name += TypeFactory::instance().getDesc(key._hash);
+        name += "]";
+        return registerOrGetType(name, AbstractType::DICT_KEYS, {key});
+    }
+
+    Type TypeFactory::createOrGetDictValuesType(const Type& val) {
+        // dict_values view type over `val`. The "dict_values" prefix keeps the registered name distinct from the
+        // "[..." name createOrGetListType builds (NOTE(review): assumes registerOrGetType matches by name - confirm).
+        std::string name("dict_values[");
+        name += TypeFactory::instance().getDesc(val._hash);
+        name += "]";
+        return registerOrGetType(name, AbstractType::DICT_VALUES, {val});
+    }
+
     Type TypeFactory::createOrGetListType(const Type &val) {
         std::string name;
         name += "[";
@@ -275,6 +293,14 @@ namespace python {
         return TypeFactory::instance().isIteratorType(*this);
     }
 
+    bool Type::isDictKeysType() const { // delegates to TypeFactory::instance(), passing this type
+        return TypeFactory::instance().isDictKeysType(*this);
+    }
+    
+    bool Type::isDictValuesType() const { // delegates to TypeFactory::instance(), passing this type
+        return TypeFactory::instance().isDictValuesType(*this);
+    }
+
     Type Type::getReturnType() const {
         // first make sure this a function type!
         if( ! (TypeFactory::instance().isFunctionType(*this) ||
@@ -311,6 +337,22 @@ namespace python {
         return type == AbstractType::DICTIONARY || t == Type::EMPTYDICT || t == Type::GENERICDICT;
     }
 
+    bool TypeFactory::isDictKeysType(const Type& t) {
+        // A hash that is not present in _typeMap cannot be a dict_keys type;
+        // for hashes that are present, the stored abstract kind decides.
+        const auto entry = _typeMap.find(t._hash);
+        const bool registered = (entry != _typeMap.end());
+        return registered && entry->second._type == AbstractType::DICT_KEYS;
+    }
+
+    bool TypeFactory::isDictValuesType(const Type& t) {
+        // A hash that is not present in _typeMap cannot be a dict_values type;
+        // for hashes that are present, the stored abstract kind decides.
+        const auto entry = _typeMap.find(t._hash);
+        const bool registered = (entry != _typeMap.end());
+        return registered && entry->second._type == AbstractType::DICT_VALUES;
+    }
+
     bool TypeFactory::isListType(const Type &t) const {
         auto it = _typeMap.find(t._hash);
         if(it == _typeMap.end())
@@ -382,8 +424,8 @@ namespace python {
     }
 
     Type Type::elementType() const {
-        if(isListType()) {
-            assert(isListType() && _hash != EMPTYLIST._hash);
+        if(isListType() || isDictKeysType() || isDictValuesType()) {
+            assert((isListType() && _hash != EMPTYLIST._hash) || isDictKeysType() || isDictValuesType());
             auto& factory = TypeFactory::instance();
             auto it = factory._typeMap.find(_hash);
             assert(it != factory._typeMap.end());
@@ -413,7 +455,7 @@ namespace python {
     }
 
     bool Type::isIterableType() const {
-        return (*this).isIteratorType() || (*this).isListType() || (*this).isTupleType() || *this == python::Type::STRING || *this == python::Type::RANGE || (*this).isDictionaryType();
+        return (*this).isIteratorType() || (*this).isListType() || (*this).isTupleType() || *this == python::Type::STRING || *this == python::Type::RANGE || (*this).isDictionaryType() || (*this).isDictKeysType() || (*this).isDictValuesType();
     }
 
     bool Type::isFixedSizeType() const {
@@ -447,6 +489,10 @@ namespace python {
         // ==> base type decides!
         if(isOptionType())
             return withoutOptions().isFixedSizeType();
+        
+        // dict_keys and dict_values are both immutable
+        if(isDictKeysType() || isDictValuesType())
+            return true;
 
         // functions, dictionaries, and lists are never a fixed type
         return false;
@@ -501,6 +547,10 @@ namespace python {
             if(elementType().isIllDefined())
                 return true;
             return false;
+        } else if (isDictKeysType() || isDictValuesType()) {
+            if (elementType().isIllDefined())
+                return true;
+            return false;
         } else {
             // must be primitive, directly check
             return    *this == Type::UNKNOWN
@@ -525,6 +575,14 @@ namespace python {
         return python::TypeFactory::instance().createOrGetDictionaryType(keyType, valType);
     }
 
+    Type Type::makeDictKeysType(const python::Type& keyType) { // dict_keys type over keyType, obtained from the TypeFactory
+        return python::TypeFactory::instance().createOrGetDictKeysType(keyType);
+    }
+
+    Type Type::makeDictValuesType(const python::Type& valType) { // dict_values type over valType, obtained from the TypeFactory
+        return python::TypeFactory::instance().createOrGetDictValuesType(valType);
+    }
+
     Type Type::makeListType(const python::Type &elementType){
 #warning "Nested lists are not yet supported!"
         return python::TypeFactory::instance().createOrGetListType(elementType);

From 7be771871dfec570a6c63175391a367148528e25 Mon Sep 17 00:00:00 2001
From: Leonhard Spiegelberg <leonhard_spiegelberg@brown.edu>
Date: Wed, 27 Jul 2022 12:52:54 -0400
Subject: [PATCH 13/23] compiles now, emptydict promotion

---
 tuplex/codegen/include/SymbolTable.h       |  8 +++
 tuplex/codegen/src/SymbolTable.cc          | 81 ++++++++++++++++------
 tuplex/codegen/src/TypeAnnotatorVisitor.cc | 15 +++-
 tuplex/test/core/DictionaryTyping.cc       |  2 +-
 tuplex/utils/include/TypeSystem.h          |  1 +
 tuplex/utils/src/TypeSystem.cc             | 17 ++++-
 6 files changed, 100 insertions(+), 24 deletions(-)

diff --git a/tuplex/codegen/include/SymbolTable.h b/tuplex/codegen/include/SymbolTable.h
index c6aa32f89..3b14a721d 100644
--- a/tuplex/codegen/include/SymbolTable.h
+++ b/tuplex/codegen/include/SymbolTable.h
@@ -175,6 +175,14 @@ namespace tuplex {
          */
         void addBuiltinTypeAttribute(const python::Type& builtinType, const std::string& name, const python::Type& type);
 
+        /*!
+         * add an attribute to a builtin type, e.g. dict.keys()
+         * @param builtinType to which type to add the function
+         * @param name name of the attribute
+         * @param typer a dynamic typing function
+         */
+        void addBuiltinTypeAttribute(const python::Type& builtinType, const std::string& name, std::function<python::Type(const python::Type&)> typer);
+
         /*!
          * checks whether a symbol can be looked up or not
          * @param symbol
diff --git a/tuplex/codegen/src/SymbolTable.cc b/tuplex/codegen/src/SymbolTable.cc
index 9e4a629f1..cfa9e3f84 100644
--- a/tuplex/codegen/src/SymbolTable.cc
+++ b/tuplex/codegen/src/SymbolTable.cc
@@ -407,31 +407,43 @@ namespace tuplex {
 
         // for dict, list, tuple use generic type version!
 
-        // i.e. type depending on input
+        // for keys()/values() use generic dict and let symbol table create specialized type on the fly using
+        // typer function
+        {
+            addBuiltinTypeAttribute(python::Type::GENERICDICT, "keys", [](const python::Type& parameterType) {
 
-        // typer function for dict.keys() and dict.values()
-        // this currently doesn't handle empty dicts...
-        std::vector<python::Type> all_types = {python::Type::BOOLEAN, python::Type::I64, python::Type::F64,
-                                                python::Type::STRING, python::Type::PYOBJECT};
-        for (const auto &t1 : all_types) {
-            for (const auto &t2 : all_types) {
+                std::cout<<"need to get concrete dict type here!"<<std::endl;
 
-                auto dict_type = python::Type::makeDictionaryType(t1, t2);
+                return python::Type::UNKNOWN;
+            });
+        }
 
-                // create specialized dict type
-                auto dict_sym = std::make_shared<Symbol>(dict_type.desc(), "dictionary", t1, SymbolType::TYPE);
-                // add here symbol so other functions can be easily added.
-                addSymbol(dict_sym);
 
-                // dict_keys
-                auto keys_sym = std::make_shared<Symbol>("keys", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, python::Type::makeDictKeysType(dict_type.keyType())));
-                dict_sym->addAttribute(keys_sym);
+        // i.e. type depending on input
 
-                // dict_keys
-                auto values_sym = std::make_shared<Symbol>("values", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, python::Type::makeDictValuesType(dict_type.valueType())));
-                dict_sym->addAttribute(values_sym);
-            }
-        }
+//        // typer function for dict.keys() and dict.values()
+//        // this currently doesn't handle empty dicts...
+//        std::vector<python::Type> all_types = {python::Type::BOOLEAN, python::Type::I64, python::Type::F64,
+//                                                python::Type::STRING, python::Type::PYOBJECT};
+//        for (const auto &t1 : all_types) {
+//            for (const auto &t2 : all_types) {
+//
+//                auto dict_type = python::Type::makeDictionaryType(t1, t2);
+//
+//                // create specialized dict type
+//                auto dict_sym = std::make_shared<Symbol>(dict_type.desc(), "dictionary", t1, SymbolType::TYPE);
+//                // add here symbol so other functions can be easily added.
+//                addSymbol(dict_sym);
+//
+//                // dict_keys
+//                auto keys_sym = std::make_shared<Symbol>("keys", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, python::Type::makeDictKeysType(dict_type.keyType())));
+//                dict_sym->addAttribute(keys_sym);
+//
+//                // dict_keys
+//                auto values_sym = std::make_shared<Symbol>("values", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, python::Type::makeDictValuesType(dict_type.valueType())));
+//                dict_sym->addAttribute(values_sym);
+//            }
+//        }
 
         // addBuiltinTypeAttribute(python::Type::EMPTYDICT, "keys", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, ???));
 
@@ -670,6 +682,35 @@ namespace tuplex {
         return addSymbol(make_shared<Symbol>(name, type));
     }
 
+    void SymbolTable::addBuiltinTypeAttribute(const python::Type &builtinType, const std::string &name,
+                                              std::function<python::Type(const python::Type &)> typer) {
+        using namespace std;
+        // Attaches (or updates) attribute `name` on the symbol stored for `builtinType` and installs `typer` on it.
+        // The type's desc() string is used as the key into the current scope (original note: this seems wrong, a direct lookup is needed).
+        auto scope = currentScope();
+        auto it = scope->symbols.find(builtinType.desc());
+        if(it == scope->symbols.end()) { // no symbol stored under this type's desc() yet: create one
+            scope->symbols[builtinType.desc()] = make_shared<Symbol>(builtinType.desc(), typer);
+            it = scope->symbols.find(builtinType.desc());
+            assert(it != scope->symbols.end());
+        }
+        auto sym_att = it->second->findAttribute(name);
+        if(!sym_att) {
+            it->second->addAttribute(make_shared<Symbol>(name, name, builtinType, builtinType.isFunctionType() ? SymbolType::FUNCTION : SymbolType::VARIABLE)); // NOTE(review): kind is derived from builtinType, not from the attribute - confirm intended
+            sym_att = it->second->findAttribute(name);
+        } else {
+            // attribute already exists: it is kept, but must have the same symbol kind (only one typer function per symbol)
+            auto symbolType = builtinType.isFunctionType() ? SymbolType::FUNCTION : SymbolType::VARIABLE;
+            if(symbolType != sym_att->symbolType)
+                throw std::runtime_error("symbol can only have one kind of types associated with it!");
+            assert(sym_att->qualifiedName == name);
+            sym_att->name = name;
+        }
+        assert(sym_att);
+        sym_att->parent = scope->symbols[name]; // NOTE(review): keyed by the attribute name, not desc(); if symbols is a std map, operator[] inserts an empty entry on a miss - confirm
+        sym_att->functionTyper = typer; // set last, so new and pre-existing attributes both end up with this typer
+    }
+
     void SymbolTable::addBuiltinTypeAttribute(const python::Type &builtinType, const std::string &name,
                                               const python::Type &type) {
         // this seems wrong, need to perform the lookup directly...
diff --git a/tuplex/codegen/src/TypeAnnotatorVisitor.cc b/tuplex/codegen/src/TypeAnnotatorVisitor.cc
index 46c758d93..d70cd0b71 100644
--- a/tuplex/codegen/src/TypeAnnotatorVisitor.cc
+++ b/tuplex/codegen/src/TypeAnnotatorVisitor.cc
@@ -1220,8 +1220,19 @@ namespace tuplex {
             // we are now inside a loop; no type change detected yet
             // check potential type change during loops
             if(_nameTable.find(id->_name) != _nameTable.end() && type != _nameTable.at(id->_name)) {
-                error("variable " + id->_name + " changed type during loop from " + _nameTable.at(id->_name).desc() + " to " + type.desc() + ", traced typing needed to determine if the type change is stable");
-                _loopTypeChange = true;
+
+                // special case:
+                // emptylist, emptydict (and emptyset) can get promoted
+                auto type_of_named = _nameTable.at(id->_name);
+                if((type_of_named == python::Type::EMPTYLIST && type.isListType()) ||
+                   (type_of_named == python::Type::EMPTYDICT && type.isDictionaryType()) ) {
+                    // || (type_of_named == python::Type::EMPTYSET && type.isSetType())
+                    auto& logger = Logger::instance().logger("codegen");
+                    logger.debug("promoting " + id->_name + " from " + _nameTable.at(id->_name).desc() + " to " + type.desc());
+                } else {
+                    error("variable " + id->_name + " changed type during loop from " + _nameTable.at(id->_name).desc() + " to " + type.desc() + ", traced typing needed to determine if the type change is stable");
+                    _loopTypeChange = true;
+                }
             }
         }
 
diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc
index f7e7aa097..be5d8cfc7 100644
--- a/tuplex/test/core/DictionaryTyping.cc
+++ b/tuplex/test/core/DictionaryTyping.cc
@@ -624,7 +624,7 @@ TEST(DictionaryTyping, Count) {
     // print type annotated ast
     GraphVizGraph graph;
     graph.createFromAST(ast.getFunctionAST(), true);
-    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/dict_count.pdf");
+    graph.saveAsPDF("dict_count.pdf");
 
     cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
 
diff --git a/tuplex/utils/include/TypeSystem.h b/tuplex/utils/include/TypeSystem.h
index 1b0674447..5fc1f6110 100644
--- a/tuplex/utils/include/TypeSystem.h
+++ b/tuplex/utils/include/TypeSystem.h
@@ -42,6 +42,7 @@ namespace python {
         static const Type EMPTYTUPLE; //! special type for an empty tuple
         static const Type EMPTYDICT; //! special type for empty dict
         static const Type EMPTYLIST; //! special type for empty list
+        static const Type EMPTYSET; //! special type for empty set
         static const Type NULLVALUE; //! special type for a nullvalue / None
         static const Type PYOBJECT; //! special type for any python object
         static const Type GENERICTUPLE; //! special type to accept ANY tuple object (helpful for symbol table)
diff --git a/tuplex/utils/src/TypeSystem.cc b/tuplex/utils/src/TypeSystem.cc
index f38864999..cf33b751c 100644
--- a/tuplex/utils/src/TypeSystem.cc
+++ b/tuplex/utils/src/TypeSystem.cc
@@ -33,11 +33,13 @@ namespace python {
     const Type Type::EMPTYTUPLE = python::TypeFactory::instance().createOrGetTupleType(std::vector<python::Type>());
     const Type Type::EMPTYDICT = python::TypeFactory::instance().createOrGetPrimitiveType("{}"); // empty dict
     const Type Type::EMPTYLIST = python::TypeFactory::instance().createOrGetPrimitiveType("[]"); // empty list: primitive because it can have any type element
+    const Type Type::EMPTYSET = python::TypeFactory::instance().createOrGetPrimitiveType("empty_set"); // empty set: primitive because it can have any type element
     const Type Type::NULLVALUE = python::TypeFactory::instance().createOrGetPrimitiveType("null");
     const Type Type::PYOBJECT = python::TypeFactory::instance().createOrGetPrimitiveType("pyobject");
     const Type Type::GENERICTUPLE = python::TypeFactory::instance().createOrGetPrimitiveType("tuple");
     const Type Type::GENERICDICT = python::TypeFactory::instance().createOrGetDictionaryType(python::Type::PYOBJECT, python::Type::PYOBJECT);
     const Type Type::GENERICLIST = python::TypeFactory::instance().createOrGetListType(python::Type::PYOBJECT);
+    //const Type Type::GENERICSET = python::TypeFactory::instance().createOrGetSetType(python::Type::PYOBJECT); // @TODO: implement.
     const Type Type::VOID = python::TypeFactory::instance().createOrGetPrimitiveType("void");
     const Type Type::MATCHOBJECT = python::TypeFactory::instance().createOrGetPrimitiveType("matchobject");
     const Type Type::RANGE = python::TypeFactory::instance().createOrGetPrimitiveType("range");
@@ -398,6 +400,9 @@ namespace python {
     }
 
     Type Type::keyType() const {
+        if(_hash == EMPTYDICT._hash || _hash == GENERICDICT._hash)
+            return PYOBJECT;
+
         assert(isDictionaryType() && _hash != EMPTYDICT._hash && _hash != GENERICDICT._hash);
         auto& factory = TypeFactory::instance();
         auto it = factory._typeMap.find(_hash);
@@ -415,6 +420,9 @@ namespace python {
     }
 
     Type Type::valueType() const {
+        if(_hash == EMPTYDICT._hash || _hash == GENERICDICT._hash)
+            return PYOBJECT;
+
         assert(isDictionaryType() && _hash != EMPTYDICT._hash && _hash != GENERICDICT._hash);
         auto& factory = TypeFactory::instance();
         auto it = factory._typeMap.find(_hash);
@@ -1115,8 +1123,15 @@ namespace python {
 
         // dictionary type
         if(aUnderlyingType.isDictionaryType() && bUnderlyingType.isDictionaryType()) {
+
+            // empty dict can be always upcasted to concrete dict
+            if(python::Type::EMPTYDICT == aUnderlyingType)
+                return bUnderlyingType;
+            if(python::Type::EMPTYDICT == bUnderlyingType)
+                return aUnderlyingType;
+
             auto key_t = unifyTypes(aUnderlyingType.keyType(), bUnderlyingType.keyType(), autoUpcast);
-            auto val_t = unifyTypes(aUnderlyingType.elementType(), bUnderlyingType.elementType(), autoUpcast);
+            auto val_t = unifyTypes(aUnderlyingType.valueType(), bUnderlyingType.valueType(), autoUpcast);
             if(key_t == python::Type::UNKNOWN || val_t == python::Type::UNKNOWN) {
                 return python::Type::UNKNOWN;
             }

From 64901a1ed44e577437133cf68b55bf83c1051313 Mon Sep 17 00:00:00 2001
From: Leonhard Spiegelberg <leonhard_spiegelberg@brown.edu>
Date: Wed, 27 Jul 2022 16:15:37 -0400
Subject: [PATCH 14/23] stubs

---
 tuplex/codegen/include/SymbolTable.h | 12 +++++++---
 tuplex/codegen/src/SymbolTable.cc    | 21 +++++++++-------
 tuplex/test/core/DictionaryTyping.cc | 36 ++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+), 11 deletions(-)

diff --git a/tuplex/codegen/include/SymbolTable.h b/tuplex/codegen/include/SymbolTable.h
index 3b14a721d..0a6b3854d 100644
--- a/tuplex/codegen/include/SymbolTable.h
+++ b/tuplex/codegen/include/SymbolTable.h
@@ -171,17 +171,23 @@ namespace tuplex {
          * add an attribute to a builtin type, e.g. str.lower
          * @param builtinType
          * @param name
-         * @param type
+         * @param type type of the attribute, i.e. if it is a function type then a function symbol will be added.
+         * If it is not a function type a variable symbol will be added.
          */
-        void addBuiltinTypeAttribute(const python::Type& builtinType, const std::string& name, const python::Type& type);
+        void addBuiltinTypeAttribute(const python::Type& builtinType,
+                                     const std::string& name,
+                                     const python::Type& type);
 
         /*!
          * add an attribute to a builtin type, e.g. dict.keys()
          * @param builtinType to which type to add the function
          * @param name name of the attribute
          * @param typer a dynamic typing function
+         * @param sym_type what kind of symbol it is (function? variable?), needed because typer works for both.
          */
-        void addBuiltinTypeAttribute(const python::Type& builtinType, const std::string& name, std::function<python::Type(const python::Type&)> typer);
+        void addBuiltinTypeAttribute(const python::Type& builtinType, const std::string& name,
+                                     std::function<python::Type(const python::Type&)> typer,
+                                     const SymbolType& sym_type);
 
         /*!
          * checks whether a symbol can be looked up or not
diff --git a/tuplex/codegen/src/SymbolTable.cc b/tuplex/codegen/src/SymbolTable.cc
index cfa9e3f84..5af167db9 100644
--- a/tuplex/codegen/src/SymbolTable.cc
+++ b/tuplex/codegen/src/SymbolTable.cc
@@ -415,7 +415,7 @@ namespace tuplex {
                 std::cout<<"need to get concrete dict type here!"<<std::endl;
 
                 return python::Type::UNKNOWN;
-            });
+            }, SymbolType::FUNCTION);
         }
 
 
@@ -683,8 +683,11 @@ namespace tuplex {
     }
 
     void SymbolTable::addBuiltinTypeAttribute(const python::Type &builtinType, const std::string &name,
-                                              std::function<python::Type(const python::Type &)> typer) {
+                                              std::function<python::Type(const python::Type &)> typer,
+                                              const SymbolType& sym_type = SymbolType::VARIABLE) {
         using namespace std;
+        assert(sym_type == SymbolType::VARIABLE || sym_type == SymbolType::FUNCTION);
+
         // this seems wrong, need to perform the lookup directly...
         // use desc as name
         auto scope = currentScope();
@@ -696,12 +699,11 @@ namespace tuplex {
         }
         auto sym_att = it->second->findAttribute(name);
         if(!sym_att) {
-            it->second->addAttribute(make_shared<Symbol>(name, name, builtinType, builtinType.isFunctionType() ? SymbolType::FUNCTION : SymbolType::VARIABLE));
+            it->second->addAttribute(make_shared<Symbol>(name, name, builtinType, sym_type));
             sym_att = it->second->findAttribute(name);
         } else {
             // replace symbol, there can be only one symbol with a typer function
-            auto symbolType = builtinType.isFunctionType() ? SymbolType::FUNCTION : SymbolType::VARIABLE;
-            if(symbolType != sym_att->symbolType)
+            if(sym_type != sym_att->symbolType)
                 throw std::runtime_error("symbol can only have one kind of types associated with it!");
             assert(sym_att->qualifiedName == name);
             sym_att->name = name;
@@ -796,7 +798,10 @@ namespace tuplex {
         return python::Type::UNKNOWN;
     }
 
-    static python::Type typeAttribute(std::shared_ptr<Symbol> sym, std::string attribute, python::Type parameterType) {
+    static python::Type typeAttribute(std::shared_ptr<Symbol> sym,
+                                      std::string attribute,
+                                      python::Type parameterType,
+                                      python::Type objectType) {
         if(sym) {
             auto attr_sym = sym->findAttribute(attribute);
 
@@ -855,7 +860,7 @@ namespace tuplex {
         auto name = type.desc();
         auto sym = findSymbol(name);
 
-        resultType = typeAttribute(sym, attribute, parameterType);
+        resultType = typeAttribute(sym, attribute, parameterType, type);
         if(resultType != python::Type::UNKNOWN)
             return resultType;
 
@@ -869,7 +874,7 @@ namespace tuplex {
             if(type.isDictionaryType() || type == python::Type::EMPTYDICT)
                 name = python::Type::GENERICDICT.desc();
             sym = findSymbol(name);
-            resultType = typeAttribute(sym, attribute, parameterType);
+            resultType = typeAttribute(sym, attribute, parameterType, type);
         }
 
         return resultType;
diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc
index be5d8cfc7..df8c532f5 100644
--- a/tuplex/test/core/DictionaryTyping.cc
+++ b/tuplex/test/core/DictionaryTyping.cc
@@ -628,5 +628,41 @@ TEST(DictionaryTyping, Count) {
 
     cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
 
+    ASSERT_EQ(ast.getReturnType(), python::Type::makeDictionaryType(python::Type::STRING, python::Type::I64));
+}
+
+TEST(DictionaryTyping, KeyView) {
+    // expected to fail; need to add support for dict_keys
+    using namespace tuplex;
+    using namespace std;
+
+    // could also use list((10, 20, 30)) e.g., or tuple(list(...)) -> needs speculation.
+
+    // test count UDF
+//    auto count_c = "def count_keys(x):\n"
+//                   "    d = {'A':10, 'B': 10, x: 20}\n"
+//                   "    return list(d.keys())";
+    auto count_c = "def count_keys(x):\n"
+                   "    d = {'A':10, 'B': 10, x: 20}\n"
+                   "    return d.keys()";
+
+    // parse code to AST
+    auto ast = tuplex::codegen::AnnotatedAST();
+    ast.parseString(count_c);
+
+    // make typing
+    python::Type inputType = python::Type::STRING;
+
+    // create symbol table
+    ast.addTypeHint("x", inputType);
+    ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    // print type annotated ast
+    GraphVizGraph graph;
+    graph.createFromAST(ast.getFunctionAST(), true);
+    graph.saveAsPDF("dict_count_keys.pdf");
+
+    cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
+
     ASSERT_EQ(ast.getReturnType(), python::Type::makeDictionaryType(python::Type::STRING, python::Type::I64));
 }
\ No newline at end of file

From d4e1431199ea4320ab5be8dcedde8de7798ebf1e Mon Sep 17 00:00:00 2001
From: Leonhard Spiegelberg <leonhard_spiegelberg@brown.edu>
Date: Wed, 27 Jul 2022 16:17:45 -0400
Subject: [PATCH 15/23] fix

---
 tuplex/codegen/src/SymbolTable.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tuplex/codegen/src/SymbolTable.cc b/tuplex/codegen/src/SymbolTable.cc
index 5af167db9..8352ba317 100644
--- a/tuplex/codegen/src/SymbolTable.cc
+++ b/tuplex/codegen/src/SymbolTable.cc
@@ -412,6 +412,8 @@ namespace tuplex {
         {
             addBuiltinTypeAttribute(python::Type::GENERICDICT, "keys", [](const python::Type& parameterType) {
 
+                //  @TODO: @rhea once you changed the signature of the Lambda here, you should be abel to type correctly.
+                // I can give it a try to refactor everything better than. 
                 std::cout<<"need to get concrete dict type here!"<<std::endl;
 
                 return python::Type::UNKNOWN;
@@ -811,6 +813,8 @@ namespace tuplex {
                     // else, return single type
                     return attr_sym->type();
                 python::Type funcType = python::Type::UNKNOWN;
+
+                //  @TODO: @rhea -> change function here to include objectType as well and make typer a two parameter function
                 attr_sym->findFunctionTypeBasedOnParameterType(parameterType, funcType); // ignore ret value.
                 return funcType;
             }

From 3936c34acdc3f4e137c3955b9c709ff3f6a2d6fd Mon Sep 17 00:00:00 2001
From: Rhea Goyal <rhea_goyal@brown.edu>
Date: Wed, 27 Jul 2022 18:04:27 -0400
Subject: [PATCH 16/23] pulled dict proxy files

---
 tuplex/codegen/include/BuiltinDictProxy.h     |  90 +++++++++++++
 tuplex/codegen/include/BuiltinDictProxyImpl.h |  28 ++++
 tuplex/codegen/include/cJSONDictProxyImpl.h   |  59 +++++++++
 tuplex/core/src/cJSONDictProxyImpl.cc         |  69 ++++++++++
 tuplex/test/dict/CMakeLists.txt               |  17 +++
 tuplex/test/dict/DictProxyTest.cc             | 121 ++++++++++++++++++
 tuplex/test/dict/main.cc                      |  18 +++
 7 files changed, 402 insertions(+)
 create mode 100644 tuplex/codegen/include/BuiltinDictProxy.h
 create mode 100644 tuplex/codegen/include/BuiltinDictProxyImpl.h
 create mode 100644 tuplex/codegen/include/cJSONDictProxyImpl.h
 create mode 100644 tuplex/core/src/cJSONDictProxyImpl.cc
 create mode 100644 tuplex/test/dict/CMakeLists.txt
 create mode 100644 tuplex/test/dict/DictProxyTest.cc
 create mode 100644 tuplex/test/dict/main.cc

diff --git a/tuplex/codegen/include/BuiltinDictProxy.h b/tuplex/codegen/include/BuiltinDictProxy.h
new file mode 100644
index 000000000..cd5fb979d
--- /dev/null
+++ b/tuplex/codegen/include/BuiltinDictProxy.h
@@ -0,0 +1,90 @@
+//--------------------------------------------------------------------------------------------------------------------//
+//                                                                                                                    //
+//                                      Tuplex: Blazing Fast Python Data Science                                      //
+//                                                                                                                    //
+//                                                                                                                    //
+//  (c) 2017 - 2021, Tuplex team                                                                                      //
+//  Created by Leonhard Spiegelberg first on 8/9/2021                                                                 //
+//  License: Apache 2.0                                                                                               //
+//--------------------------------------------------------------------------------------------------------------------//
+#ifndef TUPLEX_BUILTINDICTPROXY_H
+#define TUPLEX_BUILTINDICTPROXY_H
+
+#include <memory>
+
+#include <TypeSystem.h>
+#include <BuiltinDictProxyImpl.h>
+#include <cJSONDictProxyImpl.h>
+
+// TODO: Could also use a general object based system which would make things easier...
+// -> i.e., sequence protocol strings/lists/...
+
+// basically for each object we need
+// 1.) representation as C++ object (field)
+// 2.) code-generated logic (i.e., codegen specialization)
+// 3.) to/from python object
+
+namespace tuplex {
+    namespace codegen {
+        class BuiltinDictProxy { // front-end for a builtin dict; all storage work is delegated to _impl
+        public:
+            // Creates a proxy for a dict of type specializedDictType and allocates its storage backend.
+            BuiltinDictProxy(const python::Type& specializedDictType) : _specializedType(specializedDictType) {
+                // use cJSON as default for now...
+                _impl = std::make_shared<cJSONDictProxyImpl>();
+            }
+
+            // putItem comes in two versions (codegen/non-codegen). Both assert that a backend exists,
+            // forward the call to it and return *this so that calls can be chained.
+            BuiltinDictProxy& putItem(const Field& key, const Field& value) { assert(_impl); _impl->putItem(key, value); return *this; }
+            BuiltinDictProxy& putItem(const python::Type& keyType, const SerializableValue& key, const python::Type& valueType, const SerializableValue& value) { assert(_impl); _impl->putItem(keyType, key, valueType, value); return *this; }
+
+//            // getItem
+//            BuiltinDictProxy& getItem(const Field& key);
+//            BuiltinDictProxy& getItem(const python::Type& keyType, const SerializableValue& key);
+//
+//            // delItem
+//            BuiltinDictProxy& delItem(const Field& key);
+//            BuiltinDictProxy& delItem(const python::Type& keyType, const SerializableValue& key);
+//
+//            // allocSize() --> helpful when dict size is known upfront, can be used for optimization.
+//            BuiltinDictProxy& allocSize(llvm::Value* size);
+
+            // getKeyView() --> codegen object
+
+            // getValuesView() --> codegen object
+            // dictType: not implemented yet, always throws std::runtime_error.
+            python::Type dictType() const {
+                throw std::runtime_error("not yet implemented");
+            }
+            // specializedDictType: returns the type that was handed to the constructor.
+            python::Type specializedDictType() const {
+                return _specializedType;
+            }
+
+            // codegenToMemory
+
+            // codegenFromMemory
+            // static function?
+
+            // codegenSerializedLength
+
+            // toMemory
+
+            // fromMemory
+            // static function?
+
+            // serializedLength
+        private:
+            python::Type _specializedType; // type passed to the constructor
+
+            // implementation...
+            // -> cJSON
+            // -> ...
+            // -> ...
+            std::shared_ptr<BuiltinDictProxyImpl> _impl;
+        };
+    }
+}
+
+#endif //TUPLEX_BUILTINDICTPROXY_H
diff --git a/tuplex/codegen/include/BuiltinDictProxyImpl.h b/tuplex/codegen/include/BuiltinDictProxyImpl.h
new file mode 100644
index 000000000..7665f6cca
--- /dev/null
+++ b/tuplex/codegen/include/BuiltinDictProxyImpl.h
@@ -0,0 +1,28 @@
+//--------------------------------------------------------------------------------------------------------------------//
+//                                                                                                                    //
+//                                      Tuplex: Blazing Fast Python Data Science                                      //
+//                                                                                                                    //
+//                                                                                                                    //
+//  (c) 2017 - 2021, Tuplex team                                                                                      //
+//  Created by Leonhard Spiegelberg first on 8/9/2021                                                                 //
+//  License: Apache 2.0                                                                                               //
+//--------------------------------------------------------------------------------------------------------------------//
+
+#ifndef TUPLEX_BUILTINDICTPROXYIMPL_H
+#define TUPLEX_BUILTINDICTPROXYIMPL_H
+
+#include <TypeSystem.h>
+#include <Row.h>
+#include <CodegenHelper.h>
+
+namespace tuplex {
+    namespace codegen {
+        class BuiltinDictProxyImpl { // abstract storage backend behind BuiltinDictProxy; implementations provide both putItem overloads
+        public: // NOTE(review): no virtual destructor is declared - destroy implementations only via their concrete type (e.g. a std::make_shared<Derived> handle) or add one
+            virtual void putItem(const Field& key, const Field& value) = 0;
+            virtual void putItem(const python::Type& keyType, const SerializableValue& key, const python::Type& valueType, const SerializableValue& value) = 0;
+        };
+    }
+}
+
+#endif //TUPLEX_BUILTINDICTPROXYIMPL_H
diff --git a/tuplex/codegen/include/cJSONDictProxyImpl.h b/tuplex/codegen/include/cJSONDictProxyImpl.h
new file mode 100644
index 000000000..a3218c3e6
--- /dev/null
+++ b/tuplex/codegen/include/cJSONDictProxyImpl.h
@@ -0,0 +1,59 @@
+//--------------------------------------------------------------------------------------------------------------------//
+//                                                                                                                    //
+//                                      Tuplex: Blazing Fast Python Data Science                                      //
+//                                                                                                                    //
+//                                                                                                                    //
+//  (c) 2017 - 2021, Tuplex team                                                                                      //
+//  Created by Leonhard Spiegelberg first on 8/9/2021                                                                 //
+//  License: Apache 2.0                                                                                               //
+//--------------------------------------------------------------------------------------------------------------------//
+#ifndef TUPLEX_CJSONDICTPROXYIMPL_H
+#define TUPLEX_CJSONDICTPROXYIMPL_H
+
+#ifdef BUILD_WITH_AWS
+#include <aws/core/external/cjson/cJSON.h>
+#else
+#include <cJSON.h>
+#endif
+#include "optional.h"
+#include <BuiltinDictProxyImpl.h>
+
+namespace tuplex {
+    namespace codegen {
+        // BuiltinDictProxyImpl backend that keeps the dict items in a cJSON tree.
+        class cJSONDictProxyImpl : public BuiltinDictProxyImpl {
+        public:
+            cJSONDictProxyImpl() : _root(nullptr), _typeMap(nullptr) {}
+            ~cJSONDictProxyImpl() {
+                // cJSON_Delete frees the whole tree and tolerates nullptr; cJSON_free would free only the root node and leak its children
+                cJSON_Delete(_root);
+                _root = nullptr;
+            }
+            // non-copyable: a copy would hold the same raw cJSON pointers and release them twice
+            cJSONDictProxyImpl(const cJSONDictProxyImpl& other) = delete;
+            cJSONDictProxyImpl& operator = (const cJSONDictProxyImpl& other) = delete;
+
+            void putItem(const Field& key, const Field& value) override;
+            void putItem(const python::Type& keyType, const SerializableValue& key, const python::Type& valueType, const SerializableValue& value) override;
+
+            // notes:
+            // for cJSON subscripting, need to perform
+            //  SerializableValue BlockGeneratorVisitor::subscriptCJSONDictionary(NSubscription *sub, SerializableValue index,
+            //                                                                          const python::Type &index_type,
+            //                                                                          SerializableValue value) {
+
+        private:
+            cJSON *_root;   // a map of the elements; created lazily by putItem and owned (deleted) by this object
+            cJSON *_typeMap; // a map of strings -> types (nested); not assigned anywhere in the code visible here, hence not released
+
+            /*!
+            * returns a string representing a type prefix when storing type information in cJSON object as well.
+            * @param type python type for which the prefix is requested
+            * @return the prefix string
+            */
+            static std::string typePrefix(const python::Type& type);
+        };
+    }
+}
+
+#endif //TUPLEX_CJSONDICTPROXYIMPL_H
diff --git a/tuplex/core/src/cJSONDictProxyImpl.cc b/tuplex/core/src/cJSONDictProxyImpl.cc
new file mode 100644
index 000000000..edc701dd4
--- /dev/null
+++ b/tuplex/core/src/cJSONDictProxyImpl.cc
@@ -0,0 +1,69 @@
+//--------------------------------------------------------------------------------------------------------------------//
+//                                                                                                                    //
+//                                      Tuplex: Blazing Fast Python Data Science                                      //
+//                                                                                                                    //
+//                                                                                                                    //
+//  (c) 2017 - 2021, Tuplex team                                                                                      //
+//  Created by Leonhard Spiegelberg first on 8/9/2021                                                                 //
+//  License: Apache 2.0                                                                                               //
+//--------------------------------------------------------------------------------------------------------------------//
+#include <cJSONDictProxyImpl.h>
+
+namespace tuplex {
+    namespace codegen {
+
+
+        // in general cJSON supports following data types:
+        // string
+        // number
+        // boolean
+        // null
+        // object
+        // array
+        // --> yet type info from python might get lost. Hence, store it when possible as well!
+
+        // this is a general helper function to turn a Field into a cJSON object
+
+        /*!
+         * converts a field into a cJSON object. If not convertible, returns nullptr.
+         * @param f Field
+         * @param includeTypePrefix
+         * @return cJSON* object
+         */
+        cJSON* fieldToCJSON(const Field& f, bool includeTypePrefix=false) {
+            // stub: the conversion is not implemented yet, so every field is reported as not convertible
+            return nullptr;
+        }
+        // stub counterpart of fieldToCJSON: asserts a non-null object and currently always returns Field::null()
+        Field cJSONToField(const cJSON* object) {
+            assert(object);
+
+            return Field::null();
+        }
+
+        std::string cJSONDictProxyImpl::typePrefix(const python::Type& type) {
+            // stub: always returns an empty prefix for now
+            // init map for a couple common types (int, float, bool, ...)
+
+            // since keys in JSON are always strings, need to store type info in that string!
+            return "";
+        }
+
+        void cJSONDictProxyImpl::putItem(const Field &key, const Field &value) {
+            // put into cJSON; since neither key nor value types are necessarily type-stable, encode the type as base64 into the values!
+            // map primitive types directly into cJSON if possible
+            if(!_root)
+                _root = cJSON_CreateObject();
+
+            // type prefix
+
+            throw std::runtime_error("to implement...");
+        }
+
+        void cJSONDictProxyImpl::putItem(const python::Type &keyType, const SerializableValue &key,
+                                         const python::Type &valueType, const SerializableValue &value) {
+            // stub: the SerializableValue-based variant is not implemented yet
+            throw std::runtime_error("to implement...");
+        }
+    }
+}
diff --git a/tuplex/test/dict/CMakeLists.txt b/tuplex/test/dict/CMakeLists.txt
new file mode 100644
index 000000000..6f3d18cef
--- /dev/null
+++ b/tuplex/test/dict/CMakeLists.txt
@@ -0,0 +1,17 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 3.12 FATAL_ERROR)
+
+# enable c++14
+SET(CMAKE_CXX_STANDARD 14)
+# every .cc file in this directory becomes part of the test executable
+FILE(GLOB SRCS *.cc)
+
+include(GoogleTest)
+# NOTE(review): the name testutils looks copied from the utils tests; CMake rejects duplicate target names (CMP0002) - confirm, e.g. rename to testdict
+ADD_EXECUTABLE(testutils ${SRCS})
+# NOTE(review): DictProxyTest.cc includes TestUtils.h and BuiltinDictProxy.h - confirm that libutils alone is enough to link
+TARGET_LINK_LIBRARIES(testutils
+        libutils
+        ${GTest_LIBRARIES}
+        )
+# register the googletest cases of the executable with CTest
+gtest_add_tests(TARGET testutils TEST_PREFIX "")
\ No newline at end of file
diff --git a/tuplex/test/dict/DictProxyTest.cc b/tuplex/test/dict/DictProxyTest.cc
new file mode 100644
index 000000000..ae9b2aad6
--- /dev/null
+++ b/tuplex/test/dict/DictProxyTest.cc
@@ -0,0 +1,121 @@
+//--------------------------------------------------------------------------------------------------------------------//
+//                                                                                                                    //
+//                                      Tuplex: Blazing Fast Python Data Science                                      //
+//                                                                                                                    //
+//                                                                                                                    //
+//  (c) 2017 - 2021, Tuplex team                                                                                      //
+//  Created by Leonhard Spiegelberg first on 8/9/2021                                                                 //
+//  License: Apache 2.0                                                                                               //
+//--------------------------------------------------------------------------------------------------------------------//
+
+#include "TestUtils.h"
+#include <BuiltinDictProxy.h>
+
+class DictProxyTest : public PyTest {};
+
+
+// helper functions to generate combinations with repetitions. Worker contract: pos[0, depth) holds the indices
+// chosen so far, margin is the smallest index still allowed; pos needs at least combination_length entries.
+template<typename T> void combinations_r_recursive(const std::vector<T> &elements, std::size_t combination_length,
+                              std::vector<unsigned long> &pos, unsigned long depth,
+                              unsigned long margin, std::vector<std::vector<T>>& result) {
+    if (depth >= combination_length) {
+        // all slots are filled: build the combination directly inside result (no temporary copy)
+        result.emplace_back();
+        std::vector<T>& combination = result.back();
+        combination.reserve(pos.size());
+        for (const unsigned long index : pos)
+            combination.push_back(elements[index]);
+        return;
+    }
+
+    // Try to select new elements at or to the right of the last selected one.
+    for (unsigned long ii = margin; ii < elements.size(); ++ii) {
+        pos[depth] = ii;
+        combinations_r_recursive(elements, combination_length, pos, depth + 1, ii, result);
+    }
+}
+
+// Returns all combinations with repetition of combination_length elements. The length may exceed
+// elements.size(): with repetition, e.g. 3 picks out of 2 elements are valid (4 combinations).
+template<typename T> std::vector<std::vector<T>> combinations_with_repetition(const std::vector<T> &elements, size_t combination_length) {
+    std::vector<unsigned long> positions(combination_length, 0);
+    std::vector<std::vector<T>> result;
+    combinations_r_recursive(elements, combination_length, positions, 0, 0, result);
+    return result;
+}
+
+
+
+TEST_F(DictProxyTest, PutItemTest) {
+    using namespace tuplex;
+    using namespace std;
+
+    // tests the non-codegenerated putItem; work in progress, so far only the test combinations are generated and printed
+
+
+    // tests to write:
+
+    // 1. heterogeneous dict -> basically use modified JSON as in-memory storage format.
+    // 2. homogeneous keytype dict -> can encode dict directly & serialize it more efficiently. Represent in-memory as hash table specialized depending on type.
+    // 3. homogeneous valuetype -> ignore case, specialize to 1.
+    // 4. compile-time known keys/restricted keyset, keys do not change. -> struct type with fixed offsets!
+
+    // put and get
+    auto dict_fun_code = "def f(a, b, c, d):\n"
+                         "    M = dict()\n"
+                         "    M[a] = b\n"
+                         "    M[c] = d\n"
+                         "    return M, M[a], M[c]\n";
+
+    codegen::BuiltinDictProxy dict_proxy(python::Type::UNKNOWN);
+
+    // create test setups (4 values, all combos)
+    vector<Field> test_values{Field((int64_t)0), Field(10.0), Field(false), Field::null(), Field("hello world"), Field(Tuple(10, 20)), Field(Tuple(3.141, 10, false, "test")), Field(List(1.0, 3.0, 4.0))};
+
+    // NOTE: list/dict is not hashable in python!
+    // -> such values cannot serve as dict keys in Python.
+
+    // create combos
+    // NOTE(review): the loop below runs 4 ^ len(test_values) times, whereas combinations_with_repetition(test_values, 4) yields C(len + 3, 4) entries - confirm which count is intended
+
+
+    // what about nested dicts?
+    // -> unflatten?
+    // --> unflatten using combined keys? i.e. a/b/c ? which char to use as separator?
+    // maybe start with non-nested dicts.
+    // dicts should be able to store lists etc.
+
+    auto combos = combinations_with_repetition(test_values, 4);
+
+    cout<<"Generated "<<combos.size()<<" combinations."<<endl<<endl;
+    for(auto combo : combos) {
+        for(auto f : combo)
+            cout<<f.toPythonString()<<" ";
+        cout<<endl;
+    }
+
+    // this should give a good variety of things to store in dictionaries
+    for(int i = 0; i < std::pow(4, test_values.size()); ++i) {
+        // TODO: loop body still missing
+    }
+
+
+//
+//    dict_proxy.putItem(Field((int64_t)10), Field("test"));
+//    dict_proxy.putItem(Field((int64_t)20), Field("hello"));
+
+
+    // limited keyset, dynamic access etc.
+
+    // basically we need only a couple dictionary primitives:
+
+    // 1. fixed set of keys -> can be checked dynamically at runtime. I.e., good for read-only dictionaries, rarely changed ones. etc. --> requires dispatch dictionary for each type for dynamic types. Constants can be translated during compile time.
+    // -> because dicts support in syntax, need to keep additional bitmap to check whether there's a valid entry or not!
+
+
+    // 2. fixed key type/value type dicts -> can be used in dynamic settings. E.g., when accumulating things!
+
+    // 3. other usage should be esoteric...
+
+}
diff --git a/tuplex/test/dict/main.cc b/tuplex/test/dict/main.cc
new file mode 100644
index 000000000..af04e4577
--- /dev/null
+++ b/tuplex/test/dict/main.cc
@@ -0,0 +1,18 @@
+//--------------------------------------------------------------------------------------------------------------------//
+//                                                                                                                    //
+//                                      Tuplex: Blazing Fast Python Data Science                                      //
+//                                                                                                                    //
+//                                                                                                                    //
+//  (c) 2017 - 2021, Tuplex team                                                                                      //
+//  Created by Leonhard Spiegelberg first on 1/1/2021                                                                 //
+//  License: Apache 2.0                                                                                               //
+//--------------------------------------------------------------------------------------------------------------------//
+
+#include "gtest/gtest.h"
+
+int main(int argc, char **argv)
+{
+    ::testing::InitGoogleTest(&argc, argv);
+    int ret = RUN_ALL_TESTS();
+    return ret;
+}
\ No newline at end of file

From 510977a3fb95e0eca08adc234307ffebbcbad138 Mon Sep 17 00:00:00 2001
From: Leonhard Spiegelberg <leonhard_spiegelberg@brown.edu>
Date: Thu, 28 Jul 2022 14:37:33 -0400
Subject: [PATCH 17/23] wip

---
 tuplex/codegen/src/SymbolTable.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tuplex/codegen/src/SymbolTable.cc b/tuplex/codegen/src/SymbolTable.cc
index 8352ba317..9f8979b7a 100644
--- a/tuplex/codegen/src/SymbolTable.cc
+++ b/tuplex/codegen/src/SymbolTable.cc
@@ -413,7 +413,7 @@ namespace tuplex {
             addBuiltinTypeAttribute(python::Type::GENERICDICT, "keys", [](const python::Type& parameterType) {
 
                 //  @TODO: @rhea once you changed the signature of the Lambda here, you should be abel to type correctly.
-                // I can give it a try to refactor everything better than. 
+                // I can give it a try to refactor everything better than.
                 std::cout<<"need to get concrete dict type here!"<<std::endl;
 
                 return python::Type::UNKNOWN;

From b5407ebd9df7979ce0ee225e9dd119185f57f390 Mon Sep 17 00:00:00 2001
From: Rhea Goyal <rhea_goyal@brown.edu>
Date: Sat, 30 Jul 2022 21:02:13 -0400
Subject: [PATCH 18/23] pushing so I can write code locally

---
 tuplex/codegen/include/BuiltinDictProxy.h |  2 +-
 tuplex/codegen/src/SymbolTable.cc         | 28 +----------------
 tuplex/core/src/cJSONDictProxyImpl.cc     | 37 +++++++++++++++++++++--
 3 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/tuplex/codegen/include/BuiltinDictProxy.h b/tuplex/codegen/include/BuiltinDictProxy.h
index cd5fb979d..a37f2a634 100644
--- a/tuplex/codegen/include/BuiltinDictProxy.h
+++ b/tuplex/codegen/include/BuiltinDictProxy.h
@@ -39,7 +39,7 @@ namespace tuplex {
             BuiltinDictProxy& putItem(const Field& key, const Field& value) { assert(_impl); _impl->putItem(key, value); return *this; }
             BuiltinDictProxy& putItem(const python::Type& keyType, const SerializableValue& key, const python::Type& valueType, const SerializableValue& value) { assert(_impl); _impl->putItem(keyType, key, valueType, value); return *this; }
 
-//            // getItemß
+//            // getItem
 //            BuiltinDictProxy& getItem(const Field& key);
 //            BuiltinDictProxy& getItem(const python::Type& keyType, const SerializableValue& key);
 //
diff --git a/tuplex/codegen/src/SymbolTable.cc b/tuplex/codegen/src/SymbolTable.cc
index 5af167db9..48b088d59 100644
--- a/tuplex/codegen/src/SymbolTable.cc
+++ b/tuplex/codegen/src/SymbolTable.cc
@@ -409,6 +409,7 @@ namespace tuplex {
 
         // for keys()/values() use generic dict and let symbol table create specialized type on the fly using
         // typer function
+        /** TODO: finish implementing! (c++ lambda to get correct result) **/
         {
             addBuiltinTypeAttribute(python::Type::GENERICDICT, "keys", [](const python::Type& parameterType) {
 
@@ -418,35 +419,8 @@ namespace tuplex {
             }, SymbolType::FUNCTION);
         }
 
-
         // i.e. type depending on input
 
-//        // typer function for dict.keys() and dict.values()
-//        // this currently doesn't handle empty dicts...
-//        std::vector<python::Type> all_types = {python::Type::BOOLEAN, python::Type::I64, python::Type::F64,
-//                                                python::Type::STRING, python::Type::PYOBJECT};
-//        for (const auto &t1 : all_types) {
-//            for (const auto &t2 : all_types) {
-//
-//                auto dict_type = python::Type::makeDictionaryType(t1, t2);
-//
-//                // create specialized dict type
-//                auto dict_sym = std::make_shared<Symbol>(dict_type.desc(), "dictionary", t1, SymbolType::TYPE);
-//                // add here symbol so other functions can be easily added.
-//                addSymbol(dict_sym);
-//
-//                // dict_keys
-//                auto keys_sym = std::make_shared<Symbol>("keys", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, python::Type::makeDictKeysType(dict_type.keyType())));
-//                dict_sym->addAttribute(keys_sym);
-//
-//                // dict_keys
-//                auto values_sym = std::make_shared<Symbol>("values", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, python::Type::makeDictValuesType(dict_type.valueType())));
-//                dict_sym->addAttribute(values_sym);
-//            }
-//        }
-
-        // addBuiltinTypeAttribute(python::Type::EMPTYDICT, "keys", python::Type::makeFunctionType(python::Type::EMPTYTUPLE, ???));
-
         // for pop/popitem things are actually a bit more complicated...
         // i.e. the default keyword may introduce an issue...
         // https://www.programiz.com/python-programming/methods/dictionary/pop
diff --git a/tuplex/core/src/cJSONDictProxyImpl.cc b/tuplex/core/src/cJSONDictProxyImpl.cc
index edc701dd4..5ecc9f1ba 100644
--- a/tuplex/core/src/cJSONDictProxyImpl.cc
+++ b/tuplex/core/src/cJSONDictProxyImpl.cc
@@ -23,7 +23,6 @@ namespace tuplex {
         // --> yet type info from python might get lost. Hence, store it when possible as well!
 
         // this is a general helper function to turn a Field into a cJSON object
-
         /*!
          * converts a field into a cJSON object. If not convertible, returns nullptr.
          * @param f Field
@@ -54,14 +53,48 @@ namespace tuplex {
             // map primitive types directly into cJSON if possible
             if(!_root)
                 _root = cJSON_CreateObject();
+            
+            cJSON* to_add; // = cJSON_CreateNull();
+
+            // check type of value, create corresponding cJSON type object
+            if (value.getType() == python::Type::BOOLEAN) {
+                if (value.getInt() > 0) {
+                    to_add = cJSON_CreateTrue();
+                } else {
+                    to_add = cJSON_CreateFalse();
+                }
+            } else if (value.getType() == python::Type::F64) {
+                to_add = cJSON_CreateNumber(value.getDouble());
+            } else if (value.getType() == python::Type::I64) {
+                // should I be upcasting?
+                to_add = cJSON_CreateNumber((double)value.getInt());
+            } else if (value.getType() == python::Type::STRING) {
+                to_add = cJSON_CreateString((const char*)value.getPtr());
+            } else if (value.getType().isTupleType()) {
+                assert(value.getPtr());
+
+                std::tuple* tup = (std::tuple*)value.getPtr();
+                to_add = cJSON_CreateArray();
+                
+                for (auto i : tup) {
+                    
+                }
+            } else {
+                throw std::runtime_error("cannot put value with type " + value.getType().desc() + " into cJSON object");
+            }
+
+            // add to cJSON object
+            cJSON_AddItemToObject(_root, key.desc().c_str(), to_add);
 
             // type prefix
 
-            throw std::runtime_error("to implement...");
+            // throw std::runtime_error("to implement...");
         }
 
         void cJSONDictProxyImpl::putItem(const python::Type &keyType, const SerializableValue &key,
                                          const python::Type &valueType, const SerializableValue &value) {
+            if(!_root)
+                _root = cJSON_CreateObject();
 
             throw std::runtime_error("to implement...");
         }

From 607592f5bb1b4773d50d28af5d58489721a0d7cd Mon Sep 17 00:00:00 2001
From: Rhea Goyal <rhea_goyal@brown.edu>
Date: Fri, 12 Aug 2022 06:47:23 -0400
Subject: [PATCH 19/23] basic tests passing for cJSON dict proxy

---
 tuplex/codegen/include/BuiltinDictProxyImpl.h |  15 +-
 tuplex/codegen/include/cJSONDictProxyImpl.h   |  15 +-
 tuplex/codegen/src/cJSONDictProxyImpl.cc      | 206 ++++++++++++++++++
 tuplex/core/src/cJSONDictProxyImpl.cc         | 102 ---------
 tuplex/test/CMakeLists.txt                    |   1 +
 tuplex/test/dict/CMakeLists.txt               |  11 +-
 tuplex/test/dict/DictProxyTest.cc             | 173 ++++++++-------
 tuplex/test/dict/cJSONTest.cc                 | 125 +++++++++++
 8 files changed, 454 insertions(+), 194 deletions(-)
 create mode 100644 tuplex/codegen/src/cJSONDictProxyImpl.cc
 delete mode 100644 tuplex/core/src/cJSONDictProxyImpl.cc
 create mode 100644 tuplex/test/dict/cJSONTest.cc

diff --git a/tuplex/codegen/include/BuiltinDictProxyImpl.h b/tuplex/codegen/include/BuiltinDictProxyImpl.h
index 7665f6cca..6defe6d90 100644
--- a/tuplex/codegen/include/BuiltinDictProxyImpl.h
+++ b/tuplex/codegen/include/BuiltinDictProxyImpl.h
@@ -19,10 +19,23 @@ namespace tuplex {
     namespace codegen {
         class BuiltinDictProxyImpl {
         public:
+            // pure virtual (= 0): every concrete dict proxy implementation must override these methods.
             virtual void putItem(const Field& key, const Field& value) = 0;
             virtual void putItem(const python::Type& keyType, const SerializableValue& key, const python::Type& valueType, const SerializableValue& value) = 0;
+
+            virtual bool keyExists(const Field& key) = 0;
+
+            virtual Field getItem(const Field& key) = 0;
+
+            virtual void replaceItem(const Field& key, const Field& value) = 0;
+
+            virtual void deleteItem(const Field& key) = 0;
+
+            // virtual void getKeyView() = 0;
+
+            // virtual void getValuesView() = 0;
         };
     }
 }
 
-#endif //TUPLEX_BUILTINDICTPROXYIMPL_H
+#endif //TUPLEX_BUILTINDICTPROXYIMPL_H
\ No newline at end of file
diff --git a/tuplex/codegen/include/cJSONDictProxyImpl.h b/tuplex/codegen/include/cJSONDictProxyImpl.h
index a3218c3e6..cb461ea7c 100644
--- a/tuplex/codegen/include/cJSONDictProxyImpl.h
+++ b/tuplex/codegen/include/cJSONDictProxyImpl.h
@@ -22,7 +22,9 @@ namespace tuplex {
     namespace codegen {
         class cJSONDictProxyImpl : public BuiltinDictProxyImpl {
         public:
-            cJSONDictProxyImpl() : _root(nullptr) {}
+            // cJSONDictProxyImpl() : _root(nullptr) {}
+            // is there a reason we want to separate the initialisation of cjsondictproxy objects and the actual cjson object?
+            cJSONDictProxyImpl() : _root(cJSON_CreateObject()) {}
             ~cJSONDictProxyImpl() {
                 if(_root) {
                     cJSON_free(_root);
@@ -35,6 +37,17 @@ namespace tuplex {
             void putItem(const Field& key, const Field& value) override;
             void putItem(const python::Type& keyType, const SerializableValue& key, const python::Type& valueType, const SerializableValue& value) override;
 
+            bool keyExists(const Field& key) override;
+
+            Field getItem(const Field& key) override;
+
+            void replaceItem(const Field& key, const Field& value) override;
+
+            void deleteItem(const Field& key) override;
+
+            // void getKeyView() override;
+
+            // void getValuesView() override;
 
             // notes:
             // for cJSON subscripting, need to perform
diff --git a/tuplex/codegen/src/cJSONDictProxyImpl.cc b/tuplex/codegen/src/cJSONDictProxyImpl.cc
new file mode 100644
index 000000000..d0a2d634b
--- /dev/null
+++ b/tuplex/codegen/src/cJSONDictProxyImpl.cc
@@ -0,0 +1,206 @@
+//--------------------------------------------------------------------------------------------------------------------//
+//                                                                                                                    //
+//                                      Tuplex: Blazing Fast Python Data Science                                      //
+//                                                                                                                    //
+//                                                                                                                    //
+//  (c) 2017 - 2021, Tuplex team                                                                                      //
+//  Created by Leonhard Spiegelberg first on 8/9/2021                                                                 //
+//  License: Apache 2.0                                                                                               //
+//--------------------------------------------------------------------------------------------------------------------//
+#include <cJSONDictProxyImpl.h>
+
+namespace tuplex {
+    namespace codegen {
+        // in general cJSON supports following data types:
+        // string
+        // number
+        // boolean
+        // null
+        // object
+        // array
+        // --> yet type info from python might get lost. Hence, store it when possible as well!
+
+        // this is a general helper function to turn a Field into a cJSON object
+        /*!
+         * converts a field into a cJSON object. If not convertible, returns nullptr.
+         * @param f Field
+         * @param includeTypePrefix
+         * @return cJSON* object
+         */
+        cJSON* fieldToCJSON(const Field& f, bool includeTypePrefix=false) {
+            // initialise cJSON object
+            cJSON* cjson_obj = nullptr;
+
+            // check type of Field, create corresponding cJSON type object
+            if (f.getType() == python::Type::BOOLEAN) {
+                if (f.getInt() > 0) {
+                    cjson_obj = cJSON_CreateTrue();
+                } else {
+                    cjson_obj = cJSON_CreateFalse();
+                }
+            } else if (f.getType() == python::Type::F64) {
+                cjson_obj = cJSON_CreateNumber(f.getDouble());
+            } else if (f.getType() == python::Type::I64) {
+                // NOTE: cJSON_CreateNumber takes a double, so int64 values beyond +/-2^53 lose precision in this cast.
+                cjson_obj = cJSON_CreateNumber((double)f.getInt());       
+            } else if (f.getType() == python::Type::STRING) {
+                assert(f.getPtr());
+                cjson_obj = cJSON_CreateString((const char*)f.getPtr());
+            } else if (f.getType().isListType()) {
+                assert(f.getPtr());
+
+                tuplex::List* lis = (tuplex::List*)f.getPtr();
+                cjson_obj = cJSON_CreateArray();
+                
+                for (int i = 0; i < lis->numElements(); i++) {
+                    // retrieve ith element from list
+                    Field element = lis->getField(i);
+                    // convert to cJSON object
+                    cJSON* cjson_elt = fieldToCJSON(element);
+
+                    // add element to cJSON array
+                    cJSON_AddItemToArray(cjson_obj, cjson_elt);
+                }
+            } else if (f.getType().isTupleType()) {
+                assert(f.getPtr());
+
+                tuplex::Tuple* tup = (tuplex::Tuple*)f.getPtr();
+                cjson_obj = cJSON_CreateArray();
+                
+                for (int i = 0; i < tup->numElements(); i++) {
+                    // retrieve ith element from tuple
+                    Field element = tup->getField(i);
+                    // convert to cJSON object
+                    cJSON* cjson_elt = fieldToCJSON(element);
+
+                    // add element to cJSON array
+                    cJSON_AddItemToArray(cjson_obj, cjson_elt);
+                }
+            } else if (f.getType() == python::Type::NULLVALUE) {
+                cjson_obj = cJSON_CreateNull();
+            } else {
+                // throw std::runtime_error("cannot change value with type " + value.getType().desc() + " into cJSON object");
+            }
+
+            return cjson_obj;
+        }
+
+        Field cJSONToField(const cJSON* object) {
+            assert(object);
+
+            Field ret = Field::null();
+
+            if (cJSON_IsNumber(object)) {
+                ret = Field(cJSON_GetNumberValue(object));
+            } else if (cJSON_IsString(object)) {
+                ret = Field(cJSON_GetStringValue(object));
+            } else if (cJSON_IsTrue(object)) {
+                ret = Field(true);
+            } else if (cJSON_IsFalse(object)) {
+                ret = Field(false);
+            } else if (cJSON_IsNull(object)) {
+                ret = Field::null();
+            } else if (cJSON_IsArray(object)) {
+                throw std::runtime_error("not yet implemented...");                
+            } else if (cJSON_IsObject(object)) {
+                throw std::runtime_error("not yet implemented...");
+            }
+
+            return ret;
+        }
+
+        std::string cJSONDictProxyImpl::typePrefix(const python::Type& type) {
+
+            // init map for a couple common types (int, float, bool, ...)
+
+            // since keys in JSON are always strings, need to store type info in that string!
+            return "";
+        }
+
+        void cJSONDictProxyImpl::putItem(const Field &key, const Field &value) {
+            // put into cJSON; since key and value types are not necessarily type-stable, the type should be encoded (as base64) into the values -- not done yet!
+            // map primitive types directly into cJSON if possible
+            if(!_root)
+                // _root = cJSON_CreateObject();
+                throw std::runtime_error("cannot use putItem on an uninitialised dictionary");
+            
+            cJSON* to_add = fieldToCJSON(value);
+            if (!to_add) {
+                throw std::runtime_error("item to add not convertible to cJSON object");
+            }
+
+            // add to cJSON object
+            // Q: why use key.desc() instead of the key's ptr value?
+            // A: key.desc() yields a string for the Field regardless of its type, and cJSON object keys are always strings.
+            cJSON_AddItemToObject(_root, key.desc().c_str(), to_add);
+
+            // type prefix
+
+            // throw std::runtime_error("to implement...");
+        }
+        
+        void cJSONDictProxyImpl::putItem(const python::Type &keyType, const SerializableValue &key,
+                                         const python::Type &valueType, const SerializableValue &value) {
+            if(!_root)
+                _root = cJSON_CreateObject();
+
+            throw std::runtime_error("to implement...");
+        }
+
+        bool cJSONDictProxyImpl::keyExists(const Field& key) {
+            if(!_root)
+                throw std::runtime_error("cannot use keyExists on an uninitialised dictionary");
+            
+            cJSON* res = cJSON_GetObjectItemCaseSensitive(_root, key.desc().c_str());
+
+            return (res != NULL);
+        }
+
+        Field cJSONDictProxyImpl::getItem(const Field& key) {
+            if (!_root)
+                throw std::runtime_error("cannot use getItem on an uninitialised dictionary");
+            
+            // retrieve value from dict
+            cJSON* item = cJSON_GetObjectItemCaseSensitive(_root, key.desc().c_str());
+
+            if (!item)
+                throw std::runtime_error("error retrieving value from cJSON dictionary");
+
+            // convert into Field
+            Field field_item = cJSONToField(item);
+
+            return field_item;
+        }
+
+        void cJSONDictProxyImpl::replaceItem(const Field& key, const Field& value) {
+            if (!_root)
+                throw std::runtime_error("cannot use replaceItem on an uninitialised dictionary");
+            
+            // assert(key.getType() == python::Type::STRING);
+
+            // attempt to retrieve value from dict
+            cJSON* item = cJSON_GetObjectItemCaseSensitive(_root, key.desc().c_str());
+
+            if (!item) {
+                // key doesn't already exist; simply perform putItem instead (?)
+                putItem(key, value);
+            } else {
+                // replace value at key
+                cJSON* new_item = fieldToCJSON(value);
+                if (!new_item) {
+                    throw std::runtime_error("new item not convertible to cJSON object");
+                }
+
+                cJSON_ReplaceItemInObjectCaseSensitive(_root, key.desc().c_str(), new_item);
+            }
+        }
+
+        void cJSONDictProxyImpl::deleteItem(const Field& key) {
+            if (!_root)
+                throw std::runtime_error("cannot use deleteItem on an uninitialised dictionary");
+            
+            // delete value from dict
+            cJSON_DeleteItemFromObjectCaseSensitive(_root, (const char*)key.desc().c_str());
+        }
+    }
+}
diff --git a/tuplex/core/src/cJSONDictProxyImpl.cc b/tuplex/core/src/cJSONDictProxyImpl.cc
deleted file mode 100644
index 5ecc9f1ba..000000000
--- a/tuplex/core/src/cJSONDictProxyImpl.cc
+++ /dev/null
@@ -1,102 +0,0 @@
-//--------------------------------------------------------------------------------------------------------------------//
-//                                                                                                                    //
-//                                      Tuplex: Blazing Fast Python Data Science                                      //
-//                                                                                                                    //
-//                                                                                                                    //
-//  (c) 2017 - 2021, Tuplex team                                                                                      //
-//  Created by Leonhard Spiegelberg first on 8/9/2021                                                                 //
-//  License: Apache 2.0                                                                                               //
-//--------------------------------------------------------------------------------------------------------------------//
-#include <cJSONDictProxyImpl.h>
-
-namespace tuplex {
-    namespace codegen {
-
-
-        // in general cJSON supports following data types:
-        // string
-        // number
-        // boolean
-        // null
-        // object
-        // array
-        // --> yet type info from python might get lost. Hence, store it when possible as well!
-
-        // this is a general helper function to turn a Field into a cJSON object
-        /*!
-         * converts a field into a cJSON object. If not convertible, returns nullptr.
-         * @param f Field
-         * @param includeTypePrefix
-         * @return cJSON* object
-         */
-        cJSON* fieldToCJSON(const Field& f, bool includeTypePrefix=false) {
-
-            return nullptr;
-        }
-
-        Field cJSONToField(const cJSON* object) {
-            assert(object);
-
-            return Field::null();
-        }
-
-        std::string cJSONDictProxyImpl::typePrefix(const python::Type& type) {
-
-            // init map for a couple common types (int, float, bool, ...)
-
-            // since keys in JSON are always strings, need to store type info in that string!
-            return "";
-        }
-
-        void cJSONDictProxyImpl::putItem(const Field &key, const Field &value) {
-            // put into cJSON, yet due to both key/type being not necessary type stable, encode type as base64 into values!
-            // map primitive types directly into cJSON if possible
-            if(!_root)
-                _root = cJSON_CreateObject();
-            
-            cJSON* to_add; // = cJSON_CreateNull();
-
-            // check type of value, create corresponding cJSON type object
-            if (value.getType() == python::Type::BOOLEAN) {
-                if (value.getInt() > 0) {
-                    to_add = cJSON_CreateTrue();
-                } else {
-                    to_add = cJSON_CreateFalse();
-                }
-            } else if (value.getType() == python::Type::F64) {
-                to_add = cJSON_CreateNumber(value.getDouble());
-            } else if (value.getType() == python::Type::I64) {
-                // should I be upcasting?
-                to_add = cJSON_CreateNumber((double)value.getInt());
-            } else if (value.getType() == python::Type::STRING) {
-                to_add = cJSON_CreateString((const char*)value.getPtr());
-            } else if (value.getType().isTupleType()) {
-                assert(value.getPtr());
-
-                std::tuple* tup = (std::tuple*)value.getPtr();
-                to_add = cJSON_CreateArray();
-                
-                for (auto i : tup) {
-                    
-                }
-            } else {
-                throw std::runtime_error("cannot put value with type " + value.getType().desc() + " into cJSON object");
-            }
-
-            // add to cJSON object
-            cJSON_AddItemToObject(_root, key.desc().c_str(), to_add);
-
-            // type prefix
-
-            // throw std::runtime_error("to implement...");
-        }
-
-        void cJSONDictProxyImpl::putItem(const python::Type &keyType, const SerializableValue &key,
-                                         const python::Type &valueType, const SerializableValue &value) {
-            if(!_root)
-                _root = cJSON_CreateObject();
-
-            throw std::runtime_error("to implement...");
-        }
-    }
-}
diff --git a/tuplex/test/CMakeLists.txt b/tuplex/test/CMakeLists.txt
index 3f3721780..3e422d54b 100755
--- a/tuplex/test/CMakeLists.txt
+++ b/tuplex/test/CMakeLists.txt
@@ -78,6 +78,7 @@ add_subdirectory(io)
 add_subdirectory(runtime)
 add_subdirectory(adapters)
 add_subdirectory(utils)
+add_subdirectory(dict)
 
 # these require python, so only if embed is active!
 if(Python3_Embed_FOUND)
diff --git a/tuplex/test/dict/CMakeLists.txt b/tuplex/test/dict/CMakeLists.txt
index 6f3d18cef..9e3c566c9 100644
--- a/tuplex/test/dict/CMakeLists.txt
+++ b/tuplex/test/dict/CMakeLists.txt
@@ -7,11 +7,16 @@ FILE(GLOB SRCS *.cc)
 
 include(GoogleTest)
 
-ADD_EXECUTABLE(testutils ${SRCS})
+ADD_EXECUTABLE(testdict ${SRCS})
 
-TARGET_LINK_LIBRARIES(testutils
+TARGET_LINK_LIBRARIES(testdict
         libutils
+        libcodegen
+        libcpythonadapter
+        libio
         ${GTest_LIBRARIES}
+        ${AWSSDK_LINK_LIBRARIES}
+        ${Python3_LIBRARIES}
         )
 
-gtest_add_tests(TARGET testutils TEST_PREFIX "")
\ No newline at end of file
+gtest_add_tests(TARGET testdict TEST_PREFIX "")
\ No newline at end of file
diff --git a/tuplex/test/dict/DictProxyTest.cc b/tuplex/test/dict/DictProxyTest.cc
index ae9b2aad6..d74f989cd 100644
--- a/tuplex/test/dict/DictProxyTest.cc
+++ b/tuplex/test/dict/DictProxyTest.cc
@@ -1,121 +1,120 @@
-//--------------------------------------------------------------------------------------------------------------------//
-//                                                                                                                    //
-//                                      Tuplex: Blazing Fast Python Data Science                                      //
-//                                                                                                                    //
-//                                                                                                                    //
-//  (c) 2017 - 2021, Tuplex team                                                                                      //
-//  Created by Leonhard Spiegelberg first on 8/9/2021                                                                 //
-//  License: Apache 2.0                                                                                               //
-//--------------------------------------------------------------------------------------------------------------------//
+// //--------------------------------------------------------------------------------------------------------------------//
+// //                                                                                                                    //
+// //                                      Tuplex: Blazing Fast Python Data Science                                      //
+// //                                                                                                                    //
+// //                                                                                                                    //
+// //  (c) 2017 - 2021, Tuplex team                                                                                      //
+// //  Created by Leonhard Spiegelberg first on 8/9/2021                                                                 //
+// //  License: Apache 2.0                                                                                               //
+// //--------------------------------------------------------------------------------------------------------------------//
 
-#include "TestUtils.h"
-#include <BuiltinDictProxy.h>
+// #include <BuiltinDictProxy.h>
+// #include "gtest/gtest.h"
 
-class DictProxyTest : public PyTest {};
+// class DictProxyTest : public TuplexTest {};
 
+// // helper function to generate combinations with repetitions
+// template<typename T> void combinations_r_recursive(const std::vector<T> &elements, std::size_t combination_length,
+//                               std::vector<unsigned long> &pos, unsigned long depth,
+//                               unsigned long margin, std::vector<std::vector<T>>& result) {
+//     // Have we selected the number of required elements?
+//     if (depth >= combination_length) {
+//         std::vector<T> combination;
+//         combination.reserve(combination_length);
+//         for(unsigned long ii = 0; ii < pos.size(); ++ii)
+//             combination.push_back(elements[pos[ii]]);
+//         combination.shrink_to_fit();
+//         result.push_back(combination);
+//         return;
+//     }
 
-// helper function to generate combinations with repititions
-template<typename T> void combinations_r_recursive(const std::vector<T> &elements, std::size_t combination_length,
-                              std::vector<unsigned long> &pos, unsigned long depth,
-                              unsigned long margin, std::vector<std::vector<T>>& result) {
-    // Have we selected the number of required elements?
-    if (depth >= combination_length) {
-        std::vector<T> combination;
-        combination.reserve(combination_length);
-        for(unsigned long ii = 0; ii < pos.size(); ++ii)
-            combination.push_back(elements[pos[ii]]);
-        combination.shrink_to_fit();
-        result.push_back(combination);
-        return;
-    }
+//     // Try to select new elements to the right of the last selected one.
+//     for (unsigned long ii = margin; ii < elements.size(); ++ii) {
+//         pos[depth] = ii;
+//         combinations_r_recursive(elements, combination_length, pos, depth + 1, ii, result);
+//     }
+// }
 
-    // Try to select new elements to the right of the last selected one.
-    for (unsigned long ii = margin; ii < elements.size(); ++ii) {
-        pos[depth] = ii;
-        combinations_r_recursive(elements, combination_length, pos, depth + 1, ii, result);
-    }
-}
+// template<typename T> std::vector<std::vector<T>> combinations_with_repetition(const std::vector<T> &elements, size_t combination_length) {
+//     assert(combination_length <= elements.size());
+//     std::vector<unsigned long> positions(combination_length, 0);
+//     std::vector<std::vector<T>> result;
+//     combinations_r_recursive(elements, combination_length, positions, 0, 0, result);
 
-template<typename T> std::vector<std::vector<T>> combinations_with_repetition(const std::vector<T> &elements, size_t combination_length) {
-    assert(combination_length <= elements.size());
-    std::vector<unsigned long> positions(combination_length, 0);
-    std::vector<std::vector<T>> result;
-    combinations_r_recursive(elements, combination_length, positions, 0, 0, result);
+//     return result;
+// }
 
-    return result;
-}
 
 
+// TEST_F(DictProxyTest, PutItemTest) {
+//     using namespace tuplex;
+//     using namespace std;
 
-TEST_F(DictProxyTest, PutItemTest) {
-    using namespace tuplex;
-    using namespace std;
+//     // testing the non-codegenerated put item test
 
-    // testing the non-codegenerated put item test
 
+//     // tests to write:
 
-    // tests to write:
+//     // 1. heterogeneous dict -> basically use modified JSON as in-memory storage format.
+//     // 2. homogeneous keytype dict -> can encode dict directly & serialize it more efficiently. Represent in-memory as hash table specialized depending on type.
+//     // 3. homogeneous valuetype -> ignore case, specialize to 1.
+//     // 4. compile-time known keys/restricted keyset, keys do not change. -> struct type with fixed offsets!
 
-    // 1. heterogenous dict -> basically use modified JSON as in-memory storage format.
-    // 2. homogenous keytype dict -> can encode dict directly & serialize it more efficiently. Represent in-memory as hash table specialized depending on type.
-    // 3. homogenous valuetype -> ignore case, specialize to 1.
-    // 4. compile-time known keys/restricted keyset, keys do not change. -> struct type with fixed offsets!
+//     // put and get
+//     auto dict_fun_code = "def f(a, b, c, d):\n"
+//                          "    M = dict()\n"
+//                          "    M[a] = b\n"
+//                          "    M[c] = d\n"
+//                          "    return M, M[a], M[c]\n";
 
-    // put and get
-    auto dict_fun_code = "def f(a, b, c, d):\n"
-                         "    M = dict()\n"
-                         "    M[a] = b\n"
-                         "    M[c] = d\n"
-                         "    return M, M[a], M[c]\n";
+//     codegen::BuiltinDictProxy dict_proxy(python::Type::UNKNOWN);
 
-    codegen::BuiltinDictProxy dict_proxy(python::Type::UNKNOWN);
+//     // create test setups (4 values, all combos)
+//     vector<Field> test_values{Field((int64_t)0), Field(10.0), Field(false), Field::null(), Field("hello world"), Field(Tuple(10, 20)), Field(Tuple(3.141, 10, false, "test")), Field(List(1.0, 3.0, 4.0))};
 
-    // create test setups (4 values, all combos)
-    vector<Field> test_values{Field((int64_t)0), Field(10.0), Field(false), Field::null(), Field("hello world"), Field(Tuple(10, 20)), Field(Tuple(3.141, 10, false, "test")), Field(List(1.0, 3.0, 4.0))};
+//     // NOTE: list/dict is not hashable in python!
+//     // 
 
-    // NOTE: list/dict is not hashable in python!
-    // 
+//     // create combos
+//     // 4 ^ len(test_values)
 
-    // create combos
-    // 4 ^ len(test_values)
 
+//     // what about nested dicts?
+//     // -> unflatten?
+//     // --> unflatten using combined keys? i.e. a/b/c ? which char to use as separator?
+//     // maybe start with non-nested dicts.
+//     // dicts should be able to store lists etc.
 
-    // what about nested dicts?
-    // -> unflatten?
-    // --> unflatten using combined keys? i.e. a/b/c ? which char to use as separator?
-    // maybe start with non-nested dicts.
-    // dicts should be able to store lists etc.
+//     auto combos = combinations_with_repetition(test_values, 4);
 
-    auto combos = combinations_with_repetition(test_values, 4);
+//     cout<<"Generated "<<combos.size()<<" combinations."<<endl<<endl;
+//     for(auto combo : combos) {
+//         for(auto f : combo)
+//             cout<<f.toPythonString()<<" ";
+//         cout<<endl;
+//     }
 
-    cout<<"Generated "<<combos.size()<<" combinations."<<endl<<endl;
-    for(auto combo : combos) {
-        for(auto f : combo)
-            cout<<f.toPythonString()<<" ";
-        cout<<endl;
-    }
+//     // this should be a good variety of things to store in dictionaries
+//     for(int i = 0; i < std::pow(4, test_values.size()); ++i) {
 
-    // this should a good variety of what things to store in dictionaries
-    for(int i = 0; i < std::pow(4, test_values.size()); ++i) {
+//     }
 
-    }
 
+// //
+// //    dict_proxy.putItem(Field((int64_t)10), Field("test"));
+// //    dict_proxy.putItem(Field((int64_t)20), Field("hello"));
 
-//
-//    dict_proxy.putItem(Field((int64_t)10), Field("test"));
-//    dict_proxy.putItem(Field((int64_t)20), Field("hello"));
 
+//     // limited keyset, dynamic access etc.
 
-    // limited keyset, dynamic access etc.
+//     // basically we need only a couple dictionary primitives:
 
-    // basically we need only a couple dictionary primitives:
+//     // 1. fixed set of keys -> can be checked dynamically at runtime. I.e., good for read-only dictionaries, rarely changed ones. etc. --> requires dispatch dictionary for each type for dynamic types. Constants can be translated during compile time.
+//     // -> because dicts support in syntax, need to keep additional bitmap to check whether there's a valid entry or not!
 
-    // 1. fixed set of keys -> can be checked dynamically at runtime. I.e., good for read-only dictionaries, rarely changed ones. etc. --> requires dispatch dictionary for each type for dynamic types. Constants can be translated during compile time.
-    // -> because dicts support in syntax, need to keep additional bitmap to check whether there's a valid entry or not!
 
+//     // 2. fixed key type/value type dicts -> can be used in dynamic settings. E.g., when accumulating things!
 
-    // 2. fixed key type/value type dicts -> can be used in dynamic settings. E.g., when accumulating things!
+//     // 3. other usage should be esoteric...
 
-    // 3. other usage should be esoteric...
-
-}
+// }
diff --git a/tuplex/test/dict/cJSONTest.cc b/tuplex/test/dict/cJSONTest.cc
new file mode 100644
index 000000000..22e0c0a00
--- /dev/null
+++ b/tuplex/test/dict/cJSONTest.cc
@@ -0,0 +1,125 @@
+//--------------------------------------------------------------------------------------------------------------------//
+//                                                                                                                    //
+//                                      Tuplex: Blazing Fast Python Data Science                                      //
+//                                                                                                                    //
+//                                                                                                                    //
+//  (c) 2017 - 2021, Tuplex team                                                                                      //
+//  Created by Leonhard Spiegelberg first on 8/9/2021                                                                 //
+//  License: Apache 2.0                                                                                               //
+//--------------------------------------------------------------------------------------------------------------------//
+
+#include <cJSONDictProxyImpl.h>
+#include "gtest/gtest.h"
+
+TEST(cJSONTest, PutItemTest) {
+    using namespace tuplex;
+    using namespace std;
+
+    // testing non-codegenerated put item
+    // initialise test dict
+    codegen::cJSONDictProxyImpl dict_proxy;
+
+    EXPECT_EQ(false, dict_proxy.keyExists(Field((int64_t)10)));
+
+    // put test values into test dict
+    dict_proxy.putItem(Field((int64_t)10), Field("a"));
+    dict_proxy.putItem(Field((int64_t)20), Field("b"));
+
+    EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)10)));
+    EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)20)));
+    EXPECT_EQ(false, dict_proxy.keyExists(Field((int64_t)30)));
+
+    dict_proxy.putItem(Field((int64_t)30), Field("c"));
+
+    EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)20)));
+    EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)30)));
+}
+
+TEST(cJSONTest, GetItemTest) {
+    using namespace tuplex;
+    using namespace std;
+
+    // testing non-codegenerated get item
+    // initialise test dict
+    codegen::cJSONDictProxyImpl dict_proxy;
+
+    // put test values into test dict
+    dict_proxy.putItem(Field((int64_t)10), Field("a"));
+    dict_proxy.putItem(Field((int64_t)20), Field("b"));
+
+    EXPECT_EQ(Field("a"), dict_proxy.getItem(Field((int64_t)10)));
+    EXPECT_EQ(Field("b"), dict_proxy.getItem(Field((int64_t)20)));
+    EXPECT_THROW(dict_proxy.getItem(Field((int64_t)30)), std::runtime_error);
+
+    dict_proxy.putItem(Field((int64_t)30), Field("c"));
+
+    EXPECT_EQ(Field("c"), dict_proxy.getItem(Field((int64_t)30)));
+}
+
+TEST(cJSONTest, DeleteItemTest) {
+    using namespace tuplex;
+    using namespace std;
+
+    // testing non-codegenerated delete item
+    // initialise test dict
+    codegen::cJSONDictProxyImpl dict_proxy;
+
+    // put test values into test dict
+    dict_proxy.putItem(Field((int64_t)10), Field("a"));
+    dict_proxy.putItem(Field((int64_t)20), Field("b"));
+
+    EXPECT_EQ(Field("a"), dict_proxy.getItem(Field((int64_t)10)));
+    EXPECT_EQ(Field("b"), dict_proxy.getItem(Field((int64_t)20)));
+
+    dict_proxy.deleteItem(Field((int64_t)10));
+
+    EXPECT_EQ(false, dict_proxy.keyExists(Field((int64_t)10)));
+    EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)20)));
+
+    dict_proxy.deleteItem(Field((int64_t)20));
+    dict_proxy.putItem(Field((int64_t)10), Field((int64_t)100));
+
+    Field res = dict_proxy.getItem(Field((int64_t)10));
+
+    // NOTE: the expected result is a double, because cJSON appears to store all numbers as doubles
+    EXPECT_EQ(Field((double)100), dict_proxy.getItem(Field((int64_t)10)));
+    EXPECT_EQ(false, dict_proxy.keyExists(Field((int64_t)20)));
+}
+
+TEST(cJSONTest, ReplaceItemTest) {
+    using namespace tuplex;
+    using namespace std;
+
+    // testing non-codegenerated replace item
+    // initialise test dict
+    codegen::cJSONDictProxyImpl dict_proxy;
+
+    // put test values into test dict
+    dict_proxy.putItem(Field((int64_t)10), Field("a"));
+    dict_proxy.putItem(Field((int64_t)20), Field("b"));
+
+    EXPECT_EQ(Field("a"), dict_proxy.getItem(Field((int64_t)10)));
+    EXPECT_EQ(Field("b"), dict_proxy.getItem(Field((int64_t)20)));
+
+    dict_proxy.replaceItem(Field((int64_t)10), Field("c"));
+
+    EXPECT_EQ(Field("c"), dict_proxy.getItem(Field((int64_t)10)));
+    EXPECT_EQ(Field("b"), dict_proxy.getItem(Field((int64_t)20)));
+
+    dict_proxy.putItem(Field((int64_t)30), Field("c"));
+
+    EXPECT_EQ(Field("c"), dict_proxy.getItem(Field((int64_t)10)));
+    EXPECT_EQ(Field("c"), dict_proxy.getItem(Field((int64_t)30)));
+    
+    dict_proxy.replaceItem(Field((int64_t)30), Field((int64_t)50));
+
+    // NOTE: the expected result is a double, because cJSON appears to store all numbers as doubles
+    EXPECT_EQ(Field((double)50), dict_proxy.getItem(Field((int64_t)30)));
+}
+
+// tests to write:
+
+// 1. heterogeneous dict -> basically use modified JSON as in-memory storage format.
+// 2. homogeneous keytype dict -> can encode dict directly & serialize it more efficiently. Represent in-memory as hash table specialized depending on type.
+// 3. homogeneous valuetype -> ignore case, specialize to 1.
+// 4. compile-time known keys/restricted keyset, keys do not change. -> struct type with fixed offsets!
\ No newline at end of file

From 234b59105ed8b92dcfc49b08d0e517044ce454ad Mon Sep 17 00:00:00 2001
From: Leonhard Spiegelberg <leonhard_spiegelberg@brown.edu>
Date: Mon, 15 Aug 2022 14:25:36 +0200
Subject: [PATCH 20/23] typing fixes

---
 tuplex/codegen/include/ASTAnnotation.h | 134 +++++++++++++++++--------
 tuplex/codegen/include/SymbolTable.h   |  15 ++-
 tuplex/codegen/src/SymbolTable.cc      |  60 +++++++++--
 tuplex/utils/include/TypeSystem.h      |   8 +-
 tuplex/utils/src/TypeSystem.cc         |  16 +--
 5 files changed, 168 insertions(+), 65 deletions(-)

diff --git a/tuplex/codegen/include/ASTAnnotation.h b/tuplex/codegen/include/ASTAnnotation.h
index 8512f4087..c26a0d7a0 100644
--- a/tuplex/codegen/include/ASTAnnotation.h
+++ b/tuplex/codegen/include/ASTAnnotation.h
@@ -48,8 +48,13 @@ class Symbol : public std::enable_shared_from_this<Symbol> {
     std::shared_ptr<Symbol> parent;
 
     ///! an optional abstract typer function which can be applied if the symboltype is function
+    ///! to deliver a concretely typed type based on the parameter type
     std::function<python::Type(const python::Type&)> functionTyper;
 
+    ///! an optional abstract typer that takes the original type of the caller (e.g., for an attribute)
+    ///! and, together with the parameterType (similar to functionTyper), provides a concrete type for the attribute function
+    std::function<python::Type(const python::Type&,const python::Type&)> attributeFunctionTyper;
+
     ///! optionally constant data associated with that symbol
     tuplex::Field constantData;
 
@@ -90,55 +95,43 @@ class Symbol : public std::enable_shared_from_this<Symbol> {
         auto generic_result = functionTyper(parameterType);
         if(generic_result != python::Type::UNKNOWN) {
             specializedFunctionType = generic_result;
-
             assertFunctionDoesNotReturnGeneric(specializedFunctionType);
             return true;
         }
 
-        for(auto& type : types) {
-            // found symbol, now check its type
-            if(!type.isFunctionType())
-                continue;
-
-            auto tupleArgType = getTupleArg(type.getParamsType());
+        // typer did not yield a result, hence try stored function types incl. upcasting
+        return findStoredTypedFunction(parameterType, specializedFunctionType);
+    }
 
-            // check if there's a direct type match => use that function then!
-            if(parameterType == tupleArgType) {
-                specializedFunctionType = type;
-                assertFunctionDoesNotReturnGeneric(specializedFunctionType);
-                return true;
-            }
+    /*!
+     * the typing of an attribute which is a function may be based on both the callerType and the parameters. I.e.,
+     * this function helps to type an attribute x.a(p) where callerType = type(x) and parameterType = type(p) for some
+     * symbol a which is this.
+     * @param callerType
+     * @param parameterType
+     * @param specializedFunctionType where to store the concrete (non-generic!) output type!
+     * @return true if a specialized function type could be generated, false else.
+     */
+    inline bool findAttributeFunctionType(const python::Type& callerType,
+                                          const python::Type& parameterType,
+                                          python::Type& specializedFunctionType) {
+        // fallback based typing:
+        // 1. check attribute typer
+        // 2. check general typer
+        auto typed_result = attributeFunctionTyper(callerType, parameterType);
+        if(python::Type::UNKNOWN == typed_result) {
+            typed_result = functionTyper(parameterType);
         }
 
-        // no direct match was found. Check whether casting would work or partial matching.
-        for(auto& type : types) {
-            // found symbol, now check its type
-            if (!type.isFunctionType())
-                continue;
-
-            auto tupleArgType = getTupleArg(type.getParamsType());
-
-            // check if given parameters type is compatible with function type?
-            // actual invocation is with parameterType
-            // ==> can we upcast them to fit the defined one OR does is partially work?
-            // e.g., when the function is defined for NULL, but we have opt?
-            if (isTypeCompatible(parameterType, tupleArgType)) {
-                specializedFunctionType = type;
-
-                // specialize according to parameterType if it's a generic function so further typing works
-                assert(!specializedFunctionType.getReturnType().isGeneric());
-                if(specializedFunctionType.getParamsType().isGeneric()) {
-                    auto specializedParams = python::specializeGenerics(parameterType, tupleArgType);
-                    specializedFunctionType = python::Type::makeFunctionType(specializedParams,
-                                                                             specializedFunctionType.getReturnType());
-                }
-
-                assertFunctionDoesNotReturnGeneric(specializedFunctionType);
-                return true;
-            }
+        // check if result is valid, then take it
+        if(typed_result != python::Type::UNKNOWN) {
+            specializedFunctionType = typed_result;
+            assertFunctionDoesNotReturnGeneric(specializedFunctionType);
+            return true;
         }
 
-        return false;
+        // typer did not yield a result, hence try stored function types incl. upcasting
+        return findStoredTypedFunction(parameterType, specializedFunctionType);
     }
 
     /*!
@@ -169,7 +162,8 @@ class Symbol : public std::enable_shared_from_this<Symbol> {
         return full_name;
     }
 
-    Symbol() {}
+    Symbol() : functionTyper([](const python::Type&){return python::Type::UNKNOWN;}),
+               attributeFunctionTyper([](const python::Type&, const python::Type&){return python::Type::UNKNOWN;}) {}
     virtual ~Symbol()  {
         _attributes.clear();
         parent.reset();
@@ -216,17 +210,20 @@ class Symbol : public std::enable_shared_from_this<Symbol> {
 
     Symbol(std::string _name,
            std::function<python::Type(const python::Type&)> typer) : name(_name), qualifiedName(_name),
-           functionTyper(std::move(typer)), symbolType(SymbolType::FUNCTION) {}
+           functionTyper(std::move(typer)), attributeFunctionTyper([](const python::Type&, const python::Type&){return python::Type::UNKNOWN;}), symbolType(SymbolType::FUNCTION) {}
 
     Symbol(std::string _name, python::Type _type) : name(_name), qualifiedName(_name),
     types{_type},
-    symbolType(_type.isFunctionType() ? SymbolType::FUNCTION : SymbolType::VARIABLE), functionTyper([](const python::Type&) { return python::Type::UNKNOWN; }) {}
+    symbolType(_type.isFunctionType() ? SymbolType::FUNCTION : SymbolType::VARIABLE),
+    functionTyper([](const python::Type&) { return python::Type::UNKNOWN; }),
+    attributeFunctionTyper([](const python::Type&, const python::Type&){return python::Type::UNKNOWN;}) {}
 
     Symbol(std::string _name, std::string _qualifiedName, python::Type _type, SymbolType _symbolType) : name(_name),
     qualifiedName(_qualifiedName),
     types{_type},
     symbolType(_symbolType),
-    functionTyper([](const python::Type&) { return python::Type::UNKNOWN; }) {}
+    functionTyper([](const python::Type&) { return python::Type::UNKNOWN; }),
+    attributeFunctionTyper([](const python::Type&, const python::Type&){return python::Type::UNKNOWN;}) {}
 
 private:
     ///! i.e. to store something like re.search. re is then of module type. search will have a concrete function type.
@@ -234,6 +231,55 @@ class Symbol : public std::enable_shared_from_this<Symbol> {
 
     /********* HELPER FUNCTIONS *************/
 
+    inline bool findStoredTypedFunction(const python::Type& parameterType, python::Type& specializedFunctionType) {
+
+        // typing using typer functions above failed, hence now search for concrete stored types.
+        for(auto& type : types) {
+            // found symbol, now check its type
+            if(!type.isFunctionType())
+                continue;
+
+            auto tupleArgType = getTupleArg(type.getParamsType());
+
+            // check if there's a direct type match => use that function then!
+            if(parameterType == tupleArgType) {
+                specializedFunctionType = type;
+                assertFunctionDoesNotReturnGeneric(specializedFunctionType);
+                return true;
+            }
+        }
+
+        // no direct match was found. Check whether casting would work or partial matching.
+        for(auto& type : types) {
+            // found symbol, now check its type
+            if (!type.isFunctionType())
+                continue;
+
+            auto tupleArgType = getTupleArg(type.getParamsType());
+
+            // check if given parameters type is compatible with function type?
+            // actual invocation is with parameterType
+            // ==> can one upcast them to fit the defined one OR does it partially work?
+            // e.g., when the function is defined for NULL, but we have opt?
+            if (isTypeCompatible(parameterType, tupleArgType)) {
+                specializedFunctionType = type;
+
+                // specialize according to parameterType if it's a generic function so further typing works
+                assert(!specializedFunctionType.getReturnType().isGeneric());
+                if(specializedFunctionType.getParamsType().isGeneric()) {
+                    auto specializedParams = python::specializeGenerics(parameterType, tupleArgType);
+                    specializedFunctionType = python::Type::makeFunctionType(specializedParams,
+                                                                             specializedFunctionType.getReturnType());
+                }
+
+                assertFunctionDoesNotReturnGeneric(specializedFunctionType);
+                return true;
+            }
+        }
+
+        return false;
+    }
+
     /*!
      * helper function to check for compatibility, i.e. whether from type can be cast to to type.
      * @param from source type
diff --git a/tuplex/codegen/include/SymbolTable.h b/tuplex/codegen/include/SymbolTable.h
index 0a6b3854d..13a4abb76 100644
--- a/tuplex/codegen/include/SymbolTable.h
+++ b/tuplex/codegen/include/SymbolTable.h
@@ -185,10 +185,23 @@ namespace tuplex {
          * @param typer a dynamic typing function
          * @param sym_type what kind of symbol it is (function? variable?), needed because typer works for both.
          */
-        void addBuiltinTypeAttribute(const python::Type& builtinType, const std::string& name,
+        void addBuiltinTypeAttribute(const python::Type& builtinType,
+                                     const std::string& name,
                                      std::function<python::Type(const python::Type&)> typer,
                                      const SymbolType& sym_type);
 
+        /*!
+         * add an attribute to a builtin type, e.g. dict.keys()
+         * @param builtinType to which type to add the function
+         * @param name name of the attribute
+         * @param attributeTyper a dynamic typing function taking the caller (object) type and the parameter type
+         * @param sym_type what kind of symbol it is (function? variable?), needed because typer works for both.
+         */
+        void addBuiltinTypeAttribute(const python::Type& builtinType,
+                                     const std::string& name,
+                                     std::function<python::Type(const python::Type&, const python::Type&)> attributeTyper,
+                                     const SymbolType& sym_type=SymbolType::FUNCTION);
+
         /*!
          * checks whether a symbol can be looked up or not
          * @param symbol
diff --git a/tuplex/codegen/src/SymbolTable.cc b/tuplex/codegen/src/SymbolTable.cc
index 9f8979b7a..d08dd4ec4 100644
--- a/tuplex/codegen/src/SymbolTable.cc
+++ b/tuplex/codegen/src/SymbolTable.cc
@@ -410,13 +410,24 @@ namespace tuplex {
         // for keys()/values() use generic dict and let symbol table create specialized type on the fly using
         // typer function
         {
-            addBuiltinTypeAttribute(python::Type::GENERICDICT, "keys", [](const python::Type& parameterType) {
+            addBuiltinTypeAttribute(python::Type::GENERICDICT, "keys", [](const python::Type& callerType,
+                    const python::Type& parameterType) {
 
-                //  @TODO: @rhea once you changed the signature of the Lambda here, you should be abel to type correctly.
-                // I can give it a try to refactor everything better than.
-                std::cout<<"need to get concrete dict type here!"<<std::endl;
+                assert(callerType.isDictionaryType() && callerType != python::Type::GENERICDICT);
+                // dict_view is always based on dictionary type
+                auto view_type = python::Type::makeDictKeysViewType(callerType);
 
-                return python::Type::UNKNOWN;
+                return python::Type::makeFunctionType(callerType, view_type);
+            }, SymbolType::FUNCTION);
+
+            addBuiltinTypeAttribute(python::Type::GENERICDICT, "values", [](const python::Type& callerType,
+                                                                          const python::Type& parameterType) {
+
+                assert(callerType.isDictionaryType() && callerType != python::Type::GENERICDICT);
+                // dict_view is always based on dictionary type
+                auto values_type = python::Type::makeDictValuesViewType(callerType);
+
+                return python::Type::makeFunctionType(callerType, values_type);
             }, SymbolType::FUNCTION);
         }
 
@@ -684,6 +695,41 @@ namespace tuplex {
         return addSymbol(make_shared<Symbol>(name, type));
     }
 
+    void SymbolTable::addBuiltinTypeAttribute(const python::Type &builtinType, const std::string &name,
+                                              std::function<python::Type(const python::Type &,
+                                                                         const python::Type &)> attributeTyper,
+                                              const SymbolType &sym_type) {
+        using namespace std;
+        assert(sym_type == SymbolType::VARIABLE || sym_type == SymbolType::FUNCTION);
+
+        // this seems wrong, need to perform the lookup directly...
+        // use desc as name
+        auto scope = currentScope();
+        auto it = scope->symbols.find(builtinType.desc());
+        if(it == scope->symbols.end()) {
+            auto sym = make_shared<Symbol>();
+            sym->name = sym->qualifiedName = builtinType.desc();
+            scope->symbols[builtinType.desc()] = sym;
+
+            it = scope->symbols.find(builtinType.desc());
+            assert(it != scope->symbols.end());
+        }
+        auto sym_att = it->second->findAttribute(name);
+        if(!sym_att) {
+            it->second->addAttribute(make_shared<Symbol>(name, name, builtinType, sym_type));
+            sym_att = it->second->findAttribute(name);
+        } else {
+            // replace symbol, there can be only one symbol with a typer function
+            if(sym_type != sym_att->symbolType)
+                throw std::runtime_error("symbol can only have one kind of types associated with it!");
+            assert(sym_att->qualifiedName == name);
+            sym_att->name = name;
+        }
+        assert(sym_att);
+        sym_att->parent = scope->symbols[name];
+        sym_att->attributeFunctionTyper = attributeTyper;
+    }
+
     void SymbolTable::addBuiltinTypeAttribute(const python::Type &builtinType, const std::string &name,
                                               std::function<python::Type(const python::Type &)> typer,
                                               const SymbolType& sym_type = SymbolType::VARIABLE) {
@@ -813,9 +859,7 @@ namespace tuplex {
                     // else, return single type
                     return attr_sym->type();
                 python::Type funcType = python::Type::UNKNOWN;
-
-                //  @TODO: @rhea -> change function here to include objectType as well and make typer a two parameter function
-                attr_sym->findFunctionTypeBasedOnParameterType(parameterType, funcType); // ignore ret value.
+                attr_sym->findAttributeFunctionType(objectType, parameterType, funcType); // ignore ret value.
                 return funcType;
             }
         }
diff --git a/tuplex/utils/include/TypeSystem.h b/tuplex/utils/include/TypeSystem.h
index 5fc1f6110..f6698d3bb 100644
--- a/tuplex/utils/include/TypeSystem.h
+++ b/tuplex/utils/include/TypeSystem.h
@@ -224,8 +224,8 @@ namespace python {
 
         static Type makeListType(const python::Type &elementType);
 
-        static Type makeDictKeysType(const python::Type& keyType);
-        static Type makeDictValuesType(const python::Type& valType);
+        static Type makeDictKeysViewType(const python::Type& dictType);
+        static Type makeDictValuesViewType(const python::Type& dictType);
 
         /*!
          * create iterator type from yieldType.
@@ -359,8 +359,8 @@ namespace python {
         // right now, no tuples or other weird types...
         Type createOrGetFunctionType(const Type& param, const Type& ret=Type::EMPTYTUPLE);
         Type createOrGetDictionaryType(const Type& key, const Type& val);
-        Type createOrGetDictKeysType(const Type& key);
-        Type createOrGetDictValuesType(const Type& val);
+        Type createOrGetDictKeysViewType(const Type& key);
+        Type createOrGetDictValuesViewType(const Type& val);
         Type createOrGetListType(const Type& val);
         Type createOrGetTupleType(const std::initializer_list<Type> args);
         Type createOrGetTupleType(const TTuple<Type>& args);
diff --git a/tuplex/utils/src/TypeSystem.cc b/tuplex/utils/src/TypeSystem.cc
index cf33b751c..bc80963c3 100644
--- a/tuplex/utils/src/TypeSystem.cc
+++ b/tuplex/utils/src/TypeSystem.cc
@@ -149,18 +149,18 @@ namespace python {
         return registerOrGetType(name, AbstractType::DICTIONARY, {key, val});
     }
 
-    Type TypeFactory::createOrGetDictKeysType(const Type& key) {
+    Type TypeFactory::createOrGetDictKeysViewType(const Type& key) {
         std::string name;
-        name += "[";
+        name += "DictKeysView[";
         name += TypeFactory::instance().getDesc(key._hash);
         name += "]";
 
         return registerOrGetType(name, AbstractType::DICT_KEYS, {key});
     }
 
-    Type TypeFactory::createOrGetDictValuesType(const Type& val) {
+    Type TypeFactory::createOrGetDictValuesViewType(const Type& val) {
         std::string name;
-        name += "[";
+        name += "DictValuesView[";
         name += TypeFactory::instance().getDesc(val._hash);
         name += "]";
 
@@ -583,12 +583,12 @@ namespace python {
         return python::TypeFactory::instance().createOrGetDictionaryType(keyType, valType);
     }
 
-    Type Type::makeDictKeysType(const python::Type& keyType) {
-        return python::TypeFactory::instance().createOrGetDictKeysType(keyType);
+    Type Type::makeDictKeysViewType(const python::Type& keyType) {
+        return python::TypeFactory::instance().createOrGetDictKeysViewType(keyType);
     }
 
-    Type Type::makeDictValuesType(const python::Type& valType) {
-        return python::TypeFactory::instance().createOrGetDictValuesType(valType);
+    Type Type::makeDictValuesViewType(const python::Type& valType) {
+        return python::TypeFactory::instance().createOrGetDictValuesViewType(valType);
     }
 
     Type Type::makeListType(const python::Type &elementType){

From 4ecbc9d0b7211c703ab4649d106f8695a8b37724 Mon Sep 17 00:00:00 2001
From: Leonhard Spiegelberg <leonhard_spiegelberg@brown.edu>
Date: Mon, 15 Aug 2022 15:09:45 +0200
Subject: [PATCH 21/23] adding list conversion

---
 tuplex/codegen/src/SymbolTable.cc    | 49 ++++++++++++++++++++++++++++
 tuplex/test/core/DictionaryTyping.cc | 33 ++++++++++++++++---
 2 files changed, 77 insertions(+), 5 deletions(-)

diff --git a/tuplex/codegen/src/SymbolTable.cc b/tuplex/codegen/src/SymbolTable.cc
index d08dd4ec4..e41232144 100644
--- a/tuplex/codegen/src/SymbolTable.cc
+++ b/tuplex/codegen/src/SymbolTable.cc
@@ -346,6 +346,55 @@ namespace tuplex {
         addSymbol(make_shared<Symbol>("enumerate", enumerateFunctionTyper));
         addSymbol(make_shared<Symbol>("next", nextFunctionTyper));
 
+        // conversions for list/tuple
+        // list_ret_type: result type of list(arg) for an argument of the given type, UNKNOWN if not supported
+        auto list_ret_type = [](const python::Type& type) {
+            // list? trivial
+            if(type.isListType())
+                return type;
+
+            // what can be converted to/from list?
+            // -> homogenous tuple (no branch for this yet)
+
+            // TODO iterator...
+
+            // -> string, also Option[string] which is typed as list of Option[string]
+            if(type == python::Type::STRING) {
+                return python::Type::makeListType(python::Type::STRING);
+            }
+            if(type.isOptionType() && type.withoutOptions() == python::Type::STRING) {
+                return python::Type::makeListType(python::Type::makeOptionType(python::Type::STRING));
+            }
+
+            // -> keyview/valueview
+            if(type.isDictKeysType() || type.isDictValuesType()) {
+                // get dict type
+                auto dict_type = type.elementType();
+                // a values view becomes a list of the dict's value type, a keys view a list of its key type
+                if(type.isDictValuesType())
+                    return python::Type::makeListType(dict_type.valueType());
+                if(type.isDictKeysType())
+                    return python::Type::makeListType(dict_type.keyType());
+            }
+
+            return python::Type::UNKNOWN;
+        };
+
+        // capture list_ret_type by value: the typer is handed to the Symbol and may be invoked after this scope ends
+        addSymbol(make_shared<Symbol>("list", [list_ret_type](const python::Type& parameterType) {
+            python::Type type = parameterType;
+
+            // param should be single tuple
+            if(parameterType.isTupleType() && parameterType.parameters().size() == 1)
+                type = parameterType.parameters().front();
+
+            auto ret_type = list_ret_type(type);
+            if(ret_type != python::Type::UNKNOWN)
+                return python::Type::makeFunctionType(parameterType, ret_type);
+            return python::Type::UNKNOWN;
+        }));
+        // tuple is special case -> need to speculate on list/str/sequence length!
+
         // TODO: other parameters? i.e. step size and Co?
         // also, boolean, float? etc.?
         addSymbol("range", python::Type::makeFunctionType(python::Type::I64, python::Type::RANGE));
diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc
index df8c532f5..685d884ed 100644
--- a/tuplex/test/core/DictionaryTyping.cc
+++ b/tuplex/test/core/DictionaryTyping.cc
@@ -637,11 +637,7 @@ TEST(DictionaryTyping, KeyView) {
     using namespace std;
 
     // could also use list((10, 20, 30)) e.g., or tuple(list(...)) -> needs speculation.
-
     // test count UDF
-//    auto count_c = "def count_keys(x):\n"
-//                   "    d = {'A':10, 'B': 10, x: 20}\n"
-//                   "    return list(d.keys())";
     auto count_c = "def count_keys(x):\n"
                    "    d = {'A':10, 'B': 10, x: 20}\n"
                    "    return d.keys()";
@@ -663,6 +659,33 @@ TEST(DictionaryTyping, KeyView) {
     graph.saveAsPDF("dict_count_keys.pdf");
 
     cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
+    auto underlying_dict = python::Type::makeDictionaryType(python::Type::STRING, python::Type::I64);
+    ASSERT_EQ(ast.getReturnType(), python::Type::makeDictKeysViewType(underlying_dict));
+}
 
-    ASSERT_EQ(ast.getReturnType(), python::Type::makeDictionaryType(python::Type::STRING, python::Type::I64));
+TEST(DictionaryTyping, KeyViewWithListConversion) {
+    // list(d.keys()) on a dict with str keys is asserted to be typed as a list of str.
+    // NOTE(review): this was previously marked "expected to fail" (dict_keys support) - confirm status.
+    using namespace tuplex;
+    using namespace std;
+
+    // could also use list((10, 20, 30)) e.g., or tuple(list(...)) -> needs speculation.
+
+    // UDF under test: builds a dict and returns its keys converted to a list
+    const char* udf_code = "def count_keys(x):\n"
+                           "    d = {'A':10, 'B': 10, x: 20}\n"
+                           "    return list(d.keys())";
+
+    // parse code to AST
+    auto ast = codegen::AnnotatedAST();
+    ast.parseString(udf_code);
+
+    // hint parameter x as str, then run type inference
+    python::Type inputType = python::Type::STRING;
+    ast.addTypeHint("x", inputType);
+    ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY);
+
+    cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
+    const auto expectedType = python::Type::makeListType(python::Type::STRING);
+    ASSERT_EQ(ast.getReturnType(), expectedType);
 }
\ No newline at end of file

From 662564ef0849eb735cacae3f1a937b0b88e768c0 Mon Sep 17 00:00:00 2001
From: Leonhard Spiegelberg <leonhard_spiegelberg@brown.edu>
Date: Mon, 15 Aug 2022 15:17:11 +0200
Subject: [PATCH 22/23] all typing tests pass

---
 tuplex/codegen/src/TypeAnnotatorVisitor.cc | 20 ++++++++++++++++++++
 tuplex/test/core/DictionaryTyping.cc       |  4 ++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/tuplex/codegen/src/TypeAnnotatorVisitor.cc b/tuplex/codegen/src/TypeAnnotatorVisitor.cc
index d70cd0b71..ccff1262f 100644
--- a/tuplex/codegen/src/TypeAnnotatorVisitor.cc
+++ b/tuplex/codegen/src/TypeAnnotatorVisitor.cc
@@ -1763,6 +1763,26 @@ namespace tuplex {
             } else if(exprType.isIteratorType()) {
                 _nameTable[id->_name] = exprType.yieldType();
                 id->setInferredType(exprType.yieldType());
+            } else if(exprType.isDictValuesType()) {
+                auto dict_type = exprType.elementType();
+                auto yield_type = dict_type.valueType();
+                if(yield_type == python::Type::PYOBJECT || yield_type == python::Type::UNKNOWN) {
+                    // might require unrolling & speculation on view length!
+                    addCompileError(CompileError::TYPE_ERROR_UNSUPPORTED_LOOP_TESTLIST_TYPE);
+                    return;
+                }
+                _nameTable[id->_name] = yield_type;
+                id->setInferredType(yield_type);
+            } else if(exprType.isDictKeysType()) {
+                auto dict_type = exprType.elementType();
+                auto yield_type = dict_type.keyType();
+                if(yield_type == python::Type::PYOBJECT || yield_type == python::Type::UNKNOWN) {
+                    // might require unrolling & speculation on view length!
+                    addCompileError(CompileError::TYPE_ERROR_UNSUPPORTED_LOOP_TESTLIST_TYPE);
+                    return;
+                }
+                _nameTable[id->_name] = yield_type;
+                id->setInferredType(yield_type);
             } else {
                 addCompileError(CompileError::TYPE_ERROR_UNSUPPORTED_LOOP_TESTLIST_TYPE);
             }
diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc
index 685d884ed..bb44dee6c 100644
--- a/tuplex/test/core/DictionaryTyping.cc
+++ b/tuplex/test/core/DictionaryTyping.cc
@@ -542,7 +542,7 @@ TEST(DictionaryTyping, DictionaryInputControlFlow) {
     // print type annotated ast
     GraphVizGraph graph;
     graph.createFromAST(ast.getFunctionAST(), true);
-    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/dict_input_control_flow.pdf");
+    graph.saveAsPDF("dict_input_control_flow.pdf");
 
     cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
 
@@ -586,7 +586,7 @@ TEST(DictionaryTyping, Everything) {
     // print type annotated ast
     GraphVizGraph graph;
     graph.createFromAST(ast.getFunctionAST(), true);
-    graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/everything.pdf");
+    graph.saveAsPDF("dict_everything.pdf");
 
     cout<<"return type of function is: "<<ast.getReturnType().desc()<<endl;
 

From d4448b2f82167471a4d963c459cc0f2c111c662e Mon Sep 17 00:00:00 2001
From: Rhea Goyal <rhea_goyal@brown.edu>
Date: Sun, 28 Aug 2022 08:35:47 -0400
Subject: [PATCH 23/23] all tests except keys/values view failing

---
 tuplex/codegen/include/BuiltinDictProxy.h     |   2 +-
 tuplex/codegen/include/BuiltinDictProxyImpl.h |   5 +-
 tuplex/codegen/include/cJSONDictProxyImpl.h   |  23 +-
 tuplex/codegen/src/cJSONDictProxyImpl.cc      | 257 +++++++++++++--
 tuplex/test/dict/cJSONTest.cc                 | 308 +++++++++++++++++-
 tuplex/utils/CMakeLists.txt                   |   6 +-
 tuplex/utils/src/TypeSystem.cc                |   3 +-
 7 files changed, 540 insertions(+), 64 deletions(-)

diff --git a/tuplex/codegen/include/BuiltinDictProxy.h b/tuplex/codegen/include/BuiltinDictProxy.h
index a37f2a634..c7110eaec 100644
--- a/tuplex/codegen/include/BuiltinDictProxy.h
+++ b/tuplex/codegen/include/BuiltinDictProxy.h
@@ -50,7 +50,7 @@ namespace tuplex {
 //            // allocSize() --> helpful when dict size is known upfront, can be used for optimization.
 //            BuiltinDictProxy& allocSize(llvm::Value* size);
 
-            // getKeyView() --> codegen object
+            // getKeysView() --> codegen object
 
             // getValuesView() --> codegen object
 
diff --git a/tuplex/codegen/include/BuiltinDictProxyImpl.h b/tuplex/codegen/include/BuiltinDictProxyImpl.h
index 6defe6d90..b4f41d08a 100644
--- a/tuplex/codegen/include/BuiltinDictProxyImpl.h
+++ b/tuplex/codegen/include/BuiltinDictProxyImpl.h
@@ -19,7 +19,6 @@ namespace tuplex {
     namespace codegen {
         class BuiltinDictProxyImpl {
         public:
-            // Q: what does virtual do ?
             virtual void putItem(const Field& key, const Field& value) = 0;
             virtual void putItem(const python::Type& keyType, const SerializableValue& key, const python::Type& valueType, const SerializableValue& value) = 0;
 
@@ -31,9 +30,9 @@ namespace tuplex {
 
             virtual void deleteItem(const Field& key) = 0;
 
-            // virtual void getKeyView() = 0;
+            virtual std::vector<Field> getKeysView() = 0;
 
-            // virtual void getValuesView() = 0;
+            virtual std::vector<Field> getValuesView() = 0;
         };
     }
 }
diff --git a/tuplex/codegen/include/cJSONDictProxyImpl.h b/tuplex/codegen/include/cJSONDictProxyImpl.h
index cb461ea7c..8f1570266 100644
--- a/tuplex/codegen/include/cJSONDictProxyImpl.h
+++ b/tuplex/codegen/include/cJSONDictProxyImpl.h
@@ -22,9 +22,9 @@ namespace tuplex {
     namespace codegen {
         class cJSONDictProxyImpl : public BuiltinDictProxyImpl {
         public:
-            // cJSONDictProxyImpl() : _root(nullptr) {}
-            // is there a reason we want to separate the initialisation of cjsondictproxy objects and the actual cjson object?
-            cJSONDictProxyImpl() : _root(cJSON_CreateObject()) {}
+            cJSONDictProxyImpl() {
+                _root = cJSON_CreateObject();   
+            }
             ~cJSONDictProxyImpl() {
                 if(_root) {
                     cJSON_free(_root);
@@ -45,9 +45,9 @@ namespace tuplex {
 
             void deleteItem(const Field& key) override;
 
-            // void getKeyView() override;
+            std::vector<Field> getKeysView() override;
 
-            // void getValuesView() override;
+            std::vector<Field> getValuesView() override;
 
             // notes:
             // for cJSON subscripting, need to perform
@@ -57,14 +57,21 @@ namespace tuplex {
 
         private:
             cJSON *_root;   // a map of the elements
-            cJSON *_typeMap; // a map of strings -> types (nested)
 
             /*!
-            * returns a string representing a type prefix when storing type information in cJSON object as well.
+            * returns a key (as a string) with the added type prefix
+            * @param key
             * @param type
             * @return
             */
-            static std::string typePrefix(const python::Type& type);
+            std::string addTypePrefix(std::string key, const python::Type& type);
+
+            /*!
+            * converts a key (stored as a string in cJSON) to equivalent Field value
+            * @param prefixed_key
+            * @return
+            */
+            Field keyToField(std::string prefixed_key);
         };
     }
 }
diff --git a/tuplex/codegen/src/cJSONDictProxyImpl.cc b/tuplex/codegen/src/cJSONDictProxyImpl.cc
index d0a2d634b..a0fe425a4 100644
--- a/tuplex/codegen/src/cJSONDictProxyImpl.cc
+++ b/tuplex/codegen/src/cJSONDictProxyImpl.cc
@@ -33,16 +33,15 @@ namespace tuplex {
 
             // check type of Field, create corresponding cJSON type object
             if (f.getType() == python::Type::BOOLEAN) {
-                if (f.getInt() > 0) {
-                    cjson_obj = cJSON_CreateTrue();
-                } else {
+                if (f.getInt() == 0) {
                     cjson_obj = cJSON_CreateFalse();
+                } else {
+                    cjson_obj = cJSON_CreateTrue();
                 }
             } else if (f.getType() == python::Type::F64) {
-                cjson_obj = cJSON_CreateNumber(f.getDouble());
+                cjson_obj = cJSON_CreateNumber(f.getDouble(), 0);
             } else if (f.getType() == python::Type::I64) {
-                // should I be upcasting?
-                cjson_obj = cJSON_CreateNumber((double)f.getInt());       
+                cjson_obj = cJSON_CreateNumber(f.getInt(), 1);       
             } else if (f.getType() == python::Type::STRING) {
                 assert(f.getPtr());
                 cjson_obj = cJSON_CreateString((const char*)f.getPtr());
@@ -50,14 +49,15 @@ namespace tuplex {
                 assert(f.getPtr());
 
                 tuplex::List* lis = (tuplex::List*)f.getPtr();
-                cjson_obj = cJSON_CreateArray();
+                cjson_obj = cJSON_CreateArray(1);
                 
                 for (int i = 0; i < lis->numElements(); i++) {
                     // retrieve ith element from list
                     Field element = lis->getField(i);
+                    
                     // convert to cJSON object
                     cJSON* cjson_elt = fieldToCJSON(element);
-
+                    
                     // add element to cJSON array
                     cJSON_AddItemToArray(cjson_obj, cjson_elt);
                 }
@@ -65,11 +65,12 @@ namespace tuplex {
                 assert(f.getPtr());
 
                 tuplex::Tuple* tup = (tuplex::Tuple*)f.getPtr();
-                cjson_obj = cJSON_CreateArray();
+                cjson_obj = cJSON_CreateArray(0);
                 
                 for (int i = 0; i < tup->numElements(); i++) {
                     // retrieve ith element from tuple
                     Field element = tup->getField(i);
+
                     // convert to cJSON object
                     cJSON* cjson_elt = fieldToCJSON(element);
 
@@ -79,7 +80,7 @@ namespace tuplex {
             } else if (f.getType() == python::Type::NULLVALUE) {
                 cjson_obj = cJSON_CreateNull();
             } else {
-                // throw std::runtime_error("cannot change value with type " + value.getType().desc() + " into cJSON object");
+                throw std::runtime_error("cannot change Field with type " + f.getType().desc() + " into cJSON object");
             }
 
             return cjson_obj;
@@ -91,7 +92,15 @@ namespace tuplex {
             Field ret = Field::null();
 
             if (cJSON_IsNumber(object)) {
-                ret = Field(cJSON_GetNumberValue(object));
+                if (((object->type & ~cJSON_IsReference) & ~cJSON_StringIsConst) == cJSON_Int64) {
+                    // type is int, convert ret to int
+                    double dbl_val = cJSON_GetNumberValue(object);
+                    int64_t int_val = (int64_t) std::round(dbl_val);
+                    ret = Field(int_val);
+                } else {
+                    assert(((object->type & ~cJSON_IsReference) & ~cJSON_StringIsConst) == cJSON_Double);
+                    ret = Field(cJSON_GetNumberValue(object));
+                }
             } else if (cJSON_IsString(object)) {
                 ret = Field(cJSON_GetStringValue(object));
             } else if (cJSON_IsTrue(object)) {
@@ -101,27 +110,155 @@ namespace tuplex {
             } else if (cJSON_IsNull(object)) {
                 ret = Field::null();
             } else if (cJSON_IsArray(object)) {
-                throw std::runtime_error("not yet implemented...");                
+                std::vector<tuplex::Field> init_vec;
+                init_vec.reserve(cJSON_GetArraySize(object));
+
+                for (int i = 0; i < cJSON_GetArraySize(object); i++) {
+                    // retrieve ith element from array
+                    cJSON* cjson_elt = cJSON_GetArrayItem(object, i);
+                    if (!cjson_elt)
+                        throw std::runtime_error("could not retrieve element from cJSON array");
+
+                    // convert to field
+                    Field field_elt = cJSONToField(cjson_elt);
+
+                    // add element to init vector
+                    init_vec.push_back(field_elt);
+                }
+
+                if (((object->type & ~cJSON_IsReference) & ~cJSON_StringIsConst) == cJSON_List) {
+                    List ret_list = List::from_vector(init_vec);
+                    ret = Field(ret_list);
+                } else {
+                    assert(((object->type & ~cJSON_IsReference) & ~cJSON_StringIsConst) == cJSON_Tuple);
+                    Tuple ret_tup = Tuple::from_vector(init_vec);
+                    ret = Field(ret_tup);
+                }
             } else if (cJSON_IsObject(object)) {
+                /** TODO: what type should nested dictionaries
+                 *        (i.e. cjson objects) be converted to as a Field? */
                 throw std::runtime_error("not yet implemented...");
             }
 
             return ret;
         }
 
-        std::string cJSONDictProxyImpl::typePrefix(const python::Type& type) {
+        std::string cJSONDictProxyImpl::addTypePrefix(std::string key, const python::Type& type) {
+            auto ret = type.desc() + "/" + key;
+
+            return ret;
+        }
+
+        /*!
+         * convert a string into a Field given a python type. Tuples are parsed as flat "(e1,e2,...)" lists;
+         * nested tuples and string elements containing a quote character are not handled.
+         * @param str string, must not be empty
+         * @param type python type
+         * @return Field object
+         * @throws std::runtime_error if str is empty, malformed or type is unsupported
+         *         (std::stod / std::stoll exceptions for number types propagate unchanged)
+         */
+        Field stringToField(std::string str, python::Type type) {
+            if (str.empty())
+                throw std::runtime_error("cannot pass in empty string");
+
+            Field ret_val = Field::null();
+
+            if (type == python::Type::BOOLEAN) {
+                if (str.compare("True") == 0) {
+                    ret_val = Field(true);
+                } else if (str.compare("False") == 0) {
+                    ret_val = Field(false);
+                } else {
+                    throw std::runtime_error("expected bool value, got " + str);
+                }
+            } else if (type == python::Type::F64) {
+                double dbl_val = std::stod(str);
+                ret_val = Field(dbl_val);
+            } else if (type == python::Type::I64) {
+                long long int_val = std::stoll(str);
+                ret_val = Field((int64_t)int_val);
+            } else if (type == python::Type::STRING) {
+                ret_val = Field(str.substr(1, str.length() - 2));
+            } else if (type.isListType()) {
+                throw std::runtime_error("(list) not yet implemented...");
+            } else if (type.isTupleType()) {
+                std::vector<tuplex::Field> init_vec;
+                init_vec.reserve(type.parameters().size());
+
+                assert(str[0] == '(');
+                assert(str[str.length() - 1] == ')');
+                const size_t end = str.length() - 1; // index of the closing ')'
+                size_t done = 1;                     // index of the next unparsed character
+                size_t curr_index = 0;
+                while (done < end) {
+                    std::string curr_elt;
+                    python::Type curr_type = type.parameters().at(curr_index);
+
+                    if (str[done] == '\'') {
+                        // current item is a string; need to find next '
+                        assert(curr_type == python::Type::STRING);
+
+                        size_t next_quote = str.find('\'', done + 1);
+                        if (next_quote == std::string::npos)
+                            throw std::runtime_error("could not parse tuple string: matching \' not present");
+
+                        // keep both quotes, the STRING conversion strips them (substr takes a length, not an end)
+                        curr_elt = str.substr(done, next_quote - done + 1);
+                        assert(str[next_quote + 1] == ',' || str[next_quote + 1] == ')');
+                        done = next_quote + 2;
+                    } else {
+                        size_t next_comma = str.find(',', done);
+
+                        if (next_comma == std::string::npos) {
+                            // last element in tuple, runs up to the closing ')'
+                            curr_elt = str.substr(done, end - done);
+                            done = end;
+                        } else {
+                            curr_elt = str.substr(done, next_comma - done);
+                            done = next_comma + 1;
+                        }
+                    }
+
+                    // element conversion failures are reported via exceptions
+                    init_vec.push_back(stringToField(curr_elt, curr_type));
+                    curr_index++;
+                }
+
+                assert(type.parameters().size() == curr_index);
+                ret_val = Field(Tuple::from_vector(init_vec));
+            } else if (type == python::Type::NULLVALUE) {
+                ret_val = Field::null();
+            } else {
+                throw std::runtime_error("conversion from string " + str + " to type " + type.desc() + " not supported");
+            }
+
+            return ret_val;
+        }
+
+        Field cJSONDictProxyImpl::keyToField(std::string prefixed_key) {
+            std::size_t slash_index = prefixed_key.find("/");
+
+            std::string key_type = prefixed_key.substr(0, slash_index);
+            std::string key_str = prefixed_key.substr(slash_index + 1);
+
+            python::Type ret_type = python::Type::NULLVALUE;
+
+            if (key_type.substr(0, 4).compare("bool") == 0) {
+                ret_type = python::Type::BOOLEAN;
+            } else {
+                ret_type = python::decodeType(key_type);
+            }
 
-            // init map for a couple common types (int, float, bool, ...)
+            Field ret_val = stringToField(key_str, ret_type);
+            if (ret_val.isNull() && (ret_type != python::Type::NULLVALUE))
+                throw std::runtime_error("could not convert key-string to Field object");
 
-            // since keys in JSON are always strings, need to store type info in that string!
-            return "";
+            return ret_val;
         }
 
         void cJSONDictProxyImpl::putItem(const Field &key, const Field &value) {
-            // put into cJSON, yet due to both key/type being not necessary type stable, encode type as base64 into values!
-            // map primitive types directly into cJSON if possible
             if(!_root)
-                // _root = cJSON_CreateObject();
                 throw std::runtime_error("cannot use putItem on an uninitialised dictionary");
             
             cJSON* to_add = fieldToCJSON(value);
@@ -129,14 +266,17 @@ namespace tuplex {
                 throw std::runtime_error("item to add not convertible to cJSON object");
             }
 
-            // add to cJSON object
-            // TODO: what's the difference between key.desc and getting the key's ptr value?
-            // A: key.desc gets the string of the Field regardless of the type of the Field
-            cJSON_AddItemToObject(_root, key.desc().c_str(), to_add);
+            // check if key already exists
+            if (keyExists(key)) {
+                // replace existing key
+                replaceItem(key, value);
+            }
 
-            // type prefix
+            // add type prefix to key
+            std::string prefixed = addTypePrefix(key.desc(), key.getType());
 
-            // throw std::runtime_error("to implement...");
+            // key doesn't exist; add to cJSON object
+            cJSON_AddItemToObject(_root, prefixed.c_str(), to_add);
         }
         
         void cJSONDictProxyImpl::putItem(const python::Type &keyType, const SerializableValue &key,
@@ -150,18 +290,28 @@ namespace tuplex {
         bool cJSONDictProxyImpl::keyExists(const Field& key) {
             if(!_root)
                 throw std::runtime_error("cannot use keyExists on an uninitialised dictionary");
+
+            // make prefixed key
+            std::string prefixed = addTypePrefix(key.desc(), key.getType());
             
-            cJSON* res = cJSON_GetObjectItemCaseSensitive(_root, key.desc().c_str());
+            cJSON* res = cJSON_GetObjectItemCaseSensitive(_root, prefixed.c_str());
 
-            return (res != NULL);
+            if (!res) {
+                return false;
+            }
+            
+            return true;
         }
 
         Field cJSONDictProxyImpl::getItem(const Field& key) {
             if (!_root)
                 throw std::runtime_error("cannot use getItem on an uninitialised dictionary");
             
+            // make prefixed key
+            std::string prefixed = addTypePrefix(key.desc(), key.getType());
+            
             // retrieve value from dict
-            cJSON* item = cJSON_GetObjectItemCaseSensitive(_root, key.desc().c_str());
+            cJSON* item = cJSON_GetObjectItemCaseSensitive(_root, prefixed.c_str());
 
             if (!item)
                 throw std::runtime_error("error retrieving value from cJSON dictionary");
@@ -176,22 +326,23 @@ namespace tuplex {
             if (!_root)
                 throw std::runtime_error("cannot use replaceItem on an uninitialised dictionary");
             
-            // assert(key.getType() == python::Type::STRING);
-
+            // make prefixed key
+            std::string prefixed = addTypePrefix(key.desc(), key.getType());
+            
             // attempt to retrieve value from dict
-            cJSON* item = cJSON_GetObjectItemCaseSensitive(_root, key.desc().c_str());
+            cJSON* item = cJSON_GetObjectItemCaseSensitive(_root, prefixed.c_str());
 
             if (!item) {
-                // key doesn't already exist; simply perform putItem instead (?)
+                // key doesn't already exist; do putItem instead
                 putItem(key, value);
             } else {
-                // replace value at key
+                // make new cJSON item
                 cJSON* new_item = fieldToCJSON(value);
                 if (!new_item) {
                     throw std::runtime_error("new item not convertible to cJSON object");
                 }
 
-                cJSON_ReplaceItemInObjectCaseSensitive(_root, key.desc().c_str(), new_item);
+                cJSON_ReplaceItemInObjectCaseSensitive(_root, prefixed.c_str(), new_item);
             }
         }
 
@@ -199,8 +350,44 @@ namespace tuplex {
             if (!_root)
                 throw std::runtime_error("cannot use deleteItem on an uninitialised dictionary");
             
+            // make prefixed key
+            std::string prefixed = addTypePrefix(key.desc(), key.getType());
+
             // delete value from dict
-            cJSON_DeleteItemFromObjectCaseSensitive(_root, (const char*)key.desc().c_str());
+            cJSON_DeleteItemFromObjectCaseSensitive(_root, prefixed.c_str());
+        }
+
+        std::vector<Field> cJSONDictProxyImpl::getKeysView() {
+            std::vector<Field> ret;
+            ret.reserve(cJSON_GetArraySize(_root));
+            // collect the key of every entry under _root, in cJSON iteration order
+            cJSON* entry = NULL;
+            cJSON_ArrayForEach(entry, _root) {
+                // entry->string is the key as stored in the cJSON object; keyToField
+                // presumably undoes the prefix applied by addTypePrefix — confirm
+                std::string key_str = entry->string;
+                Field field_val = keyToField(key_str);
+                // add to end of ret vector
+                ret.push_back(field_val);
+            }
+            // NOTE(review): unlike getItem/replaceItem/deleteItem there is no explicit _root check here — confirm intended
+            return ret;
+        }
+
+        std::vector<Field> cJSONDictProxyImpl::getValuesView() {
+            std::vector<Field> ret;
+            ret.reserve(cJSON_GetArraySize(_root));
+            // collect the value of every entry under _root, in cJSON iteration order
+            cJSON* entry = NULL;
+            cJSON_ArrayForEach(entry, _root) {
+                // convert entry to Field
+                Field field_val = cJSONToField(entry);
+
+                // add to end of ret vector
+                ret.push_back(field_val);
+            }
+            // NOTE(review): unlike getItem/replaceItem/deleteItem there is no explicit _root check here — confirm intended
+            return ret;
         }
     }
 }
diff --git a/tuplex/test/dict/cJSONTest.cc b/tuplex/test/dict/cJSONTest.cc
index 22e0c0a00..0708be732 100644
--- a/tuplex/test/dict/cJSONTest.cc
+++ b/tuplex/test/dict/cJSONTest.cc
@@ -39,11 +39,10 @@ TEST(cJSONTest, GetItemTest) {
     using namespace tuplex;
     using namespace std;
 
-    // testing non-codegenerated put item
+    // testing non-codegenerated get item
     // initialise test dict
     codegen::cJSONDictProxyImpl dict_proxy;
 
-    // put test values into test dict
     dict_proxy.putItem(Field((int64_t)10), Field("a"));
     dict_proxy.putItem(Field((int64_t)20), Field("b"));
 
@@ -60,11 +59,10 @@ TEST(cJSONTest, DeleteItemTest) {
     using namespace tuplex;
     using namespace std;
 
-    // testing non-codegenerated put item
+    // testing non-codegenerated delete item
     // initialise test dict
     codegen::cJSONDictProxyImpl dict_proxy;
 
-    // put test values into test dict
     dict_proxy.putItem(Field((int64_t)10), Field("a"));
     dict_proxy.putItem(Field((int64_t)20), Field("b"));
 
@@ -82,7 +80,7 @@ TEST(cJSONTest, DeleteItemTest) {
     Field res = dict_proxy.getItem(Field((int64_t)10));
 
     // NOTE: expected result will be a double, bc I think cJSON stores all numbers as doubles
-    EXPECT_EQ(Field((double)100), dict_proxy.getItem(Field((int64_t)10)));
+    EXPECT_EQ(Field((int64_t)100), dict_proxy.getItem(Field((int64_t)10)));
     EXPECT_EQ(false, dict_proxy.keyExists(Field((int64_t)20)));
 }
 
@@ -90,11 +88,10 @@ TEST(cJSONTest, ReplaceItemTest) {
     using namespace tuplex;
     using namespace std;
 
-    // testing non-codegenerated put item
+    // testing non-codegenerated replace item
     // initialise test dict
     codegen::cJSONDictProxyImpl dict_proxy;
 
-    // put test values into test dict
     dict_proxy.putItem(Field((int64_t)10), Field("a"));
     dict_proxy.putItem(Field((int64_t)20), Field("b"));
 
@@ -113,13 +110,296 @@ TEST(cJSONTest, ReplaceItemTest) {
     
     dict_proxy.replaceItem(Field((int64_t)30), Field((int64_t)50));
 
-    // NOTE: expected result will be a double, bc I think cJSON stores all numbers as doubles
-    EXPECT_EQ(Field((double)50), dict_proxy.getItem(Field((int64_t)30)));
+    // NOTE: the stored integer is expected to come back as an int64, not as a double
+    EXPECT_EQ(Field((int64_t)50), dict_proxy.getItem(Field((int64_t)30)));
+}
+
+// str -> _ : string keys mapped to int, str and bool values
+TEST(cJSONTest, StrKeysTest) {
+    using namespace tuplex;
+    using namespace std;
+
+    // initialise test dict
+    codegen::cJSONDictProxyImpl dict_proxy;
+
+    dict_proxy.putItem(Field("a"), Field((int64_t)1));
+    dict_proxy.putItem(Field("b"), Field((int64_t)2));
+
+    EXPECT_EQ(Field((int64_t)1), dict_proxy.getItem(Field("a")));
+    EXPECT_EQ(Field((int64_t)2), dict_proxy.getItem(Field("b")));
+    // overwrite "a" through putItem and "b" through replaceItem, changing the value types
+    dict_proxy.putItem(Field("a"), Field("hello"));
+    dict_proxy.replaceItem(Field("b"), Field(true));
+
+    EXPECT_EQ(Field("hello"), dict_proxy.getItem(Field("a")));
+    EXPECT_EQ(Field(true), dict_proxy.getItem(Field("b")));
+    // deleting "b" must leave "a" in place
+    dict_proxy.deleteItem(Field("b"));
+
+    EXPECT_EQ(true, dict_proxy.keyExists(Field("a")));
+    EXPECT_EQ(false, dict_proxy.keyExists(Field("b")));
+}
+
+// _ -> null : null stored as a value, via putItem and via replaceItem
+TEST(cJSONTest, NullValsTest) {
+    using namespace tuplex;
+    using namespace std;
+
+    // initialise test dict
+    codegen::cJSONDictProxyImpl dict_proxy;
+
+    dict_proxy.putItem(Field((int64_t)10), Field::null());
+    dict_proxy.putItem(Field("a"), Field((int64_t)10));
+
+    EXPECT_EQ(Field::null(), dict_proxy.getItem(Field((int64_t)10)));
+    EXPECT_EQ(Field((int64_t)10), dict_proxy.getItem(Field("a")));
+    // an existing non-null value can be replaced by null
+    dict_proxy.replaceItem(Field("a"), Field::null());
+
+    EXPECT_EQ(Field::null(), dict_proxy.getItem(Field("a")));
+}
+
+// null -> _ : null used as the key, with int, str and bool values
+TEST(cJSONTest, NullKeysTest) {
+    using namespace tuplex;
+    using namespace std;
+
+    // initialise test dict
+    codegen::cJSONDictProxyImpl dict_proxy;
+
+    dict_proxy.putItem(Field::null(), Field((int64_t)10));
+
+    EXPECT_EQ(Field((int64_t)10), dict_proxy.getItem(Field::null()));
+    // putItem on the already present null key is expected to overwrite its value
+    dict_proxy.putItem(Field::null(), Field("a"));
+
+    EXPECT_EQ(Field("a"), dict_proxy.getItem(Field::null()));
+    // replaceItem on the null key may change the value type as well
+    dict_proxy.replaceItem(Field::null(), Field(true));
+
+    EXPECT_EQ(Field(true), dict_proxy.getItem(Field::null()));
+}
+
+// mix -> mix : float, str and bool each appear both as key and as value
+TEST(cJSONTest, FloatTest) {
+    using namespace tuplex;
+    using namespace std;
+
+    // initialise test dict
+    codegen::cJSONDictProxyImpl dict_proxy;
+
+    dict_proxy.putItem(Field((double)3.14), Field("pi"));
+    dict_proxy.putItem(Field((double)1), Field(true));
+
+    EXPECT_EQ(Field("pi"), dict_proxy.getItem(Field((double)3.14)));
+    EXPECT_EQ(Field(true), dict_proxy.getItem(Field((double)1)));
+    // same pairs with the key and value roles swapped
+    dict_proxy.putItem(Field("pi"), Field((double)3.14));
+    dict_proxy.putItem(Field(true), Field((double)1));
+
+    EXPECT_EQ(Field((double)3.14), dict_proxy.getItem(Field("pi")));
+    EXPECT_EQ(Field((double)1), dict_proxy.getItem(Field(true)));
+    // replace the values under the float key and under the str key with an int
+    dict_proxy.replaceItem(Field((double)3.14), Field((int64_t)3));
+    dict_proxy.replaceItem(Field("pi"), Field((int64_t)3));
+
+    EXPECT_EQ(Field((int64_t)3), dict_proxy.getItem(Field((double)3.14)));
+    EXPECT_EQ(Field((int64_t)3), dict_proxy.getItem(Field("pi")));
+}
+
+// _ -> list : list values stored under int and str keys
+TEST(cJSONTest, ListValsTest) {
+    using namespace tuplex;
+    using namespace std;
+
+    // initialise test dict
+    codegen::cJSONDictProxyImpl dict_proxy;
+
+    // init list 1 (int elements)
+    vector<Field> vec_1{ Field((int64_t)10), Field((int64_t)20), Field((int64_t)30) };
+    List list_1 = List::from_vector(vec_1);
+
+    // init list 2 (str elements)
+    vector<Field> vec_2{ Field("a"), Field("b"), Field("c") };
+    List list_2 = List::from_vector(vec_2);
+
+    // init list 3 (float elements)
+    vector<Field> vec_3{ Field((double)15), Field((double)3.14), Field((double)2.7) };
+    List list_3 = List::from_vector(vec_3);
+
+    dict_proxy.putItem(Field((int64_t)10), Field(list_1));
+
+    EXPECT_EQ(Field(list_1), dict_proxy.getItem(Field((int64_t)10)));
+    // add a second list and swap the list under key 10 for one with another element type
+    dict_proxy.putItem(Field("a"), Field(list_2));
+    dict_proxy.replaceItem(Field((int64_t)10), Field(list_3));
+
+    EXPECT_EQ(Field(list_2), dict_proxy.getItem(Field("a")));
+    EXPECT_EQ(Field(list_3), dict_proxy.getItem(Field((int64_t)10)));
+    // deleting "a" must not affect key 10
+    dict_proxy.deleteItem(Field("a"));
+
+    EXPECT_EQ(false, dict_proxy.keyExists(Field("a")));
+    EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)10)));
 }
 
-// tests to write:
+// _ -> tuple : tuple values (with mixed element types) stored under int and str keys
+TEST(cJSONTest, TupleValsTest) {
+    using namespace tuplex;
+    using namespace std;
+
+    // initialise test dict
+    codegen::cJSONDictProxyImpl dict_proxy;
+
+    // init tuple 1 (int, str, float)
+    vector<Field> vec_1{ Field((int64_t)10), Field("a"), Field((double)30) };
+    Tuple tup_1 = Tuple::from_vector(vec_1);
+
+    // init tuple 2 (str, bool, int)
+    vector<Field> vec_2{ Field("a"), Field(true), Field((int64_t)30) };
+    Tuple tup_2 = Tuple::from_vector(vec_2);
+
+    // init tuple 3 (float, int, bool)
+    vector<Field> vec_3{ Field((double)3.14), Field((int64_t)2), Field(false) };
+    Tuple tup_3 = Tuple::from_vector(vec_3);
 
-// 1. heterogenous dict -> basically use modified JSON as in-memory storage format.
-// 2. homogenous keytype dict -> can encode dict directly & serialize it more efficiently. Represent in-memory as hash table specialized depending on type.
-// 3. homogenous valuetype -> ignore case, specialize to 1.
-// 4. compile-time known keys/restricted keyset, keys do not change. -> struct type with fixed offsets!
\ No newline at end of file
+    dict_proxy.putItem(Field((int64_t)10), Field(tup_1));
+
+    EXPECT_EQ(Field(tup_1), dict_proxy.getItem(Field((int64_t)10)));
+    // add a second tuple and swap the tuple under key 10 for a different one
+    dict_proxy.putItem(Field("a"), Field(tup_2));
+    dict_proxy.replaceItem(Field((int64_t)10), Field(tup_3));
+
+    EXPECT_EQ(Field(tup_2), dict_proxy.getItem(Field("a")));
+    EXPECT_EQ(Field(tup_3), dict_proxy.getItem(Field((int64_t)10)));
+    // deleting "a" must not affect key 10
+    dict_proxy.deleteItem(Field("a"));
+
+    EXPECT_EQ(false, dict_proxy.keyExists(Field("a")));
+    EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)10)));
+}
+
+TEST(cJSONTest, FloatIntDifferentiation) {
+    using namespace tuplex;
+    using namespace std;
+    // the float key 10.0 and the int key 10 must be kept as two distinct entries
+    // initialise test dict
+    codegen::cJSONDictProxyImpl dict_proxy;
+
+    dict_proxy.putItem(Field((double)10), Field("a"));
+
+    EXPECT_EQ(true, dict_proxy.keyExists(Field((double)10)));
+    EXPECT_EQ(false, dict_proxy.keyExists(Field((int64_t)10)));
+    // adding the int key must leave the float entry untouched
+    dict_proxy.putItem(Field((int64_t)10), Field("b"));
+
+    EXPECT_EQ(true, dict_proxy.keyExists(Field((double)10)));
+    EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)10)));
+    EXPECT_EQ(Field("a"), dict_proxy.getItem(Field((double)10)));
+    EXPECT_EQ(Field("b"), dict_proxy.getItem(Field((int64_t)10)));
+    // replacing under the float key must not change the int entry
+    dict_proxy.replaceItem(Field((double)10), Field((int64_t)64));
+
+    EXPECT_EQ(Field((int64_t)64), dict_proxy.getItem(Field((double)10)));
+    EXPECT_EQ(Field("b"), dict_proxy.getItem(Field((int64_t)10)));
+    // deleting the float key must keep the int key
+    dict_proxy.deleteItem(Field((double)10));
+    
+    EXPECT_EQ(false, dict_proxy.keyExists(Field((double)10)));
+    EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)10)));
+}
+
+TEST(cJSONTest, ListTupleDifferentiation) {
+    using namespace tuplex;
+    using namespace std;
+    // a tuple and a list built from the same elements must each come back with their own type
+    // initialise test dict
+    codegen::cJSONDictProxyImpl dict_proxy;
+
+    vector<Field> vec{ Field("a"), Field("a"), Field("a") };
+    Tuple tup = Tuple::from_vector(vec);
+    List lis = List::from_vector(vec);
+
+    dict_proxy.putItem(Field("tup"), Field(tup));
+    dict_proxy.putItem(Field("lis"), Field(lis));
+
+    EXPECT_EQ(Field(tup), dict_proxy.getItem(Field("tup")));
+    EXPECT_EQ(Field(lis), dict_proxy.getItem(Field("lis")));
+    EXPECT_EQ(true, dict_proxy.getItem(Field("tup")).getType().isTupleType());
+    EXPECT_EQ(true, dict_proxy.getItem(Field("lis")).getType().isListType());
+
+    vector<Field> vec_2{ Field((double)3.14), Field((double)2.7), Field((double)1.414) };
+    Tuple tup_2 = Tuple::from_vector(vec_2);
+    List lis_2 = List::from_vector(vec_2);
+    // swap the kinds: key "lis" now gets a tuple and key "tup" gets a list
+    dict_proxy.replaceItem(Field("lis"), Field(tup_2));
+    dict_proxy.replaceItem(Field("tup"), Field(lis_2));
+
+    EXPECT_EQ(Field(tup_2), dict_proxy.getItem(Field("lis")));
+    EXPECT_EQ(Field(lis_2), dict_proxy.getItem(Field("tup")));
+    EXPECT_EQ(true, dict_proxy.getItem(Field("tup")).getType().isListType());
+    EXPECT_EQ(true, dict_proxy.getItem(Field("lis")).getType().isTupleType());
+}
+
+TEST(cJSONTest, KeysValuesView) {
+    using namespace tuplex;
+    using namespace std;
+    // getKeysView()/getValuesView() are expected to list the entries in insertion order
+    // initialise test dict
+    codegen::cJSONDictProxyImpl dict_proxy;
+
+    vector<Field> vec_1{ Field("a"), Field("b"), Field("c") };
+    List lis = List::from_vector(vec_1);
+    vector<Field> vec_2{ Field("a"), Field((int64_t)10), Field(true) };
+    Tuple tup = Tuple::from_vector(vec_2);
+
+    // populate with keys of mixed type (int, float, bool, tuple, str);
+    // a list key is left out for now (see the commented-out lines below)
+    dict_proxy.putItem(Field((int64_t)15), Field("test"));
+    dict_proxy.putItem(Field((double)3.14), Field((int64_t)15));
+    dict_proxy.putItem(Field(false), Field((double)3.14));
+    dict_proxy.putItem(Field(tup), Field(false));
+    // dict_proxy.putItem(Field(lis), Field(tup));
+    dict_proxy.putItem(Field("list"), Field(lis));
+
+    vector<Field> keys{ Field((int64_t)15),
+                        Field((double)3.14),
+                        Field(false),
+                        Field(tup),
+                        // Field(lis),
+                        Field("list")
+                        };
+
+    vector<Field> vals{ Field("test"),
+                        Field((int64_t)15),
+                        Field((double)3.14),
+                        Field(false),
+                        // Field(tup),
+                        Field(lis)
+                        };
+
+    EXPECT_EQ(keys, dict_proxy.getKeysView());
+    EXPECT_EQ(vals, dict_proxy.getValuesView());
+
+    dict_proxy.replaceItem(Field((int64_t)15), Field((int64_t)150));
+    dict_proxy.deleteItem(Field(false));
+    dict_proxy.deleteItem(Field(tup));
+    dict_proxy.putItem(Field("new"), Field(true));
+    // after dropping the bool and tuple keys: 15 -> 150, 3.14 -> 15, "list" -> lis, "new" -> true
+    vector<Field> keys_2{ Field((int64_t)15),
+                          Field((double)3.14),
+                        //   Field(lis),
+                          Field("list"),
+                          Field("new")
+                          };
+
+    vector<Field> vals_2{ Field((int64_t)150),
+                          Field((int64_t)15),
+                        //   Field(tup),
+                          Field(lis),
+                          Field(true)
+                          };
+
+    EXPECT_EQ(keys_2, dict_proxy.getKeysView());
+    EXPECT_EQ(vals_2, dict_proxy.getValuesView());
+}
\ No newline at end of file
diff --git a/tuplex/utils/CMakeLists.txt b/tuplex/utils/CMakeLists.txt
index 832d90167..e86ce50a8 100644
--- a/tuplex/utils/CMakeLists.txt
+++ b/tuplex/utils/CMakeLists.txt
@@ -48,8 +48,10 @@ if(NOT BUILD_WITH_AWS)
     include(FetchContent)
     set(FETCHCONTENT_BASE_DIR ${CMAKE_BINARY_DIR}/third_party/src)
     FetchContent_Declare(cJSON
-        GIT_REPOSITORY https://github.com/DaveGamble/cJSON.git
-        GIT_TAG        v1.7.14
+        # GIT_REPOSITORY https://github.com/DaveGamble/cJSON.git
+        # GIT_TAG        v1.7.14
+        GIT_REPOSITORY https://github.com/aletheia094/cJSON.git
+        GIT_TAG ada484892e39329d90c0519fb34a90a340309a60
         GIT_CONFIG advice.detachedHead=false
     )
     FetchContent_GetProperties(cJSON)
diff --git a/tuplex/utils/src/TypeSystem.cc b/tuplex/utils/src/TypeSystem.cc
index bc80963c3..d123f4e65 100644
--- a/tuplex/utils/src/TypeSystem.cc
+++ b/tuplex/utils/src/TypeSystem.cc
@@ -24,7 +24,8 @@
 
 namespace python {
     const Type Type::UNKNOWN = TypeFactory::instance().createOrGetPrimitiveType("unknown");
-    const Type Type::BOOLEAN = TypeFactory::instance().createOrGetPrimitiveType("boolean");
+    // const Type Type::BOOLEAN = TypeFactory::instance().createOrGetPrimitiveType("boolean");
+    const Type Type::BOOLEAN = TypeFactory::instance().createOrGetPrimitiveType("bool");
     const Type Type::I64 = TypeFactory::instance().createOrGetPrimitiveType("i64", {python::Type::BOOLEAN});
     const Type Type::F64 = TypeFactory::instance().createOrGetPrimitiveType("f64", {python::Type::I64});
     const Type Type::STRING = TypeFactory::instance().createOrGetPrimitiveType("str");