diff --git a/tuplex/codegen/include/ASTAnnotation.h b/tuplex/codegen/include/ASTAnnotation.h index 8512f4087..c26a0d7a0 100644 --- a/tuplex/codegen/include/ASTAnnotation.h +++ b/tuplex/codegen/include/ASTAnnotation.h @@ -48,8 +48,13 @@ class Symbol : public std::enable_shared_from_this { std::shared_ptr parent; ///! an optional abstract typer function which can be applied if the symboltype is function + ///! to deliver a concretely typed type based on the paramter type std::function functionTyper; + ///! an optional abstract typer that takes the original type of the caller (e.g., for an attribute) + ///! and provides then together with the parameterType symilar to functionTyper a concrete type for the attribute function + std::function attributeFunctionTyper; + ///! optionally constant data associated with that symbol tuplex::Field constantData; @@ -90,55 +95,43 @@ class Symbol : public std::enable_shared_from_this { auto generic_result = functionTyper(parameterType); if(generic_result != python::Type::UNKNOWN) { specializedFunctionType = generic_result; - assertFunctionDoesNotReturnGeneric(specializedFunctionType); return true; } - for(auto& type : types) { - // found symbol, now check its type - if(!type.isFunctionType()) - continue; - - auto tupleArgType = getTupleArg(type.getParamsType()); + // typer did not yield a result, hence try stored funciton types incl. upcasting + return findStoredTypedFunction(parameterType, specializedFunctionType); + } - // check if there's a direct type match => use that function then! - if(parameterType == tupleArgType) { - specializedFunctionType = type; - assertFunctionDoesNotReturnGeneric(specializedFunctionType); - return true; - } + /*! + * the typing of an attribute which is a function may be based on both the callerType and the parameters. I.e., + * this function helps to type an attribute x.a(p) where callerType = type(x) and parameterType = type(p) for some + * symbol a which is this. + * @param callerType + * @param parameterType + * @param specializedFunctionType where to store the concrete (non-generic!) output type! + * @return true if a specialized function type could be generated, false else. + */ + inline bool findAttributeFunctionType(const python::Type& callerType, + const python::Type& parameterType, + python::Type& specializedFunctionType) { + // fallback based typing: + // 1. check attribute typer + // 2. check general typer + auto typed_result = attributeFunctionTyper(callerType, parameterType); + if(python::Type::UNKNOWN == typed_result) { + typed_result = functionTyper(parameterType); } - // no direct match was found. Check whether casting would work or partial matching. - for(auto& type : types) { - // found symbol, now check its type - if (!type.isFunctionType()) - continue; - - auto tupleArgType = getTupleArg(type.getParamsType()); - - // check if given parameters type is compatible with function type? - // actual invocation is with parameterType - // ==> can we upcast them to fit the defined one OR does is partially work? - // e.g., when the function is defined for NULL, but we have opt? - if (isTypeCompatible(parameterType, tupleArgType)) { - specializedFunctionType = type; - - // specialize according to parameterType if it's a generic function so further typing works - assert(!specializedFunctionType.getReturnType().isGeneric()); - if(specializedFunctionType.getParamsType().isGeneric()) { - auto specializedParams = python::specializeGenerics(parameterType, tupleArgType); - specializedFunctionType = python::Type::makeFunctionType(specializedParams, - specializedFunctionType.getReturnType()); - } - - assertFunctionDoesNotReturnGeneric(specializedFunctionType); - return true; - } + // check if result is valid, then take it + if(typed_result != python::Type::UNKNOWN) { + specializedFunctionType = typed_result; + assertFunctionDoesNotReturnGeneric(specializedFunctionType); + return true; } - return false; + // typer did not yield a result, hence try stored funciton types incl. upcasting + return findStoredTypedFunction(parameterType, specializedFunctionType); } /*! @@ -169,7 +162,8 @@ class Symbol : public std::enable_shared_from_this { return full_name; } - Symbol() {} + Symbol() : functionTyper([](const python::Type&){return python::Type::UNKNOWN;}), + attributeFunctionTyper([](const python::Type&, const python::Type&){return python::Type::UNKNOWN;}) {} virtual ~Symbol() { _attributes.clear(); parent.reset(); @@ -216,17 +210,20 @@ class Symbol : public std::enable_shared_from_this { Symbol(std::string _name, std::function typer) : name(_name), qualifiedName(_name), - functionTyper(std::move(typer)), symbolType(SymbolType::FUNCTION) {} + functionTyper(std::move(typer)), attributeFunctionTyper([](const python::Type&, const python::Type&){return python::Type::UNKNOWN;}), symbolType(SymbolType::FUNCTION) {} Symbol(std::string _name, python::Type _type) : name(_name), qualifiedName(_name), types{_type}, - symbolType(_type.isFunctionType() ? SymbolType::FUNCTION : SymbolType::VARIABLE), functionTyper([](const python::Type&) { return python::Type::UNKNOWN; }) {} + symbolType(_type.isFunctionType() ? SymbolType::FUNCTION : SymbolType::VARIABLE), + functionTyper([](const python::Type&) { return python::Type::UNKNOWN; }), + attributeFunctionTyper([](const python::Type&, const python::Type&){return python::Type::UNKNOWN;}) {} Symbol(std::string _name, std::string _qualifiedName, python::Type _type, SymbolType _symbolType) : name(_name), qualifiedName(_qualifiedName), types{_type}, symbolType(_symbolType), - functionTyper([](const python::Type&) { return python::Type::UNKNOWN; }) {} + functionTyper([](const python::Type&) { return python::Type::UNKNOWN; }), + attributeFunctionTyper([](const python::Type&, const python::Type&){return python::Type::UNKNOWN;}) {} private: ///! i.e. to store something like re.search. re is then of module type. search will have a concrete function type. @@ -234,6 +231,55 @@ class Symbol : public std::enable_shared_from_this { /********* HELPER FUNCTIONS *************/ + inline bool findStoredTypedFunction(const python::Type& parameterType, python::Type& specializedFunctionType) { + + // typing using typer functions above failed, hence now search for concrete stored types. + for(auto& type : types) { + // found symbol, now check its type + if(!type.isFunctionType()) + continue; + + auto tupleArgType = getTupleArg(type.getParamsType()); + + // check if there's a direct type match => use that function then! + if(parameterType == tupleArgType) { + specializedFunctionType = type; + assertFunctionDoesNotReturnGeneric(specializedFunctionType); + return true; + } + } + + // no direct match was found. Check whether casting would work or partial matching. + for(auto& type : types) { + // found symbol, now check its type + if (!type.isFunctionType()) + continue; + + auto tupleArgType = getTupleArg(type.getParamsType()); + + // check if given parameters type is compatible with function type? + // actual invocation is with parameterType + // ==> can one upcast them to fit the defined one OR does is partially work? + // e.g., when the function is defined for NULL, but we have opt? + if (isTypeCompatible(parameterType, tupleArgType)) { + specializedFunctionType = type; + + // specialize according to parameterType if it's a generic function so further typing works + assert(!specializedFunctionType.getReturnType().isGeneric()); + if(specializedFunctionType.getParamsType().isGeneric()) { + auto specializedParams = python::specializeGenerics(parameterType, tupleArgType); + specializedFunctionType = python::Type::makeFunctionType(specializedParams, + specializedFunctionType.getReturnType()); + } + + assertFunctionDoesNotReturnGeneric(specializedFunctionType); + return true; + } + } + + return false; + } + /*! * helper function to check for compatibility, i.e. whether from type can be cast to to type. * @param from source type diff --git a/tuplex/codegen/include/AnnotatedAST.h b/tuplex/codegen/include/AnnotatedAST.h index 47c85f830..90e574de8 100644 --- a/tuplex/codegen/include/AnnotatedAST.h +++ b/tuplex/codegen/include/AnnotatedAST.h @@ -177,7 +177,7 @@ namespace tuplex { /*! * annotates the tree with final types. If this is not possible, returns false - * @param pokicy compiler policy + * @param policy compiler policy * @param silentMode determines whether the type inference should log out problems or not * @param removeBranches whether to use RemoveDeadBranchesVisitor to prune AST * @return whether types could be successfully annotated/defined for all AST nodes diff --git a/tuplex/codegen/include/BuiltinDictProxy.h b/tuplex/codegen/include/BuiltinDictProxy.h new file mode 100644 index 000000000..a37f2a634 --- /dev/null +++ b/tuplex/codegen/include/BuiltinDictProxy.h @@ -0,0 +1,90 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 8/9/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// +#ifndef TUPLEX_BUILTINDICTPROXY_H +#define TUPLEX_BUILTINDICTPROXY_H + +#include + +#include +#include +#include + +// TODO: Could also use a general object based system which would make things easier... +// -> i.e., sequence protocol strings/lists/... + +// basically for each object we need +// 1.) representation as C++ object (field) +// 2.) code-generated logic (i.e., codegen specialization) +// 3.) to/from python object + +namespace tuplex { + namespace codegen { + class BuiltinDictProxy { + public: + // BuiltinDictProxy (--> specializedDictType) + BuiltinDictProxy(const python::Type& specializedDictType) : _specializedType(specializedDictType) { + // use cJSON as default for now... + _impl = std::make_shared(); + } + + // use both codegen/non-codegen version + // putItem + BuiltinDictProxy& putItem(const Field& key, const Field& value) { assert(_impl); _impl->putItem(key, value); return *this; } + BuiltinDictProxy& putItem(const python::Type& keyType, const SerializableValue& key, const python::Type& valueType, const SerializableValue& value) { assert(_impl); _impl->putItem(keyType, key, valueType, value); return *this; } + +// // getItem +// BuiltinDictProxy& getItem(const Field& key); +// BuiltinDictProxy& getItem(const python::Type& keyType, const SerializableValue& key); +// +// // delItem +// BuiltinDictProxy& delItem(const Field& key); +// BuiltinDictProxy& delItem(const python::Type& keyType, const SerializableValue& key); +// +// // allocSize() --> helpful when dict size is known upfront, can be used for optimization. +// BuiltinDictProxy& allocSize(llvm::Value* size); + + // getKeyView() --> codegen object + + // getValuesView() --> codegen object + + python::Type dictType() const { + throw std::runtime_error("not yet implemented"); + } + + python::Type specializedDictType() const { + return _specializedType; + } + + // codegenToMemory + + // codegenFromMemory + // static function? + + // codegenSerializedLength + + // toMemory + + // fromMemory + // static function? + + // serializedLength + private: + python::Type _specializedType; + + // implementation... + // -> cJSON + // -> ... + // -> ... + std::shared_ptr _impl; + }; + } +} + +#endif //TUPLEX_BUILTINDICTPROXY_H diff --git a/tuplex/codegen/include/BuiltinDictProxyImpl.h b/tuplex/codegen/include/BuiltinDictProxyImpl.h new file mode 100644 index 000000000..6defe6d90 --- /dev/null +++ b/tuplex/codegen/include/BuiltinDictProxyImpl.h @@ -0,0 +1,41 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 8/9/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// + +#ifndef TUPLEX_BUILTINDICTPROXYIMPL_H +#define TUPLEX_BUILTINDICTPROXYIMPL_H + +#include +#include +#include + +namespace tuplex { + namespace codegen { + class BuiltinDictProxyImpl { + public: + // Q: what does virtual do ? + virtual void putItem(const Field& key, const Field& value) = 0; + virtual void putItem(const python::Type& keyType, const SerializableValue& key, const python::Type& valueType, const SerializableValue& value) = 0; + + virtual bool keyExists(const Field& key) = 0; + + virtual Field getItem(const Field& key) = 0; + + virtual void replaceItem(const Field& key, const Field& value) = 0; + + virtual void deleteItem(const Field& key) = 0; + + // virtual void getKeyView() = 0; + + // virtual void getValuesView() = 0; + }; + } +} + +#endif //TUPLEX_BUILTINDICTPROXYIMPL_H \ No newline at end of file diff --git a/tuplex/codegen/include/SymbolTable.h b/tuplex/codegen/include/SymbolTable.h index c6aa32f89..13a4abb76 100644 --- a/tuplex/codegen/include/SymbolTable.h +++ b/tuplex/codegen/include/SymbolTable.h @@ -171,9 +171,36 @@ namespace tuplex { * add an attribute to a builtin type, e.g. str.lower * @param builtinType * @param name - * @param type + * @param type type of the attribute, i.e. if it is a function type then a function symbol will be added. + * If it is not a function type a variable symbol will be added. + */ + void addBuiltinTypeAttribute(const python::Type& builtinType, + const std::string& name, + const python::Type& type); + + /*! + * add an attribute to a builtin type, e.g. dict.keys() + * @param builtinType to which type to add the function + * @param name name of the attribute + * @param typer a dynamic typing function + * @param sym_type what kind of symbol it is (function? variable?), needed because typer works for both. + */ + void addBuiltinTypeAttribute(const python::Type& builtinType, + const std::string& name, + std::function typer, + const SymbolType& sym_type); + + /*! + * add an attribute to a builtin type, e.g. dict.keys() + * @param builtinType to which type to add the function + * @param name name of the attribute + * @param typer a dynamic typing function + * @param sym_type what kind of symbol it is (function? variable?), needed because typer works for both. */ - void addBuiltinTypeAttribute(const python::Type& builtinType, const std::string& name, const python::Type& type); + void addBuiltinTypeAttribute(const python::Type& builtinType, + const std::string& name, + std::function attributeTyper, + const SymbolType& sym_type=SymbolType::FUNCTION); /*! * checks whether a symbol can be looked up or not diff --git a/tuplex/codegen/include/TypeAnnotatorVisitor.h b/tuplex/codegen/include/TypeAnnotatorVisitor.h index 922c5f1e2..298effefb 100644 --- a/tuplex/codegen/include/TypeAnnotatorVisitor.h +++ b/tuplex/codegen/include/TypeAnnotatorVisitor.h @@ -53,6 +53,10 @@ namespace tuplex { const TokenType tt, ASTNode* right, const python::Type& b); void assignHelper(NIdentifier *id, python::Type type); + + bool is_nested_subscript_target(ASTNode* target); + void recursive_set_subscript_types(NSubscription* target, python::Type value_type); + void checkRetType(python::Type t); /*! * Annotate iterator-related NCall with iterator-specific info diff --git a/tuplex/codegen/include/cJSONDictProxyImpl.h b/tuplex/codegen/include/cJSONDictProxyImpl.h new file mode 100644 index 000000000..cb461ea7c --- /dev/null +++ b/tuplex/codegen/include/cJSONDictProxyImpl.h @@ -0,0 +1,72 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 8/9/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// +#ifndef TUPLEX_CJSONDICTPROXYIMPL_H +#define TUPLEX_CJSONDICTPROXYIMPL_H + +#ifdef BUILD_WITH_AWS +#include +#else +#include +#endif +#include "optional.h" +#include + +namespace tuplex { + namespace codegen { + class cJSONDictProxyImpl : public BuiltinDictProxyImpl { + public: + // cJSONDictProxyImpl() : _root(nullptr) {} + // is there a reason we want to separate the initialisation of cjsondictproxy objects and the actual cjson object? + cJSONDictProxyImpl() : _root(cJSON_CreateObject()) {} + ~cJSONDictProxyImpl() { + if(_root) { + cJSON_free(_root); + _root = nullptr; + } + } + cJSONDictProxyImpl(const cJSONDictProxyImpl& other) = delete; + cJSONDictProxyImpl& operator = (const cJSONDictProxyImpl& other) = delete; + + void putItem(const Field& key, const Field& value) override; + void putItem(const python::Type& keyType, const SerializableValue& key, const python::Type& valueType, const SerializableValue& value) override; + + bool keyExists(const Field& key) override; + + Field getItem(const Field& key) override; + + void replaceItem(const Field& key, const Field& value) override; + + void deleteItem(const Field& key) override; + + // void getKeyView() override; + + // void getValuesView() override; + + // notes: + // for cJSON subscripting, need to perform + // SerializableValue BlockGeneratorVisitor::subscriptCJSONDictionary(NSubscription *sub, SerializableValue index, + // const python::Type &index_type, + // SerializableValue value) { + + private: + cJSON *_root; // a map of the elements + cJSON *_typeMap; // a map of strings -> types (nested) + + /*! + * returns a string representing a type prefix when storing type information in cJSON object as well. + * @param type + * @return + */ + static std::string typePrefix(const python::Type& type); + }; + } +} + +#endif //TUPLEX_CJSONDICTPROXYIMPL_H diff --git a/tuplex/codegen/src/SymbolTable.cc b/tuplex/codegen/src/SymbolTable.cc index 99f9bb9d6..ca29b85d2 100644 --- a/tuplex/codegen/src/SymbolTable.cc +++ b/tuplex/codegen/src/SymbolTable.cc @@ -346,6 +346,55 @@ namespace tuplex { addSymbol(make_shared("enumerate", enumerateFunctionTyper)); addSymbol(make_shared("next", nextFunctionTyper)); + // conversions for list/tuple + + auto list_ret_type = [](const python::Type& type) { + // list? trivial + if(type.isListType()) + return type; + + // what can be converted to/from list? + // -> homogenous tuple + + // TODO iterator... + + // -> string + if(type == python::Type::STRING) { + return python::Type::makeListType(python::Type::STRING); + } + if(type.isOptionType() && type.withoutOptions() == python::Type::STRING) { + return python::Type::makeListType(python::Type::makeOptionType(python::Type::STRING)); + } + + // -> keyview/valueview + if(type.isDictKeysType() || type.isDictValuesType()) { + // get dict type + auto dict_type = type.elementType(); + + if(type.isDictValuesType()) + return python::Type::makeListType(dict_type.valueType()); + if(type.isDictKeysType()) + return python::Type::makeListType(dict_type.keyType()); + } + + return python::Type::UNKNOWN; + }; + + addSymbol(make_shared("list", [&list_ret_type](const python::Type& parameterType) { + + python::Type type = parameterType; + + // param should be single tuple + if(parameterType.isTupleType() && parameterType.parameters().size() == 1) + type = parameterType.parameters().front(); + + auto ret_type = list_ret_type(type); + if(ret_type != python::Type::UNKNOWN) + return python::Type::makeFunctionType(parameterType, ret_type); + return python::Type::UNKNOWN; + })); + // tuple is special case -> need to speculate on list/str/sequence length! + // TODO: other parameters? i.e. step size and Co? // also, boolean, float? etc.? addSymbol("range", python::Type::makeFunctionType(python::Type::I64, python::Type::RANGE)); @@ -407,6 +456,31 @@ namespace tuplex { // for dict, list, tuple use generic type version! + // for keys()/values() use generic dict and let symbol table create specialized type on the fly using + // typer function + /** TODO: finish implementing! (c++ lambda to get correct result) **/ + { + addBuiltinTypeAttribute(python::Type::GENERICDICT, "keys", [](const python::Type& callerType, + const python::Type& parameterType) { + + assert(callerType.isDictionaryType() && callerType != python::Type::GENERICDICT); + // dict_view is always based on dictionary type + auto view_type = python::Type::makeDictKeysViewType(callerType); + + return python::Type::makeFunctionType(callerType, view_type); + }, SymbolType::FUNCTION); + + addBuiltinTypeAttribute(python::Type::GENERICDICT, "values", [](const python::Type& callerType, + const python::Type& parameterType) { + + assert(callerType.isDictionaryType() && callerType != python::Type::GENERICDICT); + // dict_view is always based on dictionary type + auto values_type = python::Type::makeDictValuesViewType(callerType); + + return python::Type::makeFunctionType(callerType, values_type); + }, SymbolType::FUNCTION); + } + // i.e. type depending on input // for pop/popitem things are actually a bit more complicated... @@ -477,7 +551,7 @@ namespace tuplex { python::Type::makeTupleType({dict_type.keyType(), dict_type.valueType()}))); } - + // for the weird case of the default object having different type than the dict value type, use tracing. // another good design for builtin functions could be: @@ -644,6 +718,72 @@ namespace tuplex { return addSymbol(make_shared(name, type)); } + void SymbolTable::addBuiltinTypeAttribute(const python::Type &builtinType, const std::string &name, + std::function attributeTyper, + const SymbolType &sym_type) { + using namespace std; + assert(sym_type == SymbolType::VARIABLE || sym_type == SymbolType::FUNCTION); + + // this seems wrong, need to perform the lookup directly... + // use desc as name + auto scope = currentScope(); + auto it = scope->symbols.find(builtinType.desc()); + if(it == scope->symbols.end()) { + auto sym = make_shared(); + sym->name = sym->qualifiedName = builtinType.desc(); + scope->symbols[builtinType.desc()] = sym; + + it = scope->symbols.find(builtinType.desc()); + assert(it != scope->symbols.end()); + } + auto sym_att = it->second->findAttribute(name); + if(!sym_att) { + it->second->addAttribute(make_shared(name, name, builtinType, sym_type)); + sym_att = it->second->findAttribute(name); + } else { + // replace symbol, there can be only one symbol with a typer function + if(sym_type != sym_att->symbolType) + throw std::runtime_error("symbol can only have one kind of types associated with it!"); + assert(sym_att->qualifiedName == name); + sym_att->name = name; + } + assert(sym_att); + sym_att->parent = scope->symbols[name]; + sym_att->attributeFunctionTyper = attributeTyper; + } + + void SymbolTable::addBuiltinTypeAttribute(const python::Type &builtinType, const std::string &name, + std::function typer, + const SymbolType& sym_type = SymbolType::VARIABLE) { + using namespace std; + assert(sym_type == SymbolType::VARIABLE || sym_type == SymbolType::FUNCTION); + + // this seems wrong, need to perform the lookup directly... + // use desc as name + auto scope = currentScope(); + auto it = scope->symbols.find(builtinType.desc()); + if(it == scope->symbols.end()) { + scope->symbols[builtinType.desc()] = make_shared(builtinType.desc(), typer); + it = scope->symbols.find(builtinType.desc()); + assert(it != scope->symbols.end()); + } + auto sym_att = it->second->findAttribute(name); + if(!sym_att) { + it->second->addAttribute(make_shared(name, name, builtinType, sym_type)); + sym_att = it->second->findAttribute(name); + } else { + // replace symbol, there can be only one symbol with a typer function + if(sym_type != sym_att->symbolType) + throw std::runtime_error("symbol can only have one kind of types associated with it!"); + assert(sym_att->qualifiedName == name); + sym_att->name = name; + } + assert(sym_att); + sym_att->parent = scope->symbols[name]; + sym_att->functionTyper = typer; + } + void SymbolTable::addBuiltinTypeAttribute(const python::Type &builtinType, const std::string &name, const python::Type &type) { // this seems wrong, need to perform the lookup directly... @@ -729,7 +869,10 @@ namespace tuplex { return python::Type::UNKNOWN; } - static python::Type typeAttribute(std::shared_ptr sym, std::string attribute, python::Type parameterType) { + static python::Type typeAttribute(std::shared_ptr sym, + std::string attribute, + python::Type parameterType, + python::Type objectType) { if(sym) { auto attr_sym = sym->findAttribute(attribute); @@ -739,7 +882,7 @@ namespace tuplex { // else, return single type return attr_sym->type(); python::Type funcType = python::Type::UNKNOWN; - attr_sym->findFunctionTypeBasedOnParameterType(parameterType, funcType); // ignore ret value. + attr_sym->findAttributeFunctionType(objectType, parameterType, funcType); // ignore ret value. return funcType; } } @@ -788,7 +931,7 @@ namespace tuplex { auto name = type.desc(); auto sym = findSymbol(name); - resultType = typeAttribute(sym, attribute, parameterType); + resultType = typeAttribute(sym, attribute, parameterType, type); if(resultType != python::Type::UNKNOWN) return resultType; @@ -802,7 +945,7 @@ namespace tuplex { if(type.isDictionaryType() || type == python::Type::EMPTYDICT) name = python::Type::GENERICDICT.desc(); sym = findSymbol(name); - resultType = typeAttribute(sym, attribute, parameterType); + resultType = typeAttribute(sym, attribute, parameterType, type); } return resultType; diff --git a/tuplex/codegen/src/TypeAnnotatorVisitor.cc b/tuplex/codegen/src/TypeAnnotatorVisitor.cc index dd19474e7..ccff1262f 100644 --- a/tuplex/codegen/src/TypeAnnotatorVisitor.cc +++ b/tuplex/codegen/src/TypeAnnotatorVisitor.cc @@ -826,7 +826,7 @@ namespace tuplex { void TypeAnnotatorVisitor::visit(NDictionary* dict) { ApatheticVisitor::visit(dict); - + // Try to make it Dictionary[Key, Val] type (if every pair has the same key type and val type, respectively) bool is_key_val = true; python::Type keyType, valType; @@ -835,8 +835,20 @@ namespace tuplex { valType = dict->_pairs[0].second->getInferredType(); // save the key type, val type of the first pair for(const auto& p: dict->_pairs) { // check if every pair has the same key type, val type if(p.first->getInferredType() != keyType || p.second->getInferredType() != valType) { - is_key_val = false; // if they are not the same, then it is not of type Dictionary[Key, Val] - break; + // also for None case + if (valType.isDictionaryType() && p.second->getInferredType() == python::Type::EMPTYDICT) { + continue; + } else if (valType == python::Type::EMPTYDICT && p.second->getInferredType().isDictionaryType()) { + // upcast valType + valType = p.second->getInferredType(); + } else if (valType == python::Type::NULLVALUE) { + valType = python::Type::makeOptionType(p.second->getInferredType()); + } else if (p.second->getInferredType() == python::Type::NULLVALUE) { + valType = python::Type::makeOptionType(valType); + } else { + is_key_val = false; // if they are not the same, then it is not of type Dictionary[Key, Val] + break; + } } } @@ -1208,8 +1220,19 @@ namespace tuplex { // we are now inside a loop; no type change detected yet // check potential type change during loops if(_nameTable.find(id->_name) != _nameTable.end() && type != _nameTable.at(id->_name)) { - error("variable " + id->_name + " changed type during loop from " + _nameTable.at(id->_name).desc() + " to " + type.desc() + ", traced typing needed to determine if the type change is stable"); - _loopTypeChange = true; + + // special case: + // emptylist, emptydict (and emptyset) can get promoted + auto type_of_named = _nameTable.at(id->_name); + if((type_of_named == python::Type::EMPTYLIST && type.isListType()) || + (type_of_named == python::Type::EMPTYDICT && type.isDictionaryType()) ) { + // || (type_of_named == python::Type::EMPTYSET && type.isSetType()) + auto& logger = Logger::instance().logger("codegen"); + logger.debug("promoting " + id->_name + " from " + _nameTable.at(id->_name).desc() + " to " + type.desc()); + } else { + error("variable " + id->_name + " changed type during loop from " + _nameTable.at(id->_name).desc() + " to " + type.desc() + ", traced typing needed to determine if the type change is stable"); + _loopTypeChange = true; + } } } @@ -1220,69 +1243,158 @@ namespace tuplex { _nameTable[id->_name] = type; } - void TypeAnnotatorVisitor::visit(NAssign *assign) { - ApatheticVisitor::visit(assign); + bool TypeAnnotatorVisitor::is_nested_subscript_target(ASTNode* target) { + // check if target is a subscript target + return target->type() == ASTNodeType::Subscription; + } + + void TypeAnnotatorVisitor::recursive_set_subscript_types(NSubscription* target, python::Type value_type) { + target->_expression->accept(*this); + python::Type subscription_type = value_type; + python::Type index_type = target->_expression->getInferredType(); + python::Type new_value_type = python::Type::makeDictionaryType(index_type, subscription_type); + + if (target->_value->type() == ASTNodeType::Subscription) { + /* if the next target is a subscription, do recursive_set_subscript_types + on the next target, with value_type being Dict[index_type, value_type] */ + recursive_set_subscript_types((NSubscription*)target->_value, new_value_type); + } else if (target->_value->type() == ASTNodeType::Identifier) { + // if the next target is an identifier (e.g. d[0]) + NIdentifier* id = (NIdentifier*)target->_value; + // check if the type the identifier maps to is something subscriptable (for now, just a dictionary) + if (_nameTable[id->_name].isDictionaryType()) { + python::Type curr_type = _nameTable[id->_name]; + + if (curr_type == python::Type::EMPTYDICT) { + // we can just upcast type to Dict[index_type, value_type] + assignHelper(id, new_value_type); + } else if (curr_type == python::Type::GENERICDICT) { + // type remains generic dict (and need to set flag in annotator?) + subscription_type = python::Type::PYOBJECT; + } else { + // check if index_type and new_value_type match current index type and value type + if (curr_type.keyType() != index_type) { + // upcast index type to PYOBJECT and set flag + index_type = python::Type::PYOBJECT; + } + + if (curr_type.valueType() != value_type) { + if (curr_type.valueType().isOptionType()) { + // case where dictionary values are nullable + // check if non-null option is the same type as value_type + if (curr_type.valueType().elementType() == value_type) { + // need to make subscription_type an option type instead + subscription_type = python::Type::makeOptionType(subscription_type); + } else { + // upcast value type to PYOBJECT and set flag + subscription_type = python::Type::PYOBJECT; + } + } else { + // upcast value type to PYOBJECT and set flag + subscription_type = python::Type::PYOBJECT; + } + } + + new_value_type = python::Type::makeDictionaryType(index_type, subscription_type); + assignHelper(id, new_value_type); + } + } else { + // otherwise, raise an error (identifier type not subscriptable) + error("only dictionary subscription supported; " + _nameTable[id->_name].desc() + " not (yet) supported"); + } + } else { + // otherwise, need to check if final type of expression is something subscriptable (just dictionary for now) + // else: raise error (can't subscript type) + target->_value->accept(*this); + if (!target->_value->getInferredType().isDictionaryType()) { + error(target->_value->getInferredType().desc() + " is not (yet) subscriptable; only dictionaries supported"); + } + } + + // set type of subscription node (target) + target->setInferredType(subscription_type); + } + void TypeAnnotatorVisitor::visit(NAssign *assign) { // now interesting part comes // check what left side is - // TODO cases - /** - * id = id - * id, id, ... = id/val - * id, id, ... = id, val, ... (SPECIAL CASE even here for a, b = b, a) - */ - if(assign->_target->type() == ASTNodeType::Identifier) { - // Single identifier case - //@Todo: check that symbol table contains target! - - // then check if identifier is already within symbol table. If not, add! - NIdentifier* id = (NIdentifier*)assign->_target; - assignHelper(id, assign->_value->getInferredType()); - if(assign->_value->getInferredType().isIteratorType()) { - id->annotation().iteratorInfo = assign->_value->annotation().iteratorInfo; - _iteratorInfoTable[id->_name] = assign->_value->annotation().iteratorInfo; - } - } else if(assign->_target->type() == ASTNodeType::Tuple) { - // now we have a tuple assignment! - // the right hand side MUST be some unpackable thing. Currently this is a tuple but later we will - // have lists as well - NTuple *ids = (NTuple *) assign->_target; - auto rhsInferredType = assign->_value->getInferredType(); - // TODO add support for dictionaries, etc. - if (rhsInferredType.isTupleType()) { - // get the types contained in our tuple - std::vector tupleTypes = rhsInferredType.parameters(); - if(ids->_elements.size() != tupleTypes.size()) { - error("Incorrect number of arguments to unpack in assignment"); + if (is_nested_subscript_target(assign->_target)) { + assert(assign->_target->type() == ASTNodeType::Subscription); + + NSubscription* sub_node = (NSubscription*) assign->_target; + + // visit b's tree + assign->_value->accept(*this); + + auto value_type = assign->_value->getInferredType(); + + // recursively set types for each subscription target + recursive_set_subscript_types(sub_node, value_type); + + // set assign type to value type + assign->setInferredType(value_type); + } else { + ApatheticVisitor::visit(assign); + // TODO cases + /** + * id = id + * id, id, ... = id/val + * id, id, ... = id, val, ... (SPECIAL CASE even here for a, b = b, a) + */ + if(assign->_target->type() == ASTNodeType::Identifier) { + // Single identifier case + //@Todo: check that symbol table contains target! + + // then check if identifier is already within symbol table. If not, add! + NIdentifier* id = (NIdentifier*)assign->_target; + assignHelper(id, assign->_value->getInferredType()); + if(assign->_value->getInferredType().isIteratorType()) { + id->annotation().iteratorInfo = assign->_value->annotation().iteratorInfo; + _iteratorInfoTable[id->_name] = assign->_value->annotation().iteratorInfo; } + } else if(assign->_target->type() == ASTNodeType::Tuple) { + // now we have a tuple assignment! + // the right hand side MUST be some unpackable thing. Currently this is a tuple but later we will + // have lists as well + NTuple *ids = (NTuple *) assign->_target; + auto rhsInferredType = assign->_value->getInferredType(); + // TODO add support for dictionaries, etc. + if (rhsInferredType.isTupleType()) { + // get the types contained in our tuple + std::vector tupleTypes = rhsInferredType.parameters(); + if(ids->_elements.size() != tupleTypes.size()) { + error("Incorrect number of arguments to unpack in assignment"); + } - for(unsigned long i = 0; i < ids->_elements.size(); i ++) { - auto elt = ids->_elements[i]; - if(elt->type() != ASTNodeType::Identifier) { - error("Trying to assign to a non identifier in a tuple"); + for(unsigned long i = 0; i < ids->_elements.size(); i ++) { + auto elt = ids->_elements[i]; + if(elt->type() != ASTNodeType::Identifier) { + error("Trying to assign to a non identifier in a tuple"); + } + NIdentifier *id = (NIdentifier *) elt; + // assign each identifier to the type in the tuple at the corresponding index + assignHelper(id, tupleTypes[i]); } - NIdentifier *id = (NIdentifier *) elt; - // assign each identifier to the type in the tuple at the corresponding index - assignHelper(id, tupleTypes[i]); - } - } else if(rhsInferredType == python::Type::STRING) { - for(const auto& elt : ids->_elements) { - if(elt->type() != ASTNodeType::Identifier) { - error("Trying to assign to a non identifier in a tuple"); + } else if(rhsInferredType == python::Type::STRING) { + for(const auto& elt : ids->_elements) { + if(elt->type() != ASTNodeType::Identifier) { + error("Trying to assign to a non identifier in a tuple"); + } + NIdentifier *id = (NIdentifier *) elt; + assignHelper(id, python::Type::STRING); } - NIdentifier *id = (NIdentifier *) elt; - assignHelper(id, python::Type::STRING); + } else { + error("bad type annotation in tuple assign"); } } else { - error("bad type annotation in tuple assign"); + error("only assignment to tuples/identifiers supported yet!!!"); + // error("only assignment to tuples/identifiers/subscriptions supported yet!!!"); } - } else { - error("only assignment to tuples/identifiers supported yet!!!"); + // in all cases, set the type of the entire assign + // TODO we def want this in the single identifier case, but in general? + assign->setInferredType(assign->_target->getInferredType()); } - // in all cases, set the type of the entire assign - // TODO we def want this in the single identifier case, but in general? - assign->setInferredType(assign->_target->getInferredType()); } void TypeAnnotatorVisitor::resolveNameConflicts(const std::unordered_map &table) { @@ -1651,6 +1763,26 @@ namespace tuplex { } else if(exprType.isIteratorType()) { _nameTable[id->_name] = exprType.yieldType(); id->setInferredType(exprType.yieldType()); + } else if(exprType.isDictValuesType()) { + auto dict_type = exprType.elementType(); + auto yield_type = dict_type.valueType(); + if(yield_type == python::Type::PYOBJECT || yield_type == python::Type::UNKNOWN) { + // might require unrolling & speculation on view length! + addCompileError(CompileError::TYPE_ERROR_UNSUPPORTED_LOOP_TESTLIST_TYPE); + return; + } + _nameTable[id->_name] = yield_type; + id->setInferredType(yield_type); + } else if(exprType.isDictKeysType()) { + auto dict_type = exprType.elementType(); + auto yield_type = dict_type.keyType(); + if(yield_type == python::Type::PYOBJECT || yield_type == python::Type::UNKNOWN) { + // might require unrolling & speculation on view length! + addCompileError(CompileError::TYPE_ERROR_UNSUPPORTED_LOOP_TESTLIST_TYPE); + return; + } + _nameTable[id->_name] = yield_type; + id->setInferredType(yield_type); } else { addCompileError(CompileError::TYPE_ERROR_UNSUPPORTED_LOOP_TESTLIST_TYPE); } diff --git a/tuplex/codegen/src/cJSONDictProxyImpl.cc b/tuplex/codegen/src/cJSONDictProxyImpl.cc new file mode 100644 index 000000000..d0a2d634b --- /dev/null +++ b/tuplex/codegen/src/cJSONDictProxyImpl.cc @@ -0,0 +1,206 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 8/9/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// +#include + +namespace tuplex { + namespace codegen { + // in general cJSON supports following data types: + // string + // number + // boolean + // null + // object + // array + // --> yet type info from python might get lost. Hence, store it when possible as well! + + // this is a general helper function to turn a Field into a cJSON object + /*! + * converts a field into a cJSON object. If not convertible, returns nullptr. + * @param f Field + * @param includeTypePrefix + * @return cJSON* object + */ + cJSON* fieldToCJSON(const Field& f, bool includeTypePrefix=false) { + // initialise cJSON object + cJSON* cjson_obj = nullptr; + + // check type of Field, create corresponding cJSON type object + if (f.getType() == python::Type::BOOLEAN) { + if (f.getInt() > 0) { + cjson_obj = cJSON_CreateTrue(); + } else { + cjson_obj = cJSON_CreateFalse(); + } + } else if (f.getType() == python::Type::F64) { + cjson_obj = cJSON_CreateNumber(f.getDouble()); + } else if (f.getType() == python::Type::I64) { + // should I be upcasting? + cjson_obj = cJSON_CreateNumber((double)f.getInt()); + } else if (f.getType() == python::Type::STRING) { + assert(f.getPtr()); + cjson_obj = cJSON_CreateString((const char*)f.getPtr()); + } else if (f.getType().isListType()) { + assert(f.getPtr()); + + tuplex::List* lis = (tuplex::List*)f.getPtr(); + cjson_obj = cJSON_CreateArray(); + + for (int i = 0; i < lis->numElements(); i++) { + // retrieve ith element from list + Field element = lis->getField(i); + // convert to cJSON object + cJSON* cjson_elt = fieldToCJSON(element); + + // add element to cJSON array + cJSON_AddItemToArray(cjson_obj, cjson_elt); + } + } else if (f.getType().isTupleType()) { + assert(f.getPtr()); + + tuplex::Tuple* tup = (tuplex::Tuple*)f.getPtr(); + cjson_obj = cJSON_CreateArray(); + + for (int i = 0; i < tup->numElements(); i++) { + // retrieve ith element from tuple + Field element = tup->getField(i); + // convert to cJSON object + cJSON* cjson_elt = fieldToCJSON(element); + + // add element to cJSON array + cJSON_AddItemToArray(cjson_obj, cjson_elt); + } + } else if (f.getType() == python::Type::NULLVALUE) { + cjson_obj = cJSON_CreateNull(); + } else { + // throw std::runtime_error("cannot change value with type " + value.getType().desc() + " into cJSON object"); + } + + return cjson_obj; + } + + Field cJSONToField(const cJSON* object) { + assert(object); + + Field ret = Field::null(); + + if (cJSON_IsNumber(object)) { + ret = Field(cJSON_GetNumberValue(object)); + } else if (cJSON_IsString(object)) { + ret = Field(cJSON_GetStringValue(object)); + } else if (cJSON_IsTrue(object)) { + ret = Field(true); + } else if (cJSON_IsFalse(object)) { + ret = Field(false); + } else if (cJSON_IsNull(object)) { + ret = Field::null(); + } else if (cJSON_IsArray(object)) { + throw std::runtime_error("not yet implemented..."); + } else if (cJSON_IsObject(object)) { + throw std::runtime_error("not yet implemented..."); + } + + return ret; + } + + std::string cJSONDictProxyImpl::typePrefix(const python::Type& type) { + + // init map for a couple common types (int, float, bool, ...) + + // since keys in JSON are always strings, need to store type info in that string! + return ""; + } + + void cJSONDictProxyImpl::putItem(const Field &key, const Field &value) { + // put into cJSON, yet due to both key/type being not necessary type stable, encode type as base64 into values! + // map primitive types directly into cJSON if possible + if(!_root) + // _root = cJSON_CreateObject(); + throw std::runtime_error("cannot use putItem on an uninitialised dictionary"); + + cJSON* to_add = fieldToCJSON(value); + if (!to_add) { + throw std::runtime_error("item to add not convertible to cJSON object"); + } + + // add to cJSON object + // TODO: what's the difference between key.desc and getting the key's ptr value? + // A: key.desc gets the string of the Field regardless of the type of the Field + cJSON_AddItemToObject(_root, key.desc().c_str(), to_add); + + // type prefix + + // throw std::runtime_error("to implement..."); + } + + void cJSONDictProxyImpl::putItem(const python::Type &keyType, const SerializableValue &key, + const python::Type &valueType, const SerializableValue &value) { + if(!_root) + _root = cJSON_CreateObject(); + + throw std::runtime_error("to implement..."); + } + + bool cJSONDictProxyImpl::keyExists(const Field& key) { + if(!_root) + throw std::runtime_error("cannot use keyExists on an uninitialised dictionary"); + + cJSON* res = cJSON_GetObjectItemCaseSensitive(_root, key.desc().c_str()); + + return (res != NULL); + } + + Field cJSONDictProxyImpl::getItem(const Field& key) { + if (!_root) + throw std::runtime_error("cannot use getItem on an uninitialised dictionary"); + + // retrieve value from dict + cJSON* item = cJSON_GetObjectItemCaseSensitive(_root, key.desc().c_str()); + + if (!item) + throw std::runtime_error("error retrieving value from cJSON dictionary"); + + // convert into Field + Field field_item = cJSONToField(item); + + return field_item; + } + + void cJSONDictProxyImpl::replaceItem(const Field& key, const Field& value) { + if (!_root) + throw std::runtime_error("cannot use replaceItem on an uninitialised dictionary"); + + // assert(key.getType() == python::Type::STRING); + + // attempt to retrieve value from dict + cJSON* item = cJSON_GetObjectItemCaseSensitive(_root, key.desc().c_str()); + + if (!item) { + // key doesn't already exist; simply perform putItem instead (?) + putItem(key, value); + } else { + // replace value at key + cJSON* new_item = fieldToCJSON(value); + if (!new_item) { + throw std::runtime_error("new item not convertible to cJSON object"); + } + + cJSON_ReplaceItemInObjectCaseSensitive(_root, key.desc().c_str(), new_item); + } + } + + void cJSONDictProxyImpl::deleteItem(const Field& key) { + if (!_root) + throw std::runtime_error("cannot use deleteItem on an uninitialised dictionary"); + + // delete value from dict + cJSON_DeleteItemFromObjectCaseSensitive(_root, (const char*)key.desc().c_str()); + } + } +} diff --git a/tuplex/test/CMakeLists.txt b/tuplex/test/CMakeLists.txt index 3f3721780..3e422d54b 100755 --- a/tuplex/test/CMakeLists.txt +++ b/tuplex/test/CMakeLists.txt @@ -78,6 +78,7 @@ add_subdirectory(io) add_subdirectory(runtime) add_subdirectory(adapters) add_subdirectory(utils) +add_subdirectory(dict) # these require python, so only if embed is active! if(Python3_Embed_FOUND) diff --git a/tuplex/test/core/DictionaryFunctions.cc b/tuplex/test/core/DictionaryFunctions.cc index 955014748..01e63533e 100644 --- a/tuplex/test/core/DictionaryFunctions.cc +++ b/tuplex/test/core/DictionaryFunctions.cc @@ -13,6 +13,8 @@ #include "../../utils/include/Utils.h" #include "TestUtils.h" #include "RuntimeInterface.h" +#include +#include // need for these tests a running python interpreter, so spin it up class DictionaryFunctions : public PyTest {}; @@ -496,4 +498,22 @@ TEST_F(DictionaryFunctions, EmptyDict) { // .pop(val) KeyError // ==> left for later testing because it's a bit more complicated... #warning "implement fast, special functions for empty dict..." +} + +TEST_F(DictionaryFunctions, DictCount) { + using namespace tuplex; + auto code = "def count(L):\n" + " d = {}\n" + " for x in L:\n" + " if x not in d.keys():\n" + " d[x] = 0\n" + " d[x] += 1\n" + " return d"; + + auto root = std::unique_ptr(parseToAST(code)); + EXPECT_TRUE(root.get()); + + GraphVizGraph graph; + graph.createFromAST(root.get(), true); + graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dict_count.pdf"); } \ No newline at end of file diff --git a/tuplex/test/core/DictionaryTyping.cc b/tuplex/test/core/DictionaryTyping.cc new file mode 100644 index 000000000..bb44dee6c --- /dev/null +++ b/tuplex/test/core/DictionaryTyping.cc @@ -0,0 +1,691 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 1/1/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// + +#include +#include +#include +#include +#include +#include +#include +#include + +// TEST(DictionaryTyping, Template) { +// using namespace tuplex; +// using namespace std; + +// auto code = ""; + +// // parse code to AST +// auto ast = tuplex::codegen::AnnotatedAST(); +// ast.parseString(code); + +// // make input typing +// python::Type inputType = python::Type::PYOBJECT; + +// // create symbol table (add parameters and types) +// ast.addTypeHint("L", inputType); +// ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY); + +// // print type annotated ast +// GraphVizGraph graph; +// graph.createFromAST(ast.getFunctionAST(), true); +// graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/.pdf"); + +// cout<<"return type of function is: "< 5:\n" + " d[i % 2] += 5\n" + " else:\n" + " d[i % 2] += i\n" + " return d"; + + // parse code to AST + auto ast = tuplex::codegen::AnnotatedAST(); + ast.parseString(code); + + // make input typing + python::Type inputType = python::Type::makeListType(python::Type::I64); + + // create symbol table (add parameters and types) + ast.addTypeHint("L", inputType); + ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY); + + // print type annotated ast + GraphVizGraph graph; + graph.createFromAST(ast.getFunctionAST(), true); + graph.saveAsPDF("/home/rgoyal6/tuplex/tuplex/build/dictionary_asts/control_flow_key_assign.pdf"); + + cout<<"return type of function is: "< needs speculation. + // test count UDF + auto count_c = "def count_keys(x):\n" + " d = {'A':10, 'B': 10, x: 20}\n" + " return d.keys()"; + + // parse code to AST + auto ast = tuplex::codegen::AnnotatedAST(); + ast.parseString(count_c); + + // make typing + python::Type inputType = python::Type::STRING; + + // create symbol table + ast.addTypeHint("x", inputType); + ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY); + + // print type annotated ast + GraphVizGraph graph; + graph.createFromAST(ast.getFunctionAST(), true); + graph.saveAsPDF("dict_count_keys.pdf"); + + cout<<"return type of function is: "< needs speculation. + + // test count UDF + auto count_c = "def count_keys(x):\n" + " d = {'A':10, 'B': 10, x: 20}\n" + " return list(d.keys())"; + + // parse code to AST + auto ast = tuplex::codegen::AnnotatedAST(); + ast.parseString(count_c); + + // make typing + python::Type inputType = python::Type::STRING; + + // create symbol table + ast.addTypeHint("x", inputType); + ast.defineTypes(codegen::DEFAULT_COMPILE_POLICY); + + cout<<"return type of function is: "< +// #include "gtest/gtest.h" + +// class DictProxyTest : public TuplexTest {}; + +// // helper function to generate combinations with repititions +// template void combinations_r_recursive(const std::vector &elements, std::size_t combination_length, +// std::vector &pos, unsigned long depth, +// unsigned long margin, std::vector>& result) { +// // Have we selected the number of required elements? +// if (depth >= combination_length) { +// std::vector combination; +// combination.reserve(combination_length); +// for(unsigned long ii = 0; ii < pos.size(); ++ii) +// combination.push_back(elements[pos[ii]]); +// combination.shrink_to_fit(); +// result.push_back(combination); +// return; +// } + +// // Try to select new elements to the right of the last selected one. +// for (unsigned long ii = margin; ii < elements.size(); ++ii) { +// pos[depth] = ii; +// combinations_r_recursive(elements, combination_length, pos, depth + 1, ii, result); +// } +// } + +// template std::vector> combinations_with_repetition(const std::vector &elements, size_t combination_length) { +// assert(combination_length <= elements.size()); +// std::vector positions(combination_length, 0); +// std::vector> result; +// combinations_r_recursive(elements, combination_length, positions, 0, 0, result); + +// return result; +// } + + + +// TEST_F(DictProxyTest, PutItemTest) { +// using namespace tuplex; +// using namespace std; + +// // testing the non-codegenerated put item test + + +// // tests to write: + +// // 1. heterogenous dict -> basically use modified JSON as in-memory storage format. +// // 2. homogenous keytype dict -> can encode dict directly & serialize it more efficiently. Represent in-memory as hash table specialized depending on type. +// // 3. homogenous valuetype -> ignore case, specialize to 1. +// // 4. compile-time known keys/restricted keyset, keys do not change. -> struct type with fixed offsets! + +// // put and get +// auto dict_fun_code = "def f(a, b, c, d):\n" +// " M = dict()\n" +// " M[a] = b\n" +// " M[c] = d\n" +// " return M, M[a], M[c]\n"; + +// codegen::BuiltinDictProxy dict_proxy(python::Type::UNKNOWN); + +// // create test setups (4 values, all combos) +// vector test_values{Field((int64_t)0), Field(10.0), Field(false), Field::null(), Field("hello world"), Field(Tuple(10, 20)), Field(Tuple(3.141, 10, false, "test")), Field(List(1.0, 3.0, 4.0))}; + +// // NOTE: list/dict is not hashable in python! +// // + +// // create combos +// // 4 ^ len(test_values) + + +// // what about nested dicts? +// // -> unflatten? +// // --> unflatten using combined keys? i.e. a/b/c ? which char to use as separator? +// // maybe start with non-nested dicts. +// // dicts should be able to store lists etc. + +// auto combos = combinations_with_repetition(test_values, 4); + +// cout<<"Generated "< can be checked dynamically at runtime. I.e., good for read-only dictionaries, rarely changed ones. etc. --> requires dispatch dictionary for each type for dynamic types. Constants can be translated during compile time. +// // -> because dicts support in syntax, need to keep additional bitmap to check whether there's a valid entry or not! + + +// // 2. fixed key type/value type dicts -> can be used in dynamic settings. E.g., when accumulating things! + +// // 3. other usage should be esoteric... + +// } diff --git a/tuplex/test/dict/cJSONTest.cc b/tuplex/test/dict/cJSONTest.cc new file mode 100644 index 000000000..22e0c0a00 --- /dev/null +++ b/tuplex/test/dict/cJSONTest.cc @@ -0,0 +1,125 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 8/9/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// + +#include +#include "gtest/gtest.h" + +TEST(cJSONTest, PutItemTest) { + using namespace tuplex; + using namespace std; + + // testing non-codegenerated put item + // initialise test dict + codegen::cJSONDictProxyImpl dict_proxy; + + EXPECT_EQ(false, dict_proxy.keyExists(Field((int64_t)10))); + + // put test values into test dict + dict_proxy.putItem(Field((int64_t)10), Field("a")); + dict_proxy.putItem(Field((int64_t)20), Field("b")); + + EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)10))); + EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)20))); + EXPECT_EQ(false, dict_proxy.keyExists(Field((int64_t)30))); + + dict_proxy.putItem(Field((int64_t)30), Field("c")); + + EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)20))); + EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)30))); +} + +TEST(cJSONTest, GetItemTest) { + using namespace tuplex; + using namespace std; + + // testing non-codegenerated put item + // initialise test dict + codegen::cJSONDictProxyImpl dict_proxy; + + // put test values into test dict + dict_proxy.putItem(Field((int64_t)10), Field("a")); + dict_proxy.putItem(Field((int64_t)20), Field("b")); + + EXPECT_EQ(Field("a"), dict_proxy.getItem(Field((int64_t)10))); + EXPECT_EQ(Field("b"), dict_proxy.getItem(Field((int64_t)20))); + EXPECT_THROW(dict_proxy.getItem(Field((int64_t)30)), std::runtime_error); + + dict_proxy.putItem(Field((int64_t)30), Field("c")); + + EXPECT_EQ(Field("c"), dict_proxy.getItem(Field((int64_t)30))); +} + +TEST(cJSONTest, DeleteItemTest) { + using namespace tuplex; + using namespace std; + + // testing non-codegenerated put item + // initialise test dict + codegen::cJSONDictProxyImpl dict_proxy; + + // put test values into test dict + dict_proxy.putItem(Field((int64_t)10), Field("a")); + dict_proxy.putItem(Field((int64_t)20), Field("b")); + + EXPECT_EQ(Field("a"), dict_proxy.getItem(Field((int64_t)10))); + EXPECT_EQ(Field("b"), dict_proxy.getItem(Field((int64_t)20))); + + dict_proxy.deleteItem(Field((int64_t)10)); + + EXPECT_EQ(false, dict_proxy.keyExists(Field((int64_t)10))); + EXPECT_EQ(true, dict_proxy.keyExists(Field((int64_t)20))); + + dict_proxy.deleteItem(Field((int64_t)20)); + dict_proxy.putItem(Field((int64_t)10), Field((int64_t)100)); + + Field res = dict_proxy.getItem(Field((int64_t)10)); + + // NOTE: expected result will be a double, bc I think cJSON stores all numbers as doubles + EXPECT_EQ(Field((double)100), dict_proxy.getItem(Field((int64_t)10))); + EXPECT_EQ(false, dict_proxy.keyExists(Field((int64_t)20))); +} + +TEST(cJSONTest, ReplaceItemTest) { + using namespace tuplex; + using namespace std; + + // testing non-codegenerated put item + // initialise test dict + codegen::cJSONDictProxyImpl dict_proxy; + + // put test values into test dict + dict_proxy.putItem(Field((int64_t)10), Field("a")); + dict_proxy.putItem(Field((int64_t)20), Field("b")); + + EXPECT_EQ(Field("a"), dict_proxy.getItem(Field((int64_t)10))); + EXPECT_EQ(Field("b"), dict_proxy.getItem(Field((int64_t)20))); + + dict_proxy.replaceItem(Field((int64_t)10), Field("c")); + + EXPECT_EQ(Field("c"), dict_proxy.getItem(Field((int64_t)10))); + EXPECT_EQ(Field("b"), dict_proxy.getItem(Field((int64_t)20))); + + dict_proxy.putItem(Field((int64_t)30), Field("c")); + + EXPECT_EQ(Field("c"), dict_proxy.getItem(Field((int64_t)10))); + EXPECT_EQ(Field("c"), dict_proxy.getItem(Field((int64_t)30))); + + dict_proxy.replaceItem(Field((int64_t)30), Field((int64_t)50)); + + // NOTE: expected result will be a double, bc I think cJSON stores all numbers as doubles + EXPECT_EQ(Field((double)50), dict_proxy.getItem(Field((int64_t)30))); +} + +// tests to write: + +// 1. heterogenous dict -> basically use modified JSON as in-memory storage format. +// 2. homogenous keytype dict -> can encode dict directly & serialize it more efficiently. Represent in-memory as hash table specialized depending on type. +// 3. homogenous valuetype -> ignore case, specialize to 1. +// 4. compile-time known keys/restricted keyset, keys do not change. -> struct type with fixed offsets! \ No newline at end of file diff --git a/tuplex/test/dict/main.cc b/tuplex/test/dict/main.cc new file mode 100644 index 000000000..af04e4577 --- /dev/null +++ b/tuplex/test/dict/main.cc @@ -0,0 +1,18 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 1/1/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// + +#include "gtest/gtest.h" + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + int ret = RUN_ALL_TESTS(); + return ret; +} \ No newline at end of file diff --git a/tuplex/utils/include/TypeSystem.h b/tuplex/utils/include/TypeSystem.h index 6861f24de..f6698d3bb 100644 --- a/tuplex/utils/include/TypeSystem.h +++ b/tuplex/utils/include/TypeSystem.h @@ -18,6 +18,9 @@ #include #include +// need to define new type for d.keys (compound type - dictkeys) +// make createdictkeystype and createdictvaluestype + namespace python { class Type { @@ -39,6 +42,7 @@ namespace python { static const Type EMPTYTUPLE; //! special type for an empty tuple static const Type EMPTYDICT; //! special type for empty dict static const Type EMPTYLIST; //! special type for empty list + static const Type EMPTYSET; //! special type for empty set static const Type NULLVALUE; //! special type for a nullvalue / None static const Type PYOBJECT; //! special type for any python object static const Type GENERICTUPLE; //! special type to accept ANY tuple object (helpful for symbol table) @@ -49,7 +53,9 @@ namespace python { static const Type MODULE; //! generic module object, used in symbol table static const Type ITERATOR; //! iterator/generator type static const Type EMPTYITERATOR; //! special type for empty iterator - + // static const Type DICTKEYS; //! special type for list of dictionary keys + // static const Type DICTVALUES; //! special type for list of dictionary values + // define two special types, used in the inference to describe bounds // any is a subtype of everything static const Type ANY; @@ -100,6 +106,8 @@ namespace python { bool hasVariablePositionalArgs() const; bool isExceptionType() const; bool isIteratorType() const; + bool isDictKeysType() const; + bool isDictValuesType() const; inline bool isGeneric() const { if(_hash == python::Type::PYOBJECT._hash || @@ -114,7 +122,7 @@ namespace python { return false; } - if(isListType() || isOptionType()) { + if(isListType() || isOptionType() || isDictKeysType() || isDictValuesType()) { if(elementType().isGeneric()) return true; return false; @@ -182,7 +190,7 @@ namespace python { bool isPrimitiveType() const; /*! - * check whether a given type is iterable. Currently true for iterator, list, tuple, string, range and dictionary. + * check whether a given type is iterable. Currently true for iterator, list, tuple, string, range, dictionary, dict_keys, and dict_values. * @return */ bool isIterableType() const; @@ -216,6 +224,9 @@ namespace python { static Type makeListType(const python::Type &elementType); + static Type makeDictKeysViewType(const python::Type& dictType); + static Type makeDictValuesViewType(const python::Type& dictType); + /*! * create iterator type from yieldType. * @param yieldType @@ -278,6 +289,8 @@ namespace python { FUNCTION, TUPLE, DICTIONARY, + DICT_KEYS, + DICT_VALUES, LIST, CLASS, OPTION, // for nullable @@ -321,11 +334,13 @@ namespace python { bool isFunctionType(const Type& t) const; bool isDictionaryType(const Type& t) const; + bool isDictKeysType(const Type& t); + bool isDictValuesType(const Type& t); bool isTupleType(const Type& t) const; bool isOptionType(const Type& t) const; bool isListType(const Type& t) const; bool isIteratorType(const Type& t) const; - + std::vector parameters(const Type& t) const; Type returnType(const Type& t) const; @@ -344,15 +359,15 @@ namespace python { // right now, no tuples or other weird types... Type createOrGetFunctionType(const Type& param, const Type& ret=Type::EMPTYTUPLE); Type createOrGetDictionaryType(const Type& key, const Type& val); + Type createOrGetDictKeysViewType(const Type& key); + Type createOrGetDictValuesViewType(const Type& val); Type createOrGetListType(const Type& val); - Type createOrGetTupleType(const std::initializer_list args); Type createOrGetTupleType(const TTuple& args); Type createOrGetTupleType(const std::vector& args); Type createOrGetOptionType(const Type& type); Type createOrGetIteratorType(const Type& yieldType); - Type getByName(const std::string& name); // helper function to connect type system to codegen diff --git a/tuplex/utils/src/TypeSystem.cc b/tuplex/utils/src/TypeSystem.cc index 2fd3fe064..bc80963c3 100644 --- a/tuplex/utils/src/TypeSystem.cc +++ b/tuplex/utils/src/TypeSystem.cc @@ -33,11 +33,13 @@ namespace python { const Type Type::EMPTYTUPLE = python::TypeFactory::instance().createOrGetTupleType(std::vector()); const Type Type::EMPTYDICT = python::TypeFactory::instance().createOrGetPrimitiveType("{}"); // empty dict const Type Type::EMPTYLIST = python::TypeFactory::instance().createOrGetPrimitiveType("[]"); // empty list: primitive because it can have any type element + const Type Type::EMPTYSET = python::TypeFactory::instance().createOrGetPrimitiveType("empty_set"); // empty list: primitive because it can have any type element const Type Type::NULLVALUE = python::TypeFactory::instance().createOrGetPrimitiveType("null"); const Type Type::PYOBJECT = python::TypeFactory::instance().createOrGetPrimitiveType("pyobject"); const Type Type::GENERICTUPLE = python::TypeFactory::instance().createOrGetPrimitiveType("tuple"); const Type Type::GENERICDICT = python::TypeFactory::instance().createOrGetDictionaryType(python::Type::PYOBJECT, python::Type::PYOBJECT); const Type Type::GENERICLIST = python::TypeFactory::instance().createOrGetListType(python::Type::PYOBJECT); + //const Type Type::GENERICSET = python::TypeFactory::instance().createOrGetSetType(python::Type::PYOBJECT); // @TODO: implement. const Type Type::VOID = python::TypeFactory::instance().createOrGetPrimitiveType("void"); const Type Type::MATCHOBJECT = python::TypeFactory::instance().createOrGetPrimitiveType("matchobject"); const Type Type::RANGE = python::TypeFactory::instance().createOrGetPrimitiveType("range"); @@ -147,6 +149,24 @@ namespace python { return registerOrGetType(name, AbstractType::DICTIONARY, {key, val}); } + Type TypeFactory::createOrGetDictKeysViewType(const Type& key) { + std::string name; + name += "DictKeysView["; + name += TypeFactory::instance().getDesc(key._hash); + name += "]"; + + return registerOrGetType(name, AbstractType::DICT_KEYS, {key}); + } + + Type TypeFactory::createOrGetDictValuesViewType(const Type& val) { + std::string name; + name += "DictValuesView["; + name += TypeFactory::instance().getDesc(val._hash); + name += "]"; + + return registerOrGetType(name, AbstractType::DICT_VALUES, {val}); + } + Type TypeFactory::createOrGetListType(const Type &val) { std::string name; name += "["; @@ -275,6 +295,14 @@ namespace python { return TypeFactory::instance().isIteratorType(*this); } + bool Type::isDictKeysType() const { + return TypeFactory::instance().isDictKeysType(*this); + } + + bool Type::isDictValuesType() const { + return TypeFactory::instance().isDictValuesType(*this); + } + Type Type::getReturnType() const { // first make sure this a function type! if( ! (TypeFactory::instance().isFunctionType(*this) || @@ -311,6 +339,22 @@ namespace python { return type == AbstractType::DICTIONARY || t == Type::EMPTYDICT || t == Type::GENERICDICT; } + bool TypeFactory::isDictKeysType(const Type& t) { + auto it = _typeMap.find(t._hash); + if(it == _typeMap.end()) + return false; + + return it->second._type == AbstractType::DICT_KEYS; + } + + bool TypeFactory::isDictValuesType(const Type& t) { + auto it = _typeMap.find(t._hash); + if(it == _typeMap.end()) + return false; + + return it->second._type == AbstractType::DICT_VALUES; + } + bool TypeFactory::isListType(const Type &t) const { auto it = _typeMap.find(t._hash); if(it == _typeMap.end()) @@ -356,6 +400,9 @@ namespace python { } Type Type::keyType() const { + if(_hash == EMPTYDICT._hash || _hash == GENERICDICT._hash) + return PYOBJECT; + assert(isDictionaryType() && _hash != EMPTYDICT._hash && _hash != GENERICDICT._hash); auto& factory = TypeFactory::instance(); auto it = factory._typeMap.find(_hash); @@ -373,6 +420,9 @@ namespace python { } Type Type::valueType() const { + if(_hash == EMPTYDICT._hash || _hash == GENERICDICT._hash) + return PYOBJECT; + assert(isDictionaryType() && _hash != EMPTYDICT._hash && _hash != GENERICDICT._hash); auto& factory = TypeFactory::instance(); auto it = factory._typeMap.find(_hash); @@ -382,8 +432,8 @@ namespace python { } Type Type::elementType() const { - if(isListType()) { - assert(isListType() && _hash != EMPTYLIST._hash); + if(isListType() || isDictKeysType() || isDictValuesType()) { + assert((isListType() && _hash != EMPTYLIST._hash) || isDictKeysType() || isDictValuesType()); auto& factory = TypeFactory::instance(); auto it = factory._typeMap.find(_hash); assert(it != factory._typeMap.end()); @@ -413,7 +463,7 @@ namespace python { } bool Type::isIterableType() const { - return (*this).isIteratorType() || (*this).isListType() || (*this).isTupleType() || *this == python::Type::STRING || *this == python::Type::RANGE || (*this).isDictionaryType(); + return (*this).isIteratorType() || (*this).isListType() || (*this).isTupleType() || *this == python::Type::STRING || *this == python::Type::RANGE || (*this).isDictionaryType() || (*this).isDictKeysType() || (*this).isDictValuesType(); } bool Type::isFixedSizeType() const { @@ -447,6 +497,10 @@ namespace python { // ==> base type decides! if(isOptionType()) return withoutOptions().isFixedSizeType(); + + // dict_keys and dict_values are both immutable + if(isDictKeysType() || isDictValuesType()) + return true; // functions, dictionaries, and lists are never a fixed type return false; @@ -501,6 +555,10 @@ namespace python { if(elementType().isIllDefined()) return true; return false; + } else if (isDictKeysType() || isDictValuesType()) { + if (elementType().isIllDefined()) + return true; + return false; } else { // must be primitive, directly check return *this == Type::UNKNOWN @@ -525,6 +583,14 @@ namespace python { return python::TypeFactory::instance().createOrGetDictionaryType(keyType, valType); } + Type Type::makeDictKeysViewType(const python::Type& keyType) { + return python::TypeFactory::instance().createOrGetDictKeysViewType(keyType); + } + + Type Type::makeDictValuesViewType(const python::Type& valType) { + return python::TypeFactory::instance().createOrGetDictValuesViewType(valType); + } + Type Type::makeListType(const python::Type &elementType){ #warning "Nested lists are not yet supported!" return python::TypeFactory::instance().createOrGetListType(elementType); @@ -1057,8 +1123,15 @@ namespace python { // dictionary type if(aUnderlyingType.isDictionaryType() && bUnderlyingType.isDictionaryType()) { + + // empty dict can be always upcasted to concrete dict + if(python::Type::EMPTYDICT == aUnderlyingType) + return bUnderlyingType; + if(python::Type::EMPTYDICT == bUnderlyingType) + return aUnderlyingType; + auto key_t = unifyTypes(aUnderlyingType.keyType(), bUnderlyingType.keyType(), autoUpcast); - auto val_t = unifyTypes(aUnderlyingType.elementType(), bUnderlyingType.elementType(), autoUpcast); + auto val_t = unifyTypes(aUnderlyingType.valueType(), bUnderlyingType.valueType(), autoUpcast); if(key_t == python::Type::UNKNOWN || val_t == python::Type::UNKNOWN) { return python::Type::UNKNOWN; }