这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions tuplex/adapters/cpython/include/PythonHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -257,17 +257,19 @@ namespace python {
/*!
* converts a python object to tuplex object, obj must be not null.
* @param obj if it can not be mapped to a tuplex type, stored as PYOBJECT (cloudpickled).
* @param autoUpcast whether to upcast numeric types to a unified type when type conflicts, false by default
* @return C++ Tuplex Field object
*/
extern tuplex::Field pythonToField(PyObject* obj);
extern tuplex::Field pythonToField(PyObject *obj, bool autoUpcast=false);

/*!
* converts python object to Row using row type supplied in type.
* @param obj
* @param type specify what type of objects should be serialized, may contain options.
* @param autoUpcast whether to upcast numeric types to a unified type when type conflicts, false by default
* @return Tuplex C++ row object.
*/
extern tuplex::Row pythonToRow(PyObject *obj, const python::Type &type);
extern tuplex::Row pythonToRow(PyObject *obj, const python::Type &type, bool autoUpcast=false);

/*!
* converts a Tuplex C++ object to a python object
Expand Down Expand Up @@ -318,10 +320,11 @@ namespace python {

/*!
* get corresponding tuplex type for python object
* @param o
* @return
* @param o python object to map to Tuplex type
* @param autoUpcast whether to upcast numeric types to a unified type when type conflicts, false by default
* @return internal Tuplex type corresponding to given python object.
*/
extern python::Type mapPythonClassToTuplexType(PyObject *o);
extern python::Type mapPythonClassToTuplexType(PyObject *o, bool autoUpcast=false);

/*!
* Tuplex's python API provides a parameter to (optionally) specify a schema; this function decodes that PyObject
Expand Down Expand Up @@ -391,4 +394,4 @@ namespace python {
}


#endif //TUPLEX_PYTHONHELPERS_H
#endif //TUPLEX_PYTHONHELPERS_H
7 changes: 5 additions & 2 deletions tuplex/adapters/cpython/include/PythonSerializer.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,13 @@ namespace tuplex {
* @param nextptr ptr position after deserialization
* @return bool for if deserialization was successful or not
*/
extern bool fromSerializedMemory(const uint8_t *ptr, int64_t capacity, const Schema &schema, PyObject **obj,
extern bool fromSerializedMemory(const uint8_t *ptr,
size_t capacity,
const Schema &schema,
PyObject **obj,
const uint8_t **nextptr = nullptr);
}

}

#endif //TUPLEX_PYTHONSERIALIZER_H
#endif //TUPLEX_PYTHONSERIALIZER_H
31 changes: 25 additions & 6 deletions tuplex/adapters/cpython/include/PythonSerializer_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,28 +21,38 @@ namespace tuplex {
* Creates Python object from raw memory (deserialize)
* @param ptr memory location where the serialized data is stored
* @param row_type holding information on types of input memory
* @param capacity size of buffer
* @param bitmap pointer to bitmap (i.e. multiple 64bit blocks)
* @param index index of the element within the bitmap
* @return Python object holding deserialized elements
*/
PyObject *createPyObjectFromMemory(const uint8_t *ptr, const python::Type &row_type, const uint8_t *bitmap = nullptr, int index = 0);
PyObject* createPyObjectFromMemory(const uint8_t *ptr, const python::Type &row_type, size_t capacity,
const uint8_t *bitmap = nullptr, unsigned index = 0);

/*!
* Creates Python tuple object from raw memory (deserialize)
* @param ptr memory location where the serialized data is stored
* @param row_type holding information on types of input memory
* @param capacity size of buffer
* @return Python object holding deserialized elements
*/
PyObject *createPyTupleFromMemory(const uint8_t *ptr, const python::Type &row_type);
PyObject *createPyTupleFromMemory(const uint8_t *ptr, const python::Type &row_type, size_t capacity);

PyObject *createPyDictFromMemory(const uint8_t *ptr);

PyObject *createPyListFromMemory(const uint8_t *ptr, const python::Type &row_type);
/*!
* Creates Python list object from raw memory (deserialize)
* @param ptr memory location where the serialized data is stored
* @param row_type holding information on types of input memory
* @param capacity size of buffer
* @return Python object holding deserialized elements
*/
PyObject *createPyListFromMemory(const uint8_t *ptr, const python::Type &row_type, size_t capacity);

/*!
* Checks if capacity for buffer with schema is valid (if it is possible for buffer to hold such data given schema)
* @param ptr memory location where the serialized data is stored
* @param capacity size of buffer
* @param capacity size of buffer, negative values are invalid.
* @param row_type holding information on types of input memory
* @return bool for if capacity is valid or not
*/
Expand All @@ -55,10 +65,19 @@ namespace tuplex {
* @param row_type holding information on types of input memory
* @return -1 if invalid, size of serialized data if valid
*/
int64_t checkTupleCapacity(const uint8_t *ptr, int64_t capacity, const python::Type &row_type);
int64_t checkTupleCapacity(const uint8_t *ptr, size_t capacity, const python::Type &row_type);

/*!
* map bitmap of the object at ptr to a vector with numElements true/false values
* @param objectType current object type that contains optional value
* @param ptr memory location of the start of the bitmap
* @param numElements number of elements in objectType for which bitmap is needed
* @return vector of booleans representing a bitmap indicating whether element is null (true) or not (false).
*/
std::vector<bool> getBitmapFromType(const python::Type &objectType, const uint8_t *&ptr, size_t numElements);
}
}



#endif //TUPLEX_PYTHONSERIALIZER_PRIVATE_H
#endif //TUPLEX_PYTHONSERIALIZER_PRIVATE_H
85 changes: 63 additions & 22 deletions tuplex/adapters/cpython/src/PythonHelpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ namespace python {
return f;
}

tuplex::Field pythonToField(PyObject* obj) {
tuplex::Field pythonToField(PyObject *obj, bool autoUpcast) {
using namespace tuplex;
using namespace std;

Expand All @@ -498,7 +498,7 @@ namespace python {
vector<Field> v;
v.reserve(numElements);
for(unsigned i = 0; i < numElements; ++i) {
v.push_back(pythonToField(PyTuple_GetItem(obj, i)));
v.push_back(pythonToField(PyTuple_GetItem(obj, i), autoUpcast));
}
return Field(Tuple::from_vector(v));
} else if(PyBool_Check(obj)) { // important to call this before isinstance long since isinstance also return long for bool
Expand Down Expand Up @@ -529,15 +529,15 @@ namespace python {
if(PyDict_Size(obj) == 0)
return Field::empty_dict();

auto dictType = mapPythonClassToTuplexType(obj);
auto dictType = mapPythonClassToTuplexType(obj, autoUpcast);
std::string dictStr;
PyObject *key = nullptr, *val = nullptr;
Py_ssize_t pos = 0; // must be initialized to 0 to start iteration, however internal iterator variable. Don't use semantically.
dictStr += "{";
while(PyDict_Next(obj, &pos, &key, &val)) {
// create key
auto keyStr = PyString_AsString(PyObject_Str(key));
auto keyType = mapPythonClassToTuplexType(key);
auto keyType = mapPythonClassToTuplexType(key, autoUpcast);
python::Type valType;

// create value, mimicking cJSON printing standards
Expand Down Expand Up @@ -625,7 +625,7 @@ namespace python {
vector<Field> v;
v.reserve(numElements);
for(unsigned i = 0; i < numElements; ++i) {
v.push_back(pythonToField(PyList_GET_ITEM(obj, i)));
v.push_back(pythonToField(PyList_GET_ITEM(obj, i), autoUpcast));
}
return Field(List::from_vector(v));
} else if(obj == Py_None) {
Expand Down Expand Up @@ -660,9 +660,10 @@ namespace python {
* converts object to field of specified type.
* @param obj
* @param type
* @param autoUpcast whether to upcast numeric types to a unified type when type conflicts, false by default
* @return Field object
*/
tuplex::Field pythonToField(PyObject *obj, const python::Type &type) {
tuplex::Field pythonToField(PyObject *obj, const python::Type &type, bool autoUpcast=false) {
assert(obj);

// TODO: check assumptions about whether nonempty tuple can be an option
Expand All @@ -671,8 +672,15 @@ namespace python {
return tuplex::Field::null(type);
} else {
tuplex::Field f;
f = pythonToField(obj);
f = fieldCastTo(f, type.getReturnType());
auto rtType = type.getReturnType();
if(rtType.isListType() || rtType.isTupleType()) {
// type still needed to correctly construct field
f = pythonToField(obj, rtType, autoUpcast);
} else {
// simple types
f = pythonToField(obj, autoUpcast);
f = autoUpcast? fieldCastTo(f, type.getReturnType()) : f;
}
f.makeOptional();
return f;
}
Expand All @@ -682,19 +690,30 @@ namespace python {

std::vector<tuplex::Field> v;
for(unsigned i = 0; i < numElements; ++i) {
v.push_back(pythonToField(PyTuple_GetItem(obj, i), type.parameters()[i]));
v.push_back(pythonToField(PyTuple_GetItem(obj, i), type.parameters()[i], autoUpcast));
}
return tuplex::Field(tuplex::Tuple::from_vector(v));
} else if(type.isListType() && type != python::Type::EMPTYLIST) {
auto numElements = PyList_Size(obj);
auto elementType = type.elementType();
std::vector<tuplex::Field> v;
v.reserve(numElements);
for(unsigned i = 0; i < numElements; ++i) {
auto currListItem = PyList_GetItem(obj, i);
v.push_back(pythonToField(currListItem, elementType, autoUpcast));
Py_IncRef(currListItem);
}
return tuplex::Field(tuplex::List::from_vector(v));
} else {
auto f = pythonToField(obj);
return fieldCastTo(f, type);
auto f = pythonToField(obj, autoUpcast);
return autoUpcast? fieldCastTo(f, type) : f;
}
}

tuplex::Row pythonToRow(PyObject *obj, const python::Type &type) {
tuplex::Row pythonToRow(PyObject *obj, const python::Type &type, bool autoUpcast) {
assert(obj);

tuplex::Field f = pythonToField(obj, type);
tuplex::Field f = pythonToField(obj, type, autoUpcast);

// unpack the tuples one level
if(f.getType().isTupleType() && f.getType() != python::Type::EMPTYTUPLE) {
Expand Down Expand Up @@ -1365,7 +1384,7 @@ namespace python {
}

// mapping type to internal types, unknown as default
python::Type mapPythonClassToTuplexType(PyObject *o) {
python::Type mapPythonClassToTuplexType(PyObject *o, bool autoUpcast) {
if(Py_None == o)
return python::Type::NULLVALUE;

Expand Down Expand Up @@ -1393,7 +1412,7 @@ namespace python {
for(int j = 0; j < numElements; j++) {
auto item = PyTuple_GET_ITEM(o, j); // borrowed reference
assert(item->ob_refcnt > 0); // important!!!
elementTypes.push_back(mapPythonClassToTuplexType(item));
elementTypes.push_back(mapPythonClassToTuplexType(item, autoUpcast));
}
return python::TypeFactory::instance().createOrGetTupleType(elementTypes);
}
Expand All @@ -1410,8 +1429,8 @@ namespace python {
Py_ssize_t pos = 0; // must be initialized to 0 to start iteration, however internal iterator variable. Don't use semantically.
bool types_set = false; // need extra var here b/c vals could be unknown.
while(PyDict_Next(o, &pos, &key, &val)) {
auto curKeyType = mapPythonClassToTuplexType(key);
auto curValType = mapPythonClassToTuplexType(val);
auto curKeyType = mapPythonClassToTuplexType(key, autoUpcast);
auto curValType = mapPythonClassToTuplexType(val, autoUpcast);
if(!types_set) {
types_set = true;
keyType = curKeyType;
Expand All @@ -1431,13 +1450,18 @@ namespace python {
if(numElements == 0)
return python::Type::EMPTYLIST;

python::Type elementType = mapPythonClassToTuplexType(PyList_GetItem(o, 0));
python::Type elementType = mapPythonClassToTuplexType(PyList_GetItem(o, 0), autoUpcast);
// verify that all elements have the same type
for(int j = 0; j < numElements; j++) {
if(elementType != mapPythonClassToTuplexType(PyList_GetItem(o, j))) {
Logger::instance().defaultLogger().error("lists with variable type elements are not supported.");
return python::Type::UNKNOWN;
// TODO: the general case should return python::Type::PyObject in the future
python::Type currElementType = mapPythonClassToTuplexType(PyList_GetItem(o, j), autoUpcast);
if(elementType != currElementType) {
// possible to use nullable type as element type?
auto newElementType = unifyTypes(elementType, currElementType, autoUpcast);
if (newElementType == python::Type::UNKNOWN) {
Logger::instance().defaultLogger().error("list with variable element type " + elementType.desc() + " and " + currElementType.desc() + " not supported.");
return python::Type::PYOBJECT;
}
elementType = newElementType;
}
}
return python::Type::makeListType(elementType);
Expand Down Expand Up @@ -1500,6 +1524,23 @@ namespace python {
// typing.Optional[str] which is equal to typing.Union[str, NoneType]
auto typing_optional = PyDict_GetItemString(typing_dict, "Optional");
assert(typing_optional);
if (t.getReturnType().isTupleType()) {
// https://docs.python.org/3/library/typing.html#typing.Tuple
#if (PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 9)
// use builtin.tuple[...]
auto builtin_mod = PyImport_AddModule("builtins");
assert(builtin_mod);
auto builtin_dict = PyModule_GetDict(builtin_mod);
assert(builtin_dict);
auto tuple = PyDict_GetItemString(builtin_dict, "tuple");
tobj = PyObject_GetItem(tuple, tobj);
#else
// use Tuple[...]
auto typing_tuple = PyDict_GetItemString(typing_dict, "Tuple");
assert(typing_tuple);
tobj = PyObject_GetItem(typing_tuple, tobj);
#endif
}
auto opt_type = PyObject_GetItem(typing_optional, tobj);
return opt_type;
}
Expand Down
Loading