diff --git a/doc/source/conf.py b/doc/source/conf.py index 52275332d..5cf8ed44f 100755 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -36,7 +36,7 @@ # The short X.Y version version="0.3" # The full version, including alpha/beta/rc tags -release="0.3.4" +release="0.3.5dev" # -- General configuration --------------------------------------------------- diff --git a/examples/02_Working_with_files.ipynb b/examples/02_Working_with_files.ipynb index e9bf66d50..468d4f94f 100644 --- a/examples/02_Working_with_files.ipynb +++ b/examples/02_Working_with_files.ipynb @@ -616,7 +616,7 @@ }, "outputs": [], "source": [ - "sorted(data, key=lambda x: x[1])\n", + "data = sorted(data, key=lambda x: -x[1])\n", "\n", "data[:5]" ] @@ -669,7 +669,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.9.14" } }, "nbformat": 4, diff --git a/scripts/docker/tuplex/Dockerfile b/scripts/docker/tuplex/Dockerfile index 4353686a9..2a59b4313 100644 --- a/scripts/docker/tuplex/Dockerfile +++ b/scripts/docker/tuplex/Dockerfile @@ -1,4 +1,4 @@ -# (c) 2021 Tuplex contributors +# (c) 2022 Tuplex contributors # a ready-to-run Tuplex version on a jupyter image # docker pull jupyter/minimal-notebook:python-3.9.13 diff --git a/scripts/docker/tuplex/create-image.sh b/scripts/docker/tuplex/create-image.sh index 99b247ebc..b19d66625 100755 --- a/scripts/docker/tuplex/create-image.sh +++ b/scripts/docker/tuplex/create-image.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# (c) 2021 Tuplex contributors +# (c) 2022 Tuplex contributors # builds notebook image while :; do @@ -20,10 +20,10 @@ cp -R ../../../examples/sample_data . # build benchmark docker image # copy from scripts to current dir because docker doesn't understand files # outside the build context -docker build -t tuplex/tuplex:v0.3.3 -f Dockerfile . || exit 1 +docker build -t tuplex/tuplex:0.3.5dev -f Dockerfile . || exit 1 # is upload set? 
if [[ "${UPLOAD}" == 'SET' ]]; then docker login - docker push tuplex/tuplex:v0.3.3 + docker push tuplex/tuplex:0.3.5dev fi diff --git a/scripts/set_version.py b/scripts/set_version.py index d1b6a08c0..0b0d0686b 100755 --- a/scripts/set_version.py +++ b/scripts/set_version.py @@ -7,6 +7,8 @@ import sys import requests import re +import pathlib + #from distutils.version import LooseVersion def LooseVersion(v): @@ -15,7 +17,7 @@ def LooseVersion(v): # to create a testpypi version use X.Y.devN -version = '0.3.4' +version = '0.3.5dev' # https://pypi.org/simple/tuplex/ # or https://test.pypi.org/simple/tuplex/ @@ -38,6 +40,15 @@ def get_latest_pypi_version(url='https://pypi.org/simple/tuplex/'): except: return None +def modify_example_script(path, version): + contents = open(path, 'r').read() + assert(len(contents) != 1) + + new_contents = re.sub('tuplex:v?\\d+\\.\\d+(\\.\\d+)?(\\.?dev\\d*)?', 'tuplex:' + version, contents)  # 'v' is optional: tags written by this script carry no 'v' prefix and must still match on the next run + + with open(path, 'w') as fp: + fp.write(new_contents) + if __name__ == '__main__': file_handler = logging.FileHandler(filename='version.log') stdout_handler = logging.StreamHandler(sys.stdout) @@ -68,6 +79,9 @@ def get_latest_pypi_version(url='https://pypi.org/simple/tuplex/'): logging.info('latest pypi.org version of tuplex is: {}'.format(version_pypi)) logging.info('latest test.pypi.org version of tuplex is: {}'.format(version_test)) + script_root = str(pathlib.Path(os.path.abspath(__file__)).parent) + logging.info("path of this file: {}".format(script_root)) + if version_test is None and version_pypi is not None: version_test = version_pypi @@ -100,13 +114,17 @@ def get_latest_pypi_version(url='https://pypi.org/simple/tuplex/'): sys.exit(1) # paths etc. 
- doc_path = '../doc/source/conf.py' - version_py_path = '../tuplex/python/tuplex/utils/version.py' - setup_py_path = '../tuplex/python/setup.py' - toplevel_setup_py_path = '../setup.py' - version_hist_path = '../tuplex/historyserver/thserver/version.py' + doc_path = os.path.abspath(os.path.join(script_root, '../doc/source/conf.py')) + version_py_path = os.path.abspath(os.path.join(script_root, '../tuplex/python/tuplex/utils/version.py')) + setup_py_path = os.path.abspath(os.path.join(script_root, '../tuplex/python/setup.py')) + toplevel_setup_py_path = os.path.abspath(os.path.join(script_root, '../setup.py')) + version_hist_path = os.path.abspath(os.path.join(script_root, '../tuplex/historyserver/thserver/version.py')) + + example_script_path = os.path.abspath(os.path.join(script_root, 'docker/tuplex/create-image.sh')) # modify files... + modify_example_script(example_script_path, version) + with open(version_py_path, 'w') as fp: fp.writelines('# (c) L.Spiegelberg 2017 - {}\n__version__="{}"'.format(datetime.datetime.now().year, version)) diff --git a/setup.py b/setup.py index 6f1144b0e..1214b9f31 100644 --- a/setup.py +++ b/setup.py @@ -653,7 +653,7 @@ def tplx_package_data(): # logic and declaration, and simpler if you include description/version in a file. 
setup(name="tuplex", python_requires='>=3.7.0', - version="0.3.4", + version="0.3.5dev", author="Leonhard Spiegelberg", author_email="tuplex@cs.brown.edu", description="Tuplex is a novel big data analytics framework incorporating a Python UDF compiler based on LLVM " diff --git a/tuplex/core/src/ee/local/LocalBackend.cc b/tuplex/core/src/ee/local/LocalBackend.cc index f014e766a..859c58635 100644 --- a/tuplex/core/src/ee/local/LocalBackend.cc +++ b/tuplex/core/src/ee/local/LocalBackend.cc @@ -1491,27 +1491,6 @@ namespace tuplex { logger().info(std::to_string(resolveTasks.size()) + "/" + pluralize(tasks.size(), "task") + " require executing the slow path."); timer.reset(); - - // check that each task has its own hashtable/sink - if(hashOutput) { - std::set S_hm_ptrs; - size_t num_tasks = resolveTasks.size() + tasks_result.size(); - - for(auto& t : resolveTasks) { - auto rtask = (ResolveTask*)t; - assert(rtask->hashTableSink()); - S_hm_ptrs.insert(reinterpret_cast(rtask->hashTableSink()->hm)); - } - - for(auto& t : tasks_result) { - auto task = (TransformTask*)t; - assert(task->hashTableSink()); - S_hm_ptrs.insert(reinterpret_cast(task->hashTableSink()->hm)); - } - - assert(S_hm_ptrs.size() == num_tasks); - } - // add all resolved tasks to the result // cout<<"*** need to compute "<hm == nullptr && task_sink->hybrid_hm == nullptr); } else if(task_sink->hm) { - hashmap_free_key_and_data(task_sink->hm); - hashmap_free(task_sink->hm); + if(8 == hashtableKeyByteWidth) { + int64_hashmap_free_key_and_data(task_sink->hm); + int64_hashmap_free(task_sink->hm); + } else { + hashmap_free_key_and_data(task_sink->hm); + hashmap_free(task_sink->hm); + } + task_sink->hm = nullptr; } diff --git a/tuplex/core/src/physical/TransformStage.cc b/tuplex/core/src/physical/TransformStage.cc index a81b52570..c6f956a03 100644 --- a/tuplex/core/src/physical/TransformStage.cc +++ b/tuplex/core/src/physical/TransformStage.cc @@ -708,8 +708,14 @@ namespace tuplex { free(null_bucket); if(hm) { - 
hashmap_free_key_and_data(hm); - hashmap_free(hm); + + if(8 == hashtableKeyByteWidth()) { + int64_hashmap_free_key_and_data(hm); + int64_hashmap_free(hm); + } else { + hashmap_free_key_and_data(hm); + hashmap_free(hm); + } hm = nullptr; } } diff --git a/tuplex/historyserver/thserver/version.py b/tuplex/historyserver/thserver/version.py index 1f80cd420..fd718f07f 100644 --- a/tuplex/historyserver/thserver/version.py +++ b/tuplex/historyserver/thserver/version.py @@ -1,2 +1,2 @@ # (c) L.Spiegelberg 2017 - 2022 -__version__="0.3.4" \ No newline at end of file +__version__="0.3.5dev" \ No newline at end of file diff --git a/tuplex/python/setup.py b/tuplex/python/setup.py index 512413d6e..575545ffa 100644 --- a/tuplex/python/setup.py +++ b/tuplex/python/setup.py @@ -29,7 +29,7 @@ setup( name="Tuplex", - version="0.3.4", + version="0.3.5dev", packages=find_packages(), package_data={ # include libs in libexec diff --git a/tuplex/python/tuplex/utils/version.py b/tuplex/python/tuplex/utils/version.py index 1f80cd420..fd718f07f 100644 --- a/tuplex/python/tuplex/utils/version.py +++ b/tuplex/python/tuplex/utils/version.py @@ -1,2 +1,2 @@ # (c) L.Spiegelberg 2017 - 2022 -__version__="0.3.4" \ No newline at end of file +__version__="0.3.5dev" \ No newline at end of file diff --git a/tuplex/utils/src/hashmap.cc b/tuplex/utils/src/hashmap.cc index 42aa69e05..9c2607b34 100644 --- a/tuplex/utils/src/hashmap.cc +++ b/tuplex/utils/src/hashmap.cc @@ -480,18 +480,20 @@ int hashmap_free_key_and_data(map_t in) { return MAP_MISSING; /* Linear probing */ - for (i = 0; i < m->table_size; i++) + for (i = 0; i < m->table_size; i++) { if (m->data[i].in_use != 0) { assert(m->data[i].key); if(m->data[i].key) free(m->data[i].key); if(m->data[i].data) free(m->data[i].data); - m->data[i].key = NULL; - m->data[i].keylen = 0; - m->data[i].data = NULL; - m->data[i].in_use = 0; } + // make sure to null properly. 
+ m->data[i].key = nullptr; + m->data[i].keylen = 0; + m->data[i].data = nullptr; + m->data[i].in_use = 0; + } return MAP_OK; } @@ -536,7 +538,7 @@ int hashmap_remove(map_t in, char *key, uint64_t keylen) { /* Deallocate the hashmap */ void hashmap_free(map_t in) { - hashmap_map *m = (hashmap_map *) in; + auto *m = (hashmap_map *) in; if(!m) return;