From de8d1d6ee3f650bd47d2ca8dcd8a6e2cac0cc41b Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Mon, 3 Oct 2022 22:42:35 -0400 Subject: [PATCH 01/18] prepping v0.3.5 release --- README.md | 10 +++++----- scripts/set_version.py | 2 +- tuplex/python/tuplex/context.py | 3 +++ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index d277e75c2..c73a49d6e 100644 --- a/README.md +++ b/README.md @@ -43,8 +43,8 @@ To try out Tuplex, simply try out the following starter notebooks using Google C | Name | Link | Description | |-------------------------|------------------|---------------------------------------------------------------------| -| (01) Intro to Tuplex | [Google Colab](https://colab.research.google.com/drive/1idqCRmvN-9_F2naJ6k1hbslbQT-2bAqa?usp=sharing) | Basic commands to manipulate columns and modify data with user code | -| (02) Working with Files | [Google Colab](https://colab.research.google.com/drive/10gOYUpxK_Bjkw11WYupuaflATsBPRgU0?usp=sharing) | Loading and saving files, detecting types. | +| 1. Intro to Tuplex | [Google Colab](https://colab.research.google.com/drive/1idqCRmvN-9_F2naJ6k1hbslbQT-2bAqa?usp=sharing) | Basic commands to manipulate columns and modify data with user code | +| 2. Working with Files | [Google Colab](https://colab.research.google.com/drive/10gOYUpxK_Bjkw11WYupuaflATsBPRgU0?usp=sharing) | Loading and saving files, detecting types. | More examples can be found [here](https://tuplex.cs.brown.edu/gettingstarted.html). @@ -53,7 +53,7 @@ More examples can be found [here](https://tuplex.cs.brown.edu/gettingstarted.htm To install Tuplex, you can use a PyPi package for Linux or MacOS(Intel), or a Docker container which will launch a jupyter notebook with Tuplex preinstalled. #### Docker ``` -docker run -p 8888:8888 tuplex/tuplex +docker run -p 8888:8888 tuplex/tuplex:v0.3.5 ``` #### PyPI ``` @@ -66,7 +66,7 @@ Tuplex is available for MacOS and Linux. 
The current version has been tested und To install Tuplex, simply install the dependencies first and then build the package. #### MacOS build from source -To build Tuplex, you need several other packages first which can be easily installed via [brew](https://brew.sh/). If you want to build Tuplex with AWS support, you need `macOS 10.13+`. +To build Tuplex, you need several other packages first which can be easily installed via [brew](https://brew.sh/). If you want to build Tuplex with AWS support, you need `macOS 10.13+`. Python 3.9 or earlier requires an older cloudpickle version (1.6.0) whereas Python 3.10+ requires cloudpickle 2.1.0+. ``` brew install llvm@9 boost boost-python3 aws-sdk-cpp pcre2 antlr4-cpp-runtime googletest gflags yaml-cpp celero protobuf libmagic python3 -m pip install 'cloudpickle<2.0' numpy @@ -102,7 +102,7 @@ To customize the cmake build, the following options are available to be passed v | `BUILD_NATIVE` | `ON`, `OFF` (default) | build with `-march=native` to target platform architecture. | | `SKIP_AWS_TESTS` | `ON` (default), `OFF` | skip aws tests, helpful when no AWS credentials/AWS Tuplex chain is setup. | | `GENERATE_PDFS` | `ON`, `OFF` (default) | output in Debug mode PDF files if graphviz is installed (e.g., `brew install graphviz`) for ASTs of UDFs, query plans, ...| -| `PYTHON3_VERSION` | `3.6`, ... | when trying to select a python3 version to build against, use this by specifying `major.minor`. To specify the python executable, use the options provided by [cmake](https://cmake.org/cmake/help/git-stage/module/FindPython3.html). | +| `PYTHON3_VERSION` | `3.7`, ... | when trying to select a python3 version to build against, use this by specifying `major.minor`. To specify the python executable, use the options provided by [cmake](https://cmake.org/cmake/help/git-stage/module/FindPython3.html). | | `LLVM_ROOT_DIR` | e.g. `/usr/lib/llvm-9` | specify which LLVM version to use | | `BOOST_DIR` | e.g. 
`/opt/boost` | specify which Boost version to use. Note that the python component of boost has to be built against the python version used to build Tuplex | diff --git a/scripts/set_version.py b/scripts/set_version.py index 0b0d0686b..14713a010 100755 --- a/scripts/set_version.py +++ b/scripts/set_version.py @@ -17,7 +17,7 @@ def LooseVersion(v): # to create a testpypi version use X.Y.devN -version = '0.3.5dev' +version = '0.3.5' # https://pypi.org/simple/tuplex/ # or https://test.pypi.org/simple/tuplex/ diff --git a/tuplex/python/tuplex/context.py b/tuplex/python/tuplex/context.py index f05902c61..72439a3aa 100644 --- a/tuplex/python/tuplex/context.py +++ b/tuplex/python/tuplex/context.py @@ -96,6 +96,9 @@ def __init__(self, conf=None, name="", **kwargs): options = dict() # put meaningful defaults for special environments... + + # per default disable webui + options['tuplex.webui.enable'] = False if in_google_colab(): logging.debug('Detected Google Colab environment, adjusting options...') From 92245acaf5bb45f79751a926a8429e1e99a14621 Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Mon, 3 Oct 2022 22:44:03 -0400 Subject: [PATCH 02/18] update versions --- doc/source/conf.py | 2 +- setup.py | 2 +- tuplex/historyserver/thserver/version.py | 2 +- tuplex/python/setup.py | 2 +- tuplex/python/tuplex/utils/version.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 5cf8ed44f..6eb192064 100755 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -36,7 +36,7 @@ # The short X.Y version version="0.3" # The full version, including alpha/beta/rc tags -release="0.3.5dev" +release="0.3.5" # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index 1214b9f31..ec82c1dcb 100644 --- a/setup.py +++ b/setup.py @@ -653,7 +653,7 @@ def tplx_package_data(): # logic and declaration, and simpler if you include description/version in a file. 
setup(name="tuplex", python_requires='>=3.7.0', - version="0.3.5dev", + version="0.3.5", author="Leonhard Spiegelberg", author_email="tuplex@cs.brown.edu", description="Tuplex is a novel big data analytics framework incorporating a Python UDF compiler based on LLVM " diff --git a/tuplex/historyserver/thserver/version.py b/tuplex/historyserver/thserver/version.py index fd718f07f..c79089893 100644 --- a/tuplex/historyserver/thserver/version.py +++ b/tuplex/historyserver/thserver/version.py @@ -1,2 +1,2 @@ # (c) L.Spiegelberg 2017 - 2022 -__version__="0.3.5dev" \ No newline at end of file +__version__="0.3.5" \ No newline at end of file diff --git a/tuplex/python/setup.py b/tuplex/python/setup.py index 575545ffa..e1e5b842e 100644 --- a/tuplex/python/setup.py +++ b/tuplex/python/setup.py @@ -29,7 +29,7 @@ setup( name="Tuplex", - version="0.3.5dev", + version="0.3.5", packages=find_packages(), package_data={ # include libs in libexec diff --git a/tuplex/python/tuplex/utils/version.py b/tuplex/python/tuplex/utils/version.py index fd718f07f..c79089893 100644 --- a/tuplex/python/tuplex/utils/version.py +++ b/tuplex/python/tuplex/utils/version.py @@ -1,2 +1,2 @@ # (c) L.Spiegelberg 2017 - 2022 -__version__="0.3.5dev" \ No newline at end of file +__version__="0.3.5" \ No newline at end of file From 13d5c8f73e147fb214798b3d273374e5e7a2a859 Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Mon, 3 Oct 2022 22:47:05 -0400 Subject: [PATCH 03/18] version fix --- scripts/docker/tuplex/create-image.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/docker/tuplex/create-image.sh b/scripts/docker/tuplex/create-image.sh index b19d66625..975eee88a 100755 --- a/scripts/docker/tuplex/create-image.sh +++ b/scripts/docker/tuplex/create-image.sh @@ -20,10 +20,10 @@ cp -R ../../../examples/sample_data . 
# build benchmark docker image # copy from scripts to current dir because docker doesn't understand files # outside the build context -docker build -t tuplex/tuplex:0.3.5dev -f Dockerfile . || exit 1 +docker build -t tuplex/tuplex:0.3.5 -f Dockerfile . || exit 1 # is upload set? if [[ "${UPLOAD}" == 'SET' ]]; then docker login - docker push tuplex/tuplex:0.3.5dev + docker push tuplex/tuplex:0.3.5 fi From ce4ed19f3e7d4e34da845d75654491fbdfe844f6 Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Mon, 3 Oct 2022 22:59:00 -0400 Subject: [PATCH 04/18] readme update --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c73a49d6e..8c3fcb3c4 100644 --- a/README.md +++ b/README.md @@ -41,10 +41,10 @@ print(res) ### Quickstart To try out Tuplex, simply try out the following starter notebooks using Google Colab: -| Name | Link | Description | -|-------------------------|------------------|---------------------------------------------------------------------| -| 1. Intro to Tuplex | [Google Colab](https://colab.research.google.com/drive/1idqCRmvN-9_F2naJ6k1hbslbQT-2bAqa?usp=sharing) | Basic commands to manipulate columns and modify data with user code | -| 2. Working with Files | [Google Colab](https://colab.research.google.com/drive/10gOYUpxK_Bjkw11WYupuaflATsBPRgU0?usp=sharing) | Loading and saving files, detecting types. | +| Name | Link | Description | +|--------------------------------|------------------|------------------------------------------------------------| +| 1. Intro to Tuplex | [Google Colab](https://colab.research.google.com/drive/1idqCRmvN-9_F2naJ6k1hbslbQT-2bAqa?usp=sharing) | Basic commands to manipulate columns and modify data with user code. | +| 2. Working with Files | [Google Colab](https://colab.research.google.com/drive/10gOYUpxK_Bjkw11WYupuaflATsBPRgU0?usp=sharing) | Loading and saving files, detecting types. 
| More examples can be found [here](https://tuplex.cs.brown.edu/gettingstarted.html). From 0ad0994747dffece6d4cd83bd48e982524ed9611 Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Tue, 4 Oct 2022 11:30:42 -0400 Subject: [PATCH 05/18] python 3.10+ fix --- tuplex/python/tests/test_webui.py | 4 ++-- tuplex/python/tuplex/utils/common.py | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/tuplex/python/tests/test_webui.py b/tuplex/python/tests/test_webui.py index 918a18e7f..d336b4b99 100644 --- a/tuplex/python/tests/test_webui.py +++ b/tuplex/python/tests/test_webui.py @@ -23,7 +23,7 @@ class TestWebUI(unittest.TestCase): def setUpClass(cls): logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG) # bug in logging redirect? - conf ={'webui.enable': True, "driverMemory": "8MB", "executorMemory" : "1MB", + conf ={"webui.enable": True, "driverMemory": "8MB", "executorMemory" : "1MB", "partitionSize": "256KB", "tuplex.redirectToPythonLogging": True} logging.debug('WebUI Test setUpClass called') @@ -58,4 +58,4 @@ def test_webuiconnect(self): # make API request version_info = get_json(ui_url + '/api/version') - self.assertIn('version', version_info) \ No newline at end of file + self.assertIn('version', version_info) diff --git a/tuplex/python/tuplex/utils/common.py b/tuplex/python/tuplex/utils/common.py index a100e96a8..75314698f 100644 --- a/tuplex/python/tuplex/utils/common.py +++ b/tuplex/python/tuplex/utils/common.py @@ -9,7 +9,10 @@ # License: Apache 2.0 # #----------------------------------------------------------------------------------------------------------------------# import atexit +import sys import collections +if sys.version_info.major == 3 and sys.version_info.minor >= 10: + import collections.abc import pathlib import signal @@ -197,10 +200,18 @@ def flatten_dict(d, sep='.', parent_key=''): items = [] for key, val in d.items(): new_key = parent_key + sep + key if parent_key else key - if 
isinstance(val, collections.MutableMapping): - items.extend(flatten_dict(val, sep, new_key).items()) + + # Python 3.10+ moved MutableMapping to collections.abc.MutableMapping + if sys.version_info.major == 3 and sys.version_info.minor >= 10: + if isinstance(val, collections.abc.MutableMapping): + items.extend(flatten_dict(val, sep, new_key).items()) + else: + items.append((new_key, val)) else: - items.append((new_key, val)) + if isinstance(val, collections.MutableMapping): + items.extend(flatten_dict(val, sep, new_key).items()) + else: + items.append((new_key, val)) return dict(items) def unflatten_dict(dictionary, sep='.'): From 3e521901516eb545cc5900e7252babe9b20ef3e0 Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Tue, 4 Oct 2022 11:50:12 -0400 Subject: [PATCH 06/18] change localhost to 127.0.0.1 --- azure-pipelines.yml | 2 +- tuplex/python/tests/test_webui.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 5f6d3e6a7..c79d8648e 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -28,5 +28,5 @@ jobs: displayName: 'Build Tuplex' - script: cd build/temp.linux-x86_64-3.7 && ctest --timeout 180 --output-on-failure displayName: 'C++ tests' - - script: cd build/temp.linux-x86_64-3.7/dist/python && python3.7 -m pytest -x --full-trace -l --log-cli-level debug + - script: cd build/temp.linux-x86_64-3.7/dist/python && python3.7 -m pytest -x --full-trace -l --log-cli-level=DEBUG --capture=tee-sys displayName: 'Python tests' diff --git a/tuplex/python/tests/test_webui.py b/tuplex/python/tests/test_webui.py index d336b4b99..3080183cc 100644 --- a/tuplex/python/tests/test_webui.py +++ b/tuplex/python/tests/test_webui.py @@ -22,9 +22,13 @@ class TestWebUI(unittest.TestCase): @classmethod def setUpClass(cls): logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG) + + localhost_ip = '127.0.0.1' + # bug in logging redirect? 
conf ={"webui.enable": True, "driverMemory": "8MB", "executorMemory" : "1MB", - "partitionSize": "256KB", "tuplex.redirectToPythonLogging": True} + "partitionSize": "256KB", "tuplex.redirectToPythonLogging": True, + "webui.mongodb.url" : localhost_ip, "webui.url" : localhost_ip} logging.debug('WebUI Test setUpClass called') cls.context = Context(conf) @@ -34,11 +38,11 @@ def setUpClass(cls): def tearDownClass(cls) -> None: logging.debug('WebUI Test tearDownClass called') del cls.context + cls.context = None # shutdown processes manually! auto_shutdown_all() - # check connection to WebUI works def test_webuiconnect(self): From eb3ee86938a6c5d9180bdd521caff4f05867e60f Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Tue, 4 Oct 2022 13:01:08 -0400 Subject: [PATCH 07/18] mongodb adjustment --- tuplex/python/tests/test_webui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tuplex/python/tests/test_webui.py b/tuplex/python/tests/test_webui.py index 3080183cc..abea4edea 100644 --- a/tuplex/python/tests/test_webui.py +++ b/tuplex/python/tests/test_webui.py @@ -28,7 +28,7 @@ def setUpClass(cls): # bug in logging redirect? 
conf ={"webui.enable": True, "driverMemory": "8MB", "executorMemory" : "1MB", "partitionSize": "256KB", "tuplex.redirectToPythonLogging": True, - "webui.mongodb.url" : localhost_ip, "webui.url" : localhost_ip} + "webui.url" : localhost_ip} logging.debug('WebUI Test setUpClass called') cls.context = Context(conf) From 515fdf1304f82befbe37f9195c0f8c59e09027f8 Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Tue, 4 Oct 2022 16:29:56 -0400 Subject: [PATCH 08/18] explicit localhost --- tuplex/python/tests/test_webui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tuplex/python/tests/test_webui.py b/tuplex/python/tests/test_webui.py index abea4edea..a584048d7 100644 --- a/tuplex/python/tests/test_webui.py +++ b/tuplex/python/tests/test_webui.py @@ -28,7 +28,7 @@ def setUpClass(cls): # bug in logging redirect? conf ={"webui.enable": True, "driverMemory": "8MB", "executorMemory" : "1MB", "partitionSize": "256KB", "tuplex.redirectToPythonLogging": True, - "webui.url" : localhost_ip} + "webui.mongodb.url": "localhost", "webui.url" : localhost_ip} logging.debug('WebUI Test setUpClass called') cls.context = Context(conf) From 817fad6f4e0767466f532cd2e0af877bd988a0db Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Tue, 4 Oct 2022 18:01:35 -0400 Subject: [PATCH 09/18] MongoDB connection test --- azure-pipelines.yml | 2 + tuplex/python/mongodb_test.py | 9 +++ tuplex/python/tuplex/context.py | 6 +- tuplex/python/tuplex/dataset.py | 7 +- tuplex/python/tuplex/metrics.py | 15 ++-- tuplex/python/tuplex/utils/common.py | 102 ++++++++++++++++++++------- 6 files changed, 106 insertions(+), 35 deletions(-) create mode 100644 tuplex/python/mongodb_test.py diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c79d8648e..0e7ad53cf 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -20,6 +20,8 @@ jobs: steps: - script: sudo bash scripts/ubuntu1804/install_mongodb.sh displayName: 'Install MongoDB' + - script: cd tuplex/python && python3 
mongodb_test.py && pkill mongod + displayName: 'Test MongoDB init' - script: sudo bash scripts/install_azure_ci_reqs.sh displayName: 'Install required packages' - script: sudo apt-get install -y python3-setuptools ninja-build && sudo apt-get remove -y python-pexpect python3-pexpect && sudo python3.7 -m pip install --upgrade pip && sudo python3.7 -m pip uninstall -y pygments && sudo python3.7 -m pip install pytest pygments>=2.4.1 MarkupSafe==2.0 pexpect setuptools astor PyYAML jupyter nbformat pymongo eventlet==0.30.0 gunicorn pymongo && jupyter --version diff --git a/tuplex/python/mongodb_test.py b/tuplex/python/mongodb_test.py new file mode 100644 index 000000000..5e4fd93e1 --- /dev/null +++ b/tuplex/python/mongodb_test.py @@ -0,0 +1,9 @@ +import logging +logging.basicConfig() +logger = logging.getLogger() +logger.setLevel(logging.DEBUG) +logging.info('testing mongodb init') + +from tuplex.utils.common import find_or_start_mongodb + +res = find_or_start_mongodb('localhost', 27017, './webui/data', './webui/mongod.log') diff --git a/tuplex/python/tuplex/context.py b/tuplex/python/tuplex/context.py index 72439a3aa..4267a2b1a 100644 --- a/tuplex/python/tuplex/context.py +++ b/tuplex/python/tuplex/context.py @@ -11,7 +11,11 @@ import logging -from .libexec.tuplex import _Context, _DataSet, getDefaultOptionsAsJSON +try: + from .libexec.tuplex import _Context, _DataSet, getDefaultOptionsAsJSON +except ModuleNotFoundError as e: + logging.error("need to compiled Tuplex first, details: {}".format(e)) + from .dataset import DataSet import os import glob diff --git a/tuplex/python/tuplex/dataset.py b/tuplex/python/tuplex/dataset.py index 8e2fbacca..2d7eecc00 100644 --- a/tuplex/python/tuplex/dataset.py +++ b/tuplex/python/tuplex/dataset.py @@ -13,7 +13,10 @@ import sys import logging -from .libexec.tuplex import _Context, _DataSet +try: + from .libexec.tuplex import _Context, _DataSet +except ModuleNotFoundError as e: + logging.error("need to compiled Tuplex first, details: 
{}".format(e)) from tuplex.utils.reflection import get_source as get_udf_source from tuplex.utils.reflection import get_globals from tuplex.utils.framework import UDFCodeExtractionError @@ -578,4 +581,4 @@ def exception_counts(self): The counts returned here correspond to whatever type is being raised. """ - return self._dataSet.exception_counts() \ No newline at end of file + return self._dataSet.exception_counts() diff --git a/tuplex/python/tuplex/metrics.py b/tuplex/python/tuplex/metrics.py index eb2286c95..19903032f 100644 --- a/tuplex/python/tuplex/metrics.py +++ b/tuplex/python/tuplex/metrics.py @@ -8,16 +8,21 @@ # Created by Leonhard Spiegelberg first on 1/1/2021 # # License: Apache 2.0 # #----------------------------------------------------------------------------------------------------------------------# - -from .libexec.tuplex import _Context -from .libexec.tuplex import _Metrics +import logging +import typing +try: + from .libexec.tuplex import _Context + from .libexec.tuplex import _Metrics +except ModuleNotFoundError as e: + logging.error("need to compiled Tuplex first, details: {}".format(e)) + _Metrics = typing.Any import json class Metrics: """ Stores a reference to the metrics associated with a - context object. + context object. 
""" def __init__(self, metrics: _Metrics): @@ -97,4 +102,4 @@ def as_dict(self): Returns: dict: measurements """ - return json.loads(self.as_json()) \ No newline at end of file + return json.loads(self.as_json()) diff --git a/tuplex/python/tuplex/utils/common.py b/tuplex/python/tuplex/utils/common.py index 75314698f..f241aac4f 100644 --- a/tuplex/python/tuplex/utils/common.py +++ b/tuplex/python/tuplex/utils/common.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -#----------------------------------------------------------------------------------------------------------------------# +# ----------------------------------------------------------------------------------------------------------------------# # # # Tuplex: Blazing Fast Python Data Science # # # @@ -7,10 +7,11 @@ # (c) 2017 - 2021, Tuplex team # # Created by Leonhard Spiegelberg first on 1/1/2021 # # License: Apache 2.0 # -#----------------------------------------------------------------------------------------------------------------------# +# ----------------------------------------------------------------------------------------------------------------------# import atexit import sys import collections + if sys.version_info.major == 3 and sys.version_info.minor >= 10: import collections.abc import pathlib @@ -37,16 +38,18 @@ import shlex try: - import pwd + import pwd except ImportError: - import getpass - pwd = None + import getpass + + pwd = None try: from tuplex.utils.version import __version__ except: __version__ = 'dev' + def cmd_exists(cmd): """ checks whether command `cmd` exists or not @@ -58,6 +61,7 @@ def cmd_exists(cmd): """ return shutil.which(cmd) is not None + def is_shared_lib(path): """ Args: @@ -73,6 +77,7 @@ def is_shared_lib(path): mime_type = res.split()[-1].decode() return mime_type == 'application/x-sharedlib' or mime_type == 'application/x-application' + def current_timestamp(): """ get current time as isoformatted string @@ -81,6 +86,7 @@ def current_timestamp(): """ return 
str(datetime.now().isoformat()) + def current_user(): """ retrieve current user name @@ -92,6 +98,7 @@ def current_user(): else: return getpass.getuser() + def host_name(): """ retrieve host name to identify machine @@ -103,6 +110,7 @@ def host_name(): else: return socket.gethostbyaddr(socket.gethostname())[0] + def post_json(url, data): """ perform a post request to a REST endpoint with JSON @@ -120,6 +128,7 @@ def post_json(url, data): response = urllib.request.urlopen(req) return json.loads(response.read()) + def get_json(url, timeout=10): """ perform a GET request to given URL @@ -134,6 +143,7 @@ def get_json(url, timeout=10): response = urllib.request.urlopen(req, timeout=timeout) return json.loads(response.read()) + def in_jupyter_notebook(): """check whether frameworks runs in jupyter notebook. @@ -155,6 +165,7 @@ def in_jupyter_notebook(): except NameError: return False # Probably standard Python interpreter + def in_google_colab(): """ check whether framework runs in Google Colab environment @@ -170,7 +181,7 @@ def in_google_colab(): shell_name_matching = False try: - shell_name_matching = 'google.colab' in str(get_ipython()) + shell_name_matching = 'google.colab' in str(get_ipython()) except: pass @@ -179,6 +190,7 @@ def in_google_colab(): else: return False + def is_in_interactive_mode(): """checks whether the module is loaded in an interactive shell session or not @@ -188,6 +200,7 @@ def is_in_interactive_mode(): return bool(getattr(sys, 'ps1', sys.flags.interactive)) + def flatten_dict(d, sep='.', parent_key=''): """ flattens a nested dictionary into a flat dictionary by concatenating keys with the separator. 
Args: @@ -214,6 +227,7 @@ def flatten_dict(d, sep='.', parent_key=''): items.append((new_key, val)) return dict(items) + def unflatten_dict(dictionary, sep='.'): """ unflattens a dictionary into a nested dictionary according to sep @@ -239,6 +253,7 @@ def unflatten_dict(dictionary, sep='.'): d[parts[-1]] = value return resultDict + def save_conf_yaml(conf, file_path): """saves a dictionary holding the configuration options to Tuplex Yaml format. \ Dict can be either flattened or not. @@ -247,13 +262,15 @@ def save_conf_yaml(conf, file_path): conf: a dictionary holding the configuration. file_path: """ + def beautify_nesting(d): # i.e. make lists out of dicts if isinstance(d, dict): items = d.items() - return [{key : beautify_nesting(val)} for key, val in items] + return [{key: beautify_nesting(val)} for key, val in items] else: return d + assert isinstance(file_path, str), 'file_path must be instance of str' with open(file_path, 'w') as f: @@ -261,7 +278,7 @@ def beautify_nesting(d): f.write('# created {} UTC\n'.format(datetime.utcnow())) out = yaml.dump(beautify_nesting(unflatten_dict(conf))) - #pyyaml prints { } around single item dicts. Remove by hand + # pyyaml prints { } around single item dicts. 
Remove by hand out = out.replace('{', '').replace('}', '') f.write(out) @@ -311,7 +328,8 @@ def parse_string(item): pass return item - return {k : parse_string(v) for k, v in options.items()} + return {k: parse_string(v) for k, v in options.items()} + def load_conf_yaml(file_path): """loads yaml file and converts contents to nested dictionary @@ -358,7 +376,8 @@ def stringify_dict(d): dictionary with keys and vals as strs """ assert isinstance(d, dict), 'd must be a dictionary' - return {str(key) : str(val) for key, val in d.items()} + return {str(key): str(val) for key, val in d.items()} + def registerLoggingCallback(callback): """ @@ -382,6 +401,7 @@ def wrapper(level, time_info, logger_name, msg): ccRegister(wrapper) + def logging_callback(level, time_info, logger_name, msg): """ this is a callback function which can be used to redirect C++ logging to python logging. @@ -393,9 +413,9 @@ def logging_callback(level, time_info, logger_name, msg): """ # convert level to logging levels - if 0 == level: # unsupported level in C++ + if 0 == level: # unsupported level in C++ level = logging.INFO - if 1 == level: # trace in C++ + if 1 == level: # trace in C++ level = logging.DEBUG if 2 == level: level = logging.DEBUG @@ -435,6 +455,7 @@ def logging_callback(level, time_info, logger_name, msg): # tuple of (key, func). 
__exit_handlers__ = [] + # register at exit function to take care of exit handlers def auto_shutdown_all(): """ @@ -456,10 +477,12 @@ def auto_shutdown_all(): logging.error('Failed to shutdown {}'.format(name)) __exit_handlers__ = [] + def register_auto_shutdown(name, func, args, msg=None): global __exit_handlers__ __exit_handlers__.append((name, func, args, msg)) + atexit.register(auto_shutdown_all) @@ -482,6 +505,7 @@ def is_process_running(name): pass return False + def mongodb_uri(mongodb_url, mongodb_port, db_name='tuplex-history'): """ constructs a fully qualified MongoDB URI @@ -495,6 +519,7 @@ def mongodb_uri(mongodb_url, mongodb_port, db_name='tuplex-history'): """ return 'mongodb://{}:{}/{}'.format(mongodb_url, mongodb_port, db_name) + def check_mongodb_connection(mongodb_url, mongodb_port, db_name='tuplex-history', timeout=10.0): """ connects to a MongoDB database instance, raises exception if connection fails @@ -533,7 +558,8 @@ def check_mongodb_connection(mongodb_url, mongodb_port, db_name='tuplex-history' break time.sleep(0.05) # sleep for 50ms - logging.debug('Contacting MongoDB under {}... -- {:.2f}s of poll time left'.format(uri, timeout - (time.time() - start_time))) + logging.debug('Contacting MongoDB under {}... -- {:.2f}s of poll time left'.format(uri, timeout - ( + time.time() - start_time))) connect_try += 1 if connect_successful is False: @@ -541,6 +567,7 @@ def check_mongodb_connection(mongodb_url, mongodb_port, db_name='tuplex-history' logging.debug('Connection test to MongoDB succeeded') + def shutdown_process_via_kill(pid): """ issues a KILL signals to a process with pid @@ -553,6 +580,7 @@ def shutdown_process_via_kill(pid): logging.debug('Shutting down process PID={}'.format(pid)) os.kill(pid, signal.SIGKILL) + def find_or_start_mongodb(mongodb_url, mongodb_port, mongodb_datapath, mongodb_logpath, db_name='tuplex-history'): """ attempts to connect to a MongoDB database. 
If no running local MongoDB is found, will auto-start a mongodb database. R @@ -592,12 +620,23 @@ def find_or_start_mongodb(mongodb_url, mongodb_port, mongodb_datapath, mongodb_l # startup via mongod --fork --logpath /var/log/mongodb/mongod.log --port 1234 --dbpath try: - cmd = ['mongod', '--fork', '--logpath', str(mongodb_logpath), '--port', str(mongodb_port), '--dbpath', str(mongodb_datapath)] + cmd = ['mongod', '--fork', '--logpath', str(mongodb_logpath), '--port', str(mongodb_port), '--dbpath', + str(mongodb_datapath)] logging.debug('starting MongoDB daemon process via {}'.format(' '.join(cmd))) process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - # set a timeout of 2 seconds to keep everything interactive - p_stdout, p_stderr = process.communicate(timeout=2) + + short_timeout = 2.5 + max_mongodb_timeout = 10 # maximum timeout is 10s + try: + # set a timeout of 2 seconds to keep everything interactive + p_stdout, p_stderr = process.communicate(timeout=short_timeout) + except subprocess.TimeoutExpired: + # try now with more time (up to max) + logging.info( + "Could not start MongoDB daemon process in {}s, trying with timeout={}s".format(short_timeout, + max_mongodb_timeout)) + p_stdout, p_stderr = process.communicate(timeout=max_mongodb_timeout) # decode p_stdout = p_stdout.decode() @@ -641,6 +680,7 @@ def find_or_start_mongodb(mongodb_url, mongodb_port, mongodb_datapath, mongodb_l logging.debug('Connecting to remote MongoDB instance') check_mongodb_connection(mongodb_url, mongodb_port, db_name) + def log_gunicorn_errors(logpath): """ uses logging module to print out gunicorn errors if something went wrong @@ -659,6 +699,7 @@ def log_gunicorn_errors(logpath): first_idx = min(indices) logging.error('Gunicorn error log:\n {}'.format(''.join(lines[first_idx:]))) + def find_or_start_webui(mongo_uri, hostname, port, web_logfile): """ tries to connect to Tuplex WebUI. If local uri is specified, autostarts WebUI. 
@@ -671,7 +712,7 @@ def find_or_start_webui(mongo_uri, hostname, port, web_logfile): Returns: None, raises exceptions on failure """ - version_endpoint = '/api/version' # use this to connect and trigger WebUI connection + version_endpoint = '/api/version' # use this to connect and trigger WebUI connection if not hostname.startswith('http://') and not hostname.startswith('https://'): hostname = 'http://' + str(hostname) @@ -690,12 +731,12 @@ def find_or_start_webui(mongo_uri, hostname, port, web_logfile): else: # start WebUI up! if not cmd_exists('gunicorn'): - raise Exception('Tuplex uses per default gunicorn with eventlet to run the WebUI. Please install via `pip3 install "gunicorn[eventlet]"` or add to PATH') + raise Exception( + 'Tuplex uses per default gunicorn with eventlet to run the WebUI. Please install via `pip3 install "gunicorn[eventlet]"` or add to PATH') # command for this is: # env MONGO_URI=$MONGO_URI gunicorn --daemon --worker-class eventlet --log-file $GUNICORN_LOGFILE -b $HOST:$PORT thserver:app - # directory needs to be the one where the history server is located in! # ==> from structure of file we can infer that dir_path = os.path.dirname(os.path.realpath(__file__)) @@ -718,8 +759,11 @@ def find_or_start_webui(mongo_uri, hostname, port, web_logfile): path = path.parent # check dir historyserver/thserver exists! 
- assert os.path.isdir(os.path.join(tuplex_basedir, 'historyserver', 'thserver')), 'could not find Tuplex WebUI WebApp in {}'.format(tuplex_basedir) - assert os.path.isfile(os.path.join(tuplex_basedir, 'historyserver', 'thserver', '__init__.py')), 'could not find Tuplex WebUI __init__.py file in thserver folder' + assert os.path.isdir(os.path.join(tuplex_basedir, 'historyserver', + 'thserver')), 'could not find Tuplex WebUI WebApp in {}'.format( + tuplex_basedir) + assert os.path.isfile(os.path.join(tuplex_basedir, 'historyserver', 'thserver', + '__init__.py')), 'could not find Tuplex WebUI __init__.py file in thserver folder' # history server dir to use to start gunicorn ui_basedir = os.path.join(tuplex_basedir, 'historyserver') @@ -730,8 +774,9 @@ def find_or_start_webui(mongo_uri, hostname, port, web_logfile): ui_env = os.environ ui_env['MONGO_URI'] = mongo_uri - gunicorn_host = '{}:{}'.format(hostname.replace('http://', '').replace('https://',''), port) - cmd = ['gunicorn', '--daemon', '--worker-class', 'eventlet', '--chdir', ui_basedir, '--pid', PID_FILE, '--log-file', web_logfile, '-b', gunicorn_host, 'thserver:app'] + gunicorn_host = '{}:{}'.format(hostname.replace('http://', '').replace('https://', ''), port) + cmd = ['gunicorn', '--daemon', '--worker-class', 'eventlet', '--chdir', ui_basedir, '--pid', PID_FILE, + '--log-file', web_logfile, '-b', gunicorn_host, 'thserver:app'] logging.debug('Starting gunicorn with command: {}'.format(' '.join(cmd))) @@ -756,10 +801,11 @@ def find_or_start_webui(mongo_uri, hostname, port, web_logfile): start_time = time.time() while time.time() - start_time < TIME_LIMIT: if not os.path.isfile(PID_FILE) or os.stat(PID_FILE).st_size == 0: - time.sleep(0.05) # sleep for 50ms + time.sleep(0.05) # sleep for 50ms else: break - logging.debug('Polling for Gunicorn PID... -- {:.2f}s of poll time left'.format(TIME_LIMIT - (time.time() - start_time))) + logging.debug('Polling for Gunicorn PID... 
-- {:.2f}s of poll time left'.format( + TIME_LIMIT - (time.time() - start_time))) # Read PID file with open(PID_FILE, 'r') as fp: @@ -769,6 +815,7 @@ def find_or_start_webui(mongo_uri, hostname, port, web_logfile): # register daemon shutdown logging.debug('Adding auto-shutdown of process with PID={} (WebUI)'.format(ui_pid)) + def shutdown_gunicorn(pid): pids_to_kill = [] @@ -833,7 +880,7 @@ def ensure_webui(options): mongodb_logpath = os.path.join(options['tuplex.scratchDir'], 'webui', 'logs', 'mongod.log') gunicorn_logpath = os.path.join(options['tuplex.scratchDir'], 'webui', 'logs', 'gunicorn.log') webui_url = options['tuplex.webui.url'] - webui_port = options['tuplex.webui.port'] + webui_port = options['tuplex.webui.port'] try: logging.debug('finding MongoDB...') @@ -850,7 +897,8 @@ def ensure_webui(options): # check that version of WebUI and Tuplex version match # exclude dev versions, i.e. silence warning there. if 'dev' not in __version__ and version_info['version'] != __version__: - logging.warning('Version of Tuplex WebUI ({}) and Tuplex ({}) do not match.'.format(version_info['version'], __version__)) + logging.warning('Version of Tuplex WebUI ({}) and Tuplex ({}) do not match.'.format(version_info['version'], + __version__)) # all good, print out link so user can access WebUI easily webui_uri = webui_url + ':' + str(webui_port) From 3eaa3b613989035fbb58326f4cdf79d342e29b50 Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Tue, 4 Oct 2022 18:02:54 -0400 Subject: [PATCH 10/18] add psutil --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 0e7ad53cf..e57b779a9 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -20,7 +20,7 @@ jobs: steps: - script: sudo bash scripts/ubuntu1804/install_mongodb.sh displayName: 'Install MongoDB' - - script: cd tuplex/python && python3 mongodb_test.py && pkill mongod + - script: cd tuplex/python && python3 -m pip install 
psutil && python3 mongodb_test.py && pkill mongod displayName: 'Test MongoDB init' - script: sudo bash scripts/install_azure_ci_reqs.sh displayName: 'Install required packages' From 973064dd26ec4911428b886e9ee3894deb519c92 Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Tue, 4 Oct 2022 18:04:38 -0400 Subject: [PATCH 11/18] test add --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index e57b779a9..39c7531bb 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -20,7 +20,7 @@ jobs: steps: - script: sudo bash scripts/ubuntu1804/install_mongodb.sh displayName: 'Install MongoDB' - - script: cd tuplex/python && python3 -m pip install psutil && python3 mongodb_test.py && pkill mongod + - script: sudo apt-get install -y python3-setuptools && cd tuplex/python && python3 -m pip install psutil && python3 mongodb_test.py && pkill mongod displayName: 'Test MongoDB init' - script: sudo bash scripts/install_azure_ci_reqs.sh displayName: 'Install required packages' From dade9bf86ad707d3a427dcd9e2a515801c648514 Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Tue, 4 Oct 2022 18:08:18 -0400 Subject: [PATCH 12/18] azure ci --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 39c7531bb..ab93a6830 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -20,12 +20,12 @@ jobs: steps: - script: sudo bash scripts/ubuntu1804/install_mongodb.sh displayName: 'Install MongoDB' - - script: sudo apt-get install -y python3-setuptools && cd tuplex/python && python3 -m pip install psutil && python3 mongodb_test.py && pkill mongod - displayName: 'Test MongoDB init' - script: sudo bash scripts/install_azure_ci_reqs.sh displayName: 'Install required packages' - script: sudo apt-get install -y python3-setuptools ninja-build && sudo apt-get remove -y python-pexpect python3-pexpect && sudo python3.7 -m 
pip install --upgrade pip && sudo python3.7 -m pip uninstall -y pygments && sudo python3.7 -m pip install pytest pygments>=2.4.1 MarkupSafe==2.0 pexpect setuptools astor PyYAML jupyter nbformat pymongo eventlet==0.30.0 gunicorn pymongo && jupyter --version displayName: 'Install python dependencies' + - script: cd tuplex/python && python3 -m pip install psutil iso8601 && python3 mongodb_test.py && pkill mongod + displayName: 'test local MongoDB' - script: TUPLEX_BUILD_ALL=1 CMAKE_ARGS="-DBUILD_WITH_ORC=ON -DLLVM_ROOT_DIR=/usr/lib/llvm-9 -DCMAKE_BUILD_TYPE=Release -DBUILD_FOR_CI=ON" python3 setup.py install --user displayName: 'Build Tuplex' - script: cd build/temp.linux-x86_64-3.7 && ctest --timeout 180 --output-on-failure From 6eaa25bbb09d9bd6c497bc42a4f77747053bdb34 Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Tue, 4 Oct 2022 18:31:18 -0400 Subject: [PATCH 13/18] adding req file for mongodb testing --- azure-pipelines.yml | 4 ++-- tuplex/python/requirements.txt | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 tuplex/python/requirements.txt diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ab93a6830..1815f1659 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -24,8 +24,8 @@ jobs: displayName: 'Install required packages' - script: sudo apt-get install -y python3-setuptools ninja-build && sudo apt-get remove -y python-pexpect python3-pexpect && sudo python3.7 -m pip install --upgrade pip && sudo python3.7 -m pip uninstall -y pygments && sudo python3.7 -m pip install pytest pygments>=2.4.1 MarkupSafe==2.0 pexpect setuptools astor PyYAML jupyter nbformat pymongo eventlet==0.30.0 gunicorn pymongo && jupyter --version displayName: 'Install python dependencies' - - script: cd tuplex/python && python3 -m pip install psutil iso8601 && python3 mongodb_test.py && pkill mongod - displayName: 'test local MongoDB' + - script: cd tuplex/python && python3 -m pip install -r requirements.txt && python3 
mongodb_test.py && pkill mongod + displayName: 'Test local MongoDB' - script: TUPLEX_BUILD_ALL=1 CMAKE_ARGS="-DBUILD_WITH_ORC=ON -DLLVM_ROOT_DIR=/usr/lib/llvm-9 -DCMAKE_BUILD_TYPE=Release -DBUILD_FOR_CI=ON" python3 setup.py install --user displayName: 'Build Tuplex' - script: cd build/temp.linux-x86_64-3.7 && ctest --timeout 180 --output-on-failure diff --git a/tuplex/python/requirements.txt b/tuplex/python/requirements.txt new file mode 100644 index 000000000..db5769ac3 --- /dev/null +++ b/tuplex/python/requirements.txt @@ -0,0 +1,20 @@ +nbconvert<7.0 +jupyter<7.0 +nbformat<7.0 +Werkzeug<2.2.0 +attrs>=19.2.0 +dill>=0.2.7.1 +pluggy>=0.6.0, <1.0.0 +py>=1.5.2 +pygments>=2.4.1 +pytest>=5.3.2 +six>=1.11.0 +wcwidth>=0.1.7 +astor +prompt_toolkit>=2.0.7 +jedi>=0.13.2 +cloudpickle>=0.6.1,<2.0.0 +PyYAML>=3.13 +psutil +pymongo +iso8601 From 09c88141755bce4905e455dbeb8edfa869714f06 Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Tue, 4 Oct 2022 18:57:18 -0400 Subject: [PATCH 14/18] ignore pkill --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 1815f1659..69761049c 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -24,7 +24,7 @@ jobs: displayName: 'Install required packages' - script: sudo apt-get install -y python3-setuptools ninja-build && sudo apt-get remove -y python-pexpect python3-pexpect && sudo python3.7 -m pip install --upgrade pip && sudo python3.7 -m pip uninstall -y pygments && sudo python3.7 -m pip install pytest pygments>=2.4.1 MarkupSafe==2.0 pexpect setuptools astor PyYAML jupyter nbformat pymongo eventlet==0.30.0 gunicorn pymongo && jupyter --version displayName: 'Install python dependencies' - - script: cd tuplex/python && python3 -m pip install -r requirements.txt && python3 mongodb_test.py && pkill mongod + - script: cd tuplex/python && python3 -m pip install -r requirements.txt && python3 mongodb_test.py && pkill mongod || true displayName: 'Test 
local MongoDB' - script: TUPLEX_BUILD_ALL=1 CMAKE_ARGS="-DBUILD_WITH_ORC=ON -DLLVM_ROOT_DIR=/usr/lib/llvm-9 -DCMAKE_BUILD_TYPE=Release -DBUILD_FOR_CI=ON" python3 setup.py install --user displayName: 'Build Tuplex' From 1666c34c1bde120c3c5f3e958bb92c33b0ef5216 Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Tue, 4 Oct 2022 23:14:12 -0400 Subject: [PATCH 15/18] unique test options --- tuplex/python/tests/helper.py | 13 +++++++------ tuplex/python/tests/test_aggregates.py | 4 +++- tuplex/python/tests/test_arithmetic.py | 6 ++++-- tuplex/python/tests/test_closure.py | 4 +++- tuplex/python/tests/test_columns.py | 4 +++- tuplex/python/tests/test_config.py | 4 +--- tuplex/python/tests/test_csv.py | 4 +++- tuplex/python/tests/test_dictionaries.py | 4 +++- tuplex/python/tests/test_exceptions.py | 7 +++++-- tuplex/python/tests/test_fallback.py | 5 +++-- tuplex/python/tests/test_filter.py | 4 +++- tuplex/python/tests/test_index.py | 4 +++- tuplex/python/tests/test_inspect.py | 4 +++- tuplex/python/tests/test_is.py | 6 +++++- tuplex/python/tests/test_lists.py | 5 +++-- tuplex/python/tests/test_logical.py | 6 ++++-- tuplex/python/tests/test_math.py | 1 - tuplex/python/tests/test_metrics.py | 5 ++++- tuplex/python/tests/test_multi.py | 4 +++- tuplex/python/tests/test_nulls.py | 5 +++-- tuplex/python/tests/test_parallelize.py | 5 +++-- tuplex/python/tests/test_resolve.py | 6 ++++-- tuplex/python/tests/test_srcextract.py | 4 +++- tuplex/python/tests/test_strings.py | 4 +++- tuplex/python/tests/test_tuples.py | 4 +++- tuplex/python/tests/test_webui.py | 7 ++++--- tuplex/python/tuplex/utils/common.py | 17 +++++------------ 27 files changed, 91 insertions(+), 55 deletions(-) diff --git a/tuplex/python/tests/helper.py b/tuplex/python/tests/helper.py index b64ff0748..6ea7b2985 100644 --- a/tuplex/python/tests/helper.py +++ b/tuplex/python/tests/helper.py @@ -10,10 +10,11 @@ 
#----------------------------------------------------------------------------------------------------------------------# def test_options(): - return {'tuplex.partitionSize' : "128KB", - "tuplex.executorMemory" : "4MB", - "tuplex.useLLVMOptimizer" : True, - "tuplex.allowUndefinedBehavior" : False, - "tuplex.webui.enable" : False, + return {'tuplex.partitionSize': "128KB", + "tuplex.executorMemory": "8MB", + "tuplex.useLLVMOptimizer": True, + "tuplex.allowUndefinedBehavior": False, + "tuplex.webui.enable": False, "tuplex.optimizer.mergeExceptionsInOrder": True, - "tuplex.csv.selectionPushdown" : True} \ No newline at end of file + "tuplex.csv.selectionPushdown": True, + "tuplex.scratchDir": ".cache/"} \ No newline at end of file diff --git a/tuplex/python/tests/test_aggregates.py b/tuplex/python/tests/test_aggregates.py index f36fdae02..c04b34e54 100644 --- a/tuplex/python/tests/test_aggregates.py +++ b/tuplex/python/tests/test_aggregates.py @@ -16,10 +16,12 @@ from tuplex import * import typing import os +from .helper import test_options class TestAggregates(unittest.TestCase): def setUp(self): - self.conf = {"webui.enable": False, "driverMemory": "8MB", "partitionSize": "256KB"} + self.conf = test_options() + self.conf.update({"webui.enable": False, "driverMemory": "8MB", "partitionSize": "256KB"}) def test_simple_count(self): c = Context(self.conf) diff --git a/tuplex/python/tests/test_arithmetic.py b/tuplex/python/tests/test_arithmetic.py index 15b3a1a8a..8bb47a5e0 100644 --- a/tuplex/python/tests/test_arithmetic.py +++ b/tuplex/python/tests/test_arithmetic.py @@ -14,11 +14,13 @@ import random import numpy as np from tuplex import * - +from .helper import test_options class TestArithmetic(unittest.TestCase): def setUp(self): - self.conf = {"webui.enable": False, "driverMemory": "8MB", "partitionSize": "256KB", "tuplex.optimizer.mergeExceptionsInOrder": True} + self.conf = test_options() + self.conf.update({"webui.enable": False, "driverMemory": "8MB", + 
"partitionSize": "256KB", "tuplex.optimizer.mergeExceptionsInOrder": True}) def test_add(self): c = Context(self.conf) diff --git a/tuplex/python/tests/test_closure.py b/tuplex/python/tests/test_closure.py index cde1c0ce2..1d9d6c754 100644 --- a/tuplex/python/tests/test_closure.py +++ b/tuplex/python/tests/test_closure.py @@ -12,11 +12,13 @@ from unittest import TestCase import tuplex import time +from .helper import test_options class TestClosure(TestCase): def setUp(self): - self.c = tuplex.Context(webui=False) + self.conf = test_options() + self.c = tuplex.Context(self.conf) def testGlobalVar(self): diff --git a/tuplex/python/tests/test_columns.py b/tuplex/python/tests/test_columns.py index b76a5f10e..e84ab431b 100644 --- a/tuplex/python/tests/test_columns.py +++ b/tuplex/python/tests/test_columns.py @@ -12,11 +12,13 @@ from unittest import TestCase import tuplex +from .helper import test_options class TestColumns(TestCase): def setUp(self): - self.conf = {"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"} + self.conf = test_options() + self.conf.update({"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"}) self.c = tuplex.Context(self.conf) def test_withColumnNew(self): diff --git a/tuplex/python/tests/test_config.py b/tuplex/python/tests/test_config.py index 3b29334e8..da3be8ff0 100644 --- a/tuplex/python/tests/test_config.py +++ b/tuplex/python/tests/test_config.py @@ -15,9 +15,7 @@ from tuplex.utils.common import * class TestConfig(unittest.TestCase): - - - + # DO NOT USE test_options() here, these tests are designed to actually test options... 
def testNestedDictOptions(self): c = Context(conf={'executorMemory':'1MB', 'executorCount':3}) diff --git a/tuplex/python/tests/test_csv.py b/tuplex/python/tests/test_csv.py index 4a9be9376..314dda905 100644 --- a/tuplex/python/tests/test_csv.py +++ b/tuplex/python/tests/test_csv.py @@ -12,6 +12,7 @@ import unittest import os from tuplex import * +from .helper import test_options class TestCSV(unittest.TestCase): @@ -30,7 +31,8 @@ def setUp(self): self._generate_csv_file('test.csv', ',') self._generate_csv_file('test.tsv', '\t') self._generate_csv_file('test_header.csv', ',', True) - self.conf = {"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"} + self.conf = test_options() + self.conf.update({"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"}) def tearDown(self): os.remove('test.csv') diff --git a/tuplex/python/tests/test_dictionaries.py b/tuplex/python/tests/test_dictionaries.py index b72c14ba8..f3910fad6 100644 --- a/tuplex/python/tests/test_dictionaries.py +++ b/tuplex/python/tests/test_dictionaries.py @@ -12,11 +12,13 @@ import unittest from tuplex import * from math import isclose +from .helper import test_options class TestDictionaries(unittest.TestCase): def setUp(self): - self.conf = {"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"} + self.conf = test_options() + self.conf.update({"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"}) # test pop(), popitem() def test_attributes(self): diff --git a/tuplex/python/tests/test_exceptions.py b/tuplex/python/tests/test_exceptions.py index 6086c2ce9..3000779cc 100644 --- a/tuplex/python/tests/test_exceptions.py +++ b/tuplex/python/tests/test_exceptions.py @@ -13,12 +13,15 @@ from tuplex import Context from random import randint, sample, shuffle from math import floor +from .helper import test_options class TestExceptions(unittest.TestCase): def setUp(self): - self.conf = {"tuplex.webui.enable": False, "executorCount": 
8, "executorMemory": "256MB", "driverMemory": "256MB", "partitionSize": "256KB", "tuplex.optimizer.mergeExceptionsInOrder": False} - self.conf_in_order = {"tuplex.webui.enable": False, "executorCount": 8, "executorMemory": "256MB", "driverMemory": "256MB", "partitionSize": "256KB", "tuplex.optimizer.mergeExceptionsInOrder": True} + self.conf = test_options() + self.conf.update({"tuplex.webui.enable": False, "executorCount": 8, "executorMemory": "256MB", "driverMemory": "256MB", "partitionSize": "256KB", "tuplex.optimizer.mergeExceptionsInOrder": False}) + self.conf_in_order = test_options() + self.conf_in_order.update({"tuplex.webui.enable": False, "executorCount": 8, "executorMemory": "256MB", "driverMemory": "256MB", "partitionSize": "256KB", "tuplex.optimizer.mergeExceptionsInOrder": True}) def test_merge_with_filter(self): c = Context(self.conf_in_order) diff --git a/tuplex/python/tests/test_fallback.py b/tuplex/python/tests/test_fallback.py index 12e8d112b..da806a5b6 100644 --- a/tuplex/python/tests/test_fallback.py +++ b/tuplex/python/tests/test_fallback.py @@ -12,13 +12,14 @@ import unittest from tuplex import * import numpy as np - +from .helper import test_options # test fallback functionality, i.e. 
executing cloudpickled code class TestFallback(unittest.TestCase): def setUp(self): - self.conf = {"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"} + self.conf = test_options() + self.conf.update({"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"}) self.c = Context(self.conf) def testArbitaryObjecsts(self): diff --git a/tuplex/python/tests/test_filter.py b/tuplex/python/tests/test_filter.py index 6ce29573d..382c65860 100644 --- a/tuplex/python/tests/test_filter.py +++ b/tuplex/python/tests/test_filter.py @@ -11,12 +11,14 @@ import unittest from tuplex import * +from .helper import test_options # test filter functionality class TestFilter(unittest.TestCase): def setUp(self): - self.conf = {"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"} + self.conf = test_options() + self.conf.update({"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"}) def testFilter(self): c = Context(self.conf) diff --git a/tuplex/python/tests/test_index.py b/tuplex/python/tests/test_index.py index a953d9c50..ddf0bd984 100644 --- a/tuplex/python/tests/test_index.py +++ b/tuplex/python/tests/test_index.py @@ -11,11 +11,13 @@ import unittest from tuplex import * +from .helper import test_options class TestTuples(unittest.TestCase): def setUp(self): - self.conf = {"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"} + self.conf = test_options() + self.conf.update({"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"}) def testIndexI(self): c = Context(self.conf) diff --git a/tuplex/python/tests/test_inspect.py b/tuplex/python/tests/test_inspect.py index da582262d..9d1a5f9d8 100644 --- a/tuplex/python/tests/test_inspect.py +++ b/tuplex/python/tests/test_inspect.py @@ -12,12 +12,14 @@ import typing import unittest from tuplex import * +from .helper import test_options # test filter functionality class TestInspection(unittest.TestCase): def setUp(self): - 
self.conf = {"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"} + self.conf = test_options() + self.conf.update({"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"}) def testTypes(self): """ test .types property """ diff --git a/tuplex/python/tests/test_is.py b/tuplex/python/tests/test_is.py index 351decc84..31fa00ad2 100644 --- a/tuplex/python/tests/test_is.py +++ b/tuplex/python/tests/test_is.py @@ -1,12 +1,16 @@ import tuplex from unittest import TestCase + +from .helper import test_options + """ Tests functionality for `is` keyword. """ class TestIs(TestCase): def setUp(self): - self.conf = {"webui.enable": False, "executorCount": "0"} + self.conf = test_options() + self.conf.update({"webui.enable": False, "executorCount": "0"}) self.c = tuplex.Context(self.conf) def test_boolIsBool(self): diff --git a/tuplex/python/tests/test_lists.py b/tuplex/python/tests/test_lists.py index e239a777b..0c9c25d6a 100644 --- a/tuplex/python/tests/test_lists.py +++ b/tuplex/python/tests/test_lists.py @@ -12,12 +12,13 @@ import unittest from tuplex import * from math import isclose - +from .helper import test_options class TestLists(unittest.TestCase): def setUp(self): - self.conf = {"webui.enable" : False, "driverMemory" : "16MB", "partitionSize" : "256KB"} + self.conf = test_options() + self.conf.update({"webui.enable" : False, "driverMemory" : "16MB", "partitionSize" : "256KB"}) def test_subscripts(self): c = Context(self.conf) diff --git a/tuplex/python/tests/test_logical.py b/tuplex/python/tests/test_logical.py index c21a90797..0a89932be 100644 --- a/tuplex/python/tests/test_logical.py +++ b/tuplex/python/tests/test_logical.py @@ -11,12 +11,14 @@ import unittest from tuplex import * - +from .helper import test_options class TestLogical(unittest.TestCase): def __init__(self, *args, **kwargs): - self.conf = {"webui.enable": False, "driverMemory": "64MB", "executorMemory": "2MB", "partitionSize": "128KB"} + self.conf = 
test_options() + self.conf.update({"webui.enable": False, "driverMemory": "64MB", + "executorMemory": "2MB", "partitionSize": "128KB"}) super(TestLogical, self).__init__(*args, **kwargs) def testAnd(self): diff --git a/tuplex/python/tests/test_math.py b/tuplex/python/tests/test_math.py index 142e77ae8..9e477d826 100644 --- a/tuplex/python/tests/test_math.py +++ b/tuplex/python/tests/test_math.py @@ -18,7 +18,6 @@ from math import pi import math - class TestMath(unittest.TestCase): def setUp(self): diff --git a/tuplex/python/tests/test_metrics.py b/tuplex/python/tests/test_metrics.py index a922c6e13..25ed906b3 100644 --- a/tuplex/python/tests/test_metrics.py +++ b/tuplex/python/tests/test_metrics.py @@ -11,13 +11,16 @@ import unittest from tuplex import * +from .helper import test_options # this test is a basic test to make sure that times/exceptions for a project are # working correctly class TestMetrics(unittest.TestCase): def testTimes(self): - conf = {"tuplex.useLLVMOptimizer" : "true", "webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"} + conf = test_options() + conf.update({"tuplex.useLLVMOptimizer" : "true", "webui.enable" : False, + "driverMemory" : "8MB", "partitionSize" : "256KB"}) c = Context(conf) c.parallelize([1, 2, 3, 4, 5]).map(lambda x: x + 4).collect() metrics = c.metrics diff --git a/tuplex/python/tests/test_multi.py b/tuplex/python/tests/test_multi.py index 1b9ca2806..c88df694a 100644 --- a/tuplex/python/tests/test_multi.py +++ b/tuplex/python/tests/test_multi.py @@ -11,13 +11,15 @@ import unittest from tuplex import * +from .helper import test_options # this test is addressed on issues with the framework usage. I.e. 
whether data is kept correctly # in memory for parallelize class TestMultiStatements(unittest.TestCase): def setUp(self): - self.conf = {"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"} + self.conf = test_options() + self.conf.update({"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"}) def testParallelize(self): c = Context(self.conf) diff --git a/tuplex/python/tests/test_nulls.py b/tuplex/python/tests/test_nulls.py index 035f7e301..be66aae6a 100644 --- a/tuplex/python/tests/test_nulls.py +++ b/tuplex/python/tests/test_nulls.py @@ -11,12 +11,13 @@ import unittest from tuplex import * - +from .helper import test_options class TestNulls(unittest.TestCase): def __init__(self, *args, **kwargs): - self.conf = {"webui.enable" : False, "driverMemory" : "16MB", "partitionSize" : "256KB"} + self.conf = test_options() + self.conf.update({"webui.enable" : False, "driverMemory" : "16MB", "partitionSize" : "256KB"}) super(TestNulls, self).__init__(*args, **kwargs) def testEqAndNotEq(self): diff --git a/tuplex/python/tests/test_parallelize.py b/tuplex/python/tests/test_parallelize.py index 78803117a..da3272f5f 100644 --- a/tuplex/python/tests/test_parallelize.py +++ b/tuplex/python/tests/test_parallelize.py @@ -11,13 +11,14 @@ import unittest from tuplex import * - +from .helper import test_options # all of these below should be executed with faster, optimized serialization code class TestFastParallelize(unittest.TestCase): def __init__(self, *args, **kwargs): - self.conf = {"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"} + self.conf = test_options() + self.conf.update({"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"}) super(TestFastParallelize, self).__init__(*args, **kwargs) def testI64(self): diff --git a/tuplex/python/tests/test_resolve.py b/tuplex/python/tests/test_resolve.py index 37efff4a5..845e4d130 100644 --- a/tuplex/python/tests/test_resolve.py +++ 
b/tuplex/python/tests/test_resolve.py @@ -12,12 +12,14 @@ from unittest import TestCase import tuplex import time +from .helper import test_options class TestResolveMechanism(TestCase): def setUp(self): - self.conf = {"webui.enable": False, "driverMemory": "8MB", - "partitionSize": "256KB", "optimizer.mergeExceptionsInOrder": True} + self.conf = test_options() + self.conf.update({"webui.enable": False, "driverMemory": "8MB", + "partitionSize": "256KB", "optimizer.mergeExceptionsInOrder": True}) self.c = tuplex.Context(self.conf) def test_LambdaResolveI(self): diff --git a/tuplex/python/tests/test_srcextract.py b/tuplex/python/tests/test_srcextract.py index 63df57092..34b19814c 100644 --- a/tuplex/python/tests/test_srcextract.py +++ b/tuplex/python/tests/test_srcextract.py @@ -14,6 +14,7 @@ from tuplex.utils.reflection import get_source, get_globals, supports_lambda_closure from notebook_utils import get_jupyter_function_code +from .helper import test_options SOME_CONSTANT_TO_EXTRACT=42 @@ -21,7 +22,8 @@ class TestSourceExtract(TestCase): def setUp(self): - self.conf = {"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"} + self.conf = test_options() + self.conf.update({"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"}) self.c = tuplex.Context(self.conf) def test_singlelam(self): diff --git a/tuplex/python/tests/test_strings.py b/tuplex/python/tests/test_strings.py index ba42785ae..c537339c7 100644 --- a/tuplex/python/tests/test_strings.py +++ b/tuplex/python/tests/test_strings.py @@ -11,11 +11,13 @@ import unittest from tuplex import * +from .helper import test_options class TestString(unittest.TestCase): def setUp(self): - self.conf = {"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"} + self.conf = test_options() + self.conf.update({"webui.enable" : False, "driverMemory" : "8MB", "partitionSize" : "256KB"}) def test_concat(self): c = Context(self.conf) diff --git 
a/tuplex/python/tests/test_tuples.py b/tuplex/python/tests/test_tuples.py index 7d3374808..b06865b70 100644 --- a/tuplex/python/tests/test_tuples.py +++ b/tuplex/python/tests/test_tuples.py @@ -11,11 +11,13 @@ import unittest from tuplex import * +from .helper import test_options class TestTuples(unittest.TestCase): def setUp(self): - self.conf = {"webui.enable" : False, "driverMemory" : "16MB", "partitionSize" : "256KB"} + self.conf = test_options() + self.conf.update({"webui.enable" : False, "driverMemory" : "16MB", "partitionSize" : "256KB"}) def testEmptyTupleI(self): c = Context(self.conf) diff --git a/tuplex/python/tests/test_webui.py b/tuplex/python/tests/test_webui.py index a584048d7..f5ef9c5bc 100644 --- a/tuplex/python/tests/test_webui.py +++ b/tuplex/python/tests/test_webui.py @@ -16,6 +16,7 @@ import logging import urllib.request +from .helper import test_options class TestWebUI(unittest.TestCase): @@ -24,11 +25,11 @@ def setUpClass(cls): logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG) localhost_ip = '127.0.0.1' - + conf = test_options() # bug in logging redirect? 
- conf ={"webui.enable": True, "driverMemory": "8MB", "executorMemory" : "1MB", + conf.update({"webui.enable": True, "driverMemory": "8MB", "executorMemory" : "1MB", "partitionSize": "256KB", "tuplex.redirectToPythonLogging": True, - "webui.mongodb.url": "localhost", "webui.url" : localhost_ip} + "webui.mongodb.url": "localhost", "webui.url" : localhost_ip}) logging.debug('WebUI Test setUpClass called') cls.context = Context(conf) diff --git a/tuplex/python/tuplex/utils/common.py b/tuplex/python/tuplex/utils/common.py index f241aac4f..9e38b5bc7 100644 --- a/tuplex/python/tuplex/utils/common.py +++ b/tuplex/python/tuplex/utils/common.py @@ -12,8 +12,7 @@ import sys import collections -if sys.version_info.major == 3 and sys.version_info.minor >= 10: - import collections.abc +import collections.abc import pathlib import signal @@ -214,17 +213,11 @@ def flatten_dict(d, sep='.', parent_key=''): for key, val in d.items(): new_key = parent_key + sep + key if parent_key else key - # Python 3.10+ moved MutableMapping to collections.abc.MutableMapping - if sys.version_info.major == 3 and sys.version_info.minor >= 10: - if isinstance(val, collections.abc.MutableMapping): - items.extend(flatten_dict(val, sep, new_key).items()) - else: - items.append((new_key, val)) + # Python 3.10+ moved MutableMapping to collections.abc.MutableMapping permanently + if isinstance(val, collections.abc.MutableMapping): + items.extend(flatten_dict(val, sep, new_key).items()) else: - if isinstance(val, collections.MutableMapping): - items.extend(flatten_dict(val, sep, new_key).items()) - else: - items.append((new_key, val)) + items.append((new_key, val)) return dict(items) From f05fbbde7532d3388d738be9ef078d4427cd42fd Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Tue, 4 Oct 2022 23:20:58 -0400 Subject: [PATCH 16/18] relative path fix --- tuplex/python/tests/test_aggregates.py | 2 +- tuplex/python/tests/test_arithmetic.py | 2 +- tuplex/python/tests/test_closure.py | 2 +- 
tuplex/python/tests/test_columns.py | 2 +- tuplex/python/tests/test_csv.py | 2 +- tuplex/python/tests/test_dictionaries.py | 2 +- tuplex/python/tests/test_exceptions.py | 2 +- tuplex/python/tests/test_fallback.py | 2 +- tuplex/python/tests/test_filter.py | 2 +- tuplex/python/tests/test_index.py | 2 +- tuplex/python/tests/test_inspect.py | 2 +- tuplex/python/tests/test_is.py | 2 +- tuplex/python/tests/test_lists.py | 2 +- tuplex/python/tests/test_logical.py | 2 +- tuplex/python/tests/test_metrics.py | 2 +- tuplex/python/tests/test_multi.py | 2 +- tuplex/python/tests/test_nulls.py | 2 +- tuplex/python/tests/test_parallelize.py | 2 +- tuplex/python/tests/test_resolve.py | 2 +- tuplex/python/tests/test_srcextract.py | 2 +- tuplex/python/tests/test_strings.py | 2 +- tuplex/python/tests/test_tuples.py | 2 +- tuplex/python/tests/test_webui.py | 2 +- 23 files changed, 23 insertions(+), 23 deletions(-) diff --git a/tuplex/python/tests/test_aggregates.py b/tuplex/python/tests/test_aggregates.py index c04b34e54..84268e677 100644 --- a/tuplex/python/tests/test_aggregates.py +++ b/tuplex/python/tests/test_aggregates.py @@ -16,7 +16,7 @@ from tuplex import * import typing import os -from .helper import test_options +from helper import test_options class TestAggregates(unittest.TestCase): def setUp(self): diff --git a/tuplex/python/tests/test_arithmetic.py b/tuplex/python/tests/test_arithmetic.py index 8bb47a5e0..d31e3e007 100644 --- a/tuplex/python/tests/test_arithmetic.py +++ b/tuplex/python/tests/test_arithmetic.py @@ -14,7 +14,7 @@ import random import numpy as np from tuplex import * -from .helper import test_options +from helper import test_options class TestArithmetic(unittest.TestCase): def setUp(self): diff --git a/tuplex/python/tests/test_closure.py b/tuplex/python/tests/test_closure.py index 1d9d6c754..13f82f024 100644 --- a/tuplex/python/tests/test_closure.py +++ b/tuplex/python/tests/test_closure.py @@ -12,7 +12,7 @@ from unittest import TestCase import tuplex import 
time -from .helper import test_options +from helper import test_options class TestClosure(TestCase): diff --git a/tuplex/python/tests/test_columns.py b/tuplex/python/tests/test_columns.py index e84ab431b..facb48b34 100644 --- a/tuplex/python/tests/test_columns.py +++ b/tuplex/python/tests/test_columns.py @@ -12,7 +12,7 @@ from unittest import TestCase import tuplex -from .helper import test_options +from helper import test_options class TestColumns(TestCase): diff --git a/tuplex/python/tests/test_csv.py b/tuplex/python/tests/test_csv.py index 314dda905..d069f4f76 100644 --- a/tuplex/python/tests/test_csv.py +++ b/tuplex/python/tests/test_csv.py @@ -12,7 +12,7 @@ import unittest import os from tuplex import * -from .helper import test_options +from helper import test_options class TestCSV(unittest.TestCase): diff --git a/tuplex/python/tests/test_dictionaries.py b/tuplex/python/tests/test_dictionaries.py index f3910fad6..58a52cda3 100644 --- a/tuplex/python/tests/test_dictionaries.py +++ b/tuplex/python/tests/test_dictionaries.py @@ -12,7 +12,7 @@ import unittest from tuplex import * from math import isclose -from .helper import test_options +from helper import test_options class TestDictionaries(unittest.TestCase): diff --git a/tuplex/python/tests/test_exceptions.py b/tuplex/python/tests/test_exceptions.py index 3000779cc..210a351f8 100644 --- a/tuplex/python/tests/test_exceptions.py +++ b/tuplex/python/tests/test_exceptions.py @@ -13,7 +13,7 @@ from tuplex import Context from random import randint, sample, shuffle from math import floor -from .helper import test_options +from helper import test_options class TestExceptions(unittest.TestCase): diff --git a/tuplex/python/tests/test_fallback.py b/tuplex/python/tests/test_fallback.py index da806a5b6..1019875d2 100644 --- a/tuplex/python/tests/test_fallback.py +++ b/tuplex/python/tests/test_fallback.py @@ -12,7 +12,7 @@ import unittest from tuplex import * import numpy as np -from .helper import test_options +from 
helper import test_options # test fallback functionality, i.e. executing cloudpickled code class TestFallback(unittest.TestCase): diff --git a/tuplex/python/tests/test_filter.py b/tuplex/python/tests/test_filter.py index 382c65860..32ca16c05 100644 --- a/tuplex/python/tests/test_filter.py +++ b/tuplex/python/tests/test_filter.py @@ -11,7 +11,7 @@ import unittest from tuplex import * -from .helper import test_options +from helper import test_options # test filter functionality class TestFilter(unittest.TestCase): diff --git a/tuplex/python/tests/test_index.py b/tuplex/python/tests/test_index.py index ddf0bd984..8b6718d6b 100644 --- a/tuplex/python/tests/test_index.py +++ b/tuplex/python/tests/test_index.py @@ -11,7 +11,7 @@ import unittest from tuplex import * -from .helper import test_options +from helper import test_options class TestTuples(unittest.TestCase): diff --git a/tuplex/python/tests/test_inspect.py b/tuplex/python/tests/test_inspect.py index 9d1a5f9d8..249d1d768 100644 --- a/tuplex/python/tests/test_inspect.py +++ b/tuplex/python/tests/test_inspect.py @@ -12,7 +12,7 @@ import typing import unittest from tuplex import * -from .helper import test_options +from helper import test_options # test filter functionality class TestInspection(unittest.TestCase): diff --git a/tuplex/python/tests/test_is.py b/tuplex/python/tests/test_is.py index 31fa00ad2..f2c8ad898 100644 --- a/tuplex/python/tests/test_is.py +++ b/tuplex/python/tests/test_is.py @@ -1,7 +1,7 @@ import tuplex from unittest import TestCase -from .helper import test_options +from helper import test_options """ Tests functionality for `is` keyword. 
diff --git a/tuplex/python/tests/test_lists.py b/tuplex/python/tests/test_lists.py index 0c9c25d6a..de4d6bd25 100644 --- a/tuplex/python/tests/test_lists.py +++ b/tuplex/python/tests/test_lists.py @@ -12,7 +12,7 @@ import unittest from tuplex import * from math import isclose -from .helper import test_options +from helper import test_options class TestLists(unittest.TestCase): diff --git a/tuplex/python/tests/test_logical.py b/tuplex/python/tests/test_logical.py index 0a89932be..fb2d32b5a 100644 --- a/tuplex/python/tests/test_logical.py +++ b/tuplex/python/tests/test_logical.py @@ -11,7 +11,7 @@ import unittest from tuplex import * -from .helper import test_options +from helper import test_options class TestLogical(unittest.TestCase): diff --git a/tuplex/python/tests/test_metrics.py b/tuplex/python/tests/test_metrics.py index 25ed906b3..c4c2d3a02 100644 --- a/tuplex/python/tests/test_metrics.py +++ b/tuplex/python/tests/test_metrics.py @@ -11,7 +11,7 @@ import unittest from tuplex import * -from .helper import test_options +from helper import test_options # this test is a basic test to make sure that times/exceptions for a project are # working correctly diff --git a/tuplex/python/tests/test_multi.py b/tuplex/python/tests/test_multi.py index c88df694a..1a5259e1e 100644 --- a/tuplex/python/tests/test_multi.py +++ b/tuplex/python/tests/test_multi.py @@ -11,7 +11,7 @@ import unittest from tuplex import * -from .helper import test_options +from helper import test_options # this test is addressed on issues with the framework usage. I.e. 
whether data is kept correctly # in memory for parallelize diff --git a/tuplex/python/tests/test_nulls.py b/tuplex/python/tests/test_nulls.py index be66aae6a..2121b9332 100644 --- a/tuplex/python/tests/test_nulls.py +++ b/tuplex/python/tests/test_nulls.py @@ -11,7 +11,7 @@ import unittest from tuplex import * -from .helper import test_options +from helper import test_options class TestNulls(unittest.TestCase): diff --git a/tuplex/python/tests/test_parallelize.py b/tuplex/python/tests/test_parallelize.py index da3272f5f..0d3b63544 100644 --- a/tuplex/python/tests/test_parallelize.py +++ b/tuplex/python/tests/test_parallelize.py @@ -11,7 +11,7 @@ import unittest from tuplex import * -from .helper import test_options +from helper import test_options # all of these below should be executed with faster, optimized serialization code class TestFastParallelize(unittest.TestCase): diff --git a/tuplex/python/tests/test_resolve.py b/tuplex/python/tests/test_resolve.py index 845e4d130..e447234cc 100644 --- a/tuplex/python/tests/test_resolve.py +++ b/tuplex/python/tests/test_resolve.py @@ -12,7 +12,7 @@ from unittest import TestCase import tuplex import time -from .helper import test_options +from helper import test_options class TestResolveMechanism(TestCase): diff --git a/tuplex/python/tests/test_srcextract.py b/tuplex/python/tests/test_srcextract.py index 34b19814c..dedee473d 100644 --- a/tuplex/python/tests/test_srcextract.py +++ b/tuplex/python/tests/test_srcextract.py @@ -14,7 +14,7 @@ from tuplex.utils.reflection import get_source, get_globals, supports_lambda_closure from notebook_utils import get_jupyter_function_code -from .helper import test_options +from helper import test_options SOME_CONSTANT_TO_EXTRACT=42 diff --git a/tuplex/python/tests/test_strings.py b/tuplex/python/tests/test_strings.py index c537339c7..1a1236cf2 100644 --- a/tuplex/python/tests/test_strings.py +++ b/tuplex/python/tests/test_strings.py @@ -11,7 +11,7 @@ import unittest from tuplex import * 
-from .helper import test_options +from helper import test_options class TestString(unittest.TestCase): diff --git a/tuplex/python/tests/test_tuples.py b/tuplex/python/tests/test_tuples.py index b06865b70..2c96edec7 100644 --- a/tuplex/python/tests/test_tuples.py +++ b/tuplex/python/tests/test_tuples.py @@ -11,7 +11,7 @@ import unittest from tuplex import * -from .helper import test_options +from helper import test_options class TestTuples(unittest.TestCase): diff --git a/tuplex/python/tests/test_webui.py b/tuplex/python/tests/test_webui.py index f5ef9c5bc..ac8651230 100644 --- a/tuplex/python/tests/test_webui.py +++ b/tuplex/python/tests/test_webui.py @@ -16,7 +16,7 @@ import logging import urllib.request -from .helper import test_options +from helper import test_options class TestWebUI(unittest.TestCase): From f48be6320d3ba319dc416bc07d9b15125598f5ad Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Wed, 5 Oct 2022 00:41:01 -0400 Subject: [PATCH 17/18] log update --- tuplex/python/tests/test_webui.py | 2 +- tuplex/python/tuplex/utils/common.py | 32 ++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/tuplex/python/tests/test_webui.py b/tuplex/python/tests/test_webui.py index ac8651230..158dbeb22 100644 --- a/tuplex/python/tests/test_webui.py +++ b/tuplex/python/tests/test_webui.py @@ -27,7 +27,7 @@ def setUpClass(cls): localhost_ip = '127.0.0.1' conf = test_options() # bug in logging redirect? 
- conf.update({"webui.enable": True, "driverMemory": "8MB", "executorMemory" : "1MB", + conf.update({"webui.enable": True, "executorCount": 1, "driverMemory": "8MB", "executorMemory" : "1MB", "partitionSize": "256KB", "tuplex.redirectToPythonLogging": True, "webui.mongodb.url": "localhost", "webui.url" : localhost_ip}) diff --git a/tuplex/python/tuplex/utils/common.py b/tuplex/python/tuplex/utils/common.py index 9e38b5bc7..10227e637 100644 --- a/tuplex/python/tuplex/utils/common.py +++ b/tuplex/python/tuplex/utils/common.py @@ -782,7 +782,7 @@ def find_or_start_webui(mongo_uri, hostname, port, web_logfile): p_stderr = p_stderr.decode() if len(p_stderr.strip()) > 0: - raise Exception('mongod produced following errors: {}'.format(p_stderr)) + raise Exception('gunicorn produced following errors: {}'.format(p_stderr)) logging.info('Gunicorn locally started...') @@ -800,9 +800,33 @@ def find_or_start_webui(mongo_uri, hostname, port, web_logfile): logging.debug('Polling for Gunicorn PID... -- {:.2f}s of poll time left'.format( TIME_LIMIT - (time.time() - start_time))) - # Read PID file - with open(PID_FILE, 'r') as fp: - ui_pid = int(fp.read()) + ui_pid = None + try: + # Read PID file + with open(PID_FILE, 'r') as fp: + ui_pid = int(fp.read()) + except Exception as e: + logging.debug("failed to retrieve PID for WebUI, details: {}".format(e)) + + non_daemon_log = 'timeout - no log' + # something went wrong with starting gunicorn. 
Try to capture some meaningful output and abort + try: + cmd = ['gunicorn', '--worker-class', 'eventlet', '--chdir', ui_basedir, '--pid', PID_FILE, + '--log-file', '-', '-b', gunicorn_host, 'thserver:app'] + process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=ui_env) + # set a timeout of 5 seconds to keep everything interactive + p_stdout, p_stderr = process.communicate(timeout=5) + + # decode + p_stdout = p_stdout.decode() + p_stderr = p_stderr.decode() + + non_daemon_log = p_stdout + '\n' + p_stderr + except subprocess.TimeoutExpired: + pass + logging.error('Gunicorn process log:\n' + non_daemon_log) + raise Exception("Failed to start gunicorn daemon, non-daemon run yielded:\n{}".format(non_daemon_log)) + assert ui_pid is not None, 'Invalid PID for WebUI' logging.info('Gunicorn PID={}'.format(ui_pid)) From 5607f42dbc72b9e8da3733faf01e019254d282dd Mon Sep 17 00:00:00 2001 From: Leonhard Spiegelberg Date: Wed, 5 Oct 2022 01:04:25 -0400 Subject: [PATCH 18/18] abspaths for gunicorn and creating necessary dirs --- tuplex/python/tuplex/utils/common.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tuplex/python/tuplex/utils/common.py b/tuplex/python/tuplex/utils/common.py index 10227e637..ea9dcf51e 100644 --- a/tuplex/python/tuplex/utils/common.py +++ b/tuplex/python/tuplex/utils/common.py @@ -35,6 +35,7 @@ import tempfile import time import shlex +import pathlib try: import pwd @@ -768,6 +769,17 @@ def find_or_start_webui(mongo_uri, hostname, port, web_logfile): ui_env = os.environ ui_env['MONGO_URI'] = mongo_uri gunicorn_host = '{}:{}'.format(hostname.replace('http://', '').replace('https://', ''), port) + + # need to convert everything to absolute paths (b.c. 
gunicorn fails otherwise) + web_logfile = os.path.abspath(web_logfile) + ui_basedir = os.path.abspath(ui_basedir) + # also make sure parent dir of web_logfile exists + try: + wl_path = pathlib.Path(web_logfile).parent + os.makedirs(str(wl_path), exist_ok=True) + except Exception as e: + logging.error("ensuring parent dir of {} exists, failed with {}".format(web_logfile, e)) + cmd = ['gunicorn', '--daemon', '--worker-class', 'eventlet', '--chdir', ui_basedir, '--pid', PID_FILE, '--log-file', web_logfile, '-b', gunicorn_host, 'thserver:app'] @@ -790,7 +802,7 @@ def find_or_start_webui(mongo_uri, hostname, port, web_logfile): ui_pid = None # Writing the PID might require some time for gunicorn, therefore poll the temp file for up to 2s - TIME_LIMIT = 2 + TIME_LIMIT = 3 start_time = time.time() while time.time() - start_time < TIME_LIMIT: if not os.path.isfile(PID_FILE) or os.stat(PID_FILE).st_size == 0: