这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
0f26da6
init
its-colby Jul 15, 2021
852a6a9
seems to be working
its-colby Jul 16, 2021
7c21a1b
added small changes to reflect Leonhard's comments on this PR
its-colby Jul 18, 2021
832e917
accidental error in last commit
its-colby Jul 18, 2021
49ab7b3
fixed webui documentation webui -> webui.enable
its-colby Jul 21, 2021
2ae7426
udpated footer
LeonhardFS Sep 21, 2021
63f9f6c
Merge branch 'master' of https://github.com/tuplex/tuplex into histor…
LeonhardFS Sep 21, 2021
ebf2e28
Merge remote-tracking branch 'upstream/master' into historyserver
LeonhardFS Oct 2, 2021
3b28a74
Merge branch 'master' into historyserver
LeonhardFS Oct 18, 2021
d1adf01
Merge branch 'historyserver' of https://github.com/colby-anderson/tup…
LeonhardFS Oct 25, 2021
16adf5d
packaging webui with tuplex
LeonhardFS Oct 26, 2021
d8da913
improving startup script
LeonhardFS Oct 26, 2021
bd0b518
adding helpers to autostart mongodb/webui
LeonhardFS Oct 27, 2021
d18873d
dev
LeonhardFS Oct 27, 2021
2b3ce67
merged in master
LeonhardFS Oct 27, 2021
f3c4a25
autostart mongodb in case via python
LeonhardFS Oct 28, 2021
a494360
helper functions to startup webui
LeonhardFS Oct 28, 2021
9a584c0
Merge branch 'master' into webui
LeonhardFS Oct 28, 2021
fb8fb6e
proper gunicorn shutdown
LeonhardFS Oct 28, 2021
3c3d3dd
auto start webui
LeonhardFS Oct 28, 2021
2d6d476
printing webui hint
LeonhardFS Oct 28, 2021
5a03517
adding psutil to dependencies
LeonhardFS Oct 28, 2021
1fd7b0c
webui autostart works now
LeonhardFS Oct 28, 2021
8d1619d
updating dependencies, newer astor required
LeonhardFS Oct 28, 2021
d2365c7
fix
LeonhardFS Oct 29, 2021
8b85d0f
fix "." to not be detected as floating point number
LeonhardFS Oct 29, 2021
74d5328
Merge branch 'webui' of github.com:LeonhardFS/tuplex-public into webui
LeonhardFS Oct 29, 2021
522f88c
fixing code smell bug
LeonhardFS Oct 29, 2021
f8428c4
basic testing for WebUI
LeonhardFS Nov 1, 2021
4af5ded
Merge branch 'master' into webui
LeonhardFS Nov 1, 2021
b345c94
fix for options
LeonhardFS Nov 1, 2021
cf1a9d7
Merge branch 'webui' of github.com:LeonhardFS/tuplex-public into webui
LeonhardFS Nov 1, 2021
171eb3d
docker changes to include MongoDB in CI container
LeonhardFS Nov 1, 2021
dd9c564
refactor name to avoid pytest to pick up connect test function
LeonhardFS Nov 1, 2021
566b2b0
Merge branch 'webui' of github.com:LeonhardFS/tuplex-public into webui
LeonhardFS Nov 1, 2021
9e0bc7a
PR cleanup
LeonhardFS Nov 1, 2021
f581de3
passing extra CMake args to top-level setup
LeonhardFS Nov 1, 2021
0405059
updating root level setup.py to build tests as well when right option…
LeonhardFS Nov 2, 2021
d029b58
added mongodb script
LeonhardFS Nov 2, 2021
dc5aeb1
CI update
LeonhardFS Nov 2, 2021
1afbe14
script update
LeonhardFS Nov 2, 2021
f685c98
updating azure
LeonhardFS Nov 2, 2021
b904cc7
CI fix
LeonhardFS Nov 2, 2021
18b0d70
remove debug step
LeonhardFS Nov 2, 2021
4efb2b7
ci fix 2
LeonhardFS Nov 2, 2021
39e1463
fix
LeonhardFS Nov 2, 2021
e19e878
option fix
LeonhardFS Nov 2, 2021
df24c54
dependency update for webui test
LeonhardFS Nov 2, 2021
7242c5d
more logging
LeonhardFS Nov 2, 2021
4a24f45
fixes
LeonhardFS Nov 2, 2021
071889c
flask fix?
LeonhardFS Nov 3, 2021
5b53ded
fixing flask version
LeonhardFS Nov 3, 2021
1431b1c
refactor according to old PR comments
LeonhardFS Nov 3, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ jobs:
- job: 'tuplex'
timeoutInMinutes: 180
steps:
- script: sudo bash scripts/ubuntu1804/install_mongodb.sh
displayName: 'Install MongoDB'
- script: sudo bash scripts/ubuntu1804/install_reqs.sh
displayName: 'Install required packages'
- script: sudo apt-get install python3-setuptools && sudo apt-get remove python-pexpect python3-pexpect && sudo python3.7 -m pip install --upgrade pip && sudo python3.7 -m pip uninstall -y pygments && sudo python3.7 -m pip install pygments>=2.4.1 pexpect setuptools astor PyYAML jupyter nbformat && jupyter --version
# NOTE: version specifiers must be quoted - an unquoted `pygments>=2.4.1` is parsed by the shell
# as "install pygments and redirect stdout to a file named '=2.4.1'", dropping the constraint.
- script: sudo apt-get install -y python3-setuptools ninja-build && sudo apt-get remove -y python-pexpect python3-pexpect && sudo python3.7 -m pip install --upgrade pip && sudo python3.7 -m pip uninstall -y pygments && sudo python3.7 -m pip install pytest 'pygments>=2.4.1' pexpect setuptools astor PyYAML jupyter nbformat pymongo eventlet==0.30.0 gunicorn && jupyter --version
displayName: 'Install python dependencies'
- script: cd tuplex && mkdir build && cd build && cmake -DBUILD_WITH_ORC=ON -DLLVM_ROOT_DIR=/usr/lib/llvm-9 -DCMAKE_BUILD_TYPE=Release -DBUILD_FOR_CI=ON .. && make -j$(nproc)
- script: TUPLEX_BUILD_ALL=1 CMAKE_ARGS="-DBUILD_WITH_ORC=ON -DLLVM_ROOT_DIR=/usr/lib/llvm-9 -DCMAKE_BUILD_TYPE=Release -DBUILD_FOR_CI=ON" python3 setup.py install --user
displayName: 'Build Tuplex'
- script: cd tuplex && cd build && ctest --timeout 180 --output-on-failure
- script: cd build/temp.linux-x86_64-3.7 && ctest --timeout 180 --output-on-failure
displayName: 'C++ tests'
- script: cd tuplex/build/dist/python && python3.7 setup.py install --user && rm -rf build/third_party && sudo rm -rf tmp/*
displayName: 'Install tuplex package and clear tmp files'
- script: cd tuplex/build/dist/python && python3.7 -m pytest -x --full-trace -l
- script: cd build/temp.linux-x86_64-3.7/dist/python && python3.7 -m pytest -x --full-trace -l --log-cli-level debug
displayName: 'Python tests'
11 changes: 10 additions & 1 deletion scripts/build_wheel_linux.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
#!/usr/bin/env bash
# this script invokes the cibuildwheel process with necessary env variables to build the wheel for linux/docker

# check from where script is invoked
CWD="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"

echo "Executing buildwheel script located in $CWD"
pushd $CWD > /dev/null
cd ..

# delete dir if exists
rm -rf wheelhouse
# delete in tree build files
Expand All @@ -10,6 +16,7 @@ rm -rf tuplex/python/tuplex/libexec/tuplex*.so

# CIBUILDWHEEL CONFIGURATION
export CIBUILDWHEEL=1
export TUPLEX_BUILD_ALL=0
export CIBW_ARCHS_LINUX=native
export CIBW_MANYLINUX_X86_64_IMAGE='registry-1.docker.io/tuplex/ci:latest'

Expand All @@ -29,4 +36,6 @@ export CIBW_BUILD_VERBOSITY=3
export CIBW_PROJECT_REQUIRES_PYTHON=">=3.7"
cibuildwheel --platform linux .

cd scripts
popd > /dev/null

echo "Done!"
6 changes: 6 additions & 0 deletions scripts/docker/ci/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ RUN python3.10 -m pip install cloudpickle # numpy # pandas
# tuplex requirements
RUN bash /opt/sbin/install_tuplex_reqs.sh


## MongoDB community edition for WebUI testing
ADD mongodb-org-5.0.repo /etc/yum.repos.d/mongodb-org-5.0.repo
RUN yum update -y && yum install -y mongodb-org


# remove all the tmp stuff
RUN rm -rf /tmp/*

Expand Down
7 changes: 7 additions & 0 deletions scripts/docker/ci/mongodb-org-5.0.repo
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[mongodb-org-5.0]
name=MongoDB Repository
baseurl=https://repo.mongodb.org/yum/redhat/$releasever/mongodb-org/5.0/x86_64/
gpgcheck=1
enabled=1
gpgkey=https://www.mongodb.org/static/pgp/server-5.0.asc

10 changes: 10 additions & 0 deletions scripts/ubuntu1804/install_mongodb.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env bash
# Installs the MongoDB 5.0 community edition (package mongodb-org) from the
# official MongoDB apt repository for Ubuntu bionic.
# adapted from https://www.digitalocean.com/community/tutorials/how-to-install-mongodb-on-ubuntu-18-04-source
# and https://docs.mongodb.com/manual/tutorial/install-mongodb-on-ubuntu/
# needs sudo (run the whole script as root, e.g. `sudo bash install_mongodb.sh`)

# Abort on the first failing command (including a failed download on the left
# side of a pipe) instead of continuing with a half-configured repository.
set -euo pipefail

# import the MongoDB release signing key
curl -fsSL https://www.mongodb.org/static/pgp/server-5.0.asc | apt-key add -
# register the MongoDB 5.0 package source
echo "deb [ arch=amd64,arm64 ] https://repo.mongodb.org/apt/ubuntu bionic/mongodb-org/5.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-5.0.list
# apt-get (not apt) because apt's command line interface is not stable for scripts
apt-get update
apt-get install -y mongodb-org
131 changes: 119 additions & 12 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,23 @@
import subprocess
import logging
import shutil
import distutils
import distutils.dir_util
import platform
import shlex
import shutil

from setuptools import setup, Extension, find_packages
from setuptools.command.build_ext import build_ext
from distutils import sysconfig

import fnmatch
import re
import atexit

# configure logging here
logging.basicConfig(level=logging.INFO)


# TODO: add option to install these
test_dependencies = [
Expand All @@ -24,6 +33,16 @@
'pytest>=5.3.2',
]

# Also requires to install MongoDB
webui_dependencies = [
'Flask>=2.0.2',
'gunicorn',
'eventlet==0.30.0', # newer versions of eventlet have a bug under MacOS
'flask-socketio',
'flask-pymongo',
'iso8601'
]

install_dependencies = [
'attrs>=19.2.0',
'dill>=0.2.7.1',
Expand All @@ -32,12 +51,14 @@
'pygments>=2.4.1',
'six>=1.11.0',
'wcwidth>=0.1.7',
'astor>=0.7.1',
'astor',
'prompt_toolkit',
'jedi',
'cloudpickle>=0.6.1',
'PyYAML>=3.13'
]
'PyYAML>=3.13',
'psutil',
'pymongo'
] + webui_dependencies

def ninja_installed():
# check whether ninja is on the path
Expand All @@ -52,6 +73,17 @@ def find_files(pattern, path):
result.append(os.path.join(root, name))
return result

def remove_temp_files(build_dir):
    """
    remove temp cmake files but LEAVE files necessary to run ctest.

    Every direct child of build_dir is deleted except the entries 'dist',
    'test' and 'CTestTestfile.cmake'.

    :param build_dir: path of the cmake build directory to clean up
    :raises OSError: if build_dir can not be listed or an entry can not be deleted
    """
    preserved = {'dist', 'test', 'CTestTestfile.cmake'}
    for name in os.listdir(build_dir):
        if name in preserved:
            continue
        path = os.path.join(build_dir, name)
        # Only real directories go through rmtree: shutil.rmtree refuses to
        # operate on symbolic links, and dangling links / special files are
        # neither regular files nor directories. Those are unlinked instead,
        # which removes the link itself and never the link target.
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        else:
            os.remove(path)

# Convert distutils Windows platform specifiers to CMake -A arguments
PLAT_TO_CMAKE = {
Expand All @@ -72,6 +104,7 @@ def __init__(self, name, sourcedir=""):
class CMakeBuild(build_ext):

def build_extension(self, ext):

ext_filename = str(ext.name)
ext_filename = ext_filename[ext_filename.rfind('.') + 1:] # i.e. this is "tuplex"
extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))
Expand Down Expand Up @@ -227,17 +260,58 @@ def find_pkg_path(lines):

# because the goal of setup.py is to only build the package, build only target tuplex.
# changed from before.
build_args += ['--target', 'tuplex']

def parse_bool_option(key):
    """
    read a boolean switch from the environment.

    :param key: name of the environment variable to look up
    :return: True if the variable is set to 'on', 'yes', 'true' or '1'
             (case-insensitive). False if it is unset, empty, set to
             'off', 'no', 'false' or '0', or set to any other value.
    """
    val = os.environ.get(key, None)
    if not val:
        return False
    # Explicit "off" spellings as well as unrecognized values disable the option;
    # previously the "off" spellings were mistakenly reported as enabled.
    return val.lower() in ('on', 'yes', 'true', '1')


BUILD_ALL = parse_bool_option('TUPLEX_BUILD_ALL')
if BUILD_ALL is True:
# build everything incl. all google tests...
logging.info('Building all Tuplex targets (incl. tests)...')
else:
# restrict to shared object only...
logging.info('Building only shared objects...')
build_args += ['--target', 'tuplex']

# hack: only run for first invocation!
if ext_filename == 'tuplex_runtime':
return

print('configuring cmake with: {}'.format(' '.join(["cmake", ext.sourcedir] + cmake_args)))
# check environment variable CMAKE_ARGS and overwrite whichever args are passed there
if len(os.environ.get('CMAKE_ARGS', '')) > 0:
    # shlex keeps quoted values (e.g. -DFOO="a b") together as one argument
    extra_args = shlex.split(os.environ['CMAKE_ARGS'])

    print(cmake_args)
    for arg in extra_args:
        # cmake option in the style of -D<NAME>=... / -G<NAME>=...
        # The name may contain letters, digits and underscores; the matched text
        # up to and including '=' is the key used to find an existing argument.
        m = re.search(r"-[DG][A-Za-z0-9_]+=", arg)
        if m:
            key = m[0].lower()
            # search for an existing arg with the same key, if found replace!
            idx = next((i for i, existing in enumerate(cmake_args)
                        if existing.lower().strip().startswith(key)), None)
            if idx is not None:
                cmake_args[idx] = arg
            else:
                # append!
                cmake_args.append(arg)
        else:
            # not a key=value option, simply pass it through
            cmake_args.append(arg)

logging.info('configuring cmake with: {}'.format(' '.join(["cmake", ext.sourcedir] + cmake_args)))
logging.info('compiling with: {}'.format(' '.join(["cmake", "--build", "."] + build_args)))
subprocess.check_call(
["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp
)
print('compiling with: {}'.format(' '.join(["cmake", "--build", "."] + build_args)))
logging.info('configuration done, workdir={}'.format(self.build_temp))
subprocess.check_call(
["cmake", "--build", "."] + build_args, cwd=self.build_temp
)
Expand Down Expand Up @@ -286,10 +360,19 @@ def find_pkg_path(lines):

# run clean, to reclaim space
# also remove third_party folder, because it is big!
print('running cmake clean target to reclaim space')
subprocess.check_call(
['cmake', '--build', '.', '--target', 'clean'], cwd=self.build_temp
)

# this will remove test executables as well...
if not BUILD_ALL:
logging.info('Running cmake clean target to reclaim space')
subprocess.check_call(
['cmake', '--build', '.', '--target', 'clean'], cwd=self.build_temp
)
else:
# when build all is hit, preserve test files
# i.e. need folders test, dist and CTestTestfile.cmake
logging.info('Removing temporary build files, preserving test files...')
remove_temp_files(self.build_temp)

subprocess.check_call(
['rm', '-rf', 'third_party'], cwd=self.build_temp
)
Expand Down Expand Up @@ -334,6 +417,26 @@ def read_readme():
long_description = f.read()
return long_description

def reorg_historyserver():
    """
    Stage the history server sources inside the python package tree so that
    they get shipped as part of the pip package.

    The directory tuplex/historyserver (relative to this file) is copied to
    tuplex/python/tuplex/historyserver, and the copy is deleted again when the
    interpreter exits.

    :return: an empty list, so the call can be concatenated with a package list
    """
    import pathlib

    # everything is resolved relative to the directory containing this file
    repo_root = pathlib.Path(__file__).parent.resolve()

    src_path = os.path.join(repo_root, 'tuplex', 'historyserver')
    assert os.path.exists(src_path), 'Could not find historyserver root dir'

    # copy all the history server files below the python package
    dst_path = os.path.join(repo_root, 'tuplex', 'python', 'tuplex', 'historyserver')
    distutils.dir_util.copy_tree(src_path, dst_path)

    # the copy is only needed while setup runs, drop it at interpreter exit
    atexit.register(shutil.rmtree, dst_path)

    return []

# The information here can also be placed in setup.cfg - better separation of
# logic and declaration, and simpler if you include description/version in a file.
Expand All @@ -346,11 +449,14 @@ def read_readme():
"together with a query compiler featuring whole-stage code generation and optimization.",
long_description=read_readme(),
long_description_content_type='text/markdown',
packages=discover_packages(where="tuplex/python"),
packages=reorg_historyserver() + discover_packages(where="tuplex/python"),
package_dir={"": "tuplex/python"},
package_data={
# include libs in libexec
'tuplex.libexec' : ['*.so', '*.dylib']
'tuplex.libexec' : ['*.so', '*.dylib'],
'tuplex.historyserver': ['thserver/templates/*.html', 'thserver/static/css/*.css', 'thserver/static/css/styles/*.css',
'thserver/static/img/*.*', 'thserver/static/js/*.js', 'thserver/static/js/modules/*.js',
'thserver/static/js/styles/*.css']
},
ext_modules=[CMakeExtension("tuplex.libexec.tuplex", "tuplex"), CMakeExtension("tuplex.libexec.tuplex_runtime", "tuplex")],
cmdclass={"build_ext": CMakeBuild},
Expand Down Expand Up @@ -385,6 +491,7 @@ def read_readme():
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
],
scripts=['tuplex/historyserver/bin/tuplex-webui'],
project_urls={
"Bug Tracker": "https://github.com/tuplex",
"Documentation": "https://tuplex.cs.brown.edu/python-api.html",
Expand Down
6 changes: 6 additions & 0 deletions tuplex/core/include/ContextOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ namespace tuplex {
return get("tuplex.aws.scratchDir");
}

/*!
* return options as JSON string (string,string keys)
* @return JSON
*/
std::string asJSON() const;

/*!
* saves current configuration object to yaml file
* @param uri path where to store the data
Expand Down
2 changes: 1 addition & 1 deletion tuplex/core/include/ee/local/LocalBackend.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ namespace tuplex {
* @param combine whether this is an aggregate (e.g. if we should call the aggregate combiner, rather than simply merging the hashtables)
* @return the final hashtable sink
*/
HashTableSink createFinalHashmap(std::vector<IExecutorTask*>& tasks, int hashtableKeyByteWidth, bool combine);
HashTableSink createFinalHashmap(const std::vector<const IExecutorTask*>& tasks, int hashtableKeyByteWidth, bool combine);

// hash join stage
void executeHashJoinStage(HashJoinStage* hstage);
Expand Down
10 changes: 5 additions & 5 deletions tuplex/core/include/physical/PhysicalPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ namespace tuplex {

IBackend* backend() const { return _context.backend(); }

std::shared_ptr<HistoryServerConnector> _hs;

// ---- OLD CODE -----
// experimental: AWS backend
LogicalPlan *_lp;
Expand All @@ -67,11 +65,13 @@ namespace tuplex {
std::vector<std::string> outColumns;
};

File2FilePipeline planFile2FilePipeline(const std::vector<LogicalOperator*>& nodes, bool allowUndefinedBehavior);
void executeFile2FilePipelineViaLambda(const File2FilePipeline& pip);
void executeWithParts(const tuplex::PhysicalPlan::File2FilePipeline &pip);
double aggregateSamplingTime() const;
public:
/*!
* gets the number of stages in a physical plan
* @returns number of stages in the physical plan
*/
size_t getNumStages() const { return _num_stages; }

PhysicalPlan(LogicalPlan* optimizedPlan, LogicalPlan* originalPlan, const Context& context);

Expand Down
11 changes: 11 additions & 0 deletions tuplex/core/include/physical/PhysicalStage.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include "ResultSet.h"
#define EOF (-1)
#include <nlohmann/json.hpp>
#include <HistoryServerConnector.h>
#include <logical/LogicalOperator.h>

namespace tuplex {

Expand All @@ -26,6 +28,8 @@ namespace tuplex {
class LogicalPlan;
class Context;
class ResultSet;
class LogicalOperator;
class HistoryServerConnector;

// various sinks/sources/...
enum class EndPointMode {
Expand All @@ -44,16 +48,23 @@ namespace tuplex {
std::vector<PhysicalStage*> _predecessors;
int64_t _number;
std::unordered_map<std::tuple<int64_t, ExceptionCode>, size_t> _ecounts; //! exception counts for this stage.
std::vector<LogicalOperator*> _operators; //! operators belonging to stage.
protected:
IBackend* _backend;
std::shared_ptr<HistoryServerConnector> _historyServer;
public:
/*!
 * sets the history server connector of this stage
 * @param hsc shared pointer to the connector, stored in _historyServer
 */
void setHistoryServer(std::shared_ptr<HistoryServerConnector> hsc) { _historyServer = hsc; }
PhysicalStage() = delete;
PhysicalStage(PhysicalPlan *plan, IBackend* backend, int64_t number, std::vector<PhysicalStage*> predecessors=std::vector<PhysicalStage*>()) : _plan(plan), _backend(backend), _number(number), _predecessors(predecessors) {
// allow plan/backend to be nullptrs for dummy stage in lambda executor.
}

virtual ~PhysicalStage();

/*!
 * gets the operators belonging to this stage
 * @return copy of the vector of operator pointers held in _operators
 */
std::vector<LogicalOperator*> operators() const { return _operators; }

/*!
 * sets the operators belonging to this stage, replacing the current contents of _operators
 * @param operators vector of operator pointers to assign to this stage
 */
void setOperators(std::vector<LogicalOperator*> operators) { _operators = operators; }

std::vector<PhysicalStage*> predecessors() const { return _predecessors; }

/*!
Expand Down
Loading