#!/usr/bin/env python3
# top-level setup file to create a package uploadable to PyPI.
# -*- coding: utf-8 -*-
import os
import pathlib
import sys
import sysconfig as pyconfig
import subprocess
import logging
import shutil
import distutils
import distutils.dir_util
import platform
import shlex
import shutil

import setuptools
from setuptools import setup, Extension, find_packages
from setuptools.command.build_ext import build_ext
from distutils import sysconfig

import fnmatch
import re
import atexit

# variables for build configuration
# LLVM installation root handed to CMake (as LLVM_ROOT / LLVM_ROOT_DIR, and as the
# base for LLVM_DIR) when the build runs under cibuildwheel (CIBUILDWHEEL=1).
LLVM_CI_ROOT_DIR = '/opt/llvm-16.0.6'

def in_google_colab():
    """
    Check whether the framework runs in a Google Colab environment.

    Two independent signals are probed: whether the ``google.colab`` package
    can be imported, and whether the active IPython shell (if there is one)
    identifies itself as a Colab shell.

    Returns:
        True if Tuplex is running in Google Colab, False otherwise.
    """
    try:
        import google.colab  # noqa: F401 -- imported only to probe availability
        return True
    except Exception:
        # Package missing or broken -> fall through to the shell check.
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate.
        pass

    try:
        # get_ipython is only injected as a builtin inside IPython sessions;
        # outside of them this raises NameError.
        return 'google.colab' in str(get_ipython())
    except Exception:
        return False

# configure logging here
logging.basicConfig(level=logging.INFO)


# fixes for google colab
colab_requirements = ['urllib3==1.26.7']
# urllib3 1.26.7


# TODO: add option to install these
# Packages needed to run the test suite (not passed to setup() yet).
test_dependencies = [
    'nbconvert<7.0',  # was misspelled 'nbocnvert', which is not an installable package
    'jupyter<7.0',
    'nbformat<7.0',
    'prompt_toolkit>=2.0.7',
    'pytest>=5.3.2'
]

# Packages required by the web UI (history server).
# Also requires to install MongoDB
webui_dependencies = [
    'Flask>=2.0.2,<2.2.0',
    'Werkzeug<2.2.0',
    'gunicorn',
    'eventlet==0.30.0', # newer versions of eventlet have a bug under MacOS
    'flask-socketio',
    'flask-pymongo',
    'iso8601'
]

def run_command(cmd, cwd, env):
    """
    Run command `cmd` as a subprocess and log its captured stdout/stderr.

    :param cmd: command to run (list of strings)
    :param cwd: working directory for command
    :param env: environment dictionary
    :return: None. The exit code is only logged; a non-zero exit code does
             NOT raise.
    :raises subprocess.CalledProcessError: if an unexpected (non-OSError)
             error occurs while launching or communicating with the process.
             OSErrors (e.g. executable not found) are logged and swallowed.
    """

    output, error = None, None
    proc = None
    try:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd, env=env)
        output, error = proc.communicate()
        if output:
            logging.info(f"ret> {proc.returncode}")
            # errors='replace': tool output is not guaranteed to be valid UTF-8
            logging.info(f"OK> output {output.decode(errors='replace')}")
        if error:
            logging.info(f"ret> {proc.returncode}")
            logging.info(f"Error> error {error.decode(errors='replace').strip()}")
    except OSError as e:
        # OSError is a builtin; the former `os.OSError` does not exist and
        # turned every launch failure into an AttributeError.
        logging.error(f"OSError > {e.errno}")
        logging.error(f"OSError > {e.strerror}")
        logging.error(f"OSError > {e.filename}")
    except Exception as e:
        logging.error(f"Error > {sys.exc_info()[0]}")
        returncode = 1
        if proc is not None and proc.returncode is not None:
            returncode = proc.returncode
        raise subprocess.CalledProcessError(returncode, cmd, output, error) from e

# dependencies for AWS Lambda backend...
# boto is broken currently...
aws_lambda_dependencies = []

# manual fix for google colab
# install_dependencies is what gets passed to setup(install_requires=...).
if in_google_colab():
    logging.debug('Building dependencies for Google Colab environment')

    # Colab variant: pins urllib3/folium and leaves out the web UI dependencies.
    install_dependencies = [
        'urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1',
        'folium==0.2.1',  # trailing comma was missing, fusing this with 'requests'
        'requests',
        'attrs>=19.2.0',
        'dill>=0.2.7.1',
        'pluggy',
        'py>=1.5.2',
        'pygments>=2.4.1',
        'six>=1.11.0',
        'wcwidth>=0.1.7',
        'astor',
        'prompt_toolkit',
        'jedi',
        "cloudpickle>=0.6.1,<2.0.0;python_version<'3.10'",
        "cloudpickle>=2.1.0;python_version=='3.10'",
        "cloudpickle>=2.2.1;python_version>='3.11'",
        'PyYAML>=3.13',
        'psutil',
        'pymongo',
        'iso8601'
    ]
else:
    logging.debug('Building dependencies for non Colab environment')

    # Default variant: web UI dependencies + core dependencies + AWS Lambda dependencies.
    install_dependencies = webui_dependencies + [
        'attrs>=19.2.0',
        'dill>=0.2.7.1',
        'pluggy',
        'py>=1.5.2',
        'pygments>=2.4.1',
        'six>=1.11.0',
        'wcwidth>=0.1.7',
        'astor',
        'prompt_toolkit',
        'jedi',
        "cloudpickle>=0.6.1,<2.0.0;python_version<'3.10'",
        "cloudpickle>=2.1.0;python_version=='3.10'",
        "cloudpickle>=2.2.1;python_version>='3.11'",
        'PyYAML>=3.13',
        'psutil',
        'pymongo',
        'iso8601'
    ] + aws_lambda_dependencies

def ninja_installed():
    """
    Check whether a ``ninja`` executable can be found on the PATH.

    Uses shutil.which instead of distutils.spawn.find_executable, because
    distutils was removed from the standard library in Python 3.12.

    Returns:
        True if ``ninja`` was found, False otherwise.
    """
    return shutil.which('ninja') is not None

def find_files(pattern, path):
    """
    Recursively collect all files below `path` whose file name matches `pattern`.

    :param pattern: fnmatch-style pattern applied to the file name only (not the directory part)
    :param path: directory to start the search from
    :return: list of matching file paths, each joined with the directory it was found in
    """
    return [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(path)
        for filename in filenames
        if fnmatch.fnmatch(filename, pattern)
    ]

def remove_temp_files(build_dir):
    """
    Delete every entry directly inside `build_dir` except the ones needed to
    run ctest afterwards ('dist', 'test' and 'CTestTestfile.cmake').

    :param build_dir: cmake build directory to clean up
    """
    keep = {'dist', 'test', 'CTestTestfile.cmake'}
    for entry in set(os.listdir(build_dir)) - keep:
        target = os.path.join(build_dir, entry)
        if not os.path.isfile(target):
            # anything that is not a regular file is removed as a directory tree
            shutil.rmtree(target)
        else:
            os.remove(target)

# Convert distutils Windows platform specifiers to CMake -A arguments.
# Looked up with the build command's plat_name when the MSVC compiler is used
# together with a multi-config generator that carries no architecture in its name.
PLAT_TO_CMAKE = {
    "win32": "Win32",
    "win-amd64": "x64",
    "win-arm32": "ARM",
    "win-arm64": "ARM64",
}


# subclassing both install/develop in order to process custom options
from setuptools import Command
import setuptools.command.install
import setuptools.command.develop


# check environment variables and print
# TUPLEX_BUILD_TYPE takes precedence over CMAKE_BUILD_TYPE; if neither is set
# (or both are empty) the build type defaults to Release.
build_type = None
if os.environ.get('TUPLEX_BUILD_TYPE', None):
    build_type = os.environ['TUPLEX_BUILD_TYPE']
    logging.info('Found TUPLEX_BUILD_TYPE environment variable, setting C extension build type to {}'.format(build_type))
elif os.environ.get('CMAKE_BUILD_TYPE', None):
    build_type = os.environ['CMAKE_BUILD_TYPE']
    logging.info('Found CMAKE_BUILD_TYPE environment variable, setting C extension build type to {}'.format(build_type))
else:
    build_type = 'Release' # per default

supported_modes = ['Debug', 'Release', 'RelWithDebInfo', 'MinSizeRel', 'tsan', 'asan']

# build types are matched case-insensitively and then normalized to the
# canonical spelling listed in supported_modes
_canonical_build_types = {mode.lower(): mode for mode in supported_modes}
if build_type.lower() not in _canonical_build_types:
    logging.error('Unsupported build type {} found, aborting build.'.format(build_type))
    sys.exit(1)
build_type = _canonical_build_types[build_type.lower()]

# mutable on purpose: DevelopCommand.run may override BUILD_TYPE later,
# CMakeBuild.build_extension reads it.
build_config = {'BUILD_TYPE' : build_type}
logging.info('Building Tuplex with build type {}'.format(build_type))

class DevelopCommand(setuptools.command.develop.develop):
    """
    `develop` command extended by the flags --debug and --relwithdebinfo,
    which override the build type stored in the module-level build_config.
    """

    user_options = setuptools.command.develop.develop.user_options + [
        ('debug', None, 'Create debug version of Tuplex, Release per default'),
        ('relwithdebinfo', None, 'Create Release With Debug Info version of Tuplex, Release per default')
    ]

    def initialize_options(self):
        """Initialize the inherited options and reset both custom flags."""
        super().initialize_options()
        self.debug = None
        self.relwithdebinfo = None

    def finalize_options(self):
        """Nothing custom to finalize, defer to the base command."""
        super().finalize_options()

    def run(self):
        """Apply the requested build type to build_config, then run the regular develop command."""
        # later entries win: --relwithdebinfo overrides --debug if both are given
        requested = (
            (self.debug, 'Debug'),
            (self.relwithdebinfo, 'RelWithDebInfo'),
        )
        for flag_set, mode in requested:
            if flag_set:
                build_config['BUILD_TYPE'] = mode

        super().run()

# A CMakeExtension needs a sourcedir instead of a file list.
# The name must be the _single_ output extension from the CMake build.
# If you need multiple extensions, see scikit-build.
class CMakeExtension(Extension):
    """Extension without source files; it only records the (absolute) CMake source directory."""

    def __init__(self, name, sourcedir=""):
        """
        :param name: full dotted name of the extension module
        :param sourcedir: directory holding the CMake project, stored as absolute path in self.sourcedir
        """
        # no sources: compilation is delegated to CMake
        super().__init__(name, sources=[])
        self.sourcedir = os.path.abspath(sourcedir)

class CMakeBuild(build_ext):
    """
    ``build_ext`` command that builds the Tuplex native libraries with CMake.

    The CMake project is configured and built inside ``self.build_temp``; the
    resulting ``tuplex`` and ``tuplex_runtime`` shared objects are then copied
    into the directory where setuptools expects the extension.
    """

    def build_extension(self, ext):
        """
        Configure and build `ext` with CMake and copy the built shared objects
        into the extension directory.

        Both shared objects are produced and copied while processing the first
        extension; for the ``tuplex_runtime`` extension the method returns
        before invoking CMake.

        Environment variables consulted: TUPLEX_LAMBDA_ZIP, CMAKE_GENERATOR,
        CIBUILDWHEEL, CMAKE_BUILD_PARALLEL_LEVEL, TUPLEX_BUILD_ALL, CMAKE_ARGS,
        MACOSX_DEPLOYMENT_TARGET.

        :param ext: CMakeExtension to build (``ext.sourcedir`` is the CMake source dir)
        :raises subprocess.CalledProcessError: if the cmake build or clean step fails
        :raises FileNotFoundError: if the built shared objects cannot be located
        """

        macos_build_target = ''

        ext_filename = str(ext.name)
        ext_filename = ext_filename[ext_filename.rfind('.') + 1:]  # i.e. this is "tuplex"
        extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))

        # for whatever reason below lambda copying doesn't work, hence manually copy to extension dir
        # extdir = /project/build/lib.linux-x86_64-3.7/tuplex/libexec/ e.g.
        tplx_lib_root = pathlib.Path(extdir).parent

        # required for auto-detection of auxiliary "native" libs
        if not extdir.endswith(os.path.sep):
            extdir += os.path.sep

        logging.info('Extension dir is: {}'.format(extdir))
        logging.info('Build temp is: {}'.format(self.build_temp))

        lambda_zip = os.environ.get('TUPLEX_LAMBDA_ZIP', None)
        if lambda_zip:

            tplx_src_root = os.path.abspath(os.path.dirname(__file__))
            tplx_package_root = os.path.join(tplx_src_root, 'tuplex', 'python')

            # check whether file exists under the given directory
            if not os.path.isfile(lambda_zip):
                logging.warning('file {} not found'.format(lambda_zip))

                # check if perhaps tplxlam.zip exists relative to source root?
                alt_path = os.path.join(tplx_package_root, 'tuplex', 'other', 'tplxlam.zip')
                if os.path.isfile(alt_path):
                    logging.info('Found tplxlam.zip under {}, using...'.format(alt_path))
                    lambda_zip = alt_path
                else:
                    logging.warning("Tuplex Lambda runner not found, not packaging.")
                    lambda_zip = None

            if lambda_zip and os.path.isfile(lambda_zip):
                logging.info('Packaging Tuplex Lambda runner')

                # need to copy / link zip file into temp dir
                # -> this is the root setup.py file, hence find root
                logging.info('Root path is: {}'.format(tplx_package_root))
                zip_target = os.path.join(self.build_temp, 'tuplex', 'other')
                os.makedirs(zip_target, exist_ok=True)
                zip_dest = os.path.join(zip_target, 'tplxlam.zip')
                shutil.copyfile(lambda_zip, zip_dest)
                logging.info('Copied {} to {}'.format(lambda_zip, zip_dest))

                alt_dest = os.path.join(tplx_lib_root, 'other')
                os.makedirs(alt_dest, exist_ok=True)
                shutil.copyfile(lambda_zip, os.path.join(alt_dest, 'tplxlam.zip'))
                logging.info('Copied {} to {} as well'.format(lambda_zip, os.path.join(alt_dest, 'tplxlam.zip')))

        # get from BuildType info
        cfg = build_config['BUILD_TYPE']
        logging.info('Building Tuplex in {} mode'.format(cfg))

        # CMake lets you override the generator - we need to check this.
        # Can be set with Conda-Build, for example.
        cmake_generator = os.environ.get("CMAKE_GENERATOR", "")

        py_maj_min = "{}.{}".format(sys.version_info.major, sys.version_info.minor)

        llvm_root = None
        boost_include_dir = None
        py_include_dir = None
        py_libs_dir = None

        # check whether run with cibuildwheel:
        # Note: manylinux2014 does NOT contain the shared objects, therefore
        #       can't build/test testcore etc. => only build tuplex
        if os.environ.get('CIBUILDWHEEL', '0') == '1':
            # run in cibuildwheel, adjust options to fit docker image...

            # e.g., to adjust use:
            # /opt/_internal/cpython-3.7.10/bin/python3-config  --ldflags
            # -L/opt/_internal/cpython-3.7.10/lib/python3.7/config-3.7m-x86_64-linux-gnu -L/opt/_internal/cpython-3.7.10/lib -lpython3.7m -lcrypt -lpthread -ldl  -lutil -lm

            # command that works:
            # cmake -DPython3_INCLUDE_DIRS=/opt/python/cp37-cp37m/include/python3.7/ \
            #       -DPython3_LIBRARY=/opt/python/cp37-cp37m/lib/python3.7/ \
            #       -DBoost_INCLUDE_DIR=/opt/boost/python3.7/include/ \
            #       -DLLVM_ROOT=/usr/lib64/llvm9.0/ ..
            llvm_root = LLVM_CI_ROOT_DIR # set via variable (configurable above)
            boost_include_dir = '/opt/boost/python{}/include/'.format(py_maj_min)
            py_include_dir = pyconfig.get_paths()['include']
            py_libs_dir = pyconfig.get_paths()['stdlib']

            # Mac OS? Use boost python versions!
            # /usr/local/Cellar/boost/1.75.0_2
            if platform.system().lower() == 'darwin':
                # mac os, use brewed versions!
                out_py = subprocess.check_output(['brew', 'info', 'python3']).decode()

                def find_pkg_path(lines):
                    # first line of the brew output mentioning usr/local, or None
                    ans = list(filter(lambda x: 'usr/local' in x, lines.split('\n')))
                    return None if 0 == len(ans) else ans[0]

                out_py = find_pkg_path(out_py)
                if out_py:
                    logging.info('Found python3 @ {}'.format(out_py))

                # setups find everything automatically...
                llvm_root = None
                boost_include_dir = None
                py_include_dir = None
                py_libs_dir = None

        # Set Python_EXECUTABLE instead if you use PYBIND11_FINDPYTHON
        cmake_args = [
            "-DBUILD_NATIVE=OFF", # disable march=native to avoid issues.
            # "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}".format(extdir),
            "-DPYTHON_EXECUTABLE={}".format(sys.executable),
            "-DCMAKE_BUILD_TYPE={}".format(cfg),  # not used on MSVC, but no harm
            "-DPYTHON3_VERSION={}".format(py_maj_min),
        ]

        # set correct mac os target
        if platform.system().lower() == 'darwin':
            macos_build_target = '10.13'
            try:
                macos_version = platform.mac_ver()[0]
                macos_major_version = int(macos_version.split('.')[0])
                if macos_major_version >= 11:
                    macos_build_target = '{}.0'.format(macos_major_version)
                logging.info("Found macOS {}, using build target {}".format(macos_version, macos_build_target))
            except Exception as e:
                # str.format here: logging itself uses %-style placeholders, so
                # passing `e` as an extra argument to a '{}' message fails to format.
                logging.error('Could not detect macos version via python, details: {}'.format(e))

                try:
                    # use process MACOS_VERSION=$(sw_vers -productVersion)
                    completed = subprocess.run(['sw_vers', '-productVersion'],
                                               stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                               encoding='utf-8')
                    out = completed.stdout.strip()
                    logging.info('Detected MacOS version {}'.format(out))
                    macos_major_version = int(out.split('.')[0])
                    if macos_major_version >= 11:
                        macos_build_target = '{}.0'.format(macos_major_version)
                except Exception:
                    logging.error('Could not detect macos version, defaulting to macos 10.13 as build target')

            # special case: Python3.8 earlier, widely deployed versions only support suffix 10_13 or up to 10.16 so use that as target
            if sys.version_info.major == 3 and sys.version_info.minor == 8:
                # was `!= "10.13" or != "10.16"`, which is always true
                if macos_build_target not in ("10.13", "10.16"):
                    logging.warning(f"Building Tuplex with Python {sys.version_info}, however earlier versions of Python 3.8 can only comprehend tag 10_13, using therefore deployment target 10.13")
                    macos_build_target = "10.13"

            logging.info(f"Building with macOS platform tag {macos_build_target}")
            # get mac OS version
            cmake_args.append('-DCMAKE_OSX_DEPLOYMENT_TARGET={}'.format(macos_build_target))

        # add version info if not dev
        # The pattern must be applied to the version itself: matching against the
        # full "-DVERSION_INFO=..." string could never succeed.
        version = self.distribution.get_version()
        if re.fullmatch(r'\d+\.\d+\.\d+', version):
            cmake_args.append("-DVERSION_INFO={}".format(version))

        if llvm_root is not None:
            cmake_args.append('-DLLVM_ROOT={}'.format(llvm_root))
            if os.environ.get('CIBUILDWHEEL', '0') == '1':
                # ci buildwheel?
                # /opt/llvm-9.0/lib/cmake/llvm/
                # components must be relative: os.path.join drops everything
                # before an absolute component like '/lib/cmake/llvm'
                prefix_path = os.path.join(llvm_root, 'lib', 'cmake', 'llvm')

                #cmake_args.append('-DCMAKE_PREFIX_PATH={}'.format(prefix_path))
                cmake_args.append('-DLLVM_DIR={}'.format(prefix_path))
                cmake_args.append('-DLLVM_ROOT_DIR={}'.format(llvm_root))

        if py_include_dir is not None:
            cmake_args.append('-DPython3_INCLUDE_DIRS={}'.format(py_include_dir))
        if py_libs_dir is not None:
            cmake_args.append('-DPython3_LIBRARY={}'.format(py_libs_dir))
        if boost_include_dir is not None:
            cmake_args.append('-DBoost_INCLUDE_DIR={}'.format(boost_include_dir))

        build_args = []
        if self.compiler.compiler_type != "msvc":
            # Using Ninja-build since it a) is available as a wheel and b)
            # multithreads automatically. MSVC would require all variables be
            # exported for Ninja to pick it up, which is a little tricky to do.
            # Users can override the generator with CMAKE_GENERATOR in CMake
            # 3.15+.
            if not cmake_generator:

                # yet, check if Ninja exists...
                if ninja_installed():
                    cmake_args += ["-GNinja"]
        else:

            # Single config generators are handled "normally"
            single_config = any(x in cmake_generator for x in {"NMake", "Ninja"})

            # CMake allows an arch-in-generator style for backward compatibility
            contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"})

            # Specify the arch if using MSVC generator, but only if it doesn't
            # contain a backward-compatibility arch spec already in the
            # generator name.
            if not single_config and not contains_arch:
                cmake_args += ["-A", PLAT_TO_CMAKE[self.plat_name]]

            # Multi-config generators have a different way to specify configs
            if not single_config:
                # "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), extdir)
                build_args += ["--config", cfg]

        # Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level
        # across all generators.
        if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ:
            # self.parallel is a Python 3 only way to set parallel jobs by hand
            # using -j in the build_ext call, not supported by pip or PyPA-build.
            if hasattr(self, "parallel") and self.parallel:
                # CMake 3.12+ only.
                build_args += ["-j{}".format(self.parallel)]

        os.makedirs(self.build_temp, exist_ok=True)

        ## on cibuildwheel b.c. manylinux2014 does not have python shared objects, build
        ## only tuplex target (the python shared object)
        #if os.environ.get('CIBUILDWHEEL', '0') == '1':

        # because the goal of setup.py is to only build the package, build only target tuplex.
        # changed from before.

        def parse_bool_option(key):
            # True only for an explicit affirmative value; unset, empty, negative
            # ('off', 'no', 'false', '0') and unknown values all yield False.
            val = os.environ.get(key, None)
            if not val:
                return False
            return val.strip().lower() in ('on', 'yes', 'true', '1')

        BUILD_ALL = parse_bool_option('TUPLEX_BUILD_ALL')
        if BUILD_ALL is True:
            # build everything incl. all google tests...
            logging.info('Building all Tuplex targets (incl. tests)...')
        else:
            # restrict to shared object only...
            logging.info('Building only shared objects...')
            build_args += ['--target', 'tuplex', 'runtime']

        # hack: only run for first invocation!
        if ext_filename == 'tuplex_runtime':
            return

        # check environment variable CMAKE_ARGS and overwrite whichever args are passed there
        if len(os.environ.get('CMAKE_ARGS', '')) > 0:
            logging.info('Found CMAKE_ARGS in environment: {}'.format(os.environ.get('CMAKE_ARGS')))
            extra_args = shlex.split(os.environ['CMAKE_ARGS'])
            logging.info('Args: {}'.format(extra_args))
            print(cmake_args)
            # cmake option in the style of -D/-G=?
            # (A-Z, not A-z; digits allowed so that e.g. -DPython3_LIBRARY= is recognized)
            option_pattern = re.compile(r"-[DG][a-zA-Z0-9_]+=")
            for arg in extra_args:
                m = option_pattern.search(arg)
                if m:
                    # search for the same option in existing args, if found replace!
                    option_prefix = m[0].lower()
                    idx = next((i for i, existing in enumerate(cmake_args)
                                if existing.lower().strip().startswith(option_prefix)), None)
                    if idx is not None:
                        cmake_args[idx] = arg
                    else:
                        # append!
                        cmake_args.append(arg)
                else:
                    # append
                    cmake_args.append(arg)

        logging.info('configuring cmake with: {}'.format(' '.join(["cmake", ext.sourcedir] + cmake_args)))
        logging.info('compiling with: {}'.format(' '.join(["cmake", "--build", "."] + build_args)))

        build_env = dict(os.environ)
        logging.info('LD_LIBRARY_PATH is: {}'.format(build_env.get('LD_LIBRARY_PATH', '')))

        # on mac os, set  MACOSX_DEPLOYMENT_TARGET
        if 'MACOSX_DEPLOYMENT_TARGET' not in build_env and platform.system().lower() == 'darwin':
            build_env['MACOSX_DEPLOYMENT_TARGET'] = macos_build_target

        cmake_command = ["cmake", ext.sourcedir] + cmake_args
        logging.info('cmake build command: {}'.format(' '.join(cmake_command)))
        run_command(cmake_command, cwd=self.build_temp, env=build_env)

        logging.info('configuration done, workdir={}'.format(self.build_temp))
        subprocess.check_call(
            ["cmake", "--build", "."] + build_args, cwd=self.build_temp, env=build_env
        )

        # this helps to search paths in doubt
        # logging.info('searching for .so files in {}'.format(self.build_temp))
        # subprocess.check_call(['find', '.', '-name', '*.so'], cwd = self.build_temp)
        # subprocess.check_call(['find', '.', '-name', '*.so'], cwd = ext.sourcedir)

        # check whether files can be located, if this doesn't work, search for files!
        # NOTE(review): these two paths are relative to the current working
        # directory, not to self.build_temp -- confirm whether that is intended.
        tuplexso_path = os.path.join('dist', 'python', 'tuplex', 'libexec', 'tuplex.so')
        src_runtime = os.path.join('dist', 'python', 'tuplex', 'libexec', 'tuplex_runtime.so')

        if not os.path.isfile(tuplexso_path):
            print('Could not find file tuplex.so under {}, searching for it...'.format(tuplexso_path))
            paths = find_files("*tuplex*.so", self.build_temp)
            # filter out runtime
            paths = [p for p in paths if 'runtime' not in os.path.basename(p)]
            if not paths:
                raise FileNotFoundError('did not find any file under {}'.format(self.build_temp))
            print('Found following paths: {}'.format(', '.join(paths)))
            print('Using {}'.format(paths[0]))
            tuplexso_path = paths[0]

        if not os.path.isfile(src_runtime):
            print('Could not find tuplex_runtime under {}, searching for it...'.format(src_runtime))
            paths = find_files("*tuplex_runtime*.*", self.build_temp)
            if not paths:
                raise FileNotFoundError('did not find any file under {}'.format(self.build_temp))
            print('Found following paths: {}'.format(', '.join(paths)))
            print('Using {}'.format(paths[0]))
            src_runtime = paths[0]

        # copy over modules so that setup.py picks them up.
        # i.e. according to current setup, the file is expected to be in
        # build/lib.macosx-10.15-x86_64-3.9/tuplex/libexec/tuplex.cpython-39-darwin.so e.g. for Mac OS X
        ext_suffix = sysconfig.get_config_var('EXT_SUFFIX')
        target_path = os.path.join(extdir, ext_filename + ext_suffix)
        print('target path is: {}'.format(target_path))
        os.makedirs(extdir, exist_ok=True)

        # copy file from build temp dir
        shutil.copyfile(tuplexso_path, target_path)
        if not os.path.isfile(src_runtime):
            # fall back to the macOS shared library suffix
            src_runtime = src_runtime.replace('.so', '.dylib')
            if not os.path.isfile(src_runtime):
                raise FileNotFoundError('Tuplex runtime does not exist')

        runtime_target = os.path.join(extdir, 'tuplex_runtime' + ext_suffix)
        shutil.copyfile(src_runtime, runtime_target)

        # run clean, to reclaim space
        # also remove third_party folder, because it is big!

        # this will remove test executables as well...
        if not BUILD_ALL:
            logging.info('Running cmake clean target to reclaim space')
            subprocess.check_call(
                ['cmake', '--build', '.', '--target', 'clean'], cwd=self.build_temp
            )
        else:
            # when build all is hit, preserve test files
            # i.e. need folders test, dist and CTestTestfile.cmake
            logging.info('Removing temporary build files, preserving test files...')
            remove_temp_files(self.build_temp)

        # equivalent of `rm -rf third_party` without spawning an external process
        shutil.rmtree(os.path.join(self.build_temp, 'third_party'), ignore_errors=True)


def get_subfolders(rootdir='.'):
    """
    List all directories found recursively below `rootdir`.

    :param rootdir: directory to start from
    :return: list of directory paths, each prefixed with the path of its parent as produced by os.walk
    """
    return [
        os.path.join(parent, child)
        for parent, children, _files in os.walk(rootdir)
        for child in children
    ]


# helper function to retrieve list of packages, i.e. ['tuplex', 'tuplex.repl', ...]
def discover_packages(where='.'):
    """
    Find all Python packages below `where`.

    A folder counts as a package if it contains an __init__.py; folders with
    '__pycache__' anywhere in their path are ignored.

    :param where: root directory to search
    :return: sorted list of dotted package names relative to `where`
    :raises FileNotFoundError: if `where` is not an existing directory
    """
    # fail loudly instead of silently returning no packages for a wrong path
    if not os.path.isdir(where):
        raise FileNotFoundError('package root {} is not a directory'.format(where))

    # prefix to strip from each folder path to make it relative to `where`
    prefix = where if where.endswith(os.sep) else where + os.sep

    # @TODO: could add some warnings here for developers...
    packages = [
        folder[len(prefix):].replace(os.sep, '.')
        for folder in get_subfolders(where)
        if '__pycache__' not in folder
        and os.path.isfile(os.path.join(folder, '__init__.py'))
    ]
    return sorted(packages)


def read_readme():
    """
    Return the contents of the README.md file located in the same directory
    as this file, decoded as UTF-8.
    """
    readme_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'README.md')
    with open(readme_path, encoding='utf-8') as readme:
        return readme.read()

def reorg_historyserver():
    """
    Reorganize historyserver to become part of the pip package.

    Copies <root>/tuplex/historyserver into
    <root>/tuplex/python/tuplex/historyserver, where <root> is the directory
    of this file, and registers an atexit hook that deletes the copy again
    when the interpreter exits.

    :return: empty list, so the call can be concatenated with a package list
    :raises FileNotFoundError: if the historyserver source directory is missing
    """
    # get absolute path of this file's location
    current_path = pathlib.Path(__file__).parent.resolve()

    # copy all the files from history server to directory historyserver under tuplex/python
    src_path = os.path.join(current_path, 'tuplex', 'historyserver')
    dst_path = os.path.join(current_path, 'tuplex', 'python', 'tuplex', 'historyserver')

    # explicit check instead of assert: asserts are stripped under `python -O`
    if not os.path.isdir(src_path):
        raise FileNotFoundError('Could not find historyserver root dir')

    # shutil.copytree replaces the deprecated distutils.dir_util.copy_tree;
    # dirs_exist_ok allows copying over a leftover copy from a previous run
    shutil.copytree(src_path, dst_path, dirs_exist_ok=True)

    # at-exit, delete
    def remove_history():
        # ignore_errors: the copy may already be gone when the hook fires
        shutil.rmtree(dst_path, ignore_errors=True)
    atexit.register(remove_history)

    return []

def tplx_package_data():
    """
    Assemble the package_data mapping for setup().

    Always contains the shared libraries in tuplex.libexec and the static web
    assets of tuplex.historyserver. Zip files of tuplex.other are added only
    if the environment variable TUPLEX_LAMBDA_ZIP points to an existing file.

    :return: dict mapping package name -> list of glob patterns
    """
    historyserver_globs = [
        'thserver/templates/*.html',
        'thserver/static/css/*.css',
        'thserver/static/css/styles/*.css',
        'thserver/static/img/*.*',
        'thserver/static/js/*.js',
        'thserver/static/js/modules/*.js',
        'thserver/static/js/styles/*.css',
    ]

    data = {
        # include libs in libexec
        'tuplex.libexec': ['*.so', '*.dylib'],
        'tuplex.historyserver': historyserver_globs,
    }

    # package lambda as well?
    zip_path = os.environ.get('TUPLEX_LAMBDA_ZIP', None)
    if zip_path and os.path.isfile(zip_path):
        data['tuplex.other'] = ['*.zip']
    return data

# The information here can also be placed in setup.cfg - better separation of
# logic and declaration, and simpler if you include description/version in a file.
# Note: evaluating the arguments below has side effects - reorg_historyserver()
# copies the history server into the package tree before packages are discovered.
setup(name="tuplex",
    python_requires='>=3.8.0',
    version="0.3.6",
    author="Leonhard Spiegelberg",
    author_email="tuplex@cs.brown.edu",
    description="Tuplex is a novel big data analytics framework incorporating a Python UDF compiler based on LLVM "
                "together with a query compiler featuring whole-stage code generation and optimization.",
    long_description=read_readme(),
    long_description_content_type='text/markdown',
    # reorg_historyserver() returns an empty list, so only discover_packages()
    # contributes package names; it is called here for its copy side effect.
    packages=reorg_historyserver() + discover_packages(where="tuplex/python"),
    package_dir={"": "tuplex/python"},
    package_data=tplx_package_data(),
    # both extensions point at the same CMake source directory "tuplex"
    ext_modules=[CMakeExtension("tuplex.libexec.tuplex", "tuplex"), CMakeExtension("tuplex.libexec.tuplex_runtime", "tuplex")],
    # custom commands: CMake-driven extension build, develop with build-type flags
    cmdclass={"build_ext": CMakeBuild, 'develop': DevelopCommand},
    # deactivate for now, first fix python sources to work properly!
    zip_safe=False,
    install_requires=install_dependencies,
    # metadata for upload to PyPI
    url="https://tuplex.cs.brown.edu",
    license="Apache 2.0",
    keywords="ETL BigData Python LLVM UDF Data Analytics",
    classifiers=[
        # How mature is this project? Common values are
        #   2 - Pre-Alpha
        #   3 - Alpha
        #   4 - Beta
        #   5 - Production/Stable
        'Development Status :: 2 - Pre-Alpha',

        # supported environments
        'Operating System :: MacOS',
        'Operating System :: POSIX :: Linux',

        # Indicate who your project is intended for
        'Intended Audience :: Developers',

        # Pick your license as you wish (should match "license" above)
        'License :: OSI Approved :: Apache Software License',

        # Specify the Python versions you support here. In particular, ensure
        # that you indicate whether you support Python 2, Python 3 or both.
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
    ],
    # command line script installed alongside the package
    scripts=['tuplex/historyserver/bin/tuplex-webui'],
    project_urls={
        "Bug Tracker": "https://github.com/tuplex",
        "Documentation": "https://tuplex.cs.brown.edu/python-api.html",
        "Source Code": "https://github.com/tuplex",
    }
)
