这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ repos:
# Run the linter.
- id: ruff
files: ^tuplex/python/tuplex.*\.py$
args: [ "--fix", "--select", "I" ]
args: [ "--fix", "--config", "ruff.toml"]
types_or: [ python, pyi ]
# Run the formatter.
- id: ruff-format
Expand Down
4 changes: 0 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,3 @@ requires = [
"requests"
]
build-backend = "setuptools.build_meta"


[tool.ruff]
include = ["pyproject.toml", "tuplex/python/tuplex/**/*.py"]
9 changes: 9 additions & 0 deletions ruff.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#"--select", "I", "--select", "F"
[lint]
# Add "B", "Q" for flake8 checks.
select = ["I", "E4", "E7", "E9", "F", "CPY001", "T201", "T203", "ANN001", "ANN002", "ANN003", "ANN201", "ANN202", "ANN204", "ANN205", "ANN206"]
ignore = []

# Allow fix for all enabled rules (when `--fix` is provided).
fixable = ["ALL"]
unfixable = []
8 changes: 7 additions & 1 deletion tuplex/python/tuplex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# ----------------------------------------------------------------------------------------------------------------------#

import logging
from typing import Optional, Union

# expose aws setup for better convenience
import tuplex.distributed
Expand All @@ -23,7 +24,12 @@


# for convenience create a dummy function to return a default-configured Lambda context
def LambdaContext(conf=None, name=None, s3_scratch_dir=None, **kwargs):
def LambdaContext(
conf: Union[None, str, dict] = None,
name: Optional[str] = None,
s3_scratch_dir: Optional[str] = None,
**kwargs: dict,
) -> Context:
import uuid

if s3_scratch_dir is None:
Expand Down
47 changes: 28 additions & 19 deletions tuplex/python/tuplex/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import os
import sys
import uuid
from typing import Any, List, Optional, Tuple, Union

from tuplex.utils.common import (
current_user,
Expand All @@ -45,7 +46,9 @@


class Context:
def __init__(self, conf=None, name="", **kwargs):
def __init__(
self, conf: Union[None, str, dict] = None, name: str = "", **kwargs: dict
) -> None:
r"""creates new Context object, the main entry point for all operations with the Tuplex big data framework

Args:
Expand Down Expand Up @@ -235,7 +238,13 @@ def __init__(self, conf=None, name="", **kwargs):
self.metrics = Metrics(python_metrics)
assert self.metrics

def parallelize(self, value_list, columns=None, schema=None, auto_unpack=True):
def parallelize(
self,
value_list: List[Any],
columns: Optional[List[str]] = None,
schema: Optional[Union[Tuple, List]] = None,
auto_unpack: bool = True,
) -> "DataSet":
"""passes data to the Tuplex framework. Must be a list of primitive objects (e.g. of type bool, int, float, str) or
a list of (nested) tuples of these types.

Expand Down Expand Up @@ -273,14 +282,14 @@ def parallelize(self, value_list, columns=None, schema=None, auto_unpack=True):

def csv(
self,
pattern,
columns=None,
header=None,
delimiter=None,
quotechar='"',
null_values=[""],
type_hints={},
):
pattern: str,
columns: Optional[List[str]] = None,
header: Optional[bool] = None,
delimiter: Optional[str] = None,
quotechar: str = '"',
null_values: List[str] = [""],
type_hints: dict = {},
) -> "DataSet":
"""reads csv (comma separated values) files. This function may either be provided with
parameters that help to determine the delimiter, whether a header is present or what kind
of quote char is used. Overall, CSV parsing is done according to the RFC-4180 standard
Expand Down Expand Up @@ -350,11 +359,11 @@ def csv(
)
return ds

def text(self, pattern, null_values=None):
def text(self, pattern: str, null_values: Optional[List[str]] = None) -> "DataSet":
"""reads text files.
Args:
pattern (str): a file glob pattern, e.g. /data/file.csv or /data/\*.csv or /\*/\*csv
null_values (List[str]): a list of string to interpret as None. When empty list or None, empty lines will be the empty string ''
null_values (List[str]): a list of strings to interpret as None. When empty list or None, empty lines will be the empty string ''
Returns:
tuplex.dataset.DataSet: A Tuplex Dataset object that allows further ETL operations
"""
Expand All @@ -372,7 +381,7 @@ def text(self, pattern, null_values=None):
ds._dataSet = self._context.text(pattern, null_values)
return ds

def orc(self, pattern, columns=None):
def orc(self, pattern: str, columns: Optional[List[str]] = None) -> "DataSet":
"""reads orc files.
Args:
pattern (str): a file glob pattern, e.g. /data/file.csv or /data/\*.csv or /\*/\*csv
Expand All @@ -390,7 +399,7 @@ def orc(self, pattern, columns=None):
ds._dataSet = self._context.orc(pattern, columns)
return ds

def options(self, nested=False):
def options(self, nested: bool = False) -> dict:
"""retrieves all framework parameters as dictionary

Args:
Expand All @@ -411,7 +420,7 @@ def options(self, nested=False):
else:
return opt

def optionsToYAML(self, file_path="config.yaml"):
def optionsToYAML(self, file_path: str = "config.yaml") -> None:
"""saves options as yaml file to (local) filepath

Args:
Expand All @@ -420,7 +429,7 @@ def optionsToYAML(self, file_path="config.yaml"):

save_conf_yaml(self.options(), file_path)

def ls(self, pattern):
def ls(self, pattern: str) -> List[str]:
"""
return a list of strings of all files found matching the pattern. The same pattern can be supplied to read inputs.
Args:
Expand All @@ -433,7 +442,7 @@ def ls(self, pattern):
assert self._context
return self._context.ls(pattern)

def cp(self, pattern, target_uri):
def cp(self, pattern: str, target_uri: str) -> None:
"""
copies all files matching the pattern to a target uri. If more than one file is found, a folder is created
containing all the files relative to the longest shared path prefix.
Expand All @@ -448,7 +457,7 @@ def cp(self, pattern, target_uri):
assert self._context
return self._context.cp(pattern, target_uri)

def rm(self, pattern):
def rm(self, pattern: str) -> None:
"""
removes all files matching the pattern
Args:
Expand All @@ -463,7 +472,7 @@ def rm(self, pattern):
return self._context.rm(pattern)

@property
def uiWebURL(self):
def uiWebURL(self) -> str:
"""
retrieve URL of webUI if running
Returns:
Expand Down
Loading
Loading