diff --git a/benchmarks/incremental/.gitignore b/benchmarks/incremental/.gitignore
new file mode 100644
index 000000000..be21f6805
--- /dev/null
+++ b/benchmarks/incremental/.gitignore
@@ -0,0 +1,6 @@
+data/*.csv
+tuplex_output/
+python_code_pipeline_stage_*.py
+transform_stage_*.txt
+results_dirty_zillow@10G/
+tuplex_config.json
\ No newline at end of file
diff --git a/benchmarks/incremental/README.md b/benchmarks/incremental/README.md
new file mode 100644
index 000000000..e98368ccc
--- /dev/null
+++ b/benchmarks/incremental/README.md
@@ -0,0 +1,25 @@
+
+## Incremental Exception Resolution Experiment
+
+In this experiment, we run a sequence of seven pipelines over the dirty Zillow dataset:
+- The first pipeline contains no ignore or resolver operations.
+- The final pipeline contains six unique ignore and resolver operations.
+- Each pipeline in between incrementally adds one more resolver, until all are present in the final pipeline.
+
+We compare the following conditions, for a total of 2 x 2 x 2 = 8 experimental configurations:
+- Plain vs. incremental resolution
+- Single- vs. multi-threaded execution
+- Merge in order vs. merge without order
+
+To get 10GB of input data, replicate the dirty Zillow data 1460x (or use 1500x for simplicity).
+
+### Setup
+To replicate the original data, create the 10G file with the following settings:
+```
+python3 replicate-data.py -s 1460 -o data/zillow_dirty@10G.csv
+```
+Note that the replicated file and the synthetic dataset (see `synthesize-data.py`) have the same number of rows, but the synthetic version is slightly larger.
+
+### Running the benchmark
+Use
+`nohup perflock ./benchmark.sh -hwloc &`
diff --git a/benchmarks/incremental/benchmark-synthetic.sh b/benchmarks/incremental/benchmark-synthetic.sh
new file mode 100755
index 000000000..54217275a
--- /dev/null
+++ b/benchmarks/incremental/benchmark-synthetic.sh
@@ -0,0 +1,96 @@
+#!/usr/bin/env bash
+
+# Parse HWLOC settings
+HWLOC=""
+if [ $# -ne 0 ] && [ $# -ne 1 ]; then # check number of inputs
+    echo "usage: ./benchmark-synthetic.sh [-hwloc]"
+    exit 1
+fi
+
+if [ $# -eq 1 ]; then # check if hwloc
+    if [ "$1" != "-hwloc" ]; then # check flag
+        echo -e "invalid flag: $1\nusage: ./benchmark-synthetic.sh [-hwloc]"
+        exit 1
+    fi
+    HWLOC="hwloc-bind --cpubind node:1 --membind node:1 --cpubind node:2 --membind node:2"
+fi
+
+# run NUM_RUNS trials over NUM_STEPS exception fractions, with a 4h (14400s) timeout per job
+NUM_RUNS=1
+NUM_STEPS=10
+TIMEOUT=14400
+
+RESDIR='results_synthetic'
+DATA_PATH='/hot/scratch/bgivertz/data/synthetic/synth'
+INCREMENTAL_OUT_PATH='/hot/scratch/bgivertz/output/incremental'
+COMMIT_OUT_PATH='/hot/scratch/bgivertz/output/commit'
+PLAIN_OUT_PATH='/hot/scratch/bgivertz/output/plain'
+
+rm -rf $RESDIR
+rm -rf $INCREMENTAL_OUT_PATH
+rm -rf $PLAIN_OUT_PATH
+rm -rf $COMMIT_OUT_PATH
+
+mkdir -p ${RESDIR}
+
+# create tuplex_config.json
+python3 create_conf.py --opt-pushdown --opt-filter --opt-llvm --executor-count 63 --executor-memory "6G" > tuplex_config.json
+
+echo "running out of order experiments"
+for ((r = 1; r <= NUM_RUNS; r++)); do
+    echo "trial ($r/$NUM_RUNS)"
+
+    echo "running plain (0/$NUM_STEPS)"
+    LOG="${RESDIR}/plain-out-of-order-e0-t$r.txt"
+    timeout $TIMEOUT ${HWLOC} python3 runsynthetic.py --clear-cache --input-path "$DATA_PATH""0.csv" --output-path $PLAIN_OUT_PATH >$LOG 2>$LOG.stderr
+
+    echo "running incremental (0/$NUM_STEPS)"
+    LOG="${RESDIR}/incremental-out-of-order-e0-t$r.txt"
+    timeout $TIMEOUT ${HWLOC} python3 runsynthetic.py --clear-cache --incremental-resolution --input-path "$DATA_PATH""0.csv" --output-path $INCREMENTAL_OUT_PATH >$LOG 2>$LOG.stderr
+
+    for ((s = 1; s <= NUM_STEPS; s++)) do
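+        # step s reads synth$s.csv, in which a fraction s/10 of the rows trigger exceptions (see create-synthetic.sh)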
+        echo "running plain ($s/$NUM_STEPS)"
+        LOG="${RESDIR}/plain-out-of-order-e$s-t$r.txt"
+        timeout $TIMEOUT ${HWLOC} python3 runsynthetic.py --use-resolve-step --clear-cache --input-path "$DATA_PATH$s.csv" --output-path $PLAIN_OUT_PATH >$LOG 2>$LOG.stderr
+
+        echo "running incremental ($s/$NUM_STEPS)"
+        LOG="${RESDIR}/incremental-out-of-order-e$s-t$r.txt"
+        timeout $TIMEOUT ${HWLOC} python3 runsynthetic.py --use-resolve-step --clear-cache --incremental-resolution --input-path "$DATA_PATH$s.csv" --output-path $INCREMENTAL_OUT_PATH >$LOG 2>$LOG.stderr
+    done
+done
+
+echo "running in order experiments"
+for ((r = 1; r <= NUM_RUNS; r++)); do
+    echo "trial ($r/$NUM_RUNS)"
+
+    echo "running plain (0/$NUM_STEPS)"
+    LOG="${RESDIR}/plain-in-order-e0-t$r.txt"
+    timeout $TIMEOUT ${HWLOC} python3 runsynthetic.py --clear-cache --resolve-in-order --input-path "$DATA_PATH""0.csv" --output-path $PLAIN_OUT_PATH >$LOG 2>$LOG.stderr
+
+    echo "running incremental (0/$NUM_STEPS)"
+    LOG="${RESDIR}/incremental-in-order-e0-t$r.txt"
+    timeout $TIMEOUT ${HWLOC} python3 runsynthetic.py --clear-cache --resolve-in-order --incremental-resolution --input-path "$DATA_PATH""0.csv" --output-path $INCREMENTAL_OUT_PATH >$LOG 2>$LOG.stderr
+
+    echo "running commit (0/$NUM_STEPS)"
+    LOG="${RESDIR}/commit-in-order-e0-t$r.txt"
+    timeout $TIMEOUT ${HWLOC} python3 runsynthetic.py --clear-cache --resolve-in-order --incremental-resolution --commit --input-path "$DATA_PATH""0.csv" --output-path $COMMIT_OUT_PATH >$LOG 2>$LOG.stderr
+
+    for ((s = 1; s <= NUM_STEPS; s++)) do
+        echo "running plain ($s/$NUM_STEPS)"
+        LOG="${RESDIR}/plain-in-order-e$s-t$r.txt"
+        timeout $TIMEOUT ${HWLOC} python3 runsynthetic.py --use-resolve-step --clear-cache --resolve-in-order --input-path "$DATA_PATH$s.csv" --output-path $PLAIN_OUT_PATH >$LOG 2>$LOG.stderr
+
+        echo "running incremental ($s/$NUM_STEPS)"
+        LOG="${RESDIR}/incremental-in-order-e$s-t$r.txt"
+        timeout $TIMEOUT ${HWLOC} python3 runsynthetic.py --use-resolve-step --clear-cache --resolve-in-order --incremental-resolution --input-path "$DATA_PATH$s.csv" --output-path $INCREMENTAL_OUT_PATH >$LOG 2>$LOG.stderr
+
+        echo "running commit ($s/$NUM_STEPS)"
+        LOG="${RESDIR}/commit-in-order-e$s-t$r.txt"
+        timeout $TIMEOUT ${HWLOC} python3 runsynthetic.py --use-resolve-step --clear-cache --resolve-in-order --incremental-resolution --commit --input-path "$DATA_PATH$s.csv" --output-path $COMMIT_OUT_PATH >$LOG 2>$LOG.stderr
+    done
+done
+
+
+rm -rf $INCREMENTAL_OUT_PATH
+rm -rf $PLAIN_OUT_PATH
+rm -rf $COMMIT_OUT_PATH
diff --git a/benchmarks/incremental/benchmark.sh b/benchmarks/incremental/benchmark.sh
new file mode 100755
index 000000000..1691a9a6c
--- /dev/null
+++ b/benchmarks/incremental/benchmark.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+
+# Parse HWLOC settings
+HWLOC=""
+if [ $# -ne 0 ] && [ $# -ne 1 ]; then # check number of inputs
+    echo "usage: ./benchmark.sh [-hwloc]"
+    exit 1
+fi
+
+if [ $# -eq 1 ]; then # check if hwloc
+    if [ "$1" != "-hwloc" ]; then # check flag
+        echo -e "invalid flag: $1\nusage: ./benchmark.sh [-hwloc]"
+        exit 1
+    fi
+    HWLOC="hwloc-bind --cpubind node:1 --membind node:1 --cpubind node:2 --membind node:2"
+fi
+
+# use 3 runs and a timeout after 240min/4h per job
+NUM_RUNS=3
+TIMEOUT=14400
+
+RESDIR='results_dirty_zillow@100G'
+DATA_PATH_SSD='/hot/scratch/bgivertz/data/zillow_dirty@100G.csv'
+INCREMENTAL_OUT_PATH_SSD='/hot/scratch/bgivertz/output/incremental'
+INCREMENTAL_COMMIT_OUT_PATH_SSD='/hot/scratch/bgivertz/output/commit'
+PLAIN_OUT_PATH_SSD='/hot/scratch/bgivertz/output/plain'
+
+rm -rf $RESDIR
+rm -rf
$INCREMENTAL_OUT_PATH_SSD +rm -rf $PLAIN_OUT_PATH_SSD +rm -rf $INCREMENTAL_COMMIT_OUT_PATH_SSD + +# does file exist? +if [[ ! -f "$DATA_PATH_SSD" ]]; then + echo "file $DATA_PATH_SSD not found, abort." + exit 1 +fi + +mkdir -p ${RESDIR} + +# create tuplex_config.json +python3 create_conf.py --opt-pushdown --opt-filter --opt-llvm --executor-count 63 --executor-memory "6G" > tuplex_config.json + +echo "running out-of-order ssd experiments" +for ((r = 1; r <= NUM_RUNS; r++)); do + echo "trial ($r/$NUM_RUNS)" + + echo "running plain" + LOG="${RESDIR}/tuplex-plain-out-of-order-ssd-$r.txt" + timeout $TIMEOUT ${HWLOC} python3 runtuplex.py --clear-cache --path $DATA_PATH_SSD --output-path $PLAIN_OUT_PATH_SSD >$LOG 2>$LOG.stderr + + echo "running incremental" + LOG="${RESDIR}/tuplex-incremental-out-of-order-ssd-$r.txt" + timeout $TIMEOUT ${HWLOC} python3 runtuplex.py --clear-cache --incremental-resolution --path $DATA_PATH_SSD --output-path $INCREMENTAL_OUT_PATH_SSD >$LOG 2>$LOG.stderr + +# echo "validating results" +# LOG="${RESDIR}/tuplex-compare-out-of-order-ssd-$r.txt" +# timeout $TIMEOUT ${HWLOC} python3 compare_folders.py $PLAIN_OUT_PATH_SSD $INCREMENTAL_OUT_PATH_SSD >$LOG 2>$LOG.stderr +done + +echo "running in-order ssd experiments" +for ((r = 1; r <= NUM_RUNS; r++)); do + echo "trial ($r/$NUM_RUNS)" + + echo "running plain" + LOG="${RESDIR}/tuplex-plain-in-order-ssd-$r.txt" + timeout $TIMEOUT ${HWLOC} python3 runtuplex.py --clear-cache --resolve-in-order --path $DATA_PATH_SSD --output-path $PLAIN_OUT_PATH_SSD >$LOG 2>$LOG.stderr + + echo "running incremental" + LOG="${RESDIR}/tuplex-incremental-in-order-ssd-$r.txt" + timeout $TIMEOUT ${HWLOC} python3 runtuplex.py --clear-cache --resolve-in-order --incremental-resolution --path $DATA_PATH_SSD --output-path $INCREMENTAL_OUT_PATH_SSD >$LOG 2>$LOG.stderr + + echo "running commit" + LOG="${RESDIR}/tuplex-incremental-in-order-commit-ssd-$r.txt" + timeout $TIMEOUT ${HWLOC} python3 runtuplex.py --clear-cache --resolve-in-order --incremental-resolution --commit --path $DATA_PATH_SSD --output-path $INCREMENTAL_COMMIT_OUT_PATH_SSD >$LOG 2>$LOG.stderr + +# echo "validating results" +# LOG="${RESDIR}/tuplex-compare-in-order-ssd-$r.txt" +# timeout $TIMEOUT ${HWLOC} python3 compare_folders.py --in-order $PLAIN_OUT_PATH_SSD $INCREMENTAL_OUT_PATH_SSD >$LOG 2>$LOG.stderr +# +# LOG="${RESDIR}/tuplex-compare-in-order-commit-ssd-$r.txt" +# timeout $TIMEOUT ${HWLOC} python3 compare_folders.py --in-order $INCREMENTAL_COMMIT_OUT_PATH_SSD $INCREMENTAL_OUT_PATH_SSD >$LOG 2>$LOG.stderr +done + +echo "graphing results" +python3 graph.py --results-path $RESDIR --num-trials $NUM_RUNS --num-steps 7 + +rm -rf $INCREMENTAL_OUT_PATH_SSD +rm -rf $PLAIN_OUT_PATH_SSD +rm -rf $INCREMENTAL_COMMIT_OUT_PATH_SSD diff --git a/benchmarks/incremental/compare_folders.py b/benchmarks/incremental/compare_folders.py new file mode 100755 index 000000000..f8daddb6c --- /dev/null +++ b/benchmarks/incremental/compare_folders.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python3 +# (c) L.Spiegelberg 2021 +# compare the csv output contents of two folders (ignoring order) + +import os +import sys +import argparse +import glob + + +def wc_files(path): + files = sorted(glob.glob(os.path.join(path, '*.csv'))) + + all_lines = [] + num_rows = 0 + header = None + matching_headers = 0 + for f in files: + with open(f, 'r') as fp: + lines = fp.readlines() + if header is None and len(lines) > 0: + header = lines[0] + num_rows += len(lines) + if len(lines) > 0: + if header == lines[0]: + matching_headers += 1 + 
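# every part file repeats the header, so append only this file's data rows
+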
all_lines += lines[1:] + else: + all_lines += lines + + if matching_headers == len(files): + num_rows -= matching_headers + + print('-- counted {} rows in {} files in folder {}'.format(num_rows, len(files), path)) + return num_rows, len(files), all_lines + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("folderA") + parser.add_argument("folderB") + parser.add_argument("--in-order", help='whether to compare in order', action='store_true') + args = parser.parse_args() + + print('== Dirty Zillow experiment validation ==') + + # count lines in each folder + print('-- loading folder contents...') + rowCountA, filesA, rowsA = wc_files(args.folderA) + rowCountB, filesB, rowsB = wc_files(args.folderB) + + if rowCountA != rowCountB: + print('>>> number of rows does not match') + sys.exit(1) + + # sort lines and compare them + if not args.in_order: + print('-- sorting rows from {}'.format(args.folderA)) + rowsA = sorted(rowsA) + print('-- sorting rows from {}'.format(args.folderB)) + rowsB = sorted(rowsB) + + print('-- computing comparison of rows...') + non_matching_indices = [ind for ind, (i, j) in enumerate(zip(rowsA, rowsB)) if i != j] + + if len(non_matching_indices) > 0: + print('>>> rows do not match up, details:') + + for idx in non_matching_indices: + print('{:5d}: {} != {}'.format(idx, rowsA[idx], rowsB[idx])) + sys.exit(1) + + print('>>> contents of folders match.') + + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/benchmarks/incremental/create-synthetic.sh b/benchmarks/incremental/create-synthetic.sh new file mode 100644 index 000000000..dc862b661 --- /dev/null +++ b/benchmarks/incremental/create-synthetic.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -x + +python3 synthesize-data.py --dataset-size $1 --output-path /hot/scratch/bgivertz/data/synthetic/synth0.csv --exceptions 0 + +for ((i = 1; i <= 9; i++)) do + python3 synthesize-data.py --dataset-size $1 --output-path /hot/scratch/bgivertz/data/synthetic/synth$i.csv --exceptions 0.$i +done + +python3 synthesize-data.py --dataset-size $1 --output-path /hot/scratch/bgivertz/data/synthetic/synth10.csv --exceptions 1 diff --git a/benchmarks/incremental/create_conf.py b/benchmarks/incremental/create_conf.py new file mode 100644 index 000000000..10420e3c8 --- /dev/null +++ b/benchmarks/incremental/create_conf.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +# (c) 2020 L.Spiegelberg +# this script creates Tuplex json configuration files for benchmarks + +import json +import argparse + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--executor-memory', default='10G', help='how much memory each thread gets') + parser.add_argument('--executor-count', default=15, help='how many worker threads') + parser.add_argument('--partition-size', default='32MB', help='task size') + parser.add_argument('--runtime-memory', default='64MB', help='how much maximum runtime memory to use') + parser.add_argument('--input-split-size', default='64MB', help='chunk size of input files') + parser.add_argument('--opt-null', help='enable null value optimization', action='store_true') + parser.add_argument('--opt-pushdown', help='enable projection pushdown', action='store_true') + parser.add_argument('--opt-filter', help='enable filter pushdown', action='store_true') + parser.add_argument('--opt-parser', help='generate CSV parser', action='store_true') + parser.add_argument('--opt-llvm', help='run llvm optimizers', action='store_true') + + args = parser.parse_args() + + conf = 
{'webui.enable' : False, + 'executorMemory' : args.executor_memory, + 'executorCount' : args.executor_count, + 'driverMemory' : args.executor_memory, + 'partitionSize' : args.partition_size, + 'runTimeMemory' : args.runtime_memory, + 'inputSplitSize' : args.input_split_size, + 'useLLVMOptimizer' : args.opt_llvm, + 'optimizer.nullValueOptimization' : args.opt_null, + 'csv.selectionPushdown' : args.opt_pushdown, + 'optimizer.generateParser' : args.opt_parser, + 'optimizer.filterPushdown' : args.opt_filter} + + print(json.dumps(conf)) \ No newline at end of file diff --git a/benchmarks/incremental/export_results.py b/benchmarks/incremental/export_results.py new file mode 100644 index 000000000..60fa7f18d --- /dev/null +++ b/benchmarks/incremental/export_results.py @@ -0,0 +1,121 @@ +import argparse +import os +import json +from enum import Enum + +class Mode(Enum): + OUT_OF_ORDER = 1 + IN_ORDER = 2 + COMMIT = 3 + +def validate_experiment(compare_path): + with open(compare_path) as f: + lines = f.read().splitlines() + return ">>> contents of folders match." in lines + +def get_metric(path, metric, step): + with open(path) as f: + lines = f.read().splitlines() + ind = lines.index("EXPERIMENTAL RESULTS") + 2 + line = lines[ind + step] + metrics = json.loads(line) + if metric == 'jobTime': + return metrics[metric] + else: + return metrics["stages"][0][metric] + +def compare_path(trial, mode): + return "tuplex-compare-{}{}-ssd-{}.txt".format('out-of-order' if mode == Mode.OUT_OF_ORDER else 'in-order', + '-commit' if mode == Mode.COMMIT else '', + trial) + + +def experiment_path(trial, incremental, mode): + return "tuplex-{}-{}{}-ssd-{}.txt".format('incremental' if incremental else 'plain', + 'out-of-order' if mode == Mode.OUT_OF_ORDER else 'in-order', + '-commit' if mode == Mode.COMMIT else '', + trial) + +def write_metric_to_file(f, results_path, num_trials, num_steps, mode, metric): + header = "{},".format("Out of Order" if mode == Mode.OUT_OF_ORDER else "In Order") + \ + "," * num_trials + "Plain," + \ + "," * num_trials + "Incremental" + \ + ("," * (num_trials + 1) + "Commit\n" if mode == Mode.IN_ORDER else "\n") + f.write(header) + + header = "Resolvers," + \ + ','.join(["Trial {}".format(i + 1) for i in range(num_trials)]) + \ + ",Average," + \ + ','.join(["Trial {}".format(i + 1) for i in range(num_trials)]) + \ + ",Average" + \ + ("," + ','.join(["Trial {}".format(i + 1) for i in range(num_trials)]) + ",Average\n" if mode == Mode.IN_ORDER else "\n") + f.write(header) + + for step in range(num_steps): + line = f"{step}," + + plain_total = 0 + for trial in range(num_trials): + plain_path = os.path.join(results_path, experiment_path(trial + 1, False, mode)) + plain_time = get_metric(plain_path, metric, step) + + plain_total += plain_time + line += f"{plain_time}," + line += f"{plain_total / num_trials}," + + incremental_total = 0 + for trial in range(num_trials): + incremental_path = os.path.join(results_path, experiment_path(trial + 1, True, mode)) + incremental_time = get_metric(incremental_path, metric, step) + + incremental_total += incremental_time + line += f"{incremental_time}," + line += f"{incremental_total / num_trials}" + + if mode == Mode.IN_ORDER: + line += "," + commit_total = 0 + for trial in range(num_trials): + commit_path = os.path.join(results_path, experiment_path(trial + 1, True, Mode.COMMIT)) + commit_time = get_metric(commit_path, metric, step) + + commit_total += commit_time + line += f"{commit_time}," + line += f"{commit_total / num_trials}\n" + else: + line += "\n" + 
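# flush one CSV row per resolver step: per-trial values followed by the trial average
+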
+ f.write(line) + +def export_experiments(results_path, num_trials, num_steps): + # Validate all experiments + # for i in range(num_trials): + # for mode in Mode: + # validate_path = os.path.join(results_path, compare_path(i + 1, mode)) + # assert validate_experiment(validate_path) + + metrics = ['jobTime', 'fast_path_time_s', 'slow_path_time_s', 'write_output_wall_time_s'] + + file_path = "experiments.csv" + with open(file_path, 'w') as f: + for metric in metrics: + write_metric_to_file(f, results_path, num_trials, num_steps, Mode.OUT_OF_ORDER, metric) + write_metric_to_file(f, results_path, num_trials, num_steps, Mode.IN_ORDER, metric) + +def main(): + parser = argparse.ArgumentParser(description='Parse results of experiment') + parser.add_argument('--results-path', type=str, dest='results_path', default='results_dirty_zillow@10G') + parser.add_argument('--num-trials', type=int, dest='num_trials', default=1,) + parser.add_argument('--num-steps', type=int, dest='num_steps', default=7) + args = parser.parse_args() + + results_path = args.results_path + num_trials = args.num_trials + num_steps = args.num_steps + + assert os.path.isdir(results_path) + + export_experiments(results_path, num_trials, num_steps) + +if __name__ == '__main__': + main() diff --git a/benchmarks/incremental/graph-synthetic.py b/benchmarks/incremental/graph-synthetic.py new file mode 100644 index 000000000..e74f9a0a6 --- /dev/null +++ b/benchmarks/incremental/graph-synthetic.py @@ -0,0 +1,120 @@ +import argparse +import os.path +import json +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.patches as mpatches + +PLAIN_COLOR = "#4285F4" +INCREMENTAL_COLOR = '#DB4437' +COMMIT_COLOR = "#F4B400" + +class Experiment: + def __init__(self, results_path, num_trials, num_steps, save_path): + self.results_path = results_path + self.num_trials = num_trials + self.num_steps = num_steps + self.save_path = save_path + + def graph_in_order(self): + plain_results = self.get_results(False, 'plain') + inc_results = self.get_results(False, 'incremental') + commit_results = self.get_results(False, 'commit') + + fig = plt.figure(figsize=(6, 4)) + + plt.plot(plain_results, marker='o', color=PLAIN_COLOR) + plt.plot(inc_results, marker='o', color=INCREMENTAL_COLOR) + plt.plot(commit_results, marker='o', color=COMMIT_COLOR) + # plt.ylim(0, 110) + + plt.ylabel('Total Execution Time (s)') + plt.xlabel('Amount of Exceptions') + labels = ['0', '0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9', '1.0'] + x = np.arange(len(labels)) + plt.xticks(x, labels) + + plt.title('In Order | Synthetic') + plt.legend(handles=[ + mpatches.Patch(color=PLAIN_COLOR, label='Plain'), + mpatches.Patch(color=INCREMENTAL_COLOR, label='Incremental'), + mpatches.Patch(color=COMMIT_COLOR, label='Commit') + ], loc='lower right') + + fig.savefig(os.path.join(self.save_path, 'in-order-synth.png'), dpi=400, bbox_inches='tight') + + def graph_out_of_order(self): + plain_results = self.get_results(True, 'plain') + inc_results = self.get_results(True, 'incremental') + + fig = plt.figure(figsize=(6, 4)) + + plt.plot(plain_results, marker='o', color=PLAIN_COLOR) + plt.plot(inc_results, marker='o', color=INCREMENTAL_COLOR) + # plt.ylim(0, 110) + + plt.ylabel('Total Execution Time (s)') + plt.xlabel('Amount of Exceptions') + labels = ['0', '0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9', '1.0'] + x = np.arange(len(labels)) + plt.xticks(x, labels) + + plt.title('Out of Order | Synthetic') + plt.legend(handles=[ + 
mpatches.Patch(color=PLAIN_COLOR, label='Plain'), + mpatches.Patch(color=INCREMENTAL_COLOR, label='Incremental'), + ], loc='upper right') + + fig.savefig(os.path.join(self.save_path, 'out-of-order-synth.png'), dpi=400, bbox_inches='tight') + + def get_path(self, out_of_order, mode, step, trial): + filename = f"{mode}-{'out-of-order' if out_of_order else 'in-order'}-e{step}-t{trial}.txt" + return os.path.join(self.results_path, filename) + + def get_results(self, out_of_order, mode): + results = [] + for step in range(self.num_steps): + step_results = [] + for trial in range(self.num_trials): + path = self.get_path(out_of_order, mode, step, trial + 1) + step_results.append(self.get_metric(path)) + results.append(sum(step_results) / len(step_results)) + return np.array(results) + + def get_metric(self, path): + with open(path, 'r') as fp: + lines = fp.read().splitlines() + ind = lines.index("EXPERIMENTAL RESULTS") + 1 + line = lines[ind] + metrics = json.loads(line) + return metrics['totalRunTime'] + +def main(): + parser = argparse.ArgumentParser(description='Graph results of synthetic experiment') + parser.add_argument('--results-path', type=str, dest='results_path', default='results_synthetic') + parser.add_argument('--num-trials', type=int, dest='num_trials', default=1) + parser.add_argument('--num-steps', type=int, dest='num_steps', default=11) + parser.add_argument('--save-path', type=str, dest='save_path', default='graphs-synthetic') + args = parser.parse_args() + + results_path = args.results_path + num_trials = args.num_trials + num_steps = args.num_steps + save_path = args.save_path + + if not os.path.isdir(save_path): + os.makedirs(save_path) + assert os.path.isdir(results_path) + + params = {'font.family': 'Times', + 'legend.fontsize': 'medium', + 'axes.labelsize': 'medium', + 'axes.titlesize': 'medium'} + plt.rcParams.update(params) + + e = Experiment(results_path, num_trials, num_steps, save_path) + e.graph_out_of_order() + e.graph_in_order() + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/benchmarks/incremental/graph.py b/benchmarks/incremental/graph.py new file mode 100644 index 000000000..13ddaefa5 --- /dev/null +++ b/benchmarks/incremental/graph.py @@ -0,0 +1,279 @@ +import matplotlib.pyplot as plt +import numpy as np +import matplotlib.patches as mpatches +from brokenaxes import brokenaxes +from enum import Enum +import argparse +import os +import json + +PLAIN_COLOR = "#4285F4" +INCREMENTAL_COLOR = '#DB4437' +COMMIT_COLOR = "#F4B400" + +class Mode(Enum): + OUT_OF_ORDER = 1 + IN_ORDER = 2 + COMMIT = 3 + +def in_order_total(save_path, plain_times, incremental_times, commit_times): + width = 0.7 + separator = 0.02 + + # labels = ['No\nResolvers', 'Bedroom\nResolve', 'Bedroom\nIgnore', 'Bathroom\nResolve', 'Bathroom\nIgnore', 'Price\nResolve', 'Price\nIgnore'] + labels = ['0', '1', '2', '3', '4', '5', '6'] + x = np.arange(len(labels)) + + fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(6, 4), gridspec_kw={'height_ratios': [1, 4]}) + fig.subplots_adjust(hspace=0.05) + + ax1.bar(x - width/3 - separator, plain_times, width/3, color=PLAIN_COLOR) + ax1.bar(x, incremental_times, width/3, color=INCREMENTAL_COLOR) + ax1.bar(x + width/3 + separator, commit_times, width/3, color=COMMIT_COLOR) + + ax2.bar(x - width/3 - separator, plain_times, width/3, color=PLAIN_COLOR) + ax2.bar(x, incremental_times, width/3, color=INCREMENTAL_COLOR) + ax2.bar(x + width/3 + separator, commit_times, width/3, color=COMMIT_COLOR) + + ax1.set_ylim(164.0, 200.0) + 
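# broken y-axis: the top panel isolates the tall plain-run bars, the bottom panel zooms in on the faster runs
+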
ax2.set_ylim(0.0, 38.0) + + ax1.spines.bottom.set_visible(False) + ax2.spines.top.set_visible(False) + ax1.xaxis.tick_top() + ax1.tick_params(labeltop=False) + ax2.xaxis.tick_bottom() + d = 0.5 + kwargs = dict(marker=[(-1, -d), (1, d)], markersize=12, + linestyle="none", color='k', mec='k', mew=1, clip_on=False) + ax1.plot([0, 1], [0, 0], transform=ax1.transAxes, **kwargs) + ax2.plot([0, 1], [1, 1], transform=ax2.transAxes, **kwargs) + + ax1.set_title('In Order') + plt.ylabel('Execution Time (s)') + plt.xlabel('Exception Resolution Step') + fig.legend(handles=[ + mpatches.Patch(color=PLAIN_COLOR, label='Plain'), + mpatches.Patch(color=INCREMENTAL_COLOR, label='Incremental'), + mpatches.Patch(color=COMMIT_COLOR, label='Commit') + ], loc=(0.727, 0.748)) + + fig.savefig(os.path.join(save_path, 'in-order-total.png'), dpi=400, bbox_inches='tight') + +def out_of_order_total(save_path, plain_times, incremental_times): + width = 0.35 + separator = 0.02 + + labels = ['0', '1', '2', '3', '4', '5', '6'] + x = np.arange(len(labels)) + + # Use 6x4 for size, use latex text from paper script + fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(6, 4), gridspec_kw={'height_ratios': [1, 5]}) + fig.subplots_adjust(hspace=0.05) + + ax1.bar(x - width/2 - separator, plain_times, width + separator, color=PLAIN_COLOR) + ax1.bar(x + width/2 + separator, incremental_times, width + separator, color=INCREMENTAL_COLOR) + + ax2.bar(x - width/2 - separator, plain_times, width + separator, color=PLAIN_COLOR) + ax2.bar(x + width/2 + separator, incremental_times, width + separator, color=INCREMENTAL_COLOR) + + ax1.set_ylim(184.0, 200.0) + ax2.set_ylim(0.0, 38.0) + + ax1.set_title('Out of Order') + plt.xlabel('Exception Resolution Step') + plt.ylabel('Execution Time (s)') + ax1.legend(handles=[ + mpatches.Patch(color=PLAIN_COLOR, label='Plain'), + mpatches.Patch(color=INCREMENTAL_COLOR, label='Incremental') + ], loc='upper right') + + ax1.spines.bottom.set_visible(False) + ax2.spines.top.set_visible(False) + ax1.xaxis.tick_top() + ax1.tick_params(labeltop=False) + ax2.xaxis.tick_bottom() + d = 0.5 + kwargs = dict(marker=[(-1, -d), (1, d)], markersize=12, + linestyle="none", color='k', mec='k', mew=1, clip_on=False) + ax1.plot([0, 1], [0, 0], transform=ax1.transAxes, **kwargs) + ax2.plot([0, 1], [1, 1], transform=ax2.transAxes, **kwargs) + + fig.savefig(os.path.join(save_path, 'out-of-order-total.png'), dpi=400, bbox_inches='tight') + +def time_breakdown(save_path, title, save_name, fast_path, slow_path, write): + width = 0.6 + + # labels = ['No\nResolvers', 'Bedroom\nResolve', 'Bedroom\nIgnore', 'Bathroom\nResolve', 'Bathroom\nIgnore', 'Price\nResolve', 'Price\nIgnore'] + labels = ['0', '1', '2', '3', '4', '5', '6'] + x = np.arange(len(labels)) + + fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(6, 4), gridspec_kw={'height_ratios': [1, 5]}) + fig.subplots_adjust(hspace=0.05) + + + ax1.bar(x, fast_path, width, color=PLAIN_COLOR) + ax1.bar(x, slow_path, width, bottom=fast_path, color=INCREMENTAL_COLOR) + ax1.bar(x, write, width, bottom=fast_path + slow_path, color=COMMIT_COLOR) + + ax2.bar(x, fast_path, width, color=PLAIN_COLOR) + ax2.bar(x, slow_path, width, bottom=fast_path, color=INCREMENTAL_COLOR) + ax2.bar(x, write, width, bottom=fast_path + slow_path, color=COMMIT_COLOR) + + ax1.set_ylim(184.0, 200.0) + ax2.set_ylim(0.0, 38.0) + + ax1.spines.bottom.set_visible(False) + ax2.spines.top.set_visible(False) + ax1.xaxis.tick_top() + ax1.tick_params(labeltop=False) + ax2.xaxis.tick_bottom() + d = 0.5 + 
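# diagonal cut marks for the axis break (the standard matplotlib broken-axis recipe)
+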
kwargs = dict(marker=[(-1, -d), (1, d)], markersize=12, + linestyle="none", color='k', mec='k', mew=1, clip_on=False) + ax1.plot([0, 1], [0, 0], transform=ax1.transAxes, **kwargs) + ax2.plot([0, 1], [1, 1], transform=ax2.transAxes, **kwargs) + + ax1.set_title(title) + plt.ylabel('Execution Time (s)') + plt.xlabel('Exception Resolution Step') + + fig.legend(handles=[ + mpatches.Patch(color=PLAIN_COLOR, label='Fast Path'), + mpatches.Patch(color=INCREMENTAL_COLOR, label='Slow Path'), + mpatches.Patch(color=COMMIT_COLOR, label='Write') + ], loc=(0.745, 0.748)) + + fig.savefig(os.path.join(save_path, save_name), dpi=400, bbox_inches='tight') + +# def out_of_order_total(save_path, plain_times, incremental_times): +# width = 0.35 +# separator = 0.02 +# +# # labels = ['No\nResolvers', 'Bedroom\nResolve', 'Bedroom\nIgnore', 'Bathroom\nResolve', 'Bathroom\nIgnore', 'Price\nResolve', 'Price\nIgnore'] +# labels = ['0', '1', '2', '3', '4', '5', '6'] +# x = np.arange(len(labels)) +# +# fig = plt.figure(figsize=(10, 6)) +# +# plt.bar(x - width/2 - separator, plain_times, width + separator, color=PLAIN_COLOR) +# plt.bar(x + width/2 + separator, incremental_times, width + separator, color=INCREMENTAL_COLOR) +# +# plt.title('Out of Order') +# plt.xticks(x, labels) +# plt.ylabel('Execution Time (s)') +# plt.xlabel('Exception Resolution Step') +# plt.legend(handles=[ +# mpatches.Patch(color=PLAIN_COLOR, label='Plain'), +# mpatches.Patch(color=INCREMENTAL_COLOR, label='Incremental') +# ], loc='upper right') +# +# fig.savefig(os.path.join(save_path, 'out-of-order-total.png'), dpi=400) + +def validate_experiment(compare_path): + with open(compare_path) as f: + lines = f.read().splitlines() + return ">>> contents of folders match." in lines + +def get_metric(path, metric, step): + with open(path) as f: + lines = f.read().splitlines() + ind = lines.index("EXPERIMENTAL RESULTS") + 2 + line = lines[ind + step] + metrics = json.loads(line) + if metric == 'jobTime': + return metrics[metric] + else: + return metrics["stages"][0][metric] + +def compare_path(trial, mode): + return "tuplex-compare-{}{}-ssd-{}.txt".format('out-of-order' if mode == Mode.OUT_OF_ORDER else 'in-order', + '-commit' if mode == Mode.COMMIT else '', + trial) + + +def experiment_path(trial, incremental, mode): + return "tuplex-{}-{}{}-ssd-{}.txt".format('incremental' if incremental else 'plain', + 'out-of-order' if mode == Mode.OUT_OF_ORDER else 'in-order', + '-commit' if mode == Mode.COMMIT else '', + trial) + +def get_average_times(results_path, metric, num_trials, num_steps, incremental, mode): + times = [] + for i in range(num_steps): + total = 0 + for j in range(num_trials): + total += get_metric(os.path.join(results_path, experiment_path(j + 1, incremental, mode)), metric, i) + total /= num_trials + times.append(total) + return np.array(times) + +def main(): + parser = argparse.ArgumentParser(description='Parse results of experiment') + parser.add_argument('--results-path', type=str, dest='results_path', default='results_dirty_zillow@10G') + parser.add_argument('--num-trials', type=int, dest='num_trials', default=1) + parser.add_argument('--num-steps', type=int, dest='num_steps', default=7) + parser.add_argument('--save-path', type=str, dest='save_path', default='graphs') + args = parser.parse_args() + + results_path = args.results_path + num_trials = args.num_trials + num_steps = args.num_steps + save_path = args.save_path + + if not os.path.isdir(save_path): + os.makedirs(save_path) + assert os.path.isdir(results_path) + + params = 
{'font.family': 'Times', + 'legend.fontsize': 'medium', + 'axes.labelsize': 'medium', + 'axes.titlesize': 'medium'} + plt.rcParams.update(params) + + # for i in range(num_trials): + # for mode in Mode: + # validate_path = os.path.join(results_path, compare_path(i + 1, mode)) + # assert validate_experiment(validate_path) + + + # Total Times + plain_times = get_average_times(results_path, 'jobTime', num_trials, num_steps, False, Mode.OUT_OF_ORDER) + inc_times = get_average_times(results_path, 'jobTime', num_trials, num_steps, True, Mode.OUT_OF_ORDER) + out_of_order_total(save_path, plain_times, inc_times) + + plain_times = get_average_times(results_path, 'jobTime', num_trials, num_steps, False, Mode.IN_ORDER) + inc_times = get_average_times(results_path, 'jobTime', num_trials, num_steps, True, Mode.IN_ORDER) + commit_times = get_average_times(results_path, 'jobTime', num_trials, num_steps, True, Mode.COMMIT) + in_order_total(save_path, plain_times, inc_times, commit_times) + + # Time Break Down + plain_fast = get_average_times(results_path, 'fast_path_time_s', num_trials, num_steps, False, Mode.OUT_OF_ORDER) + plain_slow = get_average_times(results_path, 'slow_path_time_s', num_trials, num_steps, False, Mode.OUT_OF_ORDER) + plain_write = get_average_times(results_path, 'write_output_wall_time_s', num_trials, num_steps, False, Mode.OUT_OF_ORDER) + time_breakdown(save_path, 'Out of Order | Plain', 'out-of-order-plain-breakdown.png', plain_fast, plain_slow, plain_write) + + inc_fast = get_average_times(results_path, 'fast_path_time_s', num_trials, num_steps, True, Mode.OUT_OF_ORDER) + inc_slow = get_average_times(results_path, 'slow_path_time_s', num_trials, num_steps, True, Mode.OUT_OF_ORDER) + inc_write = get_average_times(results_path, 'write_output_wall_time_s', num_trials, num_steps, True, Mode.OUT_OF_ORDER) + time_breakdown(save_path, 'Out of Order | Incremental', 'out-of-order-incremental-breakdown.png', inc_fast, inc_slow, inc_write) + + plain_fast = get_average_times(results_path, 'fast_path_time_s', num_trials, num_steps, False, Mode.IN_ORDER) + plain_slow = get_average_times(results_path, 'slow_path_time_s', num_trials, num_steps, False, Mode.IN_ORDER) + plain_write = get_average_times(results_path, 'write_output_wall_time_s', num_trials, num_steps, False, Mode.IN_ORDER) + time_breakdown(save_path, 'In Order | Plain', 'in-order-plain-breakdown.png', plain_fast, plain_slow, plain_write) + + inc_fast = get_average_times(results_path, 'fast_path_time_s', num_trials, num_steps, True, Mode.IN_ORDER) + inc_slow = get_average_times(results_path, 'slow_path_time_s', num_trials, num_steps, True, Mode.IN_ORDER) + inc_write = get_average_times(results_path, 'write_output_wall_time_s', num_trials, num_steps, True, Mode.IN_ORDER) + time_breakdown(save_path, 'In Order | Incremental', 'in-order-incremental-breakdown.png', inc_fast, inc_slow, inc_write) + + commit_fast = get_average_times(results_path, 'fast_path_time_s', num_trials, num_steps, True, Mode.COMMIT) + commit_slow = get_average_times(results_path, 'slow_path_time_s', num_trials, num_steps, True, Mode.COMMIT) + commit_write = get_average_times(results_path, 'write_output_wall_time_s', num_trials, num_steps, True, Mode.COMMIT) + time_breakdown(save_path, 'In Order | Commit', 'in-order-commit-breakdown.png', commit_fast, commit_slow, commit_write) + + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/benchmarks/incremental/piechart.py b/benchmarks/incremental/piechart.py new file mode 100644 index 
000000000..ee9fafc5d --- /dev/null +++ b/benchmarks/incremental/piechart.py @@ -0,0 +1,123 @@ +import matplotlib.pyplot as plt +from matplotlib import gridspec +import math +import matplotlib.patches as mpatches +import numpy as np + +class Graph: + def __init__(self, data): + self.data = data + self.num_cols = len(data[0]) + self.num_rows = len(data) + self.num_grid_cols = 1000 + + def plot(self): + fig = plt.figure(figsize=(6, 4)) + for i in range(self.num_rows): + row = self.data[i] + cur_col = 0 + for j in range(self.num_cols): + col = row[j] + colspan = self.get_num_cols(i, j) + print(colspan) + ax = plt.subplot2grid((self.num_rows, self.num_grid_cols), (i, cur_col), colspan=colspan) + cur_col += colspan + ax.pie(col.get_data()) + plt.show() + + def get_num_cols(self, row, col): + total = 0 + for step in self.data[row]: + total += sum(step.get_data()) + val = int((sum(self.data[row][col].get_data()) / total) * self.num_grid_cols) + if val == 0: + return val + 1 + else: + return val + +class Step: + def __init__(self, fast_path, slow_path, write): + self.fast_path = fast_path + self.slow_path = slow_path + self.write = write + + def get_data(self): + return [self.fast_path, self.slow_path, self.write] + + def get_ratio(self): + return math.sqrt(sum(self.get_data())) + +def main(): + # data = [ + # [Step(190.9079, 0, 2.381094), Step(17.8558, 6.106002, 6.650906), Step(15.79836, 2.799532, 6.771469), Step(16.50858, 2.214784, 6.747705), Step(15.70112, 1.991375, 6.759192), Step(15.06146, 0.7536615, 6.796239), Step(14.98719, 0.8343657, 6.780956)], + # [Step(191.0442, 0, 2.409159), Step(0, 9.969823, 0.06659347), Step(0, 3.194658, 0.001719711), Step(0, 2.255742, 0.00131141), Step(0, 2.273456, 0.001078326), Step(0, 1.590703, 0.0008960344), Step(0, 0.1210075, 0.0004500517)] + # ] + + data = [Step(190.9079, 0, 2.381094), Step(17.8558, 6.106002, 6.650906), Step(15.79836, 2.799532, 6.771469), Step(16.50858, 2.214784, 6.747705), Step(15.70112, 1.991375, 6.759192), Step(15.06146, 0.7536615, 6.796239), Step(14.98719, 0.8343657, 6.780956)] + # data = [Step(191.0442, 0, 2.409159), Step(0, 9.969823, 0.06659347), Step(0, 3.194658, 0.001719711), Step(0, 2.255742, 0.00131141), Step(0, 2.273456, 0.001078326), Step(0, 1.590703, 0.0008960344), Step(0, 0.1210075, 0.0004500517)] + # data = [Step(0, 9.969823, 0.06659347), Step(0, 3.194658, 0.001719711), Step(0, 2.255742, 0.00131141), Step(0, 2.273456, 0.001078326), Step(0, 1.590703, 0.0008960344), Step(0, 0.1210075, 0.0004500517)] + + # graph = Graph(data) + # graph.plot() + # for i in range(num_rows): + # row = data[i] + # for j in range(num_cols): + # col = row[j] + # ax = plt.subplot2grid((num_rows, num_cols), (i, j)) + # ax.pie(col.data()) + # # ax.axis('equal') + # + # plt.show() + + fig, axes = plt.subplots(1, len(data), figsize=(6, 4), sharex=True, gridspec_kw={'width_ratios': [d.get_ratio() for d in data]}) + for i, step in enumerate(data): + axes[i].pie(step.get_data()) + + plt.suptitle('Time breakdown') + labels = ['0', '1', '2', '3', '4', '5', '6'] + plt.xticks(np.arange(len(labels)), labels) + # plt.legend(handles=[ + # # mpatches.Patch(label='Fast Path'), + # # mpatches.Patch(label='Slow Path'), + # # mpatches.Patch(label='Write') + # ]) + + plt.show() + + # p1 = Step(190.9079, 0, 2.381094) + # p2 = Step(17.8558, 6.106002, 6.650906) + + # gs1 = gridspec.GridSpec(1, 2, width_ratios=[p1.ratio(), p2.ratio()]) + + # i1 = Step(191.0442, 0, 2.409159) + # i2 = Step(0, 9.969823, 0.09659347) + + # gs2 = gridspec.GridSpec(1, 2, width_ratios=[i1.ratio(), 
i2.ratio()]) + + # fig = plt.figure(figsize=(6, 4)) + # fig, axes = plt.subplots(1, 2, figsize=(6, 4), gridspec_kw={'width_ratios': [p1.ratio(), p2.ratio()]}) + # ax1 = plt.subplot2grid((2, 100), (0, 0), colspan=86, rowspan=1) + # ax2 = plt.subplot2grid((2, 100), (0, 86), colspan=14, rowspan=1) + # ax3 = plt.subplot2grid((2, 100), (1, 0), colspan=95, rowspan=1) + # ax4 = plt.subplot2grid((2, 100), (1, 95), colspan=5, rowspan=1) + + # axes[0].pie([p1.fast_path, p1.slow_path, p1.write]) + # axes[1].pie([p2.fast_path, p2.slow_path, p2.write]) + + # axes.pie([i1.fast_path, i1.slow_path, i1.write]) + # ax4.pie([i2.fast_path, i2.slow_path, i2.write]) + + # plt.tight_layout() + + +# fig, axes = plt.subplots(rows, cols) + # fig.suptitle('Out of Order Time Breakdown') + # + # for row in range(rows): + # for col in range(cols): + # axes[row, col].pie([10, 20, 70]) + + # plt.show() + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/benchmarks/incremental/replicate-data.py b/benchmarks/incremental/replicate-data.py new file mode 100755 index 000000000..705f2e8ce --- /dev/null +++ b/benchmarks/incremental/replicate-data.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 + +import argparse +import os +from tqdm import tqdm + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Zillow cleaning') + parser.add_argument('-i', '--in', type=str, dest='input_path', default='data/zillow_dirty.csv', + help='path or pattern to zillow data') + parser.add_argument('-o', '--output-path', type=str, dest='output_path', default='data/zillow_dirty@10G.csv', + help='specify path where to save output data files') + parser.add_argument('-s', '--scale-factor', type=int, dest='scale_factor', default=1460, help='how many times to replicate file') + parser.add_argument('--include-header', action='store_true', dest='include_header', help='whether to explicitly include the first line or not when replicating') + args = parser.parse_args() + + assert args.input_path, 'need to set input data path!' + assert args.output_path, 'need to set output data path!' 
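+    # note: the output holds the original rows plus scale_factor replicated copies;
+    # the header line is written only once unless --include-header is set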
+
+    args.scale_factor = int(max(1, args.scale_factor)) # no fractional support yet
+
+    print('>>> reading input file')
+    with open(args.input_path, 'r') as fp:
+        lines = fp.readlines()
+
+    print('>>> replicating data {}x'.format(args.scale_factor))
+    with open(args.output_path, 'w') as fp:
+        # write lines as is
+        fp.writelines(lines)
+
+        if not args.include_header:
+            lines = lines[1:]
+        for n in tqdm(range(args.scale_factor)):
+            fp.writelines(lines)
+
+    print('done.')
\ No newline at end of file
diff --git a/benchmarks/incremental/runsynthetic.py b/benchmarks/incremental/runsynthetic.py
new file mode 100644
index 000000000..9ed1fa2f6
--- /dev/null
+++ b/benchmarks/incremental/runsynthetic.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+# (c) L.Spiegelberg 2021
+# conduct the synthetic-data incremental resolution experiment as described in README.md
+
+import tuplex
+import time
+import sys
+import json
+import os
+import glob
+import argparse
+import math
+import re
+import shutil
+import subprocess
+import random
+
+def synthetic_pipeline(ctx, path, output_path, resolve, commit):
+    ds = ctx.csv(path, header=True)
+    # rows with a == 0 evaluate 1 // 0 and raise a ZeroDivisionError; all other rows pass through
+    ds = ds.withColumn("c", lambda x: 1 // x["a"] if x["a"] == 0 else x["a"])
+    if resolve:
+        ds = ds.resolve(ZeroDivisionError, lambda x: x["a"])
+    ds.tocsv(output_path, commit=commit)
+    return ctx.metrics
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Incremental resolution')
+    parser.add_argument('--input-path', type=str, dest='data_path', default='synth0.csv', help='path or pattern to synthetic data')
+    parser.add_argument('--output-path', type=str, dest='output_path', default='synthetic-output/', help='specify path where to save output data files')
+    parser.add_argument('--use-resolve-step', dest='use_resolve_step', action='store_true')
+    parser.add_argument('--incremental-resolution', dest='incremental_resolution', action="store_true", help="whether to use incremental resolution")
+    parser.add_argument('--commit-mode', dest='commit_mode', action='store_true', help='whether to use commit mode')
+    parser.add_argument('--resolve-in-order', dest='resolve_in_order', action="store_true", help="whether to resolve exceptions in order")
+    parser.add_argument('--clear-cache', dest='clear_cache', action='store_true', help='whether to clear the cache or not')
+    args = parser.parse_args()
+
+    assert args.data_path, 'need to set data path!'
+
+    # config vars
+    path = args.data_path
+    output_path = args.output_path
+
+    if not path:
+        print('found no synthetic data to process, abort.')
+        sys.exit(1)
+
+    print('>>> running {} on {}'.format('tuplex', path))
+
+    # load data
+    tstart = time.time()
+
+    # configuration, make sure to give enough runtime memory to the executors!
+    conf = {"webui.enable" : False,
+            "executorCount" : 16,
+            "executorMemory" : "6G",
+            "driverMemory" : "6G",
+            "partitionSize" : "32MB",
+            "runTimeMemory" : "128MB",
+            "useLLVMOptimizer" : True,
+            "optimizer.nullValueOptimization" : False,
+            "csv.selectionPushdown" : True,
+            "optimizer.generateParser" : False} # bug when using generated parser. Need to fix that.
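+    # if a tuplex_config.json is present (benchmark-synthetic.sh writes one via create_conf.py),
+    # it replaces the defaults above wholesale rather than merging them key by key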
+ + if os.path.exists('tuplex_config.json'): + with open('tuplex_config.json') as fp: + conf = json.load(fp) + + if args.incremental_resolution: + conf["optimizer.incrementalResolution"] = True + else: + conf["optimizer.incrementalResolution"] = False + + if args.resolve_in_order: + conf['optimizer.mergeExceptionsInOrder'] = True + else: + conf['optimizer.mergeExceptionsInOrder'] = False + + # Note: there's a bug in the merge in order mode here -.- + # force to false version + conf["optimizer.generateParser"] = False + conf["tuplex.optimizer.sharedObjectPropagation"] = False + conf["resolveWithInterpreterOnly"] = False + + tstart = time.time() + import tuplex + ctx = tuplex.Context(conf) + + print(json.dumps(ctx.options())) + + startup_time = time.time() - tstart + print('Tuplex startup time: {}'.format(startup_time)) + + shutil.rmtree(output_path, ignore_errors=True) + + if args.clear_cache: + subprocess.run(["clearcache"]) + + tstart = time.time() + metrics = [] + + use_resolve_step = args.use_resolve_step + + if use_resolve_step: + jobstart = time.time() + m = synthetic_pipeline(ctx, path, output_path, False, not args.commit_mode) + m = m.as_dict() + m["jobTime"] = time.time() - jobstart + metrics.append(m) + + jobstart = time.time() + m = synthetic_pipeline(ctx, path, output_path, True, True) + m = m.as_dict() + m["jobTime"] = time.time() - jobstart + metrics.append(m) + else: + jobstart = time.time() + m = synthetic_pipeline(ctx, path, output_path, False, True) + m = m.as_dict() + m['jobTime'] = time.time() - jobstart + metrics.append(m) + + runtime = time.time() - tstart + + print("EXPERIMENTAL RESULTS") + print(json.dumps({"startupTime": startup_time, + "totalRunTime": runtime, + "mergeExceptionsInOrder": conf["optimizer.mergeExceptionsInOrder"], + "incrementalResolution": conf["optimizer.incrementalResolution"]})) + + for metric in metrics: + print(json.dumps(metric)) diff --git a/benchmarks/incremental/runtuplex.py b/benchmarks/incremental/runtuplex.py new file mode 100644 index 000000000..5e94fe67c --- /dev/null +++ b/benchmarks/incremental/runtuplex.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python3 +# (c) L.Spiegelberg 2021 +# conduct dirty Zillow data experiment as described in README.md + +import tuplex +import time +import sys +import json +import os +import glob +import argparse +import math +import re +import shutil +import subprocess +import random + +# UDFs for pipeline +def extractBd(x): + val = x['facts and features'] + max_idx = val.find(' bd') + if max_idx < 0: + max_idx = len(val) + s = val[:max_idx] + + # find comma before + split_idx = s.rfind(',') + if split_idx < 0: + split_idx = 0 + else: + split_idx += 2 + r = s[split_idx:] + return int(r) + +def extractBa(x): + val = x['facts and features'] + max_idx = val.find(' ba') + if max_idx < 0: + max_idx = len(val) + s = val[:max_idx] + + # find comma before + split_idx = s.rfind(',') + if split_idx < 0: + split_idx = 0 + else: + split_idx += 2 + r = s[split_idx:] + ba = math.ceil(2.0 * float(r)) / 2.0 + return ba + +def extractSqft(x): + val = x['facts and features'] + max_idx = val.find(' sqft') + if max_idx < 0: + max_idx = len(val) + s = val[:max_idx] + + split_idx = s.rfind('ba ,') + if split_idx < 0: + split_idx = 0 + else: + split_idx += 5 + r = s[split_idx:] + r = r.replace(',', '') + return int(r) + +def extractOffer(x): + offer = x['title'].lower() + if 'sale' in offer: + return 'sale' + if 'rent' in offer: + return 'rent' + if 'sold' in offer: + return 'sold' + if 'foreclose' in offer.lower(): + return 
'foreclosed' + return offer + +def extractType(x): + t = x['title'].lower() + type = 'unknown' + if 'condo' in t or 'apartment' in t: + type = 'condo' + if 'house' in t: + type = 'house' + return type + +def extractPrice(x): + price = x['price'] + p = 0 + if x['offer'] == 'sold': + # price is to be calculated using price/sqft * sqft + val = x['facts and features'] + s = val[val.find('Price/sqft:') + len('Price/sqft:') + 1:] + r = s[s.find('$')+1:s.find(', ') - 1] + price_per_sqft = int(r) + p = price_per_sqft * x['sqft'] + elif x['offer'] == 'rent': + max_idx = price.rfind('/') + p = int(price[1:max_idx].replace(',', '')) + else: + # take price from price column + p = int(price[1:].replace(',', '')) + + return p + +def resolveBd(x): + if 'Studio' in x['facts and features']: + return 1 + raise ValueError + +#compare types and contents +def dirty_zillow_pipeline(ctx, path, output_path, step, commit): + + # Increases write times to highlight differences + + # ds = ctx.csv(path) + # + # ds = ds.withColumn('bedrooms', extractBd) + # if step > 0: + # ds = ds.resolve(ValueError, resolveBd) + # if step > 1: + # ds = ds.ignore(ValueError) + # + # ds = ds.withColumn('bathrooms', extractBa) + # if step > 2: + # ds = ds.resolve(ValueError, resolveBa) + # if step > 3: + # ds = ds.ignore(ValueError) + # + # ds = ds.withColumn('sqft', extractSqft) + # if step > 3: + # ds = ds.ignore(ValueError) + # + # ds = ds.withColumn('offer', extractOffer) + # ds = ds.withColumn('price', extractPrice) + # if step > 4: + # ds = ds.resolve(ValueError, lambda x: int(re.sub('[^0-9.]*', '', x['price']))) + # if step > 5: + # ds = ds.ignore(TypeError) + # ds = ds.ignore(ValueError) + # ds = ds.selectColumns(["address", "bedrooms", "bathrooms", "sqft", "price"]) + +# Original pipeline, most realistic, taken from previous paper to run benchmark on + + # ds = ds.withColumn("bedrooms", extractBd) + # if step > 0: + # ds = ds.resolve(ValueError, resolveBd) + # if step > 1: + # ds = ds.ignore(ValueError) + # ds = ds.withColumn("type", extractType) + # ds = ds.withColumn("zipcode", lambda x: '%05d' % int(x['postal_code'])) + # if step > 2: + # ds = ds.ignore(TypeError) + # ds = ds.mapColumn("city", lambda x: x[0].upper() + x[1:].lower()) + # ds = ds.withColumn("bathrooms", extractBa) + # if step > 3: + # ds = ds.ignore(ValueError) + # ds = ds.withColumn("sqft", extractSqft) + # if step > 4: + # ds = ds.ignore(ValueError) + # ds = ds.withColumn("offer", extractOffer) + # ds = ds.withColumn("price", extractPrice) + # if step > 5: + # ds = ds.resolve(ValueError, lambda x: int(re.sub('[^0-9.]*', '', x['price']))) + # ds = ds.filter(lambda x: 100000 < x['price'] < 2e7 and x['offer'] == 'sale') + # ds = ds.selectColumns(["url", "zipcode", "address", "city", "state", + # "bedrooms", "bathrooms", "sqft", "offer", "type", "price"]) + # ds.tocsv(output_path, commit=commit) + + ds = ctx.csv(path) + ds = ds.withColumn('bedrooms', extractBd) + if step > 0: + ds = ds.resolve(ValueError, resolveBd) + if step > 1: + ds = ds.ignore(ValueError) + ds = ds.filter(lambda x: x['bedrooms'] < 10) + ds = ds.withColumn('type', extractType) + ds = ds.filter(lambda x: x['type'] == 'condo') + ds = ds.withColumn('zipcode', lambda x: '%05d' % int(x['postal_code'])) + if step > 2: + ds = ds.ignore(TypeError) + ds = ds.mapColumn("city", lambda x: x[0].upper() + x[1:].lower()) + ds = ds.withColumn("bathrooms", extractBa) + if step > 3: + ds = ds.ignore(ValueError) + ds = ds.withColumn('sqft', extractSqft) + if step > 4: + ds = ds.ignore(ValueError) + ds = 
ds.withColumn('offer', extractOffer) + ds = ds.withColumn('price', extractPrice) + if step > 5: + ds = ds.resolve(ValueError, lambda x: int(re.sub('[^0-9.]*', '', x['price']))) + ds = ds.filter(lambda x: 100000 < x['price'] < 2e7 and x['offer'] == 'sale') + ds = ds.selectColumns(["url", "zipcode", "address", "city", "state", + "bedrooms", "bathrooms", "sqft", "offer", "type", "price"]) + ds.tocsv(output_path, commit=commit) + return ctx.metrics + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Incremental resolution') + parser.add_argument('--path', type=str, dest='data_path', default='/hot/scratch/bgivertz/data/zillow_dirty.csv', help='path or pattern to zillow data') + parser.add_argument('--output-path', type=str, dest='output_path', default='/hot/scratch/bgivertz/output/', help='specify path where to save output data files') + parser.add_argument('--resolve-in-order', dest='resolve_in_order', action="store_true", help="whether to resolve exceptions in order") + parser.add_argument('--num-steps', dest='num_steps', type=int, default=7) + parser.add_argument('--incremental-resolution', dest='incremental_resolution', action="store_true", help="whether to use incremental resolution") + parser.add_argument('--commit-mode', dest='commit_mode', action='store_true', help='whether to use commit mode') + parser.add_argument('--clear-cache', dest='clear_cache', action='store_true', help='whether to clear the cache or not') + args = parser.parse_args() + + assert args.data_path, 'need to set data path!' + + # config vars + path = args.data_path + output_path = args.output_path + + if not path: + print('found no zillow data to process, abort.') + sys.exit(1) + + print('>>> running {} on {}'.format('tuplex', path)) + + # load data + tstart = time.time() + + # configuration, make sure to give enough runtime memory to the executors! + conf = {"webui.enable" : False, + "executorCount" : 16, + "executorMemory" : "6G", + "driverMemory" : "6G", + "partitionSize" : "32MB", + "runTimeMemory" : "128MB", + "useLLVMOptimizer" : True, + "optimizer.nullValueOptimization" : False, + "csv.selectionPushdown" : True, + "optimizer.generateParser" : False} # bug when using generated parser. Need to fix that. + + if os.path.exists('tuplex_config.json'): + with open('tuplex_config.json') as fp: + conf = json.load(fp) + + if args.incremental_resolution: + conf["optimizer.incrementalResolution"] = True + else: + conf["optimizer.incrementalResolution"] = False + + if args.resolve_in_order: + conf['optimizer.mergeExceptionsInOrder'] = True + else: + conf['optimizer.mergeExceptionsInOrder'] = False + + # Note: there's a bug in the merge in order mode here -.- + # force to false version + conf["optimizer.generateParser"] = False + conf["tuplex.optimizer.sharedObjectPropagation"] = False + + tstart = time.time() + import tuplex + ctx = tuplex.Context(conf) + + print(json.dumps(ctx.options())) + + startup_time = time.time() - tstart + print('Tuplex startup time: {}'.format(startup_time)) + + shutil.rmtree(output_path, ignore_errors=True) + + if args.clear_cache: + subprocess.run(["clearcache"]) + + tstart = time.time() + # decide which pipeline to run based on argparse arg! 
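+    # step k enables the first k resolve()/ignore() operations in dirty_zillow_pipeline;
+    # with --commit-mode, intermediate steps skip the commit and only the final step writes output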
+ num_steps = args.num_steps + metrics = [] + for step in range(num_steps): + print(f'>>> running pipeline with {step} resolver(s) enabled...') + jobstart = time.time() + m = dirty_zillow_pipeline(ctx, path, output_path, step, not args.commit_mode or step == num_steps - 1) + m = m.as_dict() + m["numResolvers"] = step + m["jobTime"] = time.time() - jobstart + metrics.append(m) + + runtime = time.time() - tstart + + print("EXPERIMENTAL RESULTS") + print(json.dumps({"startupTime": startup_time, + "totalRunTime": runtime, + "mergeExceptionsInOrder": conf["optimizer.mergeExceptionsInOrder"], + "incrementalResolution": conf["optimizer.incrementalResolution"]})) + + for metric in metrics: + print(json.dumps(metric)) diff --git a/benchmarks/incremental/synthesize-data.py b/benchmarks/incremental/synthesize-data.py new file mode 100644 index 000000000..7c50d329e --- /dev/null +++ b/benchmarks/incremental/synthesize-data.py @@ -0,0 +1,49 @@ +import argparse +import math +import random +from tqdm import tqdm + +def generate_data(num_rows, row_size, exceptions): + num_exceptions = int(num_rows * exceptions) + exps = set(random.sample(range(num_rows), num_exceptions)) + + data = [] + + padding = 'a' * (row_size - 3) + normal_row = '1,' + padding + "\n" + exp_row = '0,' + padding + "\n" + for i in range(num_rows): + if i in exps: + data.append(exp_row) + else: + data.append(normal_row) + + return data + +def main(): + parser = argparse.ArgumentParser(description='Synthesize data') + parser.add_argument('--row-size', type=int, dest='row_size', default=200, help='number of bytes per row') + parser.add_argument('--exceptions', type=float, dest='exceptions', default=0.25, help='amount of exception rows in dataset') + parser.add_argument('--dataset-size', type=int, dest='dataset_size', default=10, help='number of megabytes in dataset') + parser.add_argument('--output-path', type=str, dest='output_path', default='synthetic.csv', help='path to output the file') + args = parser.parse_args() + + row_size = args.row_size + exceptions = args.exceptions + dataset_size = args.dataset_size * 1000000 + output_path = args.output_path + + num_rows = dataset_size // row_size + num_sample_rows = min(num_rows, 100000) + + data = generate_data(num_sample_rows, row_size, exceptions) + + with open(output_path, 'w') as fp: + header = "a,b\n" + fp.write(header) + + for _ in tqdm(range(math.ceil(num_rows // num_sample_rows))): + fp.writelines(data) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/tuplex/core/include/Context.h b/tuplex/core/include/Context.h index b75409533..6f070d2ff 100644 --- a/tuplex/core/include/Context.h +++ b/tuplex/core/include/Context.h @@ -15,6 +15,7 @@ #include #include #include "Partition.h" +#include #include "Row.h" #include "HistoryServerClasses.h" #include @@ -27,6 +28,7 @@ #include #include #include "JobMetrics.h" +#include namespace tuplex { @@ -37,7 +39,8 @@ namespace tuplex { class Executor; class Partition; class IBackend; - class ExceptionInfo; + class PartitionGroup; + class IncrementalCache; class Context { private: @@ -59,24 +62,7 @@ namespace tuplex { // needed because of C++ template issues void addPartition(DataSet* ds, Partition *partition); - void addParallelizeNode(DataSet *ds, const std::vector> &badParallelizeObjects=std::vector>(), const std::vector &numExceptionsInPartition=std::vector()); //! adds a paralellize node to the computation graph - - /*! - * serialize python objects as pickled objects into exception partitions. 
Set the python objects map to - * map all normalPartitions to the exceptions that occured within them. - * @param pythonObjects normal case schema violations and their initial row numbers - * @param numExceptionsInPartition number of exceptions in each normal partition - * @param normalPartitions normal partitions created - * @param opID parallelize operator ID - * @param serializedPythonObjects output vector for partitions - * @param pythonObjectsMap output for mapping - */ - void serializePythonObjects(const std::vector>& pythonObjects, - const std::vector &numExceptionsInPartition, - const std::vector &normalPartitions, - const int64_t opID, - std::vector &serializedPythonObjects, - std::unordered_map &pythonObjectsMap); + void addParallelizeNode(DataSet *ds, const std::vector& fallbackPartitions=std::vector{}, const std::vector& partitionGroups=std::vector{}); //! adds a paralellize node to the computation graph Partition* requestNewPartition(const Schema& schema, const int dataSetID, size_t minBytesRequired); uint8_t* partitionLockRaw(Partition *partition); @@ -93,6 +79,8 @@ namespace tuplex { std::shared_ptr _lastJobMetrics; + std::shared_ptr _incrementalCache; + codegen::CompilePolicy _compilePolicy; codegen::CompilePolicy compilePolicyFromOptions(const ContextOptions& options); @@ -264,6 +252,14 @@ namespace tuplex { return _lastJobMetrics; } + /*! + * gets an IncrementalCache object + * @return ptr to IncrementalCache object + */ + std::shared_ptr getIncrementalCache() const { + return _incrementalCache; + } + /*! * construct a dataset using customly allocated partitions. * @param schema schema of the data within this dataset. @@ -276,7 +272,7 @@ namespace tuplex { * @param numExceptionsInPartition number of schema violations that occured in each of the partitions * @return reference to newly created dataset. */ - DataSet& fromPartitions(const Schema& schema, const std::vector& partitions, const std::vector& columns, const std::vector> &badParallelizeObjects, const std::vector &numExceptionsInPartition); + DataSet& fromPartitions(const Schema& schema, const std::vector& partitions, const std::vector& fallbackPartitions, const std::vector& partitionGroups, const std::vector& columns); }; // needed for template mechanism to work #include diff --git a/tuplex/core/include/ContextOptions.h b/tuplex/core/include/ContextOptions.h index 51912124f..c776b188c 100644 --- a/tuplex/core/include/ContextOptions.h +++ b/tuplex/core/include/ContextOptions.h @@ -53,6 +53,7 @@ namespace tuplex { bool OPT_FILTER_PUSHDOWN() const { return stringToBool(_store.at("tuplex.optimizer.filterPushdown")); } bool OPT_OPERATOR_REORDERING() const { return stringToBool(_store.at("tuplex.optimizer.operatorReordering")); } bool OPT_MERGE_EXCEPTIONS_INORDER() const { return stringToBool(_store.at("tuplex.optimizer.mergeExceptionsInOrder")); } + bool OPT_INCREMENTAL_RESOLUTION() const { return stringToBool(_store.at("tuplex.optimizer.incrementalResolution")); } bool CSV_PARSER_SELECTION_PUSHDOWN() const; //! whether to use selection pushdown in the parser. If false, then full data will be serialized. bool INTERLEAVE_IO() const { return stringToBool(_store.at("tuplex.interleaveIO")); } //! whether to first load, compute, then write or use IO thread to interleave IO work with compute work for faster speeds. 
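Reviewer note on the ContextOptions change: the new `tuplex.optimizer.incrementalResolution` flag follows the store's existing pattern of string-valued settings surfaced through typed getters. A minimal, self-contained sketch of that pattern (the `ToyOptions` class is hypothetical, not Tuplex's actual `ContextOptions`):

```cpp
#include <cassert>
#include <iostream>
#include <string>
#include <unordered_map>

// Toy illustration of the string-keyed option store behind typed getters
// such as OPT_INCREMENTAL_RESOLUTION(); the class itself is hypothetical.
class ToyOptions {
    std::unordered_map<std::string, std::string> _store;

    static bool stringToBool(const std::string& s) {
        return s == "true" || s == "True" || s == "1";
    }
public:
    ToyOptions() {
        // same default this diff registers for the real option
        _store["tuplex.optimizer.incrementalResolution"] = "false";
    }
    void set(const std::string& k, const std::string& v) { _store[k] = v; }

    // analogous to OPT_INCREMENTAL_RESOLUTION(): parse the string on access
    bool incrementalResolution() const {
        return stringToBool(_store.at("tuplex.optimizer.incrementalResolution"));
    }
};

int main() {
    ToyOptions opt;
    assert(!opt.incrementalResolution());  // off by default
    opt.set("tuplex.optimizer.incrementalResolution", "true");
    assert(opt.incrementalResolution());   // enabled for incremental runs
    std::cout << "incremental resolution enabled\n";
}
```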
bool RESOLVE_WITH_INTERPRETER_ONLY() const { return stringToBool(_store.at("tuplex.resolveWithInterpreterOnly")); } diff --git a/tuplex/core/include/ExceptionInfo.h b/tuplex/core/include/ExceptionInfo.h deleted file mode 100644 index d6ee35886..000000000 --- a/tuplex/core/include/ExceptionInfo.h +++ /dev/null @@ -1,47 +0,0 @@ -//--------------------------------------------------------------------------------------------------------------------// -// // -// Tuplex: Blazing Fast Python Data Science // -// // -// // -// (c) 2017 - 2021, Tuplex team // -// Created by Benjamin Givertz first on 1/1/2022 // -// License: Apache 2.0 // -//--------------------------------------------------------------------------------------------------------------------// - -#ifndef TUPLEX_EXCEPTIONINFO_H -#define TUPLEX_EXCEPTIONINFO_H - -namespace tuplex { - /*! - * Struct to hold information that maps input partitions to input exceptions that occur within them. - * - * Explanation: - * Each input partition is passed the same vector of all input exceptions that occured during data parallelization - * or caching. Thus, each input partition must know how many input exceptions occur in its partition, the index - * of the input exception partition where its first exception occurs, and the offset into that partition where the - * first exception occurs. These values are held in this struct and each input partition is mapped to an ExceptionInfo. - */ - struct ExceptionInfo { - size_t numExceptions; //! number of exception rows that occur within a single input partition - size_t exceptionIndex; //! index into a vector of input exception partitions that holds the first input exception - size_t exceptionRowOffset; //! offset in rows into the first input exception partition where the first exception occurs. - size_t exceptionByteOffset; //! offset in bytes into the first input exception partition where the first exception occurs - - ExceptionInfo() : - numExceptions(0), - exceptionIndex(0), - exceptionRowOffset(0), - exceptionByteOffset(0) {} - - ExceptionInfo(size_t numExps, - size_t expIndex, - size_t expRowOffset, - size_t expByteOffset) : - numExceptions(numExps), - exceptionIndex(expIndex), - exceptionRowOffset(expRowOffset), - exceptionByteOffset(expByteOffset) {} - }; -} - -#endif //TUPLEX_EXCEPTIONINFO_H \ No newline at end of file diff --git a/tuplex/core/include/IncrementalCache.h b/tuplex/core/include/IncrementalCache.h new file mode 100644 index 000000000..a3fc41fb7 --- /dev/null +++ b/tuplex/core/include/IncrementalCache.h @@ -0,0 +1,134 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 1/1/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// + +#ifndef TUPLEX_INCREMENTALCACHE_H +#define TUPLEX_INCREMENTALCACHE_H + +#include +#include + +namespace tuplex { + + /*! + * Holds information about pipeline execution to use for incremental exception resolution + */ + class IncrementalCacheEntry { + private: + LogicalOperator* _pipeline; + std::vector _normalPartitions; + std::vector _exceptionPartitions; + std::vector _generalPartitions; + std::vector _fallbackPartitions; + std::vector _partitionGroups; + size_t _startFileNumber; + public: + /*! 
+ * Incremental cache entry for merge out of order + * @param pipeline original logical plan + * @param exceptionPartitions exception rows + * @param generalPartitions general rows + * @param fallbackPartitions fallback rows + * @param startFileNumber next available file number + */ + IncrementalCacheEntry(LogicalOperator* pipeline, + const std::vector& exceptionPartitions, + const std::vector& generalPartitions, + const std::vector& fallbackPartitions, + size_t startFileNumber); + + /*! + * Incremental cache entry for merge in order + * @param pipeline original logical plan + * @param normalPartitions normal rows + * @param exceptionPartitions exception rows + * @param partitionGroups mapping of normal rows to exception rows + */ + IncrementalCacheEntry(LogicalOperator *pipeline, + const std::vector& normalPartitions, + const std::vector& exceptionPartitions, + const std::vector& partitionGroups); + + ~IncrementalCacheEntry(); + + LogicalOperator* pipeline() const { + return _pipeline; + } + + void setExceptionPartitions(const std::vector& exceptionPartitions) { _exceptionPartitions = exceptionPartitions; } + + std::vector partitionGroups() const { return _partitionGroups; } + + std::vector normalPartitions() const { return _normalPartitions; } + + std::vector exceptionPartitions() const { + return _exceptionPartitions; + } + + std::vector generalPartitions() const { + return _generalPartitions; + } + + std::vector fallbackPartitions() const { + return _fallbackPartitions; + } + + size_t startFileNumber() const { + return _startFileNumber; + } + }; + + /*! + * Maps pipelines to their cached results + */ + class IncrementalCache { + private: + std::unordered_map _cache; + public: + ~IncrementalCache() { + clear(); + } + + /*! + * Add entry to the cache + * @param key string representation of the pipeline + * @param entry entry to store + */ + void addEntry(const std::string& key, IncrementalCacheEntry* entry); + + /*! + * Retrieve entry from the cache + * @param key string representation of the pipeline + * @return cache entry + */ + IncrementalCacheEntry* getEntry(const std::string& key) const { + auto elt = _cache.find(key); + if (elt == _cache.end()) + return nullptr; + return elt->second; + } + + /*! + * Reset incremental cache entries + */ + void clear() { + _cache.clear(); + } + + /*! + * convert pipeline to unique string + * @param pipeline original logical plan + * @return + */ + static std::string newKey(LogicalOperator* pipeline); + }; + +} + +#endif //TUPLEX_INCREMENTALCACHE_H \ No newline at end of file diff --git a/tuplex/core/include/JobMetrics.h b/tuplex/core/include/JobMetrics.h index 54a9cabec..071e561db 100644 --- a/tuplex/core/include/JobMetrics.h +++ b/tuplex/core/include/JobMetrics.h @@ -38,10 +38,13 @@ namespace tuplex { double slow_path_time_s = 0.0; double fast_path_per_row_time_ns = 0.0; double slow_path_per_row_time_ns = 0.0; - // size_t fast_path_input_row_count; - // size_t fast_path_output_row_count; - // size_t slow_path_input_row_count; - // size_t slow_path_output_row_count; + + size_t fast_path_input_row_count = 0; + size_t fast_path_output_row_count = 0; + size_t slow_path_input_row_count = 0; + size_t slow_path_output_row_count = 0; + + double write_output_time_s = 0.0; // disk spilling metrics int partitions_swapin_count = 0; @@ -161,6 +164,40 @@ namespace tuplex { it->slow_path_per_row_time_ns = slow_path_per_row_time_ns; } + /*! 
+ * set fast path row count info + * @param stageNo + * @param inputRows + * @param outputRows + */ + void setFastPathRowCount(int stageNo, size_t inputRows, size_t outputRows) { + auto it = get_or_create_stage_metrics(stageNo); + it->fast_path_input_row_count = inputRows; + it->fast_path_output_row_count = outputRows; + } + + /*! + * set slow path row count info + * @param stageNo + * @param inputRows + * @param outputRows + */ + void setSlowPathRowCount(int stageNo, size_t inputRows, size_t outputRows) { + auto it = get_or_create_stage_metrics(stageNo); + it->slow_path_input_row_count = inputRows; + it->slow_path_output_row_count = outputRows; + } + + /*! + * set write timing info + * @param stageNo + * @param wallTime + */ + void setWriteOutputTimes(int stageNo, double wallTime) { + auto it = get_or_create_stage_metrics(stageNo); + it->write_output_time_s = wallTime; + } + /*! * set fast path timing info * @param stageNo @@ -234,7 +271,12 @@ namespace tuplex { ss<<"\"partitions_swapin_count\":"< createLoadAndTransformToMemoryTasks(TransformStage* tstage, const ContextOptions& options, const std::shared_ptr& syms); void executeTransformStage(TransformStage* tstage); + std::vector createIncrementalTasks(TransformStage* tstage, const ContextOptions& options, const std::shared_ptr& syms); + void executeIncrementalStage(TransformStage* tstage); /*! * Create the final hashmap from all of the input [tasks] (e.g. either merge them (join) or combine them (aggregate) @@ -89,7 +91,7 @@ namespace tuplex { MessageHandler& logger() const { return Logger::instance().logger("local ee"); } // write output (may be already in correct format!) - void writeOutput(TransformStage* tstage, std::vector& sortedTasks); + size_t writeOutput(TransformStage* tstage, std::vector& sortedTasks, size_t startFileNumber=0); std::vector performTasks(std::vector& tasks, std::function driverCallback=[](){}); @@ -99,7 +101,7 @@ namespace tuplex { return std::accumulate(counts.begin(), counts.end(), 0, [](size_t acc, std::pair, size_t> val) { return acc + val.second; }); } - inline std::vector getOutputPartitions(IExecutorTask* task) { + inline std::vector getNormalPartitions(IExecutorTask* task) { if(!task) return std::vector(); @@ -113,7 +115,7 @@ namespace tuplex { return std::vector(); } - inline std::vector getRemainingExceptions(IExecutorTask* task) { + inline std::vector getExceptionPartitions(IExecutorTask* task) { if(!task) return std::vector(); @@ -127,7 +129,7 @@ namespace tuplex { return std::vector(); } - inline std::vector generalCasePartitions(IExecutorTask* task) { + inline std::vector getGeneralPartitions(IExecutorTask* task) { if(!task) return std::vector(); @@ -155,18 +157,18 @@ namespace tuplex { return std::unordered_map, size_t>(); } - inline std::vector> getNonConformingRows(IExecutorTask* task) { + inline std::vector getFallbackPartitions(IExecutorTask* task) { if(!task) - return std::vector>(); + return std::vector(); if(task->type() == TaskType::UDFTRAFOTASK) - return std::vector>(); // none here, can be only result from ResolveTask. + return std::vector(); // none here, can be only result from ResolveTask. 
if(task->type() == TaskType::RESOLVE) - return dynamic_cast(task)->getNonConformingRows(); + return dynamic_cast(task)->getOutputFallbackPartitions(); throw std::runtime_error("unknown task type seen in " + std::string(__FILE_NAME__) + ":" + std::to_string(__LINE__)); - return std::vector>(); + return std::vector(); } std::vector resolveViaSlowPath(std::vector& tasks, diff --git a/tuplex/core/include/logical/CacheOperator.h b/tuplex/core/include/logical/CacheOperator.h index 563aa8f0b..2ca184a1d 100644 --- a/tuplex/core/include/logical/CacheOperator.h +++ b/tuplex/core/include/logical/CacheOperator.h @@ -79,9 +79,10 @@ namespace tuplex { * @return */ bool isCached() const { return _cached; } - std::vector cachedPartitions() const { return _normalCasePartitions; } - std::vector cachedExceptions() const { return _generalCasePartitions; } - std::unordered_map partitionToExceptionsMap() const { return _partitionToExceptionsMap; } + std::vector cachedNormalPartitions() const { return _normalPartitions; } + std::vector cachedGeneralPartitions() const { return _generalPartitions; } + std::vector cachedFallbackPartitions() const { return _fallbackPartitions; } + std::vector partitionGroups() const { return _partitionGroups; } size_t getTotalCachedRows() const; @@ -107,28 +108,19 @@ namespace tuplex { // or merge them. bool _cached; bool _storeSpecialized; - std::vector _normalCasePartitions; //! holds all data conforming to the normal case schema - std::vector _generalCasePartitions; //! holds all data which is considered to be a normal-case violation, - //! i.e. which does not adhere to the normal case schema, but did not produce - //! an exception while being processed through the pipeline before - std::unordered_map _partitionToExceptionsMap; //! maps normal case partitions to corresponding general case ones - std::vector _py_objects; //! all python objects who do not adhere to the general case schema + std::vector _normalPartitions; //! holds all data conforming to the normal case schema + std::vector _generalPartitions; //! holds all data which is considered to be a normal-case violation, + std::vector _fallbackPartitions; //! holds all data which is output as a python object from interpreter processing + std::vector _partitionGroups; //! groups together partitions for correct row ordering std::vector _columns; // internal sample of normal case rows, used for tracing & Co. std::vector _sample; // number of rows need to be stored for cost estimates - size_t _normalCaseRowCount; - size_t _generalCaseRowCount; - - // @TODO: there should be 3 things stored - // 1.) common case => i.e. - // 2.) general case => i.e. what in general can be done (null-values & Co, wide integers, ...) - // 3.) python case => i.e. things that don't fit into either case (interpreter objects serialized via pickle) - - // Note: the pickling could be parallelized by simply matching python types & Co... - // ==> store python data as tuple of elements! 
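The renamed CacheOperator members encode a three-tier row model: normal rows match the fast-path schema, general rows violate it but still fit the wider slow-path schema, and fallback rows exist only as pickled Python objects for the interpreter. A toy classifier illustrating the split (hypothetical code, not the operator's actual logic):

```cpp
#include <iostream>
#include <string>

// Hypothetical illustration of the three row classes tracked below:
// normal (fast-path schema), general (slow-path schema), fallback
// (interpreter-only Python objects). Not Tuplex code.
enum class RowClass { Normal, General, Fallback };

// Toy rule: integers are "normal", other non-empty cells "general",
// everything else must fall back to the interpreter.
RowClass classify(const std::string& cell) {
    try {
        std::size_t pos = 0;
        std::stoll(cell, &pos);
        if (pos == cell.size()) return RowClass::Normal;  // fits fast-path type
    } catch (...) {}
    if (!cell.empty()) return RowClass::General;          // wider schema still works
    return RowClass::Fallback;                            // interpreter-only
}

int main() {
    for (const std::string cell : {"42", "3.14", ""}) {
        switch (classify(cell)) {
            case RowClass::Normal:   std::cout << "'" << cell << "' -> normal\n";   break;
            case RowClass::General:  std::cout << "'" << cell << "' -> general\n";  break;
            case RowClass::Fallback: std::cout << "'" << cell << "' -> fallback\n"; break;
        }
    }
}
```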
+ size_t _normalRowCount; + size_t _generalRowCount; + size_t _fallbackRowCount; }; } diff --git a/tuplex/core/include/logical/LogicalPlan.h b/tuplex/core/include/logical/LogicalPlan.h index fa6e9ea30..a5bc09596 100644 --- a/tuplex/core/include/logical/LogicalPlan.h +++ b/tuplex/core/include/logical/LogicalPlan.h @@ -32,7 +32,7 @@ namespace tuplex { void optimizeFilters(); void emitPartialFilters(); void reorderDataProcessingOperators(); - + void incrementalResolution(const Context &context); public: LogicalPlan() = delete; diff --git a/tuplex/core/include/logical/ParallelizeOperator.h b/tuplex/core/include/logical/ParallelizeOperator.h index 0960baf89..87666950c 100644 --- a/tuplex/core/include/logical/ParallelizeOperator.h +++ b/tuplex/core/include/logical/ParallelizeOperator.h @@ -17,10 +17,9 @@ namespace tuplex { class ParallelizeOperator : public LogicalOperator { - std::vector _partitions; // data, conforming to majority type - std::vector _pythonObjects; // schema violations stored for interpreter processing as python objects - // maps partitions to their corresponding python objects - std::unordered_map _inputPartitionToPythonObjectsMap; + std::vector _normalPartitions; // data, conforming to majority type + std::vector _fallbackPartitions; // schema violations stored for interpreter processing as python objects + std::vector _partitionGroups; // maps normal partitions to their corresponding fallback partitions std::vector _columnNames; std::vector _sample; // sample, not necessary conforming to one type @@ -31,7 +30,7 @@ namespace tuplex { // this a root node ParallelizeOperator(const Schema& schema, - const std::vector& partitions, + const std::vector& normalPartitions, const std::vector& columns); std::string name() override { return "parallelize"; } @@ -47,13 +46,13 @@ namespace tuplex { * get the partitions where the parallelized data is stored. * @return vector of partitions. 
*/ - std::vector getPartitions(); + std::vector getNormalPartitions(); - void setPythonObjects(const std::vector &pythonObjects) { _pythonObjects = pythonObjects; } - std::vector getPythonObjects() { return _pythonObjects; } + void setFallbackPartitions(const std::vector &fallbackPartitions) { _fallbackPartitions = fallbackPartitions; } + std::vector getFallbackPartitions() { return _fallbackPartitions; } - void setInputPartitionToPythonObjectsMap(const std::unordered_map& pythonObjectsMap) { _inputPartitionToPythonObjectsMap = pythonObjectsMap; } - std::unordered_map getInputPartitionToPythonObjectsMap() { return _inputPartitionToPythonObjectsMap; } + void setPartitionGroups(const std::vector& partitionGroups) { _partitionGroups = partitionGroups; } + std::vector getPartitionGroups() { return _partitionGroups; } Schema getInputSchema() const override { return getOutputSchema(); } diff --git a/tuplex/core/include/physical/CodeDefs.h b/tuplex/core/include/physical/CodeDefs.h index b8c3cd76b..c185b2d54 100644 --- a/tuplex/core/include/physical/CodeDefs.h +++ b/tuplex/core/include/physical/CodeDefs.h @@ -48,8 +48,14 @@ namespace tuplex { typedef int64_t(*read_block_f)(void*, const uint8_t*, int64_t, int64_t*, int64_t*, int8_t); // protoype of the function generated by the below builder - // parameters are userData, block, blocksize, expPtrs, expPtrSizes, numExceptions, normalrowsout, badrowsout, lastRow - typedef int64_t(*read_block_exp_f)(void*, const uint8_t*, int64_t, uint8_t **, int64_t *, int64_t, int64_t*, int64_t*, bool); + // parameters are userData, block, blocksize, normalrowsout, badrowsout, lastRow, + // totalFilterCounter, totalNormalRowCounter, totalGeneralRowCounter, totalFallbackRowCounter, + // generalPartitions, numGeneralPartitions, generalIndexOffset, generalRowOffset, generalByteOffset + // fallbackPartitions, numFallbackPartitions, fallbackIndexOffset, fallbackRowOffset, fallbackByteOffset + typedef int64_t(*read_block_exp_f)(void*, const uint8_t*, int64_t, int64_t*, int64_t*, bool, + int64_t*, int64_t*, int64_t*, int64_t*, + uint8_t **, int64_t, int64_t*, int64_t*, int64_t*, + uint8_t **, int64_t, int64_t*, int64_t*, int64_t*); /*! * prototype for processing a single row (with callbacks etc.). Returns how many bytes were processed diff --git a/tuplex/core/include/physical/PhysicalPlan.h b/tuplex/core/include/physical/PhysicalPlan.h index 560867b64..fe02bcc92 100644 --- a/tuplex/core/include/physical/PhysicalPlan.h +++ b/tuplex/core/include/physical/PhysicalPlan.h @@ -101,6 +101,8 @@ namespace tuplex { const LogicalPlan* logicalPlan() const { return _lp; } + const LogicalPlan* originalLogicalPlan() const { return _lpOriginal; } + nlohmann::json getStagedRepresentationAsJSON() const; /*! 
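`PartitionGroup` replaces the old per-UUID `ExceptionInfo` map: each group records a start index and count for the normal, general, and fallback partitions that belong together, so tasks can slice the three vectors consistently. Its definition is not shown in this diff, so the struct below is an inferred sketch; the slicing loop mirrors how `createLoadAndTransformToMemoryTasks` consumes groups further down.

```cpp
#include <cstdio>
#include <string>
#include <vector>

// Sketch of what a PartitionGroup plausibly carries, inferred from how the
// scheduler slices partition vectors with it; the real definition lives in
// the Partition headers and may differ.
struct ToyPartitionGroup {
    int numNormalPartitions = 0,   normalPartitionStartInd = 0;
    int numGeneralPartitions = 0,  generalPartitionStartInd = 0;
    int numFallbackPartitions = 0, fallbackPartitionStartInd = 0;
};

int main() {
    std::vector<std::string> normal  {"n0", "n1", "n2"};
    std::vector<std::string> general {"g0", "g1"};

    // one task gets normal[1..2] and general[0..1]; no fallback partitions
    ToyPartitionGroup g;
    g.numNormalPartitions = 2;  g.normalPartitionStartInd = 1;
    g.numGeneralPartitions = 2; g.generalPartitionStartInd = 0;

    // same index arithmetic the task-creation loop uses
    for (int i = g.normalPartitionStartInd;
         i < g.normalPartitionStartInd + g.numNormalPartitions; ++i)
        std::printf("task normal partition: %s\n", normal[i].c_str());
    for (int i = g.generalPartitionStartInd;
         i < g.generalPartitionStartInd + g.numGeneralPartitions; ++i)
        std::printf("task general partition: %s\n", general[i].c_str());
}
```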
diff --git a/tuplex/core/include/physical/PhysicalStage.h b/tuplex/core/include/physical/PhysicalStage.h index 34e5b2c51..764ac3f85 100644 --- a/tuplex/core/include/physical/PhysicalStage.h +++ b/tuplex/core/include/physical/PhysicalStage.h @@ -22,7 +22,7 @@ namespace tuplex { - class IBackend;\ + class IBackend; class PhysicalStage; class PhysicalPlan; class LogicalPlan; diff --git a/tuplex/core/include/physical/ResolveTask.h b/tuplex/core/include/physical/ResolveTask.h index 2044a5699..83d3d8bfd 100644 --- a/tuplex/core/include/physical/ResolveTask.h +++ b/tuplex/core/include/physical/ResolveTask.h @@ -61,9 +61,9 @@ namespace tuplex { ResolveTask(int64_t stageID, int64_t contextID, const std::vector& partitions, - const std::vector& runtimeExceptions, - const std::vector& inputExceptions, - ExceptionInfo inputExceptionInfo, + const std::vector& exceptionPartitions, + const std::vector& generalPartitions, + const std::vector& fallbackPartitions, const std::vector& operatorIDsAffectedByResolvers, //! used to identify which exceptions DO require reprocessing because there might be a resolver in the slow path for them. Schema exceptionInputSchema, //! schema of the input rows in which both user exceptions and normal-case violations are stored in. This is also the schema in which rows which on the slow path produce again an exception will be stored in. Schema resolverOutputSchema, //! schema of rows that the resolve function outputs if it doesn't rethrow exceptions @@ -75,15 +75,16 @@ namespace tuplex { char csvDelimiter, char csvQuotechar, codegen::resolve_f functor=nullptr, - PyObject* interpreterFunctor=nullptr) : IExceptionableTask::IExceptionableTask(exceptionInputSchema, contextID), + PyObject* interpreterFunctor=nullptr, + bool isIncremental=false) : IExceptionableTask::IExceptionableTask(exceptionInputSchema, contextID), _stageID(stageID), _partitions(partitions), - _runtimeExceptions(runtimeExceptions), - _inputExceptions(inputExceptions), - _numInputExceptions(inputExceptionInfo.numExceptions), - _inputExceptionIndex(inputExceptionInfo.exceptionIndex), - _inputExceptionRowOffset(inputExceptionInfo.exceptionRowOffset), - _inputExceptionByteOffset(inputExceptionInfo.exceptionByteOffset), + _exceptionPartitions(exceptionPartitions), + _generalPartitions(generalPartitions), + _fallbackPartitions(fallbackPartitions), + _exceptionCounter(0), + _generalCounter(0), + _fallbackCounter(0), _resolverOutputSchema(resolverOutputSchema), _targetOutputSchema(targetNormalCaseOutputSchema), _mergeRows(mergeRows), @@ -100,7 +101,9 @@ namespace tuplex { _outputRowNumber(0), _wallTime(0.0), _numInputRowsRead(0), - _numUnresolved(0) { + _numUnresolved(0), + _numResolved(0), + _isIncremental(isIncremental) { // copy the IDs and sort them so binary search can be used. std::sort(_operatorIDsAffectedByResolvers.begin(), _operatorIDsAffectedByResolvers.end()); _normalPtrBytesRemaining = 0; @@ -170,7 +173,7 @@ namespace tuplex { std::vector getOutputPartitions() const override { return _partitions; } - std::vector> getNonConformingRows() const { return _py_nonconfirming; } + std::vector getOutputFallbackPartitions() const { return _fallbackSink.partitions; } /// very important to override this because of the special two exceptions fields of ResolveTask /// i.e. _generalCasePartitions store what exceptions to resolve, IExceptionableTask::_generalCasePartitions exceptions that occurred @@ -214,12 +217,16 @@ namespace tuplex { private: int64_t _stageID; /// to which stage does this task belong to. 
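The ResolveTask changes below swap the old exception-info bookkeeping for three input partition vectors with matching counters and add a dedicated `_fallbackSink` for rows only the interpreter can produce. As a rough stand-in for such a sink (the real `MemorySink` manages Tuplex partitions; `ToyFallbackSink` is hypothetical):

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

// Hypothetical stand-in for the fallback sink a ResolveTask writes to:
// interpreter-produced rows are appended as length-prefixed byte blobs
// and handed back as partitions later.
struct ToyFallbackSink {
    std::vector<uint8_t> bytes;
    std::size_t rows = 0;

    void appendRow(const void* data, uint64_t len) {
        auto off = bytes.size();
        bytes.resize(off + sizeof(len) + len);
        std::memcpy(bytes.data() + off, &len, sizeof(len));        // length prefix
        std::memcpy(bytes.data() + off + sizeof(len), data, len);  // row payload
        ++rows;
    }
};

int main() {
    ToyFallbackSink sink;
    const char row[] = "pickled-row";
    sink.appendRow(row, sizeof(row));
    return sink.rows == 1 ? 0 : 1;
}
```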
std::vector _partitions; - std::vector _runtimeExceptions; - std::vector _inputExceptions; - size_t _numInputExceptions; - size_t _inputExceptionIndex; - size_t _inputExceptionRowOffset; - size_t _inputExceptionByteOffset; + std::vector _exceptionPartitions; + std::vector _generalPartitions; + std::vector _fallbackPartitions; + + size_t _exceptionCounter; + size_t _generalCounter; + size_t _fallbackCounter; + + bool _isIncremental; + inline Schema commonCaseInputSchema() const { return _deserializerGeneralCaseOutput->getSchema(); } Schema _resolverOutputSchema; //! what the resolve functor produces Schema _targetOutputSchema; //! which schema the final rows should be in... @@ -234,6 +241,7 @@ namespace tuplex { char _csvQuotechar; size_t _numUnresolved; + size_t _numResolved; int64_t _currentRowNumber; // std::vector _mergedPartitions; @@ -258,6 +266,8 @@ namespace tuplex { // sink for type violation rows MemorySink _generalCaseSink; + MemorySink _fallbackSink; + // hash table sink // -> hash to be a hybrid because sometimes incompatible python objects have to be hashed here. HashTableSink _htable; @@ -271,7 +281,6 @@ namespace tuplex { // python output which can't be consolidated, saved as separate list void writePythonObject(PyObject* out_row); - std::vector> _py_nonconfirming; int64_t _outputRowNumber; diff --git a/tuplex/core/include/physical/ResultSet.h b/tuplex/core/include/physical/ResultSet.h index e94b8f1ae..6a97be70c 100644 --- a/tuplex/core/include/physical/ResultSet.h +++ b/tuplex/core/include/physical/ResultSet.h @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include #include @@ -25,23 +25,61 @@ namespace tuplex { class ResultSet { private: - std::list _partitions; - std::vector _exceptions; // unresolved exceptions - std::unordered_map _partitionToExceptionsMap; - // @TODO: use here rows instead? would make it potentially cleaner... - std::deque> _pyobjects; // python objects remaining whose type - // did not confirm to the one of partitions. Maybe use Row here instead? - size_t _curRowCounter; //! row counter for the current partition - size_t _byteCounter; //! byte offset for the current partition - size_t _rowsRetrieved; - size_t _totalRowCounter; // used for merging in rows! - size_t _maxRows; - Schema _schema; - - void removeFirstPartition(); + std::list _currentNormalPartitions; //! normal partitions in current group + std::list _currentGeneralPartitions; //! general partitions in current group + std::list _currentFallbackPartitions; //! fallback partitions in current group + std::list _remainingNormalPartitions; //! remaining normal partitions in other groups + std::list _remainingGeneralPartitions; //! remaining general partitions in other groups + std::list _remainingFallbackPartitions; //! remaining fallback partitions in other groups + std::list _partitionGroups; //! groups together normal, general, and fallback partitions for merging + + size_t _totalRowCounter; //! total rows emitted across all groups + size_t _maxRows; //! max number of rows to emit + Schema _schema; //! 
normal case schema + + size_t _curNormalRowCounter; + size_t _curNormalByteCounter; + size_t _curGeneralRowCounter; + size_t _curGeneralByteCounter; + size_t _curFallbackRowCounter; + size_t _curFallbackByteCounter; + size_t _normalRowCounter; + size_t _generalRowCounter; + size_t _fallbackRowCounter; + + int64_t currentGeneralRowInd(); + int64_t currentFallbackRowInd(); + + Row getNextNormalRow(); + bool hasNextNormalRow(); + Row getNextFallbackRow(); + bool hasNextFallbackRow(); + Row getNextGeneralRow(); + bool hasNextGeneralRow(); + + void removeFirstGeneralPartition(); + void removeFirstFallbackPartition(); + void removeFirstNormalPartition(); public: - ResultSet() : _curRowCounter(0), _byteCounter(0), _rowsRetrieved(0), - _totalRowCounter(0), _maxRows(0), _schema(Schema::UNKNOWN) {} + /*! + * Create new result set with normal, general, and fallback rows + * @param schema normal case schema + * @param normalPartitions normal case rows + * @param generalPartitions general case rows + * @param fallbackPartitions fallback case rows + * @param partitionGroups information to merge row numbers correctly + * @param maxRows limit on rows to emit + */ + ResultSet(const Schema& schema, + const std::vector& normalPartitions, + const std::vector& generalPartitions=std::vector{}, + const std::vector& fallbackPartitions=std::vector{}, + const std::vector& partitionGroups=std::vector{}, + int64_t maxRows=std::numeric_limits::max()); + + ResultSet() : _curNormalRowCounter(0), _curNormalByteCounter(0), _curGeneralRowCounter(0), _curGeneralByteCounter(0), + _curFallbackRowCounter(0), _curFallbackByteCounter(0), _totalRowCounter(0), _maxRows(0), _schema(Schema::UNKNOWN), + _normalRowCounter(0), _generalRowCounter(0), _fallbackRowCounter(0) {} ~ResultSet() = default; // Non copyable @@ -51,13 +89,6 @@ namespace tuplex { ResultSet(const ResultSet&) = delete; ResultSet& operator = (const ResultSet&) = delete; - ResultSet(const Schema& _schema, - const std::vector& partitions, - const std::vector& exceptions=std::vector{}, - const std::unordered_map& partitionToExceptionsMap=std::unordered_map(), - const std::vector> pyobjects=std::vector>{}, - int64_t maxRows=std::numeric_limits::max()); - /*! * check whether result contains one more row */ @@ -75,52 +106,107 @@ namespace tuplex { */ std::vector getRows(size_t limit); - bool hasNextPartition() const; + /*! + * check whether general partitions remain + * @return + */ + bool hasNextGeneralPartition() const; + + /*! + * get next general partition but does not invalidate it + * @return + */ + Partition* getNextGeneralPartition(); + + /*! + * check whether fallback partitions remain + * @return + */ + bool hasNextFallbackPartition() const; + + /*! + * get next fallback partition but does not invalidate it + * @return + */ + Partition* getNextFallbackPartition(); + + /*! + * check whether normal partitions remain + * @return + */ + bool hasNextNormalPartition() const; /*! user needs to invalidate then! - * * @return */ - Partition* getNextPartition(); + Partition* getNextNormalPartition(); + + /*! + * number of rows across all cases of partitions + * @return + */ size_t rowCount() const; + /*! + * normal case schema + * @return + */ Schema schema() const { return _schema; } /*! - * removes and invalidates all partitions! + * removes and invalidates all normalPartitions! */ void clear(); + /*! + * number of rows in fallback partitions + * @return + */ + size_t fallbackRowCount() const; + /*! * retrieve all good rows in bulk, removes them from this result set. 
* @return */ - std::vector partitions() { + std::vector normalPartitions() { std::vector p; - while(hasNextPartition()) - p.push_back(getNextPartition()); + while(hasNextNormalPartition()) + p.push_back(getNextNormalPartition()); return p; } /*! - * retrieve all unresolved rows (should be only called internally). DOES NOT REMOVE THEM FROM result set. + * returns/removes all general partitions * @return */ - std::vector exceptions() const { return _exceptions; } - - std::unordered_map partitionToExceptionsMap() const { return _partitionToExceptionsMap; } + std::vector generalPartitions() { + std::vector p; + while(hasNextGeneralPartition()) + p.push_back(getNextGeneralPartition()); + return p; + } /*! - * returns/removes all objects + * returns/removes all fallback partitions * @return */ - std::deque> pyobjects() { - return std::move(_pyobjects); + std::vector fallbackPartitions() { + std::vector p; + while(hasNextFallbackPartition()) + p.push_back(getNextFallbackPartition()); + return p; } - size_t pyobject_count() const { return _pyobjects.size(); } - - size_t numPartitions() const { return _partitions.size(); } + /*! + * returns/removes all partition groups + * @return + */ + std::vector partitionGroups() { + std::vector g; + for (const auto& group : _partitionGroups) + g.push_back(group); + return g; + } }; } #endif //TUPLEX_RESULTSET_H \ No newline at end of file diff --git a/tuplex/core/include/physical/StageBuilder.h b/tuplex/core/include/physical/StageBuilder.h index 63b94bd57..551878f3a 100644 --- a/tuplex/core/include/physical/StageBuilder.h +++ b/tuplex/core/include/physical/StageBuilder.h @@ -35,6 +35,7 @@ namespace tuplex { * @param sharedObjectPropagation whether to use shared object propogation * @param nullValueOptimization whether to use null value optimization * @param updateInputExceptions whether input exceptions indices need to be updated + * @param incrementalResolution whether to execute with incremental resolution */ StageBuilder(int64_t stage_number, bool rootStage, @@ -43,7 +44,8 @@ namespace tuplex { double normalCaseThreshold, bool sharedObjectPropagation, bool nullValueOptimization, - bool updateInputExceptions); + bool updateInputExceptions, + bool incrementalResolution); // builder functions void addMemoryInput(const Schema& schema, LogicalOperator* node); @@ -91,6 +93,7 @@ namespace tuplex { bool _sharedObjectPropagation; bool _nullValueOptimization; bool _updateInputExceptions; + bool _incrementalResolution; std::vector _operators; // codegen strings diff --git a/tuplex/core/include/physical/TransformStage.h b/tuplex/core/include/physical/TransformStage.h index 22d7f5fb4..ec64c9dea 100644 --- a/tuplex/core/include/physical/TransformStage.h +++ b/tuplex/core/include/physical/TransformStage.h @@ -13,7 +13,7 @@ #include #include -#include +#include #include "PhysicalStage.h" #include "LLVMOptimizer.h" #include @@ -32,6 +32,7 @@ #include #include #include +#include #ifdef BUILD_WITH_AWS // include protobuf serialization of TrafoStage for Lambda executor @@ -95,16 +96,59 @@ namespace tuplex { } /*! - * set input exceptions, i.e. rows that could come from a parallelize or csv operator. - * @param pythonObjects + * set stage's general case normalPartitions + * @param generalPartitions */ - void setInputExceptions(const std::vector& inputExceptions) { _inputExceptions = inputExceptions; } + void setGeneralPartitions(const std::vector& generalPartitions) { _generalPartitions = generalPartitions; } - std::vector inputExceptions() { return _inputExceptions; } + /*! 
+ * get stage's general case normalPartitions + * @return + */ + std::vector generalPartitions() const { return _generalPartitions; } - void setPartitionToExceptionsMap(const std::unordered_map& partitionToExceptionsMap) { _partitionToExceptionsMap = partitionToExceptionsMap; } + /*! + * set stage's fallback normalPartitions as serialized python objects + * @param fallbackPartitions + */ + void setFallbackPartitions(const std::vector& fallbackPartitions) { _fallbackPartitions = fallbackPartitions; } - std::unordered_map partitionToExceptionsMap() { return _partitionToExceptionsMap; } + /*! + * get fallback normalPartitions as serialized python objects + * @return + */ + std::vector fallbackPartitions() const { return _fallbackPartitions; } + + /*! + * set merge information for each set of normal, fallback, and general partitions + * @param partitionGroups + */ + void setPartitionGroups(const std::vector& partitionGroups) { + _partitionGroups = partitionGroups; + } + + /*! + * get partition groups for all sets of partitions + */ + std::vector partitionGroups() const { return _partitionGroups; } + + /*! + * set cache entry of previous execution to be used by the incremental resolution + * @param entry + */ + void setIncrementalCacheEntry(IncrementalCacheEntry* entry) { _incrementalCacheEntry = entry; } + + /*! + * get cache entry of previous execution + * @return + */ + IncrementalCacheEntry* incrementalCacheEntry() const { return _incrementalCacheEntry; } + + /*! + * whether or not to use incremental resolution during stage execution + * @return + */ + bool incrementalResolution() const { return _incrementalResolution; } /*! * sets maximum number of rows this pipeline will produce @@ -157,12 +201,34 @@ namespace tuplex { */ std::shared_ptr resultSet() const override { return _rs;} - void setMemoryResult(const std::vector& partitions, - const std::vector& generalCase=std::vector{}, - const std::unordered_map& parttionToExceptionsMap=std::unordered_map(), - const std::vector>& interpreterRows=std::vector>{}, - const std::vector& remainingExceptions=std::vector{}, - const std::unordered_map, size_t>& ecounts=std::unordered_map, size_t>()); // creates local result set? + /*! + * Cache pipeline execution for merge in order + * @param normalPartitions normal rows + * @param exceptionPartitions exception rows + * @param partitionGroups mapping of normal to exception rows + */ + void setIncrementalResult(const std::vector& normalPartitions, + const std::vector& exceptionPartitions, + const std::vector& partitionGroups); + + /*! + * Cache pipeline execution for merge out of order + * @param exceptionPartitions exception rows + * @param generalPartitions general rows + * @param fallbackPartitions fallback rows + * @param startFileNumber next file number to output rows to + */ + void setIncrementalResult(const std::vector& exceptionPartitions, + const std::vector& generalPartitions, + const std::vector& fallbackPartitions, + size_t startFileNumber); + + void setMemoryResult(const std::vector& normalPartitions=std::vector{}, + const std::vector& generalPartitions=std::vector{}, + const std::vector& fallbackPartitions=std::vector{}, + const std::vector& partitionGroups=std::vector{}, + const std::unordered_map, size_t>& exceptionCounts=std::unordered_map, size_t>()); // creates local result set? 
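The reworked ResultSet interleaves normal, general, and fallback rows so callers see them in original row order. A compact model of that merge, with explicit (row index, case) pairs standing in for the partition cursors and per-case counters the real class keeps:

```cpp
#include <cstdio>
#include <utility>
#include <vector>

// Toy model of the in-order merge: three streams of (original row index,
// case label) are interleaved so the output matches input row order.
// Indices are explicit here; the real ResultSet derives them from
// partition metadata.
using Stream = std::vector<std::pair<int, const char*>>;

int main() {
    Stream normal   {{0, "normal"}, {2, "normal"}, {3, "normal"}};
    Stream general  {{1, "general"}};
    Stream fallback {{4, "fallback"}};

    std::size_t ni = 0, gi = 0, fi = 0;
    for (int row = 0; row < 5; ++row) {
        // emit whichever stream owns this row number, mirroring
        // getNextNormalRow()/getNextGeneralRow()/getNextFallbackRow()
        if (ni < normal.size() && normal[ni].first == row) {
            std::printf("row %d from %s case\n", row, normal[ni].second); ++ni;
        } else if (gi < general.size() && general[gi].first == row) {
            std::printf("row %d from %s case\n", row, general[gi].second); ++gi;
        } else if (fi < fallback.size() && fallback[fi].first == row) {
            std::printf("row %d from %s case\n", row, fallback[fi].second); ++fi;
        }
    }
}
```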
+ void setFileResult(const std::unordered_map, size_t>& ecounts); // creates empty result set with exceptions void setEmptyResult() { @@ -173,9 +239,8 @@ namespace tuplex { setMemoryResult( std::vector(), std::vector(), - std::unordered_map(), - std::vector>(), std::vector(), + std::vector(), ecounts); } @@ -443,6 +508,9 @@ namespace tuplex { std::vector _inputPartitions; //! memory input partitions for this task. size_t _inputLimit; //! limit number of input rows (inf per default) size_t _outputLimit; //! output limit, set e.g. by take, to_csv etc. (inf per default) + std::vector _generalPartitions; //! general case input partitions + std::vector _fallbackPartitions; //! fallback case input partitions + std::vector _partitionGroups; //! groups partitions together for correct row indices std::shared_ptr _rs; //! result set @@ -459,7 +527,10 @@ namespace tuplex { std::string _pyCode; std::string _pyPipelineName; std::string _writerFuncName; + bool _updateInputExceptions; + bool _incrementalResolution; + IncrementalCacheEntry* _incrementalCacheEntry; std::shared_ptr emptyResultSet() const; @@ -469,11 +540,6 @@ namespace tuplex { // Todo: move this to physicalplan!!! //void pushDownOutputLimit(); //! enable optimizations for limited pipeline by restricting input read! - // unresolved exceptions. Important i.e. when no IO interleave is used... - std::vector _inputExceptions; - std::unordered_map _partitionToExceptionsMap; - - // for hash output, the key and bucket type python::Type _hashOutputKeyType; python::Type _hashOutputBucketType; diff --git a/tuplex/core/include/physical/TransformTask.h b/tuplex/core/include/physical/TransformTask.h index 2868ba668..3eb8013dd 100644 --- a/tuplex/core/include/physical/TransformTask.h +++ b/tuplex/core/include/physical/TransformTask.h @@ -239,11 +239,14 @@ namespace tuplex { */ std::unordered_map, size_t> exceptionCounts() const { return _exceptionCounts; } - ExceptionInfo inputExceptionInfo() { return _inputExceptionInfo; } - std::vector inputExceptions() { return _inputExceptions; } + std::vector generalPartitions() const { return _generalPartitions; } + + std::vector fallbackPartitions() const { return _fallbackPartitions; } + + void setGeneralPartitions(const std::vector& generalPartitions) { _generalPartitions = generalPartitions; } + + void setFallbackPartitions(const std::vector& fallbackPartitions) { _fallbackPartitions = fallbackPartitions; } - void setInputExceptionInfo(ExceptionInfo info) { _inputExceptionInfo = info; } - void setInputExceptions(const std::vector& inputExceptions) { _inputExceptions = inputExceptions; } void setUpdateInputExceptions(bool updateInputExceptions) { _updateInputExceptions = updateInputExceptions; } double wallTime() const override { return _wallTime; } @@ -292,8 +295,8 @@ namespace tuplex { MemorySink _exceptions; Schema _inputSchema; - ExceptionInfo _inputExceptionInfo; - std::vector _inputExceptions; + std::vector _generalPartitions; + std::vector _fallbackPartitions; bool _updateInputExceptions; // hash table sink diff --git a/tuplex/core/src/Context.cc b/tuplex/core/src/Context.cc index e9a30e902..0fa4faa6c 100644 --- a/tuplex/core/src/Context.cc +++ b/tuplex/core/src/Context.cc @@ -31,6 +31,9 @@ namespace tuplex { Context::Context(const ContextOptions& options) : _datasetIDGenerator(0), _compilePolicy(compilePolicyFromOptions(options)), _id(getNextContextID()) { // init metrics _lastJobMetrics = std::make_unique(); + // init incremental cache + _incrementalCache = std::make_shared(); + // make sure this is 
called without holding the GIL if(python::isInterpreterRunning()) assert(!python::holdsGIL()); @@ -202,7 +205,7 @@ namespace tuplex { } - DataSet& Context::fromPartitions(const Schema& schema, const std::vector& partitions, const std::vector& columns, const std::vector> &badParallelizeObjects, const std::vector &numExceptionsInPartition) { + DataSet& Context::fromPartitions(const Schema& schema, const std::vector& partitions, const std::vector& fallbackPartitions, const std::vector& partitionGroups, const std::vector& columns) { auto dataSetID = getNextDataSetID(); DataSet *dsptr = createDataSet(schema); @@ -214,7 +217,7 @@ namespace tuplex { // empty? if(partitions.empty()) { dsptr->setColumns(columns); - addParallelizeNode(dsptr, badParallelizeObjects, numExceptionsInPartition); + addParallelizeNode(dsptr, fallbackPartitions, partitionGroups); return *dsptr; } else { size_t numRows = 0; @@ -230,7 +233,8 @@ namespace tuplex { // set rows dsptr->setColumns(columns); - addParallelizeNode(dsptr, badParallelizeObjects, numExceptionsInPartition); + addParallelizeNode(dsptr, fallbackPartitions, partitionGroups); + // signal check if(check_and_forward_signals()) { @@ -257,6 +261,7 @@ namespace tuplex { addParallelizeNode(dsptr); return *dsptr; } else { + std::vector partitionGroups; // get row type from first element @TODO: should be inferred from sample, no? auto rtype = rows.front().getRowType(); schema = Schema(Schema::MemoryLayout::ROW, rtype); @@ -303,6 +308,7 @@ namespace tuplex { numWrittenRowsInPartition++; capacityRemaining -= bytesWritten; } else { + partitionGroups.push_back(PartitionGroup(1, dsptr->getPartitions().size())); // partition is full, request new one. // create new partition... partition->unlock(); @@ -319,6 +325,7 @@ namespace tuplex { base_ptr = (uint8_t*)partition->lock(); } } + partitionGroups.push_back(PartitionGroup(1, dsptr->getPartitions().size())); partition->unlock(); partition->setNumRows(numWrittenRowsInPartition); @@ -330,7 +337,7 @@ namespace tuplex { // set rows dsptr->setColumns(columnNames); - addParallelizeNode(dsptr); + addParallelizeNode(dsptr, std::vector{}, partitionGroups); // signal check if(check_and_forward_signals()) { @@ -349,94 +356,7 @@ namespace tuplex { return op; } - void Context::serializePythonObjects(const std::vector>& pythonObjects, - const std::vector &numExceptionsInPartition, - const std::vector &normalPartitions, - const int64_t opID, - std::vector &serializedPythonObjects, - std::unordered_map &pythonObjectsMap) { - if (pythonObjects.empty()) { - for (const auto &p : normalPartitions) { - pythonObjectsMap[uuidToString(p->uuid())] = ExceptionInfo(); - } - return; - } - - Schema schema(Schema::MemoryLayout::ROW, python::Type::makeTupleType({python::Type::STRING})); - const size_t allocMinSize = 1024 * 64; // 64KB - - Partition* partition = requestNewPartition(schema, -1, allocMinSize); - int64_t* rawPtr = (int64_t*)partition->lockWriteRaw(); - *rawPtr = 0; - uint8_t* ptr = (uint8_t*)(rawPtr + 1); - size_t numBytesSerialized = 0; - - auto prevExpByteOffset = 0; - auto prevExpRowOffset = 0; - auto prevExpInd = 0; - auto curNormalPartitionInd = 0; - auto numNewExps = 0; - - // Serialize each exception to a partition using the following schema: - // (1) is the field containing rowNum - // (2) is the field containing ecCode - // (3) is the field containing opID - // (4) is the field containing pickledObjectSize - // (5) is the field containing pickledObject - for(auto &exception : pythonObjects) { - auto rowNum = std::get<0>(exception); 
- auto pyObj = std::get<1>(exception); - auto ecCode = ecToI64(ExceptionCode::PYTHON_PARALLELIZE); - auto pickledObject = python::pickleObject(python::getMainModule(), pyObj); - auto pickledObjectSize = pickledObject.size(); - size_t requiredBytes = sizeof(int64_t) * 4 + pickledObjectSize; - - if (partition->capacity() < numBytesSerialized + requiredBytes) { - partition->unlockWrite(); - serializedPythonObjects.push_back(partition); - partition = requestNewPartition(schema, -1, allocMinSize); - rawPtr = (int64_t *) partition->lockWriteRaw(); - *rawPtr = 0; - ptr = (uint8_t * )(rawPtr + 1); - numBytesSerialized = 0; - } - - // Check if we have reached the number of exceptions in the input partition - // Record the current exception index and offset and iterate to next one - auto curNormalPartition = normalPartitions[curNormalPartitionInd]; - auto normalUUID = uuidToString(curNormalPartition->uuid()); - auto numExps = numExceptionsInPartition[curNormalPartitionInd]; - if (numNewExps >= numExps) { - pythonObjectsMap[normalUUID] = ExceptionInfo(numExps, prevExpInd, prevExpRowOffset, prevExpByteOffset); - prevExpRowOffset = *rawPtr; - prevExpByteOffset = numBytesSerialized; - prevExpInd = serializedPythonObjects.size(); - numNewExps = 0; - curNormalPartitionInd++; - } - - *((int64_t*)(ptr)) = rowNum; ptr += sizeof(int64_t); - *((int64_t*)(ptr)) = ecCode; ptr += sizeof(int64_t); - *((int64_t*)(ptr)) = opID; ptr += sizeof(int64_t); - *((int64_t*)(ptr)) = pickledObjectSize; ptr += sizeof(int64_t); - memcpy(ptr, pickledObject.c_str(), pickledObjectSize); ptr += pickledObjectSize; - - *rawPtr = *rawPtr + 1; - numBytesSerialized += requiredBytes; - numNewExps += 1; - } - - // Record mapping for normal last partition - auto curNormalPartition = normalPartitions[curNormalPartitionInd]; - auto normalUUID = uuidToString(curNormalPartition->uuid()); - auto numExceptions = numExceptionsInPartition[curNormalPartitionInd]; - pythonObjectsMap[normalUUID] = ExceptionInfo(numExceptions, prevExpInd, prevExpRowOffset, prevExpByteOffset); - - partition->unlockWrite(); - serializedPythonObjects.push_back(partition); - } - - void Context::addParallelizeNode(DataSet *ds, const std::vector> &badParallelizeObjects, const std::vector &numExceptionsInPartition) { + void Context::addParallelizeNode(DataSet *ds, const std::vector& fallbackPartitions, const std::vector& partitionGroups) { assert(ds); // @TODO: make empty list as special case work. Also true for empty files. 
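For context on the deleted `serializePythonObjects`: it packed each fallback object as four int64 header fields (row number, exception code, operator ID, pickled size) followed by the pickled bytes, then tracked per-partition offsets via `ExceptionInfo`. The replacement keeps fallback rows in dedicated partitions routed through `PartitionGroup`s instead. A stand-alone sketch of the retired record layout, using a plain byte vector rather than a Tuplex Partition:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>

// Sketch of the record layout the deleted serializePythonObjects used:
// four int64 header fields followed by the pickled payload.
std::vector<uint8_t> writeRecord(int64_t rowNum, int64_t ecCode, int64_t opID,
                                 const std::string& pickled) {
    std::vector<uint8_t> buf(4 * sizeof(int64_t) + pickled.size());
    uint8_t* p = buf.data();
    int64_t header[4] = {rowNum, ecCode, opID, (int64_t)pickled.size()};
    std::memcpy(p, header, sizeof(header));          p += sizeof(header);
    std::memcpy(p, pickled.data(), pickled.size());  // pickled object bytes
    return buf;
}

int main() {
    auto buf = writeRecord(7, /*ecCode=*/42, /*opID=*/3, "\x80\x04K\x01.");
    int64_t header[4];
    std::memcpy(header, buf.data(), sizeof(header));
    std::printf("row=%lld ec=%lld op=%lld payload=%lld bytes\n",
                (long long)header[0], (long long)header[1],
                (long long)header[2], (long long)header[3]);
}
```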
@@ -446,11 +366,17 @@ namespace tuplex { assert(ds->_schema.getRowType() != python::Type::UNKNOWN); auto op = new ParallelizeOperator(ds->_schema, ds->getPartitions(), ds->columns()); - std::vector serializedPythonObjects; - std::unordered_map pythonObjectsMap; - serializePythonObjects(badParallelizeObjects, numExceptionsInPartition, ds->getPartitions(), op->getID(), serializedPythonObjects, pythonObjectsMap); - op->setPythonObjects(serializedPythonObjects); - op->setInputPartitionToPythonObjectsMap(pythonObjectsMap); + op->setFallbackPartitions(fallbackPartitions); + if (partitionGroups.empty()) { + std::vector defaultPartitionGroups; + for (int i = 0; i < ds->getPartitions().size(); ++i) { + defaultPartitionGroups.push_back(PartitionGroup(1, i)); + } + op->setPartitionGroups(defaultPartitionGroups); + } else { + op->setPartitionGroups(partitionGroups); + } + // add new (root) node ds->_operator = addOperator(op); diff --git a/tuplex/core/src/ContextOptions.cc b/tuplex/core/src/ContextOptions.cc index 49b498969..7879376cc 100644 --- a/tuplex/core/src/ContextOptions.cc +++ b/tuplex/core/src/ContextOptions.cc @@ -232,6 +232,7 @@ namespace tuplex { {"tuplex.optimizer.operatorReordering", "false"}, {"tuplex.optimizer.sharedObjectPropagation", "true"}, {"tuplex.optimizer.mergeExceptionsInOrder", "true"}, + {"tuplex.optimizer.incrementalResolution", "false"}, {"tuplex.interleaveIO", "true"}, {"tuplex.aws.scratchDir", ""}, {"tuplex.aws.requestTimeout", "600"}, @@ -286,6 +287,7 @@ namespace tuplex { {"tuplex.optimizer.operatorReordering", "false"}, {"tuplex.optimizer.sharedObjectPropagation", "true"}, {"tuplex.optimizer.mergeExceptionsInOrder", "false"}, + {"tuplex.optimizer.incrementalResolution", "false"}, {"tuplex.interleaveIO", "true"}, {"tuplex.aws.scratchDir", ""}, {"tuplex.aws.requestTimeout", "600"}, diff --git a/tuplex/core/src/DataSet.cc b/tuplex/core/src/DataSet.cc index a53a14094..a33925d7f 100644 --- a/tuplex/core/src/DataSet.cc +++ b/tuplex/core/src/DataSet.cc @@ -869,9 +869,10 @@ namespace tuplex { // what data source operators are there? 
if(_operator->type() == LogicalOperatorType::FILEINPUT) return static_cast(_operator)->isEmpty(); - else if(_operator->type() == LogicalOperatorType::PARALLELIZE) - return static_cast(_operator)->getPartitions().empty(); - else + else if(_operator->type() == LogicalOperatorType::PARALLELIZE) { + auto pop = static_cast(_operator); assert(pop); + return pop->getNormalPartitions().empty() && pop->getFallbackPartitions().empty(); + } else throw std::runtime_error("unknown data source operator detected"); } else return false; diff --git a/tuplex/core/src/Executor.cc b/tuplex/core/src/Executor.cc index 845b78e6a..7aab9c5fa 100644 --- a/tuplex/core/src/Executor.cc +++ b/tuplex/core/src/Executor.cc @@ -325,7 +325,7 @@ namespace tuplex { // remove from list _storedPartitions.remove(partition); } else { - + return; error("INTERNAL ERROR: Could not find partition " + uuidToString(partition->uuid()) + " belonging to operator " + std::to_string(partition->getDataSetID()) + " and type " + partition->schema().getRowType().desc() + ""); std::abort(); diff --git a/tuplex/core/src/IncrementalCache.cc b/tuplex/core/src/IncrementalCache.cc new file mode 100644 index 000000000..102aaae96 --- /dev/null +++ b/tuplex/core/src/IncrementalCache.cc @@ -0,0 +1,72 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 1/1/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// + +#include + +#include + +namespace tuplex { + + IncrementalCacheEntry::IncrementalCacheEntry( + LogicalOperator* pipeline, + const std::vector& exceptionPartitions, + const std::vector& generalPartitions, + const std::vector& fallbackPartitions, + size_t startFileNumber) { + _pipeline = pipeline->clone(); + _exceptionPartitions = exceptionPartitions; + _generalPartitions = generalPartitions; + _fallbackPartitions = fallbackPartitions; + _startFileNumber = startFileNumber; + } + + IncrementalCacheEntry::IncrementalCacheEntry(LogicalOperator *pipeline, + const std::vector& normalPartitions, + const std::vector& exceptionPartitions, + const std::vector& partitionGroups) { + _pipeline = pipeline->clone(); + _normalPartitions = normalPartitions; + for (auto &p : _normalPartitions) + p->makeImmortal(); + _exceptionPartitions = exceptionPartitions; + _partitionGroups = partitionGroups; + } + + void IncrementalCache::addEntry(const std::string& key, IncrementalCacheEntry* entry) { + auto elt = _cache.find(key); + if (elt != _cache.end()) + _cache.erase(key); + + _cache[key] = entry; + } + + IncrementalCacheEntry::~IncrementalCacheEntry() { + delete _pipeline; + } + + std::string IncrementalCache::newKey(LogicalOperator* pipeline) { + assert(pipeline); + std::stringstream ss; + + std::queue q; + q.push(pipeline); + while (!q.empty()) { + auto cur = q.front(); q.pop(); + if (cur->type() != LogicalOperatorType::RESOLVE && cur->type() != LogicalOperatorType::IGNORE) { + ss << std::to_string(static_cast(cur->type())); + } + for (const auto& p : cur->parents()) { + q.push(p); + } + } + + return ss.str(); + } +} \ No newline at end of file diff --git a/tuplex/core/src/Partition.cc b/tuplex/core/src/Partition.cc index a16d1c2eb..c554788cd 100644 --- a/tuplex/core/src/Partition.cc +++ b/tuplex/core/src/Partition.cc @@ -55,7 +55,7 @@ namespace 
tuplex { uint8_t* Partition::lockWriteRaw() { // must be the thread who allocated this - assert(_owner->getThreadID() == std::this_thread::get_id()); +// assert(_owner->getThreadID() == std::this_thread::get_id()); TRACE_LOCK("partition " + uuidToString(_uuid)); std::this_thread::yield(); diff --git a/tuplex/core/src/ee/local/LocalBackend.cc b/tuplex/core/src/ee/local/LocalBackend.cc index bed96ec5a..da131be3f 100644 --- a/tuplex/core/src/ee/local/LocalBackend.cc +++ b/tuplex/core/src/ee/local/LocalBackend.cc @@ -147,8 +147,13 @@ namespace tuplex { // check what type of stage it is auto tstage = dynamic_cast(stage); - if(tstage) - executeTransformStage(tstage); + if(tstage) { + if (tstage->incrementalResolution()) { + executeIncrementalStage(tstage); + } else { + executeTransformStage(tstage); + } + } else if(dynamic_cast(stage)) { executeHashJoinStage(dynamic_cast(stage)); } else if(dynamic_cast(stage)) { @@ -266,8 +271,8 @@ namespace tuplex { Timer timer; // BUILD phase // TODO: codegen build phase. I.e. a function should be code generated which hashes a partition to a hashmap. - while(rsRight->hasNextPartition()) { - Partition* p = rsRight->getNextPartition(); + while(rsRight->hasNextNormalPartition()) { + Partition* p = rsRight->getNextNormalPartition(); // lock partition! auto ptr = p->lockRaw(); @@ -435,7 +440,7 @@ namespace tuplex { auto combinedType = hstage->combinedType(); Schema combinedSchema(Schema::MemoryLayout::ROW, combinedType); std::vector probeTasks; - for(auto partition : rsLeft->partitions()) { + for(auto partition : rsLeft->normalPartitions()) { probeTasks.emplace_back(new HashProbeTask(partition, hmap, probeFunction, hstage->combinedType(), hstage->outputDataSetID(), @@ -648,12 +653,35 @@ namespace tuplex { // --> issue for each memory partition a transform task and put it into local workqueue assert(tstage->inputMode() == EndPointMode::MEMORY); - - // restrict after input limit size_t numInputRows = 0; + auto inputPartitions = tstage->inputPartitions(); - for(int i = 0; i < inputPartitions.size(); ++i) { - auto partition = inputPartitions[i]; + auto generalPartitions = tstage->generalPartitions(); + auto fallbackPartitions = tstage->fallbackPartitions(); + auto partitionGroups = tstage->partitionGroups(); + for (const auto &group : partitionGroups) { + std::vector taskNormalPartitions; + bool invalidateAfterUse = false; + for (int i = group.normalPartitionStartInd; i < group.normalPartitionStartInd + group.numNormalPartitions; ++i) { + auto p = inputPartitions[i]; + numInputRows += p->getNumRows(); + if (!p->isImmortal()) + invalidateAfterUse = true; + taskNormalPartitions.push_back(p); + } + std::vector taskGeneralPartitions; + for (int i = group.generalPartitionStartInd; i < group.generalPartitionStartInd + group.numGeneralPartitions; ++i) { + auto p = generalPartitions[i]; + numInputRows += p->getNumRows(); + taskGeneralPartitions.push_back(p); + } + std::vector taskFallbackPartitions; + for (int i = group.fallbackPartitionStartInd; i < group.fallbackPartitionStartInd + group.numFallbackPartitions; ++i) { + auto p = fallbackPartitions[i]; + numInputRows += p->getNumRows(); + taskFallbackPartitions.push_back(p); + } + auto task = new TransformTask(); if (tstage->updateInputExceptions()) { task->setFunctor(syms->functorWithExp); @@ -661,7 +689,9 @@ namespace tuplex { task->setFunctor(syms->functor); } task->setUpdateInputExceptions(tstage->updateInputExceptions()); - task->setInputMemorySource(partition, !partition->isImmortal()); + 
task->setInputMemorySources(taskNormalPartitions, invalidateAfterUse); + task->setGeneralPartitions(taskGeneralPartitions); + task->setFallbackPartitions(taskFallbackPartitions); // hash table or memory output? if(tstage->outputMode() == EndPointMode::HASHTABLE) { if (tstage->hashtableKeyByteWidth() == 8) @@ -676,16 +706,10 @@ namespace tuplex { tstage->outputMode() == EndPointMode::MEMORY); task->sinkOutputToMemory(outputSchema, tstage->outputDataSetID(), tstage->context().id()); } - - auto partitionId = uuidToString(partition->uuid()); - auto info = tstage->partitionToExceptionsMap()[partitionId]; - task->setInputExceptionInfo(info); - task->setInputExceptions(tstage->inputExceptions()); - task->sinkExceptionsToMemory(inputSchema); + task->sinkExceptionsToMemory(tstage->normalCaseInputSchema()); task->setStageID(tstage->getID()); task->setOutputLimit(tstage->outputLimit()); tasks.emplace_back(std::move(task)); - numInputRows += partition->getNumRows(); // input limit exhausted? break! if(numInputRows >= tstage->inputLimit()) @@ -750,90 +774,296 @@ namespace tuplex { return pip_object; } - std::vector> inputExceptionsToPythonObjects(const std::vector& partitions, Schema schema) { - using namespace tuplex; + std::vector LocalBackend::createIncrementalTasks(TransformStage* tstage, const ContextOptions& options, const std::shared_ptr& syms) { + using namespace std; + vector tasks; + assert(tstage); + assert(syms); - std::vector> pyObjects; - for (const auto &partition : partitions) { - auto numRows = partition->getNumRows(); - const uint8_t* ptr = partition->lock(); + auto cacheEntry = tstage->incrementalCacheEntry(); + assert(cacheEntry); + auto cachedExceptionPartitions = cacheEntry->exceptionPartitions(); + auto cachedGeneralPartitions = cacheEntry->generalPartitions(); + auto cachedFallbackPartitions = cacheEntry->fallbackPartitions(); + auto cachedPartitionGroups = cacheEntry->partitionGroups(); + auto cachedNormalPartitions = cacheEntry->normalPartitions(); - python::lockGIL(); - for (int i = 0; i < numRows; ++i) { - int64_t rowNum = *((int64_t*)ptr); - ptr += sizeof(int64_t); - int64_t ecCode = *((int64_t*)ptr); - ptr += 2 * sizeof(int64_t); - int64_t objSize = *((int64_t*)ptr); - ptr += sizeof(int64_t); - - PyObject* pyObj = nullptr; - if (ecCode == ecToI64(ExceptionCode::PYTHON_PARALLELIZE)) { - pyObj = python::deserializePickledObject(python::getMainModule(), (char *) ptr, objSize); - } else { - pyObj = python::rowToPython(Row::fromMemory(schema, ptr, objSize), true); - } + for (auto &p : cachedNormalPartitions) + p->makeMortal(); + for (auto &p : cachedExceptionPartitions) + p->makeMortal(); - ptr += objSize; - pyObjects.emplace_back(rowNum, pyObj); - } - python::unlockGIL(); + auto stageID = tstage->getID(); + auto contextID = tstage->context().id(); + auto operatorIDsWithResolvers = tstage->operatorIDsWithResolvers(); + auto exceptionInputSchema = tstage->inputSchema(); + auto outputSchema = tstage->outputSchema(); + auto normalCaseOutputSchema = tstage->normalCaseOutputSchema(); + auto mergeExceptionsInOrder = options.OPT_MERGE_EXCEPTIONS_INORDER(); + auto autoUpcastNumbers = options.AUTO_UPCAST_NUMBERS(); + auto outputFormat = tstage->outputFormat(); + auto csvOutputDelimiter = tstage->csvOutputDelimiter(); + auto csvOutputQuotechar = tstage->csvOutputQuotechar(); + auto resolveFunctor = options.RESOLVE_WITH_INTERPRETER_ONLY() ? 
nullptr : syms->resolveFunctor; - partition->unlock(); - partition->invalidate(); + + // compile & prep python pipeline for this stage + Timer timer; + auto pipObject = preparePythonPipeline(tstage->purePythonCode(), tstage->pythonPipelineName()); + if(!pipObject) { + logger().error("python pipeline invalid, details: \n" + core::withLineNumbers(tstage->purePythonCode())); + return tasks; } + logger().info("compiled pure python pipeline in " + std::to_string(timer.time()) + "s"); + timer.reset(); + + auto order = 0; + if (mergeExceptionsInOrder) { + for (const auto &partitionGroup : cachedPartitionGroups) { + std::vector taskNormalPartitions; + for (int i = partitionGroup.normalPartitionStartInd; i < partitionGroup.normalPartitionStartInd + partitionGroup.numNormalPartitions; ++i) + taskNormalPartitions.push_back(cachedNormalPartitions[i]); + std::vector taskExceptionPartitions; + for (int i = partitionGroup.exceptionPartitionStartInd; i < partitionGroup.exceptionPartitionStartInd + partitionGroup.numExceptionPartitions; ++i) + taskExceptionPartitions.push_back(cachedExceptionPartitions[i]); + + + auto rtask = new ResolveTask( + stageID, + contextID, + taskNormalPartitions, + taskExceptionPartitions, + vector{}, + vector{}, + operatorIDsWithResolvers, + exceptionInputSchema, + outputSchema, + normalCaseOutputSchema, + outputSchema, + mergeExceptionsInOrder, + autoUpcastNumbers, + outputFormat, + csvOutputDelimiter, + csvOutputQuotechar, + resolveFunctor, + pipObject, + true); + + rtask->setOrder(order); + order++; + tasks.push_back(rtask); + } + } else { + for (const auto &p : cachedExceptionPartitions) { + tasks.push_back(new ResolveTask( + stageID, + contextID, + vector{}, + vector{p}, + vector{}, + vector{}, + operatorIDsWithResolvers, + exceptionInputSchema, + outputSchema, + normalCaseOutputSchema, + outputSchema, + mergeExceptionsInOrder, + autoUpcastNumbers, + outputFormat, + csvOutputDelimiter, + csvOutputQuotechar, + resolveFunctor, + pipObject, + true)); + } + + for (const auto &p : cachedGeneralPartitions) { + tasks.push_back(new ResolveTask( + stageID, + contextID, + vector{}, + vector{}, + vector{p}, + vector{}, + operatorIDsWithResolvers, + exceptionInputSchema, + outputSchema, + normalCaseOutputSchema, + outputSchema, + mergeExceptionsInOrder, + autoUpcastNumbers, + outputFormat, + csvOutputDelimiter, + csvOutputQuotechar, + resolveFunctor, + pipObject, + true)); + } - return pyObjects; + for (const auto &p : cachedFallbackPartitions) { + tasks.push_back(new ResolveTask( + stageID, + contextID, + vector{}, + vector{}, + vector{}, + vector{p}, + operatorIDsWithResolvers, + exceptionInputSchema, + outputSchema, + normalCaseOutputSchema, + outputSchema, + mergeExceptionsInOrder, + autoUpcastNumbers, + outputFormat, + csvOutputDelimiter, + csvOutputQuotechar, + resolveFunctor, + pipObject, + true)); + } + } + return tasks; } - void setExceptionInfo(const std::vector &normalOutput, const std::vector &exceptions, std::unordered_map &partitionToExceptionsMap) { - if (exceptions.empty()) { - for (const auto &p : normalOutput) { - partitionToExceptionsMap[uuidToString(p->uuid())] = ExceptionInfo(); + void LocalBackend::executeIncrementalStage(TransformStage *tstage) { + using namespace std; + + Timer stageTimer; + Timer timer; + + Partition::resetStatistics(); + + assert(tstage); + auto cacheEntry = tstage->incrementalCacheEntry(); + assert(cacheEntry); + auto cachedGeneralPartitions = cacheEntry->generalPartitions(); + auto cachedFallbackPartitions = cacheEntry->fallbackPartitions(); 
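The two `ResolveTask` construction sites below implement the two merge strategies: with merge-in-order, every cached `PartitionGroup` becomes one order-tagged task over its normal and exception partitions; without it, each cached exception, general, and fallback partition gets its own independent task. A condensed, self-contained sketch of that dispatch, using simplified stand-in types rather than the real tuplex classes and eliding the long `ResolveTask` argument lists:

```
#include <cstddef>
#include <utility>
#include <vector>

// Simplified stand-ins for illustration only, not the real tuplex types.
struct Partition {};
struct PartitionGroup {
    std::size_t numExceptionPartitions = 0;
    std::size_t exceptionPartitionStartInd = 0;
};
struct Task {
    std::vector<Partition*> exceptions;
    int order = -1;   // only meaningful when merging in order
};

std::vector<Task> createTasks(bool mergeInOrder,
                              const std::vector<PartitionGroup>& groups,
                              const std::vector<Partition*>& cachedExceptions) {
    std::vector<Task> tasks;
    if (mergeInOrder) {
        // one task per cached group; the order tag lets the backend restore row order later
        int order = 0;
        for (const auto& g : groups) {
            Task t;
            for (std::size_t i = g.exceptionPartitionStartInd;
                 i < g.exceptionPartitionStartInd + g.numExceptionPartitions; ++i)
                t.exceptions.push_back(cachedExceptions[i]);
            t.order = order++;
            tasks.push_back(std::move(t));
        }
    } else {
        // order irrelevant: one independent task per cached exception partition
        for (auto* p : cachedExceptions)
            tasks.push_back(Task{{p}, -1});
    }
    return tasks;
}
```

The order tag is what `sortTasks(completedTasks)` later uses to put task outputs back into the original row order before merging.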
+ auto cachedExceptionPartitions = cacheEntry->exceptionPartitions(); + + // If pipeline does not contain code, or no new exceptions to resolve skip stage and store new cache entry + if (cachedExceptionPartitions.empty() && cachedGeneralPartitions.empty() && cachedFallbackPartitions.empty()) { + switch (tstage->outputMode()) { + case EndPointMode::FILE: { + tstage->setFileResult(std::unordered_map, size_t>()); + break; + } + default: + throw std::runtime_error("output mode not yet supported for incremental resolution"); } + Logger::instance().defaultLogger().info("[Transform Stage] skipped stage " + std::to_string(tstage->number()) + " because there is nothing todo here."); return; } - auto expRowCount = 0; - auto expInd = 0; - auto expRowOff = 0; - auto expByteOff = 0; - - auto expNumRows = exceptions[0]->getNumRows(); - auto expPtr = exceptions[0]->lockWrite(); - auto rowsProcessed = 0; - for (const auto &p : normalOutput) { - auto pNumRows = p->getNumRows(); - auto curNumExps = 0; - auto curExpOff = expRowOff; - auto curExpInd = expInd; - auto curExpByteOff = expByteOff; - - while (*((int64_t *) expPtr) - rowsProcessed <= pNumRows + curNumExps && expRowCount < expNumRows) { - *((int64_t *) expPtr) -= rowsProcessed; - curNumExps++; - expRowOff++; - auto eSize = ((int64_t *)expPtr)[3] + 4*sizeof(int64_t); - expPtr += eSize; - expByteOff += eSize; - expRowCount++; - - if (expRowOff == expNumRows && expInd < exceptions.size() - 1) { - exceptions[expInd]->unlockWrite(); - expInd++; - expPtr = exceptions[expInd]->lockWrite(); - expNumRows = exceptions[expInd]->getNumRows(); - expRowOff = 0; - expByteOff = 0; - expRowCount = 0; + // Compile the pipeline + LLVMOptimizer optimizer; + auto syms = tstage->compile(*_compiler, _options.USE_LLVM_OPTIMIZER() ? 
&optimizer : nullptr, false); + bool combineOutputHashmaps = syms->aggInitFunctor && syms->aggCombineFunctor && syms->aggAggregateFunctor; + JobMetrics& metrics = tstage->PhysicalStage::plan()->getContext().metrics(); + double total_compilation_time = metrics.getTotalCompilationTime() + timer.time(); + metrics.setTotalCompilationTime(total_compilation_time); + { + std::stringstream ss; + ss<<"[Transform Stage] Stage "<number()<<" compiled to x86 in "<getNumInputRows(); + numOutputRows += task->getNumOutputRows(); + totalWallTime += task->wallTime(); + } + { + std::stringstream ss; + double time_per_slow_path_row_in_ms = totalWallTime / numInputRows * 1000.0; + ss<<"[Transform Stage] Stage "<number()<<" total wall clock time: " + <number(), numInputRows, numOutputRows); + // fast path + metrics.setFastPathTimes(tstage->number(), 0, 0, 0); + metrics.setSlowPathTimes(tstage->number(), totalWallTime, timer.time(), time_per_slow_path_row_in_ms * 1000000.0); + } + + sortTasks(completedTasks); + + // fetch partitions & ecounts + vector normalPartitions; + vector generalPartitions; + vector fallbackPartitions; + vector exceptionPartitions; + vector partitionGroups; + unordered_map, size_t> exceptionCounts; + + for (const auto& task : completedTasks) { + auto taskNormalPartitions = getNormalPartitions(task); + auto taskGeneralPartitions = getGeneralPartitions(task); + auto taskFallbackPartitions = getFallbackPartitions(task); + auto taskExceptionPartitions = getExceptionPartitions(task); + auto taskExceptionCounts = getExceptionCounts(task); + + // update exception counts + exceptionCounts = merge_ecounts(exceptionCounts, taskExceptionCounts); + + // debug trace issues + using namespace std; + std::string task_name = "unknown"; + if(task->type() == TaskType::UDFTRAFOTASK) + task_name = "udf trafo task"; + if(task->type() == TaskType::RESOLVE) + task_name = "resolve"; + + partitionGroups.push_back(PartitionGroup( + taskNormalPartitions.size(), normalPartitions.size(), + taskGeneralPartitions.size(), generalPartitions.size(), + taskFallbackPartitions.size(), fallbackPartitions.size(), + taskExceptionPartitions.size(), exceptionPartitions.size())); + std::copy(taskNormalPartitions.begin(), taskNormalPartitions.end(), std::back_inserter(normalPartitions)); + std::copy(taskGeneralPartitions.begin(), taskGeneralPartitions.end(), std::back_inserter(generalPartitions)); + std::copy(taskFallbackPartitions.begin(), taskFallbackPartitions.end(), std::back_inserter(fallbackPartitions)); + std::copy(taskExceptionPartitions.begin(), taskExceptionPartitions.end(), std::back_inserter(exceptionPartitions)); + } + + switch (tstage->outputMode()) { + case EndPointMode::FILE: { + if (_options.OPT_MERGE_EXCEPTIONS_INORDER()) { + tstage->setIncrementalResult(normalPartitions, exceptionPartitions, partitionGroups); + if (stringToBool(tstage->outputOptions()["commit"])) { + timer.reset(); + writeOutput(tstage, completedTasks); + metrics.setWriteOutputTimes(tstage->number(), timer.time()); + } else { + tstage->setFileResult(exceptionCounts); + } + } else { + timer.reset(); + auto partNo = writeOutput(tstage, completedTasks, cacheEntry->startFileNumber()); + metrics.setWriteOutputTimes(tstage->number(), timer.time()); + tstage->setIncrementalResult(exceptionPartitions, generalPartitions, fallbackPartitions, + partNo); } + break; } - - rowsProcessed += curNumExps + pNumRows; - partitionToExceptionsMap[uuidToString(p->uuid())] = ExceptionInfo(curNumExps, curExpInd, curExpOff, curExpByteOff); + default: + throw 
std::runtime_error("output mode not yet supported for incremental resolution"); } - exceptions[expInd]->unlockWrite(); + // call release func for stage globals + if(syms->releaseStageFunctor() != 0) + throw std::runtime_error("releaseStage() failed for stage " + std::to_string(tstage->number())); + + freeTasks(completedTasks); } void LocalBackend::executeTransformStage(tuplex::TransformStage *tstage) { @@ -855,9 +1085,8 @@ namespace tuplex { // special case: skip stage, i.e. empty code and mem2mem if(tstage->code().empty() && !tstage->fileInputMode() && !tstage->fileOutputMode()) { - auto pyObjects = inputExceptionsToPythonObjects(tstage->inputExceptions(), tstage->normalCaseInputSchema()); - tstage->setMemoryResult(tstage->inputPartitions(), std::vector{}, std::unordered_map(), pyObjects); - pyObjects.clear(); + tstage->setMemoryResult(tstage->inputPartitions(), tstage->generalPartitions(), tstage->fallbackPartitions(), + tstage->partitionGroups()); // skip stage Logger::instance().defaultLogger().info("[Transform Stage] skipped stage " + std::to_string(tstage->number()) + " because there is nothing todo here."); return; @@ -949,9 +1178,11 @@ namespace tuplex { // calc number of input rows and total wall clock time size_t numInputRows = 0; + size_t numOutputRows = 0; double totalWallTime = 0.0; for(auto task : completedTasks) { numInputRows += task->getNumInputRows(); + numOutputRows += task->getNumOutputRows(); totalWallTime += task->wallTime(); } @@ -969,6 +1200,7 @@ namespace tuplex { <<", time to process 1 row via fast path: "<number(), numInputRows, numOutputRows); // fast path metrics.setFastPathTimes(tstage->number(), totalWallTime, timer.time(), time_per_fast_path_row_in_ms * 1000000.0); } @@ -985,7 +1217,7 @@ namespace tuplex { bool executeSlowPath = true; //TODO: implement pure python resolution here... // exceptions found or slowpath data given? - if(totalECountsBeforeResolution > 0 || !tstage->inputExceptions().empty()) { + if(totalECountsBeforeResolution > 0 || !tstage->generalPartitions().empty() || !tstage->fallbackPartitions().empty()) { stringstream ss; // log out what exists in a table ss<<"Exception details: "<inputExceptions().empty()) { + if(!tstage->generalPartitions().empty()) { + size_t numExceptions = 0; + for (auto &p : tstage->generalPartitions()) + numExceptions += p->getNumRows(); + lines.push_back(Row("(cache)", exceptionCodeToPythonClass(ExceptionCode::NORMALCASEVIOLATION), (int64_t)numExceptions)); + totalECountsBeforeResolution += numExceptions; + } + + if(!tstage->fallbackPartitions().empty()) { size_t numExceptions = 0; - for (auto &p : tstage->inputExceptions()) + for (auto &p : tstage->fallbackPartitions()) numExceptions += p->getNumRows(); - lines.push_back(Row("(input)", exceptionCodeToPythonClass(ExceptionCode::NORMALCASEVIOLATION), (int64_t)numExceptions)); + lines.push_back(Row("(parallelize)", exceptionCodeToPythonClass(ExceptionCode::NORMALCASEVIOLATION), (int64_t)numExceptions)); totalECountsBeforeResolution += numExceptions; } @@ -1044,7 +1284,7 @@ namespace tuplex { executeSlowPath = true; // input exceptions or py objects? 
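Both of these new exception-table rows, and the `((int64_t*)ptr)[3] + 4*sizeof(int64_t)` arithmetic used throughout this file, rely on the serialized exception-row format that the removed `inputExceptionsToPythonObjects` decoded above: a four-field int64 header followed by the row payload. A hedged reading of that layout (field names here are descriptive, not the actual identifiers; the third field is skipped via `ptr += 2 * sizeof(int64_t)` in the old decoder and is presumably the operator ID):

```
#include <cstdint>
#include <cstring>

// Layout implied by the decode logic above: each exception row is a
// 4 x int64 header followed by `size` payload bytes.
//   [0] rowNumber     - logical row index at which the exception occurred
//   [1] exceptionCode
//   [2] operatorID    - skipped by the old decoder
//   [3] size          - byte length of the serialized row that follows
struct ExceptionRowView {
    int64_t rowNumber, exceptionCode, operatorID, size;
    const uint8_t* payload;
};

// Read one record and advance the cursor to the next one.
inline ExceptionRowView readExceptionRow(const uint8_t*& ptr) {
    ExceptionRowView v{};
    std::memcpy(&v, ptr, 4 * sizeof(int64_t));
    v.payload = ptr + 4 * sizeof(int64_t);
    ptr += 4 * sizeof(int64_t) + v.size;   // matches ((int64_t*)ptr)[3] + 4*sizeof(int64_t)
    return v;
}
```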
- if(!tstage->inputExceptions().empty()) + if(!tstage->generalPartitions().empty() || !tstage->fallbackPartitions().empty()) executeSlowPath = true; if(executeSlowPath) { @@ -1076,10 +1316,12 @@ namespace tuplex { totalWallTime = 0.0; size_t slowPathNumInputRows = 0; + size_t slowPathNumOutputRows = 0; for(auto task : completedTasks) { if(task->type() == TaskType::RESOLVE) { totalWallTime += task->wallTime(); slowPathNumInputRows += task->getNumInputRows(); + slowPathNumOutputRows += task->getNumOutputRows(); } } double time_per_row_slow_path_ms = totalWallTime / slowPathNumInputRows * 1000.0; @@ -1089,6 +1331,7 @@ namespace tuplex { ss<<"slow path for Stage "<number()<<": total wall clock time: "<number(), slowPathNumInputRows, slowPathNumOutputRows); metrics.setSlowPathTimes(tstage->number(), totalWallTime, slow_path_total_time, time_per_row_slow_path_ms * 1000000.0); } @@ -1113,64 +1356,76 @@ namespace tuplex { // sorting only make sense when order is needed sortTasks(completedTasks); + // fetch partitions & ecounts + vector normalPartitions; + vector generalPartitions; + vector fallbackPartitions; + vector exceptionPartitions; + vector partitionGroups; + unordered_map, size_t> exceptionCounts; + + for (const auto& task : completedTasks) { + auto taskNormalPartitions = getNormalPartitions(task); + auto taskGeneralPartitions = getGeneralPartitions(task); + auto taskFallbackPartitions = getFallbackPartitions(task); + auto taskExceptionPartitions = getExceptionPartitions(task); + auto taskExceptionCounts = getExceptionCounts(task); + + // update exception counts + exceptionCounts = merge_ecounts(exceptionCounts, taskExceptionCounts); + + // debug trace issues + using namespace std; + std::string task_name = "unknown"; + if(task->type() == TaskType::UDFTRAFOTASK) + task_name = "udf trafo task"; + if(task->type() == TaskType::RESOLVE) + task_name = "resolve"; + + auto pGroup = PartitionGroup( + taskNormalPartitions.size(), normalPartitions.size(), + taskGeneralPartitions.size(), generalPartitions.size(), + taskFallbackPartitions.size(), fallbackPartitions.size()); + pGroup.numExceptionPartitions = taskExceptionPartitions.size(); + pGroup.exceptionPartitionStartInd = exceptionPartitions.size(); + partitionGroups.push_back(pGroup); + std::copy(taskNormalPartitions.begin(), taskNormalPartitions.end(), std::back_inserter(normalPartitions)); + std::copy(taskGeneralPartitions.begin(), taskGeneralPartitions.end(), std::back_inserter(generalPartitions)); + std::copy(taskFallbackPartitions.begin(), taskFallbackPartitions.end(), std::back_inserter(fallbackPartitions)); + std::copy(taskExceptionPartitions.begin(), taskExceptionPartitions.end(), std::back_inserter(exceptionPartitions)); + } + // set result according to endpoint mode switch(tstage->outputMode()) { case EndPointMode::FILE: { // i.e. if output format is tuplex, then attach special writer! // ==> could maybe codegen avro as output format, and then write to whatever?? 
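Each completed task contributes one `PartitionGroup` that records how many partitions of each class it produced and where they begin in the stage-wide vectors; the start index is simply the destination vector's size before the copy, exactly as in the `PartitionGroup(taskNormalPartitions.size(), normalPartitions.size(), ...)` calls in the collection loops here. A minimal sketch of that accumulation pattern with stand-in types:

```
#include <cstddef>
#include <vector>

struct Partition {};                          // stand-in
struct Group { std::size_t num, start; };     // (count, start index) for one partition class

// Append one task's output to the stage-wide vector and record where it landed.
Group appendTaskOutput(std::vector<Partition*>& all,
                       const std::vector<Partition*>& taskOut) {
    Group g{taskOut.size(), all.size()};      // start index = size before the copy
    all.insert(all.end(), taskOut.begin(), taskOut.end());
    return g;
}
```

Keeping the groups alongside the flat vectors means the next incremental run can slice each task's partitions back out without copying any row data.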
- writeOutput(tstage, completedTasks); + if (_options.OPT_INCREMENTAL_RESOLUTION()) { + if (_options.OPT_MERGE_EXCEPTIONS_INORDER()) { + tstage->setIncrementalResult(normalPartitions, exceptionPartitions, partitionGroups); + if (stringToBool(tstage->outputOptions()["commit"])) { + timer.reset(); + writeOutput(tstage, completedTasks); + metrics.setWriteOutputTimes(tstage->number(), timer.time()); + } else { + tstage->setFileResult(exceptionCounts); + } + } else { + timer.reset(); + auto partNo = writeOutput(tstage, completedTasks); + metrics.setWriteOutputTimes(tstage->number(), timer.time()); + tstage->setIncrementalResult(exceptionPartitions, generalPartitions, fallbackPartitions, + partNo); + } + } else { + timer.reset(); + writeOutput(tstage, completedTasks); + metrics.setWriteOutputTimes(tstage->number(), timer.time()); + } break; } case EndPointMode::MEMORY: { - // memory output, fetch partitions & ecounts - vector output; - vector generalOutput; - unordered_map partitionToExceptionsMap; - vector remainingExceptions; - vector> nonConformingRows; // rows where the output type does not fit, - // need to manually merged. - unordered_map, size_t> ecounts; - size_t rowDelta = 0; - for (const auto& task : completedTasks) { - auto taskOutput = getOutputPartitions(task); - auto taskRemainingExceptions = getRemainingExceptions(task); - auto taskGeneralOutput = generalCasePartitions(task); - auto taskNonConformingRows = getNonConformingRows(task); - auto taskExceptionCounts = getExceptionCounts(task); - - // update exception counts - ecounts = merge_ecounts(ecounts, taskExceptionCounts); - - // update nonConforming with delta - for(int i = 0; i < taskNonConformingRows.size(); ++i) { - auto t = taskNonConformingRows[i]; - t = std::make_tuple(std::get<0>(t) + rowDelta, std::get<1>(t)); - taskNonConformingRows[i] = t; - } - - // debug trace issues - using namespace std; - std::string task_name = "unknown"; - if(task->type() == TaskType::UDFTRAFOTASK) - task_name = "udf trafo task"; - if(task->type() == TaskType::RESOLVE) - task_name = "resolve"; - - setExceptionInfo(taskOutput, taskGeneralOutput, partitionToExceptionsMap); - std::copy(taskOutput.begin(), taskOutput.end(), std::back_inserter(output)); - std::copy(taskRemainingExceptions.begin(), taskRemainingExceptions.end(), std::back_inserter(remainingExceptions)); - std::copy(taskGeneralOutput.begin(), taskGeneralOutput.end(), std::back_inserter(generalOutput)); - std::copy(taskNonConformingRows.begin(), taskNonConformingRows.end(), std::back_inserter(nonConformingRows)); - - // compute the delta used to offset records! 
- for (const auto &p : taskOutput) - rowDelta += p->getNumRows(); - for (const auto &p : taskGeneralOutput) - rowDelta += p->getNumRows(); - rowDelta += taskNonConformingRows.size(); - } - - tstage->setMemoryResult(output, generalOutput, partitionToExceptionsMap, nonConformingRows, remainingExceptions, ecounts); + tstage->setMemoryResult(normalPartitions, generalPartitions, fallbackPartitions, partitionGroups, exceptionCounts); break; } case EndPointMode::HASHTABLE: { @@ -1247,6 +1502,21 @@ namespace tuplex { Logger::instance().defaultLogger().info(ss.str()); } +// if(_driver) +// _driver->freeAllPartitionsOfContext(&context()); +// for(auto exec : _executors) +// exec->freeAllPartitionsOfContext(&context()); + + +// for (auto task : completedTasks) +// task->freePartitions(); + + if (!_options.OPT_INCREMENTAL_RESOLUTION()) { + for (auto &p : exceptionPartitions) { + p->invalidate(); + } + } + freeTasks(completedTasks); // update metrics @@ -1262,6 +1532,140 @@ namespace tuplex { Logger::instance().defaultLogger().info(ss.str()); } +// void LocalBackend::setPartitionMergeInfo(const std::vector& normalPartitions, +// const std::vector& generalPartitions, const size_t generalStartInd, +// const std::vector& fallbackPartitions, const size_t fallbackStartInd, +// std::vector& partitionMergeInfo) { +// +// +// +// +// auto generalInd = 0; +// auto generalRowOff = 0; +// auto generalByteOff = 0; +// auto generalRowsInPartition = 0; +// const uint8_t *generalPtr = nullptr; +// if (!generalPartitions.empty()) { +// generalRowsInPartition = generalPartitions[0]->getNumRows(); +// generalPtr = generalPartitions[0]->lock(); +// } +// +// auto fallbackInd = 0; +// auto fallbackRowOff = 0; +// auto fallbackByteOff = 0; +// auto fallbackRowsInPartition = 0; +// const uint8_t *fallbackPtr = nullptr; +// if (!fallbackPartitions.empty()) { +// fallbackRowsInPartition = fallbackPartitions[0]->getNumRows(); +// fallbackPtr = fallbackPartitions[0]->lock(); +// } +// +// auto exceptionInd = 0; +// auto exceptionRowOff = 0; +// auto exceptionByteOff = 0; +// auto exceptionRowsInPartition = 0; +// const uint8_t *exceptionPtr = nullptr; +// if (!exceptionPartitions.empty()) { +// exceptionRowsInPartition = exceptionPartitions[0]->getNumRows(); +// exceptionPtr = exceptionPartitions[0]->lock(); +// } +// +// auto totalRowCounter = 0; +// auto rowDelta = 0; +// for (const auto &p : normalPartitions) { +// auto mergeInfo = MergeInfo(); +// mergeInfo.setRowDelta(rowDelta); +// auto numNormalRows = p->getNumRows(); +// +// auto generalRowCounter = 0; +// auto curGeneralStartInd = generalInd + generalStartInd; +// auto curGeneralRowOff = generalRowOff; +// auto curGeneralByteOff = generalByteOff; +// while (generalPtr && *((int64_t*)generalPtr) <= totalRowCounter + numNormalRows) { +// generalRowCounter++; +// totalRowCounter++; +// +// auto dataSize = ((int64_t*)generalPtr)[3] + 4*sizeof(int64_t); +// generalByteOff += dataSize; +// generalPtr += dataSize; +// generalRowOff++; +// +// if (generalRowOff == generalRowsInPartition) { +// generalPartitions[generalInd]->unlock(); +// generalInd++; +// if (generalInd < generalPartitions.size()) { +// generalPtr = generalPartitions[generalInd]->lock(); +// generalRowsInPartition = generalPartitions[generalInd]->getNumRows(); +// generalRowOff = 0; +// generalByteOff = 0; +// } else { +// generalPtr = nullptr; +// } +// } +// } +// mergeInfo.setGeneralInfo(generalRowCounter, curGeneralStartInd, curGeneralRowOff, curGeneralByteOff); +// +// auto fallbackRowCounter = 0; +// 
auto curFallbackStartInd = fallbackInd + fallbackStartInd; +// auto curFallbackRowOff = fallbackRowOff; +// auto curFallbackByteOff = fallbackByteOff; +// while (fallbackPtr && *((int64_t*)fallbackPtr) <= totalRowCounter + numNormalRows + generalRowCounter) { +// fallbackRowCounter++; +// totalRowCounter++; +// +// auto dataSize = ((int64_t*)fallbackPtr)[1] + 2*sizeof(int64_t); +// fallbackByteOff += dataSize; +// fallbackPtr += dataSize; +// fallbackRowOff++; +// +// if (fallbackRowOff == fallbackRowsInPartition) { +// fallbackPartitions[fallbackInd]->unlock(); +// fallbackInd++; +// if (fallbackInd < fallbackPartitions.size()) { +// fallbackPtr = fallbackPartitions[fallbackInd]->lock(); +// fallbackRowsInPartition = fallbackPartitions[fallbackInd]->getNumRows(); +// fallbackRowOff = 0; +// fallbackByteOff = 0; +// } else { +// fallbackPtr = nullptr; +// } +// } +// } +// mergeInfo.setFallbackInfo(fallbackRowCounter, curFallbackStartInd, curFallbackRowOff, curFallbackByteOff); +// +// auto exceptionRowCounter = 0; +// auto curExceptionStartInd = exceptionInd + exceptionStartInd; +// auto curExceptionRowOff = exceptionRowOff; +// auto curExceptionByteOff = exceptionByteOff; +// while (exceptionPtr && *((int64_t*)exceptionPtr) <= totalRowCounter + numNormalRows + generalRowCounter + fallbackRowCounter) { +// exceptionRowCounter++; +// totalRowCounter++; +// +// auto dataSize = ((int64_t*)exceptionPtr)[3] + 4*sizeof(int64_t); +// exceptionByteOff += dataSize; +// exceptionPtr += dataSize; +// exceptionRowOff++; +// +// if (exceptionRowOff == exceptionRowsInPartition) { +// exceptionPartitions[exceptionInd]->unlock(); +// exceptionInd++; +// if (exceptionInd < exceptionPartitions.size()) { +// exceptionPtr = exceptionPartitions[exceptionInd]->lock(); +// exceptionRowsInPartition = exceptionPartitions[exceptionInd]->getNumRows(); +// exceptionRowOff = 0; +// exceptionByteOff = 0; +// } else { +// exceptionPtr = nullptr; +// } +// } +// } +// mergeInfo.setExceptionInfo(exceptionRowCounter, curExceptionStartInd, curExceptionRowOff, curExceptionByteOff); +// +// rowDelta += numNormalRows + generalRowCounter + fallbackRowCounter + exceptionRowCounter; +// partitionMergeInfo.push_back(mergeInfo); +// } +// } + std::vector LocalBackend::resolveViaSlowPath( std::vector &tasks, bool merge_rows_in_order, @@ -1391,7 +1795,7 @@ namespace tuplex { else if(compareOrders(maxOrder, tt->getOrder())) maxOrder = tt->getOrder(); - if (tt->exceptionCounts().size() > 0 || tt->inputExceptionInfo().numExceptions > 0) { + if (tt->exceptionCounts().size() > 0 || !tt->generalPartitions().empty() || !tt->fallbackPartitions().empty()) { // task found with exceptions in it => exception partitions need to be resolved using special functor // hash-table output not yet supported @@ -1407,8 +1811,8 @@ namespace tuplex { tstage->context().id(), tt->getOutputPartitions(), tt->getExceptionPartitions(), - tt->inputExceptions(), - tt->inputExceptionInfo(), + tt->generalPartitions(), + tt->fallbackPartitions(), opsToCheck, exceptionInputSchema, compiledSlowPathOutputSchema, @@ -1498,11 +1902,6 @@ namespace tuplex { // cout<<"*** git "<inputExceptions()) { - p->invalidate(); - } - // cout<<"*** total number of tasks to return is "<hasNextPartition()) { - Partition* p = rs->getNextPartition(); + while(rs->hasNextNormalPartition()) { + Partition* p = rs->getNextNormalPartition(); // lock partition! 
auto ptr = p->lockRaw(); @@ -1946,7 +2345,7 @@ namespace tuplex { } } - void LocalBackend::writeOutput(TransformStage *tstage, std::vector &tasks) { + size_t LocalBackend::writeOutput(TransformStage *tstage, std::vector &tasks, size_t startFileNumber) { using namespace std; Timer timer; @@ -1973,8 +2372,13 @@ namespace tuplex { auto ecounts = calcExceptionCounts(tasks); + if (outputs.empty()) { + tstage->setFileResult(ecounts); + return startFileNumber; + } + // write to one file - int partNo = 0; + int partNo = startFileNumber; auto outputFilePath = outputURI(udf, uri, partNo, fmt); // check that outputFilePath is NOT empty. @@ -2071,7 +2475,6 @@ namespace tuplex { // run using queue! // execute tasks using work queue. auto completedTasks = performTasks(wtasks); - if(header) { delete [] header; header = nullptr; @@ -2079,5 +2482,7 @@ namespace tuplex { Logger::instance().defaultLogger().info("writing output took " + std::to_string(timer.time()) + "s"); tstage->setFileResult(ecounts); + + return partNo; } } // namespace tuplex \ No newline at end of file diff --git a/tuplex/core/src/logical/CacheOperator.cc b/tuplex/core/src/logical/CacheOperator.cc index f71522f21..4a571599c 100644 --- a/tuplex/core/src/logical/CacheOperator.cc +++ b/tuplex/core/src/logical/CacheOperator.cc @@ -22,19 +22,16 @@ namespace tuplex { LogicalOperator::copyMembers(other); auto cop = (CacheOperator*)other; setSchema(other->getOutputSchema()); - _normalCasePartitions = cop->cachedPartitions(); - _generalCasePartitions = cop->cachedExceptions(); - _partitionToExceptionsMap = cop->partitionToExceptionsMap(); - // copy python objects and incref for each! - _py_objects = cop->_py_objects; - python::lockGIL(); - for(auto obj : _py_objects) - Py_XINCREF(obj); - python::unlockGIL(); + _normalPartitions = cop->cachedNormalPartitions(); + _generalPartitions = cop->cachedGeneralPartitions(); + _fallbackPartitions = cop->cachedFallbackPartitions(); + _partitionGroups = cop->partitionGroups(); + _optimizedSchema = cop->_optimizedSchema; _cached = cop->_cached; - _normalCaseRowCount = cop->_normalCaseRowCount; - _generalCaseRowCount = cop->_generalCaseRowCount; + _normalRowCount = cop->_normalRowCount; + _generalRowCount = cop->_generalRowCount; + _fallbackRowCount = cop->_fallbackRowCount; _columns = cop->_columns; _sample = cop->_sample; _storeSpecialized = cop->_storeSpecialized; @@ -60,7 +57,7 @@ namespace tuplex { // is operator cached? => return combined cost! // @NOTE: could make exceptions more expensive than normal rows if(isCached()) { - return _generalCaseRowCount + _normalCaseRowCount; + return _generalRowCount + _fallbackRowCount + _normalRowCount; } else { // return parent cost return parent()->cost(); @@ -73,30 +70,29 @@ namespace tuplex { _cached = true; // fetch both partitions (consume) from resultset + any unresolved exceptions - _normalCasePartitions = rs->partitions(); - for(auto p : _normalCasePartitions) + _normalPartitions = rs->normalPartitions(); + for(auto p : _normalPartitions) p->makeImmortal(); - // @TODO: there are two sorts of exceptions here... - // i.e. separate normal-case violations out from the rest - // => these can be stored separately for faster processing! - // @TODO: right now, everything just gets cached... 
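Together with the `startFileNumber` parameter added to `writeOutput` above, making the cached partitions immortal here is what lets a later incremental run pick up where the previous one committed: part numbering resumes at the cached start file number, so newly resolved rows land in fresh part files instead of overwriting already-written output. A rough sketch of that numbering contract (the part-file naming is illustrative only; the real path comes from `outputURI(udf, uri, partNo, fmt)`):

```
#include <cstddef>
#include <string>
#include <vector>

// Illustrative name pattern, not the actual outputURI format.
std::string partFileName(std::size_t partNo) {
    return "output/part" + std::to_string(partNo) + ".csv";
}

// Mirrors writeOutput's contract: start at the cached file number and
// return the next free one, which the cache entry stores for the next run.
std::size_t writeParts(const std::vector<std::string>& outputs, std::size_t startFileNumber) {
    std::size_t partNo = startFileNumber;
    for (const auto& out : outputs) {
        (void)out;                 // write `out` to partFileName(partNo) here
        ++partNo;
    }
    return partNo;                 // becomes startFileNumber of the following run
}
```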
+ _generalPartitions = rs->generalPartitions(); + for(auto p : _generalPartitions) + p->makeImmortal(); - _generalCasePartitions = rs->exceptions(); - for(auto p : _generalCasePartitions) + _fallbackPartitions = rs->fallbackPartitions(); + for(auto p : _fallbackPartitions) p->makeImmortal(); - _partitionToExceptionsMap = rs->partitionToExceptionsMap(); + _partitionGroups = rs->partitionGroups(); // check whether partitions have different schema than the currently set one // => i.e. they have been specialized. - if(!_normalCasePartitions.empty()) { - _optimizedSchema = _normalCasePartitions.front()->schema(); + if(!_normalPartitions.empty()) { + _optimizedSchema = _normalPartitions.front()->schema(); assert(_optimizedSchema != Schema::UNKNOWN); } // if exceptions are empty, then force output schema to be the optimized one as well! - if(_generalCasePartitions.empty()) + if(_generalPartitions.empty()) setSchema(_optimizedSchema); // because the schema might have changed due to the result, need to update the dataset! @@ -104,36 +100,46 @@ namespace tuplex { getDataSet()->setSchema(getOutputSchema()); // print out some statistics about cached data - size_t cachedPartitionsMemory = 0; - size_t totalCachedPartitionsMemory = 0; - size_t totalCachedRows = 0; - size_t cachedExceptionsMemory = 0; - size_t totalCachedExceptionsMemory = 0; - size_t totalCachedExceptions = 0; - - int pos = 0; - for(auto p : _normalCasePartitions) { - totalCachedRows += p->getNumRows(); - cachedPartitionsMemory += p->bytesWritten(); - totalCachedPartitionsMemory += p->size(); - pos++; + size_t normalBytesWritten = 0; + size_t normalCapacity = 0; + size_t normalRows = 0; + size_t generalBytesWritten = 0; + size_t generalCapacity = 0; + size_t generalRows = 0; + size_t fallbackBytesWritten = 0; + size_t fallbackCapacity = 0; + size_t fallbackRows = 0; + + for(const auto &p : _normalPartitions) { + normalRows += p->getNumRows(); + normalBytesWritten += p->bytesWritten(); + normalCapacity += p->size(); } - for(auto p : _generalCasePartitions) { - totalCachedExceptions += p->getNumRows(); - cachedExceptionsMemory += p->bytesWritten(); - totalCachedExceptionsMemory += p->size(); + for(const auto &p : _generalPartitions) { + generalRows += p->getNumRows(); + generalBytesWritten += p->bytesWritten(); + generalCapacity += p->size(); } + for(const auto &p : _fallbackPartitions) { + fallbackRows += p->getNumRows(); + fallbackBytesWritten += p->bytesWritten(); + fallbackCapacity += p->size(); + } + - _normalCaseRowCount = totalCachedRows; - _generalCaseRowCount = totalCachedExceptions; + _normalRowCount = normalRows; + _generalRowCount = generalRows; + _fallbackRowCount = fallbackRows; stringstream ss; - ss<<"Cached "<getNumRows(); + } + for(const auto &p : _generalPartitions) { totalCachedRows += p->getNumRows(); } - for(auto p : _generalCasePartitions) { + for (const auto &p : _fallbackPartitions) { totalCachedRows += p->getNumRows(); } return totalCachedRows; diff --git a/tuplex/core/src/logical/LogicalPlan.cc b/tuplex/core/src/logical/LogicalPlan.cc index f9322a203..25449dc28 100644 --- a/tuplex/core/src/logical/LogicalPlan.cc +++ b/tuplex/core/src/logical/LogicalPlan.cc @@ -58,6 +58,8 @@ namespace tuplex { // optimize first if desired (context options object) // ==> optimize creates a copy if required + incrementalResolution(context); + auto optimized_plan = optimize(context, !copy_required); // overwrite double logical_optimization_time = timer.time(); @@ -68,6 +70,40 @@ namespace tuplex { return new 
PhysicalPlan(optimized_plan, this, context); } + void updateIDs(LogicalOperator *previous, LogicalOperator *current) { + std::queue<LogicalOperator*> currentQ; + std::queue<LogicalOperator*> previousQ; + currentQ.push(current); + previousQ.push(previous); + bool updated = false; + while(!currentQ.empty() && !previousQ.empty()) { + auto curNode = currentQ.front(); currentQ.pop(); + auto prevNode = previousQ.front(); previousQ.pop(); + + if (!updated && (curNode->type() == LogicalOperatorType::RESOLVE || curNode->type() == LogicalOperatorType::IGNORE)) { + curNode = curNode->parent(); + updated = true; + } + + curNode->setID(prevNode->getID()); + for (auto parent : curNode->parents()) { + currentQ.push(parent); + } + for (auto parent : prevNode->parents()) { + previousQ.push(parent); + } + } + } + + void LogicalPlan::incrementalResolution(const Context& context) { + // If a cache entry exists, we need to copy the operator IDs from the previous pipeline over to the current one, + // because the cached exceptions are already encoded with the previous pipeline's operator IDs. + auto cache = context.getIncrementalCache(); + auto cacheEntry = cache->getEntry(IncrementalCache::newKey(_action)); + if (cacheEntry && context.getOptions().OPT_INCREMENTAL_RESOLUTION()) { + updateIDs(cacheEntry->pipeline(), _action); + } + } void rewriteAllFollowingResolvers(LogicalOperator* op, const std::unordered_map<int64_t, int64_t>& rewriteMap) { // go over children (single!) diff --git a/tuplex/core/src/logical/ParallelizeOperator.cc b/tuplex/core/src/logical/ParallelizeOperator.cc index 770ac2d4f..3ea6916e6 100644 --- a/tuplex/core/src/logical/ParallelizeOperator.cc +++ b/tuplex/core/src/logical/ParallelizeOperator.cc @@ -12,15 +12,15 @@ namespace tuplex { ParallelizeOperator::ParallelizeOperator(const Schema& schema, - const std::vector<Partition*>& partitions, - const std::vector<std::string>& columns) : _partitions(partitions), - _columnNames(columns) { + const std::vector<Partition*>& normalPartitions, + const std::vector<std::string>& columns) : _normalPartitions(normalPartitions), + _columnNames(columns) { setSchema(schema); // parallelize operator holds data in memory for infinite lifetime. // => make partitions immortal - for(auto& partition : _partitions) + for(auto& partition : _normalPartitions) partition->makeImmortal(); // get sample @@ -31,15 +31,15 @@ namespace tuplex { _sample.clear(); // todo: general python objects from parallelize... - if(!_partitions.empty()) { + if(!_normalPartitions.empty()) { auto maxRows = getDataSet() ? getDataSet()->getContext()->getOptions().CSV_MAX_DETECTION_ROWS() : MAX_TYPE_SAMPLING_ROWS; // @TODO: change this variable/config name // fetch up to maxRows from partitions! - auto schema = _partitions.front()->schema(); + auto schema = _normalPartitions.front()->schema(); Deserializer ds(schema); size_t rowCount = 0; size_t numBytesRead = 0; - for(auto p : _partitions) { + for(auto p : _normalPartitions) { const uint8_t* ptr = p->lockRaw(); auto partitionRowCount = *(int64_t*)ptr; ptr += sizeof(int64_t); @@ -59,8 +59,8 @@ namespace tuplex { } } - std::vector<Partition*> ParallelizeOperator::getPartitions() { - return _partitions; + std::vector<Partition*> ParallelizeOperator::getNormalPartitions() { + return _normalPartitions; } bool ParallelizeOperator::good() const { @@ -69,7 +69,7 @@ namespace tuplex { std::vector<Row> ParallelizeOperator::getSample(const size_t num) const { // samples exist?
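`updateIDs` above walks the previous and the re-submitted logical plan in lockstep so that cached exceptions, which embed the previous plan's operator IDs, still match the operators of the new plan; the first RESOLVE/IGNORE node encountered in the new plan is stepped over once so the two traversals stay aligned. A self-contained sketch of the same idea on a simplified linear plan (the real code uses queues over `parents()` to handle multiple parents):

```
// Simplified stand-in: a linear pipeline node (the real walk handles DAGs).
struct Node {
    int id;
    bool isResolver;        // RESOLVE or IGNORE in the real plan
    Node* parent = nullptr; // next operator upstream
};

// Copy IDs from the previous plan onto the current one, skipping the single
// newly inserted resolver so both walks visit corresponding operators.
void updateIDsLinear(Node* previous, Node* current) {
    bool skipped = false;
    while (previous && current) {
        if (!skipped && current->isResolver) {
            current = current->parent;   // step past the new resolver once
            skipped = true;
            continue;
        }
        current->id = previous->id;      // cached exceptions reference this ID
        previous = previous->parent;
        current = current->parent;
    }
}
```

This is also why `newKey` ignores RESOLVE/IGNORE operators when hashing the plan: two pipelines that differ only in resolvers must map to the same cache entry.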
- if(_partitions.empty() || 0 == num) { + if(_normalPartitions.empty() || 0 == num) { return std::vector(); } @@ -109,11 +109,11 @@ namespace tuplex { } LogicalOperator *ParallelizeOperator::clone() { - auto copy = new ParallelizeOperator(getOutputSchema(), _partitions, columns()); + auto copy = new ParallelizeOperator(getOutputSchema(), _normalPartitions, columns()); copy->setDataSet(getDataSet()); copy->copyMembers(this); - copy->setPythonObjects(_pythonObjects); - copy->setInputPartitionToPythonObjectsMap(_inputPartitionToPythonObjectsMap); + copy->setFallbackPartitions(_fallbackPartitions); + copy->setPartitionGroups(_partitionGroups); assert(getID() == copy->getID()); return copy; } @@ -121,7 +121,9 @@ namespace tuplex { int64_t ParallelizeOperator::cost() const { // use #rows stored in partitions int64_t numRows = 0; - for(auto p : _partitions) + for(const auto& p : _normalPartitions) + numRows += p->getNumRows(); + for(const auto& p : _fallbackPartitions) numRows += p->getNumRows(); return numRows; } diff --git a/tuplex/core/src/physical/BlockBasedTaskBuilder.cc b/tuplex/core/src/physical/BlockBasedTaskBuilder.cc index 80e21c0a1..111e97d8f 100644 --- a/tuplex/core/src/physical/BlockBasedTaskBuilder.cc +++ b/tuplex/core/src/physical/BlockBasedTaskBuilder.cc @@ -58,12 +58,23 @@ namespace tuplex { FunctionType* read_block_type = FunctionType::get(env().i64Type(), {env().i8ptrType(), env().i8ptrType(), env().i64Type(), + env().i64Type()->getPointerTo(0), + env().i64Type()->getPointerTo(0), + env().getBooleanType(), + env().i64Type()->getPointerTo(0), + env().i64Type()->getPointerTo(0), + env().i64Type()->getPointerTo(0), + env().i64Type()->getPointerTo(0), env().i8ptrType()->getPointerTo(0), + env().i64Type(), env().i64Type()->getPointerTo(0), + env().i64Type()->getPointerTo(0), + env().i64Type()->getPointerTo(0), + env().i8ptrType()->getPointerTo(0), env().i64Type(), env().i64Type()->getPointerTo(0), env().i64Type()->getPointerTo(0), - env().getBooleanType()}, false); + env().i64Type()->getPointerTo(0)}, false); // create function and set argNames Function* read_block_func = Function::Create(read_block_type, Function::ExternalLinkage, _desiredFuncName, env().getModule().get()); @@ -76,12 +87,24 @@ namespace tuplex { vector argNames{"userData", "inPtr", "inSize", - "expPtrs", - "expPtrSizes", - "numExps", "outNormalRowCount", "outBadRowCount", - "ignoreLastRow"}; + "ignoreLastRow", + "totalFilterCounter", + "totalNormalRowCounter", + "totalGeneralRowCounter", + "totalFallbackRowCounter", + "generalPartitions", + "numGeneralPartitions", + "generalIndexOffset", + "generalRowOffset", + "generalByteOffset", + "fallbackPartitions", + "numFallbackPartitions", + "fallbackIndexOffset", + "fallbackRowOffset", + "fallbackByteOffset"}; + for(int i = 0; i < argNames.size(); ++i) { args[i]->setName(argNames[i]); _args[argNames[i]] = args[i]; diff --git a/tuplex/core/src/physical/ExceptionSourceTaskBuilder.cc b/tuplex/core/src/physical/ExceptionSourceTaskBuilder.cc index b3bd3847f..dd37a1c07 100644 --- a/tuplex/core/src/physical/ExceptionSourceTaskBuilder.cc +++ b/tuplex/core/src/physical/ExceptionSourceTaskBuilder.cc @@ -106,89 +106,115 @@ namespace tuplex { assert(read_block_func); + // Initialize context auto& context = env().getContext(); + // Load function arguments auto argUserData = arg("userData"); auto argInPtr = arg("inPtr"); auto argInSize = arg("inSize"); - auto argExpPtrs = arg("expPtrs"); - auto argExpPtrSizes = arg("expPtrSizes"); - auto argNumExps = arg("numExps"); auto 
argOutNormalRowCount = arg("outNormalRowCount"); auto argOutBadRowCount = arg("outBadRowCount"); auto argIgnoreLastRow = arg("ignoreLastRow"); - + auto totalFilterCounter = arg("totalFilterCounter"); + auto totalNormalRowCounter = arg("totalNormalRowCounter"); + auto totalGeneralRowCounter = arg("totalGeneralRowCounter"); + auto totalFallbackRowCounter = arg("totalFallbackRowCounter"); + auto generalPartitions = arg("generalPartitions"); + auto numGeneralPartitions = arg("numGeneralPartitions"); + auto generalIndexOffset = arg("generalIndexOffset"); + auto generalRowOffset = arg("generalRowOffset"); + auto generalByteOffset = arg("generalByteOffset"); + auto fallbackPartitions = arg("fallbackPartitions"); + auto numFallbackPartitions = arg("numFallbackPartitions"); + auto fallbackIndexOffset = arg("fallbackIndexOffset"); + auto fallbackRowOffset = arg("fallbackRowOffset"); + auto fallbackByteOffset = arg("fallbackByteOffset"); + + // Initialize function body BasicBlock *bbBody = BasicBlock::Create(context, "entry", read_block_func); IRBuilder<> builder(bbBody); - - // there should be a check if argInSize is 0 - // if so -> handle separately, i.e. return immediately -#warning "add here argInSize > 0 check" - - - // compute endptr from args - Value *endPtr = builder.CreateGEP(argInPtr, argInSize, "endPtr"); - Value *currentPtrVar = builder.CreateAlloca(env().i8ptrType(), 0, nullptr, "readPtrVar"); - // later use combi of normal & bad rows - Value *outRowCountVar = builder.CreateAlloca(env().i64Type(), 0, nullptr, "outRowCountVar"); // counter for output row number (used for exception resolution) + // Define basic blocks for function + auto bbInitializeGeneral = llvm::BasicBlock::Create(context, "initialize_general", builder.GetInsertBlock()->getParent()); + auto bbDeclareFallback = llvm::BasicBlock::Create(context, "declare_fallback", builder.GetInsertBlock()->getParent()); + auto bbInitializeFallback = llvm::BasicBlock::Create(context, "initialize_fallback", builder.GetInsertBlock()->getParent()); + auto bbUpdateGeneralCond = llvm::BasicBlock::Create(context, "update_general_cond", builder.GetInsertBlock()->getParent()); + auto bbUpdateGeneralBody = llvm::BasicBlock::Create(context, "update_general_body", builder.GetInsertBlock()->getParent()); + auto bbNextGeneralPartition = llvm::BasicBlock::Create(context, "next_general_partition", builder.GetInsertBlock()->getParent()); + auto bbUpdateFallbackCond = llvm::BasicBlock::Create(context, "update_fallback_cond", builder.GetInsertBlock()->getParent()); + auto bbUpdateFallbackBody = llvm::BasicBlock::Create(context, "update_fallback_body", builder.GetInsertBlock()->getParent()); + auto bbNextFallbackPartition = llvm::BasicBlock::Create(context, "next_fallback_partition", builder.GetInsertBlock()->getParent()); + auto bbUpdateDone = llvm::BasicBlock::Create(context, "update_done", builder.GetInsertBlock()->getParent()); + auto bbLoopCondition = BasicBlock::Create(context, "loop_cond", read_block_func); + auto bbLoopBody = BasicBlock::Create(context, "loop_body", read_block_func); + auto bbLoopDone = BasicBlock::Create(context, "loop_done", read_block_func); + + // Initialize values for normal partitions + auto endPtr = builder.CreateGEP(argInPtr, argInSize, "endPtr"); + auto currentPtrVar = builder.CreateAlloca(env().i8ptrType(), 0, nullptr, "readPtrVar"); + auto outRowCountVar = builder.CreateAlloca(env().i64Type(), 0, nullptr, "outRowCountVar"); // counter for output row number (used for exception resolution) builder.CreateStore(argInPtr, 
currentPtrVar); - - Value *normalRowCountVar = argOutNormalRowCount; - Value *badRowCountVar = argOutBadRowCount; - builder.CreateStore(builder.CreateAdd(builder.CreateLoad(argOutBadRowCount), - builder.CreateLoad(argOutNormalRowCount)), outRowCountVar); - - // current index into array of exception partitions - auto curExpIndVar = builder.CreateAlloca(env().i64Type(), 0, nullptr, "curExpIndVar"); - builder.CreateStore(env().i64Const(0), curExpIndVar); - - // current partition pointer - auto curExpPtrVar = builder.CreateAlloca(env().i8ptrType(), 0, nullptr, "curExpPtrVar"); - builder.CreateStore(builder.CreateLoad(argExpPtrs), curExpPtrVar); - - // number of rows total in current partition - auto curExpNumRowsVar = builder.CreateAlloca(env().i64Type(), 0, nullptr, "curExpNumRowsVar"); - builder.CreateStore(builder.CreateLoad(argExpPtrSizes), curExpNumRowsVar); - - // current row number in current partition - auto curExpCurRowVar = builder.CreateAlloca(env().i64Type(), 0, nullptr, "curExpCurRowVar"); - builder.CreateStore(env().i64Const(0), curExpCurRowVar); - - // accumulator used to update exception indices when rows are filtered, counts number of previously fitlered rows - auto expAccVar = builder.CreateAlloca(env().i64Type(), 0, nullptr, "expAccVar"); - builder.CreateStore(env().i64Const(0), expAccVar); - - // used to see if rows are filtered during pipeline execution - auto prevRowNumVar = builder.CreateAlloca(env().i64Type(), 0, nullptr, "prevRowNumVar"); - auto prevBadRowNumVar = builder.CreateAlloca(env().i64Type(), 0, nullptr, "prevBadRowNumVar"); - - // current number of exceptions prosessed across all partitions - auto expCurRowVar = builder.CreateAlloca(env().i64Type(), 0, nullptr, "expCurRowVar"); - builder.CreateStore(env().i64Const(0), expCurRowVar); - + // Update the arguments at the end + auto normalRowCountVar = argOutNormalRowCount; + auto badRowCountVar = argOutBadRowCount; + builder.CreateStore(builder.CreateAdd(builder.CreateLoad(argOutBadRowCount),builder.CreateLoad(argOutNormalRowCount)), outRowCountVar); // get num rows to read & process in loop - Value *numRowsVar = builder.CreateAlloca(env().i64Type(), 0, nullptr, "numRowsVar"); - Value *input_ptr = builder.CreatePointerCast(argInPtr, env().i64Type()->getPointerTo(0)); + auto numRowsVar = builder.CreateAlloca(env().i64Type(), 0, nullptr, "numRowsVar"); + auto input_ptr = builder.CreatePointerCast(argInPtr, env().i64Type()->getPointerTo(0)); builder.CreateStore(builder.CreateLoad(input_ptr), numRowsVar); // store current input ptr - Value *currentInputPtrVar = builder.CreateAlloca(env().i8ptrType(), 0, nullptr, "ptr"); + auto currentInputPtrVar = builder.CreateAlloca(env().i8ptrType(), 0, nullptr, "ptr"); builder.CreateStore(builder.CreateGEP(argInPtr, env().i32Const(sizeof(int64_t))), currentInputPtrVar); - - // variable for current row number... 
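Spelled out as a plain C-style declaration, the read-block interface assembled in BlockBasedTaskBuilder above comes out roughly as follows. Parameter names are taken verbatim from the argNames list; the declaration itself does not exist in the codebase, and the LLVM boolean type is written as `bool` for readability:

```
#include <cstdint>

// Reading of the FunctionType built in BlockBasedTaskBuilder: i64 return value,
// i8*/i64* pointers, and i8** arrays holding one buffer pointer per serialized
// general/fallback partition.
extern "C" int64_t read_block(
    void*     userData,
    uint8_t*  inPtr,                  int64_t  inSize,
    int64_t*  outNormalRowCount,      int64_t* outBadRowCount,
    bool      ignoreLastRow,
    int64_t*  totalFilterCounter,     int64_t* totalNormalRowCounter,
    int64_t*  totalGeneralRowCounter, int64_t* totalFallbackRowCounter,
    uint8_t** generalPartitions,      int64_t  numGeneralPartitions,
    int64_t*  generalIndexOffset,     int64_t* generalRowOffset,
    int64_t*  generalByteOffset,
    uint8_t** fallbackPartitions,     int64_t  numFallbackPartitions,
    int64_t*  fallbackIndexOffset,    int64_t* fallbackRowOffset,
    int64_t*  fallbackByteOffset);
```

The offset triples (index, row, byte) are in-out parameters: the generated code resumes scanning the general and fallback partitions where the previous block left off and writes the updated cursor back for the next call.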
- Value *rowVar = builder.CreateAlloca(env().i64Type(), 0, nullptr); + auto rowVar = builder.CreateAlloca(env().i64Type(), 0, nullptr); builder.CreateStore(env().i64Const(0), rowVar); - BasicBlock* bbLoopCondition = BasicBlock::Create(context, "loop_cond", read_block_func); - BasicBlock* bbLoopBody = BasicBlock::Create(context, "loop_body", read_block_func); - BasicBlock* bbLoopDone = BasicBlock::Create(context, "loop_done", read_block_func); + // used to see if rows are filtered during pipeline execution + auto prevRowNumVar = builder.CreateAlloca(env().i64Type(), 0, nullptr, "prevRowNumVar"); + auto prevBadRowNumVar = builder.CreateAlloca(env().i64Type(), 0, nullptr, "prevBadRowNumVar"); - // go from entry block to loop body + // Initialize values for index updating + // uint8_t *curGeneralPtr; + // int64_t curGeneralNumRows = 0; + // if (*generalIndexOffset < numGeneralPartitions) { + // curGeneralPtr = generalPartitions[*generalIndexOffset]; + // curGeneralNumRows = *curGeneralPtr; + // curGeneralPtr += sizeof(int64_t) + *generalByteOffset; + // } + auto curGeneralPtr = builder.CreateAlloca(env().i8ptrType(), 0, nullptr, "curGeneralPtr"); + auto curGeneralNumRows = builder.CreateAlloca(env().i64Type(), 0, nullptr, "curGeneralNumRows"); + builder.CreateStore(env().i64Const(0), curGeneralNumRows); + auto shouldInitializeGeneral = builder.CreateICmpSLT(builder.CreateLoad(generalIndexOffset), numGeneralPartitions); + builder.CreateCondBr(shouldInitializeGeneral, bbInitializeGeneral, bbDeclareFallback); + + builder.SetInsertPoint(bbInitializeGeneral); + builder.CreateStore(builder.CreateLoad(builder.CreateGEP(generalPartitions, builder.CreateLoad(generalIndexOffset))), curGeneralPtr); + builder.CreateStore(builder.CreateLoad(builder.CreatePointerCast(builder.CreateLoad(curGeneralPtr), env().i64ptrType())), curGeneralNumRows); + builder.CreateStore(builder.CreateGEP(builder.CreateLoad(curGeneralPtr), builder.CreateAdd(env().i64Const(sizeof(int64_t)), builder.CreateLoad(generalByteOffset))), curGeneralPtr); + builder.CreateBr(bbDeclareFallback); + + // uint8_t *curFallbackPtr; + // int64_t curFallbackNumRows = 0; + // if (*fallbackIndexOffset < numFallbackPartitions) { + // curFallbackPtr = fallbackPartitions[*fallbackIndexOffset]; + // curFallbackNumRows = *curFallbackPtr; + // curFallbackPtr += sizeof(int64_t) + *fallbackByteOffset; + // } + builder.SetInsertPoint(bbDeclareFallback); + auto curFallbackPtr = builder.CreateAlloca(env().i8ptrType(), 0, nullptr, "curFallbackPtr"); + auto curFallbackNumRows = builder.CreateAlloca(env().i64Type(), 0, nullptr, "curFallbackNumRows"); + builder.CreateStore(env().i64Const(0), curFallbackNumRows); + auto shouldInitializeFallback = builder.CreateICmpSLT(builder.CreateLoad(fallbackIndexOffset), numFallbackPartitions); + builder.CreateCondBr(shouldInitializeFallback, bbInitializeFallback, bbLoopBody); + + builder.SetInsertPoint(bbInitializeFallback); + builder.CreateStore(builder.CreateLoad(builder.CreateGEP(fallbackPartitions, builder.CreateLoad(fallbackIndexOffset))), curFallbackPtr); + builder.CreateStore(builder.CreateLoad(builder.CreatePointerCast(builder.CreateLoad(curFallbackPtr), env().i64ptrType())), curFallbackNumRows); + builder.CreateStore(builder.CreateGEP(builder.CreateLoad(curFallbackPtr), builder.CreateAdd(env().i64Const(sizeof(int64_t)), builder.CreateLoad(fallbackByteOffset))), curFallbackPtr); builder.CreateBr(bbLoopBody); - // -------------- // loop condition builder.SetInsertPoint(bbLoopCondition); Value *row = 
builder.CreateLoad(rowVar, "row"); @@ -198,8 +224,6 @@ namespace tuplex { auto cond = builder.CreateICmpSLT(nextRow, numRows); builder.CreateCondBr(cond, bbLoopBody, bbLoopDone); - - // --------- // loop body builder.SetInsertPoint(bbLoopBody); // decode tuple from input ptr @@ -207,9 +231,8 @@ namespace tuplex { ft.init(_inputRowType); Value* oldInputPtr = builder.CreateLoad(currentInputPtrVar, "ptr"); ft.deserializationCode(builder, oldInputPtr); - Value* newInputPtr = builder.CreateGEP(oldInputPtr, ft.getSize(builder)); // @TODO: maybe use inbounds + Value* newInputPtr = builder.CreateGEP(oldInputPtr, ft.getSize(builder)); builder.CreateStore(newInputPtr, currentInputPtrVar); - builder.CreateStore(builder.CreateLoad(outRowCountVar), prevRowNumVar); builder.CreateStore(builder.CreateLoad(badRowCountVar), prevBadRowNumVar); @@ -218,117 +241,113 @@ namespace tuplex { Value *inputRowSize = ft.getSize(builder); processRow(builder, argUserData, ft, normalRowCountVar, badRowCountVar, outRowCountVar, oldInputPtr, inputRowSize, terminateEarlyOnLimitCode, pipeline() ? pipeline()->getFunction() : nullptr); + builder.CreateStore(builder.CreateAdd(env().i64Const(1), builder.CreateLoad(totalNormalRowCounter)), totalNormalRowCounter); + // After row is processed we need to update exceptions if the row was filtered // We check that: outRowCountVar == prevRowCountVar (no new row was emitted) // badRowCountVar == prevBadRowNumVar (it was filtered, not just an exception) - // expCurRowVar < argNumExps (we still have exceptions that need updating) - // if (outRowCountVar == prevRowNumVar && badRowCountVar == prevBadRowNumVar && expCurRowVar < argNumExps) - auto bbExpUpdate = llvm::BasicBlock::Create(context, "exp_update", builder.GetInsertBlock()->getParent()); - auto expCond = builder.CreateICmpEQ(builder.CreateLoad(outRowCountVar), builder.CreateLoad(prevRowNumVar)); - auto badCond = builder.CreateICmpEQ(builder.CreateLoad(badRowCountVar), builder.CreateLoad(prevBadRowNumVar)); - auto remCond = builder.CreateICmpSLT(builder.CreateLoad(expCurRowVar), argNumExps); - builder.CreateCondBr(builder.CreateAnd(remCond, builder.CreateAnd(badCond, expCond)), bbExpUpdate, bbLoopCondition); - - // We have determined a row is filtered so we can iterate through all the input exceptions that occured before this - // row and decrement their row index with the number of previously filtered rows (expAccVar). - // This is a while loop that iterates over all exceptions that occured before this filtered row - // - // while (expCurRowVar < numExps && ((*outNormalRowCount - 1) + expCurRowVar) >= *((int64_t *) curExpPtrVar)) - // - // *outNormalRowCount - 1 changes cardinality of rows into its row index, we add the number of previously processed - // exceptions because the normal rows do not know about the exceptions to obtain the correct index. 
It's then compared - // against the row index of the exception pointed to currently in our partition - builder.SetInsertPoint(bbExpUpdate); - auto bbIncrement = llvm::BasicBlock::Create(context, "increment", builder.GetInsertBlock()->getParent()); - auto bbIncrementDone = llvm::BasicBlock::Create(context, "increment_done", builder.GetInsertBlock()->getParent()); - auto curExpRowIndPtr = builder.CreatePointerCast(builder.CreateLoad(curExpPtrVar), env().i64ptrType()); - auto incCond = builder.CreateICmpSGE(builder.CreateAdd(builder.CreateLoad(badRowCountVar), builder.CreateAdd(builder.CreateSub(builder.CreateLoad(normalRowCountVar), env().i64Const(1)), builder.CreateLoad(expCurRowVar))), builder.CreateLoad(curExpRowIndPtr)); - auto remCond2 = builder.CreateICmpSLT(builder.CreateLoad(expCurRowVar), argNumExps); - builder.CreateCondBr(builder.CreateAnd(remCond2, incCond), bbIncrement, bbIncrementDone); - - // Body of the while loop we need to - // 1. decrement the current exception row index by the expAccVar (all rows previously filtered) - // 2. Increment our partition pointer to next exception - // 3. Change partitions if we've exhausted all exceptions in the current, but still have more remaining in tototal - // - // Increment to the next exception by adding eSize and 4*sizeof(int64_t) to the partition pointer - // *((int64_t *) curExpPtrVar) -= expAccVar; - // curExpPtrVar += 4 * sizeof(int64_t) + ((int64_t *)curExpPtrVar)[3]; - // expCurRowVar += 1; - // curExpCurRowVar += 1; - // - // Finally we check to see if a partition change is required - // if (expCurRowVar < numExps && curExpCurRowVar >= curExpNumRowsVar) - builder.SetInsertPoint(bbIncrement); - // Change row index and go to next exception in partition - auto curExpRowIndPtr2 = builder.CreatePointerCast(builder.CreateLoad(curExpPtrVar), env().i64Type()->getPointerTo(0)); - builder.CreateStore(builder.CreateSub(builder.CreateLoad(curExpRowIndPtr2), builder.CreateLoad(expAccVar)), curExpRowIndPtr2); - auto curOffset = builder.CreateAlloca(env().i64Type(), 0, nullptr, "curOffset"); - builder.CreateStore(builder.CreateLoad(builder.CreateGEP(curExpRowIndPtr2, env().i64Const(3))), curOffset); - builder.CreateStore(builder.CreateAdd(builder.CreateLoad(curOffset), env().i64Const(4 * sizeof(int64_t))), curOffset); - builder.CreateStore(builder.CreateGEP(builder.CreateLoad(curExpPtrVar), builder.CreateLoad(curOffset)), curExpPtrVar); - builder.CreateStore(builder.CreateAdd(builder.CreateLoad(curExpCurRowVar), env().i64Const(1)), curExpCurRowVar); - builder.CreateStore(builder.CreateAdd(builder.CreateLoad(expCurRowVar), env().i64Const(1)), expCurRowVar); - // See if partition change needs to occur - auto bbChange = llvm::BasicBlock::Create(context, "change", builder.GetInsertBlock()->getParent()); - auto changeCond = builder.CreateICmpSGE(builder.CreateLoad(curExpCurRowVar), builder.CreateLoad(curExpNumRowsVar)); - auto leftCond = builder.CreateICmpSLT(builder.CreateLoad(expCurRowVar), argNumExps); - builder.CreateCondBr(builder.CreateAnd(leftCond, changeCond), bbChange, bbExpUpdate); - - // This block changes to the next partition - // curExpCurRowVar = 0; - // curExpIndVar = curExpIndVar + 1; - // curExpPtrVar = expPtrs[curExpIndVar]; - // curExpNumRowsVar = expPtrSizes[curExpIndVar]; - builder.SetInsertPoint(bbChange); - builder.CreateStore(env().i64Const(0), curExpCurRowVar); - builder.CreateStore(builder.CreateAdd(builder.CreateLoad(curExpIndVar), env().i64Const(1)), curExpIndVar); - 
builder.CreateStore(builder.CreateLoad(builder.CreateGEP(argExpPtrs, builder.CreateLoad(curExpIndVar))), curExpPtrVar); - builder.CreateStore(builder.CreateLoad(builder.CreateGEP(argExpPtrSizes, builder.CreateLoad(curExpIndVar))), curExpNumRowsVar); - builder.CreateBr(bbExpUpdate); - - // Finally increment the expAccVar by 1 becasue a row was filtered - // expAccVar += 1; - builder.SetInsertPoint(bbIncrementDone); - builder.CreateStore(builder.CreateAdd(builder.CreateLoad(expAccVar), env().i64Const(1)), expAccVar); + // if (outRowCountVar == prevRowNumVar && badRowCountVar == prevBadRowNumVar) + auto rowNotEmitted = builder.CreateICmpEQ(builder.CreateLoad(outRowCountVar), builder.CreateLoad(prevRowNumVar)); + auto rowNotException = builder.CreateICmpEQ(builder.CreateLoad(badRowCountVar), builder.CreateLoad(prevBadRowNumVar)); + builder.CreateCondBr(builder.CreateAnd(rowNotEmitted, rowNotException), bbUpdateGeneralCond, bbLoopCondition); + + // Update general cond + // while (*generalRowOffset < curGeneralNumRows && *((int64_t*)curGeneralPtr) < curNormalRowInd + totalGeneralRowCounter) + builder.SetInsertPoint(bbUpdateGeneralCond); + auto generalRowsRemainCond = builder.CreateICmpSLT(builder.CreateLoad(generalRowOffset), builder.CreateLoad(curGeneralNumRows)); + auto curGeneralRowInd = builder.CreateLoad(builder.CreatePointerCast(builder.CreateLoad(curGeneralPtr), env().i64ptrType())); + auto generalIndexLTCond = builder.CreateICmpSLT(curGeneralRowInd, builder.CreateAdd(builder.CreateLoad(totalGeneralRowCounter), builder.CreateLoad(totalNormalRowCounter))); + builder.CreateCondBr(builder.CreateAnd(generalRowsRemainCond, generalIndexLTCond), bbUpdateGeneralBody, bbUpdateFallbackCond); + + // Update general body + // generalNewRowInd = *((int64_t*)curGeneralPtr) - totalFilterCounter; + // *((int64_t*)curGeneralPtr) = generalNewRowInd; + // auto generalRowDelta = 4 * sizeof(int64_t) + ((int64_t*)curGeneralPtr)[3]; + // curGeneralPtr += generalRowDelta; + // *generalByteOffset += generalRowDelta; + // (*generalRowOffset)++; + // (*totalGeneralRowCounter)++; + builder.SetInsertPoint(bbUpdateGeneralBody); + auto generalNewRowInd = builder.CreateSub(builder.CreateLoad(builder.CreatePointerCast(builder.CreateLoad(curGeneralPtr), env().i64ptrType())), builder.CreateLoad(totalFilterCounter)); + builder.CreateStore(generalNewRowInd, builder.CreatePointerCast(builder.CreateLoad(curGeneralPtr), env().i64ptrType())); + auto generalRowDelta = builder.CreateAdd(builder.CreateLoad(builder.CreateGEP(builder.CreatePointerCast(builder.CreateLoad(curGeneralPtr), env().i64ptrType()), env().i64Const(3))), env().i64Const(4 * sizeof(int64_t))); + builder.CreateStore(builder.CreateGEP(builder.CreateLoad(curGeneralPtr), generalRowDelta), curGeneralPtr); + builder.CreateStore(builder.CreateAdd(generalRowDelta, builder.CreateLoad(generalByteOffset)), generalByteOffset); + builder.CreateStore(builder.CreateAdd(env().i64Const(1), builder.CreateLoad(generalRowOffset)), generalRowOffset); + builder.CreateStore(builder.CreateAdd(env().i64Const(1), builder.CreateLoad(totalGeneralRowCounter)), totalGeneralRowCounter); + + // if (*generalRowOffset == curGeneralNumRows && *generalIndexOffset < numGeneralPartitions - 1) + auto generalNoRowsRemain = builder.CreateICmpEQ(builder.CreateLoad(generalRowOffset), builder.CreateLoad(curGeneralNumRows)); + auto generalHasMorePartitions = builder.CreateICmpSLT(builder.CreateLoad(generalIndexOffset), builder.CreateSub(numGeneralPartitions, env().i64Const(1))); +
builder.CreateCondBr(builder.CreateAnd(generalNoRowsRemain, generalHasMorePartitions), bbNextGeneralPartition, bbUpdateGeneralCond); + + // generalIndexOffset += 1; + // *generalRowOffset = 0; + // *generalByteOffset = 0; + // curGeneralPtr = generalPartitions[*generalIndexOffset]; + // curGeneralNumRows = *((int64_t*)curGeneralPtr); + // curGeneralPtr += sizeof(int64_t); + builder.SetInsertPoint(bbNextGeneralPartition); + builder.CreateStore(builder.CreateAdd(builder.CreateLoad(generalIndexOffset), env().i64Const(1)), generalIndexOffset); + builder.CreateStore(env().i64Const(0), generalRowOffset); + builder.CreateStore(env().i64Const(0), generalByteOffset); + builder.CreateStore(builder.CreateLoad(builder.CreateGEP(generalPartitions, builder.CreateLoad(generalIndexOffset))), curGeneralPtr); + builder.CreateStore(builder.CreateLoad(builder.CreatePointerCast(builder.CreateLoad(curGeneralPtr), env().i64ptrType())), curGeneralNumRows); + builder.CreateStore(builder.CreateGEP(builder.CreateLoad(curGeneralPtr), builder.CreateAdd(env().i64Const(sizeof(int64_t)), builder.CreateLoad(generalByteOffset))), curGeneralPtr); + builder.CreateBr(bbUpdateGeneralCond); + + // Update fallback cond + // while (*fallbackRowOffset < curFallbackNumRows && *((int64_t*)curFallbackPtr) < curNormalRowInd + totalGeneralRowCounter + totalFallbackRowCounter) + builder.SetInsertPoint(bbUpdateFallbackCond); + auto fallbackRowsRemainCond = builder.CreateICmpSLT(builder.CreateLoad(fallbackRowOffset), builder.CreateLoad(curFallbackNumRows)); + auto curFallbackRowInd = builder.CreateLoad(builder.CreatePointerCast(builder.CreateLoad(curFallbackPtr), env().i64ptrType())); + auto fallbackIndexLTCond = builder.CreateICmpSLT(curFallbackRowInd, builder.CreateAdd(builder.CreateLoad(totalGeneralRowCounter), builder.CreateAdd(builder.CreateLoad(totalFallbackRowCounter), builder.CreateLoad(totalNormalRowCounter)))); + builder.CreateCondBr(builder.CreateAnd(fallbackRowsRemainCond, fallbackIndexLTCond), bbUpdateFallbackBody, bbUpdateDone); + + // Update fallback body + // fallbackNewRowInd = *((int64_t*)curFallbackPtr) - totalFilterCounter; + // *((int64_t*)curFallbackPtr) = fallbackNewRowInd; + // auto fallbackRowDelta = 4 * sizeof(int64_t) + ((int64_t*)curFallbackPtr)[3]; + // curFallbackPtr += fallbackRowDelta; + // *fallbackByteOffset += fallbackRowDelta; + // (*fallbackRowOffset)++; + // (*totalFallbackRowCounter)++; + builder.SetInsertPoint(bbUpdateFallbackBody); + auto fallbackNewRowInd = builder.CreateSub(builder.CreateLoad(builder.CreatePointerCast(builder.CreateLoad(curFallbackPtr), env().i64ptrType())), builder.CreateLoad(totalFilterCounter)); + builder.CreateStore(fallbackNewRowInd, builder.CreatePointerCast(builder.CreateLoad(curFallbackPtr), env().i64ptrType())); + auto fallbackRowDelta = builder.CreateAdd(builder.CreateLoad(builder.CreateGEP(builder.CreatePointerCast(builder.CreateLoad(curFallbackPtr), env().i64ptrType()), env().i64Const(3))), env().i64Const(4 * sizeof(int64_t))); + builder.CreateStore(builder.CreateGEP(builder.CreateLoad(curFallbackPtr), fallbackRowDelta), curFallbackPtr); + builder.CreateStore(builder.CreateAdd(fallbackRowDelta, builder.CreateLoad(fallbackByteOffset)), fallbackByteOffset); + builder.CreateStore(builder.CreateAdd(env().i64Const(1), builder.CreateLoad(fallbackRowOffset)), fallbackRowOffset); + builder.CreateStore(builder.CreateAdd(env().i64Const(1), builder.CreateLoad(totalFallbackRowCounter)), totalFallbackRowCounter); + + // if (*fallbackRowOffset == curFallbackNumRows &&
*fallbackIndexOffset < numFallbackPartitions - 1) + auto fallbackNoRowsRemain = builder.CreateICmpEQ(builder.CreateLoad(fallbackRowOffset), builder.CreateLoad(curFallbackNumRows)); + auto fallbackHasMorePartitions = builder.CreateICmpSLT(builder.CreateLoad(fallbackIndexOffset), builder.CreateSub(numFallbackPartitions, env().i64Const(1))); + builder.CreateCondBr(builder.CreateAnd(fallbackNoRowsRemain, fallbackHasMorePartitions), bbNextFallbackPartition, bbUpdateFallbackCond); + + // fallbackIndexOffset += 1; + // *fallbackRowOffset = 0; + // *fallbackByteOffset = 0; + // curFallbackPtr = fallbackPartitions[*fallbackIndexOffset]; + // curFallbackNumRows = *((int64_t*)curFallbackPtr); + // curFallbackPtr += sizeof(int64_t); + builder.SetInsertPoint(bbNextFallbackPartition); + builder.CreateStore(builder.CreateAdd(builder.CreateLoad(fallbackIndexOffset), env().i64Const(1)), fallbackIndexOffset); + builder.CreateStore(env().i64Const(0), fallbackRowOffset); + builder.CreateStore(env().i64Const(0), fallbackByteOffset); + builder.CreateStore(builder.CreateLoad(builder.CreateGEP(fallbackPartitions, builder.CreateLoad(fallbackIndexOffset))), curFallbackPtr); + builder.CreateStore(builder.CreateLoad(builder.CreatePointerCast(builder.CreateLoad(curFallbackPtr), env().i64ptrType())), curFallbackNumRows); + builder.CreateStore(builder.CreateGEP(builder.CreateLoad(curFallbackPtr), builder.CreateAdd(env().i64Const(sizeof(int64_t)), builder.CreateLoad(fallbackByteOffset))), curFallbackPtr); + builder.CreateBr(bbUpdateFallbackCond); + + // Update done + // totalFilterCounter += 1; + builder.SetInsertPoint(bbUpdateDone); + builder.CreateStore(builder.CreateAdd(env().i64Const(1), builder.CreateLoad(totalFilterCounter)), totalFilterCounter); builder.CreateBr(bbLoopCondition); - // --------- - // loop done builder.SetInsertPoint(bbLoopDone); - auto bbRemainingExceptions = llvm::BasicBlock::Create(context, "remaining_exceptions", builder.GetInsertBlock()->getParent()); - auto bbRemainingDone = llvm::BasicBlock::Create(context, "remaining_done", builder.GetInsertBlock()->getParent()); - auto expRemaining = builder.CreateICmpSLT(builder.CreateLoad(expCurRowVar), argNumExps); - builder.CreateCondBr(expRemaining, bbRemainingExceptions, bbRemainingDone); - - // We have processed all of the normal rows. If we have not exhausted all of our exceptions - // we just iterate through the remaining exceptions and decrement their row index by the final - // value of expAccVar counting our filtered rows. - // Same code as above, but just don't need to keep updating expAccVar by 1. 
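Assembled from the pseudocode comments in the new bbUpdateGeneral*/bbUpdateFallback* blocks above, the generated IR amounts to the following renumbering pass, rendered here as plain scalar C++ for readability. This is a sketch only: the struct and variable names are illustrative stand-ins, not Tuplex API, and the real logic is emitted through the LLVM builder.
```
#include <cstdint>
#include <vector>

// One cursor per exception tier (general or fallback); partitions store
// [numRows | rows...], each row being a 4x int64 header plus payload.
struct ExceptionCursor {
    std::vector<uint8_t*> partitions;
    int64_t indexOffset = 0;   // which partition we are in
    int64_t rowOffset = 0;     // row within the current partition
    int64_t curNumRows = 0;    // rows in the current partition
    uint8_t* cur = nullptr;    // points at the current row header
};

// Called whenever a normal row is filtered out: every exception whose original
// row index precedes the filtered row has its stored index decremented by the
// number of rows filtered so far (totalFilterCounter), so indices stay valid.
inline void renumberOnFilter(ExceptionCursor& c, int64_t normalRowsSoFar,
                             int64_t& tierRowCounter, int64_t totalFilterCounter) {
    while (c.rowOffset < c.curNumRows &&
           *reinterpret_cast<int64_t*>(c.cur) < normalRowsSoFar + tierRowCounter) {
        *reinterpret_cast<int64_t*>(c.cur) -= totalFilterCounter;      // renumber
        int64_t delta = 4 * sizeof(int64_t) + reinterpret_cast<int64_t*>(c.cur)[3];
        c.cur += delta;                                                // next row
        c.rowOffset++; tierRowCounter++;
        if (c.rowOffset == c.curNumRows &&
            c.indexOffset + 1 < static_cast<int64_t>(c.partitions.size())) {
            c.indexOffset++; c.rowOffset = 0;                          // next partition
            c.curNumRows = *reinterpret_cast<int64_t*>(c.partitions[c.indexOffset]);
            c.cur = c.partitions[c.indexOffset] + sizeof(int64_t);
        }
    }
}
```
Once both tiers have been advanced past the filtered row, the generated code bumps totalFilterCounter by one (bbUpdateDone above) before jumping back to the loop condition.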
- builder.SetInsertPoint(bbRemainingExceptions); - auto curExpRowIndPtr3 = builder.CreatePointerCast(builder.CreateLoad(curExpPtrVar), env().i64Type()->getPointerTo(0)); - builder.CreateStore(builder.CreateSub(builder.CreateLoad(curExpRowIndPtr3), builder.CreateLoad(expAccVar)), curExpRowIndPtr3); - auto curOffset2 = builder.CreateAlloca(env().i64Type(), 0, nullptr, "curOffset2"); - builder.CreateStore(builder.CreateLoad(builder.CreateGEP(curExpRowIndPtr3, env().i64Const(3))), curOffset2); - builder.CreateStore(builder.CreateAdd(builder.CreateLoad(curOffset2), env().i64Const(4 * sizeof(int64_t))), curOffset2); - builder.CreateStore(builder.CreateGEP(builder.CreateLoad(curExpPtrVar), builder.CreateLoad(curOffset2)), curExpPtrVar); - builder.CreateStore(builder.CreateAdd(builder.CreateLoad(curExpCurRowVar), env().i64Const(1)), curExpCurRowVar); - builder.CreateStore(builder.CreateAdd(builder.CreateLoad(expCurRowVar), env().i64Const(1)), expCurRowVar); - - auto bbChange2 = llvm::BasicBlock::Create(context, "change2", builder.GetInsertBlock()->getParent()); - auto changeCond2 = builder.CreateICmpSGE(builder.CreateLoad(curExpCurRowVar), builder.CreateLoad(curExpNumRowsVar)); - auto leftCond2 = builder.CreateICmpSLT(builder.CreateLoad(expCurRowVar), argNumExps); - builder.CreateCondBr(builder.CreateAnd(leftCond2, changeCond2), bbChange2, bbLoopDone); - - builder.SetInsertPoint(bbChange2); - builder.CreateStore(env().i64Const(0), curExpCurRowVar); - builder.CreateStore(builder.CreateAdd(builder.CreateLoad(curExpIndVar), env().i64Const(1)), curExpIndVar); - builder.CreateStore(builder.CreateLoad(builder.CreateGEP(argExpPtrs, builder.CreateLoad(curExpIndVar))), curExpPtrVar); - builder.CreateStore(builder.CreateLoad(builder.CreateGEP(argExpPtrSizes, builder.CreateLoad(curExpIndVar))), curExpNumRowsVar); - builder.CreateBr(bbLoopDone); - - builder.SetInsertPoint(bbRemainingDone); - // if intermediate callback desired, perform! if(_intermediateType != python::Type::UNKNOWN && !_intermediateCallbackName.empty()) { writeIntermediate(builder, argUserData, _intermediateCallbackName); } diff --git a/tuplex/core/src/physical/PhysicalPlan.cc b/tuplex/core/src/physical/PhysicalPlan.cc index 2399edf6f..c7eb06825 100644 --- a/tuplex/core/src/physical/PhysicalPlan.cc +++ b/tuplex/core/src/physical/PhysicalPlan.cc @@ -199,9 +199,11 @@ namespace tuplex { auto t = ops.front()->type(); assert(t == LogicalOperatorType::PARALLELIZE || t == LogicalOperatorType::CACHE); if (t == LogicalOperatorType::PARALLELIZE) - hasInputExceptions = !((ParallelizeOperator *)ops.front())->getPythonObjects().empty(); - if (t == LogicalOperatorType::CACHE) - hasInputExceptions = !((CacheOperator *)ops.front())->cachedExceptions().empty(); + hasInputExceptions = !((ParallelizeOperator *) ops.front())->getFallbackPartitions().empty(); + if (t == LogicalOperatorType::CACHE) { + auto cop = (CacheOperator *) ops.front(); + hasInputExceptions = !cop->cachedGeneralPartitions().empty() || !cop->cachedFallbackPartitions().empty(); + } } } @@ -239,6 +241,11 @@ namespace tuplex { // user wants to merge exceptions in order. 
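The PhysicalPlan hunk above replaces the single python-object/exception check with a two-tier notion of input exceptions: general partitions (schema-conforming exception rows) and fallback partitions (pickled Python rows). A stand-in sketch of the new predicate, with illustrative types rather than the real operator classes; the flag assembled just below then combines this with hasFilter and the merge-in-order option.
```
#include <vector>

struct Partition; // opaque stand-in

// A source operator now exposes both exception tiers.
struct CachedSource {
    std::vector<Partition*> generalPartitions;   // serialized exception rows
    std::vector<Partition*> fallbackPartitions;  // pickled python objects
};

// Input exceptions exist if either tier is non-empty.
inline bool hasInputExceptions(const CachedSource& src) {
    return !src.generalPartitions.empty() || !src.fallbackPartitions.empty();
}
```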
bool updateInputExceptions = hasFilter && hasInputExceptions && _context.getOptions().OPT_MERGE_EXCEPTIONS_INORDER(); + // Use incremental resolution if pipelines match and user has enabled the option + auto cache = _context.getIncrementalCache(); + auto cacheEntry = cache->getEntry(IncrementalCache::newKey(originalLogicalPlan()->getAction())); + auto incrementalResolution = cacheEntry && _context.getOptions().OPT_INCREMENTAL_RESOLUTION(); + // create trafostage via builder pattern auto builder = codegen::StageBuilder(_num_stages++, isRootStage, @@ -247,7 +254,8 @@ _context.getOptions().NORMALCASE_THRESHOLD(), _context.getOptions().OPT_SHARED_OBJECT_PROPAGATION(), _context.getOptions().OPT_NULLVALUE_OPTIMIZATION(), - updateInputExceptions); + updateInputExceptions, + incrementalResolution); // start code generation // first, add input @@ -401,19 +409,24 @@ namespace tuplex { // fill in data to start processing from operators. if (inputNode->type() == LogicalOperatorType::PARALLELIZE) { auto pop = dynamic_cast<ParallelizeOperator*>(inputNode); assert(inputNode); - stage->setInputPartitions(pop->getPartitions()); - stage->setInputExceptions(pop->getPythonObjects()); - stage->setPartitionToExceptionsMap(pop->getInputPartitionToPythonObjectsMap()); + stage->setInputPartitions(pop->getNormalPartitions()); + stage->setFallbackPartitions(pop->getFallbackPartitions()); + stage->setPartitionGroups(pop->getPartitionGroups()); } else if(inputNode->type() == LogicalOperatorType::CACHE) { auto cop = dynamic_cast<CacheOperator*>(inputNode); assert(inputNode); - stage->setInputPartitions(cop->cachedPartitions()); - stage->setInputExceptions(cop->cachedExceptions()); - stage->setPartitionToExceptionsMap(cop->partitionToExceptionsMap()); + stage->setInputPartitions(cop->cachedNormalPartitions()); + stage->setGeneralPartitions(cop->cachedGeneralPartitions()); + stage->setFallbackPartitions(cop->cachedFallbackPartitions()); + stage->setPartitionGroups(cop->partitionGroups()); } else if(inputNode->type() == LogicalOperatorType::FILEINPUT) { auto csvop = dynamic_cast<FileInputOperator*>(inputNode); stage->setInputFiles(csvop->getURIs(), csvop->getURISizes()); } // else it must be an internal node! => need to set manually based on result + if (incrementalResolution) { + stage->setIncrementalCacheEntry(cacheEntry); + } + return stage; } diff --git a/tuplex/core/src/physical/ResolveTask.cc b/tuplex/core/src/physical/ResolveTask.cc index 6ae6723f0..95a91b934 100644 --- a/tuplex/core/src/physical/ResolveTask.cc +++ b/tuplex/core/src/physical/ResolveTask.cc @@ -228,8 +228,21 @@ namespace tuplex { // needs to be put into separate list of python objects... // save index as well to merge back in order.
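Stepping back to the incremental-resolution gate added above: a cache entry is looked up under a key derived from the pipeline's action, and incremental mode only engages when an entry exists and the option is on. A minimal sketch of that flow, assuming a hypothetical string key and a stand-in entry type (the real IncrementalCache API is only visible here through its call sites); the ResolveTask hunk that follows then reworks how the non-conforming python objects are saved.
```
#include <string>
#include <unordered_map>

struct IncrementalCacheEntry; // holds partitions from the previous run

struct IncrementalCache {
    std::unordered_map<std::string, IncrementalCacheEntry*> entries;
    // hypothetical: key a pipeline by some serialization of its action
    static std::string newKey(const std::string& serializedAction) { return serializedAction; }
    IncrementalCacheEntry* getEntry(const std::string& key) const {
        auto it = entries.find(key);
        return it == entries.end() ? nullptr : it->second;
    }
};

// Mirrors the gate above: both a matching entry and the user option are required.
inline bool useIncrementalResolution(const IncrementalCache& cache,
                                     const std::string& actionKey,
                                     bool optIncrementalResolution) {
    return cache.getEntry(IncrementalCache::newKey(actionKey)) != nullptr
           && optIncrementalResolution;
}
```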
- assert(_rowNumber >= _numUnresolved); - _py_nonconfirming.push_back(std::make_tuple(_rowNumber - _numUnresolved, out_row)); + assert(_currentRowNumber >= _numUnresolved); + auto pickledObject = python::pickleObject(python::getMainModule(), out_row); + auto pyObjectSize = pickledObject.size(); + auto bufSize = 4 * sizeof(int64_t) + pyObjectSize; + + uint8_t *buf = new uint8_t[bufSize]; + auto ptr = buf; + *((int64_t*)ptr) = _currentRowNumber - _numUnresolved; ptr += sizeof(int64_t); + *((int64_t*)ptr) = ecToI64(ExceptionCode::PYTHON_PARALLELIZE); ptr += sizeof(int64_t); + *((int64_t*)ptr) = -1; ptr += sizeof(int64_t); + *((int64_t*)ptr) = pyObjectSize; ptr += sizeof(int64_t); + memcpy(ptr, pickledObject.c_str(), pyObjectSize); + rowToMemorySink(owner(), _fallbackSink, Schema(Schema::MemoryLayout::ROW, python::Type::makeTupleType({python::Type::STRING})), + 0, contextID(), buf, bufSize); + delete[] buf; } int64_t ResolveTask::mergeNormalRow(const uint8_t *buf, int64_t bufSize) { @@ -273,7 +286,7 @@ namespace tuplex { // exceptionCode, exceptionOperatorID, rowNumber, size int64_t ecCode = ecToI64(ExceptionCode::NORMALCASEVIOLATION); int64_t ecOpID = 0; // dummy - int64_t rowNumber = _currentRowNumber; + int64_t rowNumber = _currentRowNumber - _numUnresolved; uint8_t* except_buf = serializeExceptionToMemory(ecCode, ecOpID, rowNumber, buf, bufSize, &except_size); // sink row to type violation exceptions with commonCaseOutputSchema @@ -339,6 +352,7 @@ namespace tuplex { // check if there is a partition left if(_currentNormalPartitionIdx + 1 < _partitions.size()) { _partitions[_currentNormalPartitionIdx]->unlock(); + _partitions[_currentNormalPartitionIdx]->invalidate(); _currentNormalPartitionIdx++; _normalPtr = _partitions[_currentNormalPartitionIdx]->lockRaw(); @@ -406,14 +420,15 @@ namespace tuplex { bool potentiallyHasResolverOnSlowPath = !_operatorIDsAffectedByResolvers.empty() && std::binary_search(_operatorIDsAffectedByResolvers.begin(), _operatorIDsAffectedByResolvers.end(), operatorID); - if(!requiresInterpreterReprocessing(i64ToEC(ecCode)) && !potentiallyHasResolverOnSlowPath) { +// bool potentiallyHasResolverOnSlowPath = true; + if(!_isIncremental && !requiresInterpreterReprocessing(i64ToEC(ecCode)) && !potentiallyHasResolverOnSlowPath) { // TODO: check with resolvers! // i.e., we can directly save this as exception IF code is not an interpreter code // and true exception, i.e. no resolvers available. // => need a list of for which opIds/codes resolvers are available... ///.... _numUnresolved++; - exceptionCallback(ecCode, operatorID, _rowNumber, ebuf, eSize); + exceptionCallback(ecCode, operatorID, _currentRowNumber, ebuf, eSize); return; } @@ -443,7 +458,6 @@ namespace tuplex { // fallback 2: interpreter path // --> only go there if a non-true exception was recorded. Else, it will be dealt with above if(resCode == -1 && _interpreterFunctor) { - // acquire GIL python::lockGIL(); PyCallable_Check(_interpreterFunctor); @@ -648,7 +662,11 @@ namespace tuplex { mergeRow(buf, serialized_length, BUF_FORMAT_GENERAL_OUTPUT); delete [] buf; } else { - writePythonObject(rowObj); + if(PyTuple_Check(rowObj) && PyTuple_Size(rowObj) == 1) { + writePythonObject(PyTuple_GetItem(rowObj, 0)); + } else { + writePythonObject(rowObj); + } } // Py_XDECREF(rowObj); } @@ -676,7 +694,7 @@ namespace tuplex { // fallback 3: still exception? save... 
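The writePythonObject change at the top of this ResolveTask hunk pickles the resolved object and prefixes it with a fixed four-field header before sinking it to the fallback sink. The layout, as a standalone packing sketch (an illustrative helper, not the Tuplex call):
```
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

// Buffer layout: [rowNumber | exceptionCode | operatorID | payloadSize | pickled bytes]
std::vector<uint8_t> packFallbackRow(int64_t rowNumber, int64_t ecCode,
                                     const std::string& pickled) {
    std::vector<uint8_t> buf(4 * sizeof(int64_t) + pickled.size());
    auto* header = reinterpret_cast<int64_t*>(buf.data());
    header[0] = rowNumber;                            // index used for ordered merging
    header[1] = ecCode;                               // e.g. PYTHON_PARALLELIZE above
    header[2] = -1;                                   // dummy operator ID
    header[3] = static_cast<int64_t>(pickled.size()); // payload size in bytes
    std::memcpy(buf.data() + 4 * sizeof(int64_t), pickled.data(), pickled.size());
    return buf;
}
```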
if(resCode == -1) { _numUnresolved++; - exceptionCallback(ecCode, operatorID, _rowNumber, ebuf, eSize); + exceptionCallback(ecCode, operatorID, _currentRowNumber, ebuf, eSize); } } @@ -711,7 +729,7 @@ namespace tuplex { } // abort if no exceptions! - if(_runtimeExceptions.empty() && _numInputExceptions == 0) + if(_exceptionPartitions.empty() && _generalPartitions.empty() && _fallbackPartitions.empty()) return; // special case: no functor & no python pipeline functor given @@ -724,12 +742,12 @@ namespace tuplex { #endif // copy _generalCasePartitions over to base class - IExceptionableTask::setExceptions(_runtimeExceptions); + IExceptionableTask::setExceptions(_generalPartitions); // clear exceptions, because they have been resolved (or put to new exceptions!) // if task produced exceptions, they are stored in the IExceptionableTask class! // => no need to overwrite them, getter for iexceptionabletask has all info! - _runtimeExceptions.clear(); + _generalPartitions.clear(); _wallTime = timer.time(); return; @@ -742,78 +760,61 @@ namespace tuplex { // merge exceptions with normal rows after calling slow code over them... // basic idea is go over all exception partitions, execute row wise the resolution function // and merge the result back to the partitions - for(auto partition : _runtimeExceptions) { - const uint8_t *ptr = partition->lockRaw(); - int64_t numRows = *((int64_t *) ptr); - ptr += sizeof(int64_t); - - for(int i = 0; i < numRows; ++i) { - // old - // _currentRowNumber = *((int64_t*)ptr); - // ptr += sizeof(int64_t); - // int64_t ecCode = *((int64_t*)ptr); - // ptr += sizeof(int64_t); - // int64_t operatorID = *((int64_t*)ptr); - // ptr += sizeof(int64_t); - // int64_t eSize = *((int64_t*)ptr); - // ptr += sizeof(int64_t); - + for (const auto &partition : _generalPartitions) { + const uint8_t *ptr = partition->lock(); + auto numRows = partition->getNumRows(); + for (int i = 0; i < numRows; ++i) { const uint8_t *ebuf = nullptr; int64_t ecCode = -1, operatorID = -1; size_t eSize = 0; auto delta = deserializeExceptionFromMemory(ptr, &ecCode, &operatorID, &_currentRowNumber, &ebuf, &eSize); + processExceptionRow(ecCode, operatorID, ebuf, eSize); - // call functor over this... - // ==> important to use row number here for continuous exception resolution! - // args are: "userData", "rowNumber", "exceptionCode", "rowBuf", "bufSize" + ptr += delta; + _rowNumber++; + } + partition->unlock(); + partition->invalidate(); + } + + for (const auto &partition : _fallbackPartitions) { + const uint8_t *ptr = partition->lock(); + auto numRows = partition->getNumRows(); + for (int i = 0; i < numRows; ++i) { + const uint8_t *ebuf = nullptr; + int64_t ecCode = -1, operatorID = -1; + size_t eSize = 0; + auto delta = deserializeExceptionFromMemory(ptr, &ecCode, &operatorID, &_currentRowNumber, &ebuf, + &eSize); processExceptionRow(ecCode, operatorID, ebuf, eSize); ptr += delta; - // old - //ptr += eSize; - - // always inc row number _rowNumber++; } partition->unlock(); - - // exception partition is done or exceptions are transferred to new partition... 
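Both partition loops above lean on deserializeExceptionFromMemory to walk a partition row by row. Judging from the call sites, its contract is: fill the out-parameters from one serialized exception row and return the number of bytes consumed, so the caller can advance its cursor by the returned delta. A sketch of a decoder with that assumed contract, matching the 4x int64 header layout used throughout this diff (assumption: the real Tuplex implementation may differ in detail):
```
#include <cstddef>
#include <cstdint>

// Assumed decoder contract, inferred from the call sites above.
size_t decodeExceptionRow(const uint8_t* ptr, int64_t* ecCode, int64_t* operatorID,
                          int64_t* rowNumber, const uint8_t** ebuf, size_t* eSize) {
    auto* header = reinterpret_cast<const int64_t*>(ptr);
    *rowNumber  = header[0];
    *ecCode     = header[1];
    *operatorID = header[2];
    *eSize      = static_cast<size_t>(header[3]);
    *ebuf       = ptr + 4 * sizeof(int64_t);   // payload follows the header
    return 4 * sizeof(int64_t) + *eSize;       // delta to advance the cursor
}
```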
partition->invalidate(); } - // now process all of the input exceptions - if (_numInputExceptions > 0) { - // Initialize input exception to starting index - auto partition = _inputExceptions[_inputExceptionIndex]; - auto rowsLeftInPartition = partition->getNumRows() - _inputExceptionRowOffset; - const uint8_t *ptr = partition->lock() + _inputExceptionByteOffset; - - // Iterate over all input exceptions, may be accross multiple partitions - for (int i = 0; i < _numInputExceptions; ++i) { - // Change partition once exhausted - if (rowsLeftInPartition == 0) { - partition->unlock(); - _inputExceptionIndex++; - partition = _inputExceptions[_inputExceptionIndex]; - rowsLeftInPartition = partition->getNumRows(); - ptr = partition->lock(); - } - + for (const auto &partition : _exceptionPartitions) { + const uint8_t *ptr = partition->lock(); + auto numRows = partition->getNumRows(); + for (int i = 0; i < numRows; ++i) { const uint8_t *ebuf = nullptr; int64_t ecCode = -1, operatorID = -1; size_t eSize = 0; auto delta = deserializeExceptionFromMemory(ptr, &ecCode, &operatorID, &_currentRowNumber, &ebuf, &eSize); + processExceptionRow(ecCode, operatorID, ebuf, eSize); + ptr += delta; _rowNumber++; - rowsLeftInPartition--; } - // Unlock but wait to invalidate until all resolve tasks have finished partition->unlock(); + partition->invalidate(); } // merging is done, unlock the last partition & copy the others over. @@ -832,8 +833,9 @@ namespace tuplex { // clear exceptions, because they have been resolved (or put to new exceptions!) // if task produced exceptions, they are stored in the IExceptionableTask class! // => no need to overwrite them, getter for iexceptionabletask has all info! - _runtimeExceptions.clear(); - _inputExceptions.clear(); + _exceptionPartitions.clear(); + _generalPartitions.clear(); + _fallbackPartitions.clear(); } else { executeInOrder(); } @@ -881,146 +883,323 @@ namespace tuplex { _rowNumber = 0; } - // Initialize runtime exception variables - size_t curRuntimePartitionInd = 0; // current index into vector of runtime exception partitions - int64_t numRuntimeRowsLeftInPartition = 0; // number of rows remaining in partition - const uint8_t *runPtr = nullptr; - if (_runtimeExceptions.size() > 0) { - curRuntimePartitionInd = 0; - numRuntimeRowsLeftInPartition = _runtimeExceptions[curRuntimePartitionInd]->getNumRows(); - runPtr = _runtimeExceptions[curRuntimePartitionInd]->lock(); + size_t curExceptionInd = 0; + size_t exceptionsRemaining = 0; + const uint8_t *expPtr = nullptr; + size_t exceptionNumRows = 0; + for (int i = 0; i < _exceptionPartitions.size(); ++i) { + auto numRows = _exceptionPartitions[i]->getNumRows(); + exceptionNumRows += numRows; + if (i == 0) { + expPtr = _exceptionPartitions[i]->lock(); + exceptionsRemaining = numRows; + } } - // Initialize input exception variables - size_t curInputPartitionInd = 0; // current index into vector of input exception partitions - int64_t numInputRowsLeftInPartition = 0; // number of rows remaining in partition - const uint8_t *inputPtr = nullptr; - if (_numInputExceptions > 0) { - curInputPartitionInd = _inputExceptionIndex; - numInputRowsLeftInPartition = _inputExceptions[curInputPartitionInd]->getNumRows() - _inputExceptionRowOffset; - inputPtr = _inputExceptions[curInputPartitionInd]->lock() + _inputExceptionByteOffset; + size_t curGeneralInd = 0; + size_t generalRemaining = 0; + const uint8_t *generalPtr = nullptr; + size_t generalNumRows = 0; + for (int i = 0; i < _generalPartitions.size(); ++i) { + auto numRows = 
_generalPartitions[i]->getNumRows(); + generalNumRows += numRows; + if (i == 0) { + generalPtr = _generalPartitions[i]->lock(); + generalRemaining = numRows; + } + } + + size_t curFallbackInd = 0; + size_t fallbackRemaining = 0; + const uint8_t *fallPtr = nullptr; + size_t fallbackNumRows = 0; + for (int i = 0; i < _fallbackPartitions.size(); ++i) { + auto numRows = _fallbackPartitions[i]->getNumRows(); + fallbackNumRows += numRows; + if (i == 0) { + fallPtr = _fallbackPartitions[i]->lock(); + fallbackRemaining = numRows; + } } // Merge input and runtime exceptions in order. To do so, we can compare the row indices of the // current runtime and input exception and process the one that occurs first. The saved row indices of // runtime exceptions do not account for the existence of input exceptions, so we need to add the previous // input exceptions to compare the true row number - size_t inputRowsProcessed = 0; - const uint8_t *ptr = nullptr; - while (runPtr && inputPtr) { - auto runRowInd = *((int64_t *) runPtr); // get current runtime row index - auto inputRowInd = *((int64_t *) inputPtr); // get current input row index - bool isRuntimeException = false; - // compare indices with accounting for previous input exceptions - if (runRowInd + inputRowsProcessed < inputRowInd) { - ptr = runPtr; - numRuntimeRowsLeftInPartition--; - isRuntimeException = true; + while (_exceptionCounter < exceptionNumRows && _generalCounter < generalNumRows && _fallbackCounter < fallbackNumRows) { + auto expRowInd = *((int64_t *) expPtr) + _fallbackCounter + _generalCounter; + auto generalRowInd = *((int64_t *) generalPtr) + _fallbackCounter; + auto fallbackRowInd = *((int64_t *) fallPtr); + + const uint8_t *buf = nullptr; + int64_t ecCode = 0, operatorID = -1; + size_t eSize = 0; + if (fallbackRowInd <= expRowInd && fallbackRowInd <= generalRowInd) { + fallbackRemaining--; + _fallbackCounter++; + + auto delta = deserializeExceptionFromMemory(fallPtr, &ecCode, &operatorID, &_currentRowNumber, &buf, &eSize); + fallPtr += delta; + } else if (generalRowInd <= expRowInd && generalRowInd <= fallbackRowInd) { + generalRemaining--; + _generalCounter++; + + auto delta = deserializeExceptionFromMemory(generalPtr, &ecCode, &operatorID, &_currentRowNumber, &buf, &eSize); + _currentRowNumber += _fallbackCounter; + generalPtr += delta; } else { - ptr = inputPtr; - numInputRowsLeftInPartition--; - inputRowsProcessed++; + exceptionsRemaining--; + _exceptionCounter++; + + auto delta = deserializeExceptionFromMemory(expPtr, &ecCode, &operatorID, &_currentRowNumber, &buf, &eSize); + _currentRowNumber += _fallbackCounter + _generalCounter; + expPtr += delta; } - const uint8_t *ebuf = nullptr; - int64_t ecCode = -1, operatorID = -1; + processExceptionRow(ecCode, operatorID, buf, eSize); + _rowNumber++; + + if (exceptionsRemaining == 0) { + _exceptionPartitions[curExceptionInd]->unlock(); + _exceptionPartitions[curExceptionInd]->invalidate(); + curExceptionInd++; + if (curExceptionInd < _exceptionPartitions.size()) { + exceptionsRemaining = _exceptionPartitions[curExceptionInd]->getNumRows(); + expPtr = _exceptionPartitions[curExceptionInd]->lock(); + } + } + + if (generalRemaining == 0) { + _generalPartitions[curGeneralInd]->unlock(); + _generalPartitions[curGeneralInd]->invalidate(); + curGeneralInd++; + if (curGeneralInd < _generalPartitions.size()) { + generalRemaining = _generalPartitions[curGeneralInd]->getNumRows(); + generalPtr = _generalPartitions[curGeneralInd]->lock(); + } + } + + if (fallbackRemaining == 0) { + 
_fallbackPartitions[curFallbackInd]->unlock(); + _fallbackPartitions[curFallbackInd]->invalidate(); + curFallbackInd++; + if (curFallbackInd < _fallbackPartitions.size()) { + fallbackRemaining = _fallbackPartitions[curFallbackInd]->getNumRows(); + fallPtr = _fallbackPartitions[curFallbackInd]->lock(); + } + } + } + + while (_exceptionCounter < exceptionNumRows && _generalCounter < generalNumRows) { + auto expRowInd = *((int64_t *) expPtr) + _fallbackCounter + _generalCounter; + auto generalRowInd = *((int64_t *) generalPtr) + _generalCounter; + + const uint8_t *buf = nullptr; + int64_t ecCode = 0, operatorID = -1; size_t eSize = 0; - auto delta = deserializeExceptionFromMemory(ptr, &ecCode, &operatorID, &_currentRowNumber, &ebuf, - &eSize); + if (generalRowInd <= expRowInd) { + generalRemaining--; + _generalCounter++; - if (isRuntimeException) { - _currentRowNumber += inputRowsProcessed; - runPtr += delta; + auto delta = deserializeExceptionFromMemory(generalPtr, &ecCode, &operatorID, &_currentRowNumber, &buf, &eSize); + _currentRowNumber += _fallbackCounter; + generalPtr += delta; } else { - inputPtr += delta; + exceptionsRemaining--; + _exceptionCounter++; + + auto delta = deserializeExceptionFromMemory(expPtr, &ecCode, &operatorID, &_currentRowNumber, &buf, &eSize); + _currentRowNumber += _fallbackCounter + _generalCounter; + expPtr += delta; } - processExceptionRow(ecCode, operatorID, ebuf, eSize); + processExceptionRow(ecCode, operatorID, buf, eSize); _rowNumber++; - // Exhausted current runtime exceptions, need to switch partitions - if (numRuntimeRowsLeftInPartition == 0) { - _runtimeExceptions[curRuntimePartitionInd]->unlock(); - _runtimeExceptions[curRuntimePartitionInd]->invalidate(); - curRuntimePartitionInd++; - // Still have more exceptions to go through - if (curRuntimePartitionInd < _runtimeExceptions.size()) { - numRuntimeRowsLeftInPartition = _runtimeExceptions[curRuntimePartitionInd]->getNumRows(); - runPtr = _runtimeExceptions[curRuntimePartitionInd]->lock(); - } else { - // processed all exceptions - runPtr = nullptr; + if (exceptionsRemaining == 0) { + _exceptionPartitions[curExceptionInd]->unlock(); + _exceptionPartitions[curExceptionInd]->invalidate(); + curExceptionInd++; + if (curExceptionInd < _exceptionPartitions.size()) { + exceptionsRemaining = _exceptionPartitions[curExceptionInd]->getNumRows(); + expPtr = _exceptionPartitions[curExceptionInd]->lock(); } } - // Exhausted current input exceptions, need to switch partitions - if (numInputRowsLeftInPartition == 0 || inputRowsProcessed == _numInputExceptions) { - _inputExceptions[curInputPartitionInd]->unlock(); - curInputPartitionInd++; - // Still have more exceptions to go through - if (curInputPartitionInd < _inputExceptions.size() && inputRowsProcessed < _numInputExceptions) { - numInputRowsLeftInPartition = _inputExceptions[curInputPartitionInd]->getNumRows(); - inputPtr = _inputExceptions[curInputPartitionInd]->lock(); - } else { - // processed all exceptions - inputPtr = nullptr; + if (generalRemaining == 0) { + _generalPartitions[curGeneralInd]->unlock(); + _generalPartitions[curGeneralInd]->invalidate(); + curGeneralInd++; + if (curGeneralInd < _generalPartitions.size()) { + generalRemaining = _generalPartitions[curGeneralInd]->getNumRows(); + generalPtr = _generalPartitions[curGeneralInd]->lock(); } } } - // Process remaining runtime exceptions if any exist - while (runPtr) { - const uint8_t *ebuf = nullptr; + while (_generalCounter < generalNumRows && _fallbackCounter < fallbackNumRows) { + auto 
generalRowInd = *((int64_t *) generalPtr) + _fallbackCounter; + auto fallbackRowInd = *((int64_t *) fallPtr); + + const uint8_t *buf = nullptr; + int64_t ecCode = 0, operatorID = -1; + size_t eSize = 0; + if (fallbackRowInd <= generalRowInd) { + fallbackRemaining--; + _fallbackCounter++; + + auto delta = deserializeExceptionFromMemory(fallPtr, &ecCode, &operatorID, &_currentRowNumber, &buf, &eSize); + fallPtr += delta; + } else { + generalRemaining--; + _generalCounter++; + + auto delta = deserializeExceptionFromMemory(generalPtr, &ecCode, &operatorID, &_currentRowNumber, &buf, &eSize); + _currentRowNumber += _fallbackCounter; + generalPtr += delta; + } + + processExceptionRow(ecCode, operatorID, buf, eSize); + _rowNumber++; + + if (generalRemaining == 0) { + _generalPartitions[curGeneralInd]->unlock(); + _generalPartitions[curGeneralInd]->invalidate(); + curGeneralInd++; + if (curGeneralInd < _generalPartitions.size()) { + generalRemaining = _generalPartitions[curGeneralInd]->getNumRows(); + generalPtr = _generalPartitions[curGeneralInd]->lock(); + } + } + + if (fallbackRemaining == 0) { + _fallbackPartitions[curFallbackInd]->unlock(); + _fallbackPartitions[curFallbackInd]->invalidate(); + curFallbackInd++; + if (curFallbackInd < _fallbackPartitions.size()) { + fallbackRemaining = _fallbackPartitions[curFallbackInd]->getNumRows(); + fallPtr = _fallbackPartitions[curFallbackInd]->lock(); + } + } + } + + while (_exceptionCounter < exceptionNumRows && _fallbackCounter < fallbackNumRows) { + auto expRowInd = *((int64_t *) expPtr) + _fallbackCounter + _generalCounter; + auto fallbackRowInd = *((int64_t *) fallPtr); + + const uint8_t *buf = nullptr; + int64_t ecCode = 0, operatorID = -1; + size_t eSize = 0; + if (fallbackRowInd <= expRowInd) { + fallbackRemaining--; + _fallbackCounter++; + + auto delta = deserializeExceptionFromMemory(fallPtr, &ecCode, &operatorID, &_currentRowNumber, &buf, &eSize); + fallPtr += delta; + } else { + exceptionsRemaining--; + _exceptionCounter++; + + auto delta = deserializeExceptionFromMemory(expPtr, &ecCode, &operatorID, &_currentRowNumber, &buf, &eSize); + _currentRowNumber += _fallbackCounter + _generalCounter; + expPtr += delta; + } + + processExceptionRow(ecCode, operatorID, buf, eSize); + _rowNumber++; + + if (exceptionsRemaining == 0) { + _exceptionPartitions[curExceptionInd]->unlock(); + _exceptionPartitions[curExceptionInd]->invalidate(); + curExceptionInd++; + if (curExceptionInd < _exceptionPartitions.size()) { + exceptionsRemaining = _exceptionPartitions[curExceptionInd]->getNumRows(); + expPtr = _exceptionPartitions[curExceptionInd]->lock(); + } + } + + if (fallbackRemaining == 0) { + _fallbackPartitions[curFallbackInd]->unlock(); + _fallbackPartitions[curFallbackInd]->invalidate(); + curFallbackInd++; + if (curFallbackInd < _fallbackPartitions.size()) { + fallbackRemaining = _fallbackPartitions[curFallbackInd]->getNumRows(); + fallPtr = _fallbackPartitions[curFallbackInd]->lock(); + } + } + } + + while (_exceptionCounter < exceptionNumRows) { + const uint8_t *buf = nullptr; int64_t ecCode = -1, operatorID = -1; size_t eSize = 0; - auto delta = deserializeExceptionFromMemory(runPtr, &ecCode, &operatorID, &_currentRowNumber, &ebuf, - &eSize); - _currentRowNumber += inputRowsProcessed; - processExceptionRow(ecCode, operatorID, ebuf, eSize); - runPtr += delta; + auto delta = deserializeExceptionFromMemory(expPtr, &ecCode, &operatorID, &_currentRowNumber, &buf, &eSize); + _currentRowNumber += _generalCounter + _fallbackCounter; + expPtr += delta; + + 
processExceptionRow(ecCode, operatorID, buf, eSize); _rowNumber++; - numRuntimeRowsLeftInPartition--; - // Exhausted current runtime exceptions in partitions need to switch partitions or could be done - if (numRuntimeRowsLeftInPartition == 0) { - _runtimeExceptions[curRuntimePartitionInd]->unlock(); - _runtimeExceptions[curRuntimePartitionInd]->invalidate(); - curRuntimePartitionInd++; - // More exceptions to process - if (curRuntimePartitionInd < _runtimeExceptions.size()) { - numRuntimeRowsLeftInPartition = _runtimeExceptions[curRuntimePartitionInd]->getNumRows(); - runPtr = _runtimeExceptions[curRuntimePartitionInd]->lock(); - } else { - // processed all exceptions - runPtr = nullptr; + exceptionsRemaining--; + _exceptionCounter++; + + if (exceptionsRemaining == 0) { + _exceptionPartitions[curExceptionInd]->unlock(); + _exceptionPartitions[curExceptionInd]->invalidate(); + curExceptionInd++; + if (curExceptionInd < _exceptionPartitions.size()) { + exceptionsRemaining = _exceptionPartitions[curExceptionInd]->getNumRows(); + expPtr = _exceptionPartitions[curExceptionInd]->lock(); } } } - // Process remaining input exceptions if any exist - while (inputPtr) { - const uint8_t *ebuf = nullptr; + while (_generalCounter < generalNumRows) { + const uint8_t *buf = nullptr; int64_t ecCode = -1, operatorID = -1; size_t eSize = 0; - auto delta = deserializeExceptionFromMemory(inputPtr, &ecCode, &operatorID, &_currentRowNumber, &ebuf, - &eSize); - processExceptionRow(ecCode, operatorID, ebuf, eSize); - inputPtr += delta; + auto delta = deserializeExceptionFromMemory(generalPtr, &ecCode, &operatorID, &_currentRowNumber, &buf, &eSize); + _currentRowNumber += _fallbackCounter; + + generalPtr += delta; + + processExceptionRow(ecCode, operatorID, buf, eSize); _rowNumber++; - numInputRowsLeftInPartition--; - inputRowsProcessed++; - // Exhausted current input exceptions, need to switch partitions - if (numInputRowsLeftInPartition == 0 || inputRowsProcessed == _numInputExceptions) { - _inputExceptions[curInputPartitionInd]->unlock(); - curInputPartitionInd++; - // Still have more exceptions - if (curInputPartitionInd < _inputExceptions.size() && inputRowsProcessed < _numInputExceptions) { - numInputRowsLeftInPartition = _inputExceptions[curInputPartitionInd]->getNumRows(); - inputPtr = _inputExceptions[curInputPartitionInd]->lock(); - } else { - // processed all exceptions - inputPtr = nullptr; + generalRemaining--; + _generalCounter++; + + if (generalRemaining == 0) { + _generalPartitions[curGeneralInd]->unlock(); + _generalPartitions[curGeneralInd]->invalidate(); + curGeneralInd++; + if (curGeneralInd < _generalPartitions.size()) { + generalRemaining = _generalPartitions[curGeneralInd]->getNumRows(); + generalPtr = _generalPartitions[curGeneralInd]->lock(); + } + } + } + + while (_fallbackCounter < fallbackNumRows) { + const uint8_t *buf = nullptr; + int64_t ecCode = -1, operatorID = -1; + size_t eSize = 0; + auto delta = deserializeExceptionFromMemory(fallPtr, &ecCode, &operatorID, &_currentRowNumber, &buf, &eSize); + fallPtr += delta; + + processExceptionRow(ecCode, operatorID, buf, eSize); + _rowNumber++; + + fallbackRemaining--; + _fallbackCounter++; + + if (fallbackRemaining == 0) { + _fallbackPartitions[curFallbackInd]->unlock(); + _fallbackPartitions[curFallbackInd]->invalidate(); + curFallbackInd++; + if (curFallbackInd < _fallbackPartitions.size()) { + fallbackRemaining = _fallbackPartitions[curFallbackInd]->getNumRows(); + fallPtr = _fallbackPartitions[curFallbackInd]->lock(); } } } @@ -1037,8 
+1216,10 @@ namespace tuplex { _normalRowNumber++; } - if (!_partitions.empty()) + if (!_partitions.empty()) { _partitions[_currentNormalPartitionIdx]->unlock(); + _partitions[_currentNormalPartitionIdx]->invalidate(); + } // merging is done, unlock the last partition & copy the others over. unlockAll(); @@ -1054,7 +1235,9 @@ // clear exceptions, because they have been resolved (or put to new exceptions!) // if task produced exceptions, they are stored in the IExceptionableTask class! // => no need to overwrite them, getter for iexceptionabletask has all info! - _runtimeExceptions.clear(); + _exceptionPartitions.clear(); + _generalPartitions.clear(); + _fallbackPartitions.clear(); } void ResolveTask::sendStatusToHistoryServer() { @@ -1071,6 +1254,7 @@ void ResolveTask::unlockAll() { _mergedRowsSink.unlock(); _generalCaseSink.unlock(); + _fallbackSink.unlock(); // unlock exceptionable task IExceptionableTask::unlockAll(); diff --git a/tuplex/core/src/physical/ResultSet.cc b/tuplex/core/src/physical/ResultSet.cc index 0f7bf7319..153553f25 100644 --- a/tuplex/core/src/physical/ResultSet.cc +++ b/tuplex/core/src/physical/ResultSet.cc @@ -13,97 +13,175 @@ namespace tuplex { ResultSet::ResultSet(const Schema& schema, - const std::vector<Partition*>& partitions, - const std::vector<Partition*>& exceptions, - const std::unordered_map& partitionToExceptionsMap, - const std::vector<std::tuple<size_t, PyObject*>> pyobjects, + const std::vector<Partition*>& normalPartitions, + const std::vector<Partition*>& generalPartitions, + const std::vector<Partition*>& fallbackPartitions, + const std::vector<PartitionGroup>& partitionGroups, int64_t maxRows) : ResultSet::ResultSet() { - for(Partition *p : partitions) - _partitions.push_back(p); - - _pyobjects = std::deque<std::tuple<size_t, PyObject*>>(pyobjects.begin(), pyobjects.end()); - _exceptions = exceptions; - _partitionToExceptionsMap = partitionToExceptionsMap; - _curRowCounter = 0; + for (const auto &group : partitionGroups) + _partitionGroups.push_back(group); + + for (const auto &p : normalPartitions) + _remainingNormalPartitions.push_back(p); + for (const auto &p : generalPartitions) + _remainingGeneralPartitions.push_back(p); + for (const auto &p : fallbackPartitions) + _remainingFallbackPartitions.push_back(p); + + _curNormalRowCounter = 0; + _curNormalByteCounter = 0; + _curGeneralRowCounter = 0; + _curGeneralByteCounter = 0; + _curFallbackRowCounter = 0; + _curFallbackByteCounter = 0; + _normalRowCounter = 0; + _generalRowCounter = 0; + _fallbackRowCounter = 0; _totalRowCounter = 0; - _byteCounter = 0; + _schema = schema; _maxRows = maxRows < 0 ?
std::numeric_limits::max() : maxRows; - _rowsRetrieved = 0; } - void ResultSet::clear() { - for(auto partition : _partitions) - partition->invalidate(); - _partitions.clear(); - for(auto partition : _exceptions) + void clearPartitions(std::list<Partition*>& partitions) { + for (auto &partition : partitions) { partition->invalidate(); + } + partitions.clear(); + } - _curRowCounter = 0; - _byteCounter = 0; + void ResultSet::clear() { + clearPartitions(_remainingNormalPartitions); + clearPartitions(_currentNormalPartitions); + clearPartitions(_remainingGeneralPartitions); + clearPartitions(_currentGeneralPartitions); + clearPartitions(_remainingFallbackPartitions); + clearPartitions(_currentFallbackPartitions); + _partitionGroups.clear(); + + _curNormalRowCounter = 0; + _curNormalByteCounter = 0; + _curGeneralRowCounter = 0; + _curGeneralByteCounter = 0; + _curFallbackRowCounter = 0; + _curFallbackByteCounter = 0; + _normalRowCounter = 0; + _generalRowCounter = 0; + _fallbackRowCounter = 0; + _totalRowCounter = 0; _maxRows = 0; - _rowsRetrieved = 0; } - bool ResultSet::hasNextRow() { - + bool ResultSet::hasNextNormalPartition() const { // all rows already retrieved? - if(_rowsRetrieved >= _maxRows) + if (_totalRowCounter >= _maxRows) return false; // empty? - if(_partitions.empty() && _pyobjects.empty()) + if (_currentNormalPartitions.empty() && _remainingNormalPartitions.empty()) { return false; - else { - // partitions empty? - if(_partitions.empty()) - return true; - else if(_pyobjects.empty()) { - assert(_partitions.size() > 0); - assert(_partitions.front()); - - // still one row left? - return _curRowCounter < _partitions.front()->getNumRows(); - } else { - return true; // there's for sure at least one object left! - } + } else if (!_currentNormalPartitions.empty()) { + return _curNormalRowCounter < _currentNormalPartitions.front()->getNumRows(); + } else { + return _remainingNormalPartitions.front()->getNumRows() > 0; } - } + bool ResultSet::hasNextGeneralPartition() const { + // all rows already retrieved? + if (_totalRowCounter >= _maxRows) + return false; + + // empty? + if (_currentGeneralPartitions.empty() && _remainingGeneralPartitions.empty()) { + return false; + } else if (!_currentGeneralPartitions.empty()) { + return _curGeneralRowCounter < _currentGeneralPartitions.front()->getNumRows(); + } else { + return _remainingGeneralPartitions.front()->getNumRows() > 0; + } + } - bool ResultSet::hasNextPartition() const { + bool ResultSet::hasNextFallbackPartition() const { // all rows already retrieved? - if(_rowsRetrieved >= _maxRows) + if (_totalRowCounter >= _maxRows) return false; // empty? - if(_partitions.empty()) + if (_currentFallbackPartitions.empty() && _remainingFallbackPartitions.empty()) { return false; - else { - assert(_partitions.size() > 0); - assert(_partitions.front()); + } else if (!_currentFallbackPartitions.empty()) { + return _curFallbackRowCounter < _currentFallbackPartitions.front()->getNumRows(); + } else { + return _remainingFallbackPartitions.front()->getNumRows() > 0; + } + } + + Partition* ResultSet::getNextGeneralPartition() { + if (_currentGeneralPartitions.empty() && _remainingGeneralPartitions.empty()) + return nullptr; - // still one row left?
- return _curRowCounter < _partitions.front()->getNumRows(); + Partition *first = nullptr; + if (!_currentGeneralPartitions.empty()) { + first = _currentGeneralPartitions.front(); + _currentGeneralPartitions.pop_front(); + } else { + first = _remainingGeneralPartitions.front(); + _remainingGeneralPartitions.pop_front(); } + + auto numRows = first->getNumRows(); + _totalRowCounter += numRows; + _generalRowCounter += numRows; + + _curGeneralRowCounter = 0; + _curGeneralByteCounter = 0; + + return first; } - Partition* ResultSet::getNextPartition() { - if(_partitions.empty()) + Partition* ResultSet::getNextFallbackPartition() { + if (_currentFallbackPartitions.empty() && _remainingFallbackPartitions.empty()) return nullptr; - assert(_partitions.size() > 0); + Partition *first = nullptr; + if (!_currentFallbackPartitions.empty()) { + first = _currentFallbackPartitions.front(); + _currentFallbackPartitions.pop_front(); + } else { + first = _remainingFallbackPartitions.front(); + _remainingFallbackPartitions.pop_front(); + } + + auto numRows = first->getNumRows(); + _totalRowCounter += numRows; + _fallbackRowCounter += numRows; - Partition *first = _partitions.front(); - assert(_schema == first->schema()); + _curFallbackRowCounter = 0; + _curFallbackByteCounter = 0; + + return first; + } + + Partition* ResultSet::getNextNormalPartition() { + if (_currentNormalPartitions.empty() && _remainingNormalPartitions.empty()) + return nullptr; + + Partition *first = nullptr; + if (!_currentNormalPartitions.empty()) { + first = _currentNormalPartitions.front(); + _currentNormalPartitions.pop_front(); + } else { + first = _remainingNormalPartitions.front(); + _remainingNormalPartitions.pop_front(); + } auto numRows = first->getNumRows(); - _rowsRetrieved += numRows; + _totalRowCounter += numRows; + _normalRowCounter += numRows; - _partitions.pop_front(); - _curRowCounter = 0; - _byteCounter = 0; + _curNormalRowCounter = 0; + _curNormalByteCounter = 0; return first; } @@ -121,23 +199,25 @@ namespace tuplex { v.reserve(limit); // do a quick check whether there are ANY pyobjects, if not deserialize quickly! - if(_pyobjects.empty()) { - - if(_partitions.empty()) + if(_currentGeneralPartitions.empty() && _remainingGeneralPartitions.empty() && _currentFallbackPartitions.empty() && _remainingFallbackPartitions.empty()) { + if (_currentNormalPartitions.empty() && _remainingNormalPartitions.empty()) return vector<Row>{}; + for (const auto &p : _remainingNormalPartitions) + _currentNormalPartitions.push_back(p); + Deserializer ds(_schema); for(int i = 0; i < limit;) { // all exhausted - if(_partitions.empty()) + if(_currentNormalPartitions.empty()) break; // get number of rows in first partition - Partition *first = _partitions.front(); + Partition *first = _currentNormalPartitions.front(); auto num_rows = first->getNumRows(); // how many left to retrieve?
- auto num_to_retrieve_from_partition = std::min(limit - i, num_rows - _curRowCounter); + auto num_to_retrieve_from_partition = std::min(limit - i, num_rows - _curNormalRowCounter); if(num_to_retrieve_from_partition <= 0) break; @@ -148,11 +228,11 @@ // get next element of partition const uint8_t* ptr = first->lock(); for(int j = 0; j < num_to_retrieve_from_partition; ++j) { - auto row = Row::fromMemory(ds, ptr + _byteCounter, first->capacity() - _byteCounter); - _byteCounter += row.serializedLength(); - _curRowCounter++; - _rowsRetrieved++; + auto row = Row::fromMemory(ds, ptr + _curNormalByteCounter, first->capacity() - _curNormalByteCounter); + _curNormalByteCounter += row.serializedLength(); + _curNormalRowCounter++; _totalRowCounter++; + _normalRowCounter++; v.push_back(row); } @@ -163,17 +243,13 @@ i += num_to_retrieve_from_partition; // get next Partition ready when current one is exhausted - if(_curRowCounter == first->getNumRows()) - removeFirstPartition(); + if(_curNormalRowCounter == first->getNumRows()) + removeFirstNormalPartition(); } v.shrink_to_fit(); return v; } else { - // fallback solution: - // @TODO: write faster version with proper merging! - - std::vector<Row> v; while (hasNextRow() && v.size() < limit) { v.push_back(getNextRow()); } @@ -182,81 +258,252 @@ } } - Row ResultSet::getNextRow() { - // merge rows from objects - if(!_pyobjects.empty()) { - auto row_number = std::get<0>(_pyobjects.front()); - auto obj = std::get<1>(_pyobjects.front()); - - // partitions empty? - // => simply return next row. no fancy merging possible - // else merge based on row number. - if(_partitions.empty() || row_number <= _totalRowCounter) { - // merge - python::lockGIL(); - auto row = python::pythonToRow(obj); - python::unlockGIL(); - _pyobjects.pop_front(); - _rowsRetrieved++; - - // update row counter (not for double indices which could occur from flatMap!) - if(_pyobjects.empty()) - _totalRowCounter++; - else { - auto next_row_number = std::get<0>(_pyobjects.front()); - if(next_row_number != row_number) - _totalRowCounter++; - } + bool ResultSet::hasNextNormalRow() { + if (!_currentNormalPartitions.empty() && _curNormalRowCounter < _currentNormalPartitions.front()->getNumRows()) + return true; + for (const auto &p : _remainingNormalPartitions) + if (p->getNumRows() > 0) + return true; + return false; + } + + bool ResultSet::hasNextGeneralRow() { + if (!_currentGeneralPartitions.empty() && _curGeneralRowCounter < _currentGeneralPartitions.front()->getNumRows()) + return true; + for (const auto &p : _remainingGeneralPartitions) + if (p->getNumRows() > 0) + return true; + return false; + } + + bool ResultSet::hasNextFallbackRow() { + if (!_currentFallbackPartitions.empty() && _curFallbackRowCounter < _currentFallbackPartitions.front()->getNumRows()) + return true; + for (const auto &p : _remainingFallbackPartitions) + if (p->getNumRows() > 0) + return true; + return false; + } + + bool ResultSet::hasNextRow() { + // all rows already retrieved?
+ if(_totalRowCounter >= _maxRows) + return false; + + return hasNextNormalRow() || hasNextGeneralRow() || hasNextFallbackRow(); + } - return row; + Row ResultSet::getNextRow() { + if (_currentNormalPartitions.empty() && _currentFallbackPartitions.empty() && _currentGeneralPartitions.empty()) { + // all partitions are exhausted return empty row as default value + if (_partitionGroups.empty()) + return Row(); + _normalRowCounter = 0; + _generalRowCounter = 0; + _fallbackRowCounter = 0; + auto group = _partitionGroups.front(); + _partitionGroups.pop_front(); + for (int i = group.normalPartitionStartInd; i < group.normalPartitionStartInd + group.numNormalPartitions; ++i) { + _currentNormalPartitions.push_back(_remainingNormalPartitions.front()); + _remainingNormalPartitions.pop_front(); + } + for (int i = group.generalPartitionStartInd; i < group.generalPartitionStartInd + group.numGeneralPartitions; ++i) { + _currentGeneralPartitions.push_back(_remainingGeneralPartitions.front()); + _remainingGeneralPartitions.pop_front(); + } + for (int i = group.fallbackPartitionStartInd; i < group.fallbackPartitionStartInd + group.numFallbackPartitions; ++i) { + _currentFallbackPartitions.push_back(_remainingFallbackPartitions.front()); + _remainingFallbackPartitions.pop_front(); + } + return getNextRow(); + } else if (_currentNormalPartitions.empty() && _currentFallbackPartitions.empty()) { + // only general rows remain, return next general row + return getNextGeneralRow(); + } else if (_currentNormalPartitions.empty() && _currentGeneralPartitions.empty()) { + // only fallback rows remain, return next fallback row + return getNextFallbackRow(); + } else if (_currentFallbackPartitions.empty() && _currentGeneralPartitions.empty()) { + // only normal rows remain, return next normal row + return getNextNormalRow(); + } else if (_currentFallbackPartitions.empty()) { + // only normal and general rows remain, compare row index + // emit normal rows until reached current general ind + if (_normalRowCounter + _generalRowCounter < currentGeneralRowInd()) { + return getNextNormalRow(); + } else { + return getNextGeneralRow(); + } + } else if (_currentGeneralPartitions.empty()) { + // only normal and fallback rows remain, compare row index + // emit normal rows until reached current fallback ind + if (_normalRowCounter + _generalRowCounter + _fallbackRowCounter < currentFallbackRowInd()) { + return getNextNormalRow(); + } else { + return getNextFallbackRow(); + } + } else { + // all three cases remain, three way row comparison + auto generalRowInd = currentGeneralRowInd(); + auto fallbackRowInd = currentFallbackRowInd(); + if (_normalRowCounter + _generalRowCounter < generalRowInd && _normalRowCounter + _generalRowCounter + _fallbackRowCounter < fallbackRowInd) { + return getNextNormalRow(); + } else if (generalRowInd <= fallbackRowInd) { + return getNextGeneralRow(); + } else { + return getNextFallbackRow(); } } + } - // check whether entry is available, else return empty row - if(_partitions.empty()) - return Row(); + int64_t ResultSet::currentFallbackRowInd() { + assert(!_currentFallbackPartitions.empty()); + auto p = _currentFallbackPartitions.front(); + auto ptr = p->lock() + _curFallbackByteCounter; + auto rowInd = *((int64_t*) ptr); + p->unlock(); + return rowInd; + } - assert(_partitions.size() > 0); - Partition *first = _partitions.front(); + int64_t ResultSet::currentGeneralRowInd() { + assert(!_currentGeneralPartitions.empty()); + auto p = _currentGeneralPartitions.front(); + auto ptr = p->lock() + 
_curGeneralByteCounter; + auto rowInd = *((int64_t*) ptr); + p->unlock(); + return rowInd; + } - // make sure partition schema matches stored schema - assert(_schema == first->schema()); + Row ResultSet::getNextNormalRow() { + assert (!_currentNormalPartitions.empty()); + auto p = _currentNormalPartitions.front(); + assert(_schema == p->schema()); - Row row; + auto ptr = p->lock() + _curNormalByteCounter; + auto capacity = p->capacity() - _curNormalByteCounter; + auto row = Row::fromMemory(_schema, ptr, capacity); + p->unlock(); - // thread safe version (slow) - // get next element of partition - const uint8_t* ptr = first->lock(); + _curNormalByteCounter += row.serializedLength(); + _curNormalRowCounter++; + _totalRowCounter++; + _normalRowCounter++; - row = Row::fromMemory(_schema, ptr + _byteCounter, first->capacity() - _byteCounter); + if (_curNormalRowCounter == p->getNumRows()) { + removeFirstNormalPartition(); + } - // thread safe version (slow) - // deserialize - first->unlock(); + return row; + } + + Row ResultSet::getNextGeneralRow() { + assert (!_currentGeneralPartitions.empty()); + auto p = _currentGeneralPartitions.front(); + assert(_schema == p->schema()); + + auto prevRowInd = currentGeneralRowInd(); + _curGeneralByteCounter += 4 * sizeof(int64_t); + auto ptr = p->lock() + _curGeneralByteCounter; + auto capacity = p->capacity() - _curGeneralByteCounter; + auto row = Row::fromMemory(_schema, ptr, capacity); + p->unlock(); + + _curGeneralByteCounter += row.serializedLength(); + _curGeneralRowCounter++; + + if (_curGeneralRowCounter == p->getNumRows()) { + removeFirstGeneralPartition(); + } - _byteCounter += row.serializedLength(); - _curRowCounter++; - _rowsRetrieved++; _totalRowCounter++; + if (_currentGeneralPartitions.empty() || currentGeneralRowInd() > prevRowInd) { + _generalRowCounter++; + } + + return row; + } + + Row ResultSet::getNextFallbackRow() { + assert (!_currentFallbackPartitions.empty()); + + auto prevRowInd = currentFallbackRowInd(); + auto p = _currentFallbackPartitions.front(); + auto ptr = p->lock() + _curFallbackByteCounter; + auto pyObjectSize = ((int64_t *) ptr)[3]; ptr += 4 * sizeof(int64_t); + + python::lockGIL(); + auto row = python::pythonToRow(python::deserializePickledObject(python::getMainModule(), (char *) ptr, pyObjectSize)); + python::unlockGIL(); + + p->unlock(); + + _curFallbackByteCounter += pyObjectSize + 4*sizeof(int64_t); + _curFallbackRowCounter++; + + if (_curFallbackRowCounter == p->getNumRows()) { + removeFirstFallbackPartition(); + } - // get next Partition ready when current one is exhausted - if(_curRowCounter == first->getNumRows()) - removeFirstPartition(); + _totalRowCounter++; + if (_currentFallbackPartitions.empty() || currentFallbackRowInd() > prevRowInd) { + _fallbackRowCounter++; + } return row; } size_t ResultSet::rowCount() const { size_t count = 0; - for(const auto& partition : _partitions) { + for (const auto& partition : _currentNormalPartitions) count += partition->getNumRows(); - } - return count + _pyobjects.size(); + for (const auto& partition : _remainingNormalPartitions) + count += partition->getNumRows(); + for (const auto& partition : _currentGeneralPartitions) + count += partition->getNumRows(); + for (const auto& partition : _remainingGeneralPartitions) + count += partition->getNumRows(); + for (const auto& partition : _currentFallbackPartitions) + count += partition->getNumRows(); + for (const auto& partition : _remainingFallbackPartitions) + count += partition->getNumRows(); + return count; } - void 
ResultSet::removeFirstPartition() { - assert(_partitions.size() > 0); - Partition *first = _partitions.front(); + void ResultSet::removeFirstGeneralPartition() { + assert(!_currentGeneralPartitions.empty()); + Partition *first = _currentGeneralPartitions.front(); + assert(first); + + // invalidate partition +#ifndef NDEBUG + Logger::instance().defaultLogger().info("ResultSet invalidates partition " + hexAddr(first) + " uuid " + uuidToString(first->uuid())); +#endif + first->invalidate(); + + _currentGeneralPartitions.pop_front(); + _curGeneralRowCounter = 0; + _curGeneralByteCounter = 0; + } + + void ResultSet::removeFirstFallbackPartition() { + assert(!_currentFallbackPartitions.empty()); + Partition *first = _currentFallbackPartitions.front(); + assert(first); + + // invalidate partition +#ifndef NDEBUG + Logger::instance().defaultLogger().info("ResultSet invalidates partition " + hexAddr(first) + " uuid " + uuidToString(first->uuid())); +#endif + first->invalidate(); + + // remove partition (is now processed) + _currentFallbackPartitions.pop_front(); + _curFallbackRowCounter = 0; + _curFallbackByteCounter = 0; + } + + void ResultSet::removeFirstNormalPartition() { + assert(!_currentNormalPartitions.empty()); + Partition *first = _currentNormalPartitions.front(); assert(first); // invalidate partition @@ -266,8 +513,18 @@ namespace tuplex { first->invalidate(); // remove partition (is now processed) - _partitions.pop_front(); - _curRowCounter = 0; - _byteCounter = 0; + + _currentNormalPartitions.pop_front(); + _curNormalRowCounter = 0; + _curNormalByteCounter = 0; + } + + size_t ResultSet::fallbackRowCount() const { + size_t count = 0; + for (const auto &p : _currentFallbackPartitions) + count += p->getNumRows(); + for (const auto& p : _remainingFallbackPartitions) + count += p->getNumRows(); + return count; } } \ No newline at end of file diff --git a/tuplex/core/src/physical/StageBuilder.cc b/tuplex/core/src/physical/StageBuilder.cc index 72f01e2b8..12e3b24a6 100644 --- a/tuplex/core/src/physical/StageBuilder.cc +++ b/tuplex/core/src/physical/StageBuilder.cc @@ -46,10 +46,11 @@ namespace tuplex { double normalCaseThreshold, bool sharedObjectPropagation, bool nullValueOptimization, - bool updateInputExceptions) + bool updateInputExceptions, + bool incrementalResolution) : _stageNumber(stage_number), _isRootStage(rootStage), _allowUndefinedBehavior(allowUndefinedBehavior), _generateParser(generateParser), _normalCaseThreshold(normalCaseThreshold), _sharedObjectPropagation(sharedObjectPropagation), - _nullValueOptimization(nullValueOptimization), _updateInputExceptions(updateInputExceptions), + _nullValueOptimization(nullValueOptimization), _updateInputExceptions(updateInputExceptions), _incrementalResolution(incrementalResolution), _inputNode(nullptr), _outputLimit(std::numeric_limits<size_t>::max()) { } @@ -1051,7 +1052,7 @@ namespace tuplex { bool requireSlowPath = _nullValueOptimization; // by default, the slow path is always required when null-value optimization is enabled. // special case: input source is cached and no exceptions happened => no resolve path necessary if there are no resolvers! 
- if(_inputNode->type() == LogicalOperatorType::CACHE && dynamic_cast<CacheOperator*>(_inputNode)->cachedExceptions().empty()) + if(_inputNode->type() == LogicalOperatorType::CACHE && dynamic_cast<CacheOperator*>(_inputNode)->cachedGeneralPartitions().empty() && dynamic_cast<CacheOperator*>(_inputNode)->cachedFallbackPartitions().empty()) requireSlowPath = false; if (numResolveOperators > 0 || requireSlowPath) { @@ -1443,7 +1444,9 @@ namespace tuplex { stage->_irBitCode = _irBitCode; stage->_pyCode = _pyCode; stage->_pyPipelineName = _pyPipelineName; + stage->_updateInputExceptions = _updateInputExceptions; + stage->_incrementalResolution = _incrementalResolution; // if last op is CacheOperator, check whether normal/exceptional case should get cached separately // or an upcasting step should be performed. diff --git a/tuplex/core/src/physical/TransformStage.cc b/tuplex/core/src/physical/TransformStage.cc index b61f9cbe2..0be4d1a45 100644 --- a/tuplex/core/src/physical/TransformStage.cc +++ b/tuplex/core/src/physical/TransformStage.cc @@ -110,7 +110,22 @@ namespace tuplex { int64_t dataSetID = 0; // no ID here _inputPartitions = rowsToPartitions(backend()->driver(), dataSetID, context().id(), rows); } + void TransformStage::setIncrementalResult(const std::vector<Partition*>& normalPartitions, + const std::vector<Partition*>& exceptionPartitions, + const std::vector<PartitionGroup>& partitionGroups) { + auto pipeline = PhysicalStage::plan()->originalLogicalPlan()->getAction(); + auto cacheEntry = new IncrementalCacheEntry(pipeline, normalPartitions, exceptionPartitions, partitionGroups); + PhysicalStage::plan()->getContext().getIncrementalCache()->addEntry(IncrementalCache::newKey(pipeline), cacheEntry); + } + void TransformStage::setIncrementalResult(const std::vector<Partition*>& exceptionPartitions, + const std::vector<Partition*>& generalPartitions, + const std::vector<Partition*>& fallbackPartitions, + size_t startFileNumber) { + auto pipeline = PhysicalStage::plan()->originalLogicalPlan()->getAction(); + auto cacheEntry = new IncrementalCacheEntry(pipeline, exceptionPartitions, generalPartitions, fallbackPartitions, startFileNumber); + PhysicalStage::plan()->getContext().getIncrementalCache()->addEntry(IncrementalCache::newKey(pipeline), cacheEntry); + } void TransformStage::setFileResult(const std::unordered_map<std::tuple<int64_t, ExceptionCode>, size_t> &ecounts) { setExceptionCounts(ecounts); @@ -118,29 +133,28 @@ namespace tuplex { _rs = emptyResultSet(); } - void TransformStage::setMemoryResult(const std::vector<Partition*> &partitions, - const std::vector<Partition*>& generalCase, - const std::unordered_map<std::string, ExceptionInfo>& partitionToExceptionsMap, - const std::vector<std::tuple<size_t, PyObject*>>& interpreterRows, - const std::vector<Partition*>& remainingExceptions, - const std::unordered_map<std::tuple<int64_t, ExceptionCode>, size_t> &ecounts) { - setExceptionCounts(ecounts); + void TransformStage::setMemoryResult(const std::vector<Partition*>& normalPartitions, + const std::vector<Partition*>& generalPartitions, + const std::vector<Partition*>& fallbackPartitions, + const std::vector<PartitionGroup>& partitionGroups, + const std::unordered_map<std::tuple<int64_t, ExceptionCode>, size_t>& exceptionCounts) { + setExceptionCounts(exceptionCounts); - if (partitions.empty() && interpreterRows.empty() && generalCase.empty()) + if (normalPartitions.empty() && generalPartitions.empty() && fallbackPartitions.empty()) _rs = emptyResultSet(); else { std::vector<Partition*> limitedPartitions; auto schema = Schema::UNKNOWN; - if(!partitions.empty()) { - schema = partitions.front()->schema(); - for (auto partition : partitions) { + if(!normalPartitions.empty()) { + schema = normalPartitions.front()->schema(); + for (auto partition : normalPartitions) { assert(schema == partition->schema()); } // check output limit, adjust partitions if necessary size_t 
numOutputRows = 0; - for (auto partition : partitions) { + for (auto partition : normalPartitions) { numOutputRows += partition->getNumRows(); if (numOutputRows >= outputLimit()) { // clip last partition & leave loop @@ -157,10 +171,7 @@ namespace tuplex { } } - // put ALL partitions to result set - _rs = std::make_shared<ResultSet>(schema, limitedPartitions, - generalCase, partitionToExceptionsMap, interpreterRows, - outputLimit()); + _rs = std::make_shared<ResultSet>(schema, limitedPartitions, generalPartitions, fallbackPartitions, partitionGroups, outputLimit()); } } @@ -654,7 +665,7 @@ namespace tuplex { } case EndPointMode::MEMORY: case EndPointMode::FILE: { - auto p = stage->resultSet()->partitions(); + auto p = stage->resultSet()->normalPartitions(); std::copy(std::begin(p), std::end(p), std::back_inserter(partitions)); break; } diff --git a/tuplex/core/src/physical/TransformTask.cc b/tuplex/core/src/physical/TransformTask.cc index c560c4af4..1d24d6fdc 100644 --- a/tuplex/core/src/physical/TransformTask.cc +++ b/tuplex/core/src/physical/TransformTask.cc @@ -543,43 +543,65 @@ namespace tuplex { auto functor = reinterpret_cast(_functor); - auto numInputExceptions = _inputExceptionInfo.numExceptions; - auto inputExceptionIndex = _inputExceptionInfo.exceptionIndex; - auto inputExceptionRowOffset = _inputExceptionInfo.exceptionRowOffset; - auto inputExceptionByteOffset = _inputExceptionInfo.exceptionByteOffset; - - // First, prepare the input exception partitions to pass into the code-gen - // This is done to simplify the LLVM code. We will end up passing it an - // array of expPtrs which point to the first exception in their partition - // and expPtrSizes which tell how many exceptions are in that partition. - auto arrSize = _inputExceptions.size() - inputExceptionIndex; - auto expPtrs = new uint8_t*[arrSize]; - auto expPtrSizes = new int64_t[arrSize]; - int expInd = 0; - // Iterate through all exception partitions beginning at the one specified by the starting index - for (int i = inputExceptionIndex; i < _inputExceptions.size(); ++i) { - auto numRows = _inputExceptions[i]->getNumRows(); - auto ptr = _inputExceptions[i]->lock(); - // If it's the first partition, we need to account for the offset - if (i == inputExceptionIndex) { - numRows -= inputExceptionRowOffset; - ptr += inputExceptionByteOffset; - } - expPtrSizes[expInd] = numRows; - expPtrs[expInd] = (uint8_t *) ptr; - expInd++; - } + int64_t totalNormalRowCounter = 0; + int64_t totalGeneralRowCounter = 0; + int64_t totalFallbackRowCounter = 0; + int64_t totalFilterCounter = 0; + + uint8_t **generalPartitions = new uint8_t*[_generalPartitions.size()]; + for (int i = 0; i < _generalPartitions.size(); ++i) + generalPartitions[i] = _generalPartitions[i]->lockWriteRaw(); + int64_t numGeneralPartitions = _generalPartitions.size(); + int64_t generalIndexOffset = 0; + int64_t generalRowOffset = 0; + int64_t generalByteOffset = 0; + + uint8_t **fallbackPartitions = new uint8_t*[_fallbackPartitions.size()]; + for (int i = 0; i < _fallbackPartitions.size(); ++i) + fallbackPartitions[i] = _fallbackPartitions[i]->lockWriteRaw(); + int64_t numFallbackPartitions = _fallbackPartitions.size(); + int64_t fallbackIndexOffset = 0; + int64_t fallbackRowOffset = 0; + int64_t fallbackByteOffset = 0; // go over all input partitions. - for(auto inputPartition : _inputPartitions) { + for(auto &inputPartition : _inputPartitions) { // lock ptr, extract number of rows ==> store them // lock raw & call functor! 
int64_t inSize = inputPartition->size(); const uint8_t *inPtr = inputPartition->lockRaw(); _numInputRowsRead += static_cast<size_t>(*((int64_t*)inPtr)); +// +// int64_t totalNormalRowCounter = 0; +// int64_t totalGeneralRowCounter = 0; +// int64_t totalFallbackRowCounter = 0; +// +// int64_t g1[] = {2, +// 1, -1, -1, 8, -1, +// 2, -1, -1, 8, -1}; +// int64_t g2[] = {1, +// 3, -1, -1, 8, -1}; +// int64_t g3[] = {2, +// 5, -1, -1, 8, -1, +// 6, -1, -1, 8, -1}; +// uint8_t *generalPartitions[] = {(uint8_t*)g1, (uint8_t*)g2, (uint8_t*)g3}; +// int64_t numGeneralPartitions = 3; +// int64_t generalIndexOffset = 0; +// int64_t generalRowOffset = 0; +// int64_t generalByteOffset = 0; +// +// int64_t f1[] = {1, 2, 3}; +// uint8_t *fallbackPartitions[] = {}; +// int64_t numFallbackPartitions = 0; +// int64_t fallbackIndexOffset = 0; +// int64_t fallbackRowOffset = 0; +// int64_t fallbackByteOffset = 0; // call functor - auto bytesParsed = functor(this, inPtr, inSize, expPtrs, expPtrSizes, numInputExceptions, &num_normal_rows, &num_bad_rows, false); + auto bytesParsed = functor(this, inPtr, inSize, &num_normal_rows, &num_bad_rows, false, + &totalFilterCounter, &totalNormalRowCounter, &totalGeneralRowCounter, &totalFallbackRowCounter, + generalPartitions, numGeneralPartitions, &generalIndexOffset, &generalRowOffset, &generalByteOffset, + fallbackPartitions, numFallbackPartitions, &fallbackIndexOffset, &fallbackRowOffset, &fallbackByteOffset); // save number of normal rows to output rows written if not writeToFile if(hasMemorySink()) @@ -595,13 +617,55 @@ namespace tuplex { inputPartition->invalidate(); } - delete[] expPtrs; - delete[] expPtrSizes; + if (generalIndexOffset < numGeneralPartitions) { + auto curGeneralPtr = generalPartitions[generalIndexOffset]; + auto numRowsInPartition = *((int64_t*)curGeneralPtr); + curGeneralPtr += sizeof(int64_t) + generalByteOffset; + while (generalRowOffset < numRowsInPartition) { + *((int64_t*)curGeneralPtr) -= totalFilterCounter; + curGeneralPtr += 4 * sizeof(int64_t) + ((int64_t*)curGeneralPtr)[3]; + generalRowOffset += 1; + + if (generalRowOffset == numRowsInPartition && generalIndexOffset < numGeneralPartitions - 1) { + generalIndexOffset += 1; + curGeneralPtr = generalPartitions[generalIndexOffset]; + numRowsInPartition = *((int64_t*)curGeneralPtr); + curGeneralPtr += sizeof(int64_t); + generalByteOffset = 0; + generalRowOffset = 0; + } + } + } - for (int i = inputExceptionIndex; i < _inputExceptions.size(); ++i) { - _inputExceptions[i]->unlock(); + if (fallbackIndexOffset < numFallbackPartitions) { + auto curFallbackPtr = fallbackPartitions[fallbackIndexOffset]; + auto numRowsInPartition = *((int64_t*)curFallbackPtr); + curFallbackPtr += sizeof(int64_t) + fallbackByteOffset; + while (fallbackRowOffset < numRowsInPartition) { + *((int64_t*)curFallbackPtr) -= totalFilterCounter; + curFallbackPtr += 4 * sizeof(int64_t) + ((int64_t*)curFallbackPtr)[3]; + fallbackRowOffset += 1; + + if (fallbackRowOffset == numRowsInPartition && fallbackIndexOffset < numFallbackPartitions - 1) { + fallbackIndexOffset += 1; + curFallbackPtr = fallbackPartitions[fallbackIndexOffset]; + numRowsInPartition = *((int64_t*)curFallbackPtr); + curFallbackPtr += sizeof(int64_t); + fallbackByteOffset = 0; + fallbackRowOffset = 0; + } + } } + for (auto & _generalPartition : _generalPartitions) + _generalPartition->unlockWrite(); + + for (auto & _fallbackPartition : _fallbackPartitions) + _fallbackPartition->unlockWrite(); + + delete[] fallbackPartitions; + delete[] generalPartitions; + #ifndef 
NDEBUG owner()->info("Trafo task memory source exhausted (" + pluralize(_inputPartitions.size(), "partition") + ", " + pluralize(num_normal_rows, "normal row") + ", " + pluralize(num_bad_rows, "exceptional row") + ")"); diff --git a/tuplex/python/include/PythonContext.h b/tuplex/python/include/PythonContext.h index 66e87523b..b3888f342 100644 --- a/tuplex/python/include/PythonContext.h +++ b/tuplex/python/include/PythonContext.h @@ -117,6 +117,8 @@ namespace tuplex { pds.wrap(&_context->makeError(message)); return pds; } + + std::vector<Partition*> serializeFallbackRows(const std::vector<std::tuple<size_t, PyObject*>>& fallbackRows); public: /*! diff --git a/tuplex/python/include/PythonDataSet.h b/tuplex/python/include/PythonDataSet.h index 665d68856..14eff690c 100644 --- a/tuplex/python/include/PythonDataSet.h +++ b/tuplex/python/include/PythonDataSet.h @@ -140,7 +140,8 @@ namespace tuplex { size_t shardSize=0, size_t limit=std::numeric_limits<size_t>::max(), const std::string& null_value="", - py::object header=py::none()); + py::object header=py::none(), + const bool commit=true); /*! * save dataset to one or more orc files. Triggers execution of pipeline. diff --git a/tuplex/python/src/PythonContext.cc b/tuplex/python/src/PythonContext.cc index b70be013a..6425cdad2 100644 --- a/tuplex/python/src/PythonContext.cc +++ b/tuplex/python/src/PythonContext.cc @@ -39,35 +39,35 @@ namespace tuplex { Schema schema(Schema::MemoryLayout::ROW, python::Type::makeTupleType({python::Type::F64})); - std::vector<std::tuple<size_t, PyObject*>> badParallelizeObjects; - std::vector<size_t> numExceptionsInPartition; - // check if empty? if(0 == numElements) - return _context->fromPartitions(schema, std::vector<Partition*>(), columns, badParallelizeObjects, numExceptionsInPartition); + return _context->fromPartitions(schema, std::vector<Partition*>(), std::vector<Partition*>(), std::vector<PartitionGroup>(), columns); // create new partition on driver auto driver = _context->getDriver(); + std::vector<std::tuple<size_t, PyObject*>> fallbackRows; + std::vector<Partition*> fallbackPartitions; + std::vector<PartitionGroup> partitionMergeInfo; + std::vector<Partition*> partitions; Partition* partition = driver->allocWritablePartition(allocMinSize, schema, -1, _context->id()); int64_t* rawPtr = (int64_t*)partition->lockWriteRaw(); *rawPtr = 0; double* ptr = (double*)(rawPtr + 1); size_t numBytesSerialized = 0; - size_t prevNumExceptions = 0; - size_t prevNumRows = 0; + auto rowDelta = 0; for(unsigned i = 0; i < numElements; ++i) { auto obj = PyList_GET_ITEM(listObj, i); Py_XINCREF(obj); // check capacity and realloc if necessary get a new partition if(partition->capacity() < numBytesSerialized + sizeof(double)) { - assert(badParallelizeObjects.size() >= prevNumExceptions); - auto numNewExceptions = badParallelizeObjects.size() - prevNumExceptions; - numExceptionsInPartition.push_back(numNewExceptions); - prevNumExceptions = badParallelizeObjects.size(); - prevNumRows += numNewExceptions + *rawPtr; + rowDelta += *rawPtr + fallbackRows.size(); + auto serializedRows = serializeFallbackRows(fallbackRows); + fallbackRows.clear(); + partitionMergeInfo.push_back(PartitionGroup(1, partitions.size(), 0, 0, serializedRows.size(), fallbackPartitions.size())); + std::copy(serializedRows.begin(), serializedRows.end(), std::back_inserter(fallbackPartitions)); partition->unlockWrite(); partitions.push_back(partition); @@ -89,15 +89,15 @@ namespace tuplex { val = (double)PyLong_AsLongLong(obj); if(PyErr_Occurred()) { // too large integer? 
PyErr_Clear(); - assert(i >= prevNumRows); - badParallelizeObjects.emplace_back(std::make_tuple(i - prevNumRows, obj)); + assert(i >= rowDelta); + fallbackRows.emplace_back(std::make_tuple(i - rowDelta, obj)); continue; } } } else { - assert(i >= prevNumRows); - badParallelizeObjects.emplace_back(std::make_tuple(i - prevNumRows, obj)); + assert(i >= rowDelta); + fallbackRows.emplace_back(std::make_tuple(i - rowDelta, obj)); continue; } } @@ -108,15 +108,16 @@ namespace tuplex { numBytesSerialized += sizeof(double); } - assert(badParallelizeObjects.size() >= prevNumExceptions); - auto numNewExceptions = badParallelizeObjects.size() - prevNumExceptions; - numExceptionsInPartition.push_back(numNewExceptions); + auto serializedRows = serializeFallbackRows(fallbackRows); + fallbackRows.clear(); + partitionMergeInfo.push_back(PartitionGroup(1, partitions.size(), 0, 0, serializedRows.size(), fallbackPartitions.size())); + std::copy(serializedRows.begin(), serializedRows.end(), std::back_inserter(fallbackPartitions)); partition->unlockWrite(); partitions.push_back(partition); // create dataset from partitions. - return _context->fromPartitions(schema, partitions, columns, badParallelizeObjects, numExceptionsInPartition); + return _context->fromPartitions(schema, partitions, fallbackPartitions, partitionMergeInfo, columns); } DataSet& PythonContext::fastI64Parallelize(PyObject* listObj, const std::vector<std::string>& columns, bool upcast) { @@ -127,16 +128,17 @@ namespace tuplex { Schema schema(Schema::MemoryLayout::ROW, python::Type::makeTupleType({python::Type::I64})); - std::vector<std::tuple<size_t, PyObject*>> badParallelizeObjects; - std::vector<size_t> numExceptionsInPartition; - // check if empty? if(0 == numElements) - return _context->fromPartitions(schema, std::vector<Partition*>(), columns, badParallelizeObjects, numExceptionsInPartition); + return _context->fromPartitions(schema, std::vector<Partition*>(), std::vector<Partition*>(), std::vector<PartitionGroup>(), columns); // create new partition on driver auto driver = _context->getDriver(); + std::vector<std::tuple<size_t, PyObject*>> fallbackRows; + std::vector<Partition*> fallbackPartitions; + std::vector<PartitionGroup> partitionMergeInfo; + std::vector<Partition*> partitions; Partition* partition = driver->allocWritablePartition(std::max(sizeof(int64_t), allocMinSize), schema, -1, _context->id()); int64_t* rawPtr = (int64_t*)partition->lockWriteRaw(); @@ -144,18 +146,18 @@ namespace tuplex { int64_t* ptr = rawPtr + 1; size_t numBytesSerialized = 0; size_t prevNumExceptions = 0; - size_t prevNumRows = 0; + auto rowDelta = 0; for(unsigned i = 0; i < numElements; ++i) { auto obj = PyList_GET_ITEM(listObj, i); Py_XINCREF(obj); // check capacity and realloc if necessary get a new partition if(partition->capacity() < numBytesSerialized + sizeof(int64_t)) { - assert(badParallelizeObjects.size() >= prevNumExceptions); - auto numNewExceptions = badParallelizeObjects.size() - prevNumExceptions; - numExceptionsInPartition.push_back(numNewExceptions); - prevNumExceptions = badParallelizeObjects.size(); - prevNumRows += numNewExceptions + *rawPtr; + rowDelta += *rawPtr + fallbackRows.size(); + auto serializedRows = serializeFallbackRows(fallbackRows); + fallbackRows.clear(); + partitionMergeInfo.push_back(PartitionGroup(1, partitions.size(), 0, 0, serializedRows.size(), fallbackPartitions.size())); + std::copy(serializedRows.begin(), serializedRows.end(), std::back_inserter(fallbackPartitions)); partition->unlockWrite(); partitions.push_back(partition); @@ -171,8 +173,8 @@ namespace tuplex { val = PyLong_AsLongLong(obj); if(PyErr_Occurred()) { // too large integer? 
PyErr_Clear(); - assert(i >= prevNumRows); - badParallelizeObjects.emplace_back(std::make_tuple(i - prevNumRows, obj)); + assert(i >= rowDelta); + fallbackRows.emplace_back(std::make_tuple(i - rowDelta, obj)); continue; } } else { @@ -180,8 +182,8 @@ namespace tuplex { if(upcast && (obj == Py_True || obj == Py_False)) val = obj == Py_True; else { - assert(i >= prevNumRows); - badParallelizeObjects.emplace_back(std::make_tuple(i - prevNumRows, obj)); + assert(i >= rowDelta); + fallbackRows.emplace_back(std::make_tuple(i - rowDelta, obj)); continue; } } @@ -191,15 +193,16 @@ namespace tuplex { *rawPtr = *rawPtr + 1; numBytesSerialized += sizeof(int64_t); } - assert(badParallelizeObjects.size() >= prevNumExceptions); - auto numNewExceptions = badParallelizeObjects.size() - prevNumExceptions; - numExceptionsInPartition.push_back(numNewExceptions); + auto serializedRows = serializeFallbackRows(fallbackRows); + fallbackRows.clear(); + partitionMergeInfo.push_back(PartitionGroup(1, partitions.size(), 0, 0, serializedRows.size(), fallbackPartitions.size())); + std::copy(serializedRows.begin(), serializedRows.end(), std::back_inserter(fallbackPartitions)); partition->unlockWrite(); partitions.push_back(partition); // create dataset from partitions. - return _context->fromPartitions(schema, partitions, columns, badParallelizeObjects, numExceptionsInPartition); + return _context->fromPartitions(schema, partitions, fallbackPartitions, partitionMergeInfo, columns); } DataSet& PythonContext::fastMixedSimpleTypeTupleTransfer(PyObject *listObj, const python::Type &majType, @@ -215,12 +218,9 @@ namespace tuplex { // now create partitions super fast Schema schema(Schema::MemoryLayout::ROW, majType); - std::vector<std::tuple<size_t, PyObject*>> badParallelizeObjects; - std::vector<size_t> numExceptionsInPartition; - // check if empty? 
if(0 == numElements) - return _context->fromPartitions(schema, std::vector<Partition*>(), columns, badParallelizeObjects, numExceptionsInPartition); + return _context->fromPartitions(schema, std::vector<Partition*>(), std::vector<Partition*>(), std::vector<PartitionGroup>(), columns); // encode type of tuple quickly into string @@ -232,6 +232,10 @@ namespace tuplex { // create new partition on driver auto driver = _context->getDriver(); + std::vector<std::tuple<size_t, PyObject*>> fallbackRows; + std::vector<Partition*> fallbackPartitions; + std::vector<PartitionGroup> partitionMergeInfo; + std::vector<Partition*> partitions; Partition* partition = driver->allocWritablePartition(allocMinSize, schema, -1, _context->id()); int64_t* rawPtr = (int64_t*)partition->lockWriteRaw(); @@ -239,7 +243,7 @@ namespace tuplex { uint8_t* ptr = (uint8_t*)(rawPtr + 1); size_t numBytesSerialized = 0; size_t prevNumExceptions = 0; - size_t prevNumRows = 0; + auto rowDelta = 0; for(unsigned i = 0; i < numElements; ++i) { auto obj = PyList_GET_ITEM(listObj, i); Py_XINCREF(obj); @@ -266,19 +270,19 @@ namespace tuplex { } } if (nonConforming) { - assert(i >= prevNumRows); - badParallelizeObjects.emplace_back(i - prevNumRows, obj); + assert(i >= rowDelta); + fallbackRows.emplace_back(i - rowDelta, obj); continue; } } // get new partition if capacity exhausted if(partition->capacity() < numBytesSerialized + requiredBytes) { - assert(badParallelizeObjects.size() >= prevNumExceptions); - auto numNewExceptions = badParallelizeObjects.size() - prevNumExceptions; - numExceptionsInPartition.push_back(numNewExceptions); - prevNumExceptions = badParallelizeObjects.size(); - prevNumRows += numNewExceptions + *rawPtr; + rowDelta += *rawPtr + fallbackRows.size(); + auto serializedRows = serializeFallbackRows(fallbackRows); + fallbackRows.clear(); + partitionMergeInfo.push_back(PartitionGroup(1, partitions.size(), 0, 0, serializedRows.size(), fallbackPartitions.size())); + std::copy(serializedRows.begin(), serializedRows.end(), std::back_inserter(fallbackPartitions)); partition->unlockWrite(); partitions.push_back(partition); @@ -358,11 +362,11 @@ namespace tuplex { // special part when bad row encountered bad_element: ptr = rowStartPtr; - assert(i >= prevNumRows); - badParallelizeObjects.emplace_back(std::make_tuple(i - prevNumRows, obj)); + assert(i >= rowDelta); + fallbackRows.emplace_back(i - rowDelta, obj); } else { - assert(i >= prevNumRows); - badParallelizeObjects.emplace_back(std::make_tuple(i - prevNumRows, obj)); + assert(i >= rowDelta); + fallbackRows.emplace_back(i - rowDelta, obj); } // serialization code here is a little bit more complicated @@ -371,9 +375,10 @@ namespace tuplex { // (2) is the field containing total varlength // (3) is the actual string content (incl. '\0' delimiter) } - assert(badParallelizeObjects.size() >= prevNumExceptions); - auto numNewExceptions = badParallelizeObjects.size() - prevNumExceptions; - numExceptionsInPartition.push_back(numNewExceptions); + auto serializedRows = serializeFallbackRows(fallbackRows); + fallbackRows.clear(); + partitionMergeInfo.push_back(PartitionGroup(1, partitions.size(), 0, 0, serializedRows.size(), fallbackPartitions.size())); + std::copy(serializedRows.begin(), serializedRows.end(), std::back_inserter(fallbackPartitions)); partition->unlockWrite(); partitions.push_back(partition); @@ -381,7 +386,7 @@ namespace tuplex { delete [] typeStr; // create dataset from partitions. 
- return _context->fromPartitions(schema, partitions, columns, badParallelizeObjects, numExceptionsInPartition); + return _context->fromPartitions(schema, partitions, fallbackPartitions, partitionMergeInfo, columns); } DataSet& PythonContext::fastBoolParallelize(PyObject *listObj, const std::vector<std::string>& columns) { @@ -392,17 +397,18 @@ namespace tuplex { Schema schema(Schema::MemoryLayout::ROW, python::Type::makeTupleType({python::Type::BOOLEAN})); - std::vector<std::tuple<size_t, PyObject*>> badParallelizeObjects; - std::vector<size_t> numExceptionsInPartition; - // check if empty? if(0 == numElements) - return _context->fromPartitions(schema, std::vector<Partition*>(), columns, badParallelizeObjects, numExceptionsInPartition); + return _context->fromPartitions(schema, std::vector<Partition*>(), std::vector<Partition*>(), std::vector<PartitionGroup>(), columns); // create new partition on driver auto driver = _context->getDriver(); + std::vector<std::tuple<size_t, PyObject*>> fallbackRows; + std::vector<Partition*> fallbackPartitions; + std::vector<PartitionGroup> partitionMergeInfo; + std::vector<Partition*> partitions; Partition* partition = driver->allocWritablePartition(std::max(sizeof(int64_t), allocMinSize), schema, -1, _context->id()); int64_t* rawPtr = (int64_t*)partition->lockWriteRaw(); @@ -410,18 +416,18 @@ namespace tuplex { int64_t* ptr = rawPtr + 1; size_t numBytesSerialized = 0; size_t prevNumExceptions = 0; - size_t prevNumRows = 0; + auto rowDelta = 0; for(unsigned i = 0; i < numElements; ++i) { auto obj = PyList_GET_ITEM(listObj, i); Py_XINCREF(obj); // check capacity and realloc if necessary get a new partition if(partition->capacity() < numBytesSerialized + sizeof(int64_t)) { - assert(badParallelizeObjects.size() >= prevNumExceptions); - auto numNewExceptions = badParallelizeObjects.size() - prevNumExceptions; - numExceptionsInPartition.push_back(numNewExceptions); - prevNumExceptions = badParallelizeObjects.size(); - prevNumRows += numNewExceptions + *rawPtr; + rowDelta += *rawPtr + fallbackRows.size(); + auto serializedRows = serializeFallbackRows(fallbackRows); + fallbackRows.clear(); + partitionMergeInfo.push_back(PartitionGroup(1, partitions.size(), 0, 0, serializedRows.size(), fallbackPartitions.size())); + std::copy(serializedRows.begin(), serializedRows.end(), std::back_inserter(fallbackPartitions)); partition->unlockWrite(); partitions.push_back(partition); @@ -438,20 +444,20 @@ namespace tuplex { *rawPtr = *rawPtr + 1; numBytesSerialized += sizeof(int64_t); } else { - assert(i >= prevNumRows); - badParallelizeObjects.emplace_back(std::make_tuple(i - prevNumRows, obj)); + assert(i >= rowDelta); + fallbackRows.emplace_back(i - rowDelta, obj); } } - - assert(badParallelizeObjects.size() >= prevNumExceptions); - auto numNewExceptions = badParallelizeObjects.size() - prevNumExceptions; - numExceptionsInPartition.push_back(numNewExceptions); + auto serializedRows = serializeFallbackRows(fallbackRows); + fallbackRows.clear(); + partitionMergeInfo.push_back(PartitionGroup(1, partitions.size(), 0, 0, serializedRows.size(), fallbackPartitions.size())); + std::copy(serializedRows.begin(), serializedRows.end(), std::back_inserter(fallbackPartitions)); partition->unlockWrite(); partitions.push_back(partition); // create dataset from partitions. 
- return _context->fromPartitions(schema, partitions, columns, badParallelizeObjects, numExceptionsInPartition); + return _context->fromPartitions(schema, partitions, fallbackPartitions, partitionMergeInfo, columns); } DataSet& PythonContext::fastStrParallelize(PyObject* listObj, const std::vector<std::string>& columns) { @@ -462,17 +468,18 @@ namespace tuplex { Schema schema(Schema::MemoryLayout::ROW, python::Type::makeTupleType({python::Type::STRING})); - std::vector<std::tuple<size_t, PyObject*>> badParallelizeObjects; - std::vector<size_t> numExceptionsInPartition; - // check if empty? if(0 == numElements) - return _context->fromPartitions(schema, std::vector<Partition*>(), columns, badParallelizeObjects, numExceptionsInPartition); + return _context->fromPartitions(schema, std::vector<Partition*>(), std::vector<Partition*>(), std::vector<PartitionGroup>(), columns); // create new partition on driver auto driver = _context->getDriver(); + std::vector<std::tuple<size_t, PyObject*>> fallbackRows; + std::vector<Partition*> fallbackPartitions; + std::vector<PartitionGroup> partitionMergeInfo; + std::vector<Partition*> partitions; Partition* partition = driver->allocWritablePartition(allocMinSize, schema, -1, _context->id()); int64_t* rawPtr = (int64_t*)partition->lockWriteRaw(); @@ -480,7 +487,7 @@ namespace tuplex { uint8_t* ptr = (uint8_t*)(rawPtr + 1); size_t numBytesSerialized = 0; size_t prevNumExceptions = 0; - size_t prevNumRows = 0; + auto rowDelta = 0; for(unsigned i = 0; i < numElements; ++i) { auto obj = PyList_GET_ITEM(listObj, i); Py_XINCREF(obj); @@ -500,11 +507,11 @@ namespace tuplex { // check capacity and realloc if necessary get a new partition if(partition->capacity() < numBytesSerialized + requiredBytes) { - assert(badParallelizeObjects.size() >= prevNumExceptions); - auto numNewExceptions = badParallelizeObjects.size() - prevNumExceptions; - numExceptionsInPartition.push_back(numNewExceptions); - prevNumExceptions = badParallelizeObjects.size(); - prevNumRows += numNewExceptions + *rawPtr; + rowDelta += *rawPtr + fallbackRows.size(); + auto serializedRows = serializeFallbackRows(fallbackRows); + fallbackRows.clear(); + partitionMergeInfo.push_back(PartitionGroup(1, partitions.size(), 0, 0, serializedRows.size(), fallbackPartitions.size())); + std::copy(serializedRows.begin(), serializedRows.end(), std::back_inserter(fallbackPartitions)); partition->unlockWrite(); partitions.push_back(partition); @@ -530,19 +537,20 @@ namespace tuplex { *rawPtr = *rawPtr + 1; numBytesSerialized += requiredBytes; } else { - assert(i >= prevNumRows); - badParallelizeObjects.emplace_back(std::make_tuple(i - prevNumRows, obj)); + assert(i >= rowDelta); + fallbackRows.emplace_back(i - rowDelta, obj); } } - assert(badParallelizeObjects.size() >= prevNumExceptions); - auto numNewExceptions = badParallelizeObjects.size() - prevNumExceptions; - numExceptionsInPartition.push_back(numNewExceptions); + auto serializedRows = serializeFallbackRows(fallbackRows); + fallbackRows.clear(); + partitionMergeInfo.push_back(PartitionGroup(1, partitions.size(), 0, 0, serializedRows.size(), fallbackPartitions.size())); + std::copy(serializedRows.begin(), serializedRows.end(), std::back_inserter(fallbackPartitions)); partition->unlockWrite(); partitions.push_back(partition); // create dataset from partitions. 
- return _context->fromPartitions(schema, partitions, columns, badParallelizeObjects, numExceptionsInPartition); + return _context->fromPartitions(schema, partitions, fallbackPartitions, partitionMergeInfo, columns); } // Returns true if t1 can be considered a subtype of t2, specifically in the context of Option types @@ -578,12 +586,9 @@ namespace tuplex { auto numElements = PyList_Size(listObj); logger.debug("transferring " + std::to_string(numElements) + " elements. "); - std::vector<std::tuple<size_t, PyObject*>> badParallelizeObjects; - std::vector<size_t> numExceptionsInPartition; - // check if empty? if(0 == numElements) - return _context->fromPartitions(schema, std::vector<Partition*>(), columns, badParallelizeObjects, numExceptionsInPartition); + return _context->fromPartitions(schema, std::vector<Partition*>(), std::vector<Partition*>(), std::vector<PartitionGroup>(), columns); auto firstRow = PyList_GET_ITEM(listObj, 0); Py_XINCREF(firstRow); @@ -592,6 +597,10 @@ namespace tuplex { // create new partition on driver auto driver = _context->getDriver(); + std::vector<std::tuple<size_t, PyObject*>> fallbackRows; + std::vector<Partition*> fallbackPartitions; + std::vector<PartitionGroup> partitionMergeInfo; + std::vector<Partition*> partitions; Partition* partition = driver->allocWritablePartition(allocMinSize, schema, -1, _context->id()); int64_t* rawPtr = (int64_t*)partition->lockWriteRaw(); @@ -599,7 +608,7 @@ namespace tuplex { uint8_t* ptr = (uint8_t*)(rawPtr + 1); size_t numBytesSerialized = 0; size_t prevNumExceptions = 0; - size_t prevNumRows = 0; + auto rowDelta = 0; for (unsigned i = 0; i < numElements; ++i) { // because this is a slow transfer loop, check explicitly for signals and free anything if there's something... @@ -611,10 +620,10 @@ namespace tuplex { logger.warn("slow transfer to backend interrupted."); // free items (decref) - for(auto t : badParallelizeObjects) { + for(auto t : fallbackRows) { Py_XDECREF(std::get<1>(t)); } - badParallelizeObjects.clear(); + fallbackRows.clear(); return _context->makeError("interrupted transfer"); } @@ -632,11 +641,11 @@ namespace tuplex { auto requiredBytes = row.serializedLength(); if(partition->capacity() < numBytesSerialized + requiredBytes) { - assert(badParallelizeObjects.size() >= prevNumExceptions); - auto numNewExceptions = badParallelizeObjects.size() - prevNumExceptions; - numExceptionsInPartition.push_back(numNewExceptions); - prevNumExceptions = badParallelizeObjects.size(); - prevNumRows += numNewExceptions + *rawPtr; + rowDelta += *rawPtr + fallbackRows.size(); + auto serializedRows = serializeFallbackRows(fallbackRows); + fallbackRows.clear(); + partitionMergeInfo.push_back(PartitionGroup(1, partitions.size(), 0, 0, serializedRows.size(), fallbackPartitions.size())); + std::copy(serializedRows.begin(), serializedRows.end(), std::back_inserter(fallbackPartitions)); partition->unlockWrite(); partitions.push_back(partition); @@ -653,17 +662,18 @@ namespace tuplex { *rawPtr = *rawPtr + 1; numBytesSerialized += requiredBytes; } else - badParallelizeObjects.emplace_back(std::make_tuple(i - prevNumRows, item)); + fallbackRows.emplace_back(std::make_tuple(i - rowDelta, item)); } - assert(badParallelizeObjects.size() >= prevNumExceptions); - auto numNewExceptions = badParallelizeObjects.size() - prevNumExceptions; - numExceptionsInPartition.push_back(numNewExceptions); + auto serializedRows = serializeFallbackRows(fallbackRows); + fallbackRows.clear(); + partitionMergeInfo.push_back(PartitionGroup(1, partitions.size(), 0, 0, serializedRows.size(), fallbackPartitions.size())); + std::copy(serializedRows.begin(), serializedRows.end(), std::back_inserter(fallbackPartitions)); 
partition->unlockWrite(); partitions.push_back(partition); // serialize in main memory - return _context->fromPartitions(schema, partitions, columns, badParallelizeObjects, numExceptionsInPartition); + return _context->fromPartitions(schema, partitions, fallbackPartitions, partitionMergeInfo, columns); } DataSet& PythonContext::strDictParallelize(PyObject *listObj, const python::Type &rowType, @@ -679,16 +689,17 @@ namespace tuplex { assert(rowType.parameters().size() == columns.size()); // also very important!!! Schema schema(Schema::MemoryLayout::ROW, rowType); - std::vector<std::tuple<size_t, PyObject*>> badParallelizeObjects; - std::vector<size_t> numExceptionsInPartition; - // check if empty? if(0 == numElements) - return _context->fromPartitions(schema, std::vector<Partition*>(), columns, badParallelizeObjects, numExceptionsInPartition); + return _context->fromPartitions(schema, std::vector<Partition*>(), std::vector<Partition*>(), std::vector<PartitionGroup>(), columns); // create new partition on driver auto driver = _context->getDriver(); + std::vector<std::tuple<size_t, PyObject*>> fallbackRows; + std::vector<Partition*> fallbackPartitions; + std::vector<PartitionGroup> partitionMergeInfo; + std::vector<Partition*> partitions; Partition* partition = driver->allocWritablePartition(allocMinSize, schema, -1, _context->id()); int64_t* rawPtr = (int64_t*)partition->lockWriteRaw(); @@ -696,7 +707,7 @@ namespace tuplex { uint8_t* ptr = (uint8_t*)(rawPtr + 1); size_t numBytesSerialized = 0; size_t prevNumExceptions = 0; - size_t prevNumRows = 0; + auto rowDelta = 0; for(unsigned i = 0; i < numElements; ++i) { auto obj = PyList_GET_ITEM(listObj, i); Py_XINCREF(obj); @@ -724,11 +735,11 @@ namespace tuplex { size_t requiredBytes = row.serializedLength(); // check capacity and realloc if necessary get a new partition if (partition->capacity() < numBytesSerialized + allocMinSize) { - assert(badParallelizeObjects.size() >= prevNumExceptions); - auto numNewExceptions = badParallelizeObjects.size() - prevNumExceptions; - numExceptionsInPartition.push_back(numNewExceptions); - prevNumExceptions = badParallelizeObjects.size(); - prevNumRows += numNewExceptions + *rawPtr; + rowDelta += *rawPtr + fallbackRows.size(); + auto serializedRows = serializeFallbackRows(fallbackRows); + fallbackRows.clear(); + partitionMergeInfo.push_back(PartitionGroup(1, partitions.size(), 0, 0, serializedRows.size(), fallbackPartitions.size())); + std::copy(serializedRows.begin(), serializedRows.end(), std::back_inserter(fallbackPartitions)); partition->unlockWrite(); partitions.push_back(partition); @@ -744,24 +755,25 @@ namespace tuplex { *rawPtr = *rawPtr + 1; numBytesSerialized += requiredBytes; } catch (const std::exception& e) { - assert(i >= prevNumRows); - badParallelizeObjects.emplace_back(i - prevNumRows, obj); + assert(i >= rowDelta); + fallbackRows.emplace_back(std::make_tuple(i - rowDelta, obj)); } } else { - assert(i >= prevNumRows); - badParallelizeObjects.emplace_back(i - prevNumRows, obj); + assert(i >= rowDelta); + fallbackRows.emplace_back(i - rowDelta, obj); } } - assert(badParallelizeObjects.size() >= prevNumExceptions); - auto numNewExceptions = badParallelizeObjects.size() - prevNumExceptions; - numExceptionsInPartition.push_back(numNewExceptions); + auto serializedRows = serializeFallbackRows(fallbackRows); + fallbackRows.clear(); + partitionMergeInfo.push_back(PartitionGroup(1, partitions.size(), 0, 0, serializedRows.size(), fallbackPartitions.size())); + std::copy(serializedRows.begin(), serializedRows.end(), std::back_inserter(fallbackPartitions)); partition->unlockWrite(); partitions.push_back(partition); // create dataset from partitions. 
- return _context->fromPartitions(schema, partitions, columns, badParallelizeObjects, numExceptionsInPartition); + return _context->fromPartitions(schema, partitions, fallbackPartitions, partitionMergeInfo, columns); } PythonDataSet PythonContext::parallelize(py::list L, @@ -1290,7 +1302,55 @@ namespace tuplex { return co; } - // // running with another python version might lead to severe issues + std::vector<Partition*> PythonContext::serializeFallbackRows(const std::vector<std::tuple<size_t, PyObject*>>& fallbackRows) { + std::vector<Partition*> fallbackPartitions; + if (fallbackRows.empty()) { + return fallbackPartitions; + } + + auto driver = _context->getDriver(); + Schema schema(Schema::MemoryLayout::ROW, python::Type::makeTupleType({python::Type::STRING})); + auto partition = driver->allocWritablePartition(allocMinSize, schema, -1, _context->id()); + int64_t* rawPtr = (int64_t*)partition->lockWriteRaw(); + *rawPtr = 0; + uint8_t* ptr = (uint8_t*)(rawPtr + 1); + size_t numBytesSerialized = 0; + + for (const auto& row: fallbackRows) { + auto rowNum = std::get<0>(row); + auto pythonObject = std::get<1>(row); + auto ecCode = ecToI64(ExceptionCode::PYTHON_PARALLELIZE); + auto pickledObject = python::pickleObject(python::getMainModule(), pythonObject); + auto pickledObjectSize = pickledObject.size(); + size_t requiredBytes = sizeof(int64_t) * 4 + pickledObjectSize; + + if (partition->capacity() < numBytesSerialized + requiredBytes) { + partition->unlockWrite(); + fallbackPartitions.push_back(partition); + partition = driver->allocWritablePartition(allocMinSize, schema, -1, _context->id()); + rawPtr = (int64_t *) partition->lockWriteRaw(); + *rawPtr = 0; + ptr = (uint8_t*)(rawPtr + 1); + numBytesSerialized = 0; + } + + *((int64_t*)(ptr)) = rowNum; ptr += sizeof(int64_t); + *((int64_t*)(ptr)) = ecCode; ptr += sizeof(int64_t); + *((int64_t*)(ptr)) = -1; ptr += sizeof(int64_t); + *((int64_t*)(ptr)) = pickledObjectSize; ptr += sizeof(int64_t); + memcpy(ptr, pickledObject.c_str(), pickledObjectSize); ptr += pickledObjectSize; + + *rawPtr = *rawPtr + 1; + numBytesSerialized += requiredBytes; + } + + partition->unlockWrite(); + fallbackPartitions.push_back(partition); + + return fallbackPartitions; + } + + // // running with another python version might lead to severe issues // // hence, perform check at context startup! // bool checkPythonVersion() { // using namespace std; diff --git a/tuplex/python/src/PythonDataSet.cc b/tuplex/python/src/PythonDataSet.cc index 36f9a392b..3f9ca82a2 100644 --- a/tuplex/python/src/PythonDataSet.cc +++ b/tuplex/python/src/PythonDataSet.cc @@ -721,7 +721,7 @@ namespace tuplex { void PythonDataSet::tocsv(const std::string &file_path, const std::string &lambda_code, const std::string &pickled_code, size_t fileCount, size_t shardSize, size_t limit, const std::string &null_value, - py::object header) { + py::object header, const bool commit) { // make sure a dataset is wrapped assert(this->_dataset); // ==> error handled below. @@ -757,6 +757,8 @@ namespace tuplex { outputOptions["header"] = "true"; } + outputOptions["commit"] = boolToString(commit); + // release GIL & hand over everything to Tuplex assert(PyGILState_Check()); // make sure this thread holds the GIL! 
python::unlockGIL(); @@ -909,8 +911,8 @@ namespace tuplex { // retrieve full partitions for speed Partition *partition = nullptr; size_t pos = 0; - while (rs->hasNextPartition() && pos < maxRowCount) { - partition = rs->getNextPartition(); + while (rs->hasNextNormalPartition() && pos < maxRowCount) { + partition = rs->getNextNormalPartition(); auto schema = partition->schema(); // single value? --> reset rowtype by one level auto type = schema.getRowType(); @@ -964,8 +966,8 @@ namespace tuplex { Partition *partition = nullptr; size_t pos = 0; - while (rs->hasNextPartition() && pos < maxRowCount) { - partition = rs->getNextPartition(); + while (rs->hasNextNormalPartition() && pos < maxRowCount) { + partition = rs->getNextNormalPartition(); // add memory towards list object auto ptr = partition->lockRaw(); @@ -1002,8 +1004,8 @@ namespace tuplex { Partition *partition = nullptr; size_t pos = 0; - while (rs->hasNextPartition() && pos < maxRowCount) { - partition = rs->getNextPartition(); + while (rs->hasNextNormalPartition() && pos < maxRowCount) { + partition = rs->getNextNormalPartition(); // add memory towards list object auto ptr = partition->lockRaw(); @@ -1042,8 +1044,8 @@ namespace tuplex { Partition *partition = nullptr; size_t pos = 0; - while (rs->hasNextPartition() && pos < maxRowCount) { - partition = rs->getNextPartition(); + while (rs->hasNextNormalPartition() && pos < maxRowCount) { + partition = rs->getNextNormalPartition(); // add memory towards list object auto ptr = partition->lockRaw(); @@ -1091,8 +1093,8 @@ namespace tuplex { Partition *partition = nullptr; size_t pos = 0; - while (rs->hasNextPartition() && pos < maxRowCount) { - partition = rs->getNextPartition(); + while (rs->hasNextNormalPartition() && pos < maxRowCount) { + partition = rs->getNextNormalPartition(); // add memory towards list object auto ptr = partition->lockRaw(); @@ -1147,8 +1149,8 @@ namespace tuplex { Partition *partition = nullptr; size_t pos = 0; - while (rs->hasNextPartition() && pos < maxRowCount) { - partition = rs->getNextPartition(); + while (rs->hasNextNormalPartition() && pos < maxRowCount) { + partition = rs->getNextNormalPartition(); // add memory towards list object auto ptr = partition->lockRaw(); @@ -1191,8 +1193,8 @@ namespace tuplex { Partition *partition = nullptr; size_t pos = 0; - while (rs->hasNextPartition() && pos < maxRowCount) { - partition = rs->getNextPartition(); + while (rs->hasNextNormalPartition() && pos < maxRowCount) { + partition = rs->getNextNormalPartition(); // add memory towards list object auto ptr = partition->lockRaw(); @@ -1251,8 +1253,8 @@ namespace tuplex { Partition* partition = nullptr; size_t pos = 0; - while(rs->hasNextPartition() && pos < maxRowCount) { - partition = rs->getNextPartition(); + while(rs->hasNextNormalPartition() && pos < maxRowCount) { + partition = rs->getNextNormalPartition(); // add memory towards list object auto ptr = partition->lockRaw(); @@ -1348,7 +1350,7 @@ namespace tuplex { // b.c. merging of arbitrary python objects is not implemented yet, whenever they're present, use general // version // @TODO: this could be optimized! 
- if(rs->pyobject_count() != 0) + if(rs->fallbackRowCount() != 0) return anyToCPythonWithPyObjects(rs, maxRowCount); auto type = rs->schema().getRowType(); diff --git a/tuplex/python/src/PythonWrappers.cc b/tuplex/python/src/PythonWrappers.cc index 8e35d5d4c..9968a14fa 100644 --- a/tuplex/python/src/PythonWrappers.cc +++ b/tuplex/python/src/PythonWrappers.cc @@ -172,8 +172,8 @@ namespace tuplex { Partition* partition = nullptr; size_t pos = 0; - while(rs->hasNextPartition()) { - partition = rs->getNextPartition(); + while(rs->hasNextNormalPartition()) { + partition = rs->getNextNormalPartition(); // add memory towards list object auto ptr = partition->lockRaw(); diff --git a/tuplex/python/tuplex/dataset.py b/tuplex/python/tuplex/dataset.py index a2b8c0b33..76f555309 100644 --- a/tuplex/python/tuplex/dataset.py +++ b/tuplex/python/tuplex/dataset.py @@ -425,7 +425,7 @@ def leftJoin(self, dsRight, leftKeyColumn, rightKeyColumn, prefixes=None, suffix return ds - def tocsv(self, path, part_size=0, num_rows=max_rows, num_parts=0, part_name_generator=None, null_value=None, header=True): + def tocsv(self, path, part_size=0, num_rows=max_rows, num_parts=0, part_name_generator=None, null_value=None, header=True, commit=True): """ save dataset to one or more csv files. Triggers execution of pipeline. Args: path: path where to save files to @@ -458,7 +458,7 @@ def tocsv(self, path, part_size=0, num_rows=max_rows, num_parts=0, part_name_gen if null_value is None: null_value = '' - self._dataSet.tocsv(path, code, code_pickled, num_parts, part_size, num_rows, null_value, header) + self._dataSet.tocsv(path, code, code_pickled, num_parts, part_size, num_rows, null_value, header, commit) def toorc(self, path, part_size=0, num_rows=max_rows, num_parts=0, part_name_generator=None): """ save dataset to one or more orc files. Triggers execution of pipeline. 
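Note: the hunks above repeatedly stride over serialized general and fallback rows with `4 * sizeof(int64_t) + ((int64_t*)ptr)[3]`. For orientation, here is a minimal sketch of the per-row record layout that `serializeFallbackRows` writes and `getNextFallbackRow` consumes; the struct and field names are illustrative assumptions, not identifiers from this patch:

```
// Sketch of the record layout implied by serializeFallbackRows() and
// getNextFallbackRow(). Names are assumptions for illustration only.
#include <cstdint>

struct FallbackRecordView {
    int64_t rowNumber;      // running row index, used for in-order merging
    int64_t exceptionCode;  // e.g. ecToI64(ExceptionCode::PYTHON_PARALLELIZE)
    int64_t operatorId;     // -1 when the row did not fail inside an operator
    int64_t payloadSize;    // byte size of the pickled Python object that follows
    const uint8_t* payload; // pickled object bytes
};

// Read the record at ptr; returns a pointer just past it (the stride the
// patch computes as 4 * sizeof(int64_t) + ((int64_t*)ptr)[3]).
inline const uint8_t* readFallbackRecord(const uint8_t* ptr, FallbackRecordView& out) {
    auto header = reinterpret_cast<const int64_t*>(ptr);
    out.rowNumber     = header[0];
    out.exceptionCode = header[1];
    out.operatorId    = header[2];
    out.payloadSize   = header[3];
    out.payload       = ptr + 4 * sizeof(int64_t);
    return out.payload + out.payloadSize;
}
```

Each partition buffer additionally begins with a single int64 row count, which is why the offset-update loops above start with `curPtr += sizeof(int64_t) + byteOffset`.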
diff --git a/tuplex/test/core/DataSetShow.cc b/tuplex/test/core/DataSetShow.cc index cf50705b8..4ec70c4e6 100644 --- a/tuplex/test/core/DataSetShow.cc +++ b/tuplex/test/core/DataSetShow.cc @@ -14,7 +14,7 @@ #include #include "TestUtils.h" -class DataSetTest : public TuplexTest {}; +class DataSetTest : public PyTest {}; TEST_F(DataSetTest, DataSetShow) { using namespace tuplex; diff --git a/tuplex/test/core/ExceptionsTest.cc b/tuplex/test/core/ExceptionsTest.cc new file mode 100644 index 000000000..ef9fae67e --- /dev/null +++ b/tuplex/test/core/ExceptionsTest.cc @@ -0,0 +1,232 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Leonhard Spiegelberg first on 1/1/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// + +#include "gtest/gtest.h" +#include +#include "TestUtils.h" + +class ExceptionsTest : public PyTest {}; + +TEST_F(ExceptionsTest, Basic) { + using namespace tuplex; + + auto opts = microTestOptions(); + opts.set("tuplex.optimizer.mergeExceptionsInOrder", "true"); + Context c(opts); + + std::vector inputRows({Row(1), Row(2), Row(0), Row(4), Row(5)}); + auto res = c.parallelize(inputRows).map(UDF("lambda x: 1 // x if x == 0 else x")).resolve(ExceptionCode::ZERODIVISIONERROR, UDF("lambda x: -1")).collectAsVector(); + std::vector expectedOutput({Row(1), Row(2), Row(-1), Row(4), Row(5)}); + ASSERT_EQ(res.size(), expectedOutput.size()); + for (int i = 0; i < expectedOutput.size(); ++i) + EXPECT_EQ(res[i].toPythonString(), expectedOutput[i].toPythonString()); +} + +TEST_F(ExceptionsTest, Debug) { + using namespace tuplex; + + auto opts = microTestOptions(); + opts.set("tuplex.optimizer.mergeExceptionsInOrder", "true"); + opts.set("tuplex.partitionSize", "40B"); + Context c(opts); + + std::vector inputData({ + Row(1), Row(2), Row(0), Row(4), + Row(5), Row(6), Row(0), Row(8), + }); + + auto res = c.parallelize(inputData).map(UDF("lambda x: 1 // x if x == 0 else x")).resolve(ExceptionCode::ZERODIVISIONERROR, UDF("lambda x: -1")).collectAsVector(); + std::vector expectedRes({Row(1), Row(2), Row(-1), Row(4), Row(5), Row(6), Row(-1), Row(8)}); + ASSERT_EQ(res.size(), expectedRes.size()); + for (int i = 0; i < expectedRes.size(); ++i) { + EXPECT_EQ(res[i].toPythonString(), expectedRes[i].toPythonString()); + } +} + +typedef bool (*filter_t)(int64_t); + +void processPartition(filter_t filter, int64_t* normalPartition, int64_t* totalFilterCounter, int64_t* totalNormalRowCounter, int64_t* totalGeneralRowCounter, int64_t* totalFallbackRowCounter, + uint8_t** generalPartitions, int64_t numGeneralPartitons, int64_t* generalIndexOffset, int64_t* generalRowOffset, int64_t* generalByteOffset, + uint8_t** fallbackPartitions, int64_t numFallbackPartitons, int64_t* fallbackIndexOffset, int64_t *fallbackRowOffset, int64_t* fallbackByteOffset) { + uint8_t *curGeneralPtr; + int64_t curGeneralNumRows = 0; + if (*generalIndexOffset < numGeneralPartitons) { + curGeneralPtr = generalPartitions[*generalIndexOffset]; + curGeneralNumRows = *((int64_t*)curGeneralPtr); + curGeneralPtr += sizeof(int64_t) + *generalByteOffset; + } + + uint8_t *curFallbackPtr; + int64_t curFallbackNumRows = 0; + if (*fallbackIndexOffset < numFallbackPartitons) { + curFallbackPtr = fallbackPartitions[*fallbackIndexOffset]; + curFallbackNumRows = 
*((int64_t*)curFallbackPtr); + curFallbackPtr += sizeof(int64_t) + *fallbackByteOffset; + } + + int64_t numNormalRows = normalPartition[0]; + for (int normalRowCountVar = 1; normalRowCountVar < numNormalRows + 1; ++normalRowCountVar) { + int64_t curNormalRow = normalPartition[normalRowCountVar]; + if (filter(curNormalRow)) { + int64_t curNormalRowInd = normalRowCountVar + *totalNormalRowCounter; + + while (*generalRowOffset < curGeneralNumRows && *((int64_t*)curGeneralPtr) < curNormalRowInd + *totalGeneralRowCounter) { + // record size must be read before advancing the pointer + int64_t generalRecordSize = 4 * sizeof(int64_t) + ((int64_t*)curGeneralPtr)[3]; + *((int64_t*)curGeneralPtr) -= *totalFilterCounter; + curGeneralPtr += generalRecordSize; + *generalByteOffset += generalRecordSize; + *generalRowOffset += 1; + *totalGeneralRowCounter += 1; + + if (*generalRowOffset == curGeneralNumRows && *generalIndexOffset < numGeneralPartitons - 1) { + *generalIndexOffset += 1; + *generalRowOffset = 0; + *generalByteOffset = 0; + curGeneralPtr = generalPartitions[*generalIndexOffset]; + curGeneralNumRows = *((int64_t*)curGeneralPtr); + curGeneralPtr += sizeof(int64_t); + } + } + + while (*fallbackRowOffset < curFallbackNumRows && *((int64_t*)curFallbackPtr) < curNormalRowInd + *totalGeneralRowCounter + *totalFallbackRowCounter) { + // record size must be read before advancing the pointer + int64_t fallbackRecordSize = 4 * sizeof(int64_t) + ((int64_t*)curFallbackPtr)[3]; + *((int64_t*)curFallbackPtr) -= *totalFilterCounter; + curFallbackPtr += fallbackRecordSize; + *fallbackByteOffset += fallbackRecordSize; + *fallbackRowOffset += 1; + *totalFallbackRowCounter += 1; + + if (*fallbackRowOffset == curFallbackNumRows && *fallbackIndexOffset < numFallbackPartitons - 1) { + *fallbackIndexOffset += 1; + *fallbackRowOffset = 0; + *fallbackByteOffset = 0; + curFallbackPtr = fallbackPartitions[*fallbackIndexOffset]; + curFallbackNumRows = *((int64_t*)curFallbackPtr); + curFallbackPtr += sizeof(int64_t); + } + } + + *totalFilterCounter += 1; + } + } + *totalNormalRowCounter += numNormalRows; +} + +void processPartitions(filter_t filter, int64_t** normalPartitions, int64_t numNormalPartitions, uint8_t** generalPartitions, int64_t numGeneralPartitions, uint8_t** fallbackPartitions, int64_t numFallbackPartitions) { + int64_t totalNormalRowCounter = 0; + int64_t totalGeneralRowCounter = 0; + int64_t totalFallbackRowCounter = 0; + int64_t totalFilterCounter = 0; + + int64_t generalIndexOffset = 0; + int64_t generalByteOffset = 0; + int64_t generalRowOffset = 0; + int64_t fallbackIndexOffset = 0; + int64_t fallbackByteOffset = 0; + int64_t fallbackRowOffset = 0; + for (int i = 0; i < numNormalPartitions; ++i) { + processPartition(filter, normalPartitions[i], + &totalFilterCounter, &totalNormalRowCounter, &totalGeneralRowCounter, &totalFallbackRowCounter, + generalPartitions, numGeneralPartitions, &generalIndexOffset, &generalRowOffset, &generalByteOffset, + fallbackPartitions, numFallbackPartitions, &fallbackIndexOffset, &fallbackRowOffset, &fallbackByteOffset); + } + + if (generalIndexOffset < numGeneralPartitions) { + auto curGeneralPtr = generalPartitions[generalIndexOffset]; + auto numRowsInPartition = *((int64_t*)curGeneralPtr); + curGeneralPtr += sizeof(int64_t) + generalByteOffset; + while (generalRowOffset < numRowsInPartition) { + *((int64_t*)curGeneralPtr) -= totalFilterCounter; + curGeneralPtr += 4 * sizeof(int64_t) + ((int64_t*)curGeneralPtr)[3]; + generalRowOffset += 1; + + if (generalRowOffset == numRowsInPartition && generalIndexOffset < numGeneralPartitions - 1) { + generalIndexOffset += 1; + curGeneralPtr = generalPartitions[generalIndexOffset]; + numRowsInPartition = 
*((int64_t*)curGeneralPtr); + curGeneralPtr += sizeof(int64_t); + generalByteOffset = 0; + generalRowOffset = 0; + } + } + } + + if (fallbackIndexOffset < numFallbackPartitions) { + auto curFallbackPtr = fallbackPartitions[fallbackIndexOffset]; + auto numRowsInPartition = *((int64_t*)curFallbackPtr); + curFallbackPtr += sizeof(int64_t) + fallbackByteOffset; + while (fallbackRowOffset < numRowsInPartition) { + *((int64_t*)curFallbackPtr) -= totalFilterCounter; + curFallbackPtr += 4 * sizeof(int64_t) + ((int64_t*)curFallbackPtr)[3]; + fallbackRowOffset += 1; + + if (fallbackRowOffset == numRowsInPartition && fallbackIndexOffset < numFallbackPartitions - 1) { + fallbackIndexOffset += 1; + curFallbackPtr = fallbackPartitions[fallbackIndexOffset]; + numRowsInPartition = *((int64_t*)curFallbackPtr); + curFallbackPtr += sizeof(int64_t); + fallbackByteOffset = 0; + fallbackRowOffset = 0; + } + } + } +} + +bool filter2(int64_t row) { + return row % 3 == 0; +} + +TEST_F(ExceptionsTest, Algo) { + int64_t n1[] = {15, 1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19}; + int64_t n2[] = {3, 21, 22, 23}; + int64_t *normalPartitions[] = {n1, n2}; + int64_t numNormalPartitions = 2; + + int64_t g1[] = {4, + 0, -1, -1, 8, -1, + 4, -1, -1, 8, -1, + 8, -1, -1, 8, -1, + 12, -1, -1, 8, -1}; + int64_t g2[] = {3, + 16, -1, -1, 8, -1, + 20, -1, -1, 8, -1, + 24, -1, -1, 8, -1}; + uint8_t *generalPartitions[] = {(uint8_t*)g1, (uint8_t*)g2}; + int64_t numGeneralPartitions = 2; + + uint8_t *fallbackPartitions[] = {}; + int64_t numFallbackPartitions = 0; + + processPartitions(filter2, normalPartitions, numNormalPartitions, generalPartitions, numGeneralPartitions, fallbackPartitions, numFallbackPartitions); + + + std::cout << "Done"; +} + + +bool filter1(int64_t row) { + return true; +} + +TEST_F(ExceptionsTest, ProcessDebug) { + int64_t n1[] = {2, 1, 2}; + int64_t *normalPartitions[] = {n1}; + int64_t numNormalPartitions = 1; + + int64_t g1[] = {3, + 1, -1, -1, 8, -1, + 2, -1, -1, 8, -1, + 3, -1, -1, 8, -1}; + uint8_t *generalPartitions[] = {(uint8_t*)g1}; + int64_t numGeneralPartitions = 1; + + uint8_t *fallbackPartitions[] = {}; + int64_t numFallbackPartitions = 0; + + processPartitions(filter1, normalPartitions, numNormalPartitions, generalPartitions, numGeneralPartitions, fallbackPartitions, numFallbackPartitions); +} \ No newline at end of file diff --git a/tuplex/test/core/IncrementalTest.cc b/tuplex/test/core/IncrementalTest.cc new file mode 100644 index 000000000..dd9a92656 --- /dev/null +++ b/tuplex/test/core/IncrementalTest.cc @@ -0,0 +1,566 @@ +//--------------------------------------------------------------------------------------------------------------------// +// // +// Tuplex: Blazing Fast Python Data Science // +// // +// // +// (c) 2017 - 2021, Tuplex team // +// Created by Benjamin Givertz first on 1/1/2021 // +// License: Apache 2.0 // +//--------------------------------------------------------------------------------------------------------------------// + +#include +#include +#include "TestUtils.h" + +class IncrementalTest : public PyTest { +protected: + + void SetUp() override { + PyTest::SetUp(); + + using namespace tuplex; + auto vfs = VirtualFileSystem::fromURI("."); + vfs.remove(testName); + auto err = vfs.create_dir(testName); + ASSERT_TRUE(err == VirtualFileSystemStatus::VFS_OK); + } + + void TearDown() override { + PyTest::TearDown(); + + using namespace tuplex; + auto vfs = VirtualFileSystem::fromURI("."); + vfs.remove(testName); + } +}; + +TEST_F(IncrementalTest, CommitMode) { + using 
namespace tuplex; + using namespace std; + + auto opts = microTestOptions(); + opts.set("tuplex.optimizer.mergeExceptionsInOrder", "true"); + opts.set("tuplex.optimizer.incrementalResolution", "true"); + Context c(opts); + + auto outputURI = URI(testName + "/" + testName + ".csv"); + + auto csvops = defaultCSVOutputOptions(); + csvops["commit"] = boolToString(false); + + c.parallelize({Row(1), Row(-1), Row(2), Row(-2), Row(3), Row(-3)}) + .map(UDF("lambda x: 1 // (x - x) if x == -1 else x")) + .map(UDF("lambda x: 1 // (x - x) if x == -2 else x")) + .map(UDF("lambda x: 1 // (x - x) if x == -3 else x")) + .tocsv(outputURI, csvops); + + c.parallelize({Row(1), Row(-1), Row(2), Row(-2), Row(3), Row(-3)}) + .map(UDF("lambda x: 1 // (x - x) if x == -1 else x")) + .resolve(ExceptionCode::ZERODIVISIONERROR, UDF("lambda x: x")) + .map(UDF("lambda x: 1 // (x - x) if x == -2 else x")) + .map(UDF("lambda x: 1 // (x - x) if x == -3 else x")) + .tocsv(outputURI, csvops); + + c.parallelize({Row(1), Row(-1), Row(2), Row(-2), Row(3), Row(-3)}) + .map(UDF("lambda x: 1 // (x - x) if x == -1 else x")) + .resolve(ExceptionCode::ZERODIVISIONERROR, UDF("lambda x: x")) + .map(UDF("lambda x: 1 // (x - x) if x == -2 else x")) + .resolve(ExceptionCode::ZERODIVISIONERROR, UDF("lambda x: x")) + .map(UDF("lambda x: 1 // (x - x) if x == -3 else x")) + .tocsv(outputURI, csvops); + + csvops["commit"] = boolToString(true); + + c.parallelize({Row(1), Row(-1), Row(2), Row(-2), Row(3), Row(-3)}) + .map(UDF("lambda x: 1 // (x - x) if x == -1 else x")) + .resolve(ExceptionCode::ZERODIVISIONERROR, UDF("lambda x: x")) + .map(UDF("lambda x: 1 // (x - x) if x == -2 else x")) + .resolve(ExceptionCode::ZERODIVISIONERROR, UDF("lambda x: x")) + .map(UDF("lambda x: 1 // (x - x) if x == -3 else x")) + .resolve(ExceptionCode::ZERODIVISIONERROR, UDF("lambda x: x")) + .tocsv(outputURI, csvops); +} + +void testIncrementalNoMerge(tuplex::ContextOptions opts, tuplex::URI fileURI, size_t numRows, float general, float fallback, float exception) { + using namespace tuplex; + using namespace std; + + opts.set("tuplex.executorCount", "4"); + opts.set("tuplex.optimizer.mergeExceptionsInOrder", "false"); + opts.set("tuplex.optimizer.incrementalResolution", "true"); + opts.set("tuplex.optimizer.nullValueOptimization", "true"); + opts.set("tuplex.normalcaseThreshold", "0.6"); + opts.set("tuplex.resolveWithInterpreterOnly", "true"); + opts.set("tuplex.useLLVMOptimizer", "true"); + Context c(opts); + + vector inputRows; + vector inputRowInds; + inputRows.reserve(numRows); + inputRowInds.reserve(numRows); + for (int i = 0; i < numRows; ++i) { + inputRows.push_back(i + 1); + inputRowInds.push_back(i); + } + + std::random_shuffle(inputRowInds.begin(), inputRowInds.end()); + int counter = 0; + for (int i = 0; i < (int) (general * numRows); ++i) { + inputRows[inputRowInds[counter]] = -1; + counter++; + } + for (int i = 0; i < (int) (fallback * numRows); ++i) { + inputRows[inputRowInds[counter]] = -2; + counter++; + } + for (int i = 0; i < (int) (exception * numRows); ++i) { + inputRows[inputRowInds[counter]] = -3; + counter++; + } + + stringstream ss; + for (int i = 0; i < numRows; ++i) { + ss << "1,"; + if (inputRows[i] != -1) { + ss << to_string(inputRows[i]); + } + ss << "\n"; + } + stringToFile(fileURI.toPath(), ss.str()); + + auto udf = "def udf(x, y):\n" + " if y == -2:\n" + " return y ** 0.5\n" + " elif y == -1:\n" + " raise ValueError\n" + " else:\n" + " return float(y)"; + + auto &ds_cached = c.csv(fileURI.toPath()).cache().map(UDF(udf)).cache(); +} + 
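+// NoMergeFallback drives testIncrementalNoMerge over 100 rows with 25%
+// general-, 25% fallback- and 25% exception-row fractions. The helper
+// rewrites those fractions of the shuffled input to the sentinels -1, -2
+// and -3; -1 rows are emitted as an empty CSV field (so they presumably
+// land on the general case once null-value optimization kicks in), and -2
+// rows make the UDF return y ** 0.5, i.e. a non-float result for negative y.
+// Note the helper currently only builds and caches the dataset (ds_cached
+// is unused); it performs no assertions yet.
+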
+TEST_F(IncrementalTest, NoMergeFallback) { + using namespace tuplex; + testIncrementalNoMerge(microTestOptions(), URI(testName + ".csv"), 100, 0.25, 0.25, 0.25); +} + +void executeZillow(tuplex::Context &context, const tuplex::URI& outputURI, int step, bool commit) { + using namespace tuplex; + + auto extractBd = "def extractBd(x):\n" + " val = x['facts and features']\n" + " max_idx = val.find(' bd')\n" + " if max_idx < 0:\n" + " max_idx = len(val)\n" + " s = val[:max_idx]\n" + "\n" + " # find comma before\n" + " split_idx = s.rfind(',')\n" + " if split_idx < 0:\n" + " split_idx = 0\n" + " else:\n" + " split_idx += 2\n" + " r = s[split_idx:]\n" + " return int(r)"; + + auto extractType = "def extractType(x):\n" + " t = x['title'].lower()\n" + " type = 'unknown'\n" + " if 'condo' in t or 'apartment' in t:\n" + " type = 'condo'\n" + " if 'house' in t:\n" + " type = 'house'\n" + " return type"; + + auto extractBa = "def extractBa(x):\n" + " val = x['facts and features']\n" + " max_idx = val.find(' ba')\n" + " if max_idx < 0:\n" + " max_idx = len(val)\n" + " s = val[:max_idx]\n" + "\n" + " # find comma before\n" + " split_idx = s.rfind(',')\n" + " if split_idx < 0:\n" + " split_idx = 0\n" + " else:\n" + " split_idx += 2\n" + " r = s[split_idx:]\n" + " return int(r)"; + + auto extractSqft = "def extractSqft(x):\n" + " val = x['facts and features']\n" + " max_idx = val.find(' sqft')\n" + " if max_idx < 0:\n" + " max_idx = len(val)\n" + " s = val[:max_idx]\n" + "\n" + " split_idx = s.rfind('ba ,')\n" + " if split_idx < 0:\n" + " split_idx = 0\n" + " else:\n" + " split_idx += 5\n" + " r = s[split_idx:]\n" + " r = r.replace(',', '')\n" + " return int(r)"; + + auto extractPrice = "def extractPrice(x):\n" + " price = x['price']\n" + " p = 0\n" + " if x['offer'] == 'sold':\n" + " # price is to be calculated using price/sqft * sqft\n" + " val = x['facts and features']\n" + " s = val[val.find('Price/sqft:') + len('Price/sqft:') + 1:]\n" + " r = s[s.find('$')+1:s.find(', ') - 1]\n" + " price_per_sqft = int(r)\n" + " p = price_per_sqft * x['sqft']\n" + " elif x['offer'] == 'rent':\n" + " max_idx = price.rfind('/')\n" + " p = int(price[1:max_idx].replace(',', ''))\n" + " else:\n" + " # take price from price column\n" + " p = int(price[1:].replace(',', ''))\n" + "\n" + " return p"; + auto extractOffer = "def extractOffer(x):\n" + " offer = x['title'].lower()\n" + "\n" + " if 'sale' in offer:\n" + " offer = 'sale'\n" + " elif 'rent' in offer:\n" + " offer = 'rent'\n" + " elif 'sold' in offer:\n" + " offer = 'sold'\n" + " elif 'foreclos' in offer.lower():\n" + " offer = 'foreclosed'\n" + " else:\n" + " offer = 'unknown'\n" + "\n" + " return offer"; + + auto resolveBd = "def resolveBd(x):\n" + " if 'Studio' in x['facts and features']:\n" + " return 1\n" + " raise ValueError\n"; + + auto csvops = defaultCSVOutputOptions(); + csvops["commit"] = boolToString(commit); + std::vector columnNames({"url", "zipcode", "address", "city", "state", "bedrooms", "bathrooms", "sqft", "offer", "type", "price"}); + + auto &ds = context.csv("../../../../benchmarks/incremental/data/zillow_dirty.csv"); + ds = ds.withColumn("bedrooms", UDF(extractBd)); + if (step > 0) + ds = ds.resolve(ExceptionCode::VALUEERROR, UDF(resolveBd)); + if (step > 1) + ds = ds.ignore(ExceptionCode::VALUEERROR); + ds = ds.filter(UDF("lambda x: x ['bedrooms'] < 10")); + ds = ds.withColumn("type", UDF(extractType)); + ds = ds.filter(UDF("lambda x: x['type'] == 'condo'")); + ds = ds.withColumn("zipcode", UDF("lambda x: '%05d' % int(x['postal_code'])")); + if 
(step > 2)
+        ds = ds.ignore(ExceptionCode::TYPEERROR);
+    ds = ds.mapColumn("city", UDF("lambda x: x[0].upper() + x[1:].lower()"));
+    ds = ds.withColumn("bathrooms", UDF(extractBa));
+    if (step > 3)
+        ds = ds.ignore(ExceptionCode::VALUEERROR);
+    ds = ds.withColumn("sqft", UDF(extractSqft));
+    if (step > 4)
+        ds = ds.ignore(ExceptionCode::VALUEERROR);
+    ds = ds.withColumn("offer", UDF(extractOffer));
+    ds = ds.withColumn("price", UDF(extractPrice));
+    if (step > 5)
+        ds = ds.resolve(ExceptionCode::VALUEERROR, UDF("lambda x: int(100020)"));
+    ds = ds.filter(UDF("lambda x: 100000 < x['price'] < 2e7 and x['offer'] == 'sale'"));
+    ds = ds.selectColumns(columnNames);
+    ds.tocsv(outputURI, csvops);
+}
+
+TEST_F(IncrementalTest, DirtyZillow) {
+    using namespace tuplex;
+    using namespace std;
+
+    auto opts = testOptions();
+    opts.set("tuplex.executorCount", "0");
+    opts.set("tuplex.executorMemory", "2G");
+    opts.set("tuplex.driverMemory", "2G");
+    opts.set("tuplex.partitionSize", "32MB");
+    opts.set("tuplex.resolveWithInterpreterOnly", "false");
+    opts.set("tuplex.optimizer.incrementalResolution", "true");
+    opts.set("tuplex.optimizer.mergeExceptionsInOrder", "false");
+    Context incrementalContext(opts);
+    opts.set("tuplex.optimizer.incrementalResolution", "false");
+    Context plainContext(opts);
+
+    for (int step = 0; step < 7; ++step) {
+        executeZillow(incrementalContext, testName + "/incremental.csv", step, true);
+//        executeZillow(plainContext, testName + "/plain.csv", step);
+    }
+
+//    std::vector incrementalRows;
+//    auto incrementalResult = plainContext.csv(testName + "/incremental.*.csv").collect();
+//    while (incrementalResult->hasNextRow())
+//        incrementalRows.push_back(incrementalResult->getNextRow().toPythonString());
+//
+//    std::vector plainRows;
+//    auto plainResult = plainContext.csv(testName + "/plain.*.csv").collect();
+//    while (plainResult->hasNextRow())
+//        plainRows.push_back(plainResult->getNextRow().toPythonString());
+//
+//    ASSERT_EQ(incrementalRows.size(), plainRows.size());
+//    for (int i = 0; i < plainRows.size(); ++i)
+//        ASSERT_EQ(incrementalRows[i], plainRows[i]);
+}
+
+TEST_F(IncrementalTest, FileOutput) {
+    using namespace tuplex;
+    using namespace std;
+
+    auto opts = microTestOptions();
+    opts.set("tuplex.executorCount", "0");
+    opts.set("tuplex.optimizer.incrementalResolution", "true");
+    opts.set("tuplex.optimizer.mergeExceptionsInOrder", "false");
+    Context c(opts);
+
+    auto numRows = 50;
+    auto amountExps = 0.25;
+    std::vector inputRows;
+    inputRows.reserve(numRows);
+    std::unordered_multiset expectedOutput1;
+    expectedOutput1.reserve((int) (numRows * amountExps));
+    std::unordered_multiset expectedOutput2;
+    expectedOutput2.reserve(numRows);
+
+    auto inputFileURI = URI(testName + "/in.csv");
+    auto fileURI = URI(testName + "/out.csv");
+    auto outputFileURI = URI(testName + "/out.*.csv");
+
+    std::stringstream ss;
+    for (int i = 0; i < numRows; ++i) {
+        if (i % (int) (1 / amountExps) == 0) {
+            ss << "0\n";
+            expectedOutput2.insert(Row(-1).toPythonString());
+        } else {
+            ss << to_string(i) << "\n";
+            expectedOutput1.insert(Row(i).toPythonString());
+            expectedOutput2.insert(Row(i).toPythonString());
+        }
+    }
+    stringToFile(inputFileURI, ss.str());
+
+    c.csv(inputFileURI.toPath()).map(UDF("lambda x: 1 // x if x == 0 else x")).tocsv(fileURI.toPath());
+    auto output1 = c.csv(outputFileURI.toPath()).collectAsVector();
+    ASSERT_EQ(output1.size(), expectedOutput1.size());
+    for (const auto &row : output1) {
+        ASSERT_TRUE(expectedOutput1.find(row.toPythonString()) !=
expectedOutput1.end()); + } + + c.csv(inputFileURI.toPath()).map(UDF("lambda x: 1 // x if x == 0 else x")).resolve(ExceptionCode::ZERODIVISIONERROR, UDF("lambda x: -1")).tocsv(fileURI.toPath()); + auto output2 = c.csv(outputFileURI.toPath()).collectAsVector(); + ASSERT_EQ(output2.size(), expectedOutput2.size()); + for (const auto &row : output2) { + ASSERT_TRUE(expectedOutput2.find(row.toPythonString()) != expectedOutput2.end()); + } +} + +TEST_F(IncrementalTest, FileOutputInOrder) { + using namespace tuplex; + using namespace std; + + auto opts = microTestOptions(); + opts.set("tuplex.executorCount", "0"); + opts.set("tuplex.optimizer.incrementalResolution", "true"); + opts.set("tuplex.optimizer.mergeExceptionsInOrder", "true"); + Context c(opts); + + auto numRows = 50; + auto amountExps = 0.25; + std::vector inputRows; + inputRows.reserve(numRows); + std::vector expectedOutput1; + expectedOutput1.reserve((int) (numRows * amountExps)); + std::vector expectedOutput2; + expectedOutput2.reserve(numRows); + + auto inputFileURI = URI(testName + "/in.csv"); + auto fileURI = URI(testName + "/out.csv"); + auto outputFileURI = URI(testName + "/out.*.csv"); + + std::stringstream ss; + for (int i = 0; i < numRows; ++i) { + if (i % (int) (1 / amountExps) == 0) { + ss << "0\n"; + expectedOutput2.push_back(Row(-1).toPythonString()); + } else { + ss << to_string(i) << "\n"; + expectedOutput1.push_back(Row(i).toPythonString()); + expectedOutput2.push_back(Row(i).toPythonString()); + } + } + stringToFile(inputFileURI, ss.str()); + + c.csv(inputFileURI.toPath()).map(UDF("lambda x: 1 // x if x == 0 else x")).tocsv(fileURI.toPath()); + auto output1 = c.csv(outputFileURI.toPath()).collectAsVector(); + ASSERT_EQ(output1.size(), expectedOutput1.size()); + for (int i = 0; i < expectedOutput1.size(); ++i) { + ASSERT_EQ(expectedOutput1[i], output1[i].toPythonString()); + } + + c.csv(inputFileURI.toPath()).map(UDF("lambda x: 1 // x if x == 0 else x")).resolve(ExceptionCode::ZERODIVISIONERROR, UDF("lambda x: -1")).tocsv(fileURI.toPath()); + auto output2 = c.csv(outputFileURI.toPath()).collectAsVector(); + ASSERT_EQ(output2.size(), expectedOutput2.size()); + for (int i = 0; i < expectedOutput2.size(); ++i) { + ASSERT_EQ(expectedOutput2[i], output2[i].toPythonString()); + } +} + +TEST_F(IncrementalTest, DebugResolver) { + using namespace tuplex; + using namespace std; + + auto opts = microTestOptions(); + opts.set("tuplex.optimizer.incrementalResolution", "false"); + opts.set("tuplex.optimizer.mergeExceptionsInOrder", "false"); + Context c(opts); + +// c.parallelize({Row(1), Row(0), Row(3)}) +// .map(UDF("lambda x: 1 // x if x == 0 else x")) +// .tocsv(testName + "/out.csv"); +// + c.parallelize({Row(1), Row(0), Row(3)}) + .map(UDF("lambda x: 1 // x if x == 0 else x")) + .resolve(ExceptionCode::ZERODIVISIONERROR, UDF("lambda x: 1 // x")) + .tocsv(testName + "/out.csv"); + + c.parallelize({Row(1), Row(0), Row(3)}) + .map(UDF("lambda x: 1 // x if x == 0 else x")) + .resolve(ExceptionCode::ZERODIVISIONERROR, UDF("lambda x: 1 // x")) + .ignore(ExceptionCode::ZERODIVISIONERROR) + .tocsv(testName + "/out.csv"); +} + +TEST_F(IncrementalTest, Filter) { + using namespace tuplex; + using namespace std; + + auto opts = microTestOptions(); + opts.set("tuplex.executorCount", "2"); + opts.set("tuplex.optimizer.incrementalResolution", "true"); + opts.set("tuplex.optimizer.mergeExceptionsInOrder", "true"); + opts.set("tuplex.resolveWithInterpreterOnly", "false"); + Context c(opts); + + auto inputFileURI = URI(testName + "/in.csv"); + 
auto fileURI = URI(testName + "/out.csv");
+    auto outputFileURI = URI(testName + "/out.*.csv");
+
+    std::vector expectedOutput1;
+    std::vector expectedOutput2;
+    std::stringstream ss;
+    // start at 1: a 0-valued row produced at i == 0 would be dropped by the filter below
+    for (int i = 1; i < 100000; ++i) {
+        auto num = rand()%4;
+        switch (num) {
+            case 0: {
+                ss << to_string(i) << "\n";
+                expectedOutput1.push_back(Row(i));
+                expectedOutput2.push_back(Row(i));
+                break;
+            }
+            case 1: {
+                ss << "-1\n";
+                break;
+            }
+            case 2: {
+                ss << "-2\n";
+                expectedOutput2.push_back(Row(-2));
+                break;
+            }
+            case 3: {
+                ss << "0\n";
+                break;
+            }
+        }
+    }
+    stringToFile(inputFileURI, ss.str());
+
+    c.csv(inputFileURI.toPath()).map(UDF("lambda x: 1 // (x - x) if x < 0 else x")).filter(UDF("lambda x: x != 0")).tocsv(fileURI.toPath());
+    auto output1 = c.csv(outputFileURI.toPath()).collectAsVector();
+    ASSERT_EQ(output1.size(), expectedOutput1.size());
+    for (int i = 0; i < expectedOutput1.size(); ++i) {
+        ASSERT_EQ(expectedOutput1[i].toPythonString(), output1[i].toPythonString());
+    }
+
+    c.csv(inputFileURI.toPath()).map(UDF("lambda x: 1 // (x - x) if x < 0 else x")).resolve(ExceptionCode::ZERODIVISIONERROR, UDF("lambda x: 1 // (x - x) if x == -1 else x")).filter(UDF("lambda x: x != 0")).tocsv(fileURI.toPath());
+    auto output2 = c.csv(outputFileURI.toPath()).collectAsVector();
+    ASSERT_EQ(output2.size(), expectedOutput2.size());
+    for (int i = 0; i < expectedOutput2.size(); ++i) {
+        ASSERT_EQ(expectedOutput2[i].toPythonString(), output2[i].toPythonString());
+    }
+}
+
+TEST_F(IncrementalTest, FileOutput2) {
+    using namespace tuplex;
+    using namespace std;
+
+    auto opts = microTestOptions();
+    opts.set("tuplex.resolveWithInterpreterOnly", "false");
+    opts.set("tuplex.optimizer.incrementalResolution", "true");
+    opts.set("tuplex.optimizer.mergeExceptionsInOrder", "true");
+    Context c(opts);
+
+    auto numRows = 10000;
+    auto amountExps = 0.25;
+    std::vector inputRows;
+    inputRows.reserve(numRows);
+    std::unordered_multiset expectedOutput1;
+    expectedOutput1.reserve((int) (numRows * amountExps));
+    std::unordered_multiset expectedOutput2;
+    expectedOutput2.reserve(numRows);
+
+    auto inputFileURI = URI(testName + "/in.csv");
+    auto fileURI = URI(testName + "/out.csv");
+    auto outputFileURI = URI(testName + "/out.*.csv");
+
+    std::stringstream ss;
+    for (int i = 0; i < numRows; ++i) {
+        if (i % (int) (1 / amountExps) == 0) {
+            ss << "0\n";
+            expectedOutput2.insert(Row(-1).toPythonString());
+        } else {
+            ss << to_string(i) << "\n";
+            expectedOutput1.insert(Row(i).toPythonString());
+            expectedOutput2.insert(Row(i).toPythonString());
+        }
+    }
+    stringToFile(inputFileURI, ss.str());
+
+    c.csv(inputFileURI.toPath()).map(UDF("lambda x: 1 // x if x == 0 else x"))
+            .map(UDF("lambda x: 1 // x if x == 0 else x"))
+            .tocsv(fileURI.toPath());
+    auto output1 = c.csv(outputFileURI.toPath()).collectAsVector();
+    ASSERT_EQ(output1.size(), expectedOutput1.size());
+    for (const auto &row : output1) {
+        ASSERT_TRUE(expectedOutput1.find(row.toPythonString()) != expectedOutput1.end());
+    }
+
+    c.csv(inputFileURI.toPath()).map(UDF("lambda x: 1 // x if x == 0 else x"))
+            .resolve(ExceptionCode::ZERODIVISIONERROR, UDF("lambda x: 0"))
+            .map(UDF("lambda x: 1 // x if x == 0 else x"))
+            .tocsv(fileURI.toPath());
+    auto output2 = c.csv(outputFileURI.toPath()).collectAsVector();
+    ASSERT_EQ(output2.size(), expectedOutput1.size());
+    for (const auto &row : output2) {
+        ASSERT_TRUE(expectedOutput1.find(row.toPythonString()) != expectedOutput1.end());
+    }
+
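+    // Third pipeline: same resolver as above plus an ignore for rows that
+    // still raise. With tuplex.optimizer.incrementalResolution enabled this
+    // run should presumably only re-process the exceptions left over from
+    // the previous run rather than the whole input.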
+    c.csv(inputFileURI.toPath())
+            .map(UDF("lambda x: 1 // x if x == 0 else x"))
+            .resolve(ExceptionCode::ZERODIVISIONERROR, UDF("lambda x: 0"))
+            .map(UDF("lambda x: 1 // x if x == 0 else x"))
+            .ignore(ExceptionCode::ZERODIVISIONERROR)
+            .tocsv(fileURI.toPath());
+    auto output3 = c.csv(outputFileURI.toPath()).collectAsVector();
+    ASSERT_EQ(output3.size(), expectedOutput1.size());
+    for (const auto &row : output3) {
+        ASSERT_TRUE(expectedOutput1.find(row.toPythonString()) != expectedOutput1.end());
+    }
+}
\ No newline at end of file
diff --git a/tuplex/test/core/ResultSetTest.cc b/tuplex/test/core/ResultSetTest.cc
index 4acd38921..cffbc29a9 100644
--- a/tuplex/test/core/ResultSetTest.cc
+++ b/tuplex/test/core/ResultSetTest.cc
@@ -51,6 +51,57 @@ class ResultSetTest : public PyTest {
         return pw.getOutputPartitions();
     }
 
+    std::vector<Partition*> pyObjectsToPartitions(const std::vector<std::tuple<size_t, PyObject*>>& pyObjects) {
+        using namespace tuplex;
+
+        std::vector<Partition*> partitions;
+        if (pyObjects.empty()) {
+            return partitions;
+        }
+
+        Schema schema(Schema::MemoryLayout::ROW, python::Type::makeTupleType({python::Type::STRING}));
+        Partition* partition = allocPartition(schema.getRowType(), -1);
+        auto rawPtr = (int64_t*)partition->lockWriteRaw();
+        *rawPtr = 0;
+        auto ptr = (uint8_t*)(rawPtr + 1);
+        size_t numBytesSerialized = 0;
+
+        python::lockGIL();
+        for (auto &row: pyObjects) {
+            auto rowNum = std::get<0>(row);
+            auto pyObj = std::get<1>(row);
+            auto ecCode = -1;
+            auto opID = -1;
+            auto pickledObject = python::pickleObject(python::getMainModule(), pyObj);
+            auto pickledObjectSize = pickledObject.size();
+            size_t requiredBytes = sizeof(int64_t) * 4 + pickledObjectSize;
+
+            if (partition->capacity() < numBytesSerialized + requiredBytes) {
+                partition->unlockWrite();
+                partitions.push_back(partition);
+                partition = allocPartition(schema.getRowType(), -1);
+                rawPtr = (int64_t *) partition->lockWriteRaw();
+                *rawPtr = 0;
+                ptr = (uint8_t*)(rawPtr + 1);
+                numBytesSerialized = 0;
+            }
+
+            *((int64_t*)ptr) = rowNum; ptr += sizeof(int64_t);
+            *((int64_t*)ptr) = ecCode; ptr += sizeof(int64_t);
+            *((int64_t*)ptr) = opID; ptr += sizeof(int64_t);
+            *((int64_t*)ptr) = pickledObjectSize; ptr += sizeof(int64_t);
+            memcpy(ptr, pickledObject.c_str(), pickledObjectSize); ptr += pickledObjectSize;
+
+            *rawPtr += 1;
+            numBytesSerialized += requiredBytes;
+        }
+        python::unlockGIL();
+
+        partition->unlockWrite();
+        partitions.push_back(partition);
+
+        return partitions;
+    }
 };
 
 TEST_F(ResultSetTest, NoPyObjects) {
@@ -68,10 +119,13 @@ TEST_F(ResultSetTest, NoPyObjects) {
         sample_rows.push_back(Row(rand() % 256, rand() % 256 * 0.1 - 1.0, strs[rand() % strs.size()]));
     }
     auto partitions = rowsToPartitions(sample_rows);
-    for(auto p : partitions)
-        p->makeImmortal();
+    std::vector<PartitionGroup> partitionGroups;
+    for(int i = 0; i < partitions.size(); ++i) {
+        partitions[i]->makeImmortal();
+        partitionGroups.push_back(PartitionGroup(1, i));
+    }
 
-    auto rsA = make_shared(Schema(Schema::MemoryLayout::ROW, sample_rows.front().getRowType()), partitions);
+    auto rsA = make_shared(Schema(Schema::MemoryLayout::ROW, sample_rows.front().getRowType()), partitions, std::vector{}, std::vector{}, partitionGroups);
     EXPECT_EQ(rsA->rowCount(), sample_rows.size());
 
     // check correct order returned
@@ -79,13 +133,14 @@
     while(rsA->hasNextRow()) {
         EXPECT_EQ(rsA->getNextRow().toPythonString(), sample_rows[pos++].toPythonString());
     }
+    EXPECT_EQ(pos, sample_rows.size());
 
     // now limit result set to 17 rows, check this works as well!
int Nlimit = 17; auto rsB = make_shared(Schema(Schema::MemoryLayout::ROW, sample_rows.front().getRowType()), partitions, std::vector{}, - std::unordered_map(), - vector>{}, + std::vector{}, + partitionGroups, Nlimit); pos = 0; while(rsB->hasNextRow()) { @@ -137,13 +192,15 @@ TEST_F(ResultSetTest, WithPyObjects) { vector refC = {Row(10), Row(20), Row(30), Row(35), Row(37)}; vector refD = {Row(-1), Row(0), Row(1)}; + auto partitionGroups = std::vector{PartitionGroup(1,0,0,0,1,0)}; + // TEST A: // ----------------- auto rsA = make_shared(Schema(Schema::MemoryLayout::ROW, rows.front().getRowType()), partitions, std::vector{}, - std::unordered_map(), - objsA); + pyObjectsToPartitions(objsA), + partitionGroups); EXPECT_EQ(rsA->rowCount(), objsA.size() + rows.size()); pos = 0; while(rsA->hasNextRow()) { @@ -156,8 +213,8 @@ TEST_F(ResultSetTest, WithPyObjects) { auto rsB = make_shared(Schema(Schema::MemoryLayout::ROW, rows.front().getRowType()), partitions, std::vector{}, - std::unordered_map(), - objsB); + pyObjectsToPartitions(objsB), + partitionGroups); EXPECT_EQ(rsB->rowCount(), objsB.size() + rows.size()); pos = 0; while(rsB->hasNextRow()) { @@ -171,8 +228,8 @@ TEST_F(ResultSetTest, WithPyObjects) { auto rsC = make_shared(Schema(Schema::MemoryLayout::ROW, rows.front().getRowType()), partitions, std::vector{}, - std::unordered_map(), - objsC); + pyObjectsToPartitions(objsC), + partitionGroups); EXPECT_EQ(rsC->rowCount(), objsC.size() + rows.size()); pos = 0; while(rsC->hasNextRow()) { @@ -180,6 +237,8 @@ TEST_F(ResultSetTest, WithPyObjects) { EXPECT_EQ(rsC->getNextRow().toPythonString(), refC[pos++].toPythonString()); } + partitionGroups = std::vector{PartitionGroup(0, 0, 0, 0, 1, 0)}; + // TEST D: // ------- // only pyobjects. @@ -188,8 +247,8 @@ TEST_F(ResultSetTest, WithPyObjects) { auto rsD = make_shared(Schema(Schema::MemoryLayout::ROW, rows.front().getRowType()), std::vector{}, std::vector{}, - std::unordered_map(), - objsD); + pyObjectsToPartitions(objsD), + partitionGroups); EXPECT_EQ(rsD->rowCount(), objsD.size()); pos = 0; while(rsD->hasNextRow()) { diff --git a/tuplex/test/wrappers/WrapperTest.cc b/tuplex/test/wrappers/WrapperTest.cc index ede9dd82d..40804ad41 100644 --- a/tuplex/test/wrappers/WrapperTest.cc +++ b/tuplex/test/wrappers/WrapperTest.cc @@ -17,59 +17,32 @@ #include #include #include +#include "../core/TestUtils.h" #include // need for these tests a running python interpreter, so spin it up -class WrapperTest : public ::testing::Test { -protected: - std::string testName; - std::string scratchDir; - +class WrapperTest : public TuplexTest { void SetUp() override { - testName = std::string(::testing::UnitTest::GetInstance()->current_test_info()->test_case_name()) + std::string(::testing::UnitTest::GetInstance()->current_test_info()->name()); - scratchDir = "/tmp/" + testName; + TuplexTest::SetUp(); python::initInterpreter(); - - // hold GIL assert(python::holdsGIL()); + + using namespace tuplex; + auto vfs = VirtualFileSystem::fromURI("."); + vfs.remove(testName); + auto err = vfs.create_dir(testName); + ASSERT_TRUE(err == VirtualFileSystemStatus::VFS_OK); } void TearDown() override { - - // important to get GIL for this + TuplexTest::TearDown(); python::closeInterpreter(); - } - - inline void remove_temp_files() { - tuplex::Timer timer; - boost::filesystem::remove_all(scratchDir.c_str()); - std::cout<<"removed temp files in "<(listObj); + auto res = c.parallelize(list).map("lambda x: 1 // x if x == 0 else x", "").resolve(ecToI64(ExceptionCode::ZERODIVISIONERROR), "lambda 
x: -1", "").collect(); + auto resObj = res.ptr(); + + ASSERT_EQ(PyList_Size(resObj), PyList_Size(expectedResult)); + for (int i = 0; i < PyList_Size(expectedResult); ++i) { + EXPECT_EQ(python::pythonToRow(PyList_GetItem(resObj, i)).toPythonString(), python::pythonToRow( + PyList_GetItem(expectedResult, i)).toPythonString()); + } + } +} + TEST_F(WrapperTest, StringTuple) { using namespace tuplex; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); PyObject *listObj = PyList_New(4); PyObject *tupleObj1 = PyTuple_New(2); @@ -131,7 +140,7 @@ TEST_F(WrapperTest, StringTuple) { TEST_F(WrapperTest, MixedSimpleTupleTuple) { using namespace tuplex; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); PyObject *listObj = PyList_New(4); PyObject *tupleObj1 = PyTuple_New(2); @@ -170,7 +179,7 @@ TEST_F(WrapperTest, MixedSimpleTupleTuple) { TEST_F(WrapperTest, StringParallelize) { using namespace tuplex; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); PyObject * listObj = PyList_New(3); PyList_SET_ITEM(listObj, 0, python::PyString_FromString("Hello")); @@ -194,7 +203,7 @@ TEST_F(WrapperTest, StringParallelize) { TEST_F(WrapperTest, DictionaryParallelize) { using namespace tuplex; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); PyObject * dictObj1 = PyDict_New(); PyDict_SetItem(dictObj1, python::PyString_FromString("a"), PyFloat_FromDouble(0.0)); @@ -243,7 +252,7 @@ TEST_F(WrapperTest, SimpleCSVParse) { PyDict_SetItemString(pyopt, "tuplex.webui.enable", Py_False); // RAII, destruct python context! - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); // weird block syntax due to RAII problems. { @@ -274,7 +283,7 @@ TEST_F(WrapperTest, SimpleCSVParse) { TEST_F(WrapperTest, GetOptions) { using namespace tuplex; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); // weird RAII problems of boost python { @@ -290,8 +299,8 @@ TEST_F(WrapperTest, GetOptions) { TEST_F(WrapperTest, TwoContexts) { using namespace tuplex; - PythonContext c("", "", testOptions()); - PythonContext c2("", "", testOptions()); + PythonContext c("", "", microTestOptions().asJSON()); + PythonContext c2("", "", microTestOptions().asJSON()); { auto opt1 = c.options(); @@ -315,7 +324,7 @@ TEST_F(WrapperTest, Show) { PyDict_SetItemString(pyopt, "tuplex.webui.enable", Py_False); // RAII, destruct python context! - PythonContext c("python", "", testOptions()); + PythonContext c("python", "", microTestOptions().asJSON()); // weird block syntax due to RAII problems. { @@ -340,7 +349,7 @@ TEST_F(WrapperTest, GoogleTrace) { PyDict_SetItemString(pyopt, "tuplex.webui.enable", Py_False); // RAII, destruct python context! - PythonContext c("python", "", testOptions()); + PythonContext c("python", "", testOptions().asJSON()); /// Based on Google trace data, this mini pipeline serves as CSV parsing test ground. /// c.csv(file_path) \ /// .filter(lambda x: x[3] == 0) \ @@ -487,7 +496,7 @@ TEST_F(WrapperTest, extractPriceExample) { auto cols = py::reinterpret_borrow(colObj); // RAII, destruct python context! - PythonContext c("python", "", testOptions()); + PythonContext c("python", "", testOptions().asJSON()); { // all calls go here... @@ -587,7 +596,7 @@ TEST_F(WrapperTest, DictListParallelize) { using namespace tuplex; // RAII, destruct python context! 
- PythonContext c("python", "", testOptions()); + PythonContext c("python", "", microTestOptions().asJSON()); // weird block syntax due to RAII problems. { @@ -624,9 +633,9 @@ TEST_F(WrapperTest, UpcastParallelizeI) { using namespace tuplex; // RAII, destruct python context! - auto opts = testOptions(); - opts = opts.substr(0, opts.length() - 1) + ", \"tuplex.autoUpcast\":\"True\"}"; - PythonContext c("python", "", opts); + auto opts = microTestOptions(); + opts.set("tuplex.autoUpcast", "true"); + PythonContext c("python", "", opts.asJSON()); // weird block syntax due to RAII problems. { @@ -656,9 +665,9 @@ TEST_F(WrapperTest, UpcastParallelizeII) { using namespace tuplex; // RAII, destruct python context! - auto opts = testOptions(); - opts = opts.substr(0, opts.length() - 1) + ", \"tuplex.autoUpcast\":\"True\"}"; - PythonContext c("python", "", opts); + auto opts = microTestOptions(); + opts.set("tuplex.autoUpcast", "true"); + PythonContext c("python", "", opts.asJSON()); // weird block syntax due to RAII problems. { @@ -692,9 +701,9 @@ TEST_F(WrapperTest, FilterAll) { using namespace tuplex; // RAII, destruct python context! - auto opts = testOptions(); - opts = opts.substr(0, opts.length() - 1) + ",\"tuplex.autoUpcast\":\"True\"}"; - PythonContext c("python", "", opts); + auto opts = microTestOptions(); + opts.set("tuplex.autoUpcast", "true"); + PythonContext c("python", "", opts.asJSON()); // weird block syntax due to RAII problems. { @@ -719,7 +728,7 @@ TEST_F(WrapperTest, ColumnNames) { using namespace tuplex; // RAII, destruct python context! - PythonContext c("python", "", testOptions()); + PythonContext c("python", "", microTestOptions().asJSON()); // weird block syntax due to RAII problems. { @@ -781,9 +790,9 @@ TEST_F(WrapperTest, IntegerTuple) { PyDict_SetItemString(pyopt, "tuplex.autoUpcast", Py_True); // RAII, destruct python context! - auto opts = testOptions(); - opts = opts.substr(0, opts.length() - 1) + ",\"tuplex.autoUpcast\":\"True\"}"; - PythonContext c("python", "", opts); + auto opts = microTestOptions(); + opts.set("tuplex.autoUpcast", "true"); + PythonContext c("python", "", opts.asJSON()); // weird block syntax due to RAII problems. { @@ -838,8 +847,9 @@ TEST_F(WrapperTest, IfWithNull) { // RAII, destruct python context! auto opts = testOptions(); - opts = opts.substr(0, opts.length() - 1) + ",\"tuplex.useLLVMOptimizer\" : \"False\", \"tuplex.executorCount\":0}"; - PythonContext c("python", "", opts); + opts.set("tuplex.useLLVMOptimizer", "false"); + opts.set("tuplex.executorCount", "0"); + PythonContext c("python", "", opts.asJSON()); // execute mini part of pipeline and output csv to file // pipeline is // df = ctx.csv(perf_path) @@ -913,8 +923,9 @@ TEST_F(WrapperTest, FlightData) { // RAII, destruct python context! auto opts = testOptions(); - opts = opts.substr(0, opts.length() - 1) + ",\"tuplex.useLLVMOptimizer\" : \"False\", \"tuplex.executorCount\":0}"; - PythonContext c("python", "", opts); + opts.set("tuplex.useLLVMOptimizer", "false"); + opts.set("tuplex.executorCount", "0"); + PythonContext c("python", "", opts.asJSON()); // execute mini part of pipeline and output csv to file // pipeline is // df = ctx.csv(perf_path) @@ -1122,7 +1133,7 @@ TEST_F(WrapperTest, Airport) { // RAII, destruct python context! 
PythonContext c("python", "", - testOptions()); + testOptions().asJSON()); // execute mini part of pipeline and output csv to file // pipeline is @@ -1166,7 +1177,7 @@ TEST_F(WrapperTest, Airport) { TEST_F(WrapperTest, OptionParallelizeI) { using namespace tuplex; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); PyObject * listObj = PyList_New(5); PyList_SET_ITEM(listObj, 0, PyLong_FromLong(112)); @@ -1196,7 +1207,7 @@ TEST_F(WrapperTest, OptionParallelizeI) { TEST_F(WrapperTest, OptionParallelizeII) { using namespace tuplex; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); PyObject * listObj = PyList_New(5); @@ -1239,7 +1250,7 @@ TEST_F(WrapperTest, OptionParallelizeII) { TEST_F(WrapperTest, NoneParallelize) { using namespace tuplex; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); PyObject * listObj = PyList_New(2); PyList_SET_ITEM(listObj, 0, Py_None); @@ -1263,7 +1274,7 @@ TEST_F(WrapperTest, NoneParallelize) { TEST_F(WrapperTest, EmptyMapI) { using namespace tuplex; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); PyObject * listObj = PyList_New(4); PyList_SET_ITEM(listObj, 0, PyLong_FromLong(1)); @@ -1291,7 +1302,7 @@ TEST_F(WrapperTest, EmptyMapI) { TEST_F(WrapperTest, EmptyMapII) { using namespace tuplex; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); PyObject * listObj = PyList_New(4); PyList_SET_ITEM(listObj, 0, PyLong_FromLong(1)); @@ -1323,7 +1334,7 @@ TEST_F(WrapperTest, EmptyMapII) { TEST_F(WrapperTest, EmptyMapIII) { using namespace tuplex; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); PyObject * listObj = PyList_New(4); PyList_SET_ITEM(listObj, 0, PyLong_FromLong(1)); @@ -1355,7 +1366,7 @@ TEST_F(WrapperTest, EmptyMapIII) { TEST_F(WrapperTest, EmptyOptionMapI) { using namespace tuplex; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); PyObject * listObj = PyList_New(4); PyList_SET_ITEM(listObj, 0, PyLong_FromLong(1)); @@ -1385,7 +1396,7 @@ TEST_F(WrapperTest, EmptyOptionMapI) { TEST_F(WrapperTest, EmptyOptionMapII) { using namespace tuplex; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); PyObject * listObj = PyList_New(4); PyList_SET_ITEM(listObj, 0, PyLong_FromLong(1)); @@ -1415,7 +1426,7 @@ TEST_F(WrapperTest, EmptyOptionMapII) { TEST_F(WrapperTest, OptionTupleParallelizeI) { using namespace tuplex; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); PyObject * listObj = PyList_New(3); @@ -1464,7 +1475,7 @@ TEST_F(WrapperTest, OptionTupleParallelizeI) { TEST_F(WrapperTest, OptionTupleParallelizeII) { using namespace tuplex; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); PyObject * listObj = PyList_New(3); @@ -1513,7 +1524,7 @@ TEST_F(WrapperTest, OptionTupleParallelizeII) { TEST_F(WrapperTest, OptionTupleParallelizeIII) { using namespace tuplex; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); PyObject * listObj = PyList_New(3); @@ -1562,7 +1573,7 @@ TEST_F(WrapperTest, OptionTupleParallelizeIII) { TEST_F(WrapperTest, parallelizeOptionTypeI) { using namespace tuplex; - PythonContext c("c", "", testOptions()); + 
PythonContext c("c", "", microTestOptions().asJSON()); PyObject * listObj = python::runAndGet( "test_input = [(1.0, '2', 3, '4', 5, 6, True, 8, 9, None), (None, '2', 3, None, 5, 6, True, 8, 9, None)" @@ -1589,7 +1600,7 @@ TEST_F(WrapperTest, parallelizeOptionTypeI) { TEST_F(WrapperTest, parallelizeNestedSlice) { using namespace tuplex; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); PyObject * listObj = python::runAndGet( "test_input = [((), (\"hello\",), 123, \"oh no\", (1, 2)), ((), (\"goodbye\",), 123, \"yes\", (-10, 2)),\n" @@ -1621,7 +1632,7 @@ TEST_F(WrapperTest, TPCHQ6) { " 'l_discount', 'l_tax', 'l_returnflag', 'l_linestatus',\n" " 'l_shipdate', 'l_commitdate', 'l_receiptdate',\n" " 'l_shipinstruct', 'l_shipmode', 'l_comment']", "listitem_columns"); - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", testOptions().asJSON()); { @@ -1643,7 +1654,7 @@ TEST_F(WrapperTest, TupleParallelizeI) { PyObject* listObj = python::runAndGet("L = [('hello', 'world', 'hi', 1, 2, 3), ('foo', 'bar', 'baz', 4, 5, 6), ('blank', '', 'not', 7, 8, 9)]", "L"); - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); { auto list = py::reinterpret_borrow(listObj); c.parallelize(list).map("lambda x: ({x[0]: x[3], x[1]: x[4], x[2]: x[5]},)", "").show(); @@ -1655,7 +1666,7 @@ TEST_F(WrapperTest, TupleParallelizeII) { PyObject* listObj = python::runAndGet("L = [({}, {}, {}), ({}, {}, {}), ({}, {}, {})]", "L"); - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); { auto list = py::reinterpret_borrow(listObj); c.parallelize(list).map("lambda x, y, z: [x, y, z]", "").show(); @@ -1672,7 +1683,7 @@ TEST_F(WrapperTest, DictParallelizeRefTest) { PyObject* strings = python::runAndGet("strings = [('hello', 'world', 'hi'), ('foo', 'bar', 'baz'), ('blank', '', 'not')]\n", "strings"); PyObject* floats = python::runAndGet("floats = [(1.2, 3.4, -100.2), (5.6, 7.8, -1.234), (9.0, 0.1, 2.3)]\n", "floats"); ASSERT_TRUE(floats->ob_refcnt > 0); - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); { @@ -1715,7 +1726,7 @@ TEST_F(WrapperTest, DictParallelizeRefTest) { TEST_F(WrapperTest, BuiltinModule) { using namespace tuplex; using namespace std; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); { PyObject* L = PyList_New(3); @@ -1747,7 +1758,7 @@ TEST_F(WrapperTest, SwapIII) { " return a, b\n" "\n"; - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); { PyObject* L = PyList_New(2); auto tuple1 = PyTuple_New(2); @@ -2069,6 +2080,181 @@ namespace tuplex { } } +void executeZillow(tuplex::PythonContext &context, const tuplex::URI& outputURI, int step) { + using namespace tuplex; + using namespace std; + + auto extractBd = "def extractBd(x):\n" + " val = x['facts and features']\n" + " max_idx = val.find(' bd')\n" + " if max_idx < 0:\n" + " max_idx = len(val)\n" + " s = val[:max_idx]\n" + "\n" + " # find comma before\n" + " split_idx = s.rfind(',')\n" + " if split_idx < 0:\n" + " split_idx = 0\n" + " else:\n" + " split_idx += 2\n" + " r = s[split_idx:]\n" + " return int(r)"; + + auto extractType = "def extractType(x):\n" + " t = x['title'].lower()\n" + " type = 'unknown'\n" + " if 'condo' in t or 'apartment' in t:\n" + " type = 'condo'\n" + " if 'house' in t:\n" + " type = 'house'\n" + " return type"; + + auto extractBa = "def 
extractBa(x):\n" + " val = x['facts and features']\n" + " max_idx = val.find(' ba')\n" + " if max_idx < 0:\n" + " max_idx = len(val)\n" + " s = val[:max_idx]\n" + "\n" + " # find comma before\n" + " split_idx = s.rfind(',')\n" + " if split_idx < 0:\n" + " split_idx = 0\n" + " else:\n" + " split_idx += 2\n" + " r = s[split_idx:]\n" + " return int(r)"; + + auto extractSqft = "def extractSqft(x):\n" + " val = x['facts and features']\n" + " max_idx = val.find(' sqft')\n" + " if max_idx < 0:\n" + " max_idx = len(val)\n" + " s = val[:max_idx]\n" + "\n" + " split_idx = s.rfind('ba ,')\n" + " if split_idx < 0:\n" + " split_idx = 0\n" + " else:\n" + " split_idx += 5\n" + " r = s[split_idx:]\n" + " r = r.replace(',', '')\n" + " return int(r)"; + + auto extractPrice = "def extractPrice(x):\n" + " price = x['price']\n" + " p = 0\n" + " if x['offer'] == 'sold':\n" + " # price is to be calculated using price/sqft * sqft\n" + " val = x['facts and features']\n" + " s = val[val.find('Price/sqft:') + len('Price/sqft:') + 1:]\n" + " r = s[s.find('$')+1:s.find(', ') - 1]\n" + " price_per_sqft = int(r)\n" + " p = price_per_sqft * x['sqft']\n" + " elif x['offer'] == 'rent':\n" + " max_idx = price.rfind('/')\n" + " p = int(price[1:max_idx].replace(',', ''))\n" + " else:\n" + " # take price from price column\n" + " p = int(price[1:].replace(',', ''))\n" + "\n" + " return p"; + auto extractOffer = "def extractOffer(x):\n" + " offer = x['title'].lower()\n" + "\n" + " if 'sale' in offer:\n" + " offer = 'sale'\n" + " elif 'rent' in offer:\n" + " offer = 'rent'\n" + " elif 'sold' in offer:\n" + " offer = 'sold'\n" + " elif 'foreclos' in offer.lower():\n" + " offer = 'foreclosed'\n" + " else:\n" + " offer = 'unknown'\n" + "\n" + " return offer"; + + auto resolveBd = "def resolveBd(x):\n" + " if 'Studio' in x['facts and features']:\n" + " return 1\n" + " raise ValueError\n"; + + // create closure object for resolve_Ba + auto ba_closure = PyDict_New(); + auto math_mod = PyImport_ImportModule("math"); + auto re_mod = PyImport_ImportModule("re"); + assert(math_mod); assert(re_mod); + PyDict_SetItemString(ba_closure, "math", math_mod); + PyDict_SetItemString(ba_closure, "re", re_mod); + + auto cols_to_select = python::runAndGet("L = ['url', 'zipcode', 'address', 'city', 'state'," + "'bedrooms', 'bathrooms', 'sqft', 'offer', 'type', 'price']", "L"); + + { + auto ds = context.csv("../resources/zillow_dirty_sample_mini.csv"); + ds = ds.withColumn("bedrooms", extractBd, "", py::reinterpret_steal(ba_closure)); + if (step > 0) + ds = ds.resolve(ecToI64(ExceptionCode::VALUEERROR), resolveBd, ""); + if (step > 1) + ds = ds.ignore(ecToI64(ExceptionCode::VALUEERROR)); + ds = ds.filter("lambda x: x['bedrooms'] < 10", ""); + ds = ds.withColumn("type", extractType, "", py::reinterpret_steal(ba_closure)); + ds = ds.filter("lambda x: x['type'] == 'condo'", ""); + ds = ds.withColumn("zipcode", "lambda x: '%05d' % int(x['postal_code'])", ""); + if (step > 2) + ds = ds.ignore(ecToI64(ExceptionCode::TYPEERROR)); + ds = ds.mapColumn("city", "lambda x: x[0].upper() + x[1:].lower()", ""); + ds = ds.withColumn("bathrooms", extractBa, "", py::reinterpret_steal(ba_closure)); + if (step > 3) + ds = ds.ignore(ecToI64(ExceptionCode::VALUEERROR)); + ds = ds.withColumn("sqft", extractSqft, "", py::reinterpret_steal(ba_closure)); + if (step > 4) + ds = ds.ignore(ecToI64(ExceptionCode::VALUEERROR)); + ds = ds.withColumn("offer", extractOffer, "", py::reinterpret_steal(ba_closure)); + ds = ds.withColumn("price", extractPrice, "", 
py::reinterpret_steal(ba_closure)); + if (step > 5) + ds = ds.resolve(ecToI64(ExceptionCode::VALUEERROR), "lambda x: int(re.sub('[^0-9.]*', '', x['price']))", "", py::reinterpret_steal(ba_closure)); + ds = ds.filter("lambda x: 100000 < x['price'] < 2e7 and x['offer'] == 'sale'", ""); + ds = ds.selectColumns(py::reinterpret_borrow(cols_to_select)); + ds.tocsv(outputURI.toPath()); + } +} + +TEST_F(WrapperTest, IncrementalZillow) { + using namespace tuplex; + using namespace std; + + auto opts = testOptions(); + opts.set("tuplex.driverMemory", "512MB"); + opts.set("tuplex.executorCount", "0"); + opts.set("tuplex.optimizer.incrementalResolution", "true"); + opts.set("tuplex.optimizer.mergeExceptionsInOrder", "false"); + opts.set("tuplex.optimizer.nullValueOptimization", "false"); +// opts.set("tuplex.inputSplitSize", "16MB"); +// opts.set("tuplex.optimizer.codeStats", "true"); +// opts.set("tuplex.readBufferSize", "4KB"); +// opts.set("tuplex.resolveWithInterpreterOnly", "true"); +// opts.set("tuplex.allowUndefinedBehavior", "false"); +// opts.set("tuplex.autoUpcast", "false"); +// opts.set("tuplex.executorMemory", "512MB"); +// opts.set("tuplex.inputSplitSize", "16MB"); +// opts.set("tuplex.runTimeMemory", "32MB"); +// PythonContext incrementalContext("incremental", "", opts.asJSON()); + opts.set("tuplex.optimizer.incrementalResolution", "false"); + PythonContext plainContext("plain", "", opts.asJSON()); + + for (int step = 0; step < 7; ++step) { +// executeZillow(incrementalContext, testName + "/incremental.csv", step); + executeZillow(plainContext, testName + "/plain.csv", step); + +// auto incrementalResult = plainContext.csv(testName + "/incremental.*.csv").collect().ptr(); +// auto plainResult = plainContext.csv(testName + "/plain.*.csv").collect().ptr(); +// ASSERT_EQ(PyList_Size(incrementalResult), PyList_Size(plainResult)); + } + +} + TEST_F(WrapperTest, ZillowDirty) { using namespace tuplex; using namespace std; @@ -2156,7 +2342,7 @@ TEST_F(WrapperTest, BitwiseAnd) { PyObject* listObj = python::runAndGet("L = [(False, False), (False, True), (True, False), (True, True)]", "L"); - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); { auto list = py::reinterpret_borrow(listObj); auto res_list = c.parallelize(list).map("lambda a, b: a & b", "").collect(); @@ -2172,7 +2358,7 @@ TEST_F(WrapperTest, MetricsTest) { PyObject* listObj = python::runAndGet("L = [(False, False), (False, True), (True, False), (True, True)]", "L"); - PythonContext c("c", "", testOptions()); + PythonContext c("c", "", microTestOptions().asJSON()); { auto list = py::reinterpret_borrow(listObj); auto res_list = c.parallelize(list).map("lambda a, b: a & b", "").collect(); @@ -2364,9 +2550,9 @@ TEST_F(WrapperTest, MixedTypesIsWithNone) { using namespace tuplex; using namespace std; - auto opts = testOptions(); - opts = opts.substr(0, opts.length() - 1) + ",\"tuplex.optimizer.mergeExceptionsInOrder\":\"True\"}"; - PythonContext c("python", "", opts); + auto opts = microTestOptions(); + opts.set("tuplex.optimizer.mergeExceptionsInOrder", "true"); + PythonContext c("python", "", opts.asJSON()); PyObject *listObj = PyList_New(8); PyList_SetItem(listObj, 0, Py_None);