diff --git a/alpacka/envs/__init__.py b/alpacka/envs/__init__.py
index 55de8d6ee994377ffe0c19f696150966507eacd5..e4cbe274a5dcf5447bd46c348c815fe6ce74261d 100644
--- a/alpacka/envs/__init__.py
+++ b/alpacka/envs/__init__.py
@@ -5,6 +5,7 @@ import gin
 from alpacka.envs import cartpole
 from alpacka.envs import gfootball
 from alpacka.envs import sokoban
+from alpacka.envs import rubik
 from alpacka.envs.base import *
 from alpacka.envs.wrappers import *
 
@@ -21,3 +22,4 @@ ActionNoiseSokoban = configure_env(sokoban.ActionNoiseSokoban) # pylint: disable
 CartPole = configure_env(cartpole.CartPole)  # pylint: disable=invalid-name
 GoogleFootball = configure_env(gfootball.GoogleFootball)  # pylint: disable=invalid-name
 Sokoban = configure_env(sokoban.Sokoban)  # pylint: disable=invalid-name
+Rubik = configure_env(rubik.Rubik)  # pylint: disable=invalid-name
diff --git a/alpacka/envs/rubik.py b/alpacka/envs/rubik.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d8899e2ef43c30822ceb82e81bda2af2a855342
--- /dev/null
+++ b/alpacka/envs/rubik.py
@@ -0,0 +1,750 @@
+"""Rubik's Cube env."""
+
+import copy
+from enum import Enum
+import numpy as np
+import matplotlib.pyplot as plt
+from gym import spaces
+from matplotlib.patches import Rectangle
+from matplotlib.patches import Polygon
+
+from alpacka.envs import base
+
+
+class Actions(Enum):
+    """Parameters of the twelve face-turn actions.
+
+    Each member's value is a dict with the keys:
+      'name'     -- notation string of the move (a trailing ' marks the
+                    turn in the opposite direction),
+      'f'        -- face letter handed to Cube.move() by
+                    Cube.move_by_action(),
+      'd'        -- turn count handed to Cube.move() (1 or -1),
+      'opposite' -- member name of the action with the same face and the
+                    opposite direction; compared against the previous
+                    action's name in Cube.opposite_actions().
+    """
+    U = {'name': 'U', 'f': 'U', 'd': 1, 'opposite': 'U_1'}
+    U_1 = {'name': 'U\'', 'f': 'U', 'd': -1, 'opposite': 'U'}
+    D = {'name': 'D', 'f': 'D', 'd': 1, 'opposite': 'D_1'}
+    D_1 = {'name': 'D\'', 'f': 'D', 'd': -1, 'opposite': 'D'}
+    F = {'name': 'F', 'f': 'F', 'd': 1, 'opposite': 'F_1'}
+    F_1 = {'name': 'F\'', 'f': 'F', 'd': -1, 'opposite': 'F'}
+    B = {'name': 'B', 'f': 'B', 'd': 1, 'opposite': 'B_1'}
+    B_1 = {'name': 'B\'', 'f': 'B', 'd': -1, 'opposite': 'B'}
+    R = {'name': 'R', 'f': 'R', 'd': 1, 'opposite': 'R_1'}
+    R_1 = {'name': 'R\'', 'f': 'R', 'd': -1, 'opposite': 'R'}
+    L = {'name': 'L', 'f': 'L', 'd': 1, 'opposite': 'L_1'}
+    L_1 = {'name': 'L\'', 'f': 'L', 'd': -1, 'opposite': 'L'}
+
+
+class Cube():
+    """
+    Cube
+    ----
+    Initialize with arguments:
+    - `n`, the side length (the cube is `n`x`n`x`n`)
+    - optional `whiteplastic=True` if you like white cubes
+    """
+
+    # member names of the Actions enum, in definition order
+    action_names = [a.name for a in Actions]
+
+    # face letter -> index of that face along the first axis of
+    # `self.stickers`; `dictface` is the inverse mapping
+    facedict = {'U': 0, 'D': 1, 'F': 2, 'B': 3, 'R': 4, 'L': 5}
+    dictface = {v: k for k, v in facedict.items()}
+    # one normal vector per face index; render_views() uses it as the
+    # face's local z direction
+    normals = [np.array([0., 1., 0.]), np.array([0., -1., 0.]),
+               np.array([0., 0., 1.]), np.array([0., 0., -1.]),
+               np.array([1., 0., 0.]), np.array([-1., 0., 0.])]
+    # this xdirs has to be synchronized with the self.move() function
+    xdirs = [np.array([1., 0., 0.]), np.array([1., 0., 0.]),
+             np.array([1., 0., 0.]), np.array([-1., 0., 0.]),
+             np.array([0., 0., -1.]), np.array([0, 0., 1.])]
+    # NOTE(review): colordict is not read anywhere in the code visible
+    # here -- presumably colour letter -> sticker value; confirm.
+    colordict = {'w': 0, 'y': 1, 'b': 2, 'g': 3, 'o': 4, 'r': 5}
+    # per face index: anchor (x, y) of the face in the flat layout drawn
+    # by render_flat()
+    pltpos = [(0., 1.05), (0., -1.05), (0., 0.), (2.10, 0.), (1.05, 0.),
+              (-1.05, 0.)]
+    # colour of the face-letter labels drawn by the render functions
+    labelcolor = '#7f00ff'
+
+    def __init__(self, n, whiteplastic=False):
+        """
+        Build a solved `n`x`n`x`n` cube and set the rendering parameters.
+
+        `whiteplastic` only selects the colour drawn underneath the
+        stickers.
+        """
+        self.n = n
+        # one (n, n) grid per face, each filled with that face's index
+        face_grids = [np.tile(face, (n, n)) for face in range(6)]
+        self.stickers = np.array(face_grids)
+
+        # drawing colour for each sticker value
+        self.stickercolors = ['w', '#ffcf00', '#00008f', '#009f0f', '#ff6f00',
+                              '#cf0000']
+        self.plasticcolor = '#dfdfdf' if whiteplastic else '#1f1f1f'
+        # sticker size relative to cubie size (must be < 1)
+        self.stickerwidth = 0.9
+        # sticker thickness in units of total cube size
+        self.stickerthickness = 0.001
+        self.fontsize = 12. * (n / 5.)
+
+        # score of the pristine cube; solved() compares against it
+        self.solved_score = self.score()
+
+    def turn(self, f, d):
+        """
+        Rotate the entire cube about face `f` by `d` quarter turns
+        (clockwise; `d=3` or `d=-1` means counter-clockwise).  This is
+        done by applying the same move to every layer parallel to `f`.
+        """
+        layer = 0
+        while layer < self.n:
+            self.move(f, layer, d)
+            layer += 1
+
+    def move(self, f, l, d):
+        """
+        Make a layer move of layer `l` parallel to face `f` through
+        `d` 90-degree turns in the clockwise direction.  Layer `0` is
+        the face itself, and higher `l` values are for layers deeper
+        into the cube.  Use `d=3` or `d=-1` for counter-clockwise
+        moves, and `d=2` for a 180-degree move.
+
+        Moves on 'D', 'B' and 'L' are delegated to the opposite face
+        ('U', 'F', 'R') with the mirrored layer index and the reversed
+        direction.  The sticker array is modified in place; the method
+        always returns None.
+        """
+        i = self.facedict[f]
+        # the same layer, counted from the opposite face
+        l2 = self.n - 1 - l
+        assert l < self.n
+        # normalise `d` to 0..3 quarter turns; one loop pass per turn
+        ds = range((d + 4) % 4)
+        if f == 'U':
+            f2 = 'D'
+            i2 = self.facedict[f2]
+            for _ in ds:
+                # cycle one strip through the four side faces
+                self._rotate([(self.facedict['F'], range(self.n), l2),
+                              (self.facedict['R'], range(self.n), l2),
+                              (self.facedict['B'], range(self.n), l2),
+                              (self.facedict['L'], range(self.n), l2)])
+        if f == 'D':
+            return self.move('U', l2, -d)
+        if f == 'F':
+            f2 = 'B'
+            i2 = self.facedict[f2]
+            for _ in ds:
+                self._rotate([(self.facedict['U'], range(self.n), l),
+                              (self.facedict['L'], l2, range(self.n)),
+                              (self.facedict['D'], range(self.n)[::-1], l2),
+                              (self.facedict['R'], l, range(self.n)[::-1])])
+        if f == 'B':
+            return self.move('F', l2, -d)
+        if f == 'R':
+            f2 = 'L'
+            i2 = self.facedict[f2]
+            for _ in ds:
+                self._rotate([(self.facedict['U'], l2, range(self.n)),
+                              (self.facedict['F'], l2, range(self.n)),
+                              (self.facedict['D'], l2, range(self.n)),
+                              (self.facedict['B'], l, range(self.n)[::-1])])
+        if f == 'L':
+            return self.move('R', l2, -d)
+        # Only reached for 'U', 'F' and 'R'.  An outermost layer also
+        # carries a whole face, whose own stickers must be rotated: face
+        # `i` when l == 0, the opposite face `i2` when l == n - 1.
+        for _ in ds:
+            if l == 0:
+                self.stickers[i] = np.rot90(self.stickers[i], 3)
+            if l == self.n - 1:
+                self.stickers[i2] = np.rot90(self.stickers[i2], 1)
+        return None
+
+    def _rotate(self, args):
+        """
+        Helper for `move()`: cyclically shift sticker strips.
+
+        `args` is a sequence of index tuples into `self.stickers`.  Each
+        strip receives the contents of the next one in the sequence, and
+        the last strip receives what the first one held originally.
+        """
+        first, *rest = args
+        # the index tuples used by move() contain a range, so this
+        # lookup yields a copy that survives the overwrites below
+        saved = self.stickers[first]
+        target = first
+        for source in rest:
+            self.stickers[target] = self.stickers[source]
+            target = source
+        self.stickers[target] = saved
+
+    def randomize(self, number):
+        """
+        Scramble the cube with `number` random layer moves.  Each move
+        draws a face, a layer and a turn count of 1..3 from `np.random`.
+        """
+        for _ in range(number):
+            face_index = np.random.randint(6)
+            layer = np.random.randint(self.n)
+            quarter_turns = np.random.randint(3) + 1
+            self.move(self.dictface[face_index], layer, quarter_turns)
+
+    def _render_points(self, points, viewpoint):
+        """
+        Helper for the render functions: project 3-d `points` as seen
+        from `viewpoint`.  Returns one array per point holding the two
+        projected coordinates followed by a depth value usable as zorder.
+        """
+        dist_sq = np.dot(viewpoint, viewpoint)
+        dist = np.sqrt(dist_sq)
+        # camera basis: z along the viewpoint, x and y spanning the image
+        cam_z = viewpoint / dist
+        cam_x = np.cross(np.array([0., 1., 0.]), cam_z)
+        cam_x /= np.sqrt(np.dot(cam_x, cam_x))
+        cam_y = np.cross(cam_z, cam_x)
+
+        projected = []
+        for point in points:
+            offset = point - viewpoint
+            scaled = 0.5 * offset * dist_sq / np.dot(offset, -1. * viewpoint)
+            projected.append(np.array([np.dot(cam_x, scaled),
+                                       np.dot(cam_y, scaled),
+                                       np.dot(cam_z, offset / dist)]))
+        return projected
+
+    def render_views(self, ax):
+        """
+        Make three projected 3-dimensional views of the cube for the
+        `render()` function.  Because of zorder / occlusion issues,
+        this code is very brittle; it will not work for all viewpoints
+        (the `np.dot(zdir, viewpoint)` test is not general; the correct
+        test involves the 'handedness' of the projected polygon).
+
+        Everything is drawn onto the matplotlib axes `ax`; nothing is
+        returned.
+        """
+        # edge length of one cubie on a cube spanning [-1, 1]
+        csz = 2. / self.n
+        x2 = 8.
+        x1 = 0.5 * x2
+        # each entry: (camera position, 2-d offset of that view on `ax`)
+        for viewpoint, shift in [
+            (np.array([-x1, -x1, x2]), np.array([-1.5, 3.])),
+            (np.array([x1, x1, x2]), np.array([0.5, 3.])),
+            (np.array([x2, x1, -x1]), np.array([2.5, 3.]))]:
+            for f, i in self.facedict.items():
+                zdir = self.normals[i]
+                # skip faces whose normal points away from the camera
+                if np.dot(zdir, viewpoint) < 0:
+                    continue
+                xdir = self.xdirs[i]
+                ydir = np.cross(zdir, xdir)  # insanity: left-handed!
+                # the plastic square sits slightly below the stickers
+                psc = 1. - 2. * self.stickerthickness
+                corners = [psc * zdir - psc * xdir - psc * ydir,
+                           psc * zdir + psc * xdir - psc * ydir,
+                           psc * zdir + psc * xdir + psc * ydir,
+                           psc * zdir - psc * xdir + psc * ydir]
+                projects = self._render_points(corners, viewpoint)
+                xys = [p[0:2] + shift for p in projects]
+                # NOTE(review): this zorder is overwritten below before
+                # it is ever read.
+                zorder = np.mean([p[2] for p in projects])
+                ax.add_artist(Polygon(xys, ec='none', fc=self.plasticcolor))
+                # one polygon per sticker, coloured by its current value
+                for j in range(self.n):
+                    for k in range(self.n):
+                        corners = self._stickerpolygon(xdir, ydir, zdir, csz, j,
+                                                       k)
+                        projects = self._render_points(corners, viewpoint)
+                        xys = [p[0:2] + shift for p in projects]
+                        ax.add_artist(Polygon(xys, ec='none',
+                                              fc=self.stickercolors[
+                                                  self.stickers[i, j, k]]))
+                # face letter, placed at the projection of 1.5 * normal;
+                # the font size is divided by the (negated) depth
+                x0, y0, zorder = \
+                    self._render_points([1.5 * self.normals[i], ], viewpoint)[0]
+                ax.text(x0 + shift[0], y0 + shift[1], f, color=self.labelcolor,
+                        ha='center', va='center', rotation=20,
+                        fontsize=self.fontsize / (-zorder))
+
+    def _stickerpolygon(self, xdir, ydir, zdir, csz, j, k):
+        """
+        Return the eight 3-d points outlining the sticker at grid cell
+        (`j`, `k`) of the face spanned by `xdir` / `ydir` with normal
+        `zdir`.  `csz` is the cubie edge length.  The outline is the
+        cubie square inset by `small` on every side, with each corner
+        clipped by a further `small`.
+        """
+        # margin between cubie edge and sticker, as a fraction of a cubie
+        small = 0.5 * (1. - self.stickerwidth)
+        large = 1. - small
+        # `zdir - xdir - ydir` is the face corner the grid is measured from
+        return [zdir - xdir + (j + small) * csz * xdir - ydir + (
+                k + small + small) * csz * ydir,
+                zdir - xdir + (j + small + small) * csz * xdir - ydir + (
+                        k + small) * csz * ydir,
+                zdir - xdir + (j + large - small) * csz * xdir - ydir + (
+                        k + small) * csz * ydir,
+                zdir - xdir + (j + large) * csz * xdir - ydir + (
+                        k + small + small) * csz * ydir,
+                zdir - xdir + (j + large) * csz * xdir - ydir + (
+                        k + large - small) * csz * ydir,
+                zdir - xdir + (j + large - small) * csz * xdir - ydir + (
+                        k + large) * csz * ydir,
+                zdir - xdir + (j + small + small) * csz * xdir - ydir + (
+                        k + large) * csz * ydir,
+                zdir - xdir + (j + small) * csz * xdir - ydir + (
+                        k + large - small) * csz * ydir]
+
+    def render_flat(self, ax):
+        """
+        Draw the unfolded ("map") view of the cube onto `ax` for the
+        `render()` function: one coloured rectangle per sticker plus a
+        letter label per face.  Plastic and stickers are not rendered
+        realistically here.
+        """
+        cell = 1. / self.n
+        for face_name, face_idx in self.facedict.items():
+            left, bottom = self.pltpos[face_idx]
+            for col in range(self.n):
+                for row in range(self.n):
+                    value = self.stickers[face_idx, col, row]
+                    patch = Rectangle((left + col * cell, bottom + row * cell),
+                                      cell, cell,
+                                      ec=self.plasticcolor,
+                                      fc=self.stickercolors[value])
+                    ax.add_artist(patch)
+            ax.text(left + 0.5, bottom + 0.5, face_name,
+                    color=self.labelcolor, ha='center', va='center',
+                    rotation=20, fontsize=self.fontsize)
+
+    def render(self, fig, flat=True, views=True):
+        """
+        Draw the cube and return the figure used.  `flat` adds the
+        unwrapped map, `views` adds the three perspective views; at
+        least one of them must be requested.  A falsy `fig` makes the
+        method create a new figure sized to the selected layout.
+        """
+        assert flat or views
+        # axis limits depend on which parts of the layout are shown
+        if not views:
+            xlim, ylim = (-1.2, 3.2), (-1.2, 2.2)
+        elif not flat:
+            xlim, ylim = (-2.4, 3.4), (2., 4.)
+        else:
+            xlim, ylim = (-2.4, 3.4), (-1.2, 4.)
+        if not fig:
+            width = (xlim[1] - xlim[0]) * self.n / 5.
+            height = (ylim[1] - ylim[0]) * self.n / 5.
+            fig = plt.figure(figsize=(width, height))
+        ax = fig.add_axes((0, 0, 1, 1), frameon=False, xticks=[], yticks=[])
+        if views:
+            self.render_views(ax)
+        if flat:
+            self.render_flat(ax)
+        ax.set_xlim(xlim)
+        ax.set_ylim(ylim)
+        return fig
+
+    def score(self):
+        """
+        Return a closeness-to-solved score (higher is better).
+
+        For every face, count the stickers that share the colour of the
+        face's reference sticker at index (n // 2, n // 2) -- the centre
+        for odd `n` -- and multiply the six counts.  A solved cube
+        scores (n * n) ** 6.
+
+        The face size follows `self.n` rather than being fixed at 3, so
+        the score (and the constructor, which calls it) works for any
+        cube size.
+        """
+        centre = self.n // 2
+        total = 1
+        for side in self.stickers:
+            matching = np.count_nonzero(side == side[centre, centre])
+            # int() keeps the result a plain Python int
+            total *= int(matching)
+        return total
+
+    def move_by_action(self, action):
+        """
+        Apply `action` to the outer layer (layer 0) of its face.  The
+        face and the turn count are read from the 'f' and 'd' entries
+        of `action.value`.
+        """
+        params = action.value
+        self.move(params.get('f'), 0, params.get('d'))
+
+    def solved(self, score):
+        """Tell whether `score` equals the score of the pristine cube."""
+        is_solved = score == self.solved_score
+        return is_solved
+
+    def get_state(self):
+        """
+        Return the sticker array itself (not a copy); later moves are
+        visible through the returned object.
+        """
+        state = self.stickers
+        return state
+
+    def opposite_actions(self, previous_action_name, action):
+        """
+        Tell whether `previous_action_name` is the name stored under
+        'opposite' in `action.value`, i.e. whether `action` would undo
+        the previous action.
+        """
+        undo_name = action.value.get('opposite')
+        return previous_action_name == undo_name
+
+
+def checkerboard(cube):
+    """
+    Apply half turns to every second layer of the U, F and R faces of
+    `cube`; for even cube sizes the F half turns are applied once more.
+    """
+    alternate_layers = range(0, cube.n, 2)
+    faces = ['U', 'F', 'R']
+    if cube.n % 2 == 0:
+        faces.append('F')
+    for face in faces:
+        for layer in alternate_layers:
+            cube.move(face, layer, 2)
+
+
+class CubeletSet:
+    """
+    Helper structure for Rubik's observations type converter.
+    See CubeConverter() for more info.
+
+    A set describes one kind of cubelet (all having `dim` stickers):
+    - `colours_list[i]` is the sorted tuple of colours of cubelet `i`,
+    - `assign_table` has the shape of the sticker observation and holds,
+      for every sticker cell, the index of the slot it belongs to (cells
+      holding any other value belong to no slot of this set),
+    - `is_even[i]` is a parity flag per cubelet / slot; decode() reverses
+      the colour order when the parities of cubelet and slot differ.
+    """
+
+    def __init__(self, colours_list, assign_table, is_even=None):
+        self.count = len(colours_list)
+        self.colours = colours_list
+        self.dim = len(colours_list[0])
+        self.assign_table = assign_table
+        self.is_even = [False] * self.count if is_even is None else is_even
+
+        self.ids = None
+        self.position_table = None
+
+        self.make_ids()
+        self.make_position_table()
+
+    def make_ids(self):
+        """Build the lookup from a sorted colour tuple to its cubelet id."""
+        self.ids = {colours: i for i, colours in enumerate(self.colours)}
+
+    def make_position_table(self):
+        """
+        For every slot, collect the (sorted) index tuples of the sticker
+        cells that `assign_table` assigns to it.
+        """
+        self.position_table = [sorted([tuple(place) for place in np.transpose(
+            np.where(self.assign_table == i))]) for i in range(self.count)]
+
+    def encode(self, observation):
+        """
+        Encodes positions of cubelets in the set.
+        Returns as one-hot over possible positions
+
+        The result has one row per cubelet and 24 columns; column
+        `dim * slot + rotation` is set for the slot the cubelet occupies,
+        where `rotation` is the index of its smallest colour among the
+        slot's cells.
+        """
+        # np.float64 instead of the `np.float` alias, which NumPy removed
+        # in 1.24 (it was the builtin float, i.e. float64).
+        res = np.zeros((self.count, 24), dtype=np.float64)
+
+        for i in range(self.count):
+            position = self.position_table[i]
+            colours = [observation[place] for place in position]
+            colours_sorted = tuple(sorted(colours))
+            cubelet_id = self.ids[colours_sorted]
+            res[cubelet_id, self.dim * i + np.argmin(colours)] = 1.
+
+        return res
+
+    def decode(self, observation, result):
+        """
+        Transforms positions of cubelets to sticker colours.
+        Places proper stickers in the result array.
+
+        `observation` is the one-hot block produced by encode();
+        `result` is modified in place and nothing is returned.
+        """
+        for i in range(self.count):
+            idx = np.where(observation[i] == 1)[0][0]
+            place = idx // self.dim
+            rotation = idx % self.dim
+
+            colours = self.colours[i]
+            colours_rotated = [0] * self.dim
+            # walk the colours backwards when the parities differ
+            step_direction = -1 if self.is_even[i] ^ self.is_even[place] else 1
+
+            for j in range(self.dim):
+                colours_rotated[(rotation + j * step_direction) % self.dim] = \
+                    colours[j]
+
+            for k, pos in enumerate(self.position_table[place]):
+                result[pos] = colours_rotated[k]
+
+
+class CubeConverter:
+    """
+    Allows transformation between sticker-based and cubelet-based observations
+    for Rubik's Cube environment.
+
+    Sticker-based observation encodes colours of stickers on all the 6*3*3
+    positions.
+
+    Cubelet-based observation is taken from https://arxiv.org/pdf/1805.07470.pdf
+    and encodes positions of 8 corner cubelets and 12 edge cubelets.
+    """
+
+    def __init__(self, debug=False):
+        """
+        Build the corner and edge cubelet sets.
+
+        With `debug=True` each conversion is checked by converting the
+        result back and asserting equality with the input.
+        """
+        self.debug = debug
+
+        # filler for cells of the (6, 3, 3) tables that do not belong to
+        # the set; it never equals a slot index, so CubeletSet skips it
+        x = -1
+        # 8 corner cubelets with three stickers each; assign_table gives
+        # the corner slot index of every corner cell of every face
+        self.corners = CubeletSet(
+            colours_list=[(0, 2, 5), (0, 3, 5), (0, 2, 4), (0, 3, 4), (1, 2, 5),
+                          (1, 3, 5), (1, 2, 4), (1, 3, 4)],
+            is_even=[False, True, True, False, True, False, False, True],
+            assign_table=np.array(
+                [[[0, x, 1],
+                  [x, x, x],
+                  [2, x, 3]],
+
+                 [[5, x, 4],
+                  [x, x, x],
+                  [7, x, 6]],
+
+                 [[4, x, 0],
+                  [x, x, x],
+                  [6, x, 2]],
+
+                 [[7, x, 3],
+                  [x, x, x],
+                  [5, x, 1]],
+
+                 [[6, x, 2],
+                  [x, x, x],
+                  [7, x, 3]],
+
+                 [[5, x, 1],
+                  [x, x, x],
+                  [4, x, 0]]]),
+        )
+
+        # 12 edge cubelets with two stickers each; no parity flags are
+        # passed, so CubeletSet falls back to all-False
+        self.edges = CubeletSet(
+            colours_list=[(0, 5), (0, 2), (0, 3), (0, 4), (2, 5), (3, 5),
+                          (2, 4), (3, 4), (1, 5), (1, 2), (1, 3), (1, 4)],
+            assign_table=np.array(
+                [[[x, 0, x],
+                  [1, x, 2],
+                  [x, 3, x]],
+
+                 [[x, 8, x],
+                  [10, x, 9],
+                  [x, 11, x]],
+
+                 [[x, 4, x],
+                  [9, x, 1],
+                  [x, 6, x]],
+
+                 [[x, 7, x],
+                  [10, x, 2],
+                  [x, 5, x]],
+
+                 [[x, 6, x],
+                  [11, x, 3],
+                  [x, 7, x]],
+
+                 [[x, 5, x],
+                  [8, x, 0],
+                  [x, 4, x]]]),
+        )
+
+    def convert_sticker_to_cubelet(self, basic_observation,
+                                   force_no_debug=False):
+        """
+        Turn a sticker-based observation into the cubelet-based one: the
+        corner rows followed by the edge rows.
+
+        In debug mode (unless `force_no_debug` is set) the result is
+        converted back and asserted to match the input.
+        """
+        corner_rows = self.corners.encode(basic_observation)
+        edge_rows = self.edges.encode(basic_observation)
+        encoded = np.concatenate([corner_rows, edge_rows], axis=0)
+
+        if not self.debug or force_no_debug:
+            return encoded
+
+        # force_no_debug=True keeps the round trip from recursing
+        assert np.array_equal(
+            basic_observation,
+            self.convert_cubelet_to_sticker(encoded, force_no_debug=True))
+        return encoded
+
+    def convert_cubelet_to_sticker(self, reduced_observation,
+                                   force_no_debug=False):
+        """
+        Turn a cubelet-based observation back into a (6, 3, 3) float32
+        sticker array.  The leading `corners.count` rows are decoded as
+        corners and the rest as edges; each face centre is set to the
+        face index.
+
+        In debug mode (unless `force_no_debug` is set) the result is
+        converted forward again and asserted to match the input.
+        """
+        n_corners = self.corners.count
+        stickers = np.zeros((6, 3, 3), dtype=np.float32)
+
+        self.corners.decode(reduced_observation[:n_corners, :], stickers)
+        self.edges.decode(reduced_observation[n_corners:, :], stickers)
+
+        # centres never move: face i keeps colour i
+        faces = np.arange(6)
+        stickers[faces, 1, 1] = faces
+
+        if not self.debug or force_no_debug:
+            return stickers
+
+        # force_no_debug=True keeps the round trip from recursing
+        assert np.array_equal(
+            reduced_observation,
+            self.convert_sticker_to_cubelet(stickers, force_no_debug=True))
+        return stickers
+
+
+class DebugLevel(Enum):
+    """Verbosity levels accepted by Rubik.config().
+
+    In the code visible here only INFO is acted upon: Rubik.reset()
+    prints the scramble size at that level.
+    """
+    WARNING = 0
+    INFO = 1
+    VERBOSE = 2
+
+
+class Rubik(base.ModelEnv):
+    """
+    Rubik's Cube as RL environment
+
+    step_limit:
+        Number of actions until episode termination
+    shuffles:
+        Number of moves taken to initially shuffle the cube
+    obs_type: 'sticker' or 'cubelet'
+        State encoding: 'sticker' gives a one-hot encoding of every
+        sticker colour, 'cubelet' the cubelet-position encoding; see
+        CubeConverter for more info
+    """
+    # only the 'human' render mode is declared
+    metadata = {'render.modes': ['human']}
+
+    def __init__(self, step_limit=100, shuffles=50, obs_type='stickers'):
+        """
+        Create a 3x3x3 cube environment.
+
+        step_limit: number of actions until the episode is terminated.
+        shuffles: number of scramble moves applied by reset().
+        obs_type: 'sticker' or 'cubelet'; the plural spellings
+            'stickers' / 'cubelets' are accepted as well.  Any other
+            value behaves like 'cubelet', as before.
+        """
+        self.cube = Cube(3, whiteplastic=False)
+        self.action_space = spaces.Discrete(len(ACTION_LOOKUP))
+        self.fig = None
+        # Copy the array: Cube mutates its stickers in place, so a bare
+        # reference would follow the live cube and every state reached
+        # before the first reset() would compare as solved.
+        self.solved_state = self.cube.get_state().copy()
+
+        self.observation_space = None
+        # The rest of the class compares against the singular names, so
+        # the plural default used to select the cubelet encoding by
+        # accident.  Normalise the spelling once, here.
+        plural_aliases = {'stickers': 'sticker', 'cubelets': 'cubelet'}
+        self.obs_type = plural_aliases.get(obs_type, obs_type)
+        self.converter = None
+        self.create_observation_space()
+
+        self.scramble = []
+
+        self.debug_level = DebugLevel.WARNING
+        self.render_views = True
+        self.render_flat = True
+        self.render_cube = False
+        self.scramble_size = shuffles
+
+        self.num_steps = 0
+        self.step_limit = step_limit
+
+        # applies the default rendering / debug settings
+        self.config()
+
+    def config(self, debug_level=DebugLevel.WARNING, render_cube=False,
+               scramble_size=None, render_views=True,
+               render_flat=True, step_limit=None):
+        """
+        Update debug and rendering settings.  `scramble_size` and
+        `step_limit` are only changed when a value other than None is
+        given.  Enabling `render_cube` switches matplotlib to
+        interactive mode and shows the window.
+        """
+        self.debug_level = debug_level
+        self.render_views = render_views
+        self.render_flat = render_flat
+        self.render_cube = render_cube
+
+        if scramble_size is not None:
+            self.scramble_size = scramble_size
+        if step_limit is not None:
+            self.step_limit = step_limit
+
+        if not self.render_cube:
+            return
+        plt.ion()
+        plt.show()
+
+    def create_observation_space(self):
+        """
+        Set `observation_space` for the configured `obs_type`; for every
+        type other than 'sticker' a CubeConverter is created as well.
+        """
+        if self.obs_type == 'sticker':
+            # 6 faces * 3 * 3 stickers, one-hot over 6 colours
+            flat_size = 6 * 3 * 3 * 6
+        else:  # self.obs_type == 'cubelet'
+            # 20 cubelets, one-hot over 24 positions each
+            flat_size = 20 * 24
+            self.converter = CubeConverter()
+        self.observation_space = spaces.Box(low=0, high=1,
+                                            shape=(flat_size,),
+                                            dtype=np.float32)
+
+    def step(self, action):
+        """
+        Apply `action` and return `(observation, reward, done, info)`.
+
+        The observation is flattened.  The reward is 0 when the cube is
+        solved after the move and -1 otherwise.  The episode ends when
+        the cube is solved or the step limit has been reached.
+        `info['solved']` reports the solved flag.
+        """
+        self._take_action(action)
+        self.num_steps += 1
+
+        observation = self._get_state()
+        solved = np.array_equal(self.cube.get_state(), self.solved_state)
+        reward = 0 if solved else -1
+
+        # `>=` rather than `==`: the limit can be lowered through
+        # config() or restore_state(), and an equality test would then
+        # never fire.
+        episode_over = solved or (self.num_steps >= self.step_limit)
+
+        return observation.flatten(), reward, episode_over, {'solved': solved}
+
+    def reset(self):
+        """
+        Start a new episode: replace the cube with a fresh one, scramble
+        it with `scramble_size` moves and return the flattened
+        observation.
+        """
+        self.num_steps = 0
+        self.scramble = []
+        self.cube = Cube(3, whiteplastic=False)
+
+        if self.scramble_size > 0:
+            if self.debug_level == DebugLevel.INFO:
+                print('scramble ' + str(self.scramble_size) + ' moves')
+            self.randomize(self.scramble_size)
+
+        observation = self._get_state()
+        return observation.flatten()
+
+    def render(self, mode='human'):
+        """
+        Redraw the cube with matplotlib.  Does nothing unless rendering
+        was enabled through config(render_cube=True).  `mode` is not
+        used.
+        """
+        if not self.render_cube:
+            return
+        if self.fig:
+            # wipe the previous frame before drawing the new one
+            plt.clf()
+        self.fig = self.cube.render(self.fig, views=self.render_views,
+                                    flat=self.render_flat)
+        plt.pause(0.001)
+
+    def _take_action(self, action):
+        """Look up the Actions member for index `action` and apply it."""
+        move = ACTION_LOOKUP[action]
+        self.cube.move_by_action(move)
+
+    @staticmethod
+    def action_name(action):
+        """Return the enum member name for the action index `action`."""
+        member = ACTION_LOOKUP[action]
+        return member.name
+
+    def get_scramble(self):
+        """
+        Return the list of action names applied by the last scramble
+        (the list object itself, not a copy).
+        """
+        scramble = self.scramble
+        return scramble
+
+    def valid_scramble_action(self, action, previous_actions):
+        """
+        Determines whether given action can be used during scrambling.
+        Action is considered invalid in case it undoes the previous one or
+        equals previous two (which would give three equal actions in a row,
+        so essentially a single one).
+
+        action: an Actions member.
+        previous_actions: names of the actions applied so far.
+
+        The undo rule applies as soon as there is one previous action
+        and the repetition rule as soon as there are two.  The former
+        guards (`> 1` and `> 2`) each skipped the first case in which
+        their rule applies.
+        """
+        if not previous_actions:
+            return True
+
+        last = previous_actions[-1]
+        if self.cube.opposite_actions(last, action):
+            return False
+
+        repeats_last_two = (len(previous_actions) >= 2
+                            and previous_actions[-2] == last
+                            and action.name == last)
+        return not repeats_last_two
+
+    def randomize(self, number):
+        """
+        Scramble the cube with `number` random actions.  Candidates
+        rejected by valid_scramble_action() are redrawn; accepted ones
+        are recorded by name in `self.scramble`.
+        """
+        remaining = number
+        while remaining > 0:
+            action = ACTION_LOOKUP[np.random.randint(len(ACTION_LOOKUP))]
+            if not self.valid_scramble_action(action, self.scramble):
+                continue
+            self.scramble.append(action.name)
+            self.cube.move_by_action(action)
+            remaining -= 1
+
+    def _get_state(self):
+        """
+        Return the (unflattened) observation for the current cube: a
+        one-hot colour encoding for obs_type 'sticker', otherwise the
+        converter's cubelet encoding.
+        """
+        stickers = self.cube.get_state()
+        if self.obs_type != 'sticker':
+            return self.converter.convert_sticker_to_cubelet(stickers)
+        # compare every sticker against the six colour values
+        one_hot = np.arange(6) == stickers[..., np.newaxis]
+        return one_hot.astype(int)
+
+    def clone_state(self):
+        """
+        Return a snapshot tuple for restore_state(): a deep copy of the
+        cube followed by the figure, the debug / render settings, the
+        scramble size, the step counter and the step limit.
+        """
+        cube_copy = copy.deepcopy(self.cube)
+        settings = (self.fig, self.debug_level, self.render_views,
+                    self.render_flat, self.render_cube)
+        counters = (self.scramble_size, self.num_steps, self.step_limit)
+        return (cube_copy,) + settings + counters
+
+    def restore_state(self, state):
+        """
+        Restore a snapshot produced by clone_state() and return the
+        observation of the restored cube.
+
+        The observation is flattened so that it has the same shape as
+        the ones returned by reset() and step(); previously the
+        unflattened state was returned.
+        """
+        (
+            cube,
+            self.fig,
+            self.debug_level,
+            self.render_views,
+            self.render_flat,
+            self.render_cube,
+            self.scramble_size,
+            self.num_steps,
+            self.step_limit,
+        ) = state
+        # copy again so the snapshot can be restored more than once
+        self.cube = copy.deepcopy(cube)
+
+        return self._get_state().flatten()
+
+
+# Discrete action index -> Actions member.  The position in the tuple
+# is the index used by the environment's action space.
+ACTION_LOOKUP = dict(enumerate((
+    Actions.U, Actions.U_1,
+    Actions.D, Actions.D_1,
+    Actions.F, Actions.F_1,
+    Actions.B, Actions.B_1,
+    Actions.R, Actions.R_1,
+    Actions.L, Actions.L_1,
+)))
+
+
+class GoalRubik(Rubik):
+    """
+    Goal-oriented interface for Rubik environment.
+
+    Observations are dicts with the keys 'observation', 'achieved_goal'
+    and 'desired_goal'.  The reward is 0 when the achieved goal equals
+    the desired goal and -1 otherwise.
+    """
+
+    def __init__(self, step_limit=100, shuffles=50, obs_type='sticker'):
+        super().__init__(step_limit, shuffles, obs_type)
+        # Flattened like the observations from step() / reset(): with an
+        # unflattened goal the shapes never matched, np.array_equal was
+        # always False and the reward was stuck at -1.
+        self.goal_obs = self._get_state().flatten()
+
+    def create_observation_space(self):
+        """Same as in Rubik, but with twice the flat size."""
+        if self.obs_type == 'sticker':
+            self.observation_space = spaces.Box(low=0, high=1,
+                                                shape=(6 * 3 * 3 * 12,),
+                                                dtype=np.float32)
+        else:
+            self.observation_space = spaces.Box(low=0, high=1, shape=(20 * 48,),
+                                                dtype=np.float32)
+            self.converter = CubeConverter()
+
+    def step(self, action):
+        """
+        Step the underlying environment, wrap the observation into the
+        goal dict and recompute the reward against the desired goal.
+        """
+        obs, reward, done, info = super().step(action)
+
+        obs = self._get_goal_observation(obs)
+        reward = self._calculate_reward(obs['observation'],
+                                        obs['achieved_goal'],
+                                        obs['desired_goal'])
+
+        return obs, reward, done, info
+
+    def reset(self):
+        """Reset the underlying environment and wrap its observation."""
+        obs = super().reset()
+        return self._get_goal_observation(obs)
+
+    def _get_goal_observation(self, obs):
+        # the achieved goal is the observation itself
+        return self._convert_observation(obs, obs, self.goal_obs)
+
+    def _convert_observation(self, obs, state, goal):
+        return {'observation': obs, 'achieved_goal': state,
+                'desired_goal': goal}
+
+    def _calculate_reward(self, _, state, goal):
+        return 0 if np.array_equal(state, goal) else -1
+
+    def set_goal(self, goal_obs):
+        """
+        Replace the desired goal.  `goal_obs` is compared with
+        np.array_equal against flattened observations, so it has to be
+        flat as well.
+        """
+        self.goal_obs = goal_obs
diff --git a/configs/deterministic_mcts_value_rubik.gin b/configs/deterministic_mcts_value_rubik.gin
new file mode 100644
index 0000000000000000000000000000000000000000..4a138f641fd5447562e75e92d05f10d70691e575
--- /dev/null
+++ b/configs/deterministic_mcts_value_rubik.gin
@@ -0,0 +1,63 @@
+# Parameters for DeterministicMCTSAgent:
+# ==============================================================================
+DeterministicMCTSAgent.avoid_loops = True
+DeterministicMCTSAgent.gamma = 0.99
+DeterministicMCTSAgent.n_passes = 10
+DeterministicMCTSAgent.value_traits_class = @alpacka.agents.deterministic_mcts.ScalarValueTraits
+DeterministicMCTSAgent.value_accumulator_class = @alpacka.agents.deterministic_mcts.ScalarValueAccumulator
+
+# Parameters for KerasNetwork:
+# ==============================================================================
+KerasNetwork.loss = ('mean_squared_error')
+KerasNetwork.metrics = ['mse']
+KerasNetwork.model_fn = @alpacka.networks.keras.mlp
+KerasNetwork.optimizer = 'adam'
+KerasNetwork.weight_decay = 0.0
+KerasNetwork.train_callbacks = None
+
+# Parameters for mlp:
+# ==============================================================================
+mlp.activation = 'relu'
+mlp.hidden_sizes = (256,)
+
+# Parameters for LocalBatchStepper:
+# ==============================================================================
+# None.
+
+# Parameters for Runner:
+# ==============================================================================
+Runner.agent_class = @alpacka.agents.DeterministicMCTSAgent
+Runner.batch_stepper_class = @alpacka.batch_steppers.LocalBatchStepper
+Runner.env_class = @alpacka.envs.Rubik
+Runner.episode_time_limit = 100
+Runner.n_envs = 10
+Runner.n_epochs = 1500
+Runner.n_precollect_epochs = 15
+Runner.network_class = @alpacka.networks.KerasNetwork
+Runner.trainer_class = @alpacka.trainers.SupervisedTrainer
+
+# Parameters for ScalarValueAccumulator:
+# ==============================================================================
+# None.
+
+# Parameters for ScalarValueTraits:
+# ==============================================================================
+# None.
+
+# Parameters for Rubik:
+# ==============================================================================
+Rubik.step_limit = 12
+Rubik.shuffles = 4
+Rubik.obs_type = 'sticker'
+
+# Parameters for SupervisedTrainer:
+# ==============================================================================
+SupervisedTrainer.batch_size = 32
+SupervisedTrainer.n_steps_per_epoch = 13
+SupervisedTrainer.replay_buffer_capacity = 500000
+SupervisedTrainer.replay_buffer_sampling_hierarchy = ['solved']
+SupervisedTrainer.target = @alpacka.trainers.supervised.target_value
+
+# Parameters for target_value:
+# ==============================================================================
+# None.