diff --git a/examples/point_maze_td3.py b/examples/point_maze_td3.py index a31326271..e72351839 100644 --- a/examples/point_maze_td3.py +++ b/examples/point_maze_td3.py @@ -9,13 +9,12 @@ from tianshou.trainer import offpolicy_trainer from tianshou.data import Collector, ReplayBuffer from tianshou.env import VectorEnv, SubprocVectorEnv - from continuous_net import Actor, Critic def get_args(): parser = argparse.ArgumentParser() - parser.add_argument('--task', type=str, default='PointMaze-v0') + parser.add_argument('--task', type=str, default='PointMaze-v1') parser.add_argument('--seed', type=int, default=1626) parser.add_argument('--buffer-size', type=int, default=20000) parser.add_argument('--actor-lr', type=float, default=3e-5) diff --git a/tianshou/env/__init__.py b/tianshou/env/__init__.py index 81f20145b..878c800b5 100644 --- a/tianshou/env/__init__.py +++ b/tianshou/env/__init__.py @@ -2,8 +2,10 @@ from tianshou.env.common import EnvWrapper, FrameStack from tianshou.env.vecenv import BaseVectorEnv, VectorEnv, \ SubprocVectorEnv, RayVectorEnv +from tianshou.env import mujoco __all__ = [ + 'mujoco', 'EnvWrapper', 'FrameStack', 'BaseVectorEnv', diff --git a/tianshou/env/mujoco/point_maze_env.py b/tianshou/env/mujoco/point_maze_env.py index 81ce29dc0..c8e8ef84b 100644 --- a/tianshou/env/mujoco/point_maze_env.py +++ b/tianshou/env/mujoco/point_maze_env.py @@ -233,7 +233,7 @@ def get_top_down_view(self): def valid(row, col): return self._view.shape[0] > row >= 0 \ - and self._view.shape[1] > col >= 0 + and self._view.shape[1] > col >= 0 def update_view(x, y, d, row=None, col=None): if row is None or col is None: @@ -252,36 +252,36 @@ def update_view(x, y, d, row=None, col=None): if valid(row, col): self._view[row, col, d] += ( - (min(1., row_frac + 0.5) - max(0., row_frac - 0.5)) * - (min(1., col_frac + 0.5) - max(0., col_frac - 0.5))) + (min(1., row_frac + 0.5) - max(0., row_frac - 0.5)) * + (min(1., col_frac + 0.5) - max(0., col_frac - 0.5))) if valid(row - 1, col): self._view[row - 1, col, d] += ( - (max(0., 0.5 - row_frac)) * - (min(1., col_frac + 0.5) - max(0., col_frac - 0.5))) + (max(0., 0.5 - row_frac)) * + (min(1., col_frac + 0.5) - max(0., col_frac - 0.5))) if valid(row + 1, col): self._view[row + 1, col, d] += ( - (max(0., row_frac - 0.5)) * - (min(1., col_frac + 0.5) - max(0., col_frac - 0.5))) + (max(0., row_frac - 0.5)) * + (min(1., col_frac + 0.5) - max(0., col_frac - 0.5))) if valid(row, col - 1): self._view[row, col - 1, d] += ( - (min(1., row_frac + 0.5) - max(0., row_frac - 0.5)) * - (max(0., 0.5 - col_frac))) + (min(1., row_frac + 0.5) - max(0., row_frac - 0.5)) * + (max(0., 0.5 - col_frac))) if valid(row, col + 1): self._view[row, col + 1, d] += ( - (min(1., row_frac + 0.5) - max(0., row_frac - 0.5)) * - (max(0., col_frac - 0.5))) + (min(1., row_frac + 0.5) - max(0., row_frac - 0.5)) * + (max(0., col_frac - 0.5))) if valid(row - 1, col - 1): self._view[row - 1, col - 1, d] += ( - (max(0., 0.5 - row_frac)) * max(0., 0.5 - col_frac)) + (max(0., 0.5 - row_frac)) * max(0., 0.5 - col_frac)) if valid(row - 1, col + 1): self._view[row - 1, col + 1, d] += ( - (max(0., 0.5 - row_frac)) * max(0., col_frac - 0.5)) + (max(0., 0.5 - row_frac)) * max(0., col_frac - 0.5)) if valid(row + 1, col + 1): self._view[row + 1, col + 1, d] += ( - (max(0., row_frac - 0.5)) * max(0., col_frac - 0.5)) + (max(0., row_frac - 0.5)) * max(0., col_frac - 0.5)) if valid(row + 1, col - 1): self._view[row + 1, col - 1, d] += ( - (max(0., row_frac - 0.5)) * max(0., 0.5 - col_frac)) + (max(0., row_frac - 0.5)) * max(0., 0.5 - col_frac)) # Draw ant. robot_x, robot_y = self.wrapped_env.get_body_com("torso")[:2] @@ -376,7 +376,8 @@ def get_range_sensor_obs(self): sensor_readings = np.zeros((self._n_bins, 3)) for ray_idx in range(self._n_bins): ray_ori = (ori - self._sensor_span * 0.5 + ( - 2 * ray_idx + 1.0) / (2 * self._n_bins) * self._sensor_span) + 2 * ray_idx + 1.0) / + (2 * self._n_bins) * self._sensor_span) ray_segments = [] # Get all segments that intersect with ray. for seg in segments: @@ -401,8 +402,8 @@ def get_range_sensor_obs(self): 2 if maze_env_utils.can_move(seg_type) else # Block. None) if first_seg["distance"] <= self._sensor_range: - sensor_readings[ray_idx][idx] = ( - self._sensor_range - first_seg[ + sensor_readings[ray_idx][idx] = \ + (self._sensor_range - first_seg[ "distance"]) / self._sensor_range return sensor_readings