From 1d94337fc591955cc730133670ca813134e287d9 Mon Sep 17 00:00:00 2001 From: BELFADIL BELFADIL Date: Wed, 2 Mar 2022 15:26:25 +0100 Subject: [PATCH 01/12] Fixed hardcoded reward_threshold --- test/offline/test_cql.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/offline/test_cql.py b/test/offline/test_cql.py index be43ea247..ef003a3f8 100644 --- a/test/offline/test_cql.py +++ b/test/offline/test_cql.py @@ -26,6 +26,7 @@ def get_args(): parser = argparse.ArgumentParser() parser.add_argument('--task', type=str, default='Pendulum-v1') + parser.add_argument('--reward_threshold', type=float, default=-1200) parser.add_argument('--seed', type=int, default=0) parser.add_argument('--hidden-sizes', type=int, nargs='*', default=[64, 64]) parser.add_argument('--actor-lr', type=float, default=1e-3) @@ -78,8 +79,7 @@ def test_cql(args=get_args()): args.state_shape = env.observation_space.shape or env.observation_space.n args.action_shape = env.action_space.shape or env.action_space.n args.max_action = env.action_space.high[0] # float - if args.task == 'Pendulum-v1': - env.spec.reward_threshold = -1200 # too low? 
+ env.spec.reward_threshold = args.reward_threshold args.state_dim = args.state_shape[0] args.action_dim = args.action_shape[0] From e726fb946a17fa2a71e705b21c5fcb522f17db91 Mon Sep 17 00:00:00 2001 From: BELFADIL BELFADIL Date: Thu, 9 Jun 2022 18:39:11 +0200 Subject: [PATCH 02/12] MultiDiscreteToDiscrete gym action space wrapper --- tianshou/env/gym_wrappers.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tianshou/env/gym_wrappers.py b/tianshou/env/gym_wrappers.py index f63bc9e2e..77ec2a463 100644 --- a/tianshou/env/gym_wrappers.py +++ b/tianshou/env/gym_wrappers.py @@ -26,3 +26,26 @@ def __init__(self, env: gym.Env, action_per_branch: int) -> None: def action(self, act: np.ndarray) -> np.ndarray: # modify act return np.array([self.mesh[i][a] for i, a in enumerate(act)]) + +class MultiDiscreteToDiscrete(gym.ActionWrapper): + """Gym environment wrapper to discrete action in multidiscrete environment. + + :param gym.Env env: gym environment with continuous action space. 
+ """ + def __init__(self, env: gym.Env) -> None: + super().__init__(env) + assert isinstance(env.action_space, gym.spaces.MultiDiscrete) + self.num_dim = env.action_space.shape[0] + assert len(set(env.action_space.nvec)) == 1 # TODO support for different num of actions per dim + self.action_per_dim = env.action_space.nvec[0] + self.action_space = gym.spaces.Discrete( + self.action_per_dim ** self.num_dim + ) + + def action(self, act: np.ndarray) -> np.ndarray: + # modify act + converted_act = [] + for i in range(self.num_dim): + converted_act.append(act // self.action_per_dim ** (self.num_dim-i)) + act = act % self.action_per_dim ** (self.num_dim-i) + return np.array(converted_act) \ No newline at end of file From 6e86a0bed7d1117c5ad6a421b483b45a6adfe336 Mon Sep 17 00:00:00 2001 From: BELFADIL BELFADIL Date: Thu, 9 Jun 2022 18:55:40 +0200 Subject: [PATCH 03/12] Fix formatting --- tianshou/env/gym_wrappers.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tianshou/env/gym_wrappers.py b/tianshou/env/gym_wrappers.py index 77ec2a463..b2e6d5223 100644 --- a/tianshou/env/gym_wrappers.py +++ b/tianshou/env/gym_wrappers.py @@ -27,25 +27,27 @@ def action(self, act: np.ndarray) -> np.ndarray: # modify act return np.array([self.mesh[i][a] for i, a in enumerate(act)]) + class MultiDiscreteToDiscrete(gym.ActionWrapper): """Gym environment wrapper to discrete action in multidiscrete environment. :param gym.Env env: gym environment with continuous action space. 
""" + def __init__(self, env: gym.Env) -> None: super().__init__(env) assert isinstance(env.action_space, gym.spaces.MultiDiscrete) self.num_dim = env.action_space.shape[0] - assert len(set(env.action_space.nvec)) == 1 # TODO support for different num of actions per dim + assert len( + set(env.action_space.nvec) + ) == 1 # TODO support for different num of actions per dim self.action_per_dim = env.action_space.nvec[0] - self.action_space = gym.spaces.Discrete( - self.action_per_dim ** self.num_dim - ) + self.action_space = gym.spaces.Discrete(self.action_per_dim**self.num_dim) def action(self, act: np.ndarray) -> np.ndarray: # modify act converted_act = [] for i in range(self.num_dim): - converted_act.append(act // self.action_per_dim ** (self.num_dim-i)) - act = act % self.action_per_dim ** (self.num_dim-i) - return np.array(converted_act) \ No newline at end of file + converted_act.append(act // self.action_per_dim**(self.num_dim - i)) + act = act % self.action_per_dim**(self.num_dim - i) + return np.array(converted_act) From ab3a9522ca89843273d25221d3aac1c7a0585817 Mon Sep 17 00:00:00 2001 From: BELFADIL BELFADIL Date: Sat, 11 Jun 2022 13:34:57 +0200 Subject: [PATCH 04/12] Support for different num of actions per dim --- tianshou/env/__init__.py | 3 ++- tianshou/env/gym_wrappers.py | 28 +++++++++++++--------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/tianshou/env/__init__.py b/tianshou/env/__init__.py index 8b1c71a46..6abea3280 100644 --- a/tianshou/env/__init__.py +++ b/tianshou/env/__init__.py @@ -1,6 +1,6 @@ """Env package.""" -from tianshou.env.gym_wrappers import ContinuousToDiscrete +from tianshou.env.gym_wrappers import ContinuousToDiscrete, MultiDiscreteToDiscrete from tianshou.env.venv_wrappers import VectorEnvNormObs, VectorEnvWrapper from tianshou.env.venvs import ( BaseVectorEnv, @@ -25,4 +25,5 @@ "VectorEnvNormObs", "PettingZooEnv", "ContinuousToDiscrete", + "MultiDiscreteToDiscrete", ] diff --git 
a/tianshou/env/gym_wrappers.py b/tianshou/env/gym_wrappers.py index b2e6d5223..e451abc9e 100644 --- a/tianshou/env/gym_wrappers.py +++ b/tianshou/env/gym_wrappers.py @@ -31,23 +31,21 @@ def action(self, act: np.ndarray) -> np.ndarray: class MultiDiscreteToDiscrete(gym.ActionWrapper): """Gym environment wrapper to discrete action in multidiscrete environment. - :param gym.Env env: gym environment with continuous action space. + :param gym.Env env: gym environment with multidiscrete action space. """ def __init__(self, env: gym.Env) -> None: super().__init__(env) - assert isinstance(env.action_space, gym.spaces.MultiDiscrete) - self.num_dim = env.action_space.shape[0] - assert len( - set(env.action_space.nvec) - ) == 1 # TODO support for different num of actions per dim - self.action_per_dim = env.action_space.nvec[0] - self.action_space = gym.spaces.Discrete(self.action_per_dim**self.num_dim) - - def action(self, act: np.ndarray) -> np.ndarray: - # modify act + nvec = env.action_space.nvec + assert nvec.ndim == 1 + self.bases = np.ones_like(nvec) + for i in range(1, len(self.bases)): + self.bases[i] = self.bases[i-1]*nvec[-i] + self.action_space = gym.spaces.Discrete(np.prod(nvec)) + + def action(self, act: int) -> np.ndarray: converted_act = [] - for i in range(self.num_dim): - converted_act.append(act // self.action_per_dim**(self.num_dim - i)) - act = act % self.action_per_dim**(self.num_dim - i) - return np.array(converted_act) + for b in np.flip(self.bases): + converted_act.append(act // b) + act = act % b + return np.array(converted_act).transpose() From 78105f5019231209e7945342002c09d00bdd2ddc Mon Sep 17 00:00:00 2001 From: BELFADIL BELFADIL Date: Sat, 11 Jun 2022 13:42:16 +0200 Subject: [PATCH 05/12] Fix formatting --- tianshou/env/gym_wrappers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tianshou/env/gym_wrappers.py b/tianshou/env/gym_wrappers.py index e451abc9e..ee2a80cb7 100644 --- a/tianshou/env/gym_wrappers.py +++ 
b/tianshou/env/gym_wrappers.py @@ -40,7 +40,7 @@ def __init__(self, env: gym.Env) -> None: assert nvec.ndim == 1 self.bases = np.ones_like(nvec) for i in range(1, len(self.bases)): - self.bases[i] = self.bases[i-1]*nvec[-i] + self.bases[i] = self.bases[i - 1] * nvec[-i] self.action_space = gym.spaces.Discrete(np.prod(nvec)) def action(self, act: int) -> np.ndarray: From 57be573692fa0a8a4c02c44ff92fa6b7eff339b7 Mon Sep 17 00:00:00 2001 From: BELFADIL BELFADIL Date: Sat, 11 Jun 2022 13:50:12 +0200 Subject: [PATCH 06/12] Fix type error --- tianshou/env/gym_wrappers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tianshou/env/gym_wrappers.py b/tianshou/env/gym_wrappers.py index ee2a80cb7..318f3823b 100644 --- a/tianshou/env/gym_wrappers.py +++ b/tianshou/env/gym_wrappers.py @@ -36,6 +36,7 @@ class MultiDiscreteToDiscrete(gym.ActionWrapper): def __init__(self, env: gym.Env) -> None: super().__init__(env) + assert isinstance(env.action_space, gym.spaces.MultiDiscrete) nvec = env.action_space.nvec assert nvec.ndim == 1 self.bases = np.ones_like(nvec) From 2ddf384b81cd2424de03ea51e68a653cd46066cd Mon Sep 17 00:00:00 2001 From: BELFADIL BELFADIL Date: Sun, 12 Jun 2022 17:53:00 +0200 Subject: [PATCH 07/12] Batch of actions support in gym wrappers --- tianshou/env/gym_wrappers.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/tianshou/env/gym_wrappers.py b/tianshou/env/gym_wrappers.py index 318f3823b..caaafadc9 100644 --- a/tianshou/env/gym_wrappers.py +++ b/tianshou/env/gym_wrappers.py @@ -1,3 +1,5 @@ +from typing import List, Union + import gym import numpy as np @@ -6,30 +8,37 @@ class ContinuousToDiscrete(gym.ActionWrapper): """Gym environment wrapper to take discrete action in a continuous environment. :param gym.Env env: gym environment with continuous action space. 
- :param int action_per_branch: number of discrete actions in each dimension + :param int action_per_dim: number of discrete actions in each dimension of the action space. """ - def __init__(self, env: gym.Env, action_per_branch: int) -> None: + def __init__(self, env: gym.Env, action_per_dim: Union[int, List[int]]) -> None: super().__init__(env) assert isinstance(env.action_space, gym.spaces.Box) low, high = env.action_space.low, env.action_space.high - num_branches = env.action_space.shape[0] - self.action_space = gym.spaces.MultiDiscrete( - [action_per_branch] * num_branches - ) + if isinstance(action_per_dim, int): + action_per_dim = [action_per_dim] * env.action_space.shape[0] + assert len(action_per_dim) == env.action_space.shape[0] + self.action_space = gym.spaces.MultiDiscrete(action_per_dim) mesh = [] - for lo, hi in zip(low, high): - mesh.append(np.linspace(lo, hi, action_per_branch)) + for lo, hi, a in zip(low, high, action_per_dim): + mesh.append(np.linspace(lo, hi, a), dtype=object) self.mesh = np.array(mesh) def action(self, act: np.ndarray) -> np.ndarray: # modify act - return np.array([self.mesh[i][a] for i, a in enumerate(act)]) + if len(act.shape) == 1: + return np.array([self.mesh[i][a] for i, a in enumerate(act)]) + elif len(act.shape) == 2: + return np.array( + [[self.mesh[i][a] for i, a in enumerate(a_)] for a_ in act] + ) + else: + raise Exception class MultiDiscreteToDiscrete(gym.ActionWrapper): - """Gym environment wrapper to discrete action in multidiscrete environment. + """Gym environment wrapper to take discrete action in multidiscrete environment. :param gym.Env env: gym environment with multidiscrete action space. 
""" @@ -44,7 +53,7 @@ def __init__(self, env: gym.Env) -> None: self.bases[i] = self.bases[i - 1] * nvec[-i] self.action_space = gym.spaces.Discrete(np.prod(nvec)) - def action(self, act: int) -> np.ndarray: + def action(self, act: np.ndarray) -> np.ndarray: converted_act = [] for b in np.flip(self.bases): converted_act.append(act // b) From b30738c8e41bc01f1af9c2af96e4981ff55e7aec Mon Sep 17 00:00:00 2001 From: BELFADIL BELFADIL Date: Sun, 12 Jun 2022 17:53:15 +0200 Subject: [PATCH 08/12] Test gym action wrappers --- test/base/test_env.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/test/base/test_env.py b/test/base/test_env.py index 3b3a74c31..f9919609f 100644 --- a/test/base/test_env.py +++ b/test/base/test_env.py @@ -1,13 +1,16 @@ import sys import time +import gym import numpy as np import pytest from gym.spaces.discrete import Discrete from tianshou.data import Batch from tianshou.env import ( + ContinuousToDiscrete, DummyVectorEnv, + MultiDiscreteToDiscrete, RayVectorEnv, ShmemVectorEnv, SubprocVectorEnv, @@ -265,6 +268,34 @@ def test_venv_norm_obs(): run_align_norm_obs(raw, train_env, test_env, action_list) +def test_gym_wrappers(): + bsz = 10 + action_per_branch = [4, 6, 10, 7] + env = gym.make("BipedalWalker-v3") + original_act = env.action_space.high + # convert continous to multidiscrete action space + # with different action number per dimension + env_m = ContinuousToDiscrete(env, action_per_branch) + # check conversion is working properly for one action + assert ((env_m.action(env_m.action_space.nvec - 1)) == original_act).all() + # check conversion is working properly for a batch of actions + assert ( + env_m.action(np.array([env_m.action_space.nvec - 1] * bsz) + ) == np.array([original_act] * bsz) + ).all() + # convert multidiscrete with different action number per + # dimension to discrete action space + env_d = MultiDiscreteToDiscrete(env_m) + # check conversion is working properly for one action + assert 
(env_d.action(env_d.action_space.n - 1) == env_m.action_space.nvec - + 1).all() + # check conversion is working properly for a batch of actions + assert ( + env_d.action(np.array([env_d.action_space.n - 1] * bsz) + ) == np.array([env_m.action_space.nvec - 1] * bsz) + ).all() + + @pytest.mark.skipif(envpool is None, reason="EnvPool doesn't support this platform") def test_venv_wrapper_envpool(): raw = envpool.make_gym("Ant-v3", num_envs=4) From 0a1dfcbdee6bd82b765086d17d0a157401c2cb1e Mon Sep 17 00:00:00 2001 From: BELFADIL BELFADIL Date: Sun, 12 Jun 2022 18:03:02 +0200 Subject: [PATCH 09/12] Fix typing --- tianshou/env/gym_wrappers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tianshou/env/gym_wrappers.py b/tianshou/env/gym_wrappers.py index caaafadc9..681697f86 100644 --- a/tianshou/env/gym_wrappers.py +++ b/tianshou/env/gym_wrappers.py @@ -20,10 +20,10 @@ def __init__(self, env: gym.Env, action_per_dim: Union[int, List[int]]) -> None: action_per_dim = [action_per_dim] * env.action_space.shape[0] assert len(action_per_dim) == env.action_space.shape[0] self.action_space = gym.spaces.MultiDiscrete(action_per_dim) - mesh = [] - for lo, hi, a in zip(low, high, action_per_dim): - mesh.append(np.linspace(lo, hi, a), dtype=object) - self.mesh = np.array(mesh) + self.mesh = np.array( + [np.linspace(lo, hi, a) for lo, hi, a in zip(low, high, action_per_dim)], + dtype=object + ) def action(self, act: np.ndarray) -> np.ndarray: # modify act From 7af131cfda71c2a291218194d89d90297c4e52b6 Mon Sep 17 00:00:00 2001 From: BELFADIL BELFADIL Date: Sun, 12 Jun 2022 19:23:44 +0200 Subject: [PATCH 10/12] Use DummyEnv class for testing gym env wrappers --- test/base/test_env.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/test/base/test_env.py b/test/base/test_env.py index f9919609f..4c62d5641 100644 --- a/test/base/test_env.py +++ b/test/base/test_env.py @@ -269,9 +269,17 @@ def test_venv_norm_obs(): def 
test_gym_wrappers(): + + class DummyEnv(): + + def __init__(self): + self.action_space = gym.spaces.Box( + low=-1.0, high=2.0, shape=(4, ), dtype=np.float32 + ) + bsz = 10 action_per_branch = [4, 6, 10, 7] - env = gym.make("BipedalWalker-v3") + env = DummyEnv() original_act = env.action_space.high # convert continous to multidiscrete action space # with different action number per dimension @@ -317,3 +325,4 @@ def test_venv_wrapper_envpool(): test_vecenv() test_async_env() test_async_check_id() + test_gym_wrappers() From 0cc6d9b5e7b3cb323a23e4dd50f3cfc839015920 Mon Sep 17 00:00:00 2001 From: Jiayi Weng Date: Sun, 12 Jun 2022 17:00:50 -0400 Subject: [PATCH 11/12] polish --- test/base/test_env.py | 27 ++++++++++++++------------- tianshou/env/gym_wrappers.py | 10 ++++------ 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/test/base/test_env.py b/test/base/test_env.py index 4c62d5641..002799b60 100644 --- a/test/base/test_env.py +++ b/test/base/test_env.py @@ -270,7 +270,7 @@ def test_venv_norm_obs(): def test_gym_wrappers(): - class DummyEnv(): + class DummyEnv(gym.Env): def __init__(self): self.action_space = gym.spaces.Box( @@ -285,23 +285,24 @@ def __init__(self): # with different action number per dimension env_m = ContinuousToDiscrete(env, action_per_branch) # check conversion is working properly for one action - assert ((env_m.action(env_m.action_space.nvec - 1)) == original_act).all() + np.testing.assert_allclose(env_m.action(env_m.action_space.nvec - 1), original_act) # check conversion is working properly for a batch of actions - assert ( - env_m.action(np.array([env_m.action_space.nvec - 1] * bsz) - ) == np.array([original_act] * bsz) - ).all() + np.testing.assert_allclose( + env_m.action(np.array([env_m.action_space.nvec - 1] * bsz)), + np.array([original_act] * bsz) + ) # convert multidiscrete with different action number per # dimension to discrete action space env_d = MultiDiscreteToDiscrete(env_m) # check conversion is working properly 
for one action - assert (env_d.action(env_d.action_space.n - 1) == env_m.action_space.nvec - - 1).all() + np.testing.assert_allclose( + env_d.action(env_d.action_space.n - 1), env_m.action_space.nvec - 1 + ) # check conversion is working properly for a batch of actions - assert ( - env_d.action(np.array([env_d.action_space.n - 1] * bsz) - ) == np.array([env_m.action_space.nvec - 1] * bsz) - ).all() + np.testing.assert_allclose( + env_d.action(np.array([env_d.action_space.n - 1] * bsz)), + np.array([env_m.action_space.nvec - 1] * bsz) + ) @pytest.mark.skipif(envpool is None, reason="EnvPool doesn't support this platform") @@ -318,7 +319,7 @@ def test_venv_wrapper_envpool(): run_align_norm_obs(raw, train, test, actions) -if __name__ == '__main__': +if __name__ == "__main__": test_venv_norm_obs() test_venv_wrapper_envpool() test_env_obs_dtype() diff --git a/tianshou/env/gym_wrappers.py b/tianshou/env/gym_wrappers.py index 681697f86..3bd985098 100644 --- a/tianshou/env/gym_wrappers.py +++ b/tianshou/env/gym_wrappers.py @@ -27,14 +27,12 @@ def __init__(self, env: gym.Env, action_per_dim: Union[int, List[int]]) -> None: def action(self, act: np.ndarray) -> np.ndarray: # modify act + assert len(act.shape) <= 2, f"Unknown action format with shape {act.shape}." 
if len(act.shape) == 1: return np.array([self.mesh[i][a] for i, a in enumerate(act)]) - elif len(act.shape) == 2: - return np.array( - [[self.mesh[i][a] for i, a in enumerate(a_)] for a_ in act] - ) - else: - raise Exception + return np.array( + [[self.mesh[i][a] for i, a in enumerate(a_)] for a_ in act] + ) class MultiDiscreteToDiscrete(gym.ActionWrapper): From 7f0cf0e95907385f8076f6fbd72c75ed86fd6922 Mon Sep 17 00:00:00 2001 From: Jiayi Weng Date: Sun, 12 Jun 2022 17:06:53 -0400 Subject: [PATCH 12/12] fix --- tianshou/env/gym_wrappers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tianshou/env/gym_wrappers.py b/tianshou/env/gym_wrappers.py index 3bd985098..5b98e77af 100644 --- a/tianshou/env/gym_wrappers.py +++ b/tianshou/env/gym_wrappers.py @@ -30,9 +30,7 @@ def action(self, act: np.ndarray) -> np.ndarray: assert len(act.shape) <= 2, f"Unknown action format with shape {act.shape}." if len(act.shape) == 1: return np.array([self.mesh[i][a] for i, a in enumerate(act)]) - return np.array( - [[self.mesh[i][a] for i, a in enumerate(a_)] for a_ in act] - ) + return np.array([[self.mesh[i][a] for i, a in enumerate(a_)] for a_ in act]) class MultiDiscreteToDiscrete(gym.ActionWrapper):