diff --git a/.gitignore b/.gitignore
index e9510a1df..fd72be398 100644
--- a/.gitignore
+++ b/.gitignore
@@ -148,3 +148,4 @@ MUJOCO_LOG.TXT
 *.pkl
 *.hdf5
 wandb/
+videos/
diff --git a/examples/atari/atari_bcq.py b/examples/atari/atari_bcq.py
index 1be441013..ec89243b4 100644
--- a/examples/atari/atari_bcq.py
+++ b/examples/atari/atari_bcq.py
@@ -11,7 +11,7 @@
 from torch.utils.tensorboard import SummaryWriter

 from tianshou.data import Collector, VectorReplayBuffer
-from tianshou.env import SubprocVectorEnv
+from tianshou.env import ShmemVectorEnv
 from tianshou.policy import DiscreteBCQPolicy
 from tianshou.trainer import offline_trainer
 from tianshou.utils import TensorboardLogger
@@ -77,7 +77,7 @@ def test_discrete_bcq(args=get_args()):
     print("Observations shape:", args.state_shape)
     print("Actions shape:", args.action_shape)
     # make environments
-    test_envs = SubprocVectorEnv(
+    test_envs = ShmemVectorEnv(
         [lambda: make_atari_env_watch(args) for _ in range(args.test_num)]
     )
     # seed
diff --git a/examples/atari/atari_c51.py b/examples/atari/atari_c51.py
index 291fb7007..dcd1911dc 100644
--- a/examples/atari/atari_c51.py
+++ b/examples/atari/atari_c51.py
@@ -9,7 +9,7 @@
 from torch.utils.tensorboard import SummaryWriter

 from tianshou.data import Collector, VectorReplayBuffer
-from tianshou.env import SubprocVectorEnv
+from tianshou.env import ShmemVectorEnv
 from tianshou.policy import C51Policy
 from tianshou.trainer import offpolicy_trainer
 from tianshou.utils import TensorboardLogger
@@ -75,10 +75,10 @@ def test_c51(args=get_args()):
     print("Observations shape:", args.state_shape)
     print("Actions shape:", args.action_shape)
     # make environments
-    train_envs = SubprocVectorEnv(
+    train_envs = ShmemVectorEnv(
         [lambda: make_atari_env(args) for _ in range(args.training_num)]
     )
-    test_envs = SubprocVectorEnv(
+    test_envs = ShmemVectorEnv(
         [lambda: make_atari_env_watch(args) for _ in range(args.test_num)]
     )
     # seed
@@ -141,7 +141,8 @@ def train_fn(epoch, env_step):
         else:
             eps = args.eps_train_final
         policy.set_eps(eps)
-        logger.write('train/eps', env_step, eps)
+        if env_step % 1000 == 0:
+            logger.write("train/env_step", env_step, {"train/eps": eps})

     def test_fn(epoch, env_step):
         policy.set_eps(args.eps_test)
diff --git a/examples/atari/atari_cql.py b/examples/atari/atari_cql.py
index db4e33a9a..685e006db 100644
--- a/examples/atari/atari_cql.py
+++ b/examples/atari/atari_cql.py
@@ -11,7 +11,7 @@
 from torch.utils.tensorboard import SummaryWriter

 from tianshou.data import Collector, VectorReplayBuffer
-from tianshou.env import SubprocVectorEnv
+from tianshou.env import ShmemVectorEnv
 from tianshou.policy import DiscreteCQLPolicy
 from tianshou.trainer import offline_trainer
 from tianshou.utils import TensorboardLogger
@@ -76,7 +76,7 @@ def test_discrete_cql(args=get_args()):
     print("Observations shape:", args.state_shape)
     print("Actions shape:", args.action_shape)
     # make environments
-    test_envs = SubprocVectorEnv(
+    test_envs = ShmemVectorEnv(
         [lambda: make_atari_env_watch(args) for _ in range(args.test_num)]
     )
     # seed
diff --git a/examples/atari/atari_crr.py b/examples/atari/atari_crr.py
index 06cde415b..8905c7e58 100644
--- a/examples/atari/atari_crr.py
+++ b/examples/atari/atari_crr.py
@@ -11,7 +11,7 @@
 from torch.utils.tensorboard import SummaryWriter

 from tianshou.data import Collector, VectorReplayBuffer
-from tianshou.env import SubprocVectorEnv
+from tianshou.env import ShmemVectorEnv
 from tianshou.policy import DiscreteCRRPolicy
 from tianshou.trainer import offline_trainer
 from tianshou.utils import TensorboardLogger
@@ -77,7 +77,7 @@ def test_discrete_crr(args=get_args()):
     print("Observations shape:", args.state_shape)
     print("Actions shape:", args.action_shape)
     # make environments
-    test_envs = SubprocVectorEnv(
+    test_envs = ShmemVectorEnv(
         [lambda: make_atari_env_watch(args) for _ in range(args.test_num)]
     )
     # seed
diff --git a/examples/atari/atari_dqn.py b/examples/atari/atari_dqn.py
index c9f74af8c..67a44d002 100644
--- a/examples/atari/atari_dqn.py
+++ b/examples/atari/atari_dqn.py
@@ -9,7 +9,7 @@
 from torch.utils.tensorboard import SummaryWriter

 from tianshou.data import Collector, VectorReplayBuffer
-from tianshou.env import SubprocVectorEnv
+from tianshou.env import ShmemVectorEnv
 from tianshou.policy import DQNPolicy
 from tianshou.trainer import offpolicy_trainer
 from tianshou.utils import TensorboardLogger
@@ -72,10 +72,10 @@ def test_dqn(args=get_args()):
     print("Observations shape:", args.state_shape)
     print("Actions shape:", args.action_shape)
     # make environments
-    train_envs = SubprocVectorEnv(
+    train_envs = ShmemVectorEnv(
         [lambda: make_atari_env(args) for _ in range(args.training_num)]
     )
-    test_envs = SubprocVectorEnv(
+    test_envs = ShmemVectorEnv(
         [lambda: make_atari_env_watch(args) for _ in range(args.test_num)]
     )
     # seed
@@ -135,7 +135,8 @@ def train_fn(epoch, env_step):
         else:
             eps = args.eps_train_final
         policy.set_eps(eps)
-        logger.write('train/eps', env_step, eps)
+        if env_step % 1000 == 0:
+            logger.write("train/env_step", env_step, {"train/eps": eps})

     def test_fn(epoch, env_step):
         policy.set_eps(args.eps_test)
diff --git a/examples/atari/atari_fqf.py b/examples/atari/atari_fqf.py
index 4629bede2..99f8957c4 100644
--- a/examples/atari/atari_fqf.py
+++ b/examples/atari/atari_fqf.py
@@ -9,7 +9,7 @@
 from torch.utils.tensorboard import SummaryWriter

 from tianshou.data import Collector, VectorReplayBuffer
-from tianshou.env import SubprocVectorEnv
+from tianshou.env import ShmemVectorEnv
 from tianshou.policy import FQFPolicy
 from tianshou.trainer import offpolicy_trainer
 from tianshou.utils import TensorboardLogger
@@ -78,10 +78,10 @@ def test_fqf(args=get_args()):
     print("Observations shape:", args.state_shape)
     print("Actions shape:", args.action_shape)
     # make environments
-    train_envs = SubprocVectorEnv(
+    train_envs = ShmemVectorEnv(
         [lambda: make_atari_env(args) for _ in range(args.training_num)]
     )
-    test_envs = SubprocVectorEnv(
+    test_envs = ShmemVectorEnv(
         [lambda: make_atari_env_watch(args) for _ in range(args.test_num)]
     )
     # seed
@@ -158,7 +158,8 @@ def train_fn(epoch, env_step):
         else:
             eps = args.eps_train_final
         policy.set_eps(eps)
-        logger.write('train/eps', env_step, eps)
+        if env_step % 1000 == 0:
+            logger.write("train/env_step", env_step, {"train/eps": eps})

     def test_fn(epoch, env_step):
         policy.set_eps(args.eps_test)
diff --git a/examples/atari/atari_iqn.py b/examples/atari/atari_iqn.py
index d0e7773d0..532d59482 100644
--- a/examples/atari/atari_iqn.py
+++ b/examples/atari/atari_iqn.py
@@ -9,7 +9,7 @@
 from torch.utils.tensorboard import SummaryWriter

 from tianshou.data import Collector, VectorReplayBuffer
-from tianshou.env import SubprocVectorEnv
+from tianshou.env import ShmemVectorEnv
 from tianshou.policy import IQNPolicy
 from tianshou.trainer import offpolicy_trainer
 from tianshou.utils import TensorboardLogger
@@ -78,10 +78,10 @@ def test_iqn(args=get_args()):
     print("Observations shape:", args.state_shape)
     print("Actions shape:", args.action_shape)
     # make environments
-    train_envs = SubprocVectorEnv(
+    train_envs = ShmemVectorEnv(
         [lambda: make_atari_env(args) for _ in range(args.training_num)]
     )
-    test_envs = SubprocVectorEnv(
+    test_envs = ShmemVectorEnv(
         [lambda: make_atari_env_watch(args) for _ in range(args.test_num)]
     )
     # seed
@@ -153,7 +153,8 @@ def train_fn(epoch, env_step):
         else:
             eps = args.eps_train_final
         policy.set_eps(eps)
-        logger.write('train/eps', env_step, eps)
+        if env_step % 1000 == 0:
+            logger.write("train/env_step", env_step, {"train/eps": eps})

     def test_fn(epoch, env_step):
         policy.set_eps(args.eps_test)
diff --git a/examples/atari/atari_qrdqn.py b/examples/atari/atari_qrdqn.py
index 23a7966eb..af5d78e3f 100644
--- a/examples/atari/atari_qrdqn.py
+++ b/examples/atari/atari_qrdqn.py
@@ -9,7 +9,7 @@
 from torch.utils.tensorboard import SummaryWriter

 from tianshou.data import Collector, VectorReplayBuffer
-from tianshou.env import SubprocVectorEnv
+from tianshou.env import ShmemVectorEnv
 from tianshou.policy import QRDQNPolicy
 from tianshou.trainer import offpolicy_trainer
 from tianshou.utils import TensorboardLogger
@@ -73,10 +73,10 @@ def test_qrdqn(args=get_args()):
     print("Observations shape:", args.state_shape)
     print("Actions shape:", args.action_shape)
     # make environments
-    train_envs = SubprocVectorEnv(
+    train_envs = ShmemVectorEnv(
         [lambda: make_atari_env(args) for _ in range(args.training_num)]
     )
-    test_envs = SubprocVectorEnv(
+    test_envs = ShmemVectorEnv(
         [lambda: make_atari_env_watch(args) for _ in range(args.test_num)]
     )
     # seed
@@ -137,7 +137,8 @@ def train_fn(epoch, env_step):
         else:
             eps = args.eps_train_final
         policy.set_eps(eps)
-        logger.write('train/eps', env_step, eps)
+        if env_step % 1000 == 0:
+            logger.write("train/env_step", env_step, {"train/eps": eps})

     def test_fn(epoch, env_step):
         policy.set_eps(args.eps_test)
diff --git a/examples/atari/atari_rainbow.py b/examples/atari/atari_rainbow.py
index b131cce5f..4e1a78ced 100644
--- a/examples/atari/atari_rainbow.py
+++ b/examples/atari/atari_rainbow.py
@@ -10,7 +10,7 @@
 from torch.utils.tensorboard import SummaryWriter

 from tianshou.data import Collector, PrioritizedVectorReplayBuffer, VectorReplayBuffer
-from tianshou.env import SubprocVectorEnv
+from tianshou.env import ShmemVectorEnv
 from tianshou.policy import RainbowPolicy
 from tianshou.trainer import offpolicy_trainer
 from tianshou.utils import TensorboardLogger
@@ -85,10 +85,10 @@ def test_rainbow(args=get_args()):
     print("Observations shape:", args.state_shape)
     print("Actions shape:", args.action_shape)
     # make environments
-    train_envs = SubprocVectorEnv(
+    train_envs = ShmemVectorEnv(
         [lambda: make_atari_env(args) for _ in range(args.training_num)]
     )
-    test_envs = SubprocVectorEnv(
+    test_envs = ShmemVectorEnv(
         [lambda: make_atari_env_watch(args) for _ in range(args.test_num)]
     )
     # seed
@@ -174,7 +174,8 @@ def train_fn(epoch, env_step):
         else:
             eps = args.eps_train_final
         policy.set_eps(eps)
-        logger.write('train/eps', env_step, eps)
+        if env_step % 1000 == 0:
+            logger.write("train/env_step", env_step, {"train/eps": eps})
         if not args.no_priority:
             if env_step <= args.beta_anneal_step:
                 beta = args.beta - env_step / args.beta_anneal_step * \
@@ -182,7 +183,8 @@
             else:
                 beta = args.beta_final
             buffer.set_beta(beta)
-            logger.write('train/beta', env_step, beta)
+            if env_step % 1000 == 0:
+                logger.write("train/env_step", env_step, {"train/beta": beta})

     def test_fn(epoch, env_step):
         policy.set_eps(args.eps_test)
diff --git a/examples/vizdoom/vizdoom_c51.py b/examples/vizdoom/vizdoom_c51.py
index bb3a1f207..53eafae20 100644
--- a/examples/vizdoom/vizdoom_c51.py
+++ b/examples/vizdoom/vizdoom_c51.py
@@ -9,7 +9,7 @@
 from torch.utils.tensorboard import SummaryWriter

 from tianshou.data import Collector, VectorReplayBuffer
-from tianshou.env import SubprocVectorEnv
+from tianshou.env import ShmemVectorEnv
 from tianshou.policy import C51Policy
 from tianshou.trainer import offpolicy_trainer
 from tianshou.utils import TensorboardLogger
@@ -72,13 +72,13 @@ def test_c51(args=get_args()):
     print("Observations shape:", args.state_shape)
     print("Actions shape:", args.action_shape)
     # make environments
-    train_envs = SubprocVectorEnv(
+    train_envs = ShmemVectorEnv(
         [
             lambda: Env(args.cfg_path, args.frames_stack, args.res)
             for _ in range(args.training_num)
         ]
     )
-    test_envs = SubprocVectorEnv(
+    test_envs = ShmemVectorEnv(
         [
             lambda: Env(args.cfg_path, args.frames_stack, args.res, args.save_lmp)
             for _ in range(min(os.cpu_count() - 1, args.test_num))
@@ -144,7 +144,8 @@ def train_fn(epoch, env_step):
         else:
             eps = args.eps_train_final
         policy.set_eps(eps)
-        logger.write('train/eps', env_step, eps)
+        if env_step % 1000 == 0:
+            logger.write("train/env_step", env_step, {"train/eps": eps})

     def test_fn(epoch, env_step):
         policy.set_eps(args.eps_test)
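
The patch makes two repeated changes: every SubprocVectorEnv becomes ShmemVectorEnv, and epsilon/beta logging moves to the dict-based TensorboardLogger.write signature, throttled to every 1000 environment steps. The snippet below is a minimal sketch (not part of the diff) showing the two APIs in isolation; "CartPole-v0" and the log directory are placeholder values chosen only for illustration.

# Standalone sketch of the APIs this patch switches to.
import gym
from torch.utils.tensorboard import SummaryWriter

from tianshou.env import ShmemVectorEnv
from tianshou.utils import TensorboardLogger

# ShmemVectorEnv is a drop-in replacement for SubprocVectorEnv: workers still
# run in subprocesses, but observations are exchanged through shared memory
# instead of being pickled over pipes.
envs = ShmemVectorEnv([lambda: gym.make("CartPole-v0") for _ in range(4)])
obs = envs.reset()

# TensorboardLogger.write(step_type, step, data) takes a dict of scalars, so
# the caller decides how often to emit them (every 1000 steps in the patch).
logger = TensorboardLogger(SummaryWriter("log/demo"))
for env_step in range(5000):
    eps = max(0.05, 1.0 - env_step / 5000)
    if env_step % 1000 == 0:
        logger.write("train/env_step", env_step, {"train/eps": eps})
envs.close()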