
update atari.py #2


Merged: 3 commits, Mar 26, 2020
10 changes: 9 additions & 1 deletion setup.py
@@ -37,11 +37,19 @@
'examples', 'examples.*',
'docs', 'docs.*']),
install_requires=[
'gym',
'gym>=0.15.0',
'tqdm',
'numpy',
'cloudpickle',
'tensorboard',
'torch>=1.4.0',
],
extras_require={
'atari': [
'atari_py',
],
'mujoco': [
'mujoco_py',
]
},
)
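For context: the new extras_require block turns the Atari and MuJoCo dependencies into optional extras, installable with e.g. `pip install tianshou[atari]` or `pip install tianshou[mujoco]`. A minimal sketch of how code can guard such an optional dependency, mirroring the try/except pattern already used for ray in tianshou/env/vecenv.py (the helper make_atari_env below is hypothetical, for illustration only):

```python
# Sketch only: degrade gracefully when the optional 'atari' extra is not installed.
try:
    import atari_py  # provided by `pip install tianshou[atari]`
except ImportError:
    atari_py = None


def make_atari_env(game: str):
    """Hypothetical helper, not part of this PR."""
    if atari_py is None:
        raise ImportError(
            "atari_py is missing; install the extra: pip install tianshou[atari]")
    # ... construct and return the wrapped Atari environment here ...
```
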
1 change: 1 addition & 0 deletions test/base/test_buffer.py
@@ -1,4 +1,5 @@
from tianshou.data import ReplayBuffer

if __name__ == '__main__':
from env import MyTestEnv
else: # pytest
1 change: 1 addition & 0 deletions test/base/test_collector.py
@@ -11,6 +11,7 @@

class MyPolicy(BasePolicy):
"""docstring for MyPolicy"""

def __init__(self):
super().__init__()

2 changes: 1 addition & 1 deletion tianshou/__init__.py
@@ -1,4 +1,4 @@
from tianshou import data, env, utils, policy, trainer,\
from tianshou import data, env, utils, policy, trainer, \
exploration

__version__ = '0.2.0'
2 changes: 1 addition & 1 deletion tianshou/data/batch.py
@@ -37,7 +37,7 @@ def append(self, batch):
else:
raise TypeError(
'No support for append with type {} in class Batch.'
.format(type(batch.__dict__[k])))
.format(type(batch.__dict__[k])))

def split(self, size=None, permute=True):
length = min([
2 changes: 1 addition & 1 deletion tianshou/data/buffer.py
@@ -47,7 +47,7 @@ def add(self, obs, act, rew, done, obs_next=0, info={}, weight=None):
'''
weight: importance weights, disabled here
'''
assert isinstance(info, dict),\
assert isinstance(info, dict), \
'You should return a dict in the last argument of env.step().'
self._add_to_buffer('obs', obs)
self._add_to_buffer('act', act)
6 changes: 3 additions & 3 deletions tianshou/data/collector.py
@@ -31,8 +31,8 @@ def __init__(self, policy, env, buffer=None, stat_size=100):
if self._multi_env:
self.env_num = len(env)
if isinstance(self.buffer, list):
assert len(self.buffer) == self.env_num,\
'The number of data buffer does not match the number of '\
assert len(self.buffer) == self.env_num, \
'The number of data buffer does not match the number of ' \
'input env.'
self._multi_buf = True
elif isinstance(self.buffer, ReplayBuffer):
@@ -87,7 +87,7 @@ def collect(self, n_step=0, n_episode=0, render=0):
if not self._multi_env:
n_episode = np.sum(n_episode)
start_time = time.time()
assert sum([(n_step != 0), (n_episode != 0)]) == 1,\
assert sum([(n_step != 0), (n_episode != 0)]) == 1, \
"One and only one collection number specification permitted!"
cur_step = 0
cur_episode = np.zeros(self.env_num) if self._multi_env else 0
2 changes: 1 addition & 1 deletion tianshou/env/__init__.py
@@ -1,6 +1,6 @@
from tianshou.env.utils import CloudpickleWrapper
from tianshou.env.common import EnvWrapper, FrameStack
from tianshou.env.vecenv import BaseVectorEnv, VectorEnv,\
from tianshou.env.vecenv import BaseVectorEnv, VectorEnv, \
SubprocVectorEnv, RayVectorEnv

__all__ = [
3 changes: 2 additions & 1 deletion tianshou/env/vecenv.py
@@ -1,6 +1,7 @@
import numpy as np
from abc import ABC, abstractmethod
from multiprocessing import Process, Pipe

try:
import ray
except ImportError:
@@ -122,7 +123,7 @@ def __init__(self, env_fns):
zip(*[Pipe() for _ in range(self.env_num)])
self.processes = [
Process(target=worker, args=(
parent, child, CloudpickleWrapper(env_fn)), daemon=True)
parent, child, CloudpickleWrapper(env_fn)), daemon=True)
for (parent, child, env_fn) in zip(
self.parent_remote, self.child_remote, env_fns)
]
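The constructor above wraps each env_fn in CloudpickleWrapper before handing it to a worker Process. A minimal sketch of that wrapper pattern, assumed here for illustration (the real class is imported from tianshou.env.utils): plain pickle cannot serialize lambdas or locally defined environment factories, so the callable is serialized with cloudpickle on its way through the Pipe.

```python
import cloudpickle


class CloudpickleWrapperSketch:
    """Sketch of the wrap-with-cloudpickle pattern; an assumption, not the PR's code."""

    def __init__(self, data):
        self.data = data  # typically a callable that builds an environment

    def __getstate__(self):
        # cloudpickle handles closures and lambdas that plain pickle rejects
        return cloudpickle.dumps(self.data)

    def __setstate__(self, data):
        self.data = cloudpickle.loads(data)
```
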
2 changes: 1 addition & 1 deletion tianshou/exploration/random.py
@@ -14,7 +14,7 @@ def __call__(self, size, mu=.1):
if self.x is None or self.x.shape != size:
self.x = 0
self.x = self.x + self.alpha * (mu - self.x) + \
self.beta * np.random.normal(size=size)
self.beta * np.random.normal(size=size)
return self.x

def reset(self):
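For reference, the re-indented line above is the discretized Ornstein-Uhlenbeck update x <- x + alpha * (mu - x) + beta * N(0, 1), which produces temporally correlated exploration noise. A self-contained sketch (alpha/beta defaults are illustrative assumptions, not tianshou's values):

```python
import numpy as np


class OUNoiseSketch:
    """Sketch of the Ornstein-Uhlenbeck noise update shown in the diff above."""

    def __init__(self, alpha=0.15, beta=0.3):
        self.alpha, self.beta = alpha, beta
        self.x = None

    def __call__(self, size: tuple, mu=0.1):
        if self.x is None or self.x.shape != size:
            self.x = np.zeros(size)  # restart the process when the shape changes
        self.x = self.x + self.alpha * (mu - self.x) + \
            self.beta * np.random.normal(size=size)
        return self.x

    def reset(self):
        self.x = None
```
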
4 changes: 2 additions & 2 deletions tianshou/policy/a2c.py
@@ -40,8 +40,8 @@ def learn(self, batch, batch_size=None, repeat=1):
vf_loss = F.mse_loss(r[:, None], v)
ent_loss = dist.entropy().mean()
loss = actor_loss \
+ self._w_vf * vf_loss \
- self._w_ent * ent_loss
+ self._w_vf * vf_loss \
- self._w_ent * ent_loss
loss.backward()
if self._grad_norm:
nn.utils.clip_grad_norm_(
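The re-indented lines keep the composite objective loss = actor_loss + w_vf * vf_loss - w_ent * ent_loss unchanged. A tiny numeric check of the sign convention (the weights below are assumptions for illustration, not A2CPolicy's defaults): subtracting the weighted entropy means high-entropy policies lower the total loss, which is what provides the exploration bonus.

```python
import torch

# Illustrative sign-convention check, not part of the PR.
actor_loss = torch.tensor(0.5)
vf_loss = torch.tensor(0.2)
ent_loss = torch.tensor(1.0)   # mean policy entropy
w_vf, w_ent = 0.5, 0.01        # assumed weights for this example
loss = actor_loss + w_vf * vf_loss - w_ent * ent_loss
print(loss)  # tensor(0.5900)
```
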
2 changes: 2 additions & 0 deletions tianshou/policy/ddpg.py
@@ -5,6 +5,8 @@

from tianshou.data import Batch
from tianshou.policy import BasePolicy


# from tianshou.exploration import OUNoise


2 changes: 1 addition & 1 deletion tianshou/policy/pg.py
@@ -35,7 +35,7 @@ def __call__(self, batch, state=None):
def learn(self, batch, batch_size=None, repeat=1):
losses = []
batch.returns = (batch.returns - batch.returns.mean()) \
/ (batch.returns.std() + self._eps)
/ (batch.returns.std() + self._eps)
for _ in range(repeat):
for b in batch.split(batch_size):
self.optim.zero_grad()
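The normalization above rescales the returns to zero mean and unit variance, with self._eps guarding against division by zero. A tiny numeric illustration (not from the PR) of the degenerate case the epsilon handles:

```python
import numpy as np

# When every return is identical the std is zero; the epsilon keeps the result finite.
returns = np.array([1.0, 1.0, 1.0])
eps = np.finfo(np.float32).eps.item()
normalized = (returns - returns.mean()) / (returns.std() + eps)
print(normalized)  # [0. 0. 0.] rather than a divide-by-zero NaN
```
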
6 changes: 3 additions & 3 deletions tianshou/policy/ppo.py
@@ -59,7 +59,7 @@ def sync_weight(self):
def learn(self, batch, batch_size=None, repeat=1):
losses, clip_losses, vf_losses, ent_losses = [], [], [], []
batch.returns = (batch.returns - batch.returns.mean()) \
/ (batch.returns.std() + self._eps)
/ (batch.returns.std() + self._eps)
batch.act = torch.tensor(batch.act)
batch.returns = torch.tensor(batch.returns)[:, None]
for _ in range(repeat):
@@ -82,13 +82,13 @@ def learn(self, batch, batch_size=None, repeat=1):
ent_loss = dist.entropy().mean()
ent_losses.append(ent_loss.detach().cpu().numpy())
loss = clip_loss \
+ self._w_vf * vf_loss - self._w_ent * ent_loss
+ self._w_vf * vf_loss - self._w_ent * ent_loss
losses.append(loss.detach().cpu().numpy())
self.optim.zero_grad()
loss.backward()
nn.utils.clip_grad_norm_(list(
self.actor.parameters()) + list(self.critic.parameters()),
self._max_grad_norm)
self._max_grad_norm)
self.optim.step()
self.sync_weight()
return {