这是indexloc提供的服务,不要输入任何密码
Skip to content

buffer update bug fix #154

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions test/base/test_buffer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
from tianshou.data import Batch, ReplayBuffer, PrioritizedReplayBuffer

from tianshou.data import Batch, PrioritizedReplayBuffer, ReplayBuffer

if __name__ == '__main__':
from env import MyTestEnv
Expand All @@ -10,7 +11,6 @@
def test_replaybuffer(size=10, bufsize=20):
env = MyTestEnv(size)
buf = ReplayBuffer(bufsize)
buf2 = ReplayBuffer(bufsize)
obs = env.reset()
action_list = [1] * 5 + [0] * 10 + [1] * 10
for i, a in enumerate(action_list):
Expand All @@ -22,11 +22,6 @@ def test_replaybuffer(size=10, bufsize=20):
assert (indice < len(buf)).all()
assert (data.obs < size).all()
assert (0 <= data.done).all() and (data.done <= 1).all()
assert len(buf) > len(buf2)
buf2.update(buf)
assert len(buf) == len(buf2)
assert buf2[0].obs == buf[5].obs
assert buf2[-1].obs == buf[4].obs
b = ReplayBuffer(size=10)
b.add(1, 1, 1, 'str', 1, {'a': 3, 'b': {'c': 5.0}})
assert b.obs[0] == 1
Expand Down Expand Up @@ -104,8 +99,22 @@ def test_priortized_replaybuffer(size=32, bufsize=15):
buf.weight[indice], np.abs(-data.weight / 2) ** buf._alpha)


def test_update():
    """Check that ``ReplayBuffer.update`` copies a wrapped-around buffer.

    The source buffer has capacity 4 and receives 5 transitions, so it has
    wrapped once before being copied into an empty buffer that was created
    with the same arguments.
    """
    source = ReplayBuffer(4, stack_num=2)
    target = ReplayBuffer(4, stack_num=2)
    for step in range(5):
        source.add(
            obs=np.array([step]),
            act=float(step),
            rew=step * step,
            done=False,
            info={'incident': 'found'},
        )
    # The target starts out empty, the source does not.
    assert len(source) > len(target)
    target.update(source)
    assert len(source) == len(target)
    # First and last entries of the copy are compared against source
    # slots 1 and 0 respectively.
    assert (target[0].obs == source[1].obs).all()
    assert (target[-1].obs == source[0].obs).all()


if __name__ == '__main__':
    # Run each buffer test in turn when this file is executed as a script.
    test_replaybuffer()
    test_ignore_obs_next()
    test_stack()
    # Explicit (size, bufsize) arguments instead of the defaults (32, 15).
    test_priortized_replaybuffer(233333, 200000)
    test_update()
11 changes: 10 additions & 1 deletion tianshou/data/buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,16 +157,25 @@ def _add_to_buffer(self, name: str, inst: Any) -> None:
value.__dict__[key] = _create_value(inst[key], self._maxsize)
value[self._index] = inst

def _get_stack_num(self):
return self._stack

def _set_stack_num(self, num):
self._stack = num

def update(self, buffer: 'ReplayBuffer') -> None:
    """Move the data from the given buffer to self.

    Entries are read from ``buffer`` one at a time, starting at
    ``buffer._index % len(buffer)`` and wrapping around until every slot
    has been visited once, and each entry is passed to ``self.add``.

    While the entries are being read, the stack setting of ``buffer`` is
    temporarily set to 0 and is restored afterwards, even if reading an
    entry or adding it to ``self`` raises.

    :param buffer: the buffer to copy from; nothing happens if it is empty.
    """
    if len(buffer) == 0:
        return
    i = begin = buffer._index % len(buffer)
    origin = buffer._get_stack_num()
    # NOTE(review): presumably a stack setting of 0 makes ``buffer[i]``
    # return single, unstacked entries -- confirm against ``__getitem__``.
    buffer._set_stack_num(0)
    try:
        while True:
            self.add(**buffer[i])
            i = (i + 1) % len(buffer)
            if i == begin:
                break
    finally:
        # Always hand the source buffer back with its original setting so a
        # failure part-way through the copy does not leave it altered.
        buffer._set_stack_num(origin)

def add(self,
obs: Union[dict, Batch, np.ndarray],
Expand Down Expand Up @@ -408,7 +417,7 @@ def sample(self, batch_size: int) -> Tuple[Batch, np.ndarray]:
replace=self._replace)
p = p[indice] # weight of each sample
elif batch_size == 0:
p = np.full(shape=self._size, fill_value=1.0/self._size)
p = np.full(shape=self._size, fill_value=1.0 / self._size)
indice = np.concatenate([
np.arange(self._index, self._size),
np.arange(0, self._index),
Expand Down