diff --git a/setup.py b/setup.py
index f8736fa8a..24220c2bd 100644
--- a/setup.py
+++ b/setup.py
@@ -47,7 +47,7 @@ def get_version() -> str:
     install_requires=[
         "gym>=0.15.4",
         "tqdm",
-        "numpy!=1.16.0,<1.20.0",  # https://github.com/numpy/numpy/issues/12793
+        "numpy>1.16.0",  # https://github.com/numpy/numpy/issues/12793
         "tensorboard",
         "torch>=1.4.0",
         "numba>=0.51.0",
diff --git a/test/base/test_batch.py b/test/base/test_batch.py
index 0898e154c..09b280ee6 100644
--- a/test/base/test_batch.py
+++ b/test/base/test_batch.py
@@ -20,9 +20,9 @@ def test_batch():
     assert len(Batch(a=[1, 2, 3], b={'c': {}})) == 3
     assert not Batch(a=[1, 2, 3]).is_empty()
     b = Batch({'a': [4, 4], 'b': [5, 5]}, c=[None, None])
-    assert b.c.dtype == np.object
+    assert b.c.dtype == object
     b = Batch(d=[None], e=[starmap], f=Batch)
-    assert b.d.dtype == b.e.dtype == np.object and b.f == Batch
+    assert b.d.dtype == b.e.dtype == object and b.f == Batch
     b = Batch()
     b.update()
     assert b.is_empty()
@@ -153,10 +153,10 @@ def test_batch():
         batch3[0] = Batch(a={"c": 2, "e": 1})
     # auto convert
     batch4 = Batch(a=np.array(['a', 'b']))
-    assert batch4.a.dtype == np.object  # auto convert to np.object
+    assert batch4.a.dtype == object  # auto convert to object
     batch4.update(a=np.array(['c', 'd']))
     assert list(batch4.a) == ['c', 'd']
-    assert batch4.a.dtype == np.object  # auto convert to np.object
+    assert batch4.a.dtype == object  # auto convert to object
     batch5 = Batch(a=np.array([{'index': 0}]))
     assert isinstance(batch5.a, Batch)
     assert np.allclose(batch5.a.index, [0])
@@ -405,21 +405,23 @@ def test_utils_to_torch_numpy():
     assert data_list_2_torch.shape == (2, 3, 3)
     assert np.allclose(to_numpy(to_torch(data_list_2)), data_list_2)
     data_list_3 = [np.zeros((3, 2)), np.zeros((3, 3))]
-    data_list_3_torch = to_torch(data_list_3)
-    assert isinstance(data_list_3_torch, list)
-    assert all(isinstance(e, torch.Tensor) for e in data_list_3_torch)
-    assert all(starmap(np.allclose,
-                       zip(to_numpy(to_torch(data_list_3)), data_list_3)))
+    data_list_3_torch = [torch.zeros((3, 2)), torch.zeros((3, 3))]
+    with pytest.raises(TypeError):
+        to_torch(data_list_3)
+    with pytest.raises(TypeError):
+        to_numpy(data_list_3_torch)
     data_list_4 = [np.zeros((2, 3)), np.zeros((3, 3))]
-    data_list_4_torch = to_torch(data_list_4)
-    assert isinstance(data_list_4_torch, list)
-    assert all(isinstance(e, torch.Tensor) for e in data_list_4_torch)
-    assert all(starmap(np.allclose,
-                       zip(to_numpy(to_torch(data_list_4)), data_list_4)))
+    data_list_4_torch = [torch.zeros((2, 3)), torch.zeros((3, 3))]
+    with pytest.raises(TypeError):
+        to_torch(data_list_4)
+    with pytest.raises(TypeError):
+        to_numpy(data_list_4_torch)
     data_list_5 = [np.zeros(2), np.zeros((3, 3))]
-    data_list_5_torch = to_torch(data_list_5)
-    assert isinstance(data_list_5_torch, list)
-    assert all(isinstance(e, torch.Tensor) for e in data_list_5_torch)
+    data_list_5_torch = [torch.zeros(2), torch.zeros((3, 3))]
+    with pytest.raises(TypeError):
+        to_torch(data_list_5)
+    with pytest.raises(TypeError):
+        to_numpy(data_list_5_torch)
     data_array = np.random.rand(3, 2, 2)
     data_empty_tensor = to_torch(data_array[[]])
     assert isinstance(data_empty_tensor, torch.Tensor)
@@ -508,10 +510,10 @@ def test_batch_empty():
     assert np.allclose(b5.b.c, [2, 0])
     assert np.allclose(b5.b.d, [1, 0])
     data = Batch(a=[False, True],
-                 b={'c': np.array([2., 'st'], dtype=np.object),
+                 b={'c': np.array([2., 'st'], dtype=object),
                     'd': [1, None],
                     'e': [2., float('nan')]},
-                 c=np.array([1, 3, 4], dtype=np.int),
+                 c=np.array([1, 3, 4], dtype=int),
                  t=torch.tensor([4, 5, 6, 7.]))
     data[-1] = Batch.empty(data[1])
     assert np.allclose(data.c, [1, 3, 0])
diff --git a/test/base/test_buffer.py b/test/base/test_buffer.py
index 225375d0a..04348cdd4 100644
--- a/test/base/test_buffer.py
+++ b/test/base/test_buffer.py
@@ -33,7 +33,7 @@ def test_replaybuffer(size=10, bufsize=20):
                       done=done, obs_next=obs_next, info=info))
         obs = obs_next
         assert len(buf) == min(bufsize, i + 1)
-    assert buf.act.dtype == np.int
+    assert buf.act.dtype == int
     assert buf.act.shape == (bufsize, 1)
     data, indice = buf.sample(bufsize * 2)
     assert (indice < len(buf)).all()
@@ -50,9 +50,9 @@ def test_replaybuffer(size=10, bufsize=20):
     assert b.obs_next[0] == 'str'
     assert np.all(b.obs[1:] == 0)
     assert np.all(b.obs_next[1:] == np.array(None))
-    assert b.info.a[0] == 3 and b.info.a.dtype == np.integer
+    assert b.info.a[0] == 3 and b.info.a.dtype == int
     assert np.all(b.info.a[1:] == 0)
-    assert b.info.b.c[0] == 5.0 and b.info.b.c.dtype == np.inexact
+    assert b.info.b.c[0] == 5.0 and b.info.b.c.dtype == float
     assert np.all(b.info.b.c[1:] == 0.0)
     assert ptr.shape == (1,) and ptr[0] == 0
     assert ep_rew.shape == (1,) and ep_rew[0] == 1
@@ -180,8 +180,8 @@ def test_priortized_replaybuffer(size=32, bufsize=15):
         assert len(buf2) == min(bufsize, 3 * (i + 1))
     # check single buffer's data
     assert buf.info.key.shape == (buf.maxsize,)
-    assert buf.rew.dtype == np.float
-    assert buf.done.dtype == np.bool_
+    assert buf.rew.dtype == float
+    assert buf.done.dtype == bool
     data, indice = buf.sample(len(buf) // 2)
     buf.update_weight(indice, -data.weight / 2)
     assert np.allclose(buf.weight[indice], np.abs(-data.weight / 2) ** buf._alpha)
@@ -273,7 +273,7 @@ def test_segtree():
         index = tree.get_prefix_sum_idx(scalar)
         assert naive[:index].sum() <= scalar <= naive[:index + 1].sum()
     # corner case here
-    naive = np.ones(actual_len, np.int)
+    naive = np.ones(actual_len, int)
     tree[np.arange(actual_len)] = naive
     for scalar in range(actual_len):
         index = tree.get_prefix_sum_idx(scalar * 1.)
@@ -485,7 +485,7 @@ def test_replaybuffermanager():
     buf.set_batch(batch)
     assert np.allclose(buf.buffers[-1].info, [1] * 5)
     assert buf.sample_index(-1).tolist() == []
-    assert np.array([ReplayBuffer(0, ignore_obs_next=True)]).dtype == np.object
+    assert np.array([ReplayBuffer(0, ignore_obs_next=True)]).dtype == object
 
 
 def test_cachedbuffer():
diff --git a/tianshou/data/batch.py b/tianshou/data/batch.py
index a07ad67ed..ae907f00f 100644
--- a/tianshou/data/batch.py
+++ b/tianshou/data/batch.py
@@ -7,16 +7,18 @@
 from collections.abc import Collection
 from typing import Any, List, Dict, Union, Iterator, Optional, Iterable, Sequence
 
+IndexType = Union[slice, int, np.ndarray, List[int]]
+
 
 def _is_batch_set(data: Any) -> bool:
     # Batch set is a list/tuple of dict/Batch objects,
-    # or 1-D np.ndarray with np.object type,
+    # or 1-D np.ndarray with object type,
     # where each element is a dict/Batch object
     if isinstance(data, np.ndarray):  # most often case
         # "for e in data" will just unpack the first dimension,
         # but data.tolist() will flatten ndarray of objects
         # so do not use data.tolist()
-        return data.dtype == np.object and all(
+        return data.dtype == object and all(
             isinstance(e, (dict, Batch)) for e in data)
     elif isinstance(data, (list, tuple)):
         if len(data) > 0 and all(isinstance(e, (dict, Batch)) for e in data):
@@ -50,13 +52,13 @@ def _to_array_with_correct_type(v: Any) -> np.ndarray:
     if isinstance(v, np.ndarray) and issubclass(v.dtype.type, (np.bool_, np.number)):
         return v  # most often case
     # convert the value to np.ndarray
-    # convert to np.object data type if neither bool nor number
+    # convert to object data type if neither bool nor number
     # raises an exception if array's elements are tensors themself
     v = np.asanyarray(v)
     if not issubclass(v.dtype.type, (np.bool_, np.number)):
-        v = v.astype(np.object)
-    if v.dtype == np.object:
-        # scalar ndarray with np.object data type is very annoying
+        v = v.astype(object)
+    if v.dtype == object:
+        # scalar ndarray with object data type is very annoying
         # a=np.array([np.array({}, dtype=object), np.array({}, dtype=object)])
         # a is not array([{}, {}], dtype=object), and a[0]={} results in
         # something very strange:
@@ -87,13 +89,11 @@ def _create_value(
     if has_shape:
         shape = (size, *inst.shape) if stack else (size, *inst.shape[1:])
     if isinstance(inst, np.ndarray):
-        if issubclass(inst.dtype.type, (np.bool_, np.number)):
-            target_type = inst.dtype.type
-        else:
-            target_type = np.object
+        target_type = inst.dtype.type if issubclass(
+            inst.dtype.type, (np.bool_, np.number)) else object
         return np.full(
             shape,
-            fill_value=None if target_type == np.object else 0,
+            fill_value=None if target_type == object else 0,
             dtype=target_type
         )
     elif isinstance(inst, torch.Tensor):
@@ -105,8 +105,8 @@ def _create_value(
         return zero_batch
     elif is_scalar:
         return _create_value(np.asarray(inst), size, stack=stack)
-    else:  # fall back to np.object
-        return np.array([None for _ in range(size)])
+    else:  # fall back to object
+        return np.array([None for _ in range(size)], object)
 
 
 def _assert_type_keys(keys: Iterable[str]) -> None:
@@ -187,7 +187,7 @@ def __init__(
                 for k, v in batch_dict.items():
                     self.__dict__[k] = _parse_value(v)
             elif _is_batch_set(batch_dict):
-                self.stack_(batch_dict)
+                self.stack_(batch_dict)  # type: ignore
         if len(kwargs) > 0:
             self.__init__(kwargs, copy=copy)  # type: ignore
 
@@ -223,9 +223,7 @@ def __setstate__(self, state: Dict[str, Any]) -> None:
         """
         self.__init__(**state)  # type: ignore
 
-    def __getitem__(
-        self, index: Union[str, slice, int, np.integer, np.ndarray, List[int]]
-    ) -> Any:
+    def __getitem__(self, index: Union[str, IndexType]) -> Any:
         """Return self[index]."""
         if isinstance(index, str):
             return self.__dict__[index]
@@ -241,11 +239,7 @@ def __getitem__(
         else:
             raise IndexError("Cannot access item from empty Batch object.")
 
-    def __setitem__(
-        self,
-        index: Union[str, slice, int, np.integer, np.ndarray, List[int]],
-        value: Any,
-    ) -> None:
+    def __setitem__(self, index: Union[str, IndexType], value: Any) -> None:
         """Assign value to self[index]."""
         value = _parse_value(value)
         if isinstance(index, str):
@@ -530,8 +524,7 @@ def stack_(self, batches: Sequence[Union[dict, "Batch"]], axis: int = 0) -> None
             elif all(isinstance(e, (Batch, dict)) for e in v):  # third often
                 self.__dict__[k] = Batch.stack(v, axis)
             else:  # most often case is np.ndarray
-                v = np.stack(v, axis)
-                self.__dict__[k] = _to_array_with_correct_type(v)
+                self.__dict__[k] = _to_array_with_correct_type(np.stack(v, axis))
         # all the keys
         keys_total = set.union(*[set(b.keys()) for b in batches])
         # keys that are reserved in all batches
@@ -587,9 +580,7 @@ def stack(batches: Sequence[Union[dict, "Batch"]], axis: int = 0) -> "Batch":
         batch.stack_(batches, axis)
         return batch
 
-    def empty_(
-        self, index: Union[str, slice, int, np.integer, np.ndarray, List[int]] = None
-    ) -> "Batch":
+    def empty_(self, index: Optional[IndexType] = None) -> "Batch":
         """Return an empty Batch object with 0 or None filled.
 
         If "index" is specified, it will only reset the specific indexed-data.
@@ -620,7 +611,7 @@ def empty_(
             elif v is None:
                 continue
             elif isinstance(v, np.ndarray):
-                if v.dtype == np.object:
+                if v.dtype == object:
                     self.__dict__[k][index] = None
                 else:
                     self.__dict__[k][index] = 0
@@ -636,10 +627,7 @@ def empty_(
         return self
 
     @staticmethod
-    def empty(
-        batch: "Batch",
-        index: Union[str, slice, int, np.integer, np.ndarray, List[int]] = None,
-    ) -> "Batch":
+    def empty(batch: "Batch", index: Optional[IndexType] = None) -> "Batch":
         """Return an empty Batch object with 0 or None filled.
 
         The shape is the same as the given Batch.
diff --git a/tianshou/data/buffer/base.py b/tianshou/data/buffer/base.py
index 54c9f1cf6..4189207a9 100644
--- a/tianshou/data/buffer/base.py
+++ b/tianshou/data/buffer/base.py
@@ -115,9 +115,9 @@ def set_batch(self, batch: Batch) -> None:
     def unfinished_index(self) -> np.ndarray:
         """Return the index of unfinished episode."""
         last = (self._index - 1) % self._size if self._size else 0
-        return np.array([last] if not self.done[last] and self._size else [], np.int)
+        return np.array([last] if self._size and not self.done[last] else [], int)
 
-    def prev(self, index: Union[int, np.integer, np.ndarray]) -> np.ndarray:
+    def prev(self, index: Union[int, np.ndarray]) -> np.ndarray:
         """Return the index of previous transition.
 
         The index won't be modified if it is the beginning of an episode.
@@ -126,7 +126,7 @@ def prev(self, index: Union[int, np.integer, np.ndarray]) -> np.ndarray:
         end_flag = self.done[index] | (index == self.last_index[0])
         return (index + end_flag) % self._size
 
-    def next(self, index: Union[int, np.integer, np.ndarray]) -> np.ndarray:
+    def next(self, index: Union[int, np.ndarray]) -> np.ndarray:
         """Return the index of next transition.
 
         The index won't be modified if it is the end of an episode.
@@ -140,12 +140,12 @@ def update(self, buffer: "ReplayBuffer") -> np.ndarray:
         Return the updated indices. If update fails, return an empty array.
         """
         if len(buffer) == 0 or self.maxsize == 0:
-            return np.array([], np.int)
+            return np.array([], int)
         stack_num, buffer.stack_num = buffer.stack_num, 1
         from_indices = buffer.sample_index(0)  # get all available indices
         buffer.stack_num = stack_num
         if len(from_indices) == 0:
-            return np.array([], np.int)
+            return np.array([], int)
         to_indices = []
         for _ in range(len(from_indices)):
             to_indices.append(self._index)
@@ -224,8 +224,8 @@ def add(
             self._meta[ptr] = batch
         except ValueError:
             stack = not stacked_batch
-            batch.rew = batch.rew.astype(np.float)
-            batch.done = batch.done.astype(np.bool_)
+            batch.rew = batch.rew.astype(float)
+            batch.done = batch.done.astype(bool)
             if self._meta.is_empty():
                 self._meta = _create_value(  # type: ignore
                     batch, self.maxsize, stack)
@@ -248,10 +248,10 @@ def sample_index(self, batch_size: int) -> np.ndarray:
                     [np.arange(self._index, self._size), np.arange(self._index)]
                 )
             else:
-                return np.array([], np.int)
+                return np.array([], int)
         else:
             if batch_size < 0:
-                return np.array([], np.int)
+                return np.array([], int)
             all_indices = prev_indices = np.concatenate(
                 [np.arange(self._index, self._size), np.arange(self._index)]
             )
@@ -275,9 +275,9 @@ def sample(self, batch_size: int) -> Tuple[Batch, np.ndarray]:
 
     def get(
         self,
-        index: Union[int, np.integer, np.ndarray],
+        index: Union[int, List[int], np.ndarray],
         key: str,
-        default_value: Optional[Any] = None,
+        default_value: Any = None,
         stack_num: Optional[int] = None,
     ) -> Union[Batch, np.ndarray]:
         """Return the stacked result.
@@ -303,7 +303,7 @@ def get(
             if isinstance(index, list):
                 indice = np.array(index)
             else:
-                indice = index
+                indice = index  # type: ignore
             for _ in range(stack_num):
                 stack = [val[indice]] + stack
                 indice = self.prev(indice)
@@ -316,30 +316,31 @@ def get(
                 raise e  # val != Batch()
             return Batch()
 
-    def __getitem__(self, index: Union[slice, int, np.integer, np.ndarray]) -> Batch:
+    def __getitem__(self, index: Union[slice, int, List[int], np.ndarray]) -> Batch:
         """Return a data batch: self[index].
 
         If stack_num is larger than 1, return the stacked obs and obs_next with shape
         (batch, len, ...).
         """
         if isinstance(index, slice):  # change slice to np array
-            if index == slice(None):  # buffer[:] will get all available data
-                index = self.sample_index(0)
-            else:
-                index = self._indices[:len(self)][index]
+            # buffer[:] will get all available data
+            indice = self.sample_index(0) if index == slice(None) \
+                else self._indices[:len(self)][index]
+        else:
+            indice = index
         # raise KeyError first instead of AttributeError,
         # to support np.array([ReplayBuffer()])
-        obs = self.get(index, "obs")
+        obs = self.get(indice, "obs")
         if self._save_obs_next:
-            obs_next = self.get(index, "obs_next", Batch())
+            obs_next = self.get(indice, "obs_next", Batch())
         else:
-            obs_next = self.get(self.next(index), "obs", Batch())
+            obs_next = self.get(self.next(indice), "obs", Batch())
         return Batch(
             obs=obs,
-            act=self.act[index],
-            rew=self.rew[index],
-            done=self.done[index],
+            act=self.act[indice],
+            rew=self.rew[indice],
+            done=self.done[indice],
             obs_next=obs_next,
-            info=self.get(index, "info", Batch()),
-            policy=self.get(index, "policy", Batch()),
+            info=self.get(indice, "info", Batch()),
+            policy=self.get(indice, "policy", Batch()),
         )
diff --git a/tianshou/data/buffer/cached.py b/tianshou/data/buffer/cached.py
index acbae6f9a..49bb33bcf 100644
--- a/tianshou/data/buffer/cached.py
+++ b/tianshou/data/buffer/cached.py
@@ -58,14 +58,14 @@ def add(
         cached_buffer_ids[i]th cached buffer's corresponding episode result.
         """
         if buffer_ids is None:
-            buffer_ids = np.arange(1, 1 + self.cached_buffer_num)
+            buf_arr = np.arange(1, 1 + self.cached_buffer_num)
         else:  # make sure it is np.ndarray
-            buffer_ids = np.asarray(buffer_ids) + 1
-        ptr, ep_rew, ep_len, ep_idx = super().add(batch, buffer_ids=buffer_ids)
+            buf_arr = np.asarray(buffer_ids) + 1
+        ptr, ep_rew, ep_len, ep_idx = super().add(batch, buffer_ids=buf_arr)
         # find the terminated episode, move data from cached buf to main buf
         updated_ptr, updated_ep_idx = [], []
-        done = batch.done.astype(np.bool_)
-        for buffer_idx in buffer_ids[done]:
+        done = batch.done.astype(bool)
+        for buffer_idx in buf_arr[done]:
             index = self.main_buffer.update(self.buffers[buffer_idx])
             if len(index) == 0:  # unsuccessful move, replace with -1
                 index = [-1]
diff --git a/tianshou/data/buffer/manager.py b/tianshou/data/buffer/manager.py
index fa9db2556..3258b1203 100644
--- a/tianshou/data/buffer/manager.py
+++ b/tianshou/data/buffer/manager.py
@@ -22,7 +22,7 @@ class ReplayBufferManager(ReplayBuffer):
 
     def __init__(self, buffer_list: List[ReplayBuffer]) -> None:
         self.buffer_num = len(buffer_list)
-        self.buffers = np.array(buffer_list, dtype=np.object)
+        self.buffers = np.array(buffer_list, dtype=object)
         offset, size = [], 0
         buffer_type = type(self.buffers[0])
         kwargs = self.buffers[0].options
@@ -46,7 +46,7 @@ def _compile(self) -> None:
         _next_index(index, offset, done, last, lens)
 
     def __len__(self) -> int:
-        return self._lengths.sum()
+        return int(self._lengths.sum())
 
     def reset(self, keep_statistics: bool = False) -> None:
         self.last_index = self._offset.copy()
@@ -68,7 +68,7 @@ def unfinished_index(self) -> np.ndarray:
             for offset, buf in zip(self._offset, self.buffers)
         ])
 
-    def prev(self, index: Union[int, np.integer, np.ndarray]) -> np.ndarray:
+    def prev(self, index: Union[int, np.ndarray]) -> np.ndarray:
         if isinstance(index, (list, np.ndarray)):
             return _prev_index(np.asarray(index), self._extend_offset,
                                self.done, self.last_index, self._lengths)
@@ -76,7 +76,7 @@ def prev(self, index: Union[int, np.integer, np.ndarray]) -> np.ndarray:
             return _prev_index(np.array([index]), self._extend_offset,
                                self.done, self.last_index, self._lengths)[0]
 
-    def next(self, index: Union[int, np.integer, np.ndarray]) -> np.ndarray:
+    def next(self, index: Union[int, np.ndarray]) -> np.ndarray:
         if isinstance(index, (list, np.ndarray)):
             return _next_index(np.asarray(index), self._extend_offset,
                                self.done, self.last_index, self._lengths)
@@ -130,8 +130,8 @@ def add(
         try:
             self._meta[ptrs] = batch
         except ValueError:
-            batch.rew = batch.rew.astype(np.float)
-            batch.done = batch.done.astype(np.bool_)
+            batch.rew = batch.rew.astype(float)
+            batch.done = batch.done.astype(bool)
             if self._meta.is_empty():
                 self._meta = _create_value(  # type: ignore
                     batch, self.maxsize, stack=False)
@@ -143,7 +143,7 @@ def add(
 
     def sample_index(self, batch_size: int) -> np.ndarray:
         if batch_size < 0:
-            return np.array([], np.int)
+            return np.array([], int)
         if self._sample_avail and self.stack_num > 1:
             all_indices = np.concatenate([
                 buf.sample_index(0) + offset
@@ -154,7 +154,7 @@ def sample_index(self, batch_size: int) -> np.ndarray:
             else:
                 return np.random.choice(all_indices, batch_size)
         if batch_size == 0:  # get all available indices
-            sample_num = np.zeros(self.buffer_num, np.int)
+            sample_num = np.zeros(self.buffer_num, int)
         else:
             buffer_idx = np.random.choice(
                 self.buffer_num, batch_size, p=self._lengths / self._lengths.sum()
diff --git a/tianshou/data/buffer/prio.py b/tianshou/data/buffer/prio.py
index 46c0be5e4..e5c490926 100644
--- a/tianshou/data/buffer/prio.py
+++ b/tianshou/data/buffer/prio.py
@@ -34,6 +34,7 @@ def init_weight(self, index: Union[int, np.ndarray]) -> None:
     def update(self, buffer: ReplayBuffer) -> np.ndarray:
         indices = super().update(buffer)
         self.init_weight(indices)
+        return indices
 
     def add(
         self, batch: Batch, buffer_ids: Optional[Union[np.ndarray, List[int]]] = None
@@ -45,13 +46,11 @@ def add(
     def sample_index(self, batch_size: int) -> np.ndarray:
         if batch_size > 0 and len(self) > 0:
             scalar = np.random.rand(batch_size) * self.weight.reduce()
-            return self.weight.get_prefix_sum_idx(scalar)
+            return self.weight.get_prefix_sum_idx(scalar)  # type: ignore
         else:
             return super().sample_index(batch_size)
 
-    def get_weight(
-        self, index: Union[slice, int, np.integer, np.ndarray]
-    ) -> np.ndarray:
+    def get_weight(self, index: Union[int, np.ndarray]) -> Union[float, np.ndarray]:
         """Get the importance sampling weight.
 
         The "weight" in the returned Batch is the weight on loss function to de-bias
@@ -76,7 +75,13 @@ def update_weight(
         self._max_prio = max(self._max_prio, weight.max())
         self._min_prio = min(self._min_prio, weight.min())
 
-    def __getitem__(self, index: Union[slice, int, np.integer, np.ndarray]) -> Batch:
-        batch = super().__getitem__(index)
-        batch.weight = self.get_weight(index)
+    def __getitem__(self, index: Union[slice, int, List[int], np.ndarray]) -> Batch:
+        if isinstance(index, slice):  # change slice to np array
+            # buffer[:] will get all available data
+            indice = self.sample_index(0) if index == slice(None) \
+                else self._indices[:len(self)][index]
+        else:
+            indice = index
+        batch = super().__getitem__(indice)
+        batch.weight = self.get_weight(indice)
         return batch
diff --git a/tianshou/data/collector.py b/tianshou/data/collector.py
index bf7399080..37ddabf50 100644
--- a/tianshou/data/collector.py
+++ b/tianshou/data/collector.py
@@ -123,7 +123,7 @@ def _reset_state(self, id: Union[int, List[int]]) -> None:
             if isinstance(state, torch.Tensor):
                 state[id].zero_()
             elif isinstance(state, np.ndarray):
-                state[id] = None if state.dtype == np.object else 0
+                state[id] = None if state.dtype == object else 0
             elif isinstance(state, Batch):
                 state.empty_(id)
 
@@ -266,7 +266,7 @@ def collect(
                 if n_episode:
                     surplus_env_num = len(ready_env_ids) - (n_episode - episode_count)
                     if surplus_env_num > 0:
-                        mask = np.ones_like(ready_env_ids, np.bool)
+                        mask = np.ones_like(ready_env_ids, dtype=bool)
                         mask[env_ind_local[:surplus_env_num]] = False
                         ready_env_ids = ready_env_ids[mask]
                         self.data = self.data[mask]
@@ -291,7 +291,7 @@ def collect(
             rews, lens, idxs = list(map(
                 np.concatenate, [episode_rews, episode_lens, episode_start_indices]))
         else:
-            rews, lens, idxs = np.array([]), np.array([], np.int), np.array([], np.int)
+            rews, lens, idxs = np.array([]), np.array([], int), np.array([], int)
 
         return {
             "n/ep": episode_count,
@@ -493,7 +493,7 @@ def collect(
             rews, lens, idxs = list(map(
                 np.concatenate, [episode_rews, episode_lens, episode_start_indices]))
         else:
-            rews, lens, idxs = np.array([]), np.array([], np.int), np.array([], np.int)
+            rews, lens, idxs = np.array([]), np.array([], int), np.array([], int)
 
         return {
             "n/ep": episode_count,
diff --git a/tianshou/data/utils/converter.py b/tianshou/data/utils/converter.py
index 52b0744cf..9f7d88a82 100644
--- a/tianshou/data/utils/converter.py
+++ b/tianshou/data/utils/converter.py
@@ -4,15 +4,12 @@
 import numpy as np
 from copy import deepcopy
 from numbers import Number
-from typing import Dict, Union, Optional
+from typing import Any, Dict, Union, Optional
 
 from tianshou.data.batch import _parse_value, Batch
 
 
-def to_numpy(
-    x: Optional[Union[Batch, dict, list, tuple, np.number, np.bool_, Number,
-                      np.ndarray, torch.Tensor]]
-) -> Union[Batch, dict, list, tuple, np.ndarray]:
+def to_numpy(x: Any) -> Union[Batch, np.ndarray]:
     """Return an object without torch.Tensor."""
     if isinstance(x, torch.Tensor):  # most often case
         return x.detach().cpu().numpy()
@@ -21,28 +18,22 @@ def to_numpy(
     elif isinstance(x, (np.number, np.bool_, Number)):
         return np.asanyarray(x)
     elif x is None:
-        return np.array(None, dtype=np.object)
-    elif isinstance(x, Batch):
-        x = deepcopy(x)
+        return np.array(None, dtype=object)
+    elif isinstance(x, (dict, Batch)):
+        x = Batch(x) if isinstance(x, dict) else deepcopy(x)
         x.to_numpy()
         return x
-    elif isinstance(x, dict):
-        return {k: to_numpy(v) for k, v in x.items()}
     elif isinstance(x, (list, tuple)):
-        try:
-            return to_numpy(_parse_value(x))
-        except TypeError:
-            return [to_numpy(e) for e in x]
+        return to_numpy(_parse_value(x))
     else:  # fallback
         return np.asanyarray(x)
 
 
 def to_torch(
-    x: Union[Batch, dict, list, tuple, np.number, np.bool_, Number, np.ndarray,
-             torch.Tensor],
+    x: Any,
     dtype: Optional[torch.dtype] = None,
     device: Union[str, int, torch.device] = "cpu",
-) -> Union[Batch, dict, list, tuple, torch.Tensor]:
+) -> Union[Batch, torch.Tensor]:
     """Return an object without np.ndarray."""
     if isinstance(x, np.ndarray) and issubclass(
         x.dtype.type, (np.bool_, np.number)
@@ -57,25 +48,17 @@ def to_torch(
         return x.to(device)  # type: ignore
     elif isinstance(x, (np.number, np.bool_, Number)):
         return to_torch(np.asanyarray(x), dtype, device)
-    elif isinstance(x, dict):
-        return {k: to_torch(v, dtype, device) for k, v in x.items()}
-    elif isinstance(x, Batch):
-        x = deepcopy(x)
+    elif isinstance(x, (dict, Batch)):
+        x = Batch(x, copy=True) if isinstance(x, dict) else deepcopy(x)
         x.to_torch(dtype, device)
         return x
     elif isinstance(x, (list, tuple)):
-        try:
-            return to_torch(_parse_value(x), dtype, device)
-        except TypeError:
-            return [to_torch(e, dtype, device) for e in x]
+        return to_torch(_parse_value(x), dtype, device)
     else:  # fallback
         raise TypeError(f"object {x} cannot be converted to torch.")
 
 
-def to_torch_as(
-    x: Union[Batch, dict, list, tuple, np.ndarray, torch.Tensor],
-    y: torch.Tensor,
-) -> Union[Batch, dict, list, tuple, torch.Tensor]:
+def to_torch_as(x: Any, y: torch.Tensor) -> Union[Batch, torch.Tensor]:
     """Return an object without np.ndarray.
 
     Same as ``to_torch(x, dtype=y.dtype, device=y.device)``.
@@ -147,25 +130,20 @@ def to_hdf5_via_pickle(x: object, y: h5py.Group, key: str) -> None:
             y[k].attrs["__data_type__"] = v.__class__.__name__
 
 
-def from_hdf5(
-    x: h5py.Group, device: Optional[str] = None
-) -> Hdf5ConvertibleType:
+def from_hdf5(x: h5py.Group, device: Optional[str] = None) -> Hdf5ConvertibleValues:
     """Restore object from HDF5 group."""
     if isinstance(x, h5py.Dataset):
         # handle datasets
         if x.attrs["__data_type__"] == "ndarray":
-            y = np.array(x)
+            return np.array(x)
         elif x.attrs["__data_type__"] == "Tensor":
-            y = torch.tensor(x, device=device)
+            return torch.tensor(x, device=device)
         else:
-            y = pickle.loads(x[()])
+            return pickle.loads(x[()])
     else:
         # handle groups representing a dict or a Batch
-        y = {k: v for k, v in x.attrs.items() if k != "__data_type__"}
+        y = dict(x.attrs.items())
+        data_type = y.pop("__data_type__", None)
         for k, v in x.items():
             y[k] = from_hdf5(v, device)
-        if "__data_type__" in x.attrs:
-            # if dictionary represents Batch, convert to Batch
-            if x.attrs["__data_type__"] == "Batch":
-                y = Batch(y)
-    return y
+        return Batch(y) if data_type == "Batch" else y
diff --git a/tianshou/env/venvs.py b/tianshou/env/venvs.py
index a15a4e26a..b2fc73b33 100644
--- a/tianshou/env/venvs.py
+++ b/tianshou/env/venvs.py
@@ -140,12 +140,10 @@ def _wrap_id(
         self, id: Optional[Union[int, List[int], np.ndarray]] = None
     ) -> Union[List[int], np.ndarray]:
         if id is None:
-            id = list(range(self.env_num))
-        elif np.isscalar(id):
-            id = [id]
-        return id
+            return list(range(self.env_num))
+        return [id] if np.isscalar(id) else id  # type: ignore
 
-    def _assert_id(self, id: List[int]) -> None:
+    def _assert_id(self, id: Union[List[int], np.ndarray]) -> None:
         for i in id:
             assert i not in self.waiting_id, \
                 f"Cannot interact with environment {i} which is stepping now."
@@ -291,7 +289,7 @@ def normalize_obs(self, obs: np.ndarray) -> np.ndarray:
             clip_max = 10.0  # this magic number is from openai baselines
             # see baselines/common/vec_env/vec_normalize.py#L10
             obs = (obs - self.obs_rms.mean) / np.sqrt(self.obs_rms.var + self.__eps)
-            obs = np.clip(obs, -clip_max, clip_max)
+            obs = np.clip(obs, -clip_max, clip_max)  # type: ignore
         return obs
 
     def __del__(self) -> None:
diff --git a/tianshou/env/worker/base.py b/tianshou/env/worker/base.py
index d22d60b62..dbf350a33 100644
--- a/tianshou/env/worker/base.py
+++ b/tianshou/env/worker/base.py
@@ -25,9 +25,7 @@ def reset(self) -> Any:
     def send_action(self, action: np.ndarray) -> None:
         pass
 
-    def get_result(
-        self,
-    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    def get_result(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
         return self.result
 
     def step(
@@ -45,9 +43,7 @@ def step(
 
     @staticmethod
     def wait(
-        workers: List["EnvWorker"],
-        wait_num: int,
-        timeout: Optional[float] = None,
+        workers: List["EnvWorker"], wait_num: int, timeout: Optional[float] = None
     ) -> List["EnvWorker"]:
         """Given a list of workers, return those ready ones."""
         raise NotImplementedError
diff --git a/tianshou/env/worker/dummy.py b/tianshou/env/worker/dummy.py
index eafa690b1..d0579d162 100644
--- a/tianshou/env/worker/dummy.py
+++ b/tianshou/env/worker/dummy.py
@@ -20,9 +20,7 @@ def reset(self) -> Any:
 
     @staticmethod
     def wait(  # type: ignore
-        workers: List["DummyEnvWorker"],
-        wait_num: int,
-        timeout: Optional[float] = None,
+        workers: List["DummyEnvWorker"], wait_num: int, timeout: Optional[float] = None
     ) -> List["DummyEnvWorker"]:
         # Sequential EnvWorker objects are always ready
         return workers
diff --git a/tianshou/env/worker/ray.py b/tianshou/env/worker/ray.py
index 8139ed9d5..af7285b22 100644
--- a/tianshou/env/worker/ray.py
+++ b/tianshou/env/worker/ray.py
@@ -25,9 +25,7 @@ def reset(self) -> Any:
 
     @staticmethod
     def wait(  # type: ignore
-        workers: List["RayEnvWorker"],
-        wait_num: int,
-        timeout: Optional[float] = None,
+        workers: List["RayEnvWorker"], wait_num: int, timeout: Optional[float] = None
     ) -> List["RayEnvWorker"]:
         results = [x.result for x in workers]
         ready_results, _ = ray.wait(results, num_returns=wait_num, timeout=timeout)
diff --git a/tianshou/env/worker/subproc.py b/tianshou/env/worker/subproc.py
index 822d65ccf..8b89b6c34 100644
--- a/tianshou/env/worker/subproc.py
+++ b/tianshou/env/worker/subproc.py
@@ -12,7 +12,6 @@
 
 
 _NP_TO_CT = {
-    np.bool: ctypes.c_bool,
     np.bool_: ctypes.c_bool,
     np.uint8: ctypes.c_uint8,
     np.uint16: ctypes.c_uint16,
@@ -31,7 +30,7 @@ class ShArray:
     """Wrapper of multiprocessing Array."""
 
     def __init__(self, dtype: np.generic, shape: Tuple[int]) -> None:
-        self.arr = Array(_NP_TO_CT[dtype.type], int(np.prod(shape)))
+        self.arr = Array(_NP_TO_CT[dtype.type], int(np.prod(shape)))  # type: ignore
         self.dtype = dtype
         self.shape = shape
 
@@ -64,8 +63,7 @@ def _worker(
     obs_bufs: Optional[Union[dict, tuple, ShArray]] = None,
 ) -> None:
     def _encode_obs(
-        obs: Union[dict, tuple, np.ndarray],
-        buffer: Union[dict, tuple, ShArray],
+        obs: Union[dict, tuple, np.ndarray], buffer: Union[dict, tuple, ShArray]
     ) -> None:
         if isinstance(obs, np.ndarray) and isinstance(buffer, ShArray):
             buffer.save(obs)
diff --git a/tianshou/exploration/random.py b/tianshou/exploration/random.py
index 2e495dc35..a59085809 100644
--- a/tianshou/exploration/random.py
+++ b/tianshou/exploration/random.py
@@ -68,9 +68,7 @@ def reset(self) -> None:
         """Reset to the initial state."""
         self._x = self._x0
 
-    def __call__(
-        self, size: Sequence[int], mu: Optional[float] = None
-    ) -> np.ndarray:
+    def __call__(self, size: Sequence[int], mu: Optional[float] = None) -> np.ndarray:
         """Generate new noise.
 
         Return an numpy array which size is equal to ``size``.
@@ -82,4 +80,4 @@ def __call__(
             mu = self._mu
         r = self._beta * np.random.normal(size=size)
         self._x = self._x + self._alpha * (mu - self._x) + r
-        return self._x
+        return self._x  # type: ignore
diff --git a/tianshou/policy/base.py b/tianshou/policy/base.py
index b29706575..238aacee5 100644
--- a/tianshou/policy/base.py
+++ b/tianshou/policy/base.py
@@ -142,14 +142,14 @@ def map_action(self, act: Union[Batch, np.ndarray]) -> Union[Batch, np.ndarray]:
                 isinstance(act, np.ndarray):
             # currently this action mapping only supports np.ndarray action
             if self.action_bound_method == "clip":
-                act = np.clip(act, -1.0, 1.0)
+                act = np.clip(act, -1.0, 1.0)  # type: ignore
             elif self.action_bound_method == "tanh":
                 act = np.tanh(act)
             if self.action_scaling:
-                assert np.all(act >= -1.0) and np.all(act <= 1.0), \
+                assert np.min(act) >= -1.0 and np.max(act) <= 1.0, \
                     "action scaling only accepts raw action range = [-1, 1]"
                 low, high = self.action_space.low, self.action_space.high
-                act = low + (high - low) * (act + 1.0) / 2.0
+                act = low + (high - low) * (act + 1.0) / 2.0  # type: ignore
         return act
 
     def process_fn(
@@ -241,9 +241,9 @@ def value_mask(buffer: ReplayBuffer, indice: np.ndarray) -> np.ndarray:
         :return: A bool type numpy.ndarray in the same shape with indice. "True" means
             "obs_next" of that buffer[indice] is valid.
         """
-        mask = ~buffer.done[indice].astype(np.bool)
-        # info['TimeLimit.truncated'] will be set to True if 'done' flag is generated
-        # because of timelimit of environments. Checkout gym.wrappers.TimeLimit.
+        mask = ~buffer.done[indice]
+        # info["TimeLimit.truncated"] will be True if the "done" flag was caused by
+        # the environment's time limit. Check out gym.wrappers.TimeLimit.
         if hasattr(buffer, 'info') and 'TimeLimit.truncated' in buffer.info:
             mask = mask | buffer.info['TimeLimit.truncated'][indice]
         return mask
@@ -281,7 +281,8 @@ def compute_episodic_return(
             assert np.isclose(gae_lambda, 1.0)
             v_s_ = np.zeros_like(rew)
         else:
-            v_s_ = to_numpy(v_s_.flatten()) * BasePolicy.value_mask(buffer, indice)
+            v_s_ = to_numpy(v_s_.flatten())  # type: ignore
+            v_s_ = v_s_ * BasePolicy.value_mask(buffer, indice)
         v_s = np.roll(v_s_, 1) if v_s is None else to_numpy(v_s.flatten())
 
         end_flag = batch.done.copy()
diff --git a/tianshou/policy/imitation/discrete_bcq.py b/tianshou/policy/imitation/discrete_bcq.py
index 5d7082243..38ae0c4f9 100644
--- a/tianshou/policy/imitation/discrete_bcq.py
+++ b/tianshou/policy/imitation/discrete_bcq.py
@@ -58,7 +58,7 @@ def __init__(
         else:
             self._log_tau = -np.inf
         assert 0.0 <= eval_eps < 1.0
-        self._eps = eval_eps
+        self.eps = eval_eps
         self._weight_reg = imitation_logits_penalty
 
     def train(self, mode: bool = True) -> "DiscreteBCQPolicy":
@@ -96,15 +96,6 @@ def forward(  # type: ignore
         return Batch(act=action, state=state, q_value=q_value,
                      imitation_logits=imitation_logits)
 
-    def exploration_noise(self, act: np.ndarray, batch: Batch) -> np.ndarray:
-        # add eps to act
-        if not np.isclose(self._eps, 0.0):
-            bsz = len(act)
-            mask = np.random.rand(bsz) < self._eps
-            act_rand = np.random.randint(self.max_action_num, size=[bsz])
-            act[mask] = act_rand[mask]
-        return act
-
     def learn(self, batch: Batch, **kwargs: Any) -> Dict[str, float]:
         if self._iter % self._freq == 0:
             self.sync_weight()
diff --git a/tianshou/policy/modelbase/psrl.py b/tianshou/policy/modelbase/psrl.py
index 4a565976f..b438dbcbc 100644
--- a/tianshou/policy/modelbase/psrl.py
+++ b/tianshou/policy/modelbase/psrl.py
@@ -1,6 +1,6 @@
 import torch
 import numpy as np
-from typing import Any, Dict, Union, Optional
+from typing import Any, Dict, Tuple, Union, Optional
 
 from tianshou.data import Batch
 from tianshou.policy import BasePolicy
@@ -100,7 +100,7 @@ def value_iteration(
         discount_factor: float,
         eps: float,
         value: np.ndarray,
-    ) -> np.ndarray:
+    ) -> Tuple[np.ndarray, np.ndarray]:
         """Value iteration solver for MDPs.
 
         :param np.ndarray trans_prob: transition probabilities, with shape
@@ -126,7 +126,7 @@ def value_iteration(
     def __call__(
         self,
         obs: np.ndarray,
-        state: Optional[Any] = None,
+        state: Any = None,
         info: Dict[str, Any] = {},
     ) -> np.ndarray:
         if not self.updated:
@@ -215,6 +215,6 @@ def learn(
                 rew_count[obs_next, :] += 1
         self.model.observe(trans_count, rew_sum, rew_square_sum, rew_count)
         return {
-            "psrl/rew_mean": self.model.rew_mean.mean(),
-            "psrl/rew_std": self.model.rew_std.mean(),
+            "psrl/rew_mean": float(self.model.rew_mean.mean()),
+            "psrl/rew_std": float(self.model.rew_std.mean()),
         }
diff --git a/tianshou/policy/modelfree/a2c.py b/tianshou/policy/modelfree/a2c.py
index 9396971df..433810d1f 100644
--- a/tianshou/policy/modelfree/a2c.py
+++ b/tianshou/policy/modelfree/a2c.py
@@ -5,7 +5,7 @@
 from typing import Any, Dict, List, Type, Optional
 
 from tianshou.policy import PGPolicy
-from tianshou.data import Batch, ReplayBuffer, to_numpy, to_torch_as
+from tianshou.data import Batch, ReplayBuffer, to_torch_as
 
 
 class A2CPolicy(PGPolicy):
@@ -84,8 +84,8 @@ def _compute_returns(
                 v_s.append(self.critic(b.obs))
                 v_s_.append(self.critic(b.obs_next))
         batch.v_s = torch.cat(v_s, dim=0).flatten()  # old value
-        v_s = to_numpy(batch.v_s)
-        v_s_ = to_numpy(torch.cat(v_s_, dim=0).flatten())
+        v_s = batch.v_s.cpu().numpy()
+        v_s_ = torch.cat(v_s_, dim=0).flatten().cpu().numpy()
         # when normalizing values, we do not minus self.ret_rms.mean to be numerically
         # consistent with OPENAI baselines' value normalization pipeline. Emperical
         # study also shows that "minus mean" will harm performances a tiny little bit
diff --git a/tianshou/policy/modelfree/ddpg.py b/tianshou/policy/modelfree/ddpg.py
index 7d582fbae..324467fc1 100644
--- a/tianshou/policy/modelfree/ddpg.py
+++ b/tianshou/policy/modelfree/ddpg.py
@@ -1,4 +1,5 @@
 import torch
+import warnings
 import numpy as np
 from copy import deepcopy
 from typing import Any, Dict, Tuple, Union, Optional
@@ -167,7 +168,12 @@ def learn(self, batch: Batch, **kwargs: Any) -> Dict[str, float]:
             "loss/critic": critic_loss.item(),
         }
 
-    def exploration_noise(self, act: np.ndarray, batch: Batch) -> np.ndarray:
-        if self._noise:
-            act = act + self._noise(act.shape)
+    def exploration_noise(
+        self, act: Union[np.ndarray, Batch], batch: Batch
+    ) -> Union[np.ndarray, Batch]:
+        if self._noise is None:
+            return act
+        if isinstance(act, np.ndarray):
+            return act + self._noise(act.shape)
+        warnings.warn("Cannot add exploration noise to non-numpy_array action.")
         return act
diff --git a/tianshou/policy/modelfree/dqn.py b/tianshou/policy/modelfree/dqn.py
index 5b9f463ed..5a4f663a6 100644
--- a/tianshou/policy/modelfree/dqn.py
+++ b/tianshou/policy/modelfree/dqn.py
@@ -168,8 +168,10 @@ def learn(self, batch: Batch, **kwargs: Any) -> Dict[str, float]:
         self._iter += 1
         return {"loss": loss.item()}
 
-    def exploration_noise(self, act: np.ndarray, batch: Batch) -> np.ndarray:
-        if not np.isclose(self.eps, 0.0):
+    def exploration_noise(
+        self, act: Union[np.ndarray, Batch], batch: Batch
+    ) -> Union[np.ndarray, Batch]:
+        if isinstance(act, np.ndarray) and not np.isclose(self.eps, 0.0):
             bsz = len(act)
             rand_mask = np.random.rand(bsz) < self.eps
             q = np.random.rand(bsz, self.max_action_num)  # [0, 1]
diff --git a/tianshou/policy/multiagent/mapolicy.py b/tianshou/policy/multiagent/mapolicy.py
index 7aa1f661c..176f86a36 100644
--- a/tianshou/policy/multiagent/mapolicy.py
+++ b/tianshou/policy/multiagent/mapolicy.py
@@ -1,5 +1,5 @@
 import numpy as np
-from typing import Any, Dict, List, Union, Optional
+from typing import Any, Dict, List, Tuple, Union, Optional
 
 from tianshou.policy import BasePolicy
 from tianshou.data import Batch, ReplayBuffer
@@ -71,7 +71,7 @@ def exploration_noise(
                 act[agent_index], batch[agent_index])
         return act
 
-    def forward(
+    def forward(  # type: ignore
         self,
         batch: Batch,
         state: Optional[Union[dict, Batch]] = None,
@@ -100,7 +100,8 @@ def forward(
                     "agent_n": xxx}
             }
         """
-        results = []
+        results: List[Tuple[bool, np.ndarray, Batch,
+                            Union[np.ndarray, Batch], Batch]] = []
         for policy in self.policies:
             # This part of code is difficult to understand.
             # Let's follow an example with two agents
@@ -112,7 +113,7 @@ def forward(
             agent_index = np.nonzero(batch.obs.agent_id == policy.agent_id)[0]
             if len(agent_index) == 0:
                 # (has_data, agent_index, out, act, state)
-                results.append((False, None, Batch(), None, Batch()))
+                results.append((False, np.array([-1]), Batch(), Batch(), Batch()))
                 continue
             tmp_batch = batch[agent_index]
             if isinstance(tmp_batch.rew, np.ndarray):
diff --git a/tianshou/utils/log_tools.py b/tianshou/utils/log_tools.py
index c50c8ebae..fcd1d5575 100644
--- a/tianshou/utils/log_tools.py
+++ b/tianshou/utils/log_tools.py
@@ -14,16 +14,12 @@ def __init__(self, writer: Any) -> None:
 
     @abstractmethod
     def write(
-        self,
-        key: str,
-        x: Union[Number, np.number, np.ndarray],
-        y: Union[Number, np.number, np.ndarray],
-        **kwargs: Any,
+        self, key: str, x: int, y: Union[Number, np.number, np.ndarray], **kwargs: Any
     ) -> None:
         """Specify how the writer is used to log data.
 
-        :param key: namespace which the input data tuple belongs to.
-        :param x: stands for the ordinate of the input data tuple.
+        :param str key: namespace which the input data tuple belongs to.
+        :param int x: stands for the abscissa of the input data tuple.
-        :param y: stands for the abscissa of the input data tuple.
+        :param y: stands for the ordinate of the input data tuple.
         """
         pass
@@ -84,11 +80,7 @@ def __init__(
         self.last_log_update_step = -1
 
     def write(
-        self,
-        key: str,
-        x: Union[Number, np.number, np.ndarray],
-        y: Union[Number, np.number, np.ndarray],
-        **kwargs: Any,
+        self, key: str, x: int, y: Union[Number, np.number, np.ndarray], **kwargs: Any
     ) -> None:
         self.writer.add_scalar(key, y, global_step=x)
 
@@ -149,11 +141,7 @@ def __init__(self) -> None:
         super().__init__(None)  # type: ignore
 
     def write(
-        self,
-        key: str,
-        x: Union[Number, np.number, np.ndarray],
-        y: Union[Number, np.number, np.ndarray],
-        **kwargs: Any,
+        self, key: str, x: int, y: Union[Number, np.number, np.ndarray], **kwargs: Any
     ) -> None:
         """The LazyLogger writes nothing."""
         pass
diff --git a/tianshou/utils/net/common.py b/tianshou/utils/net/common.py
index b41346e9a..664b488f0 100644
--- a/tianshou/utils/net/common.py
+++ b/tianshou/utils/net/common.py
@@ -50,8 +50,7 @@ def __init__(
         output_dim: int = 0,
         hidden_sizes: Sequence[int] = (),
         norm_layer: Optional[Union[ModuleType, Sequence[ModuleType]]] = None,
-        activation: Optional[Union[ModuleType, Sequence[ModuleType]]]
-        = nn.ReLU,
+        activation: Optional[Union[ModuleType, Sequence[ModuleType]]] = nn.ReLU,
         device: Optional[Union[str, int, torch.device]] = None,
     ) -> None:
         super().__init__()
@@ -139,7 +138,7 @@ class Net(nn.Module):
     def __init__(
         self,
         state_shape: Union[int, Sequence[int]],
-        action_shape: Optional[Union[int, Sequence[int]]] = 0,
+        action_shape: Union[int, Sequence[int]] = 0,
         hidden_sizes: Sequence[int] = (),
         norm_layer: Optional[ModuleType] = None,
         activation: Optional[ModuleType] = nn.ReLU,
@@ -153,8 +152,8 @@ def __init__(
         self.device = device
         self.softmax = softmax
         self.num_atoms = num_atoms
-        input_dim = np.prod(state_shape)
-        action_dim = np.prod(action_shape) * num_atoms
+        input_dim = int(np.prod(state_shape))
+        action_dim = int(np.prod(action_shape)) * num_atoms
         if concat:
             input_dim += action_dim
         self.use_dueling = dueling_param is not None
@@ -179,7 +178,7 @@ def __init__(
     def forward(
         self,
         s: Union[np.ndarray, torch.Tensor],
-        state: Optional[Any] = None,
+        state: Any = None,
         info: Dict[str, Any] = {},
     ) -> Tuple[torch.Tensor, Any]:
         """Mapping: s -> flatten (inside MLP)-> logits."""
@@ -221,8 +220,8 @@ def __init__(
             num_layers=layer_num,
             batch_first=True,
         )
-        self.fc1 = nn.Linear(np.prod(state_shape), hidden_layer_size)
-        self.fc2 = nn.Linear(hidden_layer_size, np.prod(action_shape))
+        self.fc1 = nn.Linear(int(np.prod(state_shape)), hidden_layer_size)
+        self.fc2 = nn.Linear(hidden_layer_size, int(np.prod(action_shape)))
 
     def forward(
         self,
diff --git a/tianshou/utils/net/continuous.py b/tianshou/utils/net/continuous.py
index a8f667532..36c178612 100644
--- a/tianshou/utils/net/continuous.py
+++ b/tianshou/utils/net/continuous.py
@@ -46,7 +46,7 @@ def __init__(
         super().__init__()
         self.device = device
         self.preprocess = preprocess_net
-        self.output_dim = np.prod(action_shape)
+        self.output_dim = int(np.prod(action_shape))
         input_dim = getattr(preprocess_net, "output_dim",
                             preprocess_net_output_dim)
         self.last = MLP(input_dim, self.output_dim,
@@ -56,7 +56,7 @@ def __init__(
     def forward(
         self,
         s: Union[np.ndarray, torch.Tensor],
-        state: Optional[Any] = None,
+        state: Any = None,
         info: Dict[str, Any] = {},
     ) -> Tuple[torch.Tensor, Any]:
         """Mapping: s -> logits -> action."""
@@ -162,7 +162,7 @@ def __init__(
         super().__init__()
         self.preprocess = preprocess_net
         self.device = device
-        self.output_dim = np.prod(action_shape)
+        self.output_dim = int(np.prod(action_shape))
         input_dim = getattr(preprocess_net, "output_dim",
                             preprocess_net_output_dim)
         self.mu = MLP(input_dim, self.output_dim,
@@ -179,7 +179,7 @@ def __init__(
     def forward(
         self,
         s: Union[np.ndarray, torch.Tensor],
-        state: Optional[Any] = None,
+        state: Any = None,
         info: Dict[str, Any] = {},
     ) -> Tuple[Tuple[torch.Tensor, torch.Tensor], Any]:
         """Mapping: s -> logits -> (mu, sigma)."""
@@ -219,12 +219,12 @@ def __init__(
         super().__init__()
         self.device = device
         self.nn = nn.LSTM(
-            input_size=np.prod(state_shape),
+            input_size=int(np.prod(state_shape)),
             hidden_size=hidden_layer_size,
             num_layers=layer_num,
             batch_first=True,
         )
-        output_dim = np.prod(action_shape)
+        output_dim = int(np.prod(action_shape))
         self.mu = nn.Linear(hidden_layer_size, output_dim)
         self._c_sigma = conditioned_sigma
         if conditioned_sigma:
@@ -293,12 +293,12 @@ def __init__(
         self.action_shape = action_shape
         self.device = device
         self.nn = nn.LSTM(
-            input_size=np.prod(state_shape),
+            input_size=int(np.prod(state_shape)),
             hidden_size=hidden_layer_size,
             num_layers=layer_num,
             batch_first=True,
         )
-        self.fc2 = nn.Linear(hidden_layer_size + np.prod(action_shape), 1)
+        self.fc2 = nn.Linear(hidden_layer_size + int(np.prod(action_shape)), 1)
 
     def forward(
         self,
diff --git a/tianshou/utils/net/discrete.py b/tianshou/utils/net/discrete.py
index fc7c9b002..ee1294f3e 100644
--- a/tianshou/utils/net/discrete.py
+++ b/tianshou/utils/net/discrete.py
@@ -45,7 +45,7 @@ def __init__(
         super().__init__()
         self.device = device
         self.preprocess = preprocess_net
-        self.output_dim = np.prod(action_shape)
+        self.output_dim = int(np.prod(action_shape))
         input_dim = getattr(preprocess_net, "output_dim",
                             preprocess_net_output_dim)
         self.last = MLP(input_dim, self.output_dim,
@@ -55,7 +55,7 @@ def __init__(
     def forward(
         self,
         s: Union[np.ndarray, torch.Tensor],
-        state: Optional[Any] = None,
+        state: Any = None,
         info: Dict[str, Any] = {},
     ) -> Tuple[torch.Tensor, Any]:
         r"""Mapping: s -> Q(s, \*)."""
diff --git a/tianshou/utils/statistics.py b/tianshou/utils/statistics.py
index 009ad4dc9..1ff1e00f4 100644
--- a/tianshou/utils/statistics.py
+++ b/tianshou/utils/statistics.py
@@ -3,8 +3,6 @@
 from numbers import Number
 from typing import List, Union
 
-from tianshou.data import to_numpy
-
 
 class MovAvg(object):
     """Class for moving average.
@@ -28,44 +26,43 @@ class MovAvg(object):
     def __init__(self, size: int = 100) -> None:
         super().__init__()
         self.size = size
-        self.cache: List[Union[Number, np.number]] = []
+        self.cache: List[np.number] = []
         self.banned = [np.inf, np.nan, -np.inf]
 
     def add(
         self, x: Union[Number, np.number, list, np.ndarray, torch.Tensor]
-    ) -> np.number:
+    ) -> float:
         """Add a scalar into :class:`MovAvg`.
 
         You can add ``torch.Tensor`` with only one element, a python scalar, or
         a list of python scalar.
         """
         if isinstance(x, torch.Tensor):
-            x = to_numpy(x.flatten())
-        if isinstance(x, list) or isinstance(x, np.ndarray):
-            for i in x:
-                if i not in self.banned:
-                    self.cache.append(i)
-        elif x not in self.banned:
-            self.cache.append(x)
+            x = x.flatten().cpu().numpy()
+        if np.isscalar(x):
+            x = [x]
+        for i in x:  # type: ignore
+            if i not in self.banned:
+                self.cache.append(i)
         if self.size > 0 and len(self.cache) > self.size:
             self.cache = self.cache[-self.size:]
         return self.get()
 
-    def get(self) -> np.number:
+    def get(self) -> float:
         """Get the average."""
         if len(self.cache) == 0:
-            return 0
-        return np.mean(self.cache)
+            return 0.0
+        return float(np.mean(self.cache))
 
-    def mean(self) -> np.number:
+    def mean(self) -> float:
         """Get the average. Same as :meth:`get`."""
         return self.get()
 
-    def std(self) -> np.number:
+    def std(self) -> float:
         """Get the standard deviation."""
         if len(self.cache) == 0:
-            return 0
-        return np.std(self.cache)
+            return 0.0
+        return float(np.std(self.cache))
 
 
 class RunningMeanStd(object):
@@ -74,8 +71,10 @@ class RunningMeanStd(object):
     https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
     """
 
-    def __init__(self) -> None:
-        self.mean, self.var = 0.0, 1.0
+    def __init__(
+        self, mean: Union[float, np.ndarray] = 0.0, std: Union[float, np.ndarray] = 1.0
+    ) -> None:
+        self.mean, self.var = mean, std
         self.count = 0
 
     def update(self, x: np.ndarray) -> None:
@@ -92,5 +91,5 @@ def update(self, x: np.ndarray) -> None:
         m_2 = m_a + m_b + delta ** 2 * self.count * batch_count / total_count
         new_var = m_2 / total_count
 
-        self.mean, self.var = new_mean, new_var
+        self.mean, self.var = new_mean, new_var  # type: ignore
         self.count = total_count