The `_target_q` method of the DDPG policy builds a `Batch` whose `info` field is filled with a list of `None`s. Why does it do this?

```python
def _target_q(self, buffer: ReplayBuffer, indices: np.ndarray) -> torch.Tensor:
    obs_next_batch = Batch(
        obs=buffer[indices].obs_next,
        info=[None] * len(indices),  # WHY?
    )  # obs_next: s_{t+n}
    return self.critic_old(obs_next_batch.obs, self(obs_next_batch, model="actor_old").act)
```
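For reference, here is a minimal sketch of what that constructor call actually produces, assuming Tianshou's `Batch` API; the observation shape and number of transitions are made up for illustration:

```python
import numpy as np
from tianshou.data import Batch

# Hypothetical next-observation array: 4 transitions, 3-dim observations.
obs_next = np.zeros((4, 3))
indices = np.arange(4)

# Same construction as in _target_q: obs carries the real data, while
# info is a length-matched placeholder holding only None values.
batch = Batch(obs=obs_next, info=[None] * len(indices))

print(len(batch))       # 4 -- both fields agree on the batch length
print(batch.obs.shape)  # (4, 3)
print(batch.info)       # object array of four None entries
```

So the `info` field carries no data here; it only keeps the batch length-consistent before the batch is passed through `self(obs_next_batch, model="actor_old")`.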