The target_q function of the DDPG policy sets the info field of the batch to a list of None values. Why is this done?
def target_q(self, buffer: ReplayBuffer, indices: np.ndarray) -> torch.Tensor:
    obs_next_batch = Batch(
        obs=buffer[indices].obs_next,
        info=[None] * len(indices),  # WHY?
    )  # obs_next: s_{t+n}
    return self.critic_old(obs_next_batch.obs, self(obs_next_batch, model="actor_old").act)
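For reference, here is a minimal, self-contained sketch of the construction being asked about, assuming tianshou's Batch API; the indices and observation array below are dummy stand-ins for buffer[indices].obs_next, not values from the actual policy:

import numpy as np
from tianshou.data import Batch

# Hypothetical sampled indices and a dummy next-observation array,
# standing in for buffer[indices].obs_next in the snippet above.
indices = np.arange(3)
dummy_obs_next = np.zeros((len(indices), 4), dtype=np.float32)

# Same construction as in target_q: only the next observations are kept,
# and info is filled with one None placeholder per sampled index.
obs_next_batch = Batch(
    obs=dummy_obs_next,
    info=[None] * len(indices),
)
print(obs_next_batch)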