
Add lr_scheduler option for on-policy algorithms #318


Merged · 3 commits · Mar 22, 2021
6 changes: 6 additions & 0 deletions tianshou/policy/modelfree/a2c.py
@@ -38,6 +38,8 @@ class A2CPolicy(PGPolicy):
squashing) for now, or empty string for no bounding. Default to "clip".
:param Optional[gym.Space] action_space: env's action space, mandatory if you want
to use option "action_scaling" or "action_bound_method". Default to None.
:param lr_scheduler: a learning rate scheduler that adjusts the learning rate in
optimizer in each policy.update(). Default to None (no lr_scheduler).

.. seealso::

@@ -142,6 +144,10 @@ def learn( # type: ignore
vf_losses.append(vf_loss.item())
ent_losses.append(ent_loss.item())
losses.append(loss.item())
# update learning rate if lr_scheduler is given
if self.lr_scheduler is not None:
self.lr_scheduler.step()

return {
"loss": losses,
"loss/actor": actor_losses,
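For context, here is a minimal sketch of how the new option can be wired into an A2C setup. It assumes the `lr_scheduler` keyword is forwarded from `A2CPolicy` to `PGPolicy` through `**kwargs`, as the docstring addition above implies; the toy actor/critic networks, the learning rate, and `max_update_num` are illustrative assumptions, not part of this PR.

```python
import torch
from torch import nn
from torch.optim.lr_scheduler import LambdaLR
from tianshou.policy import A2CPolicy

# Toy actor/critic for a 4-dim observation, 2-action task (illustrative only).
# Tianshou actors return (logits, hidden_state); critics return a value tensor.
class Actor(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(4, 64), nn.Tanh(), nn.Linear(64, 2))

    def forward(self, obs, state=None, info=None):
        obs = torch.as_tensor(obs, dtype=torch.float32)
        return self.net(obs), state

class Critic(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(4, 64), nn.Tanh(), nn.Linear(64, 1))

    def forward(self, obs, **kwargs):
        return self.net(torch.as_tensor(obs, dtype=torch.float32))

actor, critic = Actor(), Critic()
optim = torch.optim.Adam(
    list(actor.parameters()) + list(critic.parameters()), lr=3e-4)

# Decay the learning rate linearly to zero over max_update_num policy updates;
# with this patch, each policy.update() calls lr_scheduler.step() inside learn().
max_update_num = 1000  # illustrative; see the note after pg.py for one way to derive it
lr_scheduler = LambdaLR(optim, lr_lambda=lambda n: max(0.0, 1 - n / max_update_num))

policy = A2CPolicy(
    actor, critic, optim,
    dist_fn=torch.distributions.Categorical,
    lr_scheduler=lr_scheduler,
)
```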
8 changes: 8 additions & 0 deletions tianshou/policy/modelfree/pg.py
@@ -22,6 +22,8 @@ class PGPolicy(BasePolicy):
squashing) for now, or empty string for no bounding. Default to "clip".
:param Optional[gym.Space] action_space: env's action space, mandatory if you want
to use option "action_scaling" or "action_bound_method". Default to None.
:param lr_scheduler: a learning rate scheduler that adjusts the learning rate in
optimizer in each policy.update(). Default to None (no lr_scheduler).

.. seealso::

@@ -38,13 +40,15 @@ def __init__(
reward_normalization: bool = False,
action_scaling: bool = True,
action_bound_method: str = "clip",
lr_scheduler: Optional[torch.optim.lr_scheduler.LambdaLR] = None,
**kwargs: Any,
) -> None:
super().__init__(action_scaling=action_scaling,
action_bound_method=action_bound_method, **kwargs)
if model is not None:
self.model: torch.nn.Module = model
self.optim = optim
self.lr_scheduler = lr_scheduler
self.dist_fn = dist_fn
assert 0.0 <= discount_factor <= 1.0, "discount factor should be in [0, 1]"
self._gamma = discount_factor
@@ -110,6 +114,10 @@ def learn( # type: ignore
loss.backward()
self.optim.step()
losses.append(loss.item())
# update learning rate if lr_scheduler is given
if self.lr_scheduler is not None:
self.lr_scheduler.step()

return {"loss": losses}

# def _vanilla_returns(self, batch):
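Since `lr_scheduler.step()` runs once at the end of every `learn()` call, the schedule is indexed by the number of `policy.update()` calls, not by environment steps or epochs. If the surrounding training loop performs one `policy.update()` per data collection, the horizon for the `LambdaLR` above can be estimated as sketched below; the trainer variable names here are illustrative assumptions, not an API of this repository.

```python
import numpy as np

# Illustrative trainer settings; substitute whatever you pass to your trainer.
epoch, step_per_epoch, collect_per_step = 100, 1000, 10

# Assuming one policy.update() (and hence one lr_scheduler.step()) per collect,
# the total number of scheduler steps over a run is roughly:
max_update_num = int(np.ceil(step_per_epoch / collect_per_step)) * epoch

# Reuse this horizon in the LambdaLR from the sketch above.
lr_lambda = lambda n: max(0.0, 1 - n / max_update_num)
```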
6 changes: 6 additions & 0 deletions tianshou/policy/modelfree/ppo.py
@@ -43,6 +43,8 @@ class PPOPolicy(PGPolicy):
squashing) for now, or empty string for no bounding. Default to "clip".
:param Optional[gym.Space] action_space: env's action space, mandatory if you want
to use option "action_scaling" or "action_bound_method". Default to None.
:param lr_scheduler: a learning rate scheduler that adjusts the learning rate in
optimizer in each policy.update(). Default to None (no lr_scheduler).

.. seealso::

@@ -179,6 +181,10 @@ def learn( # type: ignore
list(self.actor.parameters()) + list(self.critic.parameters()),
self._max_grad_norm)
self.optim.step()
# update learning rate if lr_scheduler is given
if self.lr_scheduler is not None:
self.lr_scheduler.step()

return {
"loss": losses,
"loss/clip": clip_losses,
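A quick way to check the wiring, continuing the hypothetical setup above: after each scheduler step the optimizer's learning rate should shrink, which is exactly what the new `lr_scheduler.step()` calls produce at the end of `learn()`.

```python
# Sanity check of the schedule itself; no environment needed.
# Each lr_scheduler.step() here stands in for one policy.update() call.
print(optim.param_groups[0]["lr"])  # initial value, 3e-4 in the sketch above
for _ in range(10):
    optim.step()          # avoid PyTorch's "lr_scheduler before optimizer" warning
    lr_scheduler.step()
print(optim.param_groups[0]["lr"])  # smaller: 3e-4 * (1 - 10 / max_update_num)
```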