thu-ml · MischaPanch · Aug 6, 2024 · Aug 1, 2024 · Aug 1, 2024 · Aug 1, 2024
diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
@@ -271,6 +271,8 @@ v_s_
 obs
 obs_next
 dtype
+iqm
+kwarg
 entrypoint
 interquantile
 init

diff --git a/examples/mujoco/mujoco_ppo_hl_multi.py b/examples/mujoco/mujoco_ppo_hl_multi.py
@@ -13,6 +13,7 @@
 """
 
 import os
+import warnings
 
 import torch
 
@@ -38,16 +39,25 @@
 
 def main(
     num_experiments: int = 5,
-    run_experiments_sequentially: bool = False,
+    run_experiments_sequentially: bool = True,
+    logger_type: str = "wandb",
 ) -> RLiableExperimentResult:
     """:param num_experiments: the number of experiments to run. The experiments differ exclusively in the seeds.
     :param run_experiments_sequentially: if True, the experiments are run sequentially, otherwise in parallel.
         If a single experiment is set to use all available CPU cores,
         it might be undesired to run multiple experiments in parallel on the same machine,
+    :param logger_type: the type of logger to use. Currently, "wandb" and "tensorboard" are supported.
     :return: an object containing rliable-based evaluation results
     """
+    if not run_experiments_sequentially and logger_type == "wandb":
+        warnings.warn(
+            "Parallel execution with wandb logger is still under development. Falling back to tensorboard.",
+        )
+        logger_type = "tensorboard"
+
     task = "Ant-v4"
-    persistence_dir = os.path.abspath(os.path.join("log", task, "ppo", datetime_tag()))
+    tag = datetime_tag()
+    persistence_dir = os.path.abspath(os.path.join("log", task, "ppo", tag))
 
     experiment_config = ExperimentConfig(persistence_base_dir=persistence_dir, watch=False)
 
@@ -72,6 +82,21 @@ def main(
 
     hidden_sizes = (64, 64)
 
+    match logger_type:
+        case "wandb":
+            job_type = f"ppo/{tag}"
+            logger_factory = LoggerFactoryDefault(
+                logger_type="wandb",
+                wandb_project="tianshou",
+                group=task,
+                job_type=job_type,
+                save_interval=1,
+            )
+        case "tensorboard":
+            logger_factory = LoggerFactoryDefault("tensorboard")
+        case _:
+            raise ValueError(f"Unknown logger type: {logger_type}")
+
     experiment_collection = (
         PPOExperimentBuilder(env_factory, experiment_config, sampling_config)
         .with_ppo_params(
@@ -95,7 +120,7 @@ def main(
         )
         .with_actor_factory_default(hidden_sizes, torch.nn.Tanh, continuous_unbounded=True)
         .with_critic_factory_default(hidden_sizes, torch.nn.Tanh)
-        .with_logger_factory(LoggerFactoryDefault("tensorboard"))
+        .with_logger_factory(logger_factory)
         .build_seeded_collection(num_experiments)
     )
 

diff --git a/tianshou/data/stats.py b/tianshou/data/stats.py
@@ -22,6 +22,8 @@ class SequenceSummaryStats(DataclassPPrintMixin):
 
     @classmethod
     def from_sequence(cls, sequence: Sequence[float | int] | np.ndarray) -> "SequenceSummaryStats":
+        if len(sequence) == 0:
+            return cls(mean=0.0, std=0.0, max=0.0, min=0.0)
         return cls(
             mean=float(np.mean(sequence)),
             std=float(np.std(sequence)),

diff --git a/tianshou/evaluation/launcher.py b/tianshou/evaluation/launcher.py
@@ -94,12 +94,11 @@ def _launch(self, experiments: Sequence[Experiment]) -> list[InfoStats | None]:
         successful_exp_stats = []
         failed_exps = []
         for exp in experiments:
-            for exp in experiments:
-                exp_stats = self._safe_execute(exp)
-                if exp_stats == "failed":
-                    failed_exps.append(exp)
-                else:
-                    successful_exp_stats.append(exp_stats)
+            exp_stats = self._safe_execute(exp)
+            if exp_stats == "failed":
+                failed_exps.append(exp)
+            else:
+                successful_exp_stats.append(exp_stats)
         # noinspection PyTypeChecker
         return self._return_from_successful_and_failed_exps(successful_exp_stats, failed_exps)
-Original file line number
+Diff line change
@@ Expand Up / @@ -271,6 +271,8 @@ v_s_ @@
     obs
     obs_next
     dtype
+    iqm
+    kwarg
     entrypoint
     interquantile
     init
@@ Expand Down @@