Changes from all commits
39 commits
b87aea4
Experiment: remove inheritance of DataclassPPrintMixin
MischaPanch Aug 1, 2025
56270cd
Merge branch 'master' into benchmarking
MischaPanch Sep 6, 2025
baf4822
High level interface for launching and evaluating multiple experiments
MischaPanch Oct 13, 2025
a8e70b6
Fixed missing passing of seed
MischaPanch Oct 16, 2025
3f00231
Minor
MischaPanch Oct 16, 2025
56ef25e
Added low-level API example for multiple experiments with rliable eval
MischaPanch Oct 16, 2025
ec344f7
Removed unneeded dedicated hl_multi example
MischaPanch Oct 17, 2025
9234fcd
Modified ppo_hl example, simplifying the config options and adding mu…
MischaPanch Oct 17, 2025
82dc1ad
Simplifying the config options and adding multi-experiment option for…
MischaPanch Oct 17, 2025
ed362e8
Improved and extended rliable evaluation module
MischaPanch Oct 20, 2025
03bdfff
Updated ruff, removed black, formatted
MischaPanch Oct 20, 2025
4f6f7fb
Fix enum instantiation by name
MischaPanch Oct 20, 2025
ff27ffd
Set default num_experiments to 1 in hl scripts
MischaPanch Oct 20, 2025
91029ad
Added a script for benchmarking
MischaPanch Oct 20, 2025
7b4588d
Merge branch 'dev-v2' into benchmarking
opcode81 Oct 23, 2025
e47367d
Minor post-merge cleanup
MischaPanch Oct 23, 2025
bd2f827
Added result aggregation to benchmarking
MischaPanch Oct 23, 2025
6c10201
Extend benchmarking to run for all desired tasks
MischaPanch Oct 23, 2025
e9afaeb
Minor fixes in typing
MischaPanch Oct 24, 2025
203ea48
Removed no longer needed mujoco_ppo_multi.py
MischaPanch Oct 24, 2025
b47ca52
Refactored mujoco low-level examples to use jsonargparse
MischaPanch Oct 24, 2025
8cf6576
Refactored atari low-level examples to use jsonargparse
MischaPanch Oct 25, 2025
91f6030
Reinstating parameterization of v0.5.0 in mujoco hl scripts
MischaPanch Oct 25, 2025
457b82e
Renamed train_envs to training_envs
MischaPanch Oct 25, 2025
a07831f
More parameterization in hl scripts, used in benchmarking
MischaPanch Oct 25, 2025
6017fea
Removed obsolete result aggregation script
MischaPanch Oct 27, 2025
b7e8b93
Bumped epochs for off-policy algos and switched launcher to joblib
MischaPanch Oct 27, 2025
2bcb29f
Automatically set test_step_num_episodes to num_test_envs by default
MischaPanch Oct 27, 2025
fcd48fc
Benchmark script: minor improvement in tmux session counting
MischaPanch Oct 27, 2025
8590c7c
More renamings of type train -> training
MischaPanch Oct 29, 2025
770334d
More renamings of type train -> training
MischaPanch Oct 29, 2025
75fc43c
Bugfix: passing save_interval in create_logger
MischaPanch Oct 29, 2025
151ae1f
Minor restructuring, improved defaults and docstrings in loggers
MischaPanch Oct 29, 2025
38c4b93
Minor restructuring, improved defaults and docstrings in loggers
MischaPanch Oct 29, 2025
f368fe7
Less invasive logging on training
MischaPanch Oct 29, 2025
51493eb
Configurable experiment launcher in run_benchmark.py
MischaPanch Oct 29, 2025
ae19d79
Minor
MischaPanch Oct 29, 2025
ca9515b
Benchmark: support for include and exclude filter
MischaPanch Nov 13, 2025
315b2ba
Lowered number of epochs in mujoco off-policy examples
MischaPanch Nov 17, 2025
4 changes: 2 additions & 2 deletions CHANGELOG.md
@@ -320,7 +320,7 @@ Our main test environment remains Python 3.11-based for the time being (see `poe
- `highlevel`:
  - Change the way in which seeding is handled: The mechanism introduced in v1.1.0
    was completely revised:
-    - The `train_seed` and `test_seed` attributes were removed from `SamplingConfig`.
+    - The `training_seed` and `test_seed` attributes were removed from `SamplingConfig`.
      Instead, the seeds are derived from the seed defined in `ExperimentConfig`.
    - Seed attributes of `EnvFactory` classes were removed.
      Instead, seeds are passed to methods of `EnvFactory`.
@@ -555,7 +555,7 @@ A detailed list of changes can be found below.
  #1194 #1195
- `env`:
  - `EnvFactoryRegistered`: parameter `seed` has been replaced by the pair
-    of parameters `train_seed` and `test_seed`
+    of parameters `training_seed` and `test_seed`
    Persisted instances will continue to work correctly.
    Subclasses such as `AtariEnvFactory` are also affected and require
    explicit train and test seeds. #1074
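As context for the seeding entries above: a single top-level seed now drives everything. A minimal sketch of the resulting usage — the `seed` attribute on `ExperimentConfig` is an assumption here; the `EnvFactoryRegistered` parameter names are as renamed in this diff:

```python
from tianshou.highlevel.env import EnvFactoryRegistered, VectorEnvType
from tianshou.highlevel.experiment import ExperimentConfig

# Training and test seeds are passed explicitly to the env factory
# (parameter names as renamed in this PR).
env_factory = EnvFactoryRegistered(
    task="CartPole-v1",
    venv_type=VectorEnvType.DUMMY,
    training_seed=0,
    test_seed=10,
)

# A single seed on the experiment config; per the changelog entry above,
# all derived seeds come from here rather than from SamplingConfig.
experiment_config = ExperimentConfig(seed=42)
```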
94 changes: 47 additions & 47 deletions README.md
@@ -235,53 +235,53 @@ almost exclusively concerned with configuration that controls what to do
```python
from tianshou.highlevel.config import OffPolicyTrainingConfig
from tianshou.highlevel.env import (
    EnvFactoryRegistered,
    VectorEnvType,
)
from tianshou.highlevel.experiment import DQNExperimentBuilder, ExperimentConfig
from tianshou.highlevel.params.algorithm_params import DQNParams
from tianshou.highlevel.trainer import (
    EpochStopCallbackRewardThreshold,
)

experiment = (
    DQNExperimentBuilder(
        EnvFactoryRegistered(
            task="CartPole-v1",
            venv_type=VectorEnvType.DUMMY,
-            train_seed=0,
+            training_seed=0,
            test_seed=10,
        ),
        ExperimentConfig(
            persistence_enabled=False,
            watch=True,
            watch_render=1 / 35,
            watch_num_episodes=100,
        ),
        OffPolicyTrainingConfig(
            max_epochs=10,
            epoch_num_steps=10000,
            batch_size=64,
-            num_train_envs=10,
+            num_training_envs=10,
            num_test_envs=100,
            buffer_size=20000,
            collection_step_num_env_steps=10,
            update_step_num_gradient_steps_per_sample=1 / 10,
        ),
    )
    .with_dqn_params(
        DQNParams(
            lr=1e-3,
            gamma=0.9,
            n_step_return_horizon=3,
            target_update_freq=320,
            eps_training=0.3,
            eps_inference=0.0,
        ),
    )
    .with_model_factory_default(hidden_sizes=(64, 64))
    .with_epoch_stop_callback(EpochStopCallbackRewardThreshold(195))
    .build()
)
experiment.run()
```
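Several commits in this PR ("High level interface for launching and evaluating multiple experiments", "switched launcher to joblib") extend this builder pattern to launching seeded multi-experiment runs. A sketch of that usage — `build_seeded_collection` and `RegisteredExpLauncher` are taken from the existing high-level API and are assumptions as far as this PR's final shape is concerned:

```python
from tianshou.evaluation.launcher import RegisteredExpLauncher

# experiment_builder is the DQNExperimentBuilder(...) chain from the
# example above, without the final .build() call.
collection = experiment_builder.build_seeded_collection(num_experiments=5)

# Run the experiments, which differ only in their seeds; this PR switches
# the default launcher to joblib for parallel execution.
collection.run(launcher=RegisteredExpLauncher.joblib)
```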
@@ -352,7 +352,7 @@ Define hyper-parameters:
```python
task = 'CartPole-v1'
lr, epoch, batch_size = 1e-3, 10, 64
-train_num, test_num = 10, 100
+num_training_envs, num_test_envs = 10, 100
gamma, n_step, target_freq = 0.9, 3, 320
buffer_size = 20000
eps_train, eps_test = 0.1, 0.05
@@ -369,8 +369,8 @@ Create the environments:

```python
# You can also try SubprocVectorEnv, which will use parallelization
-train_envs = ts.env.DummyVectorEnv([lambda: gym.make(task) for _ in range(train_num)])
-test_envs = ts.env.DummyVectorEnv([lambda: gym.make(task) for _ in range(test_num)])
+training_envs = ts.env.DummyVectorEnv([lambda: gym.make(task) for _ in range(num_training_envs)])
+test_envs = ts.env.DummyVectorEnv([lambda: gym.make(task) for _ in range(num_test_envs)])
```
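As the comment in the diff notes, the sequential `DummyVectorEnv` can be swapped for a subprocess-based vectorized env; a minimal sketch, using the same names as the example above:

```python
# Process-based parallelism instead of the sequential DummyVectorEnv
training_envs = ts.env.SubprocVectorEnv(
    [lambda: gym.make(task) for _ in range(num_training_envs)]
)
```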

Create the network, policy, and algorithm:
Expand Down Expand Up @@ -408,10 +408,10 @@ algorithm = DQN(
Set up the collectors:

```python
-train_collector = ts.data.Collector[CollectStats](
+training_collector = ts.data.Collector[CollectStats](
    algorithm,
-    train_envs,
-    ts.data.VectorReplayBuffer(buffer_size, num_train_envs),
+    training_envs,
+    ts.data.VectorReplayBuffer(buffer_size, num_training_envs),
    exploration_noise=True,
)
test_collector = ts.data.Collector[CollectStats](
@@ -426,7 +426,7 @@ Let's train the model using the algorithm:
```python
result = algorithm.run_training(
    OffPolicyTrainerParams(
-        train_collector=train_collector,
+        training_collector=training_collector,
        test_collector=test_collector,
        max_epochs=epoch,
        epoch_num_steps=epoch_num_steps,