Wrong output of forward for custom policy #1029

@hazel260802

Description

  • I have marked all applicable categories:

    • exception-raising bug
    • RL algorithm bug
    • documentation request (i.e. "X is missing from the documentation.")
    • new feature request
    • design request (i.e. "X should be changed to Y.")
  • I have visited the source website

  • I have searched through the issue tracker for duplicates

  • I have mentioned version numbers, operating system and environment, where applicable:

    import tianshou, gymnasium as gym, torch, numpy, sys
    print(tianshou.__version__, gym.__version__, torch.__version__, numpy.__version__, sys.version, sys.platform)
    import gymnasium as gym
    from tianshou.data.batch import Batch
    import torch
    import numpy as np
    import torch.nn as nn
    from torch.utils.tensorboard import SummaryWriter
    import tianshou as ts
    from copy import deepcopy
    from tianshou.env import DummyVectorEnv
    from torch.optim.lr_scheduler import LambdaLR
    import torch.nn.functional as F
    import os
    import time
    import json
    import math
    from tqdm import tqdm
    from env import SDN_Env
    from network import conv_mlp_net
    
    cloud_num = 1
    edge_num = 1
    expn = 'exp1'
    config = 'multi-edge'
    lr, epoch, batch_size = 1e-6, 1, 1024 * 4
    train_num, test_num = 64, 1024
    gamma, lr_decay = 0.9, None
    buffer_size = 100000
    eps_train, eps_test = 0.1, 0.00
    step_per_epoch, episode_per_collect = 100 * train_num * 700, train_num
    writer = SummaryWriter('tensor-board-log/ppo')  # tensorboard is also supported!
    logger = ts.utils.TensorboardLogger(writer)
    is_gpu_default = torch.cuda.is_available()  # Check if GPU is available
    # ppo
    gae_lambda, max_grad_norm = 0.95, 0.5
    vf_coef, ent_coef = 0.5, 0.0
    rew_norm, action_scaling = False, False
    bound_action_method = "clip"
    eps_clip, value_clip = 0.2, False
    repeat_per_collect = 2
    dual_clip, norm_adv = None, 0.0
    recompute_adv = 0
    
    INPUT_CH = 67
    FEATURE_CH = 512
    MLP_CH = 1024
    
    
    
    
    
    class sdn_net(nn.Module):
        def __init__(self, mode='actor', is_gpu=is_gpu_default):
            super().__init__()
            self.is_gpu = is_gpu
            self.mode = mode
    
            if self.mode == 'actor':
                self.network = conv_mlp_net(conv_in=INPUT_CH, conv_ch=FEATURE_CH, mlp_in=(edge_num+cloud_num)*FEATURE_CH,\
                                        mlp_ch=MLP_CH, out_ch=edge_num+cloud_num, block_num=3)
            else:
                self.network = conv_mlp_net(conv_in=INPUT_CH, conv_ch=FEATURE_CH, mlp_in=(edge_num+cloud_num)*FEATURE_CH,\
                                        mlp_ch=MLP_CH, out_ch=cloud_num, block_num=3)
    
        def load_model(self, filename):
            map_location = lambda storage, loc: storage
            self.load_state_dict(torch.load(filename, map_location=map_location))
            print('load model!')
    
        def save_model(self, filename):
            torch.save(self.state_dict(), filename)
            # print('save model!')
    
    
        def forward(self, obs, state=None, info={}):
            state = obs.clone().detach().requires_grad_(True).to(torch.float32)
            if self.is_gpu:
                state = state.cuda()
    
            logits = self.network(state)
            return Batch(logits=logits, state=state),None
    
    class Actor(nn.Module):
        def __init__(self, is_gpu=is_gpu_default):
            super().__init__()
            self.is_gpu = is_gpu
            self.net = sdn_net(mode='actor')
    
        def load_model(self, filename):
            map_location = lambda storage, loc: storage
            self.load_state_dict(torch.load(filename, map_location=map_location))
            print('load model!')
    
        def save_model(self, filename):
            torch.save(self.state_dict(), filename)
            # print('save model!')
    
        def forward(self, obs, state=None, info={}):
            result, _ = self.net(obs)
            logits, state = result[0]['logits'], result[0]['state']
            # Ensure logits is a PyTorch tensor
            logits = logits.to(torch.float32)
            logits = F.softmax(logits, dim=-1, dtype=torch.float32)
    
            return Batch(logits=logits, state=state),None
    class Critic(nn.Module):
        def __init__(self, is_gpu=is_gpu_default):
            super().__init__()
    
            self.is_gpu = is_gpu
    
            self.net = sdn_net(mode='critic')
    
        def load_model(self, filename):
            map_location = lambda storage, loc: storage
            self.load_state_dict(torch.load(filename, map_location=map_location))
            print('load model!')
    
        def save_model(self, filename):
            torch.save(self.state_dict(), filename)
            # print('save model!')
    
        def forward(self, obs, state=None, info={}):
            result, _ = self.net(obs)
            logits, state = result[0]['logits'], result[0]['state']
            # Ensure logits is a PyTorch tensor
            logits = logits.to(torch.float32)
            return Batch(logits=logits, state=state), None
    
    actor = Actor(is_gpu=is_gpu_default)
    critic = Critic(is_gpu=is_gpu_default)
    actor_critic = ts.utils.net.common.ActorCritic(actor, critic)
    optim = torch.optim.Adam(actor_critic.parameters(), lr=lr)
    
    dist = torch.distributions.Categorical
    
    action_space = gym.spaces.Discrete(edge_num+cloud_num)
    
    if lr_decay:
        lr_scheduler = LambdaLR(
            optim, lr_lambda=lambda epoch: lr_decay ** (epoch - 1)
        )
    else:
        lr_scheduler = None
    
    policy = ts.policy.PPOPolicy(actor, critic, optim, dist,
                                 discount_factor=gamma, max_grad_norm=max_grad_norm,
                                 eps_clip=eps_clip, vf_coef=vf_coef,
                                 ent_coef=ent_coef, reward_normalization=rew_norm,
                                 advantage_normalization=norm_adv, recompute_advantage=recompute_adv,
                                 dual_clip=dual_clip, value_clip=value_clip,
                                 gae_lambda=gae_lambda, action_space=action_space,
                                 lr_scheduler=lr_scheduler)
    
    for i in range(101):
        try:
            os.mkdir('save/pth-e%d/' % (edge_num) + 'cloud%d/' % (cloud_num) + expn + '/w%03d' % (i))
        except:
            pass
    
    
    for wi in range(100, 0 - 1, -2):
    
        if wi == 100:
            epoch_a = epoch * 10
        else:
            epoch_a = epoch
    
        train_envs = DummyVectorEnv(
            [lambda: SDN_Env(conf_name=config, w=wi / 100.0, fc=4e9, fe=2e9, edge_num=edge_num, cloud_num=cloud_num) for _ in range(train_num)])
        test_envs = DummyVectorEnv(
            [lambda: SDN_Env(conf_name=config, w=wi / 100.0, fc=4e9, fe=2e9, edge_num=edge_num, cloud_num=cloud_num) for _ in range(test_num)])
        buffer = ts.data.VectorReplayBuffer(buffer_size, train_num)
        def preprocess_fn(**kwargs):
            obs = kwargs.get("obs", np.array([[]]))
            reward = kwargs.get("reward", 0)
            done = kwargs.get("done", {})
            truncated = kwargs.get("truncated", {})
            info = kwargs.get("info", {})
            env_id = kwargs.get("env_id", "default_value")
            
            # Convert obs to a PyTorch tensor
            obs = torch.tensor(obs, dtype=torch.float32)
            reward = torch.tensor(reward, dtype=torch.float32)
            
            # Make sure to include 'dim' attribute in the Batch object
            batch = Batch(
                obs=obs,
                reward=reward,
                done=done,
                info=info,
                truncated=truncated,
                env_id=env_id,
            )
            print(batch)
            
            # Assuming a normal environment step
            return batch
    
    
        # Initialize Collector with preprocess_fn
        train_collector = ts.data.Collector(
            policy=policy,
            env=train_envs,
            buffer=buffer,
            preprocess_fn=preprocess_fn,
        )
        print(train_collector)
    
        test_collector = ts.data.Collector(policy, test_envs)
        train_collector.collect(n_episode=train_num)
    
        def save_best_fn(policy):
            pass
    
        def test_fn(epoch, env_step, cloud_num):
            policy.actor.save_model('save/pth-e%d/' % (edge_num) + 'cloud%d/' % (cloud_num) + expn + '/w%03d/ep%02d-actor.pth' % (wi, epoch))
            policy.critic.save_model('save/pth-e%d/' % (edge_num) + 'cloud%d/' % (cloud_num) + expn + '/w%03d/ep%02d-critic.pth' % (wi, epoch))
    
        def train_fn(epoch, env_step):
            pass
    
        def reward_metric(rews):
            return rews
    
        result = ts.trainer.onpolicy_trainer(
            policy=policy,
            train_collector=train_collector,
            test_collector=test_collector,
            max_epoch=epoch_a,
            step_per_epoch=step_per_epoch,
            repeat_per_collect=repeat_per_collect,
            episode_per_test=test_num,
            batch_size=batch_size,
            step_per_collect=None,
            episode_per_collect=episode_per_collect,
            train_fn=train_fn,
            test_fn=test_fn,
            save_best_fn=save_best_fn,
            stop_fn=None,  # You may need to define your own stop function if needed
            save_checkpoint_fn=save_best_fn,
            reward_metric=reward_metric,
            logger=logger,
        )
    

I have checked the logic, but the result is always:

    Traceback (most recent call last):
      File "/home/ad/mec_morl_multipolicy/train.py", line 210, in <module>
        train_collector.collect(n_episode=train_num)
      File "/home/ad/.local/lib/python3.10/site-packages/tianshou/data/collector.py", line 279, in collect
        result = self.policy(self.data, last_state)
      File "/home/ad/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
        return self._call_impl(*args, **kwargs)
      File "/home/ad/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
        return forward_call(*args, **kwargs)
      File "/home/ad/.local/lib/python3.10/site-packages/tianshou/policy/modelfree/pg.py", line 124, in forward
        dist = self.dist_fn(logits)
      File "/home/ad/.local/lib/python3.10/site-packages/torch/distributions/categorical.py", line 57, in __init__
        if probs.dim() < 1:
      File "/home/ad/.local/lib/python3.10/site-packages/tianshou/data/batch.py", line 213, in __getattr__
        return getattr(self.__dict__, key)
    AttributeError: 'dict' object has no attribute 'dim'
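
For context on what the traceback points at: forward in tianshou's pg.py takes whatever the custom actor returns as logits and hands it directly to dist_fn (torch.distributions.Categorical in the setup above), so when the actor returns a Batch, Categorical ends up calling .dim() on that Batch rather than on a tensor. Below is a minimal sketch, under that reading, of an actor forward that returns a plain (probs, state) tuple; the backbone and the Categorical dist mirror the code above, but the class itself is illustrative, not a verified fix for this report.

    import torch
    import torch.nn as nn

    class SketchActor(nn.Module):
        """Illustrative actor whose forward follows the (output, hidden_state) convention."""

        def __init__(self, backbone: nn.Module):
            super().__init__()
            self.backbone = backbone  # e.g. the conv_mlp_net-based sdn_net defined above

        def forward(self, obs, state=None, info={}):
            # Move the collected observations onto the model's device as a float tensor.
            device = next(self.parameters()).device
            obs = torch.as_tensor(obs, dtype=torch.float32, device=device)
            logits = self.backbone(obs)
            # With dist_fn = torch.distributions.Categorical, the first positional
            # argument is treated as probabilities, so normalize with a softmax.
            probs = torch.softmax(logits, dim=-1)
            # Return a tensor (not a Batch) plus the recurrent state (None here).
            return probs, state

The only structural difference from the Actor above is the return value: the Batch wrapping is dropped from forward, so dist_fn receives a tensor it can call .dim() on.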
