Wrong output of forward for custom policy #1029

@hazel260802

Description

  • I have marked all applicable categories:

    • exception-raising bug
    • RL algorithm bug
    • documentation request (i.e. "X is missing from the documentation.")
    • new feature request
    • design request (i.e. "X should be changed to Y.")
  • I have visited the source website

  • I have searched through the issue tracker for duplicates

  • I have mentioned version numbers, operating system and environment, where applicable:

    import tianshou, gymnasium as gym, torch, numpy, sys
    print(tianshou.__version__, gym.__version__, torch.__version__, numpy.__version__, sys.version, sys.platform)
    import gymnasium as gym
    from tianshou.data.batch import Batch
    import torch
    import numpy as np
    import torch.nn as nn
    from torch.utils.tensorboard import SummaryWriter
    import tianshou as ts
    from copy import deepcopy
    from tianshou.env import DummyVectorEnv
    from torch.optim.lr_scheduler import LambdaLR
    import torch.nn.functional as F
    import os
    import time
    import json
    import math
    from tqdm import tqdm
    from env import SDN_Env
    from network import conv_mlp_net
    
    cloud_num = 1
    edge_num = 1
    expn = 'exp1'
    config = 'multi-edge'
    lr, epoch, batch_size = 1e-6, 1, 1024 * 4
    train_num, test_num = 64, 1024
    gamma, lr_decay = 0.9, None
    buffer_size = 100000
    eps_train, eps_test = 0.1, 0.00
    step_per_epoch, episode_per_collect = 100 * train_num * 700, train_num
    writer = SummaryWriter('tensor-board-log/ppo')  # tensorboard is also supported!
    logger = ts.utils.TensorboardLogger(writer)
    is_gpu_default = torch.cuda.is_available()  # Check if GPU is available
    # ppo
    gae_lambda, max_grad_norm = 0.95, 0.5
    vf_coef, ent_coef = 0.5, 0.0
    rew_norm, action_scaling = False, False
    bound_action_method = "clip"
    eps_clip, value_clip = 0.2, False
    repeat_per_collect = 2
    dual_clip, norm_adv = None, 0.0
    recompute_adv = 0
    
    INPUT_CH = 67
    FEATURE_CH = 512
    MLP_CH = 1024
    
    
    
    
    
    class sdn_net(nn.Module):
        def __init__(self, mode='actor', is_gpu=is_gpu_default):
            super().__init__()
            self.is_gpu = is_gpu
            self.mode = mode
    
            if self.mode == 'actor':
                self.network = conv_mlp_net(conv_in=INPUT_CH, conv_ch=FEATURE_CH, mlp_in=(edge_num+cloud_num)*FEATURE_CH,\
                                        mlp_ch=MLP_CH, out_ch=edge_num+cloud_num, block_num=3)
            else:
                self.network = conv_mlp_net(conv_in=INPUT_CH, conv_ch=FEATURE_CH, mlp_in=(edge_num+cloud_num)*FEATURE_CH,\
                                        mlp_ch=MLP_CH, out_ch=cloud_num, block_num=3)
    
        def load_model(self, filename):
            map_location = lambda storage, loc: storage
            self.load_state_dict(torch.load(filename, map_location=map_location))
            print('load model!')
    
        def save_model(self, filename):
            torch.save(self.state_dict(), filename)
            # print('save model!')
    
    
        def forward(self, obs, state=None, info={}):
            state = obs.clone().detach().requires_grad_(True).to(torch.float32)
            if self.is_gpu:
                state = state.cuda()
    
            logits = self.network(state)
            return Batch(logits=logits, state=state),None
    
    class Actor(nn.Module):
        def __init__(self, is_gpu=is_gpu_default):
            super().__init__()
            self.is_gpu = is_gpu
            self.net = sdn_net(mode='actor')
    
        def load_model(self, filename):
            map_location = lambda storage, loc: storage
            self.load_state_dict(torch.load(filename, map_location=map_location))
            print('load model!')
    
        def save_model(self, filename):
            torch.save(self.state_dict(), filename)
            # print('save model!')
    
        def forward(self, obs, state=None, info={}):
            result, _ = self.net(obs)
            logits, state = result[0]['logits'], result[0]['state']
            # Ensure logits is a PyTorch tensor
            logits = logits.to(torch.float32)
            logits = F.softmax(logits, dim=-1, dtype=torch.float32)
    
            return Batch(logits=logits, state=state),None
    class Critic(nn.Module):
        def __init__(self, is_gpu=is_gpu_default):
            super().__init__()
    
            self.is_gpu = is_gpu
    
            self.net = sdn_net(mode='critic')
    
        def load_model(self, filename):
            map_location = lambda storage, loc: storage
            self.load_state_dict(torch.load(filename, map_location=map_location))
            print('load model!')
    
        def save_model(self, filename):
            torch.save(self.state_dict(), filename)
            # print('save model!')
    
        def forward(self, obs, state=None, info={}):
            result, _ = self.net(obs)
            logits, state = result[0]['logits'], result[0]['state']
            # Ensure logits is a PyTorch tensor
            logits = logits.to(torch.float32)
            return Batch(logits=logits, state=state), None
    
    actor = Actor(is_gpu=is_gpu_default)
    critic = Critic(is_gpu=is_gpu_default)
    actor_critic = ts.utils.net.common.ActorCritic(actor, critic)
    optim = torch.optim.Adam(actor_critic.parameters(), lr=lr)
    
    dist = torch.distributions.Categorical
    
    action_space = gym.spaces.Discrete(edge_num+cloud_num)
    
    if lr_decay:
        lr_scheduler = LambdaLR(
            optim, lr_lambda=lambda epoch: lr_decay ** (epoch - 1)
        )
    else:
        lr_scheduler = None
    
    policy = ts.policy.PPOPolicy(actor, critic, optim, dist,
                                 discount_factor=gamma, max_grad_norm=max_grad_norm,
                                 eps_clip=eps_clip, vf_coef=vf_coef,
                                 ent_coef=ent_coef, reward_normalization=rew_norm,
                                 advantage_normalization=norm_adv, recompute_advantage=recompute_adv,
                                 dual_clip=dual_clip, value_clip=value_clip,
                                 gae_lambda=gae_lambda, action_space=action_space,
                                 lr_scheduler=lr_scheduler)
    
    for i in range(101):
        try:
            os.mkdir('save/pth-e%d/' % (edge_num) + 'cloud%d/' % (cloud_num) + expn + '/w%03d' % (i))
        except:
            pass
    
    
    for wi in range(100, 0 - 1, -2):
    
        if wi == 100:
            epoch_a = epoch * 10
        else:
            epoch_a = epoch
    
        train_envs = DummyVectorEnv(
            [lambda: SDN_Env(conf_name=config, w=wi / 100.0, fc=4e9, fe=2e9, edge_num=edge_num, cloud_num=cloud_num) for _ in range(train_num)])
        test_envs = DummyVectorEnv(
            [lambda: SDN_Env(conf_name=config, w=wi / 100.0, fc=4e9, fe=2e9, edge_num=edge_num, cloud_num=cloud_num) for _ in range(test_num)])
        buffer = ts.data.VectorReplayBuffer(buffer_size, train_num)
        def preprocess_fn(**kwargs):
            obs = kwargs.get("obs", np.array([[]]))
            reward = kwargs.get("reward", 0)
            done = kwargs.get("done", {})
            truncated = kwargs.get("truncated", {})
            info = kwargs.get("info", {})
            env_id = kwargs.get("env_id", "default_value")
            
            # Convert obs to a PyTorch tensor
            obs = torch.tensor(obs, dtype=torch.float32)
            reward = torch.tensor(reward, dtype=torch.float32)
            
            # Make sure to include 'dim' attribute in the Batch object
            batch = Batch(
                obs=obs,
                reward=reward,
                done=done,
                info=info,
                truncated=truncated,
                env_id=env_id,
            )
            print(batch)
            
            # Assuming a normal environment step
            return batch
    
    
        # Initialize Collector with preprocess_fn
        train_collector = ts.data.Collector(
            policy=policy,
            env=train_envs,
            buffer=buffer,
            preprocess_fn=preprocess_fn,
        )
        print(train_collector)
    
        test_collector = ts.data.Collector(policy, test_envs)
        train_collector.collect(n_episode=train_num)
    
        def save_best_fn(policy):
            pass
    
        def test_fn(epoch, env_step, cloud_num):
            policy.actor.save_model('save/pth-e%d/' % (edge_num) + 'cloud%d/' % (cloud_num) + expn + '/w%03d/ep%02d-actor.pth' % (wi, epoch))
            policy.critic.save_model('save/pth-e%d/' % (edge_num) + 'cloud%d/' % (cloud_num) + expn + '/w%03d/ep%02d-critic.pth' % (wi, epoch))
    
        def train_fn(epoch, env_step):
            pass
    
        def reward_metric(rews):
            return rews
    
        result = ts.trainer.onpolicy_trainer(
            policy=policy,
            train_collector=train_collector,
            test_collector=test_collector,
            max_epoch=epoch_a,
            step_per_epoch=step_per_epoch,
            repeat_per_collect=repeat_per_collect,
            episode_per_test=test_num,
            batch_size=batch_size,
            step_per_collect=None,
            episode_per_collect=episode_per_collect,
            train_fn=train_fn,
            test_fn=test_fn,
            save_best_fn=save_best_fn,
            stop_fn=None,  # You may need to define your own stop function if needed
            save_checkpoint_fn=save_best_fn,
            reward_metric=reward_metric,
            logger=logger,
        )
    

I have checked the logic, but the result is always:

    Traceback (most recent call last):
      File "/home/ad/mec_morl_multipolicy/train.py", line 210, in <module>
        train_collector.collect(n_episode=train_num)
      File "/home/ad/.local/lib/python3.10/site-packages/tianshou/data/collector.py", line 279, in collect
        result = self.policy(self.data, last_state)
      File "/home/ad/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
        return self._call_impl(*args, **kwargs)
      File "/home/ad/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
        return forward_call(*args, **kwargs)
      File "/home/ad/.local/lib/python3.10/site-packages/tianshou/policy/modelfree/pg.py", line 124, in forward
        dist = self.dist_fn(logits)
      File "/home/ad/.local/lib/python3.10/site-packages/torch/distributions/categorical.py", line 57, in __init__
        if probs.dim() < 1:
      File "/home/ad/.local/lib/python3.10/site-packages/tianshou/data/batch.py", line 213, in __getattr__
        return getattr(self.__dict__, key)
    AttributeError: 'dict' object has no attribute 'dim'
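
For context on what the traceback points at: forward in tianshou's pg.py takes whatever the custom actor returns as logits and hands it directly to dist_fn (torch.distributions.Categorical in the setup above), so when the actor returns a Batch, Categorical ends up calling .dim() on that Batch rather than on a tensor. Below is a minimal sketch, under that reading, of an actor forward that returns a plain (probs, state) tuple; the backbone and the Categorical dist mirror the code above, but the class itself is illustrative, not a verified fix for this report.

    import torch
    import torch.nn as nn

    class SketchActor(nn.Module):
        """Illustrative actor whose forward follows the (output, hidden_state) convention."""

        def __init__(self, backbone: nn.Module):
            super().__init__()
            self.backbone = backbone  # e.g. the conv_mlp_net-based sdn_net defined above

        def forward(self, obs, state=None, info={}):
            # Move the collected observations onto the model's device as a float tensor.
            device = next(self.parameters()).device
            obs = torch.as_tensor(obs, dtype=torch.float32, device=device)
            logits = self.backbone(obs)
            # With dist_fn = torch.distributions.Categorical, the first positional
            # argument is treated as probabilities, so normalize with a softmax.
            probs = torch.softmax(logits, dim=-1)
            # Return a tensor (not a Batch) plus the recurrent state (None here).
            return probs, state

The only structural difference from the Actor above is the return value: the Batch wrapping is dropped from forward, so dist_fn receives a tensor it can call .dim() on.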
