Description
- I have marked all applicable categories:
  - exception-raising bug
  - RL algorithm bug
  - documentation request (i.e. "X is missing from the documentation.")
  - new feature request
  - design request (i.e. "X should be changed to Y.")
- I have visited the source website
- I have searched through the issue tracker for duplicates
- I have mentioned version numbers, operating system and environment, where applicable:
```python
import tianshou, gymnasium as gym, torch, numpy, sys
print(tianshou.__version__, gym.__version__, torch.__version__, numpy.__version__, sys.version, sys.platform)
```
```python
import gymnasium as gym
from tianshou.data.batch import Batch
import torch
import numpy as np
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
import tianshou as ts
from copy import deepcopy
from tianshou.env import DummyVectorEnv
from torch.optim.lr_scheduler import LambdaLR
import torch.nn.functional as F
import os
import time
import json
import math
from tqdm import tqdm
from env import SDN_Env
from network import conv_mlp_net

cloud_num = 1
edge_num = 1
expn = 'exp1'
config = 'multi-edge'
lr, epoch, batch_size = 1e-6, 1, 1024 * 4
train_num, test_num = 64, 1024
gamma, lr_decay = 0.9, None
buffer_size = 100000
eps_train, eps_test = 0.1, 0.00
step_per_epoch, episode_per_collect = 100 * train_num * 700, train_num
writer = SummaryWriter('tensor-board-log/ppo')  # tensorboard is also supported!
logger = ts.utils.TensorboardLogger(writer)
is_gpu_default = torch.cuda.is_available()  # Check if GPU is available

# ppo
gae_lambda, max_grad_norm = 0.95, 0.5
vf_coef, ent_coef = 0.5, 0.0
rew_norm, action_scaling = False, False
bound_action_method = "clip"
eps_clip, value_clip = 0.2, False
repeat_per_collect = 2
dual_clip, norm_adv = None, 0.0
recompute_adv = 0

INPUT_CH = 67
FEATURE_CH = 512
MLP_CH = 1024


class sdn_net(nn.Module):
    def __init__(self, mode='actor', is_gpu=is_gpu_default):
        super().__init__()
        self.is_gpu = is_gpu
        self.mode = mode
        if self.mode == 'actor':
            self.network = conv_mlp_net(conv_in=INPUT_CH, conv_ch=FEATURE_CH,
                                        mlp_in=(edge_num + cloud_num) * FEATURE_CH,
                                        mlp_ch=MLP_CH, out_ch=edge_num + cloud_num, block_num=3)
        else:
            self.network = conv_mlp_net(conv_in=INPUT_CH, conv_ch=FEATURE_CH,
                                        mlp_in=(edge_num + cloud_num) * FEATURE_CH,
                                        mlp_ch=MLP_CH, out_ch=cloud_num, block_num=3)

    def load_model(self, filename):
        map_location = lambda storage, loc: storage
        self.load_state_dict(torch.load(filename, map_location=map_location))
        print('load model!')

    def save_model(self, filename):
        torch.save(self.state_dict(), filename)
        # print('save model!')

    def forward(self, obs, state=None, info={}):
        state = obs.clone().detach().requires_grad_(True).to(torch.float32)
        if self.is_gpu:
            state = state.cuda()
        logits = self.network(state)
        return Batch(logits=logits, state=state), None


class Actor(nn.Module):
    def __init__(self, is_gpu=is_gpu_default):
        super().__init__()
        self.is_gpu = is_gpu
        self.net = sdn_net(mode='actor')

    def load_model(self, filename):
        map_location = lambda storage, loc: storage
        self.load_state_dict(torch.load(filename, map_location=map_location))
        print('load model!')

    def save_model(self, filename):
        torch.save(self.state_dict(), filename)
        # print('save model!')

    def forward(self, obs, state=None, info={}):
        result, _ = self.net(obs)
        logits, state = result[0]['logits'], result[0]['state']
        # Ensure logits is a PyTorch tensor
        logits = logits.to(torch.float32)
        logits = F.softmax(logits, dim=-1, dtype=torch.float32)
        return Batch(logits=logits, state=state), None


class Critic(nn.Module):
    def __init__(self, is_gpu=is_gpu_default):
        super().__init__()
        self.is_gpu = is_gpu
        self.net = sdn_net(mode='critic')

    def load_model(self, filename):
        map_location = lambda storage, loc: storage
        self.load_state_dict(torch.load(filename, map_location=map_location))
        print('load model!')

    def save_model(self, filename):
        torch.save(self.state_dict(), filename)
        # print('save model!')

    def forward(self, obs, state=None, info={}):
        result, _ = self.net(obs)
        logits, state = result[0]['logits'], result[0]['state']
        # Ensure logits is a PyTorch tensor
        logits = logits.to(torch.float32)
        return Batch(logits=logits, state=state), None


actor = Actor(is_gpu=is_gpu_default)
critic = Critic(is_gpu=is_gpu_default)
actor_critic = ts.utils.net.common.ActorCritic(actor, critic)
optim = torch.optim.Adam(actor_critic.parameters(), lr=lr)
dist = torch.distributions.Categorical
action_space = gym.spaces.Discrete(edge_num + cloud_num)

if lr_decay:
    lr_scheduler = LambdaLR(
        optim, lr_lambda=lambda epoch: lr_decay ** (epoch - 1)
    )
else:
    lr_scheduler = None

policy = ts.policy.PPOPolicy(
    actor, critic, optim, dist,
    discount_factor=gamma,
    max_grad_norm=max_grad_norm,
    eps_clip=eps_clip,
    vf_coef=vf_coef,
    ent_coef=ent_coef,
    reward_normalization=rew_norm,
    advantage_normalization=norm_adv,
    recompute_advantage=recompute_adv,
    dual_clip=dual_clip,
    value_clip=value_clip,
    gae_lambda=gae_lambda,
    action_space=action_space,
    lr_scheduler=lr_scheduler,
)

for i in range(101):
    try:
        os.mkdir('save/pth-e%d/' % (edge_num) + 'cloud%d/' % (cloud_num) + expn + '/w%03d' % (i))
    except:
        pass

for wi in range(100, 0 - 1, -2):
    if wi == 100:
        epoch_a = epoch * 10
    else:
        epoch_a = epoch

    train_envs = DummyVectorEnv(
        [lambda: SDN_Env(conf_name=config, w=wi / 100.0, fc=4e9, fe=2e9,
                         edge_num=edge_num, cloud_num=cloud_num) for _ in range(train_num)])
    test_envs = DummyVectorEnv(
        [lambda: SDN_Env(conf_name=config, w=wi / 100.0, fc=4e9, fe=2e9,
                         edge_num=edge_num, cloud_num=cloud_num) for _ in range(test_num)])

    buffer = ts.data.VectorReplayBuffer(buffer_size, train_num)

    def preprocess_fn(**kwargs):
        obs = kwargs.get("obs", np.array([[]]))
        reward = kwargs.get("reward", 0)
        done = kwargs.get("done", {})
        truncated = kwargs.get("truncated", {})
        info = kwargs.get("info", {})
        env_id = kwargs.get("env_id", "default_value")
        # Convert obs to a PyTorch tensor
        obs = torch.tensor(obs, dtype=torch.float32)
        reward = torch.tensor(reward, dtype=torch.float32)
        # Make sure to include 'dim' attribute in the Batch object
        batch = Batch(
            obs=obs,
            reward=reward,
            done=done,
            info=info,
            truncated=truncated,
            env_id=env_id,
        )
        print(batch)
        # Assuming a normal environment step
        return batch

    # Initialize Collector with preprocess_fn
    train_collector = ts.data.Collector(
        policy=policy,
        env=train_envs,
        buffer=buffer,
        preprocess_fn=preprocess_fn,
    )
    print(train_collector)
    test_collector = ts.data.Collector(policy, test_envs)
    train_collector.collect(n_episode=train_num)

    def save_best_fn(policy):
        pass

    def test_fn(epoch, env_step, cloud_num):
        policy.actor.save_model('save/pth-e%d/' % (edge_num) + 'cloud%d/' % (cloud_num) + expn + '/w%03d/ep%02d-actor.pth' % (wi, epoch))
        policy.critic.save_model('save/pth-e%d/' % (edge_num) + 'cloud%d/' % (cloud_num) + expn + '/w%03d/ep%02d-critic.pth' % (wi, epoch))

    def train_fn(epoch, env_step):
        pass

    def reward_metric(rews):
        return rews

    result = ts.trainer.onpolicy_trainer(
        policy=policy,
        train_collector=train_collector,
        test_collector=test_collector,
        max_epoch=epoch_a,
        step_per_epoch=step_per_epoch,
        repeat_per_collect=repeat_per_collect,
        episode_per_test=test_num,
        batch_size=batch_size,
        step_per_collect=None,
        episode_per_collect=episode_per_collect,
        train_fn=train_fn,
        test_fn=test_fn,
        save_best_fn=save_best_fn,
        stop_fn=None,  # You may need to define your own stop function if needed
        save_checkpoint_fn=save_best_fn,
        reward_metric=reward_metric,
        logger=logger,
    )
```
I have checked the logic, but the result is always:
```
Traceback (most recent call last):
  File "/home/ad/mec_morl_multipolicy/train.py", line 210, in <module>
    train_collector.collect(n_episode=train_num)
  File "/home/ad/.local/lib/python3.10/site-packages/tianshou/data/collector.py", line 279, in collect
    result = self.policy(self.data, last_state)
  File "/home/ad/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/ad/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ad/.local/lib/python3.10/site-packages/tianshou/policy/modelfree/pg.py", line 124, in forward
    dist = self.dist_fn(logits)
  File "/home/ad/.local/lib/python3.10/site-packages/torch/distributions/categorical.py", line 57, in __init__
    if probs.dim() < 1:
  File "/home/ad/.local/lib/python3.10/site-packages/tianshou/data/batch.py", line 213, in __getattr__
    return getattr(self.__dict__, key)
AttributeError: 'dict' object has no attribute 'dim'
```
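From the traceback, my understanding is that the value reaching `dist_fn` is the `Batch` returned by `Actor.forward`, so `torch.distributions.Categorical` fails when it calls `.dim()` on it. Should the actor return the probability tensor directly instead of wrapping it in a `Batch`? A minimal sketch of what I mean (it reuses the script's existing imports, and treating the softmax output as the categorical probabilities is my assumption):

```python
# Hypothetical variant of Actor.forward: return a plain tensor so the policy
# can pass it straight to dist_fn (torch.distributions.Categorical here).
def forward(self, obs, state=None, info={}):
    # self.net returns (Batch(logits=..., state=...), None) in the code above
    result, _ = self.net(obs)
    logits = result.logits.to(torch.float32)
    probs = F.softmax(logits, dim=-1)  # assumed to be the action probabilities
    return probs, state                # tensor, not Batch(...)
```

Is this the intended contract for the actor network, or is the problem elsewhere in my setup?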