A strange error when indexing a tensor to set values to 0

When working with a 2-D tensor, trying to set several 2-D index positions to 0 in a single assignment can fail: with two or fewer index groups the assignment goes through, but with more than two it raises an error. The limit appears to come from the tensor having only two dimensions. The problem can be worked around by indexing differently, for example with a loop or NumPy-style boolean indexing.

When the tensor has 2 dimensions, writing several 2-D index positions to 0 at the same time can go wrong: with two or fewer index groups no error is raised, but with more than two an error comes up.
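The original screenshot is not preserved; below is a minimal sketch of the pattern that presumably triggers it, assuming PyTorch and (row, col) pairs written directly inside the brackets (the concrete index values are made up for illustration):

```python
import torch

t = torch.arange(16).reshape(4, 4).float()

# Two index groups inside the brackets: no error. They are read as the
# row indices and the column indices (one group per dimension), so this
# zeroes the elements at (0, 1) and (1, 2) -- which only happens to match
# the intended "points" (0, 1) and (1, 2) for these particular values.
t[(0, 1), (1, 2)] = 0

# Three index groups: a 2-D tensor only has two dimensions, so this raises
# "IndexError: too many indices for tensor of dimension 2".
try:
    t[(0, 1), (1, 2), (2, 3)] = 0
except IndexError as e:
    print(e)
```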

Logically there is nothing wrong with the request; most likely, because the tensor is two-dimensional, the comma-separated index groups are read as one index per dimension, so passing more than two is simply rejected. Rewriting the assignment in a different form works, as sketched below.

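The screenshot of the working version is also missing. Any of the following standard forms sets an arbitrary number of (row, col) positions to 0; the index values are again just for illustration:

```python
import torch

t = torch.arange(16).reshape(4, 4).float()
points = [(0, 1), (1, 2), (2, 3)]        # any number of (row, col) pairs

# 1. Advanced indexing with one index list per dimension: rows and cols separately.
rows, cols = zip(*points)
t[list(rows), list(cols)] = 0

# 2. A plain Python loop, one element at a time.
for r, c in points:
    t[r, c] = 0

# 3. A boolean mask with the same shape as the tensor.
mask = torch.zeros_like(t, dtype=torch.bool)
mask[list(rows), list(cols)] = True
t[mask] = 0
```

The first form (separate row and column index lists) is the closest drop-in replacement for the failing one-liner; the boolean mask is handy when the positions come from a condition rather than an explicit list of pairs.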
