6. Word Averaging (PyTorch)

This post walks through the implementation of a Word Averaging model: an Embedding layer maps each word to a vector, average pooling turns the word vectors into a sentence vector, and a final linear layer performs the classification. It then demonstrates the full pipeline for IMDB movie-review sentiment analysis with PyTorch and torchtext.

The Word Averaging Model

We start with a simple Word Averaging model. Each word is projected to a word embedding vector by an Embedding layer; averaging all the word vectors in a sentence then gives the vector representation of the whole sentence. That sentence vector is fed into a Linear layer to do the classification.

We use avg_pool2d for the average pooling. The goal is to average the sentence-length dimension down to 1 while keeping the embedding dimension.

The avg_pool2d kernel size is (embedded.shape[1], 1), so the sentence-length dimension gets collapsed.
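
To see the shape change concretely, here is a minimal standalone sketch (not from the original post): avg_pool2d slides a (seq_len, 1) window over the [batch_size, seq_len, embedding_size] tensor, which is exactly a mean over the sequence dimension.

import torch
import torch.nn.functional as F

embedded = torch.randn(4, 7, 100)  # batch_size=4, seq_len=7, embedding_size=100
pooled = F.avg_pool2d(embedded, (embedded.shape[1], 1)).squeeze(1)
print(pooled.shape)                                  # torch.Size([4, 100])
print(torch.allclose(pooled, embedded.mean(dim=1)))  # True: identical to a plain mean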

# -*- coding: utf-8 -*-

import torch
import torch.nn.functional as F
import time, os, random

import torchtext
from torchtext import datasets

SEED = 1000

TEXT = torchtext.data.Field(tokenize='spacy')  # pip install -U spacy && python -m spacy download en
LABEL = torchtext.data.LabelField(dtype=torch.float)
train_data, test_data = datasets.IMDB.splits(TEXT,LABEL)
train_data, valid_data = train_data.split(random_state=random.seed(SEED))  # default 70/30 train/validation split

print(f'Number of training examples: {len(train_data)}')
print(f'Number of validation examples: {len(valid_data)}')
print(f'Number of testing examples: {len(test_data)}')
# print(vars(train_data.examples[0]))

TEXT.build_vocab(train_data, max_size=25000, vectors='glove.6B.100d', unk_init=torch.Tensor.normal_)
LABEL.build_vocab(train_data)
# print(TEXT.vocab.freqs.most_common(20))
# print(TEXT.vocab.itos[:10])
# print(LABEL.vocab.stoi)

USE_CUDA = torch.cuda.is_available()
device = torch.device('cuda' if USE_CUDA else 'cpu')

torch.manual_seed(SEED)
if USE_CUDA:
    torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True

BATCH_SIZE = 64
DATA_PATH  = r'./data/demo14'
SAVE_MODEL = DATA_PATH + os.sep + 'loss_model.pth'

train_iterator, valid_iterator, test_iterator = torchtext.data.BucketIterator.splits(
    (train_data, valid_data, test_data),  # note: the first parameter is `datasets`, not `dataset`
    batch_size=BATCH_SIZE,
    device=device)

class WordAVGModel(torch.nn.Module):
    def __init__(self, vocab_size, embedding_size, output_size, pad_idx):
        super(WordAVGModel, self).__init__()
        self.embed = torch.nn.Embedding(vocab_size, embedding_size, padding_idx=pad_idx)
        self.linear = torch.nn.Linear(embedding_size, output_size)

    def forward(self, text):
        embedded = self.embed(text)  # [seq_len, batch_size, embedding_size]
        embedded = embedded.permute(1, 0, 2)  # reorder dims (unlike view, permute actually moves them): [batch_size, seq_len, embedding_size]
        # kernel_size=(seq_len, 1) averages the whole sequence dimension down to 1:
        # [batch_size, seq_len, embedding_size] -> [batch_size, 1, embedding_size]
        # squeeze(1) removes exactly that singleton dimension (safer than a bare
        # squeeze(), which would also drop the batch dimension when batch_size == 1)
        pooled = F.avg_pool2d(embedded, kernel_size=(embedded.shape[1], 1)).squeeze(1)
        return self.linear(pooled)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)  # numel() counts the elements in each parameter tensor

VOCAB_SIZE = len(TEXT.vocab)
EMBEDDING_SIZE = 100
OUTPUT_SIZE = 1
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]  # stoi is a dict, so index it rather than call it
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]
learning_rate = 0.002

# build the model; its embedding will be initialized with GloVe below
model = WordAVGModel(VOCAB_SIZE, EMBEDDING_SIZE, OUTPUT_SIZE, PAD_IDX)
print(count_parameters(model))

# copy the pretrained GloVe vectors into the embedding layer
pretrained_embedding = TEXT.vocab.vectors
model.embed.weight.data.copy_(pretrained_embedding)

# initialize the <pad> and <unk> embeddings to zeros
model.embed.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_SIZE)
model.embed.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_SIZE)


optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = torch.nn.BCEWithLogitsLoss()  # combines sigmoid + binary cross-entropy, so the model outputs raw logits
if USE_CUDA:
    model = model.to(device)
    criterion = criterion.to(device)

def binary_accuracy(preds, y):
    # sigmoid maps logits to probabilities; round() thresholds them at 0.5
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float()  # True -> 1.0, False -> 0.0
    acc = correct.sum() / len(correct)
    return acc
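
As a quick sanity check of binary_accuracy, here is a hedged toy example (not in the original post):

logits = torch.tensor([2.0, -1.0, 0.5])  # raw model outputs
labels = torch.tensor([1.0, 0.0, 0.0])
print(binary_accuracy(logits, labels))   # tensor(0.6667): 2 of 3 predictions correct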

def train(model, iterator, optimizer, criterion):
    epoch_loss = 0
    epoch_acc  = 0
    model.train()

    for batch in iterator:
        predictions = model(batch.text).squeeze(1)  # [batch_size, 1] -> [batch_size]
        loss = criterion(predictions, batch.label)
        acc  = binary_accuracy(predictions, batch.label)
        # standard update: zero gradients, backprop, optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss/len(iterator), epoch_acc/len(iterator)

def evaluate(model, iterator, criterion):
    epoch_loss = 0
    epoch_acc  = 0
    model.eval()

    with torch.no_grad():
        for batch in iterator:
            predictions = model(batch.text).squeeze(1)
            loss = criterion(predictions, batch.label)
            acc  = binary_accuracy(predictions, batch.label)

            # no parameter updates during evaluation
            epoch_loss += loss.item()
            epoch_acc += acc.item()

    model.train()
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time/60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return  elapsed_mins, elapsed_secs

N_EPOCHS = 10
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
    start_time = time.time()

    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), SAVE_MODEL)

    print(f'Epoch: {epoch + 1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc * 100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc * 100:.2f}%')


import spacy
nlp = spacy.load('en')  # on newer spaCy versions: nlp = spacy.load('en_core_web_sm')

def predict_sentiment(sentence):
    tokenized = [tok.text for tok in nlp.tokenizer(sentence)]
    indexed = [TEXT.vocab.stoi[t] for t in tokenized]
    tensor  = torch.LongTensor(indexed).to(device)
    tensor  = tensor.unsqueeze(1)  # add the batch dimension: [seq_len] -> [seq_len, 1]
    prediction = torch.sigmoid(model(tensor))
    return prediction.item()
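
Before predicting, it makes sense to restore the best checkpoint written by the training loop (a hedged addition, not in the original post; it assumes the loop above has already saved SAVE_MODEL):

model.load_state_dict(torch.load(SAVE_MODEL))
model.eval()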

print(predict_sentiment('This film is terrible'))  # close to 0 -> negative
print(predict_sentiment('This film is great'))     # close to 1 -> positive

https://github.com/zmjames2000/NLP_basis
