import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from collections import deque
import random
import os
EPISODES = 5  # number of training episodes
# File extension changed to .txt
DATA_PATH = "/tmp/pycharm_project_79/D3QN-main/mk/Mk01.txt"
# ================= Enhanced dataset parsing =================
def parse_mk01(file_path):
"""动态解析MK01文件,包含多层数据校验"""
jobs = []
machine_ids = set()
try:
with open(file_path, 'r') as f:
lines = [line.strip() for line in f if line.strip()]
        # Dynamically determine the machine count
        machine_count = 10  # default value
if len(lines) > 0:
            header = list(map(int, lines[0].split()[:2]))  # take only the first two valid numbers
job_count = max(header[0], 0) if len(header) > 0 else 0
if len(header) > 1:
machine_count = max(machine_count, header[1])
lines = lines[1:]
current_idx = 0
for job_id in range(job_count):
if current_idx >= len(lines):
break
            # Parse the number of operations
op_info = lines[current_idx].split()
op_count = int(op_info[0]) if op_info else 0
current_idx += 1
operations = []
for _ in range(op_count):
if current_idx >= len(lines):
break
parts = list(map(int, lines[current_idx].split()))
current_idx += 1
machines = []
                # Dynamically parse (machine, duration) pairs
for i in range(0, len(parts), 2):
if i + 1 >= len(parts):
break
raw_machine = parts[i]
                    duration = max(parts[i + 1], 1)  # ensure the duration is valid
                    # Map the raw machine index to a valid 0-based machine ID
machine = (abs(raw_machine) - 1) % machine_count
machine_ids.add(machine)
machines.append((machine, duration))
                # Ensure at least one valid machine
if not machines:
machine = 0
machines.append((machine, 10))
machine_ids.add(machine)
operations.append({"machines": machines})
            # Ensure at least one valid operation
if operations:
jobs.append({
"operations": operations,
"due_date": None
})
        # Build the list of valid machines
valid_machines = list(machine_ids) if machine_ids else [0]
return {
"jobs": jobs if jobs else [{
"operations": [{"machines": [(0, 10)]}],
"due_date": 100
}],
"machines": valid_machines
}
except Exception as e:
print(f"解析警告:{str(e)},已启用备用数据")
return {
"jobs": [{
"operations": [{"machines": [(0, 10)]}],
"due_date": 100
}],
"machines": [0]
}
# ================= Neural network architecture =================
class DuelingDQN(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
self.feature = nn.Sequential(
nn.Linear(input_dim, 256),
nn.ReLU(),
nn.Linear(256, 128),
nn.ReLU()
)
self.value_stream = nn.Sequential(
nn.Linear(128, 64),
nn.ReLU(),
nn.Linear(64, 1)
)
self.advantage_stream = nn.Sequential(
nn.Linear(128, 64),
nn.ReLU(),
nn.Linear(64, output_dim)
)
def forward(self, x):
x = self.feature(x)
value = self.value_stream(x)
advantage = self.advantage_stream(x)
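        # Dueling aggregation: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a))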
return value + (advantage - advantage.mean(dim=-1, keepdim=True))
# ================= Enhanced agent class definitions =================
class OperationAgent:
"""工序智能体:带安全保护的机器选择"""
def __init__(self, state_dim, action_dim):
self.policy_net = DuelingDQN(state_dim, action_dim)
self.target_net = DuelingDQN(state_dim, action_dim)
self.target_net.load_state_dict(self.policy_net.state_dict())
self.optimizer = optim.Adam(self.policy_net.parameters(), lr=1e-4)
self.memory = deque(maxlen=100000)
self.batch_size = 128
self.gamma = 0.99
self.epsilon = 1.0
self.epsilon_min = 0.01
self.epsilon_decay = 0.995
def select_action(self, state, valid_actions):
try:
if not valid_actions:
                return 0  # fall back to machine 0 (must be guaranteed to exist)
            # Validity check
            valid_actions = list(set(valid_actions))  # deduplicate
if not valid_actions:
return 0
if np.random.random() < self.epsilon:
return np.random.choice(valid_actions)
else:
with torch.no_grad():
state_tensor = torch.FloatTensor(state).unsqueeze(0)
q_values = self.policy_net(state_tensor)
return valid_actions[torch.argmax(q_values[0, valid_actions]).item()]
except Exception as e:
print(f"动作选择异常: {str(e)}, 使用默认动作")
return valid_actions[0] if valid_actions else 0
def update_epsilon(self):
self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
def store_experience(self, state, action, reward, next_state, done):
self.memory.append((
torch.FloatTensor(state),
torch.LongTensor([action]),
torch.FloatTensor([reward]),
torch.FloatTensor(next_state),
torch.BoolTensor([done])
))
def optimize(self):
if len(self.memory) < self.batch_size:
return
batch = random.sample(self.memory, self.batch_size)
states, actions, rewards, next_states, dones = zip(*batch)
states = torch.stack(states)
actions = torch.stack(actions)
rewards = torch.stack(rewards)
next_states = torch.stack(next_states)
dones = torch.stack(dones)
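        # TD target: y = r + gamma * (1 - done) * max_a Q_target(s', a)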
current_q = self.policy_net(states).gather(1, actions)
next_q = self.target_net(next_states).max(1)[0].detach()
target_q = rewards + (1 - dones.float()) * self.gamma * next_q.unsqueeze(1)
loss = nn.MSELoss()(current_q, target_q)
self.optimizer.zero_grad()
loss.backward()
nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
self.optimizer.step()
def update_target_net(self):
self.target_net.load_state_dict(self.policy_net.state_dict())
class QueueAgent:
"""队列智能体:管理工序优先级排序"""
def __init__(self, state_dim):
        self.policy_net = DuelingDQN(state_dim, 2)  # accept / reject
self.optimizer = optim.Adam(self.policy_net.parameters(), lr=1e-4)
self.memory = deque(maxlen=50000)
self.batch_size = 64
def prioritize_operations(self, state, operations):
with torch.no_grad():
state_tensor = torch.FloatTensor(state).unsqueeze(0)
priority_scores = self.policy_net(state_tensor).squeeze().numpy()
sorted_indices = np.argsort(-priority_scores)
return [operations[i] for i in sorted_indices]
class MachineAgent:
"""机器智能体:执行加工决策"""
def __init__(self, state_dim):
        self.policy_net = DuelingDQN(state_dim, 3)  # process / maintain / idle
self.optimizer = optim.Adam(self.policy_net.parameters(), lr=1e-4)
self.memory = deque(maxlen=50000)
self.batch_size = 64
def select_action(self, state):
with torch.no_grad():
state_tensor = torch.FloatTensor(state).unsqueeze(0)
q_values = self.policy_net(state_tensor)
return torch.argmax(q_values).item()
# ================= Scheduling environment =================
class FlexibleJobShopEnv:
def __init__(self, config):
        # Data validation and repair
self.jobs = []
for job in config.get("jobs", []):
valid_ops = []
for op in job.get("operations", []):
if len(op.get("machines", [])) > 0:
valid_ops.append(op)
if valid_ops:
self.jobs.append({
"operations": valid_ops,
"due_date": job.get("due_date", 100)
})
        # Ensure at least one valid job
if not self.jobs:
self.jobs = [{
"operations": [{"machines": [(0, 10)]}],
"due_date": 100
}]
        # Dynamically build the machine list
self.machines = list(set(
m for job in self.jobs
for op in job["operations"]
for m, _ in op["machines"]
)) or [0]
        # Other initialization parameters
self.max_steps = 1000
self.job_arrival_rate = 0.1
self.machine_break_prob = 0.02
self.reset()
def reset(self):
self.current_step = 0
self.schedule = {m: [] for m in self.machines}
self.active_jobs = []
self.completed_jobs = []
self.machine_states = {
m: {
"status": "idle",
"current_job": None,
"remaining_time": 0
} for m in self.machines
}
self.event_queue = deque()
        # Safely initialize jobs
try:
for _ in range(min(3, len(self.jobs))):
self._add_job(random.choice(range(len(self.jobs))))
except:
self._add_job(0)
def _add_job(self, job_idx):
try:
job_data = self.jobs[job_idx]
job = {
"id": len(self.active_jobs),
"operations": [op.copy() for op in job_data["operations"]],
"current_op": 0,
"arrival_time": self.current_step,
"due_date": self.current_step + np.random.randint(50, 100)
}
self.active_jobs.append(job)
except:
self.active_jobs.append({
"id": len(self.active_jobs),
"operations": [{"machines": [(0, 10)]}],
"current_op": 0,
"arrival_time": self.current_step,
"due_date": self.current_step + 100
})
def _get_state(self):
"""构建全局状态向量"""
state = []
        # Machine state features
for m in self.machines:
state += [
1 if self.machine_states[m]["status"] == "busy" else 0,
self.machine_states[m]["remaining_time"] / 100
]
        # Job state features
job_features = []
for job in self.active_jobs:
job_features += [
(job["due_date"] - self.current_step) / 100,
job["current_op"] / len(job["operations"])
]
        state += job_features[:10]  # features of at most the first 5 jobs (2 values each)
        # Queue state
state.append(len(self.event_queue) / 20)
return np.array(state, dtype=np.float32)
def step(self, machine_actions):
"""执行一个时间步"""
reward = 0
done = False
        # Handle machine actions
for machine, action in machine_actions.items():
state = self.machine_states[machine]
if action == 0 and state["status"] == "idle":
if self.event_queue:
selected_op = self.event_queue.popleft()
duration = selected_op["duration"]
self.schedule[machine].append({
"job_id": selected_op["job_id"],
"op_idx": selected_op["op_idx"],
"start": self.current_step,
"end": self.current_step + duration
})
state.update({
"status": "busy",
"current_job": selected_op["job_id"],
"remaining_time": duration
})
                    reward += 2.0  # reward for successfully dispatching an operation
        # Update machine states
for machine in self.machines:
state = self.machine_states[machine]
if state["status"] == "busy":
state["remaining_time"] -= 1
if state["remaining_time"] <= 0:
job_id = state["current_job"]
job = next(j for j in self.active_jobs if j["id"] == job_id)
job["current_op"] += 1
if job["current_op"] >= len(job["operations"]):
self.completed_jobs.append(job)
self.active_jobs.remove(job)
                        reward += 10.0  # reward for completing a job
state.update({"status": "idle", "current_job": None})
        # Handle dynamic events
self._handle_events()
        # Compute rewards
reward += self._calculate_utilization_reward()
reward -= self._calculate_tardiness_penalty()
        reward -= len(self.event_queue) * 0.1  # queue-length penalty
        # Check termination conditions
self.current_step += 1
if self.current_step >= self.max_steps:
done = True
if len(self.completed_jobs) >= len(self.jobs):
done = True
            reward += 100.0  # bonus for completing all jobs ahead of schedule
return self._get_state(), reward, done, {}
def _calculate_utilization_reward(self):
busy_machines = sum(1 for m in self.machines
if self.machine_states[m]["status"] == "busy")
return (busy_machines / len(self.machines)) * 2.5
def _calculate_tardiness_penalty(self):
penalty = 0
for job in self.active_jobs:
if self.current_step > job["due_date"]:
penalty += (self.current_step - job["due_date"]) * 0.2
        return min(penalty, 15.0)  # cap the maximum penalty
def _handle_events(self):
        # New job arrivals
if np.random.poisson(self.job_arrival_rate):
self._add_job(random.choice(range(len(self.jobs))))
        # Machine breakdown handling
for machine in self.machines:
if self.machine_states[machine]["status"] == "busy":
if np.random.rand() < self.machine_break_prob:
self.machine_states[machine]["status"] = "break"
self.machine_states[machine]["remaining_time"] = np.random.randint(5, 15)
def render(self):
"""可视化当前状态"""
plt.figure(figsize=(12, 6))
colors = list(mcolors.TABLEAU_COLORS.values())
        # Machine utilization
plt.subplot(1, 2, 1)
util = [len(ops) for ops in self.schedule.values()]
plt.bar(range(len(self.machines)), util, color=colors[0])
plt.title("Machine Utilization")
plt.xlabel("Machine ID")
plt.ylabel("Completed Operations")
        # Job progress
plt.subplot(1, 2, 2)
if self.active_jobs:
progress = [
(job["current_op"] / max(len(job["operations"]), 1)) * 100 # 防止除零
for job in self.active_jobs
]
plt.bar(range(len(progress)), progress, color=colors[1])
plt.title("Job Progress")
plt.xlabel("Job ID")
plt.ylabel("Completion (%)")
plt.tight_layout()
plt.pause(0.01)
plt.close()
# ================= Training framework =================
class Trainer:
def __init__(self, env_config):
self.env = FlexibleJobShopEnv(env_config)
        # Dynamically determine the state dimension
self.env.reset()
sample_state = self.env._get_state()
state_dim = len(sample_state)
self.op_agents = [
OperationAgent(state_dim=state_dim, action_dim=10)
for _ in range(len(env_config["jobs"]))
]
self.queue_agent = QueueAgent(state_dim=state_dim)
self.machine_agents = {
m: MachineAgent(state_dim=state_dim)
for m in env_config["machines"]
}
def train(self, episodes=5):
rewards_history = []
moving_avg = []
for ep in range(episodes):
state = self.env.reset()
total_reward = 0
done = False
while not done:
                # Operation agent decisions (this is the block modified inside Trainer.train)
op_actions = []
for job in self.env.active_jobs:
if job["current_op"] < len(job["operations"]):
                        # Ensure there are selectable machines
machines = job["operations"][job["current_op"]]["machines"]
if not machines:
                            machines = [(0, 10)]  # default machine
valid_machines = [m[0] for m in machines]
agent = self.op_agents[job["id"]]
action = agent.select_action(state, valid_machines)
                        # Look up the corresponding duration
try:
duration = next(d for m, d in machines if m == action)
except StopIteration:
                            duration = 10  # default value
op_actions.append({
"job_id": job["id"],
"op_idx": job["current_op"],
"machine": action,
"duration": duration
})
                # Queue agent ordering
sorted_ops = self.queue_agent.prioritize_operations(state, op_actions)
self.env.event_queue = deque(sorted_ops)
                # Machine agent execution
machine_actions = {}
for machine in self.env.machines:
if self.env.machine_states[machine]["status"] == "idle":
action = self.machine_agents[machine].select_action(state)
machine_actions[machine] = action
                # Environment step
next_state, reward, done, _ = self.env.step(machine_actions)
total_reward += reward
                # Store experience
for agent in self.op_agents:
agent.store_experience(state, action, reward, next_state, done)
state = next_state
            # Optimize models
for agent in self.op_agents:
agent.optimize()
agent.update_epsilon()
if ep % 10 == 0:
agent.update_target_net()
rewards_history.append(total_reward)
moving_avg.append(np.mean(rewards_history[-10:]))
print(f"Episode {ep + 1}/{episodes}, Reward: {total_reward:.2f}, Epsilon: {self.op_agents[0].epsilon:.3f}")
            # Periodically save models
if ep % 50 == 0:
self.save_models(f"checkpoint_ep{ep}")
        # Post-training visualization
self.plot_training(rewards_history, moving_avg)
self.env.render()
def save_models(self, path):
os.makedirs(path, exist_ok=True)
for idx, agent in enumerate(self.op_agents):
torch.save(agent.policy_net.state_dict(), f"{path}/op_agent_{idx}.pth")
torch.save(self.queue_agent.policy_net.state_dict(), f"{path}/queue_agent.pth")
for m in self.env.machines:
torch.save(self.machine_agents[m].policy_net.state_dict(), f"{path}/machine_{m}.pth")
def plot_training(self, rewards, moving_avg):
plt.figure(figsize=(12, 6))
plt.plot(rewards, alpha=0.6, label='Episode Reward')
plt.plot(moving_avg, linewidth=2, label='Moving Average (10)')
plt.xlabel("Episode")
plt.ylabel("Reward")
plt.title("Training Progress")
plt.legend()
plt.savefig("training_progress.png")
plt.close()
# ================= Main program =================
if __name__ == "__main__":
    # Load configuration
    mk_data = parse_mk01(DATA_PATH)  # use the configured path
    # Initialize the trainer
trainer = Trainer(mk_data)
    # Start training
try:
trainer.train(episodes=EPISODES)
except KeyboardInterrupt:
print("\n训练被用户中断,正在保存当前模型...")
trainer.save_models("interrupted_training")
except Exception as e:
print(f"训练过程中出现异常:{str(e)}")
exit(1)
print("训练完成!结果已保存至当前目录") 这个代码报错,如何解决?new(): data must be a sequence (got NoneType)
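The error most likely comes from FlexibleJobShopEnv.reset(): it initializes the environment but never returns the initial state, so state = self.env.reset() in Trainer.train() is None, and the first torch.FloatTensor(state) call (for example inside prioritize_operations or store_experience) then raises new(): data must be a sequence (got NoneType). A minimal sketch of the fix, assuming the rest of the code stays as posted, is to end reset() by returning the observation:

    def reset(self):
        self.current_step = 0
        # ... keep the existing initialization of schedule, active_jobs,
        # completed_jobs, machine_states, event_queue and the initial jobs ...
        # then finish by returning the observation instead of implicitly returning None:
        return self._get_state()

step() already returns self._get_state(), so only reset() needs this change for train() to receive a usable state vector.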