This code builds a mathematical model of a transport optimization problem and compares two solution approaches: mixed-integer programming (MIP) and reinforcement learning (Q-learning). Both are applied to a multi-vehicle, multi-task routing problem whose objective is to minimize transport cost and travel time while respecting constraints such as vehicle capacity and route continuity.
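Written out formally (this simply restates the model that the PuLP section below implements, using the same symbols as the code: assignment variables x_ij, arc variables y_ijk, costs c_ij, times t_ijk, cargo q_i, and capacities C_j), the optimization problem is:

\min \sum_{i,j} c_{ij}\,x_{ij} + \sum_{i,j,k} t_{ijk}\,y_{ijk}
\quad \text{s.t.} \quad
\sum_{j} x_{ij} = 1 \;\forall i, \qquad
\sum_{i} q_i\,x_{ij} \le C_j \;\forall j,
\sum_{i} y_{ijk} = x_{kj} \;\forall j,k, \qquad
\sum_{k} y_{ijk} = x_{ij} \;\forall i,j, \qquad
x_{ij},\, y_{ijk} \in \{0,1\}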
1. Importing libraries
python
import numpy as np
import pulp
import matplotlib.pyplot as plt
from scipy.spatial.distance import euclidean
import tkinter as tk
- Purpose: import the required libraries
- Notes:
  - numpy: random data generation and matrix computation
  - pulp: modeling and solving the (mixed-integer) linear program
  - matplotlib: data visualization
  - euclidean: Euclidean distance between two points
  - tkinter: imported with the intent of configuring fonts (to avoid warnings when rendering the Chinese plot labels), although the script as shown never actually uses it; a matplotlib-only font setup is sketched right after this list
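For reference, the usual way to silence the missing-glyph warnings is to point matplotlib at a CJK-capable font directly; tkinter is not required for this. A minimal sketch, assuming a font such as SimHei is installed on the machine:
python
import matplotlib.pyplot as plt

# Use a CJK-capable font for the Chinese labels and keep the minus sign renderable.
plt.rcParams['font.sans-serif'] = ['SimHei']   # assumption: SimHei is installed
plt.rcParams['axes.unicode_minus'] = False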
2. Data generation module
python
np.random.seed(42)
n = 20   # number of tasks
m = 10   # number of vehicles
# Task data
q = np.random.randint(5, 20, n)           # cargo weight (5-20 t)
coordinates = np.random.rand(n, 2) * 100  # coordinates (0-100)
# Vehicle data
C = np.random.randint(30, 80, m)               # vehicle capacity (30-80 t)
vehicle_speeds = np.random.uniform(40, 60, m)  # speed (40-60 km/h)
# Cost and time
c = np.random.randint(50, 200, (n, m))  # transport cost (50-200 CNY per task)
# Distance matrix
d = np.zeros((n, n))
for i in range(n):
    for k in range(n):
        d[i][k] = euclidean(coordinates[i], coordinates[k])
# Time matrix (with traffic fluctuation)
t = np.zeros((n, m, n))
for i in range(n):
    for j in range(m):
        for k in range(n):
            base_time = d[i][k] / vehicle_speeds[j]
            traffic_factor = np.random.uniform(0.8, 1.2)  # ±20% traffic fluctuation
            t[i][j][k] = base_time * traffic_factor
- Purpose: generate random data for the transport problem
- Key data structures:
  - q: cargo weight of each task
  - coordinates: task coordinates
  - C: vehicle capacities
  - d: task-to-task distance matrix (a vectorized alternative is sketched below)
  - t: travel-time matrix incorporating random traffic fluctuation
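The nested loop over euclidean is fine at n = 20; for larger instances the same matrix can be built in a single call with scipy.spatial.distance.cdist, from the same library the code already imports from:
python
from scipy.spatial.distance import cdist

# Vectorized equivalent of the double loop: pairwise Euclidean distances.
d = cdist(coordinates, coordinates)   # shape (n, n), metric='euclidean' by default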
3. The PuLP optimization model
python
def solve_pulp_model():
    model = pulp.LpProblem("Transport_Optimization", pulp.LpMinimize)
    # Decision variables
    x = pulp.LpVariable.dicts("x", (range(n), range(m)), cat=pulp.LpBinary)  # task-vehicle assignment
    y = pulp.LpVariable.dicts("y", (range(n), range(m), range(n)), cat=pulp.LpBinary)  # arc selection
    # Objective: minimize total cost + total time
    total_cost = pulp.lpSum([c[i][j] * x[i][j] for i in range(n) for j in range(m)])
    total_time = pulp.lpSum([t[i][j][k] * y[i][j][k] for i in range(n) for j in range(m) for k in range(n)])
    model += total_cost + total_time, "Total_Objective"
    # Constraint 1: each task is served by exactly one vehicle
    for i in range(n):
        model += pulp.lpSum([x[i][j] for j in range(m)]) == 1, f"Task_{i}_Assignment"
    # Constraint 2: vehicle capacity
    for j in range(m):
        model += pulp.lpSum([q[i] * x[i][j] for i in range(n)]) <= C[j], f"Vehicle_{j}_Capacity"
    # Constraint 3: route continuity (one incoming arc for every assigned task)
    for j in range(m):
        for k in range(n):
            model += pulp.lpSum([y[i][j][k] for i in range(n)]) == x[k][j], f"Enter_{j}_{k}"
    # Constraint 4: route continuity (one outgoing arc for every assigned task)
    for j in range(m):
        for i in range(n):
            model += pulp.lpSum([y[i][j][k] for k in range(n)]) == x[i][j], f"Exit_{j}_{i}"
    # Solve the model
    try:
        solver = pulp.CBC_CMD(path="", keepFiles=0, logLevel=1)
        model.solve(solver)
    except Exception:
        solver = pulp.PULP_CBC_CMD()
        model.solve(solver)
    if pulp.LpStatus[model.status] != "Optimal":
        print("警告:求解器未找到最优解!")
        return None, None, None
    # Extract the results
    x_sol = np.array([[x[i][j].value() for j in range(m)] for i in range(n)])
    y_sol = np.array([[[y[i][j][k].value() for k in range(n)] for j in range(m)] for i in range(n)])
    total_objective = pulp.value(model.objective)
    return x_sol, y_sol, total_objective
- Purpose: solve for the optimal transport plan with mixed-integer linear programming
- Core components:
  - Decision variables:
    - x[i][j]: whether task i is served by vehicle j
    - y[i][j][k]: whether vehicle j travels from task i to task k
  - Objective: minimize total cost plus total time (cost in CNY and time in hours are simply added, without a weighting factor)
  - Constraints: task assignment, capacity limits, route continuity
- Caveat: constraints 3 and 4 only force one incoming and one outgoing arc per assigned task; they do not exclude self-loops or subtours, so the y variables describe a simplified flow rather than true vehicle tours. A small helper for inspecting the solution is sketched below.
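To check the result against the capacity constraint, a short inspection helper (a sketch; it only reads x_sol, q, and C from the code above, and the name print_assignment is ours) can print each vehicle's tasks and load:
python
def print_assignment(x_sol, q, C):
    """Summarize the PuLP solution: tasks and total load per vehicle."""
    for j in range(x_sol.shape[1]):
        tasks_j = np.where(x_sol[:, j] == 1)[0]            # tasks assigned to vehicle j
        load_j = q[tasks_j].sum() if len(tasks_j) else 0   # total cargo on vehicle j
        print(f"Vehicle {j}: tasks {tasks_j.tolist()}, load {load_j}/{C[j]}")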
4. Reinforcement learning model
The RLAgent class
python
class RLAgent:
    def __init__(self, state_dims, action_size):
        self.state_dims = state_dims   # size of each state dimension
        self.action_size = action_size
        # Total number of discrete states = product of the dimension sizes
        self.total_states = 1
        for dim in state_dims:
            self.total_states *= dim
        # Initialize the Q-table
        self.q_table = np.zeros((self.total_states, action_size))
        # Learning parameters
        self.lr = 0.1
        self.gamma = 0.95
        self.epsilon = 0.9
        self.epsilon_decay = 0.995
        self.min_epsilon = 0.1

    def state_to_index(self, state):
        """Map a multi-dimensional state tuple to a flat Q-table index."""
        index = 0
        multiplier = 1
        for i, value in enumerate(state):
            index += value * multiplier
            multiplier *= self.state_dims[i]
        return index

    def act(self, state):
        """Choose an action with an ε-greedy policy."""
        state_idx = self.state_to_index(state)
        if np.random.rand() < self.epsilon:
            return np.random.choice(self.action_size)  # explore
        return np.argmax(self.q_table[state_idx])      # exploit

    def update(self, state, action, reward, next_state):
        """Update the Q-table."""
        state_idx = self.state_to_index(state)
        next_state_idx = self.state_to_index(next_state)
        current_q = self.q_table[state_idx, action]
        next_max_q = np.max(self.q_table[next_state_idx])
        # Q-learning update rule
        new_q = current_q + self.lr * (reward + self.gamma * next_max_q - current_q)
        self.q_table[state_idx, action] = new_q
        # Decay the exploration rate
        if self.epsilon > self.min_epsilon:
            self.epsilon *= self.epsilon_decay
- Purpose: the Q-learning agent
- Key methods:
  - state_to_index: maps the multi-dimensional state tuple to a flat Q-table index (a mixed-radix encoding; a worked example follows this list)
  - act: ε-greedy action selection
  - update: applies the Q-learning rule Q(s,a) ← Q(s,a) + α·[r + γ·max_a' Q(s',a') − Q(s,a)] and decays the exploration rate ε
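A tiny worked example of the encoding, using the same state_dims the main program passes in (the state values here are hypothetical):
python
state_dims = (10, 11, 10, 10)     # vehicle index, load bucket, x bin, y bin (m = 10)
state = (3, 7, 2, 5)              # hypothetical state tuple

index, multiplier = 0, 1
for value, dim in zip(state, state_dims):
    index += value * multiplier   # least-significant dimension first
    multiplier *= dim
print(index)                      # 3 + 7*10 + 2*110 + 5*1100 = 5793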
The TransportEnv class
python
class TransportEnv:
    def __init__(self, tasks, vehicles):
        self.tasks = tasks
        self.vehicles = vehicles
        self.n = len(tasks)
        self.m = len(vehicles)
        self.reset()

    def reset(self):
        """Reset the environment state."""
        self.current_vehicle = 0
        self.vehicle_load = [0] * self.m
        self.visited = [False] * self.n
        self.current_position = np.zeros(2)
        return self._get_state()

    def _get_state(self):
        """Encode the environment state as a multi-dimensional tuple."""
        if self.current_vehicle >= self.m:
            vehicle_idx = self.m - 1
        else:
            vehicle_idx = self.current_vehicle
        # Load ratio (0-10)
        if self.current_vehicle < self.m:
            load_ratio = min(int(self.vehicle_load[self.current_vehicle] / self.vehicles[self.current_vehicle]["capacity"] * 10), 10)
        else:
            load_ratio = 10
        # Discretized coordinates (0-9)
        x_bin = int(self.current_position[0] // 10)
        y_bin = int(self.current_position[1] // 10)
        return (vehicle_idx, load_ratio, x_bin, y_bin)

    def step(self, action):
        """Execute an action and return the new state."""
        done = False
        reward = -5  # base step penalty
        if self.visited[action]:
            return self._get_state(), reward, done, {}  # penalty for revisiting a task
        task = self.tasks[action]
        if self.current_vehicle >= self.m:
            return self._get_state(), reward, done, {}  # no vehicle available
        vehicle = self.vehicles[self.current_vehicle]
        # Check whether the load would exceed capacity
        if self.vehicle_load[self.current_vehicle] + task["q"] > vehicle["capacity"]:
            self.current_vehicle += 1  # switch to the next vehicle
            if self.current_vehicle >= self.m:
                done = True
            return self._get_state(), reward, done, {}
        # Travel-time reward
        distance = euclidean(self.current_position, task["coords"])
        time = distance / vehicle["speed"]
        reward = -time  # shorter travel time means a higher reward
        # Update the environment state
        self.vehicle_load[self.current_vehicle] += task["q"]
        self.visited[action] = True
        self.current_position = task["coords"]
        # Check whether all tasks are finished
        if all(self.visited) or self.current_vehicle >= self.m:
            done = True
            reward += 100  # completion bonus
        return self._get_state(), reward, done, {}
- Purpose: simulates the transport problem as a reinforcement-learning environment
- Key methods:
  - reset: resets the environment state
  - _get_state: encodes the state as a four-dimensional tuple (vehicle index, load bucket, x bin, y bin)
  - step: executes an action and returns the new state, reward, and termination flag (a minimal interaction loop is sketched below)
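A minimal interaction sketch showing the Gym-style loop the agent is trained with; it assumes tasks and vehicles have been built as in main below, and drives the environment with a random policy just to exercise the API:
python
env = TransportEnv(tasks, vehicles)
state = env.reset()
total_reward, done = 0.0, False
while not done:
    action = np.random.randint(env.n)           # pick a random task index
    state, reward, done, _ = env.step(action)   # (state, reward, done, info)
    total_reward += reward
print("random-policy return:", total_reward)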
5. Main workflow and visualization
python
def main():
    # 1. Prepare the data
    tasks = [{"q": q[i], "coords": coordinates[i]} for i in range(n)]
    vehicles = [{"capacity": C[j], "speed": vehicle_speeds[j]} for j in range(m)]
    # 2. Solve the PuLP model
    print("正在求解PuLP模型...")
    x_sol, y_sol, pulp_obj = solve_pulp_model()
    if x_sol is None:
        return
    # 3. Train the reinforcement learning agent
    env = TransportEnv(tasks, vehicles)
    # State dimensions: vehicle index, load bucket, x bin, y bin
    state_dims = (m, 11, 10, 10)
    action_size = n
    agent = RLAgent(state_dims, action_size)
    print("\n正在训练强化学习模型...")
    for episode in range(1000):
        state = env.reset()
        total_reward = 0
        while True:
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            agent.update(state, action, reward, next_state)
            state = next_state
            total_reward += reward
            if done:
                break
    # 4. Result comparison
    print(f"\nPuLP优化结果:总目标值 {pulp_obj:.2f}")
    print("车辆分配(1表示分配):")
    print(x_sol)
    # 5. Visualize the task assignment
    plt.figure(figsize=(12, 8))
    plt.scatter(coordinates[:, 0], coordinates[:, 1], c='blue', label='任务点', zorder=2)
    plt.scatter(0, 0, c='red', marker='s', label='起点', zorder=3)
    for j in range(m):
        assigned_tasks = np.where(x_sol[:, j] == 1)[0]
        if len(assigned_tasks) == 0:
            continue
        # Build the vehicle's path
        path = [(0, 0)] + [coordinates[i] for i in assigned_tasks]
        path = np.array(path)
        plt.plot(path[:, 0], path[:, 1], marker='o', linestyle='-', label=f'车辆{j+1}路径', zorder=1)
    plt.legend()
    plt.title("运输任务分配可视化")
    plt.xlabel("X坐标")
    plt.ylabel("Y坐标")
    plt.grid(alpha=0.3)
    plt.gca().set_aspect('equal', adjustable='box')
    plt.show()
- Purpose: ties the whole pipeline together and visualizes the result
- Key steps:
  - Prepare the data
  - Solve the PuLP optimization model
  - Train the reinforcement-learning agent
  - Compare the two approaches (as written, only the PuLP objective is printed; an evaluation sketch for the trained agent follows this list)
  - Visualize the task assignment and vehicle paths
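The training loop never reports how good the learned policy actually is, so the comparison has to be added by hand. One possible evaluation sketch (our addition: a greedy rollout with exploration switched off, reusing env and agent from main; note that the accumulated reward and the PuLP objective are on different scales, so the comparison is only qualitative):
python
def evaluate_agent(agent, env, max_steps=500):
    """Greedy rollout of the trained agent; returns the accumulated reward."""
    agent.epsilon = 0.0                    # no exploration: always take the argmax action
    state = env.reset()
    total_reward, done, steps = 0.0, False, 0
    while not done and steps < max_steps:  # step cap guards against repeated actions
        action = agent.act(state)
        state, reward, done, _ = env.step(action)
        total_reward += reward
        steps += 1
    return total_reward

# rl_return = evaluate_agent(agent, env)
# print(f"RL greedy return: {rl_return:.2f} vs PuLP objective: {pulp_obj:.2f}")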
Summary
This code implements a comparative study of a transport optimization problem:
- Exact approach: mixed-integer linear programming via PuLP for the optimal solution
- Approximate approach: Q-learning reinforcement learning for a near-optimal solution
- Application scenarios: logistics delivery, vehicle routing, and similar problems
- Technical highlights: multi-dimensional state encoding, Q-table learning, route-continuity constraints
Complete code
import numpy as np
import pulp
import matplotlib.pyplot as plt
from scipy.spatial.distance import euclidean
import tkinter as tk
# ==============================
# Part 1: Data generation
# ==============================
np.random.seed(42)
n = 20   # number of tasks
m = 10   # number of vehicles
# 1. Task data
q = np.random.randint(5, 20, n)           # cargo weight (5-20 t)
coordinates = np.random.rand(n, 2) * 100  # coordinates (0-100)
# 2. Vehicle data
C = np.random.randint(30, 80, m)               # vehicle capacity (30-80 t)
vehicle_speeds = np.random.uniform(40, 60, m)  # vehicle speed (40-60 km/h)
# 3. Cost and time
c = np.random.randint(50, 200, (n, m))  # transport cost (50-200 CNY per task)
# Task-to-task distance matrix (Euclidean distance)
d = np.zeros((n, n))
for i in range(n):
    for k in range(n):
        d[i][k] = euclidean(coordinates[i], coordinates[k])
# Time matrix (vehicle speed plus random traffic effects)
t = np.zeros((n, m, n))
for i in range(n):
    for j in range(m):
        for k in range(n):
            base_time = d[i][k] / vehicle_speeds[j]
            traffic_factor = np.random.uniform(0.8, 1.2)  # ±20% traffic fluctuation
            t[i][j][k] = base_time * traffic_factor
# ==============================
# Part 2: PuLP optimization model
# ==============================
def solve_pulp_model():
    model = pulp.LpProblem("Transport_Optimization", pulp.LpMinimize)
    # Decision variables
    x = pulp.LpVariable.dicts("x", (range(n), range(m)), cat=pulp.LpBinary)
    y = pulp.LpVariable.dicts("y", (range(n), range(m), range(n)), cat=pulp.LpBinary)
    # Objective: minimize total cost + total time
    total_cost = pulp.lpSum([c[i][j] * x[i][j] for i in range(n) for j in range(m)])
    total_time = pulp.lpSum([t[i][j][k] * y[i][j][k] for i in range(n) for j in range(m) for k in range(n)])
    model += total_cost + total_time, "Total_Objective"
    # Constraint 1: each task is served by exactly one vehicle
    for i in range(n):
        model += pulp.lpSum([x[i][j] for j in range(m)]) == 1, f"Task_{i}_Assignment"
    # Constraint 2: vehicle capacity
    for j in range(m):
        model += pulp.lpSum([q[i] * x[i][j] for i in range(n)]) <= C[j], f"Vehicle_{j}_Capacity"
    # Constraint 3: route continuity (a vehicle may only enter task k if k is assigned to it)
    for j in range(m):
        for k in range(n):
            model += pulp.lpSum([y[i][j][k] for i in range(n)]) == x[k][j], f"Enter_{j}_{k}"
    # Constraint 4: route continuity (a vehicle may only leave task i if i is assigned to it)
    for j in range(m):
        for i in range(n):
            model += pulp.lpSum([y[i][j][k] for k in range(n)]) == x[i][j], f"Exit_{j}_{i}"
    # Solver configuration (CBC)
    try:
        solver = pulp.CBC_CMD(path="", keepFiles=0, logLevel=1)  # leave path empty if CBC is on the system PATH
        model.solve(solver)
    except Exception:
        solver = pulp.PULP_CBC_CMD()
        model.solve(solver)
    if pulp.LpStatus[model.status] != "Optimal":
        print("警告:求解器未找到最优解!")
        return None, None, None
    # Extract the results
    x_sol = np.array([[x[i][j].value() for j in range(m)] for i in range(n)])
    y_sol = np.array([[[y[i][j][k].value() for k in range(n)] for j in range(m)] for i in range(n)])
    total_objective = pulp.value(model.objective)
    return x_sol, y_sol, total_objective
# ==============================
# Part 3: Reinforcement learning comparison model (improved Q-learning)
# ==============================
class RLAgent:
    def __init__(self, state_dims, action_size):
        self.state_dims = state_dims   # size of each state dimension (tuple)
        self.action_size = action_size
        # Total number of discrete states = product of the dimension sizes
        self.total_states = 1
        for dim in state_dims:
            self.total_states *= dim
        # Initialize the Q-table: [total states, number of actions]
        self.q_table = np.zeros((self.total_states, action_size))
        # Learning parameters
        self.lr = 0.1
        self.gamma = 0.95
        self.epsilon = 0.9
        self.epsilon_decay = 0.995
        self.min_epsilon = 0.1

    def state_to_index(self, state):
        """Map a multi-dimensional state tuple to a flat Q-table index."""
        index = 0
        multiplier = 1
        for i, value in enumerate(state):
            index += value * multiplier
            multiplier *= self.state_dims[i]
        return index

    def act(self, state):
        """Choose an action for the given state (ε-greedy policy)."""
        state_idx = self.state_to_index(state)
        if np.random.rand() < self.epsilon:
            return np.random.choice(self.action_size)  # explore
        return np.argmax(self.q_table[state_idx])      # exploit

    def update(self, state, action, reward, next_state):
        """Update the Q-table."""
        state_idx = self.state_to_index(state)
        next_state_idx = self.state_to_index(next_state)
        current_q = self.q_table[state_idx, action]
        next_max_q = np.max(self.q_table[next_state_idx])
        # Q-learning update rule
        new_q = current_q + self.lr * (reward + self.gamma * next_max_q - current_q)
        self.q_table[state_idx, action] = new_q
        # Decay the exploration rate
        if self.epsilon > self.min_epsilon:
            self.epsilon *= self.epsilon_decay
class TransportEnv:
    def __init__(self, tasks, vehicles):
        self.tasks = tasks         # task list [{"q":..., "coords":...}]
        self.vehicles = vehicles   # vehicle list [{"capacity":..., "speed":...}]
        self.n = len(tasks)
        self.m = len(vehicles)
        self.reset()

    def reset(self):
        """Reset the environment state."""
        self.current_vehicle = 0             # current vehicle index (0 to m-1)
        self.vehicle_load = [0]*self.m       # vehicle loads
        self.visited = [False]*self.n        # task completion flags
        self.current_position = np.zeros(2)  # starting coordinates
        return self._get_state()

    def _get_state(self):
        """Encode the environment state as a multi-dimensional tuple."""
        if self.current_vehicle >= self.m:
            vehicle_idx = self.m - 1  # guard against an out-of-range index
        else:
            vehicle_idx = self.current_vehicle
        # Load ratio (0-10, corresponding to 0%-100%)
        if self.current_vehicle < self.m:
            load_ratio = min(int(self.vehicle_load[self.current_vehicle] / self.vehicles[self.current_vehicle]["capacity"] * 10), 10)
        else:
            load_ratio = 10
        # Discretized coordinates (0-9: the 0-100 range split into 10 bins)
        x_bin = int(self.current_position[0] // 10)
        y_bin = int(self.current_position[1] // 10)
        return (vehicle_idx, load_ratio, x_bin, y_bin)

    def step(self, action):
        """Execute an action and return the new state."""
        done = False
        reward = -5  # base step penalty
        if self.visited[action]:
            return self._get_state(), reward, done, {}  # penalty for revisiting a task
        task = self.tasks[action]
        if self.current_vehicle >= self.m:
            return self._get_state(), reward, done, {}  # no vehicle available
        vehicle = self.vehicles[self.current_vehicle]
        # Check whether the load would exceed capacity
        if self.vehicle_load[self.current_vehicle] + task["q"] > vehicle["capacity"]:
            self.current_vehicle += 1  # switch to the next vehicle
            if self.current_vehicle >= self.m:
                done = True
            return self._get_state(), reward, done, {}
        # Travel-time reward (shorter distance means a higher reward)
        distance = euclidean(self.current_position, task["coords"])
        time = distance / vehicle["speed"]
        reward = -time  # minimizing time is equivalent to maximizing its negative as a reward
        # Update the environment state
        self.vehicle_load[self.current_vehicle] += task["q"]
        self.visited[action] = True
        self.current_position = task["coords"]
        # Check whether all tasks are finished or all vehicles are used up
        if all(self.visited) or self.current_vehicle >= self.m:
            done = True
            reward += 100  # completion bonus
        return self._get_state(), reward, done, {}
# ==============================
# Part 4: Main workflow and visualization
# ==============================
def main():
    # 1. Prepare the data
    tasks = [{"q": q[i], "coords": coordinates[i]} for i in range(n)]
    vehicles = [{"capacity": C[j], "speed": vehicle_speeds[j]} for j in range(m)]
    # 2. Solve the PuLP model
    print("正在求解PuLP模型...")
    x_sol, y_sol, pulp_obj = solve_pulp_model()
    if x_sol is None:
        return
    # 3. Train the reinforcement learning agent
    env = TransportEnv(tasks, vehicles)
    # Size of each state dimension
    state_dims = (
        m,   # current vehicle index: 0 to m-1 (m values)
        11,  # load ratio: 0 to 10 (11 values)
        10,  # x-coordinate bin: 0 to 9 (10 values)
        10   # y-coordinate bin: 0 to 9 (10 values)
    )
    action_size = n
    agent = RLAgent(state_dims, action_size)
    print("\n正在训练强化学习模型...")
    for episode in range(1000):
        state = env.reset()
        total_reward = 0
        while True:
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            agent.update(state, action, reward, next_state)
            state = next_state
            total_reward += reward
            if done:
                break
    # 4. Result comparison
    print(f"\nPuLP优化结果:总目标值 {pulp_obj:.2f}")
    print("车辆分配(1表示分配):")
    print(x_sol)
    # 5. Visualize the task assignment
    plt.figure(figsize=(12, 8))
    plt.scatter(coordinates[:, 0], coordinates[:, 1], c='blue', label='任务点', zorder=2)
    plt.scatter(0, 0, c='red', marker='s', label='起点', zorder=3)
    for j in range(m):
        assigned_tasks = np.where(x_sol[:, j] == 1)[0]
        if len(assigned_tasks) == 0:
            continue
        # Build the vehicle's path (starting from the origin)
        path = [(0, 0)] + [coordinates[i] for i in assigned_tasks]
        path = np.array(path)
        plt.plot(path[:, 0], path[:, 1], marker='o', linestyle='-', label=f'车辆{j+1}路径', zorder=1)
    plt.legend()
    plt.title("运输任务分配可视化")
    plt.xlabel("X坐标")
    plt.ylabel("Y坐标")
    plt.grid(alpha=0.3)
    plt.gca().set_aspect('equal', adjustable='box')
    plt.show()

if __name__ == "__main__":
    main()