文章目录
知识补充
一、代码源码
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import torch
import torch.nn as nn
from torch_geometric.nn import GATConv, global_mean_pool
from torch_geometric.data import Data, Batch
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3 import DQN
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.dqn.policies import DQNPolicy
import wandb
from wandb.integration.sb3 import WandbCallback
from gymnasium.envs.registration import register
# First, define the CVRP environment.
class CVRPEnv(gym.Env):
    """Capacitated Vehicle Routing Problem environment.

    A single vehicle starts at the depot (node 0), must visit every customer
    node (1..num_nodes) exactly once, and may return to the depot at any time
    to refill its capacity.  The observation is one flat float32 vector made
    of: per-node features ((num_nodes+1) rows of x, y, demand), the full
    pairwise Euclidean distance matrix, and the remaining vehicle capacity.

    Reward: -distance per move; -10 penalty for revisiting a customer or for
    an over-capacity move (the latter terminates the episode); on completing
    the tour the final reward is minus the total tour length.
    """

    def __init__(self, num_nodes=10, vehicle_capacity=15):
        super().__init__()
        self.num_nodes = num_nodes
        self.vehicle_capacity = vehicle_capacity
        self.depot = np.array([0.0, 0.0])
        # Actions: 0 = depot, 1..num_nodes = customer nodes.
        self.action_space = spaces.Discrete(self.num_nodes + 1)
        # Observation layout (flattened): node features, distance matrix,
        # remaining capacity.
        node_feature_size = (self.num_nodes + 1) * 3      # 3 features per node
        adjacency_size = (self.num_nodes + 1) * (self.num_nodes + 1)
        observation_size = node_feature_size + adjacency_size + 1  # + vehicle state
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(observation_size,),
            dtype=np.float32,
        )

    def reset(self, seed=None, options=None):
        """Sample a fresh problem instance; return (observation, info)."""
        # super().reset(seed=seed) already seeds self.np_random — the
        # original's extra gym.utils.seeding.np_random(seed) call was
        # redundant and has been removed.
        super().reset(seed=seed)
        self.node_positions = self.np_random.uniform(-10, 10, size=(self.num_nodes, 2))
        self.demands = self.np_random.integers(1, 5, size=self.num_nodes)
        self.current_node = 0  # start at the depot
        self.visited = np.zeros(self.num_nodes + 1, dtype=bool)
        self.visited[0] = True  # depot counts as visited for the tour check
        self.remaining_capacity = self.vehicle_capacity
        self.total_distance = 0.0
        self.done = False
        return self._get_observation(), {}

    def _get_observation(self):
        """Flatten node features, distance matrix and vehicle state."""
        node_features = np.zeros((self.num_nodes + 1, 3), dtype=np.float32)
        node_features[0, :2] = self.depot
        node_features[0, 2] = 0.0  # the depot has zero demand
        node_features[1:, :2] = self.node_positions
        node_features[1:, 2] = self.demands
        positions = np.vstack((self.depot, self.node_positions))
        # Dense pairwise Euclidean distance matrix, used as edge weights.
        adjacency_matrix = np.linalg.norm(
            positions[:, np.newaxis, :] - positions[np.newaxis, :, :], axis=-1
        ).astype(np.float32)
        current_vehicle_state = np.array([self.remaining_capacity], dtype=np.float32)
        observation = np.concatenate([
            node_features.flatten(),
            adjacency_matrix.flatten(),
            current_vehicle_state,
        ]).astype(np.float32)
        return observation

    def step(self, action):
        """Advance one move; return (obs, reward, terminated, truncated, info)."""
        if self.done:
            raise Exception("Episode is done")
        if action < 0 or action > self.num_nodes:
            raise ValueError("Invalid action")
        # BUG FIX: the original penalised *any* revisit, including the depot.
        # Since visited[0] is True from reset(), the vehicle could never
        # return to the depot, so the capacity-refill branch below was
        # unreachable.  Only customer revisits are invalid moves.
        if action != 0 and self.visited[action]:
            return self._get_observation(), -10.0, False, False, {}
        positions = np.vstack((self.depot, self.node_positions))
        distance = np.linalg.norm(positions[self.current_node] - positions[action])
        self.total_distance += distance
        if action != 0:
            demand = self.demands[action - 1]
            if demand > self.remaining_capacity:
                # Infeasible move: fixed penalty and episode termination.
                self.done = True
                return self._get_observation(), -10.0, True, False, {}
            self.remaining_capacity -= demand
        else:
            # Returning to the depot refills the vehicle.
            # NOTE(review): staying at the depot (0-distance self-move) is
            # currently legal and cost-free — consider penalising it.
            self.remaining_capacity = self.vehicle_capacity
        self.visited[action] = True
        self.current_node = action
        if np.all(self.visited[1:]):
            # All customers served: close the tour back at the depot.
            if self.current_node != 0:
                self.total_distance += np.linalg.norm(
                    positions[self.current_node] - positions[0]
                )
                self.current_node = 0
            self.done = True
            # Terminal reward is minus the total tour length.
            return self._get_observation(), -self.total_distance, True, False, {}
        # Non-terminal move: immediate reward is minus the step distance.
        return self._get_observation(), -distance, False, False, {}

    def render(self):
        pass

    def close(self):
        pass
# Register the environment with Gymnasium so gym.make("CVRPEnv-v0") works.
# entry_point points at this script's module, so registration is only valid
# when the file is executed directly (as __main__).
register(id='CVRPEnv-v0', entry_point='__main__:CVRPEnv')
# Define the GAT-based feature extractor.
class GATFeatureExtractor(BaseFeaturesExtractor):
    """SB3 feature extractor that encodes the CVRP observation with a GAT.

    Each flat observation is unpacked into node features and a dense distance
    matrix, turned into a PyG graph (one edge per nonzero distance, i.e.
    fully connected minus self-loops), passed through two GAT layers,
    mean-pooled to a graph embedding, concatenated with the scalar vehicle
    state, and projected by an MLP to ``embed_dim`` features.
    """

    def __init__(self, observation_space, num_nodes, embed_dim=32):
        super().__init__(observation_space, features_dim=embed_dim)
        self.num_nodes = num_nodes
        self.node_feature_dim = 3  # x, y, demand
        self.embed_dim = embed_dim
        # heads=4 with concat=False averages the attention heads, keeping
        # the output width at embed_dim.
        self.gat1 = GATConv(self.node_feature_dim, embed_dim, heads=4, concat=False)
        self.gat2 = GATConv(embed_dim, embed_dim, heads=4, concat=False)
        # +1 for the remaining-capacity scalar appended to the graph embedding.
        self.fc = nn.Sequential(
            nn.Linear(embed_dim + 1, embed_dim),
            nn.ReLU(),
            nn.Linear(embed_dim, embed_dim),
            nn.ReLU(),
        )

    def forward(self, observations):
        """Map a batch of flat observations to (batch, embed_dim) features."""
        n_graphs = observations.shape[0]
        n_total = self.num_nodes + 1
        feat_len = n_total * 3
        adj_len = n_total * n_total
        # Recover the structured pieces from the flat vector.
        nodes = observations[:, :feat_len].reshape(n_graphs, n_total, 3)
        dists = observations[:, feat_len:feat_len + adj_len].reshape(
            n_graphs, n_total, n_total)
        capacity = observations[:, -1].unsqueeze(-1)
        # Build one PyG graph per sample; edges are all nonzero-distance pairs.
        graphs = [
            Data(
                x=nodes[i],
                edge_index=dists[i].nonzero(as_tuple=False).t().contiguous(),
            )
            for i in range(n_graphs)
        ]
        big_graph = Batch.from_data_list(graphs)
        h = torch.relu(self.gat1(big_graph.x, big_graph.edge_index))
        h = torch.relu(self.gat2(h, big_graph.edge_index))
        pooled = global_mean_pool(h, big_graph.batch)
        return self.fc(torch.cat([pooled, capacity], dim=-1))
# Define the custom policy class.
class CustomDQNPolicy(DQNPolicy):
    """DQNPolicy wired to the GAT feature extractor.

    BUG FIX: the original passed ``features_extractor_class`` and
    ``features_extractor_kwargs`` explicitly *and* forwarded ``**kwargs``,
    so any caller supplying either keyword raised
    ``TypeError: got multiple values for keyword argument``.  Using
    ``setdefault`` keeps the exact same defaults while letting callers
    override them (e.g. to match a different ``num_nodes``).
    """

    def __init__(self, observation_space, action_space, lr_schedule, **kwargs):
        kwargs.setdefault("features_extractor_class", GATFeatureExtractor)
        # NOTE(review): num_nodes=10 must match the environment's num_nodes —
        # pass features_extractor_kwargs explicitly when the env size changes.
        kwargs.setdefault(
            "features_extractor_kwargs", dict(num_nodes=10, embed_dim=32)
        )
        super().__init__(observation_space, action_space, lr_schedule, **kwargs)
# ---- Weights & Biases experiment configuration --------------------------
config = {
    "policy_type": "CustomDQNPolicy",
    "total_timesteps": 100_000,
    "env_name": "CVRPEnv-v0",
    "num_nodes": 10,
}

run = wandb.init(
    project="sb3",
    config=config,
    sync_tensorboard=True,  # mirror SB3's tensorboard scalars into wandb
    monitor_gym=True,       # upload Monitor episode statistics
    save_code=True,
)


def make_env():
    """Build one monitored CVRP environment from the registered id."""
    return Monitor(gym.make(config["env_name"], num_nodes=config["num_nodes"]))


# DQN expects a vectorized env; a single-env DummyVecEnv suffices here.
env = DummyVecEnv([make_env])

model = DQN(
    policy=CustomDQNPolicy,
    env=env,
    verbose=1,
    tensorboard_log=f"runs/{run.id}",
)

model.learn(
    total_timesteps=config["total_timesteps"],
    callback=WandbCallback(
        gradient_save_freq=100,
        model_save_path=f"models/{run.id}",
        verbose=2,
    ),
)

run.finish()
二、代码分析——class CVRPEnv(gym.Env)
# 首先,定义CVRP环境
class CVRPEnv(gym.Env):
def __init__(self, num_nodes=10, vehicle_capacity=15):
super(CVRPEnv, self).__init__()
self.num_nodes = num_nodes
self.vehicle_capacity = vehicle_capacity
self.depot = np.array([0.0, 0.0])
self.action_space = spaces.Discrete(self.num_nodes + 1) # 包含仓库节点
# 定义观测空间大小
node_feature_size = (self.num_nodes + 1) * 3 # 每个节点3个特征
adjacency_size = (self.num_nodes + 1) * (self.num_nodes + 1) # 邻接矩阵大小
observation_size = node_feature_size + adjacency_size + 1 # 加上车辆状态
self.observation_space = spaces.Box(
low=-np.inf,
high=np.inf,
shape=(observation_size,),
dtype=np.float32
)
def reset(self, seed=None, options=None):
super().reset(seed=seed)
self.np_random, seed = gym.utils.seeding.np_random(seed)
self.node_positions = self.np_random.uniform(-10, 10, size=(self.num_nodes, 2))
self.demands = self.np_random.integers(1, 5, size=self.num_nodes)
self.current_node = 0 # 起始于仓库
self.visited = np.zeros(self.num_nodes + 1, dtype=bool)
self.visited[0] = True # 仓库已访问
self.remaining_capacity = self.vehicle_capacity
self.total_distance = 0.0
self.done = False
return self._get_observation(), {}
def _get_observation(self):
node_features = np.zeros((self.num_nodes + 1, 3), dtype=np.float32)
node_features[0, :2] = self.depot
node_features[0, 2] = 0.0 # 仓库需求为零
node_features[1:, :2] = self.node_positions
node_features[1:, 2] = self.demands
positions = np.vstack((self.depot, self.node_positions))
adjacency_matrix = np.linalg.norm(positions[:, np.newaxis, :] - positions[np.newaxis, :, :], axis=-1)
adjacency_matrix = adjacency_matrix.astype(np.float32)
current_vehicle_state = np.array([self.remaining_capacity], dtype=np.float32)
# 将所有观测数据展开为一维向量
observation = np.concatenate([
node_features.flatten(),
adjacency_matrix.flatten(),
current_vehicle_state
]).astype(np.float32)
return observation
def step(self, action):
if self.done:
raise Exception("Episode is done")
if action < 0 or action > self.num_nodes:
raise ValueError("Invalid action")
if self.visited[action]:
reward = -10.0 # Penalty for visiting an already visited node
done = False
return self._get_observation(), reward, done, False, {}
positions = np.vstack((self.depot, self.node_positions))
distance = np.linalg.norm(positions[self.current_node] - positions[action])
self.total_distance += distance
if action != 0:
demand = self.demands[action - 1]
if demand > self.remaining_capacity:
reward = -10.0 # Penalty for exceeding capacity
done = True # End the episode
self.done = True
return self._get_observation(), reward, done, False, {}
else:
self.remaining_capacity -= demand
else:
self.remaining_capacity = self.vehicle_capacity
self.visited[action] = True
self.current_node = action
if np.all(self.visited[1:]):
if self.current_node != 0:
distance_to_depot = np.linalg.norm(positions[self.current_node] - positions[0])
self.total_distance += distance_to_depot
self.current_node = 0
reward = -self.total_distance # Reward is negative total distance
done = True
self.done = True
return self._get_observation(), reward, done, False, {}
else:
reward = -distance # Immediate reward is negative distance
done = False
return self._get_observation(), reward, done, False, {}
def render(self):
pass
def close(self):
pass
1. def __init__(self, num_nodes=10, vehicle_capacity=15):
def __init__(self, num_nodes=10, vehicle_capacity=15):
super(CVRPEnv, self).__init__()
self.num_nodes = num_nodes
self.vehicle_capacity = vehicle_capacity
self.depot = np.array([0.0, 0.0])
self.action_space = spaces.Discrete(self.num_nodes + 1) # 包含仓库节点
# 定义观测空间大小
node_feature_size = (self.num_nodes + 1) * 3 # 每个节点3个特征
adjacency_size = (self.num_nodes + 1) * (self.num_nodes + 1) # 邻接矩阵大小
observation_size = node_feature_size + adjacency_size + 1 # 加上车辆状态
self.observation_space = spaces.Box(
low=-np.inf,
high=np.inf,
shape=(observation_size,),
dtype=np.float32
)
2. def reset(self, seed=None, options=None):
def reset(self, seed=None, options=None):
super().reset(seed=seed)
self.np_random, seed = gym.utils.seeding.np_random(seed)
self.node_positions = self.np_random.uniform(-10, 10, size=(self.num_nodes, 2))
self.demands = self.np_random.integers(1, 5, size=self.num_nodes)
self.current_node = 0 # 起始于仓库
self.visited = np.zeros(self.num_nodes + 1, dtype=bool)
self.visited[0] = True # 仓库已访问
self.remaining_capacity = self.vehicle_capacity
self.total_distance = 0.0
self.done = False
return self._get_observation(), {}
3. def _get_observation(self):
def _get_observation(self):
node_features = np.zeros((self.num_nodes + 1, 3), dtype=np.float32)
node_features[0, :2] = self.depot
node_features[0, 2] = 0.0 # 仓库需求为零
node_features[1:, :2] = self.node_positions
node_features[1:, 2] = self.demands
positions = np.vstack((self.depot, self.node_positions))
adjacency_matrix = np.linalg.norm(positions[:, np.newaxis, :] - positions[np.newaxis, :, :], axis=-1)
adjacency_matrix = adjacency_matrix.astype(np.float32)
current_vehicle_state = np.array([self.remaining_capacity], dtype=np.float32)
# 将所有观测数据展开为一维向量
observation = np.concatenate([
node_features.flatten(),
adjacency_matrix.flatten(),
current_vehicle_state
]).astype(np.float32)
return observation
node_features = np.zeros((self.num_nodes + 1, 3), dtype=np.float32)
node_features[0, :2] = self.depot
node_features[0, 2] = 0.0 # 仓库需求为零
node_features[1:, :2] = self.node_positions
node_features[1:, 2] = self.demands
positions = np.vstack((self.depot, self.node_positions))
adjacency_matrix = np.linalg.norm(positions[:, np.newaxis, :] - positions[np.newaxis, :, :], axis=-1)
adjacency_matrix = adjacency_matrix.astype(np.float32)
# 将所有观测数据展开为一维向量
observation = np.concatenate([
node_features.flatten(),
adjacency_matrix.flatten(),
current_vehicle_state
]).astype(np.float32)
4. def step(self, action):
常规MDP模型
def step(self, action):
if self.done:
raise Exception("Episode is done")
if action < 0 or action > self.num_nodes:
raise ValueError("Invalid action")
if self.visited[action]:
reward = -10.0 # Penalty for visiting an already visited node
done = False
return self._get_observation(), reward, done, False, {}
positions = np.vstack((self.depot, self.node_positions))
distance = np.linalg.norm(positions[self.current_node] - positions[action])
self.total_distance += distance
if action != 0:
demand = self.demands[action - 1]
if demand > self.remaining_capacity:
reward = -10.0 # Penalty for exceeding capacity
done = True # End the episode
self.done = True
return self._get_observation(), reward, done, False, {}
else:
self.remaining_capacity -= demand
else:
self.remaining_capacity = self.vehicle_capacity
self.visited[action] = True
self.current_node = action
if np.all(self.visited[1:]):
if self.current_node != 0:
distance_to_depot = np.linalg.norm(positions[self.current_node] - positions[0])
self.total_distance += distance_to_depot
self.current_node = 0
reward = -self.total_distance # Reward is negative total distance
done = True
self.done = True
return self._get_observation(), reward, done, False, {}
else:
reward = -distance # Immediate reward is negative distance
done = False
return self._get_observation(), reward, done, False, {}
distance_to_depot = np.linalg.norm(positions[self.current_node] - positions[0])
self.total_distance += distance_to_depot
三、代码分析——class GATFeatureExtractor(BaseFeaturesExtractor):
# 定义GAT特征提取器
class GATFeatureExtractor(BaseFeaturesExtractor):
def __init__(self, observation_space, num_nodes, embed_dim=32):
super(GATFeatureExtractor, self).__init__(observation_space, features_dim=embed_dim)
self.num_nodes = num_nodes
self.node_feature_dim = 3
self.embed_dim = embed_dim
self.gat1 = GATConv(self.node_feature_dim, embed_dim, heads=4, concat=False)
self.gat2 = GATConv(embed_dim, embed_dim, heads=4, concat=False)
self.fc = nn.Sequential(
nn.Linear(embed_dim + 1, embed_dim),
nn.ReLU(),
nn.Linear(embed_dim, embed_dim),
nn.ReLU(),
)
def forward(self, observations):
# 从一维向量中恢复数据
batch_size = observations.shape[0]
node_feature_size = (self.num_nodes + 1) * 3
adjacency_size = (self.num_nodes + 1) * (self.num_nodes + 1)
node_features = observations[:, :node_feature_size].reshape(batch_size, self.num_nodes + 1, 3)
adjacency_matrix = observations[:, node_feature_size:node_feature_size + adjacency_size].reshape(
batch_size, self.num_nodes + 1, self.num_nodes + 1)#重塑
current_vehicle_state = observations[:, -1].unsqueeze(-1)
data_list = []
for i in range(batch_size):
x = node_features[i]
adj = adjacency_matrix[i]
edge_index = adj.nonzero(as_tuple=False).t().contiguous()
data = Data(x=x, edge_index=edge_index)
data_list.append(data)
batch = Batch.from_data_list(data_list)
x = batch.x
edge_index = batch.edge_index
x = self.gat1(x, edge_index)
x = torch.relu(x)
x = self.gat2(x, edge_index)
x = torch.relu(x)
graph_embedding = global_mean_pool(x, batch.batch)
vehicle_state = current_vehicle_state
features = torch.cat([graph_embedding, vehicle_state], dim=-1)
features = self.fc(features)
return features
1. def __init__(self, observation_space, num_nodes, embed_dim=32):
def __init__(self, observation_space, num_nodes, embed_dim=32):
super(GATFeatureExtractor, self).__init__(observation_space, features_dim=embed_dim)
self.num_nodes = num_nodes
self.node_feature_dim = 3
self.embed_dim = embed_dim
self.gat1 = GATConv(self.node_feature_dim, embed_dim, heads=4, concat=False)
self.gat2 = GATConv(embed_dim, embed_dim, heads=4, concat=False)
self.fc = nn.Sequential(
nn.Linear(embed_dim + 1, embed_dim),
nn.ReLU(),
nn.Linear(embed_dim, embed_dim),
nn.ReLU(),
)
2. def forward(self, observations):
def forward(self, observations):
# 从一维向量中恢复数据
batch_size = observations.shape[0]
node_feature_size = (self.num_nodes + 1) * 3
adjacency_size = (self.num_nodes + 1) * (self.num_nodes + 1)
node_features = observations[:, :node_feature_size].reshape(batch_size, self.num_nodes + 1, 3)
adjacency_matrix = observations[:, node_feature_size:node_feature_size + adjacency_size].reshape(
batch_size, self.num_nodes + 1, self.num_nodes + 1)#重塑
current_vehicle_state = observations[:, -1].unsqueeze(-1)
data_list = []
for i in range(batch_size):
x = node_features[i]
adj = adjacency_matrix[i]
edge_index = adj.nonzero(as_tuple=False).t().contiguous()
data = Data(x=x, edge_index=edge_index)
data_list.append(data)
batch = Batch.from_data_list(data_list)
x = batch.x
edge_index = batch.edge_index
x = self.gat1(x, edge_index)
x = torch.relu(x)
x = self.gat2(x, edge_index)
x = torch.relu(x)
graph_embedding = global_mean_pool(x, batch.batch)
vehicle_state = current_vehicle_state
features = torch.cat([graph_embedding, vehicle_state], dim=-1)
features = self.fc(features)
return features