# config.py
import numpy as np
C0 = 3e8
def xi_from_fc(fc_hz: float) -> float:
lam = C0 / fc_hz
return (lam / (4.0 * np.pi)) ** 2
def sigma2_pdf(B_hz: float, F_db: float) -> float:
return 10 ** ((-174 + 10 * np.log10(B_hz) + F_db) / 10.0) * 1e-3
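# Quick sanity check of the two helpers above (illustrative, rounded values; assumes the
# usual free-space reference gain xi = (lambda / (4*pi))**2 and -174 dBm/Hz thermal noise):
#   xi_from_fc(2.4e9)      -> (0.125 / (4*pi))**2                ~ 9.9e-5
#   sigma2_pdf(20e6, 7.0)  -> 10**((-174 + 73 + 7) / 10) * 1e-3  ~ 4.0e-13 W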
# Convergence-tuning parameters - rebalanced weights
convergence_params = {
    # Reward weights rebalanced - rate weight raised substantially
    "ee_weight": 1e-4,    # energy-efficiency weight (increased)
    "rate_weight": 5e-5,  # rate weight (increased substantially)
    "pen_kin": 0.5,       # kinematic-violation penalty (increased)
    "pen_qos": 0.8,       # QoS-violation penalty (increased)
    "noma_bonus": 1.0,    # NOMA bonus (increased)
    "ris_bonus": 0.8,     # RIS bonus (increased)
    # Training parameters
"max_episodes": 400,
"convergence_window": 30,
"convergence_threshold": 0.01,
}
# Advanced training parameters
advanced_training_params = {
    # TD3 algorithm parameters
    "actor_lr": 5e-4,   # learning rate (increased)
    "critic_lr": 5e-4,
    "tau": 0.01,        # target-network update rate (increased)
    "gamma": 0.95,      # discount factor (adjusted)
    "policy_noise": 0.15,
    "noise_clip": 0.3,
    "policy_freq": 2,   # more frequent policy updates
    # Exploration parameters
    "initial_noise": 0.4,
    "min_noise": 0.05,
    "noise_decay": 0.995,
    # Training parameters
"replay_buffer_size": 100000,
"batch_size": 128,
"warmup_steps": 1000,
"training_frequency": 2,
}
# Sub-6 GHz - convergence-tuned
cfg_A = {
"name": "Sub6",
"f_c": 2.4e9,
"B": 20e6,
"F": 7.0,
"xi": xi_from_fc(2.4e9),
"sigma2": sigma2_pdf(20e6, 7.0),
"P": 2.0,
    # UAV / scenario
"dt": 1.0, "N": 40,
"hmin": 50, "hmax": 150, "Vh_max": 20, "Vv_max": 5,
"uav_start": [100, 100, 100], "uav_end": [600, 600, 100],
"Xmax": 1000, "Ymax": 1000,
"dx_max": 15 * 1.0, "dy_max": 15 * 1.0, "dh_max": 3 * 1.0,
# RIS
"x_RIS": 300, "y_RIS": 300, "z_RIS": 30,
"Mr": 10, "Mc": 10, "a_ris": 0.9,
"P_RIS": 0.5,
"P_ris_unit": 5e-3,
    # Path-loss exponents - adjusted to accentuate the LoS/NLoS difference
    "alpha_los": 2.0,
    "alpha_nlos": 3.5,  # increased NLoS loss
"alpha_ur": 2.0,
"alpha_rg": 2.0,
    # NOMA / QoS
    "xi_sic": 0.001,  # reduced residual SIC error
    "rmin": 1.0e6,    # raised minimum-rate requirement
    # Propulsion energy
"P0": 158.0, "Utip": 120.0, "d0": 0.6, "rho": 1.225,
"s": 0.05, "G": 0.503, "P1": 88.6, "v0": 4.03, "P2": 12.4,
}
# mmWave - kept as before
cfg_B = {
    "name": "mmWave",
    "f_c": 28e9,
    "B": 100e6,
    "F": 7.0,
    "xi": xi_from_fc(28e9),
    "sigma2": sigma2_pdf(100e6, 7.0),
    "P": 1.0,
    "dt": 1.0, "N": 40,
    "hmin": 50, "hmax": 150, "Vh_max": 20, "Vv_max": 5,
    "uav_start": [100, 100, 100], "uav_end": [600, 600, 100],
    "Xmax": 1000, "Ymax": 1000,
    "dx_max": 15 * 1.0, "dy_max": 15 * 1.0, "dh_max": 3 * 1.0,
    "x_RIS": 300, "y_RIS": 300, "z_RIS": 30,
    "Mr": 16, "Mc": 16, "a_ris": 0.9,
    "P_RIS": 2.0,
    "P_ris_unit": 8e-3,
    "alpha_los": 2.5,
    "alpha_nlos": 4.5,  # increased NLoS loss
    "alpha_ur": 2.5,
    "alpha_rg": 2.5,
    "xi_sic": 0.001,  # reduced residual SIC error
    "rmin": 3e6,      # raised minimum-rate requirement
"P0": 158.0, "Utip": 120.0, "d0": 0.6, "rho": 1.225,
"s": 0.05, "G": 0.503, "P1": 88.6, "v0": 4.03, "P2": 12.4,
}
# TD3 phase-optimization parameters - more stable
td3_phase_params = {
    "block_size": 4,
    "max_phase_perturb": 0.5,  # widened phase-perturbation range
"phase_smooth_penalty": 0.001,
}
# Apply all tuning parameters to both configurations
cfg_A.update(convergence_params)
cfg_B.update(convergence_params)
cfg_A.update(advanced_training_params)
cfg_B.update(advanced_training_params)
cfg_A.update(td3_phase_params)
cfg_B.update(td3_phase_params)
# User positions - placed to increase the channel disparity between the two users
cfg_A["user1"] = np.array([150, 150, 0])
cfg_A["user2"] = np.array([450, 450, 0])
cfg_A["seed"] = 2025
cfg_B["user1"] = np.array([150, 150, 0])
cfg_B["user2"] = np.array([450, 450, 0])
cfg_B["seed"] = 2025
cfg = cfg_A

# RIS_UAV_env.py
import numpy as np
class RISEnv2User:
    """RIS-assisted UAV environment, tuned for convergence."""
def __init__(self, cfg: dict, ris_mode='random', access_mode='noma'):
assert ris_mode in ('none', 'random', 'td3_optimized')
assert access_mode in ('noma', 'oma')
self.cfg = cfg.copy()
self.ris_mode = ris_mode
self.access_mode = access_mode
self.user1 = cfg['user1'].copy()
self.user2 = cfg['user2'].copy()
self.ris_pos = np.array([cfg['x_RIS'], cfg['y_RIS'], cfg['z_RIS']])
self.rng = np.random.default_rng(self.cfg.get('seed', 2025))
self.phase_blocks = None
self.ris_elements = cfg['Mr'] * cfg['Mc']
        # Convergence aid: reward history
self.reward_history = []
self.max_history = 20
self.reset()
    def _calculate_channel_gain_convergent(self, user_pos):
        """Improved channel-gain calculation (direct link plus optional RIS-assisted link)."""
c = self.cfg
uav_pos = np.array([self.x, self.y, self.h])
        # Distance calculation
d_ug = np.linalg.norm(uav_pos - user_pos)
        d_ug = max(d_ug, 10.0)  # avoid division by zero
        # Improved LoS-probability calculation
horizontal_dist = np.linalg.norm(uav_pos[:2] - user_pos[:2])
elevation_angle = np.arctan2(self.h, horizontal_dist) if horizontal_dist > 0 else np.pi / 2
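        # (Reference note, assumed: this matches the widely used Al-Hourani air-to-ground
        #  LoS model, P_LoS = 1 / (1 + a * exp(-b * (theta_deg - a))), here with
        #  (a, b) = (9.61, 0.16), i.e. urban-type parameters.)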
p_los = 1.0 / (1 + 9.61 * np.exp(-0.16 * (elevation_angle * 180 / np.pi - 9.61)))
        # Direct link - more accurate channel model
        g_dir_los = c['xi'] / (d_ug ** c['alpha_los'])
        g_dir_nlos = c['xi'] / (d_ug ** c['alpha_nlos']) * 0.1  # extra NLoS attenuation
g_dir = p_los * g_dir_los + (1 - p_los) * g_dir_nlos
        # RIS-assisted link
g_ris = 0.0
if self.ris_mode != 'none':
d_ur = np.linalg.norm(uav_pos - self.ris_pos)
d_ur = max(d_ur, 10.0)
d_rg = np.linalg.norm(self.ris_pos - user_pos)
d_rg = max(d_rg, 10.0)
            # RIS path losses (UAV-to-RIS and RIS-to-ground)
path_loss_ur = c['xi'] / (d_ur ** c['alpha_ur'])
path_loss_rg = c['xi'] / (d_rg ** c['alpha_rg'])
if self.ris_mode == 'random':
                # Random phases - lower effective gain
effective_gain = np.sqrt(self.ris_elements) * 0.3
elif self.ris_mode == 'td3_optimized':
if self.phase_blocks is not None:
                    # Optimized phases - higher effective gain
phase_coherence = 1.0 - 0.3 * np.std(self.phase_blocks) / np.pi
effective_gain = self.ris_elements * 0.8 * phase_coherence
else:
effective_gain = self.ris_elements * 0.6
g_ris = c['a_ris'] * effective_gain * path_loss_ur * path_loss_rg
return max(g_dir + g_ris, 1e-20)
    def _calculate_rates_convergent(self, g1, g2, power_ratio):
        """Improved rate calculation - highlights the NOMA advantage."""
c = self.cfg
P, B, sig2 = c['P'], c['B'], c['sigma2']
if self.access_mode == 'noma':
            # Determine the strong/weak user ordering
if g1 >= g2:
g_strong, g_weak = g1, g2
user_order = [1, 2]
else:
g_strong, g_weak = g2, g1
user_order = [2, 1]
            # Power allocation - the weak user gets the larger share
            p_weak = P * np.clip(power_ratio, 0.4, 0.8)  # boost the weak user's power
p_strong = P - p_weak
            # SINR calculation (standard downlink NOMA): the weak user treats the strong
            # user's signal as interference; the strong user applies SIC and sees only the
            # residual fraction xi_sic of the weak user's signal.
            sinr_weak = (p_weak * g_weak) / (sig2 + p_strong * g_weak + 1e-15)
            sinr_strong = (p_strong * g_strong) / (sig2 + c['xi_sic'] * p_weak * g_strong + 1e-15)
            # Rate calculation
r_weak = B * np.log2(1 + np.clip(sinr_weak, 0, 100))
r_strong = B * np.log2(1 + np.clip(sinr_strong, 0, 100))
if user_order == [1, 2]:
return r_strong, r_weak
else:
return r_weak, r_strong
else:
            # OMA - split bandwidth and power equally
p_per_user = P / 2
bandwidth_per_user = B / 2
sinr1 = (p_per_user * g1) / (sig2 + 1e-15)
sinr2 = (p_per_user * g2) / (sig2 + 1e-15)
r1 = bandwidth_per_user * np.log2(1 + np.clip(sinr1, 0, 100))
r2 = bandwidth_per_user * np.log2(1 + np.clip(sinr2, 0, 100))
return r1, r2
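    # Illustrative sanity check of the two branches above (hypothetical numbers, not taken
    # from the config): with P = 2 W, B = 20 MHz, sigma2 ~ 4e-13 W, power_ratio = 0.6,
    # g_strong = 1e-11 and g_weak = 1e-12, the NOMA branch gives roughly
    #   r_strong ~ 20e6 * log2(1 + 19) ~ 87 Mbps and r_weak ~ 20e6 * log2(1 + 1) ~ 20 Mbps,
    # while the OMA branch (half power, half bandwidth per user) gives roughly
    #   r1 ~ 10e6 * log2(1 + 25) ~ 47 Mbps and r2 ~ 10e6 * log2(1 + 2.5) ~ 18 Mbps,
    # i.e. a higher sum rate for NOMA when the two users have dissimilar channel gains.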
    def _improved_reward_function(self, total_rate, ee, g1, g2, power_ratio, kin_violate, r1, r2):
        """Improved reward function - better reflects performance differences."""
c = self.cfg
reward = 0
        # Energy-efficiency reward - linear
        ee_reward = ee / 50000  # normalization
reward += c['ee_weight'] * ee_reward * 100
        # Rate reward - linear
        rate_reward = total_rate / 5e7  # normalization
reward += c['rate_weight'] * rate_reward * 1000
        # NOMA performance bonus - compared against an OMA baseline
if self.access_mode == 'noma':
            # Compute the OMA baseline (temporarily switch the access mode so that the
            # OMA branch of the rate calculation is actually used)
            saved_mode = self.access_mode
            self.access_mode = 'oma'
            r1_oma, r2_oma = self._calculate_rates_convergent(g1, g2, 0.5)
            self.access_mode = saved_mode
            oma_rate = r1_oma + r2_oma
if oma_rate > 1e6:
noma_gain = (total_rate - oma_rate) / oma_rate
if noma_gain > 0:
reward += c['noma_bonus'] * min(noma_gain, 1.0)
else:
                    reward -= 0.5  # penalty when NOMA underperforms OMA
        # RIS performance bonus - compared against the no-RIS case
if self.ris_mode != 'none':
            # Compute performance without the RIS
g1_dir = self._calculate_direct_channel_gain(self.user1)
g2_dir = self._calculate_direct_channel_gain(self.user2)
r1_dir, r2_dir = self._calculate_rates_convergent(g1_dir, g2_dir, power_ratio)
dir_rate = r1_dir + r2_dir
if dir_rate > 1e6:
ris_gain = (total_rate - dir_rate) / dir_rate
if ris_gain > 0:
reward += c['ris_bonus'] * min(ris_gain, 1.0)
else:
                    reward -= 0.3  # penalty when the RIS underperforms the direct link
        # Constraint penalties
penalty = 0
if kin_violate:
penalty += c['pen_kin']
if r1 < c['rmin']:
penalty += c['pen_qos'] * (1.0 - r1 / c['rmin'])
if r2 < c['rmin']:
penalty += c['pen_qos'] * (1.0 - r2 / c['rmin'])
        # Power-allocation sanity bonus
        if 0.4 <= power_ratio <= 0.7:  # the weak user should receive the larger power share
reward += 0.2
else:
reward -= 0.1
final_reward = reward - penalty
        # Gentle clipping
final_reward = np.clip(final_reward, -2.0, 5.0)
return final_reward
    def _calculate_direct_channel_gain(self, user_pos):
        """Compute the direct channel gain without the RIS."""
c = self.cfg
uav_pos = np.array([self.x, self.y, self.h])
d_ug = np.linalg.norm(uav_pos - user_pos)
d_ug = max(d_ug, 10.0)
horizontal_dist = np.linalg.norm(uav_pos[:2] - user_pos[:2])
elevation_angle = np.arctan2(self.h, horizontal_dist) if horizontal_dist > 0 else np.pi / 2
p_los = 1.0 / (1 + 9.61 * np.exp(-0.16 * (elevation_angle * 180 / np.pi - 9.61)))
g_dir_los = c['xi'] / (d_ug ** c['alpha_los'])
g_dir_nlos = c['xi'] / (d_ug ** c['alpha_nlos']) * 0.1
g_dir = p_los * g_dir_los + (1 - p_los) * g_dir_nlos
return max(g_dir, 1e-20)
    def step(self, action):
        """Improved step function."""
c, dt = self.cfg, self.cfg['dt']
old_pos = np.array([self.x, self.y, self.h])
        # Parse the action
if self.ris_mode == 'td3_optimized':
dx = np.clip(action[0], -1.0, 1.0) * c['dx_max']
dy = np.clip(action[1], -1.0, 1.0) * c['dy_max']
dh = np.clip(action[2], -1.0, 1.0) * c['dh_max']
power_action = np.clip(action[3], -1.0, 1.0)
phase_actions = action[4:] if len(action) > 4 else None
else:
dx = np.clip(action[0], -1.0, 1.0) * c['dx_max']
dy = np.clip(action[1], -1.0, 1.0) * c['dy_max']
dh = np.clip(action[2], -1.0, 1.0) * c['dh_max']
power_action = np.clip(action[3], -1.0, 1.0)
phase_actions = None
        # Power allocation - the weak user receives 40%-80% of the transmit power
power_ratio = 0.4 + 0.4 * (power_action * 0.5 + 0.5)
        # Position update
new_x = np.clip(self.x + dx, 50.0, c['Xmax'] - 50)
new_y = np.clip(self.y + dy, 50.0, c['Ymax'] - 50)
new_h = np.clip(self.h + dh, c['hmin'] + 10, c['hmax'] - 10)
actual_dx = new_x - self.x
actual_dy = new_y - self.y
actual_dh = new_h - self.h
self.x, self.y, self.h = new_x, new_y, new_h
        # Kinematic constraints
vh = np.hypot(actual_dx, actual_dy) / dt
vv = abs(actual_dh) / dt
kin_violate = (vh > c['Vh_max']) or (vv > c['Vv_max'])
        # RIS phase update
phase_penalty = self.update_ris_phases(phase_actions)
        # Channel gains and rates
g1 = self._calculate_channel_gain_convergent(self.user1)
g2 = self._calculate_channel_gain_convergent(self.user2)
r1, r2 = self._calculate_rates_convergent(g1, g2, power_ratio)
total_rate = r1 + r2
        # Energy efficiency
propulsion_energy = self._propulsion_energy_simple(actual_dx, actual_dy, actual_dh, dt)
transmit_energy = (c['P'] + (0 if self.ris_mode == 'none' else c['P_RIS'])) * dt
total_energy = transmit_energy + propulsion_energy
total_bits = total_rate * dt
ee = total_bits / (total_energy + 1e-20)
        # Use the improved reward function
reward = self._improved_reward_function(total_rate, ee, g1, g2, power_ratio, kin_violate, r1, r2)
reward -= phase_penalty
self.time_step += 1
done = (self.time_step >= c['N'])
obs = self._get_observation()
info = {
'ee': ee,
'total_rate': total_rate,
'r1': r1, 'r2': r2,
'g1': g1, 'g2': g2,
'power_ratio': power_ratio,
'position': [self.x, self.y, self.h],
'kin_violate': kin_violate,
'trajectory_reward': reward,
}
return obs, float(reward), done, info
def _propulsion_energy_simple(self, dx, dy, dh, dt):
c = self.cfg
vh = np.hypot(dx, dy) / dt
vv = abs(dh) / dt
base_power = c['P0'] + c['P1'] * vh + c['P2'] * vv
return base_power * dt
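    # Note (assumed context): the config carries the full rotary-wing power parameters
    # (P0, P1, Utip, v0, d0, rho, s, G), which would support the standard rotary-wing model
    #   P(V) = P0 * (1 + 3 * V**2 / Utip**2)
    #          + P1 * (np.sqrt(1 + V**4 / (4 * v0**4)) - V**2 / (2 * v0**2)) ** 0.5
    #          + 0.5 * d0 * rho * s * G * V**3   (G playing the role of the rotor disc area),
    # whereas the helper above uses a simplified linear surrogate P0 + P1*vh + P2*vv.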
def update_ris_phases(self, phase_actions):
if self.ris_mode == 'td3_optimized' and phase_actions is not None and self.phase_blocks is not None:
max_perturb = self.cfg.get('max_phase_perturb', 0.5)
n_blocks = len(self.phase_blocks)
n_actions = len(phase_actions)
if n_actions >= n_blocks:
phase_perturb = np.clip(phase_actions[:n_blocks], -1, 1) * max_perturb
else:
phase_perturb = np.full(n_blocks, np.clip(phase_actions[0], -1, 1) * max_perturb)
new_phases = (self.phase_blocks + phase_perturb) % (2 * np.pi)
self.phase_blocks = new_phases
return np.sum(np.abs(phase_perturb)) * self.cfg.get('phase_smooth_penalty', 0.001)
return 0.0
def _init_ris_phases(self):
if self.ris_mode == 'none':
self.phase_blocks = None
elif self.ris_mode == 'random':
num_blocks = self.ris_elements // self.cfg.get('block_size', 4)
self.phase_blocks = self.rng.uniform(0, 2 * np.pi, num_blocks)
elif self.ris_mode == 'td3_optimized':
num_blocks = self.ris_elements // self.cfg.get('block_size', 4)
            self.phase_blocks = np.zeros(num_blocks)  # initial phases set to zero
def reset(self):
c = self.cfg
self.x, self.y, self.h = c['uav_start']
self.time_step = 0
self.reward_history = []
self._init_ris_phases()
return self._get_observation()
def _get_observation(self):
base_obs = [
self.x / 1000.0, self.y / 1000.0, self.h / 200.0,
self.user1[0] / 1000.0, self.user1[1] / 1000.0,
self.user2[0] / 1000.0, self.user2[1] / 1000.0,
self.ris_pos[0] / 1000.0, self.ris_pos[1] / 1000.0
]
if self.ris_mode == 'td3_optimized' and self.phase_blocks is not None:
n_phase_obs = min(3, len(self.phase_blocks))
phase_obs = (self.phase_blocks[:n_phase_obs] / np.pi) - 1
base_obs.extend(phase_obs)
return np.array(base_obs, dtype=np.float32)
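
# Minimal usage sketch (illustrative only; assumes cfg_A from config.py):
#   env = RISEnv2User(cfg_A, ris_mode='random', access_mode='noma')
#   obs = env.reset()
#   obs, reward, done, info = env.step(np.zeros(4))   # 4-dim action: dx, dy, dh, power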
class OMAEnv(RISEnv2User):
def __init__(self, cfg: dict, ris_mode='random'):
        super().__init__(cfg, ris_mode, 'oma')

# TD3.py
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from collections import deque
class Actor(nn.Module):
def __init__(self, state_dim, act_dim, max_action=1.0, hidden_dim=256):
super(Actor, self).__init__()
self.max_action = max_action
self.net = nn.Sequential(
nn.Linear(state_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, hidden_dim // 2),
nn.ReLU(),
nn.Linear(hidden_dim // 2, act_dim),
nn.Tanh()
)
def forward(self, state):
return self.max_action * self.net(state)
class Critic(nn.Module):
def __init__(self, state_dim, act_dim, hidden_dim=256):
super(Critic, self).__init__()
        # Q1 network
self.q1_net = nn.Sequential(
nn.Linear(state_dim + act_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, hidden_dim // 2),
nn.ReLU(),
nn.Linear(hidden_dim // 2, 1)
)
        # Q2 network
self.q2_net = nn.Sequential(
nn.Linear(state_dim + act_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, hidden_dim // 2),
nn.ReLU(),
nn.Linear(hidden_dim // 2, 1)
)
def forward(self, state, action):
sa = torch.cat([state, action], 1)
q1 = self.q1_net(sa)
q2 = self.q2_net(sa)
return q1, q2
def q1(self, state, action):
sa = torch.cat([state, action], 1)
return self.q1_net(sa)
class ReplayBuffer:
def __init__(self, capacity=100000):
self.buffer = deque(maxlen=capacity)
def push(self, state, action, reward, next_state, done):
self.buffer.append((state, action, reward, next_state, done))
def sample(self, batch_size):
if len(self.buffer) < batch_size:
return None
batch = random.sample(self.buffer, batch_size)
state, action, reward, next_state, done = map(np.stack, zip(*batch))
return (
torch.FloatTensor(state),
torch.FloatTensor(action),
torch.FloatTensor(reward).unsqueeze(1),
torch.FloatTensor(next_state),
torch.FloatTensor(done).unsqueeze(1)
)
def __len__(self):
return len(self.buffer)
class TD3:
def __init__(self, state_dim, act_dim, max_action=1.0, device='cpu',
actor_lr=5e-4, critic_lr=5e-4, tau=0.01):
self.device = device
self.max_action = max_action
        # Actor networks
self.actor = Actor(state_dim, act_dim, max_action).to(device)
self.actor_target = Actor(state_dim, act_dim, max_action).to(device)
self.actor_target.load_state_dict(self.actor.state_dict())
self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=actor_lr)
        # Critic networks
self.critic = Critic(state_dim, act_dim).to(device)
self.critic_target = Critic(state_dim, act_dim).to(device)
self.critic_target.load_state_dict(self.critic.state_dict())
self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=critic_lr)
        # TD3 hyperparameters
self.policy_noise = 0.15
self.noise_clip = 0.3
self.policy_freq = 2
self.total_it = 0
self.gamma = 0.95
self.tau = tau
        # Training logs
self.actor_losses = []
self.critic_losses = []
self.q_values = []
@torch.no_grad()
def select_action(self, state, noise_scale=0.1):
state_tensor = torch.FloatTensor(state.reshape(1, -1)).to(self.device)
action = self.actor(state_tensor).cpu().data.numpy().flatten()
if noise_scale > 0:
noise = np.random.normal(0, noise_scale, size=action.shape)
action = (action + noise).clip(-self.max_action, self.max_action)
return action
def train(self, replay_buffer, batch_size=128):
self.total_it += 1
batch = replay_buffer.sample(batch_size)
if batch is None:
return
state, action, reward, next_state, done = batch
state = state.to(self.device)
action = action.to(self.device)
reward = reward.to(self.device)
next_state = next_state.to(self.device)
done = done.to(self.device)
with torch.no_grad():
            # Target policy smoothing
noise = (torch.randn_like(action) * self.policy_noise).clamp(-self.noise_clip, self.noise_clip)
next_action = (self.actor_target(next_state) + noise).clamp(-self.max_action, self.max_action)
            # Clipped double-Q target: y = r + gamma * (1 - done) * min(Q1', Q2')
target_q1, target_q2 = self.critic_target(next_state, next_action)
target_q = torch.min(target_q1, target_q2)
target_q = reward + (1 - done) * self.gamma * target_q
            target_q = torch.clamp(target_q, -5.0, 10.0)  # adjusted clipping range
        # Current Q estimates
current_q1, current_q2 = self.critic(state, action)
        # Critic loss
critic_loss = nn.MSELoss()(current_q1, target_q) + nn.MSELoss()(current_q2, target_q)
        # Optimize the critic
self.critic_optimizer.zero_grad()
critic_loss.backward()
torch.nn.utils.clip_grad_norm_(self.critic.parameters(), 1.0)
self.critic_optimizer.step()
self.q_values.append(current_q1.mean().item())
        # Delayed policy updates
if self.total_it % self.policy_freq == 0:
actor_actions = self.actor(state)
actor_loss = -self.critic.q1(state, actor_actions).mean()
            # Add an action-spread (entropy-style) regularizer to encourage exploration
action_std = torch.std(actor_actions, dim=0).mean()
entropy_bonus = -0.02 * action_std
actor_loss += entropy_bonus
self.actor_optimizer.zero_grad()
actor_loss.backward()
torch.nn.utils.clip_grad_norm_(self.actor.parameters(), 1.0)
self.actor_optimizer.step()
            # Soft (Polyak) update of the target networks
for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()):
target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data)
for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()):
target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data)
self.actor_losses.append(actor_loss.item())
self.critic_losses.append(critic_loss.item())
def save(self, filename):
torch.save({
'actor': self.actor.state_dict(),
'critic': self.critic.state_dict(),
'actor_target': self.actor_target.state_dict(),
'critic_target': self.critic_target.state_dict(),
'actor_optimizer': self.actor_optimizer.state_dict(),
'critic_optimizer': self.critic_optimizer.state_dict(),
}, filename)
def load(self, filename):
checkpoint = torch.load(filename, map_location=self.device)
self.actor.load_state_dict(checkpoint['actor'])
self.critic.load_state_dict(checkpoint['critic'])
self.actor_target.load_state_dict(checkpoint['actor_target'])
self.critic_target.load_state_dict(checkpoint['critic_target'])
        self.actor_optimizer.load_state_dict(checkpoint['actor_optimizer'])
        self.critic_optimizer.load_state_dict(checkpoint['critic_optimizer'])

# run_pytorch.py
import numpy as np
import matplotlib.pyplot as plt
import torch
import random
import time
import os
from RIS_UAV_env import RISEnv2User, OMAEnv
from TD3 import TD3, ReplayBuffer
from config import cfg_A, cfg_B, td3_phase_params
def set_seed(seed):
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
class ConvergenceMonitor:
    """Improved convergence monitor."""
def __init__(self, window_size=30, threshold=0.01):
self.window_size = window_size
self.threshold = threshold
self.rewards = []
self.converged = False
def update(self, reward):
self.rewards.append(reward)
if len(self.rewards) > self.window_size:
self.rewards.pop(0)
if len(self.rewards) == self.window_size:
recent_mean = np.mean(self.rewards[-self.window_size // 2:])
older_mean = np.mean(self.rewards[:self.window_size // 2])
reward_std = np.std(self.rewards)
if (abs(recent_mean - older_mean) < self.threshold and
reward_std < 0.5 and recent_mean > older_mean):
self.converged = True
return True
return False
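
# With a window of W episodes, the monitor above flags convergence once the mean of the
# most recent W/2 episodes differs from the mean of the preceding W/2 by less than
# `threshold`, the window's standard deviation is below 0.5, and the recent mean is not
# lower than the older one.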
def train_td3_agent_advanced(cfg, ris_mode, episodes=400, steps_per_ep=40):
    """Advanced training routine."""
print(f"\nAdvanced Training {cfg['name']} - {ris_mode} mode")
    # State and action dimensions
if ris_mode == 'td3_optimized':
phase_dim = 3
action_dim = 4 + phase_dim
state_dim = 9 + phase_dim
else:
action_dim = 4
state_dim = 9
env = RISEnv2User(cfg, ris_mode=ris_mode, access_mode='noma')
    # Use the tuned TD3 parameters
agent = TD3(
state_dim,
action_dim,
max_action=1.0,
device='cpu',
actor_lr=cfg.get('actor_lr', 5e-4),
critic_lr=cfg.get('critic_lr', 5e-4),
tau=cfg.get('tau', 0.01)
)
replay_buffer = ReplayBuffer(capacity=cfg.get('replay_buffer_size', 100000))
    # Improved convergence monitoring
monitor = ConvergenceMonitor(window_size=35, threshold=0.008)
training_history = {
'episodes': [], 'rewards': [], 'moving_avg_rewards': [],
'ees': [], 'rates': [], 'actor_losses': [], 'critic_losses': [],
'trajectory_rewards': [], 'power_ratios': [], 'q_values': []
}
best_reward = -np.inf
convergence_episode = None
early_stop_count = 0
for episode in range(episodes):
state = env.reset()
episode_reward = 0
episode_ee = 0
episode_rate = 0
episode_trajectory_reward = 0
episode_power_ratios = []
steps = 0
        # Improved exploration-noise schedule - exponential decay with a floor
current_noise = cfg.get('initial_noise', 0.4) * (cfg.get('noise_decay', 0.995) ** episode)
current_noise = max(current_noise, cfg.get('min_noise', 0.05))
for step in range(steps_per_ep):
action = agent.select_action(state, noise_scale=current_noise)
next_state, reward, done, info = env.step(action)
replay_buffer.push(state, action, reward, next_state, done)
            # Improved training logic: update only after warmup and every training_frequency steps
if len(replay_buffer) > cfg.get('warmup_steps', 1000) and step % cfg.get('training_frequency', 2) == 0:
agent.train(replay_buffer, batch_size=cfg.get('batch_size', 128))
state = next_state
episode_reward += reward
episode_ee += info['ee']
episode_rate += info['total_rate']
episode_trajectory_reward += info.get('trajectory_reward', 0)
episode_power_ratios.append(info['power_ratio'])
steps += 1
if done:
break
        # Per-episode averages
if steps > 0:
avg_reward = episode_reward / steps
avg_ee = episode_ee / steps
avg_rate = episode_rate / steps
avg_trajectory_reward = episode_trajectory_reward / steps
avg_power_ratio = np.mean(episode_power_ratios)
else:
avg_reward = avg_ee = avg_rate = avg_trajectory_reward = avg_power_ratio = 0
        # Update the training history
training_history['episodes'].append(episode)
training_history['rewards'].append(avg_reward)
training_history['ees'].append(avg_ee)
training_history['rates'].append(avg_rate)
training_history['trajectory_rewards'].append(avg_trajectory_reward)
training_history['power_ratios'].append(avg_power_ratio)
        # Record Q values - only when training has actually happened
if len(agent.q_values) > 0:
training_history['q_values'].append(
np.mean(agent.q_values[-10:]) if len(agent.q_values) >= 10 else agent.q_values[-1])
else:
training_history['q_values'].append(0)
        # Moving average
window = min(20, episode + 1)
if episode >= window:
moving_avg = np.mean(training_history['rewards'][-window:])
else:
moving_avg = avg_reward
training_history['moving_avg_rewards'].append(moving_avg)
        # Record losses
if len(agent.actor_losses) > 0:
training_history['actor_losses'].append(agent.actor_losses[-1])
training_history['critic_losses'].append(agent.critic_losses[-1])
else:
training_history['actor_losses'].append(0)
training_history['critic_losses'].append(0)
        # Improved convergence detection
convergence_detected = monitor.update(moving_avg)
if convergence_detected and convergence_episode is None:
convergence_episode = episode
print(f" -> Converged at episode {episode}")
        # Smarter early stopping
if convergence_episode is not None:
early_stop_count += 1
            # Stop early if performance degrades after convergence
if early_stop_count > 50 and moving_avg < best_reward * 0.95:
print(f" -> Early stopping at episode {episode} (performance degradation)")
break
            # Stop after enough additional episodes following convergence
if early_stop_count > 100:
print(f" -> Early stopping at episode {episode} (sufficient training)")
break
        # Save the best model
if avg_reward > best_reward:
best_reward = avg_reward
os.makedirs('models', exist_ok=True)
agent.save(f'models/best_{cfg["name"]}_{ris_mode}.pth')
        # More detailed progress output
if episode % 35 == 0 or episode < 10:
q_value = training_history['q_values'][-1] if training_history['q_values'] else 0
print(f"Episode {episode:4d} | Reward: {avg_reward:7.3f} | "
f"EE: {avg_ee / 1000:7.1f}K | Rate: {avg_rate / 1e6:6.2f} Mbps | "
f"Noise: {current_noise:.3f} | Q: {q_value:6.2f}")
return agent, training_history, convergence_episode
def comprehensive_evaluation(cfg, agents, num_episodes=20):
    """Comprehensive performance evaluation."""
print(f"\nEvaluating {cfg['name']} configuration...")
schemes = {
'NOMA - No RIS': ('noma', 'none'),
'NOMA - Random RIS': ('noma', 'random'),
'NOMA - TD3 Optimized': ('noma', 'td3_optimized'),
'OMA - No RIS': ('oma', 'none'),
'OMA - Random RIS': ('oma', 'random'),
}
results = {}
trajectory_data = {}
for scheme_name, (access, ris) in schemes.items():
print(f" Evaluating {scheme_name}...")
all_ees, all_rates, all_positions = [], [], []
for ep in range(num_episodes):
episode_seed = cfg['seed'] + ep * 100
set_seed(episode_seed)
if access == 'oma':
env = OMAEnv(cfg, ris_mode=ris)
else:
env = RISEnv2User(cfg, ris_mode=ris, access_mode=access)
state = env.reset()
episode_ees, episode_rates, episode_positions = [], [], []
for t in range(cfg['N']):
if access == 'noma' and ris == 'td3_optimized' and ris in agents:
agent = agents[ris]
action = agent.select_action(state, noise_scale=0.0)
else:
action = heuristic_policy(state, env, ris, access)
next_state, reward, done, info = env.step(action)
state = next_state
episode_ees.append(info['ee'])
episode_rates.append(info['total_rate'])
episode_positions.append(info['position'])
if done:
break
all_ees.extend(episode_ees)
all_rates.extend(episode_rates)
all_positions.append(episode_positions)
results[scheme_name] = {
'ee_mean': np.mean(all_ees),
'ee_std': np.std(all_ees),
'rate_mean': np.mean(all_rates),
'rate_std': np.std(all_rates),
'ee_samples': all_ees,
'rate_samples': all_rates
}
trajectory_data[scheme_name] = all_positions
return results, trajectory_data
def heuristic_policy(state, env, ris_mode, access_mode):
    """Heuristic baseline policy."""
action_dim = 4
if ris_mode == 'td3_optimized':
phase_dim = 3
action_dim = 4 + phase_dim
action = np.zeros(action_dim)
uav_x, uav_y = state[0] * 1000, state[1] * 1000
user1_x, user1_y = env.user1[0], env.user1[1]
user2_x, user2_y = env.user2[0], env.user2[1]
ris_x, ris_y = env.ris_pos[0], env.ris_pos[1]
center_x, center_y = (user1_x + user2_x) / 2, (user1_y + user2_y) / 2
to_center = np.array([center_x - uav_x, center_y - uav_y])
to_ris = np.array([ris_x - uav_x, ris_y - uav_y])
to_center_norm = np.linalg.norm(to_center)
to_ris_norm = np.linalg.norm(to_ris)
if to_center_norm > 0:
to_center = to_center / to_center_norm
if to_ris_norm > 0:
to_ris = to_ris / to_ris_norm
if ris_mode == 'td3_optimized':
direction = 0.6 * to_ris + 0.4 * to_center
elif ris_mode == 'random':
direction = 0.4 * to_ris + 0.6 * to_center
else:
direction = to_center
action[0] = direction[0] * 0.8
action[1] = direction[1] * 0.8
    action[2] = 0.1  # slight climb
if access_mode == 'noma':
        action[3] = 0.2  # give the weak user the larger power share
else:
action[3] = 0.0
if ris_mode == 'td3_optimized' and len(action) > 4:
action[4:] = np.random.uniform(-0.3, 0.3, len(action) - 4)
return action
def plot_training_curves(training_histories, cfg_name):
    """Plot training curves (English labels)."""
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
colors = ['blue', 'red', 'green']
ris_modes = ['none', 'random', 'td3_optimized']
labels = ['No RIS', 'Random RIS', 'TD3 Optimized']
for i, ris_mode in enumerate(ris_modes):
if ris_mode in training_histories:
hist = training_histories[ris_mode]
episodes = hist['episodes']
ax1.plot(episodes, hist['rewards'], color=colors[i], alpha=0.6, label=labels[i])
ax1.plot(episodes, hist['moving_avg_rewards'], color=colors[i], linewidth=2)
ax2.plot(episodes, np.array(hist['ees']) / 1000, color=colors[i], label=labels[i])
ax3.plot(episodes, np.array(hist['rates']) / 1e6, color=colors[i], label=labels[i])
ax1.set_title(f'{cfg_name} - Training Rewards')
ax1.set_xlabel('Episode')
ax1.set_ylabel('Average Reward')
ax1.legend()
ax1.grid(True, alpha=0.3)
ax2.set_title(f'{cfg_name} - Energy Efficiency')
ax2.set_xlabel('Episode')
ax2.set_ylabel('EE (Kbits/J)')
ax2.legend()
ax2.grid(True, alpha=0.3)
ax3.set_title(f'{cfg_name} - Data Rate')
ax3.set_xlabel('Episode')
ax3.set_ylabel('Rate (Mbps)')
ax3.legend()
ax3.grid(True, alpha=0.3)
if 'td3_optimized' in training_histories:
hist = training_histories['td3_optimized']
if len(hist['actor_losses']) > 0 and len(hist['actor_losses']) == len(hist['episodes']):
ax4.plot(hist['episodes'], hist['actor_losses'], label='Actor Loss', color='blue')
ax4.plot(hist['episodes'], hist['critic_losses'], label='Critic Loss', color='red')
ax4.set_title('TD3 Training Losses')
ax4.set_xlabel('Episode')
ax4.set_ylabel('Loss')
ax4.legend()
ax4.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig(f'training_curves_{cfg_name}.png', dpi=300, bbox_inches='tight')
plt.show()
def plot_performance_comparison(all_results, cfg_name):
    """Performance comparison plots (English labels)."""
schemes = ['NOMA - No RIS', 'NOMA - Random RIS', 'NOMA - TD3 Optimized',
'OMA - No RIS', 'OMA - Random RIS']
ees, ee_stds = [], []
rates, rate_stds = [], []
for scheme in schemes:
if scheme in all_results:
result = all_results[scheme]
ees.append(result['ee_mean'] / 1000)
ee_stds.append(result['ee_std'] / 1000)
rates.append(result['rate_mean'] / 1e6)
rate_stds.append(result['rate_std'] / 1e6)
else:
ees.append(0)
ee_stds.append(0)
rates.append(0)
rate_stds.append(0)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
x = np.arange(len(schemes))
width = 0.35
bars1 = ax1.bar(x, ees, width, yerr=ee_stds, capsize=5, alpha=0.7,
color=['blue', 'green', 'red', 'orange', 'purple'])
ax1.set_xlabel('Scheme')
ax1.set_ylabel('Energy Efficiency (Kbits/J)')
ax1.set_title(f'{cfg_name} - Energy Efficiency Comparison')
ax1.set_xticks(x)
ax1.set_xticklabels(['NOMA\nNo RIS', 'NOMA\nRandom', 'NOMA\nTD3',
'OMA\nNo RIS', 'OMA\nRandom'], rotation=45, ha='right')
ax1.grid(True, alpha=0.3)
for i, bar in enumerate(bars1):
height = bar.get_height()
ax1.text(bar.get_x() + bar.get_width() / 2., height + max(ees) * 0.02,
f'{height:.0f}K', ha='center', va='bottom', fontsize=9)
bars2 = ax2.bar(x, rates, width, yerr=rate_stds, capsize=5, alpha=0.7,
color=['blue', 'green', 'red', 'orange', 'purple'])
ax2.set_xlabel('Scheme')
ax2.set_ylabel('Data Rate (Mbps)')
ax2.set_title(f'{cfg_name} - Data Rate Comparison')
ax2.set_xticks(x)
ax2.set_xticklabels(['NOMA\nNo RIS', 'NOMA\nRandom', 'NOMA\nTD3',
'OMA\nNo RIS', 'OMA\nRandom'], rotation=45, ha='right')
ax2.grid(True, alpha=0.3)
for i, bar in enumerate(bars2):
height = bar.get_height()
ax2.text(bar.get_x() + bar.get_width() / 2., height + max(rates) * 0.02,
f'{height:.1f}', ha='center', va='bottom', fontsize=9)
plt.tight_layout()
plt.savefig(f'performance_comparison_{cfg_name}.png', dpi=300, bbox_inches='tight')
plt.show()
def plot_ee_cdf(all_results, cfg_name):
    """Energy-efficiency CDF curves (English labels)."""
plt.figure(figsize=(10, 6))
schemes = ['NOMA - No RIS', 'NOMA - Random RIS', 'NOMA - TD3 Optimized']
colors = ['blue', 'green', 'red']
linestyles = ['--', '-.', '-']
for i, scheme in enumerate(schemes):
if scheme in all_results:
data = np.array(all_results[scheme]['ee_samples']) / 1000
sorted_data = np.sort(data)
cdf = np.arange(1, len(sorted_data) + 1) / len(sorted_data)
plt.plot(sorted_data, cdf, label=scheme, color=colors[i],
linestyle=linestyles[i], linewidth=2)
plt.xlabel('Energy Efficiency (Kbits/J)')
plt.ylabel('CDF')
plt.title(f'{cfg_name} - Energy Efficiency CDF')
plt.legend()
plt.grid(True, alpha=0.3)
plt.savefig(f'ee_cdf_{cfg_name}.png', dpi=300, bbox_inches='tight')
plt.show()
def plot_trajectories(trajectory_data, cfg, cfg_name):
    """UAV trajectory plot (English labels)."""
plt.figure(figsize=(12, 8))
schemes = ['NOMA - No RIS', 'NOMA - Random RIS', 'NOMA - TD3 Optimized']
colors = ['blue', 'green', 'red']
plt.scatter(cfg['user1'][0], cfg['user1'][1], s=200, marker='^', color='black', label='User 1', zorder=5)
plt.scatter(cfg['user2'][0], cfg['user2'][1], s=200, marker='^', color='gray', label='User 2', zorder=5)
plt.scatter(cfg['x_RIS'], cfg['y_RIS'], s=200, marker='s', color='orange', label='RIS', zorder=5)
plt.scatter(cfg['uav_start'][0], cfg['uav_start'][1], s=100, marker='o', color='purple', label='Start Point',
zorder=5)
for i, scheme in enumerate(schemes):
if scheme in trajectory_data:
trajectories = trajectory_data[scheme]
            # Show the first trajectory only
if len(trajectories) > 0:
trajectory = trajectories[0]
x_pos = [pos[0] for pos in trajectory]
y_pos = [pos[1] for pos in trajectory]
plt.plot(x_pos, y_pos, color=colors[i], linewidth=2, label=scheme, alpha=0.8)
plt.scatter(x_pos[::5], y_pos[::5], color=colors[i], s=20, alpha=0.6)
plt.xlabel('X Coordinate (m)')
plt.ylabel('Y Coordinate (m)')
plt.title(f'{cfg_name} - UAV Trajectory Comparison')
plt.legend()
plt.grid(True, alpha=0.3)
plt.axis('equal')
plt.savefig(f'trajectories_{cfg_name}.png', dpi=300, bbox_inches='tight')
plt.show()
def plot_convergence_analysis(training_histories, cfg_name):
    """Convergence analysis plot (English labels)."""
plt.figure(figsize=(12, 4))
ris_modes = ['none', 'random', 'td3_optimized']
colors = ['blue', 'green', 'red']
labels = ['No RIS', 'Random RIS', 'TD3 Optimized']
for i, ris_mode in enumerate(ris_modes):
if ris_mode in training_histories:
hist = training_histories[ris_mode]
episodes = hist['episodes']
moving_avg = hist['moving_avg_rewards']
plt.plot(episodes, moving_avg, color=colors[i], linewidth=2, label=labels[i])
plt.xlabel('Training Episode')
plt.ylabel('Moving Average Reward')
plt.title(f'{cfg_name} - Convergence Performance Comparison')
plt.legend()
plt.grid(True, alpha=0.3)
plt.savefig(f'convergence_{cfg_name}.png', dpi=300, bbox_inches='tight')
plt.show()
def plot_joint_optimization_results(training_histories, cfg_name):
    """Joint three-variable optimization results (English labels)."""
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
ris_modes = ['none', 'random', 'td3_optimized']
colors = ['blue', 'green', 'red']
labels = ['No RIS', 'Random RIS', 'TD3 Optimized']
for i, ris_mode in enumerate(ris_modes):
if ris_mode in training_histories:
hist = training_histories[ris_mode]
episodes = hist['episodes']
axes[0, 0].plot(episodes, hist['rewards'], color=colors[i], alpha=0.7, label=labels[i])
axes[0, 0].set_title('Total Reward')
axes[0, 0].set_ylabel('Reward')
axes[0, 0].grid(True, alpha=0.3)
axes[0, 1].plot(episodes, np.array(hist['ees']) / 1000, color=colors[i], label=labels[i])
axes[0, 1].set_title('Energy Efficiency')
axes[0, 1].set_ylabel('EE (Kbits/J)')
axes[0, 1].grid(True, alpha=0.3)
if 'trajectory_rewards' in hist and len(hist['trajectory_rewards']) == len(episodes):
axes[0, 2].plot(episodes, hist['trajectory_rewards'], color=colors[i], label=labels[i])
axes[0, 2].set_title('Trajectory Reward')
axes[0, 2].set_ylabel('Reward')
axes[0, 2].grid(True, alpha=0.3)
if 'power_ratios' in hist and len(hist['power_ratios']) == len(episodes):
axes[1, 0].plot(episodes, hist['power_ratios'], color=colors[i], label=labels[i])
axes[1, 0].set_title('Power Allocation Ratio')
axes[1, 0].set_ylabel('Power Ratio')
axes[1, 0].set_ylim(0.1, 0.9)
axes[1, 0].grid(True, alpha=0.3)
            # Guard against a Q-value/episode length mismatch when plotting
if ris_mode == 'td3_optimized' and 'q_values' in hist and len(hist['q_values']) > 0:
q_episodes = episodes[:len(hist['q_values'])]
axes[1, 1].plot(q_episodes, hist['q_values'], color='purple', label='Q Value', linewidth=2)
axes[1, 1].set_title('Q Value Evolution')
axes[1, 1].set_ylabel('Q Value')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)
for i, ris_mode in enumerate(ris_modes):
if ris_mode in training_histories:
hist = training_histories[ris_mode]
if len(hist['rates']) == len(hist['episodes']):
axes[1, 2].plot(hist['episodes'], np.array(hist['rates']) / 1e6,
color=colors[i], label=labels[i])
axes[1, 2].set_title('Data Rate')
axes[1, 2].set_ylabel('Rate (Mbps)')
axes[1, 2].legend()
axes[1, 2].grid(True, alpha=0.3)
fig.suptitle(f'{cfg_name} - Joint Three-Variable Optimization Performance', fontsize=16, y=0.98)
plt.tight_layout()
plt.savefig(f'joint_optimization_{cfg_name}.png', dpi=300, bbox_inches='tight')
plt.show()
def analyze_joint_optimization(training_histories, cfg_name):
    """Analyze the effect of the joint three-variable optimization."""
print(f"\n{cfg_name} - Joint Optimization Analysis:")
print("-" * 50)
if 'td3_optimized' in training_histories:
hist = training_histories['td3_optimized']
final_reward = hist['rewards'][-1] if hist['rewards'] else 0
final_ee = hist['ees'][-1] if hist['ees'] else 0
final_rate = hist['rates'][-1] if hist['rates'] else 0
if 'power_ratios' in hist and len(hist['power_ratios']) > 0:
power_data = hist['power_ratios'][-20:] if len(hist['power_ratios']) >= 20 else hist['power_ratios']
power_std = np.std(power_data)
avg_power = np.mean(power_data)
print(f"Final Performance:")
print(f" - Reward: {final_reward:.3f}")
print(f" - Energy Efficiency: {final_ee / 1000:.1f} Kbits/J")
print(f" - Data Rate: {final_rate / 1e6:.2f} Mbps")
print(f"Power Allocation Analysis:")
print(f" - Average: {avg_power:.3f}")
print(f" - Stability: {power_std:.4f} (std)")
if 0.4 <= avg_power <= 0.7:
print(" ✓ Power allocation follows NOMA principles")
else:
print(" ⚠ Power allocation needs adjustment")
def main():
    """Main function."""
print("=== RIS-assisted UAV Communication System Energy Efficiency Optimization ===")
print("Start time:", time.strftime("%Y-%m-%d %H:%M:%S"))
cfgs = {
'Sub6': cfg_A,
'mmWave': cfg_B
}
all_training_results = {}
all_evaluation_results = {}
all_trajectory_data = {}
for cfg_name, cfg in cfgs.items():
print(f"\n{'=' * 60}")
print(f"Processing {cfg_name} configuration")
print(f"{'=' * 60}")
set_seed(cfg['seed'])
agents = {}
training_histories = {}
ris_modes = ['none', 'random', 'td3_optimized']
for ris_mode in ris_modes:
agent, history, conv_ep = train_td3_agent_advanced(cfg, ris_mode, episodes=400)
agents[ris_mode] = agent
training_histories[ris_mode] = history
if conv_ep is not None:
print(f" {ris_mode}: Converged at episode {conv_ep}")
else:
print(f" {ris_mode}: Not fully converged")
evaluation_results, trajectory_data = comprehensive_evaluation(cfg, agents)
all_training_results[cfg_name] = training_histories
all_evaluation_results[cfg_name] = evaluation_results
all_trajectory_data[cfg_name] = trajectory_data
plot_training_curves(training_histories, cfg_name)
plot_performance_comparison(evaluation_results, cfg_name)
plot_ee_cdf(evaluation_results, cfg_name)
plot_trajectories(trajectory_data, cfg, cfg_name)
plot_convergence_analysis(training_histories, cfg_name)
plot_joint_optimization_results(training_histories, cfg_name)
analyze_joint_optimization(training_histories, cfg_name)
print(f"\n{'=' * 60}")
print("Performance Analysis Summary")
print(f"{'=' * 60}")
for cfg_name in cfgs.keys():
print(f"\n{cfg_name} Configuration:")
results = all_evaluation_results[cfg_name]
if 'NOMA - TD3 Optimized' in results and 'OMA - No RIS' in results:
td3_ee = results['NOMA - TD3 Optimized']['ee_mean']
oma_ee = results['OMA - No RIS']['ee_mean']
improvement = (td3_ee - oma_ee) / oma_ee * 100 if oma_ee > 0 else 0
td3_rate = results['NOMA - TD3 Optimized']['rate_mean']
oma_rate = results['OMA - No RIS']['rate_mean']
rate_improvement = (td3_rate - oma_rate) / oma_rate * 100 if oma_rate > 0 else 0
print(f" TD3 Optimized vs OMA No RIS:")
print(f" - EE Improvement: {improvement:+.1f}%")
print(f" - Rate Improvement: {rate_improvement:+.1f}%")
if improvement > 0:
print(" ✓ Validates performance advantages of RIS and NOMA")
if 'NOMA - TD3 Optimized' in results and 'NOMA - Random RIS' in results:
td3_ee = results['NOMA - TD3 Optimized']['ee_mean']
random_ee = results['NOMA - Random RIS']['ee_mean']
improvement = (td3_ee - random_ee) / random_ee * 100 if random_ee > 0 else 0
print(f" TD3 Optimized vs Random RIS:")
print(f" - EE Improvement: {improvement:+.1f}%")
if improvement > 0:
print(" ✓ Validates optimization effectiveness of TD3 algorithm")
else:
print(" ⚠ TD3 optimization needs improvement")
print(f"\nSimulation completed at: {time.strftime('%Y-%m-%d %H:%M:%S')}")
print("All charts saved as PNG files")
if __name__ == "__main__":
    main()
Are there any problems with the code above?