import os
import time
import itertools
import math
import numpy as np
import scipy as sp
import scipy.sparse as sps
from scipy.sparse.linalg import splu
import torch
import torch.nn as nn
import pyamg
from scipy.sparse import csr_matrix, isspmatrix_csr, diags
from pyamg.multilevel import multilevel_solver
from warnings import warn
from scipy.sparse import csr_matrix, isspmatrix_csr, SparseEfficiencyWarning
from pyamg.relaxation.smoothing import change_smoothers
device = 'cpu'
# ========== 辅助函数 ==========
def prolongation_fn(grid_size):
res_stencil = np.zeros((3,3), dtype=np.double)
k=16
res_stencil[0,0] = 1/k
res_stencil[0,1] = 2/k
res_stencil[0,2] = 1/k
res_stencil[1,0] = 2/k
res_stencil[1,1] = 4/k
res_stencil[1,2] = 2/k
res_stencil[2,0] = 1/k
res_stencil[2,1] = 2/k
res_stencil[2,2] = 1/k
P_stencils = np.zeros((grid_size//2, grid_size//2, 3, 3))
for i in range(grid_size//2):
for j in range(grid_size//2):
P_stencils[i,j,:,:] = res_stencil
return compute_p2(P_stencils, grid_size).astype(np.double)
def compute_p2(P_stencil, grid_size):
indexes = get_p_matrix_indices_one(grid_size)
P = csr_matrix((P_stencil.reshape(-1), (indexes[:, 1], indexes[:, 0])),
shape=((grid_size//2) ** 2, (grid_size) ** 2))
return P
def get_p_matrix_indices_one(grid_size):
K = map_2_to_1(grid_size=grid_size)
indices = []
for ic in range(grid_size // 2):
i = 2 * ic + 1
for jc in range(grid_size // 2):
j = 2 * jc + 1
J = int(grid_size // 2 * jc + ic)
for k in range(3):
for m in range(3):
I = int(K[i, j, k, m])
indices.append([I, J])
return np.array(indices)
def map_2_to_1(grid_size=8):
k = np.zeros((grid_size, grid_size, 3, 3))
M = np.reshape(np.arange(grid_size ** 2), (grid_size, grid_size)).T
M = np.concatenate([M, M], axis=0)
M = np.concatenate([M, M], axis=1)
for i in range(3):
I = (i - 1) % grid_size
for j in range(3):
J = (j - 1) % grid_size
k[:, :, i, j] = M[I:I + grid_size, J:J + grid_size]
return k
def diffusion_stencil_2d(epsilon=1.0, theta=0.0, type='FD'):
eps = float(epsilon)
theta = float(theta)
C = np.cos(theta)
S = np.sin(theta)
CS = C*S
CC = C**2
SS = S**2
if type == 'FE':
a = (-1*eps - 1)*CC + (-1*eps - 1)*SS + (3*eps - 3)*CS
b = (2*eps - 4)*CC + (-4*eps + 2)*SS
c = (-1*eps - 1)*CC + (-1*eps - 1)*SS + (-3*eps + 3)*CS
d = (-4*eps + 2)*CC + (2*eps - 4)*SS
e = (8*eps + 8)*CC + (8*eps + 8)*SS
stencil = np.array([[a, b, c],[d, e, d],[c, b, a]]) / 6.0
elif type == 'FD':
a = -0.5*(eps - 1)*CS
b = -(eps*SS + CC)
c = -a
d = -(eps*CC + SS)
e = 2.0*(eps + 1)
stencil = np.array([[a, d, c],[b, e, b],[c, d, a]])
return stencil
def coo_to_tensor(coo):
values = coo.data.astype(np.float64)
indices = np.vstack((coo.row, coo.col))
i = torch.LongTensor(indices)
v = torch.DoubleTensor(values)
shape = coo.shape
return torch.sparse_coo_tensor(i, v, torch.Size(shape)).to(device)
# ========== 光滑算子 ==========
def neural_smoother(net, size, mixed=0):
# 返回PyTorch张量而不是SciPy矩阵
if mixed == 1:
I = torch.eye(size*size, dtype=torch.double, device=device)
x0 = I
for conv_layer in net.convLayers1:
kernel = conv_layer.weight.detach().view(3, 3)
M = toeplitz_conv(kernel, size)
x0 = torch.mm(M, x0)
return x0
else:
I = torch.eye(size*size, dtype=torch.double, device=device)
x0 = I
for conv_layer in net.convLayers1:
kernel = conv_layer.weight.detach().view(3, 3)
M = toeplitz_conv(kernel, size)
x0 = torch.mm(M, x0)
kernel2 = net.convLayers2[0].weight.detach().view(3, 3)
M2 = toeplitz_conv(kernel2, size)
y = x0 + (2/3) * M2
return y
def toeplitz_conv(kernel, size):
# 将3x3卷积核转换为Toeplitz矩阵
full_size = size * size
M = torch.zeros(full_size, full_size, dtype=torch.double, device=device)
for i in range(size):
for j in range(size):
idx = i * size + j
for di in [-1, 0, 1]:
for dj in [-1, 0, 1]:
ni, nj = i + di, j + dj
if 0 <= ni < size and 0 <= nj < size:
nidx = ni * size + nj
k_val = kernel[di+1, dj+1]
M[idx, nidx] = k_val
return M
# ========== Level 创建 ==========
def create_levels(eps, theta, n):
mxl = 5 # max levels
levels = []
# 创建最细层
s = diffusion_stencil_2d(eps, theta * np.pi / 180, 'FD') * 2
A = pyamg.gallery.stencil_grid(s, (n, n)).tocsr()
# 创建第一层 - 使用PyAMG的level类而不是字典
level0 = multilevel_solver.level()
level0.A = A
level0.N = n
level0.l = A.shape[0]
levels.append(level0)
current_n = n
for i in range(1, mxl): # 因为已经有一层,所以从1开始
# 获取当前最细层(最后一层)
fine_level = levels[-1]
current_n = fine_level.N
# 创建限制算子
R = prolongation_fn(current_n)
# 插值算子是限制算子的转置
P = R.T * 4
# 存储到当前层(细层)
fine_level.R = R
fine_level.P = P
# 为下一层准备:计算粗网格矩阵
A_coarse = R @ fine_level.A @ P
# 创建粗网格层
coarse_level = multilevel_solver.level()
coarse_level.A = A_coarse
coarse_level.N = current_n // 2 # 网格大小减半
coarse_level.l = A_coarse.shape[0]
levels.append(coarse_level)
# 检查是否达到最小网格
if coarse_level.N < 8:
break
return levels
# ========== Problem Class ==========
class Problem:
def __init__(self, eps, theta, grid_size, k=20, initial_ground_truth=None, initial_u=None,
levels=None, net_trained=None, mxl=0):
self.eps = eps
self.theta = theta
self.grid_size = grid_size
if levels is None:
levels = create_levels(eps, theta, grid_size)
self.levels = levels
N = levels[0].N
l = levels[0].l
# 初始化真实解
if initial_ground_truth is None:
self.ground_truth = torch.rand(l, 1, dtype=torch.double, device=device, requires_grad=False)
else:
self.ground_truth = initial_ground_truth.detach().requires_grad_(False)
# 初始解
if initial_u is None:
self.initial_u = torch.rand(l, 1, dtype=torch.double, device=device, requires_grad=False)
else:
self.initial_u = initial_u.detach().requires_grad_(False)
self.k = k
self.N = N
self.levels = levels
self.mxl = mxl
self.net_trained = net_trained or []
# 冻结预训练网络的参数
for net in self.net_trained:
for param in net.parameters():
param.requires_grad = False
# 使用SciPy稀疏矩阵计算右端项
A_sparse = self.levels[0].A
gt_numpy = self.ground_truth.detach().cpu().numpy().flatten()
f_numpy = A_sparse @ gt_numpy
self.f = torch.tensor(f_numpy, dtype=torch.double, device=device).view(-1, 1).requires_grad_(False)
def compute_solution(self, net):
with torch.no_grad(): # 禁用梯度计算
A_sparse = self.levels[0].A # SciPy稀疏矩阵
b = self.f.detach().cpu().numpy().flatten()
# 创建多重网格求解器
solver_a_CNN = multigrid_solver(A_sparse, self.grid_size,
{'smoother': 'a-CNN', 'eps': self.eps, 'theta': self.theta},
net,
self.net_trained,
self.mxl)
u_solution = solver_a_CNN.solve(b, maxiter=10, tol=1e-6)
return torch.tensor(u_solution, dtype=torch.double, device=device).view(-1, 1)
# ========== 求解器 ==========
def multigrid_solver(A, size, args, net, net_trained, mxl):
solver = geometric_solver(A, prolongation_fn, max_levels=5, coarse_solver='splu')
if net_trained!=0:
nets = [net]+net_trained
else:
nets = [net]
if args['smoother'] == 'a-CNN':
# mxl最大是5 i in range(4) 0 1 2 3
for i in range(mxl-1):
# 创建当前层的光滑算子
M = neural_smoother(nets[i], size// (2 ** i ))
# 定义光滑函数 - 修改后版本
def relax(A, x, b, M_new=M):
# 计算残差 (使用NumPy的稀疏矩阵操作)
r = b - A.dot(x)
# 转换为PyTorch张量进行矩阵乘法
r_tensor = torch.tensor(r, dtype=torch.double, device='cpu').view(-1, 1)
correction = M_new @ r_tensor
# 转回NumPy并更新解
x += correction.view(-1).cpu().numpy()
# 设置光滑器
solver.levels[i].presmoother = relax
solver.levels[i].postsmoother = relax
return solver
def geometric_solver(A, prolongation_function,
presmoother=('gauss_seidel', {'sweep': 'forward'}),
postsmoother=('gauss_seidel', {'sweep': 'forward'}),
max_levels=5, max_coarse=10, coarse_solver='splu', **kwargs):
levels = [multilevel_solver.level()]
# convert A to csr
if not isspmatrix_csr(A):
try:
A = csr_matrix(A)
warn("Implicit conversion of A to CSR", SparseEfficiencyWarning)
except BaseException:
raise TypeError('Argument A must have type csr_matrix, or be convertible to csr_matrix')
# preprocess A
A = A.asfptype()
if A.shape[0] != A.shape[1]:
raise ValueError('expected square matrix')
levels[-1].A = A
while len(levels) < max_levels and levels[-1].A.shape[0] > max_coarse:
extend_hierarchy(levels, prolongation_function)
# 使用MultilevelSolver代替弃用的multilevel_solver
ml = pyamg.multilevel.MultilevelSolver(levels, **kwargs)
change_smoothers(ml, presmoother, postsmoother)
return ml
# internal function
def extend_hierarchy(levels, prolongation_fn):
"""Extend the multigrid hierarchy."""
A = levels[-1].A
N = A.shape[0]
n = int(math.sqrt(N))
R = prolongation_fn(n)
P = R.T.tocsr() * 4
levels[-1].P = P # prolongation operator
levels[-1].R = R # restriction operator
levels.append(multilevel_solver.level())
# Form next level through Galerkin product
A = R * A * P
A = A.astype(np.float64) # convert from complex numbers, should have A.imag==0
levels[-1].A = A
# ========== 神经网络模型 ==========
class _ConvNet_(nn.Module):
def __init__(self, initial=5, kernel_size=3, initial_kernel=0.1):
super(_ConvNet_, self).__init__()
self.convLayers1 = nn.ModuleList([
nn.Conv2d(1, 1, kernel_size, padding=kernel_size//2, bias=False).double()
for _ in range(5)
])
self.convLayers2 = nn.ModuleList([
nn.Conv2d(1, 1, kernel_size, padding=kernel_size//2, bias=False).double()
for _ in range(2)
])
# 初始化权重
initial_weights = torch.zeros(1, 1, kernel_size, kernel_size, dtype=torch.double)
initial_weights[0, 0, kernel_size//2, kernel_size//2] = initial_kernel
for net in self.convLayers1:
net.weight = nn.Parameter(initial_weights.clone())
for net in self.convLayers2:
net.weight = nn.Parameter(initial_weights.clone())
def forward(self, x):
y1 = x
y2 = x
for net in self.convLayers1:
y1 = torch.tanh(net(y1))
for net in self.convLayers2:
y2 = torch.tanh(net(y2))
return y1 + (2/3) * y2
def compute_loss(net, problem_instances):
loss = torch.zeros(1, device=device, requires_grad=True)
for problem in problem_instances:
# 确保计算图连接
with torch.set_grad_enabled(True):
u_pred = problem.compute_solution(net)
u_true = problem.ground_truth
# 确保梯度可以回传
u_pred.requires_grad_(True)
u_true.requires_grad_(False)
# 计算损失
diff = u_pred - u_true
norm_diff = torch.norm(diff)
norm_true = torch.norm(u_true)
loss = loss + norm_diff / norm_true
return loss
def chunks(l, n):
for i in range(0, len(l), n):
yield l[i:i + n]
def set_seed(seed):
torch.manual_seed(seed)
np.random.seed(seed)
# ========== AlphaCNN ==========
class alphaCNN:
def __init__(self, net=None, batch_size=1, learning_rate=1e-6, max_epochs=1000,
nb_layers=5, tol=1e-6, stable_count=50, optimizer='SGD',
check_spectral_radius=False, random_seed=None, kernel_size=3, initial_kernel=0.1):
if random_seed is not None:
set_seed(random_seed)
if net is None:
self.net = _ConvNet_(initial=5, kernel_size=kernel_size, initial_kernel=initial_kernel).to(device)
else:
self.net = net
# 确保网络参数需要梯度
for param in self.net.parameters():
param.requires_grad = True
self.learning_rate = learning_rate
if optimizer == 'Adadelta':
self.optim = torch.optim.Adadelta(self.net.parameters(), lr=learning_rate)
elif optimizer == 'Adam':
self.optim = torch.optim.Adam(self.net.parameters(), lr=learning_rate)
else:
self.optim = torch.optim.SGD(self.net.parameters(), lr=learning_rate)
self.batch_size = batch_size
self.max_epochs = max_epochs
self.tol = tol
self.stable_count = stable_count
def _optimization_step_(self, problem_instances):
shuffled_problem_instances = np.random.permutation(problem_instances)
for problem_chunk in chunks(shuffled_problem_instances, self.batch_size):
self.optim.zero_grad()
loss = compute_loss(self.net, problem_chunk)
# 检查梯度是否存在
if loss.grad_fn is None:
raise RuntimeError("Loss has no gradient. Check the computation graph.")
loss.backward()
self.optim.step()
# 确保梯度被应用
with torch.no_grad():
for param in self.net.parameters():
if param.grad is not None:
param -= self.learning_rate * param.grad
def fit(self, problem_instances):
losses = []
prev_total_loss = compute_loss(self.net, problem_instances).item()
convergence_counter = 0
problem_number = len(problem_instances)
for n_epoch in range(self.max_epochs):
start_time = time.time()
self._optimization_step_(problem_instances)
total_loss = compute_loss(self.net, problem_instances).item()
losses.append(total_loss)
if np.abs(total_loss - prev_total_loss) < self.tol * problem_number:
convergence_counter += 1
if convergence_counter >= self.stable_count:
print(f"Converged after {n_epoch} epochs")
break
else:
convergence_counter = 0
prev_total_loss = total_loss
epoch_time = time.time() - start_time
if n_epoch % 10 == 0:
print(f"Epoch: {n_epoch:>3} Loss: {total_loss:>10.6f} Time: {epoch_time:.2f}s")
self.losses = losses
print(f"Training completed. Final loss: {total_loss:.6f}")
return self
# ========== 模型训练 ==========
def train_and_save_model(eps, theta, coarsening='full'):
n = 33 # 网格大小
# 创建模型目录
model_dir = f'./models/theta_{theta}_eps_{eps}'
if not os.path.isdir(model_dir):
os.makedirs(model_dir)
# 创建层级结构
levels = create_levels(eps, theta, n)
# 第一层训练 (最粗层)
problem_instances1 = [
Problem(eps, theta, n, k=k, levels=levels, mxl=1)
for k in range(1, 13)
]
model1 = alphaCNN(
batch_size=8, learning_rate=1e-8, max_epochs=1000,
nb_layers=5, tol=1e-6, stable_count=10, optimizer='Adam',
random_seed=9, initial_kernel=0.1
)
model1.fit(problem_instances1)
torch.save(model1.net.state_dict(), os.path.join(model_dir, f'theta_{theta}_eps_{eps}_level1.pth'))
# 第二层训练
problem_instances2 = [
Problem(eps, theta, n, k=k, levels=levels, mxl=2, net_trained=[model1.net])
for k in range(1, 15)
]
model2 = alphaCNN(
batch_size=8, learning_rate=1e-8, max_epochs=1000,
nb_layers=5, tol=1e-6, stable_count=10, optimizer='Adam',
random_seed=9, initial_kernel=0.02/3
)
model2.fit(problem_instances2)
torch.save(model2.net.state_dict(), os.path.join(model_dir, f'theta_{theta}_eps_{eps}_level2.pth'))
# 第三层训练
problem_instances3 = [
Problem(eps, theta, n, k=k, levels=levels, mxl=3, net_trained=[model1.net, model2.net])
for k in range(1, 17)
]
model3 = alphaCNN(
batch_size=8, learning_rate=1e-8, max_epochs=1000,
nb_layers=5, tol=1e-6, stable_count=10, optimizer='Adam',
random_seed=9, initial_kernel=0.002/3
)
model3.fit(problem_instances3)
torch.save(model3.net.state_dict(), os.path.join(model_dir, f'theta_{theta}_eps_{eps}_level3.pth'))
# 第四层训练 (最细层)
problem_instances4 = [
Problem(eps, theta, n, k=k, levels=levels, mxl=4, net_trained=[model1.net, model2.net, model3.net])
for k in range(1, 20)
]
model4 = alphaCNN(
batch_size=8, learning_rate=1e-8, max_epochs=1000,
nb_layers=5, tol=1e-6, stable_count=10, optimizer='Adam',
random_seed=9, initial_kernel=0.002/3
)
model4.fit(problem_instances4)
torch.save(model4.net.state_dict(), os.path.join(model_dir, f'theta_{theta}_eps_{eps}_level4.pth'))
# 训练模型
if __name__ == "__main__":
train_and_save_model(100, 75) 这是代码,帮我修改