ValueError: Tensor Tensor("conv2d_18/Relu:0", shape=(?, 512, 512, 3), dtype=float32) is not an element of this graph.

本文探讨了在 Django 项目中使用 Keras 时遇到的 ValueError 错误,重点介绍 TensorFlow 版本问题的排查与解决过程,包括从 1.15 迁移到 2.0 以及后续兼容性问题的处理,最终确保 Django 服务正常运行。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

解决 Django + Keras 报错 ValueError: Tensor Tensor("conv2d_18/Relu:0", shape=(?, 512, 512, 3), dtype=float32) is not an element of this graph.

报错信息
ValueError: Tensor Tensor("conv2d_18/Relu:0", shape=(?, 512, 512, 3), dtype=float32) is not an element of this graph.
排查问题
# 库版本
tensorflow==1.15
keras==2.3.1

# 尝试
在创建/加载模型(create/load model)的代码之后加上:
self.graph = tf.get_default_graph()

在使用模型进行推理之前加上:
with self.graph.as_default():
    (... do inference here ...)

# 继续报错
tensorflow.python.framework.errors_impl.FailedPreconditionError: ... from Container: localhost. This could mean that the variable was uninitialized. Not found: Container localhost does not exist.

# 之后换了一种思路,如下

解决办法
pip install tensorflow==2.0

# 接着解决替换tensorflow2.0后产生的其他报错(相对简单)
# 之后django服务起来了
import torch.nn as nn import math import torch import torch.nn as nn import torch.nn as nn import torch import torch.nn.functional as F import numpy as np import math import numpy as np from typing import Any, Callable import torch from torch import nn, Tensor from typing import List, Optional import math from ultralytics.nn.modules.conv import Conv from typing import Union var: Union[int, tuple] = 1 # build RepVGG block # ----------------------------- def conv_bn(in_channels, out_channels, kernel_size, stride, padding, groups=1): result = nn.Sequential() result.add_module(&#39;conv&#39;, nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=False)) result.add_module(&#39;bn&#39;, nn.BatchNorm2d(num_features=out_channels)) return result class SEBlock(nn.Module): def __init__(self, input_channels): super(SEBlock, self).__init__() internal_neurons = input_channels // 8 self.down = nn.Conv2d(in_channels=input_channels, out_channels=internal_neurons, kernel_size=1, stride=1, bias=True) self.up = nn.Conv2d(in_channels=internal_neurons, out_channels=input_channels, kernel_size=1, stride=1, bias=True) self.input_channels = input_channels def forward(self, inputs): x = F.avg_pool2d(inputs, kernel_size=inputs.size(3)) x = self.down(x) x = F.relu(x) x = self.up(x) x = torch.sigmoid(x) x = x.view(-1, self.input_channels, 1, 1) return inputs * x class RepVGG(nn.Module): def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, padding_mode=&#39;zeros&#39;, deploy=False, use_se=False): super(RepVGG, self).__init__() self.deploy = deploy self.groups = groups self.in_channels = in_channels padding_11 = padding - kernel_size // 2 self.nonlinearity = nn.SiLU() # self.nonlinearity = nn.ReLU() if use_se: self.se = SEBlock(out_channels, internal_neurons=out_channels // 16) else: self.se = nn.Identity() if deploy: self.rbr_reparam = 
nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=True, padding_mode=padding_mode) else: self.rbr_identity = nn.BatchNorm2d( num_features=in_channels) if out_channels == in_channels and stride == 1 else None self.rbr_dense = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups) self.rbr_1x1 = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride, padding=padding_11, groups=groups) # print(&#39;RepVGG Block, identity = &#39;, self.rbr_identity) def get_equivalent_kernel_bias(self): kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid def _pad_1x1_to_3x3_tensor(self, kernel1x1): if kernel1x1 is None: return 0 else: return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1]) def _fuse_bn_tensor(self, branch): if branch is None: return 0, 0 if isinstance(branch, nn.Sequential): kernel = branch.conv.weight running_mean = branch.bn.running_mean running_var = branch.bn.running_var gamma = branch.bn.weight beta = branch.bn.bias eps = branch.bn.eps else: assert isinstance(branch, nn.BatchNorm2d) if not hasattr(self, &#39;id_tensor&#39;): input_dim = self.in_channels // self.groups kernel_value = np.zeros((self.in_channels, input_dim, 3, 3), dtype=np.float32) for i in range(self.in_channels): kernel_value[i, i % input_dim, 1, 1] = 1 self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device) kernel = self.id_tensor running_mean = branch.running_mean running_var = branch.running_var gamma = branch.weight beta = branch.bias eps = branch.eps std = (running_var + eps).sqrt() t = (gamma / std).reshape(-1, 1, 1, 1) return kernel * t, beta - 
running_mean * gamma / std def forward(self, inputs): if hasattr(self, &#39;rbr_reparam&#39;): return self.nonlinearity(self.se(self.rbr_reparam(inputs))) if self.rbr_identity is None: id_out = 0 else: id_out = self.rbr_identity(inputs) return self.nonlinearity(self.se(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out)) def fusevggforward(self, x): return self.nonlinearity(self.rbr_dense(x)) # RepVGG block end # ----------------------------- def autopad(k, p=None, d=1): # kernel, padding, dilation """Pad to &#39;same&#39; shape outputs.""" if d > 1: k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size if p is None: p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad return p def makeDivisible(v: float, divisor: int, min_value: Optional[int] = None) -> int: """ This function is taken from the original tf repo. It ensures that all layers have a channel number that is divisible by 8 It can be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.Py """ if min_value is None: min_value = divisor new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) # Make sure that round down does not go down by more than 10%. 
if new_v < 0.9 * v: new_v += divisor return new_v def callMethod(self, ElementName): return getattr(self, ElementName) def setMethod(self, ElementName, ElementValue): return setattr(self, ElementName, ElementValue) def shuffleTensor(Feature: Tensor, Mode: int=1) -> Tensor: # shuffle multiple tensors with the same indexs # all tensors must have the same shape if isinstance(Feature, Tensor): Feature = [Feature] Indexs = None Output = [] for f in Feature: # not in-place operation, should update output B, C, H, W = f.shape if Mode == 1: # fully shuffle f = f.flatten(2) if Indexs is None: Indexs = torch.randperm(f.shape[-1], device=f.device) f = f[:, :, Indexs.to(f.device)] f = f.reshape(B, C, H, W) else: # shuflle along y and then x axis if Indexs is None: Indexs = [torch.randperm(H, device=f.device), torch.randperm(W, device=f.device)] f = f[:, :, Indexs[0].to(f.device)] f = f[:, :, :, Indexs[1].to(f.device)] Output.append(f) return Output class AdaptiveAvgPool2d(nn.AdaptiveAvgPool2d): def __init__(self, output_size: Union[int, tuple] = 1 ): super(AdaptiveAvgPool2d, self).__init__(output_size=output_size) def profileModule(self, Input: Tensor): Output = self.forward(Input) return Output, 0.0, 0.0 class AdaptiveMaxPool2d(nn.AdaptiveMaxPool2d): def __init__(self, output_size: Union[int, tuple] = 1): super(AdaptiveMaxPool2d, self).__init__(output_size=output_size) def profileModule(self, Input: Tensor): Output = self.forward(Input) return Output, 0.0, 0.0 NormLayerTuple = ( nn.BatchNorm1d, nn.BatchNorm2d, nn.SyncBatchNorm, nn.LayerNorm, nn.InstanceNorm1d, nn.InstanceNorm2d, nn.GroupNorm, nn.BatchNorm3d, ) def initWeight(Module): # init conv, norm , and linear layers ## empty module if Module is None: return ## conv layer elif isinstance(Module, (nn.Conv2d, nn.Conv3d, nn.ConvTranspose2d)): nn.init.kaiming_uniform_(Module.weight, a=math.sqrt(5)) if Module.bias is not None: fan_in, _ = nn.init._calculate_fan_in_and_fan_out(Module.weight) if fan_in != 0: bound = 1 / 
math.sqrt(fan_in) nn.init.uniform_(Module.bias, -bound, bound) ## norm layer elif isinstance(Module, NormLayerTuple): if Module.weight is not None: nn.init.ones_(Module.weight) if Module.bias is not None: nn.init.zeros_(Module.bias) ## linear layer elif isinstance(Module, nn.Linear): nn.init.kaiming_uniform_(Module.weight, a=math.sqrt(5)) if Module.bias is not None: fan_in, _ = nn.init._calculate_fan_in_and_fan_out(Module.weight) bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0 nn.init.uniform_(Module.bias, -bound, bound) elif isinstance(Module, (nn.Sequential, nn.ModuleList)): for m in Module: initWeight(m) elif list(Module.children()): for m in Module.children(): initWeight(m) class BaseConv2d(nn.Module): def __init__( self, in_channels: int, out_channels: int, kernel_size: int, stride: Optional[int] = 1, padding: Optional[int] = None, groups: Optional[int] = 1, bias: Optional[bool] = None, BNorm: bool = False, # norm_layer: Optional[Callable[..., nn.Module]]=nn.BatchNorm2d, ActLayer: Optional[Callable[..., nn.Module]] = None, dilation: int = 1, Momentum: Optional[float] = 0.1, **kwargs: Any ) -> None: super(BaseConv2d, self).__init__() if padding is None: padding = int((kernel_size - 1) // 2 * dilation) if bias is None: bias = not BNorm self.in_channels = in_channels self.out_channels = out_channels self.kernel_size = kernel_size self.stride = stride self.padding = padding self.groups = groups self.bias = bias self.Conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias, **kwargs) self.Bn = nn.BatchNorm2d(out_channels, eps=0.001, momentum=Momentum) if BNorm else nn.Identity() if ActLayer is not None: if isinstance(list(ActLayer().named_modules())[0][1], nn.Sigmoid): self.Act = ActLayer() else: self.Act = ActLayer(inplace=True) else: self.Act = ActLayer self.apply(initWeight) def forward(self, x: Tensor) -> Tensor: x = self.Conv(x) x = self.Bn(x) if self.Act is not None: x = self.Act(x) return x def profileModule(self, 
Input: Tensor): if Input.dim() != 4: print(&#39;Conv2d requires 4-dimensional Input (BxCxHxW). Provided Input has shape: {}&#39;.format(Input.size())) BatchSize, in_channels, in_h, in_w = Input.size() assert in_channels == self.in_channels, &#39;{}!={}&#39;.format(in_channels, self.in_channels) k_h, k_w = pair(self.kernel_size) stride_h, stride_w = pair(self.stride) pad_h, pad_w = pair(self.padding) groups = self.groups out_h = (in_h - k_h + 2 * pad_h) // stride_h + 1 out_w = (in_w - k_w + 2 * pad_w) // stride_w + 1 # compute MACs MACs = (k_h * k_w) * (in_channels * self.out_channels) * (out_h * out_w) * 1.0 MACs /= groups if self.bias: MACs += self.out_channels * out_h * out_w # compute parameters Params = sum([p.numel() for p in self.parameters()]) Output = torch.zeros(size=(BatchSize, self.out_channels, out_h, out_w), dtype=Input.dtype, device=Input.device) # print(MACs) return Output, Params, MACs class MoCAttention(nn.Module): # Monte carlo attention def __init__( self, InChannels: int, HidChannels: int=None, SqueezeFactor: int=4, PoolRes: list=[1, 2, 3], Act: Callable[..., nn.Module]=nn.ReLU, ScaleAct: Callable[..., nn.Module]=nn.Sigmoid, MoCOrder: bool=True, **kwargs: Any, ) -> None: super().__init__() if HidChannels is None: HidChannels = max(makeDivisible(InChannels // SqueezeFactor, 8), 32) AllPoolRes = PoolRes + [1] if 1 not in PoolRes else PoolRes for k in AllPoolRes: Pooling = AdaptiveAvgPool2d(k) setMethod(self, &#39;Pool%d&#39; % k, Pooling) self.SELayer = nn.Sequential( BaseConv2d(InChannels, HidChannels, 1, ActLayer=Act), BaseConv2d(HidChannels, InChannels, 1, ActLayer=ScaleAct), ) self.PoolRes = PoolRes self.MoCOrder = MoCOrder def monteCarloSample(self, x: Tensor) -> Tensor: if self.training: PoolKeep = np.random.choice(self.PoolRes) x1 = shuffleTensor(x)[0] if self.MoCOrder else x AttnMap: Tensor = callMethod(self, &#39;Pool%d&#39; % PoolKeep)(x1) if AttnMap.shape[-1] > 1: AttnMap = AttnMap.flatten(2) AttnMap = AttnMap[:, :, 
torch.randperm(AttnMap.shape[-1])[0]] AttnMap = AttnMap[:, :, None, None] # squeeze twice else: AttnMap: Tensor = callMethod(self, &#39;Pool%d&#39; % 1)(x) return AttnMap def forward(self, x: Tensor) -> Tensor: AttnMap = self.monteCarloSample(x) return x * self.SELayer(AttnMap) class Conv(nn.Module): """Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation).""" default_act = nn.SiLU() def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True): super().__init__() self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False) self.bn = nn.BatchNorm2d(c2) self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity() def forward(self, x): return self.act(self.bn(self.conv(x))) class RepMCABottleneck(nn.Module): """Attentional Gated Convolution Bottleneck with RepVGG and MoCAttention.""" def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5): """ Args: c1 (int): Input channels c2 (int): Output channels shortcut (bool): Whether to use shortcut connection g (int): Groups for convolutions k (tuple): Kernel sizes for convolutions (k1, k2) e (float): Expansion ratio for intermediate channels """ super().__init__() c_ = int(c2 * e) # Intermediate channels # Attention module self.att = MoCAttention(InChannels=c1) # Extract individual kernel sizes from tuple k1, k2 = k # First RepVGG convolution self.repvgg1 = RepVGG(in_channels=c1, out_channels=c1, kernel_size=k1, padding=k1//2) # Use k1 # Additional convolution branch self.conv_branch = Conv(c1, c2, 1) # 1x1 convolution # Second RepVGG convolution self.repvgg2 = RepVGG(in_channels=c1, out_channels=c2, kernel_size=k2, padding=k2//2) # Use k2 # Shortcut handling self.add = shortcut and c1 == c2 if shortcut and c1 != c2: # Adjust dimensions if needed self.shortcut_conv = Conv(c1, c2, 1) # 1x1 conv for channel adjustment else: self.shortcut_conv = nn.Identity() def forward(self, x): # Apply attention 
att_out = self.att(x) # First RepVGG convolution repvgg1_out = self.repvgg1(att_out) # Additional convolution branch conv_branch_out = self.conv_branch(att_out) # Second RepVGG convolution repvgg2_out = self.repvgg2(repvgg1_out) # Combine outputs combined = repvgg2_out + conv_branch_out # Shortcut connection if self.add: return combined + self.shortcut_conv(x) return combined class C2f(nn.Module): """Faster Implementation of CSP Bottleneck with 2 convolutions.""" def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): """Initializes a CSP bottleneck with 2 convolutions and n Bottleneck blocks for faster processing.""" super().__init__() self.c = int(c2 * e) # hidden channels self.cv1 = Conv(c1, 2 * self.c, 1, 1) self.cv2 = Conv((2 + n) * self.c, c2, 1) # optional act=FReLU(c2) self.m = nn.ModuleList(RepMCABottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)) def forward(self, x): """Forward pass through C2f layer.""" y = list(self.cv1(x).chunk(2, 1)) y.extend(m(y[-1]) for m in self.m) return self.cv2(torch.cat(y, 1)) def forward_split(self, x): """Forward pass using split() instead of chunk().""" y = list(self.cv1(x).split((self.c, self.c), 1)) y.extend(m(y[-1]) for m in self.m) return self.cv2(torch.cat(y, 1)) class C3(nn.Module): """CSP Bottleneck with 3 convolutions.""" def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): """Initialize the CSP Bottleneck with given channels, number, shortcut, groups, and expansion values.""" super().__init__() c_ = int(c2 * e) # hidden channels self.cv1 = Conv(c1, c_, 1, 1) self.cv2 = Conv(c1, c_, 1, 1) self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2) self.m = nn.Sequential(*(RepMCABottleneck(c_, c_, shortcut, g, k=((1, 1), (3, 3)), e=1.0) for _ in range(n))) def forward(self, x): """Forward pass through the CSP bottleneck with 2 convolutions.""" return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1)) class C3k2_RepMCABottleneck(C2f): """Faster Implementation of CSP 
Bottleneck with 2 convolutions.""" def __init__(self, c1, c2, n=1, c3k=False, e=0.5, g=1, shortcut=True): """Initializes the C3k2 module, a faster CSP Bottleneck with 2 convolutions and optional C3k blocks.""" super().__init__(c1, c2, n, shortcut, g, e) self.m = nn.ModuleList( C3k(self.c, self.c, 2, shortcut, g) if c3k else RepMCABottleneck(self.c, self.c, shortcut, g) for _ in range(n) ) class C3k(C3): """C3k is a CSP bottleneck module with customizable kernel sizes for feature extraction in neural networks.""" def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, k=3): """Initializes the C3k module with specified channels, number of layers, and configurations.""" super().__init__(c1, c2, n, shortcut, g, e) c_ = int(c2 * e) # hidden channels # Create a tuple of kernel sizes (k, k) for RepMCABottleneck self.m = nn.Sequential(*(RepMCABottleneck(c_, c_, shortcut, g, k=(k, k), e=1.0) for _ in range(n))) # Add to module exports __all__ = [&#39;C3k2_RepMCABottleneck&#39;] 报错:TypeError: unsupported operand type(s) for //: &#39;tuple&#39; and &#39;int&#39;
07-16
import os import time import itertools import math import numpy as np import scipy as sp import scipy.sparse as sps from scipy.sparse.linalg import splu import torch import torch.nn as nn import pyamg from scipy.sparse import csr_matrix, isspmatrix_csr, diags from pyamg.multilevel import multilevel_solver from warnings import warn from scipy.sparse import csr_matrix, isspmatrix_csr, SparseEfficiencyWarning from pyamg.relaxation.smoothing import change_smoothers device = &#39;cpu&#39; # ========== 辅助函数 ========== def prolongation_fn(grid_size): res_stencil = np.zeros((3,3), dtype=np.double) k=16 res_stencil[0,0] = 1/k res_stencil[0,1] = 2/k res_stencil[0,2] = 1/k res_stencil[1,0] = 2/k res_stencil[1,1] = 4/k res_stencil[1,2] = 2/k res_stencil[2,0] = 1/k res_stencil[2,1] = 2/k res_stencil[2,2] = 1/k P_stencils = np.zeros((grid_size//2, grid_size//2, 3, 3)) for i in range(grid_size//2): for j in range(grid_size//2): P_stencils[i,j,:,:] = res_stencil return compute_p2(P_stencils, grid_size).astype(np.double) def compute_p2(P_stencil, grid_size): indexes = get_p_matrix_indices_one(grid_size) P = csr_matrix((P_stencil.reshape(-1), (indexes[:, 1], indexes[:, 0])), shape=((grid_size//2) ** 2, (grid_size) ** 2)) return P def get_p_matrix_indices_one(grid_size): K = map_2_to_1(grid_size=grid_size) indices = [] for ic in range(grid_size // 2): i = 2 * ic + 1 for jc in range(grid_size // 2): j = 2 * jc + 1 J = int(grid_size // 2 * jc + ic) for k in range(3): for m in range(3): I = int(K[i, j, k, m]) indices.append([I, J]) return np.array(indices) def map_2_to_1(grid_size=8): k = np.zeros((grid_size, grid_size, 3, 3)) M = np.reshape(np.arange(grid_size ** 2), (grid_size, grid_size)).T M = np.concatenate([M, M], axis=0) M = np.concatenate([M, M], axis=1) for i in range(3): I = (i - 1) % grid_size for j in range(3): J = (j - 1) % grid_size k[:, :, i, j] = M[I:I + grid_size, J:J + grid_size] return k def diffusion_stencil_2d(epsilon=1.0, theta=0.0, type=&#39;FD&#39;): eps = 
float(epsilon) theta = float(theta) C = np.cos(theta) S = np.sin(theta) CS = C*S CC = C**2 SS = S**2 if type == &#39;FE&#39;: a = (-1*eps - 1)*CC + (-1*eps - 1)*SS + (3*eps - 3)*CS b = (2*eps - 4)*CC + (-4*eps + 2)*SS c = (-1*eps - 1)*CC + (-1*eps - 1)*SS + (-3*eps + 3)*CS d = (-4*eps + 2)*CC + (2*eps - 4)*SS e = (8*eps + 8)*CC + (8*eps + 8)*SS stencil = np.array([[a, b, c],[d, e, d],[c, b, a]]) / 6.0 elif type == &#39;FD&#39;: a = -0.5*(eps - 1)*CS b = -(eps*SS + CC) c = -a d = -(eps*CC + SS) e = 2.0*(eps + 1) stencil = np.array([[a, d, c],[b, e, b],[c, d, a]]) return stencil def coo_to_tensor(coo): values = coo.data.astype(np.float64) indices = np.vstack((coo.row, coo.col)) i = torch.LongTensor(indices) v = torch.DoubleTensor(values) shape = coo.shape return torch.sparse_coo_tensor(i, v, torch.Size(shape)).to(device) # ========== 光滑算子 ========== def neural_smoother(net, size, mixed=0): # 返回PyTorch张量而不是SciPy矩阵 if mixed == 1: I = torch.eye(size*size, dtype=torch.double, device=device) x0 = I for conv_layer in net.convLayers1: kernel = conv_layer.weight.detach().view(3, 3) M = toeplitz_conv(kernel, size) x0 = torch.mm(M, x0) return x0 else: I = torch.eye(size*size, dtype=torch.double, device=device) x0 = I for conv_layer in net.convLayers1: kernel = conv_layer.weight.detach().view(3, 3) M = toeplitz_conv(kernel, size) x0 = torch.mm(M, x0) kernel2 = net.convLayers2[0].weight.detach().view(3, 3) M2 = toeplitz_conv(kernel2, size) y = x0 + (2/3) * M2 return y def toeplitz_conv(kernel, size): # 将3x3卷积核转换为Toeplitz矩阵 full_size = size * size M = torch.zeros(full_size, full_size, dtype=torch.double, device=device) for i in range(size): for j in range(size): idx = i * size + j for di in [-1, 0, 1]: for dj in [-1, 0, 1]: ni, nj = i + di, j + dj if 0 <= ni < size and 0 <= nj < size: nidx = ni * size + nj k_val = kernel[di+1, dj+1] M[idx, nidx] = k_val return M # ========== Level 创建 ========== def create_levels(eps, theta, n): mxl = 5 # max levels levels = [] # 创建最细层 s = 
diffusion_stencil_2d(eps, theta * np.pi / 180, &#39;FD&#39;) * 2 A = pyamg.gallery.stencil_grid(s, (n, n)).tocsr() # 创建第一层 - 使用PyAMG的level类而不是字典 level0 = multilevel_solver.level() level0.A = A level0.N = n level0.l = A.shape[0] levels.append(level0) current_n = n for i in range(1, mxl): # 因为已经有一层,所以从1开始 # 获取当前最细层(最后一层) fine_level = levels[-1] current_n = fine_level.N # 创建限制算子 R = prolongation_fn(current_n) # 插值算子是限制算子的转置 P = R.T * 4 # 存储到当前层(细层) fine_level.R = R fine_level.P = P # 为下一层准备:计算粗网格矩阵 A_coarse = R @ fine_level.A @ P # 创建粗网格层 coarse_level = multilevel_solver.level() coarse_level.A = A_coarse coarse_level.N = current_n // 2 # 网格大小减半 coarse_level.l = A_coarse.shape[0] levels.append(coarse_level) # 检查是否达到最小网格 if coarse_level.N < 8: break return levels # ========== Problem Class ========== class Problem: def __init__(self, eps, theta, grid_size, k=20, initial_ground_truth=None, initial_u=None, levels=None, net_trained=None, mxl=0): self.eps = eps self.theta = theta self.grid_size = grid_size if levels is None: levels = create_levels(eps, theta, grid_size) self.levels = levels N = levels[0].N l = levels[0].l # 初始化真实解 if initial_ground_truth is None: self.ground_truth = torch.rand(l, 1, dtype=torch.double, device=device, requires_grad=False) else: self.ground_truth = initial_ground_truth.detach().requires_grad_(False) # 初始解 if initial_u is None: self.initial_u = torch.rand(l, 1, dtype=torch.double, device=device, requires_grad=False) else: self.initial_u = initial_u.detach().requires_grad_(False) self.k = k self.N = N self.levels = levels self.mxl = mxl self.net_trained = net_trained or [] # 冻结预训练网络的参数 for net in self.net_trained: for param in net.parameters(): param.requires_grad = False # 使用SciPy稀疏矩阵计算右端项 A_sparse = self.levels[0].A gt_numpy = self.ground_truth.detach().cpu().numpy().flatten() f_numpy = A_sparse @ gt_numpy self.f = torch.tensor(f_numpy, dtype=torch.double, device=device).view(-1, 1).requires_grad_(False) def compute_solution(self, net): with 
torch.no_grad(): # 禁用梯度计算 A_sparse = self.levels[0].A # SciPy稀疏矩阵 b = self.f.detach().cpu().numpy().flatten() # 创建多重网格求解器 solver_a_CNN = multigrid_solver(A_sparse, self.grid_size, {&#39;smoother&#39;: &#39;a-CNN&#39;, &#39;eps&#39;: self.eps, &#39;theta&#39;: self.theta}, net, self.net_trained, self.mxl) u_solution = solver_a_CNN.solve(b, maxiter=10, tol=1e-6) return torch.tensor(u_solution, dtype=torch.double, device=device).view(-1, 1) # ========== 求解器 ========== def multigrid_solver(A, size, args, net, net_trained, mxl): solver = geometric_solver(A, prolongation_fn, max_levels=5, coarse_solver=&#39;splu&#39;) if net_trained!=0: nets = [net]+net_trained else: nets = [net] if args[&#39;smoother&#39;] == &#39;a-CNN&#39;: # mxl最大是5 i in range(4) 0 1 2 3 for i in range(mxl-1): # 创建当前层的光滑算子 M = neural_smoother(nets[i], size// (2 ** i )) # 定义光滑函数 - 修改后版本 def relax(A, x, b, M_new=M): # 计算残差 (使用NumPy的稀疏矩阵操作) r = b - A.dot(x) # 转换为PyTorch张量进行矩阵乘法 r_tensor = torch.tensor(r, dtype=torch.double, device=&#39;cpu&#39;).view(-1, 1) correction = M_new @ r_tensor # 转回NumPy并更新解 x += correction.view(-1).cpu().numpy() # 设置光滑器 solver.levels[i].presmoother = relax solver.levels[i].postsmoother = relax return solver def geometric_solver(A, prolongation_function, presmoother=(&#39;gauss_seidel&#39;, {&#39;sweep&#39;: &#39;forward&#39;}), postsmoother=(&#39;gauss_seidel&#39;, {&#39;sweep&#39;: &#39;forward&#39;}), max_levels=5, max_coarse=10, coarse_solver=&#39;splu&#39;, **kwargs): levels = [multilevel_solver.level()] # convert A to csr if not isspmatrix_csr(A): try: A = csr_matrix(A) warn("Implicit conversion of A to CSR", SparseEfficiencyWarning) except BaseException: raise TypeError(&#39;Argument A must have type csr_matrix, or be convertible to csr_matrix&#39;) # preprocess A A = A.asfptype() if A.shape[0] != A.shape[1]: raise ValueError(&#39;expected square matrix&#39;) levels[-1].A = A while len(levels) < max_levels and levels[-1].A.shape[0] > max_coarse: extend_hierarchy(levels, 
prolongation_function) # 使用MultilevelSolver代替弃用的multilevel_solver ml = pyamg.multilevel.MultilevelSolver(levels, **kwargs) change_smoothers(ml, presmoother, postsmoother) return ml # internal function def extend_hierarchy(levels, prolongation_fn): """Extend the multigrid hierarchy.""" A = levels[-1].A N = A.shape[0] n = int(math.sqrt(N)) R = prolongation_fn(n) P = R.T.tocsr() * 4 levels[-1].P = P # prolongation operator levels[-1].R = R # restriction operator levels.append(multilevel_solver.level()) # Form next level through Galerkin product A = R * A * P A = A.astype(np.float64) # convert from complex numbers, should have A.imag==0 levels[-1].A = A # ========== 神经网络模型 ========== class _ConvNet_(nn.Module): def __init__(self, initial=5, kernel_size=3, initial_kernel=0.1): super(_ConvNet_, self).__init__() self.convLayers1 = nn.ModuleList([ nn.Conv2d(1, 1, kernel_size, padding=kernel_size//2, bias=False).double() for _ in range(5) ]) self.convLayers2 = nn.ModuleList([ nn.Conv2d(1, 1, kernel_size, padding=kernel_size//2, bias=False).double() for _ in range(2) ]) # 初始化权重 initial_weights = torch.zeros(1, 1, kernel_size, kernel_size, dtype=torch.double) initial_weights[0, 0, kernel_size//2, kernel_size//2] = initial_kernel for net in self.convLayers1: net.weight = nn.Parameter(initial_weights.clone()) for net in self.convLayers2: net.weight = nn.Parameter(initial_weights.clone()) def forward(self, x): y1 = x y2 = x for net in self.convLayers1: y1 = torch.tanh(net(y1)) for net in self.convLayers2: y2 = torch.tanh(net(y2)) return y1 + (2/3) * y2 def compute_loss(net, problem_instances): loss = torch.zeros(1, device=device, requires_grad=True) for problem in problem_instances: # 确保计算图连接 with torch.set_grad_enabled(True): u_pred = problem.compute_solution(net) u_true = problem.ground_truth # 确保梯度可以回传 u_pred.requires_grad_(True) u_true.requires_grad_(False) # 计算损失 diff = u_pred - u_true norm_diff = torch.norm(diff) norm_true = torch.norm(u_true) loss = loss + norm_diff / 
norm_true return loss def chunks(l, n): for i in range(0, len(l), n): yield l[i:i + n] def set_seed(seed): torch.manual_seed(seed) np.random.seed(seed) # ========== AlphaCNN ========== class alphaCNN: def __init__(self, net=None, batch_size=1, learning_rate=1e-6, max_epochs=1000, nb_layers=5, tol=1e-6, stable_count=50, optimizer=&#39;SGD&#39;, check_spectral_radius=False, random_seed=None, kernel_size=3, initial_kernel=0.1): if random_seed is not None: set_seed(random_seed) if net is None: self.net = _ConvNet_(initial=5, kernel_size=kernel_size, initial_kernel=initial_kernel).to(device) else: self.net = net # 确保网络参数需要梯度 for param in self.net.parameters(): param.requires_grad = True self.learning_rate = learning_rate if optimizer == &#39;Adadelta&#39;: self.optim = torch.optim.Adadelta(self.net.parameters(), lr=learning_rate) elif optimizer == &#39;Adam&#39;: self.optim = torch.optim.Adam(self.net.parameters(), lr=learning_rate) else: self.optim = torch.optim.SGD(self.net.parameters(), lr=learning_rate) self.batch_size = batch_size self.max_epochs = max_epochs self.tol = tol self.stable_count = stable_count def _optimization_step_(self, problem_instances): shuffled_problem_instances = np.random.permutation(problem_instances) for problem_chunk in chunks(shuffled_problem_instances, self.batch_size): self.optim.zero_grad() loss = compute_loss(self.net, problem_chunk) # 检查梯度是否存在 if loss.grad_fn is None: raise RuntimeError("Loss has no gradient. 
Check the computation graph.") loss.backward() self.optim.step() # 确保梯度被应用 with torch.no_grad(): for param in self.net.parameters(): if param.grad is not None: param -= self.learning_rate * param.grad def fit(self, problem_instances): losses = [] prev_total_loss = compute_loss(self.net, problem_instances).item() convergence_counter = 0 problem_number = len(problem_instances) for n_epoch in range(self.max_epochs): start_time = time.time() self._optimization_step_(problem_instances) total_loss = compute_loss(self.net, problem_instances).item() losses.append(total_loss) if np.abs(total_loss - prev_total_loss) < self.tol * problem_number: convergence_counter += 1 if convergence_counter >= self.stable_count: print(f"Converged after {n_epoch} epochs") break else: convergence_counter = 0 prev_total_loss = total_loss epoch_time = time.time() - start_time if n_epoch % 10 == 0: print(f"Epoch: {n_epoch:>3} Loss: {total_loss:>10.6f} Time: {epoch_time:.2f}s") self.losses = losses print(f"Training completed. 
Final loss: {total_loss:.6f}") return self # ========== 模型训练 ========== def train_and_save_model(eps, theta, coarsening=&#39;full&#39;): n = 33 # 网格大小 # 创建模型目录 model_dir = f&#39;./models/theta_{theta}_eps_{eps}&#39; if not os.path.isdir(model_dir): os.makedirs(model_dir) # 创建层级结构 levels = create_levels(eps, theta, n) # 第一层训练 (最粗层) problem_instances1 = [ Problem(eps, theta, n, k=k, levels=levels, mxl=1) for k in range(1, 13) ] model1 = alphaCNN( batch_size=8, learning_rate=1e-8, max_epochs=1000, nb_layers=5, tol=1e-6, stable_count=10, optimizer=&#39;Adam&#39;, random_seed=9, initial_kernel=0.1 ) model1.fit(problem_instances1) torch.save(model1.net.state_dict(), os.path.join(model_dir, f&#39;theta_{theta}_eps_{eps}_level1.pth&#39;)) # 第二层训练 problem_instances2 = [ Problem(eps, theta, n, k=k, levels=levels, mxl=2, net_trained=[model1.net]) for k in range(1, 15) ] model2 = alphaCNN( batch_size=8, learning_rate=1e-8, max_epochs=1000, nb_layers=5, tol=1e-6, stable_count=10, optimizer=&#39;Adam&#39;, random_seed=9, initial_kernel=0.02/3 ) model2.fit(problem_instances2) torch.save(model2.net.state_dict(), os.path.join(model_dir, f&#39;theta_{theta}_eps_{eps}_level2.pth&#39;)) # 第三层训练 problem_instances3 = [ Problem(eps, theta, n, k=k, levels=levels, mxl=3, net_trained=[model1.net, model2.net]) for k in range(1, 17) ] model3 = alphaCNN( batch_size=8, learning_rate=1e-8, max_epochs=1000, nb_layers=5, tol=1e-6, stable_count=10, optimizer=&#39;Adam&#39;, random_seed=9, initial_kernel=0.002/3 ) model3.fit(problem_instances3) torch.save(model3.net.state_dict(), os.path.join(model_dir, f&#39;theta_{theta}_eps_{eps}_level3.pth&#39;)) # 第四层训练 (最细层) problem_instances4 = [ Problem(eps, theta, n, k=k, levels=levels, mxl=4, net_trained=[model1.net, model2.net, model3.net]) for k in range(1, 20) ] model4 = alphaCNN( batch_size=8, learning_rate=1e-8, max_epochs=1000, nb_layers=5, tol=1e-6, stable_count=10, optimizer=&#39;Adam&#39;, random_seed=9, initial_kernel=0.002/3 ) 
model4.fit(problem_instances4) torch.save(model4.net.state_dict(), os.path.join(model_dir, f&#39;theta_{theta}_eps_{eps}_level4.pth&#39;)) # 训练模型 if __name__ == "__main__": train_and_save_model(100, 75) 损失值太大,帮我修改代码,检查是否有错误
最新发布
07-30
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值