import math
from typing import Any, Callable, List, Optional, Union

import numpy as np
import torch
import torch.nn.functional as F
from torch import nn, Tensor
# build RepVGG block
# -----------------------------
def conv_bn(in_channels, out_channels, kernel_size, stride, padding, groups=1):
result = nn.Sequential()
result.add_module('conv', nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
kernel_size=kernel_size, stride=stride, padding=padding, groups=groups,
bias=False))
result.add_module('bn', nn.BatchNorm2d(num_features=out_channels))
return result
class SEBlock(nn.Module):
    def __init__(self, input_channels, internal_neurons=None):
        super(SEBlock, self).__init__()
        if internal_neurons is None:
            internal_neurons = input_channels // 8
self.down = nn.Conv2d(in_channels=input_channels, out_channels=internal_neurons, kernel_size=1, stride=1,
bias=True)
self.up = nn.Conv2d(in_channels=internal_neurons, out_channels=input_channels, kernel_size=1, stride=1,
bias=True)
self.input_channels = input_channels
def forward(self, inputs):
x = F.avg_pool2d(inputs, kernel_size=inputs.size(3))
x = self.down(x)
x = F.relu(x)
x = self.up(x)
x = torch.sigmoid(x)
x = x.view(-1, self.input_channels, 1, 1)
return inputs * x
class RepVGG(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size=3,
stride=1, padding=1, dilation=1, groups=1, padding_mode='zeros', deploy=False, use_se=False):
super(RepVGG, self).__init__()
self.deploy = deploy
self.groups = groups
self.in_channels = in_channels
padding_11 = padding - kernel_size // 2
self.nonlinearity = nn.SiLU()
# self.nonlinearity = nn.ReLU()
if use_se:
self.se = SEBlock(out_channels, internal_neurons=out_channels // 16)
else:
self.se = nn.Identity()
if deploy:
self.rbr_reparam = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
stride=stride,
padding=padding, dilation=dilation, groups=groups, bias=True,
padding_mode=padding_mode)
else:
self.rbr_identity = nn.BatchNorm2d(
num_features=in_channels) if out_channels == in_channels and stride == 1 else None
self.rbr_dense = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
stride=stride, padding=padding, groups=groups)
self.rbr_1x1 = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride,
padding=padding_11, groups=groups)
# print('RepVGG Block, identity = ', self.rbr_identity)
def get_equivalent_kernel_bias(self):
kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense)
kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1)
kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity)
return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
def _pad_1x1_to_3x3_tensor(self, kernel1x1):
if kernel1x1 is None:
return 0
else:
return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])
def _fuse_bn_tensor(self, branch):
if branch is None:
return 0, 0
if isinstance(branch, nn.Sequential):
kernel = branch.conv.weight
running_mean = branch.bn.running_mean
running_var = branch.bn.running_var
gamma = branch.bn.weight
beta = branch.bn.bias
eps = branch.bn.eps
else:
assert isinstance(branch, nn.BatchNorm2d)
if not hasattr(self, 'id_tensor'):
input_dim = self.in_channels // self.groups
kernel_value = np.zeros((self.in_channels, input_dim, 3, 3), dtype=np.float32)
for i in range(self.in_channels):
kernel_value[i, i % input_dim, 1, 1] = 1
self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device)
kernel = self.id_tensor
running_mean = branch.running_mean
running_var = branch.running_var
gamma = branch.weight
beta = branch.bias
eps = branch.eps
std = (running_var + eps).sqrt()
t = (gamma / std).reshape(-1, 1, 1, 1)
return kernel * t, beta - running_mean * gamma / std
def forward(self, inputs):
if hasattr(self, 'rbr_reparam'):
return self.nonlinearity(self.se(self.rbr_reparam(inputs)))
if self.rbr_identity is None:
id_out = 0
else:
id_out = self.rbr_identity(inputs)
return self.nonlinearity(self.se(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out))
def fusevggforward(self, x):
return self.nonlinearity(self.rbr_dense(x))
# RepVGG block end
# -----------------------------
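# Minimal sketch (not part of the original block): one way the fused weights returned by
# get_equivalent_kernel_bias() could be loaded into a deploy-mode RepVGG instance after
# training. The helper name `repvgg_to_deploy` is an assumption for illustration; SE
# weights (use_se=True) are not carried over here.
def repvgg_to_deploy(block: RepVGG) -> RepVGG:
    """Return a deploy-mode copy of `block` using its re-parameterized 3x3 kernel."""
    kernel, bias = block.get_equivalent_kernel_bias()
    conv = block.rbr_dense.conv
    fused = RepVGG(block.in_channels, conv.out_channels,
                   kernel_size=conv.kernel_size[0], stride=conv.stride[0],
                   padding=conv.padding[0], groups=block.groups, deploy=True)
    fused.rbr_reparam.weight.data = kernel.detach()
    fused.rbr_reparam.bias.data = bias.detach()
    return fused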
def autopad(k, p=None, d=1): # kernel, padding, dilation
"""Pad to 'same' shape outputs."""
if d > 1:
k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size
if p is None:
p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
return p
def makeDivisible(v: float, divisor: int, min_value: Optional[int] = None) -> int:
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.Py
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
def callMethod(self, ElementName):
return getattr(self, ElementName)
def setMethod(self, ElementName, ElementValue):
return setattr(self, ElementName, ElementValue)
def shuffleTensor(Feature: Tensor, Mode: int = 1) -> List[Tensor]:
    # shuffle one or more tensors with the same indices
    # all tensors must have the same shape
if isinstance(Feature, Tensor):
Feature = [Feature]
Indexs = None
Output = []
for f in Feature:
        # indexing is not in-place, so the shuffled tensor is collected in Output
B, C, H, W = f.shape
if Mode == 1:
# fully shuffle
f = f.flatten(2)
if Indexs is None:
Indexs = torch.randperm(f.shape[-1], device=f.device)
f = f[:, :, Indexs.to(f.device)]
f = f.reshape(B, C, H, W)
else:
            # shuffle along the y axis and then the x axis
if Indexs is None:
Indexs = [torch.randperm(H, device=f.device),
torch.randperm(W, device=f.device)]
f = f[:, :, Indexs[0].to(f.device)]
f = f[:, :, :, Indexs[1].to(f.device)]
Output.append(f)
return Output
class AdaptiveAvgPool2d(nn.AdaptiveAvgPool2d):
def __init__(self, output_size: Union[int, tuple] = 1 ):
super(AdaptiveAvgPool2d, self).__init__(output_size=output_size)
def profileModule(self, Input: Tensor):
Output = self.forward(Input)
return Output, 0.0, 0.0
class AdaptiveMaxPool2d(nn.AdaptiveMaxPool2d):
def __init__(self, output_size: Union[int, tuple] = 1):
super(AdaptiveMaxPool2d, self).__init__(output_size=output_size)
def profileModule(self, Input: Tensor):
Output = self.forward(Input)
return Output, 0.0, 0.0
NormLayerTuple = (
nn.BatchNorm1d,
nn.BatchNorm2d,
nn.SyncBatchNorm,
nn.LayerNorm,
nn.InstanceNorm1d,
nn.InstanceNorm2d,
nn.GroupNorm,
nn.BatchNorm3d,
)
def initWeight(Module):
# init conv, norm , and linear layers
## empty module
if Module is None:
return
## conv layer
elif isinstance(Module, (nn.Conv2d, nn.Conv3d, nn.ConvTranspose2d)):
nn.init.kaiming_uniform_(Module.weight, a=math.sqrt(5))
if Module.bias is not None:
fan_in, _ = nn.init._calculate_fan_in_and_fan_out(Module.weight)
if fan_in != 0:
bound = 1 / math.sqrt(fan_in)
nn.init.uniform_(Module.bias, -bound, bound)
## norm layer
elif isinstance(Module, NormLayerTuple):
if Module.weight is not None:
nn.init.ones_(Module.weight)
if Module.bias is not None:
nn.init.zeros_(Module.bias)
## linear layer
elif isinstance(Module, nn.Linear):
nn.init.kaiming_uniform_(Module.weight, a=math.sqrt(5))
if Module.bias is not None:
fan_in, _ = nn.init._calculate_fan_in_and_fan_out(Module.weight)
bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
nn.init.uniform_(Module.bias, -bound, bound)
elif isinstance(Module, (nn.Sequential, nn.ModuleList)):
for m in Module:
initWeight(m)
elif list(Module.children()):
for m in Module.children():
initWeight(m)
class BaseConv2d(nn.Module):
def __init__(
self,
in_channels: int,
out_channels: int,
kernel_size: int,
stride: Optional[int] = 1,
padding: Optional[int] = None,
groups: Optional[int] = 1,
bias: Optional[bool] = None,
BNorm: bool = False,
# norm_layer: Optional[Callable[..., nn.Module]]=nn.BatchNorm2d,
ActLayer: Optional[Callable[..., nn.Module]] = None,
dilation: int = 1,
Momentum: Optional[float] = 0.1,
**kwargs: Any
) -> None:
super(BaseConv2d, self).__init__()
if padding is None:
padding = int((kernel_size - 1) // 2 * dilation)
if bias is None:
bias = not BNorm
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
self.groups = groups
self.bias = bias
self.Conv = nn.Conv2d(in_channels, out_channels,
kernel_size, stride, padding, dilation, groups, bias, **kwargs)
self.Bn = nn.BatchNorm2d(out_channels, eps=0.001, momentum=Momentum) if BNorm else nn.Identity()
if ActLayer is not None:
if isinstance(list(ActLayer().named_modules())[0][1], nn.Sigmoid):
self.Act = ActLayer()
else:
self.Act = ActLayer(inplace=True)
else:
self.Act = ActLayer
self.apply(initWeight)
def forward(self, x: Tensor) -> Tensor:
x = self.Conv(x)
x = self.Bn(x)
if self.Act is not None:
x = self.Act(x)
return x
def profileModule(self, Input: Tensor):
if Input.dim() != 4:
print('Conv2d requires 4-dimensional Input (BxCxHxW). Provided Input has shape: {}'.format(Input.size()))
BatchSize, in_channels, in_h, in_w = Input.size()
assert in_channels == self.in_channels, '{}!={}'.format(in_channels, self.in_channels)
        # kernel_size/stride/padding are stored as ints here, so expand them to (h, w) pairs
        k_h, k_w = (self.kernel_size, self.kernel_size) if isinstance(self.kernel_size, int) else self.kernel_size
        stride_h, stride_w = (self.stride, self.stride) if isinstance(self.stride, int) else self.stride
        pad_h, pad_w = (self.padding, self.padding) if isinstance(self.padding, int) else self.padding
groups = self.groups
out_h = (in_h - k_h + 2 * pad_h) // stride_h + 1
out_w = (in_w - k_w + 2 * pad_w) // stride_w + 1
# compute MACs
MACs = (k_h * k_w) * (in_channels * self.out_channels) * (out_h * out_w) * 1.0
MACs /= groups
if self.bias:
MACs += self.out_channels * out_h * out_w
# compute parameters
Params = sum([p.numel() for p in self.parameters()])
Output = torch.zeros(size=(BatchSize, self.out_channels, out_h, out_w), dtype=Input.dtype, device=Input.device)
# print(MACs)
return Output, Params, MACs
class MoCAttention(nn.Module):
    # Monte Carlo attention
def __init__(
self,
InChannels: int,
HidChannels: int=None,
SqueezeFactor: int=4,
PoolRes: list=[1, 2, 3],
Act: Callable[..., nn.Module]=nn.ReLU,
ScaleAct: Callable[..., nn.Module]=nn.Sigmoid,
MoCOrder: bool=True,
**kwargs: Any,
) -> None:
super().__init__()
if HidChannels is None:
HidChannels = max(makeDivisible(InChannels // SqueezeFactor, 8), 32)
AllPoolRes = PoolRes + [1] if 1 not in PoolRes else PoolRes
for k in AllPoolRes:
Pooling = AdaptiveAvgPool2d(k)
setMethod(self, 'Pool%d' % k, Pooling)
self.SELayer = nn.Sequential(
BaseConv2d(InChannels, HidChannels, 1, ActLayer=Act),
BaseConv2d(HidChannels, InChannels, 1, ActLayer=ScaleAct),
)
self.PoolRes = PoolRes
self.MoCOrder = MoCOrder
def monteCarloSample(self, x: Tensor) -> Tensor:
if self.training:
PoolKeep = np.random.choice(self.PoolRes)
x1 = shuffleTensor(x)[0] if self.MoCOrder else x
AttnMap: Tensor = callMethod(self, 'Pool%d' % PoolKeep)(x1)
if AttnMap.shape[-1] > 1:
AttnMap = AttnMap.flatten(2)
AttnMap = AttnMap[:, :, torch.randperm(AttnMap.shape[-1])[0]]
                AttnMap = AttnMap[:, :, None, None]  # restore the (B, C, 1, 1) shape
else:
AttnMap: Tensor = callMethod(self, 'Pool%d' % 1)(x)
return AttnMap
def forward(self, x: Tensor) -> Tensor:
AttnMap = self.monteCarloSample(x)
return x * self.SELayer(AttnMap)
class Conv(nn.Module):
"""Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)."""
default_act = nn.SiLU()
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
super().__init__()
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
self.bn = nn.BatchNorm2d(c2)
self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
def forward(self, x):
return self.act(self.bn(self.conv(x)))
class RepMCABottleneck(nn.Module):
"""Attentional Gated Convolution Bottleneck with RepVGG and MoCAttention."""
def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
"""
Args:
c1 (int): Input channels
c2 (int): Output channels
shortcut (bool): Whether to use shortcut connection
g (int): Groups for convolutions
k (tuple): Kernel sizes for convolutions
e (float): Expansion ratio for intermediate channels
"""
super().__init__()
        c_ = int(c2 * e)  # Intermediate channels (kept for interface compatibility; unused below)
        # Kernel entries may be ints (e.g. k=(3, 3)) or tuples (e.g. k=((3, 3), (3, 3)) as passed
        # by C2f/C3); normalize them to ints so the padding computation `k // 2` does not raise
        # "TypeError: unsupported operand type(s) for //: 'tuple' and 'int'".
        k1 = k[0][0] if isinstance(k[0], (tuple, list)) else k[0]
        k2 = k[1][0] if isinstance(k[1], (tuple, list)) else k[1]
        # Attention module
        self.att = MoCAttention(InChannels=c1)
        # First RepVGG convolution
        self.repvgg1 = RepVGG(in_channels=c1, out_channels=c1,
                              kernel_size=k1, padding=k1 // 2)
        # Additional convolution branch
        self.conv_branch = Conv(c1, c2, 1)  # 1x1 convolution
        # Second RepVGG convolution
        self.repvgg2 = RepVGG(in_channels=c1, out_channels=c2,
                              kernel_size=k2, padding=k2 // 2)
# Shortcut handling
self.add = shortcut and c1 == c2
if shortcut and c1 != c2:
# Adjust dimensions if needed
self.shortcut_conv = Conv(c1, c2, 1) # 1x1 conv for channel adjustment
else:
self.shortcut_conv = nn.Identity()
def forward(self, x):
# Apply attention
att_out = self.att(x)
# First RepVGG convolution
repvgg1_out = self.repvgg1(att_out)
# Additional convolution branch
conv_branch_out = self.conv_branch(att_out)
# Second RepVGG convolution
repvgg2_out = self.repvgg2(repvgg1_out)
# Combine outputs
combined = repvgg2_out + conv_branch_out
# Shortcut connection
if self.add:
return combined + self.shortcut_conv(x)
return combined
class C2f(nn.Module):
"""Faster Implementation of CSP Bottleneck with 2 convolutions."""
def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
"""Initializes a CSP bottleneck with 2 convolutions and n Bottleneck blocks for faster processing."""
super().__init__()
self.c = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, 2 * self.c, 1, 1)
self.cv2 = Conv((2 + n) * self.c, c2, 1) # optional act=FReLU(c2)
self.m = nn.ModuleList(RepMCABottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
def forward(self, x):
"""Forward pass through C2f layer."""
y = list(self.cv1(x).chunk(2, 1))
y.extend(m(y[-1]) for m in self.m)
return self.cv2(torch.cat(y, 1))
def forward_split(self, x):
"""Forward pass using split() instead of chunk()."""
y = list(self.cv1(x).split((self.c, self.c), 1))
y.extend(m(y[-1]) for m in self.m)
return self.cv2(torch.cat(y, 1))
class C3(nn.Module):
"""CSP Bottleneck with 3 convolutions."""
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
"""Initialize the CSP Bottleneck with given channels, number, shortcut, groups, and expansion values."""
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c1, c_, 1, 1)
self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
self.m = nn.Sequential(*(RepMCABottleneck(c_, c_, shortcut, g, k=((1, 1), (3, 3)), e=1.0) for _ in range(n)))
def forward(self, x):
"""Forward pass through the CSP bottleneck with 2 convolutions."""
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
class C3k2_RepMCABottleneck(C2f):
"""Faster Implementation of CSP Bottleneck with 2 convolutions."""
def __init__(self, c1, c2, n=1, c3k=False, e=0.5, g=1, shortcut=True):
"""Initializes the C3k2 module, a faster CSP Bottleneck with 2 convolutions and optional C3k blocks."""
super().__init__(c1, c2, n, shortcut, g, e)
self.m = nn.ModuleList(
C3k(self.c, self.c, 2, shortcut, g) if c3k else RepMCABottleneck(self.c, self.c, shortcut, g) for _ in range(n)
)
class C3k(C3):
"""C3k is a CSP bottleneck module with customizable kernel sizes for feature extraction in neural networks."""
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, k=3):
"""Initializes the C3k module with specified channels, number of layers, and configurations."""
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e) # hidden channels
# self.m = nn.Sequential(*(RepBottleneck(c_, c_, shortcut, g, k=(k, k), e=1.0) for _ in range(n)))
self.m = nn.Sequential(*(RepMCABottleneck(c_, c_, shortcut, g, k=(k, k), e=1.0) for _ in range(n)))
# Add to module exports
__all__ = ['C3k2_RepMCABottleneck']
# Reported error: TypeError: unsupported operand type(s) for //: 'tuple' and 'int'
# Cause: C2f and C3 pass tuple kernel sizes (e.g. k=((3, 3), (3, 3))) into RepMCABottleneck,
# whose padding is computed as `k // 2`; RepMCABottleneck therefore normalizes tuple kernel
# entries to ints before computing the padding.
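# Minimal smoke test (illustrative assumption, not part of the original module): checks that
# C3k2_RepMCABottleneck builds and runs now that tuple kernel sizes are normalized.
if __name__ == "__main__":
    block = C3k2_RepMCABottleneck(64, 128, n=1, c3k=False)
    block.eval()  # use the deterministic pooling branch of MoCAttention
    dummy = torch.randn(2, 64, 32, 32)  # (batch, channels, height, width)
    with torch.no_grad():
        out = block(dummy)
    print(out.shape)  # expected: torch.Size([2, 128, 32, 32])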