The YOLOv8 source version used here is ultralytics-8.2.54.
Download the GhostNetV3 source from https://codeload.github.com/huawei-noah/Efficient-AI-Backbones
Copy ghostnetv3.py into ./ultralytics-8.2.54/ultralytics/nn/modules.
Following the MobileNetV4 tutorial, I modified the following parts of ghostnetv3.py:
class GhostNet(nn.Module):
    def __init__(self, block_specs, num_classes=1000):
        super(GhostNet, self).__init__()
        width = 1.6
        dropout = 0.2
        block = GhostBottleneck
        # setting of inverted residual blocks
        self.dropout = dropout

        # building first layer
        output_channel = _make_divisible(16 * width, 4)
        self.conv_stem = nn.Conv2d(3, output_channel, 3, 2, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(output_channel)
        self.act1 = nn.ReLU(inplace=True)
        input_channel = output_channel

        # building inverted residual blocks
        stages = []
        layer_id = 0
        for block_cfg in block_specs:
            layers = []
            for k, exp_size, c, se_ratio, s in block_cfg:
                output_channel = _make_divisible(c * width, 4)
                hidden_channel = _make_divisible(exp_size * width, 4)
                if block == GhostBottleneck:
                    layers.append(block(input_channel, hidden_channel, output_channel, k, s,
                                        se_ratio=se_ratio, layer_id=layer_id))
                input_channel = output_channel
                layer_id += 1
            stages.append(nn.Sequential(*layers))

        output_channel = _make_divisible(exp_size * width, 4)
        stages.append(nn.Sequential(ConvBnAct(input_channel, output_channel, 1)))
        input_channel = output_channel

        self.blocks = nn.Sequential(*stages)
        del self.blocks[9]  # drop the final ConvBnAct stage; detection only needs the pyramid features

        # building last several layers
        output_channel = 1280
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.conv_head = nn.Conv2d(input_channel, output_channel, 1, 1, 0, bias=True)
        self.act2 = nn.ReLU(inplace=True)
        self.classifier = nn.Linear(output_channel, num_classes)
        self.layers_out_filters = [16, 24, 40, 112, 160]
        self.channels = [40, 64, 180, 256]
    def forward(self, x):
        x = self.conv_stem(x)
        x = self.bn1(x)
        x = self.act1(x)
        feature_maps = []
        for idx, block in enumerate(self.blocks):
            x = block(x)
            if idx in [2, 4, 6, 8]:  # collect feature maps at strides 4, 8, 16 and 32
                feature_maps.append(x)
        return feature_maps
    # def forward_ori(self, x):
    #     x = self.conv_stem(x)
    #     x = self.bn1(x)
    #     x = self.act1(x)
    #     x = self.blocks(x)
    #     x = self.global_pool(x)
    #     x = self.conv_head(x)
    #     x = self.act2(x)
    #     x = x.view(x.size(0), -1)
    #     if self.dropout > 0.:
    #         x = F.dropout(x, p=self.dropout, training=self.training)
    #     x = self.classifier(x)
    #     x = x.squeeze()
    #     return x
    def reparameterize(self):
        for _, module in self.named_modules():
            if isinstance(module, GhostModule):
                module.reparameterize()
            if isinstance(module, GhostBottleneck):
                module.reparameterize()
@register_model
def ghostnetv3(**kwargs):
    """
    Constructs a GhostNet model
    """
    block_specs = [
        # k, t, c, SE, s
        # stage1
        [[3, 16, 16, 0, 1]],
        # stage2
        [[3, 48, 24, 0, 2]],
        [[3, 72, 24, 0, 1]],
        # stage3
        [[5, 72, 40, 0.25, 2]],
        [[5, 120, 40, 0.25, 1]],
        # stage4
        [[3, 240, 80, 0, 2]],
        [
            [3, 200, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 480, 112, 0.25, 1],
            [3, 672, 112, 0.25, 1],
        ],
        # stage5
        [[5, 672, 160, 0.25, 2]],
        [
            [5, 960, 160, 0, 1],
            [5, 960, 160, 0.25, 1],
            [5, 960, 160, 0, 1],
            [5, 960, 160, 0.25, 1],
        ],
    ]
    model = GhostNet(block_specs, **kwargs)  # width=1.6 and dropout=0.2 are hard-coded in __init__
    return model
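As a quick sanity check (a minimal sketch, assuming ghostnetv3.py and its GhostBottleneck/GhostModule/ConvBnAct definitions are importable as-is), the modified forward should now return four feature maps whose channel counts match self.channels = [40, 64, 180, 256]:

import torch

model = ghostnetv3()
model.eval()
feats = model(torch.randn(1, 3, 640, 640))
for f in feats:
    print(f.shape)
# Expected for a 640x640 input: spatial sizes 160/80/40/20 (strides 4, 8, 16, 32)
# with 40, 64, 180 and 256 channels respectively (width=1.6).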
Model registration and import

ghostnetv3 needs to be imported in ./ultralytics-8.2.54/ultralytics/nn/modules/__init__.py.
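A minimal sketch of the addition (the existing imports and __all__ entries in your copy of the file stay as they are):

# ./ultralytics-8.2.54/ultralytics/nn/modules/__init__.py
from .ghostnetv3 import ghostnetv3

# and append "ghostnetv3" to the existing __all__ tuple so that
# `from ultralytics.nn.modules import ghostnetv3` works in tasks.py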
Modifying tasks.py

(In this release the file is ultralytics/nn/tasks.py. If you are not familiar with what this file does, this video is a good introduction: https://www.bilibili.com/video/BV1QC4y1R74t/?spm_id_from=333.999.top_right_bar_window_default_collection.content.click)

The top of the modified file is shown below; relative to the stock file, the additions are the new backbone imports (C2f_UIB, mobilenetv4_conv_large, ghostnetv3) and the `hasattr(m, 'backbone')` branch in `_predict_once`:
# Ultralytics YOLO 🚀, AGPL-3.0 license
import contextlib
from copy import deepcopy
from pathlib import Path

import torch
import torch.nn as nn

from ultralytics.nn.modules import (
    C2f_UIB,
    mobilenetv4_conv_large,
    ghostnetv3,
    AIFI,
    C1,
    C2,
    C3,
    C3TR,
    ELAN1,
    OBB,
    PSA,
    SPP,
    SPPELAN,
    SPPF,
    AConv,
    ADown,
    Bottleneck,
    BottleneckCSP,
    C2f,
    C2fAttn,
    C2fCIB,
    C3Ghost,
    C3x,
    CBFuse,
    CBLinear,
    Classify,
    Concat,
    Conv,
    Conv2,
    ConvTranspose,
    Detect,
    DWConv,
    DWConvTranspose2d,
    Focus,
    GhostBottleneck,
    GhostConv,
    HGBlock,
    HGStem,
    ImagePoolingAttn,
    Pose,
    RepC3,
    RepConv,
    RepNCSPELAN4,
    RepVGGDW,
    ResNetLayer,
    RTDETRDecoder,
    SCDown,
    Segment,
    WorldDetect,
    v10Detect,
)
from ultralytics.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, colorstr, emojis, yaml_load
from ultralytics.utils.checks import check_requirements, check_suffix, check_yaml
from ultralytics.utils.loss import (
    E2EDetectLoss,
    v8ClassificationLoss,
    v8DetectionLoss,
    v8OBBLoss,
    v8PoseLoss,
    v8SegmentationLoss,
)
from ultralytics.utils.plotting import feature_visualization
from ultralytics.utils.torch_utils import (
    fuse_conv_and_bn,
    fuse_deconv_and_bn,
    initialize_weights,
    intersect_dicts,
    make_divisible,
    model_info,
    scale_img,
    time_sync,
)

try:
    import thop
except ImportError:
    thop = None
class BaseModel(nn.Module):
    """The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family."""

    def forward(self, x, *args, **kwargs):
        """
        Forward pass of the model on a single scale. Wrapper for `_forward_once` method.

        Args:
            x (torch.Tensor | dict): The input image tensor or a dict including image tensor and gt labels.

        Returns:
            (torch.Tensor): The output of the network.
        """
        if isinstance(x, dict):  # for cases of training and validating while training.
            return self.loss(x, *args, **kwargs)
        return self.predict(x, *args, **kwargs)

    def predict(self, x, profile=False, visualize=False, augment=False, embed=None):
        """
        Perform a forward pass through the network.

        Args:
            x (torch.Tensor): The input tensor to the model.
            profile (bool): Print the computation time of each layer if True, defaults to False.
            visualize (bool): Save the feature maps of the model if True, defaults to False.
            augment (bool): Augment image during prediction, defaults to False.
            embed (list, optional): A list of feature vectors/embeddings to return.

        Returns:
            (torch.Tensor): The last output of the model.
        """
        if augment:
            return self._predict_augment(x)
        return self._predict_once(x, profile, visualize, embed)
    def _predict_once(self, x, profile=False, visualize=False, embed=None):
        """
        Perform a forward pass through the network.

        Args:
            x (torch.Tensor): The input tensor to the model.
            profile (bool): Print the computation time of each layer if True, defaults to False.
            visualize (bool): Save the feature maps of the model if True, defaults to False.
            embed (list, optional): A list of feature vectors/embeddings to return.

        Returns:
            (torch.Tensor): The last output of the model.
        """
        y, dt, embeddings = [], [], []  # outputs
        for m in self.model:
            if m.f != -1:  # if not from previous layer
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
            if profile:
                self._profile_one_layer(m, x, dt)
            if hasattr(m, 'backbone'):  # whole-backbone module (e.g. ghostnetv3) returning a list of feature maps
                x = m(x)
                for _ in range(5 - len(x)):
                    x.insert(0, None)  # left-pad to 5 entries so downstream save indices stay aligned
                for i_idx, i in enumerate(x):
                    if i_idx in self.save:
                        y.append(i)
                    else:
                        y.append(None)
                x = x[-1]  # continue with the deepest feature map
            else:
                x = m(x)  # run
                y.append(x if m.i in self.save else None)  # save output
            if visualize:
                feature_visualization(x, m.type, m.i, save_dir=visualize)
            if embed and m.i in embed:
                embeddings.append(nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1))  # flatten
                if m.i == max(embed):
                    return torch.unbind(torch.cat(embeddings, 1), dim=0)
        return x
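The `hasattr(m, 'backbone')` branch is the key change: a whole-backbone module such as ghostnetv3 returns a list of feature maps rather than a single tensor, and that list is left-padded with None to a fixed length of 5 so the save indices used by later layers stay valid. The padding step in isolation (the strings here are just stand-ins for the four GhostNet feature maps):

feats = ["P2", "P3", "P4", "P5"]   # the 4 maps returned by GhostNet.forward
for _ in range(5 - len(feats)):
    feats.insert(0, None)          # left-pad to length 5
print(feats)   # [None, 'P2', 'P3', 'P4', 'P5']; feats[-1] feeds the next layer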
    def _predict_augment(self, x):
        """Perform augmentations on input image x and return augmented inference."""
        LOGGER.warning(
            f"WARNING ⚠️ {self.__class__.__name__} does not support augmented inference yet. "
            f"Reverting to single-scale inference instead."
        )
        return self._predict_once(x)
    def _profile_one_layer(self, m, x, dt):
        """
        Profile the computation time and FLOPs of a single layer of the model on a given input. Appends the results to
        the provided list.

        Args:
            m (nn.Module): The layer to be profiled.
            x (torch.Tensor): The input data to the layer.
            dt (list): A list to store the computation time of the layer.

        Returns:
            None
        """
        c = m == self.model[-1] and isinstance(x, list)  # is final layer list, copy input as inplace fix
        flops = thop.profile(m, inputs=[x.copy() if c else x], verbose=False)[0] / 1e9 * 2 if thop else 0  # GFLOPs
        t = time_sync()
        for _ in range(10):
            m(x.copy() if c else x)
        dt.append((time_sync() - t) * 100)
        if m == self.model[0]:
            LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s}  module")
        LOGGER.info(f"{dt[-1]:10.2f} {flops:10.2f} {m.np:10.0f}  {m.type}")
        if c:
            LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s}  Total")