Pytorch|YOWO原理及代码详解(一)
阅前可看:YOWO论文翻译
YOWO很有趣,使用价值很大,最近刚好需要,所以就研究一下。一直认为只有把源码看懂,才知道诸多细节,才算真正了解一个算法。笔者能力有限,博文若有出错,欢迎指正交流。
这次为了方便debug,所以就稍微改动了train.py 文件,修改为myTrain.py,代码分析就从这里开始,但在之前需要完成各项配置。
1.训练之前需要的工作。
1.1 ucf101-24数据集
ucf101-24数据集下载。论文使用了两个数据集,本次代码分析只使用ucf24数据集。
1.2 基础骨干网络预训练模型
有两个,第一个是2d网络yolov2。还有一个是3d网络ResNeXt 和 ResNet(原文写作“ResNeXt ve ResNet”,“ve”是土耳其语的“和”,沿自原论文)。本次代码分析使用:“resnext-101-kinetics.pth”。
1.3 YOWO网络预训练模型
作者放百度云了,密码:95mm。
1.4 路径配置
基础骨干网络放到“weights”文件夹中(与代码中默认路径 weights/resnext-101-kinetics.pth 保持一致),ucf24路径随意,但记得需要在ucf24.data中进行修改,如下:
2. 准备开始训练
首先附上myTrain.py的完整代码:
from __future__ import print_function
import sys
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torchvision import datasets, transforms
import dataset
import random
import math
import os
from opts import parse_opts
from utils import *
from cfg import parse_cfg
from region_loss import RegionLoss
from model import YOWO, get_fine_tuning_parameters
import argparse
def _str2bool(v):
    """Parse a command-line boolean.

    argparse's ``type=bool`` is broken: any non-empty string is truthy,
    so ``--evaluate False`` used to yield ``True``. Map the common
    spellings explicitly instead.
    """
    if isinstance(v, bool):
        return v
    return str(v).strip().lower() in ("1", "true", "t", "yes", "y")


if __name__ == "__main__":
    # ---- command-line options -------------------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", type=str, default="ucf101-24", help="dataset")
    # BUGFIX: the original defaults ended with a trailing space
    # ("cfg/ucf24.data ") which makes open() fail on most systems.
    parser.add_argument("--data_cfg", type=str, default="cfg/ucf24.data", help="data_cfg")
    parser.add_argument("--cfg_file", type=str, default="cfg/ucf24.cfg", help="cfg_file")
    parser.add_argument("--n_classes", type=int, default=24, help="n_classes")
    parser.add_argument("--backbone_3d", type=str, default="resnext101", help="backbone_3d")
    parser.add_argument("--backbone_3d_weights", type=str, default="weights/resnext-101-kinetics.pth", help="backbone_3d_weights")
    # BUGFIX: help text previously said "backbone_3d_weights" here.
    parser.add_argument("--backbone_2d", type=str, default="darknet", help="backbone_2d")
    parser.add_argument("--backbone_2d_weights", type=str, default="weights/yolo.weights", help="backbone_2d_weights")
    # BUGFIX: type=bool replaced by _str2bool (see above) so that
    # explicit "False"/"0" values are honoured on the command line.
    parser.add_argument("--freeze_backbone_2d", type=_str2bool, default=True, help="freeze_backbone_2d")
    parser.add_argument("--freeze_backbone_3d", type=_str2bool, default=True, help="freeze_backbone_3d")
    parser.add_argument("--evaluate", type=_str2bool, default=False, help="evaluate")
    parser.add_argument("--begin_epoch", type=int, default=0, help="begin_epoch")
    # BUGFIX: help text previously said "evaluate" here.
    parser.add_argument("--end_epoch", type=int, default=4, help="end_epoch")
    opt = parser.parse_args()
    # opt = parse_opts()

    # ---- which dataset to use -------------------------------------------
    dataset_use = opt.dataset
    assert dataset_use in ('ucf101-24', 'jhmdb-21'), 'invalid dataset'

    datacfg = opt.data_cfg   # path for dataset of training and validation
    cfgfile = opt.cfg_file   # path for cfg file
    data_options = read_data_cfg(datacfg)
    net_options = parse_cfg(cfgfile)[0]

    # ---- obtain lists for training and testing --------------------------
    basepath = data_options['base']
    trainlist = data_options['train']
    testlist = data_options['valid']
    backupdir = data_options['backup']
    nsamples = file_lines(trainlist)   # number of training samples
    gpus = data_options['gpus']        # e.g. 0,1,2,3
    ngpus = len(gpus.split(','))
    num_workers = int(data_options['num_workers'])

    # ---- solver hyper-parameters from the cfg file ----------------------
    batch_size = int(net_options['batch'])
    clip_duration = int(net_options['clip_duration'])
    max_batches = int(net_options['max_batches'])
    learning_rate = float(net_options['learning_rate'])
    momentum = float(net_options['momentum'])
    decay = float(net_options['decay'])
    steps = [float(step) for step in net_options['steps'].split(',')]
    scales = [float(scale) for scale in net_options['scales'].split(',')]

    # ---- loss parameters ------------------------------------------------
    loss_options = parse_cfg(cfgfile)[1]
    region_loss = RegionLoss()
    anchors = loss_options['anchors'].split(',')
    region_loss.anchors = [float(i) for i in anchors]
    region_loss.num_classes = int(loss_options['classes'])
    region_loss.num_anchors = int(loss_options['num'])
    # each anchor is described by anchor_step consecutive floats
    region_loss.anchor_step = len(region_loss.anchors) // region_loss.num_anchors
    region_loss.object_scale = float(loss_options['object_scale'])
    region_loss.noobject_scale = float(loss_options['noobject_scale'])
    region_loss.class_scale = float(loss_options['class_scale'])
    region_loss.coord_scale = float(loss_options['coord_scale'])
    region_loss.batch = batch_size

    # ---- train parameters -----------------------------------------------
    max_epochs = max_batches * batch_size // nsamples + 1
    use_cuda = True
    seed = int(time.time())
    eps = 1e-5
    best_fscore = 0  # initialize best fscore

    # ---- test parameters ------------------------------------------------
    nms_thresh = 0.4
    iou_thresh = 0.5

    if not os.path.exists(backupdir):
        os.mkdir(backupdir)

    # seed RNGs (CPU and, if used, GPU) for reproducibility
    torch.manual_seed(seed)
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)

    # ---- create model ----------------------------------------------------
    model = YOWO(opt)
    model = model.cuda()
    model = nn.DataParallel(model, device_ids=None)  # in multi-gpu case
    model.seen = 0  # number of samples processed so far
    print(model)

    parameters = get_fine_tuning_parameters(model, opt)
    # lr and weight_decay are scaled by batch_size, darknet-style:
    # the loss is summed (not averaged) over the batch downstream.
    optimizer = optim.SGD(parameters, lr=learning_rate / batch_size, momentum=momentum, dampening=0,
                          weight_decay=decay * batch_size)
    kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}

    # Load resume path if necessary
    # if opt.resume_path:
    #     print("===================================================================")
    #     print('loading checkpoint {}'.format(opt.resume_path))
    #     checkpoint = torch.load(opt.resume_path)
    #     opt.begin_epoch = checkpoint['epoch']
    #     best_fscore = checkpoint['fscore']
    #     model.load_state_dict(checkpoint['state_dict'])
    #     optimizer.load_state_dict(checkpoint['optimizer'])
    #     model.seen = checkpoint['epoch'] * nsamples
    #     print("Loaded model fscore: ", checkpoint['fscore'])
    #     print("===================================================================")

    region_loss.seen = model.seen
    processed_batches = model.seen // batch_size
    init_width = int(net_options['width'])
    init_height = int(net_options['height'])
    init_epoch = model.seen // nsamples
def adjust_learning_rate(optimizer, batch, base_lr=None, lr_steps=None, lr_scales=None, bs=None):
    """Darknet-style step learning-rate schedule.

    For every milestone in ``lr_steps`` that ``batch`` has reached, the
    learning rate is multiplied by the corresponding entry of
    ``lr_scales`` (1 when the scale list is shorter than the step list).
    The optimizer's param groups receive ``lr / bs`` because the loss is
    summed (not averaged) over the batch elsewhere in this script.

    The schedule parameters default to the module-level globals
    (``learning_rate``, ``steps``, ``scales``, ``batch_size``) so existing
    call sites are unchanged; pass them explicitly for testing or reuse.

    :param optimizer: torch optimizer whose param-group lr is updated in place
    :param batch: index of the current (global) batch
    :returns: the unscaled learning rate after applying the schedule
    """
    if base_lr is None:
        base_lr = learning_rate
    if lr_steps is None:
        lr_steps = steps
    if lr_scales is None:
        lr_scales = scales
    if bs is None:
        bs = batch_size
    lr = base_lr
    for i, step in enumerate(lr_steps):
        scale = lr_scales[i] if i < len(lr_scales) else 1
        if batch < step:
            break  # milestone not reached yet; later ones can't be either
        lr = lr * scale
        if batch == step:
            break  # exactly at the milestone: apply its scale, then stop
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr / bs
    return lr
def train(epoch):
global processed_batches
t0 = time.time()
cur_model = model.module
region_loss.l_x.reset()
region_loss.l_y.reset()
region_loss.l_w.reset()
region_loss.l_h.reset()
region_loss.l_conf.reset()
region_loss.l_cls.reset()
region_loss.l_total.reset()
train_loader = torch.utils.data.DataLoader(
dataset.listDataset(basepath, trainlist, dataset_use=dataset_use, shape=(init_width, init_height),
shuffle=True,
transform=transforms.Compose([
transforms.ToTensor(),
]),
train=True,
seen=cur_model.seen,
batch_size