前言
人脸关键点检测是人脸后续处理算法的基础。在本文,基于kaggle的关键点检测的挑战赛尝试了对人脸15个关键点进行检测。
一、数据介绍
1. kaggle比赛介绍
本文章的数据来自五年前kaggle比赛中检测面部关键点的比赛。当时比赛的目的希望通过人脸的关键点检测实现: 1. 实现对视频和图像中的人脸的追踪 2.分析面部表情 3. 检测畸形面部体征以进行医学诊断 4. 生物识别/人脸识别 。检测面部关键点是一个非常具有挑战性的问题。 面部特征因人而异,即使对于单个人,也会因 3D 姿势、大小、位置、视角和光照条件而存在大量差异。 计算机视觉研究在解决这些困难方面取得了长足的进步,但仍有许多改进的机会。
比赛链接:https://www.kaggle.com/c/facial-keypoints-detection
2. 数据集介绍
标注方式描述: 每个关键点都由像素空间中的(x,y)实值对指定。每个人脸对象对应15个关键点,分别代表人脸的如下元素:left_eye_center, right_eye_center, left_eye_inner_corner, left_eye_outer_corner, right_eye_inner_corner, right_eye_outer_corner, left_eyebrow_inner_end, left_eyebrow_outer_end, right_eyebrow_inner_end, right_eyebrow_outer_end, nose_tip, mouth_left_corner, mouth_right_corner, mouth_center_top_lip, mouth_center_bottom_lip。每张图片被保存为96*96的大小,用0~255的数值表示对应像素点的灰度值。
数据格式描述:
训练集: 一共包含7049张图片。每一行包含了对应的15个关键点坐标(x,y)和对应图片像素集合对应的行向量。
测试集: 一共包含1783张图片。每一行对应了图片的序号和像素集合
数据集链接:https://www.kaggle.com/c/facial-keypoints-detection/data
(温馨提示:下载前需要先注册kaggle账号)
由于测试集的标签未给出,在本文中仅对训练集进行划分。
二、数据清洗
1. 数据集清洗
在kaggle提供的训练集中有相当一部分的关键点是空缺的,所以需要先对数据中的无效数据进行清除。先读取提供的训练集,将清洗后的数据保存为train_without_nan.csv
2. 数据可视化
使用该操作对训练集中的部分数据进行读取,并将其可视化出来。
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

# ---------------------------------------------------------------------------
# Load the raw Kaggle training CSV, drop rows with missing keypoints, save
# the cleaned set, and visualise a few samples with their 15 (x, y)
# keypoints overlaid.
# ---------------------------------------------------------------------------
train_dir = 'training.csv'  # path to the raw training data
train_data = pd.read_csv(train_dir)
print("Size of train data: " + str(len(train_data)) + '*' + str(len(train_data.columns)))  # dataset dimensions
print(train_data.info())  # column / dtype overview

train = train_data.dropna()  # discard samples with any missing keypoint
train.info()
train.to_csv('train_without_nan.csv')  # persist the cleaned dataset

# Decode every image: the 'Image' column stores the 96x96 grayscale pixels
# as one space-separated string per row.
X = np.stack([np.asarray(img.split(), dtype=np.float32).reshape(96, 96, 1)
              for img in train['Image']])

# The first 30 columns are the 15 keypoint (x, y) coordinate pairs.
Y = train.iloc[:, 0:30].to_numpy(dtype='float32')

disp = 8  # number of samples to display
fig, axes = plt.subplots((disp + 3) // 4, 4, figsize=(15, 10))
for i in range(disp):
    ax = axes[i // 4, i % 4]
    ax.axis('off')
    ax.imshow(X[i + 8].reshape(96, 96), cmap='gray')
    # BUGFIX: use positional indexing (iloc).  After dropna() the frame's
    # label index is no longer contiguous, so train[col][i+8] performed a
    # label-based lookup that can raise KeyError or pick a different row
    # than the positionally-indexed image X[i+8].
    ax.scatter([train.iloc[i + 8, 2 * j] for j in range(15)],
               [train.iloc[i + 8, 2 * j + 1] for j in range(15)],
               s=10, c='r')
plt.savefig('emotion_face_temp.jpg', dpi=500)
三、模型训练和测试
使用了warm up的学习率调整策略,使用resnet作为特征提取的主干网络,预先导入了预训练权重,使用全连接层将1000个输出转换为30个坐标值的输出。
# -*-coding:utf-8-*-
import numpy as np
import torch
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from math import sin, cos, pi
from keypoint_detect_network import FACERegNet
from dataset import Face_dataset,FacialKeypoiuntsTrainDataset,FacialKeypoiuntsTestDataset
from torch import nn, optim
from torch.utils.data import DataLoader,random_split,ConcatDataset
from torchvision.transforms import transforms
# from transform import RandomTranslation,RandomVerticalFlip,ToTensor
from transform import *
from predict import *
import math
import warnings
# from torchvision.models import resnet34,mobilenetv3
from model_v3 import mobilenet_v3_large,MobileNetV3,mobilenet_v3_small
warnings.filterwarnings('ignore')
from tqdm import tqdm
#-----------------------------------------#
# Input preprocessing pipeline.
# NOTE(review): x_transforms appears unused in this file — the dataset
# built in __main__ uses its own Compose — confirm before removing.
#-----------------------------------------#
x_transforms = transforms.Compose([
    transforms.Resize((96, 96)),  # resize to the 96x96 network input size
    transforms.ToTensor(),  # convert the image to a tensor scaled to [0, 1]
    # transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    transforms.Normalize((0.5,), (0.5,))  # single-channel normalization: mean 0.5, std 0.5
])
#----------------------------------------#
# Learning-rate schedule helpers
#----------------------------------------#
def warmup_learning_rate(optimizer, iteration):
    # Linear warm-up: interpolate from lr_ini toward the module-level
    # `initial_lr` over the first 100 iterations.
    # NOTE(review): this definition is shadowed by an identically-named
    # function redefined later in this file (with lr_ini = 1e-6), so this
    # version is dead code — confirm which of the two was intended.
    lr_ini = 0.0001
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr_ini+(initial_lr - lr_ini)*iteration/100
def cosin_deacy(optimizer, lr_base, current_epoch, warmup_epoch, global_epoch):
    """Cosine-annealing decay applied after the warm-up phase.

    Sets every param group's lr to
        0.5 * lr_base * (1 + cos(pi * (current_epoch - warmup_epoch)
                                      / (global_epoch - warmup_epoch)))
    so the lr equals lr_base at current_epoch == warmup_epoch and decays
    to 0 at current_epoch == global_epoch.

    Args:
        optimizer: torch optimizer whose param groups are updated in place.
        lr_base: learning rate at the start of the decay phase.
        current_epoch: current epoch index.
        warmup_epoch: epoch at which the decay phase begins.
        global_epoch: final epoch (lr reaches 0 there).
    """
    # BUGFIX: np.float was removed in NumPy >= 1.20 (AttributeError);
    # the builtin float is the documented replacement.
    lr_new = 0.5 * lr_base * (1 + np.cos(np.pi * (current_epoch - warmup_epoch) / float(global_epoch - warmup_epoch)))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr_new
def adjust_learning_rate(optimizer, gamma, step):
    """Step decay: set every param group's lr to the module-level
    initial LR scaled by gamma**step.

    Adapted from the PyTorch Imagenet example:
    https://github.com/pytorch/examples/blob/master/imagenet/main.py
    """
    decayed_lr = initial_lr * gamma ** step
    for group in optimizer.param_groups:
        group['lr'] = decayed_lr
def warmup_learning_rate(optimizer, iteration):
    """Linear warm-up: ramp the lr from lr_ini toward the module-level
    `initial_lr` over the first 100 iterations."""
    lr_ini = 0.000001
    warm_lr = lr_ini + (initial_lr - lr_ini) * iteration / 100
    for group in optimizer.param_groups:
        group['lr'] = warm_lr
#-----------------------------------------#
# Training loop
#-----------------------------------------#
def train(epoch_num, train_dataset):
    """Train the keypoint-regression network for epoch_num epochs.

    Relies on the module-level globals batch_size, num_workers, initial_lr,
    warmup_epoch and global_epoch.  Saves the (DataParallel-wrapped) state
    dict to ./weights/train_resnet50.pth after every epoch.

    Args:
        epoch_num: number of epochs to run.
        train_dataset: torch Dataset yielding (image, 30-coordinate label).
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    criterion = nn.L1Loss()  # mean absolute error over the 30 coordinates
    # BUGFIX: the original built the backbone with `resnet50`, a name that
    # is never imported in this file (NameError at runtime) and that also
    # mismatches the mobilenet_v3_small architecture constructed in test(),
    # so the checkpoint could never be loaded back.  Build the same network
    # as test() so train/test share one architecture.
    net = nn.Sequential(
        nn.Conv2d(1, 3, 3, padding=1),  # lift 1-channel input to 3 channels
        mobilenet_v3_small(),           # backbone with a 1000-dim output
        nn.Linear(1000, 100),
        nn.Linear(100, 30),             # 15 keypoints * (x, y)
    )
    net = torch.nn.DataParallel(net)
    net = net.to(device)
    optimizer = optim.Adam(net.parameters(), lr=initial_lr)
    lr_base = initial_lr  # refreshed when the warm-up phase ends
    for epoch in range(epoch_num):
        trainloader = DataLoader(train_dataset, batch_size=batch_size,
                                 shuffle=True, num_workers=num_workers)
        loss_sum = 0
        count = 0
        for imgs, labels in tqdm(trainloader):
            imgs = imgs.to(device)
            labels = labels.to(device)
            shape = labels.shape
            optimizer.zero_grad()
            predict = net(imgs)
            outputs = predict.squeeze().reshape(shape)
            loss0 = criterion(outputs, labels)
            loss0.backward()
            optimizer.step()
            loss_sum += loss0.item()
            count = count + 1
        # Cosine decay takes over once the warm-up phase is finished;
        # its arguments are epoch-based, so adjust once per epoch.
        if epoch == warmup_epoch:
            lr_base = optimizer.state_dict()['param_groups'][0]['lr']
        if epoch >= warmup_epoch:
            cosin_deacy(optimizer, lr_base, epoch, warmup_epoch, global_epoch)
        aver = loss_sum / count
        print("#------------------------------#")
        print("epoch:%d aver_loss:%.5f" % (epoch, aver))
        print("current learning_rate:%.9f" % optimizer.param_groups[0]["lr"])
        torch.save(net.state_dict(), "./weights/train_resnet50.pth")  # per-epoch checkpoint
    print("Finish Training...")
def test(test_dataset, weight_path):
    """Evaluate the trained network on test_dataset.

    For each image, computes the RMSE between predicted and ground-truth
    keypoint coordinates, prints it, and finally prints the average over
    the whole dataset.  Relies on the module-level global `device`.

    Args:
        test_dataset: Dataset yielding (image tensor, 30-coordinate label).
        weight_path: path to the state dict saved by train().
    """
    net = nn.Sequential(
        nn.Conv2d(1, 3, 3, padding=1),
        mobilenet_v3_small(),
        nn.Linear(1000, 100),
        nn.Linear(100, 30),
    )
    net = torch.nn.DataParallel(net)
    net = net.to(device)
    net.load_state_dict(torch.load(weight_path))
    net.eval()  # BUGFIX: disable dropout / batch-norm updates at eval time
    count = 0
    rmse_list = []  # per-image RMSE (the original misleadingly called this MSE)
    with torch.no_grad():  # no gradients needed for inference
        for img, pix in tqdm(test_dataset):
            # BUGFIX: the input was never moved to the network's device,
            # which crashes when CUDA is available.
            img = img.unsqueeze(0).to(device)
            pred = net(img).cpu().numpy().reshape(-1)
            gt = pix.numpy().reshape(-1)
            # Root-mean-squared error over the 30 coordinate values.
            rmse = math.sqrt(np.mean((pred - gt) ** 2))
            rmse_list.append(rmse)
            count += 1
            # BUGFIX: the original printed count+1 after already having
            # incremented count, so image ids started at 2.
            print('#----image_id:%d------MSE:%5f----------------------#' % (count, rmse))
    aver_MSE = np.mean(np.array(rmse_list))
    print("#aver_mse of %d images is: %5f-------------------------#" % (len(test_dataset), aver_MSE))
if __name__ == '__main__':
    #-----------------------------------------#
    # Load the cleaned dataset
    #-----------------------------------------#
    all_dataset = FacialKeypoiuntsTrainDataset(
        'train_pure.csv',
        transform=transforms.Compose([
            # RandomVerticalFlip(0.5),
            # RandomTranslation((0.2, 0.2)),
            ToTensor(),
        ])
    )
    torch.manual_seed(0)  # reproducible train/test split
    total_length = len(all_dataset)
    # BUGFIX: np.int was removed in NumPy >= 1.20 (AttributeError);
    # the builtin int is the documented replacement.
    test_length = int(total_length / 5)  # 1:4 test/train split
    train_length = total_length - test_length
    train_dataset, test_dataset = random_split(dataset=all_dataset,
                                               lengths=[train_length, test_length])
    #-----------------------------------------#
    # Hyper-parameters (read as globals by train() and test())
    #-----------------------------------------#
    initial_lr = 0.001
    batch_size = 50
    global_epoch = 2000
    warmup_epoch = 200
    num_workers = 16
    weight_path = "./weights/train_resnet50.pth"
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    train(global_epoch, train_dataset)
    test(test_dataset, weight_path)
结果
如果感觉有用,麻烦各位大佬点个赞,谢谢,best wishes!