MTCNN中生成positive,negative,part样本python代码解读

最新推荐文章于 2024-03-19 18:23:28 发布

intjun

最新推荐文章于 2024-03-19 18:23:28 发布

阅读量2k

点赞数

分类专栏： MTCNN

MTCNN 专栏收录该内容

2 篇文章

订阅专栏

本文介绍了一个用于生成MTCNN人脸检测训练样本的Python脚本gen_48net_data2.py。该脚本从原始图片集中随机裁剪三类样本（正样本positive、部分人脸样本part、负样本negative），并对这些样本进行预处理，包括缩放到统一尺寸以及计算标注框和关键点的偏移量等，以便于后续的人脸检测网络训练。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

最近跑通了MTCNN的训练代码，这里对其中生成positive、negative、part样本的gen_48net_data2.py代码进行解读。

项目地址：https://github.com/dlunion/mtcnn

对应代码地址：https://github.com/dlunion/mtcnn/blob/master/train/gen_48net_data2.py


import sys
sys.path.append('D:\\Anaconda2\\libs')    # On Windows: add the Python library directory to the import path
import numpy as np
import cv2
import os
import numpy.random as npr
from utils import IoU
 
# stdsize: side length in pixels that every saved sample is resized to;
# positive, negative and part samples all share this size.
stdsize = 48
# Source annotation file and the directory holding the original images.
anno_file = "E:/face_alignment/data/CelebA/Anno/mtcnn_train_label_2.txt"
im_dir = "E:/face_alignment/data/CelebA/Img/img_celeba.7z/img_celeba/"
# Output folders, one per sample type, all located under "<stdsize>/".
pos_save_dir = str(stdsize) + "/positive"
part_save_dir = str(stdsize) + "/part"
neg_save_dir = str(stdsize) + '/negative'
save_dir = "./" + str(stdsize)
 
# Helper that creates an output folder.
def mkr(dr):
    """Create directory ``dr`` if nothing exists at that path yet.

    Missing parent directories are created as well. An already existing
    path is left untouched, so the call is safe to repeat.

    Args:
        dr: Path of the directory to create.

    Raises:
        OSError: If the directory cannot be created.
    """
    if os.path.exists(dr):
        return
    try:
        # makedirs (unlike mkdir) also creates intermediate directories.
        os.makedirs(dr)
    except OSError:
        # Another process may have created it between the check and the
        # call; only re-raise when the directory is really missing.
        if not os.path.isdir(dr):
            raise
 
# Create the output root first, then one folder per sample type.
mkr(save_dir)
mkr(pos_save_dir)
mkr(part_save_dir)
mkr(neg_save_dir)

# Label files produced by this script: one text file each for the positive,
# negative and part samples. They stay open while samples are generated and
# are closed at the end of the script.
f1 = open(os.path.join(save_dir, 'pos_' + str(stdsize) + '.txt'), 'w')
f2 = open(os.path.join(save_dir, 'neg_' + str(stdsize) + '.txt'), 'w')
f3 = open(os.path.join(save_dir, 'part_' + str(stdsize) + '.txt'), 'w')
# Read the annotation file of the original images. Blank lines are dropped
# because they would otherwise be parsed as an image with an empty name.
with open(anno_file, 'r') as f:
    annotations = [line for line in f if line.strip()]
num = len(annotations)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("%d pics in total" % num)
p_idx = 0  # number of positive samples written so far
n_idx = 0  # number of negative samples written so far
d_idx = 0  # number of part samples written so far
idx = 0  # number of source images processed so far
box_idx = 0  # number of ground-truth boxes processed so far
 
# For every annotated source image, crop negative, positive and part samples
# relative to the labelled boxes, save them resized to stdsize x stdsize and
# write the matching label lines. The label files are closed in `finally` so
# they are flushed even when processing stops with an error.
try:
    for annotation in annotations:    # one annotation line per source image
        annotation = annotation.strip().split(' ')    # fields are space separated
        im_path = annotation[0]    # first field: image file name
        # Second field up to the 11th from the end: bbox coordinates.
        # Real lists (not map objects) are needed because the landmark list
        # is sliced and mutated below.
        bbox = [float(v) for v in annotation[1:-10]]
        # Last 10 fields: landmark coordinates, stored as x, y pairs.
        pts = [float(v) for v in annotation[-10:]]
        # One row per box: (x1, y1, x2, y2).
        boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)
        im_path = im_dir + im_path    # full path of the image
        img = cv2.imread(im_path)
        idx += 1
        if idx % 100 == 0:
            print("%s images done" % idx)

        # cv2.imread returns None instead of raising when the file is
        # missing or cannot be decoded.
        if img is None:
            print("skip unreadable image: %s" % im_path)
            continue

        height, width = img.shape[:2]

        # Negative samples: up to 100 per source image.
        # The crop size is drawn from [40, min(width, height) // 2); images
        # whose shorter side is 80 px or less leave that range empty (randint
        # would raise), so they contribute no negatives.
        max_neg_size = min(width, height) // 2
        neg_num = 0
        while max_neg_size > 40 and neg_num < 100:
            size = npr.randint(40, max_neg_size)
            # Top-left corner chosen so that the crop stays inside the image.
            nx = npr.randint(0, width - size)
            ny = npr.randint(0, height - size)
            crop_box = np.array([nx, ny, nx + size, ny + size])

            # IoU comes from the project's utils module; presumably it returns
            # the overlap of crop_box with every row of boxes - TODO confirm.
            Iou = IoU(crop_box, boxes)

            # Keep the crop as a negative only if its overlap with every
            # ground-truth box is below 0.3.
            if np.max(Iou) < 0.3:
                # Crop and resize only once the crop is known to be kept.
                cropped_im = img[ny:ny + size, nx:nx + size, :]
                resized_im = cv2.resize(cropped_im, (stdsize, stdsize), interpolation=cv2.INTER_LINEAR)
                save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                # Label line: relative image path followed by class label 0.
                f2.write(str(stdsize) + "/negative/%s" % n_idx + ' 0\n')
                cv2.imwrite(save_file, resized_im)
                n_idx += 1
                neg_num += 1

        backupPts = pts[:]    # untouched landmarks, re-copied for every crop
        for box in boxes:    # one ground-truth box per iteration
            # box is (x_left, y_top, x_right, y_bottom)
            x1, y1, x2, y2 = box
            w = x2 - x1 + 1
            h = y2 - y1 + 1

            # Ignore small faces, in case their ground-truth boxes are not
            # accurate, and boxes starting outside the image.
            if max(w, h) < 12 or x1 < 0 or y1 < 0:
                continue

            box_ = box.reshape(1, -1)    # single-row form passed to IoU
            # Crop size range: [0.8 * min(w, h), ceil(1.25 * max(w, h))).
            # The lower bound is kept at 1 or more so a crop is never empty.
            min_size = max(int(min(w, h) * 0.8), 1)
            max_size = int(np.ceil(1.25 * max(w, h)))
            # Largest shift of the crop centre: 20% of the box extent,
            # truncated to an integer.
            max_dx = int(w * 0.2)
            max_dy = int(h * 0.2)

            # Positive and part samples: 50 random crops per box.
            # IoU >= 0.65 -> positive, 0.4 <= IoU < 0.65 -> part, else ignored.
            for _ in range(50):
                size = npr.randint(min_size, max_size)

                # Offset of the crop centre relative to the box centre. Boxes
                # narrower than 5 px give an empty range, which randint
                # rejects, so no shift is applied on that axis.
                delta_x = npr.randint(-max_dx, max_dx) if max_dx > 0 else 0
                delta_y = npr.randint(-max_dy, max_dy) if max_dy > 0 else 0

                # Top-left / bottom-right of the shifted square crop. size is
                # an integer, so `//` keeps the integer half on Python 2 and 3.
                nx1 = max(x1 + w / 2 + delta_x - size // 2, 0)
                ny1 = max(y1 + h / 2 + delta_y - size // 2, 0)
                nx2 = nx1 + size
                ny2 = ny1 + size

                # Drop crops that reach outside the image.
                if nx2 > width or ny2 > height:
                    continue
                crop_box = np.array([nx1, ny1, nx2, ny2])

                # Evaluate the overlap once and reuse it for both thresholds.
                overlap = IoU(crop_box, box_)

                # Box regression targets, derived from
                # x1 = nx1 + float(size) * offset_x1 (same for the others).
                offset_x1 = (x1 - nx1) / float(size)
                offset_y1 = (y1 - ny1) / float(size)
                offset_x2 = (x2 - nx1) / float(size)
                offset_y2 = (y2 - ny1) / float(size)

                if overlap >= 0.65:
                    # Positive sample: class label 1 plus the four offsets.
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    record = str(stdsize) + "/positive/%s" % p_idx + ' 1 %f %f %f %f' % (offset_x1, offset_y1, offset_x2, offset_y2)
                    label_file = f1
                    p_idx += 1
                elif overlap >= 0.4:
                    # Part sample: class label -1 plus the four offsets.
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    record = str(stdsize) + "/part/%s" % d_idx + ' -1 %f %f %f %f' % (offset_x1, offset_y1, offset_x2, offset_y2)
                    label_file = f3
                    d_idx += 1
                else:
                    continue

                # Landmark targets: landmark positions relative to the crop's
                # top-left corner, normalised by the crop size.
                pts = backupPts[:]
                for k in range(len(pts) // 2):
                    pts[k * 2] = (pts[k * 2] - nx1) / float(size)
                    pts[k * 2 + 1] = (pts[k * 2 + 1] - ny1) / float(size)

                cropped_im = img[int(ny1):int(ny2), int(nx1):int(nx2), :]
                resized_im = cv2.resize(cropped_im, (stdsize, stdsize), interpolation=cv2.INTER_LINEAR)

                # One label line: path, class, box offsets, then landmarks.
                label_file.write(record + "".join(" %f" % v for v in pts) + "\n")
                cv2.imwrite(save_file, resized_im)

            box_idx += 1
            print("%s images done, pos: %s part: %s neg: %s" % (idx, p_idx, d_idx, n_idx))
finally:
    f1.close()
    f2.close()
    f3.close()