Dataset download link:
http://mmlab.ie.cuhk.edu.hk/projects/TCDCN.html
GitHub repository:
https://github.com/Mimsyy/Deep-learning-MTFL
The Multi-Task Facial Landmark (MTFL) dataset contains 12,995 face images, each with two kinds of annotations:
(1) 5 facial landmarks: left eye, right eye, nose, left mouth corner, right mouth corner.
(2) Attribute labels: gender, smile, glasses, head pose.
Annotation format
The dataset is split into two parts, training and testing, and all annotations are stored in text files (training.txt and testing.txt). Each line describes one face image in the following format:
image path x1...x5 y1...y5 gender smile wearing glasses head pose
--x1...x5, y1...y5: coordinates of the left eye, right eye, nose, left mouth corner, and right mouth corner.
--gender: 1 = male, 2 = female
--smile: 1 = smiling, 2 = not smiling
--glasses: 1 = wearing glasses, 2 = not wearing glasses
--head pose: 1 = left profile, 2 = left, 3 = frontal, 4 = right, 5 = right profile
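For reference, here is a minimal parsing sketch for one annotation line. The sample values are made up for illustration (they are not taken from the dataset); the field split mirrors what the preprocessing script below does.

# Hypothetical annotation line; the numbers are illustrative only.
line = "lfw_5590\\Aaron_Eckhart_0001.jpg 107.3 147.8 126.2 106.8 140.7 113.2 113.9 136.7 152.4 152.8 1 1 2 3"
fields = line.strip().split(" ")
img_path = fields[0].replace("\\", "/")      # Windows-style separators normalized to "/"
xs = [float(v) for v in fields[1:6]]         # x1..x5: left eye, right eye, nose, left/right mouth corner
ys = [float(v) for v in fields[6:11]]        # y1..y5, same landmark order
gender, smile, glasses, pose = (int(v) for v in fields[11:15])
print(img_path, list(zip(xs, ys)), gender, smile, glasses, pose)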
Walkthrough of the data preprocessing script
from PIL import Image
import os
import numpy as np

# Root directory of the MTFL dataset
folder = os.path.abspath(os.path.join("./", os.pardir) + "/MTFL") + "/"

# Every image will be resized to 150*150
finalSize = 150

counter = 0
infoFiles = ["training.txt", "testing.txt"]
validation_counter = 0

# Create validation.txt to hold the annotations of the validation images
val_file = open(folder + "validation.txt", "a")

# Process the annotations in "training.txt" and "testing.txt"
for idx in range(len(infoFiles)):
    info = infoFiles[idx]
    # idx == 0 reads training.txt, idx == 1 reads testing.txt
    f = open(folder + info, "r")
    # Open a temporary file that will receive the rewritten annotations
    fnew = open("tmp", 'a')
    # While processing training.txt, also create aug_training.txt for the augmented data
    if idx == 0:
        fnew_augmented = open(folder + "aug_" + info, 'a')

    lines = f.readlines()
    for line in lines:
        line = line.strip("\n ").split(" ")
        # Image file name (normalize Windows path separators)
        imgName = line[0].replace("\\", "/")
        if imgName == "":
            break

        # Open the image and read its width and height
        img = Image.open(folder + imgName, mode='r')
        originalWidth = img.width
        originalHeight = img.height
        if (originalWidth != originalHeight):
            # Skip non-square images
            img.close()
            continue

        # Resize the image to 150*150 if necessary
        if (originalWidth != finalSize):
            # Image.ANTIALIAS is the high-quality downsampling filter (an alias of Image.LANCZOS in newer Pillow versions)
            img = img.resize((finalSize, finalSize), Image.ANTIALIAS)

        # Check whether the image is RGB or greyscale; skip greyscale images
        pixels = list(img.getdata())
        width, height = img.size
        pixelsArray = np.asarray([pixels[i * width:(i + 1) * width] for i in range(height)])
        if len(pixelsArray.shape) != 3:
            # img is greyscale, skip it
            img.close()
            continue

        coords = []
        # Scale factor for the landmark coordinates: 1 if the image was not resized,
        # otherwise the coordinates are scaled by the same ratio as the image.
        # line[1]-line[10] are x1..x5, y1..y5: left eye, right eye, nose,
        # left mouth corner, right mouth corner.
        coordsScaleFactor = float(finalSize) / float(originalWidth)
        for i in range(1, 11):
            coords.append(float(line[i]) * coordsScaleFactor)
        # line[11]-line[14] are the attributes gender, smile, glasses, head pose
        attributes = np.array([int(line[i]) for i in range(11, 15)])

        # While reading testing.txt, divert the first 1000 images to validation.txt
        if (idx == 1 and validation_counter <= 1000):
            val_file.write(imgName)
            # Write the landmark coordinates
            for coord in coords:
                val_file.write(" " + str(coord))
            # Write the attributes
            for attribute in attributes:
                val_file.write(" " + str(attribute - 1))  # Subtract 1 for better indexing
            val_file.write("\n")
            validation_counter += 1
            # gender: 1 male, 2 female
            # smile: 1 smiling, 2 not smiling
            # glasses: 1 wearing glasses, 2 not wearing glasses
            # head pose: 1 left profile, 2 left, 3 frontal, 4 right, 5 right profile
            # "attribute - 1" makes the labels zero-based, which is more convenient for training
        # Everything else is written to fnew and used for training/testing
        else:
            # Write the resized img to file
            fnew.write(imgName)
            for coord in coords:
                fnew.write(" " + str(coord))
            for attribute in attributes:
                fnew.write(" " + str(attribute - 1))  # Subtract 1 for better indexing
            fnew.write("\n")

        # Mirror the image if it's not part of the test data
        if idx == 0:
            # Build the name of the mirrored image, e.g.
            # ./MTFL/lfw_5590/Aaron_Eckhart_0001.jpg -> ./MTFL/lfw_5590/Aaron_Eckhart_0001_transl.jpg
            splitName = imgName.split('.')
            imgNameTransp = splitName[0] + '_transl.' + splitName[1]

            # Mirror the image horizontally and save it
            imgTransp = img.copy().transpose(Image.FLIP_LEFT_RIGHT)
            imgTransp.save(folder + imgNameTransp)
            imgTransp.close()

            # Generate the landmarks and attributes of the mirrored image
            coordsTransp = [0 for i in range(10)]
            # Translate x-coords for eyes, nose, and mouth
            coordsTransp[0] = 150 - coords[1]  # left eye x1
            coordsTransp[1] = 150 - coords[0]  # right eye x2
            coordsTransp[2] = 150 - coords[2]  # nose x3
            coordsTransp[3] = 150 - coords[4]  # left mouth corner x4
            coordsTransp[4] = 150 - coords[3]  # right mouth corner x5
            # Translate y-coords for eyes, nose, and mouth
            coordsTransp[5] = coords[6]  # left eye y1
            coordsTransp[6] = coords[5]  # right eye y2
            coordsTransp[7] = coords[7]  # nose y3
            coordsTransp[8] = coords[9]  # left mouth corner y4
            coordsTransp[9] = coords[8]  # right mouth corner y5
            # Translate the attributes gender, smile, glasses, head pose
            attributesTransp = np.array([int(line[i]) for i in range(11, 15)])
            # Mirror the head pose: 1 left profile, 2 left, 3 frontal, 4 right, 5 right profile
            attributesTransp[3] = 6 - attributesTransp[3]

            # Write the resized original image to the augmented file
            fnew_augmented.write(imgName)
            for coord in coords:
                fnew_augmented.write(" " + str(coord))
            for attribute in attributes:
                fnew_augmented.write(" " + str(attribute - 1))
            fnew_augmented.write("\n")

            # Write the mirrored image to the augmented file
            fnew_augmented.write(imgNameTransp)
            for coord in coordsTransp:
                fnew_augmented.write(" " + str(coord))
            for attribute in attributesTransp:
                fnew_augmented.write(" " + str(attribute - 1))
            fnew_augmented.write("\n")

        # Save the resized image; every saved image is 150*150
        img.save(folder + imgName)
        img.close()

        counter = counter + 1
        if counter % 1000 == 0:
            print(counter, "files processed")

    # Close "training.txt"/"testing.txt" once every line has been processed
    f.close()
    # Close the "tmp" file once all rewritten annotations have been written
    fnew.close()
    # Close aug_training.txt
    if idx == 0:
        fnew_augmented.close()
    # Delete the original "training.txt"/"testing.txt" ...
    os.remove(folder + info)
    # ... and rename "tmp" to "training.txt"/"testing.txt"
    os.rename("tmp", folder + info)

# When both files have been processed, close validation.txt
val_file.close()
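
Once the script has finished, the rewritten files (training.txt, testing.txt, aug_training.txt, validation.txt) share the same layout: image path, ten landmark coordinates in the 150*150 image, and four zero-based attributes. Below is a minimal loading sketch, assuming the output lives wherever the folder variable above points; the path in the usage comment is an assumption, not something fixed by the script.

import numpy as np

def load_annotations(txt_path):
    # Parse one of the annotation files rewritten by the preprocessing script above.
    paths, landmarks, attrs = [], [], []
    with open(txt_path, "r") as f:
        for line in f:
            fields = line.strip().split(" ")
            if len(fields) < 15:
                continue  # skip empty or malformed lines
            paths.append(fields[0])
            landmarks.append([float(v) for v in fields[1:11]])  # x1..x5, y1..y5 in 150*150 pixel coordinates
            attrs.append([int(v) for v in fields[11:15]])       # gender, smile, glasses, head pose (zero-based)
    return paths, np.array(landmarks), np.array(attrs)

# Example usage (adjust the path to wherever "folder" points on your machine):
# paths, landmarks, attrs = load_annotations("../MTFL/aug_training.txt")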