图片文件处理（rename image-重命名，change ‘txt’ file，crop image-裁剪图，delete image-删除图，图片直方图均衡化，图像转视频，RGB转灰度图）

1.读取某一文件夹（包含子文件夹）下面的所有图片（如“inputpath”文件夹），修改文件夹里面所有图片的名字，并且按序命名

# This code is used to rename images.
import os
import shutil
def renameXML(inputPath,outputPath,t,i):
    """Copy every .bmp/.jpg file under ``inputPath`` to ``outputPath`` with sequential names.

    The new name is ``'s' + <t padded to 2 digits> + <counter padded to 5 digits> + '.bmp'``.
    The files are copied, not converted: a .jpg source keeps its JPEG
    content even though the copy carries the .bmp extension.

    Args:
        inputPath: Root folder to scan; sub-folders are scanned as well.
        outputPath: Destination folder, created when missing.
        t: Group number embedded in every new file name.
        i: First value of the running counter.
    """
    os.makedirs(outputPath, exist_ok=True)
    prefix = 's' + str(t).zfill(2)
    imgorder = i
    for root, _dirs, files in os.walk(inputPath):
        # Sorted so that the numbering is reproducible between runs.
        for f1 in sorted(files):
            if os.path.splitext(f1)[1] not in ('.bmp', '.jpg'):
                continue
            imgnewname = prefix + str(imgorder).zfill(5) + '.bmp'
            target = os.path.join(outputPath, imgnewname)
            print(target)
            # Join with the directory being walked, not with inputPath,
            # otherwise files found in sub-folders cannot be located.
            shutil.copy(os.path.join(root, f1), target)
            imgorder += 1

if __name__=='__main__':
    # The "input" folder holds 40 sub-folders named s1 ... s40; the images of
    # each one are copied into the matching "output/sN/" folder, with the
    # counter starting at 1.
    for subject_id in range(1, 41):
        source_dir = "input/s{}/".format(subject_id)
        target_dir = "output/s{}/".format(subject_id)
        renameXML(source_dir, target_dir, subject_id, 1)

2.数据集处理

1）Multi-Task Facial Landmark (MTFL) dataset中笑脸与非笑脸筛选

“training.txt”文件中的内容如下图所示。

意为：每一行依次为 #image path #x1...x5,y1...y5 #gender #smile #wearing glasses #head pose

--x1...x5,y1...y5: the locations for left eye, right eye, nose, left mouth corner, right mouth corner.
--gender: 1 for male, 2 for female
--smile: 1 for smiling, 2 for not smiling
--glasses: 1 for wearing glasses, 2 for not wearing glasses.
--head pose: 1 for left profile, 2 for left, 3 for frontal, 4 for right, 5 for right profile

# -*- coding: UTF-8 -*-
# 程序changeXMLcontent()读取文件每一行按空格切分后的第0个字段（图片路径）和第12个字段（是否是笑脸，'1'表示笑脸），并把数据库中对应的笑脸图片复制到输出文件夹
import os
import shutil
from PIL import Image
import random
import numpy as np

def changeXMLcontent(XMLfile, imgPath, outputPath):
    """Copy the images that the annotation file marks as smiling into ``outputPath``.

    Every line of ``XMLfile`` is split on whitespace; field 0 is the image
    path and field 12 is the smile flag. For each line whose flag is ``'1'``,
    every file under ``imgPath`` whose name without extension occurs in
    field 0 is copied to ``outputPath``.

    The annotation file is only read. It is never reopened for writing, so a
    failure halfway through cannot truncate it.

    Args:
        XMLfile: Path of the UTF-8 annotation text file.
        imgPath: Root folder of the images; sub-folders are searched too.
        outputPath: Destination folder, created when missing.
    """
    with open(XMLfile, 'r', encoding="utf-8") as f_r:
        lines = f_r.readlines()

    # List the image tree once instead of re-walking it for every line.
    images = [(os.path.splitext(name)[0], os.path.join(root, name))
              for root, _dirs, names in os.walk(imgPath)
              for name in names]
    os.makedirs(outputPath, exist_ok=True)

    for line in lines:
        fields = line.split()
        # Blank or short lines carry no smile flag; skip them.
        if len(fields) <= 12 or fields[12] != '1':
            continue
        for stem, source in images:
            if stem in fields[0]:
                print(fields[0], fields[12])
                shutil.copy(source, os.path.join(outputPath, os.path.basename(source)))

#crop the edge of face
def cutImg(imgPath, outputPath):
    """Crop the face region out of every image under ``imgPath``.

    Each image is converted to grayscale and cropped with a box chosen by
    its width (400, 250 or 160 pixels). Images of any other width are
    skipped; previously they either raised ``NameError`` or were saved with
    the crop of the preceding image.

    Args:
        imgPath: Root folder of the images; sub-folders are processed too.
        outputPath: Destination folder, created when missing. Results keep
            the source file name.
    """
    # Crop box as eighths of the image size, keyed by image width:
    # (left, upper, right, lower).
    eighths_by_width = {
        400: (1.8, 2, 6.1, 6.4),
        250: (1.8, 2, 6.1, 6.3),
        160: (1.6, 1.2, 6.2, 4.4),
    }
    os.makedirs(outputPath, exist_ok=True)
    for root, _dirs, files in os.walk(imgPath):
        for f1 in files:
            im = Image.open(os.path.join(root, f1)).convert('L')
            width, height = im.size  # PIL reports (width, height)
            print(width, height)
            eighths = eighths_by_width.get(width)
            if eighths is None:
                print('skipped (unsupported width): ' + f1)
                continue
            left, upper, right, lower = eighths
            box = (width * left / 8, height * upper / 8,
                   width * right / 8, height * lower / 8)
            im.crop(box).save(os.path.join(outputPath, f1))

#remove image
def remove_img(smileImgPath, unsmileImgPath):
    """Delete from ``unsmileImgPath`` every file whose name also exists under ``smileImgPath``.

    Files are matched by file name only; both folders are searched
    recursively. A running count is printed after each deletion.

    Args:
        smileImgPath: Folder whose file names mark what must be deleted.
        unsmileImgPath: Folder the duplicates are deleted from.
    """
    # Collect the reference names once so each candidate is an O(1) lookup.
    smile_names = {name
                   for _root, _dirs, names in os.walk(smileImgPath)
                   for name in names}
    cout = 0
    for root, _dirs, names in os.walk(unsmileImgPath):
        for name in names:
            if name in smile_names:
                # Remove relative to the directory being walked so that
                # duplicates inside sub-folders are found.
                os.remove(os.path.join(root, name))
                cout += 1
                print(cout)

def rename(imgPath, outputPath):
    """Equalize every image under ``imgPath`` and save it under a sequential name.

    Each image is converted to grayscale, passed through ``histeq`` and
    saved into ``outputPath`` as ``s00001.jpg``, ``s00002.jpg``, ...

    Args:
        imgPath: Root folder of the images; sub-folders are processed too.
        outputPath: Destination folder, created when missing.
    """
    os.makedirs(outputPath, exist_ok=True)
    imgorder = 1
    for root, _dirs, files in os.walk(imgPath):
        # Sorted so that the numbering is reproducible between runs.
        for f1 in sorted(files):
            # Join with the walked directory so nested files can be opened.
            grayim = Image.open(os.path.join(root, f1)).convert('L')  # to grayscale
            afterbalance = histeq(grayim)[0]
            imgnewname = 's' + str(imgorder).zfill(5) + '.jpg'
            imgorder += 1
            # The equalized image must be converted back to 'RGB' before
            # it can be saved as JPEG.
            afterbalance.convert('RGB').save(os.path.join(outputPath, imgnewname))

def histeq(im, nbr_bins=256):
    """Histogram equalization of a grayscale image.

    Args:
        im: Grayscale image (anything ``np.array`` can convert).
        nbr_bins: Number of histogram bins.

    Returns:
        A tuple ``(equalized_image, cdf)``: the equalized picture as a PIL
        image built from a float array, and the cumulative distribution
        function scaled to the range 0..255 that was used as the gray-level
        mapping.
    """
    array_im = np.array(im)
    pixels = array_im.flatten()
    # `density=True` replaces the `normed` keyword, which current NumPy
    # releases no longer accept.
    imhist, bins = np.histogram(pixels, nbr_bins, density=True)
    cdf = imhist.cumsum()        # cumulative distribution function
    cdf = 255 * cdf / cdf[-1]    # normalize: this is the gray-level transform
    # Map every pixel through the cdf by linear interpolation between the
    # left edges of the bins.
    im2 = np.interp(pixels, bins[:-1], cdf)
    return Image.fromarray(im2.reshape(array_im.shape)), cdf

def selectRandom(imgPath, outputPath, selectCount=600):
    """Move a random sample of the files under ``imgPath`` into ``outputPath``.

    The sample is drawn without replacement, so exactly
    ``min(selectCount, number of files)`` files are moved. (The earlier
    implementation drew random numbers with replacement from a range that
    was two larger than the file count and matched them as substrings of
    the file names, so it usually moved fewer files than requested.)

    Args:
        imgPath: Root folder to sample from; sub-folders are included.
        outputPath: Destination folder, created when missing. Files keep
            their base name, so equal names from different sub-folders
            overwrite each other.
        selectCount: Number of files to move; defaults to 600.
    """
    # List everything before moving anything, so the walk is not disturbed.
    candidates = [os.path.join(root, name)
                  for root, _dirs, names in os.walk(imgPath)
                  for name in names]
    os.makedirs(outputPath, exist_ok=True)

    sample_size = max(0, min(selectCount, len(candidates)))
    chosen = random.sample(candidates, sample_size)
    for source in chosen:
        f1 = os.path.basename(source)
        shutil.move(source, os.path.join(outputPath, f1))
        print(str(f1))
    print('\n\n随机选择图片数量：'+str(len(chosen)))

if __name__ == '__main__':
    # Hard-coded example locations; they are only used by the disabled
    # steps below.
    xmlinput = "./test/training.txt"
    outputpath = "D:/face_project/smile/"
    testpath="D:/face_project/MTFL/lfw_5590"

    # The processing steps are run one at a time by un-commenting the call
    # that is needed. Step 1: copy the smiling faces of both image folders.
    # for f in ['net_7876', 'lfw_5590']:
    #     imgpath = "D:/face_project/MTFL/" + f + '/'
    #     changeXMLcontent(xmlinput, imgpath, outputpath)

    # Further disabled steps: crop, remove duplicates, equalize + rename.
    # cutImg(testpath, "D:/face_project/testoutput")
    # remove_img("D:/face_project/笑脸数据集/renamesmile", "D:/face_project/笑脸数据集/renameunsmile")
    # rename("D:/face_project/笑脸数据集/output_smile/", "D:/face_project/笑脸数据集/rename_smile")
    # Currently active step: move a random subset of the non-smiling images.
    selectRandom("D:/face_project/笑脸数据集/rename_unsmile/", "D:/face_project/笑脸数据集/random_unsmile")

2）人脸数据集转VOC数据集

数据集中人脸标注文件（wider_face_train_bbx_gt.txt）格式如下图所示。

意为：The format of txt ground truth.
--File name
--Number of bounding box
--x1, y1, w, h, blur, expression, illumination, invalid, occlusion, pose

实现方法：将“wider_face_train_bbx_gt.txt”中的File name，x1，y1，x1+w，y1+h的值转存到VOC数据集图片对应的xml标签中。

import io
import os
import shutil

def mkfile(file, flag, *text):
    """Write one fragment of a VOC-style annotation to the open ``file``.

    ``flag`` selects the fragment; the data comes from ``text[0]``:

    * ``1`` -- the opening ``<annotation>`` header; ``text[0]`` is the image
      file name (leading/trailing newlines are stripped).
    * ``0`` -- one ``<object>`` entry; ``text[0]`` is indexable and holds
      xmin, ymin, xmax, ymax at positions 0..3.
    * ``2`` -- ``str(text[0])`` written as is.

    Any other flag writes nothing. The file is left open.
    """
    if flag == 1:
        image_name = str(text[0]).strip('\n')
        header_lines = (
            '<annotation>',
            '    <folder>VOC2007</folder>',
            '    <filename>' + image_name + '</filename>',
            '    <source>',
            '        <database>The VOC2007 Database</database>',
            '        <annotation>PASCAL VOC2007</annotation>',
            '        <image>flickr</image>',
            '        <flickrid>NULL</flickrid>',
            '    </source>',
            '    <owner>',
            '        <flickrid>NULL</flickrid>',
            '        <name>Face</name>',
            '    </owner>',
            '    <size>',
            '        <width>0</width>',
            '        <height>0</height>',
            '        <depth>3</depth>',
            '    </size>',
            '    <segmented>0</segmented>',
        )
        file.write('\n'.join(header_lines) + '\n')
    elif flag == 0:
        box = text[0]
        object_lines = (
            '    <object>',
            '        <name>face</name>',
            '        <pose>Unspecified</pose>',
            '        <truncated>0</truncated>',
            '        <difficult>0</difficult>',
            '        <bndbox>',
            '            <xmin>' + str(box[0]) + '</xmin>',
            '            <ymin>' + str(box[1]) + '</ymin>',
            '            <xmax>' + str(box[2]) + '</xmax>',
            '            <ymax>' + str(box[3]) + '</ymax>',
            '        </bndbox>',
            '    </object>',
        )
        file.write('\n'.join(object_lines) + '\n')
    elif flag == 2:
        file.write(str(text[0]))

def _finish_annotation(xml_file):
    """Write the closing ``</annotation>`` tag and close the XML file."""
    mkfile(xml_file, 2, '</annotation>')  # the end of xml content
    xml_file.close()


def face_generate_voc(inputFile, outputPath):
    """Convert a WIDER-FACE style ground-truth list into one VOC XML file per image.

    The input is read as a sequence of records: a line containing ``.jpg``
    (the image path), a line with the number of boxes, then one line per
    box starting with ``x1 y1 w h``. Each box is written as
    ``(x1, y1, x1 + w, y1 + h)``.

    Compared with the earlier version:
      * every XML file is closed as soon as its record is complete;
      * an image with zero boxes still gets its closing ``</annotation>``;
      * the input file is only read and never rewritten, so an error cannot
        truncate it.

    Args:
        inputFile: Path of the UTF-8 ground-truth text file.
        outputPath: Folder receiving the XML files, created when missing.
    """
    with open(inputFile, 'r', encoding="utf-8") as f_r:
        lines = f_r.readlines()
    os.makedirs(outputPath, exist_ok=True)  # make directory and subdirectory

    xml_file = None        # annotation file of the record being processed
    expect_count = False   # True on the line right after an image path
    faces_left = 0         # boxes still to read for the current image
    try:
        for line in lines:
            if '.jpg' in line:
                if xml_file is not None:
                    # The previous record ended early; keep its XML well formed.
                    _finish_annotation(xml_file)
                # 'folder/123.jpg' -> '123.jpg'
                filename = line.strip().split('/')[-1]
                xml_name = os.path.splitext(filename)[0] + '.xml'
                xml_file = open(os.path.join(outputPath, xml_name), 'w', encoding="utf-8")
                mkfile(xml_file, 1, filename)
                expect_count = True
                faces_left = 0
            elif expect_count:
                expect_count = False
                faces_left = int(line.split()[0])  # the number of faces in the image
                print(faces_left)
                if faces_left == 0:
                    # No boxes follow, so the annotation is complete already.
                    _finish_annotation(xml_file)
                    xml_file = None
            elif faces_left > 0:
                x1, y1, w, h = (int(value) for value in line.split()[:4])
                tup = (x1, y1, x1 + w, y1 + h)
                print(tup)
                mkfile(xml_file, 0, tup)
                faces_left -= 1
                if faces_left == 0:
                    _finish_annotation(xml_file)
                    xml_file = None
            # Any other line (e.g. the placeholder box line that follows a
            # zero count) belongs to no record and is ignored.
    finally:
        if xml_file is not None:
            _finish_annotation(xml_file)

if __name__=='__main__':
    # Convert the annotation list in the working directory into XML files
    # stored under ./output/.
    face_generate_voc('wider_face_val_bbx_gt.txt', './output/')

3.图像转视频

多张图像（我用的是从视频中分离出来的每一帧图像），在VS中配置好OpenCV

#include "stdafx.h"
#include<io.h> 
#include <cv.h>
#include <cvaux.h>  
#include <highgui.h>  
#include<stdio.h>
#include<string>

int numFrames = 300; // Process only the first 300 frames; adjust to the frame count of the video. NOTE(review): not referenced by any code visible in this file.

// Writes every "*.jpg" found directly under imagePath into "out.avi",
// showing each frame in a preview window while it is written.
//
// imagePath must end with a path separator, because the file pattern and
// the file names are appended to it as-is.
//
// Returns 0 on success and -1 when the writer cannot be created, no file
// matches the pattern, or an image cannot be loaded.
int Image_to_video(string imagePath)
{
	printf("------------- image to video ... ----------------\n");

	// Video writer settings; adjust them to the real frame size and rate.
	int isColor = 1;
	int fps = 30; // or 25
	int frameW = 544; // 744 for firewire cameras
	int frameH = 966; // 480 for firewire cameras
	CvVideoWriter *writer = cvCreateVideoWriter("out.avi", CV_FOURCC('X', 'V', 'I', 'D'), fps, cvSize(frameW, frameH), isColor);
	if (!writer)
	{
		printf("Could not create video writer...\n");
		return -1;
	}

	printf("\tvideo height : %d\n\tvideo width : %d\n\tfps : %d\n", frameH, frameW, fps);

	// _findfirst returns -1 on failure (0 is not an error value), and the
	// handle must stay an intptr_t: an int/long truncates it on 64-bit builds.
	struct _finddata_t fileinfo;
	string tempImagePath = imagePath + "*.jpg";
	intptr_t handle = _findfirst(tempImagePath.c_str(), &fileinfo);
	if (handle == -1)
	{
		cout<< "输入的路径有错误" << endl;
		cvReleaseVideoWriter(&writer);
		return -1;
	}

	// Preview window.
	cvNamedWindow("mainWin", CV_WINDOW_AUTOSIZE);

	int status = 0;
	int found = 0;
	do
	{
		cout << fileinfo.name << endl;
		string str0(fileinfo.name);
		string str1 = imagePath + str0;
		cout << str0 << "\n" << str1 << endl;
		IplImage* img = cvLoadImage(str1.c_str());
		if (!img)
		{
			printf("Could not load image file...\n");
			status = -1; // fall through to the common clean-up below
			break;
		}
		cvShowImage("mainWin", img);
		cvWaitKey(200); // gives the window time to repaint
		cvWriteFrame(writer, img);
		cvReleaseImage(&img); // every loaded frame must be released
		found = _findnext(handle, &fileinfo);
	} while (found == 0);

	cvReleaseVideoWriter(&writer);
	cvDestroyWindow("mainWin");
	// The search handle must always be closed.
	if (_findclose(handle) == 0) cout << "文件句柄成功关闭" << endl;
	else cout << "文件句柄关闭失败..." << endl;
	return status;
}

// Entry point: converts the images of a fixed folder into a video, then
// waits for a key press so the console stays open.
int main()
{
	const char* imageDir = "J:/results_imgs/";

	Image_to_video(imageDir);    // images -> video
	system("pause");
	return 0;
}

可参考：https://www.jianshu.com/p/65aa6516baa6

4.彩色图转灰度图

公式：Gray = R*0.299 + G*0.587 + B*0.114

可参考：https://www.zhihu.com/question/21593044

5.numpy.ndarray类型保存为图像

方式一：(不能放大缩小图片)

# Save a numpy array as an image file (the array is written at its own
# resolution, it is not resized). `ndarray` must already hold the image data.
# The submodule is imported explicitly: `import matplotlib` alone does not
# load `matplotlib.image`.
import matplotlib.image
matplotlib.image.imsave("1.png",ndarray,dpi=1000)

方式二：(能放大缩小图片)

# Save a numpy array as an image file after resizing it to 300 x 300.
# `ndarray` must already hold the image data.
# NOTE(review): scipy.misc.imresize and scipy.misc.imsave were deprecated in
# SciPy 1.0 and are missing from current SciPy releases -- confirm the
# installed version, or port these two calls to Pillow / OpenCV.
import scipy.misc
ndarray = scipy.misc.imresize(ndarray,(300,300))  # resize image
scipy.misc.imsave("1.png",ndarray)

当然，还可以采用pillow、opencv等工具包

6.查看两个文件夹下的相同图片(.jpg)

import os 

class ImageNameList():
    """Lists the names of the ``.jpg`` files stored under a folder."""

    def __init__(self,im_path):
        # Root folder that get_im_name_list() scans.
        self.im_path = im_path

    def get_im_name_list(self):
        """Return the file names (without directories) of all ``.jpg`` files.

        Sub-folders are scanned as well. The extension test is case
        sensitive, so ``.JPG`` files are not reported.
        """
        jpg_names = []
        for _root, _subdirs, file_names in os.walk(self.im_path):
            for file_name in file_names:
                _stem, extension = os.path.splitext(file_name)
                if extension == ".jpg":
                    jpg_names.append(file_name)
        return jpg_names

if __name__ == '__main__':
    train_path = "../../data/train/"
    test_path = "../../data/test/"
    train_im_list = ImageNameList(train_path).get_im_name_list()
    test_im_list = ImageNameList(test_path).get_im_name_list()

    # A name is reported once for every (train, test) pair that matches.
    same_im_list = []
    for train_im in train_im_list:
        for test_im in test_im_list:
            if test_im == train_im:
                same_im_list.append(test_im)

    print("The number of train images:{}".format(len(train_im_list)))
    print("The number of test images:{}".format(len(test_im_list)))
    print("The same images between train folder and test folder:{}".format(same_im_list))

7.TT100K数据集的json文件转VOC2007

TT100K数据集：腾讯+清华合作完成的traffic sign数据集

其中标注有边界框(bndbox)，多边形，圆形等。详细标注说明：

The images collected were next annotated by hand. Traffic signs in China follow international patterns, and can be classified into three categories: warnings (mostly yellow triangles with a black boundary and information), prohibitions (mostly white surrounded by a red circle and also possibly having a diagonal bar), mandatory (mostly blue circles with white information). Other signs exist that resemble traffic-signs but are in fact not; some are illustrated in Figure 3. Such signs are placed in an ‘other’ class of a particular category. During traffic-sign annotation, we recorded the bounding box, boundary vertices and class label for the sign. To determine the pixel mask for the sign, we use two modes: polygon mode and ellipse mode. In polygon mode, we mark the vertices of the polygon while in ellipse mode we mark arbitrary ‘vertices’ along the boundary of the ellipse, and we fit the shape automatically using the marked vertices. For a triangle sign we only mark three vertices; for distorted signs we may mark additional vertices for accurate segmentation. Circle signs appear as ellipses, unless occluded, so we mark 5 vertices to which we can fit an ellipse during post-processing. The most complicated cases concern occluded（遮挡）signs. In this case, we mark the bounding box, the polygon boundary and ellipse boundary (if appropriate), and intersect them to find the final mask. We illustrate our annotation pipeline in Figure 4, and show a complicated annotation case in Figure 5.

既然是转成VOC2007，就只需要取出bndbox即可：

# .....ReadJson.py.....
import os

class ReadJson():
    """Reads a text file and keeps its lines."""

    def __init__(self,json_file_name):
        self.json_file_name = json_file_name
        # Lines of the file; filled by read_json().
        self.json_file_content = []

    def read_json(self):
        """Read the file and return its lines (line endings included).

        The list is also stored in ``self.json_file_content``.
        """
        with open(self.json_file_name,'r') as f_r:
            self.json_file_content = list(f_r)
        return self.json_file_content

# ......main.py.....
import os
import ReadJson 
import json
import xml.etree.ElementTree as ET 

def mkdirs(path):
    """Create ``path`` with its missing parents, unless it already exists."""
    if not os.path.exists(path):
        os.makedirs(path)

def voc_xml_head(im_key=None):
    """Build the fixed part of a VOC annotation and return its root element.

    The tree holds the folder ("VOC2007"), the file name (``im_key`` plus
    ".jpg"), the source database, a fixed 2048 x 2048 x 3 size and the
    segmented flag. ``im_key`` must be a string; the default ``None``
    raises ``TypeError``.
    """
    def add_child(parent, tag, text=None):
        # Append <tag> to parent, with text when one is given.
        child = ET.SubElement(parent, tag)
        if text is not None:
            child.text = text
        return child

    root = ET.Element("annotation")
    add_child(root, "folder", "VOC2007")
    add_child(root, "filename", im_key+".jpg")

    source = add_child(root, "source")
    add_child(source, "database", "TT100K Database")

    size = add_child(root, "size")
    for tag, value in (("width", "2048"), ("height", "2048"), ("depth", "3")):
        add_child(size, tag, value)

    add_child(root, "segmented", "0")
    return root

def voc_xml_addobject(name_str):
    """Return an ``<object>`` element for the class ``name_str``.

    Pose is "Unspecified"; truncated and difficult are both "0". The
    bounding box is not included.
    """
    object_el = ET.Element("object")
    fields = (
        ("name", name_str),
        ("pose", "Unspecified"),
        ("truncated", "0"),
        ("difficult", "0"),
    )
    for tag, value in fields:
        ET.SubElement(object_el, tag).text = value
    return object_el

def voc_xml_addobject_position(xmin,ymin,xmax,ymax):
    """Return a ``<bndbox>`` element whose four children hold the given texts."""
    bndbox_el = ET.Element("bndbox")
    corners = (("xmin", xmin), ("ymin", ymin), ("xmax", xmax), ("ymax", ymax))
    for tag, value in corners:
        ET.SubElement(bndbox_el, tag).text = value
    return bndbox_el

def voc_xml_test(imgs_dict,im_key=None):
    """Write the VOC XML annotation of one image.

    The entry ``imgs_dict[im_key]`` supplies ``'path'`` and ``'objects'``;
    each object supplies ``'category'`` and a ``'bbox'`` with xmin, ymin,
    xmax and ymax, which are truncated to integers. The XML is printed and
    saved as ``TT100KBBox_XML/<first component of path>/<im_key>.xml``.

    Args:
        imgs_dict: Mapping from image key to its annotation entry.
        im_key: Key of the image to convert (a string).
    """
    annotation_el = voc_xml_head(im_key)

    img_info = imgs_dict[im_key]
    save_path = "TT100KBBox_XML/"+img_info['path'].split("/")[0]
    mkdirs(save_path)

    for img_object in img_info['objects']:
        object_el = voc_xml_addobject(img_object['category'])
        img_bbox = img_object['bbox']
        corners = [str(int(img_bbox[key])) for key in ('xmin', 'ymin', 'xmax', 'ymax')]
        object_el.append(voc_xml_addobject_position(*corners))
        annotation_el.append(object_el)

    format_xml(annotation_el)
    ET.dump(annotation_el)
    # Build the tree through the public constructor rather than the private
    # ElementTree._setroot().
    ET.ElementTree(annotation_el).write(os.path.join(save_path,im_key+'.xml'))


def format_xml(root_el, indent_num=0):
    """Indent an element tree in place with newlines and tabs.

    An element whose text is missing or only whitespace gets a newline plus
    ``indent_num + 1`` tabs as text. Every child gets a tail that puts the
    next sibling on its own line; the last child's tail steps back to
    ``indent_num`` tabs so the closing tag lines up with its opening tag.

    Args:
        root_el: Element to format; its descendants are formatted too.
        indent_num: Nesting depth of ``root_el``.
    """
    if root_el.text is None or root_el.text.isspace():
        root_el.text = "\n"+"\t"*(indent_num+1)
    children = list(root_el)
    last_position = len(children) - 1
    # enumerate() gives the position directly; list.index() would rescan
    # the children for every child.
    for position, item in enumerate(children):
        if position < last_position:
            item.tail = "\n"+"\t"*(indent_num+1)
        else:
            item.tail = "\n"+"\t"*indent_num
        format_xml(item, indent_num = indent_num+1)

if __name__ == "__main__":
    json_file_name = "../../data/annotations.json"
    json_file_content = ReadJson.ReadJson(json_file_name).read_json()

    # Only the first line of the file is parsed as JSON; the annotations of
    # all images are stored under its 'imgs' key.
    imgs_dict = json.loads(json_file_content[0])['imgs']

    for img_key in imgs_dict:
        voc_xml_test(imgs_dict,img_key)

8.裁剪图像(指定矩形框，c++)

/* Returns a copy of the rectangle of src whose top-left corner is (x, y)
   and whose size is w x h; src is the image to crop.
   The result can be written out with cv::imwrite(filename, dst). */
cv::Mat im_crop(cv::Mat src, int x, int y, int w, int h)
{
    const cv::Rect region(x, y, w, h);
    return src(region).clone();
}