- 有时我们需要对图片进行加噪、缩放等变换。当数据集比较大时，单线程遍历整个数据集耗时太长，为此可以写一点多线程处理的代码。下面以 JPEG 压缩为例：
import os
import cv2
import numpy as np
from threading import Thread
# Directory whose entries are listed and read as source images.
input_folder = '/data0/wjh/coco/val2017'
# Destination directories for the JPEG copies written at quality 50 and
# quality 70 respectively.
output_jpg50_folder = '/data0/wjh/coco/val2017-jpg50'
output_jpg70_folder = '/data0/wjh/coco/val2017-jpg70'
# Number of worker threads used by multi_thread_process; with a value of 1
# the files are processed directly in the calling thread.
num_threads = 5
def jpg_compress(filelist):
    """Re-encode every listed image as JPEG at quality 50 and quality 70.

    Each name in ``filelist`` is read from ``input_folder``. Two re-encoded
    copies are written, one to ``output_jpg50_folder`` and one to
    ``output_jpg70_folder``, keeping the original base name and using a
    ``.jpg`` extension. Entries that OpenCV cannot decode are reported and
    skipped so that one bad entry does not abort the rest of the batch.

    Args:
        filelist: Iterable of file names relative to ``input_folder``.
    """
    targets = ((output_jpg50_folder, 50), (output_jpg70_folder, 70))

    # cv2.imwrite returns False instead of raising when the destination
    # directory is missing, so make sure both directories exist up front.
    # exist_ok keeps this safe when several threads get here together.
    for folder, _ in targets:
        os.makedirs(folder, exist_ok=True)

    for file in filelist:
        src = os.path.join(input_folder, file)
        img = cv2.imread(src)
        if img is None:
            # imread signals an unreadable or non-image entry with None;
            # passing that on to imwrite would raise and kill this worker.
            print('skip unreadable image: ' + src)
            continue

        # splitext strips only the final extension, so a name such as
        # "a.b.png" becomes "a.b.jpg" rather than being truncated to "a.jpg".
        jpg_name = os.path.splitext(file)[0] + '.jpg'
        for folder, quality in targets:
            dst = os.path.join(folder, jpg_name)
            written = cv2.imwrite(
                dst, img, [int(cv2.IMWRITE_JPEG_QUALITY), quality])
            if not written:
                print('failed to write image: ' + dst)
def multi_thread_process():
    """Run ``jpg_compress`` over every entry of ``input_folder``.

    The directory listing is dealt out round-robin to at most
    ``num_threads`` worker threads, so the per-thread workload differs by at
    most one file. No more threads are started than there are files. When a
    single worker is enough, the work runs directly in the calling thread.
    The function returns once every worker has finished.

    Raises:
        ValueError: If ``num_threads`` is less than 1.
    """
    if num_threads < 1:
        raise ValueError('num_threads must be >= 1, got %r' % (num_threads,))

    filelist_total = os.listdir(input_folder)

    # Cap the worker count at the number of files so that a small folder
    # does not spawn idle threads or collapse onto one thread.
    worker_count = min(num_threads, len(filelist_total))
    if worker_count <= 1:
        jpg_compress(filelist_total)
        return

    # Strided slices give every worker an (almost) equal share without any
    # remainder bookkeeping: worker i takes items i, i + k, i + 2k, ...
    thread_list = [
        Thread(target=jpg_compress, args=(filelist_total[i::worker_count],))
        for i in range(worker_count)
    ]
    for th in thread_list:
        th.start()
    for th in thread_list:
        th.join()
# Kick off processing of the whole input folder.
multi_thread_process()
- 当图像处理的需求发生变化时，只需修改 jpg_compress 函数即可。