Preface
Read each line of a txt file, process it, and save the result.
#!/usr/bin/python
# -*- coding: UTF-8 -*-
f1 = open("image.txt", encoding='utf-8')
out = open("new_image.txt", "w", encoding='utf-8')  # 'a' appends, 'w' overwrites
lines1 = f1.readlines()
for line1 in lines1:
    image_list = line1.strip().split(" ")
    write_str = ' '.join(image_list) + ' add_new\n'
    out.write(write_str)
f1.close()
out.close()
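A more idiomatic variant of the same loop uses with-blocks so both files are closed automatically; a minimal sketch with the same file names:

# Same read-process-write loop, but the files close themselves
with open("image.txt", encoding='utf-8') as f1, \
     open("new_image.txt", "w", encoding='utf-8') as out:
    for line1 in f1:
        image_list = line1.strip().split(" ")
        out.write(' '.join(image_list) + ' add_new\n')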
1. Training list cleanup
Check whether a list contains duplicate elements:
#!/usr/bin/env python
# encoding: utf-8
from collections import Counter

num_list = [1, 2, 3, 4, 4, 5, 6, 6, 6]
# Method 1: compare lengths after deduplicating with set
if len(num_list) != len(set(num_list)):
    print('have duplicates!!!')
else:
    print('no duplicates!!')
# Method 2: use collections.Counter; the most frequent element
# appears more than once iff the list has duplicates
cou = Counter(num_list)
first = cou.most_common(1)
if first[0][1] > 1:
    print('have duplicates!!!')
else:
    print('no duplicates!!')
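If you also need to know which elements actually repeat, Counter gives them directly; a small sketch on the same num_list:

from collections import Counter

num_list = [1, 2, 3, 4, 4, 5, 6, 6, 6]
# Keep every value whose count is greater than one
dup_items = [item for item, cnt in Counter(num_list).items() if cnt > 1]
print(dup_items)  # [4, 6]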
Reading the list:
source_list_txt = 'data_list.txt'
label_matrix = []
source_list = open(source_list_txt, mode='r', encoding='utf-8')
for source_line in source_list:
    splits = source_line.strip().split()
    labels = splits[1:]  # the first item is the image name
    labels = [int(item) for item in labels]
    label_matrix.append(labels)
print('total num', len(label_matrix))
Sometimes you need to operate on the images themselves and save the results afterwards.
2. Copying images
When organizing images, copy the ones that meet certain requirements into a specified folder.
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import os
import shutil

# Source directory A holding the original images
sourcePath = r'old_path'
# Target directory B to copy into
targetPath = r'new_path'
fileNameList = r"image_list.txt"
objNameList = []
num_error = 0
for i in open(fileNameList, 'r', encoding='utf-8'):
    list_file = i.split(" ")[0]
    objNameList.append(list_file.replace('\n', ''))
for objName in objNameList:
    img_list = objName.split("/")
    img_name = img_list[-1]
    # Rebuild the sub-directory structure under the target path
    dst_dir = targetPath
    for j in range(len(img_list) - 1):
        dst_dir = dst_dir + "/" + img_list[j]
    # Create the target directory if it does not exist
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)
    # Skip (and count) entries whose source file is missing
    if not os.path.exists(sourcePath + "/" + objName):
        print(sourcePath + "/" + objName)
        num_error += 1
        continue
    # Copy the file into the mirrored directory
    shutil.copy(sourcePath + "/" + objName, dst_dir + "/" + img_name)
print('missing files:', num_error)
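Since the paths above are concatenated with "/" by hand, os.path.join plus os.makedirs(exist_ok=True) is a slightly safer way to build the same directory structure across platforms; a small sketch (targetPath and objName are the hypothetical values from above):

import os

targetPath = r'new_path'
objName = 'sub/dir/img.jpg'  # hypothetical relative path from the list
# Join the target root with every path component except the file name
dst_dir = os.path.join(targetPath, *objName.split("/")[:-1])
os.makedirs(dst_dir, exist_ok=True)  # no error if the directory already exists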
3. Classification data statistics
Mainstream papers on multi-label classification currently use a sigmoid loss, which splits the multi-class problem into multiple independent binary classifications and noticeably improves results on hard-to-separate attributes such as color.
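As a minimal numpy sketch of that idea (hypothetical logits and multi-hot labels, not tied to any framework), each class contributes an independent binary cross-entropy term:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

# Hypothetical batch of raw scores (logits) and multi-hot labels:
# one column per class; labels are 0/1 and not mutually exclusive
logits = np.array([[2.0, -1.0, 0.5]])
targets = np.array([[1.0, 0.0, 1.0]])
probs = sigmoid(logits)
# Sigmoid loss = mean of one binary cross-entropy per class
loss = -np.mean(targets * np.log(probs) + (1.0 - targets) * np.log(1.0 - probs))
print(loss)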
Here is how to compute those statistics:
# coding=utf-8
import numpy as np
from collections import Counter
import xlwt

source_train_list_txt = 'list.txt'
src_file = open(source_train_list_txt, mode='r', encoding='utf8')
excel_path = r'result.xls'
labels_matrix = []
for src_line in src_file:
    splits = src_line.strip().split()
    labels = splits[1:]  # the first item is the image name
    labels = [int(item) for item in labels]
    labels_matrix.append(labels)
print('total_num: ', len(labels_matrix))
labels_matrix = np.asarray(labels_matrix)
rb = xlwt.Workbook()
sheet = rb.add_sheet(u'result_index1.xls', cell_overwrite_ok=True)  # create a new sheet
for i in range(labels_matrix.shape[1]):
    # Count how often each label value occurs in column (attribute) i
    c = Counter(labels_matrix[:, i])
    sheet.write(i, 2, str(c[-1]))  # count of label -1
    sheet.write(i, 4, str(c[0]))   # count of label 0
    sheet.write(i, 5, str(c[1]))   # count of label 1
    sheet.write(i, 6, str(c[2]))   # count of label 2
rb.save(excel_path)
The statistics are written straight into an Excel file; in sheet.write(row, column, value), the arguments are the row index, the column index, and the value to store.
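A minimal standalone sketch of that call, with a hypothetical file name and values:

import xlwt

rb = xlwt.Workbook()
sheet = rb.add_sheet('demo', cell_overwrite_ok=True)
# sheet.write(row, col, value): row 0, column 0 gets a header string,
# row 0, column 1 gets a number
sheet.write(0, 0, 'attr_0')
sheet.write(0, 1, 42)
rb.save('demo.xls')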
4. Image cropping and padding
Crop the image with outward expansion, padding with black wherever the expanded region runs past the image border.
import os
import glob
import numpy as np
import cv2

def crop_image(txt_path):
    list_file = open(txt_path, encoding='utf-8')
    lines1 = list_file.readlines()
    for line1 in lines1:
        image_list = line1.strip().split(" ")
        image_path_name = image_list[0]
        # Read the image; imdecode + np.fromfile also handles non-ASCII paths
        img = cv2.imdecode(np.fromfile(image_path_name, dtype=np.uint8), 1)
        # img = cv2.imread(image_path_name)
        if img is None:
            print("error:", image_path_name)
            continue
        # print(img.shape)
        # Assume the target box has top-left (leftup_x, leftup_y) and
        # bottom-right (rightdown_x, rightdown_y); here we assume the four
        # coordinates follow the image path on each line of the txt
        leftup_x, leftup_y, rightdown_x, rightdown_y = image_list[1:5]
        # Expand the box by half its width/height on every side (doubling it)
        img_w = (int(rightdown_x) - int(leftup_x)) // 2
        img_h = (int(rightdown_y) - int(leftup_y)) // 2
        roi_img = img[max(int(leftup_y) - img_h, 0):min(int(rightdown_y) + img_h, img.shape[0]),
                      max(int(leftup_x) - img_w, 0):min(int(rightdown_x) + img_w, img.shape[1])]
        # Pad with black wherever the expanded box ran past the image border
        roi_img = cv2.copyMakeBorder(roi_img,
                                     max(img_h - int(leftup_y), 0),
                                     max(int(rightdown_y) + img_h - img.shape[0], 0),
                                     max(img_w - int(leftup_x), 0),
                                     max(int(rightdown_x) + img_w - img.shape[1], 0),
                                     cv2.BORDER_CONSTANT, value=(0, 0, 0))
        save_path = image_path_name.split('.jpg')[0] + '_1.jpg'
        save_dir = os.path.dirname(save_path)
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)
        # imencode + tofile saves correctly even with non-ASCII paths
        cv2.imencode('.jpg', roi_img)[1].tofile(save_path)

# Collect every matching txt file under the given directory
path_file = 'D:\\draco\\'
path_list = glob.glob(path_file + '\\**\\' + '*.txt', recursive=True)
for txt_path in path_list:
    crop_image(txt_path)
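A quick sanity check of the expand-and-pad logic on a synthetic image (hypothetical box values, just for illustration):

import numpy as np
import cv2

img = np.full((100, 100, 3), 255, dtype=np.uint8)  # white 100x100 image
leftup_x, leftup_y, rightdown_x, rightdown_y = 10, 10, 50, 50
img_w = (rightdown_x - leftup_x) // 2
img_h = (rightdown_y - leftup_y) // 2
roi = img[max(leftup_y - img_h, 0):min(rightdown_y + img_h, img.shape[0]),
          max(leftup_x - img_w, 0):min(rightdown_x + img_w, img.shape[1])]
roi = cv2.copyMakeBorder(roi,
                         max(img_h - leftup_y, 0),
                         max(rightdown_y + img_h - img.shape[0], 0),
                         max(img_w - leftup_x, 0),
                         max(rightdown_x + img_w - img.shape[1], 0),
                         cv2.BORDER_CONSTANT, value=(0, 0, 0))
print(roi.shape)  # expect (80, 80, 3): the doubled box, black-padded at top/left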