Preface
Read each line of a txt file, process it, and save the result.
#!/usr/bin/python
# -*- coding: UTF-8 -*-
f1 = open("image.txt", encoding='utf-8')
out = open("new_image.txt", "w", encoding='utf-8')  # 'a' appends, 'w' overwrites
lines1 = f1.readlines()
for line1 in lines1:
    image_list = line1.strip().split(" ")
    write_str = ' '.join(image_list) + ' add_new\n'
    out.write(write_str)
f1.close()
out.close()
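A more idiomatic variant of the same loop uses with-blocks so both files are closed automatically; a minimal sketch with the same file names:

# Same read-process-write loop, but the files close themselves
with open("image.txt", encoding='utf-8') as f1, \
     open("new_image.txt", "w", encoding='utf-8') as out:
    for line1 in f1:
        image_list = line1.strip().split(" ")
        out.write(' '.join(image_list) + ' add_new\n')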
1. Training list cleanup
Check whether a list contains duplicate elements:
#!/usr/bin/env python
# encoding: utf-8
from collections import Counter

num_list = [1, 2, 3, 4, 4, 5, 6, 6, 6]
# Method 1: compare lengths after deduplicating with set
if len(num_list) != len(set(num_list)):
    print('have duplicates!!!')
else:
    print('no duplicates!!')
# Method 2: use collections.Counter; the most frequent element
# appears more than once iff the list has duplicates
cou = Counter(num_list)
first = cou.most_common(1)
if first[0][1] > 1:
    print('have duplicates!!!')
else:
    print('no duplicates!!')
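If you also need to know which elements actually repeat, Counter gives them directly; a small sketch on the same num_list:

from collections import Counter

num_list = [1, 2, 3, 4, 4, 5, 6, 6, 6]
# Keep every value whose count is greater than one
dup_items = [item for item, cnt in Counter(num_list).items() if cnt > 1]
print(dup_items)  # [4, 6]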
Reading the list:
source_list_txt = 'data_list.txt'
label_matrix = []
source_list = open(source_list_txt, mode='r', encoding='utf-8')
for source_line in source_list:
    splits = source_line.strip().split()
    labels = splits[1:]  # the first item is the image name
    labels = [int(item) for item in labels]
    label_matrix.append(labels)
print('total num', len(label_matrix))
Sometimes you need to operate on the images themselves and save the results afterwards.
2. Copying images
When organizing images, copy the ones that meet certain requirements into a specified folder.
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import os
import shutil

# Source directory A holding the original images
sourcePath = r'old_path'
# Target directory B to copy into
targetPath = r'new_path'
fileNameList = r"image_list.txt"
objNameList = []
num_error = 0
for i in open(fileNameList, 'r', encoding='utf-8'):
    list_file = i.split(" ")[0]
    objNameList.append(list_file.replace('\n', ''))
for objName in objNameList:
    img_list = objName.split("/")
    img_name = img_list[-1]
    # Rebuild the sub-directory structure under the target path
    dst_dir = targetPath
    for j in range(len(img_list) - 1):
        dst_dir = dst_dir + "/" + img_list[j]
    # Create the target directory if it does not exist
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)
    # Skip (and count) entries whose source file is missing
    if not os.path.exists(sourcePath + "/" + objName):
        print(sourcePath + "/" + objName)
        num_error += 1
        continue
    # Copy the file into the mirrored directory
    shutil.copy(sourcePath + "/" + objName, dst_dir + "/" + img_name)
print('missing files:', num_error)
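Since the paths above are concatenated with "/" by hand, os.path.join plus os.makedirs(exist_ok=True) is a slightly safer way to build the same directory structure across platforms; a small sketch (targetPath and objName are the hypothetical values from above):

import os

targetPath = r'new_path'
objName = 'sub/dir/img.jpg'  # hypothetical relative path from the list
# Join the target root with every path component except the file name
dst_dir = os.path.join(targetPath, *objName.split("/")[:-1])
os.makedirs(dst_dir, exist_ok=True)  # no error if the directory already exists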
3. Classification data statistics
Mainstream papers on multi-label classification currently use a sigmoid loss, which splits the multi-class problem into multiple independent binary classifications and noticeably improves results on hard-to-separate attributes such as color.
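As a minimal numpy sketch of that idea (hypothetical logits and multi-hot labels, not tied to any framework), each class contributes an independent binary cross-entropy term:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

# Hypothetical batch of raw scores (logits) and multi-hot labels:
# one column per class; labels are 0/1 and not mutually exclusive
logits = np.array([[2.0, -1.0, 0.5]])
targets = np.array([[1.0, 0.0, 1.0]])
probs = sigmoid(logits)
# Sigmoid loss = mean of one binary cross-entropy per class
loss = -np.mean(targets * np.log(probs) + (1.0 - targets) * np.log(1.0 - probs))
print(loss)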
Here is how to compute those statistics:
# coding=utf-8
import numpy as np
from collections import Counter
import xlwt

source_train_list_txt = 'list.txt'
src_file = open(source_train_list_txt, mode='r', encoding='utf8')
excel_path = r'result.xls'
labels_matrix = []
for src_line in src_file:
    splits = src_line.strip().split()
    labels = splits[1:]  # the first item is the image name
    labels = [int(item) for item in labels]
    labels_matrix.append(labels)
print('total_num: ', len(labels_matrix))
labels_matrix = np.asarray(labels_matrix)
rb = xlwt.Workbook()
sheet = rb.add_sheet(u'result_index1.xls', cell_overwrite_ok=True)  # create a new sheet
for i in range(labels_matrix.shape[1]):
    # Count how often each label value occurs in column (attribute) i
    c = Counter(labels_matrix[:, i])
    sheet.write(i, 2, str(c[-1]))  # count of label -1
    sheet.write(i, 4, str(c[0]))   # count of label 0
    sheet.write(i, 5, str(c[1]))   # count of label 1
    sheet.write(i, 6, str(c[2]))   # count of label 2
rb.save(excel_path)
The statistics are written straight into an Excel file; in sheet.write(row, column, value), the arguments are the row index, the column index, and the value to store.
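A minimal standalone sketch of that call, with a hypothetical file name and values:

import xlwt

rb = xlwt.Workbook()
sheet = rb.add_sheet('demo', cell_overwrite_ok=True)
# sheet.write(row, col, value): row 0, column 0 gets a header string,
# row 0, column 1 gets a number
sheet.write(0, 0, 'attr_0')
sheet.write(0, 1, 42)
rb.save('demo.xls')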
4. Image cropping and padding
Crop the image with outward expansion, padding with black wherever the expanded region runs past the image border.
import os
import glob
import numpy as np
import cv2

def crop_image(txt_path):
    list_file = open(txt_path, encoding='utf-8')
    lines1 = list_file.readlines()
    for line1 in lines1:
        image_list = line1.strip().split(" ")
        image_path_name = image_list[0]
        # Read the image; imdecode + np.fromfile also handles non-ASCII paths
        img = cv2.imdecode(np.fromfile(image_path_name, dtype=np.uint8), 1)
        # img = cv2.imread(image_path_name)
        if img is None:
            print("error:", image_path_name)
            continue
        # print(img.shape)
        # Assume the target box has top-left (leftup_x, leftup_y) and
        # bottom-right (rightdown_x, rightdown_y); here we assume the four
        # coordinates follow the image path on each line of the txt
        leftup_x, leftup_y, rightdown_x, rightdown_y = image_list[1:5]
        # Expand the box by half its width/height on every side (doubling it)
        img_w = (int(rightdown_x) - int(leftup_x)) // 2
        img_h = (int(rightdown_y) - int(leftup_y)) // 2
        roi_img = img[max(int(leftup_y) - img_h, 0):min(int(rightdown_y) + img_h, img.shape[0]),
                      max(int(leftup_x) - img_w, 0):min(int(rightdown_x) + img_w, img.shape[1])]
        # Pad with black wherever the expanded box ran past the image border
        roi_img = cv2.copyMakeBorder(roi_img,
                                     max(img_h - int(leftup_y), 0),
                                     max(int(rightdown_y) + img_h - img.shape[0], 0),
                                     max(img_w - int(leftup_x), 0),
                                     max(int(rightdown_x) + img_w - img.shape[1], 0),
                                     cv2.BORDER_CONSTANT, value=(0, 0, 0))
        save_path = image_path_name.split('.jpg')[0] + '_1.jpg'
        save_dir = os.path.dirname(save_path)
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)
        # imencode + tofile saves correctly even with non-ASCII paths
        cv2.imencode('.jpg', roi_img)[1].tofile(save_path)

# Collect every matching txt file under the given directory
path_file = 'D:\\draco\\'
path_list = glob.glob(path_file + '\\**\\' + '*.txt', recursive=True)
for txt_path in path_list:
    crop_image(txt_path)
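A quick sanity check of the expand-and-pad logic on a synthetic image (hypothetical box values, just for illustration):

import numpy as np
import cv2

img = np.full((100, 100, 3), 255, dtype=np.uint8)  # white 100x100 image
leftup_x, leftup_y, rightdown_x, rightdown_y = 10, 10, 50, 50
img_w = (rightdown_x - leftup_x) // 2
img_h = (rightdown_y - leftup_y) // 2
roi = img[max(leftup_y - img_h, 0):min(rightdown_y + img_h, img.shape[0]),
          max(leftup_x - img_w, 0):min(rightdown_x + img_w, img.shape[1])]
roi = cv2.copyMakeBorder(roi,
                         max(img_h - leftup_y, 0),
                         max(rightdown_y + img_h - img.shape[0], 0),
                         max(img_w - leftup_x, 0),
                         max(rightdown_x + img_w - img.shape[1], 0),
                         cv2.BORDER_CONSTANT, value=(0, 0, 0))
print(roi.shape)  # expect (80, 80, 3): the doubled box, black-padded at top/left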