用Python编写小工具来统计数据

本文介绍了一个用于统计 XML 标注文件中各类目标出现次数的 Python 程序。该程序分别读取原始标注集和新标注集，逐文件比较两者各类别的目标数量差异，并将汇总的统计结果写入 CSV 文件，适用于评估数据标注项目的准确性。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

from xml.dom.minidom import Document
import xml.etree.ElementTree as ET
import os
import cv2
import csv 

# Date tag used to build the name of the statistics CSV file.
time_str = "2022_08_10"
statisticFileName = time_str + ".csv"

# Class labels, one per line; the line index is used as the class id.
# Iterating the file (instead of read().split('\n')) avoids a spurious empty
# trailing entry when the file ends with a newline, which would otherwise
# inflate numClasses by one.
# NOTE(review): this file is read with the platform default encoding while
# noteClasses.txt is read as UTF-8 -- confirm whether that is intentional.
labelClasses = []
with open('labelClasses.txt', 'r') as f:
    labelClasses = [line.rstrip('\n') for line in f]
numClasses = len(labelClasses)

# Running total of counted objects per class across the new annotation set.
newAllCounts = [0] * numClasses

# Human-readable note per class, one per line, aligned by index with
# labelClasses.
noteClasses = []
with open('noteClasses.txt', 'r', encoding='utf-8') as f:
    noteClasses = [line.rstrip('\n') for line in f]

# Maps the class id (as a string) to its label.
dic = {str(idx): lblcls for idx, lblcls in enumerate(labelClasses)}


# Per-file class counts: file name -> list of counts indexed by class id.
oldDict = {}   # original annotation set
newDict = {}   # new annotation set
diffDict = {}  # new minus old, for files present in both sets

def getSrcStatistics(oldpath):
    """Count objects per class for every annotation file in *oldpath*.

    Every directory entry is parsed as XML. For each ``<object>`` element
    whose ``<name>`` is listed in the module-level ``labelClasses`` and whose
    ``<difficult>`` flag is not 1, the matching slot of ``oldDict[file_name]``
    is incremented. A missing or empty ``<difficult>`` tag is treated as
    "not difficult"; an object without a ``<name>`` is skipped.

    Args:
        oldpath: Directory holding the original annotation files. A trailing
            path separator is optional.

    Raises:
        OSError: If the directory or a file in it cannot be read.
        xml.etree.ElementTree.ParseError: If a file is not well-formed XML.
    """
    for name in os.listdir(oldpath):
        counts = [0] * len(labelClasses)
        oldDict[name] = counts
        # Handing the path to ElementTree lets it open the file in binary
        # mode (so the XML encoding declaration is honoured) and close it
        # again, instead of leaking a text-mode handle.
        root = ET.parse(os.path.join(oldpath, name)).getroot()
        for obj in root.iter('object'):
            nameNode = obj.find('name')
            cls = nameNode.text if nameNode is not None else None
            if cls not in labelClasses:
                continue
            difficultNode = obj.find('difficult')
            difficultText = ''
            if difficultNode is not None and difficultNode.text:
                difficultText = difficultNode.text.strip()
            if difficultText and int(difficultText) == 1:
                continue
            counts[labelClasses.index(cls)] += 1
            
##    print(oldDict)
            

def getDstStatistics(newpath):
    """Count objects per class for every annotation file in *newpath*.

    Every directory entry is parsed as XML. For each ``<object>`` element
    whose ``<name>`` is listed in the module-level ``labelClasses`` and whose
    ``<difficult>`` flag is not 1, the matching slot of ``newDict[file_name]``
    and of the overall ``newAllCounts`` total is incremented. A missing or
    empty ``<difficult>`` tag is treated as "not difficult"; an object
    without a ``<name>`` is skipped.

    Args:
        newpath: Directory holding the new annotation files. A trailing
            path separator is optional.

    Raises:
        OSError: If the directory or a file in it cannot be read.
        xml.etree.ElementTree.ParseError: If a file is not well-formed XML.
    """
    for name in os.listdir(newpath):
        counts = [0] * len(labelClasses)
        newDict[name] = counts
        # Handing the path to ElementTree lets it open the file in binary
        # mode (so the XML encoding declaration is honoured) and close it
        # again, instead of leaking a text-mode handle.
        root = ET.parse(os.path.join(newpath, name)).getroot()
        for obj in root.iter('object'):
            nameNode = obj.find('name')
            cls = nameNode.text if nameNode is not None else None
            if cls not in labelClasses:
                continue
            difficultNode = obj.find('difficult')
            difficultText = ''
            if difficultNode is not None and difficultNode.text:
                difficultText = difficultNode.text.strip()
            if difficultText and int(difficultText) == 1:
                continue
            classid = labelClasses.index(cls)
            counts[classid] += 1
            newAllCounts[classid] += 1
        
        
def getDiffStatistics():
    """Fill ``diffDict`` with per-class count differences (new minus old).

    Only files present in both ``oldDict`` and ``newDict`` get an entry.
    Each entry is a list where element ``i`` is the new count minus the old
    count for class ``i``.
    """
    for name, oldCounts in oldDict.items():
        # Direct key lookup instead of scanning every newDict entry for a
        # matching name.
        if name not in newDict:
            continue
        diffDict[name] = [new - old for new, old in zip(newDict[name], oldCounts)]

def getStatisticsSummary(resdir):
    """Write the per-class summary CSV into *resdir*.

    For every class the negative and positive entries of ``diffDict`` are
    summed separately over all files. One CSV row per class is then written
    with: class index, label, note, total count in the new set
    (``newAllCounts``), absolute sum of the negative differences, and sum of
    the positive differences. The file is named ``statisticFileName``.

    Args:
        resdir: Output directory. It is created if it does not exist; a
            trailing path separator is optional.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    negativeSums = [0] * numClasses
    positiveSums = [0] * numClasses

    for diff in diffDict.values():
        for i in range(numClasses):
            if diff[i] < 0:
                negativeSums[i] += diff[i]
            elif diff[i] > 0:
                positiveSums[i] += diff[i]

    # Use the directory that was passed in rather than a global that only
    # exists when the file is run as a script.
    if resdir:
        os.makedirs(resdir, exist_ok=True)

    with open(os.path.join(resdir, statisticFileName), 'w', newline='') as fp:
        writer = csv.writer(fp)
        writer.writerow(("序号","标签","说明","总数","误标","漏标"))
        for i in range(numClasses):
            # Fall back to an empty note when noteClasses has fewer lines
            # than labelClasses instead of failing with IndexError.
            note = noteClasses[i] if i < len(noteClasses) else ''
            writer.writerow((i, labelClasses[i], note, newAllCounts[i],
                             abs(negativeSums[i]), positiveSums[i]))

        


if __name__ == "__main__":
    # Input and output locations, relative to the working directory.
    # These stay module-level names so code elsewhere in the file can see
    # them when it runs as a script.
    srcPath, dstPath, resPath = "xml/", "xml_new/", "result/"

    # Collect per-file class counts for the original and the new set.
    getSrcStatistics(srcPath)
    getDstStatistics(dstPath)

    # Compare the two sets, then write the summary CSV.
    getDiffStatistics()
    getStatisticsSummary(resPath)
    
 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值