系列文章目录
前言
一、数据处理功能集合
'''
该模板用于多功能数据汇总处理:
1、用于解压压缩包,输入指定路径,即可解压多级压缩文件;
2、镜筒反射率、LAB文件汇总;
3、镜片反射率数据汇总,区分S1、S2;
4、统计Lab,L值、a值、b值的NG占比;(新增)
5、按指定字符或正则表达式分类文件;(新增)
6、增加了standered版本,可以自动判定OK和NG
7、优化了df.iloc[]左闭右开问题
8、测试了标准代入的问题
9、整理了各项功能的运行顺序
10、新增了读取多个xlsx文件,统计需要的数据后汇总到指定文件
'''
import os
import sys
import shutil
import zipfile
import pandas as pd
import xlrd
import xlwt
import csv
from xlutils.copy import copy
from openpyxl import Workbook
from openpyxl import load_workbook
from os.path import dirname
from decimal import Decimal
from openpyxl.utils.dataframe import dataframe_to_rows
import openpyxl
from openpyxl.styles import numbers
from openpyxl.styles import Alignment
import glob
import tkinter as tk
from tkinter import messagebox
from tkinter import simpledialog
import re
from shapely.geometry import Polygon, Point
from openpyxl.styles import PatternFill
from openpyxl.utils import get_column_letter
from datetime import datetime,timedelta
def del_old_zip(file_path):
os.remove(file_path)
def decompress(file_path,root):
z = zipfile.ZipFile(f"{
file_path}","r")
z.extractall(path=f"{
root}")
for names in z.namelist():
if names.endswith(file_flag):
z.close()
return 1
z.close()
return 0
def start_dir_make(root,dirname):
os.chdir(root)
os.mkdir(dirname)
return os.path.join(root,dirname)
def rem_dir_extra(root,father_dir_name):
try:
for item in os.listdir(os.path.join(root,father_dir_name)):
if not os.path.isdir(os.path.join(root,father_dir_name,item)):
continue
if item == father_dir_name and len(os.listdir(os.path.join(root,father_dir_name))) == 1:
os.chdir(root)
os.rename(father_dir_name,father_dir_name + '-old')
shutil.move(os.path.join(root,father_dir_name + '-old', item),os.path.join(root))
os.rmdir(os.path.join(root,father_dir_name + '-old'))
rem_dir_extra(root,item)
else:
rem_dir_extra(os.path.join(root,father_dir_name),item)
except Exception as e:
print("清除文件夹出错"+str(e))
def get_allfile_msg(file_dir):
for root, dirs, files in os.walk(file_dir):
return root, dirs, [file for file in files if file.endswith('.xls') or file.endswith('.xlsx') or file.endswith('.csv')]
def get_allfile_url(root, files):
allFile_url = []
for file_name in files:
file_url = root + "/" + file_name
allFile_url.append(file_url)
return allFile_url
def get_file_name(path, suffix = ['.xlsx', '.xls','.csv']):
tmp_lst = []
for root,dirs,files in os.walk(path):
for file in files:
tmp_lst.append(os.path.join(root, file))
return tmp_lst
def extract_last_part_of_path(path):
return os.path.basename(path)
def read_csv_file(file_path):
return pd.read_csv(file_path,encoding = 'latin1',sep = r"\s+\s{0}",dtype=object,quotechar="'",delimiter=',',doublequote=True,engine="python",header = 1)
def read_csv_file1(file_path):
return pd.read_csv(file_path,encoding = 'latin1',sep = r"\s+\s{0}",dtype=object,quotechar="'",delimiter=',',doublequote=True,engine="python",header = None,skiprows=lambda x:x not in [2,3,4])
def count_and_list_folders(target_dir):
try:
if not os.path.exists(target_dir):
raise FileNotFoundError(f"目录不存在:{
target_dir}")
if not os.path.isdir(target_dir):
raise NotADirectoryError(f"路径不是目录:{
target_dir}")
folders = [entry.name for entry in os.scandir(target_dir) if entry.is_dir()]
return folders
except Exception as e:
print(f"错误:{
str(e)}")
def format_excel_date(cell_value):
"""智能格式化Excel单元格中的日期"""
if pd.isna(cell_value):
return ""
if isinstance(cell_value, pd.Timestamp):
return cell_value.strftime("%Y-%m-%d")
if isinstance(cell_value, (int, float)):
try:
base_date = pd.Timestamp("1899-12-30")
dt = base_date + pd.Timedelta(days=cell_value)
if dt.year > 1900:
return dt.strftime("%Y-%m-%d")
except:
pass
if isinstance(cell_value, str):
try:
dt = pd.to_datetime(cell_value, dayfirst=False, yearfirst=False)
return dt.strftime("%Y-%m-%d")
except:
pass
return cell_value
if __name__ == '__main__':
red_fill = PatternFill(start_color='FF0000',end_color='FF0000',fill_type='solid')
sort = input("请选择数据处理模式:\n 1-解压 7-镜筒,9-镜片 lab-统计lab异常占比 c-分类汇总文件 s-oppo/vivo数据 \n")
if sort == "1":
zippath = input("请输入需解压的文件路径:\n")
parent_path = zippath
file_flag = '.zip'
flag = 1
while flag:
for root,dirs,files in os.walk(parent_path):
for name in files:
if name.endswith(file_flag):
new_ws = start_dir_make(root,name.replace(file_flag,""))
zip_path = os.path.join(root,name)
flag = decompress(zip_path,new_ws)
del_old_zip(zip_path)
rem_dir_extra(root,name.replace(file_flag,""))
print(f'{
root}\\{
name}'.join(['文件:','\n解压完成\n']))
rem_dir_extra(os.path.split(parent_path)[0],os.path.split(parent_path)[1])
print("解压完成,请检查!!")
elif sort == "7":
mode = input("请选择汇总模式: 1-待补充 2-汇总单机种 \n")
if mode == "1":
print("待补充,请期待")
elif mode == "2":
Summarypath = input("请输入汇总路径:汇总路径格式为:701/77b113e5-b3ad-4be2-8bcf-1544d5d901da/1.csv \n")
position = input("请输入位置类型:上中下左右-5,上中下-3 \n")
if position == "5":
file_dir = Summarypath
folder_count = sum(1 for _ in os.scandir(file_dir) if _.is_dir())
count = 0
item = 701
with os.scandir(file_dir) as it:
wb = Workbook()
ws1 = wb.create_sheet('F')
ws2 = wb.create_sheet('L')
ws1.cell(row = 1,column = 1).value = '文件名'
ws1.cell(row = 2,column = 1).value = 'wave'
ws2.cell(row = 2,column = 1).value = 'L*'
ws2.cell(row = 3,column = 1).value = 'a*'
ws2.cell(row = 4,column = 1).value = 'b*'
alignment = Alignment(horizontal='center',vertical='center')
titles = ['data1','data2','data3','data4']
for l in range(380,1051):
ws1.cell(l-377,1).value = l
ws1.cell(l-377,1).alignment = alignment
continue
for entry in it:
count += 1
c = count
m = c - 1
if entry.is_dir():
print(entry.name)
file_dir1 = file_dir+'\\'+entry.name+"\\"
print(file_dir1)
for root,dirs,files in os.walk(file_dir1):
files_F = ['1.csv', '2.csv', '3.csv', '4.csv', '5.csv']
files_L = ['1Add.csv', '2Add.csv', '3Add.csv', '4Add.csv', '5Add.csv']
z = 0
for f in files_F:
z += 1
y = z - 1
df = read_csv_file(file_dir1+f)
df = df.iloc[:,1:5]
for k,title in enumerate(titles,2):
ws1.cell(row = 2,column = k+4*y+4*m).value = title
ws1.cell(row = 2,column = k+4*y+4*m).alignment = alignment
continue
for i ,row in df.iterrows():
for j ,value in enumerate(row,start=1):
ws1.cell(row = i+3,column = j+1+4*y+4*m).value = value
z = 0
for l in files_L:
z += 1
y = z - 1
df = read_csv_file1(file_dir1+l)
df = df.iloc[0:3,1:5]
for k,title in enumerate(titles,2):
ws2.cell(row = 1,column = k+4*y+4*m).value = title
ws2.cell(row = 1,column = k+4*y+4*m).alignment = alignment
continue
for i ,row in df.iterrows():
for j ,value in enumerate(row,start=1):
ws2.cell(row = i+2,column = j+1+4*y+4*m).value = value
output_file_path=os.path.join(file_dir,f'Summaryout{
item}.xlsx')
wb.remove(wb['Sheet'])
wb.save(output_file_path)
elif position == "3":
file_dir = Summarypath
folder_count = sum(1 for _ in os.scandir(file_dir) if _.is_dir())
count = 0
item = 701
with os.scandir(file_dir) as it:
wb = Workbook()
ws1 = wb.create_sheet('F')
ws2 = wb.create_sheet('L')
ws1.cell(row = 1,column = 1).value = '文件名'
ws1.cell(row = 2,column = 1).value = 'wave'
ws2.cell(row = 2,column = 1).value = 'L*'
ws2.cell(row = 3,column = 1).value = 'a*'
ws2.cell(row = 4,column = 1).value = 'b*'
alignment = Alignment(horizontal='center',vertical='center')
titles = ['data1','data2','data3','data4']
for l in range(380,1051):
ws1.cell(l-377,1).value = l
ws1.cell(l-377,1).alignment = alignment
continue
for entry in it:
count += 1
c = count
m = c - 1
if entry.is_dir():
print(entry.name)
file_dir1 = file_dir+'\\'+entry.name+"\\"
print(file_dir1)
for root,dirs,files in os.walk(file_dir1):
files_F = ['1.csv', '2.csv', '3.csv']
files_L = ['1Add.csv', '2Add.csv', '3Add.csv']
z = 0
for f in files_F:
z += 1
y = z - 1
df = read_csv_file(file_dir1+f)
df = df.iloc[:,1:5]
for k,title in enumerate(titles,2):
ws1.cell(row = 2,column = k+4*y+4*m).value = title
ws1.cell(row = 2,column = k+4*y+4*m).alignment = alignment
continue
for i ,row in df.iterrows():
for j ,value in enumerate(row,start=1):
ws1.cell(row = i+3,column = j+1+4*y+4*m).value = value
z = 0
for l in files_L:
z += 1
y = z - 1
df = read_csv_file1(file_dir1+l)
df = df.iloc[0:3,1:5]
for k,title in enumerate(titles,2):
ws2.cell(row = 1,column = k+4*y+4*m).value = title
ws2.cell(row = 1,column = k+4*y+4*m).alignment = alignment
continue
for i ,row in df.iterrows():
for j ,value in enumerate(row,start=1):
ws2.cell(row = i+2,column = j+1+4*y+4*m).value = value
output_file_path=os.path.join(file_dir,f'Summaryout{
item}.xlsx')
wb.remove(wb['Sheet'])
wb.save(output_file_path)
else:
print("指令错误,请重新运行。")
else:
print("指令错误,请重新运行。")
elif sort == "9":
mode = input("请选择汇总模式:\n1-汇总单镜片(无标准版, 某一个镜片901文件夹下有若干单罩次文件夹,其中文件为一个csv,一个Add文件)\n2-汇总单机种多件号(无标准版,文件夹下有多个同一个机种的多个件号,可多个月)\n3-汇总多机种或单机种多件号(分类选择standered版,先利用分类模块将多机种分类,然后汇总)\n4-汇总单机种单件号(standered版本,文件夹下有若干个csv,Addcsv,可选择标准)\n")
if mode == "1":
Summarypath = input("请输入汇总路径:路径格式:901/77b113e5-b3ad-4be2-8bcf-1544d5d901da \n")
item = input("请输入汇总件号:(例如:39642A) \n")
user_input = input("请输入汇总model,S1或S2或S1S2或S1&S2 \n")
wb = Workbook()
ws = wb.create_sheet("S1S2")
ws1 = wb.create_sheet("S1")
ws2 = wb.create_sheet("S2")
alignment = Alignment(horizontal='center',vertical='center')
titlesS1 = ['data1','data2','data3']
titlesS2 = ['data4','data5','data6']
titlesS1S2 = ['data1','data2','data3','data4','data5','data6']
ws.cell(row = 5,column = 1).value = '文件名'
ws.cell(row = 5,column = 1).alignment = alignment
ws.cell(row = 6,column = 1).value = 'wave'
ws.cell(row = 6,column = 1).alignment = alignment
ws1.cell(row = 5,column = 1).value = '文件名'
ws1.cell(row = 5,column = 1).alignment = alignment
ws1.cell(row = 6,column = 1).value = 'wave'
ws1.cell(row = 6,column = 1).alignment = alignment
ws2.cell(row = 5,column = 1).value = '文件名'
ws2.cell(row = 5,column = 1).alignment = alignment
ws2.cell(row = 6,column = 1).value = 'wave'
ws2.cell(row = 6,column = 1).alignment = alignment
for l in range(380,1051):
ws.cell(l-373,1).value = l
ws.cell(l-373,1).alignment = alignment
ws1.cell(l-373,1).value = l
ws1.cell(l-373,1).alignment = alignment
ws2.cell(l-373,1).value = l
ws2.cell(l-373,1).alignment = alignment
continue
file_dir = Summarypath
current_path = os.path.dirname(os.path.abspath(__file__))
root, dirs, files = get_allfile_msg(file_dir)
allFile_url = get_allfile_url(root, files)
dir_numbers = len(dirs)
count = 0
for root,dirs,files in os.walk(file_dir):
for file_path in glob.glob(os.path.join(root,'*.csv')):
if item in file_path and 'Add' not in file_path:
print(file_path)
xl = file_path
count += 1
c = count
m = c - 1
print(f"共发现 {
m} 个文件!")
try:
last_part = extract_last_part_of_path(xl)
filename = xl
csv_data = read_csv_file(filename)
df = csv_data
if user_input == "S1":
df = df.iloc[:,1:4]
df = df.astype(float)
ws1.cell(row = 5,column = 2+3*m).value = last_part
for k,title in enumerate(titlesS1,2):
ws1.cell(row = 6,column = k+3*m).value = title
ws1.cell(row = 6,column = k+3*m).alignment = alignment
continue
for i ,row in df.iterrows():
for j ,value in enumerate(row,start=1):
ws1.cell(row = i+7,column = j+1+3*m).value = value
elif user_input == "S2":
df = df.iloc[:,4:7]
df = df.astype(float)
ws2.cell(row = 5,column = 2+3*m).value = last_part
for k,title in enumerate(titlesS2,2):
ws2.cell(row = 6,column = k+3*m).value = title
ws2.cell(row = 6,column = k+3*m).alignment = alignment
continue
for i ,row in df.iterrows():
for j ,value in enumerate(row,start=1):
ws2.cell(row = i+7,column = j+1+3*m).value = value
elif user_input == "S1S2":
df = df.iloc[:,1:7]
df = df.astype(float)
ws.cell(row = 5,column = 2+6*m).value = last_part
for k,title in enumerate(titlesS1S2,2):
ws.cell(row = 6,column = k+6*m).value = title
ws.cell(row = 6,column = k+6*m).alignment = alignment
continue
for i ,row in df.iterrows():
for j ,value in enumerate(row,start=1):
ws.cell(row = i+7,column = j+1+6*m).value = value
elif user_input == "S1&S2":
df1 = df.iloc[:,1:4]
df2 = df.iloc[:,4:7]
df1 = df1.astype(float)
df2 = df2.astype(float)
ws1.cell(row = 5,column = 2+3*m).value = last_part
ws2.cell(row = 5,column = 2+3*m).value = last_part
for k,title in enumerate(titlesS1,2):
ws1.cell(row = 6,column = k+3*m).value = title
ws1.cell(row = 6,column = k+3*m).alignment = alignment
continue
for k,title in enumerate(titlesS2,2):
ws2.cell(row = 6,column = k+3*m).value = title
ws2.cell(row = 6,column = k+3*m).alignment = alignment
continue
for i ,row in df1.iterrows():
for j ,value in enumerate(row,start=1):
ws1.cell(row = i+7,column = j+1+3*m).value = value
for i ,row in df2.iterrows():
for j ,value in enumerate(row,start=1):
ws2.cell(row = i+7,column = j+1+3*m).value = value
else:
print("非指定指令")
except Exception as e:
print(e)
if user_input == "S1":
output_file_path=os.path.join(current_path,f'Summaryout{
item}{
user_input}.xlsx')
wb.remove(wb['S2'])
wb.remove(wb['S1S2'])
wb.remove(wb['Sheet'])
wb.save(output_file_path)
elif user_input == "S2":
output_file_path=os.path.join(current_path,f'Summaryout{
item}{
user_input}.xlsx')
wb.remove(wb['S1'])
wb.remove(wb['S1S2'])
wb.remove(wb['Sheet'])
wb.save(output_file_path)
elif user_input == "S1S2":
output_file_path=os.path.join(current_path,f'Summaryout{
item}{
user_input}.xlsx')
wb.remove(wb['S1'])
wb.remove(wb['S2'])
wb.remove(wb['Sheet'])
wb.save(output_file_path)
elif user_input == "S1&S2":
output_file_path=os.path.join(current_path,f'Summaryout{
item}{
user_input}.xlsx')
wb.remove(wb['S1S2'])
wb.remove(wb['Sheet'])
wb.save(output_file_path)
else :
print("请检查程序!")
elif mode == "2":
parent_path = input("请输入汇总路径:路径格式:39827A/901/000b1664-57f5-4de9-adf2-c029898a669c \n")
file_dir = parent_path
item_1 = input("请输入汇总机种:(例如:39642A)\n")
item_p = input("请输入汇总P数: (例如:5)\n")
item_2 = int('90'+str(item_p))
print("请输入汇总model,S1或S2或S1S2或S1&S2")
folder_count = sum(1 for _ in os.scandir(file_dir) if _.is_dir())
print(folder_count)
user_input = input("请输入汇总model,S1或S2或S1S2或S1&S2 \n")
with os.scandir(file_dir) as it:
for entry in it:
if entry.is_dir():
print(entry.name)
file_dir1 = file_dir+'\\'+entry.name
print(file_dir1)
file_dir2 = file_dir1.replace("\\","")
count = 0
wb = Workbook()
ws = wb.create_sheet("S1S2")
ws1 = wb.create_sheet("S1")
ws2 = wb.create_sheet("S2")
alignment = Alignment(horizontal='center',vertical='center')
titlesS1 = ['data1','data2','data3']
titlesS2 = ['data4','data5','data6']
titlesS1S2 = ['data1','data2','data3','data4','data5','data6']
ws.cell(row = 5,column = 1).value = '文件名'
ws.cell(row = 5,column = 1).alignment = alignment
ws.cell(row = 6,column = 1).value = 'wave'
ws.cell(row = 6,column = 1).alignment = alignment
ws1.cell(row = 5,column = 1).value = '文件名'
ws1.cell(row = 5,column = 1).alignment = alignment
ws1.cell(row