txt转5个excel

import pandas as pd

# 要读取处理的excel文件
read_file = '原始.txt'

# 定义要输出的文件名
color_out_file = 'Color_V2.xlsx'
devices_out_file = 'Devices_V2.xlsx'
fixed_out_file = 'Devices_Fixed_Item_V2.xlsx'
other_out_file = 'Devices_Other_Item_V2.xlsx'
parameter_out_file = 'Parameter_V2.xlsx'

"""
初始值
"""
program = ''
lot_number = ''
flow_id = ''
wafer_number = ''
date = ''
times = ''
"""
原始txt处理成标准行列格式
"""
data_line = []
columns = None
with open(read_file, 'r', encoding='utf-8') as fp:
    lines = fp.readlines()
    for line in lines:
        line = line.strip()
        # 对最开始的几行,截取数据
        if 'Program name' in line:
            program = line.replace('Program name', '').strip().split('.')[0]
        elif 'Lot number' in line:
            lot_number = line.replace('Lot number', '').strip()
            flow_id = lot_number.split('@')[-1]
        elif 'Wafer number' in line:
            wafer_number = line.replace('Wafer number', '').strip()
        elif 'Date' in line:
            date = line.replace('Date', '').strip()
        elif 'Time' in line:
            times = line.replace('Time', '').strip()
        # 跳过空行
        if line == '':
            continue
        # 跳过Bias行
        if 'Bias' in line:
            continue
        # 列头
        if 'No.U' in line:
            columns = line.split()
            continue
        # 只有有列头了,才开始处理
        if columns is None:
            continue
        # 处理limit行
        line = line.split()
        # print(line)
        if 'Limit' in line[0]:
            # 插入对应的空格,补全行数据,对齐列
            line.insert(1, '')
            line.insert(1, '')
            line.insert(1, '')
        data_line.append(line)

# 转为dataframe处理
df = pd.DataFrame(data_line, columns=columns)
print(df)
limit_u = df.iloc[0, 4:].values
# print(limit_u)
limit_l = df.iloc[1, 4:].values
# print(limit_l)
test_nums = columns[4:]

"""
处理parameter
"""
to_parameter_datas = []
for x in range(len(limit_u)):
    u_index = 0
    # 找出limit_u中数值和字母的切分点
    for i in range(len(limit_u[x]) - 1, -1, -1):
        if limit_u[x][i].isdigit():
            u_index = i
            break
    l_index = 0
    # 找出limit_l中数值和字母的切分点
    for i in range(len(limit_l[x]) - 1, -1, -1):
        if limit_l[x][i].isdigit():
            l_index = i
            break

    li_l = limit_l[x][:l_index + 1]
    li_u = limit_u[x][:u_index + 1]
    li_unit = limit_u[x][u_index + 1:]
    parameter_line = ['I', program, flow_id.replace('0', ''), test_nums[x], li_l, li_u, li_unit,
                      test_nums[x], program, x + 1, li_unit, 1]
    to_parameter_datas.append(parameter_line)

"""
处理color
"""
to_color_datas = []
soft_bin = 2
for x in range(len(limit_u)):
    color_line = [soft_bin, test_nums[x], 'I', program, flow_id.replace('0', ''), program, '']
    to_color_datas.append(color_line)
    soft_bin += 1

"""
处理devices
"""
lot_ids = lot_number.split('-')
lot_id = lot_ids[0] + '-' + lot_ids[1]
wafer_id = lot_id + '#' + wafer_number
to_devices_datas = []
for idx, row in df.iterrows():
    # 跳过limitU和limitL
    if idx < 2:
        continue
    x = row['X']
    y = row['Y']
    soft_bin = row['Bin']
    part_id = row['No.U']
    devices_line = [wafer_id, x, y, 0, '', soft_bin, soft_bin, lot_id, part_id, '', program, date + " " + times]
    to_devices_datas.append(devices_line)

"""
处理fixed
"""
to_fixed_datas = []
u_count = 0
bin_count = 0
for idx, row in df.iterrows():
    # 跳过limitU和limitL
    if idx < 2:
        continue
    soft_bin = row['Bin']
    part_id = row['No.U']
    u_count += 1
    if int(soft_bin) == 1:
        bin_count += 1

fixed_line = [read_file, date + " " + times, wafer_id, lot_id, lot_id, program[:-1], u_count, u_count,
              'NULL', lot_ids[2], program, flow_id.replace('0', ''), '',
              'p1', 'NULL', 'NULL', 'NULL', 'F1', '25', bin_count]
to_fixed_datas.append(fixed_line)

"""
处理other
"""
to_other_datas = []
u_count = 0
bin_count = 0
for idx, row in df.iterrows():
    # 跳过limitU和limitL
    if idx < 2:
        continue
    x = row['X']
    y = row['Y']
    for test_name in test_nums:
        value = row[test_name]
        other_line = [wafer_id, x, y, '0', test_name, value, program, lot_id, date + " " + times]
        to_other_datas.append(other_line)

"""
输出到文件中
"""
columns = ['SOFT_BIN', 'BIN_NAME', 'PROJECT_TYPE', 'PART_NO', 'PROCESS', 'TEST_PROGRAM', 'COLOR']
out_df = pd.DataFrame(to_color_datas, columns=columns)
out_df.to_excel(color_out_file, index=None)

columns = ['Wafer_id', 'Locate_X', 'Locate_Y', 'Retest', 'T_Time', 'Soft_Bin', 'Hard_Bin', 'lot_id',
           'part_id', 'site_num', 'Program', 'Ending_Time']
out_df = pd.DataFrame(to_devices_datas, columns=columns)
out_df.to_excel(devices_out_file, index=None)

columns = ['File_name', 'Ending_time', 'Wafer_id', 'lot_id', 'C_lot', 'Part_no', 'Records', 'Insert_num',
           'Update_num', 'Machine_Name', 'Program', 'Step', 'Tempreature', 'FLOW', 'PARA_RECORDS',
           'PARA_INSERT', 'PARA_UPDATE', 'VENDOR', 'RAW_WAFERID', 'PASS_DIE']
out_df = pd.DataFrame(to_fixed_datas, columns=columns)
out_df.to_excel(fixed_out_file, index=None)

columns = ['wafer_id', 'Locate_X', 'Locate_Y', 'Retest', 'Test_Name', 'Value', 'Program', 'Lot_id', 'Ending_time']
out_df = pd.DataFrame(to_other_datas, columns=columns)
out_df.to_excel(other_out_file, index=None)

columns = ['PROJECT_TYPE', 'PART_NO', 'PROCESS', 'PARAMETER', 'LIMIT_L', 'LIMIT_H', 'UNIT', 'TEST_NUM',
           'TEST_PROGRAM', 'PARAMETER_ID', 'DISPLAY_UNIT', 'IS_CHART']
out_df = pd.DataFrame(to_parameter_datas, columns=columns)
out_df.to_excel(parameter_out_file, index=None)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值