# coding=utf-8
import csv
import ntpath
import pandas as pd
import pyhdb
def parse_excel(df, file, program, ending_time):
    """Explode one test-CSV DataFrame into per-parameter rows and PARAMETER metadata.

    Derives lot / op / part-no / temperature / retest from the underscore-
    delimited *file* name, builds one row per (PART_ID, parameter) and hands
    them to ``sel_line``, then appends PARAMETER records to the module-level
    ``module_excel``.

    Relies on module-level globals: ``title``, ``unit``, ``limit_l``,
    ``limit_u``, ``prefix_dict_1``, ``prefix_dict_2``, ``module_excel``,
    ``parameter_id`` and ``is_chart``.
    """
    # Token 1 of the file name is the process/step group (e.g. 'FT2').
    file_group = file.replace('__', '_').split('_')[1]
    if file_group not in prefix_dict_1:
        prefix_dict_1[file_group] = []
        prefix_dict_2[file_group] = []
    lots = file.replace('__', '_').split('_')[0]
    op = file.replace('__', '_').split('_')[2]
    part_no = file.replace('__', '_').split('_')[3]
    if len(op) < 2:
        return
    # Map the op token to a retest count; anything outside T1/P1/R1/R2/R3 is skipped.
    if 'T1' == op or 'P1' == op:
        op = 'P1'
        retest = 0
    elif 'R1' == op:
        retest = 1
    elif 'R2' == op:
        retest = 2
    elif 'R3' == op:
        retest = 3
    else:
        print('\t', '非T1R123')
        return
    # Temperature is encoded in token 4: FA=25, FH=150, FM=80, otherwise 0.
    if 'FA' in file.replace('__', '_').split('_')[4]:
        temperature = 25
    elif 'FH' in file.replace('__', '_').split('_')[4]:
        temperature = 150
    elif 'FM' in file.replace('__', '_').split('_')[4]:
        temperature = 80
    else:
        temperature = 0
    res_data = []
    # Handle regular and irregular csv files (original note: 对规则和不规则csv文件判断)
    df_group = df.groupby('PART_ID')
    for key in df_group.groups:
        pro_data = df_group.get_group(key)
        now_row = None
        for idx, row in pro_data.iterrows():
            # Skip groups keyed by an empty/blank PART_ID.
            if key is None or len(key.strip()) == 0:
                continue
            if now_row is None:
                now_row = row
            else:
                # NOTE(review): both branches assign the same value, so this
                # effectively keeps the LAST row of the group — confirm intended.
                now_row = row
        if now_row is None:
            continue
        # One output row per non-blank parameter column.
        for x in range(len(title)):
            # print(title)
            nt = title[x]
            if len(nt.strip()) <= 0:
                continue
            line = [file, ending_time, key, lots, part_no, program, file_group, temperature, retest, nt,
                    now_row[nt], now_row['T_TIME'], now_row['SOFT_BIN'], op]
            # print(line)
            res_data.append(line)
    sel_line(res_data)
    # Emit PARAMETER metadata once (only for the first DataFrame row index).
    for idx, rows in df.iterrows():
        if idx < 1:
            global parameter_id, is_chart
            # NOTE(review): `is_chart` is never initialised at module level;
            # the `else` branch would raise NameError on first use — confirm.
            if parameter_id <= 50:
                is_chart = 1
            else:
                is_chart += 1
            for x in range(len(title)):
                nt = title[x]
                if len(nt.strip()) <= 0:
                    continue
                module_line = ['AUTOMOTIVE', part_no, file_group, nt, limit_l[x], limit_u[x],
                               unit[x], parameter_id, program, parameter_id, unit[x], 1]
                parameter_id += 1
                is_chart += 1
                # print(module_line)
                # PARAMETER
                module_excel.append(module_line)
# Data formatting (original note: 数据格式化)
def sel_line(res_data):
    """Distribute parsed rows into the Fixed / Other / Devices staging lists.

    res_data: rows built by ``parse_excel``, one per (part, parameter).
    Appends into module-level ``prefix_dict_1`` (Other rows, value-filtered),
    ``prefix_dict_2`` (Devices rows, de-duplicated) and ``all_ft`` (Fixed
    rows, de-duplicated).
    """
    # NOTE(review): the last column is named 'lots' but receives the *op*
    # token appended by parse_excel, while 'lot_id' holds the lot — confirm.
    res_header = ['file', 'ending_time', 'part_id', 'lot_id', 'part_no', 'program', 'file_group', 'temperature',
                  'retest', 'nt', 'value', 't_time', 'soft_bin', 'lots']
    df = pd.DataFrame(res_data, columns=res_header)
    df_group = df.groupby('part_id')
    for key in df_group.groups:
        # print(key)
        pro_data = df_group.get_group(key)
        for idx, row in pro_data.iterrows():
            out_line = row
            # Fixed
            line_3 = [out_line['file'], out_line['ending_time'], out_line['part_id'], out_line['lot_id'],
                      out_line['lot_id'], out_line['part_no'], 0, 0, '', '', out_line['program'],
                      out_line['file_group'],
                      out_line['temperature'], out_line['lots'], 0, 0, '', 'TF', 0, 0]
            # Other
            line_1 = [out_line['part_id'], 0, 0, out_line['retest'], out_line['nt'], out_line['value'],
                      out_line['program'], out_line['lot_id'], out_line['ending_time']]
            # Devices
            line_2 = [out_line['part_id'], 0, 0, out_line['retest'], out_line['t_time'], out_line['soft_bin'],
                      out_line['soft_bin'], out_line['lot_id'], '', 1, out_line['program'], out_line['ending_time']]
            # Keep only measurements with a non-empty value.
            if not pd.isna(out_line['value']) and len(str(out_line['value'])) > 0:
                prefix_dict_1[out_line['file_group']].append(line_1)
            # De-duplicate via list membership (O(n) scans per append).
            if line_2 not in prefix_dict_2[out_line['file_group']]:
                prefix_dict_2[out_line['file_group']].append(line_2)
            if line_3 not in all_ft:
                all_ft.append(line_3)
# Fixed — staging-csv header for AUTOMOTIVE_FT_DEVICES_FIXED_ITEM.
# NOTE(review): 'FLIE_NAME' and 'RASS_DIE' look like typos of FILE_NAME /
# PASS_DIE, but the same keys are read back below — keep them in sync.
header = ['FLIE_NAME', 'ENDING_TIME', 'WAFER_ID', 'LOT_ID', 'C_LOT', 'PART_NO', 'RECORDS', 'INSERT_NUM', 'UPDATE_NUM',
          'MACHINE_NAME', 'PROGRAM', 'STEP', 'TEMPERATURE', 'FLOW', 'PARA_RECORDS', 'PARA_INSERT', 'PARA_UPDATE',
          'VENDOR', 'RAW_WAFERID', 'RASS_DIE']
# Other
prefix_header_1 = ['WAFER_ID', 'LOCATE_X', 'LOCATE_Y', 'RETEST', 'TEST_NAME', 'VALUE', 'PROGRAM', 'LOT_ID',
                   'ENDING_TIME']
# Devices
prefix_header_2 = ['WAFER_ID', 'LOCATE_X', 'LOCATE_Y', 'RETEST', 'T_TIME', 'SOFT_BIN', 'HARD_BIN', 'LOT_ID', 'PART_ID',
                   'SITE_NUM', 'PROGRAM', 'ENDING_TIME']
# Color
new_header = ['SOFT_BIN', 'BIN_NAME', 'PROJECT_TYPE', 'PART_NO', 'PROCESS', 'TEST_PROGRAM', 'COLOR', 'CREATE_DATE']
# Parameter
module_header = ['PROJECT_TYPE', 'PART_NO', 'PROCESS', 'PARAMETER', 'LIMIT_L', 'LIMIT_H', 'UNIT', 'TEST_NUM',
                 'TEST_PROGRAM', 'PARAMETER_ID', 'DISPLAY_UNIT', 'IS_CHART']
new_excel = []     # COLOR rows
module_excel = []  # PARAMETER rows
# Output folder (original note: 输出文件夹)
out_file_dir = 'Output'
# Input csv path (original note: 读取excel目录)
file_dir = r'ASAASDAASDC_FT2_P1_ASDASFS_FM111_A3_SR01_2022-02-02_01_11_11_.csv'
# Basename of the input path (Windows-style separator).
file = file_dir.split('\\')[-1]
prefix_dict_1 = {}  # file_group -> Other rows
prefix_dict_2 = {}  # file_group -> Devices rows
all_ft = []         # Fixed rows
num = 0             # NOTE(review): reassigned before every use below
xlsx_file = file_dir
read_data = []
head = None
new_head = None
# File-name tokens 5-6 form the program name; tokens 7-10 the ending time.
program = file.replace('__', '_').split('_')[5] + '_' + file.replace('__', '_').split('_')[6]
file_group = file.replace('__', '_').split('_')[1]
ending_time = file.replace('__', '_').split('_')[7] + ' ' + file.replace('__', '_').split('_')[8] + ':' + \
              file.replace('__', '_').split('_')[9] + ':' + file.replace('__', '_').split('_')[10].split('.')[0]
# print(file_group)
parameter_id = 1
# Read the raw csv: everything after the 'SITE_NUM' header row is data;
# 'SBin[...]' legend rows feed the COLOR staging list.
with open(xlsx_file, encoding='gbk') as f:
    row = csv.reader(f, delimiter=',')
    for idx, r in enumerate(row):
        if len(r) <= 0:
            continue
        # Once a header has been seen, every subsequent row is data.
        if head is not None or new_head is not None:
            read_data.append(r)
        first = r[0]
        # Locate the data start row (original note: 初始化数据开始行).
        if 'SITE_NUM' in first:
            head = r
        elif 'SBin' in first:
            # "SBin[n] Name__..." — extract bin number and bin name.
            l1 = first.split(']')[0].replace('SBin[', '')
            if 'Fail__Default' in first:
                l2 = 'Fail__Default'
            elif 'Pass__Default' in first:
                l2 = 'Pass__Default'
            else:
                l2 = first.split()[1].replace('__AllFail', '')
            part_no = file.replace('__', '_').split('_')[0]
            # COLOR
            new_excel.append([l1, l2, 'AUTOMOTIVE', part_no, file_group, program, 'test color', ''])
# print('\t', 'data start row:', head)
map = {}  # NOTE(review): unused, and shadows the builtin `map`
df = pd.DataFrame(read_data, columns=head, dtype=object)
# NOTE(review): remaps soft bin '3' to '1' in place — confirm intended.
df['SOFT_BIN'].replace('3', '1', inplace=True)
# Columns from index 6 on are measured parameters; the first three data rows
# carry unit, lower limit and upper limit respectively.
title = list(df.columns.values[6:])
unit = df.iloc[0, 6:].values
# print(unit)
limit_l = df.iloc[1, 6:].values
# print(limit_l)
limit_u = df.iloc[2, 6:].values
# print(limit_u)
parse_excel(df, file, program, ending_time)
"""--------------------------------------------------Upsert To HANA--------------------------------------------------"""
"""--------------------------------------------------Upsert To HANA--------------------------------------------------"""
"""--------------------------------------------------Upsert To HANA--------------------------------------------------"""
def get_connection():
    """Open and return a new pyhdb connection to the HANA instance.

    Credentials are intentionally blank in this sample. Tenant databases
    need the exact SQL port (e.g. 30053 — original note).
    """
    return pyhdb.connect(host="", port=34215, user="", password="")
"""---------------------------------------------------Fixed To HANA--------------------------------------------------"""
df_fixed = pd.DataFrame(all_ft, columns=header)
# print(df_fixed['WAFER_ID'])
# ----------------------------------------------------------------------------------------------------------------------
def select_wafer(file_wafer_ids):
cur = conn.cursor()
sql = "SELECT WAFERID,DEVICES_TIME FROM AUTOMOTIVE_FT_DEVICES_FIXED_ITEM WHERE WAFERID IN ({})" \
.format(','.join(["?" for _ in file_wafer_ids]))
cur.execute(sql, file_wafer_ids)
results = cur.fetchall()
return results
conn = get_connection()
cursor = conn.cursor()
# 取文件中的wafer_id 去查数据库
file_wafer_ids = list(df_fixed['WAFER_ID'])
results = select_wafer(file_wafer_ids)
if len(results) > 0:
# 库中有数据才处理, 转为id映射
db_id_time_map = {}
for res in results:
db_id_time_map[res[0]] = res[1]
file_del = []
for idx, rows in df_fixed.iterrows():
file_wafer_id = df_fixed['WAFER_ID']
if file_wafer_id in db_id_time_map:
db_time = db_id_time_map[file_wafer_id]
if db_time < file_wafer_id: # 如果数据库中的时间小于文件中的,删库
cur = conn.cursor()
sql = "DELETE FROM AUTOMOTIVE_FT_DEVICES_FIXED_ITEM WHERE WAFERID = ?"
cur.execute(sql, (file_wafer_id,))
else: # 文件 < 数据库, 删文件里的
file_del.append(idx)
df_fixed = df.drop(file_del)
df_fixed.to_csv(ntpath.join(out_file_dir, 'Automotive_FT_Devices_Fixed_Item.csv'),
header=header, encoding='gbk', index=False)
# -----------------------------------------------------------------------------------------------------------------------
def every_strand(list_temp, n):
    """Yield successive chunks of *n* items from list_temp (last chunk may be shorter)."""
    start = 0
    while start < len(list_temp):
        yield list_temp[start:start + n]
        start += n
def write_fixed_item(fixed_item):
    """UPSERT one batch of fixed-item rows into HANA and commit.

    Uses the module-level `fixed_table_name`, `cursor` and `conn`.
    Column order must match the `val` lists built by the caller.
    """
    columns = ['FILE_NAME', 'DEVICES_TIME', 'C_LOT', 'SUB_LOT', 'PART_NO',
               'WAFERID', 'RECORDS', 'INSERT_NUM', 'UPDATE_NUM', 'MACHINE_NAME',
               'PARA_RECORDS', 'PARA_INSERT', 'STEP', 'TEMPERATURE', 'TESTER_ID',
               'PARA_UPDATE', 'FLOW', 'RAW_WAFERID', 'PASS_DIE', 'VENDOR']
    placeholders = ','.join('?' * len(columns))
    sql = '''UPSERT "{}" ({}) values ({}) WITH PRIMARY KEY'''.format(
        fixed_table_name, ','.join(columns), placeholders)
    cursor.executemany(sql, fixed_item)
    conn.commit()
    print("==== FIXED Inserted Successfully ====")
fixed_table_name = 'AUTOMOTIVE_FT_DEVICES_FIXED_ITEM'
num = 1000  # rows per batch insert (original note: 每次插入的条数)
# NOTE(review): ENDING_TIME values carry a time-of-day component
# ('YYYY-MM-DD HH:MM:SS') while format='%Y-%m-%d' is date-only — confirm
# pd.to_datetime does not raise here.
df_fixed['ENDING_TIME'] = pd.to_datetime(df_fixed['ENDING_TIME'], format='%Y-%m-%d')
list_data_fixed = []
for idx, row in df_fixed.iterrows():
    # Map staging columns onto the HANA column order used by write_fixed_item.
    val = [row['FLIE_NAME'],
           row['ENDING_TIME'],
           row['C_LOT'],
           row['LOT_ID'],
           row['PART_NO'],
           row['WAFER_ID'],
           row['RECORDS'],
           row['INSERT_NUM'],
           row['UPDATE_NUM'],
           row['MACHINE_NAME'],
           row['PARA_RECORDS'],
           row['PARA_INSERT'],
           row['STEP'],
           row['TEMPERATURE'],
           row['PROGRAM'],
           row['PARA_UPDATE'],
           row['FLOW'],
           row['RAW_WAFERID'],
           row['RASS_DIE'],
           row['VENDOR']]
    # Normalise NaN and empty strings to SQL NULL.
    for x in range(len(val)):
        if pd.isna(val[x]):
            val[x] = None
        if val[x] == '':
            val[x] = None
    list_data_fixed.append(val)
# Batch rows `num` at a time, dropping the possibly-empty tail slice.
fixed_result = [list_data_fixed[i * num:(i + 1) * num] for i in range(int(len(list_data_fixed) / num) + 1) if
                list_data_fixed[i * num:(i + 1) * num]]
for fixed_item in fixed_result:
    write_fixed_item(fixed_item)
"""--------------------------------------------------Color To HANA---------------------------------------------------"""
df_color = pd.DataFrame(new_excel, columns=new_header)
df_color.to_csv(ntpath.join(out_file_dir, 'Global_Map_Color_V2.csv'),
header=new_header, encoding='gbk', index=False)
def write_color_item(color_item):
    """UPSERT one batch of COLOR rows into HANA and commit.

    Uses the module-level `color_table_name`, `cursor` and `conn`.
    """
    columns = ['PROJECT_TYPE', 'PART_NO', 'PROCESS', 'COLOR',
               'SOFT_BIN', 'BIN_NAME', 'TEST_PROGRAM']
    placeholders = ','.join('?' * len(columns))
    sql = '''UPSERT "{}" ({}) values ({}) WITH PRIMARY KEY'''.format(
        color_table_name, ','.join(columns), placeholders)
    cursor.executemany(sql, color_item)
    conn.commit()
    print("==== COLOR Inserted successfully ====")
color_table_name = 'GLOBAL_MAP_COLOR_V2'
num = 1000  # rows per batch insert (original note: 每次插入的条数)
list_data_color = []
for idx, row in df_color.iterrows():
    # Column order must match write_color_item.
    val = [row['PROJECT_TYPE'],
           row['PART_NO'],
           row['PROCESS'],
           row['COLOR'],
           row['SOFT_BIN'],
           row['BIN_NAME'],
           row['TEST_PROGRAM']]
    # Normalise NaN and empty strings to SQL NULL.
    for x in range(len(val)):
        if pd.isna(val[x]):
            val[x] = None
        if val[x] == '':
            val[x] = None
    list_data_color.append(val)
# Batch rows `num` at a time, dropping the possibly-empty tail slice.
color_result = [list_data_color[i * num:(i + 1) * num] for i in range(int(len(list_data_color) / num) + 1) if
                list_data_color[i * num:(i + 1) * num]]
for color_item in color_result:
    write_color_item(color_item)
"""------------------------------------------------Parameter To HANA-------------------------------------------------"""
df_parameter = pd.DataFrame(module_excel, columns=module_header)
df_parameter.to_csv(ntpath.join(out_file_dir, 'Global_Test_Parameter_V2.csv'),
header=module_header, encoding='gbk', index=False)
def write_parameter_item(parameter_item):
    """UPSERT one batch of PARAMETER rows into HANA and commit.

    Uses the module-level `parameter_table_name`, `cursor` and `conn`.
    """
    columns = ['PROJECT_TYPE', 'PART_NO', 'PROCESS', 'PARAMETER', 'LIMIT_L',
               'LIMIT_H', 'UNIT', 'TEST_NUM', 'TEST_PROGRAM', 'PARAMETER_ID',
               'DISPLAY_UNIT', 'IS_CHART']
    placeholders = ','.join('?' * len(columns))
    sql = '''UPSERT "{}" ({}) values ({}) WITH PRIMARY KEY'''.format(
        parameter_table_name, ','.join(columns), placeholders)
    # print(item)
    cursor.executemany(sql, parameter_item)
    conn.commit()
    print("==== PARAMETER Inserted successfully ====")
parameter_table_name = 'GLOBAL_TEST_PARAMETER_V2'
num = 1000  # rows per batch insert (original note: 每次插入的条数)
list_data_parameter = []
for idx, row in df_parameter.iterrows():
    # Column order must match write_parameter_item.
    val = [row['PROJECT_TYPE'],
           row['PART_NO'],
           row['PROCESS'],
           row['PARAMETER'],
           row['LIMIT_L'],
           row['LIMIT_H'],
           row['UNIT'],
           row['TEST_NUM'],
           row['TEST_PROGRAM'],
           row['PARAMETER_ID'],
           row['DISPLAY_UNIT'],
           row['IS_CHART']]
    # Normalise NaN and empty strings to SQL NULL.
    for x in range(len(val)):
        if pd.isna(val[x]):
            val[x] = None
        if val[x] == '':
            val[x] = None
    list_data_parameter.append(val)
# Batch rows `num` at a time, dropping the possibly-empty tail slice.
parameter_result = [list_data_parameter[i * num:(i + 1) * num] for i in range(int(len(list_data_parameter) / num) + 1)
                    if list_data_parameter[i * num:(i + 1) * num]]
for parameter_item in parameter_result:
    write_parameter_item(parameter_item)
"""--------------------------------------------------Other To HANA---------------------------------------------------"""
for key in prefix_dict_1:
df_other = pd.DataFrame(prefix_dict_1[key], columns=prefix_header_1)
df_other.to_csv(ntpath.join(out_file_dir, 'Automotive_' + key + '_Devices_Other_Items.csv'),
header=prefix_header_1, encoding='gbk', index=False)
other_table_name = 'AUTOMOTIVE_' + key + '_DEVICES_OTHER_ITEMS'
def write_other_item(other_item):
mark = '?,?,?,?,?,?,?,?,?' # sql values
cols_str = 'WAFERID,' \
'LOCATE_X,' \
'LOCATE_Y,' \
'RETEST,' \
'RESULT,' \
'PARAMETER,' \
'TEST_PROGRAM,' \
'DEVICES_TIME,' \
'SUB_LOT'
sql = '''UPSERT "{}" ({}) values ({}) WITH PRIMARY KEY'''.format(other_table_name, cols_str, mark)
# print(item)
cursor.executemany(sql, other_item)
conn.commit()
print("==== OTHER Inserted successfully ====")
num = 10000 # 每次插入的条数
df_other['ENDING_TIME'] = pd.to_datetime(df_other['ENDING_TIME'], format='%Y-%m-%d')
list_data_other = []
for idx, row in df_other.iterrows():
val = [row['WAFER_ID'],
row['LOCATE_X'],
row['LOCATE_Y'],
row['RETEST'],
row['VALUE'],
row['TEST_NAME'],
row['PROGRAM'],
row['ENDING_TIME'],
row['LOT_ID']]
for x in range(len(val)):
if pd.isna(val[x]):
val[x] = None
if val[x] == '':
val[x] = None
list_data_other.append(val)
other_result = [list_data_other[i * num:(i + 1) * num] for i in range(int(len(list_data_other) / num) + 1) if
list_data_other[i * num:(i + 1) * num]]
for other_item in other_result:
write_other_item(other_item)
"""-------------------------------------------------Devices To HANA--------------------------------------------------"""
for key in prefix_dict_2:
df_devices = pd.DataFrame(prefix_dict_2[key], columns=prefix_header_2)
df_devices.to_csv(ntpath.join(out_file_dir, 'Automotive_' + key + '_Devices.csv'),
header=prefix_header_2, encoding='gbk', index=False)
devices_table_name = 'AUTOMOTIVE_' + key + '_DEVICES'
def write_devices_item(devices_item):
mark = '?,?,?,?,?,?,?,?,?,?,?,?'
cols_str = 'WAFERID,' \
'LOCATE_X,' \
'LOCATE_Y,' \
'SOFT_BIN,' \
'DEVICES_TIME,' \
'RETEST,' \
'HARD_BIN,' \
'PART_ID,' \
'TEST_PROGRAM,' \
'SITE_NUM,' \
'TEST_TIME,' \
'SUB_LOT'
sql = '''UPSERT "{}" ({}) values ({}) WITH PRIMARY KEY'''.format(devices_table_name, cols_str, mark)
# print(item)
cursor.executemany(sql, devices_item)
conn.commit()
print("==== DEVICES Inserted successfully ====")
num = 1000 # 每次插入的条数
df_devices['ENDING_TIME'] = pd.to_datetime(df_devices['ENDING_TIME'], format='%Y-%m-%d')
list_data_devices = []
for idx, row in df_devices.iterrows():
val = [row['WAFER_ID'],
row['LOCATE_X'],
row['LOCATE_Y'],
row['SOFT_BIN'],
row['ENDING_TIME'],
row['RETEST'],
row['HARD_BIN'],
row['PART_ID'],
row['PROGRAM'],
row['SITE_NUM'],
row['T_TIME'],
row['LOT_ID']]
for x in range(len(val)):
if pd.isna(val[x]):
val[x] = None
if val[x] == '':
val[x] = None
list_data_devices.append(val)
devices_result = [list_data_devices[i * num:(i + 1) * num] for i in range(int(len(list_data_devices) / num) + 1) if
list_data_devices[i * num:(i + 1) * num]]
for devices_item in devices_result:
write_devices_item(devices_item)
print("TableName :" + other_table_name)
print("TableName :" + devices_table_name)
"""----------------------------------------------------HANA Close----------------------------------------------------"""
try:
cursor.close()
conn.close()
except Exception as e:
print("Close HANA :" + str(e))
pass
# Original trailing note: 比对删除库中数据 ("compare against the database and delete stale rows").
# Blog-paste residue (publication metadata, not code): 最新推荐文章于 2024-04-30 14:45:52 发布