# Refactoring request (was pasted inline, breaking the syntax): make the code concise, efficient, and easy to modify.
import logging
import os
import re
from typing import Tuple, Optional, List, Set, Dict
import pandas as pd
from Data_Treating_Functions import select_folder
logger = logging.getLogger(__name__)
# 0-based field indices (after str.split()) within a test-record line.
TEST_NAME_SITE = 2   # test item name
PIN_SITE = 3         # pin / pattern name
MEASURED_SITE = 7    # measured value
UNIT_SITE = 8        # unit of the measured value
FORCE_SITE = 11      # forced (applied) value
# Platform tokens recognised inside file names (matched case-insensitively as substrings).
PLATFORM_LIST = ["bcd", "ms", "d11", "55ll", "55ef", "40ll", "40ef", "ips", "soi"]
# File-name temperature tag -> temperature in degrees Celsius.
TEMPERATURE_MAP = {"ht85": 85, "ht125": 125, "ht150": 150, "lt-40": -40, "lt": -40, "nt": 25}
# Platforms whose timing test names are rewritten with a cycling sequence number.
TIMING_TYPE2_PLATFORM = {"IPS40V", "IPS40VG1.5", "65RFSOI", "130RFSOI"}
# Leading identification columns shared by every output sheet.
FIXED_COLUMNS = ["Node", "Platform", "Type", "Corner", "Tem.", "Lot_ID", "Wafer_ID", "Die_ID"]
def extract_basic_info_for_htol(filename: str) -> Optional[Tuple[str, Optional[str], Optional[str], int, str, bool, Optional[str], Optional[str]]]:
    """
    Extract basic lot/wafer/test information from a data file name.

    The name is expected to be "_"-separated, e.g.
    ``bcd_PROD_CL12345_#3_ht125_ff_12.txt``.

    Args:
        filename: bare file name (may carry the ".txt" suffix).

    Returns:
        Tuple of (product_id, lot_id, wafer_id, temperature, corner,
        wafer_flag, die_id, start_die), or None when *filename* is not a
        non-empty string or parsing fails.  ``wafer_flag`` is True when a
        "<start>-<end>" die-range token was found; ``start_die`` then
        holds the first die number.
    """
    if not filename or not isinstance(filename, str):
        return None
    try:
        parts = filename.replace(".txt", "").split("_")
        product_id = wafer_id = lot_id = die_id = start_die = None
        temperature, corner = 25, "TT"
        wafer_flag = False
        for i, part in enumerate(parts):
            part_lower = part.casefold()
            # Skip platform identifier tokens.  PLATFORM_LIST entries are
            # plain literals, so substring membership is equivalent to the
            # original case-insensitive regex search and far cheaper.
            if any(plat in part_lower for plat in PLATFORM_LIST):
                continue
            # Lot number; the preceding token (if any) is the product ID.
            if part_lower.startswith(('cl', 'al')):
                lot_id = part_lower.upper()
                if i > 0:
                    product_id = parts[i - 1].upper()
            # Wafer number, zero-padded to two digits.
            elif part_lower.startswith('#'):
                wafer_id = f"#{part.split('#')[1].zfill(2)}"
            # Temperature tag ("htol"/"hts" are test names, not temperatures).
            elif part_lower.startswith(('ht', 'nt', 'lt')) and part_lower not in {'htol', 'hts'}:
                temperature = TEMPERATURE_MAP.get(part_lower, 25)
            # Process corner.
            elif part_lower in {"tt", "ff", "ss", "sf", "fs"}:
                corner = part_lower.upper()
            # "<start>-<end>" die range marks whole-wafer data.
            elif '-' in part_lower:
                fields = part_lower.split('-')
                if fields[0].isdigit() and fields[1].isdigit():
                    wafer_flag = True
                    start_die = fields[0]
            # A bare number is a single die ID.
            elif part_lower.isdigit():
                die_id = part_lower
        if product_id is None:
            product_id = input("请输入product:")
        return product_id, lot_id, wafer_id, temperature, corner, wafer_flag, die_id, start_die
    except Exception as e:
        logger.error("Error processing filename %r: %s", filename, e)
        return None
def find_string_in_file(file_path, target_string):
    """
    Locate every line of a file that contains a target string.

    Args:
        file_path (str): path of the file to scan.
        target_string (str): substring to look for.

    Returns:
        list: 0-based indices of each matching line, followed by one
        extra trailing element holding the total number of lines.
    """
    match_indices = []
    line_count = 0
    with open(file_path, "r", encoding="utf-8") as handle:
        # enumerate from 0 yields the 0-based index directly.
        for index, text in enumerate(handle):
            line_count += 1
            if target_string in text:
                match_indices.append(index)
    # The final element is the overall line count, not a match index.
    match_indices.append(line_count)
    return match_indices
def get_bin_value(contents_list: List[str]) -> int:
    """
    Extract the Bin value from the test-data contents.

    Scans for the first header line containing both "site" and "bin"
    (case-insensitive) and reads the Bin from the third field of the line
    two rows below it.

    Args:
        contents_list: file contents split into lines.

    Returns:
        int: the Bin value; "n/a" maps to 1, and 0 is returned when no
        valid Bin is found.
    """
    bin_val = 0
    for idx, line in enumerate(contents_list):
        line_lower = line.casefold()
        # Standard format: header line carries both Site and Bin labels.
        if 'site' in line_lower and 'bin' in line_lower:
            try:
                # Data row sits two lines below the header.  Use the loop
                # index instead of list.index(line), which is O(n) and
                # points at the FIRST occurrence when lines repeat.
                data = contents_list[idx + 2].split()
                bin_val = 1 if data[2].casefold() == 'n/a' else int(data[2])
                break
            except (IndexError, ValueError):
                pass
    return bin_val
def find_test_items(
        contents_list: List[str],
        mode: str = "dc",
        start_row: int = 0,
        custom_stop_conditions: Optional[Set[str]] = None
) -> Tuple[List[int], List[str], int]:
    """
    Find test-item marker lines, strictly separating DC and FT modes.

    Args:
        contents_list: file contents split into lines.
        mode: 'dc' or 'ft'.
        start_row: line-number offset (applied in FT mode only).
        custom_stop_conditions: regex patterns overriding the default DC
            stop conditions.

    Returns:
        tuple: (
            line numbers of test-item markers,
            test-item names extracted from "<...>" tags,
            stop line (DC mode only; FT mode always returns 0)
        )
    """
    line_numbers: List[int] = []
    test_items: List[str] = []
    stop_line = 0
    is_dc = mode == "dc"
    offset = 0 if is_dc else start_row
    # Hoist loop invariants: compile the patterns once instead of per line.
    item_pattern = re.compile(r"<(.*?)>")
    stop_patterns = [
        re.compile(p)
        for p in (custom_stop_conditions or {"pattern.*1st", "site failed tests/executed tests"})
    ]
    for line_num, line in enumerate(contents_list):
        # Every "<name>" tag marks a test item regardless of mode.
        if "<" in line and ">" in line:
            line_numbers.append(line_num + offset)
            test_items.extend(item_pattern.findall(line))
        # DC mode only: look for the terminating marker.
        if is_dc and not stop_line:
            line_lower = line.lower()
            if any(p.search(line_lower) for p in stop_patterns):
                stop_line = line_num
                line_numbers.append(line_num)
                break  # stop immediately once the DC terminator is found
    # FT mode returns every test item and reports no stop line.
    return line_numbers, test_items, (stop_line if is_dc else 0)
def process_dc_test(contents: List[str], die_num: int, bin_val: int,
                    platform: str, cell_type: str, corner: str, temp: int, lot_id: str, wafer_id: str) -> pd.DataFrame:
    """
    Parse the DC test sections into one long-format DataFrame.

    Args:
        contents: file contents split into lines.
        die_num: die number the data belongs to.
        bin_val: bin value of the die.
        platform, cell_type, corner, temp, lot_id, wafer_id: fixed
            metadata replicated onto every row.

    Returns:
        pd.DataFrame: one row per DC measurement; empty when no DC test
        items exist.
    """
    dc_lines, dc_items, _ = find_test_items(contents, mode="dc")
    if not dc_lines:
        return pd.DataFrame()
    dc_data = []
    for j in range(len(dc_lines) - 1):
        start = max(0, dc_lines[j] + 1)
        end = min(len(contents), dc_lines[j + 1])
        sheet_name = dc_items[j]
        test_data = {
            "test_name": [], "pin": [], "Value": [], "Unit": [], "force": [],
            "Die_ID": die_num, "X": None, "Y": None, "Bin": bin_val,
            "Platform": platform, "Type": cell_type, "Corner": corner,
            "Tem.": temp, "Lot_ID": lot_id, "Wafer_ID": wafer_id,
            "sheet": sheet_name
        }
        for line in contents[start:end]:
            if 'alarm' in line.lower() or not line.strip():
                continue
            parts = line.split()
            try:
                # Field offsets: an "N/A" row is missing the measured value,
                # shifting value/unit left by one and the force fields left
                # by two; a "(F)"/"(A)" flag shifts force right by one.
                na_shift = 1 if "N/A" in line else 0
                flag_shift = 1 if ("(F)" in line or "(A)" in line) else 0
                force_idx = FORCE_SITE + flag_shift - 2 * na_shift
                test_data["test_name"].append(parts[TEST_NAME_SITE])
                test_data["pin"].append(parts[PIN_SITE])
                test_data["Value"].append(parts[MEASURED_SITE - na_shift])
                test_data["Unit"].append(parts[UNIT_SITE - na_shift])
                voltage = format(float(parts[force_idx]), ".2f")
                test_data["force"].append(f"{voltage}{parts[force_idx + 1]}")
            except (IndexError, ValueError) as e:
                logger.warning("解析DC测试数据出错: %s, 行内容: %s", e, line)
        # Keep only sections that produced at least one row.
        if test_data["test_name"]:
            dc_data.append(pd.DataFrame(test_data))
    return pd.concat(dc_data, ignore_index=True) if dc_data else pd.DataFrame()
def _process_timing_test(contents: List[str], *args) -> pd.DataFrame:
    """
    Parse the Timing_Test section into a DataFrame, keeping every column
    list the same length.

    Args:
        contents: lines of the Timing_Test section.
        *args: fixed metadata in the order (Die_ID, X, Y, Bin, Platform,
            Type, Corner, Tem., Lot_ID, Wafer_ID).

    Returns:
        pd.DataFrame: one row per Freq/Power measurement; empty when the
        section holds no data.
    """
    # Column lists; all must stay equal length for DataFrame construction.
    test_data = {
        "test_name": [], "pin": [], "Value": [], "Unit": [], "force": [],
        "Die_ID": [], "X": [], "Y": [], "Bin": [],
        "Platform": [], "Type": [], "Corner": [],
        "Tem.": [], "Lot_ID": [], "Wafer_ID": [],
        "sheet": []
    }
    # Running sequence number used to rewrite type-2 platform test names.
    test_seq = 0
    for line in contents:
        line_lower = line.lower()
        if 'alarm' in line_lower or not line.strip():
            continue
        # Skip header/marker lines.
        if "timing_test" in line_lower or "measured" in line_lower or "pattern" in line_lower or "Site" in line:
            continue
        if "Freq" in line or "Power" in line:
            parts = line.split()
            try:
                # Number of complete rows so far; used to roll back on error.
                current_count = len(test_data["test_name"])
                # Type-2 platforms get "<mode><seq>_<voltage>" names with a
                # sequence cycling 1..4, advanced on every Freq line.
                if args[4] in TIMING_TYPE2_PLATFORM:
                    if "Freq" in line:
                        if test_seq == 4:
                            test_seq = 1
                        else:
                            test_seq += 1
                    test_mode = ''.join(re.findall(r"\D", parts[TEST_NAME_SITE].split("_")[0]))
                    test_voltage = parts[TEST_NAME_SITE].split("_")[1]
                    test_data["test_name"].append(f"{test_mode}{str(test_seq)}_{test_voltage}")
                else:
                    test_data["test_name"].append(parts[TEST_NAME_SITE])
                test_data["pin"].append(parts[PIN_SITE])
                test_data["Value"].append(parts[MEASURED_SITE])
                test_data["Unit"].append(parts[UNIT_SITE])
                # A "(F)"/"(A)" flag shifts the force fields right by one.
                if "(F)" in line or "(A)" in line:
                    voltage = format(float(parts[FORCE_SITE + 1]), ".2f")
                    test_data["force"].append(f"{voltage}{parts[FORCE_SITE + 2]}")
                else:
                    voltage = format(float(parts[FORCE_SITE]), ".2f")
                    test_data["force"].append(f"{voltage}{parts[FORCE_SITE + 1]}")
                # Fixed metadata, replicated per row to keep lengths equal.
                test_data["Die_ID"].append(args[0])
                test_data["X"].append(args[1])
                test_data["Y"].append(args[2])
                test_data["Bin"].append(args[3])
                test_data["Platform"].append(args[4])
                test_data["Type"].append(args[5])
                test_data["Corner"].append(args[6])
                test_data["Tem."].append(args[7])
                test_data["Lot_ID"].append(args[8])
                test_data["Wafer_ID"].append(args[9])
                test_data["sheet"].append("Timing_Test")
                # Length-consistency check.
                # NOTE(review): assert is stripped under "python -O"; an
                # explicit check would be safer — confirm before relying on it.
                assert all(len(lst) == current_count + 1 for lst in test_data.values()), "列表长度不一致"
            except (IndexError, ValueError) as e:
                logger.warning(f"解析Timing测试数据出错: {e}, 行内容: {line}")
                # On error, roll back any partially appended row.
                for key in test_data:
                    if len(test_data[key]) > current_count:
                        test_data[key].pop()
    return pd.DataFrame(test_data) if test_data["test_name"] else pd.DataFrame()
def _process_function_test(contents: List[str], *args) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Parse the Function_Test section, pairing forced voltages with results.

    Args:
        contents: lines of the Function_Test section.
        *args: fixed metadata in the order (Die_ID, X, Y, Bin, Platform,
            Type, Corner, Tem., Lot_ID, Wafer_ID).

    Returns:
        (function-test DataFrame, LevelShift DataFrame); either may be empty.
    """
    test_list: List[str] = []
    pattern_list: List[str] = []
    result_list: List[str] = []
    force_list: List[str] = []
    levelshift_records = []
    levelshift_found = False
    # "mV|V" tries the longer unit first.  The original "[V|mV]" was a
    # character class matching the single characters 'V', '|' and 'm',
    # so "500mV" recorded the meaningless unit "m".
    unit_re = re.compile(r"mV|V", re.IGNORECASE)
    for num, line in enumerate(contents):
        line_lower = line.lower()
        if 'alarm' in line_lower or 'error' in line_lower:
            continue
        is_function_line = "function_test" in line_lower or "Function" in line
        # Forced voltage header ('>-') precedes its test-result lines.
        if is_function_line and '>-' in line:
            try:
                voltage_part = line.split()[1].split("@")[1]
                match = unit_re.search(voltage_part)
                voltage_val = format(float(voltage_part[:match.start()]), ".2f")
                force_list.append(f"{voltage_val}{match.group()}")
            except (IndexError, ValueError, AttributeError) as e:
                logger.warning("解析电压数据出错: %s, 行内容: %s", e, line)
            continue
        # Test-result line.
        if is_function_line:
            parts = line.split()
            try:
                test_list.append(parts[TEST_NAME_SITE])
                pattern_list.append(parts[PIN_SITE].split(".pat")[0].split("_module")[0])
                result_list.append("Pass" if "N/A" in line else parts[6])
            except (IndexError, ValueError) as e:
                logger.warning("解析Function测试数据出错: %s, 行内容: %s", e, line)
        # LevelShift table: parsed once, starting three lines below its header.
        if 'levelshift' in line_lower and not levelshift_found:
            levelshift_found = True
            for line_2 in contents[num + 3:]:
                line_2 = line_2.strip()
                if not line_2 or "Pattern" in line_2 or 'ALARM' in line_2:
                    continue
                if 'Site' in line_2 and 'Failed' in line_2:
                    break
                parts = line_2.split()
                try:
                    ls_record = {
                        "Coord_X": parts[1],
                        "Coord_Y": parts[2],
                        # A standalone "m" field marks a milli-scaled value.
                        "vdin1": round(float(parts[4]) * (0.001 if "m" == parts[5] else 1), 2),
                        "vih": round(float(parts[5 if "m" != parts[5] else 6]) *
                                     (0.001 if "m" == parts[6 if "m" != parts[5] else 7] else 1), 2),
                        "result": "Fail" if parts[-1] == 'Alarm' else parts[-1],
                        "Die_ID": args[0],
                        "X": args[1],
                        "Y": args[2],
                        "Bin": args[3],
                        "Platform": args[4],
                        "Type": args[5],
                        "Corner": args[6],
                        "Tem.": args[7],
                        "Lot_ID": args[8],
                        "Wafer_ID": args[9],
                        "sheet": "LevelShift"
                    }
                    levelshift_records.append(ls_record)
                except (IndexError, ValueError) as e:
                    logger.warning("解析LevelShift数据出错: %s, 行内容: %s", e, line_2)
    # Build the result frames.
    n = len(result_list)
    func_records = {
        "test_name": test_list,
        "pin": pattern_list,
        "result": result_list,
        "force": force_list,
        "Die_ID": [args[0]] * n, "X": [args[1]] * n, "Y": [args[2]] * n,
        "Bin": [args[3]] * n,
        "Platform": [args[4]] * n, "Type": [args[5]] * n, "Corner": [args[6]] * n,
        "Tem.": [args[7]] * n, "Lot_ID": [args[8]] * n, "Wafer_ID": [args[9]] * n,
        "sheet": ["Function_Test"] * n, "Result": ["Result"] * n
    }
    # NOTE(review): pd.DataFrame raises when force_list ends up a different
    # length from result_list (mismatched header/result lines) — same
    # constraint as the original code.
    func_df = pd.DataFrame(func_records)
    levelshift_df = pd.DataFrame(levelshift_records) if levelshift_records else pd.DataFrame()
    return func_df, levelshift_df
def _process_retention_test(contents: List[str], *args) -> pd.DataFrame:
    """
    Parse the retention test section, keeping all column lists equal length.

    Args:
        contents: lines of the retention section.
        *args: fixed metadata in the order (Die_ID, X, Y, Bin, Platform,
            Type, Corner, Tem., Lot_ID, Wafer_ID).

    Returns:
        pd.DataFrame: one row per retention result; empty when none found.
    """
    test_data: Dict[str, list] = {
        "test_name": [], "pin": [], "result": [], "force": [],
        "Die_ID": [], "X": [], "Y": [], "Bin": [],
        "Platform": [], "Type": [], "Corner": [],
        "Tem.": [], "Lot_ID": [], "Wafer_ID": [],
        "sheet": []
    }
    fixed_keys = ("Die_ID", "X", "Y", "Bin", "Platform", "Type",
                  "Corner", "Tem.", "Lot_ID", "Wafer_ID")
    # "mV|V" tries the longer unit first.  The original "[V|mV]" was a
    # character class matching the single characters 'V', '|' and 'm',
    # so "500mV" recorded the meaningless unit "m".
    unit_re = re.compile(r"mV|V", re.IGNORECASE)
    for line in contents:
        line_lower = line.lower()
        if 'alarm' in line_lower:
            continue
        # Result line ('>-' marks the voltage header instead).
        if "retention" in line_lower and '>-' not in line:
            parts = line.split()
            current_count = len(test_data["test_name"])
            try:
                test_data["test_name"].append(parts[TEST_NAME_SITE])
                test_data["pin"].append(parts[PIN_SITE].split(".pat")[0].split("_module")[0])
                test_data["result"].append("Pass" if "N/A" in line else parts[6])
                # Fixed metadata, replicated per row to keep lengths equal.
                for key, value in zip(fixed_keys, args):
                    test_data[key].append(value)
                test_data["sheet"].append("retention")
            except (IndexError, ValueError) as e:
                logger.warning("解析Retention测试数据出错: %s, 行内容: %s", e, line)
                # Roll back any partially appended row (pop at most once
                # per column, matching the original recovery behaviour).
                for column in test_data.values():
                    if len(column) > current_count:
                        column.pop()
        # Voltage header: one forced voltage covers three result rows.
        elif "retention" in line_lower and '>-' in line:
            try:
                voltage_part = line.split()[1].split("@")[1]
                match = unit_re.search(voltage_part)
                voltage = format(float(voltage_part[:match.start()]), ".2f")
                test_data["force"].extend([f"{voltage}{match.group()}"] * 3)
            except (IndexError, ValueError, AttributeError) as e:
                logger.warning("解析Retention电压数据出错: %s, 行内容: %s", e, line)
    return pd.DataFrame(test_data) if test_data["test_name"] else pd.DataFrame()
def process_ft_test(contents: List[str], die_num: int, bin_val: int,
                    platform: str, cell_type: str, corner: str, temp: int, lot_id: str, wafer_id: str) -> Dict[
        str, pd.DataFrame]:
    """
    Split the functional-test region into sections and dispatch each one
    to its dedicated parser.

    Returns:
        dict with keys "timing", "function", "retention" and "levelshift";
        values are the parsed DataFrames (empty when a section is absent).
    """
    ft_data = {key: pd.DataFrame() for key in ("timing", "function", "retention", "levelshift")}
    # The DC stop line marks where the functional tests begin.
    _, _, function_start = find_test_items(contents, mode="dc")
    if function_start == 0:
        return ft_data
    ft_lines, ft_items, _ = find_test_items(contents[function_start:], mode="ft", start_row=function_start)
    ft_lines.append(len(contents))
    common = (die_num, None, None, bin_val, platform, cell_type, corner, temp, lot_id, wafer_id)
    for j in range(len(ft_lines) - 1):
        sheet_name = ft_items[j]
        section = contents[max(0, ft_lines[j] + 1):min(len(contents), ft_lines[j + 1])]
        if sheet_name == "Timing_Test":
            ft_data["timing"] = _process_timing_test(section, *common)
        if "Function" in sheet_name:
            func_df, ls_df = _process_function_test(section, *common)
            ft_data["function"] = func_df
            if not ls_df.empty:
                ft_data["levelshift"] = ls_df
        elif sheet_name == "retention":
            ft_data["retention"] = _process_retention_test(section, *common)
    return ft_data
def process_die_data(contents: List[str], die_num: int, platform: str, cell_type: str,
                     corner: str, temp: int, lot_id: str, wafer_id: str):
    """
    Run the full per-die pipeline: bin extraction, DC parsing, FT parsing.

    Returns:
        dict with keys "die_info" (die_num, X, Y, bin), "dc_data"
        (DataFrame) and "ft_data" (dict of DataFrames).
    """
    bin_val = get_bin_value(contents)
    meta = (platform, cell_type, corner, temp, lot_id, wafer_id)
    return {
        "die_info": (die_num, None, None, bin_val),
        "dc_data": process_dc_test(contents, die_num, bin_val, *meta),
        "ft_data": process_ft_test(contents, die_num, bin_val, *meta),
    }
def _add_fixed_columns(df: pd.DataFrame, platform: str, cell_type: str, corner: str,
temperature: int, lot_id: str, wafer_id: str) -> pd.DataFrame:
"""添加固定列到DataFrame"""
df.insert(0, "Platform", platform)
df.insert(1, "Type", cell_type)
df.insert(2, "Corner", corner)
df.insert(3, "Tem.", temperature)
df.insert(4, "Lot_ID", lot_id)
df.insert(5, "Wafer_ID", wafer_id)
return df
def unit_conversion(value: str, from_unit: str, to_unit: str = None) -> float:
    """
    Generic unit conversion for current, voltage and frequency units.

    Args:
        value: numeric string to convert.
        from_unit: source unit (case-insensitive).
        to_unit: target unit (case-insensitive); None selects the default
            target of the source's category (current -> uA, voltage -> mV,
            frequency -> MHz).

    Returns:
        float: converted value.

    Raises:
        ValueError: unsupported unit, incompatible unit categories, or a
            non-numeric *value*.
    """
    # Factor that converts each unit to its category's base unit
    # (current -> uA, voltage -> mV, frequency -> MHz).
    unit_factor = {
        'a': 1e6, 'ma': 1e3, 'ua': 1, 'na': 1e-3, 'pa': 1e-6, 'fa': 1e-9,
        'v': 1e3, 'mv': 1, 'uv': 1e-3, 'kv': 1e6,
        'hz': 1e-6, 'khz': 1e-3, 'mhz': 1, 'ghz': 1e3,
    }
    # Exact membership replaces the fragile substring tests of the
    # original ('a' in ..., 'v' in ...).
    categories = {
        'current': {'a', 'ma', 'ua', 'na', 'pa', 'fa'},
        'voltage': {'v', 'mv', 'uv', 'kv'},
        'frequency': {'hz', 'khz', 'mhz', 'ghz'},
    }
    default_target = {'current': 'ua', 'voltage': 'mv', 'frequency': 'mhz'}
    from_unit = from_unit.lower()
    from_type = next((cat for cat, units in categories.items() if from_unit in units), None)
    if to_unit is None:
        # Unknown source units fall through to the ValueError below instead
        # of the original AttributeError on to_unit.lower().
        to_unit = default_target.get(from_type, '')
    to_unit = to_unit.lower()
    if from_unit not in unit_factor or to_unit not in unit_factor:
        raise ValueError(f"不支持的转换单位: {from_unit} -> {to_unit}")
    to_type = next(cat for cat, units in categories.items() if to_unit in units)
    if from_type != to_type:
        raise ValueError(f"单位类型不匹配: 不能将{from_type}单位转换为{to_type}单位")
    try:
        num_value = float(value)
    except ValueError:
        raise ValueError(f"无效的数值格式: {value}") from None
    # Convert to the base unit first, then to the target unit.
    return num_value * unit_factor[from_unit] / unit_factor[to_unit]
def write_dc_test_data(df: pd.DataFrame, writer: pd.ExcelWriter) -> None:
    """
    Write DC test data to Excel: a long-format summary sheet plus a
    pivoted sheet per test group, with values normalised to mV / uA.

    Args:
        df: combined DC data (one row per measurement).
        writer: open ExcelWriter to write the sheets into.
    """
    VALUE_COL = "Value"
    UNIT_COL = "Unit"
    PIN_COL = "Pin"
    for name, group in df.groupby("sheet"):
        group = group.copy()  # never mutate the groupby view
        # Normalise units: anything containing 'V' becomes mV, else uA.
        unit = "mV" if 'V' in group[UNIT_COL].iloc[0] else "uA"
        group[VALUE_COL] = group.apply(
            lambda row: unit_conversion(row[VALUE_COL], row[UNIT_COL]),
            axis=1
        )
        group[UNIT_COL] = unit
        # Choose how the Pin column is built for this sheet.
        test_count = group["test_name"].nunique()
        pin_count = group["pin"].nunique()
        die_count = group["Die_ID"].value_counts().iloc[0]
        if test_count == 1 and pin_count == die_count:
            group[PIN_COL] = group["pin"]
        elif name == "Stdby_VDDT":
            group[PIN_COL] = group["pin"] + "_" + group["force"]
        elif test_count == 2 and pin_count > test_count:
            # Two tests sharing many pins: emit one sheet per test instead.
            for test_name, test_value in group.groupby("test_name"):
                # Copy before assigning to avoid writing into the groupby
                # slice (original triggered SettingWithCopyWarning and used
                # a needless row-wise apply just to copy a column).
                test_value = test_value.copy()
                test_value[PIN_COL] = test_value["pin"]
                test_value.to_excel(writer, sheet_name=test_name, index=False)
            continue
        else:
            group[PIN_COL] = group["test_name"] + "_" + group["pin"]
        output_cols = FIXED_COLUMNS + [UNIT_COL, PIN_COL, VALUE_COL]
        group[output_cols].to_excel(writer, sheet_name=f"{name}_Summary", index=False)
        # Pivot so each Pin becomes a column; mean() guards duplicate rows.
        pivot_df = group.pivot_table(
            index=FIXED_COLUMNS + [UNIT_COL],
            columns=PIN_COL,
            values=VALUE_COL,
            aggfunc='mean'
        ).reset_index()
        pivot_df.to_excel(writer, sheet_name=name, index=False)
def write_fuc_test_data(df: pd.DataFrame, writer: pd.ExcelWriter) -> None:
    """
    Write functional test results to Excel: a long-format summary sheet
    plus a pivoted sheet per group with Pin columns sorted descending.
    """
    RESULT_COL = "result"
    PIN_COL = "Pin"
    for sheet, chunk in df.groupby("sheet"):
        chunk = chunk.copy()
        # Build the Pin label from the forced voltage when available.
        if "force" in chunk.columns:
            chunk[PIN_COL] = chunk["force"]
        else:
            chunk[PIN_COL] = chunk["pin"] + "_" + chunk.get("force", "")
        summary_cols = FIXED_COLUMNS + ["Result", PIN_COL, RESULT_COL]
        chunk[summary_cols].to_excel(writer, sheet_name=f"{sheet}_Summary", index=False)
        # Pivot so each Pin becomes a column (first value wins for duplicates).
        pivoted = chunk.pivot_table(
            index=FIXED_COLUMNS + ["Result"],
            columns=PIN_COL,
            values=RESULT_COL,
            aggfunc=lambda col: col.iloc[0]
        )
        ordered = sorted(pivoted.columns, reverse=True)  # Pin columns descending
        pivoted = pivoted.reindex(columns=ordered, fill_value="N/A").reset_index()
        pivoted.to_excel(writer, sheet_name=sheet, index=False)
def write_Stdcell_timing(df: pd.DataFrame, writer: pd.ExcelWriter) -> None:
    """
    Write timing test data to Excel, one sheet pair per mode (e.g.
    Freq/Power), converting frequencies to MHz and currents to uA.

    Args:
        df: combined timing data; "test_name" has the form
            "<Mode><seq>_<voltage>".
        writer: open ExcelWriter to write the sheets into.
    """
    VOLTAGE_SUFFIX = "V"
    df = df.copy()  # do not mutate the caller's frame with helper columns
    # Derive Pin / Voltage / Mode columns from the test name.
    df[["Pin", "RawVoltage"]] = df["test_name"].str.split("_", n=1, expand=True)
    df["Voltage"] = df["RawVoltage"].str.extract(r"(\d+\.?\d*)")[0].astype(float).map(lambda x: f"{x:.2f}") + VOLTAGE_SUFFIX
    df["Mode"] = df["Pin"].str.extract(r"([A-Z][a-z]+)")[0]  # capitalised mode word
    for mode, group in df.groupby("Mode"):
        group = group.copy()
        # Frequencies normalise to MHz, everything else to uA.
        unit = "MHz" if "hz" in group["Unit"].iloc[0].lower() else "uA"
        group["Value"] = group.apply(
            lambda row: unit_conversion(row["Value"], row["Unit"]),
            axis=1
        )
        group["Unit"] = unit
        sheet_name = f"Timing_Test_{mode}"
        output_cols = FIXED_COLUMNS + ["Unit", "Pin", "Voltage", "Value"]
        group[output_cols].to_excel(writer, sheet_name=f"{sheet_name}_Summary", index=False)
        # Pivot with "<Pin>_<Voltage>" column labels.
        group["Pin"] = group["Pin"] + "_" + group["Voltage"]
        pivot_df = group.pivot_table(
            index=FIXED_COLUMNS + ["Unit"],
            columns="Pin",
            values="Value"
        ).reset_index()
        pivot_df.to_excel(writer, sheet_name=sheet_name, index=False)
def write_Stdcell_retention(
        df: pd.DataFrame,
        writer: pd.ExcelWriter
) -> None:
    """
    Write retention test data to a "retention" sheet.

    Note: no duplicate-die filtering happens here (the original docstring
    claimed otherwise) — the caller is expected to de-duplicate first.

    Args:
        df: combined retention data.
        writer: open ExcelWriter to write the sheet into.
    """
    if df.empty:
        return  # nothing to write
    df = df.copy()  # do not mutate the caller's frame with renamed columns
    df["Pin"] = df["pin"]
    df["Voltage"] = df["force"]
    output_cols = FIXED_COLUMNS + ["Pin", "Voltage", "result"]
    df[output_cols].to_excel(writer, sheet_name="retention", index=False)
# ---- Main script: scan the selected folder, parse every .txt file, and
# ---- write the combined results to one Excel workbook (plus a CSV).
data_dir = select_folder()
info_dict = {'FolderPath': data_dir, 'Platform': 'SOI200V', 'CellType': '7T'}
platform = info_dict["Platform"]
cell_type = info_dict["CellType"]
files = [f for f in os.listdir(data_dir) if f.endswith(".txt")]
node = input("请输入测试节点:")
# Output directory for the extracted workbooks.
output_dir = os.path.join(data_dir, "Data_Extraction")
os.makedirs(output_dir, exist_ok=True)
# Shared accumulators for all files/dies.
summary_data = []
dc_data = []
ft_data = {
    "timing": [],
    "function": [],
    "retention": [],
    "levelshift": []
}


def _collect_result(result):
    """Append one die's parsed result to the shared accumulators."""
    summary_data.append(result["die_info"])
    if not result["dc_data"].empty:
        dc_data.append(result["dc_data"])
    for key in ft_data:
        if not result["ft_data"][key].empty:
            ft_data[key].append(result["ft_data"][key])


for file in files:
    file_path = os.path.join(data_dir, file)
    # Basic lot/wafer information comes from the file name.
    product_id, lot_id, wafer_id, temperature, corner, wafer_flag, die_id, start_die = extract_basic_info_for_htol(file)
    print(f"提取基本信息: {lot_id=}, {wafer_id=}, {corner=}, {temperature=}, {die_id=}")
    with open(file_path, "r", encoding="utf-8") as f:
        contents = f.readlines()
    if wafer_flag:
        # Whole-wafer file: split it into per-die ranges at "Device#:" markers
        # (the last element of die_ranges is the total line count).
        die_ranges = list(find_string_in_file(file_path, "Device#:"))
        if len(die_ranges) < 2:
            logger.warning("文件中未找到有效的die数据: %s", file)
        for die_num, (start, end) in enumerate(zip(die_ranges[:-1], die_ranges[1:])):
            die_contents = contents[max(1, start):min(len(contents), end)]
            # BUGFIX: process each die's own slice — the original computed
            # die_contents but passed the whole file to every die.
            _collect_result(process_die_data(die_contents, die_num + int(start_die),
                                             platform, cell_type, corner, temperature, lot_id, wafer_id))
    else:
        _collect_result(process_die_data(contents, int(die_id),
                                         platform, cell_type, corner, temperature, lot_id, wafer_id))
# Combine per-die frames.
summary_df = pd.DataFrame(summary_data, columns=["Die_ID", "X", "Y", "Bin"])
summary_df = _add_fixed_columns(summary_df, platform, cell_type, corner, temperature, lot_id, wafer_id)
dc_df = pd.concat(dc_data, ignore_index=True) if dc_data else pd.DataFrame()
ft_timing_df = pd.concat(ft_data["timing"], ignore_index=True) if ft_data["timing"] else pd.DataFrame()
ft_func_df = pd.concat(ft_data["function"], ignore_index=True) if ft_data["function"] else pd.DataFrame()
ft_retention_df = pd.concat(ft_data["retention"], ignore_index=True) if ft_data["retention"] else pd.DataFrame()
levelshift_df = pd.concat(ft_data["levelshift"], ignore_index=True) if ft_data["levelshift"] else pd.DataFrame()
# Drop duplicate dies everywhere, keeping the last occurrence.
duplicate_mask = summary_df.duplicated(subset=["Die_ID"], keep="last")
duplicate_dies = summary_df[duplicate_mask]["Die_ID"].tolist()
summary_df = summary_df[~duplicate_mask].sort_values(by='Die_ID')
summary_df.insert(0, 'Node', node)


def _dedupe_and_tag(df):
    """Remove duplicate dies, sort by Die_ID and prepend the Node column."""
    if df.empty:
        return df
    df = df[~df["Die_ID"].isin(duplicate_dies)].sort_values(by='Die_ID').copy()
    df.insert(0, "Node", node)
    return df


dc_df = _dedupe_and_tag(dc_df)
ft_timing_df = _dedupe_and_tag(ft_timing_df)
ft_func_df = _dedupe_and_tag(ft_func_df)
ft_retention_df = _dedupe_and_tag(ft_retention_df)
levelshift_df = _dedupe_and_tag(levelshift_df)
# Write everything into one workbook.
output_filename = f"{platform}_{product_id}_{lot_id}_Stdcell{cell_type}_{wafer_id}_{corner}_{temperature}℃_{node}H.xlsx"
output_path = os.path.join(output_dir, output_filename)
with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
    summary_df.to_excel(writer, sheet_name='Summary', index=False)
    if not dc_df.empty:
        write_dc_test_data(dc_df, writer)
    if not ft_timing_df.empty:
        write_Stdcell_timing(ft_timing_df, writer)
    if not ft_func_df.empty:
        write_fuc_test_data(ft_func_df, writer)
    if not ft_retention_df.empty:
        write_Stdcell_retention(ft_retention_df, writer)
if not levelshift_df.empty:
    # LevelShift goes to CSV because its schema differs from the workbook sheets.
    levelshift_cols = FIXED_COLUMNS + ["Coord_X", "Coord_Y", "vdin1", "vih", "result"]
    output_filename = f"{platform}_{product_id}_{lot_id}_Stdcell{cell_type}_{wafer_id}_{corner}_{temperature}℃_{node}H_levelshift.csv"
    levelshift_df[levelshift_cols].to_csv(os.path.join(output_dir, output_filename), index=False)