1、warn sheet中第二行为标题行,第三行往后才是数据行,并且你是按照顺序填充的而不是按照名称进行填充的,实际上在warn sheet中上述对应的列并不是按照顺序进行排列的。
2、如果你能够按照递归将文件的变更差分出来,并且能够找到具体变更了哪几行,可以将winmerge的功能全部删除。
3、在ファイル差分 sheet中你将我在N列的内容给删除掉了,我只要你将A列内容从第二行开始填充即可,N列内容为公式不要删除。
4、_org_fm sheet中的内容,也是与warn sheet一样的处理但是这个标题行在第一行。
import os
import re
import subprocess
import pandas as pd
from openpyxl import load_workbook, Workbook
import difflib
import sys
import io
import time
import shutil
from pathlib import Path
from collections import defaultdict
import numpy as np
import traceback
# 设置系统标准输出为UTF-8
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
def recursive_compare_dirs(old_dir, new_dir):
"""
递归比较两个目录,获取变更文件和变更行号(基于新文件行号)
"""
changed_files = defaultdict(set)
print(f"递归比较目录: {old_dir} 和 {new_dir}")
# 使用更高效的os.scandir替代os.walk
for entry in os.scandir(new_dir):
if entry.is_dir():
# 递归处理子目录
sub_changed = recursive_compare_dirs(
os.path.join(old_dir, entry.name),
os.path.join(new_dir, entry.name)
)
for rel_path, lines in sub_changed.items():
changed_files[os.path.join(entry.name, rel_path)] = lines
elif entry.is_file():
rel_path = os.path.relpath(entry.path, new_dir)
old_path = os.path.join(old_dir, rel_path)
# 处理新增文件
if not os.path.exists(old_path):
try:
# 使用更高效的行数统计方法
with open(entry.path, 'rb') as f:
line_count = sum(1 for _ in f)
# 标记所有行为已变更(基于新文件行号)
changed_lines = set(range(1, line_count + 1))
changed_files[rel_path] = changed_lines
print(f"新增文件: {rel_path}, 行数: {line_count}")
except Exception as e:
print(f"读取新文件出错: {entry.path} - {e}")
continue
# 处理修改文件 - 使用文件大小和修改时间快速过滤
if (os.path.getsize(entry.path) == os.path.getsize(old_path) and
os.path.getmtime(entry.path) <= os.path.getmtime(old_path)):
continue
try:
# 读取文件内容
with open(old_path, 'r', encoding='utf-8', errors='ignore') as f_old:
old_content = f_old.readlines()
with open(entry.path, 'r', encoding='utf-8', errors='ignore') as f_new:
new_content = f_new.readlines()
# 比较内容差异并获取变更行号(基于新文件)
changed_lines = detect_changed_lines(old_content, new_content)
if changed_lines:
changed_files[rel_path] = changed_lines
print(f"变更文件: {rel_path}, 变更行数: {len(changed_lines)}")
except Exception as e:
print(f"比较文件出错: {rel_path} - {e}")
return dict(changed_files)
def detect_changed_lines(old_content, new_content):
"""
优化版:检测文件中的变更行号(基于新文件行号)
"""
changed_lines = set()
matcher = difflib.SequenceMatcher(None, old_content, new_content)
for opcode in matcher.get_opcodes():
if opcode[0] != 'equal':
# 获取新文件中的变更行号范围(索引+1转换为实际行号)
start = opcode[3] + 1 # 转换为基于1的行号
end = opcode[4] + 1 # 转换为基于1的行号
changed_lines.update(range(start, end))
return changed_lines
def get_changed_files_and_lines(old_dir, new_dir, winmerge_path, save_report=False):
"""使用WinMerge获取变更文件列表及具体变更行号(基于新文件行号)"""
# 创建临时目录存放报告
temp_dir = os.path.join(os.path.dirname(__file__), "temp")
os.makedirs(temp_dir, exist_ok=True)
report_file = os.path.join(temp_dir, "winmerge_diff_report.txt")
# 移除路径结尾的反斜杠
old_dir = old_dir.rstrip('\\')
new_dir = new_dir.rstrip('\\')
print(f"开始WinMerge比较: 旧目录={old_dir}, 新目录={new_dir}")
# WinMerge命令参数
cmd = [
f'"{winmerge_path}"',
'/u',
'/r',
'/minimize',
'/noprefs',
'/noninteractive',
f'/report="{report_file}"',
'/f "Text Report"',
f'"{old_dir}"',
f'"{new_dir}"'
]
full_cmd = ' '.join(cmd)
print(f"执行命令: {full_cmd}")
changed_files = {}
try:
# 运行WinMerge
result = subprocess.run(
full_cmd,
shell=True,
timeout=600,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
encoding='utf-8',
errors='ignore'
)
# 检查WinMerge输出
print(f"WinMerge退出代码: {result.returncode}")
print(f"WinMerge stdout: {result.stdout[:300] if result.stdout else '空'}")
print(f"WinMerge stderr: {result.stderr[:300] if result.stderr else '空'}")
# 检查报告文件
report_content = ""
if os.path.exists(report_file):
print(f"找到报告文件: {report_file}")
try:
with open(report_file, 'r', encoding='utf-16') as f:
report_content = f.read()
print("成功以UTF-16读取报告文件")
except UnicodeError:
try:
with open(report_file, 'r', encoding='utf-8') as f:
report_content = f.read()
print("成功以UTF-8读取报告文件")
except Exception as e:
print(f"读取报告文件失败: {e}")
report_content = result.stdout
else:
print(f"未找到报告文件: {report_file}")
report_content = result.stdout
# 当报告内容可用时处理
diff_files = set()
if report_content.strip():
print(f"报告内容长度: {len(report_content)} 字符")
# 使用更高效的正则表达式匹配
pattern = re.compile(
r'(?:文件|Files|Comparing|File)\s+["\']?(.+?)["\']?\s+(?:和|and|are)\s+["\']?(.+?)["\']?\s+(?:不同|differ|different)',
re.IGNORECASE
)
# 查找所有匹配的文件路径
matches = pattern.findall(report_content)
print(f"找到 {len(matches)} 个匹配的差异文件")
for match in matches:
# 提取新版本文件路径(第二组)
new_file = match[1].strip()
# 规范化路径
new_file = os.path.normpath(new_file)
diff_files.add(new_file)
print(f"识别到差异文件: {new_file}")
# 批量处理文件比较
for file_path in diff_files:
# 获取相对路径
try:
rel_path = os.path.relpath(file_path, new_dir)
old_file_path = os.path.join(old_dir, rel_path)
if os.path.isfile(old_file_path) and os.path.isfile(file_path):
print(f"比较文件: {old_file_path} vs {file_path}")
with open(old_file_path, 'r', encoding='utf-8', errors='ignore') as f_old:
content_old = f_old.readlines()
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f_new:
content_new = f_new.readlines()
# 获取变更行号(基于新文件)
changed_lines = detect_changed_lines(content_old, content_new)
if changed_lines:
changed_files[rel_path] = changed_lines
print(f"变更文件: {rel_path}, 变更行号: {sorted(changed_lines)[:5]}... (共{len(changed_lines)}行)")
except Exception as e:
print(f"处理文件 {file_path} 出错: {e}")
else:
print("报告内容为空,使用递归比较目录")
changed_files = recursive_compare_dirs(old_dir, new_dir)
except subprocess.TimeoutExpired:
print("WinMerge执行超时,使用递归比较")
changed_files = recursive_compare_dirs(old_dir, new_dir)
except Exception as e:
print(f"WinMerge处理出错: {e}")
changed_files = recursive_compare_dirs(old_dir, new_dir)
print(f"找到 {len(changed_files)} 个变更文件")
return changed_files
def detect_encoding(file_path):
"""优化版:检测文件编码"""
# 常见编码类型列表(优先级排序)
encodings = ['utf-8', 'utf-16', 'cp932', 'shift_jis', 'gbk', 'big5', 'latin1']
for encoding in encodings:
try:
with open(file_path, 'r', encoding=encoding) as f:
f.read(4096)
return encoding
except:
continue
return 'utf-8'
def update_excel_sheets(csv_folder, output_excel, changed_files):
"""优化版:更新Excel表格(修复填充问题)"""
try:
print(f"开始更新Excel: {output_excel}")
# 加载或创建Excel文件
if os.path.exists(output_excel):
print(f"加载现有Excel文件: {output_excel}")
wb = load_workbook(output_excel)
print(f"现有工作表: {wb.sheetnames}")
else:
print("创建新的Excel文件")
wb = Workbook()
# 删除默认创建的工作表
for sheet_name in wb.sheetnames:
wb.remove(wb[sheet_name])
# 创建所需的工作表
wb.create_sheet("ファイル差分")
wb.create_sheet("_org_fm")
wb.create_sheet("warn")
# === 功能1: 写入文件差分表 ===
print("\n=== 写入文件差分表 ===")
if "ファイル差分" not in wb.sheetnames:
wb.create_sheet("ファイル差分")
print("创建'ファイル差分'工作表")
ws_diff = wb["ファイル差分"]
# 清空工作表(保留标题)
if ws_diff.max_row > 1:
print(f"清除'ファイル差分'工作表数据 (现有行数: {ws_diff.max_row})")
ws_diff.delete_rows(2, ws_diff.max_row - 1)
if ws_diff.max_row == 0 or ws_diff["A1"].value != "文件路径":
ws_diff.append(["文件路径"])
print("添加'文件路径'标题")
# 写入变更文件
print(f"写入 {len(changed_files)} 个变更文件路径")
for file_path in changed_files.keys():
ws_diff.append([file_path])
# === 功能2: 复制func_met.csv到_org_fm工作表 ===
func_met_path = os.path.join(csv_folder, "func_met.csv")
if os.path.exists(func_met_path):
print("\n=== 处理func_met.csv ===")
if "_org_fm" not in wb.sheetnames:
wb.create_sheet("_org_fm")
print("创建'_org_fm'工作表")
ws_fm = wb["_org_fm"]
# 清空工作表
if ws_fm.max_row > 1:
print(f"清除'_org_fm'工作表数据 (现有行数: {ws_fm.max_row})")
ws_fm.delete_rows(2, ws_fm.max_row - 1)
# 读取并写入数据
encoding = detect_encoding(func_met_path)
print(f"检测到func_met.csv编码: {encoding}")
df_fm = pd.read_csv(func_met_path, encoding=encoding)
print(f"func_met.csv 列名: {df_fm.columns.tolist()}")
print(f"行数: {len(df_fm)}")
# 写入标题
if ws_fm.max_row == 0:
ws_fm.append(df_fm.columns.tolist())
# 批量写入数据
print("写入func_met.csv数据...")
for _, row in df_fm.iterrows():
ws_fm.append(row.tolist())
else:
print(f"未找到func_met.csv: {func_met_path}")
# === 功能3: 高效处理warn.csv ===
warn_path = os.path.join(csv_folder, "warn.csv")
if os.path.exists(warn_path):
print("\n=== 处理warn.csv ===")
if "warn" not in wb.sheetnames:
wb.create_sheet("warn")
print("创建'warn'工作表")
ws_warn = wb["warn"]
headers = ['Source', 'Line #', 'Level', 'Warn #', 'Message', 'WarnFilter(变更有无)']
ws_warn.append(headers)
print(f"添加标题行: {headers}")
else:
ws_warn = wb["warn"]
print(f"使用现有'warn'工作表")
# 读取CSV文件
encoding = detect_encoding(warn_path)
print(f"检测到warn.csv编码: {encoding}")
df_warn = pd.read_csv(warn_path, encoding=encoding)
print(f"warn.csv 列名: {df_warn.columns.tolist()}")
print(f"行数: {len(df_warn)}")
print("前5行数据:")
print(df_warn.head())
# 列名映射详情
column_mapping = {
'File': 'Source',
'Line': 'Line #',
'Grp': 'Level',
'Nbr': 'Warn #',
'Description': 'Message'
}
print(f"列名映射规则: {column_mapping}")
# 构建变更文件映射
file_map = {}
for file_path, changed_lines in changed_files.items():
# 文件名映射
filename = Path(file_path).name
if filename not in file_map:
file_map[filename] = set()
file_map[filename] = file_map[filename].union(changed_lines)
# 完整路径映射
file_map[file_path] = changed_lines
# 打印变更文件信息
print(f"变更文件数量: {len(changed_files)}")
for i, (file_path, lines) in enumerate(changed_files.items()):
if i < 5: # 只打印前5个文件
print(f"变更文件: {file_path}, 变更行数: {len(lines)}")
# 准备数据批量写入
rows_to_write = []
match_count = 0
for index, row in df_warn.iterrows():
# 创建映射行
new_row = {
'Source': row.get('File', row.get('Source', '')),
'Line #': row.get('Line', row.get('Line #', '')),
'Level': row.get('Grp', row.get('Level', '')),
'Warn #': row.get('Nbr', row.get('Warn #', '')),
'Message': row.get('Description', row.get('Message', '')),
'WarnFilter(变更有无)': 'No' # 默认值
}
# 计算变更标记
source = str(new_row['Source'])
line_num = new_row['Line #']
# 检查是否为有效行号
try:
line_num = int(line_num) if not pd.isna(line_num) else 0
except:
line_num = 0
if source and line_num > 0:
# 尝试文件名匹配
filename = Path(source).name
if filename in file_map and line_num in file_map[filename]:
new_row['WarnFilter(变更有无)'] = 'Yes'
match_count += 1
# 尝试完整路径匹配
elif source in file_map and line_num in file_map[source]:
new_row['WarnFilter(变更有无)'] = 'Yes'
match_count += 1
# 转换为有序列表
row_data = [
new_row['Source'],
new_row['Line #'],
new_row['Level'],
new_row['Warn #'],
new_row['Message'],
new_row['WarnFilter(变更有无)']
]
rows_to_write.append(row_data)
print(f"匹配到 {match_count} 条变更警告")
# 清空现有数据行
if ws_warn.max_row > 1:
print(f"清除'warn'工作表数据 (现有行数: {ws_warn.max_row})")
ws_warn.delete_rows(2, ws_warn.max_row - 1)
# 批量写入Excel
print(f"写入 {len(rows_to_write)} 行数据到'warn'工作表")
start_time = time.time()
# 逐行写入数据
for i, row_data in enumerate(rows_to_write):
ws_warn.append(row_data)
# 每1000行输出一次进度
if (i + 1) % 1000 == 0:
print(f"已写入 {i + 1} 行数据...")
end_time = time.time()
print(f"数据写入完成,耗时: {end_time - start_time:.2f}秒")
# 打印前5行写入的数据样本
print("\n写入的前5行数据示例:")
for row_idx in range(2, min(7, len(rows_to_write) + 2)):
row_data = []
for col_idx in range(1, 7):
cell_value = ws_warn.cell(row=row_idx, column=col_idx).value
# 截断过长的值以便显示
if cell_value and len(str(cell_value)) > 50:
row_data.append(str(cell_value)[:50] + "...")
else:
row_data.append(str(cell_value))
print(f"行 {row_idx}: {row_data}")
else:
print(f"未找到warn.csv: {warn_path}")
# === 保存Excel文件 ===
print("\n保存Excel文件...")
save_start = time.time()
# 创建备份
backup_path = None
if os.path.exists(output_excel):
timestamp = time.strftime("%Y%m%d_%H%M%S")
backup_dir = os.path.join(os.path.dirname(output_excel), "backups")
os.makedirs(backup_dir, exist_ok=True)
backup_filename = f"{Path(output_excel).stem}_backup_{timestamp}{Path(output_excel).suffix}"
backup_path = os.path.join(backup_dir, backup_filename)
shutil.copy2(output_excel, backup_path)
print(f"创建备份: {backup_path}")
# 保存Excel
try:
wb.save(output_excel)
save_duration = time.time() - save_start
print(f"Excel保存完成,耗时: {save_duration:.2f}秒")
print(f"最终文件: {output_excel}")
file_size = os.path.getsize(output_excel) / 1024 / 1024
print(f"文件大小: {file_size:.2f} MB")
# 验证保存结果
if os.path.exists(output_excel):
print("文件保存验证成功")
else:
print("!!! 文件保存后不存在,保存可能失败 !!!")
return True
except Exception as save_error:
print(f"保存Excel时出错: {save_error}")
if backup_path and os.path.exists(backup_path):
print(f"恢复备份文件: {backup_path}")
try:
shutil.copy2(backup_path, output_excel)
print("恢复成功")
except Exception as restore_error:
print(f"恢复备份失败: {restore_error}")
return False
except Exception as e:
print(f"\n!!! 更新Excel出错: {str(e)} !!!")
traceback.print_exc()
# 错误备份逻辑
timestamp = time.strftime("%Y%m%d_%H%M%S")
backup_name = f"{output_excel}.error_{timestamp}.xlsx"
print(f"尝试将错误状态Excel备份至: {backup_name}")
try:
wb.save(backup_name)
print("备份成功")
except Exception as backup_error:
print(f"备份失败: {backup_error}")
return False
def main():
# 配置路径(根据实际情况修改)
old_code_dir = r"E:\system\Desktop\项目所需文件\工具\ffff\code\old\GA_D82DD83D_00-00-07\mainline\spa_traveo\src"
new_code_dir = r"E:\system\Desktop\项目所需文件\工具\ffff\code\new\GA_D82DD83D_00-00-08\mainline\spa_traveo\src"
csv_folder = r"E:\system\Desktop\项目所需文件\工具\ffff\APL\Tool出力結果"
output_excel = r"E:\system\Desktop\项目所需文件\工具\ffff\GA_D24D_00-00-01(三回目)_QAC.xlsx"
winmerge_path = r"E:/App/WinMerge/WinMerge/WinMergeU.exe"
print("="*80)
print("开始文件比较...")
print(f"旧代码目录: {old_code_dir}")
print(f"新代码目录: {new_code_dir}")
print(f"CSV文件夹: {csv_folder}")
print(f"输出Excel: {output_excel}")
print(f"WinMerge路径: {winmerge_path}")
try:
start_time = time.time()
# 获取变更文件
changed_files = get_changed_files_and_lines(old_code_dir, new_code_dir, winmerge_path)
print(f"\n找到 {len(changed_files)} 个变更文件")
# 打印变更文件详情
print("\n变更文件详情:")
for i, (file_path, lines) in enumerate(changed_files.items()):
if i < 10: # 最多打印10个文件详情
print(f"{i+1}. {file_path}: 变更行数 {len(lines)}")
if len(lines) < 10: # 打印少于10行的变更行号
print(f" 行号: {sorted(lines)}")
# 更新Excel
print("="*80)
print("更新Excel表格...")
success = update_excel_sheets(csv_folder, output_excel, changed_files)
total_time = time.time() - start_time
print(f"\n总处理时间: {total_time:.2f}秒")
if success:
print(f"\n处理完成! 输出文件: {output_excel}")
else:
print("\n处理失败,请检查错误日志")
except Exception as e:
print(f"\n!!! 处理过程中发生严重错误: {str(e)} !!!")
traceback.print_exc()
print("="*80)
print("程序结束")
if __name__ == "__main__":
main()