import os
import re
import csv
from openpyxl import load_workbook, Workbook
import difflib
import sys
import io
import time
import shutil
from pathlib import Path
from collections import defaultdict
import traceback
# 设置系统标准输出为UTF-8
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
def recursive_compare_dirs(old_dir, new_dir):
"""
递归比较两个目录,获取变更文件和变更行号(基于新文件行号)
"""
changed_files = defaultdict(set)
# 使用更高效的os.scandir替代os.walk
for entry in os.scandir(new_dir):
if entry.is_dir():
# 递归处理子目录
subdir_path = os.path.join(old_dir, entry.name)
if os.path.exists(subdir_path):
sub_changed = recursive_compare_dirs(
subdir_path,
os.path.join(new_dir, entry.name)
)
for rel_path, lines in sub_changed.items():
changed_files[os.path.join(entry.name, rel_path)] = lines
elif entry.is_file():
rel_path = os.path.relpath(entry.path, new_dir)
old_path = os.path.join(old_dir, rel_path)
# 处理新增文件
if not os.path.exists(old_path):
try:
# 使用更高效的行数统计方法
with open(entry.path, 'rb') as f:
line_count = sum(1 for _ in f)
# 标记所有行为已变更(基于新文件行号)
changed_lines = set(range(1, line_count + 1))
changed_files[rel_path] = changed_lines
print(f"新增文件: {rel_path}, 行数: {line_count}")
except Exception as e:
print(f"读取新文件出错: {entry.path} - {e}")
continue
# 处理修改文件 - 使用文件大小和修改时间快速过滤
if (os.path.getsize(entry.path) == os.path.getsize(old_path) and
os.path.getmtime(entry.path) <= os.path.getmtime(old_path)):
continue
try:
# 读取文件内容
with open(old_path, 'r', encoding='utf-8', errors='ignore') as f_old:
old_content = f_old.readlines()
with open(entry.path, 'r', encoding='utf-8', errors='ignore') as f_new:
new_content = f_new.readlines()
# 比较内容差异并获取变更行号(基于新文件)
changed_lines = detect_changed_lines(old_content, new_content)
if changed_lines:
changed_files[rel_path] = changed_lines
print(f"变更文件: {rel_path}, 变更行数: {len(changed_lines)}")
except Exception as e:
print(f"比较文件出错: {rel_path} - {e}")
return dict(changed_files)
def detect_changed_lines(old_content, new_content):
"""
优化版:检测文件中的变更行号(基于新文件行号)
"""
changed_lines = set()
matcher = difflib.SequenceMatcher(None, old_content, new_content)
for opcode in matcher.get_opcodes():
if opcode[0] != 'equal':
# 获取新文件中的变更行号范围(索引+1转换为实际行号)
start = opcode[3]
end = opcode[4]
# 转换为基于1的行号
changed_lines.update(range(start + 1, end + 1))
return changed_lines
def detect_encoding(file_path):
"""优化版:检测文件编码"""
# 常见编码类型列表(优先级排序)
encodings = ['utf-8', 'utf-16', 'cp932', 'shift_jis', 'gbk', 'big5', 'latin1']
for encoding in encodings:
try:
with open(file_path, 'r', encoding=encoding) as f:
f.read(4096)
return encoding
except:
continue
return 'utf-8'
def update_excel_sheets(csv_folder, output_excel, changed_files):
"""优化版:更新Excel表格"""
try:
print(f"开始更新Excel: {output_excel}")
# 加载或创建Excel文件
if os.path.exists(output_excel):
print(f"加载现有Excel文件: {output_excel}")
wb = load_workbook(output_excel)
print(f"现有工作表: {wb.sheetnames}")
else:
print("创建新的Excel文件")
wb = Workbook()
# 删除默认创建的工作表
for sheet_name in wb.sheetnames:
wb.remove(wb[sheet_name])
# 创建所需的工作表
wb.create_sheet("ファイル差分")
wb.create_sheet("_org_fm")
wb.create_sheet("warn")
# === 功能1: 写入文件差分表 ===
print("\n=== 写入文件差分表 ===")
if "ファイル差分" not in wb.sheetnames:
wb.create_sheet("ファイル差分")
print("创建'ファイル差分'工作表")
ws_diff = wb["ファイル差分"]
# 保留标题行和公式列(只清除A列的数据)
if ws_diff.max_row > 1:
print(f"清除'ファイル差分'工作表A列数据 (现有行数: {ws_diff.max_row})")
# 只删除A列第二行及以后的数据,保留其他列
for row_idx in range(2, ws_diff.max_row + 1):
if ws_diff.cell(row=row_idx, column=1).value is not None:
ws_diff.cell(row=row_idx, column=1, value=None)
# 确保有标题行
if ws_diff["A1"].value != "文件路径":
ws_diff["A1"] = "文件路径"
# 写入变更文件到A列第二行开始
print(f"写入 {len(changed_files)} 个变更文件路径到A列")
row_num = 2
for file_path in changed_files.keys():
ws_diff.cell(row=row_num, column=1, value=file_path)
row_num += 1
# === 功能2: 复制func_met.csv到_org_fm工作表 ===
func_met_path = os.path.join(csv_folder, "func_met.csv")
if os.path.exists(func_met_path):
print("\n=== 处理func_met.csv ===")
if "_org_fm" not in wb.sheetnames:
wb.create_sheet("_org_fm")
print("创建'_org_fm'工作表")
ws_fm = wb["_org_fm"]
# 保留标题行在第一行(不清除标题行)
# 清除数据行(第二行及以后)
if ws_fm.max_row > 1:
print(f"清除'_org_fm'工作表数据行 (现有行数: {ws_fm.max_row})")
ws_fm.delete_rows(2, ws_fm.max_row - 1)
# 读取CSV文件
encoding = detect_encoding(func_met_path)
print(f"检测到func_met.csv编码: {encoding}")
# 使用csv模块按行读取处理
with open(func_met_path, 'r', encoding=encoding, errors='replace') as f:
csv_reader = csv.reader(f)
# 跳过标题行(因为Excel中已经有标题行)
next(csv_reader)
# 写入数据行
print("写入func_met.csv数据行...")
row_count = 0
for row in csv_reader:
ws_fm.append(row)
row_count += 1
if row_count % 1000 == 0:
print(f"已写入 {row_count} 行数据...")
print(f"写入完成,共 {row_count} 行数据")
else:
print(f"未找到func_met.csv: {func_met_path}")
# === 功能3: 高效处理warn.csv ===
warn_path = os.path.join(csv_folder, "warn.csv")
if os.path.exists(warn_path):
print("\n=== 处理warn.csv ===")
if "warn" not in wb.sheetnames:
wb.create_sheet("warn")
print("创建'warn'工作表")
ws_warn = wb["warn"]
# 写入标题行(第二行)
headers = ['Source', 'Line #', 'Level', 'Warn #', 'Message', 'WarnFilter(变更有无)']
for col_idx, header in enumerate(headers, 1):
ws_warn.cell(row=2, column=col_idx, value=header)
else:
ws_warn = wb["warn"]
print(f"使用现有'warn'工作表")
# 构建变更文件映射
file_map = {}
for file_path, changed_lines in changed_files.items():
# 文件名映射
filename = Path(file_path).name
if filename not in file_map:
file_map[filename] = set()
file_map[filename] = file_map[filename].union(changed_lines)
# 完整路径映射
file_map[file_path] = changed_lines
# 准备列名映射(根据标题行确定列位置)
header_mapping = {}
for col in range(1, ws_warn.max_column + 1):
header_value = ws_warn.cell(row=2, column=col).value
if header_value:
header_mapping[header_value] = col
print(f"列名映射: {header_mapping}")
# 获取变更标记列的列号
change_col_idx = header_mapping.get('WarnFilter(变更有无)')
if not change_col_idx:
# 如果变更标记列不存在,创建它
change_col_idx = ws_warn.max_column + 1
ws_warn.cell(row=2, column=change_col_idx, value='WarnFilter(变更有无)')
header_mapping['WarnFilter(变更有无)'] = change_col_idx
print(f"添加变更标记列到第 {change_col_idx} 列")
# 读取CSV文件
encoding = detect_encoding(warn_path)
print(f"检测到warn.csv编码: {encoding}")
# 使用csv模块按行处理
with open(warn_path, 'r', encoding=encoding, errors='replace') as f:
csv_reader = csv.DictReader(f)
fieldnames = csv_reader.fieldnames
# 打印CSV字段信息
print(f"CSV字段: {fieldnames}")
# CSV列名到Excel列名的映射
column_mapping = {
'File': 'Source',
'Line': 'Line #',
'Grp': 'Level',
'Nbr': 'Warn #',
'Description': 'Message'
}
# 准备数据
match_count = 0
row_offset = 2 # 标题行位置
# 创建源和行号的键值映射,用于快速查找
source_line_map = {}
# 记录现有行的键值映射
for row_idx in range(3, ws_warn.max_row + 1):
source = ws_warn.cell(row=row_idx, column=header_mapping['Source']).value
line_num = ws_warn.cell(row=row_idx, column=header_mapping['Line #']).value
if source and line_num:
# 使用警告编号避免冲突
warn_num = ws_warn.cell(row=row_idx, column=header_mapping['Warn #']).value
key = f"{source}:{line_num}:{warn_num}" if warn_num else f"{source}:{line_num}"
source_line_map[key] = row_idx
# 动态行号处理 - 从现有最大行之后开始添加新行
next_row = ws_warn.max_row + 1
processed_rows = 0 # 处理行计数器
csv_total_rows = 0 # CSV总行数计数器
# 处理CSV中的每一行
for csv_row in csv_reader:
processed_rows += 1
csv_total_rows += 1
# 创建新行数据
new_row = {
header: csv_row.get(csv_col, '')
for csv_col, header in column_mapping.items()
}
# 计算变更标记
source = str(new_row.get('Source', ''))
try:
line_num = int(new_row.get('Line #', 0)) if new_row.get('Line #') else 0
except (ValueError, TypeError):
line_num = 0
# 获取警告编号
warn_num = new_row.get('Warn #', '')
change_marker = 'No'
if source and line_num > 0:
# 尝试文件名匹配
filename = Path(source).name
if filename in file_map and line_num in file_map[filename]:
change_marker = 'Yes'
match_count += 1
# 尝试完整路径匹配
elif source in file_map and line_num in file_map[source]:
change_marker = 'Yes'
match_count += 1
# 使用警告编号创建唯一键
key = f"{source}:{line_num}:{warn_num}" if warn_num else f"{source}:{line_num}"
# 检查该行是否已存在
if key in source_line_map:
row_idx = source_line_map[key]
# 更新所有数据列,不仅仅是变更标记
for col_name, value in new_row.items():
if col_name in header_mapping:
col_idx = header_mapping[col_name]
ws_warn.cell(row=row_idx, column=col_idx, value=value)
# 更新变更标记列
ws_warn.cell(row=row_idx, column=change_col_idx, value=change_marker)
else:
# 添加新行:写入所有数据和变更标记
new_row['WarnFilter(变更有无)'] = change_marker
# 写入新行
for col_name, value in new_row.items():
if col_name in header_mapping:
col_idx = header_mapping[col_name]
ws_warn.cell(row=next_row, column=col_idx, value=value)
# 更新行索引映射
source_line_map[key] = next_row
next_row += 1 # 移动到下一行
if processed_rows % 1000 == 0:
print(f"已处理 {processed_rows} 行数据...")
# 打印统计信息
print(f"CSV总行数: {csv_total_rows}")
print(f"匹配到 {match_count} 条变更警告")
print(f"总共处理了 {processed_rows} 行数据")
# 检查行数是否匹配
ws_total_rows = ws_warn.max_row - 2 # 减去标题行
if ws_total_rows != csv_total_rows:
print(f"警告: 工作表行数({ws_total_rows})与CSV行数({csv_total_rows})不匹配!")
else:
print("工作表行数与CSV行数匹配")
else:
print(f"未找到warn.csv: {warn_path}")
# === 保存Excel文件 ===
print("\n保存Excel文件...")
save_start = time.time()
try:
wb.save(output_excel)
save_duration = time.time() - save_start
print(f"Excel保存完成,耗时: {save_duration:.2f}秒")
print(f"最终文件: {output_excel}")
file_size = os.path.getsize(output_excel) / 1024 / 1024
print(f"文件大小: {file_size:.2f} MB")
# 验证保存结果
if os.path.exists(output_excel):
print("文件保存验证成功")
else:
print("!!! 文件保存后不存在,保存可能失败 !!!")
return True
except Exception as save_error:
print(f"保存Excel时出错: {save_error}")
return False
except Exception as e:
print(f"\n!!! 更新Excel出错: {str(e)} !!!")
traceback.print_exc()
return False
def main():
# 配置路径(根据实际情况修改)
old_code_dir = r"E:\system\Desktop\项目所需文件\工具\ffff\code\old\GA_D82DD83D_00-00-07\mainline\spa_traveo\src"
new_code_dir = r"E:\system\Desktop\项目所需文件\工具\ffff\code\new\GA_D82DD83D_00-00-08\mainline\spa_traveo\src"
csv_folder = r"E:\system\Desktop\项目所需文件\工具\ffff\APL\Tool出力結果"
output_excel = r"E:\system\Desktop\项目所需文件\工具\ffff\GA_D24D_00-00-01(三回目)_QAC.xlsx"
print("="*80)
print("开始文件比较...")
print(f"旧代码目录: {old_code_dir}")
print(f"新代码目录: {new_code_dir}")
print(f"CSV文件夹: {csv_folder}")
print(f"输出Excel: {output_excel}")
try:
start_time = time.time()
# 获取变更文件(完全使用递归比较)
changed_files = recursive_compare_dirs(old_code_dir, new_code_dir)
print(f"\n找到 {len(changed_files)} 个变更文件")
# 打印变更文件详情
print("\n变更文件详情:")
for i, (file_path, lines) in enumerate(changed_files.items()):
if i < 10: # 最多打印10个文件详情
print(f"{i+1}. {file_path}: 变更行数 {len(lines)}")
if len(lines) < 10: # 打印少于10行的变更行号
print(f" 行号: {sorted(lines)}")
# 更新Excel
print("="*80)
print("更新Excel表格...")
success = update_excel_sheets(csv_folder, output_excel, changed_files)
total_time = time.time() - start_time
print(f"\n总处理时间: {total_time:.2f}秒")
if success:
print(f"\n处理完成! 输出文件: {output_excel}")
else:
print("\n处理失败,请检查错误日志")
except Exception as e:
print(f"\n!!! 处理过程中发生严重错误: {str(e)} !!!")
traceback.print_exc()
print("="*80)
print("程序结束")
if __name__ == "__main__":
main()
我需要的是对应列里面就算有数据,也覆盖,而不是不覆盖,现在只能在末尾开始写,不能从第三行开始写,但是覆盖只能覆盖对应列里面的内容,而不能把我其他列里面的东西给删除或者覆盖掉了
最新发布