#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import re
import sys
import argparse
import xlwt
from collections import defaultdict
# Partition name map: dump-file prefix -> friendly partition name.
PARTITION_NAME_MAP = dict([
    ('02_', 'system'),
    ('03_', 'vendor'),
    ('04_', 'product'),
    ('05_', 'odm'),
    ('06_', 'my_product'),
    ('07_', 'my_engineering'),
    ('08_', 'my_stock'),
    ('09_', 'my_heytap'),
    ('10_', 'my_company'),
    ('11_', 'my_carrier'),
    ('12_', 'my_region'),
    ('13_', 'my_preload'),
    ('14_', 'data'),
    ('15_', 'my_bigball'),
    ('16_', 'my_manifest'),
    # Newly added standalone partitions; these are keyed by the whole
    # "17_<name>" identifier rather than by the numeric prefix alone.
    ('17_system_dlkm', 'system_dlkm'),
    ('17_vendor_dlkm', 'vendor_dlkm'),
    ('17_cache', 'cache'),
])
def parse_du_file(file_path):
    """Parse a saved ``du`` output file into a ``{path: size_in_MB}`` dict.

    Each useful line has the shape ``<number>[K|M|G|T][B|b] <path>``. A
    number without a unit letter is treated as a byte count. Blank lines,
    lines containing ``Permission denied`` or ``No such file``, and lines
    that do not match the expected shape are skipped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        dict mapping each path to its size in MB, rounded to 4 decimals.
        If the file cannot be read, a warning is printed and whatever was
        parsed so far (possibly an empty dict) is returned.
    """
    # Factor that converts one unit of the given suffix into MB. All factors
    # are exact powers of two, so multiplying is bit-identical to dividing.
    to_mb = {
        '': 1.0 / (1024 * 1024),   # plain number: bytes
        'K': 1.0 / 1024,
        'M': 1.0,
        'G': 1024.0,
        'T': 1024.0 * 1024,        # previously unmatched, so such lines were lost
    }
    line_re = re.compile(r'(\d+\.?\d*)\s*([KMGT]?)[Bb]?\s+(.*)')

    data = {}
    try:
        with open(file_path, 'r') as f:
            for line in f:
                text = line.strip()
                if not text:
                    continue
                if 'Permission denied' in text or 'No such file' in text:
                    continue
                match = line_re.match(text)
                if not match:
                    continue
                size, unit, path = match.groups()
                data[path] = round(float(size) * to_mb[unit], 4)
    except IOError as e:
        print("警告: 无法读取文件 {}: {}".format(file_path, str(e)))
    return data
def extract_file_prefix(filename):
    """Return the lookup prefix for a dump file name.

    Names starting with ``17_`` yield the whole name with every ``.txt``
    occurrence removed. Other names yield their leading ``<digits>_`` part,
    or ``"other_"`` when the name does not start with digits and an
    underscore.
    """
    if not filename.startswith('17_'):
        numbered = re.match(r'^(\d+_)', filename)
        if numbered is None:
            return "other_"
        return numbered.group(1)
    # "17_" files are identified by their full name without the extension.
    return filename.replace('.txt', '')
def is_main_partition_file(filename, prefix):
    """Tell whether ``filename`` is the main dump file of its partition.

    Any prefix starting with ``17_`` counts as a main file. Otherwise the
    file is a main file only when its name is exactly
    ``<prefix><partition name>.txt``; ``prefix`` must then be a key of
    ``PARTITION_NAME_MAP`` (a ``KeyError`` is raised if it is not).
    """
    if not prefix.startswith('17_'):
        # Built with str.format rather than an f-string.
        main_name = "{}{}.txt".format(prefix, PARTITION_NAME_MAP[prefix])
        return filename == main_name
    return True
def _top_growth_notes(data1, data2, limit=3):
    """Return note lines for the paths that are most larger in data1 than in data2."""
    growth = []
    for path in set(data1) | set(data2):
        delta = data1.get(path, 0.0) - data2.get(path, 0.0)
        if delta > 0:  # only paths that grew are reported
            growth.append((path, delta))
    # Largest growth first; ties are ordered by path so the output is stable.
    growth.sort(key=lambda item: (-item[1], item[0]))

    notes = []
    for rank, (path, delta) in enumerate(growth[:limit], 1):
        if len(path) > 50:
            # Keep the tail of long paths so the note stays readable.
            path = "..." + path[-47:]
        # str.format instead of an f-string: f-strings are a SyntaxError on
        # Python 2, which this script is written to support.
        notes.append("{}. {}: +{:.2f}MB".format(rank, path, delta))
    return notes


def generate_dual_report(folder1, folder2, output_xlsx):
    """Build a two-device storage comparison workbook.

    Every ``*.txt`` file in both folders whose prefix is a key of
    ``PARTITION_NAME_MAP`` is parsed with ``parse_du_file``. The workbook
    holds an overview sheet (main partition files only, ``data`` partition
    skipped) and one sheet per file name found in either folder. All
    differences are computed as ``folder1 - folder2``.

    Args:
        folder1: Directory with the dump files of the first device.
        folder2: Directory with the dump files of the second device.
        output_xlsx: Path the workbook is saved to.
            NOTE(review): the workbook is written with xlwt, which is
            documented as producing the legacy .xls format - confirm the
            desired file extension.

    Returns:
        A human-readable status message (success or error description).
        Errors while building or saving the workbook are caught, their
        traceback is printed, and an error message is returned.
    """
    # Folder base names are used as column captions.
    folder1_name = os.path.basename(os.path.normpath(folder1))
    folder2_name = os.path.basename(os.path.normpath(folder2))

    # Both inputs must be existing directories.
    for folder in (folder1, folder2):
        if not os.path.exists(folder):
            print("错误: 目录不存在 - {}".format(folder))
            return "目录 {} 不存在,请检查路径".format(folder)
        if not os.path.isdir(folder):
            print("错误: 路径不是目录 - {}".format(folder))
            return "{} 不是有效目录".format(folder)

    # Main-file data feeds the overview sheet: {prefix: {path: size}}.
    machine1_main_data = {}
    machine2_main_data = {}
    # Every parsed file feeds the per-file sheets: {filename: {path: size}}.
    machine1_all_files = defaultdict(dict)
    machine2_all_files = defaultdict(dict)

    for folder_path, main_dict, all_dict in [
        (folder1, machine1_main_data, machine1_all_files),
        (folder2, machine2_main_data, machine2_all_files),
    ]:
        print("处理目录: {}".format(folder_path))
        try:
            for filename in os.listdir(folder_path):
                if not filename.endswith('.txt'):
                    continue
                prefix = extract_file_prefix(filename)
                # Skip 01_mount.txt and anything without a known prefix.
                if prefix == '01_' or prefix not in PARTITION_NAME_MAP:
                    continue
                file_path = os.path.join(folder_path, filename)
                file_data = parse_du_file(file_path)
                all_dict[filename] = file_data
                if is_main_partition_file(filename, prefix):
                    print("解析主分区文件: {}".format(file_path))
                    main_dict[prefix] = file_data
        except OSError as e:
            print("目录访问错误: {}".format(str(e)))
            return "无法访问目录 {}: {}".format(folder_path, str(e))

    try:
        wb = xlwt.Workbook(encoding='utf-8')
        header_style = xlwt.easyxf('font: bold on')
        title_style = xlwt.easyxf('font: bold on, height 280; align: wrap on, vert centre')
        normal_style = xlwt.easyxf()
        added_style = xlwt.easyxf('pattern: pattern solid, fore_colour light_green;')
        removed_style = xlwt.easyxf('pattern: pattern solid, fore_colour rose;')
        summary_style = xlwt.easyxf('font: bold on, color blue;')
        # Style of the multi-line notes column; it was used without ever
        # being defined, which raised NameError on the first overview row.
        wrap_style = xlwt.easyxf('align: wrap on')

        # ====== Overview sheet (main partition files only) ======
        ws_overview = wb.add_sheet('总览')
        print("创建总览Sheet页(仅主文件数据)")
        current_row = 0
        ws_overview.write_merge(
            current_row, current_row, 0, 5,
            "存储使用总览(仅主分区文件)",
            title_style
        )
        current_row += 1

        ws_overview.write(current_row, 1, folder1_name, header_style)
        ws_overview.write(current_row, 2, folder2_name, header_style)
        current_row += 1

        headers = ['分区', '总大小(MB)', '总大小(MB)', '差值(MB)', '标记', '增大TOP路径']
        for col, header in enumerate(headers):
            ws_overview.write(current_row, col, header, header_style)
        current_row += 1

        # Notes column is 100 characters wide (xlwt widths are 1/256 char).
        ws_overview.col(5).width = 256 * 100

        total_machine1 = 0.0
        total_machine2 = 0.0
        for prefix in sorted(PARTITION_NAME_MAP.keys()):
            partition_name = PARTITION_NAME_MAP[prefix]
            # The data partition is left out of the overview.
            if partition_name == 'data':
                continue

            data1 = machine1_main_data.get(prefix, {})
            data2 = machine2_main_data.get(prefix, {})
            partition_total1 = round(sum(data1.values()), 2)
            partition_total2 = round(sum(data2.values()), 2)
            diff = partition_total1 - partition_total2
            total_machine1 += partition_total1
            total_machine2 += partition_total2

            if diff > 0:
                mark = "增加"
                style = added_style
            elif diff < 0:
                mark = "减少"
                style = removed_style
            else:
                mark = "无变化"
                style = normal_style

            # TOP-3 growing paths are only listed when the partition grew.
            top_notes = _top_growth_notes(data1, data2) if diff > 0 else []
            notes = "\n".join(top_notes) if top_notes else "无显著增大路径"

            ws_overview.write(current_row, 0, partition_name, style)
            ws_overview.write(current_row, 1, partition_total1, style)
            ws_overview.write(current_row, 2, partition_total2, style)
            ws_overview.write(current_row, 3, diff, style)
            ws_overview.write(current_row, 4, mark, style)
            ws_overview.write(current_row, 5, notes, wrap_style)
            current_row += 1

        # Blank separator row before the grand total.
        current_row += 1

        total_diff = total_machine1 - total_machine2
        if total_diff > 0:
            total_mark = "总增加"  # folder1 > folder2
        elif total_diff < 0:
            total_mark = "总减少"  # folder1 < folder2
        else:
            total_mark = "无变化"
        ws_overview.write(current_row, 0, "总计", header_style)
        ws_overview.write(current_row, 1, total_machine1, header_style)
        ws_overview.write(current_row, 2, total_machine2, header_style)
        ws_overview.write(current_row, 3, total_diff, header_style)
        ws_overview.write(current_row, 4, total_mark, header_style)

        # ====== One sheet per file (union of both folders) ======
        all_filenames = sorted(set(machine1_all_files.keys()) | set(machine2_all_files.keys()))
        for filename in all_filenames:
            prefix = extract_file_prefix(filename)
            if prefix not in PARTITION_NAME_MAP:
                continue
            partition_name = PARTITION_NAME_MAP[prefix]

            # Sheet name is the file name without extension, cut to the
            # 31-character sheet-name limit.
            sheet_name = filename.replace('.txt', '')
            if len(sheet_name) > 31:
                sheet_name = sheet_name[:31]
            ws = wb.add_sheet(sheet_name)
            print("创建文件Sheet页: {}".format(sheet_name))

            current_row = 0
            title = "分区: {} - 文件: {}".format(partition_name, filename)
            ws.write_merge(
                current_row, current_row, 0, 5,
                title,
                title_style
            )
            current_row += 1

            # Folder captions span the (path, size) column pair of each side.
            ws.write_merge(current_row, current_row, 0, 1, folder1_name, header_style)
            ws.write_merge(current_row, current_row, 2, 3, folder2_name, header_style)
            ws.write(current_row, 4, "差异(M)", header_style)
            ws.write(current_row, 5, "标记", header_style)
            current_row += 1

            headers = ['路径', '大小(M)', '路径', '大小(M)', '差异(M)', '标记']
            for col, header in enumerate(headers):
                ws.write(current_row, col, header, header_style)
            current_row += 1

            data1 = machine1_all_files.get(filename, {})
            data2 = machine2_all_files.get(filename, {})
            all_paths = sorted(set(data1.keys()) | set(data2.keys()))

            total_increase = 0.0  # growth of paths present on both sides
            total_decrease = 0.0  # shrinkage of paths present on both sides
            total_added = 0.0     # size of paths only present in folder1
            total_removed = 0.0   # size of paths only present in folder2

            for path in all_paths:
                size1 = data1.get(path, 0.0)
                size2 = data2.get(path, 0.0)
                diff = size1 - size2

                if size1 == 0 and size2 > 0:
                    mark = "除去"
                    cell_style = removed_style
                    total_removed += size2
                elif size1 > 0 and size2 == 0:
                    mark = "新增"
                    cell_style = added_style
                    total_added += size1
                elif diff > 0:
                    mark = "增大"
                    cell_style = added_style
                    total_increase += diff
                elif diff < 0:
                    mark = "减小"
                    cell_style = removed_style
                    total_decrease += abs(diff)
                else:
                    mark = "相同"
                    cell_style = normal_style

                # A side with size 0 gets empty path/size cells.
                if size1 > 0:
                    ws.write(current_row, 0, path, cell_style)
                    ws.write(current_row, 1, size1, cell_style)
                else:
                    ws.write(current_row, 0, "", cell_style)
                    ws.write(current_row, 1, "", cell_style)
                if size2 > 0:
                    ws.write(current_row, 2, path, cell_style)
                    ws.write(current_row, 3, size2, cell_style)
                else:
                    ws.write(current_row, 2, "", cell_style)
                    ws.write(current_row, 3, "", cell_style)
                ws.write(current_row, 4, diff, cell_style)
                ws.write(current_row, 5, mark, cell_style)
                current_row += 1

            # Per-file totals row.
            file_total1 = sum(data1.values())
            file_total2 = sum(data2.values())
            file_diff = file_total1 - file_total2
            ws.write(current_row, 0, "文件汇总", header_style)
            ws.write(current_row, 1, file_total1, header_style)
            ws.write(current_row, 2, "", header_style)
            ws.write(current_row, 3, file_total2, header_style)
            ws.write(current_row, 4, file_diff, header_style)
            ws.write(current_row, 5, "", header_style)
            current_row += 1

            # Change summary by category (format kept Python 2.7 compatible).
            message = (
                u"{partition_name}路径下: "
                u"减小{total_decrease:.2f}M "
                u"增大{total_increase:.2f}M "
                u"新增文件{total_added:.2f}M "
                u"减少文件{total_removed:.2f}M"
            ).format(
                partition_name=partition_name,
                total_decrease=total_decrease,
                total_increase=total_increase,
                total_added=total_added,
                total_removed=total_removed
            )
            ws.write_merge(
                current_row, current_row, 0, 5,
                message,
                summary_style
            )

        wb.save(output_xlsx)
        return "对比报告已成功生成: {}".format(output_xlsx)
    except Exception as e:
        # Top-level boundary: report the failure to the caller as a message.
        import traceback
        traceback.print_exc()
        return "生成Excel文件时出错: {}".format(str(e))
def generate_single_report(folder, output_xlsx):
    """Build a single-device storage breakdown workbook.

    Every ``*.txt`` file in ``folder`` whose prefix is a key of
    ``PARTITION_NAME_MAP`` is parsed with ``parse_du_file``. The workbook
    holds an overview sheet with one total per partition (main partition
    files only, ``data`` partition skipped) plus one sheet per parsed file
    listing every path and its size.

    Args:
        folder: Directory containing the dump files.
        output_xlsx: Path the workbook is saved to.

    Returns:
        A human-readable status message (success or error description).
        Errors while building or saving the workbook are caught, their
        traceback is printed, and an error message is returned.
    """
    # The input must be an existing directory.
    if not os.path.exists(folder):
        print("错误: 目录不存在 - {}".format(folder))
        return "目录 {} 不存在,请检查路径".format(folder)
    if not os.path.isdir(folder):
        print("错误: 路径不是目录 - {}".format(folder))
        return "{} 不是有效目录".format(folder)

    main_data = {}                  # {prefix: {path: size}}, main files only
    all_files = defaultdict(dict)   # {filename: {path: size}}, every file

    print("处理目录: {}".format(folder))
    try:
        txt_names = (name for name in os.listdir(folder) if name.endswith('.txt'))
        for filename in txt_names:
            prefix = extract_file_prefix(filename)
            # 01_mount.txt and unknown prefixes are ignored.
            if prefix != '01_' and prefix in PARTITION_NAME_MAP:
                file_path = os.path.join(folder, filename)
                parsed = parse_du_file(file_path)
                all_files[filename] = parsed
                if is_main_partition_file(filename, prefix):
                    print("解析主分区文件: {}".format(file_path))
                    main_data[prefix] = parsed
    except OSError as e:
        print("目录访问错误: {}".format(str(e)))
        return "无法访问目录 {}: {}".format(folder, str(e))

    try:
        wb = xlwt.Workbook(encoding='utf-8')
        bold = xlwt.easyxf('font: bold on')
        title_fmt = xlwt.easyxf('font: bold on, height 280; align: wrap on, vert centre')
        plain = xlwt.easyxf()

        # ====== Overview sheet: title row, header row, one row per partition ======
        overview = wb.add_sheet('总览')
        print("创建总览Sheet页(仅主文件数据)")
        overview.write_merge(0, 0, 0, 1, "存储使用总览(仅主分区文件) - 单机报告", title_fmt)
        overview.write(1, 0, "分区", bold)
        overview.write(1, 1, "总大小(MB)", bold)

        row = 2
        total_size = 0.0
        for prefix, partition_name in sorted(PARTITION_NAME_MAP.items()):
            # The data partition is left out of the overview.
            if partition_name == 'data':
                continue
            partition_total = round(sum(main_data.get(prefix, {}).values()), 2)
            total_size += partition_total
            overview.write(row, 0, partition_name, plain)
            overview.write(row, 1, partition_total, plain)
            row += 1

        # One blank row, then the grand total.
        row += 1
        overview.write(row, 0, "总计", bold)
        overview.write(row, 1, total_size, bold)

        # ====== One sheet per parsed file ======
        for filename in sorted(all_files):
            prefix = extract_file_prefix(filename)
            if prefix not in PARTITION_NAME_MAP:
                continue

            # File name without extension, cut to the 31-character limit.
            sheet_name = filename.replace('.txt', '')[:31]
            ws = wb.add_sheet(sheet_name)
            print("创建文件Sheet页: {}".format(sheet_name))

            ws.write_merge(
                0, 0, 0, 1,
                "分区: {} - 文件: {}".format(PARTITION_NAME_MAP[prefix], filename),
                title_fmt
            )
            for col, header in enumerate(('路径', '大小(M)')):
                ws.write(1, col, header, bold)

            data = all_files[filename]
            row = 2
            for path in sorted(data):
                ws.write(row, 0, path, plain)
                ws.write(row, 1, data[path], plain)
                row += 1

            # Totals row directly below the last path.
            ws.write(row, 0, "文件汇总", bold)
            ws.write(row, 1, sum(data.values()), bold)

        wb.save(output_xlsx)
        return "单机报告已成功生成: {}".format(output_xlsx)
    except Exception as e:
        import traceback
        traceback.print_exc()
        return "生成Excel文件时出错: {}".format(str(e))
def main(argv=None):
    """Command-line entry point: run the ``dual`` or ``single`` report mode.

    Args:
        argv: Argument list without the program name; ``None`` makes
            argparse read ``sys.argv[1:]``.

    Returns:
        The status message, which is also printed.
    """
    parser = argparse.ArgumentParser(description='存储空间分析工具')
    subparsers = parser.add_subparsers(dest='mode', help='运行模式')

    # Two-device comparison mode.
    dual_parser = subparsers.add_parser('dual', help='双机对比模式')
    dual_parser.add_argument('folder1', help='第一个文件夹路径')
    dual_parser.add_argument('folder2', help='第二个文件夹路径')
    dual_parser.add_argument('output', help='输出Excel文件路径')

    # Single-device breakdown mode.
    single_parser = subparsers.add_parser('single', help='单机拆解模式')
    single_parser.add_argument('folder', help='待分析文件夹路径')
    single_parser.add_argument('output', help='输出Excel文件路径')

    args = parser.parse_args(argv)
    if args.mode == 'dual':
        print("运行双机对比模式...")
        result = generate_dual_report(args.folder1, args.folder2, args.output)
    elif args.mode == 'single':
        print("运行单机拆解模式...")
        result = generate_single_report(args.folder, args.output)
    else:
        # Reached when no sub-command was given.
        result = "错误:请选择 'dual' 或 'single' 模式"
    print(result)
    return result


if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# Terminal transcript that had been pasted directly after the script; it is
# not code, so it is kept here as a comment for reference.
#
# V27960021@dg03podv27960021kj4p:~/存储/hawaii$ python storage.py dual ./22610/存储/测试机 ./22610/存储/对比机 22610_report_dual.xlsx
#   File "storage.py", line 222
#     top_notes.append(f"{i+1}. {path}: +{diff_val:.2f}MB")
#                                                         ^
# SyntaxError: invalid syntax
# ---------------------------------------------------------------------------