# 解压文件
import os
import zipfile
# Extract the history CSV files from every ZIP archive in ``source_dir``.
#
# ``source_dir`` is scanned (non-recursively) for ``*.zip`` files; from each
# archive only the members whose archive path starts with "his" and ends with
# ".csv" are extracted into ``target_dir``. Both paths are machine-specific and
# must be adjusted before running.
source_dir = "/home/philtell/Documents/xwechat_files/wxid_f0rg2f76s76t22_0b3d/msg/file/2025-01"
target_dir = "/home/philtell/Documents/xwechat_files/wxid_f0rg2f76s76t22_0b3d/msg/file/2025-01/data"

# Make sure the extraction target exists before any archive is opened.
os.makedirs(target_dir, exist_ok=True)

for filename in os.listdir(source_dir):
    if not filename.endswith(".zip"):
        continue
    zip_path = os.path.join(source_dir, filename)
    # A directory whose name ends in ".zip" would make ZipFile raise
    # IsADirectoryError, which the BadZipFile handler below does not catch.
    if not os.path.isfile(zip_path):
        continue
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            # The filter is applied to the full member path inside the archive,
            # so a CSV stored in a sub-folder of the archive is not selected.
            matching_files = [
                member
                for member in zip_ref.namelist()
                if member.startswith("his") and member.endswith(".csv")
            ]
            if matching_files:
                print(f"正在解压: {filename}")
                # Extract only the selected members in a single call.
                zip_ref.extractall(target_dir, members=matching_files)
            else:
                print(f"跳过: {filename} (没有符合条件的文件)")
    except zipfile.BadZipFile:
        # A corrupt archive is reported and skipped so the rest still get processed.
        print(f"无法解压: {filename} (文件可能已损坏)")

print("处理完成!")
# 显示日志内容
import pandas as pd
from collections import defaultdict
import os
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
# Alarm messages whose daily occurrences are tallied; a CSV row counts for an
# entry when its alarm text contains that entry.
alarm_contents = [
    "长时间未收到后向点云输出",
    "长时间未收到传统点云输出",
    "长时间未收到点云AI输出结果",
]

# Font file used for every piece of Chinese text drawn on the chart.
# The path is machine-specific; point it at an installed CJK font.
font_path = '/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc'
my_font = FontProperties(fname=font_path)

# One independent "date -> number of alarms" table per tracked message.
alarm_counts = dict((message, defaultdict(int)) for message in alarm_contents)
# 读取CSV文件
def process_csv(file_path, contents=None, counts=None):
    """Add the per-day alarm occurrences found in one CSV file to the counters.

    The file is read with GBK encoding and must provide the columns
    ``报警内容`` (alarm text) and ``开始时间`` (start timestamp). A row counts
    for a tracked message when its alarm text contains that message as a
    literal substring. Rows whose timestamp cannot be parsed are ignored.

    Args:
        file_path: Path of the CSV file to read.
        contents: Iterable of alarm messages to look for. Defaults to the
            module-level ``alarm_contents``.
        counts: Mapping ``message -> {date: count}`` that is updated in
            place. Defaults to the module-level ``alarm_counts``.

    Returns:
        None. Results are accumulated into ``counts``.
    """
    if contents is None:
        contents = alarm_contents
    if counts is None:
        counts = alarm_counts

    df = pd.read_csv(file_path, encoding='gbk')

    # Missing alarm text becomes an empty string so the substring test is well defined.
    messages = df['报警内容'].fillna('')
    # Unparseable timestamps become NaT and are dropped below; keeping them
    # would insert a NaT key that breaks later sorting and date formatting.
    start_dates = pd.to_datetime(df['开始时间'], errors='coerce').dt.date

    for content in contents:
        # regex=False: the alarm text is a literal string, not a pattern.
        hits = messages.str.contains(content, regex=False, na=False)
        # One pass over the matching rows gives the count for every day.
        per_day = start_dates[hits].dropna().value_counts()
        bucket = counts.setdefault(content, defaultdict(int))
        for day, occurrences in per_day.items():
            bucket[day] = bucket.get(day, 0) + int(occurrences)
# Aggregate every CSV file in the current working directory, then report and
# plot the number of tracked alarms per day.
csv_files = [f for f in os.listdir() if f.endswith('.csv')]
for file in csv_files:
    process_csv(file)

# Every day on which at least one tracked alarm was counted, in chronological
# order. set().union(...) also works when alarm_contents is empty.
dates = sorted(set().union(*(alarm_counts[content] for content in alarm_contents)))

if not dates:
    # Nothing was counted (no CSV files or no matching rows). Without this
    # guard the average below divides by zero and dates[0] raises IndexError.
    print("没有找到符合条件的告警记录,无法生成统计图")
else:
    # Per-message series aligned with ``dates``; days without that alarm are 0.
    daily_counts = {
        content: [alarm_counts[content].get(date, 0) for date in dates]
        for content in alarm_contents
    }

    # NOTE: the average is taken over the days that appear in ``dates`` (days
    # with at least one tracked alarm), not over every calendar day in the range.
    average_counts = {
        content: sum(counts) / len(dates)
        for content, counts in daily_counts.items()
    }

    # Console summary.
    date_range = f"{dates[0].strftime('%m.%d')}-{dates[-1].strftime('%m.%d')}"
    print(f"统计日期范围:{date_range}")
    for content, avg_count in average_counts.items():
        print(f"{content} {avg_count:.2f}次/天")

    # Chart title: headline followed by one line per message with its average.
    title = f"每日特定告警内容统计 ({date_range})\n" + "".join(
        f"{content}: {avg_count:.2f}次/天\n"
        for content, avg_count in average_counts.items()
    )

    # One line per alarm message, each data point labelled with its count.
    plt.figure(figsize=(14, 7))
    for content, counts in daily_counts.items():
        plt.plot(dates, counts, marker='o', label=content)
        for date, count in zip(dates, counts):
            plt.text(date, count, f'{count}', fontsize=10, ha='center', va='bottom', fontproperties=my_font)

    # my_font is passed everywhere text is drawn so Chinese glyphs render.
    plt.xlabel('日期', fontproperties=my_font)
    plt.ylabel('告警次数', fontproperties=my_font)
    plt.title(title, fontproperties=my_font)
    plt.xticks(dates, [date.strftime('%Y-%m-%d') for date in dates], rotation=45, fontproperties=my_font)
    plt.yticks(fontproperties=my_font)
    plt.grid(True)
    plt.legend(prop=my_font)
    plt.tight_layout()
    plt.show()