import oml
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta
import re
import numpy as np
import pandas as pd
import warnings
import os
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import matplotlib.dates as mdates
warnings.filterwarnings('ignore')
PIC_DIR = "pic"
if not os.path.exists(PIC_DIR):
os.makedirs(PIC_DIR)
def format_timestamp(ts):
"""格式化时间戳为Oracle可接受的字符串"""
return ts.strftime("%Y-%m-%d %H:%M:%S.%f")[:23]
def is_holiday(date):
"""判断日期是否为休息日(简化实现)"""
# 在实际应用中,这里应该连接到日历表或API
# 周末判断(周六=5,周日=6)
if date.weekday() in [5, 6]:
return True
# 这里可以添加特定假期的判断
# 例如:if date.month == 1 and date.day == 1: return True
return False
def find_previous_workday(target_date):
"""找到前一个工作日"""
current = target_date - timedelta(days=1)
while is_holiday(current):
current -= timedelta(days=1)
return current
def calculate_std_dev(df, exclude_columns):
"""计算数据框的标准差(排除指定列)"""
# 排除不需要的列
numeric_cols = [col for col in df.columns if col not in exclude_columns]
if not numeric_cols:
return 0, {}
# 计算每列的标准差
col_std = {}
for col in numeric_cols:
if df[col].dtype in [np.int64, np.float64]:
col_std[col] = df[col].std()
# 计算所有数值列合并后的标准差
all_values = []
for col in numeric_cols:
if df[col].dtype in [np.int64, np.float64]:
all_values.extend(df[col].values)
total_std = np.std(all_values) if all_values else 0
return total_std, col_std
def get_relative_change(current, reference):
"""计算相对变化率,处理除零问题"""
if reference == 0 and current == 0:
return 0 # 两者都为零,无变化
elif reference == 0:
return float('inf') # 参考值为零,当前值非零
else:
return abs(current - reference) / reference * 100
def classify_change(change_rate):
"""分类变化率,处理无穷大情况"""
if change_rate == float('inf'):
return "极端变化(参考值为零)", "m4"
elif change_rate < 5:
return "正常波动(可忽略)", "m1"
elif 5 <= change_rate < 10:
return "值得关注的变化", "m2"
elif 10 <= change_rate < 20:
return "显著变化", "m3"
else:
return "重大变化", "m4"
def format_change_rate(change_rate):
"""格式化变化率显示"""
if change_rate == float('inf'):
return "无穷大"
else:
return f"{change_rate:.2f}%"
def get_data_samples(cursor, obj_name, inst_number, target_time, num_samples=10):
"""获取目标时间点之前的样本数据,处理标准差为零的情况"""
# 获取目标时间点之前的10个采样点
query = f"""
SELECT *
FROM "{obj_name}"
WHERE instance_number = {inst_number}
AND snap_time < TO_TIMESTAMP('{format_timestamp(target_time)}', 'YYYY-MM-DD HH24:MI:SS.FF3')
ORDER BY snap_time DESC
FETCH FIRST {num_samples} ROWS ONLY
"""
cursor.execute(query)
result = cursor.fetchall()
if not result:
return None, None, None
# 获取列名
col_names = [col[0] for col in cursor.description]
# 创建DataFrame
df = pd.DataFrame(result, columns=col_names)
# 排除不需要的列
exclude_cols = ['SNAP_TIME', 'INSTANCE_NUMBER', 'PRED', 'PROB', 'ANOMALYDETAILS']
# 计算标准差
total_std, col_std = calculate_std_dev(df, exclude_cols)
# 如果标准差为零,添加微小噪声避免除零错误
if total_std == 0:
total_std = 1e-6
for col in col_std:
if col_std[col] == 0:
col_std[col] = 1e-6
return df, total_std, col_std
def generate_description(attr):
"""生成指标问题描述"""
rank = attr['rank']
weight = attr['weight']
name = attr['name']
if rank == 1:
return f"主要异常指标({weight*100:.0f}%权重),直接影响系统稳定性"
elif weight > 0.7:
return f"关键异常指标({weight*100:.0f}%权重),需要立即处理"
elif weight > 0.3:
return f"重要异常指标({weight*100:.0f}%权重),建议优先排查"
else:
return f"参考指标({weight*100:.0f}%权重),可作为辅助分析"
def escape_sql(text):
"""转义SQL中的特殊字符"""
return text.replace("'", "''").replace("\n", " ")
def analyze_anomalies():
cursor = None
conn = None
obj_name, anom_time, anom_inst = None, None, None
pic_paths = [] # 存储生成的图片路径
m4_indicators = [] # 存储m4级别的指标名称
try:
# 1. 连接数据库
conn = oml.connect(user='aiopsoraoem', password='oracle',
host='localhost', port=1521, service_name='aiopspdb')
print("数据库连接成功")
# 创建游标对象用于执行SQL
cursor = oml.cursor()
# 2. 获取待分析的异常记录
query = """
SELECT object_name, anomalies_time, anomalies_inst_number
FROM RT_RESULT
WHERE alert_analysis_status IS NULL
AND anomalies_time > SYSDATE - 10/1440
ORDER BY anomalies_time DESC
FETCH FIRST 1 ROWS ONLY
"""
# 执行查询
cursor.execute(query)
result = cursor.fetchall()
if not result:
print("没有需要分析的异常记录")
return
# 提取数据
obj_name = result[0][0]
anom_time = result[0][1]
anom_inst = int(result[0][2])
print(f"分析记录: {obj_name} at {anom_time} (实例 {anom_inst})")
# 3. 更新状态为doing
update_query = f"""
UPDATE RT_RESULT
SET alert_analysis_status = 'doing'
WHERE object_name = '{escape_sql(obj_name)}'
AND anomalies_time = TO_TIMESTAMP('{format_timestamp(anom_time)}', 'YYYY-MM-DD HH24:MI:SS.FF3')
AND anomalies_inst_number = {anom_inst}
"""
cursor.execute(update_query)
cursor.connection.commit()
print("状态更新为doing")
# 4. 获取异常详情
detail_query = f"""
SELECT ANOMALYDETAILS
FROM "{obj_name}"
WHERE snap_time = TO_TIMESTAMP('{format_timestamp(anom_time)}', 'YYYY-MM-DD HH24:MI:SS.FF3')
AND instance_number = {anom_inst}
"""
# 执行查询
cursor.execute(detail_query)
details_result = cursor.fetchall()
if not details_result:
raise Exception(f"{obj_name}表中未找到匹配记录")
# 获取XML数据
xml_data = details_result[0][0]
# 处理XML命名空间问题
xml_data = re.sub(r'\sxmlns="[^"]+"', '', xml_data, count=1)
root = ET.fromstring(xml_data)
attributes = []
for attr in root.findall('.//Attribute'):
try:
attr_data = {
'name': attr.get('name'),
'value': float(attr.get('actualValue')),
'weight': float(attr.get('weight')),
'rank': int(attr.get('rank'))
}
attributes.append(attr_data)
except (TypeError, ValueError) as e:
print(f"解析属性时出错: {e}")
# 按rank排序
attributes.sort(key=lambda x: x['rank'])
# 5. 交叉验证 - 获取异常点前10个采样点
print("获取异常点前10个采样点数据...")
anomaly_df, B_total_std, C_col_std = get_data_samples(cursor, obj_name, anom_inst, anom_time)
if anomaly_df is None:
raise Exception(f"未找到异常点前10个采样点数据: {obj_name} at {anom_time} (实例 {anom_inst})")
# 6. 获取正常模式数据
print("获取正常模式数据...")
# P1: 一天前相同时间点(跳过休息日)
p1_time = anom_time - timedelta(days=1)
if is_holiday(p1_time):
p1_time = find_previous_workday(p1_time)
day_ago_df, A1_total_std, A2_col_std = get_data_samples(cursor, obj_name, anom_inst, p1_time)
# P2: 一周前相同时间点
p2_time = anom_time - timedelta(weeks=1)
week_ago_df, B1_total_std, B2_col_std = get_data_samples(cursor, obj_name, anom_inst, p2_time)
# P3: 一个月前相同日期(跳过休息日)
p3_time = anom_time - timedelta(days=30)
if is_holiday(p3_time):
p3_time = find_previous_workday(p3_time)
month_ago_df, C1_total_std, C2_col_std = get_data_samples(cursor, obj_name, anom_inst, p3_time)
# 如果正常模式数据为空,使用默认值
if day_ago_df is None:
print("警告: 未找到一天前相同时间点数据")
day_ago_df = pd.DataFrame()
A1_total_std, A2_col_std = 0, {}
if week_ago_df is None:
print("警告: 未找到一周前相同时间点数据")
week_ago_df = pd.DataFrame()
B1_total_std, B2_col_std = 0, {}
if month_ago_df is None:
print("警告: 未找到一个月前相同日期数据")
month_ago_df = pd.DataFrame()
C1_total_std, C2_col_std = 0, {}
# 7. 生成验证结果
validation_results = "\n\n===== 交叉验证结果 =====\n"
m4_changes = []
m4_indicator_set = set() # 用于存储m4级别的指标名称
# 第一轮验证:总标准差比较
validation_results += "\n第一轮验证:总标准差比较\n"
validation_results += f"异常模式总标准差 (B): {B_total_std:.4f}\n"
# 比较1:B vs A1
if A1_total_std is not None:
change_rate = get_relative_change(B_total_std, A1_total_std)
change_desc, change_class = classify_change(change_rate)
rate_str = format_change_rate(change_rate)
comp_result = f"与一天前相同时间点 (A1: {A1_total_std:.4f}) 比较: 变化率 {rate_str} - {change_desc} ({change_class})\n"
validation_results += comp_result
if change_class == "m4":
m4_changes.append(comp_result.strip())
#m4_indicator_set.add("总标准差")
# 比较2:B vs B1
if B1_total_std is not None:
change_rate = get_relative_change(B_total_std, B1_total_std)
change_desc, change_class = classify_change(change_rate)
rate_str = format_change_rate(change_rate)
comp_result = f"与一周前相同时间点 (B1: {B1_total_std:.4f}) 比较: 变化率 {rate_str} - {change_desc} ({change_class})\n"
validation_results += comp_result
if change_class == "m4":
m4_changes.append(comp_result.strip())
#m4_indicator_set.add("总标准差")
# 比较3:B vs C1
if C1_total_std is not None:
change_rate = get_relative_change(B_total_std, C1_total_std)
change_desc, change_class = classify_change(change_rate)
rate_str = format_change_rate(change_rate)
comp_result = f"与一个月前相同日期 (C1: {C1_total_std:.4f}) 比较: 变化率 {rate_str} - {change_desc} ({change_class})\n"
validation_results += comp_result
if change_class == "m4":
m4_changes.append(comp_result.strip())
#m4_indicator_set.add("总标准差")
# 第二轮验证:各列标准差比较
validation_results += "\n第二轮验证:各列标准差比较\n"
# 只分析排名前3的指标
top_attributes = attributes[:3]
for attr in top_attributes:
col_name = attr['name']
validation_results += f"\n指标: {col_name} (当前值: {attr['value']}, 权重: {attr['weight']:.2f}, 排名: {attr['rank']})\n"
# 异常模式该列标准差
col_std_b = C_col_std.get(col_name, 0)
validation_results += f"异常模式标准差 (C): {col_std_b:.4f}\n"
# 比较1:C vs A2
if A2_col_std and col_name in A2_col_std:
col_std_a2 = A2_col_std[col_name]
change_rate = get_relative_change(col_std_b, col_std_a2)
change_desc, change_class = classify_change(change_rate)
rate_str = format_change_rate(change_rate)
comp_result = f"与一天前相同时间点 (A2: {col_std_a2:.4f}) 比较: 变化率 {rate_str} - {change_desc} ({change_class})\n"
validation_results += comp_result
if change_class == "m4":
m4_changes.append(f"指标 {col_name}: {comp_result.strip()}")
m4_indicator_set.add(col_name)
# 比较2:C vs B2
if B2_col_std and col_name in B2_col_std:
col_std_b2 = B2_col_std[col_name]
change_rate = get_relative_change(col_std_b, col_std_b2)
change_desc, change_class = classify_change(change_rate)
rate_str = format_change_rate(change_rate)
comp_result = f"与一周前相同时间点 (B2: {col_std_b2:.4f}) 比较: 变化率 {rate_str} - {change_desc} ({change_class})\n"
validation_results += comp_result
if change_class == "m4":
m4_changes.append(f"指标 {col_name}: {comp_result.strip()}")
m4_indicator_set.add(col_name)
# 比较3:C vs C2
if C2_col_std and col_name in C2_col_std:
col_std_c2 = C2_col_std[col_name]
change_rate = get_relative_change(col_std_b, col_std_c2)
change_desc, change_class = classify_change(change_rate)
rate_str = format_change_rate(change_rate)
comp_result = f"与一个月前相同日期 (C2: {col_std_c2:.4f}) 比较: 变化率 {rate_str} - {change_desc} ({change_class})\n"
validation_results += comp_result
if change_class == "m4":
m4_changes.append(f"指标 {col_name}: {comp_result.strip()}")
m4_indicator_set.add(col_name)
# 7. 数据可视化
# 创建特定目录保存图片
timestamp_str = anom_time.strftime("%Y%m%d_%H%M%S")
save_dir = os.path.join(PIC_DIR, f"{obj_name}_{anom_inst}_{timestamp_str}")
if not os.path.exists(save_dir):
os.makedirs(save_dir)
# 准备所有样本数据用于聚类
all_data = []
labels = []
# 添加正常模式样本(只添加非空数据)
if day_ago_df is not None and not day_ago_df.empty:
all_data.append(day_ago_df)
labels.extend(['Day Ago'] * len(day_ago_df))
if week_ago_df is not None and not week_ago_df.empty:
all_data.append(week_ago_df)
labels.extend(['Week Ago'] * len(week_ago_df))
if month_ago_df is not None and not month_ago_df.empty:
all_data.append(month_ago_df)
labels.extend(['Month Ago'] * len(month_ago_df))
# 添加异常样本
if anomaly_df is not None and not anomaly_df.empty:
all_data.append(anomaly_df)
labels.extend(['Anomaly'] * len(anomaly_df))
# 检查是否有足够数据
if len(all_data) == 0:
print("警告: 没有足够的数据进行可视化")
else:
# 合并所有数据
combined_df = pd.concat(all_data, ignore_index=True)
# 排除非数值列
exclude_cols = ['SNAP_TIME', 'INSTANCE_NUMBER', 'PRED', 'PROB', 'ANOMALYDETAILS']
numeric_cols = [col for col in combined_df.columns
if col not in exclude_cols
and combined_df[col].dtype in [np.int64, np.float64]]
if not numeric_cols:
print("警告: 没有数值列可用于可视化")
else:
# 提取数值数据
data = combined_df[numeric_cols].values
# 标准化数据
scaler = StandardScaler()
scaled_data = scaler.fit_transform(data)
# 使用K-Means聚类
kmeans = KMeans(n_clusters=2, random_state=42)
clusters = kmeans.fit_predict(scaled_data)
# 使用PCA降维可视化
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
reduced_data = pca.fit_transform(scaled_data)
# 创建聚类图
plt.figure(figsize=(12, 8))
# 为不同标签使用不同颜色
colors = {'Day Ago': 'blue', 'Week Ago': 'green',
'Month Ago': 'purple', 'Anomaly': 'red'}
# 绘制所有点
for label in set(labels):
idx = [i for i, l in enumerate(labels) if l == label]
plt.scatter(reduced_data[idx, 0], reduced_data[idx, 1],
c=colors[label], label=label, alpha=0.6)
# 标记异常点(红色)
anomaly_idx = [i for i, l in enumerate(labels) if l == 'Anomaly']
plt.scatter(reduced_data[anomaly_idx, 0], reduced_data[anomaly_idx, 1],
c='red', marker='x', s=100, label='Anomaly Points')
plt.title('K-Means Clustering of System Metrics')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.grid(True)
# 保存聚类图
kmeans_path = os.path.join(save_dir, 'kmeans_clustering.png')
plt.savefig(kmeans_path)
plt.close()
pic_paths.append(kmeans_path)
print(f"保存K-Means聚类图到: {kmeans_path}")
# 创建时间序列图
plt.figure(figsize=(15, 10))
# 为每个指标创建子图
num_plots = min(5, len(numeric_cols)) # 最多显示5个指标
fig, axes = plt.subplots(num_plots, 1, figsize=(15, 5*num_plots))
if num_plots == 1:
axes = [axes] # 确保单图时axes是列表
for i, col in enumerate(numeric_cols[:num_plots]):
ax = axes[i]
# 绘制正常模式(只绘制非空数据)
if day_ago_df is not None and not day_ago_df.empty:
day_ago_df['SNAP_TIME'] = pd.to_datetime(day_ago_df['SNAP_TIME'])
ax.plot(day_ago_df['SNAP_TIME'], day_ago_df[col],
'b-', label='Day Ago', alpha=0.7)
if week_ago_df is not None and not week_ago_df.empty:
week_ago_df['SNAP_TIME'] = pd.to_datetime(week_ago_df['SNAP_TIME'])
ax.plot(week_ago_df['SNAP_TIME'], week_ago_df[col],
'g-', label='Week Ago', alpha=0.7)
if month_ago_df is not None and not month_ago_df.empty:
month_ago_df['SNAP_TIME'] = pd.to_datetime(month_ago_df['SNAP_TIME'])
ax.plot(month_ago_df['SNAP_TIME'], month_ago_df[col],
'm-', label='Month Ago', alpha=0.7)
# 绘制异常点
if anomaly_df is not None and not anomaly_df.empty:
anomaly_df['SNAP_TIME'] = pd.to_datetime(anomaly_df['SNAP_TIME'])
# 只绘制异常点
ax.scatter(anomaly_df['SNAP_TIME'], anomaly_df[col],
c='red', s=50, label='Anomaly', zorder=5)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d %H:%M'))
ax.xaxis.set_major_locator(mdates.AutoDateLocator())
ax.set_title(f'Time Series: {col}')
ax.set_xlabel('Time')
ax.set_ylabel('Value')
ax.legend(loc='upper right')
ax.grid(True, linestyle='--', alpha=0.7)
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.tight_layout()
# 保存时间序列图
timeseries_path = os.path.join(save_dir, 'timeseries_comparison.png')
plt.savefig(timeseries_path, bbox_inches='tight')
plt.close()
pic_paths.append(timeseries_path)
print(f"保存时间序列图到: {timeseries_path}")
# 评估哪种可视化更直观
evaluation = """
=== 可视化方法评估 ===
1. K-Means聚类图:
- 优点: 展示数据点在高维空间的分布,可以清晰看到异常点与正常模式的分离
- 缺点: 需要降维处理,可能丢失部分信息;不直观展示时间变化
2. 时间序列图:
- 优点: 直观展示指标随时间的变化趋势,容易识别异常点
- 缺点: 当指标过多时难以在同一图中展示
结论: 时间序列图更直观展示异常点信息,特别是当需要分析指标随时间的变化趋势时。
建议将时间序列图作为主要可视化工具,聚类图作为辅助分析工具。
"""
print(evaluation)
# 保存评估结论
with open(os.path.join(save_dir, 'visualization_evaluation.txt'), 'w') as f:
f.write(evaluation)
# 8. 生成分析报告
analysis_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
summary = f"检测到{len(attributes)}个异常指标"
if m4_changes:
summary += ",检测到指标有重大变化,请及时关注!"
m4_warning = "\n\n===== 重大变化警告 =====\n"
for i, change in enumerate(m4_changes, 1):
m4_warning += f"{i}. {change}\n"
summary += m4_warning
details = f"异常分析报告 ({analysis_time})\n"
details += f"目标对象: {obj_name}\n"
details += f"异常时间: {anom_time}\n"
details += f"实例编号: {anom_inst}\n"
details += "="*50 + "\n"
# 添加指标详情
for attr in attributes:
details += (
f"指标 {attr['rank']}: {attr['name']}\n"
f" 当前值: {attr['value']}\n"
f" 影响权重: {attr['weight']:.2f}\n"
f" 问题描述: {generate_description(attr)}\n\n"
)
# 添加交叉验证结果
details += validation_results
# 添加数据样本信息
details += "\n\n===== 数据样本详情 =====\n"
# 定义要排除的列
exclude_columns = ['ANOMALYDETAILS']
# 异常点前10个采样点
if anomaly_df is not None:
# 排除指定列
anomaly_df_display = anomaly_df.drop(columns=exclude_columns, errors='ignore')
details += f"异常点前10个采样点:\n{anomaly_df_display.to_string()}\n\n"
else:
details += "异常点前10个采样点: 无数据\n\n"
# 一天前相同时间点采样点
if day_ago_df is not None:
# 排除指定列
day_ago_df_display = day_ago_df.drop(columns=exclude_columns, errors='ignore')
details += f"一天前相同时间点采样点:\n{day_ago_df_display.to_string()}\n\n"
else:
details += "一天前相同时间点采样点: 无数据\n\n"
# 一周前相同时间点采样点
if week_ago_df is not None:
# 排除指定列
week_ago_df_display = week_ago_df.drop(columns=exclude_columns, errors='ignore')
details += f"一周前相同时间点采样点:\n{week_ago_df_display.to_string()}\n\n"
else:
details += "一周前相同时间点采样点: 无数据\n\n"
# 一个月前相同日期采样点
if month_ago_df is not None:
# 排除指定列
month_ago_df_display = month_ago_df.drop(columns=exclude_columns, errors='ignore')
details += f"一个月前相同日期采样点:\n{month_ago_df_display.to_string()}\n"
else:
details += "一个月前相同日期采样点: 无数据\n"
print(details)
# 9. 更新分析结果
# 根据是否有重大变化设置alert_status
alert_status = "sending" if m4_changes else "normal"
# 准备图片路径字符串
pic_paths_str = ",".join(pic_paths) if pic_paths else "无图片"
# 准备m4指标字符串
m4_indicators_str = ",".join(m4_indicator_set) if m4_indicator_set else "无"
update_query = """
UPDATE RT_RESULT
SET
alert_analysis_status = :status,
alert_analysis_summary = :summary,
alert_analysis_detail = :detail,
alert_status = :alert_status,
alert_analysis_pic = :pic_paths,
alert_analysis_dar = :m4_indicators
WHERE object_name = :obj_name
AND anomalies_time = TO_TIMESTAMP(:anom_time, 'YYYY-MM-DD HH24:MI:SS.FF3')
AND anomalies_inst_number = :inst_number
"""
params = {
'status': 'done',
'summary': summary,
'detail': details,
'alert_status': alert_status,
'pic_paths': pic_paths_str,
'm4_indicators': m4_indicators_str,
'obj_name': obj_name,
'anom_time': format_timestamp(anom_time),
'inst_number': anom_inst
}
cursor.execute(update_query, params)
cursor.connection.commit()
print("分析结果保存成功")
print(f"图片路径: {pic_paths_str}")
print(f"m4指标: {m4_indicators_str}")
print(details)
except Exception as e:
print(f"处理失败: {str(e)}")
# 更新状态为error
if obj_name and anom_time and anom_inst is not None:
error_update = f"""
UPDATE RT_RESULT
SET alert_analysis_status = 'error'
WHERE object_name = '{escape_sql(obj_name)}'
AND anomalies_time = TO_TIMESTAMP('{format_timestamp(anom_time)}', 'YYYY-MM-DD HH24:MI:SS.FF3')
AND anomalies_inst_number = {anom_inst}
"""
try:
if cursor:
cursor.execute(error_update)
cursor.connection.commit()
except Exception as inner_e:
print(f"更新错误状态时出错: {str(inner_e)}")
finally:
# 关闭游标
if cursor:
cursor.close()
# 关闭连接
if conn:
conn.close()
print("数据库连接关闭")
if __name__ == "__main__":
analyze_anomalies()
======================
以上代码在《创建时间序列图》这段代码中画出来的图上图所示
都是按天,我的采样点比较密集,就容易混到一起,是否能按照我采集的数据来做图,
没有日期显示的就不用画出图了,也就是绘制时间序列时跳过无数据的区间。
请修改以上的相关代码段
最新发布