# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import linregress
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.seasonal import STL
from scipy.stats import poisson
sql_ihap = """select EQP_G, EQ_ID, EQ_ID_PRE, SHIFT_DATE
,NVL(START_CNT, 0) + NVL(RUNNING, 0) + NVL(WAITING, 0) + NVL(CHNOTUSE, 0) + NVL(COMPLETE_CNT, 0) as EQ_CN
from USRIPT.PA_RUNTIME_ABNOR_TYPE_DATE_V
"""
df = fetch_data(sql_ihap, env='PROD')
print(df)
# 步骤1: 数据准备(假设df已包含报警次数列)
# 从数据库获取数据(使用您提供的SQL)
# df = fetch_data(sql_ihap, env='PROD')
# 添加分组列(提取EQ_ID前8位)
df['GROUP_ID'] = df['EQ_ID'].str[:8]
# 处理缺失值(假设报警次数列名为ALARM_COUNT)
df['ALARM_COUNT'] = df['EQ_CN'].fillna(0)
print(df)
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import linregress
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.seasonal import STL
import matplotlib.dates as mdates
class GroupAnomalyDetector:
"""同组内多设备报警趋势异常检测系统"""
def __init__(self, group_data):
"""
初始化检测器
:param group_data: DataFrame, 包含同组内所有设备的数据
列要求:EQ_ID, SHIFT_DATE, ALARM_COUNT
"""
self.group_data = df
self.preprocessed = False
self.results = None
self.anomaly_scores = None
def preprocess_data(self):
"""数据预处理:转换为宽表格式并标准化"""
# 转换为宽表(行=日期,列=设备ID)
wide_df = self.group_data.pivot_table(
index='SHIFT_DATE',
columns='EQ_ID',
values='ALARM_COUNT',
fill_value=0
)
# 确保日期索引有序
wide_df = wide_df.sort_index()
print(wide_df)
# 标准化数据(每个设备单独标准化)
scaler = StandardScaler()
self.scaled_data = pd.DataFrame(
scaler.fit_transform(wide_df),
columns=wide_df.columns,
index=wide_df.index
)
self.raw_data = wide_df # 保存原始数据用于可视化
self.preprocessed = True
return self.scaled_data
def calculate_cusum(self, series):
"""计算CUSUM控制图指标(针对单个设备序列)"""
if len(series) < 7:
return {
's_plus': [0] * len(series),
's_minus': [0] * len(series),
'max_plus': 0,
'max_minus': 0
}
mu0 = series[:7].mean()
k = 0.5 * series.std() or 0.001 # 避免零标准差
s_plus = [0]
s_minus = [0]
for x in series:
s_plus.append(max(0, s_plus[-1] + (x - mu0) - k))
s_minus.append(max(0, s_minus[-1] - (x - mu0) - k))
return {
's_plus': s_plus[1:],
's_minus': s_minus[1:],
'max_plus': max(s_plus),
'max_minus': max(s_minus)
}
def analyze_trends(self):
"""分析组内所有设备的趋势特征"""
if not self.preprocessed:
self.preprocess_data()
results = {}
for eq_id in self.scaled_data.columns:
series = self.scaled_data[eq_id]
# 1. CUSUM分析
cusum = self.calculate_cusum(series)
# 2. STL趋势分解(仅当数据足够时)
trend = None
if len(series) >= 14: # STL要求至少2个周期
try:
stl = STL(series, period=7)
res = stl.fit()
trend = res.trend
except:
trend = series
# 3. 线性趋势斜率
days = np.arange(len(series)).reshape(-1, 1)
slope = LinearRegression().fit(days, series).coef_[0]
# 4. 波动率变化(后1/2与前1/2标准差比)
split_point = len(series) // 2
if split_point > 0:
std_ratio = series[split_point:].std() / (series[:split_point].std() or 0.001)
else:
std_ratio = 1.0
# 5. 均值变化(整体均值与前7天均值差)
mean_diff = series.mean() - series[:7].mean() if len(series) > 7 else 0
results[eq_id] = {
'cusum': cusum,
'trend': trend if trend is not None else series,
'slope': slope,
'std_ratio': std_ratio,
'mean_diff': mean_diff
}
self.results = results
return results
def calculate_anomaly_scores(self, weights=None):
"""计算设备异常评分"""
if self.results is None:
self.analyze_trends()
# 默认权重配置
if weights is None:
weights = {
'趋势斜率': 0.4,
'CUSUM峰值': 0.3,
'波动率变化': 0.2,
'均值变化': 0.1
}
# 创建特征矩阵
features = pd.DataFrame({
'趋势斜率': [res['slope'] for res in self.results.values()],
'CUSUM峰值': [res['cusum']['max_plus'] for res in self.results.values()],
'波动率变化': [res['std_ratio'] for res in self.results.values()],
'均值变化': [res['mean_diff'] for res in self.results.values()]
}, index=self.results.keys())
# 标准化特征值
feature_scores = (features - features.mean()) / features.std()
# 综合异常评分
feature_scores['综合异常分'] = sum(
weights[k] * abs(feature_scores[k]) for k in weights
)
self.anomaly_scores = feature_scores.sort_values('综合异常分', ascending=False)
return self.anomaly_scores
'''
def plot_results(self, top_n=15):
"""可视化分析结果"""
if self.anomaly_scores is None:
self.calculate_anomaly_scores()
plt.figure(figsize=(16, 12))
date_format = mdates.DateFormatter("%m-%d")
# 1. 原始报警次数对比
plt.subplot(2, 1, 1)
for eq_id in self.raw_data.columns:
plt.plot(
self.raw_data.index,
self.raw_data[eq_id],
'o-',
label=f"{eq_id}{' (异常)' if eq_id in self.get_top_anomalies(top_n).index else ''}",
markersize=4
)
plt.gca().xaxis.set_major_formatter(date_format)
plt.title('同组设备报警次数对比')
plt.ylabel('报警次数')
plt.grid(linestyle='--', alpha=0.7)
plt.legend()
# 2. 标准化趋势与CUSUM
plt.subplot(2, 1, 2)
for eq_id in self.scaled_data.columns:
# 标准化值
plt.plot(
self.scaled_data.index,
self.scaled_data[eq_id],
label=f'{eq_id} (标准化)',
alpha=0.7
)
# CUSUM值(缩小比例显示)
cusum = np.array(self.results[eq_id]['cusum']['s_plus'])
plt.plot(
self.scaled_data.index,
cusum / 5,
'--',
label=f'{eq_id} CUSUM/5',
alpha=0.5
)
plt.gca().xaxis.set_major_formatter(date_format)
plt.axhline(2, color='r', linestyle='--', alpha=0.5, label='2σ警戒线')
plt.title('标准化趋势与CUSUM控制图')
plt.ylabel('标准化值')
plt.grid(linestyle='--', alpha=0.7)
plt.legend(ncol=2)
plt.tight_layout()
plt.show()
# 3. 异常评分排序
plt.figure(figsize=(12, 6))
scores = self.anomaly_scores['综合异常分'].head(top_n)
colors = ['green' if s < 1.5 else 'orange' if s < 2.5 else 'red' for s in scores]
bars = plt.bar(scores.index, scores, color=colors)
plt.axhline(1.5, color='orange', linestyle='--', alpha=0.7, label='警戒阈值')
plt.axhline(2.5, color='red', linestyle='--', alpha=0.7, label='严重异常阈值')
plt.title('设备异常评分排序 (TOP {})'.format(top_n))
plt.ylabel('综合异常评分')
# 添加数值标签
for bar in bars:
height = bar.get_height()
plt.text(
bar.get_x() + bar.get_width() / 2.,
height,
f'{height:.2f}',
ha='center',
va='bottom'
)
plt.legend()
plt.tight_layout()
plt.show()
# 4. 输出异常设备诊断报告
anomalies = self.get_top_anomalies(top_n)
if not anomalies.empty:
print("\n==== 异常设备诊断报告 ====")
for eq_id, row in anomalies.iterrows():
result = self.results[eq_id]
print(f"\n设备 {eq_id} [评分: {row['综合异常分']:.2f}]")
print(f"- 趋势斜率: {result['slope']:.2f} (每日变化)")
print(f"- CUSUM峰值: {result['cusum']['max_plus']:.2f}")
print(f"- 波动率变化: {result['std_ratio']:.2f}x")
# 诊断建议
if result['slope'] > 0.3:
print(" 诊断: 持续上升趋势 → 检查设备老化或参数漂移")
elif result['std_ratio'] > 1.8:
print(" 诊断: 波动加剧 → 检查连接稳定性或间歇性故障")
elif result['cusum']['max_plus'] > 4:
print(" 诊断: 显著偏移 → 检查突发性故障或设置变更")
elif result['mean_diff'] < -1.5:
print(" 诊断: 异常减少 → 检查报警系统是否失效")'''
def plot_results(self, top_n=5):
"""
可视化分析结果
参数:
top_n -- 显示前n个异常设备
"""
if self.anomaly_scores is None:
self.calculate_anomaly_scores()
# 获取异常设备
top_anomalies = self.get_top_anomalies(top_n).index.tolist()
print(top_anomalies)
# 定义同组设备颜色方案(10种不同颜色)
group_colors = [
'#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
'#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'
]
# 1. 创建同组设备对比图(每个异常设备一张)
for anomaly_eq in top_anomalies:
# 获取异常设备所属组ID(前8位)
group_id = anomaly_eq[:8]
# 筛选同组设备(包含异常设备自身)
group_eqs = [eq for eq in self.raw_data.columns if eq.startswith(group_id)]
plt.figure(figsize=(14, 8))
plt.title(f"组 {group_id} - 异常设备 {anomaly_eq} 与同组设备对比", fontsize=16)
# 颜色计数器(为每个同组设备分配不同颜色)
color_idx = 0
# 绘制同组所有设备曲线
for eq_id in group_eqs:
# 异常设备用红色粗线突出显示
if eq_id == anomaly_eq:
plt.plot(
self.raw_data.index,
self.raw_data[eq_id],
'r-', linewidth=3,
label=f"{eq_id} [Abnormal]"
)
# 其他同组设备用不同颜色的细线
else:
# 获取颜色(循环使用预设颜色)
color = group_colors[color_idx % len(group_colors)]
plt.plot(
self.raw_data.index,
self.raw_data[eq_id],
'-', color=color, linewidth=1.5,
label=f"{eq_id}"
)
color_idx += 1
# 设置图表属性
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%m-%d"))
plt.ylabel('Alarm CNT', fontsize=12)
plt.grid(linestyle='--', alpha=0.5)
plt.legend(loc='best', ncol=2, fontsize=10)
plt.tight_layout()
plt.show()
# 3. 异常评分排序图
plt.figure(figsize=(12, 8))
scores = self.anomaly_scores.sort_values('Abnormal Score', ascending=False).head(top_n)['综合异常分']
colors = ['green' if s < 1.5 else 'orange' if s < 2.5 else 'red' for s in scores]
# 创建水平条形图(设备名在左侧)
bars = plt.barh(scores.index, scores, color=colors, height=0.7)
# 添加阈值线
plt.axvline(1.5, color='orange', linestyle='--', alpha=0.7, label='B')
plt.axvline(2.5, color='red', linestyle='--', alpha=0.7, label='W')
plt.title(f'Equment Abnormal Score (TOP {top_n})', fontsize=16)
plt.xlabel('Abnormal Score', fontsize=12)
# 添加数值标签
for bar in bars:
width = bar.get_width()
plt.text(
width + 0.05,
bar.get_y() + bar.get_height() / 2,
f'{width:.2f}',
ha='left',
va='center',
fontsize=10
)
plt.legend(loc='lower right', fontsize=10)
plt.tight_layout()
plt.show()
# 4. 诊断报告
if not self.anomaly_scores.empty:
print("\n" + "=" * 40)
print("==== 异常设备诊断报告 ====")
print("=" * 40)
for eq_id in top_anomalies:
row = self.anomaly_scores.loc[eq_id]
result = self.results[eq_id]
print(f"\n {eq_id} [Abnormal Score: {row['Abnormal Score']:.2f}]")
print(f"- 趋势斜率: {result['slope']:.2f} (每日变化)")
print(f"- CUSUM峰值: {result['cusum']['max_plus']:.2f}")
print(f"- 波动率变化: {result['std_ratio']:.2f}x")
# 诊断建议
if result['slope'] > 0.3:
print(" 诊断建议: 持续上升趋势 → 检查设备老化或参数漂移")
elif result['std_ratio'] > 1.8:
print(" 诊断建议: 波动加剧 → 检查连接稳定性或间歇性故障")
elif result['cusum']['max_plus'] > 4:
print(" 诊断建议: 显著偏移 → 检查突发性故障或设置变更")
elif result['mean_diff'] < -1.5:
print(" 诊断建议: 异常减少 → 检查报警系统是否失效")
else:
print(" 诊断建议: 多因素异常 → 进行综合检查")
print("-" * 50)
def get_top_anomalies(self, n=3):
"""获取评分最高的n个异常设备"""
if self.anomaly_scores is None:
self.calculate_anomaly_scores()
return self.anomaly_scores.head(n)
def run_full_analysis(self, top_n=15):
"""执行完整分析流程"""
self.preprocess_data()
self.analyze_trends()
self.calculate_anomaly_scores()
self.plot_results(top_n)
return self.anomaly_scores
# =====================
# 执行完整分析
# =====================
if __name__ == "__main__":
print("=== 多机台报警趋势异常分析系统 ===")
print("正在初始化分析引擎...")
detector = GroupAnomalyDetector(df)
#print("\n生成模拟数据...")
#raw_data = detector.generate_data()
print("数据预览:")
print(df.head())
print("\n数据预处理...")
scaled_data = detector.preprocess_data()
print("\n分析趋势特征...")
detector.analyze_trends()
print("\n计算异常评分...")
anomaly_scores = detector.calculate_anomaly_scores()
print("\n异常评分结果:")
print(anomaly_scores)
print("\n生成可视化报告...")
detector.plot_results()
print("\n=== 分析完成 ==="),在可视化1. 创建同组设备对比图(每个异常设备一张)部分plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%m-%d")),这边想要改成以df内的SHIFT_DATE数据作为set_major_formatter的数据,该怎么改代码
最新发布