“”"
传感器“卡值”(stuck value) 轻量级检测脚本(稳定相位+模板Z过滤版,Python 3.8+)
特点:
仅依赖 numpy / pandas / matplotlib
同时支持:
绝对卡值/重复值段(平坦段)
低变化段(导数接近0)
周期性数据的“相位差残差”卡值(去除周期基线后再检测)
自动估计或手动指定主周期
新增:跨周期“天然稳定相位”与模板Z分数过滤,显著降低对天然平台的误报
返回区间级别的告警(起止索引/时间、类型、置信度)
用法示例见底部 main 区域。
“”"
from dataclasses import dataclass
from typing import List, Optional, Tuple, Dict
import numpy as np
import pandas as pd
-----------------------------
工具函数
-----------------------------
def _rolling_std(x: np.ndarray, window: int) -> np.ndarray:
if window <= 1:
return np.zeros_like(x, dtype=float)
s = pd.Series(x)
return s.rolling(window, min_periods=window).std(ddof=0).to_numpy()
def _autocorr_via_fft(x: np.ndarray) -> np.ndarray:
“”“快速自相关(归一化),返回与 x 等长的自相关数组。”“”
x = np.asarray(x, dtype=float)
x = x - np.nanmean(x)
x[np.isnan(x)] = 0.0
n = int(1 << (len(x) * 2 - 1).bit_length())
fx = np.fft.rfft(x, n=n)
acf = np.fft.irfft(fx * np.conjugate(fx), n=n)[: len(x)]
acf /= np.maximum(acf[0], 1e-12)
return acf
def estimate_period(
x: np.ndarray,
min_period: int = 5,
max_period: Optional[int] = None,
) -> Optional[int]:
“”“粗略估计主周期(样本点单位)。返回最可能的周期长度,找不到返回 None。”“”
n = len(x)
if n < 3 * min_period:
return None
if max_period is None:
max_period = max(min(n // 3, 2000), min_period + 1)
acf = _autocorr_via_fft(x)
seg = acf[min_period : max_period]
if len(seg) == 0:
return None
k = int(np.nanargmax(seg)) + min_period
if acf[k] < 0.15:
return None
return k
def _group_runs(mask: np.ndarray) -> List[Tuple[int, int]]:
“”“将布尔序列中为 True 的连续段转为 [start, end](含 end)。”“”
runs: List[Tuple[int, int]] = []
i = 0
n = len(mask)
while i < n:
if mask[i]:
j = i
while j + 1 < n and mask[j + 1]:
j += 1
runs.append((i, j))
i = j + 1
else:
i += 1
return runs
def _phase_template_sigma(x: np.ndarray, period: int):
“”"
计算“同相位跨周期”的模板(相位中位数)与 σ(由MAD近似)。
返回 (template, sigma),若周期样本不足(<2个周期)则返回 (None, None)。
“”"
m = len(x) // period
if m < 2:
return None, None
X = x[: m * period].reshape(m, period)
template = np.nanmedian(X, axis=0)
mad = np.nanmedian(np.abs(X - template[None, :]), axis=0)
sigma = 1.4826 * mad + 1e-12
return template, sigma
-----------------------------
结果数据结构
-----------------------------
@dataclass
class StuckInterval:
start_idx: int
end_idx: int
kind: str # “flat”, “low_var”, “seasonal_flat”
score: float # 0~1 大致置信度
value_summary: str
-----------------------------
主检测器
-----------------------------
class StuckDetector:
def init(
self,
min_flat_run: int = 5,
value_tol: float = 0.0,
deriv_window: int = 3,
deriv_tol: float = 1e-6,
lowvar_window: int = 15,
lowvar_std_tol: float = 1e-4,
seasonal_period: Optional[int] = None,
seasonal_robust: bool = True,
seasonal_min_run: int = 5,
seasonal_value_tol: float = 0.0,
# —— 稳定相位+Z过滤参数 ——
stable_phase_q: float = 0.20, # σ 的分位阈值,以下视为“天然稳定相位”
stable_overlap_thr: float = 0.60, # 区间与稳定相位重叠比例阈值
require_z_k: float = 3.0 # 在稳定相位里仍要报卡值的最小Z阈值
) -> None:
self.min_flat_run = min_flat_run
self.value_tol = value_tol
self.deriv_window = deriv_window
self.deriv_tol = deriv_tol
self.lowvar_window = lowvar_window
self.lowvar_std_tol = lowvar_std_tol
self.seasonal_period = seasonal_period
self.seasonal_robust = seasonal_robust
self.seasonal_min_run = seasonal_min_run
self.seasonal_value_tol = seasonal_value_tol
self.stable_phase_q = stable_phase_q self.stable_overlap_thr = stable_overlap_thr self.require_z_k = require_z_k # ---- 基础检测 ---- def _flat_runs(self, x: np.ndarray) -> List[StuckInterval]: eq = np.abs(np.diff(x, prepend=x[0])) <= self.value_tol runs = _group_runs(eq) out: List[StuckInterval] = [] for s, e in runs: if e - s + 1 >= self.min_flat_run: v = np.median(x[s : e + 1]) out.append(StuckInterval(s, e, "flat", score=0.7, value_summary="~{:.6g}".format(v))) return out def _low_variability(self, x: np.ndarray) -> List[StuckInterval]: sd = _rolling_std(x, self.lowvar_window) mask = sd <= self.lowvar_std_tol runs = _group_runs(mask) out: List[StuckInterval] = [] for s, e in runs: if e - s + 1 >= max(self.lowvar_window, self.min_flat_run): v = np.median(x[s : e + 1]) local_sd = float(np.nanmax(sd[s : e + 1])) if np.isfinite(sd[s : e + 1]).any() else 0.0 score = float(np.clip(1.0 - local_sd / (self.lowvar_std_tol + 1e-12), 0, 1)) out.append(StuckInterval(s, e, "low_var", score=score, value_summary="~{:.6g}".format(v))) return out def _derivative_flat(self, x: np.ndarray) -> List[StuckInterval]: dx = np.diff(x, prepend=x[0]) if self.deriv_window > 1: dx = pd.Series(dx).rolling(self.deriv_window, min_periods=1, center=True).mean().to_numpy() mask = np.abs(dx) <= self.deriv_tol runs = _group_runs(mask) out: List[StuckInterval] = [] for s, e in runs: if e - s + 1 >= self.min_flat_run: v = np.median(x[s : e + 1]) local_absmax = float(np.nanmax(np.abs(dx[s : e + 1]))) if np.isfinite(dx[s : e + 1]).any() else 0.0 score = float(np.clip(1.0 - local_absmax / (self.deriv_tol + 1e-12), 0, 1)) out.append(StuckInterval(s, e, "flat", score=score, value_summary="~{:.6g}".format(v))) return out # ---- 周期性处理 ---- def _seasonal_baseline(self, x: np.ndarray, period: int) -> np.ndarray: phase_vals = [x[i::period] for i in range(period)] if self.seasonal_robust: phase_stats = [np.nanmedian(v) for v in phase_vals] else: phase_stats = [np.nanmean(v) for v in phase_vals] baseline = np.empty_like(x, dtype=float) for i in range(period): baseline[i::period] = phase_stats[i] return baseline def _seasonal_flat_runs(self, x: np.ndarray, period: int) -> List[StuckInterval]: baseline = self._seasonal_baseline(x, period) resid = x - baseline template, sigma = _phase_template_sigma(x, period) use_z = template is not None and sigma is not None eps = 1e-12 eq = np.abs(np.diff(resid, prepend=resid[0])) <= self.seasonal_value_tol runs = _group_runs(eq) out: List[StuckInterval] = [] for s, e in runs: if e - s + 1 >= self.seasonal_min_run: v = np.median(x[s : e + 1]) rmad_series = pd.Series(resid).rolling(period, min_periods=period)\ .apply(lambda w: np.nanmedian(np.abs(w - np.nanmedian(w))), raw=False) # 取窗口末端的RMAD,防止 NaN rmad_val = rmad_series.iloc[e] if pd.isna(rmad_val): rmad_val = 0.0 rmad = float(rmad_val) flat_strength = 1.0 if self.seasonal_value_tol <= 0 else float( np.clip(1.0 - np.nanmax(np.abs(np.diff(resid[s : e + 1], prepend=resid[s]))) / (self.seasonal_value_tol + 1e-12), 0, 1) ) season_stability = float(np.clip(1.0 - rmad / (np.nanstd(resid) + 1e-12), 0, 1)) if np.isfinite(rmad) else 0.5 score = float(np.clip(0.5 * flat_strength + 0.5 * season_stability, 0, 1)) if use_z: idx = np.arange(s, e + 1) phase = idx % period z = np.abs(x[idx] - template[phase]) / (sigma[phase] + eps) z_med = float(np.nanmedian(z)) if np.isfinite(z).any() else 0.0 if z_med < max(2.5, self.require_z_k - 0.5): continue out.append(StuckInterval(s, e, "seasonal_flat", score=score, value_summary="~{:.6g}".format(v))) return out # ---- 稳定相位+Z过滤 ---- def _filter_by_stability_and_z( self, x: np.ndarray, intervals: List[StuckInterval], period: int, template: np.ndarray, sigma: np.ndarray, stable_mask: np.ndarray, ) -> List[StuckInterval]: keep: List[StuckInterval] = [] eps = 1e-12 n = len(x) for it in intervals: s, e = it.start_idx, it.end_idx s = max(0, int(s)); e = min(n - 1, int(e)) if s > e: continue overlap = float(np.mean(stable_mask[s:e+1])) if e >= s else 0.0 idx = np.arange(s, e + 1) phase = idx % period z = np.abs(x[idx] - template[phase]) / (sigma[phase] + eps) z_med = float(np.nanmedian(z)) if np.isfinite(z).any() else 0.0 if overlap >= self.stable_overlap_thr and z_med < self.require_z_k: continue keep.append(it) return keep # ---- 主入口 ---- def detect(self, series: pd.Series) -> Tuple[List[StuckInterval], Dict[str, Optional[int]]]: """ 输入:时间序列(pd.Series,索引可为时间戳或整数) 输出: - intervals: StuckInterval 列表(不重叠;若重叠会做简单合并) - meta: {"period": 周期估计} """ x = series.to_numpy(dtype=float) n = len(x) intervals: List[StuckInterval] = [] period = self.seasonal_period or estimate_period(x) template = sigma = None stable_phase = None stable_mask = np.zeros(n, dtype=bool) if period is not None and period >= 3 and period * 2 <= n: template, sigma = _phase_template_sigma(x, period) if template is not None: thr = np.nanquantile(sigma, self.stable_phase_q) stable_phase = sigma <= thr m = n // period for i in range(m): stable_mask[i * period : i * period + period] = stable_phase rem = n - m * period if rem > 0: stable_mask[m * period : m * period + rem] = stable_phase[:rem] intervals.extend(self._flat_runs(x)) intervals.extend(self._low_variability(x)) intervals.extend(self._derivative_flat(x)) if period is not None and period >= 3 and period * 2 <= n: intervals.extend(self._seasonal_flat_runs(x, period)) if (period is not None) and (template is not None) and (sigma is not None): intervals = self._filter_by_stability_and_z(x, intervals, period, template, sigma, stable_mask) intervals = self._merge_intervals(intervals) return intervals, {"period": period} @staticmethod def _merge_intervals(intervals: List[StuckInterval]) -> List[StuckInterval]: if not intervals: return [] intervals = sorted(intervals, key=lambda z: (z.start_idx, z.end_idx)) merged: List[StuckInterval] = [] cur = intervals[0] for nx in intervals[1:]: if nx.start_idx <= cur.end_idx + 1 and nx.kind == cur.kind: new_s = cur.start_idx new_e = max(cur.end_idx, nx.end_idx) score = max(cur.score, nx.score) cur = StuckInterval(new_s, new_e, cur.kind, score=score, value_summary=cur.value_summary) elif nx.start_idx <= cur.end_idx + 1 and nx.kind != cur.kind: if nx.score >= cur.score: cur = StuckInterval(cur.start_idx, max(cur.end_idx, nx.end_idx), nx.kind, nx.score, nx.value_summary) else: cur = StuckInterval(cur.start_idx, max(cur.end_idx, nx.end_idx), cur.kind, cur.score, cur.value_summary) else: merged.append(cur) cur = nx merged.append(cur) return merged
-----------------------------
便捷接口
-----------------------------
def detect_stuck_segments(
series: pd.Series,
sampling_period: Optional[pd.Timedelta] = None,
**kwargs,
) -> pd.DataFrame:
“”“一次性运行并返回 DataFrame 结果。”“”
det = StuckDetector(**kwargs)
intervals, meta = det.detect(series)
rows: List[Dict[str, object]] = [] for it in intervals: start_time = end_time = None if isinstance(series.index, pd.DatetimeIndex): if sampling_period is None: start_time = series.index[it.start_idx] end_time = series.index[it.end_idx] else: start_time = series.index[0] + it.start_idx * sampling_period end_time = series.index[0] + it.end_idx * sampling_period rows.append( { "start_idx": it.start_idx, "end_idx": it.end_idx, "start_time": start_time, "end_time": end_time, "kind": it.kind, "score": it.score, "value_summary": it.value_summary, "length": it.end_idx - it.start_idx + 1, } ) df = pd.DataFrame(rows) if "period" in meta: df.attrs["estimated_period"] = meta["period"] return df
-----------------------------
进阶:基于“同周期模板”的卡值定位(周期内卡段)
-----------------------------
def detect_within_cycle_stuck(
series: pd.Series,
period: Optional[int] = None, # 支持自动估计
min_run: int = 5,
value_tol: float = 0.0,
run_std_tol: float = 1e-4,
peer_z_k: float = 4.0,
) -> pd.DataFrame:
x = series.to_numpy(dtype=float)
n = len(x)
# 自动估计周期 if period is None: period = estimate_period(x) if period is None or period < 3: return pd.DataFrame() m = n // period if m < 2: return pd.DataFrame() X = x[: m * period].reshape(m, period) template = np.nanmedian(X, axis=0) mad = np.nanmedian(np.abs(X - template[None, :]), axis=0) sigma = 1.4826 * mad + 1e-12 rows: List[Dict[str, object]] = [] for ci in range(m): cyc = X[ci] diff = np.abs(cyc - template) eq = np.abs(np.diff(cyc, prepend=cyc[0])) <= value_tol run_std = pd.Series(cyc).rolling(min_run, min_periods=min_run).std(ddof=0).to_numpy() std_mask = run_std <= run_std_tol mask = np.zeros(period, dtype=bool) for i in range(period): L = max(0, i - min_run + 1) R = i if R - L + 1 >= min_run: cond_std = (not np.isnan(std_mask[R])) and bool(std_mask[R]) if eq[L:R+1].all() and cond_std: mask[L:R+1] = True runs = _group_runs(mask) for s, e in runs: if float(np.nanmedian(sigma[s:e+1])) < run_std_tol: continue z = diff[s:e+1] / sigma[s:e+1] z_med = float(np.nanmedian(z)) if np.isfinite(z_med) and z_med >= peer_z_k: abs_s = ci * period + s abs_e = ci * period + e mean_diff = float(np.nanmean(np.sign(cyc[s:e+1] - template[s:e+1]) * diff[s:e+1])) score = float(np.tanh((z_med - peer_z_k) / 2 + 1)) rows.append({ "cycle_idx": ci, "start_phase": s, "end_phase": e, "abs_start_idx": abs_s, "abs_end_idx": abs_e, "mean_diff_to_template": mean_diff, "kind": "cycle_flat", "score": score, }) return pd.DataFrame(rows)
-----------------------------
合成温度型周期数据(非正弦,含天然稳定段 + 可选卡值段)
-----------------------------
def generate_temperature_like_series(
start_time: str = “2025-01-01”,
period: int = 60,
cycles: int = 20,
noise_std: float = 0.03,
stable_plateau_len: int = 8,
stable_values: Tuple[float, float] = (31.0, 34.0),
temp_range: Tuple[float, float] = (30.0, 35.0),
stuck_cycle_idx: Optional[int] = 8,
stuck_phase_range: Tuple[int, int] = (20, 35),
stuck_value: Optional[float] = None,
seed: Optional[int] = 42,
) -> pd.Series:
“”“生成更接近温度传感器的周期信号(非正弦)。”“”
rng = np.random.RandomState(seed) # 为兼容旧版 NumPy
a = max(6, (period // 3) - stable_plateau_len) b = max(6, (period - a - 2 * stable_plateau_len)) v_low, v_high = stable_values v_low = float(np.clip(v_low, *temp_range)) v_high = float(np.clip(v_high, *temp_range)) one_cycle_parts = [] if a > 0: ramp_up = np.linspace(v_low, v_high, a, endpoint=False) ramp_up = ramp_up + rng.normal(0, noise_std, size=a) one_cycle_parts.append(ramp_up) plateau1 = np.full(stable_plateau_len, v_high) one_cycle_parts.append(plateau1) if b > 0: mid = v_low + 0.5 * (v_high - v_low) ramp_var = np.linspace(v_high, mid, b, endpoint=False) ramp_var = ramp_var + rng.normal(0, noise_std, size=b) one_cycle_parts.append(ramp_var) plateau2 = np.full(stable_plateau_len, v_low) one_cycle_parts.append(plateau2) one_cycle = np.concatenate(one_cycle_parts) if len(one_cycle) < period: pad = np.full(period - len(one_cycle), v_low) one_cycle = np.concatenate([one_cycle, pad]) else: one_cycle = one_cycle[:period] sig = np.tile(one_cycle, cycles) plateau_mask = np.zeros(period, dtype=bool) plateau_mask[a:a + stable_plateau_len] = True plateau_mask[a + stable_plateau_len + b : a + stable_plateau_len + b + stable_plateau_len] = True plateau_mask_full = np.tile(plateau_mask, cycles) non_plateau_idx = np.where(~plateau_mask_full)[0] sig[non_plateau_idx] += rng.normal(0, noise_std, size=len(non_plateau_idx)) if stuck_cycle_idx is not None: s = stuck_cycle_idx * period + stuck_phase_range[0] e = stuck_cycle_idx * period + stuck_phase_range[1] s = int(np.clip(s, 0, len(sig) - 1)) e = int(np.clip(e, 0, len(sig) - 1)) if s <= e: if stuck_value is None: seg = sig[s:e + 1] sv = float(np.round(np.median(seg), 3)) else: sv = float(np.clip(stuck_value, *temp_range)) sig[s:e + 1] = sv sig = np.clip(sig, *temp_range) idx = pd.date_range(start_time, periods=len(sig), freq="S") return pd.Series(sig, index=idx)
-----------------------------
可视化:原始数据 + 卡值区间 + 稳定点 标注
-----------------------------
import matplotlib.pyplot as plt
def _mask_from_intervals(n: int, intervals_df: pd.DataFrame, start_col: str, end_col: str) -> np.ndarray:
m = np.zeros(n, dtype=bool)
if intervals_df is None or len(intervals_df) == 0:
return m
for s, e in intervals_df[[start_col, end_col]].to_numpy():
s = int(max(0, s))
e = int(min(n - 1, e))
if s <= e:
m[s : e + 1] = True
return m
def detect_stable_points(
series: pd.Series,
period: Optional[int] = None,
stuck_mask: Optional[np.ndarray] = None,
method: str = “cycle_sigma”,
stable_sigma_q: float = 0.2,
rolling_window: int = 20,
rolling_std_tol: float = 1e-3,
) -> np.ndarray:
“”“返回布尔数组,表示“本身为稳定的点”。
method=“cycle_sigma” 时若未提供 period,会尝试自动估计;失败则退回滚动标准差法。
“””
x = series.to_numpy(dtype=float)
n = len(x)
stable = np.zeros(n, dtype=bool)
if stuck_mask is None:
stuck_mask = np.zeros(n, dtype=bool)
if method == "cycle_sigma": p = period if p is None: p = estimate_period(x) if p is not None and n // p >= 2: m = n // p X = x[: m * p].reshape(m, p) template = np.nanmedian(X, axis=0) mad = np.nanmedian(np.abs(X - template[None, :]), axis=0) sigma = 1.4826 * mad + 1e-12 thr = np.nanquantile(sigma, stable_sigma_q) stable_phase = sigma <= thr for i in range(m): idx0 = i * p stable[idx0 : idx0 + p] = stable_phase rem = n - m * p if rem > 0: stable[m * p : m * p + rem] = stable_phase[:rem] else: sd = _rolling_std(x, rolling_window) stable = sd <= rolling_std_tol else: sd = _rolling_std(x, rolling_window) stable = sd <= rolling_std_tol stable = np.logical_and(stable, ~stuck_mask) return stable
def plot_stuck_overview(
series: pd.Series,
res_basic: Optional[pd.DataFrame] = None,
res_within: Optional[pd.DataFrame] = None,
period: Optional[int] = None,
stable_method: str = “cycle_sigma”,
stable_sigma_q: float = 0.2,
rolling_window: int = 20,
rolling_std_tol: float = 1e-3,
figsize: Tuple[int, int] = (12, 5),
save_path: Optional[str] = None,
):
“”“绘制原始数据,并叠加:卡值区间(红)+ 稳定点(绿)。”“”
x = series.to_numpy(dtype=float)
n = len(x)
mask_basic = _mask_from_intervals(n, res_basic, "start_idx", "end_idx") if res_basic is not None else np.zeros(n, dtype=bool) mask_within = _mask_from_intervals(n, res_within, "abs_start_idx", "abs_end_idx") if res_within is not None else np.zeros(n, dtype=bool) stuck_mask = np.logical_or(mask_basic, mask_within) stable_mask = detect_stable_points( series, period=period, stuck_mask=stuck_mask, method=stable_method, stable_sigma_q=stable_sigma_q, rolling_window=rolling_window, rolling_std_tol=rolling_std_tol, ) fig, ax = plt.subplots(1, 1, figsize=figsize) if isinstance(series.index, pd.DatetimeIndex): t = series.index else: t = np.arange(n) ax.plot(t, x, linewidth=1.2, label="signal") def _add_spans(mask, color="#ff4d4f", alpha=0.25, label="stuck"): runs = _group_runs(mask) for i, (s, e) in enumerate(runs): ax.axvspan(t[s], t[e], color=color, alpha=alpha, lw=0, label=(label if i == 0 else None)) _add_spans(stuck_mask, label="stuck") idx_stable = np.where(stable_mask)[0] if len(idx_stable) > 0: ax.scatter(t[idx_stable], x[idx_stable], s=12, color="#52c41a", label="stable points", zorder=3) ax.set_title("Signal with Stuck Segments and Stable Points") ax.set_xlabel("Time" if isinstance(series.index, pd.DatetimeIndex) else "Index") ax.set_ylabel("Value") ax.legend() ax.grid(True, linestyle=":", alpha=0.5) if save_path: fig.savefig(save_path, dpi=160, bbox_inches="tight") return fig, ax
-----------------------------
示例(可注释/删除)
-----------------------------
if name == “main”:
s = generate_temperature_like_series(
start_time=“2025-01-01”,
period=60,
cycles=20,
noise_std=0.03,
stable_plateau_len=8,
stable_values=(31.0, 33.0),
temp_range=(30.0, 35.0),
stuck_cycle_idx=8,
stuck_phase_range=(20, 35),
stuck_value=None,
seed=42
)
# 基础检测:seasonal_period 未给则自动估计;此处演示手动指定为 60 res1 = detect_stuck_segments( s, sampling_period=pd.Timedelta(seconds=1), min_flat_run=5, value_tol=5e-4, deriv_window=3, deriv_tol=5e-4, lowvar_window=10, lowvar_std_tol=1e-3, seasonal_period=60, # 可改为 None 让其自动估计 seasonal_min_run=5, seasonal_value_tol=5e-4, stable_phase_q=0.20, stable_overlap_thr=0.60, require_z_k=3.0 ) # 周期内卡段检测:period=None 将自动估计 res2 = detect_within_cycle_stuck( s, period=None, # ← 自动估计 min_run=6, value_tol=1e-4, run_std_tol=8e-4, peer_z_k=4.0, ) plot_stuck_overview( s, res_basic=res1, res_within=res2, period=None, # 绘图也会尝试自动估计周期,不行则退回滚动法 stable_method="cycle_sigma", stable_sigma_q=0.2, figsize=(12, 4.5), save_path=None, ) import matplotlib.pyplot as plt plt.show()请帮我提取整个算法的逻辑框架
最新发布