From 49f2fc443887fba0aeddf216bc966d0d47fbc483 Mon Sep 17 00:00:00 2001 From: zhangshaozhi Date: Sun, 17 May 2026 20:26:16 +0800 Subject: [PATCH 01/55] fix: make application runnable in production - Dockerfile: include README.md in COPY statement - main.py: add static file serving for built frontend (SPA routing support) - App.vue: wrap template with Naive UI message/dialog/notification providers Co-Authored-By: Claude Sonnet 4.6 --- Dockerfile | 2 +- protoforge/main.py | 10 ++++++++++ web/src/App.vue | 8 +++++++- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index babbf5f..2528ade 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,7 +10,7 @@ WORKDIR /app RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/* -COPY pyproject.toml . +COPY pyproject.toml README.md ./ COPY protoforge/ protoforge/ COPY --from=frontend-builder /app/web/dist /app/static diff --git a/protoforge/main.py b/protoforge/main.py index 894c5bc..0e43951 100644 --- a/protoforge/main.py +++ b/protoforge/main.py @@ -1,8 +1,11 @@ import logging from contextlib import asynccontextmanager +from pathlib import Path from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import FileResponse +from fastapi.staticfiles import StaticFiles from protoforge.api.v1.router import router from protoforge.core.engine import SimulationEngine @@ -184,12 +187,19 @@ def create_app() -> FastAPI: @app.get("/") async def root(): + index = Path("/app/static/index.html") + if index.exists(): + return FileResponse(index) return { "name": "ProtoForge", "version": "0.1.0", "description": "物联网协议仿真与测试平台", } + static_dir = Path("/app/static") + if static_dir.exists(): + app.mount("/assets", StaticFiles(directory=static_dir / "assets"), name="assets") + @app.get("/health") async def health(): return {"status": "ok"} diff --git a/web/src/App.vue b/web/src/App.vue index ae6d315..22eabe9 100644 --- 
a/web/src/App.vue +++ b/web/src/App.vue @@ -1,4 +1,7 @@ From 550d8e20b98481a5390be716104142d6aec87770 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Wed, 20 May 2026 19:57:06 +0800 Subject: [PATCH 17/55] feat(ai): support ai --- ai/predict.py | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100755 ai/predict.py diff --git a/ai/predict.py b/ai/predict.py new file mode 100755 index 0000000..b70f822 --- /dev/null +++ b/ai/predict.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- + +import requests +import numpy as np +from datetime import datetime, timedelta + +VM_URL = "http://localhost:8428" +DEVICE_ID = "fanuc-cnc" +METRIC = f'feed_rate{{device_id="{DEVICE_ID}"}}' + +def fetch_history(minutes=30): + """从VM拉取历史数据""" + end = datetime.now() + start = end - timedelta(minutes=minutes) + resp = requests.get(f"{VM_URL}/api/v1/query_range", params={ + "query": METRIC, + "start": start.timestamp(), + "end": end.timestamp(), + "step": "1s", + }) + result = resp.json()["data"]["result"] + if not result: + return [], [] + values = result[0]["values"] + ts = [float(v[0]) for v in values] + ys = [float(v[1]) for v in values] + return ts, ys + +def predict_next(ts, ys, horizon=60): + """ + 用FFT检测主频,拟合正弦波,外推未来horizon秒 + 适合周期性信号 + """ + if len(ys) < 60: + return [], [] + + ys = np.array(ys) + n = len(ys) + dt = 1.0 # 1秒采样 + + # FFT找主频 + fft = np.fft.rfft(ys - ys.mean()) + freqs = np.fft.rfftfreq(n, d=dt) + dominant_idx = np.argmax(np.abs(fft[1:])) + 1 + dominant_freq = freqs[dominant_idx] + period = 1.0 / dominant_freq if dominant_freq > 0 else 60 + + # 拟合:y = A*sin(2π/T * t + φ) + offset + from scipy.optimize import curve_fit + t_rel = np.arange(n, dtype=float) + offset = ys.mean() + amplitude = (ys.max() - ys.min()) / 2 + + def sine_model(t, A, T, phi, C): + return A * np.sin(2 * np.pi / T * t + phi) + C + + try: + popt, _ = curve_fit( + sine_model, t_rel, ys, + p0=[amplitude, period, 0, offset], + 
maxfev=5000 + ) + # 外推 + t_future = np.arange(n, n + horizon, dtype=float) + y_pred = sine_model(t_future, *popt) + ts_future = [ts[-1] + i + 1 for i in range(horizon)] + return ts_future, y_pred.tolist() + except Exception: + # 拟合失败降级为线性 + slope = (ys[-1] - ys[-10]) / 10 + ts_future = [ts[-1] + i + 1 for i in range(horizon)] + y_pred = [ys[-1] + slope * (i + 1) for i in range(horizon)] + return ts_future, y_pred + +def write_predictions(ts_future, y_pred, metric_name="protoforge_feed_rate_predicted"): + """写回VictoriaMetrics""" + lines = [] + for t, y in zip(ts_future, y_pred): + ts_ms = int(t * 1000) + lines.append(f'{metric_name}{{device_id="{DEVICE_ID}"}} {y:.2f} {ts_ms}') + payload = "\n".join(lines) + requests.post(f"{VM_URL}/api/v1/import/prometheus", data=payload) + +def run_once(): + ts, ys = fetch_history(minutes=30) + if len(ys) < 60: + print("数据不足") + return + ts_future, y_pred = predict_next(ts, ys, horizon=120) + write_predictions(ts_future, y_pred) + print(f"写入 {len(y_pred)} 个预测点,预测到 +{len(y_pred)}s") + +if __name__ == "__main__": + import time + while True: + run_once() + time.sleep(30) # 每30秒重新预测一次 From e8f70d09c27d53bff6c8f310b7a2064496632a33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Wed, 20 May 2026 20:29:13 +0800 Subject: [PATCH 18/55] feat(predict_v2): add predict_v2 python file --- ai/predict_v2.py | 206 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100755 ai/predict_v2.py diff --git a/ai/predict_v2.py b/ai/predict_v2.py new file mode 100755 index 0000000..df5dd97 --- /dev/null +++ b/ai/predict_v2.py @@ -0,0 +1,206 @@ +# -*- coding: utf-8 -*- +""" +ProtoForge 预测服务 v2 +从 VictoriaMetrics 拉取历史数据,用 FFT + 正弦拟合预测未来值,写回 VM。 +预测值时间戳为未来时间,Grafana 中预测线出现在实测线右侧延伸处。 +""" + +import logging +import time +from datetime import datetime, timedelta + +import numpy as np +import requests +from scipy.optimize import curve_fit + +logging.basicConfig( + level=logging.INFO, + 
format="%(asctime)s [%(levelname)s] %(message)s", +) +logger = logging.getLogger(__name__) + +# ── 配置 ────────────────────────────────────────────────────────────────────── +VM_URL = "http://localhost:8428" + +# 要预测的指标列表,每项:(查询表达式, 写回指标名) +PREDICT_TARGETS = [ + ('feed_rate{device_id="fanuc-cnc"}', "feed_rate_predicted"), + ('spindle_speed{device_id="fanuc-cnc"}', "spindle_speed_predicted"), + ('spindle_current{device_id="fanuc-cnc"}', "spindle_current_predicted"), + ('vibration_x{device_id="fanuc-cnc"}', "vibration_x_predicted"), + ('vibration_y{device_id="fanuc-cnc"}', "vibration_y_predicted"), + ('vibration_z{device_id="fanuc-cnc"}', "vibration_z_predicted"), +] + +HISTORY_MINUTES = 30 # 拉取多少分钟历史数据用于拟合 +HORIZON_SECONDS = 120 # 预测未来多少秒 +POLL_INTERVAL = 30 # 每隔多少秒重新预测一次 +MIN_POINTS = 120 # 至少需要多少个历史点才开始预测 +# ───────────────────────────────────────────────────────────────────────────── + + +def fetch_history(query: str, minutes: int = HISTORY_MINUTES): + """从 VictoriaMetrics 拉取历史时序数据,返回 (timestamps, values)。""" + now = datetime.now() + start = now - timedelta(minutes=minutes) + try: + resp = requests.get( + f"{VM_URL}/api/v1/query_range", + params={ + "query": query, + "start": start.timestamp(), + "end": now.timestamp(), + "step": "1s", + }, + timeout=10, + ) + resp.raise_for_status() + except requests.RequestException as e: + logger.error("拉取数据失败 query=%s: %s", query, e) + return [], [] + + result = resp.json().get("data", {}).get("result", []) + if not result: + return [], [] + + values = result[0]["values"] + ts = [float(v[0]) for v in values] + ys = [float(v[1]) for v in values] + return ts, ys + + +def _sine_model(t, A, T, phi, C): + return A * np.sin(2 * np.pi / T * t + phi) + C + + +def predict_next(ts: list, ys: list, horizon: int = HORIZON_SECONDS): + """ + 用 FFT 检测主频,拟合正弦波,外推未来 horizon 秒。 + 返回 (future_timestamps, predicted_values),时间戳均在最后一个真实点之后。 + 降级策略:拟合失败时用最近 10 点线性外推。 + """ + ys_arr = np.array(ys) + n = len(ys_arr) + + # ── FFT 找主频 
──────────────────────────────────────────────────────────── + fft_vals = np.fft.rfft(ys_arr - ys_arr.mean()) + freqs = np.fft.rfftfreq(n, d=1.0) # d=1 表示 1 秒采样间隔 + # 跳过直流分量(index 0) + dominant_idx = int(np.argmax(np.abs(fft_vals[1:]))) + 1 + dominant_freq = freqs[dominant_idx] + period = 1.0 / dominant_freq if dominant_freq > 0 else 60.0 + period = float(np.clip(period, 5.0, 3600.0)) # 限制在合理范围 + + # ── 正弦拟合 ────────────────────────────────────────────────────────────── + t_rel = np.arange(n, dtype=float) + amplitude = (ys_arr.max() - ys_arr.min()) / 2.0 + offset = float(ys_arr.mean()) + + # 最后一个真实数据点的 Unix 时间戳(秒) + last_ts = ts[-1] + + try: + popt, _ = curve_fit( + _sine_model, + t_rel, + ys_arr, + p0=[amplitude, period, 0.0, offset], + bounds=( + [0, 5.0, -np.pi, ys_arr.min()], + [np.inf, 3600.0, np.pi, ys_arr.max()], + ), + maxfev=8000, + ) + t_future = np.arange(n, n + horizon, dtype=float) + y_pred = _sine_model(t_future, *popt) + # 裁剪到历史数据值域,避免外推飞出合理范围 + y_pred = np.clip(y_pred, ys_arr.min() * 0.5, ys_arr.max() * 1.5) + + # 未来时间戳:last_ts + 1s, +2s, ..., +horizon s + ts_future = [last_ts + i + 1 for i in range(horizon)] + logger.debug("正弦拟合成功 period=%.1fs amplitude=%.2f", popt[1], popt[0]) + return ts_future, y_pred.tolist() + + except Exception as e: + logger.warning("正弦拟合失败,降级为线性外推: %s", e) + tail = min(10, n) + slope = (ys_arr[-1] - ys_arr[-tail]) / tail + ts_future = [last_ts + i + 1 for i in range(horizon)] + y_pred = [float(ys_arr[-1] + slope * (i + 1)) for i in range(horizon)] + return ts_future, y_pred + + +def write_predictions(ts_future: list, y_pred: list, metric_name: str, extra_labels: dict = None): + """ + 将预测值以 Prometheus exposition 格式写入 VictoriaMetrics。 + 时间戳为毫秒级 Unix 时间戳,对应未来时间点。 + """ + label_str = "" + if extra_labels: + parts = [f'{k}="{v}"' for k, v in extra_labels.items()] + label_str = "{" + ",".join(parts) + "}" + + lines = [] + for t, y in zip(ts_future, y_pred): + ts_ms = int(t * 1000) + lines.append(f"{metric_name}{label_str} {y:.4f} 
{ts_ms}") + + payload = "\n".join(lines) + try: + resp = requests.post( + f"{VM_URL}/api/v1/import/prometheus", + data=payload, + timeout=10, + ) + resp.raise_for_status() + except requests.RequestException as e: + logger.error("写入预测数据失败 metric=%s: %s", metric_name, e) + + +def _parse_labels(query: str) -> dict: + """从查询表达式中解析标签,如 feed_rate{device_id="fanuc-cnc"} → {"device_id": "fanuc-cnc"}""" + labels = {} + if "{" not in query: + return labels + label_part = query[query.index("{") + 1: query.index("}")] + for item in label_part.split(","): + if "=" in item: + k, v = item.split("=", 1) + labels[k.strip()] = v.strip().strip('"') + return labels + + +def run_once(): + now_str = datetime.now().strftime("%H:%M:%S") + for query, pred_metric in PREDICT_TARGETS: + ts, ys = fetch_history(query) + if len(ys) < MIN_POINTS: + logger.info("[%s] %s 数据不足(%d 点),跳过", now_str, query, len(ys)) + continue + + ts_future, y_pred = predict_next(ts, ys, horizon=HORIZON_SECONDS) + if not ts_future: + continue + + extra_labels = _parse_labels(query) + write_predictions(ts_future, y_pred, pred_metric, extra_labels) + + future_time = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S") + logger.info( + "[%s] %-40s → %-35s 写入 %d 点,预测至 %s", + now_str, query, pred_metric, len(y_pred), future_time, + ) + + +def main(): + logger.info( + "预测服务启动 VM=%s 预测窗口=%ds 轮询间隔=%ds", + VM_URL, HORIZON_SECONDS, POLL_INTERVAL, + ) + while True: + run_once() + time.sleep(POLL_INTERVAL) + + +if __name__ == "__main__": + main() From 57df20284645347aba7de4e2399640e05b6d0b3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Wed, 20 May 2026 21:13:52 +0800 Subject: [PATCH 19/55] fix --- ai/predict_v2.py | 536 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 450 insertions(+), 86 deletions(-) diff --git a/ai/predict_v2.py b/ai/predict_v2.py index df5dd97..bc425c8 100755 --- a/ai/predict_v2.py +++ b/ai/predict_v2.py @@ -1,56 +1,93 @@ # -*- coding: utf-8 -*- """ 
-ProtoForge 预测服务 v2 -从 VictoriaMetrics 拉取历史数据,用 FFT + 正弦拟合预测未来值,写回 VM。 -预测值时间戳为未来时间,Grafana 中预测线出现在实测线右侧延伸处。 +ProtoForge 预测服务 v3 + +修复点: +1. 解决 HORIZON_SECONDS > POLL_INTERVAL 时,多轮预测窗口重叠导致 Grafana 出现毛刺/竖线问题。 +2. 每轮写入新预测前,删除同一个预测 metric 的旧预测序列,只保留最新一轮预测。 +3. 预测时间戳按整秒写入,避免毫秒时间戳和 Grafana step 不对齐。 +4. 拟合使用真实 timestamp 相对时间,不再假设历史数据严格 1 秒等间隔。 +5. 对历史数据做排序、去重、NaN/Inf 清洗。 """ import logging +import math +import re import time from datetime import datetime, timedelta +from typing import Dict, List, Tuple import numpy as np import requests from scipy.optimize import curve_fit + logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", ) + logger = logging.getLogger(__name__) + # ── 配置 ────────────────────────────────────────────────────────────────────── + VM_URL = "http://localhost:8428" -# 要预测的指标列表,每项:(查询表达式, 写回指标名) PREDICT_TARGETS = [ - ('feed_rate{device_id="fanuc-cnc"}', "feed_rate_predicted"), - ('spindle_speed{device_id="fanuc-cnc"}', "spindle_speed_predicted"), - ('spindle_current{device_id="fanuc-cnc"}', "spindle_current_predicted"), - ('vibration_x{device_id="fanuc-cnc"}', "vibration_x_predicted"), - ('vibration_y{device_id="fanuc-cnc"}', "vibration_y_predicted"), - ('vibration_z{device_id="fanuc-cnc"}', "vibration_z_predicted"), + ('feed_rate{device_id="fanuc-cnc"}', "feed_rate_predicted"), + ('spindle_speed{device_id="fanuc-cnc"}', "spindle_speed_predicted"), + ('spindle_current{device_id="fanuc-cnc"}', "spindle_current_predicted"), + ('vibration_x{device_id="fanuc-cnc"}', "vibration_x_predicted"), + ('vibration_y{device_id="fanuc-cnc"}', "vibration_y_predicted"), + ('vibration_z{device_id="fanuc-cnc"}', "vibration_z_predicted"), ] -HISTORY_MINUTES = 30 # 拉取多少分钟历史数据用于拟合 -HORIZON_SECONDS = 120 # 预测未来多少秒 -POLL_INTERVAL = 30 # 每隔多少秒重新预测一次 -MIN_POINTS = 120 # 至少需要多少个历史点才开始预测 +HISTORY_MINUTES = 30 +HORIZON_SECONDS = 120 +POLL_INTERVAL = 30 +MIN_POINTS = 120 +QUERY_STEP = "1s" + +# 关键修复:每轮写入前删除旧预测,避免 120s 预测窗口和 30s 轮询周期重叠 
+CLEAR_OLD_PREDICTIONS = True + +# 如果删除旧预测失败,是否跳过本轮写入。 +# 建议 True,避免继续叠加脏数据。 +SKIP_WRITE_IF_CLEAR_FAILED = True + +# 给新预测数据加一个稳定标签,方便 Grafana 查询过滤。 +# Grafana 可以查询:feed_rate_predicted{device_id="fanuc-cnc",forecast="latest"} +EXTRA_PREDICT_LABELS = { + "forecast": "latest", + "source": "protoforge", +} + +# 正弦周期限制 +MIN_PERIOD_SECONDS = 5.0 +MAX_PERIOD_SECONDS = 3600.0 + # ───────────────────────────────────────────────────────────────────────────── -def fetch_history(query: str, minutes: int = HISTORY_MINUTES): - """从 VictoriaMetrics 拉取历史时序数据,返回 (timestamps, values)。""" +def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]: + """ + 从 VictoriaMetrics 拉取历史时序数据。 + 返回: + timestamps: Unix 秒级时间戳 + values: float 数值 + """ now = datetime.now() start = now - timedelta(minutes=minutes) + try: resp = requests.get( f"{VM_URL}/api/v1/query_range", params={ "query": query, "start": start.timestamp(), - "end": now.timestamp(), - "step": "1s", + "end": now.timestamp(), + "step": QUERY_STEP, }, timeout=10, ) @@ -59,148 +96,475 @@ def fetch_history(query: str, minutes: int = HISTORY_MINUTES): logger.error("拉取数据失败 query=%s: %s", query, e) return [], [] - result = resp.json().get("data", {}).get("result", []) + try: + result = resp.json().get("data", {}).get("result", []) + except Exception as e: + logger.error("解析 VM 返回失败 query=%s: %s", query, e) + return [], [] + if not result: return [], [] - values = result[0]["values"] - ts = [float(v[0]) for v in values] - ys = [float(v[1]) for v in values] + values = result[0].get("values", []) + if not values: + return [], [] + + ts = [] + ys = [] + + for item in values: + if len(item) < 2: + continue + + try: + t = float(item[0]) + y = float(item[1]) + except Exception: + continue + + if not math.isfinite(t) or not math.isfinite(y): + continue + + ts.append(t) + ys.append(y) + return ts, ys -def _sine_model(t, A, T, phi, C): - return A * np.sin(2 * np.pi / T * t + phi) + C +def normalize_history(ts: 
List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]: + """ + 清洗历史数据: + 1. 转换为整秒时间戳 + 2. 排序 + 3. 同一秒多个值时保留最后一个 + 4. 插值补齐中间缺失秒 + """ + if not ts or not ys or len(ts) != len(ys): + return np.array([]), np.array([]) + + data = {} + + for t, y in zip(ts, ys): + try: + sec = int(round(float(t))) + val = float(y) + except Exception: + continue + + if not math.isfinite(sec) or not math.isfinite(val): + continue + + data[sec] = val + + if not data: + return np.array([]), np.array([]) + + sorted_items = sorted(data.items(), key=lambda x: x[0]) + ts_clean = np.array([x[0] for x in sorted_items], dtype=float) + ys_clean = np.array([x[1] for x in sorted_items], dtype=float) -def predict_next(ts: list, ys: list, horizon: int = HORIZON_SECONDS): + if len(ts_clean) < 2: + return ts_clean, ys_clean + + start_sec = int(ts_clean[0]) + end_sec = int(ts_clean[-1]) + + if end_sec <= start_sec: + return ts_clean, ys_clean + + # 统一为 1 秒网格,减少 query_range 缺点、抖动、缺失点对 FFT 的影响 + ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float) + ys_grid = np.interp(ts_grid, ts_clean, ys_clean) + + return ts_grid, ys_grid + + +def _sine_model(t: np.ndarray, A: float, T: float, phi: float, C: float) -> np.ndarray: + return A * np.sin(2.0 * np.pi / T * t + phi) + C + + +def estimate_period_by_fft(ys_arr: np.ndarray) -> float: """ - 用 FFT 检测主频,拟合正弦波,外推未来 horizon 秒。 - 返回 (future_timestamps, predicted_values),时间戳均在最后一个真实点之后。 - 降级策略:拟合失败时用最近 10 点线性外推。 + 使用 FFT 估算主周期。 + ys_arr 默认是 1 秒间隔。 """ - ys_arr = np.array(ys) n = len(ys_arr) - # ── FFT 找主频 ──────────────────────────────────────────────────────────── - fft_vals = np.fft.rfft(ys_arr - ys_arr.mean()) - freqs = np.fft.rfftfreq(n, d=1.0) # d=1 表示 1 秒采样间隔 - # 跳过直流分量(index 0) - dominant_idx = int(np.argmax(np.abs(fft_vals[1:]))) + 1 - dominant_freq = freqs[dominant_idx] - period = 1.0 / dominant_freq if dominant_freq > 0 else 60.0 - period = float(np.clip(period, 5.0, 3600.0)) # 限制在合理范围 + if n < 4: + return 60.0 + + centered = ys_arr - 
np.mean(ys_arr) + + if np.allclose(centered, 0): + return 60.0 + + fft_vals = np.fft.rfft(centered) + freqs = np.fft.rfftfreq(n, d=1.0) + + if len(freqs) <= 1: + return 60.0 + + # 跳过直流分量 index 0 + power = np.abs(fft_vals[1:]) + if len(power) == 0 or np.max(power) <= 0: + return 60.0 + + dominant_idx = int(np.argmax(power)) + 1 + dominant_freq = float(freqs[dominant_idx]) + + if dominant_freq <= 0: + return 60.0 + + period = 1.0 / dominant_freq + period = float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + return period - # ── 正弦拟合 ────────────────────────────────────────────────────────────── - t_rel = np.arange(n, dtype=float) - amplitude = (ys_arr.max() - ys_arr.min()) / 2.0 - offset = float(ys_arr.mean()) - # 最后一个真实数据点的 Unix 时间戳(秒) - last_ts = ts[-1] +def predict_next( + ts: List[float], + ys: List[float], + horizon: int = HORIZON_SECONDS, + start_from_now: bool = True, +) -> Tuple[List[float], List[float]]: + """ + 用 FFT 检测主频,拟合正弦波,外推未来 horizon 秒。 + 返回: + future_timestamps: 未来整秒时间戳 + predicted_values: 预测值 + """ + ts_grid, ys_grid = normalize_history(ts, ys) + + if len(ys_grid) < MIN_POINTS: + return [], [] + + n = len(ys_grid) + + y_min = float(np.min(ys_grid)) + y_max = float(np.max(ys_grid)) + y_mean = float(np.mean(ys_grid)) + y_range = y_max - y_min + + # 数据几乎不波动时,直接使用最后一个值保持 + if y_range <= 1e-9: + base_ts = int(time.time()) if start_from_now else int(ts_grid[-1]) + base_ts = max(base_ts, int(ts_grid[-1])) + + ts_future = [base_ts + i + 1 for i in range(horizon)] + y_pred = [float(ys_grid[-1])] * horizon + return ts_future, y_pred + + period = estimate_period_by_fft(ys_grid) + + # 用真实时间戳做相对时间,而不是 np.arange(n) + t_fit = ts_grid - ts_grid[0] + + amplitude = y_range / 2.0 + offset = y_mean + + # 预测起点统一对齐到整秒 + if start_from_now: + base_ts = int(time.time()) + else: + base_ts = int(ts_grid[-1]) + + # 避免因为 VM 查询延迟导致预测点落在最后一个真实点之前 + base_ts = max(base_ts, int(ts_grid[-1])) + + ts_future_arr = np.arange(base_ts + 1, base_ts + 1 + horizon, 1, 
dtype=float) + t_future = ts_future_arr - ts_grid[0] try: popt, _ = curve_fit( _sine_model, - t_rel, - ys_arr, + t_fit, + ys_grid, p0=[amplitude, period, 0.0, offset], bounds=( - [0, 5.0, -np.pi, ys_arr.min()], - [np.inf, 3600.0, np.pi, ys_arr.max()], + [0.0, MIN_PERIOD_SECONDS, -2.0 * np.pi, y_min - y_range], + [np.inf, MAX_PERIOD_SECONDS, 2.0 * np.pi, y_max + y_range], ), - maxfev=8000, + maxfev=12000, + ) + + y_pred_arr = _sine_model(t_future, *popt) + + # 裁剪到合理范围,避免拟合异常时飞出去 + margin = y_range * 0.2 + lower = y_min - margin + upper = y_max + margin + y_pred_arr = np.clip(y_pred_arr, lower, upper) + + if not np.all(np.isfinite(y_pred_arr)): + raise ValueError("预测结果包含 NaN/Inf") + + logger.debug( + "正弦拟合成功 period=%.2fs amplitude=%.4f offset=%.4f", + popt[1], + popt[0], + popt[3], ) - t_future = np.arange(n, n + horizon, dtype=float) - y_pred = _sine_model(t_future, *popt) - # 裁剪到历史数据值域,避免外推飞出合理范围 - y_pred = np.clip(y_pred, ys_arr.min() * 0.5, ys_arr.max() * 1.5) - # 未来时间戳:last_ts + 1s, +2s, ..., +horizon s - ts_future = [last_ts + i + 1 for i in range(horizon)] - logger.debug("正弦拟合成功 period=%.1fs amplitude=%.2f", popt[1], popt[0]) - return ts_future, y_pred.tolist() + return ts_future_arr.tolist(), y_pred_arr.astype(float).tolist() except Exception as e: - logger.warning("正弦拟合失败,降级为线性外推: %s", e) + logger.warning("正弦拟合失败,降级为最近值平滑外推: %s", e) + + # 降级策略:用最近 10 个点的均值保持,避免线性外推越走越偏 tail = min(10, n) - slope = (ys_arr[-1] - ys_arr[-tail]) / tail - ts_future = [last_ts + i + 1 for i in range(horizon)] - y_pred = [float(ys_arr[-1] + slope * (i + 1)) for i in range(horizon)] + last_value = float(np.mean(ys_grid[-tail:])) + + ts_future = ts_future_arr.tolist() + y_pred = [last_value] * horizon + return ts_future, y_pred -def write_predictions(ts_future: list, y_pred: list, metric_name: str, extra_labels: dict = None): +def prom_escape_label_value(value: str) -> str: + """ + Prometheus exposition label value 转义。 + """ + return ( + str(value) + .replace("\\", "\\\\") + 
.replace("\n", "\\n") + .replace('"', '\\"') + ) + + +def build_selector(metric_name: str, labels: Dict[str, str]) -> str: + """ + 构造 PromQL selector,用于 delete_series。 + + 示例: + feed_rate_predicted{device_id="fanuc-cnc"} + """ + if not labels: + return metric_name + + parts = [] + for k in sorted(labels.keys()): + v = prom_escape_label_value(labels[k]) + parts.append(f'{k}="{v}"') + + return f'{metric_name}' + "{" + ",".join(parts) + "}" + + +def delete_old_predictions(metric_name: str, base_labels: Dict[str, str]) -> bool: + """ + 删除旧预测序列,避免多轮预测窗口重叠。 + + 注意: + 这里故意只用 base_labels,比如 device_id。 + 不带 forecast/source 标签,是为了兼容旧版本脚本写入的无 forecast 标签数据。 + """ + selector = build_selector(metric_name, base_labels) + + try: + resp = requests.post( + f"{VM_URL}/api/v1/admin/tsdb/delete_series", + params=[("match[]", selector)], + timeout=10, + ) + + if resp.status_code not in (200, 204): + logger.error( + "删除旧预测数据失败 metric=%s selector=%s status=%s body=%s", + metric_name, + selector, + resp.status_code, + resp.text[:500], + ) + return False + + logger.debug("已删除旧预测数据 selector=%s", selector) + return True + + except requests.RequestException as e: + logger.error("删除旧预测数据异常 metric=%s selector=%s: %s", metric_name, selector, e) + return False + + +def write_predictions( + ts_future: List[float], + y_pred: List[float], + metric_name: str, + labels: Dict[str, str] = None, +) -> bool: """ 将预测值以 Prometheus exposition 格式写入 VictoriaMetrics。 - 时间戳为毫秒级 Unix 时间戳,对应未来时间点。 + 时间戳为毫秒级 Unix timestamp。 """ + if labels is None: + labels = {} + + if not ts_future or not y_pred or len(ts_future) != len(y_pred): + logger.warning("预测数据为空或长度不一致 metric=%s", metric_name) + return False + label_str = "" - if extra_labels: - parts = [f'{k}="{v}"' for k, v in extra_labels.items()] + if labels: + parts = [] + for k in sorted(labels.keys()): + v = prom_escape_label_value(labels[k]) + parts.append(f'{k}="{v}"') label_str = "{" + ",".join(parts) + "}" lines = [] + for t, y in zip(ts_future, y_pred): - ts_ms 
= int(t * 1000) - lines.append(f"{metric_name}{label_str} {y:.4f} {ts_ms}") + try: + ts_sec = int(round(float(t))) + val = float(y) + except Exception: + continue + + if not math.isfinite(ts_sec) or not math.isfinite(val): + continue + + ts_ms = ts_sec * 1000 + lines.append(f"{metric_name}{label_str} {val:.6f} {ts_ms}") + + if not lines: + logger.warning("没有可写入的预测点 metric=%s", metric_name) + return False + + payload = "\n".join(lines) + "\n" - payload = "\n".join(lines) try: resp = requests.post( f"{VM_URL}/api/v1/import/prometheus", - data=payload, + data=payload.encode("utf-8"), + headers={ + "Content-Type": "text/plain; version=0.0.4; charset=utf-8", + }, timeout=10, ) resp.raise_for_status() + return True + except requests.RequestException as e: logger.error("写入预测数据失败 metric=%s: %s", metric_name, e) + return False + + +_LABEL_PATTERN = re.compile(r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*') -def _parse_labels(query: str) -> dict: - """从查询表达式中解析标签,如 feed_rate{device_id="fanuc-cnc"} → {"device_id": "fanuc-cnc"}""" +def _parse_labels(query: str) -> Dict[str, str]: + """ + 从查询表达式中解析标签。 + + 示例: + feed_rate{device_id="fanuc-cnc"} -> {"device_id": "fanuc-cnc"} + """ labels = {} - if "{" not in query: + + if "{" not in query or "}" not in query: return labels - label_part = query[query.index("{") + 1: query.index("}")] - for item in label_part.split(","): - if "=" in item: - k, v = item.split("=", 1) - labels[k.strip()] = v.strip().strip('"') + + try: + label_part = query[query.index("{") + 1: query.rindex("}")] + except Exception: + return labels + + for match in _LABEL_PATTERN.finditer(label_part): + key = match.group(1) + value = match.group(2) + value = value.replace('\\"', '"').replace("\\n", "\n").replace("\\\\", "\\") + labels[key] = value + return labels +def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]: + result = {} + + for d in dicts: + if not d: + continue + result.update(d) + + return result + + def run_once(): now_str = 
datetime.now().strftime("%H:%M:%S") + for query, pred_metric in PREDICT_TARGETS: ts, ys = fetch_history(query) + if len(ys) < MIN_POINTS: logger.info("[%s] %s 数据不足(%d 点),跳过", now_str, query, len(ys)) continue - ts_future, y_pred = predict_next(ts, ys, horizon=HORIZON_SECONDS) - if not ts_future: + ts_future, y_pred = predict_next( + ts, + ys, + horizon=HORIZON_SECONDS, + start_from_now=True, + ) + + if not ts_future or not y_pred: + logger.warning("[%s] %s 预测结果为空,跳过", now_str, query) + continue + + base_labels = _parse_labels(query) + + # 先删除旧预测,再写入新预测。 + # 删除条件只带 base_labels,兼容老版本无 forecast/source 标签的脏数据。 + if CLEAR_OLD_PREDICTIONS: + clear_ok = delete_old_predictions(pred_metric, base_labels) + + if not clear_ok and SKIP_WRITE_IF_CLEAR_FAILED: + logger.error( + "[%s] %s 删除旧预测失败,为避免继续制造重叠数据,本轮跳过写入", + now_str, + pred_metric, + ) + continue + + write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS) + + ok = write_predictions( + ts_future=ts_future, + y_pred=y_pred, + metric_name=pred_metric, + labels=write_labels, + ) + + if not ok: continue - extra_labels = _parse_labels(query) - write_predictions(ts_future, y_pred, pred_metric, extra_labels) + future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S") + future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S") - future_time = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S") logger.info( - "[%s] %-40s → %-35s 写入 %d 点,预测至 %s", - now_str, query, pred_metric, len(y_pred), future_time, + "[%s] %-40s → %-35s 写入 %d 点,预测区间 %s ~ %s", + now_str, + query, + pred_metric, + len(y_pred), + future_start, + future_end, ) def main(): logger.info( - "预测服务启动 VM=%s 预测窗口=%ds 轮询间隔=%ds", - VM_URL, HORIZON_SECONDS, POLL_INTERVAL, + "预测服务启动 VM=%s 历史窗口=%dmin 预测窗口=%ds 轮询间隔=%ds 清理旧预测=%s", + VM_URL, + HISTORY_MINUTES, + HORIZON_SECONDS, + POLL_INTERVAL, + CLEAR_OLD_PREDICTIONS, ) + while True: run_once() time.sleep(POLL_INTERVAL) if __name__ == "__main__": - main() + main() \ No newline at end of 
file From 88aec295671ca112fd422a28acd4d76d43a82f20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Wed, 20 May 2026 21:21:11 +0800 Subject: [PATCH 20/55] fix --- ai/predict_v2.py | 263 +++++++++++++++++++++++------------------------ 1 file changed, 128 insertions(+), 135 deletions(-) diff --git a/ai/predict_v2.py b/ai/predict_v2.py index bc425c8..f631e12 100755 --- a/ai/predict_v2.py +++ b/ai/predict_v2.py @@ -1,13 +1,14 @@ # -*- coding: utf-8 -*- """ -ProtoForge 预测服务 v3 +ProtoForge 预测服务 v4 修复点: -1. 解决 HORIZON_SECONDS > POLL_INTERVAL 时,多轮预测窗口重叠导致 Grafana 出现毛刺/竖线问题。 -2. 每轮写入新预测前,删除同一个预测 metric 的旧预测序列,只保留最新一轮预测。 -3. 预测时间戳按整秒写入,避免毫秒时间戳和 Grafana step 不对齐。 -4. 拟合使用真实 timestamp 相对时间,不再假设历史数据严格 1 秒等间隔。 -5. 对历史数据做排序、去重、NaN/Inf 清洗。 +1. 不再使用 VictoriaMetrics delete_series,避免预测历史被整条删除。 +2. 不再每 30 秒写未来 120 秒,避免多轮预测窗口重叠导致 Grafana 出现竖线/毛刺。 +3. 每轮只写未来 min(HORIZON_SECONDS, POLL_INTERVAL) 秒的数据。 +4. 使用 forecast="rolling_v2" 新标签,避免和上一版 forecast="latest" 的旧预测数据混在一起。 +5. 使用真实 timestamp 做拟合,不假设采样严格等间隔。 +6. 
拟合失败时不再简单写平直线,而是尽量重复最近一个周期的波形。 """ import logging @@ -44,29 +45,35 @@ ] HISTORY_MINUTES = 30 + +# 理论预测窗口 HORIZON_SECONDS = 120 + +# 轮询间隔 POLL_INTERVAL = 30 + +# 实际写入窗口。 +# 关键点:实际写入窗口不要大于轮询间隔,否则不同批次预测会重叠。 +WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL) + MIN_POINTS = 120 QUERY_STEP = "1s" -# 关键修复:每轮写入前删除旧预测,避免 120s 预测窗口和 30s 轮询周期重叠 -CLEAR_OLD_PREDICTIONS = True - -# 如果删除旧预测失败,是否跳过本轮写入。 -# 建议 True,避免继续叠加脏数据。 -SKIP_WRITE_IF_CLEAR_FAILED = True +# 不要再清理旧预测,否则历史预测会被整条删除。 +CLEAR_OLD_PREDICTIONS = False -# 给新预测数据加一个稳定标签,方便 Grafana 查询过滤。 -# Grafana 可以查询:feed_rate_predicted{device_id="fanuc-cnc",forecast="latest"} +# 使用新标签,避免和上一版 forecast="latest" 数据混在一起。 EXTRA_PREDICT_LABELS = { - "forecast": "latest", + "forecast": "rolling_v2", "source": "protoforge", } -# 正弦周期限制 MIN_PERIOD_SECONDS = 5.0 MAX_PERIOD_SECONDS = 3600.0 +# 进程内记录每条预测序列上次写到哪里,避免本进程运行期间重复写同一时间段 +LAST_WRITTEN_UNTIL: Dict[str, int] = {} + # ───────────────────────────────────────────────────────────────────────────── @@ -134,10 +141,10 @@ def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[floa def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]: """ 清洗历史数据: - 1. 转换为整秒时间戳 + 1. 时间戳转为整秒 2. 排序 3. 同一秒多个值时保留最后一个 - 4. 插值补齐中间缺失秒 + 4. 插值补齐缺失秒 """ if not ts or not ys or len(ts) != len(ys): return np.array([]), np.array([]) @@ -173,7 +180,6 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np. 
if end_sec <= start_sec: return ts_clean, ys_clean - # 统一为 1 秒网格,减少 query_range 缺点、抖动、缺失点对 FFT 的影响 ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float) ys_grid = np.interp(ts_grid, ts_clean, ys_clean) @@ -187,7 +193,7 @@ def _sine_model(t: np.ndarray, A: float, T: float, phi: float, C: float) -> np.n def estimate_period_by_fft(ys_arr: np.ndarray) -> float: """ 使用 FFT 估算主周期。 - ys_arr 默认是 1 秒间隔。 + ys_arr 默认已经是 1 秒间隔。 """ n = len(ys_arr) @@ -205,8 +211,8 @@ def estimate_period_by_fft(ys_arr: np.ndarray) -> float: if len(freqs) <= 1: return 60.0 - # 跳过直流分量 index 0 power = np.abs(fft_vals[1:]) + if len(power) == 0 or np.max(power) <= 0: return 60.0 @@ -222,59 +228,84 @@ def estimate_period_by_fft(ys_arr: np.ndarray) -> float: return period +def repeat_last_period( + ts_grid: np.ndarray, + ys_grid: np.ndarray, + ts_future_arr: np.ndarray, + period_seconds: float, +) -> np.ndarray: + """ + 拟合失败时的降级策略: + 不直接写平直线,而是把未来时间映射回最近一个周期的历史波形。 + """ + if len(ts_grid) < 2: + return np.full_like(ts_future_arr, float(ys_grid[-1]), dtype=float) + + period = max(int(round(period_seconds)), 1) + + y_pred = [] + + hist_start = float(ts_grid[0]) + hist_end = float(ts_grid[-1]) + + for future_ts in ts_future_arr: + mapped_ts = float(future_ts) + + while mapped_ts > hist_end: + mapped_ts -= period + + while mapped_ts < hist_start: + mapped_ts += period + + val = float(np.interp(mapped_ts, ts_grid, ys_grid)) + y_pred.append(val) + + return np.array(y_pred, dtype=float) + + def predict_next( ts: List[float], ys: List[float], - horizon: int = HORIZON_SECONDS, - start_from_now: bool = True, + horizon: int, + base_ts: int, ) -> Tuple[List[float], List[float]]: """ 用 FFT 检测主频,拟合正弦波,外推未来 horizon 秒。 - 返回: - future_timestamps: 未来整秒时间戳 - predicted_values: 预测值 + + base_ts: + 从 base_ts + 1 开始写预测。 """ ts_grid, ys_grid = normalize_history(ts, ys) if len(ys_grid) < MIN_POINTS: return [], [] - n = len(ys_grid) - y_min = float(np.min(ys_grid)) y_max = float(np.max(ys_grid)) y_mean = 
float(np.mean(ys_grid)) y_range = y_max - y_min - # 数据几乎不波动时,直接使用最后一个值保持 - if y_range <= 1e-9: - base_ts = int(time.time()) if start_from_now else int(ts_grid[-1]) - base_ts = max(base_ts, int(ts_grid[-1])) + base_ts = max(int(base_ts), int(ts_grid[-1])) + + ts_future_arr = np.arange( + base_ts + 1, + base_ts + 1 + horizon, + 1, + dtype=float, + ) - ts_future = [base_ts + i + 1 for i in range(horizon)] - y_pred = [float(ys_grid[-1])] * horizon - return ts_future, y_pred + if y_range <= 1e-9: + y_pred_arr = np.full_like(ts_future_arr, float(ys_grid[-1]), dtype=float) + return ts_future_arr.tolist(), y_pred_arr.tolist() period = estimate_period_by_fft(ys_grid) - # 用真实时间戳做相对时间,而不是 np.arange(n) t_fit = ts_grid - ts_grid[0] + t_future = ts_future_arr - ts_grid[0] amplitude = y_range / 2.0 offset = y_mean - # 预测起点统一对齐到整秒 - if start_from_now: - base_ts = int(time.time()) - else: - base_ts = int(ts_grid[-1]) - - # 避免因为 VM 查询延迟导致预测点落在最后一个真实点之前 - base_ts = max(base_ts, int(ts_grid[-1])) - - ts_future_arr = np.arange(base_ts + 1, base_ts + 1 + horizon, 1, dtype=float) - t_future = ts_future_arr - ts_grid[0] - try: popt, _ = curve_fit( _sine_model, @@ -290,7 +321,6 @@ def predict_next( y_pred_arr = _sine_model(t_future, *popt) - # 裁剪到合理范围,避免拟合异常时飞出去 margin = y_range * 0.2 lower = y_min - margin upper = y_max + margin @@ -309,16 +339,21 @@ def predict_next( return ts_future_arr.tolist(), y_pred_arr.astype(float).tolist() except Exception as e: - logger.warning("正弦拟合失败,降级为最近值平滑外推: %s", e) + logger.warning("正弦拟合失败,降级为最近周期波形复制: %s", e) - # 降级策略:用最近 10 个点的均值保持,避免线性外推越走越偏 - tail = min(10, n) - last_value = float(np.mean(ys_grid[-tail:])) + y_pred_arr = repeat_last_period( + ts_grid=ts_grid, + ys_grid=ys_grid, + ts_future_arr=ts_future_arr, + period_seconds=period, + ) - ts_future = ts_future_arr.tolist() - y_pred = [last_value] * horizon + margin = y_range * 0.2 + lower = y_min - margin + upper = y_max + margin + y_pred_arr = np.clip(y_pred_arr, lower, upper) - return ts_future, 
y_pred + return ts_future_arr.tolist(), y_pred_arr.astype(float).tolist() def prom_escape_label_value(value: str) -> str: @@ -333,83 +368,34 @@ def prom_escape_label_value(value: str) -> str: ) -def build_selector(metric_name: str, labels: Dict[str, str]) -> str: - """ - 构造 PromQL selector,用于 delete_series。 - - 示例: - feed_rate_predicted{device_id="fanuc-cnc"} - """ +def labels_to_str(labels: Dict[str, str]) -> str: if not labels: - return metric_name + return "" parts = [] + for k in sorted(labels.keys()): v = prom_escape_label_value(labels[k]) parts.append(f'{k}="{v}"') - return f'{metric_name}' + "{" + ",".join(parts) + "}" - - -def delete_old_predictions(metric_name: str, base_labels: Dict[str, str]) -> bool: - """ - 删除旧预测序列,避免多轮预测窗口重叠。 - - 注意: - 这里故意只用 base_labels,比如 device_id。 - 不带 forecast/source 标签,是为了兼容旧版本脚本写入的无 forecast 标签数据。 - """ - selector = build_selector(metric_name, base_labels) - - try: - resp = requests.post( - f"{VM_URL}/api/v1/admin/tsdb/delete_series", - params=[("match[]", selector)], - timeout=10, - ) - - if resp.status_code not in (200, 204): - logger.error( - "删除旧预测数据失败 metric=%s selector=%s status=%s body=%s", - metric_name, - selector, - resp.status_code, - resp.text[:500], - ) - return False - - logger.debug("已删除旧预测数据 selector=%s", selector) - return True - - except requests.RequestException as e: - logger.error("删除旧预测数据异常 metric=%s selector=%s: %s", metric_name, selector, e) - return False + return "{" + ",".join(parts) + "}" def write_predictions( ts_future: List[float], y_pred: List[float], metric_name: str, - labels: Dict[str, str] = None, + labels: Dict[str, str], ) -> bool: """ 将预测值以 Prometheus exposition 格式写入 VictoriaMetrics。 时间戳为毫秒级 Unix timestamp。 """ - if labels is None: - labels = {} - if not ts_future or not y_pred or len(ts_future) != len(y_pred): logger.warning("预测数据为空或长度不一致 metric=%s", metric_name) return False - label_str = "" - if labels: - parts = [] - for k in sorted(labels.keys()): - v = 
prom_escape_label_value(labels[k]) - parts.append(f'{k}="{v}"') - label_str = "{" + ",".join(parts) + "}" + label_str = labels_to_str(labels) lines = [] @@ -449,7 +435,9 @@ def write_predictions( return False -_LABEL_PATTERN = re.compile(r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*') +_LABEL_PATTERN = re.compile( + r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*' +) def _parse_labels(query: str) -> Dict[str, str]: @@ -489,6 +477,13 @@ def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]: return result +def series_key(metric_name: str, labels: Dict[str, str]) -> str: + """ + 构造进程内唯一 key,用于记录上次写到哪个时间点。 + """ + return metric_name + labels_to_str(labels) + + def run_once(): now_str = datetime.now().strftime("%H:%M:%S") @@ -499,34 +494,28 @@ def run_once(): logger.info("[%s] %s 数据不足(%d 点),跳过", now_str, query, len(ys)) continue + base_labels = _parse_labels(query) + write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS) + + key = series_key(pred_metric, write_labels) + + now_sec = int(time.time()) + last_until = LAST_WRITTEN_UNTIL.get(key, 0) + + # 防止同一进程内重复写入已经预测过的时间段 + base_ts = max(now_sec, last_until) + ts_future, y_pred = predict_next( - ts, - ys, - horizon=HORIZON_SECONDS, - start_from_now=True, + ts=ts, + ys=ys, + horizon=WRITE_HORIZON_SECONDS, + base_ts=base_ts, ) if not ts_future or not y_pred: logger.warning("[%s] %s 预测结果为空,跳过", now_str, query) continue - base_labels = _parse_labels(query) - - # 先删除旧预测,再写入新预测。 - # 删除条件只带 base_labels,兼容老版本无 forecast/source 标签的脏数据。 - if CLEAR_OLD_PREDICTIONS: - clear_ok = delete_old_predictions(pred_metric, base_labels) - - if not clear_ok and SKIP_WRITE_IF_CLEAR_FAILED: - logger.error( - "[%s] %s 删除旧预测失败,为避免继续制造重叠数据,本轮跳过写入", - now_str, - pred_metric, - ) - continue - - write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS) - ok = write_predictions( ts_future=ts_future, y_pred=y_pred, @@ -537,26 +526,30 @@ def run_once(): if not ok: continue + LAST_WRITTEN_UNTIL[key] = 
int(max(ts_future)) + future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S") future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S") logger.info( - "[%s] %-40s → %-35s 写入 %d 点,预测区间 %s ~ %s", + "[%s] %-40s → %-35s 写入 %d 点,预测区间 %s ~ %s,标签=%s", now_str, query, pred_metric, len(y_pred), future_start, future_end, + labels_to_str(write_labels), ) def main(): logger.info( - "预测服务启动 VM=%s 历史窗口=%dmin 预测窗口=%ds 轮询间隔=%ds 清理旧预测=%s", + "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds 清理旧预测=%s", VM_URL, HISTORY_MINUTES, HORIZON_SECONDS, + WRITE_HORIZON_SECONDS, POLL_INTERVAL, CLEAR_OLD_PREDICTIONS, ) From 72d5c092018c5caf59f1a9f6ae556e6eff24ecca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Wed, 20 May 2026 21:35:15 +0800 Subject: [PATCH 21/55] fix --- ai/predict_v2.py | 368 ++++++++++++++++++++++++----------------------- 1 file changed, 188 insertions(+), 180 deletions(-) diff --git a/ai/predict_v2.py b/ai/predict_v2.py index f631e12..933a34f 100755 --- a/ai/predict_v2.py +++ b/ai/predict_v2.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- """ -ProtoForge 预测服务 v4 +ProtoForge 预测服务 v5 修复点: -1. 不再使用 VictoriaMetrics delete_series,避免预测历史被整条删除。 -2. 不再每 30 秒写未来 120 秒,避免多轮预测窗口重叠导致 Grafana 出现竖线/毛刺。 -3. 每轮只写未来 min(HORIZON_SECONDS, POLL_INTERVAL) 秒的数据。 -4. 使用 forecast="rolling_v2" 新标签,避免和上一版 forecast="latest" 的旧预测数据混在一起。 -5. 使用真实 timestamp 做拟合,不假设采样严格等间隔。 -6. 拟合失败时不再简单写平直线,而是尽量重复最近一个周期的波形。 +1. 不再使用“单正弦拟合”作为主预测算法。 +2. 主算法改为:周期模板预测(同相位历史值加权平均)。 +3. 周期估计使用 FFT 粗估 + 自相关细化,比单纯 FFT 更稳。 +4. 若可用完整周期不足,则降级为多谐波回归(而不是单正弦)。 +5. 每轮只写入未来 min(HORIZON_SECONDS, POLL_INTERVAL) 秒,避免预测窗口重叠。 +6. 
不删除旧预测历史,避免历史预测消失。 """ import logging @@ -20,17 +20,13 @@ import numpy as np import requests -from scipy.optimize import curve_fit - logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", ) - logger = logging.getLogger(__name__) - # ── 配置 ────────────────────────────────────────────────────────────────────── VM_URL = "http://localhost:8428" @@ -45,45 +41,36 @@ ] HISTORY_MINUTES = 30 - -# 理论预测窗口 HORIZON_SECONDS = 120 - -# 轮询间隔 POLL_INTERVAL = 30 - -# 实际写入窗口。 -# 关键点:实际写入窗口不要大于轮询间隔,否则不同批次预测会重叠。 WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL) - MIN_POINTS = 120 QUERY_STEP = "1s" -# 不要再清理旧预测,否则历史预测会被整条删除。 -CLEAR_OLD_PREDICTIONS = False +# 至少要有多少个完整周期,才使用“周期模板预测” +MIN_FULL_CYCLES_FOR_TEMPLATE = 3 +MAX_CYCLES_FOR_TEMPLATE = 6 + +# 周期范围 +MIN_PERIOD_SECONDS = 5 +MAX_PERIOD_SECONDS = 3600 + +# 多谐波回归最高阶数(降级模式) +MAX_HARMONICS = 4 -# 使用新标签,避免和上一版 forecast="latest" 数据混在一起。 EXTRA_PREDICT_LABELS = { - "forecast": "rolling_v2", + "forecast": "seasonal_v1", "source": "protoforge", } -MIN_PERIOD_SECONDS = 5.0 -MAX_PERIOD_SECONDS = 3600.0 - -# 进程内记录每条预测序列上次写到哪里,避免本进程运行期间重复写同一时间段 +# 进程内记录每条预测序列上次写到哪里,避免本进程运行时重复写 LAST_WRITTEN_UNTIL: Dict[str, int] = {} # ───────────────────────────────────────────────────────────────────────────── def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]: - """ - 从 VictoriaMetrics 拉取历史时序数据。 - 返回: - timestamps: Unix 秒级时间戳 - values: float 数值 - """ + """从 VictoriaMetrics 拉取历史时序数据。""" now = datetime.now() start = now - timedelta(minutes=minutes) @@ -118,20 +105,16 @@ def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[floa ts = [] ys = [] - for item in values: if len(item) < 2: continue - try: t = float(item[0]) y = float(item[1]) except Exception: continue - if not math.isfinite(t) or not math.isfinite(y): continue - ts.append(t) ys.append(y) @@ -141,33 +124,29 @@ def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[floa def 
normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]: """ 清洗历史数据: - 1. 时间戳转为整秒 + 1. 时间戳整秒化 2. 排序 - 3. 同一秒多个值时保留最后一个 - 4. 插值补齐缺失秒 + 3. 同一秒多个点保留最后一个 + 4. 按 1 秒插值补齐 """ if not ts or not ys or len(ts) != len(ys): return np.array([]), np.array([]) data = {} - for t, y in zip(ts, ys): try: sec = int(round(float(t))) val = float(y) except Exception: continue - if not math.isfinite(sec) or not math.isfinite(val): continue - data[sec] = val if not data: return np.array([]), np.array([]) sorted_items = sorted(data.items(), key=lambda x: x[0]) - ts_clean = np.array([x[0] for x in sorted_items], dtype=float) ys_clean = np.array([x[1] for x in sorted_items], dtype=float) @@ -186,22 +165,13 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np. return ts_grid, ys_grid -def _sine_model(t: np.ndarray, A: float, T: float, phi: float, C: float) -> np.ndarray: - return A * np.sin(2.0 * np.pi / T * t + phi) + C - - def estimate_period_by_fft(ys_arr: np.ndarray) -> float: - """ - 使用 FFT 估算主周期。 - ys_arr 默认已经是 1 秒间隔。 - """ + """FFT 粗估周期。""" n = len(ys_arr) - - if n < 4: + if n < 8: return 60.0 centered = ys_arr - np.mean(ys_arr) - if np.allclose(centered, 0): return 60.0 @@ -212,55 +182,139 @@ def estimate_period_by_fft(ys_arr: np.ndarray) -> float: return 60.0 power = np.abs(fft_vals[1:]) - if len(power) == 0 or np.max(power) <= 0: return 60.0 dominant_idx = int(np.argmax(power)) + 1 dominant_freq = float(freqs[dominant_idx]) - if dominant_freq <= 0: return 60.0 period = 1.0 / dominant_freq - period = float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + +def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float: + """ + 用自相关在 init_period 附近细化周期估计。 + """ + n = len(ys_arr) + if n < 20: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + centered = ys_arr - np.mean(ys_arr) + if 
np.allclose(centered, 0): + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - return period + corr = np.correlate(centered, centered, mode="full")[n - 1:] + p0 = int(round(init_period)) + left = max(MIN_PERIOD_SECONDS, int(max(2, p0 * 0.7))) + right = min(n // 2, int(max(left + 1, p0 * 1.3))) -def repeat_last_period( - ts_grid: np.ndarray, - ys_grid: np.ndarray, - ts_future_arr: np.ndarray, - period_seconds: float, -) -> np.ndarray: + if right <= left: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + search = corr[left:right + 1] + if len(search) == 0: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + best_lag = left + int(np.argmax(search)) + return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + +def estimate_period(ys_arr: np.ndarray) -> float: + """FFT + 自相关 的组合周期估计。""" + p_fft = estimate_period_by_fft(ys_arr) + p_refined = refine_period_by_autocorr(ys_arr, p_fft) + return p_refined + + +def seasonal_template_predict( + ys_arr: np.ndarray, + horizon: int, + period: int, + gap: int = 0, + max_cycles: int = MAX_CYCLES_FOR_TEMPLATE, +) -> List[float]: """ - 拟合失败时的降级策略: - 不直接写平直线,而是把未来时间映射回最近一个周期的历史波形。 + 同相位历史值加权平均预测。 + 对未来第 k 个点,取过去多个周期同相位点做加权平均: + y[n-1+gap+k] ≈ avg(y[n-1+gap+k-p], y[n-1+gap+k-2p], ...) 
""" - if len(ts_grid) < 2: - return np.full_like(ts_future_arr, float(ys_grid[-1]), dtype=float) + n = len(ys_arr) + preds = [] - period = max(int(round(period_seconds)), 1) + for k in range(1, horizon + 1): + target_idx = (n - 1) + gap + k - y_pred = [] + values = [] + weights = [] - hist_start = float(ts_grid[0]) - hist_end = float(ts_grid[-1]) + # m=1 表示最近一个周期;m 越大越久远 + for m in range(1, max_cycles + 1): + hist_idx = target_idx - m * period + if 0 <= hist_idx < n: + # 越近权重越大 + w = 1.0 / m + values.append(float(ys_arr[hist_idx])) + weights.append(w) - for future_ts in ts_future_arr: - mapped_ts = float(future_ts) + if not values: + # 万一拿不到,退化为最后一个值 + preds.append(float(ys_arr[-1])) + else: + preds.append(float(np.average(values, weights=weights))) - while mapped_ts > hist_end: - mapped_ts -= period + return preds - while mapped_ts < hist_start: - mapped_ts += period - val = float(np.interp(mapped_ts, ts_grid, ys_grid)) - y_pred.append(val) +def harmonic_regression_predict( + ys_arr: np.ndarray, + horizon: int, + period: int, + gap: int = 0, + max_harmonics: int = MAX_HARMONICS, +) -> List[float]: + """ + 多谐波回归(降级模式): + y = c + Σ [a_k sin(2πkt/P) + b_k cos(2πkt/P)] + 相比单正弦,更能表达非标准正弦波形。 + """ + n = len(ys_arr) + if n < 10 or period <= 1: + return [float(ys_arr[-1])] * horizon + + # 周期太短时,谐波数不能太大 + K = min(max_harmonics, max(1, period // 4)) + + t = np.arange(n, dtype=float) + cols = [np.ones(n, dtype=float)] + + for k in range(1, K + 1): + angle = 2.0 * np.pi * k * t / period + cols.append(np.sin(angle)) + cols.append(np.cos(angle)) + + X = np.column_stack(cols) + + try: + coef, _, _, _ = np.linalg.lstsq(X, ys_arr, rcond=None) + except Exception: + return [float(ys_arr[-1])] * horizon + + t_future = np.arange(n + gap, n + gap + horizon, dtype=float) + cols_future = [np.ones(horizon, dtype=float)] - return np.array(y_pred, dtype=float) + for k in range(1, K + 1): + angle = 2.0 * np.pi * k * t_future / period + cols_future.append(np.sin(angle)) + 
cols_future.append(np.cos(angle)) + + X_future = np.column_stack(cols_future) + y_pred = X_future @ coef + + return y_pred.astype(float).tolist() def predict_next( @@ -270,96 +324,74 @@ def predict_next( base_ts: int, ) -> Tuple[List[float], List[float]]: """ - 用 FFT 检测主频,拟合正弦波,外推未来 horizon 秒。 - - base_ts: - 从 base_ts + 1 开始写预测。 + 主预测函数: + 1. 周期估计 + 2. 优先使用周期模板预测 + 3. 周期不够时降级为多谐波回归 """ ts_grid, ys_grid = normalize_history(ts, ys) - if len(ys_grid) < MIN_POINTS: return [], [] y_min = float(np.min(ys_grid)) y_max = float(np.max(ys_grid)) - y_mean = float(np.mean(ys_grid)) y_range = y_max - y_min - base_ts = max(int(base_ts), int(ts_grid[-1])) - - ts_future_arr = np.arange( - base_ts + 1, - base_ts + 1 + horizon, - 1, - dtype=float, - ) - if y_range <= 1e-9: - y_pred_arr = np.full_like(ts_future_arr, float(ys_grid[-1]), dtype=float) - return ts_future_arr.tolist(), y_pred_arr.tolist() + base_ts = max(int(base_ts), int(ts_grid[-1])) + ts_future = [base_ts + i + 1 for i in range(horizon)] + y_pred = [float(ys_grid[-1])] * horizon + return ts_future, y_pred - period = estimate_period_by_fft(ys_grid) + period_est = estimate_period(ys_grid) + period = int(round(period_est)) + period = max(MIN_PERIOD_SECONDS, min(MAX_PERIOD_SECONDS, period)) - t_fit = ts_grid - ts_grid[0] - t_future = ts_future_arr - ts_grid[0] + last_real_ts = int(ts_grid[-1]) + base_ts = max(int(base_ts), last_real_ts) - amplitude = y_range / 2.0 - offset = y_mean + # 如果当前时间已经超过最后一个真实点,gap 表示中间“空过去”的秒数 + gap = max(0, base_ts - last_real_ts) - try: - popt, _ = curve_fit( - _sine_model, - t_fit, - ys_grid, - p0=[amplitude, period, 0.0, offset], - bounds=( - [0.0, MIN_PERIOD_SECONDS, -2.0 * np.pi, y_min - y_range], - [np.inf, MAX_PERIOD_SECONDS, 2.0 * np.pi, y_max + y_range], - ), - maxfev=12000, - ) + ts_future = [base_ts + i + 1 for i in range(horizon)] - y_pred_arr = _sine_model(t_future, *popt) + full_cycles = len(ys_grid) // period if period > 0 else 0 - margin = y_range * 0.2 - lower = y_min - margin - 
upper = y_max + margin - y_pred_arr = np.clip(y_pred_arr, lower, upper) - - if not np.all(np.isfinite(y_pred_arr)): - raise ValueError("预测结果包含 NaN/Inf") - - logger.debug( - "正弦拟合成功 period=%.2fs amplitude=%.4f offset=%.4f", - popt[1], - popt[0], - popt[3], + if full_cycles >= MIN_FULL_CYCLES_FOR_TEMPLATE: + y_pred = seasonal_template_predict( + ys_arr=ys_grid, + horizon=horizon, + period=period, + gap=gap, + max_cycles=min(MAX_CYCLES_FOR_TEMPLATE, full_cycles), ) - - return ts_future_arr.tolist(), y_pred_arr.astype(float).tolist() - - except Exception as e: - logger.warning("正弦拟合失败,降级为最近周期波形复制: %s", e) - - y_pred_arr = repeat_last_period( - ts_grid=ts_grid, - ys_grid=ys_grid, - ts_future_arr=ts_future_arr, - period_seconds=period, + model_name = "seasonal_template" + else: + y_pred = harmonic_regression_predict( + ys_arr=ys_grid, + horizon=horizon, + period=period, + gap=gap, + max_harmonics=MAX_HARMONICS, ) + model_name = "harmonic_regression" + + # 合理裁剪,避免偶然外推过大 + margin = y_range * 0.15 + lower = y_min - margin + upper = y_max + margin + y_pred = np.clip(np.array(y_pred, dtype=float), lower, upper).astype(float).tolist() - margin = y_range * 0.2 - lower = y_min - margin - upper = y_max + margin - y_pred_arr = np.clip(y_pred_arr, lower, upper) + logger.debug( + "predict_next model=%s period=%ss full_cycles=%s gap=%s", + model_name, period, full_cycles, gap + ) - return ts_future_arr.tolist(), y_pred_arr.astype(float).tolist() + return ts_future, y_pred def prom_escape_label_value(value: str) -> str: - """ - Prometheus exposition label value 转义。 - """ + """Prometheus label value 转义。""" return ( str(value) .replace("\\", "\\\\") @@ -371,13 +403,10 @@ def prom_escape_label_value(value: str) -> str: def labels_to_str(labels: Dict[str, str]) -> str: if not labels: return "" - parts = [] - for k in sorted(labels.keys()): v = prom_escape_label_value(labels[k]) parts.append(f'{k}="{v}"') - return "{" + ",".join(parts) + "}" @@ -387,16 +416,12 @@ def write_predictions( 
metric_name: str, labels: Dict[str, str], ) -> bool: - """ - 将预测值以 Prometheus exposition 格式写入 VictoriaMetrics。 - 时间戳为毫秒级 Unix timestamp。 - """ + """将预测值以 Prometheus exposition 格式写入 VictoriaMetrics。""" if not ts_future or not y_pred or len(ts_future) != len(y_pred): logger.warning("预测数据为空或长度不一致 metric=%s", metric_name) return False label_str = labels_to_str(labels) - lines = [] for t, y in zip(ts_future, y_pred): @@ -422,14 +447,11 @@ def write_predictions( resp = requests.post( f"{VM_URL}/api/v1/import/prometheus", data=payload.encode("utf-8"), - headers={ - "Content-Type": "text/plain; version=0.0.4; charset=utf-8", - }, + headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"}, timeout=10, ) resp.raise_for_status() return True - except requests.RequestException as e: logger.error("写入预测数据失败 metric=%s: %s", metric_name, e) return False @@ -441,12 +463,7 @@ def write_predictions( def _parse_labels(query: str) -> Dict[str, str]: - """ - 从查询表达式中解析标签。 - - 示例: - feed_rate{device_id="fanuc-cnc"} -> {"device_id": "fanuc-cnc"} - """ + """从查询表达式中解析标签。""" labels = {} if "{" not in query or "}" not in query: @@ -468,19 +485,13 @@ def _parse_labels(query: str) -> Dict[str, str]: def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]: result = {} - for d in dicts: - if not d: - continue - result.update(d) - + if d: + result.update(d) return result def series_key(metric_name: str, labels: Dict[str, str]) -> str: - """ - 构造进程内唯一 key,用于记录上次写到哪个时间点。 - """ return metric_name + labels_to_str(labels) @@ -489,7 +500,6 @@ def run_once(): for query, pred_metric in PREDICT_TARGETS: ts, ys = fetch_history(query) - if len(ys) < MIN_POINTS: logger.info("[%s] %s 数据不足(%d 点),跳过", now_str, query, len(ys)) continue @@ -502,7 +512,7 @@ def run_once(): now_sec = int(time.time()) last_until = LAST_WRITTEN_UNTIL.get(key, 0) - # 防止同一进程内重复写入已经预测过的时间段 + # 避免同一进程内写重叠时间段 base_ts = max(now_sec, last_until) ts_future, y_pred = predict_next( @@ -522,7 +532,6 @@ def run_once(): 
metric_name=pred_metric, labels=write_labels, ) - if not ok: continue @@ -545,13 +554,12 @@ def run_once(): def main(): logger.info( - "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds 清理旧预测=%s", + "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds", VM_URL, HISTORY_MINUTES, HORIZON_SECONDS, WRITE_HORIZON_SECONDS, POLL_INTERVAL, - CLEAR_OLD_PREDICTIONS, ) while True: From c26b9991d4cfac374d56829b47023b043d270aba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Thu, 21 May 2026 09:03:00 +0800 Subject: [PATCH 22/55] feat(ai): support single scene predict --- ai/predict_v3_single_scene.py | 1058 +++++++++++++++++++++++++++++++++ 1 file changed, 1058 insertions(+) create mode 100644 ai/predict_v3_single_scene.py diff --git a/ai/predict_v3_single_scene.py b/ai/predict_v3_single_scene.py new file mode 100644 index 0000000..23af8c5 --- /dev/null +++ b/ai/predict_v3_single_scene.py @@ -0,0 +1,1058 @@ +# -*- coding: utf-8 -*- +""" +ProtoForge 预测服务 v6 + +核心能力: +1. 周期模板预测:适合 CNC 这类强周期、非标准正弦波形。 +2. 健康基线冻结:检测到异常后,不再用故障数据更新预测模板。 +3. 恢复冷却机制:故障恢复后,需要连续稳定多个周期,才恢复学习。 +4. 预测上下界:写入 predicted_upper / predicted_lower,方便 Grafana 展示预测带。 +5. 异常标记:写入 xxx_anomaly,1 表示异常,0 表示正常。 +6. 
不删除历史预测,不使用 delete_series。 +""" + +""" +场景:不考虑物料、不考虑跨程序场景算法预测 +""" + +import json +import logging +import math +import os +import re +import time +from dataclasses import asdict, dataclass +from datetime import datetime, timedelta +from typing import Dict, List, Optional, Tuple + +import numpy as np +import requests + + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", +) + +logger = logging.getLogger(__name__) + + +# ── 基础配置 ────────────────────────────────────────────────────────────────── + +VM_URL = "http://localhost:8428" + +STATE_FILE = "/tmp/protoforge_predictor_state.json" + +HISTORY_MINUTES = 30 +HORIZON_SECONDS = 120 +POLL_INTERVAL = 30 + +# 实际每轮写入未来多少秒。 +# 不要大于 POLL_INTERVAL,否则多轮预测会重叠。 +WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL) + +QUERY_STEP = "1s" +MIN_POINTS = 120 + +MIN_PERIOD_SECONDS = 5 +MAX_PERIOD_SECONDS = 3600 + +# 至少多少个完整周期才允许构建健康模板 +MIN_FULL_CYCLES_FOR_TEMPLATE = 3 + +# 构建模板最多使用最近多少个周期 +MAX_CYCLES_FOR_TEMPLATE = 6 + +# 检测异常使用最近多少秒实际数据 +DETECT_WINDOW_SECONDS = 15 + +# 恢复后,至少连续正常多少秒才考虑恢复学习 +RECOVERY_MIN_SECONDS = 60 + +# 健康状态下模板更新速度,越小越保守 +HEALTHY_EMA_ALPHA = 0.15 + +# 故障恢复后第一次重新学习时的更新速度 +RECOVERY_EMA_ALPHA = 0.35 + +# 最近窗口里有多少比例的点超过阈值,才认为异常 +OUTSIDE_RATIO_THRESHOLD = 0.60 + +# 最近窗口里有多少比例的点回到阈值内,才认为恢复正常 +RECOVERY_INSIDE_RATIO_THRESHOLD = 0.80 + + +# ── 指标配置 ────────────────────────────────────────────────────────────────── +# abs_threshold / rel_threshold 需要按指标单位调。 +# feed_rate 单位 mm/min,这里先给 400 和 25%。 + +PREDICT_TARGETS = [ + { + "query": 'feed_rate{device_id="fanuc-cnc"}', + "pred_metric": "feed_rate_predicted", + "anomaly_metric": "feed_rate_anomaly", + "abs_threshold": 400.0, + "rel_threshold": 0.25, + }, + { + "query": 'spindle_speed{device_id="fanuc-cnc"}', + "pred_metric": "spindle_speed_predicted", + "anomaly_metric": "spindle_speed_anomaly", + "abs_threshold": 500.0, + "rel_threshold": 0.25, + }, + { + "query": 'spindle_current{device_id="fanuc-cnc"}', + "pred_metric": 
"spindle_current_predicted", + "anomaly_metric": "spindle_current_anomaly", + "abs_threshold": 5.0, + "rel_threshold": 0.25, + }, + { + "query": 'vibration_x{device_id="fanuc-cnc"}', + "pred_metric": "vibration_x_predicted", + "anomaly_metric": "vibration_x_anomaly", + "abs_threshold": 1.0, + "rel_threshold": 0.30, + }, + { + "query": 'vibration_y{device_id="fanuc-cnc"}', + "pred_metric": "vibration_y_predicted", + "anomaly_metric": "vibration_y_anomaly", + "abs_threshold": 1.0, + "rel_threshold": 0.30, + }, + { + "query": 'vibration_z{device_id="fanuc-cnc"}', + "pred_metric": "vibration_z_predicted", + "anomaly_metric": "vibration_z_anomaly", + "abs_threshold": 1.0, + "rel_threshold": 0.30, + }, +] + +EXTRA_PREDICT_LABELS = { + "forecast": "health_gated_v1", + "source": "protoforge", +} + +BASELINE_STATUS_HEALTHY = "healthy" +BASELINE_STATUS_ANOMALY = "anomaly" +BASELINE_STATUS_RECOVERING = "recovering" +BASELINE_STATUS_LEARNING = "learning" + + +# ── 状态结构 ────────────────────────────────────────────────────────────────── + +@dataclass +class BaselineState: + period: int + template: List[float] + status: str + clean_seconds: int + last_update_ts: int + last_seen_ts: int + y_min: float + y_max: float + + +BASELINE_STATES: Dict[str, BaselineState] = {} +LAST_WRITTEN_UNTIL: Dict[str, int] = {} + + +# ── VM 读取 ─────────────────────────────────────────────────────────────────── + +def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]: + now = datetime.now() + start = now - timedelta(minutes=minutes) + + try: + resp = requests.get( + f"{VM_URL}/api/v1/query_range", + params={ + "query": query, + "start": start.timestamp(), + "end": now.timestamp(), + "step": QUERY_STEP, + }, + timeout=10, + ) + resp.raise_for_status() + except requests.RequestException as e: + logger.error("拉取数据失败 query=%s: %s", query, e) + return [], [] + + try: + result = resp.json().get("data", {}).get("result", []) + except Exception as e: + 
logger.error("解析 VM 返回失败 query=%s: %s", query, e) + return [], [] + + if not result: + return [], [] + + values = result[0].get("values", []) + if not values: + return [], [] + + ts = [] + ys = [] + + for item in values: + if len(item) < 2: + continue + + try: + t = float(item[0]) + y = float(item[1]) + except Exception: + continue + + if not math.isfinite(t) or not math.isfinite(y): + continue + + ts.append(t) + ys.append(y) + + return ts, ys + + +def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]: + if not ts or not ys or len(ts) != len(ys): + return np.array([]), np.array([]) + + data = {} + + for t, y in zip(ts, ys): + try: + sec = int(round(float(t))) + val = float(y) + except Exception: + continue + + if not math.isfinite(sec) or not math.isfinite(val): + continue + + data[sec] = val + + if not data: + return np.array([]), np.array([]) + + sorted_items = sorted(data.items(), key=lambda x: x[0]) + + ts_clean = np.array([x[0] for x in sorted_items], dtype=float) + ys_clean = np.array([x[1] for x in sorted_items], dtype=float) + + if len(ts_clean) < 2: + return ts_clean, ys_clean + + start_sec = int(ts_clean[0]) + end_sec = int(ts_clean[-1]) + + if end_sec <= start_sec: + return ts_clean, ys_clean + + ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float) + ys_grid = np.interp(ts_grid, ts_clean, ys_clean) + + return ts_grid, ys_grid + + +# ── 周期估计 ────────────────────────────────────────────────────────────────── + +def estimate_period_by_fft(ys_arr: np.ndarray) -> float: + n = len(ys_arr) + + if n < 8: + return 60.0 + + centered = ys_arr - np.mean(ys_arr) + + if np.allclose(centered, 0): + return 60.0 + + fft_vals = np.fft.rfft(centered) + freqs = np.fft.rfftfreq(n, d=1.0) + + if len(freqs) <= 1: + return 60.0 + + power = np.abs(fft_vals[1:]) + + if len(power) == 0 or np.max(power) <= 0: + return 60.0 + + dominant_idx = int(np.argmax(power)) + 1 + dominant_freq = float(freqs[dominant_idx]) + + if dominant_freq <= 0: + 
return 60.0 + + period = 1.0 / dominant_freq + + return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + +def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float: + n = len(ys_arr) + + if n < 20: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + centered = ys_arr - np.mean(ys_arr) + + if np.allclose(centered, 0): + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + corr = np.correlate(centered, centered, mode="full")[n - 1:] + + p0 = int(round(init_period)) + left = max(MIN_PERIOD_SECONDS, int(max(2, p0 * 0.7))) + right = min(n // 2, int(max(left + 1, p0 * 1.3))) + + if right <= left: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + search = corr[left:right + 1] + + if len(search) == 0: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + best_lag = left + int(np.argmax(search)) + + return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + +def estimate_period(ys_arr: np.ndarray) -> int: + p_fft = estimate_period_by_fft(ys_arr) + p_refined = refine_period_by_autocorr(ys_arr, p_fft) + + period = int(round(p_refined)) + period = max(MIN_PERIOD_SECONDS, min(MAX_PERIOD_SECONDS, period)) + + return int(period) + + +# ── 模板构建与预测 ───────────────────────────────────────────────────────────── + +def fill_template_nan(template: np.ndarray) -> np.ndarray: + period = len(template) + + if period == 0: + return template + + idx = np.arange(period) + valid = np.isfinite(template) + + if not np.any(valid): + return np.zeros(period, dtype=float) + + if np.all(valid): + return template + + x_valid = idx[valid] + y_valid = template[valid] + + # 环形插值,处理 phase 0 附近缺口 + x_ext = np.concatenate([x_valid - period, x_valid, x_valid + period]) + y_ext = np.concatenate([y_valid, y_valid, y_valid]) + + filled = np.interp(idx, x_ext, y_ext) + + return filled.astype(float) + + +def build_phase_template( + ts_grid: 
np.ndarray, + ys_grid: np.ndarray, + period: int, + max_cycles: int = MAX_CYCLES_FOR_TEMPLATE, + tail_seconds: Optional[int] = None, +) -> Optional[np.ndarray]: + if period <= 1 or len(ys_grid) < period * MIN_FULL_CYCLES_FOR_TEMPLATE: + return None + + max_seconds = period * max_cycles + + if tail_seconds is not None: + max_seconds = min(max_seconds, int(tail_seconds)) + + max_seconds = max(period * MIN_FULL_CYCLES_FOR_TEMPLATE, max_seconds) + + if len(ys_grid) < max_seconds: + start_idx = 0 + else: + start_idx = len(ys_grid) - max_seconds + + ts_tail = ts_grid[start_idx:] + ys_tail = ys_grid[start_idx:] + + if len(ys_tail) < period * MIN_FULL_CYCLES_FOR_TEMPLATE: + return None + + sums = np.zeros(period, dtype=float) + weights = np.zeros(period, dtype=float) + + total = len(ys_tail) + + for i, (t, y) in enumerate(zip(ts_tail, ys_tail)): + phase = int(t) % period + + # 越近的数据权重越高 + recency = (i + 1) / total + weight = 0.3 + 0.7 * recency + + sums[phase] += float(y) * weight + weights[phase] += weight + + template = np.full(period, np.nan, dtype=float) + + valid = weights > 0 + template[valid] = sums[valid] / weights[valid] + + template = fill_template_nan(template) + + return template + + +def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray: + old_period = len(old_template) + + if old_period == new_period: + return old_template.astype(float) + + if old_period <= 1 or new_period <= 1: + return np.full(new_period, float(np.mean(old_template)), dtype=float) + + old_x = np.linspace(0.0, 1.0, old_period, endpoint=False) + new_x = np.linspace(0.0, 1.0, new_period, endpoint=False) + + old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0]) + old_y_ext = np.concatenate([old_template, old_template, old_template]) + + return np.interp(new_x, old_x_ext, old_y_ext).astype(float) + + +def merge_template( + old_template: np.ndarray, + new_template: np.ndarray, + alpha: float, +) -> np.ndarray: + alpha = float(np.clip(alpha, 0.0, 1.0)) + + if 
len(old_template) != len(new_template): + old_template = resample_template(old_template, len(new_template)) + + return ((1.0 - alpha) * old_template + alpha * new_template).astype(float) + + +def predict_by_state(state: BaselineState, ts_list: List[int]) -> np.ndarray: + template = np.array(state.template, dtype=float) + period = int(state.period) + + if period <= 1 or len(template) != period: + return np.zeros(len(ts_list), dtype=float) + + values = [] + + for ts in ts_list: + phase = int(ts) % period + values.append(float(template[phase])) + + return np.array(values, dtype=float) + + +def calc_threshold(pred: np.ndarray, abs_threshold: float, rel_threshold: float) -> np.ndarray: + return np.maximum(abs_threshold, np.abs(pred) * rel_threshold) + + +def calc_bounds(pred: np.ndarray, abs_threshold: float, rel_threshold: float) -> Tuple[np.ndarray, np.ndarray]: + threshold = calc_threshold(pred, abs_threshold, rel_threshold) + lower = pred - threshold + upper = pred + threshold + return lower, upper + + +# ── 异常检测与状态更新 ──────────────────────────────────────────────────────── + +def detect_anomaly( + state: BaselineState, + ts_grid: np.ndarray, + ys_grid: np.ndarray, + abs_threshold: float, + rel_threshold: float, +) -> Tuple[bool, float, float, float]: + if len(ys_grid) < DETECT_WINDOW_SECONDS: + return False, 0.0, 0.0, 0.0 + + ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist() + actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float) + + pred = predict_by_state(state, ts_recent) + threshold = calc_threshold(pred, abs_threshold, rel_threshold) + + abs_err = np.abs(actual - pred) + outside = abs_err > threshold + + outside_ratio = float(np.mean(outside)) + mean_abs_err = float(np.mean(abs_err)) + mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1.0))) + + is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD + + return is_anomaly, outside_ratio, mean_abs_err, mean_rel_err + + +def is_recovered( + state: BaselineState, + ts_grid: 
np.ndarray, + ys_grid: np.ndarray, + abs_threshold: float, + rel_threshold: float, +) -> Tuple[bool, float]: + if len(ys_grid) < DETECT_WINDOW_SECONDS: + return False, 0.0 + + ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist() + actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float) + + pred = predict_by_state(state, ts_recent) + threshold = calc_threshold(pred, abs_threshold, rel_threshold) + + abs_err = np.abs(actual - pred) + inside = abs_err <= threshold + + inside_ratio = float(np.mean(inside)) + + return inside_ratio >= RECOVERY_INSIDE_RATIO_THRESHOLD, inside_ratio + + +def create_initial_state( + ts_grid: np.ndarray, + ys_grid: np.ndarray, + now_sec: int, +) -> Optional[BaselineState]: + if len(ys_grid) < MIN_POINTS: + return None + + period = estimate_period(ys_grid) + + template = build_phase_template( + ts_grid=ts_grid, + ys_grid=ys_grid, + period=period, + max_cycles=MAX_CYCLES_FOR_TEMPLATE, + tail_seconds=period * MAX_CYCLES_FOR_TEMPLATE, + ) + + if template is None: + return None + + return BaselineState( + period=int(period), + template=template.astype(float).tolist(), + status=BASELINE_STATUS_HEALTHY, + clean_seconds=int(period * MAX_CYCLES_FOR_TEMPLATE), + last_update_ts=now_sec, + last_seen_ts=now_sec, + y_min=float(np.min(ys_grid)), + y_max=float(np.max(ys_grid)), + ) + + +def maybe_update_state( + key: str, + ts_grid: np.ndarray, + ys_grid: np.ndarray, + abs_threshold: float, + rel_threshold: float, +) -> Tuple[Optional[BaselineState], bool, float, float, float]: + now_sec = int(time.time()) + + state = BASELINE_STATES.get(key) + + if state is None: + state = create_initial_state(ts_grid, ys_grid, now_sec) + + if state is None: + return None, False, 0.0, 0.0, 0.0 + + BASELINE_STATES[key] = state + logger.info( + "初始化健康模板 key=%s period=%ss clean_seconds=%ss", + key, + state.period, + state.clean_seconds, + ) + return state, False, 0.0, 0.0, 0.0 + + elapsed = max(1, now_sec - int(state.last_seen_ts)) + elapsed = min(elapsed, 
POLL_INTERVAL * 2) + state.last_seen_ts = now_sec + + is_anom, outside_ratio, mean_abs_err, mean_rel_err = detect_anomaly( + state=state, + ts_grid=ts_grid, + ys_grid=ys_grid, + abs_threshold=abs_threshold, + rel_threshold=rel_threshold, + ) + + if is_anom: + state.status = BASELINE_STATUS_ANOMALY + state.clean_seconds = 0 + + logger.warning( + "检测到异常,冻结模板 key=%s outside_ratio=%.2f mean_abs_err=%.2f mean_rel_err=%.2f", + key, + outside_ratio, + mean_abs_err, + mean_rel_err, + ) + + BASELINE_STATES[key] = state + return state, True, outside_ratio, mean_abs_err, mean_rel_err + + recovered, inside_ratio = is_recovered( + state=state, + ts_grid=ts_grid, + ys_grid=ys_grid, + abs_threshold=abs_threshold, + rel_threshold=rel_threshold, + ) + + if state.status == BASELINE_STATUS_ANOMALY: + if recovered: + state.status = BASELINE_STATUS_RECOVERING + state.clean_seconds = elapsed + logger.info( + "异常开始恢复 key=%s inside_ratio=%.2f clean_seconds=%ss", + key, + inside_ratio, + state.clean_seconds, + ) + else: + state.clean_seconds = 0 + BASELINE_STATES[key] = state + return state, True, outside_ratio, mean_abs_err, mean_rel_err + + elif state.status == BASELINE_STATUS_RECOVERING: + if recovered: + state.clean_seconds += elapsed + else: + state.status = BASELINE_STATUS_ANOMALY + state.clean_seconds = 0 + BASELINE_STATES[key] = state + return state, True, outside_ratio, mean_abs_err, mean_rel_err + + else: + state.status = BASELINE_STATUS_HEALTHY + state.clean_seconds += elapsed + + # 故障恢复后,不要立刻学习。 + # 必须至少连续正常:max(RECOVERY_MIN_SECONDS, 3 个周期) + min_clean_for_update = max( + RECOVERY_MIN_SECONDS, + int(state.period) * MIN_FULL_CYCLES_FOR_TEMPLATE, + ) + + if state.clean_seconds < min_clean_for_update: + BASELINE_STATES[key] = state + return state, False, outside_ratio, mean_abs_err, mean_rel_err + + # 只使用最近 clean_seconds 这段连续正常数据来更新模板,避免历史故障污染。 + new_period = estimate_period(ys_grid) + tail_seconds = min( + int(state.clean_seconds), + int(new_period) * MAX_CYCLES_FOR_TEMPLATE, + ) 
+ + new_template = build_phase_template( + ts_grid=ts_grid, + ys_grid=ys_grid, + period=new_period, + max_cycles=MAX_CYCLES_FOR_TEMPLATE, + tail_seconds=tail_seconds, + ) + + if new_template is None: + BASELINE_STATES[key] = state + return state, False, outside_ratio, mean_abs_err, mean_rel_err + + old_template = np.array(state.template, dtype=float) + + if state.status == BASELINE_STATUS_RECOVERING: + alpha = RECOVERY_EMA_ALPHA + state.status = BASELINE_STATUS_HEALTHY + else: + alpha = HEALTHY_EMA_ALPHA + + merged = merge_template( + old_template=old_template, + new_template=new_template, + alpha=alpha, + ) + + state.period = int(new_period) + state.template = merged.astype(float).tolist() + state.last_update_ts = now_sec + state.y_min = float(np.min(ys_grid[-tail_seconds:])) + state.y_max = float(np.max(ys_grid[-tail_seconds:])) + + BASELINE_STATES[key] = state + + logger.info( + "更新健康模板 key=%s period=%ss status=%s clean_seconds=%ss alpha=%.2f", + key, + state.period, + state.status, + state.clean_seconds, + alpha, + ) + + return state, False, outside_ratio, mean_abs_err, mean_rel_err + + +# ── Prometheus 格式写入 ─────────────────────────────────────────────────────── + +def prom_escape_label_value(value: str) -> str: + return ( + str(value) + .replace("\\", "\\\\") + .replace("\n", "\\n") + .replace('"', '\\"') + ) + + +def labels_to_str(labels: Dict[str, str]) -> str: + if not labels: + return "" + + parts = [] + + for k in sorted(labels.keys()): + v = prom_escape_label_value(labels[k]) + parts.append(f'{k}="{v}"') + + return "{" + ",".join(parts) + "}" + + +def write_series( + metric_name: str, + labels: Dict[str, str], + ts_list: List[int], + values: List[float], +) -> bool: + if not ts_list or not values or len(ts_list) != len(values): + return False + + label_str = labels_to_str(labels) + lines = [] + + for t, y in zip(ts_list, values): + try: + ts_sec = int(round(float(t))) + val = float(y) + except Exception: + continue + + if not math.isfinite(ts_sec) or 
not math.isfinite(val): + continue + + ts_ms = ts_sec * 1000 + lines.append(f"{metric_name}{label_str} {val:.6f} {ts_ms}") + + if not lines: + return False + + payload = "\n".join(lines) + "\n" + + try: + resp = requests.post( + f"{VM_URL}/api/v1/import/prometheus", + data=payload.encode("utf-8"), + headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"}, + timeout=10, + ) + resp.raise_for_status() + return True + except requests.RequestException as e: + logger.error("写入数据失败 metric=%s: %s", metric_name, e) + return False + + +def write_prediction_bundle( + pred_metric: str, + anomaly_metric: str, + labels: Dict[str, str], + ts_future: List[int], + pred_values: np.ndarray, + lower_values: np.ndarray, + upper_values: np.ndarray, + is_anomaly: bool, + outside_ratio: float, + mean_abs_err: float, + mean_rel_err: float, +) -> bool: + ok1 = write_series( + metric_name=pred_metric, + labels=labels, + ts_list=ts_future, + values=pred_values.astype(float).tolist(), + ) + + ok2 = write_series( + metric_name=f"{pred_metric}_lower", + labels=labels, + ts_list=ts_future, + values=lower_values.astype(float).tolist(), + ) + + ok3 = write_series( + metric_name=f"{pred_metric}_upper", + labels=labels, + ts_list=ts_future, + values=upper_values.astype(float).tolist(), + ) + + now_sec = int(time.time()) + + anomaly_labels = dict(labels) + anomaly_labels["type"] = "prediction_deviation" + + ok4 = write_series( + metric_name=anomaly_metric, + labels=anomaly_labels, + ts_list=[now_sec], + values=[1.0 if is_anomaly else 0.0], + ) + + ok5 = write_series( + metric_name=f"{anomaly_metric}_outside_ratio", + labels=anomaly_labels, + ts_list=[now_sec], + values=[outside_ratio], + ) + + ok6 = write_series( + metric_name=f"{anomaly_metric}_mean_abs_error", + labels=anomaly_labels, + ts_list=[now_sec], + values=[mean_abs_err], + ) + + ok7 = write_series( + metric_name=f"{anomaly_metric}_mean_rel_error", + labels=anomaly_labels, + ts_list=[now_sec], + values=[mean_rel_err], + ) + + 
return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7 + + +# ── 标签解析 ────────────────────────────────────────────────────────────────── + +_LABEL_PATTERN = re.compile( + r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*' +) + + +def _parse_labels(query: str) -> Dict[str, str]: + labels = {} + + if "{" not in query or "}" not in query: + return labels + + try: + label_part = query[query.index("{") + 1: query.rindex("}")] + except Exception: + return labels + + for match in _LABEL_PATTERN.finditer(label_part): + key = match.group(1) + value = match.group(2) + value = value.replace('\\"', '"').replace("\\n", "\n").replace("\\\\", "\\") + labels[key] = value + + return labels + + +def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]: + result = {} + + for d in dicts: + if d: + result.update(d) + + return result + + +def series_key(metric_name: str, labels: Dict[str, str]) -> str: + return metric_name + labels_to_str(labels) + + +# ── 状态持久化 ──────────────────────────────────────────────────────────────── + +def load_state(): + global BASELINE_STATES + + if not os.path.exists(STATE_FILE): + return + + try: + with open(STATE_FILE, "r", encoding="utf-8") as f: + raw = json.load(f) + + states = {} + + for key, value in raw.get("baseline_states", {}).items(): + states[key] = BaselineState(**value) + + BASELINE_STATES = states + + logger.info("已加载预测状态文件 %s,状态数量=%d", STATE_FILE, len(BASELINE_STATES)) + + except Exception as e: + logger.warning("加载预测状态文件失败,将重新学习: %s", e) + + +def save_state(): + try: + raw = { + "baseline_states": { + key: asdict(value) + for key, value in BASELINE_STATES.items() + } + } + + tmp_file = STATE_FILE + ".tmp" + + with open(tmp_file, "w", encoding="utf-8") as f: + json.dump(raw, f, ensure_ascii=False, indent=2) + + os.replace(tmp_file, STATE_FILE) + + except Exception as e: + logger.warning("保存预测状态文件失败: %s", e) + + +# ── 主逻辑 ──────────────────────────────────────────────────────────────────── + +def run_once(): + now_str = 
datetime.now().strftime("%H:%M:%S") + + for target in PREDICT_TARGETS: + query = target["query"] + pred_metric = target["pred_metric"] + anomaly_metric = target["anomaly_metric"] + abs_threshold = float(target["abs_threshold"]) + rel_threshold = float(target["rel_threshold"]) + + ts, ys = fetch_history(query) + + if len(ys) < MIN_POINTS: + logger.info("[%s] %s 数据不足(%d 点),跳过", now_str, query, len(ys)) + continue + + ts_grid, ys_grid = normalize_history(ts, ys) + + if len(ys_grid) < MIN_POINTS: + logger.info("[%s] %s 清洗后数据不足(%d 点),跳过", now_str, query, len(ys_grid)) + continue + + base_labels = _parse_labels(query) + write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS) + + key = series_key(pred_metric, write_labels) + + state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err = maybe_update_state( + key=key, + ts_grid=ts_grid, + ys_grid=ys_grid, + abs_threshold=abs_threshold, + rel_threshold=rel_threshold, + ) + + if state is None: + logger.info("[%s] %s 暂无可用健康模板,等待学习", now_str, query) + continue + + now_sec = int(time.time()) + last_until = LAST_WRITTEN_UNTIL.get(key, 0) + last_real_ts = int(ts_grid[-1]) + + base_ts = max(now_sec, last_until, last_real_ts) + + ts_future = [ + base_ts + i + 1 + for i in range(WRITE_HORIZON_SECONDS) + ] + + pred_values = predict_by_state(state, ts_future) + + lower_values, upper_values = calc_bounds( + pred=pred_values, + abs_threshold=abs_threshold, + rel_threshold=rel_threshold, + ) + + ok = write_prediction_bundle( + pred_metric=pred_metric, + anomaly_metric=anomaly_metric, + labels=write_labels, + ts_future=ts_future, + pred_values=pred_values, + lower_values=lower_values, + upper_values=upper_values, + is_anomaly=is_anomaly, + outside_ratio=outside_ratio, + mean_abs_err=mean_abs_err, + mean_rel_err=mean_rel_err, + ) + + if not ok: + continue + + LAST_WRITTEN_UNTIL[key] = int(max(ts_future)) + + future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S") + future_end = 
datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S") + + logger.info( + "[%s] %-40s → %-35s status=%s anomaly=%s period=%ss clean=%ss 写入 %d 点,预测区间 %s ~ %s", + now_str, + query, + pred_metric, + state.status, + is_anomaly, + state.period, + state.clean_seconds, + len(ts_future), + future_start, + future_end, + ) + + save_state() + + +def main(): + load_state() + + logger.info( + "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s", + VM_URL, + HISTORY_MINUTES, + HORIZON_SECONDS, + WRITE_HORIZON_SECONDS, + POLL_INTERVAL, + STATE_FILE, + ) + + while True: + run_once() + time.sleep(POLL_INTERVAL) + + +if __name__ == "__main__": + main() \ No newline at end of file From 54c4b851a004567078cfec337933aafcbd676b44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Thu, 21 May 2026 13:39:33 +0800 Subject: [PATCH 23/55] fix --- ai/predict_v3_single_scene.py | 701 +++++++++++++++++++++++----------- 1 file changed, 488 insertions(+), 213 deletions(-) mode change 100644 => 100755 ai/predict_v3_single_scene.py diff --git a/ai/predict_v3_single_scene.py b/ai/predict_v3_single_scene.py old mode 100644 new mode 100755 index 23af8c5..fc07f4f --- a/ai/predict_v3_single_scene.py +++ b/ai/predict_v3_single_scene.py @@ -1,18 +1,22 @@ # -*- coding: utf-8 -*- """ -ProtoForge 预测服务 v6 - -核心能力: -1. 周期模板预测:适合 CNC 这类强周期、非标准正弦波形。 -2. 健康基线冻结:检测到异常后,不再用故障数据更新预测模板。 -3. 恢复冷却机制:故障恢复后,需要连续稳定多个周期,才恢复学习。 -4. 预测上下界:写入 predicted_upper / predicted_lower,方便 Grafana 展示预测带。 -5. 异常标记:写入 xxx_anomaly,1 表示异常,0 表示正常。 -6. 不删除历史预测,不使用 delete_series。 -""" - -""" -场景:不考虑物料、不考虑跨程序场景算法预测 +ProtoForge Predictor v8 + +功能: +1. 从 VictoriaMetrics 拉取历史数据。 +2. 对 CNC 周期型指标进行相位对齐预测。 +3. 使用“谷底锚点”对齐周期,减少上升沿/下降沿相位偏差。 +4. 每轮只写入未来 min(HORIZON_SECONDS, POLL_INTERVAL) 秒,避免预测窗口重叠。 +5. 检测异常后冻结健康模板,不把故障数据学进去。 +6. 故障恢复后等待稳定一段时间,再恢复模板更新。 +7. 
写入: + - xxx_predicted + - xxx_predicted_upper + - xxx_predicted_lower + - xxx_anomaly + - xxx_anomaly_outside_ratio + - xxx_anomaly_mean_abs_error + - xxx_anomaly_mean_rel_error """ import json @@ -29,6 +33,10 @@ import requests +# ============================================================================= +# 日志配置 +# ============================================================================= + logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", @@ -37,18 +45,19 @@ logger = logging.getLogger(__name__) -# ── 基础配置 ────────────────────────────────────────────────────────────────── +# ============================================================================= +# 基础配置 +# ============================================================================= VM_URL = "http://localhost:8428" -STATE_FILE = "/tmp/protoforge_predictor_state.json" +STATE_FILE = "/tmp/protoforge_predictor_state_v8.json" HISTORY_MINUTES = 30 HORIZON_SECONDS = 120 POLL_INTERVAL = 30 -# 实际每轮写入未来多少秒。 -# 不要大于 POLL_INTERVAL,否则多轮预测会重叠。 +# 实际写入窗口不要大于轮询间隔,否则多轮预测会重叠。 WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL) QUERY_STEP = "1s" @@ -57,34 +66,25 @@ MIN_PERIOD_SECONDS = 5 MAX_PERIOD_SECONDS = 3600 -# 至少多少个完整周期才允许构建健康模板 MIN_FULL_CYCLES_FOR_TEMPLATE = 3 - -# 构建模板最多使用最近多少个周期 MAX_CYCLES_FOR_TEMPLATE = 6 -# 检测异常使用最近多少秒实际数据 DETECT_WINDOW_SECONDS = 15 - -# 恢复后,至少连续正常多少秒才考虑恢复学习 RECOVERY_MIN_SECONDS = 60 -# 健康状态下模板更新速度,越小越保守 -HEALTHY_EMA_ALPHA = 0.15 - -# 故障恢复后第一次重新学习时的更新速度 -RECOVERY_EMA_ALPHA = 0.35 +HEALTHY_EMA_ALPHA = 0.12 +RECOVERY_EMA_ALPHA = 0.30 -# 最近窗口里有多少比例的点超过阈值,才认为异常 OUTSIDE_RATIO_THRESHOLD = 0.60 - -# 最近窗口里有多少比例的点回到阈值内,才认为恢复正常 RECOVERY_INSIDE_RATIO_THRESHOLD = 0.80 +PHASE_SEARCH_RATIO = 0.15 +VALLEY_QUANTILE = 45 + -# ── 指标配置 ────────────────────────────────────────────────────────────────── -# abs_threshold / rel_threshold 需要按指标单位调。 -# feed_rate 单位 mm/min,这里先给 400 和 25%。 +# ============================================================================= 
+# 预测指标配置 +# ============================================================================= PREDICT_TARGETS = [ { @@ -132,21 +132,23 @@ ] EXTRA_PREDICT_LABELS = { - "forecast": "health_gated_v1", + "forecast": "phase_aligned_health_v8", "source": "protoforge", } BASELINE_STATUS_HEALTHY = "healthy" BASELINE_STATUS_ANOMALY = "anomaly" BASELINE_STATUS_RECOVERING = "recovering" -BASELINE_STATUS_LEARNING = "learning" -# ── 状态结构 ────────────────────────────────────────────────────────────────── +# ============================================================================= +# 状态结构 +# ============================================================================= @dataclass class BaselineState: period: int + phase_origin_ts: int template: List[float] status: str clean_seconds: int @@ -160,7 +162,9 @@ class BaselineState: LAST_WRITTEN_UNTIL: Dict[str, int] = {} -# ── VM 读取 ─────────────────────────────────────────────────────────────────── +# ============================================================================= +# VictoriaMetrics 读取 +# ============================================================================= def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]: now = datetime.now() @@ -258,7 +262,25 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np. 
return ts_grid, ys_grid -# ── 周期估计 ────────────────────────────────────────────────────────────────── +# ============================================================================= +# 周期估计 +# ============================================================================= + +def moving_average(arr: np.ndarray, window: int) -> np.ndarray: + if window <= 1 or len(arr) < window: + return arr.astype(float) + + window = int(window) + + if window % 2 == 0: + window += 1 + + kernel = np.ones(window, dtype=float) / window + pad = window // 2 + padded = np.pad(arr.astype(float), (pad, pad), mode="edge") + + return np.convolve(padded, kernel, mode="valid") + def estimate_period_by_fft(ys_arr: np.ndarray) -> float: n = len(ys_arr) @@ -307,7 +329,7 @@ def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float: corr = np.correlate(centered, centered, mode="full")[n - 1:] p0 = int(round(init_period)) - left = max(MIN_PERIOD_SECONDS, int(max(2, p0 * 0.7))) + left = max(int(MIN_PERIOD_SECONDS), int(max(2, p0 * 0.7))) right = min(n // 2, int(max(left + 1, p0 * 1.3))) if right <= left: @@ -323,96 +345,252 @@ def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float: return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) -def estimate_period(ys_arr: np.ndarray) -> int: +def estimate_period_rough(ys_arr: np.ndarray) -> int: p_fft = estimate_period_by_fft(ys_arr) p_refined = refine_period_by_autocorr(ys_arr, p_fft) period = int(round(p_refined)) - period = max(MIN_PERIOD_SECONDS, min(MAX_PERIOD_SECONDS, period)) + period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period)) return int(period) -# ── 模板构建与预测 ───────────────────────────────────────────────────────────── +# ============================================================================= +# 谷底锚点检测 +# ============================================================================= -def fill_template_nan(template: np.ndarray) -> np.ndarray: - period = 
len(template) +def find_valley_indices( + ts_grid: np.ndarray, + ys_grid: np.ndarray, + expected_period: int, +) -> List[int]: + n = len(ys_grid) - if period == 0: - return template + if n < max(10, expected_period * 2): + return [] + + period = max(3, int(expected_period)) + + smooth_window = max(3, int(round(period * 0.08))) + smooth_window = min(smooth_window, 21) + + ys_smooth = moving_average(ys_grid, smooth_window) + threshold = float(np.percentile(ys_smooth, VALLEY_QUANTILE)) + + candidates = [] + + for i in range(1, n - 1): + if ( + ys_smooth[i] <= ys_smooth[i - 1] + and ys_smooth[i] < ys_smooth[i + 1] + and ys_smooth[i] <= threshold + ): + candidates.append(i) + + if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE: + candidates = [] + + for i in range(1, n - 1): + if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]: + candidates.append(i) + + if not candidates: + return [] - idx = np.arange(period) - valid = np.isfinite(template) + min_distance = max(2, int(round(period * 0.55))) + selected = [] - if not np.any(valid): - return np.zeros(period, dtype=float) + for idx in candidates: + if not selected: + selected.append(idx) + continue + + if idx - selected[-1] >= min_distance: + selected.append(idx) + continue + + if ys_smooth[idx] < ys_smooth[selected[-1]]: + selected[-1] = idx - if np.all(valid): - return template + if len(selected) < 2: + return selected - x_valid = idx[valid] - y_valid = template[valid] + cleaned = [selected[0]] + + for idx in selected[1:]: + diff = int(ts_grid[idx] - ts_grid[cleaned[-1]]) + + if int(period * 0.55) <= diff <= int(period * 1.60): + cleaned.append(idx) + continue - # 环形插值,处理 phase 0 附近缺口 - x_ext = np.concatenate([x_valid - period, x_valid, x_valid + period]) - y_ext = np.concatenate([y_valid, y_valid, y_valid]) + if diff < int(period * 0.55): + if ys_smooth[idx] < ys_smooth[cleaned[-1]]: + cleaned[-1] = idx + continue - filled = np.interp(idx, x_ext, y_ext) + cleaned.append(idx) - return 
filled.astype(float) + return cleaned -def build_phase_template( +def detect_period_and_valleys( + ts_grid: np.ndarray, + ys_grid: np.ndarray, +) -> Tuple[int, List[int]]: + rough = estimate_period_rough(ys_grid) + valleys = find_valley_indices(ts_grid, ys_grid, rough) + + if len(valleys) >= 3: + diffs = np.diff(ts_grid[valleys]) + good = diffs[(diffs >= rough * 0.55) & (diffs <= rough * 1.60)] + + if len(good) > 0: + period = int(round(float(np.median(good)))) + else: + period = rough + else: + period = rough + + period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period)) + + return int(period), valleys + + +# ============================================================================= +# 相位对齐模板构建 +# ============================================================================= + +def build_template_from_valleys( ts_grid: np.ndarray, ys_grid: np.ndarray, period: int, + valleys: List[int], max_cycles: int = MAX_CYCLES_FOR_TEMPLATE, - tail_seconds: Optional[int] = None, ) -> Optional[np.ndarray]: - if period <= 1 or len(ys_grid) < period * MIN_FULL_CYCLES_FOR_TEMPLATE: + if period <= 1 or len(valleys) < MIN_FULL_CYCLES_FOR_TEMPLATE + 1: return None - max_seconds = period * max_cycles + pairs = [] + + for a, b in zip(valleys[:-1], valleys[1:]): + cycle_len = float(ts_grid[b] - ts_grid[a]) - if tail_seconds is not None: - max_seconds = min(max_seconds, int(tail_seconds)) + if period * 0.55 <= cycle_len <= period * 1.60: + pairs.append((a, b, cycle_len)) - max_seconds = max(period * MIN_FULL_CYCLES_FOR_TEMPLATE, max_seconds) + if len(pairs) < MIN_FULL_CYCLES_FOR_TEMPLATE: + return None + + pairs = pairs[-max_cycles:] + + phase_grid = np.arange(period, dtype=float) + segments = [] + weights = [] + + for idx, (a, b, cycle_len) in enumerate(pairs): + seg_ts = ts_grid[a:b + 1] + seg_y = ys_grid[a:b + 1] + + if len(seg_y) < 3: + continue - if len(ys_grid) < max_seconds: - start_idx = 0 + x_old = (seg_ts - seg_ts[0]) / cycle_len * period + seg = 
np.interp(phase_grid, x_old, seg_y) + + segments.append(seg.astype(float)) + + weight = 0.5 + 0.5 * ((idx + 1) / len(pairs)) + weights.append(weight) + + if len(segments) < MIN_FULL_CYCLES_FOR_TEMPLATE: + return None + + arr = np.vstack(segments) + w_arr = np.array(weights, dtype=float) + + template = np.average(arr, axis=0, weights=w_arr) + + return template.astype(float) + + +def build_current_baseline( + ts_grid: np.ndarray, + ys_grid: np.ndarray, + tail_seconds: Optional[int] = None, +) -> Optional[Tuple[int, int, np.ndarray]]: + if len(ys_grid) < MIN_POINTS: + return None + + if tail_seconds is not None and tail_seconds > 0: + cutoff = ts_grid[-1] - int(tail_seconds) + mask = ts_grid >= cutoff + ts_use = ts_grid[mask] + ys_use = ys_grid[mask] else: - start_idx = len(ys_grid) - max_seconds + ts_use = ts_grid + ys_use = ys_grid + + if len(ys_use) < MIN_POINTS: + return None - ts_tail = ts_grid[start_idx:] - ys_tail = ys_grid[start_idx:] + period, valleys = detect_period_and_valleys(ts_use, ys_use) - if len(ys_tail) < period * MIN_FULL_CYCLES_FOR_TEMPLATE: + template = build_template_from_valleys( + ts_grid=ts_use, + ys_grid=ys_use, + period=period, + valleys=valleys, + ) + + if template is None or len(valleys) == 0: return None - sums = np.zeros(period, dtype=float) - weights = np.zeros(period, dtype=float) + phase_origin_ts = int(round(float(ts_use[valleys[-1]]))) - total = len(ys_tail) + return int(period), phase_origin_ts, template - for i, (t, y) in enumerate(zip(ts_tail, ys_tail)): - phase = int(t) % period - # 越近的数据权重越高 - recency = (i + 1) / total - weight = 0.3 + 0.7 * recency +# ============================================================================= +# 模板预测 +# ============================================================================= - sums[phase] += float(y) * weight - weights[phase] += weight +def circular_template_value(template: np.ndarray, phase: float) -> float: + period = len(template) - template = np.full(period, np.nan, dtype=float) + 
if period == 0: + return 0.0 + + phase = float(phase) % period - valid = weights > 0 - template[valid] = sums[valid] / weights[valid] + i0 = int(math.floor(phase)) % period + i1 = (i0 + 1) % period - template = fill_template_nan(template) + frac = phase - math.floor(phase) - return template + return float((1.0 - frac) * template[i0] + frac * template[i1]) + + +def predict_with_origin( + state: BaselineState, + ts_list: List[int], + phase_origin_ts: Optional[int] = None, +) -> np.ndarray: + template = np.array(state.template, dtype=float) + period = int(state.period) + + if period <= 1 or len(template) != period: + return np.zeros(len(ts_list), dtype=float) + + origin = int(state.phase_origin_ts if phase_origin_ts is None else phase_origin_ts) + + values = [] + + for ts in ts_list: + phase = (int(ts) - origin) % period + values.append(circular_template_value(template, phase)) + + return np.array(values, dtype=float) def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray: @@ -433,6 +611,38 @@ def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray: return np.interp(new_x, old_x_ext, old_y_ext).astype(float) +def align_new_template_to_old( + old_template: np.ndarray, + new_template: np.ndarray, +) -> np.ndarray: + if len(old_template) != len(new_template): + old_template = resample_template(old_template, len(new_template)) + + period = len(new_template) + + if period <= 2: + return new_template.astype(float) + + max_shift = max(1, int(round(period * 0.10))) + + old_norm = old_template - np.mean(old_template) + + best_score = None + best_template = new_template + + for shift in range(-max_shift, max_shift + 1): + shifted = np.roll(new_template, shift) + shifted_norm = shifted - np.mean(shifted) + + score = float(np.dot(old_norm, shifted_norm)) + + if best_score is None or score > best_score: + best_score = score + best_template = shifted + + return best_template.astype(float) + + def merge_template( old_template: 
np.ndarray, new_template: np.ndarray, @@ -443,37 +653,64 @@ def merge_template( if len(old_template) != len(new_template): old_template = resample_template(old_template, len(new_template)) - return ((1.0 - alpha) * old_template + alpha * new_template).astype(float) - - -def predict_by_state(state: BaselineState, ts_list: List[int]) -> np.ndarray: - template = np.array(state.template, dtype=float) - period = int(state.period) - - if period <= 1 or len(template) != period: - return np.zeros(len(ts_list), dtype=float) + new_template = align_new_template_to_old(old_template, new_template) - values = [] + merged = (1.0 - alpha) * old_template + alpha * new_template - for ts in ts_list: - phase = int(ts) % period - values.append(float(template[phase])) + return merged.astype(float) - return np.array(values, dtype=float) +# ============================================================================= +# 异常检测 +# ============================================================================= -def calc_threshold(pred: np.ndarray, abs_threshold: float, rel_threshold: float) -> np.ndarray: +def calc_threshold( + pred: np.ndarray, + abs_threshold: float, + rel_threshold: float, +) -> np.ndarray: return np.maximum(abs_threshold, np.abs(pred) * rel_threshold) -def calc_bounds(pred: np.ndarray, abs_threshold: float, rel_threshold: float) -> Tuple[np.ndarray, np.ndarray]: +def calc_bounds( + pred: np.ndarray, + abs_threshold: float, + rel_threshold: float, +) -> Tuple[np.ndarray, np.ndarray]: threshold = calc_threshold(pred, abs_threshold, rel_threshold) + lower = pred - threshold upper = pred + threshold + return lower, upper -# ── 异常检测与状态更新 ──────────────────────────────────────────────────────── +def find_best_phase_origin_for_recent( + state: BaselineState, + ts_recent: List[int], + actual: np.ndarray, +) -> Tuple[int, np.ndarray, float]: + period = int(state.period) + base_origin = int(state.phase_origin_ts) + + max_shift = max(1, int(round(period * PHASE_SEARCH_RATIO))) + + 
best_origin = base_origin + best_pred = predict_with_origin(state, ts_recent, base_origin) + best_mae = float(np.mean(np.abs(actual - best_pred))) + + for shift in range(-max_shift, max_shift + 1): + origin = base_origin + shift + pred = predict_with_origin(state, ts_recent, origin) + mae = float(np.mean(np.abs(actual - pred))) + + if mae < best_mae: + best_mae = mae + best_origin = origin + best_pred = pred + + return best_origin, best_pred, best_mae + def detect_anomaly( state: BaselineState, @@ -481,14 +718,19 @@ def detect_anomaly( ys_grid: np.ndarray, abs_threshold: float, rel_threshold: float, -) -> Tuple[bool, float, float, float]: +) -> Tuple[bool, float, float, float, int]: if len(ys_grid) < DETECT_WINDOW_SECONDS: - return False, 0.0, 0.0, 0.0 + return False, 0.0, 0.0, 0.0, int(state.phase_origin_ts) ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist() actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float) - pred = predict_by_state(state, ts_recent) + best_origin, pred, _ = find_best_phase_origin_for_recent( + state=state, + ts_recent=ts_recent, + actual=actual, + ) + threshold = calc_threshold(pred, abs_threshold, rel_threshold) abs_err = np.abs(actual - pred) @@ -500,56 +742,28 @@ def detect_anomaly( is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD - return is_anomaly, outside_ratio, mean_abs_err, mean_rel_err - - -def is_recovered( - state: BaselineState, - ts_grid: np.ndarray, - ys_grid: np.ndarray, - abs_threshold: float, - rel_threshold: float, -) -> Tuple[bool, float]: - if len(ys_grid) < DETECT_WINDOW_SECONDS: - return False, 0.0 - - ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist() - actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float) - - pred = predict_by_state(state, ts_recent) - threshold = calc_threshold(pred, abs_threshold, rel_threshold) - - abs_err = np.abs(actual - pred) - inside = abs_err <= threshold - - inside_ratio = float(np.mean(inside)) + return is_anomaly, outside_ratio, mean_abs_err, 
mean_rel_err, int(best_origin) - return inside_ratio >= RECOVERY_INSIDE_RATIO_THRESHOLD, inside_ratio +# ============================================================================= +# 健康基线状态管理 +# ============================================================================= def create_initial_state( ts_grid: np.ndarray, ys_grid: np.ndarray, now_sec: int, ) -> Optional[BaselineState]: - if len(ys_grid) < MIN_POINTS: - return None - - period = estimate_period(ys_grid) - - template = build_phase_template( - ts_grid=ts_grid, - ys_grid=ys_grid, - period=period, - max_cycles=MAX_CYCLES_FOR_TEMPLATE, - tail_seconds=period * MAX_CYCLES_FOR_TEMPLATE, - ) + baseline = build_current_baseline(ts_grid, ys_grid) - if template is None: + if baseline is None: return None + period, phase_origin_ts, template = baseline + return BaselineState( period=int(period), + phase_origin_ts=int(phase_origin_ts), template=template.astype(float).tolist(), status=BASELINE_STATUS_HEALTHY, clean_seconds=int(period * MAX_CYCLES_FOR_TEMPLATE), @@ -578,19 +792,23 @@ def maybe_update_state( return None, False, 0.0, 0.0, 0.0 BASELINE_STATES[key] = state + logger.info( - "初始化健康模板 key=%s period=%ss clean_seconds=%ss", + "初始化健康模板 key=%s period=%ss origin=%s clean=%ss", key, state.period, + datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), state.clean_seconds, ) + return state, False, 0.0, 0.0, 0.0 elapsed = max(1, now_sec - int(state.last_seen_ts)) elapsed = min(elapsed, POLL_INTERVAL * 2) + state.last_seen_ts = now_sec - is_anom, outside_ratio, mean_abs_err, mean_rel_err = detect_anomaly( + is_anom, outside_ratio, mean_abs_err, mean_rel_err, best_origin = detect_anomaly( state=state, ts_grid=ts_grid, ys_grid=ys_grid, @@ -602,6 +820,8 @@ def maybe_update_state( state.status = BASELINE_STATUS_ANOMALY state.clean_seconds = 0 + BASELINE_STATES[key] = state + logger.warning( "检测到异常,冻结模板 key=%s outside_ratio=%.2f mean_abs_err=%.2f mean_rel_err=%.2f", key, @@ -610,47 +830,39 @@ def 
maybe_update_state( mean_rel_err, ) - BASELINE_STATES[key] = state return state, True, outside_ratio, mean_abs_err, mean_rel_err - recovered, inside_ratio = is_recovered( - state=state, - ts_grid=ts_grid, - ys_grid=ys_grid, - abs_threshold=abs_threshold, - rel_threshold=rel_threshold, - ) + old_origin = int(state.phase_origin_ts) + state.phase_origin_ts = int(best_origin) + + if abs(state.phase_origin_ts - old_origin) >= 1: + logger.debug( + "相位校正 key=%s origin %s -> %s", + key, + datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"), + datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), + ) if state.status == BASELINE_STATUS_ANOMALY: - if recovered: - state.status = BASELINE_STATUS_RECOVERING - state.clean_seconds = elapsed - logger.info( - "异常开始恢复 key=%s inside_ratio=%.2f clean_seconds=%ss", - key, - inside_ratio, - state.clean_seconds, - ) - else: - state.clean_seconds = 0 - BASELINE_STATES[key] = state - return state, True, outside_ratio, mean_abs_err, mean_rel_err + state.status = BASELINE_STATUS_RECOVERING + state.clean_seconds = elapsed - elif state.status == BASELINE_STATUS_RECOVERING: - if recovered: - state.clean_seconds += elapsed - else: - state.status = BASELINE_STATUS_ANOMALY - state.clean_seconds = 0 - BASELINE_STATES[key] = state - return state, True, outside_ratio, mean_abs_err, mean_rel_err + BASELINE_STATES[key] = state + logger.info( + "异常开始恢复 key=%s clean_seconds=%ss", + key, + state.clean_seconds, + ) + + return state, False, outside_ratio, mean_abs_err, mean_rel_err + + if state.status == BASELINE_STATUS_RECOVERING: + state.clean_seconds += elapsed else: state.status = BASELINE_STATUS_HEALTHY state.clean_seconds += elapsed - # 故障恢复后,不要立刻学习。 - # 必须至少连续正常:max(RECOVERY_MIN_SECONDS, 3 个周期) min_clean_for_update = max( RECOVERY_MIN_SECONDS, int(state.period) * MIN_FULL_CYCLES_FOR_TEMPLATE, @@ -660,30 +872,26 @@ def maybe_update_state( BASELINE_STATES[key] = state return state, False, outside_ratio, mean_abs_err, mean_rel_err - # 
只使用最近 clean_seconds 这段连续正常数据来更新模板,避免历史故障污染。 - new_period = estimate_period(ys_grid) tail_seconds = min( int(state.clean_seconds), - int(new_period) * MAX_CYCLES_FOR_TEMPLATE, + int(state.period) * MAX_CYCLES_FOR_TEMPLATE, ) - new_template = build_phase_template( + baseline = build_current_baseline( ts_grid=ts_grid, ys_grid=ys_grid, - period=new_period, - max_cycles=MAX_CYCLES_FOR_TEMPLATE, tail_seconds=tail_seconds, ) - if new_template is None: + if baseline is None: BASELINE_STATES[key] = state return state, False, outside_ratio, mean_abs_err, mean_rel_err + new_period, new_origin, new_template = baseline old_template = np.array(state.template, dtype=float) if state.status == BASELINE_STATUS_RECOVERING: alpha = RECOVERY_EMA_ALPHA - state.status = BASELINE_STATUS_HEALTHY else: alpha = HEALTHY_EMA_ALPHA @@ -694,18 +902,25 @@ def maybe_update_state( ) state.period = int(new_period) + state.phase_origin_ts = int(new_origin) state.template = merged.astype(float).tolist() + state.status = BASELINE_STATUS_HEALTHY state.last_update_ts = now_sec - state.y_min = float(np.min(ys_grid[-tail_seconds:])) - state.y_max = float(np.max(ys_grid[-tail_seconds:])) + + if tail_seconds > 0 and len(ys_grid) >= tail_seconds: + state.y_min = float(np.min(ys_grid[-tail_seconds:])) + state.y_max = float(np.max(ys_grid[-tail_seconds:])) + else: + state.y_min = float(np.min(ys_grid)) + state.y_max = float(np.max(ys_grid)) BASELINE_STATES[key] = state logger.info( - "更新健康模板 key=%s period=%ss status=%s clean_seconds=%ss alpha=%.2f", + "更新健康模板 key=%s period=%ss origin=%s clean=%ss alpha=%.2f", key, state.period, - state.status, + datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), state.clean_seconds, alpha, ) @@ -713,7 +928,9 @@ def maybe_update_state( return state, False, outside_ratio, mean_abs_err, mean_rel_err -# ── Prometheus 格式写入 ─────────────────────────────────────────────────────── +# ============================================================================= +# 
Prometheus Exposition 写入 +# ============================================================================= def prom_escape_label_value(value: str) -> str: return ( @@ -731,8 +948,7 @@ def labels_to_str(labels: Dict[str, str]) -> str: parts = [] for k in sorted(labels.keys()): - v = prom_escape_label_value(labels[k]) - parts.append(f'{k}="{v}"') + parts.append(f'{k}="{prom_escape_label_value(labels[k])}"') return "{" + ",".join(parts) + "}" @@ -771,11 +987,14 @@ def write_series( resp = requests.post( f"{VM_URL}/api/v1/import/prometheus", data=payload.encode("utf-8"), - headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"}, + headers={ + "Content-Type": "text/plain; version=0.0.4; charset=utf-8", + }, timeout=10, ) resp.raise_for_status() return True + except requests.RequestException as e: logger.error("写入数据失败 metric=%s: %s", metric_name, e) return False @@ -851,28 +1070,37 @@ def write_prediction_bundle( return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7 -# ── 标签解析 ────────────────────────────────────────────────────────────────── +# ============================================================================= +# 标签解析 +# ============================================================================= _LABEL_PATTERN = re.compile( r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*' ) -def _parse_labels(query: str) -> Dict[str, str]: +def parse_labels_from_query(query: str) -> Dict[str, str]: labels = {} if "{" not in query or "}" not in query: return labels try: - label_part = query[query.index("{") + 1: query.rindex("}")] + label_part = query[query.index("{") + 1:query.rindex("}")] except Exception: return labels for match in _LABEL_PATTERN.finditer(label_part): key = match.group(1) value = match.group(2) - value = value.replace('\\"', '"').replace("\\n", "\n").replace("\\\\", "\\") + + value = ( + value + .replace('\\"', '"') + .replace("\\n", "\n") + .replace("\\\\", "\\") + ) + labels[key] = value return labels @@ -892,9 +1120,11 @@ def 
series_key(metric_name: str, labels: Dict[str, str]) -> str: return metric_name + labels_to_str(labels) -# ── 状态持久化 ──────────────────────────────────────────────────────────────── +# ============================================================================= +# 状态持久化 +# ============================================================================= -def load_state(): +def load_state() -> None: global BASELINE_STATES if not os.path.exists(STATE_FILE): @@ -907,17 +1137,36 @@ def load_state(): states = {} for key, value in raw.get("baseline_states", {}).items(): + required_fields = { + "period", + "phase_origin_ts", + "template", + "status", + "clean_seconds", + "last_update_ts", + "last_seen_ts", + "y_min", + "y_max", + } + + if not required_fields.issubset(set(value.keys())): + continue + states[key] = BaselineState(**value) BASELINE_STATES = states - logger.info("已加载预测状态文件 %s,状态数量=%d", STATE_FILE, len(BASELINE_STATES)) + logger.info( + "已加载预测状态文件 %s,状态数量=%d", + STATE_FILE, + len(BASELINE_STATES), + ) except Exception as e: logger.warning("加载预测状态文件失败,将重新学习: %s", e) -def save_state(): +def save_state() -> None: try: raw = { "baseline_states": { @@ -937,9 +1186,11 @@ def save_state(): logger.warning("保存预测状态文件失败: %s", e) -# ── 主逻辑 ──────────────────────────────────────────────────────────────────── +# ============================================================================= +# 主流程 +# ============================================================================= -def run_once(): +def run_once() -> None: now_str = datetime.now().strftime("%H:%M:%S") for target in PREDICT_TARGETS: @@ -952,16 +1203,26 @@ def run_once(): ts, ys = fetch_history(query) if len(ys) < MIN_POINTS: - logger.info("[%s] %s 数据不足(%d 点),跳过", now_str, query, len(ys)) + logger.info( + "[%s] %s 数据不足(%d 点),跳过", + now_str, + query, + len(ys), + ) continue ts_grid, ys_grid = normalize_history(ts, ys) if len(ys_grid) < MIN_POINTS: - logger.info("[%s] %s 清洗后数据不足(%d 点),跳过", now_str, query, len(ys_grid)) + 
logger.info( + "[%s] %s 清洗后数据不足(%d 点),跳过", + now_str, + query, + len(ys_grid), + ) continue - base_labels = _parse_labels(query) + base_labels = parse_labels_from_query(query) write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS) key = series_key(pred_metric, write_labels) @@ -975,7 +1236,11 @@ def run_once(): ) if state is None: - logger.info("[%s] %s 暂无可用健康模板,等待学习", now_str, query) + logger.info( + "[%s] %s 暂无可用健康模板,等待学习", + now_str, + query, + ) continue now_sec = int(time.time()) @@ -989,7 +1254,7 @@ def run_once(): for i in range(WRITE_HORIZON_SECONDS) ] - pred_values = predict_by_state(state, ts_future) + pred_values = predict_with_origin(state, ts_future) lower_values, upper_values = calc_bounds( pred=pred_values, @@ -1012,21 +1277,28 @@ def run_once(): ) if not ok: + logger.error( + "[%s] %s 写入预测数据失败", + now_str, + query, + ) continue LAST_WRITTEN_UNTIL[key] = int(max(ts_future)) future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S") future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S") + origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S") logger.info( - "[%s] %-40s → %-35s status=%s anomaly=%s period=%ss clean=%ss 写入 %d 点,预测区间 %s ~ %s", + "[%s] %-40s → %-35s status=%s anomaly=%s period=%ss origin=%s clean=%ss 写入 %d 点,预测区间 %s ~ %s", now_str, query, pred_metric, state.status, is_anomaly, state.period, + origin_str, state.clean_seconds, len(ts_future), future_start, @@ -1036,17 +1308,18 @@ def run_once(): save_state() -def main(): +def main() -> None: load_state() logger.info( - "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s", + "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s", VM_URL, HISTORY_MINUTES, HORIZON_SECONDS, WRITE_HORIZON_SECONDS, POLL_INTERVAL, STATE_FILE, + EXTRA_PREDICT_LABELS["forecast"], ) while True: @@ -1055,4 +1328,6 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() + + \ No newline at 
end of file From 76e536eff9beb9e3f9db7453bbeb6f1c7844c198 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Thu, 21 May 2026 13:55:08 +0800 Subject: [PATCH 24/55] fix --- ai/predict_v3_single_scene.py | 316 ++++++++++++---------------------- 1 file changed, 110 insertions(+), 206 deletions(-) diff --git a/ai/predict_v3_single_scene.py b/ai/predict_v3_single_scene.py index fc07f4f..2cde8b8 100755 --- a/ai/predict_v3_single_scene.py +++ b/ai/predict_v3_single_scene.py @@ -1,22 +1,12 @@ # -*- coding: utf-8 -*- """ -ProtoForge Predictor v8 - -功能: -1. 从 VictoriaMetrics 拉取历史数据。 -2. 对 CNC 周期型指标进行相位对齐预测。 -3. 使用“谷底锚点”对齐周期,减少上升沿/下降沿相位偏差。 -4. 每轮只写入未来 min(HORIZON_SECONDS, POLL_INTERVAL) 秒,避免预测窗口重叠。 -5. 检测异常后冻结健康模板,不把故障数据学进去。 -6. 故障恢复后等待稳定一段时间,再恢复模板更新。 -7. 写入: - - xxx_predicted - - xxx_predicted_upper - - xxx_predicted_lower - - xxx_anomaly - - xxx_anomaly_outside_ratio - - xxx_anomaly_mean_abs_error - - xxx_anomaly_mean_rel_error +ProtoForge Predictor v9 + +修复重点: +1. 预测时间轴改为锚定最后一个真实数据点 last_real_ts,而不是锚定 time.time()。 +2. 不再使用 LAST_WRITTEN_UNTIL 把预测不断推向更远未来,避免 Grafana 里预测线相对真实线出现延迟/错位。 +3. 如果真实数据时间戳没有推进,则跳过本轮预测写入,避免重复写同一段未来时间造成毛刺。 +4. 
保留:相位对齐、健康模板冻结、故障期不学习、恢复后再学习、预测上下界、异常指标。 """ import json @@ -50,14 +40,13 @@ # ============================================================================= VM_URL = "http://localhost:8428" - -STATE_FILE = "/tmp/protoforge_predictor_state_v8.json" +STATE_FILE = "/tmp/protoforge_predictor_state_v9.json" HISTORY_MINUTES = 30 HORIZON_SECONDS = 120 POLL_INTERVAL = 30 -# 实际写入窗口不要大于轮询间隔,否则多轮预测会重叠。 +# 实际每轮写入的预测长度。不要大于 POLL_INTERVAL,否则容易出现预测窗口重叠。 WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL) QUERY_STEP = "1s" @@ -76,14 +65,23 @@ RECOVERY_EMA_ALPHA = 0.30 OUTSIDE_RATIO_THRESHOLD = 0.60 -RECOVERY_INSIDE_RATIO_THRESHOLD = 0.80 - PHASE_SEARCH_RATIO = 0.15 VALLEY_QUANTILE = 45 +# 关键修复:预测时间轴锚定真实数据最后一个点。 +# True:预测从 last_real_ts + 1 开始,适合 Grafana 与真实曲线对齐展示。 +# False:预测从当前系统时间 + 1 开始,适合只看纯未来预测,但容易与有采集延迟的真实数据错位。 +ALIGN_PREDICTION_TO_LAST_REAL_TS = True + +# 如果 last_real_ts 距离当前系统时间太久,说明采集链路可能断了,跳过预测,避免用陈旧数据继续画未来线。 +MAX_DATA_LAG_SECONDS = 180 + +# 真实数据至少推进多少秒,才写入新预测,避免同一段未来时间被反复写入。 +MIN_REAL_ADVANCE_SECONDS = 1 + # ============================================================================= -# 预测指标配置 +# 指标配置 # ============================================================================= PREDICT_TARGETS = [ @@ -132,7 +130,7 @@ ] EXTRA_PREDICT_LABELS = { - "forecast": "phase_aligned_health_v8", + "forecast": "phase_aligned_health_v9", "source": "protoforge", } @@ -159,7 +157,10 @@ class BaselineState: BASELINE_STATES: Dict[str, BaselineState] = {} -LAST_WRITTEN_UNTIL: Dict[str, int] = {} + +# 记录每条序列最后一次使用的真实数据时间戳,而不是预测写到哪里。 +# 这样不会把预测不断推向更远的未来。 +LAST_REAL_TS_WRITTEN: Dict[str, int] = {} # ============================================================================= @@ -243,7 +244,6 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np. 
return np.array([]), np.array([]) sorted_items = sorted(data.items(), key=lambda x: x[0]) - ts_clean = np.array([x[0] for x in sorted_items], dtype=float) ys_clean = np.array([x[1] for x in sorted_items], dtype=float) @@ -263,7 +263,7 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np. # ============================================================================= -# 周期估计 +# 周期估计与谷底检测 # ============================================================================= def moving_average(arr: np.ndarray, window: int) -> np.ndarray: @@ -355,10 +355,6 @@ def estimate_period_rough(ys_arr: np.ndarray) -> int: return int(period) -# ============================================================================= -# 谷底锚点检测 -# ============================================================================= - def find_valley_indices( ts_grid: np.ndarray, ys_grid: np.ndarray, @@ -370,7 +366,6 @@ def find_valley_indices( return [] period = max(3, int(expected_period)) - smooth_window = max(3, int(round(period * 0.08))) smooth_window = min(smooth_window, 21) @@ -389,7 +384,6 @@ def find_valley_indices( if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE: candidates = [] - for i in range(1, n - 1): if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]: candidates.append(i) @@ -458,7 +452,7 @@ def detect_period_and_valleys( # ============================================================================= -# 相位对齐模板构建 +# 相位对齐模板 # ============================================================================= def build_template_from_valleys( @@ -552,7 +546,7 @@ def build_current_baseline( # ============================================================================= -# 模板预测 +# 预测与模板合并 # ============================================================================= def circular_template_value(template: np.ndarray, phase: float) -> float: @@ -562,10 +556,8 @@ def circular_template_value(template: np.ndarray, phase: float) -> float: return 0.0 phase = 
float(phase) % period - i0 = int(math.floor(phase)) % period i1 = (i0 + 1) % period - frac = phase - math.floor(phase) return float((1.0 - frac) * template[i0] + frac * template[i1]) @@ -583,7 +575,6 @@ def predict_with_origin( return np.zeros(len(ts_list), dtype=float) origin = int(state.phase_origin_ts if phase_origin_ts is None else phase_origin_ts) - values = [] for ts in ts_list: @@ -611,10 +602,7 @@ def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray: return np.interp(new_x, old_x_ext, old_y_ext).astype(float) -def align_new_template_to_old( - old_template: np.ndarray, - new_template: np.ndarray, -) -> np.ndarray: +def align_new_template_to_old(old_template: np.ndarray, new_template: np.ndarray) -> np.ndarray: if len(old_template) != len(new_template): old_template = resample_template(old_template, len(new_template)) @@ -624,7 +612,6 @@ def align_new_template_to_old( return new_template.astype(float) max_shift = max(1, int(round(period * 0.10))) - old_norm = old_template - np.mean(old_template) best_score = None @@ -633,7 +620,6 @@ def align_new_template_to_old( for shift in range(-max_shift, max_shift + 1): shifted = np.roll(new_template, shift) shifted_norm = shifted - np.mean(shifted) - score = float(np.dot(old_norm, shifted_norm)) if best_score is None or score > best_score: @@ -643,18 +629,13 @@ def align_new_template_to_old( return best_template.astype(float) -def merge_template( - old_template: np.ndarray, - new_template: np.ndarray, - alpha: float, -) -> np.ndarray: +def merge_template(old_template: np.ndarray, new_template: np.ndarray, alpha: float) -> np.ndarray: alpha = float(np.clip(alpha, 0.0, 1.0)) if len(old_template) != len(new_template): old_template = resample_template(old_template, len(new_template)) new_template = align_new_template_to_old(old_template, new_template) - merged = (1.0 - alpha) * old_template + alpha * new_template return merged.astype(float) @@ -664,11 +645,7 @@ def merge_template( # 异常检测 # 
============================================================================= -def calc_threshold( - pred: np.ndarray, - abs_threshold: float, - rel_threshold: float, -) -> np.ndarray: +def calc_threshold(pred: np.ndarray, abs_threshold: float, rel_threshold: float) -> np.ndarray: return np.maximum(abs_threshold, np.abs(pred) * rel_threshold) @@ -678,11 +655,7 @@ def calc_bounds( rel_threshold: float, ) -> Tuple[np.ndarray, np.ndarray]: threshold = calc_threshold(pred, abs_threshold, rel_threshold) - - lower = pred - threshold - upper = pred + threshold - - return lower, upper + return pred - threshold, pred + threshold def find_best_phase_origin_for_recent( @@ -692,7 +665,6 @@ def find_best_phase_origin_for_recent( ) -> Tuple[int, np.ndarray, float]: period = int(state.period) base_origin = int(state.phase_origin_ts) - max_shift = max(1, int(round(period * PHASE_SEARCH_RATIO))) best_origin = base_origin @@ -732,14 +704,12 @@ def detect_anomaly( ) threshold = calc_threshold(pred, abs_threshold, rel_threshold) - abs_err = np.abs(actual - pred) outside = abs_err > threshold outside_ratio = float(np.mean(outside)) mean_abs_err = float(np.mean(abs_err)) mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1.0))) - is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD return is_anomaly, outside_ratio, mean_abs_err, mean_rel_err, int(best_origin) @@ -749,11 +719,7 @@ def detect_anomaly( # 健康基线状态管理 # ============================================================================= -def create_initial_state( - ts_grid: np.ndarray, - ys_grid: np.ndarray, - now_sec: int, -) -> Optional[BaselineState]: +def create_initial_state(ts_grid: np.ndarray, ys_grid: np.ndarray, now_sec: int) -> Optional[BaselineState]: baseline = build_current_baseline(ts_grid, ys_grid) if baseline is None: @@ -782,7 +748,6 @@ def maybe_update_state( rel_threshold: float, ) -> Tuple[Optional[BaselineState], bool, float, float, float]: now_sec = int(time.time()) - state = 
BASELINE_STATES.get(key) if state is None: @@ -805,7 +770,6 @@ def maybe_update_state( elapsed = max(1, now_sec - int(state.last_seen_ts)) elapsed = min(elapsed, POLL_INTERVAL * 2) - state.last_seen_ts = now_sec is_anom, outside_ratio, mean_abs_err, mean_rel_err, best_origin = detect_anomaly( @@ -819,7 +783,6 @@ def maybe_update_state( if is_anom: state.status = BASELINE_STATUS_ANOMALY state.clean_seconds = 0 - BASELINE_STATES[key] = state logger.warning( @@ -846,15 +809,9 @@ def maybe_update_state( if state.status == BASELINE_STATUS_ANOMALY: state.status = BASELINE_STATUS_RECOVERING state.clean_seconds = elapsed - BASELINE_STATES[key] = state - logger.info( - "异常开始恢复 key=%s clean_seconds=%ss", - key, - state.clean_seconds, - ) - + logger.info("异常开始恢复 key=%s clean_seconds=%ss", key, state.clean_seconds) return state, False, outside_ratio, mean_abs_err, mean_rel_err if state.status == BASELINE_STATUS_RECOVERING: @@ -877,11 +834,7 @@ def maybe_update_state( int(state.period) * MAX_CYCLES_FOR_TEMPLATE, ) - baseline = build_current_baseline( - ts_grid=ts_grid, - ys_grid=ys_grid, - tail_seconds=tail_seconds, - ) + baseline = build_current_baseline(ts_grid=ts_grid, ys_grid=ys_grid, tail_seconds=tail_seconds) if baseline is None: BASELINE_STATES[key] = state @@ -889,17 +842,9 @@ def maybe_update_state( new_period, new_origin, new_template = baseline old_template = np.array(state.template, dtype=float) + alpha = RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else HEALTHY_EMA_ALPHA - if state.status == BASELINE_STATUS_RECOVERING: - alpha = RECOVERY_EMA_ALPHA - else: - alpha = HEALTHY_EMA_ALPHA - - merged = merge_template( - old_template=old_template, - new_template=new_template, - alpha=alpha, - ) + merged = merge_template(old_template=old_template, new_template=new_template, alpha=alpha) state.period = int(new_period) state.phase_origin_ts = int(new_origin) @@ -933,12 +878,7 @@ def maybe_update_state( # 
============================================================================= def prom_escape_label_value(value: str) -> str: - return ( - str(value) - .replace("\\", "\\\\") - .replace("\n", "\\n") - .replace('"', '\\"') - ) + return str(value).replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"') def labels_to_str(labels: Dict[str, str]) -> str: @@ -975,8 +915,7 @@ def write_series( if not math.isfinite(ts_sec) or not math.isfinite(val): continue - ts_ms = ts_sec * 1000 - lines.append(f"{metric_name}{label_str} {val:.6f} {ts_ms}") + lines.append(f"{metric_name}{label_str} {val:.6f} {ts_sec * 1000}") if not lines: return False @@ -987,9 +926,7 @@ def write_series( resp = requests.post( f"{VM_URL}/api/v1/import/prometheus", data=payload.encode("utf-8"), - headers={ - "Content-Type": "text/plain; version=0.0.4; charset=utf-8", - }, + headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"}, timeout=10, ) resp.raise_for_status() @@ -1012,60 +949,19 @@ def write_prediction_bundle( outside_ratio: float, mean_abs_err: float, mean_rel_err: float, + event_ts: int, ) -> bool: - ok1 = write_series( - metric_name=pred_metric, - labels=labels, - ts_list=ts_future, - values=pred_values.astype(float).tolist(), - ) - - ok2 = write_series( - metric_name=f"{pred_metric}_lower", - labels=labels, - ts_list=ts_future, - values=lower_values.astype(float).tolist(), - ) - - ok3 = write_series( - metric_name=f"{pred_metric}_upper", - labels=labels, - ts_list=ts_future, - values=upper_values.astype(float).tolist(), - ) - - now_sec = int(time.time()) + ok1 = write_series(pred_metric, labels, ts_future, pred_values.astype(float).tolist()) + ok2 = write_series(f"{pred_metric}_lower", labels, ts_future, lower_values.astype(float).tolist()) + ok3 = write_series(f"{pred_metric}_upper", labels, ts_future, upper_values.astype(float).tolist()) anomaly_labels = dict(labels) anomaly_labels["type"] = "prediction_deviation" - ok4 = write_series( - metric_name=anomaly_metric, - 
labels=anomaly_labels, - ts_list=[now_sec], - values=[1.0 if is_anomaly else 0.0], - ) - - ok5 = write_series( - metric_name=f"{anomaly_metric}_outside_ratio", - labels=anomaly_labels, - ts_list=[now_sec], - values=[outside_ratio], - ) - - ok6 = write_series( - metric_name=f"{anomaly_metric}_mean_abs_error", - labels=anomaly_labels, - ts_list=[now_sec], - values=[mean_abs_err], - ) - - ok7 = write_series( - metric_name=f"{anomaly_metric}_mean_rel_error", - labels=anomaly_labels, - ts_list=[now_sec], - values=[mean_rel_err], - ) + ok4 = write_series(anomaly_metric, anomaly_labels, [event_ts], [1.0 if is_anomaly else 0.0]) + ok5 = write_series(f"{anomaly_metric}_outside_ratio", anomaly_labels, [event_ts], [outside_ratio]) + ok6 = write_series(f"{anomaly_metric}_mean_abs_error", anomaly_labels, [event_ts], [mean_abs_err]) + ok7 = write_series(f"{anomaly_metric}_mean_rel_error", anomaly_labels, [event_ts], [mean_rel_err]) return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7 @@ -1074,9 +970,7 @@ def write_prediction_bundle( # 标签解析 # ============================================================================= -_LABEL_PATTERN = re.compile( - r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*' -) +_LABEL_PATTERN = re.compile(r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*') def parse_labels_from_query(query: str) -> Dict[str, str]: @@ -1093,14 +987,7 @@ def parse_labels_from_query(query: str) -> Dict[str, str]: for match in _LABEL_PATTERN.finditer(label_part): key = match.group(1) value = match.group(2) - - value = ( - value - .replace('\\"', '"') - .replace("\\n", "\n") - .replace("\\\\", "\\") - ) - + value = value.replace('\\"', '"').replace("\\n", "\n").replace("\\\\", "\\") labels[key] = value return labels @@ -1155,12 +1042,7 @@ def load_state() -> None: states[key] = BaselineState(**value) BASELINE_STATES = states - - logger.info( - "已加载预测状态文件 %s,状态数量=%d", - STATE_FILE, - len(BASELINE_STATES), - ) + logger.info("已加载预测状态文件 %s,状态数量=%d", 
STATE_FILE, len(BASELINE_STATES)) except Exception as e: logger.warning("加载预测状态文件失败,将重新学习: %s", e) @@ -1186,6 +1068,44 @@ def save_state() -> None: logger.warning("保存预测状态文件失败: %s", e) +# ============================================================================= +# 时间轴选择 +# ============================================================================= + +def build_prediction_timestamps(key: str, last_real_ts: int, now_sec: int) -> Optional[List[int]]: + data_lag = now_sec - last_real_ts + + if data_lag > MAX_DATA_LAG_SECONDS: + logger.warning( + "真实数据延迟过大,跳过预测 key=%s data_lag=%ss max=%ss", + key, + data_lag, + MAX_DATA_LAG_SECONDS, + ) + return None + + last_written_real_ts = LAST_REAL_TS_WRITTEN.get(key) + + if last_written_real_ts is not None: + advance = last_real_ts - int(last_written_real_ts) + + if advance < MIN_REAL_ADVANCE_SECONDS: + logger.info( + "真实数据时间戳未推进,跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s", + key, + last_real_ts, + last_written_real_ts, + ) + return None + + if ALIGN_PREDICTION_TO_LAST_REAL_TS: + base_ts = last_real_ts + else: + base_ts = now_sec + + return [base_ts + i + 1 for i in range(WRITE_HORIZON_SECONDS)] + + # ============================================================================= # 主流程 # ============================================================================= @@ -1203,28 +1123,17 @@ def run_once() -> None: ts, ys = fetch_history(query) if len(ys) < MIN_POINTS: - logger.info( - "[%s] %s 数据不足(%d 点),跳过", - now_str, - query, - len(ys), - ) + logger.info("[%s] %s 数据不足(%d 点),跳过", now_str, query, len(ys)) continue ts_grid, ys_grid = normalize_history(ts, ys) if len(ys_grid) < MIN_POINTS: - logger.info( - "[%s] %s 清洗后数据不足(%d 点),跳过", - now_str, - query, - len(ys_grid), - ) + logger.info("[%s] %s 清洗后数据不足(%d 点),跳过", now_str, query, len(ys_grid)) continue base_labels = parse_labels_from_query(query) write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS) - key = series_key(pred_metric, write_labels) state, 
is_anomaly, outside_ratio, mean_abs_err, mean_rel_err = maybe_update_state( @@ -1236,26 +1145,23 @@ def run_once() -> None: ) if state is None: - logger.info( - "[%s] %s 暂无可用健康模板,等待学习", - now_str, - query, - ) + logger.info("[%s] %s 暂无可用健康模板,等待学习", now_str, query) continue now_sec = int(time.time()) - last_until = LAST_WRITTEN_UNTIL.get(key, 0) last_real_ts = int(ts_grid[-1]) + data_lag = now_sec - last_real_ts - base_ts = max(now_sec, last_until, last_real_ts) + ts_future = build_prediction_timestamps( + key=key, + last_real_ts=last_real_ts, + now_sec=now_sec, + ) - ts_future = [ - base_ts + i + 1 - for i in range(WRITE_HORIZON_SECONDS) - ] + if not ts_future: + continue pred_values = predict_with_origin(state, ts_future) - lower_values, upper_values = calc_bounds( pred=pred_values, abs_threshold=abs_threshold, @@ -1274,24 +1180,22 @@ def run_once() -> None: outside_ratio=outside_ratio, mean_abs_err=mean_abs_err, mean_rel_err=mean_rel_err, + event_ts=last_real_ts, ) if not ok: - logger.error( - "[%s] %s 写入预测数据失败", - now_str, - query, - ) + logger.error("[%s] %s 写入预测数据失败", now_str, query) continue - LAST_WRITTEN_UNTIL[key] = int(max(ts_future)) + LAST_REAL_TS_WRITTEN[key] = last_real_ts future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S") future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S") + last_real_str = datetime.fromtimestamp(last_real_ts).strftime("%H:%M:%S") origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S") logger.info( - "[%s] %-40s → %-35s status=%s anomaly=%s period=%ss origin=%s clean=%ss 写入 %d 点,预测区间 %s ~ %s", + "[%s] %-40s → %-35s status=%s anomaly=%s period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点,预测区间 %s ~ %s", now_str, query, pred_metric, @@ -1299,7 +1203,8 @@ def run_once() -> None: is_anomaly, state.period, origin_str, - state.clean_seconds, + last_real_str, + data_lag, len(ts_future), future_start, future_end, @@ -1312,7 +1217,7 @@ def main() -> None: load_state() logger.info( 
- "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s", + "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s align_to_last_real=%s", VM_URL, HISTORY_MINUTES, HORIZON_SECONDS, @@ -1320,6 +1225,7 @@ def main() -> None: POLL_INTERVAL, STATE_FILE, EXTRA_PREDICT_LABELS["forecast"], + ALIGN_PREDICTION_TO_LAST_REAL_TS, ) while True: @@ -1329,5 +1235,3 @@ def main() -> None: if __name__ == "__main__": main() - - \ No newline at end of file From f5e7b2a27320066b5eb9157f297d6651adcd36c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Thu, 21 May 2026 14:05:54 +0800 Subject: [PATCH 25/55] fix --- ai/predict_v3_single_scene.py | 520 +++++++++++++++++++++++++--------- 1 file changed, 385 insertions(+), 135 deletions(-) diff --git a/ai/predict_v3_single_scene.py b/ai/predict_v3_single_scene.py index 2cde8b8..d212d2d 100755 --- a/ai/predict_v3_single_scene.py +++ b/ai/predict_v3_single_scene.py @@ -1,12 +1,14 @@ # -*- coding: utf-8 -*- """ -ProtoForge Predictor v9 +ProtoForge Predictor v10 修复重点: -1. 预测时间轴改为锚定最后一个真实数据点 last_real_ts,而不是锚定 time.time()。 -2. 不再使用 LAST_WRITTEN_UNTIL 把预测不断推向更远未来,避免 Grafana 里预测线相对真实线出现延迟/错位。 -3. 如果真实数据时间戳没有推进,则跳过本轮预测写入,避免重复写同一段未来时间造成毛刺。 -4. 保留:相位对齐、健康模板冻结、故障期不学习、恢复后再学习、预测上下界、异常指标。 +1. 修复 lag=0 但预测线仍然相位漂移的问题。 +2. 在谷底相位对齐基础上,增加 phase-lock 相位锁定。 +3. 每轮使用最近 1~2 个周期真实数据,搜索最佳 period + phase_origin。 +4. 预测起点仍然锚定最后一个真实点 last_real_ts,避免写入延迟。 +5. 保留健康模板冻结逻辑:异常期间不学习故障数据。 +6. 
保留预测上下界和异常指标。 """ import json @@ -40,13 +42,12 @@ # ============================================================================= VM_URL = "http://localhost:8428" -STATE_FILE = "/tmp/protoforge_predictor_state_v9.json" +STATE_FILE = "/tmp/protoforge_predictor_state_v10.json" HISTORY_MINUTES = 30 HORIZON_SECONDS = 120 POLL_INTERVAL = 30 -# 实际每轮写入的预测长度。不要大于 POLL_INTERVAL,否则容易出现预测窗口重叠。 WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL) QUERY_STEP = "1s" @@ -58,26 +59,29 @@ MIN_FULL_CYCLES_FOR_TEMPLATE = 3 MAX_CYCLES_FOR_TEMPLATE = 6 -DETECT_WINDOW_SECONDS = 15 +DETECT_WINDOW_SECONDS = 20 RECOVERY_MIN_SECONDS = 60 -HEALTHY_EMA_ALPHA = 0.12 -RECOVERY_EMA_ALPHA = 0.30 +HEALTHY_EMA_ALPHA = 0.10 +RECOVERY_EMA_ALPHA = 0.25 OUTSIDE_RATIO_THRESHOLD = 0.60 -PHASE_SEARCH_RATIO = 0.15 + VALLEY_QUANTILE = 45 -# 关键修复:预测时间轴锚定真实数据最后一个点。 -# True:预测从 last_real_ts + 1 开始,适合 Grafana 与真实曲线对齐展示。 -# False:预测从当前系统时间 + 1 开始,适合只看纯未来预测,但容易与有采集延迟的真实数据错位。 -ALIGN_PREDICTION_TO_LAST_REAL_TS = True +# phase-lock 配置 +PHASE_LOCK_MIN_WINDOW_SECONDS = 45 +PHASE_LOCK_MAX_WINDOW_SECONDS = 180 +PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12 +PHASE_LOCK_ORIGIN_SEARCH_RATIO = 0.35 +PHASE_LOCK_PERIOD_STEP = 1 +PHASE_LOCK_ORIGIN_STEP = 1 -# 如果 last_real_ts 距离当前系统时间太久,说明采集链路可能断了,跳过预测,避免用陈旧数据继续画未来线。 +# 真实数据延迟超过这个值,就不继续预测 MAX_DATA_LAG_SECONDS = 180 -# 真实数据至少推进多少秒,才写入新预测,避免同一段未来时间被反复写入。 -MIN_REAL_ADVANCE_SECONDS = 1 +# 预测锚定最后一个真实点 +ALIGN_PREDICTION_TO_LAST_REAL_TS = True # ============================================================================= @@ -130,7 +134,7 @@ ] EXTRA_PREDICT_LABELS = { - "forecast": "phase_aligned_health_v9", + "forecast": "phase_locked_health_v10", "source": "protoforge", } @@ -157,9 +161,6 @@ class BaselineState: BASELINE_STATES: Dict[str, BaselineState] = {} - -# 记录每条序列最后一次使用的真实数据时间戳,而不是预测写到哪里。 -# 这样不会把预测不断推向更远的未来。 LAST_REAL_TS_WRITTEN: Dict[str, int] = {} @@ -197,8 +198,6 @@ def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[floa return [], [] values = 
result[0].get("values", []) - if not values: - return [], [] ts = [] ys = [] @@ -244,6 +243,7 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np. return np.array([]), np.array([]) sorted_items = sorted(data.items(), key=lambda x: x[0]) + ts_clean = np.array([x[0] for x in sorted_items], dtype=float) ys_clean = np.array([x[1] for x in sorted_items], dtype=float) @@ -263,7 +263,7 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np. # ============================================================================= -# 周期估计与谷底检测 +# 周期估计 # ============================================================================= def moving_average(arr: np.ndarray, window: int) -> np.ndarray: @@ -355,6 +355,10 @@ def estimate_period_rough(ys_arr: np.ndarray) -> int: return int(period) +# ============================================================================= +# 谷底检测与模板构建 +# ============================================================================= + def find_valley_indices( ts_grid: np.ndarray, ys_grid: np.ndarray, @@ -366,6 +370,7 @@ def find_valley_indices( return [] period = max(3, int(expected_period)) + smooth_window = max(3, int(round(period * 0.08))) smooth_window = min(smooth_window, 21) @@ -384,6 +389,7 @@ def find_valley_indices( if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE: candidates = [] + for i in range(1, n - 1): if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]: candidates.append(i) @@ -451,10 +457,6 @@ def detect_period_and_valleys( return int(period), valleys -# ============================================================================= -# 相位对齐模板 -# ============================================================================= - def build_template_from_valleys( ts_grid: np.ndarray, ys_grid: np.ndarray, @@ -546,7 +548,7 @@ def build_current_baseline( # ============================================================================= -# 预测与模板合并 +# 模板预测与重采样 # 
============================================================================= def circular_template_value(template: np.ndarray, phase: float) -> float: @@ -556,6 +558,7 @@ def circular_template_value(template: np.ndarray, phase: float) -> float: return 0.0 phase = float(phase) % period + i0 = int(math.floor(phase)) % period i1 = (i0 + 1) % period frac = phase - math.floor(phase) @@ -563,46 +566,77 @@ def circular_template_value(template: np.ndarray, phase: float) -> float: return float((1.0 - frac) * template[i0] + frac * template[i1]) -def predict_with_origin( - state: BaselineState, +def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray: + old_period = len(old_template) + + if old_period == new_period: + return old_template.astype(float) + + if old_period <= 1 or new_period <= 1: + return np.full(new_period, float(np.mean(old_template)), dtype=float) + + old_x = np.linspace(0.0, 1.0, old_period, endpoint=False) + new_x = np.linspace(0.0, 1.0, new_period, endpoint=False) + + old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0]) + old_y_ext = np.concatenate([old_template, old_template, old_template]) + + return np.interp(new_x, old_x_ext, old_y_ext).astype(float) + + +def predict_template_values( + template: np.ndarray, + period: int, + phase_origin_ts: int, ts_list: List[int], - phase_origin_ts: Optional[int] = None, ) -> np.ndarray: - template = np.array(state.template, dtype=float) - period = int(state.period) - - if period <= 1 or len(template) != period: + if period <= 1: return np.zeros(len(ts_list), dtype=float) - origin = int(state.phase_origin_ts if phase_origin_ts is None else phase_origin_ts) + if len(template) != period: + template = resample_template(template, period) + values = [] for ts in ts_list: - phase = (int(ts) - origin) % period + phase = (int(ts) - int(phase_origin_ts)) % period values.append(circular_template_value(template, phase)) return np.array(values, dtype=float) -def resample_template(old_template: 
np.ndarray, new_period: int) -> np.ndarray: - old_period = len(old_template) +def predict_with_state(state: BaselineState, ts_list: List[int]) -> np.ndarray: + template = np.array(state.template, dtype=float) - if old_period == new_period: - return old_template.astype(float) + return predict_template_values( + template=template, + period=int(state.period), + phase_origin_ts=int(state.phase_origin_ts), + ts_list=ts_list, + ) - if old_period <= 1 or new_period <= 1: - return np.full(new_period, float(np.mean(old_template)), dtype=float) - old_x = np.linspace(0.0, 1.0, old_period, endpoint=False) - new_x = np.linspace(0.0, 1.0, new_period, endpoint=False) +def normalize_origin_near(origin: int, period: int, near_ts: int) -> int: + if period <= 1: + return origin - old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0]) - old_y_ext = np.concatenate([old_template, old_template, old_template]) + origin = int(origin) + period = int(period) + near_ts = int(near_ts) - return np.interp(new_x, old_x_ext, old_y_ext).astype(float) + while origin + period <= near_ts: + origin += period + while origin > near_ts: + origin -= period -def align_new_template_to_old(old_template: np.ndarray, new_template: np.ndarray) -> np.ndarray: + return origin + + +def align_new_template_to_old( + old_template: np.ndarray, + new_template: np.ndarray, +) -> np.ndarray: if len(old_template) != len(new_template): old_template = resample_template(old_template, len(new_template)) @@ -629,23 +663,117 @@ def align_new_template_to_old(old_template: np.ndarray, new_template: np.ndarray return best_template.astype(float) -def merge_template(old_template: np.ndarray, new_template: np.ndarray, alpha: float) -> np.ndarray: +def merge_template( + old_template: np.ndarray, + new_template: np.ndarray, + alpha: float, +) -> np.ndarray: alpha = float(np.clip(alpha, 0.0, 1.0)) if len(old_template) != len(new_template): old_template = resample_template(old_template, len(new_template)) new_template = 
align_new_template_to_old(old_template, new_template) + merged = (1.0 - alpha) * old_template + alpha * new_template return merged.astype(float) +# ============================================================================= +# Phase Lock +# ============================================================================= + +def phase_lock_recent( + state: BaselineState, + ts_grid: np.ndarray, + ys_grid: np.ndarray, +) -> Tuple[int, int, np.ndarray, float]: + base_period = int(state.period) + base_origin = int(state.phase_origin_ts) + base_template = np.array(state.template, dtype=float) + + if base_period <= 1 or len(base_template) <= 1: + ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist() + pred = predict_with_state(state, ts_recent) + actual = ys_grid[-len(ts_recent):].astype(float) + mae = float(np.mean(np.abs(actual - pred))) if len(actual) else 0.0 + return base_period, base_origin, pred, mae + + window_seconds = max( + PHASE_LOCK_MIN_WINDOW_SECONDS, + min(PHASE_LOCK_MAX_WINDOW_SECONDS, int(base_period * 2)), + ) + + cutoff = ts_grid[-1] - window_seconds + mask = ts_grid >= cutoff + + ts_recent_arr = ts_grid[mask].astype(int) + actual = ys_grid[mask].astype(float) + + if len(ts_recent_arr) < max(10, DETECT_WINDOW_SECONDS): + ts_recent_arr = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int) + actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float) + + ts_recent = ts_recent_arr.tolist() + last_ts = int(ts_recent[-1]) + + p_min = max(int(MIN_PERIOD_SECONDS), int(round(base_period * (1.0 - PHASE_LOCK_PERIOD_SEARCH_RATIO)))) + p_max = min(int(MAX_PERIOD_SECONDS), int(round(base_period * (1.0 + PHASE_LOCK_PERIOD_SEARCH_RATIO)))) + + if p_max < p_min: + p_min = p_max = base_period + + best_period = base_period + best_origin = normalize_origin_near(base_origin, base_period, last_ts) + best_template = resample_template(base_template, best_period) + best_pred = predict_template_values(best_template, best_period, best_origin, ts_recent) + best_mae = 
float(np.mean(np.abs(actual - best_pred))) + + for period in range(p_min, p_max + 1, PHASE_LOCK_PERIOD_STEP): + template = resample_template(base_template, period) + center_origin = normalize_origin_near(base_origin, period, last_ts) + + origin_shift = max(2, int(round(period * PHASE_LOCK_ORIGIN_SEARCH_RATIO))) + + for shift in range(-origin_shift, origin_shift + 1, PHASE_LOCK_ORIGIN_STEP): + origin = center_origin + shift + + pred = predict_template_values( + template=template, + period=period, + phase_origin_ts=origin, + ts_list=ts_recent, + ) + + mae = float(np.mean(np.abs(actual - pred))) + + # 轻微惩罚周期变化,避免过拟合抖动 + penalty = abs(period - base_period) * 0.5 + score = mae + penalty + + best_score = best_mae + abs(best_period - base_period) * 0.5 + + if score < best_score: + best_period = period + best_origin = origin + best_pred = pred + best_mae = mae + + best_origin = normalize_origin_near(best_origin, best_period, last_ts) + + return int(best_period), int(best_origin), best_pred, float(best_mae) + + # ============================================================================= # 异常检测 # ============================================================================= -def calc_threshold(pred: np.ndarray, abs_threshold: float, rel_threshold: float) -> np.ndarray: +def calc_threshold( + pred: np.ndarray, + abs_threshold: float, + rel_threshold: float, +) -> np.ndarray: return np.maximum(abs_threshold, np.abs(pred) * rel_threshold) @@ -655,33 +783,8 @@ def calc_bounds( rel_threshold: float, ) -> Tuple[np.ndarray, np.ndarray]: threshold = calc_threshold(pred, abs_threshold, rel_threshold) - return pred - threshold, pred + threshold - - -def find_best_phase_origin_for_recent( - state: BaselineState, - ts_recent: List[int], - actual: np.ndarray, -) -> Tuple[int, np.ndarray, float]: - period = int(state.period) - base_origin = int(state.phase_origin_ts) - max_shift = max(1, int(round(period * PHASE_SEARCH_RATIO))) - - best_origin = base_origin - best_pred = 
predict_with_origin(state, ts_recent, base_origin) - best_mae = float(np.mean(np.abs(actual - best_pred))) - - for shift in range(-max_shift, max_shift + 1): - origin = base_origin + shift - pred = predict_with_origin(state, ts_recent, origin) - mae = float(np.mean(np.abs(actual - pred))) - if mae < best_mae: - best_mae = mae - best_origin = origin - best_pred = pred - - return best_origin, best_pred, best_mae + return pred - threshold, pred + threshold def detect_anomaly( @@ -690,36 +793,50 @@ def detect_anomaly( ys_grid: np.ndarray, abs_threshold: float, rel_threshold: float, -) -> Tuple[bool, float, float, float, int]: - if len(ys_grid) < DETECT_WINDOW_SECONDS: - return False, 0.0, 0.0, 0.0, int(state.phase_origin_ts) - - ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist() - actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float) - - best_origin, pred, _ = find_best_phase_origin_for_recent( +) -> Tuple[bool, float, float, float, int, int]: + best_period, best_origin, pred_recent, _ = phase_lock_recent( state=state, - ts_recent=ts_recent, - actual=actual, + ts_grid=ts_grid, + ys_grid=ys_grid, ) - threshold = calc_threshold(pred, abs_threshold, rel_threshold) - abs_err = np.abs(actual - pred) + recent_len = len(pred_recent) + + if recent_len <= 0: + return False, 0.0, 0.0, 0.0, best_period, best_origin + + actual = ys_grid[-recent_len:].astype(float) + + threshold = calc_threshold(pred_recent, abs_threshold, rel_threshold) + + abs_err = np.abs(actual - pred_recent) outside = abs_err > threshold outside_ratio = float(np.mean(outside)) mean_abs_err = float(np.mean(abs_err)) - mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1.0))) + mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred_recent), 1.0))) + is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD - return is_anomaly, outside_ratio, mean_abs_err, mean_rel_err, int(best_origin) + return ( + is_anomaly, + outside_ratio, + mean_abs_err, + mean_rel_err, + int(best_period), + 
int(best_origin), + ) # ============================================================================= # 健康基线状态管理 # ============================================================================= -def create_initial_state(ts_grid: np.ndarray, ys_grid: np.ndarray, now_sec: int) -> Optional[BaselineState]: +def create_initial_state( + ts_grid: np.ndarray, + ys_grid: np.ndarray, + now_sec: int, +) -> Optional[BaselineState]: baseline = build_current_baseline(ts_grid, ys_grid) if baseline is None: @@ -740,6 +857,26 @@ def create_initial_state(ts_grid: np.ndarray, ys_grid: np.ndarray, now_sec: int) ) +def apply_phase_lock_to_state( + state: BaselineState, + best_period: int, + best_origin: int, +) -> None: + best_period = int(best_period) + + if best_period <= 1: + return + + template = np.array(state.template, dtype=float) + + if len(template) != best_period: + template = resample_template(template, best_period) + + state.period = best_period + state.phase_origin_ts = int(best_origin) + state.template = template.astype(float).tolist() + + def maybe_update_state( key: str, ts_grid: np.ndarray, @@ -772,7 +909,14 @@ def maybe_update_state( elapsed = min(elapsed, POLL_INTERVAL * 2) state.last_seen_ts = now_sec - is_anom, outside_ratio, mean_abs_err, mean_rel_err, best_origin = detect_anomaly( + ( + is_anomaly, + outside_ratio, + mean_abs_err, + mean_rel_err, + best_period, + best_origin, + ) = detect_anomaly( state=state, ts_grid=ts_grid, ys_grid=ys_grid, @@ -780,9 +924,10 @@ def maybe_update_state( rel_threshold=rel_threshold, ) - if is_anom: + if is_anomaly: state.status = BASELINE_STATUS_ANOMALY state.clean_seconds = 0 + BASELINE_STATES[key] = state logger.warning( @@ -795,13 +940,17 @@ def maybe_update_state( return state, True, outside_ratio, mean_abs_err, mean_rel_err + old_period = int(state.period) old_origin = int(state.phase_origin_ts) - state.phase_origin_ts = int(best_origin) - if abs(state.phase_origin_ts - old_origin) >= 1: - logger.debug( - "相位校正 key=%s origin 
%s -> %s", + apply_phase_lock_to_state(state, best_period, best_origin) + + if old_period != state.period or old_origin != state.phase_origin_ts: + logger.info( + "phase-lock key=%s period %s -> %s origin %s -> %s", key, + old_period, + state.period, datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"), datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), ) @@ -809,9 +958,15 @@ def maybe_update_state( if state.status == BASELINE_STATUS_ANOMALY: state.status = BASELINE_STATUS_RECOVERING state.clean_seconds = elapsed + BASELINE_STATES[key] = state - logger.info("异常开始恢复 key=%s clean_seconds=%ss", key, state.clean_seconds) + logger.info( + "异常开始恢复 key=%s clean_seconds=%ss", + key, + state.clean_seconds, + ) + return state, False, outside_ratio, mean_abs_err, mean_rel_err if state.status == BASELINE_STATUS_RECOVERING: @@ -834,17 +989,27 @@ def maybe_update_state( int(state.period) * MAX_CYCLES_FOR_TEMPLATE, ) - baseline = build_current_baseline(ts_grid=ts_grid, ys_grid=ys_grid, tail_seconds=tail_seconds) + baseline = build_current_baseline( + ts_grid=ts_grid, + ys_grid=ys_grid, + tail_seconds=tail_seconds, + ) if baseline is None: BASELINE_STATES[key] = state return state, False, outside_ratio, mean_abs_err, mean_rel_err new_period, new_origin, new_template = baseline + old_template = np.array(state.template, dtype=float) + alpha = RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else HEALTHY_EMA_ALPHA - merged = merge_template(old_template=old_template, new_template=new_template, alpha=alpha) + merged = merge_template( + old_template=old_template, + new_template=new_template, + alpha=alpha, + ) state.period = int(new_period) state.phase_origin_ts = int(new_origin) @@ -878,7 +1043,12 @@ def maybe_update_state( # ============================================================================= def prom_escape_label_value(value: str) -> str: - return str(value).replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"') + return ( + 
str(value) + .replace("\\", "\\\\") + .replace("\n", "\\n") + .replace('"', '\\"') + ) def labels_to_str(labels: Dict[str, str]) -> str: @@ -926,7 +1096,9 @@ def write_series( resp = requests.post( f"{VM_URL}/api/v1/import/prometheus", data=payload.encode("utf-8"), - headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"}, + headers={ + "Content-Type": "text/plain; version=0.0.4; charset=utf-8", + }, timeout=10, ) resp.raise_for_status() @@ -951,17 +1123,57 @@ def write_prediction_bundle( mean_rel_err: float, event_ts: int, ) -> bool: - ok1 = write_series(pred_metric, labels, ts_future, pred_values.astype(float).tolist()) - ok2 = write_series(f"{pred_metric}_lower", labels, ts_future, lower_values.astype(float).tolist()) - ok3 = write_series(f"{pred_metric}_upper", labels, ts_future, upper_values.astype(float).tolist()) + ok1 = write_series( + metric_name=pred_metric, + labels=labels, + ts_list=ts_future, + values=pred_values.astype(float).tolist(), + ) + + ok2 = write_series( + metric_name=f"{pred_metric}_lower", + labels=labels, + ts_list=ts_future, + values=lower_values.astype(float).tolist(), + ) + + ok3 = write_series( + metric_name=f"{pred_metric}_upper", + labels=labels, + ts_list=ts_future, + values=upper_values.astype(float).tolist(), + ) anomaly_labels = dict(labels) anomaly_labels["type"] = "prediction_deviation" - ok4 = write_series(anomaly_metric, anomaly_labels, [event_ts], [1.0 if is_anomaly else 0.0]) - ok5 = write_series(f"{anomaly_metric}_outside_ratio", anomaly_labels, [event_ts], [outside_ratio]) - ok6 = write_series(f"{anomaly_metric}_mean_abs_error", anomaly_labels, [event_ts], [mean_abs_err]) - ok7 = write_series(f"{anomaly_metric}_mean_rel_error", anomaly_labels, [event_ts], [mean_rel_err]) + ok4 = write_series( + metric_name=anomaly_metric, + labels=anomaly_labels, + ts_list=[event_ts], + values=[1.0 if is_anomaly else 0.0], + ) + + ok5 = write_series( + metric_name=f"{anomaly_metric}_outside_ratio", + labels=anomaly_labels, + 
ts_list=[event_ts], + values=[outside_ratio], + ) + + ok6 = write_series( + metric_name=f"{anomaly_metric}_mean_abs_error", + labels=anomaly_labels, + ts_list=[event_ts], + values=[mean_abs_err], + ) + + ok7 = write_series( + metric_name=f"{anomaly_metric}_mean_rel_error", + labels=anomaly_labels, + ts_list=[event_ts], + values=[mean_rel_err], + ) return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7 @@ -970,7 +1182,9 @@ def write_prediction_bundle( # 标签解析 # ============================================================================= -_LABEL_PATTERN = re.compile(r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*') +_LABEL_PATTERN = re.compile( + r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*' +) def parse_labels_from_query(query: str) -> Dict[str, str]: @@ -987,7 +1201,14 @@ def parse_labels_from_query(query: str) -> Dict[str, str]: for match in _LABEL_PATTERN.finditer(label_part): key = match.group(1) value = match.group(2) - value = value.replace('\\"', '"').replace("\\n", "\n").replace("\\\\", "\\") + + value = ( + value + .replace('\\"', '"') + .replace("\\n", "\n") + .replace("\\\\", "\\") + ) + labels[key] = value return labels @@ -1042,7 +1263,12 @@ def load_state() -> None: states[key] = BaselineState(**value) BASELINE_STATES = states - logger.info("已加载预测状态文件 %s,状态数量=%d", STATE_FILE, len(BASELINE_STATES)) + + logger.info( + "已加载预测状态文件 %s,状态数量=%d", + STATE_FILE, + len(BASELINE_STATES), + ) except Exception as e: logger.warning("加载预测状态文件失败,将重新学习: %s", e) @@ -1069,10 +1295,14 @@ def save_state() -> None: # ============================================================================= -# 时间轴选择 +# 时间轴 # ============================================================================= -def build_prediction_timestamps(key: str, last_real_ts: int, now_sec: int) -> Optional[List[int]]: +def build_prediction_timestamps( + key: str, + last_real_ts: int, + now_sec: int, +) -> Optional[List[int]]: data_lag = now_sec - last_real_ts if data_lag > 
MAX_DATA_LAG_SECONDS: @@ -1086,24 +1316,24 @@ def build_prediction_timestamps(key: str, last_real_ts: int, now_sec: int) -> Op last_written_real_ts = LAST_REAL_TS_WRITTEN.get(key) - if last_written_real_ts is not None: - advance = last_real_ts - int(last_written_real_ts) - - if advance < MIN_REAL_ADVANCE_SECONDS: - logger.info( - "真实数据时间戳未推进,跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s", - key, - last_real_ts, - last_written_real_ts, - ) - return None + if last_written_real_ts is not None and last_real_ts <= int(last_written_real_ts): + logger.info( + "真实数据时间戳未推进,跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s", + key, + last_real_ts, + last_written_real_ts, + ) + return None if ALIGN_PREDICTION_TO_LAST_REAL_TS: base_ts = last_real_ts else: base_ts = now_sec - return [base_ts + i + 1 for i in range(WRITE_HORIZON_SECONDS)] + return [ + base_ts + i + 1 + for i in range(WRITE_HORIZON_SECONDS) + ] # ============================================================================= @@ -1123,17 +1353,28 @@ def run_once() -> None: ts, ys = fetch_history(query) if len(ys) < MIN_POINTS: - logger.info("[%s] %s 数据不足(%d 点),跳过", now_str, query, len(ys)) + logger.info( + "[%s] %s 数据不足(%d 点),跳过", + now_str, + query, + len(ys), + ) continue ts_grid, ys_grid = normalize_history(ts, ys) if len(ys_grid) < MIN_POINTS: - logger.info("[%s] %s 清洗后数据不足(%d 点),跳过", now_str, query, len(ys_grid)) + logger.info( + "[%s] %s 清洗后数据不足(%d 点),跳过", + now_str, + query, + len(ys_grid), + ) continue base_labels = parse_labels_from_query(query) write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS) + key = series_key(pred_metric, write_labels) state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err = maybe_update_state( @@ -1145,7 +1386,11 @@ def run_once() -> None: ) if state is None: - logger.info("[%s] %s 暂无可用健康模板,等待学习", now_str, query) + logger.info( + "[%s] %s 暂无可用健康模板,等待学习", + now_str, + query, + ) continue now_sec = int(time.time()) @@ -1161,7 +1406,8 @@ def run_once() -> 
None: if not ts_future: continue - pred_values = predict_with_origin(state, ts_future) + pred_values = predict_with_state(state, ts_future) + lower_values, upper_values = calc_bounds( pred=pred_values, abs_threshold=abs_threshold, @@ -1184,7 +1430,11 @@ def run_once() -> None: ) if not ok: - logger.error("[%s] %s 写入预测数据失败", now_str, query) + logger.error( + "[%s] %s 写入预测数据失败", + now_str, + query, + ) continue LAST_REAL_TS_WRITTEN[key] = last_real_ts @@ -1234,4 +1484,4 @@ def main() -> None: if __name__ == "__main__": - main() + main() \ No newline at end of file From 79e9f9b080e3f5fc4a284ab1422d3a54aacaff0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Thu, 21 May 2026 21:23:40 +0800 Subject: [PATCH 26/55] feat(pridict_v4): update pridict v4 version --- ai/pridict_v4.py | 1604 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1604 insertions(+) create mode 100644 ai/pridict_v4.py diff --git a/ai/pridict_v4.py b/ai/pridict_v4.py new file mode 100644 index 0000000..774ad3a --- /dev/null +++ b/ai/pridict_v4.py @@ -0,0 +1,1604 @@ +# -*- coding: utf-8 -*- +""" +ProtoForge Predictor v11 + +核心能力: +1. feed_rate / spindle_speed / spindle_current 使用 phase-lock 点预测。 +2. vibration_x / vibration_y / vibration_z 使用 phase-band 预测带。 +3. vibration 类指标不再追求单点完全贴合,而是输出: + - xxx_predicted 中位数预测线 + - xxx_predicted_upper 正常上边界 + - xxx_predicted_lower 正常下边界 +4. 预测起点锚定最后一个真实点 last_real_ts,避免时间错位。 +5. 异常期间冻结健康模板,不学习故障数据。 +6. 
故障恢复后等待稳定,再恢复模板学习。 +""" + +import json +import logging +import math +import os +import re +import time +from dataclasses import asdict, dataclass +from datetime import datetime, timedelta +from typing import Dict, List, Optional, Tuple + +import numpy as np +import requests + + +# ============================================================================= +# 日志配置 +# ============================================================================= + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", +) + +logger = logging.getLogger(__name__) + + +# ============================================================================= +# 基础配置 +# ============================================================================= + +VM_URL = "http://localhost:8428" +STATE_FILE = "/tmp/protoforge_predictor_state_v11.json" + +HISTORY_MINUTES = 30 +HORIZON_SECONDS = 120 +POLL_INTERVAL = 30 + +WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL) + +QUERY_STEP = "1s" +MIN_POINTS = 120 + +MIN_PERIOD_SECONDS = 5 +MAX_PERIOD_SECONDS = 3600 + +MIN_FULL_CYCLES_FOR_TEMPLATE = 3 +MAX_CYCLES_FOR_TEMPLATE = 8 + +DETECT_WINDOW_SECONDS = 20 +RECOVERY_MIN_SECONDS = 60 + +HEALTHY_EMA_ALPHA = 0.10 +RECOVERY_EMA_ALPHA = 0.25 + +OUTSIDE_RATIO_THRESHOLD = 0.60 + +VALLEY_QUANTILE = 45 + +MAX_DATA_LAG_SECONDS = 180 + +PHASE_LOCK_MIN_WINDOW_SECONDS = 45 +PHASE_LOCK_MAX_WINDOW_SECONDS = 180 +PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12 +PHASE_LOCK_ORIGIN_SEARCH_RATIO = 0.35 +PHASE_LOCK_PERIOD_STEP = 1 +PHASE_LOCK_ORIGIN_STEP = 1 + + +# ============================================================================= +# 指标配置 +# ============================================================================= + +PREDICT_TARGETS = [ + { + "query": 'feed_rate{device_id="fanuc-cnc"}', + "pred_metric": "feed_rate_predicted", + "anomaly_metric": "feed_rate_anomaly", + "strategy": "phase_point", + "abs_threshold": 400.0, + "rel_threshold": 0.25, + "smooth_window": 1, + }, + { + 
"query": 'spindle_speed{device_id="fanuc-cnc"}', + "pred_metric": "spindle_speed_predicted", + "anomaly_metric": "spindle_speed_anomaly", + "strategy": "phase_point", + "abs_threshold": 500.0, + "rel_threshold": 0.25, + "smooth_window": 1, + }, + { + "query": 'spindle_current{device_id="fanuc-cnc"}', + "pred_metric": "spindle_current_predicted", + "anomaly_metric": "spindle_current_anomaly", + "strategy": "phase_point", + "abs_threshold": 5.0, + "rel_threshold": 0.25, + "smooth_window": 1, + }, + { + "query": 'vibration_x{device_id="fanuc-cnc"}', + "pred_metric": "vibration_x_predicted", + "anomaly_metric": "vibration_x_anomaly", + "strategy": "phase_band", + "abs_threshold": 0.12, + "rel_threshold": 0.35, + "smooth_window": 5, + "band_low_q": 10, + "band_high_q": 90, + "band_pad_abs": 0.06, + }, + { + "query": 'vibration_y{device_id="fanuc-cnc"}', + "pred_metric": "vibration_y_predicted", + "anomaly_metric": "vibration_y_anomaly", + "strategy": "phase_band", + "abs_threshold": 0.12, + "rel_threshold": 0.35, + "smooth_window": 5, + "band_low_q": 10, + "band_high_q": 90, + "band_pad_abs": 0.06, + }, + { + "query": 'vibration_z{device_id="fanuc-cnc"}', + "pred_metric": "vibration_z_predicted", + "anomaly_metric": "vibration_z_anomaly", + "strategy": "phase_band", + "abs_threshold": 0.12, + "rel_threshold": 0.35, + "smooth_window": 5, + "band_low_q": 10, + "band_high_q": 90, + "band_pad_abs": 0.06, + }, +] + +EXTRA_PREDICT_LABELS = { + "forecast": "phase_band_health_v11", + "source": "protoforge", +} + +BASELINE_STATUS_HEALTHY = "healthy" +BASELINE_STATUS_ANOMALY = "anomaly" +BASELINE_STATUS_RECOVERING = "recovering" + + +# ============================================================================= +# 状态结构 +# ============================================================================= + +@dataclass +class BaselineState: + period: int + phase_origin_ts: int + template: List[float] + lower_template: List[float] + upper_template: List[float] + strategy: str + status: 
str + clean_seconds: int + last_update_ts: int + last_seen_ts: int + y_min: float + y_max: float + + +BASELINE_STATES: Dict[str, BaselineState] = {} +LAST_REAL_TS_WRITTEN: Dict[str, int] = {} + + +# ============================================================================= +# VictoriaMetrics 读取 +# ============================================================================= + +def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]: + now = datetime.now() + start = now - timedelta(minutes=minutes) + + try: + resp = requests.get( + f"{VM_URL}/api/v1/query_range", + params={ + "query": query, + "start": start.timestamp(), + "end": now.timestamp(), + "step": QUERY_STEP, + }, + timeout=10, + ) + resp.raise_for_status() + except requests.RequestException as e: + logger.error("拉取数据失败 query=%s: %s", query, e) + return [], [] + + try: + result = resp.json().get("data", {}).get("result", []) + except Exception as e: + logger.error("解析 VM 返回失败 query=%s: %s", query, e) + return [], [] + + if not result: + return [], [] + + values = result[0].get("values", []) + + ts = [] + ys = [] + + for item in values: + if len(item) < 2: + continue + + try: + t = float(item[0]) + y = float(item[1]) + except Exception: + continue + + if not math.isfinite(t) or not math.isfinite(y): + continue + + ts.append(t) + ys.append(y) + + return ts, ys + + +def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]: + if not ts or not ys or len(ts) != len(ys): + return np.array([]), np.array([]) + + data = {} + + for t, y in zip(ts, ys): + try: + sec = int(round(float(t))) + val = float(y) + except Exception: + continue + + if not math.isfinite(sec) or not math.isfinite(val): + continue + + data[sec] = val + + if not data: + return np.array([]), np.array([]) + + sorted_items = sorted(data.items(), key=lambda x: x[0]) + + ts_clean = np.array([x[0] for x in sorted_items], dtype=float) + ys_clean = np.array([x[1] for x in 
sorted_items], dtype=float) + + if len(ts_clean) < 2: + return ts_clean, ys_clean + + start_sec = int(ts_clean[0]) + end_sec = int(ts_clean[-1]) + + if end_sec <= start_sec: + return ts_clean, ys_clean + + ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float) + ys_grid = np.interp(ts_grid, ts_clean, ys_clean) + + return ts_grid, ys_grid + + +# ============================================================================= +# 平滑与预处理 +# ============================================================================= + +def rolling_median(arr: np.ndarray, window: int) -> np.ndarray: + if window <= 1 or len(arr) < window: + return arr.astype(float) + + if window % 2 == 0: + window += 1 + + pad = window // 2 + padded = np.pad(arr.astype(float), (pad, pad), mode="edge") + + result = [] + + for i in range(len(arr)): + result.append(float(np.median(padded[i:i + window]))) + + return np.array(result, dtype=float) + + +def moving_average(arr: np.ndarray, window: int) -> np.ndarray: + if window <= 1 or len(arr) < window: + return arr.astype(float) + + if window % 2 == 0: + window += 1 + + kernel = np.ones(window, dtype=float) / window + pad = window // 2 + padded = np.pad(arr.astype(float), (pad, pad), mode="edge") + + return np.convolve(padded, kernel, mode="valid") + + +def preprocess_values(ys_grid: np.ndarray, target: Dict) -> np.ndarray: + strategy = target.get("strategy", "phase_point") + smooth_window = int(target.get("smooth_window", 1)) + + if strategy == "phase_band": + return rolling_median(ys_grid, smooth_window) + + if smooth_window > 1: + return moving_average(ys_grid, smooth_window) + + return ys_grid.astype(float) + + +# ============================================================================= +# 周期估计 +# ============================================================================= + +def estimate_period_by_fft(ys_arr: np.ndarray) -> float: + n = len(ys_arr) + + if n < 8: + return 60.0 + + centered = ys_arr - np.mean(ys_arr) + + if np.allclose(centered, 
0): + return 60.0 + + fft_vals = np.fft.rfft(centered) + freqs = np.fft.rfftfreq(n, d=1.0) + + if len(freqs) <= 1: + return 60.0 + + power = np.abs(fft_vals[1:]) + + if len(power) == 0 or np.max(power) <= 0: + return 60.0 + + dominant_idx = int(np.argmax(power)) + 1 + dominant_freq = float(freqs[dominant_idx]) + + if dominant_freq <= 0: + return 60.0 + + period = 1.0 / dominant_freq + + return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + +def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float: + n = len(ys_arr) + + if n < 20: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + centered = ys_arr - np.mean(ys_arr) + + if np.allclose(centered, 0): + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + corr = np.correlate(centered, centered, mode="full")[n - 1:] + + p0 = int(round(init_period)) + left = max(int(MIN_PERIOD_SECONDS), int(max(2, p0 * 0.7))) + right = min(n // 2, int(max(left + 1, p0 * 1.3))) + + if right <= left: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + search = corr[left:right + 1] + + if len(search) == 0: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + best_lag = left + int(np.argmax(search)) + + return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + +def estimate_period_rough(ys_arr: np.ndarray) -> int: + p_fft = estimate_period_by_fft(ys_arr) + p_refined = refine_period_by_autocorr(ys_arr, p_fft) + + period = int(round(p_refined)) + period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period)) + + return int(period) + + +# ============================================================================= +# 谷底检测 +# ============================================================================= + +def find_valley_indices( + ts_grid: np.ndarray, + ys_grid: np.ndarray, + expected_period: int, +) -> List[int]: + n = len(ys_grid) + + if n < max(10, 
expected_period * 2): + return [] + + period = max(3, int(expected_period)) + smooth_window = max(3, int(round(period * 0.08))) + smooth_window = min(smooth_window, 21) + + ys_smooth = moving_average(ys_grid, smooth_window) + threshold = float(np.percentile(ys_smooth, VALLEY_QUANTILE)) + + candidates = [] + + for i in range(1, n - 1): + if ( + ys_smooth[i] <= ys_smooth[i - 1] + and ys_smooth[i] < ys_smooth[i + 1] + and ys_smooth[i] <= threshold + ): + candidates.append(i) + + if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE: + candidates = [] + + for i in range(1, n - 1): + if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]: + candidates.append(i) + + if not candidates: + return [] + + min_distance = max(2, int(round(period * 0.55))) + selected = [] + + for idx in candidates: + if not selected: + selected.append(idx) + continue + + if idx - selected[-1] >= min_distance: + selected.append(idx) + continue + + if ys_smooth[idx] < ys_smooth[selected[-1]]: + selected[-1] = idx + + if len(selected) < 2: + return selected + + cleaned = [selected[0]] + + for idx in selected[1:]: + diff = int(ts_grid[idx] - ts_grid[cleaned[-1]]) + + if int(period * 0.55) <= diff <= int(period * 1.60): + cleaned.append(idx) + continue + + if diff < int(period * 0.55): + if ys_smooth[idx] < ys_smooth[cleaned[-1]]: + cleaned[-1] = idx + continue + + cleaned.append(idx) + + return cleaned + + +def detect_period_and_valleys( + ts_grid: np.ndarray, + ys_grid: np.ndarray, +) -> Tuple[int, List[int]]: + rough = estimate_period_rough(ys_grid) + valleys = find_valley_indices(ts_grid, ys_grid, rough) + + if len(valleys) >= 3: + diffs = np.diff(ts_grid[valleys]) + good = diffs[(diffs >= rough * 0.55) & (diffs <= rough * 1.60)] + + if len(good) > 0: + period = int(round(float(np.median(good)))) + else: + period = rough + else: + period = rough + + period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period)) + + return int(period), valleys + + +# 
# =============================================================================
# Template construction
# =============================================================================

def build_templates_from_valleys(
    ts_grid: np.ndarray,
    ys_grid: np.ndarray,
    period: int,
    valleys: List[int],
    target: Dict,
) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """Build one-period templates from the cycles delimited by valleys.

    Each pair of consecutive valley indices is treated as one cycle. Cycles
    whose duration lies within [0.55, 1.60] x ``period`` are kept, limited to
    the last ``MAX_CYCLES_FOR_TEMPLATE`` of them, and each is linearly
    resampled onto ``period`` samples.

    Args:
        ts_grid: Timestamps; indexed by the entries of ``valleys``.
        ys_grid: Values aligned with ``ts_grid``.
        period: Template length in samples.
        valleys: Indices into ``ts_grid`` / ``ys_grid`` marking cycle starts.
        target: Per-metric config; reads ``strategy``, ``band_low_q`` and
            ``band_high_q``.

    Returns:
        ``(mid, lower, upper)`` float arrays of length ``period``, or ``None``
        when fewer than ``MIN_FULL_CYCLES_FOR_TEMPLATE`` usable cycles exist.
        For any strategy other than ``"phase_band"`` the three arrays are
        equal (lower/upper are copies of the weighted mean).
    """
    if period <= 1 or len(valleys) < MIN_FULL_CYCLES_FOR_TEMPLATE + 1:
        return None

    strategy = target.get("strategy", "phase_point")
    low_q = float(target.get("band_low_q", 10))
    high_q = float(target.get("band_high_q", 90))

    # Keep only valley-to-valley spans whose length is plausible for `period`.
    pairs = []

    for a, b in zip(valleys[:-1], valleys[1:]):
        cycle_len = float(ts_grid[b] - ts_grid[a])

        if period * 0.55 <= cycle_len <= period * 1.60:
            pairs.append((a, b, cycle_len))

    if len(pairs) < MIN_FULL_CYCLES_FOR_TEMPLATE:
        return None

    pairs = pairs[-MAX_CYCLES_FOR_TEMPLATE:]

    phase_grid = np.arange(period, dtype=float)
    segments = []
    weights = []

    for idx, (a, b, cycle_len) in enumerate(pairs):
        seg_ts = ts_grid[a:b + 1]
        seg_y = ys_grid[a:b + 1]

        if len(seg_y) < 3:
            continue

        # Stretch/compress this cycle so that it spans exactly `period` samples.
        x_old = (seg_ts - seg_ts[0]) / cycle_len * period
        seg = np.interp(phase_grid, x_old, seg_y)

        segments.append(seg.astype(float))
        # Weight grows linearly with position in `pairs`, up to 1.0 for the
        # last pair, so later cycles count more in the weighted mean.
        weights.append(0.5 + 0.5 * ((idx + 1) / len(pairs)))

    if len(segments) < MIN_FULL_CYCLES_FOR_TEMPLATE:
        return None

    arr = np.vstack(segments)
    w_arr = np.array(weights, dtype=float)

    if strategy == "phase_band":
        # Band strategy: per-phase median plus configurable percentile envelope.
        mid_template = np.percentile(arr, 50, axis=0)
        lower_template = np.percentile(arr, low_q, axis=0)
        upper_template = np.percentile(arr, high_q, axis=0)
    else:
        mid_template = np.average(arr, axis=0, weights=w_arr)
        lower_template = mid_template.copy()
        upper_template = mid_template.copy()

    return (
        mid_template.astype(float),
        lower_template.astype(float),
        upper_template.astype(float),
    )


def build_current_baseline(
    ts_grid: np.ndarray,
    ys_grid: np.ndarray,
    target: Dict,
    tail_seconds: Optional[int] = None,
) -> Optional[Tuple[int, int, np.ndarray, np.ndarray, np.ndarray]]:
    """Detect the period and build templates from (the tail of) the series.

    Args:
        ts_grid: Timestamps of the series.
        ys_grid: Values aligned with ``ts_grid``.
        target: Per-metric config forwarded to ``build_templates_from_valleys``.
        tail_seconds: When positive, only samples with
            ``ts >= ts_grid[-1] - tail_seconds`` are used; otherwise the whole
            series is used.

    Returns:
        ``(period, phase_origin_ts, template, lower_template, upper_template)``
        where ``phase_origin_ts`` is the rounded timestamp of the last detected
        valley, or ``None`` when there are fewer than ``MIN_POINTS`` samples or
        no template could be built.
    """
    if len(ys_grid) < MIN_POINTS:
        return None

    if tail_seconds is not None and tail_seconds > 0:
        cutoff = ts_grid[-1] - int(tail_seconds)
        mask = ts_grid >= cutoff
        ts_use = ts_grid[mask]
        ys_use = ys_grid[mask]
    else:
        ts_use = ts_grid
        ys_use = ys_grid

    if len(ys_use) < MIN_POINTS:
        return None

    period, valleys = detect_period_and_valleys(ts_use, ys_use)

    templates = build_templates_from_valleys(
        ts_grid=ts_use,
        ys_grid=ys_use,
        period=period,
        valleys=valleys,
        target=target,
    )

    if templates is None or len(valleys) == 0:
        return None

    template, lower_template, upper_template = templates
    # Phase zero of the template is anchored at the most recent valley.
    phase_origin_ts = int(round(float(ts_use[valleys[-1]])))

    return int(period), phase_origin_ts, template, lower_template, upper_template


# =============================================================================
# Template prediction
# =============================================================================

def circular_template_value(template: np.ndarray, phase: float) -> float:
    """Return the template value at ``phase`` with wrap-around interpolation.

    ``phase`` is reduced modulo ``len(template)`` and the result is linearly
    interpolated between the two neighbouring samples; the last sample
    interpolates towards the first. An empty template yields ``0.0``.
    """
    period = len(template)

    if period == 0:
        return 0.0

    phase = float(phase) % period
    i0 = int(math.floor(phase)) % period
    i1 = (i0 + 1) % period
    frac = phase - math.floor(phase)

    return float((1.0 - frac) * template[i0] + frac * template[i1])


def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
    """Resample a periodic template to ``new_period`` samples.

    Both templates are placed on a normalised phase axis in [0, 1) and the new
    samples are obtained by linear interpolation. When either length is <= 1
    a constant array holding the mean of ``old_template`` is returned.
    """
    old_period = len(old_template)

    if old_period == new_period:
        return old_template.astype(float)

    if old_period <= 1 or new_period <= 1:
        return np.full(new_period, float(np.mean(old_template)), dtype=float)

    old_x = np.linspace(0.0, 1.0, old_period, endpoint=False)
    new_x = np.linspace(0.0, 1.0, new_period, endpoint=False)

    # Tile one period before and after so interpolation wraps across the
    # 1.0 -> 0.0 boundary instead of clamping to the end samples.
    old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0])
    old_y_ext = np.concatenate([old_template, old_template, old_template])

    return np.interp(new_x, old_x_ext, old_y_ext).astype(float)


def predict_template_values(
    template: np.ndarray,
    period: int,
    phase_origin_ts: int,
    ts_list: List[int],
) -> np.ndarray:
    """Evaluate a periodic template at each timestamp in ``ts_list``.

    The phase of a timestamp is ``(ts - phase_origin_ts) mod period`` using
    integer arithmetic. The template is resampled first when its length
    differs from ``period``. For ``period <= 1`` an all-zero array is returned.
    """
    if period <= 1:
        return np.zeros(len(ts_list), dtype=float)

    if len(template) != period:
        template = resample_template(template, period)

    origin = int(phase_origin_ts)
    values = [
        circular_template_value(template, (int(ts) - origin) % period)
        for ts in ts_list
    ]

    return np.array(values, dtype=float)


def predict_state_bundle(
    state: "BaselineState",
    ts_list: List[int],
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Predict ``(mid, lower, upper)`` series from the templates in ``state``.

    All three templates are evaluated with the state's ``period`` and
    ``phase_origin_ts`` at the timestamps in ``ts_list``.
    """
    period = int(state.period)
    origin = int(state.phase_origin_ts)

    mid = predict_template_values(
        template=np.array(state.template, dtype=float),
        period=period,
        phase_origin_ts=origin,
        ts_list=ts_list,
    )

    lower = predict_template_values(
        template=np.array(state.lower_template, dtype=float),
        period=period,
        phase_origin_ts=origin,
        ts_list=ts_list,
    )

    upper = predict_template_values(
        template=np.array(state.upper_template, dtype=float),
        period=period,
        phase_origin_ts=origin,
        ts_list=ts_list,
    )

    return mid, lower, upper


def normalize_origin_near(origin: int, period: int, near_ts: int) -> int:
    """Shift ``origin`` by whole periods so it lies just at or before ``near_ts``.

    Returns the value congruent to ``origin`` modulo ``period`` that falls in
    the half-open interval ``(near_ts - period, near_ts]``. ``origin`` is
    returned unchanged when ``period <= 1``.

    The shift is computed in closed form, so the cost does not depend on how
    far ``origin`` is from ``near_ts``.
    """
    if period <= 1:
        return origin

    origin = int(origin)
    period = int(period)
    near_ts = int(near_ts)

    # Floor division rounds towards -inf, so this single expression handles
    # both an origin far in the past and one that is ahead of near_ts.
    return origin + ((near_ts - origin) // period) * period


def merge_template(
    old_template: np.ndarray,
    new_template: np.ndarray,
    alpha: float,
) -> np.ndarray:
    """Blend two templates as ``(1 - alpha) * old + alpha * new``.

    ``alpha`` is clipped to [0, 1]. When the lengths differ, the old template
    is first resampled to the length of the new one.
    """
    alpha = float(np.clip(alpha, 0.0, 1.0))

    if len(old_template) != len(new_template):
        old_template = resample_template(old_template, len(new_template))

    merged = (1.0 - alpha) * old_template + alpha * new_template

    return merged.astype(float)


# =============================================================================
# Phase Lock
============================================================================= + +def phase_lock_recent( + state: BaselineState, + ts_grid: np.ndarray, + ys_model: np.ndarray, +) -> Tuple[int, int, np.ndarray, float]: + base_period = int(state.period) + base_origin = int(state.phase_origin_ts) + base_template = np.array(state.template, dtype=float) + + if base_period <= 1 or len(base_template) <= 1: + ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist() + pred = predict_template_values(base_template, base_period, base_origin, ts_recent) + actual = ys_model[-len(ts_recent):].astype(float) + mae = float(np.mean(np.abs(actual - pred))) if len(actual) else 0.0 + return base_period, base_origin, pred, mae + + window_seconds = max( + PHASE_LOCK_MIN_WINDOW_SECONDS, + min(PHASE_LOCK_MAX_WINDOW_SECONDS, int(base_period * 2)), + ) + + cutoff = ts_grid[-1] - window_seconds + mask = ts_grid >= cutoff + + ts_recent_arr = ts_grid[mask].astype(int) + actual = ys_model[mask].astype(float) + + if len(ts_recent_arr) < max(10, DETECT_WINDOW_SECONDS): + ts_recent_arr = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int) + actual = ys_model[-DETECT_WINDOW_SECONDS:].astype(float) + + ts_recent = ts_recent_arr.tolist() + last_ts = int(ts_recent[-1]) + + p_min = max( + int(MIN_PERIOD_SECONDS), + int(round(base_period * (1.0 - PHASE_LOCK_PERIOD_SEARCH_RATIO))), + ) + p_max = min( + int(MAX_PERIOD_SECONDS), + int(round(base_period * (1.0 + PHASE_LOCK_PERIOD_SEARCH_RATIO))), + ) + + best_period = base_period + best_origin = normalize_origin_near(base_origin, base_period, last_ts) + best_template = resample_template(base_template, best_period) + + best_pred = predict_template_values( + template=best_template, + period=best_period, + phase_origin_ts=best_origin, + ts_list=ts_recent, + ) + + best_mae = float(np.mean(np.abs(actual - best_pred))) + + for period in range(p_min, p_max + 1, PHASE_LOCK_PERIOD_STEP): + template = resample_template(base_template, period) + center_origin = 
normalize_origin_near(base_origin, period, last_ts) + origin_shift = max(2, int(round(period * PHASE_LOCK_ORIGIN_SEARCH_RATIO))) + + for shift in range(-origin_shift, origin_shift + 1, PHASE_LOCK_ORIGIN_STEP): + origin = center_origin + shift + + pred = predict_template_values( + template=template, + period=period, + phase_origin_ts=origin, + ts_list=ts_recent, + ) + + mae = float(np.mean(np.abs(actual - pred))) + penalty = abs(period - base_period) * 0.5 + score = mae + penalty + + best_score = best_mae + abs(best_period - base_period) * 0.5 + + if score < best_score: + best_period = period + best_origin = origin + best_pred = pred + best_mae = mae + + best_origin = normalize_origin_near(best_origin, best_period, last_ts) + + return int(best_period), int(best_origin), best_pred, float(best_mae) + + +# ============================================================================= +# 异常检测 +# ============================================================================= + +def calc_point_bounds( + pred: np.ndarray, + abs_threshold: float, + rel_threshold: float, +) -> Tuple[np.ndarray, np.ndarray]: + threshold = np.maximum(abs_threshold, np.abs(pred) * rel_threshold) + return pred - threshold, pred + threshold + + +def calc_final_bounds( + state: BaselineState, + pred: np.ndarray, + lower_raw: np.ndarray, + upper_raw: np.ndarray, + target: Dict, +) -> Tuple[np.ndarray, np.ndarray]: + strategy = target.get("strategy", "phase_point") + abs_threshold = float(target.get("abs_threshold", 1.0)) + rel_threshold = float(target.get("rel_threshold", 0.25)) + + if strategy == "phase_band": + pad_abs = float(target.get("band_pad_abs", abs_threshold)) + dynamic_pad = np.maximum(pad_abs, np.abs(pred) * rel_threshold * 0.20) + lower = lower_raw - dynamic_pad + upper = upper_raw + dynamic_pad + return lower, upper + + return calc_point_bounds(pred, abs_threshold, rel_threshold) + + +def detect_anomaly( + state: BaselineState, + ts_grid: np.ndarray, + ys_model: np.ndarray, + target: 
Dict, +) -> Tuple[bool, float, float, float, int, int]: + best_period, best_origin, pred_recent, _ = phase_lock_recent( + state=state, + ts_grid=ts_grid, + ys_model=ys_model, + ) + + recent_len = len(pred_recent) + + if recent_len <= 0: + return False, 0.0, 0.0, 0.0, best_period, best_origin + + actual = ys_model[-recent_len:].astype(float) + + tmp_state = BaselineState( + period=best_period, + phase_origin_ts=best_origin, + template=state.template, + lower_template=state.lower_template, + upper_template=state.upper_template, + strategy=state.strategy, + status=state.status, + clean_seconds=state.clean_seconds, + last_update_ts=state.last_update_ts, + last_seen_ts=state.last_seen_ts, + y_min=state.y_min, + y_max=state.y_max, + ) + + recent_ts = ts_grid[-recent_len:].astype(int).tolist() + pred, lower_raw, upper_raw = predict_state_bundle(tmp_state, recent_ts) + + lower, upper = calc_final_bounds( + state=tmp_state, + pred=pred, + lower_raw=lower_raw, + upper_raw=upper_raw, + target=target, + ) + + outside = (actual < lower) | (actual > upper) + abs_err = np.abs(actual - pred) + + outside_ratio = float(np.mean(outside)) + mean_abs_err = float(np.mean(abs_err)) + mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1e-6))) + + is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD + + return ( + is_anomaly, + outside_ratio, + mean_abs_err, + mean_rel_err, + int(best_period), + int(best_origin), + ) + + +# ============================================================================= +# 状态管理 +# ============================================================================= + +def create_initial_state( + ts_grid: np.ndarray, + ys_model: np.ndarray, + target: Dict, + now_sec: int, +) -> Optional[BaselineState]: + baseline = build_current_baseline( + ts_grid=ts_grid, + ys_grid=ys_model, + target=target, + ) + + if baseline is None: + return None + + period, phase_origin_ts, template, lower_template, upper_template = baseline + + return BaselineState( + 
period=int(period), + phase_origin_ts=int(phase_origin_ts), + template=template.astype(float).tolist(), + lower_template=lower_template.astype(float).tolist(), + upper_template=upper_template.astype(float).tolist(), + strategy=str(target.get("strategy", "phase_point")), + status=BASELINE_STATUS_HEALTHY, + clean_seconds=int(period * MAX_CYCLES_FOR_TEMPLATE), + last_update_ts=now_sec, + last_seen_ts=now_sec, + y_min=float(np.min(ys_model)), + y_max=float(np.max(ys_model)), + ) + + +def apply_phase_lock_to_state( + state: BaselineState, + best_period: int, + best_origin: int, +) -> None: + best_period = int(best_period) + + if best_period <= 1: + return + + if len(state.template) != best_period: + state.template = resample_template( + np.array(state.template, dtype=float), + best_period, + ).astype(float).tolist() + + if len(state.lower_template) != best_period: + state.lower_template = resample_template( + np.array(state.lower_template, dtype=float), + best_period, + ).astype(float).tolist() + + if len(state.upper_template) != best_period: + state.upper_template = resample_template( + np.array(state.upper_template, dtype=float), + best_period, + ).astype(float).tolist() + + state.period = best_period + state.phase_origin_ts = int(best_origin) + + +def maybe_update_state( + key: str, + ts_grid: np.ndarray, + ys_model: np.ndarray, + target: Dict, +) -> Tuple[Optional[BaselineState], bool, float, float, float]: + now_sec = int(time.time()) + state = BASELINE_STATES.get(key) + + if state is None: + state = create_initial_state( + ts_grid=ts_grid, + ys_model=ys_model, + target=target, + now_sec=now_sec, + ) + + if state is None: + return None, False, 0.0, 0.0, 0.0 + + BASELINE_STATES[key] = state + + logger.info( + "初始化健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss", + key, + state.strategy, + state.period, + datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), + state.clean_seconds, + ) + + return state, False, 0.0, 0.0, 0.0 + + elapsed = max(1, 
now_sec - int(state.last_seen_ts)) + elapsed = min(elapsed, POLL_INTERVAL * 2) + state.last_seen_ts = now_sec + + ( + is_anomaly, + outside_ratio, + mean_abs_err, + mean_rel_err, + best_period, + best_origin, + ) = detect_anomaly( + state=state, + ts_grid=ts_grid, + ys_model=ys_model, + target=target, + ) + + if is_anomaly: + state.status = BASELINE_STATUS_ANOMALY + state.clean_seconds = 0 + BASELINE_STATES[key] = state + + logger.warning( + "检测到异常,冻结模板 key=%s outside_ratio=%.2f mean_abs_err=%.4f mean_rel_err=%.4f", + key, + outside_ratio, + mean_abs_err, + mean_rel_err, + ) + + return state, True, outside_ratio, mean_abs_err, mean_rel_err + + old_period = int(state.period) + old_origin = int(state.phase_origin_ts) + + apply_phase_lock_to_state(state, best_period, best_origin) + + if old_period != state.period or old_origin != state.phase_origin_ts: + logger.info( + "phase-lock key=%s period %s -> %s origin %s -> %s", + key, + old_period, + state.period, + datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"), + datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), + ) + + if state.status == BASELINE_STATUS_ANOMALY: + state.status = BASELINE_STATUS_RECOVERING + state.clean_seconds = elapsed + BASELINE_STATES[key] = state + + logger.info( + "异常开始恢复 key=%s clean_seconds=%ss", + key, + state.clean_seconds, + ) + + return state, False, outside_ratio, mean_abs_err, mean_rel_err + + if state.status == BASELINE_STATUS_RECOVERING: + state.clean_seconds += elapsed + else: + state.status = BASELINE_STATUS_HEALTHY + state.clean_seconds += elapsed + + min_clean_for_update = max( + RECOVERY_MIN_SECONDS, + int(state.period) * MIN_FULL_CYCLES_FOR_TEMPLATE, + ) + + if state.clean_seconds < min_clean_for_update: + BASELINE_STATES[key] = state + return state, False, outside_ratio, mean_abs_err, mean_rel_err + + tail_seconds = min( + int(state.clean_seconds), + int(state.period) * MAX_CYCLES_FOR_TEMPLATE, + ) + + baseline = build_current_baseline( + ts_grid=ts_grid, + 
ys_grid=ys_model, + target=target, + tail_seconds=tail_seconds, + ) + + if baseline is None: + BASELINE_STATES[key] = state + return state, False, outside_ratio, mean_abs_err, mean_rel_err + + new_period, new_origin, new_template, new_lower_template, new_upper_template = baseline + + alpha = RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else HEALTHY_EMA_ALPHA + + state.template = merge_template( + np.array(state.template, dtype=float), + new_template, + alpha, + ).astype(float).tolist() + + state.lower_template = merge_template( + np.array(state.lower_template, dtype=float), + new_lower_template, + alpha, + ).astype(float).tolist() + + state.upper_template = merge_template( + np.array(state.upper_template, dtype=float), + new_upper_template, + alpha, + ).astype(float).tolist() + + state.period = int(new_period) + state.phase_origin_ts = int(new_origin) + state.status = BASELINE_STATUS_HEALTHY + state.last_update_ts = now_sec + + if tail_seconds > 0 and len(ys_model) >= tail_seconds: + state.y_min = float(np.min(ys_model[-tail_seconds:])) + state.y_max = float(np.max(ys_model[-tail_seconds:])) + else: + state.y_min = float(np.min(ys_model)) + state.y_max = float(np.max(ys_model)) + + BASELINE_STATES[key] = state + + logger.info( + "更新健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss alpha=%.2f", + key, + state.strategy, + state.period, + datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), + state.clean_seconds, + alpha, + ) + + return state, False, outside_ratio, mean_abs_err, mean_rel_err + + +# ============================================================================= +# Prometheus 写入 +# ============================================================================= + +def prom_escape_label_value(value: str) -> str: + return ( + str(value) + .replace("\\", "\\\\") + .replace("\n", "\\n") + .replace('"', '\\"') + ) + + +def labels_to_str(labels: Dict[str, str]) -> str: + if not labels: + return "" + + parts = [] + + for k in 
sorted(labels.keys()): + parts.append(f'{k}="{prom_escape_label_value(labels[k])}"') + + return "{" + ",".join(parts) + "}" + + +def write_series( + metric_name: str, + labels: Dict[str, str], + ts_list: List[int], + values: List[float], +) -> bool: + if not ts_list or not values or len(ts_list) != len(values): + return False + + label_str = labels_to_str(labels) + lines = [] + + for t, y in zip(ts_list, values): + try: + ts_sec = int(round(float(t))) + val = float(y) + except Exception: + continue + + if not math.isfinite(ts_sec) or not math.isfinite(val): + continue + + lines.append(f"{metric_name}{label_str} {val:.6f} {ts_sec * 1000}") + + if not lines: + return False + + payload = "\n".join(lines) + "\n" + + try: + resp = requests.post( + f"{VM_URL}/api/v1/import/prometheus", + data=payload.encode("utf-8"), + headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"}, + timeout=10, + ) + resp.raise_for_status() + return True + + except requests.RequestException as e: + logger.error("写入数据失败 metric=%s: %s", metric_name, e) + return False + + +def write_prediction_bundle( + pred_metric: str, + anomaly_metric: str, + labels: Dict[str, str], + ts_future: List[int], + pred_values: np.ndarray, + lower_values: np.ndarray, + upper_values: np.ndarray, + is_anomaly: bool, + outside_ratio: float, + mean_abs_err: float, + mean_rel_err: float, + event_ts: int, +) -> bool: + ok1 = write_series( + metric_name=pred_metric, + labels=labels, + ts_list=ts_future, + values=pred_values.astype(float).tolist(), + ) + + ok2 = write_series( + metric_name=f"{pred_metric}_lower", + labels=labels, + ts_list=ts_future, + values=lower_values.astype(float).tolist(), + ) + + ok3 = write_series( + metric_name=f"{pred_metric}_upper", + labels=labels, + ts_list=ts_future, + values=upper_values.astype(float).tolist(), + ) + + anomaly_labels = dict(labels) + anomaly_labels["type"] = "prediction_deviation" + + ok4 = write_series( + metric_name=anomaly_metric, + labels=anomaly_labels, + 
ts_list=[event_ts], + values=[1.0 if is_anomaly else 0.0], + ) + + ok5 = write_series( + metric_name=f"{anomaly_metric}_outside_ratio", + labels=anomaly_labels, + ts_list=[event_ts], + values=[outside_ratio], + ) + + ok6 = write_series( + metric_name=f"{anomaly_metric}_mean_abs_error", + labels=anomaly_labels, + ts_list=[event_ts], + values=[mean_abs_err], + ) + + ok7 = write_series( + metric_name=f"{anomaly_metric}_mean_rel_error", + labels=anomaly_labels, + ts_list=[event_ts], + values=[mean_rel_err], + ) + + return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7 + + +# ============================================================================= +# 标签解析 +# ============================================================================= + +_LABEL_PATTERN = re.compile( + r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*' +) + + +def parse_labels_from_query(query: str) -> Dict[str, str]: + labels = {} + + if "{" not in query or "}" not in query: + return labels + + try: + label_part = query[query.index("{") + 1:query.rindex("}")] + except Exception: + return labels + + for match in _LABEL_PATTERN.finditer(label_part): + key = match.group(1) + value = match.group(2) + + value = ( + value + .replace('\\"', '"') + .replace("\\n", "\n") + .replace("\\\\", "\\") + ) + + labels[key] = value + + return labels + + +def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]: + result = {} + + for d in dicts: + if d: + result.update(d) + + return result + + +def series_key(metric_name: str, labels: Dict[str, str]) -> str: + return metric_name + labels_to_str(labels) + + +# ============================================================================= +# 状态持久化 +# ============================================================================= + +def load_state() -> None: + global BASELINE_STATES + + if not os.path.exists(STATE_FILE): + return + + try: + with open(STATE_FILE, "r", encoding="utf-8") as f: + raw = json.load(f) + + states = {} + + for key, value in 
raw.get("baseline_states", {}).items(): + required_fields = { + "period", + "phase_origin_ts", + "template", + "lower_template", + "upper_template", + "strategy", + "status", + "clean_seconds", + "last_update_ts", + "last_seen_ts", + "y_min", + "y_max", + } + + if not required_fields.issubset(set(value.keys())): + continue + + states[key] = BaselineState(**value) + + BASELINE_STATES = states + + logger.info( + "已加载预测状态文件 %s,状态数量=%d", + STATE_FILE, + len(BASELINE_STATES), + ) + + except Exception as e: + logger.warning("加载预测状态文件失败,将重新学习: %s", e) + + +def save_state() -> None: + try: + raw = { + "baseline_states": { + key: asdict(value) + for key, value in BASELINE_STATES.items() + } + } + + tmp_file = STATE_FILE + ".tmp" + + with open(tmp_file, "w", encoding="utf-8") as f: + json.dump(raw, f, ensure_ascii=False, indent=2) + + os.replace(tmp_file, STATE_FILE) + + except Exception as e: + logger.warning("保存预测状态文件失败: %s", e) + + +# ============================================================================= +# 时间轴 +# ============================================================================= + +def build_prediction_timestamps( + key: str, + last_real_ts: int, + now_sec: int, +) -> Optional[List[int]]: + data_lag = now_sec - last_real_ts + + if data_lag > MAX_DATA_LAG_SECONDS: + logger.warning( + "真实数据延迟过大,跳过预测 key=%s data_lag=%ss max=%ss", + key, + data_lag, + MAX_DATA_LAG_SECONDS, + ) + return None + + last_written_real_ts = LAST_REAL_TS_WRITTEN.get(key) + + if last_written_real_ts is not None and last_real_ts <= int(last_written_real_ts): + logger.info( + "真实数据时间戳未推进,跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s", + key, + last_real_ts, + last_written_real_ts, + ) + return None + + base_ts = last_real_ts + + return [ + base_ts + i + 1 + for i in range(WRITE_HORIZON_SECONDS) + ] + + +# ============================================================================= +# 主流程 +# ============================================================================= + +def 
run_once() -> None: + now_str = datetime.now().strftime("%H:%M:%S") + + for target in PREDICT_TARGETS: + query = target["query"] + pred_metric = target["pred_metric"] + anomaly_metric = target["anomaly_metric"] + + ts, ys = fetch_history(query) + + if len(ys) < MIN_POINTS: + logger.info("[%s] %s 数据不足(%d 点),跳过", now_str, query, len(ys)) + continue + + ts_grid, ys_grid_raw = normalize_history(ts, ys) + + if len(ys_grid_raw) < MIN_POINTS: + logger.info("[%s] %s 清洗后数据不足(%d 点),跳过", now_str, query, len(ys_grid_raw)) + continue + + ys_grid_model = preprocess_values(ys_grid_raw, target) + + base_labels = parse_labels_from_query(query) + write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS) + + key = series_key(pred_metric, write_labels) + + state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err = maybe_update_state( + key=key, + ts_grid=ts_grid, + ys_model=ys_grid_model, + target=target, + ) + + if state is None: + logger.info("[%s] %s 暂无可用健康模板,等待学习", now_str, query) + continue + + now_sec = int(time.time()) + last_real_ts = int(ts_grid[-1]) + data_lag = now_sec - last_real_ts + + ts_future = build_prediction_timestamps( + key=key, + last_real_ts=last_real_ts, + now_sec=now_sec, + ) + + if not ts_future: + continue + + pred_values, lower_raw, upper_raw = predict_state_bundle(state, ts_future) + + lower_values, upper_values = calc_final_bounds( + state=state, + pred=pred_values, + lower_raw=lower_raw, + upper_raw=upper_raw, + target=target, + ) + + ok = write_prediction_bundle( + pred_metric=pred_metric, + anomaly_metric=anomaly_metric, + labels=write_labels, + ts_future=ts_future, + pred_values=pred_values, + lower_values=lower_values, + upper_values=upper_values, + is_anomaly=is_anomaly, + outside_ratio=outside_ratio, + mean_abs_err=mean_abs_err, + mean_rel_err=mean_rel_err, + event_ts=last_real_ts, + ) + + if not ok: + logger.error("[%s] %s 写入预测数据失败", now_str, query) + continue + + LAST_REAL_TS_WRITTEN[key] = last_real_ts + + future_start = 
datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S") + future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S") + last_real_str = datetime.fromtimestamp(last_real_ts).strftime("%H:%M:%S") + origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S") + + logger.info( + "[%s] %-40s → %-35s strategy=%s status=%s anomaly=%s period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点,预测区间 %s ~ %s", + now_str, + query, + pred_metric, + state.strategy, + state.status, + is_anomaly, + state.period, + origin_str, + last_real_str, + data_lag, + len(ts_future), + future_start, + future_end, + ) + + save_state() + + +def main() -> None: + load_state() + + logger.info( + "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s", + VM_URL, + HISTORY_MINUTES, + HORIZON_SECONDS, + WRITE_HORIZON_SECONDS, + POLL_INTERVAL, + STATE_FILE, + EXTRA_PREDICT_LABELS["forecast"], + ) + + while True: + run_once() + time.sleep(POLL_INTERVAL) + + +if __name__ == "__main__": + main() \ No newline at end of file From 3609fbae4e2fbb33700de79ee1f7730ac81e366c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Thu, 21 May 2026 21:39:55 +0800 Subject: [PATCH 27/55] fix --- ai/pridict_v4.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ai/pridict_v4.py b/ai/pridict_v4.py index 774ad3a..c55f21a 100644 --- a/ai/pridict_v4.py +++ b/ai/pridict_v4.py @@ -120,11 +120,11 @@ "anomaly_metric": "vibration_x_anomaly", "strategy": "phase_band", "abs_threshold": 0.12, - "rel_threshold": 0.35, + "rel_threshold": 0.40, "smooth_window": 5, "band_low_q": 10, "band_high_q": 90, - "band_pad_abs": 0.06, + "band_pad_abs": 0.08, }, { "query": 'vibration_y{device_id="fanuc-cnc"}', @@ -132,11 +132,11 @@ "anomaly_metric": "vibration_y_anomaly", "strategy": "phase_band", "abs_threshold": 0.12, - "rel_threshold": 0.35, + "rel_threshold": 0.40, "smooth_window": 5, "band_low_q": 10, "band_high_q": 90, - "band_pad_abs": 
0.06, + "band_pad_abs": 0.08, }, { "query": 'vibration_z{device_id="fanuc-cnc"}', @@ -144,11 +144,11 @@ "anomaly_metric": "vibration_z_anomaly", "strategy": "phase_band", "abs_threshold": 0.12, - "rel_threshold": 0.35, + "rel_threshold": 0.40, "smooth_window": 5, "band_low_q": 10, "band_high_q": 90, - "band_pad_abs": 0.06, + "band_pad_abs": 0.08, }, ] From 1c4217b31d9304cddeb6be16949cc54eaa8d31ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Thu, 21 May 2026 21:48:41 +0800 Subject: [PATCH 28/55] fix --- ai/pridict_v4.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/ai/pridict_v4.py b/ai/pridict_v4.py index c55f21a..8657944 100644 --- a/ai/pridict_v4.py +++ b/ai/pridict_v4.py @@ -119,37 +119,37 @@ "pred_metric": "vibration_x_predicted", "anomaly_metric": "vibration_x_anomaly", "strategy": "phase_band", - "abs_threshold": 0.12, - "rel_threshold": 0.40, + "abs_threshold": 0.18, + "rel_threshold": 0.50, "smooth_window": 5, - "band_low_q": 10, - "band_high_q": 90, - "band_pad_abs": 0.08, + "band_low_q": 2, + "band_high_q": 98, + "band_pad_abs": 0.12, }, { "query": 'vibration_y{device_id="fanuc-cnc"}', "pred_metric": "vibration_y_predicted", "anomaly_metric": "vibration_y_anomaly", "strategy": "phase_band", - "abs_threshold": 0.12, - "rel_threshold": 0.40, + "abs_threshold": 0.18, + "rel_threshold": 0.50, "smooth_window": 5, - "band_low_q": 10, - "band_high_q": 90, - "band_pad_abs": 0.08, + "band_low_q": 2, + "band_high_q": 98, + "band_pad_abs": 0.12, }, { "query": 'vibration_z{device_id="fanuc-cnc"}', "pred_metric": "vibration_z_predicted", "anomaly_metric": "vibration_z_anomaly", "strategy": "phase_band", - "abs_threshold": 0.12, - "rel_threshold": 0.40, + "abs_threshold": 0.18, + "rel_threshold": 0.50, "smooth_window": 5, - "band_low_q": 10, - "band_high_q": 90, - "band_pad_abs": 0.08, - }, + "band_low_q": 2, + "band_high_q": 98, + "band_pad_abs": 0.12, + } ] EXTRA_PREDICT_LABELS = { 
From 4077e8f416d9d2f9d60df50d551fc817ddacd982 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Fri, 22 May 2026 09:40:02 +0800 Subject: [PATCH 29/55] feat(predict_v5): add predict v5 --- ai/pridict_v5.py | 1794 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1794 insertions(+) create mode 100644 ai/pridict_v5.py diff --git a/ai/pridict_v5.py b/ai/pridict_v5.py new file mode 100644 index 0000000..6894a66 --- /dev/null +++ b/ai/pridict_v5.py @@ -0,0 +1,1794 @@ +# -*- coding: utf-8 -*- +""" +ProtoForge Predictor v12 + +核心能力: +1. feed_rate / spindle_speed / spindle_current 使用 phase-lock 点预测。 +2. vibration_x / vibration_y / vibration_z 使用 phase-band 预测带。 +3. vibration 类指标: + - predicted 使用平滑后的中位数模板,用于趋势参考。 + - upper/lower 使用原始波动分位数模板 + padding,用于正常波动容忍带。 + - 偶发越界不直接报警,只有持续越界 / 高比例越界 / 严重越界才报警。 +4. 预测起点锚定最后一个真实点 last_real_ts,避免时间错位。 +5. 异常期间冻结健康模板,不学习故障数据。 +6. 故障恢复后等待稳定,再恢复模板学习。 +7. 写入: + - xxx_predicted + - xxx_predicted_upper + - xxx_predicted_lower + - xxx_anomaly + - xxx_anomaly_outside_ratio + - xxx_anomaly_mean_abs_error + - xxx_anomaly_mean_rel_error + - xxx_anomaly_max_consecutive_outside + - xxx_anomaly_max_exceed_ratio +""" + +import json +import logging +import math +import os +import re +import time +from dataclasses import asdict, dataclass +from datetime import datetime, timedelta +from typing import Dict, List, Optional, Tuple + +import numpy as np +import requests + + +# ============================================================================= +# 日志配置 +# ============================================================================= + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", +) + +logger = logging.getLogger(__name__) + + +# ============================================================================= +# 基础配置 +# ============================================================================= + +VM_URL = "http://localhost:8428" +STATE_FILE = 
"/tmp/protoforge_predictor_state_v12.json" + +HISTORY_MINUTES = 30 +HORIZON_SECONDS = 120 +POLL_INTERVAL = 30 + +WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL) + +QUERY_STEP = "1s" +MIN_POINTS = 120 + +MIN_PERIOD_SECONDS = 5 +MAX_PERIOD_SECONDS = 3600 + +MIN_FULL_CYCLES_FOR_TEMPLATE = 3 +MAX_CYCLES_FOR_TEMPLATE = 8 + +DETECT_WINDOW_SECONDS = 30 +RECOVERY_MIN_SECONDS = 60 + +HEALTHY_EMA_ALPHA = 0.10 +RECOVERY_EMA_ALPHA = 0.25 + +OUTSIDE_RATIO_THRESHOLD = 0.60 +MIN_CONSECUTIVE_OUTSIDE = 5 +SEVERE_EXCEED_RATIO = 1.8 + +VALLEY_QUANTILE = 45 + +MAX_DATA_LAG_SECONDS = 180 + +PHASE_LOCK_MIN_WINDOW_SECONDS = 45 +PHASE_LOCK_MAX_WINDOW_SECONDS = 180 +PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12 +PHASE_LOCK_ORIGIN_SEARCH_RATIO = 0.35 +PHASE_LOCK_PERIOD_STEP = 1 +PHASE_LOCK_ORIGIN_STEP = 1 + + +# ============================================================================= +# 指标配置 +# ============================================================================= + +PREDICT_TARGETS = [ + { + "query": 'feed_rate{device_id="fanuc-cnc"}', + "pred_metric": "feed_rate_predicted", + "anomaly_metric": "feed_rate_anomaly", + "strategy": "phase_point", + "abs_threshold": 400.0, + "rel_threshold": 0.25, + "smooth_window": 1, + "outside_ratio_threshold": 0.60, + "min_consecutive_outside": 5, + "severe_exceed_ratio": 1.8, + }, + { + "query": 'spindle_speed{device_id="fanuc-cnc"}', + "pred_metric": "spindle_speed_predicted", + "anomaly_metric": "spindle_speed_anomaly", + "strategy": "phase_point", + "abs_threshold": 500.0, + "rel_threshold": 0.25, + "smooth_window": 1, + "outside_ratio_threshold": 0.60, + "min_consecutive_outside": 5, + "severe_exceed_ratio": 1.8, + }, + { + "query": 'spindle_current{device_id="fanuc-cnc"}', + "pred_metric": "spindle_current_predicted", + "anomaly_metric": "spindle_current_anomaly", + "strategy": "phase_point", + "abs_threshold": 5.0, + "rel_threshold": 0.25, + "smooth_window": 1, + "outside_ratio_threshold": 0.60, + "min_consecutive_outside": 5, + 
"severe_exceed_ratio": 1.8, + }, + { + "query": 'vibration_x{device_id="fanuc-cnc"}', + "pred_metric": "vibration_x_predicted", + "anomaly_metric": "vibration_x_anomaly", + "strategy": "phase_band", + + # vibration 类指标噪声、尖峰较多,不建议用很窄的阈值。 + "abs_threshold": 0.18, + "rel_threshold": 0.55, + + # 平滑只用于相位锁定和 predicted 中位趋势。 + "smooth_window": 5, + + # upper/lower 用原始值分位数,范围放宽,覆盖正常尖峰。 + "band_low_q": 1, + "band_high_q": 99, + "band_pad_abs": 0.15, + + # 偶发越界容忍。 + "outside_ratio_threshold": 0.70, + "min_consecutive_outside": 5, + "severe_exceed_ratio": 2.0, + }, + { + "query": 'vibration_y{device_id="fanuc-cnc"}', + "pred_metric": "vibration_y_predicted", + "anomaly_metric": "vibration_y_anomaly", + "strategy": "phase_band", + "abs_threshold": 0.18, + "rel_threshold": 0.55, + "smooth_window": 5, + "band_low_q": 1, + "band_high_q": 99, + "band_pad_abs": 0.15, + "outside_ratio_threshold": 0.70, + "min_consecutive_outside": 5, + "severe_exceed_ratio": 2.0, + }, + { + "query": 'vibration_z{device_id="fanuc-cnc"}', + "pred_metric": "vibration_z_predicted", + "anomaly_metric": "vibration_z_anomaly", + "strategy": "phase_band", + "abs_threshold": 0.18, + "rel_threshold": 0.55, + "smooth_window": 5, + "band_low_q": 1, + "band_high_q": 99, + "band_pad_abs": 0.15, + "outside_ratio_threshold": 0.70, + "min_consecutive_outside": 5, + "severe_exceed_ratio": 2.0, + }, +] + +EXTRA_PREDICT_LABELS = { + "forecast": "phase_band_health_v12", + "source": "protoforge", +} + +BASELINE_STATUS_HEALTHY = "healthy" +BASELINE_STATUS_ANOMALY = "anomaly" +BASELINE_STATUS_RECOVERING = "recovering" + + +# ============================================================================= +# 状态结构 +# ============================================================================= + +@dataclass +class BaselineState: + period: int + phase_origin_ts: int + template: List[float] + lower_template: List[float] + upper_template: List[float] + strategy: str + status: str + clean_seconds: int + last_update_ts: int + 
def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]:
    """Fetch the recent samples of one series from VictoriaMetrics.

    Issues a ``query_range`` request covering the last ``minutes`` minutes at
    ``QUERY_STEP`` resolution and returns the first matching series only.

    Args:
        query: Selector/expression sent as the ``query`` parameter.
        minutes: Length of the look-back window in minutes.

    Returns:
        ``(timestamps, values)`` as two equally long lists of finite floats.
        Both lists are empty when the request fails, the payload cannot be
        parsed, or no series matches.
    """
    now = datetime.now()
    start = now - timedelta(minutes=minutes)

    try:
        resp = requests.get(
            f"{VM_URL}/api/v1/query_range",
            params={
                "query": query,
                "start": start.timestamp(),
                "end": now.timestamp(),
                "step": QUERY_STEP,
            },
            timeout=10,
        )
        resp.raise_for_status()
    except requests.RequestException as e:
        logger.error("拉取数据失败 query=%s: %s", query, e)
        return [], []

    # Every access that depends on the payload shape lives inside the guarded
    # section, so a malformed response is logged instead of raising into the
    # caller.
    try:
        result = resp.json().get("data", {}).get("result", [])
        values = (result[0].get("values") or []) if result else []
    except (ValueError, AttributeError, TypeError, KeyError, IndexError) as e:
        logger.error("解析 VM 返回失败 query=%s: %s", query, e)
        return [], []

    ts: List[float] = []
    ys: List[float] = []

    for item in values:
        try:
            t = float(item[0])
            y = float(item[1])
        except (TypeError, ValueError, IndexError, KeyError):
            # Short or non-numeric sample: skip it, keep the rest.
            continue

        if math.isfinite(t) and math.isfinite(y):
            ts.append(t)
            ys.append(y)

    return ts, ys
def rolling_median(arr: np.ndarray, window: int) -> np.ndarray:
    """Return the centered rolling median of ``arr`` with edge padding.

    Args:
        arr: 1-D array-like of samples.
        window: Window length in samples. Even values are widened by one so
            the window has a center sample.

    Returns:
        A new float array with the same length as ``arr``. The values are
        returned unsmoothed when ``window`` is <= 1 or exceeds the input
        length.
    """
    values = np.array(arr, dtype=float)

    if window <= 1 or len(values) < window:
        return values

    if window % 2 == 0:
        window += 1

    half = window // 2
    # Repeat the edge samples so every output position has a full window.
    padded = np.pad(values, (half, half), mode="edge")
    # One row per output sample; a single median call replaces the
    # per-sample Python loop.
    windows = np.lib.stride_tricks.sliding_window_view(padded, window)

    return np.median(windows, axis=1)


def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
    """Return the centered moving average of ``arr`` with edge padding.

    Args:
        arr: 1-D array-like of samples.
        window: Window length in samples. Even values are widened by one.

    Returns:
        A new float array with the same length as ``arr``. The values are
        returned unsmoothed when ``window`` is <= 1 or exceeds the input
        length.
    """
    values = np.array(arr, dtype=float)

    if window <= 1 or len(values) < window:
        return values

    if window % 2 == 0:
        window += 1

    half = window // 2
    kernel = np.ones(window, dtype=float) / window
    padded = np.pad(values, (half, half), mode="edge")

    # "valid" on the padded signal yields exactly len(values) samples.
    return np.convolve(padded, kernel, mode="valid")


def preprocess_values(ys_grid: np.ndarray, target: Dict) -> np.ndarray:
    """Smooth a series according to the target's strategy.

    ``phase_band`` targets use a rolling median; other targets use a moving
    average when ``smooth_window`` is greater than 1 and are otherwise
    returned as a float copy.

    Args:
        ys_grid: 1-D array-like of samples.
        target: Target configuration; reads ``strategy`` (default
            ``"phase_point"``) and ``smooth_window`` (default 1).

    Returns:
        A float array with the same length as ``ys_grid``.
    """
    strategy = target.get("strategy", "phase_point")
    smooth_window = int(target.get("smooth_window", 1))

    if strategy == "phase_band":
        return rolling_median(ys_grid, smooth_window)

    if smooth_window > 1:
        return moving_average(ys_grid, smooth_window)

    return np.array(ys_grid, dtype=float)
def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:
    """Refine a period estimate using the autocorrelation peak near it.

    The lag with the largest autocorrelation is searched in roughly
    [0.7, 1.3] x ``init_period``, capped at half the series length.

    Args:
        ys_arr: 1-D sample array.
        init_period: Initial period estimate, in samples.

    Returns:
        The refined period clipped to
        [MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS]. The clipped ``init_period``
        is returned when the series is shorter than 20 samples, is constant,
        or leaves no usable search range.
    """

    def clipped(value: float) -> float:
        return float(np.clip(value, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))

    size = len(ys_arr)
    if size < 20:
        return clipped(init_period)

    centered = ys_arr - np.mean(ys_arr)
    if np.allclose(centered, 0):
        return clipped(init_period)

    # Keep only the non-negative lags of the full autocorrelation.
    autocorr = np.correlate(centered, centered, mode="full")[size - 1:]

    guess = int(round(init_period))
    lag_lo = max(int(MIN_PERIOD_SECONDS), int(max(2, guess * 0.7)))
    lag_hi = min(size // 2, int(max(lag_lo + 1, guess * 1.3)))
    if lag_hi <= lag_lo:
        return clipped(init_period)

    candidates = autocorr[lag_lo:lag_hi + 1]
    if len(candidates) == 0:
        return clipped(init_period)

    return clipped(lag_lo + int(np.argmax(candidates)))


def estimate_period_rough(ys_arr: np.ndarray) -> int:
    """Estimate the period by FFT, refine it by autocorrelation, and clamp.

    Returns:
        The period as an int within
        [MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS].
    """
    refined = refine_period_by_autocorr(ys_arr, estimate_period_by_fft(ys_arr))
    rounded = int(round(refined))
    floor_value = int(MIN_PERIOD_SECONDS)
    ceil_value = int(MAX_PERIOD_SECONDS)

    return int(max(floor_value, min(ceil_value, rounded)))
def detect_period_and_valleys(
    ts_grid: np.ndarray,
    ys_grid: np.ndarray,
) -> Tuple[int, List[int]]:
    """Estimate the cycle period and locate valley indices.

    The rough period drives the valley search. When at least three valleys
    are found, the period becomes the median of the valley spacings that lie
    within [0.55, 1.60] x the rough period; otherwise the rough period is
    kept.

    Returns:
        ``(period, valleys)`` where ``period`` is clamped to
        [MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS] and ``valleys`` are indices
        into the grids.
    """
    rough = estimate_period_rough(ys_grid)
    valleys = find_valley_indices(ts_grid, ys_grid, rough)

    period = rough
    if len(valleys) >= 3:
        spacing = np.diff(ts_grid[valleys])
        plausible = spacing[(spacing >= rough * 0.55) & (spacing <= rough * 1.60)]
        if len(plausible) > 0:
            period = int(round(float(np.median(plausible))))

    floor_value = int(MIN_PERIOD_SECONDS)
    ceil_value = int(MAX_PERIOD_SECONDS)

    return int(max(floor_value, min(ceil_value, period))), valleys
============================================================================= +# 模板构建 +# ============================================================================= + +def build_templates_from_valleys( + ts_grid: np.ndarray, + ys_mid_grid: np.ndarray, + ys_band_grid: np.ndarray, + period: int, + valleys: List[int], + target: Dict, +) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray]]: + if period <= 1 or len(valleys) < MIN_FULL_CYCLES_FOR_TEMPLATE + 1: + return None + + strategy = target.get("strategy", "phase_point") + low_q = float(target.get("band_low_q", 10)) + high_q = float(target.get("band_high_q", 90)) + + pairs = [] + + for a, b in zip(valleys[:-1], valleys[1:]): + cycle_len = float(ts_grid[b] - ts_grid[a]) + + if period * 0.55 <= cycle_len <= period * 1.60: + pairs.append((a, b, cycle_len)) + + if len(pairs) < MIN_FULL_CYCLES_FOR_TEMPLATE: + return None + + pairs = pairs[-MAX_CYCLES_FOR_TEMPLATE:] + + phase_grid = np.arange(period, dtype=float) + mid_segments = [] + band_segments = [] + weights = [] + + for idx, (a, b, cycle_len) in enumerate(pairs): + seg_ts = ts_grid[a:b + 1] + seg_mid_y = ys_mid_grid[a:b + 1] + seg_band_y = ys_band_grid[a:b + 1] + + if len(seg_mid_y) < 3 or len(seg_band_y) < 3: + continue + + x_old = (seg_ts - seg_ts[0]) / cycle_len * period + + mid_seg = np.interp(phase_grid, x_old, seg_mid_y) + band_seg = np.interp(phase_grid, x_old, seg_band_y) + + mid_segments.append(mid_seg.astype(float)) + band_segments.append(band_seg.astype(float)) + weights.append(0.5 + 0.5 * ((idx + 1) / len(pairs))) + + if len(mid_segments) < MIN_FULL_CYCLES_FOR_TEMPLATE: + return None + + mid_arr = np.vstack(mid_segments) + band_arr = np.vstack(band_segments) + w_arr = np.array(weights, dtype=float) + + if strategy == "phase_band": + mid_template = np.percentile(mid_arr, 50, axis=0) + + # upper/lower 使用原始值分布,而不是平滑值分布。 + lower_template = np.percentile(band_arr, low_q, axis=0) + upper_template = np.percentile(band_arr, high_q, axis=0) + else: + 
def build_current_baseline(
    ts_grid: np.ndarray,
    ys_mid_grid: np.ndarray,
    ys_band_grid: np.ndarray,
    target: Dict,
    tail_seconds: Optional[int] = None,
) -> Optional[Tuple[int, int, np.ndarray, np.ndarray, np.ndarray]]:
    """Build period, phase origin and templates from (recent) history.

    Args:
        ts_grid: Timestamps of the samples.
        ys_mid_grid: Series used for period/valley detection and the
            mid template.
        ys_band_grid: Series used for the lower/upper templates.
        target: Target configuration forwarded to the template builder.
        tail_seconds: When positive, only samples within this many seconds
            of the last timestamp are used.

    Returns:
        ``(period, phase_origin_ts, template, lower_template,
        upper_template)``, where the phase origin is the timestamp of the
        last detected valley. ``None`` when fewer than ``MIN_POINTS`` samples
        are available or no template could be built.
    """

    def too_short(mid: np.ndarray, band: np.ndarray) -> bool:
        return len(mid) < MIN_POINTS or len(band) < MIN_POINTS

    if too_short(ys_mid_grid, ys_band_grid):
        return None

    ts_use, ys_mid_use, ys_band_use = ts_grid, ys_mid_grid, ys_band_grid

    if tail_seconds is not None and tail_seconds > 0:
        recent = ts_grid >= ts_grid[-1] - int(tail_seconds)
        ts_use = ts_grid[recent]
        ys_mid_use = ys_mid_grid[recent]
        ys_band_use = ys_band_grid[recent]

    if too_short(ys_mid_use, ys_band_use):
        return None

    period, valleys = detect_period_and_valleys(ts_use, ys_mid_use)

    templates = build_templates_from_valleys(
        ts_grid=ts_use,
        ys_mid_grid=ys_mid_use,
        ys_band_grid=ys_band_use,
        period=period,
        valleys=valleys,
        target=target,
    )

    if templates is None or len(valleys) == 0:
        return None

    mid, lower, upper = templates
    # Phase zero is anchored on the most recent valley.
    origin_ts = int(round(float(ts_use[valleys[-1]])))

    return int(period), origin_ts, mid, lower, upper


# =============================================================================
# Template prediction
# =============================================================================

def circular_template_value(template: np.ndarray, phase: float) -> float:
    """Linearly interpolate ``template`` at ``phase``, wrapping circularly.

    Args:
        template: One cycle of values; its length is the period.
        phase: Position within the cycle; any real value is wrapped into
            ``[0, len(template))``.

    Returns:
        The interpolated value, or ``0.0`` for an empty template.
    """
    size = len(template)
    if size == 0:
        return 0.0

    wrapped = float(phase) % size
    base = math.floor(wrapped)
    lo = int(base) % size
    hi = (lo + 1) % size
    weight = wrapped - base

    return float((1.0 - weight) * template[lo] + weight * template[hi])
def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
    """Resample a one-cycle template to ``new_period`` samples.

    The template is treated as periodic, so interpolation wraps from the
    last sample back to the first.

    Args:
        old_template: 1-D array-like holding one cycle.
        new_period: Number of samples in the result.

    Returns:
        A new float array of length ``new_period``. When either length is
        <= 1 the result is filled with the mean of ``old_template``.
    """
    source = np.array(old_template, dtype=float)
    old_period = len(source)

    if old_period == new_period:
        return source

    if old_period <= 1 or new_period <= 1:
        return np.full(new_period, float(np.mean(source)), dtype=float)

    old_x = np.linspace(0.0, 1.0, old_period, endpoint=False)
    new_x = np.linspace(0.0, 1.0, new_period, endpoint=False)

    # period=1.0 makes np.interp wrap around the cycle boundary.
    return np.interp(new_x, old_x, source, period=1.0).astype(float)


def predict_template_values(
    template: np.ndarray,
    period: int,
    phase_origin_ts: int,
    ts_list: List[int],
) -> np.ndarray:
    """Evaluate a periodic template at the given timestamps.

    Args:
        template: One cycle of values; resampled when its length differs
            from ``period``.
        period: Cycle length in seconds.
        phase_origin_ts: Timestamp that maps to phase 0.
        ts_list: Timestamps (seconds) to evaluate; truncated to int.

    Returns:
        Float array with one value per timestamp; all zeros when
        ``period`` <= 1.
    """
    if period <= 1:
        return np.zeros(len(ts_list), dtype=float)

    cycle = np.asarray(template, dtype=float)
    if len(cycle) != period:
        cycle = resample_template(cycle, period)

    # Timestamps and origin are whole seconds, so every phase is an integer
    # index into the template and no interpolation between samples is
    # needed (assumes finite template values).
    ts_arr = np.array([int(ts) for ts in ts_list], dtype=np.int64)
    phases = (ts_arr - int(phase_origin_ts)) % int(period)

    return cycle[phases].astype(float)


def predict_state_bundle(
    state: "BaselineState",
    ts_list: List[int],
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Evaluate the mid, lower and upper templates of ``state``.

    Returns:
        ``(mid, lower, upper)`` arrays, one value per timestamp in
        ``ts_list``.
    """
    period = int(state.period)
    origin = int(state.phase_origin_ts)

    mid, lower, upper = (
        predict_template_values(
            template=np.array(cycle, dtype=float),
            period=period,
            phase_origin_ts=origin,
            ts_list=ts_list,
        )
        for cycle in (state.template, state.lower_template, state.upper_template)
    )

    return mid, lower, upper


def normalize_origin_near(origin: int, period: int, near_ts: int) -> int:
    """Shift ``origin`` by whole periods to the latest value <= ``near_ts``.

    Args:
        origin: Phase origin timestamp.
        period: Cycle length in seconds.
        near_ts: Reference timestamp.

    Returns:
        ``origin + k * period`` with ``near_ts - period < result <= near_ts``.
        ``origin`` is returned unchanged when ``period`` <= 1.
    """
    if period <= 1:
        return origin

    origin = int(origin)
    period = int(period)
    near_ts = int(near_ts)

    # Floor division gives the number of whole periods to move in either
    # direction, so the cost no longer grows with the distance to near_ts.
    return origin + ((near_ts - origin) // period) * period


def merge_template(
    old_template: np.ndarray,
    new_template: np.ndarray,
    alpha: float,
) -> np.ndarray:
    """Blend two templates: ``(1 - alpha) * old + alpha * new``.

    Args:
        old_template: Existing template; resampled to the length of
            ``new_template`` when the lengths differ.
        new_template: Freshly built template.
        alpha: Weight of ``new_template``; clipped to [0, 1].

    Returns:
        Float array with the length of ``new_template``.
    """
    alpha = float(np.clip(alpha, 0.0, 1.0))
    old_arr = np.asarray(old_template, dtype=float)
    new_arr = np.asarray(new_template, dtype=float)

    if len(old_arr) != len(new_arr):
        old_arr = resample_template(old_arr, len(new_arr))

    merged = (1.0 - alpha) * old_arr + alpha * new_arr

    return merged.astype(float)
# =============================================================================
# Anomaly detection
# =============================================================================

def max_consecutive_true(flags: np.ndarray) -> int:
    """Return the length of the longest run of truthy entries in ``flags``."""
    longest = 0
    run = 0

    for flag in flags:
        run = run + 1 if bool(flag) else 0
        if run > longest:
            longest = run

    return int(longest)


def calc_point_bounds(
    pred: np.ndarray,
    abs_threshold: float,
    rel_threshold: float,
) -> Tuple[np.ndarray, np.ndarray]:
    """Build a symmetric tolerance band around point predictions.

    The half-width at each point is the larger of ``abs_threshold`` and
    ``|pred| * rel_threshold``.

    Returns:
        ``(lower, upper)`` arrays.
    """
    margin = np.maximum(abs_threshold, np.abs(pred) * rel_threshold)
    lower = pred - margin
    upper = pred + margin

    return lower, upper
def calc_final_bounds(
    state: BaselineState,
    pred: np.ndarray,
    lower_raw: np.ndarray,
    upper_raw: np.ndarray,
    target: Dict,
) -> Tuple[np.ndarray, np.ndarray]:
    """Compute the final tolerance band for a target.

    ``phase_band`` targets widen the raw lower/upper templates by a pad equal
    to the larger of ``band_pad_abs`` (default: ``abs_threshold``) and
    ``|pred| * rel_threshold * 0.25``. All other strategies use a symmetric
    band around ``pred`` from ``calc_point_bounds``.

    Args:
        state: Baseline state; not read by this function.
        pred: Predicted mid values.
        lower_raw: Raw lower template values.
        upper_raw: Raw upper template values.
        target: Target configuration; reads ``strategy``, ``abs_threshold``,
            ``rel_threshold`` and ``band_pad_abs``.

    Returns:
        ``(lower, upper)`` arrays.
    """
    abs_threshold = float(target.get("abs_threshold", 1.0))
    rel_threshold = float(target.get("rel_threshold", 0.25))

    if target.get("strategy", "phase_point") != "phase_band":
        return calc_point_bounds(pred, abs_threshold, rel_threshold)

    # For vibration-style metrics the band is a tolerance for normal
    # fluctuation rather than a hard limit, so it is padded on both sides.
    pad_abs = float(target.get("band_pad_abs", abs_threshold))
    pad = np.maximum(pad_abs, np.abs(pred) * rel_threshold * 0.25)

    return lower_raw - pad, upper_raw + pad
target=target, + ) + + above_upper = actual - upper + below_lower = lower - actual + + exceed = np.maximum(above_upper, below_lower) + exceed = np.maximum(exceed, 0.0) + + outside = exceed > 0 + + band_width = np.maximum(upper - lower, 1e-6) + exceed_ratio = exceed / band_width + + abs_err = np.abs(actual - pred) + + outside_ratio = float(np.mean(outside)) + mean_abs_err = float(np.mean(abs_err)) + mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1e-6))) + + max_outside_seconds = max_consecutive_true(outside) + max_exceed_ratio = float(np.max(exceed_ratio)) if len(exceed_ratio) > 0 else 0.0 + + outside_ratio_threshold = float( + target.get("outside_ratio_threshold", OUTSIDE_RATIO_THRESHOLD) + ) + min_consecutive_outside = int( + target.get("min_consecutive_outside", MIN_CONSECUTIVE_OUTSIDE) + ) + severe_exceed_ratio = float( + target.get("severe_exceed_ratio", SEVERE_EXCEED_RATIO) + ) + + # 核心优化: + # 1. 偶发 1~3 个点越界不报警。 + # 2. 持续越界才报警。 + # 3. 高比例越界才报警。 + # 4. 严重越界才立即报警。 + is_anomaly = ( + outside_ratio >= outside_ratio_threshold + or max_outside_seconds >= min_consecutive_outside + or max_exceed_ratio >= severe_exceed_ratio + ) + + return ( + is_anomaly, + outside_ratio, + mean_abs_err, + mean_rel_err, + int(best_period), + int(best_origin), + int(max_outside_seconds), + float(max_exceed_ratio), + ) + + +# ============================================================================= +# 状态管理 +# ============================================================================= + +def create_initial_state( + ts_grid: np.ndarray, + ys_model: np.ndarray, + ys_actual: np.ndarray, + target: Dict, + now_sec: int, +) -> Optional[BaselineState]: + baseline = build_current_baseline( + ts_grid=ts_grid, + ys_mid_grid=ys_model, + ys_band_grid=ys_actual, + target=target, + ) + + if baseline is None: + return None + + period, phase_origin_ts, template, lower_template, upper_template = baseline + + return BaselineState( + period=int(period), + 
def apply_phase_lock_to_state(
    state: BaselineState,
    best_period: int,
    best_origin: int,
) -> None:
    """Store a phase-locked period and origin on ``state`` in place.

    Each of the three templates is resampled to ``best_period`` samples when
    its length differs. Nothing is changed when ``best_period`` <= 1.
    """
    best_period = int(best_period)

    if best_period <= 1:
        return

    for field_name in ("template", "lower_template", "upper_template"):
        current = getattr(state, field_name)
        if len(current) != best_period:
            resized = resample_template(
                np.array(current, dtype=float),
                best_period,
            )
            setattr(state, field_name, resized.astype(float).tolist())

    state.period = best_period
    state.phase_origin_ts = int(best_origin)
state, False, 0.0, 0.0, 0.0, 0, 0.0 + + elapsed = max(1, now_sec - int(state.last_seen_ts)) + elapsed = min(elapsed, POLL_INTERVAL * 2) + state.last_seen_ts = now_sec + + ( + is_anomaly, + outside_ratio, + mean_abs_err, + mean_rel_err, + best_period, + best_origin, + max_outside_seconds, + max_exceed_ratio, + ) = detect_anomaly( + state=state, + ts_grid=ts_grid, + ys_model=ys_model, + ys_actual=ys_actual, + target=target, + ) + + if is_anomaly: + state.status = BASELINE_STATUS_ANOMALY + state.clean_seconds = 0 + BASELINE_STATES[key] = state + + logger.warning( + "检测到异常,冻结模板 key=%s outside_ratio=%.2f max_outside=%ss max_exceed_ratio=%.2f mean_abs_err=%.4f mean_rel_err=%.4f", + key, + outside_ratio, + max_outside_seconds, + max_exceed_ratio, + mean_abs_err, + mean_rel_err, + ) + + return ( + state, + True, + outside_ratio, + mean_abs_err, + mean_rel_err, + max_outside_seconds, + max_exceed_ratio, + ) + + old_period = int(state.period) + old_origin = int(state.phase_origin_ts) + + apply_phase_lock_to_state(state, best_period, best_origin) + + if old_period != state.period or old_origin != state.phase_origin_ts: + logger.info( + "phase-lock key=%s period %s -> %s origin %s -> %s", + key, + old_period, + state.period, + datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"), + datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), + ) + + if state.status == BASELINE_STATUS_ANOMALY: + state.status = BASELINE_STATUS_RECOVERING + state.clean_seconds = elapsed + BASELINE_STATES[key] = state + + logger.info( + "异常开始恢复 key=%s clean_seconds=%ss", + key, + state.clean_seconds, + ) + + return ( + state, + False, + outside_ratio, + mean_abs_err, + mean_rel_err, + max_outside_seconds, + max_exceed_ratio, + ) + + if state.status == BASELINE_STATUS_RECOVERING: + state.clean_seconds += elapsed + else: + state.status = BASELINE_STATUS_HEALTHY + state.clean_seconds += elapsed + + min_clean_for_update = max( + RECOVERY_MIN_SECONDS, + int(state.period) * 
MIN_FULL_CYCLES_FOR_TEMPLATE, + ) + + if state.clean_seconds < min_clean_for_update: + BASELINE_STATES[key] = state + return ( + state, + False, + outside_ratio, + mean_abs_err, + mean_rel_err, + max_outside_seconds, + max_exceed_ratio, + ) + + tail_seconds = min( + int(state.clean_seconds), + int(state.period) * MAX_CYCLES_FOR_TEMPLATE, + ) + + baseline = build_current_baseline( + ts_grid=ts_grid, + ys_mid_grid=ys_model, + ys_band_grid=ys_actual, + target=target, + tail_seconds=tail_seconds, + ) + + if baseline is None: + BASELINE_STATES[key] = state + return ( + state, + False, + outside_ratio, + mean_abs_err, + mean_rel_err, + max_outside_seconds, + max_exceed_ratio, + ) + + new_period, new_origin, new_template, new_lower_template, new_upper_template = baseline + + alpha = RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else HEALTHY_EMA_ALPHA + + state.template = merge_template( + np.array(state.template, dtype=float), + new_template, + alpha, + ).astype(float).tolist() + + state.lower_template = merge_template( + np.array(state.lower_template, dtype=float), + new_lower_template, + alpha, + ).astype(float).tolist() + + state.upper_template = merge_template( + np.array(state.upper_template, dtype=float), + new_upper_template, + alpha, + ).astype(float).tolist() + + state.period = int(new_period) + state.phase_origin_ts = int(new_origin) + state.status = BASELINE_STATUS_HEALTHY + state.last_update_ts = now_sec + + if tail_seconds > 0 and len(ys_actual) >= tail_seconds: + state.y_min = float(np.min(ys_actual[-tail_seconds:])) + state.y_max = float(np.max(ys_actual[-tail_seconds:])) + else: + state.y_min = float(np.min(ys_actual)) + state.y_max = float(np.max(ys_actual)) + + BASELINE_STATES[key] = state + + logger.info( + "更新健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss alpha=%.2f", + key, + state.strategy, + state.period, + datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), + state.clean_seconds, + alpha, + ) + + return ( 
+ state, + False, + outside_ratio, + mean_abs_err, + mean_rel_err, + max_outside_seconds, + max_exceed_ratio, + ) + + +# ============================================================================= +# Prometheus 写入 +# ============================================================================= + +def prom_escape_label_value(value: str) -> str: + return ( + str(value) + .replace("\\", "\\\\") + .replace("\n", "\\n") + .replace('"', '\\"') + ) + + +def labels_to_str(labels: Dict[str, str]) -> str: + if not labels: + return "" + + parts = [] + + for k in sorted(labels.keys()): + parts.append(f'{k}="{prom_escape_label_value(labels[k])}"') + + return "{" + ",".join(parts) + "}" + + +def write_series( + metric_name: str, + labels: Dict[str, str], + ts_list: List[int], + values: List[float], +) -> bool: + if not ts_list or not values or len(ts_list) != len(values): + return False + + label_str = labels_to_str(labels) + lines = [] + + for t, y in zip(ts_list, values): + try: + ts_sec = int(round(float(t))) + val = float(y) + except Exception: + continue + + if not math.isfinite(ts_sec) or not math.isfinite(val): + continue + + lines.append(f"{metric_name}{label_str} {val:.6f} {ts_sec * 1000}") + + if not lines: + return False + + payload = "\n".join(lines) + "\n" + + try: + resp = requests.post( + f"{VM_URL}/api/v1/import/prometheus", + data=payload.encode("utf-8"), + headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"}, + timeout=10, + ) + resp.raise_for_status() + return True + + except requests.RequestException as e: + logger.error("写入数据失败 metric=%s: %s", metric_name, e) + return False + + +def write_prediction_bundle( + pred_metric: str, + anomaly_metric: str, + labels: Dict[str, str], + ts_future: List[int], + pred_values: np.ndarray, + lower_values: np.ndarray, + upper_values: np.ndarray, + is_anomaly: bool, + outside_ratio: float, + mean_abs_err: float, + mean_rel_err: float, + max_outside_seconds: int, + max_exceed_ratio: float, + event_ts: int, 
+) -> bool: + ok1 = write_series( + metric_name=pred_metric, + labels=labels, + ts_list=ts_future, + values=pred_values.astype(float).tolist(), + ) + + ok2 = write_series( + metric_name=f"{pred_metric}_lower", + labels=labels, + ts_list=ts_future, + values=lower_values.astype(float).tolist(), + ) + + ok3 = write_series( + metric_name=f"{pred_metric}_upper", + labels=labels, + ts_list=ts_future, + values=upper_values.astype(float).tolist(), + ) + + anomaly_labels = dict(labels) + anomaly_labels["type"] = "prediction_deviation" + + ok4 = write_series( + metric_name=anomaly_metric, + labels=anomaly_labels, + ts_list=[event_ts], + values=[1.0 if is_anomaly else 0.0], + ) + + ok5 = write_series( + metric_name=f"{anomaly_metric}_outside_ratio", + labels=anomaly_labels, + ts_list=[event_ts], + values=[outside_ratio], + ) + + ok6 = write_series( + metric_name=f"{anomaly_metric}_mean_abs_error", + labels=anomaly_labels, + ts_list=[event_ts], + values=[mean_abs_err], + ) + + ok7 = write_series( + metric_name=f"{anomaly_metric}_mean_rel_error", + labels=anomaly_labels, + ts_list=[event_ts], + values=[mean_rel_err], + ) + + ok8 = write_series( + metric_name=f"{anomaly_metric}_max_consecutive_outside", + labels=anomaly_labels, + ts_list=[event_ts], + values=[float(max_outside_seconds)], + ) + + ok9 = write_series( + metric_name=f"{anomaly_metric}_max_exceed_ratio", + labels=anomaly_labels, + ts_list=[event_ts], + values=[float(max_exceed_ratio)], + ) + + return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7 and ok8 and ok9 + + +# ============================================================================= +# 标签解析 +# ============================================================================= + +_LABEL_PATTERN = re.compile( + r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*' +) + + +def parse_labels_from_query(query: str) -> Dict[str, str]: + labels = {} + + if "{" not in query or "}" not in query: + return labels + + try: + label_part = query[query.index("{") + 
1:query.rindex("}")] + except Exception: + return labels + + for match in _LABEL_PATTERN.finditer(label_part): + key = match.group(1) + value = match.group(2) + + value = ( + value + .replace('\\"', '"') + .replace("\\n", "\n") + .replace("\\\\", "\\") + ) + + labels[key] = value + + return labels + + +def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]: + result = {} + + for d in dicts: + if d: + result.update(d) + + return result + + +def series_key(metric_name: str, labels: Dict[str, str]) -> str: + return metric_name + labels_to_str(labels) + + +# ============================================================================= +# 状态持久化 +# ============================================================================= + +def load_state() -> None: + global BASELINE_STATES + + if not os.path.exists(STATE_FILE): + return + + try: + with open(STATE_FILE, "r", encoding="utf-8") as f: + raw = json.load(f) + + states = {} + + for key, value in raw.get("baseline_states", {}).items(): + required_fields = { + "period", + "phase_origin_ts", + "template", + "lower_template", + "upper_template", + "strategy", + "status", + "clean_seconds", + "last_update_ts", + "last_seen_ts", + "y_min", + "y_max", + } + + if not required_fields.issubset(set(value.keys())): + continue + + states[key] = BaselineState(**value) + + BASELINE_STATES = states + + logger.info( + "已加载预测状态文件 %s,状态数量=%d", + STATE_FILE, + len(BASELINE_STATES), + ) + + except Exception as e: + logger.warning("加载预测状态文件失败,将重新学习: %s", e) + + +def save_state() -> None: + try: + raw = { + "baseline_states": { + key: asdict(value) + for key, value in BASELINE_STATES.items() + } + } + + tmp_file = STATE_FILE + ".tmp" + + with open(tmp_file, "w", encoding="utf-8") as f: + json.dump(raw, f, ensure_ascii=False, indent=2) + + os.replace(tmp_file, STATE_FILE) + + except Exception as e: + logger.warning("保存预测状态文件失败: %s", e) + + +# ============================================================================= +# 时间轴 +# 
============================================================================= + +def build_prediction_timestamps( + key: str, + last_real_ts: int, + now_sec: int, +) -> Optional[List[int]]: + data_lag = now_sec - last_real_ts + + if data_lag > MAX_DATA_LAG_SECONDS: + logger.warning( + "真实数据延迟过大,跳过预测 key=%s data_lag=%ss max=%ss", + key, + data_lag, + MAX_DATA_LAG_SECONDS, + ) + return None + + last_written_real_ts = LAST_REAL_TS_WRITTEN.get(key) + + if last_written_real_ts is not None and last_real_ts <= int(last_written_real_ts): + logger.info( + "真实数据时间戳未推进,跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s", + key, + last_real_ts, + last_written_real_ts, + ) + return None + + base_ts = last_real_ts + + return [ + base_ts + i + 1 + for i in range(WRITE_HORIZON_SECONDS) + ] + + +# ============================================================================= +# 主流程 +# ============================================================================= + +def run_once() -> None: + now_str = datetime.now().strftime("%H:%M:%S") + + for target in PREDICT_TARGETS: + query = target["query"] + pred_metric = target["pred_metric"] + anomaly_metric = target["anomaly_metric"] + + ts, ys = fetch_history(query) + + if len(ys) < MIN_POINTS: + logger.info("[%s] %s 数据不足(%d 点),跳过", now_str, query, len(ys)) + continue + + ts_grid, ys_grid_raw = normalize_history(ts, ys) + + if len(ys_grid_raw) < MIN_POINTS: + logger.info("[%s] %s 清洗后数据不足(%d 点),跳过", now_str, query, len(ys_grid_raw)) + continue + + ys_grid_model = preprocess_values(ys_grid_raw, target) + + base_labels = parse_labels_from_query(query) + write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS) + + key = series_key(pred_metric, write_labels) + + ( + state, + is_anomaly, + outside_ratio, + mean_abs_err, + mean_rel_err, + max_outside_seconds, + max_exceed_ratio, + ) = maybe_update_state( + key=key, + ts_grid=ts_grid, + ys_model=ys_grid_model, + ys_actual=ys_grid_raw, + target=target, + ) + + if state is None: + 
logger.info("[%s] %s 暂无可用健康模板,等待学习", now_str, query) + continue + + now_sec = int(time.time()) + last_real_ts = int(ts_grid[-1]) + data_lag = now_sec - last_real_ts + + ts_future = build_prediction_timestamps( + key=key, + last_real_ts=last_real_ts, + now_sec=now_sec, + ) + + if not ts_future: + continue + + pred_values, lower_raw, upper_raw = predict_state_bundle(state, ts_future) + + lower_values, upper_values = calc_final_bounds( + state=state, + pred=pred_values, + lower_raw=lower_raw, + upper_raw=upper_raw, + target=target, + ) + + ok = write_prediction_bundle( + pred_metric=pred_metric, + anomaly_metric=anomaly_metric, + labels=write_labels, + ts_future=ts_future, + pred_values=pred_values, + lower_values=lower_values, + upper_values=upper_values, + is_anomaly=is_anomaly, + outside_ratio=outside_ratio, + mean_abs_err=mean_abs_err, + mean_rel_err=mean_rel_err, + max_outside_seconds=max_outside_seconds, + max_exceed_ratio=max_exceed_ratio, + event_ts=last_real_ts, + ) + + if not ok: + logger.error("[%s] %s 写入预测数据失败", now_str, query) + continue + + LAST_REAL_TS_WRITTEN[key] = last_real_ts + + future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S") + future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S") + last_real_str = datetime.fromtimestamp(last_real_ts).strftime("%H:%M:%S") + origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S") + + logger.info( + "[%s] %-40s → %-35s strategy=%s status=%s anomaly=%s outside=%.2f max_outside=%ss max_exceed=%.2f period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点,预测区间 %s ~ %s", + now_str, + query, + pred_metric, + state.strategy, + state.status, + is_anomaly, + outside_ratio, + max_outside_seconds, + max_exceed_ratio, + state.period, + origin_str, + last_real_str, + data_lag, + len(ts_future), + future_start, + future_end, + ) + + save_state() + + +def main() -> None: + load_state() + + logger.info( + "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s 
forecast=%s", + VM_URL, + HISTORY_MINUTES, + HORIZON_SECONDS, + WRITE_HORIZON_SECONDS, + POLL_INTERVAL, + STATE_FILE, + EXTRA_PREDICT_LABELS["forecast"], + ) + + while True: + run_once() + time.sleep(POLL_INTERVAL) + + +if __name__ == "__main__": + main() \ No newline at end of file From f9b6506452a75ffd1f1f8beee1ebd1008fdc724d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Mon, 25 May 2026 13:46:29 +0800 Subject: [PATCH 30/55] feat(protoforge): fault update --- protoforge/core/demo.py | 2 +- protoforge/core/fault.py | 187 +++++++++++++++++++++++++++++++++++++ protoforge/models/fault.py | 2 + web/src/views/Devices.vue | 117 ++++++++++++++++++++--- 4 files changed, 296 insertions(+), 12 deletions(-) diff --git a/protoforge/core/demo.py b/protoforge/core/demo.py index ff0b333..b7ccae7 100644 --- a/protoforge/core/demo.py +++ b/protoforge/core/demo.py @@ -108,7 +108,7 @@ async def seed_demo_data(engine: Any, template_manager: Any) -> None: "points": [ {"name": "weight", "address": "net_weight", "data_type": "float32", "generator_type": "random", "min_value": 0.5, "max_value": 50.0}, {"name": "tare", "address": "tare_weight", "data_type": "float32", "generator_type": "fixed", "fixed_value": 2.5}, - {"name": "stable", "address": "stable_flag", "data_type": "bool", "generator_type": "fixed", "fixed_value": true}, + {"name": "stable", "address": "stable_flag", "data_type": "bool", "generator_type": "fixed", "fixed_value": True}, ], }, ] diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py index e72842d..5beba87 100644 --- a/protoforge/core/fault.py +++ b/protoforge/core/fault.py @@ -41,6 +41,7 @@ name="刀具磨损", description="刀具切削刃磨损,切削阻力增大,主轴电流升高,振动增大,进给速率下降", category="mechanical", + scenario_type="trend_drift", default_duration=300.0, tags=["刀具", "磨损", "渐进"], point_faults=[ @@ -67,6 +68,7 @@ name="刀具崩刃", description="刀具突发性崩刃,振动剧烈突增,主轴电流峰值,进给停止", category="mechanical", + scenario_type="sudden_spike", default_duration=15.0, 
tags=["刀具", "崩刃", "突发"], point_faults=[ @@ -93,6 +95,7 @@ name="主轴过热", description="主轴长时间高负荷运转或冷却不足,电流持续偏高,转速因热保护下降", category="thermal", + scenario_type="trend_drift", default_duration=240.0, tags=["主轴", "过热", "渐进"], point_faults=[ @@ -117,6 +120,7 @@ name="主轴轴承故障", description="主轴轴承磨损或润滑不足,振动幅度持续升高,伴随电流轻微上升", category="mechanical", + scenario_type="trend_drift", default_duration=360.0, tags=["主轴", "轴承", "渐进"], point_faults=[ @@ -141,6 +145,7 @@ name="进给堵转", description="进给轴卡死,进给速率降为零,主轴电流急剧升高", category="process", + scenario_type="sudden_spike", default_duration=20.0, tags=["进给", "堵转", "突发"], point_faults=[ @@ -163,6 +168,7 @@ name="振动异常", description="工件装夹松动或切削共振,三轴振动突然大幅增加", category="mechanical", + scenario_type="sudden_spike", default_duration=60.0, tags=["振动", "装夹", "突发"], point_faults=[ @@ -185,6 +191,7 @@ name="切削液不足", description="切削液供给不足,冷却润滑失效,热量积累导致振动和电流缓慢升高", category="process", + scenario_type="trend_drift", default_duration=480.0, tags=["切削液", "冷却", "渐进"], point_faults=[ @@ -211,6 +218,7 @@ name="电源波动", description="供电电压不稳定,主轴转速和进给速率出现随机波动", category="electrical", + scenario_type="high_noise", default_duration=90.0, tags=["电源", "波动", "突发"], point_faults=[ @@ -222,6 +230,176 @@ multiplier=1.0, noise_scale=150.0), ], ), + + # ================================================================== + # 以下为新增故障类型 + # ================================================================== + + # ------------------------------------------------------------------ + # 传感器强干扰 — 高噪声波动型 + # 场景:电磁干扰、接地不良、信号线屏蔽失效等导致传感器读数剧烈抖动 + # 特征:均值基本不变,但噪声幅度突然增大数倍,信号看起来"毛刺"严重 + # 区别于真实故障:设备本身没有坏,只是采集信号质量变差 + # 模式:瞬间注入,持续期间每次采样都叠加大幅随机噪声 + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="sensor_noise", + name="传感器强干扰", + description=( + "【高噪声波动型】电磁干扰或接地不良导致传感器信号质量恶化。" + "均值基本不变,但每次采样叠加大幅随机噪声,曲线呈现密集毛刺。" + "典型场景:变频器附近的传感器、信号线屏蔽层破损、接地回路故障。" + ), + category="electrical", + scenario_type="high_noise", + default_duration=120.0, + tags=["传感器", 
"干扰", "噪声", "高噪声波动型"], + point_faults=[ + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=8.0), + PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=2.5), + PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=2.5), + PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=3.0), + PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=80.0), + ], + ), + + # ------------------------------------------------------------------ + # 换工件/换程序段 — 工况切换型(高速加工 → 低速精加工) + # 场景:CNC 机床切换加工程序,从粗加工切换到精加工 + # 特征:转速降低、进给降低、电流降低,所有指标跳到新的正常范围并稳定 + # 关键:这不是故障!数据本身没有坏,只是工况变了,正常范围完全不同 + # 模式:STEP 阶跃,立即跳到新基线并在整个 duration 内保持 + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="mode_switch_fine_machining", + name="切换精加工工况", + description=( + "【工况切换型】从粗加工切换到精加工程序段。" + "主轴转速升高、进给速率降低、切削电流降低,各指标立即跳到新的正常范围并保持稳定。" + "数据本身没有异常,但与粗加工基线相比会触发阈值告警。" + "典型场景:换刀后进入精加工、加工不同特征面、程序跳段。" + ), + category="process", + scenario_type="mode_switch", + default_duration=300.0, + tags=["工况切换", "精加工", "程序段", "工况切换型"], + point_faults=[ + PointFaultConfig(point="spindle_speed", mode=FaultMode.STEP, + multiplier=1.4, noise_scale=30.0), + PointFaultConfig(point="feed_rate", mode=FaultMode.STEP, + multiplier=0.3, noise_scale=10.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.STEP, + multiplier=0.55, noise_scale=0.5), + PointFaultConfig(point="vibration_x", mode=FaultMode.STEP, + multiplier=0.6, noise_scale=0.1), + PointFaultConfig(point="vibration_y", mode=FaultMode.STEP, + multiplier=0.6, noise_scale=0.1), + PointFaultConfig(point="vibration_z", mode=FaultMode.STEP, + multiplier=0.6, noise_scale=0.1), + ], + ), + + # ------------------------------------------------------------------ + # 进入空载工况 — 工况切换型(加工中 → 空载运行) + # 场景:加工完成、等待上料、程序暂停,主轴空转 + # 特征:进给降为 
0,电流大幅下降到空载水平,转速维持,振动降低 + # 模式:STEP 阶跃,立即切换到空载基线 + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="mode_switch_idle", + name="切换空载工况", + description=( + "【工况切换型】机床进入空载运行状态(加工完成等待上料、程序暂停)。" + "进给速率降为零,主轴电流降至空载水平(约为加工时的 20-30%)," + "主轴转速维持,振动明显降低。" + "典型场景:换料等待、程序暂停、加工间隙、换刀等待。" + ), + category="process", + scenario_type="mode_switch", + default_duration=180.0, + tags=["工况切换", "空载", "等待", "工况切换型"], + point_faults=[ + PointFaultConfig(point="feed_rate", mode=FaultMode.STEP, + target_value=0.0, noise_scale=2.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.STEP, + multiplier=0.22, noise_scale=0.3), + PointFaultConfig(point="vibration_x", mode=FaultMode.STEP, + multiplier=0.25, noise_scale=0.05), + PointFaultConfig(point="vibration_y", mode=FaultMode.STEP, + multiplier=0.25, noise_scale=0.05), + PointFaultConfig(point="vibration_z", mode=FaultMode.STEP, + multiplier=0.25, noise_scale=0.05), + ], + ), + + # ------------------------------------------------------------------ + # 突发电流尖峰 — 突发脉冲型 + # 场景:切削过程中遇到硬质夹杂物、刀具切入角突变、工件材质不均 + # 特征:主轴电流瞬间冲高(持续 2-5 秒),然后恢复正常,其他指标基本不变 + # 区别于刀具崩刃:电流尖峰后能自动恢复,不会导致停机 + # 模式:瞬间注入,持续时间极短 + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="current_spike", + name="突发电流尖峰", + description=( + "【突发脉冲型】切削过程中遇到硬质夹杂物或材质不均,主轴电流瞬间冲高后自动恢复。" + "电流短暂升至正常值的 3-4 倍,持续仅数秒,振动轻微抖动,进给基本不受影响。" + "典型场景:铸件内部硬质点、焊缝区域、材料硬度不均匀。" + "与刀具崩刃的区别:能自动恢复,不触发停机报警。" + ), + category="mechanical", + scenario_type="sudden_spike", + default_duration=5.0, + tags=["电流", "尖峰", "脉冲", "突发脉冲型"], + point_faults=[ + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + multiplier=3.5, noise_scale=1.5), + PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT, + multiplier=2.0, noise_scale=0.5), + PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT, + multiplier=2.0, noise_scale=0.5), + PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT, + 
multiplier=2.5, noise_scale=0.8), + ], + ), + + # ------------------------------------------------------------------ + # 主轴负载异常 — 关系约束型 + # 场景:刀具钝化但未完全磨损、切削参数不匹配、工件材料变硬 + # 特征:主轴转速正常、进给速率正常,但主轴电流异常升高 + # 关键:单看任何一个指标都"正常",只有多指标关系才能发现异常 + # 模式:渐进式,电流缓慢爬升,转速和进给保持不变 + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="spindle_load_anomaly", + name="主轴负载异常", + description=( + "【关系约束型】主轴转速正常、进给速率正常,但主轴电流异常升高。" + "单看任何一个指标都在正常范围内,只有分析多指标关系才能发现异常。" + "物理含义:切削阻力增大(刀具钝化初期、材料变硬)," + "系统尚未触发保护降速,但电流已超出正常切削功率范围。" + "典型场景:刀具轻度钝化、切削液浓度不足、工件材料批次差异。" + ), + category="mechanical", + scenario_type="relation_constraint", + default_duration=240.0, + tags=["主轴", "负载", "关系约束", "关系约束型"], + point_faults=[ + PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, + multiplier=2.8, noise_scale=1.0), + # 转速和进给保持不变(multiplier=1.0),只叠加极小噪声维持真实感 + PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=15.0), + PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=5.0), + ], + ), ] # 按 id 索引 @@ -374,6 +552,15 @@ def _compute_value( target = baseline * (1.0 + (pf.multiplier - 1.0) * intensity) else: target = baseline + elif pf.mode == FaultMode.STEP: + # 阶跃模式:立即跳到新基线并在整个 duration 内保持(工况切换专用) + # 与 INSTANT 的区别:STEP 的 multiplier 表示新工况的正常倍数,不受 intensity 缩放 + if pf.target_value is not None: + target = pf.target_value + elif pf.multiplier is not None: + target = baseline * pf.multiplier + else: + target = baseline else: # 渐进模式:随 progress 线性劣化 if pf.target_value is not None: diff --git a/protoforge/models/fault.py b/protoforge/models/fault.py index cc038e0..025da96 100644 --- a/protoforge/models/fault.py +++ b/protoforge/models/fault.py @@ -8,6 +8,7 @@ class FaultMode(str, Enum): """故障注入模式""" INSTANT = "instant" # 瞬间跳变到异常值,持续 duration 后恢复 GRADUAL = "gradual" # 渐进式劣化,随时间线性恶化,到 duration 时达到峰值后恢复 + STEP = "step" # 阶跃切换到新工况基线,整个 duration 内保持新基线(工况切换专用) class FaultStatus(str, 
Enum): @@ -36,6 +37,7 @@ class FaultTypeDefinition(BaseModel): name: str description: str category: str # 故障分类:mechanical / electrical / thermal / process + scenario_type: str = "trend_drift" # 异常场景类型:trend_drift / sudden_spike / high_noise / mode_switch / relation_constraint default_duration: float = 120.0 # 默认持续时间(秒) point_faults: list[PointFaultConfig] = Field(default_factory=list) tags: list[str] = Field(default_factory=list) diff --git a/web/src/views/Devices.vue b/web/src/views/Devices.vue index 65e0535..36141a4 100644 --- a/web/src/views/Devices.vue +++ b/web/src/views/Devices.vue @@ -92,24 +92,55 @@ - + - 设备:{{ faultTargetDevice?.name }} + 目标设备:{{ faultTargetDevice?.name }} + - -
{{ selectedFaultType.name }} · {{ faultCategoryLabel(selectedFaultType.category) }}
-
{{ selectedFaultType.description }}
-
- 影响测点:{{ selectedFaultType.point_faults.map(p => p.point).join('、') }} + + +
+ + + {{ selectedFaultType.name }} + + {{ scenarioTypeLabel(selectedFaultType.scenario_type) }} + + + {{ faultCategoryLabel(selectedFaultType.category) }} + + + + + {{ selectedFaultType.description }} + + +
+ 影响测点: + + + {{ pf.point }} + + {{ pointFaultModeLabel(pf) }} + + +
- +
+ + {{ faultIntensityLabel }}({{ faultIntensity }}) + + · 工况切换型强度不影响切换幅度 + @@ -243,12 +278,17 @@ const columns = [ }, { title: '测点', key: 'points', width: 70, render: (row) => (row.points || []).length }, { - title: '故障', key: 'fault', width: 90, + title: '故障', key: 'fault', width: 130, render: (row) => { const fault = activeFaults.value[row.id] if (!fault || fault.status === 'none') return h(NTag, { size: 'tiny', bordered: false }, () => '正常') const pct = Math.round((fault.progress || 0) * 100) - return h(NTag, { size: 'tiny', type: 'error', bordered: false }, () => `${fault.fault_type_name} ${pct}%`) + const ft = faultTypes.value.find(t => t.id === fault.fault_type_id) + const scenarioLabel = ft ? scenarioTypeLabel(ft.scenario_type) : '' + return h(NSpace, { size: 2, vertical: false, align: 'center' }, () => [ + h(NTag, { size: 'tiny', type: 'error', bordered: false }, () => `${fault.fault_type_name} ${pct}%`), + scenarioLabel ? h(NTag, { size: 'tiny', bordered: false, style: 'font-size:10px;background:#2d1b1b;color:#f87171' }, () => scenarioLabel) : null, + ]) } }, { @@ -351,6 +391,25 @@ const faultTypeOptions = computed(() => faultTypes.value.map(t => ({ label: `${t.name}(${faultCategoryLabel(t.category)})`, value: t.id })) ) +// 按场景类型分组的故障选项 +const SCENARIO_ORDER = ['trend_drift', 'sudden_spike', 'high_noise', 'mode_switch', 'relation_constraint'] +const faultTypeGroupedOptions = computed(() => { + const groups = {} + for (const t of faultTypes.value) { + const st = t.scenario_type || 'trend_drift' + if (!groups[st]) groups[st] = [] + groups[st].push({ label: t.name, value: t.id }) + } + return SCENARIO_ORDER + .filter(st => groups[st]) + .map(st => ({ + type: 'group', + label: scenarioTypeLabel(st), + key: st, + children: groups[st], + })) +}) + const selectedFaultType = computed(() => faultTypes.value.find(t => t.id === faultTypeId.value) || null ) @@ -368,6 +427,42 @@ function faultCategoryLabel(category) { return map[category] || category } +function 
scenarioTypeLabel(scenarioType) { + const map = { + trend_drift: '趋势漂移型', + sudden_spike: '突发脉冲型', + high_noise: '高噪声波动型', + mode_switch: '工况切换型', + relation_constraint: '关系约束型', + } + return map[scenarioType] || scenarioType +} + +function scenarioTagType(scenarioType) { + const map = { + trend_drift: 'warning', + sudden_spike: 'error', + high_noise: 'info', + mode_switch: 'success', + relation_constraint: 'default', + } + return map[scenarioType] || 'default' +} + +function pointFaultModeLabel(pf) { + if (pf.mode === 'step') return '→ 阶跃' + if (pf.mode === 'gradual') { + if (pf.multiplier != null) return `→ ×${pf.multiplier}` + if (pf.target_value != null) return `→ ${pf.target_value}` + } + if (pf.mode === 'instant') { + if (pf.target_value != null) return `→ ${pf.target_value}` + if (pf.multiplier != null && pf.multiplier !== 1.0) return `→ ×${pf.multiplier}` + return '± 噪声' + } + return '' +} + function onFaultTypeChange(val) { const t = faultTypes.value.find(f => f.id === val) if (t && t.default_duration) faultDuration.value = t.default_duration From 685ae6b79c28cf8094e20fc6f9f7c00a32d0fe4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Mon, 25 May 2026 14:00:07 +0800 Subject: [PATCH 31/55] Revert "feat(protoforge): fault update" This reverts commit f9b6506452a75ffd1f1f8beee1ebd1008fdc724d. 
--- protoforge/core/demo.py | 2 +- protoforge/core/fault.py | 187 ------------------------------------- protoforge/models/fault.py | 2 - web/src/views/Devices.vue | 117 +++-------------------- 4 files changed, 12 insertions(+), 296 deletions(-) diff --git a/protoforge/core/demo.py b/protoforge/core/demo.py index b7ccae7..ff0b333 100644 --- a/protoforge/core/demo.py +++ b/protoforge/core/demo.py @@ -108,7 +108,7 @@ async def seed_demo_data(engine: Any, template_manager: Any) -> None: "points": [ {"name": "weight", "address": "net_weight", "data_type": "float32", "generator_type": "random", "min_value": 0.5, "max_value": 50.0}, {"name": "tare", "address": "tare_weight", "data_type": "float32", "generator_type": "fixed", "fixed_value": 2.5}, - {"name": "stable", "address": "stable_flag", "data_type": "bool", "generator_type": "fixed", "fixed_value": True}, + {"name": "stable", "address": "stable_flag", "data_type": "bool", "generator_type": "fixed", "fixed_value": true}, ], }, ] diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py index 5beba87..e72842d 100644 --- a/protoforge/core/fault.py +++ b/protoforge/core/fault.py @@ -41,7 +41,6 @@ name="刀具磨损", description="刀具切削刃磨损,切削阻力增大,主轴电流升高,振动增大,进给速率下降", category="mechanical", - scenario_type="trend_drift", default_duration=300.0, tags=["刀具", "磨损", "渐进"], point_faults=[ @@ -68,7 +67,6 @@ name="刀具崩刃", description="刀具突发性崩刃,振动剧烈突增,主轴电流峰值,进给停止", category="mechanical", - scenario_type="sudden_spike", default_duration=15.0, tags=["刀具", "崩刃", "突发"], point_faults=[ @@ -95,7 +93,6 @@ name="主轴过热", description="主轴长时间高负荷运转或冷却不足,电流持续偏高,转速因热保护下降", category="thermal", - scenario_type="trend_drift", default_duration=240.0, tags=["主轴", "过热", "渐进"], point_faults=[ @@ -120,7 +117,6 @@ name="主轴轴承故障", description="主轴轴承磨损或润滑不足,振动幅度持续升高,伴随电流轻微上升", category="mechanical", - scenario_type="trend_drift", default_duration=360.0, tags=["主轴", "轴承", "渐进"], point_faults=[ @@ -145,7 +141,6 @@ name="进给堵转", description="进给轴卡死,进给速率降为零,主轴电流急剧升高", 
category="process", - scenario_type="sudden_spike", default_duration=20.0, tags=["进给", "堵转", "突发"], point_faults=[ @@ -168,7 +163,6 @@ name="振动异常", description="工件装夹松动或切削共振,三轴振动突然大幅增加", category="mechanical", - scenario_type="sudden_spike", default_duration=60.0, tags=["振动", "装夹", "突发"], point_faults=[ @@ -191,7 +185,6 @@ name="切削液不足", description="切削液供给不足,冷却润滑失效,热量积累导致振动和电流缓慢升高", category="process", - scenario_type="trend_drift", default_duration=480.0, tags=["切削液", "冷却", "渐进"], point_faults=[ @@ -218,7 +211,6 @@ name="电源波动", description="供电电压不稳定,主轴转速和进给速率出现随机波动", category="electrical", - scenario_type="high_noise", default_duration=90.0, tags=["电源", "波动", "突发"], point_faults=[ @@ -230,176 +222,6 @@ multiplier=1.0, noise_scale=150.0), ], ), - - # ================================================================== - # 以下为新增故障类型 - # ================================================================== - - # ------------------------------------------------------------------ - # 传感器强干扰 — 高噪声波动型 - # 场景:电磁干扰、接地不良、信号线屏蔽失效等导致传感器读数剧烈抖动 - # 特征:均值基本不变,但噪声幅度突然增大数倍,信号看起来"毛刺"严重 - # 区别于真实故障:设备本身没有坏,只是采集信号质量变差 - # 模式:瞬间注入,持续期间每次采样都叠加大幅随机噪声 - # ------------------------------------------------------------------ - FaultTypeDefinition( - id="sensor_noise", - name="传感器强干扰", - description=( - "【高噪声波动型】电磁干扰或接地不良导致传感器信号质量恶化。" - "均值基本不变,但每次采样叠加大幅随机噪声,曲线呈现密集毛刺。" - "典型场景:变频器附近的传感器、信号线屏蔽层破损、接地回路故障。" - ), - category="electrical", - scenario_type="high_noise", - default_duration=120.0, - tags=["传感器", "干扰", "噪声", "高噪声波动型"], - point_faults=[ - PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, - multiplier=1.0, noise_scale=8.0), - PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT, - multiplier=1.0, noise_scale=2.5), - PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT, - multiplier=1.0, noise_scale=2.5), - PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT, - multiplier=1.0, noise_scale=3.0), - PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, 
- multiplier=1.0, noise_scale=80.0), - ], - ), - - # ------------------------------------------------------------------ - # 换工件/换程序段 — 工况切换型(高速加工 → 低速精加工) - # 场景:CNC 机床切换加工程序,从粗加工切换到精加工 - # 特征:转速降低、进给降低、电流降低,所有指标跳到新的正常范围并稳定 - # 关键:这不是故障!数据本身没有坏,只是工况变了,正常范围完全不同 - # 模式:STEP 阶跃,立即跳到新基线并在整个 duration 内保持 - # ------------------------------------------------------------------ - FaultTypeDefinition( - id="mode_switch_fine_machining", - name="切换精加工工况", - description=( - "【工况切换型】从粗加工切换到精加工程序段。" - "主轴转速升高、进给速率降低、切削电流降低,各指标立即跳到新的正常范围并保持稳定。" - "数据本身没有异常,但与粗加工基线相比会触发阈值告警。" - "典型场景:换刀后进入精加工、加工不同特征面、程序跳段。" - ), - category="process", - scenario_type="mode_switch", - default_duration=300.0, - tags=["工况切换", "精加工", "程序段", "工况切换型"], - point_faults=[ - PointFaultConfig(point="spindle_speed", mode=FaultMode.STEP, - multiplier=1.4, noise_scale=30.0), - PointFaultConfig(point="feed_rate", mode=FaultMode.STEP, - multiplier=0.3, noise_scale=10.0), - PointFaultConfig(point="spindle_current", mode=FaultMode.STEP, - multiplier=0.55, noise_scale=0.5), - PointFaultConfig(point="vibration_x", mode=FaultMode.STEP, - multiplier=0.6, noise_scale=0.1), - PointFaultConfig(point="vibration_y", mode=FaultMode.STEP, - multiplier=0.6, noise_scale=0.1), - PointFaultConfig(point="vibration_z", mode=FaultMode.STEP, - multiplier=0.6, noise_scale=0.1), - ], - ), - - # ------------------------------------------------------------------ - # 进入空载工况 — 工况切换型(加工中 → 空载运行) - # 场景:加工完成、等待上料、程序暂停,主轴空转 - # 特征:进给降为 0,电流大幅下降到空载水平,转速维持,振动降低 - # 模式:STEP 阶跃,立即切换到空载基线 - # ------------------------------------------------------------------ - FaultTypeDefinition( - id="mode_switch_idle", - name="切换空载工况", - description=( - "【工况切换型】机床进入空载运行状态(加工完成等待上料、程序暂停)。" - "进给速率降为零,主轴电流降至空载水平(约为加工时的 20-30%)," - "主轴转速维持,振动明显降低。" - "典型场景:换料等待、程序暂停、加工间隙、换刀等待。" - ), - category="process", - scenario_type="mode_switch", - default_duration=180.0, - tags=["工况切换", "空载", "等待", "工况切换型"], - point_faults=[ - PointFaultConfig(point="feed_rate", 
mode=FaultMode.STEP, - target_value=0.0, noise_scale=2.0), - PointFaultConfig(point="spindle_current", mode=FaultMode.STEP, - multiplier=0.22, noise_scale=0.3), - PointFaultConfig(point="vibration_x", mode=FaultMode.STEP, - multiplier=0.25, noise_scale=0.05), - PointFaultConfig(point="vibration_y", mode=FaultMode.STEP, - multiplier=0.25, noise_scale=0.05), - PointFaultConfig(point="vibration_z", mode=FaultMode.STEP, - multiplier=0.25, noise_scale=0.05), - ], - ), - - # ------------------------------------------------------------------ - # 突发电流尖峰 — 突发脉冲型 - # 场景:切削过程中遇到硬质夹杂物、刀具切入角突变、工件材质不均 - # 特征:主轴电流瞬间冲高(持续 2-5 秒),然后恢复正常,其他指标基本不变 - # 区别于刀具崩刃:电流尖峰后能自动恢复,不会导致停机 - # 模式:瞬间注入,持续时间极短 - # ------------------------------------------------------------------ - FaultTypeDefinition( - id="current_spike", - name="突发电流尖峰", - description=( - "【突发脉冲型】切削过程中遇到硬质夹杂物或材质不均,主轴电流瞬间冲高后自动恢复。" - "电流短暂升至正常值的 3-4 倍,持续仅数秒,振动轻微抖动,进给基本不受影响。" - "典型场景:铸件内部硬质点、焊缝区域、材料硬度不均匀。" - "与刀具崩刃的区别:能自动恢复,不触发停机报警。" - ), - category="mechanical", - scenario_type="sudden_spike", - default_duration=5.0, - tags=["电流", "尖峰", "脉冲", "突发脉冲型"], - point_faults=[ - PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, - multiplier=3.5, noise_scale=1.5), - PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT, - multiplier=2.0, noise_scale=0.5), - PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT, - multiplier=2.0, noise_scale=0.5), - PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT, - multiplier=2.5, noise_scale=0.8), - ], - ), - - # ------------------------------------------------------------------ - # 主轴负载异常 — 关系约束型 - # 场景:刀具钝化但未完全磨损、切削参数不匹配、工件材料变硬 - # 特征:主轴转速正常、进给速率正常,但主轴电流异常升高 - # 关键:单看任何一个指标都"正常",只有多指标关系才能发现异常 - # 模式:渐进式,电流缓慢爬升,转速和进给保持不变 - # ------------------------------------------------------------------ - FaultTypeDefinition( - id="spindle_load_anomaly", - name="主轴负载异常", - description=( - "【关系约束型】主轴转速正常、进给速率正常,但主轴电流异常升高。" - "单看任何一个指标都在正常范围内,只有分析多指标关系才能发现异常。" - 
"物理含义:切削阻力增大(刀具钝化初期、材料变硬)," - "系统尚未触发保护降速,但电流已超出正常切削功率范围。" - "典型场景:刀具轻度钝化、切削液浓度不足、工件材料批次差异。" - ), - category="mechanical", - scenario_type="relation_constraint", - default_duration=240.0, - tags=["主轴", "负载", "关系约束", "关系约束型"], - point_faults=[ - PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, - multiplier=2.8, noise_scale=1.0), - # 转速和进给保持不变(multiplier=1.0),只叠加极小噪声维持真实感 - PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, - multiplier=1.0, noise_scale=15.0), - PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, - multiplier=1.0, noise_scale=5.0), - ], - ), ] # 按 id 索引 @@ -552,15 +374,6 @@ def _compute_value( target = baseline * (1.0 + (pf.multiplier - 1.0) * intensity) else: target = baseline - elif pf.mode == FaultMode.STEP: - # 阶跃模式:立即跳到新基线并在整个 duration 内保持(工况切换专用) - # 与 INSTANT 的区别:STEP 的 multiplier 表示新工况的正常倍数,不受 intensity 缩放 - if pf.target_value is not None: - target = pf.target_value - elif pf.multiplier is not None: - target = baseline * pf.multiplier - else: - target = baseline else: # 渐进模式:随 progress 线性劣化 if pf.target_value is not None: diff --git a/protoforge/models/fault.py b/protoforge/models/fault.py index 025da96..cc038e0 100644 --- a/protoforge/models/fault.py +++ b/protoforge/models/fault.py @@ -8,7 +8,6 @@ class FaultMode(str, Enum): """故障注入模式""" INSTANT = "instant" # 瞬间跳变到异常值,持续 duration 后恢复 GRADUAL = "gradual" # 渐进式劣化,随时间线性恶化,到 duration 时达到峰值后恢复 - STEP = "step" # 阶跃切换到新工况基线,整个 duration 内保持新基线(工况切换专用) class FaultStatus(str, Enum): @@ -37,7 +36,6 @@ class FaultTypeDefinition(BaseModel): name: str description: str category: str # 故障分类:mechanical / electrical / thermal / process - scenario_type: str = "trend_drift" # 异常场景类型:trend_drift / sudden_spike / high_noise / mode_switch / relation_constraint default_duration: float = 120.0 # 默认持续时间(秒) point_faults: list[PointFaultConfig] = Field(default_factory=list) tags: list[str] = Field(default_factory=list) diff --git a/web/src/views/Devices.vue 
b/web/src/views/Devices.vue index 36141a4..65e0535 100644 --- a/web/src/views/Devices.vue +++ b/web/src/views/Devices.vue @@ -92,55 +92,24 @@ - + - 目标设备:{{ faultTargetDevice?.name }} - + 设备:{{ faultTargetDevice?.name }} - - -
- - - {{ selectedFaultType.name }} - - {{ scenarioTypeLabel(selectedFaultType.scenario_type) }} - - - {{ faultCategoryLabel(selectedFaultType.category) }} - - - - - {{ selectedFaultType.description }} - - -
- 影响测点: - - - {{ pf.point }} - - {{ pointFaultModeLabel(pf) }} - - - + +
{{ selectedFaultType.name }} · {{ faultCategoryLabel(selectedFaultType.category) }}
+
{{ selectedFaultType.description }}
+
+ 影响测点:{{ selectedFaultType.point_faults.map(p => p.point).join('、') }}
-
- + - {{ faultIntensityLabel }}({{ faultIntensity }}) - - · 工况切换型强度不影响切换幅度 - @@ -278,17 +243,12 @@ const columns = [ }, { title: '测点', key: 'points', width: 70, render: (row) => (row.points || []).length }, { - title: '故障', key: 'fault', width: 130, + title: '故障', key: 'fault', width: 90, render: (row) => { const fault = activeFaults.value[row.id] if (!fault || fault.status === 'none') return h(NTag, { size: 'tiny', bordered: false }, () => '正常') const pct = Math.round((fault.progress || 0) * 100) - const ft = faultTypes.value.find(t => t.id === fault.fault_type_id) - const scenarioLabel = ft ? scenarioTypeLabel(ft.scenario_type) : '' - return h(NSpace, { size: 2, vertical: false, align: 'center' }, () => [ - h(NTag, { size: 'tiny', type: 'error', bordered: false }, () => `${fault.fault_type_name} ${pct}%`), - scenarioLabel ? h(NTag, { size: 'tiny', bordered: false, style: 'font-size:10px;background:#2d1b1b;color:#f87171' }, () => scenarioLabel) : null, - ]) + return h(NTag, { size: 'tiny', type: 'error', bordered: false }, () => `${fault.fault_type_name} ${pct}%`) } }, { @@ -391,25 +351,6 @@ const faultTypeOptions = computed(() => faultTypes.value.map(t => ({ label: `${t.name}(${faultCategoryLabel(t.category)})`, value: t.id })) ) -// 按场景类型分组的故障选项 -const SCENARIO_ORDER = ['trend_drift', 'sudden_spike', 'high_noise', 'mode_switch', 'relation_constraint'] -const faultTypeGroupedOptions = computed(() => { - const groups = {} - for (const t of faultTypes.value) { - const st = t.scenario_type || 'trend_drift' - if (!groups[st]) groups[st] = [] - groups[st].push({ label: t.name, value: t.id }) - } - return SCENARIO_ORDER - .filter(st => groups[st]) - .map(st => ({ - type: 'group', - label: scenarioTypeLabel(st), - key: st, - children: groups[st], - })) -}) - const selectedFaultType = computed(() => faultTypes.value.find(t => t.id === faultTypeId.value) || null ) @@ -427,42 +368,6 @@ function faultCategoryLabel(category) { return map[category] || category } -function 
scenarioTypeLabel(scenarioType) { - const map = { - trend_drift: '趋势漂移型', - sudden_spike: '突发脉冲型', - high_noise: '高噪声波动型', - mode_switch: '工况切换型', - relation_constraint: '关系约束型', - } - return map[scenarioType] || scenarioType -} - -function scenarioTagType(scenarioType) { - const map = { - trend_drift: 'warning', - sudden_spike: 'error', - high_noise: 'info', - mode_switch: 'success', - relation_constraint: 'default', - } - return map[scenarioType] || 'default' -} - -function pointFaultModeLabel(pf) { - if (pf.mode === 'step') return '→ 阶跃' - if (pf.mode === 'gradual') { - if (pf.multiplier != null) return `→ ×${pf.multiplier}` - if (pf.target_value != null) return `→ ${pf.target_value}` - } - if (pf.mode === 'instant') { - if (pf.target_value != null) return `→ ${pf.target_value}` - if (pf.multiplier != null && pf.multiplier !== 1.0) return `→ ×${pf.multiplier}` - return '± 噪声' - } - return '' -} - function onFaultTypeChange(val) { const t = faultTypes.value.find(f => f.id === val) if (t && t.default_duration) faultDuration.value = t.default_duration From 02174daba0a6b5f0c770243c5f47f082ea2e5d51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Tue, 26 May 2026 10:47:45 +0800 Subject: [PATCH 32/55] feat(protoforge): update protoforge --- protoforge/core/engine.py | 8 + protoforge/core/fault.py | 290 ++++++++++-------- protoforge/protocols/fanuc/server.py | 79 ++++- protoforge/templates/fanuc/fanuc_0if_cnc.json | 69 ++--- protoforge/templates/modbus/fanuc_cnc.json | 58 +++- web/src/views/Devices.vue | 2 +- 6 files changed, 318 insertions(+), 188 deletions(-) diff --git a/protoforge/core/engine.py b/protoforge/core/engine.py index 059f10e..d8b72aa 100644 --- a/protoforge/core/engine.py +++ b/protoforge/core/engine.py @@ -293,6 +293,14 @@ async def _tick_loop(self) -> None: while self._running: for instance in self._devices.values(): instance.tick() + # 将 DeviceInstance._point_values 同步到协议服务器,保证协议层读到最新值 + server = 
self._protocol_servers.get(instance.protocol) + if server and hasattr(server, '_behaviors'): + behavior = server._behaviors.get(instance.id) + if behavior is not None: + behavior._values.update(instance._point_values) + if hasattr(behavior, 'sync_from_point_values'): + behavior.sync_from_point_values(instance._point_values) for scenario in self._scenario_instances.values(): scenario.tick() await asyncio.sleep(1.0) diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py index e72842d..11b61a7 100644 --- a/protoforge/core/fault.py +++ b/protoforge/core/fault.py @@ -32,194 +32,228 @@ BUILTIN_FAULT_TYPES: list[FaultTypeDefinition] = [ # ------------------------------------------------------------------ - # 刀具磨损 — 最常见的机加工故障 - # 特征:切削阻力增大 → 主轴电流缓慢爬升,振动幅度增大,进给速率被系统压低 - # 模式:渐进式,持续数分钟,模拟刀具从轻度磨损到需要换刀的过程 - # ------------------------------------------------------------------ - FaultTypeDefinition( - id="tool_wear", - name="刀具磨损", - description="刀具切削刃磨损,切削阻力增大,主轴电流升高,振动增大,进给速率下降", - category="mechanical", - default_duration=300.0, - tags=["刀具", "磨损", "渐进"], - point_faults=[ - PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, - multiplier=2.2, noise_scale=0.8), - PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL, - multiplier=3.0, noise_scale=0.3), - PointFaultConfig(point="vibration_y", mode=FaultMode.GRADUAL, - multiplier=3.0, noise_scale=0.3), - PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL, - multiplier=3.5, noise_scale=0.4), - PointFaultConfig(point="feed_rate", mode=FaultMode.GRADUAL, - multiplier=0.45, noise_scale=20.0), - ], - ), - - # ------------------------------------------------------------------ - # 刀具崩刃 — 突发性刀具失效 - # 特征:瞬间冲击 → 振动突增,电流瞬间峰值,进给立即停止 - # 模式:瞬间注入,持续时间短(机床通常会触发报警停机) + # 进给堵转 — 工件夹紧松动或切削量过大导致进给卡死 + # 特征:进给速率瞬间降为0,主轴负载和电流急剧升高,主轴仍在转(区别于崩刃) + # 模式:瞬间注入 # ------------------------------------------------------------------ FaultTypeDefinition( - id="tool_breakage", - name="刀具崩刃", - 
description="刀具突发性崩刃,振动剧烈突增,主轴电流峰值,进给停止", - category="mechanical", - default_duration=15.0, - tags=["刀具", "崩刃", "突发"], + id="feed_stall", + name="进给堵转", + description="进给轴卡死,进给速率降为零,主轴负载和电流急剧升高,主轴转速维持(区别于崩刃停主轴)", + category="process", + default_duration=20.0, + tags=["进给", "堵转", "突发"], point_faults=[ - PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, - multiplier=4.5, noise_scale=2.0), - PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT, - multiplier=8.0, noise_scale=1.5), - PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT, - multiplier=8.0, noise_scale=1.5), - PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT, - multiplier=10.0, noise_scale=2.0), PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, target_value=0.0, noise_scale=0.0), + PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, + multiplier=2.8, noise_scale=5.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + multiplier=3.8, noise_scale=1.5), ], ), # ------------------------------------------------------------------ # 主轴过热 — 长时间高负荷或冷却系统故障 - # 特征:主轴电流持续偏高,转速因热保护逐渐降低 + # 特征:主轴负载和电流持续偏高,转速因热保护逐渐降低 # 模式:渐进式,持续时间较长 # ------------------------------------------------------------------ FaultTypeDefinition( id="spindle_overheat", name="主轴过热", - description="主轴长时间高负荷运转或冷却不足,电流持续偏高,转速因热保护下降", + description="主轴长时间高负荷运转或冷却不足,spindle_load和spindle_current持续偏高,转速因热保护渐进下降", category="thermal", default_duration=240.0, tags=["主轴", "过热", "渐进"], point_faults=[ + PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL, + multiplier=1.6, noise_scale=3.0), PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, multiplier=1.8, noise_scale=1.2), PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL, multiplier=0.6, noise_scale=50.0), - PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL, - multiplier=1.5, noise_scale=0.2), - PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL, - multiplier=1.5, 
noise_scale=0.2), ], ), # ------------------------------------------------------------------ - # 主轴轴承故障 — 轴承磨损或润滑不足 - # 特征:振动频率特征变化,整体振动幅度升高,电流略升 - # 模式:渐进式 + # 电源波动 — 供电不稳定 + # 特征:主轴转速和进给速率出现随机波动,电流不稳定 + # 模式:瞬间注入(持续期间持续抖动) + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="power_fluctuation", + name="电源波动", + description="供电电压不稳定,主轴转速和进给速率出现随机波动", + category="electrical", + default_duration=90.0, + tags=["电源", "波动", "突发"], + point_faults=[ + PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=300.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=5.0), + PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=150.0), + ], + ), + + # ------------------------------------------------------------------ + # 刀具磨损加剧 — 主轴负载趋势漂移 + # 特征:spindle_load 基线随时间缓慢爬升(趋势漂移型),电流同步升高 + # 场景:刀具从轻度磨损到需要换刀的完整过程 + # 模式:渐进式,持续时间长 # ------------------------------------------------------------------ FaultTypeDefinition( - id="spindle_bearing_fault", - name="主轴轴承故障", - description="主轴轴承磨损或润滑不足,振动幅度持续升高,伴随电流轻微上升", - category="mechanical", - default_duration=360.0, - tags=["主轴", "轴承", "渐进"], + id="tool_wear_progressive", + name="刀具磨损加剧", + description="刀具磨损导致切削阻力持续增大,spindle_load基线缓慢爬升至1.8倍,spindle_current同步升高;进给速度由G代码控制不受影响", + category="tool", + default_duration=600.0, + tags=["刀具", "磨损", "负载", "趋势漂移"], point_faults=[ - PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL, - multiplier=4.0, noise_scale=0.5), - PointFaultConfig(point="vibration_y", mode=FaultMode.GRADUAL, - multiplier=4.0, noise_scale=0.5), - PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL, - multiplier=5.0, noise_scale=0.8), + PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL, + multiplier=1.8, noise_scale=3.0), PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, - multiplier=1.3, noise_scale=0.5), + 
multiplier=1.7, noise_scale=1.5), ], ), # ------------------------------------------------------------------ - # 进给堵转 — 工件夹紧松动或切削量过大导致进给卡死 - # 特征:进给速率瞬间降为 0,主轴电流急剧升高 - # 模式:瞬间注入 + # 刀具崩刃 — 主轴负载突发脉冲 + # 特征:spindle_load 瞬间冲高(可超120%,FANUC最大输出200%),进给停止,CNC停主轴 + # 场景:刀具突发性失效,机床触发过载报警并停机 + # 模式:瞬间注入,持续时间极短 # ------------------------------------------------------------------ FaultTypeDefinition( - id="feed_stall", - name="进给堵转", - description="进给轴卡死,进给速率降为零,主轴电流急剧升高", - category="process", - default_duration=20.0, - tags=["进给", "堵转", "突发"], + id="tool_breakage_sudden", + name="刀具崩刃", + description="刀具突发性崩刃,spindle_load瞬间冲高至正常值3.2倍(可超120%,FANUC最大输出200%),进给停止,CNC触发过载报警并停主轴", + category="tool", + default_duration=10.0, + tags=["刀具", "崩刃", "突发", "过载"], point_faults=[ + PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, + multiplier=3.2, noise_scale=8.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + multiplier=4.0, noise_scale=3.0), + PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, + target_value=0.0, noise_scale=0.0), PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, target_value=0.0, noise_scale=0.0), + PointFaultConfig(point="alarm_status", mode=FaultMode.INSTANT, + target_value=1.0, noise_scale=0.0), + ], + ), + + # ------------------------------------------------------------------ + # 过载保护触发 — 负载/进给反向联动异常(关系约束型) + # 特征:负载超限后CNC自动降进给速率,负载高企与进给降速同时出现 + # 场景:切削参数过激进,CNC自适应保护介入 + # 模式:瞬间注入(持续期间维持异常关系) + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="tool_overload_protection", + name="过载保护触发", + description="主轴负载超限,CNC自动降低进给速率保护刀具,负载高企与进给降速同时出现", + category="tool", + default_duration=120.0, + tags=["刀具", "过载", "进给", "关系约束"], + point_faults=[ + PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, + multiplier=1.9, noise_scale=4.0), PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, - multiplier=3.8, noise_scale=1.5), - 
PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT, - multiplier=5.0, noise_scale=1.0), + multiplier=1.8, noise_scale=2.0), + PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, + multiplier=0.35, noise_scale=15.0), ], ), # ------------------------------------------------------------------ - # 振动异常 — 工件装夹松动或共振 - # 特征:三轴振动突然大幅增加,其他指标基本正常 - # 模式:瞬间注入 + # 空切检测 — 刀具未接触工件(工况切换型) + # 特征:spindle_load 跌至空载区间(5-15%),主轴转速和进给速率保持正常 + # 场景:工件装夹偏移、程序坐标错误、工件提前切完 + # 模式:瞬间注入(均值跳变,方差不变) # ------------------------------------------------------------------ FaultTypeDefinition( - id="vibration_spike", - name="振动异常", - description="工件装夹松动或切削共振,三轴振动突然大幅增加", - category="mechanical", - default_duration=60.0, - tags=["振动", "装夹", "突发"], + id="air_cutting", + name="空切检测", + description="刀具未接触工件,spindle_load跌至空载区间(5-15%),spindle_current降至空转水平,转速进给保持正常", + category="tool", + default_duration=180.0, + tags=["刀具", "空切", "工况切换", "负载"], point_faults=[ - PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT, - multiplier=6.0, noise_scale=1.0), - PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT, - multiplier=6.0, noise_scale=1.0), - PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT, - multiplier=7.0, noise_scale=1.2), + PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, + target_value=8.0, noise_scale=2.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + target_value=2.5, noise_scale=0.3), ], ), # ------------------------------------------------------------------ - # 切削液不足 — 冷却润滑失效 - # 特征:热量积累 → 振动缓慢升高,电流缓慢升高,进给略降 - # 模式:渐进式,速度较慢 + # 积屑瘤 — 切屑粘附刀刃导致周期性负载突刺 + # 特征:spindle_load 在正常基线上出现间歇性冲高后恢复,不是持续爬升 + # 突刺幅度约1.5-2倍基线,持续1-3秒后自行恢复,周期不固定 + # 场景:低速切削、切削液不足、韧性材料(铝合金、不锈钢)加工时常见 + # 模式:瞬间注入(noise_scale 大,模拟随机突刺效果) # ------------------------------------------------------------------ FaultTypeDefinition( - id="coolant_failure", - name="切削液不足", - description="切削液供给不足,冷却润滑失效,热量积累导致振动和电流缓慢升高", - category="process", - 
default_duration=480.0, - tags=["切削液", "冷却", "渐进"], + id="built_up_edge", + name="积屑瘤", + description="切屑粘附刀刃,spindle_load在正常基线上出现间歇性突刺(1.5-2倍),突刺后自行恢复,区别于磨损的持续爬升", + category="tool", + default_duration=300.0, + tags=["刀具", "积屑瘤", "突刺", "低速切削"], point_faults=[ - PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, - multiplier=1.6, noise_scale=0.8), - PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL, - multiplier=2.0, noise_scale=0.3), - PointFaultConfig(point="vibration_y", mode=FaultMode.GRADUAL, - multiplier=2.0, noise_scale=0.3), - PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL, - multiplier=2.5, noise_scale=0.4), - PointFaultConfig(point="feed_rate", mode=FaultMode.GRADUAL, - multiplier=0.75, noise_scale=15.0), + PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, + multiplier=1.7, noise_scale=12.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + multiplier=1.6, noise_scale=4.0), ], ), # ------------------------------------------------------------------ - # 电源波动 — 供电不稳定 - # 特征:主轴转速和进给速率出现随机波动,电流不稳定 - # 模式:瞬间注入(持续期间持续抖动) + # 刀具涂层剥落 — 负载阶跃后在新基线稳定 + # 特征:spindle_load 出现一次阶跃式跳升(区别于缓慢爬升的磨损), + # 然后在新的高基线上稳定波动,不会继续爬升也不会恢复 + # 场景:涂层质量问题或切削条件恶劣导致涂层突然失效 + # 模式:瞬间注入(立即跳到新基线,持续维持) # ------------------------------------------------------------------ FaultTypeDefinition( - id="power_fluctuation", - name="电源波动", - description="供电电压不稳定,主轴转速和进给速率出现随机波动", - category="electrical", - default_duration=90.0, - tags=["电源", "波动", "突发"], + id="coating_spalling", + name="刀具涂层剥落", + description="刀具涂层突然失效,spindle_load阶跃式跳升至1.5倍后在新基线稳定波动,区别于磨损的缓慢爬升和崩刃的瞬间冲高", + category="tool", + default_duration=600.0, + tags=["刀具", "涂层", "阶跃", "工况切换"], point_faults=[ - PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, - multiplier=1.0, noise_scale=300.0), + PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, + multiplier=1.5, noise_scale=3.0), PointFaultConfig(point="spindle_current", 
mode=FaultMode.INSTANT, - multiplier=1.0, noise_scale=5.0), + multiplier=1.4, noise_scale=1.5), + ], + ), + + # ------------------------------------------------------------------ + # 换刀装夹偏移 — 刀具伸出量异常导致负载整体偏高 + # 特征:换刀后 spindle_load 整体偏高(1.4-1.6倍),波动规律正常, + # 不是空切(负载不低),不是磨损(不随时间爬升) + # 场景:刀具伸出量偏长、刀柄锥面未清洁、刀具型号装错 + # 模式:瞬间注入(均值整体偏移,方差不变) + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="tool_offset_error", + name="换刀装夹偏移", + description="换刀后刀具伸出量或装夹位置异常,spindle_load整体偏高(1.4-1.6倍),波动规律正常,不随时间变化,区别于磨损和空切", + category="tool", + default_duration=3600.0, + tags=["刀具", "装夹", "工况切换", "负载偏移"], + point_faults=[ + PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, + multiplier=1.5, noise_scale=3.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + multiplier=1.4, noise_scale=1.5), PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, - multiplier=1.0, noise_scale=150.0), + multiplier=1.0, noise_scale=5.0), ], ), ] @@ -312,10 +346,10 @@ def apply(self, device: Any) -> None: continue baseline = fault.baseline_values.get(pf.point, 0.0) if baseline == 0.0: - # 基线为 0 时用当前值兜底,避免乘法无效 - try: - baseline = float(device._point_values[pf.point]) or 1.0 - except (TypeError, ValueError): + # 基线为0说明注入时设备处于换刀/停机状态 + # target_value 模式可以直接执行(如崩刃归零、空切归空载) + # multiplier 模式跳过,避免在零基线上产生无意义的值 + if pf.target_value is None: continue device._point_values[pf.point] = self._compute_value( diff --git a/protoforge/protocols/fanuc/server.py b/protoforge/protocols/fanuc/server.py index b0d6f15..e711347 100644 --- a/protoforge/protocols/fanuc/server.py +++ b/protoforge/protocols/fanuc/server.py @@ -22,8 +22,11 @@ def __init__(self, points: list[dict]): "program": "O0001", "speed_override": 100, "feed_override": 100, - "spindle_speed": 3000, - "feed_rate": 500, + "spindle_speed": 0.0, + "feed_rate": 0.0, + "spindle_current": 0.0, + "spindle_load": 0.0, + "tool_number": 1, "absolute_pos": [0.0] * 5, "machine_pos": [0.0] * 
5, "relative_pos": [0.0] * 5, @@ -32,6 +35,32 @@ def __init__(self, points: list[dict]): for p in points: self._values[p["name"]] = p.get("fixed_value", 0) + def sync_from_point_values(self, point_values: dict[str, Any]) -> None: + """将 DeviceInstance._point_values 同步到 _cnc_status,保持协议层数据与生成器一致""" + mapping = { + "spindle_speed": "spindle_speed", + "feed_rate": "feed_rate", + "spindle_current": "spindle_current", + "spindle_load": "spindle_load", + "tool_number": "tool_number", + "alarm_status": "alarm", + "run_mode": "mode", + "execution_status": "execution", + "program_name": "program", + "x_absolute": ("absolute_pos", 0), + "y_absolute": ("absolute_pos", 1), + "z_absolute": ("absolute_pos", 2), + } + for point_name, status_key in mapping.items(): + if point_name not in point_values: + continue + val = point_values[point_name] + if isinstance(status_key, tuple): + key, idx = status_key + self._cnc_status[key][idx] = float(val) + else: + self._cnc_status[status_key] = val + async def generate_value(self, point_config: dict[str, Any]) -> Any: name = point_config.get("name", "") return self._values.get(name, 0) @@ -144,6 +173,12 @@ def _process_focas(self, data: bytes) -> bytes | None: return self._handle_cnc_rdspindlespd(req_id) elif func_id == 0x0111: return self._handle_cnc_rdfeed(req_id) + elif func_id == 0x0112: + return self._handle_cnc_rdspload(req_id) + elif func_id == 0x0113: + return self._handle_cnc_rdspmeter(req_id) + elif func_id == 0x0114: + return self._handle_cnc_toolnum(req_id) elif func_id == 0x0120: return self._handle_cnc_alarm(req_id) elif func_id == 0x0130: @@ -247,7 +282,7 @@ def _handle_cnc_rdspindlespd(self, req_id: int) -> bytes: def _handle_cnc_rdfeed(self, req_id: int) -> bytes: behavior = next(iter(self._behaviors.values()), None) - feed = behavior._cnc_status.get("feed_rate", 500) if behavior else 500 + feed = behavior._cnc_status.get("feed_rate", 0.0) if behavior else 0.0 resp = bytearray() resp += struct.pack(" bytes: resp += 
struct.pack(" bytes: + """cnc_rdspload — 主轴负载率(%),FANUC FOCAS2 原生接口""" + behavior = next(iter(self._behaviors.values()), None) + load = behavior._cnc_status.get("spindle_load", 0.0) if behavior else 0.0 + + resp = bytearray() + resp += struct.pack(" bytes: + """cnc_rdspmeter — 主轴电流(A)""" + behavior = next(iter(self._behaviors.values()), None) + current = behavior._cnc_status.get("spindle_current", 0.0) if behavior else 0.0 + + resp = bytearray() + resp += struct.pack(" bytes: + """cnc_toolnum — 当前刀号""" + behavior = next(iter(self._behaviors.values()), None) + tool = behavior._cnc_status.get("tool_number", 1) if behavior else 1 + + resp = bytearray() + resp += struct.pack(" bytes: behavior = next(iter(self._behaviors.values()), None) alarm = behavior._cnc_status.get("alarm", 0) if behavior else 0 @@ -311,6 +382,8 @@ async def read_points(self, device_id: str) -> list[PointValue]: config = self._device_configs.get(device_id) if not behavior or not config: return [] + # 将 _point_values 同步到 _cnc_status,保证 FOCAS 协议响应与生成器数据一致 + behavior.sync_from_point_values(behavior._values) now = time.time() return [PointValue(name=p.name, value=behavior.get_value(p.name), timestamp=now) for p in config.points] diff --git a/protoforge/templates/fanuc/fanuc_0if_cnc.json b/protoforge/templates/fanuc/fanuc_0if_cnc.json index 39437f3..0f0e362 100644 --- a/protoforge/templates/fanuc/fanuc_0if_cnc.json +++ b/protoforge/templates/fanuc/fanuc_0if_cnc.json @@ -62,77 +62,68 @@ "address": "spindle_speed", "data_type": "float32", "unit": "RPM", - "description": "主轴转速", + "description": "主轴转速,按加工程序阶梯切换:换刀归零,粗铣2000,半精铣4000,精铣6000", "access": "r", - "generator_type": "sawtooth", - "min_value": 1000, + "generator_type": "script", + "min_value": 0, "max_value": 8000, - "generator_config": {"period": 120} + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: target = 0\nelif t < 120: target = 2000\nelif t < 150: target = 0\nelif t < 240: target = 4000\nelif t 
< 270: target = 0\nelse: target = 6000\nnoise = random.gauss(0, 8) if target > 0 else 0; result = round(max(0, target + noise), 1)" + } }, { "name": "feed_rate", "address": "feed_rate", "data_type": "float32", "unit": "mm/min", - "description": "进给速度", + "description": "进给速度,与加工工步联动:换刀时为0,粗铣800,半精铣500,精铣300", "access": "r", - "generator_type": "sine", - "min_value": 100, + "generator_type": "script", + "min_value": 0, "max_value": 5000, - "generator_config": {"period": 60, "phase": 0.0} + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: target = 0\nelif t < 120: target = 800\nelif t < 150: target = 0\nelif t < 240: target = 500\nelif t < 270: target = 0\nelse: target = 300\nnoise = random.gauss(0, 5) if target > 0 else 0; result = round(max(0, target + noise), 1)" + } }, { "name": "spindle_current", "address": "spindle_current", "data_type": "float32", "unit": "A", - "description": "主轴电流", - "access": "r", - "generator_type": "sine", - "min_value": 8.0, - "max_value": 32.0, - "generator_config": {"period": 120, "phase": 0.5} - }, - { - "name": "vibration_x", - "address": "vibration_x", - "data_type": "float32", - "unit": "m/s²", - "description": "X轴振动加速度", + "description": "主轴电流,与工步联动:换刀时伺服保持电流约2.5A,粗铣18-24A,半精铣12-18A,精铣8-13A", "access": "r", "generator_type": "script", - "min_value": 0.1, - "max_value": 2.5, + "min_value": 0.0, + "max_value": 40.0, "generator_config": { - "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 90); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)" + "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: base, noise_std = 2.5, 0.3\nelif t < 120: base, noise_std = 21.0, 1.2\nelif t < 150: base, noise_std = 2.5, 0.3\nelif t < 240: base, noise_std = 15.0, 1.2\nelif t < 270: base, noise_std = 2.5, 0.3\nelse: base, noise_std = 10.0, 1.2\nresult = round(max(0, base + random.gauss(0, noise_std)), 2)" } }, { - 
"name": "vibration_y", - "address": "vibration_y", + "name": "spindle_load", + "address": "spindle_load", "data_type": "float32", - "unit": "m/s²", - "description": "Y轴振动加速度", + "unit": "%", + "description": "主轴负载率(0-100%),与工步联动:换刀时0%,粗铣45-60%,半精铣35-50%,精铣25-40%", "access": "r", "generator_type": "script", - "min_value": 0.1, - "max_value": 2.5, + "min_value": 0.0, + "max_value": 120.0, "generator_config": { - "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 75 + 1.0); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)" + "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: base = 0\nelif t < 120: base = 52.0\nelif t < 150: base = 0\nelif t < 240: base = 42.0\nelif t < 270: base = 0\nelse: base = 32.0\nnoise = random.gauss(0, 2.5) if base > 0 else 0; result = round(max(0, min(120.0, base + noise)), 2)" } }, { - "name": "vibration_z", - "address": "vibration_z", - "data_type": "float32", - "unit": "m/s²", - "description": "Z轴振动加速度", + "name": "tool_number", + "address": "tool_number", + "data_type": "uint16", + "description": "当前刀号,与工步联动:换刀阶段切换,粗铣T01,半精铣T02,精铣T03", "access": "r", "generator_type": "script", - "min_value": 0.1, - "max_value": 3.0, + "min_value": 1, + "max_value": 12, "generator_config": { - "script": "elapsed = context['elapsed']; base = 0.7 + 0.4 * math.sin(2 * math.pi * elapsed / 60 + 2.1); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)" + "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 120: result = 1\nelif t < 240: result = 2\nelse: result = 3" } }, { diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json index 43622cf..3a21815 100644 --- a/protoforge/templates/modbus/fanuc_cnc.json +++ b/protoforge/templates/modbus/fanuc_cnc.json @@ -11,36 +11,42 @@ "address": "0", "data_type": "uint16", "unit": "RPM", - "description": "主轴实际转速", + "description": 
"主轴实际转速,与工步联动:换刀归零,粗铣2000,半精铣4000,精铣6000", "access": "r", - "generator_type": "sawtooth", + "generator_type": "script", "min_value": 0, - "max_value": 12000, - "generator_config": {"period": 180} + "max_value": 8000, + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: target = 0\nelif t < 120: target = 2000\nelif t < 150: target = 0\nelif t < 240: target = 4000\nelif t < 270: target = 0\nelse: target = 6000\nnoise = random.gauss(0, 8) if target > 0 else 0; result = round(max(0, target + noise), 1)" + } }, { "name": "feed_rate", "address": "1", "data_type": "float32", "unit": "mm/min", - "description": "实际进给速度", + "description": "实际进给速度,与工步联动:换刀时0,粗铣800,半精铣500,精铣300", "access": "r", - "generator_type": "sine", - "min_value": 200.0, - "max_value": 3000.0, - "generator_config": {"period": 90, "phase": 1.0} + "generator_type": "script", + "min_value": 0, + "max_value": 5000, + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: target = 0\nelif t < 120: target = 800\nelif t < 150: target = 0\nelif t < 240: target = 500\nelif t < 270: target = 0\nelse: target = 300\nnoise = random.gauss(0, 5) if target > 0 else 0; result = round(max(0, target + noise), 1)" + } }, { "name": "spindle_current", "address": "2", "data_type": "float32", "unit": "A", - "description": "主轴电流", + "description": "主轴电流,与工步联动:换刀伺服保持2.5A,粗铣21A,半精铣15A,精铣10A", "access": "r", - "generator_type": "sine", - "min_value": 8.0, - "max_value": 35.0, - "generator_config": {"period": 120, "phase": 2.0} + "generator_type": "script", + "min_value": 0.0, + "max_value": 40.0, + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: base, noise_std = 2.5, 0.3\nelif t < 120: base, noise_std = 21.0, 1.2\nelif t < 150: base, noise_std = 2.5, 0.3\nelif t < 240: base, noise_std = 15.0, 1.2\nelif t < 270: base, noise_std = 2.5, 0.3\nelse: base, noise_std = 10.0, 1.2\nresult = round(max(0, base + 
random.gauss(0, noise_std)), 2)" + } }, { "name": "vibration_x", @@ -84,6 +90,20 @@ "script": "elapsed = context['elapsed']; base = 0.7 + 0.4 * math.sin(2 * math.pi * elapsed / 58 + 2.8); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)" } }, + { + "name": "spindle_load", + "address": "29", + "data_type": "float32", + "unit": "%", + "description": "主轴负载率(0-100%),与工步联动:换刀时0%,粗铣45-60%,半精铣35-50%,精铣25-40%", + "access": "r", + "generator_type": "script", + "min_value": 0.0, + "max_value": 120.0, + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: base = 0\nelif t < 120: base = 52.0\nelif t < 150: base = 0\nelif t < 240: base = 42.0\nelif t < 270: base = 0\nelse: base = 32.0\nnoise = random.gauss(0, 2.5) if base > 0 else 0; result = round(max(0, min(120.0, base + noise)), 2)" + } + }, { "name": "spindle_override", "address": "3", @@ -126,10 +146,14 @@ "name": "tool_no", "address": "7", "data_type": "uint16", - "description": "当前刀具号T", + "description": "当前刀具号,与工步联动:粗铣T1,半精铣T2,精铣T3", "access": "r", - "generator_type": "fixed", - "fixed_value": 3 + "generator_type": "script", + "min_value": 1, + "max_value": 12, + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 120: result = 1\nelif t < 240: result = 2\nelse: result = 3" + } }, { "name": "abs_x", diff --git a/web/src/views/Devices.vue b/web/src/views/Devices.vue index 65e0535..3a33068 100644 --- a/web/src/views/Devices.vue +++ b/web/src/views/Devices.vue @@ -364,7 +364,7 @@ const faultIntensityLabel = computed(() => { }) function faultCategoryLabel(category) { - const map = { mechanical: '机械', thermal: '热', electrical: '电气', process: '工艺' } + const map = { mechanical: '机械', thermal: '热', electrical: '电气', process: '工艺', tool: '刀具' } return map[category] || category } From 58b2e3685ec83a1b4520874510da9d7ce0542ef4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Wed, 27 May 2026 
09:36:02 +0800 Subject: [PATCH 33/55] fix(scene): scene different --- protoforge/templates/modbus/fanuc_cnc.json | 24 +- .../templates/modbus/fanuc_cnc_finish.json | 259 ++++++++++++++++++ .../modbus/fanuc_cnc_semi_finish.json | 259 ++++++++++++++++++ 3 files changed, 529 insertions(+), 13 deletions(-) create mode 100644 protoforge/templates/modbus/fanuc_cnc_finish.json create mode 100644 protoforge/templates/modbus/fanuc_cnc_semi_finish.json diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json index 3a21815..3ff30df 100644 --- a/protoforge/templates/modbus/fanuc_cnc.json +++ b/protoforge/templates/modbus/fanuc_cnc.json @@ -11,13 +11,13 @@ "address": "0", "data_type": "uint16", "unit": "RPM", - "description": "主轴实际转速,与工步联动:换刀归零,粗铣2000,半精铣4000,精铣6000", + "description": "主轴实际转速,粗加工工位:空闲为0,启动后稳定在约2000RPM", "access": "r", "generator_type": "script", "min_value": 0, "max_value": 8000, "generator_config": { - "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: target = 0\nelif t < 120: target = 2000\nelif t < 150: target = 0\nelif t < 240: target = 4000\nelif t < 270: target = 0\nelse: target = 6000\nnoise = random.gauss(0, 8) if target > 0 else 0; result = round(max(0, target + noise), 1)" + "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 20:\n target = 0\nelif t < 30:\n target = 2000 * ((t - 20) / 10.0)\nelif t < 155:\n target = 2000\nelif t < 165:\n target = 2000 * (1 - ((t - 155) / 10.0))\nelse:\n target = 0\nnoise = random.gauss(0, 15) if target > 100 else 0\nresult = int(round(max(0, min(8000, target + noise))))" } }, { @@ -25,13 +25,13 @@ "address": "1", "data_type": "float32", "unit": "mm/min", - "description": "实际进给速度,与工步联动:换刀时0,粗铣800,半精铣500,精铣300", + "description": "粗加工实际进给速度:切入阶段中低速,稳定粗加工约800mm/min,空闲和主轴启动阶段为0", "access": "r", "generator_type": "script", "min_value": 0, "max_value": 5000, "generator_config": { - "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t 
< 30: target = 0\nelif t < 120: target = 800\nelif t < 150: target = 0\nelif t < 240: target = 500\nelif t < 270: target = 0\nelse: target = 300\nnoise = random.gauss(0, 5) if target > 0 else 0; result = round(max(0, target + noise), 1)" + "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 30:\n target = 0\nelif t < 40:\n target = 500 + 300 * ((t - 30) / 10.0)\nelif t < 140:\n target = 800\nelif t < 155:\n target = 400\nelse:\n target = 0\nnoise = random.gauss(0, 25) if target > 0 else 0\nresult = round(max(0, min(5000, target + noise)), 1)" } }, { @@ -39,13 +39,13 @@ "address": "2", "data_type": "float32", "unit": "A", - "description": "主轴电流,与工步联动:换刀伺服保持2.5A,粗铣21A,半精铣15A,精铣10A", + "description": "粗加工主轴电流:空闲低电流,启动阶段中等电流,切入瞬间升高,稳定粗加工约19~24A", "access": "r", "generator_type": "script", "min_value": 0.0, "max_value": 40.0, "generator_config": { - "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: base, noise_std = 2.5, 0.3\nelif t < 120: base, noise_std = 21.0, 1.2\nelif t < 150: base, noise_std = 2.5, 0.3\nelif t < 240: base, noise_std = 15.0, 1.2\nelif t < 270: base, noise_std = 2.5, 0.3\nelse: base, noise_std = 10.0, 1.2\nresult = round(max(0, base + random.gauss(0, noise_std)), 2)" + "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 20:\n base, noise_std = 2.2, 0.25\nelif t < 30:\n base, noise_std = 6.0, 0.8\nelif t < 40:\n base, noise_std = 23.5, 1.5\nelif t < 140:\n base, noise_std = 21.5, 1.3\nelif t < 155:\n base, noise_std = 6.0, 0.8\nelif t < 165:\n base, noise_std = 4.0, 0.5\nelse:\n base, noise_std = 2.2, 0.25\nresult = round(max(0, min(40.0, base + random.gauss(0, noise_std))), 2)" } }, { @@ -95,13 +95,13 @@ "address": "29", "data_type": "float32", "unit": "%", - "description": "主轴负载率(0-100%),与工步联动:换刀时0%,粗铣45-60%,半精铣35-50%,精铣25-40%", + "description": "粗加工主轴负载率,常态0~100%,短时允许到120%;粗加工稳定阶段约45~65%,切入瞬间可能更高", "access": "r", "generator_type": "script", "min_value": 0.0, "max_value": 120.0, "generator_config": 
{ - "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: base = 0\nelif t < 120: base = 52.0\nelif t < 150: base = 0\nelif t < 240: base = 42.0\nelif t < 270: base = 0\nelse: base = 32.0\nnoise = random.gauss(0, 2.5) if base > 0 else 0; result = round(max(0, min(120.0, base + noise)), 2)" + "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 20:\n base, noise_std = 0.0, 0.0\nelif t < 30:\n base, noise_std = 8.0, 2.0\nelif t < 40:\n base, noise_std = 65.0, 4.0\nelif t < 140:\n base, noise_std = 55.0, 4.0\nelif t < 155:\n base, noise_std = 8.0, 2.0\nelif t < 165:\n base, noise_std = 3.0, 1.0\nelse:\n base, noise_std = 0.0, 0.0\nnoise = random.gauss(0, noise_std) if noise_std > 0 else 0\nresult = round(max(0, min(120.0, base + noise)), 2)" } }, { @@ -146,14 +146,12 @@ "name": "tool_no", "address": "7", "data_type": "uint16", - "description": "当前刀具号,与工步联动:粗铣T1,半精铣T2,精铣T3", + "description": "当前刀具号,粗加工工位固定使用T1", "access": "r", - "generator_type": "script", + "generator_type": "fixed", "min_value": 1, "max_value": 12, - "generator_config": { - "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 120: result = 1\nelif t < 240: result = 2\nelse: result = 3" - } + "fixed_value": 1 }, { "name": "abs_x", diff --git a/protoforge/templates/modbus/fanuc_cnc_finish.json b/protoforge/templates/modbus/fanuc_cnc_finish.json new file mode 100644 index 0000000..1243e59 --- /dev/null +++ b/protoforge/templates/modbus/fanuc_cnc_finish.json @@ -0,0 +1,259 @@ +{ + "id": "modbus_fanuc_cnc_finish", + "name": "Fanuc CNC 精铣工位", + "protocol": "modbus_tcp", + "description": "FANUC Series 0i-MF数控系统,精铣工位:主轴约6000RPM,进给约300mm/min,切深小,表面粗糙度Ra0.8~1.6,要求主轴稳定性高", + "manufacturer": "FANUC", + "model": "0i-MF", + "points": [ + { + "name": "spindle_speed", + "address": "0", + "data_type": "uint16", + "unit": "RPM", + "description": "主轴实际转速,精铣工位:空闲为0,启动后稳定在约6000RPM,精铣对转速稳定性要求高,波动小", + "access": "r", + "generator_type": "script", + "min_value": 0, + "max_value": 
8000, + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 120\nif t < 12:\n target = 0\nelif t < 28:\n target = 6000 * ((t - 12) / 16.0)\nelif t < 95:\n target = 6000\nelif t < 110:\n target = 6000 * (1 - ((t - 95) / 15.0))\nelse:\n target = 0\nnoise = random.gauss(0, 8) if target > 100 else 0\nresult = int(round(max(0, min(8000, target + noise))))" + } + }, + { + "name": "feed_rate", + "address": "1", + "data_type": "float32", + "unit": "mm/min", + "description": "精铣实际进给速度:切入阶段低速,稳定精铣约300mm/min,精铣进给慢且稳定,波动小", + "access": "r", + "generator_type": "script", + "min_value": 0, + "max_value": 5000, + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 120\nif t < 28:\n target = 0\nelif t < 36:\n target = 150 + 150 * ((t - 28) / 8.0)\nelif t < 90:\n target = 300\nelif t < 100:\n target = 150\nelse:\n target = 0\nnoise = random.gauss(0, 6) if target > 0 else 0\nresult = round(max(0, min(5000, target + noise)), 1)" + } + }, + { + "name": "spindle_current", + "address": "2", + "data_type": "float32", + "unit": "A", + "description": "精铣主轴电流:空闲约2A,启动约4A,切入峰值约11A,稳定精铣约7~10A,精铣切深小电流低且稳定", + "access": "r", + "generator_type": "script", + "min_value": 0.0, + "max_value": 40.0, + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 120\nif t < 12:\n base, noise_std = 2.2, 0.2\nelif t < 28:\n base, noise_std = 4.0, 0.4\nelif t < 36:\n base, noise_std = 11.0, 0.8\nelif t < 90:\n base, noise_std = 8.5, 0.5\nelif t < 100:\n base, noise_std = 4.0, 0.4\nelif t < 110:\n base, noise_std = 3.0, 0.3\nelse:\n base, noise_std = 2.2, 0.2\nresult = round(max(0, min(40.0, base + random.gauss(0, noise_std))), 2)" + } + }, + { + "name": "vibration_x", + "address": "23", + "data_type": "float32", + "unit": "m/s²", + "description": "X轴振动加速度,精铣切深小但转速高,振动幅值小、频率高", + "access": "r", + "generator_type": "script", + "min_value": 0.02, + "max_value": 1.5, + "generator_config": { + "script": "elapsed = context['elapsed']; t = 
elapsed % 120\nif 28 <= t < 100:\n base = 0.18 + 0.1 * math.sin(2 * math.pi * elapsed / 28 + 0.8)\nelse:\n base = 0.05\nnoise = random.uniform(-0.04, 0.04)\nresult = round(max(0.02, base + noise), 3)" + } + }, + { + "name": "vibration_y", + "address": "25", + "data_type": "float32", + "unit": "m/s²", + "description": "Y轴振动加速度,精铣切深小但转速高,振动幅值小、频率高", + "access": "r", + "generator_type": "script", + "min_value": 0.02, + "max_value": 1.5, + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 120\nif 28 <= t < 100:\n base = 0.18 + 0.1 * math.sin(2 * math.pi * elapsed / 22 + 1.8)\nelse:\n base = 0.05\nnoise = random.uniform(-0.04, 0.04)\nresult = round(max(0.02, base + noise), 3)" + } + }, + { + "name": "vibration_z", + "address": "27", + "data_type": "float32", + "unit": "m/s²", + "description": "Z轴振动加速度,精铣切深小但转速高,振动幅值小、频率高", + "access": "r", + "generator_type": "script", + "min_value": 0.02, + "max_value": 1.5, + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 120\nif 28 <= t < 100:\n base = 0.2 + 0.12 * math.sin(2 * math.pi * elapsed / 18 + 2.8)\nelse:\n base = 0.06\nnoise = random.uniform(-0.05, 0.05)\nresult = round(max(0.02, base + noise), 3)" + } + }, + { + "name": "spindle_load", + "address": "29", + "data_type": "float32", + "unit": "%", + "description": "精铣主轴负载率,稳定阶段约15~28%,切入瞬间约32%,精铣切深小负载低且稳定,短时允许到120%", + "access": "r", + "generator_type": "script", + "min_value": 0.0, + "max_value": 120.0, + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 120\nif t < 12:\n base, noise_std = 0.0, 0.0\nelif t < 28:\n base, noise_std = 4.0, 1.0\nelif t < 36:\n base, noise_std = 32.0, 2.0\nelif t < 90:\n base, noise_std = 22.0, 1.5\nelif t < 100:\n base, noise_std = 4.0, 1.0\nelif t < 110:\n base, noise_std = 1.5, 0.5\nelse:\n base, noise_std = 0.0, 0.0\nnoise = random.gauss(0, noise_std) if noise_std > 0 else 0\nresult = round(max(0, min(120.0, base + noise)), 2)" + } + }, + { + "name": 
"spindle_override", + "address": "3", + "data_type": "uint16", + "unit": "%", + "description": "主轴倍率", + "access": "r", + "generator_type": "fixed", + "fixed_value": 100 + }, + { + "name": "feed_override", + "address": "4", + "data_type": "uint16", + "unit": "%", + "description": "进给倍率", + "access": "r", + "generator_type": "fixed", + "fixed_value": 100 + }, + { + "name": "running_mode", + "address": "5", + "data_type": "uint16", + "description": "运行模式(1=MDI 2=AUTO 3=JOG 4=EDIT 5=HANDLE)", + "access": "r", + "generator_type": "fixed", + "fixed_value": 2 + }, + { + "name": "exec_status", + "address": "6", + "data_type": "uint16", + "description": "执行状态(0=空闲 1=运行 2=暂停 3=M00停 4=M01停)", + "access": "r", + "generator_type": "fixed", + "fixed_value": 1 + }, + { + "name": "tool_no", + "address": "7", + "data_type": "uint16", + "description": "当前刀具号,精铣工位固定使用T3", + "access": "r", + "generator_type": "fixed", + "min_value": 1, + "max_value": 12, + "fixed_value": 3 + }, + { + "name": "abs_x", + "address": "8", + "data_type": "float32", + "unit": "mm", + "description": "X轴绝对坐标", + "access": "r", + "generator_type": "random", + "min_value": -800.0, + "max_value": 800.0 + }, + { + "name": "abs_y", + "address": "10", + "data_type": "float32", + "unit": "mm", + "description": "Y轴绝对坐标", + "access": "r", + "generator_type": "random", + "min_value": -500.0, + "max_value": 500.0 + }, + { + "name": "abs_z", + "address": "12", + "data_type": "float32", + "unit": "mm", + "description": "Z轴绝对坐标", + "access": "r", + "generator_type": "random", + "min_value": -600.0, + "max_value": 200.0 + }, + { + "name": "mach_x", + "address": "14", + "data_type": "float32", + "unit": "mm", + "description": "X轴机械坐标", + "access": "r", + "generator_type": "random", + "min_value": -800.0, + "max_value": 800.0 + }, + { + "name": "mach_y", + "address": "16", + "data_type": "float32", + "unit": "mm", + "description": "Y轴机械坐标", + "access": "r", + "generator_type": "random", + "min_value": -500.0, + "max_value": 
500.0 + }, + { + "name": "mach_z", + "address": "18", + "data_type": "float32", + "unit": "mm", + "description": "Z轴机械坐标", + "access": "r", + "generator_type": "random", + "min_value": -600.0, + "max_value": 200.0 + }, + { + "name": "part_count", + "address": "20", + "data_type": "uint16", + "description": "加工计数", + "access": "r", + "generator_type": "script", + "min_value": 0, + "max_value": 99999, + "generator_config": { + "script": "elapsed = context['elapsed']; result = min(int(elapsed / 120), 99999)" + } + }, + { + "name": "cycle_time", + "address": "21", + "data_type": "uint16", + "unit": "s", + "description": "循环时间,精铣单件约120s", + "access": "r", + "generator_type": "fixed", + "fixed_value": 120 + }, + { + "name": "alarm_no", + "address": "22", + "data_type": "uint16", + "description": "报警号(0=无报警)", + "access": "r", + "generator_type": "fixed", + "fixed_value": 0 + } + ], + "protocol_config": { + "slave_id": 8 + }, + "tags": ["CNC", "FANUC", "数控", "机床", "发那科", "精铣"] +} diff --git a/protoforge/templates/modbus/fanuc_cnc_semi_finish.json b/protoforge/templates/modbus/fanuc_cnc_semi_finish.json new file mode 100644 index 0000000..9efc897 --- /dev/null +++ b/protoforge/templates/modbus/fanuc_cnc_semi_finish.json @@ -0,0 +1,259 @@ +{ + "id": "modbus_fanuc_cnc_semi_finish", + "name": "Fanuc CNC 半精铣工位", + "protocol": "modbus_tcp", + "description": "FANUC Series 0i-MF数控系统,半精铣工位:主轴约4000RPM,进给约500mm/min,切深中等,表面粗糙度Ra3.2~6.3", + "manufacturer": "FANUC", + "model": "0i-MF", + "points": [ + { + "name": "spindle_speed", + "address": "0", + "data_type": "uint16", + "unit": "RPM", + "description": "主轴实际转速,半精铣工位:空闲为0,启动后稳定在约4000RPM", + "access": "r", + "generator_type": "script", + "min_value": 0, + "max_value": 8000, + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 150\nif t < 15:\n target = 0\nelif t < 28:\n target = 4000 * ((t - 15) / 13.0)\nelif t < 120:\n target = 4000\nelif t < 133:\n target = 4000 * (1 - ((t - 120) / 13.0))\nelse:\n target 
= 0\nnoise = random.gauss(0, 20) if target > 100 else 0\nresult = int(round(max(0, min(8000, target + noise))))" + } + }, + { + "name": "feed_rate", + "address": "1", + "data_type": "float32", + "unit": "mm/min", + "description": "半精铣实际进给速度:切入阶段低速,稳定半精铣约500mm/min,退刀阶段降速", + "access": "r", + "generator_type": "script", + "min_value": 0, + "max_value": 5000, + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 150\nif t < 28:\n target = 0\nelif t < 38:\n target = 250 + 250 * ((t - 28) / 10.0)\nelif t < 115:\n target = 500\nelif t < 128:\n target = 250\nelse:\n target = 0\nnoise = random.gauss(0, 15) if target > 0 else 0\nresult = round(max(0, min(5000, target + noise)), 1)" + } + }, + { + "name": "spindle_current", + "address": "2", + "data_type": "float32", + "unit": "A", + "description": "半精铣主轴电流:空闲约2A,启动约5A,切入峰值约17A,稳定半精铣约13~16A", + "access": "r", + "generator_type": "script", + "min_value": 0.0, + "max_value": 40.0, + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 150\nif t < 15:\n base, noise_std = 2.2, 0.25\nelif t < 28:\n base, noise_std = 5.0, 0.6\nelif t < 38:\n base, noise_std = 17.0, 1.2\nelif t < 115:\n base, noise_std = 14.5, 0.9\nelif t < 128:\n base, noise_std = 5.0, 0.6\nelif t < 138:\n base, noise_std = 3.5, 0.4\nelse:\n base, noise_std = 2.2, 0.25\nresult = round(max(0, min(40.0, base + random.gauss(0, noise_std))), 2)" + } + }, + { + "name": "vibration_x", + "address": "23", + "data_type": "float32", + "unit": "m/s²", + "description": "X轴振动加速度,半精铣切深中等,振动幅值中等", + "access": "r", + "generator_type": "script", + "min_value": 0.05, + "max_value": 2.0, + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 150\nif 28 <= t < 128:\n base = 0.35 + 0.2 * math.sin(2 * math.pi * elapsed / 55 + 0.8)\nelse:\n base = 0.08\nnoise = random.uniform(-0.08, 0.08)\nresult = round(max(0.05, base + noise), 3)" + } + }, + { + "name": "vibration_y", + "address": "25", + "data_type": 
"float32", + "unit": "m/s²", + "description": "Y轴振动加速度,半精铣切深中等,振动幅值中等", + "access": "r", + "generator_type": "script", + "min_value": 0.05, + "max_value": 2.0, + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 150\nif 28 <= t < 128:\n base = 0.35 + 0.2 * math.sin(2 * math.pi * elapsed / 45 + 1.8)\nelse:\n base = 0.08\nnoise = random.uniform(-0.08, 0.08)\nresult = round(max(0.05, base + noise), 3)" + } + }, + { + "name": "vibration_z", + "address": "27", + "data_type": "float32", + "unit": "m/s²", + "description": "Z轴振动加速度,半精铣切深中等,振动幅值中等", + "access": "r", + "generator_type": "script", + "min_value": 0.05, + "max_value": 2.0, + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 150\nif 28 <= t < 128:\n base = 0.4 + 0.22 * math.sin(2 * math.pi * elapsed / 38 + 2.8)\nelse:\n base = 0.1\nnoise = random.uniform(-0.1, 0.1)\nresult = round(max(0.05, base + noise), 3)" + } + }, + { + "name": "spindle_load", + "address": "29", + "data_type": "float32", + "unit": "%", + "description": "半精铣主轴负载率,稳定阶段约30~45%,切入瞬间约48%,短时允许到120%", + "access": "r", + "generator_type": "script", + "min_value": 0.0, + "max_value": 120.0, + "generator_config": { + "script": "elapsed = context['elapsed']; t = elapsed % 150\nif t < 15:\n base, noise_std = 0.0, 0.0\nelif t < 28:\n base, noise_std = 6.0, 1.5\nelif t < 38:\n base, noise_std = 48.0, 3.0\nelif t < 115:\n base, noise_std = 38.0, 2.5\nelif t < 128:\n base, noise_std = 6.0, 1.5\nelif t < 138:\n base, noise_std = 2.5, 0.8\nelse:\n base, noise_std = 0.0, 0.0\nnoise = random.gauss(0, noise_std) if noise_std > 0 else 0\nresult = round(max(0, min(120.0, base + noise)), 2)" + } + }, + { + "name": "spindle_override", + "address": "3", + "data_type": "uint16", + "unit": "%", + "description": "主轴倍率", + "access": "r", + "generator_type": "fixed", + "fixed_value": 100 + }, + { + "name": "feed_override", + "address": "4", + "data_type": "uint16", + "unit": "%", + "description": "进给倍率", + 
"access": "r", + "generator_type": "fixed", + "fixed_value": 100 + }, + { + "name": "running_mode", + "address": "5", + "data_type": "uint16", + "description": "运行模式(1=MDI 2=AUTO 3=JOG 4=EDIT 5=HANDLE)", + "access": "r", + "generator_type": "fixed", + "fixed_value": 2 + }, + { + "name": "exec_status", + "address": "6", + "data_type": "uint16", + "description": "执行状态(0=空闲 1=运行 2=暂停 3=M00停 4=M01停)", + "access": "r", + "generator_type": "fixed", + "fixed_value": 1 + }, + { + "name": "tool_no", + "address": "7", + "data_type": "uint16", + "description": "当前刀具号,半精铣工位固定使用T2", + "access": "r", + "generator_type": "fixed", + "min_value": 1, + "max_value": 12, + "fixed_value": 2 + }, + { + "name": "abs_x", + "address": "8", + "data_type": "float32", + "unit": "mm", + "description": "X轴绝对坐标", + "access": "r", + "generator_type": "random", + "min_value": -800.0, + "max_value": 800.0 + }, + { + "name": "abs_y", + "address": "10", + "data_type": "float32", + "unit": "mm", + "description": "Y轴绝对坐标", + "access": "r", + "generator_type": "random", + "min_value": -500.0, + "max_value": 500.0 + }, + { + "name": "abs_z", + "address": "12", + "data_type": "float32", + "unit": "mm", + "description": "Z轴绝对坐标", + "access": "r", + "generator_type": "random", + "min_value": -600.0, + "max_value": 200.0 + }, + { + "name": "mach_x", + "address": "14", + "data_type": "float32", + "unit": "mm", + "description": "X轴机械坐标", + "access": "r", + "generator_type": "random", + "min_value": -800.0, + "max_value": 800.0 + }, + { + "name": "mach_y", + "address": "16", + "data_type": "float32", + "unit": "mm", + "description": "Y轴机械坐标", + "access": "r", + "generator_type": "random", + "min_value": -500.0, + "max_value": 500.0 + }, + { + "name": "mach_z", + "address": "18", + "data_type": "float32", + "unit": "mm", + "description": "Z轴机械坐标", + "access": "r", + "generator_type": "random", + "min_value": -600.0, + "max_value": 200.0 + }, + { + "name": "part_count", + "address": "20", + "data_type": "uint16", 
+ "description": "加工计数", + "access": "r", + "generator_type": "script", + "min_value": 0, + "max_value": 99999, + "generator_config": { + "script": "elapsed = context['elapsed']; result = min(int(elapsed / 150), 99999)" + } + }, + { + "name": "cycle_time", + "address": "21", + "data_type": "uint16", + "unit": "s", + "description": "循环时间,半精铣单件约150s", + "access": "r", + "generator_type": "fixed", + "fixed_value": 150 + }, + { + "name": "alarm_no", + "address": "22", + "data_type": "uint16", + "description": "报警号(0=无报警)", + "access": "r", + "generator_type": "fixed", + "fixed_value": 0 + } + ], + "protocol_config": { + "slave_id": 7 + }, + "tags": ["CNC", "FANUC", "数控", "机床", "发那科", "半精铣"] +} From 736baadcb2c8073cff474fcad2e2dd6c6662abb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Wed, 27 May 2026 13:53:54 +0800 Subject: [PATCH 34/55] fix(scene): update scene cnc --- protoforge/templates/modbus/fanuc_cnc.json | 34 +++++++++++----------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json index 3ff30df..ebbc8f5 100644 --- a/protoforge/templates/modbus/fanuc_cnc.json +++ b/protoforge/templates/modbus/fanuc_cnc.json @@ -11,13 +11,13 @@ "address": "0", "data_type": "uint16", "unit": "RPM", - "description": "主轴实际转速,粗加工工位:空闲为0,启动后稳定在约2000RPM", + "description": "主轴实际转速,粗加工工位:空闲为0,启动后稳定在约2000RPM,每件节拍有随机差异", "access": "r", "generator_type": "script", "min_value": 0, "max_value": 8000, "generator_config": { - "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 20:\n target = 0\nelif t < 30:\n target = 2000 * ((t - 20) / 10.0)\nelif t < 155:\n target = 2000\nelif t < 165:\n target = 2000 * (1 - ((t - 155) / 10.0))\nelse:\n target = 0\nnoise = random.gauss(0, 15) if target > 100 else 0\nresult = int(round(max(0, min(8000, target + noise))))" + "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = 
f'spd_{cycle_idx}'\nif ck not in cache:\n rng = random.Random(cycle_idx * 7 + 1)\n cache[ck] = {'dur': rng.randint(-8, 10), 'spd': rng.uniform(-40, 40)}\noff = cache[ck]\nperiod = 180 + off['dur']; t = elapsed % period\nramp = 8; cut_end = period - 25 - ramp\nif t < 20:\n target = 0\nelif t < 20 + ramp:\n target = (2000 + off['spd']) * ((t - 20) / ramp)\nelif t < cut_end:\n target = 2000 + off['spd']\nelif t < cut_end + ramp:\n target = (2000 + off['spd']) * (1 - (t - cut_end) / ramp)\nelse:\n target = 0\nnoise = random.gauss(0, 12) if target > 100 else 0\nresult = int(round(max(0, min(8000, target + noise))))" } }, { @@ -25,13 +25,13 @@ "address": "1", "data_type": "float32", "unit": "mm/min", - "description": "粗加工实际进给速度:切入阶段中低速,稳定粗加工约800mm/min,空闲和主轴启动阶段为0", + "description": "粗加工实际进给速度:切入爬升,稳定粗铣约800mm/min含拐角减速扰动,退出降速,空闲为0", "access": "r", "generator_type": "script", "min_value": 0, "max_value": 5000, "generator_config": { - "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 30:\n target = 0\nelif t < 40:\n target = 500 + 300 * ((t - 30) / 10.0)\nelif t < 140:\n target = 800\nelif t < 155:\n target = 400\nelse:\n target = 0\nnoise = random.gauss(0, 25) if target > 0 else 0\nresult = round(max(0, min(5000, target + noise)), 1)" + "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'feed_{cycle_idx}'\nif ck not in cache:\n rng = random.Random(cycle_idx * 7 + 2)\n cache[ck] = {'dur': rng.randint(-8, 10), 'feed': rng.uniform(-30, 50)}\noff = cache[ck]\nperiod = 180 + off['dur']; t = elapsed % period\ncut_end = period - 25 - 8\nif t < 30:\n target = 0\nelif t < 40:\n target = 400 + 400 * ((t - 30) / 10.0)\nelif t < cut_end:\n base_feed = 800 + off['feed']\n corner = 80 * math.sin(2 * math.pi * elapsed / 23.7) * max(0, math.sin(2 * math.pi * elapsed / 41.3))\n target = base_feed + corner\nelif t < cut_end + 12:\n target = 350\nelse:\n target = 0\nnoise = random.gauss(0, 18) if target > 0 else 0\nresult = round(max(0, min(5000, 
target + noise)), 1)" } }, { @@ -39,13 +39,13 @@ "address": "2", "data_type": "float32", "unit": "A", - "description": "粗加工主轴电流:空闲低电流,启动阶段中等电流,切入瞬间升高,稳定粗加工约19~24A", + "description": "粗加工主轴电流:空闲约2A,启动约6A,切入峰值约23A,稳定粗铣约19~24A含细碎波动,退刀降低", "access": "r", "generator_type": "script", "min_value": 0.0, "max_value": 40.0, "generator_config": { - "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 20:\n base, noise_std = 2.2, 0.25\nelif t < 30:\n base, noise_std = 6.0, 0.8\nelif t < 40:\n base, noise_std = 23.5, 1.5\nelif t < 140:\n base, noise_std = 21.5, 1.3\nelif t < 155:\n base, noise_std = 6.0, 0.8\nelif t < 165:\n base, noise_std = 4.0, 0.5\nelse:\n base, noise_std = 2.2, 0.25\nresult = round(max(0, min(40.0, base + random.gauss(0, noise_std))), 2)" + "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'cur_{cycle_idx}'\nif ck not in cache:\n rng = random.Random(cycle_idx * 7 + 3)\n cache[ck] = {'dur': rng.randint(-8, 10), 'cur': rng.uniform(-1.5, 2.0)}\noff = cache[ck]\nperiod = 180 + off['dur']; t = elapsed % period\ncut_end = period - 25 - 8\nif t < 20:\n base, noise_std = 2.2, 0.2\nelif t < 20 + 8:\n base, noise_std = 6.0, 0.7\nelif t < 40:\n base, noise_std = 23.5 + off['cur'], 1.8\nelif t < cut_end:\n drift = 0.8 * math.sin(2 * math.pi * elapsed / 37.4) + 0.5 * math.sin(2 * math.pi * elapsed / 19.1)\n base, noise_std = 21.5 + off['cur'] + drift, 1.0\nelif t < cut_end + 12:\n base, noise_std = 6.0, 0.7\nelif t < cut_end + 20:\n base, noise_std = 3.5, 0.4\nelse:\n base, noise_std = 2.2, 0.2\nresult = round(max(0, min(40.0, base + random.gauss(0, noise_std))), 2)" } }, { @@ -53,13 +53,13 @@ "address": "23", "data_type": "float32", "unit": "m/s²", - "description": "X轴振动加速度", + "description": "X轴振动加速度,粗铣切削时约0.4~0.9m/s²,空闲时接近0", "access": "r", "generator_type": "script", - "min_value": 0.1, + "min_value": 0.0, "max_value": 2.5, "generator_config": { - "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * 
math.pi * elapsed / 85 + 0.8); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)" + "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'vx_{cycle_idx}'\nif ck not in cache:\n rng = random.Random(cycle_idx * 7 + 4)\n cache[ck] = {'dur': rng.randint(-8, 10)}\nperiod = 180 + cache[ck]['dur']; t = elapsed % period\ncut_end = period - 25 - 8\ncutting = 40 <= t < cut_end\nif cutting:\n base = 0.55 + 0.22 * math.sin(2 * math.pi * elapsed / 85 + 0.8) + 0.1 * math.sin(2 * math.pi * elapsed / 17.3)\n noise = random.uniform(-0.12, 0.12)\nelse:\n base = 0.04\n noise = random.uniform(-0.02, 0.02)\nresult = round(max(0.0, base + noise), 3)" } }, { @@ -67,13 +67,13 @@ "address": "25", "data_type": "float32", "unit": "m/s²", - "description": "Y轴振动加速度", + "description": "Y轴振动加速度,粗铣切削时约0.4~0.9m/s²,空闲时接近0", "access": "r", "generator_type": "script", - "min_value": 0.1, + "min_value": 0.0, "max_value": 2.5, "generator_config": { - "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 70 + 1.8); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)" + "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'vy_{cycle_idx}'\nif ck not in cache:\n rng = random.Random(cycle_idx * 7 + 5)\n cache[ck] = {'dur': rng.randint(-8, 10)}\nperiod = 180 + cache[ck]['dur']; t = elapsed % period\ncut_end = period - 25 - 8\ncutting = 40 <= t < cut_end\nif cutting:\n base = 0.5 + 0.2 * math.sin(2 * math.pi * elapsed / 70 + 1.8) + 0.08 * math.sin(2 * math.pi * elapsed / 13.7)\n noise = random.uniform(-0.1, 0.1)\nelse:\n base = 0.04\n noise = random.uniform(-0.02, 0.02)\nresult = round(max(0.0, base + noise), 3)" } }, { @@ -81,13 +81,13 @@ "address": "27", "data_type": "float32", "unit": "m/s²", - "description": "Z轴振动加速度", + "description": "Z轴振动加速度,粗铣切削时约0.5~1.1m/s²(Z向切深方向幅值略大),空闲时接近0", "access": "r", "generator_type": "script", - "min_value": 0.1, + 
"min_value": 0.0, "max_value": 3.0, "generator_config": { - "script": "elapsed = context['elapsed']; base = 0.7 + 0.4 * math.sin(2 * math.pi * elapsed / 58 + 2.8); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)" + "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'vz_{cycle_idx}'\nif ck not in cache:\n rng = random.Random(cycle_idx * 7 + 6)\n cache[ck] = {'dur': rng.randint(-8, 10)}\nperiod = 180 + cache[ck]['dur']; t = elapsed % period\ncut_end = period - 25 - 8\ncutting = 40 <= t < cut_end\nif cutting:\n base = 0.65 + 0.28 * math.sin(2 * math.pi * elapsed / 58 + 2.8) + 0.12 * math.sin(2 * math.pi * elapsed / 11.2)\n noise = random.uniform(-0.15, 0.15)\nelse:\n base = 0.05\n noise = random.uniform(-0.02, 0.02)\nresult = round(max(0.0, base + noise), 3)" } }, { @@ -95,13 +95,13 @@ "address": "29", "data_type": "float32", "unit": "%", - "description": "粗加工主轴负载率,常态0~100%,短时允许到120%;粗加工稳定阶段约45~65%,切入瞬间可能更高", + "description": "粗加工主轴负载率,稳定粗铣约48~68%含刀路扰动,切入瞬间约65~75%,空闲接近0,短时允许到120%", "access": "r", "generator_type": "script", "min_value": 0.0, "max_value": 120.0, "generator_config": { - "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 20:\n base, noise_std = 0.0, 0.0\nelif t < 30:\n base, noise_std = 8.0, 2.0\nelif t < 40:\n base, noise_std = 65.0, 4.0\nelif t < 140:\n base, noise_std = 55.0, 4.0\nelif t < 155:\n base, noise_std = 8.0, 2.0\nelif t < 165:\n base, noise_std = 3.0, 1.0\nelse:\n base, noise_std = 0.0, 0.0\nnoise = random.gauss(0, noise_std) if noise_std > 0 else 0\nresult = round(max(0, min(120.0, base + noise)), 2)" + "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'load_{cycle_idx}'\nif ck not in cache:\n rng = random.Random(cycle_idx * 7 + 7)\n cache[ck] = {'dur': rng.randint(-8, 10), 'load': rng.uniform(-4.0, 6.0)}\noff = cache[ck]\nperiod = 180 + off['dur']; t = elapsed % period\ncut_end = period - 25 - 8\nif t < 20:\n base, noise_std = 0.0, 
0.0\nelif t < 20 + 8:\n base, noise_std = 6.0, 1.5\nelif t < 40:\n base, noise_std = 68.0 + off['load'], 3.5\nelif t < cut_end:\n drift = 4.5 * math.sin(2 * math.pi * elapsed / 37.4) + 2.5 * math.sin(2 * math.pi * elapsed / 19.1) + 1.5 * math.sin(2 * math.pi * elapsed / 7.3)\n base, noise_std = 56.0 + off['load'] + drift, 2.5\nelif t < cut_end + 12:\n base, noise_std = 6.0, 1.5\nelif t < cut_end + 20:\n base, noise_std = 2.0, 0.8\nelse:\n base, noise_std = 0.0, 0.0\nnoise = random.gauss(0, noise_std) if noise_std > 0 else 0\nresult = round(max(0, min(120.0, base + noise)), 2)" } }, { From c9269967283d9cbe7b3f6b384ead70c7ccf2fab7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Thu, 28 May 2026 13:44:34 +0800 Subject: [PATCH 35/55] fix(fault): update fault --- ai/predictor/README.md | 27 +++ ai/predictor/__init__.py | 30 +++ ai/predictor/anomaly.py | 242 ++++++++++++++++++++ ai/predictor/config.py | 186 ++++++++++++++++ ai/predictor/discovery.py | 84 +++++++ ai/predictor/models.py | 114 ++++++++++ ai/predictor/phase_lock.py | 152 +++++++++++++ ai/predictor/profiling.py | 256 +++++++++++++++++++++ ai/predictor/service.py | 333 ++++++++++++++++++++++++++++ ai/predictor/signal.py | 335 ++++++++++++++++++++++++++++ ai/predictor/state.py | 328 +++++++++++++++++++++++++++ ai/predictor/storage.py | 438 ++++++++++++++++++++++++++++++++++++ ai/predictor/template.py | 384 ++++++++++++++++++++++++++++++++ ai/pridict_v5.py | 442 ++++++++++++++++++++++++++----------- protoforge/core/fault.py | 71 +++++- 15 files changed, 3282 insertions(+), 140 deletions(-) create mode 100644 ai/predictor/README.md create mode 100644 ai/predictor/__init__.py create mode 100644 ai/predictor/anomaly.py create mode 100644 ai/predictor/config.py create mode 100644 ai/predictor/discovery.py create mode 100644 ai/predictor/models.py create mode 100644 ai/predictor/phase_lock.py create mode 100644 ai/predictor/profiling.py create mode 100644 ai/predictor/service.py create 
mode 100644 ai/predictor/signal.py create mode 100644 ai/predictor/state.py create mode 100644 ai/predictor/storage.py create mode 100644 ai/predictor/template.py diff --git a/ai/predictor/README.md b/ai/predictor/README.md new file mode 100644 index 0000000..8fcc022 --- /dev/null +++ b/ai/predictor/README.md @@ -0,0 +1,27 @@ +```bash +ai/predictor/ + ├── __init__.py # 公开 API:PredictorService, run() + ├── config.py # 所有常量,支持环境变量覆盖 + ├── models.py # BaselineState, MetricProfile 数据类 + ├── discovery.py # VM 设备/指标发现 + ├── signal.py # 纯信号处理:平滑、FFT+自相关周期估计、谷底检测 + ├── template.py # 模板构建、预测、重采样、EMA 融合 + ├── phase_lock.py # Phase-lock 相位对齐 + ├── anomaly.py # 异常检测:边界计算、越界统计、三条件判断 + ├── state.py # 状态机:HEALTHY/ANOMALY/RECOVERING 生命周期 + ├── profiling.py # 自适应配置推断:infer_metric_profile, refresh_targets + ├── storage.py # VM 读写、标签工具、状态持久化 + └── service.py # PredictorService 主类(run_once / run) + + 启动方式: + from ai.predictor import run + run() + # 或 + from ai.predictor import PredictorService + PredictorService(vm_url="http://vm:8428").run() + + 主要改进: + - 全局变量(BASELINE_STATES、LAST_REAL_TS_WRITTEN、_TARGETS_CACHE)全部移入 PredictorService 实例属性 + - IO 与计算完全分离:signal.py、template.py、anomaly.py 均为纯函数,无网络请求 + - 每个模块顶部有职责说明,每个公开函数有完整 docstring +``` diff --git a/ai/predictor/__init__.py b/ai/predictor/__init__.py new file mode 100644 index 0000000..9e45810 --- /dev/null +++ b/ai/predictor/__init__.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +""" +predictor +~~~~~~~~~ +ProtoForge 预测服务 package。 + +对外暴露: +- ``PredictorService``:预测服务主类,支持 run() 一键启动 +- ``run()``:便捷入口,使用默认配置启动服务 + +快速启动:: + + from ai.predictor import run + run() + +或自定义配置:: + + from ai.predictor import PredictorService + svc = PredictorService(vm_url="http://vm:8428", poll_interval=60) + svc.run() +""" + +from .service import PredictorService + +__all__ = ["PredictorService", "run"] + + +def run() -> None: + """使用默认配置启动预测服务(一行启动)。""" + PredictorService().run() diff --git a/ai/predictor/anomaly.py b/ai/predictor/anomaly.py new file 
def max_consecutive_true(flags: np.ndarray) -> int:
    """Return the length of the longest run of consecutive truthy entries.

    Used to count the longest streak of out-of-bounds samples, which is one
    of the anomaly conditions.

    Args:
        flags: 1-D boolean array (``True`` means the sample is out of bounds).

    Returns:
        Length of the longest run of ``True`` values; ``0`` for empty input.
    """
    longest = 0
    streak = 0
    for flag in flags:
        if flag:
            streak += 1
            if streak > longest:
                longest = streak
        else:
            streak = 0
    return int(longest)


def calc_point_bounds(
    pred: np.ndarray,
    abs_threshold: float,
    rel_threshold: float,
) -> Tuple[np.ndarray, np.ndarray]:
    """Compute the symmetric bounds used by the ``phase_point`` strategy.

    The half-width is ``max(abs_threshold, |pred| * rel_threshold)`` per
    sample, so small predicted values still get a minimum absolute tolerance.

    Args:
        pred: Predicted centre values.
        abs_threshold: Absolute error threshold.
        rel_threshold: Relative error threshold (fraction of ``|pred|``).

    Returns:
        ``(lower, upper)`` bound arrays.
    """
    half_width = np.maximum(abs_threshold, np.abs(pred) * rel_threshold)
    return pred - half_width, pred + half_width


def calc_final_bounds(
    state: BaselineState,
    pred: np.ndarray,
    lower_raw: np.ndarray,
    upper_raw: np.ndarray,
    target: Dict,
) -> Tuple[np.ndarray, np.ndarray]:
    """Compute the final bounds from strategy, padding and hard limits.

    ``phase_band``: the template bounds are widened on both sides by
    ``max(band_pad_abs, |pred| * rel_threshold * 0.25)``.
    ``phase_point`` (and any other strategy value): symmetric bounds from
    :func:`calc_point_bounds`.

    Optional ``hard_max`` / ``hard_min`` entries in ``target`` then clip the
    upper / lower bound respectively.

    Args:
        state: Baseline state. Not read here; the strategy is taken from
            ``target``. Kept for interface compatibility.
        pred: Predicted centre values.
        lower_raw: Template lower bound.
        upper_raw: Template upper bound.
        target: Target configuration (thresholds and optional hard limits).

    Returns:
        ``(lower, upper)`` final bound arrays.
    """
    abs_threshold = float(target.get("abs_threshold", 1.0))
    rel_threshold = float(target.get("rel_threshold", 0.25))

    if target.get("strategy", "phase_point") == "phase_band":
        pad_abs = float(target.get("band_pad_abs", abs_threshold))
        # Padding grows with the signal level but never drops below pad_abs.
        dynamic_pad = np.maximum(pad_abs, np.abs(pred) * rel_threshold * 0.25)
        lower = lower_raw - dynamic_pad
        upper = upper_raw + dynamic_pad
    else:
        lower, upper = calc_point_bounds(pred, abs_threshold, rel_threshold)

    # Optional hard limits from the target configuration.
    hard_max = target.get("hard_max")
    hard_min = target.get("hard_min")
    if hard_max is not None:
        upper = np.minimum(upper, float(hard_max))
    if hard_min is not None:
        lower = np.maximum(lower, float(hard_min))

    return lower, upper


def detect_anomaly(
    state: BaselineState,
    ts_grid: np.ndarray,
    ys_model: np.ndarray,
    ys_actual: np.ndarray,
    target: Dict,
) -> Tuple[bool, float, float, float, int, int, int, float]:
    """Decide whether the recent signal deviates from the healthy baseline.

    Steps:
        1. Phase-lock: ``phase_lock_recent`` picks the best
           ``(period, origin)`` over the recent window.
        2. Predict the recent window with those parameters.
        3. Compute out-of-bounds statistics.
        4. Flag an anomaly when ANY of these holds:
           - outside ratio >= ``outside_ratio_threshold``
           - longest out-of-bounds run >= ``min_consecutive_outside``
           - largest exceedance (in units of band width) >=
             ``severe_exceed_ratio``

    Args:
        state: Current baseline state (never mutated).
        ts_grid: Timestamp array of the resampled grid.
        ys_model: Smoothed signal (compared for ``phase_point``).
        ys_actual: Raw signal (compared for ``phase_band``).
        target: Target configuration; per-metric thresholds override the
            ``config`` defaults.

    Returns:
        ``(is_anomaly, outside_ratio, mean_abs_err, mean_rel_err,
        best_period, best_origin, max_outside_seconds, max_exceed_ratio)``.
    """
    # Local import: keeps this block self-contained without touching the
    # module's import section.
    from dataclasses import replace

    best_period, best_origin, pred_recent, _ = phase_lock_recent(
        state=state,
        ts_grid=ts_grid,
        ys_model=ys_model,
        target=target,
    )

    recent_len = len(pred_recent)
    if recent_len <= 0:
        return False, 0.0, 0.0, 0.0, int(best_period), int(best_origin), 0, 0.0

    # phase_band compares against the raw signal (keeps real fluctuation);
    # every other strategy compares against the smoothed signal.
    if target.get("strategy", "phase_point") == "phase_band":
        actual = ys_actual[-recent_len:].astype(float)
    else:
        actual = ys_model[-recent_len:].astype(float)

    # Temporary copy carrying the phase-locked parameters. replace() copies
    # every dataclass field, so fields added to BaselineState later are not
    # silently dropped, and the original state stays untouched.
    locked_state = replace(
        state,
        period=best_period,
        phase_origin_ts=best_origin,
    )

    recent_ts = ts_grid[-recent_len:].astype(int).tolist()
    pred, lower_raw, upper_raw = predict_state_bundle(locked_state, recent_ts)

    lower, upper = calc_final_bounds(
        state=locked_state,
        pred=pred,
        lower_raw=lower_raw,
        upper_raw=upper_raw,
        target=target,
    )

    # Exceedance: distance beyond the nearer bound, 0 when inside the band.
    exceed = np.maximum(np.maximum(actual - upper, lower - actual), 0.0)
    outside = exceed > 0

    # Floor the width so a collapsed (or inverted) band cannot divide by zero.
    band_width = np.maximum(upper - lower, 1e-6)
    exceed_ratio = exceed / band_width

    abs_err = np.abs(actual - pred)
    outside_ratio = float(np.mean(outside))
    mean_abs_err = float(np.mean(abs_err))
    mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1e-6)))
    max_outside_seconds = max_consecutive_true(outside)
    max_exceed_ratio = float(np.max(exceed_ratio)) if len(exceed_ratio) > 0 else 0.0

    # Per-metric thresholds from the target, falling back to config defaults.
    outside_ratio_threshold = float(
        target.get("outside_ratio_threshold", config.OUTSIDE_RATIO_THRESHOLD)
    )
    min_consecutive_outside = int(
        target.get("min_consecutive_outside", config.MIN_CONSECUTIVE_OUTSIDE)
    )
    severe_exceed_ratio = float(
        target.get("severe_exceed_ratio", config.SEVERE_EXCEED_RATIO)
    )

    is_anomaly = (
        outside_ratio >= outside_ratio_threshold
        or max_outside_seconds >= min_consecutive_outside
        or max_exceed_ratio >= severe_exceed_ratio
    )

    return (
        is_anomaly,
        outside_ratio,
        mean_abs_err,
        mean_rel_err,
        int(best_period),
        int(best_origin),
        int(max_outside_seconds),
        float(max_exceed_ratio),
    )
#: Length of the history window pulled from VM, in minutes.
HISTORY_MINUTES: int = int(os.getenv("PROTOFORGE_HISTORY_MINUTES", "30"))

#: Nominal prediction horizon, in seconds.
HORIZON_SECONDS: int = int(os.getenv("PROTOFORGE_HORIZON_SECONDS", "120"))

#: Polling interval, in seconds.
POLL_INTERVAL: int = int(os.getenv("PROTOFORGE_POLL_INTERVAL", "30"))

#: Number of predicted points actually written to VM: the smaller of the
#: horizon and the polling interval, so that consecutive rounds do not write
#: far into the future.
WRITE_HORIZON_SECONDS: int = min(HORIZON_SECONDS, POLL_INTERVAL)

#: Step used for VM range queries.
QUERY_STEP: str = "1s"

#: Minimum number of history points required before modelling starts.
MIN_POINTS: int = 120

# ---------------------------------------------------------------------------
# Period detection range
# ---------------------------------------------------------------------------

#: Shortest period accepted, in seconds.
MIN_PERIOD_SECONDS: int = 5

#: Longest period accepted, in seconds.
MAX_PERIOD_SECONDS: int = 3600

# ---------------------------------------------------------------------------
# Template learning
# ---------------------------------------------------------------------------

#: Minimum number of complete cycles needed to build a template.
MIN_FULL_CYCLES_FOR_TEMPLATE: int = 3

#: Maximum number of most recent cycles used to build a template.
MAX_CYCLES_FOR_TEMPLATE: int = 8

#: Percentile below which a sample counts as a valley candidate.
VALLEY_QUANTILE: int = 45

#: EMA step for template updates while healthy (smaller = more conservative).
HEALTHY_EMA_ALPHA: float = 0.1

#: EMA step for template updates while recovering (larger than the healthy one).
RECOVERY_EMA_ALPHA: float = 0.25

# ---------------------------------------------------------------------------
# Default anomaly-detection thresholds
# ---------------------------------------------------------------------------

#: Detection window, in seconds.
DETECT_WINDOW_SECONDS: int = 30

#: Minimum duration of the recovery phase, in seconds.
RECOVERY_MIN_SECONDS: int = 60

#: Alarm when at least this fraction of the window is out of bounds.
OUTSIDE_RATIO_THRESHOLD: float = 0.6

#: Alarm when this many consecutive seconds are out of bounds.
MIN_CONSECUTIVE_OUTSIDE: int = 5

#: Alarm when a single sample exceeds the band by this multiple of its width.
SEVERE_EXCEED_RATIO: float = 1.8

#: Maximum tolerated lag of real data, in seconds.
MAX_DATA_LAG_SECONDS: int = 180

# ---------------------------------------------------------------------------
# Phase-lock search
# ---------------------------------------------------------------------------

#: Shortest history window used by phase-lock, in seconds.
PHASE_LOCK_MIN_WINDOW_SECONDS: int = 45

#: Longest history window used by phase-lock, in seconds.
PHASE_LOCK_MAX_WINDOW_SECONDS: int = 180

#: Period search range as a fraction of the base period (a per-target value
#: takes precedence when present).
PHASE_LOCK_PERIOD_SEARCH_RATIO: float = 0.12

#: Phase-origin search range as a fraction of the period.
PHASE_LOCK_ORIGIN_SEARCH_RATIO: float = 0.35

#: Period search step, in seconds.
PHASE_LOCK_PERIOD_STEP: int = 1

#: Phase-origin search step, in seconds.
PHASE_LOCK_ORIGIN_STEP: int = 1

# ---------------------------------------------------------------------------
# Monitored-metric whitelist
# ---------------------------------------------------------------------------

#: Metric names monitored by default.
_DEFAULT_MONITORED_METRICS: List[str] = [
    "feed_rate",
    "spindle_speed",
    "spindle_current",
    "spindle_load",
    "vibration_x",
    "vibration_y",
    "vibration_z",
]

#: Metric names actually monitored. Override with the comma-separated
#: environment variable PROTOFORGE_MONITORED_METRICS, e.g.
#: "feed_rate,spindle_speed". Blank entries are dropped.
MONITORED_METRICS: List[str] = [
    name
    for name in map(
        str.strip,
        os.getenv(
            "PROTOFORGE_MONITORED_METRICS",
            ",".join(_DEFAULT_MONITORED_METRICS),
        ).split(","),
    )
    if name
]

# ---------------------------------------------------------------------------
# Manual hard-limit override file
# ---------------------------------------------------------------------------

#: Path of the override file. Expected JSON layout:
#:   {
#:     "device-id": {
#:       "metric_name": {"hard_max": 35.0, "hard_min": 0.0}
#:     }
#:   }
OVERRIDE_FILE: str = os.getenv(
    "PROTOFORGE_PREDICTOR_OVERRIDE",
    "/etc/protoforge/predictor_override.json",
)

# ---------------------------------------------------------------------------
# Target-list refresh
# ---------------------------------------------------------------------------

#: Interval between device/metric re-discovery runs, in seconds.
TARGETS_REFRESH_INTERVAL: int = int(os.getenv("PROTOFORGE_TARGETS_REFRESH", "60"))
def _escape_label_value(value: str) -> str:
    """Escape a value for use inside a double-quoted label matcher."""
    return (
        str(value)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
    )


def discover_device_ids(vm_url: str) -> List[str]:
    """List every ``device_id`` label value known to VictoriaMetrics.

    Calls the label-values endpoint. Any failure (network error, HTTP error
    status, non-JSON body) is logged and yields an empty list instead of an
    exception, so the caller decides how to react.

    Args:
        vm_url: VM HTTP base address, e.g. ``"http://localhost:8428"``.

    Returns:
        Non-empty device id strings; an empty list when the query fails.
    """
    try:
        resp = requests.get(
            f"{vm_url}/api/v1/label/device_id/values",
            timeout=10,
        )
        resp.raise_for_status()
        payload = resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError: body was not valid JSON (not always a RequestException,
        # depending on the installed requests version).
        logger.error("发现 device_id 失败: %s", e)
        return []

    # Tolerate a missing or null "data" field.
    values = payload.get("data") if isinstance(payload, dict) else None
    return [v for v in (values or []) if v]


def discover_metrics_for_device(
    vm_url: str,
    device_id: str,
    candidates: List[str],
) -> List[str]:
    """Return the candidate metrics that currently have data for a device.

    Issues one instant query per candidate; a metric counts as present only
    when the response carries a non-empty ``result``.

    Args:
        vm_url: VM HTTP base address.
        device_id: Value of the ``device_id`` label to match.
        candidates: Metric names to probe, e.g. ``["feed_rate"]``.

    Returns:
        Metric names with data, in the order given by ``candidates``. A
        failing probe is skipped and does not affect the other metrics.
    """
    # Built once: the selector is the same for every candidate. Escaping
    # keeps a quote or backslash in the id from breaking the query.
    selector = f'{{device_id="{_escape_label_value(device_id)}"}}'

    found: List[str] = []
    for metric in candidates:
        try:
            resp = requests.get(
                f"{vm_url}/api/v1/query",
                params={"query": f"{metric}{selector}"},
                timeout=5,
            )
            resp.raise_for_status()
            payload = resp.json()
        except (requests.RequestException, ValueError) as e:
            # Best effort: one failing probe must not abort discovery.
            logger.debug(
                "metric probe failed device=%s metric=%s: %s",
                device_id, metric, e,
            )
            continue

        data = payload.get("data") if isinstance(payload, dict) else None
        if isinstance(data, dict) and data.get("result"):
            found.append(metric)
    return found
@dataclass
class MetricProfile:
    """
    Statistical profile of one metric, used to derive strategy and thresholds.

    Produced by ``profiling.infer_metric_profile()`` and turned into an
    execution-layer target dict by ``profiling.build_target()``.

    Attributes:
        device_id: Device identifier (value of the ``device_id`` label in VM).
        metric: Metric name, e.g. "feed_rate" or "spindle_current".
        p5: 5th percentile of the active segment (idle samples filtered out).
        p95: 95th percentile of the active segment.
        iqr: ``p95 - p5``; despite the name this is the 5-95 percentile
            spread, not the interquartile range.
        cv: Coefficient of variation (std / mean) of the active segment.
            cv < 0.15  -> stable signal   -> "phase_point"
            cv >= 0.15 -> volatile signal -> "phase_band"
        strategy: Inferred prediction strategy, "phase_point" or "phase_band".
        abs_threshold: Absolute error threshold (computed automatically).
        rel_threshold: Relative error threshold (computed automatically).
        band_low_q: Lower percentile of the phase_band template (5 as
            produced by ``infer_metric_profile``).
        band_high_q: Upper percentile of the phase_band template (95 as
            produced by ``infer_metric_profile``).
        band_pad_abs: Extra absolute padding added to the phase_band bounds.
        phase_lock_period_search_ratio: Phase-lock period search range as a
            fraction of the base period; derived from the measured period
            jitter when enough cycles are available.
    """

    device_id: str
    metric: str
    p5: float
    p95: float
    iqr: float
    cv: float
    strategy: str
    abs_threshold: float
    rel_threshold: float
    band_low_q: float
    band_high_q: float
    band_pad_abs: float
    phase_lock_period_search_ratio: float
def phase_lock_recent(
    state: BaselineState,
    ts_grid: np.ndarray,
    ys_model: np.ndarray,
    target: Optional[Dict] = None,
) -> Tuple[int, int, np.ndarray, float]:
    """Search the best ``(period, phase_origin)`` over the recent window.

    Strategy:
        1. Pick a search window of about two base periods, clamped to
           ``[PHASE_LOCK_MIN_WINDOW_SECONDS, PHASE_LOCK_MAX_WINDOW_SECONDS]``.
        2. Enumerate periods in
           ``[base * (1 - ratio), base * (1 + ratio)]``, clamped to the
           configured min/max period.
        3. For each period enumerate origins within ``± origin_shift``.
        4. Keep the candidate with the lowest ``MAE + period penalty``; the
           penalty discourages drifting away from the base period.

    Args:
        state: Baseline state providing the base period, origin and template.
        ts_grid: Timestamp array of the resampled grid.
        ys_model: Smoothed signal the MAE is computed against.
        target: Optional target dict; ``phase_lock_period_search_ratio`` and
            ``phase_lock_origin_search_ratio`` override the config defaults.

    Returns:
        ``(best_period, best_origin, best_pred, best_mae)``. ``best_pred``
        covers the search window. For an empty ``ts_grid`` the base
        parameters are returned with an empty prediction and an MAE of 0.0.
    """
    base_period = int(state.period)
    base_origin = int(state.phase_origin_ts)
    base_template = np.array(state.template, dtype=float)

    # Nothing to align against. Without this guard ts_grid[-1] raises
    # IndexError, and in the degenerate branch ys_model[-0:] would select
    # the WHOLE array instead of nothing.
    if len(ts_grid) == 0:
        return base_period, base_origin, np.empty(0, dtype=float), 0.0

    overrides = target or {}
    period_search_ratio = float(
        overrides.get(
            "phase_lock_period_search_ratio",
            config.PHASE_LOCK_PERIOD_SEARCH_RATIO,
        )
    )
    origin_search_ratio = float(
        overrides.get(
            "phase_lock_origin_search_ratio",
            config.PHASE_LOCK_ORIGIN_SEARCH_RATIO,
        )
    )

    detect_window = config.DETECT_WINDOW_SECONDS

    # Degenerate baseline: no search possible, return the base prediction.
    if base_period <= 1 or len(base_template) <= 1:
        ts_recent = ts_grid[-detect_window:].astype(int).tolist()
        pred = predict_template_values(base_template, base_period, base_origin, ts_recent)
        actual = ys_model[-len(ts_recent):].astype(float)
        mae = float(np.mean(np.abs(actual - pred))) if len(actual) else 0.0
        return base_period, base_origin, pred, mae

    # Search window: about two periods, clamped to [min, max].
    window_seconds = max(
        config.PHASE_LOCK_MIN_WINDOW_SECONDS,
        min(config.PHASE_LOCK_MAX_WINDOW_SECONDS, int(base_period * 2)),
    )

    cutoff = ts_grid[-1] - window_seconds
    mask = ts_grid >= cutoff
    ts_recent_arr = ts_grid[mask].astype(int)
    actual = ys_model[mask].astype(float)

    # Too few samples inside the window: fall back to a fixed-length tail.
    if len(ts_recent_arr) < max(10, detect_window):
        ts_recent_arr = ts_grid[-detect_window:].astype(int)
        actual = ys_model[-detect_window:].astype(float)

    ts_recent = ts_recent_arr.tolist()
    last_ts = int(ts_recent[-1])

    # Period search range.
    p_min = max(
        int(config.MIN_PERIOD_SECONDS),
        int(round(base_period * (1.0 - period_search_ratio))),
    )
    p_max = min(
        int(config.MAX_PERIOD_SECONDS),
        int(round(base_period * (1.0 + period_search_ratio))),
    )

    # Start from the base parameters.
    best_period = base_period
    best_origin = normalize_origin_near(base_origin, base_period, last_ts)
    best_pred = predict_template_values(
        template=resample_template(base_template, best_period),
        period=best_period,
        phase_origin_ts=best_origin,
        ts_list=ts_recent,
    )
    best_mae = float(np.mean(np.abs(actual - best_pred)))
    # The penalty is zero at the base period, so the initial score is the MAE.
    best_score = best_mae

    for period in range(p_min, p_max + 1, config.PHASE_LOCK_PERIOD_STEP):
        template = resample_template(base_template, period)
        center_origin = normalize_origin_near(base_origin, period, last_ts)
        origin_shift = max(2, int(round(period * origin_search_ratio)))
        # Penalty of 0.5 per second of deviation from the base period;
        # constant for the whole inner loop.
        penalty = abs(period - base_period) * 0.5

        for shift in range(-origin_shift, origin_shift + 1, config.PHASE_LOCK_ORIGIN_STEP):
            origin = center_origin + shift
            pred = predict_template_values(
                template=template,
                period=period,
                phase_origin_ts=origin,
                ts_list=ts_recent,
            )
            mae = float(np.mean(np.abs(actual - pred)))
            score = mae + penalty

            if score < best_score:
                best_score = score
                best_period = period
                best_origin = origin
                best_pred = pred
                best_mae = mae

    # Normalise the winning origin relative to the latest timestamp.
    best_origin = normalize_origin_near(best_origin, best_period, last_ts)

    return int(best_period), int(best_origin), best_pred, float(best_mae)
def load_overrides(path: str) -> Dict:
    """Load the manual hard-limit override file.

    Expected JSON layout::

        {
            "device-id": {
                "metric_name": {"hard_max": 35.0, "hard_min": 0.0}
            }
        }

    Args:
        path: Path of the override file.

    Returns:
        The parsed mapping. An empty dict when the file does not exist
        (silently), cannot be read or parsed (logged), or does not contain a
        JSON object at the top level (logged).
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # A missing override file is the normal case, not an error.
        return {}
    except (OSError, ValueError) as e:
        # OSError: unreadable path; ValueError: invalid JSON or encoding.
        logger.warning("加载 override 文件失败 %s: %s", path, e)
        return {}

    if not isinstance(data, dict):
        # Callers use .get() on the result; a list or scalar would crash them.
        logger.warning(
            "加载 override 文件失败 %s: %s",
            path,
            "top-level JSON value is not an object",
        )
        return {}
    return data


def _metric_override(overrides: Dict, device_id: str, metric: str) -> Dict:
    """Return the override entry for one (device, metric), or ``{}``."""
    device_cfg = overrides.get(device_id) if isinstance(overrides, dict) else None
    metric_cfg = device_cfg.get(metric) if isinstance(device_cfg, dict) else None
    return metric_cfg if isinstance(metric_cfg, dict) else {}


def build_target(profile: MetricProfile, overrides: Dict) -> Dict:
    """Convert a ``MetricProfile`` into an execution-layer target dict.

    The target carries the query and output metric names, the strategy and
    thresholds, the smoothing window, the phase-lock search ranges and the
    optional hard limits from the override file.

    Args:
        profile: Metric profile inferred from history.
        overrides: Override mapping as returned by :func:`load_overrides`.

    Returns:
        The target dict.
    """
    # Tolerates missing, null or malformed override entries.
    metric_cfg = _metric_override(overrides, profile.device_id, profile.metric)

    target: Dict = {
        "query": f'{profile.metric}{{device_id="{profile.device_id}"}}',
        "pred_metric": f"{profile.metric}_predicted",
        "anomaly_metric": f"{profile.metric}_anomaly",
        "strategy": profile.strategy,
        "abs_threshold": profile.abs_threshold,
        "rel_threshold": profile.rel_threshold,
        # phase_band uses a wider smoothing window.
        "smooth_window": 5 if profile.strategy == "phase_band" else 2,
        # Taken from config so the documented defaults stay the single
        # source of truth; the detector always finds these keys in the
        # target and never reaches its own config fallback.
        "outside_ratio_threshold": config.OUTSIDE_RATIO_THRESHOLD,
        "min_consecutive_outside": config.MIN_CONSECUTIVE_OUTSIDE,
        "severe_exceed_ratio": config.SEVERE_EXCEED_RATIO,
        "phase_lock_period_search_ratio": profile.phase_lock_period_search_ratio,
        # Origin search range: 2.5x the period search range, capped at 0.45.
        "phase_lock_origin_search_ratio": min(
            0.45, profile.phase_lock_period_search_ratio * 2.5
        ),
        # Optional hard limits (None when not configured).
        "hard_max": metric_cfg.get("hard_max"),
        "hard_min": metric_cfg.get("hard_min"),
    }

    if profile.strategy == "phase_band":
        target.update({
            "band_low_q": profile.band_low_q,
            "band_high_q": profile.band_high_q,
            "band_pad_abs": profile.band_pad_abs,
        })

    return target


def refresh_targets(
    vm_url: str,
    monitored_metrics: List[str],
    override_path: str,
) -> List[Dict]:
    """Discover devices and metrics and build the full target list.

    Steps:
        1. Discover all ``device_id`` values in VM.
        2. Probe which monitored metrics have data for each device.
        3. Infer a ``MetricProfile`` for each metric with data.
        4. Convert every profile into a target dict.

    Args:
        vm_url: VM HTTP base address.
        monitored_metrics: Candidate metric names to probe.
        override_path: Path of the override file.

    Returns:
        One target dict per (device_id, metric) whose profile could be
        inferred. An empty list when no device is discovered; the caller
        decides whether to keep its previous list.
    """
    logger.info("开始发现设备和指标...")
    overrides = load_overrides(override_path)

    device_ids = discover_device_ids(vm_url)
    if not device_ids:
        logger.warning("未发现任何 device_id")
        return []

    targets: List[Dict] = []
    for device_id in device_ids:
        for metric in discover_metrics_for_device(vm_url, device_id, monitored_metrics):
            profile = infer_metric_profile(vm_url, device_id, metric)
            if profile is not None:
                targets.append(build_target(profile, overrides))

    logger.info(
        "目标列表已更新:%d 台设备,%d 个指标目标",
        len(device_ids),
        len(targets),
    )
    return targets
class PredictorService:
    """
    Main prediction service: wires the predictor modules together and
    drives the polling loop.

    All runtime state is held on the instance. run() starts the endless
    loop; run_once() performs a single prediction round.

    Attributes:
        _vm_url: VM HTTP address.
        _state_file: path of the persisted state file.
        _history_minutes: size of the history window to fetch (minutes).
        _write_horizon: number of predicted points written per round
            (one point per second).
        _poll_interval: sleep between rounds (seconds).
        _targets_refresh_interval: interval between target rediscovery runs
            (seconds).
        _monitored_metrics: candidate metric names to monitor.
        _override_file: path of the manual limits override file.
        _extra_labels: extra labels merged into every written series.
        _states: series key -> BaselineState.
        _last_written: series key -> timestamp of the last real data point
            for which predictions were written.
        _targets: current target list.
        _targets_last_refresh: time of the last successful target refresh.
    """

    def __init__(
        self,
        vm_url: str = config.VM_URL,
        state_file: str = config.STATE_FILE,
        history_minutes: int = config.HISTORY_MINUTES,
        write_horizon: int = config.WRITE_HORIZON_SECONDS,
        poll_interval: int = config.POLL_INTERVAL,
        targets_refresh_interval: int = config.TARGETS_REFRESH_INTERVAL,
        monitored_metrics: Optional[List[str]] = None,
        override_file: str = config.OVERRIDE_FILE,
        extra_labels: Optional[Dict[str, str]] = None,
    ) -> None:
        """Store the configuration and initialise empty runtime state."""
        self._vm_url = vm_url
        self._state_file = state_file
        self._history_minutes = history_minutes
        self._write_horizon = write_horizon
        self._poll_interval = poll_interval
        self._targets_refresh_interval = targets_refresh_interval
        # Falsy values (None or an empty container) fall back to the config defaults.
        self._monitored_metrics = monitored_metrics or config.MONITORED_METRICS
        self._override_file = override_file
        self._extra_labels = extra_labels or config.EXTRA_PREDICT_LABELS

        self._states: Dict[str, BaselineState] = {}
        self._last_written: Dict[str, int] = {}
        self._targets: List[Dict] = []
        self._targets_last_refresh: float = 0.0

    # ------------------------------------------------------------------
    # Target list management
    # ------------------------------------------------------------------

    def _refresh_targets_if_needed(self) -> None:
        """
        Rediscover devices and metrics when the refresh interval has elapsed.

        Discovery also runs whenever the current target list is empty. If
        discovery yields nothing, the existing list is kept and the refresh
        timestamp is left unchanged, so the next call tries again.
        """
        now = time.time()
        if now - self._targets_last_refresh < self._targets_refresh_interval and self._targets:
            return

        new_targets = refresh_targets(
            vm_url=self._vm_url,
            monitored_metrics=self._monitored_metrics,
            override_path=self._override_file,
        )

        if new_targets:
            self._targets = new_targets
            self._targets_last_refresh = now
        else:
            logger.warning("发现流程未产生任何有效目标,保持现有目标列表")

    # ------------------------------------------------------------------
    # Prediction time axis
    # ------------------------------------------------------------------

    def _build_prediction_timestamps(
        self,
        key: str,
        last_real_ts: int,
        now_sec: int,
    ) -> Optional[List[int]]:
        """
        Build the prediction timestamps: write_horizon consecutive seconds
        starting at last_real_ts + 1.

        Writing is skipped in two situations:
          1. the real data lags more than config.MAX_DATA_LAG_SECONDS
             behind now_sec;
          2. last_real_ts has not advanced past the value recorded for this
             key in _last_written (avoids rewriting the same batch).

        Args:
            key: series identifier.
            last_real_ts: timestamp of the newest real data point (Unix seconds).
            now_sec: current time (Unix seconds).

        Returns:
            The list of prediction timestamps, or None when skipped.
        """
        data_lag = now_sec - last_real_ts

        if data_lag > config.MAX_DATA_LAG_SECONDS:
            logger.warning(
                "真实数据延迟过大,跳过预测 key=%s data_lag=%ss max=%ss",
                key, data_lag, config.MAX_DATA_LAG_SECONDS,
            )
            return None

        last_written_real_ts = self._last_written.get(key)
        if last_written_real_ts is not None and last_real_ts <= int(last_written_real_ts):
            logger.info(
                "真实数据时间戳未推进,跳过重复写入 key=%s last_real_ts=%s last_written=%s",
                key, last_real_ts, last_written_real_ts,
            )
            return None

        return [last_real_ts + i + 1 for i in range(self._write_horizon)]

    # ------------------------------------------------------------------
    # Single prediction round
    # ------------------------------------------------------------------

    def run_once(self) -> None:
        """
        Run one prediction round over every target: fetch history, update
        the baseline state, predict, and write the results to VM.

        The state dict is persisted to the state file after the loop over
        targets. When the target list is empty the method returns early
        without saving.
        """
        now_str = datetime.now().strftime("%H:%M:%S")

        self._refresh_targets_if_needed()

        if not self._targets:
            logger.warning("[%s] 目标列表为空,等待设备发现完成", now_str)
            return

        for target in self._targets:
            query = target["query"]
            pred_metric = target["pred_metric"]
            anomaly_metric = target["anomaly_metric"]
            strategy = target.get("strategy", "phase_point")
            smooth_window = int(target.get("smooth_window", 1))

            # 1. Fetch history.
            ts, ys = fetch_history(
                vm_url=self._vm_url,
                query=query,
                minutes=self._history_minutes,
            )

            if len(ys) < config.MIN_POINTS:
                logger.info("[%s] %s 数据不足(%d 点),跳过", now_str, query, len(ys))
                continue

            ts_grid, ys_grid_raw = normalize_history(ts, ys)

            # Re-check the point count after normalisation.
            if len(ys_grid_raw) < config.MIN_POINTS:
                logger.info(
                    "[%s] %s 清洗后数据不足(%d 点),跳过",
                    now_str, query, len(ys_grid_raw),
                )
                continue

            # 2. Pre-process (smoothing).
            ys_grid_model = preprocess_values(ys_grid_raw, strategy, smooth_window)

            # 3. Build the labels used for writing; they also form the state key.
            base_labels = parse_labels_from_query(query)
            write_labels = merge_labels(base_labels, self._extra_labels)
            key = series_key(pred_metric, write_labels)

            # 4. Update the state (anomaly detection + template learning).
            (
                state,
                is_anomaly,
                outside_ratio,
                mean_abs_err,
                mean_rel_err,
                max_outside_seconds,
                max_exceed_ratio,
            ) = maybe_update_state(
                key=key,
                ts_grid=ts_grid,
                ys_model=ys_grid_model,
                ys_actual=ys_grid_raw,
                target=target,
                states=self._states,
            )

            if state is None:
                logger.info("[%s] %s 暂无可用健康模板,等待学习", now_str, query)
                continue

            # 5. Build the prediction timestamps.
            now_sec = int(time.time())
            last_real_ts = int(ts_grid[-1])
            data_lag = now_sec - last_real_ts

            ts_future = self._build_prediction_timestamps(
                key=key,
                last_real_ts=last_real_ts,
                now_sec=now_sec,
            )

            # None (skipped) or an empty list (zero horizon): nothing to write.
            if not ts_future:
                continue

            # 6. Predict.
            pred_values, lower_raw, upper_raw = predict_state_bundle(state, ts_future)
            lower_values, upper_values = calc_final_bounds(
                state=state,
                pred=pred_values,
                lower_raw=lower_raw,
                upper_raw=upper_raw,
                target=target,
            )

            # 7. Write to VM.
            ok = write_prediction_bundle(
                vm_url=self._vm_url,
                pred_metric=pred_metric,
                anomaly_metric=anomaly_metric,
                labels=write_labels,
                ts_future=ts_future,
                pred_values=pred_values,
                lower_values=lower_values,
                upper_values=upper_values,
                is_anomaly=is_anomaly,
                outside_ratio=outside_ratio,
                mean_abs_err=mean_abs_err,
                mean_rel_err=mean_rel_err,
                max_outside_seconds=max_outside_seconds,
                max_exceed_ratio=max_exceed_ratio,
                event_ts=last_real_ts,
            )

            if not ok:
                logger.error("[%s] %s 写入预测数据失败", now_str, query)
                continue

            # Only record the timestamp after a fully successful write, so a
            # failed write is retried on the next round.
            self._last_written[key] = last_real_ts

            # 8. Summary log line.
            future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
            future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
            last_real_str = datetime.fromtimestamp(last_real_ts).strftime("%H:%M:%S")
            origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S")

            logger.info(
                "[%s] %-50s → %-35s strategy=%s status=%s anomaly=%s "
                "outside=%.2f max_outside=%ss max_exceed=%.2f "
                "period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点,预测区间 %s ~ %s",
                now_str, query, pred_metric,
                state.strategy, state.status, is_anomaly,
                outside_ratio, max_outside_seconds, max_exceed_ratio,
                state.period, origin_str, last_real_str, data_lag,
                len(ts_future), future_start, future_end,
            )

        save_state(self._state_file, self._states)

    # ------------------------------------------------------------------
    # Main loop
    # ------------------------------------------------------------------

    def run(self) -> None:
        """
        Start the service main loop.

        Loads the persisted state, logs the effective configuration, then
        loops forever calling run_once() and sleeping poll_interval seconds.
        An exception raised by run_once() is not caught here and ends the loop.
        """
        self._states = load_state(self._state_file)

        logger.info(
            "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds "
            "轮询间隔=%ds state=%s forecast=%s override=%s refresh=%ds",
            self._vm_url,
            self._history_minutes,
            config.HORIZON_SECONDS,
            self._write_horizon,
            self._poll_interval,
            self._state_file,
            self._extra_labels.get("forecast", ""),
            self._override_file,
            self._targets_refresh_interval,
        )

        while True:
            self.run_once()
            time.sleep(self._poll_interval)
def rolling_median(arr: np.ndarray, window: int) -> np.ndarray:
    """
    Smooth an array with a centred rolling median (edges padded by
    repeating the boundary value).

    An even window is bumped to the next odd value so that the padding is
    symmetric.

    Args:
        arr: input array.
        window: window size in samples; when <= 1, or larger than the
            array, the input is returned unchanged (as float).

    Returns:
        Smoothed float array with the same length as the input.
    """
    size = len(arr)
    if window <= 1 or size < window:
        return arr.astype(float)

    # Force an odd width so the window is centred on each sample.
    width = window + 1 if window % 2 == 0 else window
    half = width // 2
    padded = np.pad(arr.astype(float), (half, half), mode="edge")

    smoothed = np.empty(size, dtype=float)
    for start in range(size):
        smoothed[start] = float(np.median(padded[start : start + width]))
    return smoothed


def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
    """
    Smooth an array with a centred, uniformly weighted moving average
    (edges padded by repeating the boundary value).

    An even window is bumped to the next odd value so that the padding is
    symmetric.

    Args:
        arr: input array.
        window: window size in samples; when <= 1, or larger than the
            array, the input is returned unchanged (as float).

    Returns:
        Smoothed float array with the same length as the input.
    """
    if window <= 1 or len(arr) < window:
        return arr.astype(float)

    width = window + 1 if window % 2 == 0 else window
    half = width // 2

    weights = np.ones(width, dtype=float) / width
    padded = np.pad(arr.astype(float), (half, half), mode="edge")

    # "valid" mode on the padded signal yields exactly len(arr) samples.
    return np.convolve(padded, weights, mode="valid")


def preprocess_values(
    ys_grid: np.ndarray,
    strategy: str,
    smooth_window: int,
) -> np.ndarray:
    """
    Smooth the raw values according to the prediction strategy.

    - "phase_band": rolling median.
    - any other strategy: moving average when smooth_window > 1, otherwise
      the values are only cast to float.

    Args:
        ys_grid: raw values on the uniform grid.
        strategy: "phase_point" or "phase_band".
        smooth_window: smoothing window size.

    Returns:
        Float array with the same length as the input.
    """
    if strategy == "phase_band":
        return rolling_median(ys_grid, smooth_window)

    if smooth_window <= 1:
        return ys_grid.astype(float)

    return moving_average(ys_grid, smooth_window)


# ---------------------------------------------------------------------------
# Period estimation
# ---------------------------------------------------------------------------

def _clip_period(value: float) -> float:
    """Clamp a period to [config.MIN_PERIOD_SECONDS, config.MAX_PERIOD_SECONDS]."""
    return float(np.clip(value, config.MIN_PERIOD_SECONDS, config.MAX_PERIOD_SECONDS))


def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
    """
    Roughly estimate the dominant period of a signal with an FFT.

    The mean is removed, the non-DC frequency bin with the largest
    magnitude is selected and converted to a period, which is then clamped
    to the configured period range.

    Args:
        ys_arr: uniformly sampled values (sample spacing taken as 1.0).

    Returns:
        Estimated period as a float. 60.0 is returned when there are fewer
        than 8 samples, the signal is constant, or no usable frequency is
        found.
    """
    fallback = 60.0

    count = len(ys_arr)
    if count < 8:
        return fallback

    detrended = ys_arr - np.mean(ys_arr)
    if np.allclose(detrended, 0):
        return fallback

    spectrum = np.fft.rfft(detrended)
    freqs = np.fft.rfftfreq(count, d=1.0)
    if len(freqs) <= 1:
        return fallback

    # Drop the DC bin (index 0) before looking for the strongest component.
    magnitude = np.abs(spectrum[1:])
    if len(magnitude) == 0 or np.max(magnitude) <= 0:
        return fallback

    peak_freq = float(freqs[int(np.argmax(magnitude)) + 1])
    if peak_freq <= 0:
        return fallback

    return _clip_period(1.0 / peak_freq)


def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:
    """
    Refine a coarse period estimate using the autocorrelation function.

    The autocorrelation peak is searched for lags around
    [0.7 * init_period, 1.3 * init_period], bounded below by the configured
    minimum period and above by half the signal length.

    Args:
        ys_arr: uniformly sampled values.
        init_period: coarse initial period.

    Returns:
        The refined period clamped to the configured range. The clamped
        init_period is returned when there are fewer than 20 samples, the
        signal is constant, or the search range is empty.
    """
    count = len(ys_arr)
    if count < 20:
        return _clip_period(init_period)

    detrended = ys_arr - np.mean(ys_arr)
    if np.allclose(detrended, 0):
        return _clip_period(init_period)

    # Full correlation; keep the non-negative lags only.
    autocorr = np.correlate(detrended, detrended, mode="full")[count - 1:]

    guess = int(round(init_period))
    lag_lo = max(int(config.MIN_PERIOD_SECONDS), int(max(2, guess * 0.7)))
    lag_hi = min(count // 2, int(max(lag_lo + 1, guess * 1.3)))
    if lag_hi <= lag_lo:
        return _clip_period(init_period)

    window = autocorr[lag_lo : lag_hi + 1]
    if len(window) == 0:
        return _clip_period(init_period)

    return _clip_period(lag_lo + int(np.argmax(window)))
def estimate_period_rough(ys_arr: np.ndarray) -> int:
    """
    Estimate the signal period in two steps (FFT, then autocorrelation)
    and return it as an integer.

    The FFT gives a coarse estimate, the autocorrelation refines it, and
    the rounded result is clamped to
    [config.MIN_PERIOD_SECONDS, config.MAX_PERIOD_SECONDS].

    Args:
        ys_arr: uniformly sampled values.

    Returns:
        Estimated period as an int.
    """
    p_fft = estimate_period_by_fft(ys_arr)
    p_refined = refine_period_by_autocorr(ys_arr, p_fft)
    period = int(round(p_refined))
    period = max(int(config.MIN_PERIOD_SECONDS), min(int(config.MAX_PERIOD_SECONDS), period))
    return int(period)


# ---------------------------------------------------------------------------
# Valley detection
# ---------------------------------------------------------------------------

def find_valley_indices(
    ts_grid: np.ndarray,
    ys_grid: np.ndarray,
    expected_period: int,
) -> List[int]:
    """
    Detect periodic valleys (local minima) in a time series.

    Steps:
      1. lightly smooth the signal with a moving average;
      2. take local minima at or below the config.VALLEY_QUANTILE
         percentile as candidates;
      3. if there are fewer candidates than
         config.MIN_FULL_CYCLES_FOR_TEMPLATE, relax to all local minima;
      4. enforce a minimum index distance, keeping the lower point when two
         candidates are too close;
      5. clean up by timestamp spacing relative to the expected period.

    Args:
        ts_grid: timestamps of the grid.
        ys_grid: values on the grid.
        expected_period: expected period, used for the minimum distance and
            the spacing check.

    Returns:
        Valley indices into the arrays, in ascending order. Empty when
        there are fewer than max(10, 2 * expected_period) samples or no
        local minimum is found.
    """
    n = len(ys_grid)
    if n < max(10, expected_period * 2):
        return []

    period = max(3, int(expected_period))
    # Smoothing window is about 8% of the period, kept within [3, 21].
    smooth_window = min(max(3, int(round(period * 0.08))), 21)
    ys_smooth = moving_average(ys_grid, smooth_window)

    threshold = float(np.percentile(ys_smooth, config.VALLEY_QUANTILE))

    # First pass: local minima that are also at or below the threshold.
    # The asymmetric comparison (<= left, < right) picks the last sample of
    # a flat bottom.
    candidates = [
        i for i in range(1, n - 1)
        if (
            ys_smooth[i] <= ys_smooth[i - 1]
            and ys_smooth[i] < ys_smooth[i + 1]
            and ys_smooth[i] <= threshold
        )
    ]

    # Too few candidates: relax to every local minimum.
    if len(candidates) < config.MIN_FULL_CYCLES_FOR_TEMPLATE:
        candidates = [
            i for i in range(1, n - 1)
            if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]
        ]

    if not candidates:
        return []

    # Minimum-distance filter (in samples): within one distance keep the
    # lowest point.
    min_distance = max(2, int(round(period * 0.55)))
    selected: List[int] = []
    for idx in candidates:
        if not selected:
            selected.append(idx)
        elif idx - selected[-1] >= min_distance:
            selected.append(idx)
        elif ys_smooth[idx] < ys_smooth[selected[-1]]:
            selected[-1] = idx

    if len(selected) < 2:
        return selected

    # Spacing clean-up based on timestamps: a plausible gap (0.55x-1.60x the
    # period) is accepted, a too-small gap keeps the lower point, a
    # too-large gap is accepted as is.
    cleaned = [selected[0]]
    for idx in selected[1:]:
        diff = int(ts_grid[idx] - ts_grid[cleaned[-1]])
        if int(period * 0.55) <= diff <= int(period * 1.60):
            cleaned.append(idx)
        elif diff < int(period * 0.55):
            # Gap too small: keep whichever valley is lower.
            if ys_smooth[idx] < ys_smooth[cleaned[-1]]:
                cleaned[-1] = idx
        else:
            # Gap too large (a valley was possibly missed): accept anyway.
            cleaned.append(idx)

    return cleaned


def detect_period_and_valleys(
    ts_grid: np.ndarray,
    ys_grid: np.ndarray,
) -> Tuple[int, List[int]]:
    """
    Estimate the period and detect valleys together.

    A rough period is estimated first and used to detect valleys. When at
    least three valleys are found, the period is replaced by the median of
    the valley spacings that lie within 0.55x-1.60x of the rough period
    (falling back to the rough period when none qualify).

    Args:
        ts_grid: timestamps of the grid.
        ys_grid: values on the grid.

    Returns:
        (period, valley_indices):
          - period: final period as an int, clamped to the configured range;
          - valley_indices: list of valley indices.
    """
    rough = estimate_period_rough(ys_grid)
    valleys = find_valley_indices(ts_grid, ys_grid, rough)

    if len(valleys) >= 3:
        diffs = np.diff(ts_grid[valleys])
        # Only spacings in the plausible range take part in the median.
        good = diffs[(diffs >= rough * 0.55) & (diffs <= rough * 1.60)]
        period = int(round(float(np.median(good)))) if len(good) > 0 else rough
    else:
        period = rough

    period = max(int(config.MIN_PERIOD_SECONDS), min(int(config.MAX_PERIOD_SECONDS), period))
    return int(period), valleys
def create_initial_state(
    ts_grid: np.ndarray,
    ys_model: np.ndarray,
    ys_actual: np.ndarray,
    target: Dict,
    now_sec: int,
) -> Optional[BaselineState]:
    """
    Build the initial healthy baseline state from history.

    Called the first time a series is seen.

    Args:
        ts_grid: timestamps of the uniform grid.
        ys_model: smoothed signal (passed as ys_mid_grid to the baseline builder).
        ys_actual: raw signal (passed as ys_band_grid and used for the
            y_min / y_max statistics).
        target: target dict with strategy and band quantile settings.
        now_sec: current time (Unix seconds).

    Returns:
        The initialised BaselineState, or None when build_current_baseline
        returns None.
    """
    strategy = str(target.get("strategy", "phase_point"))
    band_low_q = float(target.get("band_low_q", 5.0))
    band_high_q = float(target.get("band_high_q", 95.0))

    baseline = build_current_baseline(
        ts_grid=ts_grid,
        ys_mid_grid=ys_model,
        ys_band_grid=ys_actual,
        strategy=strategy,
        band_low_q=band_low_q,
        band_high_q=band_high_q,
    )

    if baseline is None:
        return None

    period, phase_origin_ts, template, lower_template, upper_template = baseline

    return BaselineState(
        period=int(period),
        phase_origin_ts=int(phase_origin_ts),
        template=template.astype(float).tolist(),
        lower_template=lower_template.astype(float).tolist(),
        upper_template=upper_template.astype(float).tolist(),
        strategy=strategy,
        status=BASELINE_STATUS_HEALTHY,
        # Start with several full periods of "clean" time so the new state
        # counts as having healthy history already.
        clean_seconds=int(period * config.MAX_CYCLES_FOR_TEMPLATE),
        last_update_ts=now_sec,
        last_seen_ts=now_sec,
        y_min=float(np.min(ys_actual)),
        y_max=float(np.max(ys_actual)),
    )


def apply_phase_lock_to_state(
    state: BaselineState,
    best_period: int,
    best_origin: int,
) -> None:
    """
    Write a phase-lock search result back into the state (in place).

    Any of the three templates whose length differs from the new period is
    resampled to that length. Nothing is changed when best_period <= 1.

    Args:
        state: baseline state to update in place.
        best_period: period found by the phase-lock search.
        best_origin: phase origin found by the phase-lock search.
    """
    best_period = int(best_period)
    if best_period <= 1:
        return

    # Resample each template whose length no longer matches the period.
    if len(state.template) != best_period:
        state.template = resample_template(
            np.array(state.template, dtype=float), best_period
        ).astype(float).tolist()

    if len(state.lower_template) != best_period:
        state.lower_template = resample_template(
            np.array(state.lower_template, dtype=float), best_period
        ).astype(float).tolist()

    if len(state.upper_template) != best_period:
        state.upper_template = resample_template(
            np.array(state.upper_template, dtype=float), best_period
        ).astype(float).tolist()

    state.period = best_period
    state.phase_origin_ts = int(best_origin)


def maybe_update_state(
    key: str,
    ts_grid: np.ndarray,
    ys_model: np.ndarray,
    ys_actual: np.ndarray,
    target: Dict,
    states: Dict[str, BaselineState],
) -> Tuple[Optional[BaselineState], bool, float, float, float, int, float]:
    """
    Core state update: run anomaly detection and update the baseline
    according to the state machine.

    Transitions implemented below:
      - no state -> initialise as HEALTHY and return (no anomaly detection
        in this round);
      - any status + anomaly -> ANOMALY, clean_seconds reset, templates
        left untouched;
      - ANOMALY + normal -> RECOVERING, clean_seconds restarts;
      - HEALTHY/RECOVERING + normal -> clean_seconds accumulates; once it
        reaches the minimum, the templates are merged with a fresh baseline
        and the status becomes HEALTHY.

    Args:
        key: unique series identifier (key into states).
        ts_grid: timestamps of the uniform grid.
        ys_model: smoothed signal.
        ys_actual: raw signal.
        target: target dict with strategy and threshold settings.
        states: state dict owned by the caller; modified in place.

    Returns:
        (state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err,
        max_outside_seconds, max_exceed_ratio). state is None when no
        initial baseline could be built.
    """
    now_sec = int(time.time())
    state = states.get(key)

    # First time this series is seen: initialise the healthy baseline.
    if state is None:
        state = create_initial_state(
            ts_grid=ts_grid,
            ys_model=ys_model,
            ys_actual=ys_actual,
            target=target,
            now_sec=now_sec,
        )

        if state is None:
            return None, False, 0.0, 0.0, 0.0, 0, 0.0

        states[key] = state
        logger.info(
            "初始化健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss",
            key,
            state.strategy,
            state.period,
            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
            state.clean_seconds,
        )
        return state, False, 0.0, 0.0, 0.0, 0, 0.0

    # Seconds since this series was last processed, used to accumulate
    # clean_seconds.
    elapsed = max(1, now_sec - int(state.last_seen_ts))
    elapsed = min(elapsed, config.POLL_INTERVAL * 2)  # cap so a long outage cannot inflate clean_seconds
    state.last_seen_ts = now_sec

    (
        is_anomaly,
        outside_ratio,
        mean_abs_err,
        mean_rel_err,
        best_period,
        best_origin,
        max_outside_seconds,
        max_exceed_ratio,
    ) = detect_anomaly(
        state=state,
        ts_grid=ts_grid,
        ys_model=ys_model,
        ys_actual=ys_actual,
        target=target,
    )

    # Anomaly: freeze the templates so faulty data is not learned.
    if is_anomaly:
        state.status = BASELINE_STATUS_ANOMALY
        state.clean_seconds = 0
        states[key] = state
        logger.warning(
            "检测到异常,冻结模板 key=%s outside_ratio=%.2f max_outside=%ss "
            "max_exceed_ratio=%.2f mean_abs_err=%.4f mean_rel_err=%.4f",
            key, outside_ratio, max_outside_seconds,
            max_exceed_ratio, mean_abs_err, mean_rel_err,
        )
        return state, True, outside_ratio, mean_abs_err, mean_rel_err, max_outside_seconds, max_exceed_ratio

    # Normal: apply the phase-lock result.
    old_period = int(state.period)
    old_origin = int(state.phase_origin_ts)
    apply_phase_lock_to_state(state, best_period, best_origin)

    if old_period != state.period or old_origin != state.phase_origin_ts:
        logger.info(
            "phase-lock key=%s period %s -> %s origin %s -> %s",
            key, old_period, state.period,
            datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"),
            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
        )

    # Anomaly just cleared: enter recovery and wait before learning again.
    if state.status == BASELINE_STATUS_ANOMALY:
        state.status = BASELINE_STATUS_RECOVERING
        state.clean_seconds = elapsed
        states[key] = state
        logger.info("异常开始恢复 key=%s clean_seconds=%ss", key, state.clean_seconds)
        return state, False, outside_ratio, mean_abs_err, mean_rel_err, max_outside_seconds, max_exceed_ratio

    # Accumulate healthy time.
    if state.status == BASELINE_STATUS_RECOVERING:
        state.clean_seconds += elapsed
    else:
        state.status = BASELINE_STATUS_HEALTHY
        state.clean_seconds += elapsed

    # Not enough healthy time yet: leave the templates alone.
    min_clean_for_update = max(
        config.RECOVERY_MIN_SECONDS,
        int(state.period) * config.MIN_FULL_CYCLES_FOR_TEMPLATE,
    )
    if state.clean_seconds < min_clean_for_update:
        states[key] = state
        return state, False, outside_ratio, mean_abs_err, mean_rel_err, max_outside_seconds, max_exceed_ratio

    # Enough healthy time: rebuild a baseline from recent data and merge it.
    tail_seconds = min(
        int(state.clean_seconds),
        int(state.period) * config.MAX_CYCLES_FOR_TEMPLATE,
    )

    strategy = str(target.get("strategy", "phase_point"))
    band_low_q = float(target.get("band_low_q", 5.0))
    band_high_q = float(target.get("band_high_q", 95.0))

    baseline = build_current_baseline(
        ts_grid=ts_grid,
        ys_mid_grid=ys_model,
        ys_band_grid=ys_actual,
        strategy=strategy,
        band_low_q=band_low_q,
        band_high_q=band_high_q,
        tail_seconds=tail_seconds,
    )

    if baseline is None:
        states[key] = state
        return state, False, outside_ratio, mean_abs_err, mean_rel_err, max_outside_seconds, max_exceed_ratio

    new_period, new_origin, new_template, new_lower_template, new_upper_template = baseline

    # A separate alpha is used while recovering (config.RECOVERY_EMA_ALPHA).
    alpha = config.RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else config.HEALTHY_EMA_ALPHA

    state.template = merge_template(
        np.array(state.template, dtype=float), new_template, alpha
    ).astype(float).tolist()
    state.lower_template = merge_template(
        np.array(state.lower_template, dtype=float), new_lower_template, alpha
    ).astype(float).tolist()
    state.upper_template = merge_template(
        np.array(state.upper_template, dtype=float), new_upper_template, alpha
    ).astype(float).tolist()

    state.period = int(new_period)
    state.phase_origin_ts = int(new_origin)
    state.status = BASELINE_STATUS_HEALTHY
    state.last_update_ts = now_sec

    # Refresh the value range statistics from the same tail window when it
    # is fully available, otherwise from the whole series.
    if tail_seconds > 0 and len(ys_actual) >= tail_seconds:
        state.y_min = float(np.min(ys_actual[-tail_seconds:]))
        state.y_max = float(np.max(ys_actual[-tail_seconds:]))
    else:
        state.y_min = float(np.min(ys_actual))
        state.y_max = float(np.max(ys_actual))

    states[key] = state
    logger.info(
        "更新健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss alpha=%.2f",
        key, state.strategy, state.period,
        datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
        state.clean_seconds, alpha,
    )

    return state, False, outside_ratio, mean_abs_err, mean_rel_err, max_outside_seconds, max_exceed_ratio
logger = logging.getLogger(__name__)

# Matches one key="value" pair inside a PromQL label selector; the value may
# contain backslash escapes.
_LABEL_PATTERN = re.compile(
    r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
)

# Single-pass unescaping of label values. Chained str.replace calls are wrong
# here: an escaped backslash followed by "n" would be misread as a newline.
_ESCAPE_PATTERN = re.compile(r"\\(.)")
_UNESCAPE_MAP = {"n": "\n", '"': '"', "\\": "\\"}


def _unescape_label_value(raw: str) -> str:
    """Reverse prom_escape_label_value; unknown escapes are left untouched."""
    return _ESCAPE_PATTERN.sub(
        lambda m: _UNESCAPE_MAP.get(m.group(1), m.group(0)), raw
    )


# ---------------------------------------------------------------------------
# History reads
# ---------------------------------------------------------------------------

def fetch_history(
    vm_url: str,
    query: str,
    minutes: int,
    step: str = "1s",
) -> Tuple[List[float], List[float]]:
    """
    Fetch the history of a query from VictoriaMetrics (query_range).

    Only the first series of the result is used.

    Args:
        vm_url: VM HTTP address, e.g. "http://localhost:8428".
        query: PromQL expression, e.g. 'feed_rate{device_id="fanuc-cnc"}'.
        minutes: how many minutes of history to fetch, ending now.
        step: query step, "1s" by default.

    Returns:
        (timestamps, values), two lists of equal length. Both are empty
        when the request fails, the response cannot be parsed, or there is
        no data. Non-finite samples are dropped.
    """
    now = datetime.now()
    start = now - timedelta(minutes=minutes)

    try:
        resp = requests.get(
            f"{vm_url}/api/v1/query_range",
            params={
                "query": query,
                "start": start.timestamp(),
                "end": now.timestamp(),
                "step": step,
            },
            timeout=10,
        )
        resp.raise_for_status()
    except requests.RequestException as e:
        logger.error("拉取历史数据失败 query=%s: %s", query, e)
        return [], []

    try:
        result = resp.json().get("data", {}).get("result", [])
    except Exception as e:
        logger.error("解析 VM 响应失败 query=%s: %s", query, e)
        return [], []

    if not result:
        return [], []

    ts_list: List[float] = []
    ys_list: List[float] = []

    for item in result[0].get("values", []):
        if len(item) < 2:
            continue
        try:
            t = float(item[0])
            y = float(item[1])
        except (TypeError, ValueError):
            continue
        # Drop NaN / Inf so later numpy computations stay well defined.
        if math.isfinite(t) and math.isfinite(y):
            ts_list.append(t)
            ys_list.append(y)

    return ts_list, ys_list


def normalize_history(
    ts: List[float],
    ys: List[float],
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Regularise raw samples onto a uniform 1-second grid.

    Steps:
      1. round timestamps to whole seconds and de-duplicate (the last value
         seen for a second wins);
      2. sort by timestamp;
      3. linearly interpolate the missing seconds.

    Samples that cannot be converted to float, or whose timestamp or value
    is NaN or infinite, are skipped.

    Args:
        ts: raw timestamps (Unix seconds, may be fractional).
        ys: corresponding values.

    Returns:
        (ts_grid, ys_grid) as numpy arrays. Two empty arrays are returned
        for empty or mismatched input, or when no valid sample remains.
        With fewer than two distinct seconds the cleaned samples are
        returned without interpolation.
    """
    if not ts or not ys or len(ts) != len(ys):
        return np.array([]), np.array([])

    data: Dict[int, float] = {}
    for t, y in zip(ts, ys):
        try:
            t_val = float(t)
            val = float(y)
        except (TypeError, ValueError):
            continue
        # Check finiteness BEFORE rounding: int(round(inf)) raises
        # OverflowError and int(round(nan)) raises ValueError.
        if not (math.isfinite(t_val) and math.isfinite(val)):
            continue
        data[int(round(t_val))] = val

    if not data:
        return np.array([]), np.array([])

    sorted_items = sorted(data.items())
    ts_clean = np.array([x[0] for x in sorted_items], dtype=float)
    ys_clean = np.array([x[1] for x in sorted_items], dtype=float)

    if len(ts_clean) < 2:
        return ts_clean, ys_clean

    start_sec = int(ts_clean[0])
    end_sec = int(ts_clean[-1])

    if end_sec <= start_sec:
        return ts_clean, ys_clean

    # Build the uniform grid and interpolate onto it.
    ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float)
    ys_grid = np.interp(ts_grid, ts_clean, ys_clean)

    return ts_grid, ys_grid


# ---------------------------------------------------------------------------
# Label helpers
# ---------------------------------------------------------------------------

def prom_escape_label_value(value: str) -> str:
    """Escape backslashes, newlines and double quotes in a label value."""
    return (
        str(value)
        .replace("\\", "\\\\")
        .replace("\n", "\\n")
        .replace('"', '\\"')
    )


def labels_to_str(labels: Dict[str, str]) -> str:
    """
    Serialise a label dict to a Prometheus label string, keys sorted.

    Example:
        {"device_id": "fanuc-cnc", "source": "protoforge"}
        -> '{device_id="fanuc-cnc",source="protoforge"}'

    An empty dict yields an empty string.
    """
    if not labels:
        return ""
    parts = [
        f'{k}="{prom_escape_label_value(labels[k])}"'
        for k in sorted(labels)
    ]
    return "{" + ",".join(parts) + "}"


def parse_labels_from_query(query: str) -> Dict[str, str]:
    """
    Extract the label dict from a PromQL selector string.

    Escaped values are unescaped in a single pass, so the result is the
    exact inverse of labels_to_str, including values that contain
    backslashes.

    Example:
        'feed_rate{device_id="fanuc-cnc"}' -> {"device_id": "fanuc-cnc"}

    Returns:
        The label dict; empty when the query has no "{...}" part.
    """
    labels: Dict[str, str] = {}

    if "{" not in query or "}" not in query:
        return labels

    try:
        label_part = query[query.index("{") + 1 : query.rindex("}")]
    except ValueError:
        return labels

    for match in _LABEL_PATTERN.finditer(label_part):
        labels[match.group(1)] = _unescape_label_value(match.group(2))

    return labels


def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]:
    """Merge label dicts; later dicts override earlier ones. Falsy arguments are ignored."""
    result: Dict[str, str] = {}
    for d in dicts:
        if d:
            result.update(d)
    return result


def series_key(metric_name: str, labels: Dict[str, str]) -> str:
    """Build the unique series identifier: metric name followed by the label string."""
    return metric_name + labels_to_str(labels)


# ---------------------------------------------------------------------------
# Writes
# ---------------------------------------------------------------------------

def write_series(
    vm_url: str,
    metric_name: str,
    labels: Dict[str, str],
    ts_list: List[int],
    values: List[float],
) -> bool:
    """
    Write one series to VictoriaMetrics through /api/v1/import/prometheus
    (Prometheus text format, millisecond timestamps).

    Points that cannot be converted, or whose timestamp or value is NaN or
    infinite, are skipped.

    Args:
        vm_url: VM HTTP address.
        metric_name: metric name.
        labels: label dict.
        ts_list: timestamps (Unix seconds).
        values: corresponding values.

    Returns:
        True when the request succeeded. False for empty or mismatched
        input, when no valid point remains, or when the request fails.
    """
    if not ts_list or not values or len(ts_list) != len(values):
        return False

    label_str = labels_to_str(labels)
    lines: List[str] = []

    for t, y in zip(ts_list, values):
        try:
            t_val = float(t)
            val = float(y)
        except (TypeError, ValueError):
            continue
        # Check finiteness before rounding to avoid OverflowError on inf.
        if not (math.isfinite(t_val) and math.isfinite(val)):
            continue
        ts_sec = int(round(t_val))
        # The import endpoint takes millisecond timestamps.
        lines.append(f"{metric_name}{label_str} {val:.6f} {ts_sec * 1000}")

    if not lines:
        return False

    payload = "\n".join(lines) + "\n"

    try:
        resp = requests.post(
            f"{vm_url}/api/v1/import/prometheus",
            data=payload.encode("utf-8"),
            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
            timeout=10,
        )
        resp.raise_for_status()
        return True
    except requests.RequestException as e:
        logger.error("写入数据失败 metric=%s: %s", metric_name, e)
        return False


def write_prediction_bundle(
    vm_url: str,
    pred_metric: str,
    anomaly_metric: str,
    labels: Dict[str, str],
    ts_future: List[int],
    pred_values: np.ndarray,
    lower_values: np.ndarray,
    upper_values: np.ndarray,
    is_anomaly: bool,
    outside_ratio: float,
    mean_abs_err: float,
    mean_rel_err: float,
    max_outside_seconds: int,
    max_exceed_ratio: float,
    event_ts: int,
) -> bool:
    """
    Write the complete prediction result of one metric:
      - predicted curve (pred_metric);
      - lower and upper bound curves (pred_metric + "_lower" / "_upper");
      - anomaly flag (anomaly_metric, 0 or 1);
      - diagnostic series (outside ratio, mean absolute / relative error,
        longest consecutive outside run, maximum exceed ratio).

    Every series is attempted even if an earlier one fails.

    Args:
        vm_url: VM HTTP address.
        pred_metric: prediction metric name, e.g. "feed_rate_predicted".
        anomaly_metric: anomaly metric name, e.g. "feed_rate_anomaly".
        labels: labels attached to every series.
        ts_future: prediction timestamps (Unix seconds).
        pred_values: predicted values.
        lower_values: lower bound values.
        upper_values: upper bound values.
        is_anomaly: whether the current round is flagged as anomalous.
        outside_ratio: share of points outside the band.
        mean_abs_err: mean absolute error.
        mean_rel_err: mean relative error.
        max_outside_seconds: longest consecutive outside run.
        max_exceed_ratio: largest exceed ratio.
        event_ts: timestamp used for the flag and diagnostic series.

    Returns:
        True only when every write succeeded.
    """
    # Diagnostic series carry an extra "type" label.
    anomaly_labels = {**labels, "type": "prediction_deviation"}

    results = [
        write_series(vm_url, pred_metric, labels,
                     ts_future, pred_values.tolist()),
        write_series(vm_url, f"{pred_metric}_lower", labels,
                     ts_future, lower_values.tolist()),
        write_series(vm_url, f"{pred_metric}_upper", labels,
                     ts_future, upper_values.tolist()),
        write_series(vm_url, anomaly_metric, anomaly_labels,
                     [event_ts], [1.0 if is_anomaly else 0.0]),
        write_series(vm_url, f"{anomaly_metric}_outside_ratio", anomaly_labels,
                     [event_ts], [outside_ratio]),
        write_series(vm_url, f"{anomaly_metric}_mean_abs_error", anomaly_labels,
                     [event_ts], [mean_abs_err]),
        write_series(vm_url, f"{anomaly_metric}_mean_rel_error", anomaly_labels,
                     [event_ts], [mean_rel_err]),
        write_series(vm_url, f"{anomaly_metric}_max_consecutive_outside", anomaly_labels,
                     [event_ts], [float(max_outside_seconds)]),
        write_series(vm_url, f"{anomaly_metric}_max_exceed_ratio", anomaly_labels,
                     [event_ts], [float(max_exceed_ratio)]),
    ]

    return all(results)


# ---------------------------------------------------------------------------
# State persistence
# ---------------------------------------------------------------------------

def load_state(path: str) -> Dict[str, BaselineState]:
    """
    Load the baseline states of all series from a JSON file.

    A missing file yields an empty dict (normal first start). Entries that
    are not objects, lack a required field, or cannot be turned into a
    BaselineState are skipped without failing the whole load.

    Args:
        path: state file path.

    Returns:
        Mapping of series key -> BaselineState.
    """
    if not os.path.exists(path):
        return {}

    try:
        with open(path, "r", encoding="utf-8") as f:
            raw = json.load(f)
    except Exception as e:
        logger.warning("加载状态文件失败,将重新学习: %s", e)
        return {}

    required_fields = {
        "period", "phase_origin_ts", "template", "lower_template",
        "upper_template", "strategy", "status", "clean_seconds",
        "last_update_ts", "last_seen_ts", "y_min", "y_max",
    }

    # Guard against a syntactically valid file with an unexpected shape.
    entries = raw.get("baseline_states", {}) if isinstance(raw, dict) else {}
    if not isinstance(entries, dict):
        entries = {}

    states: Dict[str, BaselineState] = {}
    for key, value in entries.items():
        if not isinstance(value, dict) or not required_fields.issubset(value.keys()):
            continue
        try:
            states[key] = BaselineState(**value)
        except TypeError as e:
            # E.g. an unknown extra field written by another version.
            logger.warning("状态条目无效,已跳过 key=%s: %s", key, e)

    logger.info("已加载状态文件 %s,共 %d 条记录", path, len(states))
    return states


def save_state(path: str, states: Dict[str, BaselineState]) -> None:
    """
    Write the baseline states of all series to a JSON file.

    The data is written to "<path>.tmp" and then moved over the target
    with os.replace, so a crash mid-write does not leave a truncated state
    file. Failures are logged and not raised.

    Args:
        path: state file path.
        states: mapping of series key -> BaselineState.
    """
    try:
        raw = {
            "baseline_states": {
                key: asdict(state)
                for key, state in states.items()
            }
        }
        tmp_path = path + ".tmp"
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(raw, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, path)
    except Exception as e:
        logger.warning("保存状态文件失败: %s", e)
-0,0 +1,384 @@ +# -*- coding: utf-8 -*- +""" +predictor.template +~~~~~~~~~~~~~~~~~~ +模板的构建、预测、重采样与融合,不包含任何 IO 操作。 + +职责: +- 从历史谷底片段构建周期模板(中值/分位数) +- 基于模板和相位原点预测未来值 +- 模板重采样(周期变化时对齐长度) +- EMA 融合新旧模板(渐进式学习) +- 相位原点规整化 + +依赖:numpy, predictor.signal, predictor.config, predictor.models +""" + +import math +from typing import List, Optional, Tuple + +import numpy as np + +from . import config +from .models import BaselineState +from .signal import moving_average + + +# --------------------------------------------------------------------------- +# 模板构建 +# --------------------------------------------------------------------------- + +def build_templates_from_valleys( + ts_grid: np.ndarray, + ys_mid_grid: np.ndarray, + ys_band_grid: np.ndarray, + period: int, + valleys: List[int], + strategy: str, + band_low_q: float, + band_high_q: float, +) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray]]: + """ + 从历史谷底片段构建三条模板曲线(中值、下界、上界)。 + + 每个相邻谷底对定义一个周期片段,将其重采样到统一的 period 长度, + 再按策略聚合: + - phase_point:加权平均(越近的周期权重越高) + - phase_band:中位数 + 分位数(对异常周期鲁棒) + + Args: + ts_grid: 均匀 1 秒网格的时间戳数组 + ys_mid_grid: 用于构建中值模板的平滑信号(phase_point 用) + ys_band_grid: 用于构建分位数模板的原始信号(phase_band 用) + period: 目标模板长度(秒) + valleys: 谷底索引列表 + strategy: "phase_point" 或 "phase_band" + band_low_q: phase_band 下界分位数(如 5.0) + band_high_q: phase_band 上界分位数(如 95.0) + + Returns: + (mid_template, lower_template, upper_template) 三个长度为 period 的数组。 + 数据不足时返回 None。 + """ + if period <= 1 or len(valleys) < config.MIN_FULL_CYCLES_FOR_TEMPLATE + 1: + return None + + # 筛选长度合理的周期片段(0.55~1.60 倍期望周期) + pairs = [ + (a, b, float(ts_grid[b] - ts_grid[a])) + for a, b in zip(valleys[:-1], valleys[1:]) + if period * 0.55 <= float(ts_grid[b] - ts_grid[a]) <= period * 1.60 + ] + + if len(pairs) < config.MIN_FULL_CYCLES_FOR_TEMPLATE: + return None + + # 只取最近 MAX_CYCLES_FOR_TEMPLATE 个周期,防止过旧数据污染 + pairs = pairs[-config.MAX_CYCLES_FOR_TEMPLATE:] + + phase_grid = np.arange(period, dtype=float) + mid_segments: List[np.ndarray] = [] + 
band_segments: List[np.ndarray] = [] + weights: List[float] = [] + + for idx, (a, b, cycle_len) in enumerate(pairs): + seg_ts = ts_grid[a : b + 1] + seg_mid_y = ys_mid_grid[a : b + 1] + seg_band_y = ys_band_grid[a : b + 1] + + if len(seg_mid_y) < 3 or len(seg_band_y) < 3: + continue + + # 将片段的时间轴归一化到 [0, period),再插值到统一相位网格 + x_old = (seg_ts - seg_ts[0]) / cycle_len * period + mid_seg = np.interp(phase_grid, x_old, seg_mid_y) + band_seg = np.interp(phase_grid, x_old, seg_band_y) + + mid_segments.append(mid_seg.astype(float)) + band_segments.append(band_seg.astype(float)) + # 越近的周期权重越高(线性递增,范围 0.5~1.0) + weights.append(0.5 + 0.5 * ((idx + 1) / len(pairs))) + + if len(mid_segments) < config.MIN_FULL_CYCLES_FOR_TEMPLATE: + return None + + mid_arr = np.vstack(mid_segments) + band_arr = np.vstack(band_segments) + w_arr = np.array(weights, dtype=float) + + if strategy == "phase_band": + # 分位数聚合:对异常周期鲁棒,保留正常波动范围 + mid_template = np.percentile(mid_arr, 50, axis=0) + lower_template = np.percentile(band_arr, band_low_q, axis=0) + upper_template = np.percentile(band_arr, band_high_q, axis=0) + else: + # 加权平均:越近的周期贡献越大 + mid_template = np.average(mid_arr, axis=0, weights=w_arr) + lower_template = mid_template.copy() + upper_template = mid_template.copy() + + return ( + mid_template.astype(float), + lower_template.astype(float), + upper_template.astype(float), + ) + + +def build_current_baseline( + ts_grid: np.ndarray, + ys_mid_grid: np.ndarray, + ys_band_grid: np.ndarray, + strategy: str, + band_low_q: float, + band_high_q: float, + tail_seconds: Optional[int] = None, +) -> Optional[Tuple[int, int, np.ndarray, np.ndarray, np.ndarray]]: + """ + 从历史数据构建当前基线(周期 + 相位原点 + 三条模板曲线)。 + + 可选 tail_seconds 参数限制只使用最近一段数据, + 用于健康状态下的增量模板更新(避免使用过旧的异常数据)。 + + Args: + ts_grid: 均匀 1 秒网格的时间戳数组 + ys_mid_grid: 平滑后的信号(用于周期检测和中值模板) + ys_band_grid: 原始信号(用于分位数模板) + strategy: "phase_point" 或 "phase_band" + band_low_q: phase_band 下界分位数 + band_high_q: phase_band 上界分位数 + tail_seconds: 若指定,只使用最近 
tail_seconds 秒的数据 + + Returns: + (period, phase_origin_ts, template, lower_template, upper_template) + 数据不足或无法检测到谷底时返回 None。 + """ + from .signal import detect_period_and_valleys + + if len(ys_mid_grid) < config.MIN_POINTS or len(ys_band_grid) < config.MIN_POINTS: + return None + + if tail_seconds is not None and tail_seconds > 0: + cutoff = ts_grid[-1] - int(tail_seconds) + mask = ts_grid >= cutoff + ts_use = ts_grid[mask] + ys_mid_use = ys_mid_grid[mask] + ys_band_use = ys_band_grid[mask] + else: + ts_use = ts_grid + ys_mid_use = ys_mid_grid + ys_band_use = ys_band_grid + + if len(ys_mid_use) < config.MIN_POINTS or len(ys_band_use) < config.MIN_POINTS: + return None + + period, valleys = detect_period_and_valleys(ts_use, ys_mid_use) + + templates = build_templates_from_valleys( + ts_grid=ts_use, + ys_mid_grid=ys_mid_use, + ys_band_grid=ys_band_use, + period=period, + valleys=valleys, + strategy=strategy, + band_low_q=band_low_q, + band_high_q=band_high_q, + ) + + if templates is None or len(valleys) == 0: + return None + + template, lower_template, upper_template = templates + # 以最后一个谷底作为相位原点 + phase_origin_ts = int(round(float(ts_use[valleys[-1]]))) + + return int(period), phase_origin_ts, template, lower_template, upper_template + + +# --------------------------------------------------------------------------- +# 模板预测 +# --------------------------------------------------------------------------- + +def circular_template_value(template: np.ndarray, phase: float) -> float: + """ + 从模板中读取指定相位处的值(线性插值,循环边界)。 + + Args: + template: 长度为 period 的模板数组 + phase: 相位(0 到 period 之间的浮点数) + + Returns: + 插值后的模板值。 + """ + period = len(template) + if period == 0: + return 0.0 + + phase = float(phase) % period + i0 = int(math.floor(phase)) % period + i1 = (i0 + 1) % period + frac = phase - math.floor(phase) + + return float((1.0 - frac) * template[i0] + frac * template[i1]) + + +def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray: + """ + 将模板重采样到新的周期长度。 
+ + 当 phase-lock 检测到周期漂移时,需要将旧模板拉伸/压缩到新周期。 + 使用循环扩展(拼接三份)保证边界处插值正确。 + + Args: + old_template: 原始模板数组 + new_period: 目标周期长度(秒) + + Returns: + 重采样后的模板数组,长度为 new_period。 + """ + old_period = len(old_template) + if old_period == new_period: + return old_template.astype(float) + + if old_period <= 1 or new_period <= 1: + return np.full(new_period, float(np.mean(old_template)), dtype=float) + + # 归一化到 [0, 1) 相位空间,循环扩展保证边界插值正确 + old_x = np.linspace(0.0, 1.0, old_period, endpoint=False) + new_x = np.linspace(0.0, 1.0, new_period, endpoint=False) + + old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0]) + old_y_ext = np.concatenate([old_template, old_template, old_template]) + + return np.interp(new_x, old_x_ext, old_y_ext).astype(float) + + +def predict_template_values( + template: np.ndarray, + period: int, + phase_origin_ts: int, + ts_list: List[int], +) -> np.ndarray: + """ + 根据模板和相位原点,预测一组时间戳处的值。 + + 相位 = (ts - phase_origin_ts) mod period, + 再从模板中线性插值读取对应值。 + + Args: + template: 长度为 period 的模板数组 + period: 周期(秒) + phase_origin_ts: 相位原点时间戳(Unix 秒) + ts_list: 待预测的时间戳列表(Unix 秒) + + Returns: + 预测值数组,长度与 ts_list 相同。 + """ + if period <= 1: + return np.zeros(len(ts_list), dtype=float) + + if len(template) != period: + template = resample_template(template, period) + + values = [ + circular_template_value(template, (int(ts) - int(phase_origin_ts)) % period) + for ts in ts_list + ] + return np.array(values, dtype=float) + + +def predict_state_bundle( + state: BaselineState, + ts_list: List[int], +) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + 用 BaselineState 中的三条模板预测一组时间戳处的值。 + + Args: + state: 当前基线状态(包含 period、phase_origin_ts、三条模板) + ts_list: 待预测的时间戳列表(Unix 秒) + + Returns: + (mid, lower, upper) 三个预测数组,长度与 ts_list 相同。 + """ + period = int(state.period) + origin = int(state.phase_origin_ts) + + mid = predict_template_values( + template=np.array(state.template, dtype=float), + period=period, + phase_origin_ts=origin, + ts_list=ts_list, + ) + lower = 
predict_template_values( + template=np.array(state.lower_template, dtype=float), + period=period, + phase_origin_ts=origin, + ts_list=ts_list, + ) + upper = predict_template_values( + template=np.array(state.upper_template, dtype=float), + period=period, + phase_origin_ts=origin, + ts_list=ts_list, + ) + return mid, lower, upper + + +def normalize_origin_near(origin: int, period: int, near_ts: int) -> int: + """ + 将相位原点规整化到 near_ts 附近(使 origin <= near_ts < origin + period)。 + + phase-lock 搜索时需要将原点移到最近的时间窗口内, + 避免因原点过旧导致相位计算溢出。 + + Args: + origin: 当前相位原点(Unix 秒) + period: 周期(秒) + near_ts: 目标时间戳(通常为最新数据点的时间戳) + + Returns: + 规整化后的相位原点(Unix 秒)。 + """ + if period <= 1: + return origin + + origin = int(origin) + period = int(period) + near_ts = int(near_ts) + + while origin + period <= near_ts: + origin += period + + while origin > near_ts: + origin -= period + + return origin + + +def merge_template( + old_template: np.ndarray, + new_template: np.ndarray, + alpha: float, +) -> np.ndarray: + """ + 用 EMA 融合旧模板和新模板。 + + merged = (1 - alpha) * old + alpha * new + + 若两者长度不同,先将旧模板重采样到新模板长度。 + alpha 越大,新模板权重越高(学习越激进)。 + + Args: + old_template: 旧模板数组 + new_template: 新模板数组 + alpha: EMA 步长,clip 到 [0, 1] + + Returns: + 融合后的模板数组,长度与 new_template 相同。 + """ + alpha = float(np.clip(alpha, 0.0, 1.0)) + + if len(old_template) != len(new_template): + old_template = resample_template(old_template, len(new_template)) + + return ((1.0 - alpha) * old_template + alpha * new_template).astype(float) diff --git a/ai/pridict_v5.py b/ai/pridict_v5.py index 6894a66..dde0b11 100644 --- a/ai/pridict_v5.py +++ b/ai/pridict_v5.py @@ -1,27 +1,31 @@ # -*- coding: utf-8 -*- """ -ProtoForge Predictor v12 +ProtoForge Predictor v13 核心能力: -1. feed_rate / spindle_speed / spindle_current 使用 phase-lock 点预测。 -2. vibration_x / vibration_y / vibration_z 使用 phase-band 预测带。 -3. vibration 类指标: - - predicted 使用平滑后的中位数模板,用于趋势参考。 - - upper/lower 使用原始波动分位数模板 + padding,用于正常波动容忍带。 - - 偶发越界不直接报警,只有持续越界 / 高比例越界 / 严重越界才报警。 -4. 
预测起点锚定最后一个真实点 last_real_ts,避免时间错位。 -5. 异常期间冻结健康模板,不学习故障数据。 -6. 故障恢复后等待稳定,再恢复模板学习。 -7. 写入: - - xxx_predicted - - xxx_predicted_upper - - xxx_predicted_lower - - xxx_anomaly - - xxx_anomaly_outside_ratio - - xxx_anomaly_mean_abs_error - - xxx_anomaly_mean_rel_error - - xxx_anomaly_max_consecutive_outside - - xxx_anomaly_max_exceed_ratio +1. 支持三个独立 CNC 工位:粗铣(fanuc-cnc)、半精铣(fanuc-cnc-semi-finish)、精铣(fanuc-cnc-finish) +2. 覆盖指标:feed_rate / spindle_speed / spindle_current / spindle_load +3. feed_rate / spindle_speed / spindle_current 使用 phase-lock 点预测。 +4. spindle_load 使用 phase_band 预测带(多频漂移容忍)。 +5. vibration_x / vibration_y / vibration_z 使用 phase-band 预测带。 +6. 各工位独立阈值配置,匹配实际量程差异: + - 粗铣:spindle_speed~2000RPM, feed_rate~800mm/min, spindle_current~21A, spindle_load~56% + - 半精铣:spindle_speed~4000RPM, feed_rate~500mm/min, spindle_current~14.5A, spindle_load~38% + - 精铣:spindle_speed~6000RPM, feed_rate~300mm/min, spindle_current~8.5A, spindle_load~22% +7. 粗铣周期含随机抖动(±10s),phase-lock 搜索范围扩大至 ±18%。 +8. 预测起点锚定最后一个真实点 last_real_ts,避免时间错位。 +9. 异常期间冻结健康模板,不学习故障数据。 +10. 故障恢复后等待稳定,再恢复模板学习。 +11. 
写入: + - xxx_predicted + - xxx_predicted_upper + - xxx_predicted_lower + - xxx_anomaly + - xxx_anomaly_outside_ratio + - xxx_anomaly_mean_abs_error + - xxx_anomaly_mean_rel_error + - xxx_anomaly_max_consecutive_outside + - xxx_anomaly_max_exceed_ratio """ import json @@ -55,7 +59,7 @@ # ============================================================================= VM_URL = "http://localhost:8428" -STATE_FILE = "/tmp/protoforge_predictor_state_v12.json" +STATE_FILE = "/tmp/protoforge_predictor_state_v14.json" HISTORY_MINUTES = 30 HORIZON_SECONDS = 120 @@ -86,6 +90,7 @@ MAX_DATA_LAG_SECONDS = 180 +# 默认 phase-lock 搜索参数(精铣/半精铣:固定周期,搜索范围窄) PHASE_LOCK_MIN_WINDOW_SECONDS = 45 PHASE_LOCK_MAX_WINDOW_SECONDS = 180 PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12 @@ -95,105 +100,260 @@ # ============================================================================= -# 指标配置 +# 监控指标白名单(可通过环境变量 PROTOFORGE_MONITORED_METRICS 覆盖) # ============================================================================= -PREDICT_TARGETS = [ - { - "query": 'feed_rate{device_id="fanuc-cnc"}', - "pred_metric": "feed_rate_predicted", - "anomaly_metric": "feed_rate_anomaly", - "strategy": "phase_point", - "abs_threshold": 400.0, - "rel_threshold": 0.25, - "smooth_window": 1, - "outside_ratio_threshold": 0.60, - "min_consecutive_outside": 5, - "severe_exceed_ratio": 1.8, - }, - { - "query": 'spindle_speed{device_id="fanuc-cnc"}', - "pred_metric": "spindle_speed_predicted", - "anomaly_metric": "spindle_speed_anomaly", - "strategy": "phase_point", - "abs_threshold": 500.0, - "rel_threshold": 0.25, - "smooth_window": 1, - "outside_ratio_threshold": 0.60, - "min_consecutive_outside": 5, - "severe_exceed_ratio": 1.8, - }, - { - "query": 'spindle_current{device_id="fanuc-cnc"}', - "pred_metric": "spindle_current_predicted", - "anomaly_metric": "spindle_current_anomaly", - "strategy": "phase_point", - "abs_threshold": 5.0, - "rel_threshold": 0.25, - "smooth_window": 1, +_DEFAULT_MONITORED_METRICS = [ + "feed_rate", + 
"spindle_speed", + "spindle_current", + "spindle_load", + "vibration_x", + "vibration_y", + "vibration_z", +] + +MONITORED_METRICS: List[str] = [ + m.strip() + for m in os.environ.get( + "PROTOFORGE_MONITORED_METRICS", + ",".join(_DEFAULT_MONITORED_METRICS), + ).split(",") + if m.strip() +] + +# 人工上下限覆盖文件(可选,不存在则忽略) +# 格式:{"device-id": {"metric_name": {"hard_max": 35.0, "hard_min": 0.0}}} +OVERRIDE_FILE = os.environ.get( + "PROTOFORGE_PREDICTOR_OVERRIDE", + "/etc/protoforge/predictor_override.json", +) + +# 目标列表刷新间隔(秒) +TARGETS_REFRESH_INTERVAL = int(os.environ.get("PROTOFORGE_TARGETS_REFRESH", "60")) + +# 运行时目标缓存 +_TARGETS_CACHE: List[Dict] = [] +_TARGETS_LAST_REFRESH: float = 0.0 + + +# ============================================================================= +# Layer 1: 设备与指标发现 +# ============================================================================= + +def discover_device_ids() -> List[str]: + """查询 VM 中所有 device_id 标签值。""" + try: + resp = requests.get( + f"{VM_URL}/api/v1/label/device_id/values", + timeout=10, + ) + resp.raise_for_status() + return [v for v in resp.json().get("data", []) if v] + except requests.RequestException as e: + logger.error("发现 device_id 失败: %s", e) + return [] + + +def discover_metrics_for_device(device_id: str) -> List[str]: + """查询该设备在 VM 中实际存在且有近期数据的指标名。""" + found = [] + for metric in MONITORED_METRICS: + try: + resp = requests.get( + f"{VM_URL}/api/v1/query", + params={"query": f'{metric}{{device_id="{device_id}"}}'}, + timeout=5, + ) + resp.raise_for_status() + if resp.json().get("data", {}).get("result"): + found.append(metric) + except requests.RequestException: + pass + return found + + +# ============================================================================= +# Layer 2: 自适应配置推断 +# ============================================================================= + +@dataclass +class MetricProfile: + """从历史数据统计出的指标特征,驱动策略和阈值的自动推断。""" + device_id: str + metric: str + p5: float # 活跃段 5th percentile + p95: float # 
活跃段 95th percentile + iqr: float # p95 - p5 + cv: float # 变异系数 std/mean(衡量稳定性) + strategy: str # "phase_point" 或 "phase_band" + abs_threshold: float + rel_threshold: float + band_low_q: float + band_high_q: float + band_pad_abs: float + phase_lock_period_search_ratio: float + + +def infer_metric_profile(device_id: str, metric: str) -> Optional["MetricProfile"]: + """ + 拉取历史数据,统计活跃段特征,自动推断预测策略和阈值。 + + 空闲段过滤:排除 p10 以下的点,避免机床空闲时的零值拉低阈值。 + strategy 判断:cv < 0.15 → phase_point(稳定信号),否则 phase_band(波动信号)。 + phase_lock 搜索范围:由周期长度的变异系数动态决定,周期抖动大则搜索范围宽。 + """ + ts_raw, ys_raw = fetch_history(f'{metric}{{device_id="{device_id}"}}') + if len(ys_raw) < MIN_POINTS: + return None + + arr = np.array(ys_raw, dtype=float) + + # 过滤空闲段:只保留活跃值(高于 p10) + p10_val = float(np.percentile(arr, 10)) + active = arr[arr > p10_val] + if len(active) < 30: + active = arr # 数据全是活跃段,不过滤 + + mean_val = float(np.mean(active)) + std_val = float(np.std(active)) + cv = std_val / max(abs(mean_val), 1e-6) + p5 = float(np.percentile(active, 5)) + p95 = float(np.percentile(active, 95)) + iqr = p95 - p5 + + # 策略自动判断 + strategy = "phase_point" if cv < 0.15 else "phase_band" + + # 阈值自动计算:取 IQR 的 80%、量程的 5%、2倍标准差 三者最大值 + abs_threshold = max(iqr * 0.8, (p95 - p5) * 0.05, std_val * 2.0) + rel_threshold = min(0.30, cv * 1.5) + + # phase_band 容忍带宽度:IQR 的 30% 或 1 倍标准差,取较大值 + band_pad_abs = max(iqr * 0.3, std_val) + + # phase-lock 搜索范围:从历史数据估算周期抖动率 + # 用 FFT 粗估周期,再用自相关精化,最后计算多周期长度的变异系数 + ts_grid, ys_grid = normalize_history(ts_raw, ys_raw) + period_search_ratio = PHASE_LOCK_PERIOD_SEARCH_RATIO # 默认值 + if len(ys_grid) >= MIN_POINTS: + rough_period = estimate_period_rough(ys_grid) + if rough_period > MIN_PERIOD_SECONDS: + # 用谷底间距估算周期抖动 + valleys = find_valley_indices(ts_grid, ys_grid, rough_period) + if len(valleys) >= 3: + diffs = np.diff(ts_grid[valleys].astype(float)) + valid = diffs[(diffs > rough_period * 0.5) & (diffs < rough_period * 2.0)] + if len(valid) >= 2: + period_cv = float(np.std(valid) / 
max(np.mean(valid), 1e-6)) + period_search_ratio = float(np.clip(period_cv * 2.0, 0.12, 0.25)) + + logger.info( + "推断指标特征 device=%s metric=%s cv=%.3f strategy=%s abs_thr=%.3f rel_thr=%.3f period_search=%.2f", + device_id, metric, cv, strategy, abs_threshold, rel_threshold, period_search_ratio, + ) + + return MetricProfile( + device_id=device_id, + metric=metric, + p5=p5, + p95=p95, + iqr=iqr, + cv=cv, + strategy=strategy, + abs_threshold=abs_threshold, + rel_threshold=rel_threshold, + band_low_q=5.0, + band_high_q=95.0, + band_pad_abs=band_pad_abs, + phase_lock_period_search_ratio=period_search_ratio, + ) + + +def load_overrides() -> Dict: + """加载人工上下限覆盖文件,文件不存在时返回空字典。""" + if not os.path.exists(OVERRIDE_FILE): + return {} + try: + with open(OVERRIDE_FILE, "r", encoding="utf-8") as f: + return json.load(f) + except Exception as e: + logger.warning("加载 override 文件失败 %s: %s", OVERRIDE_FILE, e) + return {} + + +def build_target(profile: MetricProfile, overrides: Dict) -> Dict: + """将 MetricProfile 转换为预测执行层可用的 target dict。""" + device_overrides = overrides.get(profile.device_id, {}).get(profile.metric, {}) + + target: Dict = { + "query": f'{profile.metric}{{device_id="{profile.device_id}"}}', + "pred_metric": f"{profile.metric}_predicted", + "anomaly_metric": f"{profile.metric}_anomaly", + "strategy": profile.strategy, + "abs_threshold": profile.abs_threshold, + "rel_threshold": profile.rel_threshold, + "smooth_window": 5 if profile.strategy == "phase_band" else 2, "outside_ratio_threshold": 0.60, "min_consecutive_outside": 5, "severe_exceed_ratio": 1.8, - }, - { - "query": 'vibration_x{device_id="fanuc-cnc"}', - "pred_metric": "vibration_x_predicted", - "anomaly_metric": "vibration_x_anomaly", - "strategy": "phase_band", - - # vibration 类指标噪声、尖峰较多,不建议用很窄的阈值。 - "abs_threshold": 0.18, - "rel_threshold": 0.55, - - # 平滑只用于相位锁定和 predicted 中位趋势。 - "smooth_window": 5, - - # upper/lower 用原始值分位数,范围放宽,覆盖正常尖峰。 - "band_low_q": 1, - "band_high_q": 99, - "band_pad_abs": 0.15, - - # 
偶发越界容忍。 - "outside_ratio_threshold": 0.70, - "min_consecutive_outside": 5, - "severe_exceed_ratio": 2.0, - }, - { - "query": 'vibration_y{device_id="fanuc-cnc"}', - "pred_metric": "vibration_y_predicted", - "anomaly_metric": "vibration_y_anomaly", - "strategy": "phase_band", - "abs_threshold": 0.18, - "rel_threshold": 0.55, - "smooth_window": 5, - "band_low_q": 1, - "band_high_q": 99, - "band_pad_abs": 0.15, - "outside_ratio_threshold": 0.70, - "min_consecutive_outside": 5, - "severe_exceed_ratio": 2.0, - }, - { - "query": 'vibration_z{device_id="fanuc-cnc"}', - "pred_metric": "vibration_z_predicted", - "anomaly_metric": "vibration_z_anomaly", - "strategy": "phase_band", - "abs_threshold": 0.18, - "rel_threshold": 0.55, - "smooth_window": 5, - "band_low_q": 1, - "band_high_q": 99, - "band_pad_abs": 0.15, - "outside_ratio_threshold": 0.70, - "min_consecutive_outside": 5, - "severe_exceed_ratio": 2.0, - }, -] + "phase_lock_period_search_ratio": profile.phase_lock_period_search_ratio, + "phase_lock_origin_search_ratio": min( + 0.45, profile.phase_lock_period_search_ratio * 2.5 + ), + # 物理上下限(可选,来自 override 文件) + "hard_max": device_overrides.get("hard_max"), + "hard_min": device_overrides.get("hard_min"), + } + + if profile.strategy == "phase_band": + target.update({ + "band_low_q": profile.band_low_q, + "band_high_q": profile.band_high_q, + "band_pad_abs": profile.band_pad_abs, + }) + + return target + + +def refresh_targets_if_needed() -> None: + """ + 按 TARGETS_REFRESH_INTERVAL 间隔重新发现设备和指标,动态更新目标列表。 + 首次调用时立即执行发现。 + """ + global _TARGETS_CACHE, _TARGETS_LAST_REFRESH + + now = time.time() + if now - _TARGETS_LAST_REFRESH < TARGETS_REFRESH_INTERVAL and _TARGETS_CACHE: + return + + logger.info("开始发现设备和指标...") + overrides = load_overrides() + targets: List[Dict] = [] + + device_ids = discover_device_ids() + if not device_ids: + logger.warning("未发现任何 device_id,保持现有目标列表") + return + + for device_id in device_ids: + metrics = discover_metrics_for_device(device_id) + for 
metric in metrics: + profile = infer_metric_profile(device_id, metric) + if profile is not None: + targets.append(build_target(profile, overrides)) + + if targets: + _TARGETS_CACHE = targets + _TARGETS_LAST_REFRESH = now + logger.info( + "目标列表已更新:%d 台设备,%d 个指标目标", + len(device_ids), + len(targets), + ) + else: + logger.warning("发现流程未产生任何有效目标,保持现有目标列表") -EXTRA_PREDICT_LABELS = { - "forecast": "phase_band_health_v12", - "source": "protoforge", -} BASELINE_STATUS_HEALTHY = "healthy" BASELINE_STATUS_ANOMALY = "anomaly" @@ -612,8 +772,6 @@ def build_templates_from_valleys( if strategy == "phase_band": mid_template = np.percentile(mid_arr, 50, axis=0) - - # upper/lower 使用原始值分布,而不是平滑值分布。 lower_template = np.percentile(band_arr, low_q, axis=0) upper_template = np.percentile(band_arr, high_q, axis=0) else: @@ -794,17 +952,28 @@ def merge_template( # ============================================================================= # Phase Lock +# 支持 target 级别的 phase_lock_period_search_ratio / phase_lock_origin_search_ratio +# 粗铣工位周期含随机抖动(±10s),需要更宽的搜索范围 # ============================================================================= def phase_lock_recent( state: BaselineState, ts_grid: np.ndarray, ys_model: np.ndarray, + target: Optional[Dict] = None, ) -> Tuple[int, int, np.ndarray, float]: base_period = int(state.period) base_origin = int(state.phase_origin_ts) base_template = np.array(state.template, dtype=float) + # 从 target 读取搜索范围,允许粗铣工位使用更宽的范围 + period_search_ratio = float( + (target or {}).get("phase_lock_period_search_ratio", PHASE_LOCK_PERIOD_SEARCH_RATIO) + ) + origin_search_ratio = float( + (target or {}).get("phase_lock_origin_search_ratio", PHASE_LOCK_ORIGIN_SEARCH_RATIO) + ) + if base_period <= 1 or len(base_template) <= 1: ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist() pred = predict_template_values(base_template, base_period, base_origin, ts_recent) @@ -832,11 +1001,11 @@ def phase_lock_recent( p_min = max( int(MIN_PERIOD_SECONDS), - 
int(round(base_period * (1.0 - PHASE_LOCK_PERIOD_SEARCH_RATIO))), + int(round(base_period * (1.0 - period_search_ratio))), ) p_max = min( int(MAX_PERIOD_SECONDS), - int(round(base_period * (1.0 + PHASE_LOCK_PERIOD_SEARCH_RATIO))), + int(round(base_period * (1.0 + period_search_ratio))), ) best_period = base_period @@ -855,7 +1024,7 @@ def phase_lock_recent( for period in range(p_min, p_max + 1, PHASE_LOCK_PERIOD_STEP): template = resample_template(base_template, period) center_origin = normalize_origin_near(base_origin, period, last_ts) - origin_shift = max(2, int(round(period * PHASE_LOCK_ORIGIN_SEARCH_RATIO))) + origin_shift = max(2, int(round(period * origin_search_ratio))) for shift in range(-origin_shift, origin_shift + 1, PHASE_LOCK_ORIGIN_STEP): origin = center_origin + shift @@ -925,7 +1094,6 @@ def calc_final_bounds( if strategy == "phase_band": pad_abs = float(target.get("band_pad_abs", abs_threshold)) - # 对 vibration 类指标:边界更像正常波动容忍带,不是硬边界。 dynamic_pad = np.maximum( pad_abs, np.abs(pred) * rel_threshold * 0.25, @@ -933,10 +1101,18 @@ def calc_final_bounds( lower = lower_raw - dynamic_pad upper = upper_raw + dynamic_pad + else: + lower, upper = calc_point_bounds(pred, abs_threshold, rel_threshold) - return lower, upper + # 物理上下限兜底(来自 override 文件,可选) + hard_max = target.get("hard_max") + hard_min = target.get("hard_min") + if hard_max is not None: + upper = np.minimum(upper, float(hard_max)) + if hard_min is not None: + lower = np.maximum(lower, float(hard_min)) - return calc_point_bounds(pred, abs_threshold, rel_threshold) + return lower, upper def detect_anomaly( @@ -950,6 +1126,7 @@ def detect_anomaly( state=state, ts_grid=ts_grid, ys_model=ys_model, + target=target, ) recent_len = len(pred_recent) @@ -1018,11 +1195,6 @@ def detect_anomaly( target.get("severe_exceed_ratio", SEVERE_EXCEED_RATIO) ) - # 核心优化: - # 1. 偶发 1~3 个点越界不报警。 - # 2. 持续越界才报警。 - # 3. 高比例越界才报警。 - # 4. 
严重越界才立即报警。 is_anomaly = ( outside_ratio >= outside_ratio_threshold or max_outside_seconds >= min_consecutive_outside @@ -1653,7 +1825,13 @@ def build_prediction_timestamps( def run_once() -> None: now_str = datetime.now().strftime("%H:%M:%S") - for target in PREDICT_TARGETS: + refresh_targets_if_needed() + + if not _TARGETS_CACHE: + logger.warning("[%s] 目标列表为空,等待设备发现完成", now_str) + return + + for target in _TARGETS_CACHE: query = target["query"] pred_metric = target["pred_metric"] anomaly_metric = target["anomaly_metric"] @@ -1749,7 +1927,7 @@ def run_once() -> None: origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S") logger.info( - "[%s] %-40s → %-35s strategy=%s status=%s anomaly=%s outside=%.2f max_outside=%ss max_exceed=%.2f period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点,预测区间 %s ~ %s", + "[%s] %-50s → %-35s strategy=%s status=%s anomaly=%s outside=%.2f max_outside=%ss max_exceed=%.2f period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点,预测区间 %s ~ %s", now_str, query, pred_metric, @@ -1775,7 +1953,7 @@ def main() -> None: load_state() logger.info( - "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s", + "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s override=%s refresh=%ds", VM_URL, HISTORY_MINUTES, HORIZON_SECONDS, @@ -1783,6 +1961,8 @@ def main() -> None: POLL_INTERVAL, STATE_FILE, EXTRA_PREDICT_LABELS["forecast"], + OVERRIDE_FILE, + TARGETS_REFRESH_INTERVAL, ) while True: @@ -1791,4 +1971,4 @@ def main() -> None: if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py index 11b61a7..2182bf1 100644 --- a/protoforge/core/fault.py +++ b/protoforge/core/fault.py @@ -32,24 +32,77 @@ BUILTIN_FAULT_TYPES: list[FaultTypeDefinition] = [ # ------------------------------------------------------------------ - # 进给堵转 — 工件夹紧松动或切削量过大导致进给卡死 - # 特征:进给速率瞬间降为0,主轴负载和电流急剧升高,主轴仍在转(区别于崩刃) - # 模式:瞬间注入 + # 进给堵转(粗铣)— 
fanuc-cnc + # 量程:spindle_speed~2000RPM, feed_rate~800mm/min, + # spindle_current~21A, spindle_load~56% + # 堵转目标:load→92%, current→38A,转速维持+轻微抖动 # ------------------------------------------------------------------ FaultTypeDefinition( - id="feed_stall", - name="进给堵转", - description="进给轴卡死,进给速率降为零,主轴负载和电流急剧升高,主轴转速维持(区别于崩刃停主轴)", + id="feed_stall_rough", + name="进给堵转(粗铣)", + description="粗铣进给轴卡死,进给速率降为零,主轴负载升至~92%,电流升至~38A,主轴转速维持(区别于崩刃停主轴)", category="process", default_duration=20.0, - tags=["进给", "堵转", "突发"], + tags=["进给", "堵转", "突发", "粗铣"], point_faults=[ PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, target_value=0.0, noise_scale=0.0), PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, - multiplier=2.8, noise_scale=5.0), + target_value=92.0, noise_scale=4.0), PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, - multiplier=3.8, noise_scale=1.5), + target_value=38.0, noise_scale=1.5), + PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=30.0), + ], + ), + + # ------------------------------------------------------------------ + # 进给堵转(半精铣)— fanuc-cnc-semi-finish + # 量程:spindle_speed~4000RPM, feed_rate~500mm/min, + # spindle_current~14.5A, spindle_load~38% + # 堵转目标:load→68%, current→26A,转速维持+轻微抖动 + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="feed_stall_semi", + name="进给堵转(半精铣)", + description="半精铣进给轴卡死,进给速率降为零,主轴负载升至~68%,电流升至~26A,主轴转速维持(区别于崩刃停主轴)", + category="process", + default_duration=20.0, + tags=["进给", "堵转", "突发", "半精铣"], + point_faults=[ + PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, + target_value=0.0, noise_scale=0.0), + PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, + target_value=68.0, noise_scale=3.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + target_value=26.0, noise_scale=1.2), + PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, + multiplier=1.0, 
noise_scale=50.0), + ], + ), + + # ------------------------------------------------------------------ + # 进给堵转(精铣)— fanuc-cnc-finish + # 量程:spindle_speed~6000RPM, feed_rate~300mm/min, + # spindle_current~8.5A, spindle_load~22% + # 堵转目标:load→40%, current→15A,转速维持+轻微抖动 + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="feed_stall_finish", + name="进给堵转(精铣)", + description="精铣进给轴卡死,进给速率降为零,主轴负载升至~40%,电流升至~15A,主轴转速维持(区别于崩刃停主轴)", + category="process", + default_duration=20.0, + tags=["进给", "堵转", "突发", "精铣"], + point_faults=[ + PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, + target_value=0.0, noise_scale=0.0), + PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, + target_value=40.0, noise_scale=2.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + target_value=15.0, noise_scale=0.8), + PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=80.0), ], ), From 07fc5d6897f2ab7b228fa16d748cd9ad1c74abb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Fri, 29 May 2026 10:09:13 +0800 Subject: [PATCH 36/55] fix(fault): fault fix --- FAULT_INJECTION.md | 47 ++++++++++--- protoforge/core/fault.py | 133 +++++++++++++++++++++++++++---------- protoforge/models/fault.py | 8 ++- 3 files changed, 142 insertions(+), 46 deletions(-) diff --git a/FAULT_INJECTION.md b/FAULT_INJECTION.md index 951648d..22746bf 100644 --- a/FAULT_INJECTION.md +++ b/FAULT_INJECTION.md @@ -160,19 +160,48 @@ DELETE /api/v1/devices/{device_id}/fault --- -### spindle_overheat — 主轴过热 +### spindle_overheat_rough — 主轴过热(粗铣) - **分类**:thermal -- **模式**:渐进式 +- **模式**:渐进式(绝对目标值) - **默认持续时间**:240 秒 -- **真实场景**:长时间高负荷或冷却系统故障,热保护机制逐渐降低转速 +- **真实场景**:粗铣主轴长时间高负荷或冷却不足,负载/电流持续高位,热保护渐进降速 -| 测点 | 变化方向 | 峰值倍率 | -|------|---------|---------| -| `spindle_current` | 升高 | ×1.8 | -| `spindle_speed` | 降低 | ×0.6 | -| `vibration_x` | 升高 | ×1.5 | -| `vibration_z` | 升高 | ×1.5 | +| 
测点 | 变化方向 | 目标值 | +|------|---------|--------| +| `spindle_load` | 持续升高 | →85% | +| `spindle_current` | 持续升高 | →34A | +| `spindle_speed` | 渐进降低 | →1400 RPM | + +--- + +### spindle_overheat_semi — 主轴过热(半精铣) + +- **分类**:thermal +- **模式**:渐进式(绝对目标值) +- **默认持续时间**:240 秒 +- **真实场景**:半精铣主轴长时间高负荷或冷却不足,负载/电流持续高位,热保护渐进降速 + +| 测点 | 变化方向 | 目标值 | +|------|---------|--------| +| `spindle_load` | 持续升高 | →72% | +| `spindle_current` | 持续升高 | →24A | +| `spindle_speed` | 渐进降低 | →2600 RPM | + +--- + +### spindle_overheat_finish — 主轴过热(精铣) + +- **分类**:thermal +- **模式**:渐进式(绝对目标值) +- **默认持续时间**:240 秒 +- **真实场景**:精铣主轴长时间高负荷或冷却不足,负载/电流持续高位,热保护渐进降速 + +| 测点 | 变化方向 | 目标值 | +|------|---------|--------| +| `spindle_load` | 持续升高 | →48% | +| `spindle_current` | 持续升高 | →15A | +| `spindle_speed` | 渐进降低 | →3800 RPM | --- diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py index 2182bf1..857bf90 100644 --- a/protoforge/core/fault.py +++ b/protoforge/core/fault.py @@ -35,12 +35,12 @@ # 进给堵转(粗铣)— fanuc-cnc # 量程:spindle_speed~2000RPM, feed_rate~800mm/min, # spindle_current~21A, spindle_load~56% - # 堵转目标:load→92%, current→38A,转速维持+轻微抖动 + # 堵转目标:load→85~100%, current→34~42A,转速维持+轻微抖动 # ------------------------------------------------------------------ FaultTypeDefinition( id="feed_stall_rough", name="进给堵转(粗铣)", - description="粗铣进给轴卡死,进给速率降为零,主轴负载升至~92%,电流升至~38A,主轴转速维持(区别于崩刃停主轴)", + description="粗铣进给轴卡死,进给速率降为零,主轴负载升至85~100%,电流升至34~42A,主轴转速维持(区别于崩刃停主轴)", category="process", default_duration=20.0, tags=["进给", "堵转", "突发", "粗铣"], @@ -48,9 +48,9 @@ PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, target_value=0.0, noise_scale=0.0), PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, - target_value=92.0, noise_scale=4.0), + target_min=85.0, target_max=100.0, noise_scale=4.0), PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, - target_value=38.0, noise_scale=1.5), + target_min=34.0, target_max=42.0, noise_scale=1.5), PointFaultConfig(point="spindle_speed", 
mode=FaultMode.INSTANT, multiplier=1.0, noise_scale=30.0), ], @@ -60,12 +60,12 @@ # 进给堵转(半精铣)— fanuc-cnc-semi-finish # 量程:spindle_speed~4000RPM, feed_rate~500mm/min, # spindle_current~14.5A, spindle_load~38% - # 堵转目标:load→68%, current→26A,转速维持+轻微抖动 + # 堵转目标:load→62~75%, current→23~29A,转速维持+轻微抖动 # ------------------------------------------------------------------ FaultTypeDefinition( id="feed_stall_semi", name="进给堵转(半精铣)", - description="半精铣进给轴卡死,进给速率降为零,主轴负载升至~68%,电流升至~26A,主轴转速维持(区别于崩刃停主轴)", + description="半精铣进给轴卡死,进给速率降为零,主轴负载升至62~75%,电流升至23~29A,主轴转速维持(区别于崩刃停主轴)", category="process", default_duration=20.0, tags=["进给", "堵转", "突发", "半精铣"], @@ -73,9 +73,9 @@ PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, target_value=0.0, noise_scale=0.0), PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, - target_value=68.0, noise_scale=3.0), + target_min=62.0, target_max=75.0, noise_scale=3.0), PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, - target_value=26.0, noise_scale=1.2), + target_min=23.0, target_max=29.0, noise_scale=1.2), PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, multiplier=1.0, noise_scale=50.0), ], @@ -85,12 +85,12 @@ # 进给堵转(精铣)— fanuc-cnc-finish # 量程:spindle_speed~6000RPM, feed_rate~300mm/min, # spindle_current~8.5A, spindle_load~22% - # 堵转目标:load→40%, current→15A,转速维持+轻微抖动 + # 堵转目标:load→36~45%, current→13~17A,转速维持+轻微抖动 # ------------------------------------------------------------------ FaultTypeDefinition( id="feed_stall_finish", name="进给堵转(精铣)", - description="精铣进给轴卡死,进给速率降为零,主轴负载升至~40%,电流升至~15A,主轴转速维持(区别于崩刃停主轴)", + description="精铣进给轴卡死,进给速率降为零,主轴负载升至36~45%,电流升至13~17A,主轴转速维持(区别于崩刃停主轴)", category="process", default_duration=20.0, tags=["进给", "堵转", "突发", "精铣"], @@ -98,33 +98,79 @@ PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, target_value=0.0, noise_scale=0.0), PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, - target_value=40.0, noise_scale=2.0), + target_min=36.0, 
target_max=45.0, noise_scale=2.0), PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, - target_value=15.0, noise_scale=0.8), + target_min=13.0, target_max=17.0, noise_scale=0.8), PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, multiplier=1.0, noise_scale=80.0), ], ), # ------------------------------------------------------------------ - # 主轴过热 — 长时间高负荷或冷却系统故障 - # 特征:主轴负载和电流持续偏高,转速因热保护逐渐降低 - # 模式:渐进式,持续时间较长 + # 主轴过热(粗铣)— fanuc-cnc + # 基线:spindle_speed~2000RPM, spindle_current~21A, spindle_load~56% + # 过热目标范围:load 78~92%,current 30~38A,转速降至 1200~1600RPM + # 范围模拟不同冷却状态、负荷历史、环境温度下的个体差异 + # 模式:渐进式;全部用 target_min/max,避免 multiplier 在空载基线=0 时失效 # ------------------------------------------------------------------ FaultTypeDefinition( - id="spindle_overheat", - name="主轴过热", - description="主轴长时间高负荷运转或冷却不足,spindle_load和spindle_current持续偏高,转速因热保护渐进下降", + id="spindle_overheat_rough", + name="主轴过热(粗铣)", + description="粗铣主轴长时间高负荷或冷却不足,spindle_load渐进升至78~92%,spindle_current升至30~38A,转速因热保护渐进降至1200~1600RPM", category="thermal", default_duration=240.0, - tags=["主轴", "过热", "渐进"], + tags=["主轴", "过热", "渐进", "粗铣"], point_faults=[ PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL, - multiplier=1.6, noise_scale=3.0), + target_min=78.0, target_max=92.0, noise_scale=3.5), PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, - multiplier=1.8, noise_scale=1.2), + target_min=30.0, target_max=38.0, noise_scale=1.5), PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL, - multiplier=0.6, noise_scale=50.0), + target_min=1200, target_max=1600, noise_scale=40.0), + ], + ), + + # ------------------------------------------------------------------ + # 主轴过热(半精铣)— fanuc-cnc-semi-finish + # 基线:spindle_speed~4000RPM, spindle_current~14.5A, spindle_load~38% + # 过热目标范围:load 65~78%,current 21~27A,转速降至 2400~2900RPM + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="spindle_overheat_semi", + 
name="主轴过热(半精铣)", + description="半精铣主轴长时间高负荷或冷却不足,spindle_load渐进升至65~78%,spindle_current升至21~27A,转速因热保护渐进降至2400~2900RPM", + category="thermal", + default_duration=240.0, + tags=["主轴", "过热", "渐进", "半精铣"], + point_faults=[ + PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL, + target_min=65.0, target_max=78.0, noise_scale=3.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, + target_min=21.0, target_max=27.0, noise_scale=1.2), + PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL, + target_min=2400, target_max=2900, noise_scale=50.0), + ], + ), + + # ------------------------------------------------------------------ + # 主轴过热(精铣)— fanuc-cnc-finish + # 基线:spindle_speed~6000RPM, spindle_current~8.5A, spindle_load~22% + # 过热目标范围:load 42~55%,current 13~17A,转速降至 3600~4200RPM + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="spindle_overheat_finish", + name="主轴过热(精铣)", + description="精铣主轴长时间高负荷或冷却不足,spindle_load渐进升至42~55%,spindle_current升至13~17A,转速因热保护渐进降至3600~4200RPM", + category="thermal", + default_duration=240.0, + tags=["主轴", "过热", "渐进", "精铣"], + point_faults=[ + PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL, + target_min=42.0, target_max=55.0, noise_scale=2.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, + target_min=13.0, target_max=17.0, noise_scale=0.8), + PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL, + target_min=3600, target_max=4200, noise_scale=60.0), ], ), @@ -230,15 +276,15 @@ FaultTypeDefinition( id="air_cutting", name="空切检测", - description="刀具未接触工件,spindle_load跌至空载区间(5-15%),spindle_current降至空转水平,转速进给保持正常", + description="刀具未接触工件,spindle_load跌至空载区间(4-12%),spindle_current降至空转水平,转速进给保持正常", category="tool", default_duration=180.0, tags=["刀具", "空切", "工况切换", "负载"], point_faults=[ PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, - target_value=8.0, noise_scale=2.0), + target_min=4.0, target_max=12.0, 
noise_scale=2.0), PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, - target_value=2.5, noise_scale=0.3), + target_min=2.0, target_max=3.5, noise_scale=0.3), ], ), @@ -356,6 +402,13 @@ def inject(self, device: Any, request: FaultInjectRequest) -> FaultInfo: except (TypeError, ValueError): baseline[pf.point] = 0.0 + # 对有范围定义的测点,注入时随机采样一个实际目标值 + # 使每次注入的故障严重程度有所不同,模拟真实场景的个体差异 + resolved_targets: dict[str, float] = {} + for pf in fault_type.point_faults: + if pf.target_min is not None and pf.target_max is not None: + resolved_targets[pf.point] = random.uniform(pf.target_min, pf.target_max) + fault = ActiveFault( fault_id=uuid.uuid4().hex[:12], device_id=device.id, @@ -365,10 +418,11 @@ def inject(self, device: Any, request: FaultInjectRequest) -> FaultInfo: duration=duration, started_at=time.time(), baseline_values=baseline, + resolved_targets=resolved_targets, ) self._active[device.id] = fault - logger.info("Fault injected: device=%s type=%s duration=%.0fs", - device.id, fault_type.id, duration) + logger.info("Fault injected: device=%s type=%s duration=%.0fs resolved_targets=%s", + device.id, fault_type.id, duration, resolved_targets) return self._to_info(fault, fault_type) def apply(self, device: Any) -> None: @@ -400,13 +454,14 @@ def apply(self, device: Any) -> None: baseline = fault.baseline_values.get(pf.point, 0.0) if baseline == 0.0: # 基线为0说明注入时设备处于换刀/停机状态 - # target_value 模式可以直接执行(如崩刃归零、空切归空载) + # target_value / resolved_targets 模式可以直接执行 # multiplier 模式跳过,避免在零基线上产生无意义的值 - if pf.target_value is None: + if pf.target_value is None and pf.point not in fault.resolved_targets: continue + resolved_target = fault.resolved_targets.get(pf.point) device._point_values[pf.point] = self._compute_value( - pf, baseline, progress, fault.intensity + pf, baseline, progress, fault.intensity, resolved_target ) def clear(self, device_id: str) -> bool: @@ -451,20 +506,26 @@ def _compute_value( baseline: float, progress: float, intensity: float, + resolved_target: 
Optional[float] = None, ) -> float: - """根据故障配置和当前进度计算覆盖值""" + """根据故障配置和当前进度计算覆盖值。 + + 目标值优先级:resolved_target(注入时随机采样)> target_value(固定值)> multiplier + """ + # 确定本次注入的实际目标值 + effective_target: Optional[float] = resolved_target if resolved_target is not None else pf.target_value + if pf.mode == FaultMode.INSTANT: - # 瞬间模式:直接用目标值,不随时间变化 - if pf.target_value is not None: - target = pf.target_value + if effective_target is not None: + target = effective_target elif pf.multiplier is not None: target = baseline * (1.0 + (pf.multiplier - 1.0) * intensity) else: target = baseline else: # 渐进模式:随 progress 线性劣化 - if pf.target_value is not None: - target = baseline + (pf.target_value - baseline) * progress * intensity + if effective_target is not None: + target = baseline + (effective_target - baseline) * progress * intensity elif pf.multiplier is not None: target = baseline * (1.0 + (pf.multiplier - 1.0) * progress * intensity) else: diff --git a/protoforge/models/fault.py b/protoforge/models/fault.py index cc038e0..9928332 100644 --- a/protoforge/models/fault.py +++ b/protoforge/models/fault.py @@ -25,6 +25,11 @@ class PointFaultConfig(BaseModel): target_value: Optional[float] = None multiplier: Optional[float] = None # 异常值 = 当前正常值 × multiplier + # 目标值范围:注入时在 [target_min, target_max] 内随机采样一个实际目标值 + # 设置后会覆盖 target_value,使每次注入的故障严重程度有所不同 + target_min: Optional[float] = None + target_max: Optional[float] = None + # GRADUAL 模式:从当前值线性劣化到 target_value 或 multiplier 倍 # 劣化程度 = progress(0~1) × (target - baseline) noise_scale: float = 0.0 # 叠加随机噪声幅度,模拟真实抖动 @@ -59,7 +64,8 @@ class ActiveFault(BaseModel): duration: float = 120.0 started_at: float = 0.0 cleared_at: Optional[float] = None - baseline_values: dict[str, float] = Field(default_factory=dict) # 注入时的正常基线值 + baseline_values: dict[str, float] = Field(default_factory=dict) # 注入时的正常基线值 + resolved_targets: dict[str, float] = Field(default_factory=dict) # 注入时随机采样的实际目标值 class FaultInfo(BaseModel): From 
5a91ce18b3dadda432d8b651149f83e3d73c5239 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Wed, 3 Jun 2026 20:15:22 +0800 Subject: [PATCH 37/55] fix --- protoforge/core/fault.py | 23 +++++++++++++++-------- protoforge/models/fault.py | 2 ++ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py index 857bf90..a8c3129 100644 --- a/protoforge/core/fault.py +++ b/protoforge/core/fault.py @@ -126,7 +126,8 @@ PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, target_min=30.0, target_max=38.0, noise_scale=1.5), PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL, - target_min=1200, target_max=1600, noise_scale=40.0), + target_min=1200, target_max=1600, noise_scale=40.0, + nominal_baseline=2000.0), ], ), @@ -148,7 +149,8 @@ PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, target_min=21.0, target_max=27.0, noise_scale=1.2), PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL, - target_min=2400, target_max=2900, noise_scale=50.0), + target_min=2400, target_max=2900, noise_scale=50.0, + nominal_baseline=4000.0), ], ), @@ -170,7 +172,8 @@ PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, target_min=13.0, target_max=17.0, noise_scale=0.8), PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL, - target_min=3600, target_max=4200, noise_scale=60.0), + target_min=3600, target_max=4200, noise_scale=60.0, + nominal_baseline=6000.0), ], ), @@ -515,21 +518,25 @@ def _compute_value( # 确定本次注入的实际目标值 effective_target: Optional[float] = resolved_target if resolved_target is not None else pf.target_value + # 如果配置了额定基线,使用它替代注入时采样的瞬时值 + # 避免在升/降速等非稳态阶段注入时,基线偏低导致渐进目标反而高于基线(转速"上升"bug) + effective_baseline = pf.nominal_baseline if pf.nominal_baseline is not None else baseline + if pf.mode == FaultMode.INSTANT: if effective_target is not None: target = effective_target elif pf.multiplier is not None: - target = baseline * (1.0 + 
(pf.multiplier - 1.0) * intensity) + target = effective_baseline * (1.0 + (pf.multiplier - 1.0) * intensity) else: - target = baseline + target = effective_baseline else: # 渐进模式:随 progress 线性劣化 if effective_target is not None: - target = baseline + (effective_target - baseline) * progress * intensity + target = effective_baseline + (effective_target - effective_baseline) * progress * intensity elif pf.multiplier is not None: - target = baseline * (1.0 + (pf.multiplier - 1.0) * progress * intensity) + target = effective_baseline * (1.0 + (pf.multiplier - 1.0) * progress * intensity) else: - target = baseline + target = effective_baseline # 叠加随机噪声,模拟真实信号抖动 if pf.noise_scale > 0: diff --git a/protoforge/models/fault.py b/protoforge/models/fault.py index 9928332..61eb0e4 100644 --- a/protoforge/models/fault.py +++ b/protoforge/models/fault.py @@ -33,6 +33,8 @@ class PointFaultConfig(BaseModel): # GRADUAL 模式:从当前值线性劣化到 target_value 或 multiplier 倍 # 劣化程度 = progress(0~1) × (target - baseline) noise_scale: float = 0.0 # 叠加随机噪声幅度,模拟真实抖动 + nominal_baseline: Optional[float] = None # 稳态额定基线,设置后替代注入时采样的瞬时值 + # 用于周期性信号(如主轴转速)避免在升/降速段注入时基线失真 class FaultTypeDefinition(BaseModel): From 1d083cb6e5dc3e0da95081a80d5ce9c13d35765f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Wed, 3 Jun 2026 20:43:56 +0800 Subject: [PATCH 38/55] fix --- protoforge/core/fault.py | 63 ++++++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py index a8c3129..86aeb1d 100644 --- a/protoforge/core/fault.py +++ b/protoforge/core/fault.py @@ -178,24 +178,69 @@ ), # ------------------------------------------------------------------ - # 电源波动 — 供电不稳定 - # 特征:主轴转速和进给速率出现随机波动,电流不稳定 - # 模式:瞬间注入(持续期间持续抖动) + # 电源波动(粗铣)— fanuc-cnc + # 主轴~2000RPM,进给~800mm/min + # 转速噪声 ±200 RPM(±10%),进给噪声 ±80 mm/min(±10%),电流噪声 ±3A # ------------------------------------------------------------------ 
FaultTypeDefinition( - id="power_fluctuation", - name="电源波动", - description="供电电压不稳定,主轴转速和进给速率出现随机波动", + id="power_fluctuation_rough", + name="电源波动(粗铣)", + description="粗铣工位供电电压不稳定,主轴转速出现随机波动(±200RPM),进给速率抖动(±80mm/min),电流不稳定", category="electrical", default_duration=90.0, - tags=["电源", "波动", "突发"], + tags=["电源", "波动", "突发", "粗铣"], + point_faults=[ + PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=200.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=3.0), + PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=80.0), + ], + ), + + # ------------------------------------------------------------------ + # 电源波动(半精铣)— fanuc-cnc-semi-finish + # 主轴~4000RPM,进给~300mm/min + # 转速噪声 ±300 RPM(±7.5%),进给噪声 ±25 mm/min(±8%),电流噪声 ±2A + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="power_fluctuation_semi", + name="电源波动(半精铣)", + description="半精铣工位供电电压不稳定,主轴转速出现随机波动(±300RPM),进给速率抖动(±25mm/min),电流不稳定", + category="electrical", + default_duration=90.0, + tags=["电源", "波动", "突发", "半精铣"], point_faults=[ PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, multiplier=1.0, noise_scale=300.0), PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, - multiplier=1.0, noise_scale=5.0), + multiplier=1.0, noise_scale=2.0), + PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=25.0), + ], + ), + + # ------------------------------------------------------------------ + # 电源波动(精铣)— fanuc-cnc-finish + # 主轴~6000RPM,进给~300mm/min + # 转速噪声 ±450 RPM(±7.5%),进给噪声 ±25 mm/min(±8%),电流噪声 ±1.2A + # 精铣对稳定性要求高,波动对加工质量影响更敏感 + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="power_fluctuation_finish", + name="电源波动(精铣)", + description="精铣工位供电电压不稳定,主轴转速出现随机波动(±450RPM),进给速率抖动(±25mm/min),电流不稳定;精铣对稳定性要求高,波动易导致表面质量下降", + 
category="electrical", + default_duration=90.0, + tags=["电源", "波动", "突发", "精铣"], + point_faults=[ + PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=450.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=1.2), PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, - multiplier=1.0, noise_scale=150.0), + multiplier=1.0, noise_scale=25.0), ], ), From 30ad880be00d40453451af5fa4a622db5693d511 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Wed, 3 Jun 2026 21:10:31 +0800 Subject: [PATCH 39/55] fix --- protoforge/core/fault.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py index 86aeb1d..cdbcfe9 100644 --- a/protoforge/core/fault.py +++ b/protoforge/core/fault.py @@ -191,11 +191,11 @@ tags=["电源", "波动", "突发", "粗铣"], point_faults=[ PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, - multiplier=1.0, noise_scale=200.0), + multiplier=1.0, noise_scale=200.0, nominal_baseline=2000.0), PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, multiplier=1.0, noise_scale=3.0), PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, - multiplier=1.0, noise_scale=80.0), + multiplier=1.0, noise_scale=80.0, nominal_baseline=800.0), ], ), @@ -213,11 +213,11 @@ tags=["电源", "波动", "突发", "半精铣"], point_faults=[ PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, - multiplier=1.0, noise_scale=300.0), + multiplier=1.0, noise_scale=300.0, nominal_baseline=4000.0), PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, multiplier=1.0, noise_scale=2.0), PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, - multiplier=1.0, noise_scale=25.0), + multiplier=1.0, noise_scale=25.0, nominal_baseline=300.0), ], ), @@ -236,11 +236,11 @@ tags=["电源", "波动", "突发", "精铣"], point_faults=[ PointFaultConfig(point="spindle_speed", 
mode=FaultMode.INSTANT, - multiplier=1.0, noise_scale=450.0), + multiplier=1.0, noise_scale=450.0, nominal_baseline=6000.0), PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, multiplier=1.0, noise_scale=1.2), PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, - multiplier=1.0, noise_scale=25.0), + multiplier=1.0, noise_scale=25.0, nominal_baseline=300.0), ], ), @@ -500,10 +500,11 @@ def apply(self, device: Any) -> None: if pf.point not in device._point_values: continue baseline = fault.baseline_values.get(pf.point, 0.0) - if baseline == 0.0: + if baseline == 0.0 and pf.nominal_baseline is None: # 基线为0说明注入时设备处于换刀/停机状态 # target_value / resolved_targets 模式可以直接执行 # multiplier 模式跳过,避免在零基线上产生无意义的值 + # 例外:配置了 nominal_baseline 时使用额定值,不跳过 if pf.target_value is None and pf.point not in fault.resolved_targets: continue From 8ecfb1168aa1ff9c722550322c8e8d6c0395fa0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Wed, 3 Jun 2026 21:50:03 +0800 Subject: [PATCH 40/55] fix --- protoforge/core/fault.py | 9 ++++++--- protoforge/models/fault.py | 4 +++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py index cdbcfe9..5edfcfc 100644 --- a/protoforge/core/fault.py +++ b/protoforge/core/fault.py @@ -259,9 +259,9 @@ tags=["刀具", "磨损", "负载", "趋势漂移"], point_faults=[ PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL, - multiplier=1.8, noise_scale=3.0), + multiplier=1.8, noise_ratio=0.05), PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, - multiplier=1.7, noise_scale=1.5), + multiplier=1.7, noise_ratio=0.05), ], ), @@ -585,7 +585,10 @@ def _compute_value( target = effective_baseline # 叠加随机噪声,模拟真实信号抖动 - if pf.noise_scale > 0: + # noise_ratio > 0 时按 effective_baseline 比例计算噪声幅度,否则使用绝对值 noise_scale + if pf.noise_ratio > 0: + target += random.gauss(0, pf.noise_ratio * effective_baseline * intensity) + elif pf.noise_scale > 0: target += random.gauss(0, 
pf.noise_scale * intensity) return round(max(0.0, target), 4) diff --git a/protoforge/models/fault.py b/protoforge/models/fault.py index 61eb0e4..5c69ee3 100644 --- a/protoforge/models/fault.py +++ b/protoforge/models/fault.py @@ -32,7 +32,9 @@ class PointFaultConfig(BaseModel): # GRADUAL 模式:从当前值线性劣化到 target_value 或 multiplier 倍 # 劣化程度 = progress(0~1) × (target - baseline) - noise_scale: float = 0.0 # 叠加随机噪声幅度,模拟真实抖动 + noise_scale: float = 0.0 # 叠加随机噪声幅度(绝对值),模拟真实抖动 + noise_ratio: float = 0.0 # 叠加随机噪声幅度(相对 effective_baseline 的比例) + # 与 noise_scale 互斥,noise_ratio > 0 时优先使用 nominal_baseline: Optional[float] = None # 稳态额定基线,设置后替代注入时采样的瞬时值 # 用于周期性信号(如主轴转速)避免在升/降速段注入时基线失真 From 2d23121ae05e27429390edb583ce1e73bf21cfcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Thu, 4 Jun 2026 09:03:22 +0800 Subject: [PATCH 41/55] fix --- protoforge/core/fault.py | 57 +++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py index 5edfcfc..5013c4e 100644 --- a/protoforge/core/fault.py +++ b/protoforge/core/fault.py @@ -250,18 +250,63 @@ # 场景:刀具从轻度磨损到需要换刀的完整过程 # 模式:渐进式,持续时间长 # ------------------------------------------------------------------ + # ------------------------------------------------------------------ + # 刀具磨损加剧(粗铣) + # 切削段基线:spindle_load~54%, spindle_current~20A + # 目标:load×1.8→97%, current×1.7→34A + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="tool_wear_progressive_rough", + name="刀具磨损加剧(粗铣)", + description="粗铣刀具磨损导致切削阻力持续增大,spindle_load渐进爬升至1.8倍(~97%),spindle_current升至1.7倍(~34A)", + category="tool", + default_duration=600.0, + tags=["刀具", "磨损", "负载", "趋势漂移", "粗铣"], + point_faults=[ + PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL, + multiplier=1.8, noise_ratio=0.05, nominal_baseline=54.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, + multiplier=1.7, 
noise_ratio=0.05, nominal_baseline=20.0), + ], + ), + + # ------------------------------------------------------------------ + # 刀具磨损加剧(半精铣) + # 切削段基线:spindle_load~33%, spindle_current~13.5A + # 目标:load×1.8→59%, current×1.7→23A + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="tool_wear_progressive_semi", + name="刀具磨损加剧(半精铣)", + description="半精铣刀具磨损导致切削阻力持续增大,spindle_load渐进爬升至1.8倍(~59%),spindle_current升至1.7倍(~23A)", + category="tool", + default_duration=600.0, + tags=["刀具", "磨损", "负载", "趋势漂移", "半精铣"], + point_faults=[ + PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL, + multiplier=1.8, noise_ratio=0.05, nominal_baseline=33.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, + multiplier=1.7, noise_ratio=0.05, nominal_baseline=13.5), + ], + ), + + # ------------------------------------------------------------------ + # 刀具磨损加剧(精铣) + # 切削段基线:spindle_load~22%, spindle_current~8.8A + # 目标:load×1.8→40%, current×1.7→15A + # ------------------------------------------------------------------ FaultTypeDefinition( - id="tool_wear_progressive", - name="刀具磨损加剧", - description="刀具磨损导致切削阻力持续增大,spindle_load基线缓慢爬升至1.8倍,spindle_current同步升高;进给速度由G代码控制不受影响", + id="tool_wear_progressive_finish", + name="刀具磨损加剧(精铣)", + description="精铣刀具磨损导致切削阻力持续增大,spindle_load渐进爬升至1.8倍(~40%),spindle_current升至1.7倍(~15A);精铣对负载变化敏感,易影响表面质量", category="tool", default_duration=600.0, - tags=["刀具", "磨损", "负载", "趋势漂移"], + tags=["刀具", "磨损", "负载", "趋势漂移", "精铣"], point_faults=[ PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL, - multiplier=1.8, noise_ratio=0.05), + multiplier=1.8, noise_ratio=0.05, nominal_baseline=22.0), PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, - multiplier=1.7, noise_ratio=0.05), + multiplier=1.7, noise_ratio=0.05, nominal_baseline=8.8), ], ), From 47dca19a2895a985ad53d73eb26a9bcdd2e7b2ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Thu, 4 
Jun 2026 09:23:11 +0800 Subject: [PATCH 42/55] fix --- protoforge/core/fault.py | 89 +++++++++++++++++++++++++++++++++++----- 1 file changed, 79 insertions(+), 10 deletions(-) diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py index 5013c4e..7febb4b 100644 --- a/protoforge/core/fault.py +++ b/protoforge/core/fault.py @@ -311,23 +311,77 @@ ), # ------------------------------------------------------------------ - # 刀具崩刃 — 主轴负载突发脉冲 - # 特征:spindle_load 瞬间冲高(可超120%,FANUC最大输出200%),进给停止,CNC停主轴 - # 场景:刀具突发性失效,机床触发过载报警并停机 - # 模式:瞬间注入,持续时间极短 + # 刀具崩刃(粗铣)— fanuc-cnc + # 正常切削基线:spindle_load~56%, spindle_current~21A + # 崩刃特征:load 瞬间冲高至 160~185%(FANUC 最大输出200%), + # current 冲至 75~90A,转速/进给归零,触发过载报警 + # 使用绝对目标值(target_min/max),避免注入时恰好处于低电流阶段 + # 导致 multiplier × 低基线 < 正常切削峰值的问题 # ------------------------------------------------------------------ FaultTypeDefinition( - id="tool_breakage_sudden", - name="刀具崩刃", - description="刀具突发性崩刃,spindle_load瞬间冲高至正常值3.2倍(可超120%,FANUC最大输出200%),进给停止,CNC触发过载报警并停主轴", + id="tool_breakage_rough", + name="刀具崩刃(粗铣)", + description="粗铣刀具突发性崩刃,spindle_load瞬间冲高至160~185%,spindle_current冲至75~90A,进给停止,CNC触发过载报警并停主轴", category="tool", default_duration=10.0, - tags=["刀具", "崩刃", "突发", "过载"], + tags=["刀具", "崩刃", "突发", "过载", "粗铣"], point_faults=[ PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, - multiplier=3.2, noise_scale=8.0), + target_min=160.0, target_max=185.0, noise_scale=8.0), PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, - multiplier=4.0, noise_scale=3.0), + target_min=75.0, target_max=90.0, noise_scale=3.0), + PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, + target_value=0.0, noise_scale=0.0), + PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, + target_value=0.0, noise_scale=0.0), + PointFaultConfig(point="alarm_status", mode=FaultMode.INSTANT, + target_value=1.0, noise_scale=0.0), + ], + ), + + # ------------------------------------------------------------------ + # 刀具崩刃(半精铣)— 
fanuc-cnc-semi-finish + # 正常切削基线:spindle_load~38%, spindle_current~14.5A + # 崩刃特征:load 瞬间冲高至 120~145%,current 冲至 52~64A + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="tool_breakage_semi", + name="刀具崩刃(半精铣)", + description="半精铣刀具突发性崩刃,spindle_load瞬间冲高至120~145%,spindle_current冲至52~64A,进给停止,CNC触发过载报警并停主轴", + category="tool", + default_duration=10.0, + tags=["刀具", "崩刃", "突发", "过载", "半精铣"], + point_faults=[ + PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, + target_min=120.0, target_max=145.0, noise_scale=6.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + target_min=52.0, target_max=64.0, noise_scale=2.5), + PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, + target_value=0.0, noise_scale=0.0), + PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, + target_value=0.0, noise_scale=0.0), + PointFaultConfig(point="alarm_status", mode=FaultMode.INSTANT, + target_value=1.0, noise_scale=0.0), + ], + ), + + # ------------------------------------------------------------------ + # 刀具崩刃(精铣)— fanuc-cnc-finish + # 正常切削基线:spindle_load~22%, spindle_current~8.5A + # 崩刃特征:load 瞬间冲高至 70~90%,current 冲至 30~40A + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="tool_breakage_finish", + name="刀具崩刃(精铣)", + description="精铣刀具突发性崩刃,spindle_load瞬间冲高至70~90%,spindle_current冲至30~40A,进给停止,CNC触发过载报警并停主轴", + category="tool", + default_duration=10.0, + tags=["刀具", "崩刃", "突发", "过载", "精铣"], + point_faults=[ + PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, + target_min=70.0, target_max=90.0, noise_scale=4.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + target_min=30.0, target_max=40.0, noise_scale=1.5), PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, target_value=0.0, noise_scale=0.0), PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, @@ -544,6 +598,21 @@ def apply(self, device: 
Any) -> None: for pf in fault_type.point_faults: if pf.point not in device._point_values: continue + # INSTANT + multiplier 模式:每 tick 取设备当前值作为动态基线。 + # 这样程序运行中电流/负载自然变化时,故障倍数始终基于实时水位, + # 避免注入时恰好在低峰导致 multiplier × 旧低基线 < 正常高峰的问题。 + # resolved_targets(绝对值)和 target_value 模式不受影响,保持原逻辑。 + if (pf.mode == FaultMode.INSTANT + and pf.multiplier is not None + and pf.target_value is None + and pf.point not in fault.resolved_targets + and pf.nominal_baseline is None): + live_val = device._point_values.get(pf.point) + if live_val is not None: + try: + fault.baseline_values[pf.point] = float(live_val) + except (TypeError, ValueError): + pass baseline = fault.baseline_values.get(pf.point, 0.0) if baseline == 0.0 and pf.nominal_baseline is None: # 基线为0说明注入时设备处于换刀/停机状态 From 45ba03319cdb4a465e522ed5fe0487e2b39e2f9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Tue, 9 Jun 2026 08:54:56 +0800 Subject: [PATCH 43/55] fix --- ai/ai.md | 680 +++++++ ai/predict.py | 97 - ai/predict_v2.py | 571 ------ ai/predict_v3_single_scene.py | 1487 --------------- ai/pridict_v4.py | 1604 ----------------- ai/pridict_v5.py | 148 +- protoforge/core/cnc_metric_generator.py | 661 +++++++ protoforge/core/engine.py | 5 + protoforge/core/generator.py | 15 + protoforge/core/metrics.py | 8 +- protoforge/core/simulators.py | 33 + protoforge/models/device.py | 1 + .../protocols/mtconnect/lathe_simulator.py | 421 +++++ .../templates/mtconnect/lathe_machine.json | 245 ++- tests/test_cnc_metric_generator.py | 372 ++++ 15 files changed, 2576 insertions(+), 3772 deletions(-) create mode 100644 ai/ai.md delete mode 100755 ai/predict.py delete mode 100755 ai/predict_v2.py delete mode 100755 ai/predict_v3_single_scene.py delete mode 100644 ai/pridict_v4.py create mode 100644 protoforge/core/cnc_metric_generator.py create mode 100644 protoforge/core/simulators.py create mode 100644 protoforge/protocols/mtconnect/lathe_simulator.py create mode 100644 tests/test_cnc_metric_generator.py diff 
--git a/ai/ai.md b/ai/ai.md new file mode 100644 index 0000000..d1a1e16 --- /dev/null +++ b/ai/ai.md @@ -0,0 +1,680 @@ +# ProtoForge 预测算法优化方案 + +## 1. 背景与现状 + +### 1.1 当前算法(pridict_v5.py)核心能力 + +- 支持三个 CNC 工位:粗铣(fanuc-cnc)、半精铣(fanuc-cnc-semi-finish)、精铣(fanuc-cnc-finish) +- 监控指标:feed_rate / spindle_speed / spindle_current / spindle_load / vibration_x/y/z +- 预测策略:phase_point(稳定信号点预测)、phase_band(波动信号带预测) +- phase-lock 机制:对齐模板与实时信号的相位偏移 +- 模板学习:健康状态下 EMA 渐进更新,异常状态冻结模板 +- 异常检测:基于超出预测带的比例、连续超出秒数、最大超出倍率三指标 + +### 1.2 仿真模拟器的变化 + +仿真器故障类型从早期通用故障(tool_wear、tool_breakage、spindle_overheat) +**升级为按工位细分的 15+ 种故障类型**,主要变化如下: + +| 类型 | 旧故障 | 新故障(现在) | +|------|--------|--------------| +| 刀具磨损 | tool_wear(通用) | tool_wear_progressive_rough/semi/finish | +| 刀具崩刃 | tool_breakage(通用) | tool_breakage_rough/semi/finish | +| 主轴过热 | spindle_overheat(通用) | spindle_overheat_rough/semi/finish | +| 进给堵转 | 无 | feed_stall_rough/semi/finish | +| 电源波动 | 无 | power_fluctuation_rough/semi/finish | +| 其他 | 无 | tool_overload_protection、air_cutting、built_up_edge、coating_spalling、tool_offset_error | + +**故障参数已与工位量程精确对齐**(如粗铣 spindle_load 崩刃冲至 160~185%,精铣仅 70~90%), +旧算法的硬编码覆盖参数(_SIMULATION_STRATEGY_OVERRIDES)仍可用,但需要与新故障类型匹配更新。 + +### 1.3 需要解决的核心问题 + +1. **新故障场景覆盖不足**:积屑瘤(周期性突刺)、空切(负载跳低)、涂层剥落(阶跃跳变)、换刀装夹偏移(均值永久偏移)等新故障模式,当前算法缺乏针对性检测策略。 +2. **真实场景适配欠缺**:渐进式趋势漂移(磨损爬升)用 phase_band 检测不够灵敏;阶跃型故障(崩刃、堵转)用模板对比延迟较高。 +3. **换刀事件无感知**:换刀后正常值域发生整体变化,旧算法会错误告警,需要识别换刀切换并重置模板。 +4. **多指标联动未利用**:各指标独立预测,未利用 load↑+current↑+feed_rate↓ 等联动关系强化检测精度。 + +--- + +## 2. 优化目标 + +1. 覆盖仿真器现有全部 15+ 种故障类型,每种故障至少触发一项指标的异常告警 +2. 区分五类故障模式:**突发冲高**、**渐进漂移**、**周期突刺**、**阶跃跳变**、**均值偏移** +3. 支持真实场景下的**换刀感知**,自动重置受影响指标的健康模板 +4. 引入**多指标联动置信度**,降低单指标误报率 +5. 保持对仿真环境和真实生产环境的双重适用性 + +--- + +## 3. 
优化方案 + +### 3.1 检测层扩展:五种故障模式检测器 + +在当前基于 phase_band 的超带检测基础上,增加以下检测维度: + +#### 3.1.1 趋势漂移检测(针对 tool_wear_progressive_* ) +- 在长时间窗口(5~10 分钟)上对 spindle_load、spindle_current 拟合线性趋势 +- 斜率超过阈值(如 load > 0.05%/s,current > 0.02A/s)持续 60s 以上,触发告警 +- 与 phase_band 超带并列运行,两者任一触发即告警 +- **参数**:`trend_window_seconds`(默认 300s)、`trend_slope_threshold`(按工位设定) + +#### 3.1.2 周期性突刺检测(针对 built_up_edge) +- 在短时间窗口(30s)内检测超出 phase_band 上边界的瞬时尖峰数量 +- 尖峰定义:持续不超过 5s、幅度超出上界 20% 以上的脉冲 +- 单窗口内尖峰次数 ≥ 3 次触发告警(区别于持续超带的磨损/堵转) +- **参数**:`spike_window_seconds`、`spike_amp_ratio`、`spike_min_count` + +#### 3.1.3 阶跃检测(针对 coating_spalling、tool_breakage_*) +- 计算相邻 10s 均值之差,超过 IQR 的 1.5 倍视为阶跃 +- 崩刃:阶跃幅度极大(超出正常值 2 倍以上)且随后归零 → 触发 breakage 告警 +- 涂层剥落:阶跃后在新均值稳定 30s 以上 → 触发 coating_spalling 告警 +- **参数**:`step_iqr_multiplier`、`step_stable_seconds` + +#### 3.1.4 均值偏移检测(针对 air_cutting、tool_offset_error) +- 检测最近 60s 的均值与健康模板均值的偏差 +- 空切:load/current 均值跌至模板均值的 30% 以下,且 feed_rate 保持正常 → 空切告警 +- 装夹偏移:load/current 均值持续偏高(1.3~1.6 倍)但不随时间爬升 → 偏移告警 +- **参数**:`air_cut_load_ratio`(默认 0.3)、`offset_load_min_ratio`、`offset_stable_seconds` + +#### 3.1.5 快速冲高检测(针对 feed_stall_*、tool_breakage_*) +- 在 DETECT_WINDOW_SECONDS(30s)内,若超带比例 > 0.8 且值域中位数超出上界,立即触发 +- 相比原有 outside_ratio(0.6)更激进,专用于突发故障的快速响应 +- 与原有检测并行,使用 `fast_alert` 标志与标准告警区分 + +### 3.2 纯监控数据换刀感知方案 + +#### 3.2.1 方案背景与可行性评估 + +在没有 CNC 直接信号(T code / M06)接入的情况下,通过监控数据推断换刀事件是一种工程上可行的兜底方案,但有明确的局限性,需要与使用方对齐预期。 + +**可行性结论:中等可行,在仿真环境中高度可行,在真实环境中需要配合约束条件。** + +| 维度 | 评估 | 说明 | +|------|------|------| +| 仿真环境准确率 | 高(>90%) | 仿真器信号无噪声干扰,停机特征清晰 | +| 真实环境准确率 | 中(70~85%) | 受节拍变化、程序暂停、对刀等操作干扰 | +| 漏检率 | 低(~5%) | 换刀必然经过停机,主特征不易丢失 | +| 误报率 | 中(10~20%) | 程序暂停、换工件等也有类似信号 | +| 响应延迟 | 20~60s | 需要等待换刀后稳定切削才能确认 | + +**核心挑战**:换刀停机与以下事件的监控信号高度相似,是误报的主要来源: +- 程序暂停(M00/M01) +- 加工完成等待下料 +- 急停或报警停机 +- 主轴定向(换工件时的主轴控制) + +#### 3.2.2 换刀事件的监控信号特征 + +换刀过程在时序上分为四个阶段,每个阶段在监控指标上有不同特征: + +``` +阶段一:切削结束 阶段二:停机换刀 阶段三:重启暖机 阶段四:新刀切削 +───────────────── → ───────────────── → ───────────────── → ───────────────── 
+feed_rate: 正常 feed_rate ≈ 0 feed_rate = 0 feed_rate: 恢复 +spindle_speed: 正常 spindle_speed ↓→0 spindle_speed: 缓升 spindle_speed: 目标值 +spindle_load: 正常结束 spindle_load ≈ 0 spindle_load: 极低 spindle_load: 新基线 +spindle_current: 正常 spindle_current: 极低 spindle_current: 低 spindle_current: 新基线 +alarm_status: 0 alarm_status: 0 alarm_status: 0 alarm_status: 0 +``` + +**与相似事件的关键区分特征**: + +| 事件 | 停机前 load | 停机持续时长 | alarm_status | 重启后 load | +|------|------------|-------------|-------------|------------| +| **换刀** | 正常结束值(无冲高) | 15~60s | 0 | 可能偏离旧基线 ±15%+ | +| 程序暂停 | 正常结束值 | 不确定(秒级~分钟级) | 0 | 与旧基线一致 | +| 崩刃停机 | 瞬间冲高(>2倍)后归零 | 短(<15s,触发报警) | **1** | 0(主轴被迫停止) | +| 加工完成等待 | 正常结束值 | 较长(>60s) | 0 | 与旧基线一致 | +| 急停/报警 | 不确定 | 不确定 | **1** | 需人工处理 | + +#### 3.2.3 换刀检测算法:三阶段判决 + +换刀检测采用三阶段状态机,避免单一条件误判: + +**阶段一:停机候选检测(Idle Candidate)** + +触发条件(同时满足): +``` +spindle_speed < spindle_speed_idle_threshold(各工位空载转速阈值的 10%) + 粗铣: < 200 RPM, 半精铣: < 400 RPM, 精铣: < 600 RPM +AND feed_rate < 10 mm/min(接近零) +AND alarm_status == 0(无报警,排除崩刃/急停) +AND 停机前 spindle_load 无冲高(最近 10s 内 load 峰值 < 模板上界 × 1.3) +AND 持续时长 ≥ idle_min_seconds(默认 8s,避免切削间隙误触发) +``` + +进入 `IDLE_CANDIDATE` 状态,记录停机开始时间戳。 + +**阶段二:停机类型分类(Idle Classification)** + +在 `IDLE_CANDIDATE` 状态下,持续观察停机时长: + +``` +停机时长 < idle_min_seconds(8s) → 切削间隙,忽略,返回正常 +停机时长 8s ~ tool_change_max_seconds → 进入 POSSIBLE_TOOL_CHANGE 候选 +停机时长 > tool_change_max_seconds → 长时待机(下料/换工件),记录待机开始, + 待机结束重启后重走阶段一 +``` + +`tool_change_max_seconds` 建议值:粗铣 90s,半精铣 60s,精铣 60s。 +(换刀物理动作:ATC 换刀 10~30s,手动换刀 30~60s,超出则大概率是其他等待) + +**阶段三:换刀确认(Tool Change Confirmation)** + +主轴重启后,等待 1~2 个完整切削周期,进行基线偏移确认: + +``` +新切削均值 vs 旧模板均值: + |新均值 - 旧均值| / 旧均值 > baseline_shift_threshold(默认 0.12,即 12%) + → 确认换刀,触发模板重置流程 + + |新均值 - 旧均值| / 旧均值 ≤ 0.12 + → 同型号刀具更换或程序恢复,不重置模板,返回 healthy 状态 +``` + +这一步是关键判决:**程序暂停恢复后新旧基线一致,换刀后新旧基线大概率偏移**。 +同型号换刀(如定期刀具寿命更换)偏移较小(旧刀磨损→新刀约下降 5~15%),依然可以触发。 + +#### 3.2.4 检测到换刀后的模板处理策略 + +不立即清空旧模板,采用"快速替换"策略: + +**步骤 1:进入 `TOOL_CHANGE` 状态** +- 暂停所有指标的异常检测输出(避免换刀后初始切削产生大量误报) +- 
保留旧模板,不删除 + +**步骤 2:新模板预热采集** +- 等待 `tool_change_stable_cycles`(默认 2 个完整切削周期)的数据 +- 这段时间内用旧模板做参考,不告警,但持续采集数据 + +**步骤 3:模板切换** +- 用预热期数据直接重建新模板(build_current_baseline) +- 新模板建好后,切换回 `healthy` 状态 +- EMA alpha 在换刀后前 5 个周期内升高至 0.3(加速收敛),之后恢复正常 0.1 + +**步骤 4:换刀事件上报** +- 向 Webhook 推送换刀事件(时间戳、工位、旧基线值、新基线值、确认置信度) +- 供 MES/SCADA 层决定是否重置刀具寿命计数器 + +``` +换刀事件 payload: +{ + "event": "tool_change_detected", + "device_id": "fanuc-cnc", + "timestamp": 1717600000, + "idle_duration_seconds": 32, + "old_baseline": {"spindle_load": 54.2, "spindle_current": 20.1}, + "new_baseline": {"spindle_load": 47.8, "spindle_current": 18.5}, + "baseline_shift_ratio": 0.118, + "confidence": 0.82 +} +``` + +#### 3.2.5 置信度评分 + +每次换刀检测附带置信度(0~1),综合以下因素: + +| 因素 | 加分条件 | 加分 | +|------|---------|------| +| 停机时长合理 | 15~45s(ATC 换刀典型范围) | +0.3 | +| 停机前无报警 | alarm_status 全程为 0 | +0.2 | +| 基线偏移显著 | shift_ratio > 0.15 | +0.2 | +| 基线偏移方向合理 | 新刀 load 低于旧刀(换新刀减摩擦) | +0.1 | +| 停机前无冲高 | 排除崩刃后停机 | +0.2 | + +置信度 < 0.5:记录日志,不触发模板重置,仅标记为"疑似换刀" +置信度 0.5~0.7:触发模板重置,Webhook 标注 low confidence +置信度 > 0.7:正常换刀流程 + +#### 3.2.6 局限性与边界条件 + +**已知不可处理的情况**: +1. **同型号换刀 + 新刀基线与旧磨损刀差异 < 12%**:无法触发基线偏移确认,换刀被当程序恢复处理。实际影响:不重置模板,但旧磨损刀的高基线 EMA 会在后续正常切削中自然收敛回新刀水平(约 10~15 个周期)。 +2. **手动操作导致停机时长超过 90s**:被分类为长时待机,换刀重启后会正常走基线偏移检测,只是延迟了确认。 +3. **连续多次换刀(<5 分钟内)**:第二次换刀在第一次模板预热期内发生,需要重置预热计时器并重新采集。设计上支持,但需要测试。 +4. 
**报警后换刀**:alarm_status=1 期间的停机被过滤掉不识别为换刀,需要 alarm 清除后才重新进入检测流程。 + +**仿真环境的特殊说明**: +仿真器当前除崩刃故障会置位 alarm_status=1 外,没有其他 alarm_status 的主动置位逻辑,换刀模拟需要通过手动停止/启动设备实现,信号特征与真实换刀一致,算法可直接适用。 + +### 3.3 多指标联动置信度 + +不改变各指标的独立预测逻辑,在输出层增加联动评分: + +**联动规则(基于仿真故障模式)**: + +| 联动组合 | 判断 | 置信度加成 | +|---------|------|----------| +| load 异常 + current 异常 | 真实负载问题(非噪声) | +0.3 | +| load 上升 + feed_rate 下降 | 过载保护或磨损 | +0.2 | +| feed_rate=0 + load 上升 | 进给堵转 | +0.4 | +| 所有指标归零 | 崩刃/停机 | +0.4 | +| load 下降 + current 下降 + feed_rate 正常 | 空切 | +0.3 | + +**输出**:新增 `composite_anomaly_confidence` 指标(0~1),供告警聚合系统使用。 + +### 3.4 仿真策略覆盖表更新 + +当前 `_SIMULATION_STRATEGY_OVERRIDES` 仅覆盖工位级别的策略选择, +需要针对新故障类型补充以下调整: + +| 工位/指标 | 新增覆盖原因 | +|----------|------------| +| 粗铣 spindle_load | 崩刃冲高至 160~185%,现有 band_pad_abs=6.0 偏小,建议检测层增加快速冲高检测 | +| 三工位 spindle_load | tool_wear_progressive 斜率慢,需要补充趋势检测 | +| 三工位 spindle_load | air_cutting 跌至 4~12%,需要均值偏移检测 | +| 三工位 all | tool_offset_error 换刀后整体偏移,需要换刀感知 | + +### 3.5 真实场景扩展 + +除仿真环境外,真实 CNC 场景还需考虑: + +1. **数据质量**:真实采集存在缺包、抖动、时间戳乱序 + - 方案:`normalize_history` 已做插值,追加 `outlier_clip`(3σ 剔除离群点) + +2. **工况段切割**:真实机床有待机段(load≈0)和切削段,需要识别并只对切削段建模 + - 方案:在 `infer_metric_profile` 中增加工况分段,仅用活跃段数据建模(已有 p10 过滤基础,强化) + +3. **周期变化**:真实刀路程序可能含多段不同周期,FFT 只取主周期,辅周期被忽略 + - 方案:对检测到多峰 FFT 的场景,构建 multi-period 模板(可选增强) + +4. **采集频率差异**:仿真器 1s/点,真实设备可能 100ms~10s 不等 + - 方案:`QUERY_STEP` 参数化,自动适配采集频率,确保插值后密度一致 + +--- + +## 4. 实现计划 + +按优先级排序: + +### P0(核心,与新故障直接相关) +1. **趋势漂移检测器**:补充 `detect_trend` 函数,在 `detect_anomaly` 中并行运行 +2. **阶跃检测器**:补充 `detect_step_change`,覆盖崩刃和涂层剥落 +3. **快速冲高检测**:降低堵转/崩刃的响应延迟 +4. **换刀感知状态机**:三阶段判决(停机候选→类型分类→基线偏移确认),新增 `IDLE_CANDIDATE`/`POSSIBLE_TOOL_CHANGE`/`TOOL_CHANGE` 状态 + +### P1(提升精度) +5. **均值偏移检测**:覆盖空切和装夹偏移 +6. **周期突刺检测**:覆盖积屑瘤 +7. **仿真策略覆盖表更新**:与新故障类型对齐 + +### P2(真实场景适配) +8. **多指标联动置信度**:聚合告警输出 +9. **离群点剔除**:提升真实采集数据鲁棒性 +10. **工况分段强化**:精确识别切削段 vs 待机段 + +--- + +## 5. 
文件结构规划 + +当前预测代码已重构为模块化目录 `ai/predictor/`,建议在此基础上增加: + +``` +ai/predictor/ +├── anomaly.py # 现有:超带异常检测(phase_band/phase_point) +├── trend.py # 新增:趋势漂移检测 +├── step.py # 新增:阶跃/均值偏移检测 +├── spike.py # 新增:周期突刺检测 +├── composite.py # 新增:多指标联动置信度 +├── tool_change.py # 新增:换刀感知 +├── phase_lock.py # 现有:相位锁定 +├── template.py # 现有:模板构建与预测 +├── signal.py # 现有:信号预处理 +├── profiling.py # 现有:指标特征推断 +├── discovery.py # 现有:设备/指标发现 +├── state.py # 现有:状态机(需扩展 tool_change 状态) +├── storage.py # 现有:状态持久化 +├── models.py # 现有:数据模型 +├── config.py # 现有:配置参数 +└── service.py # 现有:主循环 +``` + +--- + +## 6. 关键参数(初始建议值,待验证) + +| 参数 | 默认值 | 说明 | +|------|--------|------| +| `trend_window_seconds` | 300 | 趋势检测时间窗口 | +| `trend_slope_threshold_load` | 0.04 %/s | load 趋势斜率告警阈值 | +| `trend_slope_threshold_current` | 0.015 A/s | current 趋势斜率告警阈值 | +| `step_iqr_multiplier` | 1.5 | 阶跃检测 IQR 倍数 | +| `step_stable_seconds` | 30 | 涂层剥落:阶跃后稳定确认时间 | +| `spike_window_seconds` | 30 | 突刺检测窗口 | +| `spike_amp_ratio` | 0.2 | 突刺幅度(相对于上界超出比例) | +| `spike_min_count` | 3 | 窗口内最小突刺次数 | +| `air_cut_load_ratio` | 0.3 | 空切:load 相对于模板均值的比例下限 | +| `tool_change_idle_min_seconds` | 8 | 停机候选最小持续时长(排除切削间隙) | +| `tool_change_max_seconds_rough` | 90 | 粗铣换刀最大停机时长(超出视为长时待机) | +| `tool_change_max_seconds_semi` | 60 | 半精铣换刀最大停机时长 | +| `tool_change_max_seconds_finish` | 60 | 精铣换刀最大停机时长 | +| `tool_change_baseline_shift_threshold` | 0.12 | 基线偏移确认阈值(12%) | +| `tool_change_stable_cycles` | 2 | 换刀后预热采集周期数 | +| `tool_change_fast_ema_alpha` | 0.3 | 换刀后前 5 周期的快速 EMA alpha | +| `tool_change_confidence_threshold` | 0.5 | 换刀确认最低置信度 | + +--- + +## 7. 风险与约束 + +1. **趋势检测误报**:正常切削段 load 也有周期性起伏,需确保趋势窗口足够长(> 3 个完整周期) +2. **换刀感知误识别**:程序暂停(M00/M01)与换刀停机信号高度相似,三阶段判决通过"停机时长 + 重启后基线偏移"联合判断降低误报;置信度 < 0.5 时不触发模板重置 +3. **多指标联动延迟**:需要所有指标数据对齐同一时刻,异步拉取可能引入 1~2s 偏差 +4. **模块化重构兼容性**:pridict_v5.py 的逻辑正在迁移至 `ai/predictor/` 目录,方案实现应基于新目录结构 + +--- + +## 8. 
真实场景可行性分析 + +### 8.1 方案隐含假设与真实环境的差距 + +当前方案建立在四个隐含假设上,在真实场景中这些假设并不总是成立,是可行性折扣的主要来源: + +**假设一:信号是干净的周期性信号** + +仿真器产生的是理想波形。真实 CNC 的 spindle_load 受多种非故障因素叠加干扰: + +- 切削材料硬度的批次差异(同一程序每次 load 都略有不同,波动 ±5~10%) +- 刀路中的局部变速段(倒角、孔位、转角减速导致 load 瞬时跳变) +- 主轴电机温度漂移(长班次后基线自然上浮 3~8%,会被趋势检测误报为磨损) +- 冷却液状态切换(开/关冷却液时 spindle_current 有 0.5~1.5A 跳变) +- 振动信号在真实环境受机床安装基础、夹具刚性、相邻机床干扰 + +**影响**:phase_band 的 band_pad_abs 在真实场景需要更宽(约 1.5~2x 仿真值),灵敏度相应下降。趋势检测的斜率阈值需要上调以抵抗温度漂移。 + +**假设二:换刀停机时长在 8~90s 之间** + +真实工厂差异极大,当前参数覆盖不全: + +| 换刀方式 | 典型时长 | 与当前参数的关系 | +|---------|---------|---------------| +| 全自动 ATC 刀库 | 5~15s | 可能触发不到 idle_min_seconds=8s 下限,**漏检** | +| 半自动(刀臂+人工紧固) | 20~45s | 参数范围内,可识别 | +| 手动换刀(小型车间) | 45~120s | 超过 tool_change_max_seconds=90s,分类为长时待机 | +| 换刀+对刀仪测量 | 60~180s | 同上,停机过长被误分类 | + +**处置建议**:idle_min_seconds 应可配置(建议范围 3~8s),并提供工厂现场标定工具。 + +**假设三:换刀后基线偏移 ≥ 12%** + +以下场景基线偏移不足,导致换刀漏确认: + +- 同规格刀片批量更换(新刀 vs 轻度磨损旧刀):偏移仅 3~8%,低于 12% 阈值 +- 加工中心多工序换刀中,每段切削工况不同,load 基线本身波动就大于 12%,导致阈值失去意义 + +**假设四:程序周期稳定** + +真实加工程序一个 NC 文件可能包含多段异构工序(外轮廓→钻孔→铰孔→精铣内腔),每段的 load 特征完全不同,FFT 周期估计会退化。这是对 phase_band 建模的根本性挑战。 + +--- + +### 8.2 各检测器真实可行性评分 + +| 检测器 | 仿真可行性 | 真实可行性 | 主要障碍 | 建议处置 | +|--------|-----------|-----------|---------|---------| +| phase_band 超带 | 高 | 中 | 真实噪声宽,带宽需要放大,灵敏度下降 | 增加工厂标定流程,按实测数据调 band_pad_abs | +| 趋势漂移检测 | 高 | 中高 | 电机温漂会产生真实斜率,干扰磨损检测 | 增加温漂补偿(以班次为单位做基准修正) | +| 阶跃检测(崩刃) | 高 | 高 | 崩刃信号极强,真实中同样清晰可辨 | 可直接使用,崩刃幅度远大于噪声 | +| 均值偏移(空切) | 高 | 高 | 空切 load 跌幅明显,误报少 | 需配合 feed_rate 保持正常来排除停机 | +| 周期突刺(积屑瘤) | 中 | 低中 | 真实信号噪声大,突刺难与高频噪声区分 | 需要更长确认窗口,或依赖振动信号辅助 | +| 换刀感知 | 高 | 中低 | ATC 停机可能 <8s,多种停机事件混淆 | 见 8.3 节 | + +**整体评估**:在真实单工序铣床上,方案核心功能(崩刃/堵转/空切检测)可行性高,趋势漂移和换刀感知在未经标定时准确率中等,需要现场调参周期(建议 2~4 周)。 + +--- + +## 9. 
机型适用性分析 + +### 9.1 适用性全景 + +| 机型 | 适用性 | 核心原因 | +|------|--------|---------| +| 立式/卧式铣床(单工序) | **高** | 信号周期稳定,方案建模原型,仿真器对应此类 | +| 车床(普通车削) | **中** | 周期结构相似,但转速随直径变化,换刀停机更短 | +| 钻床/镗床 | **中** | 周期清晰,但 load 特征与铣削不同,参数需重新标定 | +| 立式加工中心(VMC) | **低中** | 多工序换刀频繁,单一模板假设失效,需架构改造 | +| 卧式加工中心(HMC) | **低** | 同上,且托盘交换导致更多停机事件干扰 | +| 五轴加工中心 | **不适用** | 主轴随姿态变化,load 规律性弱,纯监控数据不足 | +| 车削中心(Turn-Mill) | **低** | 车铣复合,工况切换频繁,模型无法统一 | + +--- + +### 9.2 单工序铣床(方案最适用场景) + +这是当前仿真器建模的原型,也是方案设计的基准场景。 + +**信号特征**: +- 一个工件对应一个或少量固定刀路程序,主轴 load 周期稳定 +- 换刀频率低(按刀具寿命,数小时到数天一次) +- 换刀后运行的程序与之前相同,新旧刀具的 load 差异来源于磨损变化 + +**方案适配程度**:完全适用,仿真器已充分覆盖此类场景的故障模式。 + +--- + +### 9.3 加工中心(VMC/HMC)—— 与方案差异最大 + +加工中心和铣床最本质的区别是**一个程序内连续自动换多把刀**: + +``` +装夹工件 → [刀T01钻孔] → ATC换刀(5~15s) → [刀T02铣面] → ATC换刀 → [刀T03铰孔] → ... + load~45% load~65% load~20% +``` + +这对当前方案产生以下结构性冲击: + +**问题一:周期结构崩坏** + +整个加工程序由多段异构切削组成,每段 load/current 特征不同。FFT 无法找到代表全程的稳定周期,phase_lock 机制的前提失效。 + +实际观测到的"周期"是整个程序的节拍(从装夹到卸料),但程序内部各工序的 load 差异极大,单一模板无法描述。 + +**问题二:换刀极频繁且停机极短** + +ATC 自动换刀时间通常 5~15s,其中较快的换刀(<8s)低于当前 idle_min_seconds=8s 的下限,导致这部分换刀事件被过滤掉。即使降低阈值到 3s,也无法有效区分 ATC 换刀与切削间隙(如钻孔退刀后的主轴短暂减速)。 + +**问题三:多工序 load 跳变被误检** + +刀T01(钻孔,load~45%)→ ATC换刀 → 刀T02(铣面,load~65%),换刀后 load 上升约 44%,远超 baseline_shift_threshold=12%,会被正确"识别"为换刀——但同时,后续每次换刀到更高 load 工序都会触发,造成模板频繁重置,无法稳定建模。 + +反过来,load 从铣面→铰孔 下降 70%,会被阶跃检测误报为"崩刃停机"。 + +**加工中心需要的架构**: + +纯监控数据在加工中心场景无法可靠工作。正确方案是**按刀号分段建模**: + +``` +每个 T 号 → 独立的 phase_band 模板 + T01 模板:钻孔段 load 均值 + 波动范围 + T02 模板:铣面段 load 均值 + 波动范围 + ... +``` + +这需要 CNC 提供 T code 信号(FOCAS/OPC-UA 均可读取)。没有 T code,纯监控数据方案在加工中心上只能做粗粒度检测(如全局崩刃、主轴过热),无法做刀具磨损级别的精细检测。 + +--- + +### 9.4 车床 + +车床信号与铣床的主要差异: + +1. **恒线速度(CSS)控制**:加工锥面/端面时,主轴转速随工件直径实时变化(spindle_speed 是连续变化曲线,不是常数),当前 phase_band 对 spindle_speed 的建模假设失效 +2. **换刀停机更短**:刀台旋转换刀通常 2~5s,低于 idle_min_seconds=8s,漏检率高 +3. 
**load 波形特征**:车削 load 曲线相对平稳,周期性特征不如铣削明显,FFT 估计周期的精度下降 + +**适配建议**: +- spindle_speed 指标在车床上不适合用 phase_band,改为范围监控(检查是否在编程转速范围内) +- idle_min_seconds 调低至 3s +- load/current 检测仍可用,是车床上最有价值的监控指标 + +--- + +### 9.5 其他机床类型适用性分析 + +以下九类机床在主轴特征、切削力表现形式、换刀方式上与铣床有本质差异,逐一评估。 + +#### 9.5.1 钻床 + +**切削特征**:轴向压入式,load 曲线呈"空载→切入尖峰→稳态钻削→退刀归零"的固定形态,周期性来自重复钻孔节拍而非刀路轨迹,形态与铣削不同。 + +**换刀特征**:换刀频率低(一把钻头加工数十~数百孔),换刀停机通常 30~120s,参数范围内可识别。 + +**指标适用性**: +- spindle_load / spindle_current:适用,阶跃检测可捕捉钻头折断(折断时 load 瞬冲后归零) +- phase_band:对"重复相同孔位"的批量加工可以建模,但每孔内部的斜坡形 load 需要适当放宽 band_pad_abs +- 趋势检测:适用于钻头磨损(稳态钻削段 load 缓慢爬升) + +**综合评分**:中(需调参,核心检测功能可用) + +--- + +#### 9.5.2 镗床 + +**切削特征**:内孔旋转切削,load 曲线形态与铣削接近(旋转稳态),但切削深度小、切削力弱,spindle_load 信号幅度低(5~20%),信噪比差。 + +**关键问题**:高精度镗削对 load 波动极敏感,正常切削条件变化(材料硬度、进给微调)都会引起 ±3~5% 的 load 抖动,band_pad_abs 过大则漏报,过小则误报。需要比铣床更精细的参数标定。 + +**指标适用性**: +- phase_band:适用,但 band_pad_abs 需缩窄(建议为铣床配置的 50~70%) +- 趋势检测:适用于镗刀磨损 +- 阶跃检测:适用于刀片崩刃 + +**综合评分**:中(参数敏感,需精细标定) + +--- + +#### 9.5.3 磨床 + +**切削特征**:这是与方案假设差异最大的机型。磨削无"刀具"概念(砂轮),主轴转速极高(1000~6000 RPM),切削深度极浅,spindle_load 长期处于低位稳态(5~15%),信号几乎无周期性波动。 + +**核心矛盾**:phase_band 的建模前提是信号有可学习的周期形态,磨削信号接近直流,FFT 无法找到有意义的周期,模板建模机制失效。 + +磨床的健康状态退化表现为**功率/电流的长周期缓慢上升**(砂轮磨钝→切削阻力增大),这正是趋势检测的目标场景。 + +**指标适用性**: +- phase_band / phase_lock:**不适用**,信号无周期性 +- 趋势检测(detect_trend):**高度适用**,是磨床监控的核心手段 +- 阶跃检测:适用于砂轮崩碎(load 瞬间冲高后停机) +- 换刀感知:磨床换砂轮停机时间较长(修整+安装 5~30 分钟),需要大幅调整参数 + +**综合评分**:低中(phase_band 核心机制不适用,趋势检测单独有价值) + +--- + +#### 9.5.4 齿轮加工机床(滚齿 / 插齿 / 磨齿) + +**切削特征**:连续展成运动,spindle_load 在物理上有极高频率的每齿切入脉冲(脉冲宽度毫秒级,频率 = 主轴转速 × 齿数,可达数十~数百 Hz)。 + +**采样率瓶颈**:当前监控采集频率 1s/点,完全无法捕捉每齿脉冲,只能观测到时间平均 load。在 1s 采样分辨率下,齿轮加工机床的信号行为退化为"类铣床"的低频形态,失去了齿轮加工特有的诊断信息。 + +**在 1s 采样率下的有效范围**: +- 粗粒度故障(刀具整体磨损导致均值爬升、断刀后 load 归零):适用 +- 精细故障(单齿崩刃、齿距误差):**需要 100ms 以下采样率才能检测,当前方案无法覆盖** + +**综合评分**:低中(粗粒度检测可用,精细齿形故障超出当前采样能力范围) + +--- + +#### 9.5.5 螺纹加工机床(丝锥攻丝 / 螺纹铣) + +**切削特征**:丝锥攻丝的 load 曲线呈线性爬升(随攻入深度增加)后反转退出(力矩反向),单孔周期 2~15s。最典型故障是**丝锥折断**:折断时 load 瞬间冲高(1.5~3 
倍正常值),随后归零——与崩刃阶跃模型完全吻合。 + +**指标适用性**: +- 阶跃检测(断丝锥):**高度适用**,信号特征极清晰 +- phase_band:需要适配"线性斜坡→反转"的非平台式 load 曲线,band 需要随相位动态变化,直接套用当前模板会误报攻丝退刀段 +- 趋势检测:适用于丝锥磨损(稳态攻丝段 load 峰值缓慢爬升) + +**综合评分**:中(断丝锥检测直接适用;phase_band 需要适配非平台 load 形态) + +--- + +#### 9.5.6 刨床 / 插床 + +**切削特征**:往复直线运动(刀具直线进给,工件横向步进),主轴概念不存在或仅指进给驱动。核心监控量是进给轴电机电流/力矩,而非旋转主轴的 spindle_load / spindle_current。 + +**核心矛盾**:当前方案所有指标定义(spindle_speed、spindle_load、spindle_current)均基于旋转主轴,在刨床/插床上物理意义不成立或为空值。 + +**综合评分**:**不适用**(监控指标体系与当前方案不匹配,需重新定义适合直线驱动的指标) + +--- + +#### 9.5.7 拉床 + +**切削特征**:单次直线行程切削,拉刀逐步增加齿高,load 在整个行程中呈**单调线性增大**(前刀切入→后刀逐步加深),行程结束后快速归零。单次行程 5~30s。 + +**指标适用性**: +- phase_band:不适用,load 是斜坡形而非平台形,无法用恒定均值模板描述 +- 趋势检测(跨多个工件):**适用**,拉刀磨损表现为行程内 load 斜率逐渐增大 +- 阶跃检测:适用于拉刀崩齿(局部 load 突刺) + +**综合评分**:低中(趋势检测和阶跃检测单独有价值,phase_band 核心机制不适用) + +--- + +#### 9.5.8 锯床(带锯 / 圆锯) + +**切削特征**:连续稳态切削,spindle_load 在正常切削中几乎是常数(取决于材料和进给速度),周期性极弱。信号接近直流,与磨床相似。 + +**主要故障特征**: +- 锯条/锯片断裂:load 瞬间冲高后归零,与崩刃阶跃完全相同 +- 锯条磨钝:load 缓慢爬升,趋势检测适用 +- 进给速度过快(过载):load 持续偏高,均值偏移检测适用 + +**指标适用性**: +- 阶跃检测(锯条断裂):**高度适用** +- 趋势检测(锯条磨钝):**适用** +- phase_band:信号太平稳,建不出有周期性的模板,意义不大 + +**综合评分**:中(阶跃+趋势检测有价值,phase_band 不适用) + +--- + +#### 9.5.9 电加工机床(EDM 放电加工 / 线切割) + +**切削特征**:无机械切削力,通过电火花放电蚀除材料,主轴不旋转。监控的核心量是**放电脉冲频率、放电间隙电压、峰值电流**,与 spindle_load / spindle_current 的物理含义完全不同(spindle_current 在电加工机床上即使有采集,代表的是伺服轴驱动电流而非切削负载)。 + +**核心矛盾**:方案所有检测逻辑围绕"机械切削力在主轴上的体现"设计,电加工机床不存在这个物理过程。 + +**综合评分**:**完全不适用**(需要针对放电参数重新设计监控体系) + +--- + +### 9.6 全机型适用性汇总 + +| 机型 | 适用性 | 可用的检测模块 | 不适用的模块 | 主要障碍 | +|------|--------|-------------|------------|---------| +| 立式/卧式铣床 | **高** | 全部 | — | 方案设计原型 | +| 钻床 | **中** | 阶跃、趋势、phase_band | — | load 形态为斜坡而非平台,需调参 | +| 镗床 | **中** | phase_band、趋势、阶跃 | — | 信号幅度弱,参数敏感 | +| 车床 | **中** | load/current 全部检测器 | spindle_speed 建模 | CSS 控制下转速非常数 | +| 螺纹加工机床 | **中** | 阶跃(断丝锥)、趋势 | phase_band | load 为斜坡+反转 | +| 齿轮加工机床 | **低中** | 趋势(粗粒度)、阶跃 | 每齿精细检测 | 采样率不足,1s 无法捕捉每齿脉冲 | +| 磨床 | **低中** | 趋势检测 | phase_band、phase_lock | 信号无周期性 | +| 锯床 | **中** | 
阶跃(断锯条)、趋势 | phase_band | 信号接近直流 | +| 拉床 | **低中** | 趋势(跨工件)、阶跃 | phase_band | load 为单次斜坡 | +| 加工中心(VMC/HMC) | **低中** | 全局崩刃/过热 | 刀具磨损精细检测 | 多工序换刀,单模板失效 | +| 刨床/插床 | **不适用** | — | 全部 | 指标体系不匹配 | +| 电加工机床 | **不适用** | — | 全部 | 物理过程完全不同 | +| 五轴加工中心 | **不适用** | — | 全部 | 姿态变化导致 load 规律性消失 | + +**结论**:方案的 phase_band + phase_lock 核心机制依赖"信号具有可重复的周期性形态",这一前提在铣床、钻床、镗床、车床上成立,在磨床、锯床、拉床上不成立,在刨床/电加工机床上完全不适用。趋势检测和阶跃检测的适用范围更广,在大多数有旋转主轴的机床上都能提供基础价值。 + +### 9.7 方案定位建议 + +基于以上分析,建议明确方案的适用范围声明: + +**当前方案版本(v13/v14)定位**: +- 最适合:**单工序立式铣床、卧式铣床**(与仿真器一致) +- 部分适用:**普通车床**(需调参),**钻镗床**(需重标定) +- 不建议直接用于:**加工中心**(需按刀号分段建模的架构升级),**五轴机床** + +**加工中心适配路线**(如需扩展): + +- 短期:仅做全局级检测(主轴过热、崩刃、过载),放弃刀具磨损级别检测 +- 中期:接入 T code 信号,实现按刀号分模板建模,恢复完整检测能力 +- 长期:引入程序段识别(通过 NC 代码解析预知各段工况),实现自适应建模 + +--- + +_最后更新:2026-06-07(补充 9.5~9.6:钻床/镗床/磨床/齿轮机床等九类机型适用性分析及汇总表)_ diff --git a/ai/predict.py b/ai/predict.py deleted file mode 100755 index b70f822..0000000 --- a/ai/predict.py +++ /dev/null @@ -1,97 +0,0 @@ -# -*- coding: utf-8 -*- - -import requests -import numpy as np -from datetime import datetime, timedelta - -VM_URL = "http://localhost:8428" -DEVICE_ID = "fanuc-cnc" -METRIC = f'feed_rate{{device_id="{DEVICE_ID}"}}' - -def fetch_history(minutes=30): - """从VM拉取历史数据""" - end = datetime.now() - start = end - timedelta(minutes=minutes) - resp = requests.get(f"{VM_URL}/api/v1/query_range", params={ - "query": METRIC, - "start": start.timestamp(), - "end": end.timestamp(), - "step": "1s", - }) - result = resp.json()["data"]["result"] - if not result: - return [], [] - values = result[0]["values"] - ts = [float(v[0]) for v in values] - ys = [float(v[1]) for v in values] - return ts, ys - -def predict_next(ts, ys, horizon=60): - """ - 用FFT检测主频,拟合正弦波,外推未来horizon秒 - 适合周期性信号 - """ - if len(ys) < 60: - return [], [] - - ys = np.array(ys) - n = len(ys) - dt = 1.0 # 1秒采样 - - # FFT找主频 - fft = np.fft.rfft(ys - ys.mean()) - freqs = np.fft.rfftfreq(n, d=dt) - dominant_idx = np.argmax(np.abs(fft[1:])) + 1 - dominant_freq = 
freqs[dominant_idx] - period = 1.0 / dominant_freq if dominant_freq > 0 else 60 - - # 拟合:y = A*sin(2π/T * t + φ) + offset - from scipy.optimize import curve_fit - t_rel = np.arange(n, dtype=float) - offset = ys.mean() - amplitude = (ys.max() - ys.min()) / 2 - - def sine_model(t, A, T, phi, C): - return A * np.sin(2 * np.pi / T * t + phi) + C - - try: - popt, _ = curve_fit( - sine_model, t_rel, ys, - p0=[amplitude, period, 0, offset], - maxfev=5000 - ) - # 外推 - t_future = np.arange(n, n + horizon, dtype=float) - y_pred = sine_model(t_future, *popt) - ts_future = [ts[-1] + i + 1 for i in range(horizon)] - return ts_future, y_pred.tolist() - except Exception: - # 拟合失败降级为线性 - slope = (ys[-1] - ys[-10]) / 10 - ts_future = [ts[-1] + i + 1 for i in range(horizon)] - y_pred = [ys[-1] + slope * (i + 1) for i in range(horizon)] - return ts_future, y_pred - -def write_predictions(ts_future, y_pred, metric_name="protoforge_feed_rate_predicted"): - """写回VictoriaMetrics""" - lines = [] - for t, y in zip(ts_future, y_pred): - ts_ms = int(t * 1000) - lines.append(f'{metric_name}{{device_id="{DEVICE_ID}"}} {y:.2f} {ts_ms}') - payload = "\n".join(lines) - requests.post(f"{VM_URL}/api/v1/import/prometheus", data=payload) - -def run_once(): - ts, ys = fetch_history(minutes=30) - if len(ys) < 60: - print("数据不足") - return - ts_future, y_pred = predict_next(ts, ys, horizon=120) - write_predictions(ts_future, y_pred) - print(f"写入 {len(y_pred)} 个预测点,预测到 +{len(y_pred)}s") - -if __name__ == "__main__": - import time - while True: - run_once() - time.sleep(30) # 每30秒重新预测一次 diff --git a/ai/predict_v2.py b/ai/predict_v2.py deleted file mode 100755 index 933a34f..0000000 --- a/ai/predict_v2.py +++ /dev/null @@ -1,571 +0,0 @@ -# -*- coding: utf-8 -*- -""" -ProtoForge 预测服务 v5 - -修复点: -1. 不再使用“单正弦拟合”作为主预测算法。 -2. 主算法改为:周期模板预测(同相位历史值加权平均)。 -3. 周期估计使用 FFT 粗估 + 自相关细化,比单纯 FFT 更稳。 -4. 若可用完整周期不足,则降级为多谐波回归(而不是单正弦)。 -5. 每轮只写入未来 min(HORIZON_SECONDS, POLL_INTERVAL) 秒,避免预测窗口重叠。 -6. 
不删除旧预测历史,避免历史预测消失。 -""" - -import logging -import math -import re -import time -from datetime import datetime, timedelta -from typing import Dict, List, Tuple - -import numpy as np -import requests - -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s [%(levelname)s] %(message)s", -) -logger = logging.getLogger(__name__) - -# ── 配置 ────────────────────────────────────────────────────────────────────── - -VM_URL = "http://localhost:8428" - -PREDICT_TARGETS = [ - ('feed_rate{device_id="fanuc-cnc"}', "feed_rate_predicted"), - ('spindle_speed{device_id="fanuc-cnc"}', "spindle_speed_predicted"), - ('spindle_current{device_id="fanuc-cnc"}', "spindle_current_predicted"), - ('vibration_x{device_id="fanuc-cnc"}', "vibration_x_predicted"), - ('vibration_y{device_id="fanuc-cnc"}', "vibration_y_predicted"), - ('vibration_z{device_id="fanuc-cnc"}', "vibration_z_predicted"), -] - -HISTORY_MINUTES = 30 -HORIZON_SECONDS = 120 -POLL_INTERVAL = 30 -WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL) -MIN_POINTS = 120 -QUERY_STEP = "1s" - -# 至少要有多少个完整周期,才使用“周期模板预测” -MIN_FULL_CYCLES_FOR_TEMPLATE = 3 -MAX_CYCLES_FOR_TEMPLATE = 6 - -# 周期范围 -MIN_PERIOD_SECONDS = 5 -MAX_PERIOD_SECONDS = 3600 - -# 多谐波回归最高阶数(降级模式) -MAX_HARMONICS = 4 - -EXTRA_PREDICT_LABELS = { - "forecast": "seasonal_v1", - "source": "protoforge", -} - -# 进程内记录每条预测序列上次写到哪里,避免本进程运行时重复写 -LAST_WRITTEN_UNTIL: Dict[str, int] = {} - -# ───────────────────────────────────────────────────────────────────────────── - - -def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]: - """从 VictoriaMetrics 拉取历史时序数据。""" - now = datetime.now() - start = now - timedelta(minutes=minutes) - - try: - resp = requests.get( - f"{VM_URL}/api/v1/query_range", - params={ - "query": query, - "start": start.timestamp(), - "end": now.timestamp(), - "step": QUERY_STEP, - }, - timeout=10, - ) - resp.raise_for_status() - except requests.RequestException as e: - logger.error("拉取数据失败 query=%s: %s", 
query, e) - return [], [] - - try: - result = resp.json().get("data", {}).get("result", []) - except Exception as e: - logger.error("解析 VM 返回失败 query=%s: %s", query, e) - return [], [] - - if not result: - return [], [] - - values = result[0].get("values", []) - if not values: - return [], [] - - ts = [] - ys = [] - for item in values: - if len(item) < 2: - continue - try: - t = float(item[0]) - y = float(item[1]) - except Exception: - continue - if not math.isfinite(t) or not math.isfinite(y): - continue - ts.append(t) - ys.append(y) - - return ts, ys - - -def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]: - """ - 清洗历史数据: - 1. 时间戳整秒化 - 2. 排序 - 3. 同一秒多个点保留最后一个 - 4. 按 1 秒插值补齐 - """ - if not ts or not ys or len(ts) != len(ys): - return np.array([]), np.array([]) - - data = {} - for t, y in zip(ts, ys): - try: - sec = int(round(float(t))) - val = float(y) - except Exception: - continue - if not math.isfinite(sec) or not math.isfinite(val): - continue - data[sec] = val - - if not data: - return np.array([]), np.array([]) - - sorted_items = sorted(data.items(), key=lambda x: x[0]) - ts_clean = np.array([x[0] for x in sorted_items], dtype=float) - ys_clean = np.array([x[1] for x in sorted_items], dtype=float) - - if len(ts_clean) < 2: - return ts_clean, ys_clean - - start_sec = int(ts_clean[0]) - end_sec = int(ts_clean[-1]) - - if end_sec <= start_sec: - return ts_clean, ys_clean - - ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float) - ys_grid = np.interp(ts_grid, ts_clean, ys_clean) - - return ts_grid, ys_grid - - -def estimate_period_by_fft(ys_arr: np.ndarray) -> float: - """FFT 粗估周期。""" - n = len(ys_arr) - if n < 8: - return 60.0 - - centered = ys_arr - np.mean(ys_arr) - if np.allclose(centered, 0): - return 60.0 - - fft_vals = np.fft.rfft(centered) - freqs = np.fft.rfftfreq(n, d=1.0) - - if len(freqs) <= 1: - return 60.0 - - power = np.abs(fft_vals[1:]) - if len(power) == 0 or np.max(power) <= 0: - return 60.0 - - 
dominant_idx = int(np.argmax(power)) + 1 - dominant_freq = float(freqs[dominant_idx]) - if dominant_freq <= 0: - return 60.0 - - period = 1.0 / dominant_freq - return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - - -def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float: - """ - 用自相关在 init_period 附近细化周期估计。 - """ - n = len(ys_arr) - if n < 20: - return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - - centered = ys_arr - np.mean(ys_arr) - if np.allclose(centered, 0): - return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - - corr = np.correlate(centered, centered, mode="full")[n - 1:] - - p0 = int(round(init_period)) - left = max(MIN_PERIOD_SECONDS, int(max(2, p0 * 0.7))) - right = min(n // 2, int(max(left + 1, p0 * 1.3))) - - if right <= left: - return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - - search = corr[left:right + 1] - if len(search) == 0: - return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - - best_lag = left + int(np.argmax(search)) - return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - - -def estimate_period(ys_arr: np.ndarray) -> float: - """FFT + 自相关 的组合周期估计。""" - p_fft = estimate_period_by_fft(ys_arr) - p_refined = refine_period_by_autocorr(ys_arr, p_fft) - return p_refined - - -def seasonal_template_predict( - ys_arr: np.ndarray, - horizon: int, - period: int, - gap: int = 0, - max_cycles: int = MAX_CYCLES_FOR_TEMPLATE, -) -> List[float]: - """ - 同相位历史值加权平均预测。 - 对未来第 k 个点,取过去多个周期同相位点做加权平均: - y[n-1+gap+k] ≈ avg(y[n-1+gap+k-p], y[n-1+gap+k-2p], ...) 
- """ - n = len(ys_arr) - preds = [] - - for k in range(1, horizon + 1): - target_idx = (n - 1) + gap + k - - values = [] - weights = [] - - # m=1 表示最近一个周期;m 越大越久远 - for m in range(1, max_cycles + 1): - hist_idx = target_idx - m * period - if 0 <= hist_idx < n: - # 越近权重越大 - w = 1.0 / m - values.append(float(ys_arr[hist_idx])) - weights.append(w) - - if not values: - # 万一拿不到,退化为最后一个值 - preds.append(float(ys_arr[-1])) - else: - preds.append(float(np.average(values, weights=weights))) - - return preds - - -def harmonic_regression_predict( - ys_arr: np.ndarray, - horizon: int, - period: int, - gap: int = 0, - max_harmonics: int = MAX_HARMONICS, -) -> List[float]: - """ - 多谐波回归(降级模式): - y = c + Σ [a_k sin(2πkt/P) + b_k cos(2πkt/P)] - 相比单正弦,更能表达非标准正弦波形。 - """ - n = len(ys_arr) - if n < 10 or period <= 1: - return [float(ys_arr[-1])] * horizon - - # 周期太短时,谐波数不能太大 - K = min(max_harmonics, max(1, period // 4)) - - t = np.arange(n, dtype=float) - cols = [np.ones(n, dtype=float)] - - for k in range(1, K + 1): - angle = 2.0 * np.pi * k * t / period - cols.append(np.sin(angle)) - cols.append(np.cos(angle)) - - X = np.column_stack(cols) - - try: - coef, _, _, _ = np.linalg.lstsq(X, ys_arr, rcond=None) - except Exception: - return [float(ys_arr[-1])] * horizon - - t_future = np.arange(n + gap, n + gap + horizon, dtype=float) - cols_future = [np.ones(horizon, dtype=float)] - - for k in range(1, K + 1): - angle = 2.0 * np.pi * k * t_future / period - cols_future.append(np.sin(angle)) - cols_future.append(np.cos(angle)) - - X_future = np.column_stack(cols_future) - y_pred = X_future @ coef - - return y_pred.astype(float).tolist() - - -def predict_next( - ts: List[float], - ys: List[float], - horizon: int, - base_ts: int, -) -> Tuple[List[float], List[float]]: - """ - 主预测函数: - 1. 周期估计 - 2. 优先使用周期模板预测 - 3. 
周期不够时降级为多谐波回归 - """ - ts_grid, ys_grid = normalize_history(ts, ys) - if len(ys_grid) < MIN_POINTS: - return [], [] - - y_min = float(np.min(ys_grid)) - y_max = float(np.max(ys_grid)) - y_range = y_max - y_min - - if y_range <= 1e-9: - base_ts = max(int(base_ts), int(ts_grid[-1])) - ts_future = [base_ts + i + 1 for i in range(horizon)] - y_pred = [float(ys_grid[-1])] * horizon - return ts_future, y_pred - - period_est = estimate_period(ys_grid) - period = int(round(period_est)) - period = max(MIN_PERIOD_SECONDS, min(MAX_PERIOD_SECONDS, period)) - - last_real_ts = int(ts_grid[-1]) - base_ts = max(int(base_ts), last_real_ts) - - # 如果当前时间已经超过最后一个真实点,gap 表示中间“空过去”的秒数 - gap = max(0, base_ts - last_real_ts) - - ts_future = [base_ts + i + 1 for i in range(horizon)] - - full_cycles = len(ys_grid) // period if period > 0 else 0 - - if full_cycles >= MIN_FULL_CYCLES_FOR_TEMPLATE: - y_pred = seasonal_template_predict( - ys_arr=ys_grid, - horizon=horizon, - period=period, - gap=gap, - max_cycles=min(MAX_CYCLES_FOR_TEMPLATE, full_cycles), - ) - model_name = "seasonal_template" - else: - y_pred = harmonic_regression_predict( - ys_arr=ys_grid, - horizon=horizon, - period=period, - gap=gap, - max_harmonics=MAX_HARMONICS, - ) - model_name = "harmonic_regression" - - # 合理裁剪,避免偶然外推过大 - margin = y_range * 0.15 - lower = y_min - margin - upper = y_max + margin - y_pred = np.clip(np.array(y_pred, dtype=float), lower, upper).astype(float).tolist() - - logger.debug( - "predict_next model=%s period=%ss full_cycles=%s gap=%s", - model_name, period, full_cycles, gap - ) - - return ts_future, y_pred - - -def prom_escape_label_value(value: str) -> str: - """Prometheus label value 转义。""" - return ( - str(value) - .replace("\\", "\\\\") - .replace("\n", "\\n") - .replace('"', '\\"') - ) - - -def labels_to_str(labels: Dict[str, str]) -> str: - if not labels: - return "" - parts = [] - for k in sorted(labels.keys()): - v = prom_escape_label_value(labels[k]) - parts.append(f'{k}="{v}"') - return "{" 
+ ",".join(parts) + "}" - - -def write_predictions( - ts_future: List[float], - y_pred: List[float], - metric_name: str, - labels: Dict[str, str], -) -> bool: - """将预测值以 Prometheus exposition 格式写入 VictoriaMetrics。""" - if not ts_future or not y_pred or len(ts_future) != len(y_pred): - logger.warning("预测数据为空或长度不一致 metric=%s", metric_name) - return False - - label_str = labels_to_str(labels) - lines = [] - - for t, y in zip(ts_future, y_pred): - try: - ts_sec = int(round(float(t))) - val = float(y) - except Exception: - continue - - if not math.isfinite(ts_sec) or not math.isfinite(val): - continue - - ts_ms = ts_sec * 1000 - lines.append(f"{metric_name}{label_str} {val:.6f} {ts_ms}") - - if not lines: - logger.warning("没有可写入的预测点 metric=%s", metric_name) - return False - - payload = "\n".join(lines) + "\n" - - try: - resp = requests.post( - f"{VM_URL}/api/v1/import/prometheus", - data=payload.encode("utf-8"), - headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"}, - timeout=10, - ) - resp.raise_for_status() - return True - except requests.RequestException as e: - logger.error("写入预测数据失败 metric=%s: %s", metric_name, e) - return False - - -_LABEL_PATTERN = re.compile( - r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*' -) - - -def _parse_labels(query: str) -> Dict[str, str]: - """从查询表达式中解析标签。""" - labels = {} - - if "{" not in query or "}" not in query: - return labels - - try: - label_part = query[query.index("{") + 1: query.rindex("}")] - except Exception: - return labels - - for match in _LABEL_PATTERN.finditer(label_part): - key = match.group(1) - value = match.group(2) - value = value.replace('\\"', '"').replace("\\n", "\n").replace("\\\\", "\\") - labels[key] = value - - return labels - - -def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]: - result = {} - for d in dicts: - if d: - result.update(d) - return result - - -def series_key(metric_name: str, labels: Dict[str, str]) -> str: - return metric_name + labels_to_str(labels) - - 
-def run_once(): - now_str = datetime.now().strftime("%H:%M:%S") - - for query, pred_metric in PREDICT_TARGETS: - ts, ys = fetch_history(query) - if len(ys) < MIN_POINTS: - logger.info("[%s] %s 数据不足(%d 点),跳过", now_str, query, len(ys)) - continue - - base_labels = _parse_labels(query) - write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS) - - key = series_key(pred_metric, write_labels) - - now_sec = int(time.time()) - last_until = LAST_WRITTEN_UNTIL.get(key, 0) - - # 避免同一进程内写重叠时间段 - base_ts = max(now_sec, last_until) - - ts_future, y_pred = predict_next( - ts=ts, - ys=ys, - horizon=WRITE_HORIZON_SECONDS, - base_ts=base_ts, - ) - - if not ts_future or not y_pred: - logger.warning("[%s] %s 预测结果为空,跳过", now_str, query) - continue - - ok = write_predictions( - ts_future=ts_future, - y_pred=y_pred, - metric_name=pred_metric, - labels=write_labels, - ) - if not ok: - continue - - LAST_WRITTEN_UNTIL[key] = int(max(ts_future)) - - future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S") - future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S") - - logger.info( - "[%s] %-40s → %-35s 写入 %d 点,预测区间 %s ~ %s,标签=%s", - now_str, - query, - pred_metric, - len(y_pred), - future_start, - future_end, - labels_to_str(write_labels), - ) - - -def main(): - logger.info( - "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds", - VM_URL, - HISTORY_MINUTES, - HORIZON_SECONDS, - WRITE_HORIZON_SECONDS, - POLL_INTERVAL, - ) - - while True: - run_once() - time.sleep(POLL_INTERVAL) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/ai/predict_v3_single_scene.py b/ai/predict_v3_single_scene.py deleted file mode 100755 index d212d2d..0000000 --- a/ai/predict_v3_single_scene.py +++ /dev/null @@ -1,1487 +0,0 @@ -# -*- coding: utf-8 -*- -""" -ProtoForge Predictor v10 - -修复重点: -1. 修复 lag=0 但预测线仍然相位漂移的问题。 -2. 在谷底相位对齐基础上,增加 phase-lock 相位锁定。 -3. 每轮使用最近 1~2 个周期真实数据,搜索最佳 period + phase_origin。 -4. 
预测起点仍然锚定最后一个真实点 last_real_ts,避免写入延迟。 -5. 保留健康模板冻结逻辑:异常期间不学习故障数据。 -6. 保留预测上下界和异常指标。 -""" - -import json -import logging -import math -import os -import re -import time -from dataclasses import asdict, dataclass -from datetime import datetime, timedelta -from typing import Dict, List, Optional, Tuple - -import numpy as np -import requests - - -# ============================================================================= -# 日志配置 -# ============================================================================= - -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s [%(levelname)s] %(message)s", -) - -logger = logging.getLogger(__name__) - - -# ============================================================================= -# 基础配置 -# ============================================================================= - -VM_URL = "http://localhost:8428" -STATE_FILE = "/tmp/protoforge_predictor_state_v10.json" - -HISTORY_MINUTES = 30 -HORIZON_SECONDS = 120 -POLL_INTERVAL = 30 - -WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL) - -QUERY_STEP = "1s" -MIN_POINTS = 120 - -MIN_PERIOD_SECONDS = 5 -MAX_PERIOD_SECONDS = 3600 - -MIN_FULL_CYCLES_FOR_TEMPLATE = 3 -MAX_CYCLES_FOR_TEMPLATE = 6 - -DETECT_WINDOW_SECONDS = 20 -RECOVERY_MIN_SECONDS = 60 - -HEALTHY_EMA_ALPHA = 0.10 -RECOVERY_EMA_ALPHA = 0.25 - -OUTSIDE_RATIO_THRESHOLD = 0.60 - -VALLEY_QUANTILE = 45 - -# phase-lock 配置 -PHASE_LOCK_MIN_WINDOW_SECONDS = 45 -PHASE_LOCK_MAX_WINDOW_SECONDS = 180 -PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12 -PHASE_LOCK_ORIGIN_SEARCH_RATIO = 0.35 -PHASE_LOCK_PERIOD_STEP = 1 -PHASE_LOCK_ORIGIN_STEP = 1 - -# 真实数据延迟超过这个值,就不继续预测 -MAX_DATA_LAG_SECONDS = 180 - -# 预测锚定最后一个真实点 -ALIGN_PREDICTION_TO_LAST_REAL_TS = True - - -# ============================================================================= -# 指标配置 -# ============================================================================= - -PREDICT_TARGETS = [ - { - "query": 'feed_rate{device_id="fanuc-cnc"}', - "pred_metric": 
"feed_rate_predicted", - "anomaly_metric": "feed_rate_anomaly", - "abs_threshold": 400.0, - "rel_threshold": 0.25, - }, - { - "query": 'spindle_speed{device_id="fanuc-cnc"}', - "pred_metric": "spindle_speed_predicted", - "anomaly_metric": "spindle_speed_anomaly", - "abs_threshold": 500.0, - "rel_threshold": 0.25, - }, - { - "query": 'spindle_current{device_id="fanuc-cnc"}', - "pred_metric": "spindle_current_predicted", - "anomaly_metric": "spindle_current_anomaly", - "abs_threshold": 5.0, - "rel_threshold": 0.25, - }, - { - "query": 'vibration_x{device_id="fanuc-cnc"}', - "pred_metric": "vibration_x_predicted", - "anomaly_metric": "vibration_x_anomaly", - "abs_threshold": 1.0, - "rel_threshold": 0.30, - }, - { - "query": 'vibration_y{device_id="fanuc-cnc"}', - "pred_metric": "vibration_y_predicted", - "anomaly_metric": "vibration_y_anomaly", - "abs_threshold": 1.0, - "rel_threshold": 0.30, - }, - { - "query": 'vibration_z{device_id="fanuc-cnc"}', - "pred_metric": "vibration_z_predicted", - "anomaly_metric": "vibration_z_anomaly", - "abs_threshold": 1.0, - "rel_threshold": 0.30, - }, -] - -EXTRA_PREDICT_LABELS = { - "forecast": "phase_locked_health_v10", - "source": "protoforge", -} - -BASELINE_STATUS_HEALTHY = "healthy" -BASELINE_STATUS_ANOMALY = "anomaly" -BASELINE_STATUS_RECOVERING = "recovering" - - -# ============================================================================= -# 状态结构 -# ============================================================================= - -@dataclass -class BaselineState: - period: int - phase_origin_ts: int - template: List[float] - status: str - clean_seconds: int - last_update_ts: int - last_seen_ts: int - y_min: float - y_max: float - - -BASELINE_STATES: Dict[str, BaselineState] = {} -LAST_REAL_TS_WRITTEN: Dict[str, int] = {} - - -# ============================================================================= -# VictoriaMetrics 读取 -# ============================================================================= - -def 
fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]: - now = datetime.now() - start = now - timedelta(minutes=minutes) - - try: - resp = requests.get( - f"{VM_URL}/api/v1/query_range", - params={ - "query": query, - "start": start.timestamp(), - "end": now.timestamp(), - "step": QUERY_STEP, - }, - timeout=10, - ) - resp.raise_for_status() - except requests.RequestException as e: - logger.error("拉取数据失败 query=%s: %s", query, e) - return [], [] - - try: - result = resp.json().get("data", {}).get("result", []) - except Exception as e: - logger.error("解析 VM 返回失败 query=%s: %s", query, e) - return [], [] - - if not result: - return [], [] - - values = result[0].get("values", []) - - ts = [] - ys = [] - - for item in values: - if len(item) < 2: - continue - - try: - t = float(item[0]) - y = float(item[1]) - except Exception: - continue - - if not math.isfinite(t) or not math.isfinite(y): - continue - - ts.append(t) - ys.append(y) - - return ts, ys - - -def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]: - if not ts or not ys or len(ts) != len(ys): - return np.array([]), np.array([]) - - data = {} - - for t, y in zip(ts, ys): - try: - sec = int(round(float(t))) - val = float(y) - except Exception: - continue - - if not math.isfinite(sec) or not math.isfinite(val): - continue - - data[sec] = val - - if not data: - return np.array([]), np.array([]) - - sorted_items = sorted(data.items(), key=lambda x: x[0]) - - ts_clean = np.array([x[0] for x in sorted_items], dtype=float) - ys_clean = np.array([x[1] for x in sorted_items], dtype=float) - - if len(ts_clean) < 2: - return ts_clean, ys_clean - - start_sec = int(ts_clean[0]) - end_sec = int(ts_clean[-1]) - - if end_sec <= start_sec: - return ts_clean, ys_clean - - ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float) - ys_grid = np.interp(ts_grid, ts_clean, ys_clean) - - return ts_grid, ys_grid - - -# 
============================================================================= -# 周期估计 -# ============================================================================= - -def moving_average(arr: np.ndarray, window: int) -> np.ndarray: - if window <= 1 or len(arr) < window: - return arr.astype(float) - - window = int(window) - - if window % 2 == 0: - window += 1 - - kernel = np.ones(window, dtype=float) / window - pad = window // 2 - padded = np.pad(arr.astype(float), (pad, pad), mode="edge") - - return np.convolve(padded, kernel, mode="valid") - - -def estimate_period_by_fft(ys_arr: np.ndarray) -> float: - n = len(ys_arr) - - if n < 8: - return 60.0 - - centered = ys_arr - np.mean(ys_arr) - - if np.allclose(centered, 0): - return 60.0 - - fft_vals = np.fft.rfft(centered) - freqs = np.fft.rfftfreq(n, d=1.0) - - if len(freqs) <= 1: - return 60.0 - - power = np.abs(fft_vals[1:]) - - if len(power) == 0 or np.max(power) <= 0: - return 60.0 - - dominant_idx = int(np.argmax(power)) + 1 - dominant_freq = float(freqs[dominant_idx]) - - if dominant_freq <= 0: - return 60.0 - - period = 1.0 / dominant_freq - - return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - - -def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float: - n = len(ys_arr) - - if n < 20: - return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - - centered = ys_arr - np.mean(ys_arr) - - if np.allclose(centered, 0): - return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - - corr = np.correlate(centered, centered, mode="full")[n - 1:] - - p0 = int(round(init_period)) - left = max(int(MIN_PERIOD_SECONDS), int(max(2, p0 * 0.7))) - right = min(n // 2, int(max(left + 1, p0 * 1.3))) - - if right <= left: - return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - - search = corr[left:right + 1] - - if len(search) == 0: - return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - - best_lag = left + 
int(np.argmax(search)) - - return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - - -def estimate_period_rough(ys_arr: np.ndarray) -> int: - p_fft = estimate_period_by_fft(ys_arr) - p_refined = refine_period_by_autocorr(ys_arr, p_fft) - - period = int(round(p_refined)) - period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period)) - - return int(period) - - -# ============================================================================= -# 谷底检测与模板构建 -# ============================================================================= - -def find_valley_indices( - ts_grid: np.ndarray, - ys_grid: np.ndarray, - expected_period: int, -) -> List[int]: - n = len(ys_grid) - - if n < max(10, expected_period * 2): - return [] - - period = max(3, int(expected_period)) - - smooth_window = max(3, int(round(period * 0.08))) - smooth_window = min(smooth_window, 21) - - ys_smooth = moving_average(ys_grid, smooth_window) - threshold = float(np.percentile(ys_smooth, VALLEY_QUANTILE)) - - candidates = [] - - for i in range(1, n - 1): - if ( - ys_smooth[i] <= ys_smooth[i - 1] - and ys_smooth[i] < ys_smooth[i + 1] - and ys_smooth[i] <= threshold - ): - candidates.append(i) - - if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE: - candidates = [] - - for i in range(1, n - 1): - if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]: - candidates.append(i) - - if not candidates: - return [] - - min_distance = max(2, int(round(period * 0.55))) - selected = [] - - for idx in candidates: - if not selected: - selected.append(idx) - continue - - if idx - selected[-1] >= min_distance: - selected.append(idx) - continue - - if ys_smooth[idx] < ys_smooth[selected[-1]]: - selected[-1] = idx - - if len(selected) < 2: - return selected - - cleaned = [selected[0]] - - for idx in selected[1:]: - diff = int(ts_grid[idx] - ts_grid[cleaned[-1]]) - - if int(period * 0.55) <= diff <= int(period * 1.60): - cleaned.append(idx) - continue - - if diff < int(period 
* 0.55): - if ys_smooth[idx] < ys_smooth[cleaned[-1]]: - cleaned[-1] = idx - continue - - cleaned.append(idx) - - return cleaned - - -def detect_period_and_valleys( - ts_grid: np.ndarray, - ys_grid: np.ndarray, -) -> Tuple[int, List[int]]: - rough = estimate_period_rough(ys_grid) - valleys = find_valley_indices(ts_grid, ys_grid, rough) - - if len(valleys) >= 3: - diffs = np.diff(ts_grid[valleys]) - good = diffs[(diffs >= rough * 0.55) & (diffs <= rough * 1.60)] - - if len(good) > 0: - period = int(round(float(np.median(good)))) - else: - period = rough - else: - period = rough - - period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period)) - - return int(period), valleys - - -def build_template_from_valleys( - ts_grid: np.ndarray, - ys_grid: np.ndarray, - period: int, - valleys: List[int], - max_cycles: int = MAX_CYCLES_FOR_TEMPLATE, -) -> Optional[np.ndarray]: - if period <= 1 or len(valleys) < MIN_FULL_CYCLES_FOR_TEMPLATE + 1: - return None - - pairs = [] - - for a, b in zip(valleys[:-1], valleys[1:]): - cycle_len = float(ts_grid[b] - ts_grid[a]) - - if period * 0.55 <= cycle_len <= period * 1.60: - pairs.append((a, b, cycle_len)) - - if len(pairs) < MIN_FULL_CYCLES_FOR_TEMPLATE: - return None - - pairs = pairs[-max_cycles:] - - phase_grid = np.arange(period, dtype=float) - segments = [] - weights = [] - - for idx, (a, b, cycle_len) in enumerate(pairs): - seg_ts = ts_grid[a:b + 1] - seg_y = ys_grid[a:b + 1] - - if len(seg_y) < 3: - continue - - x_old = (seg_ts - seg_ts[0]) / cycle_len * period - seg = np.interp(phase_grid, x_old, seg_y) - - segments.append(seg.astype(float)) - - weight = 0.5 + 0.5 * ((idx + 1) / len(pairs)) - weights.append(weight) - - if len(segments) < MIN_FULL_CYCLES_FOR_TEMPLATE: - return None - - arr = np.vstack(segments) - w_arr = np.array(weights, dtype=float) - - template = np.average(arr, axis=0, weights=w_arr) - - return template.astype(float) - - -def build_current_baseline( - ts_grid: np.ndarray, - ys_grid: np.ndarray, 
- tail_seconds: Optional[int] = None, -) -> Optional[Tuple[int, int, np.ndarray]]: - if len(ys_grid) < MIN_POINTS: - return None - - if tail_seconds is not None and tail_seconds > 0: - cutoff = ts_grid[-1] - int(tail_seconds) - mask = ts_grid >= cutoff - ts_use = ts_grid[mask] - ys_use = ys_grid[mask] - else: - ts_use = ts_grid - ys_use = ys_grid - - if len(ys_use) < MIN_POINTS: - return None - - period, valleys = detect_period_and_valleys(ts_use, ys_use) - - template = build_template_from_valleys( - ts_grid=ts_use, - ys_grid=ys_use, - period=period, - valleys=valleys, - ) - - if template is None or len(valleys) == 0: - return None - - phase_origin_ts = int(round(float(ts_use[valleys[-1]]))) - - return int(period), phase_origin_ts, template - - -# ============================================================================= -# 模板预测与重采样 -# ============================================================================= - -def circular_template_value(template: np.ndarray, phase: float) -> float: - period = len(template) - - if period == 0: - return 0.0 - - phase = float(phase) % period - - i0 = int(math.floor(phase)) % period - i1 = (i0 + 1) % period - frac = phase - math.floor(phase) - - return float((1.0 - frac) * template[i0] + frac * template[i1]) - - -def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray: - old_period = len(old_template) - - if old_period == new_period: - return old_template.astype(float) - - if old_period <= 1 or new_period <= 1: - return np.full(new_period, float(np.mean(old_template)), dtype=float) - - old_x = np.linspace(0.0, 1.0, old_period, endpoint=False) - new_x = np.linspace(0.0, 1.0, new_period, endpoint=False) - - old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0]) - old_y_ext = np.concatenate([old_template, old_template, old_template]) - - return np.interp(new_x, old_x_ext, old_y_ext).astype(float) - - -def predict_template_values( - template: np.ndarray, - period: int, - phase_origin_ts: int, - 
ts_list: List[int], -) -> np.ndarray: - if period <= 1: - return np.zeros(len(ts_list), dtype=float) - - if len(template) != period: - template = resample_template(template, period) - - values = [] - - for ts in ts_list: - phase = (int(ts) - int(phase_origin_ts)) % period - values.append(circular_template_value(template, phase)) - - return np.array(values, dtype=float) - - -def predict_with_state(state: BaselineState, ts_list: List[int]) -> np.ndarray: - template = np.array(state.template, dtype=float) - - return predict_template_values( - template=template, - period=int(state.period), - phase_origin_ts=int(state.phase_origin_ts), - ts_list=ts_list, - ) - - -def normalize_origin_near(origin: int, period: int, near_ts: int) -> int: - if period <= 1: - return origin - - origin = int(origin) - period = int(period) - near_ts = int(near_ts) - - while origin + period <= near_ts: - origin += period - - while origin > near_ts: - origin -= period - - return origin - - -def align_new_template_to_old( - old_template: np.ndarray, - new_template: np.ndarray, -) -> np.ndarray: - if len(old_template) != len(new_template): - old_template = resample_template(old_template, len(new_template)) - - period = len(new_template) - - if period <= 2: - return new_template.astype(float) - - max_shift = max(1, int(round(period * 0.10))) - old_norm = old_template - np.mean(old_template) - - best_score = None - best_template = new_template - - for shift in range(-max_shift, max_shift + 1): - shifted = np.roll(new_template, shift) - shifted_norm = shifted - np.mean(shifted) - score = float(np.dot(old_norm, shifted_norm)) - - if best_score is None or score > best_score: - best_score = score - best_template = shifted - - return best_template.astype(float) - - -def merge_template( - old_template: np.ndarray, - new_template: np.ndarray, - alpha: float, -) -> np.ndarray: - alpha = float(np.clip(alpha, 0.0, 1.0)) - - if len(old_template) != len(new_template): - old_template = 
resample_template(old_template, len(new_template)) - - new_template = align_new_template_to_old(old_template, new_template) - - merged = (1.0 - alpha) * old_template + alpha * new_template - - return merged.astype(float) - - -# ============================================================================= -# Phase Lock -# ============================================================================= - -def phase_lock_recent( - state: BaselineState, - ts_grid: np.ndarray, - ys_grid: np.ndarray, -) -> Tuple[int, int, np.ndarray, float]: - base_period = int(state.period) - base_origin = int(state.phase_origin_ts) - base_template = np.array(state.template, dtype=float) - - if base_period <= 1 or len(base_template) <= 1: - ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist() - pred = predict_with_state(state, ts_recent) - actual = ys_grid[-len(ts_recent):].astype(float) - mae = float(np.mean(np.abs(actual - pred))) if len(actual) else 0.0 - return base_period, base_origin, pred, mae - - window_seconds = max( - PHASE_LOCK_MIN_WINDOW_SECONDS, - min(PHASE_LOCK_MAX_WINDOW_SECONDS, int(base_period * 2)), - ) - - cutoff = ts_grid[-1] - window_seconds - mask = ts_grid >= cutoff - - ts_recent_arr = ts_grid[mask].astype(int) - actual = ys_grid[mask].astype(float) - - if len(ts_recent_arr) < max(10, DETECT_WINDOW_SECONDS): - ts_recent_arr = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int) - actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float) - - ts_recent = ts_recent_arr.tolist() - last_ts = int(ts_recent[-1]) - - p_min = max(int(MIN_PERIOD_SECONDS), int(round(base_period * (1.0 - PHASE_LOCK_PERIOD_SEARCH_RATIO)))) - p_max = min(int(MAX_PERIOD_SECONDS), int(round(base_period * (1.0 + PHASE_LOCK_PERIOD_SEARCH_RATIO)))) - - if p_max < p_min: - p_min = p_max = base_period - - best_period = base_period - best_origin = normalize_origin_near(base_origin, base_period, last_ts) - best_template = resample_template(base_template, best_period) - best_pred = 
predict_template_values(best_template, best_period, best_origin, ts_recent) - best_mae = float(np.mean(np.abs(actual - best_pred))) - - for period in range(p_min, p_max + 1, PHASE_LOCK_PERIOD_STEP): - template = resample_template(base_template, period) - center_origin = normalize_origin_near(base_origin, period, last_ts) - - origin_shift = max(2, int(round(period * PHASE_LOCK_ORIGIN_SEARCH_RATIO))) - - for shift in range(-origin_shift, origin_shift + 1, PHASE_LOCK_ORIGIN_STEP): - origin = center_origin + shift - - pred = predict_template_values( - template=template, - period=period, - phase_origin_ts=origin, - ts_list=ts_recent, - ) - - mae = float(np.mean(np.abs(actual - pred))) - - # 轻微惩罚周期变化,避免过拟合抖动 - penalty = abs(period - base_period) * 0.5 - score = mae + penalty - - best_score = best_mae + abs(best_period - base_period) * 0.5 - - if score < best_score: - best_period = period - best_origin = origin - best_pred = pred - best_mae = mae - - best_origin = normalize_origin_near(best_origin, best_period, last_ts) - - return int(best_period), int(best_origin), best_pred, float(best_mae) - - -# ============================================================================= -# 异常检测 -# ============================================================================= - -def calc_threshold( - pred: np.ndarray, - abs_threshold: float, - rel_threshold: float, -) -> np.ndarray: - return np.maximum(abs_threshold, np.abs(pred) * rel_threshold) - - -def calc_bounds( - pred: np.ndarray, - abs_threshold: float, - rel_threshold: float, -) -> Tuple[np.ndarray, np.ndarray]: - threshold = calc_threshold(pred, abs_threshold, rel_threshold) - - return pred - threshold, pred + threshold - - -def detect_anomaly( - state: BaselineState, - ts_grid: np.ndarray, - ys_grid: np.ndarray, - abs_threshold: float, - rel_threshold: float, -) -> Tuple[bool, float, float, float, int, int]: - best_period, best_origin, pred_recent, _ = phase_lock_recent( - state=state, - ts_grid=ts_grid, - ys_grid=ys_grid, 
- ) - - recent_len = len(pred_recent) - - if recent_len <= 0: - return False, 0.0, 0.0, 0.0, best_period, best_origin - - actual = ys_grid[-recent_len:].astype(float) - - threshold = calc_threshold(pred_recent, abs_threshold, rel_threshold) - - abs_err = np.abs(actual - pred_recent) - outside = abs_err > threshold - - outside_ratio = float(np.mean(outside)) - mean_abs_err = float(np.mean(abs_err)) - mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred_recent), 1.0))) - - is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD - - return ( - is_anomaly, - outside_ratio, - mean_abs_err, - mean_rel_err, - int(best_period), - int(best_origin), - ) - - -# ============================================================================= -# 健康基线状态管理 -# ============================================================================= - -def create_initial_state( - ts_grid: np.ndarray, - ys_grid: np.ndarray, - now_sec: int, -) -> Optional[BaselineState]: - baseline = build_current_baseline(ts_grid, ys_grid) - - if baseline is None: - return None - - period, phase_origin_ts, template = baseline - - return BaselineState( - period=int(period), - phase_origin_ts=int(phase_origin_ts), - template=template.astype(float).tolist(), - status=BASELINE_STATUS_HEALTHY, - clean_seconds=int(period * MAX_CYCLES_FOR_TEMPLATE), - last_update_ts=now_sec, - last_seen_ts=now_sec, - y_min=float(np.min(ys_grid)), - y_max=float(np.max(ys_grid)), - ) - - -def apply_phase_lock_to_state( - state: BaselineState, - best_period: int, - best_origin: int, -) -> None: - best_period = int(best_period) - - if best_period <= 1: - return - - template = np.array(state.template, dtype=float) - - if len(template) != best_period: - template = resample_template(template, best_period) - - state.period = best_period - state.phase_origin_ts = int(best_origin) - state.template = template.astype(float).tolist() - - -def maybe_update_state( - key: str, - ts_grid: np.ndarray, - ys_grid: np.ndarray, - abs_threshold: float, 
- rel_threshold: float, -) -> Tuple[Optional[BaselineState], bool, float, float, float]: - now_sec = int(time.time()) - state = BASELINE_STATES.get(key) - - if state is None: - state = create_initial_state(ts_grid, ys_grid, now_sec) - - if state is None: - return None, False, 0.0, 0.0, 0.0 - - BASELINE_STATES[key] = state - - logger.info( - "初始化健康模板 key=%s period=%ss origin=%s clean=%ss", - key, - state.period, - datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), - state.clean_seconds, - ) - - return state, False, 0.0, 0.0, 0.0 - - elapsed = max(1, now_sec - int(state.last_seen_ts)) - elapsed = min(elapsed, POLL_INTERVAL * 2) - state.last_seen_ts = now_sec - - ( - is_anomaly, - outside_ratio, - mean_abs_err, - mean_rel_err, - best_period, - best_origin, - ) = detect_anomaly( - state=state, - ts_grid=ts_grid, - ys_grid=ys_grid, - abs_threshold=abs_threshold, - rel_threshold=rel_threshold, - ) - - if is_anomaly: - state.status = BASELINE_STATUS_ANOMALY - state.clean_seconds = 0 - - BASELINE_STATES[key] = state - - logger.warning( - "检测到异常,冻结模板 key=%s outside_ratio=%.2f mean_abs_err=%.2f mean_rel_err=%.2f", - key, - outside_ratio, - mean_abs_err, - mean_rel_err, - ) - - return state, True, outside_ratio, mean_abs_err, mean_rel_err - - old_period = int(state.period) - old_origin = int(state.phase_origin_ts) - - apply_phase_lock_to_state(state, best_period, best_origin) - - if old_period != state.period or old_origin != state.phase_origin_ts: - logger.info( - "phase-lock key=%s period %s -> %s origin %s -> %s", - key, - old_period, - state.period, - datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"), - datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), - ) - - if state.status == BASELINE_STATUS_ANOMALY: - state.status = BASELINE_STATUS_RECOVERING - state.clean_seconds = elapsed - - BASELINE_STATES[key] = state - - logger.info( - "异常开始恢复 key=%s clean_seconds=%ss", - key, - state.clean_seconds, - ) - - return state, False, 
outside_ratio, mean_abs_err, mean_rel_err - - if state.status == BASELINE_STATUS_RECOVERING: - state.clean_seconds += elapsed - else: - state.status = BASELINE_STATUS_HEALTHY - state.clean_seconds += elapsed - - min_clean_for_update = max( - RECOVERY_MIN_SECONDS, - int(state.period) * MIN_FULL_CYCLES_FOR_TEMPLATE, - ) - - if state.clean_seconds < min_clean_for_update: - BASELINE_STATES[key] = state - return state, False, outside_ratio, mean_abs_err, mean_rel_err - - tail_seconds = min( - int(state.clean_seconds), - int(state.period) * MAX_CYCLES_FOR_TEMPLATE, - ) - - baseline = build_current_baseline( - ts_grid=ts_grid, - ys_grid=ys_grid, - tail_seconds=tail_seconds, - ) - - if baseline is None: - BASELINE_STATES[key] = state - return state, False, outside_ratio, mean_abs_err, mean_rel_err - - new_period, new_origin, new_template = baseline - - old_template = np.array(state.template, dtype=float) - - alpha = RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else HEALTHY_EMA_ALPHA - - merged = merge_template( - old_template=old_template, - new_template=new_template, - alpha=alpha, - ) - - state.period = int(new_period) - state.phase_origin_ts = int(new_origin) - state.template = merged.astype(float).tolist() - state.status = BASELINE_STATUS_HEALTHY - state.last_update_ts = now_sec - - if tail_seconds > 0 and len(ys_grid) >= tail_seconds: - state.y_min = float(np.min(ys_grid[-tail_seconds:])) - state.y_max = float(np.max(ys_grid[-tail_seconds:])) - else: - state.y_min = float(np.min(ys_grid)) - state.y_max = float(np.max(ys_grid)) - - BASELINE_STATES[key] = state - - logger.info( - "更新健康模板 key=%s period=%ss origin=%s clean=%ss alpha=%.2f", - key, - state.period, - datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), - state.clean_seconds, - alpha, - ) - - return state, False, outside_ratio, mean_abs_err, mean_rel_err - - -# ============================================================================= -# Prometheus Exposition 写入 -# 
============================================================================= - -def prom_escape_label_value(value: str) -> str: - return ( - str(value) - .replace("\\", "\\\\") - .replace("\n", "\\n") - .replace('"', '\\"') - ) - - -def labels_to_str(labels: Dict[str, str]) -> str: - if not labels: - return "" - - parts = [] - - for k in sorted(labels.keys()): - parts.append(f'{k}="{prom_escape_label_value(labels[k])}"') - - return "{" + ",".join(parts) + "}" - - -def write_series( - metric_name: str, - labels: Dict[str, str], - ts_list: List[int], - values: List[float], -) -> bool: - if not ts_list or not values or len(ts_list) != len(values): - return False - - label_str = labels_to_str(labels) - lines = [] - - for t, y in zip(ts_list, values): - try: - ts_sec = int(round(float(t))) - val = float(y) - except Exception: - continue - - if not math.isfinite(ts_sec) or not math.isfinite(val): - continue - - lines.append(f"{metric_name}{label_str} {val:.6f} {ts_sec * 1000}") - - if not lines: - return False - - payload = "\n".join(lines) + "\n" - - try: - resp = requests.post( - f"{VM_URL}/api/v1/import/prometheus", - data=payload.encode("utf-8"), - headers={ - "Content-Type": "text/plain; version=0.0.4; charset=utf-8", - }, - timeout=10, - ) - resp.raise_for_status() - return True - - except requests.RequestException as e: - logger.error("写入数据失败 metric=%s: %s", metric_name, e) - return False - - -def write_prediction_bundle( - pred_metric: str, - anomaly_metric: str, - labels: Dict[str, str], - ts_future: List[int], - pred_values: np.ndarray, - lower_values: np.ndarray, - upper_values: np.ndarray, - is_anomaly: bool, - outside_ratio: float, - mean_abs_err: float, - mean_rel_err: float, - event_ts: int, -) -> bool: - ok1 = write_series( - metric_name=pred_metric, - labels=labels, - ts_list=ts_future, - values=pred_values.astype(float).tolist(), - ) - - ok2 = write_series( - metric_name=f"{pred_metric}_lower", - labels=labels, - ts_list=ts_future, - 
values=lower_values.astype(float).tolist(), - ) - - ok3 = write_series( - metric_name=f"{pred_metric}_upper", - labels=labels, - ts_list=ts_future, - values=upper_values.astype(float).tolist(), - ) - - anomaly_labels = dict(labels) - anomaly_labels["type"] = "prediction_deviation" - - ok4 = write_series( - metric_name=anomaly_metric, - labels=anomaly_labels, - ts_list=[event_ts], - values=[1.0 if is_anomaly else 0.0], - ) - - ok5 = write_series( - metric_name=f"{anomaly_metric}_outside_ratio", - labels=anomaly_labels, - ts_list=[event_ts], - values=[outside_ratio], - ) - - ok6 = write_series( - metric_name=f"{anomaly_metric}_mean_abs_error", - labels=anomaly_labels, - ts_list=[event_ts], - values=[mean_abs_err], - ) - - ok7 = write_series( - metric_name=f"{anomaly_metric}_mean_rel_error", - labels=anomaly_labels, - ts_list=[event_ts], - values=[mean_rel_err], - ) - - return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7 - - -# ============================================================================= -# 标签解析 -# ============================================================================= - -_LABEL_PATTERN = re.compile( - r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*' -) - - -def parse_labels_from_query(query: str) -> Dict[str, str]: - labels = {} - - if "{" not in query or "}" not in query: - return labels - - try: - label_part = query[query.index("{") + 1:query.rindex("}")] - except Exception: - return labels - - for match in _LABEL_PATTERN.finditer(label_part): - key = match.group(1) - value = match.group(2) - - value = ( - value - .replace('\\"', '"') - .replace("\\n", "\n") - .replace("\\\\", "\\") - ) - - labels[key] = value - - return labels - - -def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]: - result = {} - - for d in dicts: - if d: - result.update(d) - - return result - - -def series_key(metric_name: str, labels: Dict[str, str]) -> str: - return metric_name + labels_to_str(labels) - - -# 
============================================================================= -# 状态持久化 -# ============================================================================= - -def load_state() -> None: - global BASELINE_STATES - - if not os.path.exists(STATE_FILE): - return - - try: - with open(STATE_FILE, "r", encoding="utf-8") as f: - raw = json.load(f) - - states = {} - - for key, value in raw.get("baseline_states", {}).items(): - required_fields = { - "period", - "phase_origin_ts", - "template", - "status", - "clean_seconds", - "last_update_ts", - "last_seen_ts", - "y_min", - "y_max", - } - - if not required_fields.issubset(set(value.keys())): - continue - - states[key] = BaselineState(**value) - - BASELINE_STATES = states - - logger.info( - "已加载预测状态文件 %s,状态数量=%d", - STATE_FILE, - len(BASELINE_STATES), - ) - - except Exception as e: - logger.warning("加载预测状态文件失败,将重新学习: %s", e) - - -def save_state() -> None: - try: - raw = { - "baseline_states": { - key: asdict(value) - for key, value in BASELINE_STATES.items() - } - } - - tmp_file = STATE_FILE + ".tmp" - - with open(tmp_file, "w", encoding="utf-8") as f: - json.dump(raw, f, ensure_ascii=False, indent=2) - - os.replace(tmp_file, STATE_FILE) - - except Exception as e: - logger.warning("保存预测状态文件失败: %s", e) - - -# ============================================================================= -# 时间轴 -# ============================================================================= - -def build_prediction_timestamps( - key: str, - last_real_ts: int, - now_sec: int, -) -> Optional[List[int]]: - data_lag = now_sec - last_real_ts - - if data_lag > MAX_DATA_LAG_SECONDS: - logger.warning( - "真实数据延迟过大,跳过预测 key=%s data_lag=%ss max=%ss", - key, - data_lag, - MAX_DATA_LAG_SECONDS, - ) - return None - - last_written_real_ts = LAST_REAL_TS_WRITTEN.get(key) - - if last_written_real_ts is not None and last_real_ts <= int(last_written_real_ts): - logger.info( - "真实数据时间戳未推进,跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s", - key, - 
last_real_ts, - last_written_real_ts, - ) - return None - - if ALIGN_PREDICTION_TO_LAST_REAL_TS: - base_ts = last_real_ts - else: - base_ts = now_sec - - return [ - base_ts + i + 1 - for i in range(WRITE_HORIZON_SECONDS) - ] - - -# ============================================================================= -# 主流程 -# ============================================================================= - -def run_once() -> None: - now_str = datetime.now().strftime("%H:%M:%S") - - for target in PREDICT_TARGETS: - query = target["query"] - pred_metric = target["pred_metric"] - anomaly_metric = target["anomaly_metric"] - abs_threshold = float(target["abs_threshold"]) - rel_threshold = float(target["rel_threshold"]) - - ts, ys = fetch_history(query) - - if len(ys) < MIN_POINTS: - logger.info( - "[%s] %s 数据不足(%d 点),跳过", - now_str, - query, - len(ys), - ) - continue - - ts_grid, ys_grid = normalize_history(ts, ys) - - if len(ys_grid) < MIN_POINTS: - logger.info( - "[%s] %s 清洗后数据不足(%d 点),跳过", - now_str, - query, - len(ys_grid), - ) - continue - - base_labels = parse_labels_from_query(query) - write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS) - - key = series_key(pred_metric, write_labels) - - state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err = maybe_update_state( - key=key, - ts_grid=ts_grid, - ys_grid=ys_grid, - abs_threshold=abs_threshold, - rel_threshold=rel_threshold, - ) - - if state is None: - logger.info( - "[%s] %s 暂无可用健康模板,等待学习", - now_str, - query, - ) - continue - - now_sec = int(time.time()) - last_real_ts = int(ts_grid[-1]) - data_lag = now_sec - last_real_ts - - ts_future = build_prediction_timestamps( - key=key, - last_real_ts=last_real_ts, - now_sec=now_sec, - ) - - if not ts_future: - continue - - pred_values = predict_with_state(state, ts_future) - - lower_values, upper_values = calc_bounds( - pred=pred_values, - abs_threshold=abs_threshold, - rel_threshold=rel_threshold, - ) - - ok = write_prediction_bundle( - pred_metric=pred_metric, - 
anomaly_metric=anomaly_metric, - labels=write_labels, - ts_future=ts_future, - pred_values=pred_values, - lower_values=lower_values, - upper_values=upper_values, - is_anomaly=is_anomaly, - outside_ratio=outside_ratio, - mean_abs_err=mean_abs_err, - mean_rel_err=mean_rel_err, - event_ts=last_real_ts, - ) - - if not ok: - logger.error( - "[%s] %s 写入预测数据失败", - now_str, - query, - ) - continue - - LAST_REAL_TS_WRITTEN[key] = last_real_ts - - future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S") - future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S") - last_real_str = datetime.fromtimestamp(last_real_ts).strftime("%H:%M:%S") - origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S") - - logger.info( - "[%s] %-40s → %-35s status=%s anomaly=%s period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点,预测区间 %s ~ %s", - now_str, - query, - pred_metric, - state.status, - is_anomaly, - state.period, - origin_str, - last_real_str, - data_lag, - len(ts_future), - future_start, - future_end, - ) - - save_state() - - -def main() -> None: - load_state() - - logger.info( - "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s align_to_last_real=%s", - VM_URL, - HISTORY_MINUTES, - HORIZON_SECONDS, - WRITE_HORIZON_SECONDS, - POLL_INTERVAL, - STATE_FILE, - EXTRA_PREDICT_LABELS["forecast"], - ALIGN_PREDICTION_TO_LAST_REAL_TS, - ) - - while True: - run_once() - time.sleep(POLL_INTERVAL) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/ai/pridict_v4.py b/ai/pridict_v4.py deleted file mode 100644 index 8657944..0000000 --- a/ai/pridict_v4.py +++ /dev/null @@ -1,1604 +0,0 @@ -# -*- coding: utf-8 -*- -""" -ProtoForge Predictor v11 - -核心能力: -1. feed_rate / spindle_speed / spindle_current 使用 phase-lock 点预测。 -2. vibration_x / vibration_y / vibration_z 使用 phase-band 预测带。 -3. 
vibration 类指标不再追求单点完全贴合,而是输出: - - xxx_predicted 中位数预测线 - - xxx_predicted_upper 正常上边界 - - xxx_predicted_lower 正常下边界 -4. 预测起点锚定最后一个真实点 last_real_ts,避免时间错位。 -5. 异常期间冻结健康模板,不学习故障数据。 -6. 故障恢复后等待稳定,再恢复模板学习。 -""" - -import json -import logging -import math -import os -import re -import time -from dataclasses import asdict, dataclass -from datetime import datetime, timedelta -from typing import Dict, List, Optional, Tuple - -import numpy as np -import requests - - -# ============================================================================= -# 日志配置 -# ============================================================================= - -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s [%(levelname)s] %(message)s", -) - -logger = logging.getLogger(__name__) - - -# ============================================================================= -# 基础配置 -# ============================================================================= - -VM_URL = "http://localhost:8428" -STATE_FILE = "/tmp/protoforge_predictor_state_v11.json" - -HISTORY_MINUTES = 30 -HORIZON_SECONDS = 120 -POLL_INTERVAL = 30 - -WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL) - -QUERY_STEP = "1s" -MIN_POINTS = 120 - -MIN_PERIOD_SECONDS = 5 -MAX_PERIOD_SECONDS = 3600 - -MIN_FULL_CYCLES_FOR_TEMPLATE = 3 -MAX_CYCLES_FOR_TEMPLATE = 8 - -DETECT_WINDOW_SECONDS = 20 -RECOVERY_MIN_SECONDS = 60 - -HEALTHY_EMA_ALPHA = 0.10 -RECOVERY_EMA_ALPHA = 0.25 - -OUTSIDE_RATIO_THRESHOLD = 0.60 - -VALLEY_QUANTILE = 45 - -MAX_DATA_LAG_SECONDS = 180 - -PHASE_LOCK_MIN_WINDOW_SECONDS = 45 -PHASE_LOCK_MAX_WINDOW_SECONDS = 180 -PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12 -PHASE_LOCK_ORIGIN_SEARCH_RATIO = 0.35 -PHASE_LOCK_PERIOD_STEP = 1 -PHASE_LOCK_ORIGIN_STEP = 1 - - -# ============================================================================= -# 指标配置 -# ============================================================================= - -PREDICT_TARGETS = [ - { - "query": 'feed_rate{device_id="fanuc-cnc"}', - "pred_metric": 
"feed_rate_predicted", - "anomaly_metric": "feed_rate_anomaly", - "strategy": "phase_point", - "abs_threshold": 400.0, - "rel_threshold": 0.25, - "smooth_window": 1, - }, - { - "query": 'spindle_speed{device_id="fanuc-cnc"}', - "pred_metric": "spindle_speed_predicted", - "anomaly_metric": "spindle_speed_anomaly", - "strategy": "phase_point", - "abs_threshold": 500.0, - "rel_threshold": 0.25, - "smooth_window": 1, - }, - { - "query": 'spindle_current{device_id="fanuc-cnc"}', - "pred_metric": "spindle_current_predicted", - "anomaly_metric": "spindle_current_anomaly", - "strategy": "phase_point", - "abs_threshold": 5.0, - "rel_threshold": 0.25, - "smooth_window": 1, - }, - { - "query": 'vibration_x{device_id="fanuc-cnc"}', - "pred_metric": "vibration_x_predicted", - "anomaly_metric": "vibration_x_anomaly", - "strategy": "phase_band", - "abs_threshold": 0.18, - "rel_threshold": 0.50, - "smooth_window": 5, - "band_low_q": 2, - "band_high_q": 98, - "band_pad_abs": 0.12, - }, - { - "query": 'vibration_y{device_id="fanuc-cnc"}', - "pred_metric": "vibration_y_predicted", - "anomaly_metric": "vibration_y_anomaly", - "strategy": "phase_band", - "abs_threshold": 0.18, - "rel_threshold": 0.50, - "smooth_window": 5, - "band_low_q": 2, - "band_high_q": 98, - "band_pad_abs": 0.12, - }, - { - "query": 'vibration_z{device_id="fanuc-cnc"}', - "pred_metric": "vibration_z_predicted", - "anomaly_metric": "vibration_z_anomaly", - "strategy": "phase_band", - "abs_threshold": 0.18, - "rel_threshold": 0.50, - "smooth_window": 5, - "band_low_q": 2, - "band_high_q": 98, - "band_pad_abs": 0.12, - } -] - -EXTRA_PREDICT_LABELS = { - "forecast": "phase_band_health_v11", - "source": "protoforge", -} - -BASELINE_STATUS_HEALTHY = "healthy" -BASELINE_STATUS_ANOMALY = "anomaly" -BASELINE_STATUS_RECOVERING = "recovering" - - -# ============================================================================= -# 状态结构 -# ============================================================================= - 
@dataclass
class BaselineState:
    """Learned periodic baseline ("healthy template") of one predicted series.

    One instance per series key is kept in ``BASELINE_STATES``. All fields
    are plain ints, floats, strings or lists of floats.
    """

    # Cycle length in seconds; the templates hold one sample per second.
    period: int
    # Epoch second that is treated as phase 0 of the cycle.
    phase_origin_ts: int
    # Centre curve of one cycle (the predicted value per phase second).
    template: List[float]
    # Raw lower / upper curves of one cycle, before any padding is applied.
    lower_template: List[float]
    upper_template: List[float]
    # Prediction strategy name, e.g. "phase_point" or "phase_band".
    strategy: str
    # One of the BASELINE_STATUS_* values.
    status: str
    # Seconds accumulated without an anomaly since the last reset.
    clean_seconds: int
    # Epoch second of the last template creation / refresh.
    last_update_ts: int
    # Epoch second of the last evaluation of this series.
    last_seen_ts: int
    # Minimum / maximum of the values the template was last built from.
    y_min: float
    y_max: float


# Baseline per series key, kept in memory for the lifetime of the process.
BASELINE_STATES: Dict[str, BaselineState] = {}
# Per series key: timestamp of a real sample already handled.
# NOTE(review): presumably the newest sample for which predictions were
# written - confirm against the main loop.
LAST_REAL_TS_WRITTEN: Dict[str, int] = {}


# =============================================================================
# VictoriaMetrics reads
# =============================================================================

def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]:
    """Fetch the last ``minutes`` minutes of ``query`` via ``query_range``.

    Only the first series of the response is used. Samples that cannot be
    parsed as finite floats are skipped.

    Args:
        query: Query expression sent to ``{VM_URL}/api/v1/query_range``.
        minutes: Length of the history window in minutes.

    Returns:
        ``(timestamps, values)`` as two equally long lists of floats. Both
        lists are empty when the request fails, the response cannot be
        parsed, or no usable sample is present. This function never raises
        for bad remote data, so the polling loop keeps running.
    """
    now = datetime.now()
    start = now - timedelta(minutes=minutes)

    try:
        resp = requests.get(
            f"{VM_URL}/api/v1/query_range",
            params={
                "query": query,
                "start": start.timestamp(),
                "end": now.timestamp(),
                "step": QUERY_STEP,
            },
            timeout=10,
        )
        resp.raise_for_status()
    except requests.RequestException as e:
        logger.error("拉取数据失败 query=%s: %s", query, e)
        return [], []

    try:
        result = resp.json().get("data", {}).get("result", [])
    except Exception as e:
        # Deliberately broad: invalid JSON and unexpected payload shapes
        # (e.g. "data": null) must both degrade to "no data".
        logger.error("解析 VM 返回失败 query=%s: %s", query, e)
        return [], []

    if not isinstance(result, list) or not result:
        return [], []

    first_series = result[0]
    values = first_series.get("values") if isinstance(first_series, dict) else None

    # A missing / null / non-sequence "values" entry means "no samples".
    if not isinstance(values, (list, tuple)):
        return [], []

    ts: List[float] = []
    ys: List[float] = []

    for item in values:
        try:
            t = float(item[0])
            y = float(item[1])
        except (TypeError, ValueError, IndexError, KeyError):
            # Too short, not subscriptable, or not numeric: skip the sample.
            continue

        if not (math.isfinite(t) and math.isfinite(y)):
            continue

        ts.append(t)
        ys.append(y)

    return ts, ys


def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]:
    """Resample raw samples onto a gap-free one-second grid.

    Timestamps are rounded to whole seconds; when several samples round to
    the same second the last one wins. Missing seconds between the first
    and the last sample are filled by linear interpolation.

    Args:
        ts: Sample timestamps in epoch seconds (list or 1-D array).
        ys: Sample values, same length as ``ts``.

    Returns:
        ``(ts_grid, ys_grid)`` as float arrays of equal length. Both are
        empty when the input is empty, the lengths differ, or no sample is
        usable. With a single usable sample that sample is returned as is.
    """
    if ts is None or ys is None or len(ts) == 0 or len(ts) != len(ys):
        return np.array([]), np.array([])

    by_second: Dict[int, float] = {}

    for t, y in zip(ts, ys):
        try:
            # round() raises ValueError for NaN and OverflowError for +/-inf,
            # so non-finite timestamps are rejected here.
            sec = int(round(float(t)))
            val = float(y)
        except (TypeError, ValueError, OverflowError):
            continue

        if not math.isfinite(val):
            continue

        by_second[sec] = val

    if not by_second:
        return np.array([]), np.array([])

    seconds = sorted(by_second)

    ts_clean = np.array(seconds, dtype=float)
    ys_clean = np.array([by_second[s] for s in seconds], dtype=float)

    if len(seconds) < 2:
        return ts_clean, ys_clean

    # The keys are distinct integers, so seconds[-1] > seconds[0] holds here.
    ts_grid = np.arange(seconds[0], seconds[-1] + 1, 1, dtype=float)
    ys_grid = np.interp(ts_grid, ts_clean, ys_clean)

    return ts_grid, ys_grid


# =============================================================================
# Smoothing and preprocessing
# =============================================================================

def rolling_median(arr: np.ndarray, window: int) -> np.ndarray:
    """Return the centred rolling median of a 1-D sequence.

    Args:
        arr: 1-D array-like of numbers.
        window: Window length in samples. An even window is widened by one
            so that it is centred on each sample.

    Returns:
        A new float array with the same length as ``arr``. The borders are
        handled by repeating the edge values. The input is returned as an
        unchanged float copy when ``window <= 1`` or when ``arr`` is shorter
        than ``window``.
    """
    values = np.array(arr, dtype=float)

    if window <= 1 or len(values) < window:
        return values

    if window % 2 == 0:
        window += 1

    pad = window // 2
    padded = np.pad(values, (pad, pad), mode="edge")

    # Row i holds the indices of the window centred on sample i, so a single
    # median over axis 1 replaces the per-sample Python loop.
    window_index = np.arange(len(values))[:, None] + np.arange(window)[None, :]

    return np.median(padded[window_index], axis=1).astype(float)


def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
    """Return the centred moving average of a 1-D sequence.

    Args:
        arr: 1-D array-like of numbers.
        window: Window length in samples. An even window is widened by one
            so that it is centred on each sample.

    Returns:
        A new float array with the same length as ``arr``. The borders are
        handled by repeating the edge values. The input is returned as an
        unchanged float copy when ``window <= 1`` or when ``arr`` is shorter
        than ``window``.
    """
    values = np.array(arr, dtype=float)

    if window <= 1 or len(values) < window:
        return values

    if window % 2 == 0:
        window += 1

    kernel = np.ones(window, dtype=float) / window
    pad = window // 2
    padded = np.pad(values, (pad, pad), mode="edge")

    # "valid" on the padded signal yields exactly len(values) samples.
    return np.convolve(padded, kernel, mode="valid")


def preprocess_values(ys_grid: np.ndarray, target: Dict) -> np.ndarray:
    """Smooth the gridded values according to the target configuration.

    Args:
        ys_grid: 1-D array-like of values.
        target: Target configuration. ``strategy`` (default
            ``"phase_point"``) and ``smooth_window`` (default ``1``) are
            read from it.

    Returns:
        A float array of the same length: a rolling median for the
        ``"phase_band"`` strategy, otherwise a moving average when
        ``smooth_window > 1``, otherwise an unsmoothed float copy.
    """
    strategy = target.get("strategy", "phase_point")
    smooth_window = int(target.get("smooth_window", 1))

    if strategy == "phase_band":
        return rolling_median(ys_grid, smooth_window)

    if smooth_window > 1:
        return moving_average(ys_grid, smooth_window)

    return np.array(ys_grid, dtype=float)


# =============================================================================
# Period estimation
# =============================================================================
- -def estimate_period_by_fft(ys_arr: np.ndarray) -> float: - n = len(ys_arr) - - if n < 8: - return 60.0 - - centered = ys_arr - np.mean(ys_arr) - - if np.allclose(centered, 0): - return 60.0 - - fft_vals = np.fft.rfft(centered) - freqs = np.fft.rfftfreq(n, d=1.0) - - if len(freqs) <= 1: - return 60.0 - - power = np.abs(fft_vals[1:]) - - if len(power) == 0 or np.max(power) <= 0: - return 60.0 - - dominant_idx = int(np.argmax(power)) + 1 - dominant_freq = float(freqs[dominant_idx]) - - if dominant_freq <= 0: - return 60.0 - - period = 1.0 / dominant_freq - - return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - - -def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float: - n = len(ys_arr) - - if n < 20: - return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - - centered = ys_arr - np.mean(ys_arr) - - if np.allclose(centered, 0): - return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - - corr = np.correlate(centered, centered, mode="full")[n - 1:] - - p0 = int(round(init_period)) - left = max(int(MIN_PERIOD_SECONDS), int(max(2, p0 * 0.7))) - right = min(n // 2, int(max(left + 1, p0 * 1.3))) - - if right <= left: - return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - - search = corr[left:right + 1] - - if len(search) == 0: - return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - - best_lag = left + int(np.argmax(search)) - - return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) - - -def estimate_period_rough(ys_arr: np.ndarray) -> int: - p_fft = estimate_period_by_fft(ys_arr) - p_refined = refine_period_by_autocorr(ys_arr, p_fft) - - period = int(round(p_refined)) - period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period)) - - return int(period) - - -# ============================================================================= -# 谷底检测 -# 
============================================================================= - -def find_valley_indices( - ts_grid: np.ndarray, - ys_grid: np.ndarray, - expected_period: int, -) -> List[int]: - n = len(ys_grid) - - if n < max(10, expected_period * 2): - return [] - - period = max(3, int(expected_period)) - smooth_window = max(3, int(round(period * 0.08))) - smooth_window = min(smooth_window, 21) - - ys_smooth = moving_average(ys_grid, smooth_window) - threshold = float(np.percentile(ys_smooth, VALLEY_QUANTILE)) - - candidates = [] - - for i in range(1, n - 1): - if ( - ys_smooth[i] <= ys_smooth[i - 1] - and ys_smooth[i] < ys_smooth[i + 1] - and ys_smooth[i] <= threshold - ): - candidates.append(i) - - if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE: - candidates = [] - - for i in range(1, n - 1): - if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]: - candidates.append(i) - - if not candidates: - return [] - - min_distance = max(2, int(round(period * 0.55))) - selected = [] - - for idx in candidates: - if not selected: - selected.append(idx) - continue - - if idx - selected[-1] >= min_distance: - selected.append(idx) - continue - - if ys_smooth[idx] < ys_smooth[selected[-1]]: - selected[-1] = idx - - if len(selected) < 2: - return selected - - cleaned = [selected[0]] - - for idx in selected[1:]: - diff = int(ts_grid[idx] - ts_grid[cleaned[-1]]) - - if int(period * 0.55) <= diff <= int(period * 1.60): - cleaned.append(idx) - continue - - if diff < int(period * 0.55): - if ys_smooth[idx] < ys_smooth[cleaned[-1]]: - cleaned[-1] = idx - continue - - cleaned.append(idx) - - return cleaned - - -def detect_period_and_valleys( - ts_grid: np.ndarray, - ys_grid: np.ndarray, -) -> Tuple[int, List[int]]: - rough = estimate_period_rough(ys_grid) - valleys = find_valley_indices(ts_grid, ys_grid, rough) - - if len(valleys) >= 3: - diffs = np.diff(ts_grid[valleys]) - good = diffs[(diffs >= rough * 0.55) & (diffs <= rough * 1.60)] - - if len(good) > 0: - 
period = int(round(float(np.median(good)))) - else: - period = rough - else: - period = rough - - period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period)) - - return int(period), valleys - - -# ============================================================================= -# 模板构建 -# ============================================================================= - -def build_templates_from_valleys( - ts_grid: np.ndarray, - ys_grid: np.ndarray, - period: int, - valleys: List[int], - target: Dict, -) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray]]: - if period <= 1 or len(valleys) < MIN_FULL_CYCLES_FOR_TEMPLATE + 1: - return None - - strategy = target.get("strategy", "phase_point") - low_q = float(target.get("band_low_q", 10)) - high_q = float(target.get("band_high_q", 90)) - - pairs = [] - - for a, b in zip(valleys[:-1], valleys[1:]): - cycle_len = float(ts_grid[b] - ts_grid[a]) - - if period * 0.55 <= cycle_len <= period * 1.60: - pairs.append((a, b, cycle_len)) - - if len(pairs) < MIN_FULL_CYCLES_FOR_TEMPLATE: - return None - - pairs = pairs[-MAX_CYCLES_FOR_TEMPLATE:] - - phase_grid = np.arange(period, dtype=float) - segments = [] - weights = [] - - for idx, (a, b, cycle_len) in enumerate(pairs): - seg_ts = ts_grid[a:b + 1] - seg_y = ys_grid[a:b + 1] - - if len(seg_y) < 3: - continue - - x_old = (seg_ts - seg_ts[0]) / cycle_len * period - seg = np.interp(phase_grid, x_old, seg_y) - - segments.append(seg.astype(float)) - weights.append(0.5 + 0.5 * ((idx + 1) / len(pairs))) - - if len(segments) < MIN_FULL_CYCLES_FOR_TEMPLATE: - return None - - arr = np.vstack(segments) - w_arr = np.array(weights, dtype=float) - - if strategy == "phase_band": - mid_template = np.percentile(arr, 50, axis=0) - lower_template = np.percentile(arr, low_q, axis=0) - upper_template = np.percentile(arr, high_q, axis=0) - else: - mid_template = np.average(arr, axis=0, weights=w_arr) - lower_template = mid_template.copy() - upper_template = mid_template.copy() - - return 
( - mid_template.astype(float), - lower_template.astype(float), - upper_template.astype(float), - ) - - -def build_current_baseline( - ts_grid: np.ndarray, - ys_grid: np.ndarray, - target: Dict, - tail_seconds: Optional[int] = None, -) -> Optional[Tuple[int, int, np.ndarray, np.ndarray, np.ndarray]]: - if len(ys_grid) < MIN_POINTS: - return None - - if tail_seconds is not None and tail_seconds > 0: - cutoff = ts_grid[-1] - int(tail_seconds) - mask = ts_grid >= cutoff - ts_use = ts_grid[mask] - ys_use = ys_grid[mask] - else: - ts_use = ts_grid - ys_use = ys_grid - - if len(ys_use) < MIN_POINTS: - return None - - period, valleys = detect_period_and_valleys(ts_use, ys_use) - - templates = build_templates_from_valleys( - ts_grid=ts_use, - ys_grid=ys_use, - period=period, - valleys=valleys, - target=target, - ) - - if templates is None or len(valleys) == 0: - return None - - template, lower_template, upper_template = templates - phase_origin_ts = int(round(float(ts_use[valleys[-1]]))) - - return int(period), phase_origin_ts, template, lower_template, upper_template - - -# ============================================================================= -# 模板预测 -# ============================================================================= - -def circular_template_value(template: np.ndarray, phase: float) -> float: - period = len(template) - - if period == 0: - return 0.0 - - phase = float(phase) % period - i0 = int(math.floor(phase)) % period - i1 = (i0 + 1) % period - frac = phase - math.floor(phase) - - return float((1.0 - frac) * template[i0] + frac * template[i1]) - - -def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray: - old_period = len(old_template) - - if old_period == new_period: - return old_template.astype(float) - - if old_period <= 1 or new_period <= 1: - return np.full(new_period, float(np.mean(old_template)), dtype=float) - - old_x = np.linspace(0.0, 1.0, old_period, endpoint=False) - new_x = np.linspace(0.0, 1.0, new_period, 
endpoint=False) - - old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0]) - old_y_ext = np.concatenate([old_template, old_template, old_template]) - - return np.interp(new_x, old_x_ext, old_y_ext).astype(float) - - -def predict_template_values( - template: np.ndarray, - period: int, - phase_origin_ts: int, - ts_list: List[int], -) -> np.ndarray: - if period <= 1: - return np.zeros(len(ts_list), dtype=float) - - if len(template) != period: - template = resample_template(template, period) - - values = [] - - for ts in ts_list: - phase = (int(ts) - int(phase_origin_ts)) % period - values.append(circular_template_value(template, phase)) - - return np.array(values, dtype=float) - - -def predict_state_bundle( - state: BaselineState, - ts_list: List[int], -) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: - period = int(state.period) - origin = int(state.phase_origin_ts) - - mid = predict_template_values( - template=np.array(state.template, dtype=float), - period=period, - phase_origin_ts=origin, - ts_list=ts_list, - ) - - lower = predict_template_values( - template=np.array(state.lower_template, dtype=float), - period=period, - phase_origin_ts=origin, - ts_list=ts_list, - ) - - upper = predict_template_values( - template=np.array(state.upper_template, dtype=float), - period=period, - phase_origin_ts=origin, - ts_list=ts_list, - ) - - return mid, lower, upper - - -def normalize_origin_near(origin: int, period: int, near_ts: int) -> int: - if period <= 1: - return origin - - origin = int(origin) - period = int(period) - near_ts = int(near_ts) - - while origin + period <= near_ts: - origin += period - - while origin > near_ts: - origin -= period - - return origin - - -def merge_template( - old_template: np.ndarray, - new_template: np.ndarray, - alpha: float, -) -> np.ndarray: - alpha = float(np.clip(alpha, 0.0, 1.0)) - - if len(old_template) != len(new_template): - old_template = resample_template(old_template, len(new_template)) - - merged = (1.0 - alpha) * 
old_template + alpha * new_template - - return merged.astype(float) - - -# ============================================================================= -# Phase Lock -# ============================================================================= - -def phase_lock_recent( - state: BaselineState, - ts_grid: np.ndarray, - ys_model: np.ndarray, -) -> Tuple[int, int, np.ndarray, float]: - base_period = int(state.period) - base_origin = int(state.phase_origin_ts) - base_template = np.array(state.template, dtype=float) - - if base_period <= 1 or len(base_template) <= 1: - ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist() - pred = predict_template_values(base_template, base_period, base_origin, ts_recent) - actual = ys_model[-len(ts_recent):].astype(float) - mae = float(np.mean(np.abs(actual - pred))) if len(actual) else 0.0 - return base_period, base_origin, pred, mae - - window_seconds = max( - PHASE_LOCK_MIN_WINDOW_SECONDS, - min(PHASE_LOCK_MAX_WINDOW_SECONDS, int(base_period * 2)), - ) - - cutoff = ts_grid[-1] - window_seconds - mask = ts_grid >= cutoff - - ts_recent_arr = ts_grid[mask].astype(int) - actual = ys_model[mask].astype(float) - - if len(ts_recent_arr) < max(10, DETECT_WINDOW_SECONDS): - ts_recent_arr = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int) - actual = ys_model[-DETECT_WINDOW_SECONDS:].astype(float) - - ts_recent = ts_recent_arr.tolist() - last_ts = int(ts_recent[-1]) - - p_min = max( - int(MIN_PERIOD_SECONDS), - int(round(base_period * (1.0 - PHASE_LOCK_PERIOD_SEARCH_RATIO))), - ) - p_max = min( - int(MAX_PERIOD_SECONDS), - int(round(base_period * (1.0 + PHASE_LOCK_PERIOD_SEARCH_RATIO))), - ) - - best_period = base_period - best_origin = normalize_origin_near(base_origin, base_period, last_ts) - best_template = resample_template(base_template, best_period) - - best_pred = predict_template_values( - template=best_template, - period=best_period, - phase_origin_ts=best_origin, - ts_list=ts_recent, - ) - - best_mae = 
float(np.mean(np.abs(actual - best_pred))) - - for period in range(p_min, p_max + 1, PHASE_LOCK_PERIOD_STEP): - template = resample_template(base_template, period) - center_origin = normalize_origin_near(base_origin, period, last_ts) - origin_shift = max(2, int(round(period * PHASE_LOCK_ORIGIN_SEARCH_RATIO))) - - for shift in range(-origin_shift, origin_shift + 1, PHASE_LOCK_ORIGIN_STEP): - origin = center_origin + shift - - pred = predict_template_values( - template=template, - period=period, - phase_origin_ts=origin, - ts_list=ts_recent, - ) - - mae = float(np.mean(np.abs(actual - pred))) - penalty = abs(period - base_period) * 0.5 - score = mae + penalty - - best_score = best_mae + abs(best_period - base_period) * 0.5 - - if score < best_score: - best_period = period - best_origin = origin - best_pred = pred - best_mae = mae - - best_origin = normalize_origin_near(best_origin, best_period, last_ts) - - return int(best_period), int(best_origin), best_pred, float(best_mae) - - -# ============================================================================= -# 异常检测 -# ============================================================================= - -def calc_point_bounds( - pred: np.ndarray, - abs_threshold: float, - rel_threshold: float, -) -> Tuple[np.ndarray, np.ndarray]: - threshold = np.maximum(abs_threshold, np.abs(pred) * rel_threshold) - return pred - threshold, pred + threshold - - -def calc_final_bounds( - state: BaselineState, - pred: np.ndarray, - lower_raw: np.ndarray, - upper_raw: np.ndarray, - target: Dict, -) -> Tuple[np.ndarray, np.ndarray]: - strategy = target.get("strategy", "phase_point") - abs_threshold = float(target.get("abs_threshold", 1.0)) - rel_threshold = float(target.get("rel_threshold", 0.25)) - - if strategy == "phase_band": - pad_abs = float(target.get("band_pad_abs", abs_threshold)) - dynamic_pad = np.maximum(pad_abs, np.abs(pred) * rel_threshold * 0.20) - lower = lower_raw - dynamic_pad - upper = upper_raw + dynamic_pad - return 
lower, upper - - return calc_point_bounds(pred, abs_threshold, rel_threshold) - - -def detect_anomaly( - state: BaselineState, - ts_grid: np.ndarray, - ys_model: np.ndarray, - target: Dict, -) -> Tuple[bool, float, float, float, int, int]: - best_period, best_origin, pred_recent, _ = phase_lock_recent( - state=state, - ts_grid=ts_grid, - ys_model=ys_model, - ) - - recent_len = len(pred_recent) - - if recent_len <= 0: - return False, 0.0, 0.0, 0.0, best_period, best_origin - - actual = ys_model[-recent_len:].astype(float) - - tmp_state = BaselineState( - period=best_period, - phase_origin_ts=best_origin, - template=state.template, - lower_template=state.lower_template, - upper_template=state.upper_template, - strategy=state.strategy, - status=state.status, - clean_seconds=state.clean_seconds, - last_update_ts=state.last_update_ts, - last_seen_ts=state.last_seen_ts, - y_min=state.y_min, - y_max=state.y_max, - ) - - recent_ts = ts_grid[-recent_len:].astype(int).tolist() - pred, lower_raw, upper_raw = predict_state_bundle(tmp_state, recent_ts) - - lower, upper = calc_final_bounds( - state=tmp_state, - pred=pred, - lower_raw=lower_raw, - upper_raw=upper_raw, - target=target, - ) - - outside = (actual < lower) | (actual > upper) - abs_err = np.abs(actual - pred) - - outside_ratio = float(np.mean(outside)) - mean_abs_err = float(np.mean(abs_err)) - mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1e-6))) - - is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD - - return ( - is_anomaly, - outside_ratio, - mean_abs_err, - mean_rel_err, - int(best_period), - int(best_origin), - ) - - -# ============================================================================= -# 状态管理 -# ============================================================================= - -def create_initial_state( - ts_grid: np.ndarray, - ys_model: np.ndarray, - target: Dict, - now_sec: int, -) -> Optional[BaselineState]: - baseline = build_current_baseline( - ts_grid=ts_grid, - 
ys_grid=ys_model, - target=target, - ) - - if baseline is None: - return None - - period, phase_origin_ts, template, lower_template, upper_template = baseline - - return BaselineState( - period=int(period), - phase_origin_ts=int(phase_origin_ts), - template=template.astype(float).tolist(), - lower_template=lower_template.astype(float).tolist(), - upper_template=upper_template.astype(float).tolist(), - strategy=str(target.get("strategy", "phase_point")), - status=BASELINE_STATUS_HEALTHY, - clean_seconds=int(period * MAX_CYCLES_FOR_TEMPLATE), - last_update_ts=now_sec, - last_seen_ts=now_sec, - y_min=float(np.min(ys_model)), - y_max=float(np.max(ys_model)), - ) - - -def apply_phase_lock_to_state( - state: BaselineState, - best_period: int, - best_origin: int, -) -> None: - best_period = int(best_period) - - if best_period <= 1: - return - - if len(state.template) != best_period: - state.template = resample_template( - np.array(state.template, dtype=float), - best_period, - ).astype(float).tolist() - - if len(state.lower_template) != best_period: - state.lower_template = resample_template( - np.array(state.lower_template, dtype=float), - best_period, - ).astype(float).tolist() - - if len(state.upper_template) != best_period: - state.upper_template = resample_template( - np.array(state.upper_template, dtype=float), - best_period, - ).astype(float).tolist() - - state.period = best_period - state.phase_origin_ts = int(best_origin) - - -def maybe_update_state( - key: str, - ts_grid: np.ndarray, - ys_model: np.ndarray, - target: Dict, -) -> Tuple[Optional[BaselineState], bool, float, float, float]: - now_sec = int(time.time()) - state = BASELINE_STATES.get(key) - - if state is None: - state = create_initial_state( - ts_grid=ts_grid, - ys_model=ys_model, - target=target, - now_sec=now_sec, - ) - - if state is None: - return None, False, 0.0, 0.0, 0.0 - - BASELINE_STATES[key] = state - - logger.info( - "初始化健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss", - key, - 
state.strategy, - state.period, - datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), - state.clean_seconds, - ) - - return state, False, 0.0, 0.0, 0.0 - - elapsed = max(1, now_sec - int(state.last_seen_ts)) - elapsed = min(elapsed, POLL_INTERVAL * 2) - state.last_seen_ts = now_sec - - ( - is_anomaly, - outside_ratio, - mean_abs_err, - mean_rel_err, - best_period, - best_origin, - ) = detect_anomaly( - state=state, - ts_grid=ts_grid, - ys_model=ys_model, - target=target, - ) - - if is_anomaly: - state.status = BASELINE_STATUS_ANOMALY - state.clean_seconds = 0 - BASELINE_STATES[key] = state - - logger.warning( - "检测到异常,冻结模板 key=%s outside_ratio=%.2f mean_abs_err=%.4f mean_rel_err=%.4f", - key, - outside_ratio, - mean_abs_err, - mean_rel_err, - ) - - return state, True, outside_ratio, mean_abs_err, mean_rel_err - - old_period = int(state.period) - old_origin = int(state.phase_origin_ts) - - apply_phase_lock_to_state(state, best_period, best_origin) - - if old_period != state.period or old_origin != state.phase_origin_ts: - logger.info( - "phase-lock key=%s period %s -> %s origin %s -> %s", - key, - old_period, - state.period, - datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"), - datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), - ) - - if state.status == BASELINE_STATUS_ANOMALY: - state.status = BASELINE_STATUS_RECOVERING - state.clean_seconds = elapsed - BASELINE_STATES[key] = state - - logger.info( - "异常开始恢复 key=%s clean_seconds=%ss", - key, - state.clean_seconds, - ) - - return state, False, outside_ratio, mean_abs_err, mean_rel_err - - if state.status == BASELINE_STATUS_RECOVERING: - state.clean_seconds += elapsed - else: - state.status = BASELINE_STATUS_HEALTHY - state.clean_seconds += elapsed - - min_clean_for_update = max( - RECOVERY_MIN_SECONDS, - int(state.period) * MIN_FULL_CYCLES_FOR_TEMPLATE, - ) - - if state.clean_seconds < min_clean_for_update: - BASELINE_STATES[key] = state - return state, False, outside_ratio, 
mean_abs_err, mean_rel_err - - tail_seconds = min( - int(state.clean_seconds), - int(state.period) * MAX_CYCLES_FOR_TEMPLATE, - ) - - baseline = build_current_baseline( - ts_grid=ts_grid, - ys_grid=ys_model, - target=target, - tail_seconds=tail_seconds, - ) - - if baseline is None: - BASELINE_STATES[key] = state - return state, False, outside_ratio, mean_abs_err, mean_rel_err - - new_period, new_origin, new_template, new_lower_template, new_upper_template = baseline - - alpha = RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else HEALTHY_EMA_ALPHA - - state.template = merge_template( - np.array(state.template, dtype=float), - new_template, - alpha, - ).astype(float).tolist() - - state.lower_template = merge_template( - np.array(state.lower_template, dtype=float), - new_lower_template, - alpha, - ).astype(float).tolist() - - state.upper_template = merge_template( - np.array(state.upper_template, dtype=float), - new_upper_template, - alpha, - ).astype(float).tolist() - - state.period = int(new_period) - state.phase_origin_ts = int(new_origin) - state.status = BASELINE_STATUS_HEALTHY - state.last_update_ts = now_sec - - if tail_seconds > 0 and len(ys_model) >= tail_seconds: - state.y_min = float(np.min(ys_model[-tail_seconds:])) - state.y_max = float(np.max(ys_model[-tail_seconds:])) - else: - state.y_min = float(np.min(ys_model)) - state.y_max = float(np.max(ys_model)) - - BASELINE_STATES[key] = state - - logger.info( - "更新健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss alpha=%.2f", - key, - state.strategy, - state.period, - datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), - state.clean_seconds, - alpha, - ) - - return state, False, outside_ratio, mean_abs_err, mean_rel_err - - -# ============================================================================= -# Prometheus 写入 -# ============================================================================= - -def prom_escape_label_value(value: str) -> str: - return ( - 
str(value) - .replace("\\", "\\\\") - .replace("\n", "\\n") - .replace('"', '\\"') - ) - - -def labels_to_str(labels: Dict[str, str]) -> str: - if not labels: - return "" - - parts = [] - - for k in sorted(labels.keys()): - parts.append(f'{k}="{prom_escape_label_value(labels[k])}"') - - return "{" + ",".join(parts) + "}" - - -def write_series( - metric_name: str, - labels: Dict[str, str], - ts_list: List[int], - values: List[float], -) -> bool: - if not ts_list or not values or len(ts_list) != len(values): - return False - - label_str = labels_to_str(labels) - lines = [] - - for t, y in zip(ts_list, values): - try: - ts_sec = int(round(float(t))) - val = float(y) - except Exception: - continue - - if not math.isfinite(ts_sec) or not math.isfinite(val): - continue - - lines.append(f"{metric_name}{label_str} {val:.6f} {ts_sec * 1000}") - - if not lines: - return False - - payload = "\n".join(lines) + "\n" - - try: - resp = requests.post( - f"{VM_URL}/api/v1/import/prometheus", - data=payload.encode("utf-8"), - headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"}, - timeout=10, - ) - resp.raise_for_status() - return True - - except requests.RequestException as e: - logger.error("写入数据失败 metric=%s: %s", metric_name, e) - return False - - -def write_prediction_bundle( - pred_metric: str, - anomaly_metric: str, - labels: Dict[str, str], - ts_future: List[int], - pred_values: np.ndarray, - lower_values: np.ndarray, - upper_values: np.ndarray, - is_anomaly: bool, - outside_ratio: float, - mean_abs_err: float, - mean_rel_err: float, - event_ts: int, -) -> bool: - ok1 = write_series( - metric_name=pred_metric, - labels=labels, - ts_list=ts_future, - values=pred_values.astype(float).tolist(), - ) - - ok2 = write_series( - metric_name=f"{pred_metric}_lower", - labels=labels, - ts_list=ts_future, - values=lower_values.astype(float).tolist(), - ) - - ok3 = write_series( - metric_name=f"{pred_metric}_upper", - labels=labels, - ts_list=ts_future, - 
values=upper_values.astype(float).tolist(), - ) - - anomaly_labels = dict(labels) - anomaly_labels["type"] = "prediction_deviation" - - ok4 = write_series( - metric_name=anomaly_metric, - labels=anomaly_labels, - ts_list=[event_ts], - values=[1.0 if is_anomaly else 0.0], - ) - - ok5 = write_series( - metric_name=f"{anomaly_metric}_outside_ratio", - labels=anomaly_labels, - ts_list=[event_ts], - values=[outside_ratio], - ) - - ok6 = write_series( - metric_name=f"{anomaly_metric}_mean_abs_error", - labels=anomaly_labels, - ts_list=[event_ts], - values=[mean_abs_err], - ) - - ok7 = write_series( - metric_name=f"{anomaly_metric}_mean_rel_error", - labels=anomaly_labels, - ts_list=[event_ts], - values=[mean_rel_err], - ) - - return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7 - - -# ============================================================================= -# 标签解析 -# ============================================================================= - -_LABEL_PATTERN = re.compile( - r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*' -) - - -def parse_labels_from_query(query: str) -> Dict[str, str]: - labels = {} - - if "{" not in query or "}" not in query: - return labels - - try: - label_part = query[query.index("{") + 1:query.rindex("}")] - except Exception: - return labels - - for match in _LABEL_PATTERN.finditer(label_part): - key = match.group(1) - value = match.group(2) - - value = ( - value - .replace('\\"', '"') - .replace("\\n", "\n") - .replace("\\\\", "\\") - ) - - labels[key] = value - - return labels - - -def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]: - result = {} - - for d in dicts: - if d: - result.update(d) - - return result - - -def series_key(metric_name: str, labels: Dict[str, str]) -> str: - return metric_name + labels_to_str(labels) - - -# ============================================================================= -# 状态持久化 -# ============================================================================= - -def load_state() 
-> None: - global BASELINE_STATES - - if not os.path.exists(STATE_FILE): - return - - try: - with open(STATE_FILE, "r", encoding="utf-8") as f: - raw = json.load(f) - - states = {} - - for key, value in raw.get("baseline_states", {}).items(): - required_fields = { - "period", - "phase_origin_ts", - "template", - "lower_template", - "upper_template", - "strategy", - "status", - "clean_seconds", - "last_update_ts", - "last_seen_ts", - "y_min", - "y_max", - } - - if not required_fields.issubset(set(value.keys())): - continue - - states[key] = BaselineState(**value) - - BASELINE_STATES = states - - logger.info( - "已加载预测状态文件 %s,状态数量=%d", - STATE_FILE, - len(BASELINE_STATES), - ) - - except Exception as e: - logger.warning("加载预测状态文件失败,将重新学习: %s", e) - - -def save_state() -> None: - try: - raw = { - "baseline_states": { - key: asdict(value) - for key, value in BASELINE_STATES.items() - } - } - - tmp_file = STATE_FILE + ".tmp" - - with open(tmp_file, "w", encoding="utf-8") as f: - json.dump(raw, f, ensure_ascii=False, indent=2) - - os.replace(tmp_file, STATE_FILE) - - except Exception as e: - logger.warning("保存预测状态文件失败: %s", e) - - -# ============================================================================= -# 时间轴 -# ============================================================================= - -def build_prediction_timestamps( - key: str, - last_real_ts: int, - now_sec: int, -) -> Optional[List[int]]: - data_lag = now_sec - last_real_ts - - if data_lag > MAX_DATA_LAG_SECONDS: - logger.warning( - "真实数据延迟过大,跳过预测 key=%s data_lag=%ss max=%ss", - key, - data_lag, - MAX_DATA_LAG_SECONDS, - ) - return None - - last_written_real_ts = LAST_REAL_TS_WRITTEN.get(key) - - if last_written_real_ts is not None and last_real_ts <= int(last_written_real_ts): - logger.info( - "真实数据时间戳未推进,跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s", - key, - last_real_ts, - last_written_real_ts, - ) - return None - - base_ts = last_real_ts - - return [ - base_ts + i + 1 - for i in 
range(WRITE_HORIZON_SECONDS) - ] - - -# ============================================================================= -# 主流程 -# ============================================================================= - -def run_once() -> None: - now_str = datetime.now().strftime("%H:%M:%S") - - for target in PREDICT_TARGETS: - query = target["query"] - pred_metric = target["pred_metric"] - anomaly_metric = target["anomaly_metric"] - - ts, ys = fetch_history(query) - - if len(ys) < MIN_POINTS: - logger.info("[%s] %s 数据不足(%d 点),跳过", now_str, query, len(ys)) - continue - - ts_grid, ys_grid_raw = normalize_history(ts, ys) - - if len(ys_grid_raw) < MIN_POINTS: - logger.info("[%s] %s 清洗后数据不足(%d 点),跳过", now_str, query, len(ys_grid_raw)) - continue - - ys_grid_model = preprocess_values(ys_grid_raw, target) - - base_labels = parse_labels_from_query(query) - write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS) - - key = series_key(pred_metric, write_labels) - - state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err = maybe_update_state( - key=key, - ts_grid=ts_grid, - ys_model=ys_grid_model, - target=target, - ) - - if state is None: - logger.info("[%s] %s 暂无可用健康模板,等待学习", now_str, query) - continue - - now_sec = int(time.time()) - last_real_ts = int(ts_grid[-1]) - data_lag = now_sec - last_real_ts - - ts_future = build_prediction_timestamps( - key=key, - last_real_ts=last_real_ts, - now_sec=now_sec, - ) - - if not ts_future: - continue - - pred_values, lower_raw, upper_raw = predict_state_bundle(state, ts_future) - - lower_values, upper_values = calc_final_bounds( - state=state, - pred=pred_values, - lower_raw=lower_raw, - upper_raw=upper_raw, - target=target, - ) - - ok = write_prediction_bundle( - pred_metric=pred_metric, - anomaly_metric=anomaly_metric, - labels=write_labels, - ts_future=ts_future, - pred_values=pred_values, - lower_values=lower_values, - upper_values=upper_values, - is_anomaly=is_anomaly, - outside_ratio=outside_ratio, - mean_abs_err=mean_abs_err, - 
mean_rel_err=mean_rel_err, - event_ts=last_real_ts, - ) - - if not ok: - logger.error("[%s] %s 写入预测数据失败", now_str, query) - continue - - LAST_REAL_TS_WRITTEN[key] = last_real_ts - - future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S") - future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S") - last_real_str = datetime.fromtimestamp(last_real_ts).strftime("%H:%M:%S") - origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S") - - logger.info( - "[%s] %-40s → %-35s strategy=%s status=%s anomaly=%s period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点,预测区间 %s ~ %s", - now_str, - query, - pred_metric, - state.strategy, - state.status, - is_anomaly, - state.period, - origin_str, - last_real_str, - data_lag, - len(ts_future), - future_start, - future_end, - ) - - save_state() - - -def main() -> None: - load_state() - - logger.info( - "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s", - VM_URL, - HISTORY_MINUTES, - HORIZON_SECONDS, - WRITE_HORIZON_SECONDS, - POLL_INTERVAL, - STATE_FILE, - EXTRA_PREDICT_LABELS["forecast"], - ) - - while True: - run_once() - time.sleep(POLL_INTERVAL) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/ai/pridict_v5.py b/ai/pridict_v5.py index dde0b11..221310b 100644 --- a/ai/pridict_v5.py +++ b/ai/pridict_v5.py @@ -355,6 +355,140 @@ def refresh_targets_if_needed() -> None: logger.warning("发现流程未产生任何有效目标,保持现有目标列表") +EXTRA_PREDICT_LABELS = { + "forecast": "phase_band_health_v13", + "source": "protoforge", +} + +# ============================================================================= +# 仿真感知策略覆盖 +# 基于模板仿真算法的特征,对特定指标强制覆盖自动推断的策略与参数。 +# +# 粗铣(fanuc-cnc) 周期约 180s 含随机抖动 ±8~10s: +# - feed_rate : 双频拐角扰动(含 sin 叠加),强制 phase_band +# - spindle_current : 双频漂移,强制 phase_band +# - spindle_load : 三频漂移,强制 phase_band +# - phase_lock : 周期搜索范围扩至 ±18%(覆盖抖动 + 相位偏移) +# +# 半精铣(fanuc-cnc-semi-finish) / 精铣(fanuc-cnc-finish) 周期固定: +# - spindle_load : 
噪声较大,强制 phase_band +# - 其余指标保持自动推断 +# ============================================================================= + +# 按 device_id 片段 + metric 名称匹配的策略覆盖表 +# 格式: (device_id_substring, metric) -> {overrides} +_SIMULATION_STRATEGY_OVERRIDES: List[Tuple[str, str, Dict]] = [ + # ── 粗铣工位 ────────────────────────────────────────────────────────────── + # feed_rate: 双频拐角扰动幅值大(±80mm/min),phase_band + 宽搜索 + ("fanuc-cnc", "feed_rate", { + "strategy": "phase_band", + "band_low_q": 5.0, + "band_high_q": 95.0, + "band_pad_abs": 40.0, + "phase_lock_period_search_ratio": 0.18, + "phase_lock_origin_search_ratio": 0.45, + "smooth_window": 5, + }), + # spindle_current: 双频漂移(约 ±1.5A),phase_band + ("fanuc-cnc", "spindle_current", { + "strategy": "phase_band", + "band_low_q": 5.0, + "band_high_q": 95.0, + "band_pad_abs": 2.5, + "phase_lock_period_search_ratio": 0.18, + "phase_lock_origin_search_ratio": 0.45, + "smooth_window": 5, + }), + # spindle_load: 三频漂移(约 ±8%),phase_band + ("fanuc-cnc", "spindle_load", { + "strategy": "phase_band", + "band_low_q": 5.0, + "band_high_q": 95.0, + "band_pad_abs": 6.0, + "phase_lock_period_search_ratio": 0.18, + "phase_lock_origin_search_ratio": 0.45, + "smooth_window": 5, + }), + # spindle_speed: 周期抖动大,扩大搜索范围(策略保持自动推断) + ("fanuc-cnc", "spindle_speed", { + "phase_lock_period_search_ratio": 0.18, + "phase_lock_origin_search_ratio": 0.45, + }), + # ── 半精铣工位 ──────────────────────────────────────────────────────────── + # spindle_load: gauss(2.5) 噪声较大,phase_band + ("fanuc-cnc-semi-finish", "spindle_load", { + "strategy": "phase_band", + "band_low_q": 5.0, + "band_high_q": 95.0, + "band_pad_abs": 4.0, + "smooth_window": 5, + }), + # spindle_current: gauss(0.9),偏稳定,但保留 phase_band 以容忍切入峰值 + ("fanuc-cnc-semi-finish", "spindle_current", { + "strategy": "phase_band", + "band_low_q": 5.0, + "band_high_q": 95.0, + "band_pad_abs": 2.0, + "smooth_window": 3, + }), + # ── 精铣工位 ────────────────────────────────────────────────────────────── + # spindle_load: 
gauss(1.5),切入有峰值,phase_band + ("fanuc-cnc-finish", "spindle_load", { + "strategy": "phase_band", + "band_low_q": 5.0, + "band_high_q": 95.0, + "band_pad_abs": 3.0, + "smooth_window": 5, + }), + # spindle_current: 切入峰值 11A vs 稳态 8.5A,phase_band + ("fanuc-cnc-finish", "spindle_current", { + "strategy": "phase_band", + "band_low_q": 5.0, + "band_high_q": 95.0, + "band_pad_abs": 1.5, + "smooth_window": 3, + }), +] + + +def _apply_simulation_overrides(target: Dict, device_id: str) -> Dict: + """ + 根据仿真感知覆盖表,对 target dict 应用策略和参数覆盖。 + 匹配规则:device_id 包含指定子串 且 metric 名称匹配。 + 粗铣工位的 device_id 通常含 'fanuc-cnc' 但不含 'semi-finish'/'finish', + 因此半精铣/精铣规则放在粗铣规则之后(更具体的子串先匹配)。 + """ + # 从 pred_metric 还原 metric 名(格式:xxx_predicted) + pred_metric = target.get("pred_metric", "") + metric = pred_metric.replace("_predicted", "") if pred_metric.endswith("_predicted") else "" + + if not metric: + return target + + # 按顺序匹配——更具体的子串(semi-finish/finish)应排在 fanuc-cnc 前面, + # 但在覆盖表中我们已经将半精铣/精铣规则放在粗铣规则之后,通过子串包含顺序 + # 保证精确匹配:semi-finish 不会匹配纯 "fanuc-cnc" 的规则,因为设备 ID + # 是完整字符串,检查如下——对于 fanuc-cnc 规则,额外排除含 semi/finish 的设备。 + applied = dict(target) + for device_substr, rule_metric, overrides in _SIMULATION_STRATEGY_OVERRIDES: + if rule_metric != metric: + continue + # 粗铣规则(substr == "fanuc-cnc")不应命中半精铣/精铣设备 + if device_substr == "fanuc-cnc" and ( + "semi-finish" in device_id or "finish" in device_id + ): + continue + if device_substr in device_id: + applied.update(overrides) + logger.debug( + "仿真策略覆盖 device=%s metric=%s overrides=%s", + device_id, metric, list(overrides.keys()), + ) + break + + return applied + + BASELINE_STATUS_HEALTHY = "healthy" BASELINE_STATUS_ANOMALY = "anomaly" BASELINE_STATUS_RECOVERING = "recovering" @@ -1848,9 +1982,15 @@ def run_once() -> None: logger.info("[%s] %s 清洗后数据不足(%d 点),跳过", now_str, query, len(ys_grid_raw)) continue - ys_grid_model = preprocess_values(ys_grid_raw, target) - base_labels = parse_labels_from_query(query) + + # 根据仿真算法特征,对 feed_rate / spindle_current / 
spindle_load 等指标 + # 应用感知覆盖(强制 phase_band、扩大粗铣搜索范围等) + device_id_from_labels = base_labels.get("device_id", "") + effective_target = _apply_simulation_overrides(target, device_id_from_labels) + + ys_grid_model = preprocess_values(ys_grid_raw, effective_target) + write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS) key = series_key(pred_metric, write_labels) @@ -1868,7 +2008,7 @@ def run_once() -> None: ts_grid=ts_grid, ys_model=ys_grid_model, ys_actual=ys_grid_raw, - target=target, + target=effective_target, ) if state is None: @@ -1895,7 +2035,7 @@ def run_once() -> None: pred=pred_values, lower_raw=lower_raw, upper_raw=upper_raw, - target=target, + target=effective_target, ) ok = write_prediction_bundle( diff --git a/protoforge/core/cnc_metric_generator.py b/protoforge/core/cnc_metric_generator.py new file mode 100644 index 0000000..329f56e --- /dev/null +++ b/protoforge/core/cnc_metric_generator.py @@ -0,0 +1,661 @@ +""" +CNC 车床正常加工状态时序数据生成算法 +===================================== + +设计原则: + - 所有指标由切削强度 cutting_intensity 统一驱动,禁止各自独立随机。 + - 热惯性模型:tool_temperature 使用一阶 RC 滤波,alpha ≈ 0.04/tick。 + - 电流滞后:spindle_current 对 spindle_load 有 1~3 tick 的一阶滞后。 + - 磨损单调:tool_wear_value 在切削阶段只增不减。 + - 噪声比例:roughing > semi_finishing > finishing,稳定性反向。 + - 纯 Python 标准库实现,无第三方依赖。 + +用法: + generator = BaseMetricGenerator() + frame = generator.generate(t=0.0, dt=1.0, stage="roughing") +""" + +from __future__ import annotations + +import math +import random +from dataclasses import dataclass, field +from typing import Optional + + +# --------------------------------------------------------------------------- +# 数据结构 +# --------------------------------------------------------------------------- + +@dataclass +class StageProfile: + """某个加工阶段的工艺参数范围及行为特征。""" + stage: str + + feed_rate_min: float + feed_rate_max: float + spindle_speed_min: float + spindle_speed_max: float + spindle_current_min: float + spindle_current_max: float + spindle_load_min: float + spindle_load_max: 
float + vibration_min: float + vibration_max: float + acoustic_min: float + acoustic_max: float + temperature_min: float + temperature_max: float + surface_roughness_min: float + surface_roughness_max: float + + # 每 tick 磨损增量的阶段系数(idle/tool_change = 0) + wear_rate_factor: float + # 稳定性因子:越高噪声越小,finishing=0.95,roughing=0.6 + stability_factor: float + + # 衍生属性 + @property + def feed_rate_mid(self) -> float: + return (self.feed_rate_min + self.feed_rate_max) / 2 + + @property + def spindle_speed_mid(self) -> float: + return (self.spindle_speed_min + self.spindle_speed_max) / 2 + + @property + def spindle_load_mid(self) -> float: + return (self.spindle_load_min + self.spindle_load_max) / 2 + + @property + def vibration_mid(self) -> float: + return (self.vibration_min + self.vibration_max) / 2 + + @property + def acoustic_mid(self) -> float: + return (self.acoustic_min + self.acoustic_max) / 2 + + @property + def temperature_mid(self) -> float: + return (self.temperature_min + self.temperature_max) / 2 + + @property + def surface_roughness_mid(self) -> float: + return (self.surface_roughness_min + self.surface_roughness_max) / 2 + + +@dataclass +class MetricFrame: + """单个 tick 产出的所有指标快照。""" + timestamp: float + stage: str + + feed_rate: float # mm/min + spindle_speed: float # RPM + spindle_current: float # A + spindle_load: float # % + vibration_x: float # mm/s + vibration_y: float # mm/s + vibration_z: float # mm/s + acoustic_emission: float # V(声发射传感器电压,代表强度) + tool_temperature: float # °C + surface_roughness: float # μm Ra + tool_wear_value: float # μm(累积磨损量) + + +@dataclass +class GeneratorState: + """跨 tick 需要持久化的生成器内部状态。""" + # 材料扰动随机游走值(慢变量,[-0.05, +0.05]) + material_random_walk: float = 0.0 + # 热状态(tool_temperature 的平滑变量) + thermal_state: float = 28.0 + # 刀具累积磨损(μm,单调不减) + tool_wear_accumulated: float = 0.0 + # 上一 tick 的 spindle_load(用于电流滞后计算) + last_spindle_load: float = 0.0 + # 滞后缓冲区(最多保存 3 tick 历史) + load_lag_buffer: list = field(default_factory=lambda: [0.0, 
0.0, 0.0]) + # 上一 tick 的 surface_roughness(idle 阶段保持上次值) + last_surface_roughness: float = 1.0 + # 切削周期相位(用于 cutting_cycle_wave) + cycle_phase: float = 0.0 + # 当前阶段 + current_stage: str = "idle" + + +# --------------------------------------------------------------------------- +# 阶段配置 +# --------------------------------------------------------------------------- + +_STAGE_PROFILES: dict[str, StageProfile] = { + "idle": StageProfile( + stage="idle", + feed_rate_min=0.0, feed_rate_max=5.0, + spindle_speed_min=0.0, spindle_speed_max=100.0, + spindle_current_min=0.5, spindle_current_max=2.0, + spindle_load_min=0.0, spindle_load_max=5.0, + vibration_min=0.01, vibration_max=0.08, + acoustic_min=0.01, acoustic_max=0.08, + temperature_min=25.0, temperature_max=40.0, + surface_roughness_min=0.3, surface_roughness_max=1.5, + wear_rate_factor=0.0, + stability_factor=1.0, + ), + "tool_change": StageProfile( + stage="tool_change", + feed_rate_min=0.0, feed_rate_max=20.0, + spindle_speed_min=0.0, spindle_speed_max=100.0, + spindle_current_min=1.0, spindle_current_max=4.0, + spindle_load_min=0.0, spindle_load_max=8.0, + vibration_min=0.05, vibration_max=0.3, + acoustic_min=0.05, acoustic_max=0.4, + temperature_min=25.0, temperature_max=45.0, + surface_roughness_min=0.3, surface_roughness_max=1.5, + wear_rate_factor=0.0, + stability_factor=0.8, + ), + "roughing": StageProfile( + stage="roughing", + feed_rate_min=800.0, feed_rate_max=1600.0, + spindle_speed_min=1200.0, spindle_speed_max=2500.0, + spindle_current_min=12.0, spindle_current_max=25.0, + spindle_load_min=45.0, spindle_load_max=80.0, + vibration_min=0.4, vibration_max=1.2, + acoustic_min=0.5, acoustic_max=1.3, + temperature_min=45.0, temperature_max=75.0, + surface_roughness_min=2.0, surface_roughness_max=6.0, + wear_rate_factor=1.5, + stability_factor=0.6, + ), + "semi_finishing": StageProfile( + stage="semi_finishing", + feed_rate_min=400.0, feed_rate_max=900.0, + spindle_speed_min=2200.0, spindle_speed_max=3800.0, + 
spindle_current_min=8.0, spindle_current_max=18.0, + spindle_load_min=30.0, spindle_load_max=60.0, + vibration_min=0.25, vibration_max=0.8, + acoustic_min=0.3, acoustic_max=0.9, + temperature_min=40.0, temperature_max=65.0, + surface_roughness_min=1.0, surface_roughness_max=3.0, + wear_rate_factor=1.0, + stability_factor=0.8, + ), + "finishing": StageProfile( + stage="finishing", + feed_rate_min=100.0, feed_rate_max=400.0, + spindle_speed_min=3000.0, spindle_speed_max=5000.0, + spindle_current_min=5.0, spindle_current_max=12.0, + spindle_load_min=15.0, spindle_load_max=40.0, + vibration_min=0.1, vibration_max=0.45, + acoustic_min=0.15, acoustic_max=0.5, + temperature_min=35.0, temperature_max=55.0, + surface_roughness_min=0.3, surface_roughness_max=1.5, + wear_rate_factor=0.5, + stability_factor=0.95, + ), +} + +# 阶段切削强度基准系数(归一化到 [0,1] 区间用于 cutting_intensity 计算) +_STAGE_INTENSITY_FACTOR: dict[str, float] = { + "idle": 0.02, + "tool_change": 0.05, + "roughing": 1.00, + "semi_finishing": 0.65, + "finishing": 0.35, +} + +# 基础磨损速率 μm/tick(roughing 1.5×,finishing 0.5×) +_BASE_WEAR_RATE = 0.002 # μm/tick,在 roughing 阶段约每 500 tick 磨损 1 μm + + +# --------------------------------------------------------------------------- +# 主生成器 +# --------------------------------------------------------------------------- + +class BaseMetricGenerator: + """ + CNC 车床正常加工状态时序数据生成器。 + + 典型用法: + gen = BaseMetricGenerator(ambient_temperature=28.0, seed=20260609) + frame = gen.generate(t=0.0, dt=1.0, stage="roughing") + """ + + def __init__( + self, + ambient_temperature: float = 28.0, + seed: Optional[int] = None, + thermal_alpha: float = 0.04, + ): + self._ambient = ambient_temperature + self._rng = random.Random(seed) + # 热惯性系数(每 tick 向目标温度靠近的比例) + self._thermal_alpha = thermal_alpha + self._state = GeneratorState( + thermal_state=ambient_temperature, + last_surface_roughness=1.0, + ) + + # ------------------------------------------------------------------ + # 公开 API + # 
------------------------------------------------------------------ + + def generate(self, t: float, dt: float, stage: str) -> MetricFrame: + """ + 生成一帧指标数据。 + + Args: + t: 当前时间(秒),用于低频波形计算。 + dt: 时间步长(秒),影响磨损增量和热惯性。 + stage: 加工阶段名称(idle/tool_change/roughing/semi_finishing/finishing)。 + + Returns: + MetricFrame,所有指标均已 clamp 至合理范围。 + """ + profile = self.get_stage_profile(stage) + state = self._state + state.current_stage = stage + + # ── 1. 材料扰动(慢变量,低频正弦 + 随机游走)────────────────────── + material_variation = self._calc_material_variation(t, dt, state) + + # ── 2. 切削周期波动 ────────────────────────────────────────────────── + cutting_cycle_wave = self._calc_cutting_cycle_wave(t, dt, stage, state, profile) + + # ── 3. feed_rate ────────────────────────────────────────────────────── + feed_rate = self._calc_feed_rate(profile, cutting_cycle_wave, stage) + + # ── 4. spindle_speed ────────────────────────────────────────────────── + spindle_speed = self._calc_spindle_speed(profile, stage) + + # ── 5. cutting_intensity(归一化切削强度)──────────────────────────── + cutting_intensity = self._calc_cutting_intensity( + feed_rate, stage, material_variation, profile + ) + + # ── 6. spindle_load ─────────────────────────────────────────────────── + spindle_load = self._calc_spindle_load( + profile, cutting_intensity, cutting_cycle_wave + ) + + # ── 7. spindle_current(对 load 有 1~2 tick 滞后)──────────────────── + spindle_current = self._calc_spindle_current(profile, spindle_load, state) + + # ── 8. vibration(三轴,各有小幅随机偏差)──────────────────────────── + vib_x, vib_y, vib_z = self._calc_vibration( + profile, spindle_load, feed_rate, stage + ) + + # ── 9. acoustic_emission ───────────────────────────────────────────── + vibration_rms = (vib_x + vib_y + vib_z) / 3.0 + acoustic_emission = self._calc_acoustic(profile, vibration_rms, spindle_load) + + # ── 10. 
tool_temperature(热惯性模型)──────────────────────────────── + tool_temperature = self._calc_temperature( + profile, spindle_load, spindle_current, dt, state + ) + + # ── 11. tool_wear_value(单调递增)──────────────────────────────────── + tool_wear_value = self._calc_tool_wear(profile, spindle_load, dt, state) + + # ── 12. surface_roughness ───────────────────────────────────────────── + surface_roughness = self._calc_surface_roughness( + profile, vibration_rms, tool_wear_value, stage, state + ) + + # ── 13. 更新滞后缓冲区 ──────────────────────────────────────────────── + state.load_lag_buffer.pop(0) + state.load_lag_buffer.append(spindle_load) + state.last_spindle_load = spindle_load + state.last_surface_roughness = surface_roughness + + # ── 14. 构造帧 + clamp ──────────────────────────────────────────────── + frame = MetricFrame( + timestamp=t, + stage=stage, + feed_rate=feed_rate, + spindle_speed=spindle_speed, + spindle_current=spindle_current, + spindle_load=spindle_load, + vibration_x=vib_x, + vibration_y=vib_y, + vibration_z=vib_z, + acoustic_emission=acoustic_emission, + tool_temperature=tool_temperature, + surface_roughness=surface_roughness, + tool_wear_value=tool_wear_value, + ) + return self.clamp_frame(frame) + + def get_stage_profile(self, stage: str) -> StageProfile: + if stage not in _STAGE_PROFILES: + raise ValueError(f"Unknown stage: {stage!r}. 
Valid: {list(_STAGE_PROFILES)}") + return _STAGE_PROFILES[stage] + + def reset_wear(self) -> None: + """换刀后重置磨损量(新刀从 0 开始)。""" + self._state.tool_wear_accumulated = 0.0 + + @property + def state(self) -> GeneratorState: + return self._state + + # ------------------------------------------------------------------ + # 各指标计算 + # ------------------------------------------------------------------ + + def _calc_material_variation( + self, t: float, dt: float, state: GeneratorState + ) -> float: + """ + 材料均匀性扰动,慢变量。 + = 1.0 + 低频正弦(周期60s,幅度±3%)+ 随机游走(±1%/tick) + """ + slow_sine = 0.03 * math.sin(2 * math.pi * t / 60.0) + walk_step = self._rng.gauss(0, 0.005) * dt + state.material_random_walk = max( + -0.05, min(0.05, state.material_random_walk + walk_step) + ) + return 1.0 + slow_sine + state.material_random_walk + + def _calc_cutting_cycle_wave( + self, + t: float, + dt: float, + stage: str, + state: GeneratorState, + profile: StageProfile, + ) -> float: + """ + 切削周期波动(模拟走刀一圈的周期性载荷)。 + roughing 幅度较大(±8%),finishing 幅度较小(±3%)。 + """ + # 切削周期:roughing 约 0.5~1 rpm 对应进给一圈,用简化固定周期模拟 + period_map = { + "roughing": 8.0, + "semi_finishing": 6.0, + "finishing": 4.0, + "idle": 10.0, + "tool_change": 10.0, + } + amplitude_map = { + "roughing": 0.08, + "semi_finishing": 0.055, + "finishing": 0.03, + "idle": 0.01, + "tool_change": 0.02, + } + period = period_map.get(stage, 6.0) + amplitude = amplitude_map.get(stage, 0.05) + state.cycle_phase = (state.cycle_phase + dt * 2 * math.pi / period) % ( + 2 * math.pi + ) + return 1.0 + amplitude * math.sin(state.cycle_phase) + + def _calc_feed_rate( + self, + profile: StageProfile, + cutting_cycle_wave: float, + stage: str, + ) -> float: + """ + 进给速度 = 阶段中值 × 切削波动 + 噪声。 + idle/tool_change 接近 0,finishing 更稳定。 + """ + if stage in ("idle", "tool_change"): + return max(0.0, self._rng.uniform(profile.feed_rate_min, profile.feed_rate_max)) + noise_ratio = (1.0 - profile.stability_factor) * 0.06 + base = profile.feed_rate_mid * cutting_cycle_wave + 
noise = self._rng.gauss(0, base * noise_ratio) + return max(profile.feed_rate_min, min(profile.feed_rate_max, base + noise)) + + def _calc_spindle_speed(self, profile: StageProfile, stage: str) -> float: + """ + 主轴转速正常状态下稳定。 + roughing 允许 2% 波动,finishing 允许 0.8% 波动。 + """ + if stage in ("idle", "tool_change"): + return self._rng.uniform(profile.spindle_speed_min, profile.spindle_speed_max) + noise_pct = { + "roughing": 0.020, + "semi_finishing": 0.015, + "finishing": 0.008, + }.get(stage, 0.015) + base = profile.spindle_speed_mid + noise = self._rng.gauss(0, base * noise_pct) + return max(profile.spindle_speed_min, min(profile.spindle_speed_max, base + noise)) + + def _calc_cutting_intensity( + self, + feed_rate: float, + stage: str, + material_variation: float, + profile: StageProfile, + ) -> float: + """ + 切削强度(0~1),驱动后续所有与切削力相关的指标。 + = normalize(feed_rate) × stage_factor × material_variation + """ + stage_factor = _STAGE_INTENSITY_FACTOR.get(stage, 0.5) + if profile.feed_rate_max <= profile.feed_rate_min: + norm_feed = 0.5 + else: + norm_feed = (feed_rate - profile.feed_rate_min) / ( + profile.feed_rate_max - profile.feed_rate_min + ) + norm_feed = max(0.0, min(1.0, norm_feed)) + return max(0.0, min(1.0, norm_feed * stage_factor * material_variation)) + + def _calc_spindle_load( + self, + profile: StageProfile, + cutting_intensity: float, + cutting_cycle_wave: float, + ) -> float: + """ + 主轴负载(%)= 阶段基线 + cutting_intensity 加权 + 切削波动 + 噪声。 + """ + load_range = profile.spindle_load_max - profile.spindle_load_min + load_base = profile.spindle_load_min + load_range * cutting_intensity + load = load_base * cutting_cycle_wave + noise = self._rng.gauss(0, load_range * (1.0 - profile.stability_factor) * 0.04) + return max(profile.spindle_load_min, min(profile.spindle_load_max, load + noise)) + + def _calc_spindle_current( + self, + profile: StageProfile, + spindle_load: float, + state: GeneratorState, + ) -> float: + """ + 主轴电流(A),对负载有 1~2 tick 滞后(一阶低通)。 + current = 
idle_current + k × lag_load + noise + k 由阶段电流范围和负载范围反推。 + """ + # 滞后混合:60% 当前负载 + 25% 上一 tick + 15% 两 tick 前 + lag_load = spindle_load * 0.60 + state.load_lag_buffer[1] * 0.25 + state.load_lag_buffer[0] * 0.15 + # 线性映射:load_min → current_min,load_max → current_max + load_range = profile.spindle_load_max - profile.spindle_load_min + current_range = profile.spindle_current_max - profile.spindle_current_min + if load_range > 0: + k = current_range / load_range + else: + k = 0.0 + current_base = profile.spindle_current_min + k * (lag_load - profile.spindle_load_min) + noise = self._rng.gauss( + 0, + (profile.spindle_current_max - profile.spindle_current_min) + * (1.0 - profile.stability_factor) + * 0.03, + ) + return max(profile.spindle_current_min, min(profile.spindle_current_max, current_base + noise)) + + def _calc_vibration( + self, + profile: StageProfile, + spindle_load: float, + feed_rate: float, + stage: str, + ) -> tuple[float, float, float]: + """ + 振动(mm/s),三轴各有独立微偏。 + vibration = base × (1 + load_factor × feed_factor) + noise + """ + load_norm = (spindle_load - profile.spindle_load_min) / max( + profile.spindle_load_max - profile.spindle_load_min, 1.0 + ) + feed_norm = (feed_rate - profile.feed_rate_min) / max( + profile.feed_rate_max - profile.feed_rate_min, 1.0 + ) + vib_base = profile.vibration_min + ( + profile.vibration_max - profile.vibration_min + ) * load_norm + vib_combined = vib_base * (1.0 + 0.15 * feed_norm) + noise_sigma = vib_combined * (1.0 - profile.stability_factor) * 0.08 + + # 三轴偏差因子(确定性偏置 + 小噪声,不完全相同) + vib_x = vib_combined * self._rng.uniform(0.85, 1.15) + self._rng.gauss(0, noise_sigma) + vib_y = vib_combined * self._rng.uniform(0.90, 1.25) + self._rng.gauss(0, noise_sigma) + vib_z = vib_combined * self._rng.uniform(0.75, 1.05) + self._rng.gauss(0, noise_sigma) + + return ( + max(0.0, vib_x), + max(0.0, vib_y), + max(0.0, vib_z), + ) + + def _calc_acoustic( + self, + profile: StageProfile, + vibration_rms: float, + spindle_load: float, 
+ ) -> float: + """ + 声发射(V),受振动(40%权重)和主轴负载(30%权重)影响。 + """ + vib_norm = (vibration_rms - profile.vibration_min) / max( + profile.vibration_max - profile.vibration_min, 1e-6 + ) + load_norm = (spindle_load - profile.spindle_load_min) / max( + profile.spindle_load_max - profile.spindle_load_min, 1.0 + ) + acoustic_range = profile.acoustic_max - profile.acoustic_min + acoustic = profile.acoustic_min + acoustic_range * ( + 0.4 * vib_norm + 0.3 * load_norm + 0.3 + ) + noise = self._rng.gauss(0, acoustic_range * 0.03) + return max(profile.acoustic_min, min(profile.acoustic_max, acoustic + noise)) + + def _calc_temperature( + self, + profile: StageProfile, + spindle_load: float, + spindle_current: float, + dt: float, + state: GeneratorState, + ) -> float: + """ + 刀具温度(°C),一阶热惯性模型,慢变量。 + target = ambient + k1 × load + k2 × current + thermal_state += alpha × (target - thermal_state) × dt + """ + k1 = (profile.temperature_max - self._ambient) / max(profile.spindle_load_max, 1.0) * 0.6 + k2 = (profile.temperature_max - self._ambient) / max(profile.spindle_current_max, 1.0) * 0.4 + target_temp = self._ambient + k1 * spindle_load + k2 * spindle_current + target_temp = max(self._ambient, min(120.0, target_temp)) + + alpha = self._thermal_alpha * dt + state.thermal_state += alpha * (target_temp - state.thermal_state) + + noise = self._rng.gauss(0, 0.3) + return max(20.0, min(120.0, state.thermal_state + noise)) + + def _calc_tool_wear( + self, + profile: StageProfile, + spindle_load: float, + dt: float, + state: GeneratorState, + ) -> float: + """ + 刀具磨损量(μm),只在切削阶段单调递增。 + wear_delta = base_rate × stage_factor × load_factor × dt + """ + if profile.wear_rate_factor <= 0.0: + return state.tool_wear_accumulated + + load_norm = (spindle_load - profile.spindle_load_min) / max( + profile.spindle_load_max - profile.spindle_load_min, 1.0 + ) + wear_delta = ( + _BASE_WEAR_RATE + * profile.wear_rate_factor + * (0.5 + 0.5 * load_norm) + * dt + ) + state.tool_wear_accumulated += max(0.0, 
def _calc_surface_roughness(
    self,
    profile: StageProfile,
    vibration_rms: float,
    tool_wear_value: float,
    stage: str,
    state: GeneratorState,
) -> float:
    """
    Return the surface roughness Ra (μm) for the current tick.

    In the "idle" and "tool_change" stages no surface is produced, so the
    value stored in ``state.last_surface_roughness`` is returned unchanged.

    Otherwise the result is
        roughness_min + range * (0.4 + 0.35 * vib + 0.25 * wear) + noise
    where ``vib`` is the vibration normalised to the profile's band and
    clamped to [0, 1], ``wear`` is tool_wear_value / 50 capped at 1, and
    ``noise`` is Gaussian with sigma equal to 3% of the roughness range.
    The result is floored at 0.
    """
    if stage == "idle" or stage == "tool_change":
        return state.last_surface_roughness

    vib_floor = profile.vibration_min
    vib_share = (vibration_rms - vib_floor) / max(
        profile.vibration_max - vib_floor, 1e-6
    )
    vib_share = max(0.0, min(1.0, vib_share))

    # Wear saturates at 50 μm; beyond that the contribution stays at 1.
    wear_share = min(tool_wear_value / 50.0, 1.0)

    ra_floor = profile.surface_roughness_min
    ra_span = profile.surface_roughness_max - ra_floor
    blend = 0.4 + 0.35 * vib_share + 0.25 * wear_share

    jitter = self._rng.gauss(0, ra_span * 0.03)
    return max(0.0, ra_floor + ra_span * blend + jitter)
@staticmethod
def smooth_step(x: float) -> float:
    """
    Return the cubic smoothstep 3x² - 2x³ of ``x`` clamped to [0, 1].

    Inputs below 0 map to 0 and inputs above 1 map to 1.
    """
    clamped = max(0.0, min(1.0, x))
    squared = clamped * clamped
    return squared * (3 - 2 * clamped)
def _generate_counter(self, point: PointConfig, key: str) -> Any:
    """
    Return the next value of the wrapping counter stored under ``key``.

    The first call for a key yields the lower bound (``point.min_value``,
    or 0 when unset). Every later call adds ``generator_config["step"]``
    (default 1) and wraps back to the lower bound once the value exceeds
    the upper bound (``point.max_value``, or 2**31 - 1 when unset). The
    value is converted with ``_cast_value`` before it is returned.
    """
    floor = 0 if point.min_value is None else point.min_value
    ceiling = 2**31 - 1 if point.max_value is None else point.max_value
    increment = point.generator_config.get("step", 1)

    counters = self._counters
    if key in counters:
        counters[key] += increment
        if counters[key] > ceiling:
            counters[key] = floor
    else:
        counters[key] = floor

    return self._cast_value(counters[key], point.data_type)
def _build_registry() -> dict[str, Callable[[], Any]]:
    """
    Build the mapping from template_id to simulator factory.

    Each simulator is imported lazily and independently so that a simulator
    which cannot be imported does not stop the application from starting.
    A failed import is logged instead of being swallowed silently;
    otherwise devices using that template would run with no simulator and
    nothing would say why.
    """
    import logging  # local import: this block does not own the file's import header

    registry: dict[str, Callable[[], Any]] = {}
    try:
        from protoforge.protocols.mtconnect.lathe_simulator import LatheSimulator
    except ImportError as exc:
        logging.getLogger(__name__).warning(
            "Lathe simulator unavailable; template 'mtconnect_lathe' will run "
            "without a device simulator: %s",
            exc,
        )
    else:
        registry["mtconnect_lathe"] = LatheSimulator
    return registry


# Built once at import time; maps template_id to a zero-argument factory.
_REGISTRY = _build_registry()


def get_device_simulator(template_id: Optional[str]) -> Optional[Any]:
    """
    Return a new simulator instance for ``template_id``.

    Returns None when ``template_id`` is None or has no registered
    simulator. Every call invokes the factory again, so each caller gets
    its own instance.
    """
    if template_id is None:
        return None
    factory = _REGISTRY.get(template_id)
    if factory is None:
        return None
    return factory()
class _State(Enum):
    """States of the lathe work-cycle state machine.

    Each value is the lower-case state name. TOOL_BREAK and CHIP_WRAP are
    the two fault states; the remaining members make up the normal cycle.
    """

    IDLE = "idle"                # waiting between parts
    SPINUP = "spinup"            # spindle accelerating towards its target speed
    CUTTING = "cutting"          # material removal
    DECEL = "decel"              # spindle and feed ramping down after a cut
    TOOL_CHANGE = "tool_change"  # turret moving to the next tool
    TOOL_BREAK = "tool_break"    # fault: tool breakage
    CHIP_WRAP = "chip_wrap"      # fault: chips wrapped around the tool
self._state_duration = 0.0 + + # 主轴(状态机内部用于 CNC 信号联动) + self._spindle_target = 0.0 + self._spindle_actual = 0.0 + + # 进给(状态机内部值) + self._feed_actual = 0.0 + + # 轴位置 + self._x_pos = 150.0 + self._z_pos = 50.0 + + # 刀具(只跟踪刀位) + self._tool_idx = 0 + + # 生产统计 + self._part_count = 0 + + # 程序执行 + self._program_line = 0 + self._block_idx = 0 + + # 故障状态 + self._condition_native_code = "" + self._break_load_spike = 0.0 + self._wrap_load_increment = 0.0 + self._fault_cooldown = 0 + + # 当前切削子阶段(roughing/semi_finishing/finishing) + self._cutting_stage = "roughing" + + # tick 计数,用于传入 BaseMetricGenerator 的 t + self._tick_count = 0 + + # 正常加工指标生成器 + self._metric_gen = BaseMetricGenerator( + ambient_temperature=28.0, + seed=None, # None = 随机种子,每次实例化不同 + ) + + # ------------------------------------------------------------------ + # post_tick_hook 入口 + # ------------------------------------------------------------------ + + def __call__(self, device_instance: Any) -> None: + self._tick_count += 1 + t = float(self._tick_count) # 用 tick 序号作为时间 t(dt=1s) + + # 1. 状态机推进 + self._step_state_machine() + + # 2. 确定当前 MetricGenerator 阶段 + stage = self._get_metric_stage() + + # 3. 生成正常加工 MetricFrame(含联动 + 噪声 + clamp) + frame = self._metric_gen.generate(t=t, dt=1.0, stage=stage) + + # 4. 把 MetricFrame 写入 device._point_values(MTConnect 标准测点) + vals = device_instance._point_values + self._update_cnc_points(vals, frame) + + # 5. 
def _get_metric_stage(self) -> str:
    """
    Translate the current state-machine state into a metric-generator stage.

    While cutting, the stage is the dynamically selected sub-stage held in
    ``_cutting_stage``. Every other state is looked up in
    ``_STATE_TO_STAGE`` (which maps CHIP_WRAP to "roughing"), with "idle"
    as the fallback for unmapped states.
    """
    current = self._state
    return (
        self._cutting_stage
        if current == _State.CUTTING
        else _STATE_TO_STAGE.get(current, "idle")
    )
def _on_cutting(self) -> None:
    """
    Advance the CUTTING state by one tick.

    Updates the internal spindle speed, feed, axis positions and NC
    block/line pointers, selects the cutting sub-stage from the progress
    through the cut, may randomly enter one of the two fault states, and
    moves on to DECEL once the planned cutting duration has elapsed.
    """
    # Spindle speed takes a Gaussian step (sigma = 2% of target) and is kept
    # within 85%..105% of the target speed.
    noise = random.gauss(0, self._spindle_target * 0.02)
    self._spindle_actual = max(
        self._spindle_target * 0.85,
        min(self._spindle_target * 1.05, self._spindle_actual + noise),
    )
    # Feed is redrawn every tick as 8%..15% of the target spindle speed.
    self._feed_actual = self._spindle_target * random.uniform(0.08, 0.15)

    # Fraction of the planned cutting time already elapsed; the divisor is
    # floored at 1 to avoid dividing by zero.
    progress = self._state_elapsed / max(self._state_duration, 1)
    # Z sweeps from 50 towards -300 over the cut; X is a random radius plus a
    # sinusoidal component that completes two periods over the cut.
    self._z_pos = 50.0 - 350.0 * (progress % 1.0)
    self._x_pos = random.uniform(20, 60) + math.sin(progress * math.pi * 4) * 5
    # NC block pointer follows progress; line numbers are 10, 20, 30, ...
    self._block_idx = int(progress * len(_NC_BLOCKS)) % len(_NC_BLOCKS)
    self._program_line = (self._block_idx + 1) * 10

    # Switch between roughing / semi-finishing / finishing as the cut advances.
    self._update_cutting_substage(progress)

    # Faults can only start after 20% of the cut and outside the cooldown
    # window. A single draw decides between them: 0.4% per tick for a tool
    # break, a further 0.4% per tick for chip wrap.
    if self._fault_cooldown == 0 and progress > 0.2:
        r = random.random()
        if r < 0.004:
            self._condition_native_code = "ALM-401"
            self._break_load_spike = random.uniform(1.8, 3.0)
            self._transition(_State.TOOL_BREAK, random.uniform(3, 6))
            return
        elif r < 0.008:
            self._condition_native_code = "ALM-305"
            self._wrap_load_increment = 0.0
            self._transition(_State.CHIP_WRAP, random.uniform(15, 25))
            return

    # Normal completion: ramp down for 3..5 ticks.
    if self._state_elapsed >= self._state_duration:
        self._transition(_State.DECEL, random.uniform(3, 5))
def _on_tool_break(self) -> None:
    """
    Advance the TOOL_BREAK fault state by one tick.

    During the first 35% of the fault window the spindle speed is scaled
    down by a factor that grows with the phase. After that it is pulled
    towards zero with the smoothing helper. Feed is forced to zero
    throughout. When the window has elapsed, a 40-tick fault cooldown is
    armed and the machine moves to TOOL_CHANGE for 6..10 ticks.
    """
    elapsed = self._state_elapsed
    window = self._state_duration
    phase = elapsed / max(window, 1)

    if phase < 0.35:
        self._spindle_actual = self._spindle_actual * (1.0 - phase * 0.2)
    else:
        self._spindle_actual = self._smooth(self._spindle_actual, 0.0, 0.45)
    self._feed_actual = 0.0

    fault_finished = elapsed >= window
    if fault_finished:
        self._fault_cooldown = 40
        self._transition(_State.TOOL_CHANGE, random.uniform(6, 10))
状态信号(来自状态机)──────────────────────────────────────── + vals["availability"] = "AVAILABLE" + vals["e_stop"] = "TRIGGERED" if is_fault else "ARMED" + vals["system_condition"] = "FAULT" if is_fault else "NORMAL" + vals["condition_native_code"] = self._condition_native_code + + if is_fault: + vals["execution"] = "STOPPED" + vals["controller_mode"] = "MANUAL" + elif is_tool_change: + vals["execution"] = "WAIT" + vals["controller_mode"] = "AUTOMATIC" + elif state == _State.IDLE: + vals["execution"] = "READY" + vals["controller_mode"] = "AUTOMATIC" + else: + vals["execution"] = "ACTIVE" + vals["controller_mode"] = "AUTOMATIC" + + vals["program"] = "O0001" if not is_fault else "O0000" + vals["block"] = _NC_BLOCKS[self._block_idx] if is_cutting else "" + vals["line"] = self._program_line + vals["x_position"] = round(self._x_pos, 3) + vals["z_position"] = round(self._z_pos, 3) + vals["tool_id"] = cur_tool_id + vals["tool_number"] = cur_tool_no + vals["part_count"] = self._part_count + + # ── 主轴方向(由状态机内部转速决定)──────────────────────────────── + vals["spindle_direction"] = "STOPPED" if self._spindle_actual < 10 else "CW" + vals["spindle_override"] = 100.0 + vals["feed_override"] = 100.0 + vals["rapid_override"] = 100.0 + + # ── MetricFrame 基础指标 ───────────────────────────────────────────── + vals["spindle_speed"] = round(frame.spindle_speed, 1) + vals["spindle_load"] = round(frame.spindle_load, 1) + vals["spindle_current"] = round(frame.spindle_current, 2) + vals["feed_rate"] = round(frame.feed_rate, 1) + vals["vibration_x"] = round(frame.vibration_x, 4) + vals["vibration_y"] = round(frame.vibration_y, 4) + vals["vibration_z"] = round(frame.vibration_z, 4) + vals["acoustic_emission"] = round(frame.acoustic_emission, 4) + vals["tool_temperature"] = round(frame.tool_temperature, 2) + vals["surface_roughness"] = round(frame.surface_roughness, 3) + vals["tool_wear_value"] = round(frame.tool_wear_value, 4) + + # 故障覆盖:崩刀时 spindle_load 突增并覆盖 MetricFrame 的值 + if is_tool_break: + phase = 
def _emit_prometheus(self, device_instance: Any, frame) -> None:
    """
    Publish the frame's metrics as gauges through the shared collector.

    The collector is imported lazily; when it cannot be imported the call
    is a no-op. Every gauge carries the device id, device name, protocol
    and the frame's stage as labels, plus a per-metric ``unit`` label.
    """
    try:
        from protoforge.core.metrics import metrics
    except ImportError:
        return

    base_labels = {
        "device_id": getattr(device_instance.config, "id", "unknown"),
        "device_name": getattr(device_instance.config, "name", "unknown"),
        "protocol": "mtconnect",
        "stage": frame.stage,
    }

    # (frame attribute, unit). The gauge name is the attribute prefixed
    # with "cnc_"; gauges are emitted in this order.
    gauge_specs = (
        ("feed_rate", "mm/min"),
        ("spindle_speed", "RPM"),
        ("spindle_current", "A"),
        ("spindle_load", "%"),
        ("vibration_x", "mm/s"),
        ("vibration_y", "mm/s"),
        ("vibration_z", "mm/s"),
        ("acoustic_emission", "V"),
        ("tool_temperature", "C"),
        ("surface_roughness", "um"),
        ("tool_wear_value", "um"),
    )
    for attr, unit in gauge_specs:
        metrics.set_gauge(
            f"cnc_{attr}", getattr(frame, attr), {**base_labels, "unit": unit}
        )
float) -> float: + return current + (target - current) * rate diff --git a/protoforge/templates/mtconnect/lathe_machine.json b/protoforge/templates/mtconnect/lathe_machine.json index 33d36b0..5f00ec3 100644 --- a/protoforge/templates/mtconnect/lathe_machine.json +++ b/protoforge/templates/mtconnect/lathe_machine.json @@ -6,21 +6,76 @@ "manufacturer": "Generic", "model": "MC-Lathe-2Axis", "points": [ + { + "name": "availability", + "address": "Availability", + "data_type": "string", + "description": "设备可用性(MTConnect必需字段)", + "access": "r", + "generator_type": "fixed", + "fixed_value": "AVAILABLE" + }, { "name": "execution", "address": "Execution", "data_type": "string", - "description": "执行状态", + "description": "执行状态(ACTIVE/READY/STOPPED/WAIT)", "access": "r", "generator_type": "fixed", "fixed_value": "ACTIVE" }, + { + "name": "controller_mode", + "address": "ControllerMode", + "data_type": "string", + "description": "控制器模式(AUTOMATIC/MANUAL/MDI)", + "access": "r", + "generator_type": "fixed", + "fixed_value": "AUTOMATIC" + }, + { + "name": "e_stop", + "address": "EmergencyStop", + "data_type": "string", + "description": "急停状态(ARMED/TRIGGERED)", + "access": "r", + "generator_type": "fixed", + "fixed_value": "ARMED" + }, + { + "name": "program", + "address": "Program", + "data_type": "string", + "description": "当前运行NC程序名", + "access": "r", + "generator_type": "fixed", + "fixed_value": "O0001" + }, + { + "name": "block", + "address": "Block", + "data_type": "string", + "description": "当前执行程序段", + "access": "r", + "generator_type": "fixed", + "fixed_value": "N0010 G01 X50.0 Z-100.0 F0.2" + }, + { + "name": "line", + "address": "Line", + "data_type": "int32", + "description": "当前程序行号", + "access": "r", + "generator_type": "counter", + "min_value": 1, + "max_value": 500 + }, { "name": "x_position", "address": "Xposition", "data_type": "float32", "unit": "mm", - "description": "X轴位置(径向)", + "description": "X轴位置(径向)", "access": "r", "generator_type": "sine", "min_value": 0, 
@@ -31,22 +86,90 @@ "address": "Zposition", "data_type": "float32", "unit": "mm", - "description": "Z轴位置(纵向)", + "description": "Z轴位置(纵向)", "access": "r", "generator_type": "sine", "min_value": -300, "max_value": 50 }, + { + "name": "feed_rate", + "address": "PathFeedrate", + "data_type": "float32", + "unit": "mm/min", + "description": "实际进给速度", + "access": "r", + "generator_type": "fixed", + "fixed_value": 0.0 + }, + { + "name": "feed_override", + "address": "FeedrateOverride", + "data_type": "float32", + "unit": "%", + "description": "进给倍率", + "access": "r", + "generator_type": "fixed", + "fixed_value": 100.0 + }, + { + "name": "rapid_override", + "address": "RapidOverride", + "data_type": "float32", + "unit": "%", + "description": "快速移动倍率", + "access": "r", + "generator_type": "fixed", + "fixed_value": 100.0 + }, { "name": "spindle_speed", "address": "SpindleSpeed", "data_type": "float32", "unit": "RPM", - "description": "主轴转速", + "description": "主轴实际转速", "access": "r", - "generator_type": "random", - "min_value": 500, - "max_value": 4000 + "generator_type": "fixed", + "fixed_value": 0.0 + }, + { + "name": "spindle_override", + "address": "SpindleOverride", + "data_type": "float32", + "unit": "%", + "description": "主轴倍率", + "access": "r", + "generator_type": "fixed", + "fixed_value": 100.0 + }, + { + "name": "spindle_load", + "address": "SpindleLoad", + "data_type": "float32", + "unit": "%", + "description": "主轴负载(伺服驱动器输出,0~100%)", + "access": "r", + "generator_type": "fixed", + "fixed_value": 0.0 + }, + { + "name": "spindle_current", + "address": "SpindleCurrent", + "data_type": "float32", + "unit": "A", + "description": "主轴电流,与负载正相关,有1~2 tick滞后", + "access": "r", + "generator_type": "fixed", + "fixed_value": 0.0 + }, + { + "name": "spindle_direction", + "address": "RotaryVelocity", + "data_type": "string", + "description": "主轴旋转方向(CW/CCW/STOPPED)", + "access": "r", + "generator_type": "fixed", + "fixed_value": "CW" }, { "name": "tool_id", @@ -56,6 +179,114 @@ 
"access": "r", "generator_type": "fixed", "fixed_value": "T01" + }, + { + "name": "tool_number", + "address": "ToolNumber", + "data_type": "int32", + "description": "刀塔当前刀位号", + "access": "r", + "generator_type": "fixed", + "fixed_value": 1 + }, + { + "name": "part_count", + "address": "PartCount", + "data_type": "int32", + "unit": "pcs", + "description": "累计零件计数(CNC内部M代码触发)", + "access": "r", + "generator_type": "counter", + "min_value": 0, + "max_value": 99999 + }, + { + "name": "system_condition", + "address": "SystemCondition", + "data_type": "string", + "description": "系统报警状态(NORMAL/WARNING/FAULT)", + "access": "r", + "generator_type": "fixed", + "fixed_value": "NORMAL" + }, + { + "name": "condition_native_code", + "address": "ConditionNativeCode", + "data_type": "string", + "description": "CNC厂商报警号(如ALM-401),无报警时为空", + "access": "r", + "generator_type": "fixed", + "fixed_value": "" + }, + { + "name": "vibration_x", + "address": "VibrationX", + "data_type": "float32", + "unit": "mm/s", + "description": "X轴振动速度RMS(外部加速度传感器,随主轴负载和进给联动)", + "access": "r", + "generator_type": "fixed", + "fixed_value": 0.0 + }, + { + "name": "vibration_y", + "address": "VibrationY", + "data_type": "float32", + "unit": "mm/s", + "description": "Y轴振动速度RMS", + "access": "r", + "generator_type": "fixed", + "fixed_value": 0.0 + }, + { + "name": "vibration_z", + "address": "VibrationZ", + "data_type": "float32", + "unit": "mm/s", + "description": "Z轴振动速度RMS(轴向)", + "access": "r", + "generator_type": "fixed", + "fixed_value": 0.0 + }, + { + "name": "acoustic_emission", + "address": "AcousticEmission", + "data_type": "float32", + "unit": "V", + "description": "声发射传感器输出电压,受振动和主轴负载驱动", + "access": "r", + "generator_type": "fixed", + "fixed_value": 0.0 + }, + { + "name": "tool_temperature", + "address": "ToolTemperature", + "data_type": "float32", + "unit": "C", + "description": "刀具/切削区温度(热惯性模型,缓慢变化)", + "access": "r", + "generator_type": "fixed", + "fixed_value": 28.0 + }, + { + "name": 
"surface_roughness", + "address": "SurfaceRoughness", + "data_type": "float32", + "unit": "um", + "description": "工件表面粗糙度Ra,受加工阶段、振动、磨损驱动", + "access": "r", + "generator_type": "fixed", + "fixed_value": 1.0 + }, + { + "name": "tool_wear_value", + "address": "ToolWearValue", + "data_type": "float32", + "unit": "um", + "description": "刀具累积磨损量(切削阶段单调递增,换刀后归零)", + "access": "r", + "generator_type": "fixed", + "fixed_value": 0.0 } ], "protocol_config": { diff --git a/tests/test_cnc_metric_generator.py b/tests/test_cnc_metric_generator.py new file mode 100644 index 0000000..d944129 --- /dev/null +++ b/tests/test_cnc_metric_generator.py @@ -0,0 +1,372 @@ +""" +tests/test_cnc_metric_generator.py +=================================== + +验证 BaseMetricGenerator 的正常加工状态时序数据生成算法。 + +覆盖以下场景: +1. roughing vs finishing 阶段指标大小关系 +2. finishing 阶段主轴转速高且稳定、振动/粗糙度低 +3. tool_temperature 慢变量特性(不瞬变、idle 缓慢回落) +4. tool_wear_value 在切削阶段单调递增,idle/tool_change 不增长 +5. spindle_current 与 spindle_load 正相关且不完全同步(有滞后) +6. 所有指标无负值且不超合理边界 +""" + +import pytest +from protoforge.core.cnc_metric_generator import BaseMetricGenerator, MetricFrame + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def gen(): + """固定随机种子,保证测试可重现。""" + return BaseMetricGenerator(ambient_temperature=28.0, seed=20260609) + + +def _run_n(gen: BaseMetricGenerator, stage: str, n: int) -> list[MetricFrame]: + """运行 n 个 tick,返回所有帧。""" + frames = [] + for i in range(n): + frames.append(gen.generate(t=float(i), dt=1.0, stage=stage)) + return frames + + +# --------------------------------------------------------------------------- +# 1. 
class TestRoughingVsFinishing:
    """Roughing must read higher than finishing on every load-related metric."""

    N = 50  # enough samples that comparing means is not decided by noise

    def _stage_means(self, seed, metric):
        """Return (roughing mean, finishing mean) of ``metric`` over N ticks.

        Two generators are built with the same seed so that the only
        difference between the runs is the stage.
        """
        rough_gen = BaseMetricGenerator(seed=seed)
        finish_gen = BaseMetricGenerator(seed=seed)
        rough = sum(metric(f) for f in _run_n(rough_gen, "roughing", self.N)) / self.N
        finish = sum(metric(f) for f in _run_n(finish_gen, "finishing", self.N)) / self.N
        return rough, finish

    def test_feed_rate_roughing_gt_finishing(self):
        avg_r, avg_f = self._stage_means(1, lambda f: f.feed_rate)
        assert avg_r > avg_f, f"roughing feed_rate均值({avg_r:.1f}) 应 > finishing({avg_f:.1f})"

    def test_spindle_load_roughing_gt_finishing(self):
        avg_r, avg_f = self._stage_means(2, lambda f: f.spindle_load)
        assert avg_r > avg_f, f"roughing spindle_load均值({avg_r:.1f}) 应 > finishing({avg_f:.1f})"

    def test_spindle_current_roughing_gt_finishing(self):
        avg_r, avg_f = self._stage_means(3, lambda f: f.spindle_current)
        assert avg_r > avg_f, f"roughing current均值({avg_r:.2f}) 应 > finishing({avg_f:.2f})"

    def test_vibration_roughing_gt_finishing(self):
        # Per-frame vibration is the plain mean of the three axes.
        avg_r, avg_f = self._stage_means(
            4, lambda f: (f.vibration_x + f.vibration_y + f.vibration_z) / 3
        )
        assert avg_r > avg_f, f"roughing vibration均值({avg_r:.3f}) 应 > finishing({avg_f:.3f})"

    def test_surface_roughness_roughing_gt_finishing(self):
        avg_r, avg_f = self._stage_means(5, lambda f: f.surface_roughness)
        assert avg_r > avg_f, f"roughing roughness均值({avg_r:.2f}) 应 > finishing({avg_f:.2f})"
tool_temperature 慢变量特性 +# --------------------------------------------------------------------------- + +class TestToolTemperature: + + MAX_JUMP_PER_TICK = 3.0 # 单 tick 最大允许温度变化(°C) + + def test_no_instant_jump_roughing(self, gen): + """粗加工阶段温度不应瞬间大幅跳变。""" + frames = _run_n(gen, "roughing", 60) + temps = [f.tool_temperature for f in frames] + for i in range(1, len(temps)): + delta = abs(temps[i] - temps[i - 1]) + assert delta <= self.MAX_JUMP_PER_TICK, \ + f"tick {i}: 温度跳变 {delta:.2f}°C 超过 {self.MAX_JUMP_PER_TICK}°C" + + def test_temperature_rises_in_roughing(self, gen): + """粗加工持续运行后温度应高于初始环境温度。""" + frames = _run_n(gen, "roughing", 100) + # 最后 10 tick 均值应高于初始热状态 + late_avg = sum(f.tool_temperature for f in frames[-10:]) / 10 + assert late_avg > 35.0, \ + f"粗加工后期温度均值({late_avg:.1f}°C) 应 > 35°C" + + def test_temperature_falls_in_idle(self): + """idle 阶段温度应缓慢回落。""" + gen = BaseMetricGenerator(seed=42) + # 先跑 80 tick roughing 把温度升高 + for i in range(80): + gen.generate(t=float(i), dt=1.0, stage="roughing") + hot_temp = gen.state.thermal_state + + # 再跑 60 tick idle + for i in range(80, 140): + gen.generate(t=float(i), dt=1.0, stage="idle") + cool_temp = gen.state.thermal_state + + assert cool_temp < hot_temp, \ + f"idle 后温度({cool_temp:.1f}) 应低于加工后温度({hot_temp:.1f})" + + def test_no_instant_jump_idle(self, gen): + """idle 阶段温度同样不应瞬变。""" + frames = _run_n(gen, "idle", 30) + temps = [f.tool_temperature for f in frames] + for i in range(1, len(temps)): + delta = abs(temps[i] - temps[i - 1]) + assert delta <= self.MAX_JUMP_PER_TICK, \ + f"idle tick {i}: 温度跳变 {delta:.2f}°C" + + +# --------------------------------------------------------------------------- +# 4. 
tool_wear_value 单调性 +# --------------------------------------------------------------------------- + +class TestToolWear: + + def _check_monotone(self, stages: list[str], n_per_stage: int = 20): + gen = BaseMetricGenerator(seed=99) + prev_wear = 0.0 + for stage in stages: + for i in range(n_per_stage): + t = float(len(stages) * n_per_stage + i) + frame = gen.generate(t=t, dt=1.0, stage=stage) + assert frame.tool_wear_value >= prev_wear - 1e-9, \ + f"stage={stage} tick={i}: wear({frame.tool_wear_value:.6f}) < prev({prev_wear:.6f}),磨损不单调" + prev_wear = frame.tool_wear_value + return prev_wear + + def test_wear_increases_in_roughing(self): + gen = BaseMetricGenerator(seed=10) + wear_start = gen.state.tool_wear_accumulated + _run_n(gen, "roughing", 50) + wear_end = gen.state.tool_wear_accumulated + assert wear_end > wear_start, \ + f"粗加工后磨损({wear_end:.4f}) 应 > 初始({wear_start:.4f})" + + def test_wear_increases_in_semi_finishing(self): + gen = BaseMetricGenerator(seed=11) + _run_n(gen, "semi_finishing", 50) + assert gen.state.tool_wear_accumulated > 0, "半精加工后磨损应 > 0" + + def test_wear_increases_in_finishing(self): + gen = BaseMetricGenerator(seed=12) + _run_n(gen, "finishing", 50) + assert gen.state.tool_wear_accumulated > 0, "精加工后磨损应 > 0" + + def test_wear_no_increase_in_idle(self): + gen = BaseMetricGenerator(seed=13) + # 先加工一段,再 idle + _run_n(gen, "roughing", 10) + wear_before_idle = gen.state.tool_wear_accumulated + _run_n(gen, "idle", 30) + assert gen.state.tool_wear_accumulated == wear_before_idle, \ + "idle 阶段磨损不应增长" + + def test_wear_no_increase_in_tool_change(self): + gen = BaseMetricGenerator(seed=14) + _run_n(gen, "roughing", 10) + wear_before = gen.state.tool_wear_accumulated + _run_n(gen, "tool_change", 20) + assert gen.state.tool_wear_accumulated == wear_before, \ + "tool_change 阶段磨损不应增长" + + def test_wear_monotone_across_cutting_stages(self): + final_wear = self._check_monotone( + ["roughing", "roughing", "semi_finishing", "finishing"], + n_per_stage=30 + 
) + assert final_wear > 0 + + def test_roughing_wear_gt_finishing_wear(self): + """粗加工单位时间磨损应快于精加工。""" + gen_r = BaseMetricGenerator(seed=20) + gen_f = BaseMetricGenerator(seed=20) + _run_n(gen_r, "roughing", 100) + _run_n(gen_f, "finishing", 100) + assert gen_r.state.tool_wear_accumulated > gen_f.state.tool_wear_accumulated, \ + "粗加工磨损速率应高于精加工" + + +# --------------------------------------------------------------------------- +# 5. spindle_current 与 spindle_load 的相关性与滞后 +# --------------------------------------------------------------------------- + +class TestCurrentLoadCorrelation: + + def test_current_load_positive_correlation(self, gen): + """电流与负载正相关(Pearson r > 0.5)。 + 注:roughing 阶段噪声较大(stability=0.6),加上 1~2 tick 滞后, + 实际相关系数在 0.5~0.75 之间,符合真实 CNC 采集数据的特征。 + """ + frames = _run_n(gen, "roughing", 200) + loads = [f.spindle_load for f in frames] + currents = [f.spindle_current for f in frames] + + n = len(loads) + mean_l = sum(loads) / n + mean_c = sum(currents) / n + cov = sum((l - mean_l) * (c - mean_c) for l, c in zip(loads, currents)) / n + std_l = (sum((l - mean_l) ** 2 for l in loads) / n) ** 0.5 + std_c = (sum((c - mean_c) ** 2 for c in currents) / n) ** 0.5 + r = cov / (std_l * std_c + 1e-9) + assert r > 0.5, f"电流-负载 Pearson r({r:.3f}) 应 > 0.5" + + def test_current_not_identical_to_load(self, gen): + """电流与负载不完全相同(体现滞后和不同物理量)。""" + frames = _run_n(gen, "roughing", 30) + diffs = [abs(f.spindle_current - f.spindle_load) for f in frames] + avg_diff = sum(diffs) / len(diffs) + assert avg_diff > 1.0, \ + f"电流与负载均值差({avg_diff:.2f}) 过小,可能完全相同" + + def test_current_unit_range(self, gen): + """电流应在 roughing 合理范围(12~25 A)附近。""" + frames = _run_n(gen, "roughing", 50) + for f in frames: + assert 5.0 <= f.spindle_current <= 35.0, \ + f"roughing spindle_current({f.spindle_current:.2f} A) 超出合理范围" + + def test_current_lag_detection(self): + """ + 验证滞后:在负载突变后,电流应有一定惯性(不瞬间到达目标值)。 + 用两个生成器模拟:一个跑 idle 后切换 roughing,检查前几 tick 电流低于稳态均值。 + """ + gen = 
BaseMetricGenerator(seed=77) + # 先跑 10 tick idle(负载很低) + for i in range(10): + gen.generate(t=float(i), dt=1.0, stage="idle") + # 再跑 roughing,前 3 tick 电流应低于稳态 + early_currents = [] + for i in range(10, 13): + f = gen.generate(t=float(i), dt=1.0, stage="roughing") + early_currents.append(f.spindle_current) + # 稳态(第 30~40 tick) + for i in range(13, 40): + f = gen.generate(t=float(i), dt=1.0, stage="roughing") + steady_currents = [] + for i in range(40, 60): + f = gen.generate(t=float(i), dt=1.0, stage="roughing") + steady_currents.append(f.spindle_current) + + early_avg = sum(early_currents) / len(early_currents) + steady_avg = sum(steady_currents) / len(steady_currents) + assert early_avg < steady_avg, \ + f"切换到 roughing 后早期电流({early_avg:.2f}) 应低于稳态({steady_avg:.2f}),体现滞后" + + +# --------------------------------------------------------------------------- +# 6. 所有指标边界检查(无负值,不超上限) +# --------------------------------------------------------------------------- + +class TestBoundaries: + + ALL_STAGES = ["idle", "tool_change", "roughing", "semi_finishing", "finishing"] + + def test_no_negative_values(self): + for stage in self.ALL_STAGES: + gen = BaseMetricGenerator(seed=0) + for i, frame in enumerate(_run_n(gen, stage, 30)): + assert frame.feed_rate >= 0, f"{stage} t={i}: feed_rate < 0" + assert frame.spindle_speed >= 0, f"{stage} t={i}: spindle_speed < 0" + assert frame.spindle_current >= 0, f"{stage} t={i}: spindle_current < 0" + assert frame.spindle_load >= 0, f"{stage} t={i}: spindle_load < 0" + assert frame.vibration_x >= 0, f"{stage} t={i}: vibration_x < 0" + assert frame.vibration_y >= 0, f"{stage} t={i}: vibration_y < 0" + assert frame.vibration_z >= 0, f"{stage} t={i}: vibration_z < 0" + assert frame.acoustic_emission >= 0, f"{stage} t={i}: acoustic_emission < 0" + assert frame.surface_roughness >= 0, f"{stage} t={i}: surface_roughness < 0" + assert frame.tool_wear_value >= 0, f"{stage} t={i}: tool_wear_value < 0" + + def test_spindle_load_max_100(self): + for 
stage in self.ALL_STAGES: + gen = BaseMetricGenerator(seed=1) + for i, frame in enumerate(_run_n(gen, stage, 30)): + assert frame.spindle_load <= 100.0, \ + f"{stage} t={i}: spindle_load({frame.spindle_load}) > 100%" + + def test_tool_temperature_range(self): + for stage in self.ALL_STAGES: + gen = BaseMetricGenerator(seed=2) + for i, frame in enumerate(_run_n(gen, stage, 30)): + assert 20.0 <= frame.tool_temperature <= 120.0, \ + f"{stage} t={i}: tool_temperature({frame.tool_temperature:.1f}) 超出 [20,120]°C" + + def test_no_unrealistic_instant_jump(self): + """任意连续 tick 的指标变化不应超过合理上限(防止仿真器 bug)。""" + MAX_SPINDLE_SPEED_JUMP = 500 # RPM/tick + MAX_LOAD_JUMP = 30 # %/tick + MAX_TEMP_JUMP = 5 # °C/tick + + for stage in ["roughing", "finishing"]: + gen = BaseMetricGenerator(seed=3) + frames = _run_n(gen, stage, 60) + for i in range(1, len(frames)): + prev, curr = frames[i - 1], frames[i] + assert abs(curr.spindle_speed - prev.spindle_speed) <= MAX_SPINDLE_SPEED_JUMP, \ + f"{stage} t={i}: spindle_speed 跳变过大" + assert abs(curr.spindle_load - prev.spindle_load) <= MAX_LOAD_JUMP, \ + f"{stage} t={i}: spindle_load 跳变过大" + assert abs(curr.tool_temperature - prev.tool_temperature) <= MAX_TEMP_JUMP, \ + f"{stage} t={i}: tool_temperature 跳变过大" + + +# --------------------------------------------------------------------------- +# 附加:stage 名称错误时应抛出异常 +# --------------------------------------------------------------------------- + +def test_invalid_stage_raises(): + gen = BaseMetricGenerator() + with pytest.raises(ValueError, match="Unknown stage"): + gen.generate(t=0.0, dt=1.0, stage="nonexistent_stage") From 47c1b47a42f0197ee51665ea103c391a8ca2de1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Tue, 9 Jun 2026 09:36:15 +0800 Subject: [PATCH 44/55] fix --- protoforge/core/cnc_metric_generator.py | 152 ++++++++++++++++++++++-- 1 file changed, 142 insertions(+), 10 deletions(-) diff --git a/protoforge/core/cnc_metric_generator.py 
b/protoforge/core/cnc_metric_generator.py index 329f56e..50709b1 100644 --- a/protoforge/core/cnc_metric_generator.py +++ b/protoforge/core/cnc_metric_generator.py @@ -124,6 +124,129 @@ class GeneratorState: current_stage: str = "idle" +# --------------------------------------------------------------------------- +# SpindleLoadGenerator —— 状态驱动、EMA 平滑的主轴负载生成器 +# --------------------------------------------------------------------------- + +# 各工艺阶段的负载基线及允许范围 +_PROCESS_LOAD_CONFIG: dict[str, dict] = { + "rough": { + "base": 55.0, + "slow_amp": 6.0, # 慢周期波动幅度(%) + "cut_amp": 4.0, # 切削周期扰动幅度(%) + "noise_sigma": 2.5, # 高斯噪声标准差(%) + "clamp_min": 35.0, + "clamp_max": 85.0, + "ema_alpha": 0.18, # 较快响应,粗加工负载变化快 + }, + "semi_finish": { + "base": 38.0, + "slow_amp": 4.0, + "cut_amp": 2.5, + "noise_sigma": 1.5, + "clamp_min": 22.0, + "clamp_max": 65.0, + "ema_alpha": 0.15, + }, + "finish": { + "base": 22.0, + "slow_amp": 2.5, + "cut_amp": 1.5, + "noise_sigma": 0.8, + "clamp_min": 12.0, + "clamp_max": 42.0, + "ema_alpha": 0.12, # 较慢响应,精加工负载更平稳 + }, +} + +# 各驱动状态的负载基线及 EMA 系数 +_STATE_LOAD_CONFIG: dict[str, dict] = { + "idle": {"base": 1.5, "noise_sigma": 0.3, "clamp_min": 0.0, "clamp_max": 5.0, "ema_alpha": 0.10}, + "tool_change": {"base": 4.0, "noise_sigma": 0.8, "clamp_min": 0.0, "clamp_max": 10.0, "ema_alpha": 0.12}, + "spindle_on": {"base": 8.0, "noise_sigma": 1.2, "clamp_min": 3.0, "clamp_max": 18.0, "ema_alpha": 0.15}, + "air_cut": {"base": 15.0, "noise_sigma": 2.0, "clamp_min": 8.0, "clamp_max": 28.0, "ema_alpha": 0.16}, + # "cutting" state delegates to _PROCESS_LOAD_CONFIG +} + +# stage 名称 → 内部 process 名称映射 +_STAGE_TO_PROCESS: dict[str, str] = { + "roughing": "rough", + "semi_finishing": "semi_finish", + "finishing": "finish", +} + +# stage 名称 → 驱动状态映射(非切削阶段) +_STAGE_TO_STATE: dict[str, str] = { + "idle": "idle", + "tool_change": "tool_change", +} + + +class SpindleLoadGenerator: + """ + 状态驱动、EMA 平滑的主轴负载生成器。 + + 内部维护 prev_load 跨 tick 状态,使负载曲线连续平滑, + 避免随机脉冲。各切削工艺有独立基线和 
clamp 范围, + idle/tool_change 等非切削状态接近 0。 + + stage 参数取值:idle / tool_change / roughing / semi_finishing / finishing + """ + + def __init__(self, rng: random.Random): + self._rng = rng + self.prev_load: float = 0.0 + + def generate( + self, + t: float, + stage: str, + material_variation: float = 1.0, + slow_phase: float = 0.0, + cut_phase: float = 0.0, + ) -> float: + """ + 生成本 tick 的主轴负载(%)。 + + Args: + t: 当前时间(秒),保留供未来扩展。 + stage: 加工阶段(idle/tool_change/roughing/semi_finishing/finishing)。 + material_variation: 材料扰动系数(≈1.0,±5%)。 + slow_phase: 慢周期相位(弧度),由外部统一维护。 + cut_phase: 切削周期相位(弧度),由外部统一维护。 + + Returns: + clamp 后的主轴负载(%)。 + """ + process = _STAGE_TO_PROCESS.get(stage) + + if process is not None: + # 切削阶段:使用工艺基线 + cfg = _PROCESS_LOAD_CONFIG[process] + slow_wave = cfg["slow_amp"] * math.sin(slow_phase) + cut_wave = cfg["cut_amp"] * math.sin(cut_phase) + noise = self._rng.gauss(0, cfg["noise_sigma"]) + mat_delta = (material_variation - 1.0) * cfg["base"] * 0.5 # 材料变化影响基线的 50% + target = cfg["base"] + slow_wave + cut_wave + noise + mat_delta + alpha = cfg["ema_alpha"] + lo, hi = cfg["clamp_min"], cfg["clamp_max"] + else: + # 非切削阶段:使用状态基线 + state_key = _STAGE_TO_STATE.get(stage, "idle") + cfg = _STATE_LOAD_CONFIG[state_key] + noise = self._rng.gauss(0, cfg["noise_sigma"]) + target = cfg["base"] + noise + alpha = cfg["ema_alpha"] + lo, hi = cfg["clamp_min"], cfg["clamp_max"] + + # EMA 平滑 + new_load = self.prev_load + alpha * (target - self.prev_load) + # clamp + new_load = max(lo, min(hi, new_load)) + self.prev_load = new_load + return new_load + + # --------------------------------------------------------------------------- # 阶段配置 # --------------------------------------------------------------------------- @@ -236,6 +359,8 @@ def __init__( thermal_state=ambient_temperature, last_surface_roughness=1.0, ) + # 主轴负载生成器(状态驱动 + EMA 平滑) + self._spindle_load_gen = SpindleLoadGenerator(self._rng) # ------------------------------------------------------------------ # 公开 API @@ 
-274,9 +399,11 @@ def generate(self, t: float, dt: float, stage: str) -> MetricFrame: feed_rate, stage, material_variation, profile ) - # ── 6. spindle_load ─────────────────────────────────────────────────── + # ── 6. spindle_load(状态驱动 + EMA 平滑)──────────────────────────── + # 慢波相位(约 90 s 周期)和切削相位复用 cycle_phase + slow_phase = 2 * math.pi * t / 90.0 spindle_load = self._calc_spindle_load( - profile, cutting_intensity, cutting_cycle_wave + profile, stage, material_variation, slow_phase, state.cycle_phase ) # ── 7. spindle_current(对 load 有 1~2 tick 滞后)──────────────────── @@ -450,17 +577,22 @@ def _calc_cutting_intensity( def _calc_spindle_load( self, profile: StageProfile, - cutting_intensity: float, - cutting_cycle_wave: float, + stage: str, + material_variation: float, + slow_phase: float, + cut_phase: float, ) -> float: """ - 主轴负载(%)= 阶段基线 + cutting_intensity 加权 + 切削波动 + 噪声。 + 主轴负载(%)—— 委托给 SpindleLoadGenerator。 + 使用状态驱动基线 + EMA 平滑,避免随机脉冲行为。 """ - load_range = profile.spindle_load_max - profile.spindle_load_min - load_base = profile.spindle_load_min + load_range * cutting_intensity - load = load_base * cutting_cycle_wave - noise = self._rng.gauss(0, load_range * (1.0 - profile.stability_factor) * 0.04) - return max(profile.spindle_load_min, min(profile.spindle_load_max, load + noise)) + return self._spindle_load_gen.generate( + t=0.0, # t 保留,当前未使用 + stage=stage, + material_variation=material_variation, + slow_phase=slow_phase, + cut_phase=cut_phase, + ) def _calc_spindle_current( self, From c0fe62d6a86c5d7663678607ab757d56bb9a5483 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Tue, 9 Jun 2026 10:12:43 +0800 Subject: [PATCH 45/55] fix --- protoforge/core/cnc_metric_generator.py | 261 +++++++++++------- .../protocols/mtconnect/lathe_simulator.py | 12 +- 2 files changed, 176 insertions(+), 97 deletions(-) diff --git a/protoforge/core/cnc_metric_generator.py b/protoforge/core/cnc_metric_generator.py index 50709b1..3478fdf 100644 --- 
a/protoforge/core/cnc_metric_generator.py +++ b/protoforge/core/cnc_metric_generator.py @@ -106,146 +106,214 @@ class MetricFrame: @dataclass class GeneratorState: """跨 tick 需要持久化的生成器内部状态。""" - # 材料扰动随机游走值(慢变量,[-0.05, +0.05]) material_random_walk: float = 0.0 - # 热状态(tool_temperature 的平滑变量) thermal_state: float = 28.0 - # 刀具累积磨损(μm,单调不减) tool_wear_accumulated: float = 0.0 - # 上一 tick 的 spindle_load(用于电流滞后计算) last_spindle_load: float = 0.0 - # 滞后缓冲区(最多保存 3 tick 历史) load_lag_buffer: list = field(default_factory=lambda: [0.0, 0.0, 0.0]) - # 上一 tick 的 surface_roughness(idle 阶段保持上次值) last_surface_roughness: float = 1.0 - # 切削周期相位(用于 cutting_cycle_wave) cycle_phase: float = 0.0 - # 当前阶段 current_stage: str = "idle" + # 切削阶段内已经过的秒数(用于 entry/exit ramp 计算) + cutting_elapsed: float = 0.0 + # 当前切削阶段预估总时长(由状态机传入) + cutting_total: float = 30.0 # --------------------------------------------------------------------------- -# SpindleLoadGenerator —— 状态驱动、EMA 平滑的主轴负载生成器 +# SpindleLoadGenerator —— 状态驱动、切入/切出 ramp、周期扰动的主轴负载生成器 # --------------------------------------------------------------------------- -# 各工艺阶段的负载基线及允许范围 -_PROCESS_LOAD_CONFIG: dict[str, dict] = { +# 切削工艺配置 +_PROCESS_CFG: dict[str, dict] = { "rough": { - "base": 55.0, - "slow_amp": 6.0, # 慢周期波动幅度(%) - "cut_amp": 4.0, # 切削周期扰动幅度(%) - "noise_sigma": 2.5, # 高斯噪声标准差(%) - "clamp_min": 35.0, - "clamp_max": 85.0, - "ema_alpha": 0.18, # 较快响应,粗加工负载变化快 + "base_load": 55.0, + "slow_freq": 0.10, # rad/s,慢波频率 + "slow_amp": 5.0, + "cut_freq": 0.75, # rad/s,切削波频率 + "cut_amp": 2.5, + "material_freq": 0.03, + "material_amp": 4.0, + "noise_range": 2.0, # uniform ±noise_range + "clamp_min": 35.0, + "clamp_max": 85.0, + "ema_alpha": 0.10, + "entry_ramp_s": 6.0, # 切入 ramp 时长(秒) + "exit_ramp_s": 5.0, # 切出 ramp 时长(秒) + # 低负载基准(air_cut 阶段,用于 ramp 起止参考) + "air_cut_base": 8.0, }, "semi_finish": { - "base": 38.0, - "slow_amp": 4.0, - "cut_amp": 2.5, - "noise_sigma": 1.5, - "clamp_min": 22.0, - "clamp_max": 65.0, - "ema_alpha": 0.15, + 
"base_load": 38.0, + "slow_freq": 0.08, + "slow_amp": 3.5, + "cut_freq": 0.65, + "cut_amp": 1.8, + "material_freq": 0.025, + "material_amp": 2.5, + "noise_range": 1.5, + "clamp_min": 20.0, + "clamp_max": 65.0, + "ema_alpha": 0.10, + "entry_ramp_s": 5.0, + "exit_ramp_s": 4.0, + "air_cut_base": 6.0, }, "finish": { - "base": 22.0, - "slow_amp": 2.5, - "cut_amp": 1.5, - "noise_sigma": 0.8, - "clamp_min": 12.0, - "clamp_max": 42.0, - "ema_alpha": 0.12, # 较慢响应,精加工负载更平稳 + "base_load": 22.0, + "slow_freq": 0.06, + "slow_amp": 2.0, + "cut_freq": 0.55, + "cut_amp": 1.0, + "material_freq": 0.02, + "material_amp": 1.2, + "noise_range": 0.8, + "clamp_min": 8.0, + "clamp_max": 45.0, + "ema_alpha": 0.09, + "entry_ramp_s": 4.0, + "exit_ramp_s": 3.0, + "air_cut_base": 5.0, }, } -# 各驱动状态的负载基线及 EMA 系数 -_STATE_LOAD_CONFIG: dict[str, dict] = { - "idle": {"base": 1.5, "noise_sigma": 0.3, "clamp_min": 0.0, "clamp_max": 5.0, "ema_alpha": 0.10}, - "tool_change": {"base": 4.0, "noise_sigma": 0.8, "clamp_min": 0.0, "clamp_max": 10.0, "ema_alpha": 0.12}, - "spindle_on": {"base": 8.0, "noise_sigma": 1.2, "clamp_min": 3.0, "clamp_max": 18.0, "ema_alpha": 0.15}, - "air_cut": {"base": 15.0, "noise_sigma": 2.0, "clamp_min": 8.0, "clamp_max": 28.0, "ema_alpha": 0.16}, - # "cutting" state delegates to _PROCESS_LOAD_CONFIG +# 非切削状态配置(base / noise / clamp / ema_alpha) +_STATE_CFG: dict[str, dict] = { + "idle": {"base": 1.0, "noise": 0.4, "lo": 0.0, "hi": 2.0, "alpha": 0.35}, + "tool_change": {"base": 3.5, "noise": 0.6, "lo": 0.0, "hi": 8.0, "alpha": 0.25}, + "spindle_on": {"base": 4.5, "noise": 0.5, "lo": 3.0, "hi": 8.0, "alpha": 0.22}, + "air_cut": {"base": 7.5, "noise": 0.8, "lo": 5.0, "hi": 12.0, "alpha": 0.20}, } -# stage 名称 → 内部 process 名称映射 +# stage → process 映射(切削阶段) _STAGE_TO_PROCESS: dict[str, str] = { - "roughing": "rough", - "semi_finishing": "semi_finish", - "finishing": "finish", -} - -# stage 名称 → 驱动状态映射(非切削阶段) -_STAGE_TO_STATE: dict[str, str] = { - "idle": "idle", - "tool_change": 
"tool_change", + "roughing": "rough", + "semi_finishing": "semi_finish", + "finishing": "finish", } class SpindleLoadGenerator: """ - 状态驱动、EMA 平滑的主轴负载生成器。 + 状态驱动、切入/切出 ramp、周期级扰动的主轴负载生成器。 - 内部维护 prev_load 跨 tick 状态,使负载曲线连续平滑, - 避免随机脉冲。各切削工艺有独立基线和 clamp 范围, - idle/tool_change 等非切削状态接近 0。 + 支持的 stage 值(由 LatheSimulator 的 _get_metric_stage 传入): + idle / tool_change / roughing / semi_finishing / finishing - stage 参数取值:idle / tool_change / roughing / semi_finishing / finishing + 内部将切削阶段按 cutting_elapsed / cutting_total 推导出 + entry_cut → cutting → exit_cut 子状态,实现平滑切入切出。 + 每个加工周期开始时随机化 cycle_factor / phase 保证周期间差异。 """ def __init__(self, rng: random.Random): self._rng = rng self.prev_load: float = 0.0 + # 周期级随机状态(每次进入切削阶段时刷新) + self._cycle_id: Optional[str] = None + self._cycle_factor: float = 1.0 # 0.92~1.08,整体缩放基线 + self._phase1: float = 0.0 # 慢波初相位 + self._phase2: float = 0.0 # 切削波初相位 + self._material_phase: float = 0.0 # 材料漂移初相位 + + # 上一个 stage,用于检测切削周期切换 + self._last_stage: str = "idle" + + # ------------------------------------------------------------------ + + def _refresh_cycle(self, stage: str) -> None: + """检测到新的切削周期时刷新周期级随机参数。""" + cycle_id = stage # 简单以 stage 变化作为新周期标志 + was_cutting = self._last_stage in _STAGE_TO_PROCESS + now_cutting = stage in _STAGE_TO_PROCESS + # 从非切削 → 切削,或切削工艺跳转(粗 → 精),认为是新周期 + if now_cutting and (not was_cutting or stage != self._last_stage): + self._cycle_factor = self._rng.uniform(0.92, 1.08) + self._phase1 = self._rng.uniform(0, 2 * math.pi) + self._phase2 = self._rng.uniform(0, 2 * math.pi) + self._material_phase = self._rng.uniform(0, 2 * math.pi) + self._last_stage = stage + def generate( self, t: float, stage: str, - material_variation: float = 1.0, - slow_phase: float = 0.0, - cut_phase: float = 0.0, + cutting_elapsed: float = 0.0, + cutting_total: float = 30.0, ) -> float: """ 生成本 tick 的主轴负载(%)。 Args: - t: 当前时间(秒),保留供未来扩展。 - stage: 加工阶段(idle/tool_change/roughing/semi_finishing/finishing)。 - material_variation: 
材料扰动系数(≈1.0,±5%)。 - slow_phase: 慢周期相位(弧度),由外部统一维护。 - cut_phase: 切削周期相位(弧度),由外部统一维护。 + t: 当前时间(秒),用于波形计算。 + stage: 加工阶段(idle/tool_change/roughing/semi_finishing/finishing)。 + cutting_elapsed: 当前切削阶段已经过的秒数(用于 ramp 计算)。 + cutting_total: 当前切削阶段总时长预估(用于 exit_cut 判断)。 Returns: clamp 后的主轴负载(%)。 """ + self._refresh_cycle(stage) + process = _STAGE_TO_PROCESS.get(stage) - if process is not None: - # 切削阶段:使用工艺基线 - cfg = _PROCESS_LOAD_CONFIG[process] - slow_wave = cfg["slow_amp"] * math.sin(slow_phase) - cut_wave = cfg["cut_amp"] * math.sin(cut_phase) - noise = self._rng.gauss(0, cfg["noise_sigma"]) - mat_delta = (material_variation - 1.0) * cfg["base"] * 0.5 # 材料变化影响基线的 50% - target = cfg["base"] + slow_wave + cut_wave + noise + mat_delta - alpha = cfg["ema_alpha"] - lo, hi = cfg["clamp_min"], cfg["clamp_max"] + if process is None: + # 非切削阶段 + cfg = _STATE_CFG.get(stage, _STATE_CFG["idle"]) + slow_wave = math.sin(t * 0.20) * 0.8 + noise = self._rng.uniform(-cfg["noise"], cfg["noise"]) + target = cfg["base"] + slow_wave + noise + alpha = cfg["alpha"] + lo, hi = cfg["lo"], cfg["hi"] else: - # 非切削阶段:使用状态基线 - state_key = _STAGE_TO_STATE.get(stage, "idle") - cfg = _STATE_LOAD_CONFIG[state_key] - noise = self._rng.gauss(0, cfg["noise_sigma"]) - target = cfg["base"] + noise - alpha = cfg["ema_alpha"] - lo, hi = cfg["clamp_min"], cfg["clamp_max"] + # 切削阶段:entry_cut → cutting → exit_cut + pcfg = _PROCESS_CFG[process] + entry_s = pcfg["entry_ramp_s"] + exit_s = pcfg["exit_ramp_s"] + air_base = pcfg["air_cut_base"] + eff_base = pcfg["base_load"] * self._cycle_factor + + # 切出判断:距切削结束不足 exit_s 秒 + time_to_end = cutting_total - cutting_elapsed + in_exit = (time_to_end <= exit_s) and (cutting_elapsed > entry_s) + + if cutting_elapsed <= entry_s: + # ── entry_cut:从 air_base 平滑爬升到 eff_base ── + ramp = cutting_elapsed / entry_s # 0→1 + smooth_ramp = ramp * ramp * (3 - 2 * ramp) # smoothstep + target_cutting = self._cutting_target(t, pcfg, eff_base) + target = air_base + (target_cutting - 
air_base) * smooth_ramp + alpha = 0.12 + lo, hi = air_base * 0.5, pcfg["clamp_max"] + elif in_exit: + # ── exit_cut:从 eff_base 平滑下降到 air_base ── + exit_elapsed = exit_s - time_to_end + ramp = max(0.0, min(1.0, exit_elapsed / exit_s)) + smooth_ramp = ramp * ramp * (3 - 2 * ramp) + target_cutting = self._cutting_target(t, pcfg, eff_base) + target = target_cutting * (1.0 - smooth_ramp) + air_base * smooth_ramp + alpha = 0.13 + lo, hi = air_base * 0.4, pcfg["clamp_max"] + else: + # ── cutting:稳定切削平台 ── + target = self._cutting_target(t, pcfg, eff_base) + alpha = pcfg["ema_alpha"] + lo, hi = pcfg["clamp_min"], pcfg["clamp_max"] # EMA 平滑 new_load = self.prev_load + alpha * (target - self.prev_load) - # clamp new_load = max(lo, min(hi, new_load)) self.prev_load = new_load return new_load + def _cutting_target(self, t: float, pcfg: dict, eff_base: float) -> float: + """计算切削平台目标负载(含慢波 + 切削波 + 材料漂移 + 小噪声)。""" + slow_wave = math.sin(t * pcfg["slow_freq"] + self._phase1) * pcfg["slow_amp"] + cut_wave = math.sin(t * pcfg["cut_freq"] + self._phase2) * pcfg["cut_amp"] + material_drift = math.sin(t * pcfg["material_freq"] + self._material_phase) * pcfg["material_amp"] + noise = self._rng.uniform(-pcfg["noise_range"], pcfg["noise_range"]) + return eff_base + slow_wave + cut_wave + material_drift + noise + # --------------------------------------------------------------------------- # 阶段配置 @@ -380,6 +448,17 @@ def generate(self, t: float, dt: float, stage: str) -> MetricFrame: """ profile = self.get_stage_profile(stage) state = self._state + + # ── 切削阶段计时维护 ───────────────────────────────────────────────── + is_cutting = stage in _STAGE_TO_PROCESS + if is_cutting: + if state.current_stage not in _STAGE_TO_PROCESS: + # 刚进入切削阶段,重置计时 + state.cutting_elapsed = 0.0 + else: + state.cutting_elapsed += dt + else: + state.cutting_elapsed = 0.0 state.current_stage = stage # ── 1. 
材料扰动(慢变量,低频正弦 + 随机游走)────────────────────── @@ -399,11 +478,9 @@ def generate(self, t: float, dt: float, stage: str) -> MetricFrame: feed_rate, stage, material_variation, profile ) - # ── 6. spindle_load(状态驱动 + EMA 平滑)──────────────────────────── - # 慢波相位(约 90 s 周期)和切削相位复用 cycle_phase - slow_phase = 2 * math.pi * t / 90.0 + # ── 6. spindle_load(状态驱动 + ramp + EMA 平滑)──────────────────── spindle_load = self._calc_spindle_load( - profile, stage, material_variation, slow_phase, state.cycle_phase + stage, t, dt, state ) # ── 7. spindle_current(对 load 有 1~2 tick 滞后)──────────────────── @@ -576,22 +653,20 @@ def _calc_cutting_intensity( def _calc_spindle_load( self, - profile: StageProfile, stage: str, - material_variation: float, - slow_phase: float, - cut_phase: float, + t: float, + dt: float, + state: GeneratorState, ) -> float: """ 主轴负载(%)—— 委托给 SpindleLoadGenerator。 - 使用状态驱动基线 + EMA 平滑,避免随机脉冲行为。 + 传入切削计时信息,实现切入/切出 ramp。 """ return self._spindle_load_gen.generate( - t=0.0, # t 保留,当前未使用 + t=t, stage=stage, - material_variation=material_variation, - slow_phase=slow_phase, - cut_phase=cut_phase, + cutting_elapsed=state.cutting_elapsed, + cutting_total=state.cutting_total, ) def _calc_spindle_current( diff --git a/protoforge/protocols/mtconnect/lathe_simulator.py b/protoforge/protocols/mtconnect/lathe_simulator.py index d3047ca..da974f8 100644 --- a/protoforge/protocols/mtconnect/lathe_simulator.py +++ b/protoforge/protocols/mtconnect/lathe_simulator.py @@ -152,14 +152,18 @@ def __call__(self, device_instance: Any) -> None: # 2. 确定当前 MetricGenerator 阶段 stage = self._get_metric_stage() - # 3. 生成正常加工 MetricFrame(含联动 + 噪声 + clamp) + # 3. 把 CUTTING 状态总时长同步给 MetricGenerator(用于 exit_ramp 计算) + if self._state == _State.CUTTING: + self._metric_gen.state.cutting_total = self._state_duration + + # 4. 生成正常加工 MetricFrame(含联动 + 噪声 + clamp) frame = self._metric_gen.generate(t=t, dt=1.0, stage=stage) - # 4. 把 MetricFrame 写入 device._point_values(MTConnect 标准测点) + # 5. 
把 MetricFrame 写入 device._point_values(MTConnect 标准测点) vals = device_instance._point_values self._update_cnc_points(vals, frame) - # 5. 上报 Prometheus + # 6. 上报 Prometheus self._emit_prometheus(device_instance, frame) # ------------------------------------------------------------------ @@ -223,7 +227,7 @@ def _on_spinup(self) -> None: self._spindle_actual, self._spindle_target, 0.25 ) if self._state_elapsed >= self._state_duration: - self._transition(_State.CUTTING, random.uniform(20, 40)) + self._transition(_State.CUTTING, random.uniform(35, 65)) def _on_cutting(self) -> None: noise = random.gauss(0, self._spindle_target * 0.02) From db323e42993f9b9dd2ce341ade4a23c38225891e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Tue, 9 Jun 2026 10:50:22 +0800 Subject: [PATCH 46/55] fix --- protoforge/core/cnc_metric_generator.py | 296 +++++++++++++----- .../protocols/mtconnect/lathe_simulator.py | 18 +- 2 files changed, 225 insertions(+), 89 deletions(-) diff --git a/protoforge/core/cnc_metric_generator.py b/protoforge/core/cnc_metric_generator.py index 3478fdf..49d83d3 100644 --- a/protoforge/core/cnc_metric_generator.py +++ b/protoforge/core/cnc_metric_generator.py @@ -3,11 +3,10 @@ ===================================== 设计原则: - - 所有指标由切削强度 cutting_intensity 统一驱动,禁止各自独立随机。 + - spindle_speed / spindle_load / spindle_current 由 CncSpindleGenerator 统一驱动。 + - 生成链路:工艺阶段 → 目标转速 → 实际转速(EMA) → 负载 → 电流。 - 热惯性模型:tool_temperature 使用一阶 RC 滤波,alpha ≈ 0.04/tick。 - - 电流滞后:spindle_current 对 spindle_load 有 1~3 tick 的一阶滞后。 - 磨损单调:tool_wear_value 在切削阶段只增不减。 - - 噪声比例:roughing > semi_finishing > finishing,稳定性反向。 - 纯 Python 标准库实现,无第三方依赖。 用法: @@ -118,6 +117,8 @@ class GeneratorState: cutting_elapsed: float = 0.0 # 当前切削阶段预估总时长(由状态机传入) cutting_total: float = 30.0 + # 状态机内部状态(idle/spinup/cutting/decel/tool_change),用于转速平滑 + spindle_state: str = "idle" # --------------------------------------------------------------------------- @@ -315,6 +316,193 @@ def 
_cutting_target(self, t: float, pcfg: dict, eff_base: float) -> float: return eff_base + slow_wave + cut_wave + material_drift + noise +# --------------------------------------------------------------------------- +# CncSpindleGenerator —— spindle_speed / spindle_load / spindle_current 统一联动 +# --------------------------------------------------------------------------- + +# 工艺阶段 → 主轴目标转速配置 +_PROCESS_SPEED_CFG: dict[str, dict] = { + "rough": {"target": 2000.0, "noise": 30.0, "lo": 1800.0, "hi": 2200.0}, + "semi_finish": {"target": 3000.0, "noise": 40.0, "lo": 2800.0, "hi": 3200.0}, + "finish": {"target": 4000.0, "noise": 50.0, "lo": 3800.0, "hi": 4200.0}, +} + +# 非切削状态下转速目标(0 = 停止) +_STATE_SPEED_TARGET: dict[str, float] = { + "idle": 0.0, + "tool_change": 0.0, +} + +# 各状态的转速 EMA alpha(值越小过渡越慢) +_SPEED_ALPHA: dict[str, float] = { + "idle": 0.20, # 快速停止 + "tool_change": 0.22, + "spinup": 0.14, # 平滑升速 + "cutting": 0.06, # 稳定运转,微调 + "decel": 0.18, # 降速 +} + +# 电流模型配置:各工艺的空载基础电流和负载系数 +_PROCESS_CURRENT_CFG: dict[str, dict] = { + "rough": {"base": 3.0, "load_factor": 0.20, "noise": 0.4, "lo": 8.0, "hi": 20.0}, + "semi_finish": {"base": 2.5, "load_factor": 0.16, "noise": 0.3, "lo": 5.0, "hi": 15.0}, + "finish": {"base": 2.0, "load_factor": 0.12, "noise": 0.2, "lo": 3.0, "hi": 10.0}, +} + +# 非切削状态的电流配置 +_STATE_CURRENT_CFG: dict[str, dict] = { + "idle": {"base": 0.3, "noise": 0.15, "lo": 0.0, "hi": 1.0, "alpha": 0.35}, + "tool_change": {"base": 0.5, "noise": 0.2, "lo": 0.0, "hi": 1.5, "alpha": 0.30}, + "spindle_on": {"base": 3.2, "noise": 0.4, "lo": 2.0, "hi": 5.0, "alpha": 0.20}, + "air_cut": {"base": 4.0, "noise": 0.5, "lo": 2.5, "hi": 6.0, "alpha": 0.18}, +} + +# 电流 EMA alpha(切削阶段,略慢于负载,体现电气滞后) +_CURRENT_ALPHA_CUTTING: dict[str, float] = { + "entry_cut": 0.10, + "cutting": 0.10, + "exit_cut": 0.12, +} + + +def _clamp(v: float, lo: float, hi: float) -> float: + return max(lo, min(hi, v)) + + +def _ema(prev: float, target: float, alpha: float) -> float: + return prev + alpha 
* (target - prev) + + +class CncSpindleGenerator: + """ + 统一驱动 spindle_speed / spindle_load / spindle_current 的联动生成器。 + + 生成链路: + 工艺阶段(process) → 目标转速 → 实际转速(EMA) → 负载(SpindleLoadGenerator) + → 电流(负载+转速映射) + + stage 参数取值:idle / tool_change / roughing / semi_finishing / finishing + spindle_state 参数取值:idle / tool_change / spinup / cutting / decel + (由 LatheSimulator 状态机传入,用于控制转速 EMA alpha) + """ + + def __init__(self, rng: random.Random, load_gen: SpindleLoadGenerator): + self._rng = rng + self._load_gen = load_gen # 复用已有的负载生成器 + + self.prev_speed: float = 0.0 + self.prev_current: float = 0.0 + + def generate( + self, + t: float, + stage: str, + spindle_state: str = "cutting", + cutting_elapsed: float = 0.0, + cutting_total: float = 30.0, + ) -> tuple[float, float, float]: + """ + 生成 (spindle_speed, spindle_load, spindle_current)。 + + Args: + t: 当前时间(秒)。 + stage: MetricGenerator 加工阶段。 + spindle_state: LatheSimulator 内部状态(idle/spinup/cutting/decel/tool_change)。 + cutting_elapsed: 切削阶段已过秒数(传给负载生成器)。 + cutting_total: 切削阶段总时长(传给负载生成器)。 + """ + process = _STAGE_TO_PROCESS.get(stage) # None = 非切削 + + # ── 1. 主轴转速 ──────────────────────────────────────────────────── + speed = self._calc_speed(stage, spindle_state, process) + + # ── 2. 主轴负载(委托 SpindleLoadGenerator)──────────────────────── + load = self._load_gen.generate( + t=t, + stage=stage, + cutting_elapsed=cutting_elapsed, + cutting_total=cutting_total, + ) + + # ── 3. 
主轴电流(由转速 + 负载推导)─────────────────────────────── + current = self._calc_current(stage, spindle_state, process, speed, load) + + return speed, load, current + + # ------------------------------------------------------------------ + + def _calc_speed(self, stage: str, spindle_state: str, process: Optional[str]) -> float: + """转速:按工艺目标 + EMA 平滑,非切削时降到 0。""" + if process is not None: + scfg = _PROCESS_SPEED_CFG[process] + noise = self._rng.gauss(0, scfg["noise"]) + target = scfg["target"] + noise + # 首次从停止状态进入切削:直接跳到目标转速附近,避免长收敛期 + if self.prev_speed < scfg["lo"] * 0.5: + self.prev_speed = scfg["target"] + alpha = _SPEED_ALPHA.get("cutting", 0.06) + lo, hi = scfg["lo"] * 0.92, scfg["hi"] * 1.05 + elif spindle_state == "spinup": + if self.prev_speed > 500: + target = min(self.prev_speed * 1.12, 2200.0) + else: + target = 2000.0 + alpha = _SPEED_ALPHA.get("spinup", 0.14) + lo, hi = 0.0, 2500.0 + else: + target = _STATE_SPEED_TARGET.get(stage, 0.0) + alpha = _SPEED_ALPHA.get(spindle_state, 0.20) + lo, hi = 0.0, 200.0 + + new_speed = _ema(self.prev_speed, target, alpha) + new_speed = _clamp(new_speed, lo, hi) + self.prev_speed = new_speed + return new_speed + + def _calc_current( + self, + stage: str, + spindle_state: str, + process: Optional[str], + speed: float, + load: float, + ) -> float: + """电流:空载基础 + 负载映射,有轻微 EMA 滞后。""" + if process is not None: + ccfg = _PROCESS_CURRENT_CFG[process] + # 转速修正:实际转速偏低时电流也偏低(恒功率特性简化) + speed_ratio = _clamp(speed / _PROCESS_SPEED_CFG[process]["target"], 0.5, 1.1) + noise = self._rng.gauss(0, ccfg["noise"]) + target = (ccfg["base"] + ccfg["load_factor"] * load + noise) * speed_ratio + + # 判断切削子状态(entry/cutting/exit)决定 alpha + if self._load_gen._last_stage in _STAGE_TO_PROCESS: + pcfg = _PROCESS_CFG[process] + time_to_end = (self._load_gen.prev_load > 0 and + hasattr(self._load_gen, '_last_stage')) + # 简化:直接用较小 alpha 保持平滑 + alpha = 0.10 + else: + alpha = 0.10 + + lo, hi = ccfg["lo"], ccfg["hi"] + else: + # 非切削状态 + state_key = stage if 
stage in _STATE_CURRENT_CFG else "idle" + ccfg = _STATE_CURRENT_CFG[state_key] + noise = self._rng.gauss(0, ccfg["noise"]) + # 转速联动:主轴停止时电流趋近 0 + speed_factor = _clamp(speed / 100.0, 0.0, 1.0) if speed < 100 else 1.0 + target = (ccfg["base"] + noise) * speed_factor + alpha = ccfg["alpha"] + lo, hi = ccfg["lo"], ccfg["hi"] + + new_current = _ema(self.prev_current, target, alpha) + new_current = _clamp(new_current, lo, hi) + self.prev_current = new_current + return new_current + + # --------------------------------------------------------------------------- # 阶段配置 # --------------------------------------------------------------------------- @@ -421,14 +609,15 @@ def __init__( ): self._ambient = ambient_temperature self._rng = random.Random(seed) - # 热惯性系数(每 tick 向目标温度靠近的比例) self._thermal_alpha = thermal_alpha self._state = GeneratorState( thermal_state=ambient_temperature, last_surface_roughness=1.0, ) - # 主轴负载生成器(状态驱动 + EMA 平滑) + # 负载生成器(状态驱动 + ramp + EMA) self._spindle_load_gen = SpindleLoadGenerator(self._rng) + # 主轴联动生成器(speed / load / current 统一驱动) + self._spindle_gen = CncSpindleGenerator(self._rng, self._spindle_load_gen) # ------------------------------------------------------------------ # 公开 API @@ -464,57 +653,55 @@ def generate(self, t: float, dt: float, stage: str) -> MetricFrame: # ── 1. 材料扰动(慢变量,低频正弦 + 随机游走)────────────────────── material_variation = self._calc_material_variation(t, dt, state) - # ── 2. 切削周期波动 ────────────────────────────────────────────────── + # ── 2. 切削周期波动(feed_rate 使用)──────────────────────────────── cutting_cycle_wave = self._calc_cutting_cycle_wave(t, dt, stage, state, profile) # ── 3. feed_rate ────────────────────────────────────────────────────── feed_rate = self._calc_feed_rate(profile, cutting_cycle_wave, stage) - # ── 4. spindle_speed ────────────────────────────────────────────────── - spindle_speed = self._calc_spindle_speed(profile, stage) - - # ── 5. cutting_intensity(归一化切削强度)──────────────────────────── + # ── 4. 
cutting_intensity(供其他指标参考,不再驱动 load)──────────── cutting_intensity = self._calc_cutting_intensity( feed_rate, stage, material_variation, profile ) - # ── 6. spindle_load(状态驱动 + ramp + EMA 平滑)──────────────────── - spindle_load = self._calc_spindle_load( - stage, t, dt, state + # ── 5. spindle_speed / spindle_load / spindle_current(联动生成)──── + spindle_speed, spindle_load, spindle_current = self._spindle_gen.generate( + t=t, + stage=stage, + spindle_state=state.spindle_state, + cutting_elapsed=state.cutting_elapsed, + cutting_total=state.cutting_total, ) - # ── 7. spindle_current(对 load 有 1~2 tick 滞后)──────────────────── - spindle_current = self._calc_spindle_current(profile, spindle_load, state) - - # ── 8. vibration(三轴,各有小幅随机偏差)──────────────────────────── + # ── 6. vibration(三轴,各有小幅随机偏差)──────────────────────────── vib_x, vib_y, vib_z = self._calc_vibration( profile, spindle_load, feed_rate, stage ) - # ── 9. acoustic_emission ───────────────────────────────────────────── + # ── 7. acoustic_emission ───────────────────────────────────────────── vibration_rms = (vib_x + vib_y + vib_z) / 3.0 acoustic_emission = self._calc_acoustic(profile, vibration_rms, spindle_load) - # ── 10. tool_temperature(热惯性模型)──────────────────────────────── + # ── 8. tool_temperature(热惯性模型)──────────────────────────────── tool_temperature = self._calc_temperature( profile, spindle_load, spindle_current, dt, state ) - # ── 11. tool_wear_value(单调递增)──────────────────────────────────── + # ── 9. tool_wear_value(单调递增)──────────────────────────────────── tool_wear_value = self._calc_tool_wear(profile, spindle_load, dt, state) - # ── 12. surface_roughness ───────────────────────────────────────────── + # ── 10. surface_roughness ───────────────────────────────────────────── surface_roughness = self._calc_surface_roughness( profile, vibration_rms, tool_wear_value, stage, state ) - # ── 13. 更新滞后缓冲区 ──────────────────────────────────────────────── + # ── 11. 
更新滞后缓冲区 ──────────────────────────────────────────────── state.load_lag_buffer.pop(0) state.load_lag_buffer.append(spindle_load) state.last_spindle_load = spindle_load state.last_surface_roughness = surface_roughness - # ── 14. 构造帧 + clamp ──────────────────────────────────────────────── + # ── 12. 构造帧 + clamp ──────────────────────────────────────────────── frame = MetricFrame( timestamp=t, stage=stage, @@ -614,22 +801,6 @@ def _calc_feed_rate( noise = self._rng.gauss(0, base * noise_ratio) return max(profile.feed_rate_min, min(profile.feed_rate_max, base + noise)) - def _calc_spindle_speed(self, profile: StageProfile, stage: str) -> float: - """ - 主轴转速正常状态下稳定。 - roughing 允许 2% 波动,finishing 允许 0.8% 波动。 - """ - if stage in ("idle", "tool_change"): - return self._rng.uniform(profile.spindle_speed_min, profile.spindle_speed_max) - noise_pct = { - "roughing": 0.020, - "semi_finishing": 0.015, - "finishing": 0.008, - }.get(stage, 0.015) - base = profile.spindle_speed_mid - noise = self._rng.gauss(0, base * noise_pct) - return max(profile.spindle_speed_min, min(profile.spindle_speed_max, base + noise)) - def _calc_cutting_intensity( self, feed_rate: float, @@ -651,53 +822,6 @@ def _calc_cutting_intensity( norm_feed = max(0.0, min(1.0, norm_feed)) return max(0.0, min(1.0, norm_feed * stage_factor * material_variation)) - def _calc_spindle_load( - self, - stage: str, - t: float, - dt: float, - state: GeneratorState, - ) -> float: - """ - 主轴负载(%)—— 委托给 SpindleLoadGenerator。 - 传入切削计时信息,实现切入/切出 ramp。 - """ - return self._spindle_load_gen.generate( - t=t, - stage=stage, - cutting_elapsed=state.cutting_elapsed, - cutting_total=state.cutting_total, - ) - - def _calc_spindle_current( - self, - profile: StageProfile, - spindle_load: float, - state: GeneratorState, - ) -> float: - """ - 主轴电流(A),对负载有 1~2 tick 滞后(一阶低通)。 - current = idle_current + k × lag_load + noise - k 由阶段电流范围和负载范围反推。 - """ - # 滞后混合:60% 当前负载 + 25% 上一 tick + 15% 两 tick 前 - lag_load = spindle_load * 0.60 + 
state.load_lag_buffer[1] * 0.25 + state.load_lag_buffer[0] * 0.15 - # 线性映射:load_min → current_min,load_max → current_max - load_range = profile.spindle_load_max - profile.spindle_load_min - current_range = profile.spindle_current_max - profile.spindle_current_min - if load_range > 0: - k = current_range / load_range - else: - k = 0.0 - current_base = profile.spindle_current_min + k * (lag_load - profile.spindle_load_min) - noise = self._rng.gauss( - 0, - (profile.spindle_current_max - profile.spindle_current_min) - * (1.0 - profile.stability_factor) - * 0.03, - ) - return max(profile.spindle_current_min, min(profile.spindle_current_max, current_base + noise)) - def _calc_vibration( self, profile: StageProfile, diff --git a/protoforge/protocols/mtconnect/lathe_simulator.py b/protoforge/protocols/mtconnect/lathe_simulator.py index da974f8..79f947e 100644 --- a/protoforge/protocols/mtconnect/lathe_simulator.py +++ b/protoforge/protocols/mtconnect/lathe_simulator.py @@ -152,9 +152,20 @@ def __call__(self, device_instance: Any) -> None: # 2. 确定当前 MetricGenerator 阶段 stage = self._get_metric_stage() - # 3. 把 CUTTING 状态总时长同步给 MetricGenerator(用于 exit_ramp 计算) + # 3. 把状态机信息同步给 MetricGenerator if self._state == _State.CUTTING: self._metric_gen.state.cutting_total = self._state_duration + # spindle_state 用于转速 EMA alpha 控制 + _sm_to_spindle = { + _State.IDLE: "idle", + _State.SPINUP: "spinup", + _State.CUTTING: "cutting", + _State.DECEL: "decel", + _State.TOOL_CHANGE: "tool_change", + _State.TOOL_BREAK: "idle", + _State.CHIP_WRAP: "cutting", + } + self._metric_gen.state.spindle_state = _sm_to_spindle.get(self._state, "idle") # 4. 
生成正常加工 MetricFrame(含联动 + 噪声 + clamp) frame = self._metric_gen.generate(t=t, dt=1.0, stage=stage) @@ -216,11 +227,12 @@ def _on_idle(self) -> None: self._condition_native_code = "" self._wrap_load_increment = 0.0 if self._state_elapsed >= self._state_duration: - self._spindle_target = random.uniform(800, 2500) + # 目标转速按即将开始的切削工艺设定(粗加工 2000 RPM) + self._spindle_target = 2000.0 self._program_line = 1 self._block_idx = 0 self._cutting_stage = "roughing" - self._transition(_State.SPINUP, random.uniform(3, 6)) + self._transition(_State.SPINUP, random.uniform(4, 8)) def _on_spinup(self) -> None: self._spindle_actual = self._smooth( From cb3b770cf1c25864fed939657fcd670ce6dffc1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Tue, 9 Jun 2026 11:36:44 +0800 Subject: [PATCH 47/55] fix --- protoforge/core/cnc_metric_generator.py | 68 ++++++++--- .../protocols/mtconnect/lathe_simulator.py | 110 +++++++++++++++--- 2 files changed, 148 insertions(+), 30 deletions(-) diff --git a/protoforge/core/cnc_metric_generator.py b/protoforge/core/cnc_metric_generator.py index 49d83d3..097e616 100644 --- a/protoforge/core/cnc_metric_generator.py +++ b/protoforge/core/cnc_metric_generator.py @@ -119,6 +119,9 @@ class GeneratorState: cutting_total: float = 30.0 # 状态机内部状态(idle/spinup/cutting/decel/tool_change),用于转速平滑 spindle_state: str = "idle" + # 任务级状态:process_running = 主轴保持目标转速;idle = 主轴可以停 + # 由 LatheSimulator 的 _STATE_TO_TASK 映射传入 + task_state: str = "idle" # --------------------------------------------------------------------------- @@ -392,6 +395,8 @@ def __init__(self, rng: random.Random, load_gen: SpindleLoadGenerator): self.prev_speed: float = 0.0 self.prev_current: float = 0.0 + # 最近一次切削工艺目标转速(air_cut 间隙保持用) + self._last_process_speed: float = 2000.0 def generate( self, @@ -400,6 +405,7 @@ def generate( spindle_state: str = "cutting", cutting_elapsed: float = 0.0, cutting_total: float = 30.0, + task_state: str = "idle", ) -> tuple[float, float, float]: 
""" 生成 (spindle_speed, spindle_load, spindle_current)。 @@ -410,11 +416,13 @@ def generate( spindle_state: LatheSimulator 内部状态(idle/spinup/cutting/decel/tool_change)。 cutting_elapsed: 切削阶段已过秒数(传给负载生成器)。 cutting_total: 切削阶段总时长(传给负载生成器)。 + task_state: 任务级状态(process_running/idle)。 + process_running 时主轴保持目标转速,即使 stage=idle(air_cut 间隙)。 """ process = _STAGE_TO_PROCESS.get(stage) # None = 非切削 # ── 1. 主轴转速 ──────────────────────────────────────────────────── - speed = self._calc_speed(stage, spindle_state, process) + speed = self._calc_speed(stage, spindle_state, process, task_state) # ── 2. 主轴负载(委托 SpindleLoadGenerator)──────────────────────── load = self._load_gen.generate( @@ -425,31 +433,59 @@ def generate( ) # ── 3. 主轴电流(由转速 + 负载推导)─────────────────────────────── - current = self._calc_current(stage, spindle_state, process, speed, load) + current = self._calc_current(stage, spindle_state, process, speed, load, task_state) return speed, load, current # ------------------------------------------------------------------ - def _calc_speed(self, stage: str, spindle_state: str, process: Optional[str]) -> float: - """转速:按工艺目标 + EMA 平滑,非切削时降到 0。""" + def _calc_speed( + self, + stage: str, + spindle_state: str, + process: Optional[str], + task_state: str = "idle", + ) -> float: + """ + 转速:按工艺目标 + EMA 平滑,非切削时降到 0。 + + task_state="process_running" 时,即使 stage=idle(air_cut 间隙), + 主轴也保持最近的切削工艺目标转速,不降到 0。 + 只有 task_state="idle" 时才允许主轴停转。 + """ if process is not None: + # 切削阶段:按工艺目标转速 scfg = _PROCESS_SPEED_CFG[process] noise = self._rng.gauss(0, scfg["noise"]) target = scfg["target"] + noise # 首次从停止状态进入切削:直接跳到目标转速附近,避免长收敛期 if self.prev_speed < scfg["lo"] * 0.5: self.prev_speed = scfg["target"] + # 记录本轮使用的工艺目标(供 air_cut 保持用) + self._last_process_speed = scfg["target"] + else: + self._last_process_speed = scfg["target"] alpha = _SPEED_ALPHA.get("cutting", 0.06) lo, hi = scfg["lo"] * 0.92, scfg["hi"] * 1.05 elif spindle_state == "spinup": + # 升速阶段:向目标转速爬升 if self.prev_speed > 500: 
target = min(self.prev_speed * 1.12, 2200.0) else: target = 2000.0 alpha = _SPEED_ALPHA.get("spinup", 0.14) lo, hi = 0.0, 2500.0 + elif task_state == "process_running": + # 任务运行中的非切削间隙(air_cut / decel_cycle):主轴保持转速 + # 目标是最近一次切削工艺的目标转速,略加噪声 + base_target = getattr(self, "_last_process_speed", 2000.0) + noise = self._rng.gauss(0, base_target * 0.008) + target = base_target + noise + alpha = _SPEED_ALPHA.get("cutting", 0.06) # 保持稳定 + lo = base_target * 0.90 + hi = base_target * 1.10 else: + # 任务级停机(idle / tool_change / 故障):主轴降到 0 target = _STATE_SPEED_TARGET.get(stage, 0.0) alpha = _SPEED_ALPHA.get(spindle_state, 0.20) lo, hi = 0.0, 200.0 @@ -466,6 +502,7 @@ def _calc_current( process: Optional[str], speed: float, load: float, + task_state: str = "idle", ) -> float: """电流:空载基础 + 负载映射,有轻微 EMA 滞后。""" if process is not None: @@ -474,20 +511,18 @@ def _calc_current( speed_ratio = _clamp(speed / _PROCESS_SPEED_CFG[process]["target"], 0.5, 1.1) noise = self._rng.gauss(0, ccfg["noise"]) target = (ccfg["base"] + ccfg["load_factor"] * load + noise) * speed_ratio - - # 判断切削子状态(entry/cutting/exit)决定 alpha - if self._load_gen._last_stage in _STAGE_TO_PROCESS: - pcfg = _PROCESS_CFG[process] - time_to_end = (self._load_gen.prev_load > 0 and - hasattr(self._load_gen, '_last_stage')) - # 简化:直接用较小 alpha 保持平滑 - alpha = 0.10 - else: - alpha = 0.10 - + alpha = 0.10 + lo, hi = ccfg["lo"], ccfg["hi"] + elif task_state == "process_running": + # air_cut / decel_cycle:主轴空转,电流略低于切削 + ccfg = _STATE_CURRENT_CFG["air_cut"] + noise = self._rng.gauss(0, ccfg["noise"]) + speed_factor = _clamp(speed / 2000.0, 0.5, 1.2) + target = (ccfg["base"] + noise) * speed_factor + alpha = ccfg["alpha"] lo, hi = ccfg["lo"], ccfg["hi"] else: - # 非切削状态 + # 非切削状态(idle / tool_change) state_key = stage if stage in _STATE_CURRENT_CFG else "idle" ccfg = _STATE_CURRENT_CFG[state_key] noise = self._rng.gauss(0, ccfg["noise"]) @@ -671,6 +706,7 @@ def generate(self, t: float, dt: float, stage: str) -> MetricFrame: 
spindle_state=state.spindle_state, cutting_elapsed=state.cutting_elapsed, cutting_total=state.cutting_total, + task_state=getattr(state, "task_state", "idle"), ) # ── 6. vibration(三轴,各有小幅随机偏差)──────────────────────────── diff --git a/protoforge/protocols/mtconnect/lathe_simulator.py b/protoforge/protocols/mtconnect/lathe_simulator.py index 79f947e..117bcfb 100644 --- a/protoforge/protocols/mtconnect/lathe_simulator.py +++ b/protoforge/protocols/mtconnect/lathe_simulator.py @@ -3,10 +3,19 @@ 仿真 CNC 车床通过 MTConnect 协议能真实输出的信号。 -工作周期: - IDLE → SPINUP → CUTTING → DECEL → TOOL_CHANGE → IDLE - ↓ (偶发,两种故障路径) - TOOL_BREAK / CHIP_WRAP → TOOL_CHANGE → IDLE +工作周期(任务级): + IDLE → SPINUP → [切削大循环] → SPINDOWN → TOOL_CHANGE → IDLE + +切削大循环(周期级,主轴保持转速): + AIR_CUT → CUTTING → DECEL_CYCLE → AIR_CUT → ...(循环 N 次后退出) + ↓ (偶发,两种故障路径) + TOOL_BREAK / CHIP_WRAP → TOOL_CHANGE → IDLE + +关键设计: + - AIR_CUT 状态:主轴已启动,快速定位中,主轴转速保持目标值 + - CUTTING 和 AIR_CUT 都属于 task_state="process_running",主轴不停 + - 只有 IDLE / TOOL_CHANGE / 故障恢复 时 task_state="idle",主轴才降到 0 + - 每完成 cycles_per_task 个切削周期后才真正回到 IDLE(换刀或停机) 每个 tick 的处理流程: 1. 
状态机推进(确定当前 stage) @@ -41,8 +50,10 @@ class _State(Enum): IDLE = "idle" SPINUP = "spinup" + AIR_CUT = "air_cut" # 主轴运转,快速定位,不切削 CUTTING = "cutting" - DECEL = "decel" + DECEL_CYCLE = "decel_cycle" # 周期级减速(主轴保持转速,只减进给) + DECEL = "decel" # 任务级降速(主轴降到 0) TOOL_CHANGE = "tool_change" TOOL_BREAK = "tool_break" CHIP_WRAP = "chip_wrap" @@ -52,13 +63,28 @@ class _State(Enum): _STATE_TO_STAGE: dict[_State, str] = { _State.IDLE: "idle", _State.SPINUP: "idle", + _State.AIR_CUT: "idle", # air_cut 阶段负载模型用 idle,但主轴不停 _State.CUTTING: "roughing", # 默认粗加工,子阶段由 _cutting_stage 动态切换 - _State.DECEL: "idle", + _State.DECEL_CYCLE: "idle", # 周期间减速,主轴不停 + _State.DECEL: "idle", # 任务级降速 _State.TOOL_CHANGE: "tool_change", _State.TOOL_BREAK: "idle", _State.CHIP_WRAP: "roughing", } +# task_state 映射:process_running = 主轴保持,idle = 主轴可以停 +_STATE_TO_TASK: dict[_State, str] = { + _State.IDLE: "idle", + _State.SPINUP: "process_running", + _State.AIR_CUT: "process_running", + _State.CUTTING: "process_running", + _State.DECEL_CYCLE: "process_running", + _State.DECEL: "idle", + _State.TOOL_CHANGE: "idle", + _State.TOOL_BREAK: "idle", + _State.CHIP_WRAP: "process_running", +} + # 刀塔配置(刀位号, 刀具ID) _TOOL_TABLE = [ (1, "T01"), # 外圆粗车刀 @@ -129,6 +155,11 @@ def __init__(self): # 当前切削子阶段(roughing/semi_finishing/finishing) self._cutting_stage = "roughing" + # 当前任务内已完成的切削周期数(达到上限后才真正停机) + self._cycles_in_task = 0 + # 每个任务包含多少个切削周期(随机 3~6),到达后进入真正 IDLE + self._cycles_per_task = random.randint(3, 6) + # tick 计数,用于传入 BaseMetricGenerator 的 t self._tick_count = 0 @@ -155,11 +186,14 @@ def __call__(self, device_instance: Any) -> None: # 3. 
把状态机信息同步给 MetricGenerator if self._state == _State.CUTTING: self._metric_gen.state.cutting_total = self._state_duration + # spindle_state 用于转速 EMA alpha 控制 _sm_to_spindle = { _State.IDLE: "idle", _State.SPINUP: "spinup", + _State.AIR_CUT: "cutting", # air_cut 保持转速(cutting alpha) _State.CUTTING: "cutting", + _State.DECEL_CYCLE: "cutting", # 周期间不降速 _State.DECEL: "decel", _State.TOOL_CHANGE: "tool_change", _State.TOOL_BREAK: "idle", @@ -167,6 +201,10 @@ def __call__(self, device_instance: Any) -> None: } self._metric_gen.state.spindle_state = _sm_to_spindle.get(self._state, "idle") + # task_state:process_running = 主轴保持目标转速;idle = 主轴可以停 + task_state = _STATE_TO_TASK.get(self._state, "idle") + self._metric_gen.state.task_state = task_state + # 4. 生成正常加工 MetricFrame(含联动 + 噪声 + clamp) frame = self._metric_gen.generate(t=t, dt=1.0, stage=stage) @@ -189,7 +227,9 @@ def _step_state_machine(self) -> None: dispatch = { _State.IDLE: self._on_idle, _State.SPINUP: self._on_spinup, + _State.AIR_CUT: self._on_air_cut, _State.CUTTING: self._on_cutting, + _State.DECEL_CYCLE: self._on_decel_cycle, _State.DECEL: self._on_decel, _State.TOOL_CHANGE: self._on_tool_change, _State.TOOL_BREAK: self._on_tool_break, @@ -227,17 +267,33 @@ def _on_idle(self) -> None: self._condition_native_code = "" self._wrap_load_increment = 0.0 if self._state_elapsed >= self._state_duration: - # 目标转速按即将开始的切削工艺设定(粗加工 2000 RPM) + # 开始新任务:主轴升速目标转速(粗加工 2000 RPM) self._spindle_target = 2000.0 self._program_line = 1 self._block_idx = 0 self._cutting_stage = "roughing" + self._cycles_in_task = 0 + self._cycles_per_task = random.randint(3, 6) self._transition(_State.SPINUP, random.uniform(4, 8)) def _on_spinup(self) -> None: self._spindle_actual = self._smooth( self._spindle_actual, self._spindle_target, 0.25 ) + if self._state_elapsed >= self._state_duration: + self._transition(_State.AIR_CUT, random.uniform(3, 6)) + + def _on_air_cut(self) -> None: + """主轴运转,快速定位,不切削。主轴转速保持目标值。""" + noise = random.gauss(0, 
self._spindle_target * 0.01) + self._spindle_actual = max( + self._spindle_target * 0.95, + min(self._spindle_target * 1.05, self._spindle_actual + noise), + ) + self._feed_actual = 0.0 + # 快速移动回到起刀点 + self._x_pos = self._smooth(self._x_pos, 50.0, 0.30) + self._z_pos = self._smooth(self._z_pos, 2.0, 0.30) if self._state_elapsed >= self._state_duration: self._transition(_State.CUTTING, random.uniform(35, 65)) @@ -272,20 +328,41 @@ def _on_cutting(self) -> None: return if self._state_elapsed >= self._state_duration: - self._transition(_State.DECEL, random.uniform(3, 5)) + # 周期结束:进入 DECEL_CYCLE(主轴保持转速,只停进给) + self._transition(_State.DECEL_CYCLE, random.uniform(2, 4)) + + def _on_decel_cycle(self) -> None: + """ + 周期级减速:只停进给,主轴转速保持。 + 结束后:若任务周期未满,回到 AIR_CUT;若满了,进入任务级 DECEL。 + """ + self._feed_actual = self._smooth(self._feed_actual, 0.0, 0.40) + # 主轴保持转速(微小噪声) + noise = random.gauss(0, self._spindle_target * 0.01) + self._spindle_actual = max( + self._spindle_target * 0.95, + min(self._spindle_target * 1.05, self._spindle_actual + noise), + ) + if self._state_elapsed >= self._state_duration: + self._cycles_in_task += 1 + self._part_count += 1 + if self._cycles_in_task >= self._cycles_per_task: + # 任务周期完成:进行真正的降速停机 + if self._part_count % 5 == 0: + self._metric_gen.reset_wear() + self._transition(_State.DECEL, random.uniform(3, 5)) + else: + # 继续下一个切削周期:回到 AIR_CUT + self._transition(_State.AIR_CUT, random.uniform(3, 6)) def _on_decel(self) -> None: + """任务级降速:主轴降到 0,准备换刀或停机。""" self._spindle_actual = self._smooth(self._spindle_actual, 0.0, 0.20) self._feed_actual = self._smooth(self._feed_actual, 0.0, 0.30) self._x_pos = self._smooth(self._x_pos, 150.0, 0.20) self._z_pos = self._smooth(self._z_pos, 50.0, 0.20) if self._state_elapsed >= self._state_duration: - self._part_count += 1 - if self._part_count % 5 == 0: - self._metric_gen.reset_wear() - self._transition(_State.TOOL_CHANGE, random.uniform(4, 8)) - else: - self._transition(_State.IDLE, random.uniform(3, 6)) + 
self._transition(_State.TOOL_CHANGE, random.uniform(4, 8)) def _on_tool_change(self) -> None: self._spindle_actual = 0.0 @@ -333,10 +410,12 @@ def _update_cnc_points(self, vals: dict[str, Any], frame) -> None: """ state = self._state is_cutting = state == _State.CUTTING + is_air_cut = state == _State.AIR_CUT is_tool_break = state == _State.TOOL_BREAK is_chip_wrap = state == _State.CHIP_WRAP is_fault = is_tool_break or is_chip_wrap is_tool_change = state == _State.TOOL_CHANGE + is_decel_cycle = state == _State.DECEL_CYCLE cur_tool_no, cur_tool_id = _TOOL_TABLE[self._tool_idx] @@ -355,6 +434,9 @@ def _update_cnc_points(self, vals: dict[str, Any], frame) -> None: elif state == _State.IDLE: vals["execution"] = "READY" vals["controller_mode"] = "AUTOMATIC" + elif is_air_cut or is_decel_cycle: + vals["execution"] = "ACTIVE" + vals["controller_mode"] = "AUTOMATIC" else: vals["execution"] = "ACTIVE" vals["controller_mode"] = "AUTOMATIC" From c618fc56036b1849df403212c7d9e745240604a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Tue, 9 Jun 2026 13:55:38 +0800 Subject: [PATCH 48/55] fix --- protoforge/core/cnc_metric_generator.py | 262 +++++++++++++----- .../protocols/mtconnect/lathe_simulator.py | 26 +- 2 files changed, 196 insertions(+), 92 deletions(-) diff --git a/protoforge/core/cnc_metric_generator.py b/protoforge/core/cnc_metric_generator.py index 097e616..457d654 100644 --- a/protoforge/core/cnc_metric_generator.py +++ b/protoforge/core/cnc_metric_generator.py @@ -122,6 +122,9 @@ class GeneratorState: # 任务级状态:process_running = 主轴保持目标转速;idle = 主轴可以停 # 由 LatheSimulator 的 _STATE_TO_TASK 映射传入 task_state: str = "idle" + # 加工周期状态:air_cut / entry_cut / cutting / exit_cut + # cycle_state 只描述负载形态,不控制主轴启停或转速档位 + cycle_state: str = "air_cut" # --------------------------------------------------------------------------- @@ -394,9 +397,30 @@ def __init__(self, rng: random.Random, load_gen: SpindleLoadGenerator): self._load_gen = load_gen # 复用已有的负载生成器 
self.prev_speed: float = 0.0 + self.prev_load: float = 0.0 self.prev_current: float = 0.0 - # 最近一次切削工艺目标转速(air_cut 间隙保持用) - self._last_process_speed: float = 2000.0 + self.process: str = "rough" + + self.current_cycle_id: Optional[str] = None + self.cycle_factor: float = 1.0 + self.phase1: float = 0.0 + self.phase2: float = 0.0 + self.material_phase: float = 0.0 + self.entry_ramp_seconds: float = 6.0 + self.exit_ramp_seconds: float = 4.5 + self._cycle_cutting_load: float = 55.0 + + def start_new_cycle(self, cycle_id: str) -> None: + """每个 rough 切削周期只刷新一次周期级扰动参数。""" + if cycle_id == self.current_cycle_id: + return + self.current_cycle_id = cycle_id + self.cycle_factor = self._rng.uniform(0.92, 1.08) + self.phase1 = self._rng.uniform(0, 2 * math.pi) + self.phase2 = self._rng.uniform(0, 2 * math.pi) + self.material_phase = self._rng.uniform(0, 2 * math.pi) + self.entry_ramp_seconds = self._rng.uniform(4.0, 8.0) + self.exit_ramp_seconds = self._rng.uniform(3.0, 6.0) def generate( self, @@ -419,21 +443,27 @@ def generate( task_state: 任务级状态(process_running/idle)。 process_running 时主轴保持目标转速,即使 stage=idle(air_cut 间隙)。 """ - process = _STAGE_TO_PROCESS.get(stage) # None = 非切削 + # 本轮优化固定为 rough 正常工况;stage 仍原样写入 MetricFrame/标签。 + process = "rough" + cycle_state = self._resolve_cycle_state(stage, task_state, cutting_elapsed, cutting_total) + cycle_id = self._resolve_cycle_id(t, stage, task_state, cutting_elapsed, cutting_total) + self.start_new_cycle(cycle_id) # ── 1. 主轴转速 ──────────────────────────────────────────────────── speed = self._calc_speed(stage, spindle_state, process, task_state) - # ── 2. 主轴负载(委托 SpindleLoadGenerator)──────────────────────── - load = self._load_gen.generate( + # 保持旧负载生成器的周期状态同步,避免其他调用路径依赖其内部状态。 + self._load_gen.generate( t=t, stage=stage, cutting_elapsed=cutting_elapsed, cutting_total=cutting_total, ) + # ── 2. 
主轴负载(rough 正常工况,cycle_state 只影响负载形态)─────── + load = self._calc_rough_load(t, speed, task_state, cycle_state, cutting_elapsed, cutting_total) # ── 3. 主轴电流(由转速 + 负载推导)─────────────────────────────── - current = self._calc_current(stage, spindle_state, process, speed, load, task_state) + current = self._calc_current(stage, spindle_state, process, speed, load, task_state, cycle_state) return speed, load, current @@ -447,54 +477,129 @@ def _calc_speed( task_state: str = "idle", ) -> float: """ - 转速:按工艺目标 + EMA 平滑,非切削时降到 0。 - - task_state="process_running" 时,即使 stage=idle(air_cut 间隙), - 主轴也保持最近的切削工艺目标转速,不降到 0。 - 只有 task_state="idle" 时才允许主轴停转。 + 转速只由任务级状态控制启停;rough 周期状态不切换转速档位。 """ - if process is not None: - # 切削阶段:按工艺目标转速 - scfg = _PROCESS_SPEED_CFG[process] - noise = self._rng.gauss(0, scfg["noise"]) - target = scfg["target"] + noise - # 首次从停止状态进入切削:直接跳到目标转速附近,避免长收敛期 - if self.prev_speed < scfg["lo"] * 0.5: - self.prev_speed = scfg["target"] - # 记录本轮使用的工艺目标(供 air_cut 保持用) - self._last_process_speed = scfg["target"] - else: - self._last_process_speed = scfg["target"] - alpha = _SPEED_ALPHA.get("cutting", 0.06) - lo, hi = scfg["lo"] * 0.92, scfg["hi"] * 1.05 - elif spindle_state == "spinup": - # 升速阶段:向目标转速爬升 - if self.prev_speed > 500: - target = min(self.prev_speed * 1.12, 2200.0) - else: - target = 2000.0 - alpha = _SPEED_ALPHA.get("spinup", 0.14) - lo, hi = 0.0, 2500.0 - elif task_state == "process_running": - # 任务运行中的非切削间隙(air_cut / decel_cycle):主轴保持转速 - # 目标是最近一次切削工艺的目标转速,略加噪声 - base_target = getattr(self, "_last_process_speed", 2000.0) - noise = self._rng.gauss(0, base_target * 0.008) - target = base_target + noise - alpha = _SPEED_ALPHA.get("cutting", 0.06) # 保持稳定 - lo = base_target * 0.90 - hi = base_target * 1.10 + if task_state in ("idle", "spindle_off", "tool_change"): + target = 0.0 + alpha = self._rng.uniform(0.12, 0.25) else: - # 任务级停机(idle / tool_change / 故障):主轴降到 0 - target = _STATE_SPEED_TARGET.get(stage, 0.0) - alpha = _SPEED_ALPHA.get(spindle_state, 
0.20) - lo, hi = 0.0, 200.0 + target = 2000.0 + if spindle_state == "spinup" or self.prev_speed < 1750.0: + alpha = self._rng.uniform(0.10, 0.18) + else: + alpha = self._rng.uniform(0.03, 0.08) new_speed = _ema(self.prev_speed, target, alpha) - new_speed = _clamp(new_speed, lo, hi) + if task_state not in ("idle", "spindle_off", "tool_change") and new_speed > 1750.0: + new_speed += self._rng.uniform(-30.0, 30.0) + new_speed = _clamp(new_speed, 0.0, 2200.0) self.prev_speed = new_speed return new_speed + def _resolve_cycle_state( + self, + stage: str, + task_state: str, + cutting_elapsed: float, + cutting_total: float, + ) -> str: + if task_state != "process_running": + return "air_cut" + if stage not in _STAGE_TO_PROCESS: + return "air_cut" + + entry_s = max(self.entry_ramp_seconds, 0.1) + exit_s = max(self.exit_ramp_seconds, 0.1) + time_to_end = cutting_total - cutting_elapsed + if cutting_elapsed <= entry_s: + return "entry_cut" + if cutting_elapsed > entry_s and time_to_end <= exit_s: + return "exit_cut" + return "cutting" + + def _resolve_cycle_id( + self, + t: float, + stage: str, + task_state: str, + cutting_elapsed: float, + cutting_total: float, + ) -> str: + if task_state != "process_running": + return "stopped" + if stage not in _STAGE_TO_PROCESS: + return self.current_cycle_id or "air_cut" + cycle_start = t - cutting_elapsed + return f"rough:{cycle_start:.0f}:{cutting_total:.0f}" + + def _air_cut_load_target(self, t: float) -> float: + target = 7.0 + math.sin(t * 0.20) * 1.5 + self._rng.uniform(-0.8, 0.8) + return _clamp(target, 5.0, 12.0) + + def _rough_cutting_load_target(self, t: float) -> float: + effective_base = 55.0 * self.cycle_factor + slow_wave = math.sin(t * 0.10 + self.phase1) * 5.0 + cutting_wave = math.sin(t * 0.75 + self.phase2) * 2.5 + material_drift = math.sin(t * 0.03 + self.material_phase) * 4.0 + small_noise = self._rng.uniform(-2.0, 2.0) + target = effective_base + slow_wave + cutting_wave + material_drift + small_noise + return 
_clamp(target, 35.0, 82.0) + + def _calc_rough_load( + self, + t: float, + speed: float, + task_state: str, + cycle_state: str, + cutting_elapsed: float, + cutting_total: float, + ) -> float: + if speed <= 50.0: + target = self._rng.uniform(0.0, 2.0) + alpha = self._rng.uniform(0.30, 0.45) + lo, hi = 0.0, 2.0 + elif task_state == "process_running": + air_load = self._air_cut_load_target(t) + cutting_target = self._rough_cutting_load_target(t) + self._cycle_cutting_load = cutting_target + + if cycle_state == "air_cut": + target = air_load + alpha = self._rng.uniform(0.18, 0.25) + lo, hi = 5.0, 12.0 + elif cycle_state == "entry_cut": + ratio = _clamp(cutting_elapsed / max(self.entry_ramp_seconds, 0.1), 0.0, 1.0) + target = air_load + (cutting_target - air_load) * ratio + alpha = self._rng.uniform(0.08, 0.14) + lo, hi = 5.0, 82.0 + elif cycle_state == "cutting": + target = cutting_target + alpha = self._rng.uniform(0.08, 0.15) + lo, hi = 35.0, 82.0 + elif cycle_state == "exit_cut": + exit_elapsed = max(0.0, self.exit_ramp_seconds - (cutting_total - cutting_elapsed)) + ratio = _clamp(exit_elapsed / max(self.exit_ramp_seconds, 0.1), 0.0, 1.0) + target = self._cycle_cutting_load * (1.0 - ratio) + air_load * ratio + alpha = self._rng.uniform(0.10, 0.18) + lo, hi = 5.0, 82.0 + else: + target = air_load + alpha = self._rng.uniform(0.18, 0.25) + lo, hi = 5.0, 12.0 + else: + target = self._rng.uniform(0.0, 2.0) + alpha = self._rng.uniform(0.25, 0.40) + lo, hi = 0.0, 2.0 + + new_load = _ema(self.prev_load, target, alpha) + if speed > 50.0 and task_state == "process_running": + min_load = 5.0 if cycle_state in ("air_cut", "entry_cut", "exit_cut") else 35.0 + new_load = _clamp(new_load, min_load, hi) + else: + new_load = _clamp(new_load, lo, hi) + self.prev_load = new_load + return new_load + def _calc_current( self, stage: str, @@ -503,34 +608,33 @@ def _calc_current( speed: float, load: float, task_state: str = "idle", + cycle_state: str = "air_cut", ) -> float: - """电流:空载基础 + 
负载映射,有轻微 EMA 滞后。""" - if process is not None: - ccfg = _PROCESS_CURRENT_CFG[process] - # 转速修正:实际转速偏低时电流也偏低(恒功率特性简化) - speed_ratio = _clamp(speed / _PROCESS_SPEED_CFG[process]["target"], 0.5, 1.1) - noise = self._rng.gauss(0, ccfg["noise"]) - target = (ccfg["base"] + ccfg["load_factor"] * load + noise) * speed_ratio - alpha = 0.10 - lo, hi = ccfg["lo"], ccfg["hi"] - elif task_state == "process_running": - # air_cut / decel_cycle:主轴空转,电流略低于切削 - ccfg = _STATE_CURRENT_CFG["air_cut"] - noise = self._rng.gauss(0, ccfg["noise"]) - speed_factor = _clamp(speed / 2000.0, 0.5, 1.2) - target = (ccfg["base"] + noise) * speed_factor - alpha = ccfg["alpha"] - lo, hi = ccfg["lo"], ccfg["hi"] + """电流由主轴转速和负载推导,避免独立随机曲线。""" + if speed <= 50.0: + target = self._rng.uniform(0.0, 0.8) + alpha = self._rng.uniform(0.25, 0.40) + lo, hi = 0.0, 0.8 + elif cycle_state == "air_cut": + target = 3.5 + load * 0.12 + self._rng.uniform(-0.4, 0.4) + alpha = self._rng.uniform(0.15, 0.25) + lo, hi = 2.5, 6.0 + elif cycle_state == "entry_cut": + target = 3.0 + load * 0.17 + self._rng.uniform(-0.5, 0.5) + alpha = self._rng.uniform(0.08, 0.16) + lo, hi = 2.5, 17.0 + elif cycle_state == "cutting": + target = 3.0 + load * 0.18 + self._rng.uniform(-0.6, 0.6) + alpha = self._rng.uniform(0.08, 0.15) + lo, hi = 10.0, 17.0 + elif cycle_state == "exit_cut": + target = 3.0 + load * 0.16 + self._rng.uniform(-0.5, 0.5) + alpha = self._rng.uniform(0.10, 0.20) + lo, hi = 2.5, 17.0 else: - # 非切削状态(idle / tool_change) - state_key = stage if stage in _STATE_CURRENT_CFG else "idle" - ccfg = _STATE_CURRENT_CFG[state_key] - noise = self._rng.gauss(0, ccfg["noise"]) - # 转速联动:主轴停止时电流趋近 0 - speed_factor = _clamp(speed / 100.0, 0.0, 1.0) if speed < 100 else 1.0 - target = (ccfg["base"] + noise) * speed_factor - alpha = ccfg["alpha"] - lo, hi = ccfg["lo"], ccfg["hi"] + target = 3.0 + load * 0.12 + self._rng.uniform(-0.4, 0.4) + alpha = self._rng.uniform(0.15, 0.25) + lo, hi = 2.5, 6.0 new_current = _ema(self.prev_current, 
target, alpha) new_current = _clamp(new_current, lo, hi) @@ -700,13 +804,21 @@ def generate(self, t: float, dt: float, stage: str) -> MetricFrame: ) # ── 5. spindle_speed / spindle_load / spindle_current(联动生成)──── + task_state = getattr(state, "task_state", "idle") + spindle_state = state.spindle_state + if stage in _STAGE_TO_PROCESS and task_state == "idle": + # 兼容直接调用 BaseMetricGenerator.generate(stage="roughing") 的路径: + # 显式切削阶段代表正常加工中,而不是任务级停机。 + task_state = "process_running" + if spindle_state == "idle": + spindle_state = "cutting" spindle_speed, spindle_load, spindle_current = self._spindle_gen.generate( t=t, stage=stage, - spindle_state=state.spindle_state, + spindle_state=spindle_state, cutting_elapsed=state.cutting_elapsed, cutting_total=state.cutting_total, - task_state=getattr(state, "task_state", "idle"), + task_state=task_state, ) # ── 6. vibration(三轴,各有小幅随机偏差)──────────────────────────── diff --git a/protoforge/protocols/mtconnect/lathe_simulator.py b/protoforge/protocols/mtconnect/lathe_simulator.py index 117bcfb..0465327 100644 --- a/protoforge/protocols/mtconnect/lathe_simulator.py +++ b/protoforge/protocols/mtconnect/lathe_simulator.py @@ -108,12 +108,10 @@ class _State(Enum): "N0120 M30", ] -# 每个零件的加工子阶段序列(按进度切分) +# 每个零件的加工子阶段序列(本轮正常工况固定为 rough) # (阶段名, 开始进度, 结束进度) _CUT_SUBSTAGES = [ - ("roughing", 0.00, 0.45), - ("semi_finishing", 0.45, 0.75), - ("finishing", 0.75, 1.00), + ("roughing", 0.00, 1.00), ] @@ -245,20 +243,14 @@ def _transition(self, new_state: _State, duration: float) -> None: def _get_metric_stage(self) -> str: """将状态机状态映射到 MetricGenerator 阶段。""" if self._state == _State.CUTTING: - return self._cutting_stage + return "roughing" if self._state == _State.CHIP_WRAP: return "roughing" return _STATE_TO_STAGE.get(self._state, "idle") def _update_cutting_substage(self, progress: float) -> None: - """根据切削进度动态切换粗/半精/精加工子阶段。""" - for stage_name, start, end in _CUT_SUBSTAGES: - if start <= progress < end: - if self._cutting_stage != 
stage_name: - self._cutting_stage = stage_name - # 换阶段时不重置磨损,但可记录换刀(此处仅切换参数集) - return - self._cutting_stage = "finishing" + """本轮正常工况只模拟 rough,不在小周期内切换 semi/finish。""" + self._cutting_stage = "roughing" def _on_idle(self) -> None: self._spindle_target = 0.0 @@ -281,7 +273,7 @@ def _on_spinup(self) -> None: self._spindle_actual, self._spindle_target, 0.25 ) if self._state_elapsed >= self._state_duration: - self._transition(_State.AIR_CUT, random.uniform(3, 6)) + self._transition(_State.AIR_CUT, random.uniform(6, 12)) def _on_air_cut(self) -> None: """主轴运转,快速定位,不切削。主轴转速保持目标值。""" @@ -295,7 +287,7 @@ def _on_air_cut(self) -> None: self._x_pos = self._smooth(self._x_pos, 50.0, 0.30) self._z_pos = self._smooth(self._z_pos, 2.0, 0.30) if self._state_elapsed >= self._state_duration: - self._transition(_State.CUTTING, random.uniform(35, 65)) + self._transition(_State.CUTTING, random.uniform(45, 90)) def _on_cutting(self) -> None: noise = random.gauss(0, self._spindle_target * 0.02) @@ -329,7 +321,7 @@ def _on_cutting(self) -> None: if self._state_elapsed >= self._state_duration: # 周期结束:进入 DECEL_CYCLE(主轴保持转速,只停进给) - self._transition(_State.DECEL_CYCLE, random.uniform(2, 4)) + self._transition(_State.DECEL_CYCLE, random.uniform(3, 6)) def _on_decel_cycle(self) -> None: """ @@ -353,7 +345,7 @@ def _on_decel_cycle(self) -> None: self._transition(_State.DECEL, random.uniform(3, 5)) else: # 继续下一个切削周期:回到 AIR_CUT - self._transition(_State.AIR_CUT, random.uniform(3, 6)) + self._transition(_State.AIR_CUT, random.uniform(6, 12)) def _on_decel(self) -> None: """任务级降速:主轴降到 0,准备换刀或停机。""" From 75471894ab8ad95c2bb26c192f5ffab4f19825c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Wed, 10 Jun 2026 06:46:09 +0800 Subject: [PATCH 49/55] fix --- protoforge/core/fault.py | 110 ++++++++++++++++++ .../protocols/mtconnect/lathe_simulator.py | 60 +++++++--- 2 files changed, 155 insertions(+), 15 deletions(-) diff --git a/protoforge/core/fault.py 
b/protoforge/core/fault.py index 7febb4b..6867ec9 100644 --- a/protoforge/core/fault.py +++ b/protoforge/core/fault.py @@ -502,6 +502,116 @@ multiplier=1.0, noise_scale=5.0), ], ), + + # ================================================================== + # 车床 CNC Rough 粗车故障类型 + # 基线:spindle_speed~2000RPM, spindle_load cutting~55%, spindle_current cutting~13A + # 仅影响 spindle_speed / spindle_load / spindle_current 三个测点 + # ================================================================== + + # ------------------------------------------------------------------ + # 缠屑(车床粗车)— chip_entanglement_rough + # 物理含义:切屑缠绕刀具/工件,切削阻力逐步增大 + # 特征:spindle_load/current 渐进爬升,spindle_speed 基本维持(严重时轻微下降) + # 模式:GRADUAL(渐进式),区别于崩刃的瞬间冲击 + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="chip_entanglement_rough", + name="缠屑(车床粗车)", + description="车床粗车切屑缠绕刀具/工件,切削阻力逐步增大。spindle_load渐进从~55%爬升到70~90%,spindle_current从~13A升至16~20A,spindle_speed基本维持2000RPM(严重时轻微下降到1900RPM)。区别于崩刃:不瞬间冲击;区别于磨损:爬升更快且波动更大", + category="process", + default_duration=180.0, + tags=["缠屑", "渐进", "车床", "粗车"], + point_faults=[ + PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL, + target_min=70.0, target_max=90.0, noise_scale=4.5), + PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, + target_min=16.0, target_max=20.0, noise_scale=1.2), + # 转速只在严重时(progress > 0.6)才轻微下降,nominal_baseline 保持 2000 + PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL, + target_min=1880.0, target_max=1950.0, noise_scale=25.0, + nominal_baseline=2000.0), + ], + ), + + # ------------------------------------------------------------------ + # 刀具磨损(车床粗车)— tool_wear_rough + # 物理含义:刀具逐步变钝,切削阻力慢性增加 + # 特征:load/current 长时间缓慢趋势性上升,转速基本稳定 + # 模式:GRADUAL,持续时间长(600s),不应瞬间恢复 + # 使用 nominal_baseline 避免注入时恰好在空切段导致基线失真 + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="tool_wear_rough", + name="刀具磨损(车床粗车)", +
description="车床粗车刀具逐步变钝,切削阻力慢性增加。spindle_load从~55%缓慢抬升到60~75%,spindle_current从~13A抬升到13~16A,spindle_speed基本稳定在2000RPM。区别于缠屑:爬升极慢;区别于崩刃:无冲击峰值,不停主轴", + category="tool", + default_duration=600.0, + tags=["刀具", "磨损", "渐进", "车床", "粗车", "趋势漂移"], + point_faults=[ + PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL, + target_min=60.0, target_max=75.0, noise_ratio=0.04, + nominal_baseline=55.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, + target_min=13.0, target_max=16.0, noise_ratio=0.04, + nominal_baseline=13.0), + # 磨损对转速影响极小,仅在严重时轻微下降,nominal_baseline 保持 2000 + PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL, + target_min=1930.0, target_max=1990.0, noise_scale=20.0, + nominal_baseline=2000.0), + ], + ), + + # ------------------------------------------------------------------ + # 断刀/崩刀 emergency_stop(车床粗车)— tool_break_emergency_stop_rough + # 物理含义:刀具突然断裂,CNC 触发紧急停机 + # 特征:瞬间冲击后 load/current 归零,spindle_speed 急降到 0 + # 模式:INSTANT,持续时间短(仅代表报警持续窗口),之后停机 + # 断刀冲击只触发一次(注入时随机采样 resolved_target),不每 tick 重新冲击 + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="tool_break_emergency_stop_rough", + name="断刀急停(车床粗车)", + description="车床粗车刀具突然断裂,CNC触发紧急停机。spindle_load瞬间冲高到85~100%,spindle_current冲高到18~25A,随后(下一tick)主轴急停到0。断刀冲击只触发一次,之后进入停机等待状态,不自动恢复正常切削", + category="tool", + default_duration=8.0, + tags=["断刀", "崩刀", "急停", "突发", "车床", "粗车"], + point_faults=[ + # 瞬间冲高,noise_scale 小(冲击值已由 target_min/max 精确控制) + PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, + target_min=85.0, target_max=100.0, noise_scale=3.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + target_min=18.0, target_max=25.0, noise_scale=1.5), + # 主轴急停到 0 + PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, + target_value=0.0, noise_scale=0.0), + ], + ), + + # ------------------------------------------------------------------ + # 断刀/崩刀 broken_cutting(车床粗车)— 
tool_break_broken_cutting_rough + # 物理含义:刀具断裂但主轴未停,在破损刀具状态下继续异常切削 + # 特征:瞬间冲击后 load/current 降到低位(破损刀具切不动),转速维持 + # 模式:INSTANT,持续时间短(8s 冲击窗口)+ 后续低负载异常阶段 + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="tool_break_broken_cutting_rough", + name="断刀异常切削(车床粗车)", + description="车床粗车刀具断裂但主轴未停,破损刀具继续异常切削。spindle_load瞬间冲高到85~100%后降至5~15%,spindle_current冲高到18~25A后降至3~6A,spindle_speed维持1800~2200RPM不停机。区别于急停:主轴不归零", + category="tool", + default_duration=8.0, + tags=["断刀", "崩刀", "异常切削", "突发", "车床", "粗车"], + point_faults=[ + # 冲击后维持低负载(破损刀具切不动) + PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT, + target_min=5.0, target_max=15.0, noise_scale=2.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + target_min=3.0, target_max=6.0, noise_scale=0.8), + # 转速维持,nominal_baseline 避免注入时基线失真 + PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=30.0, nominal_baseline=2000.0), + ], + ), ] # 按 id 索引 diff --git a/protoforge/protocols/mtconnect/lathe_simulator.py b/protoforge/protocols/mtconnect/lathe_simulator.py index 0465327..59484cb 100644 --- a/protoforge/protocols/mtconnect/lathe_simulator.py +++ b/protoforge/protocols/mtconnect/lathe_simulator.py @@ -45,6 +45,7 @@ from typing import Any from protoforge.core.cnc_metric_generator import BaseMetricGenerator +from protoforge.core.fault import fault_injector class _State(Enum): @@ -210,8 +211,32 @@ def __call__(self, device_instance: Any) -> None: vals = device_instance._point_values self._update_cnc_points(vals, frame) - # 6. 上报 Prometheus - self._emit_prometheus(device_instance, frame) + # 6. 
复用铣床故障注入机制:在 baseline 写入后覆盖故障测点值 + # fault_injector.apply() 只覆盖 _point_values,不修改状态机 + # 只有 process_running 切削阶段的故障才有意义; + # 但 apply() 本身会检查 fault.duration,状态机不需要感知 + fault_injector.apply(device_instance) + + # ── 断刀二阶段后处理(不修改 FaultInjector 框架,符合铣床风格)─────── + _active_fault = fault_injector.get_fault(device_instance.id) + if _active_fault is not None: + _fault_id = _active_fault.fault_type_id + _elapsed = _active_fault.elapsed + + # 断刀急停:冲击窗口前 2s → 之后 load/current/speed 降到停机水平 + if _fault_id == "tool_break_emergency_stop_rough" and _elapsed > 2.0: + vals["spindle_load"] = round(random.uniform(0.0, 2.0), 1) + vals["spindle_current"] = round(random.uniform(0.0, 1.0), 2) + vals["spindle_speed"] = 0.0 + + # 断刀异常切削:冲击窗口前 2s 输出冲击峰值,之后由 FaultInjector 维持低负载 + elif _fault_id == "tool_break_broken_cutting_rough" and _elapsed <= 2.0: + vals["spindle_load"] = round(random.uniform(85.0, 100.0) + random.gauss(0, 3.0), 1) + vals["spindle_current"] = round(random.uniform(18.0, 25.0) + random.gauss(0, 1.5), 2) + # 转速在冲击瞬间保持(FaultInjector 已设置 nominal_baseline=2000,此处不覆盖) + + # 7. 
上报 Prometheus(使用 fault-applied 后的 _point_values,而非注入前的 frame) + self._emit_prometheus(device_instance, vals) # ------------------------------------------------------------------ # 状态机 @@ -460,6 +485,8 @@ def _update_cnc_points(self, vals: dict[str, Any], frame) -> None: vals["tool_temperature"] = round(frame.tool_temperature, 2) vals["surface_roughness"] = round(frame.surface_roughness, 3) vals["tool_wear_value"] = round(frame.tool_wear_value, 4) + # 存入 stage 供 _emit_prometheus 使用(不作为 MTConnect 测点上报) + vals["_stage"] = frame.stage # 故障覆盖:崩刀时 spindle_load 突增并覆盖 MetricFrame 的值 if is_tool_break: @@ -473,9 +500,10 @@ def _update_cnc_points(self, vals: dict[str, Any], frame) -> None: wrap_load = min(100.0, 30.0 + self._wrap_load_increment + random.gauss(0, 2)) vals["spindle_load"] = round(wrap_load, 1) - def _emit_prometheus(self, device_instance: Any, frame) -> None: + def _emit_prometheus(self, device_instance: Any, vals: dict) -> None: """ 通过 MetricsCollector 上报 Prometheus 指标。 + 使用 fault-applied 后的 device._point_values,确保故障覆盖值能正确上报。 复用项目已有的 set_gauge 接口,不重复注册。 """ try: @@ -485,24 +513,26 @@ def _emit_prometheus(self, device_instance: Any, frame) -> None: device_id = getattr(device_instance.config, "id", "unknown") device_name = getattr(device_instance.config, "name", "unknown") + # stage 取自 vals["_stage"](由 _update_cnc_points 从 frame.stage 写入;故障不改变 stage 标签) + stage = vals.get("_stage", "roughing") labels = { "device_id": device_id, "device_name": device_name, "protocol": "mtconnect", - "stage": frame.stage, + "stage": stage, } - metrics.set_gauge("cnc_feed_rate", frame.feed_rate, {**labels, "unit": "mm/min"}) - metrics.set_gauge("cnc_spindle_speed", frame.spindle_speed, {**labels, "unit": "RPM"}) - metrics.set_gauge("cnc_spindle_current", frame.spindle_current, {**labels, "unit": "A"}) - metrics.set_gauge("cnc_spindle_load", frame.spindle_load, {**labels, "unit": "%"}) - metrics.set_gauge("cnc_vibration_x", frame.vibration_x, {**labels, "unit": "mm/s"}) - metrics.set_gauge("cnc_vibration_y",
frame.vibration_y, {**labels, "unit": "mm/s"}) - metrics.set_gauge("cnc_vibration_z", frame.vibration_z, {**labels, "unit": "mm/s"}) - metrics.set_gauge("cnc_acoustic_emission", frame.acoustic_emission, {**labels, "unit": "V"}) - metrics.set_gauge("cnc_tool_temperature", frame.tool_temperature, {**labels, "unit": "C"}) - metrics.set_gauge("cnc_surface_roughness", frame.surface_roughness, {**labels, "unit": "um"}) - metrics.set_gauge("cnc_tool_wear_value", frame.tool_wear_value, {**labels, "unit": "um"}) + metrics.set_gauge("cnc_feed_rate", vals.get("feed_rate", 0.0), {**labels, "unit": "mm/min"}) + metrics.set_gauge("cnc_spindle_speed", vals.get("spindle_speed", 0.0), {**labels, "unit": "RPM"}) + metrics.set_gauge("cnc_spindle_current", vals.get("spindle_current", 0.0), {**labels, "unit": "A"}) + metrics.set_gauge("cnc_spindle_load", vals.get("spindle_load", 0.0), {**labels, "unit": "%"}) + metrics.set_gauge("cnc_vibration_x", vals.get("vibration_x", 0.0), {**labels, "unit": "mm/s"}) + metrics.set_gauge("cnc_vibration_y", vals.get("vibration_y", 0.0), {**labels, "unit": "mm/s"}) + metrics.set_gauge("cnc_vibration_z", vals.get("vibration_z", 0.0), {**labels, "unit": "mm/s"}) + metrics.set_gauge("cnc_acoustic_emission", vals.get("acoustic_emission", 0.0), {**labels, "unit": "V"}) + metrics.set_gauge("cnc_tool_temperature", vals.get("tool_temperature", 0.0), {**labels, "unit": "C"}) + metrics.set_gauge("cnc_surface_roughness", vals.get("surface_roughness", 0.0), {**labels, "unit": "um"}) + metrics.set_gauge("cnc_tool_wear_value", vals.get("tool_wear_value", 0.0), {**labels, "unit": "um"}) # ------------------------------------------------------------------ From dbc74a5775290c427ba12c5070bd4a3b349e056e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Wed, 10 Jun 2026 08:48:09 +0800 Subject: [PATCH 50/55] fix --- protoforge/core/cnc_metric_generator.py | 204 ++++++++++++------ .../protocols/mtconnect/lathe_simulator.py | 161 ++++++++++---- 2 
files changed, 252 insertions(+), 113 deletions(-) diff --git a/protoforge/core/cnc_metric_generator.py b/protoforge/core/cnc_metric_generator.py index 457d654..d75482d 100644 --- a/protoforge/core/cnc_metric_generator.py +++ b/protoforge/core/cnc_metric_generator.py @@ -125,6 +125,10 @@ class GeneratorState: # 加工周期状态:air_cut / entry_cut / cutting / exit_cut # cycle_state 只描述负载形态,不控制主轴启停或转速档位 cycle_state: str = "air_cut" + # 当前工艺阶段:rough / semi_finish / finish + # 由 LatheSimulator 在每次 tick 前设置,支持 single_process 和 process_flow 两种模式 + # None 表示从 stage 自动推导(保持向后兼容) + current_process: Optional[str] = None # --------------------------------------------------------------------------- @@ -384,12 +388,14 @@ class CncSpindleGenerator: 统一驱动 spindle_speed / spindle_load / spindle_current 的联动生成器。 生成链路: - 工艺阶段(process) → 目标转速 → 实际转速(EMA) → 负载(SpindleLoadGenerator) + 工艺阶段(process) → 目标转速 → 实际转速(EMA) → 负载(process 参数化) → 电流(负载+转速映射) stage 参数取值:idle / tool_change / roughing / semi_finishing / finishing spindle_state 参数取值:idle / tool_change / spinup / cutting / decel (由 LatheSimulator 状态机传入,用于控制转速 EMA alpha) + process 参数取值:rough / semi_finish / finish + (由外部传入,覆盖 stage 推导,用于 process_flow 模式) """ def __init__(self, rng: random.Random, load_gen: SpindleLoadGenerator): @@ -399,7 +405,6 @@ def __init__(self, rng: random.Random, load_gen: SpindleLoadGenerator): self.prev_speed: float = 0.0 self.prev_load: float = 0.0 self.prev_current: float = 0.0 - self.process: str = "rough" self.current_cycle_id: Optional[str] = None self.cycle_factor: float = 1.0 @@ -410,8 +415,8 @@ def __init__(self, rng: random.Random, load_gen: SpindleLoadGenerator): self.exit_ramp_seconds: float = 4.5 self._cycle_cutting_load: float = 55.0 - def start_new_cycle(self, cycle_id: str) -> None: - """每个 rough 切削周期只刷新一次周期级扰动参数。""" + def start_new_cycle(self, cycle_id: str, process: str = "rough") -> None: + """每个切削周期只刷新一次周期级扰动参数。""" if cycle_id == self.current_cycle_id: return self.current_cycle_id = cycle_id @@ -419,8 
+424,12 @@ def start_new_cycle(self, cycle_id: str) -> None: self.phase1 = self._rng.uniform(0, 2 * math.pi) self.phase2 = self._rng.uniform(0, 2 * math.pi) self.material_phase = self._rng.uniform(0, 2 * math.pi) - self.entry_ramp_seconds = self._rng.uniform(4.0, 8.0) - self.exit_ramp_seconds = self._rng.uniform(3.0, 6.0) + pcfg = _PROCESS_CFG.get(process, _PROCESS_CFG["rough"]) + # entry/exit ramp 从工艺配置中随机采样 + entry_range = pcfg.get("entry_ramp_s", 6.0) + exit_range = pcfg.get("exit_ramp_s", 4.5) + self.entry_ramp_seconds = self._rng.uniform(entry_range * 0.6, entry_range * 1.4) + self.exit_ramp_seconds = self._rng.uniform(exit_range * 0.6, exit_range * 1.4) def generate( self, @@ -430,27 +439,31 @@ def generate( cutting_elapsed: float = 0.0, cutting_total: float = 30.0, task_state: str = "idle", + process: Optional[str] = None, ) -> tuple[float, float, float]: """ 生成 (spindle_speed, spindle_load, spindle_current)。 Args: t: 当前时间(秒)。 - stage: MetricGenerator 加工阶段。 + stage: MetricGenerator 加工阶段(roughing/semi_finishing/finishing/idle/tool_change)。 spindle_state: LatheSimulator 内部状态(idle/spinup/cutting/decel/tool_change)。 - cutting_elapsed: 切削阶段已过秒数(传给负载生成器)。 - cutting_total: 切削阶段总时长(传给负载生成器)。 + cutting_elapsed: 切削阶段已过秒数。 + cutting_total: 切削阶段总时长。 task_state: 任务级状态(process_running/idle)。 - process_running 时主轴保持目标转速,即使 stage=idle(air_cut 间隙)。 + process: 工艺阶段(rough/semi_finish/finish)。 + None 时从 stage 自动推导;可由 LatheSimulator 显式传入以支持 process_flow。 """ - # 本轮优化固定为 rough 正常工况;stage 仍原样写入 MetricFrame/标签。 - process = "rough" + # 优先使用外部传入的 process;若为 None 则从 stage 推导 + if process is None: + process = _STAGE_TO_PROCESS.get(stage, "rough") + cycle_state = self._resolve_cycle_state(stage, task_state, cutting_elapsed, cutting_total) - cycle_id = self._resolve_cycle_id(t, stage, task_state, cutting_elapsed, cutting_total) - self.start_new_cycle(cycle_id) + cycle_id = self._resolve_cycle_id(t, stage, task_state, cutting_elapsed, cutting_total, process) + 
self.start_new_cycle(cycle_id, process) - # ── 1. 主轴转速 ──────────────────────────────────────────────────── - speed = self._calc_speed(stage, spindle_state, process, task_state) + # ── 1. 主轴转速(由 process 和 task_state 决定,不由 cycle_state 决定)── + speed = self._calc_speed(spindle_state, process, task_state) # 保持旧负载生成器的周期状态同步,避免其他调用路径依赖其内部状态。 self._load_gen.generate( @@ -459,11 +472,11 @@ def generate( cutting_elapsed=cutting_elapsed, cutting_total=cutting_total, ) - # ── 2. 主轴负载(rough 正常工况,cycle_state 只影响负载形态)─────── - load = self._calc_rough_load(t, speed, task_state, cycle_state, cutting_elapsed, cutting_total) + # ── 2. 主轴负载(由 process 参数化,cycle_state 控制 ramp 形态)────── + load = self._calc_load(t, speed, process, task_state, cycle_state, cutting_elapsed, cutting_total) - # ── 3. 主轴电流(由转速 + 负载推导)─────────────────────────────── - current = self._calc_current(stage, spindle_state, process, speed, load, task_state, cycle_state) + # ── 3. 主轴电流(由转速 + 负载 + process 推导)───────────────────── + current = self._calc_current(process, speed, load, task_state, cycle_state) return speed, load, current @@ -471,28 +484,44 @@ def generate( def _calc_speed( self, - stage: str, spindle_state: str, - process: Optional[str], + process: str, task_state: str = "idle", ) -> float: """ - 转速只由任务级状态控制启停;rough 周期状态不切换转速档位。 + 转速只由 task_state 和 process 决定。 + - idle/tool_change → 目标 0 RPM(降速) + - process_running → 目标转速由当前 process 决定(rough=2000, semi=3000, finish=4000) + - cycle_state 不参与转速决策,air_cut 期间转速保持目标值 """ + scfg = _PROCESS_SPEED_CFG.get(process, _PROCESS_SPEED_CFG["rough"]) + if task_state in ("idle", "spindle_off", "tool_change"): target = 0.0 alpha = self._rng.uniform(0.12, 0.25) else: - target = 2000.0 - if spindle_state == "spinup" or self.prev_speed < 1750.0: + target = scfg["target"] + # 升速阶段(转速还在目标 90% 以下)用较大 alpha,稳态用小 alpha + threshold = scfg["target"] * 0.90 + if spindle_state == "spinup" or self.prev_speed < threshold: alpha = self._rng.uniform(0.10, 0.18) else: alpha = 
self._rng.uniform(0.03, 0.08) new_speed = _ema(self.prev_speed, target, alpha) - if task_state not in ("idle", "spindle_off", "tool_change") and new_speed > 1750.0: - new_speed += self._rng.uniform(-30.0, 30.0) - new_speed = _clamp(new_speed, 0.0, 2200.0) + # 稳态时叠加小幅噪声(转速高于目标 85% 时才加) + if task_state not in ("idle", "spindle_off", "tool_change"): + threshold_noise = scfg["target"] * 0.85 + if new_speed > threshold_noise: + noise = scfg["noise"] + new_speed += self._rng.uniform(-noise, noise) + + # clamp:运行中保持在 process 允许的转速区间 + if task_state not in ("idle", "spindle_off", "tool_change"): + new_speed = _clamp(new_speed, scfg["lo"], scfg["hi"]) + else: + new_speed = _clamp(new_speed, 0.0, scfg["hi"]) + self.prev_speed = new_speed return new_speed @@ -524,68 +553,94 @@ def _resolve_cycle_id( task_state: str, cutting_elapsed: float, cutting_total: float, + process: str = "rough", ) -> str: if task_state != "process_running": return "stopped" if stage not in _STAGE_TO_PROCESS: return self.current_cycle_id or "air_cut" cycle_start = t - cutting_elapsed - return f"rough:{cycle_start:.0f}:{cutting_total:.0f}" - - def _air_cut_load_target(self, t: float) -> float: - target = 7.0 + math.sin(t * 0.20) * 1.5 + self._rng.uniform(-0.8, 0.8) - return _clamp(target, 5.0, 12.0) - - def _rough_cutting_load_target(self, t: float) -> float: - effective_base = 55.0 * self.cycle_factor - slow_wave = math.sin(t * 0.10 + self.phase1) * 5.0 - cutting_wave = math.sin(t * 0.75 + self.phase2) * 2.5 - material_drift = math.sin(t * 0.03 + self.material_phase) * 4.0 - small_noise = self._rng.uniform(-2.0, 2.0) - target = effective_base + slow_wave + cutting_wave + material_drift + small_noise - return _clamp(target, 35.0, 82.0) - - def _calc_rough_load( + return f"{process}:{cycle_start:.0f}:{cutting_total:.0f}" + + def _air_cut_load_target(self, t: float, process: str) -> float: + pcfg = _PROCESS_CFG.get(process, _PROCESS_CFG["rough"]) + air_base = pcfg["air_cut_base"] + target = air_base + 
math.sin(t * 0.20) * (pcfg["noise_range"] * 0.4) + self._rng.uniform(-0.8, 0.8) + # air_cut 负载 clamp 到各工艺的 air 区间(rough:5~12, semi:4~10, finish:3~8) + air_lo = { + "rough": 5.0, + "semi_finish": 4.0, + "finish": 3.0, + }.get(process, 5.0) + air_hi = { + "rough": 12.0, + "semi_finish": 10.0, + "finish": 8.0, + }.get(process, 12.0) + return _clamp(target, air_lo, air_hi) + + def _cutting_load_target(self, t: float, process: str) -> float: + pcfg = _PROCESS_CFG.get(process, _PROCESS_CFG["rough"]) + effective_base = pcfg["base_load"] * self.cycle_factor + slow_wave = math.sin(t * pcfg["slow_freq"] + self.phase1) * pcfg["slow_amp"] + cut_wave = math.sin(t * pcfg["cut_freq"] + self.phase2) * pcfg["cut_amp"] + material_drift = math.sin(t * pcfg["material_freq"] + self.material_phase) * pcfg["material_amp"] + small_noise = self._rng.uniform(-pcfg["noise_range"], pcfg["noise_range"]) + target = effective_base + slow_wave + cut_wave + material_drift + small_noise + return _clamp(target, pcfg["clamp_min"], pcfg["clamp_max"]) + + def _calc_load( self, t: float, speed: float, + process: str, task_state: str, cycle_state: str, cutting_elapsed: float, cutting_total: float, ) -> float: + """主轴负载由 process 参数化,cycle_state 控制 entry/exit ramp 形态。""" + pcfg = _PROCESS_CFG.get(process, _PROCESS_CFG["rough"]) + if speed <= 50.0: target = self._rng.uniform(0.0, 2.0) alpha = self._rng.uniform(0.30, 0.45) lo, hi = 0.0, 2.0 elif task_state == "process_running": - air_load = self._air_cut_load_target(t) - cutting_target = self._rough_cutting_load_target(t) + air_load = self._air_cut_load_target(t, process) + cutting_target = self._cutting_load_target(t, process) self._cycle_cutting_load = cutting_target + air_lo = { + "rough": 5.0, "semi_finish": 4.0, "finish": 3.0, + }.get(process, 5.0) + if cycle_state == "air_cut": target = air_load alpha = self._rng.uniform(0.18, 0.25) - lo, hi = 5.0, 12.0 + lo, hi = air_lo, pcfg["air_cut_base"] + pcfg["noise_range"] * 2 elif cycle_state == "entry_cut": 
ratio = _clamp(cutting_elapsed / max(self.entry_ramp_seconds, 0.1), 0.0, 1.0) + # smoothstep 使 ramp 更自然 + ratio = ratio * ratio * (3 - 2 * ratio) target = air_load + (cutting_target - air_load) * ratio alpha = self._rng.uniform(0.08, 0.14) - lo, hi = 5.0, 82.0 + lo, hi = air_lo, pcfg["clamp_max"] elif cycle_state == "cutting": target = cutting_target - alpha = self._rng.uniform(0.08, 0.15) - lo, hi = 35.0, 82.0 + alpha = pcfg["ema_alpha"] + lo, hi = pcfg["clamp_min"], pcfg["clamp_max"] elif cycle_state == "exit_cut": exit_elapsed = max(0.0, self.exit_ramp_seconds - (cutting_total - cutting_elapsed)) ratio = _clamp(exit_elapsed / max(self.exit_ramp_seconds, 0.1), 0.0, 1.0) + ratio = ratio * ratio * (3 - 2 * ratio) target = self._cycle_cutting_load * (1.0 - ratio) + air_load * ratio alpha = self._rng.uniform(0.10, 0.18) - lo, hi = 5.0, 82.0 + lo, hi = air_lo, pcfg["clamp_max"] else: target = air_load alpha = self._rng.uniform(0.18, 0.25) - lo, hi = 5.0, 12.0 + lo, hi = air_lo, pcfg["air_cut_base"] + pcfg["noise_range"] * 2 else: target = self._rng.uniform(0.0, 2.0) alpha = self._rng.uniform(0.25, 0.40) @@ -593,7 +648,9 @@ def _calc_rough_load( new_load = _ema(self.prev_load, target, alpha) if speed > 50.0 and task_state == "process_running": - min_load = 5.0 if cycle_state in ("air_cut", "entry_cut", "exit_cut") else 35.0 + air_lo = {"rough": 5.0, "semi_finish": 4.0, "finish": 3.0}.get(process, 5.0) + cut_lo = pcfg["clamp_min"] + min_load = air_lo if cycle_state in ("air_cut", "entry_cut", "exit_cut") else cut_lo new_load = _clamp(new_load, min_load, hi) else: new_load = _clamp(new_load, lo, hi) @@ -602,39 +659,49 @@ def _calc_rough_load( def _calc_current( self, - stage: str, - spindle_state: str, - process: Optional[str], + process: str, speed: float, load: float, task_state: str = "idle", cycle_state: str = "air_cut", ) -> float: - """电流由主轴转速和负载推导,避免独立随机曲线。""" + """电流由 process、主轴转速和负载推导,避免独立随机曲线。""" + ccfg = _PROCESS_CURRENT_CFG.get(process, 
_PROCESS_CURRENT_CFG["rough"]) + base = ccfg["base"] + load_factor = ccfg["load_factor"] + noise_amp = ccfg["noise"] + if speed <= 50.0: target = self._rng.uniform(0.0, 0.8) alpha = self._rng.uniform(0.25, 0.40) lo, hi = 0.0, 0.8 elif cycle_state == "air_cut": - target = 3.5 + load * 0.12 + self._rng.uniform(-0.4, 0.4) + target = base + load * load_factor * 0.65 + self._rng.uniform(-noise_amp * 0.8, noise_amp * 0.8) alpha = self._rng.uniform(0.15, 0.25) - lo, hi = 2.5, 6.0 + # air_cut 电流 clamp 到各工艺 air 区间 + air_lo_map = {"rough": 2.5, "semi_finish": 2.0, "finish": 1.5} + air_hi_map = {"rough": 6.0, "semi_finish": 5.0, "finish": 4.0} + lo = air_lo_map.get(process, 2.5) + hi = air_hi_map.get(process, 6.0) elif cycle_state == "entry_cut": - target = 3.0 + load * 0.17 + self._rng.uniform(-0.5, 0.5) + target = base + load * load_factor + self._rng.uniform(-noise_amp, noise_amp) alpha = self._rng.uniform(0.08, 0.16) - lo, hi = 2.5, 17.0 + lo, hi = ccfg["lo"] * 0.5, ccfg["hi"] elif cycle_state == "cutting": - target = 3.0 + load * 0.18 + self._rng.uniform(-0.6, 0.6) + target = base + load * load_factor + self._rng.uniform(-noise_amp, noise_amp) alpha = self._rng.uniform(0.08, 0.15) - lo, hi = 10.0, 17.0 + lo, hi = ccfg["lo"], ccfg["hi"] elif cycle_state == "exit_cut": - target = 3.0 + load * 0.16 + self._rng.uniform(-0.5, 0.5) + target = base + load * load_factor + self._rng.uniform(-noise_amp * 0.9, noise_amp * 0.9) alpha = self._rng.uniform(0.10, 0.20) - lo, hi = 2.5, 17.0 + lo, hi = ccfg["lo"] * 0.5, ccfg["hi"] else: - target = 3.0 + load * 0.12 + self._rng.uniform(-0.4, 0.4) + target = base + load * load_factor * 0.65 + self._rng.uniform(-noise_amp * 0.8, noise_amp * 0.8) alpha = self._rng.uniform(0.15, 0.25) - lo, hi = 2.5, 6.0 + air_lo_map = {"rough": 2.5, "semi_finish": 2.0, "finish": 1.5} + air_hi_map = {"rough": 6.0, "semi_finish": 5.0, "finish": 4.0} + lo = air_lo_map.get(process, 2.5) + hi = air_hi_map.get(process, 6.0) new_current = _ema(self.prev_current, 
target, alpha) new_current = _clamp(new_current, lo, hi) @@ -812,6 +879,8 @@ def generate(self, t: float, dt: float, stage: str) -> MetricFrame: task_state = "process_running" if spindle_state == "idle": spindle_state = "cutting" + # process 优先由外部(LatheSimulator)通过 state 传入;若未设置则从 stage 推导 + current_process = getattr(state, "current_process", None) or _STAGE_TO_PROCESS.get(stage, "rough") spindle_speed, spindle_load, spindle_current = self._spindle_gen.generate( t=t, stage=stage, @@ -819,6 +888,7 @@ def generate(self, t: float, dt: float, stage: str) -> MetricFrame: cutting_elapsed=state.cutting_elapsed, cutting_total=state.cutting_total, task_state=task_state, + process=current_process, ) # ── 6. vibration(三轴,各有小幅随机偏差)──────────────────────────── diff --git a/protoforge/protocols/mtconnect/lathe_simulator.py b/protoforge/protocols/mtconnect/lathe_simulator.py index 59484cb..1451b67 100644 --- a/protoforge/protocols/mtconnect/lathe_simulator.py +++ b/protoforge/protocols/mtconnect/lathe_simulator.py @@ -17,12 +17,18 @@ - 只有 IDLE / TOOL_CHANGE / 故障恢复 时 task_state="idle",主轴才降到 0 - 每完成 cycles_per_task 个切削周期后才真正回到 IDLE(换刀或停机) +process_mode 支持: + - "single_process":固定一种工艺(rough / semi_finish / finish),适合单独观察 + - "process_flow":模拟完整流程 rough → semi_finish → finish,各阶段持续时间可配置 + 每个 tick 的处理流程: 1. 状态机推进(确定当前 stage) - 2. BaseMetricGenerator.generate() 生成健康 MetricFrame + 2. 工艺阶段更新(process_flow 模式下检查是否需要切换 process) + 3. BaseMetricGenerator.generate() 生成健康 MetricFrame (联动建模 + 噪声 + clamp,正常加工算法与故障逻辑解耦) - 3. 把 MetricFrame 写入 device._point_values - 4. 通过 MetricsCollector 上报 Prometheus + 4. 把 MetricFrame 写入 device._point_values + 5. 故障注入(复用铣床 FaultInjector 机制) + 6. 
通过 MetricsCollector 上报 Prometheus 崩刀(TOOL_BREAK)的 CNC 可观测特征: - spindle_load 突增(驱动器过载保护触发) @@ -60,19 +66,6 @@ class _State(Enum): CHIP_WRAP = "chip_wrap" -# 状态机阶段 → MetricGenerator 加工阶段的映射 -_STATE_TO_STAGE: dict[_State, str] = { - _State.IDLE: "idle", - _State.SPINUP: "idle", - _State.AIR_CUT: "idle", # air_cut 阶段负载模型用 idle,但主轴不停 - _State.CUTTING: "roughing", # 默认粗加工,子阶段由 _cutting_stage 动态切换 - _State.DECEL_CYCLE: "idle", # 周期间减速,主轴不停 - _State.DECEL: "idle", # 任务级降速 - _State.TOOL_CHANGE: "tool_change", - _State.TOOL_BREAK: "idle", - _State.CHIP_WRAP: "roughing", -} - # task_state 映射:process_running = 主轴保持,idle = 主轴可以停 _STATE_TO_TASK: dict[_State, str] = { _State.IDLE: "idle", @@ -86,6 +79,21 @@ class _State(Enum): _State.CHIP_WRAP: "process_running", } +# 工艺阶段 → MetricGenerator stage 名称的映射 +_PROCESS_TO_STAGE: dict[str, str] = { + "rough": "roughing", + "semi_finish": "semi_finishing", + "finish": "finishing", +} + +# process_flow 模式:各工艺阶段的持续时间区间(秒) +_PROCESS_FLOW_DURATION: dict[str, tuple[float, float]] = { + "rough": (120.0, 300.0), # 2~5 分钟 + "semi_finish": (60.0, 180.0), # 1~3 分钟 + "finish": (60.0, 180.0), # 1~3 分钟 +} +_PROCESS_FLOW_ORDER = ["rough", "semi_finish", "finish"] + # 刀塔配置(刀位号, 刀具ID) _TOOL_TABLE = [ (1, "T01"), # 外圆粗车刀 @@ -109,17 +117,31 @@ class _State(Enum): "N0120 M30", ] -# 每个零件的加工子阶段序列(本轮正常工况固定为 rough) -# (阶段名, 开始进度, 结束进度) -_CUT_SUBSTAGES = [ - ("roughing", 0.00, 1.00), -] - class LatheSimulator: - """注册为 DeviceInstance 的 post_tick_hook,每次 tick 更新所有测点。""" - - def __init__(self): + """ + 注册为 DeviceInstance 的 post_tick_hook,每次 tick 更新所有测点。 + + Args: + process_mode: "single_process"(默认,固定工艺)或 "process_flow"(完整流程) + process: single_process 模式下使用的工艺("rough"/"semi_finish"/"finish") + """ + + def __init__( + self, + process_mode: str = "single_process", + process: str = "rough", + ): + # ── 工艺模式配置 ──────────────────────────────────────────────────── + self._process_mode = process_mode # "single_process" | "process_flow" + self._process = process # 
当前工艺:rough / semi_finish / finish + + # process_flow 模式下的阶段跟踪 + self._flow_idx = 0 # 当前在 _PROCESS_FLOW_ORDER 中的索引 + self._flow_elapsed = 0.0 # 当前 process 已运行秒数 + self._flow_duration = self._sample_flow_duration(process) # 当前 process 目标持续时长 + + # ── 状态机 ────────────────────────────────────────────────────────── self._state = _State.IDLE self._state_elapsed = 0.0 self._state_duration = 0.0 @@ -151,7 +173,7 @@ def __init__(self): self._wrap_load_increment = 0.0 self._fault_cooldown = 0 - # 当前切削子阶段(roughing/semi_finishing/finishing) + # 当前切削子阶段(由 process 决定) self._cutting_stage = "roughing" # 当前任务内已完成的切削周期数(达到上限后才真正停机) @@ -169,9 +191,48 @@ def __init__(self): ) # ------------------------------------------------------------------ - # post_tick_hook 入口 + # 工艺阶段管理 # ------------------------------------------------------------------ + @staticmethod + def _sample_flow_duration(process: str) -> float: + lo, hi = _PROCESS_FLOW_DURATION.get(process, (120.0, 300.0)) + return random.uniform(lo, hi) + + def _update_process(self) -> None: + """ + process_flow 模式:每 tick 累加流逝时间,到期后切换到下一工艺。 + single_process 模式:不做任何操作。 + """ + if self._process_mode != "process_flow": + return + + task_state = _STATE_TO_TASK.get(self._state, "idle") + if task_state == "process_running": + self._flow_elapsed += 1.0 + + if self._flow_elapsed >= self._flow_duration: + next_idx = (self._flow_idx + 1) % len(_PROCESS_FLOW_ORDER) + next_process = _PROCESS_FLOW_ORDER[next_idx] + self._flow_idx = next_idx + self._process = next_process + self._flow_elapsed = 0.0 + self._flow_duration = self._sample_flow_duration(next_process) + # 更新状态机内的切削阶段标识(用于 NC 程序行号等信号) + self._cutting_stage = _PROCESS_TO_STAGE.get(next_process, "roughing") + + def _get_metric_stage(self) -> str: + """ + 将状态机状态映射到 MetricGenerator 加工阶段名。 + CUTTING / CHIP_WRAP 时使用当前 process 对应的 stage; + 其余状态使用 idle / tool_change。 + """ + if self._state in (_State.CUTTING, _State.CHIP_WRAP): + return _PROCESS_TO_STAGE.get(self._process, "roughing") + if 
self._state == _State.TOOL_CHANGE: + return "tool_change" + return "idle" + def __call__(self, device_instance: Any) -> None: self._tick_count += 1 t = float(self._tick_count) # 用 tick 序号作为时间 t(dt=1s) @@ -179,10 +240,13 @@ def __call__(self, device_instance: Any) -> None: # 1. 状态机推进 self._step_state_machine() - # 2. 确定当前 MetricGenerator 阶段 + # 2. 工艺阶段更新(process_flow 模式下检查是否需要切换 process) + self._update_process() + + # 3. 确定当前 MetricGenerator 阶段(由当前 process 决定) stage = self._get_metric_stage() - # 3. 把状态机信息同步给 MetricGenerator + # 4. 把状态机信息同步给 MetricGenerator if self._state == _State.CUTTING: self._metric_gen.state.cutting_total = self._state_duration @@ -204,17 +268,18 @@ def __call__(self, device_instance: Any) -> None: task_state = _STATE_TO_TASK.get(self._state, "idle") self._metric_gen.state.task_state = task_state - # 4. 生成正常加工 MetricFrame(含联动 + 噪声 + clamp) + # 当前工艺阶段(传给 CncSpindleGenerator,控制转速目标和负载/电流基线) + self._metric_gen.state.current_process = self._process + + # 5. 生成正常加工 MetricFrame(含联动 + 噪声 + clamp) frame = self._metric_gen.generate(t=t, dt=1.0, stage=stage) - # 5. 把 MetricFrame 写入 device._point_values(MTConnect 标准测点) + # 6. 把 MetricFrame 写入 device._point_values(MTConnect 标准测点) vals = device_instance._point_values self._update_cnc_points(vals, frame) - # 6. 复用铣床故障注入机制:在 baseline 写入后覆盖故障测点值 + # 7. 复用铣床故障注入机制:在 baseline 写入后覆盖故障测点值 # fault_injector.apply() 只覆盖 _point_values,不修改状态机 - # 只有 process_running 切削阶段的故障才有意义; - # 但 apply() 本身会检查 fault.duration,状态机不需要感知 fault_injector.apply(device_instance) # ── 断刀二阶段后处理(不修改 FaultInjector 框架,符合铣床风格)─────── @@ -235,7 +300,7 @@ def __call__(self, device_instance: Any) -> None: vals["spindle_current"] = round(random.uniform(18.0, 25.0) + random.gauss(0, 1.5), 2) # 转速在冲击瞬间保持(FaultInjector 已设置 nominal_baseline=2000,此处不覆盖) - # 7. 上报 Prometheus(使用 fault-applied 后的 _point_values,而非注入前的 frame) + # 8. 
上报 Prometheus(使用 fault-applied 后的 _point_values,而非注入前的 frame) self._emit_prometheus(device_instance, vals) # ------------------------------------------------------------------ @@ -266,16 +331,19 @@ def _transition(self, new_state: _State, duration: float) -> None: self._state_duration = duration def _get_metric_stage(self) -> str: - """将状态机状态映射到 MetricGenerator 阶段。""" - if self._state == _State.CUTTING: - return "roughing" - if self._state == _State.CHIP_WRAP: - return "roughing" - return _STATE_TO_STAGE.get(self._state, "idle") + """ + 将状态机状态映射到 MetricGenerator 加工阶段名(已移至工艺管理区)。 + 此方法保留作为重载点,实现在类上方的工艺管理方法中。 + """ + if self._state in (_State.CUTTING, _State.CHIP_WRAP): + return _PROCESS_TO_STAGE.get(self._process, "roughing") + if self._state == _State.TOOL_CHANGE: + return "tool_change" + return "idle" def _update_cutting_substage(self, progress: float) -> None: - """本轮正常工况只模拟 rough,不在小周期内切换 semi/finish。""" - self._cutting_stage = "roughing" + """切削子阶段由当前 process 决定,不随 progress 在周期内切换工艺。""" + self._cutting_stage = _PROCESS_TO_STAGE.get(self._process, "roughing") def _on_idle(self) -> None: self._spindle_target = 0.0 @@ -284,11 +352,12 @@ def _on_idle(self) -> None: self._condition_native_code = "" self._wrap_load_increment = 0.0 if self._state_elapsed >= self._state_duration: - # 开始新任务:主轴升速目标转速(粗加工 2000 RPM) - self._spindle_target = 2000.0 + # 开始新任务:主轴升速到当前 process 的目标转速 + from protoforge.core.cnc_metric_generator import _PROCESS_SPEED_CFG + self._spindle_target = _PROCESS_SPEED_CFG.get(self._process, {}).get("target", 2000.0) self._program_line = 1 self._block_idx = 0 - self._cutting_stage = "roughing" + self._cutting_stage = _PROCESS_TO_STAGE.get(self._process, "roughing") self._cycles_in_task = 0 self._cycles_per_task = random.randint(3, 6) self._transition(_State.SPINUP, random.uniform(4, 8)) From a4175c8573ab754a38fddf2f04857cd0ec25aca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Thu, 11 Jun 2026 06:24:24 +0800 Subject: [PATCH 
51/55] fix --- protoforge/core/device.py | 2 ++ protoforge/core/metrics.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/protoforge/core/device.py b/protoforge/core/device.py index f04414a..62588b6 100644 --- a/protoforge/core/device.py +++ b/protoforge/core/device.py @@ -83,6 +83,8 @@ def read_all_points(self) -> list[PointValue]: result = [] now = time.time() for name in self._point_values: + if name.startswith("_"): + continue result.append( PointValue( name=name, diff --git a/protoforge/core/metrics.py b/protoforge/core/metrics.py index 21e42f7..9264bb7 100644 --- a/protoforge/core/metrics.py +++ b/protoforge/core/metrics.py @@ -52,6 +52,9 @@ def collect_from_engine(self, engine: Any) -> None: "device_name": device.config.name, "protocol": device.config.protocol, } + stage = device._point_values.get("_stage") + if stage: + labels_base["stage"] = stage for point in device.read_all_points(): point_config = next( (p for p in device.config.points if p.name == point.name), None From 97dd55a8a12b76ade1e534c22976d8a4c8f17144 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Thu, 11 Jun 2026 06:38:18 +0800 Subject: [PATCH 52/55] fix --- protoforge/core/simulators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protoforge/core/simulators.py b/protoforge/core/simulators.py index 4c75832..2569017 100644 --- a/protoforge/core/simulators.py +++ b/protoforge/core/simulators.py @@ -12,7 +12,7 @@ def _build_registry() -> dict[str, Callable[[], Any]]: registry: dict[str, Callable[[], Any]] = {} try: from protoforge.protocols.mtconnect.lathe_simulator import LatheSimulator - registry["mtconnect_lathe"] = LatheSimulator + registry["mtconnect_lathe"] = lambda: LatheSimulator(process_mode="process_flow") except ImportError: pass return registry From e9cde11c41b4dffcf2979a1bdab09854d5925c80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Thu, 11 Jun 2026 06:50:57 +0800 Subject: [PATCH 53/55] fix --- 
protoforge/core/engine.py | 2 +- protoforge/core/simulators.py | 14 +++++++++----- protoforge/models/device.py | 1 + 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/protoforge/core/engine.py b/protoforge/core/engine.py index acd786a..d6117a6 100644 --- a/protoforge/core/engine.py +++ b/protoforge/core/engine.py @@ -62,7 +62,7 @@ async def create_device(self, config: DeviceConfig) -> DeviceInfo: # 注册故障注入钩子 instance.register_post_tick_hook(fault_injector.apply) # 注册设备专用仿真器(如车床状态机),根据 template_id 自动匹配 - simulator = get_device_simulator(config.template_id) + simulator = get_device_simulator(config.template_id, config.simulator_params or {}) if simulator is not None: instance.register_post_tick_hook(simulator) diff --git a/protoforge/core/simulators.py b/protoforge/core/simulators.py index 2569017..809188c 100644 --- a/protoforge/core/simulators.py +++ b/protoforge/core/simulators.py @@ -8,11 +8,11 @@ from typing import Any, Callable, Optional -def _build_registry() -> dict[str, Callable[[], Any]]: - registry: dict[str, Callable[[], Any]] = {} +def _build_registry() -> dict[str, Callable[..., Any]]: + registry: dict[str, Callable[..., Any]] = {} try: from protoforge.protocols.mtconnect.lathe_simulator import LatheSimulator - registry["mtconnect_lathe"] = lambda: LatheSimulator(process_mode="process_flow") + registry["mtconnect_lathe"] = LatheSimulator except ImportError: pass return registry @@ -21,13 +21,17 @@ def _build_registry() -> dict[str, Callable[[], Any]]: _REGISTRY = _build_registry() -def get_device_simulator(template_id: Optional[str]) -> Optional[Any]: +def get_device_simulator( + template_id: Optional[str], + simulator_params: dict[str, Any] | None = None, +) -> Optional[Any]: """ 根据 template_id 返回一个新的仿真器实例,未匹配则返回 None。 + simulator_params 会作为关键字参数透传给仿真器构造函数。 """ if template_id is None: return None factory = _REGISTRY.get(template_id) if factory is None: return None - return factory() + return factory(**(simulator_params or {})) diff --git 
a/protoforge/models/device.py b/protoforge/models/device.py index cbe35e4..44dc2db 100644 --- a/protoforge/models/device.py +++ b/protoforge/models/device.py @@ -48,6 +48,7 @@ class DeviceConfig(BaseModel): template_id: Optional[str] = None points: list[PointConfig] = Field(default_factory=list) protocol_config: dict[str, Any] = Field(default_factory=dict) + simulator_params: dict[str, Any] = Field(default_factory=dict) class PointValue(BaseModel): From a26a04c9597f5fc2359aad45f443bdd7bd030be9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Thu, 11 Jun 2026 06:55:01 +0800 Subject: [PATCH 54/55] fix --- protoforge/api/v1/router.py | 1 + 1 file changed, 1 insertion(+) diff --git a/protoforge/api/v1/router.py b/protoforge/api/v1/router.py index 7a6c050..604f521 100644 --- a/protoforge/api/v1/router.py +++ b/protoforge/api/v1/router.py @@ -431,6 +431,7 @@ async def search_templates(q: str = "", protocol: Optional[str] = None, tag: Opt q_lower = q.lower() templates = [t for t in templates if q_lower in t.name.lower() or + q_lower in t.id.lower() or q_lower in (t.description or "").lower() or any(q_lower in tag_item.lower() for tag_item in (t.tags or []))] if tag: From f0aaaeea4a85be694186677f5053808ae565efe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?= Date: Thu, 11 Jun 2026 20:05:34 +0800 Subject: [PATCH 55/55] fox --- protoforge/core/simulators.py | 14 +++++- protoforge/core/template.py | 63 +++++++++++++++++++++++++++ tests/test_lathe_station_templates.py | 42 ++++++++++++++++++ 3 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 tests/test_lathe_station_templates.py diff --git a/protoforge/core/simulators.py b/protoforge/core/simulators.py index 809188c..047d7fc 100644 --- a/protoforge/core/simulators.py +++ b/protoforge/core/simulators.py @@ -13,6 +13,18 @@ def _build_registry() -> dict[str, Callable[..., Any]]: try: from protoforge.protocols.mtconnect.lathe_simulator import LatheSimulator 
registry["mtconnect_lathe"] = LatheSimulator + registry["mtconnect_lathe_rough"] = lambda **_: LatheSimulator( + process_mode="single_process", + process="rough", + ) + registry["mtconnect_lathe_semi_finish"] = lambda **_: LatheSimulator( + process_mode="single_process", + process="semi_finish", + ) + registry["mtconnect_lathe_finish"] = lambda **_: LatheSimulator( + process_mode="single_process", + process="finish", + ) except ImportError: pass return registry @@ -27,7 +39,7 @@ def get_device_simulator( ) -> Optional[Any]: """ 根据 template_id 返回一个新的仿真器实例,未匹配则返回 None。 - simulator_params 会作为关键字参数透传给仿真器构造函数。 + simulator_params 默认作为关键字参数透传;固定工位模板可在注册表中选择忽略参数。 """ if template_id is None: return None diff --git a/protoforge/core/template.py b/protoforge/core/template.py index e7ff122..6a7659c 100644 --- a/protoforge/core/template.py +++ b/protoforge/core/template.py @@ -31,6 +31,7 @@ def load_builtin_templates(self) -> None: self._load_from_dir(_TEMPLATES_DIR / "opcda") self._load_from_dir(_TEMPLATES_DIR / "fanuc") self._load_from_dir(_TEMPLATES_DIR / "mtconnect") + self._add_lathe_station_templates() self._load_from_dir(_TEMPLATES_DIR / "toledo") self._loaded = True logger.info("Loaded %d built-in templates", len(self._templates)) @@ -98,3 +99,65 @@ def _load_from_dir(self, dir_path: Path) -> None: self._templates[template.id] = template except Exception as e: logger.warning("Failed to load template %s: %s", json_file, e) + + def _add_lathe_station_templates(self) -> None: + """ + MTConnect 车床按工位拆分模板。 + + 原 mtconnect_lathe 保留用于兼容旧设备;三个 station 模板由同一组 + MTConnect 测点派生,但会在 simulator registry 中绑定到不同工艺。 + """ + base = self._templates.get("mtconnect_lathe") + if base is None: + return + + station_defs = [ + { + "id": "mtconnect_lathe_rough", + "name": "MTConnect车床 粗加工工位", + "uuid": "mtc-lathe-rough-001", + "process_tag": "粗加工", + "description": ( + "MTConnect标准车床粗加工工位,固定运行粗车工艺;" + "主轴约2000RPM,负载和电流较高,适合单独观察粗加工数据。" + ), + }, + { + "id": "mtconnect_lathe_semi_finish", + 
"name": "MTConnect车床 半精加工工位", + "uuid": "mtc-lathe-semi-finish-001", + "process_tag": "半精加工", + "description": ( + "MTConnect标准车床半精加工工位,固定运行半精车工艺;" + "主轴约3000RPM,负载、电流和粗糙度介于粗加工与精加工之间。" + ), + }, + { + "id": "mtconnect_lathe_finish", + "name": "MTConnect车床 精加工工位", + "uuid": "mtc-lathe-finish-001", + "process_tag": "精加工", + "description": ( + "MTConnect标准车床精加工工位,固定运行精车工艺;" + "主轴约4000RPM,负载较低,转速和表面质量更稳定。" + ), + }, + ] + + for spec in station_defs: + if spec["id"] in self._templates: + continue + template = base.model_copy(deep=True) + template.id = spec["id"] + template.name = spec["name"] + template.description = spec["description"] + template.protocol_config = { + **template.protocol_config, + "device_uuid": spec["uuid"], + } + template.tags = [ + tag for tag in template.tags + if tag not in {"粗加工", "半精加工", "精加工"} + ] + template.tags.extend(["工位", spec["process_tag"]]) + self._templates[template.id] = template diff --git a/tests/test_lathe_station_templates.py b/tests/test_lathe_station_templates.py new file mode 100644 index 0000000..c1e37ae --- /dev/null +++ b/tests/test_lathe_station_templates.py @@ -0,0 +1,42 @@ +from protoforge.core.simulators import get_device_simulator +from protoforge.core.template import TemplateManager + + +def test_mtconnect_lathe_station_templates_are_available(): + tm = TemplateManager() + tm.load_builtin_templates() + + rough = tm.get_template("mtconnect_lathe_rough") + semi = tm.get_template("mtconnect_lathe_semi_finish") + finish = tm.get_template("mtconnect_lathe_finish") + + assert rough.name == "MTConnect车床 粗加工工位" + assert semi.name == "MTConnect车床 半精加工工位" + assert finish.name == "MTConnect车床 精加工工位" + + assert rough.protocol_config["device_uuid"] == "mtc-lathe-rough-001" + assert semi.protocol_config["device_uuid"] == "mtc-lathe-semi-finish-001" + assert finish.protocol_config["device_uuid"] == "mtc-lathe-finish-001" + assert len(rough.points) == len(semi.points) == len(finish.points) + assert "工位" in rough.tags + assert "粗加工" in 
rough.tags + assert "半精加工" in semi.tags + assert "精加工" in finish.tags + + +def test_lathe_station_simulators_force_single_process(): + cases = [ + ("mtconnect_lathe_rough", "rough"), + ("mtconnect_lathe_semi_finish", "semi_finish"), + ("mtconnect_lathe_finish", "finish"), + ] + + for template_id, process in cases: + simulator = get_device_simulator( + template_id, + {"process_mode": "process_flow", "process": "rough"}, + ) + + assert simulator is not None + assert simulator._process_mode == "single_process" + assert simulator._process == process