From 49f2fc443887fba0aeddf216bc966d0d47fbc483 Mon Sep 17 00:00:00 2001
From: zhangshaozhi <zhangshaozhi@zhangshaozhideMacBook-Pro.local>
Date: Sun, 17 May 2026 20:26:16 +0800
Subject: [PATCH 01/55] fix: make application runnable in production

- Dockerfile: include README.md in COPY statement
- main.py: add static file serving for built frontend (SPA routing support)
- App.vue: wrap template with Naive UI message/dialog/notification providers

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 Dockerfile         |  2 +-
 protoforge/main.py | 10 ++++++++++
 web/src/App.vue    |  8 +++++++-
 3 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/Dockerfile b/Dockerfile
index babbf5f..2528ade 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,7 +10,7 @@ WORKDIR /app
 
 RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/*
 
-COPY pyproject.toml .
+COPY pyproject.toml README.md ./
 COPY protoforge/ protoforge/
 
 COPY --from=frontend-builder /app/web/dist /app/static
diff --git a/protoforge/main.py b/protoforge/main.py
index 894c5bc..0e43951 100644
--- a/protoforge/main.py
+++ b/protoforge/main.py
@@ -1,8 +1,11 @@
 import logging
 from contextlib import asynccontextmanager
+from pathlib import Path
 
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
 
 from protoforge.api.v1.router import router
 from protoforge.core.engine import SimulationEngine
@@ -184,12 +187,19 @@ def create_app() -> FastAPI:
 
     @app.get("/")
     async def root():
+        index = Path("/app/static/index.html")
+        if index.exists():
+            return FileResponse(index)
         return {
             "name": "ProtoForge",
             "version": "0.1.0",
             "description": "物联网协议仿真与测试平台",
         }
 
+    static_dir = Path("/app/static")
+    if static_dir.exists():
+        app.mount("/assets", StaticFiles(directory=static_dir / "assets"), name="assets")
+
     @app.get("/health")
     async def health():
         return {"status": "ok"}
diff --git a/web/src/App.vue b/web/src/App.vue
index ae6d315..22eabe9 100644
--- a/web/src/App.vue
+++ b/web/src/App.vue
@@ -1,4 +1,7 @@
 <template>
+  <n-message-provider>
+  <n-dialog-provider>
+  <n-notification-provider>
   <div v-if="!loggedIn" class="login-wrapper">
     <Login @login-success="onLogin" />
   </div>
@@ -73,12 +76,15 @@
       </n-layout-content>
     </n-layout>
   </n-layout>
+  </n-notification-provider>
+  </n-dialog-provider>
+  </n-message-provider>
 </template>
 
 <script setup>
 import { ref, computed, onMounted, onUnmounted, h } from 'vue'
 import { useRouter, useRoute } from 'vue-router'
-import { NLayout, NLayoutSider, NLayoutHeader, NLayoutContent, NMenu, NSpace, NAutoComplete, NTag, NButton, NDropdown } from 'naive-ui'
+import { NLayout, NLayoutSider, NLayoutHeader, NLayoutContent, NMenu, NSpace, NAutoComplete, NTag, NButton, NDropdown, NMessageProvider, NDialogProvider, NNotificationProvider } from 'naive-ui'
 import api from './api.js'
 import Login from './views/Login.vue'
 import Welcome from './views/Welcome.vue'

From 8fcf6c17b7786b7350c6ff540f4e665ea481e1a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Mon, 18 May 2026 13:59:49 +0800
Subject: [PATCH 02/55] fix: support container and local env

---
 protoforge/main.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/protoforge/main.py b/protoforge/main.py
index 0e43951..73489ae 100644
--- a/protoforge/main.py
+++ b/protoforge/main.py
@@ -1,4 +1,5 @@
 import logging
+import os
 from contextlib import asynccontextmanager
 from pathlib import Path
 
@@ -142,7 +143,6 @@ async def lifespan(app: FastAPI):
     except Exception as e:
         logger.warning("Failed to start webhook manager: %s", e)
 
-    import os
     if os.environ.get("PROTOFORGE_DEMO_MODE"):
         try:
             from protoforge.core.demo import seed_demo_data
@@ -185,19 +185,26 @@ def create_app() -> FastAPI:
 
     app.include_router(router)
 
+    # 按优先级查找静态文件目录：环境变量 > 容器路径 > 本地构建产物
+    _repo_root = Path(__file__).parent.parent
+    _static_candidates = [
+        Path(os.environ["STATIC_DIR"]) if "STATIC_DIR" in os.environ else None,
+        Path("/app/static"),
+        _repo_root / "web" / "dist",
+    ]
+    static_dir = next((p for p in _static_candidates if p and p.is_dir()), None)
+
     @app.get("/")
     async def root():
-        index = Path("/app/static/index.html")
-        if index.exists():
-            return FileResponse(index)
+        if static_dir and (static_dir / "index.html").exists():
+            return FileResponse(static_dir / "index.html")
         return {
             "name": "ProtoForge",
             "version": "0.1.0",
             "description": "物联网协议仿真与测试平台",
         }
 
-    static_dir = Path("/app/static")
-    if static_dir.exists():
+    if static_dir and (static_dir / "assets").is_dir():
         app.mount("/assets", StaticFiles(directory=static_dir / "assets"), name="assets")
 
     @app.get("/health")

From 1fa9c0889b9351f9bc4d404f6615cafec1075f85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Mon, 18 May 2026 14:30:59 +0800
Subject: [PATCH 03/55] fix(metrics): support prometheus metric

---
 protoforge/core/metrics.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/protoforge/core/metrics.py b/protoforge/core/metrics.py
index 9300372..0e01abb 100644
--- a/protoforge/core/metrics.py
+++ b/protoforge/core/metrics.py
@@ -46,6 +46,25 @@ def collect_from_engine(self, engine: Any) -> None:
                                 if p.status.value == "running")
         self.set_gauge("protoforge_protocols_running", protocols_running)
 
+        for device in engine._devices.values():
+            if device.status.value != "online":
+                continue
+            labels_base = {
+                "device_id": device.config.id,
+                "device_name": device.config.name,
+                "protocol": device.config.protocol,
+            }
+            for point in device.read_all_points():
+                if not isinstance(point.value, (int, float)):
+                    continue
+                labels = {**labels_base, "point": point.name}
+                point_config = next(
+                    (p for p in device.config.points if p.name == point.name), None
+                )
+                if point_config and point_config.unit:
+                    labels["unit"] = point_config.unit
+                self.set_gauge("protoforge_device_point", float(point.value), labels)
+
     def collect_from_test_runner(self, runner: Any) -> None:
         self.set_gauge("protoforge_test_cases_total", len(runner._test_cases))
         self.set_gauge("protoforge_test_suites_total", len(runner._test_suites))

From 382e939efc63779b60f0a6be768aa210f47c6940 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Mon, 18 May 2026 14:38:29 +0800
Subject: [PATCH 04/55] fix(metrics): collect points from devices that are not online

---
 protoforge/core/metrics.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/protoforge/core/metrics.py b/protoforge/core/metrics.py
index 0e01abb..8a5a2bb 100644
--- a/protoforge/core/metrics.py
+++ b/protoforge/core/metrics.py
@@ -47,8 +47,6 @@ def collect_from_engine(self, engine: Any) -> None:
         self.set_gauge("protoforge_protocols_running", protocols_running)
 
         for device in engine._devices.values():
-            if device.status.value != "online":
-                continue
             labels_base = {
                 "device_id": device.config.id,
                 "device_name": device.config.name,

From 80737ecd5a2099121e10ee624d6363f6e378b7d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Mon, 18 May 2026 14:42:38 +0800
Subject: [PATCH 05/55] fix(metric): update metric name

---
 protoforge/core/metrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/protoforge/core/metrics.py b/protoforge/core/metrics.py
index 8a5a2bb..97f7bb9 100644
--- a/protoforge/core/metrics.py
+++ b/protoforge/core/metrics.py
@@ -61,7 +61,7 @@ def collect_from_engine(self, engine: Any) -> None:
                 )
                 if point_config and point_config.unit:
                     labels["unit"] = point_config.unit
-                self.set_gauge("protoforge_device_point", float(point.value), labels)
+                self.set_gauge(f"protoforge_{point.name}", float(point.value), labels)
 
     def collect_from_test_runner(self, runner: Any) -> None:
         self.set_gauge("protoforge_test_cases_total", len(runner._test_cases))

From 8f6ea66402ba0042edcdd4ba5dbcc7414cbfe240 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Mon, 18 May 2026 14:52:29 +0800
Subject: [PATCH 06/55] fix(metrics): use the bare point name as the gauge name

---
 protoforge/core/metrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/protoforge/core/metrics.py b/protoforge/core/metrics.py
index 97f7bb9..18fb327 100644
--- a/protoforge/core/metrics.py
+++ b/protoforge/core/metrics.py
@@ -61,7 +61,7 @@ def collect_from_engine(self, engine: Any) -> None:
                 )
                 if point_config and point_config.unit:
                     labels["unit"] = point_config.unit
-                self.set_gauge(f"protoforge_{point.name}", float(point.value), labels)
+                self.set_gauge(point.name, float(point.value), labels)
 
     def collect_from_test_runner(self, runner: Any) -> None:
         self.set_gauge("protoforge_test_cases_total", len(runner._test_cases))

From 94d779adee9ad861b6f66436b1ac156025670034 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 19 May 2026 10:22:34 +0800
Subject: [PATCH 07/55] fix(monitor): collect monitor data

---
 protoforge/core/device.py  | 3 +++
 protoforge/core/metrics.py | 8 +++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/protoforge/core/device.py b/protoforge/core/device.py
index a344c7c..efd21ef 100644
--- a/protoforge/core/device.py
+++ b/protoforge/core/device.py
@@ -59,9 +59,11 @@ def read_point(self, point_name: str) -> Optional[PointValue]:
             name=point_name,
             value=self._point_values[point_name],
             timestamp=time.time(),
+            quality="good" if self._status == DeviceStatus.ONLINE else "bad",
         )
 
     def read_all_points(self) -> list[PointValue]:
+        quality = "good" if self._status == DeviceStatus.ONLINE else "bad"
         result = []
         now = time.time()
         for name in self._point_values:
@@ -70,6 +72,7 @@ def read_all_points(self) -> list[PointValue]:
                     name=name,
                     value=self._point_values[name],
                     timestamp=now,
+                    quality=quality,
                 )
             )
         return result
diff --git a/protoforge/core/metrics.py b/protoforge/core/metrics.py
index 18fb327..9670525 100644
--- a/protoforge/core/metrics.py
+++ b/protoforge/core/metrics.py
@@ -53,15 +53,17 @@ def collect_from_engine(self, engine: Any) -> None:
                 "protocol": device.config.protocol,
             }
             for point in device.read_all_points():
-                if not isinstance(point.value, (int, float)):
-                    continue
                 labels = {**labels_base, "point": point.name}
                 point_config = next(
                     (p for p in device.config.points if p.name == point.name), None
                 )
                 if point_config and point_config.unit:
                     labels["unit"] = point_config.unit
-                self.set_gauge(point.name, float(point.value), labels)
+                key = self._make_key(point.name, labels)
+                if point.quality != "good":
+                    self._gauges.pop(key, None)
+                elif isinstance(point.value, (int, float)):
+                    self.set_gauge(point.name, float(point.value), labels)
 
     def collect_from_test_runner(self, runner: Any) -> None:
         self.set_gauge("protoforge_test_cases_total", len(runner._test_cases))

From e515c50708a4181d5fd26751f8853cec203c566f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 19 May 2026 17:54:26 +0800
Subject: [PATCH 08/55] fix(fanuc): update fanuc function

---
 protoforge/templates/fanuc/fanuc_0if_cnc.json    | 5 +++--
 protoforge/templates/modbus/fanuc_cnc.json       | 7 ++++---
 protoforge/templates/mtconnect/mill_machine.json | 5 +++--
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/protoforge/templates/fanuc/fanuc_0if_cnc.json b/protoforge/templates/fanuc/fanuc_0if_cnc.json
index 3231d9b..a10b74f 100644
--- a/protoforge/templates/fanuc/fanuc_0if_cnc.json
+++ b/protoforge/templates/fanuc/fanuc_0if_cnc.json
@@ -75,9 +75,10 @@
             "unit": "mm/min",
             "description": "进给速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "sine",
             "min_value": 100,
-            "max_value": 5000
+            "max_value": 5000,
+            "generator_config": {"period": 60, "phase": 0.0}
         },
         {
             "name": "alarm_status",
diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json
index 843151e..2970682 100644
--- a/protoforge/templates/modbus/fanuc_cnc.json
+++ b/protoforge/templates/modbus/fanuc_cnc.json
@@ -24,9 +24,10 @@
             "unit": "mm/min",
             "description": "实际进给速度",
             "access": "r",
-            "generator_type": "random",
-            "min_value": 0.0,
-            "max_value": 10000.0
+            "generator_type": "sine",
+            "min_value": 200.0,
+            "max_value": 3000.0,
+            "generator_config": {"period": 90, "phase": 1.0}
         },
         {
             "name": "spindle_override",
diff --git a/protoforge/templates/mtconnect/mill_machine.json b/protoforge/templates/mtconnect/mill_machine.json
index eaeef74..6b06daa 100644
--- a/protoforge/templates/mtconnect/mill_machine.json
+++ b/protoforge/templates/mtconnect/mill_machine.json
@@ -66,9 +66,10 @@
             "unit": "mm/min",
             "description": "进给速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "sine",
             "min_value": 200,
-            "max_value": 3000
+            "max_value": 3000,
+            "generator_config": {"period": 75, "phase": 2.1}
         },
         {
             "name": "part_count",

From d82cc9374d4001d87ed7d52f86534567aca5487d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 19 May 2026 18:14:27 +0800
Subject: [PATCH 09/55] fix(templates): update templates

---
 protoforge/templates/fanuc/fanuc_0if_cnc.json | 45 +++++++++++++++++++
 .../templates/fanuc/fanuc_31ib_cnc.json       | 45 +++++++++++++++++++
 protoforge/templates/modbus/fanuc_cnc.json    | 45 +++++++++++++++++++
 .../templates/mtconnect/mill_machine.json     | 45 +++++++++++++++++++
 4 files changed, 180 insertions(+)

diff --git a/protoforge/templates/fanuc/fanuc_0if_cnc.json b/protoforge/templates/fanuc/fanuc_0if_cnc.json
index a10b74f..476ad73 100644
--- a/protoforge/templates/fanuc/fanuc_0if_cnc.json
+++ b/protoforge/templates/fanuc/fanuc_0if_cnc.json
@@ -80,6 +80,51 @@
             "max_value": 5000,
             "generator_config": {"period": 60, "phase": 0.0}
         },
+        {
+            "name": "spindle_current",
+            "address": "spindle_current",
+            "data_type": "float32",
+            "unit": "A",
+            "description": "主轴电流",
+            "access": "r",
+            "generator_type": "sine",
+            "min_value": 8.0,
+            "max_value": 32.0,
+            "generator_config": {"period": 120, "phase": 0.5}
+        },
+        {
+            "name": "vibration_x",
+            "address": "vibration_x",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "X轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 2.5
+        },
+        {
+            "name": "vibration_y",
+            "address": "vibration_y",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Y轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 2.5
+        },
+        {
+            "name": "vibration_z",
+            "address": "vibration_z",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Z轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 3.0
+        },
         {
             "name": "alarm_status",
             "address": "alarm",
diff --git a/protoforge/templates/fanuc/fanuc_31ib_cnc.json b/protoforge/templates/fanuc/fanuc_31ib_cnc.json
index 89f6ef1..97d18fd 100644
--- a/protoforge/templates/fanuc/fanuc_31ib_cnc.json
+++ b/protoforge/templates/fanuc/fanuc_31ib_cnc.json
@@ -80,6 +80,51 @@
             "generator_type": "fixed",
             "fixed_value": 100
         },
+        {
+            "name": "spindle_current",
+            "address": "spindle_current",
+            "data_type": "float32",
+            "unit": "A",
+            "description": "主轴电流",
+            "access": "r",
+            "generator_type": "sine",
+            "min_value": 10.0,
+            "max_value": 45.0,
+            "generator_config": {"period": 120, "phase": 1.2}
+        },
+        {
+            "name": "vibration_x",
+            "address": "vibration_x",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "X轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 3.0
+        },
+        {
+            "name": "vibration_y",
+            "address": "vibration_y",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Y轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 3.0
+        },
+        {
+            "name": "vibration_z",
+            "address": "vibration_z",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Z轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 4.0
+        },
         {
             "name": "tool_number",
             "address": "tool_number",
diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json
index 2970682..b154318 100644
--- a/protoforge/templates/modbus/fanuc_cnc.json
+++ b/protoforge/templates/modbus/fanuc_cnc.json
@@ -29,6 +29,51 @@
             "max_value": 3000.0,
             "generator_config": {"period": 90, "phase": 1.0}
         },
+        {
+            "name": "spindle_current",
+            "address": "2",
+            "data_type": "float32",
+            "unit": "A",
+            "description": "主轴电流",
+            "access": "r",
+            "generator_type": "sine",
+            "min_value": 8.0,
+            "max_value": 35.0,
+            "generator_config": {"period": 120, "phase": 2.0}
+        },
+        {
+            "name": "vibration_x",
+            "address": "23",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "X轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 2.5
+        },
+        {
+            "name": "vibration_y",
+            "address": "25",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Y轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 2.5
+        },
+        {
+            "name": "vibration_z",
+            "address": "27",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Z轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 3.0
+        },
         {
             "name": "spindle_override",
             "address": "3",
diff --git a/protoforge/templates/mtconnect/mill_machine.json b/protoforge/templates/mtconnect/mill_machine.json
index 6b06daa..1262d0f 100644
--- a/protoforge/templates/mtconnect/mill_machine.json
+++ b/protoforge/templates/mtconnect/mill_machine.json
@@ -71,6 +71,51 @@
             "max_value": 3000,
             "generator_config": {"period": 75, "phase": 2.1}
         },
+        {
+            "name": "spindle_current",
+            "address": "SpindleCurrent",
+            "data_type": "float32",
+            "unit": "A",
+            "description": "主轴电流",
+            "access": "r",
+            "generator_type": "sine",
+            "min_value": 6.0,
+            "max_value": 28.0,
+            "generator_config": {"period": 120, "phase": 3.1}
+        },
+        {
+            "name": "vibration_x",
+            "address": "VibrationX",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "X轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 2.0
+        },
+        {
+            "name": "vibration_y",
+            "address": "VibrationY",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Y轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 2.0
+        },
+        {
+            "name": "vibration_z",
+            "address": "VibrationZ",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Z轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 2.5
+        },
         {
             "name": "part_count",
             "address": "PartCount",

From 566910feb851276a5d542a30cff8b0bd9fb5f86c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 19 May 2026 19:46:40 +0800
Subject: [PATCH 10/55] fix(router): update router

---
 docs/curl.md                |  6 ++++++
 protoforge/api/v1/router.py | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 docs/curl.md

diff --git a/docs/curl.md b/docs/curl.md
new file mode 100644
index 0000000..ae3cbb1
--- /dev/null
+++ b/docs/curl.md
@@ -0,0 +1,6 @@
+# 更新设备测试点请求
+
+```bash
+# /api/v1/devices/{device_id}/sync-from-template
+curl -X POST http://localhost:8000/api/v1/devices/fanuc-cnc数控系统/sync-from-template
+```
diff --git a/protoforge/api/v1/router.py b/protoforge/api/v1/router.py
index e06b966..8eaab1b 100644
--- a/protoforge/api/v1/router.py
+++ b/protoforge/api/v1/router.py
@@ -202,6 +202,38 @@ async def batch_stop_devices(device_ids: list[str]):
     return {"status": "ok", "stopped": stopped, "errors": errors}
 
 
+@router.post("/devices/{device_id}/sync-from-template")
+async def sync_device_from_template(device_id: str):
+    engine = _get_engine()
+    db = _get_database()
+    tm = _get_template_manager()
+    try:
+        instance = engine._devices.get(device_id)
+        if not instance:
+            raise HTTPException(status_code=404, detail=f"Device not found: {device_id}")
+        template_id = instance.config.template_id
+        if not template_id:
+            raise HTTPException(status_code=400, detail="Device has no associated template")
+        template = tm.get_template(template_id)
+        if not template:
+            raise HTTPException(status_code=404, detail=f"Template not found: {template_id}")
+        new_config = DeviceConfig(
+            id=device_id,
+            name=instance.config.name,
+            protocol=instance.config.protocol,
+            template_id=template_id,
+            points=template.points,
+            protocol_config=instance.config.protocol_config,
+        )
+        result = await engine.update_device(device_id, new_config)
+        await db.save_device(new_config)
+        return {"status": "ok", "point_count": len(template.points), "device": result}
+    except HTTPException:
+        raise
+    except ValueError as e:
+        raise HTTPException(status_code=404, detail=str(e))
+
+
 @router.get("/devices/{device_id}", response_model=DeviceInfo)
 async def get_device(device_id: str):
     engine = _get_engine()

From 741bf518a3eccb6997f844063967b37d413878bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 19 May 2026 20:22:12 +0800
Subject: [PATCH 11/55] fix(template): update template

---
 protoforge/templates/fanuc/fanuc_0if_cnc.json    | 5 +++--
 protoforge/templates/fanuc/fanuc_31ib_cnc.json   | 5 +++--
 protoforge/templates/modbus/fanuc_cnc.json       | 5 +++--
 protoforge/templates/mtconnect/mill_machine.json | 5 +++--
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/protoforge/templates/fanuc/fanuc_0if_cnc.json b/protoforge/templates/fanuc/fanuc_0if_cnc.json
index 476ad73..3e79750 100644
--- a/protoforge/templates/fanuc/fanuc_0if_cnc.json
+++ b/protoforge/templates/fanuc/fanuc_0if_cnc.json
@@ -64,9 +64,10 @@
             "unit": "RPM",
             "description": "主轴转速",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "sawtooth",
             "min_value": 1000,
-            "max_value": 8000
+            "max_value": 8000,
+            "generator_config": {"period": 120}
         },
         {
             "name": "feed_rate",
diff --git a/protoforge/templates/fanuc/fanuc_31ib_cnc.json b/protoforge/templates/fanuc/fanuc_31ib_cnc.json
index 97d18fd..83fee77 100644
--- a/protoforge/templates/fanuc/fanuc_31ib_cnc.json
+++ b/protoforge/templates/fanuc/fanuc_31ib_cnc.json
@@ -66,9 +66,10 @@
             "unit": "RPM",
             "description": "主轴转速",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "sawtooth",
             "min_value": 2000,
-            "max_value": 15000
+            "max_value": 15000,
+            "generator_config": {"period": 150}
         },
         {
             "name": "feed_override",
diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json
index b154318..265ae56 100644
--- a/protoforge/templates/modbus/fanuc_cnc.json
+++ b/protoforge/templates/modbus/fanuc_cnc.json
@@ -13,9 +13,10 @@
             "unit": "RPM",
             "description": "主轴实际转速",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "sawtooth",
             "min_value": 0,
-            "max_value": 12000
+            "max_value": 12000,
+            "generator_config": {"period": 180}
         },
         {
             "name": "feed_rate",
diff --git a/protoforge/templates/mtconnect/mill_machine.json b/protoforge/templates/mtconnect/mill_machine.json
index 1262d0f..5ce7d63 100644
--- a/protoforge/templates/mtconnect/mill_machine.json
+++ b/protoforge/templates/mtconnect/mill_machine.json
@@ -55,9 +55,10 @@
             "unit": "RPM",
             "description": "主轴转速",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "sawtooth",
             "min_value": 3000,
-            "max_value": 12000
+            "max_value": 12000,
+            "generator_config": {"period": 135}
         },
         {
             "name": "feed_rate",

From 3cc3be3e59319f41bcddf4e8f642c4f9618d78e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 19 May 2026 20:35:05 +0800
Subject: [PATCH 12/55] fix(template): update template

---
 protoforge/templates/modbus/fanuc_cnc.json       | 7 +++++--
 protoforge/templates/mtconnect/mill_machine.json | 7 +++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json
index 265ae56..dc7a146 100644
--- a/protoforge/templates/modbus/fanuc_cnc.json
+++ b/protoforge/templates/modbus/fanuc_cnc.json
@@ -194,9 +194,12 @@
             "data_type": "uint16",
             "description": "加工计数",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0,
-            "max_value": 99999
+            "max_value": 99999,
+            "generator_config": {
+                "script": "key = 'part_count_modbus'; last = cache.get(key, 0); interval = 45; result = min(int(elapsed / interval), 99999); cache[key] = result"
+            }
         },
         {
             "name": "cycle_time",
diff --git a/protoforge/templates/mtconnect/mill_machine.json b/protoforge/templates/mtconnect/mill_machine.json
index 5ce7d63..5a11593 100644
--- a/protoforge/templates/mtconnect/mill_machine.json
+++ b/protoforge/templates/mtconnect/mill_machine.json
@@ -124,9 +124,12 @@
             "unit": "件",
             "description": "加工件数",
             "access": "r",
-            "generator_type": "sawtooth",
+            "generator_type": "script",
             "min_value": 0,
-            "max_value": 999
+            "max_value": 999,
+            "generator_config": {
+                "script": "key = 'part_count_mtconnect'; interval = 60; result = min(int(elapsed / interval), 999); cache[key] = result"
+            }
         }
     ],
     "protocol_config": {

From 5bdbcd4ecbbcea080e36741f34fa3662ce669345 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 19 May 2026 21:01:59 +0800
Subject: [PATCH 13/55] fix(template): update template

---
 protoforge/templates/modbus/fanuc_cnc.json       | 2 +-
 protoforge/templates/mtconnect/mill_machine.json | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json
index dc7a146..22db212 100644
--- a/protoforge/templates/modbus/fanuc_cnc.json
+++ b/protoforge/templates/modbus/fanuc_cnc.json
@@ -198,7 +198,7 @@
             "min_value": 0,
             "max_value": 99999,
             "generator_config": {
-                "script": "key = 'part_count_modbus'; last = cache.get(key, 0); interval = 45; result = min(int(elapsed / interval), 99999); cache[key] = result"
+                "script": "elapsed = context['elapsed']; result = min(int(elapsed / 45), 99999)"
             }
         },
         {
diff --git a/protoforge/templates/mtconnect/mill_machine.json b/protoforge/templates/mtconnect/mill_machine.json
index 5a11593..b701a48 100644
--- a/protoforge/templates/mtconnect/mill_machine.json
+++ b/protoforge/templates/mtconnect/mill_machine.json
@@ -128,7 +128,7 @@
             "min_value": 0,
             "max_value": 999,
             "generator_config": {
-                "script": "key = 'part_count_mtconnect'; interval = 60; result = min(int(elapsed / interval), 999); cache[key] = result"
+                "script": "elapsed = context['elapsed']; result = min(int(elapsed / 60), 999)"
             }
         }
     ],

From 5d2d9fe0634aa4bcba08552b09c5da3d84b76db7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 19 May 2026 21:41:04 +0800
Subject: [PATCH 14/55] fix(template): update template

---
 protoforge/templates/fanuc/fanuc_0if_cnc.json | 21 +++++++++++++------
 .../templates/fanuc/fanuc_31ib_cnc.json       | 21 +++++++++++++------
 protoforge/templates/modbus/fanuc_cnc.json    | 21 +++++++++++++------
 .../templates/mtconnect/mill_machine.json     | 21 +++++++++++++------
 4 files changed, 60 insertions(+), 24 deletions(-)

diff --git a/protoforge/templates/fanuc/fanuc_0if_cnc.json b/protoforge/templates/fanuc/fanuc_0if_cnc.json
index 3e79750..39437f3 100644
--- a/protoforge/templates/fanuc/fanuc_0if_cnc.json
+++ b/protoforge/templates/fanuc/fanuc_0if_cnc.json
@@ -100,9 +100,12 @@
             "unit": "m/s²",
             "description": "X轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 2.5
+            "max_value": 2.5,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 90); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "vibration_y",
@@ -111,9 +114,12 @@
             "unit": "m/s²",
             "description": "Y轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 2.5
+            "max_value": 2.5,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 75 + 1.0); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "vibration_z",
@@ -122,9 +128,12 @@
             "unit": "m/s²",
             "description": "Z轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 3.0
+            "max_value": 3.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.7 + 0.4 * math.sin(2 * math.pi * elapsed / 60 + 2.1); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "alarm_status",
diff --git a/protoforge/templates/fanuc/fanuc_31ib_cnc.json b/protoforge/templates/fanuc/fanuc_31ib_cnc.json
index 83fee77..808fbc6 100644
--- a/protoforge/templates/fanuc/fanuc_31ib_cnc.json
+++ b/protoforge/templates/fanuc/fanuc_31ib_cnc.json
@@ -100,9 +100,12 @@
             "unit": "m/s²",
             "description": "X轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 3.0
+            "max_value": 3.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.6 + 0.4 * math.sin(2 * math.pi * elapsed / 80 + 0.5); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "vibration_y",
@@ -111,9 +114,12 @@
             "unit": "m/s²",
             "description": "Y轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 3.0
+            "max_value": 3.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.6 + 0.4 * math.sin(2 * math.pi * elapsed / 65 + 1.5); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "vibration_z",
@@ -122,9 +128,12 @@
             "unit": "m/s²",
             "description": "Z轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 4.0
+            "max_value": 4.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.8 + 0.5 * math.sin(2 * math.pi * elapsed / 55 + 2.5); noise = random.uniform(-0.25, 0.25); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "tool_number",
diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json
index 22db212..43622cf 100644
--- a/protoforge/templates/modbus/fanuc_cnc.json
+++ b/protoforge/templates/modbus/fanuc_cnc.json
@@ -49,9 +49,12 @@
             "unit": "m/s²",
             "description": "X轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 2.5
+            "max_value": 2.5,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 85 + 0.8); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "vibration_y",
@@ -60,9 +63,12 @@
             "unit": "m/s²",
             "description": "Y轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 2.5
+            "max_value": 2.5,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 70 + 1.8); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "vibration_z",
@@ -71,9 +77,12 @@
             "unit": "m/s²",
             "description": "Z轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 3.0
+            "max_value": 3.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.7 + 0.4 * math.sin(2 * math.pi * elapsed / 58 + 2.8); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "spindle_override",
diff --git a/protoforge/templates/mtconnect/mill_machine.json b/protoforge/templates/mtconnect/mill_machine.json
index b701a48..fd08bdd 100644
--- a/protoforge/templates/mtconnect/mill_machine.json
+++ b/protoforge/templates/mtconnect/mill_machine.json
@@ -91,9 +91,12 @@
             "unit": "m/s²",
             "description": "X轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 2.0
+            "max_value": 2.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.4 + 0.25 * math.sin(2 * math.pi * elapsed / 95 + 0.3); noise = random.uniform(-0.12, 0.12); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "vibration_y",
@@ -102,9 +105,12 @@
             "unit": "m/s²",
             "description": "Y轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 2.0
+            "max_value": 2.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.4 + 0.25 * math.sin(2 * math.pi * elapsed / 78 + 1.3); noise = random.uniform(-0.12, 0.12); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "vibration_z",
@@ -113,9 +119,12 @@
             "unit": "m/s²",
             "description": "Z轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 2.5
+            "max_value": 2.5,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.6 + 0.35 * math.sin(2 * math.pi * elapsed / 62 + 2.3); noise = random.uniform(-0.18, 0.18); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "part_count",

From c57a366c70ceacb599786c08934cef60a69d0d25 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 20 May 2026 13:40:09 +0800
Subject: [PATCH 15/55] feat(fault): support fault

---
 FAULT_INJECTION.md          | 321 +++++++++++++++++++++++++++
 protoforge/api/v1/router.py |  57 +++++
 protoforge/core/device.py   |  18 +-
 protoforge/core/engine.py   |  32 +++
 protoforge/core/fault.py    | 419 ++++++++++++++++++++++++++++++++++++
 protoforge/models/fault.py  |  77 +++++++
 6 files changed, 923 insertions(+), 1 deletion(-)
 create mode 100644 FAULT_INJECTION.md
 create mode 100644 protoforge/core/fault.py
 create mode 100644 protoforge/models/fault.py

diff --git a/FAULT_INJECTION.md b/FAULT_INJECTION.md
new file mode 100644
index 0000000..951648d
--- /dev/null
+++ b/FAULT_INJECTION.md
@@ -0,0 +1,321 @@
+# 故障注入使用文档
+
+本文档描述 ProtoForge 故障注入模块的设计、使用方式及内置故障类型。
+
+---
+
+## 概述
+
+故障注入模块允许你在运行中的模拟设备上注入真实工业场景的异常，用于：
+
+- 验证监控系统的异常检测能力
+- 训练工业 AI 异常检测模型（提供异常样本）
+- 测试报警规则和联动逻辑
+
+支持四种异常场景：
+
+| 场景 | 说明 |
+|------|------|
+| 异常注入 | 立即将指定测点推入异常区间 |
+| 自动恢复 | 故障持续指定时间后自动恢复正常 |
+| 多指标联动 | 一次注入同时影响多个相关测点 |
+| 渐进式劣化 | 指标随时间线性恶化，模拟真实磨损过程 |
+
+---
+
+## 架构设计
+
+```
+FaultInjector（独立模块）
+    │
+    ├── inject(device, request)   注入故障
+    ├── apply(device)             每次 tick 后覆盖测点值（通过钩子机制）
+    ├── clear(device_id)          手动清除
+    └── 自动到期恢复
+
+DeviceInstance.tick()
+    ├── 执行正常生成器
+    └── 执行 post_tick_hooks（FaultInjector.apply 挂载于此）
+```
+
+故障模块通过 `register_post_tick_hook` 挂载到设备，不修改设备本身的生成逻辑，完全解耦。
+
+---
+
+## API 接口
+
+### 查询故障类型
+
+```
+GET /api/v1/faults/types
+```
+
+返回所有内置故障类型列表。
+
+```
+GET /api/v1/faults/types/{fault_type_id}
+```
+
+返回指定故障类型的详细定义，包含影响的测点和行为参数。
+
+### 查询活跃故障
+
+```
+GET /api/v1/faults/active
+```
+
+返回当前所有设备上正在运行的故障实例。
+
+### 注入故障
+
+```
+POST /api/v1/devices/{device_id}/fault
+```
+
+请求体：
+
+```json
+{
+    "fault_type_id": "tool_wear",
+    "duration": 300,
+    "intensity": 0.8
+}
+```
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `fault_type_id` | string | 是 | 故障类型 ID，见下方故障类型列表 |
+| `duration` | float | 否 | 持续时间（秒），不填则使用类型默认值 |
+| `intensity` | float | 否 | 故障强度 0.0~1.0，默认 1.0，影响劣化幅度 |
+
+响应示例：
+
+```json
+{
+    "fault_id": "a3f2c1d4e5b6",
+    "device_id": "fanuc-cnc-01",
+    "fault_type_id": "tool_wear",
+    "fault_type_name": "刀具磨损",
+    "status": "active",
+    "intensity": 0.8,
+    "duration": 300.0,
+    "elapsed": 0.0,
+    "progress": 0.0,
+    "affected_points": ["spindle_current", "vibration_x", "vibration_y", "vibration_z", "feed_rate"],
+    "started_at": 1716192000.0
+}
+```
+
+### 查询设备当前故障
+
+```
+GET /api/v1/devices/{device_id}/fault
+```
+
+无故障时返回 `{"status": "none"}`，有故障时返回故障详情（含实时 `elapsed` 和 `progress`）。
+
+### 手动清除故障
+
+```
+DELETE /api/v1/devices/{device_id}/fault
+```
+
+立即清除故障，测点值由生成器在下一个 tick 自然恢复正常。
+
+---
+
+## 内置故障类型
+
+### tool_wear — 刀具磨损
+
+- **分类**：mechanical
+- **模式**：渐进式
+- **默认持续时间**：300 秒
+- **真实场景**：刀具切削刃逐渐磨损，切削阻力增大，系统自动压低进给速率
+
+| 测点 | 变化方向 | 峰值倍率 |
+|------|---------|---------|
+| `spindle_current` | 升高 | ×2.2 |
+| `vibration_x` | 升高 | ×3.0 |
+| `vibration_y` | 升高 | ×3.0 |
+| `vibration_z` | 升高 | ×3.5 |
+| `feed_rate` | 降低 | ×0.45 |
+
+---
+
+### tool_breakage — 刀具崩刃
+
+- **分类**：mechanical
+- **模式**：瞬间注入
+- **默认持续时间**：15 秒
+- **真实场景**：刀具突发性崩刃，机床通常会触发报警并停机
+
+| 测点 | 变化方向 | 峰值倍率 |
+|------|---------|---------|
+| `spindle_current` | 急升 | ×4.5 |
+| `vibration_x` | 急升 | ×8.0 |
+| `vibration_y` | 急升 | ×8.0 |
+| `vibration_z` | 急升 | ×10.0 |
+| `feed_rate` | 停止 | →0 |
+
+---
+
+### spindle_overheat — 主轴过热
+
+- **分类**：thermal
+- **模式**：渐进式
+- **默认持续时间**：240 秒
+- **真实场景**：长时间高负荷或冷却系统故障，热保护机制逐渐降低转速
+
+| 测点 | 变化方向 | 峰值倍率 |
+|------|---------|---------|
+| `spindle_current` | 升高 | ×1.8 |
+| `spindle_speed` | 降低 | ×0.6 |
+| `vibration_x` | 升高 | ×1.5 |
+| `vibration_z` | 升高 | ×1.5 |
+
+---
+
+### spindle_bearing_fault — 主轴轴承故障
+
+- **分类**：mechanical
+- **模式**：渐进式
+- **默认持续时间**：360 秒
+- **真实场景**：轴承磨损或润滑不足，振动持续升高
+
+| 测点 | 变化方向 | 峰值倍率 |
+|------|---------|---------|
+| `vibration_x` | 升高 | ×4.0 |
+| `vibration_y` | 升高 | ×4.0 |
+| `vibration_z` | 升高 | ×5.0 |
+| `spindle_current` | 轻微升高 | ×1.3 |
+
+---
+
+### feed_stall — 进给堵转
+
+- **分类**：process
+- **模式**：瞬间注入
+- **默认持续时间**：20 秒
+- **真实场景**：工件夹紧松动或切削量过大导致进给轴卡死
+
+| 测点 | 变化方向 | 峰值倍率 |
+|------|---------|---------|
+| `feed_rate` | 停止 | →0 |
+| `spindle_current` | 急升 | ×3.8 |
+| `vibration_z` | 急升 | ×5.0 |
+
+---
+
+### vibration_spike — 振动异常
+
+- **分类**：mechanical
+- **模式**：瞬间注入
+- **默认持续时间**：60 秒
+- **真实场景**：工件装夹松动或切削共振
+
+| 测点 | 变化方向 | 峰值倍率 |
+|------|---------|---------|
+| `vibration_x` | 急升 | ×6.0 |
+| `vibration_y` | 急升 | ×6.0 |
+| `vibration_z` | 急升 | ×7.0 |
+
+---
+
+### coolant_failure — 切削液不足
+
+- **分类**：process
+- **模式**：渐进式
+- **默认持续时间**：480 秒
+- **真实场景**：切削液供给不足，热量积累，劣化速度较慢
+
+| 测点 | 变化方向 | 峰值倍率 |
+|------|---------|---------|
+| `spindle_current` | 升高 | ×1.6 |
+| `vibration_x` | 升高 | ×2.0 |
+| `vibration_y` | 升高 | ×2.0 |
+| `vibration_z` | 升高 | ×2.5 |
+| `feed_rate` | 降低 | ×0.75 |
+
+---
+
+### power_fluctuation — 电源波动
+
+- **分类**：electrical
+- **模式**：瞬间注入（持续期间持续抖动）
+- **默认持续时间**：90 秒
+- **真实场景**：供电电压不稳定，各指标出现随机波动
+
+| 测点 | 变化方向 | 说明 |
+|------|---------|------|
+| `spindle_speed` | 随机抖动 | ±300 RPM 噪声 |
+| `spindle_current` | 随机抖动 | ±5 A 噪声 |
+| `feed_rate` | 随机抖动 | ±150 mm/min 噪声 |
+
+---
+
+## 使用示例
+
+### 模拟刀具磨损过程
+
+```bash
+# 注入刀具磨损，持续 5 分钟，强度 100%
+curl -X POST http://localhost:8000/api/v1/devices/fanuc-cnc-01/fault \
+  -H "Content-Type: application/json" \
+  -d '{"fault_type_id": "tool_wear", "duration": 300, "intensity": 1.0}'
+
+# 每隔 30 秒查看故障进度
+curl http://localhost:8000/api/v1/devices/fanuc-cnc-01/fault
+
+# 查看 Prometheus 指标变化
+curl http://localhost:8000/api/v1/metrics | grep -E "spindle_current|vibration|feed_rate"
+```
+
+### 模拟突发崩刃后手动恢复
+
+```bash
+# 注入崩刃故障
+curl -X POST http://localhost:8000/api/v1/devices/fanuc-cnc-01/fault \
+  -H "Content-Type: application/json" \
+  -d '{"fault_type_id": "tool_breakage", "duration": 60}'
+
+# 手动提前清除
+curl -X DELETE http://localhost:8000/api/v1/devices/fanuc-cnc-01/fault
+```
+
+### 低强度渐进劣化（用于 AI 模型训练）
+
+```bash
+# 用 50% 强度注入轴承故障，持续 10 分钟，产生轻微异常样本
+curl -X POST http://localhost:8000/api/v1/devices/fanuc-cnc-01/fault \
+  -H "Content-Type: application/json" \
+  -d '{"fault_type_id": "spindle_bearing_fault", "duration": 600, "intensity": 0.5}'
+```
+
+---
+
+## 与 Prometheus 集成
+
+故障注入后，测点值的变化会实时反映在 `/api/v1/metrics` 接口中。可以用 Grafana 观察故障期间各指标的时序变化：
+
+```
+# 主轴电流（故障期间会升高）
+fanuc_cnc_spindle_current
+
+# 三轴振动
+fanuc_cnc_vibration_x
+fanuc_cnc_vibration_y
+fanuc_cnc_vibration_z
+
+# 进给速率（刀具磨损/堵转时会降低）
+fanuc_cnc_feed_rate
+```
+
+---
+
+## 注意事项
+
+- 同一设备同时只能有一个活跃故障，新注入会覆盖旧故障
+- 故障到期后测点值由生成器在下一个 tick 自然恢复，不会瞬间跳回
+- 设备必须处于 `online` 状态才能注入故障
+- 删除设备时会自动清除其故障
diff --git a/protoforge/api/v1/router.py b/protoforge/api/v1/router.py
index 8eaab1b..7a6c050 100644
--- a/protoforge/api/v1/router.py
+++ b/protoforge/api/v1/router.py
@@ -8,6 +8,7 @@
 from fastapi.responses import PlainTextResponse
 
 from protoforge.models.device import DeviceConfig, DeviceInfo, PointValue
+from protoforge.models.fault import FaultInjectRequest
 from protoforge.models.scenario import ScenarioConfig, ScenarioInfo
 from protoforge.models.template import TemplateDetail, TemplateInfo
 
@@ -1207,6 +1208,62 @@ async def setup_demo():
         raise HTTPException(status_code=500, detail=get_friendly_error(str(e)))
 
 
+@router.get("/faults/types")
+async def list_fault_types():
+    engine = _get_engine()
+    types = engine.list_fault_types()
+    return [t.model_dump() for t in types]
+
+
+@router.get("/faults/types/{fault_type_id}")
+async def get_fault_type(fault_type_id: str):
+    from protoforge.core.fault import fault_injector
+    ft = fault_injector.get_fault_type(fault_type_id)
+    if not ft:
+        raise HTTPException(status_code=404, detail=f"Fault type not found: {fault_type_id}")
+    return ft.model_dump()
+
+
+@router.get("/faults/active")
+async def list_active_faults():
+    engine = _get_engine()
+    return [f.model_dump() for f in engine.list_active_faults()]
+
+
+@router.post("/devices/{device_id}/fault")
+async def inject_fault(device_id: str, request: FaultInjectRequest):
+    engine = _get_engine()
+    log_bus = _get_log_bus()
+    try:
+        info = engine.inject_fault(device_id, request)
+        log_bus.emit("", "system", device_id, "fault_injected",
+                     f"Fault {request.fault_type_id} injected into {device_id}",
+                     {"fault_type": request.fault_type_id, "duration": info.duration})
+        return info.model_dump()
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+
+
+@router.get("/devices/{device_id}/fault")
+async def get_device_fault(device_id: str):
+    engine = _get_engine()
+    info = engine.get_fault(device_id)
+    if not info:
+        return {"status": "none"}
+    return info.model_dump()
+
+
+@router.delete("/devices/{device_id}/fault")
+async def clear_device_fault(device_id: str):
+    engine = _get_engine()
+    log_bus = _get_log_bus()
+    cleared = engine.clear_fault(device_id)
+    if cleared:
+        log_bus.emit("", "system", device_id, "fault_cleared",
+                     f"Fault cleared on {device_id}")
+    return {"status": "ok", "cleared": cleared}
+
+
 @router.get("/setup/status")
 async def setup_status():
     engine = _get_engine()
diff --git a/protoforge/core/device.py b/protoforge/core/device.py
index efd21ef..f04414a 100644
--- a/protoforge/core/device.py
+++ b/protoforge/core/device.py
@@ -1,5 +1,5 @@
 import time
-from typing import Any, Optional
+from typing import Any, Callable, Optional
 
 from protoforge.core.generator import DataGenerator
 from protoforge.models.device import DeviceConfig, DeviceStatus, GeneratorType, PointConfig, PointValue
@@ -13,6 +13,8 @@ def __init__(self, config: DeviceConfig, generator: DataGenerator):
         self._point_values: dict[str, Any] = {}
         self._point_configs: dict[str, PointConfig] = {}
         self._start_time: Optional[float] = None
+        # 可选的 tick 后处理钩子，由外部模块（如 FaultInjector）注册
+        self._post_tick_hooks: list[Callable[["DeviceInstance"], None]] = []
 
         for point in config.points:
             self._point_configs[point.name] = point
@@ -21,6 +23,14 @@ def __init__(self, config: DeviceConfig, generator: DataGenerator):
             else:
                 self._point_values[point.name] = self._generator.generate(point)
 
+    def register_post_tick_hook(self, hook: Callable[["DeviceInstance"], None]) -> None:
+        """注册 tick 后处理钩子，外部模块通过此接口介入，不修改 tick 逻辑本身"""
+        if hook not in self._post_tick_hooks:
+            self._post_tick_hooks.append(hook)
+
+    def unregister_post_tick_hook(self, hook: Callable[["DeviceInstance"], None]) -> None:
+        self._post_tick_hooks = [h for h in self._post_tick_hooks if h != hook]
+
     @property
     def id(self) -> str:
         return self.config.id
@@ -51,6 +61,12 @@ def tick(self) -> None:
         for name, point in self._point_configs.items():
             if point.generator_type != GeneratorType.FIXED:
                 self._point_values[name] = self._generator.generate(point)
+        # 执行后处理钩子（故障注入等外部模块在此覆盖测点值）
+        for hook in self._post_tick_hooks:
+            try:
+                hook(self)
+            except Exception:
+                pass
 
     def read_point(self, point_name: str) -> Optional[PointValue]:
         if point_name not in self._point_values:
diff --git a/protoforge/core/engine.py b/protoforge/core/engine.py
index f289425..059f10e 100644
--- a/protoforge/core/engine.py
+++ b/protoforge/core/engine.py
@@ -4,9 +4,11 @@
 from typing import Any, Optional
 
 from protoforge.core.device import DeviceInstance
+from protoforge.core.fault import fault_injector
 from protoforge.core.generator import DataGenerator
 from protoforge.core.scenario import Scenario
 from protoforge.models.device import DeviceConfig, DeviceInfo, DeviceStatus, PointValue
+from protoforge.models.fault import FaultInfo, FaultInjectRequest, FaultTypeDefinition
 from protoforge.models.scenario import ScenarioConfig, ScenarioInfo, ScenarioStatus
 from protoforge.protocols.base import ProtocolServer, ProtocolStatus
 
@@ -56,6 +58,8 @@ async def stop_protocol(self, protocol_name: str) -> None:
     async def create_device(self, config: DeviceConfig) -> DeviceInfo:
         instance = DeviceInstance(config, self._generator)
         self._devices[config.id] = instance
+        # 注册故障注入钩子
+        instance.register_post_tick_hook(fault_injector.apply)
 
         server = self._protocol_servers.get(config.protocol)
         if server and server.status == ProtocolStatus.RUNNING:
@@ -70,6 +74,9 @@ async def remove_device(self, device_id: str) -> None:
         if not instance:
             raise ValueError(f"Device not found: {device_id}")
 
+        # 清除该设备的故障
+        fault_injector.clear(device_id)
+
         server = self._protocol_servers.get(instance.protocol)
         if server and server.status == ProtocolStatus.RUNNING:
             await server.remove_device(device_id)
@@ -299,3 +306,28 @@ def _get_device_info(self, instance: DeviceInstance) -> DeviceInfo:
             status=instance.status,
             points=instance.read_all_points(),
         )
+
+    # ------------------------------------------------------------------
+    # 故障管理
+    # ------------------------------------------------------------------
+
+    def inject_fault(self, device_id: str, request: FaultInjectRequest) -> FaultInfo:
+        instance = self._devices.get(device_id)
+        if not instance:
+            raise ValueError(f"Device not found: {device_id}")
+        if instance.status != DeviceStatus.ONLINE:
+            raise ValueError(f"Device {device_id} is not online")
+        return fault_injector.inject(instance, request)
+
+    def clear_fault(self, device_id: str) -> bool:
+        return fault_injector.clear(device_id)
+
+    def get_fault(self, device_id: str) -> Optional[FaultInfo]:
+        return fault_injector.get_fault(device_id)
+
+    def list_active_faults(self) -> list[FaultInfo]:
+        return fault_injector.list_active()
+
+    @staticmethod
+    def list_fault_types() -> list[FaultTypeDefinition]:
+        return fault_injector.list_fault_types()
diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
new file mode 100644
index 0000000..e72842d
--- /dev/null
+++ b/protoforge/core/fault.py
@@ -0,0 +1,419 @@
+"""
+故障注入模块 (FaultInjector)
+
+设计原则：
+- 完全独立，不修改 device.py / engine.py 现有逻辑
+- 通过 apply(device) 在每次 tick 后覆盖测点值，device 本身无感知
+- 支持四种场景：异常注入、自动恢复、多指标联动、渐进式劣化
+"""
+import logging
+import random
+import time
+import uuid
+from typing import Any, Optional
+
+from protoforge.models.fault import (
+    ActiveFault,
+    FaultInfo,
+    FaultInjectRequest,
+    FaultMode,
+    FaultStatus,
+    FaultTypeDefinition,
+    PointFaultConfig,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# 内置故障类型定义（基于真实工业场景）
+# ---------------------------------------------------------------------------
+
+BUILTIN_FAULT_TYPES: list[FaultTypeDefinition] = [
+
+    # ------------------------------------------------------------------
+    # 刀具磨损 — 最常见的机加工故障
+    # 特征：切削阻力增大 → 主轴电流缓慢爬升，振动幅度增大，进给速率被系统压低
+    # 模式：渐进式，持续数分钟，模拟刀具从轻度磨损到需要换刀的过程
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="tool_wear",
+        name="刀具磨损",
+        description="刀具切削刃磨损，切削阻力增大，主轴电流升高，振动增大，进给速率下降",
+        category="mechanical",
+        default_duration=300.0,
+        tags=["刀具", "磨损", "渐进"],
+        point_faults=[
+            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
+                             multiplier=2.2, noise_scale=0.8),
+            PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL,
+                             multiplier=3.0, noise_scale=0.3),
+            PointFaultConfig(point="vibration_y", mode=FaultMode.GRADUAL,
+                             multiplier=3.0, noise_scale=0.3),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL,
+                             multiplier=3.5, noise_scale=0.4),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.GRADUAL,
+                             multiplier=0.45, noise_scale=20.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 刀具崩刃 — 突发性刀具失效
+    # 特征：瞬间冲击 → 振动突增，电流瞬间峰值，进给立即停止
+    # 模式：瞬间注入，持续时间短（机床通常会触发报警停机）
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="tool_breakage",
+        name="刀具崩刃",
+        description="刀具突发性崩刃，振动剧烈突增，主轴电流峰值，进给停止",
+        category="mechanical",
+        default_duration=15.0,
+        tags=["刀具", "崩刃", "突发"],
+        point_faults=[
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=4.5, noise_scale=2.0),
+            PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT,
+                             multiplier=8.0, noise_scale=1.5),
+            PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT,
+                             multiplier=8.0, noise_scale=1.5),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
+                             multiplier=10.0, noise_scale=2.0),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             target_value=0.0, noise_scale=0.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 主轴过热 — 长时间高负荷或冷却系统故障
+    # 特征：主轴电流持续偏高，转速因热保护逐渐降低
+    # 模式：渐进式，持续时间较长
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="spindle_overheat",
+        name="主轴过热",
+        description="主轴长时间高负荷运转或冷却不足，电流持续偏高，转速因热保护下降",
+        category="thermal",
+        default_duration=240.0,
+        tags=["主轴", "过热", "渐进"],
+        point_faults=[
+            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
+                             multiplier=1.8, noise_scale=1.2),
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL,
+                             multiplier=0.6, noise_scale=50.0),
+            PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL,
+                             multiplier=1.5, noise_scale=0.2),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL,
+                             multiplier=1.5, noise_scale=0.2),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 主轴轴承故障 — 轴承磨损或润滑不足
+    # 特征：振动频率特征变化，整体振动幅度升高，电流略升
+    # 模式：渐进式
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="spindle_bearing_fault",
+        name="主轴轴承故障",
+        description="主轴轴承磨损或润滑不足，振动幅度持续升高，伴随电流轻微上升",
+        category="mechanical",
+        default_duration=360.0,
+        tags=["主轴", "轴承", "渐进"],
+        point_faults=[
+            PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL,
+                             multiplier=4.0, noise_scale=0.5),
+            PointFaultConfig(point="vibration_y", mode=FaultMode.GRADUAL,
+                             multiplier=4.0, noise_scale=0.5),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL,
+                             multiplier=5.0, noise_scale=0.8),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
+                             multiplier=1.3, noise_scale=0.5),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 进给堵转 — 工件夹紧松动或切削量过大导致进给卡死
+    # 特征：进给速率瞬间降为 0，主轴电流急剧升高
+    # 模式：瞬间注入
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="feed_stall",
+        name="进给堵转",
+        description="进给轴卡死，进给速率降为零，主轴电流急剧升高",
+        category="process",
+        default_duration=20.0,
+        tags=["进给", "堵转", "突发"],
+        point_faults=[
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             target_value=0.0, noise_scale=0.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=3.8, noise_scale=1.5),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
+                             multiplier=5.0, noise_scale=1.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 振动异常 — 工件装夹松动或共振
+    # 特征：三轴振动突然大幅增加，其他指标基本正常
+    # 模式：瞬间注入
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="vibration_spike",
+        name="振动异常",
+        description="工件装夹松动或切削共振，三轴振动突然大幅增加",
+        category="mechanical",
+        default_duration=60.0,
+        tags=["振动", "装夹", "突发"],
+        point_faults=[
+            PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT,
+                             multiplier=6.0, noise_scale=1.0),
+            PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT,
+                             multiplier=6.0, noise_scale=1.0),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
+                             multiplier=7.0, noise_scale=1.2),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 切削液不足 — 冷却润滑失效
+    # 特征：热量积累 → 振动缓慢升高，电流缓慢升高，进给略降
+    # 模式：渐进式，速度较慢
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="coolant_failure",
+        name="切削液不足",
+        description="切削液供给不足，冷却润滑失效，热量积累导致振动和电流缓慢升高",
+        category="process",
+        default_duration=480.0,
+        tags=["切削液", "冷却", "渐进"],
+        point_faults=[
+            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
+                             multiplier=1.6, noise_scale=0.8),
+            PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL,
+                             multiplier=2.0, noise_scale=0.3),
+            PointFaultConfig(point="vibration_y", mode=FaultMode.GRADUAL,
+                             multiplier=2.0, noise_scale=0.3),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL,
+                             multiplier=2.5, noise_scale=0.4),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.GRADUAL,
+                             multiplier=0.75, noise_scale=15.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 电源波动 — 供电不稳定
+    # 特征：主轴转速和进给速率出现随机波动，电流不稳定
+    # 模式：瞬间注入（持续期间持续抖动）
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="power_fluctuation",
+        name="电源波动",
+        description="供电电压不稳定，主轴转速和进给速率出现随机波动",
+        category="electrical",
+        default_duration=90.0,
+        tags=["电源", "波动", "突发"],
+        point_faults=[
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=300.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=5.0),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=150.0),
+        ],
+    ),
+]
+
+# Lookup table from fault-type id to its definition, built once from BUILTIN_FAULT_TYPES
+_FAULT_TYPE_MAP: dict[str, FaultTypeDefinition] = {ft.id: ft for ft in BUILTIN_FAULT_TYPES}
+
+
+# ---------------------------------------------------------------------------
+# FaultInjector
+# ---------------------------------------------------------------------------
+
+class FaultInjector:
+    """
+    Fault injector that lives outside DeviceInstance (it only uses device.id / device._point_values).
+
+    Usage:
+        injector = FaultInjector()
+        injector.inject(device, request)   # start a fault
+        injector.apply(device)             # call after every tick to overwrite point values
+        injector.clear(device_id)          # remove the fault manually
+    """
+
+    def __init__(self):
+        # device_id -> ActiveFault; inject() overwrites the entry, so one fault per device at most
+        self._active: dict[str, ActiveFault] = {}
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    def inject(self, device: Any, request: FaultInjectRequest) -> FaultInfo:
+        """Start a fault on ``device`` and return its info; raises ValueError for an unknown type."""
+        fault_type = _FAULT_TYPE_MAP.get(request.fault_type_id)
+        if not fault_type:
+            raise ValueError(f"Unknown fault type: {request.fault_type_id}")
+
+        duration = request.duration if request.duration is not None else fault_type.default_duration
+
+        # Snapshot the current value of every affected point as its baseline
+        baseline: dict[str, float] = {}
+        for pf in fault_type.point_faults:
+            val = device._point_values.get(pf.point)
+            if val is not None:
+                try:
+                    baseline[pf.point] = float(val)
+                except (TypeError, ValueError):
+                    baseline[pf.point] = 0.0
+
+        fault = ActiveFault(
+            fault_id=uuid.uuid4().hex[:12],
+            device_id=device.id,
+            fault_type_id=fault_type.id,
+            fault_type_name=fault_type.name,
+            intensity=max(0.0, min(1.0, request.intensity)),
+            duration=duration,
+            started_at=time.time(),
+            baseline_values=baseline,
+        )
+        self._active[device.id] = fault
+        logger.info("Fault injected: device=%s type=%s duration=%.0fs",
+                    device.id, fault_type.id, duration)
+        return self._to_info(fault, fault_type)
+
+    def apply(self, device: Any) -> None:
+        """
+        Call after device.tick(): overwrites entries of device._point_values with the fault effect.
+        A fault whose duration has elapsed is removed automatically.
+        """
+        fault = self._active.get(device.id)
+        if not fault:
+            return
+
+        now = time.time()
+        elapsed = now - fault.started_at
+
+        if elapsed >= fault.duration:
+            self._expire(device, fault)
+            return
+
+        fault_type = _FAULT_TYPE_MAP.get(fault.fault_type_id)
+        if not fault_type:
+            return
+
+        # progress: 0.0 (just injected) -> 1.0 (peak); clamped at 0 in case the wall clock steps back
+        progress = max(0.0, min(elapsed / fault.duration, 1.0)) if fault.duration > 0 else 1.0
+
+        for pf in fault_type.point_faults:
+            if pf.point not in device._point_values:
+                continue
+            baseline = fault.baseline_values.get(pf.point, 0.0)
+            if baseline == 0.0:
+                # Zero or missing baseline: use the current value (or 1.0) so a multiplier has an effect
+                try:
+                    baseline = float(device._point_values[pf.point]) or 1.0
+                except (TypeError, ValueError):
+                    continue
+
+            device._point_values[pf.point] = self._compute_value(
+                pf, baseline, progress, fault.intensity
+            )
+
+    def clear(self, device_id: str) -> bool:
+        """Remove the device's fault manually (point values are not reset); False if none was active."""
+        fault = self._active.pop(device_id, None)
+        if fault is None:
+            return False
+        fault.status = FaultStatus.CLEARED
+        fault.cleared_at = time.time()
+        logger.info("Fault cleared manually: device=%s type=%s", device_id, fault.fault_type_id)
+        return True
+
+    def get_fault(self, device_id: str) -> Optional[FaultInfo]:
+        """Return the device's active fault as FaultInfo, or None when it has none."""
+        fault = self._active.get(device_id)
+        if not fault:
+            return None
+        return self._to_info(fault, _FAULT_TYPE_MAP.get(fault.fault_type_id))
+
+    def list_active(self) -> list[FaultInfo]:
+        """Return a FaultInfo snapshot for every fault currently in the active map."""
+        return [
+            self._to_info(fault, _FAULT_TYPE_MAP.get(fault.fault_type_id))
+            for fault in self._active.values()
+        ]
+
+    @staticmethod
+    def list_fault_types() -> list[FaultTypeDefinition]:
+        return list(BUILTIN_FAULT_TYPES)  # copy, so callers cannot mutate the shared registry list
+
+    @staticmethod
+    def get_fault_type(fault_type_id: str) -> Optional[FaultTypeDefinition]:
+        return _FAULT_TYPE_MAP.get(fault_type_id)  # None for an unknown id
+
+    # ------------------------------------------------------------------
+    # Internals
+    # ------------------------------------------------------------------
+
+    def _compute_value(
+        self,
+        pf: PointFaultConfig,
+        baseline: float,
+        progress: float,
+        intensity: float,
+    ) -> float:
+        """Compute the overriding value of one point from its fault config, progress and intensity."""
+        if pf.mode == FaultMode.INSTANT:
+            # INSTANT: jump straight to the target; progress is ignored
+            if pf.target_value is not None:
+                target = pf.target_value
+            elif pf.multiplier is not None:
+                target = baseline * (1.0 + (pf.multiplier - 1.0) * intensity)
+            else:
+                target = baseline
+        else:
+            # GRADUAL: degrade linearly with progress
+            if pf.target_value is not None:
+                target = baseline + (pf.target_value - baseline) * progress * intensity
+            elif pf.multiplier is not None:
+                target = baseline * (1.0 + (pf.multiplier - 1.0) * progress * intensity)
+            else:
+                target = baseline
+
+        # Add Gaussian noise (sigma = noise_scale * intensity) to mimic real signal jitter
+        if pf.noise_scale > 0:
+            target += random.gauss(0, pf.noise_scale * intensity)
+
+        return round(max(0.0, target), 4)  # never negative, rounded to 4 decimals
+
+    def _expire(self, device: Any, fault: ActiveFault) -> None:
+        """Drop an expired fault from the active map; device point values are not touched here."""
+        self._active.pop(device.id, None)
+        logger.info("Fault expired: device=%s type=%s", device.id, fault.fault_type_id)
+
+    @staticmethod
+    def _to_info(fault: ActiveFault, fault_type: Optional[FaultTypeDefinition]) -> FaultInfo:
+        """Build the FaultInfo view of ``fault``; a non-positive duration reports progress 1.0."""
+        elapsed = time.time() - fault.started_at
+        progress = max(0.0, min(elapsed / fault.duration, 1.0)) if fault.duration > 0 else 1.0
+        affected = [pf.point for pf in fault_type.point_faults] if fault_type else []
+        return FaultInfo(
+            fault_id=fault.fault_id,
+            device_id=fault.device_id,
+            fault_type_id=fault.fault_type_id,
+            fault_type_name=fault.fault_type_name,
+            status=fault.status,
+            intensity=fault.intensity,
+            duration=fault.duration,
+            elapsed=round(elapsed, 1),
+            progress=round(progress, 3),
+            affected_points=affected,
+            started_at=fault.started_at,
+        )
+
+
+# Module-level singleton instance
+fault_injector = FaultInjector()
diff --git a/protoforge/models/fault.py b/protoforge/models/fault.py
new file mode 100644
index 0000000..cc038e0
--- /dev/null
+++ b/protoforge/models/fault.py
@@ -0,0 +1,77 @@
+from enum import Enum
+from typing import Any, Optional
+
+from pydantic import BaseModel, Field
+
+
+class FaultMode(str, Enum):
+    """Fault injection mode."""
+    INSTANT = "instant"       # jump straight to the abnormal value and hold it until the fault ends
+    GRADUAL = "gradual"       # degrade linearly over time, approaching the peak as duration elapses
+
+
+class FaultStatus(str, Enum):  # lifecycle state carried by ActiveFault and FaultInfo
+    ACTIVE = "active"
+    RECOVERING = "recovering"
+    CLEARED = "cleared"
+
+
+class PointFaultConfig(BaseModel):
+    """Fault behavior for a single measurement point."""
+    point: str
+    mode: FaultMode = FaultMode.INSTANT
+
+    # INSTANT: the point is set to target_value; when that is None, multiplier scales the baseline
+    target_value: Optional[float] = None
+    multiplier: Optional[float] = None     # abnormal value = normal (baseline) value × multiplier
+
+    # GRADUAL: moves linearly from the baseline toward target_value or multiplier × baseline
+    # offset applied = progress (0~1) × (target - baseline)
+    noise_scale: float = 0.0               # magnitude of the random noise added to mimic jitter
+
+
+class FaultTypeDefinition(BaseModel):
+    """Definition of a fault type, describing one real-world fault scenario."""
+    id: str
+    name: str
+    description: str
+    category: str                          # fault category: mechanical / electrical / thermal / process
+    default_duration: float = 120.0        # default duration in seconds
+    point_faults: list[PointFaultConfig] = Field(default_factory=list)
+    tags: list[str] = Field(default_factory=list)
+
+
+class FaultInjectRequest(BaseModel):
+    """Fault injection request."""
+    fault_type_id: str
+    duration: Optional[float] = None       # overrides the type's default duration; None = use the default
+    intensity: float = 1.0                 # fault intensity factor 0~1, scales the degradation
+
+
+class ActiveFault(BaseModel):
+    """A currently active fault instance."""
+    fault_id: str                          # unique id of this instance
+    device_id: str
+    fault_type_id: str
+    fault_type_name: str
+    status: FaultStatus = FaultStatus.ACTIVE
+    intensity: float = 1.0
+    duration: float = 120.0
+    started_at: float = 0.0
+    cleared_at: Optional[float] = None
+    baseline_values: dict[str, float] = Field(default_factory=dict)  # normal baseline values captured at injection
+
+
+class FaultInfo(BaseModel):
+    """Fault status information (used for API responses)."""
+    fault_id: str
+    device_id: str
+    fault_type_id: str
+    fault_type_name: str
+    status: FaultStatus
+    intensity: float
+    duration: float
+    elapsed: float
+    progress: float                        # 0~1, fault progress
+    affected_points: list[str]
+    started_at: float

From 05f993cae5374642bb5447e1aa260e87aec012f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 20 May 2026 14:18:21 +0800
Subject: [PATCH 16/55] feat(fault): support fault

---
 web/src/api.js            |   6 ++
 web/src/views/Devices.vue | 169 +++++++++++++++++++++++++++++++++++---
 2 files changed, 164 insertions(+), 11 deletions(-)

diff --git a/web/src/api.js b/web/src/api.js
index 2bc15b3..059d4bd 100644
--- a/web/src/api.js
+++ b/web/src/api.js
@@ -128,4 +128,10 @@ export default {
 
   getSettings: () => d(api.get('/settings')),
   updateSettings: (updates) => d(api.put('/settings', updates)),
+
+  getFaultTypes: () => d(api.get('/faults/types')),
+  getActiveFaults: () => d(api.get('/faults/active')),
+  injectFault: (deviceId, faultTypeId, duration, intensity) => d(api.post(`/devices/${deviceId}/fault`, { fault_type_id: faultTypeId, duration, intensity })),
+  getDeviceFault: (deviceId) => d(api.get(`/devices/${deviceId}/fault`)),
+  clearDeviceFault: (deviceId) => d(api.delete(`/devices/${deviceId}/fault`)),
 }
diff --git a/web/src/views/Devices.vue b/web/src/views/Devices.vue
index 022f3ac..65e0535 100644
--- a/web/src/views/Devices.vue
+++ b/web/src/views/Devices.vue
@@ -90,14 +90,62 @@
       <n-modal v-model:show="showPointsModal" preset="card" title="设备测点" style="width:700px">
         <n-data-table :columns="pointColumns" :data="currentPoints" :bordered="false" size="small" />
       </n-modal>
+
+      <!-- Fault injection modal -->
+      <n-modal v-model:show="showFaultModal" preset="card" title="故障注入" style="width:480px">
+        <n-space vertical size="medium">
+          <n-text depth="3" style="font-size:13px">设备：{{ faultTargetDevice?.name }}</n-text>
+          <n-form-item label="故障类型" label-placement="left" label-width="80">
+            <n-select
+              v-model:value="faultTypeId"
+              :options="faultTypeOptions"
+              placeholder="选择故障类型"
+              @update:value="onFaultTypeChange"
+            />
+          </n-form-item>
+          <n-alert v-if="selectedFaultType" type="warning" :bordered="false" style="font-size:12px">
+            <div style="font-weight:500;margin-bottom:4px">{{ selectedFaultType.name }} · {{ faultCategoryLabel(selectedFaultType.category) }}</div>
+            <div style="color:#94a3b8">{{ selectedFaultType.description }}</div>
+            <div style="margin-top:6px;color:#94a3b8">
+              影响测点：{{ selectedFaultType.point_faults.map(p => p.point).join('、') }}
+            </div>
+          </n-alert>
+          <n-form-item label="持续时间" label-placement="left" label-width="80">
+            <n-input-number
+              v-model:value="faultDuration"
+              :min="5"
+              :max="3600"
+              style="width:100%"
+            >
+              <template #suffix>秒</template>
+            </n-input-number>
+          </n-form-item>
+          <n-form-item label="故障强度" label-placement="left" label-width="80">
+            <n-space vertical style="width:100%">
+              <n-slider v-model:value="faultIntensity" :min="0.1" :max="1.0" :step="0.1" />
+              <n-text depth="3" style="font-size:12px">
+                {{ faultIntensityLabel }}（{{ faultIntensity }}）
+              </n-text>
+            </n-space>
+          </n-form-item>
+        </n-space>
+        <template #action>
+          <n-space justify="end">
+            <n-button @click="showFaultModal = false">取消</n-button>
+            <n-button type="error" :loading="faultLoading" :disabled="!faultTypeId" @click="doInjectFault">
+              注入故障
+            </n-button>
+          </n-space>
+        </template>
+      </n-modal>
     </n-space>
   </div>
 </template>
 
 <script setup>
-import { ref, computed, onMounted, h } from 'vue'
+import { ref, computed, onMounted, onUnmounted, h } from 'vue'
 import { NSpace, NSelect, NButton, NDataTable, NModal, NForm, NFormItem, NInput, NTag,
-  NSteps, NStep, NText, NAlert, useMessage, useDialog } from 'naive-ui'
+  NSteps, NStep, NText, NAlert, NInputNumber, NSlider, useMessage, useDialog } from 'naive-ui'
 import { useRouter } from 'vue-router'
 import api from '../api.js'
 
@@ -123,6 +171,17 @@ const qcTemplateId = ref(null)
 const qcDeviceName = ref('')
 const qcLoading = ref(false)
 
+// Fault-injection state
+const showFaultModal = ref(false)
+const faultTargetDevice = ref(null) // device row the modal was opened for
+const faultTypes = ref([]) // fault type definitions loaded from the API
+const faultTypeId = ref(null)
+const faultDuration = ref(120) // seconds
+const faultIntensity = ref(1.0)
+const faultLoading = ref(false)
+// device_id -> fault info, used to show the fault status in the device list
+const activeFaults = ref({})
+
 const protocolLabels = {
   modbus_tcp: 'Modbus TCP', modbus_rtu: 'Modbus RTU', opcua: 'OPC-UA', mqtt: 'MQTT',
   http: 'HTTP', gb28181: 'GB28181', bacnet: 'BACnet', s7: 'S7',
@@ -184,13 +243,25 @@ const columns = [
   },
   { title: '测点', key: 'points', width: 70, render: (row) => (row.points || []).length },
   {
-    title: '操作', key: 'actions', width: 280,
+    title: '故障', key: 'fault', width: 90,
+    render: (row) => {
+      const fault = activeFaults.value[row.id]
+      if (!fault || fault.status === 'none') return h(NTag, { size: 'tiny', bordered: false }, () => '正常')
+      const pct = Math.round((fault.progress || 0) * 100)
+      return h(NTag, { size: 'tiny', type: 'error', bordered: false }, () => `${fault.fault_type_name} ${pct}%`)
+    }
+  },
+  {
+    title: '操作', key: 'actions', width: 320,
     render: (row) => h(NSpace, { size: 4 }, () => [
       h(NButton, { size: 'tiny', tertiary: true, onClick: () => viewPoints(row.id) }, () => '测点'),
       h(NButton, { size: 'tiny', tertiary: true, onClick: () => openEditDevice(row) }, () => '编辑'),
       row.status === 'online' || row.status === 'running'
         ? h(NButton, { size: 'tiny', type: 'warning', secondary: true, onClick: () => toggleDevice(row.id, 'stop') }, () => '停止')
         : h(NButton, { size: 'tiny', type: 'primary', secondary: true, onClick: () => toggleDevice(row.id, 'start') }, () => '启动'),
+      activeFaults.value[row.id] && activeFaults.value[row.id].status !== 'none'
+        ? h(NButton, { size: 'tiny', type: 'warning', secondary: true, onClick: () => stopFault(row.id) }, () => '停止故障')
+        : h(NButton, { size: 'tiny', type: 'error', ghost: true, disabled: row.status !== 'online', onClick: () => openFaultModal(row) }, () => '注入故障'),
       h(NButton, { size: 'tiny', type: 'error', secondary: true, onClick: () => confirmDeleteDevice(row) }, () => '删除'),
     ])
   },
@@ -220,13 +291,6 @@ async function doQuickCreate() {
   } finally { qcLoading.value = false }
 }
 
-async function loadData() {
-  try {
-    const [devRes, protoRes, tmplRes] = await Promise.all([api.getDevices(), api.getProtocols(), api.getTemplates()])
-    devices.value = devRes; protocols.value = protoRes; templates.value = tmplRes
-  } catch (e) { message.error('加载数据失败: ' + (e.response?.data?.detail || e.message)) }
-}
-
 async function createDevice() {
   creating.value = true
   try {
@@ -282,5 +346,88 @@ async function viewPoints(id) {
   catch (e) { message.error('读取测点失败: ' + (e.response?.data?.detail || e.message)) }
 }
 
-onMounted(loadData)
+// Fault-injection helpers: select options labelled "name（category）" for every loaded fault type
+const faultTypeOptions = computed(() =>
+  faultTypes.value.map(t => ({ label: `${t.name}（${faultCategoryLabel(t.category)}）`, value: t.id }))
+)
+
+const selectedFaultType = computed(() => // fault type matching faultTypeId, or null
+  faultTypes.value.find(t => t.id === faultTypeId.value) || null
+)
+
+const faultIntensityLabel = computed(() => { // severity wording for the current slider value
+  const v = faultIntensity.value
+  if (v <= 0.3) return '轻微' // "minor"
+  if (v <= 0.6) return '中等' // "moderate"
+  if (v <= 0.8) return '严重' // "severe"
+  return '极严重' // "critical"
+})
+
+function faultCategoryLabel(category) { // Chinese display label for a category id
+  const map = { mechanical: '机械', thermal: '热', electrical: '电气', process: '工艺' }
+  return map[category] || category // unknown ids are shown as-is
+}
+
+function onFaultTypeChange(val) { // pre-fill the duration field with the selected type's default
+  const t = faultTypes.value.find(f => f.id === val)
+  if (t && t.default_duration) faultDuration.value = t.default_duration
+}
+
+function openFaultModal(row) { // reset the form to its defaults and open the modal for this row
+  faultTargetDevice.value = row
+  faultTypeId.value = null
+  faultDuration.value = 120
+  faultIntensity.value = 1.0
+  showFaultModal.value = true
+}
+
+async function doInjectFault() { // submit the modal form, then refresh the fault map
+  if (!faultTypeId.value || !faultTargetDevice.value) return
+  faultLoading.value = true
+  try {
+    await api.injectFault(faultTargetDevice.value.id, faultTypeId.value, faultDuration.value, faultIntensity.value)
+    message.success(`已向设备 "${faultTargetDevice.value.name}" 注入故障`)
+    showFaultModal.value = false
+    await loadFaultStatus()
+  } catch (e) {
+    message.error('注入失败: ' + (e.response?.data?.detail || e.message))
+  } finally { faultLoading.value = false } // always release the button's loading state
+}
+
+async function stopFault(deviceId) { // clear the device's fault via the API, then refresh the fault map
+  try {
+    await api.clearDeviceFault(deviceId)
+    message.success('故障已停止')
+    await loadFaultStatus()
+  } catch (e) {
+    message.error('停止故障失败: ' + (e.response?.data?.detail || e.message))
+  }
+}
+
+async function loadFaultStatus() { // rebuild the device_id -> fault map from the active-faults list
+  try {
+    const list = await api.getActiveFaults()
+    const map = {}
+    for (const f of list) map[f.device_id] = f
+    activeFaults.value = map
+  } catch (e) { /* fail silently */ }
+}
+
+async function loadData() { // load devices, protocols, templates and fault types in parallel
+  try {
+    const [devRes, protoRes, tmplRes, ftRes] = await Promise.all([
+      api.getDevices(), api.getProtocols(), api.getTemplates(), api.getFaultTypes()
+    ])
+    devices.value = devRes; protocols.value = protoRes; templates.value = tmplRes; faultTypes.value = ftRes
+    await loadFaultStatus()
+  } catch (e) { message.error('加载数据失败: ' + (e.response?.data?.detail || e.message)) }
+}
+
+let faultPollTimer = null
+onMounted(() => {
+  loadData()
+  faultPollTimer = setInterval(loadFaultStatus, 3000) // poll the fault status every 3 s
+})
+
+onUnmounted(() => { if (faultPollTimer) clearInterval(faultPollTimer) }) // stop polling on unmount
 </script>

From 550d8e20b98481a5390be716104142d6aec87770 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 20 May 2026 19:57:06 +0800
Subject: [PATCH 17/55] feat(ai): support ai

---
 ai/predict.py | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 97 insertions(+)
 create mode 100755 ai/predict.py

diff --git a/ai/predict.py b/ai/predict.py
new file mode 100755
index 0000000..b70f822
--- /dev/null
+++ b/ai/predict.py
@@ -0,0 +1,97 @@
+# -*- coding: utf-8 -*-
+
+import requests
+import numpy as np
+from datetime import datetime, timedelta
+
+VM_URL = "http://localhost:8428"  # VictoriaMetrics base URL
+DEVICE_ID = "fanuc-cnc"
+METRIC = f'feed_rate{{device_id="{DEVICE_ID}"}}'  # selector queried by fetch_history
+
+def fetch_history(minutes=30):
+    """Fetch the last ``minutes`` of METRIC from VictoriaMetrics as (timestamps, values)."""
+    end = datetime.now()
+    start = end - timedelta(minutes=minutes)
+    resp = requests.get(f"{VM_URL}/api/v1/query_range", params={
+        "query": METRIC,
+        "start": start.timestamp(),
+        "end": end.timestamp(),
+        "step": "1s",
+    }, timeout=10)  # never hang the polling loop on a stalled connection
+    resp.raise_for_status()  # surface HTTP errors here instead of as a KeyError below
+    result = resp.json()["data"]["result"]
+    if not result:
+        return [], []
+    values = result[0]["values"]  # only the first returned series is used
+    # each sample is a [timestamp, value-string] pair
+    return [float(v[0]) for v in values], [float(v[1]) for v in values]
+
+def predict_next(ts, ys, horizon=60):
+    """
+    Detect the dominant frequency with an FFT, fit a sine wave and extrapolate ``horizon`` seconds.
+    Suited to periodic signals; returns ([], []) when fewer than 60 samples are given.
+    """
+    if len(ys) < 60:
+        return [], []
+
+    ys = np.array(ys)
+    n = len(ys)
+    dt = 1.0  # 1-second sampling
+
+    # Find the dominant frequency with an FFT
+    fft = np.fft.rfft(ys - ys.mean())
+    freqs = np.fft.rfftfreq(n, d=dt)
+    dominant_idx = np.argmax(np.abs(fft[1:])) + 1
+    dominant_freq = freqs[dominant_idx]
+    period = 1.0 / dominant_freq if dominant_freq > 0 else 60
+
+    # Fit: y = A*sin(2π/T * t + φ) + offset
+    from scipy.optimize import curve_fit
+    t_rel = np.arange(n, dtype=float)
+    offset = ys.mean()
+    amplitude = (ys.max() - ys.min()) / 2
+
+    def sine_model(t, A, T, phi, C):
+        return A * np.sin(2 * np.pi / T * t + phi) + C
+
+    try:
+        popt, _ = curve_fit(
+            sine_model, t_rel, ys,
+            p0=[amplitude, period, 0, offset],
+            maxfev=5000
+        )
+        # Extrapolate the fitted curve past the last sample
+        t_future = np.arange(n, n + horizon, dtype=float)
+        y_pred = sine_model(t_future, *popt)
+        ts_future = [ts[-1] + i + 1 for i in range(horizon)]
+        return ts_future, y_pred.tolist()
+    except Exception:
+        # Fit failed: fall back to a straight line through the last 10 samples (9 one-second steps)
+        slope = (ys[-1] - ys[-10]) / 9
+        ts_future = [ts[-1] + i + 1 for i in range(horizon)]
+        y_pred = [float(ys[-1] + slope * (i + 1)) for i in range(horizon)]
+        return ts_future, y_pred
+
+def write_predictions(ts_future, y_pred, metric_name="protoforge_feed_rate_predicted"):
+    """Write the predictions back to VictoriaMetrics as Prometheus text lines (timestamps in ms)."""
+    lines = []
+    for t, y in zip(ts_future, y_pred):
+        ts_ms = int(t * 1000)
+        lines.append(f'{metric_name}{{device_id="{DEVICE_ID}"}} {y:.2f} {ts_ms}')
+    payload = "\n".join(lines)
+    requests.post(f"{VM_URL}/api/v1/import/prometheus", data=payload, timeout=10)  # bounded wait
+
+def run_once():  # one cycle: fetch 30 min of history, predict 120 s ahead, write the result back
+    ts, ys = fetch_history(minutes=30)
+    if len(ys) < 60:  # predict_next returns nothing below 60 samples
+        print("数据不足")
+        return
+    ts_future, y_pred = predict_next(ts, ys, horizon=120)
+    write_predictions(ts_future, y_pred)
+    print(f"写入 {len(y_pred)} 个预测点，预测到 +{len(y_pred)}s")
+
+if __name__ == "__main__":
+    import time
+    while True:
+        run_once()
+        time.sleep(30)  # re-run the prediction every 30 seconds

From e8f70d09c27d53bff6c8f310b7a2064496632a33 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 20 May 2026 20:29:13 +0800
Subject: [PATCH 18/55] feat(predict_v2): add predict_v2 python file

---
 ai/predict_v2.py | 206 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 206 insertions(+)
 create mode 100755 ai/predict_v2.py

diff --git a/ai/predict_v2.py b/ai/predict_v2.py
new file mode 100755
index 0000000..df5dd97
--- /dev/null
+++ b/ai/predict_v2.py
@@ -0,0 +1,206 @@
+# -*- coding: utf-8 -*-
+"""
+ProtoForge 预测服务 v2
+从 VictoriaMetrics 拉取历史数据，用 FFT + 正弦拟合预测未来值，写回 VM。
+预测值时间戳为未来时间，Grafana 中预测线出现在实测线右侧延伸处。
+"""
+
+import logging
+import time
+from datetime import datetime, timedelta
+
+import numpy as np
+import requests
+from scipy.optimize import curve_fit
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+# ── 配置 ──────────────────────────────────────────────────────────────────────
+VM_URL = "http://localhost:8428"
+
+# 要预测的指标列表，每项：(查询表达式, 写回指标名)
+PREDICT_TARGETS = [
+    ('feed_rate{device_id="fanuc-cnc"}',       "feed_rate_predicted"),
+    ('spindle_speed{device_id="fanuc-cnc"}',    "spindle_speed_predicted"),
+    ('spindle_current{device_id="fanuc-cnc"}',  "spindle_current_predicted"),
+    ('vibration_x{device_id="fanuc-cnc"}',      "vibration_x_predicted"),
+    ('vibration_y{device_id="fanuc-cnc"}',      "vibration_y_predicted"),
+    ('vibration_z{device_id="fanuc-cnc"}',      "vibration_z_predicted"),
+]
+
+HISTORY_MINUTES = 30   # 拉取多少分钟历史数据用于拟合
+HORIZON_SECONDS = 120  # 预测未来多少秒
+POLL_INTERVAL   = 30   # 每隔多少秒重新预测一次
+MIN_POINTS      = 120  # 至少需要多少个历史点才开始预测
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+def fetch_history(query: str, minutes: int = HISTORY_MINUTES):
+    """Fetch the last ``minutes`` of ``query`` from VictoriaMetrics; returns (timestamps, values)."""
+    now = datetime.now()
+    start = now - timedelta(minutes=minutes)
+    try:
+        resp = requests.get(
+            f"{VM_URL}/api/v1/query_range",
+            params={
+                "query": query,
+                "start": start.timestamp(),
+                "end":   now.timestamp(),
+                "step":  "1s",
+            },
+            timeout=10,
+        )
+        resp.raise_for_status()
+        result = resp.json().get("data", {}).get("result", [])
+    except (requests.RequestException, ValueError) as e:  # ValueError: body was not valid JSON
+        logger.error("拉取数据失败 query=%s: %s", query, e)
+        return [], []
+
+    if not result:
+        return [], []
+
+    values = result[0]["values"]  # only the first returned series is used
+    ts = [float(v[0]) for v in values]
+    ys = [float(v[1]) for v in values]
+    return ts, ys
+
+
+def _sine_model(t, A, T, phi, C):  # sine with amplitude A, period T, phase phi and offset C
+    return A * np.sin(2 * np.pi / T * t + phi) + C
+
+
+def predict_next(ts: list, ys: list, horizon: int = HORIZON_SECONDS):
+    """
+    Detect the dominant frequency with an FFT, fit a sine wave and extrapolate ``horizon`` seconds.
+    Returns (future_timestamps, predicted_values); every timestamp lies after the last real sample.
+    Fallback: linear extrapolation over the last 10 samples when the fit fails; ([], []) if n < 2.
+    """
+    ys_arr = np.array(ys)
+    n = len(ys_arr)
+    if n < 2 or not ts:
+        return [], []  # the FFT step needs a non-DC bin; run_once skips empty results
+    # ── Dominant frequency via FFT ────────────────────────────────────────────
+    fft_vals = np.fft.rfft(ys_arr - ys_arr.mean())
+    freqs = np.fft.rfftfreq(n, d=1.0)  # d=1: samples are assumed to be 1 second apart
+    # skip the DC component (index 0)
+    dominant_idx = int(np.argmax(np.abs(fft_vals[1:]))) + 1
+    dominant_freq = freqs[dominant_idx]
+    period = float(np.clip(1.0 / dominant_freq if dominant_freq > 0 else 60.0, 5.0, 3600.0))  # sane range
+
+    # ── Sine fit ──────────────────────────────────────────────────────────────
+    t_rel = np.arange(n, dtype=float)
+    amplitude = (ys_arr.max() - ys_arr.min()) / 2.0
+    offset = float(ys_arr.mean())
+
+    # Unix timestamp (seconds) of the last real sample
+    last_ts = ts[-1]
+
+    try:
+        popt, _ = curve_fit(
+            _sine_model,
+            t_rel,
+            ys_arr,
+            p0=[amplitude, period, 0.0, offset],
+            bounds=(
+                [0,       5.0,    -np.pi, ys_arr.min()],
+                [np.inf,  3600.0,  np.pi, ys_arr.max()],
+            ),
+            maxfev=8000,
+        )
+        t_future = np.arange(n, n + horizon, dtype=float)
+        y_pred = _sine_model(t_future, *popt)
+        # Clip to a band around the observed range; min/max keep lo <= hi for negative signals too
+        lo, hi = float(ys_arr.min()), float(ys_arr.max())
+        y_pred = np.clip(y_pred, min(lo * 0.5, lo * 1.5), max(hi * 0.5, hi * 1.5))
+        # future timestamps: last_ts + 1s, +2s, ..., +horizon s
+        ts_future = [last_ts + i + 1 for i in range(horizon)]
+        logger.debug("正弦拟合成功 period=%.1fs amplitude=%.2f", popt[1], popt[0])
+        return ts_future, y_pred.tolist()
+
+    except Exception as e:
+        logger.warning("正弦拟合失败，降级为线性外推: %s", e)
+        tail = min(10, n)
+        slope = (ys_arr[-1] - ys_arr[-tail]) / (tail - 1)  # tail samples span tail - 1 steps
+        ts_future = [last_ts + i + 1 for i in range(horizon)]
+        y_pred = [float(ys_arr[-1] + slope * (i + 1)) for i in range(horizon)]
+        return ts_future, y_pred
+
+
+def write_predictions(ts_future: list, y_pred: list, metric_name: str, extra_labels: dict = None):
+    """
+    Write the predictions to VictoriaMetrics in Prometheus exposition format.
+    Timestamps are millisecond Unix timestamps of the (future) predicted points.
+    """
+    label_str = ""
+    if extra_labels:
+        parts = [f'{k}="{v}"' for k, v in extra_labels.items()]  # NOTE(review): values are not escaped
+        label_str = "{" + ",".join(parts) + "}"
+
+    lines = []
+    for t, y in zip(ts_future, y_pred):
+        ts_ms = int(t * 1000)  # seconds -> milliseconds
+        lines.append(f"{metric_name}{label_str} {y:.4f} {ts_ms}")
+
+    payload = "\n".join(lines)
+    try:
+        resp = requests.post(
+            f"{VM_URL}/api/v1/import/prometheus",
+            data=payload,
+            timeout=10,
+        )
+        resp.raise_for_status()
+    except requests.RequestException as e:  # logged only; the caller is not told about the failure
+        logger.error("写入预测数据失败 metric=%s: %s", metric_name, e)
+
+
+def _parse_labels(query: str) -> dict:
+    """Parse the name="value" labels of a selector: feed_rate{device_id="fanuc-cnc"} → {"device_id": "fanuc-cnc"}"""
+    import re  # local import; this helper is the only user
+
+    start = query.find("{")
+    end = query.find("}", start + 1)
+    if start == -1 or end == -1:
+        return {}  # no selector, or an unterminated one (previously a ValueError)
+    # Only double-quoted equality pairs count: values may contain commas; !=, =~ and !~ are skipped
+    pairs = re.findall(r'(\w+)\s*=\s*"((?:[^"\\]|\\.)*)"', query[start + 1:end])
+    return dict(pairs)
+
+
+def run_once():  # one cycle: fetch, predict and write back every entry of PREDICT_TARGETS
+    now_str = datetime.now().strftime("%H:%M:%S")
+    for query, pred_metric in PREDICT_TARGETS:
+        ts, ys = fetch_history(query)
+        if len(ys) < MIN_POINTS:
+            logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
+            continue
+
+        ts_future, y_pred = predict_next(ts, ys, horizon=HORIZON_SECONDS)
+        if not ts_future:
+            continue
+
+        extra_labels = _parse_labels(query)  # reuse the source selector's labels on the predicted series
+        write_predictions(ts_future, y_pred, pred_metric, extra_labels)
+
+        future_time = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
+        logger.info(
+            "[%s] %-40s → %-35s 写入 %d 点，预测至 %s",
+            now_str, query, pred_metric, len(y_pred), future_time,
+        )
+
+
+def main():  # log the configuration, then run one prediction cycle every POLL_INTERVAL seconds
+    logger.info(
+        "预测服务启动  VM=%s  预测窗口=%ds  轮询间隔=%ds",
+        VM_URL, HORIZON_SECONDS, POLL_INTERVAL,
+    )
+    while True:
+        run_once()
+        time.sleep(POLL_INTERVAL)
+
+
+if __name__ == "__main__":  # start the loop only when executed as a script
+    main()

From 57df20284645347aba7de4e2399640e05b6d0b3c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 20 May 2026 21:13:52 +0800
Subject: [PATCH 19/55] fix

---
 ai/predict_v2.py | 536 +++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 450 insertions(+), 86 deletions(-)

diff --git a/ai/predict_v2.py b/ai/predict_v2.py
index df5dd97..bc425c8 100755
--- a/ai/predict_v2.py
+++ b/ai/predict_v2.py
@@ -1,56 +1,93 @@
 # -*- coding: utf-8 -*-
 """
-ProtoForge 预测服务 v2
-从 VictoriaMetrics 拉取历史数据，用 FFT + 正弦拟合预测未来值，写回 VM。
-预测值时间戳为未来时间，Grafana 中预测线出现在实测线右侧延伸处。
+ProtoForge 预测服务 v3
+
+修复点：
+1. 解决 HORIZON_SECONDS > POLL_INTERVAL 时，多轮预测窗口重叠导致 Grafana 出现毛刺/竖线问题。
+2. 每轮写入新预测前，删除同一个预测 metric 的旧预测序列，只保留最新一轮预测。
+3. 预测时间戳按整秒写入，避免毫秒时间戳和 Grafana step 不对齐。
+4. 拟合使用真实 timestamp 相对时间，不再假设历史数据严格 1 秒等间隔。
+5. 对历史数据做排序、去重、NaN/Inf 清洗。
 """
 
 import logging
+import math
+import re
 import time
 from datetime import datetime, timedelta
+from typing import Dict, List, Tuple
 
 import numpy as np
 import requests
 from scipy.optimize import curve_fit
 
+
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)s] %(message)s",
 )
+
 logger = logging.getLogger(__name__)
 
+
 # ── 配置 ──────────────────────────────────────────────────────────────────────
+
 VM_URL = "http://localhost:8428"
 
-# 要预测的指标列表，每项：(查询表达式, 写回指标名)
 PREDICT_TARGETS = [
-    ('feed_rate{device_id="fanuc-cnc"}',       "feed_rate_predicted"),
-    ('spindle_speed{device_id="fanuc-cnc"}',    "spindle_speed_predicted"),
-    ('spindle_current{device_id="fanuc-cnc"}',  "spindle_current_predicted"),
-    ('vibration_x{device_id="fanuc-cnc"}',      "vibration_x_predicted"),
-    ('vibration_y{device_id="fanuc-cnc"}',      "vibration_y_predicted"),
-    ('vibration_z{device_id="fanuc-cnc"}',      "vibration_z_predicted"),
+    ('feed_rate{device_id="fanuc-cnc"}', "feed_rate_predicted"),
+    ('spindle_speed{device_id="fanuc-cnc"}', "spindle_speed_predicted"),
+    ('spindle_current{device_id="fanuc-cnc"}', "spindle_current_predicted"),
+    ('vibration_x{device_id="fanuc-cnc"}', "vibration_x_predicted"),
+    ('vibration_y{device_id="fanuc-cnc"}', "vibration_y_predicted"),
+    ('vibration_z{device_id="fanuc-cnc"}', "vibration_z_predicted"),
 ]
 
-HISTORY_MINUTES = 30   # 拉取多少分钟历史数据用于拟合
-HORIZON_SECONDS = 120  # 预测未来多少秒
-POLL_INTERVAL   = 30   # 每隔多少秒重新预测一次
-MIN_POINTS      = 120  # 至少需要多少个历史点才开始预测
+HISTORY_MINUTES = 30
+HORIZON_SECONDS = 120
+POLL_INTERVAL = 30
+MIN_POINTS = 120
+QUERY_STEP = "1s"
+
+# 关键修复：每轮写入前删除旧预测，避免 120s 预测窗口和 30s 轮询周期重叠
+CLEAR_OLD_PREDICTIONS = True
+
+# 如果删除旧预测失败，是否跳过本轮写入。
+# 建议 True，避免继续叠加脏数据。
+SKIP_WRITE_IF_CLEAR_FAILED = True
+
+# 给新预测数据加一个稳定标签，方便 Grafana 查询过滤。
+# Grafana 可以查询：feed_rate_predicted{device_id="fanuc-cnc",forecast="latest"}
+EXTRA_PREDICT_LABELS = {
+    "forecast": "latest",
+    "source": "protoforge",
+}
+
+# 正弦周期限制
+MIN_PERIOD_SECONDS = 5.0
+MAX_PERIOD_SECONDS = 3600.0
+
 # ─────────────────────────────────────────────────────────────────────────────
 
 
-def fetch_history(query: str, minutes: int = HISTORY_MINUTES):
-    """从 VictoriaMetrics 拉取历史时序数据，返回 (timestamps, values)。"""
+def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]:
+    """
+    从 VictoriaMetrics 拉取历史时序数据。
+    返回：
+        timestamps: Unix 秒级时间戳
+        values: float 数值
+    """
     now = datetime.now()
     start = now - timedelta(minutes=minutes)
+
     try:
         resp = requests.get(
             f"{VM_URL}/api/v1/query_range",
             params={
                 "query": query,
                 "start": start.timestamp(),
-                "end":   now.timestamp(),
-                "step":  "1s",
+                "end": now.timestamp(),
+                "step": QUERY_STEP,
             },
             timeout=10,
         )
@@ -59,148 +96,475 @@ def fetch_history(query: str, minutes: int = HISTORY_MINUTES):
         logger.error("拉取数据失败 query=%s: %s", query, e)
         return [], []
 
-    result = resp.json().get("data", {}).get("result", [])
+    try:
+        result = resp.json().get("data", {}).get("result", [])
+    except Exception as e:
+        logger.error("解析 VM 返回失败 query=%s: %s", query, e)
+        return [], []
+
     if not result:
         return [], []
 
-    values = result[0]["values"]
-    ts = [float(v[0]) for v in values]
-    ys = [float(v[1]) for v in values]
+    values = result[0].get("values", [])
+    if not values:
+        return [], []
+
+    ts = []
+    ys = []
+
+    for item in values:
+        if len(item) < 2:
+            continue
+
+        try:
+            t = float(item[0])
+            y = float(item[1])
+        except Exception:
+            continue
+
+        if not math.isfinite(t) or not math.isfinite(y):
+            continue
+
+        ts.append(t)
+        ys.append(y)
+
     return ts, ys
 
 
-def _sine_model(t, A, T, phi, C):
-    return A * np.sin(2 * np.pi / T * t + phi) + C
+def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    清洗历史数据：
+    1. 转换为整秒时间戳
+    2. 排序
+    3. 同一秒多个值时保留最后一个
+    4. 插值补齐中间缺失秒
+    """
+    if not ts or not ys or len(ts) != len(ys):
+        return np.array([]), np.array([])
+
+    data = {}
+
+    for t, y in zip(ts, ys):
+        try:
+            sec = int(round(float(t)))
+            val = float(y)
+        except Exception:
+            continue
+
+        if not math.isfinite(sec) or not math.isfinite(val):
+            continue
+
+        data[sec] = val
+
+    if not data:
+        return np.array([]), np.array([])
+
+    sorted_items = sorted(data.items(), key=lambda x: x[0])
 
+    ts_clean = np.array([x[0] for x in sorted_items], dtype=float)
+    ys_clean = np.array([x[1] for x in sorted_items], dtype=float)
 
-def predict_next(ts: list, ys: list, horizon: int = HORIZON_SECONDS):
+    if len(ts_clean) < 2:
+        return ts_clean, ys_clean
+
+    start_sec = int(ts_clean[0])
+    end_sec = int(ts_clean[-1])
+
+    if end_sec <= start_sec:
+        return ts_clean, ys_clean
+
+    # 统一为 1 秒网格，减少 query_range 缺点、抖动、缺失点对 FFT 的影响
+    ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float)
+    ys_grid = np.interp(ts_grid, ts_clean, ys_clean)
+
+    return ts_grid, ys_grid
+
+
+def _sine_model(t: np.ndarray, A: float, T: float, phi: float, C: float) -> np.ndarray:
+    return A * np.sin(2.0 * np.pi / T * t + phi) + C
+
+
+def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
     """
-    用 FFT 检测主频，拟合正弦波，外推未来 horizon 秒。
-    返回 (future_timestamps, predicted_values)，时间戳均在最后一个真实点之后。
-    降级策略：拟合失败时用最近 10 点线性外推。
+    使用 FFT 估算主周期。
+    ys_arr 默认是 1 秒间隔。
     """
-    ys_arr = np.array(ys)
     n = len(ys_arr)
 
-    # ── FFT 找主频 ────────────────────────────────────────────────────────────
-    fft_vals = np.fft.rfft(ys_arr - ys_arr.mean())
-    freqs = np.fft.rfftfreq(n, d=1.0)  # d=1 表示 1 秒采样间隔
-    # 跳过直流分量（index 0）
-    dominant_idx = int(np.argmax(np.abs(fft_vals[1:]))) + 1
-    dominant_freq = freqs[dominant_idx]
-    period = 1.0 / dominant_freq if dominant_freq > 0 else 60.0
-    period = float(np.clip(period, 5.0, 3600.0))  # 限制在合理范围
+    if n < 4:
+        return 60.0
+
+    centered = ys_arr - np.mean(ys_arr)
+
+    if np.allclose(centered, 0):
+        return 60.0
+
+    fft_vals = np.fft.rfft(centered)
+    freqs = np.fft.rfftfreq(n, d=1.0)
+
+    if len(freqs) <= 1:
+        return 60.0
+
+    # 跳过直流分量 index 0
+    power = np.abs(fft_vals[1:])
+    if len(power) == 0 or np.max(power) <= 0:
+        return 60.0
+
+    dominant_idx = int(np.argmax(power)) + 1
+    dominant_freq = float(freqs[dominant_idx])
+
+    if dominant_freq <= 0:
+        return 60.0
+
+    period = 1.0 / dominant_freq
+    period = float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    return period
 
-    # ── 正弦拟合 ──────────────────────────────────────────────────────────────
-    t_rel = np.arange(n, dtype=float)
-    amplitude = (ys_arr.max() - ys_arr.min()) / 2.0
-    offset = float(ys_arr.mean())
 
-    # 最后一个真实数据点的 Unix 时间戳（秒）
-    last_ts = ts[-1]
+def predict_next(
+    ts: List[float],
+    ys: List[float],
+    horizon: int = HORIZON_SECONDS,
+    start_from_now: bool = True,
+) -> Tuple[List[float], List[float]]:
+    """
+    用 FFT 检测主频，拟合正弦波，外推未来 horizon 秒。
+    返回：
+        future_timestamps: 未来整秒时间戳
+        predicted_values: 预测值
+    """
+    ts_grid, ys_grid = normalize_history(ts, ys)
+
+    if len(ys_grid) < MIN_POINTS:
+        return [], []
+
+    n = len(ys_grid)
+
+    y_min = float(np.min(ys_grid))
+    y_max = float(np.max(ys_grid))
+    y_mean = float(np.mean(ys_grid))
+    y_range = y_max - y_min
+
+    # 数据几乎不波动时，直接使用最后一个值保持
+    if y_range <= 1e-9:
+        base_ts = int(time.time()) if start_from_now else int(ts_grid[-1])
+        base_ts = max(base_ts, int(ts_grid[-1]))
+
+        ts_future = [base_ts + i + 1 for i in range(horizon)]
+        y_pred = [float(ys_grid[-1])] * horizon
+        return ts_future, y_pred
+
+    period = estimate_period_by_fft(ys_grid)
+
+    # 用真实时间戳做相对时间，而不是 np.arange(n)
+    t_fit = ts_grid - ts_grid[0]
+
+    amplitude = y_range / 2.0
+    offset = y_mean
+
+    # 预测起点统一对齐到整秒
+    if start_from_now:
+        base_ts = int(time.time())
+    else:
+        base_ts = int(ts_grid[-1])
+
+    # 避免因为 VM 查询延迟导致预测点落在最后一个真实点之前
+    base_ts = max(base_ts, int(ts_grid[-1]))
+
+    ts_future_arr = np.arange(base_ts + 1, base_ts + 1 + horizon, 1, dtype=float)
+    t_future = ts_future_arr - ts_grid[0]
 
     try:
         popt, _ = curve_fit(
             _sine_model,
-            t_rel,
-            ys_arr,
+            t_fit,
+            ys_grid,
             p0=[amplitude, period, 0.0, offset],
             bounds=(
-                [0,       5.0,    -np.pi, ys_arr.min()],
-                [np.inf,  3600.0,  np.pi, ys_arr.max()],
+                [0.0, MIN_PERIOD_SECONDS, -2.0 * np.pi, y_min - y_range],
+                [np.inf, MAX_PERIOD_SECONDS, 2.0 * np.pi, y_max + y_range],
             ),
-            maxfev=8000,
+            maxfev=12000,
+        )
+
+        y_pred_arr = _sine_model(t_future, *popt)
+
+        # 裁剪到合理范围，避免拟合异常时飞出去
+        margin = y_range * 0.2
+        lower = y_min - margin
+        upper = y_max + margin
+        y_pred_arr = np.clip(y_pred_arr, lower, upper)
+
+        if not np.all(np.isfinite(y_pred_arr)):
+            raise ValueError("预测结果包含 NaN/Inf")
+
+        logger.debug(
+            "正弦拟合成功 period=%.2fs amplitude=%.4f offset=%.4f",
+            popt[1],
+            popt[0],
+            popt[3],
         )
-        t_future = np.arange(n, n + horizon, dtype=float)
-        y_pred = _sine_model(t_future, *popt)
-        # 裁剪到历史数据值域，避免外推飞出合理范围
-        y_pred = np.clip(y_pred, ys_arr.min() * 0.5, ys_arr.max() * 1.5)
 
-        # 未来时间戳：last_ts + 1s, +2s, ..., +horizon s
-        ts_future = [last_ts + i + 1 for i in range(horizon)]
-        logger.debug("正弦拟合成功 period=%.1fs amplitude=%.2f", popt[1], popt[0])
-        return ts_future, y_pred.tolist()
+        return ts_future_arr.tolist(), y_pred_arr.astype(float).tolist()
 
     except Exception as e:
-        logger.warning("正弦拟合失败，降级为线性外推: %s", e)
+        logger.warning("正弦拟合失败，降级为最近值平滑外推: %s", e)
+
+        # 降级策略：用最近 10 个点的均值保持，避免线性外推越走越偏
         tail = min(10, n)
-        slope = (ys_arr[-1] - ys_arr[-tail]) / tail
-        ts_future = [last_ts + i + 1 for i in range(horizon)]
-        y_pred = [float(ys_arr[-1] + slope * (i + 1)) for i in range(horizon)]
+        last_value = float(np.mean(ys_grid[-tail:]))
+
+        ts_future = ts_future_arr.tolist()
+        y_pred = [last_value] * horizon
+
         return ts_future, y_pred
 
 
-def write_predictions(ts_future: list, y_pred: list, metric_name: str, extra_labels: dict = None):
+def prom_escape_label_value(value: str) -> str:
+    """
+    Prometheus exposition label value 转义。
+    """
+    return (
+        str(value)
+        .replace("\\", "\\\\")
+        .replace("\n", "\\n")
+        .replace('"', '\\"')
+    )
+
+
+def build_selector(metric_name: str, labels: Dict[str, str]) -> str:
+    """
+    构造 PromQL selector，用于 delete_series。
+
+    示例：
+        feed_rate_predicted{device_id="fanuc-cnc"}
+    """
+    if not labels:
+        return metric_name
+
+    parts = []
+    for k in sorted(labels.keys()):
+        v = prom_escape_label_value(labels[k])
+        parts.append(f'{k}="{v}"')
+
+    return f'{metric_name}' + "{" + ",".join(parts) + "}"
+
+
+def delete_old_predictions(metric_name: str, base_labels: Dict[str, str]) -> bool:
+    """
+    删除旧预测序列，避免多轮预测窗口重叠。
+
+    注意：
+    这里故意只用 base_labels，比如 device_id。
+    不带 forecast/source 标签，是为了兼容旧版本脚本写入的无 forecast 标签数据。
+    """
+    selector = build_selector(metric_name, base_labels)
+
+    try:
+        resp = requests.post(
+            f"{VM_URL}/api/v1/admin/tsdb/delete_series",
+            params=[("match[]", selector)],
+            timeout=10,
+        )
+
+        if resp.status_code not in (200, 204):
+            logger.error(
+                "删除旧预测数据失败 metric=%s selector=%s status=%s body=%s",
+                metric_name,
+                selector,
+                resp.status_code,
+                resp.text[:500],
+            )
+            return False
+
+        logger.debug("已删除旧预测数据 selector=%s", selector)
+        return True
+
+    except requests.RequestException as e:
+        logger.error("删除旧预测数据异常 metric=%s selector=%s: %s", metric_name, selector, e)
+        return False
+
+
+def write_predictions(
+    ts_future: List[float],
+    y_pred: List[float],
+    metric_name: str,
+    labels: Dict[str, str] = None,
+) -> bool:
     """
     将预测值以 Prometheus exposition 格式写入 VictoriaMetrics。
-    时间戳为毫秒级 Unix 时间戳，对应未来时间点。
+    时间戳为毫秒级 Unix timestamp。
     """
+    if labels is None:
+        labels = {}
+
+    if not ts_future or not y_pred or len(ts_future) != len(y_pred):
+        logger.warning("预测数据为空或长度不一致 metric=%s", metric_name)
+        return False
+
     label_str = ""
-    if extra_labels:
-        parts = [f'{k}="{v}"' for k, v in extra_labels.items()]
+    if labels:
+        parts = []
+        for k in sorted(labels.keys()):
+            v = prom_escape_label_value(labels[k])
+            parts.append(f'{k}="{v}"')
         label_str = "{" + ",".join(parts) + "}"
 
     lines = []
+
     for t, y in zip(ts_future, y_pred):
-        ts_ms = int(t * 1000)
-        lines.append(f"{metric_name}{label_str} {y:.4f} {ts_ms}")
+        try:
+            ts_sec = int(round(float(t)))
+            val = float(y)
+        except Exception:
+            continue
+
+        if not math.isfinite(ts_sec) or not math.isfinite(val):
+            continue
+
+        ts_ms = ts_sec * 1000
+        lines.append(f"{metric_name}{label_str} {val:.6f} {ts_ms}")
+
+    if not lines:
+        logger.warning("没有可写入的预测点 metric=%s", metric_name)
+        return False
+
+    payload = "\n".join(lines) + "\n"
 
-    payload = "\n".join(lines)
     try:
         resp = requests.post(
             f"{VM_URL}/api/v1/import/prometheus",
-            data=payload,
+            data=payload.encode("utf-8"),
+            headers={
+                "Content-Type": "text/plain; version=0.0.4; charset=utf-8",
+            },
             timeout=10,
         )
         resp.raise_for_status()
+        return True
+
     except requests.RequestException as e:
         logger.error("写入预测数据失败 metric=%s: %s", metric_name, e)
+        return False
+
+
+_LABEL_PATTERN = re.compile(r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*')
 
 
-def _parse_labels(query: str) -> dict:
-    """从查询表达式中解析标签，如 feed_rate{device_id="fanuc-cnc"} → {"device_id": "fanuc-cnc"}"""
+def _parse_labels(query: str) -> Dict[str, str]:
+    """
+    从查询表达式中解析标签。
+
+    示例：
+        feed_rate{device_id="fanuc-cnc"} -> {"device_id": "fanuc-cnc"}
+    """
     labels = {}
-    if "{" not in query:
+
+    if "{" not in query or "}" not in query:
         return labels
-    label_part = query[query.index("{") + 1: query.index("}")]
-    for item in label_part.split(","):
-        if "=" in item:
-            k, v = item.split("=", 1)
-            labels[k.strip()] = v.strip().strip('"')
+
+    try:
+        label_part = query[query.index("{") + 1: query.rindex("}")]
+    except Exception:
+        return labels
+
+    for match in _LABEL_PATTERN.finditer(label_part):
+        key = match.group(1)
+        value = match.group(2)
+        value = value.replace('\\"', '"').replace("\\n", "\n").replace("\\\\", "\\")
+        labels[key] = value
+
     return labels
 
 
+def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]:
+    result = {}
+
+    for d in dicts:
+        if not d:
+            continue
+        result.update(d)
+
+    return result
+
+
 def run_once():
     now_str = datetime.now().strftime("%H:%M:%S")
+
     for query, pred_metric in PREDICT_TARGETS:
         ts, ys = fetch_history(query)
+
         if len(ys) < MIN_POINTS:
             logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
             continue
 
-        ts_future, y_pred = predict_next(ts, ys, horizon=HORIZON_SECONDS)
-        if not ts_future:
+        ts_future, y_pred = predict_next(
+            ts,
+            ys,
+            horizon=HORIZON_SECONDS,
+            start_from_now=True,
+        )
+
+        if not ts_future or not y_pred:
+            logger.warning("[%s] %s 预测结果为空，跳过", now_str, query)
+            continue
+
+        base_labels = _parse_labels(query)
+
+        # 先删除旧预测，再写入新预测。
+        # 删除条件只带 base_labels，兼容老版本无 forecast/source 标签的脏数据。
+        if CLEAR_OLD_PREDICTIONS:
+            clear_ok = delete_old_predictions(pred_metric, base_labels)
+
+            if not clear_ok and SKIP_WRITE_IF_CLEAR_FAILED:
+                logger.error(
+                    "[%s] %s 删除旧预测失败，为避免继续制造重叠数据，本轮跳过写入",
+                    now_str,
+                    pred_metric,
+                )
+                continue
+
+        write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
+
+        ok = write_predictions(
+            ts_future=ts_future,
+            y_pred=y_pred,
+            metric_name=pred_metric,
+            labels=write_labels,
+        )
+
+        if not ok:
             continue
 
-        extra_labels = _parse_labels(query)
-        write_predictions(ts_future, y_pred, pred_metric, extra_labels)
+        future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
+        future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
 
-        future_time = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
         logger.info(
-            "[%s] %-40s → %-35s 写入 %d 点，预测至 %s",
-            now_str, query, pred_metric, len(y_pred), future_time,
+            "[%s] %-40s → %-35s 写入 %d 点，预测区间 %s ~ %s",
+            now_str,
+            query,
+            pred_metric,
+            len(y_pred),
+            future_start,
+            future_end,
         )
 
 
 def main():
     logger.info(
-        "预测服务启动  VM=%s  预测窗口=%ds  轮询间隔=%ds",
-        VM_URL, HORIZON_SECONDS, POLL_INTERVAL,
+        "预测服务启动 VM=%s 历史窗口=%dmin 预测窗口=%ds 轮询间隔=%ds 清理旧预测=%s",
+        VM_URL,
+        HISTORY_MINUTES,
+        HORIZON_SECONDS,
+        POLL_INTERVAL,
+        CLEAR_OLD_PREDICTIONS,
     )
+
     while True:
         run_once()
         time.sleep(POLL_INTERVAL)
 
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file

From 88aec295671ca112fd422a28acd4d76d43a82f20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 20 May 2026 21:21:11 +0800
Subject: [PATCH 20/55] fix: write non-overlapping forecast windows instead of deleting series

---
 ai/predict_v2.py | 263 +++++++++++++++++++++++------------------------
 1 file changed, 128 insertions(+), 135 deletions(-)

diff --git a/ai/predict_v2.py b/ai/predict_v2.py
index bc425c8..f631e12 100755
--- a/ai/predict_v2.py
+++ b/ai/predict_v2.py
@@ -1,13 +1,14 @@
 # -*- coding: utf-8 -*-
 """
-ProtoForge 预测服务 v3
+ProtoForge 预测服务 v4
 
 修复点：
-1. 解决 HORIZON_SECONDS > POLL_INTERVAL 时，多轮预测窗口重叠导致 Grafana 出现毛刺/竖线问题。
-2. 每轮写入新预测前，删除同一个预测 metric 的旧预测序列，只保留最新一轮预测。
-3. 预测时间戳按整秒写入，避免毫秒时间戳和 Grafana step 不对齐。
-4. 拟合使用真实 timestamp 相对时间，不再假设历史数据严格 1 秒等间隔。
-5. 对历史数据做排序、去重、NaN/Inf 清洗。
+1. 不再使用 VictoriaMetrics delete_series，避免预测历史被整条删除。
+2. 不再每 30 秒写未来 120 秒，避免多轮预测窗口重叠导致 Grafana 出现竖线/毛刺。
+3. 每轮只写未来 min(HORIZON_SECONDS, POLL_INTERVAL) 秒的数据。
+4. 使用 forecast="rolling_v2" 新标签，避免和上一版 forecast="latest" 的旧预测数据混在一起。
+5. 使用真实 timestamp 做拟合，不假设采样严格等间隔。
+6. 拟合失败时不再简单写平直线，而是尽量重复最近一个周期的波形。
 """
 
 import logging
@@ -44,29 +45,35 @@
 ]
 
 HISTORY_MINUTES = 30
+
+# 理论预测窗口
 HORIZON_SECONDS = 120
+
+# 轮询间隔
 POLL_INTERVAL = 30
+
+# 实际写入窗口。
+# 关键点：实际写入窗口不要大于轮询间隔，否则不同批次预测会重叠。
+WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)
+
 MIN_POINTS = 120
 QUERY_STEP = "1s"
 
-# 关键修复：每轮写入前删除旧预测，避免 120s 预测窗口和 30s 轮询周期重叠
-CLEAR_OLD_PREDICTIONS = True
-
-# 如果删除旧预测失败，是否跳过本轮写入。
-# 建议 True，避免继续叠加脏数据。
-SKIP_WRITE_IF_CLEAR_FAILED = True
+# 不要再清理旧预测，否则历史预测会被整条删除。
+CLEAR_OLD_PREDICTIONS = False
 
-# 给新预测数据加一个稳定标签，方便 Grafana 查询过滤。
-# Grafana 可以查询：feed_rate_predicted{device_id="fanuc-cnc",forecast="latest"}
+# 使用新标签，避免和上一版 forecast="latest" 数据混在一起。
 EXTRA_PREDICT_LABELS = {
-    "forecast": "latest",
+    "forecast": "rolling_v2",
     "source": "protoforge",
 }
 
-# 正弦周期限制
 MIN_PERIOD_SECONDS = 5.0
 MAX_PERIOD_SECONDS = 3600.0
 
+# 进程内记录每条预测序列上次写到哪里，避免本进程运行期间重复写同一时间段
+LAST_WRITTEN_UNTIL: Dict[str, int] = {}
+
 # ─────────────────────────────────────────────────────────────────────────────
 
 
@@ -134,10 +141,10 @@ def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[floa
 def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]:
     """
     清洗历史数据：
-    1. 转换为整秒时间戳
+    1. 时间戳转为整秒
     2. 排序
     3. 同一秒多个值时保留最后一个
-    4. 插值补齐中间缺失秒
+    4. 插值补齐缺失秒
     """
     if not ts or not ys or len(ts) != len(ys):
         return np.array([]), np.array([])
@@ -173,7 +180,6 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.
     if end_sec <= start_sec:
         return ts_clean, ys_clean
 
-    # 统一为 1 秒网格，减少 query_range 缺点、抖动、缺失点对 FFT 的影响
     ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float)
     ys_grid = np.interp(ts_grid, ts_clean, ys_clean)
 
@@ -187,7 +193,7 @@ def _sine_model(t: np.ndarray, A: float, T: float, phi: float, C: float) -> np.n
 def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
     """
     使用 FFT 估算主周期。
-    ys_arr 默认是 1 秒间隔。
+    ys_arr 默认已经是 1 秒间隔。
     """
     n = len(ys_arr)
 
@@ -205,8 +211,8 @@ def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
     if len(freqs) <= 1:
         return 60.0
 
-    # 跳过直流分量 index 0
     power = np.abs(fft_vals[1:])
+
     if len(power) == 0 or np.max(power) <= 0:
         return 60.0
 
@@ -222,59 +228,84 @@ def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
     return period
 
 
+def repeat_last_period(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    ts_future_arr: np.ndarray,
+    period_seconds: float,
+) -> np.ndarray:
+    """
+    拟合失败时的降级策略：
+    不直接写平直线，而是把未来时间映射回最近一个周期的历史波形。
+    """
+    if len(ts_grid) < 2:
+        return np.full_like(ts_future_arr, float(ys_grid[-1]), dtype=float)
+
+    period = max(int(round(period_seconds)), 1)
+
+    y_pred = []
+
+    hist_start = float(ts_grid[0])
+    hist_end = float(ts_grid[-1])
+
+    for future_ts in ts_future_arr:
+        mapped_ts = float(future_ts)
+
+        while mapped_ts > hist_end:
+            mapped_ts -= period
+
+        while mapped_ts < hist_start:
+            mapped_ts += period
+
+        val = float(np.interp(mapped_ts, ts_grid, ys_grid))
+        y_pred.append(val)
+
+    return np.array(y_pred, dtype=float)
+
+
 def predict_next(
     ts: List[float],
     ys: List[float],
-    horizon: int = HORIZON_SECONDS,
-    start_from_now: bool = True,
+    horizon: int,
+    base_ts: int,
 ) -> Tuple[List[float], List[float]]:
     """
     用 FFT 检测主频，拟合正弦波，外推未来 horizon 秒。
-    返回：
-        future_timestamps: 未来整秒时间戳
-        predicted_values: 预测值
+
+    base_ts:
+        从 base_ts + 1 开始写预测。
     """
     ts_grid, ys_grid = normalize_history(ts, ys)
 
     if len(ys_grid) < MIN_POINTS:
         return [], []
 
-    n = len(ys_grid)
-
     y_min = float(np.min(ys_grid))
     y_max = float(np.max(ys_grid))
     y_mean = float(np.mean(ys_grid))
     y_range = y_max - y_min
 
-    # 数据几乎不波动时，直接使用最后一个值保持
-    if y_range <= 1e-9:
-        base_ts = int(time.time()) if start_from_now else int(ts_grid[-1])
-        base_ts = max(base_ts, int(ts_grid[-1]))
+    base_ts = max(int(base_ts), int(ts_grid[-1]))
+
+    ts_future_arr = np.arange(
+        base_ts + 1,
+        base_ts + 1 + horizon,
+        1,
+        dtype=float,
+    )
 
-        ts_future = [base_ts + i + 1 for i in range(horizon)]
-        y_pred = [float(ys_grid[-1])] * horizon
-        return ts_future, y_pred
+    if y_range <= 1e-9:
+        y_pred_arr = np.full_like(ts_future_arr, float(ys_grid[-1]), dtype=float)
+        return ts_future_arr.tolist(), y_pred_arr.tolist()
 
     period = estimate_period_by_fft(ys_grid)
 
-    # 用真实时间戳做相对时间，而不是 np.arange(n)
     t_fit = ts_grid - ts_grid[0]
+    t_future = ts_future_arr - ts_grid[0]
 
     amplitude = y_range / 2.0
     offset = y_mean
 
-    # 预测起点统一对齐到整秒
-    if start_from_now:
-        base_ts = int(time.time())
-    else:
-        base_ts = int(ts_grid[-1])
-
-    # 避免因为 VM 查询延迟导致预测点落在最后一个真实点之前
-    base_ts = max(base_ts, int(ts_grid[-1]))
-
-    ts_future_arr = np.arange(base_ts + 1, base_ts + 1 + horizon, 1, dtype=float)
-    t_future = ts_future_arr - ts_grid[0]
-
     try:
         popt, _ = curve_fit(
             _sine_model,
@@ -290,7 +321,6 @@ def predict_next(
 
         y_pred_arr = _sine_model(t_future, *popt)
 
-        # 裁剪到合理范围，避免拟合异常时飞出去
         margin = y_range * 0.2
         lower = y_min - margin
         upper = y_max + margin
@@ -309,16 +339,21 @@ def predict_next(
         return ts_future_arr.tolist(), y_pred_arr.astype(float).tolist()
 
     except Exception as e:
-        logger.warning("正弦拟合失败，降级为最近值平滑外推: %s", e)
+        logger.warning("正弦拟合失败，降级为最近周期波形复制: %s", e)
 
-        # 降级策略：用最近 10 个点的均值保持，避免线性外推越走越偏
-        tail = min(10, n)
-        last_value = float(np.mean(ys_grid[-tail:]))
+        y_pred_arr = repeat_last_period(
+            ts_grid=ts_grid,
+            ys_grid=ys_grid,
+            ts_future_arr=ts_future_arr,
+            period_seconds=period,
+        )
 
-        ts_future = ts_future_arr.tolist()
-        y_pred = [last_value] * horizon
+        margin = y_range * 0.2
+        lower = y_min - margin
+        upper = y_max + margin
+        y_pred_arr = np.clip(y_pred_arr, lower, upper)
 
-        return ts_future, y_pred
+        return ts_future_arr.tolist(), y_pred_arr.astype(float).tolist()
 
 
 def prom_escape_label_value(value: str) -> str:
@@ -333,83 +368,34 @@ def prom_escape_label_value(value: str) -> str:
     )
 
 
-def build_selector(metric_name: str, labels: Dict[str, str]) -> str:
-    """
-    构造 PromQL selector，用于 delete_series。
-
-    示例：
-        feed_rate_predicted{device_id="fanuc-cnc"}
-    """
+def labels_to_str(labels: Dict[str, str]) -> str:
     if not labels:
-        return metric_name
+        return ""
 
     parts = []
+
     for k in sorted(labels.keys()):
         v = prom_escape_label_value(labels[k])
         parts.append(f'{k}="{v}"')
 
-    return f'{metric_name}' + "{" + ",".join(parts) + "}"
-
-
-def delete_old_predictions(metric_name: str, base_labels: Dict[str, str]) -> bool:
-    """
-    删除旧预测序列，避免多轮预测窗口重叠。
-
-    注意：
-    这里故意只用 base_labels，比如 device_id。
-    不带 forecast/source 标签，是为了兼容旧版本脚本写入的无 forecast 标签数据。
-    """
-    selector = build_selector(metric_name, base_labels)
-
-    try:
-        resp = requests.post(
-            f"{VM_URL}/api/v1/admin/tsdb/delete_series",
-            params=[("match[]", selector)],
-            timeout=10,
-        )
-
-        if resp.status_code not in (200, 204):
-            logger.error(
-                "删除旧预测数据失败 metric=%s selector=%s status=%s body=%s",
-                metric_name,
-                selector,
-                resp.status_code,
-                resp.text[:500],
-            )
-            return False
-
-        logger.debug("已删除旧预测数据 selector=%s", selector)
-        return True
-
-    except requests.RequestException as e:
-        logger.error("删除旧预测数据异常 metric=%s selector=%s: %s", metric_name, selector, e)
-        return False
+    return "{" + ",".join(parts) + "}"
 
 
 def write_predictions(
     ts_future: List[float],
     y_pred: List[float],
     metric_name: str,
-    labels: Dict[str, str] = None,
+    labels: Dict[str, str],
 ) -> bool:
     """
     将预测值以 Prometheus exposition 格式写入 VictoriaMetrics。
     时间戳为毫秒级 Unix timestamp。
     """
-    if labels is None:
-        labels = {}
-
     if not ts_future or not y_pred or len(ts_future) != len(y_pred):
         logger.warning("预测数据为空或长度不一致 metric=%s", metric_name)
         return False
 
-    label_str = ""
-    if labels:
-        parts = []
-        for k in sorted(labels.keys()):
-            v = prom_escape_label_value(labels[k])
-            parts.append(f'{k}="{v}"')
-        label_str = "{" + ",".join(parts) + "}"
+    label_str = labels_to_str(labels)
 
     lines = []
 
@@ -449,7 +435,9 @@ def write_predictions(
         return False
 
 
-_LABEL_PATTERN = re.compile(r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*')
+_LABEL_PATTERN = re.compile(
+    r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
+)
 
 
 def _parse_labels(query: str) -> Dict[str, str]:
@@ -489,6 +477,13 @@ def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]:
     return result
 
 
+def series_key(metric_name: str, labels: Dict[str, str]) -> str:
+    """
+    构造进程内唯一 key，用于记录上次写到哪个时间点。
+    """
+    return metric_name + labels_to_str(labels)
+
+
 def run_once():
     now_str = datetime.now().strftime("%H:%M:%S")
 
@@ -499,34 +494,28 @@ def run_once():
             logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
             continue
 
+        base_labels = _parse_labels(query)
+        write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
+
+        key = series_key(pred_metric, write_labels)
+
+        now_sec = int(time.time())
+        last_until = LAST_WRITTEN_UNTIL.get(key, 0)
+
+        # 防止同一进程内重复写入已经预测过的时间段
+        base_ts = max(now_sec, last_until)
+
         ts_future, y_pred = predict_next(
-            ts,
-            ys,
-            horizon=HORIZON_SECONDS,
-            start_from_now=True,
+            ts=ts,
+            ys=ys,
+            horizon=WRITE_HORIZON_SECONDS,
+            base_ts=base_ts,
         )
 
         if not ts_future or not y_pred:
             logger.warning("[%s] %s 预测结果为空，跳过", now_str, query)
             continue
 
-        base_labels = _parse_labels(query)
-
-        # 先删除旧预测，再写入新预测。
-        # 删除条件只带 base_labels，兼容老版本无 forecast/source 标签的脏数据。
-        if CLEAR_OLD_PREDICTIONS:
-            clear_ok = delete_old_predictions(pred_metric, base_labels)
-
-            if not clear_ok and SKIP_WRITE_IF_CLEAR_FAILED:
-                logger.error(
-                    "[%s] %s 删除旧预测失败，为避免继续制造重叠数据，本轮跳过写入",
-                    now_str,
-                    pred_metric,
-                )
-                continue
-
-        write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
-
         ok = write_predictions(
             ts_future=ts_future,
             y_pred=y_pred,
@@ -537,26 +526,30 @@ def run_once():
         if not ok:
             continue
 
+        LAST_WRITTEN_UNTIL[key] = int(max(ts_future))
+
         future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
         future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
 
         logger.info(
-            "[%s] %-40s → %-35s 写入 %d 点，预测区间 %s ~ %s",
+            "[%s] %-40s → %-35s 写入 %d 点，预测区间 %s ~ %s，标签=%s",
             now_str,
             query,
             pred_metric,
             len(y_pred),
             future_start,
             future_end,
+            labels_to_str(write_labels),
         )
 
 
 def main():
     logger.info(
-        "预测服务启动 VM=%s 历史窗口=%dmin 预测窗口=%ds 轮询间隔=%ds 清理旧预测=%s",
+        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds 清理旧预测=%s",
         VM_URL,
         HISTORY_MINUTES,
         HORIZON_SECONDS,
+        WRITE_HORIZON_SECONDS,
         POLL_INTERVAL,
         CLEAR_OLD_PREDICTIONS,
     )

From 72d5c092018c5caf59f1a9f6ae556e6eff24ecca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 20 May 2026 21:35:15 +0800
Subject: [PATCH 21/55] fix: replace single-sine fit with seasonal-template forecast

---
 ai/predict_v2.py | 368 ++++++++++++++++++++++++-----------------------
 1 file changed, 188 insertions(+), 180 deletions(-)

diff --git a/ai/predict_v2.py b/ai/predict_v2.py
index f631e12..933a34f 100755
--- a/ai/predict_v2.py
+++ b/ai/predict_v2.py
@@ -1,14 +1,14 @@
 # -*- coding: utf-8 -*-
 """
-ProtoForge 预测服务 v4
+ProtoForge 预测服务 v5
 
 修复点：
-1. 不再使用 VictoriaMetrics delete_series，避免预测历史被整条删除。
-2. 不再每 30 秒写未来 120 秒，避免多轮预测窗口重叠导致 Grafana 出现竖线/毛刺。
-3. 每轮只写未来 min(HORIZON_SECONDS, POLL_INTERVAL) 秒的数据。
-4. 使用 forecast="rolling_v2" 新标签，避免和上一版 forecast="latest" 的旧预测数据混在一起。
-5. 使用真实 timestamp 做拟合，不假设采样严格等间隔。
-6. 拟合失败时不再简单写平直线，而是尽量重复最近一个周期的波形。
+1. 不再使用“单正弦拟合”作为主预测算法。
+2. 主算法改为：周期模板预测（同相位历史值加权平均）。
+3. 周期估计使用 FFT 粗估 + 自相关细化，比单纯 FFT 更稳。
+4. 若可用完整周期不足，则降级为多谐波回归（而不是单正弦）。
+5. 每轮只写入未来 min(HORIZON_SECONDS, POLL_INTERVAL) 秒，避免预测窗口重叠。
+6. 不删除旧预测历史，避免历史预测消失。
 """
 
 import logging
@@ -20,17 +20,13 @@
 
 import numpy as np
 import requests
-from scipy.optimize import curve_fit
-
 
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)s] %(message)s",
 )
-
 logger = logging.getLogger(__name__)
 
-
 # ── 配置 ──────────────────────────────────────────────────────────────────────
 
 VM_URL = "http://localhost:8428"
@@ -45,45 +41,36 @@
 ]
 
 HISTORY_MINUTES = 30
-
-# 理论预测窗口
 HORIZON_SECONDS = 120
-
-# 轮询间隔
 POLL_INTERVAL = 30
-
-# 实际写入窗口。
-# 关键点：实际写入窗口不要大于轮询间隔，否则不同批次预测会重叠。
 WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)
-
 MIN_POINTS = 120
 QUERY_STEP = "1s"
 
-# 不要再清理旧预测，否则历史预测会被整条删除。
-CLEAR_OLD_PREDICTIONS = False
+# 至少要有多少个完整周期，才使用“周期模板预测”
+MIN_FULL_CYCLES_FOR_TEMPLATE = 3
+MAX_CYCLES_FOR_TEMPLATE = 6
+
+# 周期范围
+MIN_PERIOD_SECONDS = 5
+MAX_PERIOD_SECONDS = 3600
+
+# 多谐波回归最高阶数（降级模式）
+MAX_HARMONICS = 4
 
-# 使用新标签，避免和上一版 forecast="latest" 数据混在一起。
 EXTRA_PREDICT_LABELS = {
-    "forecast": "rolling_v2",
+    "forecast": "seasonal_v1",
     "source": "protoforge",
 }
 
-MIN_PERIOD_SECONDS = 5.0
-MAX_PERIOD_SECONDS = 3600.0
-
-# 进程内记录每条预测序列上次写到哪里，避免本进程运行期间重复写同一时间段
+# 进程内记录每条预测序列上次写到哪里，避免本进程运行时重复写
 LAST_WRITTEN_UNTIL: Dict[str, int] = {}
 
 # ─────────────────────────────────────────────────────────────────────────────
 
 
 def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]:
-    """
-    从 VictoriaMetrics 拉取历史时序数据。
-    返回：
-        timestamps: Unix 秒级时间戳
-        values: float 数值
-    """
+    """从 VictoriaMetrics 拉取历史时序数据。"""
     now = datetime.now()
     start = now - timedelta(minutes=minutes)
 
@@ -118,20 +105,16 @@ def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[floa
 
     ts = []
     ys = []
-
     for item in values:
         if len(item) < 2:
             continue
-
         try:
             t = float(item[0])
             y = float(item[1])
         except Exception:
             continue
-
         if not math.isfinite(t) or not math.isfinite(y):
             continue
-
         ts.append(t)
         ys.append(y)
 
@@ -141,33 +124,29 @@ def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[floa
 def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]:
     """
     清洗历史数据：
-    1. 时间戳转为整秒
+    1. 时间戳整秒化
     2. 排序
-    3. 同一秒多个值时保留最后一个
-    4. 插值补齐缺失秒
+    3. 同一秒多个点保留最后一个
+    4. 按 1 秒插值补齐
     """
     if not ts or not ys or len(ts) != len(ys):
         return np.array([]), np.array([])
 
     data = {}
-
     for t, y in zip(ts, ys):
         try:
             sec = int(round(float(t)))
             val = float(y)
         except Exception:
             continue
-
         if not math.isfinite(sec) or not math.isfinite(val):
             continue
-
         data[sec] = val
 
     if not data:
         return np.array([]), np.array([])
 
     sorted_items = sorted(data.items(), key=lambda x: x[0])
-
     ts_clean = np.array([x[0] for x in sorted_items], dtype=float)
     ys_clean = np.array([x[1] for x in sorted_items], dtype=float)
 
@@ -186,22 +165,13 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.
     return ts_grid, ys_grid
 
 
-def _sine_model(t: np.ndarray, A: float, T: float, phi: float, C: float) -> np.ndarray:
-    return A * np.sin(2.0 * np.pi / T * t + phi) + C
-
-
 def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
-    """
-    使用 FFT 估算主周期。
-    ys_arr 默认已经是 1 秒间隔。
-    """
+    """FFT 粗估周期。"""
     n = len(ys_arr)
-
-    if n < 4:
+    if n < 8:
         return 60.0
 
     centered = ys_arr - np.mean(ys_arr)
-
     if np.allclose(centered, 0):
         return 60.0
 
@@ -212,55 +182,139 @@ def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
         return 60.0
 
     power = np.abs(fft_vals[1:])
-
     if len(power) == 0 or np.max(power) <= 0:
         return 60.0
 
     dominant_idx = int(np.argmax(power)) + 1
     dominant_freq = float(freqs[dominant_idx])
-
     if dominant_freq <= 0:
         return 60.0
 
     period = 1.0 / dominant_freq
-    period = float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+    return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+
+def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:
+    """
+    用自相关在 init_period 附近细化周期估计。
+    """
+    n = len(ys_arr)
+    if n < 20:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    centered = ys_arr - np.mean(ys_arr)
+    if np.allclose(centered, 0):
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
 
-    return period
+    corr = np.correlate(centered, centered, mode="full")[n - 1:]
 
+    p0 = int(round(init_period))
+    left = max(MIN_PERIOD_SECONDS, int(max(2, p0 * 0.7)))
+    right = min(n // 2, int(max(left + 1, p0 * 1.3)))
 
-def repeat_last_period(
-    ts_grid: np.ndarray,
-    ys_grid: np.ndarray,
-    ts_future_arr: np.ndarray,
-    period_seconds: float,
-) -> np.ndarray:
+    if right <= left:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    search = corr[left:right + 1]
+    if len(search) == 0:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    best_lag = left + int(np.argmax(search))
+    return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+
+def estimate_period(ys_arr: np.ndarray) -> float:
+    """FFT + 自相关 的组合周期估计。"""
+    p_fft = estimate_period_by_fft(ys_arr)
+    p_refined = refine_period_by_autocorr(ys_arr, p_fft)
+    return p_refined
+
+
+def seasonal_template_predict(
+    ys_arr: np.ndarray,
+    horizon: int,
+    period: int,
+    gap: int = 0,
+    max_cycles: int = MAX_CYCLES_FOR_TEMPLATE,
+) -> List[float]:
     """
-    拟合失败时的降级策略：
-    不直接写平直线，而是把未来时间映射回最近一个周期的历史波形。
+    同相位历史值加权平均预测。
+    对未来第 k 个点，取过去多个周期同相位点做加权平均：
+        y[n-1+gap+k] ≈ avg(y[n-1+gap+k-p], y[n-1+gap+k-2p], ...)
     """
-    if len(ts_grid) < 2:
-        return np.full_like(ts_future_arr, float(ys_grid[-1]), dtype=float)
+    n = len(ys_arr)
+    preds = []
 
-    period = max(int(round(period_seconds)), 1)
+    for k in range(1, horizon + 1):
+        target_idx = (n - 1) + gap + k
 
-    y_pred = []
+        values = []
+        weights = []
 
-    hist_start = float(ts_grid[0])
-    hist_end = float(ts_grid[-1])
+        # m=1 表示最近一个周期；m 越大越久远
+        for m in range(1, max_cycles + 1):
+            hist_idx = target_idx - m * period
+            if 0 <= hist_idx < n:
+                # 越近权重越大
+                w = 1.0 / m
+                values.append(float(ys_arr[hist_idx]))
+                weights.append(w)
 
-    for future_ts in ts_future_arr:
-        mapped_ts = float(future_ts)
+        if not values:
+            # 万一拿不到，退化为最后一个值
+            preds.append(float(ys_arr[-1]))
+        else:
+            preds.append(float(np.average(values, weights=weights)))
 
-        while mapped_ts > hist_end:
-            mapped_ts -= period
+    return preds
 
-        while mapped_ts < hist_start:
-            mapped_ts += period
 
-        val = float(np.interp(mapped_ts, ts_grid, ys_grid))
-        y_pred.append(val)
+def harmonic_regression_predict(
+    ys_arr: np.ndarray,
+    horizon: int,
+    period: int,
+    gap: int = 0,
+    max_harmonics: int = MAX_HARMONICS,
+) -> List[float]:
+    """
+    多谐波回归（降级模式）：
+    y = c + Σ [a_k sin(2πkt/P) + b_k cos(2πkt/P)]
+    相比单正弦，更能表达非标准正弦波形。
+    """
+    n = len(ys_arr)
+    if n < 10 or period <= 1:
+        return [float(ys_arr[-1])] * horizon
+
+    # 周期太短时，谐波数不能太大
+    K = min(max_harmonics, max(1, period // 4))
+
+    t = np.arange(n, dtype=float)
+    cols = [np.ones(n, dtype=float)]
+
+    for k in range(1, K + 1):
+        angle = 2.0 * np.pi * k * t / period
+        cols.append(np.sin(angle))
+        cols.append(np.cos(angle))
+
+    X = np.column_stack(cols)
+
+    try:
+        coef, _, _, _ = np.linalg.lstsq(X, ys_arr, rcond=None)
+    except Exception:
+        return [float(ys_arr[-1])] * horizon
+
+    t_future = np.arange(n + gap, n + gap + horizon, dtype=float)
+    cols_future = [np.ones(horizon, dtype=float)]
 
-    return np.array(y_pred, dtype=float)
+    for k in range(1, K + 1):
+        angle = 2.0 * np.pi * k * t_future / period
+        cols_future.append(np.sin(angle))
+        cols_future.append(np.cos(angle))
+
+    X_future = np.column_stack(cols_future)
+    y_pred = X_future @ coef
+
+    return y_pred.astype(float).tolist()
 
 
 def predict_next(
@@ -270,96 +324,74 @@ def predict_next(
     base_ts: int,
 ) -> Tuple[List[float], List[float]]:
     """
-    用 FFT 检测主频，拟合正弦波，外推未来 horizon 秒。
-
-    base_ts:
-        从 base_ts + 1 开始写预测。
+    主预测函数：
+    1. 周期估计
+    2. 优先使用周期模板预测
+    3. 周期不够时降级为多谐波回归
     """
     ts_grid, ys_grid = normalize_history(ts, ys)
-
     if len(ys_grid) < MIN_POINTS:
         return [], []
 
     y_min = float(np.min(ys_grid))
     y_max = float(np.max(ys_grid))
-    y_mean = float(np.mean(ys_grid))
     y_range = y_max - y_min
 
-    base_ts = max(int(base_ts), int(ts_grid[-1]))
-
-    ts_future_arr = np.arange(
-        base_ts + 1,
-        base_ts + 1 + horizon,
-        1,
-        dtype=float,
-    )
-
     if y_range <= 1e-9:
-        y_pred_arr = np.full_like(ts_future_arr, float(ys_grid[-1]), dtype=float)
-        return ts_future_arr.tolist(), y_pred_arr.tolist()
+        base_ts = max(int(base_ts), int(ts_grid[-1]))
+        ts_future = [base_ts + i + 1 for i in range(horizon)]
+        y_pred = [float(ys_grid[-1])] * horizon
+        return ts_future, y_pred
 
-    period = estimate_period_by_fft(ys_grid)
+    period_est = estimate_period(ys_grid)
+    period = int(round(period_est))
+    period = max(MIN_PERIOD_SECONDS, min(MAX_PERIOD_SECONDS, period))
 
-    t_fit = ts_grid - ts_grid[0]
-    t_future = ts_future_arr - ts_grid[0]
+    last_real_ts = int(ts_grid[-1])
+    base_ts = max(int(base_ts), last_real_ts)
 
-    amplitude = y_range / 2.0
-    offset = y_mean
+    # 如果当前时间已经超过最后一个真实点，gap 表示中间“空过去”的秒数
+    gap = max(0, base_ts - last_real_ts)
 
-    try:
-        popt, _ = curve_fit(
-            _sine_model,
-            t_fit,
-            ys_grid,
-            p0=[amplitude, period, 0.0, offset],
-            bounds=(
-                [0.0, MIN_PERIOD_SECONDS, -2.0 * np.pi, y_min - y_range],
-                [np.inf, MAX_PERIOD_SECONDS, 2.0 * np.pi, y_max + y_range],
-            ),
-            maxfev=12000,
-        )
+    ts_future = [base_ts + i + 1 for i in range(horizon)]
 
-        y_pred_arr = _sine_model(t_future, *popt)
+    full_cycles = len(ys_grid) // period if period > 0 else 0
 
-        margin = y_range * 0.2
-        lower = y_min - margin
-        upper = y_max + margin
-        y_pred_arr = np.clip(y_pred_arr, lower, upper)
-
-        if not np.all(np.isfinite(y_pred_arr)):
-            raise ValueError("预测结果包含 NaN/Inf")
-
-        logger.debug(
-            "正弦拟合成功 period=%.2fs amplitude=%.4f offset=%.4f",
-            popt[1],
-            popt[0],
-            popt[3],
+    if full_cycles >= MIN_FULL_CYCLES_FOR_TEMPLATE:
+        y_pred = seasonal_template_predict(
+            ys_arr=ys_grid,
+            horizon=horizon,
+            period=period,
+            gap=gap,
+            max_cycles=min(MAX_CYCLES_FOR_TEMPLATE, full_cycles),
         )
-
-        return ts_future_arr.tolist(), y_pred_arr.astype(float).tolist()
-
-    except Exception as e:
-        logger.warning("正弦拟合失败，降级为最近周期波形复制: %s", e)
-
-        y_pred_arr = repeat_last_period(
-            ts_grid=ts_grid,
-            ys_grid=ys_grid,
-            ts_future_arr=ts_future_arr,
-            period_seconds=period,
+        model_name = "seasonal_template"
+    else:
+        y_pred = harmonic_regression_predict(
+            ys_arr=ys_grid,
+            horizon=horizon,
+            period=period,
+            gap=gap,
+            max_harmonics=MAX_HARMONICS,
         )
+        model_name = "harmonic_regression"
+
+    # 合理裁剪，避免偶然外推过大
+    margin = y_range * 0.15
+    lower = y_min - margin
+    upper = y_max + margin
+    y_pred = np.clip(np.array(y_pred, dtype=float), lower, upper).astype(float).tolist()
 
-        margin = y_range * 0.2
-        lower = y_min - margin
-        upper = y_max + margin
-        y_pred_arr = np.clip(y_pred_arr, lower, upper)
+    logger.debug(
+        "predict_next model=%s period=%ss full_cycles=%s gap=%s",
+        model_name, period, full_cycles, gap
+    )
 
-        return ts_future_arr.tolist(), y_pred_arr.astype(float).tolist()
+    return ts_future, y_pred
 
 
 def prom_escape_label_value(value: str) -> str:
-    """
-    Prometheus exposition label value 转义。
-    """
+    """Prometheus label value 转义。"""
     return (
         str(value)
         .replace("\\", "\\\\")
@@ -371,13 +403,10 @@ def prom_escape_label_value(value: str) -> str:
 def labels_to_str(labels: Dict[str, str]) -> str:
     if not labels:
         return ""
-
     parts = []
-
     for k in sorted(labels.keys()):
         v = prom_escape_label_value(labels[k])
         parts.append(f'{k}="{v}"')
-
     return "{" + ",".join(parts) + "}"
 
 
@@ -387,16 +416,12 @@ def write_predictions(
     metric_name: str,
     labels: Dict[str, str],
 ) -> bool:
-    """
-    将预测值以 Prometheus exposition 格式写入 VictoriaMetrics。
-    时间戳为毫秒级 Unix timestamp。
-    """
+    """将预测值以 Prometheus exposition 格式写入 VictoriaMetrics。"""
     if not ts_future or not y_pred or len(ts_future) != len(y_pred):
         logger.warning("预测数据为空或长度不一致 metric=%s", metric_name)
         return False
 
     label_str = labels_to_str(labels)
-
     lines = []
 
     for t, y in zip(ts_future, y_pred):
@@ -422,14 +447,11 @@ def write_predictions(
         resp = requests.post(
             f"{VM_URL}/api/v1/import/prometheus",
             data=payload.encode("utf-8"),
-            headers={
-                "Content-Type": "text/plain; version=0.0.4; charset=utf-8",
-            },
+            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
             timeout=10,
         )
         resp.raise_for_status()
         return True
-
     except requests.RequestException as e:
         logger.error("写入预测数据失败 metric=%s: %s", metric_name, e)
         return False
@@ -441,12 +463,7 @@ def write_predictions(
 
 
 def _parse_labels(query: str) -> Dict[str, str]:
-    """
-    从查询表达式中解析标签。
-
-    示例：
-        feed_rate{device_id="fanuc-cnc"} -> {"device_id": "fanuc-cnc"}
-    """
+    """从查询表达式中解析标签。"""
     labels = {}
 
     if "{" not in query or "}" not in query:
@@ -468,19 +485,13 @@ def _parse_labels(query: str) -> Dict[str, str]:
 
 def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]:
     result = {}
-
     for d in dicts:
-        if not d:
-            continue
-        result.update(d)
-
+        if d:
+            result.update(d)
     return result
 
 
 def series_key(metric_name: str, labels: Dict[str, str]) -> str:
-    """
-    构造进程内唯一 key，用于记录上次写到哪个时间点。
-    """
     return metric_name + labels_to_str(labels)
 
 
@@ -489,7 +500,6 @@ def run_once():
 
     for query, pred_metric in PREDICT_TARGETS:
         ts, ys = fetch_history(query)
-
         if len(ys) < MIN_POINTS:
             logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
             continue
@@ -502,7 +512,7 @@ def run_once():
         now_sec = int(time.time())
         last_until = LAST_WRITTEN_UNTIL.get(key, 0)
 
-        # 防止同一进程内重复写入已经预测过的时间段
+        # 避免同一进程内写重叠时间段
         base_ts = max(now_sec, last_until)
 
         ts_future, y_pred = predict_next(
@@ -522,7 +532,6 @@ def run_once():
             metric_name=pred_metric,
             labels=write_labels,
         )
-
         if not ok:
             continue
 
@@ -545,13 +554,12 @@ def run_once():
 
 def main():
     logger.info(
-        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds 清理旧预测=%s",
+        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds",
         VM_URL,
         HISTORY_MINUTES,
         HORIZON_SECONDS,
         WRITE_HORIZON_SECONDS,
         POLL_INTERVAL,
-        CLEAR_OLD_PREDICTIONS,
     )
 
     while True:

From c26b9991d4cfac374d56829b47023b043d270aba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 21 May 2026 09:03:00 +0800
Subject: [PATCH 22/55] feat(ai): support single scene predict

---
 ai/predict_v3_single_scene.py | 1058 +++++++++++++++++++++++++++++++++
 1 file changed, 1058 insertions(+)
 create mode 100644 ai/predict_v3_single_scene.py

diff --git a/ai/predict_v3_single_scene.py b/ai/predict_v3_single_scene.py
new file mode 100644
index 0000000..23af8c5
--- /dev/null
+++ b/ai/predict_v3_single_scene.py
@@ -0,0 +1,1058 @@
+# -*- coding: utf-8 -*-
+"""
+ProtoForge 预测服务 v6
+
+核心能力：
+1. 周期模板预测：适合 CNC 这类强周期、非标准正弦波形。
+2. 健康基线冻结：检测到异常后，不再用故障数据更新预测模板。
+3. 恢复冷却机制：故障恢复后，需要连续稳定多个周期，才恢复学习。
+4. 预测上下界：写入 predicted_upper / predicted_lower，方便 Grafana 展示预测带。
+5. 异常标记：写入 xxx_anomaly，1 表示异常，0 表示正常。
+6. 不删除历史预测，不使用 delete_series。
+"""
+
+"""
+场景：不考虑物料、不考虑跨程序场景算法预测
+"""
+
+import json
+import logging
+import math
+import os
+import re
+import time
+from dataclasses import asdict, dataclass
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Tuple
+
+import numpy as np
+import requests
+
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+)
+
+logger = logging.getLogger(__name__)
+
+
+# ── 基础配置 ──────────────────────────────────────────────────────────────────
+
+VM_URL = "http://localhost:8428"
+
+STATE_FILE = "/tmp/protoforge_predictor_state.json"
+
+HISTORY_MINUTES = 30
+HORIZON_SECONDS = 120
+POLL_INTERVAL = 30
+
+# 实际每轮写入未来多少秒。
+# 不要大于 POLL_INTERVAL，否则多轮预测会重叠。
+WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)
+
+QUERY_STEP = "1s"
+MIN_POINTS = 120
+
+MIN_PERIOD_SECONDS = 5
+MAX_PERIOD_SECONDS = 3600
+
+# 至少多少个完整周期才允许构建健康模板
+MIN_FULL_CYCLES_FOR_TEMPLATE = 3
+
+# 构建模板最多使用最近多少个周期
+MAX_CYCLES_FOR_TEMPLATE = 6
+
+# 检测异常使用最近多少秒实际数据
+DETECT_WINDOW_SECONDS = 15
+
+# 恢复后，至少连续正常多少秒才考虑恢复学习
+RECOVERY_MIN_SECONDS = 60
+
+# 健康状态下模板更新速度，越小越保守
+HEALTHY_EMA_ALPHA = 0.15
+
+# 故障恢复后第一次重新学习时的更新速度
+RECOVERY_EMA_ALPHA = 0.35
+
+# 最近窗口里有多少比例的点超过阈值，才认为异常
+OUTSIDE_RATIO_THRESHOLD = 0.60
+
+# 最近窗口里有多少比例的点回到阈值内，才认为恢复正常
+RECOVERY_INSIDE_RATIO_THRESHOLD = 0.80
+
+
+# ── 指标配置 ──────────────────────────────────────────────────────────────────
+# abs_threshold / rel_threshold 需要按指标单位调。
+# feed_rate 单位 mm/min，这里先给 400 和 25%。
+
+PREDICT_TARGETS = [
+    {
+        "query": 'feed_rate{device_id="fanuc-cnc"}',
+        "pred_metric": "feed_rate_predicted",
+        "anomaly_metric": "feed_rate_anomaly",
+        "abs_threshold": 400.0,
+        "rel_threshold": 0.25,
+    },
+    {
+        "query": 'spindle_speed{device_id="fanuc-cnc"}',
+        "pred_metric": "spindle_speed_predicted",
+        "anomaly_metric": "spindle_speed_anomaly",
+        "abs_threshold": 500.0,
+        "rel_threshold": 0.25,
+    },
+    {
+        "query": 'spindle_current{device_id="fanuc-cnc"}',
+        "pred_metric": "spindle_current_predicted",
+        "anomaly_metric": "spindle_current_anomaly",
+        "abs_threshold": 5.0,
+        "rel_threshold": 0.25,
+    },
+    {
+        "query": 'vibration_x{device_id="fanuc-cnc"}',
+        "pred_metric": "vibration_x_predicted",
+        "anomaly_metric": "vibration_x_anomaly",
+        "abs_threshold": 1.0,
+        "rel_threshold": 0.30,
+    },
+    {
+        "query": 'vibration_y{device_id="fanuc-cnc"}',
+        "pred_metric": "vibration_y_predicted",
+        "anomaly_metric": "vibration_y_anomaly",
+        "abs_threshold": 1.0,
+        "rel_threshold": 0.30,
+    },
+    {
+        "query": 'vibration_z{device_id="fanuc-cnc"}',
+        "pred_metric": "vibration_z_predicted",
+        "anomaly_metric": "vibration_z_anomaly",
+        "abs_threshold": 1.0,
+        "rel_threshold": 0.30,
+    },
+]
+
+EXTRA_PREDICT_LABELS = {
+    "forecast": "health_gated_v1",
+    "source": "protoforge",
+}
+
+BASELINE_STATUS_HEALTHY = "healthy"
+BASELINE_STATUS_ANOMALY = "anomaly"
+BASELINE_STATUS_RECOVERING = "recovering"
+BASELINE_STATUS_LEARNING = "learning"
+
+
+# ── 状态结构 ──────────────────────────────────────────────────────────────────
+
+@dataclass
+class BaselineState:
+    period: int
+    template: List[float]
+    status: str
+    clean_seconds: int
+    last_update_ts: int
+    last_seen_ts: int
+    y_min: float
+    y_max: float
+
+
+BASELINE_STATES: Dict[str, BaselineState] = {}
+LAST_WRITTEN_UNTIL: Dict[str, int] = {}
+
+
+# ── VM 读取 ───────────────────────────────────────────────────────────────────
+
+def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]:
+    now = datetime.now()
+    start = now - timedelta(minutes=minutes)
+
+    try:
+        resp = requests.get(
+            f"{VM_URL}/api/v1/query_range",
+            params={
+                "query": query,
+                "start": start.timestamp(),
+                "end": now.timestamp(),
+                "step": QUERY_STEP,
+            },
+            timeout=10,
+        )
+        resp.raise_for_status()
+    except requests.RequestException as e:
+        logger.error("拉取数据失败 query=%s: %s", query, e)
+        return [], []
+
+    try:
+        result = resp.json().get("data", {}).get("result", [])
+    except Exception as e:
+        logger.error("解析 VM 返回失败 query=%s: %s", query, e)
+        return [], []
+
+    if not result:
+        return [], []
+
+    values = result[0].get("values", [])
+    if not values:
+        return [], []
+
+    ts = []
+    ys = []
+
+    for item in values:
+        if len(item) < 2:
+            continue
+
+        try:
+            t = float(item[0])
+            y = float(item[1])
+        except Exception:
+            continue
+
+        if not math.isfinite(t) or not math.isfinite(y):
+            continue
+
+        ts.append(t)
+        ys.append(y)
+
+    return ts, ys
+
+
+def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]:
+    if not ts or not ys or len(ts) != len(ys):
+        return np.array([]), np.array([])
+
+    data = {}
+
+    for t, y in zip(ts, ys):
+        try:
+            sec = int(round(float(t)))
+            val = float(y)
+        except Exception:
+            continue
+
+        if not math.isfinite(sec) or not math.isfinite(val):
+            continue
+
+        data[sec] = val
+
+    if not data:
+        return np.array([]), np.array([])
+
+    sorted_items = sorted(data.items(), key=lambda x: x[0])
+
+    ts_clean = np.array([x[0] for x in sorted_items], dtype=float)
+    ys_clean = np.array([x[1] for x in sorted_items], dtype=float)
+
+    if len(ts_clean) < 2:
+        return ts_clean, ys_clean
+
+    start_sec = int(ts_clean[0])
+    end_sec = int(ts_clean[-1])
+
+    if end_sec <= start_sec:
+        return ts_clean, ys_clean
+
+    ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float)
+    ys_grid = np.interp(ts_grid, ts_clean, ys_clean)
+
+    return ts_grid, ys_grid
+
+
+# ── 周期估计 ──────────────────────────────────────────────────────────────────
+
+def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
+    n = len(ys_arr)
+
+    if n < 8:
+        return 60.0
+
+    centered = ys_arr - np.mean(ys_arr)
+
+    if np.allclose(centered, 0):
+        return 60.0
+
+    fft_vals = np.fft.rfft(centered)
+    freqs = np.fft.rfftfreq(n, d=1.0)
+
+    if len(freqs) <= 1:
+        return 60.0
+
+    power = np.abs(fft_vals[1:])
+
+    if len(power) == 0 or np.max(power) <= 0:
+        return 60.0
+
+    dominant_idx = int(np.argmax(power)) + 1
+    dominant_freq = float(freqs[dominant_idx])
+
+    if dominant_freq <= 0:
+        return 60.0
+
+    period = 1.0 / dominant_freq
+
+    return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+
+def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:
+    n = len(ys_arr)
+
+    if n < 20:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    centered = ys_arr - np.mean(ys_arr)
+
+    if np.allclose(centered, 0):
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    corr = np.correlate(centered, centered, mode="full")[n - 1:]
+
+    p0 = int(round(init_period))
+    left = max(MIN_PERIOD_SECONDS, int(max(2, p0 * 0.7)))
+    right = min(n // 2, int(max(left + 1, p0 * 1.3)))
+
+    if right <= left:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    search = corr[left:right + 1]
+
+    if len(search) == 0:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    best_lag = left + int(np.argmax(search))
+
+    return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+
+def estimate_period(ys_arr: np.ndarray) -> int:
+    p_fft = estimate_period_by_fft(ys_arr)
+    p_refined = refine_period_by_autocorr(ys_arr, p_fft)
+
+    period = int(round(p_refined))
+    period = max(MIN_PERIOD_SECONDS, min(MAX_PERIOD_SECONDS, period))
+
+    return int(period)
+
+
+# ── 模板构建与预测 ─────────────────────────────────────────────────────────────
+
+def fill_template_nan(template: np.ndarray) -> np.ndarray:
+    period = len(template)
+
+    if period == 0:
+        return template
+
+    idx = np.arange(period)
+    valid = np.isfinite(template)
+
+    if not np.any(valid):
+        return np.zeros(period, dtype=float)
+
+    if np.all(valid):
+        return template
+
+    x_valid = idx[valid]
+    y_valid = template[valid]
+
+    # 环形插值，处理 phase 0 附近缺口
+    x_ext = np.concatenate([x_valid - period, x_valid, x_valid + period])
+    y_ext = np.concatenate([y_valid, y_valid, y_valid])
+
+    filled = np.interp(idx, x_ext, y_ext)
+
+    return filled.astype(float)
+
+
+def build_phase_template(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    period: int,
+    max_cycles: int = MAX_CYCLES_FOR_TEMPLATE,
+    tail_seconds: Optional[int] = None,
+) -> Optional[np.ndarray]:
+    if period <= 1 or len(ys_grid) < period * MIN_FULL_CYCLES_FOR_TEMPLATE:
+        return None
+
+    max_seconds = period * max_cycles
+
+    if tail_seconds is not None:
+        max_seconds = min(max_seconds, int(tail_seconds))
+
+    max_seconds = max(period * MIN_FULL_CYCLES_FOR_TEMPLATE, max_seconds)
+
+    if len(ys_grid) < max_seconds:
+        start_idx = 0
+    else:
+        start_idx = len(ys_grid) - max_seconds
+
+    ts_tail = ts_grid[start_idx:]
+    ys_tail = ys_grid[start_idx:]
+
+    if len(ys_tail) < period * MIN_FULL_CYCLES_FOR_TEMPLATE:
+        return None
+
+    sums = np.zeros(period, dtype=float)
+    weights = np.zeros(period, dtype=float)
+
+    total = len(ys_tail)
+
+    for i, (t, y) in enumerate(zip(ts_tail, ys_tail)):
+        phase = int(t) % period
+
+        # 越近的数据权重越高
+        recency = (i + 1) / total
+        weight = 0.3 + 0.7 * recency
+
+        sums[phase] += float(y) * weight
+        weights[phase] += weight
+
+    template = np.full(period, np.nan, dtype=float)
+
+    valid = weights > 0
+    template[valid] = sums[valid] / weights[valid]
+
+    template = fill_template_nan(template)
+
+    return template
+
+
+def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
+    old_period = len(old_template)
+
+    if old_period == new_period:
+        return old_template.astype(float)
+
+    if old_period <= 1 or new_period <= 1:
+        return np.full(new_period, float(np.mean(old_template)), dtype=float)
+
+    old_x = np.linspace(0.0, 1.0, old_period, endpoint=False)
+    new_x = np.linspace(0.0, 1.0, new_period, endpoint=False)
+
+    old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0])
+    old_y_ext = np.concatenate([old_template, old_template, old_template])
+
+    return np.interp(new_x, old_x_ext, old_y_ext).astype(float)
+
+
+def merge_template(
+    old_template: np.ndarray,
+    new_template: np.ndarray,
+    alpha: float,
+) -> np.ndarray:
+    alpha = float(np.clip(alpha, 0.0, 1.0))
+
+    if len(old_template) != len(new_template):
+        old_template = resample_template(old_template, len(new_template))
+
+    return ((1.0 - alpha) * old_template + alpha * new_template).astype(float)
+
+
+def predict_by_state(state: BaselineState, ts_list: List[int]) -> np.ndarray:
+    template = np.array(state.template, dtype=float)
+    period = int(state.period)
+
+    if period <= 1 or len(template) != period:
+        return np.zeros(len(ts_list), dtype=float)
+
+    values = []
+
+    for ts in ts_list:
+        phase = int(ts) % period
+        values.append(float(template[phase]))
+
+    return np.array(values, dtype=float)
+
+
+def calc_threshold(pred: np.ndarray, abs_threshold: float, rel_threshold: float) -> np.ndarray:
+    return np.maximum(abs_threshold, np.abs(pred) * rel_threshold)
+
+
+def calc_bounds(pred: np.ndarray, abs_threshold: float, rel_threshold: float) -> Tuple[np.ndarray, np.ndarray]:
+    threshold = calc_threshold(pred, abs_threshold, rel_threshold)
+    lower = pred - threshold
+    upper = pred + threshold
+    return lower, upper
+
+
+# ── 异常检测与状态更新 ────────────────────────────────────────────────────────
+
+def detect_anomaly(
+    state: BaselineState,
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    abs_threshold: float,
+    rel_threshold: float,
+) -> Tuple[bool, float, float, float]:
+    if len(ys_grid) < DETECT_WINDOW_SECONDS:
+        return False, 0.0, 0.0, 0.0
+
+    ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
+    actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float)
+
+    pred = predict_by_state(state, ts_recent)
+    threshold = calc_threshold(pred, abs_threshold, rel_threshold)
+
+    abs_err = np.abs(actual - pred)
+    outside = abs_err > threshold
+
+    outside_ratio = float(np.mean(outside))
+    mean_abs_err = float(np.mean(abs_err))
+    mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1.0)))
+
+    is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD
+
+    return is_anomaly, outside_ratio, mean_abs_err, mean_rel_err
+
+
+def is_recovered(
+    state: BaselineState,
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    abs_threshold: float,
+    rel_threshold: float,
+) -> Tuple[bool, float]:
+    if len(ys_grid) < DETECT_WINDOW_SECONDS:
+        return False, 0.0
+
+    ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
+    actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float)
+
+    pred = predict_by_state(state, ts_recent)
+    threshold = calc_threshold(pred, abs_threshold, rel_threshold)
+
+    abs_err = np.abs(actual - pred)
+    inside = abs_err <= threshold
+
+    inside_ratio = float(np.mean(inside))
+
+    return inside_ratio >= RECOVERY_INSIDE_RATIO_THRESHOLD, inside_ratio
+
+
+def create_initial_state(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    now_sec: int,
+) -> Optional[BaselineState]:
+    if len(ys_grid) < MIN_POINTS:
+        return None
+
+    period = estimate_period(ys_grid)
+
+    template = build_phase_template(
+        ts_grid=ts_grid,
+        ys_grid=ys_grid,
+        period=period,
+        max_cycles=MAX_CYCLES_FOR_TEMPLATE,
+        tail_seconds=period * MAX_CYCLES_FOR_TEMPLATE,
+    )
+
+    if template is None:
+        return None
+
+    return BaselineState(
+        period=int(period),
+        template=template.astype(float).tolist(),
+        status=BASELINE_STATUS_HEALTHY,
+        clean_seconds=int(period * MAX_CYCLES_FOR_TEMPLATE),
+        last_update_ts=now_sec,
+        last_seen_ts=now_sec,
+        y_min=float(np.min(ys_grid)),
+        y_max=float(np.max(ys_grid)),
+    )
+
+
+def maybe_update_state(
+    key: str,
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    abs_threshold: float,
+    rel_threshold: float,
+) -> Tuple[Optional[BaselineState], bool, float, float, float]:
+    now_sec = int(time.time())
+
+    state = BASELINE_STATES.get(key)
+
+    if state is None:
+        state = create_initial_state(ts_grid, ys_grid, now_sec)
+
+        if state is None:
+            return None, False, 0.0, 0.0, 0.0
+
+        BASELINE_STATES[key] = state
+        logger.info(
+            "初始化健康模板 key=%s period=%ss clean_seconds=%ss",
+            key,
+            state.period,
+            state.clean_seconds,
+        )
+        return state, False, 0.0, 0.0, 0.0
+
+    elapsed = max(1, now_sec - int(state.last_seen_ts))
+    elapsed = min(elapsed, POLL_INTERVAL * 2)
+    state.last_seen_ts = now_sec
+
+    is_anom, outside_ratio, mean_abs_err, mean_rel_err = detect_anomaly(
+        state=state,
+        ts_grid=ts_grid,
+        ys_grid=ys_grid,
+        abs_threshold=abs_threshold,
+        rel_threshold=rel_threshold,
+    )
+
+    if is_anom:
+        state.status = BASELINE_STATUS_ANOMALY
+        state.clean_seconds = 0
+
+        logger.warning(
+            "检测到异常，冻结模板 key=%s outside_ratio=%.2f mean_abs_err=%.2f mean_rel_err=%.2f",
+            key,
+            outside_ratio,
+            mean_abs_err,
+            mean_rel_err,
+        )
+
+        BASELINE_STATES[key] = state
+        return state, True, outside_ratio, mean_abs_err, mean_rel_err
+
+    recovered, inside_ratio = is_recovered(
+        state=state,
+        ts_grid=ts_grid,
+        ys_grid=ys_grid,
+        abs_threshold=abs_threshold,
+        rel_threshold=rel_threshold,
+    )
+
+    if state.status == BASELINE_STATUS_ANOMALY:
+        if recovered:
+            state.status = BASELINE_STATUS_RECOVERING
+            state.clean_seconds = elapsed
+            logger.info(
+                "异常开始恢复 key=%s inside_ratio=%.2f clean_seconds=%ss",
+                key,
+                inside_ratio,
+                state.clean_seconds,
+            )
+        else:
+            state.clean_seconds = 0
+            BASELINE_STATES[key] = state
+            return state, True, outside_ratio, mean_abs_err, mean_rel_err
+
+    elif state.status == BASELINE_STATUS_RECOVERING:
+        if recovered:
+            state.clean_seconds += elapsed
+        else:
+            state.status = BASELINE_STATUS_ANOMALY
+            state.clean_seconds = 0
+            BASELINE_STATES[key] = state
+            return state, True, outside_ratio, mean_abs_err, mean_rel_err
+
+    else:
+        state.status = BASELINE_STATUS_HEALTHY
+        state.clean_seconds += elapsed
+
+    # 故障恢复后，不要立刻学习。
+    # 必须至少连续正常：max(RECOVERY_MIN_SECONDS, 3 个周期)
+    min_clean_for_update = max(
+        RECOVERY_MIN_SECONDS,
+        int(state.period) * MIN_FULL_CYCLES_FOR_TEMPLATE,
+    )
+
+    if state.clean_seconds < min_clean_for_update:
+        BASELINE_STATES[key] = state
+        return state, False, outside_ratio, mean_abs_err, mean_rel_err
+
+    # 只使用最近 clean_seconds 这段连续正常数据来更新模板，避免历史故障污染。
+    new_period = estimate_period(ys_grid)
+    tail_seconds = min(
+        int(state.clean_seconds),
+        int(new_period) * MAX_CYCLES_FOR_TEMPLATE,
+    )
+
+    new_template = build_phase_template(
+        ts_grid=ts_grid,
+        ys_grid=ys_grid,
+        period=new_period,
+        max_cycles=MAX_CYCLES_FOR_TEMPLATE,
+        tail_seconds=tail_seconds,
+    )
+
+    if new_template is None:
+        BASELINE_STATES[key] = state
+        return state, False, outside_ratio, mean_abs_err, mean_rel_err
+
+    old_template = np.array(state.template, dtype=float)
+
+    if state.status == BASELINE_STATUS_RECOVERING:
+        alpha = RECOVERY_EMA_ALPHA
+        state.status = BASELINE_STATUS_HEALTHY
+    else:
+        alpha = HEALTHY_EMA_ALPHA
+
+    merged = merge_template(
+        old_template=old_template,
+        new_template=new_template,
+        alpha=alpha,
+    )
+
+    state.period = int(new_period)
+    state.template = merged.astype(float).tolist()
+    state.last_update_ts = now_sec
+    state.y_min = float(np.min(ys_grid[-tail_seconds:]))
+    state.y_max = float(np.max(ys_grid[-tail_seconds:]))
+
+    BASELINE_STATES[key] = state
+
+    logger.info(
+        "更新健康模板 key=%s period=%ss status=%s clean_seconds=%ss alpha=%.2f",
+        key,
+        state.period,
+        state.status,
+        state.clean_seconds,
+        alpha,
+    )
+
+    return state, False, outside_ratio, mean_abs_err, mean_rel_err
+
+
+# ── Prometheus 格式写入 ───────────────────────────────────────────────────────
+
+def prom_escape_label_value(value: str) -> str:
+    return (
+        str(value)
+        .replace("\\", "\\\\")
+        .replace("\n", "\\n")
+        .replace('"', '\\"')
+    )
+
+
+def labels_to_str(labels: Dict[str, str]) -> str:
+    if not labels:
+        return ""
+
+    parts = []
+
+    for k in sorted(labels.keys()):
+        v = prom_escape_label_value(labels[k])
+        parts.append(f'{k}="{v}"')
+
+    return "{" + ",".join(parts) + "}"
+
+
+def write_series(
+    metric_name: str,
+    labels: Dict[str, str],
+    ts_list: List[int],
+    values: List[float],
+) -> bool:
+    if not ts_list or not values or len(ts_list) != len(values):
+        return False
+
+    label_str = labels_to_str(labels)
+    lines = []
+
+    for t, y in zip(ts_list, values):
+        try:
+            ts_sec = int(round(float(t)))
+            val = float(y)
+        except Exception:
+            continue
+
+        if not math.isfinite(ts_sec) or not math.isfinite(val):
+            continue
+
+        ts_ms = ts_sec * 1000
+        lines.append(f"{metric_name}{label_str} {val:.6f} {ts_ms}")
+
+    if not lines:
+        return False
+
+    payload = "\n".join(lines) + "\n"
+
+    try:
+        resp = requests.post(
+            f"{VM_URL}/api/v1/import/prometheus",
+            data=payload.encode("utf-8"),
+            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
+            timeout=10,
+        )
+        resp.raise_for_status()
+        return True
+    except requests.RequestException as e:
+        logger.error("写入数据失败 metric=%s: %s", metric_name, e)
+        return False
+
+
+def write_prediction_bundle(
+    pred_metric: str,
+    anomaly_metric: str,
+    labels: Dict[str, str],
+    ts_future: List[int],
+    pred_values: np.ndarray,
+    lower_values: np.ndarray,
+    upper_values: np.ndarray,
+    is_anomaly: bool,
+    outside_ratio: float,
+    mean_abs_err: float,
+    mean_rel_err: float,
+) -> bool:
+    ok1 = write_series(
+        metric_name=pred_metric,
+        labels=labels,
+        ts_list=ts_future,
+        values=pred_values.astype(float).tolist(),
+    )
+
+    ok2 = write_series(
+        metric_name=f"{pred_metric}_lower",
+        labels=labels,
+        ts_list=ts_future,
+        values=lower_values.astype(float).tolist(),
+    )
+
+    ok3 = write_series(
+        metric_name=f"{pred_metric}_upper",
+        labels=labels,
+        ts_list=ts_future,
+        values=upper_values.astype(float).tolist(),
+    )
+
+    now_sec = int(time.time())
+
+    anomaly_labels = dict(labels)
+    anomaly_labels["type"] = "prediction_deviation"
+
+    ok4 = write_series(
+        metric_name=anomaly_metric,
+        labels=anomaly_labels,
+        ts_list=[now_sec],
+        values=[1.0 if is_anomaly else 0.0],
+    )
+
+    ok5 = write_series(
+        metric_name=f"{anomaly_metric}_outside_ratio",
+        labels=anomaly_labels,
+        ts_list=[now_sec],
+        values=[outside_ratio],
+    )
+
+    ok6 = write_series(
+        metric_name=f"{anomaly_metric}_mean_abs_error",
+        labels=anomaly_labels,
+        ts_list=[now_sec],
+        values=[mean_abs_err],
+    )
+
+    ok7 = write_series(
+        metric_name=f"{anomaly_metric}_mean_rel_error",
+        labels=anomaly_labels,
+        ts_list=[now_sec],
+        values=[mean_rel_err],
+    )
+
+    return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7
+
+
+# ── 标签解析 ──────────────────────────────────────────────────────────────────
+
+_LABEL_PATTERN = re.compile(
+    r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
+)
+
+
+def _parse_labels(query: str) -> Dict[str, str]:
+    labels = {}
+
+    if "{" not in query or "}" not in query:
+        return labels
+
+    try:
+        label_part = query[query.index("{") + 1: query.rindex("}")]
+    except Exception:
+        return labels
+
+    for match in _LABEL_PATTERN.finditer(label_part):
+        key = match.group(1)
+        value = match.group(2)
+        value = value.replace('\\"', '"').replace("\\n", "\n").replace("\\\\", "\\")
+        labels[key] = value
+
+    return labels
+
+
+def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]:
+    result = {}
+
+    for d in dicts:
+        if d:
+            result.update(d)
+
+    return result
+
+
+def series_key(metric_name: str, labels: Dict[str, str]) -> str:
+    return metric_name + labels_to_str(labels)
+
+
+# ── 状态持久化 ────────────────────────────────────────────────────────────────
+
+def load_state():
+    global BASELINE_STATES
+
+    if not os.path.exists(STATE_FILE):
+        return
+
+    try:
+        with open(STATE_FILE, "r", encoding="utf-8") as f:
+            raw = json.load(f)
+
+        states = {}
+
+        for key, value in raw.get("baseline_states", {}).items():
+            states[key] = BaselineState(**value)
+
+        BASELINE_STATES = states
+
+        logger.info("已加载预测状态文件 %s，状态数量=%d", STATE_FILE, len(BASELINE_STATES))
+
+    except Exception as e:
+        logger.warning("加载预测状态文件失败，将重新学习: %s", e)
+
+
+def save_state():
+    try:
+        raw = {
+            "baseline_states": {
+                key: asdict(value)
+                for key, value in BASELINE_STATES.items()
+            }
+        }
+
+        tmp_file = STATE_FILE + ".tmp"
+
+        with open(tmp_file, "w", encoding="utf-8") as f:
+            json.dump(raw, f, ensure_ascii=False, indent=2)
+
+        os.replace(tmp_file, STATE_FILE)
+
+    except Exception as e:
+        logger.warning("保存预测状态文件失败: %s", e)
+
+
+# ── 主逻辑 ────────────────────────────────────────────────────────────────────
+
+def run_once():
+    now_str = datetime.now().strftime("%H:%M:%S")
+
+    for target in PREDICT_TARGETS:
+        query = target["query"]
+        pred_metric = target["pred_metric"]
+        anomaly_metric = target["anomaly_metric"]
+        abs_threshold = float(target["abs_threshold"])
+        rel_threshold = float(target["rel_threshold"])
+
+        ts, ys = fetch_history(query)
+
+        if len(ys) < MIN_POINTS:
+            logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
+            continue
+
+        ts_grid, ys_grid = normalize_history(ts, ys)
+
+        if len(ys_grid) < MIN_POINTS:
+            logger.info("[%s] %s 清洗后数据不足（%d 点），跳过", now_str, query, len(ys_grid))
+            continue
+
+        base_labels = _parse_labels(query)
+        write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
+
+        key = series_key(pred_metric, write_labels)
+
+        state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err = maybe_update_state(
+            key=key,
+            ts_grid=ts_grid,
+            ys_grid=ys_grid,
+            abs_threshold=abs_threshold,
+            rel_threshold=rel_threshold,
+        )
+
+        if state is None:
+            logger.info("[%s] %s 暂无可用健康模板，等待学习", now_str, query)
+            continue
+
+        now_sec = int(time.time())
+        last_until = LAST_WRITTEN_UNTIL.get(key, 0)
+        last_real_ts = int(ts_grid[-1])
+
+        base_ts = max(now_sec, last_until, last_real_ts)
+
+        ts_future = [
+            base_ts + i + 1
+            for i in range(WRITE_HORIZON_SECONDS)
+        ]
+
+        pred_values = predict_by_state(state, ts_future)
+
+        lower_values, upper_values = calc_bounds(
+            pred=pred_values,
+            abs_threshold=abs_threshold,
+            rel_threshold=rel_threshold,
+        )
+
+        ok = write_prediction_bundle(
+            pred_metric=pred_metric,
+            anomaly_metric=anomaly_metric,
+            labels=write_labels,
+            ts_future=ts_future,
+            pred_values=pred_values,
+            lower_values=lower_values,
+            upper_values=upper_values,
+            is_anomaly=is_anomaly,
+            outside_ratio=outside_ratio,
+            mean_abs_err=mean_abs_err,
+            mean_rel_err=mean_rel_err,
+        )
+
+        if not ok:
+            continue
+
+        LAST_WRITTEN_UNTIL[key] = int(max(ts_future))
+
+        future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
+        future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
+
+        logger.info(
+            "[%s] %-40s → %-35s status=%s anomaly=%s period=%ss clean=%ss 写入 %d 点，预测区间 %s ~ %s",
+            now_str,
+            query,
+            pred_metric,
+            state.status,
+            is_anomaly,
+            state.period,
+            state.clean_seconds,
+            len(ts_future),
+            future_start,
+            future_end,
+        )
+
+    save_state()
+
+
+def main():
+    load_state()
+
+    logger.info(
+        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s",
+        VM_URL,
+        HISTORY_MINUTES,
+        HORIZON_SECONDS,
+        WRITE_HORIZON_SECONDS,
+        POLL_INTERVAL,
+        STATE_FILE,
+    )
+
+    while True:
+        run_once()
+        time.sleep(POLL_INTERVAL)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From 54c4b851a004567078cfec337933aafcbd676b44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 21 May 2026 13:39:33 +0800
Subject: [PATCH 23/55] fix

---
 ai/predict_v3_single_scene.py | 701 +++++++++++++++++++++++-----------
 1 file changed, 488 insertions(+), 213 deletions(-)
 mode change 100644 => 100755 ai/predict_v3_single_scene.py

diff --git a/ai/predict_v3_single_scene.py b/ai/predict_v3_single_scene.py
old mode 100644
new mode 100755
index 23af8c5..fc07f4f
--- a/ai/predict_v3_single_scene.py
+++ b/ai/predict_v3_single_scene.py
@@ -1,18 +1,22 @@
 # -*- coding: utf-8 -*-
 """
-ProtoForge 预测服务 v6
-
-核心能力：
-1. 周期模板预测：适合 CNC 这类强周期、非标准正弦波形。
-2. 健康基线冻结：检测到异常后，不再用故障数据更新预测模板。
-3. 恢复冷却机制：故障恢复后，需要连续稳定多个周期，才恢复学习。
-4. 预测上下界：写入 predicted_upper / predicted_lower，方便 Grafana 展示预测带。
-5. 异常标记：写入 xxx_anomaly，1 表示异常，0 表示正常。
-6. 不删除历史预测，不使用 delete_series。
-"""
-
-"""
-场景：不考虑物料、不考虑跨程序场景算法预测
+ProtoForge Predictor v8
+
+功能：
+1. 从 VictoriaMetrics 拉取历史数据。
+2. 对 CNC 周期型指标进行相位对齐预测。
+3. 使用“谷底锚点”对齐周期，减少上升沿/下降沿相位偏差。
+4. 每轮只写入未来 min(HORIZON_SECONDS, POLL_INTERVAL) 秒，避免预测窗口重叠。
+5. 检测异常后冻结健康模板，不把故障数据学进去。
+6. 故障恢复后等待稳定一段时间，再恢复模板更新。
+7. 写入：
+   - xxx_predicted
+   - xxx_predicted_upper
+   - xxx_predicted_lower
+   - xxx_anomaly
+   - xxx_anomaly_outside_ratio
+   - xxx_anomaly_mean_abs_error
+   - xxx_anomaly_mean_rel_error
 """
 
 import json
@@ -29,6 +33,10 @@
 import requests
 
 
+# =============================================================================
+# 日志配置
+# =============================================================================
+
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)s] %(message)s",
@@ -37,18 +45,19 @@
 logger = logging.getLogger(__name__)
 
 
-# ── 基础配置 ──────────────────────────────────────────────────────────────────
+# =============================================================================
+# 基础配置
+# =============================================================================
 
 VM_URL = "http://localhost:8428"
 
-STATE_FILE = "/tmp/protoforge_predictor_state.json"
+STATE_FILE = "/tmp/protoforge_predictor_state_v8.json"
 
 HISTORY_MINUTES = 30
 HORIZON_SECONDS = 120
 POLL_INTERVAL = 30
 
-# 实际每轮写入未来多少秒。
-# 不要大于 POLL_INTERVAL，否则多轮预测会重叠。
+# 实际写入窗口不要大于轮询间隔，否则多轮预测会重叠。
 WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)
 
 QUERY_STEP = "1s"
@@ -57,34 +66,25 @@
 MIN_PERIOD_SECONDS = 5
 MAX_PERIOD_SECONDS = 3600
 
-# 至少多少个完整周期才允许构建健康模板
 MIN_FULL_CYCLES_FOR_TEMPLATE = 3
-
-# 构建模板最多使用最近多少个周期
 MAX_CYCLES_FOR_TEMPLATE = 6
 
-# 检测异常使用最近多少秒实际数据
 DETECT_WINDOW_SECONDS = 15
-
-# 恢复后，至少连续正常多少秒才考虑恢复学习
 RECOVERY_MIN_SECONDS = 60
 
-# 健康状态下模板更新速度，越小越保守
-HEALTHY_EMA_ALPHA = 0.15
-
-# 故障恢复后第一次重新学习时的更新速度
-RECOVERY_EMA_ALPHA = 0.35
+HEALTHY_EMA_ALPHA = 0.12
+RECOVERY_EMA_ALPHA = 0.30
 
-# 最近窗口里有多少比例的点超过阈值，才认为异常
 OUTSIDE_RATIO_THRESHOLD = 0.60
-
-# 最近窗口里有多少比例的点回到阈值内，才认为恢复正常
 RECOVERY_INSIDE_RATIO_THRESHOLD = 0.80
 
+PHASE_SEARCH_RATIO = 0.15
+VALLEY_QUANTILE = 45
+
 
-# ── 指标配置 ──────────────────────────────────────────────────────────────────
-# abs_threshold / rel_threshold 需要按指标单位调。
-# feed_rate 单位 mm/min，这里先给 400 和 25%。
+# =============================================================================
+# 预测指标配置
+# =============================================================================
 
 PREDICT_TARGETS = [
     {
@@ -132,21 +132,23 @@
 ]
 
 EXTRA_PREDICT_LABELS = {
-    "forecast": "health_gated_v1",
+    "forecast": "phase_aligned_health_v8",
     "source": "protoforge",
 }
 
 BASELINE_STATUS_HEALTHY = "healthy"
 BASELINE_STATUS_ANOMALY = "anomaly"
 BASELINE_STATUS_RECOVERING = "recovering"
-BASELINE_STATUS_LEARNING = "learning"
 
 
-# ── 状态结构 ──────────────────────────────────────────────────────────────────
+# =============================================================================
+# 状态结构
+# =============================================================================
 
 @dataclass
 class BaselineState:
     period: int
+    phase_origin_ts: int
     template: List[float]
     status: str
     clean_seconds: int
@@ -160,7 +162,9 @@ class BaselineState:
 LAST_WRITTEN_UNTIL: Dict[str, int] = {}
 
 
-# ── VM 读取 ───────────────────────────────────────────────────────────────────
+# =============================================================================
+# VictoriaMetrics 读取
+# =============================================================================
 
 def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]:
     now = datetime.now()
@@ -258,7 +262,25 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.
     return ts_grid, ys_grid
 
 
-# ── 周期估计 ──────────────────────────────────────────────────────────────────
+# =============================================================================
+# 周期估计
+# =============================================================================
+
+def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
+    if window <= 1 or len(arr) < window:
+        return arr.astype(float)
+
+    window = int(window)
+
+    if window % 2 == 0:
+        window += 1
+
+    kernel = np.ones(window, dtype=float) / window
+    pad = window // 2
+    padded = np.pad(arr.astype(float), (pad, pad), mode="edge")
+
+    return np.convolve(padded, kernel, mode="valid")
+
 
 def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
     n = len(ys_arr)
@@ -307,7 +329,7 @@ def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:
     corr = np.correlate(centered, centered, mode="full")[n - 1:]
 
     p0 = int(round(init_period))
-    left = max(MIN_PERIOD_SECONDS, int(max(2, p0 * 0.7)))
+    left = max(int(MIN_PERIOD_SECONDS), int(max(2, p0 * 0.7)))
     right = min(n // 2, int(max(left + 1, p0 * 1.3)))
 
     if right <= left:
@@ -323,96 +345,252 @@ def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:
     return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
 
 
-def estimate_period(ys_arr: np.ndarray) -> int:
+def estimate_period_rough(ys_arr: np.ndarray) -> int:
     p_fft = estimate_period_by_fft(ys_arr)
     p_refined = refine_period_by_autocorr(ys_arr, p_fft)
 
     period = int(round(p_refined))
-    period = max(MIN_PERIOD_SECONDS, min(MAX_PERIOD_SECONDS, period))
+    period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period))
 
     return int(period)
 
 
-# ── 模板构建与预测 ─────────────────────────────────────────────────────────────
+# =============================================================================
+# 谷底锚点检测
+# =============================================================================
 
-def fill_template_nan(template: np.ndarray) -> np.ndarray:
-    period = len(template)
+def find_valley_indices(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    expected_period: int,
+) -> List[int]:
+    n = len(ys_grid)
 
-    if period == 0:
-        return template
+    if n < max(10, expected_period * 2):
+        return []
+
+    period = max(3, int(expected_period))
+
+    smooth_window = max(3, int(round(period * 0.08)))
+    smooth_window = min(smooth_window, 21)
+
+    ys_smooth = moving_average(ys_grid, smooth_window)
+    threshold = float(np.percentile(ys_smooth, VALLEY_QUANTILE))
+
+    candidates = []
+
+    for i in range(1, n - 1):
+        if (
+            ys_smooth[i] <= ys_smooth[i - 1]
+            and ys_smooth[i] < ys_smooth[i + 1]
+            and ys_smooth[i] <= threshold
+        ):
+            candidates.append(i)
+
+    if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE:
+        candidates = []
+
+        for i in range(1, n - 1):
+            if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]:
+                candidates.append(i)
+
+    if not candidates:
+        return []
 
-    idx = np.arange(period)
-    valid = np.isfinite(template)
+    min_distance = max(2, int(round(period * 0.55)))
+    selected = []
 
-    if not np.any(valid):
-        return np.zeros(period, dtype=float)
+    for idx in candidates:
+        if not selected:
+            selected.append(idx)
+            continue
+
+        if idx - selected[-1] >= min_distance:
+            selected.append(idx)
+            continue
+
+        if ys_smooth[idx] < ys_smooth[selected[-1]]:
+            selected[-1] = idx
 
-    if np.all(valid):
-        return template
+    if len(selected) < 2:
+        return selected
 
-    x_valid = idx[valid]
-    y_valid = template[valid]
+    cleaned = [selected[0]]
+
+    for idx in selected[1:]:
+        diff = int(ts_grid[idx] - ts_grid[cleaned[-1]])
+
+        if int(period * 0.55) <= diff <= int(period * 1.60):
+            cleaned.append(idx)
+            continue
 
-    # 环形插值，处理 phase 0 附近缺口
-    x_ext = np.concatenate([x_valid - period, x_valid, x_valid + period])
-    y_ext = np.concatenate([y_valid, y_valid, y_valid])
+        if diff < int(period * 0.55):
+            if ys_smooth[idx] < ys_smooth[cleaned[-1]]:
+                cleaned[-1] = idx
+            continue
 
-    filled = np.interp(idx, x_ext, y_ext)
+        cleaned.append(idx)
 
-    return filled.astype(float)
+    return cleaned
 
 
-def build_phase_template(
+def detect_period_and_valleys(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+) -> Tuple[int, List[int]]:
+    rough = estimate_period_rough(ys_grid)
+    valleys = find_valley_indices(ts_grid, ys_grid, rough)
+
+    if len(valleys) >= 3:
+        diffs = np.diff(ts_grid[valleys])
+        good = diffs[(diffs >= rough * 0.55) & (diffs <= rough * 1.60)]
+
+        if len(good) > 0:
+            period = int(round(float(np.median(good))))
+        else:
+            period = rough
+    else:
+        period = rough
+
+    period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period))
+
+    return int(period), valleys
+
+
+# =============================================================================
+# 相位对齐模板构建
+# =============================================================================
+
+def build_template_from_valleys(
     ts_grid: np.ndarray,
     ys_grid: np.ndarray,
     period: int,
+    valleys: List[int],
     max_cycles: int = MAX_CYCLES_FOR_TEMPLATE,
-    tail_seconds: Optional[int] = None,
 ) -> Optional[np.ndarray]:
-    if period <= 1 or len(ys_grid) < period * MIN_FULL_CYCLES_FOR_TEMPLATE:
+    if period <= 1 or len(valleys) < MIN_FULL_CYCLES_FOR_TEMPLATE + 1:
         return None
 
-    max_seconds = period * max_cycles
+    pairs = []
+
+    for a, b in zip(valleys[:-1], valleys[1:]):
+        cycle_len = float(ts_grid[b] - ts_grid[a])
 
-    if tail_seconds is not None:
-        max_seconds = min(max_seconds, int(tail_seconds))
+        if period * 0.55 <= cycle_len <= period * 1.60:
+            pairs.append((a, b, cycle_len))
 
-    max_seconds = max(period * MIN_FULL_CYCLES_FOR_TEMPLATE, max_seconds)
+    if len(pairs) < MIN_FULL_CYCLES_FOR_TEMPLATE:
+        return None
+
+    pairs = pairs[-max_cycles:]
+
+    phase_grid = np.arange(period, dtype=float)
+    segments = []
+    weights = []
+
+    for idx, (a, b, cycle_len) in enumerate(pairs):
+        seg_ts = ts_grid[a:b + 1]
+        seg_y = ys_grid[a:b + 1]
+
+        if len(seg_y) < 3:
+            continue
 
-    if len(ys_grid) < max_seconds:
-        start_idx = 0
+        x_old = (seg_ts - seg_ts[0]) / cycle_len * period
+        seg = np.interp(phase_grid, x_old, seg_y)
+
+        segments.append(seg.astype(float))
+
+        weight = 0.5 + 0.5 * ((idx + 1) / len(pairs))
+        weights.append(weight)
+
+    if len(segments) < MIN_FULL_CYCLES_FOR_TEMPLATE:
+        return None
+
+    arr = np.vstack(segments)
+    w_arr = np.array(weights, dtype=float)
+
+    template = np.average(arr, axis=0, weights=w_arr)
+
+    return template.astype(float)
+
+
+def build_current_baseline(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    tail_seconds: Optional[int] = None,
+) -> Optional[Tuple[int, int, np.ndarray]]:
+    if len(ys_grid) < MIN_POINTS:
+        return None
+
+    if tail_seconds is not None and tail_seconds > 0:
+        cutoff = ts_grid[-1] - int(tail_seconds)
+        mask = ts_grid >= cutoff
+        ts_use = ts_grid[mask]
+        ys_use = ys_grid[mask]
     else:
-        start_idx = len(ys_grid) - max_seconds
+        ts_use = ts_grid
+        ys_use = ys_grid
+
+    if len(ys_use) < MIN_POINTS:
+        return None
 
-    ts_tail = ts_grid[start_idx:]
-    ys_tail = ys_grid[start_idx:]
+    period, valleys = detect_period_and_valleys(ts_use, ys_use)
 
-    if len(ys_tail) < period * MIN_FULL_CYCLES_FOR_TEMPLATE:
+    template = build_template_from_valleys(
+        ts_grid=ts_use,
+        ys_grid=ys_use,
+        period=period,
+        valleys=valleys,
+    )
+
+    if template is None or len(valleys) == 0:
         return None
 
-    sums = np.zeros(period, dtype=float)
-    weights = np.zeros(period, dtype=float)
+    phase_origin_ts = int(round(float(ts_use[valleys[-1]])))
 
-    total = len(ys_tail)
+    return int(period), phase_origin_ts, template
 
-    for i, (t, y) in enumerate(zip(ts_tail, ys_tail)):
-        phase = int(t) % period
 
-        # 越近的数据权重越高
-        recency = (i + 1) / total
-        weight = 0.3 + 0.7 * recency
+# =============================================================================
+# 模板预测
+# =============================================================================
 
-        sums[phase] += float(y) * weight
-        weights[phase] += weight
+def circular_template_value(template: np.ndarray, phase: float) -> float:
+    period = len(template)
 
-    template = np.full(period, np.nan, dtype=float)
+    if period == 0:
+        return 0.0
+
+    phase = float(phase) % period
 
-    valid = weights > 0
-    template[valid] = sums[valid] / weights[valid]
+    i0 = int(math.floor(phase)) % period
+    i1 = (i0 + 1) % period
 
-    template = fill_template_nan(template)
+    frac = phase - math.floor(phase)
 
-    return template
+    return float((1.0 - frac) * template[i0] + frac * template[i1])
+
+
+def predict_with_origin(
+    state: BaselineState,
+    ts_list: List[int],
+    phase_origin_ts: Optional[int] = None,
+) -> np.ndarray:
+    template = np.array(state.template, dtype=float)
+    period = int(state.period)
+
+    if period <= 1 or len(template) != period:
+        return np.zeros(len(ts_list), dtype=float)
+
+    origin = int(state.phase_origin_ts if phase_origin_ts is None else phase_origin_ts)
+
+    values = []
+
+    for ts in ts_list:
+        phase = (int(ts) - origin) % period
+        values.append(circular_template_value(template, phase))
+
+    return np.array(values, dtype=float)
 
 
 def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
@@ -433,6 +611,38 @@ def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
     return np.interp(new_x, old_x_ext, old_y_ext).astype(float)
 
 
+def align_new_template_to_old(
+    old_template: np.ndarray,
+    new_template: np.ndarray,
+) -> np.ndarray:
+    if len(old_template) != len(new_template):
+        old_template = resample_template(old_template, len(new_template))
+
+    period = len(new_template)
+
+    if period <= 2:
+        return new_template.astype(float)
+
+    max_shift = max(1, int(round(period * 0.10)))
+
+    old_norm = old_template - np.mean(old_template)
+
+    best_score = None
+    best_template = new_template
+
+    for shift in range(-max_shift, max_shift + 1):
+        shifted = np.roll(new_template, shift)
+        shifted_norm = shifted - np.mean(shifted)
+
+        score = float(np.dot(old_norm, shifted_norm))
+
+        if best_score is None or score > best_score:
+            best_score = score
+            best_template = shifted
+
+    return best_template.astype(float)
+
+
 def merge_template(
     old_template: np.ndarray,
     new_template: np.ndarray,
@@ -443,37 +653,64 @@ def merge_template(
     if len(old_template) != len(new_template):
         old_template = resample_template(old_template, len(new_template))
 
-    return ((1.0 - alpha) * old_template + alpha * new_template).astype(float)
-
-
-def predict_by_state(state: BaselineState, ts_list: List[int]) -> np.ndarray:
-    template = np.array(state.template, dtype=float)
-    period = int(state.period)
-
-    if period <= 1 or len(template) != period:
-        return np.zeros(len(ts_list), dtype=float)
+    new_template = align_new_template_to_old(old_template, new_template)
 
-    values = []
+    merged = (1.0 - alpha) * old_template + alpha * new_template
 
-    for ts in ts_list:
-        phase = int(ts) % period
-        values.append(float(template[phase]))
+    return merged.astype(float)
 
-    return np.array(values, dtype=float)
 
+# =============================================================================
+# 异常检测
+# =============================================================================
 
-def calc_threshold(pred: np.ndarray, abs_threshold: float, rel_threshold: float) -> np.ndarray:
+def calc_threshold(
+    pred: np.ndarray,
+    abs_threshold: float,
+    rel_threshold: float,
+) -> np.ndarray:
     return np.maximum(abs_threshold, np.abs(pred) * rel_threshold)
 
 
-def calc_bounds(pred: np.ndarray, abs_threshold: float, rel_threshold: float) -> Tuple[np.ndarray, np.ndarray]:
+def calc_bounds(
+    pred: np.ndarray,
+    abs_threshold: float,
+    rel_threshold: float,
+) -> Tuple[np.ndarray, np.ndarray]:
     threshold = calc_threshold(pred, abs_threshold, rel_threshold)
+
     lower = pred - threshold
     upper = pred + threshold
+
     return lower, upper
 
 
-# ── 异常检测与状态更新 ────────────────────────────────────────────────────────
+def find_best_phase_origin_for_recent(
+    state: BaselineState,
+    ts_recent: List[int],
+    actual: np.ndarray,
+) -> Tuple[int, np.ndarray, float]:
+    period = int(state.period)
+    base_origin = int(state.phase_origin_ts)
+
+    max_shift = max(1, int(round(period * PHASE_SEARCH_RATIO)))
+
+    best_origin = base_origin
+    best_pred = predict_with_origin(state, ts_recent, base_origin)
+    best_mae = float(np.mean(np.abs(actual - best_pred)))
+
+    for shift in range(-max_shift, max_shift + 1):
+        origin = base_origin + shift
+        pred = predict_with_origin(state, ts_recent, origin)
+        mae = float(np.mean(np.abs(actual - pred)))
+
+        if mae < best_mae:
+            best_mae = mae
+            best_origin = origin
+            best_pred = pred
+
+    return best_origin, best_pred, best_mae
+
 
 def detect_anomaly(
     state: BaselineState,
@@ -481,14 +718,19 @@ def detect_anomaly(
     ys_grid: np.ndarray,
     abs_threshold: float,
     rel_threshold: float,
-) -> Tuple[bool, float, float, float]:
+) -> Tuple[bool, float, float, float, int]:
     if len(ys_grid) < DETECT_WINDOW_SECONDS:
-        return False, 0.0, 0.0, 0.0
+        return False, 0.0, 0.0, 0.0, int(state.phase_origin_ts)
 
     ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
     actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float)
 
-    pred = predict_by_state(state, ts_recent)
+    best_origin, pred, _ = find_best_phase_origin_for_recent(
+        state=state,
+        ts_recent=ts_recent,
+        actual=actual,
+    )
+
     threshold = calc_threshold(pred, abs_threshold, rel_threshold)
 
     abs_err = np.abs(actual - pred)
@@ -500,56 +742,28 @@ def detect_anomaly(
 
     is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD
 
-    return is_anomaly, outside_ratio, mean_abs_err, mean_rel_err
-
-
-def is_recovered(
-    state: BaselineState,
-    ts_grid: np.ndarray,
-    ys_grid: np.ndarray,
-    abs_threshold: float,
-    rel_threshold: float,
-) -> Tuple[bool, float]:
-    if len(ys_grid) < DETECT_WINDOW_SECONDS:
-        return False, 0.0
-
-    ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
-    actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float)
-
-    pred = predict_by_state(state, ts_recent)
-    threshold = calc_threshold(pred, abs_threshold, rel_threshold)
-
-    abs_err = np.abs(actual - pred)
-    inside = abs_err <= threshold
-
-    inside_ratio = float(np.mean(inside))
+    return is_anomaly, outside_ratio, mean_abs_err, mean_rel_err, int(best_origin)
 
-    return inside_ratio >= RECOVERY_INSIDE_RATIO_THRESHOLD, inside_ratio
 
+# =============================================================================
+# 健康基线状态管理
+# =============================================================================
 
 def create_initial_state(
     ts_grid: np.ndarray,
     ys_grid: np.ndarray,
     now_sec: int,
 ) -> Optional[BaselineState]:
-    if len(ys_grid) < MIN_POINTS:
-        return None
-
-    period = estimate_period(ys_grid)
-
-    template = build_phase_template(
-        ts_grid=ts_grid,
-        ys_grid=ys_grid,
-        period=period,
-        max_cycles=MAX_CYCLES_FOR_TEMPLATE,
-        tail_seconds=period * MAX_CYCLES_FOR_TEMPLATE,
-    )
+    baseline = build_current_baseline(ts_grid, ys_grid)
 
-    if template is None:
+    if baseline is None:
         return None
 
+    period, phase_origin_ts, template = baseline
+
     return BaselineState(
         period=int(period),
+        phase_origin_ts=int(phase_origin_ts),
         template=template.astype(float).tolist(),
         status=BASELINE_STATUS_HEALTHY,
         clean_seconds=int(period * MAX_CYCLES_FOR_TEMPLATE),
@@ -578,19 +792,23 @@ def maybe_update_state(
             return None, False, 0.0, 0.0, 0.0
 
         BASELINE_STATES[key] = state
+
         logger.info(
-            "初始化健康模板 key=%s period=%ss clean_seconds=%ss",
+            "初始化健康模板 key=%s period=%ss origin=%s clean=%ss",
             key,
             state.period,
+            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
             state.clean_seconds,
         )
+
         return state, False, 0.0, 0.0, 0.0
 
     elapsed = max(1, now_sec - int(state.last_seen_ts))
     elapsed = min(elapsed, POLL_INTERVAL * 2)
+
     state.last_seen_ts = now_sec
 
-    is_anom, outside_ratio, mean_abs_err, mean_rel_err = detect_anomaly(
+    is_anom, outside_ratio, mean_abs_err, mean_rel_err, best_origin = detect_anomaly(
         state=state,
         ts_grid=ts_grid,
         ys_grid=ys_grid,
@@ -602,6 +820,8 @@ def maybe_update_state(
         state.status = BASELINE_STATUS_ANOMALY
         state.clean_seconds = 0
 
+        BASELINE_STATES[key] = state
+
         logger.warning(
             "检测到异常，冻结模板 key=%s outside_ratio=%.2f mean_abs_err=%.2f mean_rel_err=%.2f",
             key,
@@ -610,47 +830,39 @@ def maybe_update_state(
             mean_rel_err,
         )
 
-        BASELINE_STATES[key] = state
         return state, True, outside_ratio, mean_abs_err, mean_rel_err
 
-    recovered, inside_ratio = is_recovered(
-        state=state,
-        ts_grid=ts_grid,
-        ys_grid=ys_grid,
-        abs_threshold=abs_threshold,
-        rel_threshold=rel_threshold,
-    )
+    old_origin = int(state.phase_origin_ts)
+    state.phase_origin_ts = int(best_origin)
+
+    if abs(state.phase_origin_ts - old_origin) >= 1:
+        logger.debug(
+            "相位校正 key=%s origin %s -> %s",
+            key,
+            datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"),
+            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+        )
 
     if state.status == BASELINE_STATUS_ANOMALY:
-        if recovered:
-            state.status = BASELINE_STATUS_RECOVERING
-            state.clean_seconds = elapsed
-            logger.info(
-                "异常开始恢复 key=%s inside_ratio=%.2f clean_seconds=%ss",
-                key,
-                inside_ratio,
-                state.clean_seconds,
-            )
-        else:
-            state.clean_seconds = 0
-            BASELINE_STATES[key] = state
-            return state, True, outside_ratio, mean_abs_err, mean_rel_err
+        state.status = BASELINE_STATUS_RECOVERING
+        state.clean_seconds = elapsed
 
-    elif state.status == BASELINE_STATUS_RECOVERING:
-        if recovered:
-            state.clean_seconds += elapsed
-        else:
-            state.status = BASELINE_STATUS_ANOMALY
-            state.clean_seconds = 0
-            BASELINE_STATES[key] = state
-            return state, True, outside_ratio, mean_abs_err, mean_rel_err
+        BASELINE_STATES[key] = state
 
+        logger.info(
+            "异常开始恢复 key=%s clean_seconds=%ss",
+            key,
+            state.clean_seconds,
+        )
+
+        return state, False, outside_ratio, mean_abs_err, mean_rel_err
+
+    if state.status == BASELINE_STATUS_RECOVERING:
+        state.clean_seconds += elapsed
     else:
         state.status = BASELINE_STATUS_HEALTHY
         state.clean_seconds += elapsed
 
-    # 故障恢复后，不要立刻学习。
-    # 必须至少连续正常：max(RECOVERY_MIN_SECONDS, 3 个周期)
     min_clean_for_update = max(
         RECOVERY_MIN_SECONDS,
         int(state.period) * MIN_FULL_CYCLES_FOR_TEMPLATE,
@@ -660,30 +872,26 @@ def maybe_update_state(
         BASELINE_STATES[key] = state
         return state, False, outside_ratio, mean_abs_err, mean_rel_err
 
-    # 只使用最近 clean_seconds 这段连续正常数据来更新模板，避免历史故障污染。
-    new_period = estimate_period(ys_grid)
     tail_seconds = min(
         int(state.clean_seconds),
-        int(new_period) * MAX_CYCLES_FOR_TEMPLATE,
+        int(state.period) * MAX_CYCLES_FOR_TEMPLATE,
     )
 
-    new_template = build_phase_template(
+    baseline = build_current_baseline(
         ts_grid=ts_grid,
         ys_grid=ys_grid,
-        period=new_period,
-        max_cycles=MAX_CYCLES_FOR_TEMPLATE,
         tail_seconds=tail_seconds,
     )
 
-    if new_template is None:
+    if baseline is None:
         BASELINE_STATES[key] = state
         return state, False, outside_ratio, mean_abs_err, mean_rel_err
 
+    new_period, new_origin, new_template = baseline
     old_template = np.array(state.template, dtype=float)
 
     if state.status == BASELINE_STATUS_RECOVERING:
         alpha = RECOVERY_EMA_ALPHA
-        state.status = BASELINE_STATUS_HEALTHY
     else:
         alpha = HEALTHY_EMA_ALPHA
 
@@ -694,18 +902,25 @@ def maybe_update_state(
     )
 
     state.period = int(new_period)
+    state.phase_origin_ts = int(new_origin)
     state.template = merged.astype(float).tolist()
+    state.status = BASELINE_STATUS_HEALTHY
     state.last_update_ts = now_sec
-    state.y_min = float(np.min(ys_grid[-tail_seconds:]))
-    state.y_max = float(np.max(ys_grid[-tail_seconds:]))
+
+    if tail_seconds > 0 and len(ys_grid) >= tail_seconds:
+        state.y_min = float(np.min(ys_grid[-tail_seconds:]))
+        state.y_max = float(np.max(ys_grid[-tail_seconds:]))
+    else:
+        state.y_min = float(np.min(ys_grid))
+        state.y_max = float(np.max(ys_grid))
 
     BASELINE_STATES[key] = state
 
     logger.info(
-        "更新健康模板 key=%s period=%ss status=%s clean_seconds=%ss alpha=%.2f",
+        "更新健康模板 key=%s period=%ss origin=%s clean=%ss alpha=%.2f",
         key,
         state.period,
-        state.status,
+        datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
         state.clean_seconds,
         alpha,
     )
@@ -713,7 +928,9 @@ def maybe_update_state(
     return state, False, outside_ratio, mean_abs_err, mean_rel_err
 
 
-# ── Prometheus 格式写入 ───────────────────────────────────────────────────────
+# =============================================================================
+# Prometheus Exposition 写入
+# =============================================================================
 
 def prom_escape_label_value(value: str) -> str:
     return (
@@ -731,8 +948,7 @@ def labels_to_str(labels: Dict[str, str]) -> str:
     parts = []
 
     for k in sorted(labels.keys()):
-        v = prom_escape_label_value(labels[k])
-        parts.append(f'{k}="{v}"')
+        parts.append(f'{k}="{prom_escape_label_value(labels[k])}"')
 
     return "{" + ",".join(parts) + "}"
 
@@ -771,11 +987,14 @@ def write_series(
         resp = requests.post(
             f"{VM_URL}/api/v1/import/prometheus",
             data=payload.encode("utf-8"),
-            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
+            headers={
+                "Content-Type": "text/plain; version=0.0.4; charset=utf-8",
+            },
             timeout=10,
         )
         resp.raise_for_status()
         return True
+
     except requests.RequestException as e:
         logger.error("写入数据失败 metric=%s: %s", metric_name, e)
         return False
@@ -851,28 +1070,37 @@ def write_prediction_bundle(
     return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7
 
 
-# ── 标签解析 ──────────────────────────────────────────────────────────────────
+# =============================================================================
+# 标签解析
+# =============================================================================
 
 _LABEL_PATTERN = re.compile(
     r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
 )
 
 
-def _parse_labels(query: str) -> Dict[str, str]:
+def parse_labels_from_query(query: str) -> Dict[str, str]:
     labels = {}
 
     if "{" not in query or "}" not in query:
         return labels
 
     try:
-        label_part = query[query.index("{") + 1: query.rindex("}")]
+        label_part = query[query.index("{") + 1:query.rindex("}")]
     except Exception:
         return labels
 
     for match in _LABEL_PATTERN.finditer(label_part):
         key = match.group(1)
         value = match.group(2)
-        value = value.replace('\\"', '"').replace("\\n", "\n").replace("\\\\", "\\")
+
+        value = (
+            value
+            .replace('\\"', '"')
+            .replace("\\n", "\n")
+            .replace("\\\\", "\\")
+        )
+
         labels[key] = value
 
     return labels
@@ -892,9 +1120,11 @@ def series_key(metric_name: str, labels: Dict[str, str]) -> str:
     return metric_name + labels_to_str(labels)
 
 
-# ── 状态持久化 ────────────────────────────────────────────────────────────────
+# =============================================================================
+# 状态持久化
+# =============================================================================
 
-def load_state():
+def load_state() -> None:
     global BASELINE_STATES
 
     if not os.path.exists(STATE_FILE):
@@ -907,17 +1137,36 @@ def load_state():
         states = {}
 
         for key, value in raw.get("baseline_states", {}).items():
+            required_fields = {
+                "period",
+                "phase_origin_ts",
+                "template",
+                "status",
+                "clean_seconds",
+                "last_update_ts",
+                "last_seen_ts",
+                "y_min",
+                "y_max",
+            }
+
+            if not required_fields.issubset(set(value.keys())):
+                continue
+
             states[key] = BaselineState(**value)
 
         BASELINE_STATES = states
 
-        logger.info("已加载预测状态文件 %s，状态数量=%d", STATE_FILE, len(BASELINE_STATES))
+        logger.info(
+            "已加载预测状态文件 %s，状态数量=%d",
+            STATE_FILE,
+            len(BASELINE_STATES),
+        )
 
     except Exception as e:
         logger.warning("加载预测状态文件失败，将重新学习: %s", e)
 
 
-def save_state():
+def save_state() -> None:
     try:
         raw = {
             "baseline_states": {
@@ -937,9 +1186,11 @@ def save_state():
         logger.warning("保存预测状态文件失败: %s", e)
 
 
-# ── 主逻辑 ────────────────────────────────────────────────────────────────────
+# =============================================================================
+# 主流程
+# =============================================================================
 
-def run_once():
+def run_once() -> None:
     now_str = datetime.now().strftime("%H:%M:%S")
 
     for target in PREDICT_TARGETS:
@@ -952,16 +1203,26 @@ def run_once():
         ts, ys = fetch_history(query)
 
         if len(ys) < MIN_POINTS:
-            logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
+            logger.info(
+                "[%s] %s 数据不足（%d 点），跳过",
+                now_str,
+                query,
+                len(ys),
+            )
             continue
 
         ts_grid, ys_grid = normalize_history(ts, ys)
 
         if len(ys_grid) < MIN_POINTS:
-            logger.info("[%s] %s 清洗后数据不足（%d 点），跳过", now_str, query, len(ys_grid))
+            logger.info(
+                "[%s] %s 清洗后数据不足（%d 点），跳过",
+                now_str,
+                query,
+                len(ys_grid),
+            )
             continue
 
-        base_labels = _parse_labels(query)
+        base_labels = parse_labels_from_query(query)
         write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
 
         key = series_key(pred_metric, write_labels)
@@ -975,7 +1236,11 @@ def run_once():
         )
 
         if state is None:
-            logger.info("[%s] %s 暂无可用健康模板，等待学习", now_str, query)
+            logger.info(
+                "[%s] %s 暂无可用健康模板，等待学习",
+                now_str,
+                query,
+            )
             continue
 
         now_sec = int(time.time())
@@ -989,7 +1254,7 @@ def run_once():
             for i in range(WRITE_HORIZON_SECONDS)
         ]
 
-        pred_values = predict_by_state(state, ts_future)
+        pred_values = predict_with_origin(state, ts_future)
 
         lower_values, upper_values = calc_bounds(
             pred=pred_values,
@@ -1012,21 +1277,28 @@ def run_once():
         )
 
         if not ok:
+            logger.error(
+                "[%s] %s 写入预测数据失败",
+                now_str,
+                query,
+            )
             continue
 
         LAST_WRITTEN_UNTIL[key] = int(max(ts_future))
 
         future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
         future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
+        origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S")
 
         logger.info(
-            "[%s] %-40s → %-35s status=%s anomaly=%s period=%ss clean=%ss 写入 %d 点，预测区间 %s ~ %s",
+            "[%s] %-40s → %-35s status=%s anomaly=%s period=%ss origin=%s clean=%ss 写入 %d 点，预测区间 %s ~ %s",
             now_str,
             query,
             pred_metric,
             state.status,
             is_anomaly,
             state.period,
+            origin_str,
             state.clean_seconds,
             len(ts_future),
             future_start,
@@ -1036,17 +1308,18 @@ def run_once():
     save_state()
 
 
-def main():
+def main() -> None:
     load_state()
 
     logger.info(
-        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s",
+        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s",
         VM_URL,
         HISTORY_MINUTES,
         HORIZON_SECONDS,
         WRITE_HORIZON_SECONDS,
         POLL_INTERVAL,
         STATE_FILE,
+        EXTRA_PREDICT_LABELS["forecast"],
     )
 
     while True:
@@ -1055,4 +1328,6 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
+
+    
\ No newline at end of file

From 76e536eff9beb9e3f9db7453bbeb6f1c7844c198 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 21 May 2026 13:55:08 +0800
Subject: [PATCH 24/55] fix

---
 ai/predict_v3_single_scene.py | 316 ++++++++++++----------------------
 1 file changed, 110 insertions(+), 206 deletions(-)

diff --git a/ai/predict_v3_single_scene.py b/ai/predict_v3_single_scene.py
index fc07f4f..2cde8b8 100755
--- a/ai/predict_v3_single_scene.py
+++ b/ai/predict_v3_single_scene.py
@@ -1,22 +1,12 @@
 # -*- coding: utf-8 -*-
 """
-ProtoForge Predictor v8
-
-功能：
-1. 从 VictoriaMetrics 拉取历史数据。
-2. 对 CNC 周期型指标进行相位对齐预测。
-3. 使用“谷底锚点”对齐周期，减少上升沿/下降沿相位偏差。
-4. 每轮只写入未来 min(HORIZON_SECONDS, POLL_INTERVAL) 秒，避免预测窗口重叠。
-5. 检测异常后冻结健康模板，不把故障数据学进去。
-6. 故障恢复后等待稳定一段时间，再恢复模板更新。
-7. 写入：
-   - xxx_predicted
-   - xxx_predicted_upper
-   - xxx_predicted_lower
-   - xxx_anomaly
-   - xxx_anomaly_outside_ratio
-   - xxx_anomaly_mean_abs_error
-   - xxx_anomaly_mean_rel_error
+ProtoForge Predictor v9
+
+修复重点：
+1. 预测时间轴改为锚定最后一个真实数据点 last_real_ts，而不是锚定 time.time()。
+2. 不再使用 LAST_WRITTEN_UNTIL 把预测不断推向更远未来，避免 Grafana 里预测线相对真实线出现延迟/错位。
+3. 如果真实数据时间戳没有推进，则跳过本轮预测写入，避免重复写同一段未来时间造成毛刺。
+4. 保留：相位对齐、健康模板冻结、故障期不学习、恢复后再学习、预测上下界、异常指标。
 """
 
 import json
@@ -50,14 +40,13 @@
 # =============================================================================
 
 VM_URL = "http://localhost:8428"
-
-STATE_FILE = "/tmp/protoforge_predictor_state_v8.json"
+STATE_FILE = "/tmp/protoforge_predictor_state_v9.json"
 
 HISTORY_MINUTES = 30
 HORIZON_SECONDS = 120
 POLL_INTERVAL = 30
 
-# 实际写入窗口不要大于轮询间隔，否则多轮预测会重叠。
+# 实际每轮写入的预测长度。不要大于 POLL_INTERVAL，否则容易出现预测窗口重叠。
 WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)
 
 QUERY_STEP = "1s"
@@ -76,14 +65,23 @@
 RECOVERY_EMA_ALPHA = 0.30
 
 OUTSIDE_RATIO_THRESHOLD = 0.60
-RECOVERY_INSIDE_RATIO_THRESHOLD = 0.80
-
 PHASE_SEARCH_RATIO = 0.15
 VALLEY_QUANTILE = 45
 
+# 关键修复：预测时间轴锚定真实数据最后一个点。
+# True：预测从 last_real_ts + 1 开始，适合 Grafana 与真实曲线对齐展示。
+# False：预测从当前系统时间 + 1 开始，适合只看纯未来预测，但容易与有采集延迟的真实数据错位。
+ALIGN_PREDICTION_TO_LAST_REAL_TS = True
+
+# 如果 last_real_ts 距离当前系统时间太久，说明采集链路可能断了，跳过预测，避免用陈旧数据继续画未来线。
+MAX_DATA_LAG_SECONDS = 180
+
+# 真实数据至少推进多少秒，才写入新预测，避免同一段未来时间被反复写入。
+MIN_REAL_ADVANCE_SECONDS = 1
+
 
 # =============================================================================
-# 预测指标配置
+# 指标配置
 # =============================================================================
 
 PREDICT_TARGETS = [
@@ -132,7 +130,7 @@
 ]
 
 EXTRA_PREDICT_LABELS = {
-    "forecast": "phase_aligned_health_v8",
+    "forecast": "phase_aligned_health_v9",
     "source": "protoforge",
 }
 
@@ -159,7 +157,10 @@ class BaselineState:
 
 
 BASELINE_STATES: Dict[str, BaselineState] = {}
-LAST_WRITTEN_UNTIL: Dict[str, int] = {}
+
+# 记录每条序列最后一次使用的真实数据时间戳，而不是预测写到哪里。
+# 这样不会把预测不断推向更远的未来。
+LAST_REAL_TS_WRITTEN: Dict[str, int] = {}
 
 
 # =============================================================================
@@ -243,7 +244,6 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.
         return np.array([]), np.array([])
 
     sorted_items = sorted(data.items(), key=lambda x: x[0])
-
     ts_clean = np.array([x[0] for x in sorted_items], dtype=float)
     ys_clean = np.array([x[1] for x in sorted_items], dtype=float)
 
@@ -263,7 +263,7 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.
 
 
 # =============================================================================
-# 周期估计
+# 周期估计与谷底检测
 # =============================================================================
 
 def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
@@ -355,10 +355,6 @@ def estimate_period_rough(ys_arr: np.ndarray) -> int:
     return int(period)
 
 
-# =============================================================================
-# 谷底锚点检测
-# =============================================================================
-
 def find_valley_indices(
     ts_grid: np.ndarray,
     ys_grid: np.ndarray,
@@ -370,7 +366,6 @@ def find_valley_indices(
         return []
 
     period = max(3, int(expected_period))
-
     smooth_window = max(3, int(round(period * 0.08)))
     smooth_window = min(smooth_window, 21)
 
@@ -389,7 +384,6 @@ def find_valley_indices(
 
     if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE:
         candidates = []
-
         for i in range(1, n - 1):
             if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]:
                 candidates.append(i)
@@ -458,7 +452,7 @@ def detect_period_and_valleys(
 
 
 # =============================================================================
-# 相位对齐模板构建
+# 相位对齐模板
 # =============================================================================
 
 def build_template_from_valleys(
@@ -552,7 +546,7 @@ def build_current_baseline(
 
 
 # =============================================================================
-# 模板预测
+# 预测与模板合并
 # =============================================================================
 
 def circular_template_value(template: np.ndarray, phase: float) -> float:
@@ -562,10 +556,8 @@ def circular_template_value(template: np.ndarray, phase: float) -> float:
         return 0.0
 
     phase = float(phase) % period
-
     i0 = int(math.floor(phase)) % period
     i1 = (i0 + 1) % period
-
     frac = phase - math.floor(phase)
 
     return float((1.0 - frac) * template[i0] + frac * template[i1])
@@ -583,7 +575,6 @@ def predict_with_origin(
         return np.zeros(len(ts_list), dtype=float)
 
     origin = int(state.phase_origin_ts if phase_origin_ts is None else phase_origin_ts)
-
     values = []
 
     for ts in ts_list:
@@ -611,10 +602,7 @@ def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
     return np.interp(new_x, old_x_ext, old_y_ext).astype(float)
 
 
-def align_new_template_to_old(
-    old_template: np.ndarray,
-    new_template: np.ndarray,
-) -> np.ndarray:
+def align_new_template_to_old(old_template: np.ndarray, new_template: np.ndarray) -> np.ndarray:
     if len(old_template) != len(new_template):
         old_template = resample_template(old_template, len(new_template))
 
@@ -624,7 +612,6 @@ def align_new_template_to_old(
         return new_template.astype(float)
 
     max_shift = max(1, int(round(period * 0.10)))
-
     old_norm = old_template - np.mean(old_template)
 
     best_score = None
@@ -633,7 +620,6 @@ def align_new_template_to_old(
     for shift in range(-max_shift, max_shift + 1):
         shifted = np.roll(new_template, shift)
         shifted_norm = shifted - np.mean(shifted)
-
         score = float(np.dot(old_norm, shifted_norm))
 
         if best_score is None or score > best_score:
@@ -643,18 +629,13 @@ def align_new_template_to_old(
     return best_template.astype(float)
 
 
-def merge_template(
-    old_template: np.ndarray,
-    new_template: np.ndarray,
-    alpha: float,
-) -> np.ndarray:
+def merge_template(old_template: np.ndarray, new_template: np.ndarray, alpha: float) -> np.ndarray:
     alpha = float(np.clip(alpha, 0.0, 1.0))
 
     if len(old_template) != len(new_template):
         old_template = resample_template(old_template, len(new_template))
 
     new_template = align_new_template_to_old(old_template, new_template)
-
     merged = (1.0 - alpha) * old_template + alpha * new_template
 
     return merged.astype(float)
@@ -664,11 +645,7 @@ def merge_template(
 # 异常检测
 # =============================================================================
 
-def calc_threshold(
-    pred: np.ndarray,
-    abs_threshold: float,
-    rel_threshold: float,
-) -> np.ndarray:
+def calc_threshold(pred: np.ndarray, abs_threshold: float, rel_threshold: float) -> np.ndarray:
     return np.maximum(abs_threshold, np.abs(pred) * rel_threshold)
 
 
@@ -678,11 +655,7 @@ def calc_bounds(
     rel_threshold: float,
 ) -> Tuple[np.ndarray, np.ndarray]:
     threshold = calc_threshold(pred, abs_threshold, rel_threshold)
-
-    lower = pred - threshold
-    upper = pred + threshold
-
-    return lower, upper
+    return pred - threshold, pred + threshold
 
 
 def find_best_phase_origin_for_recent(
@@ -692,7 +665,6 @@ def find_best_phase_origin_for_recent(
 ) -> Tuple[int, np.ndarray, float]:
     period = int(state.period)
     base_origin = int(state.phase_origin_ts)
-
     max_shift = max(1, int(round(period * PHASE_SEARCH_RATIO)))
 
     best_origin = base_origin
@@ -732,14 +704,12 @@ def detect_anomaly(
     )
 
     threshold = calc_threshold(pred, abs_threshold, rel_threshold)
-
     abs_err = np.abs(actual - pred)
     outside = abs_err > threshold
 
     outside_ratio = float(np.mean(outside))
     mean_abs_err = float(np.mean(abs_err))
     mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1.0)))
-
     is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD
 
     return is_anomaly, outside_ratio, mean_abs_err, mean_rel_err, int(best_origin)
@@ -749,11 +719,7 @@ def detect_anomaly(
 # 健康基线状态管理
 # =============================================================================
 
-def create_initial_state(
-    ts_grid: np.ndarray,
-    ys_grid: np.ndarray,
-    now_sec: int,
-) -> Optional[BaselineState]:
+def create_initial_state(ts_grid: np.ndarray, ys_grid: np.ndarray, now_sec: int) -> Optional[BaselineState]:
     baseline = build_current_baseline(ts_grid, ys_grid)
 
     if baseline is None:
@@ -782,7 +748,6 @@ def maybe_update_state(
     rel_threshold: float,
 ) -> Tuple[Optional[BaselineState], bool, float, float, float]:
     now_sec = int(time.time())
-
     state = BASELINE_STATES.get(key)
 
     if state is None:
@@ -805,7 +770,6 @@ def maybe_update_state(
 
     elapsed = max(1, now_sec - int(state.last_seen_ts))
     elapsed = min(elapsed, POLL_INTERVAL * 2)
-
     state.last_seen_ts = now_sec
 
     is_anom, outside_ratio, mean_abs_err, mean_rel_err, best_origin = detect_anomaly(
@@ -819,7 +783,6 @@ def maybe_update_state(
     if is_anom:
         state.status = BASELINE_STATUS_ANOMALY
         state.clean_seconds = 0
-
         BASELINE_STATES[key] = state
 
         logger.warning(
@@ -846,15 +809,9 @@ def maybe_update_state(
     if state.status == BASELINE_STATUS_ANOMALY:
         state.status = BASELINE_STATUS_RECOVERING
         state.clean_seconds = elapsed
-
         BASELINE_STATES[key] = state
 
-        logger.info(
-            "异常开始恢复 key=%s clean_seconds=%ss",
-            key,
-            state.clean_seconds,
-        )
-
+        logger.info("异常开始恢复 key=%s clean_seconds=%ss", key, state.clean_seconds)
         return state, False, outside_ratio, mean_abs_err, mean_rel_err
 
     if state.status == BASELINE_STATUS_RECOVERING:
@@ -877,11 +834,7 @@ def maybe_update_state(
         int(state.period) * MAX_CYCLES_FOR_TEMPLATE,
     )
 
-    baseline = build_current_baseline(
-        ts_grid=ts_grid,
-        ys_grid=ys_grid,
-        tail_seconds=tail_seconds,
-    )
+    baseline = build_current_baseline(ts_grid=ts_grid, ys_grid=ys_grid, tail_seconds=tail_seconds)
 
     if baseline is None:
         BASELINE_STATES[key] = state
@@ -889,17 +842,9 @@ def maybe_update_state(
 
     new_period, new_origin, new_template = baseline
     old_template = np.array(state.template, dtype=float)
+    alpha = RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else HEALTHY_EMA_ALPHA
 
-    if state.status == BASELINE_STATUS_RECOVERING:
-        alpha = RECOVERY_EMA_ALPHA
-    else:
-        alpha = HEALTHY_EMA_ALPHA
-
-    merged = merge_template(
-        old_template=old_template,
-        new_template=new_template,
-        alpha=alpha,
-    )
+    merged = merge_template(old_template=old_template, new_template=new_template, alpha=alpha)
 
     state.period = int(new_period)
     state.phase_origin_ts = int(new_origin)
@@ -933,12 +878,7 @@ def maybe_update_state(
 # =============================================================================
 
 def prom_escape_label_value(value: str) -> str:
-    return (
-        str(value)
-        .replace("\\", "\\\\")
-        .replace("\n", "\\n")
-        .replace('"', '\\"')
-    )
+    return str(value).replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"')
 
 
 def labels_to_str(labels: Dict[str, str]) -> str:
@@ -975,8 +915,7 @@ def write_series(
         if not math.isfinite(ts_sec) or not math.isfinite(val):
             continue
 
-        ts_ms = ts_sec * 1000
-        lines.append(f"{metric_name}{label_str} {val:.6f} {ts_ms}")
+        lines.append(f"{metric_name}{label_str} {val:.6f} {ts_sec * 1000}")
 
     if not lines:
         return False
@@ -987,9 +926,7 @@ def write_series(
         resp = requests.post(
             f"{VM_URL}/api/v1/import/prometheus",
             data=payload.encode("utf-8"),
-            headers={
-                "Content-Type": "text/plain; version=0.0.4; charset=utf-8",
-            },
+            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
             timeout=10,
         )
         resp.raise_for_status()
@@ -1012,60 +949,19 @@ def write_prediction_bundle(
     outside_ratio: float,
     mean_abs_err: float,
     mean_rel_err: float,
+    event_ts: int,
 ) -> bool:
-    ok1 = write_series(
-        metric_name=pred_metric,
-        labels=labels,
-        ts_list=ts_future,
-        values=pred_values.astype(float).tolist(),
-    )
-
-    ok2 = write_series(
-        metric_name=f"{pred_metric}_lower",
-        labels=labels,
-        ts_list=ts_future,
-        values=lower_values.astype(float).tolist(),
-    )
-
-    ok3 = write_series(
-        metric_name=f"{pred_metric}_upper",
-        labels=labels,
-        ts_list=ts_future,
-        values=upper_values.astype(float).tolist(),
-    )
-
-    now_sec = int(time.time())
+    ok1 = write_series(pred_metric, labels, ts_future, pred_values.astype(float).tolist())
+    ok2 = write_series(f"{pred_metric}_lower", labels, ts_future, lower_values.astype(float).tolist())
+    ok3 = write_series(f"{pred_metric}_upper", labels, ts_future, upper_values.astype(float).tolist())
 
     anomaly_labels = dict(labels)
     anomaly_labels["type"] = "prediction_deviation"
 
-    ok4 = write_series(
-        metric_name=anomaly_metric,
-        labels=anomaly_labels,
-        ts_list=[now_sec],
-        values=[1.0 if is_anomaly else 0.0],
-    )
-
-    ok5 = write_series(
-        metric_name=f"{anomaly_metric}_outside_ratio",
-        labels=anomaly_labels,
-        ts_list=[now_sec],
-        values=[outside_ratio],
-    )
-
-    ok6 = write_series(
-        metric_name=f"{anomaly_metric}_mean_abs_error",
-        labels=anomaly_labels,
-        ts_list=[now_sec],
-        values=[mean_abs_err],
-    )
-
-    ok7 = write_series(
-        metric_name=f"{anomaly_metric}_mean_rel_error",
-        labels=anomaly_labels,
-        ts_list=[now_sec],
-        values=[mean_rel_err],
-    )
+    ok4 = write_series(anomaly_metric, anomaly_labels, [event_ts], [1.0 if is_anomaly else 0.0])
+    ok5 = write_series(f"{anomaly_metric}_outside_ratio", anomaly_labels, [event_ts], [outside_ratio])
+    ok6 = write_series(f"{anomaly_metric}_mean_abs_error", anomaly_labels, [event_ts], [mean_abs_err])
+    ok7 = write_series(f"{anomaly_metric}_mean_rel_error", anomaly_labels, [event_ts], [mean_rel_err])
 
     return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7
 
@@ -1074,9 +970,7 @@ def write_prediction_bundle(
 # 标签解析
 # =============================================================================
 
-_LABEL_PATTERN = re.compile(
-    r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
-)
+_LABEL_PATTERN = re.compile(r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*')
 
 
 def parse_labels_from_query(query: str) -> Dict[str, str]:
@@ -1093,14 +987,7 @@ def parse_labels_from_query(query: str) -> Dict[str, str]:
     for match in _LABEL_PATTERN.finditer(label_part):
         key = match.group(1)
         value = match.group(2)
-
-        value = (
-            value
-            .replace('\\"', '"')
-            .replace("\\n", "\n")
-            .replace("\\\\", "\\")
-        )
-
+        value = value.replace('\\"', '"').replace("\\n", "\n").replace("\\\\", "\\")
         labels[key] = value
 
     return labels
@@ -1155,12 +1042,7 @@ def load_state() -> None:
             states[key] = BaselineState(**value)
 
         BASELINE_STATES = states
-
-        logger.info(
-            "已加载预测状态文件 %s，状态数量=%d",
-            STATE_FILE,
-            len(BASELINE_STATES),
-        )
+        logger.info("已加载预测状态文件 %s，状态数量=%d", STATE_FILE, len(BASELINE_STATES))
 
     except Exception as e:
         logger.warning("加载预测状态文件失败，将重新学习: %s", e)
@@ -1186,6 +1068,44 @@ def save_state() -> None:
         logger.warning("保存预测状态文件失败: %s", e)
 
 
+# =============================================================================
+# 时间轴选择
+# =============================================================================
+
+def build_prediction_timestamps(key: str, last_real_ts: int, now_sec: int) -> Optional[List[int]]:
+    data_lag = now_sec - last_real_ts
+
+    if data_lag > MAX_DATA_LAG_SECONDS:
+        logger.warning(
+            "真实数据延迟过大，跳过预测 key=%s data_lag=%ss max=%ss",
+            key,
+            data_lag,
+            MAX_DATA_LAG_SECONDS,
+        )
+        return None
+
+    last_written_real_ts = LAST_REAL_TS_WRITTEN.get(key)
+
+    if last_written_real_ts is not None:
+        advance = last_real_ts - int(last_written_real_ts)
+
+        if advance < MIN_REAL_ADVANCE_SECONDS:
+            logger.info(
+                "真实数据时间戳未推进，跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s",
+                key,
+                last_real_ts,
+                last_written_real_ts,
+            )
+            return None
+
+    if ALIGN_PREDICTION_TO_LAST_REAL_TS:
+        base_ts = last_real_ts
+    else:
+        base_ts = now_sec
+
+    return [base_ts + i + 1 for i in range(WRITE_HORIZON_SECONDS)]
+
+
 # =============================================================================
 # 主流程
 # =============================================================================
@@ -1203,28 +1123,17 @@ def run_once() -> None:
         ts, ys = fetch_history(query)
 
         if len(ys) < MIN_POINTS:
-            logger.info(
-                "[%s] %s 数据不足（%d 点），跳过",
-                now_str,
-                query,
-                len(ys),
-            )
+            logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
             continue
 
         ts_grid, ys_grid = normalize_history(ts, ys)
 
         if len(ys_grid) < MIN_POINTS:
-            logger.info(
-                "[%s] %s 清洗后数据不足（%d 点），跳过",
-                now_str,
-                query,
-                len(ys_grid),
-            )
+            logger.info("[%s] %s 清洗后数据不足（%d 点），跳过", now_str, query, len(ys_grid))
             continue
 
         base_labels = parse_labels_from_query(query)
         write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
-
         key = series_key(pred_metric, write_labels)
 
         state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err = maybe_update_state(
@@ -1236,26 +1145,23 @@ def run_once() -> None:
         )
 
         if state is None:
-            logger.info(
-                "[%s] %s 暂无可用健康模板，等待学习",
-                now_str,
-                query,
-            )
+            logger.info("[%s] %s 暂无可用健康模板，等待学习", now_str, query)
             continue
 
         now_sec = int(time.time())
-        last_until = LAST_WRITTEN_UNTIL.get(key, 0)
         last_real_ts = int(ts_grid[-1])
+        data_lag = now_sec - last_real_ts
 
-        base_ts = max(now_sec, last_until, last_real_ts)
+        ts_future = build_prediction_timestamps(
+            key=key,
+            last_real_ts=last_real_ts,
+            now_sec=now_sec,
+        )
 
-        ts_future = [
-            base_ts + i + 1
-            for i in range(WRITE_HORIZON_SECONDS)
-        ]
+        if not ts_future:
+            continue
 
         pred_values = predict_with_origin(state, ts_future)
-
         lower_values, upper_values = calc_bounds(
             pred=pred_values,
             abs_threshold=abs_threshold,
@@ -1274,24 +1180,22 @@ def run_once() -> None:
             outside_ratio=outside_ratio,
             mean_abs_err=mean_abs_err,
             mean_rel_err=mean_rel_err,
+            event_ts=last_real_ts,
         )
 
         if not ok:
-            logger.error(
-                "[%s] %s 写入预测数据失败",
-                now_str,
-                query,
-            )
+            logger.error("[%s] %s 写入预测数据失败", now_str, query)
             continue
 
-        LAST_WRITTEN_UNTIL[key] = int(max(ts_future))
+        LAST_REAL_TS_WRITTEN[key] = last_real_ts
 
         future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
         future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
+        last_real_str = datetime.fromtimestamp(last_real_ts).strftime("%H:%M:%S")
         origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S")
 
         logger.info(
-            "[%s] %-40s → %-35s status=%s anomaly=%s period=%ss origin=%s clean=%ss 写入 %d 点，预测区间 %s ~ %s",
+            "[%s] %-40s → %-35s status=%s anomaly=%s period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点，预测区间 %s ~ %s",
             now_str,
             query,
             pred_metric,
@@ -1299,7 +1203,8 @@ def run_once() -> None:
             is_anomaly,
             state.period,
             origin_str,
-            state.clean_seconds,
+            last_real_str,
+            data_lag,
             len(ts_future),
             future_start,
             future_end,
@@ -1312,7 +1217,7 @@ def main() -> None:
     load_state()
 
     logger.info(
-        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s",
+        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s align_to_last_real=%s",
         VM_URL,
         HISTORY_MINUTES,
         HORIZON_SECONDS,
@@ -1320,6 +1225,7 @@ def main() -> None:
         POLL_INTERVAL,
         STATE_FILE,
         EXTRA_PREDICT_LABELS["forecast"],
+        ALIGN_PREDICTION_TO_LAST_REAL_TS,
     )
 
     while True:
@@ -1329,5 +1235,3 @@ def main() -> None:
 
 if __name__ == "__main__":
     main()
-
-    
\ No newline at end of file

From f5e7b2a27320066b5eb9157f297d6651adcd36c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 21 May 2026 14:05:54 +0800
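Subject: [PATCH 25/55] fix: add phase-lock period/phase search to
 predictor (v10)

- Add phase_lock_recent(): each round, search period and phase origin
  against the most recent window of real data; this replaces the
  origin-only find_best_phase_origin_for_recent().
- detect_anomaly() now also returns the locked period, and
  apply_phase_lock_to_state() resamples the template to that period.
- Bump the state file and the "forecast" label to v10.
- Drop MIN_REAL_ADVANCE_SECONDS; skip the write whenever last_real_ts
  has not advanced past the last written real timestamp.
- Retune DETECT_WINDOW_SECONDS and the healthy/recovery EMA alphas.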

---
 ai/predict_v3_single_scene.py | 520 +++++++++++++++++++++++++---------
 1 file changed, 385 insertions(+), 135 deletions(-)

diff --git a/ai/predict_v3_single_scene.py b/ai/predict_v3_single_scene.py
index 2cde8b8..d212d2d 100755
--- a/ai/predict_v3_single_scene.py
+++ b/ai/predict_v3_single_scene.py
@@ -1,12 +1,14 @@
 # -*- coding: utf-8 -*-
 """
-ProtoForge Predictor v9
+ProtoForge Predictor v10
 
 修复重点：
-1. 预测时间轴改为锚定最后一个真实数据点 last_real_ts，而不是锚定 time.time()。
-2. 不再使用 LAST_WRITTEN_UNTIL 把预测不断推向更远未来，避免 Grafana 里预测线相对真实线出现延迟/错位。
-3. 如果真实数据时间戳没有推进，则跳过本轮预测写入，避免重复写同一段未来时间造成毛刺。
-4. 保留：相位对齐、健康模板冻结、故障期不学习、恢复后再学习、预测上下界、异常指标。
+1. 修复 lag=0 但预测线仍然相位漂移的问题。
+2. 在谷底相位对齐基础上，增加 phase-lock 相位锁定。
+3. 每轮使用最近 1~2 个周期真实数据，搜索最佳 period + phase_origin。
+4. 预测起点仍然锚定最后一个真实点 last_real_ts，避免写入延迟。
+5. 保留健康模板冻结逻辑：异常期间不学习故障数据。
+6. 保留预测上下界和异常指标。
 """
 
 import json
@@ -40,13 +42,12 @@
 # =============================================================================
 
 VM_URL = "http://localhost:8428"
-STATE_FILE = "/tmp/protoforge_predictor_state_v9.json"
+STATE_FILE = "/tmp/protoforge_predictor_state_v10.json"
 
 HISTORY_MINUTES = 30
 HORIZON_SECONDS = 120
 POLL_INTERVAL = 30
 
-# 实际每轮写入的预测长度。不要大于 POLL_INTERVAL，否则容易出现预测窗口重叠。
 WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)
 
 QUERY_STEP = "1s"
@@ -58,26 +59,29 @@
 MIN_FULL_CYCLES_FOR_TEMPLATE = 3
 MAX_CYCLES_FOR_TEMPLATE = 6
 
-DETECT_WINDOW_SECONDS = 15
+DETECT_WINDOW_SECONDS = 20
 RECOVERY_MIN_SECONDS = 60
 
-HEALTHY_EMA_ALPHA = 0.12
-RECOVERY_EMA_ALPHA = 0.30
+HEALTHY_EMA_ALPHA = 0.10
+RECOVERY_EMA_ALPHA = 0.25
 
 OUTSIDE_RATIO_THRESHOLD = 0.60
-PHASE_SEARCH_RATIO = 0.15
+
 VALLEY_QUANTILE = 45
 
-# 关键修复：预测时间轴锚定真实数据最后一个点。
-# True：预测从 last_real_ts + 1 开始，适合 Grafana 与真实曲线对齐展示。
-# False：预测从当前系统时间 + 1 开始，适合只看纯未来预测，但容易与有采集延迟的真实数据错位。
-ALIGN_PREDICTION_TO_LAST_REAL_TS = True
+# phase-lock 配置
+PHASE_LOCK_MIN_WINDOW_SECONDS = 45
+PHASE_LOCK_MAX_WINDOW_SECONDS = 180
+PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12
+PHASE_LOCK_ORIGIN_SEARCH_RATIO = 0.35
+PHASE_LOCK_PERIOD_STEP = 1
+PHASE_LOCK_ORIGIN_STEP = 1
 
-# 如果 last_real_ts 距离当前系统时间太久，说明采集链路可能断了，跳过预测，避免用陈旧数据继续画未来线。
+# 真实数据延迟超过这个值，就不继续预测
 MAX_DATA_LAG_SECONDS = 180
 
-# 真实数据至少推进多少秒，才写入新预测，避免同一段未来时间被反复写入。
-MIN_REAL_ADVANCE_SECONDS = 1
+# 预测锚定最后一个真实点
+ALIGN_PREDICTION_TO_LAST_REAL_TS = True
 
 
 # =============================================================================
@@ -130,7 +134,7 @@
 ]
 
 EXTRA_PREDICT_LABELS = {
-    "forecast": "phase_aligned_health_v9",
+    "forecast": "phase_locked_health_v10",
     "source": "protoforge",
 }
 
@@ -157,9 +161,6 @@ class BaselineState:
 
 
 BASELINE_STATES: Dict[str, BaselineState] = {}
-
-# 记录每条序列最后一次使用的真实数据时间戳，而不是预测写到哪里。
-# 这样不会把预测不断推向更远的未来。
 LAST_REAL_TS_WRITTEN: Dict[str, int] = {}
 
 
@@ -197,8 +198,6 @@ def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[floa
         return [], []
 
     values = result[0].get("values", [])
-    if not values:
-        return [], []
 
     ts = []
     ys = []
@@ -244,6 +243,7 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.
         return np.array([]), np.array([])
 
     sorted_items = sorted(data.items(), key=lambda x: x[0])
+
     ts_clean = np.array([x[0] for x in sorted_items], dtype=float)
     ys_clean = np.array([x[1] for x in sorted_items], dtype=float)
 
@@ -263,7 +263,7 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.
 
 
 # =============================================================================
-# 周期估计与谷底检测
+# 周期估计
 # =============================================================================
 
 def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
@@ -355,6 +355,10 @@ def estimate_period_rough(ys_arr: np.ndarray) -> int:
     return int(period)
 
 
+# =============================================================================
+# 谷底检测与模板构建
+# =============================================================================
+
 def find_valley_indices(
     ts_grid: np.ndarray,
     ys_grid: np.ndarray,
@@ -366,6 +370,7 @@ def find_valley_indices(
         return []
 
     period = max(3, int(expected_period))
+
     smooth_window = max(3, int(round(period * 0.08)))
     smooth_window = min(smooth_window, 21)
 
@@ -384,6 +389,7 @@ def find_valley_indices(
 
     if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE:
         candidates = []
+
         for i in range(1, n - 1):
             if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]:
                 candidates.append(i)
@@ -451,10 +457,6 @@ def detect_period_and_valleys(
     return int(period), valleys
 
 
-# =============================================================================
-# 相位对齐模板
-# =============================================================================
-
 def build_template_from_valleys(
     ts_grid: np.ndarray,
     ys_grid: np.ndarray,
@@ -546,7 +548,7 @@ def build_current_baseline(
 
 
 # =============================================================================
-# 预测与模板合并
+# 模板预测与重采样
 # =============================================================================
 
 def circular_template_value(template: np.ndarray, phase: float) -> float:
@@ -556,6 +558,7 @@ def circular_template_value(template: np.ndarray, phase: float) -> float:
         return 0.0
 
     phase = float(phase) % period
+
     i0 = int(math.floor(phase)) % period
     i1 = (i0 + 1) % period
     frac = phase - math.floor(phase)
@@ -563,46 +566,77 @@ def circular_template_value(template: np.ndarray, phase: float) -> float:
     return float((1.0 - frac) * template[i0] + frac * template[i1])
 
 
-def predict_with_origin(
-    state: BaselineState,
+def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
+    old_period = len(old_template)
+
+    if old_period == new_period:
+        return old_template.astype(float)
+
+    if old_period <= 1 or new_period <= 1:
+        return np.full(new_period, float(np.mean(old_template)), dtype=float)
+
+    old_x = np.linspace(0.0, 1.0, old_period, endpoint=False)
+    new_x = np.linspace(0.0, 1.0, new_period, endpoint=False)
+
+    old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0])
+    old_y_ext = np.concatenate([old_template, old_template, old_template])
+
+    return np.interp(new_x, old_x_ext, old_y_ext).astype(float)
+
+
+def predict_template_values(
+    template: np.ndarray,
+    period: int,
+    phase_origin_ts: int,
     ts_list: List[int],
-    phase_origin_ts: Optional[int] = None,
 ) -> np.ndarray:
-    template = np.array(state.template, dtype=float)
-    period = int(state.period)
-
-    if period <= 1 or len(template) != period:
+    if period <= 1:
         return np.zeros(len(ts_list), dtype=float)
 
-    origin = int(state.phase_origin_ts if phase_origin_ts is None else phase_origin_ts)
+    if len(template) != period:
+        template = resample_template(template, period)
+
     values = []
 
     for ts in ts_list:
-        phase = (int(ts) - origin) % period
+        phase = (int(ts) - int(phase_origin_ts)) % period
         values.append(circular_template_value(template, phase))
 
     return np.array(values, dtype=float)
 
 
-def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
-    old_period = len(old_template)
+def predict_with_state(state: BaselineState, ts_list: List[int]) -> np.ndarray:
+    template = np.array(state.template, dtype=float)
 
-    if old_period == new_period:
-        return old_template.astype(float)
+    return predict_template_values(
+        template=template,
+        period=int(state.period),
+        phase_origin_ts=int(state.phase_origin_ts),
+        ts_list=ts_list,
+    )
 
-    if old_period <= 1 or new_period <= 1:
-        return np.full(new_period, float(np.mean(old_template)), dtype=float)
 
-    old_x = np.linspace(0.0, 1.0, old_period, endpoint=False)
-    new_x = np.linspace(0.0, 1.0, new_period, endpoint=False)
+def normalize_origin_near(origin: int, period: int, near_ts: int) -> int:
+    if period <= 1:
+        return origin
 
-    old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0])
-    old_y_ext = np.concatenate([old_template, old_template, old_template])
+    origin = int(origin)
+    period = int(period)
+    near_ts = int(near_ts)
 
-    return np.interp(new_x, old_x_ext, old_y_ext).astype(float)
+    while origin + period <= near_ts:
+        origin += period
 
+    while origin > near_ts:
+        origin -= period
 
-def align_new_template_to_old(old_template: np.ndarray, new_template: np.ndarray) -> np.ndarray:
+    return origin
+
+
+def align_new_template_to_old(
+    old_template: np.ndarray,
+    new_template: np.ndarray,
+) -> np.ndarray:
     if len(old_template) != len(new_template):
         old_template = resample_template(old_template, len(new_template))
 
@@ -629,23 +663,117 @@ def align_new_template_to_old(old_template: np.ndarray, new_template: np.ndarray
     return best_template.astype(float)
 
 
-def merge_template(old_template: np.ndarray, new_template: np.ndarray, alpha: float) -> np.ndarray:
+def merge_template(
+    old_template: np.ndarray,
+    new_template: np.ndarray,
+    alpha: float,
+) -> np.ndarray:
     alpha = float(np.clip(alpha, 0.0, 1.0))
 
     if len(old_template) != len(new_template):
         old_template = resample_template(old_template, len(new_template))
 
     new_template = align_new_template_to_old(old_template, new_template)
+
     merged = (1.0 - alpha) * old_template + alpha * new_template
 
     return merged.astype(float)
 
 
+# =============================================================================
+# Phase Lock
+# =============================================================================
+
+def phase_lock_recent(
+    state: BaselineState,
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+) -> Tuple[int, int, np.ndarray, float]:
+    base_period = int(state.period)
+    base_origin = int(state.phase_origin_ts)
+    base_template = np.array(state.template, dtype=float)
+
+    if base_period <= 1 or len(base_template) <= 1:
+        ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
+        pred = predict_with_state(state, ts_recent)
+        actual = ys_grid[-len(ts_recent):].astype(float)
+        mae = float(np.mean(np.abs(actual - pred))) if len(actual) else 0.0
+        return base_period, base_origin, pred, mae
+
+    window_seconds = max(
+        PHASE_LOCK_MIN_WINDOW_SECONDS,
+        min(PHASE_LOCK_MAX_WINDOW_SECONDS, int(base_period * 2)),
+    )
+
+    cutoff = ts_grid[-1] - window_seconds
+    mask = ts_grid >= cutoff
+
+    ts_recent_arr = ts_grid[mask].astype(int)
+    actual = ys_grid[mask].astype(float)
+
+    if len(ts_recent_arr) < max(10, DETECT_WINDOW_SECONDS):
+        ts_recent_arr = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int)
+        actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float)
+
+    ts_recent = ts_recent_arr.tolist()
+    last_ts = int(ts_recent[-1])
+
+    p_min = max(int(MIN_PERIOD_SECONDS), int(round(base_period * (1.0 - PHASE_LOCK_PERIOD_SEARCH_RATIO))))
+    p_max = min(int(MAX_PERIOD_SECONDS), int(round(base_period * (1.0 + PHASE_LOCK_PERIOD_SEARCH_RATIO))))
+
+    if p_max < p_min:
+        p_min = p_max = base_period
+
+    best_period = base_period
+    best_origin = normalize_origin_near(base_origin, base_period, last_ts)
+    best_template = resample_template(base_template, best_period)
+    best_pred = predict_template_values(best_template, best_period, best_origin, ts_recent)
+    best_mae = float(np.mean(np.abs(actual - best_pred)))
+
+    for period in range(p_min, p_max + 1, PHASE_LOCK_PERIOD_STEP):
+        template = resample_template(base_template, period)
+        center_origin = normalize_origin_near(base_origin, period, last_ts)
+
+        origin_shift = max(2, int(round(period * PHASE_LOCK_ORIGIN_SEARCH_RATIO)))
+
+        for shift in range(-origin_shift, origin_shift + 1, PHASE_LOCK_ORIGIN_STEP):
+            origin = center_origin + shift
+
+            pred = predict_template_values(
+                template=template,
+                period=period,
+                phase_origin_ts=origin,
+                ts_list=ts_recent,
+            )
+
+            mae = float(np.mean(np.abs(actual - pred)))
+
+            # 轻微惩罚周期变化，避免过拟合抖动
+            penalty = abs(period - base_period) * 0.5
+            score = mae + penalty
+
+            best_score = best_mae + abs(best_period - base_period) * 0.5
+
+            if score < best_score:
+                best_period = period
+                best_origin = origin
+                best_pred = pred
+                best_mae = mae
+
+    best_origin = normalize_origin_near(best_origin, best_period, last_ts)
+
+    return int(best_period), int(best_origin), best_pred, float(best_mae)
+
+
 # =============================================================================
 # 异常检测
 # =============================================================================
 
-def calc_threshold(pred: np.ndarray, abs_threshold: float, rel_threshold: float) -> np.ndarray:
+def calc_threshold(
+    pred: np.ndarray,
+    abs_threshold: float,
+    rel_threshold: float,
+) -> np.ndarray:
     return np.maximum(abs_threshold, np.abs(pred) * rel_threshold)
 
 
@@ -655,33 +783,8 @@ def calc_bounds(
     rel_threshold: float,
 ) -> Tuple[np.ndarray, np.ndarray]:
     threshold = calc_threshold(pred, abs_threshold, rel_threshold)
-    return pred - threshold, pred + threshold
-
-
-def find_best_phase_origin_for_recent(
-    state: BaselineState,
-    ts_recent: List[int],
-    actual: np.ndarray,
-) -> Tuple[int, np.ndarray, float]:
-    period = int(state.period)
-    base_origin = int(state.phase_origin_ts)
-    max_shift = max(1, int(round(period * PHASE_SEARCH_RATIO)))
-
-    best_origin = base_origin
-    best_pred = predict_with_origin(state, ts_recent, base_origin)
-    best_mae = float(np.mean(np.abs(actual - best_pred)))
-
-    for shift in range(-max_shift, max_shift + 1):
-        origin = base_origin + shift
-        pred = predict_with_origin(state, ts_recent, origin)
-        mae = float(np.mean(np.abs(actual - pred)))
 
-        if mae < best_mae:
-            best_mae = mae
-            best_origin = origin
-            best_pred = pred
-
-    return best_origin, best_pred, best_mae
+    return pred - threshold, pred + threshold
 
 
 def detect_anomaly(
@@ -690,36 +793,50 @@ def detect_anomaly(
     ys_grid: np.ndarray,
     abs_threshold: float,
     rel_threshold: float,
-) -> Tuple[bool, float, float, float, int]:
-    if len(ys_grid) < DETECT_WINDOW_SECONDS:
-        return False, 0.0, 0.0, 0.0, int(state.phase_origin_ts)
-
-    ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
-    actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float)
-
-    best_origin, pred, _ = find_best_phase_origin_for_recent(
+) -> Tuple[bool, float, float, float, int, int]:
+    best_period, best_origin, pred_recent, _ = phase_lock_recent(
         state=state,
-        ts_recent=ts_recent,
-        actual=actual,
+        ts_grid=ts_grid,
+        ys_grid=ys_grid,
     )
 
-    threshold = calc_threshold(pred, abs_threshold, rel_threshold)
-    abs_err = np.abs(actual - pred)
+    recent_len = len(pred_recent)
+
+    if recent_len <= 0:
+        return False, 0.0, 0.0, 0.0, best_period, best_origin
+
+    actual = ys_grid[-recent_len:].astype(float)
+
+    threshold = calc_threshold(pred_recent, abs_threshold, rel_threshold)
+
+    abs_err = np.abs(actual - pred_recent)
     outside = abs_err > threshold
 
     outside_ratio = float(np.mean(outside))
     mean_abs_err = float(np.mean(abs_err))
-    mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1.0)))
+    mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred_recent), 1.0)))
+
     is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD
 
-    return is_anomaly, outside_ratio, mean_abs_err, mean_rel_err, int(best_origin)
+    return (
+        is_anomaly,
+        outside_ratio,
+        mean_abs_err,
+        mean_rel_err,
+        int(best_period),
+        int(best_origin),
+    )
 
 
 # =============================================================================
 # 健康基线状态管理
 # =============================================================================
 
-def create_initial_state(ts_grid: np.ndarray, ys_grid: np.ndarray, now_sec: int) -> Optional[BaselineState]:
+def create_initial_state(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    now_sec: int,
+) -> Optional[BaselineState]:
     baseline = build_current_baseline(ts_grid, ys_grid)
 
     if baseline is None:
@@ -740,6 +857,26 @@ def create_initial_state(ts_grid: np.ndarray, ys_grid: np.ndarray, now_sec: int)
     )
 
 
+def apply_phase_lock_to_state(
+    state: BaselineState,
+    best_period: int,
+    best_origin: int,
+) -> None:
+    best_period = int(best_period)
+
+    if best_period <= 1:
+        return
+
+    template = np.array(state.template, dtype=float)
+
+    if len(template) != best_period:
+        template = resample_template(template, best_period)
+
+    state.period = best_period
+    state.phase_origin_ts = int(best_origin)
+    state.template = template.astype(float).tolist()
+
+
 def maybe_update_state(
     key: str,
     ts_grid: np.ndarray,
@@ -772,7 +909,14 @@ def maybe_update_state(
     elapsed = min(elapsed, POLL_INTERVAL * 2)
     state.last_seen_ts = now_sec
 
-    is_anom, outside_ratio, mean_abs_err, mean_rel_err, best_origin = detect_anomaly(
+    (
+        is_anomaly,
+        outside_ratio,
+        mean_abs_err,
+        mean_rel_err,
+        best_period,
+        best_origin,
+    ) = detect_anomaly(
         state=state,
         ts_grid=ts_grid,
         ys_grid=ys_grid,
@@ -780,9 +924,10 @@ def maybe_update_state(
         rel_threshold=rel_threshold,
     )
 
-    if is_anom:
+    if is_anomaly:
         state.status = BASELINE_STATUS_ANOMALY
         state.clean_seconds = 0
+
         BASELINE_STATES[key] = state
 
         logger.warning(
@@ -795,13 +940,17 @@ def maybe_update_state(
 
         return state, True, outside_ratio, mean_abs_err, mean_rel_err
 
+    old_period = int(state.period)
     old_origin = int(state.phase_origin_ts)
-    state.phase_origin_ts = int(best_origin)
 
-    if abs(state.phase_origin_ts - old_origin) >= 1:
-        logger.debug(
-            "相位校正 key=%s origin %s -> %s",
+    apply_phase_lock_to_state(state, best_period, best_origin)
+
+    if old_period != state.period or old_origin != state.phase_origin_ts:
+        logger.info(
+            "phase-lock key=%s period %s -> %s origin %s -> %s",
             key,
+            old_period,
+            state.period,
             datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"),
             datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
         )
@@ -809,9 +958,15 @@ def maybe_update_state(
     if state.status == BASELINE_STATUS_ANOMALY:
         state.status = BASELINE_STATUS_RECOVERING
         state.clean_seconds = elapsed
+
         BASELINE_STATES[key] = state
 
-        logger.info("异常开始恢复 key=%s clean_seconds=%ss", key, state.clean_seconds)
+        logger.info(
+            "异常开始恢复 key=%s clean_seconds=%ss",
+            key,
+            state.clean_seconds,
+        )
+
         return state, False, outside_ratio, mean_abs_err, mean_rel_err
 
     if state.status == BASELINE_STATUS_RECOVERING:
@@ -834,17 +989,27 @@ def maybe_update_state(
         int(state.period) * MAX_CYCLES_FOR_TEMPLATE,
     )
 
-    baseline = build_current_baseline(ts_grid=ts_grid, ys_grid=ys_grid, tail_seconds=tail_seconds)
+    baseline = build_current_baseline(
+        ts_grid=ts_grid,
+        ys_grid=ys_grid,
+        tail_seconds=tail_seconds,
+    )
 
     if baseline is None:
         BASELINE_STATES[key] = state
         return state, False, outside_ratio, mean_abs_err, mean_rel_err
 
     new_period, new_origin, new_template = baseline
+
     old_template = np.array(state.template, dtype=float)
+
     alpha = RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else HEALTHY_EMA_ALPHA
 
-    merged = merge_template(old_template=old_template, new_template=new_template, alpha=alpha)
+    merged = merge_template(
+        old_template=old_template,
+        new_template=new_template,
+        alpha=alpha,
+    )
 
     state.period = int(new_period)
     state.phase_origin_ts = int(new_origin)
@@ -878,7 +1043,12 @@ def maybe_update_state(
 # =============================================================================
 
 def prom_escape_label_value(value: str) -> str:
-    return str(value).replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"')
+    return (
+        str(value)
+        .replace("\\", "\\\\")
+        .replace("\n", "\\n")
+        .replace('"', '\\"')
+    )
 
 
 def labels_to_str(labels: Dict[str, str]) -> str:
@@ -926,7 +1096,9 @@ def write_series(
         resp = requests.post(
             f"{VM_URL}/api/v1/import/prometheus",
             data=payload.encode("utf-8"),
-            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
+            headers={
+                "Content-Type": "text/plain; version=0.0.4; charset=utf-8",
+            },
             timeout=10,
         )
         resp.raise_for_status()
@@ -951,17 +1123,57 @@ def write_prediction_bundle(
     mean_rel_err: float,
     event_ts: int,
 ) -> bool:
-    ok1 = write_series(pred_metric, labels, ts_future, pred_values.astype(float).tolist())
-    ok2 = write_series(f"{pred_metric}_lower", labels, ts_future, lower_values.astype(float).tolist())
-    ok3 = write_series(f"{pred_metric}_upper", labels, ts_future, upper_values.astype(float).tolist())
+    ok1 = write_series(
+        metric_name=pred_metric,
+        labels=labels,
+        ts_list=ts_future,
+        values=pred_values.astype(float).tolist(),
+    )
+
+    ok2 = write_series(
+        metric_name=f"{pred_metric}_lower",
+        labels=labels,
+        ts_list=ts_future,
+        values=lower_values.astype(float).tolist(),
+    )
+
+    ok3 = write_series(
+        metric_name=f"{pred_metric}_upper",
+        labels=labels,
+        ts_list=ts_future,
+        values=upper_values.astype(float).tolist(),
+    )
 
     anomaly_labels = dict(labels)
     anomaly_labels["type"] = "prediction_deviation"
 
-    ok4 = write_series(anomaly_metric, anomaly_labels, [event_ts], [1.0 if is_anomaly else 0.0])
-    ok5 = write_series(f"{anomaly_metric}_outside_ratio", anomaly_labels, [event_ts], [outside_ratio])
-    ok6 = write_series(f"{anomaly_metric}_mean_abs_error", anomaly_labels, [event_ts], [mean_abs_err])
-    ok7 = write_series(f"{anomaly_metric}_mean_rel_error", anomaly_labels, [event_ts], [mean_rel_err])
+    ok4 = write_series(
+        metric_name=anomaly_metric,
+        labels=anomaly_labels,
+        ts_list=[event_ts],
+        values=[1.0 if is_anomaly else 0.0],
+    )
+
+    ok5 = write_series(
+        metric_name=f"{anomaly_metric}_outside_ratio",
+        labels=anomaly_labels,
+        ts_list=[event_ts],
+        values=[outside_ratio],
+    )
+
+    ok6 = write_series(
+        metric_name=f"{anomaly_metric}_mean_abs_error",
+        labels=anomaly_labels,
+        ts_list=[event_ts],
+        values=[mean_abs_err],
+    )
+
+    ok7 = write_series(
+        metric_name=f"{anomaly_metric}_mean_rel_error",
+        labels=anomaly_labels,
+        ts_list=[event_ts],
+        values=[mean_rel_err],
+    )
 
     return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7
 
@@ -970,7 +1182,9 @@ def write_prediction_bundle(
 # 标签解析
 # =============================================================================
 
-_LABEL_PATTERN = re.compile(r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*')
+_LABEL_PATTERN = re.compile(
+    r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
+)
 
 
 def parse_labels_from_query(query: str) -> Dict[str, str]:
@@ -987,7 +1201,14 @@ def parse_labels_from_query(query: str) -> Dict[str, str]:
     for match in _LABEL_PATTERN.finditer(label_part):
         key = match.group(1)
         value = match.group(2)
-        value = value.replace('\\"', '"').replace("\\n", "\n").replace("\\\\", "\\")
+
+        value = (
+            value
+            .replace('\\"', '"')
+            .replace("\\n", "\n")
+            .replace("\\\\", "\\")
+        )
+
         labels[key] = value
 
     return labels
@@ -1042,7 +1263,12 @@ def load_state() -> None:
             states[key] = BaselineState(**value)
 
         BASELINE_STATES = states
-        logger.info("已加载预测状态文件 %s，状态数量=%d", STATE_FILE, len(BASELINE_STATES))
+
+        logger.info(
+            "已加载预测状态文件 %s，状态数量=%d",
+            STATE_FILE,
+            len(BASELINE_STATES),
+        )
 
     except Exception as e:
         logger.warning("加载预测状态文件失败，将重新学习: %s", e)
@@ -1069,10 +1295,14 @@ def save_state() -> None:
 
 
 # =============================================================================
-# 时间轴选择
+# 时间轴
 # =============================================================================
 
-def build_prediction_timestamps(key: str, last_real_ts: int, now_sec: int) -> Optional[List[int]]:
+def build_prediction_timestamps(
+    key: str,
+    last_real_ts: int,
+    now_sec: int,
+) -> Optional[List[int]]:
     data_lag = now_sec - last_real_ts
 
     if data_lag > MAX_DATA_LAG_SECONDS:
@@ -1086,24 +1316,24 @@ def build_prediction_timestamps(key: str, last_real_ts: int, now_sec: int) -> Op
 
     last_written_real_ts = LAST_REAL_TS_WRITTEN.get(key)
 
-    if last_written_real_ts is not None:
-        advance = last_real_ts - int(last_written_real_ts)
-
-        if advance < MIN_REAL_ADVANCE_SECONDS:
-            logger.info(
-                "真实数据时间戳未推进，跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s",
-                key,
-                last_real_ts,
-                last_written_real_ts,
-            )
-            return None
+    if last_written_real_ts is not None and last_real_ts <= int(last_written_real_ts):
+        logger.info(
+            "真实数据时间戳未推进，跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s",
+            key,
+            last_real_ts,
+            last_written_real_ts,
+        )
+        return None
 
     if ALIGN_PREDICTION_TO_LAST_REAL_TS:
         base_ts = last_real_ts
     else:
         base_ts = now_sec
 
-    return [base_ts + i + 1 for i in range(WRITE_HORIZON_SECONDS)]
+    return [
+        base_ts + i + 1
+        for i in range(WRITE_HORIZON_SECONDS)
+    ]
 
 
 # =============================================================================
@@ -1123,17 +1353,28 @@ def run_once() -> None:
         ts, ys = fetch_history(query)
 
         if len(ys) < MIN_POINTS:
-            logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
+            logger.info(
+                "[%s] %s 数据不足（%d 点），跳过",
+                now_str,
+                query,
+                len(ys),
+            )
             continue
 
         ts_grid, ys_grid = normalize_history(ts, ys)
 
         if len(ys_grid) < MIN_POINTS:
-            logger.info("[%s] %s 清洗后数据不足（%d 点），跳过", now_str, query, len(ys_grid))
+            logger.info(
+                "[%s] %s 清洗后数据不足（%d 点），跳过",
+                now_str,
+                query,
+                len(ys_grid),
+            )
             continue
 
         base_labels = parse_labels_from_query(query)
         write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
+
         key = series_key(pred_metric, write_labels)
 
         state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err = maybe_update_state(
@@ -1145,7 +1386,11 @@ def run_once() -> None:
         )
 
         if state is None:
-            logger.info("[%s] %s 暂无可用健康模板，等待学习", now_str, query)
+            logger.info(
+                "[%s] %s 暂无可用健康模板，等待学习",
+                now_str,
+                query,
+            )
             continue
 
         now_sec = int(time.time())
@@ -1161,7 +1406,8 @@ def run_once() -> None:
         if not ts_future:
             continue
 
-        pred_values = predict_with_origin(state, ts_future)
+        pred_values = predict_with_state(state, ts_future)
+
         lower_values, upper_values = calc_bounds(
             pred=pred_values,
             abs_threshold=abs_threshold,
@@ -1184,7 +1430,11 @@ def run_once() -> None:
         )
 
         if not ok:
-            logger.error("[%s] %s 写入预测数据失败", now_str, query)
+            logger.error(
+                "[%s] %s 写入预测数据失败",
+                now_str,
+                query,
+            )
             continue
 
         LAST_REAL_TS_WRITTEN[key] = last_real_ts
@@ -1234,4 +1484,4 @@ def main() -> None:
 
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file

From 79e9f9b080e3f5fc4a284ab1422d3a54aacaff0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 21 May 2026 21:23:40 +0800
Subject: [PATCH 26/55] feat(pridict_v4): update pridict v4 version

---
 ai/pridict_v4.py | 1604 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1604 insertions(+)
 create mode 100644 ai/pridict_v4.py

diff --git a/ai/pridict_v4.py b/ai/pridict_v4.py
new file mode 100644
index 0000000..774ad3a
--- /dev/null
+++ b/ai/pridict_v4.py
@@ -0,0 +1,1604 @@
+# -*- coding: utf-8 -*-
+"""
+ProtoForge Predictor v11
+
+Core capabilities:
+1. feed_rate / spindle_speed / spindle_current use phase-lock point prediction.
+2. vibration_x / vibration_y / vibration_z use a phase-band prediction band.
+3. Vibration metrics no longer aim for an exact point-by-point fit; they emit:
+   - xxx_predicted        median prediction line
+   - xxx_predicted_upper  upper bound of the normal range
+   - xxx_predicted_lower  lower bound of the normal range
+4. The prediction start is anchored to the last real point, last_real_ts, to avoid time misalignment.
+5. The healthy template is frozen during an anomaly; fault data is not learned.
+6. After a fault recovers, wait for stability before resuming template learning.
+"""
+
+import json
+import logging
+import math
+import os
+import re
+import time
+from dataclasses import asdict, dataclass
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Tuple
+
+import numpy as np
+import requests
+
+
+# =============================================================================
+# 日志配置
+# =============================================================================
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+)
+
+logger = logging.getLogger(__name__)
+
+
+# =============================================================================
+# 基础配置
+# =============================================================================
+
+VM_URL = "http://localhost:8428"
+STATE_FILE = "/tmp/protoforge_predictor_state_v11.json"
+
+HISTORY_MINUTES = 30
+HORIZON_SECONDS = 120
+POLL_INTERVAL = 30
+
+WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)
+
+QUERY_STEP = "1s"
+MIN_POINTS = 120
+
+MIN_PERIOD_SECONDS = 5
+MAX_PERIOD_SECONDS = 3600
+
+MIN_FULL_CYCLES_FOR_TEMPLATE = 3
+MAX_CYCLES_FOR_TEMPLATE = 8
+
+DETECT_WINDOW_SECONDS = 20
+RECOVERY_MIN_SECONDS = 60
+
+HEALTHY_EMA_ALPHA = 0.10
+RECOVERY_EMA_ALPHA = 0.25
+
+OUTSIDE_RATIO_THRESHOLD = 0.60
+
+VALLEY_QUANTILE = 45
+
+MAX_DATA_LAG_SECONDS = 180
+
+PHASE_LOCK_MIN_WINDOW_SECONDS = 45
+PHASE_LOCK_MAX_WINDOW_SECONDS = 180
+PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12
+PHASE_LOCK_ORIGIN_SEARCH_RATIO = 0.35
+PHASE_LOCK_PERIOD_STEP = 1
+PHASE_LOCK_ORIGIN_STEP = 1
+
+
+# =============================================================================
+# 指标配置
+# =============================================================================
+
+PREDICT_TARGETS = [
+    {
+        "query": 'feed_rate{device_id="fanuc-cnc"}',
+        "pred_metric": "feed_rate_predicted",
+        "anomaly_metric": "feed_rate_anomaly",
+        "strategy": "phase_point",
+        "abs_threshold": 400.0,
+        "rel_threshold": 0.25,
+        "smooth_window": 1,
+    },
+    {
+        "query": 'spindle_speed{device_id="fanuc-cnc"}',
+        "pred_metric": "spindle_speed_predicted",
+        "anomaly_metric": "spindle_speed_anomaly",
+        "strategy": "phase_point",
+        "abs_threshold": 500.0,
+        "rel_threshold": 0.25,
+        "smooth_window": 1,
+    },
+    {
+        "query": 'spindle_current{device_id="fanuc-cnc"}',
+        "pred_metric": "spindle_current_predicted",
+        "anomaly_metric": "spindle_current_anomaly",
+        "strategy": "phase_point",
+        "abs_threshold": 5.0,
+        "rel_threshold": 0.25,
+        "smooth_window": 1,
+    },
+    {
+        "query": 'vibration_x{device_id="fanuc-cnc"}',
+        "pred_metric": "vibration_x_predicted",
+        "anomaly_metric": "vibration_x_anomaly",
+        "strategy": "phase_band",
+        "abs_threshold": 0.12,
+        "rel_threshold": 0.35,
+        "smooth_window": 5,
+        "band_low_q": 10,
+        "band_high_q": 90,
+        "band_pad_abs": 0.06,
+    },
+    {
+        "query": 'vibration_y{device_id="fanuc-cnc"}',
+        "pred_metric": "vibration_y_predicted",
+        "anomaly_metric": "vibration_y_anomaly",
+        "strategy": "phase_band",
+        "abs_threshold": 0.12,
+        "rel_threshold": 0.35,
+        "smooth_window": 5,
+        "band_low_q": 10,
+        "band_high_q": 90,
+        "band_pad_abs": 0.06,
+    },
+    {
+        "query": 'vibration_z{device_id="fanuc-cnc"}',
+        "pred_metric": "vibration_z_predicted",
+        "anomaly_metric": "vibration_z_anomaly",
+        "strategy": "phase_band",
+        "abs_threshold": 0.12,
+        "rel_threshold": 0.35,
+        "smooth_window": 5,
+        "band_low_q": 10,
+        "band_high_q": 90,
+        "band_pad_abs": 0.06,
+    },
+]
+
+EXTRA_PREDICT_LABELS = {
+    "forecast": "phase_band_health_v11",
+    "source": "protoforge",
+}
+
+BASELINE_STATUS_HEALTHY = "healthy"
+BASELINE_STATUS_ANOMALY = "anomaly"
+BASELINE_STATUS_RECOVERING = "recovering"
+
+
+# =============================================================================
+# 状态结构
+# =============================================================================
+
+@dataclass
+class BaselineState:
+    period: int
+    phase_origin_ts: int
+    template: List[float]
+    lower_template: List[float]
+    upper_template: List[float]
+    strategy: str
+    status: str
+    clean_seconds: int
+    last_update_ts: int
+    last_seen_ts: int
+    y_min: float
+    y_max: float
+
+
+BASELINE_STATES: Dict[str, BaselineState] = {}
+LAST_REAL_TS_WRITTEN: Dict[str, int] = {}
+
+
+# =============================================================================
+# VictoriaMetrics 读取
+# =============================================================================
+
+def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]:
+    """Fetch the last `minutes` minutes of `query` as (timestamps, values); ([], []) on failure or no data."""
+    now = datetime.now()
+    start = now - timedelta(minutes=minutes)
+
+    try:
+        resp = requests.get(
+            f"{VM_URL}/api/v1/query_range",
+            params={
+                "query": query,
+                "start": start.timestamp(),
+                "end": now.timestamp(),
+                "step": QUERY_STEP,
+            },
+            timeout=10,
+        )
+        resp.raise_for_status()
+    except requests.RequestException as e:
+        logger.error("拉取数据失败 query=%s: %s", query, e)
+        return [], []
+
+    try:
+        result = resp.json().get("data", {}).get("result", [])
+    except Exception as e:
+        logger.error("解析 VM 返回失败 query=%s: %s", query, e)
+        return [], []
+
+    if not result:
+        return [], []
+
+    values = result[0].get("values", [])
+    ts = []
+    ys = []
+
+    for item in values:
+        if len(item) < 2:
+            continue
+
+        try:
+            t = float(item[0])
+            y = float(item[1])
+        except Exception:
+            continue
+
+        # Skip samples whose timestamp or value is NaN or infinite.
+        if not math.isfinite(t) or not math.isfinite(y):
+            continue
+        ts.append(t)
+        ys.append(y)
+
+    return ts, ys
+
+
+def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]:
+    """Round timestamps to whole seconds, sort them, and linearly resample the values onto a 1-second grid."""
+    if not ts or not ys or len(ts) != len(ys):
+        return np.array([]), np.array([])
+
+    data = {}
+    for t, y in zip(ts, ys):
+        try:
+            sec = int(round(float(t)))
+            val = float(y)
+        except Exception:
+            continue
+        if not math.isfinite(sec) or not math.isfinite(val):
+            continue
+
+        # A later sample in the same second overwrites the earlier one.
+        data[sec] = val
+
+    if not data:
+        return np.array([]), np.array([])
+
+    sorted_items = sorted(data.items(), key=lambda x: x[0])
+
+    ts_clean = np.array([x[0] for x in sorted_items], dtype=float)
+    ys_clean = np.array([x[1] for x in sorted_items], dtype=float)
+
+    if len(ts_clean) < 2:
+        return ts_clean, ys_clean
+
+    start_sec = int(ts_clean[0])
+    end_sec = int(ts_clean[-1])
+
+    if end_sec <= start_sec:
+        return ts_clean, ys_clean
+
+    # Fill every second between the first and last sample by interpolation.
+    ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float)
+    ys_grid = np.interp(ts_grid, ts_clean, ys_clean)
+    return ts_grid, ys_grid
+
+
+# =============================================================================
+# 平滑与预处理
+# =============================================================================
+
+def rolling_median(arr: np.ndarray, window: int) -> np.ndarray:
+    """Centred rolling median with edge padding; an even window is widened by one."""
+    if window <= 1 or len(arr) < window:
+        return arr.astype(float)
+
+    # Force an odd width so every output sample has a well-defined centre.
+    window += 1 - (window % 2)
+
+    half = window // 2
+    values = arr.astype(float)
+    padded = np.pad(values, (half, half), mode="edge")
+    medians = [
+        float(np.median(padded[start:start + window]))
+        for start in range(len(values))
+    ]
+    return np.array(medians, dtype=float)
+
+
+def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
+    """Centred moving average with edge padding; an even window is widened by one."""
+    if window <= 1 or len(arr) < window:
+        return arr.astype(float)
+
+    # An odd width keeps the output the same length as the input.
+    width = window + 1 if window % 2 == 0 else window
+    half = width // 2
+
+    padded = np.pad(arr.astype(float), (half, half), mode="edge")
+    weights = np.ones(width, dtype=float) / width
+    return np.convolve(padded, weights, mode="valid")
+
+
+def preprocess_values(ys_grid: np.ndarray, target: Dict) -> np.ndarray:
+    """Smooth the series: rolling median for phase_band, moving average otherwise."""
+    use_median = target.get("strategy", "phase_point") == "phase_band"
+    width = int(target.get("smooth_window", 1))
+    if use_median:
+        smoothed = rolling_median(ys_grid, width)
+    elif width > 1:
+        smoothed = moving_average(ys_grid, width)
+    else:
+        smoothed = ys_grid.astype(float)
+    return smoothed
+
+
+# =============================================================================
+# 周期估计
+# =============================================================================
+
+def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
+    """Estimate the dominant period from the FFT peak; fall back to 60.0 for short or flat input."""
+    n = len(ys_arr)
+    if n < 8:
+        return 60.0
+
+    centered = ys_arr - np.mean(ys_arr)
+
+    if np.allclose(centered, 0):
+        return 60.0
+
+    fft_vals = np.fft.rfft(centered)
+    freqs = np.fft.rfftfreq(n, d=1.0)
+
+    if len(freqs) <= 1:
+        return 60.0
+
+    # Drop the zero-frequency bin before looking for the peak.
+    power = np.abs(fft_vals[1:])
+    if len(power) == 0 or np.max(power) <= 0:
+        return 60.0
+
+    # +1 maps the peak index back into freqs, which still has the zero bin.
+    dominant_idx = int(np.argmax(power)) + 1
+    dominant_freq = float(freqs[dominant_idx])
+    if dominant_freq <= 0:
+        return 60.0
+
+    period = 1.0 / dominant_freq
+
+    return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+
+def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:
+    """Refine init_period to the autocorrelation peak near it; return the clipped input if no search is possible."""
+    n = len(ys_arr)
+    if n < 20:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    centered = ys_arr - np.mean(ys_arr)
+
+    if np.allclose(centered, 0):
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    # Autocorrelation at non-negative lags; the search covers roughly
+    # +/-30% around the initial estimate and never goes beyond n // 2.
+    corr = np.correlate(centered, centered, mode="full")[n - 1:]
+    p0 = int(round(init_period))
+    left = max(int(MIN_PERIOD_SECONDS), int(max(2, p0 * 0.7)))
+    right = min(n // 2, int(max(left + 1, p0 * 1.3)))
+
+    if right <= left:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    search = corr[left:right + 1]
+    if len(search) == 0:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    best_lag = left + int(np.argmax(search))
+
+    return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+
+def estimate_period_rough(ys_arr: np.ndarray) -> int:
+    """Rough period as a clamped integer: FFT estimate refined by autocorrelation."""
+    coarse = estimate_period_by_fft(ys_arr)
+    refined = int(round(refine_period_by_autocorr(ys_arr, coarse)))
+
+    lowest = int(MIN_PERIOD_SECONDS)
+    highest = int(MAX_PERIOD_SECONDS)
+    return int(max(lowest, min(highest, refined)))
+
+
+# =============================================================================
+# 谷底检测
+# =============================================================================
+
+def find_valley_indices(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    expected_period: int,
+) -> List[int]:
+    """Return indices of cycle valleys (local minima of the smoothed series); [] if the series is too short."""
+    n = len(ys_grid)
+    if n < max(10, expected_period * 2):
+        return []
+
+    period = max(3, int(expected_period))
+    smooth_window = max(3, int(round(period * 0.08)))
+    smooth_window = min(smooth_window, 21)
+
+    ys_smooth = moving_average(ys_grid, smooth_window)
+    threshold = float(np.percentile(ys_smooth, VALLEY_QUANTILE))
+
+    # Local minima at or below the VALLEY_QUANTILE percentile of the smoothed series.
+    candidates = []
+    for i in range(1, n - 1):
+        if (
+            ys_smooth[i] <= ys_smooth[i - 1]
+            and ys_smooth[i] < ys_smooth[i + 1]
+            and ys_smooth[i] <= threshold
+        ):
+            candidates.append(i)
+
+    # Too few candidates: retry without the percentile condition.
+    if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE:
+        candidates = []
+        for i in range(1, n - 1):
+            if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]:
+                candidates.append(i)
+
+    if not candidates:
+        return []
+
+    # Enforce a minimum index spacing; of two close minima keep the lower one.
+    min_distance = max(2, int(round(period * 0.55)))
+    selected = []
+    for idx in candidates:
+        if not selected:
+            selected.append(idx)
+            continue
+
+        if idx - selected[-1] >= min_distance:
+            selected.append(idx)
+            continue
+
+        if ys_smooth[idx] < ys_smooth[selected[-1]]:
+            selected[-1] = idx
+
+    if len(selected) < 2:
+        return selected
+
+    # Second pass on timestamp gaps; valleys that are too close are merged.
+    cleaned = [selected[0]]
+    for idx in selected[1:]:
+        diff = int(ts_grid[idx] - ts_grid[cleaned[-1]])
+
+        if int(period * 0.55) <= diff <= int(period * 1.60):
+            cleaned.append(idx)
+            continue
+
+        if diff < int(period * 0.55):
+            if ys_smooth[idx] < ys_smooth[cleaned[-1]]:
+                cleaned[-1] = idx
+            continue
+
+        cleaned.append(idx)
+
+    return cleaned
+
+
+def detect_period_and_valleys(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+) -> Tuple[int, List[int]]:
+    """Return (period, valley indices), refining the period from valley spacing."""
+    rough = estimate_period_rough(ys_grid)
+    valleys = find_valley_indices(ts_grid, ys_grid, rough)
+
+    period = rough
+
+    # With at least three valleys, prefer the median of the plausible gaps.
+    if len(valleys) >= 3:
+        gaps = np.diff(ts_grid[valleys])
+        plausible = (gaps >= rough * 0.55) & (gaps <= rough * 1.60)
+        usable = gaps[plausible]
+
+        if len(usable) > 0:
+            period = int(round(float(np.median(usable))))
+
+    lowest, highest = int(MIN_PERIOD_SECONDS), int(MAX_PERIOD_SECONDS)
+    return int(max(lowest, min(highest, period))), valleys
+
+
+# =============================================================================
+# 模板构建
+# =============================================================================
+
+def build_templates_from_valleys(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    period: int,
+    valleys: List[int],
+    target: Dict,
+) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
+    """Build (mid, lower, upper) one-period templates from valley-to-valley cycles; None if too few cycles."""
+    if period <= 1 or len(valleys) < MIN_FULL_CYCLES_FOR_TEMPLATE + 1:
+        return None
+
+    strategy = target.get("strategy", "phase_point")
+    low_q = float(target.get("band_low_q", 10))
+    high_q = float(target.get("band_high_q", 90))
+
+    # Keep only cycles whose length is within 0.55x-1.60x of the period.
+    pairs = []
+    for a, b in zip(valleys[:-1], valleys[1:]):
+        cycle_len = float(ts_grid[b] - ts_grid[a])
+        if period * 0.55 <= cycle_len <= period * 1.60:
+            pairs.append((a, b, cycle_len))
+
+    if len(pairs) < MIN_FULL_CYCLES_FOR_TEMPLATE:
+        return None
+
+    pairs = pairs[-MAX_CYCLES_FOR_TEMPLATE:]
+    phase_grid = np.arange(period, dtype=float)
+    segments = []
+    weights = []
+
+    for idx, (a, b, cycle_len) in enumerate(pairs):
+        seg_ts = ts_grid[a:b + 1]
+        seg_y = ys_grid[a:b + 1]
+        if len(seg_y) < 3:
+            continue
+
+        # Rescale the cycle onto exactly `period` phase samples.
+        x_old = (seg_ts - seg_ts[0]) / cycle_len * period
+        seg = np.interp(phase_grid, x_old, seg_y)
+
+        segments.append(seg.astype(float))
+        weights.append(0.5 + 0.5 * ((idx + 1) / len(pairs)))
+
+    if len(segments) < MIN_FULL_CYCLES_FOR_TEMPLATE:
+        return None
+
+    arr = np.vstack(segments)
+    w_arr = np.array(weights, dtype=float)
+
+    # phase_band: per-phase percentiles; otherwise a weighted mean and a zero-width band.
+    if strategy == "phase_band":
+        mid_template = np.percentile(arr, 50, axis=0)
+        lower_template = np.percentile(arr, low_q, axis=0)
+        upper_template = np.percentile(arr, high_q, axis=0)
+    else:
+        mid_template = np.average(arr, axis=0, weights=w_arr)
+        lower_template = mid_template.copy()
+        upper_template = mid_template.copy()
+
+    return (
+        mid_template.astype(float),
+        lower_template.astype(float),
+        upper_template.astype(float),
+    )
+
+
+def build_current_baseline(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    target: Dict,
+    tail_seconds: Optional[int] = None,
+) -> Optional[Tuple[int, int, np.ndarray, np.ndarray, np.ndarray]]:
+    """Detect the period and build templates; return (period, phase_origin_ts, mid, lower, upper) or None."""
+    if len(ys_grid) < MIN_POINTS:
+        return None
+
+    if tail_seconds is not None and tail_seconds > 0:
+        cutoff = ts_grid[-1] - int(tail_seconds)
+        mask = ts_grid >= cutoff
+        ts_use = ts_grid[mask]
+        ys_use = ys_grid[mask]
+    else:
+        ts_use = ts_grid
+        ys_use = ys_grid
+
+    if len(ys_use) < MIN_POINTS:
+        return None
+
+    period, valleys = detect_period_and_valleys(ts_use, ys_use)
+    templates = build_templates_from_valleys(
+        ts_grid=ts_use,
+        ys_grid=ys_use,
+        period=period,
+        valleys=valleys,
+        target=target,
+    )
+
+    if templates is None or len(valleys) == 0:
+        return None
+
+    # The timestamp of the last detected valley becomes the phase origin.
+    template, lower_template, upper_template = templates
+    phase_origin_ts = int(round(float(ts_use[valleys[-1]])))
+    return int(period), phase_origin_ts, template, lower_template, upper_template
+
+
+# =============================================================================
+# 模板预测
+# =============================================================================
+
+def circular_template_value(template: np.ndarray, phase: float) -> float:
+    """Linearly interpolate the template at a phase, wrapping past the end."""
+    size = len(template)
+    if size == 0:
+        return 0.0
+
+    wrapped = float(phase) % size
+    whole = math.floor(wrapped)
+    weight = wrapped - whole
+
+    lo = int(whole) % size
+    return float((1.0 - weight) * template[lo] + weight * template[(lo + 1) % size])
+
+
+def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
+    """Resample a one-period template to new_period samples, treating it as cyclic."""
+    old_period = len(old_template)
+    if old_period == new_period:
+        return old_template.astype(float)
+
+    # A degenerate length on either side collapses to the template mean.
+    if min(old_period, new_period) <= 1:
+        return np.full(new_period, float(np.mean(old_template)), dtype=float)
+
+    src_x = np.linspace(0.0, 1.0, old_period, endpoint=False)
+    dst_x = np.linspace(0.0, 1.0, new_period, endpoint=False)
+
+    # Tile one period on each side so interpolation wraps around the ends.
+    tiled_x = np.concatenate([src_x + shift for shift in (-1.0, 0.0, 1.0)])
+    return np.interp(dst_x, tiled_x, np.tile(old_template, 3)).astype(float)
+
+
+def predict_template_values(
+    template: np.ndarray,
+    period: int,
+    phase_origin_ts: int,
+    ts_list: List[int],
+) -> np.ndarray:
+    """Return the template value at each timestamp's phase, (ts - phase_origin_ts) % period."""
+    if period <= 1:
+        return np.zeros(len(ts_list), dtype=float)
+
+    if len(template) != period:
+        template = resample_template(template, period)
+
+    # Phases are whole seconds, so the interpolation weight of the next
+    # sample is always 0 and a direct index lookup yields the same value;
+    # one vectorised lookup replaces a Python-level call per timestamp.
+    ts_arr = np.asarray(ts_list, dtype=np.int64)
+    phases = (ts_arr - int(phase_origin_ts)) % int(period)
+    return np.asarray(template, dtype=float)[phases]
+
+
+def predict_state_bundle(
+    state: BaselineState,
+    ts_list: List[int],
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """Evaluate the state's three templates at the given timestamps.
+
+    All three share the state's period and phase origin.
+
+    Returns:
+        (mid, lower, upper) arrays, one value per entry of ts_list.
+    """
+    period = int(state.period)
+    origin = int(state.phase_origin_ts)
+
+    def evaluate(samples: List[float]) -> np.ndarray:
+        return predict_template_values(
+            template=np.array(samples, dtype=float),
+            period=period,
+            phase_origin_ts=origin,
+            ts_list=ts_list,
+        )
+
+    mid = evaluate(state.template)
+
+    lower = evaluate(state.lower_template)
+
+    upper = evaluate(state.upper_template)
+
+    return mid, lower, upper
+
+
+def normalize_origin_near(origin: int, period: int, near_ts: int) -> int:
+    """Shift origin by whole periods into the range (near_ts - period, near_ts]."""
+    if period <= 1:
+        return origin
+
+    origin = int(origin)
+    period = int(period)
+    near_ts = int(near_ts)
+
+    # Floor division moves the origin in O(1) in either direction; stepping
+    # one period per loop iteration is linear in the gap, which is large
+    # when the stored origin is far away from near_ts.
+    whole_periods = (near_ts - origin) // period
+
+    return origin + whole_periods * period
+
+
+def merge_template(
+    old_template: np.ndarray,
+    new_template: np.ndarray,
+    alpha: float,
+) -> np.ndarray:
+    """Blend the templates, giving the new one weight alpha (clamped to [0, 1])."""
+    weight_new = float(np.clip(alpha, 0.0, 1.0))
+    target_len = len(new_template)
+
+    # Bring the old template to the new length before blending.
+    if len(old_template) != target_len:
+        old_template = resample_template(old_template, target_len)
+    return ((1.0 - weight_new) * old_template + weight_new * new_template).astype(float)
+
+
+# =============================================================================
+# Phase Lock
+# =============================================================================
+
+def phase_lock_recent(
+    state: BaselineState,
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+) -> Tuple[int, int, np.ndarray, float]:
+    """Search period and origin near the stored state for the best fit to recent data; return (period, origin, pred, mae)."""
+    base_period = int(state.period)
+    base_origin = int(state.phase_origin_ts)
+    base_template = np.array(state.template, dtype=float)
+
+    if base_period <= 1 or len(base_template) <= 1:
+        ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
+        pred = predict_template_values(base_template, base_period, base_origin, ts_recent)
+        actual = ys_model[-len(ts_recent):].astype(float)
+        mae = float(np.mean(np.abs(actual - pred))) if len(actual) else 0.0
+        return base_period, base_origin, pred, mae
+
+    # Fit on a recent window of about two periods, clamped to the PHASE_LOCK_*_WINDOW_SECONDS limits.
+    window_seconds = max(
+        PHASE_LOCK_MIN_WINDOW_SECONDS,
+        min(PHASE_LOCK_MAX_WINDOW_SECONDS, int(base_period * 2)),
+    )
+
+    cutoff = ts_grid[-1] - window_seconds
+    mask = ts_grid >= cutoff
+    ts_recent_arr = ts_grid[mask].astype(int)
+    actual = ys_model[mask].astype(float)
+
+    if len(ts_recent_arr) < max(10, DETECT_WINDOW_SECONDS):
+        ts_recent_arr = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int)
+        actual = ys_model[-DETECT_WINDOW_SECONDS:].astype(float)
+
+    ts_recent = ts_recent_arr.tolist()
+    last_ts = int(ts_recent[-1])
+
+    p_min = max(
+        int(MIN_PERIOD_SECONDS),
+        int(round(base_period * (1.0 - PHASE_LOCK_PERIOD_SEARCH_RATIO))),
+    )
+    p_max = min(
+        int(MAX_PERIOD_SECONDS),
+        int(round(base_period * (1.0 + PHASE_LOCK_PERIOD_SEARCH_RATIO))),
+    )
+
+    # Baseline candidate: the stored period with its origin moved next to last_ts.
+    best_period = base_period
+    best_origin = normalize_origin_near(base_origin, base_period, last_ts)
+    best_template = resample_template(base_template, best_period)
+
+    best_pred = predict_template_values(
+        template=best_template,
+        period=best_period,
+        phase_origin_ts=best_origin,
+        ts_list=ts_recent,
+    )
+
+    best_mae = float(np.mean(np.abs(actual - best_pred)))
+
+    # Grid search; the score is the MAE plus 0.5 per unit of period change.
+    for period in range(p_min, p_max + 1, PHASE_LOCK_PERIOD_STEP):
+        template = resample_template(base_template, period)
+        center_origin = normalize_origin_near(base_origin, period, last_ts)
+        origin_shift = max(2, int(round(period * PHASE_LOCK_ORIGIN_SEARCH_RATIO)))
+
+        for shift in range(-origin_shift, origin_shift + 1, PHASE_LOCK_ORIGIN_STEP):
+            origin = center_origin + shift
+            pred = predict_template_values(
+                template=template,
+                period=period,
+                phase_origin_ts=origin,
+                ts_list=ts_recent,
+            )
+
+            mae = float(np.mean(np.abs(actual - pred)))
+            penalty = abs(period - base_period) * 0.5
+            score = mae + penalty
+            best_score = best_mae + abs(best_period - base_period) * 0.5
+            if score < best_score:
+                best_period = period
+                best_origin = origin
+                best_pred = pred
+                best_mae = mae
+
+    best_origin = normalize_origin_near(best_origin, best_period, last_ts)
+
+    return int(best_period), int(best_origin), best_pred, float(best_mae)
+
+
+# =============================================================================
+# 异常检测
+# =============================================================================
+
+def calc_point_bounds(
+    pred: np.ndarray, abs_threshold: float, rel_threshold: float,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """Return (lower, upper) = pred -/+ max(abs_threshold, |pred| * rel_threshold)."""
+    relative = np.abs(pred) * rel_threshold
+    margin = np.maximum(abs_threshold, relative)
+    return pred - margin, pred + margin
+
+
+def calc_final_bounds(
+    state: BaselineState,
+    pred: np.ndarray,
+    lower_raw: np.ndarray,
+    upper_raw: np.ndarray,
+    target: Dict,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """Return (lower, upper): padded template band for phase_band, else point bounds."""
+    abs_limit = float(target.get("abs_threshold", 1.0))
+    rel_limit = float(target.get("rel_threshold", 0.25))
+
+    if target.get("strategy", "phase_point") != "phase_band":
+        return calc_point_bounds(pred, abs_limit, rel_limit)
+
+    # Pad the raw band by band_pad_abs or |pred| * rel_threshold * 0.20,
+    # whichever is larger at each point.
+    floor_pad = float(target.get("band_pad_abs", abs_limit))
+    pad = np.maximum(floor_pad, np.abs(pred) * rel_limit * 0.20)
+    return lower_raw - pad, upper_raw + pad
+
+
+def detect_anomaly(
+    state: BaselineState,
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    target: Dict,
+) -> Tuple[bool, float, float, float, int, int]:
+    """Compare recent data to the phase-locked template; return (is_anomaly, outside_ratio, mean_abs_err, mean_rel_err, period, origin)."""
+    best_period, best_origin, pred_recent, _ = phase_lock_recent(
+        state=state,
+        ts_grid=ts_grid,
+        ys_model=ys_model,
+    )
+
+    recent_len = len(pred_recent)
+    if recent_len <= 0:
+        return False, 0.0, 0.0, 0.0, best_period, best_origin
+
+    actual = ys_model[-recent_len:].astype(float)
+
+    # Copy of the state with the phase-locked period and origin applied.
+    tmp_state = BaselineState(
+        period=best_period,
+        phase_origin_ts=best_origin,
+        template=state.template,
+        lower_template=state.lower_template,
+        upper_template=state.upper_template,
+        strategy=state.strategy,
+        status=state.status,
+        clean_seconds=state.clean_seconds,
+        last_update_ts=state.last_update_ts,
+        last_seen_ts=state.last_seen_ts,
+        y_min=state.y_min,
+        y_max=state.y_max,
+    )
+
+    recent_ts = ts_grid[-recent_len:].astype(int).tolist()
+    pred, lower_raw, upper_raw = predict_state_bundle(tmp_state, recent_ts)
+    lower, upper = calc_final_bounds(
+        state=tmp_state,
+        pred=pred,
+        lower_raw=lower_raw,
+        upper_raw=upper_raw,
+        target=target,
+    )
+
+    outside = (actual < lower) | (actual > upper)
+    abs_err = np.abs(actual - pred)
+
+    # The 1e-6 floor avoids division by zero in the relative error.
+    outside_ratio = float(np.mean(outside))
+    mean_abs_err = float(np.mean(abs_err))
+    mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1e-6)))
+    is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD
+
+    return (
+        is_anomaly,
+        outside_ratio,
+        mean_abs_err,
+        mean_rel_err,
+        int(best_period),
+        int(best_origin),
+    )
+
+
+# =============================================================================
+# 状态管理
+# =============================================================================
+
+def create_initial_state(
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    target: Dict,
+    now_sec: int,
+) -> Optional[BaselineState]:
+    """Build the first BaselineState (status healthy) from the history, or None if no baseline can be built."""
+    baseline = build_current_baseline(
+        ts_grid=ts_grid,
+        ys_grid=ys_model,
+        target=target,
+    )
+
+    if baseline is None:
+        return None
+
+    period, phase_origin_ts, template, lower_template, upper_template = baseline
+    return BaselineState(
+        period=int(period),
+        phase_origin_ts=int(phase_origin_ts),
+        template=template.astype(float).tolist(),
+        lower_template=lower_template.astype(float).tolist(),
+        upper_template=upper_template.astype(float).tolist(),
+        strategy=str(target.get("strategy", "phase_point")),
+        status=BASELINE_STATUS_HEALTHY,
+        clean_seconds=int(period * MAX_CYCLES_FOR_TEMPLATE),
+        last_update_ts=now_sec,
+        last_seen_ts=now_sec,
+        y_min=float(np.min(ys_model)),
+        y_max=float(np.max(ys_model)),
+    )
+
+
+def apply_phase_lock_to_state(
+    state: BaselineState,
+    best_period: int,
+    best_origin: int,
+) -> None:
+    best_period = int(best_period)
+
+    if best_period <= 1:
+        return
+
+    if len(state.template) != best_period:
+        state.template = resample_template(
+            np.array(state.template, dtype=float),
+            best_period,
+        ).astype(float).tolist()
+
+    if len(state.lower_template) != best_period:
+        state.lower_template = resample_template(
+            np.array(state.lower_template, dtype=float),
+            best_period,
+        ).astype(float).tolist()
+
+    if len(state.upper_template) != best_period:
+        state.upper_template = resample_template(
+            np.array(state.upper_template, dtype=float),
+            best_period,
+        ).astype(float).tolist()
+
+    state.period = best_period
+    state.phase_origin_ts = int(best_origin)
+
+
+def maybe_update_state(
+    key: str,
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    target: Dict,
+) -> Tuple[Optional[BaselineState], bool, float, float, float]:
+    now_sec = int(time.time())
+    state = BASELINE_STATES.get(key)
+
+    if state is None:
+        state = create_initial_state(
+            ts_grid=ts_grid,
+            ys_model=ys_model,
+            target=target,
+            now_sec=now_sec,
+        )
+
+        if state is None:
+            return None, False, 0.0, 0.0, 0.0
+
+        BASELINE_STATES[key] = state
+
+        logger.info(
+            "初始化健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss",
+            key,
+            state.strategy,
+            state.period,
+            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+            state.clean_seconds,
+        )
+
+        return state, False, 0.0, 0.0, 0.0
+
+    elapsed = max(1, now_sec - int(state.last_seen_ts))
+    elapsed = min(elapsed, POLL_INTERVAL * 2)
+    state.last_seen_ts = now_sec
+
+    (
+        is_anomaly,
+        outside_ratio,
+        mean_abs_err,
+        mean_rel_err,
+        best_period,
+        best_origin,
+    ) = detect_anomaly(
+        state=state,
+        ts_grid=ts_grid,
+        ys_model=ys_model,
+        target=target,
+    )
+
+    if is_anomaly:
+        state.status = BASELINE_STATUS_ANOMALY
+        state.clean_seconds = 0
+        BASELINE_STATES[key] = state
+
+        logger.warning(
+            "检测到异常，冻结模板 key=%s outside_ratio=%.2f mean_abs_err=%.4f mean_rel_err=%.4f",
+            key,
+            outside_ratio,
+            mean_abs_err,
+            mean_rel_err,
+        )
+
+        return state, True, outside_ratio, mean_abs_err, mean_rel_err
+
+    old_period = int(state.period)
+    old_origin = int(state.phase_origin_ts)
+
+    apply_phase_lock_to_state(state, best_period, best_origin)
+
+    if old_period != state.period or old_origin != state.phase_origin_ts:
+        logger.info(
+            "phase-lock key=%s period %s -> %s origin %s -> %s",
+            key,
+            old_period,
+            state.period,
+            datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"),
+            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+        )
+
+    if state.status == BASELINE_STATUS_ANOMALY:
+        state.status = BASELINE_STATUS_RECOVERING
+        state.clean_seconds = elapsed
+        BASELINE_STATES[key] = state
+
+        logger.info(
+            "异常开始恢复 key=%s clean_seconds=%ss",
+            key,
+            state.clean_seconds,
+        )
+
+        return state, False, outside_ratio, mean_abs_err, mean_rel_err
+
+    if state.status == BASELINE_STATUS_RECOVERING:
+        state.clean_seconds += elapsed
+    else:
+        state.status = BASELINE_STATUS_HEALTHY
+        state.clean_seconds += elapsed
+
+    min_clean_for_update = max(
+        RECOVERY_MIN_SECONDS,
+        int(state.period) * MIN_FULL_CYCLES_FOR_TEMPLATE,
+    )
+
+    if state.clean_seconds < min_clean_for_update:
+        BASELINE_STATES[key] = state
+        return state, False, outside_ratio, mean_abs_err, mean_rel_err
+
+    tail_seconds = min(
+        int(state.clean_seconds),
+        int(state.period) * MAX_CYCLES_FOR_TEMPLATE,
+    )
+
+    baseline = build_current_baseline(
+        ts_grid=ts_grid,
+        ys_grid=ys_model,
+        target=target,
+        tail_seconds=tail_seconds,
+    )
+
+    if baseline is None:
+        BASELINE_STATES[key] = state
+        return state, False, outside_ratio, mean_abs_err, mean_rel_err
+
+    new_period, new_origin, new_template, new_lower_template, new_upper_template = baseline
+
+    alpha = RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else HEALTHY_EMA_ALPHA
+
+    state.template = merge_template(
+        np.array(state.template, dtype=float),
+        new_template,
+        alpha,
+    ).astype(float).tolist()
+
+    state.lower_template = merge_template(
+        np.array(state.lower_template, dtype=float),
+        new_lower_template,
+        alpha,
+    ).astype(float).tolist()
+
+    state.upper_template = merge_template(
+        np.array(state.upper_template, dtype=float),
+        new_upper_template,
+        alpha,
+    ).astype(float).tolist()
+
+    state.period = int(new_period)
+    state.phase_origin_ts = int(new_origin)
+    state.status = BASELINE_STATUS_HEALTHY
+    state.last_update_ts = now_sec
+
+    if tail_seconds > 0 and len(ys_model) >= tail_seconds:
+        state.y_min = float(np.min(ys_model[-tail_seconds:]))
+        state.y_max = float(np.max(ys_model[-tail_seconds:]))
+    else:
+        state.y_min = float(np.min(ys_model))
+        state.y_max = float(np.max(ys_model))
+
+    BASELINE_STATES[key] = state
+
+    logger.info(
+        "更新健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss alpha=%.2f",
+        key,
+        state.strategy,
+        state.period,
+        datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+        state.clean_seconds,
+        alpha,
+    )
+
+    return state, False, outside_ratio, mean_abs_err, mean_rel_err
+
+
+# =============================================================================
+# Prometheus 写入
+# =============================================================================
+
+def prom_escape_label_value(value: str) -> str:
+    return (
+        str(value)
+        .replace("\\", "\\\\")
+        .replace("\n", "\\n")
+        .replace('"', '\\"')
+    )
+
+
+def labels_to_str(labels: Dict[str, str]) -> str:
+    if not labels:
+        return ""
+
+    parts = []
+
+    for k in sorted(labels.keys()):
+        parts.append(f'{k}="{prom_escape_label_value(labels[k])}"')
+
+    return "{" + ",".join(parts) + "}"
+
+
+def write_series(
+    metric_name: str,
+    labels: Dict[str, str],
+    ts_list: List[int],
+    values: List[float],
+) -> bool:
+    """POST one series to VM_URL in Prometheus text exposition format.
+
+    Non-finite samples are dropped; True means a non-empty write succeeded.
+    """
+    if not ts_list or not values or len(ts_list) != len(values):
+        return False
+
+    label_str = labels_to_str(labels)
+    lines = []
+
+    for t, y in zip(ts_list, values):
+        try:
+            ts_raw, val = float(t), float(y)
+        except (TypeError, ValueError, OverflowError):
+            continue
+        # Check the floats themselves: int(round(nan)) and int(round(inf)) raise.
+        if not (math.isfinite(ts_raw) and math.isfinite(val)):
+            continue
+        # Timestamps are rounded to whole seconds and sent in milliseconds.
+        lines.append(f"{metric_name}{label_str} {val:.6f} {int(round(ts_raw)) * 1000}")
+
+    if not lines:
+        return False
+
+    try:
+        resp = requests.post(
+            f"{VM_URL}/api/v1/import/prometheus",
+            data=("\n".join(lines) + "\n").encode("utf-8"),
+            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
+            timeout=10,
+        )
+        resp.raise_for_status()
+    except requests.RequestException as e:
+        logger.error("写入数据失败 metric=%s: %s", metric_name, e)
+        return False
+    return True
+
+
+def write_prediction_bundle(
+    pred_metric: str,
+    anomaly_metric: str,
+    labels: Dict[str, str],
+    ts_future: List[int],
+    pred_values: np.ndarray,
+    lower_values: np.ndarray,
+    upper_values: np.ndarray,
+    is_anomaly: bool,
+    outside_ratio: float,
+    mean_abs_err: float,
+    mean_rel_err: float,
+    event_ts: int,
+) -> bool:
+    ok1 = write_series(
+        metric_name=pred_metric,
+        labels=labels,
+        ts_list=ts_future,
+        values=pred_values.astype(float).tolist(),
+    )
+
+    ok2 = write_series(
+        metric_name=f"{pred_metric}_lower",
+        labels=labels,
+        ts_list=ts_future,
+        values=lower_values.astype(float).tolist(),
+    )
+
+    ok3 = write_series(
+        metric_name=f"{pred_metric}_upper",
+        labels=labels,
+        ts_list=ts_future,
+        values=upper_values.astype(float).tolist(),
+    )
+
+    anomaly_labels = dict(labels)
+    anomaly_labels["type"] = "prediction_deviation"
+
+    ok4 = write_series(
+        metric_name=anomaly_metric,
+        labels=anomaly_labels,
+        ts_list=[event_ts],
+        values=[1.0 if is_anomaly else 0.0],
+    )
+
+    ok5 = write_series(
+        metric_name=f"{anomaly_metric}_outside_ratio",
+        labels=anomaly_labels,
+        ts_list=[event_ts],
+        values=[outside_ratio],
+    )
+
+    ok6 = write_series(
+        metric_name=f"{anomaly_metric}_mean_abs_error",
+        labels=anomaly_labels,
+        ts_list=[event_ts],
+        values=[mean_abs_err],
+    )
+
+    ok7 = write_series(
+        metric_name=f"{anomaly_metric}_mean_rel_error",
+        labels=anomaly_labels,
+        ts_list=[event_ts],
+        values=[mean_rel_err],
+    )
+
+    return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7
+
+
+# =============================================================================
+# 标签解析
+# =============================================================================
+
+_LABEL_PATTERN = re.compile(
+    r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
+)
+
+
+def parse_labels_from_query(query: str) -> Dict[str, str]:
+    """Return the label="value" pairs found between "{" and "}" in query.
+
+    Escapes are decoded in one left-to-right pass: chained str.replace()
+    calls mis-decode an escaped backslash followed by "n" as a newline.
+    """
+    labels: Dict[str, str] = {}
+
+    if "{" not in query or "}" not in query:
+        return labels
+
+    # Text between the first "{" and the last "}".
+    label_part = query[query.index("{") + 1:query.rindex("}")]
+    decoded = {"\\": "\\", '"': '"', "n": "\n"}
+
+    def _unescape(m) -> str:
+        # Unknown escape sequences are left untouched.
+        return decoded.get(m.group(1), m.group(0))
+
+    for match in _LABEL_PATTERN.finditer(label_part):
+        labels[match.group(1)] = re.sub(
+            r"\\(.)", _unescape, match.group(2), flags=re.DOTALL
+        )
+
+    return labels
+
+
+def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]:
+    result = {}
+
+    for d in dicts:
+        if d:
+            result.update(d)
+
+    return result
+
+
+def series_key(metric_name: str, labels: Dict[str, str]) -> str:
+    return metric_name + labels_to_str(labels)
+
+
+# =============================================================================
+# 状态持久化
+# =============================================================================
+
+def load_state() -> None:
+    global BASELINE_STATES
+
+    if not os.path.exists(STATE_FILE):
+        return
+
+    try:
+        with open(STATE_FILE, "r", encoding="utf-8") as f:
+            raw = json.load(f)
+
+        states = {}
+
+        for key, value in raw.get("baseline_states", {}).items():
+            required_fields = {
+                "period",
+                "phase_origin_ts",
+                "template",
+                "lower_template",
+                "upper_template",
+                "strategy",
+                "status",
+                "clean_seconds",
+                "last_update_ts",
+                "last_seen_ts",
+                "y_min",
+                "y_max",
+            }
+
+            if not required_fields.issubset(set(value.keys())):
+                continue
+
+            states[key] = BaselineState(**value)
+
+        BASELINE_STATES = states
+
+        logger.info(
+            "已加载预测状态文件 %s，状态数量=%d",
+            STATE_FILE,
+            len(BASELINE_STATES),
+        )
+
+    except Exception as e:
+        logger.warning("加载预测状态文件失败，将重新学习: %s", e)
+
+
+def save_state() -> None:
+    try:
+        raw = {
+            "baseline_states": {
+                key: asdict(value)
+                for key, value in BASELINE_STATES.items()
+            }
+        }
+
+        tmp_file = STATE_FILE + ".tmp"
+
+        with open(tmp_file, "w", encoding="utf-8") as f:
+            json.dump(raw, f, ensure_ascii=False, indent=2)
+
+        os.replace(tmp_file, STATE_FILE)
+
+    except Exception as e:
+        logger.warning("保存预测状态文件失败: %s", e)
+
+
+# =============================================================================
+# 时间轴
+# =============================================================================
+
+def build_prediction_timestamps(
+    key: str,
+    last_real_ts: int,
+    now_sec: int,
+) -> Optional[List[int]]:
+    data_lag = now_sec - last_real_ts
+
+    if data_lag > MAX_DATA_LAG_SECONDS:
+        logger.warning(
+            "真实数据延迟过大，跳过预测 key=%s data_lag=%ss max=%ss",
+            key,
+            data_lag,
+            MAX_DATA_LAG_SECONDS,
+        )
+        return None
+
+    last_written_real_ts = LAST_REAL_TS_WRITTEN.get(key)
+
+    if last_written_real_ts is not None and last_real_ts <= int(last_written_real_ts):
+        logger.info(
+            "真实数据时间戳未推进，跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s",
+            key,
+            last_real_ts,
+            last_written_real_ts,
+        )
+        return None
+
+    base_ts = last_real_ts
+
+    return [
+        base_ts + i + 1
+        for i in range(WRITE_HORIZON_SECONDS)
+    ]
+
+
+# =============================================================================
+# 主流程
+# =============================================================================
+
+def run_once() -> None:
+    now_str = datetime.now().strftime("%H:%M:%S")
+
+    for target in PREDICT_TARGETS:
+        query = target["query"]
+        pred_metric = target["pred_metric"]
+        anomaly_metric = target["anomaly_metric"]
+
+        ts, ys = fetch_history(query)
+
+        if len(ys) < MIN_POINTS:
+            logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
+            continue
+
+        ts_grid, ys_grid_raw = normalize_history(ts, ys)
+
+        if len(ys_grid_raw) < MIN_POINTS:
+            logger.info("[%s] %s 清洗后数据不足（%d 点），跳过", now_str, query, len(ys_grid_raw))
+            continue
+
+        ys_grid_model = preprocess_values(ys_grid_raw, target)
+
+        base_labels = parse_labels_from_query(query)
+        write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
+
+        key = series_key(pred_metric, write_labels)
+
+        state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err = maybe_update_state(
+            key=key,
+            ts_grid=ts_grid,
+            ys_model=ys_grid_model,
+            target=target,
+        )
+
+        if state is None:
+            logger.info("[%s] %s 暂无可用健康模板，等待学习", now_str, query)
+            continue
+
+        now_sec = int(time.time())
+        last_real_ts = int(ts_grid[-1])
+        data_lag = now_sec - last_real_ts
+
+        ts_future = build_prediction_timestamps(
+            key=key,
+            last_real_ts=last_real_ts,
+            now_sec=now_sec,
+        )
+
+        if not ts_future:
+            continue
+
+        pred_values, lower_raw, upper_raw = predict_state_bundle(state, ts_future)
+
+        lower_values, upper_values = calc_final_bounds(
+            state=state,
+            pred=pred_values,
+            lower_raw=lower_raw,
+            upper_raw=upper_raw,
+            target=target,
+        )
+
+        ok = write_prediction_bundle(
+            pred_metric=pred_metric,
+            anomaly_metric=anomaly_metric,
+            labels=write_labels,
+            ts_future=ts_future,
+            pred_values=pred_values,
+            lower_values=lower_values,
+            upper_values=upper_values,
+            is_anomaly=is_anomaly,
+            outside_ratio=outside_ratio,
+            mean_abs_err=mean_abs_err,
+            mean_rel_err=mean_rel_err,
+            event_ts=last_real_ts,
+        )
+
+        if not ok:
+            logger.error("[%s] %s 写入预测数据失败", now_str, query)
+            continue
+
+        LAST_REAL_TS_WRITTEN[key] = last_real_ts
+
+        future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
+        future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
+        last_real_str = datetime.fromtimestamp(last_real_ts).strftime("%H:%M:%S")
+        origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S")
+
+        logger.info(
+            "[%s] %-40s → %-35s strategy=%s status=%s anomaly=%s period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点，预测区间 %s ~ %s",
+            now_str,
+            query,
+            pred_metric,
+            state.strategy,
+            state.status,
+            is_anomaly,
+            state.period,
+            origin_str,
+            last_real_str,
+            data_lag,
+            len(ts_future),
+            future_start,
+            future_end,
+        )
+
+    save_state()
+
+
+def main() -> None:
+    """Load saved state, then poll forever; a failed cycle is logged, not fatal."""
+    load_state()
+
+    logger.info(
+        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s",
+        VM_URL, HISTORY_MINUTES, HORIZON_SECONDS, WRITE_HORIZON_SECONDS,
+        POLL_INTERVAL, STATE_FILE, EXTRA_PREDICT_LABELS["forecast"],
+    )
+
+    while True:
+        try:
+            run_once()
+        except Exception:
+            # Keep the service alive: one bad cycle must not end the loop.
+            logger.exception("预测轮次执行失败，将在下一轮重试")
+        time.sleep(POLL_INTERVAL)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From 3609fbae4e2fbb33700de79ee1f7730ac81e366c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 21 May 2026 21:39:55 +0800
Subject: [PATCH 27/55] fix

---
 ai/pridict_v4.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/ai/pridict_v4.py b/ai/pridict_v4.py
index 774ad3a..c55f21a 100644
--- a/ai/pridict_v4.py
+++ b/ai/pridict_v4.py
@@ -120,11 +120,11 @@
         "anomaly_metric": "vibration_x_anomaly",
         "strategy": "phase_band",
         "abs_threshold": 0.12,
-        "rel_threshold": 0.35,
+        "rel_threshold": 0.40,
         "smooth_window": 5,
         "band_low_q": 10,
         "band_high_q": 90,
-        "band_pad_abs": 0.06,
+        "band_pad_abs": 0.08,
     },
     {
         "query": 'vibration_y{device_id="fanuc-cnc"}',
@@ -132,11 +132,11 @@
         "anomaly_metric": "vibration_y_anomaly",
         "strategy": "phase_band",
         "abs_threshold": 0.12,
-        "rel_threshold": 0.35,
+        "rel_threshold": 0.40,
         "smooth_window": 5,
         "band_low_q": 10,
         "band_high_q": 90,
-        "band_pad_abs": 0.06,
+        "band_pad_abs": 0.08,
     },
     {
         "query": 'vibration_z{device_id="fanuc-cnc"}',
@@ -144,11 +144,11 @@
         "anomaly_metric": "vibration_z_anomaly",
         "strategy": "phase_band",
         "abs_threshold": 0.12,
-        "rel_threshold": 0.35,
+        "rel_threshold": 0.40,
         "smooth_window": 5,
         "band_low_q": 10,
         "band_high_q": 90,
-        "band_pad_abs": 0.06,
+        "band_pad_abs": 0.08,
     },
 ]
 

From 1c4217b31d9304cddeb6be16949cc54eaa8d31ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 21 May 2026 21:48:41 +0800
Subject: [PATCH 28/55] fix

---
 ai/pridict_v4.py | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/ai/pridict_v4.py b/ai/pridict_v4.py
index c55f21a..8657944 100644
--- a/ai/pridict_v4.py
+++ b/ai/pridict_v4.py
@@ -119,37 +119,37 @@
         "pred_metric": "vibration_x_predicted",
         "anomaly_metric": "vibration_x_anomaly",
         "strategy": "phase_band",
-        "abs_threshold": 0.12,
-        "rel_threshold": 0.40,
+        "abs_threshold": 0.18,
+        "rel_threshold": 0.50,
         "smooth_window": 5,
-        "band_low_q": 10,
-        "band_high_q": 90,
-        "band_pad_abs": 0.08,
+        "band_low_q": 2,
+        "band_high_q": 98,
+        "band_pad_abs": 0.12,
     },
     {
         "query": 'vibration_y{device_id="fanuc-cnc"}',
         "pred_metric": "vibration_y_predicted",
         "anomaly_metric": "vibration_y_anomaly",
         "strategy": "phase_band",
-        "abs_threshold": 0.12,
-        "rel_threshold": 0.40,
+        "abs_threshold": 0.18,
+        "rel_threshold": 0.50,
         "smooth_window": 5,
-        "band_low_q": 10,
-        "band_high_q": 90,
-        "band_pad_abs": 0.08,
+        "band_low_q": 2,
+        "band_high_q": 98,
+        "band_pad_abs": 0.12,
     },
     {
         "query": 'vibration_z{device_id="fanuc-cnc"}',
         "pred_metric": "vibration_z_predicted",
         "anomaly_metric": "vibration_z_anomaly",
         "strategy": "phase_band",
-        "abs_threshold": 0.12,
-        "rel_threshold": 0.40,
+        "abs_threshold": 0.18,
+        "rel_threshold": 0.50,
         "smooth_window": 5,
-        "band_low_q": 10,
-        "band_high_q": 90,
-        "band_pad_abs": 0.08,
-    },
+        "band_low_q": 2,
+        "band_high_q": 98,
+        "band_pad_abs": 0.12,
+    }
 ]
 
 EXTRA_PREDICT_LABELS = {

From 4077e8f416d9d2f9d60df50d551fc817ddacd982 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Fri, 22 May 2026 09:40:02 +0800
Subject: [PATCH 29/55] feat(predict_v5): add predict v5

---
 ai/pridict_v5.py | 1794 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1794 insertions(+)
 create mode 100644 ai/pridict_v5.py

diff --git a/ai/pridict_v5.py b/ai/pridict_v5.py
new file mode 100644
index 0000000..6894a66
--- /dev/null
+++ b/ai/pridict_v5.py
@@ -0,0 +1,1794 @@
+# -*- coding: utf-8 -*-
+"""
+ProtoForge Predictor v12
+
+核心能力：
+1. feed_rate / spindle_speed / spindle_current 使用 phase-lock 点预测。
+2. vibration_x / vibration_y / vibration_z 使用 phase-band 预测带。
+3. vibration 类指标：
+   - predicted 使用平滑后的中位数模板，用于趋势参考。
+   - upper/lower 使用原始波动分位数模板 + padding，用于正常波动容忍带。
+   - 偶发越界不直接报警，只有持续越界 / 高比例越界 / 严重越界才报警。
+4. 预测起点锚定最后一个真实点 last_real_ts，避免时间错位。
+5. 异常期间冻结健康模板，不学习故障数据。
+6. 故障恢复后等待稳定，再恢复模板学习。
+7. 写入：
+   - xxx_predicted
+   - xxx_predicted_upper
+   - xxx_predicted_lower
+   - xxx_anomaly
+   - xxx_anomaly_outside_ratio
+   - xxx_anomaly_mean_abs_error
+   - xxx_anomaly_mean_rel_error
+   - xxx_anomaly_max_consecutive_outside
+   - xxx_anomaly_max_exceed_ratio
+"""
+
+import json
+import logging
+import math
+import os
+import re
+import time
+from dataclasses import asdict, dataclass
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Tuple
+
+import numpy as np
+import requests
+
+
+# =============================================================================
+# 日志配置
+# =============================================================================
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+)
+
+logger = logging.getLogger(__name__)
+
+
+# =============================================================================
+# 基础配置
+# =============================================================================
+
+VM_URL = "http://localhost:8428"
+STATE_FILE = "/tmp/protoforge_predictor_state_v12.json"
+
+HISTORY_MINUTES = 30
+HORIZON_SECONDS = 120
+POLL_INTERVAL = 30
+
+WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)
+
+QUERY_STEP = "1s"
+MIN_POINTS = 120
+
+MIN_PERIOD_SECONDS = 5
+MAX_PERIOD_SECONDS = 3600
+
+MIN_FULL_CYCLES_FOR_TEMPLATE = 3
+MAX_CYCLES_FOR_TEMPLATE = 8
+
+DETECT_WINDOW_SECONDS = 30
+RECOVERY_MIN_SECONDS = 60
+
+HEALTHY_EMA_ALPHA = 0.10
+RECOVERY_EMA_ALPHA = 0.25
+
+OUTSIDE_RATIO_THRESHOLD = 0.60
+MIN_CONSECUTIVE_OUTSIDE = 5
+SEVERE_EXCEED_RATIO = 1.8
+
+VALLEY_QUANTILE = 45
+
+MAX_DATA_LAG_SECONDS = 180
+
+PHASE_LOCK_MIN_WINDOW_SECONDS = 45
+PHASE_LOCK_MAX_WINDOW_SECONDS = 180
+PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12
+PHASE_LOCK_ORIGIN_SEARCH_RATIO = 0.35
+PHASE_LOCK_PERIOD_STEP = 1
+PHASE_LOCK_ORIGIN_STEP = 1
+
+
+# =============================================================================
+# 指标配置
+# =============================================================================
+
+PREDICT_TARGETS = [
+    {
+        "query": 'feed_rate{device_id="fanuc-cnc"}',
+        "pred_metric": "feed_rate_predicted",
+        "anomaly_metric": "feed_rate_anomaly",
+        "strategy": "phase_point",
+        "abs_threshold": 400.0,
+        "rel_threshold": 0.25,
+        "smooth_window": 1,
+        "outside_ratio_threshold": 0.60,
+        "min_consecutive_outside": 5,
+        "severe_exceed_ratio": 1.8,
+    },
+    {
+        "query": 'spindle_speed{device_id="fanuc-cnc"}',
+        "pred_metric": "spindle_speed_predicted",
+        "anomaly_metric": "spindle_speed_anomaly",
+        "strategy": "phase_point",
+        "abs_threshold": 500.0,
+        "rel_threshold": 0.25,
+        "smooth_window": 1,
+        "outside_ratio_threshold": 0.60,
+        "min_consecutive_outside": 5,
+        "severe_exceed_ratio": 1.8,
+    },
+    {
+        "query": 'spindle_current{device_id="fanuc-cnc"}',
+        "pred_metric": "spindle_current_predicted",
+        "anomaly_metric": "spindle_current_anomaly",
+        "strategy": "phase_point",
+        "abs_threshold": 5.0,
+        "rel_threshold": 0.25,
+        "smooth_window": 1,
+        "outside_ratio_threshold": 0.60,
+        "min_consecutive_outside": 5,
+        "severe_exceed_ratio": 1.8,
+    },
+    {
+        "query": 'vibration_x{device_id="fanuc-cnc"}',
+        "pred_metric": "vibration_x_predicted",
+        "anomaly_metric": "vibration_x_anomaly",
+        "strategy": "phase_band",
+
+        # vibration 类指标噪声、尖峰较多，不建议用很窄的阈值。
+        "abs_threshold": 0.18,
+        "rel_threshold": 0.55,
+
+        # 平滑只用于相位锁定和 predicted 中位趋势。
+        "smooth_window": 5,
+
+        # upper/lower 用原始值分位数，范围放宽，覆盖正常尖峰。
+        "band_low_q": 1,
+        "band_high_q": 99,
+        "band_pad_abs": 0.15,
+
+        # 偶发越界容忍。
+        "outside_ratio_threshold": 0.70,
+        "min_consecutive_outside": 5,
+        "severe_exceed_ratio": 2.0,
+    },
+    {
+        "query": 'vibration_y{device_id="fanuc-cnc"}',
+        "pred_metric": "vibration_y_predicted",
+        "anomaly_metric": "vibration_y_anomaly",
+        "strategy": "phase_band",
+        "abs_threshold": 0.18,
+        "rel_threshold": 0.55,
+        "smooth_window": 5,
+        "band_low_q": 1,
+        "band_high_q": 99,
+        "band_pad_abs": 0.15,
+        "outside_ratio_threshold": 0.70,
+        "min_consecutive_outside": 5,
+        "severe_exceed_ratio": 2.0,
+    },
+    {
+        "query": 'vibration_z{device_id="fanuc-cnc"}',
+        "pred_metric": "vibration_z_predicted",
+        "anomaly_metric": "vibration_z_anomaly",
+        "strategy": "phase_band",
+        "abs_threshold": 0.18,
+        "rel_threshold": 0.55,
+        "smooth_window": 5,
+        "band_low_q": 1,
+        "band_high_q": 99,
+        "band_pad_abs": 0.15,
+        "outside_ratio_threshold": 0.70,
+        "min_consecutive_outside": 5,
+        "severe_exceed_ratio": 2.0,
+    },
+]
+
+EXTRA_PREDICT_LABELS = {
+    "forecast": "phase_band_health_v12",
+    "source": "protoforge",
+}
+
+BASELINE_STATUS_HEALTHY = "healthy"
+BASELINE_STATUS_ANOMALY = "anomaly"
+BASELINE_STATUS_RECOVERING = "recovering"
+
+
+# =============================================================================
+# 状态结构
+# =============================================================================
+
+@dataclass
+class BaselineState:
+    period: int
+    phase_origin_ts: int
+    template: List[float]
+    lower_template: List[float]
+    upper_template: List[float]
+    strategy: str
+    status: str
+    clean_seconds: int
+    last_update_ts: int
+    last_seen_ts: int
+    y_min: float
+    y_max: float
+
+
+BASELINE_STATES: Dict[str, BaselineState] = {}
+LAST_REAL_TS_WRITTEN: Dict[str, int] = {}
+
+
+# =============================================================================
+# VictoriaMetrics 读取
+# =============================================================================
+
+def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]:
+    now = datetime.now()
+    start = now - timedelta(minutes=minutes)
+
+    try:
+        resp = requests.get(
+            f"{VM_URL}/api/v1/query_range",
+            params={
+                "query": query,
+                "start": start.timestamp(),
+                "end": now.timestamp(),
+                "step": QUERY_STEP,
+            },
+            timeout=10,
+        )
+        resp.raise_for_status()
+    except requests.RequestException as e:
+        logger.error("拉取数据失败 query=%s: %s", query, e)
+        return [], []
+
+    try:
+        result = resp.json().get("data", {}).get("result", [])
+    except Exception as e:
+        logger.error("解析 VM 返回失败 query=%s: %s", query, e)
+        return [], []
+
+    if not result:
+        return [], []
+
+    values = result[0].get("values", [])
+
+    ts = []
+    ys = []
+
+    for item in values:
+        if len(item) < 2:
+            continue
+
+        try:
+            t = float(item[0])
+            y = float(item[1])
+        except Exception:
+            continue
+
+        if not math.isfinite(t) or not math.isfinite(y):
+            continue
+
+        ts.append(t)
+        ys.append(y)
+
+    return ts, ys
+
+
+def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]:
+    """Resample raw samples onto a gap-free 1-second grid.
+
+    Timestamps are rounded to whole seconds (the last sample wins when two
+    round to the same second), non-finite samples are dropped, and missing
+    seconds are filled by linear interpolation.
+
+    Returns (ts_grid, ys_grid) as float arrays; both are empty when the inputs
+    are empty, differ in length, or hold no usable sample.
+    """
+    if not ts or not ys or len(ts) != len(ys):
+        return np.array([]), np.array([])
+
+    data = {}
+
+    for t, y in zip(ts, ys):
+        try:
+            t_raw, val = float(t), float(y)
+        except (TypeError, ValueError, OverflowError):
+            continue
+        # Check the floats themselves: int(round(nan)) and int(round(inf)) raise.
+        if math.isfinite(t_raw) and math.isfinite(val):
+            data[int(round(t_raw))] = val
+
+    if not data:
+        return np.array([]), np.array([])
+
+    seconds = sorted(data)
+    ts_clean = np.array(seconds, dtype=float)
+    ys_clean = np.array([data[s] for s in seconds], dtype=float)
+
+    # Distinct sorted keys guarantee seconds[-1] > seconds[0] once there are two.
+    if len(seconds) < 2:
+        return ts_clean, ys_clean
+
+    ts_grid = np.arange(seconds[0], seconds[-1] + 1, 1, dtype=float)
+    ys_grid = np.interp(ts_grid, ts_clean, ys_clean)
+
+    return ts_grid, ys_grid
+
+
+# =============================================================================
+# 平滑与预处理
+# =============================================================================
+
+def rolling_median(arr: np.ndarray, window: int) -> np.ndarray:
+    if window <= 1 or len(arr) < window:
+        return arr.astype(float)
+
+    if window % 2 == 0:
+        window += 1
+
+    pad = window // 2
+    padded = np.pad(arr.astype(float), (pad, pad), mode="edge")
+
+    result = []
+
+    for i in range(len(arr)):
+        result.append(float(np.median(padded[i:i + window])))
+
+    return np.array(result, dtype=float)
+
+
+def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
+    """Centered moving mean of ``arr`` with edge-replicated borders.
+
+    Returns ``arr`` as floats unchanged when ``window <= 1`` or ``window > len(arr)``.
+    """
+    if window <= 1 or len(arr) < window:
+        return arr.astype(float)
+
+    width = window if window % 2 else window + 1  # an even window is widened to the next odd size
+    half = width // 2
+    extended = np.pad(arr.astype(float), (half, half), mode="edge")
+    return np.convolve(extended, np.ones(width, dtype=float) / width, mode="valid")
+
+
+def preprocess_values(ys_grid: np.ndarray, target: Dict) -> np.ndarray:
+    """Smooth ``ys_grid`` according to the target's strategy.
+
+    ``phase_band`` targets get a rolling median; every other strategy gets a
+    moving average, or a plain float copy when ``smooth_window`` is 1 or less.
+    """
+    width = int(target.get("smooth_window", 1))
+    if target.get("strategy", "phase_point") == "phase_band":
+        return rolling_median(ys_grid, width)
+
+    return moving_average(ys_grid, width) if width > 1 else ys_grid.astype(float)
+
+
+# =============================================================================
+# 周期估计
+# =============================================================================
+
+def estimate_period_by_fft(ys_arr: np.ndarray) -> float:  # Estimate the dominant period, in samples, from the strongest FFT bin.
+    n = len(ys_arr)
+
+    if n < 8:
+        return 60.0  # too few samples for a spectrum: fall back to a fixed 60.0
+
+    centered = ys_arr - np.mean(ys_arr)  # remove the mean before transforming
+
+    if np.allclose(centered, 0):
+        return 60.0  # a (near-)constant series has no period to detect
+
+    fft_vals = np.fft.rfft(centered)
+    freqs = np.fft.rfftfreq(n, d=1.0)  # d=1.0: frequencies are expressed in cycles per sample
+
+    if len(freqs) <= 1:
+        return 60.0
+
+    power = np.abs(fft_vals[1:])  # magnitudes without the zero-frequency bin
+
+    if len(power) == 0 or np.max(power) <= 0:
+        return 60.0
+
+    dominant_idx = int(np.argmax(power)) + 1  # +1 maps the position in power back to an index into freqs
+    dominant_freq = float(freqs[dominant_idx])
+
+    if dominant_freq <= 0:
+        return 60.0
+
+    period = 1.0 / dominant_freq
+
+    return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))  # clamp to the module's period limits
+
+
+def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:  # Refine a period guess using the autocorrelation peak near it.
+    n = len(ys_arr)
+
+    if n < 20:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))  # too short: return the clamped guess
+
+    centered = ys_arr - np.mean(ys_arr)
+
+    if np.allclose(centered, 0):
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))  # constant series: nothing to refine
+
+    corr = np.correlate(centered, centered, mode="full")[n - 1:]  # autocorrelation at lags 0 .. n-1
+
+    p0 = int(round(init_period))
+    left = max(int(MIN_PERIOD_SECONDS), int(max(2, p0 * 0.7)))  # search starts at 70% of the guess (at least lag 2)
+    right = min(n // 2, int(max(left + 1, p0 * 1.3)))  # and ends at 130%, never beyond half the series length
+
+    if right <= left:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    search = corr[left:right + 1]
+
+    if len(search) == 0:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    best_lag = left + int(np.argmax(search))  # lag with the largest autocorrelation inside the search range
+
+    return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+
+def estimate_period_rough(ys_arr: np.ndarray) -> int:
+    """Estimate the period as an integer: FFT guess, then autocorrelation refinement.
+
+    The rounded result is clamped to [MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS].
+    """
+    refined = refine_period_by_autocorr(ys_arr, estimate_period_by_fft(ys_arr))
+    lowest, highest = int(MIN_PERIOD_SECONDS), int(MAX_PERIOD_SECONDS)
+    return int(max(lowest, min(highest, int(round(refined)))))
+
+
+# =============================================================================
+# 谷底检测
+# =============================================================================
+
+def find_valley_indices(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    expected_period: int,
+) -> List[int]:  # Return indices into ys_grid of the valleys (local minima), roughly one per cycle.
+    n = len(ys_grid)
+
+    if n < max(10, expected_period * 2):
+        return []  # needs at least 10 points and two expected periods of data
+
+    period = max(3, int(expected_period))
+    smooth_window = max(3, int(round(period * 0.08)))  # about 8% of the period ...
+    smooth_window = min(smooth_window, 21)  # ... kept within [3, 21]
+
+    ys_smooth = moving_average(ys_grid, smooth_window)
+    threshold = float(np.percentile(ys_smooth, VALLEY_QUANTILE))  # a valley must lie at or below this percentile
+
+    candidates = []
+
+    for i in range(1, n - 1):
+        if (
+            ys_smooth[i] <= ys_smooth[i - 1]
+            and ys_smooth[i] < ys_smooth[i + 1]
+            and ys_smooth[i] <= threshold
+        ):
+            candidates.append(i)
+
+    if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE:
+        candidates = []  # too few minima under the threshold: retry with every local minimum
+
+        for i in range(1, n - 1):
+            if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]:
+                candidates.append(i)
+
+    if not candidates:
+        return []
+
+    min_distance = max(2, int(round(period * 0.55)))  # minima closer than 55% of a period are merged
+    selected = []
+
+    for idx in candidates:
+        if not selected:
+            selected.append(idx)
+            continue
+
+        if idx - selected[-1] >= min_distance:
+            selected.append(idx)
+            continue
+
+        if ys_smooth[idx] < ys_smooth[selected[-1]]:
+            selected[-1] = idx  # too close to the previous valley: keep whichever is deeper
+
+    if len(selected) < 2:
+        return selected
+
+    cleaned = [selected[0]]  # second pass: re-check spacing using timestamps rather than index distance
+
+    for idx in selected[1:]:
+        diff = int(ts_grid[idx] - ts_grid[cleaned[-1]])
+
+        if int(period * 0.55) <= diff <= int(period * 1.60):
+            cleaned.append(idx)
+            continue
+
+        if diff < int(period * 0.55):
+            if ys_smooth[idx] < ys_smooth[cleaned[-1]]:
+                cleaned[-1] = idx
+            continue
+
+        cleaned.append(idx)  # gap longer than 1.6 periods: the valley is still kept
+
+    return cleaned
+
+
+def detect_period_and_valleys(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+) -> Tuple[int, List[int]]:  # Return (period, valley indices); the period is refined from valley spacing when possible.
+    rough = estimate_period_rough(ys_grid)
+    valleys = find_valley_indices(ts_grid, ys_grid, rough)
+
+    if len(valleys) >= 3:
+        diffs = np.diff(ts_grid[valleys])  # time between consecutive valleys
+        good = diffs[(diffs >= rough * 0.55) & (diffs <= rough * 1.60)]  # ignore spacings far from the rough period
+
+        if len(good) > 0:
+            period = int(round(float(np.median(good))))
+        else:
+            period = rough
+    else:
+        period = rough  # fewer than three valleys: keep the rough estimate
+
+    period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period))
+
+    return int(period), valleys
+
+
+# =============================================================================
+# 模板构建
+# =============================================================================
+
+def build_templates_from_valleys(
+    ts_grid: np.ndarray,
+    ys_mid_grid: np.ndarray,
+    ys_band_grid: np.ndarray,
+    period: int,
+    valleys: List[int],
+    target: Dict,
+) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray]]:  # Build (mid, lower, upper) one-period templates from valley-to-valley cycles; None if too few cycles.
+    if period <= 1 or len(valleys) < MIN_FULL_CYCLES_FOR_TEMPLATE + 1:
+        return None  # k full cycles need k + 1 valleys
+
+    strategy = target.get("strategy", "phase_point")
+    low_q = float(target.get("band_low_q", 10))  # percentiles used for the phase_band lower/upper templates
+    high_q = float(target.get("band_high_q", 90))
+
+    pairs = []
+
+    for a, b in zip(valleys[:-1], valleys[1:]):
+        cycle_len = float(ts_grid[b] - ts_grid[a])
+
+        if period * 0.55 <= cycle_len <= period * 1.60:
+            pairs.append((a, b, cycle_len))  # keep only cycles whose length is between 0.55 and 1.60 periods
+
+    if len(pairs) < MIN_FULL_CYCLES_FOR_TEMPLATE:
+        return None
+
+    pairs = pairs[-MAX_CYCLES_FOR_TEMPLATE:]  # only the most recent cycles contribute
+
+    phase_grid = np.arange(period, dtype=float)  # common phase axis 0 .. period-1
+    mid_segments = []
+    band_segments = []
+    weights = []
+
+    for idx, (a, b, cycle_len) in enumerate(pairs):
+        seg_ts = ts_grid[a:b + 1]
+        seg_mid_y = ys_mid_grid[a:b + 1]
+        seg_band_y = ys_band_grid[a:b + 1]
+
+        if len(seg_mid_y) < 3 or len(seg_band_y) < 3:
+            continue
+
+        x_old = (seg_ts - seg_ts[0]) / cycle_len * period  # stretch this cycle's time axis to exactly one period
+
+        mid_seg = np.interp(phase_grid, x_old, seg_mid_y)
+        band_seg = np.interp(phase_grid, x_old, seg_band_y)
+
+        mid_segments.append(mid_seg.astype(float))
+        band_segments.append(band_seg.astype(float))
+        weights.append(0.5 + 0.5 * ((idx + 1) / len(pairs)))  # later cycles weigh more; the last one gets 1.0
+
+    if len(mid_segments) < MIN_FULL_CYCLES_FOR_TEMPLATE:
+        return None
+
+    mid_arr = np.vstack(mid_segments)  # one row per cycle, one column per phase step
+    band_arr = np.vstack(band_segments)
+    w_arr = np.array(weights, dtype=float)
+
+    if strategy == "phase_band":
+        mid_template = np.percentile(mid_arr, 50, axis=0)  # per-phase median across cycles
+
+        # upper/lower use the distribution of the raw values, not of the smoothed values.
+        lower_template = np.percentile(band_arr, low_q, axis=0)
+        upper_template = np.percentile(band_arr, high_q, axis=0)
+    else:
+        mid_template = np.average(mid_arr, axis=0, weights=w_arr)
+        lower_template = mid_template.copy()  # other strategies carry no band: all three templates are equal
+        upper_template = mid_template.copy()
+
+    return (
+        mid_template.astype(float),
+        lower_template.astype(float),
+        upper_template.astype(float),
+    )
+
+
+def build_current_baseline(
+    ts_grid: np.ndarray,
+    ys_mid_grid: np.ndarray,
+    ys_band_grid: np.ndarray,
+    target: Dict,
+    tail_seconds: Optional[int] = None,
+) -> Optional[Tuple[int, int, np.ndarray, np.ndarray, np.ndarray]]:  # Return (period, phase_origin_ts, mid, lower, upper) or None when no baseline can be built.
+    if len(ys_mid_grid) < MIN_POINTS or len(ys_band_grid) < MIN_POINTS:
+        return None
+
+    if tail_seconds is not None and tail_seconds > 0:
+        cutoff = ts_grid[-1] - int(tail_seconds)  # use only samples from the trailing tail_seconds window
+        mask = ts_grid >= cutoff
+        ts_use = ts_grid[mask]
+        ys_mid_use = ys_mid_grid[mask]
+        ys_band_use = ys_band_grid[mask]
+    else:
+        ts_use = ts_grid
+        ys_mid_use = ys_mid_grid
+        ys_band_use = ys_band_grid
+
+    if len(ys_mid_use) < MIN_POINTS or len(ys_band_use) < MIN_POINTS:
+        return None  # the selected window is too short
+
+    period, valleys = detect_period_and_valleys(ts_use, ys_mid_use)  # period and valleys come from the mid series only
+
+    templates = build_templates_from_valleys(
+        ts_grid=ts_use,
+        ys_mid_grid=ys_mid_use,
+        ys_band_grid=ys_band_use,
+        period=period,
+        valleys=valleys,
+        target=target,
+    )
+
+    if templates is None or len(valleys) == 0:
+        return None
+
+    template, lower_template, upper_template = templates
+    phase_origin_ts = int(round(float(ts_use[valleys[-1]])))  # phase 0 is anchored at the most recent valley
+
+    return int(period), phase_origin_ts, template, lower_template, upper_template
+
+
+# =============================================================================
+# 模板预测
+# =============================================================================
+
+def circular_template_value(template: np.ndarray, phase: float) -> float:
+    """Linearly interpolate ``template`` at ``phase``, wrapping around its end.
+
+    An empty template yields 0.0.
+    """
+    size = len(template)
+    if size == 0:
+        return 0.0
+    wrapped = float(phase) % size
+    whole = math.floor(wrapped)
+    lo, weight = int(whole) % size, wrapped - whole
+    return float((1.0 - weight) * template[lo] + weight * template[(lo + 1) % size])
+
+
+def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:  # Resample a cyclic template to new_period samples.
+    old_period = len(old_template)
+
+    if old_period == new_period:
+        return old_template.astype(float)  # already the right length
+
+    if old_period <= 1 or new_period <= 1:
+        return np.full(new_period, float(np.mean(old_template)), dtype=float)  # degenerate length: constant at the old mean
+
+    old_x = np.linspace(0.0, 1.0, old_period, endpoint=False)  # normalized phase in [0, 1) of each old sample
+    new_x = np.linspace(0.0, 1.0, new_period, endpoint=False)
+
+    old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0])  # a copy on each side so interpolation wraps around
+    old_y_ext = np.concatenate([old_template, old_template, old_template])
+
+    return np.interp(new_x, old_x_ext, old_y_ext).astype(float)
+
+
+def predict_template_values(
+    template: np.ndarray,
+    period: int,
+    phase_origin_ts: int,
+    ts_list: List[int],
+) -> np.ndarray:
+    """Evaluate a cyclic template at every timestamp in ``ts_list``.
+
+    The phase of a timestamp is its integer offset from ``phase_origin_ts``
+    modulo ``period``. A template whose length differs from ``period`` is
+    resampled first; a ``period`` of 1 or less yields all zeros.
+    """
+    if period <= 1:
+        return np.zeros(len(ts_list), dtype=float)
+
+    curve = template if len(template) == period else resample_template(template, period)
+    phases = ((int(stamp) - int(phase_origin_ts)) % period for stamp in ts_list)
+    samples = [circular_template_value(curve, phase) for phase in phases]
+    return np.array(samples, dtype=float)
+
+
+def predict_state_bundle(
+    state: BaselineState,
+    ts_list: List[int],
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:  # Predict (mid, lower, upper) values of state's templates at the timestamps in ts_list.
+    period = int(state.period)
+    origin = int(state.phase_origin_ts)  # all three templates share the same period and phase origin
+
+    mid = predict_template_values(
+        template=np.array(state.template, dtype=float),
+        period=period,
+        phase_origin_ts=origin,
+        ts_list=ts_list,
+    )
+
+    lower = predict_template_values(
+        template=np.array(state.lower_template, dtype=float),
+        period=period,
+        phase_origin_ts=origin,
+        ts_list=ts_list,
+    )
+
+    upper = predict_template_values(
+        template=np.array(state.upper_template, dtype=float),
+        period=period,
+        phase_origin_ts=origin,
+        ts_list=ts_list,
+    )
+
+    return mid, lower, upper
+
+
+def normalize_origin_near(origin: int, period: int, near_ts: int) -> int:
+    """Shift ``origin`` by whole periods into ``(near_ts - period, near_ts]``.
+
+    A ``period`` of 1 or less is treated as invalid and ``origin`` is returned
+    unchanged. The shift is computed with floor division, so the cost does
+    not depend on how far ``origin`` lies from ``near_ts``.
+    """
+    if period <= 1:
+        return origin
+
+    origin = int(origin)
+    period = int(period)
+    near_ts = int(near_ts)
+
+    return origin + ((near_ts - origin) // period) * period
+
+
+def merge_template(
+    old_template: np.ndarray,
+    new_template: np.ndarray,
+    alpha: float,
+) -> np.ndarray:
+    """Blend two templates as ``(1 - alpha) * old + alpha * new``.
+
+    ``alpha`` is clipped to [0, 1]; ``old_template`` is resampled to the new
+    template's length when the two lengths differ.
+    """
+    weight = float(np.clip(alpha, 0.0, 1.0))
+    base = old_template if len(old_template) == len(new_template) else resample_template(old_template, len(new_template))
+    return ((1.0 - weight) * base + weight * new_template).astype(float)
+
+
+# =============================================================================
+# Phase Lock
+# =============================================================================
+
+def phase_lock_recent(
+    state: BaselineState,
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+) -> Tuple[int, int, np.ndarray, float]:  # Search period/origin near the state's values that best fit recent data; returns (period, origin, prediction, MAE).
+    base_period = int(state.period)
+    base_origin = int(state.phase_origin_ts)
+    base_template = np.array(state.template, dtype=float)
+
+    if base_period <= 1 or len(base_template) <= 1:
+        ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()  # degenerate template: no search, just score it as is
+        pred = predict_template_values(base_template, base_period, base_origin, ts_recent)
+        actual = ys_model[-len(ts_recent):].astype(float)
+        mae = float(np.mean(np.abs(actual - pred))) if len(actual) else 0.0
+        return base_period, base_origin, pred, mae
+
+    window_seconds = max(
+        PHASE_LOCK_MIN_WINDOW_SECONDS,
+        min(PHASE_LOCK_MAX_WINDOW_SECONDS, int(base_period * 2)),  # two periods, within the configured min/max window
+    )
+
+    cutoff = ts_grid[-1] - window_seconds
+    mask = ts_grid >= cutoff
+
+    ts_recent_arr = ts_grid[mask].astype(int)
+    actual = ys_model[mask].astype(float)
+
+    if len(ts_recent_arr) < max(10, DETECT_WINDOW_SECONDS):
+        ts_recent_arr = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int)  # window too sparse: use the last DETECT_WINDOW_SECONDS samples
+        actual = ys_model[-DETECT_WINDOW_SECONDS:].astype(float)
+
+    ts_recent = ts_recent_arr.tolist()
+    last_ts = int(ts_recent[-1])
+
+    p_min = max(
+        int(MIN_PERIOD_SECONDS),
+        int(round(base_period * (1.0 - PHASE_LOCK_PERIOD_SEARCH_RATIO))),  # lower end of the period search range
+    )
+    p_max = min(
+        int(MAX_PERIOD_SECONDS),
+        int(round(base_period * (1.0 + PHASE_LOCK_PERIOD_SEARCH_RATIO))),  # upper end of the period search range
+    )
+
+    best_period = base_period  # the incumbent is the unchanged state, so the search can only improve on it
+    best_origin = normalize_origin_near(base_origin, base_period, last_ts)
+    best_template = resample_template(base_template, best_period)
+
+    best_pred = predict_template_values(
+        template=best_template,
+        period=best_period,
+        phase_origin_ts=best_origin,
+        ts_list=ts_recent,
+    )
+
+    best_mae = float(np.mean(np.abs(actual - best_pred)))
+
+    for period in range(p_min, p_max + 1, PHASE_LOCK_PERIOD_STEP):
+        template = resample_template(base_template, period)  # same shape, stretched to the candidate period
+        center_origin = normalize_origin_near(base_origin, period, last_ts)
+        origin_shift = max(2, int(round(period * PHASE_LOCK_ORIGIN_SEARCH_RATIO)))  # half-width of the origin search
+
+        for shift in range(-origin_shift, origin_shift + 1, PHASE_LOCK_ORIGIN_STEP):
+            origin = center_origin + shift
+
+            pred = predict_template_values(
+                template=template,
+                period=period,
+                phase_origin_ts=origin,
+                ts_list=ts_recent,
+            )
+
+            mae = float(np.mean(np.abs(actual - pred)))
+            penalty = abs(period - base_period) * 0.5  # moving away from the stored period costs 0.5 per unit of period
+            score = mae + penalty
+
+            best_score = best_mae + abs(best_period - base_period) * 0.5  # score of the incumbent, same formula
+
+            if score < best_score:
+                best_period = period
+                best_origin = origin
+                best_pred = pred
+                best_mae = mae
+
+    best_origin = normalize_origin_near(best_origin, best_period, last_ts)  # re-anchor the origin at or just before last_ts
+
+    return int(best_period), int(best_origin), best_pred, float(best_mae)
+
+
+# =============================================================================
+# 异常检测
+# =============================================================================
+
+def max_consecutive_true(flags: np.ndarray) -> int:
+    """Return the length of the longest run of truthy entries in ``flags``."""
+    longest = 0
+    streak = 0
+
+    for value in flags:
+        # A falsy entry ends the current run; a truthy one extends it.
+        streak = streak + 1 if bool(value) else 0
+        if streak > longest:
+            longest = streak
+
+    return int(longest)
+
+
+def calc_point_bounds(
+    pred: np.ndarray,
+    abs_threshold: float,
+    rel_threshold: float,
+) -> Tuple[np.ndarray, np.ndarray]:  # Return (lower, upper) bounds placed symmetrically around pred.
+    margin = np.maximum(abs_threshold, rel_threshold * np.abs(pred))  # the larger of the absolute and relative tolerance
+    return np.subtract(pred, margin), np.add(pred, margin)
+
+
+def calc_final_bounds(
+    state: BaselineState,
+    pred: np.ndarray,
+    lower_raw: np.ndarray,
+    upper_raw: np.ndarray,
+    target: Dict,
+) -> Tuple[np.ndarray, np.ndarray]:  # Return the (lower, upper) bounds used for anomaly checks; state is not read in this body.
+    strategy = target.get("strategy", "phase_point")
+    abs_threshold = float(target.get("abs_threshold", 1.0))
+    rel_threshold = float(target.get("rel_threshold", 0.25))
+
+    if strategy == "phase_band":
+        pad_abs = float(target.get("band_pad_abs", abs_threshold))  # falls back to abs_threshold when not configured
+
+        # For vibration-type metrics the bounds act as a tolerance band for normal fluctuation, not as hard limits.
+        dynamic_pad = np.maximum(
+            pad_abs,
+            np.abs(pred) * rel_threshold * 0.25,  # a quarter of the relative tolerance applied to the prediction
+        )
+
+        lower = lower_raw - dynamic_pad  # widen the template band on both sides
+        upper = upper_raw + dynamic_pad
+
+        return lower, upper
+
+    return calc_point_bounds(pred, abs_threshold, rel_threshold)  # other strategies: symmetric band around pred
+
+
+def detect_anomaly(
+    state: BaselineState,
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    ys_actual: np.ndarray,
+    target: Dict,
+) -> Tuple[bool, float, float, float, int, int, int, float]:  # Returns (is_anomaly, outside_ratio, mean_abs_err, mean_rel_err, best_period, best_origin, max_outside_seconds, max_exceed_ratio).
+    best_period, best_origin, pred_recent, _ = phase_lock_recent(
+        state=state,
+        ts_grid=ts_grid,
+        ys_model=ys_model,
+    )
+
+    recent_len = len(pred_recent)  # the detection window is as long as the phase-lock prediction
+
+    if recent_len <= 0:
+        return False, 0.0, 0.0, 0.0, best_period, best_origin, 0, 0.0
+
+    if target.get("strategy", "phase_point") == "phase_band":
+        actual = ys_actual[-recent_len:].astype(float)  # phase_band targets are judged on ys_actual
+    else:
+        actual = ys_model[-recent_len:].astype(float)  # all other targets are judged on ys_model
+
+    tmp_state = BaselineState(  # copy of state carrying the locked period/origin; state itself is not modified here
+        period=best_period,
+        phase_origin_ts=best_origin,
+        template=state.template,
+        lower_template=state.lower_template,
+        upper_template=state.upper_template,
+        strategy=state.strategy,
+        status=state.status,
+        clean_seconds=state.clean_seconds,
+        last_update_ts=state.last_update_ts,
+        last_seen_ts=state.last_seen_ts,
+        y_min=state.y_min,
+        y_max=state.y_max,
+    )
+
+    recent_ts = ts_grid[-recent_len:].astype(int).tolist()
+    pred, lower_raw, upper_raw = predict_state_bundle(tmp_state, recent_ts)
+
+    lower, upper = calc_final_bounds(
+        state=tmp_state,
+        pred=pred,
+        lower_raw=lower_raw,
+        upper_raw=upper_raw,
+        target=target,
+    )
+
+    above_upper = actual - upper
+    below_lower = lower - actual
+
+    exceed = np.maximum(above_upper, below_lower)  # distance outside the band on whichever side is violated
+    exceed = np.maximum(exceed, 0.0)  # points inside the band count as 0
+
+    outside = exceed > 0
+
+    band_width = np.maximum(upper - lower, 1e-6)  # floor avoids division by zero for a zero-width band
+    exceed_ratio = exceed / band_width
+
+    abs_err = np.abs(actual - pred)
+
+    outside_ratio = float(np.mean(outside))  # fraction of window samples outside the band
+    mean_abs_err = float(np.mean(abs_err))
+    mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1e-6)))
+
+    max_outside_seconds = max_consecutive_true(outside)  # counted in samples; equals seconds on a 1 s grid (presumably, verify caller)
+    max_exceed_ratio = float(np.max(exceed_ratio)) if len(exceed_ratio) > 0 else 0.0
+
+    outside_ratio_threshold = float(
+        target.get("outside_ratio_threshold", OUTSIDE_RATIO_THRESHOLD)  # per-target override, else module default
+    )
+    min_consecutive_outside = int(
+        target.get("min_consecutive_outside", MIN_CONSECUTIVE_OUTSIDE)
+    )
+    severe_exceed_ratio = float(
+        target.get("severe_exceed_ratio", SEVERE_EXCEED_RATIO)
+    )
+
+    # Core tuning:
+    # 1. An occasional 1-3 points outside the band do not raise an alarm.
+    # 2. A sustained excursion raises an alarm.
+    # 3. A high proportion of points outside the band raises an alarm.
+    # 4. A severe excursion raises an alarm immediately.
+    is_anomaly = (
+        outside_ratio >= outside_ratio_threshold
+        or max_outside_seconds >= min_consecutive_outside
+        or max_exceed_ratio >= severe_exceed_ratio
+    )
+
+    return (
+        is_anomaly,
+        outside_ratio,
+        mean_abs_err,
+        mean_rel_err,
+        int(best_period),
+        int(best_origin),
+        int(max_outside_seconds),
+        float(max_exceed_ratio),
+    )
+
+
+# =============================================================================
+# 状态管理
+# =============================================================================
+
+def create_initial_state(
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    ys_actual: np.ndarray,
+    target: Dict,
+    now_sec: int,
+) -> Optional[BaselineState]:  # Build the first BaselineState from the full history; None when no baseline can be built.
+    baseline = build_current_baseline(
+        ts_grid=ts_grid,
+        ys_mid_grid=ys_model,  # ys_model supplies the mid series, ys_actual the band series
+        ys_band_grid=ys_actual,
+        target=target,
+    )
+
+    if baseline is None:
+        return None
+
+    period, phase_origin_ts, template, lower_template, upper_template = baseline
+
+    return BaselineState(
+        period=int(period),
+        phase_origin_ts=int(phase_origin_ts),
+        template=template.astype(float).tolist(),  # templates are stored as plain lists of floats
+        lower_template=lower_template.astype(float).tolist(),
+        upper_template=upper_template.astype(float).tolist(),
+        strategy=str(target.get("strategy", "phase_point")),
+        status=BASELINE_STATUS_HEALTHY,
+        clean_seconds=int(period * MAX_CYCLES_FOR_TEMPLATE),  # a fresh state starts with a full template span counted as clean
+        last_update_ts=now_sec,
+        last_seen_ts=now_sec,
+        y_min=float(np.min(ys_actual)),
+        y_max=float(np.max(ys_actual)),
+    )
+
+
+def apply_phase_lock_to_state(
+    state: BaselineState,
+    best_period: int,
+    best_origin: int,
+) -> None:  # Write the phase-locked period/origin into state in place, resampling its templates to the new period.
+    best_period = int(best_period)
+
+    if best_period <= 1:
+        return  # an invalid period leaves the state untouched
+
+    if len(state.template) != best_period:
+        state.template = resample_template(
+            np.array(state.template, dtype=float),
+            best_period,
+        ).astype(float).tolist()
+
+    if len(state.lower_template) != best_period:
+        state.lower_template = resample_template(
+            np.array(state.lower_template, dtype=float),
+            best_period,
+        ).astype(float).tolist()
+
+    if len(state.upper_template) != best_period:
+        state.upper_template = resample_template(
+            np.array(state.upper_template, dtype=float),
+            best_period,
+        ).astype(float).tolist()
+
+    state.period = best_period
+    state.phase_origin_ts = int(best_origin)
+
+
+def maybe_update_state(
+    key: str,
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    ys_actual: np.ndarray,
+    target: Dict,
+) -> Tuple[Optional[BaselineState], bool, float, float, float, int, float]:  # Returns (state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err, max_outside_seconds, max_exceed_ratio).
+    now_sec = int(time.time())
+    state = BASELINE_STATES.get(key)  # per-series state kept in the module-level BASELINE_STATES mapping
+
+    if state is None:
+        state = create_initial_state(  # first sighting of this key: try to build a template from the history
+            ts_grid=ts_grid,
+            ys_model=ys_model,
+            ys_actual=ys_actual,
+            target=target,
+            now_sec=now_sec,
+        )
+
+        if state is None:
+            return None, False, 0.0, 0.0, 0.0, 0, 0.0  # no baseline could be built yet
+
+        BASELINE_STATES[key] = state
+
+        logger.info(
+            "初始化健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss",
+            key,
+            state.strategy,
+            state.period,
+            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+            state.clean_seconds,
+        )
+
+        return state, False, 0.0, 0.0, 0.0, 0, 0.0  # no detection is run on the call that creates the state
+
+    elapsed = max(1, now_sec - int(state.last_seen_ts))  # seconds since the previous call, at least 1 ...
+    elapsed = min(elapsed, POLL_INTERVAL * 2)  # ... and at most two poll intervals
+    state.last_seen_ts = now_sec
+
+    (
+        is_anomaly,
+        outside_ratio,
+        mean_abs_err,
+        mean_rel_err,
+        best_period,
+        best_origin,
+        max_outside_seconds,
+        max_exceed_ratio,
+    ) = detect_anomaly(
+        state=state,
+        ts_grid=ts_grid,
+        ys_model=ys_model,
+        ys_actual=ys_actual,
+        target=target,
+    )
+
+    if is_anomaly:
+        state.status = BASELINE_STATUS_ANOMALY  # templates, period and origin are left untouched while anomalous
+        state.clean_seconds = 0
+        BASELINE_STATES[key] = state
+
+        logger.warning(
+            "检测到异常，冻结模板 key=%s outside_ratio=%.2f max_outside=%ss max_exceed_ratio=%.2f mean_abs_err=%.4f mean_rel_err=%.4f",
+            key,
+            outside_ratio,
+            max_outside_seconds,
+            max_exceed_ratio,
+            mean_abs_err,
+            mean_rel_err,
+        )
+
+        return (
+            state,
+            True,
+            outside_ratio,
+            mean_abs_err,
+            mean_rel_err,
+            max_outside_seconds,
+            max_exceed_ratio,
+        )
+
+    old_period = int(state.period)  # remembered only to log the change made by the phase lock
+    old_origin = int(state.phase_origin_ts)
+
+    apply_phase_lock_to_state(state, best_period, best_origin)
+
+    if old_period != state.period or old_origin != state.phase_origin_ts:
+        logger.info(
+            "phase-lock key=%s period %s -> %s origin %s -> %s",
+            key,
+            old_period,
+            state.period,
+            datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"),
+            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+        )
+
+    if state.status == BASELINE_STATUS_ANOMALY:
+        state.status = BASELINE_STATUS_RECOVERING  # first clean pass after an anomaly: start counting clean time
+        state.clean_seconds = elapsed
+        BASELINE_STATES[key] = state
+
+        logger.info(
+            "异常开始恢复 key=%s clean_seconds=%ss",
+            key,
+            state.clean_seconds,
+        )
+
+        return (
+            state,
+            False,
+            outside_ratio,
+            mean_abs_err,
+            mean_rel_err,
+            max_outside_seconds,
+            max_exceed_ratio,
+        )
+
+    if state.status == BASELINE_STATUS_RECOVERING:
+        state.clean_seconds += elapsed
+    else:
+        state.status = BASELINE_STATUS_HEALTHY
+        state.clean_seconds += elapsed
+
+    min_clean_for_update = max(
+        RECOVERY_MIN_SECONDS,
+        int(state.period) * MIN_FULL_CYCLES_FOR_TEMPLATE,  # enough clean time to cover the minimum number of full cycles
+    )
+
+    if state.clean_seconds < min_clean_for_update:
+        BASELINE_STATES[key] = state  # not enough clean data yet: keep the current templates
+        return (
+            state,
+            False,
+            outside_ratio,
+            mean_abs_err,
+            mean_rel_err,
+            max_outside_seconds,
+            max_exceed_ratio,
+        )
+
+    tail_seconds = min(
+        int(state.clean_seconds),
+        int(state.period) * MAX_CYCLES_FOR_TEMPLATE,  # rebuild from the clean tail only, capped at the maximum template span
+    )
+
+    baseline = build_current_baseline(
+        ts_grid=ts_grid,
+        ys_mid_grid=ys_model,
+        ys_band_grid=ys_actual,
+        target=target,
+        tail_seconds=tail_seconds,
+    )
+
+    if baseline is None:
+        BASELINE_STATES[key] = state  # rebuild failed: keep the current templates
+        return (
+            state,
+            False,
+            outside_ratio,
+            mean_abs_err,
+            mean_rel_err,
+            max_outside_seconds,
+            max_exceed_ratio,
+        )
+
+    new_period, new_origin, new_template, new_lower_template, new_upper_template = baseline
+
+    alpha = RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else HEALTHY_EMA_ALPHA  # blend weight of the new templates
+
+    state.template = merge_template(
+        np.array(state.template, dtype=float),
+        new_template,
+        alpha,
+    ).astype(float).tolist()
+
+    state.lower_template = merge_template(
+        np.array(state.lower_template, dtype=float),
+        new_lower_template,
+        alpha,
+    ).astype(float).tolist()
+
+    state.upper_template = merge_template(
+        np.array(state.upper_template, dtype=float),
+        new_upper_template,
+        alpha,
+    ).astype(float).tolist()
+
+    state.period = int(new_period)  # period and origin are taken from the rebuilt baseline, not blended
+    state.phase_origin_ts = int(new_origin)
+    state.status = BASELINE_STATUS_HEALTHY
+    state.last_update_ts = now_sec
+
+    if tail_seconds > 0 and len(ys_actual) >= tail_seconds:
+        state.y_min = float(np.min(ys_actual[-tail_seconds:]))  # last tail_seconds samples; assumes one sample per second (verify caller)
+        state.y_max = float(np.max(ys_actual[-tail_seconds:]))
+    else:
+        state.y_min = float(np.min(ys_actual))
+        state.y_max = float(np.max(ys_actual))
+
+    BASELINE_STATES[key] = state
+
+    logger.info(
+        "更新健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss alpha=%.2f",
+        key,
+        state.strategy,
+        state.period,
+        datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+        state.clean_seconds,
+        alpha,
+    )
+
+    return (
+        state,
+        False,
+        outside_ratio,
+        mean_abs_err,
+        mean_rel_err,
+        max_outside_seconds,
+        max_exceed_ratio,
+    )
+
+
+# =============================================================================
+# Prometheus 写入
+# =============================================================================
+
+def prom_escape_label_value(value: str) -> str:
+    """Escape backslash, newline and double quote for use inside a quoted label value.
+
+    Non-string input is converted with ``str()`` first.
+    """
+    escapes = {ord("\\"): "\\\\", ord("\n"): "\\n", ord('"'): '\\"'}
+    return str(value).translate(escapes)
+
+
+def labels_to_str(labels: Dict[str, str]) -> str:
+    """Render ``labels`` as ``{k="v",...}`` with keys sorted; empty input gives ""."""
+    if not labels:
+        return ""
+
+    rendered = ",".join(
+        f'{name}="{prom_escape_label_value(labels[name])}"'
+        for name in sorted(labels)
+    )
+    return "{" + rendered + "}"
+
+
+def write_series(
+    metric_name: str,
+    labels: Dict[str, str],
+    ts_list: List[int],
+    values: List[float],
+) -> bool:  # POST one metric's samples to VM_URL's Prometheus import endpoint; True only if the request succeeded.
+    if not ts_list or not values or len(ts_list) != len(values):
+        return False  # nothing to write, or timestamps and values do not line up
+
+    label_str = labels_to_str(labels)
+    lines = []
+
+    for t, y in zip(ts_list, values):
+        try:
+            ts_sec = int(round(float(t)))
+            val = float(y)
+        except Exception:
+            continue  # samples that cannot be converted are skipped
+
+        if not math.isfinite(ts_sec) or not math.isfinite(val):
+            continue  # non-finite values are not written
+
+        lines.append(f"{metric_name}{label_str} {val:.6f} {ts_sec * 1000}")  # one text line per sample, timestamp in milliseconds
+
+    if not lines:
+        return False
+
+    payload = "\n".join(lines) + "\n"
+
+    try:
+        resp = requests.post(
+            f"{VM_URL}/api/v1/import/prometheus",
+            data=payload.encode("utf-8"),
+            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
+            timeout=10,
+        )
+        resp.raise_for_status()  # an HTTP error status is treated as a failed write
+        return True
+
+    except requests.RequestException as e:
+        logger.error("写入数据失败 metric=%s: %s", metric_name, e)
+        return False  # failures are logged and reported through the return value, never raised
+
+
+def write_prediction_bundle(
+    pred_metric: str,
+    anomaly_metric: str,
+    labels: Dict[str, str],
+    ts_future: List[int],
+    pred_values: np.ndarray,
+    lower_values: np.ndarray,
+    upper_values: np.ndarray,
+    is_anomaly: bool,
+    outside_ratio: float,
+    mean_abs_err: float,
+    mean_rel_err: float,
+    max_outside_seconds: int,
+    max_exceed_ratio: float,
+    event_ts: int,
+) -> bool:  # Write the prediction, its bounds and the anomaly indicators as nine series; True only if every write succeeded.
+    ok1 = write_series(
+        metric_name=pred_metric,
+        labels=labels,
+        ts_list=ts_future,
+        values=pred_values.astype(float).tolist(),
+    )
+
+    ok2 = write_series(
+        metric_name=f"{pred_metric}_lower",
+        labels=labels,
+        ts_list=ts_future,
+        values=lower_values.astype(float).tolist(),
+    )
+
+    ok3 = write_series(
+        metric_name=f"{pred_metric}_upper",
+        labels=labels,
+        ts_list=ts_future,
+        values=upper_values.astype(float).tolist(),
+    )
+
+    anomaly_labels = dict(labels)  # copy so the caller's labels dict is not modified
+    anomaly_labels["type"] = "prediction_deviation"
+
+    ok4 = write_series(
+        metric_name=anomaly_metric,
+        labels=anomaly_labels,
+        ts_list=[event_ts],  # the indicator series get a single sample at event_ts
+        values=[1.0 if is_anomaly else 0.0],
+    )
+
+    ok5 = write_series(
+        metric_name=f"{anomaly_metric}_outside_ratio",
+        labels=anomaly_labels,
+        ts_list=[event_ts],
+        values=[outside_ratio],
+    )
+
+    ok6 = write_series(
+        metric_name=f"{anomaly_metric}_mean_abs_error",
+        labels=anomaly_labels,
+        ts_list=[event_ts],
+        values=[mean_abs_err],
+    )
+
+    ok7 = write_series(
+        metric_name=f"{anomaly_metric}_mean_rel_error",
+        labels=anomaly_labels,
+        ts_list=[event_ts],
+        values=[mean_rel_err],
+    )
+
+    ok8 = write_series(
+        metric_name=f"{anomaly_metric}_max_consecutive_outside",
+        labels=anomaly_labels,
+        ts_list=[event_ts],
+        values=[float(max_outside_seconds)],
+    )
+
+    ok9 = write_series(
+        metric_name=f"{anomaly_metric}_max_exceed_ratio",
+        labels=anomaly_labels,
+        ts_list=[event_ts],
+        values=[float(max_exceed_ratio)],
+    )
+
+    return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7 and ok8 and ok9  # every write above was already attempted
+
+
+# =============================================================================
+# 标签解析
+# =============================================================================
+
+_LABEL_PATTERN = re.compile(
+    r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
+)
+
+
+def parse_labels_from_query(query: str) -> Dict[str, str]:
+    labels = {}
+
+    if "{" not in query or "}" not in query:
+        return labels
+
+    try:
+        label_part = query[query.index("{") + 1:query.rindex("}")]
+    except Exception:
+        return labels
+
+    for match in _LABEL_PATTERN.finditer(label_part):
+        key = match.group(1)
+        value = match.group(2)
+
+        value = (
+            value
+            .replace('\\"', '"')
+            .replace("\\n", "\n")
+            .replace("\\\\", "\\")
+        )
+
+        labels[key] = value
+
+    return labels
+
+
+def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]:
+    result = {}
+
+    for d in dicts:
+        if d:
+            result.update(d)
+
+    return result
+
+
+def series_key(metric_name: str, labels: Dict[str, str]) -> str:
+    return metric_name + labels_to_str(labels)
+
+
+# =============================================================================
+# 状态持久化
+# =============================================================================
+
+def load_state() -> None:
+    global BASELINE_STATES
+
+    if not os.path.exists(STATE_FILE):
+        return
+
+    try:
+        with open(STATE_FILE, "r", encoding="utf-8") as f:
+            raw = json.load(f)
+
+        states = {}
+
+        for key, value in raw.get("baseline_states", {}).items():
+            required_fields = {
+                "period",
+                "phase_origin_ts",
+                "template",
+                "lower_template",
+                "upper_template",
+                "strategy",
+                "status",
+                "clean_seconds",
+                "last_update_ts",
+                "last_seen_ts",
+                "y_min",
+                "y_max",
+            }
+
+            if not required_fields.issubset(set(value.keys())):
+                continue
+
+            states[key] = BaselineState(**value)
+
+        BASELINE_STATES = states
+
+        logger.info(
+            "已加载预测状态文件 %s，状态数量=%d",
+            STATE_FILE,
+            len(BASELINE_STATES),
+        )
+
+    except Exception as e:
+        logger.warning("加载预测状态文件失败，将重新学习: %s", e)
+
+
+def save_state() -> None:
+    try:
+        raw = {
+            "baseline_states": {
+                key: asdict(value)
+                for key, value in BASELINE_STATES.items()
+            }
+        }
+
+        tmp_file = STATE_FILE + ".tmp"
+
+        with open(tmp_file, "w", encoding="utf-8") as f:
+            json.dump(raw, f, ensure_ascii=False, indent=2)
+
+        os.replace(tmp_file, STATE_FILE)
+
+    except Exception as e:
+        logger.warning("保存预测状态文件失败: %s", e)
+
+
+# =============================================================================
+# 时间轴
+# =============================================================================
+
+def build_prediction_timestamps(
+    key: str,
+    last_real_ts: int,
+    now_sec: int,
+) -> Optional[List[int]]:
+    data_lag = now_sec - last_real_ts
+
+    if data_lag > MAX_DATA_LAG_SECONDS:
+        logger.warning(
+            "真实数据延迟过大，跳过预测 key=%s data_lag=%ss max=%ss",
+            key,
+            data_lag,
+            MAX_DATA_LAG_SECONDS,
+        )
+        return None
+
+    last_written_real_ts = LAST_REAL_TS_WRITTEN.get(key)
+
+    if last_written_real_ts is not None and last_real_ts <= int(last_written_real_ts):
+        logger.info(
+            "真实数据时间戳未推进，跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s",
+            key,
+            last_real_ts,
+            last_written_real_ts,
+        )
+        return None
+
+    base_ts = last_real_ts
+
+    return [
+        base_ts + i + 1
+        for i in range(WRITE_HORIZON_SECONDS)
+    ]
+
+
+# =============================================================================
+# 主流程
+# =============================================================================
+
+def run_once() -> None:  # one polling pass over every entry of PREDICT_TARGETS
+    now_str = datetime.now().strftime("%H:%M:%S")
+    # now_str is taken once, so every log line of this pass shares one clock label.
+    for target in PREDICT_TARGETS:
+        query = target["query"]
+        pred_metric = target["pred_metric"]
+        anomaly_metric = target["anomaly_metric"]
+
+        ts, ys = fetch_history(query)
+        # MIN_POINTS is checked twice: on the fetched points and again after normalize_history.
+        if len(ys) < MIN_POINTS:
+            logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
+            continue
+
+        ts_grid, ys_grid_raw = normalize_history(ts, ys)
+
+        if len(ys_grid_raw) < MIN_POINTS:
+            logger.info("[%s] %s 清洗后数据不足（%d 点），跳过", now_str, query, len(ys_grid_raw))
+            continue
+
+        ys_grid_model = preprocess_values(ys_grid_raw, target)
+        # Written series carry the query's own labels plus EXTRA_PREDICT_LABELS.
+        base_labels = parse_labels_from_query(query)
+        write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
+
+        key = series_key(pred_metric, write_labels)
+        # The first returned item is the state (None means skip); the other six are passed on to the writer.
+        (
+            state,
+            is_anomaly,
+            outside_ratio,
+            mean_abs_err,
+            mean_rel_err,
+            max_outside_seconds,
+            max_exceed_ratio,
+        ) = maybe_update_state(
+            key=key,
+            ts_grid=ts_grid,
+            ys_model=ys_grid_model,
+            ys_actual=ys_grid_raw,
+            target=target,
+        )
+
+        if state is None:
+            logger.info("[%s] %s 暂无可用健康模板，等待学习", now_str, query)
+            continue
+        # data_lag is only used by the summary log at the end of this iteration.
+        now_sec = int(time.time())
+        last_real_ts = int(ts_grid[-1])
+        data_lag = now_sec - last_real_ts
+
+        ts_future = build_prediction_timestamps(
+            key=key,
+            last_real_ts=last_real_ts,
+            now_sec=now_sec,
+        )
+        # None and an empty list both end this target's iteration.
+        if not ts_future:
+            continue
+
+        pred_values, lower_raw, upper_raw = predict_state_bundle(state, ts_future)
+
+        lower_values, upper_values = calc_final_bounds(
+            state=state,
+            pred=pred_values,
+            lower_raw=lower_raw,
+            upper_raw=upper_raw,
+            target=target,
+        )
+
+        ok = write_prediction_bundle(
+            pred_metric=pred_metric,
+            anomaly_metric=anomaly_metric,
+            labels=write_labels,
+            ts_future=ts_future,
+            pred_values=pred_values,
+            lower_values=lower_values,
+            upper_values=upper_values,
+            is_anomaly=is_anomaly,
+            outside_ratio=outside_ratio,
+            mean_abs_err=mean_abs_err,
+            mean_rel_err=mean_rel_err,
+            max_outside_seconds=max_outside_seconds,
+            max_exceed_ratio=max_exceed_ratio,
+            event_ts=last_real_ts,
+        )
+        # On failure LAST_REAL_TS_WRITTEN is not updated, so this sample is not marked as written.
+        if not ok:
+            logger.error("[%s] %s 写入预测数据失败", now_str, query)
+            continue
+
+        LAST_REAL_TS_WRITTEN[key] = last_real_ts
+        # The strings below are used only by the summary log line.
+        future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
+        future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
+        last_real_str = datetime.fromtimestamp(last_real_ts).strftime("%H:%M:%S")
+        origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S")
+
+        logger.info(
+            "[%s] %-40s → %-35s strategy=%s status=%s anomaly=%s outside=%.2f max_outside=%ss max_exceed=%.2f period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点，预测区间 %s ~ %s",
+            now_str,
+            query,
+            pred_metric,
+            state.strategy,
+            state.status,
+            is_anomaly,
+            outside_ratio,
+            max_outside_seconds,
+            max_exceed_ratio,
+            state.period,
+            origin_str,
+            last_real_str,
+            data_lag,
+            len(ts_future),
+            future_start,
+            future_end,
+        )
+    # State is saved once per pass, after the loop over all targets.
+    save_state()
+
+
+def main() -> None:
+    load_state()
+
+    logger.info(
+        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s",
+        VM_URL,
+        HISTORY_MINUTES,
+        HORIZON_SECONDS,
+        WRITE_HORIZON_SECONDS,
+        POLL_INTERVAL,
+        STATE_FILE,
+        EXTRA_PREDICT_LABELS["forecast"],
+    )
+
+    while True:
+        run_once()
+        time.sleep(POLL_INTERVAL)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From f9b6506452a75ffd1f1f8beee1ebd1008fdc724d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Mon, 25 May 2026 13:46:29 +0800
Subject: [PATCH 30/55] feat(protoforge): fault update

---
 protoforge/core/demo.py    |   2 +-
 protoforge/core/fault.py   | 187 +++++++++++++++++++++++++++++++++++++
 protoforge/models/fault.py |   2 +
 web/src/views/Devices.vue  | 117 ++++++++++++++++++++---
 4 files changed, 296 insertions(+), 12 deletions(-)

diff --git a/protoforge/core/demo.py b/protoforge/core/demo.py
index ff0b333..b7ccae7 100644
--- a/protoforge/core/demo.py
+++ b/protoforge/core/demo.py
@@ -108,7 +108,7 @@ async def seed_demo_data(engine: Any, template_manager: Any) -> None:
             "points": [
                 {"name": "weight", "address": "net_weight", "data_type": "float32", "generator_type": "random", "min_value": 0.5, "max_value": 50.0},
                 {"name": "tare", "address": "tare_weight", "data_type": "float32", "generator_type": "fixed", "fixed_value": 2.5},
-                {"name": "stable", "address": "stable_flag", "data_type": "bool", "generator_type": "fixed", "fixed_value": true},
+                {"name": "stable", "address": "stable_flag", "data_type": "bool", "generator_type": "fixed", "fixed_value": True},
             ],
         },
     ]
diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
index e72842d..5beba87 100644
--- a/protoforge/core/fault.py
+++ b/protoforge/core/fault.py
@@ -41,6 +41,7 @@
         name="刀具磨损",
         description="刀具切削刃磨损，切削阻力增大，主轴电流升高，振动增大，进给速率下降",
         category="mechanical",
+        scenario_type="trend_drift",
         default_duration=300.0,
         tags=["刀具", "磨损", "渐进"],
         point_faults=[
@@ -67,6 +68,7 @@
         name="刀具崩刃",
         description="刀具突发性崩刃，振动剧烈突增，主轴电流峰值，进给停止",
         category="mechanical",
+        scenario_type="sudden_spike",
         default_duration=15.0,
         tags=["刀具", "崩刃", "突发"],
         point_faults=[
@@ -93,6 +95,7 @@
         name="主轴过热",
         description="主轴长时间高负荷运转或冷却不足，电流持续偏高，转速因热保护下降",
         category="thermal",
+        scenario_type="trend_drift",
         default_duration=240.0,
         tags=["主轴", "过热", "渐进"],
         point_faults=[
@@ -117,6 +120,7 @@
         name="主轴轴承故障",
         description="主轴轴承磨损或润滑不足，振动幅度持续升高，伴随电流轻微上升",
         category="mechanical",
+        scenario_type="trend_drift",
         default_duration=360.0,
         tags=["主轴", "轴承", "渐进"],
         point_faults=[
@@ -141,6 +145,7 @@
         name="进给堵转",
         description="进给轴卡死，进给速率降为零，主轴电流急剧升高",
         category="process",
+        scenario_type="sudden_spike",
         default_duration=20.0,
         tags=["进给", "堵转", "突发"],
         point_faults=[
@@ -163,6 +168,7 @@
         name="振动异常",
         description="工件装夹松动或切削共振，三轴振动突然大幅增加",
         category="mechanical",
+        scenario_type="sudden_spike",
         default_duration=60.0,
         tags=["振动", "装夹", "突发"],
         point_faults=[
@@ -185,6 +191,7 @@
         name="切削液不足",
         description="切削液供给不足，冷却润滑失效，热量积累导致振动和电流缓慢升高",
         category="process",
+        scenario_type="trend_drift",
         default_duration=480.0,
         tags=["切削液", "冷却", "渐进"],
         point_faults=[
@@ -211,6 +218,7 @@
         name="电源波动",
         description="供电电压不稳定，主轴转速和进给速率出现随机波动",
         category="electrical",
+        scenario_type="high_noise",
         default_duration=90.0,
         tags=["电源", "波动", "突发"],
         point_faults=[
@@ -222,6 +230,176 @@
                              multiplier=1.0, noise_scale=150.0),
         ],
     ),
+
+    # ==================================================================
+    # 以下为新增故障类型
+    # ==================================================================
+
+    # ------------------------------------------------------------------
+    # 传感器强干扰 — 高噪声波动型
+    # 场景：电磁干扰、接地不良、信号线屏蔽失效等导致传感器读数剧烈抖动
+    # 特征：均值基本不变，但噪声幅度突然增大数倍，信号看起来"毛刺"严重
+    # 区别于真实故障：设备本身没有坏，只是采集信号质量变差
+    # 模式：瞬间注入，持续期间每次采样都叠加大幅随机噪声
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="sensor_noise",
+        name="传感器强干扰",
+        description=(
+            "【高噪声波动型】电磁干扰或接地不良导致传感器信号质量恶化。"
+            "均值基本不变，但每次采样叠加大幅随机噪声，曲线呈现密集毛刺。"
+            "典型场景：变频器附近的传感器、信号线屏蔽层破损、接地回路故障。"
+        ),
+        category="electrical",
+        scenario_type="high_noise",
+        default_duration=120.0,
+        tags=["传感器", "干扰", "噪声", "高噪声波动型"],
+        point_faults=[
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=8.0),
+            PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=2.5),
+            PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=2.5),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=3.0),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=80.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 换工件/换程序段 — 工况切换型（高速加工 → 低速精加工）
+    # 场景：CNC 机床切换加工程序，从粗加工切换到精加工
+    # 特征：转速降低、进给降低、电流降低，所有指标跳到新的正常范围并稳定
+    # 关键：这不是故障！数据本身没有坏，只是工况变了，正常范围完全不同
+    # 模式：STEP 阶跃，立即跳到新基线并在整个 duration 内保持
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="mode_switch_fine_machining",
+        name="切换精加工工况",
+        description=(
+            "【工况切换型】从粗加工切换到精加工程序段。"
+            "主轴转速升高、进给速率降低、切削电流降低，各指标立即跳到新的正常范围并保持稳定。"
+            "数据本身没有异常，但与粗加工基线相比会触发阈值告警。"
+            "典型场景：换刀后进入精加工、加工不同特征面、程序跳段。"
+        ),
+        category="process",
+        scenario_type="mode_switch",
+        default_duration=300.0,
+        tags=["工况切换", "精加工", "程序段", "工况切换型"],
+        point_faults=[
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.STEP,
+                             multiplier=1.4, noise_scale=30.0),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.STEP,
+                             multiplier=0.3, noise_scale=10.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.STEP,
+                             multiplier=0.55, noise_scale=0.5),
+            PointFaultConfig(point="vibration_x", mode=FaultMode.STEP,
+                             multiplier=0.6, noise_scale=0.1),
+            PointFaultConfig(point="vibration_y", mode=FaultMode.STEP,
+                             multiplier=0.6, noise_scale=0.1),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.STEP,
+                             multiplier=0.6, noise_scale=0.1),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 进入空载工况 — 工况切换型（加工中 → 空载运行）
+    # 场景：加工完成、等待上料、程序暂停，主轴空转
+    # 特征：进给降为 0，电流大幅下降到空载水平，转速维持，振动降低
+    # 模式：STEP 阶跃，立即切换到空载基线
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="mode_switch_idle",
+        name="切换空载工况",
+        description=(
+            "【工况切换型】机床进入空载运行状态（加工完成等待上料、程序暂停）。"
+            "进给速率降为零，主轴电流降至空载水平（约为加工时的 20-30%），"
+            "主轴转速维持，振动明显降低。"
+            "典型场景：换料等待、程序暂停、加工间隙、换刀等待。"
+        ),
+        category="process",
+        scenario_type="mode_switch",
+        default_duration=180.0,
+        tags=["工况切换", "空载", "等待", "工况切换型"],
+        point_faults=[
+            PointFaultConfig(point="feed_rate", mode=FaultMode.STEP,
+                             target_value=0.0, noise_scale=2.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.STEP,
+                             multiplier=0.22, noise_scale=0.3),
+            PointFaultConfig(point="vibration_x", mode=FaultMode.STEP,
+                             multiplier=0.25, noise_scale=0.05),
+            PointFaultConfig(point="vibration_y", mode=FaultMode.STEP,
+                             multiplier=0.25, noise_scale=0.05),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.STEP,
+                             multiplier=0.25, noise_scale=0.05),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 突发电流尖峰 — 突发脉冲型
+    # 场景：切削过程中遇到硬质夹杂物、刀具切入角突变、工件材质不均
+    # 特征：主轴电流瞬间冲高（持续 2-5 秒），然后恢复正常，其他指标基本不变
+    # 区别于刀具崩刃：电流尖峰后能自动恢复，不会导致停机
+    # 模式：瞬间注入，持续时间极短
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="current_spike",
+        name="突发电流尖峰",
+        description=(
+            "【突发脉冲型】切削过程中遇到硬质夹杂物或材质不均，主轴电流瞬间冲高后自动恢复。"
+            "电流短暂升至正常值的 3-4 倍，持续仅数秒，振动轻微抖动，进给基本不受影响。"
+            "典型场景：铸件内部硬质点、焊缝区域、材料硬度不均匀。"
+            "与刀具崩刃的区别：能自动恢复，不触发停机报警。"
+        ),
+        category="mechanical",
+        scenario_type="sudden_spike",
+        default_duration=5.0,
+        tags=["电流", "尖峰", "脉冲", "突发脉冲型"],
+        point_faults=[
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=3.5, noise_scale=1.5),
+            PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT,
+                             multiplier=2.0, noise_scale=0.5),
+            PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT,
+                             multiplier=2.0, noise_scale=0.5),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
+                             multiplier=2.5, noise_scale=0.8),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 主轴负载异常 — 关系约束型
+    # 场景：刀具钝化但未完全磨损、切削参数不匹配、工件材料变硬
+    # 特征：主轴转速正常、进给速率正常，但主轴电流异常升高
+    # 关键：单看任何一个指标都"正常"，只有多指标关系才能发现异常
+    # 模式：渐进式，电流缓慢爬升，转速和进给保持不变
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="spindle_load_anomaly",
+        name="主轴负载异常",
+        description=(
+            "【关系约束型】主轴转速正常、进给速率正常，但主轴电流异常升高。"
+            "单看任何一个指标都在正常范围内，只有分析多指标关系才能发现异常。"
+            "物理含义：切削阻力增大（刀具钝化初期、材料变硬），"
+            "系统尚未触发保护降速，但电流已超出正常切削功率范围。"
+            "典型场景：刀具轻度钝化、切削液浓度不足、工件材料批次差异。"
+        ),
+        category="mechanical",
+        scenario_type="relation_constraint",
+        default_duration=240.0,
+        tags=["主轴", "负载", "关系约束", "关系约束型"],
+        point_faults=[
+            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
+                             multiplier=2.8, noise_scale=1.0),
+            # 转速和进给保持不变（multiplier=1.0），只叠加极小噪声维持真实感
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=15.0),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=5.0),
+        ],
+    ),
 ]
 
 # 按 id 索引
@@ -374,6 +552,15 @@ def _compute_value(
                 target = baseline * (1.0 + (pf.multiplier - 1.0) * intensity)
             else:
                 target = baseline
+        elif pf.mode == FaultMode.STEP:
+            # 阶跃模式：立即跳到新基线并在整个 duration 内保持（工况切换专用）
+            # 与 INSTANT 的区别：STEP 的 multiplier 表示新工况的正常倍数，不受 intensity 缩放
+            if pf.target_value is not None:
+                target = pf.target_value
+            elif pf.multiplier is not None:
+                target = baseline * pf.multiplier
+            else:
+                target = baseline
         else:
             # 渐进模式：随 progress 线性劣化
             if pf.target_value is not None:
diff --git a/protoforge/models/fault.py b/protoforge/models/fault.py
index cc038e0..025da96 100644
--- a/protoforge/models/fault.py
+++ b/protoforge/models/fault.py
@@ -8,6 +8,7 @@ class FaultMode(str, Enum):
     """故障注入模式"""
     INSTANT = "instant"       # 瞬间跳变到异常值，持续 duration 后恢复
     GRADUAL = "gradual"       # 渐进式劣化，随时间线性恶化，到 duration 时达到峰值后恢复
+    STEP = "step"             # 阶跃切换到新工况基线，整个 duration 内保持新基线（工况切换专用）
 
 
 class FaultStatus(str, Enum):
@@ -36,6 +37,7 @@ class FaultTypeDefinition(BaseModel):
     name: str
     description: str
     category: str                          # 故障分类：mechanical / electrical / thermal / process
+    scenario_type: str = "trend_drift"     # 异常场景类型：trend_drift / sudden_spike / high_noise / mode_switch / relation_constraint
     default_duration: float = 120.0        # 默认持续时间（秒）
     point_faults: list[PointFaultConfig] = Field(default_factory=list)
     tags: list[str] = Field(default_factory=list)
diff --git a/web/src/views/Devices.vue b/web/src/views/Devices.vue
index 65e0535..36141a4 100644
--- a/web/src/views/Devices.vue
+++ b/web/src/views/Devices.vue
@@ -92,24 +92,55 @@
       </n-modal>
 
       <!-- 故障注入 Modal -->
-      <n-modal v-model:show="showFaultModal" preset="card" title="故障注入" style="width:480px">
+      <n-modal v-model:show="showFaultModal" preset="card" title="故障注入" style="width:560px">
         <n-space vertical size="medium">
-          <n-text depth="3" style="font-size:13px">设备：{{ faultTargetDevice?.name }}</n-text>
+          <n-text depth="3" style="font-size:13px">目标设备：<n-text strong>{{ faultTargetDevice?.name }}</n-text></n-text>
+
           <n-form-item label="故障类型" label-placement="left" label-width="80">
             <n-select
               v-model:value="faultTypeId"
-              :options="faultTypeOptions"
+              :options="faultTypeGroupedOptions"
               placeholder="选择故障类型"
               @update:value="onFaultTypeChange"
             />
           </n-form-item>
-          <n-alert v-if="selectedFaultType" type="warning" :bordered="false" style="font-size:12px">
-            <div style="font-weight:500;margin-bottom:4px">{{ selectedFaultType.name }} · {{ faultCategoryLabel(selectedFaultType.category) }}</div>
-            <div style="color:#94a3b8">{{ selectedFaultType.description }}</div>
-            <div style="margin-top:6px;color:#94a3b8">
-              影响测点：{{ selectedFaultType.point_faults.map(p => p.point).join('、') }}
+
+          <!-- 场景说明卡片 -->
+          <div v-if="selectedFaultType" style="background:#1a1a2e;border:1px solid #2d2d4e;border-radius:8px;padding:14px 16px;">
+            <!-- 标题行：故障名 + 场景类型标签 + 分类标签 -->
+            <n-space align="center" style="margin-bottom:10px;flex-wrap:wrap;gap:6px">
+              <n-text strong style="font-size:14px">{{ selectedFaultType.name }}</n-text>
+              <n-tag :type="scenarioTagType(selectedFaultType.scenario_type)" size="small" round>
+                {{ scenarioTypeLabel(selectedFaultType.scenario_type) }}
+              </n-tag>
+              <n-tag size="small" :bordered="false" style="background:#2d2d4e;color:#94a3b8">
+                {{ faultCategoryLabel(selectedFaultType.category) }}
+              </n-tag>
+            </n-space>
+
+            <!-- 描述文本 -->
+            <n-text depth="3" style="font-size:12px;line-height:1.7;display:block;white-space:pre-wrap">{{ selectedFaultType.description }}</n-text>
+
+            <!-- 影响测点 -->
+            <div style="margin-top:10px;padding-top:10px;border-top:1px solid #2d2d4e">
+              <n-text depth="3" style="font-size:11px">影响测点：</n-text>
+              <n-space size="small" style="margin-top:4px;flex-wrap:wrap">
+                <n-tag
+                  v-for="pf in selectedFaultType.point_faults"
+                  :key="pf.point"
+                  size="tiny"
+                  :bordered="false"
+                  style="background:#2d2d4e;color:#e2e8f0;font-family:monospace"
+                >
+                  {{ pf.point }}
+                  <span style="color:#94a3b8;margin-left:4px">
+                    {{ pointFaultModeLabel(pf) }}
+                  </span>
+                </n-tag>
+              </n-space>
             </div>
-          </n-alert>
+          </div>
+
           <n-form-item label="持续时间" label-placement="left" label-width="80">
             <n-input-number
               v-model:value="faultDuration"
@@ -120,11 +151,15 @@
               <template #suffix>秒</template>
             </n-input-number>
           </n-form-item>
+
           <n-form-item label="故障强度" label-placement="left" label-width="80">
             <n-space vertical style="width:100%">
               <n-slider v-model:value="faultIntensity" :min="0.1" :max="1.0" :step="0.1" />
               <n-text depth="3" style="font-size:12px">
                 {{ faultIntensityLabel }}（{{ faultIntensity }}）
+                <span v-if="selectedFaultType?.scenario_type === 'mode_switch'" style="color:#f59e0b">
+                  · 工况切换型强度不影响切换幅度
+                </span>
               </n-text>
             </n-space>
           </n-form-item>
@@ -243,12 +278,17 @@ const columns = [
   },
   { title: '测点', key: 'points', width: 70, render: (row) => (row.points || []).length },
   {
-    title: '故障', key: 'fault', width: 90,
+    title: '故障', key: 'fault', width: 130,
     render: (row) => {
       const fault = activeFaults.value[row.id]
       if (!fault || fault.status === 'none') return h(NTag, { size: 'tiny', bordered: false }, () => '正常')
       const pct = Math.round((fault.progress || 0) * 100)
-      return h(NTag, { size: 'tiny', type: 'error', bordered: false }, () => `${fault.fault_type_name} ${pct}%`)
+      const ft = faultTypes.value.find(t => t.id === fault.fault_type_id)
+      const scenarioLabel = ft ? scenarioTypeLabel(ft.scenario_type) : ''
+      return h(NSpace, { size: 2, vertical: false, align: 'center' }, () => [
+        h(NTag, { size: 'tiny', type: 'error', bordered: false }, () => `${fault.fault_type_name} ${pct}%`),
+        scenarioLabel ? h(NTag, { size: 'tiny', bordered: false, style: 'font-size:10px;background:#2d1b1b;color:#f87171' }, () => scenarioLabel) : null,
+      ])
     }
   },
   {
@@ -351,6 +391,25 @@ const faultTypeOptions = computed(() =>
   faultTypes.value.map(t => ({ label: `${t.name}（${faultCategoryLabel(t.category)}）`, value: t.id }))
 )
 
+// 按场景类型分组的故障选项
+const SCENARIO_ORDER = ['trend_drift', 'sudden_spike', 'high_noise', 'mode_switch', 'relation_constraint']
+const faultTypeGroupedOptions = computed(() => {
+  const groups = {}
+  for (const t of faultTypes.value) {
+    const st = t.scenario_type || 'trend_drift'
+    if (!groups[st]) groups[st] = []
+    groups[st].push({ label: t.name, value: t.id })
+  }
+  return SCENARIO_ORDER
+    .filter(st => groups[st])
+    .map(st => ({
+      type: 'group',
+      label: scenarioTypeLabel(st),
+      key: st,
+      children: groups[st],
+    }))
+})
+
 const selectedFaultType = computed(() =>
   faultTypes.value.find(t => t.id === faultTypeId.value) || null
 )
@@ -368,6 +427,42 @@ function faultCategoryLabel(category) {
   return map[category] || category
 }
 
+function scenarioTypeLabel(scenarioType) {
+  const map = {
+    trend_drift: '趋势漂移型',
+    sudden_spike: '突发脉冲型',
+    high_noise: '高噪声波动型',
+    mode_switch: '工况切换型',
+    relation_constraint: '关系约束型',
+  }
+  return map[scenarioType] || scenarioType
+}
+
+function scenarioTagType(scenarioType) {
+  const map = {
+    trend_drift: 'warning',
+    sudden_spike: 'error',
+    high_noise: 'info',
+    mode_switch: 'success',
+    relation_constraint: 'default',
+  }
+  return map[scenarioType] || 'default'
+}
+
+function pointFaultModeLabel(pf) {
+  if (pf.mode === 'step') return '→ 阶跃'
+  if (pf.mode === 'gradual') {
+    if (pf.multiplier != null) return `→ ×${pf.multiplier}`
+    if (pf.target_value != null) return `→ ${pf.target_value}`
+  }
+  if (pf.mode === 'instant') {
+    if (pf.target_value != null) return `→ ${pf.target_value}`
+    if (pf.multiplier != null && pf.multiplier !== 1.0) return `→ ×${pf.multiplier}`
+    return '± 噪声'
+  }
+  return ''
+}
+
 function onFaultTypeChange(val) {
   const t = faultTypes.value.find(f => f.id === val)
   if (t && t.default_duration) faultDuration.value = t.default_duration

From 685ae6b79c28cf8094e20fc6f9f7c00a32d0fe4a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Mon, 25 May 2026 14:00:07 +0800
Subject: [PATCH 31/55] Revert "feat(protoforge): fault update"

This reverts commit f9b6506452a75ffd1f1f8beee1ebd1008fdc724d.
---
 protoforge/core/demo.py    |   2 +-
 protoforge/core/fault.py   | 187 -------------------------------------
 protoforge/models/fault.py |   2 -
 web/src/views/Devices.vue  | 117 +++--------------------
 4 files changed, 12 insertions(+), 296 deletions(-)

diff --git a/protoforge/core/demo.py b/protoforge/core/demo.py
index b7ccae7..ff0b333 100644
--- a/protoforge/core/demo.py
+++ b/protoforge/core/demo.py
@@ -108,7 +108,7 @@ async def seed_demo_data(engine: Any, template_manager: Any) -> None:
             "points": [
                 {"name": "weight", "address": "net_weight", "data_type": "float32", "generator_type": "random", "min_value": 0.5, "max_value": 50.0},
                 {"name": "tare", "address": "tare_weight", "data_type": "float32", "generator_type": "fixed", "fixed_value": 2.5},
-                {"name": "stable", "address": "stable_flag", "data_type": "bool", "generator_type": "fixed", "fixed_value": True},
+                {"name": "stable", "address": "stable_flag", "data_type": "bool", "generator_type": "fixed", "fixed_value": true},
             ],
         },
     ]
diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
index 5beba87..e72842d 100644
--- a/protoforge/core/fault.py
+++ b/protoforge/core/fault.py
@@ -41,7 +41,6 @@
         name="刀具磨损",
         description="刀具切削刃磨损，切削阻力增大，主轴电流升高，振动增大，进给速率下降",
         category="mechanical",
-        scenario_type="trend_drift",
         default_duration=300.0,
         tags=["刀具", "磨损", "渐进"],
         point_faults=[
@@ -68,7 +67,6 @@
         name="刀具崩刃",
         description="刀具突发性崩刃，振动剧烈突增，主轴电流峰值，进给停止",
         category="mechanical",
-        scenario_type="sudden_spike",
         default_duration=15.0,
         tags=["刀具", "崩刃", "突发"],
         point_faults=[
@@ -95,7 +93,6 @@
         name="主轴过热",
         description="主轴长时间高负荷运转或冷却不足，电流持续偏高，转速因热保护下降",
         category="thermal",
-        scenario_type="trend_drift",
         default_duration=240.0,
         tags=["主轴", "过热", "渐进"],
         point_faults=[
@@ -120,7 +117,6 @@
         name="主轴轴承故障",
         description="主轴轴承磨损或润滑不足，振动幅度持续升高，伴随电流轻微上升",
         category="mechanical",
-        scenario_type="trend_drift",
         default_duration=360.0,
         tags=["主轴", "轴承", "渐进"],
         point_faults=[
@@ -145,7 +141,6 @@
         name="进给堵转",
         description="进给轴卡死，进给速率降为零，主轴电流急剧升高",
         category="process",
-        scenario_type="sudden_spike",
         default_duration=20.0,
         tags=["进给", "堵转", "突发"],
         point_faults=[
@@ -168,7 +163,6 @@
         name="振动异常",
         description="工件装夹松动或切削共振，三轴振动突然大幅增加",
         category="mechanical",
-        scenario_type="sudden_spike",
         default_duration=60.0,
         tags=["振动", "装夹", "突发"],
         point_faults=[
@@ -191,7 +185,6 @@
         name="切削液不足",
         description="切削液供给不足，冷却润滑失效，热量积累导致振动和电流缓慢升高",
         category="process",
-        scenario_type="trend_drift",
         default_duration=480.0,
         tags=["切削液", "冷却", "渐进"],
         point_faults=[
@@ -218,7 +211,6 @@
         name="电源波动",
         description="供电电压不稳定，主轴转速和进给速率出现随机波动",
         category="electrical",
-        scenario_type="high_noise",
         default_duration=90.0,
         tags=["电源", "波动", "突发"],
         point_faults=[
@@ -230,176 +222,6 @@
                              multiplier=1.0, noise_scale=150.0),
         ],
     ),
-
-    # ==================================================================
-    # 以下为新增故障类型
-    # ==================================================================
-
-    # ------------------------------------------------------------------
-    # 传感器强干扰 — 高噪声波动型
-    # 场景：电磁干扰、接地不良、信号线屏蔽失效等导致传感器读数剧烈抖动
-    # 特征：均值基本不变，但噪声幅度突然增大数倍，信号看起来"毛刺"严重
-    # 区别于真实故障：设备本身没有坏，只是采集信号质量变差
-    # 模式：瞬间注入，持续期间每次采样都叠加大幅随机噪声
-    # ------------------------------------------------------------------
-    FaultTypeDefinition(
-        id="sensor_noise",
-        name="传感器强干扰",
-        description=(
-            "【高噪声波动型】电磁干扰或接地不良导致传感器信号质量恶化。"
-            "均值基本不变，但每次采样叠加大幅随机噪声，曲线呈现密集毛刺。"
-            "典型场景：变频器附近的传感器、信号线屏蔽层破损、接地回路故障。"
-        ),
-        category="electrical",
-        scenario_type="high_noise",
-        default_duration=120.0,
-        tags=["传感器", "干扰", "噪声", "高噪声波动型"],
-        point_faults=[
-            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=8.0),
-            PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=2.5),
-            PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=2.5),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=3.0),
-            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=80.0),
-        ],
-    ),
-
-    # ------------------------------------------------------------------
-    # 换工件/换程序段 — 工况切换型（高速加工 → 低速精加工）
-    # 场景：CNC 机床切换加工程序，从粗加工切换到精加工
-    # 特征：转速降低、进给降低、电流降低，所有指标跳到新的正常范围并稳定
-    # 关键：这不是故障！数据本身没有坏，只是工况变了，正常范围完全不同
-    # 模式：STEP 阶跃，立即跳到新基线并在整个 duration 内保持
-    # ------------------------------------------------------------------
-    FaultTypeDefinition(
-        id="mode_switch_fine_machining",
-        name="切换精加工工况",
-        description=(
-            "【工况切换型】从粗加工切换到精加工程序段。"
-            "主轴转速升高、进给速率降低、切削电流降低，各指标立即跳到新的正常范围并保持稳定。"
-            "数据本身没有异常，但与粗加工基线相比会触发阈值告警。"
-            "典型场景：换刀后进入精加工、加工不同特征面、程序跳段。"
-        ),
-        category="process",
-        scenario_type="mode_switch",
-        default_duration=300.0,
-        tags=["工况切换", "精加工", "程序段", "工况切换型"],
-        point_faults=[
-            PointFaultConfig(point="spindle_speed", mode=FaultMode.STEP,
-                             multiplier=1.4, noise_scale=30.0),
-            PointFaultConfig(point="feed_rate", mode=FaultMode.STEP,
-                             multiplier=0.3, noise_scale=10.0),
-            PointFaultConfig(point="spindle_current", mode=FaultMode.STEP,
-                             multiplier=0.55, noise_scale=0.5),
-            PointFaultConfig(point="vibration_x", mode=FaultMode.STEP,
-                             multiplier=0.6, noise_scale=0.1),
-            PointFaultConfig(point="vibration_y", mode=FaultMode.STEP,
-                             multiplier=0.6, noise_scale=0.1),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.STEP,
-                             multiplier=0.6, noise_scale=0.1),
-        ],
-    ),
-
-    # ------------------------------------------------------------------
-    # 进入空载工况 — 工况切换型（加工中 → 空载运行）
-    # 场景：加工完成、等待上料、程序暂停，主轴空转
-    # 特征：进给降为 0，电流大幅下降到空载水平，转速维持，振动降低
-    # 模式：STEP 阶跃，立即切换到空载基线
-    # ------------------------------------------------------------------
-    FaultTypeDefinition(
-        id="mode_switch_idle",
-        name="切换空载工况",
-        description=(
-            "【工况切换型】机床进入空载运行状态（加工完成等待上料、程序暂停）。"
-            "进给速率降为零，主轴电流降至空载水平（约为加工时的 20-30%），"
-            "主轴转速维持，振动明显降低。"
-            "典型场景：换料等待、程序暂停、加工间隙、换刀等待。"
-        ),
-        category="process",
-        scenario_type="mode_switch",
-        default_duration=180.0,
-        tags=["工况切换", "空载", "等待", "工况切换型"],
-        point_faults=[
-            PointFaultConfig(point="feed_rate", mode=FaultMode.STEP,
-                             target_value=0.0, noise_scale=2.0),
-            PointFaultConfig(point="spindle_current", mode=FaultMode.STEP,
-                             multiplier=0.22, noise_scale=0.3),
-            PointFaultConfig(point="vibration_x", mode=FaultMode.STEP,
-                             multiplier=0.25, noise_scale=0.05),
-            PointFaultConfig(point="vibration_y", mode=FaultMode.STEP,
-                             multiplier=0.25, noise_scale=0.05),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.STEP,
-                             multiplier=0.25, noise_scale=0.05),
-        ],
-    ),
-
-    # ------------------------------------------------------------------
-    # 突发电流尖峰 — 突发脉冲型
-    # 场景：切削过程中遇到硬质夹杂物、刀具切入角突变、工件材质不均
-    # 特征：主轴电流瞬间冲高（持续 2-5 秒），然后恢复正常，其他指标基本不变
-    # 区别于刀具崩刃：电流尖峰后能自动恢复，不会导致停机
-    # 模式：瞬间注入，持续时间极短
-    # ------------------------------------------------------------------
-    FaultTypeDefinition(
-        id="current_spike",
-        name="突发电流尖峰",
-        description=(
-            "【突发脉冲型】切削过程中遇到硬质夹杂物或材质不均，主轴电流瞬间冲高后自动恢复。"
-            "电流短暂升至正常值的 3-4 倍，持续仅数秒，振动轻微抖动，进给基本不受影响。"
-            "典型场景：铸件内部硬质点、焊缝区域、材料硬度不均匀。"
-            "与刀具崩刃的区别：能自动恢复，不触发停机报警。"
-        ),
-        category="mechanical",
-        scenario_type="sudden_spike",
-        default_duration=5.0,
-        tags=["电流", "尖峰", "脉冲", "突发脉冲型"],
-        point_faults=[
-            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             multiplier=3.5, noise_scale=1.5),
-            PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT,
-                             multiplier=2.0, noise_scale=0.5),
-            PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT,
-                             multiplier=2.0, noise_scale=0.5),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
-                             multiplier=2.5, noise_scale=0.8),
-        ],
-    ),
-
-    # ------------------------------------------------------------------
-    # 主轴负载异常 — 关系约束型
-    # 场景：刀具钝化但未完全磨损、切削参数不匹配、工件材料变硬
-    # 特征：主轴转速正常、进给速率正常，但主轴电流异常升高
-    # 关键：单看任何一个指标都"正常"，只有多指标关系才能发现异常
-    # 模式：渐进式，电流缓慢爬升，转速和进给保持不变
-    # ------------------------------------------------------------------
-    FaultTypeDefinition(
-        id="spindle_load_anomaly",
-        name="主轴负载异常",
-        description=(
-            "【关系约束型】主轴转速正常、进给速率正常，但主轴电流异常升高。"
-            "单看任何一个指标都在正常范围内，只有分析多指标关系才能发现异常。"
-            "物理含义：切削阻力增大（刀具钝化初期、材料变硬），"
-            "系统尚未触发保护降速，但电流已超出正常切削功率范围。"
-            "典型场景：刀具轻度钝化、切削液浓度不足、工件材料批次差异。"
-        ),
-        category="mechanical",
-        scenario_type="relation_constraint",
-        default_duration=240.0,
-        tags=["主轴", "负载", "关系约束", "关系约束型"],
-        point_faults=[
-            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
-                             multiplier=2.8, noise_scale=1.0),
-            # 转速和进给保持不变（multiplier=1.0），只叠加极小噪声维持真实感
-            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=15.0),
-            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=5.0),
-        ],
-    ),
 ]
 
 # 按 id 索引
@@ -552,15 +374,6 @@ def _compute_value(
                 target = baseline * (1.0 + (pf.multiplier - 1.0) * intensity)
             else:
                 target = baseline
-        elif pf.mode == FaultMode.STEP:
-            # 阶跃模式：立即跳到新基线并在整个 duration 内保持（工况切换专用）
-            # 与 INSTANT 的区别：STEP 的 multiplier 表示新工况的正常倍数，不受 intensity 缩放
-            if pf.target_value is not None:
-                target = pf.target_value
-            elif pf.multiplier is not None:
-                target = baseline * pf.multiplier
-            else:
-                target = baseline
         else:
             # 渐进模式：随 progress 线性劣化
             if pf.target_value is not None:
diff --git a/protoforge/models/fault.py b/protoforge/models/fault.py
index 025da96..cc038e0 100644
--- a/protoforge/models/fault.py
+++ b/protoforge/models/fault.py
@@ -8,7 +8,6 @@ class FaultMode(str, Enum):
     """故障注入模式"""
     INSTANT = "instant"       # 瞬间跳变到异常值，持续 duration 后恢复
     GRADUAL = "gradual"       # 渐进式劣化，随时间线性恶化，到 duration 时达到峰值后恢复
-    STEP = "step"             # 阶跃切换到新工况基线，整个 duration 内保持新基线（工况切换专用）
 
 
 class FaultStatus(str, Enum):
@@ -37,7 +36,6 @@ class FaultTypeDefinition(BaseModel):
     name: str
     description: str
     category: str                          # 故障分类：mechanical / electrical / thermal / process
-    scenario_type: str = "trend_drift"     # 异常场景类型：trend_drift / sudden_spike / high_noise / mode_switch / relation_constraint
     default_duration: float = 120.0        # 默认持续时间（秒）
     point_faults: list[PointFaultConfig] = Field(default_factory=list)
     tags: list[str] = Field(default_factory=list)
diff --git a/web/src/views/Devices.vue b/web/src/views/Devices.vue
index 36141a4..65e0535 100644
--- a/web/src/views/Devices.vue
+++ b/web/src/views/Devices.vue
@@ -92,55 +92,24 @@
       </n-modal>
 
       <!-- 故障注入 Modal -->
-      <n-modal v-model:show="showFaultModal" preset="card" title="故障注入" style="width:560px">
+      <n-modal v-model:show="showFaultModal" preset="card" title="故障注入" style="width:480px">
         <n-space vertical size="medium">
-          <n-text depth="3" style="font-size:13px">目标设备：<n-text strong>{{ faultTargetDevice?.name }}</n-text></n-text>
-
+          <n-text depth="3" style="font-size:13px">设备：{{ faultTargetDevice?.name }}</n-text>
           <n-form-item label="故障类型" label-placement="left" label-width="80">
             <n-select
               v-model:value="faultTypeId"
-              :options="faultTypeGroupedOptions"
+              :options="faultTypeOptions"
               placeholder="选择故障类型"
               @update:value="onFaultTypeChange"
             />
           </n-form-item>
-
-          <!-- 场景说明卡片 -->
-          <div v-if="selectedFaultType" style="background:#1a1a2e;border:1px solid #2d2d4e;border-radius:8px;padding:14px 16px;">
-            <!-- 标题行：故障名 + 场景类型标签 + 分类标签 -->
-            <n-space align="center" style="margin-bottom:10px;flex-wrap:wrap;gap:6px">
-              <n-text strong style="font-size:14px">{{ selectedFaultType.name }}</n-text>
-              <n-tag :type="scenarioTagType(selectedFaultType.scenario_type)" size="small" round>
-                {{ scenarioTypeLabel(selectedFaultType.scenario_type) }}
-              </n-tag>
-              <n-tag size="small" :bordered="false" style="background:#2d2d4e;color:#94a3b8">
-                {{ faultCategoryLabel(selectedFaultType.category) }}
-              </n-tag>
-            </n-space>
-
-            <!-- 描述文本 -->
-            <n-text depth="3" style="font-size:12px;line-height:1.7;display:block;white-space:pre-wrap">{{ selectedFaultType.description }}</n-text>
-
-            <!-- 影响测点 -->
-            <div style="margin-top:10px;padding-top:10px;border-top:1px solid #2d2d4e">
-              <n-text depth="3" style="font-size:11px">影响测点：</n-text>
-              <n-space size="small" style="margin-top:4px;flex-wrap:wrap">
-                <n-tag
-                  v-for="pf in selectedFaultType.point_faults"
-                  :key="pf.point"
-                  size="tiny"
-                  :bordered="false"
-                  style="background:#2d2d4e;color:#e2e8f0;font-family:monospace"
-                >
-                  {{ pf.point }}
-                  <span style="color:#94a3b8;margin-left:4px">
-                    {{ pointFaultModeLabel(pf) }}
-                  </span>
-                </n-tag>
-              </n-space>
+          <n-alert v-if="selectedFaultType" type="warning" :bordered="false" style="font-size:12px">
+            <div style="font-weight:500;margin-bottom:4px">{{ selectedFaultType.name }} · {{ faultCategoryLabel(selectedFaultType.category) }}</div>
+            <div style="color:#94a3b8">{{ selectedFaultType.description }}</div>
+            <div style="margin-top:6px;color:#94a3b8">
+              影响测点：{{ selectedFaultType.point_faults.map(p => p.point).join('、') }}
             </div>
-          </div>
-
+          </n-alert>
           <n-form-item label="持续时间" label-placement="left" label-width="80">
             <n-input-number
               v-model:value="faultDuration"
@@ -151,15 +120,11 @@
               <template #suffix>秒</template>
             </n-input-number>
           </n-form-item>
-
           <n-form-item label="故障强度" label-placement="left" label-width="80">
             <n-space vertical style="width:100%">
               <n-slider v-model:value="faultIntensity" :min="0.1" :max="1.0" :step="0.1" />
               <n-text depth="3" style="font-size:12px">
                 {{ faultIntensityLabel }}（{{ faultIntensity }}）
-                <span v-if="selectedFaultType?.scenario_type === 'mode_switch'" style="color:#f59e0b">
-                  · 工况切换型强度不影响切换幅度
-                </span>
               </n-text>
             </n-space>
           </n-form-item>
@@ -278,17 +243,12 @@ const columns = [
   },
   { title: '测点', key: 'points', width: 70, render: (row) => (row.points || []).length },
   {
-    title: '故障', key: 'fault', width: 130,
+    title: '故障', key: 'fault', width: 90,
     render: (row) => {
       const fault = activeFaults.value[row.id]
       if (!fault || fault.status === 'none') return h(NTag, { size: 'tiny', bordered: false }, () => '正常')
       const pct = Math.round((fault.progress || 0) * 100)
-      const ft = faultTypes.value.find(t => t.id === fault.fault_type_id)
-      const scenarioLabel = ft ? scenarioTypeLabel(ft.scenario_type) : ''
-      return h(NSpace, { size: 2, vertical: false, align: 'center' }, () => [
-        h(NTag, { size: 'tiny', type: 'error', bordered: false }, () => `${fault.fault_type_name} ${pct}%`),
-        scenarioLabel ? h(NTag, { size: 'tiny', bordered: false, style: 'font-size:10px;background:#2d1b1b;color:#f87171' }, () => scenarioLabel) : null,
-      ])
+      return h(NTag, { size: 'tiny', type: 'error', bordered: false }, () => `${fault.fault_type_name} ${pct}%`)
     }
   },
   {
@@ -391,25 +351,6 @@ const faultTypeOptions = computed(() =>
   faultTypes.value.map(t => ({ label: `${t.name}（${faultCategoryLabel(t.category)}）`, value: t.id }))
 )
 
-// 按场景类型分组的故障选项
-const SCENARIO_ORDER = ['trend_drift', 'sudden_spike', 'high_noise', 'mode_switch', 'relation_constraint']
-const faultTypeGroupedOptions = computed(() => {
-  const groups = {}
-  for (const t of faultTypes.value) {
-    const st = t.scenario_type || 'trend_drift'
-    if (!groups[st]) groups[st] = []
-    groups[st].push({ label: t.name, value: t.id })
-  }
-  return SCENARIO_ORDER
-    .filter(st => groups[st])
-    .map(st => ({
-      type: 'group',
-      label: scenarioTypeLabel(st),
-      key: st,
-      children: groups[st],
-    }))
-})
-
 const selectedFaultType = computed(() =>
   faultTypes.value.find(t => t.id === faultTypeId.value) || null
 )
@@ -427,42 +368,6 @@ function faultCategoryLabel(category) {
   return map[category] || category
 }
 
-function scenarioTypeLabel(scenarioType) {
-  const map = {
-    trend_drift: '趋势漂移型',
-    sudden_spike: '突发脉冲型',
-    high_noise: '高噪声波动型',
-    mode_switch: '工况切换型',
-    relation_constraint: '关系约束型',
-  }
-  return map[scenarioType] || scenarioType
-}
-
-function scenarioTagType(scenarioType) {
-  const map = {
-    trend_drift: 'warning',
-    sudden_spike: 'error',
-    high_noise: 'info',
-    mode_switch: 'success',
-    relation_constraint: 'default',
-  }
-  return map[scenarioType] || 'default'
-}
-
-function pointFaultModeLabel(pf) {
-  if (pf.mode === 'step') return '→ 阶跃'
-  if (pf.mode === 'gradual') {
-    if (pf.multiplier != null) return `→ ×${pf.multiplier}`
-    if (pf.target_value != null) return `→ ${pf.target_value}`
-  }
-  if (pf.mode === 'instant') {
-    if (pf.target_value != null) return `→ ${pf.target_value}`
-    if (pf.multiplier != null && pf.multiplier !== 1.0) return `→ ×${pf.multiplier}`
-    return '± 噪声'
-  }
-  return ''
-}
-
 function onFaultTypeChange(val) {
   const t = faultTypes.value.find(f => f.id === val)
   if (t && t.default_duration) faultDuration.value = t.default_duration

From 02174daba0a6b5f0c770243c5f47f082ea2e5d51 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 26 May 2026 10:47:45 +0800
Subject: [PATCH 32/55] feat(protoforge): update protoforge

---
 protoforge/core/engine.py                     |   8 +
 protoforge/core/fault.py                      | 290 ++++++++++--------
 protoforge/protocols/fanuc/server.py          |  79 ++++-
 protoforge/templates/fanuc/fanuc_0if_cnc.json |  69 ++---
 protoforge/templates/modbus/fanuc_cnc.json    |  58 +++-
 web/src/views/Devices.vue                     |   2 +-
 6 files changed, 318 insertions(+), 188 deletions(-)

diff --git a/protoforge/core/engine.py b/protoforge/core/engine.py
index 059f10e..d8b72aa 100644
--- a/protoforge/core/engine.py
+++ b/protoforge/core/engine.py
@@ -293,6 +293,14 @@ async def _tick_loop(self) -> None:
         while self._running:
             for instance in self._devices.values():
                 instance.tick()
+                # 将 DeviceInstance._point_values 同步到协议服务器，保证协议层读到最新值
+                server = self._protocol_servers.get(instance.protocol)
+                if server and hasattr(server, '_behaviors'):
+                    behavior = server._behaviors.get(instance.id)
+                    if behavior is not None:
+                        behavior._values.update(instance._point_values)
+                        if hasattr(behavior, 'sync_from_point_values'):
+                            behavior.sync_from_point_values(instance._point_values)
             for scenario in self._scenario_instances.values():
                 scenario.tick()
             await asyncio.sleep(1.0)
diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
index e72842d..11b61a7 100644
--- a/protoforge/core/fault.py
+++ b/protoforge/core/fault.py
@@ -32,194 +32,228 @@
 BUILTIN_FAULT_TYPES: list[FaultTypeDefinition] = [
 
     # ------------------------------------------------------------------
-    # 刀具磨损 — 最常见的机加工故障
-    # 特征：切削阻力增大 → 主轴电流缓慢爬升，振动幅度增大，进给速率被系统压低
-    # 模式：渐进式，持续数分钟，模拟刀具从轻度磨损到需要换刀的过程
-    # ------------------------------------------------------------------
-    FaultTypeDefinition(
-        id="tool_wear",
-        name="刀具磨损",
-        description="刀具切削刃磨损，切削阻力增大，主轴电流升高，振动增大，进给速率下降",
-        category="mechanical",
-        default_duration=300.0,
-        tags=["刀具", "磨损", "渐进"],
-        point_faults=[
-            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
-                             multiplier=2.2, noise_scale=0.8),
-            PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL,
-                             multiplier=3.0, noise_scale=0.3),
-            PointFaultConfig(point="vibration_y", mode=FaultMode.GRADUAL,
-                             multiplier=3.0, noise_scale=0.3),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL,
-                             multiplier=3.5, noise_scale=0.4),
-            PointFaultConfig(point="feed_rate", mode=FaultMode.GRADUAL,
-                             multiplier=0.45, noise_scale=20.0),
-        ],
-    ),
-
-    # ------------------------------------------------------------------
-    # 刀具崩刃 — 突发性刀具失效
-    # 特征：瞬间冲击 → 振动突增，电流瞬间峰值，进给立即停止
-    # 模式：瞬间注入，持续时间短（机床通常会触发报警停机）
+    # 进给堵转 — 工件夹紧松动或切削量过大导致进给卡死
+    # 特征：进给速率瞬间降为0，主轴负载和电流急剧升高，主轴仍在转（区别于崩刃）
+    # 模式：瞬间注入
     # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="tool_breakage",
-        name="刀具崩刃",
-        description="刀具突发性崩刃，振动剧烈突增，主轴电流峰值，进给停止",
-        category="mechanical",
-        default_duration=15.0,
-        tags=["刀具", "崩刃", "突发"],
+        id="feed_stall",
+        name="进给堵转",
+        description="进给轴卡死，进给速率降为零，主轴负载和电流急剧升高，主轴转速维持（区别于崩刃停主轴）",
+        category="process",
+        default_duration=20.0,
+        tags=["进给", "堵转", "突发"],
         point_faults=[
-            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             multiplier=4.5, noise_scale=2.0),
-            PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT,
-                             multiplier=8.0, noise_scale=1.5),
-            PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT,
-                             multiplier=8.0, noise_scale=1.5),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
-                             multiplier=10.0, noise_scale=2.0),
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
                              target_value=0.0, noise_scale=0.0),
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             multiplier=2.8, noise_scale=5.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=3.8, noise_scale=1.5),
         ],
     ),
 
     # ------------------------------------------------------------------
     # 主轴过热 — 长时间高负荷或冷却系统故障
-    # 特征：主轴电流持续偏高，转速因热保护逐渐降低
+    # 特征：主轴负载和电流持续偏高，转速因热保护逐渐降低
     # 模式：渐进式，持续时间较长
     # ------------------------------------------------------------------
     FaultTypeDefinition(
         id="spindle_overheat",
         name="主轴过热",
-        description="主轴长时间高负荷运转或冷却不足，电流持续偏高，转速因热保护下降",
+        description="主轴长时间高负荷运转或冷却不足，spindle_load和spindle_current持续偏高，转速因热保护渐进下降",
         category="thermal",
         default_duration=240.0,
         tags=["主轴", "过热", "渐进"],
         point_faults=[
+            PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL,
+                             multiplier=1.6, noise_scale=3.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
                              multiplier=1.8, noise_scale=1.2),
             PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL,
                              multiplier=0.6, noise_scale=50.0),
-            PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL,
-                             multiplier=1.5, noise_scale=0.2),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL,
-                             multiplier=1.5, noise_scale=0.2),
         ],
     ),
 
     # ------------------------------------------------------------------
-    # 主轴轴承故障 — 轴承磨损或润滑不足
-    # 特征：振动频率特征变化，整体振动幅度升高，电流略升
-    # 模式：渐进式
+    # 电源波动 — 供电不稳定
+    # 特征：主轴转速和进给速率出现随机波动，电流不稳定
+    # 模式：瞬间注入（持续期间持续抖动）
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="power_fluctuation",
+        name="电源波动",
+        description="供电电压不稳定，主轴转速和进给速率出现随机波动",
+        category="electrical",
+        default_duration=90.0,
+        tags=["电源", "波动", "突发"],
+        point_faults=[
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=300.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=5.0),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=150.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 刀具磨损加剧 — 主轴负载趋势漂移
+    # 特征：spindle_load 基线随时间缓慢爬升（趋势漂移型），电流同步升高
+    # 场景：刀具从轻度磨损到需要换刀的完整过程
+    # 模式：渐进式，持续时间长
     # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="spindle_bearing_fault",
-        name="主轴轴承故障",
-        description="主轴轴承磨损或润滑不足，振动幅度持续升高，伴随电流轻微上升",
-        category="mechanical",
-        default_duration=360.0,
-        tags=["主轴", "轴承", "渐进"],
+        id="tool_wear_progressive",
+        name="刀具磨损加剧",
+        description="刀具磨损导致切削阻力持续增大，spindle_load基线缓慢爬升至1.8倍，spindle_current同步升高；进给速度由G代码控制不受影响",
+        category="tool",
+        default_duration=600.0,
+        tags=["刀具", "磨损", "负载", "趋势漂移"],
         point_faults=[
-            PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL,
-                             multiplier=4.0, noise_scale=0.5),
-            PointFaultConfig(point="vibration_y", mode=FaultMode.GRADUAL,
-                             multiplier=4.0, noise_scale=0.5),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL,
-                             multiplier=5.0, noise_scale=0.8),
+            PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL,
+                             multiplier=1.8, noise_scale=3.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
-                             multiplier=1.3, noise_scale=0.5),
+                             multiplier=1.7, noise_scale=1.5),
         ],
     ),
 
     # ------------------------------------------------------------------
-    # 进给堵转 — 工件夹紧松动或切削量过大导致进给卡死
-    # 特征：进给速率瞬间降为 0，主轴电流急剧升高
-    # 模式：瞬间注入
+    # 刀具崩刃 — 主轴负载突发脉冲
+    # 特征：spindle_load 瞬间冲高（可超120%，FANUC最大输出200%），进给停止，CNC停主轴
+    # 场景：刀具突发性失效，机床触发过载报警并停机
+    # 模式：瞬间注入，持续时间极短
     # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="feed_stall",
-        name="进给堵转",
-        description="进给轴卡死，进给速率降为零，主轴电流急剧升高",
-        category="process",
-        default_duration=20.0,
-        tags=["进给", "堵转", "突发"],
+        id="tool_breakage_sudden",
+        name="刀具崩刃",
+        description="刀具突发性崩刃，spindle_load瞬间冲高至正常值3.2倍（可超120%，FANUC最大输出200%），进给停止，CNC触发过载报警并停主轴",
+        category="tool",
+        default_duration=10.0,
+        tags=["刀具", "崩刃", "突发", "过载"],
         point_faults=[
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             multiplier=3.2, noise_scale=8.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=4.0, noise_scale=3.0),
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             target_value=0.0, noise_scale=0.0),
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
                              target_value=0.0, noise_scale=0.0),
+            PointFaultConfig(point="alarm_status", mode=FaultMode.INSTANT,
+                             target_value=1.0, noise_scale=0.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 过载保护触发 — 负载/进给反向联动异常（关系约束型）
+    # 特征：负载超限后CNC自动降进给速率，负载高企与进给降速同时出现
+    # 场景：切削参数过激进，CNC自适应保护介入
+    # 模式：瞬间注入（持续期间维持异常关系）
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="tool_overload_protection",
+        name="过载保护触发",
+        description="主轴负载超限，CNC自动降低进给速率保护刀具，负载高企与进给降速同时出现",
+        category="tool",
+        default_duration=120.0,
+        tags=["刀具", "过载", "进给", "关系约束"],
+        point_faults=[
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             multiplier=1.9, noise_scale=4.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             multiplier=3.8, noise_scale=1.5),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
-                             multiplier=5.0, noise_scale=1.0),
+                             multiplier=1.8, noise_scale=2.0),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             multiplier=0.35, noise_scale=15.0),
         ],
     ),
 
     # ------------------------------------------------------------------
-    # 振动异常 — 工件装夹松动或共振
-    # 特征：三轴振动突然大幅增加，其他指标基本正常
-    # 模式：瞬间注入
+    # 空切检测 — 刀具未接触工件（工况切换型）
+    # 特征：spindle_load 跌至空载区间（5-15%），主轴转速和进给速率保持正常
+    # 场景：工件装夹偏移、程序坐标错误、工件提前切完
+    # 模式：瞬间注入（均值跳变，方差不变）
     # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="vibration_spike",
-        name="振动异常",
-        description="工件装夹松动或切削共振，三轴振动突然大幅增加",
-        category="mechanical",
-        default_duration=60.0,
-        tags=["振动", "装夹", "突发"],
+        id="air_cutting",
+        name="空切检测",
+        description="刀具未接触工件，spindle_load跌至空载区间(5-15%)，spindle_current降至空转水平，转速进给保持正常",
+        category="tool",
+        default_duration=180.0,
+        tags=["刀具", "空切", "工况切换", "负载"],
         point_faults=[
-            PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT,
-                             multiplier=6.0, noise_scale=1.0),
-            PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT,
-                             multiplier=6.0, noise_scale=1.0),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
-                             multiplier=7.0, noise_scale=1.2),
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             target_value=8.0, noise_scale=2.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             target_value=2.5, noise_scale=0.3),
         ],
     ),
 
     # ------------------------------------------------------------------
-    # 切削液不足 — 冷却润滑失效
-    # 特征：热量积累 → 振动缓慢升高，电流缓慢升高，进给略降
-    # 模式：渐进式，速度较慢
+    # 积屑瘤 — 切屑粘附刀刃导致周期性负载突刺
+    # 特征：spindle_load 在正常基线上出现间歇性冲高后恢复，不是持续爬升
+    #       突刺幅度约1.5-2倍基线，持续1-3秒后自行恢复，周期不固定
+    # 场景：低速切削、切削液不足、韧性材料（铝合金、不锈钢）加工时常见
+    # 模式：瞬间注入（noise_scale 大，模拟随机突刺效果）
     # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="coolant_failure",
-        name="切削液不足",
-        description="切削液供给不足，冷却润滑失效，热量积累导致振动和电流缓慢升高",
-        category="process",
-        default_duration=480.0,
-        tags=["切削液", "冷却", "渐进"],
+        id="built_up_edge",
+        name="积屑瘤",
+        description="切屑粘附刀刃，spindle_load在正常基线上出现间歇性突刺（1.5-2倍），突刺后自行恢复，区别于磨损的持续爬升",
+        category="tool",
+        default_duration=300.0,
+        tags=["刀具", "积屑瘤", "突刺", "低速切削"],
         point_faults=[
-            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
-                             multiplier=1.6, noise_scale=0.8),
-            PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL,
-                             multiplier=2.0, noise_scale=0.3),
-            PointFaultConfig(point="vibration_y", mode=FaultMode.GRADUAL,
-                             multiplier=2.0, noise_scale=0.3),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL,
-                             multiplier=2.5, noise_scale=0.4),
-            PointFaultConfig(point="feed_rate", mode=FaultMode.GRADUAL,
-                             multiplier=0.75, noise_scale=15.0),
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             multiplier=1.7, noise_scale=12.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=1.6, noise_scale=4.0),
         ],
     ),
 
     # ------------------------------------------------------------------
-    # 电源波动 — 供电不稳定
-    # 特征：主轴转速和进给速率出现随机波动，电流不稳定
-    # 模式：瞬间注入（持续期间持续抖动）
+    # 刀具涂层剥落 — 负载阶跃后在新基线稳定
+    # 特征：spindle_load 出现一次阶跃式跳升（区别于缓慢爬升的磨损），
+    #       然后在新的高基线上稳定波动，不会继续爬升也不会恢复
+    # 场景：涂层质量问题或切削条件恶劣导致涂层突然失效
+    # 模式：瞬间注入（立即跳到新基线，持续维持）
     # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="power_fluctuation",
-        name="电源波动",
-        description="供电电压不稳定，主轴转速和进给速率出现随机波动",
-        category="electrical",
-        default_duration=90.0,
-        tags=["电源", "波动", "突发"],
+        id="coating_spalling",
+        name="刀具涂层剥落",
+        description="刀具涂层突然失效，spindle_load阶跃式跳升至1.5倍后在新基线稳定波动，区别于磨损的缓慢爬升和崩刃的瞬间冲高",
+        category="tool",
+        default_duration=600.0,
+        tags=["刀具", "涂层", "阶跃", "工况切换"],
         point_faults=[
-            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=300.0),
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             multiplier=1.5, noise_scale=3.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=5.0),
+                             multiplier=1.4, noise_scale=1.5),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 换刀装夹偏移 — 刀具伸出量异常导致负载整体偏高
+    # 特征：换刀后 spindle_load 整体偏高（1.4-1.6倍），波动规律正常，
+    #       不是空切（负载不低），不是磨损（不随时间爬升）
+    # 场景：刀具伸出量偏长、刀柄锥面未清洁、刀具型号装错
+    # 模式：瞬间注入（均值整体偏移，方差不变）
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="tool_offset_error",
+        name="换刀装夹偏移",
+        description="换刀后刀具伸出量或装夹位置异常，spindle_load整体偏高(1.4-1.6倍)，波动规律正常，不随时间变化，区别于磨损和空切",
+        category="tool",
+        default_duration=3600.0,
+        tags=["刀具", "装夹", "工况切换", "负载偏移"],
+        point_faults=[
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             multiplier=1.5, noise_scale=3.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=1.4, noise_scale=1.5),
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=150.0),
+                             multiplier=1.0, noise_scale=5.0),
         ],
     ),
 ]
@@ -312,10 +346,10 @@ def apply(self, device: Any) -> None:
                 continue
             baseline = fault.baseline_values.get(pf.point, 0.0)
             if baseline == 0.0:
-                # 基线为 0 时用当前值兜底，避免乘法无效
-                try:
-                    baseline = float(device._point_values[pf.point]) or 1.0
-                except (TypeError, ValueError):
+                # 基线为0说明注入时设备处于换刀/停机状态
+                # target_value 模式可以直接执行（如崩刃归零、空切归空载）
+                # multiplier 模式跳过，避免在零基线上产生无意义的值
+                if pf.target_value is None:
                     continue
 
             device._point_values[pf.point] = self._compute_value(
diff --git a/protoforge/protocols/fanuc/server.py b/protoforge/protocols/fanuc/server.py
index b0d6f15..e711347 100644
--- a/protoforge/protocols/fanuc/server.py
+++ b/protoforge/protocols/fanuc/server.py
@@ -22,8 +22,11 @@ def __init__(self, points: list[dict]):
             "program": "O0001",
             "speed_override": 100,
             "feed_override": 100,
-            "spindle_speed": 3000,
-            "feed_rate": 500,
+            "spindle_speed": 0.0,
+            "feed_rate": 0.0,
+            "spindle_current": 0.0,
+            "spindle_load": 0.0,
+            "tool_number": 1,
             "absolute_pos": [0.0] * 5,
             "machine_pos": [0.0] * 5,
             "relative_pos": [0.0] * 5,
@@ -32,6 +35,32 @@ def __init__(self, points: list[dict]):
         for p in points:
             self._values[p["name"]] = p.get("fixed_value", 0)
 
+    def sync_from_point_values(self, point_values: dict[str, Any]) -> None:
+        """将 DeviceInstance._point_values 同步到 _cnc_status，保持协议层数据与生成器一致"""
+        mapping = {
+            "spindle_speed": "spindle_speed",
+            "feed_rate": "feed_rate",
+            "spindle_current": "spindle_current",
+            "spindle_load": "spindle_load",
+            "tool_number": "tool_number",
+            "alarm_status": "alarm",
+            "run_mode": "mode",
+            "execution_status": "execution",
+            "program_name": "program",
+            "x_absolute": ("absolute_pos", 0),
+            "y_absolute": ("absolute_pos", 1),
+            "z_absolute": ("absolute_pos", 2),
+        }
+        for point_name, status_key in mapping.items():
+            if point_name not in point_values:
+                continue
+            val = point_values[point_name]
+            if isinstance(status_key, tuple):
+                key, idx = status_key
+                self._cnc_status[key][idx] = float(val)
+            else:
+                self._cnc_status[status_key] = val
+
     async def generate_value(self, point_config: dict[str, Any]) -> Any:
         name = point_config.get("name", "")
         return self._values.get(name, 0)
@@ -144,6 +173,12 @@ def _process_focas(self, data: bytes) -> bytes | None:
             return self._handle_cnc_rdspindlespd(req_id)
         elif func_id == 0x0111:
             return self._handle_cnc_rdfeed(req_id)
+        elif func_id == 0x0112:
+            return self._handle_cnc_rdspload(req_id)
+        elif func_id == 0x0113:
+            return self._handle_cnc_rdspmeter(req_id)
+        elif func_id == 0x0114:
+            return self._handle_cnc_toolnum(req_id)
         elif func_id == 0x0120:
             return self._handle_cnc_alarm(req_id)
         elif func_id == 0x0130:
@@ -247,7 +282,7 @@ def _handle_cnc_rdspindlespd(self, req_id: int) -> bytes:
 
     def _handle_cnc_rdfeed(self, req_id: int) -> bytes:
         behavior = next(iter(self._behaviors.values()), None)
-        feed = behavior._cnc_status.get("feed_rate", 500) if behavior else 500
+        feed = behavior._cnc_status.get("feed_rate", 0.0) if behavior else 0.0
 
         resp = bytearray()
         resp += struct.pack("<H", 0x0111)
@@ -256,6 +291,42 @@ def _handle_cnc_rdfeed(self, req_id: int) -> bytes:
         resp += struct.pack("<d", float(feed))
         return bytes(resp)
 
+    def _handle_cnc_rdspload(self, req_id: int) -> bytes:
+        """cnc_rdspload — 主轴负载率(%)，FANUC FOCAS2 原生接口"""
+        behavior = next(iter(self._behaviors.values()), None)
+        load = behavior._cnc_status.get("spindle_load", 0.0) if behavior else 0.0
+
+        resp = bytearray()
+        resp += struct.pack("<H", 0x0112)
+        resp += struct.pack("<I", req_id)
+        resp += struct.pack("<I", 0x00000000)
+        resp += struct.pack("<d", float(load))
+        return bytes(resp)
+
+    def _handle_cnc_rdspmeter(self, req_id: int) -> bytes:
+        """cnc_rdspmeter — 主轴电流(A)"""
+        behavior = next(iter(self._behaviors.values()), None)
+        current = behavior._cnc_status.get("spindle_current", 0.0) if behavior else 0.0
+
+        resp = bytearray()
+        resp += struct.pack("<H", 0x0113)
+        resp += struct.pack("<I", req_id)
+        resp += struct.pack("<I", 0x00000000)
+        resp += struct.pack("<d", float(current))
+        return bytes(resp)
+
+    def _handle_cnc_toolnum(self, req_id: int) -> bytes:
+        """cnc_toolnum — 当前刀号"""
+        behavior = next(iter(self._behaviors.values()), None)
+        tool = behavior._cnc_status.get("tool_number", 1) if behavior else 1
+
+        resp = bytearray()
+        resp += struct.pack("<H", 0x0114)
+        resp += struct.pack("<I", req_id)
+        resp += struct.pack("<I", 0x00000000)
+        resp += struct.pack("<H", int(tool))
+        return bytes(resp)
+
     def _handle_cnc_alarm(self, req_id: int) -> bytes:
         behavior = next(iter(self._behaviors.values()), None)
         alarm = behavior._cnc_status.get("alarm", 0) if behavior else 0
@@ -311,6 +382,8 @@ async def read_points(self, device_id: str) -> list[PointValue]:
         config = self._device_configs.get(device_id)
         if not behavior or not config:
             return []
+        # Sync behavior._values into _cnc_status so FOCAS responses match the generated point data
+        behavior.sync_from_point_values(behavior._values)
         now = time.time()
         return [PointValue(name=p.name, value=behavior.get_value(p.name), timestamp=now) for p in config.points]
 
diff --git a/protoforge/templates/fanuc/fanuc_0if_cnc.json b/protoforge/templates/fanuc/fanuc_0if_cnc.json
index 39437f3..0f0e362 100644
--- a/protoforge/templates/fanuc/fanuc_0if_cnc.json
+++ b/protoforge/templates/fanuc/fanuc_0if_cnc.json
@@ -62,77 +62,68 @@
             "address": "spindle_speed",
             "data_type": "float32",
             "unit": "RPM",
-            "description": "主轴转速",
+            "description": "主轴转速，按加工程序阶梯切换：换刀归零，粗铣2000，半精铣4000，精铣6000",
             "access": "r",
-            "generator_type": "sawtooth",
-            "min_value": 1000,
+            "generator_type": "script",
+            "min_value": 0,
             "max_value": 8000,
-            "generator_config": {"period": 120}
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: target = 0\nelif t < 120: target = 2000\nelif t < 150: target = 0\nelif t < 240: target = 4000\nelif t < 270: target = 0\nelse: target = 6000\nnoise = random.gauss(0, 8) if target > 0 else 0; result = round(max(0, target + noise), 1)"
+            }
         },
         {
             "name": "feed_rate",
             "address": "feed_rate",
             "data_type": "float32",
             "unit": "mm/min",
-            "description": "进给速度",
+            "description": "进给速度，与加工工步联动：换刀时为0，粗铣800，半精铣500，精铣300",
             "access": "r",
-            "generator_type": "sine",
-            "min_value": 100,
+            "generator_type": "script",
+            "min_value": 0,
             "max_value": 5000,
-            "generator_config": {"period": 60, "phase": 0.0}
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: target = 0\nelif t < 120: target = 800\nelif t < 150: target = 0\nelif t < 240: target = 500\nelif t < 270: target = 0\nelse: target = 300\nnoise = random.gauss(0, 5) if target > 0 else 0; result = round(max(0, target + noise), 1)"
+            }
         },
         {
             "name": "spindle_current",
             "address": "spindle_current",
             "data_type": "float32",
             "unit": "A",
-            "description": "主轴电流",
-            "access": "r",
-            "generator_type": "sine",
-            "min_value": 8.0,
-            "max_value": 32.0,
-            "generator_config": {"period": 120, "phase": 0.5}
-        },
-        {
-            "name": "vibration_x",
-            "address": "vibration_x",
-            "data_type": "float32",
-            "unit": "m/s²",
-            "description": "X轴振动加速度",
+            "description": "主轴电流，与工步联动：换刀时伺服保持电流约2.5A，粗铣18-24A，半精铣12-18A，精铣8-13A",
             "access": "r",
             "generator_type": "script",
-            "min_value": 0.1,
-            "max_value": 2.5,
+            "min_value": 0.0,
+            "max_value": 40.0,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 90); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)"
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: base, noise_std = 2.5, 0.3\nelif t < 120: base, noise_std = 21.0, 1.2\nelif t < 150: base, noise_std = 2.5, 0.3\nelif t < 240: base, noise_std = 15.0, 1.2\nelif t < 270: base, noise_std = 2.5, 0.3\nelse: base, noise_std = 10.0, 1.2\nresult = round(max(0, base + random.gauss(0, noise_std)), 2)"
             }
         },
         {
-            "name": "vibration_y",
-            "address": "vibration_y",
+            "name": "spindle_load",
+            "address": "spindle_load",
             "data_type": "float32",
-            "unit": "m/s²",
-            "description": "Y轴振动加速度",
+            "unit": "%",
+            "description": "主轴负载率(0-100%)，与工步联动：换刀时0%，粗铣45-60%，半精铣35-50%，精铣25-40%",
             "access": "r",
             "generator_type": "script",
-            "min_value": 0.1,
-            "max_value": 2.5,
+            "min_value": 0.0,
+            "max_value": 120.0,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 75 + 1.0); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)"
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: base = 0\nelif t < 120: base = 52.0\nelif t < 150: base = 0\nelif t < 240: base = 42.0\nelif t < 270: base = 0\nelse: base = 32.0\nnoise = random.gauss(0, 2.5) if base > 0 else 0; result = round(max(0, min(120.0, base + noise)), 2)"
             }
         },
         {
-            "name": "vibration_z",
-            "address": "vibration_z",
-            "data_type": "float32",
-            "unit": "m/s²",
-            "description": "Z轴振动加速度",
+            "name": "tool_number",
+            "address": "tool_number",
+            "data_type": "uint16",
+            "description": "当前刀号，与工步联动：换刀阶段切换，粗铣T01，半精铣T02，精铣T03",
             "access": "r",
             "generator_type": "script",
-            "min_value": 0.1,
-            "max_value": 3.0,
+            "min_value": 1,
+            "max_value": 12,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; base = 0.7 + 0.4 * math.sin(2 * math.pi * elapsed / 60 + 2.1); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)"
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 120: result = 1\nelif t < 240: result = 2\nelse: result = 3"
             }
         },
         {
diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json
index 43622cf..3a21815 100644
--- a/protoforge/templates/modbus/fanuc_cnc.json
+++ b/protoforge/templates/modbus/fanuc_cnc.json
@@ -11,36 +11,42 @@
             "address": "0",
             "data_type": "uint16",
             "unit": "RPM",
-            "description": "主轴实际转速",
+            "description": "主轴实际转速，与工步联动：换刀归零，粗铣2000，半精铣4000，精铣6000",
             "access": "r",
-            "generator_type": "sawtooth",
+            "generator_type": "script",
             "min_value": 0,
-            "max_value": 12000,
-            "generator_config": {"period": 180}
+            "max_value": 8000,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: target = 0\nelif t < 120: target = 2000\nelif t < 150: target = 0\nelif t < 240: target = 4000\nelif t < 270: target = 0\nelse: target = 6000\nnoise = random.gauss(0, 8) if target > 0 else 0; result = round(max(0, target + noise), 1)"
+            }
         },
         {
             "name": "feed_rate",
             "address": "1",
             "data_type": "float32",
             "unit": "mm/min",
-            "description": "实际进给速度",
+            "description": "实际进给速度，与工步联动：换刀时0，粗铣800，半精铣500，精铣300",
             "access": "r",
-            "generator_type": "sine",
-            "min_value": 200.0,
-            "max_value": 3000.0,
-            "generator_config": {"period": 90, "phase": 1.0}
+            "generator_type": "script",
+            "min_value": 0,
+            "max_value": 5000,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: target = 0\nelif t < 120: target = 800\nelif t < 150: target = 0\nelif t < 240: target = 500\nelif t < 270: target = 0\nelse: target = 300\nnoise = random.gauss(0, 5) if target > 0 else 0; result = round(max(0, target + noise), 1)"
+            }
         },
         {
             "name": "spindle_current",
             "address": "2",
             "data_type": "float32",
             "unit": "A",
-            "description": "主轴电流",
+            "description": "主轴电流，与工步联动：换刀伺服保持2.5A，粗铣21A，半精铣15A，精铣10A",
             "access": "r",
-            "generator_type": "sine",
-            "min_value": 8.0,
-            "max_value": 35.0,
-            "generator_config": {"period": 120, "phase": 2.0}
+            "generator_type": "script",
+            "min_value": 0.0,
+            "max_value": 40.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: base, noise_std = 2.5, 0.3\nelif t < 120: base, noise_std = 21.0, 1.2\nelif t < 150: base, noise_std = 2.5, 0.3\nelif t < 240: base, noise_std = 15.0, 1.2\nelif t < 270: base, noise_std = 2.5, 0.3\nelse: base, noise_std = 10.0, 1.2\nresult = round(max(0, base + random.gauss(0, noise_std)), 2)"
+            }
         },
         {
             "name": "vibration_x",
@@ -84,6 +90,20 @@
                 "script": "elapsed = context['elapsed']; base = 0.7 + 0.4 * math.sin(2 * math.pi * elapsed / 58 + 2.8); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)"
             }
         },
+        {
+            "name": "spindle_load",
+            "address": "29",
+            "data_type": "float32",
+            "unit": "%",
+            "description": "主轴负载率(0-100%)，与工步联动：换刀时0%，粗铣45-60%，半精铣35-50%，精铣25-40%",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.0,
+            "max_value": 120.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: base = 0\nelif t < 120: base = 52.0\nelif t < 150: base = 0\nelif t < 240: base = 42.0\nelif t < 270: base = 0\nelse: base = 32.0\nnoise = random.gauss(0, 2.5) if base > 0 else 0; result = round(max(0, min(120.0, base + noise)), 2)"
+            }
+        },
         {
             "name": "spindle_override",
             "address": "3",
@@ -126,10 +146,14 @@
             "name": "tool_no",
             "address": "7",
             "data_type": "uint16",
-            "description": "当前刀具号T",
+            "description": "当前刀具号，与工步联动：粗铣T1，半精铣T2，精铣T3",
             "access": "r",
-            "generator_type": "fixed",
-            "fixed_value": 3
+            "generator_type": "script",
+            "min_value": 1,
+            "max_value": 12,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 120: result = 1\nelif t < 240: result = 2\nelse: result = 3"
+            }
         },
         {
             "name": "abs_x",
diff --git a/web/src/views/Devices.vue b/web/src/views/Devices.vue
index 65e0535..3a33068 100644
--- a/web/src/views/Devices.vue
+++ b/web/src/views/Devices.vue
@@ -364,7 +364,7 @@ const faultIntensityLabel = computed(() => {
 })
 
 function faultCategoryLabel(category) {
-  const map = { mechanical: '机械', thermal: '热', electrical: '电气', process: '工艺' }
+  const map = { mechanical: '机械', thermal: '热', electrical: '电气', process: '工艺', tool: '刀具' }
   return map[category] || category
 }
 

From 58b2e3685ec83a1b4520874510da9d7ce0542ef4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 27 May 2026 09:36:02 +0800
Subject: [PATCH 33/55] fix(scene): split CNC Modbus template into per-stage scenes

---
 protoforge/templates/modbus/fanuc_cnc.json    |  24 +-
 .../templates/modbus/fanuc_cnc_finish.json    | 259 ++++++++++++++++++
 .../modbus/fanuc_cnc_semi_finish.json         | 259 ++++++++++++++++++
 3 files changed, 529 insertions(+), 13 deletions(-)
 create mode 100644 protoforge/templates/modbus/fanuc_cnc_finish.json
 create mode 100644 protoforge/templates/modbus/fanuc_cnc_semi_finish.json

diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json
index 3a21815..3ff30df 100644
--- a/protoforge/templates/modbus/fanuc_cnc.json
+++ b/protoforge/templates/modbus/fanuc_cnc.json
@@ -11,13 +11,13 @@
             "address": "0",
             "data_type": "uint16",
             "unit": "RPM",
-            "description": "主轴实际转速，与工步联动：换刀归零，粗铣2000，半精铣4000，精铣6000",
+            "description": "主轴实际转速，粗加工工位：空闲为0，启动后稳定在约2000RPM",
             "access": "r",
             "generator_type": "script",
             "min_value": 0,
             "max_value": 8000,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: target = 0\nelif t < 120: target = 2000\nelif t < 150: target = 0\nelif t < 240: target = 4000\nelif t < 270: target = 0\nelse: target = 6000\nnoise = random.gauss(0, 8) if target > 0 else 0; result = round(max(0, target + noise), 1)"
+                "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 20:\n    target = 0\nelif t < 30:\n    target = 2000 * ((t - 20) / 10.0)\nelif t < 155:\n    target = 2000\nelif t < 165:\n    target = 2000 * (1 - ((t - 155) / 10.0))\nelse:\n    target = 0\nnoise = random.gauss(0, 15) if target > 100 else 0\nresult = int(round(max(0, min(8000, target + noise))))"
             }
         },
         {
@@ -25,13 +25,13 @@
             "address": "1",
             "data_type": "float32",
             "unit": "mm/min",
-            "description": "实际进给速度，与工步联动：换刀时0，粗铣800，半精铣500，精铣300",
+            "description": "粗加工实际进给速度：切入阶段中低速，稳定粗加工约800mm/min，空闲和主轴启动阶段为0",
             "access": "r",
             "generator_type": "script",
             "min_value": 0,
             "max_value": 5000,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: target = 0\nelif t < 120: target = 800\nelif t < 150: target = 0\nelif t < 240: target = 500\nelif t < 270: target = 0\nelse: target = 300\nnoise = random.gauss(0, 5) if target > 0 else 0; result = round(max(0, target + noise), 1)"
+                "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 30:\n    target = 0\nelif t < 40:\n    target = 500 + 300 * ((t - 30) / 10.0)\nelif t < 140:\n    target = 800\nelif t < 155:\n    target = 400\nelse:\n    target = 0\nnoise = random.gauss(0, 25) if target > 0 else 0\nresult = round(max(0, min(5000, target + noise)), 1)"
             }
         },
         {
@@ -39,13 +39,13 @@
             "address": "2",
             "data_type": "float32",
             "unit": "A",
-            "description": "主轴电流，与工步联动：换刀伺服保持2.5A，粗铣21A，半精铣15A，精铣10A",
+            "description": "粗加工主轴电流：空闲低电流，启动阶段中等电流，切入瞬间升高，稳定粗加工约19~24A",
             "access": "r",
             "generator_type": "script",
             "min_value": 0.0,
             "max_value": 40.0,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: base, noise_std = 2.5, 0.3\nelif t < 120: base, noise_std = 21.0, 1.2\nelif t < 150: base, noise_std = 2.5, 0.3\nelif t < 240: base, noise_std = 15.0, 1.2\nelif t < 270: base, noise_std = 2.5, 0.3\nelse: base, noise_std = 10.0, 1.2\nresult = round(max(0, base + random.gauss(0, noise_std)), 2)"
+                "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 20:\n    base, noise_std = 2.2, 0.25\nelif t < 30:\n    base, noise_std = 6.0, 0.8\nelif t < 40:\n    base, noise_std = 23.5, 1.5\nelif t < 140:\n    base, noise_std = 21.5, 1.3\nelif t < 155:\n    base, noise_std = 6.0, 0.8\nelif t < 165:\n    base, noise_std = 4.0, 0.5\nelse:\n    base, noise_std = 2.2, 0.25\nresult = round(max(0, min(40.0, base + random.gauss(0, noise_std))), 2)"
             }
         },
         {
@@ -95,13 +95,13 @@
             "address": "29",
             "data_type": "float32",
             "unit": "%",
-            "description": "主轴负载率(0-100%)，与工步联动：换刀时0%，粗铣45-60%，半精铣35-50%，精铣25-40%",
+            "description": "粗加工主轴负载率，常态0~100%，短时允许到120%；粗加工稳定阶段约45~65%，切入瞬间可能更高",
             "access": "r",
             "generator_type": "script",
             "min_value": 0.0,
             "max_value": 120.0,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: base = 0\nelif t < 120: base = 52.0\nelif t < 150: base = 0\nelif t < 240: base = 42.0\nelif t < 270: base = 0\nelse: base = 32.0\nnoise = random.gauss(0, 2.5) if base > 0 else 0; result = round(max(0, min(120.0, base + noise)), 2)"
+                "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 20:\n    base, noise_std = 0.0, 0.0\nelif t < 30:\n    base, noise_std = 8.0, 2.0\nelif t < 40:\n    base, noise_std = 65.0, 4.0\nelif t < 140:\n    base, noise_std = 55.0, 4.0\nelif t < 155:\n    base, noise_std = 8.0, 2.0\nelif t < 165:\n    base, noise_std = 3.0, 1.0\nelse:\n    base, noise_std = 0.0, 0.0\nnoise = random.gauss(0, noise_std) if noise_std > 0 else 0\nresult = round(max(0, min(120.0, base + noise)), 2)"
             }
         },
         {
@@ -146,14 +146,12 @@
             "name": "tool_no",
             "address": "7",
             "data_type": "uint16",
-            "description": "当前刀具号，与工步联动：粗铣T1，半精铣T2，精铣T3",
+            "description": "当前刀具号，粗加工工位固定使用T1",
             "access": "r",
-            "generator_type": "script",
+            "generator_type": "fixed",
             "min_value": 1,
             "max_value": 12,
-            "generator_config": {
-                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 120: result = 1\nelif t < 240: result = 2\nelse: result = 3"
-            }
+            "fixed_value": 1
         },
         {
             "name": "abs_x",
diff --git a/protoforge/templates/modbus/fanuc_cnc_finish.json b/protoforge/templates/modbus/fanuc_cnc_finish.json
new file mode 100644
index 0000000..1243e59
--- /dev/null
+++ b/protoforge/templates/modbus/fanuc_cnc_finish.json
@@ -0,0 +1,259 @@
+{
+    "id": "modbus_fanuc_cnc_finish",
+    "name": "Fanuc CNC 精铣工位",
+    "protocol": "modbus_tcp",
+    "description": "FANUC Series 0i-MF数控系统，精铣工位：主轴约6000RPM，进给约300mm/min，切深小，表面粗糙度Ra0.8~1.6，要求主轴稳定性高",
+    "manufacturer": "FANUC",
+    "model": "0i-MF",
+    "points": [
+        {
+            "name": "spindle_speed",
+            "address": "0",
+            "data_type": "uint16",
+            "unit": "RPM",
+            "description": "主轴实际转速，精铣工位：空闲为0，启动后稳定在约6000RPM，精铣对转速稳定性要求高，波动小",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0,
+            "max_value": 8000,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 120\nif t < 12:\n    target = 0\nelif t < 28:\n    target = 6000 * ((t - 12) / 16.0)\nelif t < 95:\n    target = 6000\nelif t < 110:\n    target = 6000 * (1 - ((t - 95) / 15.0))\nelse:\n    target = 0\nnoise = random.gauss(0, 8) if target > 100 else 0\nresult = int(round(max(0, min(8000, target + noise))))"
+            }
+        },
+        {
+            "name": "feed_rate",
+            "address": "1",
+            "data_type": "float32",
+            "unit": "mm/min",
+            "description": "精铣实际进给速度：切入阶段低速，稳定精铣约300mm/min，精铣进给慢且稳定，波动小",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0,
+            "max_value": 5000,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 120\nif t < 28:\n    target = 0\nelif t < 36:\n    target = 150 + 150 * ((t - 28) / 8.0)\nelif t < 90:\n    target = 300\nelif t < 100:\n    target = 150\nelse:\n    target = 0\nnoise = random.gauss(0, 6) if target > 0 else 0\nresult = round(max(0, min(5000, target + noise)), 1)"
+            }
+        },
+        {
+            "name": "spindle_current",
+            "address": "2",
+            "data_type": "float32",
+            "unit": "A",
+            "description": "精铣主轴电流：空闲约2A，启动约4A，切入峰值约11A，稳定精铣约7~10A，精铣切深小电流低且稳定",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.0,
+            "max_value": 40.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 120\nif t < 12:\n    base, noise_std = 2.2, 0.2\nelif t < 28:\n    base, noise_std = 4.0, 0.4\nelif t < 36:\n    base, noise_std = 11.0, 0.8\nelif t < 90:\n    base, noise_std = 8.5, 0.5\nelif t < 100:\n    base, noise_std = 4.0, 0.4\nelif t < 110:\n    base, noise_std = 3.0, 0.3\nelse:\n    base, noise_std = 2.2, 0.2\nresult = round(max(0, min(40.0, base + random.gauss(0, noise_std))), 2)"
+            }
+        },
+        {
+            "name": "vibration_x",
+            "address": "23",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "X轴振动加速度，精铣切深小但转速高，振动幅值小、频率高",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.02,
+            "max_value": 1.5,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 120\nif 28 <= t < 100:\n    base = 0.18 + 0.1 * math.sin(2 * math.pi * elapsed / 28 + 0.8)\nelse:\n    base = 0.05\nnoise = random.uniform(-0.04, 0.04)\nresult = round(max(0.02, base + noise), 3)"
+            }
+        },
+        {
+            "name": "vibration_y",
+            "address": "25",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Y轴振动加速度，精铣切深小但转速高，振动幅值小、频率高",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.02,
+            "max_value": 1.5,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 120\nif 28 <= t < 100:\n    base = 0.18 + 0.1 * math.sin(2 * math.pi * elapsed / 22 + 1.8)\nelse:\n    base = 0.05\nnoise = random.uniform(-0.04, 0.04)\nresult = round(max(0.02, base + noise), 3)"
+            }
+        },
+        {
+            "name": "vibration_z",
+            "address": "27",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Z轴振动加速度，精铣切深小但转速高，振动幅值小、频率高",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.02,
+            "max_value": 1.5,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 120\nif 28 <= t < 100:\n    base = 0.2 + 0.12 * math.sin(2 * math.pi * elapsed / 18 + 2.8)\nelse:\n    base = 0.06\nnoise = random.uniform(-0.05, 0.05)\nresult = round(max(0.02, base + noise), 3)"
+            }
+        },
+        {
+            "name": "spindle_load",
+            "address": "29",
+            "data_type": "float32",
+            "unit": "%",
+            "description": "精铣主轴负载率，稳定阶段约15~28%，切入瞬间约32%，精铣切深小负载低且稳定，短时允许到120%",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.0,
+            "max_value": 120.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 120\nif t < 12:\n    base, noise_std = 0.0, 0.0\nelif t < 28:\n    base, noise_std = 4.0, 1.0\nelif t < 36:\n    base, noise_std = 32.0, 2.0\nelif t < 90:\n    base, noise_std = 22.0, 1.5\nelif t < 100:\n    base, noise_std = 4.0, 1.0\nelif t < 110:\n    base, noise_std = 1.5, 0.5\nelse:\n    base, noise_std = 0.0, 0.0\nnoise = random.gauss(0, noise_std) if noise_std > 0 else 0\nresult = round(max(0, min(120.0, base + noise)), 2)"
+            }
+        },
+        {
+            "name": "spindle_override",
+            "address": "3",
+            "data_type": "uint16",
+            "unit": "%",
+            "description": "主轴倍率",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 100
+        },
+        {
+            "name": "feed_override",
+            "address": "4",
+            "data_type": "uint16",
+            "unit": "%",
+            "description": "进给倍率",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 100
+        },
+        {
+            "name": "running_mode",
+            "address": "5",
+            "data_type": "uint16",
+            "description": "运行模式(1=MDI 2=AUTO 3=JOG 4=EDIT 5=HANDLE)",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 2
+        },
+        {
+            "name": "exec_status",
+            "address": "6",
+            "data_type": "uint16",
+            "description": "执行状态(0=空闲 1=运行 2=暂停 3=M00停 4=M01停)",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 1
+        },
+        {
+            "name": "tool_no",
+            "address": "7",
+            "data_type": "uint16",
+            "description": "当前刀具号，精铣工位固定使用T3",
+            "access": "r",
+            "generator_type": "fixed",
+            "min_value": 1,
+            "max_value": 12,
+            "fixed_value": 3
+        },
+        {
+            "name": "abs_x",
+            "address": "8",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "X轴绝对坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -800.0,
+            "max_value": 800.0
+        },
+        {
+            "name": "abs_y",
+            "address": "10",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "Y轴绝对坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -500.0,
+            "max_value": 500.0
+        },
+        {
+            "name": "abs_z",
+            "address": "12",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "Z轴绝对坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -600.0,
+            "max_value": 200.0
+        },
+        {
+            "name": "mach_x",
+            "address": "14",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "X轴机械坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -800.0,
+            "max_value": 800.0
+        },
+        {
+            "name": "mach_y",
+            "address": "16",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "Y轴机械坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -500.0,
+            "max_value": 500.0
+        },
+        {
+            "name": "mach_z",
+            "address": "18",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "Z轴机械坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -600.0,
+            "max_value": 200.0
+        },
+        {
+            "name": "part_count",
+            "address": "20",
+            "data_type": "uint16",
+            "description": "加工计数",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0,
+            "max_value": 99999,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; result = min(int(elapsed / 120), 99999)"
+            }
+        },
+        {
+            "name": "cycle_time",
+            "address": "21",
+            "data_type": "uint16",
+            "unit": "s",
+            "description": "循环时间，精铣单件约120s",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 120
+        },
+        {
+            "name": "alarm_no",
+            "address": "22",
+            "data_type": "uint16",
+            "description": "报警号(0=无报警)",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 0
+        }
+    ],
+    "protocol_config": {
+        "slave_id": 8
+    },
+    "tags": ["CNC", "FANUC", "数控", "机床", "发那科", "精铣"]
+}
diff --git a/protoforge/templates/modbus/fanuc_cnc_semi_finish.json b/protoforge/templates/modbus/fanuc_cnc_semi_finish.json
new file mode 100644
index 0000000..9efc897
--- /dev/null
+++ b/protoforge/templates/modbus/fanuc_cnc_semi_finish.json
@@ -0,0 +1,259 @@
+{
+    "id": "modbus_fanuc_cnc_semi_finish",
+    "name": "Fanuc CNC 半精铣工位",
+    "protocol": "modbus_tcp",
+    "description": "FANUC Series 0i-MF数控系统，半精铣工位：主轴约4000RPM，进给约500mm/min，切深中等，表面粗糙度Ra3.2~6.3",
+    "manufacturer": "FANUC",
+    "model": "0i-MF",
+    "points": [
+        {
+            "name": "spindle_speed",
+            "address": "0",
+            "data_type": "uint16",
+            "unit": "RPM",
+            "description": "主轴实际转速，半精铣工位：空闲为0，启动后稳定在约4000RPM",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0,
+            "max_value": 8000,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 150\nif t < 15:\n    target = 0\nelif t < 28:\n    target = 4000 * ((t - 15) / 13.0)\nelif t < 120:\n    target = 4000\nelif t < 133:\n    target = 4000 * (1 - ((t - 120) / 13.0))\nelse:\n    target = 0\nnoise = random.gauss(0, 20) if target > 100 else 0\nresult = int(round(max(0, min(8000, target + noise))))"
+            }
+        },
+        {
+            "name": "feed_rate",
+            "address": "1",
+            "data_type": "float32",
+            "unit": "mm/min",
+            "description": "半精铣实际进给速度：切入阶段低速，稳定半精铣约500mm/min，退刀阶段降速",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0,
+            "max_value": 5000,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 150\nif t < 28:\n    target = 0\nelif t < 38:\n    target = 250 + 250 * ((t - 28) / 10.0)\nelif t < 115:\n    target = 500\nelif t < 128:\n    target = 250\nelse:\n    target = 0\nnoise = random.gauss(0, 15) if target > 0 else 0\nresult = round(max(0, min(5000, target + noise)), 1)"
+            }
+        },
+        {
+            "name": "spindle_current",
+            "address": "2",
+            "data_type": "float32",
+            "unit": "A",
+            "description": "半精铣主轴电流：空闲约2A，启动约5A，切入峰值约17A，稳定半精铣约13~16A",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.0,
+            "max_value": 40.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 150\nif t < 15:\n    base, noise_std = 2.2, 0.25\nelif t < 28:\n    base, noise_std = 5.0, 0.6\nelif t < 38:\n    base, noise_std = 17.0, 1.2\nelif t < 115:\n    base, noise_std = 14.5, 0.9\nelif t < 128:\n    base, noise_std = 5.0, 0.6\nelif t < 138:\n    base, noise_std = 3.5, 0.4\nelse:\n    base, noise_std = 2.2, 0.25\nresult = round(max(0, min(40.0, base + random.gauss(0, noise_std))), 2)"
+            }
+        },
+        {
+            "name": "vibration_x",
+            "address": "23",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "X轴振动加速度，半精铣切深中等，振动幅值中等",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.05,
+            "max_value": 2.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 150\nif 28 <= t < 128:\n    base = 0.35 + 0.2 * math.sin(2 * math.pi * elapsed / 55 + 0.8)\nelse:\n    base = 0.08\nnoise = random.uniform(-0.08, 0.08)\nresult = round(max(0.05, base + noise), 3)"
+            }
+        },
+        {
+            "name": "vibration_y",
+            "address": "25",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Y轴振动加速度，半精铣切深中等，振动幅值中等",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.05,
+            "max_value": 2.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 150\nif 28 <= t < 128:\n    base = 0.35 + 0.2 * math.sin(2 * math.pi * elapsed / 45 + 1.8)\nelse:\n    base = 0.08\nnoise = random.uniform(-0.08, 0.08)\nresult = round(max(0.05, base + noise), 3)"
+            }
+        },
+        {
+            "name": "vibration_z",
+            "address": "27",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Z轴振动加速度，半精铣切深中等，振动幅值中等",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.05,
+            "max_value": 2.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 150\nif 28 <= t < 128:\n    base = 0.4 + 0.22 * math.sin(2 * math.pi * elapsed / 38 + 2.8)\nelse:\n    base = 0.1\nnoise = random.uniform(-0.1, 0.1)\nresult = round(max(0.05, base + noise), 3)"
+            }
+        },
+        {
+            "name": "spindle_load",
+            "address": "29",
+            "data_type": "float32",
+            "unit": "%",
+            "description": "半精铣主轴负载率，稳定阶段约30~45%，切入瞬间约48%，短时允许到120%",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.0,
+            "max_value": 120.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 150\nif t < 15:\n    base, noise_std = 0.0, 0.0\nelif t < 28:\n    base, noise_std = 6.0, 1.5\nelif t < 38:\n    base, noise_std = 48.0, 3.0\nelif t < 115:\n    base, noise_std = 38.0, 2.5\nelif t < 128:\n    base, noise_std = 6.0, 1.5\nelif t < 138:\n    base, noise_std = 2.5, 0.8\nelse:\n    base, noise_std = 0.0, 0.0\nnoise = random.gauss(0, noise_std) if noise_std > 0 else 0\nresult = round(max(0, min(120.0, base + noise)), 2)"
+            }
+        },
+        {
+            "name": "spindle_override",
+            "address": "3",
+            "data_type": "uint16",
+            "unit": "%",
+            "description": "主轴倍率",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 100
+        },
+        {
+            "name": "feed_override",
+            "address": "4",
+            "data_type": "uint16",
+            "unit": "%",
+            "description": "进给倍率",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 100
+        },
+        {
+            "name": "running_mode",
+            "address": "5",
+            "data_type": "uint16",
+            "description": "运行模式(1=MDI 2=AUTO 3=JOG 4=EDIT 5=HANDLE)",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 2
+        },
+        {
+            "name": "exec_status",
+            "address": "6",
+            "data_type": "uint16",
+            "description": "执行状态(0=空闲 1=运行 2=暂停 3=M00停 4=M01停)",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 1
+        },
+        {
+            "name": "tool_no",
+            "address": "7",
+            "data_type": "uint16",
+            "description": "当前刀具号，半精铣工位固定使用T2",
+            "access": "r",
+            "generator_type": "fixed",
+            "min_value": 1,
+            "max_value": 12,
+            "fixed_value": 2
+        },
+        {
+            "name": "abs_x",
+            "address": "8",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "X轴绝对坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -800.0,
+            "max_value": 800.0
+        },
+        {
+            "name": "abs_y",
+            "address": "10",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "Y轴绝对坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -500.0,
+            "max_value": 500.0
+        },
+        {
+            "name": "abs_z",
+            "address": "12",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "Z轴绝对坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -600.0,
+            "max_value": 200.0
+        },
+        {
+            "name": "mach_x",
+            "address": "14",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "X轴机械坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -800.0,
+            "max_value": 800.0
+        },
+        {
+            "name": "mach_y",
+            "address": "16",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "Y轴机械坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -500.0,
+            "max_value": 500.0
+        },
+        {
+            "name": "mach_z",
+            "address": "18",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "Z轴机械坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -600.0,
+            "max_value": 200.0
+        },
+        {
+            "name": "part_count",
+            "address": "20",
+            "data_type": "uint16",
+            "description": "加工计数",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0,
+            "max_value": 99999,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; result = min(int(elapsed / 150), 99999)"
+            }
+        },
+        {
+            "name": "cycle_time",
+            "address": "21",
+            "data_type": "uint16",
+            "unit": "s",
+            "description": "循环时间，半精铣单件约150s",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 150
+        },
+        {
+            "name": "alarm_no",
+            "address": "22",
+            "data_type": "uint16",
+            "description": "报警号(0=无报警)",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 0
+        }
+    ],
+    "protocol_config": {
+        "slave_id": 7
+    },
+    "tags": ["CNC", "FANUC", "数控", "机床", "发那科", "半精铣"]
+}

From 736baadcb2c8073cff474fcad2e2dd6c6662abb9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 27 May 2026 13:53:54 +0800
Subject: [PATCH 34/55] fix(scene): update scene cnc

---
 protoforge/templates/modbus/fanuc_cnc.json | 34 +++++++++++-----------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json
index 3ff30df..ebbc8f5 100644
--- a/protoforge/templates/modbus/fanuc_cnc.json
+++ b/protoforge/templates/modbus/fanuc_cnc.json
@@ -11,13 +11,13 @@
             "address": "0",
             "data_type": "uint16",
             "unit": "RPM",
-            "description": "主轴实际转速，粗加工工位：空闲为0，启动后稳定在约2000RPM",
+            "description": "主轴实际转速，粗加工工位：空闲为0，启动后稳定在约2000RPM，每件节拍有随机差异",
             "access": "r",
             "generator_type": "script",
             "min_value": 0,
             "max_value": 8000,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 20:\n    target = 0\nelif t < 30:\n    target = 2000 * ((t - 20) / 10.0)\nelif t < 155:\n    target = 2000\nelif t < 165:\n    target = 2000 * (1 - ((t - 155) / 10.0))\nelse:\n    target = 0\nnoise = random.gauss(0, 15) if target > 100 else 0\nresult = int(round(max(0, min(8000, target + noise))))"
+                "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'spd_{cycle_idx}'\nif ck not in cache:\n    rng = random.Random(cycle_idx * 7 + 1)\n    cache[ck] = {'dur': rng.randint(-8, 10), 'spd': rng.uniform(-40, 40)}\noff = cache[ck]\nperiod = 180 + off['dur']; t = elapsed % period\nramp = 8; cut_end = period - 25 - ramp\nif t < 20:\n    target = 0\nelif t < 20 + ramp:\n    target = (2000 + off['spd']) * ((t - 20) / ramp)\nelif t < cut_end:\n    target = 2000 + off['spd']\nelif t < cut_end + ramp:\n    target = (2000 + off['spd']) * (1 - (t - cut_end) / ramp)\nelse:\n    target = 0\nnoise = random.gauss(0, 12) if target > 100 else 0\nresult = int(round(max(0, min(8000, target + noise))))"
             }
         },
         {
@@ -25,13 +25,13 @@
             "address": "1",
             "data_type": "float32",
             "unit": "mm/min",
-            "description": "粗加工实际进给速度：切入阶段中低速，稳定粗加工约800mm/min，空闲和主轴启动阶段为0",
+            "description": "粗加工实际进给速度：切入爬升，稳定粗铣约800mm/min含拐角减速扰动，退出降速，空闲为0",
             "access": "r",
             "generator_type": "script",
             "min_value": 0,
             "max_value": 5000,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 30:\n    target = 0\nelif t < 40:\n    target = 500 + 300 * ((t - 30) / 10.0)\nelif t < 140:\n    target = 800\nelif t < 155:\n    target = 400\nelse:\n    target = 0\nnoise = random.gauss(0, 25) if target > 0 else 0\nresult = round(max(0, min(5000, target + noise)), 1)"
+                "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'feed_{cycle_idx}'\nif ck not in cache:\n    rng = random.Random(cycle_idx * 7 + 2)\n    cache[ck] = {'dur': rng.randint(-8, 10), 'feed': rng.uniform(-30, 50)}\noff = cache[ck]\nperiod = 180 + off['dur']; t = elapsed % period\ncut_end = period - 25 - 8\nif t < 30:\n    target = 0\nelif t < 40:\n    target = 400 + 400 * ((t - 30) / 10.0)\nelif t < cut_end:\n    base_feed = 800 + off['feed']\n    corner = 80 * math.sin(2 * math.pi * elapsed / 23.7) * max(0, math.sin(2 * math.pi * elapsed / 41.3))\n    target = base_feed + corner\nelif t < cut_end + 12:\n    target = 350\nelse:\n    target = 0\nnoise = random.gauss(0, 18) if target > 0 else 0\nresult = round(max(0, min(5000, target + noise)), 1)"
             }
         },
         {
@@ -39,13 +39,13 @@
             "address": "2",
             "data_type": "float32",
             "unit": "A",
-            "description": "粗加工主轴电流：空闲低电流，启动阶段中等电流，切入瞬间升高，稳定粗加工约19~24A",
+            "description": "粗加工主轴电流：空闲约2A，启动约6A，切入峰值约23A，稳定粗铣约19~24A含细碎波动，退刀降低",
             "access": "r",
             "generator_type": "script",
             "min_value": 0.0,
             "max_value": 40.0,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 20:\n    base, noise_std = 2.2, 0.25\nelif t < 30:\n    base, noise_std = 6.0, 0.8\nelif t < 40:\n    base, noise_std = 23.5, 1.5\nelif t < 140:\n    base, noise_std = 21.5, 1.3\nelif t < 155:\n    base, noise_std = 6.0, 0.8\nelif t < 165:\n    base, noise_std = 4.0, 0.5\nelse:\n    base, noise_std = 2.2, 0.25\nresult = round(max(0, min(40.0, base + random.gauss(0, noise_std))), 2)"
+                "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'cur_{cycle_idx}'\nif ck not in cache:\n    rng = random.Random(cycle_idx * 7 + 3)\n    cache[ck] = {'dur': rng.randint(-8, 10), 'cur': rng.uniform(-1.5, 2.0)}\noff = cache[ck]\nperiod = 180 + off['dur']; t = elapsed % period\ncut_end = period - 25 - 8\nif t < 20:\n    base, noise_std = 2.2, 0.2\nelif t < 20 + 8:\n    base, noise_std = 6.0, 0.7\nelif t < 40:\n    base, noise_std = 23.5 + off['cur'], 1.8\nelif t < cut_end:\n    drift = 0.8 * math.sin(2 * math.pi * elapsed / 37.4) + 0.5 * math.sin(2 * math.pi * elapsed / 19.1)\n    base, noise_std = 21.5 + off['cur'] + drift, 1.0\nelif t < cut_end + 12:\n    base, noise_std = 6.0, 0.7\nelif t < cut_end + 20:\n    base, noise_std = 3.5, 0.4\nelse:\n    base, noise_std = 2.2, 0.2\nresult = round(max(0, min(40.0, base + random.gauss(0, noise_std))), 2)"
             }
         },
         {
@@ -53,13 +53,13 @@
             "address": "23",
             "data_type": "float32",
             "unit": "m/s²",
-            "description": "X轴振动加速度",
+            "description": "X轴振动加速度，粗铣切削时约0.4~0.9m/s²，空闲时接近0",
             "access": "r",
             "generator_type": "script",
-            "min_value": 0.1,
+            "min_value": 0.0,
             "max_value": 2.5,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 85 + 0.8); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)"
+                "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'vx_{cycle_idx}'\nif ck not in cache:\n    rng = random.Random(cycle_idx * 7 + 4)\n    cache[ck] = {'dur': rng.randint(-8, 10)}\nperiod = 180 + cache[ck]['dur']; t = elapsed % period\ncut_end = period - 25 - 8\ncutting = 40 <= t < cut_end\nif cutting:\n    base = 0.55 + 0.22 * math.sin(2 * math.pi * elapsed / 85 + 0.8) + 0.1 * math.sin(2 * math.pi * elapsed / 17.3)\n    noise = random.uniform(-0.12, 0.12)\nelse:\n    base = 0.04\n    noise = random.uniform(-0.02, 0.02)\nresult = round(max(0.0, base + noise), 3)"
             }
         },
         {
@@ -67,13 +67,13 @@
             "address": "25",
             "data_type": "float32",
             "unit": "m/s²",
-            "description": "Y轴振动加速度",
+            "description": "Y轴振动加速度，粗铣切削时约0.4~0.9m/s²，空闲时接近0",
             "access": "r",
             "generator_type": "script",
-            "min_value": 0.1,
+            "min_value": 0.0,
             "max_value": 2.5,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 70 + 1.8); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)"
+                "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'vy_{cycle_idx}'\nif ck not in cache:\n    rng = random.Random(cycle_idx * 7 + 5)\n    cache[ck] = {'dur': rng.randint(-8, 10)}\nperiod = 180 + cache[ck]['dur']; t = elapsed % period\ncut_end = period - 25 - 8\ncutting = 40 <= t < cut_end\nif cutting:\n    base = 0.5 + 0.2 * math.sin(2 * math.pi * elapsed / 70 + 1.8) + 0.08 * math.sin(2 * math.pi * elapsed / 13.7)\n    noise = random.uniform(-0.1, 0.1)\nelse:\n    base = 0.04\n    noise = random.uniform(-0.02, 0.02)\nresult = round(max(0.0, base + noise), 3)"
             }
         },
         {
@@ -81,13 +81,13 @@
             "address": "27",
             "data_type": "float32",
             "unit": "m/s²",
-            "description": "Z轴振动加速度",
+            "description": "Z轴振动加速度，粗铣切削时约0.5~1.1m/s²（Z向切深方向幅值略大），空闲时接近0",
             "access": "r",
             "generator_type": "script",
-            "min_value": 0.1,
+            "min_value": 0.0,
             "max_value": 3.0,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; base = 0.7 + 0.4 * math.sin(2 * math.pi * elapsed / 58 + 2.8); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)"
+                "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'vz_{cycle_idx}'\nif ck not in cache:\n    rng = random.Random(cycle_idx * 7 + 6)\n    cache[ck] = {'dur': rng.randint(-8, 10)}\nperiod = 180 + cache[ck]['dur']; t = elapsed % period\ncut_end = period - 25 - 8\ncutting = 40 <= t < cut_end\nif cutting:\n    base = 0.65 + 0.28 * math.sin(2 * math.pi * elapsed / 58 + 2.8) + 0.12 * math.sin(2 * math.pi * elapsed / 11.2)\n    noise = random.uniform(-0.15, 0.15)\nelse:\n    base = 0.05\n    noise = random.uniform(-0.02, 0.02)\nresult = round(max(0.0, base + noise), 3)"
             }
         },
         {
@@ -95,13 +95,13 @@
             "address": "29",
             "data_type": "float32",
             "unit": "%",
-            "description": "粗加工主轴负载率，常态0~100%，短时允许到120%；粗加工稳定阶段约45~65%，切入瞬间可能更高",
+            "description": "粗加工主轴负载率，稳定粗铣约48~68%含刀路扰动，切入瞬间约65~75%，空闲接近0，短时允许到120%",
             "access": "r",
             "generator_type": "script",
             "min_value": 0.0,
             "max_value": 120.0,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 20:\n    base, noise_std = 0.0, 0.0\nelif t < 30:\n    base, noise_std = 8.0, 2.0\nelif t < 40:\n    base, noise_std = 65.0, 4.0\nelif t < 140:\n    base, noise_std = 55.0, 4.0\nelif t < 155:\n    base, noise_std = 8.0, 2.0\nelif t < 165:\n    base, noise_std = 3.0, 1.0\nelse:\n    base, noise_std = 0.0, 0.0\nnoise = random.gauss(0, noise_std) if noise_std > 0 else 0\nresult = round(max(0, min(120.0, base + noise)), 2)"
+                "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'load_{cycle_idx}'\nif ck not in cache:\n    rng = random.Random(cycle_idx * 7 + 7)\n    cache[ck] = {'dur': rng.randint(-8, 10), 'load': rng.uniform(-4.0, 6.0)}\noff = cache[ck]\nperiod = 180 + off['dur']; t = elapsed % period\ncut_end = period - 25 - 8\nif t < 20:\n    base, noise_std = 0.0, 0.0\nelif t < 20 + 8:\n    base, noise_std = 6.0, 1.5\nelif t < 40:\n    base, noise_std = 68.0 + off['load'], 3.5\nelif t < cut_end:\n    drift = 4.5 * math.sin(2 * math.pi * elapsed / 37.4) + 2.5 * math.sin(2 * math.pi * elapsed / 19.1) + 1.5 * math.sin(2 * math.pi * elapsed / 7.3)\n    base, noise_std = 56.0 + off['load'] + drift, 2.5\nelif t < cut_end + 12:\n    base, noise_std = 6.0, 1.5\nelif t < cut_end + 20:\n    base, noise_std = 2.0, 0.8\nelse:\n    base, noise_std = 0.0, 0.0\nnoise = random.gauss(0, noise_std) if noise_std > 0 else 0\nresult = round(max(0, min(120.0, base + noise)), 2)"
             }
         },
         {

From c9269967283d9cbe7b3f6b384ead70c7ccf2fab7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 28 May 2026 13:44:34 +0800
Subject: [PATCH 35/55] fix(fault): update fault

---
 ai/predictor/README.md     |  27 +++
 ai/predictor/__init__.py   |  30 +++
 ai/predictor/anomaly.py    | 242 ++++++++++++++++++++
 ai/predictor/config.py     | 186 ++++++++++++++++
 ai/predictor/discovery.py  |  84 +++++++
 ai/predictor/models.py     | 114 ++++++++++
 ai/predictor/phase_lock.py | 152 +++++++++++++
 ai/predictor/profiling.py  | 256 +++++++++++++++++++++
 ai/predictor/service.py    | 333 ++++++++++++++++++++++++++++
 ai/predictor/signal.py     | 335 ++++++++++++++++++++++++++++
 ai/predictor/state.py      | 328 +++++++++++++++++++++++++++
 ai/predictor/storage.py    | 438 ++++++++++++++++++++++++++++++++++++
 ai/predictor/template.py   | 384 ++++++++++++++++++++++++++++++++
 ai/pridict_v5.py           | 442 ++++++++++++++++++++++++++-----------
 protoforge/core/fault.py   |  71 +++++-
 15 files changed, 3282 insertions(+), 140 deletions(-)
 create mode 100644 ai/predictor/README.md
 create mode 100644 ai/predictor/__init__.py
 create mode 100644 ai/predictor/anomaly.py
 create mode 100644 ai/predictor/config.py
 create mode 100644 ai/predictor/discovery.py
 create mode 100644 ai/predictor/models.py
 create mode 100644 ai/predictor/phase_lock.py
 create mode 100644 ai/predictor/profiling.py
 create mode 100644 ai/predictor/service.py
 create mode 100644 ai/predictor/signal.py
 create mode 100644 ai/predictor/state.py
 create mode 100644 ai/predictor/storage.py
 create mode 100644 ai/predictor/template.py

diff --git a/ai/predictor/README.md b/ai/predictor/README.md
new file mode 100644
index 0000000..8fcc022
--- /dev/null
+++ b/ai/predictor/README.md
@@ -0,0 +1,27 @@
+```text
+ai/predictor/
+  ├── __init__.py      # 公开 API：PredictorService, run()
+  ├── config.py        # 所有常量，支持环境变量覆盖
+  ├── models.py        # BaselineState, MetricProfile 数据类
+  ├── discovery.py     # VM 设备/指标发现
+  ├── signal.py        # 纯信号处理：平滑、FFT+自相关周期估计、谷底检测
+  ├── template.py      # 模板构建、预测、重采样、EMA 融合
+  ├── phase_lock.py    # Phase-lock 相位对齐
+  ├── anomaly.py       # 异常检测：边界计算、越界统计、三条件判断
+  ├── state.py         # 状态机：HEALTHY/ANOMALY/RECOVERING 生命周期
+  ├── profiling.py     # 自适应配置推断：infer_metric_profile, refresh_targets
+  ├── storage.py       # VM 读写、标签工具、状态持久化
+  └── service.py       # PredictorService 主类（run_once / run）
+
+  启动方式：
+  from ai.predictor import run
+  run()
+  # 或
+  from ai.predictor import PredictorService
+  PredictorService(vm_url="http://vm:8428").run()
+
+  主要改进：
+  - 全局变量（BASELINE_STATES、LAST_REAL_TS_WRITTEN、_TARGETS_CACHE）全部移入 PredictorService 实例属性
+  - IO 与计算完全分离：signal.py、template.py、anomaly.py 均为纯函数，无网络请求
+  - 每个模块顶部有职责说明，每个公开函数有完整 docstring
+```
diff --git a/ai/predictor/__init__.py b/ai/predictor/__init__.py
new file mode 100644
index 0000000..9e45810
--- /dev/null
+++ b/ai/predictor/__init__.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+"""
+predictor
+~~~~~~~~~
+ProtoForge 预测服务 package。
+
+对外暴露：
+- ``PredictorService``：预测服务主类，支持 run() 一键启动
+- ``run()``：便捷入口，使用默认配置启动服务
+
+快速启动::
+
+    from ai.predictor import run
+    run()
+
+或自定义配置::
+
+    from ai.predictor import PredictorService
+    svc = PredictorService(vm_url="http://vm:8428", poll_interval=60)
+    svc.run()
+"""
+
+from .service import PredictorService
+
+__all__ = ["PredictorService", "run"]
+
+
+def run() -> None:
+    """Start the predictor with a default-constructed ``PredictorService`` (one-line entry point)."""
+    PredictorService().run()
diff --git a/ai/predictor/anomaly.py b/ai/predictor/anomaly.py
new file mode 100644
index 0000000..779d38e
--- /dev/null
+++ b/ai/predictor/anomaly.py
@@ -0,0 +1,242 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.anomaly
+~~~~~~~~~~~~~~~~~
+异常检测：判断当前信号是否偏离健康基线。
+
+职责：
+- 计算预测边界（phase_point 用对称阈值，phase_band 用分位数带）
+- 应用物理上下限兜底（来自 override 文件）
+- 统计越界比例、连续越界秒数、最大越界倍数
+- 综合三个条件判断是否触发异常
+
+依赖：predictor.phase_lock, predictor.template, predictor.config, predictor.models
+"""
+
+from typing import Dict, Tuple
+
+import numpy as np
+
+from . import config
+from .models import BaselineState
+from .phase_lock import phase_lock_recent
+from .template import predict_state_bundle
+
+
+def max_consecutive_true(flags: np.ndarray) -> int:
+    """
+    Return the length of the longest run of consecutive truthy entries in ``flags``.
+
+    ``detect_anomaly`` calls this on its out-of-bounds mask to get the longest unbroken violation run.
+
+    Args:
+        flags: Iterable of flags; each element is tested with ``bool()`` (True = point is out of bounds).
+
+    Returns:
+        Length of the longest truthy run as an ``int``; 0 when there is none or ``flags`` is empty.
+    """
+    max_count = 0
+    current = 0
+    for flag in flags:
+        if bool(flag):
+            current += 1
+            max_count = max(max_count, current)
+        else:
+            current = 0
+    return int(max_count)
+
+
+def calc_point_bounds(
+    pred: np.ndarray,
+    abs_threshold: float,
+    rel_threshold: float,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Compute the symmetric prediction bounds used by the phase_point strategy.
+
+    Half-width = max(abs_threshold, |pred| * rel_threshold), evaluated element-wise;
+    taking the larger of the two keeps a minimum absolute tolerance where pred is close to zero.
+
+    Args:
+        pred: Array of predicted centre values.
+        abs_threshold: Absolute tolerance (same unit as ``pred``).
+        rel_threshold: Relative tolerance, as a fraction of ``|pred|``.
+
+    Returns:
+        ``(lower, upper)`` arrays, i.e. ``pred - half_width`` and ``pred + half_width``.
+    """
+    threshold = np.maximum(abs_threshold, np.abs(pred) * rel_threshold)
+    return pred - threshold, pred + threshold
+
+
+def calc_final_bounds(
+    state: BaselineState,
+    pred: np.ndarray,
+    lower_raw: np.ndarray,
+    upper_raw: np.ndarray,
+    target: Dict,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Compute the final prediction bounds from the strategy, a dynamic pad and optional hard limits.
+
+    phase_band strategy:
+        Widen the template band [lower_raw, upper_raw] on both sides by a dynamic pad, the larger of
+        ``band_pad_abs`` and ``|pred| * rel_threshold * 0.25`` (intended to absorb normal spikes).
+
+    phase_point strategy (any other ``strategy`` value):
+        Use the symmetric bounds from ``calc_point_bounds``; lower_raw / upper_raw are ignored.
+
+    Hard limits (optional):
+        ``hard_max`` only caps ``upper`` and ``hard_min`` only floors ``lower``; each is skipped when None.
+
+    Args:
+        state: Current baseline state. NOTE(review): not read here; the strategy is taken from ``target``.
+        pred: Array of predicted centre values.
+        lower_raw: Template lower-bound array (used only by phase_band).
+        upper_raw: Template upper-bound array (used only by phase_band).
+        target: Dict read for strategy, abs_threshold, rel_threshold, band_pad_abs, hard_max, hard_min.
+
+    Returns:
+        ``(lower, upper)`` final bound arrays.
+    """
+    strategy = target.get("strategy", "phase_point")
+    abs_threshold = float(target.get("abs_threshold", 1.0))
+    rel_threshold = float(target.get("rel_threshold", 0.25))
+
+    if strategy == "phase_band":
+        pad_abs = float(target.get("band_pad_abs", abs_threshold))
+        # Dynamic pad: the larger of the absolute pad and a quarter of the relative tolerance on |pred|.
+        dynamic_pad = np.maximum(pad_abs, np.abs(pred) * rel_threshold * 0.25)
+        lower = lower_raw - dynamic_pad
+        upper = upper_raw + dynamic_pad
+    else:
+        lower, upper = calc_point_bounds(pred, abs_threshold, rel_threshold)
+
+    # Optional hard limits as a last-resort clip (the original note says they come from an override file).
+    hard_max = target.get("hard_max")
+    hard_min = target.get("hard_min")
+    if hard_max is not None:
+        upper = np.minimum(upper, float(hard_max))
+    if hard_min is not None:
+        lower = np.maximum(lower, float(hard_min))
+
+    return lower, upper
+
+
+def detect_anomaly(
+    state: BaselineState,
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    ys_actual: np.ndarray,
+    target: Dict,
+) -> Tuple[bool, float, float, float, int, int, int, float]:
+    """
+    Check whether the recent signal deviates from the healthy baseline and return the diagnostics.
+
+    Steps:
+    1. Phase-lock: ask ``phase_lock_recent`` for the aligned (period, origin) and its recent-window prediction.
+    2. Re-predict the recent window with those parameters through a temporary ``BaselineState``.
+    3. Compute the out-of-bounds statistics.
+    4. Flag an anomaly when ANY ONE of these holds (the conditions are OR-ed):
+       - out-of-bounds ratio >= outside_ratio_threshold
+       - longest consecutive out-of-bounds run >= min_consecutive_outside
+       - largest excess / band width >= severe_exceed_ratio (one severe point is enough)
+
+    Args:
+        state: Current baseline state (templates and bookkeeping fields are copied from it).
+        ts_grid: Timestamp array; presumably a uniform 1-second grid -- confirm against the caller.
+        ys_model: Smoothed signal; passed to phase-lock and compared when the strategy is phase_point.
+        ys_actual: Raw signal; compared when the strategy is phase_band.
+        target: Dict with the strategy and optional per-metric threshold overrides.
+
+    Returns:
+        (is_anomaly, outside_ratio, mean_abs_err, mean_rel_err, best_period, best_origin,
+         max_outside_seconds, max_exceed_ratio); False with all-zero stats if the recent window is empty.
+    """
+    best_period, best_origin, pred_recent, _ = phase_lock_recent(
+        state=state,
+        ts_grid=ts_grid,
+        ys_model=ys_model,
+        target=target,
+    )
+
+    recent_len = len(pred_recent)
+    if recent_len <= 0:
+        return False, 0.0, 0.0, 0.0, best_period, best_origin, 0, 0.0
+
+    # phase_band compares the raw signal (keeps real fluctuation); phase_point compares the smoothed one.
+    if target.get("strategy", "phase_point") == "phase_band":
+        actual = ys_actual[-recent_len:].astype(float)
+    else:
+        actual = ys_model[-recent_len:].astype(float)
+
+    # Re-predict with the phase-locked period/origin via a temporary state; `state` itself is not mutated.
+    tmp_state = BaselineState(
+        period=best_period,
+        phase_origin_ts=best_origin,
+        template=state.template,
+        lower_template=state.lower_template,
+        upper_template=state.upper_template,
+        strategy=state.strategy,
+        status=state.status,
+        clean_seconds=state.clean_seconds,
+        last_update_ts=state.last_update_ts,
+        last_seen_ts=state.last_seen_ts,
+        y_min=state.y_min,
+        y_max=state.y_max,
+    )
+
+    recent_ts = ts_grid[-recent_len:].astype(int).tolist()
+    pred, lower_raw, upper_raw = predict_state_bundle(tmp_state, recent_ts)
+
+    lower, upper = calc_final_bounds(
+        state=tmp_state,
+        pred=pred,
+        lower_raw=lower_raw,
+        upper_raw=upper_raw,
+        target=target,
+    )
+
+    # Signed distance past each bound; a negative value means inside the band, hence the clip at 0.
+    above_upper = actual - upper
+    below_lower = lower - actual
+    exceed = np.maximum(np.maximum(above_upper, below_lower), 0.0)
+    outside = exceed > 0
+
+    band_width = np.maximum(upper - lower, 1e-6)
+    exceed_ratio = exceed / band_width  # excess expressed as a multiple of the band width
+
+    abs_err = np.abs(actual - pred)
+    outside_ratio = float(np.mean(outside))
+    mean_abs_err = float(np.mean(abs_err))
+    mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1e-6)))
+    max_outside_seconds = max_consecutive_true(outside)
+    max_exceed_ratio = float(np.max(exceed_ratio)) if len(exceed_ratio) > 0 else 0.0
+
+    # Thresholds are read from target with config defaults as fallback, so each metric can be tuned alone.
+    outside_ratio_threshold = float(
+        target.get("outside_ratio_threshold", config.OUTSIDE_RATIO_THRESHOLD)
+    )
+    min_consecutive_outside = int(
+        target.get("min_consecutive_outside", config.MIN_CONSECUTIVE_OUTSIDE)
+    )
+    severe_exceed_ratio = float(
+        target.get("severe_exceed_ratio", config.SEVERE_EXCEED_RATIO)
+    )
+
+    is_anomaly = (
+        outside_ratio >= outside_ratio_threshold
+        or max_outside_seconds >= min_consecutive_outside
+        or max_exceed_ratio >= severe_exceed_ratio
+    )
+
+    return (
+        is_anomaly,
+        outside_ratio,
+        mean_abs_err,
+        mean_rel_err,
+        int(best_period),
+        int(best_origin),
+        int(max_outside_seconds),
+        float(max_exceed_ratio),
+    )
diff --git a/ai/predictor/config.py b/ai/predictor/config.py
new file mode 100644
index 0000000..ed5c11b
--- /dev/null
+++ b/ai/predictor/config.py
@@ -0,0 +1,186 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.config
+~~~~~~~~~~~~~~~~
+所有运行时配置常量，集中在此处管理。
+
+大部分参数支持通过环境变量覆盖，方便容器化部署时无需修改代码。
+环境变量前缀统一为 ``PROTOFORGE_``。
+"""
+
+import os
+from typing import List
+
+# ---------------------------------------------------------------------------
+# VictoriaMetrics connection
+# ---------------------------------------------------------------------------
+
+#: VM HTTP address; defaults to the local host
+VM_URL: str = os.environ.get("PROTOFORGE_VM_URL", "http://localhost:8428")
+
+# ---------------------------------------------------------------------------
+# State persistence
+# ---------------------------------------------------------------------------
+
+#: Path of the healthy-template state file (JSON), so learning progress can be restored after a restart
+STATE_FILE: str = os.environ.get(
+    "PROTOFORGE_STATE_FILE",
+    "/tmp/protoforge_predictor_state_v14.json",
+)
+
+# ---------------------------------------------------------------------------
+# Polling and prediction time windows
+# ---------------------------------------------------------------------------
+
+#: Time window of history to fetch (minutes)
+HISTORY_MINUTES: int = int(os.environ.get("PROTOFORGE_HISTORY_MINUTES", "30"))
+
+#: Theoretical prediction horizon (seconds)
+HORIZON_SECONDS: int = int(os.environ.get("PROTOFORGE_HORIZON_SECONDS", "120"))
+
+#: Polling interval (seconds)
+POLL_INTERVAL: int = int(os.environ.get("PROTOFORGE_POLL_INTERVAL", "30"))
+
+#: Number of prediction points actually written to VM = min(HORIZON_SECONDS, POLL_INTERVAL)
+#: Avoids writing too many future points, which shows up as "prediction jumps" in Grafana charts
+WRITE_HORIZON_SECONDS: int = min(HORIZON_SECONDS, POLL_INTERVAL)
+
+#: VM query step
+QUERY_STEP: str = "1s"
+
+#: Minimum number of history points required before modelling starts
+MIN_POINTS: int = 120
+
+# ---------------------------------------------------------------------------
+# Period detection range
+# ---------------------------------------------------------------------------
+
+#: Shortest allowed period (seconds)
+MIN_PERIOD_SECONDS: int = 5
+
+#: Longest allowed period (seconds)
+MAX_PERIOD_SECONDS: int = 3600
+
+# ---------------------------------------------------------------------------
+# Template learning parameters
+# ---------------------------------------------------------------------------
+
+#: Minimum number of complete cycles needed to build a template
+MIN_FULL_CYCLES_FOR_TEMPLATE: int = 3
+
+#: Maximum number of most recent cycles used to build a template (keeps stale data out)
+MAX_CYCLES_FOR_TEMPLATE: int = 8
+
+#: During valley detection, only points below this percentile are valley candidates
+VALLEY_QUANTILE: int = 45
+
+#: Template EMA step while healthy (smaller = more conservative, slower change)
+HEALTHY_EMA_ALPHA: float = 0.10
+
+#: Template EMA step while recovering (more aggressive than healthy, to catch up faster)
+RECOVERY_EMA_ALPHA: float = 0.25
+
+# ---------------------------------------------------------------------------
+# Default anomaly detection thresholds
+# ---------------------------------------------------------------------------
+
+#: Detection window (seconds): only this most recent span is examined for anomalies
+DETECT_WINDOW_SECONDS: int = 30
+
+#: Minimum recovery duration (seconds): stay stable this long after an anomaly before learning resumes
+RECOVERY_MIN_SECONDS: int = 60
+
+#: Outside-ratio threshold: alert when more than this fraction of window points are out of bounds
+OUTSIDE_RATIO_THRESHOLD: float = 0.60
+
+#: Consecutive-outside threshold (seconds): alert when out of bounds for this many seconds in a row
+MIN_CONSECUTIVE_OUTSIDE: int = 5
+
+#: Severe-exceed multiple: alert at once when a single point exceeds the bound by this many band widths
+SEVERE_EXCEED_RATIO: float = 1.8
+
+#: Maximum allowed lag of real data (seconds): beyond this the data pipeline is considered broken and prediction is skipped
+MAX_DATA_LAG_SECONDS: int = 180
+
+# ---------------------------------------------------------------------------
+# Phase-lock search parameters
+# ---------------------------------------------------------------------------
+
+#: Shortest history window used by phase-lock (seconds)
+PHASE_LOCK_MIN_WINDOW_SECONDS: int = 45
+
+#: Longest history window used by phase-lock (seconds)
+PHASE_LOCK_MAX_WINDOW_SECONDS: int = 180
+
+#: Period search range (fraction of the base period); overridden dynamically by infer_metric_profile
+PHASE_LOCK_PERIOD_SEARCH_RATIO: float = 0.12
+
+#: Phase-origin search range (fraction of the period)
+PHASE_LOCK_ORIGIN_SEARCH_RATIO: float = 0.35
+
+#: Period search step (seconds)
+PHASE_LOCK_PERIOD_STEP: int = 1
+
+#: Phase-origin search step (seconds)
+PHASE_LOCK_ORIGIN_STEP: int = 1
+
+# ---------------------------------------------------------------------------
+# Monitored metric allow-list
+# ---------------------------------------------------------------------------
+
+#: Metric names monitored by default
+_DEFAULT_MONITORED_METRICS: List[str] = [
+    "feed_rate",
+    "spindle_speed",
+    "spindle_current",
+    "spindle_load",
+    "vibration_x",
+    "vibration_y",
+    "vibration_z",
+]
+
+#: Metric list actually used; can be overridden via the PROTOFORGE_MONITORED_METRICS environment variable
+#: Format: comma-separated metric names, e.g. "feed_rate,spindle_speed"
+MONITORED_METRICS: List[str] = [
+    m.strip()
+    for m in os.environ.get(
+        "PROTOFORGE_MONITORED_METRICS",
+        ",".join(_DEFAULT_MONITORED_METRICS),
+    ).split(",")
+    if m.strip()
+]
+
+# ---------------------------------------------------------------------------
+# Manual hard-limit override file
+# ---------------------------------------------------------------------------
+
+#: Path of the override file; a missing file is ignored (no error)
+#: File format (JSON):
+#:   {
+#:     "device-id": {
+#:       "metric_name": {"hard_max": 35.0, "hard_min": 0.0}
+#:     }
+#:   }
+OVERRIDE_FILE: str = os.environ.get(
+    "PROTOFORGE_PREDICTOR_OVERRIDE",
+    "/etc/protoforge/predictor_override.json",
+)
+
+# ---------------------------------------------------------------------------
+# Target list refresh interval
+# ---------------------------------------------------------------------------
+
+#: Interval between device/metric re-discovery runs (seconds)
+TARGETS_REFRESH_INTERVAL: int = int(
+    os.environ.get("PROTOFORGE_TARGETS_REFRESH", "60")
+)
+
+# ---------------------------------------------------------------------------
+# Extra labels attached when writing to VM
+# ---------------------------------------------------------------------------
+
+#: Labels attached to every predicted metric, used to tell predictions from raw data in Grafana
+EXTRA_PREDICT_LABELS = {
+    "forecast": "phase_band_health_v14",
+    "source": "protoforge",
+}
diff --git a/ai/predictor/discovery.py b/ai/predictor/discovery.py
new file mode 100644
index 0000000..c8b829a
--- /dev/null
+++ b/ai/predictor/discovery.py
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.discovery
+~~~~~~~~~~~~~~~~~~~
+Layer 1：设备与指标发现。
+
+职责：
+- 从 VictoriaMetrics 查询所有在线设备（device_id 标签值）
+- 探测指定设备上哪些指标名当前有数据
+
+本模块只做网络查询，不包含任何预测或统计逻辑。
+
+依赖：requests
+"""
+
+import logging
+from typing import List
+
+import requests
+
+logger = logging.getLogger(__name__)
+
+
+def discover_device_ids(vm_url: str) -> List[str]:
+    """
+    Query VictoriaMetrics for every value of the ``device_id`` label.
+
+    Calls the label-values endpoint. A failed request or a body that is not
+    valid JSON is logged and reported as an empty list instead of raising.
+
+    Args:
+        vm_url: VM HTTP address, e.g. "http://localhost:8428"
+
+    Returns:
+        Non-empty device ID strings taken from the response's "data" field;
+        an empty list when the query fails.
+    """
+    try:
+        resp = requests.get(
+            f"{vm_url}/api/v1/label/device_id/values",
+            timeout=10,
+        )
+        resp.raise_for_status()
+        return [v for v in resp.json().get("data", []) if v]
+    except (requests.RequestException, ValueError) as e:  # ValueError: non-JSON body
+        logger.error("发现 device_id 失败: %s", e)
+        return []
+
+
+def discover_metrics_for_device(
+    vm_url: str,
+    device_id: str,
+    candidates: List[str],
+) -> List[str]:
+    """
+    Probe which candidate metrics currently return data for one device.
+
+    Issues one instant query per candidate; a metric counts as present only
+    when the response carries a non-empty "result".
+
+    Args:
+        vm_url: VM HTTP address
+        device_id: Value of the device_id label to filter on
+        candidates: Metric names to probe, e.g. ["feed_rate", "spindle_speed"]
+
+    Returns:
+        Metric names that returned data, in the order given by candidates.
+        A probe that fails is logged at debug level and skipped.
+    """
+    found: List[str] = []
+    for metric in candidates:
+        try:
+            resp = requests.get(
+                f"{vm_url}/api/v1/query",
+                params={"query": f'{metric}{{device_id="{device_id}"}}'},
+                timeout=5,
+            )
+            resp.raise_for_status()
+            if resp.json().get("data", {}).get("result"):
+                found.append(metric)
+        except (requests.RequestException, ValueError) as e:
+            # One failed probe must not abort discovery, but it should leave a trace
+            logger.debug("探测指标失败 device=%s metric=%s: %s", device_id, metric, e)
+    return found
diff --git a/ai/predictor/models.py b/ai/predictor/models.py
new file mode 100644
index 0000000..8b48bf5
--- /dev/null
+++ b/ai/predictor/models.py
@@ -0,0 +1,114 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.models
+~~~~~~~~~~~~~~~~
+纯数据结构定义，不包含任何业务逻辑或 IO 操作。
+
+包含：
+- ``BaselineState``：单个指标的健康模板状态，记录周期、模板曲线、健康状态等
+- ``MetricProfile``：从历史数据统计出的指标特征，驱动策略和阈值的自动推断
+- 状态常量：HEALTHY / ANOMALY / RECOVERING
+"""
+
+from dataclasses import dataclass, field
+from typing import List
+
+# ---------------------------------------------------------------------------
+# Baseline status constants
+# ---------------------------------------------------------------------------
+
+#: Running normally; the template keeps learning and updating
+BASELINE_STATUS_HEALTHY = "healthy"
+
+#: Anomaly detected; the template is frozen so fault data is not learned
+BASELINE_STATUS_ANOMALY = "anomaly"
+
+#: Anomaly gone; waiting for stability before learning resumes
+BASELINE_STATUS_RECOVERING = "recovering"
+
+
+# ---------------------------------------------------------------------------
+# 基线状态
+# ---------------------------------------------------------------------------
+
+@dataclass
+class BaselineState:
+    """
+    Healthy-baseline state of a single metric.
+
+    Each (device_id, metric) pair is meant to own one independent instance
+    holding that metric's periodic template and current health status.
+
+    Attributes:
+        period: Detected machining cycle length (seconds)
+        phase_origin_ts: Phase origin timestamp (Unix seconds), used to compute the current phase
+        template: Median template curve used for prediction (intended length: period)
+        lower_template: Lower-bound template (a quantile for phase_band, otherwise equal to template)
+        upper_template: Upper-bound template (a quantile for phase_band, otherwise equal to template)
+        strategy: Prediction strategy, "phase_point" or "phase_band"
+        status: Current health status, one of the BASELINE_STATUS_* constants
+        clean_seconds: Seconds of uninterrupted healthy operation, used to decide whether the template may update
+        last_update_ts: Timestamp of the last template update (Unix seconds)
+        last_seen_ts: Timestamp this metric was last processed (Unix seconds), used to compute elapsed time
+        y_min: Minimum value over a recent span, kept as a range reference
+        y_max: Maximum value over a recent span, kept as a range reference
+    """
+
+    period: int
+    phase_origin_ts: int
+    template: List[float]
+    lower_template: List[float]
+    upper_template: List[float]
+    strategy: str
+    status: str
+    clean_seconds: int
+    last_update_ts: int
+    last_seen_ts: int
+    y_min: float
+    y_max: float
+
+
+# ---------------------------------------------------------------------------
+# 指标特征（自适应配置推断结果）
+# ---------------------------------------------------------------------------
+
+@dataclass
+class MetricProfile:
+    """
+    Metric characteristics derived from history, used to infer strategy and thresholds.
+
+    Produced by ``profiling.infer_metric_profile()`` and then turned into an
+    execution-layer target dict by ``profiling.build_target()``.
+
+    Attributes:
+        device_id: Device identifier, i.e. the device_id label value in VM
+        metric: Metric name, e.g. "feed_rate" or "spindle_current"
+        p5: 5th percentile of the active segment (after idle values are filtered out)
+        p95: 95th percentile of the active segment
+        iqr: p95 - p5 (a p5-p95 spread despite the name); reflects the normal fluctuation range
+        cv: Coefficient of variation (std / mean), a measure of signal stability
+            cv < 0.15 -> stable signal -> phase_point
+            cv >= 0.15 -> fluctuating signal (e.g. load, vibration) -> phase_band
+        strategy: Inferred prediction strategy, "phase_point" or "phase_band"
+        abs_threshold: Absolute error threshold (computed automatically)
+        rel_threshold: Relative error threshold (computed automatically)
+        band_low_q: Lower-bound quantile for phase_band (default 5)
+        band_high_q: Upper-bound quantile for phase_band (default 95)
+        band_pad_abs: Extra padding width for phase_band, meant to cover normal spikes
+        phase_lock_period_search_ratio: Phase-lock period search range (relative fraction);
+            derived from measured period jitter: the less stable the period, the wider the range
+    """
+
+    device_id: str
+    metric: str
+    p5: float
+    p95: float
+    iqr: float
+    cv: float
+    strategy: str
+    abs_threshold: float
+    rel_threshold: float
+    band_low_q: float
+    band_high_q: float
+    band_pad_abs: float
+    phase_lock_period_search_ratio: float
diff --git a/ai/predictor/phase_lock.py b/ai/predictor/phase_lock.py
new file mode 100644
index 0000000..b38acaf
--- /dev/null
+++ b/ai/predictor/phase_lock.py
@@ -0,0 +1,152 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.phase_lock
+~~~~~~~~~~~~~~~~~~~~
+Phase-lock 相位对齐：在每次预测前动态校正周期和相位原点。
+
+职责：
+- 在基准周期附近搜索最优 (period, origin) 组合
+- 最小化最近时间窗口内的预测 MAE
+- 支持 target 级别的搜索范围配置（粗铣工位周期抖动大，需要更宽的范围）
+
+依赖：predictor.template, predictor.config, predictor.models
+"""
+
+import logging
+from typing import Dict, Optional, Tuple
+
+import numpy as np
+
+from . import config
+from .models import BaselineState
+from .template import (
+    normalize_origin_near,
+    predict_template_values,
+    resample_template,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def phase_lock_recent(
+    state: BaselineState,
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    target: Optional[Dict] = None,
+) -> Tuple[int, int, np.ndarray, float]:
+    """
+    Search the recent window for the best (period, phase_origin) pair.
+
+    Search procedure:
+    1. Pick a window of about two base periods, clipped to the min/max window.
+    2. Enumerate periods in [base * (1 - ratio), base * (1 + ratio)], clipped
+       to the configured min/max period.
+    3. For each period, enumerate origins within +/- origin_shift of the base
+       origin normalized near the latest timestamp.
+    4. Keep the candidate minimizing MAE + 0.5 * |period - base_period|; the
+       penalty discourages drifting away from the base period.
+
+    Args:
+        state: Current baseline state (supplies base period, origin, template).
+        ts_grid: Timestamp array (documented upstream as a uniform 1 s grid).
+        ys_model: Signal the MAE is computed against, aligned with ts_grid.
+        target: Optional target dict; phase_lock_period_search_ratio and
+            phase_lock_origin_search_ratio override the config defaults.
+
+    Returns:
+        (best_period, best_origin, best_pred, best_mae): the winning period
+        (int seconds), its origin re-normalized near the latest timestamp,
+        the prediction array for the search window, and its MAE.
+    """
+    base_period = int(state.period)
+    base_origin = int(state.phase_origin_ts)
+    base_template = np.array(state.template, dtype=float)
+
+    # Per-target overrides allow a wider search where the period jitters more
+    period_search_ratio = float(
+        (target or {}).get("phase_lock_period_search_ratio", config.PHASE_LOCK_PERIOD_SEARCH_RATIO)
+    )
+    origin_search_ratio = float(
+        (target or {}).get("phase_lock_origin_search_ratio", config.PHASE_LOCK_ORIGIN_SEARCH_RATIO)
+    )
+
+    # Degenerate template: nothing to search, return the base prediction as-is
+    if base_period <= 1 or len(base_template) <= 1:
+        ts_recent = ts_grid[-config.DETECT_WINDOW_SECONDS :].astype(int).tolist()
+        pred = predict_template_values(base_template, base_period, base_origin, ts_recent)
+        actual = ys_model[-len(ts_recent) :].astype(float)
+        mae = float(np.mean(np.abs(actual - pred))) if len(actual) else 0.0
+        return base_period, base_origin, pred, mae
+
+    # Search window: about two periods, clipped to [min, max]
+    window_seconds = max(
+        config.PHASE_LOCK_MIN_WINDOW_SECONDS,
+        min(config.PHASE_LOCK_MAX_WINDOW_SECONDS, int(base_period * 2)),
+    )
+
+    cutoff = ts_grid[-1] - window_seconds
+    mask = ts_grid >= cutoff
+    ts_recent_arr = ts_grid[mask].astype(int)
+    actual = ys_model[mask].astype(float)
+
+    # Too few points inside the window: fall back to a fixed-length tail
+    if len(ts_recent_arr) < max(10, config.DETECT_WINDOW_SECONDS):
+        ts_recent_arr = ts_grid[-config.DETECT_WINDOW_SECONDS :].astype(int)
+        actual = ys_model[-config.DETECT_WINDOW_SECONDS :].astype(float)
+
+    ts_recent = ts_recent_arr.tolist()
+    last_ts = int(ts_recent[-1])
+
+    # Period search range
+    p_min = max(
+        int(config.MIN_PERIOD_SECONDS),
+        int(round(base_period * (1.0 - period_search_ratio))),
+    )
+    p_max = min(
+        int(config.MAX_PERIOD_SECONDS),
+        int(round(base_period * (1.0 + period_search_ratio))),
+    )
+
+    # Start from the base parameters as the incumbent
+    best_period = base_period
+    best_origin = normalize_origin_near(base_origin, base_period, last_ts)
+    best_pred = predict_template_values(
+        template=resample_template(base_template, best_period),
+        period=best_period,
+        phase_origin_ts=best_origin,
+        ts_list=ts_recent,
+    )
+    best_mae = float(np.mean(np.abs(actual - best_pred)))
+    # Incumbent score; the base period carries no drift penalty
+    best_score = best_mae
+
+    for period in range(p_min, p_max + 1, config.PHASE_LOCK_PERIOD_STEP):
+        template = resample_template(base_template, period)
+        center_origin = normalize_origin_near(base_origin, period, last_ts)
+        origin_shift = max(2, int(round(period * origin_search_ratio)))
+        # The penalty (0.5 per second of period deviation) depends only on the
+        # period, so it is computed once per period instead of once per shift
+        penalty = abs(period - base_period) * 0.5
+
+        for shift in range(-origin_shift, origin_shift + 1, config.PHASE_LOCK_ORIGIN_STEP):
+            origin = center_origin + shift
+            pred = predict_template_values(
+                template=template,
+                period=period,
+                phase_origin_ts=origin,
+                ts_list=ts_recent,
+            )
+            mae = float(np.mean(np.abs(actual - pred)))
+            score = mae + penalty
+
+            if score < best_score:
+                best_period = period
+                best_origin = origin
+                best_pred = pred
+                best_mae = mae
+                best_score = score
+
+    # Re-normalize the winning origin near the latest timestamp
+    best_origin = normalize_origin_near(best_origin, best_period, last_ts)
+
+    return int(best_period), int(best_origin), best_pred, float(best_mae)
diff --git a/ai/predictor/profiling.py b/ai/predictor/profiling.py
new file mode 100644
index 0000000..5330500
--- /dev/null
+++ b/ai/predictor/profiling.py
@@ -0,0 +1,256 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.profiling
+~~~~~~~~~~~~~~~~~~~
+Layer 2：自适应配置推断。
+
+职责：
+- 从历史数据统计指标特征（p5/p95/IQR/cv/周期抖动率）
+- 自动推断预测策略（phase_point vs phase_band）和阈值
+- 加载人工上下限覆盖文件（override）
+- 将 MetricProfile 转换为执行层 target dict
+- 完整的发现 + 推断流程（refresh_targets）
+
+依赖：predictor.storage, predictor.discovery, predictor.signal, predictor.models, predictor.config
+"""
+
+import json
+import logging
+import os
+from typing import Dict, List, Optional
+
+import numpy as np
+
+from . import config
+from .discovery import discover_device_ids, discover_metrics_for_device
+from .models import MetricProfile
+from .signal import estimate_period_rough, find_valley_indices
+from .storage import fetch_history, normalize_history
+
+logger = logging.getLogger(__name__)
+
+
+def infer_metric_profile(
+    vm_url: str,
+    device_id: str,
+    metric: str,
+) -> Optional[MetricProfile]:
+    """
+    Fetch history, compute active-segment statistics, and infer strategy and thresholds.
+
+    Inference rules:
+    - Idle filtering: keep only values strictly above p10, unless fewer than 30 would remain
+    - strategy: cv < 0.15 -> phase_point (stable signal), otherwise phase_band
+    - abs_threshold: max of iqr * 0.8, (p95 - p5) * 0.05 and std * 2.0
+    - rel_threshold: min(0.30, cv * 1.5)
+    - band_pad_abs: max(iqr * 0.3, std), meant to cover normal spikes
+    - phase_lock_period_search_ratio: 2 * period jitter CV, clipped to [0.12, 0.25]
+
+    Args:
+        vm_url: VM HTTP address
+        device_id: Device identifier
+        metric: Metric name
+
+    Returns:
+        A MetricProfile, or None when fewer than config.MIN_POINTS raw points are available.
+    """
+    ts_raw, ys_raw = fetch_history(
+        vm_url=vm_url,
+        query=f'{metric}{{device_id="{device_id}"}}',
+        minutes=config.HISTORY_MINUTES,
+    )
+
+    if len(ys_raw) < config.MIN_POINTS:
+        return None
+
+    arr = np.array(ys_raw, dtype=float)
+
+    # Drop the idle segment: keep only active values (strictly above p10)
+    p10_val = float(np.percentile(arr, 10))
+    active = arr[arr > p10_val]
+    if len(active) < 30:
+        active = arr  # too few points above p10; use the unfiltered data
+
+    mean_val = float(np.mean(active))
+    std_val = float(np.std(active))
+    cv = std_val / max(abs(mean_val), 1e-6)
+    p5 = float(np.percentile(active, 5))
+    p95 = float(np.percentile(active, 95))
+    iqr = p95 - p5
+
+    # Strategy selection: cv measures how stable the signal is
+    strategy = "phase_point" if cv < 0.15 else "phase_band"
+
+    # NOTE(review): (p95 - p5) * 0.05 equals iqr * 0.05 and can never exceed iqr * 0.8 - confirm intent
+    abs_threshold = max(iqr * 0.8, (p95 - p5) * 0.05, std_val * 2.0)
+    rel_threshold = min(0.30, cv * 1.5)
+
+    # Tolerance band padding for phase_band
+    band_pad_abs = max(iqr * 0.3, std_val)
+
+    # Phase-lock search range: estimate period jitter from the history
+    ts_grid, ys_grid = normalize_history(ts_raw, ys_raw)
+    period_search_ratio = config.PHASE_LOCK_PERIOD_SEARCH_RATIO  # default
+
+    if len(ys_grid) >= config.MIN_POINTS:
+        rough_period = estimate_period_rough(ys_grid)
+        if rough_period > config.MIN_PERIOD_SECONDS:
+            valleys = find_valley_indices(ts_grid, ys_grid, rough_period)
+            if len(valleys) >= 3:
+                diffs = np.diff(ts_grid[valleys].astype(float))
+                valid = diffs[
+                    (diffs > rough_period * 0.5) & (diffs < rough_period * 2.0)
+                ]
+                if len(valid) >= 2:
+                    # Twice the period's coefficient of variation, clipped to [0.12, 0.25]
+                    period_cv = float(np.std(valid) / max(np.mean(valid), 1e-6))
+                    period_search_ratio = float(np.clip(period_cv * 2.0, 0.12, 0.25))
+
+    logger.info(
+        "推断指标特征 device=%s metric=%s cv=%.3f strategy=%s "
+        "abs_thr=%.3f rel_thr=%.3f period_search=%.2f",
+        device_id, metric, cv, strategy,
+        abs_threshold, rel_threshold, period_search_ratio,
+    )
+
+    return MetricProfile(
+        device_id=device_id,
+        metric=metric,
+        p5=p5,
+        p95=p95,
+        iqr=iqr,
+        cv=cv,
+        strategy=strategy,
+        abs_threshold=abs_threshold,
+        rel_threshold=rel_threshold,
+        band_low_q=5.0,
+        band_high_q=95.0,
+        band_pad_abs=band_pad_abs,
+        phase_lock_period_search_ratio=period_search_ratio,
+    )
+
+
+def load_overrides(path: str) -> Dict:
+    """
+    Load the manual hard-limit override file.
+
+    Expected JSON shape:
+        {"device-id": {"metric_name": {"hard_max": 35.0, "hard_min": 0.0}}}
+
+    Args:
+        path: Path of the override file.
+
+    Returns:
+        Parsed mapping; {} if the file is missing, unreadable, or not a JSON object.
+    """
+    if not os.path.exists(path):
+        return {}
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+    except Exception as e:
+        logger.warning("加载 override 文件失败 %s: %s", path, e)
+        return {}
+    if isinstance(data, dict):
+        return data
+    logger.warning("加载 override 文件失败 %s: %s", path, "JSON 根节点不是对象")
+    return {}
+
+
+def build_target(profile: MetricProfile, overrides: Dict) -> Dict:
+    """
+    Convert a MetricProfile into the target dict used by the prediction layer.
+
+    The dict carries:
+    - query / pred_metric / anomaly_metric
+    - strategy, thresholds and smoothing window
+    - phase-lock search ranges
+    - optional hard limits taken from the override mapping
+
+    Args:
+        profile: Metric characteristics inferred from history
+        overrides: Override mapping (as returned by load_overrides)
+
+    Returns:
+        The target dict.
+    """
+    device_overrides = overrides.get(profile.device_id, {}).get(profile.metric, {})
+
+    target: Dict = {
+        "query": f'{profile.metric}{{device_id="{profile.device_id}"}}',
+        "pred_metric": f"{profile.metric}_predicted",
+        "anomaly_metric": f"{profile.metric}_anomaly",
+        "strategy": profile.strategy,
+        "abs_threshold": profile.abs_threshold,
+        "rel_threshold": profile.rel_threshold,
+        # phase_band uses a larger smoothing window to suppress impulse noise
+        "smooth_window": 5 if profile.strategy == "phase_band" else 2,
+        "outside_ratio_threshold": config.OUTSIDE_RATIO_THRESHOLD,  # defaults live in config
+        "min_consecutive_outside": config.MIN_CONSECUTIVE_OUTSIDE,
+        "severe_exceed_ratio": config.SEVERE_EXCEED_RATIO,
+        "phase_lock_period_search_ratio": profile.phase_lock_period_search_ratio,
+        # origin search range is 2.5x the period search range, capped at 0.45
+        "phase_lock_origin_search_ratio": min(
+            0.45, profile.phase_lock_period_search_ratio * 2.5
+        ),
+        # Hard limits (optional, from the override mapping); None when absent
+        "hard_max": device_overrides.get("hard_max"),
+        "hard_min": device_overrides.get("hard_min"),
+    }
+
+    if profile.strategy == "phase_band":
+        target.update({
+            "band_low_q": profile.band_low_q,
+            "band_high_q": profile.band_high_q,
+            "band_pad_abs": profile.band_pad_abs,
+        })
+
+    return target
+
+
+def refresh_targets(
+    vm_url: str,
+    monitored_metrics: List[str],
+    override_path: str,
+) -> List[Dict]:
+    """
+    Run discovery plus inference end to end and return the target list.
+
+    Steps:
+    1. Discover every device_id known to VM
+    2. Probe which candidate metrics have data on each device
+    3. Infer a MetricProfile for each metric that has data
+    4. Convert each MetricProfile into a target dict
+
+    When no device_id is found an empty list is returned.
+
+    Args:
+        vm_url: VM HTTP address
+        monitored_metrics: Candidate metric names to probe
+        override_path: Path of the override file
+
+    Returns:
+        One target dict per (device_id, metric) pair whose profile could be
+        inferred.
+    """
+    logger.info("开始发现设备和指标...")
+    override_map = load_overrides(override_path)
+
+    found_devices = discover_device_ids(vm_url)
+    if not found_devices:
+        logger.warning("未发现任何 device_id")
+        return []
+
+    result: List[Dict] = []
+    for dev in found_devices:
+        for name in discover_metrics_for_device(vm_url, dev, monitored_metrics):
+            inferred = infer_metric_profile(vm_url, dev, name)
+            if inferred is None:
+                continue
+            result.append(build_target(inferred, override_map))
+
+    logger.info(
+        "目标列表已更新：%d 台设备，%d 个指标目标",
+        len(found_devices),
+        len(result),
+    )
+    return result
diff --git a/ai/predictor/service.py b/ai/predictor/service.py
new file mode 100644
index 0000000..1e9dca7
--- /dev/null
+++ b/ai/predictor/service.py
@@ -0,0 +1,333 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.service
+~~~~~~~~~~~~~~~~~
+主服务类：组装所有模块，驱动预测主循环。
+
+职责：
+- 持有所有运行时状态（baseline_states、last_written、targets）
+- 按 TARGETS_REFRESH_INTERVAL 定期重新发现设备和指标
+- 每轮轮询：拉取历史数据 → 更新状态 → 预测 → 写入 VM
+- 每轮结束后持久化状态到文件
+
+依赖：所有其他 predictor 子模块
+"""
+
+import logging
+import time
+from datetime import datetime
+from typing import Dict, List, Optional
+
+from . import config
+from .anomaly import calc_final_bounds
+from .models import BaselineState
+from .profiling import refresh_targets
+from .signal import preprocess_values
+from .state import maybe_update_state
+from .storage import (
+    fetch_history,
+    load_state,
+    merge_labels,
+    normalize_history,
+    parse_labels_from_query,
+    save_state,
+    series_key,
+    write_prediction_bundle,
+)
+from .template import predict_state_bundle
+
+logger = logging.getLogger(__name__)
+
+
+class PredictorService:
+    """
+    Main prediction service.
+
+    Keeps all runtime state on the instance, so separate instances do not share state.
+    run() starts the main loop; run_once() executes a single prediction round.
+
+    Attributes:
+        _vm_url: VM HTTP address
+        _state_file: Path of the state persistence file
+        _history_minutes: Time window of history to fetch (minutes)
+        _write_horizon: Number of prediction points written to VM per round
+        _poll_interval: Polling interval (seconds)
+        _targets_refresh_interval: Target list refresh interval (seconds)
+        _monitored_metrics: Candidate metric names to monitor
+        _override_file: Path of the manual hard-limit override file
+        _extra_labels: Extra labels attached when writing to VM
+        _states: Mapping key -> BaselineState (runtime state)
+        _last_written: Mapping key -> timestamp of the real sample behind the last write
+        _targets: Current target list
+        _targets_last_refresh: Timestamp of the last successful target refresh
+    """
+
+    def __init__(
+        self,
+        vm_url: str = config.VM_URL,
+        state_file: str = config.STATE_FILE,
+        history_minutes: int = config.HISTORY_MINUTES,
+        write_horizon: int = config.WRITE_HORIZON_SECONDS,
+        poll_interval: int = config.POLL_INTERVAL,
+        targets_refresh_interval: int = config.TARGETS_REFRESH_INTERVAL,
+        monitored_metrics: Optional[List[str]] = None,
+        override_file: str = config.OVERRIDE_FILE,
+        extra_labels: Optional[Dict[str, str]] = None,
+    ) -> None:
+        self._vm_url = vm_url
+        self._state_file = state_file
+        self._history_minutes = history_minutes
+        self._write_horizon = write_horizon
+        self._poll_interval = poll_interval
+        self._targets_refresh_interval = targets_refresh_interval
+        self._monitored_metrics = monitored_metrics or config.MONITORED_METRICS
+        self._override_file = override_file
+        self._extra_labels = extra_labels or config.EXTRA_PREDICT_LABELS
+
+        self._states: Dict[str, BaselineState] = {}
+        self._last_written: Dict[str, int] = {}
+        self._targets: List[Dict] = []
+        self._targets_last_refresh: float = 0.0
+
+    # ------------------------------------------------------------------
+    # Target list management
+    # ------------------------------------------------------------------
+
+    def _refresh_targets_if_needed(self) -> None:
+        """
+        Re-run device/metric discovery once targets_refresh_interval has elapsed.
+
+        Runs on every call while the target list is empty; an empty result keeps the old list and timestamp.
+        """
+        now = time.time()
+        if now - self._targets_last_refresh < self._targets_refresh_interval and self._targets:
+            return
+
+        new_targets = refresh_targets(
+            vm_url=self._vm_url,
+            monitored_metrics=self._monitored_metrics,
+            override_path=self._override_file,
+        )
+
+        if new_targets:
+            self._targets = new_targets
+            self._targets_last_refresh = now
+        else:
+            logger.warning("发现流程未产生任何有效目标，保持现有目标列表")
+
+    # ------------------------------------------------------------------
+    # Prediction time axis
+    # ------------------------------------------------------------------
+
+    def _build_prediction_timestamps(
+        self,
+        key: str,
+        last_real_ts: int,
+        now_sec: int,
+    ) -> Optional[List[int]]:
+        """
+        Build the prediction timestamps: write_horizon points starting at last_real_ts + 1.
+
+        Writing is skipped in two cases:
+        1. The real data lags by more than config.MAX_DATA_LAG_SECONDS
+        2. The real-data timestamp has not advanced since the last write for this key
+
+        Args:
+            key: Series identifier
+            last_real_ts: Timestamp of the newest real data point (Unix seconds)
+            now_sec: Current timestamp (Unix seconds)
+
+        Returns:
+            The list of prediction timestamps, or None when writing is skipped.
+        """
+        data_lag = now_sec - last_real_ts
+
+        if data_lag > config.MAX_DATA_LAG_SECONDS:
+            logger.warning(
+                "真实数据延迟过大，跳过预测 key=%s data_lag=%ss max=%ss",
+                key, data_lag, config.MAX_DATA_LAG_SECONDS,
+            )
+            return None
+
+        last_written_real_ts = self._last_written.get(key)
+        if last_written_real_ts is not None and last_real_ts <= int(last_written_real_ts):
+            logger.info(
+                "真实数据时间戳未推进，跳过重复写入 key=%s last_real_ts=%s last_written=%s",
+                key, last_real_ts, last_written_real_ts,
+            )
+            return None
+
+        return [last_real_ts + i + 1 for i in range(self._write_horizon)]
+
+    # ------------------------------------------------------------------
+    # Single prediction round
+    # ------------------------------------------------------------------
+
+    def run_once(self) -> None:
+        """
+        Run one round: for every target, fetch data, update state and write predictions.
+
+        State is persisted to the state file after the loop; a round with no targets returns before that.
+        """
+        now_str = datetime.now().strftime("%H:%M:%S")
+
+        self._refresh_targets_if_needed()
+
+        if not self._targets:
+            logger.warning("[%s] 目标列表为空，等待设备发现完成", now_str)
+            return
+
+        for target in self._targets:
+            query = target["query"]
+            pred_metric = target["pred_metric"]
+            anomaly_metric = target["anomaly_metric"]
+            strategy = target.get("strategy", "phase_point")
+            smooth_window = int(target.get("smooth_window", 1))
+
+            # 1. Fetch history
+            ts, ys = fetch_history(
+                vm_url=self._vm_url,
+                query=query,
+                minutes=self._history_minutes,
+            )
+
+            if len(ys) < config.MIN_POINTS:
+                logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
+                continue
+
+            ts_grid, ys_grid_raw = normalize_history(ts, ys)
+
+            if len(ys_grid_raw) < config.MIN_POINTS:
+                logger.info(
+                    "[%s] %s 清洗后数据不足（%d 点），跳过",
+                    now_str, query, len(ys_grid_raw),
+                )
+                continue
+
+            # 2. Preprocess (smoothing)
+            ys_grid_model = preprocess_values(ys_grid_raw, strategy, smooth_window)
+
+            # 3. Build the labels to write with
+            base_labels = parse_labels_from_query(query)
+            write_labels = merge_labels(base_labels, self._extra_labels)
+            key = series_key(pred_metric, write_labels)
+
+            # 4. Update state (anomaly detection + template learning)
+            (
+                state,
+                is_anomaly,
+                outside_ratio,
+                mean_abs_err,
+                mean_rel_err,
+                max_outside_seconds,
+                max_exceed_ratio,
+            ) = maybe_update_state(
+                key=key,
+                ts_grid=ts_grid,
+                ys_model=ys_grid_model,
+                ys_actual=ys_grid_raw,
+                target=target,
+                states=self._states,
+            )
+
+            if state is None:
+                logger.info("[%s] %s 暂无可用健康模板，等待学习", now_str, query)
+                continue
+
+            # 5. Build the prediction timestamps
+            now_sec = int(time.time())
+            last_real_ts = int(ts_grid[-1])
+            data_lag = now_sec - last_real_ts
+
+            ts_future = self._build_prediction_timestamps(
+                key=key,
+                last_real_ts=last_real_ts,
+                now_sec=now_sec,
+            )
+
+            if not ts_future:
+                continue
+
+            # 6. Predict
+            pred_values, lower_raw, upper_raw = predict_state_bundle(state, ts_future)
+            lower_values, upper_values = calc_final_bounds(
+                state=state,
+                pred=pred_values,
+                lower_raw=lower_raw,
+                upper_raw=upper_raw,
+                target=target,
+            )
+
+            # 7. Write to VM
+            ok = write_prediction_bundle(
+                vm_url=self._vm_url,
+                pred_metric=pred_metric,
+                anomaly_metric=anomaly_metric,
+                labels=write_labels,
+                ts_future=ts_future,
+                pred_values=pred_values,
+                lower_values=lower_values,
+                upper_values=upper_values,
+                is_anomaly=is_anomaly,
+                outside_ratio=outside_ratio,
+                mean_abs_err=mean_abs_err,
+                mean_rel_err=mean_rel_err,
+                max_outside_seconds=max_outside_seconds,
+                max_exceed_ratio=max_exceed_ratio,
+                event_ts=last_real_ts,
+            )
+
+            if not ok:
+                logger.error("[%s] %s 写入预测数据失败", now_str, query)
+                continue
+
+            self._last_written[key] = last_real_ts
+
+            # 8. Log a summary line
+            future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
+            future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
+            last_real_str = datetime.fromtimestamp(last_real_ts).strftime("%H:%M:%S")
+            origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S")
+
+            logger.info(
+                "[%s] %-50s → %-35s strategy=%s status=%s anomaly=%s "
+                "outside=%.2f max_outside=%ss max_exceed=%.2f "
+                "period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点，预测区间 %s ~ %s",
+                now_str, query, pred_metric,
+                state.strategy, state.status, is_anomaly,
+                outside_ratio, max_outside_seconds, max_exceed_ratio,
+                state.period, origin_str, last_real_str, data_lag,
+                len(ts_future), future_start, future_end,
+            )
+
+        save_state(self._state_file, self._states)
+
+    # ------------------------------------------------------------------
+    # Main loop
+    # ------------------------------------------------------------------
+
+    def run(self) -> None:
+        """
+        Start the prediction service main loop.
+
+        Loads persisted state, then loops forever: run_once() followed by a poll_interval sleep.
+        """
+        self._states = load_state(self._state_file)
+
+        logger.info(
+            "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds "
+            "轮询间隔=%ds state=%s forecast=%s override=%s refresh=%ds",
+            self._vm_url,
+            self._history_minutes,
+            config.HORIZON_SECONDS,
+            self._write_horizon,
+            self._poll_interval,
+            self._state_file,
+            self._extra_labels.get("forecast", ""),
+            self._override_file,
+            self._targets_refresh_interval,
+        )
+
+        while True:
+            self.run_once()
+            time.sleep(self._poll_interval)
diff --git a/ai/predictor/signal.py b/ai/predictor/signal.py
new file mode 100644
index 0000000..e948160
--- /dev/null
+++ b/ai/predictor/signal.py
@@ -0,0 +1,335 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.signal
+~~~~~~~~~~~~~~~~
+纯信号处理与周期估计，不包含任何 IO 操作。
+
+职责：
+- 滚动中位数、移动平均等平滑算法
+- 基于 FFT + 自相关的周期估计
+- 谷底检测（用于模板构建的相位对齐）
+- 原始数据预处理（根据策略选择平滑方式）
+
+本模块所有函数均为纯函数，输入 numpy 数组，输出 numpy 数组或基本类型。
+
+依赖：numpy
+"""
+
+import math
+from typing import Dict, List, Tuple
+
+import numpy as np
+
+from . import config
+
+
+def rolling_median(arr: np.ndarray, window: int) -> np.ndarray:
+    """
+    Smooth an array with a centered rolling median (edge-padded).
+
+    The median resists impulse noise. An even window is bumped to the next
+    odd value so the padding is symmetric around every sample.
+
+    Args:
+        arr: input array
+        window: window size in samples; no smoothing when <= 1 or > len(arr)
+
+    Returns:
+        Float array of the same length as the input.
+    """
+    values = arr.astype(float)
+    if window <= 1 or len(arr) < window:
+        return values
+
+    # Force an odd width (even -> +1) so both sides get the same padding.
+    width = window + 1 if window % 2 == 0 else window
+    half = width // 2
+    extended = np.pad(values, (half, half), mode="edge")
+    smoothed = np.empty(len(arr), dtype=float)
+    for start in range(len(arr)):
+        smoothed[start] = np.median(extended[start : start + width])
+    return smoothed
+
+
+def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
+    """
+    Smooth an array with a centered, uniformly weighted moving average.
+
+    Edges are padded by repeating the border samples; an even window is
+    bumped to the next odd value so the padding is symmetric.
+
+    Args:
+        arr: input array
+        window: window size in samples; no smoothing when <= 1 or > len(arr)
+
+    Returns:
+        Float array of the same length as the input.
+    """
+    values = arr.astype(float)
+    if window <= 1 or len(arr) < window:
+        return values
+
+    width = window if window % 2 else window + 1
+    half = width // 2
+
+    # "valid" convolution over the padded signal yields exactly len(arr) points.
+    weights = np.full(width, 1.0 / width)
+    extended = np.pad(values, (half, half), mode="edge")
+    return np.convolve(extended, weights, mode="valid")
+
+
+def preprocess_values(
+    ys_grid: np.ndarray,
+    strategy: str,
+    smooth_window: int,
+) -> np.ndarray:
+    """
+    Smooth the raw samples with the filter matching the forecast strategy.
+
+    - "phase_band": rolling median (robust to impulse noise)
+    - anything else (e.g. "phase_point"): moving average
+
+    Args:
+        ys_grid: raw values (described by the callers as a uniform 1-second grid)
+        strategy: "phase_point" or "phase_band"
+        smooth_window: smoothing window size; no smoothing when <= 1
+
+    Returns:
+        Float array of the same length as the input.
+    """
+    use_median = strategy == "phase_band"
+    if not use_median and smooth_window <= 1:
+        # Nothing to smooth: only normalise the dtype.
+        return ys_grid.astype(float)
+
+    smoother = rolling_median if use_median else moving_average
+    return smoother(ys_grid, smooth_window)
+
+
+# ---------------------------------------------------------------------------
+# 周期估计
+# ---------------------------------------------------------------------------
+
+def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
+    """
+    Coarsely estimate the dominant period (in samples) with an FFT.
+
+    Picks the non-DC bin with the largest magnitude in the spectrum of the
+    mean-removed signal and clips 1/f to the configured period range.
+
+    Args:
+        ys_arr: uniformly sampled values (sample spacing taken as 1.0)
+
+    Returns:
+        Estimated period as a float; 60.0 when the input has fewer than 8
+        samples, is constant, or has no usable spectral peak.
+    """
+    fallback = 60.0
+    n = len(ys_arr)
+    if n < 8:
+        return fallback
+
+    detrended = ys_arr - np.mean(ys_arr)
+    if np.allclose(detrended, 0):
+        return fallback
+
+    spectrum = np.fft.rfft(detrended)
+    freqs = np.fft.rfftfreq(n, d=1.0)
+    if len(freqs) <= 1:
+        return fallback
+
+    # Drop the DC bin (index 0) before looking for the strongest component.
+    magnitude = np.abs(spectrum[1:])
+    if len(magnitude) == 0 or np.max(magnitude) <= 0:
+        return fallback
+
+    peak_freq = float(freqs[1:][int(np.argmax(magnitude))])
+    if peak_freq <= 0:
+        return fallback
+
+    # Period is the reciprocal of frequency, limited to the configured bounds.
+    lo, hi = config.MIN_PERIOD_SECONDS, config.MAX_PERIOD_SECONDS
+    return float(np.clip(1.0 / peak_freq, lo, hi))
+
+
+def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:
+    """
+    Refine a coarse period estimate using the autocorrelation peak.
+
+    Searches lags from about 0.7x to 1.3x of init_period (bounded by the
+    configured minimum period and by half the series length) for the largest
+    autocorrelation value.
+
+    Args:
+        ys_arr: uniformly sampled values
+        init_period: initial period estimate, e.g. from the FFT step
+
+    Returns:
+        Refined period clipped to the configured range; the clipped
+        init_period when the series is too short, flat, or the range is empty.
+    """
+    lo, hi = config.MIN_PERIOD_SECONDS, config.MAX_PERIOD_SECONDS
+    unrefined = float(np.clip(init_period, lo, hi))
+
+    n = len(ys_arr)
+    if n < 20:
+        return unrefined
+
+    detrended = ys_arr - np.mean(ys_arr)
+    if np.allclose(detrended, 0):
+        return unrefined
+
+    # Keep only the non-negative lags of the full autocorrelation.
+    acf = np.correlate(detrended, detrended, mode="full")[n - 1:]
+
+    guess = int(round(init_period))
+    lag_min = max(int(lo), int(max(2, guess * 0.7)))
+    lag_max = min(n // 2, int(max(lag_min + 1, guess * 1.3)))
+    window = acf[lag_min : lag_max + 1]
+    if lag_max <= lag_min or len(window) == 0:
+        return unrefined
+
+    return float(np.clip(lag_min + int(np.argmax(window)), lo, hi))
+
+
+def estimate_period_rough(ys_arr: np.ndarray) -> int:
+    """
+    Estimate the signal period in whole seconds (FFT, then autocorrelation).
+
+    The refined value is rounded and clamped to the configured period range.
+
+    Args:
+        ys_arr: uniformly sampled values
+
+    Returns:
+        Estimated period as an int.
+    """
+    coarse = estimate_period_by_fft(ys_arr)
+    rounded = int(round(refine_period_by_autocorr(ys_arr, coarse)))
+    floor = int(config.MIN_PERIOD_SECONDS)
+    ceiling = int(config.MAX_PERIOD_SECONDS)
+    return int(max(floor, min(ceiling, rounded)))
+
+
+# ---------------------------------------------------------------------------
+# 谷底检测
+# ---------------------------------------------------------------------------
+
+def find_valley_indices(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    expected_period: int,
+) -> List[int]:
+    """
+    Detect periodic valleys in a series (per the original notes, the start points of machining cycles).
+
+    Algorithm:
+    1. Lightly smooth the signal with a moving average to suppress high-frequency noise
+    2. Take local minima at or below the VALLEY_QUANTILE percentile as candidates
+    3. If there are too few candidates, relax the rule (drop the percentile limit)
+    4. Enforce a minimum spacing, keeping the lowest point within each spacing
+    5. Clean by period plausibility (0.55x-1.60x of the expected period)
+
+    Args:
+        ts_grid: timestamps of the uniform 1-second grid
+        ys_grid: values aligned with ts_grid
+        expected_period: expected period (seconds); sets the minimum spacing and the plausibility check
+
+    Returns:
+        Indices of the valleys in the arrays, in ascending order.
+        An empty list when there is not enough data or no local minimum exists.
+    """
+    n = len(ys_grid)
+    if n < max(10, expected_period * 2):
+        return []
+
+    period = max(3, int(expected_period))
+    # Smoothing window is about 8% of the period, kept between 3 and 21, to avoid over-smoothing
+    smooth_window = min(max(3, int(round(period * 0.08))), 21)
+    ys_smooth = moving_average(ys_grid, smooth_window)
+
+    threshold = float(np.percentile(ys_smooth, config.VALLEY_QUANTILE))
+
+    # First pass: keep only local minima that are at or below the threshold
+    candidates = [
+        i for i in range(1, n - 1)
+        if (
+            ys_smooth[i] <= ys_smooth[i - 1]
+            and ys_smooth[i] < ys_smooth[i + 1]
+            and ys_smooth[i] <= threshold
+        )
+    ]
+
+    # Too few candidates: relax the rule and take every local minimum
+    if len(candidates) < config.MIN_FULL_CYCLES_FOR_TEMPLATE:
+        candidates = [
+            i for i in range(1, n - 1)
+            if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]
+        ]
+
+    if not candidates:
+        return []
+
+    # Minimum-spacing filter: within one spacing keep the lowest point
+    min_distance = max(2, int(round(period * 0.55)))
+    selected: List[int] = []
+    for idx in candidates:
+        if not selected:
+            selected.append(idx)
+        elif idx - selected[-1] >= min_distance:
+            selected.append(idx)
+        elif ys_smooth[idx] < ys_smooth[selected[-1]]:
+            selected[-1] = idx
+
+    if len(selected) < 2:
+        return selected
+
+    # Plausibility cleanup: too-small gaps keep the lower point, too-large gaps are accepted as-is
+    cleaned = [selected[0]]
+    for idx in selected[1:]:
+        diff = int(ts_grid[idx] - ts_grid[cleaned[-1]])
+        if int(period * 0.55) <= diff <= int(period * 1.60):
+            cleaned.append(idx)
+        elif diff < int(period * 0.55):
+            # Gap too small: keep whichever of the two is lower
+            if ys_smooth[idx] < ys_smooth[cleaned[-1]]:
+                cleaned[-1] = idx
+        else:
+            # Gap too large (a valley was possibly missed): accept it anyway
+            cleaned.append(idx)
+
+    return cleaned
+
+
+def detect_period_and_valleys(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+) -> Tuple[int, List[int]]:
+    """
+    Estimate the period and locate valleys in one pass.
+
+    The rough FFT/autocorrelation period seeds the valley search; with three
+    or more valleys the period is replaced by the median valley spacing.
+
+    Args:
+        ts_grid: timestamps of the uniform grid
+        ys_grid: values aligned with ts_grid
+
+    Returns:
+        (period, valley_indices):
+        - period: period in whole seconds, clamped to the configured range
+        - valley_indices: indices of the detected valleys
+    """
+    rough = estimate_period_rough(ys_grid)
+    valleys = find_valley_indices(ts_grid, ys_grid, rough)
+
+    period = rough
+    if len(valleys) >= 3:
+        spacing = np.diff(ts_grid[valleys])
+        # Only spacings within 0.55x-1.60x of the rough period are trusted.
+        plausible = spacing[(spacing >= rough * 0.55) & (spacing <= rough * 1.60)]
+        if len(plausible) > 0:
+            period = int(round(float(np.median(plausible))))
+
+    lo, hi = int(config.MIN_PERIOD_SECONDS), int(config.MAX_PERIOD_SECONDS)
+    return int(max(lo, min(hi, period))), valleys
diff --git a/ai/predictor/state.py b/ai/predictor/state.py
new file mode 100644
index 0000000..d7adedd
--- /dev/null
+++ b/ai/predictor/state.py
@@ -0,0 +1,328 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.state
+~~~~~~~~~~~~~~~
+状态生命周期管理：BaselineState 的创建、更新和 phase-lock 应用。
+
+职责：
+- 首次见到某指标时初始化健康基线
+- 每轮轮询时运行异常检测，更新状态机（HEALTHY / ANOMALY / RECOVERING）
+- 健康/恢复状态下用 EMA 渐进更新模板
+- 将 phase-lock 结果写回 state
+
+本模块不做任何 IO，states 字典由调用方（service.py）持有和传入。
+
+依赖：predictor.template, predictor.anomaly, predictor.config, predictor.models
+"""
+
+import logging
+import time
+from datetime import datetime
+from typing import Dict, Optional, Tuple
+
+import numpy as np
+
+from . import config
+from .anomaly import detect_anomaly
+from .models import (
+    BASELINE_STATUS_ANOMALY,
+    BASELINE_STATUS_HEALTHY,
+    BASELINE_STATUS_RECOVERING,
+    BaselineState,
+)
+from .template import (
+    build_current_baseline,
+    merge_template,
+    resample_template,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def create_initial_state(
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    ys_actual: np.ndarray,
+    target: Dict,
+    now_sec: int,
+) -> Optional[BaselineState]:
+    """
+    Build the first healthy baseline for a series from its history.
+
+    Used when no state exists yet for the series.
+
+    Args:
+        ts_grid: timestamps of the uniform grid
+        ys_model: smoothed signal, passed on as the mid-template input
+        ys_actual: raw signal, passed on as the band input and used for y_min/y_max
+        target: target dict; reads "strategy", "band_low_q" and "band_high_q"
+        now_sec: current Unix time in seconds
+
+    Returns:
+        A HEALTHY BaselineState, or None when no baseline could be built.
+    """
+    strategy = str(target.get("strategy", "phase_point"))
+
+    built = build_current_baseline(
+        ts_grid=ts_grid,
+        ys_mid_grid=ys_model,
+        ys_band_grid=ys_actual,
+        strategy=strategy,
+        band_low_q=float(target.get("band_low_q", 5.0)),
+        band_high_q=float(target.get("band_high_q", 95.0)),
+    )
+
+    if built is None:
+        return None
+
+    period, origin_ts, mid, lower, upper = built
+    # Seed clean_seconds with several full periods so the new baseline
+    # counts as having enough healthy history from the start.
+    seeded_clean_seconds = int(period * config.MAX_CYCLES_FOR_TEMPLATE)
+
+    return BaselineState(
+        period=int(period),
+        phase_origin_ts=int(origin_ts),
+        template=mid.astype(float).tolist(),
+        lower_template=lower.astype(float).tolist(),
+        upper_template=upper.astype(float).tolist(),
+        strategy=strategy,
+        status=BASELINE_STATUS_HEALTHY,
+        clean_seconds=seeded_clean_seconds,
+        last_update_ts=now_sec,
+        last_seen_ts=now_sec,
+        y_min=float(np.min(ys_actual)),
+        y_max=float(np.max(ys_actual)),
+    )
+
+
+def apply_phase_lock_to_state(
+    state: BaselineState,
+    best_period: int,
+    best_origin: int,
+) -> None:
+    """
+    Write a phase-lock result back into the state (mutates it in place).
+
+    Any of the three templates whose length differs from the new period is
+    resampled to that length, so all of them stay aligned with state.period.
+    Periods <= 1 are ignored and leave the state untouched.
+
+    Args:
+        state: baseline state to update in place
+        best_period: period chosen by phase-lock, in whole seconds
+        best_origin: phase origin chosen by phase-lock (Unix seconds)
+
+    Returns:
+        None; the state object is modified directly.
+    """
+    new_period = int(best_period)
+    if new_period <= 1:
+        return
+
+    # The mid, lower and upper templates get identical treatment: resample
+    # only when the stored length no longer matches the new period.
+    for attr in ("template", "lower_template", "upper_template"):
+        current = getattr(state, attr)
+        if len(current) == new_period:
+            continue
+
+        resized = resample_template(np.array(current, dtype=float), new_period)
+        setattr(state, attr, resized.astype(float).tolist())
+
+    # Record the new period and origin once the templates are aligned.
+    state.period = new_period
+    state.phase_origin_ts = int(best_origin)
+
+
+def maybe_update_state(
+    key: str,
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    ys_actual: np.ndarray,
+    target: Dict,
+    states: Dict[str, BaselineState],
+) -> Tuple[Optional[BaselineState], bool, float, float, float, int, float]:
+    """
+    Core state update: run anomaly detection and update the baseline following the state machine.
+
+    State machine transitions:
+    - no state -> initialise -> HEALTHY (returns immediately; no anomaly detection this round)
+    - any status + anomaly -> ANOMALY (template frozen, clean_seconds reset to 0)
+    - ANOMALY + normal -> RECOVERING (clean_seconds restarts from the elapsed time)
+    - RECOVERING + normal + enough clean time -> template update, then HEALTHY
+    - HEALTHY/RECOVERING + normal + enough clean time -> templates merged via merge_template (EMA alpha)
+
+    Args:
+        key: unique series identifier (key of the states dict)
+        ts_grid: timestamps of the uniform 1-second grid
+        ys_model: smoothed signal
+        ys_actual: raw signal
+        target: target dict holding the strategy and threshold configuration
+        states: state dict of all series (owned by the caller, modified in place)
+
+    Returns:
+        (state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err,
+         max_outside_seconds, max_exceed_ratio)
+        state is None when there is not enough data and this round is skipped.
+    """
+    now_sec = int(time.time())
+    state = states.get(key)
+
+    # First time this series is seen: build the initial healthy baseline
+    if state is None:
+        state = create_initial_state(
+            ts_grid=ts_grid,
+            ys_model=ys_model,
+            ys_actual=ys_actual,
+            target=target,
+            now_sec=now_sec,
+        )
+
+        if state is None:
+            return None, False, 0.0, 0.0, 0.0, 0, 0.0
+
+        states[key] = state
+        logger.info(
+            "初始化健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss",
+            key,
+            state.strategy,
+            state.period,
+            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+            state.clean_seconds,
+        )
+        return state, False, 0.0, 0.0, 0.0, 0, 0.0
+
+    # Seconds since this series was last processed; added to clean_seconds below
+    elapsed = max(1, now_sec - int(state.last_seen_ts))
+    elapsed = min(elapsed, config.POLL_INTERVAL * 2)  # cap so a long outage cannot inflate clean_seconds
+    state.last_seen_ts = now_sec
+
+    (
+        is_anomaly,
+        outside_ratio,
+        mean_abs_err,
+        mean_rel_err,
+        best_period,
+        best_origin,
+        max_outside_seconds,
+        max_exceed_ratio,
+    ) = detect_anomaly(
+        state=state,
+        ts_grid=ts_grid,
+        ys_model=ys_model,
+        ys_actual=ys_actual,
+        target=target,
+    )
+
+    # Anomaly: freeze the template so faulty data is not learned
+    if is_anomaly:
+        state.status = BASELINE_STATUS_ANOMALY
+        state.clean_seconds = 0
+        states[key] = state
+        logger.warning(
+            "检测到异常，冻结模板 key=%s outside_ratio=%.2f max_outside=%ss "
+            "max_exceed_ratio=%.2f mean_abs_err=%.4f mean_rel_err=%.4f",
+            key, outside_ratio, max_outside_seconds,
+            max_exceed_ratio, mean_abs_err, mean_rel_err,
+        )
+        return state, True, outside_ratio, mean_abs_err, mean_rel_err, max_outside_seconds, max_exceed_ratio
+
+    # Normal: apply the phase-lock result returned by detect_anomaly
+    old_period = int(state.period)
+    old_origin = int(state.phase_origin_ts)
+    apply_phase_lock_to_state(state, best_period, best_origin)
+
+    if old_period != state.period or old_origin != state.phase_origin_ts:
+        logger.info(
+            "phase-lock key=%s period %s -> %s origin %s -> %s",
+            key, old_period, state.period,
+            datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"),
+            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+        )
+
+    # Anomaly just cleared: enter the recovery phase and wait before learning again
+    if state.status == BASELINE_STATUS_ANOMALY:
+        state.status = BASELINE_STATUS_RECOVERING
+        state.clean_seconds = elapsed
+        states[key] = state
+        logger.info("异常开始恢复 key=%s clean_seconds=%ss", key, state.clean_seconds)
+        return state, False, outside_ratio, mean_abs_err, mean_rel_err, max_outside_seconds, max_exceed_ratio
+
+    # Accumulate healthy time
+    if state.status == BASELINE_STATUS_RECOVERING:
+        state.clean_seconds += elapsed
+    else:
+        state.status = BASELINE_STATUS_HEALTHY
+        state.clean_seconds += elapsed
+
+    # Not enough healthy time yet: leave the template unchanged
+    min_clean_for_update = max(
+        config.RECOVERY_MIN_SECONDS,
+        int(state.period) * config.MIN_FULL_CYCLES_FOR_TEMPLATE,
+    )
+    if state.clean_seconds < min_clean_for_update:
+        states[key] = state
+        return state, False, outside_ratio, mean_abs_err, mean_rel_err, max_outside_seconds, max_exceed_ratio
+
+    # Enough healthy time: rebuild a baseline from the most recent data and merge it in
+    tail_seconds = min(
+        int(state.clean_seconds),
+        int(state.period) * config.MAX_CYCLES_FOR_TEMPLATE,
+    )
+
+    strategy = str(target.get("strategy", "phase_point"))
+    band_low_q = float(target.get("band_low_q", 5.0))
+    band_high_q = float(target.get("band_high_q", 95.0))
+
+    baseline = build_current_baseline(
+        ts_grid=ts_grid,
+        ys_mid_grid=ys_model,
+        ys_band_grid=ys_actual,
+        strategy=strategy,
+        band_low_q=band_low_q,
+        band_high_q=band_high_q,
+        tail_seconds=tail_seconds,
+    )
+
+    if baseline is None:
+        states[key] = state
+        return state, False, outside_ratio, mean_abs_err, mean_rel_err, max_outside_seconds, max_exceed_ratio
+
+    new_period, new_origin, new_template, new_lower_template, new_upper_template = baseline
+
+    # RECOVERING uses RECOVERY_EMA_ALPHA instead of HEALTHY_EMA_ALPHA (presumably larger, to catch up faster)
+    alpha = config.RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else config.HEALTHY_EMA_ALPHA
+
+    state.template = merge_template(
+        np.array(state.template, dtype=float), new_template, alpha
+    ).astype(float).tolist()
+    state.lower_template = merge_template(
+        np.array(state.lower_template, dtype=float), new_lower_template, alpha
+    ).astype(float).tolist()
+    state.upper_template = merge_template(
+        np.array(state.upper_template, dtype=float), new_upper_template, alpha
+    ).astype(float).tolist()
+
+    state.period = int(new_period)
+    state.phase_origin_ts = int(new_origin)
+    state.status = BASELINE_STATUS_HEALTHY
+    state.last_update_ts = now_sec
+
+    # Refresh the value-range statistics (original note: used for Grafana display)
+    if tail_seconds > 0 and len(ys_actual) >= tail_seconds:
+        state.y_min = float(np.min(ys_actual[-tail_seconds:]))
+        state.y_max = float(np.max(ys_actual[-tail_seconds:]))
+    else:
+        state.y_min = float(np.min(ys_actual))
+        state.y_max = float(np.max(ys_actual))
+
+    states[key] = state
+    logger.info(
+        "更新健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss alpha=%.2f",
+        key, state.strategy, state.period,
+        datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+        state.clean_seconds, alpha,
+    )
+
+    return state, False, outside_ratio, mean_abs_err, mean_rel_err, max_outside_seconds, max_exceed_ratio
diff --git a/ai/predictor/storage.py b/ai/predictor/storage.py
new file mode 100644
index 0000000..f614528
--- /dev/null
+++ b/ai/predictor/storage.py
@@ -0,0 +1,438 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.storage
+~~~~~~~~~~~~~~~~~
+VictoriaMetrics 读写层，封装所有网络 IO。
+
+职责：
+- 从 VM 拉取历史时序数据（query_range）
+- 将预测结果和异常指标写入 VM（import/prometheus）
+- 标签字符串的序列化与解析
+- 状态文件的持久化读写
+
+本模块不包含任何预测或异常检测逻辑，只负责数据的搬运和格式转换。
+
+依赖：requests, numpy
+"""
+
+import json
+import logging
+import math
+import os
+import re
+from dataclasses import asdict
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Tuple
+
+import numpy as np
+import requests
+
+from .models import BaselineState
+
+logger = logging.getLogger(__name__)
+
+# 用于解析 PromQL 标签字符串的正则，匹配 key="value" 格式
+_LABEL_PATTERN = re.compile(
+    r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
+)
+
+
+# ---------------------------------------------------------------------------
+# 历史数据读取
+# ---------------------------------------------------------------------------
+
+def fetch_history(
+    vm_url: str,
+    query: str,
+    minutes: int,
+    step: str = "1s",
+) -> Tuple[List[float], List[float]]:
+    """
+    Fetch the recent history of one query from VictoriaMetrics.
+
+    Calls /api/v1/query_range for the last `minutes` minutes and keeps the
+    samples of the first returned series only.
+
+    Args:
+        vm_url: VM base URL, e.g. "http://localhost:8428"
+        query: PromQL expression, e.g. 'feed_rate{device_id="fanuc-cnc"}'
+        minutes: how many minutes of history to request, counted back from now
+        step: query step, default "1s"
+
+    Returns:
+        (timestamps, values): two lists of equal length. Both are empty when
+        the request fails, the response cannot be parsed, or nothing matched.
+    """
+    end_dt = datetime.now()
+    window = {
+        "query": query,
+        "start": (end_dt - timedelta(minutes=minutes)).timestamp(),
+        "end": end_dt.timestamp(),
+        "step": step,
+    }
+
+    try:
+        resp = requests.get(
+            f"{vm_url}/api/v1/query_range", params=window, timeout=10
+        )
+        resp.raise_for_status()
+    except requests.RequestException as e:
+        logger.error("拉取历史数据失败 query=%s: %s", query, e)
+        return [], []
+
+    try:
+        series = resp.json().get("data", {}).get("result", [])
+    except Exception as e:
+        logger.error("解析 VM 响应失败 query=%s: %s", query, e)
+        return [], []
+
+    timestamps: List[float] = []
+    samples: List[float] = []
+    if not series:
+        return timestamps, samples
+
+    for point in series[0].get("values", []):
+        if len(point) < 2:
+            continue
+        try:
+            t, y = float(point[0]), float(point[1])
+        except (TypeError, ValueError):
+            continue
+        # Drop NaN / Inf samples so later numpy math stays well defined.
+        if not (math.isfinite(t) and math.isfinite(y)):
+            continue
+        timestamps.append(t)
+        samples.append(y)
+
+    return timestamps, samples
+
+
+def normalize_history(
+    ts: List[float],
+    ys: List[float],
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Regularise raw samples onto a uniform 1-second grid.
+
+    Steps:
+    1. Round timestamps to whole seconds; the last value seen per second wins
+    2. Sort by timestamp
+    3. Linearly interpolate the seconds that have no sample
+
+    Args:
+        ts: raw timestamps (Unix seconds, may be fractional)
+        ys: values aligned with ts
+
+    Returns:
+        (ts_grid, ys_grid) float arrays. Both are empty when the input is
+        empty, mismatched in length, or holds no finite sample.
+    """
+    if not ts or not ys or len(ts) != len(ys):
+        return np.array([]), np.array([])
+
+    by_second: Dict[int, float] = {}
+    for t, y in zip(ts, ys):
+        try:
+            t_val, y_val = float(t), float(y)
+        except (TypeError, ValueError, OverflowError):
+            continue
+        # Test finiteness BEFORE rounding: round(inf) raises OverflowError and
+        # round(nan) raises ValueError, so such samples must be skipped here.
+        if math.isfinite(t_val) and math.isfinite(y_val):
+            by_second[int(round(t_val))] = y_val
+
+    if not by_second:
+        return np.array([]), np.array([])
+
+    seconds = sorted(by_second)
+    ts_clean = np.array(seconds, dtype=float)
+    ys_clean = np.array([by_second[s] for s in seconds], dtype=float)
+
+    if len(ts_clean) < 2:
+        return ts_clean, ys_clean
+
+    start_sec = int(ts_clean[0])
+    end_sec = int(ts_clean[-1])
+
+    if end_sec <= start_sec:
+        return ts_clean, ys_clean
+
+    # Dense 1-second grid spanning the data, filled by linear interpolation.
+    ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float)
+    ys_grid = np.interp(ts_grid, ts_clean, ys_clean)
+
+    return ts_grid, ys_grid
+
+
+# ---------------------------------------------------------------------------
+# 标签工具
+# ---------------------------------------------------------------------------
+
+def prom_escape_label_value(value: str) -> str:
+    """Escape backslash, newline and double quote in a Prometheus label value."""
+    # A single translate() pass matches the usual "backslash first" replace
+    # chain, because already-escaped output is never scanned again.
+    replacements = {"\\": "\\\\", "\n": "\\n", '"': '\\"'}
+    table = str.maketrans(replacements)
+
+    return str(value).translate(table)
+
+
+def labels_to_str(labels: Dict[str, str]) -> str:
+    """
+    Serialise a label dict as a Prometheus label block, keys sorted by name.
+
+    Example:
+        {"device_id": "fanuc-cnc", "source": "protoforge"}
+        → '{device_id="fanuc-cnc",source="protoforge"}'
+    An empty or missing dict gives "" (no braces at all).
+    """
+    if not labels:
+        return ""
+    rendered = []
+    for name in sorted(labels):
+        rendered.append(f'{name}="{prom_escape_label_value(labels[name])}"')
+    return "{" + ",".join(rendered) + "}"
+
+
+def _unescape_label_value(raw: str) -> str:
+    """Decode Prometheus label-value escapes in one left-to-right pass."""
+    # Chained str.replace calls are wrong here: for an escaped backslash
+    # followed by "n" they would decode the tail as a newline.
+    decoded = {"\\\\": "\\", "\\n": "\n", '\\"': '"'}
+    return re.sub(r'\\[\\n"]', lambda m: decoded[m.group(0)], raw)
+
+
+def parse_labels_from_query(query: str) -> Dict[str, str]:
+    """
+    Extract the label dict from a PromQL selector string.
+    Escaped characters in values are decoded; no {...} block gives {}.
+
+    Example:
+        'feed_rate{device_id="fanuc-cnc"}' → {"device_id": "fanuc-cnc"}
+    """
+    if "{" not in query or "}" not in query:
+        return {}
+
+    try:
+        label_part = query[query.index("{") + 1 : query.rindex("}")]
+    except ValueError:
+        return {}
+
+    return {
+        match.group(1): _unescape_label_value(match.group(2))
+        for match in _LABEL_PATTERN.finditer(label_part)
+    }
+
+
+def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]:
+    """Merge label dicts left to right; later dicts win on duplicate keys."""
+    merged: Dict[str, str] = {}
+    # Falsy arguments (None, empty dict) are skipped.
+    for labels in filter(None, dicts):
+        merged.update(labels)
+    return merged
+
+
+def series_key(metric_name: str, labels: Dict[str, str]) -> str:
+    """Return the metric name followed by its serialised labels (the per-series state key)."""
+    return "".join((metric_name, labels_to_str(labels)))
+
+
+# ---------------------------------------------------------------------------
+# 数据写入
+# ---------------------------------------------------------------------------
+
+def write_series(
+    vm_url: str,
+    metric_name: str,
+    labels: Dict[str, str],
+    ts_list: List[int],
+    values: List[float],
+) -> bool:
+    """
+    Write one series to VictoriaMetrics via /api/v1/import/prometheus.
+
+    Samples are sent as Prometheus text exposition lines with millisecond
+    timestamps; samples with a non-numeric or non-finite part are skipped.
+
+    Args:
+        vm_url: VM base URL
+        metric_name: metric name
+        labels: label dict
+        ts_list: timestamps (Unix seconds)
+        values: values aligned with ts_list
+
+    Returns:
+        True on success; False on invalid input, no valid sample, or request failure.
+    """
+    if not ts_list or not values or len(ts_list) != len(values):
+        return False
+
+    samples: List[Tuple[int, float]] = []
+    for t, y in zip(ts_list, values):
+        try:
+            t_val, y_val = float(t), float(y)
+        except (TypeError, ValueError, OverflowError):
+            continue
+        # Check finiteness before rounding: round(inf) raises OverflowError.
+        if math.isfinite(t_val) and math.isfinite(y_val):
+            samples.append((int(round(t_val)) * 1000, y_val))  # VM wants ms
+    if not samples:
+        return False
+
+    label_str = labels_to_str(labels)
+    payload = "".join(f"{metric_name}{label_str} {v:.6f} {ms}\n" for ms, v in samples)
+
+    try:
+        resp = requests.post(
+            f"{vm_url}/api/v1/import/prometheus",
+            data=payload.encode("utf-8"),
+            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
+            timeout=10,
+        )
+        resp.raise_for_status()
+        return True
+    except requests.RequestException as e:
+        logger.error("写入数据失败 metric=%s: %s", metric_name, e)
+        return False
+
+
+def write_prediction_bundle(
+    vm_url: str,
+    pred_metric: str,
+    anomaly_metric: str,
+    labels: Dict[str, str],
+    ts_future: List[int],
+    pred_values: np.ndarray,
+    lower_values: np.ndarray,
+    upper_values: np.ndarray,
+    is_anomaly: bool,
+    outside_ratio: float,
+    mean_abs_err: float,
+    mean_rel_err: float,
+    max_outside_seconds: int,
+    max_exceed_ratio: float,
+    event_ts: int,
+) -> bool:
+    """
+    Write the complete prediction bundle of one metric in one call:
+    - predicted mid curve (pred_metric)
+    - predicted lower bound curve (pred_metric_lower)
+    - predicted upper bound curve (pred_metric_upper)
+    - anomaly flag (anomaly_metric, 0 or 1)
+    - anomaly diagnostics (outside_ratio, mean_abs_error, ...)
+
+    Args:
+        vm_url: VM base URL
+        pred_metric: prediction metric name, e.g. "feed_rate_predicted"
+        anomaly_metric: anomaly metric name, e.g. "feed_rate_anomaly"
+        labels: labels attached to every written series
+        ts_future: timestamps of the predicted points (Unix seconds)
+        pred_values: predicted mid values
+        lower_values: predicted lower bound values
+        upper_values: predicted upper bound values
+        is_anomaly: whether the series is currently judged anomalous
+        outside_ratio: share of out-of-band points in the detection window
+        mean_abs_err: mean absolute error
+        mean_rel_err: mean relative error
+        max_outside_seconds: longest run of consecutive out-of-band seconds
+        max_exceed_ratio: largest exceedance (relative to the band width, per the original notes)
+        event_ts: timestamp used for the flag and diagnostics samples
+
+    Returns:
+        True only if every write succeeded; False if any of them failed.
+    """
+    # Diagnostics carry an extra "type" label so dashboards can filter them.
+    diag_labels = {**labels, "type": "prediction_deviation"}
+
+    def curve(name: str, data: np.ndarray) -> bool:
+        return write_series(vm_url, name, labels, ts_future, data.tolist())
+
+    def diag(suffix: str, value: float) -> bool:
+        return write_series(
+            vm_url, f"{anomaly_metric}{suffix}", diag_labels, [event_ts], [value]
+        )
+
+    # A list (not a generator) so every write is attempted before all() runs.
+    outcomes = [
+        curve(pred_metric, pred_values),
+        curve(f"{pred_metric}_lower", lower_values),
+        curve(f"{pred_metric}_upper", upper_values),
+        diag("", 1.0 if is_anomaly else 0.0),
+        diag("_outside_ratio", outside_ratio),
+        diag("_mean_abs_error", mean_abs_err),
+        diag("_mean_rel_error", mean_rel_err),
+        diag("_max_consecutive_outside", float(max_outside_seconds)),
+        diag("_max_exceed_ratio", float(max_exceed_ratio)),
+    ]
+
+    return all(outcomes)
+
+
+# ---------------------------------------------------------------------------
+# 状态持久化
+# ---------------------------------------------------------------------------
+
+def load_state(path: str) -> Dict[str, BaselineState]:
+    """
+    Load every series' baseline state from a JSON file.
+
+    Missing file -> {}; unreadable content or malformed entries are logged and skipped.
+
+    Args:
+        path: state file path
+
+    Returns:
+        Mapping of series key to BaselineState.
+    """
+    if not os.path.exists(path):
+        return {}
+
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            entries = json.load(f).get("baseline_states", {}).items()
+    except Exception as e:
+        logger.warning("加载状态文件失败，将重新学习: %s", e)
+        return {}
+
+    required_fields = {
+        "period", "phase_origin_ts", "template", "lower_template",
+        "upper_template", "strategy", "status", "clean_seconds",
+        "last_update_ts", "last_seen_ts", "y_min", "y_max",
+    }
+    states: Dict[str, BaselineState] = {}
+    for key, value in entries:
+        try:
+            if required_fields.issubset(value):
+                states[key] = BaselineState(**value)
+        except (TypeError, ValueError) as e:  # non-object entry or unexpected field
+            logger.warning("跳过无效的状态记录 key=%s: %s", key, e)
+    logger.info("已加载状态文件 %s，共 %d 条记录", path, len(states))
+    return states
+
+
+def save_state(path: str, states: Dict[str, BaselineState]) -> None:
+    """
+    Persist every series' baseline state to a JSON file.
+
+    The data is written to "<path>.tmp" and then moved over the target with
+    os.replace, so the target is swapped in one step. Failures are only logged.
+
+    Args:
+        path: state file path
+        states: mapping of series key to BaselineState
+    """
+    try:
+        serialised = {}
+        for key, state in states.items():
+            serialised[key] = asdict(state)
+
+        # Write the full document to a sibling temp file first.
+        tmp_path = path + ".tmp"
+        with open(tmp_path, "w", encoding="utf-8") as f:
+            json.dump({"baseline_states": serialised}, f, ensure_ascii=False, indent=2)
+        os.replace(tmp_path, path)
+    except Exception as e:
+        # Best effort: errors are logged, not raised.
+        logger.warning("保存状态文件失败: %s", e)
diff --git a/ai/predictor/template.py b/ai/predictor/template.py
new file mode 100644
index 0000000..86d8170
--- /dev/null
+++ b/ai/predictor/template.py
@@ -0,0 +1,384 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.template
+~~~~~~~~~~~~~~~~~~
+模板的构建、预测、重采样与融合，不包含任何 IO 操作。
+
+职责：
+- 从历史谷底片段构建周期模板（中值/分位数）
+- 基于模板和相位原点预测未来值
+- 模板重采样（周期变化时对齐长度）
+- EMA 融合新旧模板（渐进式学习）
+- 相位原点规整化
+
+依赖：numpy, predictor.signal, predictor.config, predictor.models
+"""
+
+import math
+from typing import List, Optional, Tuple
+
+import numpy as np
+
+from . import config
+from .models import BaselineState
+from .signal import moving_average
+
+
+# ---------------------------------------------------------------------------
+# 模板构建
+# ---------------------------------------------------------------------------
+
+def build_templates_from_valleys(
+    ts_grid: np.ndarray,
+    ys_mid_grid: np.ndarray,
+    ys_band_grid: np.ndarray,
+    period: int,
+    valleys: List[int],
+    strategy: str,
+    band_low_q: float,
+    band_high_q: float,
+) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
+    """
+    Build the three template curves (mid, lower, upper) from valley-to-valley cycles.
+
+    Each pair of adjacent valleys delimits one cycle, which is resampled onto a
+    common grid of `period` samples and then aggregated according to strategy:
+    - phase_point: weighted mean (cycles later in the list weigh more)
+    - phase_band: median plus percentiles (robust to abnormal cycles)
+
+    Args:
+        ts_grid: Timestamp array (presumably a uniform 1 s grid; set by caller)
+        ys_mid_grid: Signal used for the mid template (both strategies)
+        ys_band_grid: Signal used for the lower/upper percentiles (phase_band)
+        period: Target template length (seconds)
+        valleys: Valley indices into the grid arrays
+        strategy: "phase_band", or anything else for the weighted-mean path
+        band_low_q: Lower percentile for phase_band (e.g. 5.0)
+        band_high_q: Upper percentile for phase_band (e.g. 95.0)
+
+    Returns:
+        (mid_template, lower_template, upper_template), each of length period.
+        None when there are too few usable cycles.
+    """
+    if period <= 1 or len(valleys) < config.MIN_FULL_CYCLES_FOR_TEMPLATE + 1:
+        return None
+
+    # Keep only cycles of plausible length (0.55x to 1.60x the expected period)
+    pairs = [
+        (a, b, float(ts_grid[b] - ts_grid[a]))
+        for a, b in zip(valleys[:-1], valleys[1:])
+        if period * 0.55 <= float(ts_grid[b] - ts_grid[a]) <= period * 1.60
+    ]
+
+    if len(pairs) < config.MIN_FULL_CYCLES_FOR_TEMPLATE:
+        return None
+
+    # Use only the last MAX_CYCLES_FOR_TEMPLATE cycles so stale data cannot dominate
+    pairs = pairs[-config.MAX_CYCLES_FOR_TEMPLATE:]
+
+    phase_grid = np.arange(period, dtype=float)
+    mid_segments: List[np.ndarray] = []
+    band_segments: List[np.ndarray] = []
+    weights: List[float] = []
+
+    for idx, (a, b, cycle_len) in enumerate(pairs):
+        seg_ts = ts_grid[a : b + 1]
+        seg_mid_y = ys_mid_grid[a : b + 1]
+        seg_band_y = ys_band_grid[a : b + 1]
+
+        if len(seg_mid_y) < 3 or len(seg_band_y) < 3:
+            continue
+
+        # Rescale the cycle's time axis to [0, period], then interpolate onto the phase grid
+        x_old = (seg_ts - seg_ts[0]) / cycle_len * period
+        mid_seg = np.interp(phase_grid, x_old, seg_mid_y)
+        band_seg = np.interp(phase_grid, x_old, seg_band_y)
+
+        mid_segments.append(mid_seg.astype(float))
+        band_segments.append(band_seg.astype(float))
+        # Weight grows linearly with position in `pairs`, ending at 1.0 for the last cycle
+        weights.append(0.5 + 0.5 * ((idx + 1) / len(pairs)))
+
+    if len(mid_segments) < config.MIN_FULL_CYCLES_FOR_TEMPLATE:
+        return None
+
+    mid_arr = np.vstack(mid_segments)
+    band_arr = np.vstack(band_segments)
+    w_arr = np.array(weights, dtype=float)
+
+    if strategy == "phase_band":
+        # Percentile aggregation: median for mid, band_low_q/band_high_q for the band
+        mid_template = np.percentile(mid_arr, 50, axis=0)
+        lower_template = np.percentile(band_arr, band_low_q, axis=0)
+        upper_template = np.percentile(band_arr, band_high_q, axis=0)
+    else:
+        # Weighted mean; lower and upper are plain copies of the mid template
+        mid_template = np.average(mid_arr, axis=0, weights=w_arr)
+        lower_template = mid_template.copy()
+        upper_template = mid_template.copy()
+
+    return (
+        mid_template.astype(float),
+        lower_template.astype(float),
+        upper_template.astype(float),
+    )
+
+
+def build_current_baseline(
+    ts_grid: np.ndarray,
+    ys_mid_grid: np.ndarray,
+    ys_band_grid: np.ndarray,
+    strategy: str,
+    band_low_q: float,
+    band_high_q: float,
+    tail_seconds: Optional[int] = None,
+) -> Optional[Tuple[int, int, np.ndarray, np.ndarray, np.ndarray]]:
+    """
+    Build the current baseline (period, phase origin and three template curves).
+
+    The optional tail_seconds restricts the input to the most recent stretch of
+    data, i.e. samples with ts >= ts_grid[-1] - tail_seconds.
+
+    Args:
+        ts_grid: Timestamp array (presumably a uniform 1 s grid; set by caller)
+        ys_mid_grid: Signal used for period/valley detection and the mid template
+        ys_band_grid: Signal used for the percentile templates
+        strategy: "phase_point" or "phase_band"
+        band_low_q: Lower percentile for phase_band
+        band_high_q: Upper percentile for phase_band
+        tail_seconds: If given and > 0, use only the last tail_seconds of data
+
+    Returns:
+        (period, phase_origin_ts, template, lower_template, upper_template).
+        None when there is too little data or no template could be built.
+    """
+    from .signal import detect_period_and_valleys
+
+    if len(ys_mid_grid) < config.MIN_POINTS or len(ys_band_grid) < config.MIN_POINTS:
+        return None
+
+    if tail_seconds is not None and tail_seconds > 0:
+        cutoff = ts_grid[-1] - int(tail_seconds)
+        mask = ts_grid >= cutoff
+        ts_use = ts_grid[mask]
+        ys_mid_use = ys_mid_grid[mask]
+        ys_band_use = ys_band_grid[mask]
+    else:
+        ts_use = ts_grid
+        ys_mid_use = ys_mid_grid
+        ys_band_use = ys_band_grid
+
+    if len(ys_mid_use) < config.MIN_POINTS or len(ys_band_use) < config.MIN_POINTS:
+        return None
+
+    period, valleys = detect_period_and_valleys(ts_use, ys_mid_use)
+
+    templates = build_templates_from_valleys(
+        ts_grid=ts_use,
+        ys_mid_grid=ys_mid_use,
+        ys_band_grid=ys_band_use,
+        period=period,
+        valleys=valleys,
+        strategy=strategy,
+        band_low_q=band_low_q,
+        band_high_q=band_high_q,
+    )
+
+    if templates is None or len(valleys) == 0:
+        return None
+
+    template, lower_template, upper_template = templates
+    # The last detected valley becomes the phase origin
+    phase_origin_ts = int(round(float(ts_use[valleys[-1]])))
+
+    return int(period), phase_origin_ts, template, lower_template, upper_template
+
+
+# ---------------------------------------------------------------------------
+# 模板预测
+# ---------------------------------------------------------------------------
+
+def circular_template_value(template: np.ndarray, phase: float) -> float:
+    """
+    Read the template at a phase, interpolating linearly and wrapping at the end.
+
+    Args:
+        template: Template array; its length is taken as the period
+        phase: Phase as a float; it is reduced modulo the template length
+
+    Returns:
+        The interpolated template value, or 0.0 for an empty template.
+    """
+    period = len(template)
+    if period == 0:
+        return 0.0
+
+    phase = float(phase) % period
+    i0 = int(math.floor(phase)) % period
+    i1 = (i0 + 1) % period
+    frac = phase - math.floor(phase)
+
+    return float((1.0 - frac) * template[i0] + frac * template[i1])
+
+
+def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
+    """
+    Resample a template to a new period length.
+
+    The old template is stretched or compressed onto new_period samples. It is
+    tiled three times before interpolating so values wrap across the boundary.
+
+    Args:
+        old_template: Original template array
+        new_period: Target period length (seconds)
+
+    Returns:
+        The resampled template as a float array of length new_period.
+    """
+    old_period = len(old_template)
+    if old_period == new_period:
+        return old_template.astype(float)
+
+    if old_period <= 1 or new_period <= 1:
+        return np.full(new_period, float(np.mean(old_template)), dtype=float)
+
+    # Map both grids to phase space [0, 1); the +/-1 copies give np.interp neighbours past each edge
+    old_x = np.linspace(0.0, 1.0, old_period, endpoint=False)
+    new_x = np.linspace(0.0, 1.0, new_period, endpoint=False)
+
+    old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0])
+    old_y_ext = np.concatenate([old_template, old_template, old_template])
+
+    return np.interp(new_x, old_x_ext, old_y_ext).astype(float)
+
+
+def predict_template_values(
+    template: np.ndarray,
+    period: int,
+    phase_origin_ts: int,
+    ts_list: List[int],
+) -> np.ndarray:
+    """
+    Predict the value at each timestamp from a template and a phase origin.
+
+    phase = (ts - phase_origin_ts) mod period; the value at that phase is read
+    from the template with linear interpolation.
+
+    Args:
+        template: Template array; resampled first if its length != period
+        period: Period (seconds); period <= 1 yields an all-zero result
+        phase_origin_ts: Phase origin timestamp (Unix seconds)
+        ts_list: Timestamps to predict (Unix seconds)
+
+    Returns:
+        Float array of predictions, same length as ts_list.
+    """
+    if period <= 1:
+        return np.zeros(len(ts_list), dtype=float)
+
+    if len(template) != period:
+        template = resample_template(template, period)
+
+    values = [
+        circular_template_value(template, (int(ts) - int(phase_origin_ts)) % period)
+        for ts in ts_list
+    ]
+    return np.array(values, dtype=float)
+
+
+def predict_state_bundle(
+    state: BaselineState,
+    ts_list: List[int],
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """
+    Predict values at the given timestamps with all three templates of a state.
+
+    Args:
+        state: Baseline state; period, phase_origin_ts and the three templates are read
+        ts_list: Timestamps to predict (Unix seconds)
+
+    Returns:
+        (mid, lower, upper) prediction arrays, each the same length as ts_list.
+    """
+    period = int(state.period)
+    origin = int(state.phase_origin_ts)
+
+    mid = predict_template_values(
+        template=np.array(state.template, dtype=float),
+        period=period,
+        phase_origin_ts=origin,
+        ts_list=ts_list,
+    )
+    lower = predict_template_values(
+        template=np.array(state.lower_template, dtype=float),
+        period=period,
+        phase_origin_ts=origin,
+        ts_list=ts_list,
+    )
+    upper = predict_template_values(
+        template=np.array(state.upper_template, dtype=float),
+        period=period,
+        phase_origin_ts=origin,
+        ts_list=ts_list,
+    )
+    return mid, lower, upper
+
+
+def normalize_origin_near(origin: int, period: int, near_ts: int) -> int:
+    """
+    Shift the phase origin by whole periods so it lands next to near_ts.
+
+    The result satisfies origin <= near_ts < origin + period. It is computed
+    with one floor division, so a very stale origin costs no extra time.
+
+    Args:
+        origin: Current phase origin (Unix seconds)
+        period: Period (seconds); values <= 1 return origin unchanged
+        near_ts: Timestamp to move the origin next to
+
+    Returns:
+        The shifted phase origin (Unix seconds).
+    """
+    if period <= 1:
+        return origin
+
+    origin = int(origin)
+    period = int(period)
+    near_ts = int(near_ts)
+
+    # Floor division rounds toward -inf, so one expression moves the origin
+    # forward when it is stale and backward when it lies after near_ts.
+    # This replaces stepping one period at a time.
+    distance = near_ts - origin
+    whole_periods = distance // period
+
+    return origin + whole_periods * period
+
+
+def merge_template(
+    old_template: np.ndarray,
+    new_template: np.ndarray,
+    alpha: float,
+) -> np.ndarray:
+    """
+    Blend the old and new templates with an exponential moving average step.
+
+    merged = (1 - alpha) * old + alpha * new
+
+    If the lengths differ, the old template is first resampled to the length
+    of the new one. A larger alpha gives the new template more weight.
+
+    Args:
+        old_template: Old template array
+        new_template: New template array
+        alpha: EMA step size, clipped to [0, 1]
+
+    Returns:
+        The blended template, same length as new_template.
+    """
+    alpha = float(np.clip(alpha, 0.0, 1.0))
+
+    if len(old_template) != len(new_template):
+        old_template = resample_template(old_template, len(new_template))
+
+    return ((1.0 - alpha) * old_template + alpha * new_template).astype(float)
diff --git a/ai/pridict_v5.py b/ai/pridict_v5.py
index 6894a66..dde0b11 100644
--- a/ai/pridict_v5.py
+++ b/ai/pridict_v5.py
@@ -1,27 +1,31 @@
 # -*- coding: utf-8 -*-
 """
-ProtoForge Predictor v12
+ProtoForge Predictor v13
 
 核心能力：
-1. feed_rate / spindle_speed / spindle_current 使用 phase-lock 点预测。
-2. vibration_x / vibration_y / vibration_z 使用 phase-band 预测带。
-3. vibration 类指标：
-   - predicted 使用平滑后的中位数模板，用于趋势参考。
-   - upper/lower 使用原始波动分位数模板 + padding，用于正常波动容忍带。
-   - 偶发越界不直接报警，只有持续越界 / 高比例越界 / 严重越界才报警。
-4. 预测起点锚定最后一个真实点 last_real_ts，避免时间错位。
-5. 异常期间冻结健康模板，不学习故障数据。
-6. 故障恢复后等待稳定，再恢复模板学习。
-7. 写入：
-   - xxx_predicted
-   - xxx_predicted_upper
-   - xxx_predicted_lower
-   - xxx_anomaly
-   - xxx_anomaly_outside_ratio
-   - xxx_anomaly_mean_abs_error
-   - xxx_anomaly_mean_rel_error
-   - xxx_anomaly_max_consecutive_outside
-   - xxx_anomaly_max_exceed_ratio
+1. 支持三个独立 CNC 工位：粗铣(fanuc-cnc)、半精铣(fanuc-cnc-semi-finish)、精铣(fanuc-cnc-finish)
+2. 覆盖指标：feed_rate / spindle_speed / spindle_current / spindle_load
+3. feed_rate / spindle_speed / spindle_current 使用 phase-lock 点预测。
+4. spindle_load 使用 phase_band 预测带（多频漂移容忍）。
+5. vibration_x / vibration_y / vibration_z 使用 phase-band 预测带。
+6. 各工位独立阈值配置，匹配实际量程差异：
+   - 粗铣：spindle_speed~2000RPM, feed_rate~800mm/min, spindle_current~21A, spindle_load~56%
+   - 半精铣：spindle_speed~4000RPM, feed_rate~500mm/min, spindle_current~14.5A, spindle_load~38%
+   - 精铣：spindle_speed~6000RPM, feed_rate~300mm/min, spindle_current~8.5A, spindle_load~22%
+7. 粗铣周期含随机抖动(±10s)；phase-lock 周期搜索范围按各指标实测的周期抖动自动推断，限制在 ±12%~±25%。
+8. 预测起点锚定最后一个真实点 last_real_ts，避免时间错位。
+9. 异常期间冻结健康模板，不学习故障数据。
+10. 故障恢复后等待稳定，再恢复模板学习。
+11. 写入：
+    - xxx_predicted
+    - xxx_predicted_upper
+    - xxx_predicted_lower
+    - xxx_anomaly
+    - xxx_anomaly_outside_ratio
+    - xxx_anomaly_mean_abs_error
+    - xxx_anomaly_mean_rel_error
+    - xxx_anomaly_max_consecutive_outside
+    - xxx_anomaly_max_exceed_ratio
 """
 
 import json
@@ -55,7 +59,7 @@
 # =============================================================================
 
 VM_URL = "http://localhost:8428"
-STATE_FILE = "/tmp/protoforge_predictor_state_v12.json"
+STATE_FILE = "/tmp/protoforge_predictor_state_v14.json"
 
 HISTORY_MINUTES = 30
 HORIZON_SECONDS = 120
@@ -86,6 +90,7 @@
 
 MAX_DATA_LAG_SECONDS = 180
 
+# 默认 phase-lock 搜索参数（精铣/半精铣：固定周期，搜索范围窄）
 PHASE_LOCK_MIN_WINDOW_SECONDS = 45
 PHASE_LOCK_MAX_WINDOW_SECONDS = 180
 PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12
@@ -95,105 +100,260 @@
 
 
 # =============================================================================
-# 指标配置
+# 监控指标白名单（可通过环境变量 PROTOFORGE_MONITORED_METRICS 覆盖）
 # =============================================================================
 
-PREDICT_TARGETS = [
-    {
-        "query": 'feed_rate{device_id="fanuc-cnc"}',
-        "pred_metric": "feed_rate_predicted",
-        "anomaly_metric": "feed_rate_anomaly",
-        "strategy": "phase_point",
-        "abs_threshold": 400.0,
-        "rel_threshold": 0.25,
-        "smooth_window": 1,
-        "outside_ratio_threshold": 0.60,
-        "min_consecutive_outside": 5,
-        "severe_exceed_ratio": 1.8,
-    },
-    {
-        "query": 'spindle_speed{device_id="fanuc-cnc"}',
-        "pred_metric": "spindle_speed_predicted",
-        "anomaly_metric": "spindle_speed_anomaly",
-        "strategy": "phase_point",
-        "abs_threshold": 500.0,
-        "rel_threshold": 0.25,
-        "smooth_window": 1,
-        "outside_ratio_threshold": 0.60,
-        "min_consecutive_outside": 5,
-        "severe_exceed_ratio": 1.8,
-    },
-    {
-        "query": 'spindle_current{device_id="fanuc-cnc"}',
-        "pred_metric": "spindle_current_predicted",
-        "anomaly_metric": "spindle_current_anomaly",
-        "strategy": "phase_point",
-        "abs_threshold": 5.0,
-        "rel_threshold": 0.25,
-        "smooth_window": 1,
+_DEFAULT_MONITORED_METRICS = [
+    "feed_rate",
+    "spindle_speed",
+    "spindle_current",
+    "spindle_load",
+    "vibration_x",
+    "vibration_y",
+    "vibration_z",
+]
+
+MONITORED_METRICS: List[str] = [
+    m.strip()
+    for m in os.environ.get(
+        "PROTOFORGE_MONITORED_METRICS",
+        ",".join(_DEFAULT_MONITORED_METRICS),
+    ).split(",")
+    if m.strip()
+]
+
+# 人工上下限覆盖文件（可选，不存在则忽略）
+# 格式：{"device-id": {"metric_name": {"hard_max": 35.0, "hard_min": 0.0}}}
+OVERRIDE_FILE = os.environ.get(
+    "PROTOFORGE_PREDICTOR_OVERRIDE",
+    "/etc/protoforge/predictor_override.json",
+)
+
+# 目标列表刷新间隔（秒）
+TARGETS_REFRESH_INTERVAL = int(os.environ.get("PROTOFORGE_TARGETS_REFRESH", "60"))
+
+# 运行时目标缓存
+_TARGETS_CACHE: List[Dict] = []
+_TARGETS_LAST_REFRESH: float = 0.0
+
+
+# =============================================================================
+# Layer 1: 设备与指标发现
+# =============================================================================
+
+def discover_device_ids() -> List[str]:
+    """Return all non-empty device_id label values reported by VM; [] if the request fails."""
+    try:
+        resp = requests.get(
+            f"{VM_URL}/api/v1/label/device_id/values",
+            timeout=10,
+        )
+        resp.raise_for_status()
+        return [v for v in resp.json().get("data", []) if v]
+    except requests.RequestException as e:
+        logger.error("发现 device_id 失败: %s", e)
+        return []
+
+
+def discover_metrics_for_device(device_id: str) -> List[str]:
+    """Return the MONITORED_METRICS for which an instant VM query yields a series for this device."""
+    found = []
+    for metric in MONITORED_METRICS:
+        try:
+            resp = requests.get(
+                f"{VM_URL}/api/v1/query",
+                params={"query": f'{metric}{{device_id="{device_id}"}}'},
+                timeout=5,
+            )
+            resp.raise_for_status()
+            if resp.json().get("data", {}).get("result"):
+                found.append(metric)
+        except requests.RequestException as e:  # still best-effort, but a dropped metric is now visible
+            logger.warning("探测指标失败 device=%s metric=%s: %s", device_id, metric, e)
+    return found
+
+
+# =============================================================================
+# Layer 2: 自适应配置推断
+# =============================================================================
+
+@dataclass
+class MetricProfile:
+    """Statistics of one metric's history plus the strategy and thresholds derived from them."""
+    device_id: str
+    metric: str
+    p5: float           # 5th percentile of the active samples
+    p95: float          # 95th percentile of the active samples
+    iqr: float          # p95 - p5
+    cv: float           # coefficient of variation, std / mean
+    strategy: str       # "phase_point" or "phase_band"
+    abs_threshold: float
+    rel_threshold: float
+    band_low_q: float
+    band_high_q: float
+    band_pad_abs: float
+    phase_lock_period_search_ratio: float
+
+
+def infer_metric_profile(device_id: str, metric: str) -> Optional["MetricProfile"]:
+    """
+    Fetch history, compute statistics of the active samples, and derive strategy and thresholds.
+
+    Idle filtering: samples at or below the 10th percentile are dropped before computing statistics.
+    Strategy: cv < 0.15 -> phase_point, otherwise phase_band.
+    Phase-lock search range: twice the CV of valley spacing, clipped to 0.12-0.25. Returns None on too little data.
+    """
+    ts_raw, ys_raw = fetch_history(f'{metric}{{device_id="{device_id}"}}')
+    if len(ys_raw) < MIN_POINTS:
+        return None
+
+    arr = np.array(ys_raw, dtype=float)
+
+    # Drop idle samples: keep only values above the 10th percentile
+    p10_val = float(np.percentile(arr, 10))
+    active = arr[arr > p10_val]
+    if len(active) < 30:
+        active = arr  # too few samples above p10; fall back to the full series
+
+    mean_val = float(np.mean(active))
+    std_val = float(np.std(active))
+    cv = std_val / max(abs(mean_val), 1e-6)
+    p5 = float(np.percentile(active, 5))
+    p95 = float(np.percentile(active, 95))
+    iqr = p95 - p5
+
+    # Pick the strategy from the coefficient of variation
+    strategy = "phase_point" if cv < 0.15 else "phase_band"
+
+    # Max of 0.8*iqr, 0.05*(p95 - p5), 2*std. NOTE(review): iqr == p95 - p5, so the 5% term never wins
+    abs_threshold = max(iqr * 0.8, (p95 - p5) * 0.05, std_val * 2.0)
+    rel_threshold = min(0.30, cv * 1.5)
+
+    # phase_band padding: the larger of 30% of iqr and one standard deviation
+    band_pad_abs = max(iqr * 0.3, std_val)
+
+    # Phase-lock search range: estimate period jitter from the history.
+    # A rough period estimate seeds valley detection; the CV of valley spacing sets the ratio.
+    ts_grid, ys_grid = normalize_history(ts_raw, ys_raw)
+    period_search_ratio = PHASE_LOCK_PERIOD_SEARCH_RATIO  # default
+    if len(ys_grid) >= MIN_POINTS:
+        rough_period = estimate_period_rough(ys_grid)
+        if rough_period > MIN_PERIOD_SECONDS:
+            # Estimate period jitter from the spacing between valleys
+            valleys = find_valley_indices(ts_grid, ys_grid, rough_period)
+            if len(valleys) >= 3:
+                diffs = np.diff(ts_grid[valleys].astype(float))
+                valid = diffs[(diffs > rough_period * 0.5) & (diffs < rough_period * 2.0)]
+                if len(valid) >= 2:
+                    period_cv = float(np.std(valid) / max(np.mean(valid), 1e-6))
+                    period_search_ratio = float(np.clip(period_cv * 2.0, 0.12, 0.25))
+
+    logger.info(
+        "推断指标特征 device=%s metric=%s cv=%.3f strategy=%s abs_thr=%.3f rel_thr=%.3f period_search=%.2f",
+        device_id, metric, cv, strategy, abs_threshold, rel_threshold, period_search_ratio,
+    )
+
+    return MetricProfile(
+        device_id=device_id,
+        metric=metric,
+        p5=p5,
+        p95=p95,
+        iqr=iqr,
+        cv=cv,
+        strategy=strategy,
+        abs_threshold=abs_threshold,
+        rel_threshold=rel_threshold,
+        band_low_q=5.0,
+        band_high_q=95.0,
+        band_pad_abs=band_pad_abs,
+        phase_lock_period_search_ratio=period_search_ratio,
+    )
+
+
+def load_overrides() -> Dict:
+    """Load the manual limit overrides from OVERRIDE_FILE; return {} if it is missing or unreadable."""
+    if not os.path.exists(OVERRIDE_FILE):
+        return {}
+    try:
+        with open(OVERRIDE_FILE, "r", encoding="utf-8") as f:
+            return json.load(f)
+    except Exception as e:
+        logger.warning("加载 override 文件失败 %s: %s", OVERRIDE_FILE, e)
+        return {}
+
+
+def build_target(profile: MetricProfile, overrides: Dict) -> Dict:
+    """Convert a MetricProfile into the target dict consumed by the prediction loop."""
+    device_overrides = overrides.get(profile.device_id, {}).get(profile.metric, {})
+
+    target: Dict = {
+        "query": f'{profile.metric}{{device_id="{profile.device_id}"}}',
+        "pred_metric": f"{profile.metric}_predicted",
+        "anomaly_metric": f"{profile.metric}_anomaly",
+        "strategy": profile.strategy,
+        "abs_threshold": profile.abs_threshold,
+        "rel_threshold": profile.rel_threshold,
+        "smooth_window": 5 if profile.strategy == "phase_band" else 2,
         "outside_ratio_threshold": 0.60,
         "min_consecutive_outside": 5,
         "severe_exceed_ratio": 1.8,
-    },
-    {
-        "query": 'vibration_x{device_id="fanuc-cnc"}',
-        "pred_metric": "vibration_x_predicted",
-        "anomaly_metric": "vibration_x_anomaly",
-        "strategy": "phase_band",
-
-        # vibration 类指标噪声、尖峰较多，不建议用很窄的阈值。
-        "abs_threshold": 0.18,
-        "rel_threshold": 0.55,
-
-        # 平滑只用于相位锁定和 predicted 中位趋势。
-        "smooth_window": 5,
-
-        # upper/lower 用原始值分位数，范围放宽，覆盖正常尖峰。
-        "band_low_q": 1,
-        "band_high_q": 99,
-        "band_pad_abs": 0.15,
-
-        # 偶发越界容忍。
-        "outside_ratio_threshold": 0.70,
-        "min_consecutive_outside": 5,
-        "severe_exceed_ratio": 2.0,
-    },
-    {
-        "query": 'vibration_y{device_id="fanuc-cnc"}',
-        "pred_metric": "vibration_y_predicted",
-        "anomaly_metric": "vibration_y_anomaly",
-        "strategy": "phase_band",
-        "abs_threshold": 0.18,
-        "rel_threshold": 0.55,
-        "smooth_window": 5,
-        "band_low_q": 1,
-        "band_high_q": 99,
-        "band_pad_abs": 0.15,
-        "outside_ratio_threshold": 0.70,
-        "min_consecutive_outside": 5,
-        "severe_exceed_ratio": 2.0,
-    },
-    {
-        "query": 'vibration_z{device_id="fanuc-cnc"}',
-        "pred_metric": "vibration_z_predicted",
-        "anomaly_metric": "vibration_z_anomaly",
-        "strategy": "phase_band",
-        "abs_threshold": 0.18,
-        "rel_threshold": 0.55,
-        "smooth_window": 5,
-        "band_low_q": 1,
-        "band_high_q": 99,
-        "band_pad_abs": 0.15,
-        "outside_ratio_threshold": 0.70,
-        "min_consecutive_outside": 5,
-        "severe_exceed_ratio": 2.0,
-    },
-]
+        "phase_lock_period_search_ratio": profile.phase_lock_period_search_ratio,
+        "phase_lock_origin_search_ratio": min(
+            0.45, profile.phase_lock_period_search_ratio * 2.5
+        ),
+        # Optional physical limits from the override file; None when not configured
+        "hard_max": device_overrides.get("hard_max"),
+        "hard_min": device_overrides.get("hard_min"),
+    }
+
+    if profile.strategy == "phase_band":
+        target.update({
+            "band_low_q": profile.band_low_q,
+            "band_high_q": profile.band_high_q,
+            "band_pad_abs": profile.band_pad_abs,
+        })
+
+    return target
+
+
+def refresh_targets_if_needed() -> None:
+    """
+    Re-discover devices and metrics and rebuild the target cache once TARGETS_REFRESH_INTERVAL has elapsed.
+    An empty cache always triggers discovery; a failed discovery keeps cache and timestamp, so the next call retries.
+    """
+    global _TARGETS_CACHE, _TARGETS_LAST_REFRESH
+
+    now = time.time()
+    if now - _TARGETS_LAST_REFRESH < TARGETS_REFRESH_INTERVAL and _TARGETS_CACHE:
+        return
+
+    logger.info("开始发现设备和指标...")
+    overrides = load_overrides()
+    targets: List[Dict] = []
+
+    device_ids = discover_device_ids()
+    if not device_ids:
+        logger.warning("未发现任何 device_id，保持现有目标列表")
+        return
+
+    for device_id in device_ids:
+        metrics = discover_metrics_for_device(device_id)
+        for metric in metrics:
+            profile = infer_metric_profile(device_id, metric)
+            if profile is not None:
+                targets.append(build_target(profile, overrides))
+
+    if targets:
+        _TARGETS_CACHE = targets
+        _TARGETS_LAST_REFRESH = now
+        logger.info(
+            "目标列表已更新：%d 台设备，%d 个指标目标",
+            len(device_ids),
+            len(targets),
+        )
+    else:
+        logger.warning("发现流程未产生任何有效目标，保持现有目标列表")
 
-EXTRA_PREDICT_LABELS = {
-    "forecast": "phase_band_health_v12",
-    "source": "protoforge",
-}
 
 BASELINE_STATUS_HEALTHY = "healthy"
 BASELINE_STATUS_ANOMALY = "anomaly"
@@ -612,8 +772,6 @@ def build_templates_from_valleys(
 
     if strategy == "phase_band":
         mid_template = np.percentile(mid_arr, 50, axis=0)
-
-        # upper/lower 使用原始值分布，而不是平滑值分布。
         lower_template = np.percentile(band_arr, low_q, axis=0)
         upper_template = np.percentile(band_arr, high_q, axis=0)
     else:
@@ -794,17 +952,28 @@ def merge_template(
 
 # =============================================================================
 # Phase Lock
+# 支持 target 级别的 phase_lock_period_search_ratio / phase_lock_origin_search_ratio
+# 粗铣工位周期含随机抖动(±10s)，需要更宽的搜索范围
 # =============================================================================
 
 def phase_lock_recent(
     state: BaselineState,
     ts_grid: np.ndarray,
     ys_model: np.ndarray,
+    target: Optional[Dict] = None,
 ) -> Tuple[int, int, np.ndarray, float]:
     base_period = int(state.period)
     base_origin = int(state.phase_origin_ts)
     base_template = np.array(state.template, dtype=float)
 
+    # 从 target 读取搜索范围，允许粗铣工位使用更宽的范围
+    period_search_ratio = float(
+        (target or {}).get("phase_lock_period_search_ratio", PHASE_LOCK_PERIOD_SEARCH_RATIO)
+    )
+    origin_search_ratio = float(
+        (target or {}).get("phase_lock_origin_search_ratio", PHASE_LOCK_ORIGIN_SEARCH_RATIO)
+    )
+
     if base_period <= 1 or len(base_template) <= 1:
         ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
         pred = predict_template_values(base_template, base_period, base_origin, ts_recent)
@@ -832,11 +1001,11 @@ def phase_lock_recent(
 
     p_min = max(
         int(MIN_PERIOD_SECONDS),
-        int(round(base_period * (1.0 - PHASE_LOCK_PERIOD_SEARCH_RATIO))),
+        int(round(base_period * (1.0 - period_search_ratio))),
     )
     p_max = min(
         int(MAX_PERIOD_SECONDS),
-        int(round(base_period * (1.0 + PHASE_LOCK_PERIOD_SEARCH_RATIO))),
+        int(round(base_period * (1.0 + period_search_ratio))),
     )
 
     best_period = base_period
@@ -855,7 +1024,7 @@ def phase_lock_recent(
     for period in range(p_min, p_max + 1, PHASE_LOCK_PERIOD_STEP):
         template = resample_template(base_template, period)
         center_origin = normalize_origin_near(base_origin, period, last_ts)
-        origin_shift = max(2, int(round(period * PHASE_LOCK_ORIGIN_SEARCH_RATIO)))
+        origin_shift = max(2, int(round(period * origin_search_ratio)))
 
         for shift in range(-origin_shift, origin_shift + 1, PHASE_LOCK_ORIGIN_STEP):
             origin = center_origin + shift
@@ -925,7 +1094,6 @@ def calc_final_bounds(
     if strategy == "phase_band":
         pad_abs = float(target.get("band_pad_abs", abs_threshold))
 
-        # 对 vibration 类指标：边界更像正常波动容忍带，不是硬边界。
         dynamic_pad = np.maximum(
             pad_abs,
             np.abs(pred) * rel_threshold * 0.25,
@@ -933,10 +1101,18 @@ def calc_final_bounds(
 
         lower = lower_raw - dynamic_pad
         upper = upper_raw + dynamic_pad
+    else:
+        lower, upper = calc_point_bounds(pred, abs_threshold, rel_threshold)
 
-        return lower, upper
+    # 物理上下限兜底（来自 override 文件，可选）
+    hard_max = target.get("hard_max")
+    hard_min = target.get("hard_min")
+    if hard_max is not None:
+        upper = np.minimum(upper, float(hard_max))
+    if hard_min is not None:
+        lower = np.maximum(lower, float(hard_min))
 
-    return calc_point_bounds(pred, abs_threshold, rel_threshold)
+    return lower, upper
 
 
 def detect_anomaly(
@@ -950,6 +1126,7 @@ def detect_anomaly(
         state=state,
         ts_grid=ts_grid,
         ys_model=ys_model,
+        target=target,
     )
 
     recent_len = len(pred_recent)
@@ -1018,11 +1195,6 @@ def detect_anomaly(
         target.get("severe_exceed_ratio", SEVERE_EXCEED_RATIO)
     )
 
-    # 核心优化：
-    # 1. 偶发 1~3 个点越界不报警。
-    # 2. 持续越界才报警。
-    # 3. 高比例越界才报警。
-    # 4. 严重越界才立即报警。
     is_anomaly = (
         outside_ratio >= outside_ratio_threshold
         or max_outside_seconds >= min_consecutive_outside
@@ -1653,7 +1825,13 @@ def build_prediction_timestamps(
 def run_once() -> None:
     now_str = datetime.now().strftime("%H:%M:%S")
 
-    for target in PREDICT_TARGETS:
+    refresh_targets_if_needed()
+
+    if not _TARGETS_CACHE:
+        logger.warning("[%s] 目标列表为空，等待设备发现完成", now_str)
+        return
+
+    for target in _TARGETS_CACHE:
         query = target["query"]
         pred_metric = target["pred_metric"]
         anomaly_metric = target["anomaly_metric"]
@@ -1749,7 +1927,7 @@ def run_once() -> None:
         origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S")
 
         logger.info(
-            "[%s] %-40s → %-35s strategy=%s status=%s anomaly=%s outside=%.2f max_outside=%ss max_exceed=%.2f period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点，预测区间 %s ~ %s",
+            "[%s] %-50s → %-35s strategy=%s status=%s anomaly=%s outside=%.2f max_outside=%ss max_exceed=%.2f period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点，预测区间 %s ~ %s",
             now_str,
             query,
             pred_metric,
@@ -1775,7 +1953,7 @@ def main() -> None:
     load_state()
 
     logger.info(
-        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s",
+        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s override=%s refresh=%ds",
         VM_URL,
         HISTORY_MINUTES,
         HORIZON_SECONDS,
@@ -1783,6 +1961,8 @@ def main() -> None:
         POLL_INTERVAL,
         STATE_FILE,
         EXTRA_PREDICT_LABELS["forecast"],
+        OVERRIDE_FILE,
+        TARGETS_REFRESH_INTERVAL,
     )
 
     while True:
@@ -1791,4 +1971,4 @@ def main() -> None:
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
index 11b61a7..2182bf1 100644
--- a/protoforge/core/fault.py
+++ b/protoforge/core/fault.py
@@ -32,24 +32,77 @@
 BUILTIN_FAULT_TYPES: list[FaultTypeDefinition] = [
 
     # ------------------------------------------------------------------
-    # 进给堵转 — 工件夹紧松动或切削量过大导致进给卡死
-    # 特征：进给速率瞬间降为0，主轴负载和电流急剧升高，主轴仍在转（区别于崩刃）
-    # 模式：瞬间注入
+    # 进给堵转（粗铣）— fanuc-cnc
+    # 量程：spindle_speed~2000RPM, feed_rate~800mm/min,
+    #        spindle_current~21A, spindle_load~56%
+    # 堵转目标：load→92%, current→38A，转速维持+轻微抖动
     # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="feed_stall",
-        name="进给堵转",
-        description="进给轴卡死，进给速率降为零，主轴负载和电流急剧升高，主轴转速维持（区别于崩刃停主轴）",
+        id="feed_stall_rough",
+        name="进给堵转（粗铣）",
+        description="粗铣进给轴卡死，进给速率降为零，主轴负载升至~92%，电流升至~38A，主轴转速维持（区别于崩刃停主轴）",
         category="process",
         default_duration=20.0,
-        tags=["进给", "堵转", "突发"],
+        tags=["进给", "堵转", "突发", "粗铣"],
         point_faults=[
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
                              target_value=0.0, noise_scale=0.0),
             PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
-                             multiplier=2.8, noise_scale=5.0),
+                             target_value=92.0, noise_scale=4.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             multiplier=3.8, noise_scale=1.5),
+                             target_value=38.0, noise_scale=1.5),
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=30.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 进给堵转（半精铣）— fanuc-cnc-semi-finish
+    # 量程：spindle_speed~4000RPM, feed_rate~500mm/min,
+    #        spindle_current~14.5A, spindle_load~38%
+    # 堵转目标：load→68%, current→26A，转速维持+轻微抖动
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="feed_stall_semi",
+        name="进给堵转（半精铣）",
+        description="半精铣进给轴卡死，进给速率降为零，主轴负载升至~68%，电流升至~26A，主轴转速维持（区别于崩刃停主轴）",
+        category="process",
+        default_duration=20.0,
+        tags=["进给", "堵转", "突发", "半精铣"],
+        point_faults=[
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             target_value=0.0, noise_scale=0.0),
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             target_value=68.0, noise_scale=3.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             target_value=26.0, noise_scale=1.2),
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=50.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 进给堵转（精铣）— fanuc-cnc-finish
+    # 量程：spindle_speed~6000RPM, feed_rate~300mm/min,
+    #        spindle_current~8.5A, spindle_load~22%
+    # 堵转目标：load→40%, current→15A，转速维持+轻微抖动
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="feed_stall_finish",
+        name="进给堵转（精铣）",
+        description="精铣进给轴卡死，进给速率降为零，主轴负载升至~40%，电流升至~15A，主轴转速维持（区别于崩刃停主轴）",
+        category="process",
+        default_duration=20.0,
+        tags=["进给", "堵转", "突发", "精铣"],
+        point_faults=[
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             target_value=0.0, noise_scale=0.0),
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             target_value=40.0, noise_scale=2.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             target_value=15.0, noise_scale=0.8),
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=80.0),
         ],
     ),
 

From 07fc5d6897f2ab7b228fa16d748cd9ad1c74abb9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Fri, 29 May 2026 10:09:13 +0800
Subject: [PATCH 36/55] fix(fault): sample fault targets from ranges; split spindle_overheat by machining stage

---
 FAULT_INJECTION.md         |  47 ++++++++++---
 protoforge/core/fault.py   | 133 +++++++++++++++++++++++++++----------
 protoforge/models/fault.py |   8 ++-
 3 files changed, 142 insertions(+), 46 deletions(-)

diff --git a/FAULT_INJECTION.md b/FAULT_INJECTION.md
index 951648d..22746bf 100644
--- a/FAULT_INJECTION.md
+++ b/FAULT_INJECTION.md
@@ -160,19 +160,48 @@ DELETE /api/v1/devices/{device_id}/fault
 
 ---
 
-### spindle_overheat — 主轴过热
+### spindle_overheat_rough — 主轴过热（粗铣）
 
 - **分类**：thermal
-- **模式**：渐进式
+- **模式**：渐进式（绝对目标值）
 - **默认持续时间**：240 秒
-- **真实场景**：长时间高负荷或冷却系统故障，热保护机制逐渐降低转速
+- **真实场景**：粗铣主轴长时间高负荷或冷却不足，负载/电流持续高位，热保护渐进降速
 
-| 测点 | 变化方向 | 峰值倍率 |
-|------|---------|---------|
-| `spindle_current` | 升高 | ×1.8 |
-| `spindle_speed` | 降低 | ×0.6 |
-| `vibration_x` | 升高 | ×1.5 |
-| `vibration_z` | 升高 | ×1.5 |
+| 测点 | 变化方向 | 目标值 |
+|------|---------|--------|
+| `spindle_load` | 持续升高 | →85% |
+| `spindle_current` | 持续升高 | →34A |
+| `spindle_speed` | 渐进降低 | →1400 RPM |
+
+---
+
+### spindle_overheat_semi — 主轴过热（半精铣）
+
+- **分类**：thermal
+- **模式**：渐进式（绝对目标值）
+- **默认持续时间**：240 秒
+- **真实场景**：半精铣主轴长时间高负荷或冷却不足，负载/电流持续高位，热保护渐进降速
+
+| 测点 | 变化方向 | 目标值 |
+|------|---------|--------|
+| `spindle_load` | 持续升高 | →72% |
+| `spindle_current` | 持续升高 | →24A |
+| `spindle_speed` | 渐进降低 | →2600 RPM |
+
+---
+
+### spindle_overheat_finish — 主轴过热（精铣）
+
+- **分类**：thermal
+- **模式**：渐进式（绝对目标值）
+- **默认持续时间**：240 秒
+- **真实场景**：精铣主轴长时间高负荷或冷却不足，负载/电流持续高位，热保护渐进降速
+
+| 测点 | 变化方向 | 目标值 |
+|------|---------|--------|
+| `spindle_load` | 持续升高 | →48% |
+| `spindle_current` | 持续升高 | →15A |
+| `spindle_speed` | 渐进降低 | →3800 RPM |
 
 ---
 
diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
index 2182bf1..857bf90 100644
--- a/protoforge/core/fault.py
+++ b/protoforge/core/fault.py
@@ -35,12 +35,12 @@
     # 进给堵转（粗铣）— fanuc-cnc
     # 量程：spindle_speed~2000RPM, feed_rate~800mm/min,
     #        spindle_current~21A, spindle_load~56%
-    # 堵转目标：load→92%, current→38A，转速维持+轻微抖动
+    # 堵转目标：load→85~100%, current→34~42A，转速维持+轻微抖动
     # ------------------------------------------------------------------
     FaultTypeDefinition(
         id="feed_stall_rough",
         name="进给堵转（粗铣）",
-        description="粗铣进给轴卡死，进给速率降为零，主轴负载升至~92%，电流升至~38A，主轴转速维持（区别于崩刃停主轴）",
+        description="粗铣进给轴卡死，进给速率降为零，主轴负载升至85~100%，电流升至34~42A，主轴转速维持（区别于崩刃停主轴）",
         category="process",
         default_duration=20.0,
         tags=["进给", "堵转", "突发", "粗铣"],
@@ -48,9 +48,9 @@
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
                              target_value=0.0, noise_scale=0.0),
             PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
-                             target_value=92.0, noise_scale=4.0),
+                             target_min=85.0, target_max=100.0, noise_scale=4.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             target_value=38.0, noise_scale=1.5),
+                             target_min=34.0, target_max=42.0, noise_scale=1.5),
             PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
                              multiplier=1.0, noise_scale=30.0),
         ],
@@ -60,12 +60,12 @@
     # 进给堵转（半精铣）— fanuc-cnc-semi-finish
     # 量程：spindle_speed~4000RPM, feed_rate~500mm/min,
     #        spindle_current~14.5A, spindle_load~38%
-    # 堵转目标：load→68%, current→26A，转速维持+轻微抖动
+    # 堵转目标：load→62~75%, current→23~29A，转速维持+轻微抖动
     # ------------------------------------------------------------------
     FaultTypeDefinition(
         id="feed_stall_semi",
         name="进给堵转（半精铣）",
-        description="半精铣进给轴卡死，进给速率降为零，主轴负载升至~68%，电流升至~26A，主轴转速维持（区别于崩刃停主轴）",
+        description="半精铣进给轴卡死，进给速率降为零，主轴负载升至62~75%，电流升至23~29A，主轴转速维持（区别于崩刃停主轴）",
         category="process",
         default_duration=20.0,
         tags=["进给", "堵转", "突发", "半精铣"],
@@ -73,9 +73,9 @@
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
                              target_value=0.0, noise_scale=0.0),
             PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
-                             target_value=68.0, noise_scale=3.0),
+                             target_min=62.0, target_max=75.0, noise_scale=3.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             target_value=26.0, noise_scale=1.2),
+                             target_min=23.0, target_max=29.0, noise_scale=1.2),
             PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
                              multiplier=1.0, noise_scale=50.0),
         ],
@@ -85,12 +85,12 @@
     # 进给堵转（精铣）— fanuc-cnc-finish
     # 量程：spindle_speed~6000RPM, feed_rate~300mm/min,
     #        spindle_current~8.5A, spindle_load~22%
-    # 堵转目标：load→40%, current→15A，转速维持+轻微抖动
+    # 堵转目标：load→36~45%, current→13~17A，转速维持+轻微抖动
     # ------------------------------------------------------------------
     FaultTypeDefinition(
         id="feed_stall_finish",
         name="进给堵转（精铣）",
-        description="精铣进给轴卡死，进给速率降为零，主轴负载升至~40%，电流升至~15A，主轴转速维持（区别于崩刃停主轴）",
+        description="精铣进给轴卡死，进给速率降为零，主轴负载升至36~45%，电流升至13~17A，主轴转速维持（区别于崩刃停主轴）",
         category="process",
         default_duration=20.0,
         tags=["进给", "堵转", "突发", "精铣"],
@@ -98,33 +98,79 @@
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
                              target_value=0.0, noise_scale=0.0),
             PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
-                             target_value=40.0, noise_scale=2.0),
+                             target_min=36.0, target_max=45.0, noise_scale=2.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             target_value=15.0, noise_scale=0.8),
+                             target_min=13.0, target_max=17.0, noise_scale=0.8),
             PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
                              multiplier=1.0, noise_scale=80.0),
         ],
     ),
 
     # ------------------------------------------------------------------
-    # 主轴过热 — 长时间高负荷或冷却系统故障
-    # 特征：主轴负载和电流持续偏高，转速因热保护逐渐降低
-    # 模式：渐进式，持续时间较长
+    # 主轴过热（粗铣）— fanuc-cnc
+    # 基线：spindle_speed~2000RPM, spindle_current~21A, spindle_load~56%
+    # 过热目标范围：load 78~92%，current 30~38A，转速降至 1200~1600RPM
+    # 范围模拟不同冷却状态、负荷历史、环境温度下的个体差异
+    # 模式：渐进式；全部用 target_min/max，避免 multiplier 在空载基线=0 时失效
     # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="spindle_overheat",
-        name="主轴过热",
-        description="主轴长时间高负荷运转或冷却不足，spindle_load和spindle_current持续偏高，转速因热保护渐进下降",
+        id="spindle_overheat_rough",
+        name="主轴过热（粗铣）",
+        description="粗铣主轴长时间高负荷或冷却不足，spindle_load渐进升至78~92%，spindle_current升至30~38A，转速因热保护渐进降至1200~1600RPM",
         category="thermal",
         default_duration=240.0,
-        tags=["主轴", "过热", "渐进"],
+        tags=["主轴", "过热", "渐进", "粗铣"],
         point_faults=[
             PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL,
-                             multiplier=1.6, noise_scale=3.0),
+                             target_min=78.0, target_max=92.0, noise_scale=3.5),
             PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
-                             multiplier=1.8, noise_scale=1.2),
+                             target_min=30.0, target_max=38.0, noise_scale=1.5),
             PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL,
-                             multiplier=0.6, noise_scale=50.0),
+                             target_min=1200, target_max=1600, noise_scale=40.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 主轴过热（半精铣）— fanuc-cnc-semi-finish
+    # 基线：spindle_speed~4000RPM, spindle_current~14.5A, spindle_load~38%
+    # 过热目标范围：load 65~78%，current 21~27A，转速降至 2400~2900RPM
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="spindle_overheat_semi",
+        name="主轴过热（半精铣）",
+        description="半精铣主轴长时间高负荷或冷却不足，spindle_load渐进升至65~78%，spindle_current升至21~27A，转速因热保护渐进降至2400~2900RPM",
+        category="thermal",
+        default_duration=240.0,
+        tags=["主轴", "过热", "渐进", "半精铣"],
+        point_faults=[
+            PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL,
+                             target_min=65.0, target_max=78.0, noise_scale=3.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
+                             target_min=21.0, target_max=27.0, noise_scale=1.2),
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL,
+                             target_min=2400, target_max=2900, noise_scale=50.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 主轴过热（精铣）— fanuc-cnc-finish
+    # 基线：spindle_speed~6000RPM, spindle_current~8.5A, spindle_load~22%
+    # 过热目标范围：load 42~55%，current 13~17A，转速降至 3600~4200RPM
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="spindle_overheat_finish",
+        name="主轴过热（精铣）",
+        description="精铣主轴长时间高负荷或冷却不足，spindle_load渐进升至42~55%，spindle_current升至13~17A，转速因热保护渐进降至3600~4200RPM",
+        category="thermal",
+        default_duration=240.0,
+        tags=["主轴", "过热", "渐进", "精铣"],
+        point_faults=[
+            PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL,
+                             target_min=42.0, target_max=55.0, noise_scale=2.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
+                             target_min=13.0, target_max=17.0, noise_scale=0.8),
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL,
+                             target_min=3600, target_max=4200, noise_scale=60.0),
         ],
     ),
 
@@ -230,15 +276,15 @@
     FaultTypeDefinition(
         id="air_cutting",
         name="空切检测",
-        description="刀具未接触工件，spindle_load跌至空载区间(5-15%)，spindle_current降至空转水平，转速进给保持正常",
+        description="刀具未接触工件，spindle_load跌至空载区间(4-12%)，spindle_current降至空转水平，转速进给保持正常",
         category="tool",
         default_duration=180.0,
         tags=["刀具", "空切", "工况切换", "负载"],
         point_faults=[
             PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
-                             target_value=8.0, noise_scale=2.0),
+                             target_min=4.0, target_max=12.0, noise_scale=2.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             target_value=2.5, noise_scale=0.3),
+                             target_min=2.0, target_max=3.5, noise_scale=0.3),
         ],
     ),
 
@@ -356,6 +402,13 @@ def inject(self, device: Any, request: FaultInjectRequest) -> FaultInfo:
                 except (TypeError, ValueError):
                     baseline[pf.point] = 0.0
 
+        # 对有范围定义的测点，注入时随机采样一个实际目标值
+        # 使每次注入的故障严重程度有所不同，模拟真实场景的个体差异
+        resolved_targets: dict[str, float] = {}
+        for pf in fault_type.point_faults:
+            if pf.target_min is not None and pf.target_max is not None:
+                resolved_targets[pf.point] = random.uniform(pf.target_min, pf.target_max)
+
         fault = ActiveFault(
             fault_id=uuid.uuid4().hex[:12],
             device_id=device.id,
@@ -365,10 +418,11 @@ def inject(self, device: Any, request: FaultInjectRequest) -> FaultInfo:
             duration=duration,
             started_at=time.time(),
             baseline_values=baseline,
+            resolved_targets=resolved_targets,
         )
         self._active[device.id] = fault
-        logger.info("Fault injected: device=%s type=%s duration=%.0fs",
-                    device.id, fault_type.id, duration)
+        logger.info("Fault injected: device=%s type=%s duration=%.0fs resolved_targets=%s",
+                    device.id, fault_type.id, duration, resolved_targets)
         return self._to_info(fault, fault_type)
 
     def apply(self, device: Any) -> None:
@@ -400,13 +454,14 @@ def apply(self, device: Any) -> None:
             baseline = fault.baseline_values.get(pf.point, 0.0)
             if baseline == 0.0:
                 # 基线为0说明注入时设备处于换刀/停机状态
-                # target_value 模式可以直接执行（如崩刃归零、空切归空载）
+                # target_value / resolved_targets 模式可以直接执行
                 # multiplier 模式跳过，避免在零基线上产生无意义的值
-                if pf.target_value is None:
+                if pf.target_value is None and pf.point not in fault.resolved_targets:
                     continue
 
+            resolved_target = fault.resolved_targets.get(pf.point)
             device._point_values[pf.point] = self._compute_value(
-                pf, baseline, progress, fault.intensity
+                pf, baseline, progress, fault.intensity, resolved_target
             )
 
     def clear(self, device_id: str) -> bool:
@@ -451,20 +506,26 @@ def _compute_value(
         baseline: float,
         progress: float,
         intensity: float,
+        resolved_target: Optional[float] = None,
     ) -> float:
-        """根据故障配置和当前进度计算覆盖值"""
+        """根据故障配置和当前进度计算覆盖值。
+
+        目标值优先级：resolved_target（注入时随机采样）> target_value（固定值）> multiplier
+        """
+        # 确定本次注入的实际目标值
+        effective_target: Optional[float] = resolved_target if resolved_target is not None else pf.target_value
+
         if pf.mode == FaultMode.INSTANT:
-            # 瞬间模式：直接用目标值，不随时间变化
-            if pf.target_value is not None:
-                target = pf.target_value
+            if effective_target is not None:
+                target = effective_target
             elif pf.multiplier is not None:
                 target = baseline * (1.0 + (pf.multiplier - 1.0) * intensity)
             else:
                 target = baseline
         else:
             # 渐进模式：随 progress 线性劣化
-            if pf.target_value is not None:
-                target = baseline + (pf.target_value - baseline) * progress * intensity
+            if effective_target is not None:
+                target = baseline + (effective_target - baseline) * progress * intensity
             elif pf.multiplier is not None:
                 target = baseline * (1.0 + (pf.multiplier - 1.0) * progress * intensity)
             else:
diff --git a/protoforge/models/fault.py b/protoforge/models/fault.py
index cc038e0..9928332 100644
--- a/protoforge/models/fault.py
+++ b/protoforge/models/fault.py
@@ -25,6 +25,11 @@ class PointFaultConfig(BaseModel):
     target_value: Optional[float] = None
     multiplier: Optional[float] = None     # 异常值 = 当前正常值 × multiplier
 
+    # 目标值范围：注入时在 [target_min, target_max] 内随机采样一个实际目标值
+    # 设置后会覆盖 target_value，使每次注入的故障严重程度有所不同
+    target_min: Optional[float] = None
+    target_max: Optional[float] = None
+
     # GRADUAL 模式：从当前值线性劣化到 target_value 或 multiplier 倍
     # 劣化程度 = progress(0~1) × (target - baseline)
     noise_scale: float = 0.0               # 叠加随机噪声幅度，模拟真实抖动
@@ -59,7 +64,8 @@ class ActiveFault(BaseModel):
     duration: float = 120.0
     started_at: float = 0.0
     cleared_at: Optional[float] = None
-    baseline_values: dict[str, float] = Field(default_factory=dict)  # 注入时的正常基线值
+    baseline_values: dict[str, float] = Field(default_factory=dict)   # 注入时的正常基线值
+    resolved_targets: dict[str, float] = Field(default_factory=dict)  # 注入时随机采样的实际目标值
 
 
 class FaultInfo(BaseModel):

From 5a91ce18b3dadda432d8b651149f83e3d73c5239 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 3 Jun 2026 20:15:22 +0800
Subject: [PATCH 37/55] fix(fault): add nominal_baseline so ramp-up/down injection does not distort the baseline

---
 protoforge/core/fault.py   | 23 +++++++++++++++--------
 protoforge/models/fault.py |  2 ++
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
index 857bf90..a8c3129 100644
--- a/protoforge/core/fault.py
+++ b/protoforge/core/fault.py
@@ -126,7 +126,8 @@
             PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
                              target_min=30.0, target_max=38.0, noise_scale=1.5),
             PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL,
-                             target_min=1200, target_max=1600, noise_scale=40.0),
+                             target_min=1200, target_max=1600, noise_scale=40.0,
+                             nominal_baseline=2000.0),
         ],
     ),
 
@@ -148,7 +149,8 @@
             PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
                              target_min=21.0, target_max=27.0, noise_scale=1.2),
             PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL,
-                             target_min=2400, target_max=2900, noise_scale=50.0),
+                             target_min=2400, target_max=2900, noise_scale=50.0,
+                             nominal_baseline=4000.0),
         ],
     ),
 
@@ -170,7 +172,8 @@
             PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
                              target_min=13.0, target_max=17.0, noise_scale=0.8),
             PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL,
-                             target_min=3600, target_max=4200, noise_scale=60.0),
+                             target_min=3600, target_max=4200, noise_scale=60.0,
+                             nominal_baseline=6000.0),
         ],
     ),
 
@@ -515,21 +518,25 @@ def _compute_value(
         # 确定本次注入的实际目标值
         effective_target: Optional[float] = resolved_target if resolved_target is not None else pf.target_value
 
+        # 如果配置了额定基线，使用它替代注入时采样的瞬时值
+        # 避免在升/降速等非稳态阶段注入时，基线偏低导致渐进目标反而高于基线（转速"上升"bug）
+        effective_baseline = pf.nominal_baseline if pf.nominal_baseline is not None else baseline
+
         if pf.mode == FaultMode.INSTANT:
             if effective_target is not None:
                 target = effective_target
             elif pf.multiplier is not None:
-                target = baseline * (1.0 + (pf.multiplier - 1.0) * intensity)
+                target = effective_baseline * (1.0 + (pf.multiplier - 1.0) * intensity)
             else:
-                target = baseline
+                target = effective_baseline
         else:
             # 渐进模式：随 progress 线性劣化
             if effective_target is not None:
-                target = baseline + (effective_target - baseline) * progress * intensity
+                target = effective_baseline + (effective_target - effective_baseline) * progress * intensity
             elif pf.multiplier is not None:
-                target = baseline * (1.0 + (pf.multiplier - 1.0) * progress * intensity)
+                target = effective_baseline * (1.0 + (pf.multiplier - 1.0) * progress * intensity)
             else:
-                target = baseline
+                target = effective_baseline
 
         # 叠加随机噪声，模拟真实信号抖动
         if pf.noise_scale > 0:
diff --git a/protoforge/models/fault.py b/protoforge/models/fault.py
index 9928332..61eb0e4 100644
--- a/protoforge/models/fault.py
+++ b/protoforge/models/fault.py
@@ -33,6 +33,8 @@ class PointFaultConfig(BaseModel):
     # GRADUAL 模式：从当前值线性劣化到 target_value 或 multiplier 倍
     # 劣化程度 = progress(0~1) × (target - baseline)
     noise_scale: float = 0.0               # 叠加随机噪声幅度，模拟真实抖动
+    nominal_baseline: Optional[float] = None  # 稳态额定基线，设置后替代注入时采样的瞬时值
+                                              # 用于周期性信号（如主轴转速）避免在升/降速段注入时基线失真
 
 
 class FaultTypeDefinition(BaseModel):

From 1d083cb6e5dc3e0da95081a80d5ce9c13d35765f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 3 Jun 2026 20:43:56 +0800
Subject: [PATCH 38/55] fix(fault): split power_fluctuation into rough/semi/finish variants

---
 protoforge/core/fault.py | 63 ++++++++++++++++++++++++++++++++++------
 1 file changed, 54 insertions(+), 9 deletions(-)

diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
index a8c3129..86aeb1d 100644
--- a/protoforge/core/fault.py
+++ b/protoforge/core/fault.py
@@ -178,24 +178,69 @@
     ),
 
     # ------------------------------------------------------------------
-    # 电源波动 — 供电不稳定
-    # 特征：主轴转速和进给速率出现随机波动，电流不稳定
-    # 模式：瞬间注入（持续期间持续抖动）
+    # 电源波动（粗铣）— fanuc-cnc
+    # 主轴~2000RPM，进给~800mm/min
+    # 转速噪声 ±200 RPM（±10%），进给噪声 ±80 mm/min（±10%），电流噪声 ±3A
     # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="power_fluctuation",
-        name="电源波动",
-        description="供电电压不稳定，主轴转速和进给速率出现随机波动",
+        id="power_fluctuation_rough",
+        name="电源波动（粗铣）",
+        description="粗铣工位供电电压不稳定，主轴转速出现随机波动（±200RPM），进给速率抖动（±80mm/min），电流不稳定",
         category="electrical",
         default_duration=90.0,
-        tags=["电源", "波动", "突发"],
+        tags=["电源", "波动", "突发", "粗铣"],
+        point_faults=[
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=200.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=3.0),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=80.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 电源波动（半精铣）— fanuc-cnc-semi-finish
+    # 主轴~4000RPM，进给~300mm/min
+    # 转速噪声 ±300 RPM（±7.5%），进给噪声 ±25 mm/min（±8%），电流噪声 ±2A
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="power_fluctuation_semi",
+        name="电源波动（半精铣）",
+        description="半精铣工位供电电压不稳定，主轴转速出现随机波动（±300RPM），进给速率抖动（±25mm/min），电流不稳定",
+        category="electrical",
+        default_duration=90.0,
+        tags=["电源", "波动", "突发", "半精铣"],
         point_faults=[
             PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
                              multiplier=1.0, noise_scale=300.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=5.0),
+                             multiplier=1.0, noise_scale=2.0),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=25.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 电源波动（精铣）— fanuc-cnc-finish
+    # 主轴~6000RPM，进给~300mm/min
+    # 转速噪声 ±450 RPM（±7.5%），进给噪声 ±25 mm/min（±8%），电流噪声 ±1.2A
+    # 精铣对稳定性要求高，波动对加工质量影响更敏感
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="power_fluctuation_finish",
+        name="电源波动（精铣）",
+        description="精铣工位供电电压不稳定，主轴转速出现随机波动（±450RPM），进给速率抖动（±25mm/min），电流不稳定；精铣对稳定性要求高，波动易导致表面质量下降",
+        category="electrical",
+        default_duration=90.0,
+        tags=["电源", "波动", "突发", "精铣"],
+        point_faults=[
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=450.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=1.2),
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=150.0),
+                             multiplier=1.0, noise_scale=25.0),
         ],
     ),
 

From 30ad880be00d40453451af5fa4a622db5693d511 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 3 Jun 2026 21:10:31 +0800
Subject: [PATCH 39/55] fix(fault): use nominal_baseline for power_fluctuation and do not skip it at zero baseline

---
 protoforge/core/fault.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
index 86aeb1d..cdbcfe9 100644
--- a/protoforge/core/fault.py
+++ b/protoforge/core/fault.py
@@ -191,11 +191,11 @@
         tags=["电源", "波动", "突发", "粗铣"],
         point_faults=[
             PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=200.0),
+                             multiplier=1.0, noise_scale=200.0, nominal_baseline=2000.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
                              multiplier=1.0, noise_scale=3.0),
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=80.0),
+                             multiplier=1.0, noise_scale=80.0, nominal_baseline=800.0),
         ],
     ),
 
@@ -213,11 +213,11 @@
         tags=["电源", "波动", "突发", "半精铣"],
         point_faults=[
             PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=300.0),
+                             multiplier=1.0, noise_scale=300.0, nominal_baseline=4000.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
                              multiplier=1.0, noise_scale=2.0),
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=25.0),
+                             multiplier=1.0, noise_scale=25.0, nominal_baseline=300.0),
         ],
     ),
 
@@ -236,11 +236,11 @@
         tags=["电源", "波动", "突发", "精铣"],
         point_faults=[
             PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=450.0),
+                             multiplier=1.0, noise_scale=450.0, nominal_baseline=6000.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
                              multiplier=1.0, noise_scale=1.2),
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=25.0),
+                             multiplier=1.0, noise_scale=25.0, nominal_baseline=300.0),
         ],
     ),
 
@@ -500,10 +500,11 @@ def apply(self, device: Any) -> None:
             if pf.point not in device._point_values:
                 continue
             baseline = fault.baseline_values.get(pf.point, 0.0)
-            if baseline == 0.0:
+            if baseline == 0.0 and pf.nominal_baseline is None:
                 # 基线为0说明注入时设备处于换刀/停机状态
                 # target_value / resolved_targets 模式可以直接执行
                 # multiplier 模式跳过，避免在零基线上产生无意义的值
+                # 例外：配置了 nominal_baseline 时使用额定值，不跳过
                 if pf.target_value is None and pf.point not in fault.resolved_targets:
                     continue
 

From 8ecfb1168aa1ff9c722550322c8e8d6c0395fa0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 3 Jun 2026 21:50:03 +0800
Subject: [PATCH 40/55] fix(fault): add noise_ratio for baseline-proportional noise on GRADUAL load/current faults

---
 protoforge/core/fault.py   | 9 ++++++---
 protoforge/models/fault.py | 4 +++-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
index cdbcfe9..5edfcfc 100644
--- a/protoforge/core/fault.py
+++ b/protoforge/core/fault.py
@@ -259,9 +259,9 @@
         tags=["刀具", "磨损", "负载", "趋势漂移"],
         point_faults=[
             PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL,
-                             multiplier=1.8, noise_scale=3.0),
+                             multiplier=1.8, noise_ratio=0.05),
             PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
-                             multiplier=1.7, noise_scale=1.5),
+                             multiplier=1.7, noise_ratio=0.05),
         ],
     ),
 
@@ -585,7 +585,10 @@ def _compute_value(
                 target = effective_baseline
 
         # 叠加随机噪声，模拟真实信号抖动
-        if pf.noise_scale > 0:
+        # noise_ratio > 0 时按 effective_baseline 比例计算噪声幅度，否则使用绝对值 noise_scale
+        if pf.noise_ratio > 0:
+            target += random.gauss(0, pf.noise_ratio * effective_baseline * intensity)
+        elif pf.noise_scale > 0:
             target += random.gauss(0, pf.noise_scale * intensity)
 
         return round(max(0.0, target), 4)
diff --git a/protoforge/models/fault.py b/protoforge/models/fault.py
index 61eb0e4..5c69ee3 100644
--- a/protoforge/models/fault.py
+++ b/protoforge/models/fault.py
@@ -32,7 +32,9 @@ class PointFaultConfig(BaseModel):
 
     # GRADUAL 模式：从当前值线性劣化到 target_value 或 multiplier 倍
     # 劣化程度 = progress(0~1) × (target - baseline)
-    noise_scale: float = 0.0               # 叠加随机噪声幅度，模拟真实抖动
+    noise_scale: float = 0.0               # 叠加随机噪声幅度（绝对值），模拟真实抖动
+    noise_ratio: float = 0.0               # 叠加随机噪声幅度（相对 effective_baseline 的比例）
+                                           # 与 noise_scale 互斥，noise_ratio > 0 时优先使用
     nominal_baseline: Optional[float] = None  # 稳态额定基线，设置后替代注入时采样的瞬时值
                                               # 用于周期性信号（如主轴转速）避免在升/降速段注入时基线失真
 

From 2d23121ae05e27429390edb583ce1e73bf21cfcd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 4 Jun 2026 09:03:22 +0800
Subject: [PATCH 41/55] fix

---
 protoforge/core/fault.py | 57 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 51 insertions(+), 6 deletions(-)

diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
index 5edfcfc..5013c4e 100644
--- a/protoforge/core/fault.py
+++ b/protoforge/core/fault.py
@@ -250,18 +250,63 @@
     # 场景：刀具从轻度磨损到需要换刀的完整过程
     # 模式：渐进式，持续时间长
     # ------------------------------------------------------------------
+    # ------------------------------------------------------------------
+    # 刀具磨损加剧（粗铣）
+    # 切削段基线：spindle_load~54%, spindle_current~20A
+    # 目标：load×1.8→97%, current×1.7→34A
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="tool_wear_progressive_rough",
+        name="刀具磨损加剧（粗铣）",
+        description="粗铣刀具磨损导致切削阻力持续增大，spindle_load渐进爬升至1.8倍（~97%），spindle_current升至1.7倍（~34A）",
+        category="tool",
+        default_duration=600.0,
+        tags=["刀具", "磨损", "负载", "趋势漂移", "粗铣"],
+        point_faults=[
+            PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL,
+                             multiplier=1.8, noise_ratio=0.05, nominal_baseline=54.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
+                             multiplier=1.7, noise_ratio=0.05, nominal_baseline=20.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 刀具磨损加剧（半精铣）
+    # 切削段基线：spindle_load~33%, spindle_current~13.5A
+    # 目标：load×1.8→59%, current×1.7→23A
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="tool_wear_progressive_semi",
+        name="刀具磨损加剧（半精铣）",
+        description="半精铣刀具磨损导致切削阻力持续增大，spindle_load渐进爬升至1.8倍（~59%），spindle_current升至1.7倍（~23A）",
+        category="tool",
+        default_duration=600.0,
+        tags=["刀具", "磨损", "负载", "趋势漂移", "半精铣"],
+        point_faults=[
+            PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL,
+                             multiplier=1.8, noise_ratio=0.05, nominal_baseline=33.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
+                             multiplier=1.7, noise_ratio=0.05, nominal_baseline=13.5),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 刀具磨损加剧（精铣）
+    # 切削段基线：spindle_load~22%, spindle_current~8.8A
+    # 目标：load×1.8→40%, current×1.7→15A
+    # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="tool_wear_progressive",
-        name="刀具磨损加剧",
-        description="刀具磨损导致切削阻力持续增大，spindle_load基线缓慢爬升至1.8倍，spindle_current同步升高；进给速度由G代码控制不受影响",
+        id="tool_wear_progressive_finish",
+        name="刀具磨损加剧（精铣）",
+        description="精铣刀具磨损导致切削阻力持续增大，spindle_load渐进爬升至1.8倍（~40%），spindle_current升至1.7倍（~15A）；精铣对负载变化敏感，易影响表面质量",
         category="tool",
         default_duration=600.0,
-        tags=["刀具", "磨损", "负载", "趋势漂移"],
+        tags=["刀具", "磨损", "负载", "趋势漂移", "精铣"],
         point_faults=[
             PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL,
-                             multiplier=1.8, noise_ratio=0.05),
+                             multiplier=1.8, noise_ratio=0.05, nominal_baseline=22.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
-                             multiplier=1.7, noise_ratio=0.05),
+                             multiplier=1.7, noise_ratio=0.05, nominal_baseline=8.8),
         ],
     ),
 

From 47dca19a2895a985ad53d73eb26a9bcdd2e7b2ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 4 Jun 2026 09:23:11 +0800
Subject: [PATCH 42/55] fix

---
 protoforge/core/fault.py | 89 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 79 insertions(+), 10 deletions(-)

diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
index 5013c4e..7febb4b 100644
--- a/protoforge/core/fault.py
+++ b/protoforge/core/fault.py
@@ -311,23 +311,77 @@
     ),
 
     # ------------------------------------------------------------------
-    # 刀具崩刃 — 主轴负载突发脉冲
-    # 特征：spindle_load 瞬间冲高（可超120%，FANUC最大输出200%），进给停止，CNC停主轴
-    # 场景：刀具突发性失效，机床触发过载报警并停机
-    # 模式：瞬间注入，持续时间极短
+    # 刀具崩刃（粗铣）— fanuc-cnc
+    # 正常切削基线：spindle_load~56%, spindle_current~21A
+    # 崩刃特征：load 瞬间冲高至 160~185%（FANUC 最大输出200%），
+    #            current 冲至 75~90A，转速/进给归零，触发过载报警
+    # 使用绝对目标值（target_min/max），避免注入时恰好处于低电流阶段
+    # 导致 multiplier × 低基线 < 正常切削峰值的问题
     # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="tool_breakage_sudden",
-        name="刀具崩刃",
-        description="刀具突发性崩刃，spindle_load瞬间冲高至正常值3.2倍（可超120%，FANUC最大输出200%），进给停止，CNC触发过载报警并停主轴",
+        id="tool_breakage_rough",
+        name="刀具崩刃（粗铣）",
+        description="粗铣刀具突发性崩刃，spindle_load瞬间冲高至160~185%，spindle_current冲至75~90A，进给停止，CNC触发过载报警并停主轴",
         category="tool",
         default_duration=10.0,
-        tags=["刀具", "崩刃", "突发", "过载"],
+        tags=["刀具", "崩刃", "突发", "过载", "粗铣"],
         point_faults=[
             PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
-                             multiplier=3.2, noise_scale=8.0),
+                             target_min=160.0, target_max=185.0, noise_scale=8.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             multiplier=4.0, noise_scale=3.0),
+                             target_min=75.0, target_max=90.0, noise_scale=3.0),
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             target_value=0.0, noise_scale=0.0),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             target_value=0.0, noise_scale=0.0),
+            PointFaultConfig(point="alarm_status", mode=FaultMode.INSTANT,
+                             target_value=1.0, noise_scale=0.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 刀具崩刃（半精铣）— fanuc-cnc-semi-finish
+    # 正常切削基线：spindle_load~38%, spindle_current~14.5A
+    # 崩刃特征：load 瞬间冲高至 120~145%，current 冲至 52~64A
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="tool_breakage_semi",
+        name="刀具崩刃（半精铣）",
+        description="半精铣刀具突发性崩刃，spindle_load瞬间冲高至120~145%，spindle_current冲至52~64A，进给停止，CNC触发过载报警并停主轴",
+        category="tool",
+        default_duration=10.0,
+        tags=["刀具", "崩刃", "突发", "过载", "半精铣"],
+        point_faults=[
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             target_min=120.0, target_max=145.0, noise_scale=6.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             target_min=52.0, target_max=64.0, noise_scale=2.5),
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             target_value=0.0, noise_scale=0.0),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             target_value=0.0, noise_scale=0.0),
+            PointFaultConfig(point="alarm_status", mode=FaultMode.INSTANT,
+                             target_value=1.0, noise_scale=0.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 刀具崩刃（精铣）— fanuc-cnc-finish
+    # 正常切削基线：spindle_load~22%, spindle_current~8.5A
+    # 崩刃特征：load 瞬间冲高至 70~90%，current 冲至 30~40A
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="tool_breakage_finish",
+        name="刀具崩刃（精铣）",
+        description="精铣刀具突发性崩刃，spindle_load瞬间冲高至70~90%，spindle_current冲至30~40A，进给停止，CNC触发过载报警并停主轴",
+        category="tool",
+        default_duration=10.0,
+        tags=["刀具", "崩刃", "突发", "过载", "精铣"],
+        point_faults=[
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             target_min=70.0, target_max=90.0, noise_scale=4.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             target_min=30.0, target_max=40.0, noise_scale=1.5),
             PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
                              target_value=0.0, noise_scale=0.0),
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
@@ -544,6 +598,21 @@ def apply(self, device: Any) -> None:
         for pf in fault_type.point_faults:
             if pf.point not in device._point_values:
                 continue
+            # INSTANT + multiplier 模式：每 tick 取设备当前值作为动态基线。
+            # 这样程序运行中电流/负载自然变化时，故障倍数始终基于实时水位，
+            # 避免注入时恰好在低峰导致 multiplier × 旧低基线 < 正常高峰的问题。
+            # resolved_targets（绝对值）和 target_value 模式不受影响，保持原逻辑。
+            if (pf.mode == FaultMode.INSTANT
+                    and pf.multiplier is not None
+                    and pf.target_value is None
+                    and pf.point not in fault.resolved_targets
+                    and pf.nominal_baseline is None):
+                live_val = device._point_values.get(pf.point)
+                if live_val is not None:
+                    try:
+                        fault.baseline_values[pf.point] = float(live_val)
+                    except (TypeError, ValueError):
+                        pass
             baseline = fault.baseline_values.get(pf.point, 0.0)
             if baseline == 0.0 and pf.nominal_baseline is None:
                 # 基线为0说明注入时设备处于换刀/停机状态

From 45ba03319cdb4a465e522ed5fe0487e2b39e2f9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 9 Jun 2026 08:54:56 +0800
Subject: [PATCH 43/55] fix

---
 ai/ai.md                                      |  680 +++++++
 ai/predict.py                                 |   97 -
 ai/predict_v2.py                              |  571 ------
 ai/predict_v3_single_scene.py                 | 1487 ---------------
 ai/pridict_v4.py                              | 1604 -----------------
 ai/pridict_v5.py                              |  148 +-
 protoforge/core/cnc_metric_generator.py       |  661 +++++++
 protoforge/core/engine.py                     |    5 +
 protoforge/core/generator.py                  |   15 +
 protoforge/core/metrics.py                    |    8 +-
 protoforge/core/simulators.py                 |   33 +
 protoforge/models/device.py                   |    1 +
 .../protocols/mtconnect/lathe_simulator.py    |  421 +++++
 .../templates/mtconnect/lathe_machine.json    |  245 ++-
 tests/test_cnc_metric_generator.py            |  372 ++++
 15 files changed, 2576 insertions(+), 3772 deletions(-)
 create mode 100644 ai/ai.md
 delete mode 100755 ai/predict.py
 delete mode 100755 ai/predict_v2.py
 delete mode 100755 ai/predict_v3_single_scene.py
 delete mode 100644 ai/pridict_v4.py
 create mode 100644 protoforge/core/cnc_metric_generator.py
 create mode 100644 protoforge/core/simulators.py
 create mode 100644 protoforge/protocols/mtconnect/lathe_simulator.py
 create mode 100644 tests/test_cnc_metric_generator.py

diff --git a/ai/ai.md b/ai/ai.md
new file mode 100644
index 0000000..d1a1e16
--- /dev/null
+++ b/ai/ai.md
@@ -0,0 +1,680 @@
+# ProtoForge 预测算法优化方案
+
+## 1. 背景与现状
+
+### 1.1 当前算法（pridict_v5.py）核心能力
+
+- 支持三个 CNC 工位：粗铣（fanuc-cnc）、半精铣（fanuc-cnc-semi-finish）、精铣（fanuc-cnc-finish）
+- 监控指标：feed_rate / spindle_speed / spindle_current / spindle_load / vibration_x/y/z
+- 预测策略：phase_point（稳定信号点预测）、phase_band（波动信号带预测）
+- phase-lock 机制：对齐模板与实时信号的相位偏移
+- 模板学习：健康状态下 EMA 渐进更新，异常状态冻结模板
+- 异常检测：基于超出预测带的比例、连续超出秒数、最大超出倍率三指标
+
+### 1.2 仿真模拟器的变化
+
+仿真器故障类型从早期通用故障（tool_wear、tool_breakage、spindle_overheat）
+**升级为按工位细分的 15+ 种故障类型**，主要变化如下：
+
+| 类型 | 旧故障 | 新故障（现在） |
+|------|--------|--------------|
+| 刀具磨损 | tool_wear（通用） | tool_wear_progressive_rough/semi/finish |
+| 刀具崩刃 | tool_breakage（通用） | tool_breakage_rough/semi/finish |
+| 主轴过热 | spindle_overheat（通用） | spindle_overheat_rough/semi/finish |
+| 进给堵转 | 无 | feed_stall_rough/semi/finish |
+| 电源波动 | 无 | power_fluctuation_rough/semi/finish |
+| 其他 | 无 | tool_overload_protection、air_cutting、built_up_edge、coating_spalling、tool_offset_error |
+
+**故障参数已与工位量程精确对齐**（如粗铣 spindle_load 崩刃冲至 160~185%，精铣仅 70~90%），
+旧算法的硬编码覆盖参数（_SIMULATION_STRATEGY_OVERRIDES）仍可用，但需要与新故障类型匹配更新。
+
+### 1.3 需要解决的核心问题
+
+1. **新故障场景覆盖不足**：积屑瘤（周期性突刺）、空切（负载跳低）、涂层剥落（阶跃跳变）、换刀装夹偏移（均值永久偏移）等新故障模式，当前算法缺乏针对性检测策略。
+2. **真实场景适配欠缺**：渐进式趋势漂移（磨损爬升）用 phase_band 检测不够灵敏；阶跃型故障（崩刃、堵转）用模板对比延迟较高。
+3. **换刀事件无感知**：换刀后正常值域发生整体变化，旧算法会错误告警，需要识别换刀切换并重置模板。
+4. **多指标联动未利用**：各指标独立预测，未利用 load↑+current↑+feed_rate↓ 等联动关系强化检测精度。
+
+---
+
+## 2. 优化目标
+
+1. 覆盖仿真器现有全部 15+ 种故障类型，每种故障至少触发一项指标的异常告警
+2. 区分五类故障模式：**突发冲高**、**渐进漂移**、**周期突刺**、**阶跃跳变**、**均值偏移**
+3. 支持真实场景下的**换刀感知**，自动重置受影响指标的健康模板
+4. 引入**多指标联动置信度**，降低单指标误报率
+5. 保持对仿真环境和真实生产环境的双重适用性
+
+---
+
+## 3. 优化方案
+
+### 3.1 检测层扩展：五种故障模式检测器
+
+在当前基于 phase_band 的超带检测基础上，增加以下检测维度：
+
+#### 3.1.1 趋势漂移检测（针对 tool_wear_progressive_* ）
+- 在长时间窗口（5~10 分钟）上对 spindle_load、spindle_current 拟合线性趋势
+- 斜率超过阈值（如 load > 0.05%/s，current > 0.02A/s）持续 60s 以上，触发告警
+- 与 phase_band 超带并列运行，两者任一触发即告警
+- **参数**：`trend_window_seconds`（默认 300s）、`trend_slope_threshold`（按工位设定）
+
+#### 3.1.2 周期性突刺检测（针对 built_up_edge）
+- 在短时间窗口（30s）内检测超出 phase_band 上边界的瞬时尖峰数量
+- 尖峰定义：持续不超过 5s、幅度超出上界 20% 以上的脉冲
+- 单窗口内尖峰次数 ≥ 3 次触发告警（区别于持续超带的磨损/堵转）
+- **参数**：`spike_window_seconds`、`spike_amp_ratio`、`spike_min_count`
+
+#### 3.1.3 阶跃检测（针对 coating_spalling、tool_breakage_*）
+- 计算相邻 10s 均值之差，超过 IQR 的 1.5 倍视为阶跃
+- 崩刃：阶跃幅度极大（超出正常值 2 倍以上）且随后归零 → 触发 breakage 告警
+- 涂层剥落：阶跃后在新均值稳定 30s 以上 → 触发 coating_spalling 告警
+- **参数**：`step_iqr_multiplier`、`step_stable_seconds`
+
+#### 3.1.4 均值偏移检测（针对 air_cutting、tool_offset_error）
+- 检测最近 60s 的均值与健康模板均值的偏差
+- 空切：load/current 均值跌至模板均值的 30% 以下，且 feed_rate 保持正常 → 空切告警
+- 装夹偏移：load/current 均值持续偏高（1.3~1.6 倍）但不随时间爬升 → 偏移告警
+- **参数**：`air_cut_load_ratio`（默认 0.3）、`offset_load_min_ratio`、`offset_stable_seconds`
+
+#### 3.1.5 快速冲高检测（针对 feed_stall_*、tool_breakage_*）
+- 在 DETECT_WINDOW_SECONDS（30s）内，若超带比例 > 0.8 且值域中位数超出上界，立即触发
+- 相比原有 outside_ratio（0.6）更激进，专用于突发故障的快速响应
+- 与原有检测并行，使用 `fast_alert` 标志与标准告警区分
+
+### 3.2 纯监控数据换刀感知方案
+
+#### 3.2.1 方案背景与可行性评估
+
+在没有 CNC 直接信号（T code / M06）接入的情况下，通过监控数据推断换刀事件是一种工程上可行的兜底方案，但有明确的局限性，需要与使用方对齐预期。
+
+**可行性结论：中等可行，在仿真环境中高度可行，在真实环境中需要配合约束条件。**
+
+| 维度 | 评估 | 说明 |
+|------|------|------|
+| 仿真环境准确率 | 高（>90%） | 仿真器信号无噪声干扰，停机特征清晰 |
+| 真实环境准确率 | 中（70~85%） | 受节拍变化、程序暂停、对刀等操作干扰 |
+| 漏检率 | 低（~5%） | 换刀必然经过停机，主特征不易丢失 |
+| 误报率 | 中（10~20%） | 程序暂停、换工件等也有类似信号 |
+| 响应延迟 | 20~60s | 需要等待换刀后稳定切削才能确认 |
+
+**核心挑战**：换刀停机与以下事件的监控信号高度相似，是误报的主要来源：
+- 程序暂停（M00/M01）
+- 加工完成等待下料
+- 急停或报警停机
+- 主轴定向（换工件时的主轴控制）
+
+#### 3.2.2 换刀事件的监控信号特征
+
+换刀过程在时序上分为四个阶段，每个阶段在监控指标上有不同特征：
+
+```
+阶段一：切削结束        阶段二：停机换刀        阶段三：重启暖机        阶段四：新刀切削
+─────────────────  →  ─────────────────  →  ─────────────────  →  ─────────────────
+feed_rate: 正常         feed_rate ≈ 0          feed_rate = 0          feed_rate: 恢复
+spindle_speed: 正常     spindle_speed ↓→0      spindle_speed: 缓升     spindle_speed: 目标值
+spindle_load: 正常结束  spindle_load ≈ 0       spindle_load: 极低      spindle_load: 新基线
+spindle_current: 正常   spindle_current: 极低  spindle_current: 低     spindle_current: 新基线
+alarm_status: 0        alarm_status: 0        alarm_status: 0        alarm_status: 0
+```
+
+**与相似事件的关键区分特征**：
+
+| 事件 | 停机前 load | 停机持续时长 | alarm_status | 重启后 load |
+|------|------------|-------------|-------------|------------|
+| **换刀** | 正常结束值（无冲高） | 15~60s | 0 | 可能偏离旧基线 ±15%+ |
+| 程序暂停 | 正常结束值 | 不确定（秒级~分钟级） | 0 | 与旧基线一致 |
+| 崩刃停机 | 瞬间冲高（>2倍）后归零 | 短（<15s，触发报警） | **1** | 0（主轴被迫停止） |
+| 加工完成等待 | 正常结束值 | 较长（>60s） | 0 | 与旧基线一致 |
+| 急停/报警 | 不确定 | 不确定 | **1** | 需人工处理 |
+
+#### 3.2.3 换刀检测算法：三阶段判决
+
+换刀检测采用三阶段状态机，避免单一条件误判：
+
+**阶段一：停机候选检测（Idle Candidate）**
+
+触发条件（同时满足）：
+```
+spindle_speed < spindle_speed_idle_threshold（取各工位额定转速的 10%）
+  粗铣: < 200 RPM, 半精铣: < 400 RPM, 精铣: < 600 RPM
+AND feed_rate < 10 mm/min（接近零）
+AND alarm_status == 0（无报警，排除崩刃/急停）
+AND 停机前 spindle_load 无冲高（最近 10s 内 load 峰值 < 模板上界 × 1.3）
+AND 持续时长 ≥ idle_min_seconds（默认 8s，避免切削间隙误触发）
+```
+
+进入 `IDLE_CANDIDATE` 状态，记录停机开始时间戳。
+
+**阶段二：停机类型分类（Idle Classification）**
+
+在 `IDLE_CANDIDATE` 状态下，持续观察停机时长：
+
+```
+停机时长 < idle_min_seconds（8s）       → 切削间隙，忽略，返回正常
+停机时长 8s ~ tool_change_max_seconds  → 进入 POSSIBLE_TOOL_CHANGE 候选
+停机时长 > tool_change_max_seconds     → 长时待机（下料/换工件），记录待机开始，
+                                          待机结束重启后重走阶段一
+```
+
+`tool_change_max_seconds` 建议值：粗铣 90s，半精铣 60s，精铣 60s。
+（换刀物理动作：ATC 换刀 10~30s，手动换刀 30~60s，超出则大概率是其他等待）
+
+**阶段三：换刀确认（Tool Change Confirmation）**
+
+主轴重启后，等待 1~2 个完整切削周期，进行基线偏移确认：
+
+```
+新切削均值 vs 旧模板均值:
+  |新均值 - 旧均值| / 旧均值 > baseline_shift_threshold（默认 0.12，即 12%）
+  → 确认换刀，触发模板重置流程
+
+  |新均值 - 旧均值| / 旧均值 ≤ 0.12
+  → 同型号刀具更换或程序恢复，不重置模板，返回 healthy 状态
+```
+
+这一步是关键判决：**程序暂停恢复后新旧基线一致，换刀后新旧基线大概率偏移**。
+同型号换刀（如定期刀具寿命更换）偏移较小（旧刀磨损→新刀约下降 5~15%），仅当偏移超过 12% 阈值时才会触发确认；偏移不足 12% 的情况见 3.2.6 节。
+
+#### 3.2.4 检测到换刀后的模板处理策略
+
+不立即清空旧模板，采用"快速替换"策略：
+
+**步骤 1：进入 `TOOL_CHANGE` 状态**
+- 暂停所有指标的异常检测输出（避免换刀后初始切削产生大量误报）
+- 保留旧模板，不删除
+
+**步骤 2：新模板预热采集**
+- 等待 `tool_change_stable_cycles`（默认 2 个完整切削周期）的数据
+- 这段时间内用旧模板做参考，不告警，但持续采集数据
+
+**步骤 3：模板切换**
+- 用预热期数据直接重建新模板（build_current_baseline）
+- 新模板建好后，切换回 `healthy` 状态
+- EMA alpha 在换刀后前 5 个周期内升高至 0.3（加速收敛），之后恢复正常 0.1
+
+**步骤 4：换刀事件上报**
+- 向 Webhook 推送换刀事件（时间戳、工位、旧基线值、新基线值、确认置信度）
+- 供 MES/SCADA 层决定是否重置刀具寿命计数器
+
+```
+换刀事件 payload:
+{
+  "event": "tool_change_detected",
+  "device_id": "fanuc-cnc",
+  "timestamp": 1717600000,
+  "idle_duration_seconds": 32,
+  "old_baseline": {"spindle_load": 54.2, "spindle_current": 20.1},
+  "new_baseline": {"spindle_load": 47.0, "spindle_current": 18.5},
+  "baseline_shift_ratio": 0.133,
+  "confidence": 0.82
+}
+```
+
+#### 3.2.5 置信度评分
+
+每次换刀检测附带置信度（0~1），综合以下因素：
+
+| 因素 | 加分条件 | 加分 |
+|------|---------|------|
+| 停机时长合理 | 15~45s（ATC 换刀典型范围） | +0.3 |
+| 停机前无报警 | alarm_status 全程为 0 | +0.2 |
+| 基线偏移显著 | shift_ratio > 0.15 | +0.2 |
+| 基线偏移方向合理 | 新刀 load 低于旧刀（换新刀减摩擦） | +0.1 |
+| 停机前无冲高 | 排除崩刃后停机 | +0.2 |
+
+置信度 < 0.5：记录日志，不触发模板重置，仅标记为"疑似换刀"
+置信度 0.5~0.7：触发模板重置，Webhook 标注 low confidence
+置信度 > 0.7：正常换刀流程
+
+#### 3.2.6 局限性与边界条件
+
+**已知不可处理的情况**：
+1. **同型号换刀 + 新刀基线与旧磨损刀差异 < 12%**：无法触发基线偏移确认，换刀被当程序恢复处理。实际影响：不重置模板，但旧磨损刀的高基线 EMA 会在后续正常切削中自然收敛回新刀水平（约 10~15 个周期）。
+2. **手动操作导致停机时长超过 90s**：被分类为长时待机，换刀重启后会正常走基线偏移检测，只是延迟了确认。
+3. **连续多次换刀（<5 分钟内）**：第二次换刀在第一次模板预热期内发生，需要重置预热计时器并重新采集。设计上支持，但需要测试。
+4. **报警后换刀**：alarm_status=1 期间的停机被过滤掉不识别为换刀，需要 alarm 清除后才重新进入检测流程。
+
+**仿真环境的特殊说明**：
+除崩刃故障会置 alarm_status=1 外，仿真器当前无 alarm_status 的主动置位逻辑；换刀模拟需要通过手动停止/启动设备实现，信号特征与真实换刀一致，算法可直接适用。
+
+### 3.3 多指标联动置信度
+
+不改变各指标的独立预测逻辑，在输出层增加联动评分：
+
+**联动规则（基于仿真故障模式）**：
+
+| 联动组合 | 判断 | 置信度加成 |
+|---------|------|----------|
+| load 异常 + current 异常 | 真实负载问题（非噪声） | +0.3 |
+| load 上升 + feed_rate 下降 | 过载保护或磨损 | +0.2 |
+| feed_rate=0 + load 上升 | 进给堵转 | +0.4 |
+| 所有指标归零 | 崩刃/停机 | +0.4 |
+| load 下降 + current 下降 + feed_rate 正常 | 空切 | +0.3 |
+
+**输出**：新增 `composite_anomaly_confidence` 指标（0~1），供告警聚合系统使用。
+
+### 3.4 仿真策略覆盖表更新
+
+当前 `_SIMULATION_STRATEGY_OVERRIDES` 仅覆盖工位级别的策略选择，
+需要针对新故障类型补充以下调整：
+
+| 工位/指标 | 新增覆盖原因 |
+|----------|------------|
+| 粗铣 spindle_load | 崩刃冲高至 160~185%，现有 band_pad_abs=6.0 偏小，建议检测层增加快速冲高检测 |
+| 三工位 spindle_load | tool_wear_progressive 斜率慢，需要补充趋势检测 |
+| 三工位 spindle_load | air_cutting 跌至 4~12%，需要均值偏移检测 |
+| 三工位 all | tool_offset_error 换刀后整体偏移，需要换刀感知 |
+
+### 3.5 真实场景扩展
+
+除仿真环境外，真实 CNC 场景还需考虑：
+
+1. **数据质量**：真实采集存在缺包、抖动、时间戳乱序
+   - 方案：`normalize_history` 已做插值，追加 `outlier_clip`（3σ 剔除离群点）
+
+2. **工况段切割**：真实机床有待机段（load≈0）和切削段，需要识别并只对切削段建模
+   - 方案：在 `infer_metric_profile` 中增加工况分段，仅用活跃段数据建模（已有 p10 过滤基础，强化）
+
+3. **周期变化**：真实刀路程序可能含多段不同周期，FFT 只取主周期，辅周期被忽略
+   - 方案：对检测到多峰 FFT 的场景，构建 multi-period 模板（可选增强）
+
+4. **采集频率差异**：仿真器 1s/点，真实设备可能 100ms~10s 不等
+   - 方案：`QUERY_STEP` 参数化，自动适配采集频率，确保插值后密度一致
+
+---
+
+## 4. 实现计划
+
+按优先级排序：
+
+### P0（核心，与新故障直接相关）
+1. **趋势漂移检测器**：补充 `detect_trend` 函数，在 `detect_anomaly` 中并行运行
+2. **阶跃检测器**：补充 `detect_step_change`，覆盖崩刃和涂层剥落
+3. **快速冲高检测**：降低堵转/崩刃的响应延迟
+4. **换刀感知状态机**：三阶段判决（停机候选→类型分类→基线偏移确认），新增 `IDLE_CANDIDATE`/`POSSIBLE_TOOL_CHANGE`/`TOOL_CHANGE` 状态
+
+### P1（提升精度）
+5. **均值偏移检测**：覆盖空切和装夹偏移
+6. **周期突刺检测**：覆盖积屑瘤
+7. **仿真策略覆盖表更新**：与新故障类型对齐
+
+### P2（真实场景适配）
+8. **多指标联动置信度**：聚合告警输出
+9. **离群点剔除**：提升真实采集数据鲁棒性
+10. **工况分段强化**：精确识别切削段 vs 待机段
+
+---
+
+## 5. 文件结构规划
+
+当前预测代码已重构为模块化目录 `ai/predictor/`，建议在此基础上增加：
+
+```
+ai/predictor/
+├── anomaly.py          # 现有：超带异常检测（phase_band/phase_point）
+├── trend.py            # 新增：趋势漂移检测
+├── step.py             # 新增：阶跃/均值偏移检测
+├── spike.py            # 新增：周期突刺检测
+├── composite.py        # 新增：多指标联动置信度
+├── tool_change.py      # 新增：换刀感知
+├── phase_lock.py       # 现有：相位锁定
+├── template.py         # 现有：模板构建与预测
+├── signal.py           # 现有：信号预处理
+├── profiling.py        # 现有：指标特征推断
+├── discovery.py        # 现有：设备/指标发现
+├── state.py            # 现有：状态机（需扩展 tool_change 状态）
+├── storage.py          # 现有：状态持久化
+├── models.py           # 现有：数据模型
+├── config.py           # 现有：配置参数
+└── service.py          # 现有：主循环
+```
+
+---
+
+## 6. 关键参数（初始建议值，待验证）
+
+| 参数 | 默认值 | 说明 |
+|------|--------|------|
+| `trend_window_seconds` | 300 | 趋势检测时间窗口 |
+| `trend_slope_threshold_load` | 0.04 %/s | load 趋势斜率告警阈值 |
+| `trend_slope_threshold_current` | 0.015 A/s | current 趋势斜率告警阈值 |
+| `step_iqr_multiplier` | 1.5 | 阶跃检测 IQR 倍数 |
+| `step_stable_seconds` | 30 | 涂层剥落：阶跃后稳定确认时间 |
+| `spike_window_seconds` | 30 | 突刺检测窗口 |
+| `spike_amp_ratio` | 0.2 | 突刺幅度（相对于上界超出比例） |
+| `spike_min_count` | 3 | 窗口内最小突刺次数 |
+| `air_cut_load_ratio` | 0.3 | 空切：load 相对于模板均值的比例下限 |
+| `tool_change_idle_min_seconds` | 8 | 停机候选最小持续时长（排除切削间隙） |
+| `tool_change_max_seconds_rough` | 90 | 粗铣换刀最大停机时长（超出视为长时待机） |
+| `tool_change_max_seconds_semi` | 60 | 半精铣换刀最大停机时长 |
+| `tool_change_max_seconds_finish` | 60 | 精铣换刀最大停机时长 |
+| `tool_change_baseline_shift_threshold` | 0.12 | 基线偏移确认阈值（12%） |
+| `tool_change_stable_cycles` | 2 | 换刀后预热采集周期数 |
+| `tool_change_fast_ema_alpha` | 0.3 | 换刀后前 5 周期的快速 EMA alpha |
+| `tool_change_confidence_threshold` | 0.5 | 换刀确认最低置信度 |
+
+---
+
+## 7. 风险与约束
+
+1. **趋势检测误报**：正常切削段 load 也有周期性起伏，需确保趋势窗口足够长（> 3 个完整周期）
+2. **换刀感知误识别**：程序暂停（M00/M01）与换刀停机信号高度相似，三阶段判决通过"停机时长 + 重启后基线偏移"联合判断降低误报；置信度 < 0.5 时不触发模板重置
+3. **多指标联动延迟**：需要所有指标数据对齐同一时刻，异步拉取可能引入 1~2s 偏差
+4. **模块化重构兼容性**：pridict_v5.py 的逻辑正在迁移至 `ai/predictor/` 目录，方案实现应基于新目录结构
+
+---
+
+## 8. 真实场景可行性分析
+
+### 8.1 方案隐含假设与真实环境的差距
+
+当前方案建立在四个隐含假设上，在真实场景中这些假设并不总是成立，是可行性折扣的主要来源：
+
+**假设一：信号是干净的周期性信号**
+
+仿真器产生的是理想波形。真实 CNC 的 spindle_load 受多种非故障因素叠加干扰：
+
+- 切削材料硬度的批次差异（同一程序每次 load 都略有不同，波动 ±5~10%）
+- 刀路中的局部变速段（倒角、孔位、转角减速导致 load 瞬时跳变）
+- 主轴电机温度漂移（长班次后基线自然上浮 3~8%，会被趋势检测误报为磨损）
+- 冷却液状态切换（开/关冷却液时 spindle_current 有 0.5~1.5A 跳变）
+- 振动信号在真实环境受机床安装基础、夹具刚性、相邻机床干扰
+
+**影响**：phase_band 的 band_pad_abs 在真实场景需要更宽（约 1.5~2x 仿真值），灵敏度相应下降。趋势检测的斜率阈值需要上调以抵抗温度漂移。
+
+**假设二：换刀停机时长在 8~90s 之间**
+
+真实工厂差异极大，当前参数覆盖不全：
+
+| 换刀方式 | 典型时长 | 与当前参数的关系 |
+|---------|---------|---------------|
+| 全自动 ATC 刀库 | 5~15s | 可能触发不到 idle_min_seconds=8s 下限，**漏检** |
+| 半自动（刀臂+人工紧固） | 20~45s | 参数范围内，可识别 |
+| 手动换刀（小型车间） | 45~120s | 超过 tool_change_max_seconds=90s，分类为长时待机 |
+| 换刀+对刀仪测量 | 60~180s | 同上，停机过长被误分类 |
+
+**处置建议**：idle_min_seconds 应可配置（建议范围 3~8s），并提供工厂现场标定工具。
+
+**假设三：换刀后基线偏移 ≥ 12%**
+
+以下场景基线偏移不足，导致换刀漏确认：
+
+- 同规格刀片批量更换（新刀 vs 轻度磨损旧刀）：偏移仅 3~8%，低于 12% 阈值
+- 加工中心多工序换刀中，每段切削工况不同，load 基线本身波动就大于 12%，导致阈值失去意义
+
+**假设四：程序周期稳定**
+
+真实加工程序一个 NC 文件可能包含多段异构工序（外轮廓→钻孔→铰孔→精铣内腔），每段的 load 特征完全不同，FFT 周期估计会退化。这是对 phase_band 建模的根本性挑战。
+
+---
+
+### 8.2 各检测器真实可行性评分
+
+| 检测器 | 仿真可行性 | 真实可行性 | 主要障碍 | 建议处置 |
+|--------|-----------|-----------|---------|---------|
+| phase_band 超带 | 高 | 中 | 真实噪声宽，带宽需要放大，灵敏度下降 | 增加工厂标定流程，按实测数据调 band_pad_abs |
+| 趋势漂移检测 | 高 | 中高 | 电机温漂会产生真实斜率，干扰磨损检测 | 增加温漂补偿（以班次为单位做基准修正） |
+| 阶跃检测（崩刃） | 高 | 高 | 崩刃信号极强，真实中同样清晰可辨 | 可直接使用，崩刃幅度远大于噪声 |
+| 均值偏移（空切） | 高 | 高 | 空切 load 跌幅明显，误报少 | 需配合 feed_rate 保持正常来排除停机 |
+| 周期突刺（积屑瘤） | 中 | 低中 | 真实信号噪声大，突刺难与高频噪声区分 | 需要更长确认窗口，或依赖振动信号辅助 |
+| 换刀感知 | 高 | 中低 | ATC 停机可能 <8s，多种停机事件混淆 | 见 8.3 节 |
+
+**整体评估**：在真实单工序铣床上，方案核心功能（崩刃/堵转/空切检测）可行性高，趋势漂移和换刀感知在未经标定时准确率中等，需要现场调参周期（建议 2~4 周）。
+
+---
+
+## 9. 机型适用性分析
+
+### 9.1 适用性全景
+
+| 机型 | 适用性 | 核心原因 |
+|------|--------|---------|
+| 立式/卧式铣床（单工序） | **高** | 信号周期稳定，方案建模原型，仿真器对应此类 |
+| 车床（普通车削） | **中** | 周期结构相似，但转速随直径变化，换刀停机更短 |
+| 钻床/镗床 | **中** | 周期清晰，但 load 特征与铣削不同，参数需重新标定 |
+| 立式加工中心（VMC） | **低中** | 多工序换刀频繁，单一模板假设失效，需架构改造 |
+| 卧式加工中心（HMC） | **低** | 同上，且托盘交换导致更多停机事件干扰 |
+| 五轴加工中心 | **不适用** | 主轴随姿态变化，load 规律性弱，纯监控数据不足 |
+| 车削中心（Turn-Mill） | **低** | 车铣复合，工况切换频繁，模型无法统一 |
+
+---
+
+### 9.2 单工序铣床（方案最适用场景）
+
+这是当前仿真器建模的原型，也是方案设计的基准场景。
+
+**信号特征**：
+- 一个工件对应一个或少量固定刀路程序，主轴 load 周期稳定
+- 换刀频率低（按刀具寿命，数小时到数天一次）
+- 换刀后运行的程序与之前相同，新旧刀具的 load 差异来源于磨损变化
+
+**方案适配程度**：完全适用，仿真器已充分覆盖此类场景的故障模式。
+
+---
+
+### 9.3 加工中心（VMC/HMC）—— 与方案差异最大
+
+加工中心和铣床最本质的区别是**一个程序内连续自动换多把刀**：
+
+```
+装夹工件 → [刀T01钻孔] → ATC换刀(5~15s) → [刀T02铣面] → ATC换刀 → [刀T03铰孔] → ...
+             load~45%                           load~65%                  load~20%
+```
+
+这对当前方案产生以下结构性冲击：
+
+**问题一：周期结构崩坏**
+
+整个加工程序由多段异构切削组成，每段 load/current 特征不同。FFT 无法找到代表全程的稳定周期，phase_lock 机制的前提失效。
+
+实际观测到的"周期"是整个程序的节拍（从装夹到卸料），但程序内部各工序的 load 差异极大，单一模板无法描述。
+
+**问题二：换刀极频繁且停机极短**
+
+ATC 自动换刀时间通常 5~15s，远低于当前 idle_min_seconds=8s 的下限，导致大部分换刀事件被过滤掉。即使降低阈值到 3s，也无法有效区分 ATC 换刀与切削间隙（如钻孔退刀后的主轴短暂减速）。
+
+**问题三：多工序 load 跳变被误检**
+
+刀T01（钻孔，load~45%）→ ATC换刀 → 刀T02（铣面，load~65%），换刀后 load 上升约 44%，远超 baseline_shift_threshold=12%，会被正确"识别"为换刀——但同时，后续每次换刀到更高 load 工序都会触发，造成模板频繁重置，无法稳定建模。
+
+反过来，load 从铣面→铰孔 下降 70%，会被阶跃检测误报为"崩刃停机"。
+
+**加工中心需要的架构**：
+
+纯监控数据在加工中心场景无法可靠工作。正确方案是**按刀号分段建模**：
+
+```
+每个 T 号 → 独立的 phase_band 模板
+  T01 模板：钻孔段 load 均值 + 波动范围
+  T02 模板：铣面段 load 均值 + 波动范围
+  ...
+```
+
+这需要 CNC 提供 T code 信号（FOCAS/OPC-UA 均可读取）。没有 T code，纯监控数据方案在加工中心上只能做粗粒度检测（如全局崩刃、主轴过热），无法做刀具磨损级别的精细检测。
+
+---
+
+### 9.4 车床
+
+车床信号与铣床的主要差异：
+
+1. **恒线速度（CSS）控制**：加工锥面/端面时，主轴转速随工件直径实时变化（spindle_speed 是连续变化曲线，不是常数），当前 phase_band 对 spindle_speed 的建模假设失效
+2. **换刀停机更短**：刀台旋转换刀通常 2~5s，低于 idle_min_seconds=8s，漏检率高
+3. **load 波形特征**：车削 load 曲线相对平稳，周期性特征不如铣削明显，FFT 估计周期的精度下降
+
+**适配建议**：
+- spindle_speed 指标在车床上不适合用 phase_band，改为范围监控（检查是否在编程转速范围内）
+- idle_min_seconds 调低至 3s
+- load/current 检测仍可用，是车床上最有价值的监控指标
+
+---
+
+### 9.5 其他机床类型适用性分析
+
+以下九类机床在主轴特征、切削力表现形式、换刀方式上与铣床有本质差异，逐一评估。
+
+#### 9.5.1 钻床
+
+**切削特征**：轴向压入式，load 曲线呈"空载→切入尖峰→稳态钻削→退刀归零"的固定形态，周期性来自重复钻孔节拍而非刀路轨迹，形态与铣削不同。
+
+**换刀特征**：换刀频率低（一把钻头加工数十~数百孔），换刀停机通常 30~120s，参数范围内可识别。
+
+**指标适用性**：
+- spindle_load / spindle_current：适用，阶跃检测可捕捉钻头折断（折断时 load 瞬冲后归零）
+- phase_band：对"重复相同孔位"的批量加工可以建模，但每孔内部的斜坡形 load 需要适当放宽 band_pad_abs
+- 趋势检测：适用于钻头磨损（稳态钻削段 load 缓慢爬升）
+
+**综合评分**：中（需调参，核心检测功能可用）
+
+---
+
+#### 9.5.2 镗床
+
+**切削特征**：内孔旋转切削，load 曲线形态与铣削接近（旋转稳态），但切削深度小、切削力弱，spindle_load 信号幅度低（5~20%），信噪比差。
+
+**关键问题**：高精度镗削对 load 波动极敏感，正常切削条件变化（材料硬度、进给微调）都会引起 ±3~5% 的 load 抖动，band_pad_abs 过大则漏报，过小则误报。需要比铣床更精细的参数标定。
+
+**指标适用性**：
+- phase_band：适用，但 band_pad_abs 需缩窄（建议为铣床配置的 50~70%）
+- 趋势检测：适用于镗刀磨损
+- 阶跃检测：适用于刀片崩刃
+
+**综合评分**：中（参数敏感，需精细标定）
+
+---
+
+#### 9.5.3 磨床
+
+**切削特征**：这是与方案假设差异最大的机型。磨削无"刀具"概念（砂轮），主轴转速极高（1000~6000 RPM），切削深度极浅，spindle_load 长期处于低位稳态（5~15%），信号几乎无周期性波动。
+
+**核心矛盾**：phase_band 的建模前提是信号有可学习的周期形态，磨削信号接近直流，FFT 无法找到有意义的周期，模板建模机制失效。
+
+磨床的健康状态退化表现为**功率/电流的长周期缓慢上升**（砂轮磨钝→切削阻力增大），这正是趋势检测的目标场景。
+
+**指标适用性**：
+- phase_band / phase_lock：**不适用**，信号无周期性
+- 趋势检测（detect_trend）：**高度适用**，是磨床监控的核心手段
+- 阶跃检测：适用于砂轮崩碎（load 瞬间冲高后停机）
+- 换刀感知：磨床换砂轮停机时间较长（修整+安装 5~30 分钟），需要大幅调整参数
+
+**综合评分**：低中（phase_band 核心机制不适用，趋势检测单独有价值）
+
+---
+
+#### 9.5.4 齿轮加工机床（滚齿 / 插齿 / 磨齿）
+
+**切削特征**：连续展成运动，spindle_load 在物理上有极高频率的每齿切入脉冲（脉冲宽度毫秒级，频率 = 主轴转速 × 齿数，可达数十~数百 Hz）。
+
+**采样率瓶颈**：当前监控采集频率 1s/点，完全无法捕捉每齿脉冲，只能观测到时间平均 load。在 1s 采样分辨率下，齿轮加工机床的信号行为退化为"类铣床"的低频形态，失去了齿轮加工特有的诊断信息。
+
+**在 1s 采样率下的有效范围**：
+- 粗粒度故障（刀具整体磨损导致均值爬升、断刀后 load 归零）：适用
+- 精细故障（单齿崩刃、齿距误差）：**需要 100ms 以下采样率才能检测，当前方案无法覆盖**
+
+**综合评分**：低中（粗粒度检测可用，精细齿形故障超出当前采样能力范围）
+
+---
+
+#### 9.5.5 螺纹加工机床（丝锥攻丝 / 螺纹铣）
+
+**切削特征**：丝锥攻丝的 load 曲线呈线性爬升（随攻入深度增加）后反转退出（力矩反向），单孔周期 2~15s。最典型故障是**丝锥折断**：折断时 load 瞬间冲高（1.5~3 倍正常值），随后归零——与崩刃阶跃模型完全吻合。
+
+**指标适用性**：
+- 阶跃检测（断丝锥）：**高度适用**，信号特征极清晰
+- phase_band：需要适配"线性斜坡→反转"的非平台式 load 曲线，band 需要随相位动态变化，直接套用当前模板会误报攻丝退刀段
+- 趋势检测：适用于丝锥磨损（稳态攻丝段 load 峰值缓慢爬升）
+
+**综合评分**：中（断丝锥检测直接适用；phase_band 需要适配非平台 load 形态）
+
+---
+
+#### 9.5.6 刨床 / 插床
+
+**切削特征**：往复直线运动（刀具直线进给，工件横向步进），主轴概念不存在或仅指进给驱动。核心监控量是进给轴电机电流/力矩，而非旋转主轴的 spindle_load / spindle_current。
+
+**核心矛盾**：当前方案所有指标定义（spindle_speed、spindle_load、spindle_current）均基于旋转主轴，在刨床/插床上物理意义不成立或为空值。
+
+**综合评分**：**不适用**（监控指标体系与当前方案不匹配，需重新定义适合直线驱动的指标）
+
+---
+
+#### 9.5.7 拉床
+
+**切削特征**：单次直线行程切削，拉刀逐步增加齿高，load 在整个行程中呈**单调线性增大**（前刀切入→后刀逐步加深），行程结束后快速归零。单次行程 5~30s。
+
+**指标适用性**：
+- phase_band：不适用，load 是斜坡形而非平台形，无法用恒定均值模板描述
+- 趋势检测（跨多个工件）：**适用**，拉刀磨损表现为行程内 load 斜率逐渐增大
+- 阶跃检测：适用于拉刀崩齿（局部 load 突刺）
+
+**综合评分**：低中（趋势检测和阶跃检测单独有价值，phase_band 核心机制不适用）
+
+---
+
+#### 9.5.8 锯床（带锯 / 圆锯）
+
+**切削特征**：连续稳态切削，spindle_load 在正常切削中几乎是常数（取决于材料和进给速度），周期性极弱。信号接近直流，与磨床相似。
+
+**主要故障特征**：
+- 锯条/锯片断裂：load 瞬间冲高后归零，与崩刃阶跃完全相同
+- 锯条磨钝：load 缓慢爬升，趋势检测适用
+- 进给速度过快（过载）：load 持续偏高，均值偏移检测适用
+
+**指标适用性**：
+- 阶跃检测（锯条断裂）：**高度适用**
+- 趋势检测（锯条磨钝）：**适用**
+- phase_band：信号太平稳，建不出有周期性的模板，意义不大
+
+**综合评分**：中（阶跃+趋势检测有价值，phase_band 不适用）
+
+---
+
+#### 9.5.9 电加工机床（EDM 放电加工 / 线切割）
+
+**切削特征**：无机械切削力，通过电火花放电蚀除材料，主轴不旋转。监控的核心量是**放电脉冲频率、放电间隙电压、峰值电流**，与 spindle_load / spindle_current 的物理含义完全不同（spindle_current 在电加工机床上即使有采集，代表的是伺服轴驱动电流而非切削负载）。
+
+**核心矛盾**：方案所有检测逻辑围绕"机械切削力在主轴上的体现"设计，电加工机床不存在这个物理过程。
+
+**综合评分**：**完全不适用**（需要针对放电参数重新设计监控体系）
+
+---
+
+### 9.6 全机型适用性汇总
+
+| 机型 | 适用性 | 可用的检测模块 | 不适用的模块 | 主要障碍 |
+|------|--------|-------------|------------|---------|
+| 立式/卧式铣床 | **高** | 全部 | — | 方案设计原型 |
+| 钻床 | **中** | 阶跃、趋势、phase_band | — | load 形态为斜坡而非平台，需调参 |
+| 镗床 | **中** | phase_band、趋势、阶跃 | — | 信号幅度弱，参数敏感 |
+| 车床 | **中** | load/current 全部检测器 | spindle_speed 建模 | CSS 控制下转速不是常数 |
+| 螺纹加工机床 | **中** | 阶跃（断丝锥）、趋势 | phase_band | load 为斜坡+反转 |
+| 齿轮加工机床 | **低中** | 趋势（粗粒度）、阶跃 | 每齿精细检测 | 采样率不足，1s 无法捕捉每齿脉冲 |
+| 磨床 | **低中** | 趋势检测 | phase_band、phase_lock | 信号无周期性 |
+| 锯床 | **中** | 阶跃（断锯条）、趋势 | phase_band | 信号接近直流 |
+| 拉床 | **低中** | 趋势（跨工件）、阶跃 | phase_band | load 为单次斜坡 |
+| 加工中心（VMC/HMC） | **低中** | 全局崩刃/过热 | 刀具磨损精细检测 | 多工序换刀，单模板失效 |
+| 刨床/插床 | **不适用** | — | 全部 | 指标体系不匹配 |
+| 电加工机床 | **不适用** | — | 全部 | 物理过程完全不同 |
+| 五轴加工中心 | **不适用** | — | 全部 | 姿态变化导致 load 规律性消失 |
+
+**结论**：方案的 phase_band + phase_lock 核心机制依赖"信号具有可重复的周期性形态"，这一前提在铣床、钻床、镗床、车床上成立，在磨床、锯床、拉床上不成立，在刨床/电加工机床上完全不适用。趋势检测和阶跃检测的适用范围更广，在大多数有旋转主轴的机床上都能提供基础价值。
+
+### 9.7 方案定位建议
+
+基于以上分析，建议明确方案的适用范围声明：
+
+**当前方案版本（v13/v14）定位**：
+- 最适合：**单工序立式铣床、卧式铣床**（与仿真器一致）
+- 部分适用：**普通车床**（需调参），**钻镗床**（需重标定）
+- 不建议直接用于：**加工中心**（需按刀号分段建模的架构升级），**五轴机床**
+
+**加工中心适配路线**（如需扩展）：
+
+- 短期：仅做全局级检测（主轴过热、崩刃、过载），放弃刀具磨损级别检测
+- 中期：接入 T code 信号，实现按刀号分模板建模，恢复完整检测能力
+- 长期：引入程序段识别（通过 NC 代码解析预知各段工况），实现自适应建模
+
+---
+
+_最后更新：2026-06-07（补充 9.5~9.6：钻床/镗床/磨床/齿轮机床等九类机型适用性分析及汇总表）_
diff --git a/ai/predict.py b/ai/predict.py
deleted file mode 100755
index b70f822..0000000
--- a/ai/predict.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# -*- coding: utf-8 -*-
-
-import requests
-import numpy as np
-from datetime import datetime, timedelta
-
-VM_URL = "http://localhost:8428"
-DEVICE_ID = "fanuc-cnc"
-METRIC = f'feed_rate{{device_id="{DEVICE_ID}"}}'
-
-def fetch_history(minutes=30):
-    """从VM拉取历史数据"""
-    end = datetime.now()
-    start = end - timedelta(minutes=minutes)
-    resp = requests.get(f"{VM_URL}/api/v1/query_range", params={
-        "query": METRIC,
-        "start": start.timestamp(),
-        "end": end.timestamp(),
-        "step": "1s",
-    })
-    result = resp.json()["data"]["result"]
-    if not result:
-        return [], []
-    values = result[0]["values"]
-    ts = [float(v[0]) for v in values]
-    ys = [float(v[1]) for v in values]
-    return ts, ys
-
-def predict_next(ts, ys, horizon=60):
-    """
-    用FFT检测主频，拟合正弦波，外推未来horizon秒
-    适合周期性信号
-    """
-    if len(ys) < 60:
-        return [], []
-
-    ys = np.array(ys)
-    n = len(ys)
-    dt = 1.0  # 1秒采样
-
-    # FFT找主频
-    fft = np.fft.rfft(ys - ys.mean())
-    freqs = np.fft.rfftfreq(n, d=dt)
-    dominant_idx = np.argmax(np.abs(fft[1:])) + 1
-    dominant_freq = freqs[dominant_idx]
-    period = 1.0 / dominant_freq if dominant_freq > 0 else 60
-
-    # 拟合：y = A*sin(2π/T * t + φ) + offset
-    from scipy.optimize import curve_fit
-    t_rel = np.arange(n, dtype=float)
-    offset = ys.mean()
-    amplitude = (ys.max() - ys.min()) / 2
-
-    def sine_model(t, A, T, phi, C):
-        return A * np.sin(2 * np.pi / T * t + phi) + C
-
-    try:
-        popt, _ = curve_fit(
-            sine_model, t_rel, ys,
-            p0=[amplitude, period, 0, offset],
-            maxfev=5000
-        )
-        # 外推
-        t_future = np.arange(n, n + horizon, dtype=float)
-        y_pred = sine_model(t_future, *popt)
-        ts_future = [ts[-1] + i + 1 for i in range(horizon)]
-        return ts_future, y_pred.tolist()
-    except Exception:
-        # 拟合失败降级为线性
-        slope = (ys[-1] - ys[-10]) / 10
-        ts_future = [ts[-1] + i + 1 for i in range(horizon)]
-        y_pred = [ys[-1] + slope * (i + 1) for i in range(horizon)]
-        return ts_future, y_pred
-
-def write_predictions(ts_future, y_pred, metric_name="protoforge_feed_rate_predicted"):
-    """写回VictoriaMetrics"""
-    lines = []
-    for t, y in zip(ts_future, y_pred):
-        ts_ms = int(t * 1000)
-        lines.append(f'{metric_name}{{device_id="{DEVICE_ID}"}} {y:.2f} {ts_ms}')
-    payload = "\n".join(lines)
-    requests.post(f"{VM_URL}/api/v1/import/prometheus", data=payload)
-
-def run_once():
-    ts, ys = fetch_history(minutes=30)
-    if len(ys) < 60:
-        print("数据不足")
-        return
-    ts_future, y_pred = predict_next(ts, ys, horizon=120)
-    write_predictions(ts_future, y_pred)
-    print(f"写入 {len(y_pred)} 个预测点，预测到 +{len(y_pred)}s")
-
-if __name__ == "__main__":
-    import time
-    while True:
-        run_once()
-        time.sleep(30)  # 每30秒重新预测一次
diff --git a/ai/predict_v2.py b/ai/predict_v2.py
deleted file mode 100755
index 933a34f..0000000
--- a/ai/predict_v2.py
+++ /dev/null
@@ -1,571 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-ProtoForge 预测服务 v5
-
-修复点：
-1. 不再使用“单正弦拟合”作为主预测算法。
-2. 主算法改为：周期模板预测（同相位历史值加权平均）。
-3. 周期估计使用 FFT 粗估 + 自相关细化，比单纯 FFT 更稳。
-4. 若可用完整周期不足，则降级为多谐波回归（而不是单正弦）。
-5. 每轮只写入未来 min(HORIZON_SECONDS, POLL_INTERVAL) 秒，避免预测窗口重叠。
-6. 不删除旧预测历史，避免历史预测消失。
-"""
-
-import logging
-import math
-import re
-import time
-from datetime import datetime, timedelta
-from typing import Dict, List, Tuple
-
-import numpy as np
-import requests
-
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s [%(levelname)s] %(message)s",
-)
-logger = logging.getLogger(__name__)
-
-# ── 配置 ──────────────────────────────────────────────────────────────────────
-
-VM_URL = "http://localhost:8428"
-
-PREDICT_TARGETS = [
-    ('feed_rate{device_id="fanuc-cnc"}', "feed_rate_predicted"),
-    ('spindle_speed{device_id="fanuc-cnc"}', "spindle_speed_predicted"),
-    ('spindle_current{device_id="fanuc-cnc"}', "spindle_current_predicted"),
-    ('vibration_x{device_id="fanuc-cnc"}', "vibration_x_predicted"),
-    ('vibration_y{device_id="fanuc-cnc"}', "vibration_y_predicted"),
-    ('vibration_z{device_id="fanuc-cnc"}', "vibration_z_predicted"),
-]
-
-HISTORY_MINUTES = 30
-HORIZON_SECONDS = 120
-POLL_INTERVAL = 30
-WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)
-MIN_POINTS = 120
-QUERY_STEP = "1s"
-
-# 至少要有多少个完整周期，才使用“周期模板预测”
-MIN_FULL_CYCLES_FOR_TEMPLATE = 3
-MAX_CYCLES_FOR_TEMPLATE = 6
-
-# 周期范围
-MIN_PERIOD_SECONDS = 5
-MAX_PERIOD_SECONDS = 3600
-
-# 多谐波回归最高阶数（降级模式）
-MAX_HARMONICS = 4
-
-EXTRA_PREDICT_LABELS = {
-    "forecast": "seasonal_v1",
-    "source": "protoforge",
-}
-
-# 进程内记录每条预测序列上次写到哪里，避免本进程运行时重复写
-LAST_WRITTEN_UNTIL: Dict[str, int] = {}
-
-# ─────────────────────────────────────────────────────────────────────────────
-
-
-def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]:
-    """从 VictoriaMetrics 拉取历史时序数据。"""
-    now = datetime.now()
-    start = now - timedelta(minutes=minutes)
-
-    try:
-        resp = requests.get(
-            f"{VM_URL}/api/v1/query_range",
-            params={
-                "query": query,
-                "start": start.timestamp(),
-                "end": now.timestamp(),
-                "step": QUERY_STEP,
-            },
-            timeout=10,
-        )
-        resp.raise_for_status()
-    except requests.RequestException as e:
-        logger.error("拉取数据失败 query=%s: %s", query, e)
-        return [], []
-
-    try:
-        result = resp.json().get("data", {}).get("result", [])
-    except Exception as e:
-        logger.error("解析 VM 返回失败 query=%s: %s", query, e)
-        return [], []
-
-    if not result:
-        return [], []
-
-    values = result[0].get("values", [])
-    if not values:
-        return [], []
-
-    ts = []
-    ys = []
-    for item in values:
-        if len(item) < 2:
-            continue
-        try:
-            t = float(item[0])
-            y = float(item[1])
-        except Exception:
-            continue
-        if not math.isfinite(t) or not math.isfinite(y):
-            continue
-        ts.append(t)
-        ys.append(y)
-
-    return ts, ys
-
-
-def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]:
-    """
-    清洗历史数据：
-    1. 时间戳整秒化
-    2. 排序
-    3. 同一秒多个点保留最后一个
-    4. 按 1 秒插值补齐
-    """
-    if not ts or not ys or len(ts) != len(ys):
-        return np.array([]), np.array([])
-
-    data = {}
-    for t, y in zip(ts, ys):
-        try:
-            sec = int(round(float(t)))
-            val = float(y)
-        except Exception:
-            continue
-        if not math.isfinite(sec) or not math.isfinite(val):
-            continue
-        data[sec] = val
-
-    if not data:
-        return np.array([]), np.array([])
-
-    sorted_items = sorted(data.items(), key=lambda x: x[0])
-    ts_clean = np.array([x[0] for x in sorted_items], dtype=float)
-    ys_clean = np.array([x[1] for x in sorted_items], dtype=float)
-
-    if len(ts_clean) < 2:
-        return ts_clean, ys_clean
-
-    start_sec = int(ts_clean[0])
-    end_sec = int(ts_clean[-1])
-
-    if end_sec <= start_sec:
-        return ts_clean, ys_clean
-
-    ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float)
-    ys_grid = np.interp(ts_grid, ts_clean, ys_clean)
-
-    return ts_grid, ys_grid
-
-
-def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
-    """FFT 粗估周期。"""
-    n = len(ys_arr)
-    if n < 8:
-        return 60.0
-
-    centered = ys_arr - np.mean(ys_arr)
-    if np.allclose(centered, 0):
-        return 60.0
-
-    fft_vals = np.fft.rfft(centered)
-    freqs = np.fft.rfftfreq(n, d=1.0)
-
-    if len(freqs) <= 1:
-        return 60.0
-
-    power = np.abs(fft_vals[1:])
-    if len(power) == 0 or np.max(power) <= 0:
-        return 60.0
-
-    dominant_idx = int(np.argmax(power)) + 1
-    dominant_freq = float(freqs[dominant_idx])
-    if dominant_freq <= 0:
-        return 60.0
-
-    period = 1.0 / dominant_freq
-    return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
-
-
-def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:
-    """
-    用自相关在 init_period 附近细化周期估计。
-    """
-    n = len(ys_arr)
-    if n < 20:
-        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
-
-    centered = ys_arr - np.mean(ys_arr)
-    if np.allclose(centered, 0):
-        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
-
-    corr = np.correlate(centered, centered, mode="full")[n - 1:]
-
-    p0 = int(round(init_period))
-    left = max(MIN_PERIOD_SECONDS, int(max(2, p0 * 0.7)))
-    right = min(n // 2, int(max(left + 1, p0 * 1.3)))
-
-    if right <= left:
-        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
-
-    search = corr[left:right + 1]
-    if len(search) == 0:
-        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
-
-    best_lag = left + int(np.argmax(search))
-    return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
-
-
-def estimate_period(ys_arr: np.ndarray) -> float:
-    """FFT + 自相关 的组合周期估计。"""
-    p_fft = estimate_period_by_fft(ys_arr)
-    p_refined = refine_period_by_autocorr(ys_arr, p_fft)
-    return p_refined
-
-
-def seasonal_template_predict(
-    ys_arr: np.ndarray,
-    horizon: int,
-    period: int,
-    gap: int = 0,
-    max_cycles: int = MAX_CYCLES_FOR_TEMPLATE,
-) -> List[float]:
-    """
-    同相位历史值加权平均预测。
-    对未来第 k 个点，取过去多个周期同相位点做加权平均：
-        y[n-1+gap+k] ≈ avg(y[n-1+gap+k-p], y[n-1+gap+k-2p], ...)
-    """
-    n = len(ys_arr)
-    preds = []
-
-    for k in range(1, horizon + 1):
-        target_idx = (n - 1) + gap + k
-
-        values = []
-        weights = []
-
-        # m=1 表示最近一个周期；m 越大越久远
-        for m in range(1, max_cycles + 1):
-            hist_idx = target_idx - m * period
-            if 0 <= hist_idx < n:
-                # 越近权重越大
-                w = 1.0 / m
-                values.append(float(ys_arr[hist_idx]))
-                weights.append(w)
-
-        if not values:
-            # 万一拿不到，退化为最后一个值
-            preds.append(float(ys_arr[-1]))
-        else:
-            preds.append(float(np.average(values, weights=weights)))
-
-    return preds
-
-
-def harmonic_regression_predict(
-    ys_arr: np.ndarray,
-    horizon: int,
-    period: int,
-    gap: int = 0,
-    max_harmonics: int = MAX_HARMONICS,
-) -> List[float]:
-    """
-    多谐波回归（降级模式）：
-    y = c + Σ [a_k sin(2πkt/P) + b_k cos(2πkt/P)]
-    相比单正弦，更能表达非标准正弦波形。
-    """
-    n = len(ys_arr)
-    if n < 10 or period <= 1:
-        return [float(ys_arr[-1])] * horizon
-
-    # 周期太短时，谐波数不能太大
-    K = min(max_harmonics, max(1, period // 4))
-
-    t = np.arange(n, dtype=float)
-    cols = [np.ones(n, dtype=float)]
-
-    for k in range(1, K + 1):
-        angle = 2.0 * np.pi * k * t / period
-        cols.append(np.sin(angle))
-        cols.append(np.cos(angle))
-
-    X = np.column_stack(cols)
-
-    try:
-        coef, _, _, _ = np.linalg.lstsq(X, ys_arr, rcond=None)
-    except Exception:
-        return [float(ys_arr[-1])] * horizon
-
-    t_future = np.arange(n + gap, n + gap + horizon, dtype=float)
-    cols_future = [np.ones(horizon, dtype=float)]
-
-    for k in range(1, K + 1):
-        angle = 2.0 * np.pi * k * t_future / period
-        cols_future.append(np.sin(angle))
-        cols_future.append(np.cos(angle))
-
-    X_future = np.column_stack(cols_future)
-    y_pred = X_future @ coef
-
-    return y_pred.astype(float).tolist()
-
-
-def predict_next(
-    ts: List[float],
-    ys: List[float],
-    horizon: int,
-    base_ts: int,
-) -> Tuple[List[float], List[float]]:
-    """
-    主预测函数：
-    1. 周期估计
-    2. 优先使用周期模板预测
-    3. 周期不够时降级为多谐波回归
-    """
-    ts_grid, ys_grid = normalize_history(ts, ys)
-    if len(ys_grid) < MIN_POINTS:
-        return [], []
-
-    y_min = float(np.min(ys_grid))
-    y_max = float(np.max(ys_grid))
-    y_range = y_max - y_min
-
-    if y_range <= 1e-9:
-        base_ts = max(int(base_ts), int(ts_grid[-1]))
-        ts_future = [base_ts + i + 1 for i in range(horizon)]
-        y_pred = [float(ys_grid[-1])] * horizon
-        return ts_future, y_pred
-
-    period_est = estimate_period(ys_grid)
-    period = int(round(period_est))
-    period = max(MIN_PERIOD_SECONDS, min(MAX_PERIOD_SECONDS, period))
-
-    last_real_ts = int(ts_grid[-1])
-    base_ts = max(int(base_ts), last_real_ts)
-
-    # 如果当前时间已经超过最后一个真实点，gap 表示中间“空过去”的秒数
-    gap = max(0, base_ts - last_real_ts)
-
-    ts_future = [base_ts + i + 1 for i in range(horizon)]
-
-    full_cycles = len(ys_grid) // period if period > 0 else 0
-
-    if full_cycles >= MIN_FULL_CYCLES_FOR_TEMPLATE:
-        y_pred = seasonal_template_predict(
-            ys_arr=ys_grid,
-            horizon=horizon,
-            period=period,
-            gap=gap,
-            max_cycles=min(MAX_CYCLES_FOR_TEMPLATE, full_cycles),
-        )
-        model_name = "seasonal_template"
-    else:
-        y_pred = harmonic_regression_predict(
-            ys_arr=ys_grid,
-            horizon=horizon,
-            period=period,
-            gap=gap,
-            max_harmonics=MAX_HARMONICS,
-        )
-        model_name = "harmonic_regression"
-
-    # 合理裁剪，避免偶然外推过大
-    margin = y_range * 0.15
-    lower = y_min - margin
-    upper = y_max + margin
-    y_pred = np.clip(np.array(y_pred, dtype=float), lower, upper).astype(float).tolist()
-
-    logger.debug(
-        "predict_next model=%s period=%ss full_cycles=%s gap=%s",
-        model_name, period, full_cycles, gap
-    )
-
-    return ts_future, y_pred
-
-
-def prom_escape_label_value(value: str) -> str:
-    """Prometheus label value 转义。"""
-    return (
-        str(value)
-        .replace("\\", "\\\\")
-        .replace("\n", "\\n")
-        .replace('"', '\\"')
-    )
-
-
-def labels_to_str(labels: Dict[str, str]) -> str:
-    if not labels:
-        return ""
-    parts = []
-    for k in sorted(labels.keys()):
-        v = prom_escape_label_value(labels[k])
-        parts.append(f'{k}="{v}"')
-    return "{" + ",".join(parts) + "}"
-
-
-def write_predictions(
-    ts_future: List[float],
-    y_pred: List[float],
-    metric_name: str,
-    labels: Dict[str, str],
-) -> bool:
-    """将预测值以 Prometheus exposition 格式写入 VictoriaMetrics。"""
-    if not ts_future or not y_pred or len(ts_future) != len(y_pred):
-        logger.warning("预测数据为空或长度不一致 metric=%s", metric_name)
-        return False
-
-    label_str = labels_to_str(labels)
-    lines = []
-
-    for t, y in zip(ts_future, y_pred):
-        try:
-            ts_sec = int(round(float(t)))
-            val = float(y)
-        except Exception:
-            continue
-
-        if not math.isfinite(ts_sec) or not math.isfinite(val):
-            continue
-
-        ts_ms = ts_sec * 1000
-        lines.append(f"{metric_name}{label_str} {val:.6f} {ts_ms}")
-
-    if not lines:
-        logger.warning("没有可写入的预测点 metric=%s", metric_name)
-        return False
-
-    payload = "\n".join(lines) + "\n"
-
-    try:
-        resp = requests.post(
-            f"{VM_URL}/api/v1/import/prometheus",
-            data=payload.encode("utf-8"),
-            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
-            timeout=10,
-        )
-        resp.raise_for_status()
-        return True
-    except requests.RequestException as e:
-        logger.error("写入预测数据失败 metric=%s: %s", metric_name, e)
-        return False
-
-
-_LABEL_PATTERN = re.compile(
-    r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
-)
-
-
-def _parse_labels(query: str) -> Dict[str, str]:
-    """从查询表达式中解析标签。"""
-    labels = {}
-
-    if "{" not in query or "}" not in query:
-        return labels
-
-    try:
-        label_part = query[query.index("{") + 1: query.rindex("}")]
-    except Exception:
-        return labels
-
-    for match in _LABEL_PATTERN.finditer(label_part):
-        key = match.group(1)
-        value = match.group(2)
-        value = value.replace('\\"', '"').replace("\\n", "\n").replace("\\\\", "\\")
-        labels[key] = value
-
-    return labels
-
-
-def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]:
-    result = {}
-    for d in dicts:
-        if d:
-            result.update(d)
-    return result
-
-
-def series_key(metric_name: str, labels: Dict[str, str]) -> str:
-    return metric_name + labels_to_str(labels)
-
-
-def run_once():
-    now_str = datetime.now().strftime("%H:%M:%S")
-
-    for query, pred_metric in PREDICT_TARGETS:
-        ts, ys = fetch_history(query)
-        if len(ys) < MIN_POINTS:
-            logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
-            continue
-
-        base_labels = _parse_labels(query)
-        write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
-
-        key = series_key(pred_metric, write_labels)
-
-        now_sec = int(time.time())
-        last_until = LAST_WRITTEN_UNTIL.get(key, 0)
-
-        # 避免同一进程内写重叠时间段
-        base_ts = max(now_sec, last_until)
-
-        ts_future, y_pred = predict_next(
-            ts=ts,
-            ys=ys,
-            horizon=WRITE_HORIZON_SECONDS,
-            base_ts=base_ts,
-        )
-
-        if not ts_future or not y_pred:
-            logger.warning("[%s] %s 预测结果为空，跳过", now_str, query)
-            continue
-
-        ok = write_predictions(
-            ts_future=ts_future,
-            y_pred=y_pred,
-            metric_name=pred_metric,
-            labels=write_labels,
-        )
-        if not ok:
-            continue
-
-        LAST_WRITTEN_UNTIL[key] = int(max(ts_future))
-
-        future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
-        future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
-
-        logger.info(
-            "[%s] %-40s → %-35s 写入 %d 点，预测区间 %s ~ %s，标签=%s",
-            now_str,
-            query,
-            pred_metric,
-            len(y_pred),
-            future_start,
-            future_end,
-            labels_to_str(write_labels),
-        )
-
-
-def main():
-    logger.info(
-        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds",
-        VM_URL,
-        HISTORY_MINUTES,
-        HORIZON_SECONDS,
-        WRITE_HORIZON_SECONDS,
-        POLL_INTERVAL,
-    )
-
-    while True:
-        run_once()
-        time.sleep(POLL_INTERVAL)
-
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/ai/predict_v3_single_scene.py b/ai/predict_v3_single_scene.py
deleted file mode 100755
index d212d2d..0000000
--- a/ai/predict_v3_single_scene.py
+++ /dev/null
@@ -1,1487 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-ProtoForge Predictor v10
-
-修复重点：
-1. 修复 lag=0 但预测线仍然相位漂移的问题。
-2. 在谷底相位对齐基础上，增加 phase-lock 相位锁定。
-3. 每轮使用最近 1~2 个周期真实数据，搜索最佳 period + phase_origin。
-4. 预测起点仍然锚定最后一个真实点 last_real_ts，避免写入延迟。
-5. 保留健康模板冻结逻辑：异常期间不学习故障数据。
-6. 保留预测上下界和异常指标。
-"""
-
-import json
-import logging
-import math
-import os
-import re
-import time
-from dataclasses import asdict, dataclass
-from datetime import datetime, timedelta
-from typing import Dict, List, Optional, Tuple
-
-import numpy as np
-import requests
-
-
-# =============================================================================
-# 日志配置
-# =============================================================================
-
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s [%(levelname)s] %(message)s",
-)
-
-logger = logging.getLogger(__name__)
-
-
-# =============================================================================
-# 基础配置
-# =============================================================================
-
-VM_URL = "http://localhost:8428"
-STATE_FILE = "/tmp/protoforge_predictor_state_v10.json"
-
-HISTORY_MINUTES = 30
-HORIZON_SECONDS = 120
-POLL_INTERVAL = 30
-
-WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)
-
-QUERY_STEP = "1s"
-MIN_POINTS = 120
-
-MIN_PERIOD_SECONDS = 5
-MAX_PERIOD_SECONDS = 3600
-
-MIN_FULL_CYCLES_FOR_TEMPLATE = 3
-MAX_CYCLES_FOR_TEMPLATE = 6
-
-DETECT_WINDOW_SECONDS = 20
-RECOVERY_MIN_SECONDS = 60
-
-HEALTHY_EMA_ALPHA = 0.10
-RECOVERY_EMA_ALPHA = 0.25
-
-OUTSIDE_RATIO_THRESHOLD = 0.60
-
-VALLEY_QUANTILE = 45
-
-# phase-lock 配置
-PHASE_LOCK_MIN_WINDOW_SECONDS = 45
-PHASE_LOCK_MAX_WINDOW_SECONDS = 180
-PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12
-PHASE_LOCK_ORIGIN_SEARCH_RATIO = 0.35
-PHASE_LOCK_PERIOD_STEP = 1
-PHASE_LOCK_ORIGIN_STEP = 1
-
-# 真实数据延迟超过这个值，就不继续预测
-MAX_DATA_LAG_SECONDS = 180
-
-# 预测锚定最后一个真实点
-ALIGN_PREDICTION_TO_LAST_REAL_TS = True
-
-
-# =============================================================================
-# 指标配置
-# =============================================================================
-
-PREDICT_TARGETS = [
-    {
-        "query": 'feed_rate{device_id="fanuc-cnc"}',
-        "pred_metric": "feed_rate_predicted",
-        "anomaly_metric": "feed_rate_anomaly",
-        "abs_threshold": 400.0,
-        "rel_threshold": 0.25,
-    },
-    {
-        "query": 'spindle_speed{device_id="fanuc-cnc"}',
-        "pred_metric": "spindle_speed_predicted",
-        "anomaly_metric": "spindle_speed_anomaly",
-        "abs_threshold": 500.0,
-        "rel_threshold": 0.25,
-    },
-    {
-        "query": 'spindle_current{device_id="fanuc-cnc"}',
-        "pred_metric": "spindle_current_predicted",
-        "anomaly_metric": "spindle_current_anomaly",
-        "abs_threshold": 5.0,
-        "rel_threshold": 0.25,
-    },
-    {
-        "query": 'vibration_x{device_id="fanuc-cnc"}',
-        "pred_metric": "vibration_x_predicted",
-        "anomaly_metric": "vibration_x_anomaly",
-        "abs_threshold": 1.0,
-        "rel_threshold": 0.30,
-    },
-    {
-        "query": 'vibration_y{device_id="fanuc-cnc"}',
-        "pred_metric": "vibration_y_predicted",
-        "anomaly_metric": "vibration_y_anomaly",
-        "abs_threshold": 1.0,
-        "rel_threshold": 0.30,
-    },
-    {
-        "query": 'vibration_z{device_id="fanuc-cnc"}',
-        "pred_metric": "vibration_z_predicted",
-        "anomaly_metric": "vibration_z_anomaly",
-        "abs_threshold": 1.0,
-        "rel_threshold": 0.30,
-    },
-]
-
-EXTRA_PREDICT_LABELS = {
-    "forecast": "phase_locked_health_v10",
-    "source": "protoforge",
-}
-
-BASELINE_STATUS_HEALTHY = "healthy"
-BASELINE_STATUS_ANOMALY = "anomaly"
-BASELINE_STATUS_RECOVERING = "recovering"
-
-
-# =============================================================================
-# 状态结构
-# =============================================================================
-
-@dataclass
-class BaselineState:
-    period: int
-    phase_origin_ts: int
-    template: List[float]
-    status: str
-    clean_seconds: int
-    last_update_ts: int
-    last_seen_ts: int
-    y_min: float
-    y_max: float
-
-
-BASELINE_STATES: Dict[str, BaselineState] = {}
-LAST_REAL_TS_WRITTEN: Dict[str, int] = {}
-
-
-# =============================================================================
-# VictoriaMetrics 读取
-# =============================================================================
-
-def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]:
-    now = datetime.now()
-    start = now - timedelta(minutes=minutes)
-
-    try:
-        resp = requests.get(
-            f"{VM_URL}/api/v1/query_range",
-            params={
-                "query": query,
-                "start": start.timestamp(),
-                "end": now.timestamp(),
-                "step": QUERY_STEP,
-            },
-            timeout=10,
-        )
-        resp.raise_for_status()
-    except requests.RequestException as e:
-        logger.error("拉取数据失败 query=%s: %s", query, e)
-        return [], []
-
-    try:
-        result = resp.json().get("data", {}).get("result", [])
-    except Exception as e:
-        logger.error("解析 VM 返回失败 query=%s: %s", query, e)
-        return [], []
-
-    if not result:
-        return [], []
-
-    values = result[0].get("values", [])
-
-    ts = []
-    ys = []
-
-    for item in values:
-        if len(item) < 2:
-            continue
-
-        try:
-            t = float(item[0])
-            y = float(item[1])
-        except Exception:
-            continue
-
-        if not math.isfinite(t) or not math.isfinite(y):
-            continue
-
-        ts.append(t)
-        ys.append(y)
-
-    return ts, ys
-
-
-def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]:
-    if not ts or not ys or len(ts) != len(ys):
-        return np.array([]), np.array([])
-
-    data = {}
-
-    for t, y in zip(ts, ys):
-        try:
-            sec = int(round(float(t)))
-            val = float(y)
-        except Exception:
-            continue
-
-        if not math.isfinite(sec) or not math.isfinite(val):
-            continue
-
-        data[sec] = val
-
-    if not data:
-        return np.array([]), np.array([])
-
-    sorted_items = sorted(data.items(), key=lambda x: x[0])
-
-    ts_clean = np.array([x[0] for x in sorted_items], dtype=float)
-    ys_clean = np.array([x[1] for x in sorted_items], dtype=float)
-
-    if len(ts_clean) < 2:
-        return ts_clean, ys_clean
-
-    start_sec = int(ts_clean[0])
-    end_sec = int(ts_clean[-1])
-
-    if end_sec <= start_sec:
-        return ts_clean, ys_clean
-
-    ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float)
-    ys_grid = np.interp(ts_grid, ts_clean, ys_clean)
-
-    return ts_grid, ys_grid
-
-
-# =============================================================================
-# 周期估计
-# =============================================================================
-
-def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
-    if window <= 1 or len(arr) < window:
-        return arr.astype(float)
-
-    window = int(window)
-
-    if window % 2 == 0:
-        window += 1
-
-    kernel = np.ones(window, dtype=float) / window
-    pad = window // 2
-    padded = np.pad(arr.astype(float), (pad, pad), mode="edge")
-
-    return np.convolve(padded, kernel, mode="valid")
-
-
-def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
-    n = len(ys_arr)
-
-    if n < 8:
-        return 60.0
-
-    centered = ys_arr - np.mean(ys_arr)
-
-    if np.allclose(centered, 0):
-        return 60.0
-
-    fft_vals = np.fft.rfft(centered)
-    freqs = np.fft.rfftfreq(n, d=1.0)
-
-    if len(freqs) <= 1:
-        return 60.0
-
-    power = np.abs(fft_vals[1:])
-
-    if len(power) == 0 or np.max(power) <= 0:
-        return 60.0
-
-    dominant_idx = int(np.argmax(power)) + 1
-    dominant_freq = float(freqs[dominant_idx])
-
-    if dominant_freq <= 0:
-        return 60.0
-
-    period = 1.0 / dominant_freq
-
-    return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
-
-
-def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:
-    n = len(ys_arr)
-
-    if n < 20:
-        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
-
-    centered = ys_arr - np.mean(ys_arr)
-
-    if np.allclose(centered, 0):
-        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
-
-    corr = np.correlate(centered, centered, mode="full")[n - 1:]
-
-    p0 = int(round(init_period))
-    left = max(int(MIN_PERIOD_SECONDS), int(max(2, p0 * 0.7)))
-    right = min(n // 2, int(max(left + 1, p0 * 1.3)))
-
-    if right <= left:
-        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
-
-    search = corr[left:right + 1]
-
-    if len(search) == 0:
-        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
-
-    best_lag = left + int(np.argmax(search))
-
-    return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
-
-
-def estimate_period_rough(ys_arr: np.ndarray) -> int:
-    p_fft = estimate_period_by_fft(ys_arr)
-    p_refined = refine_period_by_autocorr(ys_arr, p_fft)
-
-    period = int(round(p_refined))
-    period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period))
-
-    return int(period)
-
-
-# =============================================================================
-# 谷底检测与模板构建
-# =============================================================================
-
-def find_valley_indices(
-    ts_grid: np.ndarray,
-    ys_grid: np.ndarray,
-    expected_period: int,
-) -> List[int]:
-    n = len(ys_grid)
-
-    if n < max(10, expected_period * 2):
-        return []
-
-    period = max(3, int(expected_period))
-
-    smooth_window = max(3, int(round(period * 0.08)))
-    smooth_window = min(smooth_window, 21)
-
-    ys_smooth = moving_average(ys_grid, smooth_window)
-    threshold = float(np.percentile(ys_smooth, VALLEY_QUANTILE))
-
-    candidates = []
-
-    for i in range(1, n - 1):
-        if (
-            ys_smooth[i] <= ys_smooth[i - 1]
-            and ys_smooth[i] < ys_smooth[i + 1]
-            and ys_smooth[i] <= threshold
-        ):
-            candidates.append(i)
-
-    if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE:
-        candidates = []
-
-        for i in range(1, n - 1):
-            if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]:
-                candidates.append(i)
-
-    if not candidates:
-        return []
-
-    min_distance = max(2, int(round(period * 0.55)))
-    selected = []
-
-    for idx in candidates:
-        if not selected:
-            selected.append(idx)
-            continue
-
-        if idx - selected[-1] >= min_distance:
-            selected.append(idx)
-            continue
-
-        if ys_smooth[idx] < ys_smooth[selected[-1]]:
-            selected[-1] = idx
-
-    if len(selected) < 2:
-        return selected
-
-    cleaned = [selected[0]]
-
-    for idx in selected[1:]:
-        diff = int(ts_grid[idx] - ts_grid[cleaned[-1]])
-
-        if int(period * 0.55) <= diff <= int(period * 1.60):
-            cleaned.append(idx)
-            continue
-
-        if diff < int(period * 0.55):
-            if ys_smooth[idx] < ys_smooth[cleaned[-1]]:
-                cleaned[-1] = idx
-            continue
-
-        cleaned.append(idx)
-
-    return cleaned
-
-
-def detect_period_and_valleys(
-    ts_grid: np.ndarray,
-    ys_grid: np.ndarray,
-) -> Tuple[int, List[int]]:
-    rough = estimate_period_rough(ys_grid)
-    valleys = find_valley_indices(ts_grid, ys_grid, rough)
-
-    if len(valleys) >= 3:
-        diffs = np.diff(ts_grid[valleys])
-        good = diffs[(diffs >= rough * 0.55) & (diffs <= rough * 1.60)]
-
-        if len(good) > 0:
-            period = int(round(float(np.median(good))))
-        else:
-            period = rough
-    else:
-        period = rough
-
-    period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period))
-
-    return int(period), valleys
-
-
-def build_template_from_valleys(
-    ts_grid: np.ndarray,
-    ys_grid: np.ndarray,
-    period: int,
-    valleys: List[int],
-    max_cycles: int = MAX_CYCLES_FOR_TEMPLATE,
-) -> Optional[np.ndarray]:
-    if period <= 1 or len(valleys) < MIN_FULL_CYCLES_FOR_TEMPLATE + 1:
-        return None
-
-    pairs = []
-
-    for a, b in zip(valleys[:-1], valleys[1:]):
-        cycle_len = float(ts_grid[b] - ts_grid[a])
-
-        if period * 0.55 <= cycle_len <= period * 1.60:
-            pairs.append((a, b, cycle_len))
-
-    if len(pairs) < MIN_FULL_CYCLES_FOR_TEMPLATE:
-        return None
-
-    pairs = pairs[-max_cycles:]
-
-    phase_grid = np.arange(period, dtype=float)
-    segments = []
-    weights = []
-
-    for idx, (a, b, cycle_len) in enumerate(pairs):
-        seg_ts = ts_grid[a:b + 1]
-        seg_y = ys_grid[a:b + 1]
-
-        if len(seg_y) < 3:
-            continue
-
-        x_old = (seg_ts - seg_ts[0]) / cycle_len * period
-        seg = np.interp(phase_grid, x_old, seg_y)
-
-        segments.append(seg.astype(float))
-
-        weight = 0.5 + 0.5 * ((idx + 1) / len(pairs))
-        weights.append(weight)
-
-    if len(segments) < MIN_FULL_CYCLES_FOR_TEMPLATE:
-        return None
-
-    arr = np.vstack(segments)
-    w_arr = np.array(weights, dtype=float)
-
-    template = np.average(arr, axis=0, weights=w_arr)
-
-    return template.astype(float)
-
-
-def build_current_baseline(
-    ts_grid: np.ndarray,
-    ys_grid: np.ndarray,
-    tail_seconds: Optional[int] = None,
-) -> Optional[Tuple[int, int, np.ndarray]]:
-    if len(ys_grid) < MIN_POINTS:
-        return None
-
-    if tail_seconds is not None and tail_seconds > 0:
-        cutoff = ts_grid[-1] - int(tail_seconds)
-        mask = ts_grid >= cutoff
-        ts_use = ts_grid[mask]
-        ys_use = ys_grid[mask]
-    else:
-        ts_use = ts_grid
-        ys_use = ys_grid
-
-    if len(ys_use) < MIN_POINTS:
-        return None
-
-    period, valleys = detect_period_and_valleys(ts_use, ys_use)
-
-    template = build_template_from_valleys(
-        ts_grid=ts_use,
-        ys_grid=ys_use,
-        period=period,
-        valleys=valleys,
-    )
-
-    if template is None or len(valleys) == 0:
-        return None
-
-    phase_origin_ts = int(round(float(ts_use[valleys[-1]])))
-
-    return int(period), phase_origin_ts, template
-
-
-# =============================================================================
-# 模板预测与重采样
-# =============================================================================
-
-def circular_template_value(template: np.ndarray, phase: float) -> float:
-    period = len(template)
-
-    if period == 0:
-        return 0.0
-
-    phase = float(phase) % period
-
-    i0 = int(math.floor(phase)) % period
-    i1 = (i0 + 1) % period
-    frac = phase - math.floor(phase)
-
-    return float((1.0 - frac) * template[i0] + frac * template[i1])
-
-
-def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
-    old_period = len(old_template)
-
-    if old_period == new_period:
-        return old_template.astype(float)
-
-    if old_period <= 1 or new_period <= 1:
-        return np.full(new_period, float(np.mean(old_template)), dtype=float)
-
-    old_x = np.linspace(0.0, 1.0, old_period, endpoint=False)
-    new_x = np.linspace(0.0, 1.0, new_period, endpoint=False)
-
-    old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0])
-    old_y_ext = np.concatenate([old_template, old_template, old_template])
-
-    return np.interp(new_x, old_x_ext, old_y_ext).astype(float)
-
-
-def predict_template_values(
-    template: np.ndarray,
-    period: int,
-    phase_origin_ts: int,
-    ts_list: List[int],
-) -> np.ndarray:
-    if period <= 1:
-        return np.zeros(len(ts_list), dtype=float)
-
-    if len(template) != period:
-        template = resample_template(template, period)
-
-    values = []
-
-    for ts in ts_list:
-        phase = (int(ts) - int(phase_origin_ts)) % period
-        values.append(circular_template_value(template, phase))
-
-    return np.array(values, dtype=float)
-
-
-def predict_with_state(state: BaselineState, ts_list: List[int]) -> np.ndarray:
-    template = np.array(state.template, dtype=float)
-
-    return predict_template_values(
-        template=template,
-        period=int(state.period),
-        phase_origin_ts=int(state.phase_origin_ts),
-        ts_list=ts_list,
-    )
-
-
-def normalize_origin_near(origin: int, period: int, near_ts: int) -> int:
-    if period <= 1:
-        return origin
-
-    origin = int(origin)
-    period = int(period)
-    near_ts = int(near_ts)
-
-    while origin + period <= near_ts:
-        origin += period
-
-    while origin > near_ts:
-        origin -= period
-
-    return origin
-
-
-def align_new_template_to_old(
-    old_template: np.ndarray,
-    new_template: np.ndarray,
-) -> np.ndarray:
-    if len(old_template) != len(new_template):
-        old_template = resample_template(old_template, len(new_template))
-
-    period = len(new_template)
-
-    if period <= 2:
-        return new_template.astype(float)
-
-    max_shift = max(1, int(round(period * 0.10)))
-    old_norm = old_template - np.mean(old_template)
-
-    best_score = None
-    best_template = new_template
-
-    for shift in range(-max_shift, max_shift + 1):
-        shifted = np.roll(new_template, shift)
-        shifted_norm = shifted - np.mean(shifted)
-        score = float(np.dot(old_norm, shifted_norm))
-
-        if best_score is None or score > best_score:
-            best_score = score
-            best_template = shifted
-
-    return best_template.astype(float)
-
-
-def merge_template(
-    old_template: np.ndarray,
-    new_template: np.ndarray,
-    alpha: float,
-) -> np.ndarray:
-    alpha = float(np.clip(alpha, 0.0, 1.0))
-
-    if len(old_template) != len(new_template):
-        old_template = resample_template(old_template, len(new_template))
-
-    new_template = align_new_template_to_old(old_template, new_template)
-
-    merged = (1.0 - alpha) * old_template + alpha * new_template
-
-    return merged.astype(float)
-
-
-# =============================================================================
-# Phase Lock
-# =============================================================================
-
-def phase_lock_recent(
-    state: BaselineState,
-    ts_grid: np.ndarray,
-    ys_grid: np.ndarray,
-) -> Tuple[int, int, np.ndarray, float]:
-    base_period = int(state.period)
-    base_origin = int(state.phase_origin_ts)
-    base_template = np.array(state.template, dtype=float)
-
-    if base_period <= 1 or len(base_template) <= 1:
-        ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
-        pred = predict_with_state(state, ts_recent)
-        actual = ys_grid[-len(ts_recent):].astype(float)
-        mae = float(np.mean(np.abs(actual - pred))) if len(actual) else 0.0
-        return base_period, base_origin, pred, mae
-
-    window_seconds = max(
-        PHASE_LOCK_MIN_WINDOW_SECONDS,
-        min(PHASE_LOCK_MAX_WINDOW_SECONDS, int(base_period * 2)),
-    )
-
-    cutoff = ts_grid[-1] - window_seconds
-    mask = ts_grid >= cutoff
-
-    ts_recent_arr = ts_grid[mask].astype(int)
-    actual = ys_grid[mask].astype(float)
-
-    if len(ts_recent_arr) < max(10, DETECT_WINDOW_SECONDS):
-        ts_recent_arr = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int)
-        actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float)
-
-    ts_recent = ts_recent_arr.tolist()
-    last_ts = int(ts_recent[-1])
-
-    p_min = max(int(MIN_PERIOD_SECONDS), int(round(base_period * (1.0 - PHASE_LOCK_PERIOD_SEARCH_RATIO))))
-    p_max = min(int(MAX_PERIOD_SECONDS), int(round(base_period * (1.0 + PHASE_LOCK_PERIOD_SEARCH_RATIO))))
-
-    if p_max < p_min:
-        p_min = p_max = base_period
-
-    best_period = base_period
-    best_origin = normalize_origin_near(base_origin, base_period, last_ts)
-    best_template = resample_template(base_template, best_period)
-    best_pred = predict_template_values(best_template, best_period, best_origin, ts_recent)
-    best_mae = float(np.mean(np.abs(actual - best_pred)))
-
-    for period in range(p_min, p_max + 1, PHASE_LOCK_PERIOD_STEP):
-        template = resample_template(base_template, period)
-        center_origin = normalize_origin_near(base_origin, period, last_ts)
-
-        origin_shift = max(2, int(round(period * PHASE_LOCK_ORIGIN_SEARCH_RATIO)))
-
-        for shift in range(-origin_shift, origin_shift + 1, PHASE_LOCK_ORIGIN_STEP):
-            origin = center_origin + shift
-
-            pred = predict_template_values(
-                template=template,
-                period=period,
-                phase_origin_ts=origin,
-                ts_list=ts_recent,
-            )
-
-            mae = float(np.mean(np.abs(actual - pred)))
-
-            # 轻微惩罚周期变化，避免过拟合抖动
-            penalty = abs(period - base_period) * 0.5
-            score = mae + penalty
-
-            best_score = best_mae + abs(best_period - base_period) * 0.5
-
-            if score < best_score:
-                best_period = period
-                best_origin = origin
-                best_pred = pred
-                best_mae = mae
-
-    best_origin = normalize_origin_near(best_origin, best_period, last_ts)
-
-    return int(best_period), int(best_origin), best_pred, float(best_mae)
-
-
-# =============================================================================
-# 异常检测
-# =============================================================================
-
-def calc_threshold(
-    pred: np.ndarray,
-    abs_threshold: float,
-    rel_threshold: float,
-) -> np.ndarray:
-    return np.maximum(abs_threshold, np.abs(pred) * rel_threshold)
-
-
-def calc_bounds(
-    pred: np.ndarray,
-    abs_threshold: float,
-    rel_threshold: float,
-) -> Tuple[np.ndarray, np.ndarray]:
-    threshold = calc_threshold(pred, abs_threshold, rel_threshold)
-
-    return pred - threshold, pred + threshold
-
-
-def detect_anomaly(
-    state: BaselineState,
-    ts_grid: np.ndarray,
-    ys_grid: np.ndarray,
-    abs_threshold: float,
-    rel_threshold: float,
-) -> Tuple[bool, float, float, float, int, int]:
-    best_period, best_origin, pred_recent, _ = phase_lock_recent(
-        state=state,
-        ts_grid=ts_grid,
-        ys_grid=ys_grid,
-    )
-
-    recent_len = len(pred_recent)
-
-    if recent_len <= 0:
-        return False, 0.0, 0.0, 0.0, best_period, best_origin
-
-    actual = ys_grid[-recent_len:].astype(float)
-
-    threshold = calc_threshold(pred_recent, abs_threshold, rel_threshold)
-
-    abs_err = np.abs(actual - pred_recent)
-    outside = abs_err > threshold
-
-    outside_ratio = float(np.mean(outside))
-    mean_abs_err = float(np.mean(abs_err))
-    mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred_recent), 1.0)))
-
-    is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD
-
-    return (
-        is_anomaly,
-        outside_ratio,
-        mean_abs_err,
-        mean_rel_err,
-        int(best_period),
-        int(best_origin),
-    )
-
-
-# =============================================================================
-# 健康基线状态管理
-# =============================================================================
-
-def create_initial_state(
-    ts_grid: np.ndarray,
-    ys_grid: np.ndarray,
-    now_sec: int,
-) -> Optional[BaselineState]:
-    baseline = build_current_baseline(ts_grid, ys_grid)
-
-    if baseline is None:
-        return None
-
-    period, phase_origin_ts, template = baseline
-
-    return BaselineState(
-        period=int(period),
-        phase_origin_ts=int(phase_origin_ts),
-        template=template.astype(float).tolist(),
-        status=BASELINE_STATUS_HEALTHY,
-        clean_seconds=int(period * MAX_CYCLES_FOR_TEMPLATE),
-        last_update_ts=now_sec,
-        last_seen_ts=now_sec,
-        y_min=float(np.min(ys_grid)),
-        y_max=float(np.max(ys_grid)),
-    )
-
-
-def apply_phase_lock_to_state(
-    state: BaselineState,
-    best_period: int,
-    best_origin: int,
-) -> None:
-    best_period = int(best_period)
-
-    if best_period <= 1:
-        return
-
-    template = np.array(state.template, dtype=float)
-
-    if len(template) != best_period:
-        template = resample_template(template, best_period)
-
-    state.period = best_period
-    state.phase_origin_ts = int(best_origin)
-    state.template = template.astype(float).tolist()
-
-
-def maybe_update_state(
-    key: str,
-    ts_grid: np.ndarray,
-    ys_grid: np.ndarray,
-    abs_threshold: float,
-    rel_threshold: float,
-) -> Tuple[Optional[BaselineState], bool, float, float, float]:
-    now_sec = int(time.time())
-    state = BASELINE_STATES.get(key)
-
-    if state is None:
-        state = create_initial_state(ts_grid, ys_grid, now_sec)
-
-        if state is None:
-            return None, False, 0.0, 0.0, 0.0
-
-        BASELINE_STATES[key] = state
-
-        logger.info(
-            "初始化健康模板 key=%s period=%ss origin=%s clean=%ss",
-            key,
-            state.period,
-            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
-            state.clean_seconds,
-        )
-
-        return state, False, 0.0, 0.0, 0.0
-
-    elapsed = max(1, now_sec - int(state.last_seen_ts))
-    elapsed = min(elapsed, POLL_INTERVAL * 2)
-    state.last_seen_ts = now_sec
-
-    (
-        is_anomaly,
-        outside_ratio,
-        mean_abs_err,
-        mean_rel_err,
-        best_period,
-        best_origin,
-    ) = detect_anomaly(
-        state=state,
-        ts_grid=ts_grid,
-        ys_grid=ys_grid,
-        abs_threshold=abs_threshold,
-        rel_threshold=rel_threshold,
-    )
-
-    if is_anomaly:
-        state.status = BASELINE_STATUS_ANOMALY
-        state.clean_seconds = 0
-
-        BASELINE_STATES[key] = state
-
-        logger.warning(
-            "检测到异常，冻结模板 key=%s outside_ratio=%.2f mean_abs_err=%.2f mean_rel_err=%.2f",
-            key,
-            outside_ratio,
-            mean_abs_err,
-            mean_rel_err,
-        )
-
-        return state, True, outside_ratio, mean_abs_err, mean_rel_err
-
-    old_period = int(state.period)
-    old_origin = int(state.phase_origin_ts)
-
-    apply_phase_lock_to_state(state, best_period, best_origin)
-
-    if old_period != state.period or old_origin != state.phase_origin_ts:
-        logger.info(
-            "phase-lock key=%s period %s -> %s origin %s -> %s",
-            key,
-            old_period,
-            state.period,
-            datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"),
-            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
-        )
-
-    if state.status == BASELINE_STATUS_ANOMALY:
-        state.status = BASELINE_STATUS_RECOVERING
-        state.clean_seconds = elapsed
-
-        BASELINE_STATES[key] = state
-
-        logger.info(
-            "异常开始恢复 key=%s clean_seconds=%ss",
-            key,
-            state.clean_seconds,
-        )
-
-        return state, False, outside_ratio, mean_abs_err, mean_rel_err
-
-    if state.status == BASELINE_STATUS_RECOVERING:
-        state.clean_seconds += elapsed
-    else:
-        state.status = BASELINE_STATUS_HEALTHY
-        state.clean_seconds += elapsed
-
-    min_clean_for_update = max(
-        RECOVERY_MIN_SECONDS,
-        int(state.period) * MIN_FULL_CYCLES_FOR_TEMPLATE,
-    )
-
-    if state.clean_seconds < min_clean_for_update:
-        BASELINE_STATES[key] = state
-        return state, False, outside_ratio, mean_abs_err, mean_rel_err
-
-    tail_seconds = min(
-        int(state.clean_seconds),
-        int(state.period) * MAX_CYCLES_FOR_TEMPLATE,
-    )
-
-    baseline = build_current_baseline(
-        ts_grid=ts_grid,
-        ys_grid=ys_grid,
-        tail_seconds=tail_seconds,
-    )
-
-    if baseline is None:
-        BASELINE_STATES[key] = state
-        return state, False, outside_ratio, mean_abs_err, mean_rel_err
-
-    new_period, new_origin, new_template = baseline
-
-    old_template = np.array(state.template, dtype=float)
-
-    alpha = RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else HEALTHY_EMA_ALPHA
-
-    merged = merge_template(
-        old_template=old_template,
-        new_template=new_template,
-        alpha=alpha,
-    )
-
-    state.period = int(new_period)
-    state.phase_origin_ts = int(new_origin)
-    state.template = merged.astype(float).tolist()
-    state.status = BASELINE_STATUS_HEALTHY
-    state.last_update_ts = now_sec
-
-    if tail_seconds > 0 and len(ys_grid) >= tail_seconds:
-        state.y_min = float(np.min(ys_grid[-tail_seconds:]))
-        state.y_max = float(np.max(ys_grid[-tail_seconds:]))
-    else:
-        state.y_min = float(np.min(ys_grid))
-        state.y_max = float(np.max(ys_grid))
-
-    BASELINE_STATES[key] = state
-
-    logger.info(
-        "更新健康模板 key=%s period=%ss origin=%s clean=%ss alpha=%.2f",
-        key,
-        state.period,
-        datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
-        state.clean_seconds,
-        alpha,
-    )
-
-    return state, False, outside_ratio, mean_abs_err, mean_rel_err
-
-
-# =============================================================================
-# Prometheus Exposition 写入
-# =============================================================================
-
-def prom_escape_label_value(value: str) -> str:
-    return (
-        str(value)
-        .replace("\\", "\\\\")
-        .replace("\n", "\\n")
-        .replace('"', '\\"')
-    )
-
-
-def labels_to_str(labels: Dict[str, str]) -> str:
-    if not labels:
-        return ""
-
-    parts = []
-
-    for k in sorted(labels.keys()):
-        parts.append(f'{k}="{prom_escape_label_value(labels[k])}"')
-
-    return "{" + ",".join(parts) + "}"
-
-
-def write_series(
-    metric_name: str,
-    labels: Dict[str, str],
-    ts_list: List[int],
-    values: List[float],
-) -> bool:
-    if not ts_list or not values or len(ts_list) != len(values):
-        return False
-
-    label_str = labels_to_str(labels)
-    lines = []
-
-    for t, y in zip(ts_list, values):
-        try:
-            ts_sec = int(round(float(t)))
-            val = float(y)
-        except Exception:
-            continue
-
-        if not math.isfinite(ts_sec) or not math.isfinite(val):
-            continue
-
-        lines.append(f"{metric_name}{label_str} {val:.6f} {ts_sec * 1000}")
-
-    if not lines:
-        return False
-
-    payload = "\n".join(lines) + "\n"
-
-    try:
-        resp = requests.post(
-            f"{VM_URL}/api/v1/import/prometheus",
-            data=payload.encode("utf-8"),
-            headers={
-                "Content-Type": "text/plain; version=0.0.4; charset=utf-8",
-            },
-            timeout=10,
-        )
-        resp.raise_for_status()
-        return True
-
-    except requests.RequestException as e:
-        logger.error("写入数据失败 metric=%s: %s", metric_name, e)
-        return False
-
-
-def write_prediction_bundle(
-    pred_metric: str,
-    anomaly_metric: str,
-    labels: Dict[str, str],
-    ts_future: List[int],
-    pred_values: np.ndarray,
-    lower_values: np.ndarray,
-    upper_values: np.ndarray,
-    is_anomaly: bool,
-    outside_ratio: float,
-    mean_abs_err: float,
-    mean_rel_err: float,
-    event_ts: int,
-) -> bool:
-    ok1 = write_series(
-        metric_name=pred_metric,
-        labels=labels,
-        ts_list=ts_future,
-        values=pred_values.astype(float).tolist(),
-    )
-
-    ok2 = write_series(
-        metric_name=f"{pred_metric}_lower",
-        labels=labels,
-        ts_list=ts_future,
-        values=lower_values.astype(float).tolist(),
-    )
-
-    ok3 = write_series(
-        metric_name=f"{pred_metric}_upper",
-        labels=labels,
-        ts_list=ts_future,
-        values=upper_values.astype(float).tolist(),
-    )
-
-    anomaly_labels = dict(labels)
-    anomaly_labels["type"] = "prediction_deviation"
-
-    ok4 = write_series(
-        metric_name=anomaly_metric,
-        labels=anomaly_labels,
-        ts_list=[event_ts],
-        values=[1.0 if is_anomaly else 0.0],
-    )
-
-    ok5 = write_series(
-        metric_name=f"{anomaly_metric}_outside_ratio",
-        labels=anomaly_labels,
-        ts_list=[event_ts],
-        values=[outside_ratio],
-    )
-
-    ok6 = write_series(
-        metric_name=f"{anomaly_metric}_mean_abs_error",
-        labels=anomaly_labels,
-        ts_list=[event_ts],
-        values=[mean_abs_err],
-    )
-
-    ok7 = write_series(
-        metric_name=f"{anomaly_metric}_mean_rel_error",
-        labels=anomaly_labels,
-        ts_list=[event_ts],
-        values=[mean_rel_err],
-    )
-
-    return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7
-
-
-# =============================================================================
-# 标签解析
-# =============================================================================
-
-_LABEL_PATTERN = re.compile(
-    r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
-)
-
-
-def parse_labels_from_query(query: str) -> Dict[str, str]:
-    labels = {}
-
-    if "{" not in query or "}" not in query:
-        return labels
-
-    try:
-        label_part = query[query.index("{") + 1:query.rindex("}")]
-    except Exception:
-        return labels
-
-    for match in _LABEL_PATTERN.finditer(label_part):
-        key = match.group(1)
-        value = match.group(2)
-
-        value = (
-            value
-            .replace('\\"', '"')
-            .replace("\\n", "\n")
-            .replace("\\\\", "\\")
-        )
-
-        labels[key] = value
-
-    return labels
-
-
-def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]:
-    result = {}
-
-    for d in dicts:
-        if d:
-            result.update(d)
-
-    return result
-
-
-def series_key(metric_name: str, labels: Dict[str, str]) -> str:
-    return metric_name + labels_to_str(labels)
-
-
-# =============================================================================
-# 状态持久化
-# =============================================================================
-
-def load_state() -> None:
-    global BASELINE_STATES
-
-    if not os.path.exists(STATE_FILE):
-        return
-
-    try:
-        with open(STATE_FILE, "r", encoding="utf-8") as f:
-            raw = json.load(f)
-
-        states = {}
-
-        for key, value in raw.get("baseline_states", {}).items():
-            required_fields = {
-                "period",
-                "phase_origin_ts",
-                "template",
-                "status",
-                "clean_seconds",
-                "last_update_ts",
-                "last_seen_ts",
-                "y_min",
-                "y_max",
-            }
-
-            if not required_fields.issubset(set(value.keys())):
-                continue
-
-            states[key] = BaselineState(**value)
-
-        BASELINE_STATES = states
-
-        logger.info(
-            "已加载预测状态文件 %s，状态数量=%d",
-            STATE_FILE,
-            len(BASELINE_STATES),
-        )
-
-    except Exception as e:
-        logger.warning("加载预测状态文件失败，将重新学习: %s", e)
-
-
-def save_state() -> None:
-    try:
-        raw = {
-            "baseline_states": {
-                key: asdict(value)
-                for key, value in BASELINE_STATES.items()
-            }
-        }
-
-        tmp_file = STATE_FILE + ".tmp"
-
-        with open(tmp_file, "w", encoding="utf-8") as f:
-            json.dump(raw, f, ensure_ascii=False, indent=2)
-
-        os.replace(tmp_file, STATE_FILE)
-
-    except Exception as e:
-        logger.warning("保存预测状态文件失败: %s", e)
-
-
-# =============================================================================
-# 时间轴
-# =============================================================================
-
-def build_prediction_timestamps(
-    key: str,
-    last_real_ts: int,
-    now_sec: int,
-) -> Optional[List[int]]:
-    data_lag = now_sec - last_real_ts
-
-    if data_lag > MAX_DATA_LAG_SECONDS:
-        logger.warning(
-            "真实数据延迟过大，跳过预测 key=%s data_lag=%ss max=%ss",
-            key,
-            data_lag,
-            MAX_DATA_LAG_SECONDS,
-        )
-        return None
-
-    last_written_real_ts = LAST_REAL_TS_WRITTEN.get(key)
-
-    if last_written_real_ts is not None and last_real_ts <= int(last_written_real_ts):
-        logger.info(
-            "真实数据时间戳未推进，跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s",
-            key,
-            last_real_ts,
-            last_written_real_ts,
-        )
-        return None
-
-    if ALIGN_PREDICTION_TO_LAST_REAL_TS:
-        base_ts = last_real_ts
-    else:
-        base_ts = now_sec
-
-    return [
-        base_ts + i + 1
-        for i in range(WRITE_HORIZON_SECONDS)
-    ]
-
-
-# =============================================================================
-# 主流程
-# =============================================================================
-
-def run_once() -> None:
-    now_str = datetime.now().strftime("%H:%M:%S")
-
-    for target in PREDICT_TARGETS:
-        query = target["query"]
-        pred_metric = target["pred_metric"]
-        anomaly_metric = target["anomaly_metric"]
-        abs_threshold = float(target["abs_threshold"])
-        rel_threshold = float(target["rel_threshold"])
-
-        ts, ys = fetch_history(query)
-
-        if len(ys) < MIN_POINTS:
-            logger.info(
-                "[%s] %s 数据不足（%d 点），跳过",
-                now_str,
-                query,
-                len(ys),
-            )
-            continue
-
-        ts_grid, ys_grid = normalize_history(ts, ys)
-
-        if len(ys_grid) < MIN_POINTS:
-            logger.info(
-                "[%s] %s 清洗后数据不足（%d 点），跳过",
-                now_str,
-                query,
-                len(ys_grid),
-            )
-            continue
-
-        base_labels = parse_labels_from_query(query)
-        write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
-
-        key = series_key(pred_metric, write_labels)
-
-        state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err = maybe_update_state(
-            key=key,
-            ts_grid=ts_grid,
-            ys_grid=ys_grid,
-            abs_threshold=abs_threshold,
-            rel_threshold=rel_threshold,
-        )
-
-        if state is None:
-            logger.info(
-                "[%s] %s 暂无可用健康模板，等待学习",
-                now_str,
-                query,
-            )
-            continue
-
-        now_sec = int(time.time())
-        last_real_ts = int(ts_grid[-1])
-        data_lag = now_sec - last_real_ts
-
-        ts_future = build_prediction_timestamps(
-            key=key,
-            last_real_ts=last_real_ts,
-            now_sec=now_sec,
-        )
-
-        if not ts_future:
-            continue
-
-        pred_values = predict_with_state(state, ts_future)
-
-        lower_values, upper_values = calc_bounds(
-            pred=pred_values,
-            abs_threshold=abs_threshold,
-            rel_threshold=rel_threshold,
-        )
-
-        ok = write_prediction_bundle(
-            pred_metric=pred_metric,
-            anomaly_metric=anomaly_metric,
-            labels=write_labels,
-            ts_future=ts_future,
-            pred_values=pred_values,
-            lower_values=lower_values,
-            upper_values=upper_values,
-            is_anomaly=is_anomaly,
-            outside_ratio=outside_ratio,
-            mean_abs_err=mean_abs_err,
-            mean_rel_err=mean_rel_err,
-            event_ts=last_real_ts,
-        )
-
-        if not ok:
-            logger.error(
-                "[%s] %s 写入预测数据失败",
-                now_str,
-                query,
-            )
-            continue
-
-        LAST_REAL_TS_WRITTEN[key] = last_real_ts
-
-        future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
-        future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
-        last_real_str = datetime.fromtimestamp(last_real_ts).strftime("%H:%M:%S")
-        origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S")
-
-        logger.info(
-            "[%s] %-40s → %-35s status=%s anomaly=%s period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点，预测区间 %s ~ %s",
-            now_str,
-            query,
-            pred_metric,
-            state.status,
-            is_anomaly,
-            state.period,
-            origin_str,
-            last_real_str,
-            data_lag,
-            len(ts_future),
-            future_start,
-            future_end,
-        )
-
-    save_state()
-
-
-def main() -> None:
-    load_state()
-
-    logger.info(
-        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s align_to_last_real=%s",
-        VM_URL,
-        HISTORY_MINUTES,
-        HORIZON_SECONDS,
-        WRITE_HORIZON_SECONDS,
-        POLL_INTERVAL,
-        STATE_FILE,
-        EXTRA_PREDICT_LABELS["forecast"],
-        ALIGN_PREDICTION_TO_LAST_REAL_TS,
-    )
-
-    while True:
-        run_once()
-        time.sleep(POLL_INTERVAL)
-
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/ai/pridict_v4.py b/ai/pridict_v4.py
deleted file mode 100644
index 8657944..0000000
--- a/ai/pridict_v4.py
+++ /dev/null
@@ -1,1604 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-ProtoForge Predictor v11
-
-核心能力：
-1. feed_rate / spindle_speed / spindle_current 使用 phase-lock 点预测。
-2. vibration_x / vibration_y / vibration_z 使用 phase-band 预测带。
-3. vibration 类指标不再追求单点完全贴合，而是输出：
-   - xxx_predicted        中位数预测线
-   - xxx_predicted_upper  正常上边界
-   - xxx_predicted_lower  正常下边界
-4. 预测起点锚定最后一个真实点 last_real_ts，避免时间错位。
-5. 异常期间冻结健康模板，不学习故障数据。
-6. 故障恢复后等待稳定，再恢复模板学习。
-"""
-
-import json
-import logging
-import math
-import os
-import re
-import time
-from dataclasses import asdict, dataclass
-from datetime import datetime, timedelta
-from typing import Dict, List, Optional, Tuple
-
-import numpy as np
-import requests
-
-
-# =============================================================================
-# 日志配置
-# =============================================================================
-
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s [%(levelname)s] %(message)s",
-)
-
-logger = logging.getLogger(__name__)
-
-
-# =============================================================================
-# 基础配置
-# =============================================================================
-
-VM_URL = "http://localhost:8428"
-STATE_FILE = "/tmp/protoforge_predictor_state_v11.json"
-
-HISTORY_MINUTES = 30
-HORIZON_SECONDS = 120
-POLL_INTERVAL = 30
-
-WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)
-
-QUERY_STEP = "1s"
-MIN_POINTS = 120
-
-MIN_PERIOD_SECONDS = 5
-MAX_PERIOD_SECONDS = 3600
-
-MIN_FULL_CYCLES_FOR_TEMPLATE = 3
-MAX_CYCLES_FOR_TEMPLATE = 8
-
-DETECT_WINDOW_SECONDS = 20
-RECOVERY_MIN_SECONDS = 60
-
-HEALTHY_EMA_ALPHA = 0.10
-RECOVERY_EMA_ALPHA = 0.25
-
-OUTSIDE_RATIO_THRESHOLD = 0.60
-
-VALLEY_QUANTILE = 45
-
-MAX_DATA_LAG_SECONDS = 180
-
-PHASE_LOCK_MIN_WINDOW_SECONDS = 45
-PHASE_LOCK_MAX_WINDOW_SECONDS = 180
-PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12
-PHASE_LOCK_ORIGIN_SEARCH_RATIO = 0.35
-PHASE_LOCK_PERIOD_STEP = 1
-PHASE_LOCK_ORIGIN_STEP = 1
-
-
-# =============================================================================
-# 指标配置
-# =============================================================================
-
-PREDICT_TARGETS = [
-    {
-        "query": 'feed_rate{device_id="fanuc-cnc"}',
-        "pred_metric": "feed_rate_predicted",
-        "anomaly_metric": "feed_rate_anomaly",
-        "strategy": "phase_point",
-        "abs_threshold": 400.0,
-        "rel_threshold": 0.25,
-        "smooth_window": 1,
-    },
-    {
-        "query": 'spindle_speed{device_id="fanuc-cnc"}',
-        "pred_metric": "spindle_speed_predicted",
-        "anomaly_metric": "spindle_speed_anomaly",
-        "strategy": "phase_point",
-        "abs_threshold": 500.0,
-        "rel_threshold": 0.25,
-        "smooth_window": 1,
-    },
-    {
-        "query": 'spindle_current{device_id="fanuc-cnc"}',
-        "pred_metric": "spindle_current_predicted",
-        "anomaly_metric": "spindle_current_anomaly",
-        "strategy": "phase_point",
-        "abs_threshold": 5.0,
-        "rel_threshold": 0.25,
-        "smooth_window": 1,
-    },
-    {
-        "query": 'vibration_x{device_id="fanuc-cnc"}',
-        "pred_metric": "vibration_x_predicted",
-        "anomaly_metric": "vibration_x_anomaly",
-        "strategy": "phase_band",
-        "abs_threshold": 0.18,
-        "rel_threshold": 0.50,
-        "smooth_window": 5,
-        "band_low_q": 2,
-        "band_high_q": 98,
-        "band_pad_abs": 0.12,
-    },
-    {
-        "query": 'vibration_y{device_id="fanuc-cnc"}',
-        "pred_metric": "vibration_y_predicted",
-        "anomaly_metric": "vibration_y_anomaly",
-        "strategy": "phase_band",
-        "abs_threshold": 0.18,
-        "rel_threshold": 0.50,
-        "smooth_window": 5,
-        "band_low_q": 2,
-        "band_high_q": 98,
-        "band_pad_abs": 0.12,
-    },
-    {
-        "query": 'vibration_z{device_id="fanuc-cnc"}',
-        "pred_metric": "vibration_z_predicted",
-        "anomaly_metric": "vibration_z_anomaly",
-        "strategy": "phase_band",
-        "abs_threshold": 0.18,
-        "rel_threshold": 0.50,
-        "smooth_window": 5,
-        "band_low_q": 2,
-        "band_high_q": 98,
-        "band_pad_abs": 0.12,
-    }
-]
-
-EXTRA_PREDICT_LABELS = {
-    "forecast": "phase_band_health_v11",
-    "source": "protoforge",
-}
-
-BASELINE_STATUS_HEALTHY = "healthy"
-BASELINE_STATUS_ANOMALY = "anomaly"
-BASELINE_STATUS_RECOVERING = "recovering"
-
-
-# =============================================================================
-# 状态结构
-# =============================================================================
-
-@dataclass
-class BaselineState:
-    period: int
-    phase_origin_ts: int
-    template: List[float]
-    lower_template: List[float]
-    upper_template: List[float]
-    strategy: str
-    status: str
-    clean_seconds: int
-    last_update_ts: int
-    last_seen_ts: int
-    y_min: float
-    y_max: float
-
-
-BASELINE_STATES: Dict[str, BaselineState] = {}
-LAST_REAL_TS_WRITTEN: Dict[str, int] = {}
-
-
-# =============================================================================
-# VictoriaMetrics 读取
-# =============================================================================
-
-def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]:
-    now = datetime.now()
-    start = now - timedelta(minutes=minutes)
-
-    try:
-        resp = requests.get(
-            f"{VM_URL}/api/v1/query_range",
-            params={
-                "query": query,
-                "start": start.timestamp(),
-                "end": now.timestamp(),
-                "step": QUERY_STEP,
-            },
-            timeout=10,
-        )
-        resp.raise_for_status()
-    except requests.RequestException as e:
-        logger.error("拉取数据失败 query=%s: %s", query, e)
-        return [], []
-
-    try:
-        result = resp.json().get("data", {}).get("result", [])
-    except Exception as e:
-        logger.error("解析 VM 返回失败 query=%s: %s", query, e)
-        return [], []
-
-    if not result:
-        return [], []
-
-    values = result[0].get("values", [])
-
-    ts = []
-    ys = []
-
-    for item in values:
-        if len(item) < 2:
-            continue
-
-        try:
-            t = float(item[0])
-            y = float(item[1])
-        except Exception:
-            continue
-
-        if not math.isfinite(t) or not math.isfinite(y):
-            continue
-
-        ts.append(t)
-        ys.append(y)
-
-    return ts, ys
-
-
-def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]:
-    if not ts or not ys or len(ts) != len(ys):
-        return np.array([]), np.array([])
-
-    data = {}
-
-    for t, y in zip(ts, ys):
-        try:
-            sec = int(round(float(t)))
-            val = float(y)
-        except Exception:
-            continue
-
-        if not math.isfinite(sec) or not math.isfinite(val):
-            continue
-
-        data[sec] = val
-
-    if not data:
-        return np.array([]), np.array([])
-
-    sorted_items = sorted(data.items(), key=lambda x: x[0])
-
-    ts_clean = np.array([x[0] for x in sorted_items], dtype=float)
-    ys_clean = np.array([x[1] for x in sorted_items], dtype=float)
-
-    if len(ts_clean) < 2:
-        return ts_clean, ys_clean
-
-    start_sec = int(ts_clean[0])
-    end_sec = int(ts_clean[-1])
-
-    if end_sec <= start_sec:
-        return ts_clean, ys_clean
-
-    ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float)
-    ys_grid = np.interp(ts_grid, ts_clean, ys_clean)
-
-    return ts_grid, ys_grid
-
-
-# =============================================================================
-# 平滑与预处理
-# =============================================================================
-
-def rolling_median(arr: np.ndarray, window: int) -> np.ndarray:
-    if window <= 1 or len(arr) < window:
-        return arr.astype(float)
-
-    if window % 2 == 0:
-        window += 1
-
-    pad = window // 2
-    padded = np.pad(arr.astype(float), (pad, pad), mode="edge")
-
-    result = []
-
-    for i in range(len(arr)):
-        result.append(float(np.median(padded[i:i + window])))
-
-    return np.array(result, dtype=float)
-
-
-def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
-    if window <= 1 or len(arr) < window:
-        return arr.astype(float)
-
-    if window % 2 == 0:
-        window += 1
-
-    kernel = np.ones(window, dtype=float) / window
-    pad = window // 2
-    padded = np.pad(arr.astype(float), (pad, pad), mode="edge")
-
-    return np.convolve(padded, kernel, mode="valid")
-
-
-def preprocess_values(ys_grid: np.ndarray, target: Dict) -> np.ndarray:
-    strategy = target.get("strategy", "phase_point")
-    smooth_window = int(target.get("smooth_window", 1))
-
-    if strategy == "phase_band":
-        return rolling_median(ys_grid, smooth_window)
-
-    if smooth_window > 1:
-        return moving_average(ys_grid, smooth_window)
-
-    return ys_grid.astype(float)
-
-
-# =============================================================================
-# 周期估计
-# =============================================================================
-
-def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
-    n = len(ys_arr)
-
-    if n < 8:
-        return 60.0
-
-    centered = ys_arr - np.mean(ys_arr)
-
-    if np.allclose(centered, 0):
-        return 60.0
-
-    fft_vals = np.fft.rfft(centered)
-    freqs = np.fft.rfftfreq(n, d=1.0)
-
-    if len(freqs) <= 1:
-        return 60.0
-
-    power = np.abs(fft_vals[1:])
-
-    if len(power) == 0 or np.max(power) <= 0:
-        return 60.0
-
-    dominant_idx = int(np.argmax(power)) + 1
-    dominant_freq = float(freqs[dominant_idx])
-
-    if dominant_freq <= 0:
-        return 60.0
-
-    period = 1.0 / dominant_freq
-
-    return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
-
-
-def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:
-    n = len(ys_arr)
-
-    if n < 20:
-        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
-
-    centered = ys_arr - np.mean(ys_arr)
-
-    if np.allclose(centered, 0):
-        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
-
-    corr = np.correlate(centered, centered, mode="full")[n - 1:]
-
-    p0 = int(round(init_period))
-    left = max(int(MIN_PERIOD_SECONDS), int(max(2, p0 * 0.7)))
-    right = min(n // 2, int(max(left + 1, p0 * 1.3)))
-
-    if right <= left:
-        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
-
-    search = corr[left:right + 1]
-
-    if len(search) == 0:
-        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
-
-    best_lag = left + int(np.argmax(search))
-
-    return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
-
-
-def estimate_period_rough(ys_arr: np.ndarray) -> int:
-    p_fft = estimate_period_by_fft(ys_arr)
-    p_refined = refine_period_by_autocorr(ys_arr, p_fft)
-
-    period = int(round(p_refined))
-    period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period))
-
-    return int(period)
-
-
-# =============================================================================
-# 谷底检测
-# =============================================================================
-
-def find_valley_indices(
-    ts_grid: np.ndarray,
-    ys_grid: np.ndarray,
-    expected_period: int,
-) -> List[int]:
-    n = len(ys_grid)
-
-    if n < max(10, expected_period * 2):
-        return []
-
-    period = max(3, int(expected_period))
-    smooth_window = max(3, int(round(period * 0.08)))
-    smooth_window = min(smooth_window, 21)
-
-    ys_smooth = moving_average(ys_grid, smooth_window)
-    threshold = float(np.percentile(ys_smooth, VALLEY_QUANTILE))
-
-    candidates = []
-
-    for i in range(1, n - 1):
-        if (
-            ys_smooth[i] <= ys_smooth[i - 1]
-            and ys_smooth[i] < ys_smooth[i + 1]
-            and ys_smooth[i] <= threshold
-        ):
-            candidates.append(i)
-
-    if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE:
-        candidates = []
-
-        for i in range(1, n - 1):
-            if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]:
-                candidates.append(i)
-
-    if not candidates:
-        return []
-
-    min_distance = max(2, int(round(period * 0.55)))
-    selected = []
-
-    for idx in candidates:
-        if not selected:
-            selected.append(idx)
-            continue
-
-        if idx - selected[-1] >= min_distance:
-            selected.append(idx)
-            continue
-
-        if ys_smooth[idx] < ys_smooth[selected[-1]]:
-            selected[-1] = idx
-
-    if len(selected) < 2:
-        return selected
-
-    cleaned = [selected[0]]
-
-    for idx in selected[1:]:
-        diff = int(ts_grid[idx] - ts_grid[cleaned[-1]])
-
-        if int(period * 0.55) <= diff <= int(period * 1.60):
-            cleaned.append(idx)
-            continue
-
-        if diff < int(period * 0.55):
-            if ys_smooth[idx] < ys_smooth[cleaned[-1]]:
-                cleaned[-1] = idx
-            continue
-
-        cleaned.append(idx)
-
-    return cleaned
-
-
-def detect_period_and_valleys(
-    ts_grid: np.ndarray,
-    ys_grid: np.ndarray,
-) -> Tuple[int, List[int]]:
-    rough = estimate_period_rough(ys_grid)
-    valleys = find_valley_indices(ts_grid, ys_grid, rough)
-
-    if len(valleys) >= 3:
-        diffs = np.diff(ts_grid[valleys])
-        good = diffs[(diffs >= rough * 0.55) & (diffs <= rough * 1.60)]
-
-        if len(good) > 0:
-            period = int(round(float(np.median(good))))
-        else:
-            period = rough
-    else:
-        period = rough
-
-    period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period))
-
-    return int(period), valleys
-
-
-# =============================================================================
-# 模板构建
-# =============================================================================
-
-def build_templates_from_valleys(
-    ts_grid: np.ndarray,
-    ys_grid: np.ndarray,
-    period: int,
-    valleys: List[int],
-    target: Dict,
-) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
-    if period <= 1 or len(valleys) < MIN_FULL_CYCLES_FOR_TEMPLATE + 1:
-        return None
-
-    strategy = target.get("strategy", "phase_point")
-    low_q = float(target.get("band_low_q", 10))
-    high_q = float(target.get("band_high_q", 90))
-
-    pairs = []
-
-    for a, b in zip(valleys[:-1], valleys[1:]):
-        cycle_len = float(ts_grid[b] - ts_grid[a])
-
-        if period * 0.55 <= cycle_len <= period * 1.60:
-            pairs.append((a, b, cycle_len))
-
-    if len(pairs) < MIN_FULL_CYCLES_FOR_TEMPLATE:
-        return None
-
-    pairs = pairs[-MAX_CYCLES_FOR_TEMPLATE:]
-
-    phase_grid = np.arange(period, dtype=float)
-    segments = []
-    weights = []
-
-    for idx, (a, b, cycle_len) in enumerate(pairs):
-        seg_ts = ts_grid[a:b + 1]
-        seg_y = ys_grid[a:b + 1]
-
-        if len(seg_y) < 3:
-            continue
-
-        x_old = (seg_ts - seg_ts[0]) / cycle_len * period
-        seg = np.interp(phase_grid, x_old, seg_y)
-
-        segments.append(seg.astype(float))
-        weights.append(0.5 + 0.5 * ((idx + 1) / len(pairs)))
-
-    if len(segments) < MIN_FULL_CYCLES_FOR_TEMPLATE:
-        return None
-
-    arr = np.vstack(segments)
-    w_arr = np.array(weights, dtype=float)
-
-    if strategy == "phase_band":
-        mid_template = np.percentile(arr, 50, axis=0)
-        lower_template = np.percentile(arr, low_q, axis=0)
-        upper_template = np.percentile(arr, high_q, axis=0)
-    else:
-        mid_template = np.average(arr, axis=0, weights=w_arr)
-        lower_template = mid_template.copy()
-        upper_template = mid_template.copy()
-
-    return (
-        mid_template.astype(float),
-        lower_template.astype(float),
-        upper_template.astype(float),
-    )
-
-
-def build_current_baseline(
-    ts_grid: np.ndarray,
-    ys_grid: np.ndarray,
-    target: Dict,
-    tail_seconds: Optional[int] = None,
-) -> Optional[Tuple[int, int, np.ndarray, np.ndarray, np.ndarray]]:
-    if len(ys_grid) < MIN_POINTS:
-        return None
-
-    if tail_seconds is not None and tail_seconds > 0:
-        cutoff = ts_grid[-1] - int(tail_seconds)
-        mask = ts_grid >= cutoff
-        ts_use = ts_grid[mask]
-        ys_use = ys_grid[mask]
-    else:
-        ts_use = ts_grid
-        ys_use = ys_grid
-
-    if len(ys_use) < MIN_POINTS:
-        return None
-
-    period, valleys = detect_period_and_valleys(ts_use, ys_use)
-
-    templates = build_templates_from_valleys(
-        ts_grid=ts_use,
-        ys_grid=ys_use,
-        period=period,
-        valleys=valleys,
-        target=target,
-    )
-
-    if templates is None or len(valleys) == 0:
-        return None
-
-    template, lower_template, upper_template = templates
-    phase_origin_ts = int(round(float(ts_use[valleys[-1]])))
-
-    return int(period), phase_origin_ts, template, lower_template, upper_template
-
-
-# =============================================================================
-# 模板预测
-# =============================================================================
-
-def circular_template_value(template: np.ndarray, phase: float) -> float:
-    period = len(template)
-
-    if period == 0:
-        return 0.0
-
-    phase = float(phase) % period
-    i0 = int(math.floor(phase)) % period
-    i1 = (i0 + 1) % period
-    frac = phase - math.floor(phase)
-
-    return float((1.0 - frac) * template[i0] + frac * template[i1])
-
-
-def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
-    old_period = len(old_template)
-
-    if old_period == new_period:
-        return old_template.astype(float)
-
-    if old_period <= 1 or new_period <= 1:
-        return np.full(new_period, float(np.mean(old_template)), dtype=float)
-
-    old_x = np.linspace(0.0, 1.0, old_period, endpoint=False)
-    new_x = np.linspace(0.0, 1.0, new_period, endpoint=False)
-
-    old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0])
-    old_y_ext = np.concatenate([old_template, old_template, old_template])
-
-    return np.interp(new_x, old_x_ext, old_y_ext).astype(float)
-
-
-def predict_template_values(
-    template: np.ndarray,
-    period: int,
-    phase_origin_ts: int,
-    ts_list: List[int],
-) -> np.ndarray:
-    if period <= 1:
-        return np.zeros(len(ts_list), dtype=float)
-
-    if len(template) != period:
-        template = resample_template(template, period)
-
-    values = []
-
-    for ts in ts_list:
-        phase = (int(ts) - int(phase_origin_ts)) % period
-        values.append(circular_template_value(template, phase))
-
-    return np.array(values, dtype=float)
-
-
-def predict_state_bundle(
-    state: BaselineState,
-    ts_list: List[int],
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-    period = int(state.period)
-    origin = int(state.phase_origin_ts)
-
-    mid = predict_template_values(
-        template=np.array(state.template, dtype=float),
-        period=period,
-        phase_origin_ts=origin,
-        ts_list=ts_list,
-    )
-
-    lower = predict_template_values(
-        template=np.array(state.lower_template, dtype=float),
-        period=period,
-        phase_origin_ts=origin,
-        ts_list=ts_list,
-    )
-
-    upper = predict_template_values(
-        template=np.array(state.upper_template, dtype=float),
-        period=period,
-        phase_origin_ts=origin,
-        ts_list=ts_list,
-    )
-
-    return mid, lower, upper
-
-
-def normalize_origin_near(origin: int, period: int, near_ts: int) -> int:
-    if period <= 1:
-        return origin
-
-    origin = int(origin)
-    period = int(period)
-    near_ts = int(near_ts)
-
-    while origin + period <= near_ts:
-        origin += period
-
-    while origin > near_ts:
-        origin -= period
-
-    return origin
-
-
-def merge_template(
-    old_template: np.ndarray,
-    new_template: np.ndarray,
-    alpha: float,
-) -> np.ndarray:
-    alpha = float(np.clip(alpha, 0.0, 1.0))
-
-    if len(old_template) != len(new_template):
-        old_template = resample_template(old_template, len(new_template))
-
-    merged = (1.0 - alpha) * old_template + alpha * new_template
-
-    return merged.astype(float)
-
-
-# =============================================================================
-# Phase Lock
-# =============================================================================
-
-def phase_lock_recent(
-    state: BaselineState,
-    ts_grid: np.ndarray,
-    ys_model: np.ndarray,
-) -> Tuple[int, int, np.ndarray, float]:
-    base_period = int(state.period)
-    base_origin = int(state.phase_origin_ts)
-    base_template = np.array(state.template, dtype=float)
-
-    if base_period <= 1 or len(base_template) <= 1:
-        ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
-        pred = predict_template_values(base_template, base_period, base_origin, ts_recent)
-        actual = ys_model[-len(ts_recent):].astype(float)
-        mae = float(np.mean(np.abs(actual - pred))) if len(actual) else 0.0
-        return base_period, base_origin, pred, mae
-
-    window_seconds = max(
-        PHASE_LOCK_MIN_WINDOW_SECONDS,
-        min(PHASE_LOCK_MAX_WINDOW_SECONDS, int(base_period * 2)),
-    )
-
-    cutoff = ts_grid[-1] - window_seconds
-    mask = ts_grid >= cutoff
-
-    ts_recent_arr = ts_grid[mask].astype(int)
-    actual = ys_model[mask].astype(float)
-
-    if len(ts_recent_arr) < max(10, DETECT_WINDOW_SECONDS):
-        ts_recent_arr = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int)
-        actual = ys_model[-DETECT_WINDOW_SECONDS:].astype(float)
-
-    ts_recent = ts_recent_arr.tolist()
-    last_ts = int(ts_recent[-1])
-
-    p_min = max(
-        int(MIN_PERIOD_SECONDS),
-        int(round(base_period * (1.0 - PHASE_LOCK_PERIOD_SEARCH_RATIO))),
-    )
-    p_max = min(
-        int(MAX_PERIOD_SECONDS),
-        int(round(base_period * (1.0 + PHASE_LOCK_PERIOD_SEARCH_RATIO))),
-    )
-
-    best_period = base_period
-    best_origin = normalize_origin_near(base_origin, base_period, last_ts)
-    best_template = resample_template(base_template, best_period)
-
-    best_pred = predict_template_values(
-        template=best_template,
-        period=best_period,
-        phase_origin_ts=best_origin,
-        ts_list=ts_recent,
-    )
-
-    best_mae = float(np.mean(np.abs(actual - best_pred)))
-
-    for period in range(p_min, p_max + 1, PHASE_LOCK_PERIOD_STEP):
-        template = resample_template(base_template, period)
-        center_origin = normalize_origin_near(base_origin, period, last_ts)
-        origin_shift = max(2, int(round(period * PHASE_LOCK_ORIGIN_SEARCH_RATIO)))
-
-        for shift in range(-origin_shift, origin_shift + 1, PHASE_LOCK_ORIGIN_STEP):
-            origin = center_origin + shift
-
-            pred = predict_template_values(
-                template=template,
-                period=period,
-                phase_origin_ts=origin,
-                ts_list=ts_recent,
-            )
-
-            mae = float(np.mean(np.abs(actual - pred)))
-            penalty = abs(period - base_period) * 0.5
-            score = mae + penalty
-
-            best_score = best_mae + abs(best_period - base_period) * 0.5
-
-            if score < best_score:
-                best_period = period
-                best_origin = origin
-                best_pred = pred
-                best_mae = mae
-
-    best_origin = normalize_origin_near(best_origin, best_period, last_ts)
-
-    return int(best_period), int(best_origin), best_pred, float(best_mae)
-
-
-# =============================================================================
-# 异常检测
-# =============================================================================
-
-def calc_point_bounds(
-    pred: np.ndarray,
-    abs_threshold: float,
-    rel_threshold: float,
-) -> Tuple[np.ndarray, np.ndarray]:
-    threshold = np.maximum(abs_threshold, np.abs(pred) * rel_threshold)
-    return pred - threshold, pred + threshold
-
-
-def calc_final_bounds(
-    state: BaselineState,
-    pred: np.ndarray,
-    lower_raw: np.ndarray,
-    upper_raw: np.ndarray,
-    target: Dict,
-) -> Tuple[np.ndarray, np.ndarray]:
-    strategy = target.get("strategy", "phase_point")
-    abs_threshold = float(target.get("abs_threshold", 1.0))
-    rel_threshold = float(target.get("rel_threshold", 0.25))
-
-    if strategy == "phase_band":
-        pad_abs = float(target.get("band_pad_abs", abs_threshold))
-        dynamic_pad = np.maximum(pad_abs, np.abs(pred) * rel_threshold * 0.20)
-        lower = lower_raw - dynamic_pad
-        upper = upper_raw + dynamic_pad
-        return lower, upper
-
-    return calc_point_bounds(pred, abs_threshold, rel_threshold)
-
-
-def detect_anomaly(
-    state: BaselineState,
-    ts_grid: np.ndarray,
-    ys_model: np.ndarray,
-    target: Dict,
-) -> Tuple[bool, float, float, float, int, int]:
-    best_period, best_origin, pred_recent, _ = phase_lock_recent(
-        state=state,
-        ts_grid=ts_grid,
-        ys_model=ys_model,
-    )
-
-    recent_len = len(pred_recent)
-
-    if recent_len <= 0:
-        return False, 0.0, 0.0, 0.0, best_period, best_origin
-
-    actual = ys_model[-recent_len:].astype(float)
-
-    tmp_state = BaselineState(
-        period=best_period,
-        phase_origin_ts=best_origin,
-        template=state.template,
-        lower_template=state.lower_template,
-        upper_template=state.upper_template,
-        strategy=state.strategy,
-        status=state.status,
-        clean_seconds=state.clean_seconds,
-        last_update_ts=state.last_update_ts,
-        last_seen_ts=state.last_seen_ts,
-        y_min=state.y_min,
-        y_max=state.y_max,
-    )
-
-    recent_ts = ts_grid[-recent_len:].astype(int).tolist()
-    pred, lower_raw, upper_raw = predict_state_bundle(tmp_state, recent_ts)
-
-    lower, upper = calc_final_bounds(
-        state=tmp_state,
-        pred=pred,
-        lower_raw=lower_raw,
-        upper_raw=upper_raw,
-        target=target,
-    )
-
-    outside = (actual < lower) | (actual > upper)
-    abs_err = np.abs(actual - pred)
-
-    outside_ratio = float(np.mean(outside))
-    mean_abs_err = float(np.mean(abs_err))
-    mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1e-6)))
-
-    is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD
-
-    return (
-        is_anomaly,
-        outside_ratio,
-        mean_abs_err,
-        mean_rel_err,
-        int(best_period),
-        int(best_origin),
-    )
-
-
-# =============================================================================
-# 状态管理
-# =============================================================================
-
-def create_initial_state(
-    ts_grid: np.ndarray,
-    ys_model: np.ndarray,
-    target: Dict,
-    now_sec: int,
-) -> Optional[BaselineState]:
-    baseline = build_current_baseline(
-        ts_grid=ts_grid,
-        ys_grid=ys_model,
-        target=target,
-    )
-
-    if baseline is None:
-        return None
-
-    period, phase_origin_ts, template, lower_template, upper_template = baseline
-
-    return BaselineState(
-        period=int(period),
-        phase_origin_ts=int(phase_origin_ts),
-        template=template.astype(float).tolist(),
-        lower_template=lower_template.astype(float).tolist(),
-        upper_template=upper_template.astype(float).tolist(),
-        strategy=str(target.get("strategy", "phase_point")),
-        status=BASELINE_STATUS_HEALTHY,
-        clean_seconds=int(period * MAX_CYCLES_FOR_TEMPLATE),
-        last_update_ts=now_sec,
-        last_seen_ts=now_sec,
-        y_min=float(np.min(ys_model)),
-        y_max=float(np.max(ys_model)),
-    )
-
-
-def apply_phase_lock_to_state(
-    state: BaselineState,
-    best_period: int,
-    best_origin: int,
-) -> None:
-    best_period = int(best_period)
-
-    if best_period <= 1:
-        return
-
-    if len(state.template) != best_period:
-        state.template = resample_template(
-            np.array(state.template, dtype=float),
-            best_period,
-        ).astype(float).tolist()
-
-    if len(state.lower_template) != best_period:
-        state.lower_template = resample_template(
-            np.array(state.lower_template, dtype=float),
-            best_period,
-        ).astype(float).tolist()
-
-    if len(state.upper_template) != best_period:
-        state.upper_template = resample_template(
-            np.array(state.upper_template, dtype=float),
-            best_period,
-        ).astype(float).tolist()
-
-    state.period = best_period
-    state.phase_origin_ts = int(best_origin)
-
-
-def maybe_update_state(
-    key: str,
-    ts_grid: np.ndarray,
-    ys_model: np.ndarray,
-    target: Dict,
-) -> Tuple[Optional[BaselineState], bool, float, float, float]:
-    now_sec = int(time.time())
-    state = BASELINE_STATES.get(key)
-
-    if state is None:
-        state = create_initial_state(
-            ts_grid=ts_grid,
-            ys_model=ys_model,
-            target=target,
-            now_sec=now_sec,
-        )
-
-        if state is None:
-            return None, False, 0.0, 0.0, 0.0
-
-        BASELINE_STATES[key] = state
-
-        logger.info(
-            "初始化健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss",
-            key,
-            state.strategy,
-            state.period,
-            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
-            state.clean_seconds,
-        )
-
-        return state, False, 0.0, 0.0, 0.0
-
-    elapsed = max(1, now_sec - int(state.last_seen_ts))
-    elapsed = min(elapsed, POLL_INTERVAL * 2)
-    state.last_seen_ts = now_sec
-
-    (
-        is_anomaly,
-        outside_ratio,
-        mean_abs_err,
-        mean_rel_err,
-        best_period,
-        best_origin,
-    ) = detect_anomaly(
-        state=state,
-        ts_grid=ts_grid,
-        ys_model=ys_model,
-        target=target,
-    )
-
-    if is_anomaly:
-        state.status = BASELINE_STATUS_ANOMALY
-        state.clean_seconds = 0
-        BASELINE_STATES[key] = state
-
-        logger.warning(
-            "检测到异常，冻结模板 key=%s outside_ratio=%.2f mean_abs_err=%.4f mean_rel_err=%.4f",
-            key,
-            outside_ratio,
-            mean_abs_err,
-            mean_rel_err,
-        )
-
-        return state, True, outside_ratio, mean_abs_err, mean_rel_err
-
-    old_period = int(state.period)
-    old_origin = int(state.phase_origin_ts)
-
-    apply_phase_lock_to_state(state, best_period, best_origin)
-
-    if old_period != state.period or old_origin != state.phase_origin_ts:
-        logger.info(
-            "phase-lock key=%s period %s -> %s origin %s -> %s",
-            key,
-            old_period,
-            state.period,
-            datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"),
-            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
-        )
-
-    if state.status == BASELINE_STATUS_ANOMALY:
-        state.status = BASELINE_STATUS_RECOVERING
-        state.clean_seconds = elapsed
-        BASELINE_STATES[key] = state
-
-        logger.info(
-            "异常开始恢复 key=%s clean_seconds=%ss",
-            key,
-            state.clean_seconds,
-        )
-
-        return state, False, outside_ratio, mean_abs_err, mean_rel_err
-
-    if state.status == BASELINE_STATUS_RECOVERING:
-        state.clean_seconds += elapsed
-    else:
-        state.status = BASELINE_STATUS_HEALTHY
-        state.clean_seconds += elapsed
-
-    min_clean_for_update = max(
-        RECOVERY_MIN_SECONDS,
-        int(state.period) * MIN_FULL_CYCLES_FOR_TEMPLATE,
-    )
-
-    if state.clean_seconds < min_clean_for_update:
-        BASELINE_STATES[key] = state
-        return state, False, outside_ratio, mean_abs_err, mean_rel_err
-
-    tail_seconds = min(
-        int(state.clean_seconds),
-        int(state.period) * MAX_CYCLES_FOR_TEMPLATE,
-    )
-
-    baseline = build_current_baseline(
-        ts_grid=ts_grid,
-        ys_grid=ys_model,
-        target=target,
-        tail_seconds=tail_seconds,
-    )
-
-    if baseline is None:
-        BASELINE_STATES[key] = state
-        return state, False, outside_ratio, mean_abs_err, mean_rel_err
-
-    new_period, new_origin, new_template, new_lower_template, new_upper_template = baseline
-
-    alpha = RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else HEALTHY_EMA_ALPHA
-
-    state.template = merge_template(
-        np.array(state.template, dtype=float),
-        new_template,
-        alpha,
-    ).astype(float).tolist()
-
-    state.lower_template = merge_template(
-        np.array(state.lower_template, dtype=float),
-        new_lower_template,
-        alpha,
-    ).astype(float).tolist()
-
-    state.upper_template = merge_template(
-        np.array(state.upper_template, dtype=float),
-        new_upper_template,
-        alpha,
-    ).astype(float).tolist()
-
-    state.period = int(new_period)
-    state.phase_origin_ts = int(new_origin)
-    state.status = BASELINE_STATUS_HEALTHY
-    state.last_update_ts = now_sec
-
-    if tail_seconds > 0 and len(ys_model) >= tail_seconds:
-        state.y_min = float(np.min(ys_model[-tail_seconds:]))
-        state.y_max = float(np.max(ys_model[-tail_seconds:]))
-    else:
-        state.y_min = float(np.min(ys_model))
-        state.y_max = float(np.max(ys_model))
-
-    BASELINE_STATES[key] = state
-
-    logger.info(
-        "更新健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss alpha=%.2f",
-        key,
-        state.strategy,
-        state.period,
-        datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
-        state.clean_seconds,
-        alpha,
-    )
-
-    return state, False, outside_ratio, mean_abs_err, mean_rel_err
-
-
-# =============================================================================
-# Prometheus 写入
-# =============================================================================
-
-def prom_escape_label_value(value: str) -> str:
-    return (
-        str(value)
-        .replace("\\", "\\\\")
-        .replace("\n", "\\n")
-        .replace('"', '\\"')
-    )
-
-
-def labels_to_str(labels: Dict[str, str]) -> str:
-    if not labels:
-        return ""
-
-    parts = []
-
-    for k in sorted(labels.keys()):
-        parts.append(f'{k}="{prom_escape_label_value(labels[k])}"')
-
-    return "{" + ",".join(parts) + "}"
-
-
-def write_series(
-    metric_name: str,
-    labels: Dict[str, str],
-    ts_list: List[int],
-    values: List[float],
-) -> bool:
-    if not ts_list or not values or len(ts_list) != len(values):
-        return False
-
-    label_str = labels_to_str(labels)
-    lines = []
-
-    for t, y in zip(ts_list, values):
-        try:
-            ts_sec = int(round(float(t)))
-            val = float(y)
-        except Exception:
-            continue
-
-        if not math.isfinite(ts_sec) or not math.isfinite(val):
-            continue
-
-        lines.append(f"{metric_name}{label_str} {val:.6f} {ts_sec * 1000}")
-
-    if not lines:
-        return False
-
-    payload = "\n".join(lines) + "\n"
-
-    try:
-        resp = requests.post(
-            f"{VM_URL}/api/v1/import/prometheus",
-            data=payload.encode("utf-8"),
-            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
-            timeout=10,
-        )
-        resp.raise_for_status()
-        return True
-
-    except requests.RequestException as e:
-        logger.error("写入数据失败 metric=%s: %s", metric_name, e)
-        return False
-
-
-def write_prediction_bundle(
-    pred_metric: str,
-    anomaly_metric: str,
-    labels: Dict[str, str],
-    ts_future: List[int],
-    pred_values: np.ndarray,
-    lower_values: np.ndarray,
-    upper_values: np.ndarray,
-    is_anomaly: bool,
-    outside_ratio: float,
-    mean_abs_err: float,
-    mean_rel_err: float,
-    event_ts: int,
-) -> bool:
-    ok1 = write_series(
-        metric_name=pred_metric,
-        labels=labels,
-        ts_list=ts_future,
-        values=pred_values.astype(float).tolist(),
-    )
-
-    ok2 = write_series(
-        metric_name=f"{pred_metric}_lower",
-        labels=labels,
-        ts_list=ts_future,
-        values=lower_values.astype(float).tolist(),
-    )
-
-    ok3 = write_series(
-        metric_name=f"{pred_metric}_upper",
-        labels=labels,
-        ts_list=ts_future,
-        values=upper_values.astype(float).tolist(),
-    )
-
-    anomaly_labels = dict(labels)
-    anomaly_labels["type"] = "prediction_deviation"
-
-    ok4 = write_series(
-        metric_name=anomaly_metric,
-        labels=anomaly_labels,
-        ts_list=[event_ts],
-        values=[1.0 if is_anomaly else 0.0],
-    )
-
-    ok5 = write_series(
-        metric_name=f"{anomaly_metric}_outside_ratio",
-        labels=anomaly_labels,
-        ts_list=[event_ts],
-        values=[outside_ratio],
-    )
-
-    ok6 = write_series(
-        metric_name=f"{anomaly_metric}_mean_abs_error",
-        labels=anomaly_labels,
-        ts_list=[event_ts],
-        values=[mean_abs_err],
-    )
-
-    ok7 = write_series(
-        metric_name=f"{anomaly_metric}_mean_rel_error",
-        labels=anomaly_labels,
-        ts_list=[event_ts],
-        values=[mean_rel_err],
-    )
-
-    return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7
-
-
-# =============================================================================
-# 标签解析
-# =============================================================================
-
-_LABEL_PATTERN = re.compile(
-    r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
-)
-
-
-def parse_labels_from_query(query: str) -> Dict[str, str]:
-    labels = {}
-
-    if "{" not in query or "}" not in query:
-        return labels
-
-    try:
-        label_part = query[query.index("{") + 1:query.rindex("}")]
-    except Exception:
-        return labels
-
-    for match in _LABEL_PATTERN.finditer(label_part):
-        key = match.group(1)
-        value = match.group(2)
-
-        value = (
-            value
-            .replace('\\"', '"')
-            .replace("\\n", "\n")
-            .replace("\\\\", "\\")
-        )
-
-        labels[key] = value
-
-    return labels
-
-
-def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]:
-    result = {}
-
-    for d in dicts:
-        if d:
-            result.update(d)
-
-    return result
-
-
-def series_key(metric_name: str, labels: Dict[str, str]) -> str:
-    return metric_name + labels_to_str(labels)
-
-
-# =============================================================================
-# 状态持久化
-# =============================================================================
-
-def load_state() -> None:
-    global BASELINE_STATES
-
-    if not os.path.exists(STATE_FILE):
-        return
-
-    try:
-        with open(STATE_FILE, "r", encoding="utf-8") as f:
-            raw = json.load(f)
-
-        states = {}
-
-        for key, value in raw.get("baseline_states", {}).items():
-            required_fields = {
-                "period",
-                "phase_origin_ts",
-                "template",
-                "lower_template",
-                "upper_template",
-                "strategy",
-                "status",
-                "clean_seconds",
-                "last_update_ts",
-                "last_seen_ts",
-                "y_min",
-                "y_max",
-            }
-
-            if not required_fields.issubset(set(value.keys())):
-                continue
-
-            states[key] = BaselineState(**value)
-
-        BASELINE_STATES = states
-
-        logger.info(
-            "已加载预测状态文件 %s，状态数量=%d",
-            STATE_FILE,
-            len(BASELINE_STATES),
-        )
-
-    except Exception as e:
-        logger.warning("加载预测状态文件失败，将重新学习: %s", e)
-
-
-def save_state() -> None:
-    try:
-        raw = {
-            "baseline_states": {
-                key: asdict(value)
-                for key, value in BASELINE_STATES.items()
-            }
-        }
-
-        tmp_file = STATE_FILE + ".tmp"
-
-        with open(tmp_file, "w", encoding="utf-8") as f:
-            json.dump(raw, f, ensure_ascii=False, indent=2)
-
-        os.replace(tmp_file, STATE_FILE)
-
-    except Exception as e:
-        logger.warning("保存预测状态文件失败: %s", e)
-
-
-# =============================================================================
-# 时间轴
-# =============================================================================
-
-def build_prediction_timestamps(
-    key: str,
-    last_real_ts: int,
-    now_sec: int,
-) -> Optional[List[int]]:
-    data_lag = now_sec - last_real_ts
-
-    if data_lag > MAX_DATA_LAG_SECONDS:
-        logger.warning(
-            "真实数据延迟过大，跳过预测 key=%s data_lag=%ss max=%ss",
-            key,
-            data_lag,
-            MAX_DATA_LAG_SECONDS,
-        )
-        return None
-
-    last_written_real_ts = LAST_REAL_TS_WRITTEN.get(key)
-
-    if last_written_real_ts is not None and last_real_ts <= int(last_written_real_ts):
-        logger.info(
-            "真实数据时间戳未推进，跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s",
-            key,
-            last_real_ts,
-            last_written_real_ts,
-        )
-        return None
-
-    base_ts = last_real_ts
-
-    return [
-        base_ts + i + 1
-        for i in range(WRITE_HORIZON_SECONDS)
-    ]
-
-
-# =============================================================================
-# 主流程
-# =============================================================================
-
-def run_once() -> None:
-    now_str = datetime.now().strftime("%H:%M:%S")
-
-    for target in PREDICT_TARGETS:
-        query = target["query"]
-        pred_metric = target["pred_metric"]
-        anomaly_metric = target["anomaly_metric"]
-
-        ts, ys = fetch_history(query)
-
-        if len(ys) < MIN_POINTS:
-            logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
-            continue
-
-        ts_grid, ys_grid_raw = normalize_history(ts, ys)
-
-        if len(ys_grid_raw) < MIN_POINTS:
-            logger.info("[%s] %s 清洗后数据不足（%d 点），跳过", now_str, query, len(ys_grid_raw))
-            continue
-
-        ys_grid_model = preprocess_values(ys_grid_raw, target)
-
-        base_labels = parse_labels_from_query(query)
-        write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
-
-        key = series_key(pred_metric, write_labels)
-
-        state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err = maybe_update_state(
-            key=key,
-            ts_grid=ts_grid,
-            ys_model=ys_grid_model,
-            target=target,
-        )
-
-        if state is None:
-            logger.info("[%s] %s 暂无可用健康模板，等待学习", now_str, query)
-            continue
-
-        now_sec = int(time.time())
-        last_real_ts = int(ts_grid[-1])
-        data_lag = now_sec - last_real_ts
-
-        ts_future = build_prediction_timestamps(
-            key=key,
-            last_real_ts=last_real_ts,
-            now_sec=now_sec,
-        )
-
-        if not ts_future:
-            continue
-
-        pred_values, lower_raw, upper_raw = predict_state_bundle(state, ts_future)
-
-        lower_values, upper_values = calc_final_bounds(
-            state=state,
-            pred=pred_values,
-            lower_raw=lower_raw,
-            upper_raw=upper_raw,
-            target=target,
-        )
-
-        ok = write_prediction_bundle(
-            pred_metric=pred_metric,
-            anomaly_metric=anomaly_metric,
-            labels=write_labels,
-            ts_future=ts_future,
-            pred_values=pred_values,
-            lower_values=lower_values,
-            upper_values=upper_values,
-            is_anomaly=is_anomaly,
-            outside_ratio=outside_ratio,
-            mean_abs_err=mean_abs_err,
-            mean_rel_err=mean_rel_err,
-            event_ts=last_real_ts,
-        )
-
-        if not ok:
-            logger.error("[%s] %s 写入预测数据失败", now_str, query)
-            continue
-
-        LAST_REAL_TS_WRITTEN[key] = last_real_ts
-
-        future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
-        future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
-        last_real_str = datetime.fromtimestamp(last_real_ts).strftime("%H:%M:%S")
-        origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S")
-
-        logger.info(
-            "[%s] %-40s → %-35s strategy=%s status=%s anomaly=%s period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点，预测区间 %s ~ %s",
-            now_str,
-            query,
-            pred_metric,
-            state.strategy,
-            state.status,
-            is_anomaly,
-            state.period,
-            origin_str,
-            last_real_str,
-            data_lag,
-            len(ts_future),
-            future_start,
-            future_end,
-        )
-
-    save_state()
-
-
-def main() -> None:
-    load_state()
-
-    logger.info(
-        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s",
-        VM_URL,
-        HISTORY_MINUTES,
-        HORIZON_SECONDS,
-        WRITE_HORIZON_SECONDS,
-        POLL_INTERVAL,
-        STATE_FILE,
-        EXTRA_PREDICT_LABELS["forecast"],
-    )
-
-    while True:
-        run_once()
-        time.sleep(POLL_INTERVAL)
-
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/ai/pridict_v5.py b/ai/pridict_v5.py
index dde0b11..221310b 100644
--- a/ai/pridict_v5.py
+++ b/ai/pridict_v5.py
@@ -355,6 +355,140 @@ def refresh_targets_if_needed() -> None:
         logger.warning("发现流程未产生任何有效目标，保持现有目标列表")
 
 
+EXTRA_PREDICT_LABELS = {
+    "forecast": "phase_band_health_v13",
+    "source": "protoforge",
+}
+
+# =============================================================================
+# 仿真感知策略覆盖
+# 基于模板仿真算法的特征，对特定指标强制覆盖自动推断的策略与参数。
+#
+# 粗铣(fanuc-cnc) 周期约 180s 含随机抖动 ±8~10s：
+#   - feed_rate    : 双频拐角扰动（含 sin 叠加），强制 phase_band
+#   - spindle_current : 双频漂移，强制 phase_band
+#   - spindle_load : 三频漂移，强制 phase_band
+#   - phase_lock   : 周期搜索范围扩至 ±18%（覆盖抖动 + 相位偏移）
+#
+# 半精铣(fanuc-cnc-semi-finish) / 精铣(fanuc-cnc-finish) 周期固定：
+#   - spindle_load / spindle_current : 噪声或切入峰值较大，强制 phase_band
+#   - 其余指标保持自动推断
+# =============================================================================
+
+# 按 device_id 片段 + metric 名称匹配的策略覆盖表
+# 格式: (device_id_substring, metric, overrides) 三元组；按列表顺序匹配，首个命中即停止
+_SIMULATION_STRATEGY_OVERRIDES: List[Tuple[str, str, Dict]] = [
+    # ── 粗铣工位 ──────────────────────────────────────────────────────────────
+    # feed_rate: 双频拐角扰动幅值大（±80mm/min），phase_band + 宽搜索
+    ("fanuc-cnc", "feed_rate", {
+        "strategy": "phase_band",
+        "band_low_q": 5.0,
+        "band_high_q": 95.0,
+        "band_pad_abs": 40.0,
+        "phase_lock_period_search_ratio": 0.18,
+        "phase_lock_origin_search_ratio": 0.45,
+        "smooth_window": 5,
+    }),
+    # spindle_current: 双频漂移（约 ±1.5A），phase_band
+    ("fanuc-cnc", "spindle_current", {
+        "strategy": "phase_band",
+        "band_low_q": 5.0,
+        "band_high_q": 95.0,
+        "band_pad_abs": 2.5,
+        "phase_lock_period_search_ratio": 0.18,
+        "phase_lock_origin_search_ratio": 0.45,
+        "smooth_window": 5,
+    }),
+    # spindle_load: 三频漂移（约 ±8%），phase_band
+    ("fanuc-cnc", "spindle_load", {
+        "strategy": "phase_band",
+        "band_low_q": 5.0,
+        "band_high_q": 95.0,
+        "band_pad_abs": 6.0,
+        "phase_lock_period_search_ratio": 0.18,
+        "phase_lock_origin_search_ratio": 0.45,
+        "smooth_window": 5,
+    }),
+    # spindle_speed: 周期抖动大，扩大搜索范围（策略保持自动推断）
+    ("fanuc-cnc", "spindle_speed", {
+        "phase_lock_period_search_ratio": 0.18,
+        "phase_lock_origin_search_ratio": 0.45,
+    }),
+    # ── 半精铣工位 ────────────────────────────────────────────────────────────
+    # spindle_load: gauss(2.5) 噪声较大，phase_band
+    ("fanuc-cnc-semi-finish", "spindle_load", {
+        "strategy": "phase_band",
+        "band_low_q": 5.0,
+        "band_high_q": 95.0,
+        "band_pad_abs": 4.0,
+        "smooth_window": 5,
+    }),
+    # spindle_current: gauss(0.9)，偏稳定，但保留 phase_band 以容忍切入峰值
+    ("fanuc-cnc-semi-finish", "spindle_current", {
+        "strategy": "phase_band",
+        "band_low_q": 5.0,
+        "band_high_q": 95.0,
+        "band_pad_abs": 2.0,
+        "smooth_window": 3,
+    }),
+    # ── 精铣工位 ──────────────────────────────────────────────────────────────
+    # spindle_load: gauss(1.5)，切入有峰值，phase_band
+    ("fanuc-cnc-finish", "spindle_load", {
+        "strategy": "phase_band",
+        "band_low_q": 5.0,
+        "band_high_q": 95.0,
+        "band_pad_abs": 3.0,
+        "smooth_window": 5,
+    }),
+    # spindle_current: 切入峰值 11A vs 稳态 8.5A，phase_band
+    ("fanuc-cnc-finish", "spindle_current", {
+        "strategy": "phase_band",
+        "band_low_q": 5.0,
+        "band_high_q": 95.0,
+        "band_pad_abs": 1.5,
+        "smooth_window": 3,
+    }),
+]
+
+
+def _apply_simulation_overrides(target: Dict, device_id: str) -> Dict:
+    """
+    根据仿真感知覆盖表，对 target dict 应用策略和参数覆盖。
+    匹配规则：device_id 包含指定子串 且 metric 名称匹配。
+    粗铣规则的子串 'fanuc-cnc' 是半精铣/精铣规则子串的前缀，且粗铣规则在表中排在前面，
+    因此对含 'finish' 的 device_id 显式跳过粗铣规则；首个命中的规则生效后即停止匹配。
+    """
+    # 从 pred_metric 还原 metric 名（格式：xxx_predicted）
+    pred_metric = target.get("pred_metric", "")
+    metric = pred_metric.replace("_predicted", "") if pred_metric.endswith("_predicted") else ""
+
+    if not metric:
+        return target
+
+    # 按覆盖表顺序逐条匹配，首个命中即停止。粗铣规则的子串 "fanuc-cnc" 是
+    # 半精铣/精铣规则子串的前缀，而覆盖表中粗铣规则排在前面，单靠子串包含
+    # 会让半精铣/精铣设备误命中粗铣规则；因此对 fanuc-cnc 规则额外排除
+    # device_id 中含 "finish"（同时覆盖 "semi-finish"）的设备。
+    applied = dict(target)
+    for device_substr, rule_metric, overrides in _SIMULATION_STRATEGY_OVERRIDES:
+        if rule_metric != metric:
+            continue
+        # 粗铣规则（substr == "fanuc-cnc"）不应命中半精铣/精铣设备
+        if device_substr == "fanuc-cnc" and (
+            "semi-finish" in device_id or "finish" in device_id
+        ):
+            continue
+        if device_substr in device_id:
+            applied.update(overrides)
+            logger.debug(
+                "仿真策略覆盖 device=%s metric=%s overrides=%s",
+                device_id, metric, list(overrides.keys()),
+            )
+            break
+
+    return applied
+
+
 BASELINE_STATUS_HEALTHY = "healthy"
 BASELINE_STATUS_ANOMALY = "anomaly"
 BASELINE_STATUS_RECOVERING = "recovering"
@@ -1848,9 +1982,15 @@ def run_once() -> None:
             logger.info("[%s] %s 清洗后数据不足（%d 点），跳过", now_str, query, len(ys_grid_raw))
             continue
 
-        ys_grid_model = preprocess_values(ys_grid_raw, target)
-
         base_labels = parse_labels_from_query(query)
+
+        # 根据仿真算法特征，对 feed_rate / spindle_current / spindle_load 等指标
+        # 应用感知覆盖（强制 phase_band、扩大粗铣搜索范围等）
+        device_id_from_labels = base_labels.get("device_id", "")
+        effective_target = _apply_simulation_overrides(target, device_id_from_labels)
+
+        ys_grid_model = preprocess_values(ys_grid_raw, effective_target)
+
         write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
 
         key = series_key(pred_metric, write_labels)
@@ -1868,7 +2008,7 @@ def run_once() -> None:
             ts_grid=ts_grid,
             ys_model=ys_grid_model,
             ys_actual=ys_grid_raw,
-            target=target,
+            target=effective_target,
         )
 
         if state is None:
@@ -1895,7 +2035,7 @@ def run_once() -> None:
             pred=pred_values,
             lower_raw=lower_raw,
             upper_raw=upper_raw,
-            target=target,
+            target=effective_target,
         )
 
         ok = write_prediction_bundle(
diff --git a/protoforge/core/cnc_metric_generator.py b/protoforge/core/cnc_metric_generator.py
new file mode 100644
index 0000000..329f56e
--- /dev/null
+++ b/protoforge/core/cnc_metric_generator.py
@@ -0,0 +1,661 @@
+"""
+CNC 车床正常加工状态时序数据生成算法
+=====================================
+
+设计原则：
+  - 所有指标由切削强度 cutting_intensity 统一驱动，禁止各自独立随机。
+  - 热惯性模型：tool_temperature 使用一阶 RC 滤波，alpha ≈ 0.04/tick。
+  - 电流滞后：spindle_current 对 spindle_load 有 1~3 tick 的一阶滞后。
+  - 磨损单调：tool_wear_value 在切削阶段只增不减。
+  - 噪声比例：roughing > semi_finishing > finishing，稳定性反向。
+  - 纯 Python 标准库实现，无第三方依赖。
+
+用法：
+  generator = BaseMetricGenerator()
+  frame = generator.generate(t=0.0, dt=1.0, stage="roughing")
+"""
+
+from __future__ import annotations
+
+import math
+import random
+from dataclasses import dataclass, field
+from typing import Optional
+
+
+# ---------------------------------------------------------------------------
+# 数据结构
+# ---------------------------------------------------------------------------
+
+@dataclass
+class StageProfile:
+    """某个加工阶段的工艺参数范围及行为特征。"""
+    stage: str
+
+    feed_rate_min: float
+    feed_rate_max: float
+    spindle_speed_min: float
+    spindle_speed_max: float
+    spindle_current_min: float
+    spindle_current_max: float
+    spindle_load_min: float
+    spindle_load_max: float
+    vibration_min: float
+    vibration_max: float
+    acoustic_min: float
+    acoustic_max: float
+    temperature_min: float
+    temperature_max: float
+    surface_roughness_min: float
+    surface_roughness_max: float
+
+    # 每 tick 磨损增量的阶段系数（idle/tool_change = 0）
+    wear_rate_factor: float
+    # 稳定性因子：越高噪声越小，finishing=0.95，roughing=0.6
+    stability_factor: float
+
+    # 衍生属性
+    @property
+    def feed_rate_mid(self) -> float:
+        return (self.feed_rate_min + self.feed_rate_max) / 2
+
+    @property
+    def spindle_speed_mid(self) -> float:
+        return (self.spindle_speed_min + self.spindle_speed_max) / 2
+
+    @property
+    def spindle_load_mid(self) -> float:
+        return (self.spindle_load_min + self.spindle_load_max) / 2
+
+    @property
+    def vibration_mid(self) -> float:
+        return (self.vibration_min + self.vibration_max) / 2
+
+    @property
+    def acoustic_mid(self) -> float:
+        return (self.acoustic_min + self.acoustic_max) / 2
+
+    @property
+    def temperature_mid(self) -> float:
+        return (self.temperature_min + self.temperature_max) / 2
+
+    @property
+    def surface_roughness_mid(self) -> float:
+        return (self.surface_roughness_min + self.surface_roughness_max) / 2
+
+
+@dataclass
+class MetricFrame:
+    """单个 tick 产出的所有指标快照。"""
+    timestamp: float
+    stage: str
+
+    feed_rate: float            # mm/min
+    spindle_speed: float        # RPM
+    spindle_current: float      # A
+    spindle_load: float         # %
+    vibration_x: float          # mm/s
+    vibration_y: float          # mm/s
+    vibration_z: float          # mm/s
+    acoustic_emission: float    # V（声发射传感器电压，代表强度）
+    tool_temperature: float     # °C
+    surface_roughness: float    # μm Ra
+    tool_wear_value: float      # μm（累积磨损量）
+
+
+@dataclass
+class GeneratorState:
+    """跨 tick 需要持久化的生成器内部状态。"""
+    # 材料扰动随机游走值（慢变量，[-0.05, +0.05]）
+    material_random_walk: float = 0.0
+    # 热状态（tool_temperature 的平滑变量）
+    thermal_state: float = 28.0
+    # 刀具累积磨损（μm，单调不减）
+    tool_wear_accumulated: float = 0.0
+    # 上一 tick 的 spindle_load（用于电流滞后计算）
+    last_spindle_load: float = 0.0
+    # 滞后缓冲区（最多保存 3 tick 历史）
+    load_lag_buffer: list = field(default_factory=lambda: [0.0, 0.0, 0.0])
+    # 上一 tick 的 surface_roughness（idle 阶段保持上次值）
+    last_surface_roughness: float = 1.0
+    # 切削周期相位（用于 cutting_cycle_wave）
+    cycle_phase: float = 0.0
+    # 当前阶段
+    current_stage: str = "idle"
+
+
+# ---------------------------------------------------------------------------
+# 阶段配置
+# ---------------------------------------------------------------------------
+
+_STAGE_PROFILES: dict[str, StageProfile] = {
+    "idle": StageProfile(
+        stage="idle",
+        feed_rate_min=0.0,    feed_rate_max=5.0,
+        spindle_speed_min=0.0, spindle_speed_max=100.0,
+        spindle_current_min=0.5, spindle_current_max=2.0,
+        spindle_load_min=0.0,  spindle_load_max=5.0,
+        vibration_min=0.01,   vibration_max=0.08,
+        acoustic_min=0.01,    acoustic_max=0.08,
+        temperature_min=25.0, temperature_max=40.0,
+        surface_roughness_min=0.3, surface_roughness_max=1.5,
+        wear_rate_factor=0.0,
+        stability_factor=1.0,
+    ),
+    "tool_change": StageProfile(
+        stage="tool_change",
+        feed_rate_min=0.0,    feed_rate_max=20.0,
+        spindle_speed_min=0.0, spindle_speed_max=100.0,
+        spindle_current_min=1.0, spindle_current_max=4.0,
+        spindle_load_min=0.0,  spindle_load_max=8.0,
+        vibration_min=0.05,   vibration_max=0.3,
+        acoustic_min=0.05,    acoustic_max=0.4,
+        temperature_min=25.0, temperature_max=45.0,
+        surface_roughness_min=0.3, surface_roughness_max=1.5,
+        wear_rate_factor=0.0,
+        stability_factor=0.8,
+    ),
+    "roughing": StageProfile(
+        stage="roughing",
+        feed_rate_min=800.0,  feed_rate_max=1600.0,
+        spindle_speed_min=1200.0, spindle_speed_max=2500.0,
+        spindle_current_min=12.0, spindle_current_max=25.0,
+        spindle_load_min=45.0, spindle_load_max=80.0,
+        vibration_min=0.4,    vibration_max=1.2,
+        acoustic_min=0.5,     acoustic_max=1.3,
+        temperature_min=45.0, temperature_max=75.0,
+        surface_roughness_min=2.0, surface_roughness_max=6.0,
+        wear_rate_factor=1.5,
+        stability_factor=0.6,
+    ),
+    "semi_finishing": StageProfile(
+        stage="semi_finishing",
+        feed_rate_min=400.0,  feed_rate_max=900.0,
+        spindle_speed_min=2200.0, spindle_speed_max=3800.0,
+        spindle_current_min=8.0,  spindle_current_max=18.0,
+        spindle_load_min=30.0, spindle_load_max=60.0,
+        vibration_min=0.25,   vibration_max=0.8,
+        acoustic_min=0.3,     acoustic_max=0.9,
+        temperature_min=40.0, temperature_max=65.0,
+        surface_roughness_min=1.0, surface_roughness_max=3.0,
+        wear_rate_factor=1.0,
+        stability_factor=0.8,
+    ),
+    "finishing": StageProfile(
+        stage="finishing",
+        feed_rate_min=100.0,  feed_rate_max=400.0,
+        spindle_speed_min=3000.0, spindle_speed_max=5000.0,
+        spindle_current_min=5.0,  spindle_current_max=12.0,
+        spindle_load_min=15.0, spindle_load_max=40.0,
+        vibration_min=0.1,    vibration_max=0.45,
+        acoustic_min=0.15,    acoustic_max=0.5,
+        temperature_min=35.0, temperature_max=55.0,
+        surface_roughness_min=0.3, surface_roughness_max=1.5,
+        wear_rate_factor=0.5,
+        stability_factor=0.95,
+    ),
+}
+
+# 阶段切削强度基准系数（归一化到 [0,1] 区间用于 cutting_intensity 计算）
+_STAGE_INTENSITY_FACTOR: dict[str, float] = {
+    "idle":          0.02,
+    "tool_change":   0.05,
+    "roughing":      1.00,
+    "semi_finishing": 0.65,
+    "finishing":     0.35,
+}
+
+# 基础磨损速率 μm/tick（roughing 1.5×，finishing 0.5×）
+_BASE_WEAR_RATE = 0.002   # μm/tick，基准值（未乘阶段系数）相当于约每 500 tick 磨损 1 μm
+
+
+# ---------------------------------------------------------------------------
+# 主生成器
+# ---------------------------------------------------------------------------
+
+class BaseMetricGenerator:
+    """
+    CNC 车床正常加工状态时序数据生成器。
+
+    典型用法：
+        gen = BaseMetricGenerator(ambient_temperature=28.0, seed=20260609)
+        frame = gen.generate(t=0.0, dt=1.0, stage="roughing")
+    """
+
+    def __init__(
+        self,
+        ambient_temperature: float = 28.0,
+        seed: Optional[int] = None,
+        thermal_alpha: float = 0.04,
+    ):
+        self._ambient = ambient_temperature
+        self._rng = random.Random(seed)
+        # 热惯性系数（每 tick 向目标温度靠近的比例）
+        self._thermal_alpha = thermal_alpha
+        self._state = GeneratorState(
+            thermal_state=ambient_temperature,
+            last_surface_roughness=1.0,
+        )
+
+    # ------------------------------------------------------------------
+    # 公开 API
+    # ------------------------------------------------------------------
+
+    def generate(self, t: float, dt: float, stage: str) -> MetricFrame:
+        """
+        生成一帧指标数据。
+
+        Args:
+            t:     当前时间（秒），用于低频波形计算。
+            dt:    时间步长（秒），影响磨损增量和热惯性。
+            stage: 加工阶段名称（idle/tool_change/roughing/semi_finishing/finishing）。
+
+        Returns:
+            MetricFrame，所有指标均已 clamp 至合理范围。
+        """
+        profile = self.get_stage_profile(stage)
+        state = self._state
+        state.current_stage = stage
+
+        # ── 1. 材料扰动（慢变量，低频正弦 + 随机游走）──────────────────────
+        material_variation = self._calc_material_variation(t, dt, state)
+
+        # ── 2. 切削周期波动 ──────────────────────────────────────────────────
+        cutting_cycle_wave = self._calc_cutting_cycle_wave(t, dt, stage, state, profile)
+
+        # ── 3. feed_rate ──────────────────────────────────────────────────────
+        feed_rate = self._calc_feed_rate(profile, cutting_cycle_wave, stage)
+
+        # ── 4. spindle_speed ──────────────────────────────────────────────────
+        spindle_speed = self._calc_spindle_speed(profile, stage)
+
+        # ── 5. cutting_intensity（归一化切削强度）────────────────────────────
+        cutting_intensity = self._calc_cutting_intensity(
+            feed_rate, stage, material_variation, profile
+        )
+
+        # ── 6. spindle_load ───────────────────────────────────────────────────
+        spindle_load = self._calc_spindle_load(
+            profile, cutting_intensity, cutting_cycle_wave
+        )
+
+        # ── 7. spindle_current（对 load 有 1~2 tick 滞后）────────────────────
+        spindle_current = self._calc_spindle_current(profile, spindle_load, state)
+
+        # ── 8. vibration（三轴，各有小幅随机偏差）────────────────────────────
+        vib_x, vib_y, vib_z = self._calc_vibration(
+            profile, spindle_load, feed_rate, stage
+        )
+
+        # ── 9. acoustic_emission ─────────────────────────────────────────────
+        vibration_rms = (vib_x + vib_y + vib_z) / 3.0
+        acoustic_emission = self._calc_acoustic(profile, vibration_rms, spindle_load)
+
+        # ── 10. tool_temperature（热惯性模型）────────────────────────────────
+        tool_temperature = self._calc_temperature(
+            profile, spindle_load, spindle_current, dt, state
+        )
+
+        # ── 11. tool_wear_value（单调递增）────────────────────────────────────
+        tool_wear_value = self._calc_tool_wear(profile, spindle_load, dt, state)
+
+        # ── 12. surface_roughness ─────────────────────────────────────────────
+        surface_roughness = self._calc_surface_roughness(
+            profile, vibration_rms, tool_wear_value, stage, state
+        )
+
+        # ── 13. 更新滞后缓冲区 ────────────────────────────────────────────────
+        state.load_lag_buffer.pop(0)
+        state.load_lag_buffer.append(spindle_load)
+        state.last_spindle_load = spindle_load
+        state.last_surface_roughness = surface_roughness
+
+        # ── 14. 构造帧 + clamp ────────────────────────────────────────────────
+        frame = MetricFrame(
+            timestamp=t,
+            stage=stage,
+            feed_rate=feed_rate,
+            spindle_speed=spindle_speed,
+            spindle_current=spindle_current,
+            spindle_load=spindle_load,
+            vibration_x=vib_x,
+            vibration_y=vib_y,
+            vibration_z=vib_z,
+            acoustic_emission=acoustic_emission,
+            tool_temperature=tool_temperature,
+            surface_roughness=surface_roughness,
+            tool_wear_value=tool_wear_value,
+        )
+        return self.clamp_frame(frame)
+
+    def get_stage_profile(self, stage: str) -> StageProfile:  # look up the StageProfile registered under `stage`
+        if stage not in _STAGE_PROFILES:  # unknown name: raise with the list of valid keys instead of defaulting
+            raise ValueError(f"Unknown stage: {stage!r}. Valid: {list(_STAGE_PROFILES)}")
+        return _STAGE_PROFILES[stage]
+
+    def reset_wear(self) -> None:
+        """Reset the accumulated wear after a tool change (a new tool starts from 0)."""
+        self._state.tool_wear_accumulated = 0.0
+
+    @property
+    def state(self) -> GeneratorState:  # returns the live internal state object itself, not a copy
+        return self._state
+
+    # ------------------------------------------------------------------
+    # 各指标计算
+    # ------------------------------------------------------------------
+
+    def _calc_material_variation(
+        self, t: float, dt: float, state: GeneratorState
+    ) -> float:
+        """
+        Slowly varying material-uniformity multiplier centred on 1.0.
+        = 1.0 + low-frequency sine (period 60 in units of t, ±3%) + random walk (Gaussian step, sigma 0.005 × dt)
+        """
+        slow_sine = 0.03 * math.sin(2 * math.pi * t / 60.0)
+        walk_step = self._rng.gauss(0, 0.005) * dt
+        state.material_random_walk = max(  # the walk is persisted on `state` and bounded to ±0.05
+            -0.05, min(0.05, state.material_random_walk + walk_step)
+        )
+        return 1.0 + slow_sine + state.material_random_walk
+
+    def _calc_cutting_cycle_wave(
+        self,
+        t: float,
+        dt: float,
+        stage: str,
+        state: GeneratorState,
+        profile: StageProfile,
+    ) -> float:
+        """
+        Periodic cutting-load multiplier (models the cyclic load of one tool pass); `t` and `profile` are not used here.
+        Roughing has the largest amplitude (±8%), finishing a small one (±3%).
+        """
+        # Cutting period: roughing at roughly 0.5~1 rpm is one feed revolution; simplified to a fixed period per stage
+        period_map = {
+            "roughing": 8.0,
+            "semi_finishing": 6.0,
+            "finishing": 4.0,
+            "idle": 10.0,
+            "tool_change": 10.0,
+        }
+        amplitude_map = {
+            "roughing": 0.08,
+            "semi_finishing": 0.055,
+            "finishing": 0.03,
+            "idle": 0.01,
+            "tool_change": 0.02,
+        }
+        period = period_map.get(stage, 6.0)  # unknown stage falls back to a period of 6.0
+        amplitude = amplitude_map.get(stage, 0.05)  # unknown stage falls back to ±5%
+        state.cycle_phase = (state.cycle_phase + dt * 2 * math.pi / period) % (  # advance the stored phase, wrapped to [0, 2π)
+            2 * math.pi
+        )
+        return 1.0 + amplitude * math.sin(state.cycle_phase)
+
+    def _calc_feed_rate(
+        self,
+        profile: StageProfile,
+        cutting_cycle_wave: float,
+        stage: str,
+    ) -> float:
+        """
+        Feed rate = stage mid value × cutting wave + Gaussian noise, clamped to the profile's feed range.
+        idle/tool_change instead draw uniformly from [feed_rate_min, feed_rate_max], floored at 0.
+        """
+        if stage in ("idle", "tool_change"):
+            return max(0.0, self._rng.uniform(profile.feed_rate_min, profile.feed_rate_max))
+        noise_ratio = (1.0 - profile.stability_factor) * 0.06  # a higher stability_factor yields less noise
+        base = profile.feed_rate_mid * cutting_cycle_wave
+        noise = self._rng.gauss(0, base * noise_ratio)
+        return max(profile.feed_rate_min, min(profile.feed_rate_max, base + noise))
+
+    def _calc_spindle_speed(self, profile: StageProfile, stage: str) -> float:
+        """
+        Spindle speed: stage mid value plus Gaussian noise, clamped to the profile's speed range.
+        Noise sigma is 2% of mid for roughing, 1.5% for semi_finishing, 0.8% for finishing.
+        """
+        if stage in ("idle", "tool_change"):  # non-cutting stages: uniform draw over the profile's speed range
+            return self._rng.uniform(profile.spindle_speed_min, profile.spindle_speed_max)
+        noise_pct = {
+            "roughing": 0.020,
+            "semi_finishing": 0.015,
+            "finishing": 0.008,
+        }.get(stage, 0.015)
+        base = profile.spindle_speed_mid
+        noise = self._rng.gauss(0, base * noise_pct)
+        return max(profile.spindle_speed_min, min(profile.spindle_speed_max, base + noise))
+
+    def _calc_cutting_intensity(
+        self,
+        feed_rate: float,
+        stage: str,
+        material_variation: float,
+        profile: StageProfile,
+    ) -> float:
+        """
+        Cutting intensity, clamped to [0, 1]; intended as the driver for the cutting-force-related metrics.
+        = normalize(feed_rate) × stage_factor × material_variation
+        """
+        stage_factor = _STAGE_INTENSITY_FACTOR.get(stage, 0.5)  # unknown stage falls back to 0.5
+        if profile.feed_rate_max <= profile.feed_rate_min:
+            norm_feed = 0.5  # degenerate feed range: use the midpoint instead of dividing by zero
+        else:
+            norm_feed = (feed_rate - profile.feed_rate_min) / (
+                profile.feed_rate_max - profile.feed_rate_min
+            )
+            norm_feed = max(0.0, min(1.0, norm_feed))
+        return max(0.0, min(1.0, norm_feed * stage_factor * material_variation))
+
+    def _calc_spindle_load(
+        self,
+        profile: StageProfile,
+        cutting_intensity: float,
+        cutting_cycle_wave: float,
+    ) -> float:
+        """
+        Spindle load (%) = (stage minimum + load range × cutting_intensity) × cutting wave + noise, clamped to the profile range.
+        """
+        load_range = profile.spindle_load_max - profile.spindle_load_min
+        load_base = profile.spindle_load_min + load_range * cutting_intensity
+        load = load_base * cutting_cycle_wave
+        noise = self._rng.gauss(0, load_range * (1.0 - profile.stability_factor) * 0.04)
+        return max(profile.spindle_load_min, min(profile.spindle_load_max, load + noise))
+
+    def _calc_spindle_current(
+        self,
+        profile: StageProfile,
+        spindle_load: float,
+        state: GeneratorState,
+    ) -> float:
+        """
+        Spindle current (A), lagging the load through a weighted blend of the new load and two buffered loads.
+        current = current_min + k × (lag_load − load_min) + noise, clamped to the profile's current range
+        k is the stage's current range divided by its load range (0 when the load range is not positive).
+        """
+        # Lag blend: 60% new load + 25% buffer[1] + 15% buffer[0] (presumably previous tick / two ticks ago — verify the buffer holds 2 entries)
+        lag_load = spindle_load * 0.60 + state.load_lag_buffer[1] * 0.25 + state.load_lag_buffer[0] * 0.15
+        # Linear mapping: load_min → current_min, load_max → current_max
+        load_range = profile.spindle_load_max - profile.spindle_load_min
+        current_range = profile.spindle_current_max - profile.spindle_current_min
+        if load_range > 0:
+            k = current_range / load_range
+        else:
+            k = 0.0
+        current_base = profile.spindle_current_min + k * (lag_load - profile.spindle_load_min)
+        noise = self._rng.gauss(
+            0,
+            (profile.spindle_current_max - profile.spindle_current_min)
+            * (1.0 - profile.stability_factor)
+            * 0.03,
+        )
+        return max(profile.spindle_current_min, min(profile.spindle_current_max, current_base + noise))
+
+    def _calc_vibration(
+        self,
+        profile: StageProfile,
+        spindle_load: float,
+        feed_rate: float,
+        stage: str,
+    ) -> tuple[float, float, float]:
+        """
+        Vibration (mm/s) for the three axes, each with its own random scale factor; `stage` is not used here.
+        vibration = base(load_norm) × (1 + 0.15 × feed_norm) × axis_factor + noise, floored at 0
+        """
+        load_norm = (spindle_load - profile.spindle_load_min) / max(  # denominator floored at 1.0 to avoid division by zero
+            profile.spindle_load_max - profile.spindle_load_min, 1.0
+        )
+        feed_norm = (feed_rate - profile.feed_rate_min) / max(  # same 1.0 floor on the feed range
+            profile.feed_rate_max - profile.feed_rate_min, 1.0
+        )
+        vib_base = profile.vibration_min + (
+            profile.vibration_max - profile.vibration_min
+        ) * load_norm
+        vib_combined = vib_base * (1.0 + 0.15 * feed_norm)
+        noise_sigma = vib_combined * (1.0 - profile.stability_factor) * 0.08
+
+        # Per-axis factor: a separate uniform range for each axis plus small Gaussian noise, so the axes are never identical
+        vib_x = vib_combined * self._rng.uniform(0.85, 1.15) + self._rng.gauss(0, noise_sigma)
+        vib_y = vib_combined * self._rng.uniform(0.90, 1.25) + self._rng.gauss(0, noise_sigma)
+        vib_z = vib_combined * self._rng.uniform(0.75, 1.05) + self._rng.gauss(0, noise_sigma)
+
+        return (
+            max(0.0, vib_x),
+            max(0.0, vib_y),
+            max(0.0, vib_z),
+        )
+
+    def _calc_acoustic(
+        self,
+        profile: StageProfile,
+        vibration_rms: float,
+        spindle_load: float,
+    ) -> float:
+        """
+        Acoustic emission (V): 40% weight on normalised vibration, 30% on normalised load, plus a constant 0.3 offset.
+        """
+        vib_norm = (vibration_rms - profile.vibration_min) / max(  # not clamped here; the final clamp bounds the result
+            profile.vibration_max - profile.vibration_min, 1e-6
+        )
+        load_norm = (spindle_load - profile.spindle_load_min) / max(
+            profile.spindle_load_max - profile.spindle_load_min, 1.0
+        )
+        acoustic_range = profile.acoustic_max - profile.acoustic_min
+        acoustic = profile.acoustic_min + acoustic_range * (
+            0.4 * vib_norm + 0.3 * load_norm + 0.3
+        )
+        noise = self._rng.gauss(0, acoustic_range * 0.03)
+        return max(profile.acoustic_min, min(profile.acoustic_max, acoustic + noise))
+
+    def _calc_temperature(
+        self,
+        profile: StageProfile,
+        spindle_load: float,
+        spindle_current: float,
+        dt: float,
+        state: GeneratorState,
+    ) -> float:
+        """
+        Tool temperature (°C), first-order thermal-lag model (slow variable); result is clamped to [20, 120].
+        target = ambient + k1 × load + k2 × current
+        thermal_state += thermal_alpha × dt × (target - thermal_state)
+        """
+        k1 = (profile.temperature_max - self._ambient) / max(profile.spindle_load_max, 1.0) * 0.6
+        k2 = (profile.temperature_max - self._ambient) / max(profile.spindle_current_max, 1.0) * 0.4
+        target_temp = self._ambient + k1 * spindle_load + k2 * spindle_current
+        target_temp = max(self._ambient, min(120.0, target_temp))  # keep the target within [ambient, 120]
+
+        alpha = self._thermal_alpha * dt  # NOTE(review): not capped at 1.0, so a large dt could overshoot the target — confirm dt stays small
+        state.thermal_state += alpha * (target_temp - state.thermal_state)
+
+        noise = self._rng.gauss(0, 0.3)  # measurement noise is added to the output only, not to the stored state
+        return max(20.0, min(120.0, state.thermal_state + noise))
+
+    def _calc_tool_wear(
+        self,
+        profile: StageProfile,
+        spindle_load: float,
+        dt: float,
+        state: GeneratorState,
+    ) -> float:
+        """
+        Tool wear (μm): never decreased by this method, and left unchanged when the profile's wear_rate_factor is <= 0.
+        wear_delta = base_rate × wear_rate_factor × (0.5 + 0.5 × load_norm) × dt
+        """
+        if profile.wear_rate_factor <= 0.0:
+            return state.tool_wear_accumulated
+
+        load_norm = (spindle_load - profile.spindle_load_min) / max(
+            profile.spindle_load_max - profile.spindle_load_min, 1.0
+        )
+        wear_delta = (
+            _BASE_WEAR_RATE
+            * profile.wear_rate_factor
+            * (0.5 + 0.5 * load_norm)
+            * dt
+        )
+        state.tool_wear_accumulated += max(0.0, wear_delta)  # a negative delta is ignored so wear cannot go down
+        return state.tool_wear_accumulated
+
+    def _calc_surface_roughness(
+        self,
+        profile: StageProfile,
+        vibration_rms: float,
+        tool_wear_value: float,
+        stage: str,
+        state: GeneratorState,
+    ) -> float:
+        """
+        Surface roughness Ra (μm).
+        idle/tool_change stages return the previously stored value unchanged.
+        = roughness_min + roughness_range × (0.4 + 0.35 × vib_factor + 0.25 × wear_factor) + noise, floored at 0
+        """
+        if stage in ("idle", "tool_change"):
+            return state.last_surface_roughness
+
+        vib_range = profile.vibration_max - profile.vibration_min
+        vib_factor = (vibration_rms - profile.vibration_min) / max(vib_range, 1e-6)
+        vib_factor = max(0.0, min(1.0, vib_factor))
+
+        # Wear factor: grows linearly with wear and saturates at 1.0 once wear reaches 50
+        wear_factor = min(tool_wear_value / 50.0, 1.0)
+
+        roughness_range = profile.surface_roughness_max - profile.surface_roughness_min
+        roughness = (
+            profile.surface_roughness_min
+            + roughness_range * (0.4 + 0.35 * vib_factor + 0.25 * wear_factor)
+        )
+        noise = self._rng.gauss(0, roughness_range * 0.03)
+        return max(0.0, roughness + noise)
+
+    # ------------------------------------------------------------------
+    # clamp 和工具函数
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def clamp_frame(frame: MetricFrame) -> MetricFrame:  # bounds every numeric field in place and returns the same frame object
+        frame.feed_rate       = max(0.0, frame.feed_rate)
+        frame.spindle_speed   = max(0.0, frame.spindle_speed)
+        frame.spindle_current = max(0.0, frame.spindle_current)
+        frame.spindle_load    = max(0.0, min(100.0, frame.spindle_load))  # percentage, so bounded to 0..100
+        frame.vibration_x     = max(0.0, frame.vibration_x)
+        frame.vibration_y     = max(0.0, frame.vibration_y)
+        frame.vibration_z     = max(0.0, frame.vibration_z)
+        frame.acoustic_emission = max(0.0, frame.acoustic_emission)
+        frame.tool_temperature  = max(20.0, min(120.0, frame.tool_temperature))  # same 20..120 window used by _calc_temperature
+        frame.surface_roughness = max(0.0, frame.surface_roughness)
+        frame.tool_wear_value   = max(0.0, frame.tool_wear_value)
+        return frame
+
+    def add_noise(self, value: float, ratio: float) -> float:
+        """Return value plus Gaussian noise whose sigma is abs(value) × ratio."""
+        return value + self._rng.gauss(0, abs(value) * ratio)
+
+    @staticmethod
+    def smooth_step(x: float) -> float:
+        """S-shaped smoothing function 3x² − 2x³; x is first clamped to [0, 1] and the result lies in [0, 1]."""
+        x = max(0.0, min(1.0, x))
+        return x * x * (3 - 2 * x)
+
+    def random_walk(self, previous: float, step_sigma: float = 0.01) -> float:  # one unbounded Gaussian step away from `previous`
+        return previous + self._rng.gauss(0, step_sigma)
diff --git a/protoforge/core/engine.py b/protoforge/core/engine.py
index d8b72aa..acd786a 100644
--- a/protoforge/core/engine.py
+++ b/protoforge/core/engine.py
@@ -7,6 +7,7 @@
 from protoforge.core.fault import fault_injector
 from protoforge.core.generator import DataGenerator
 from protoforge.core.scenario import Scenario
+from protoforge.core.simulators import get_device_simulator
 from protoforge.models.device import DeviceConfig, DeviceInfo, DeviceStatus, PointValue
 from protoforge.models.fault import FaultInfo, FaultInjectRequest, FaultTypeDefinition
 from protoforge.models.scenario import ScenarioConfig, ScenarioInfo, ScenarioStatus
@@ -60,6 +61,10 @@ async def create_device(self, config: DeviceConfig) -> DeviceInfo:
         self._devices[config.id] = instance
         # 注册故障注入钩子
         instance.register_post_tick_hook(fault_injector.apply)
+        # 注册设备专用仿真器（如车床状态机），根据 template_id 自动匹配
+        simulator = get_device_simulator(config.template_id)
+        if simulator is not None:
+            instance.register_post_tick_hook(simulator)
 
         server = self._protocol_servers.get(config.protocol)
         if server and server.status == ProtocolStatus.RUNNING:
diff --git a/protoforge/core/generator.py b/protoforge/core/generator.py
index 21b3c07..6b488c5 100644
--- a/protoforge/core/generator.py
+++ b/protoforge/core/generator.py
@@ -34,6 +34,7 @@ class DataGenerator:
     def __init__(self):
         self._start_time: dict[str, float] = {}
         self._script_engine = ScriptEngine()
+        self._counters: dict[str, float] = {}
 
     def generate(self, point: PointConfig) -> Any:
         key = f"{point.name}_{point.address}"
@@ -52,6 +53,8 @@ def generate(self, point: PointConfig) -> Any:
             return self._generate_triangle(point, elapsed)
         elif point.generator_type == GeneratorType.SAWTOOTH:
             return self._generate_sawtooth(point, elapsed)
+        elif point.generator_type == GeneratorType.COUNTER:
+            return self._generate_counter(point, key)
         elif point.generator_type == GeneratorType.SCRIPT:
             return self._generate_script(point, elapsed)
         else:
@@ -99,6 +102,18 @@ def _generate_sawtooth(self, point: PointConfig, elapsed: float) -> Any:
         value = lo + (hi - lo) * t
         return self._cast_value(value, point.data_type)
 
+    def _generate_counter(self, point: PointConfig, key: str) -> Any:  # per-key counter that restarts at the lower bound after passing the upper bound
+        lo = point.min_value if point.min_value is not None else 0
+        hi = point.max_value if point.max_value is not None else 2**31 - 1  # default upper bound: largest signed 32-bit integer
+        step = point.generator_config.get("step", 1)
+        if key not in self._counters:
+            self._counters[key] = lo  # first call for this key yields the lower bound without stepping
+        else:
+            self._counters[key] += step
+            if self._counters[key] > hi:  # stepping past the upper bound wraps back to lo
+                self._counters[key] = lo
+        return self._cast_value(self._counters[key], point.data_type)
+
     def _generate_script(self, point: PointConfig, elapsed: float) -> Any:
         script = point.generator_config.get("script", "result = 0")
         context = {
diff --git a/protoforge/core/metrics.py b/protoforge/core/metrics.py
index 9670525..21e42f7 100644
--- a/protoforge/core/metrics.py
+++ b/protoforge/core/metrics.py
@@ -53,17 +53,21 @@ def collect_from_engine(self, engine: Any) -> None:
                 "protocol": device.config.protocol,
             }
             for point in device.read_all_points():
-                labels = {**labels_base, "point": point.name}
                 point_config = next(
                     (p for p in device.config.points if p.name == point.name), None
                 )
+                labels = {**labels_base, "point": point.name}
                 if point_config and point_config.unit:
                     labels["unit"] = point_config.unit
-                key = self._make_key(point.name, labels)
                 if point.quality != "good":
+                    key = self._make_key(point.name, labels)
                     self._gauges.pop(key, None)
                 elif isinstance(point.value, (int, float)):
                     self.set_gauge(point.name, float(point.value), labels)
+                elif isinstance(point.value, str) and point.value:
+                    # 字符串测点以 info 指标形式上报（gauge=1，值放 value label）
+                    info_labels = {**labels, "value": point.value}
+                    self.set_gauge(f"{point.name}_info", 1.0, info_labels)
 
     def collect_from_test_runner(self, runner: Any) -> None:
         self.set_gauge("protoforge_test_cases_total", len(runner._test_cases))
diff --git a/protoforge/core/simulators.py b/protoforge/core/simulators.py
new file mode 100644
index 0000000..4c75832
--- /dev/null
+++ b/protoforge/core/simulators.py
@@ -0,0 +1,33 @@
+"""
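+Device simulator registry.
+
+Returns the simulator instance (a callable, registered as a post_tick_hook) that matches a template_id.
+To add a simulator, register its mapping inside _build_registry (which populates _REGISTRY); the engine needs no changes.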
+"""
+
+from typing import Any, Callable, Optional
+
+
+def _build_registry() -> dict[str, Callable[[], Any]]:  # builds the template_id -> zero-argument simulator factory map
+    registry: dict[str, Callable[[], Any]] = {}
+    try:
+        from protoforge.protocols.mtconnect.lathe_simulator import LatheSimulator
+        registry["mtconnect_lathe"] = LatheSimulator
+    except ImportError:
+        pass  # best-effort: if the lathe simulator cannot be imported, its template is simply left unregistered
+    return registry
+
+
+_REGISTRY = _build_registry()  # built once, when this module is imported
+
+
+def get_device_simulator(template_id: Optional[str]) -> Optional[Any]:
+    """
+    Return a new simulator instance for template_id, or None when it is None or has no registered factory.
+    """
+    if template_id is None:
+        return None
+    factory = _REGISTRY.get(template_id)
+    if factory is None:
+        return None
+    return factory()  # every call builds a fresh instance, so simulator state is not shared between callers
diff --git a/protoforge/models/device.py b/protoforge/models/device.py
index 5be8c44..cbe35e4 100644
--- a/protoforge/models/device.py
+++ b/protoforge/models/device.py
@@ -22,6 +22,7 @@ class GeneratorType(str, Enum):
     TRIANGLE = "triangle"
     SAWTOOTH = "sawtooth"
     SCRIPT = "script"
+    COUNTER = "counter"
 
 
 class PointConfig(BaseModel):
diff --git a/protoforge/protocols/mtconnect/lathe_simulator.py b/protoforge/protocols/mtconnect/lathe_simulator.py
new file mode 100644
index 0000000..d3047ca
--- /dev/null
+++ b/protoforge/protocols/mtconnect/lathe_simulator.py
@@ -0,0 +1,421 @@
+"""
+车床状态机仿真器
+
+仿真 CNC 车床通过 MTConnect 协议能真实输出的信号。
+
+工作周期：
+  IDLE → SPINUP → CUTTING → DECEL → TOOL_CHANGE → IDLE
+                     ↓ (偶发，两种故障路径)
+             TOOL_BREAK / CHIP_WRAP → TOOL_CHANGE → IDLE
+
+每个 tick 的处理流程：
+  1. 状态机推进（确定当前 stage）
+  2. BaseMetricGenerator.generate() 生成健康 MetricFrame
+     （联动建模 + 噪声 + clamp，正常加工算法与故障逻辑解耦）
+  3. 把 MetricFrame 写入 device._point_values
+  4. 通过 MetricsCollector 上报 Prometheus
+
+崩刀（TOOL_BREAK）的 CNC 可观测特征：
+  - spindle_load 突增（驱动器过载保护触发）
+  - spindle_speed 急降至 0（CNC 紧急制动）
+  - execution → STOPPED，e_stop → TRIGGERED
+  - system_condition → FAULT，condition_native_code = ALM-401
+
+刀缠屑（CHIP_WRAP）的 CNC 可观测特征：
+  - spindle_load 缓慢持续爬升（缠绕阻力增大）
+  - spindle_speed 因负载升高略微下降（恒功率特性）
+  - feed_rate 出现不规律波动（缠屑阻力脉冲）
+  - 超过负载阈值后 CNC 报警停机
+  - system_condition → FAULT，condition_native_code = ALM-305
+"""
+
+import math
+import random
+import time
+from enum import Enum
+from typing import Any
+
+from protoforge.core.cnc_metric_generator import BaseMetricGenerator
+
+
+class _State(Enum):  # states of the lathe work-cycle state machine
+    IDLE = "idle"
+    SPINUP = "spinup"
+    CUTTING = "cutting"
+    DECEL = "decel"
+    TOOL_CHANGE = "tool_change"
+    TOOL_BREAK = "tool_break"  # fault state entered from CUTTING
+    CHIP_WRAP = "chip_wrap"  # fault state entered from CUTTING
+
+
+# Mapping from state-machine state to MetricGenerator machining stage
+_STATE_TO_STAGE: dict[_State, str] = {
+    _State.IDLE:        "idle",
+    _State.SPINUP:      "idle",
+    _State.CUTTING:     "roughing",    # roughing by default; the sub-stage is switched dynamically via _cutting_stage
+    _State.DECEL:       "idle",
+    _State.TOOL_CHANGE: "tool_change",
+    _State.TOOL_BREAK:  "idle",
+    _State.CHIP_WRAP:   "roughing",
+}
+
+# Turret configuration (tool position number, tool ID)
+_TOOL_TABLE = [
+    (1, "T01"),   # OD roughing tool
+    (2, "T02"),   # OD finishing tool
+    (3, "T03"),   # grooving tool
+    (4, "T04"),   # threading tool
+]
+
+_NC_BLOCKS = [
+    "N0010 G00 X200.0 Z50.0",
+    "N0020 G96 S180 M03",
+    "N0030 G00 X52.0 Z2.0",
+    "N0040 G01 Z-80.0 F0.25",
+    "N0050 G01 X56.0",
+    "N0060 G00 Z2.0",
+    "N0070 G01 X48.0",
+    "N0080 G01 Z-60.0 F0.20",
+    "N0090 G01 X52.0",
+    "N0100 G00 X200.0 Z50.0",
+    "N0110 M05",
+    "N0120 M30",
+]
+
+# Machining sub-stage sequence for each part (split by cutting progress)
+# (stage name, start progress, end progress)
+_CUT_SUBSTAGES = [
+    ("roughing",       0.00, 0.45),
+    ("semi_finishing", 0.45, 0.75),
+    ("finishing",      0.75, 1.00),
+]
+
+
+class LatheSimulator:
+    """Registered as a DeviceInstance post_tick_hook; refreshes every data point on each tick."""
+
+    def __init__(self):
+        self._state = _State.IDLE
+        self._state_elapsed = 0.0
+        self._state_duration = 0.0
+
+        # Spindle (state-machine internal values used to link the CNC signals)
+        self._spindle_target = 0.0
+        self._spindle_actual = 0.0
+
+        # Feed (state-machine internal value)
+        self._feed_actual = 0.0
+
+        # Axis positions
+        self._x_pos = 150.0
+        self._z_pos = 50.0
+
+        # Tool (only the turret position is tracked)
+        self._tool_idx = 0
+
+        # Production statistics
+        self._part_count = 0
+
+        # Program execution
+        self._program_line = 0
+        self._block_idx = 0
+
+        # Fault state
+        self._condition_native_code = ""
+        self._break_load_spike = 0.0
+        self._wrap_load_increment = 0.0
+        self._fault_cooldown = 0
+
+        # Current cutting sub-stage (roughing/semi_finishing/finishing)
+        self._cutting_stage = "roughing"
+
+        # Tick counter, passed to BaseMetricGenerator as t
+        self._tick_count = 0
+
+        # Metric generator for normal (healthy) machining
+        self._metric_gen = BaseMetricGenerator(
+            ambient_temperature=28.0,
+            seed=None,   # None = presumably an unseeded RNG that differs per instance — see BaseMetricGenerator
+        )
+
+    # ------------------------------------------------------------------
+    # post_tick_hook entry point
+    # ------------------------------------------------------------------
+
+    def __call__(self, device_instance: Any) -> None:
+        self._tick_count += 1
+        t = float(self._tick_count)   # the tick index doubles as time t (dt=1s)
+
+        # 1. Advance the state machine
+        self._step_state_machine()
+
+        # 2. Determine the current MetricGenerator stage
+        stage = self._get_metric_stage()
+
+        # 3. Generate the normal-machining MetricFrame (coupling + noise + clamp)
+        frame = self._metric_gen.generate(t=t, dt=1.0, stage=stage)
+
+        # 4. Write the MetricFrame into device._point_values (MTConnect standard points)
+        vals = device_instance._point_values
+        self._update_cnc_points(vals, frame)
+
+        # 5. Report to Prometheus
+        self._emit_prometheus(device_instance, frame)
+
+    # ------------------------------------------------------------------
+    # State machine
+    # ------------------------------------------------------------------
+
+    def _step_state_machine(self) -> None:  # advances timers by one tick, then runs the handler of the current state
+        self._state_elapsed += 1
+        if self._fault_cooldown > 0:
+            self._fault_cooldown -= 1
+
+        dispatch = {
+            _State.IDLE:        self._on_idle,
+            _State.SPINUP:      self._on_spinup,
+            _State.CUTTING:     self._on_cutting,
+            _State.DECEL:       self._on_decel,
+            _State.TOOL_CHANGE: self._on_tool_change,
+            _State.TOOL_BREAK:  self._on_tool_break,
+            _State.CHIP_WRAP:   self._on_chip_wrap,
+        }
+        dispatch[self._state]()
+
+    def _transition(self, new_state: _State, duration: float) -> None:  # enter new_state and restart its elapsed-tick timer
+        self._state = new_state
+        self._state_elapsed = 0
+        self._state_duration = duration
+
+    def _get_metric_stage(self) -> str:
+        """Map the state-machine state to a MetricGenerator stage."""
+        if self._state == _State.CUTTING:
+            return self._cutting_stage
+        if self._state == _State.CHIP_WRAP:
+            return "roughing"
+        return _STATE_TO_STAGE.get(self._state, "idle")
+
+    def _update_cutting_substage(self, progress: float) -> None:
+        """Switch between the roughing / semi-finishing / finishing sub-stages based on cutting progress."""
+        for stage_name, start, end in _CUT_SUBSTAGES:
+            if start <= progress < end:
+                if self._cutting_stage != stage_name:
+                    self._cutting_stage = stage_name
+                    # Wear is not reset on a sub-stage switch; a tool change could be recorded here (only the parameter set switches)
+                return
+        self._cutting_stage = "finishing"  # progress outside every range (e.g. >= 1.0) falls back to finishing
+
+    def _on_idle(self) -> None:  # coast the spindle to 0, clear fault fields, then start a new cycle once the idle time is up
+        self._spindle_target = 0.0
+        self._spindle_actual = self._smooth(self._spindle_actual, 0.0, 0.15)
+        self._feed_actual = 0.0
+        self._condition_native_code = ""
+        self._wrap_load_increment = 0.0
+        if self._state_elapsed >= self._state_duration:
+            self._spindle_target = random.uniform(800, 2500)
+            self._program_line = 1
+            self._block_idx = 0
+            self._cutting_stage = "roughing"
+            self._transition(_State.SPINUP, random.uniform(3, 6))
+
+    def _on_spinup(self) -> None:  # ramp the internal spindle speed towards the target, then begin cutting
+        self._spindle_actual = self._smooth(
+            self._spindle_actual, self._spindle_target, 0.25
+        )
+        if self._state_elapsed >= self._state_duration:
+            self._transition(_State.CUTTING, random.uniform(20, 40))
+
+    def _on_cutting(self) -> None:  # update motion/program state, possibly inject a fault, else finish into DECEL
+        noise = random.gauss(0, self._spindle_target * 0.02)
+        self._spindle_actual = max(
+            self._spindle_target * 0.85,
+            min(self._spindle_target * 1.05, self._spindle_actual + noise),
+        )
+        self._feed_actual = self._spindle_target * random.uniform(0.08, 0.15)
+
+        progress = self._state_elapsed / max(self._state_duration, 1)
+        self._z_pos = 50.0 - 350.0 * (progress % 1.0)
+        self._x_pos = random.uniform(20, 60) + math.sin(progress * math.pi * 4) * 5
+        self._block_idx = int(progress * len(_NC_BLOCKS)) % len(_NC_BLOCKS)  # NOTE(review): wraps back to block 0 once progress >= 1 — confirm intended
+        self._program_line = (self._block_idx + 1) * 10
+
+        # Dynamically switch between the roughing / semi-finishing / finishing sub-stages
+        self._update_cutting_substage(progress)
+
+        if self._fault_cooldown == 0 and progress > 0.2:  # faults are only rolled after 20% progress and outside the cooldown
+            r = random.random()
+            if r < 0.004:  # tool break: 0.4% chance per tick
+                self._condition_native_code = "ALM-401"
+                self._break_load_spike = random.uniform(1.8, 3.0)
+                self._transition(_State.TOOL_BREAK, random.uniform(3, 6))
+                return
+            elif r < 0.008:  # chip wrap: a further 0.4% chance per tick
+                self._condition_native_code = "ALM-305"
+                self._wrap_load_increment = 0.0
+                self._transition(_State.CHIP_WRAP, random.uniform(15, 25))
+                return
+
+        if self._state_elapsed >= self._state_duration:
+            self._transition(_State.DECEL, random.uniform(3, 5))
+
+    def _on_decel(self) -> None:  # wind down speed/feed, return the axes home, then count the finished part
+        self._spindle_actual = self._smooth(self._spindle_actual, 0.0, 0.20)
+        self._feed_actual = self._smooth(self._feed_actual, 0.0, 0.30)
+        self._x_pos = self._smooth(self._x_pos, 150.0, 0.20)
+        self._z_pos = self._smooth(self._z_pos, 50.0, 0.20)
+        if self._state_elapsed >= self._state_duration:
+            self._part_count += 1
+            if self._part_count % 5 == 0:  # every fifth part: reset wear and go through a tool change
+                self._metric_gen.reset_wear()
+                self._transition(_State.TOOL_CHANGE, random.uniform(4, 8))
+            else:
+                self._transition(_State.IDLE, random.uniform(3, 6))
+
+    def _on_tool_change(self) -> None:  # hold spindle and feed at 0, then index to the next turret position
+        self._spindle_actual = 0.0
+        self._feed_actual = 0.0
+        if self._state_elapsed >= self._state_duration:
+            self._tool_idx = (self._tool_idx + 1) % len(_TOOL_TABLE)
+            self._condition_native_code = ""
+            self._transition(_State.IDLE, random.uniform(2, 4))
+
+    def _on_tool_break(self) -> None:  # decay the internal spindle speed, then hand over to a tool change
+        phase = self._state_elapsed / max(self._state_duration, 1)
+        if phase < 0.35:
+            self._spindle_actual *= (1.0 - phase * 0.2)
+        else:
+            self._spindle_actual = self._smooth(self._spindle_actual, 0.0, 0.45)
+        self._feed_actual = 0.0
+        if self._state_elapsed >= self._state_duration:
+            self._fault_cooldown = 40
+            self._transition(_State.TOOL_CHANGE, random.uniform(6, 10))  # NOTE(review): unlike _on_decel, no reset_wear() on this path — confirm intended
+
+    def _on_chip_wrap(self) -> None:  # accumulate drag each tick until the load threshold or the state duration is reached
+        self._wrap_load_increment += random.uniform(2.5, 4.5)
+        drag = min(self._wrap_load_increment / 200.0, 0.25)  # speed loss from drag is capped at 25%
+        self._spindle_actual = max(
+            0.0,
+            self._spindle_target * (1.0 - drag) + random.gauss(0, 20),
+        )
+        feed_base = self._spindle_target * 0.10
+        self._feed_actual = feed_base * (1.0 + random.uniform(-0.3, 0.1))
+        effective_load = 30 + self._wrap_load_increment
+        if effective_load >= 90.0 or self._state_elapsed >= self._state_duration:
+            self._fault_cooldown = 30
+            self._spindle_actual = self._smooth(self._spindle_actual, 0.0, 0.5)
+            self._transition(_State.TOOL_CHANGE, random.uniform(5, 9))  # NOTE(review): no reset_wear() on this path either — confirm intended
+
+    # ------------------------------------------------------------------
+    # Writing MTConnect points + MetricFrame points
+    # ------------------------------------------------------------------
+
+    def _update_cnc_points(self, vals: dict[str, Any], frame) -> None:
+        """
+        Merge the MetricFrame (normal-machining base metrics) and the state machine (CNC signals) into the point values.
+        State machine supplies: execution/controller_mode/e_stop/system_condition/position/tool/part_count
+        MetricFrame supplies: spindle_speed/spindle_load/feed_rate/vibration/acoustic/temperature/roughness/wear
+        """
+        state = self._state
+        is_cutting = state == _State.CUTTING
+        is_tool_break = state == _State.TOOL_BREAK
+        is_chip_wrap = state == _State.CHIP_WRAP
+        is_fault = is_tool_break or is_chip_wrap
+        is_tool_change = state == _State.TOOL_CHANGE
+
+        cur_tool_no, cur_tool_id = _TOOL_TABLE[self._tool_idx]
+
+        # ── CNC status signals (from the state machine) ──────────────────────
+        vals["availability"] = "AVAILABLE"
+        vals["e_stop"] = "TRIGGERED" if is_fault else "ARMED"
+        vals["system_condition"] = "FAULT" if is_fault else "NORMAL"
+        vals["condition_native_code"] = self._condition_native_code
+
+        if is_fault:
+            vals["execution"] = "STOPPED"
+            vals["controller_mode"] = "MANUAL"
+        elif is_tool_change:
+            vals["execution"] = "WAIT"
+            vals["controller_mode"] = "AUTOMATIC"
+        elif state == _State.IDLE:
+            vals["execution"] = "READY"
+            vals["controller_mode"] = "AUTOMATIC"
+        else:
+            vals["execution"] = "ACTIVE"
+            vals["controller_mode"] = "AUTOMATIC"
+
+        vals["program"] = "O0001" if not is_fault else "O0000"
+        vals["block"] = _NC_BLOCKS[self._block_idx] if is_cutting else ""
+        vals["line"] = self._program_line
+        vals["x_position"] = round(self._x_pos, 3)
+        vals["z_position"] = round(self._z_pos, 3)
+        vals["tool_id"] = cur_tool_id
+        vals["tool_number"] = cur_tool_no
+        vals["part_count"] = self._part_count
+
+        # ── Spindle direction (derived from the state machine's internal speed) ──
+        vals["spindle_direction"] = "STOPPED" if self._spindle_actual < 10 else "CW"
+        vals["spindle_override"] = 100.0
+        vals["feed_override"] = 100.0
+        vals["rapid_override"] = 100.0
+
+        # ── MetricFrame base metrics ─────────────────────────────────────────
+        vals["spindle_speed"]      = round(frame.spindle_speed, 1)
+        vals["spindle_load"]       = round(frame.spindle_load, 1)
+        vals["spindle_current"]    = round(frame.spindle_current, 2)
+        vals["feed_rate"]          = round(frame.feed_rate, 1)
+        vals["vibration_x"]        = round(frame.vibration_x, 4)
+        vals["vibration_y"]        = round(frame.vibration_y, 4)
+        vals["vibration_z"]        = round(frame.vibration_z, 4)
+        vals["acoustic_emission"]  = round(frame.acoustic_emission, 4)
+        vals["tool_temperature"]   = round(frame.tool_temperature, 2)
+        vals["surface_roughness"]  = round(frame.surface_roughness, 3)
+        vals["tool_wear_value"]    = round(frame.tool_wear_value, 4)
+
+        # Fault override: on a tool break the spindle_load spike replaces the MetricFrame value
+        if is_tool_break:
+            phase = self._state_elapsed / max(self._state_duration, 1)
+            spike = self._break_load_spike if phase < 0.35 else 1.0  # the spike only applies during the first 35% of the state
+            overload = min(100.0, frame.spindle_load * spike)
+            vals["spindle_load"] = round(overload, 1)
+
+        # Chip-wrap override: the climbing load replaces the MetricFrame value
+        if is_chip_wrap:
+            wrap_load = min(100.0, 30.0 + self._wrap_load_increment + random.gauss(0, 2))
+            vals["spindle_load"] = round(wrap_load, 1)
+
+    def _emit_prometheus(self, device_instance: Any, frame) -> None:
+        """
+        Report the frame's metrics as Prometheus gauges through MetricsCollector.
+        Reuses the project's existing set_gauge interface; values are the raw frame, without the fault overrides.
+        """
+        try:
+            from protoforge.core.metrics import metrics
+        except ImportError:
+            return  # metrics module unavailable: skip reporting silently
+
+        device_id = getattr(device_instance.config, "id", "unknown")
+        device_name = getattr(device_instance.config, "name", "unknown")
+        labels = {
+            "device_id":   device_id,
+            "device_name": device_name,
+            "protocol":    "mtconnect",
+            "stage":       frame.stage,
+        }
+
+        metrics.set_gauge("cnc_feed_rate",          frame.feed_rate,          {**labels, "unit": "mm/min"})
+        metrics.set_gauge("cnc_spindle_speed",       frame.spindle_speed,      {**labels, "unit": "RPM"})
+        metrics.set_gauge("cnc_spindle_current",     frame.spindle_current,    {**labels, "unit": "A"})
+        metrics.set_gauge("cnc_spindle_load",        frame.spindle_load,       {**labels, "unit": "%"})
+        metrics.set_gauge("cnc_vibration_x",         frame.vibration_x,        {**labels, "unit": "mm/s"})
+        metrics.set_gauge("cnc_vibration_y",         frame.vibration_y,        {**labels, "unit": "mm/s"})
+        metrics.set_gauge("cnc_vibration_z",         frame.vibration_z,        {**labels, "unit": "mm/s"})
+        metrics.set_gauge("cnc_acoustic_emission",   frame.acoustic_emission,  {**labels, "unit": "V"})
+        metrics.set_gauge("cnc_tool_temperature",    frame.tool_temperature,   {**labels, "unit": "C"})
+        metrics.set_gauge("cnc_surface_roughness",   frame.surface_roughness,  {**labels, "unit": "um"})
+        metrics.set_gauge("cnc_tool_wear_value",     frame.tool_wear_value,    {**labels, "unit": "um"})
+
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _smooth(current: float, target: float, rate: float) -> float:  # move `current` towards `target` by the fraction `rate`
+        return current + (target - current) * rate
diff --git a/protoforge/templates/mtconnect/lathe_machine.json b/protoforge/templates/mtconnect/lathe_machine.json
index 33d36b0..5f00ec3 100644
--- a/protoforge/templates/mtconnect/lathe_machine.json
+++ b/protoforge/templates/mtconnect/lathe_machine.json
@@ -6,21 +6,76 @@
     "manufacturer": "Generic",
     "model": "MC-Lathe-2Axis",
     "points": [
+        {
+            "name": "availability",
+            "address": "Availability",
+            "data_type": "string",
+            "description": "设备可用性（MTConnect必需字段）",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": "AVAILABLE"
+        },
         {
             "name": "execution",
             "address": "Execution",
             "data_type": "string",
-            "description": "执行状态",
+            "description": "执行状态（ACTIVE/READY/STOPPED/WAIT）",
             "access": "r",
             "generator_type": "fixed",
             "fixed_value": "ACTIVE"
         },
+        {
+            "name": "controller_mode",
+            "address": "ControllerMode",
+            "data_type": "string",
+            "description": "控制器模式（AUTOMATIC/MANUAL/MDI）",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": "AUTOMATIC"
+        },
+        {
+            "name": "e_stop",
+            "address": "EmergencyStop",
+            "data_type": "string",
+            "description": "急停状态（ARMED/TRIGGERED）",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": "ARMED"
+        },
+        {
+            "name": "program",
+            "address": "Program",
+            "data_type": "string",
+            "description": "当前运行NC程序名",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": "O0001"
+        },
+        {
+            "name": "block",
+            "address": "Block",
+            "data_type": "string",
+            "description": "当前执行程序段",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": "N0010 G01 X50.0 Z-100.0 F0.2"
+        },
+        {
+            "name": "line",
+            "address": "Line",
+            "data_type": "int32",
+            "description": "当前程序行号",
+            "access": "r",
+            "generator_type": "counter",
+            "min_value": 1,
+            "max_value": 500
+        },
         {
             "name": "x_position",
             "address": "Xposition",
             "data_type": "float32",
             "unit": "mm",
-            "description": "X轴位置(径向)",
+            "description": "X轴位置（径向）",
             "access": "r",
             "generator_type": "sine",
             "min_value": 0,
@@ -31,22 +86,90 @@
             "address": "Zposition",
             "data_type": "float32",
             "unit": "mm",
-            "description": "Z轴位置(纵向)",
+            "description": "Z轴位置（纵向）",
             "access": "r",
             "generator_type": "sine",
             "min_value": -300,
             "max_value": 50
         },
+        {
+            "name": "feed_rate",
+            "address": "PathFeedrate",
+            "data_type": "float32",
+            "unit": "mm/min",
+            "description": "实际进给速度",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 0.0
+        },
+        {
+            "name": "feed_override",
+            "address": "FeedrateOverride",
+            "data_type": "float32",
+            "unit": "%",
+            "description": "进给倍率",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 100.0
+        },
+        {
+            "name": "rapid_override",
+            "address": "RapidOverride",
+            "data_type": "float32",
+            "unit": "%",
+            "description": "快速移动倍率",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 100.0
+        },
         {
             "name": "spindle_speed",
             "address": "SpindleSpeed",
             "data_type": "float32",
             "unit": "RPM",
-            "description": "主轴转速",
+            "description": "主轴实际转速",
             "access": "r",
-            "generator_type": "random",
-            "min_value": 500,
-            "max_value": 4000
+            "generator_type": "fixed",
+            "fixed_value": 0.0
+        },
+        {
+            "name": "spindle_override",
+            "address": "SpindleOverride",
+            "data_type": "float32",
+            "unit": "%",
+            "description": "主轴倍率",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 100.0
+        },
+        {
+            "name": "spindle_load",
+            "address": "SpindleLoad",
+            "data_type": "float32",
+            "unit": "%",
+            "description": "主轴负载（伺服驱动器输出，0~100%）",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 0.0
+        },
+        {
+            "name": "spindle_current",
+            "address": "SpindleCurrent",
+            "data_type": "float32",
+            "unit": "A",
+            "description": "主轴电流，与负载正相关，有1~2 tick滞后",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 0.0
+        },
+        {
+            "name": "spindle_direction",
+            "address": "RotaryVelocity",
+            "data_type": "string",
+            "description": "主轴旋转方向（CW/CCW/STOPPED）",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": "CW"
         },
         {
             "name": "tool_id",
@@ -56,6 +179,114 @@
             "access": "r",
             "generator_type": "fixed",
             "fixed_value": "T01"
+        },
+        {
+            "name": "tool_number",
+            "address": "ToolNumber",
+            "data_type": "int32",
+            "description": "刀塔当前刀位号",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 1
+        },
+        {
+            "name": "part_count",
+            "address": "PartCount",
+            "data_type": "int32",
+            "unit": "pcs",
+            "description": "累计零件计数（CNC内部M代码触发）",
+            "access": "r",
+            "generator_type": "counter",
+            "min_value": 0,
+            "max_value": 99999
+        },
+        {
+            "name": "system_condition",
+            "address": "SystemCondition",
+            "data_type": "string",
+            "description": "系统报警状态（NORMAL/WARNING/FAULT）",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": "NORMAL"
+        },
+        {
+            "name": "condition_native_code",
+            "address": "ConditionNativeCode",
+            "data_type": "string",
+            "description": "CNC厂商报警号（如ALM-401），无报警时为空",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": ""
+        },
+        {
+            "name": "vibration_x",
+            "address": "VibrationX",
+            "data_type": "float32",
+            "unit": "mm/s",
+            "description": "X轴振动速度RMS（外部加速度传感器，随主轴负载和进给联动）",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 0.0
+        },
+        {
+            "name": "vibration_y",
+            "address": "VibrationY",
+            "data_type": "float32",
+            "unit": "mm/s",
+            "description": "Y轴振动速度RMS",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 0.0
+        },
+        {
+            "name": "vibration_z",
+            "address": "VibrationZ",
+            "data_type": "float32",
+            "unit": "mm/s",
+            "description": "Z轴振动速度RMS（轴向）",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 0.0
+        },
+        {
+            "name": "acoustic_emission",
+            "address": "AcousticEmission",
+            "data_type": "float32",
+            "unit": "V",
+            "description": "声发射传感器输出电压，受振动和主轴负载驱动",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 0.0
+        },
+        {
+            "name": "tool_temperature",
+            "address": "ToolTemperature",
+            "data_type": "float32",
+            "unit": "C",
+            "description": "刀具/切削区温度（热惯性模型，缓慢变化）",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 28.0
+        },
+        {
+            "name": "surface_roughness",
+            "address": "SurfaceRoughness",
+            "data_type": "float32",
+            "unit": "um",
+            "description": "工件表面粗糙度Ra，受加工阶段、振动、磨损驱动",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 1.0
+        },
+        {
+            "name": "tool_wear_value",
+            "address": "ToolWearValue",
+            "data_type": "float32",
+            "unit": "um",
+            "description": "刀具累积磨损量（切削阶段单调递增，换刀后归零）",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 0.0
         }
     ],
     "protocol_config": {
diff --git a/tests/test_cnc_metric_generator.py b/tests/test_cnc_metric_generator.py
new file mode 100644
index 0000000..d944129
--- /dev/null
+++ b/tests/test_cnc_metric_generator.py
@@ -0,0 +1,372 @@
+"""
+tests/test_cnc_metric_generator.py
+===================================
+
+验证 BaseMetricGenerator 的正常加工状态时序数据生成算法。
+
+覆盖以下场景：
+1. roughing vs finishing 阶段指标大小关系
+2. finishing 阶段主轴转速高且稳定、振动/粗糙度低
+3. tool_temperature 慢变量特性（不瞬变、idle 缓慢回落）
+4. tool_wear_value 在切削阶段单调递增，idle/tool_change 不增长
+5. spindle_current 与 spindle_load 正相关且不完全同步（有滞后）
+6. 所有指标无负值且不超合理边界
+"""
+
+import pytest
+from protoforge.core.cnc_metric_generator import BaseMetricGenerator, MetricFrame
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def gen():
+    """固定随机种子，保证测试可重现。"""
+    return BaseMetricGenerator(ambient_temperature=28.0, seed=20260609)
+
+
+def _run_n(gen: BaseMetricGenerator, stage: str, n: int) -> list[MetricFrame]:
+    """运行 n 个 tick，返回所有帧。"""
+    frames = []
+    for i in range(n):
+        frames.append(gen.generate(t=float(i), dt=1.0, stage=stage))
+    return frames
+
+
+# ---------------------------------------------------------------------------
+# 1. roughing vs finishing 阶段指标大小关系
+# ---------------------------------------------------------------------------
+
+class TestRoughingVsFinishing:
+    """粗加工各项指标应高于精加工。"""
+
+    N = 50  # 取足够多的样本，用均值比较，避免噪声误判
+
+    def test_feed_rate_roughing_gt_finishing(self):
+        gen_r = BaseMetricGenerator(seed=1)
+        gen_f = BaseMetricGenerator(seed=1)
+        avg_r = sum(f.feed_rate for f in _run_n(gen_r, "roughing", self.N)) / self.N
+        avg_f = sum(f.feed_rate for f in _run_n(gen_f, "finishing", self.N)) / self.N
+        assert avg_r > avg_f, f"roughing feed_rate均值({avg_r:.1f}) 应 > finishing({avg_f:.1f})"
+
+    def test_spindle_load_roughing_gt_finishing(self):
+        gen_r = BaseMetricGenerator(seed=2)
+        gen_f = BaseMetricGenerator(seed=2)
+        avg_r = sum(f.spindle_load for f in _run_n(gen_r, "roughing", self.N)) / self.N
+        avg_f = sum(f.spindle_load for f in _run_n(gen_f, "finishing", self.N)) / self.N
+        assert avg_r > avg_f, f"roughing spindle_load均值({avg_r:.1f}) 应 > finishing({avg_f:.1f})"
+
+    def test_spindle_current_roughing_gt_finishing(self):
+        gen_r = BaseMetricGenerator(seed=3)
+        gen_f = BaseMetricGenerator(seed=3)
+        avg_r = sum(f.spindle_current for f in _run_n(gen_r, "roughing", self.N)) / self.N
+        avg_f = sum(f.spindle_current for f in _run_n(gen_f, "finishing", self.N)) / self.N
+        assert avg_r > avg_f, f"roughing current均值({avg_r:.2f}) 应 > finishing({avg_f:.2f})"
+
+    def test_vibration_roughing_gt_finishing(self):
+        gen_r = BaseMetricGenerator(seed=4)
+        gen_f = BaseMetricGenerator(seed=4)
+        avg_r = sum(
+            (f.vibration_x + f.vibration_y + f.vibration_z) / 3
+            for f in _run_n(gen_r, "roughing", self.N)
+        ) / self.N
+        avg_f = sum(
+            (f.vibration_x + f.vibration_y + f.vibration_z) / 3
+            for f in _run_n(gen_f, "finishing", self.N)
+        ) / self.N
+        assert avg_r > avg_f, f"roughing vibration均值({avg_r:.3f}) 应 > finishing({avg_f:.3f})"
+
+    def test_surface_roughness_roughing_gt_finishing(self):
+        gen_r = BaseMetricGenerator(seed=5)
+        gen_f = BaseMetricGenerator(seed=5)
+        avg_r = sum(f.surface_roughness for f in _run_n(gen_r, "roughing", self.N)) / self.N
+        avg_f = sum(f.surface_roughness for f in _run_n(gen_f, "finishing", self.N)) / self.N
+        assert avg_r > avg_f, f"roughing roughness均值({avg_r:.2f}) 应 > finishing({avg_f:.2f})"
+
+
+# ---------------------------------------------------------------------------
+# 2. finishing 阶段：高转速且稳定，振动/粗糙度低
+# ---------------------------------------------------------------------------
+
+class TestFinishing:
+
+    def test_spindle_speed_high(self, gen):
+        frames = _run_n(gen, "finishing", 30)
+        for f in frames:
+            assert f.spindle_speed >= 3000, f"finishing spindle_speed({f.spindle_speed}) 应 >= 3000 RPM"
+
+    def test_spindle_speed_stable(self, gen):
+        """精加工主轴转速波动应 < 2%（稳定性要求）。"""
+        frames = _run_n(gen, "finishing", 50)
+        speeds = [f.spindle_speed for f in frames]
+        avg = sum(speeds) / len(speeds)
+        max_deviation = max(abs(s - avg) / avg for s in speeds)
+        assert max_deviation < 0.02, f"finishing 转速最大偏差({max_deviation:.3%}) 超过 2%"
+
+    def test_vibration_low(self, gen):
+        frames = _run_n(gen, "finishing", 30)
+        for f in frames:
+            vib_rms = (f.vibration_x + f.vibration_y + f.vibration_z) / 3
+            assert vib_rms <= 0.6, f"finishing vibration_rms({vib_rms:.3f}) 应 <= 0.6 mm/s"
+
+    def test_surface_roughness_low(self, gen):
+        frames = _run_n(gen, "finishing", 30)
+        for f in frames:
+            assert f.surface_roughness <= 1.8, \
+                f"finishing surface_roughness({f.surface_roughness:.3f}) 应 <= 1.8 μm"
+
+
+# ---------------------------------------------------------------------------
+# 3. tool_temperature 慢变量特性
+# ---------------------------------------------------------------------------
+
+class TestToolTemperature:
+
+    MAX_JUMP_PER_TICK = 3.0   # 单 tick 最大允许温度变化（°C）
+
+    def test_no_instant_jump_roughing(self, gen):
+        """粗加工阶段温度不应瞬间大幅跳变。"""
+        frames = _run_n(gen, "roughing", 60)
+        temps = [f.tool_temperature for f in frames]
+        for i in range(1, len(temps)):
+            delta = abs(temps[i] - temps[i - 1])
+            assert delta <= self.MAX_JUMP_PER_TICK, \
+                f"tick {i}: 温度跳变 {delta:.2f}°C 超过 {self.MAX_JUMP_PER_TICK}°C"
+
+    def test_temperature_rises_in_roughing(self, gen):
+        """粗加工持续运行后温度应高于初始环境温度。"""
+        frames = _run_n(gen, "roughing", 100)
+        # 最后 10 tick 均值应高于初始热状态
+        late_avg = sum(f.tool_temperature for f in frames[-10:]) / 10
+        assert late_avg > 35.0, \
+            f"粗加工后期温度均值({late_avg:.1f}°C) 应 > 35°C"
+
+    def test_temperature_falls_in_idle(self):
+        """idle 阶段温度应缓慢回落。"""
+        gen = BaseMetricGenerator(seed=42)
+        # 先跑 80 tick roughing 把温度升高
+        for i in range(80):
+            gen.generate(t=float(i), dt=1.0, stage="roughing")
+        hot_temp = gen.state.thermal_state
+
+        # 再跑 60 tick idle
+        for i in range(80, 140):
+            gen.generate(t=float(i), dt=1.0, stage="idle")
+        cool_temp = gen.state.thermal_state
+
+        assert cool_temp < hot_temp, \
+            f"idle 后温度({cool_temp:.1f}) 应低于加工后温度({hot_temp:.1f})"
+
+    def test_no_instant_jump_idle(self, gen):
+        """idle 阶段温度同样不应瞬变。"""
+        frames = _run_n(gen, "idle", 30)
+        temps = [f.tool_temperature for f in frames]
+        for i in range(1, len(temps)):
+            delta = abs(temps[i] - temps[i - 1])
+            assert delta <= self.MAX_JUMP_PER_TICK, \
+                f"idle tick {i}: 温度跳变 {delta:.2f}°C"
+
+
+# ---------------------------------------------------------------------------
+# 4. tool_wear_value 单调性
+# ---------------------------------------------------------------------------
+
+class TestToolWear:
+
+    def _check_monotone(self, stages: list[str], n_per_stage: int = 20):  # returns the last observed wear
+        gen = BaseMetricGenerator(seed=99)  # one generator shared by all stages
+        prev_wear = 0.0
+        for stage_idx, stage in enumerate(stages):
+            for i in range(n_per_stage):
+                t = float(stage_idx * n_per_stage + i)  # global tick: time keeps advancing across stages
+                frame = gen.generate(t=t, dt=1.0, stage=stage)
+                assert frame.tool_wear_value >= prev_wear - 1e-9, \
+                    f"stage={stage} tick={i}: wear({frame.tool_wear_value:.6f}) < prev({prev_wear:.6f})，磨损不单调"
+                prev_wear = frame.tool_wear_value
+        return prev_wear
+
+    def test_wear_increases_in_roughing(self):
+        gen = BaseMetricGenerator(seed=10)
+        wear_start = gen.state.tool_wear_accumulated
+        _run_n(gen, "roughing", 50)
+        wear_end = gen.state.tool_wear_accumulated
+        assert wear_end > wear_start, \
+            f"粗加工后磨损({wear_end:.4f}) 应 > 初始({wear_start:.4f})"
+
+    def test_wear_increases_in_semi_finishing(self):
+        gen = BaseMetricGenerator(seed=11)
+        _run_n(gen, "semi_finishing", 50)
+        assert gen.state.tool_wear_accumulated > 0, "半精加工后磨损应 > 0"
+
+    def test_wear_increases_in_finishing(self):
+        gen = BaseMetricGenerator(seed=12)
+        _run_n(gen, "finishing", 50)
+        assert gen.state.tool_wear_accumulated > 0, "精加工后磨损应 > 0"
+
+    def test_wear_no_increase_in_idle(self):
+        gen = BaseMetricGenerator(seed=13)
+        # 先加工一段，再 idle
+        _run_n(gen, "roughing", 10)
+        wear_before_idle = gen.state.tool_wear_accumulated
+        _run_n(gen, "idle", 30)
+        assert gen.state.tool_wear_accumulated == wear_before_idle, \
+            "idle 阶段磨损不应增长"
+
+    def test_wear_no_increase_in_tool_change(self):
+        gen = BaseMetricGenerator(seed=14)
+        _run_n(gen, "roughing", 10)
+        wear_before = gen.state.tool_wear_accumulated
+        _run_n(gen, "tool_change", 20)
+        assert gen.state.tool_wear_accumulated == wear_before, \
+            "tool_change 阶段磨损不应增长"
+
+    def test_wear_monotone_across_cutting_stages(self):
+        final_wear = self._check_monotone(
+            ["roughing", "roughing", "semi_finishing", "finishing"],
+            n_per_stage=30
+        )
+        assert final_wear > 0
+
+    def test_roughing_wear_gt_finishing_wear(self):
+        """粗加工单位时间磨损应快于精加工。"""
+        gen_r = BaseMetricGenerator(seed=20)
+        gen_f = BaseMetricGenerator(seed=20)
+        _run_n(gen_r, "roughing", 100)
+        _run_n(gen_f, "finishing", 100)
+        assert gen_r.state.tool_wear_accumulated > gen_f.state.tool_wear_accumulated, \
+            "粗加工磨损速率应高于精加工"
+
+
+# ---------------------------------------------------------------------------
+# 5. spindle_current 与 spindle_load 的相关性与滞后
+# ---------------------------------------------------------------------------
+
+class TestCurrentLoadCorrelation:
+
+    def test_current_load_positive_correlation(self, gen):
+        """电流与负载正相关（Pearson r > 0.5）。
+        注：roughing 阶段噪声较大（stability=0.6），加上 1~2 tick 滞后，
+        实际相关系数在 0.5~0.75 之间，符合真实 CNC 采集数据的特征。
+        """
+        frames = _run_n(gen, "roughing", 200)
+        loads = [f.spindle_load for f in frames]
+        currents = [f.spindle_current for f in frames]
+
+        n = len(loads)
+        mean_l = sum(loads) / n
+        mean_c = sum(currents) / n
+        cov = sum((l - mean_l) * (c - mean_c) for l, c in zip(loads, currents)) / n
+        std_l = (sum((l - mean_l) ** 2 for l in loads) / n) ** 0.5
+        std_c = (sum((c - mean_c) ** 2 for c in currents) / n) ** 0.5
+        r = cov / (std_l * std_c + 1e-9)
+        assert r > 0.5, f"电流-负载 Pearson r({r:.3f}) 应 > 0.5"
+
+    def test_current_not_identical_to_load(self, gen):
+        """电流与负载不完全相同（体现滞后和不同物理量）。"""
+        frames = _run_n(gen, "roughing", 30)
+        diffs = [abs(f.spindle_current - f.spindle_load) for f in frames]
+        avg_diff = sum(diffs) / len(diffs)
+        assert avg_diff > 1.0, \
+            f"电流与负载均值差({avg_diff:.2f}) 过小，可能完全相同"
+
+    def test_current_unit_range(self, gen):
+        """电流应在 roughing 合理范围（12~25 A）附近。"""
+        frames = _run_n(gen, "roughing", 50)
+        for f in frames:
+            assert 5.0 <= f.spindle_current <= 35.0, \
+                f"roughing spindle_current({f.spindle_current:.2f} A) 超出合理范围"
+
+    def test_current_lag_detection(self):
+        """
+        Verify lag: after a step change in load, the current shows inertia instead of jumping to its target.
+        A single generator runs idle, then switches to roughing; the first roughing ticks must average below steady state.
+        """
+        gen = BaseMetricGenerator(seed=77)
+        # Warm up with 10 idle ticks (very low load)
+        for i in range(10):
+            gen.generate(t=float(i), dt=1.0, stage="idle")
+        # Switch to roughing and record the first 3 ticks
+        early_currents = []
+        for i in range(10, 13):
+            f = gen.generate(t=float(i), dt=1.0, stage="roughing")
+            early_currents.append(f.spindle_current)
+        # Let the generator settle (ticks 13-39); these frames are discarded
+        for i in range(13, 40):
+            gen.generate(t=float(i), dt=1.0, stage="roughing")
+        steady_currents = []  # steady-state sample window: ticks 40-59
+        for i in range(40, 60):
+            f = gen.generate(t=float(i), dt=1.0, stage="roughing")
+            steady_currents.append(f.spindle_current)
+
+        early_avg = sum(early_currents) / len(early_currents)
+        steady_avg = sum(steady_currents) / len(steady_currents)
+        assert early_avg < steady_avg, \
+            f"切换到 roughing 后早期电流({early_avg:.2f}) 应低于稳态({steady_avg:.2f})，体现滞后"
+
+
+# ---------------------------------------------------------------------------
+# 6. 所有指标边界检查（无负值，不超上限）
+# ---------------------------------------------------------------------------
+
+class TestBoundaries:
+
+    ALL_STAGES = ["idle", "tool_change", "roughing", "semi_finishing", "finishing"]
+
+    def test_no_negative_values(self):
+        for stage in self.ALL_STAGES:
+            gen = BaseMetricGenerator(seed=0)
+            for i, frame in enumerate(_run_n(gen, stage, 30)):
+                assert frame.feed_rate >= 0, f"{stage} t={i}: feed_rate < 0"
+                assert frame.spindle_speed >= 0, f"{stage} t={i}: spindle_speed < 0"
+                assert frame.spindle_current >= 0, f"{stage} t={i}: spindle_current < 0"
+                assert frame.spindle_load >= 0, f"{stage} t={i}: spindle_load < 0"
+                assert frame.vibration_x >= 0, f"{stage} t={i}: vibration_x < 0"
+                assert frame.vibration_y >= 0, f"{stage} t={i}: vibration_y < 0"
+                assert frame.vibration_z >= 0, f"{stage} t={i}: vibration_z < 0"
+                assert frame.acoustic_emission >= 0, f"{stage} t={i}: acoustic_emission < 0"
+                assert frame.surface_roughness >= 0, f"{stage} t={i}: surface_roughness < 0"
+                assert frame.tool_wear_value >= 0, f"{stage} t={i}: tool_wear_value < 0"
+
+    def test_spindle_load_max_100(self):
+        for stage in self.ALL_STAGES:
+            gen = BaseMetricGenerator(seed=1)
+            for i, frame in enumerate(_run_n(gen, stage, 30)):
+                assert frame.spindle_load <= 100.0, \
+                    f"{stage} t={i}: spindle_load({frame.spindle_load}) > 100%"
+
+    def test_tool_temperature_range(self):
+        for stage in self.ALL_STAGES:
+            gen = BaseMetricGenerator(seed=2)
+            for i, frame in enumerate(_run_n(gen, stage, 30)):
+                assert 20.0 <= frame.tool_temperature <= 120.0, \
+                    f"{stage} t={i}: tool_temperature({frame.tool_temperature:.1f}) 超出 [20,120]°C"
+
+    def test_no_unrealistic_instant_jump(self):
+        """任意连续 tick 的指标变化不应超过合理上限（防止仿真器 bug）。"""
+        MAX_SPINDLE_SPEED_JUMP = 500   # RPM/tick
+        MAX_LOAD_JUMP          = 30    # %/tick
+        MAX_TEMP_JUMP          = 5     # °C/tick
+
+        for stage in ["roughing", "finishing"]:
+            gen = BaseMetricGenerator(seed=3)
+            frames = _run_n(gen, stage, 60)
+            for i in range(1, len(frames)):
+                prev, curr = frames[i - 1], frames[i]
+                assert abs(curr.spindle_speed - prev.spindle_speed) <= MAX_SPINDLE_SPEED_JUMP, \
+                    f"{stage} t={i}: spindle_speed 跳变过大"
+                assert abs(curr.spindle_load - prev.spindle_load) <= MAX_LOAD_JUMP, \
+                    f"{stage} t={i}: spindle_load 跳变过大"
+                assert abs(curr.tool_temperature - prev.tool_temperature) <= MAX_TEMP_JUMP, \
+                    f"{stage} t={i}: tool_temperature 跳变过大"
+
+
+# ---------------------------------------------------------------------------
+# 附加：stage 名称错误时应抛出异常
+# ---------------------------------------------------------------------------
+
+def test_invalid_stage_raises():
+    gen = BaseMetricGenerator()
+    with pytest.raises(ValueError, match="Unknown stage"):
+        gen.generate(t=0.0, dt=1.0, stage="nonexistent_stage")

From 47c1b47a42f0197ee51665ea103c391a8ca2de1c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 9 Jun 2026 09:36:15 +0800
Subject: [PATCH 44/55] fix: generate spindle load with an EMA-smoothed SpindleLoadGenerator

---
 protoforge/core/cnc_metric_generator.py | 152 ++++++++++++++++++++++--
 1 file changed, 142 insertions(+), 10 deletions(-)

diff --git a/protoforge/core/cnc_metric_generator.py b/protoforge/core/cnc_metric_generator.py
index 329f56e..50709b1 100644
--- a/protoforge/core/cnc_metric_generator.py
+++ b/protoforge/core/cnc_metric_generator.py
@@ -124,6 +124,129 @@ class GeneratorState:
     current_stage: str = "idle"
 
 
+# ---------------------------------------------------------------------------
+# SpindleLoadGenerator —— 状态驱动、EMA 平滑的主轴负载生成器
+# ---------------------------------------------------------------------------
+
+# 各工艺阶段的负载基线及允许范围
+_PROCESS_LOAD_CONFIG: dict[str, dict] = {
+    "rough": {
+        "base": 55.0,
+        "slow_amp": 6.0,    # 慢周期波动幅度（%）
+        "cut_amp": 4.0,     # 切削周期扰动幅度（%）
+        "noise_sigma": 2.5, # 高斯噪声标准差（%）
+        "clamp_min": 35.0,
+        "clamp_max": 85.0,
+        "ema_alpha": 0.18,  # 较快响应，粗加工负载变化快
+    },
+    "semi_finish": {
+        "base": 38.0,
+        "slow_amp": 4.0,
+        "cut_amp": 2.5,
+        "noise_sigma": 1.5,
+        "clamp_min": 22.0,
+        "clamp_max": 65.0,
+        "ema_alpha": 0.15,
+    },
+    "finish": {
+        "base": 22.0,
+        "slow_amp": 2.5,
+        "cut_amp": 1.5,
+        "noise_sigma": 0.8,
+        "clamp_min": 12.0,
+        "clamp_max": 42.0,
+        "ema_alpha": 0.12,  # 较慢响应，精加工负载更平稳
+    },
+}
+
+# 各驱动状态的负载基线及 EMA 系数
+_STATE_LOAD_CONFIG: dict[str, dict] = {
+    "idle":        {"base": 1.5,  "noise_sigma": 0.3, "clamp_min": 0.0,  "clamp_max": 5.0,  "ema_alpha": 0.10},
+    "tool_change": {"base": 4.0,  "noise_sigma": 0.8, "clamp_min": 0.0,  "clamp_max": 10.0, "ema_alpha": 0.12},
+    "spindle_on":  {"base": 8.0,  "noise_sigma": 1.2, "clamp_min": 3.0,  "clamp_max": 18.0, "ema_alpha": 0.15},
+    "air_cut":     {"base": 15.0, "noise_sigma": 2.0, "clamp_min": 8.0,  "clamp_max": 28.0, "ema_alpha": 0.16},
+    # "cutting" state delegates to _PROCESS_LOAD_CONFIG
+}
+
+# stage 名称 → 内部 process 名称映射
+_STAGE_TO_PROCESS: dict[str, str] = {
+    "roughing":       "rough",
+    "semi_finishing":  "semi_finish",
+    "finishing":       "finish",
+}
+
+# stage 名称 → 驱动状态映射（非切削阶段）
+_STAGE_TO_STATE: dict[str, str] = {
+    "idle":        "idle",
+    "tool_change": "tool_change",
+}
+
+
+class SpindleLoadGenerator:
+    """
+    状态驱动、EMA 平滑的主轴负载生成器。
+
+    内部维护 prev_load 跨 tick 状态，使负载曲线连续平滑，
+    避免随机脉冲。各切削工艺有独立基线和 clamp 范围，
+    idle/tool_change 等非切削状态接近 0。
+
+    stage 参数取值：idle / tool_change / roughing / semi_finishing / finishing
+    """
+
+    def __init__(self, rng: random.Random):
+        self._rng = rng
+        self.prev_load: float = 0.0
+
+    def generate(
+        self,
+        t: float,
+        stage: str,
+        material_variation: float = 1.0,
+        slow_phase: float = 0.0,
+        cut_phase: float = 0.0,
+    ) -> float:
+        """
+        生成本 tick 的主轴负载（%）。
+
+        Args:
+            t:                当前时间（秒），保留供未来扩展。
+            stage:            加工阶段（idle/tool_change/roughing/semi_finishing/finishing）。
+            material_variation: 材料扰动系数（≈1.0，±5%）。
+            slow_phase:       慢周期相位（弧度），由外部统一维护。
+            cut_phase:        切削周期相位（弧度），由外部统一维护。
+
+        Returns:
+            clamp 后的主轴负载（%）。
+        """
+        process = _STAGE_TO_PROCESS.get(stage)
+
+        if process is not None:
+            # 切削阶段：使用工艺基线
+            cfg = _PROCESS_LOAD_CONFIG[process]
+            slow_wave = cfg["slow_amp"] * math.sin(slow_phase)
+            cut_wave  = cfg["cut_amp"]  * math.sin(cut_phase)
+            noise     = self._rng.gauss(0, cfg["noise_sigma"])
+            mat_delta = (material_variation - 1.0) * cfg["base"] * 0.5  # 材料变化影响基线的 50%
+            target    = cfg["base"] + slow_wave + cut_wave + noise + mat_delta
+            alpha     = cfg["ema_alpha"]
+            lo, hi    = cfg["clamp_min"], cfg["clamp_max"]
+        else:
+            # 非切削阶段：使用状态基线
+            state_key = _STAGE_TO_STATE.get(stage, "idle")
+            cfg = _STATE_LOAD_CONFIG[state_key]
+            noise  = self._rng.gauss(0, cfg["noise_sigma"])
+            target = cfg["base"] + noise
+            alpha  = cfg["ema_alpha"]
+            lo, hi = cfg["clamp_min"], cfg["clamp_max"]
+
+        # EMA 平滑
+        new_load = self.prev_load + alpha * (target - self.prev_load)
+        # clamp
+        new_load = max(lo, min(hi, new_load))
+        self.prev_load = new_load
+        return new_load
+
+
 # ---------------------------------------------------------------------------
 # 阶段配置
 # ---------------------------------------------------------------------------
@@ -236,6 +359,8 @@ def __init__(
             thermal_state=ambient_temperature,
             last_surface_roughness=1.0,
         )
+        # 主轴负载生成器（状态驱动 + EMA 平滑）
+        self._spindle_load_gen = SpindleLoadGenerator(self._rng)
 
     # ------------------------------------------------------------------
     # 公开 API
@@ -274,9 +399,11 @@ def generate(self, t: float, dt: float, stage: str) -> MetricFrame:
             feed_rate, stage, material_variation, profile
         )
 
-        # ── 6. spindle_load ───────────────────────────────────────────────────
+        # ── 6. spindle_load（状态驱动 + EMA 平滑）────────────────────────────
+        # 慢波相位（约 90 s 周期）和切削相位复用 cycle_phase
+        slow_phase = 2 * math.pi * t / 90.0
         spindle_load = self._calc_spindle_load(
-            profile, cutting_intensity, cutting_cycle_wave
+            profile, stage, material_variation, slow_phase, state.cycle_phase
         )
 
         # ── 7. spindle_current（对 load 有 1~2 tick 滞后）────────────────────
@@ -450,17 +577,22 @@ def _calc_cutting_intensity(
     def _calc_spindle_load(
         self,
         profile: StageProfile,
-        cutting_intensity: float,
-        cutting_cycle_wave: float,
+        stage: str,
+        material_variation: float,
+        slow_phase: float,
+        cut_phase: float,
     ) -> float:
         """
-        主轴负载（%）= 阶段基线 + cutting_intensity 加权 + 切削波动 + 噪声。
+        主轴负载（%）—— 委托给 SpindleLoadGenerator。
+        使用状态驱动基线 + EMA 平滑，避免随机脉冲行为。
         """
-        load_range = profile.spindle_load_max - profile.spindle_load_min
-        load_base = profile.spindle_load_min + load_range * cutting_intensity
-        load = load_base * cutting_cycle_wave
-        noise = self._rng.gauss(0, load_range * (1.0 - profile.stability_factor) * 0.04)
-        return max(profile.spindle_load_min, min(profile.spindle_load_max, load + noise))
+        return self._spindle_load_gen.generate(
+            t=0.0,  # t 保留，当前未使用
+            stage=stage,
+            material_variation=material_variation,
+            slow_phase=slow_phase,
+            cut_phase=cut_phase,
+        )
 
     def _calc_spindle_current(
         self,

From c0fe62d6a86c5d7663678607ab757d56bb9a5483 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 9 Jun 2026 10:12:43 +0800
Subject: [PATCH 45/55] fix: add entry/exit ramps and per-cycle variation to spindle load

---
 protoforge/core/cnc_metric_generator.py       | 261 +++++++++++-------
 .../protocols/mtconnect/lathe_simulator.py    |  12 +-
 2 files changed, 176 insertions(+), 97 deletions(-)

diff --git a/protoforge/core/cnc_metric_generator.py b/protoforge/core/cnc_metric_generator.py
index 50709b1..3478fdf 100644
--- a/protoforge/core/cnc_metric_generator.py
+++ b/protoforge/core/cnc_metric_generator.py
@@ -106,146 +106,214 @@ class MetricFrame:
 @dataclass
 class GeneratorState:
     """跨 tick 需要持久化的生成器内部状态。"""
-    # 材料扰动随机游走值（慢变量，[-0.05, +0.05]）
     material_random_walk: float = 0.0
-    # 热状态（tool_temperature 的平滑变量）
     thermal_state: float = 28.0
-    # 刀具累积磨损（μm，单调不减）
     tool_wear_accumulated: float = 0.0
-    # 上一 tick 的 spindle_load（用于电流滞后计算）
     last_spindle_load: float = 0.0
-    # 滞后缓冲区（最多保存 3 tick 历史）
     load_lag_buffer: list = field(default_factory=lambda: [0.0, 0.0, 0.0])
-    # 上一 tick 的 surface_roughness（idle 阶段保持上次值）
     last_surface_roughness: float = 1.0
-    # 切削周期相位（用于 cutting_cycle_wave）
     cycle_phase: float = 0.0
-    # 当前阶段
     current_stage: str = "idle"
+    # 切削阶段内已经过的秒数（用于 entry/exit ramp 计算）
+    cutting_elapsed: float = 0.0
+    # 当前切削阶段预估总时长（由状态机传入）
+    cutting_total: float = 30.0
 
 
 # ---------------------------------------------------------------------------
-# SpindleLoadGenerator —— 状态驱动、EMA 平滑的主轴负载生成器
+# SpindleLoadGenerator —— 状态驱动、切入/切出 ramp、周期扰动的主轴负载生成器
 # ---------------------------------------------------------------------------
 
-# 各工艺阶段的负载基线及允许范围
-_PROCESS_LOAD_CONFIG: dict[str, dict] = {
+# 切削工艺配置
+_PROCESS_CFG: dict[str, dict] = {
     "rough": {
-        "base": 55.0,
-        "slow_amp": 6.0,    # 慢周期波动幅度（%）
-        "cut_amp": 4.0,     # 切削周期扰动幅度（%）
-        "noise_sigma": 2.5, # 高斯噪声标准差（%）
-        "clamp_min": 35.0,
-        "clamp_max": 85.0,
-        "ema_alpha": 0.18,  # 较快响应，粗加工负载变化快
+        "base_load":       55.0,
+        "slow_freq":        0.10,   # rad/s，慢波频率
+        "slow_amp":         5.0,
+        "cut_freq":         0.75,   # rad/s，切削波频率
+        "cut_amp":          2.5,
+        "material_freq":    0.03,
+        "material_amp":     4.0,
+        "noise_range":      2.0,    # uniform ±noise_range
+        "clamp_min":       35.0,
+        "clamp_max":       85.0,
+        "ema_alpha":        0.10,
+        "entry_ramp_s":     6.0,    # 切入 ramp 时长（秒）
+        "exit_ramp_s":      5.0,    # 切出 ramp 时长（秒）
+        # 低负载基准（air_cut 阶段，用于 ramp 起止参考）
+        "air_cut_base":     8.0,
     },
     "semi_finish": {
-        "base": 38.0,
-        "slow_amp": 4.0,
-        "cut_amp": 2.5,
-        "noise_sigma": 1.5,
-        "clamp_min": 22.0,
-        "clamp_max": 65.0,
-        "ema_alpha": 0.15,
+        "base_load":       38.0,
+        "slow_freq":        0.08,
+        "slow_amp":         3.5,
+        "cut_freq":         0.65,
+        "cut_amp":          1.8,
+        "material_freq":    0.025,
+        "material_amp":     2.5,
+        "noise_range":      1.5,
+        "clamp_min":       20.0,
+        "clamp_max":       65.0,
+        "ema_alpha":        0.10,
+        "entry_ramp_s":     5.0,
+        "exit_ramp_s":      4.0,
+        "air_cut_base":     6.0,
     },
     "finish": {
-        "base": 22.0,
-        "slow_amp": 2.5,
-        "cut_amp": 1.5,
-        "noise_sigma": 0.8,
-        "clamp_min": 12.0,
-        "clamp_max": 42.0,
-        "ema_alpha": 0.12,  # 较慢响应，精加工负载更平稳
+        "base_load":       22.0,
+        "slow_freq":        0.06,
+        "slow_amp":         2.0,
+        "cut_freq":         0.55,
+        "cut_amp":          1.0,
+        "material_freq":    0.02,
+        "material_amp":     1.2,
+        "noise_range":      0.8,
+        "clamp_min":        8.0,
+        "clamp_max":       45.0,
+        "ema_alpha":        0.09,
+        "entry_ramp_s":     4.0,
+        "exit_ramp_s":      3.0,
+        "air_cut_base":     5.0,
     },
 }
 
-# 各驱动状态的负载基线及 EMA 系数
-_STATE_LOAD_CONFIG: dict[str, dict] = {
-    "idle":        {"base": 1.5,  "noise_sigma": 0.3, "clamp_min": 0.0,  "clamp_max": 5.0,  "ema_alpha": 0.10},
-    "tool_change": {"base": 4.0,  "noise_sigma": 0.8, "clamp_min": 0.0,  "clamp_max": 10.0, "ema_alpha": 0.12},
-    "spindle_on":  {"base": 8.0,  "noise_sigma": 1.2, "clamp_min": 3.0,  "clamp_max": 18.0, "ema_alpha": 0.15},
-    "air_cut":     {"base": 15.0, "noise_sigma": 2.0, "clamp_min": 8.0,  "clamp_max": 28.0, "ema_alpha": 0.16},
-    # "cutting" state delegates to _PROCESS_LOAD_CONFIG
+# 非切削状态配置（base / noise / clamp / ema_alpha）
+_STATE_CFG: dict[str, dict] = {
+    "idle":        {"base": 1.0,  "noise": 0.4,  "lo": 0.0,  "hi": 2.0,   "alpha": 0.35},
+    "tool_change": {"base": 3.5,  "noise": 0.6,  "lo": 0.0,  "hi": 8.0,   "alpha": 0.25},
+    "spindle_on":  {"base": 4.5,  "noise": 0.5,  "lo": 3.0,  "hi": 8.0,   "alpha": 0.22},
+    "air_cut":     {"base": 7.5,  "noise": 0.8,  "lo": 5.0,  "hi": 12.0,  "alpha": 0.20},
 }
 
-# stage 名称 → 内部 process 名称映射
+# stage → process 映射（切削阶段）
 _STAGE_TO_PROCESS: dict[str, str] = {
-    "roughing":       "rough",
-    "semi_finishing":  "semi_finish",
-    "finishing":       "finish",
-}
-
-# stage 名称 → 驱动状态映射（非切削阶段）
-_STAGE_TO_STATE: dict[str, str] = {
-    "idle":        "idle",
-    "tool_change": "tool_change",
+    "roughing":      "rough",
+    "semi_finishing": "semi_finish",
+    "finishing":      "finish",
 }
 
 
 class SpindleLoadGenerator:
     """
-    状态驱动、EMA 平滑的主轴负载生成器。
+    状态驱动、切入/切出 ramp、周期级扰动的主轴负载生成器。
 
-    内部维护 prev_load 跨 tick 状态，使负载曲线连续平滑，
-    避免随机脉冲。各切削工艺有独立基线和 clamp 范围，
-    idle/tool_change 等非切削状态接近 0。
+    支持的 stage 值（由 LatheSimulator 的 _get_metric_stage 传入）：
+      idle / tool_change / roughing / semi_finishing / finishing
 
-    stage 参数取值：idle / tool_change / roughing / semi_finishing / finishing
+    内部将切削阶段按 cutting_elapsed / cutting_total 推导出
+    entry_cut → cutting → exit_cut 子状态，实现平滑切入切出。
+    每个加工周期开始时随机化 cycle_factor / phase 保证周期间差异。
     """
 
     def __init__(self, rng: random.Random):
         self._rng = rng
         self.prev_load: float = 0.0
 
+        # 周期级随机状态（每次进入切削阶段时刷新）
+        self._cycle_id: Optional[str] = None
+        self._cycle_factor: float = 1.0   # 0.92~1.08，整体缩放基线
+        self._phase1: float = 0.0          # 慢波初相位
+        self._phase2: float = 0.0          # 切削波初相位
+        self._material_phase: float = 0.0  # 材料漂移初相位
+
+        # 上一个 stage，用于检测切削周期切换
+        self._last_stage: str = "idle"
+
+    # ------------------------------------------------------------------
+
+    def _refresh_cycle(self, stage: str) -> None:
+        """检测到新的切削周期时刷新周期级随机参数。"""
+        cycle_id = stage  # 简单以 stage 变化作为新周期标志
+        was_cutting = self._last_stage in _STAGE_TO_PROCESS
+        now_cutting = stage in _STAGE_TO_PROCESS
+        # 从非切削 → 切削，或切削工艺跳转（粗 → 精），认为是新周期
+        if now_cutting and (not was_cutting or stage != self._last_stage):
+            self._cycle_factor = self._rng.uniform(0.92, 1.08)
+            self._phase1 = self._rng.uniform(0, 2 * math.pi)
+            self._phase2 = self._rng.uniform(0, 2 * math.pi)
+            self._material_phase = self._rng.uniform(0, 2 * math.pi)
+        self._last_stage = stage
+
     def generate(
         self,
         t: float,
         stage: str,
-        material_variation: float = 1.0,
-        slow_phase: float = 0.0,
-        cut_phase: float = 0.0,
+        cutting_elapsed: float = 0.0,
+        cutting_total: float = 30.0,
     ) -> float:
         """
         生成本 tick 的主轴负载（%）。
 
         Args:
-            t:                当前时间（秒），保留供未来扩展。
-            stage:            加工阶段（idle/tool_change/roughing/semi_finishing/finishing）。
-            material_variation: 材料扰动系数（≈1.0，±5%）。
-            slow_phase:       慢周期相位（弧度），由外部统一维护。
-            cut_phase:        切削周期相位（弧度），由外部统一维护。
+            t:               当前时间（秒），用于波形计算。
+            stage:           加工阶段（idle/tool_change/roughing/semi_finishing/finishing）。
+            cutting_elapsed: 当前切削阶段已经过的秒数（用于 ramp 计算）。
+            cutting_total:   当前切削阶段总时长预估（用于 exit_cut 判断）。
 
         Returns:
             clamp 后的主轴负载（%）。
         """
+        self._refresh_cycle(stage)
+
         process = _STAGE_TO_PROCESS.get(stage)
 
-        if process is not None:
-            # 切削阶段：使用工艺基线
-            cfg = _PROCESS_LOAD_CONFIG[process]
-            slow_wave = cfg["slow_amp"] * math.sin(slow_phase)
-            cut_wave  = cfg["cut_amp"]  * math.sin(cut_phase)
-            noise     = self._rng.gauss(0, cfg["noise_sigma"])
-            mat_delta = (material_variation - 1.0) * cfg["base"] * 0.5  # 材料变化影响基线的 50%
-            target    = cfg["base"] + slow_wave + cut_wave + noise + mat_delta
-            alpha     = cfg["ema_alpha"]
-            lo, hi    = cfg["clamp_min"], cfg["clamp_max"]
+        if process is None:
+            # 非切削阶段
+            cfg = _STATE_CFG.get(stage, _STATE_CFG["idle"])
+            slow_wave = math.sin(t * 0.20) * 0.8
+            noise = self._rng.uniform(-cfg["noise"], cfg["noise"])
+            target = cfg["base"] + slow_wave + noise
+            alpha = cfg["alpha"]
+            lo, hi = cfg["lo"], cfg["hi"]
         else:
-            # 非切削阶段：使用状态基线
-            state_key = _STAGE_TO_STATE.get(stage, "idle")
-            cfg = _STATE_LOAD_CONFIG[state_key]
-            noise  = self._rng.gauss(0, cfg["noise_sigma"])
-            target = cfg["base"] + noise
-            alpha  = cfg["ema_alpha"]
-            lo, hi = cfg["clamp_min"], cfg["clamp_max"]
+            # 切削阶段：entry_cut → cutting → exit_cut
+            pcfg = _PROCESS_CFG[process]
+            entry_s = pcfg["entry_ramp_s"]
+            exit_s  = pcfg["exit_ramp_s"]
+            air_base = pcfg["air_cut_base"]
+            eff_base = pcfg["base_load"] * self._cycle_factor
+
+            # 切出判断：距切削结束不足 exit_s 秒
+            time_to_end = cutting_total - cutting_elapsed
+            in_exit = (time_to_end <= exit_s) and (cutting_elapsed > entry_s)
+
+            if cutting_elapsed <= entry_s:
+                # ── entry_cut：从 air_base 平滑爬升到 eff_base ──
+                ramp = cutting_elapsed / entry_s           # 0→1
+                smooth_ramp = ramp * ramp * (3 - 2 * ramp)  # smoothstep
+                target_cutting = self._cutting_target(t, pcfg, eff_base)
+                target = air_base + (target_cutting - air_base) * smooth_ramp
+                alpha = 0.12
+                lo, hi = air_base * 0.5, pcfg["clamp_max"]
+            elif in_exit:
+                # ── exit_cut：从 eff_base 平滑下降到 air_base ──
+                exit_elapsed = exit_s - time_to_end
+                ramp = max(0.0, min(1.0, exit_elapsed / exit_s))
+                smooth_ramp = ramp * ramp * (3 - 2 * ramp)
+                target_cutting = self._cutting_target(t, pcfg, eff_base)
+                target = target_cutting * (1.0 - smooth_ramp) + air_base * smooth_ramp
+                alpha = 0.13
+                lo, hi = air_base * 0.4, pcfg["clamp_max"]
+            else:
+                # ── cutting：稳定切削平台 ──
+                target = self._cutting_target(t, pcfg, eff_base)
+                alpha = pcfg["ema_alpha"]
+                lo, hi = pcfg["clamp_min"], pcfg["clamp_max"]
 
         # EMA 平滑
         new_load = self.prev_load + alpha * (target - self.prev_load)
-        # clamp
         new_load = max(lo, min(hi, new_load))
         self.prev_load = new_load
         return new_load
 
+    def _cutting_target(self, t: float, pcfg: dict, eff_base: float) -> float:
+        """计算切削平台目标负载（含慢波 + 切削波 + 材料漂移 + 小噪声）。"""
+        slow_wave      = math.sin(t * pcfg["slow_freq"]     + self._phase1) * pcfg["slow_amp"]
+        cut_wave       = math.sin(t * pcfg["cut_freq"]      + self._phase2) * pcfg["cut_amp"]
+        material_drift = math.sin(t * pcfg["material_freq"] + self._material_phase) * pcfg["material_amp"]
+        noise          = self._rng.uniform(-pcfg["noise_range"], pcfg["noise_range"])
+        return eff_base + slow_wave + cut_wave + material_drift + noise
+
 
 # ---------------------------------------------------------------------------
 # 阶段配置
@@ -380,6 +448,17 @@ def generate(self, t: float, dt: float, stage: str) -> MetricFrame:
         """
         profile = self.get_stage_profile(stage)
         state = self._state
+
+        # ── 切削阶段计时维护 ─────────────────────────────────────────────────
+        is_cutting = stage in _STAGE_TO_PROCESS
+        if is_cutting:
+            if state.current_stage not in _STAGE_TO_PROCESS:
+                # 刚进入切削阶段，重置计时
+                state.cutting_elapsed = 0.0
+            else:
+                state.cutting_elapsed += dt
+        else:
+            state.cutting_elapsed = 0.0
         state.current_stage = stage
 
         # ── 1. 材料扰动（慢变量，低频正弦 + 随机游走）──────────────────────
@@ -399,11 +478,9 @@ def generate(self, t: float, dt: float, stage: str) -> MetricFrame:
             feed_rate, stage, material_variation, profile
         )
 
-        # ── 6. spindle_load（状态驱动 + EMA 平滑）────────────────────────────
-        # 慢波相位（约 90 s 周期）和切削相位复用 cycle_phase
-        slow_phase = 2 * math.pi * t / 90.0
+        # ── 6. spindle_load（状态驱动 + ramp + EMA 平滑）────────────────────
         spindle_load = self._calc_spindle_load(
-            profile, stage, material_variation, slow_phase, state.cycle_phase
+            stage, t, dt, state
         )
 
         # ── 7. spindle_current（对 load 有 1~2 tick 滞后）────────────────────
@@ -576,22 +653,20 @@ def _calc_cutting_intensity(
 
     def _calc_spindle_load(
         self,
-        profile: StageProfile,
         stage: str,
-        material_variation: float,
-        slow_phase: float,
-        cut_phase: float,
+        t: float,
+        dt: float,
+        state: GeneratorState,
     ) -> float:
         """
         主轴负载（%）—— 委托给 SpindleLoadGenerator。
-        使用状态驱动基线 + EMA 平滑，避免随机脉冲行为。
+        传入切削计时信息，实现切入/切出 ramp。
         """
         return self._spindle_load_gen.generate(
-            t=0.0,  # t 保留，当前未使用
+            t=t,
             stage=stage,
-            material_variation=material_variation,
-            slow_phase=slow_phase,
-            cut_phase=cut_phase,
+            cutting_elapsed=state.cutting_elapsed,
+            cutting_total=state.cutting_total,
         )
 
     def _calc_spindle_current(
diff --git a/protoforge/protocols/mtconnect/lathe_simulator.py b/protoforge/protocols/mtconnect/lathe_simulator.py
index d3047ca..da974f8 100644
--- a/protoforge/protocols/mtconnect/lathe_simulator.py
+++ b/protoforge/protocols/mtconnect/lathe_simulator.py
@@ -152,14 +152,18 @@ def __call__(self, device_instance: Any) -> None:
         # 2. 确定当前 MetricGenerator 阶段
         stage = self._get_metric_stage()
 
-        # 3. 生成正常加工 MetricFrame（含联动 + 噪声 + clamp）
+        # 3. 把 CUTTING 状态总时长同步给 MetricGenerator（用于 exit_ramp 计算）
+        if self._state == _State.CUTTING:
+            self._metric_gen.state.cutting_total = self._state_duration
+
+        # 4. 生成正常加工 MetricFrame（含联动 + 噪声 + clamp）
         frame = self._metric_gen.generate(t=t, dt=1.0, stage=stage)
 
-        # 4. 把 MetricFrame 写入 device._point_values（MTConnect 标准测点）
+        # 5. 把 MetricFrame 写入 device._point_values（MTConnect 标准测点）
         vals = device_instance._point_values
         self._update_cnc_points(vals, frame)
 
-        # 5. 上报 Prometheus
+        # 6. 上报 Prometheus
         self._emit_prometheus(device_instance, frame)
 
     # ------------------------------------------------------------------
@@ -223,7 +227,7 @@ def _on_spinup(self) -> None:
             self._spindle_actual, self._spindle_target, 0.25
         )
         if self._state_elapsed >= self._state_duration:
-            self._transition(_State.CUTTING, random.uniform(20, 40))
+            self._transition(_State.CUTTING, random.uniform(35, 65))
 
     def _on_cutting(self) -> None:
         noise = random.gauss(0, self._spindle_target * 0.02)

From db323e42993f9b9dd2ce341ade4a23c38225891e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 9 Jun 2026 10:50:22 +0800
Subject: [PATCH 46/55] fix: generate spindle speed/load/current via CncSpindleGenerator

---
 protoforge/core/cnc_metric_generator.py       | 296 +++++++++++++-----
 .../protocols/mtconnect/lathe_simulator.py    |  18 +-
 2 files changed, 225 insertions(+), 89 deletions(-)

diff --git a/protoforge/core/cnc_metric_generator.py b/protoforge/core/cnc_metric_generator.py
index 3478fdf..49d83d3 100644
--- a/protoforge/core/cnc_metric_generator.py
+++ b/protoforge/core/cnc_metric_generator.py
@@ -3,11 +3,10 @@
 =====================================
 
 设计原则：
-  - 所有指标由切削强度 cutting_intensity 统一驱动，禁止各自独立随机。
+  - spindle_speed / spindle_load / spindle_current 由 CncSpindleGenerator 统一驱动。
+  - 生成链路：工艺阶段 → 目标转速 → 实际转速(EMA) → 负载 → 电流。
   - 热惯性模型：tool_temperature 使用一阶 RC 滤波，alpha ≈ 0.04/tick。
-  - 电流滞后：spindle_current 对 spindle_load 有 1~3 tick 的一阶滞后。
   - 磨损单调：tool_wear_value 在切削阶段只增不减。
-  - 噪声比例：roughing > semi_finishing > finishing，稳定性反向。
   - 纯 Python 标准库实现，无第三方依赖。
 
 用法：
@@ -118,6 +117,8 @@ class GeneratorState:
     cutting_elapsed: float = 0.0
     # 当前切削阶段预估总时长（由状态机传入）
     cutting_total: float = 30.0
+    # 状态机内部状态（idle/spinup/cutting/decel/tool_change），用于转速平滑
+    spindle_state: str = "idle"
 
 
 # ---------------------------------------------------------------------------
@@ -315,6 +316,193 @@ def _cutting_target(self, t: float, pcfg: dict, eff_base: float) -> float:
         return eff_base + slow_wave + cut_wave + material_drift + noise
 
 
+# ---------------------------------------------------------------------------
+# CncSpindleGenerator —— spindle_speed / spindle_load / spindle_current 统一联动
+# ---------------------------------------------------------------------------
+
+# 工艺阶段 → 主轴目标转速配置
+_PROCESS_SPEED_CFG: dict[str, dict] = {
+    "rough":        {"target": 2000.0, "noise": 30.0,  "lo": 1800.0, "hi": 2200.0},
+    "semi_finish":  {"target": 3000.0, "noise": 40.0,  "lo": 2800.0, "hi": 3200.0},
+    "finish":       {"target": 4000.0, "noise": 50.0,  "lo": 3800.0, "hi": 4200.0},
+}
+
+# 非切削状态下转速目标（0 = 停止）
+_STATE_SPEED_TARGET: dict[str, float] = {
+    "idle":        0.0,
+    "tool_change": 0.0,
+}
+
+# 各状态的转速 EMA alpha（值越小过渡越慢）
+_SPEED_ALPHA: dict[str, float] = {
+    "idle":        0.20,   # 快速停止
+    "tool_change": 0.22,
+    "spinup":      0.14,   # 平滑升速
+    "cutting":     0.06,   # 稳定运转，微调
+    "decel":       0.18,   # 降速
+}
+
+# 电流模型配置：各工艺的空载基础电流和负载系数
+_PROCESS_CURRENT_CFG: dict[str, dict] = {
+    "rough":       {"base": 3.0, "load_factor": 0.20, "noise": 0.4, "lo": 8.0,  "hi": 20.0},
+    "semi_finish": {"base": 2.5, "load_factor": 0.16, "noise": 0.3, "lo": 5.0,  "hi": 15.0},
+    "finish":      {"base": 2.0, "load_factor": 0.12, "noise": 0.2, "lo": 3.0,  "hi": 10.0},
+}
+
+# 非切削状态的电流配置
+_STATE_CURRENT_CFG: dict[str, dict] = {
+    "idle":        {"base": 0.3,  "noise": 0.15, "lo": 0.0, "hi": 1.0,  "alpha": 0.35},
+    "tool_change": {"base": 0.5,  "noise": 0.2,  "lo": 0.0, "hi": 1.5,  "alpha": 0.30},
+    "spindle_on":  {"base": 3.2,  "noise": 0.4,  "lo": 2.0, "hi": 5.0,  "alpha": 0.20},
+    "air_cut":     {"base": 4.0,  "noise": 0.5,  "lo": 2.5, "hi": 6.0,  "alpha": 0.18},
+}
+
+# 电流 EMA alpha（切削阶段，略慢于负载，体现电气滞后）
+_CURRENT_ALPHA_CUTTING: dict[str, float] = {
+    "entry_cut": 0.10,
+    "cutting":   0.10,
+    "exit_cut":  0.12,
+}
+
+
+def _clamp(v: float, lo: float, hi: float) -> float:
+    return max(lo, min(hi, v))
+
+
+def _ema(prev: float, target: float, alpha: float) -> float:
+    return prev + alpha * (target - prev)
+
+
+class CncSpindleGenerator:
+    """
+    统一驱动 spindle_speed / spindle_load / spindle_current 的联动生成器。
+
+    生成链路：
+        工艺阶段(process) → 目标转速 → 实际转速(EMA) → 负载(SpindleLoadGenerator)
+        → 电流(负载+转速映射)
+
+    stage 参数取值：idle / tool_change / roughing / semi_finishing / finishing
+    spindle_state 参数取值：idle / tool_change / spinup / cutting / decel
+      （由 LatheSimulator 状态机传入，用于控制转速 EMA alpha）
+    """
+
+    def __init__(self, rng: random.Random, load_gen: SpindleLoadGenerator):
+        self._rng = rng
+        self._load_gen = load_gen  # 复用已有的负载生成器
+
+        self.prev_speed: float = 0.0
+        self.prev_current: float = 0.0
+
+    def generate(
+        self,
+        t: float,
+        stage: str,
+        spindle_state: str = "cutting",
+        cutting_elapsed: float = 0.0,
+        cutting_total: float = 30.0,
+    ) -> tuple[float, float, float]:
+        """
+        生成 (spindle_speed, spindle_load, spindle_current)。
+
+        Args:
+            t:               当前时间（秒）。
+            stage:           MetricGenerator 加工阶段。
+            spindle_state:   LatheSimulator 内部状态（idle/spinup/cutting/decel/tool_change）。
+            cutting_elapsed: 切削阶段已过秒数（传给负载生成器）。
+            cutting_total:   切削阶段总时长（传给负载生成器）。
+        """
+        process = _STAGE_TO_PROCESS.get(stage)   # None = 非切削
+
+        # ── 1. 主轴转速 ────────────────────────────────────────────────────
+        speed = self._calc_speed(stage, spindle_state, process)
+
+        # ── 2. 主轴负载（委托 SpindleLoadGenerator）────────────────────────
+        load = self._load_gen.generate(
+            t=t,
+            stage=stage,
+            cutting_elapsed=cutting_elapsed,
+            cutting_total=cutting_total,
+        )
+
+        # ── 3. 主轴电流（由转速 + 负载推导）───────────────────────────────
+        current = self._calc_current(stage, spindle_state, process, speed, load)
+
+        return speed, load, current
+
+    # ------------------------------------------------------------------
+
+    def _calc_speed(self, stage: str, spindle_state: str, process: Optional[str]) -> float:
+        """转速：按工艺目标 + EMA 平滑，非切削时降到 0。"""
+        if process is not None:
+            scfg = _PROCESS_SPEED_CFG[process]
+            noise = self._rng.gauss(0, scfg["noise"])
+            target = scfg["target"] + noise
+            # 首次从停止状态进入切削：直接跳到目标转速附近，避免长收敛期
+            if self.prev_speed < scfg["lo"] * 0.5:
+                self.prev_speed = scfg["target"]
+            alpha = _SPEED_ALPHA.get("cutting", 0.06)
+            lo, hi = scfg["lo"] * 0.92, scfg["hi"] * 1.05
+        elif spindle_state == "spinup":
+            if self.prev_speed > 500:
+                target = min(self.prev_speed * 1.12, 2200.0)
+            else:
+                target = 2000.0
+            alpha = _SPEED_ALPHA.get("spinup", 0.14)
+            lo, hi = 0.0, 2500.0
+        else:
+            target = _STATE_SPEED_TARGET.get(stage, 0.0)
+            alpha = _SPEED_ALPHA.get(spindle_state, 0.20)
+            lo, hi = 0.0, 200.0
+
+        new_speed = _ema(self.prev_speed, target, alpha)
+        new_speed = _clamp(new_speed, lo, hi)
+        self.prev_speed = new_speed
+        return new_speed
+
+    def _calc_current(
+        self,
+        stage: str,
+        spindle_state: str,
+        process: Optional[str],
+        speed: float,
+        load: float,
+    ) -> float:
+        """电流：空载基础 + 负载映射，有轻微 EMA 滞后。"""
+        if process is not None:
+            ccfg = _PROCESS_CURRENT_CFG[process]
+            # 转速修正：实际转速偏低时电流也偏低（恒功率特性简化）
+            speed_ratio = _clamp(speed / _PROCESS_SPEED_CFG[process]["target"], 0.5, 1.1)
+            noise = self._rng.gauss(0, ccfg["noise"])
+            target = (ccfg["base"] + ccfg["load_factor"] * load + noise) * speed_ratio
+
+            # 判断切削子状态（entry/cutting/exit）决定 alpha
+            if self._load_gen._last_stage in _STAGE_TO_PROCESS:
+                pcfg = _PROCESS_CFG[process]
+                time_to_end = (self._load_gen.prev_load > 0 and
+                               hasattr(self._load_gen, '_last_stage'))
+                # 简化：直接用较小 alpha 保持平滑
+                alpha = 0.10
+            else:
+                alpha = 0.10
+
+            lo, hi = ccfg["lo"], ccfg["hi"]
+        else:
+            # 非切削状态
+            state_key = stage if stage in _STATE_CURRENT_CFG else "idle"
+            ccfg = _STATE_CURRENT_CFG[state_key]
+            noise = self._rng.gauss(0, ccfg["noise"])
+            # 转速联动：主轴停止时电流趋近 0
+            speed_factor = _clamp(speed / 100.0, 0.0, 1.0) if speed < 100 else 1.0
+            target = (ccfg["base"] + noise) * speed_factor
+            alpha = ccfg["alpha"]
+            lo, hi = ccfg["lo"], ccfg["hi"]
+
+        new_current = _ema(self.prev_current, target, alpha)
+        new_current = _clamp(new_current, lo, hi)
+        self.prev_current = new_current
+        return new_current
+
+
 # ---------------------------------------------------------------------------
 # 阶段配置
 # ---------------------------------------------------------------------------
@@ -421,14 +609,15 @@ def __init__(
     ):
         self._ambient = ambient_temperature
         self._rng = random.Random(seed)
-        # 热惯性系数（每 tick 向目标温度靠近的比例）
         self._thermal_alpha = thermal_alpha
         self._state = GeneratorState(
             thermal_state=ambient_temperature,
             last_surface_roughness=1.0,
         )
-        # 主轴负载生成器（状态驱动 + EMA 平滑）
+        # 负载生成器（状态驱动 + ramp + EMA）
         self._spindle_load_gen = SpindleLoadGenerator(self._rng)
+        # 主轴联动生成器（speed / load / current 统一驱动）
+        self._spindle_gen = CncSpindleGenerator(self._rng, self._spindle_load_gen)
 
     # ------------------------------------------------------------------
     # 公开 API
@@ -464,57 +653,55 @@ def generate(self, t: float, dt: float, stage: str) -> MetricFrame:
         # ── 1. 材料扰动（慢变量，低频正弦 + 随机游走）──────────────────────
         material_variation = self._calc_material_variation(t, dt, state)
 
-        # ── 2. 切削周期波动 ──────────────────────────────────────────────────
+        # ── 2. 切削周期波动（feed_rate 使用）────────────────────────────────
         cutting_cycle_wave = self._calc_cutting_cycle_wave(t, dt, stage, state, profile)
 
         # ── 3. feed_rate ──────────────────────────────────────────────────────
         feed_rate = self._calc_feed_rate(profile, cutting_cycle_wave, stage)
 
-        # ── 4. spindle_speed ──────────────────────────────────────────────────
-        spindle_speed = self._calc_spindle_speed(profile, stage)
-
-        # ── 5. cutting_intensity（归一化切削强度）────────────────────────────
+        # ── 4. cutting_intensity（供其他指标参考，不再驱动 load）────────────
         cutting_intensity = self._calc_cutting_intensity(
             feed_rate, stage, material_variation, profile
         )
 
-        # ── 6. spindle_load（状态驱动 + ramp + EMA 平滑）────────────────────
-        spindle_load = self._calc_spindle_load(
-            stage, t, dt, state
+        # ── 5. spindle_speed / spindle_load / spindle_current（联动生成）────
+        spindle_speed, spindle_load, spindle_current = self._spindle_gen.generate(
+            t=t,
+            stage=stage,
+            spindle_state=state.spindle_state,
+            cutting_elapsed=state.cutting_elapsed,
+            cutting_total=state.cutting_total,
         )
 
-        # ── 7. spindle_current（对 load 有 1~2 tick 滞后）────────────────────
-        spindle_current = self._calc_spindle_current(profile, spindle_load, state)
-
-        # ── 8. vibration（三轴，各有小幅随机偏差）────────────────────────────
+        # ── 6. vibration（三轴，各有小幅随机偏差）────────────────────────────
         vib_x, vib_y, vib_z = self._calc_vibration(
             profile, spindle_load, feed_rate, stage
         )
 
-        # ── 9. acoustic_emission ─────────────────────────────────────────────
+        # ── 7. acoustic_emission ─────────────────────────────────────────────
         vibration_rms = (vib_x + vib_y + vib_z) / 3.0
         acoustic_emission = self._calc_acoustic(profile, vibration_rms, spindle_load)
 
-        # ── 10. tool_temperature（热惯性模型）────────────────────────────────
+        # ── 8. tool_temperature（热惯性模型）────────────────────────────────
         tool_temperature = self._calc_temperature(
             profile, spindle_load, spindle_current, dt, state
         )
 
-        # ── 11. tool_wear_value（单调递增）────────────────────────────────────
+        # ── 9. tool_wear_value（单调递增）────────────────────────────────────
         tool_wear_value = self._calc_tool_wear(profile, spindle_load, dt, state)
 
-        # ── 12. surface_roughness ─────────────────────────────────────────────
+        # ── 10. surface_roughness ─────────────────────────────────────────────
         surface_roughness = self._calc_surface_roughness(
             profile, vibration_rms, tool_wear_value, stage, state
         )
 
-        # ── 13. 更新滞后缓冲区 ────────────────────────────────────────────────
+        # ── 11. 更新滞后缓冲区 ────────────────────────────────────────────────
         state.load_lag_buffer.pop(0)
         state.load_lag_buffer.append(spindle_load)
         state.last_spindle_load = spindle_load
         state.last_surface_roughness = surface_roughness
 
-        # ── 14. 构造帧 + clamp ────────────────────────────────────────────────
+        # ── 12. 构造帧 + clamp ────────────────────────────────────────────────
         frame = MetricFrame(
             timestamp=t,
             stage=stage,
@@ -614,22 +801,6 @@ def _calc_feed_rate(
         noise = self._rng.gauss(0, base * noise_ratio)
         return max(profile.feed_rate_min, min(profile.feed_rate_max, base + noise))
 
-    def _calc_spindle_speed(self, profile: StageProfile, stage: str) -> float:
-        """
-        主轴转速正常状态下稳定。
-        roughing 允许 2% 波动，finishing 允许 0.8% 波动。
-        """
-        if stage in ("idle", "tool_change"):
-            return self._rng.uniform(profile.spindle_speed_min, profile.spindle_speed_max)
-        noise_pct = {
-            "roughing": 0.020,
-            "semi_finishing": 0.015,
-            "finishing": 0.008,
-        }.get(stage, 0.015)
-        base = profile.spindle_speed_mid
-        noise = self._rng.gauss(0, base * noise_pct)
-        return max(profile.spindle_speed_min, min(profile.spindle_speed_max, base + noise))
-
     def _calc_cutting_intensity(
         self,
         feed_rate: float,
@@ -651,53 +822,6 @@ def _calc_cutting_intensity(
             norm_feed = max(0.0, min(1.0, norm_feed))
         return max(0.0, min(1.0, norm_feed * stage_factor * material_variation))
 
-    def _calc_spindle_load(
-        self,
-        stage: str,
-        t: float,
-        dt: float,
-        state: GeneratorState,
-    ) -> float:
-        """
-        主轴负载（%）—— 委托给 SpindleLoadGenerator。
-        传入切削计时信息，实现切入/切出 ramp。
-        """
-        return self._spindle_load_gen.generate(
-            t=t,
-            stage=stage,
-            cutting_elapsed=state.cutting_elapsed,
-            cutting_total=state.cutting_total,
-        )
-
-    def _calc_spindle_current(
-        self,
-        profile: StageProfile,
-        spindle_load: float,
-        state: GeneratorState,
-    ) -> float:
-        """
-        主轴电流（A），对负载有 1~2 tick 滞后（一阶低通）。
-        current = idle_current + k × lag_load + noise
-        k 由阶段电流范围和负载范围反推。
-        """
-        # 滞后混合：60% 当前负载 + 25% 上一 tick + 15% 两 tick 前
-        lag_load = spindle_load * 0.60 + state.load_lag_buffer[1] * 0.25 + state.load_lag_buffer[0] * 0.15
-        # 线性映射：load_min → current_min，load_max → current_max
-        load_range = profile.spindle_load_max - profile.spindle_load_min
-        current_range = profile.spindle_current_max - profile.spindle_current_min
-        if load_range > 0:
-            k = current_range / load_range
-        else:
-            k = 0.0
-        current_base = profile.spindle_current_min + k * (lag_load - profile.spindle_load_min)
-        noise = self._rng.gauss(
-            0,
-            (profile.spindle_current_max - profile.spindle_current_min)
-            * (1.0 - profile.stability_factor)
-            * 0.03,
-        )
-        return max(profile.spindle_current_min, min(profile.spindle_current_max, current_base + noise))
-
     def _calc_vibration(
         self,
         profile: StageProfile,
diff --git a/protoforge/protocols/mtconnect/lathe_simulator.py b/protoforge/protocols/mtconnect/lathe_simulator.py
index da974f8..79f947e 100644
--- a/protoforge/protocols/mtconnect/lathe_simulator.py
+++ b/protoforge/protocols/mtconnect/lathe_simulator.py
@@ -152,9 +152,20 @@ def __call__(self, device_instance: Any) -> None:
         # 2. 确定当前 MetricGenerator 阶段
         stage = self._get_metric_stage()
 
-        # 3. 把 CUTTING 状态总时长同步给 MetricGenerator（用于 exit_ramp 计算）
+        # 3. 把状态机信息同步给 MetricGenerator
         if self._state == _State.CUTTING:
             self._metric_gen.state.cutting_total = self._state_duration
+        # spindle_state 用于转速 EMA alpha 控制
+        _sm_to_spindle = {
+            _State.IDLE:        "idle",
+            _State.SPINUP:      "spinup",
+            _State.CUTTING:     "cutting",
+            _State.DECEL:       "decel",
+            _State.TOOL_CHANGE: "tool_change",
+            _State.TOOL_BREAK:  "idle",
+            _State.CHIP_WRAP:   "cutting",
+        }
+        self._metric_gen.state.spindle_state = _sm_to_spindle.get(self._state, "idle")
 
         # 4. 生成正常加工 MetricFrame（含联动 + 噪声 + clamp）
         frame = self._metric_gen.generate(t=t, dt=1.0, stage=stage)
@@ -216,11 +227,12 @@ def _on_idle(self) -> None:
         self._condition_native_code = ""
         self._wrap_load_increment = 0.0
         if self._state_elapsed >= self._state_duration:
-            self._spindle_target = random.uniform(800, 2500)
+            # 目标转速按即将开始的切削工艺设定（粗加工 2000 RPM）
+            self._spindle_target = 2000.0
             self._program_line = 1
             self._block_idx = 0
             self._cutting_stage = "roughing"
-            self._transition(_State.SPINUP, random.uniform(3, 6))
+            self._transition(_State.SPINUP, random.uniform(4, 8))
 
     def _on_spinup(self) -> None:
         self._spindle_actual = self._smooth(

From cb3b770cf1c25864fed939657fcd670ce6dffc1d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 9 Jun 2026 11:36:44 +0800
Subject: [PATCH 47/55] fix: keep spindle running between cutting cycles via task_state

---
 protoforge/core/cnc_metric_generator.py       |  68 ++++++++---
 .../protocols/mtconnect/lathe_simulator.py    | 110 +++++++++++++++---
 2 files changed, 148 insertions(+), 30 deletions(-)

diff --git a/protoforge/core/cnc_metric_generator.py b/protoforge/core/cnc_metric_generator.py
index 49d83d3..097e616 100644
--- a/protoforge/core/cnc_metric_generator.py
+++ b/protoforge/core/cnc_metric_generator.py
@@ -119,6 +119,9 @@ class GeneratorState:
     cutting_total: float = 30.0
     # 状态机内部状态（idle/spinup/cutting/decel/tool_change），用于转速平滑
     spindle_state: str = "idle"
+    # 任务级状态：process_running = 主轴保持目标转速；idle = 主轴可以停
+    # 由 LatheSimulator 的 _STATE_TO_TASK 映射传入
+    task_state: str = "idle"
 
 
 # ---------------------------------------------------------------------------
@@ -392,6 +395,8 @@ def __init__(self, rng: random.Random, load_gen: SpindleLoadGenerator):
 
         self.prev_speed: float = 0.0
         self.prev_current: float = 0.0
+        # 最近一次切削工艺目标转速（air_cut 间隙保持用）
+        self._last_process_speed: float = 2000.0
 
     def generate(
         self,
@@ -400,6 +405,7 @@ def generate(
         spindle_state: str = "cutting",
         cutting_elapsed: float = 0.0,
         cutting_total: float = 30.0,
+        task_state: str = "idle",
     ) -> tuple[float, float, float]:
         """
         生成 (spindle_speed, spindle_load, spindle_current)。
@@ -410,11 +416,13 @@ def generate(
             spindle_state:   LatheSimulator 内部状态（idle/spinup/cutting/decel/tool_change）。
             cutting_elapsed: 切削阶段已过秒数（传给负载生成器）。
             cutting_total:   切削阶段总时长（传给负载生成器）。
+            task_state:      任务级状态（process_running/idle）。
+                             process_running 时主轴保持目标转速，即使 stage=idle（air_cut 间隙）。
         """
         process = _STAGE_TO_PROCESS.get(stage)   # None = 非切削
 
         # ── 1. 主轴转速 ────────────────────────────────────────────────────
-        speed = self._calc_speed(stage, spindle_state, process)
+        speed = self._calc_speed(stage, spindle_state, process, task_state)
 
         # ── 2. 主轴负载（委托 SpindleLoadGenerator）────────────────────────
         load = self._load_gen.generate(
@@ -425,31 +433,59 @@ def generate(
         )
 
         # ── 3. 主轴电流（由转速 + 负载推导）───────────────────────────────
-        current = self._calc_current(stage, spindle_state, process, speed, load)
+        current = self._calc_current(stage, spindle_state, process, speed, load, task_state)
 
         return speed, load, current
 
     # ------------------------------------------------------------------
 
-    def _calc_speed(self, stage: str, spindle_state: str, process: Optional[str]) -> float:
-        """转速：按工艺目标 + EMA 平滑，非切削时降到 0。"""
+    def _calc_speed(
+        self,
+        stage: str,
+        spindle_state: str,
+        process: Optional[str],
+        task_state: str = "idle",
+    ) -> float:
+        """
+        转速：按工艺目标 + EMA 平滑，非切削时降到 0。
+
+        task_state="process_running" 时，即使 stage=idle（air_cut 间隙），
+        主轴也保持最近的切削工艺目标转速，不降到 0。
+        只有 task_state="idle" 时才允许主轴停转。
+        """
         if process is not None:
+            # 切削阶段：按工艺目标转速
             scfg = _PROCESS_SPEED_CFG[process]
             noise = self._rng.gauss(0, scfg["noise"])
             target = scfg["target"] + noise
             # 首次从停止状态进入切削：直接跳到目标转速附近，避免长收敛期
             if self.prev_speed < scfg["lo"] * 0.5:
                 self.prev_speed = scfg["target"]
+                # 记录本轮使用的工艺目标（供 air_cut 保持用）
+                self._last_process_speed = scfg["target"]
+            else:
+                self._last_process_speed = scfg["target"]
             alpha = _SPEED_ALPHA.get("cutting", 0.06)
             lo, hi = scfg["lo"] * 0.92, scfg["hi"] * 1.05
         elif spindle_state == "spinup":
+            # 升速阶段：向目标转速爬升
             if self.prev_speed > 500:
                 target = min(self.prev_speed * 1.12, 2200.0)
             else:
                 target = 2000.0
             alpha = _SPEED_ALPHA.get("spinup", 0.14)
             lo, hi = 0.0, 2500.0
+        elif task_state == "process_running":
+            # 任务运行中的非切削间隙（air_cut / decel_cycle）：主轴保持转速
+            # 目标是最近一次切削工艺的目标转速，略加噪声
+            base_target = getattr(self, "_last_process_speed", 2000.0)
+            noise = self._rng.gauss(0, base_target * 0.008)
+            target = base_target + noise
+            alpha = _SPEED_ALPHA.get("cutting", 0.06)   # 保持稳定
+            lo = base_target * 0.90
+            hi = base_target * 1.10
         else:
+            # 任务级停机（idle / tool_change / 故障）：主轴降到 0
             target = _STATE_SPEED_TARGET.get(stage, 0.0)
             alpha = _SPEED_ALPHA.get(spindle_state, 0.20)
             lo, hi = 0.0, 200.0
@@ -466,6 +502,7 @@ def _calc_current(
         process: Optional[str],
         speed: float,
         load: float,
+        task_state: str = "idle",
     ) -> float:
         """电流：空载基础 + 负载映射，有轻微 EMA 滞后。"""
         if process is not None:
@@ -474,20 +511,18 @@ def _calc_current(
             speed_ratio = _clamp(speed / _PROCESS_SPEED_CFG[process]["target"], 0.5, 1.1)
             noise = self._rng.gauss(0, ccfg["noise"])
             target = (ccfg["base"] + ccfg["load_factor"] * load + noise) * speed_ratio
-
-            # 判断切削子状态（entry/cutting/exit）决定 alpha
-            if self._load_gen._last_stage in _STAGE_TO_PROCESS:
-                pcfg = _PROCESS_CFG[process]
-                time_to_end = (self._load_gen.prev_load > 0 and
-                               hasattr(self._load_gen, '_last_stage'))
-                # 简化：直接用较小 alpha 保持平滑
-                alpha = 0.10
-            else:
-                alpha = 0.10
-
+            alpha = 0.10
+            lo, hi = ccfg["lo"], ccfg["hi"]
+        elif task_state == "process_running":
+            # air_cut / decel_cycle：主轴空转，电流略低于切削
+            ccfg = _STATE_CURRENT_CFG["air_cut"]
+            noise = self._rng.gauss(0, ccfg["noise"])
+            speed_factor = _clamp(speed / 2000.0, 0.5, 1.2)
+            target = (ccfg["base"] + noise) * speed_factor
+            alpha = ccfg["alpha"]
             lo, hi = ccfg["lo"], ccfg["hi"]
         else:
-            # 非切削状态
+            # 非切削状态（idle / tool_change）
             state_key = stage if stage in _STATE_CURRENT_CFG else "idle"
             ccfg = _STATE_CURRENT_CFG[state_key]
             noise = self._rng.gauss(0, ccfg["noise"])
@@ -671,6 +706,7 @@ def generate(self, t: float, dt: float, stage: str) -> MetricFrame:
             spindle_state=state.spindle_state,
             cutting_elapsed=state.cutting_elapsed,
             cutting_total=state.cutting_total,
+            task_state=getattr(state, "task_state", "idle"),
         )
 
         # ── 6. vibration（三轴，各有小幅随机偏差）────────────────────────────
diff --git a/protoforge/protocols/mtconnect/lathe_simulator.py b/protoforge/protocols/mtconnect/lathe_simulator.py
index 79f947e..117bcfb 100644
--- a/protoforge/protocols/mtconnect/lathe_simulator.py
+++ b/protoforge/protocols/mtconnect/lathe_simulator.py
@@ -3,10 +3,19 @@
 
 仿真 CNC 车床通过 MTConnect 协议能真实输出的信号。
 
-工作周期：
-  IDLE → SPINUP → CUTTING → DECEL → TOOL_CHANGE → IDLE
-                     ↓ (偶发，两种故障路径)
-             TOOL_BREAK / CHIP_WRAP → TOOL_CHANGE → IDLE
+工作周期（任务级）：
+  IDLE → SPINUP → [切削大循环] → SPINDOWN → TOOL_CHANGE → IDLE
+
+切削大循环（周期级，主轴保持转速）：
+  AIR_CUT → CUTTING → DECEL_CYCLE → AIR_CUT → ...（循环 N 次后退出）
+                ↓ (偶发，两种故障路径)
+        TOOL_BREAK / CHIP_WRAP → TOOL_CHANGE → IDLE
+
+关键设计：
+  - AIR_CUT 状态：主轴已启动，快速定位中，主轴转速保持目标值
+  - CUTTING 和 AIR_CUT 都属于 task_state="process_running"，主轴不停
+  - 只有 IDLE / TOOL_CHANGE / 故障恢复 时 task_state="idle"，主轴才降到 0
+  - 每完成 cycles_per_task 个切削周期后才真正回到 IDLE（换刀或停机）
 
 每个 tick 的处理流程：
   1. 状态机推进（确定当前 stage）
@@ -41,8 +50,10 @@
 class _State(Enum):
     IDLE = "idle"
     SPINUP = "spinup"
+    AIR_CUT = "air_cut"        # 主轴运转，快速定位，不切削
     CUTTING = "cutting"
-    DECEL = "decel"
+    DECEL_CYCLE = "decel_cycle"  # 周期级减速（主轴保持转速，只减进给）
+    DECEL = "decel"              # 任务级降速（主轴降到 0）
     TOOL_CHANGE = "tool_change"
     TOOL_BREAK = "tool_break"
     CHIP_WRAP = "chip_wrap"
@@ -52,13 +63,28 @@ class _State(Enum):
 _STATE_TO_STAGE: dict[_State, str] = {
     _State.IDLE:        "idle",
     _State.SPINUP:      "idle",
+    _State.AIR_CUT:     "idle",        # air_cut 阶段负载模型用 idle，但主轴不停
     _State.CUTTING:     "roughing",    # 默认粗加工，子阶段由 _cutting_stage 动态切换
-    _State.DECEL:       "idle",
+    _State.DECEL_CYCLE: "idle",        # 周期间减速，主轴不停
+    _State.DECEL:       "idle",        # 任务级降速
     _State.TOOL_CHANGE: "tool_change",
     _State.TOOL_BREAK:  "idle",
     _State.CHIP_WRAP:   "roughing",
 }
 
+# task_state mapping: process_running = spindle keeps running; idle = spindle may stop
+_STATE_TO_TASK: dict[_State, str] = {
+    _State.IDLE:        "idle",
+    _State.SPINUP:      "process_running",
+    _State.AIR_CUT:     "process_running",
+    _State.CUTTING:     "process_running",
+    _State.DECEL_CYCLE: "process_running",
+    _State.DECEL:       "idle",
+    _State.TOOL_CHANGE: "idle",
+    _State.TOOL_BREAK:  "idle",
+    _State.CHIP_WRAP:   "process_running",
+}
+
 # 刀塔配置（刀位号, 刀具ID）
 _TOOL_TABLE = [
     (1, "T01"),   # 外圆粗车刀
@@ -129,6 +155,11 @@ def __init__(self):
         # 当前切削子阶段（roughing/semi_finishing/finishing）
         self._cutting_stage = "roughing"
 
+        # 当前任务内已完成的切削周期数（达到上限后才真正停机）
+        self._cycles_in_task = 0
+        # 每个任务包含多少个切削周期（随机 3~6），到达后进入真正 IDLE
+        self._cycles_per_task = random.randint(3, 6)
+
         # tick 计数，用于传入 BaseMetricGenerator 的 t
         self._tick_count = 0
 
@@ -155,11 +186,14 @@ def __call__(self, device_instance: Any) -> None:
         # 3. 把状态机信息同步给 MetricGenerator
         if self._state == _State.CUTTING:
             self._metric_gen.state.cutting_total = self._state_duration
+
         # spindle_state 用于转速 EMA alpha 控制
         _sm_to_spindle = {
             _State.IDLE:        "idle",
             _State.SPINUP:      "spinup",
+            _State.AIR_CUT:     "cutting",   # air_cut 保持转速（cutting alpha）
             _State.CUTTING:     "cutting",
+            _State.DECEL_CYCLE: "cutting",   # 周期间不降速
             _State.DECEL:       "decel",
             _State.TOOL_CHANGE: "tool_change",
             _State.TOOL_BREAK:  "idle",
@@ -167,6 +201,10 @@ def __call__(self, device_instance: Any) -> None:
         }
         self._metric_gen.state.spindle_state = _sm_to_spindle.get(self._state, "idle")
 
+        # task_state：process_running = 主轴保持目标转速；idle = 主轴可以停
+        task_state = _STATE_TO_TASK.get(self._state, "idle")
+        self._metric_gen.state.task_state = task_state
+
         # 4. 生成正常加工 MetricFrame（含联动 + 噪声 + clamp）
         frame = self._metric_gen.generate(t=t, dt=1.0, stage=stage)
 
@@ -189,7 +227,9 @@ def _step_state_machine(self) -> None:
         dispatch = {
             _State.IDLE:        self._on_idle,
             _State.SPINUP:      self._on_spinup,
+            _State.AIR_CUT:     self._on_air_cut,
             _State.CUTTING:     self._on_cutting,
+            _State.DECEL_CYCLE: self._on_decel_cycle,
             _State.DECEL:       self._on_decel,
             _State.TOOL_CHANGE: self._on_tool_change,
             _State.TOOL_BREAK:  self._on_tool_break,
@@ -227,17 +267,33 @@ def _on_idle(self) -> None:
         self._condition_native_code = ""
         self._wrap_load_increment = 0.0
         if self._state_elapsed >= self._state_duration:
-            # 目标转速按即将开始的切削工艺设定（粗加工 2000 RPM）
+            # 开始新任务：主轴升速目标转速（粗加工 2000 RPM）
             self._spindle_target = 2000.0
             self._program_line = 1
             self._block_idx = 0
             self._cutting_stage = "roughing"
+            self._cycles_in_task = 0
+            self._cycles_per_task = random.randint(3, 6)
             self._transition(_State.SPINUP, random.uniform(4, 8))
 
     def _on_spinup(self) -> None:
         self._spindle_actual = self._smooth(
             self._spindle_actual, self._spindle_target, 0.25
         )
+        if self._state_elapsed >= self._state_duration:
+            self._transition(_State.AIR_CUT, random.uniform(3, 6))
+
+    def _on_air_cut(self) -> None:
+        """Spindle running, rapid positioning, no cutting; spindle speed is held near the target."""
+        noise = random.gauss(0, self._spindle_target * 0.01)  # jitter with sigma = 1% of the target
+        self._spindle_actual = max(
+            self._spindle_target * 0.95,
+            min(self._spindle_target * 1.05, self._spindle_actual + noise),
+        )
+        self._feed_actual = 0.0
+        # Rapid traverse back to the tool start point
+        self._x_pos = self._smooth(self._x_pos, 50.0, 0.30)
+        self._z_pos = self._smooth(self._z_pos, 2.0, 0.30)
         if self._state_elapsed >= self._state_duration:
             self._transition(_State.CUTTING, random.uniform(35, 65))
 
@@ -272,20 +328,41 @@ def _on_cutting(self) -> None:
                 return
 
         if self._state_elapsed >= self._state_duration:
-            self._transition(_State.DECEL, random.uniform(3, 5))
+            # 周期结束：进入 DECEL_CYCLE（主轴保持转速，只停进给）
+            self._transition(_State.DECEL_CYCLE, random.uniform(2, 4))
+
+    def _on_decel_cycle(self) -> None:
+        """
+        Cycle-level deceleration: only the feed stops; the spindle speed is held.
+        Afterwards: if the task still has cycles left, return to AIR_CUT; otherwise enter task-level DECEL.
+        """
+        self._feed_actual = self._smooth(self._feed_actual, 0.0, 0.40)
+        # Hold the spindle speed (small noise, kept within 95%..105% of the target)
+        noise = random.gauss(0, self._spindle_target * 0.01)
+        self._spindle_actual = max(
+            self._spindle_target * 0.95,
+            min(self._spindle_target * 1.05, self._spindle_actual + noise),
+        )
+        if self._state_elapsed >= self._state_duration:
+            self._cycles_in_task += 1
+            self._part_count += 1
+            if self._cycles_in_task >= self._cycles_per_task:
+                # All cycles of the task are done: perform the real spin-down
+                if self._part_count % 5 == 0:  # NOTE(review): only evaluated at task end - confirm intended
+                    self._metric_gen.reset_wear()
+                self._transition(_State.DECEL, random.uniform(3, 5))
+            else:
+                # Continue with the next cutting cycle: back to AIR_CUT
+                self._transition(_State.AIR_CUT, random.uniform(3, 6))
 
     def _on_decel(self) -> None:
+        """任务级降速：主轴降到 0，准备换刀或停机。"""
         self._spindle_actual = self._smooth(self._spindle_actual, 0.0, 0.20)
         self._feed_actual = self._smooth(self._feed_actual, 0.0, 0.30)
         self._x_pos = self._smooth(self._x_pos, 150.0, 0.20)
         self._z_pos = self._smooth(self._z_pos, 50.0, 0.20)
         if self._state_elapsed >= self._state_duration:
-            self._part_count += 1
-            if self._part_count % 5 == 0:
-                self._metric_gen.reset_wear()
-                self._transition(_State.TOOL_CHANGE, random.uniform(4, 8))
-            else:
-                self._transition(_State.IDLE, random.uniform(3, 6))
+            self._transition(_State.TOOL_CHANGE, random.uniform(4, 8))
 
     def _on_tool_change(self) -> None:
         self._spindle_actual = 0.0
@@ -333,10 +410,12 @@ def _update_cnc_points(self, vals: dict[str, Any], frame) -> None:
         """
         state = self._state
         is_cutting = state == _State.CUTTING
+        is_air_cut = state == _State.AIR_CUT
         is_tool_break = state == _State.TOOL_BREAK
         is_chip_wrap = state == _State.CHIP_WRAP
         is_fault = is_tool_break or is_chip_wrap
         is_tool_change = state == _State.TOOL_CHANGE
+        is_decel_cycle = state == _State.DECEL_CYCLE
 
         cur_tool_no, cur_tool_id = _TOOL_TABLE[self._tool_idx]
 
@@ -355,6 +434,9 @@ def _update_cnc_points(self, vals: dict[str, Any], frame) -> None:
         elif state == _State.IDLE:
             vals["execution"] = "READY"
             vals["controller_mode"] = "AUTOMATIC"
+        elif is_air_cut or is_decel_cycle:
+            vals["execution"] = "ACTIVE"
+            vals["controller_mode"] = "AUTOMATIC"
         else:
             vals["execution"] = "ACTIVE"
             vals["controller_mode"] = "AUTOMATIC"

From c618fc56036b1849df403212c7d9e745240604a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 9 Jun 2026 13:55:38 +0800
Subject: [PATCH 48/55] fix

---
 protoforge/core/cnc_metric_generator.py       | 262 +++++++++++++-----
 .../protocols/mtconnect/lathe_simulator.py    |  26 +-
 2 files changed, 196 insertions(+), 92 deletions(-)

diff --git a/protoforge/core/cnc_metric_generator.py b/protoforge/core/cnc_metric_generator.py
index 097e616..457d654 100644
--- a/protoforge/core/cnc_metric_generator.py
+++ b/protoforge/core/cnc_metric_generator.py
@@ -122,6 +122,9 @@ class GeneratorState:
     # 任务级状态：process_running = 主轴保持目标转速；idle = 主轴可以停
     # 由 LatheSimulator 的 _STATE_TO_TASK 映射传入
     task_state: str = "idle"
+    # 加工周期状态：air_cut / entry_cut / cutting / exit_cut
+    # cycle_state 只描述负载形态，不控制主轴启停或转速档位
+    cycle_state: str = "air_cut"
 
 
 # ---------------------------------------------------------------------------
@@ -394,9 +397,30 @@ def __init__(self, rng: random.Random, load_gen: SpindleLoadGenerator):
         self._load_gen = load_gen  # 复用已有的负载生成器
 
         self.prev_speed: float = 0.0
+        self.prev_load: float = 0.0
         self.prev_current: float = 0.0
-        # 最近一次切削工艺目标转速（air_cut 间隙保持用）
-        self._last_process_speed: float = 2000.0
+        self.process: str = "rough"
+
+        self.current_cycle_id: Optional[str] = None
+        self.cycle_factor: float = 1.0
+        self.phase1: float = 0.0
+        self.phase2: float = 0.0
+        self.material_phase: float = 0.0
+        self.entry_ramp_seconds: float = 6.0
+        self.exit_ramp_seconds: float = 4.5
+        self._cycle_cutting_load: float = 55.0
+
+    def start_new_cycle(self, cycle_id: str) -> None:
+        """Refresh the per-cycle perturbation parameters once per rough cutting cycle."""
+        if cycle_id == self.current_cycle_id:  # same cycle as last call: keep the current parameters
+            return
+        self.current_cycle_id = cycle_id
+        self.cycle_factor = self._rng.uniform(0.92, 1.08)  # scales the 55.0 base in _rough_cutting_load_target
+        self.phase1 = self._rng.uniform(0, 2 * math.pi)  # phase of the slow load wave
+        self.phase2 = self._rng.uniform(0, 2 * math.pi)  # phase of the faster cutting wave
+        self.material_phase = self._rng.uniform(0, 2 * math.pi)  # phase of the material drift term
+        self.entry_ramp_seconds = self._rng.uniform(4.0, 8.0)  # length of the entry_cut window
+        self.exit_ramp_seconds = self._rng.uniform(3.0, 6.0)  # length of the exit_cut window
 
     def generate(
         self,
@@ -419,21 +443,27 @@ def generate(
             task_state:      任务级状态（process_running/idle）。
                              process_running 时主轴保持目标转速，即使 stage=idle（air_cut 间隙）。
         """
-        process = _STAGE_TO_PROCESS.get(stage)   # None = 非切削
+        # 本轮优化固定为 rough 正常工况；stage 仍原样写入 MetricFrame/标签。
+        process = "rough"
+        cycle_state = self._resolve_cycle_state(stage, task_state, cutting_elapsed, cutting_total)
+        cycle_id = self._resolve_cycle_id(t, stage, task_state, cutting_elapsed, cutting_total)
+        self.start_new_cycle(cycle_id)
 
         # ── 1. 主轴转速 ────────────────────────────────────────────────────
         speed = self._calc_speed(stage, spindle_state, process, task_state)
 
-        # ── 2. 主轴负载（委托 SpindleLoadGenerator）────────────────────────
-        load = self._load_gen.generate(
+        # 保持旧负载生成器的周期状态同步，避免其他调用路径依赖其内部状态。
+        self._load_gen.generate(
             t=t,
             stage=stage,
             cutting_elapsed=cutting_elapsed,
             cutting_total=cutting_total,
         )
+        # ── 2. 主轴负载（rough 正常工况，cycle_state 只影响负载形态）───────
+        load = self._calc_rough_load(t, speed, task_state, cycle_state, cutting_elapsed, cutting_total)
 
         # ── 3. 主轴电流（由转速 + 负载推导）───────────────────────────────
-        current = self._calc_current(stage, spindle_state, process, speed, load, task_state)
+        current = self._calc_current(stage, spindle_state, process, speed, load, task_state, cycle_state)
 
         return speed, load, current
 
@@ -447,54 +477,129 @@ def _calc_speed(
         task_state: str = "idle",
     ) -> float:
         """
-        转速：按工艺目标 + EMA 平滑，非切削时降到 0。
-
-        task_state="process_running" 时，即使 stage=idle（air_cut 间隙），
-        主轴也保持最近的切削工艺目标转速，不降到 0。
-        只有 task_state="idle" 时才允许主轴停转。
+        转速只由任务级状态控制启停；rough 周期状态不切换转速档位。
         """
-        if process is not None:
-            # 切削阶段：按工艺目标转速
-            scfg = _PROCESS_SPEED_CFG[process]
-            noise = self._rng.gauss(0, scfg["noise"])
-            target = scfg["target"] + noise
-            # 首次从停止状态进入切削：直接跳到目标转速附近，避免长收敛期
-            if self.prev_speed < scfg["lo"] * 0.5:
-                self.prev_speed = scfg["target"]
-                # 记录本轮使用的工艺目标（供 air_cut 保持用）
-                self._last_process_speed = scfg["target"]
-            else:
-                self._last_process_speed = scfg["target"]
-            alpha = _SPEED_ALPHA.get("cutting", 0.06)
-            lo, hi = scfg["lo"] * 0.92, scfg["hi"] * 1.05
-        elif spindle_state == "spinup":
-            # 升速阶段：向目标转速爬升
-            if self.prev_speed > 500:
-                target = min(self.prev_speed * 1.12, 2200.0)
-            else:
-                target = 2000.0
-            alpha = _SPEED_ALPHA.get("spinup", 0.14)
-            lo, hi = 0.0, 2500.0
-        elif task_state == "process_running":
-            # 任务运行中的非切削间隙（air_cut / decel_cycle）：主轴保持转速
-            # 目标是最近一次切削工艺的目标转速，略加噪声
-            base_target = getattr(self, "_last_process_speed", 2000.0)
-            noise = self._rng.gauss(0, base_target * 0.008)
-            target = base_target + noise
-            alpha = _SPEED_ALPHA.get("cutting", 0.06)   # 保持稳定
-            lo = base_target * 0.90
-            hi = base_target * 1.10
+        if task_state in ("idle", "spindle_off", "tool_change"):
+            target = 0.0
+            alpha = self._rng.uniform(0.12, 0.25)
         else:
-            # 任务级停机（idle / tool_change / 故障）：主轴降到 0
-            target = _STATE_SPEED_TARGET.get(stage, 0.0)
-            alpha = _SPEED_ALPHA.get(spindle_state, 0.20)
-            lo, hi = 0.0, 200.0
+            target = 2000.0
+            if spindle_state == "spinup" or self.prev_speed < 1750.0:
+                alpha = self._rng.uniform(0.10, 0.18)
+            else:
+                alpha = self._rng.uniform(0.03, 0.08)
 
         new_speed = _ema(self.prev_speed, target, alpha)
-        new_speed = _clamp(new_speed, lo, hi)
+        if task_state not in ("idle", "spindle_off", "tool_change") and new_speed > 1750.0:
+            new_speed += self._rng.uniform(-30.0, 30.0)
+        new_speed = _clamp(new_speed, 0.0, 2200.0)
         self.prev_speed = new_speed
         return new_speed
 
+    def _resolve_cycle_state(
+        self,
+        stage: str,
+        task_state: str,
+        cutting_elapsed: float,
+        cutting_total: float,
+    ) -> str:  # returns one of: "air_cut", "entry_cut", "cutting", "exit_cut"
+        if task_state != "process_running":
+            return "air_cut"
+        if stage not in _STAGE_TO_PROCESS:  # stage has no cutting process mapped to it
+            return "air_cut"
+
+        entry_s = max(self.entry_ramp_seconds, 0.1)  # floor both ramp lengths at 0.1
+        exit_s = max(self.exit_ramp_seconds, 0.1)
+        time_to_end = cutting_total - cutting_elapsed
+        if cutting_elapsed <= entry_s:  # entry window is tested first, so it wins over the exit window
+            return "entry_cut"
+        if cutting_elapsed > entry_s and time_to_end <= exit_s:
+            return "exit_cut"
+        return "cutting"
+
+    def _resolve_cycle_id(
+        self,
+        t: float,
+        stage: str,
+        task_state: str,
+        cutting_elapsed: float,
+        cutting_total: float,
+    ) -> str:  # id string that start_new_cycle() compares against current_cycle_id
+        if task_state != "process_running":
+            return "stopped"
+        if stage not in _STAGE_TO_PROCESS:
+            return self.current_cycle_id or "air_cut"  # keep the current id while not in a cutting stage
+        cycle_start = t - cutting_elapsed  # start time of the cut, derived from t and elapsed seconds
+        return f"rough:{cycle_start:.0f}:{cutting_total:.0f}"
+
+    def _air_cut_load_target(self, t: float) -> float:  # load target used for the air_cut state
+        target = 7.0 + math.sin(t * 0.20) * 1.5 + self._rng.uniform(-0.8, 0.8)  # base + sine of t + noise
+        return _clamp(target, 5.0, 12.0)
+
+    def _rough_cutting_load_target(self, t: float) -> float:  # load target used while cutting
+        effective_base = 55.0 * self.cycle_factor  # base level scaled by the per-cycle factor
+        slow_wave = math.sin(t * 0.10 + self.phase1) * 5.0
+        cutting_wave = math.sin(t * 0.75 + self.phase2) * 2.5
+        material_drift = math.sin(t * 0.03 + self.material_phase) * 4.0
+        small_noise = self._rng.uniform(-2.0, 2.0)
+        target = effective_base + slow_wave + cutting_wave + material_drift + small_noise
+        return _clamp(target, 35.0, 82.0)  # sum of all terms, limited to 35..82
+
+    def _calc_rough_load(
+        self,
+        t: float,
+        speed: float,
+        task_state: str,
+        cycle_state: str,
+        cutting_elapsed: float,
+        cutting_total: float,
+    ) -> float:
+        """Return this tick's smoothed spindle load, clamped to the active state's range."""
+        if speed <= 50.0:
+            target = self._rng.uniform(0.0, 2.0)
+            alpha = self._rng.uniform(0.30, 0.45)
+            lo, hi = 0.0, 2.0
+        elif task_state == "process_running":
+            air_load = self._air_cut_load_target(t)
+            cutting_target = self._rough_cutting_load_target(t)
+            self._cycle_cutting_load = cutting_target
+
+            if cycle_state == "air_cut":
+                target = air_load
+                alpha = self._rng.uniform(0.18, 0.25)
+                lo, hi = 5.0, 12.0
+            elif cycle_state == "entry_cut":
+                ratio = _clamp(cutting_elapsed / max(self.entry_ramp_seconds, 0.1), 0.0, 1.0)
+                target = air_load + (cutting_target - air_load) * ratio
+                alpha = self._rng.uniform(0.08, 0.14)
+                lo, hi = 5.0, 82.0
+            elif cycle_state == "cutting":
+                target = cutting_target
+                alpha = self._rng.uniform(0.08, 0.15)
+                lo, hi = 35.0, 82.0
+            elif cycle_state == "exit_cut":
+                exit_elapsed = max(0.0, self.exit_ramp_seconds - (cutting_total - cutting_elapsed))
+                ratio = _clamp(exit_elapsed / max(self.exit_ramp_seconds, 0.1), 0.0, 1.0)
+                target = self._cycle_cutting_load * (1.0 - ratio) + air_load * ratio
+                alpha = self._rng.uniform(0.10, 0.18)
+                lo, hi = 5.0, 82.0
+            else:
+                target = air_load
+                alpha = self._rng.uniform(0.18, 0.25)
+                lo, hi = 5.0, 12.0
+        else:
+            target = self._rng.uniform(0.0, 2.0)
+            alpha = self._rng.uniform(0.25, 0.40)
+            lo, hi = 0.0, 2.0
+
+        new_load = _ema(self.prev_load, target, alpha)
+        # Every branch above sets (lo, hi) for its own state, so clamp with them
+        # directly; recomputing the lower bound here gave inverted bounds (35 > 12)
+        # whenever cycle_state was not one of the four known values.
+        new_load = _clamp(new_load, lo, hi)
+        self.prev_load = new_load
+        return new_load
+
     def _calc_current(
         self,
         stage: str,
@@ -503,34 +608,33 @@ def _calc_current(
         speed: float,
         load: float,
         task_state: str = "idle",
+        cycle_state: str = "air_cut",
     ) -> float:
-        """电流：空载基础 + 负载映射，有轻微 EMA 滞后。"""
-        if process is not None:
-            ccfg = _PROCESS_CURRENT_CFG[process]
-            # 转速修正：实际转速偏低时电流也偏低（恒功率特性简化）
-            speed_ratio = _clamp(speed / _PROCESS_SPEED_CFG[process]["target"], 0.5, 1.1)
-            noise = self._rng.gauss(0, ccfg["noise"])
-            target = (ccfg["base"] + ccfg["load_factor"] * load + noise) * speed_ratio
-            alpha = 0.10
-            lo, hi = ccfg["lo"], ccfg["hi"]
-        elif task_state == "process_running":
-            # air_cut / decel_cycle：主轴空转，电流略低于切削
-            ccfg = _STATE_CURRENT_CFG["air_cut"]
-            noise = self._rng.gauss(0, ccfg["noise"])
-            speed_factor = _clamp(speed / 2000.0, 0.5, 1.2)
-            target = (ccfg["base"] + noise) * speed_factor
-            alpha = ccfg["alpha"]
-            lo, hi = ccfg["lo"], ccfg["hi"]
+        """电流由主轴转速和负载推导，避免独立随机曲线。"""
+        if speed <= 50.0:
+            target = self._rng.uniform(0.0, 0.8)
+            alpha = self._rng.uniform(0.25, 0.40)
+            lo, hi = 0.0, 0.8
+        elif cycle_state == "air_cut":
+            target = 3.5 + load * 0.12 + self._rng.uniform(-0.4, 0.4)
+            alpha = self._rng.uniform(0.15, 0.25)
+            lo, hi = 2.5, 6.0
+        elif cycle_state == "entry_cut":
+            target = 3.0 + load * 0.17 + self._rng.uniform(-0.5, 0.5)
+            alpha = self._rng.uniform(0.08, 0.16)
+            lo, hi = 2.5, 17.0
+        elif cycle_state == "cutting":
+            target = 3.0 + load * 0.18 + self._rng.uniform(-0.6, 0.6)
+            alpha = self._rng.uniform(0.08, 0.15)
+            lo, hi = 10.0, 17.0
+        elif cycle_state == "exit_cut":
+            target = 3.0 + load * 0.16 + self._rng.uniform(-0.5, 0.5)
+            alpha = self._rng.uniform(0.10, 0.20)
+            lo, hi = 2.5, 17.0
         else:
-            # 非切削状态（idle / tool_change）
-            state_key = stage if stage in _STATE_CURRENT_CFG else "idle"
-            ccfg = _STATE_CURRENT_CFG[state_key]
-            noise = self._rng.gauss(0, ccfg["noise"])
-            # 转速联动：主轴停止时电流趋近 0
-            speed_factor = _clamp(speed / 100.0, 0.0, 1.0) if speed < 100 else 1.0
-            target = (ccfg["base"] + noise) * speed_factor
-            alpha = ccfg["alpha"]
-            lo, hi = ccfg["lo"], ccfg["hi"]
+            target = 3.0 + load * 0.12 + self._rng.uniform(-0.4, 0.4)
+            alpha = self._rng.uniform(0.15, 0.25)
+            lo, hi = 2.5, 6.0
 
         new_current = _ema(self.prev_current, target, alpha)
         new_current = _clamp(new_current, lo, hi)
@@ -700,13 +804,21 @@ def generate(self, t: float, dt: float, stage: str) -> MetricFrame:
         )
 
         # ── 5. spindle_speed / spindle_load / spindle_current（联动生成）────
+        task_state = getattr(state, "task_state", "idle")
+        spindle_state = state.spindle_state
+        if stage in _STAGE_TO_PROCESS and task_state == "idle":
+            # 兼容直接调用 BaseMetricGenerator.generate(stage="roughing") 的路径：
+            # 显式切削阶段代表正常加工中，而不是任务级停机。
+            task_state = "process_running"
+            if spindle_state == "idle":
+                spindle_state = "cutting"
         spindle_speed, spindle_load, spindle_current = self._spindle_gen.generate(
             t=t,
             stage=stage,
-            spindle_state=state.spindle_state,
+            spindle_state=spindle_state,
             cutting_elapsed=state.cutting_elapsed,
             cutting_total=state.cutting_total,
-            task_state=getattr(state, "task_state", "idle"),
+            task_state=task_state,
         )
 
         # ── 6. vibration（三轴，各有小幅随机偏差）────────────────────────────
diff --git a/protoforge/protocols/mtconnect/lathe_simulator.py b/protoforge/protocols/mtconnect/lathe_simulator.py
index 117bcfb..0465327 100644
--- a/protoforge/protocols/mtconnect/lathe_simulator.py
+++ b/protoforge/protocols/mtconnect/lathe_simulator.py
@@ -108,12 +108,10 @@ class _State(Enum):
     "N0120 M30",
 ]
 
-# 每个零件的加工子阶段序列（按进度切分）
+# 每个零件的加工子阶段序列（本轮正常工况固定为 rough）
 # (阶段名, 开始进度, 结束进度)
 _CUT_SUBSTAGES = [
-    ("roughing",       0.00, 0.45),
-    ("semi_finishing", 0.45, 0.75),
-    ("finishing",      0.75, 1.00),
+    ("roughing", 0.00, 1.00),
 ]
 
 
@@ -245,20 +243,14 @@ def _transition(self, new_state: _State, duration: float) -> None:
     def _get_metric_stage(self) -> str:
         """将状态机状态映射到 MetricGenerator 阶段。"""
         if self._state == _State.CUTTING:
-            return self._cutting_stage
+            return "roughing"
         if self._state == _State.CHIP_WRAP:
             return "roughing"
         return _STATE_TO_STAGE.get(self._state, "idle")
 
     def _update_cutting_substage(self, progress: float) -> None:
-        """根据切削进度动态切换粗/半精/精加工子阶段。"""
-        for stage_name, start, end in _CUT_SUBSTAGES:
-            if start <= progress < end:
-                if self._cutting_stage != stage_name:
-                    self._cutting_stage = stage_name
-                    # 换阶段时不重置磨损，但可记录换刀（此处仅切换参数集）
-                return
-        self._cutting_stage = "finishing"
+        """本轮正常工况只模拟 rough，不在小周期内切换 semi/finish。"""
+        self._cutting_stage = "roughing"
 
     def _on_idle(self) -> None:
         self._spindle_target = 0.0
@@ -281,7 +273,7 @@ def _on_spinup(self) -> None:
             self._spindle_actual, self._spindle_target, 0.25
         )
         if self._state_elapsed >= self._state_duration:
-            self._transition(_State.AIR_CUT, random.uniform(3, 6))
+            self._transition(_State.AIR_CUT, random.uniform(6, 12))
 
     def _on_air_cut(self) -> None:
         """主轴运转，快速定位，不切削。主轴转速保持目标值。"""
@@ -295,7 +287,7 @@ def _on_air_cut(self) -> None:
         self._x_pos = self._smooth(self._x_pos, 50.0, 0.30)
         self._z_pos = self._smooth(self._z_pos, 2.0, 0.30)
         if self._state_elapsed >= self._state_duration:
-            self._transition(_State.CUTTING, random.uniform(35, 65))
+            self._transition(_State.CUTTING, random.uniform(45, 90))
 
     def _on_cutting(self) -> None:
         noise = random.gauss(0, self._spindle_target * 0.02)
@@ -329,7 +321,7 @@ def _on_cutting(self) -> None:
 
         if self._state_elapsed >= self._state_duration:
             # 周期结束：进入 DECEL_CYCLE（主轴保持转速，只停进给）
-            self._transition(_State.DECEL_CYCLE, random.uniform(2, 4))
+            self._transition(_State.DECEL_CYCLE, random.uniform(3, 6))
 
     def _on_decel_cycle(self) -> None:
         """
@@ -353,7 +345,7 @@ def _on_decel_cycle(self) -> None:
                 self._transition(_State.DECEL, random.uniform(3, 5))
             else:
                 # 继续下一个切削周期：回到 AIR_CUT
-                self._transition(_State.AIR_CUT, random.uniform(3, 6))
+                self._transition(_State.AIR_CUT, random.uniform(6, 12))
 
     def _on_decel(self) -> None:
         """任务级降速：主轴降到 0，准备换刀或停机。"""

From 75471894ab8ad95c2bb26c192f5ffab4f19825c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 10 Jun 2026 06:46:09 +0800
Subject: [PATCH 49/55] fix

---
 protoforge/core/fault.py                      | 110 ++++++++++++++++++
 .../protocols/mtconnect/lathe_simulator.py    |  60 +++++++---
 2 files changed, 155 insertions(+), 15 deletions(-)

diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
index 7febb4b..6867ec9 100644
--- a/protoforge/core/fault.py
+++ b/protoforge/core/fault.py
@@ -502,6 +502,116 @@
                              multiplier=1.0, noise_scale=5.0),
         ],
     ),
+
+    # ==================================================================
+    # 车床 CNC Rough 粗车故障类型
+    # 基线：spindle_speed~2000RPM, spindle_load cutting~55%, spindle_current cutting~13A
+    # 仅影响 spindle_speed / spindle_load / spindle_current 三个测点
+    # ==================================================================
+
+    # ------------------------------------------------------------------
+    # 缠屑（车床粗车）— chip_entanglement_rough
+    # 物理含义：切屑缠绕刀具/工件，切削阻力逐步增大
+    # 特征：spindle_load/current 渐进爬升，spindle_speed 基本维持（严重时轻微下降）
+    # 模式：GRADUAL（渐进式），区别于崩刃的瞬间冲击
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="chip_entanglement_rough",
+        name="缠屑（车床粗车）",
+        description="车床粗车切屑缠绕刀具/工件，切削阻力逐步增大。spindle_load渐进从~55%爬升到70~90%，spindle_current从~13A升至16~20A，spindle_speed基本维持2000RPM（严重时轻微下降到1900RPM）。区别于崩刃：不瞬间冲击；区别于磨损：爬升更快且波动更大",
+        category="process",
+        default_duration=180.0,
+        tags=["缠屑", "渐进", "车床", "粗车"],
+        point_faults=[
+            PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL,
+                             target_min=70.0, target_max=90.0, noise_scale=4.5),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
+                             target_min=16.0, target_max=20.0, noise_scale=1.2),
+            # 转速只在严重时（progress > 0.6）才轻微下降，nominal_baseline 保持 2000
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL,
+                             target_min=1880.0, target_max=1950.0, noise_scale=25.0,
+                             nominal_baseline=2000.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 刀具磨损（车床粗车）— tool_wear_rough
+    # 物理含义：刀具逐步变钝，切削阻力慢性增加
+    # 特征：load/current 长时间缓慢趋势性上升，转速基本稳定
+    # 模式：GRADUAL，持续时间长（600s），不应瞬间恢复
+    # 使用 nominal_baseline 避免注入时恰好在空切段导致基线失真
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="tool_wear_rough",
+        name="刀具磨损（车床粗车）",
+        description="车床粗车刀具逐步变钝，切削阻力慢性增加。spindle_load从~55%缓慢抬升到60~75%，spindle_current从~13A抬升到13~16A，spindle_speed基本稳定在2000RPM。区别于缠屑：爬升极慢；区别于崩刃：无冲击峰值，不停主轴",
+        category="tool",
+        default_duration=600.0,
+        tags=["刀具", "磨损", "渐进", "车床", "粗车", "趋势漂移"],
+        point_faults=[
+            PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL,
+                             target_min=60.0, target_max=75.0, noise_ratio=0.04,
+                             nominal_baseline=55.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
+                             target_min=13.0, target_max=16.0, noise_ratio=0.04,
+                             nominal_baseline=13.0),
+            # 磨损对转速影响极小，仅在严重时轻微下降，nominal_baseline 保持 2000
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL,
+                             target_min=1930.0, target_max=1990.0, noise_scale=20.0,
+                             nominal_baseline=2000.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 断刀/崩刀 emergency_stop（车床粗车）— tool_break_emergency_stop_rough
+    # 物理含义：刀具突然断裂，CNC 触发紧急停机
+    # 特征：瞬间冲击后 load/current 归零，spindle_speed 急降到 0
+    # 模式：INSTANT，持续时间短（仅代表报警持续窗口），之后停机
+    # 断刀冲击只触发一次（注入时随机采样 resolved_target），不每 tick 重新冲击
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="tool_break_emergency_stop_rough",
+        name="断刀急停（车床粗车）",
+        description="车床粗车刀具突然断裂，CNC触发紧急停机。spindle_load瞬间冲高到85~100%，spindle_current冲高到18~25A，随后（下一tick）主轴急停到0。断刀冲击只触发一次，之后进入停机等待状态，不自动恢复正常切削",
+        category="tool",
+        default_duration=8.0,
+        tags=["断刀", "崩刀", "急停", "突发", "车床", "粗车"],
+        point_faults=[
+            # 瞬间冲高，noise_scale 小（冲击值已由 target_min/max 精确控制）
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             target_min=85.0, target_max=100.0, noise_scale=3.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             target_min=18.0, target_max=25.0, noise_scale=1.5),
+            # 主轴急停到 0
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             target_value=0.0, noise_scale=0.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 断刀/崩刀 broken_cutting（车床粗车）— tool_break_broken_cutting_rough
+    # 物理含义：刀具断裂但主轴未停，在破损刀具状态下继续异常切削
+    # 特征：瞬间冲击后 load/current 降到低位（破损刀具切不动），转速维持
+    # 模式：INSTANT，default_duration 8s：前 2s 为冲击峰值窗口（由 LatheSimulator 后处理输出），其后为低负载异常阶段
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="tool_break_broken_cutting_rough",
+        name="断刀异常切削（车床粗车）",
+        description="车床粗车刀具断裂但主轴未停，破损刀具继续异常切削。spindle_load瞬间冲高到85~100%后降至5~15%，spindle_current冲高到18~25A后降至3~6A，spindle_speed维持1800~2200RPM不停机。区别于急停：主轴不归零",
+        category="tool",
+        default_duration=8.0,
+        tags=["断刀", "崩刀", "异常切削", "突发", "车床", "粗车"],
+        point_faults=[
+            # 冲击后维持低负载（破损刀具切不动）
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             target_min=5.0, target_max=15.0, noise_scale=2.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             target_min=3.0, target_max=6.0, noise_scale=0.8),
+            # 转速维持，nominal_baseline 避免注入时基线失真
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=30.0, nominal_baseline=2000.0),
+        ],
+    ),
 ]
 
 # 按 id 索引
diff --git a/protoforge/protocols/mtconnect/lathe_simulator.py b/protoforge/protocols/mtconnect/lathe_simulator.py
index 0465327..59484cb 100644
--- a/protoforge/protocols/mtconnect/lathe_simulator.py
+++ b/protoforge/protocols/mtconnect/lathe_simulator.py
@@ -45,6 +45,7 @@
 from typing import Any
 
 from protoforge.core.cnc_metric_generator import BaseMetricGenerator
+from protoforge.core.fault import fault_injector
 
 
 class _State(Enum):
@@ -210,8 +211,32 @@ def __call__(self, device_instance: Any) -> None:
         vals = device_instance._point_values
         self._update_cnc_points(vals, frame)
 
-        # 6. 上报 Prometheus
-        self._emit_prometheus(device_instance, frame)
+        # 6. 复用铣床故障注入机制：在 baseline 写入后覆盖故障测点值
+        #    fault_injector.apply() 只覆盖 _point_values，不修改状态机
+        #    只有 process_running 切削阶段的故障才有意义；
+        #    但 apply() 本身会检查 fault.duration，状态机不需要感知
+        fault_injector.apply(device_instance)
+
+        # ── 断刀二阶段后处理（不修改 FaultInjector 框架，符合铣床风格）───────
+        _active_fault = fault_injector.get_fault(device_instance.id)
+        if _active_fault is not None:
+            _fault_id = _active_fault.fault_type_id
+            _elapsed  = _active_fault.elapsed
+
+            # 断刀急停：冲击窗口前 2s → 之后 load/current/speed 降到停机水平
+            if _fault_id == "tool_break_emergency_stop_rough" and _elapsed > 2.0:
+                vals["spindle_load"]    = round(random.uniform(0.0, 2.0), 1)
+                vals["spindle_current"] = round(random.uniform(0.0, 1.0), 2)
+                vals["spindle_speed"]   = 0.0
+
+            # 断刀异常切削：冲击窗口前 2s 输出冲击峰值，之后由 FaultInjector 维持低负载
+            elif _fault_id == "tool_break_broken_cutting_rough" and _elapsed <= 2.0:
+                vals["spindle_load"]    = round(random.uniform(85.0, 100.0) + random.gauss(0, 3.0), 1)
+                vals["spindle_current"] = round(random.uniform(18.0, 25.0) + random.gauss(0, 1.5), 2)
+                # 转速在冲击瞬间保持（FaultInjector 已设置 nominal_baseline=2000，此处不覆盖）
+
+        # 7. 上报 Prometheus（使用 fault-applied 后的 _point_values，而非注入前的 frame）
+        self._emit_prometheus(device_instance, vals)
 
     # ------------------------------------------------------------------
     # 状态机
@@ -460,6 +485,8 @@ def _update_cnc_points(self, vals: dict[str, Any], frame) -> None:
         vals["tool_temperature"]   = round(frame.tool_temperature, 2)
         vals["surface_roughness"]  = round(frame.surface_roughness, 3)
         vals["tool_wear_value"]    = round(frame.tool_wear_value, 4)
+        # 存入 stage 供 _emit_prometheus 使用（不作为 MTConnect 测点上报）
+        vals["_stage"]             = frame.stage
 
         # 故障覆盖：崩刀时 spindle_load 突增并覆盖 MetricFrame 的值
         if is_tool_break:
@@ -473,9 +500,10 @@ def _update_cnc_points(self, vals: dict[str, Any], frame) -> None:
             wrap_load = min(100.0, 30.0 + self._wrap_load_increment + random.gauss(0, 2))
             vals["spindle_load"] = round(wrap_load, 1)
 
-    def _emit_prometheus(self, device_instance: Any, frame) -> None:
+    def _emit_prometheus(self, device_instance: Any, vals: dict) -> None:
         """
         通过 MetricsCollector 上报 Prometheus 指标。
+        使用 fault-applied 后的 device._point_values，确保故障覆盖值能正确上报。
         复用项目已有的 set_gauge 接口，不重复注册。
         """
         try:
@@ -485,24 +513,26 @@ def _emit_prometheus(self, device_instance: Any, frame) -> None:
 
         device_id = getattr(device_instance.config, "id", "unknown")
         device_name = getattr(device_instance.config, "name", "unknown")
+        # stage 取自 _update_cnc_points 存入的 vals["_stage"]（源自 frame.stage，缺失时回退为 "roughing"；故障不改变 stage 标签）
+        stage = vals.get("_stage", "roughing")
         labels = {
             "device_id":   device_id,
             "device_name": device_name,
             "protocol":    "mtconnect",
-            "stage":       frame.stage,
+            "stage":       stage,
         }
 
-        metrics.set_gauge("cnc_feed_rate",          frame.feed_rate,          {**labels, "unit": "mm/min"})
-        metrics.set_gauge("cnc_spindle_speed",       frame.spindle_speed,      {**labels, "unit": "RPM"})
-        metrics.set_gauge("cnc_spindle_current",     frame.spindle_current,    {**labels, "unit": "A"})
-        metrics.set_gauge("cnc_spindle_load",        frame.spindle_load,       {**labels, "unit": "%"})
-        metrics.set_gauge("cnc_vibration_x",         frame.vibration_x,        {**labels, "unit": "mm/s"})
-        metrics.set_gauge("cnc_vibration_y",         frame.vibration_y,        {**labels, "unit": "mm/s"})
-        metrics.set_gauge("cnc_vibration_z",         frame.vibration_z,        {**labels, "unit": "mm/s"})
-        metrics.set_gauge("cnc_acoustic_emission",   frame.acoustic_emission,  {**labels, "unit": "V"})
-        metrics.set_gauge("cnc_tool_temperature",    frame.tool_temperature,   {**labels, "unit": "C"})
-        metrics.set_gauge("cnc_surface_roughness",   frame.surface_roughness,  {**labels, "unit": "um"})
-        metrics.set_gauge("cnc_tool_wear_value",     frame.tool_wear_value,    {**labels, "unit": "um"})
+        metrics.set_gauge("cnc_feed_rate",          vals.get("feed_rate", 0.0),         {**labels, "unit": "mm/min"})
+        metrics.set_gauge("cnc_spindle_speed",       vals.get("spindle_speed", 0.0),     {**labels, "unit": "RPM"})
+        metrics.set_gauge("cnc_spindle_current",     vals.get("spindle_current", 0.0),   {**labels, "unit": "A"})
+        metrics.set_gauge("cnc_spindle_load",        vals.get("spindle_load", 0.0),      {**labels, "unit": "%"})
+        metrics.set_gauge("cnc_vibration_x",         vals.get("vibration_x", 0.0),       {**labels, "unit": "mm/s"})
+        metrics.set_gauge("cnc_vibration_y",         vals.get("vibration_y", 0.0),       {**labels, "unit": "mm/s"})
+        metrics.set_gauge("cnc_vibration_z",         vals.get("vibration_z", 0.0),       {**labels, "unit": "mm/s"})
+        metrics.set_gauge("cnc_acoustic_emission",   vals.get("acoustic_emission", 0.0), {**labels, "unit": "V"})
+        metrics.set_gauge("cnc_tool_temperature",    vals.get("tool_temperature", 0.0),  {**labels, "unit": "C"})
+        metrics.set_gauge("cnc_surface_roughness",   vals.get("surface_roughness", 0.0), {**labels, "unit": "um"})
+        metrics.set_gauge("cnc_tool_wear_value",     vals.get("tool_wear_value", 0.0),   {**labels, "unit": "um"})
 
     # ------------------------------------------------------------------
 

From dbc74a5775290c427ba12c5070bd4a3b349e056e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 10 Jun 2026 08:48:09 +0800
Subject: [PATCH 50/55] fix

---
 protoforge/core/cnc_metric_generator.py       | 204 ++++++++++++------
 .../protocols/mtconnect/lathe_simulator.py    | 161 ++++++++++----
 2 files changed, 252 insertions(+), 113 deletions(-)

diff --git a/protoforge/core/cnc_metric_generator.py b/protoforge/core/cnc_metric_generator.py
index 457d654..d75482d 100644
--- a/protoforge/core/cnc_metric_generator.py
+++ b/protoforge/core/cnc_metric_generator.py
@@ -125,6 +125,10 @@ class GeneratorState:
     # 加工周期状态：air_cut / entry_cut / cutting / exit_cut
     # cycle_state 只描述负载形态，不控制主轴启停或转速档位
     cycle_state: str = "air_cut"
+    # 当前工艺阶段：rough / semi_finish / finish
+    # 由 LatheSimulator 在每次 tick 前设置，支持 single_process 和 process_flow 两种模式
+    # None 表示从 stage 自动推导（保持向后兼容）
+    current_process: Optional[str] = None
 
 
 # ---------------------------------------------------------------------------
@@ -384,12 +388,14 @@ class CncSpindleGenerator:
     统一驱动 spindle_speed / spindle_load / spindle_current 的联动生成器。
 
     生成链路：
-        工艺阶段(process) → 目标转速 → 实际转速(EMA) → 负载(SpindleLoadGenerator)
+        工艺阶段(process) → 目标转速 → 实际转速(EMA) → 负载(process 参数化)
         → 电流(负载+转速映射)
 
     stage 参数取值：idle / tool_change / roughing / semi_finishing / finishing
     spindle_state 参数取值：idle / tool_change / spinup / cutting / decel
       （由 LatheSimulator 状态机传入，用于控制转速 EMA alpha）
+    process 参数取值：rough / semi_finish / finish
+      （由外部传入，覆盖 stage 推导，用于 process_flow 模式）
     """
 
     def __init__(self, rng: random.Random, load_gen: SpindleLoadGenerator):
@@ -399,7 +405,6 @@ def __init__(self, rng: random.Random, load_gen: SpindleLoadGenerator):
         self.prev_speed: float = 0.0
         self.prev_load: float = 0.0
         self.prev_current: float = 0.0
-        self.process: str = "rough"
 
         self.current_cycle_id: Optional[str] = None
         self.cycle_factor: float = 1.0
@@ -410,8 +415,8 @@ def __init__(self, rng: random.Random, load_gen: SpindleLoadGenerator):
         self.exit_ramp_seconds: float = 4.5
         self._cycle_cutting_load: float = 55.0
 
-    def start_new_cycle(self, cycle_id: str) -> None:
-        """每个 rough 切削周期只刷新一次周期级扰动参数。"""
+    def start_new_cycle(self, cycle_id: str, process: str = "rough") -> None:
+        """每个切削周期只刷新一次周期级扰动参数。"""
         if cycle_id == self.current_cycle_id:
             return
         self.current_cycle_id = cycle_id
@@ -419,8 +424,12 @@ def start_new_cycle(self, cycle_id: str) -> None:
         self.phase1 = self._rng.uniform(0, 2 * math.pi)
         self.phase2 = self._rng.uniform(0, 2 * math.pi)
         self.material_phase = self._rng.uniform(0, 2 * math.pi)
-        self.entry_ramp_seconds = self._rng.uniform(4.0, 8.0)
-        self.exit_ramp_seconds = self._rng.uniform(3.0, 6.0)
+        pcfg = _PROCESS_CFG.get(process, _PROCESS_CFG["rough"])
+        # entry/exit ramp 从工艺配置中随机采样
+        entry_range = pcfg.get("entry_ramp_s", 6.0)
+        exit_range = pcfg.get("exit_ramp_s", 4.5)
+        self.entry_ramp_seconds = self._rng.uniform(entry_range * 0.6, entry_range * 1.4)
+        self.exit_ramp_seconds = self._rng.uniform(exit_range * 0.6, exit_range * 1.4)
 
     def generate(
         self,
@@ -430,27 +439,31 @@ def generate(
         cutting_elapsed: float = 0.0,
         cutting_total: float = 30.0,
         task_state: str = "idle",
+        process: Optional[str] = None,
     ) -> tuple[float, float, float]:
         """
         生成 (spindle_speed, spindle_load, spindle_current)。
 
         Args:
             t:               当前时间（秒）。
-            stage:           MetricGenerator 加工阶段。
+            stage:           MetricGenerator 加工阶段（roughing/semi_finishing/finishing/idle/tool_change）。
             spindle_state:   LatheSimulator 内部状态（idle/spinup/cutting/decel/tool_change）。
-            cutting_elapsed: 切削阶段已过秒数（传给负载生成器）。
-            cutting_total:   切削阶段总时长（传给负载生成器）。
+            cutting_elapsed: 切削阶段已过秒数。
+            cutting_total:   切削阶段总时长。
             task_state:      任务级状态（process_running/idle）。
-                             process_running 时主轴保持目标转速，即使 stage=idle（air_cut 间隙）。
+            process:         工艺阶段（rough/semi_finish/finish）。
+                             None 时从 stage 自动推导；可由 LatheSimulator 显式传入以支持 process_flow。
         """
-        # 本轮优化固定为 rough 正常工况；stage 仍原样写入 MetricFrame/标签。
-        process = "rough"
+        # 优先使用外部传入的 process；若为 None 则从 stage 推导
+        if process is None:
+            process = _STAGE_TO_PROCESS.get(stage, "rough")
+
         cycle_state = self._resolve_cycle_state(stage, task_state, cutting_elapsed, cutting_total)
-        cycle_id = self._resolve_cycle_id(t, stage, task_state, cutting_elapsed, cutting_total)
-        self.start_new_cycle(cycle_id)
+        cycle_id = self._resolve_cycle_id(t, stage, task_state, cutting_elapsed, cutting_total, process)
+        self.start_new_cycle(cycle_id, process)
 
-        # ── 1. 主轴转速 ────────────────────────────────────────────────────
-        speed = self._calc_speed(stage, spindle_state, process, task_state)
+        # ── 1. 主轴转速（由 process 和 task_state 决定，不由 cycle_state 决定）──
+        speed = self._calc_speed(spindle_state, process, task_state)
 
         # 保持旧负载生成器的周期状态同步，避免其他调用路径依赖其内部状态。
         self._load_gen.generate(
@@ -459,11 +472,11 @@ def generate(
             cutting_elapsed=cutting_elapsed,
             cutting_total=cutting_total,
         )
-        # ── 2. 主轴负载（rough 正常工况，cycle_state 只影响负载形态）───────
-        load = self._calc_rough_load(t, speed, task_state, cycle_state, cutting_elapsed, cutting_total)
+        # ── 2. 主轴负载（由 process 参数化，cycle_state 控制 ramp 形态）──────
+        load = self._calc_load(t, speed, process, task_state, cycle_state, cutting_elapsed, cutting_total)
 
-        # ── 3. 主轴电流（由转速 + 负载推导）───────────────────────────────
-        current = self._calc_current(stage, spindle_state, process, speed, load, task_state, cycle_state)
+        # ── 3. 主轴电流（由转速 + 负载 + process 推导）─────────────────────
+        current = self._calc_current(process, speed, load, task_state, cycle_state)
 
         return speed, load, current
 
@@ -471,28 +484,44 @@ def generate(
 
     def _calc_speed(
         self,
-        stage: str,
         spindle_state: str,
-        process: Optional[str],
+        process: str,
         task_state: str = "idle",
     ) -> float:
         """
-        转速只由任务级状态控制启停；rough 周期状态不切换转速档位。
+        转速只由 task_state 和 process 决定。
+        - idle/tool_change → 目标 0 RPM（降速）
+        - process_running → 目标转速由当前 process 决定（rough=2000, semi=3000, finish=4000）
+        - cycle_state 不参与转速决策，air_cut 期间转速保持目标值
         """
+        scfg = _PROCESS_SPEED_CFG.get(process, _PROCESS_SPEED_CFG["rough"])
+
         if task_state in ("idle", "spindle_off", "tool_change"):
             target = 0.0
             alpha = self._rng.uniform(0.12, 0.25)
         else:
-            target = 2000.0
-            if spindle_state == "spinup" or self.prev_speed < 1750.0:
+            target = scfg["target"]
+            # 升速阶段（转速还在目标 90% 以下）用较大 alpha，稳态用小 alpha
+            threshold = scfg["target"] * 0.90
+            if spindle_state == "spinup" or self.prev_speed < threshold:
                 alpha = self._rng.uniform(0.10, 0.18)
             else:
                 alpha = self._rng.uniform(0.03, 0.08)
 
         new_speed = _ema(self.prev_speed, target, alpha)
-        if task_state not in ("idle", "spindle_off", "tool_change") and new_speed > 1750.0:
-            new_speed += self._rng.uniform(-30.0, 30.0)
-        new_speed = _clamp(new_speed, 0.0, 2200.0)
+        # 稳态时叠加小幅噪声（转速高于目标 85% 时才加）
+        if task_state not in ("idle", "spindle_off", "tool_change"):
+            threshold_noise = scfg["target"] * 0.85
+            if new_speed > threshold_noise:
+                noise = scfg["noise"]
+                new_speed += self._rng.uniform(-noise, noise)
+
+        # clamp：运行中保持在 process 允许的转速区间
+        if task_state not in ("idle", "spindle_off", "tool_change"):
+            new_speed = _clamp(new_speed, scfg["lo"], scfg["hi"])
+        else:
+            new_speed = _clamp(new_speed, 0.0, scfg["hi"])
+
         self.prev_speed = new_speed
         return new_speed
 
@@ -524,68 +553,94 @@ def _resolve_cycle_id(
         task_state: str,
         cutting_elapsed: float,
         cutting_total: float,
+        process: str = "rough",
     ) -> str:
         if task_state != "process_running":
             return "stopped"
         if stage not in _STAGE_TO_PROCESS:
             return self.current_cycle_id or "air_cut"
         cycle_start = t - cutting_elapsed
-        return f"rough:{cycle_start:.0f}:{cutting_total:.0f}"
-
-    def _air_cut_load_target(self, t: float) -> float:
-        target = 7.0 + math.sin(t * 0.20) * 1.5 + self._rng.uniform(-0.8, 0.8)
-        return _clamp(target, 5.0, 12.0)
-
-    def _rough_cutting_load_target(self, t: float) -> float:
-        effective_base = 55.0 * self.cycle_factor
-        slow_wave = math.sin(t * 0.10 + self.phase1) * 5.0
-        cutting_wave = math.sin(t * 0.75 + self.phase2) * 2.5
-        material_drift = math.sin(t * 0.03 + self.material_phase) * 4.0
-        small_noise = self._rng.uniform(-2.0, 2.0)
-        target = effective_base + slow_wave + cutting_wave + material_drift + small_noise
-        return _clamp(target, 35.0, 82.0)
-
-    def _calc_rough_load(
+        return f"{process}:{cycle_start:.0f}:{cutting_total:.0f}"
+
+    def _air_cut_load_target(self, t: float, process: str) -> float:
+        pcfg = _PROCESS_CFG.get(process, _PROCESS_CFG["rough"])
+        air_base = pcfg["air_cut_base"]
+        target = air_base + math.sin(t * 0.20) * (pcfg["noise_range"] * 0.4) + self._rng.uniform(-0.8, 0.8)
+        # air_cut 负载 clamp 到各工艺的 air 区间（rough:5~12, semi:4~10, finish:3~8）
+        air_lo = {
+            "rough":       5.0,
+            "semi_finish": 4.0,
+            "finish":      3.0,
+        }.get(process, 5.0)
+        air_hi = {
+            "rough":       12.0,
+            "semi_finish": 10.0,
+            "finish":       8.0,
+        }.get(process, 12.0)
+        return _clamp(target, air_lo, air_hi)
+
+    def _cutting_load_target(self, t: float, process: str) -> float:
+        pcfg = _PROCESS_CFG.get(process, _PROCESS_CFG["rough"])
+        effective_base = pcfg["base_load"] * self.cycle_factor
+        slow_wave = math.sin(t * pcfg["slow_freq"] + self.phase1) * pcfg["slow_amp"]
+        cut_wave = math.sin(t * pcfg["cut_freq"] + self.phase2) * pcfg["cut_amp"]
+        material_drift = math.sin(t * pcfg["material_freq"] + self.material_phase) * pcfg["material_amp"]
+        small_noise = self._rng.uniform(-pcfg["noise_range"], pcfg["noise_range"])
+        target = effective_base + slow_wave + cut_wave + material_drift + small_noise
+        return _clamp(target, pcfg["clamp_min"], pcfg["clamp_max"])
+
+    def _calc_load(
         self,
         t: float,
         speed: float,
+        process: str,
         task_state: str,
         cycle_state: str,
         cutting_elapsed: float,
         cutting_total: float,
     ) -> float:
+        """主轴负载由 process 参数化，cycle_state 控制 entry/exit ramp 形态。"""
+        pcfg = _PROCESS_CFG.get(process, _PROCESS_CFG["rough"])
+
         if speed <= 50.0:
             target = self._rng.uniform(0.0, 2.0)
             alpha = self._rng.uniform(0.30, 0.45)
             lo, hi = 0.0, 2.0
         elif task_state == "process_running":
-            air_load = self._air_cut_load_target(t)
-            cutting_target = self._rough_cutting_load_target(t)
+            air_load = self._air_cut_load_target(t, process)
+            cutting_target = self._cutting_load_target(t, process)
             self._cycle_cutting_load = cutting_target
 
+            air_lo = {
+                "rough": 5.0, "semi_finish": 4.0, "finish": 3.0,
+            }.get(process, 5.0)
+
             if cycle_state == "air_cut":
                 target = air_load
                 alpha = self._rng.uniform(0.18, 0.25)
-                lo, hi = 5.0, 12.0
+                lo, hi = air_lo, pcfg["air_cut_base"] + pcfg["noise_range"] * 2
             elif cycle_state == "entry_cut":
                 ratio = _clamp(cutting_elapsed / max(self.entry_ramp_seconds, 0.1), 0.0, 1.0)
+                # smoothstep 使 ramp 更自然
+                ratio = ratio * ratio * (3 - 2 * ratio)
                 target = air_load + (cutting_target - air_load) * ratio
                 alpha = self._rng.uniform(0.08, 0.14)
-                lo, hi = 5.0, 82.0
+                lo, hi = air_lo, pcfg["clamp_max"]
             elif cycle_state == "cutting":
                 target = cutting_target
-                alpha = self._rng.uniform(0.08, 0.15)
-                lo, hi = 35.0, 82.0
+                alpha = pcfg["ema_alpha"]
+                lo, hi = pcfg["clamp_min"], pcfg["clamp_max"]
             elif cycle_state == "exit_cut":
                 exit_elapsed = max(0.0, self.exit_ramp_seconds - (cutting_total - cutting_elapsed))
                 ratio = _clamp(exit_elapsed / max(self.exit_ramp_seconds, 0.1), 0.0, 1.0)
+                ratio = ratio * ratio * (3 - 2 * ratio)
                 target = self._cycle_cutting_load * (1.0 - ratio) + air_load * ratio
                 alpha = self._rng.uniform(0.10, 0.18)
-                lo, hi = 5.0, 82.0
+                lo, hi = air_lo, pcfg["clamp_max"]
             else:
                 target = air_load
                 alpha = self._rng.uniform(0.18, 0.25)
-                lo, hi = 5.0, 12.0
+                lo, hi = air_lo, pcfg["air_cut_base"] + pcfg["noise_range"] * 2
         else:
             target = self._rng.uniform(0.0, 2.0)
             alpha = self._rng.uniform(0.25, 0.40)
@@ -593,7 +648,9 @@ def _calc_rough_load(
 
         new_load = _ema(self.prev_load, target, alpha)
         if speed > 50.0 and task_state == "process_running":
-            min_load = 5.0 if cycle_state in ("air_cut", "entry_cut", "exit_cut") else 35.0
+            air_lo = {"rough": 5.0, "semi_finish": 4.0, "finish": 3.0}.get(process, 5.0)
+            cut_lo = pcfg["clamp_min"]
+            min_load = air_lo if cycle_state in ("air_cut", "entry_cut", "exit_cut") else cut_lo
             new_load = _clamp(new_load, min_load, hi)
         else:
             new_load = _clamp(new_load, lo, hi)
@@ -602,39 +659,49 @@ def _calc_rough_load(
 
     def _calc_current(
         self,
-        stage: str,
-        spindle_state: str,
-        process: Optional[str],
+        process: str,
         speed: float,
         load: float,
         task_state: str = "idle",
         cycle_state: str = "air_cut",
     ) -> float:
-        """电流由主轴转速和负载推导，避免独立随机曲线。"""
+        """电流由 process、主轴转速和负载推导，避免独立随机曲线。"""
+        ccfg = _PROCESS_CURRENT_CFG.get(process, _PROCESS_CURRENT_CFG["rough"])
+        base = ccfg["base"]
+        load_factor = ccfg["load_factor"]
+        noise_amp = ccfg["noise"]
+
         if speed <= 50.0:
             target = self._rng.uniform(0.0, 0.8)
             alpha = self._rng.uniform(0.25, 0.40)
             lo, hi = 0.0, 0.8
         elif cycle_state == "air_cut":
-            target = 3.5 + load * 0.12 + self._rng.uniform(-0.4, 0.4)
+            target = base + load * load_factor * 0.65 + self._rng.uniform(-noise_amp * 0.8, noise_amp * 0.8)
             alpha = self._rng.uniform(0.15, 0.25)
-            lo, hi = 2.5, 6.0
+            # air_cut 电流 clamp 到各工艺 air 区间
+            air_lo_map = {"rough": 2.5, "semi_finish": 2.0, "finish": 1.5}
+            air_hi_map = {"rough": 6.0, "semi_finish": 5.0, "finish": 4.0}
+            lo = air_lo_map.get(process, 2.5)
+            hi = air_hi_map.get(process, 6.0)
         elif cycle_state == "entry_cut":
-            target = 3.0 + load * 0.17 + self._rng.uniform(-0.5, 0.5)
+            target = base + load * load_factor + self._rng.uniform(-noise_amp, noise_amp)
             alpha = self._rng.uniform(0.08, 0.16)
-            lo, hi = 2.5, 17.0
+            lo, hi = ccfg["lo"] * 0.5, ccfg["hi"]
         elif cycle_state == "cutting":
-            target = 3.0 + load * 0.18 + self._rng.uniform(-0.6, 0.6)
+            target = base + load * load_factor + self._rng.uniform(-noise_amp, noise_amp)
             alpha = self._rng.uniform(0.08, 0.15)
-            lo, hi = 10.0, 17.0
+            lo, hi = ccfg["lo"], ccfg["hi"]
         elif cycle_state == "exit_cut":
-            target = 3.0 + load * 0.16 + self._rng.uniform(-0.5, 0.5)
+            target = base + load * load_factor + self._rng.uniform(-noise_amp * 0.9, noise_amp * 0.9)
             alpha = self._rng.uniform(0.10, 0.20)
-            lo, hi = 2.5, 17.0
+            lo, hi = ccfg["lo"] * 0.5, ccfg["hi"]
         else:
-            target = 3.0 + load * 0.12 + self._rng.uniform(-0.4, 0.4)
+            target = base + load * load_factor * 0.65 + self._rng.uniform(-noise_amp * 0.8, noise_amp * 0.8)
             alpha = self._rng.uniform(0.15, 0.25)
-            lo, hi = 2.5, 6.0
+            air_lo_map = {"rough": 2.5, "semi_finish": 2.0, "finish": 1.5}
+            air_hi_map = {"rough": 6.0, "semi_finish": 5.0, "finish": 4.0}
+            lo = air_lo_map.get(process, 2.5)
+            hi = air_hi_map.get(process, 6.0)
 
         new_current = _ema(self.prev_current, target, alpha)
         new_current = _clamp(new_current, lo, hi)
@@ -812,6 +879,8 @@ def generate(self, t: float, dt: float, stage: str) -> MetricFrame:
             task_state = "process_running"
             if spindle_state == "idle":
                 spindle_state = "cutting"
+        # process 优先由外部（LatheSimulator）通过 state 传入；若未设置则从 stage 推导
+        current_process = getattr(state, "current_process", None) or _STAGE_TO_PROCESS.get(stage, "rough")
         spindle_speed, spindle_load, spindle_current = self._spindle_gen.generate(
             t=t,
             stage=stage,
@@ -819,6 +888,7 @@ def generate(self, t: float, dt: float, stage: str) -> MetricFrame:
             cutting_elapsed=state.cutting_elapsed,
             cutting_total=state.cutting_total,
             task_state=task_state,
+            process=current_process,
         )
 
         # ── 6. vibration（三轴，各有小幅随机偏差）────────────────────────────
diff --git a/protoforge/protocols/mtconnect/lathe_simulator.py b/protoforge/protocols/mtconnect/lathe_simulator.py
index 59484cb..1451b67 100644
--- a/protoforge/protocols/mtconnect/lathe_simulator.py
+++ b/protoforge/protocols/mtconnect/lathe_simulator.py
@@ -17,12 +17,18 @@
   - 只有 IDLE / TOOL_CHANGE / 故障恢复 时 task_state="idle"，主轴才降到 0
   - 每完成 cycles_per_task 个切削周期后才真正回到 IDLE（换刀或停机）
 
+process_mode 支持：
+  - "single_process"：固定一种工艺（rough / semi_finish / finish），适合单独观察
+  - "process_flow"：模拟完整流程 rough → semi_finish → finish，各阶段持续时间可配置
+
 每个 tick 的处理流程：
   1. 状态机推进（确定当前 stage）
-  2. BaseMetricGenerator.generate() 生成健康 MetricFrame
+  2. 工艺阶段更新（process_flow 模式下检查是否需要切换 process）
+  3. BaseMetricGenerator.generate() 生成健康 MetricFrame
      （联动建模 + 噪声 + clamp，正常加工算法与故障逻辑解耦）
-  3. 把 MetricFrame 写入 device._point_values
-  4. 通过 MetricsCollector 上报 Prometheus
+  4. 把 MetricFrame 写入 device._point_values
+  5. 故障注入（复用铣床 FaultInjector 机制）
+  6. 通过 MetricsCollector 上报 Prometheus
 
 崩刀（TOOL_BREAK）的 CNC 可观测特征：
   - spindle_load 突增（驱动器过载保护触发）
@@ -60,19 +66,6 @@ class _State(Enum):
     CHIP_WRAP = "chip_wrap"
 
 
-# 状态机阶段 → MetricGenerator 加工阶段的映射
-_STATE_TO_STAGE: dict[_State, str] = {
-    _State.IDLE:        "idle",
-    _State.SPINUP:      "idle",
-    _State.AIR_CUT:     "idle",        # air_cut 阶段负载模型用 idle，但主轴不停
-    _State.CUTTING:     "roughing",    # 默认粗加工，子阶段由 _cutting_stage 动态切换
-    _State.DECEL_CYCLE: "idle",        # 周期间减速，主轴不停
-    _State.DECEL:       "idle",        # 任务级降速
-    _State.TOOL_CHANGE: "tool_change",
-    _State.TOOL_BREAK:  "idle",
-    _State.CHIP_WRAP:   "roughing",
-}
-
 # task_state 映射：process_running = 主轴保持，idle = 主轴可以停
 _STATE_TO_TASK: dict[_State, str] = {
     _State.IDLE:        "idle",
@@ -86,6 +79,21 @@ class _State(Enum):
     _State.CHIP_WRAP:   "process_running",
 }
 
+# 工艺阶段 → MetricGenerator stage 名称的映射
+_PROCESS_TO_STAGE: dict[str, str] = {
+    "rough":       "roughing",
+    "semi_finish": "semi_finishing",
+    "finish":      "finishing",
+}
+
+# process_flow 模式：各工艺阶段的持续时间区间（秒）
+_PROCESS_FLOW_DURATION: dict[str, tuple[float, float]] = {
+    "rough":       (120.0, 300.0),   # 2~5 分钟
+    "semi_finish": (60.0,  180.0),   # 1~3 分钟
+    "finish":      (60.0,  180.0),   # 1~3 分钟
+}
+_PROCESS_FLOW_ORDER = ["rough", "semi_finish", "finish"]
+
 # 刀塔配置（刀位号, 刀具ID）
 _TOOL_TABLE = [
     (1, "T01"),   # 外圆粗车刀
@@ -109,17 +117,31 @@ class _State(Enum):
     "N0120 M30",
 ]
 
-# 每个零件的加工子阶段序列（本轮正常工况固定为 rough）
-# (阶段名, 开始进度, 结束进度)
-_CUT_SUBSTAGES = [
-    ("roughing", 0.00, 1.00),
-]
-
 
 class LatheSimulator:
-    """注册为 DeviceInstance 的 post_tick_hook，每次 tick 更新所有测点。"""
-
-    def __init__(self):
+    """
+    注册为 DeviceInstance 的 post_tick_hook，每次 tick 更新所有测点。
+
+    Args:
+        process_mode: "single_process"（默认，固定工艺）或 "process_flow"（完整流程）
+        process:      single_process 模式下使用的工艺（"rough"/"semi_finish"/"finish"）
+    """
+
+    def __init__(
+        self,
+        process_mode: str = "single_process",
+        process: str = "rough",
+    ):
+        # ── 工艺模式配置 ────────────────────────────────────────────────────
+        self._process_mode = process_mode   # "single_process" | "process_flow"
+        self._process = process             # 当前工艺：rough / semi_finish / finish
+
+        # process_flow 模式下的阶段跟踪
+        self._flow_idx = 0                  # 当前在 _PROCESS_FLOW_ORDER 中的索引
+        self._flow_elapsed = 0.0            # 当前 process 已运行秒数
+        self._flow_duration = self._sample_flow_duration(process)  # 当前 process 目标持续时长
+
+        # ── 状态机 ──────────────────────────────────────────────────────────
         self._state = _State.IDLE
         self._state_elapsed = 0.0
         self._state_duration = 0.0
@@ -151,7 +173,7 @@ def __init__(self):
         self._wrap_load_increment = 0.0
         self._fault_cooldown = 0
 
-        # 当前切削子阶段（roughing/semi_finishing/finishing）
+        # 当前切削子阶段（由 process 决定）
         self._cutting_stage = "roughing"
 
         # 当前任务内已完成的切削周期数（达到上限后才真正停机）
@@ -169,9 +191,48 @@ def __init__(self):
         )
 
     # ------------------------------------------------------------------
-    # post_tick_hook 入口
+    # 工艺阶段管理
     # ------------------------------------------------------------------
 
+    @staticmethod
+    def _sample_flow_duration(process: str) -> float:
+        lo, hi = _PROCESS_FLOW_DURATION.get(process, (120.0, 300.0))
+        return random.uniform(lo, hi)
+
+    def _update_process(self) -> None:
+        """
+        process_flow 模式：每 tick 累加流逝时间，到期后切换到下一工艺。
+        single_process 模式：不做任何操作。
+        """
+        if self._process_mode != "process_flow":
+            return
+
+        task_state = _STATE_TO_TASK.get(self._state, "idle")
+        if task_state == "process_running":
+            self._flow_elapsed += 1.0
+
+        if self._flow_elapsed >= self._flow_duration:
+            next_idx = (self._flow_idx + 1) % len(_PROCESS_FLOW_ORDER)
+            next_process = _PROCESS_FLOW_ORDER[next_idx]
+            self._flow_idx = next_idx
+            self._process = next_process
+            self._flow_elapsed = 0.0
+            self._flow_duration = self._sample_flow_duration(next_process)
+            # 更新状态机内的切削阶段标识（用于 NC 程序行号等信号）
+            self._cutting_stage = _PROCESS_TO_STAGE.get(next_process, "roughing")
+
+    def _get_metric_stage(self) -> str:
+        """
+        将状态机状态映射到 MetricGenerator 加工阶段名。
+        CUTTING / CHIP_WRAP 时使用当前 process 对应的 stage；
+        其余状态使用 idle / tool_change。
+        """
+        if self._state in (_State.CUTTING, _State.CHIP_WRAP):
+            return _PROCESS_TO_STAGE.get(self._process, "roughing")
+        if self._state == _State.TOOL_CHANGE:
+            return "tool_change"
+        return "idle"
+
     def __call__(self, device_instance: Any) -> None:
         self._tick_count += 1
         t = float(self._tick_count)   # 用 tick 序号作为时间 t（dt=1s）
@@ -179,10 +240,13 @@ def __call__(self, device_instance: Any) -> None:
         # 1. 状态机推进
         self._step_state_machine()
 
-        # 2. 确定当前 MetricGenerator 阶段
+        # 2. 工艺阶段更新（process_flow 模式下检查是否需要切换 process）
+        self._update_process()
+
+        # 3. 确定当前 MetricGenerator 阶段（由当前 process 决定）
         stage = self._get_metric_stage()
 
-        # 3. 把状态机信息同步给 MetricGenerator
+        # 4. 把状态机信息同步给 MetricGenerator
         if self._state == _State.CUTTING:
             self._metric_gen.state.cutting_total = self._state_duration
 
@@ -204,17 +268,18 @@ def __call__(self, device_instance: Any) -> None:
         task_state = _STATE_TO_TASK.get(self._state, "idle")
         self._metric_gen.state.task_state = task_state
 
-        # 4. 生成正常加工 MetricFrame（含联动 + 噪声 + clamp）
+        # 当前工艺阶段（传给 CncSpindleGenerator，控制转速目标和负载/电流基线）
+        self._metric_gen.state.current_process = self._process
+
+        # 5. 生成正常加工 MetricFrame（含联动 + 噪声 + clamp）
         frame = self._metric_gen.generate(t=t, dt=1.0, stage=stage)
 
-        # 5. 把 MetricFrame 写入 device._point_values（MTConnect 标准测点）
+        # 6. 把 MetricFrame 写入 device._point_values（MTConnect 标准测点）
         vals = device_instance._point_values
         self._update_cnc_points(vals, frame)
 
-        # 6. 复用铣床故障注入机制：在 baseline 写入后覆盖故障测点值
+        # 7. 复用铣床故障注入机制：在 baseline 写入后覆盖故障测点值
         #    fault_injector.apply() 只覆盖 _point_values，不修改状态机
-        #    只有 process_running 切削阶段的故障才有意义；
-        #    但 apply() 本身会检查 fault.duration，状态机不需要感知
         fault_injector.apply(device_instance)
 
         # ── 断刀二阶段后处理（不修改 FaultInjector 框架，符合铣床风格）───────
@@ -235,7 +300,7 @@ def __call__(self, device_instance: Any) -> None:
                 vals["spindle_current"] = round(random.uniform(18.0, 25.0) + random.gauss(0, 1.5), 2)
                 # 转速在冲击瞬间保持（FaultInjector 已设置 nominal_baseline=2000，此处不覆盖）
 
-        # 7. 上报 Prometheus（使用 fault-applied 后的 _point_values，而非注入前的 frame）
+        # 8. 上报 Prometheus（使用 fault-applied 后的 _point_values，而非注入前的 frame）
         self._emit_prometheus(device_instance, vals)
 
     # ------------------------------------------------------------------
@@ -266,16 +331,19 @@ def _transition(self, new_state: _State, duration: float) -> None:
         self._state_duration = duration
 
     def _get_metric_stage(self) -> str:
-        """将状态机状态映射到 MetricGenerator 阶段。"""
-        if self._state == _State.CUTTING:
-            return "roughing"
-        if self._state == _State.CHIP_WRAP:
-            return "roughing"
-        return _STATE_TO_STAGE.get(self._state, "idle")
+        """
+        Map the current state-machine state to a MetricGenerator stage name.
+        NOTE: duplicates the `_get_metric_stage` defined earlier in this file; this later one wins.
+        """
+        if self._state in (_State.CUTTING, _State.CHIP_WRAP):
+            return _PROCESS_TO_STAGE.get(self._process, "roughing")
+        if self._state == _State.TOOL_CHANGE:
+            return "tool_change"
+        return "idle"
 
     def _update_cutting_substage(self, progress: float) -> None:
-        """本轮正常工况只模拟 rough，不在小周期内切换 semi/finish。"""
-        self._cutting_stage = "roughing"
+        """切削子阶段由当前 process 决定，不随 progress 在周期内切换工艺。"""
+        self._cutting_stage = _PROCESS_TO_STAGE.get(self._process, "roughing")
 
     def _on_idle(self) -> None:
         self._spindle_target = 0.0
@@ -284,11 +352,12 @@ def _on_idle(self) -> None:
         self._condition_native_code = ""
         self._wrap_load_increment = 0.0
         if self._state_elapsed >= self._state_duration:
-            # 开始新任务：主轴升速目标转速（粗加工 2000 RPM）
-            self._spindle_target = 2000.0
+            # 开始新任务：主轴升速到当前 process 的目标转速
+            from protoforge.core.cnc_metric_generator import _PROCESS_SPEED_CFG
+            self._spindle_target = _PROCESS_SPEED_CFG.get(self._process, {}).get("target", 2000.0)
             self._program_line = 1
             self._block_idx = 0
-            self._cutting_stage = "roughing"
+            self._cutting_stage = _PROCESS_TO_STAGE.get(self._process, "roughing")
             self._cycles_in_task = 0
             self._cycles_per_task = random.randint(3, 6)
             self._transition(_State.SPINUP, random.uniform(4, 8))

From a4175c8573ab754a38fddf2f04857cd0ec25aca7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 11 Jun 2026 06:24:24 +0800
Subject: [PATCH 51/55] fix: hide internal points and add stage metric label

---
 protoforge/core/device.py  | 2 ++
 protoforge/core/metrics.py | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/protoforge/core/device.py b/protoforge/core/device.py
index f04414a..62588b6 100644
--- a/protoforge/core/device.py
+++ b/protoforge/core/device.py
@@ -83,6 +83,8 @@ def read_all_points(self) -> list[PointValue]:
         result = []
         now = time.time()
         for name in self._point_values:
+            if name.startswith("_"):
+                continue
             result.append(
                 PointValue(
                     name=name,
diff --git a/protoforge/core/metrics.py b/protoforge/core/metrics.py
index 21e42f7..9264bb7 100644
--- a/protoforge/core/metrics.py
+++ b/protoforge/core/metrics.py
@@ -52,6 +52,9 @@ def collect_from_engine(self, engine: Any) -> None:
                 "device_name": device.config.name,
                 "protocol": device.config.protocol,
             }
+            stage = device._point_values.get("_stage")
+            if stage:
+                labels_base["stage"] = stage
             for point in device.read_all_points():
                 point_config = next(
                     (p for p in device.config.points if p.name == point.name), None

From 97dd55a8a12b76ade1e534c22976d8a4c8f17144 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 11 Jun 2026 06:38:18 +0800
Subject: [PATCH 52/55] fix: default mtconnect_lathe to process_flow mode

---
 protoforge/core/simulators.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/protoforge/core/simulators.py b/protoforge/core/simulators.py
index 4c75832..2569017 100644
--- a/protoforge/core/simulators.py
+++ b/protoforge/core/simulators.py
@@ -12,7 +12,7 @@ def _build_registry() -> dict[str, Callable[[], Any]]:
     registry: dict[str, Callable[[], Any]] = {}
     try:
         from protoforge.protocols.mtconnect.lathe_simulator import LatheSimulator
-        registry["mtconnect_lathe"] = LatheSimulator
+        registry["mtconnect_lathe"] = lambda: LatheSimulator(process_mode="process_flow")
     except ImportError:
         pass
     return registry

From e9cde11c41b4dffcf2979a1bdab09854d5925c80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 11 Jun 2026 06:50:57 +0800
Subject: [PATCH 53/55] fix: pass simulator_params to simulator constructors

---
 protoforge/core/engine.py     |  2 +-
 protoforge/core/simulators.py | 14 +++++++++-----
 protoforge/models/device.py   |  1 +
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/protoforge/core/engine.py b/protoforge/core/engine.py
index acd786a..d6117a6 100644
--- a/protoforge/core/engine.py
+++ b/protoforge/core/engine.py
@@ -62,7 +62,7 @@ async def create_device(self, config: DeviceConfig) -> DeviceInfo:
         # 注册故障注入钩子
         instance.register_post_tick_hook(fault_injector.apply)
         # 注册设备专用仿真器（如车床状态机），根据 template_id 自动匹配
-        simulator = get_device_simulator(config.template_id)
+        simulator = get_device_simulator(config.template_id, config.simulator_params or {})
         if simulator is not None:
             instance.register_post_tick_hook(simulator)
 
diff --git a/protoforge/core/simulators.py b/protoforge/core/simulators.py
index 2569017..809188c 100644
--- a/protoforge/core/simulators.py
+++ b/protoforge/core/simulators.py
@@ -8,11 +8,11 @@
 from typing import Any, Callable, Optional
 
 
-def _build_registry() -> dict[str, Callable[[], Any]]:
-    registry: dict[str, Callable[[], Any]] = {}
+def _build_registry() -> dict[str, Callable[..., Any]]:
+    registry: dict[str, Callable[..., Any]] = {}
     try:
         from protoforge.protocols.mtconnect.lathe_simulator import LatheSimulator
-        registry["mtconnect_lathe"] = lambda: LatheSimulator(process_mode="process_flow")
+        registry["mtconnect_lathe"] = LatheSimulator
     except ImportError:
         pass
     return registry
@@ -21,13 +21,17 @@ def _build_registry() -> dict[str, Callable[[], Any]]:
 _REGISTRY = _build_registry()
 
 
-def get_device_simulator(template_id: Optional[str]) -> Optional[Any]:
+def get_device_simulator(
+    template_id: Optional[str],
+    simulator_params: dict[str, Any] | None = None,
+) -> Optional[Any]:
     """
     根据 template_id 返回一个新的仿真器实例，未匹配则返回 None。
+    simulator_params 会作为关键字参数透传给仿真器构造函数。
     """
     if template_id is None:
         return None
     factory = _REGISTRY.get(template_id)
     if factory is None:
         return None
-    return factory()
+    return factory(**(simulator_params or {}))
diff --git a/protoforge/models/device.py b/protoforge/models/device.py
index cbe35e4..44dc2db 100644
--- a/protoforge/models/device.py
+++ b/protoforge/models/device.py
@@ -48,6 +48,7 @@ class DeviceConfig(BaseModel):
     template_id: Optional[str] = None
     points: list[PointConfig] = Field(default_factory=list)
     protocol_config: dict[str, Any] = Field(default_factory=dict)
+    simulator_params: dict[str, Any] = Field(default_factory=dict)
 
 
 class PointValue(BaseModel):

From a26a04c9597f5fc2359aad45f443bdd7bd030be9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 11 Jun 2026 06:55:01 +0800
Subject: [PATCH 54/55] fix: match template id in template search

---
 protoforge/api/v1/router.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/protoforge/api/v1/router.py b/protoforge/api/v1/router.py
index 7a6c050..604f521 100644
--- a/protoforge/api/v1/router.py
+++ b/protoforge/api/v1/router.py
@@ -431,6 +431,7 @@ async def search_templates(q: str = "", protocol: Optional[str] = None, tag: Opt
         q_lower = q.lower()
         templates = [t for t in templates if
                      q_lower in t.name.lower() or
+                     q_lower in t.id.lower() or
                      q_lower in (t.description or "").lower() or
                      any(q_lower in tag_item.lower() for tag_item in (t.tags or []))]
     if tag:

From f0aaaeea4a85be694186677f5053808ae565efe8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 11 Jun 2026 20:05:34 +0800
Subject: [PATCH 55/55] feat: add per-process lathe station templates

---
 protoforge/core/simulators.py         | 14 +++++-
 protoforge/core/template.py           | 63 +++++++++++++++++++++++++++
 tests/test_lathe_station_templates.py | 42 ++++++++++++++++++
 3 files changed, 118 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_lathe_station_templates.py

diff --git a/protoforge/core/simulators.py b/protoforge/core/simulators.py
index 809188c..047d7fc 100644
--- a/protoforge/core/simulators.py
+++ b/protoforge/core/simulators.py
@@ -13,6 +13,18 @@ def _build_registry() -> dict[str, Callable[..., Any]]:
     try:
         from protoforge.protocols.mtconnect.lathe_simulator import LatheSimulator
         registry["mtconnect_lathe"] = LatheSimulator
+        registry["mtconnect_lathe_rough"] = lambda **_: LatheSimulator(
+            process_mode="single_process",
+            process="rough",
+        )
+        registry["mtconnect_lathe_semi_finish"] = lambda **_: LatheSimulator(
+            process_mode="single_process",
+            process="semi_finish",
+        )
+        registry["mtconnect_lathe_finish"] = lambda **_: LatheSimulator(
+            process_mode="single_process",
+            process="finish",
+        )
     except ImportError:
         pass
     return registry
@@ -27,7 +39,7 @@ def get_device_simulator(
 ) -> Optional[Any]:
     """
     根据 template_id 返回一个新的仿真器实例，未匹配则返回 None。
-    simulator_params 会作为关键字参数透传给仿真器构造函数。
+    simulator_params 默认作为关键字参数透传；固定工位模板可在注册表中选择忽略参数。
     """
     if template_id is None:
         return None
diff --git a/protoforge/core/template.py b/protoforge/core/template.py
index e7ff122..6a7659c 100644
--- a/protoforge/core/template.py
+++ b/protoforge/core/template.py
@@ -31,6 +31,7 @@ def load_builtin_templates(self) -> None:
         self._load_from_dir(_TEMPLATES_DIR / "opcda")
         self._load_from_dir(_TEMPLATES_DIR / "fanuc")
         self._load_from_dir(_TEMPLATES_DIR / "mtconnect")
+        self._add_lathe_station_templates()
         self._load_from_dir(_TEMPLATES_DIR / "toledo")
         self._loaded = True
         logger.info("Loaded %d built-in templates", len(self._templates))
@@ -98,3 +99,65 @@ def _load_from_dir(self, dir_path: Path) -> None:
                 self._templates[template.id] = template
             except Exception as e:
                 logger.warning("Failed to load template %s: %s", json_file, e)
+
+    def _add_lathe_station_templates(self) -> None:
+        """
+        MTConnect 车床按工位拆分模板。
+
+        原 mtconnect_lathe 保留用于兼容旧设备；三个 station 模板由同一组
+        MTConnect 测点派生，但会在 simulator registry 中绑定到不同工艺。
+        """
+        base = self._templates.get("mtconnect_lathe")
+        if base is None:
+            return
+
+        station_defs = [
+            {
+                "id": "mtconnect_lathe_rough",
+                "name": "MTConnect车床 粗加工工位",
+                "uuid": "mtc-lathe-rough-001",
+                "process_tag": "粗加工",
+                "description": (
+                    "MTConnect标准车床粗加工工位，固定运行粗车工艺；"
+                    "主轴约2000RPM，负载和电流较高，适合单独观察粗加工数据。"
+                ),
+            },
+            {
+                "id": "mtconnect_lathe_semi_finish",
+                "name": "MTConnect车床 半精加工工位",
+                "uuid": "mtc-lathe-semi-finish-001",
+                "process_tag": "半精加工",
+                "description": (
+                    "MTConnect标准车床半精加工工位，固定运行半精车工艺；"
+                    "主轴约3000RPM，负载、电流和粗糙度介于粗加工与精加工之间。"
+                ),
+            },
+            {
+                "id": "mtconnect_lathe_finish",
+                "name": "MTConnect车床 精加工工位",
+                "uuid": "mtc-lathe-finish-001",
+                "process_tag": "精加工",
+                "description": (
+                    "MTConnect标准车床精加工工位，固定运行精车工艺；"
+                    "主轴约4000RPM，负载较低，转速和表面质量更稳定。"
+                ),
+            },
+        ]
+
+        for spec in station_defs:
+            if spec["id"] in self._templates:
+                continue
+            template = base.model_copy(deep=True)
+            template.id = spec["id"]
+            template.name = spec["name"]
+            template.description = spec["description"]
+            template.protocol_config = {
+                **template.protocol_config,
+                "device_uuid": spec["uuid"],
+            }
+            template.tags = [
+                tag for tag in template.tags
+                if tag not in {"粗加工", "半精加工", "精加工"}
+            ]
+            template.tags.extend(["工位", spec["process_tag"]])
+            self._templates[template.id] = template
diff --git a/tests/test_lathe_station_templates.py b/tests/test_lathe_station_templates.py
new file mode 100644
index 0000000..c1e37ae
--- /dev/null
+++ b/tests/test_lathe_station_templates.py
@@ -0,0 +1,42 @@
+from protoforge.core.simulators import get_device_simulator
+from protoforge.core.template import TemplateManager
+
+
+def test_mtconnect_lathe_station_templates_are_available():
+    tm = TemplateManager()
+    tm.load_builtin_templates()
+
+    rough = tm.get_template("mtconnect_lathe_rough")
+    semi = tm.get_template("mtconnect_lathe_semi_finish")
+    finish = tm.get_template("mtconnect_lathe_finish")
+
+    assert rough.name == "MTConnect车床 粗加工工位"
+    assert semi.name == "MTConnect车床 半精加工工位"
+    assert finish.name == "MTConnect车床 精加工工位"
+
+    assert rough.protocol_config["device_uuid"] == "mtc-lathe-rough-001"
+    assert semi.protocol_config["device_uuid"] == "mtc-lathe-semi-finish-001"
+    assert finish.protocol_config["device_uuid"] == "mtc-lathe-finish-001"
+    assert len(rough.points) == len(semi.points) == len(finish.points)
+    assert "工位" in rough.tags
+    assert "粗加工" in rough.tags
+    assert "半精加工" in semi.tags
+    assert "精加工" in finish.tags
+
+
+def test_lathe_station_simulators_force_single_process():
+    cases = [
+        ("mtconnect_lathe_rough", "rough"),
+        ("mtconnect_lathe_semi_finish", "semi_finish"),
+        ("mtconnect_lathe_finish", "finish"),
+    ]
+
+    for template_id, process in cases:
+        simulator = get_device_simulator(
+            template_id,
+            {"process_mode": "process_flow", "process": "rough"},
+        )
+
+        assert simulator is not None
+        assert simulator._process_mode == "single_process"
+        assert simulator._process == process