From 4de615a406c4acbe08030c6c97915b508c0fcb56 Mon Sep 17 00:00:00 2001 From: Jeff Heller Date: Wed, 27 May 2026 08:54:03 -0600 Subject: [PATCH 1/2] fix(ws): remove periodic status broadcast; align push pattern with BT The /snapshot WebSocket was the only protocol pushing periodic state snapshots. BT notifies only weight + on-event (power_off, buttons); USB binary follows the same shape. WiFi did a periodic status frame every 5 s when events_enabled, which: - allocated ~310 B x N clients every 5 s for state the on-device web apps never consumed (verified: zero references to battery_percent, timer_seconds, charging, low_power, etc. in web_apps/) - was the dominant heap-pressure source in the 8 h soak that motivated PR #61 - was inconsistent with BT, surprising clients that interoperate across both transports After this change, clients request status on demand via {"command":"status"} -- same model as BT's 0x22 voltage request, same per-client reply path that already exists (sendWebsocketStatus, kept intact). The events_enabled flag now gates only button events and power_off broadcasts -- the cross-protocol event types that BT also notifies, just without an opt-in over there. Weight, button, and power_off push behavior on WiFi is unchanged. Cleanup follows: - sendWebsocketStatusAll() removed (no callers; was the periodic broadcast helper) - t_lastWebsocketStatusUpdate global removed (no periodic tick to track) - WEBSOCKET_STATUS_NOTIFY_INTERVAL_MS constant removed - WS event handler no longer initialises/resets the period timer - README updated: status frame is on-request only; the "events on" paragraph no longer mentions periodic status; cross-references the BT pattern explicitly Verified on hardware: 20 s window, events on, rate 10k -> 201 weight frames (10.05 Hz) + 0 status frames. On-request {"command":"status"} still answers. Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 12 ++++++------ include/parameter.h | 2 -- include/websocket.h | 28 +++++----------------------- src/hds.ino | 7 ------- 4 files changed, 11 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index c43d8d5..76d848a 100644 --- a/README.md +++ b/README.md @@ -103,9 +103,9 @@ WiFi snapshots are intentionally kept small, especially at 10 Hz: ``` Battery, charging, timer, and power/display state are sent in typed `status` -frames instead of every snapshot. `status`, `battery`, and `info` return a -status frame immediately. After `events on`, the firmware also sends a periodic -`type: "status"` frame every 5 seconds. +frames on demand only. Send `status`, `battery`, or `info` to get one. The +firmware does not push status periodically (mirrors the BT side, which has no +periodic state push either — clients request battery via `0x22`, etc.). WiFi clients can send these text commands over the same WebSocket: @@ -187,9 +187,9 @@ Status frame shape: ``` For backwards compatibility, WiFi only sends weight snapshots by default. A -client must send `events on` before periodic status, local scale button presses, -or power-off notifications are emitted. The event stream resets to off when the -WebSocket disconnects. +client must send `events on` before local scale button presses or power-off +notifications are emitted (status frames remain on-request regardless of the +`events` flag). The event stream resets to off when the WebSocket disconnects. Button event fields: diff --git a/include/parameter.h b/include/parameter.h index 4f12594..e1c4de9 100644 --- a/include/parameter.h +++ b/include/parameter.h @@ -16,14 +16,12 @@ const unsigned long WEBSOCKET_2HZ_NOTIFY_INTERVAL_MS = 500; const unsigned long WEBSOCKET_5HZ_NOTIFY_INTERVAL_MS = 200; const unsigned long WEBSOCKET_10HZ_NOTIFY_INTERVAL_MS = 100; const unsigned long WEBSOCKET_DEFAULT_NOTIFY_INTERVAL_MS = WEBSOCKET_2HZ_NOTIFY_INTERVAL_MS; -const unsigned long WEBSOCKET_STATUS_NOTIFY_INTERVAL_MS = 5000; // volatile: written from the AsyncTCP task (WS event callback) and read // from the main loop. Without volatile, the compiler may keep these cached // in a register across the loop's WS gate check on the other core. volatile unsigned long weightWebsocketNotifyInterval = WEBSOCKET_DEFAULT_NOTIFY_INTERVAL_MS; volatile bool b_websocketEventsEnabled = false; volatile bool b_websocketLowPowerEnabled = false; -volatile unsigned long t_lastWebsocketStatusUpdate = 0; // Websocket pending-command mask. Set on the AsyncTCP task by the WS event // callback; drained on the main loop. Defers hardware-touching ops (u8g2, diff --git a/include/websocket.h b/include/websocket.h index 1a9d907..8aadae0 100644 --- a/include/websocket.h +++ b/include/websocket.h @@ -247,27 +247,11 @@ void sendWebsocketStatus(AsyncWebSocketClient *client, const char *status) { // race client disconnects on the AsyncTCP task). With // setCloseClientOnQueueFull(false), a backed-up client drops its own frame // without blocking the others. -void sendWebsocketStatusAll(const char *status) { - if (!b_wifiEnabled || !b_websocketEventsEnabled || websocket.count() == 0) return; - if (!wsBroadcastHeapOk()) return; - websocket.printfAll("{\"type\":\"status\",\"status\":\"%s\",\"protocol_version\":1,\"firmware_version\":\"%s\",\"grams\":%.2f,\"ms\":%lu,\"battery_percent\":%d,\"battery_voltage\":%.2f,\"charging\":%s,\"timer_running\":%s,\"timer_seconds\":%lu,\"display_on\":%s,\"low_power\":%s,\"soft_sleep\":%s,\"events_enabled\":%s,\"rate_hz\":%lu,\"interval_ms\":%lu}", - status, - FIRMWARE_VER, - f_displayedValue, - millis(), - websocketBatteryPercent(), - f_batteryVoltage, - websocketIsCharging() ? "true" : "false", - stopWatch.isRunning() ? "true" : "false", - (unsigned long)stopWatch.elapsed(), - b_u8g2Sleep ? "false" : "true", - b_websocketLowPowerEnabled ? "true" : "false", - b_softSleep ? "true" : "false", - b_websocketEventsEnabled ? "true" : "false", - websocketRateForInterval(weightWebsocketNotifyInterval), - weightWebsocketNotifyInterval); -} - +// +// Note: status is no longer broadcast periodically -- clients send +// {"command":"status"} on demand, matching the BT pattern (BT has no periodic +// state push either; clients request battery via 0x22 etc.). The lone periodic +// WS push is sendWebsocketWeightAll below. void sendWebsocketWeightAll(float grams, unsigned long ms) { if (!b_wifiEnabled || websocket.count() == 0) return; if (!wsBroadcastHeapOk()) return; @@ -339,7 +323,6 @@ bool handleWebsocketControlCommand(AsyncWebSocketClient *client, String command, if (command == "events") { if (action == "on" || action == "enable" || action == "enabled") { b_websocketEventsEnabled = true; - t_lastWebsocketStatusUpdate = millis(); sendWebsocketStatus(client, "ok"); return true; } @@ -611,7 +594,6 @@ void setupWebsocketEvents() { if (server->count() == 0) { weightWebsocketNotifyInterval = WEBSOCKET_DEFAULT_NOTIFY_INTERVAL_MS; b_websocketEventsEnabled = false; - t_lastWebsocketStatusUpdate = 0; } } else if (type == WS_EVT_ERROR) { // arg = reason code (uint16_t*), data/len = human-readable reason. Log diff --git a/src/hds.ino b/src/hds.ino index c759711..4e080be 100644 --- a/src/hds.ino +++ b/src/hds.ino @@ -1414,13 +1414,6 @@ void loop() { } } - // Periodic WS status frame (5 s) -- not a weight frame, keeps its own gate. - if (b_wifiEnabled && b_websocketEventsEnabled && - millis() - t_lastWebsocketStatusUpdate >= WEBSOCKET_STATUS_NOTIFY_INTERVAL_MS) { - sendWebsocketStatusAll("periodic"); - t_lastWebsocketStatusUpdate = millis(); - } - // ADS debug BLE stream -- separate debug channel, keeps its own gate. if (b_ble_enabled && deviceConnected && bleDebugMode != DEBUG_OFF) { // SINGLE fires once; CONTINUOUS rate-limited to ~10 Hz From 7886e30ade7ec5b65043ea841541177f0b334f48 Mon Sep 17 00:00:00 2001 From: Jeff Heller Date: Wed, 27 May 2026 09:06:56 -0600 Subject: [PATCH 2/2] fix: address review findings on periodic-status removal Three findings from the code review on cb569d3: 1. include/websocket.h:166 heap-gate doc-comment claimed "Skipping a frame is invisible (the next weight frame is <=500 ms away, status <=5 s); crashing is not." The "status <=5 s" recovery guarantee no longer holds after this PR removes the only periodic status broadcaster. Rewrote to describe what's actually still gated by wsBroadcastHeapOk (weight + button + power_off). 2. include/websocket.h:243-247 had an addendum paragraph framing the surviving printfAll comment as "Note: status is no longer broadcast periodically...". The historical framing reads as documentation for a function that no longer exists; dropping it leaves the printfAll technical commentary applying cleanly to sendWebsocketWeightAll (which is what it now sits above). 3. sendWebsocketStatus (include/websocket.h:224-225) reads stopWatch directly from the AsyncTCP task. CLAUDE.md threading model #1 footgun: "stopWatch.* -- No -- multi-field (running flag + start ts + accumulator) and also mutated from loop(), BLE and USB; a status- frame read can tear a write across tasks." The bug is pre-existing on main (not introduced by this PR), but CLAUDE.md's "Fixing bugs you find along the way" applies. Mirroring the snapshot pattern used by PR #57: new g_timerRunning/g_timerElapsed volatiles in parameter.h, refreshed once per main-loop pass in src/hds.ino's WiFi-housekeeping block, read by sendWebsocketStatus. (PR #57 adds the same snapshot independently; merge order between #57 and #62 will produce a trivial same-lines conflict that git auto-resolves.) Verified on hardware: flash + 12s window with events on + rate 10k -> 121 weight frames at 10 Hz, 0 periodic status frames; on-request {"command":"status"} returns the full 16-field frame with timer_running/timer_seconds populated from the snapshot. Co-Authored-By: Claude Opus 4.7 (1M context) --- include/parameter.h | 11 +++++++++++ include/websocket.h | 26 +++++++++++--------------- src/hds.ino | 7 +++++++ 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/include/parameter.h b/include/parameter.h index e1c4de9..c1b4c0d 100644 --- a/include/parameter.h +++ b/include/parameter.h @@ -23,6 +23,17 @@ volatile unsigned long weightWebsocketNotifyInterval = WEBSOCKET_DEFAULT_NOTIFY_ volatile bool b_websocketEventsEnabled = false; volatile bool b_websocketLowPowerEnabled = false; +// Snapshot of the stopWatch state, refreshed once per main-loop iteration. The +// WS status frame is built on the AsyncTCP task (response to {"command":"status"} +// from handleWebsocketControlCommand); stopWatch is a multi-field object (running +// flag + start ts + accumulator) also mutated from BLE/USB and the main loop, so +// reading it directly off the AsyncTCP task can tear (CLAUDE.md threading model +// "stopWatch.* -- No -- multi-field..."). sendWebsocketStatus reads these single +// aligned volatiles instead. g_timerElapsed carries stopWatch.elapsed() in its +// configured resolution (SECONDS) -- the WS "timer_seconds" field. +volatile bool g_timerRunning = false; +volatile unsigned long g_timerElapsed = 0; + // Websocket pending-command mask. Set on the AsyncTCP task by the WS event // callback; drained on the main loop. Defers hardware-touching ops (u8g2, // stopWatch, power-rail GPIOs) so they never race the main loop. diff --git a/include/websocket.h b/include/websocket.h index 8aadae0..7a1db33 100644 --- a/include/websocket.h +++ b/include/websocket.h @@ -162,17 +162,18 @@ void processWsPendingCmds() { // that allocation then throws std::bad_alloc -> std::terminate() -> abort() // (Arduino-ESP32 builds with -fno-exceptions, so the throw can't be caught) -> // reboot. That OOM-reboot is the "weight stops being collected under sustained -// multi-client load" failure. Skipping a frame is invisible (the next weight -// frame is <=500 ms away, status <=5 s); crashing is not. The floor sits above -// the 15 KB heap watchdog (wifi_setup.cpp) so broadcasts back off well before a -// reboot is even considered. Every broadcast helper below runs on the main loop, -// so the skip counter needs no synchronization. +// multi-client load" failure. Skipping a weight frame is invisible (the next is +// <=500 ms away); skipping a button or power-off broadcast is rarer but also +// tolerable; crashing is not. The floor sits above the 15 KB heap watchdog +// (wifi_setup.cpp) so broadcasts back off well before a reboot is even +// considered. Every broadcast helper below runs on the main loop, so the skip +// counter needs no synchronization. // // The floor is 32 KB (vs. the original 25 KB) to leave headroom for lwIP TX // buffers during the post-broadcast drain. Under 4-client load the 25 KB floor -// let free heap dip to ~22 KB after a status burst, where lwIP silently failed -// to allocate pbufs and WiFi packets dropped while the state machine still -// reported CONNECTED (no disc/rec increments). Raising the floor by ~7 KB +// let free heap dip to ~22 KB after a broadcast burst, where lwIP silently +// failed to allocate pbufs and WiFi packets dropped while the state machine +// still reported CONNECTED (no disc/rec increments). Raising the floor by ~7 KB // keeps the post-burst trough above the lwIP starvation knee. Measured: 2h+ // soak at 4 clients, 0% ping loss, 0 reconnects, ~300 averted OOMs. static const uint32_t WS_BROADCAST_HEAP_FLOOR = 32000; @@ -229,8 +230,8 @@ void sendWebsocketStatus(AsyncWebSocketClient *client, const char *status) { websocketBatteryPercent(), f_batteryVoltage, websocketIsCharging() ? "true" : "false", - stopWatch.isRunning() ? "true" : "false", - (unsigned long)stopWatch.elapsed(), + g_timerRunning ? "true" : "false", + g_timerElapsed, b_u8g2Sleep ? "false" : "true", b_websocketLowPowerEnabled ? "true" : "false", b_softSleep ? "true" : "false", @@ -247,11 +248,6 @@ void sendWebsocketStatus(AsyncWebSocketClient *client, const char *status) { // race client disconnects on the AsyncTCP task). With // setCloseClientOnQueueFull(false), a backed-up client drops its own frame // without blocking the others. -// -// Note: status is no longer broadcast periodically -- clients send -// {"command":"status"} on demand, matching the BT pattern (BT has no periodic -// state push either; clients request battery via 0x22 etc.). The lone periodic -// WS push is sendWebsocketWeightAll below. void sendWebsocketWeightAll(float grams, unsigned long ms) { if (!b_wifiEnabled || websocket.count() == 0) return; if (!wsBroadcastHeapOk()) return; diff --git a/src/hds.ino b/src/hds.ino index 4e080be..8fc43cd 100644 --- a/src/hds.ino +++ b/src/hds.ino @@ -1381,6 +1381,13 @@ void loop() { ElegantOTA.loop(); } + // Snapshot stopWatch state for cross-task-safe reads in + // sendWebsocketStatus. stopWatch is multi-field and mutated from BLE/USB + // and this loop; the WS status frame is built on the AsyncTCP task and + // can tear if it reads stopWatch directly (CLAUDE.md threading model). + g_timerRunning = stopWatch.isRunning(); + g_timerElapsed = (unsigned long)stopWatch.elapsed(); + // Unified weight-output tick: ONE 100 ms grid timer drives every active // interface, each at its own rate (sends every NotifyInterval/base // ticks) -- USB-text 1 Hz, USB-binary/WS at their NotifyInterval, BLE