From 004d308ea96f743be7f93506f9329e83720c267c Mon Sep 17 00:00:00 2001 From: agessaman Date: Sun, 21 Jun 2026 11:57:05 -0700 Subject: [PATCH 1/6] feat(ota): implement pull-based OTA from manifest for observer builds Added functionality to support pull-based OTA updates by fetching firmware from a manifest. The new `otaFromManifest` method allows the system to check for available updates and flash the firmware if necessary. This enhancement improves the update process for observer builds using the MQTT bridge, ensuring a more seamless firmware management experience. --- build.sh | 6 +- src/MeshCore.h | 4 + src/helpers/CommonCLI.cpp | 20 ++ src/helpers/ESP32Board.cpp | 194 ++++++++++++++++++ src/helpers/ESP32Board.h | 1 + variants/heltec_t190/platformio.ini | 2 + variants/heltec_v3/platformio.ini | 4 + variants/heltec_v4/platformio.ini | 4 + variants/lilygo_t3s3/platformio.ini | 2 + variants/lilygo_tbeam_1w/platformio.ini | 2 + variants/lilygo_tbeam_SX1262/platformio.ini | 2 + variants/lilygo_tbeam_SX1276/platformio.ini | 2 + .../platformio.ini | 2 + variants/lilygo_tlora_v2_1/platformio.ini | 2 + variants/rak3112/platformio.ini | 2 + variants/station_g2/platformio.ini | 2 + variants/xiao_s3_wio/platformio.ini | 2 + 17 files changed, 251 insertions(+), 2 deletions(-) diff --git a/build.sh b/build.sh index bff89b1647..edcabbf0d4 100755 --- a/build.sh +++ b/build.sh @@ -151,8 +151,10 @@ build_firmware() { esac EMBEDDED_VERSION_STRING="${FIRMWARE_VERSION}${VARIANT_TAG}-${COMMIT_HASH}" - # add firmware version info to end of existing platformio build flags in environment vars - export PLATFORMIO_BUILD_FLAGS="${PLATFORMIO_BUILD_FLAGS} -DFIRMWARE_BUILD_DATE='\"${FIRMWARE_BUILD_DATE}\"' -DFIRMWARE_VERSION='\"${EMBEDDED_VERSION_STRING}\"'" + # add firmware version info to end of existing platformio build flags in environment vars. 
+ # OTA_VARIANT is the env name ($1) — it is exactly the asset-filename prefix used above, so the + # observer pull-OTA can match its own build in the web-flasher manifest (config.json). + export PLATFORMIO_BUILD_FLAGS="${PLATFORMIO_BUILD_FLAGS} -DFIRMWARE_BUILD_DATE='\"${FIRMWARE_BUILD_DATE}\"' -DFIRMWARE_VERSION='\"${EMBEDDED_VERSION_STRING}\"' -DOTA_VARIANT='\"$1\"'" # disable debug flags if requested disable_debug_flags diff --git a/src/MeshCore.h b/src/MeshCore.h index d91c4ea12f..6a17a4b68b 100644 --- a/src/MeshCore.h +++ b/src/MeshCore.h @@ -64,6 +64,10 @@ class MainBoard { virtual uint8_t getStartupReason() const = 0; virtual bool getBootloaderVersion(char* version, size_t max_len) { return false; } virtual bool startOTAUpdate(const char* id, char reply[]) { return false; } // not supported + // Pull-based OTA: fetch the firmware build for this variant from a baked-in manifest and flash it. + // current_ver is the running firmware version string (used to skip if already up to date); when + // dry_run is true the build is only reported, not flashed. Observer (ESP32+WiFi) builds only. + virtual bool otaFromManifest(const char* current_ver, bool dry_run, char reply[]) { return false; } // Power management interface (boards with power management override these) virtual bool isExternalPowered() { return false; } diff --git a/src/helpers/CommonCLI.cpp b/src/helpers/CommonCLI.cpp index e17adf71a7..6034507f9b 100644 --- a/src/helpers/CommonCLI.cpp +++ b/src/helpers/CommonCLI.cpp @@ -793,9 +793,29 @@ void CommonCLI::handleCommand(uint32_t sender_timestamp, char* command, char* re strcpy(reply, "ERR: unsupported on this platform"); #endif } else if (memcmp(command, "start ota", 9) == 0) { +#if defined(WITH_MQTT_BRIDGE) && defined(OTA_MANIFEST_URL) + // Observer pull-OTA: fetch this variant's build from the baked-in manifest. + // "start ota check" reports the available build without flashing. 
+ const char* arg = command + 9; + while (*arg == ' ') arg++; + bool dry = (memcmp(arg, "check", 5) == 0); + if (WiFi.status() != WL_CONNECTED) { + strcpy(reply, "ERR: WiFi not connected"); + } else { + // Free the MQTT bridge (TLS contexts + task) so the manifest parse and + // OTA download have heap headroom; the WiFi STA link survives end(). + if (!dry) _callbacks->setBridgeState(false); + bool ok = _board->otaFromManifest(_callbacks->getFirmwareVer(), dry, reply); + // On success the board reboots and never returns here; on any abort + // (already up to date, partition change, download error) bring the + // bridge back up so the node resumes uplinking. + if (!ok && !dry) _callbacks->setBridgeState(true); + } +#else if (!_board->startOTAUpdate(_prefs->node_name, reply)) { strcpy(reply, "Error"); } +#endif } else if (memcmp(command, "clock", 5) == 0) { uint32_t now = getRTCClock()->getCurrentTime(); DateTime dt = DateTime(now); diff --git a/src/helpers/ESP32Board.cpp b/src/helpers/ESP32Board.cpp index e0ca1d0eeb..3b4f953b29 100644 --- a/src/helpers/ESP32Board.cpp +++ b/src/helpers/ESP32Board.cpp @@ -44,4 +44,198 @@ bool ESP32Board::startOTAUpdate(const char* id, char reply[]) { } #endif +// --------------------------------------------------------------------------- +// Manifest-driven pull OTA (observer / MQTT-bridge builds only) +// +// The observer already holds a live WiFi station connection (for the MQTT +// bridge) and embeds a root-CA bundle, so it can fetch its own firmware. The +// caller (CommonCLI) stops the MQTT bridge first to free heap/TLS, then calls +// this. We read the web-flasher manifest (config.json), find the `flash-update` +// (app-only) build for our own variant, refuse partition-change releases (OTA +// can't rewrite the partition table), skip if already up to date, then stream +// the .bin straight into the inactive OTA slot via HTTPUpdate. 
+// --------------------------------------------------------------------------- +#if defined(WITH_MQTT_BRIDGE) +#include +#include +#include +#include +#include + +// Embedded CA bundle (produced by board_build.embed_files). Weak so non-bundle +// builds still link; we check for presence at runtime. +extern const uint8_t rootca_crt_bundle_start[] asm("_binary_src_certs_x509_crt_bundle_bin_start") __attribute__((weak)); +extern const uint8_t rootca_crt_bundle_end[] asm("_binary_src_certs_x509_crt_bundle_bin_end") __attribute__((weak)); + +// Extract the trailing build hash. For a filename we first drop a ".bin" +// suffix, then take the token after the last '-'. Works for both the manifest +// asset name ("...-v1.16.0-8b084d5.bin" -> "8b084d5") and the embedded +// FIRMWARE_VERSION ("v1.16.0-observer-8b084d5" -> "8b084d5"). +static void ota_extractHash(const char* s, char* out, size_t out_sz) { + if (!s) { if (out_sz) out[0] = 0; return; } + size_t len = strlen(s); + if (len > 4 && strcmp(s + len - 4, ".bin") == 0) len -= 4; + size_t i = len; + while (i > 0 && s[i - 1] != '-') i--; + size_t n = len - i; + if (n >= out_sz) n = out_sz - 1; + memcpy(out, s + i, n); + out[n] = 0; +} + +bool ESP32Board::otaFromManifest(const char* current_ver, bool dry_run, char reply[]) { +#if !defined(OTA_MANIFEST_URL) || !defined(OTA_VARIANT) + strcpy(reply, "ERR: OTA not configured (build via build.sh)"); + return false; +#else + if (WiFi.status() != WL_CONNECTED) { + strcpy(reply, "ERR: WiFi not connected"); + return false; + } + + size_t bundle_len = 0; + if (rootca_crt_bundle_start != nullptr && rootca_crt_bundle_end != nullptr && + rootca_crt_bundle_end > rootca_crt_bundle_start) { + bundle_len = (size_t)(rootca_crt_bundle_end - rootca_crt_bundle_start); + } + if (bundle_len == 0) { + strcpy(reply, "ERR: no embedded cert bundle"); + return false; + } + + // --- Fetch + filter-parse the manifest ----------------------------------- + WiFiClientSecure mclient; +#if 
ESP_ARDUINO_VERSION_MAJOR >= 3 + mclient.setCACertBundle(rootca_crt_bundle_start, bundle_len); +#else + mclient.setCACertBundle(rootca_crt_bundle_start); +#endif + mclient.setTimeout(15000); + + HTTPClient http; + if (!http.begin(mclient, OTA_MANIFEST_URL)) { + strcpy(reply, "ERR: manifest connect failed"); + return false; + } + int code = http.GET(); + if (code != HTTP_CODE_OK) { + snprintf(reply, 160, "ERR: manifest HTTP %d", code); + http.end(); + return false; + } + + // Filter keeps only staticPath + each firmware entry's notice/version, so the + // parsed document stays a fraction of the full manifest. (The dynamic version + // key forces keeping its whole subtree, incl. release notes.) ArduinoJson v7 + // JsonDocument allocates elastically, so this only grows to the kept subset. + JsonDocument filter; + filter["staticPath"] = true; + filter["device"][0]["firmware"][0]["notice"] = true; + filter["device"][0]["firmware"][0]["version"] = true; + + JsonDocument doc; + DeserializationError err = + deserializeJson(doc, http.getStream(), DeserializationOption::Filter(filter)); + http.end(); + if (err) { + snprintf(reply, 160, "ERR: manifest parse (%s)", err.c_str()); + return false; + } + + // Copy out of the document up front: doc gets cleared before these are used. + char base_url[128] = {0}; + strncpy(base_url, doc["staticPath"] | "", sizeof(base_url) - 1); + if (!base_url[0]) { + strcpy(reply, "ERR: manifest missing staticPath"); + return false; + } + + // --- Locate the flash-update build for our variant ----------------------- + const char* variant = OTA_VARIANT; + size_t vlen = strlen(variant); + char target_name[128] = {0}; + bool partition_change = false; + bool found = false; + + for (JsonObject dev : doc["device"].as()) { + for (JsonObject fw : dev["firmware"].as()) { + const char* notice = fw["notice"].is() ? 
fw["notice"].as() : nullptr; + for (JsonPair vp : fw["version"].as()) { + for (JsonObject file : vp.value()["files"].as()) { + const char* type = file["type"] | ""; + const char* name = file["name"] | ""; + if (strcmp(type, "flash-update") != 0) continue; + if (strncmp(name, variant, vlen) != 0 || name[vlen] != '-') continue; + strncpy(target_name, name, sizeof(target_name) - 1); + partition_change = (notice != nullptr && strcmp(notice, "partition-change") == 0); + found = true; + break; + } + if (found) break; + } + if (found) break; + } + if (found) break; + } + doc.clear(); + + if (!found) { + snprintf(reply, 160, "ERR: no build for %s in manifest", variant); + return false; + } + + char avail_hash[24], cur_hash[24]; + ota_extractHash(target_name, avail_hash, sizeof(avail_hash)); + ota_extractHash(current_ver, cur_hash, sizeof(cur_hash)); + // Compare by shared prefix: git abbreviates the same commit to 7 chars on a + // shallow CI clone but 8 locally, so an exact match would miss equal builds. + size_t la = strlen(avail_hash), lc = strlen(cur_hash); + size_t m = (la < lc) ? la : lc; + bool up_to_date = (m >= 7 && strncmp(avail_hash, cur_hash, m) == 0); + + if (dry_run) { + snprintf(reply, 160, "%s: %s -> %s%s", up_to_date ? "up to date" : "update available", + cur_hash, avail_hash, partition_change ? 
" [partition change: cable flash]" : ""); + return true; + } + if (partition_change) { + snprintf(reply, 160, "ERR: %s needs cable flash (partition change)", avail_hash); + return false; + } + if (up_to_date) { + snprintf(reply, 160, "OK: already up to date (%s)", cur_hash); + return false; + } + + // --- Stream the .bin into the inactive OTA slot -------------------------- + char url[256]; + snprintf(url, sizeof(url), "%s/%s", base_url, target_name); + + inhibit_sleep = true; // keep awake through the flash + + WiFiClientSecure uclient; +#if ESP_ARDUINO_VERSION_MAJOR >= 3 + uclient.setCACertBundle(rootca_crt_bundle_start, bundle_len); +#else + uclient.setCACertBundle(rootca_crt_bundle_start); +#endif + uclient.setTimeout(20000); + + httpUpdate.rebootOnUpdate(true); // reboots into the new image on success + t_httpUpdate_return ret = httpUpdate.update(uclient, url); + + // Only reached on failure (success reboots inside update()). + inhibit_sleep = false; + snprintf(reply, 160, "ERR: OTA failed (%d): %s", (int)ret, + httpUpdate.getLastErrorString().c_str()); + return false; +#endif // OTA_MANIFEST_URL && OTA_VARIANT +} +#else +bool ESP32Board::otaFromManifest(const char* current_ver, bool dry_run, char reply[]) { + strcpy(reply, "ERR: not supported"); + return false; +} +#endif // WITH_MQTT_BRIDGE + #endif diff --git a/src/helpers/ESP32Board.h b/src/helpers/ESP32Board.h index a4cbf2a980..93d1bdadf7 100644 --- a/src/helpers/ESP32Board.h +++ b/src/helpers/ESP32Board.h @@ -151,6 +151,7 @@ class ESP32Board : public mesh::MainBoard { } bool startOTAUpdate(const char* id, char reply[]) override; + bool otaFromManifest(const char* current_ver, bool dry_run, char reply[]) override; void setInhibitSleep(bool inhibit) { inhibit_sleep = inhibit; diff --git a/variants/heltec_t190/platformio.ini b/variants/heltec_t190/platformio.ini index 7b8de99f75..71b5655645 100644 --- a/variants/heltec_t190/platformio.ini +++ b/variants/heltec_t190/platformio.ini @@ -112,6 +112,7 @@ 
build_flags = -D ADMIN_PASSWORD='"password"' -D MAX_NEIGHBOURS=50 -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 @@ -204,6 +205,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D ROOM_PASSWORD='"hello"' -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 diff --git a/variants/heltec_v3/platformio.ini b/variants/heltec_v3/platformio.ini index d6cfaaf37d..a71cb90f39 100644 --- a/variants/heltec_v3/platformio.ini +++ b/variants/heltec_v3/platformio.ini @@ -123,6 +123,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D MAX_NEIGHBOURS=50 -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 @@ -192,6 +193,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D ROOM_PASSWORD='"hello"' -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 @@ -480,6 +482,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D MAX_NEIGHBOURS=50 -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 @@ -528,6 +531,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D ROOM_PASSWORD='"hello"' -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 diff --git a/variants/heltec_v4/platformio.ini b/variants/heltec_v4/platformio.ini index 7d4d7058fa..157ece5dc2 100644 --- a/variants/heltec_v4/platformio.ini +++ b/variants/heltec_v4/platformio.ini @@ -162,6 +162,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D MAX_NEIGHBOURS=50 -D WITH_MQTT_BRIDGE=1 + -D 
OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 @@ -213,6 +214,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D MAX_NEIGHBOURS=50 -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 @@ -315,6 +317,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D ROOM_PASSWORD='"hello"' -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 @@ -354,6 +357,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D ROOM_PASSWORD='"hello"' -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 diff --git a/variants/lilygo_t3s3/platformio.ini b/variants/lilygo_t3s3/platformio.ini index 08cbb342b3..f862a9f695 100644 --- a/variants/lilygo_t3s3/platformio.ini +++ b/variants/lilygo_t3s3/platformio.ini @@ -116,6 +116,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D MAX_NEIGHBOURS=50 -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 ; -D MQTT_DEBUG=1 @@ -161,6 +162,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D ROOM_PASSWORD='"hello"' -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 diff --git a/variants/lilygo_tbeam_1w/platformio.ini b/variants/lilygo_tbeam_1w/platformio.ini index 3c8fc4b833..73723c29fe 100644 --- a/variants/lilygo_tbeam_1w/platformio.ini +++ b/variants/lilygo_tbeam_1w/platformio.ini @@ -209,6 +209,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D MAX_NEIGHBOURS=50 -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D 
MQTT_MAX_PACKET_SIZE=1024 -D PERSISTANT_GPS=1 @@ -255,6 +256,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D ROOM_PASSWORD='"hello"' -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D PERSISTANT_GPS=1 diff --git a/variants/lilygo_tbeam_SX1262/platformio.ini b/variants/lilygo_tbeam_SX1262/platformio.ini index 7ae5671d26..575178576c 100644 --- a/variants/lilygo_tbeam_SX1262/platformio.ini +++ b/variants/lilygo_tbeam_SX1262/platformio.ini @@ -161,6 +161,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D MAX_NEIGHBOURS=50 -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 ; -D MQTT_DEBUG=1 @@ -203,6 +204,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D ROOM_PASSWORD='"hello"' -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 diff --git a/variants/lilygo_tbeam_SX1276/platformio.ini b/variants/lilygo_tbeam_SX1276/platformio.ini index e00f8604f8..aa3383dbf4 100644 --- a/variants/lilygo_tbeam_SX1276/platformio.ini +++ b/variants/lilygo_tbeam_SX1276/platformio.ini @@ -160,6 +160,7 @@ build_flags = -D MAX_NEIGHBOURS=50 -D PERSISTANT_GPS=1 -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 ; -D MQTT_DEBUG=1 @@ -202,6 +203,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D ROOM_PASSWORD='"hello"' -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 diff --git a/variants/lilygo_tbeam_supreme_SX1262/platformio.ini b/variants/lilygo_tbeam_supreme_SX1262/platformio.ini index 6657fa0922..a8a8472141 100644 --- a/variants/lilygo_tbeam_supreme_SX1262/platformio.ini +++ 
b/variants/lilygo_tbeam_supreme_SX1262/platformio.ini @@ -120,6 +120,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D MAX_NEIGHBOURS=50 -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 @@ -162,6 +163,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D ROOM_PASSWORD='"hello"' -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 diff --git a/variants/lilygo_tlora_v2_1/platformio.ini b/variants/lilygo_tlora_v2_1/platformio.ini index 1f8942c829..9a4a83656f 100644 --- a/variants/lilygo_tlora_v2_1/platformio.ini +++ b/variants/lilygo_tlora_v2_1/platformio.ini @@ -157,6 +157,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D MAX_NEIGHBOURS=50 -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_TASK_STACK_SIZE=16384 @@ -202,6 +203,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D ROOM_PASSWORD='"hello"' -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_TASK_STACK_SIZE=16384 diff --git a/variants/rak3112/platformio.ini b/variants/rak3112/platformio.ini index 29cd30c2ec..f284a3a172 100644 --- a/variants/rak3112/platformio.ini +++ b/variants/rak3112/platformio.ini @@ -113,6 +113,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D MAX_NEIGHBOURS=50 -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 @@ -176,6 +177,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D ROOM_PASSWORD='"hello"' -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 diff --git 
a/variants/station_g2/platformio.ini b/variants/station_g2/platformio.ini index 856645aeb5..aaa74b1505 100644 --- a/variants/station_g2/platformio.ini +++ b/variants/station_g2/platformio.ini @@ -187,6 +187,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D ROOM_PASSWORD='"hello"' -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 @@ -305,6 +306,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D MAX_NEIGHBOURS=50 -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 diff --git a/variants/xiao_s3_wio/platformio.ini b/variants/xiao_s3_wio/platformio.ini index 01934f2e5e..873454c015 100644 --- a/variants/xiao_s3_wio/platformio.ini +++ b/variants/xiao_s3_wio/platformio.ini @@ -103,6 +103,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D MAX_NEIGHBOURS=50 -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 @@ -146,6 +147,7 @@ build_flags = -D ADMIN_PASSWORD='"password"' -D ROOM_PASSWORD='"hello"' -D WITH_MQTT_BRIDGE=1 + -D OTA_MANIFEST_URL='"https://observer.gessaman.com/config.json"' -D MAX_MQTT_BROKERS=3 -D MQTT_MAX_PACKET_SIZE=1024 -D MQTT_DEBUG=1 From b679c0d7122ee0c97758f4a4a1592dc3fff3627d Mon Sep 17 00:00:00 2001 From: agessaman Date: Mon, 22 Jun 2026 19:32:23 -0700 Subject: [PATCH 2/6] fix(ota): handle HTTP response and empty manifest in otaFromManifest Updated the otaFromManifest method to enforce HTTP/1.0 for better compatibility with CDNs and to handle empty manifest responses. This ensures that the JSON parser receives a complete body, preventing errors during firmware update checks. 
--- src/helpers/ESP32Board.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/helpers/ESP32Board.cpp b/src/helpers/ESP32Board.cpp index 3b4f953b29..45f60f833b 100644 --- a/src/helpers/ESP32Board.cpp +++ b/src/helpers/ESP32Board.cpp @@ -117,12 +117,24 @@ bool ESP32Board::otaFromManifest(const char* current_ver, bool dry_run, char rep strcpy(reply, "ERR: manifest connect failed"); return false; } + // Force HTTP/1.0: a CDN (e.g. Cloudflare) answers HTTP/1.1 with + // Transfer-Encoding: chunked and no Content-Length, and the raw chunked + // stream can't be fed to the JSON parser (chunk-size frames corrupt it). + // HTTP/1.0 yields a Connection: close, unframed body that getString() + // assembles in full before we parse it. + http.useHTTP10(true); int code = http.GET(); if (code != HTTP_CODE_OK) { snprintf(reply, 160, "ERR: manifest HTTP %d", code); http.end(); return false; } + String body = http.getString(); + http.end(); + if (body.length() == 0) { + strcpy(reply, "ERR: empty manifest"); + return false; + } // Filter keeps only staticPath + each firmware entry's notice/version, so the // parsed document stays a fraction of the full manifest. 
(The dynamic version @@ -135,8 +147,8 @@ bool ESP32Board::otaFromManifest(const char* current_ver, bool dry_run, char rep JsonDocument doc; DeserializationError err = - deserializeJson(doc, http.getStream(), DeserializationOption::Filter(filter)); - http.end(); + deserializeJson(doc, body, DeserializationOption::Filter(filter)); + body = String(); // free the raw manifest before we walk the parsed doc if (err) { snprintf(reply, 160, "ERR: manifest parse (%s)", err.c_str()); return false; From e84c26657600323e8b124511126ddb660f497169 Mon Sep 17 00:00:00 2001 From: agessaman Date: Mon, 22 Jun 2026 19:36:05 -0700 Subject: [PATCH 3/6] feat(ota): add ota check and ota update commands for pull-based updates --- src/helpers/CommonCLI.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/helpers/CommonCLI.cpp b/src/helpers/CommonCLI.cpp index 6034507f9b..c62e46b6b4 100644 --- a/src/helpers/CommonCLI.cpp +++ b/src/helpers/CommonCLI.cpp @@ -792,13 +792,15 @@ void CommonCLI::handleCommand(uint32_t sender_timestamp, char* command, char* re #else strcpy(reply, "ERR: unsupported on this platform"); #endif - } else if (memcmp(command, "start ota", 9) == 0) { + } else if (memcmp(command, "ota check", 9) == 0 || memcmp(command, "ota update", 10) == 0) { + // Observer pull-OTA: fetch this variant's build from the baked-in manifest + // and flash it. Intentionally a separate command from "start ota" (the + // manual ElegantOTA web-upload SoftAP) so a remote/online update is never + // triggered by someone expecting to hand-upload a binary. + // ota check -> report available build, do not flash + // ota update -> download and flash, then reboot #if defined(WITH_MQTT_BRIDGE) && defined(OTA_MANIFEST_URL) - // Observer pull-OTA: fetch this variant's build from the baked-in manifest. - // "start ota check" reports the available build without flashing. 
- const char* arg = command + 9; - while (*arg == ' ') arg++; - bool dry = (memcmp(arg, "check", 5) == 0); + bool dry = (memcmp(command, "ota check", 9) == 0); if (WiFi.status() != WL_CONNECTED) { strcpy(reply, "ERR: WiFi not connected"); } else { @@ -812,10 +814,13 @@ void CommonCLI::handleCommand(uint32_t sender_timestamp, char* command, char* re if (!ok && !dry) _callbacks->setBridgeState(true); } #else + strcpy(reply, "ERR: online OTA not supported on this build"); +#endif + } else if (memcmp(command, "start ota", 9) == 0) { + // Manual OTA: bring up the ElegantOTA SoftAP for a hand-uploaded binary. if (!_board->startOTAUpdate(_prefs->node_name, reply)) { strcpy(reply, "Error"); } -#endif } else if (memcmp(command, "clock", 5) == 0) { uint32_t now = getRTCClock()->getCurrentTime(); DateTime dt = DateTime(now); From 3719ebb980b364ea355ffa4c85b95af022127cae Mon Sep 17 00:00:00 2001 From: agessaman Date: Mon, 22 Jun 2026 19:53:40 -0700 Subject: [PATCH 4/6] fix(ota): improve streaming JSON parsing in otaFromManifest Updated the otaFromManifest method to stream-parse the firmware manifest directly from the network, reducing peak RAM usage during OTA checks. This change enhances compatibility with slow TLS links by implementing a per-read timeout, ensuring a more efficient and reliable update process. --- src/helpers/ESP32Board.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/helpers/ESP32Board.cpp b/src/helpers/ESP32Board.cpp index 45f60f833b..5eaa06c7ae 100644 --- a/src/helpers/ESP32Board.cpp +++ b/src/helpers/ESP32Board.cpp @@ -120,26 +120,26 @@ bool ESP32Board::otaFromManifest(const char* current_ver, bool dry_run, char rep // Force HTTP/1.0: a CDN (e.g. Cloudflare) answers HTTP/1.1 with // Transfer-Encoding: chunked and no Content-Length, and the raw chunked // stream can't be fed to the JSON parser (chunk-size frames corrupt it). 
- // HTTP/1.0 yields a Connection: close, unframed body that getString() - // assembles in full before we parse it. + // HTTP/1.0 yields a Connection: close, unframed body we can stream-parse. http.useHTTP10(true); + http.setTimeout(20000); // per-read timeout while streaming the body int code = http.GET(); if (code != HTTP_CODE_OK) { snprintf(reply, 160, "ERR: manifest HTTP %d", code); http.end(); return false; } - String body = http.getString(); - http.end(); - if (body.length() == 0) { - strcpy(reply, "ERR: empty manifest"); - return false; - } - // Filter keeps only staticPath + each firmware entry's notice/version, so the - // parsed document stays a fraction of the full manifest. (The dynamic version - // key forces keeping its whole subtree, incl. release notes.) ArduinoJson v7 - // JsonDocument allocates elastically, so this only grows to the kept subset. + // Stream-parse straight from the network: the filter discards all but + // staticPath + each firmware entry's notice/version, so peak RAM is just the + // small kept subset (not the ~40 KB manifest). This matters for `ota check`, + // which runs with the MQTT bridge still up and holding heap. (The dynamic + // version key forces keeping its whole subtree, incl. release notes.) + // readBytes() honours the stream timeout, so a slow TLS link won't be + // mistaken for end-of-input. 
+ WiFiClient* stream = http.getStreamPtr(); + stream->setTimeout(20000); + JsonDocument filter; filter["staticPath"] = true; filter["device"][0]["firmware"][0]["notice"] = true; @@ -147,8 +147,8 @@ bool ESP32Board::otaFromManifest(const char* current_ver, bool dry_run, char rep JsonDocument doc; DeserializationError err = - deserializeJson(doc, body, DeserializationOption::Filter(filter)); - body = String(); // free the raw manifest before we walk the parsed doc + deserializeJson(doc, *stream, DeserializationOption::Filter(filter)); + http.end(); if (err) { snprintf(reply, 160, "ERR: manifest parse (%s)", err.c_str()); return false; From dec668386a5a7fdc993f9a9414852af85db1ec14 Mon Sep 17 00:00:00 2001 From: agessaman Date: Mon, 22 Jun 2026 21:28:18 -0700 Subject: [PATCH 5/6] fix(ota): improve OTA update handling for connected WiFi networks Updated the startOTAUpdate method to serve the ElegantOTA on the station IP when connected to a WiFi network, enhancing accessibility for OTA updates. If not connected, it defaults to the MeshCore-OTA SoftAP. This change improves the user experience by allowing easier access to OTA updates without needing to switch networks. --- src/helpers/ESP32Board.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/helpers/ESP32Board.cpp b/src/helpers/ESP32Board.cpp index 5eaa06c7ae..2306db152c 100644 --- a/src/helpers/ESP32Board.cpp +++ b/src/helpers/ESP32Board.cpp @@ -12,9 +12,19 @@ bool ESP32Board::startOTAUpdate(const char* id, char reply[]) { inhibit_sleep = true; // prevent sleep during OTA - WiFi.softAP("MeshCore-OTA", NULL); - sprintf(reply, "Started: http://%s/update", WiFi.softAPIP().toString().c_str()); + // If the device is already on a WiFi network (e.g. an observer joined in STA + // mode), serve ElegantOTA on the station IP so it's reachable from the LAN + // without joining a separate AP. Otherwise raise the MeshCore-OTA SoftAP. 
+ IPAddress ip; + if (WiFi.status() == WL_CONNECTED) { + ip = WiFi.localIP(); + } else { + WiFi.softAP("MeshCore-OTA", NULL); + ip = WiFi.softAPIP(); + } + + sprintf(reply, "Started: http://%s/update", ip.toString().c_str()); MESH_DEBUG_PRINTLN("startOTAUpdate: %s", reply); static char id_buf[60]; From c1885fcc154fb3c524c2f89b4fd68b269f186c28 Mon Sep 17 00:00:00 2001 From: agessaman Date: Mon, 22 Jun 2026 22:33:57 -0700 Subject: [PATCH 6/6] feat(ota): implement deferred OTA update scheduling and handling Added support for deferred OTA updates in the MyMesh class, allowing the system to schedule firmware updates to occur after a confirmation reply is sent. This change improves the user experience by ensuring that the update process does not block the main application loop, allowing for smoother operation during firmware updates. --- examples/simple_repeater/MyMesh.cpp | 16 ++++++++ examples/simple_repeater/MyMesh.h | 10 +++++ src/helpers/CommonCLI.cpp | 26 ++++++++----- src/helpers/CommonCLI.h | 8 ++++ src/helpers/ESP32Board.cpp | 39 ++++++++++++++++++++ src/helpers/ESP32Board.h | 4 ++ src/helpers/bridges/MQTTBridge.cpp | 57 +++++++++++++++++++---------- src/helpers/bridges/MQTTBridge.h | 5 +++ 8 files changed, 136 insertions(+), 29 deletions(-) diff --git a/examples/simple_repeater/MyMesh.cpp b/examples/simple_repeater/MyMesh.cpp index cc45c2c4f8..72363588d7 100644 --- a/examples/simple_repeater/MyMesh.cpp +++ b/examples/simple_repeater/MyMesh.cpp @@ -1445,6 +1445,22 @@ void MyMesh::loop() { MESH_DEBUG_PRINTLN("Radio params restored"); } +#if defined(WITH_MQTT_BRIDGE) && defined(OTA_MANIFEST_URL) + if (_ota_update_at && millisHasNowPassed(_ota_update_at)) { // deferred `ota update` + _ota_update_at = 0; // clear timer + // The "Beginning update..." reply has now gone out. 
Free the bridge for heap + // headroom, then flash: otaFromManifest reboots into the new image on success + // (so this never returns); on any abort (already up to date, partition change, + // download error) it returns and we resume the bridge. + setBridgeState(false); + char ota_reply[160]; + if (!_cli.getBoard()->otaFromManifest(getFirmwareVer(), false, ota_reply)) { + MESH_DEBUG_PRINTLN("ota update aborted: %s", ota_reply); + setBridgeState(true); + } + } +#endif + // is pending dirty contacts write needed? if (dirty_contacts_expiry && millisHasNowPassed(dirty_contacts_expiry)) { acl.save(_fs); diff --git a/examples/simple_repeater/MyMesh.h b/examples/simple_repeater/MyMesh.h index 5ea4fcc3cf..09ee140e7b 100644 --- a/examples/simple_repeater/MyMesh.h +++ b/examples/simple_repeater/MyMesh.h @@ -116,6 +116,7 @@ class MyMesh : public mesh::Mesh, public CommonCLICallbacks { #endif CayenneLPP telemetry; unsigned long set_radio_at, revert_radio_at; + unsigned long _ota_update_at = 0; // deferred `ota update` fire time (0 = none scheduled) float pending_freq; float pending_bw; uint8_t pending_sf; @@ -306,6 +307,15 @@ class MyMesh : public mesh::Mesh, public CommonCLICallbacks { bridge->setSlotPreset(slot, _prefs.mqtt_slot_preset[slot]); } + // Schedule the pull-OTA flash to run from loop() in ~2.5 s, leaving time for the + // "Beginning update..." CLI reply (CLI_REPLY_DELAY_MILLIS = 600 ms) to transmit + // before the flash blocks the loop and reboots. + bool beginDeferredOtaUpdate() override { + _ota_update_at = millis() + 2500; + if (_ota_update_at == 0) _ota_update_at = 1; // 0 means "none" + return true; + } + int getQueueSize() override { return bridge ? 
bridge->getQueueSize() : 0; } diff --git a/src/helpers/CommonCLI.cpp b/src/helpers/CommonCLI.cpp index c62e46b6b4..e44269c039 100644 --- a/src/helpers/CommonCLI.cpp +++ b/src/helpers/CommonCLI.cpp @@ -800,18 +800,26 @@ void CommonCLI::handleCommand(uint32_t sender_timestamp, char* command, char* re // ota check -> report available build, do not flash // ota update -> download and flash, then reboot #if defined(WITH_MQTT_BRIDGE) && defined(OTA_MANIFEST_URL) - bool dry = (memcmp(command, "ota check", 9) == 0); if (WiFi.status() != WL_CONNECTED) { strcpy(reply, "ERR: WiFi not connected"); + } else if (memcmp(command, "ota check", 9) == 0) { + // Check is synchronous so its result lands in this reply. Free the MQTT + // bridge first: on a no-PSRAM board only ~70 KB heap is free with the + // bridge up, and a third TLS connection (the manifest fetch) alongside the + // two live MQTT sessions drives free heap to a few hundred bytes, which + // truncates the read. The WiFi STA link survives end(); restore after. + _callbacks->setBridgeState(false); + _board->otaFromManifest(_callbacks->getFirmwareVer(), true, reply); + _callbacks->setBridgeState(true); } else { - // Free the MQTT bridge (TLS contexts + task) so the manifest parse and - // OTA download have heap headroom; the WiFi STA link survives end(). - if (!dry) _callbacks->setBridgeState(false); - bool ok = _board->otaFromManifest(_callbacks->getFirmwareVer(), dry, reply); - // On success the board reboots and never returns here; on any abort - // (already up to date, partition change, download error) bring the - // bridge back up so the node resumes uplinking. - if (!ok && !dry) _callbacks->setBridgeState(true); + // Update is DEFERRED: the flash blocks the loop and then reboots, so it + // must run only AFTER this reply has gone out over the mesh — otherwise + // the requester never gets a confirmation. The app loop runs it shortly. + if (_callbacks->beginDeferredOtaUpdate()) { + strcpy(reply, "Beginning update... 
(node will reboot if successful)"); + } else { + strcpy(reply, "ERR: online OTA not available"); + } } #else strcpy(reply, "ERR: online OTA not supported on this build"); diff --git a/src/helpers/CommonCLI.h b/src/helpers/CommonCLI.h index 9a6502d14d..a158324c7a 100644 --- a/src/helpers/CommonCLI.h +++ b/src/helpers/CommonCLI.h @@ -288,6 +288,14 @@ class CommonCLICallbacks { restartBridge(); }; + // Schedule a pull-OTA firmware update to run shortly (from the app loop), after + // the "Beginning update..." CLI reply has been transmitted. Deferred because the + // flash blocks the loop and then reboots, so it can't run inline with the reply. + // Returns true if scheduled. Default: not supported. + virtual bool beginDeferredOtaUpdate() { + return false; + }; + virtual int getQueueSize() { return 0; // no op by default }; diff --git a/src/helpers/ESP32Board.cpp b/src/helpers/ESP32Board.cpp index 2306db152c..139b5f7837 100644 --- a/src/helpers/ESP32Board.cpp +++ b/src/helpers/ESP32Board.cpp @@ -71,6 +71,8 @@ bool ESP32Board::startOTAUpdate(const char* id, char reply[]) { #include #include #include +#include +#include // Embedded CA bundle (produced by board_build.embed_files). Weak so non-bundle // builds still link; we check for presence at runtime. @@ -93,7 +95,44 @@ static void ota_extractHash(const char* s, char* out, size_t out_sz) { out[n] = 0; } +// Parameters handed to the worker task; lives on otaFromManifest()'s stack, +// which stays valid because that function blocks until the worker signals done. 
+struct OtaTaskArgs { + ESP32Board* self; + const char* current_ver; + bool dry_run; + char* reply; + volatile bool result; + volatile bool done; +}; + +static void ota_task_entry(void* param) { + OtaTaskArgs* a = static_cast<OtaTaskArgs*>(param); + a->result = a->self->otaFromManifestImpl(a->current_ver, a->dry_run, a->reply); + a->done = true; // on a successful `ota update` we reboot before reaching here + vTaskDelete(nullptr); +} + bool ESP32Board::otaFromManifest(const char* current_ver, bool dry_run, char reply[]) { + // The TLS handshake (cert-bundle verify) + JSON parse / HTTPUpdate use far more + // stack than the ~8 KB loop task offers — especially when reached via the deep + // mesh-receive call chain (it overflows the loopTask canary). Run the work in a + // dedicated 24 KB-stack task and block here until it finishes. The big stack is + // freed when the task exits; on a successful update the chip reboots inside it. + OtaTaskArgs args = { this, current_ver, dry_run, reply, false, false }; + TaskHandle_t handle = nullptr; + BaseType_t ok = xTaskCreatePinnedToCore(ota_task_entry, "ota", 24576, &args, 5, &handle, 1); + if (ok != pdPASS) { + strcpy(reply, "ERR: OTA task spawn failed"); + return false; + } + while (!args.done) { + delay(50); // Arduino delay() yields to other tasks + } + return args.result; +} + +bool ESP32Board::otaFromManifestImpl(const char* current_ver, bool dry_run, char reply[]) { #if !defined(OTA_MANIFEST_URL) || !defined(OTA_VARIANT) strcpy(reply, "ERR: OTA not configured (build via build.sh)"); return false; diff --git a/src/helpers/ESP32Board.h b/src/helpers/ESP32Board.h index 93d1bdadf7..eae9ab7844 100644 --- a/src/helpers/ESP32Board.h +++ b/src/helpers/ESP32Board.h @@ -152,6 +152,10 @@ class ESP32Board : public mesh::MainBoard { bool startOTAUpdate(const char* id, char reply[]) override; bool otaFromManifest(const char* current_ver, bool dry_run, char reply[]) override; + // Heavy body (TLS + JSON / HTTPUpdate).
Runs in a dedicated large-stack task + // spawned by otaFromManifest() — public only so that task entry point can call + // it; not meant to be invoked directly. + bool otaFromManifestImpl(const char* current_ver, bool dry_run, char reply[]); void setInhibitSleep(bool inhibit) { inhibit_sleep = inhibit; diff --git a/src/helpers/bridges/MQTTBridge.cpp b/src/helpers/bridges/MQTTBridge.cpp index 677e814bd2..0c1ff86b9a 100644 --- a/src/helpers/bridges/MQTTBridge.cpp +++ b/src/helpers/bridges/MQTTBridge.cpp @@ -754,27 +754,44 @@ void MQTTBridge::initializeWiFiInTask() { WiFi.setAutoReconnect(true); WiFi.setAutoConnect(true); - // Set up WiFi event handlers for better diagnostics and immediate disconnection detection - WiFi.onEvent([this](WiFiEvent_t event, WiFiEventInfo_t info) { - switch(event) { - case ARDUINO_EVENT_WIFI_STA_GOT_IP: - MQTT_DEBUG_PRINTLN("WiFi connected: %s", IPAddress(info.got_ip.ip_info.ip.addr).toString().c_str()); - // Set flag to trigger NTP sync from loop() instead of doing it here - if (!_ntp_synced && !_ntp_sync_pending) { - _ntp_sync_pending = true; - } - break; - case ARDUINO_EVENT_WIFI_STA_DISCONNECTED: - s_wifi_disconnect_reason = info.wifi_sta_disconnected.reason; - s_wifi_disconnect_time = millis(); - MQTT_DEBUG_PRINTLN("WiFi disconnected: reason %d", s_wifi_disconnect_reason); - break; - default: - break; - } - }); + // Set up WiFi event handlers for better diagnostics and immediate disconnection + // detection. Register ONCE — the bridge is reused across restarts (e.g. stopped + // for `ota check`/`ota update`, or `set mqtt…` reconfigure) and WiFi.onEvent() + // never removes prior callbacks, so re-registering leaks handlers and duplicates + // every log line. 
+ if (!_wifi_event_registered) { + WiFi.onEvent([this](WiFiEvent_t event, WiFiEventInfo_t info) { + switch(event) { + case ARDUINO_EVENT_WIFI_STA_GOT_IP: + MQTT_DEBUG_PRINTLN("WiFi connected: %s", IPAddress(info.got_ip.ip_info.ip.addr).toString().c_str()); + // Set flag to trigger NTP sync from loop() instead of doing it here + if (!_ntp_synced && !_ntp_sync_pending) { + _ntp_sync_pending = true; + } + break; + case ARDUINO_EVENT_WIFI_STA_DISCONNECTED: + s_wifi_disconnect_reason = info.wifi_sta_disconnected.reason; + s_wifi_disconnect_time = millis(); + MQTT_DEBUG_PRINTLN("WiFi disconnected: reason %d", s_wifi_disconnect_reason); + break; + default: + break; + } + }); + _wifi_event_registered = true; + } - WiFi.begin(_prefs->wifi_ssid, _prefs->wifi_password); + // Only (re)start the WiFi association if it isn't already up. end() leaves the + // STA link connected, so on a restart (e.g. after `ota check`) calling + // WiFi.begin() again forces a needless disconnect/reconnect — which also races + // the MQTT task's first DNS lookup (getaddrinfo fails until WiFi/DNS recovers). + // When already connected, the deferred slot setup still fires in mqttTaskLoop() + // because _ntp_synced persists across end() (only _slots_setup_done is reset). + if (WiFi.status() != WL_CONNECTED) { + WiFi.begin(_prefs->wifi_ssid, _prefs->wifi_password); + } else if (!_ntp_synced && !_ntp_sync_pending) { + _ntp_sync_pending = true; // already connected but never synced — kick NTP now + } // NOTE: Slot setup is deferred until after NTP sync in mqttTaskLoop(). 
// JWT-auth slots need valid timestamps for token creation, and connecting diff --git a/src/helpers/bridges/MQTTBridge.h b/src/helpers/bridges/MQTTBridge.h index 9eaf66de0a..e19e0544ac 100644 --- a/src/helpers/bridges/MQTTBridge.h +++ b/src/helpers/bridges/MQTTBridge.h @@ -180,6 +180,11 @@ class MQTTBridge : public BridgeBase { bool _ntp_synced; bool _ntp_sync_pending; // Flag to trigger NTP sync from loop() instead of event handler bool _slots_setup_done; // Deferred: slots set up after NTP sync + // WiFi.onEvent() handler registered once and never removed by end(); the bridge + // object is reused across restarts, so re-registering would leak handlers and + // duplicate every connect/disconnect log line. Inline-initialised so it survives + // construction and is NOT reset by end(). + bool _wifi_event_registered = false; int _max_active_slots; // Runtime limit: 5 with PSRAM, 2 without // Pending slot reconfigure: set from CLI (Core 1), processed by MQTT task (Core 0)