diff --git a/.github/workflows/run-demo.yml b/.github/workflows/run-demo.yml index cbb4c24..5675c21 100644 --- a/.github/workflows/run-demo.yml +++ b/.github/workflows/run-demo.yml @@ -35,6 +35,9 @@ jobs: with: workflow: ci.yml workflow_conclusion: success + # Use the build from the same branch this workflow runs on, so a + # manual dispatch on a feature branch tests that branch's binary. + branch: ${{ github.ref_name }} name: empower-native-${{ matrix.name }} path: artifact @@ -51,4 +54,6 @@ jobs: echo "::error::autopilot exited 0 - expected a crash" exit 1 fi - echo "autopilot crashed as expected (exit $code) - events ingested" + # The crash uploads synchronously before exit (crash_upload_sync), so + # no relaunch/flush is needed for this one-shot run. + echo "autopilot crashed as expected (exit $code) - crash uploaded" diff --git a/src/app/main_gui.cpp b/src/app/main_gui.cpp index 959f189..a9c6a63 100644 --- a/src/app/main_gui.cpp +++ b/src/app/main_gui.cpp @@ -131,6 +131,7 @@ int main(int argc, char** argv) { cfg.environment = env_or("SENTRY_ENVIRONMENT", "production"); cfg.component = "fleet"; cfg.debug = env_or("EMPOWER_DEBUG", "")[0] != '\0'; + cfg.use_external_crash_reporter = true; // interactive desktop app bool sentry_ok = empower::SentryManager::init(cfg); // Real GPU context from the live OpenGL renderer, so even non-GPU crashes diff --git a/src/core/backend_client.cpp b/src/core/backend_client.cpp index a49013b..63c0294 100644 --- a/src/core/backend_client.cpp +++ b/src/core/backend_client.cpp @@ -49,7 +49,10 @@ bool perform_post(const std::string& url, const char* body, curl_easy_setopt(curl, CURLOPT_HTTPHEADER, hdrs); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb); curl_easy_setopt(curl, CURLOPT_WRITEDATA, &result.body); - curl_easy_setopt(curl, CURLOPT_TIMEOUT, 15L); + // Keep the call well under the app-hang threshold so a slow backend can't + // be mistaken for a UI hang (this runs on the main thread). + curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 3L); + curl_easy_setopt(curl, CURLOPT_TIMEOUT, 3L); curl_easy_setopt(curl, CURLOPT_USERAGENT, "empower-fleet/1.0"); if (curl_easy_perform(curl) == CURLE_OK) { curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &result.status); @@ -82,6 +85,8 @@ bool perform_post(const std::string& url, const char* body, HINTERNET session = WinHttpOpen(L"empower-fleet/1.0", WINHTTP_ACCESS_TYPE_AUTOMATIC_PROXY, WINHTTP_NO_PROXY_NAME, WINHTTP_NO_PROXY_BYPASS, 0); if (!session) return true; + // Bound the call below the app-hang threshold (runs on the main thread). + WinHttpSetTimeouts(session, 3000, 3000, 3000, 3000); HINTERNET conn = WinHttpConnect(session, host, uc.nPort, 0); HINTERNET req = conn ? WinHttpOpenRequest(conn, L"POST", path, nullptr, WINHTTP_NO_REFERER, diff --git a/src/core/sentry_manager.cpp b/src/core/sentry_manager.cpp index edf3f33..82ced04 100644 --- a/src/core/sentry_manager.cpp +++ b/src/core/sentry_manager.cpp @@ -178,13 +178,23 @@ bool SentryManager::init(const SentryConfig& config) { sentry_options_set_handler_path(options, handler.c_str()); // --- New out-of-process "native" crash backend ------------------------- - // Selected at build time via -DSENTRY_BACKEND=native. These knobs are - // specific to that backend: capture a client-side stackwalk AND a smart - // minidump, and let the daemon finish the upload after the app exits. + // Selected at build time via -DSENTRY_BACKEND=native: a client-side native + // stackwalk plus a smart minidump, with the daemon finishing the upload. sentry_options_set_crash_reporting_mode( options, SENTRY_CRASH_REPORTING_MODE_NATIVE_WITH_MINIDUMP); sentry_options_set_minidump_mode(options, SENTRY_MINIDUMP_MODE_SMART); - sentry_options_set_crash_upload_mode(options, SENTRY_CRASH_UPLOAD_MODE_ASYNC); + sentry_options_set_crash_upload_mode(options, + config.crash_upload_sync ? SENTRY_CRASH_UPLOAD_MODE_SYNC + : SENTRY_CRASH_UPLOAD_MODE_ASYNC); + if (config.crash_upload_sync) { + // SYNC keeps the crashed process alive until the daemon is done, but the + // daemon only gets `shutdown_timeout` to flush. The default (2s) is too + // short for our ~1MB crash envelope (minidump + screenshot), so it would + // be dumped to disk for "next restart" - which never happens in a + // one-shot CI run. Give it enough time to finish the upload in-process + // (kept under the ~10s crash-handler wait cap). + sentry_options_set_shutdown_timeout(options, 8000); + } // --- Performance, logs, metrics, sessions ------------------------------ sentry_options_set_traces_sample_rate(options, config.traces_sample_rate); @@ -204,11 +214,17 @@ bool SentryManager::init(const SentryConfig& config) { #endif // --- External crash reporter (official sentry-desktop-crash-reporter) -- - std::string reporter = !config.crash_reporter_path.empty() - ? config.crash_reporter_path - : find_crash_reporter(); - if (!reporter.empty()) { - sentry_options_set_external_crash_reporter_path(options, reporter.c_str()); + // Only for the interactive GUI: when set, the SDK hands the crash to this + // separate app to submit (with a user-feedback dialog). A headless/CI binary + // can't launch that GUI app, so it must submit crashes itself - otherwise + // the crash is written out for the reporter and never sent. + if (config.use_external_crash_reporter) { + std::string reporter = !config.crash_reporter_path.empty() + ? config.crash_reporter_path + : find_crash_reporter(); + if (!reporter.empty()) { + sentry_options_set_external_crash_reporter_path(options, reporter.c_str()); + } } sentry_options_set_before_send(options, before_send, nullptr); diff --git a/src/core/sentry_manager.h b/src/core/sentry_manager.h index 3db8298..a5f2421 100644 --- a/src/core/sentry_manager.h +++ b/src/core/sentry_manager.h @@ -32,6 +32,12 @@ struct SentryConfig { double traces_sample_rate = 1.0; // App-hang/ANR threshold in milliseconds (kept short for a snappy demo). int app_hang_timeout_ms = 2000; + // Block the crashing process until the daemon finishes uploading the crash. + // Needed for one-shot runs (CI) where nothing relaunches to flush it. + bool crash_upload_sync = false; + // Hand crashes to the external crash reporter UI (interactive desktop app). + // Headless/CI must leave this false so the SDK submits crashes itself. + bool use_external_crash_reporter = false; }; class SentryManager { diff --git a/src/headless/main.cpp b/src/headless/main.cpp index a8017fa..5da10c8 100644 --- a/src/headless/main.cpp +++ b/src/headless/main.cpp @@ -36,6 +36,18 @@ void sleep_ms(int ms) { std::this_thread::sleep_for(std::chrono::milliseconds(ms)); } +// Waits while keeping the app-hang watchdog fed, so the autopilot's own pacing +// is never mistaken for a hang (only the deliberate app-hang scenario blocks +// without a heartbeat). +void idle(int ms) { + const int step = 150; + for (int elapsed = 0; elapsed < ms; elapsed += step) { + empower::SentryManager::app_hang_heartbeat(); + sleep_ms(ms - elapsed < step ? ms - elapsed : step); + } + empower::SentryManager::app_hang_heartbeat(); +} + // One simulated pipeline run as a performance transaction with child spans, // plus a metric and a structured log - the steady-state demo data. void run_pipeline(const char* name, const char* op) { @@ -85,6 +97,10 @@ int main(int argc, char** argv) { empower::SentryConfig cfg; cfg.environment = env_or("SENTRY_ENVIRONMENT", "ci"); cfg.component = "headless"; + cfg.crash_upload_sync = true; // one-shot run: upload the crash before exit + // Headroom over normal pacing (the backend call is bounded to 3s) so only + // the deliberate 8s app-hang scenario trips the watchdog, not the autopilot. + cfg.app_hang_timeout_ms = 6000; cfg.debug = env_or("EMPOWER_DEBUG", "")[0] != '\0'; if (!empower::SentryManager::init(cfg)) { std::fprintf(stderr, "headless: sentry init failed (continuing)\n"); @@ -139,10 +155,17 @@ int main(int argc, char** argv) { bool fired_hang = false; int iter = 0; while (std::chrono::steady_clock::now() < end) { + empower::SentryManager::app_hang_heartbeat(); run_pipeline("sensor.pipeline", "device.ingest"); empower::SentryManager::app_hang_heartbeat(); - if (iter % 3 == 0) empower::checkout("", &console); // distributed trace (no error event) - if (iter % 4 == 0) run_pipeline("image.processing", "image.classify"); + if (iter % 3 == 0) { + empower::checkout("", &console); // distributed trace (no error event) + empower::SentryManager::app_hang_heartbeat(); + } + if (iter % 4 == 0) { + run_pipeline("image.processing", "image.classify"); + empower::SentryManager::app_hang_heartbeat(); + } sentry_value_t online_attrs = sentry_value_new_object(); sentry_value_set_by_key(online_attrs, "fleet_size", sentry_value_new_attribute( @@ -166,7 +189,7 @@ int main(int argc, char** argv) { fired_hang = true; } ++iter; - sleep_ms(1500); + idle(1500); } if (final_crash) { // Event 3: the deterministic headline crash, so every CI run yields