From 8e67506af814a332a833c5af45d7f7a4c38a2073 Mon Sep 17 00:00:00 2001 From: Jackie Li Date: Sat, 13 Jun 2026 12:52:42 +0100 Subject: [PATCH 1/6] grabber: release seize across sleep (root-cause fix) + observability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause of the "keyboard dead on wake, same registry id" recurrence: we held the IOHIDManager seize across sleep. When the keyboard powers down mid-sleep while seized, the seize goes stale — on wake the manager still reports the device (same id, matched_count=1) but the event pipe is dead, and re-seizing in place can't revive it (only a full device re-enumeration does, which is why a process restart worked). Fix (Karabiner's pattern — devices are "ungrabbable while sleeping"): - New PowerNotify hook: on kIOMessageSystemWillSleep release the seize (keyboard falls back to the normal HID path), mark `sleeping`, then ack the sleep after a 1s delay so IOHIDManagerClose propagates before the device powers down. On kIOMessageSystemHasPoweredOn clear `sleeping` and re-acquire a fresh seize on the now-healthy device. - applyLatestRules and onDeviceChange respect `sleeping` (keep the seize torn down until wake), so nothing re-seizes mid-sleep. - This is the inverse of the old PowerNotify (which re-seized on wake, holding across sleep) — the trigger was right, the action was wrong. Observability (the gap that forced inference last time): - Log each seized device's IORegistry entry id, so a recurrence shows which device the seize holds vs the live keyboard (ioreg). - Prefix every grabber log line with a local [HH:MM:SS] so events correlate directly with `pmset -g log` sleep/wake times. Known minor noise (newly visible): DeviceNotify also matches our own vhidd VirtualHIDKeyboard, causing one spurious idempotent re-seize when it enumerates. Not a loop; left as a follow-up. 
--- src/grabber/HidSeize.zig | 20 ++++ src/grabber/PowerNotify.zig | 203 ++++++++++++++++++++++++++++++++++++ src/grabber/c.zig | 55 ++++++++++ src/grabber/main.zig | 82 +++++++++++++++ 4 files changed, 360 insertions(+) create mode 100644 src/grabber/PowerNotify.zig diff --git a/src/grabber/HidSeize.zig b/src/grabber/HidSeize.zig index 5af0b31..19d7652 100644 --- a/src/grabber/HidSeize.zig +++ b/src/grabber/HidSeize.zig @@ -295,6 +295,10 @@ pub fn start(self: *Self, mode: Mode) !void { if (count == 0) { log.warn("matching dictionary captured 0 devices — vendor/product mismatch?", .{}); } + // Observability: log the registry entry id of each seized device so a + // recurrence shows exactly which device the seize holds (vs the live + // keyboard's id from `ioreg`) — the gap that forced us to infer it. + if (mode == .seize) self.logSeizedEntryIds(matched, count); // No post-match filtering needed: the matching dicts are exact. The // (0,0) built-in matches only Transport ∈ {FIFO,SPI} keyboards (the @@ -310,6 +314,22 @@ pub fn start(self: *Self, mode: Mode) !void { } } +/// Log the IORegistry entry id of each device in the seized set. +/// Best-effort — bails on any allocation/IOKit hiccup. +fn logSeizedEntryIds(self: *Self, set: ?*anyopaque, count: usize) void { + if (set == null or count == 0) return; + const refs = self.allocator.alloc(?*const anyopaque, count) catch return; + defer self.allocator.free(refs); + c.CFSetGetValues(set, refs.ptr); + for (refs) |ref| { + const dev: c.IOHIDDeviceRef = @constCast(ref); + const svc = c.IOHIDDeviceGetService(dev); + var id: u64 = 0; + if (svc != 0) _ = c.IORegistryEntryGetRegistryEntryID(svc, &id); + log.info("seized device entry_id={d}", .{id}); + } +} + /// Set HIDKeyboardCapsLockDelayOverride=0 on every event-system /// service. 
Disables Apple's firmware-level "hold caps_lock for ~150ms /// to toggle" behavior — without this the toggle still fires through diff --git a/src/grabber/PowerNotify.zig b/src/grabber/PowerNotify.zig new file mode 100644 index 0000000..706507b --- /dev/null +++ b/src/grabber/PowerNotify.zig @@ -0,0 +1,203 @@ +//! System sleep/wake hook — releases the seize before sleep and +//! re-acquires it on wake, so the seize never spans the sleep power +//! transition. +//! +//! Root cause it addresses: holding an IOHIDManager seize across sleep +//! leaves it stale on wake — the keyboard powers down mid-sleep while +//! seized, and on wake the manager still reports the device (same +//! registry id, matched_count=1) but the event pipe is dead. Re-seizing +//! the same device in place does NOT revive it; only a full device +//! re-enumeration does. So instead we don't hold the seize across sleep +//! at all (the pattern Karabiner-Elements uses: devices are "ungrabbable +//! while system_sleeping"). +//! +//! - kIOMessageSystemWillSleep → on_will_sleep (daemon tears down the +//! seize, marks itself sleeping), THEN ack the sleep after a short +//! delay so the release propagates to the kernel before the device +//! powers down (Karabiner delays its ack ~1s for the same reason). +//! - kIOMessageSystemHasPoweredOn → on_wake (daemon clears sleeping and +//! re-acquires a fresh seize on the now-healthy device). +//! - kIOMessageCanSystemSleep → ack immediately (we never veto sleep). +//! +//! Lifetime: one PowerNotify per Daemon. init registers for system power +//! and schedules its run-loop source; deinit removes it and releases the +//! port + root-domain connection + ack timer. + +const std = @import("std"); +const c = @import("c.zig"); + +const log = std.log.scoped(.power); + +/// Delay between releasing the seize and acking SystemWillSleep, so the +/// IOHIDManagerClose has propagated in the kernel before the device loses +/// power. Matches Karabiner's 1s. 
It adds this much latency to sleep, +/// which is imperceptible for a lid close. +const will_sleep_ack_delay_s: f64 = 1.0; + +pub const Callback = *const fn (ctx: ?*anyopaque) void; + +allocator: std.mem.Allocator, +root_port: c.io_connect_t = 0, +notifier: c.io_object_t = 0, +notify_port: c.IONotificationPortRef = null, +run_loop_source: c.CFRunLoopSourceRef = null, +/// One-shot timer for the delayed SystemWillSleep ack. +ack_timer: c.CFRunLoopTimerRef = null, +/// Notification id captured at WillSleep, acked when ack_timer fires. +pending_ack_id: isize = 0, +on_will_sleep: Callback, +on_wake: Callback, +ctx: ?*anyopaque, + +const Self = @This(); + +/// Singleton — the C callback's refcon carries `self`, but the ack timer +/// callback needs to find us too and CFRunLoopTimerContext is set up the +/// same way. One Daemon → one PowerNotify, so a global is simplest. +var instance: ?*Self = null; + +pub fn init( + allocator: std.mem.Allocator, + on_will_sleep: Callback, + on_wake: Callback, + ctx: ?*anyopaque, +) !*Self { + if (instance != null) return error.AlreadyInitialized; + + const self = try allocator.create(Self); + errdefer allocator.destroy(self); + self.* = .{ + .allocator = allocator, + .on_will_sleep = on_will_sleep, + .on_wake = on_wake, + .ctx = ctx, + }; + instance = self; + errdefer instance = null; + + var port: c.IONotificationPortRef = null; + var notifier: c.io_object_t = 0; + const root_port = c.IORegisterForSystemPower(self, &port, powerCallback, &notifier); + if (root_port == 0) { + log.err("IORegisterForSystemPower returned MACH_PORT_NULL", .{}); + return error.RegisterFailed; + } + errdefer { + _ = c.IODeregisterForSystemPower(&notifier); + if (port) |p| c.IONotificationPortDestroy(p); + } + + const source = c.IONotificationPortGetRunLoopSource(port); + if (source == null) { + log.err("IONotificationPortGetRunLoopSource returned null", .{}); + return error.RunLoopSourceFailed; + } + c.CFRunLoopAddSource(c.CFRunLoopGetCurrent(), source, 
c.kCFRunLoopDefaultMode); + + self.root_port = root_port; + self.notifier = notifier; + self.notify_port = port; + self.run_loop_source = source; + log.info("registered for system power notifications (release-on-sleep)", .{}); + return self; +} + +pub fn deinit(self: *Self) void { + self.cancelAckTimer(); + if (self.run_loop_source) |src| { + c.CFRunLoopRemoveSource(c.CFRunLoopGetCurrent(), src, c.kCFRunLoopDefaultMode); + self.run_loop_source = null; // owned by the port; don't release + } + if (self.notifier != 0) { + _ = c.IODeregisterForSystemPower(&self.notifier); + self.notifier = 0; + } + if (self.notify_port) |p| { + c.IONotificationPortDestroy(p); + self.notify_port = null; + } + self.root_port = 0; + instance = null; + self.allocator.destroy(self); +} + +fn cancelAckTimer(self: *Self) void { + if (self.ack_timer) |t| { + c.CFRunLoopTimerInvalidate(t); + c.CFRelease(t); + self.ack_timer = null; + } +} + +fn powerCallback( + refcon: ?*anyopaque, + service: c.io_service_t, + messageType: u32, + messageArgument: ?*anyopaque, +) callconv(.c) void { + _ = service; + const self: *Self = @ptrCast(@alignCast(refcon orelse return)); + const arg_id: isize = @bitCast(@intFromPtr(messageArgument)); + + switch (messageType) { + c.kIOMessageCanSystemSleep => { + // We never veto sleep — ack immediately. + _ = c.IOAllowPowerChange(self.root_port, arg_id); + }, + c.kIOMessageSystemWillSleep => { + log.info("system will sleep — releasing seize before sleep", .{}); + // Release the seize NOW (synchronous), then ack after a short + // delay so the release lands before the device powers down. 
+ self.on_will_sleep(self.ctx); + self.scheduleWillSleepAck(arg_id); + }, + c.kIOMessageSystemWillPowerOn => { + log.info("system will power on (early wake)", .{}); + }, + c.kIOMessageSystemHasPoweredOn => { + log.info("system has powered on — re-acquiring seize", .{}); + self.on_wake(self.ctx); + }, + else => { + log.info("unhandled power message: 0x{X:0>8}", .{messageType}); + }, + } +} + +fn scheduleWillSleepAck(self: *Self, notification_id: isize) void { + self.cancelAckTimer(); + self.pending_ack_id = notification_id; + var timer_ctx: c.CFRunLoopTimerContext = .{ + .version = 0, + .info = self, + .retain = null, + .release = null, + .copyDescription = null, + }; + const fire_at = c.CFAbsoluteTimeGetCurrent() + will_sleep_ack_delay_s; + const timer = c.CFRunLoopTimerCreate( + c.kCFAllocatorDefault, + fire_at, + 0, // one-shot + 0, + 0, + willSleepAckTimerCallback, + &timer_ctx, + ); + if (timer == null) { + // Couldn't schedule the delayed ack — ack now so we don't block + // sleep indefinitely. The seize is already released. 
+ log.warn("ack timer create failed — acking sleep immediately", .{}); + _ = c.IOAllowPowerChange(self.root_port, notification_id); + return; + } + self.ack_timer = timer; + c.CFRunLoopAddTimer(c.CFRunLoopGetCurrent(), timer, c.kCFRunLoopDefaultMode); +} + +fn willSleepAckTimerCallback(_: c.CFRunLoopTimerRef, info: ?*anyopaque) callconv(.c) void { + const self: *Self = @ptrCast(@alignCast(info orelse return)); + const id = self.pending_ack_id; + self.cancelAckTimer(); + _ = c.IOAllowPowerChange(self.root_port, id); +} diff --git a/src/grabber/c.zig b/src/grabber/c.zig index f5669d7..9afe63b 100644 --- a/src/grabber/c.zig +++ b/src/grabber/c.zig @@ -301,6 +301,61 @@ pub const IONotificationPortRef = ?*anyopaque; pub extern fn IONotificationPortGetRunLoopSource(notify: IONotificationPortRef) CFRunLoopSourceRef; pub extern fn IONotificationPortDestroy(notify: IONotificationPortRef) void; +// System sleep/wake notifications via the root power-management domain. +// PowerNotify uses these to RELEASE the seize on will-sleep and +// re-acquire it on power-on, so the seize never spans the sleep power +// transition (a seize held across sleep goes stale: same device id, +// matched_count=1, but the event pipe is dead). Can/WillSleep must be +// acked with IOAllowPowerChange or sleep is blocked. messageArgument is +// a void* on the API but used as a `long` notification id for the ack. +// IOMessage.h: iokit_common_msg(x) = 0xE0000000 | x. 
+pub const kIOMessageCanSystemSleep: u32 = 0xE0000270; +pub const kIOMessageSystemWillSleep: u32 = 0xE0000280; +pub const kIOMessageSystemWillPowerOn: u32 = 0xE0000320; +pub const kIOMessageSystemHasPoweredOn: u32 = 0xE0000300; + +pub const IOServiceInterestCallback = ?*const fn ( + refcon: ?*anyopaque, + service: io_service_t, + messageType: u32, + messageArgument: ?*anyopaque, +) callconv(.c) void; + +pub extern fn IORegisterForSystemPower( + refcon: ?*anyopaque, + thePortRef: *IONotificationPortRef, + callback: IOServiceInterestCallback, + notifier: *io_object_t, +) io_connect_t; +pub extern fn IODeregisterForSystemPower(notifier: *io_object_t) IOReturn; +pub extern fn IOAllowPowerChange(kernelPort: io_connect_t, notificationID: isize) IOReturn; + +// Map a seized IOHIDDevice back to its IORegistry node so we can log the +// seized device's registry entry id (observability: which device the +// seize actually holds, vs the live keyboard). +pub extern fn IOHIDDeviceGetService(device: IOHIDDeviceRef) io_service_t; + +// libc time — for local-time log timestamps so grabber events correlate +// with `pmset -g log` wake/sleep times. We only read the first three +// `struct tm` fields (sec/min/hour), so the rest of the layout being +// approximate is harmless. +pub const time_t = c_long; +pub const Tm = extern struct { + tm_sec: c_int, + tm_min: c_int, + tm_hour: c_int, + tm_mday: c_int, + tm_mon: c_int, + tm_year: c_int, + tm_wday: c_int, + tm_yday: c_int, + tm_isdst: c_int, + tm_gmtoff: c_long, + tm_zone: ?[*:0]const u8, +}; +pub extern fn time(t: ?*time_t) time_t; +pub extern fn localtime_r(timep: *const time_t, result: *Tm) ?*Tm; + // SystemConfiguration — read the active console user uid. 
D5 uses // this to apply rules only from the foreground user's agent (so // fast-user-switching doesn't get caps_lock-as-ctrl set up by a diff --git a/src/grabber/main.zig b/src/grabber/main.zig index 87ccd99..e433d02 100644 --- a/src/grabber/main.zig +++ b/src/grabber/main.zig @@ -23,6 +23,7 @@ const HidSystem = @import("HidSystem.zig"); const Ipc = @import("Ipc.zig"); const KbState = @import("KbState.zig"); const DeviceNotify = @import("DeviceNotify.zig"); +const PowerNotify = @import("PowerNotify.zig"); const TapHold = @import("TapHold.zig"); const Vhidd = @import("Vhidd.zig"); @@ -40,8 +41,36 @@ pub const std_options: std.Options = .{ .ReleaseSafe => .info, .ReleaseFast, .ReleaseSmall => .warn, }, + .logFn = grabberLog, }; +/// Like the default log function but prefixes each line with a local +/// wall-clock `[HH:MM:SS]` so grabber events correlate directly with +/// `pmset -g log` sleep/wake times when chasing a seize recurrence. +fn grabberLog( + comptime level: std.log.Level, + comptime scope: @TypeOf(.enum_literal), + comptime format: []const u8, + args: anytype, +) void { + const io = std.Options.debug_io; + const prev = io.swapCancelProtection(.blocked); + defer _ = io.swapCancelProtection(prev); + var buffer: [64]u8 = undefined; + const t = std.debug.lockStderr(&buffer).terminal(); + defer std.debug.unlockStderr(); + var now: c.time_t = c.time(null); + var tm: c.Tm = undefined; + if (c.localtime_r(&now, &tm) != null) { + t.writer.print("[{d:0>2}:{d:0>2}:{d:0>2}] ", .{ + @as(u32, @intCast(@max(0, tm.tm_hour))), + @as(u32, @intCast(@max(0, tm.tm_min))), + @as(u32, @intCast(@max(0, tm.tm_sec))), + }) catch {}; + } + std.log.defaultLogFileTerminal(level, scope, format, args, t) catch {}; +} + /// Set by SIGTERM/SIGINT/SIGHUP so the accept() loop tears down on /// next iteration. async-signal-safe by being volatile primitives only. 
var should_exit: std.atomic.Value(bool) = .init(false); @@ -262,6 +291,17 @@ const Daemon = struct { /// appears/disappears — no polling, zero steady-state overhead. device_notify: ?*DeviceNotify = null, + /// System sleep/wake hook. Holding the seize across sleep leaves it + /// stale on wake (same device id, matched_count=1, dead event pipe); + /// re-seizing in place can't revive it — only a device re-enumeration + /// does. So we release the seize on will-sleep and re-acquire on + /// wake, never spanning the power transition (Karabiner's pattern). + power_notify: ?*PowerNotify = null, + /// True between will-sleep and power-on. While set, the seize is kept + /// torn down — applyLatestRules and onDeviceChange must not re-seize + /// (the device is powering down/up and a seize would go stale). + sleeping: bool = false, + /// Pending vhidd recovery timer, non-null while the daemon is in /// the "vhidd is broken, retrying" state. One-shot — the callback /// releases it and either schedules a new one (on failure) or @@ -335,6 +375,10 @@ const Daemon = struct { dn.deinit(); self.device_notify = null; } + if (self.power_notify) |pn| { + pn.deinit(); + self.power_notify = null; + } self.stopConsoleUserTimer(); self.cancelVhiddRecoveryTimer(); self.teardownSeize(); @@ -403,6 +447,12 @@ const Daemon = struct { log.warn("DeviceNotify init failed ({s}); keyboard re-enumeration auto-reseize disabled", .{@errorName(err)}); break :blk null; }; + // Release-on-sleep / re-acquire-on-wake. Without it the seize + // held across sleep goes stale and the keyboard dies on wake. + self.power_notify = PowerNotify.init(self.allocator, onWillSleep, onSystemWake, self) catch |err| blk: { + log.warn("PowerNotify init failed ({s}); seize will not be released across sleep", .{@errorName(err)}); + break :blk null; + }; log.info("listening on {s}", .{self.socket_path}); @@ -567,6 +617,15 @@ const Daemon = struct { /// or the console user switched. 
The active subscription's /// stream becomes the layer-push target. fn applyLatestRules(self: *Daemon) !void { + // While the system is asleep the seize must stay torn down: the + // device is powering down/up and a seize taken across that + // transition goes stale. onSystemWake re-runs this after clearing + // `sleeping`. Rule/subscription state is already stored by the + // caller, so the latest rules apply on wake. + if (self.sleeping) { + self.teardownSeize(); + return; + } const sub = self.activeSubscription() orelse { log.info("no active subscription — keeping seize torn down", .{}); self.teardownSeize(); @@ -902,6 +961,7 @@ fn consoleUserTimerCallback(_: c.CFRunLoopTimerRef, info: ?*anyopaque) callconv( /// there is no feedback loop. fn onDeviceChange(ctx: ?*anyopaque) void { const d: *Daemon = @ptrCast(@alignCast(ctx orelse return)); + if (d.sleeping) return; // seize stays released until wake if (d.seize == null) return; // nothing seized yet → first apply_rules will log.warn("keyboard enumeration changed — re-seizing", .{}); d.applyLatestRules() catch |err| { @@ -909,6 +969,28 @@ fn onDeviceChange(ctx: ?*anyopaque) void { }; } +/// PowerNotify: the system is about to sleep. Release the seize so the +/// keyboard sleeps as a normal HID device — a seize held across the +/// power transition goes stale (dead event pipe, unrecoverable in place). +/// `sleeping` keeps it released until wake. +fn onWillSleep(ctx: ?*anyopaque) void { + const d: *Daemon = @ptrCast(@alignCast(ctx orelse return)); + d.sleeping = true; + d.teardownSeize(); + log.info("seize released for sleep", .{}); +} + +/// PowerNotify: the system finished waking. Re-acquire a fresh seize on +/// the now-healthy device. applyLatestRules rebuilds against the current +/// device set, so this grabs whatever the keyboard re-enumerated to. 
+fn onSystemWake(ctx: ?*anyopaque) void { + const d: *Daemon = @ptrCast(@alignCast(ctx orelse return)); + d.sleeping = false; + d.applyLatestRules() catch |err| { + log.warn("post-wake re-seize failed: {s}", .{@errorName(err)}); + }; +} + fn vhiddRecoveryTimerCallback(_: c.CFRunLoopTimerRef, info: ?*anyopaque) callconv(.c) void { const d: *Daemon = @ptrCast(@alignCast(info orelse return)); // The timer is one-shot — release its ref before doing work so a From a1bfdc476695828c39bcdc03fa44c898d366ab06 Mon Sep 17 00:00:00 2001 From: Jackie Li Date: Sat, 13 Jun 2026 13:19:13 +0100 Subject: [PATCH 2/6] grabber: report real version + show both daemon versions in --status skhd-grabber --version printed a hardcoded "skhd-grabber (D1 skeleton)" because the grabber module never got the generated VERSION embed. Wire addVersionImport into grabber_mod and print the real build string (matching skhd's "vX.Y-dev--" format). --status now prints both daemons' versions. The grabber's is queried LIVE over IPC: it returns its build version in the hello-ok reply, and the agent client captures it, so --status shows the actually-running grabber (not the on-disk binary). skhd's is this binary's own version, which equals the running agent (same install path; the install flow can't swap a running binary's inode without stopping it first). Backward-compatible: grabber_version is an extra json field on the hello-ok; older agents ignore it, and an older grabber that omits it shows as "not running" in the version line. 
--- build.zig | 1 + src/agent_grabber_client.zig | 17 ++++++++++++++++- src/grabber/Ipc.zig | 9 ++++++++- src/grabber/main.zig | 7 ++++++- src/grabber_cli.zig | 12 ++++++++++++ src/service.zig | 12 ++++++++++++ 6 files changed, 55 insertions(+), 3 deletions(-) diff --git a/build.zig b/build.zig index bd8e2ec..0aa4f2e 100644 --- a/build.zig +++ b/build.zig @@ -210,6 +210,7 @@ pub fn build(b: *std.Build) void { }); linkGrabberFrameworks(b, grabber_mod); grabber_mod.addImport("grabber_protocol", grabber_protocol_mod); + addVersionImport(b, grabber_mod); const grabber_exe = b.addExecutable(.{ .name = "skhd-grabber", diff --git a/src/agent_grabber_client.zig b/src/agent_grabber_client.zig index d5e796f..7f6381f 100644 --- a/src/agent_grabber_client.zig +++ b/src/agent_grabber_client.zig @@ -15,6 +15,10 @@ pub const Client = struct { allocator: std.mem.Allocator, io: std.Io, stream: std.Io.net.Stream, + /// Running grabber's build version, captured from the hello-ok reply + /// (null until `hello()` succeeds, or if the grabber is too old to + /// send it). Owned by the client; freed in `close`. + grabber_version: ?[]u8 = null, pub fn connect(allocator: std.mem.Allocator, io: std.Io, socket_path: []const u8) !Client { const addr = std.Io.net.UnixAddress.init(socket_path) catch |err| { @@ -39,6 +43,7 @@ pub const Client = struct { } pub fn close(self: *Client) void { + if (self.grabber_version) |v| self.allocator.free(v); self.stream.close(self.io); self.* = undefined; } @@ -106,7 +111,17 @@ fn expectOk(client: *Client) !void { const t = obj.get("type") orelse return error.BadResponse; if (t != .string) return error.BadResponse; - if (std.mem.eql(u8, t.string, "ok")) return; + if (std.mem.eql(u8, t.string, "ok")) { + // Capture the grabber's reported version (parsed json is freed on + // return, so dupe into client-owned memory). 
+ if (obj.get("grabber_version")) |v| { + if (v == .string) { + if (client.grabber_version) |old| client.allocator.free(old); + client.grabber_version = client.allocator.dupe(u8, v.string) catch null; + } + } + return; + } if (std.mem.eql(u8, t.string, "error")) { const code = if (obj.get("code")) |v| (if (v == .string) v.string else "?") else "?"; diff --git a/src/grabber/Ipc.zig b/src/grabber/Ipc.zig index 740919e..4c95ba4 100644 --- a/src/grabber/Ipc.zig +++ b/src/grabber/Ipc.zig @@ -10,6 +10,10 @@ const protocol = @import("grabber_protocol"); const log = std.log.scoped(.grabber_ipc); +/// This grabber's build version, returned in the hello-ok reply so +/// `skhd --status` can show the running grabber's version over IPC. +const grabber_version = std.mem.trimEnd(u8, @embedFile("VERSION"), "\n\r\t "); + /// Owned (deep-copied) rules and remaps from one apply_rules /// message. Caller takes ownership and is responsible for freeing /// each Rule's hold_layer slice plus the rules and remaps slices @@ -135,7 +139,10 @@ fn handleHello(allocator: std.mem.Allocator, sw: *std.Io.net.Stream.Writer, msg: return error.VersionMismatch; } log.info("hello from uid={d} version={d}", .{ uid, version }); - try sendOk(allocator, sw); + // Reply ok with our build version so the status path can read the + // running grabber's version (extra field; older agents ignore it). + try protocol.writeMessage(&sw.interface, allocator, .{ .type = "ok", .grabber_version = grabber_version }); + try sw.interface.flush(); return uid; } diff --git a/src/grabber/main.zig b/src/grabber/main.zig index e433d02..eacad57 100644 --- a/src/grabber/main.zig +++ b/src/grabber/main.zig @@ -29,6 +29,11 @@ const Vhidd = @import("Vhidd.zig"); const log = std.log.scoped(.grabber); +/// Build-stamped version (VERSION file + git hash + build mode), shared +/// with the agent. Reported by `--version` and in the hello-ok reply so +/// `skhd --status` can show the running grabber's version. 
+pub const version = std.mem.trimEnd(u8, @embedFile("VERSION"), "\n\r\t "); + /// `-P/--profile` instrumentation is compiled in for Debug and /// ReleaseSafe only — matching `Tracer.zig` in the user-agent. In /// ReleaseFast/ReleaseSmall every profile branch folds away at @@ -126,7 +131,7 @@ pub fn main(init: std.process.Init) !void { // accepted for symmetry with other daemons but unused at // D1. D6 will hook it up to a stderr redirect. } else if (std.mem.eql(u8, a, "--version") or std.mem.eql(u8, a, "-v")) { - std.debug.print("skhd-grabber (D1 skeleton)\n", .{}); + std.debug.print("skhd-grabber v{s}\n", .{version}); return; } else if (std.mem.eql(u8, a, "--help") or std.mem.eql(u8, a, "-h")) { printHelp(); diff --git a/src/grabber_cli.zig b/src/grabber_cli.zig index dba4ef4..d6f35ef 100644 --- a/src/grabber_cli.zig +++ b/src/grabber_cli.zig @@ -547,6 +547,18 @@ pub fn uninstallGrabber(allocator: std.mem.Allocator, io: std.Io) !void { , .{}); } +/// Connect to the running grabber and return the build version it +/// reports in its hello-ok reply (caller frees). Returns null if the +/// grabber isn't reachable or is too old to report a version — the +/// status output then shows it as not running / unknown. +pub fn runningGrabberVersion(allocator: std.mem.Allocator, io: std.Io) ?[]u8 { + var client = Client.connect(allocator, io, protocol.default_socket_path) catch return null; + defer client.close(); + client.hello() catch return null; + const v = client.grabber_version orelse return null; + return allocator.dupe(u8, v) catch null; +} + /// Walk every prerequisite for caps_lock-class tap-hold and report /// where the chain breaks. 
One command users can run when something /// isn't working — gives a clear "this is where it's broken, this is diff --git a/src/service.zig b/src/service.zig index 11abdff..de50185 100644 --- a/src/service.zig +++ b/src/service.zig @@ -611,6 +611,18 @@ pub fn checkServiceStatus(allocator: std.mem.Allocator, io: std.Io) !void { const installed = sm_status != .not_registered and sm_status != .not_found; std.debug.print("skhd service status:\n", .{}); + + // Versions of the two daemons. skhd's is this binary's own build + // (== the running agent: same install path, and macOS won't let our + // install flow swap a running binary's inode without stopping it + // first). The grabber's is queried live over IPC (its hello-ok reply), + // so it reflects the actually-running daemon, not the on-disk binary. + const skhd_version = std.mem.trimEnd(u8, @embedFile("VERSION"), "\n\r\t "); + const grabber_ver = grabber_cli.runningGrabberVersion(allocator, io); + defer if (grabber_ver) |v| allocator.free(v); + std.debug.print(" skhd version: {s}\n", .{skhd_version}); + std.debug.print(" skhd-grabber version: {s}\n", .{grabber_ver orelse "not running"}); + std.debug.print(" Service installed: {s}\n", .{if (installed) "Yes" else "No"}); std.debug.print(" Registration status: {s}\n", .{sm_status.describe()}); From 55ad2789e2f1dd2dc8e52f2a15315b46ccd41026 Mon Sep 17 00:00:00 2001 From: Jackie Li Date: Sat, 13 Jun 2026 19:58:24 +0100 Subject: [PATCH 3/6] grabber: include date in log timestamps for multi-day soak [HH:MM:SS] alone is ambiguous when the daemon sleeps for days. Use the full local [YYYY-MM-DD HH:MM:SS] (matches pmset -g log) so a sleep span across days reads unambiguously. 
--- src/grabber/main.zig | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/grabber/main.zig b/src/grabber/main.zig index eacad57..968ecef 100644 --- a/src/grabber/main.zig +++ b/src/grabber/main.zig @@ -67,7 +67,12 @@ fn grabberLog( var now: c.time_t = c.time(null); var tm: c.Tm = undefined; if (c.localtime_r(&now, &tm) != null) { - t.writer.print("[{d:0>2}:{d:0>2}:{d:0>2}] ", .{ + // Full local date+time (matches `pmset -g log` format) so a sleep + // span across days is unambiguous — the daemon can be idle for days. + t.writer.print("[{d:0>4}-{d:0>2}-{d:0>2} {d:0>2}:{d:0>2}:{d:0>2}] ", .{ + @as(u32, @intCast(@max(0, tm.tm_year + 1900))), + @as(u32, @intCast(@max(0, tm.tm_mon + 1))), + @as(u32, @intCast(@max(0, tm.tm_mday))), @as(u32, @intCast(@max(0, tm.tm_hour))), @as(u32, @intCast(@max(0, tm.tm_min))), @as(u32, @intCast(@max(0, tm.tm_sec))), From a920e6d0d50cd2acc045e5598b3be3e03945177e Mon Sep 17 00:00:00 2001 From: Jackie Li Date: Sat, 13 Jun 2026 20:04:04 +0100 Subject: [PATCH 4/6] grabber: demote per-wake re-seize logs to info (keep release log bounded) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A forever-running daemon's ReleaseFast log (warn+) must only grow on anomalies, not routine operation. 'keyboard matched/terminated' and 'keyboard enumeration changed — re-seizing' fire on every wake / USB plug / vhidd reconnect — routine, not errors. Demote to info (compiled out of ReleaseFast, still visible in a ReleaseSafe diagnostic build). The FAILURE paths (rebuild failed, post-wake re-seize failed) stay warn. 
--- src/grabber/DeviceNotify.zig | 9 ++++++--- src/grabber/main.zig | 4 +++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/grabber/DeviceNotify.zig b/src/grabber/DeviceNotify.zig index f3a851b..dcfbcdb 100644 --- a/src/grabber/DeviceNotify.zig +++ b/src/grabber/DeviceNotify.zig @@ -162,9 +162,12 @@ fn drainAndLog(iter: c.io_iterator_t, kind: []const u8) void { if (svc == c.IO_OBJECT_NULL) break; var id: u64 = 0; _ = c.IORegistryEntryGetRegistryEntryID(svc, &id); - // warn: rare (only on real keyboard enumeration changes), so it - // stays in the ReleaseFast log as the record of why we re-seized. - log.warn("keyboard {s}: entry_id={d}", .{ kind, id }); + // info, NOT warn: this fires on every keyboard enumeration change + // (each wake, USB plug, vhidd reconnect) — routine operation, not + // an anomaly. Compiled out of ReleaseFast so a forever-running + // daemon's release log doesn't accumulate per-wake noise; visible + // in a ReleaseSafe diagnostic build. + log.info("keyboard {s}: entry_id={d}", .{ kind, id }); _ = c.IOObjectRelease(svc); } } diff --git a/src/grabber/main.zig b/src/grabber/main.zig index 968ecef..3acc41a 100644 --- a/src/grabber/main.zig +++ b/src/grabber/main.zig @@ -973,7 +973,9 @@ fn onDeviceChange(ctx: ?*anyopaque) void { const d: *Daemon = @ptrCast(@alignCast(ctx orelse return)); if (d.sleeping) return; // seize stays released until wake if (d.seize == null) return; // nothing seized yet → first apply_rules will - log.warn("keyboard enumeration changed — re-seizing", .{}); + // info: routine recovery, fires on every wake/plug — compiled out of + // ReleaseFast. The FAILURE below stays warn (a real anomaly). 
+ log.info("keyboard enumeration changed — re-seizing", .{}); d.applyLatestRules() catch |err| { log.warn("device-change rebuild failed: {s}", .{@errorName(err)}); }; From 4510056f3d3593cd88a8866f5721ae4554fd690a Mon Sep 17 00:00:00 2001 From: Jackie Li Date: Sat, 13 Jun 2026 20:08:20 +0100 Subject: [PATCH 5/6] skhd: demote routine PATH-applied startup log to info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit main.zig logged the post-.path PATH at warn unconditionally on every start — a routine startup diagnostic, not an error (its sibling, the inherited-PATH line, is already info). Demote so the agent's ReleaseFast log doesn't carry a verbose PATH line per restart. Audit of the agent's warn/err logging otherwise: event-tap handlers are all catch-only (no per-keystroke noise); remaining warn/err are genuine anomalies or the once-per-restart skhd startup banner (kept at warn as a deliberate restart marker). --- src/main.zig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main.zig b/src/main.zig index 6e2c91c..02b080c 100644 --- a/src/main.zig +++ b/src/main.zig @@ -421,7 +421,9 @@ fn applyConfigPaths(allocator: std.mem.Allocator, entries: []const []const u8) v log.warn("PATH apply: setenv failed", .{}); return; } - log.warn("PATH after .path directives: {s}", .{buf.items[0 .. buf.items.len - 1]}); + // info, not warn: routine startup diagnostic (mirrors the inherited-PATH + // info line above), not an error — keep it out of the release log. + log.info("PATH after .path directives: {s}", .{buf.items[0 .. 
buf.items.len - 1]}); } /// Resolve config file path following XDG spec From 5ce2c96f8af5ca238fd9f53f2c71a74242b811cc Mon Sep 17 00:00:00 2001 From: Jackie Li Date: Sat, 13 Jun 2026 20:15:02 +0100 Subject: [PATCH 6/6] skhd: demote startup banner from warn to info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The '=== skhd vX started at … (PID) ===' session marker was at warn only so it'd show in release logs — the same info-kept-at-warn-for-diagnosis pattern as the PATH line. It's informational, not a problem, so it's info now. Version/start info is still discoverable via skhd --version and --status (which now reports both running daemons' versions). --- src/main.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.zig b/src/main.zig index 02b080c..25ead91 100644 --- a/src/main.zig +++ b/src/main.zig @@ -294,7 +294,7 @@ fn logSessionStart(io: std.Io) void { const month_day = year_day.calculateMonthDay(); const day_secs = epoch_secs.getDaySeconds(); - log.warn("=== skhd {s} started at {d:0>4}-{d:0>2}-{d:0>2}T{d:0>2}:{d:0>2}:{d:0>2}Z (PID {d}) ===", .{ + log.info("=== skhd {s} started at {d:0>4}-{d:0>2}-{d:0>2}T{d:0>2}:{d:0>2}:{d:0>2}Z (PID {d}) ===", .{ version, @as(u32, year_day.year), @intFromEnum(month_day.month),