diff --git a/CMakeLists.txt b/CMakeLists.txt index dd3dcbb..4c3de97 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,6 +127,8 @@ add_subdirectory(lib/Pico-PIO-USB) add_subdirectory(lib/kmbox-commands) # Shared libraries +add_subdirectory(lib/hid-defs) +add_subdirectory(lib/fast-protocol) add_subdirectory(lib/wire-protocol) add_subdirectory(lib/peri-clock) add_subdirectory(lib/dma-uart) @@ -161,7 +163,7 @@ target_link_libraries(PIOKMbox pico_pio_usb) target_link_libraries(PIOKMbox kmbox_commands) # Link shared libraries -target_link_libraries(PIOKMbox wire_protocol peri_clock dma_uart led_utils) +target_link_libraries(PIOKMbox hid_defs fast_protocol wire_protocol peri_clock dma_uart led_utils) # Add PIO USB HCD implementation directly target_sources(PIOKMbox PRIVATE diff --git a/PIOKMbox.c b/PIOKMbox.c index 8ace3d5..4b9e869 100644 --- a/PIOKMbox.c +++ b/PIOKMbox.c @@ -119,6 +119,9 @@ static void core1_task_loop(void) { tuh_task(); + // Drain SET_REPORT passthrough queue (device→host vendor reports) + hid_host_task(); + // Xbox host task: forward console commands to controller, keepalive if (g_xbox_mode) { xbox_host_task(); diff --git a/bridge/CMakeLists.txt b/bridge/CMakeLists.txt index f956cbe..99c2f47 100644 --- a/bridge/CMakeLists.txt +++ b/bridge/CMakeLists.txt @@ -130,6 +130,8 @@ if(EXISTS ${CMAKE_CURRENT_LIST_DIR}/lut.pio) endif() # Add shared libraries +add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../lib/hid-defs hid-defs) +add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../lib/fast-protocol fast-protocol) add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../lib/kmbox-commands kmbox-commands) add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../lib/wire-protocol wire-protocol) add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../lib/peri-clock peri-clock) @@ -149,6 +151,8 @@ set(BRIDGE_LIBS hardware_uart hardware_adc kmbox_commands + hid_defs + fast_protocol wire_protocol dma_uart led_utils diff --git a/bridge/fast_commands.h b/bridge/fast_commands.h index fb9c309..7410e6e 
100644 --- a/bridge/fast_commands.h +++ b/bridge/fast_commands.h @@ -1,101 +1,21 @@ /** - * Fast Binary Command Definitions for Bridge → KMBox UART + * Fast Binary Command Definitions for Bridge -> KMBox UART * - * All bridge translators (Ferrum, Makcu, tracker) should produce these - * 8-byte packets instead of text strings. The KMBox already has an - * optimized binary parser that handles them with zero string parsing. - * - * Key command IDs: - * 0x01 FAST_CMD_MOUSE_MOVE — direct accumulator (buttons + wheel + move) - * 0x02 FAST_CMD_MOUSE_CLICK — button click with repeat count - * 0x07 FAST_CMD_SMOOTH_MOVE — smooth injection queue (humanized) + * Thin wrapper — all definitions now live in the shared library + * lib/fast-protocol/include/fast_protocol.h. This file exists so + * existing bridge #include "fast_commands.h" directives continue to work. */ #ifndef BRIDGE_FAST_COMMANDS_H #define BRIDGE_FAST_COMMANDS_H -#include -#include - -// Command IDs (must match defines.h on KMBox side) -#define FAST_CMD_MOUSE_MOVE 0x01 -#define FAST_CMD_MOUSE_CLICK 0x02 -#define FAST_CMD_SMOOTH_MOVE 0x07 -#define FAST_CMD_SMOOTH_CONFIG 0x08 -#define FAST_CMD_SMOOTH_CLEAR 0x09 -#define FAST_CMD_CYCLE_HUMAN 0x0F -#define FAST_CMD_PING 0xFE - -// Smooth injection modes (must match inject_mode_t on KMBox side) -#define INJECT_MODE_IMMEDIATE 0 -#define INJECT_MODE_SMOOTH 1 -#define INJECT_MODE_VELOCITY_MATCHED 2 -#define INJECT_MODE_MICRO 3 - -#define FAST_CMD_PACKET_SIZE 8 - -// ============================================================================ -// Inline packet builders — produce 8-byte packets, return 8 always -// ============================================================================ - -/** - * Build FAST_CMD_SMOOTH_MOVE (0x07) packet. - * KMBox routes this through smooth_inject_movement() which applies - * humanization (easing, subdivision, tremor, overshoot) automatically. - * Use for: km.move(), tracker aim commands, makcu MOVE. 
- */ -static inline size_t fast_build_smooth_move(uint8_t *buf, int16_t x, int16_t y, uint8_t mode) { - buf[0] = FAST_CMD_SMOOTH_MOVE; - buf[1] = (uint8_t)(x & 0xFF); - buf[2] = (uint8_t)((x >> 8) & 0xFF); - buf[3] = (uint8_t)(y & 0xFF); - buf[4] = (uint8_t)((y >> 8) & 0xFF); - buf[5] = mode; - buf[6] = 0; - buf[7] = 0; - return FAST_CMD_PACKET_SIZE; -} - -/** - * Build FAST_CMD_MOUSE_MOVE (0x01) packet. - * KMBox routes this through kmbox_add_mouse_movement() (direct accumulator) - * plus optional buttons + wheel. No smooth queue / humanization subdivision. - * Use for: button state changes, wheel, raw passthrough moves. - */ -static inline size_t fast_build_mouse_move(uint8_t *buf, int16_t x, int16_t y, - uint8_t buttons, int8_t wheel) { - buf[0] = FAST_CMD_MOUSE_MOVE; - buf[1] = (uint8_t)(x & 0xFF); - buf[2] = (uint8_t)((x >> 8) & 0xFF); - buf[3] = (uint8_t)(y & 0xFF); - buf[4] = (uint8_t)((y >> 8) & 0xFF); - buf[5] = buttons; - buf[6] = (uint8_t)wheel; - buf[7] = 0; - return FAST_CMD_PACKET_SIZE; -} - -/** - * Build FAST_CMD_MOUSE_CLICK (0x02) packet. - * KMBox generates press + release pairs internally. 
- */ -static inline size_t fast_build_mouse_click(uint8_t *buf, uint8_t button, uint8_t count) { - buf[0] = FAST_CMD_MOUSE_CLICK; - buf[1] = button; - buf[2] = count; - buf[3] = 0; - buf[4] = 0; - buf[5] = 0; - buf[6] = 0; - buf[7] = 0; - return FAST_CMD_PACKET_SIZE; -} +#include "fast_protocol.h" -// Button masks (match HID standard) -#define FAST_BTN_LEFT 0x01 -#define FAST_BTN_RIGHT 0x02 -#define FAST_BTN_MIDDLE 0x04 -#define FAST_BTN_BACK 0x08 -#define FAST_BTN_FORWARD 0x10 +// Legacy button mask aliases — prefer HID_BTN_* from hid_defs.h in new code +#define FAST_BTN_LEFT HID_BTN_LEFT +#define FAST_BTN_RIGHT HID_BTN_RIGHT +#define FAST_BTN_MIDDLE HID_BTN_MIDDLE +#define FAST_BTN_BACK HID_BTN_BACK +#define FAST_BTN_FORWARD HID_BTN_FORWARD #endif // BRIDGE_FAST_COMMANDS_H diff --git a/bridge/main.c b/bridge/main.c index addf040..5c21852 100644 --- a/bridge/main.c +++ b/bridge/main.c @@ -250,7 +250,7 @@ static uint32_t uart_rx_bytes_total = 0; static uint32_t uart_tx_bytes_total = 0; static uint32_t uart_rx_overflows = 0; -// KMBox temperature (from 0x0C info packet) +// KMBox temperature (from 0x0D info packet) static float kmbox_temperature_c = -999.0f; // Sync stats from hw_uart module (called periodically) @@ -496,8 +496,8 @@ static void uart_rx_task(void) { binary_idx = 0; } - // Check for start of binary response packet (0xFF, 0xFE, 0x0C, or 0x0E from KMBox) - if (!in_binary_packet && (c == 0xFF || c == 0xFE || c == 0x0C || c == 0x0E)) { + // Check for start of binary response packet (0xFF, 0xFE, 0x0D, or 0x0E from KMBox) + if (!in_binary_packet && (c == 0xFF || c == 0xFE || c == 0x0D || c == 0x0E)) { in_binary_packet = true; binary_idx = 0; binary_packet[binary_idx++] = c; @@ -530,8 +530,8 @@ static void uart_rx_task(void) { if (kmbox_state != KMBOX_CONNECTED) { kmbox_state = KMBOX_CONNECTED; } - } else if (binary_packet[0] == 0x0C) { - // Info response: [0x0C][hmode][imode][max_per_frame][queue_count][temp_lo][temp_hi][flags] + } else if (binary_packet[0] 
== 0x0D) { + // Info response: [0x0D][hmode][imode][max_per_frame][queue_count][temp_lo][temp_hi][flags] // flags: [0]=jitter_en [1]=vel_match [2:4]=queue_depth_3bit kmbox_humanization_mode = binary_packet[1]; kmbox_inject_mode = binary_packet[2]; @@ -555,7 +555,7 @@ static void uart_rx_task(void) { // Extended stats: [0x0E][queue_count][queue_cap][hmode][total_lo][total_hi][ovf_lo][ovf_hi] kmbox_queue_depth = binary_packet[1]; kmbox_queue_capacity = binary_packet[2]; - // binary_packet[3] = hmode (redundant, but useful if 0x0C wasn't received) + // binary_packet[3] = hmode (redundant, but useful if 0x0D wasn't received) if (!kmbox_humanization_valid) { kmbox_humanization_mode = binary_packet[3]; } @@ -672,7 +672,7 @@ static void kmbox_connection_task(void) { last_humanization_request_ms = now - HUMANIZATION_REQUEST_INTERVAL_MS + HUMANIZATION_INITIAL_DELAY_MS; // Send initial info request right away (binary) - uint8_t info_req[8] = {0x0C, 0, 0, 0, 0, 0, 0, 0}; + uint8_t info_req[8] = {0x0D, 0, 0, 0, 0, 0, 0, 0}; send_uart_packet(info_req, 8); } } @@ -696,11 +696,11 @@ static void kmbox_connection_task(void) { if (now - last_humanization_request_ms >= HUMANIZATION_REQUEST_INTERVAL_MS) { last_humanization_request_ms = now; - // Alternate between 0x0C (basic info + temp) and 0x0E (extended stats) + // Alternate between 0x0D (basic info + temp) and 0x0E (extended stats) static uint8_t info_cycle = 0; if (info_cycle % 2 == 0) { // Primary: humanization mode, inject mode, queue depth, temperature, flags - uint8_t info_req[8] = {0x0C, 0, 0, 0, 0, 0, 0, 0}; + uint8_t info_req[8] = {0x0D, 0, 0, 0, 0, 0, 0, 0}; send_uart_packet(info_req, 8); } else { // Extended: total injected, queue overflows, queue capacity @@ -736,16 +736,16 @@ static void kmbox_connection_task(void) { } } - // TEMP: Send 0x0C request even when disconnected to test UART RX + // Send 0x0D info request even when disconnected to test UART RX if (now - last_humanization_request_ms >= 
HUMANIZATION_REQUEST_INTERVAL_MS) { last_humanization_request_ms = now; - uint8_t info_req[8] = {0x0C, 0, 0, 0, 0, 0, 0, 0}; + uint8_t info_req[8] = {0x0D, 0, 0, 0, 0, 0, 0, 0}; bool sent = send_uart_packet(info_req, 8); - + // Debug: confirm we're sending the request static uint32_t last_info_debug_disc = 0; if (now - last_info_debug_disc > 5000) { - printf("[Bridge TX DISC] 0x0C: %02X %02X %02X %02X %02X %02X %02X %02X (sent=%d)\n", + printf("[Bridge TX DISC] 0x0D: %02X %02X %02X %02X %02X %02X %02X %02X (sent=%d)\n", info_req[0], info_req[1], info_req[2], info_req[3], info_req[4], info_req[5], info_req[6], info_req[7], sent); last_info_debug_disc = now; diff --git a/bridge/makcu_translator.c b/bridge/makcu_translator.c index ead44dc..bc7643b 100644 --- a/bridge/makcu_translator.c +++ b/bridge/makcu_translator.c @@ -66,11 +66,11 @@ uint16_t makcu_build_response( // Helper: Convert Makcu button number to HID button mask static uint8_t makcu_button_to_mask(uint8_t button) { switch (button) { - case 1: return 0x01; // Left - case 2: return 0x02; // Right - case 3: return 0x04; // Middle - case 4: return 0x08; // Side1 - case 5: return 0x10; // Side2 + case 1: return HID_BTN_LEFT; + case 2: return HID_BTN_RIGHT; + case 3: return HID_BTN_MIDDLE; + case 4: return HID_BTN_BACK; + case 5: return HID_BTN_FORWARD; default: return 0x00; } } diff --git a/bridge/tft_display.c b/bridge/tft_display.c index 99c52ce..0705b65 100644 --- a/bridge/tft_display.c +++ b/bridge/tft_display.c @@ -142,6 +142,22 @@ static const int16_t sin_lut[SINCOS_LUT_SIZE] = { static inline int16_t lut_sin(int angle_idx) { return sin_lut[angle_idx % SINCOS_LUT_SIZE]; } static inline int16_t lut_cos(int angle_idx) { return sin_lut[(angle_idx + 45) % SINCOS_LUT_SIZE]; } // +45 entries = +90° +// Fast integer square root (Newton's method, 4 iterations — good for r<=320) +static inline int fast_isqrt(int v) { + if (v <= 0) return 0; + int x = v; + // Initial estimate: highest bit / 2 + int s = 1; + while 
(s * s <= v) s <<= 1; + x = s; + x = (x + v / x) >> 1; + x = (x + v / x) >> 1; + x = (x + v / x) >> 1; + // Correct off-by-one + if ((x + 1) * (x + 1) <= v) x++; + return x; +} + // ============================================================================ // State — timer-driven background rendering // @@ -210,6 +226,39 @@ static volatile bool stats_pending = false; // Main loop set, timer clears // Frame ready flag: timer ISR sets after drawing, main loop clears after DMA static volatile bool frame_ready = false; +// Stats hash for frame-skip optimization: skip render if stats unchanged +static uint32_t last_stats_hash = 0; + +// Lightweight FNV-1a hash over the key volatile fields of stats +static uint32_t stats_quick_hash(const tft_stats_t *s) { + uint32_t h = 2166136261u; + #define HASH_MIX(val) do { \ + uint32_t v = (uint32_t)(val); \ + h ^= v; h *= 16777619u; \ + } while(0) + HASH_MIX(s->cdc_connected); + HASH_MIX(s->kmbox_connected); + HASH_MIX(s->humanization_mode); + HASH_MIX(s->inject_mode); + HASH_MIX(s->queue_depth); + HASH_MIX(s->tx_rate_bps); + HASH_MIX(s->rx_rate_bps); + HASH_MIX(s->mouse_moves); + HASH_MIX(s->commands_per_sec); + HASH_MIX(s->latency_avg_us); + HASH_MIX(s->latency_jitter_us); + HASH_MIX(s->uptime_sec); + HASH_MIX(s->button_presses); + HASH_MIX(s->total_injected); + HASH_MIX(s->uart_errors + s->frame_errors); + HASH_MIX((uint32_t)(s->bridge_temperature_c * 10)); + HASH_MIX((uint32_t)(s->kmbox_temperature_c * 10)); + HASH_MIX(s->console_mode); + HASH_MIX(s->gamepad_buttons); + #undef HASH_MIX + return h; +} + // Repeating timer handle static repeating_timer_t tft_render_timer; @@ -293,6 +342,12 @@ static inline uint8_t hmode_color(uint8_t m) { #define TEMP_VALID(t) ((t) > -50.0f && (t) < 150.0f) +static inline uint8_t latency_color(uint32_t us) { + if (us < 200) return COL_GREEN; + if (us < 500) return COL_YELLOW; + return COL_RED; +} + // ============================================================================ // Touch 
Interrupt Callback // ============================================================================ @@ -569,11 +624,13 @@ static void format_stats(const tft_stats_t *stats) { } else { fmt_clear(&fmt_rx_peak); } } -// Helper to draw a section header label +// Helper to draw a section header label with accent bar static int draw_section_header(int y, const char *title) { hline(y, COL_DIM_LINE); y += SEP_GAP + 1; - tft_draw_string(MARGIN, y, COL_CYAN, title); + // Small accent bar before title + box(MARGIN, y + 4, 3, FONT_H - 8, COL_CYAN); + tft_draw_string(MARGIN + 6, y, COL_CYAN, title); y += LINE_H; return y; } @@ -665,22 +722,28 @@ static void draw_stats(const tft_stats_t *stats) { hline(y, COL_DIM_LINE); y += SEP_GAP + 2; - // === CONNECTION STATUS === + // === CONNECTION STATUS (with indicator dots) === { uint8_t cdc_col = stats->cdc_connected ? COL_GREEN : COL_RED; uint8_t km_col = stats->kmbox_connected ? COL_GREEN : COL_RED; - - tft_draw_string(MARGIN, y, COL_GRAY, "Host"); - tft_draw_string(MARGIN + 5 * FONT_W, y, cdc_col, + + // Status dot (filled 5x5 square) + label + box(MARGIN, y + 5, 5, 5, cdc_col); + tft_draw_string(MARGIN + 8, y, COL_GRAY, "Host"); + tft_draw_string(MARGIN + 8 + 5 * FONT_W, y, cdc_col, stats->cdc_connected ? "OK" : "--"); #if (TFT_RAW_WIDTH >= 240) - tft_draw_string(MARGIN + 10 * FONT_W, y, COL_GRAY, "KMBox"); - tft_draw_string(MARGIN + 16 * FONT_W, y, km_col, + int km_x = MARGIN + 14 * FONT_W; + box(km_x, y + 5, 5, 5, km_col); + tft_draw_string(km_x + 8, y, COL_GRAY, "KMBox"); + tft_draw_string(km_x + 8 + 6 * FONT_W, y, km_col, stats->kmbox_connected ? "OK" : "--"); #else - tft_draw_string(MARGIN + 8 * FONT_W, y, COL_GRAY, "KM"); - tft_draw_string(MARGIN + 11 * FONT_W, y, km_col, + int km_x = MARGIN + 9 * FONT_W; + box(km_x, y + 5, 5, 5, km_col); + tft_draw_string(km_x + 8, y, COL_GRAY, "KM"); + tft_draw_string(km_x + 8 + 3 * FONT_W, y, km_col, stats->kmbox_connected ? 
"OK" : "--"); #endif y += LINE_H; @@ -713,14 +776,30 @@ static void draw_stats(const tft_stats_t *stats) { } y += LINE_H; - // Queue bar + injection count + // Queue bar + injection count (with visual bar) if (fmt_queuebar.len) { tft_draw_string(MARGIN, y, COL_GRAY, "Queue"); + uint8_t q_col = COL_GREEN; if (stats->queue_depth > stats->queue_capacity * 3 / 4) q_col = COL_RED; else if (stats->queue_depth > stats->queue_capacity / 2) q_col = COL_YELLOW; - tft_draw_string(MARGIN + 6 * FONT_W, y, q_col, fmt_queuebar.str); - + + // Visual progress bar + int bar_x = MARGIN + 6 * FONT_W; + int bar_w = TFT_WIDTH - bar_x - MARGIN; + if (fmt_injcount.len) bar_w -= (fmt_injcount.len + 1) * FONT_W; + int bar_h = FONT_H - 4; + int bar_y = y + 2; + box(bar_x, bar_y, bar_w, bar_h, COL_DARK); + if (stats->queue_capacity > 0) { + int fill = (int)((float)stats->queue_depth / stats->queue_capacity * bar_w); + if (fill > bar_w) fill = bar_w; + if (fill > 0) box(bar_x, bar_y, fill, bar_h, q_col); + } + // Overlay text on bar + int txt_x = bar_x + (bar_w - fmt_queuebar.len * FONT_W) / 2; + tft_draw_string(txt_x, y, COL_WHITE, fmt_queuebar.str); + if (fmt_injcount.len) { int ix = TFT_WIDTH - MARGIN - fmt_injcount.len * FONT_W; tft_draw_string(ix, y, COL_GRAY, fmt_injcount.str); @@ -808,17 +887,21 @@ static void draw_stats(const tft_stats_t *stats) { y += SECTION_GAP; y = draw_section_header(y, "Latency"); - draw_row_lr(y, "Avg", COL_GRAY, fmt_lat_avg.str, fmt_lat_avg.len, COL_GREEN); + draw_row_lr(y, "Avg", COL_GRAY, fmt_lat_avg.str, fmt_lat_avg.len, + latency_color(stats->latency_avg_us)); y += LINE_H; - + #if (TFT_RAW_WIDTH >= 240) if (fmt_lat_range.len) { - draw_row_lr(y, "Range", COL_GRAY, fmt_lat_range.str, fmt_lat_range.len, COL_GRAY); + draw_row_lr(y, "Range", COL_GRAY, fmt_lat_range.str, fmt_lat_range.len, + latency_color(stats->latency_max_us)); y += LINE_H; } #endif if (fmt_lat_jitter.len) { - draw_row_lr(y, "Jitter", COL_GRAY, fmt_lat_jitter.str, fmt_lat_jitter.len, 
COL_YELLOW); + uint8_t jit_col = stats->latency_jitter_us < 50 ? COL_GREEN : + stats->latency_jitter_us < 150 ? COL_YELLOW : COL_RED; + draw_row_lr(y, "Jitter", COL_GRAY, fmt_lat_jitter.str, fmt_lat_jitter.len, jit_col); y += LINE_H; } } @@ -855,16 +938,27 @@ static void draw_stats(const tft_stats_t *stats) { y += LINE_H; } - // Temperatures + // Temperatures — with mini visual bar if (fmt_br_temp.len || fmt_km_temp.len) { tft_draw_string(MARGIN, y, COL_GRAY, "Temp"); int tx = MARGIN + 5 * FONT_W; if (fmt_br_temp.len) { - tft_draw_string(tx, y, temp_color(stats->bridge_temperature_c), fmt_br_temp.str); + uint8_t tcol = temp_color(stats->bridge_temperature_c); + tft_draw_string(tx, y, tcol, fmt_br_temp.str); + // Mini bar: 2px tall under the text, width proportional to temp (0-100C) + int bar_w = (int)(stats->bridge_temperature_c * (fmt_br_temp.len * FONT_W) / 100.0f); + if (bar_w < 0) bar_w = 0; + if (bar_w > fmt_br_temp.len * FONT_W) bar_w = fmt_br_temp.len * FONT_W; + box(tx, y + FONT_H, bar_w, 2, tcol); tx += (fmt_br_temp.len + 1) * FONT_W; } if (fmt_km_temp.len) { - tft_draw_string(tx, y, temp_color(stats->kmbox_temperature_c), fmt_km_temp.str); + uint8_t tcol = temp_color(stats->kmbox_temperature_c); + tft_draw_string(tx, y, tcol, fmt_km_temp.str); + int bar_w = (int)(stats->kmbox_temperature_c * (fmt_km_temp.len * FONT_W) / 100.0f); + if (bar_w < 0) bar_w = 0; + if (bar_w > fmt_km_temp.len * FONT_W) bar_w = fmt_km_temp.len * FONT_W; + box(tx, y + FONT_H, bar_w, 2, tcol); } } } @@ -908,20 +1002,15 @@ static void draw_circle_ring(int cx, int cy, int r_inner, int r_outer, uint8_t c if (py < 0 || py >= TFT_HEIGHT) continue; int dy2 = dy * dy; - // Solve for x range: r_inner^2 <= dx^2+dy^2 <= r_outer^2 int xo2 = r2_outer - dy2; if (xo2 < 0) continue; - // Outer x extent (integer sqrt via isqrt approximation) - int xo = 0; - while ((xo + 1) * (xo + 1) <= xo2) xo++; + int xo = fast_isqrt(xo2); - // Inner x extent (hollow center) int xi = 0; int xi2 = r2_inner - dy2; 
if (xi2 > 0) { - while ((xi + 1) * (xi + 1) <= xi2) xi++; - xi++; // inner boundary is exclusive + xi = fast_isqrt(xi2) + 1; // inner boundary exclusive } uint8_t *row = &tft_input[py * TFT_WIDTH]; @@ -942,40 +1031,156 @@ static void draw_circle_ring(int cx, int cy, int r_inner, int r_outer, uint8_t c } } +// Draw a filled arc ring between r_inner..r_outer, from start_deg to end_deg +// Uses angle test per-pixel within bounding ring — clean filled arcs +static void draw_arc_ring(int cx, int cy, int r_inner, int r_outer, + int start_deg, int end_deg, uint8_t color) { + int r2_inner = r_inner * r_inner; + int r2_outer = r_outer * r_outer; + + // Precompute angle boundary vectors (Q15) for start/end + int si_start = angle_to_idx(start_deg); + int si_end = angle_to_idx(end_deg); + int16_t sx = lut_cos(si_start), sy = lut_sin(si_start); + int16_t ex = lut_cos(si_end), ey = lut_sin(si_end); + + for (int dy = -r_outer; dy <= r_outer; dy++) { + int py = cy + dy; + if (py < 0 || py >= TFT_HEIGHT) continue; + + int dy2 = dy * dy; + int xo2 = r2_outer - dy2; + if (xo2 < 0) continue; + + int xo = fast_isqrt(xo2); + int xi = 0; + int xi2 = r2_inner - dy2; + if (xi2 > 0) xi = fast_isqrt(xi2) + 1; + + uint8_t *row = &tft_input[py * TFT_WIDTH]; + + for (int dx = -xo; dx <= xo; dx++) { + // Skip inner hollow + if (dx > -xi && dx < xi) { dx = xi - 1; continue; } + + int px = cx + dx; + if (px < 0 || px >= TFT_WIDTH) continue; + + // Angle test: is (dx,dy) between start_deg and end_deg? 
+ // Cross product sign test against boundary vectors + // cross(start_vec, point) >= 0 AND cross(point, end_vec) >= 0 + // For arcs < 180°, both must be true + // For arcs >= 180°, either can be true + int32_t cross_start = (int32_t)sx * dy - (int32_t)sy * dx; + int32_t cross_end = (int32_t)dx * ey - (int32_t)dy * ex; + + int span = end_deg - start_deg; + if (span < 0) span += 360; + bool inside; + if (span <= 180) { + inside = (cross_start >= 0) && (cross_end >= 0); + } else { + inside = (cross_start >= 0) || (cross_end >= 0); + } + + if (inside) { + row[px] = color; + } + } + } +} + static void draw_circular_gauge(int cx, int cy, int radius, float value, float max_value, const char* label, uint8_t color) { - // Draw gauge outline ring (3px thick) - draw_circle_ring(cx, cy, radius - 2, radius, COL_DARK); + // Draw gauge background ring (track) + draw_circle_ring(cx, cy, radius - 8, radius, COL_DARK); - // Draw value arc using LUT (6px thick band) - float percentage = (value / max_value); + // Draw value arc (filled thick ring sector) + float percentage = (max_value > 0.0f) ? 
(value / max_value) : 0.0f; if (percentage > 1.0f) percentage = 1.0f; - int end_angle = (int)(percentage * 270.0f) - 135; + if (percentage < 0.0f) percentage = 0.0f; - for (int angle = -135; angle < end_angle && angle < 135; angle += 2) { - int ai = angle_to_idx(angle); + if (percentage > 0.01f) { + int start_deg = -135 + 360; // Normalize to positive: 225° + int end_deg = start_deg + (int)(percentage * 270.0f); + draw_arc_ring(cx, cy, radius - 7, radius - 1, + start_deg, end_deg, color); + } + + // Tick marks at 0%, 50%, 100% positions on the outer edge + for (int i = 0; i <= 2; i++) { + int tick_deg = -135 + i * 135; + int ai = angle_to_idx(tick_deg); int16_t s = lut_sin(ai); int16_t c = lut_cos(ai); - // Draw just inner and outer edge pixels of the arc band - for (int r = radius - 8; r <= radius - 3; r++) { - int x = cx + (r * c + 16384) / 32768; - int y = cy + (r * s + 16384) / 32768; - if ((unsigned)x < (unsigned)TFT_WIDTH && (unsigned)y < (unsigned)TFT_HEIGHT) { - tft_input[y * TFT_WIDTH + x] = color; - } + for (int r = radius + 1; r <= radius + 3; r++) { + int tx = cx + (r * c + 16384) / 32768; + int ty = cy + (r * s + 16384) / 32768; + if ((unsigned)tx < (unsigned)TFT_WIDTH && (unsigned)ty < (unsigned)TFT_HEIGHT) + tft_input[ty * TFT_WIDTH + tx] = COL_GRAY; + } + } + + // Draw value text in center (larger visual emphasis) + char value_str[16]; + char *p = u32_to_str(value_str, (uint32_t)(value + 0.5f)); + *p = '\0'; + int text_len = (int)(p - value_str); + tft_draw_string(cx - (text_len * FONT_W) / 2, cy - FONT_H / 2, COL_WHITE, value_str); + + // Draw label below center + int label_len = 0; + const char *lp = label; + while (*lp++) label_len++; + tft_draw_string(cx - (label_len * FONT_W) / 2, cy + FONT_H / 2 + 2, COL_GRAY, label); +} + +// Draw a circular gauge with a unit suffix next to the value (e.g. 
"42C") +static void draw_circular_gauge_unit(int cx, int cy, int radius, float value, float max_value, + const char* label, const char* unit, uint8_t color) { + // Draw gauge background ring (track) + draw_circle_ring(cx, cy, radius - 8, radius, COL_DARK); + + // Draw value arc + float percentage = (max_value > 0.0f) ? (value / max_value) : 0.0f; + if (percentage > 1.0f) percentage = 1.0f; + if (percentage < 0.0f) percentage = 0.0f; + + if (percentage > 0.01f) { + int start_deg = -135 + 360; + int end_deg = start_deg + (int)(percentage * 270.0f); + draw_arc_ring(cx, cy, radius - 7, radius - 1, + start_deg, end_deg, color); + } + + // Tick marks at 0%, 50%, 100% + for (int i = 0; i <= 2; i++) { + int tick_deg = -135 + i * 135; + int ai = angle_to_idx(tick_deg); + int16_t s = lut_sin(ai); + int16_t c = lut_cos(ai); + for (int r = radius + 1; r <= radius + 3; r++) { + int tx = cx + (r * c + 16384) / 32768; + int ty = cy + (r * s + 16384) / 32768; + if ((unsigned)tx < (unsigned)TFT_WIDTH && (unsigned)ty < (unsigned)TFT_HEIGHT) + tft_input[ty * TFT_WIDTH + tx] = COL_GRAY; } } - // Draw value text in center + // Value text with unit suffix: e.g. 
"42C" char value_str[16]; char *p = u32_to_str(value_str, (uint32_t)(value + 0.5f)); + const char *up = unit; + while (*up) *p++ = *up++; *p = '\0'; int text_len = (int)(p - value_str); - tft_draw_string(cx - (text_len * FONT_W) / 2, cy - FONT_H / 2, color, value_str); + tft_draw_string(cx - (text_len * FONT_W) / 2, cy - FONT_H / 2, COL_WHITE, value_str); - // Draw label below - int label_len = strlen(label); - tft_draw_string(cx - (label_len * FONT_W) / 2, cy + radius + 4, COL_GRAY, label); + // Label below center + int label_len = 0; + const char *lp = label; + while (*lp++) label_len++; + tft_draw_string(cx - (label_len * FONT_W) / 2, cy + FONT_H / 2 + 2, COL_GRAY, label); } static void draw_bar_gauge(int x, int y, int width, int height, float value, float max_value, @@ -1008,103 +1213,137 @@ static void draw_bar_gauge(int x, int y, int width, int height, float value, flo static void draw_gauge_view(const tft_stats_t *stats) { int y = MARGIN; - - // === HEADER === - tft_draw_string_center(TFT_WIDTH / 2, y, COL_CYAN, "KMBox Gauges"); + + // === HEADER with connection status inline === + { + uint8_t cdc_col = stats->cdc_connected ? COL_GREEN : COL_RED; + uint8_t km_col = stats->kmbox_connected ? 
COL_GREEN : COL_RED; + + // Left: connection dots + box(MARGIN, y + 5, 5, 5, cdc_col); + box(MARGIN + 8, y + 5, 5, 5, km_col); + + // Center: title + tft_draw_string_center(TFT_WIDTH / 2, y, COL_CYAN, "KMBox Gauges"); + + // Right: uptime + int ux = TFT_WIDTH - MARGIN - fmt_uptime.len * FONT_W; + tft_draw_string(ux, y, COL_DARK, fmt_uptime.str); + } y += LINE_H + SEP_GAP; hline(y, COL_DARK); - y += SEP_GAP + 10; - + y += SEP_GAP + 4; + #if (TFT_RAW_WIDTH >= 240) - // Large display: circular gauges in a grid - int gauge_radius = 50; - int gauge_spacing_x = TFT_WIDTH / 2; - int gauge_spacing_y = 120; - - // Row 1: Latency and Command Rate - if (stats->latency_samples > 0) { - draw_circular_gauge(gauge_spacing_x / 2, y + gauge_radius, gauge_radius, - stats->latency_avg_us, 1000.0f, "Lat us", - stats->latency_avg_us < 200 ? COL_GREEN : - stats->latency_avg_us < 500 ? COL_YELLOW : COL_RED); + // ── Large display (ILI9341 240x320): 3 rows of 2 circular gauges ── + // Row 1: Latency + Cmd/s (performance) + // Row 2: Bridge Temp + KMBox Temp (thermal) + // Row 3: TX Rate + RX Rate (throughput) + int gauge_radius = 42; + int col_cx_l = TFT_WIDTH / 4; // Left column center X + int col_cx_r = TFT_WIDTH * 3 / 4; // Right column center X + int row_h = 100; // Vertical spacing per row + + // ── Row 1: Latency + Command Rate ── + { + int row_cy = y + gauge_radius; + + // Latency gauge — always shown, color by threshold + uint8_t lat_col = latency_color(stats->latency_avg_us); + draw_circular_gauge(col_cx_l, row_cy, gauge_radius, + (float)stats->latency_avg_us, 1000.0f, "Lat us", lat_col); + + // Command rate gauge — always shown + uint8_t cmd_col = (stats->commands_per_sec > 500) ? COL_YELLOW : + (stats->commands_per_sec > 0) ? 
COL_GREEN : COL_DARK; + draw_circular_gauge(col_cx_r, row_cy, gauge_radius, + (float)stats->commands_per_sec, 1000.0f, "Cmd/s", cmd_col); + + y += row_h; } - - if (stats->commands_per_sec > 0) { - draw_circular_gauge(gauge_spacing_x + gauge_spacing_x / 2, y + gauge_radius, gauge_radius, - stats->commands_per_sec, 1000.0f, "Cmd/s", COL_GREEN); + + // ── Row 2: Temperature Gauges ── + { + int row_cy = y + gauge_radius; + + // Bridge temperature gauge (0-100C range) + if (TEMP_VALID(stats->bridge_temperature_c)) { + draw_circular_gauge_unit(col_cx_l, row_cy, gauge_radius, + stats->bridge_temperature_c, 100.0f, + "Bridge", "C", + temp_color(stats->bridge_temperature_c)); + } else { + // Show empty gauge with "--" placeholder + draw_circle_ring(col_cx_l, row_cy, gauge_radius - 8, gauge_radius, COL_DARK); + tft_draw_string(col_cx_l - FONT_W, row_cy - FONT_H / 2, COL_DARK, "--"); + tft_draw_string(col_cx_l - 3 * FONT_W, row_cy + FONT_H / 2 + 2, COL_GRAY, "Bridge"); + } + + // KMBox temperature gauge + if (stats->kmbox_connected && TEMP_VALID(stats->kmbox_temperature_c)) { + draw_circular_gauge_unit(col_cx_r, row_cy, gauge_radius, + stats->kmbox_temperature_c, 100.0f, + "KMBox", "C", + temp_color(stats->kmbox_temperature_c)); + } else { + draw_circle_ring(col_cx_r, row_cy, gauge_radius - 8, gauge_radius, COL_DARK); + tft_draw_string(col_cx_r - FONT_W, row_cy - FONT_H / 2, COL_DARK, "--"); + tft_draw_string(col_cx_r - 3 * FONT_W / 2, row_cy + FONT_H / 2 + 2, COL_GRAY, "KMBox"); + } + + y += row_h; } - - y += gauge_spacing_y; - - // Row 2: TX and RX rates - float tx_mbps = stats->tx_rate_bps / 1000.0f; - float rx_mbps = stats->rx_rate_bps / 1000.0f; - - draw_circular_gauge(gauge_spacing_x / 2, y + gauge_radius, gauge_radius, - tx_mbps, 100.0f, "TX KB/s", COL_CYAN); - - draw_circular_gauge(gauge_spacing_x + gauge_spacing_x / 2, y + gauge_radius, gauge_radius, - rx_mbps, 100.0f, "RX KB/s", COL_CYAN); - - y += gauge_spacing_y + 10; - - // Connection status row - hline(y, 
COL_DARK); - y += 4; - - uint8_t cdc_col = stats->cdc_connected ? COL_GREEN : COL_RED; - uint8_t km_col = stats->kmbox_connected ? COL_GREEN : COL_RED; - - tft_draw_string(MARGIN, y, COL_GRAY, "CDC:"); - tft_draw_string(MARGIN + 40, y, cdc_col, stats->cdc_connected ? "CONN" : "DISC"); - - tft_draw_string(TFT_WIDTH / 2, y, COL_GRAY, "KM:"); - tft_draw_string(TFT_WIDTH / 2 + 32, y, km_col, stats->kmbox_connected ? "CONN" : "DISC"); - - y += LINE_H; - - // Temperature - if (TEMP_VALID(stats->bridge_temperature_c)) { - char temp_str[16]; - char *tp = u32_to_str(temp_str, (uint32_t)(stats->bridge_temperature_c + 0.5f)); - *tp++ = 'C'; *tp = '\0'; - tft_draw_string(MARGIN, y, temp_color(stats->bridge_temperature_c), temp_str); + + // ── Row 3: TX + RX throughput ── + { + int row_cy = y + gauge_radius; + float tx_kbps = stats->tx_rate_bps / 1000.0f; + float rx_kbps = stats->rx_rate_bps / 1000.0f; + + uint8_t tx_col = (tx_kbps > 50.0f) ? COL_YELLOW : + (tx_kbps > 0.1f) ? COL_CYAN : COL_DARK; + uint8_t rx_col = (rx_kbps > 50.0f) ? COL_YELLOW : + (rx_kbps > 0.1f) ? COL_CYAN : COL_DARK; + + draw_circular_gauge(col_cx_l, row_cy, gauge_radius, + tx_kbps, 100.0f, "TX KB/s", tx_col); + draw_circular_gauge(col_cx_r, row_cy, gauge_radius, + rx_kbps, 100.0f, "RX KB/s", rx_col); } - - // Uptime (reuse pre-formatted buffer) - int uptime_x = TFT_WIDTH - MARGIN - fmt_uptime.len * FONT_W; - tft_draw_string(uptime_x, y, COL_CYAN, fmt_uptime.str); + #else - // Small display: horizontal bar gauges - int bar_height = 24; + // ── Small display (ST7735 128x160): horizontal bar gauges ── + int bar_height = 20; int bar_width = TFT_WIDTH - MARGIN * 2; - - // Latency bar - if (stats->latency_samples > 0) { - draw_bar_gauge(MARGIN, y, bar_width, bar_height, stats->latency_avg_us, 1000.0f, - "Latency (us)", stats->latency_avg_us < 200 ? 
COL_GREEN : COL_YELLOW); - y += bar_height + LINE_H + 4; + + // Latency bar (always shown) + { + uint8_t lat_col = latency_color(stats->latency_avg_us); + draw_bar_gauge(MARGIN, y, bar_width, bar_height, + (float)stats->latency_avg_us, 1000.0f, "Latency us", lat_col); + y += bar_height + LINE_H + 2; } - - // Command rate bar - if (stats->commands_per_sec > 0) { - draw_bar_gauge(MARGIN, y, bar_width, bar_height, stats->commands_per_sec, 500.0f, - "Commands/sec", COL_GREEN); - y += bar_height + LINE_H + 4; + + // Command rate bar (always shown) + { + uint8_t cmd_col = (stats->commands_per_sec > 0) ? COL_GREEN : COL_DARK; + draw_bar_gauge(MARGIN, y, bar_width, bar_height, + (float)stats->commands_per_sec, 500.0f, "Cmd/s", cmd_col); + y += bar_height + LINE_H + 2; + } + + // Temperature bar (bridge) + if (TEMP_VALID(stats->bridge_temperature_c)) { + draw_bar_gauge(MARGIN, y, bar_width, bar_height, + stats->bridge_temperature_c, 100.0f, "Temp C", + temp_color(stats->bridge_temperature_c)); + } else { + draw_bar_gauge(MARGIN, y, bar_width, bar_height, 0.0f, 100.0f, "Temp C", COL_DARK); } - - // TX rate bar - draw_bar_gauge(MARGIN, y, bar_width, bar_height, stats->tx_rate_bps / 1000.0f, 50.0f, - "TX (KB/s)", COL_CYAN); - y += bar_height + LINE_H + 4; - - // RX rate bar - draw_bar_gauge(MARGIN, y, bar_width, bar_height, stats->rx_rate_bps / 1000.0f, 50.0f, - "RX (KB/s)", COL_CYAN); #endif #if TOUCH_ENABLED - // Touch zone hints (right edge, centered in each zone — no hlines) + // Touch zone hints (right edge, centered in each zone) { int zone_x = TFT_WIDTH - MARGIN - FONT_W; int z1_y = (TOUCH_ZONE_TOP_END) / 2 - FONT_H / 2; @@ -1367,7 +1606,12 @@ static bool tft_render_timer_callback(repeating_timer_t *rt) { if (!stats_pending) return true; tft_stats_t local_stats = shared_stats; // Snapshot stats_pending = false; - + + // Skip render if stats haven't meaningfully changed (saves CPU in steady state) + uint32_t h = stats_quick_hash(&local_stats); + if (h == last_stats_hash 
&& menu_highlight_item < 0) return true; + last_stats_hash = h; + // Render into the back buffer tft_fill(COL_BG); format_stats(&local_stats); @@ -1446,18 +1690,37 @@ void tft_display_refresh(const tft_stats_t *stats) { void tft_display_splash(void) { if (!initialized) return; - + tft_fill(COL_BG); - - int y = TFT_HEIGHT / 2 - LINE_H * 2; - box(10, y - 4, TFT_WIDTH - 20, LINE_H * 3 + 8, COL_DARK); - - tft_draw_string_center(TFT_WIDTH / 2, y, COL_CYAN, "KMBox Bridge"); - y += LINE_H; - tft_draw_string_center(TFT_WIDTH / 2, y, COL_WHITE, "Autopilot"); - y += LINE_H * 2; - tft_draw_string_center(TFT_WIDTH / 2, y, COL_GREEN, "Starting..."); - + + int cx = TFT_WIDTH / 2; + int cy = TFT_HEIGHT / 2; + + // Outer frame with double border + int bx = 8, by = cy - LINE_H * 3; + int bw = TFT_WIDTH - 16, bh = LINE_H * 5 + 12; + box(bx, by, bw, 2, COL_CYAN); // top + box(bx, by + bh - 2, bw, 2, COL_CYAN); // bottom + box(bx, by, 2, bh, COL_CYAN); // left + box(bx + bw - 2, by, 2, bh, COL_CYAN); // right + // Inner fill + box(bx + 4, by + 4, bw - 8, bh - 8, COL_DARK); + + int y = by + 8; + tft_draw_string_center(cx, y, COL_CYAN, "KMBox Bridge"); + y += LINE_H + 2; + + // Thin accent line + int line_x = cx - 40; + int line_w = 80; + if (line_x < bx + 6) { line_x = bx + 6; line_w = bw - 12; } + box(line_x, y, line_w, 1, COL_CYAN); + y += 6; + + tft_draw_string_center(cx, y, COL_WHITE, "Autopilot"); + y += LINE_H + LINE_H; + tft_draw_string_center(cx, y, COL_GREEN, "Starting..."); + tft_swap_sync(); } diff --git a/defines.h b/defines.h index 42d931a..9897679 100644 --- a/defines.h +++ b/defines.h @@ -103,102 +103,8 @@ #define BRIDGE_CMD_PING 0xFE // Keepalive #define BRIDGE_CMD_RESET 0xFF // Reset state -// Command IDs (must match bridge/fast_commands.h) -#define FAST_CMD_MOUSE_MOVE 0x01 // Direct accumulator (buttons + wheel + move) -#define FAST_CMD_MOUSE_CLICK 0x02 // Button click with repeat count -#define FAST_CMD_SMOOTH_MOVE 0x07 // Smooth injection queue (humanized) -#define 
FAST_CMD_SMOOTH_CONFIG 0x08 // Configure smooth injection -#define FAST_CMD_SMOOTH_CLEAR 0x09 // Clear smooth injection queue -#define FAST_CMD_TIMED_MOVE 0x0A // Movement with timestamp for sync -#define FAST_CMD_MULTI_MOVE 0x0B // Multiple movements in one packet -#define FAST_CMD_KEY_COMBO 0x0C // Keyboard key combination -#define FAST_CMD_KEY_PRESS 0x0C // Single key press (alias for KEY_COMBO) -#define FAST_CMD_INFO 0x0D // Request system info -#define FAST_CMD_INFO_EXT 0x0E // Request extended stats -#define FAST_CMD_CYCLE_HUMAN 0x0F // Cycle humanization mode -#define FAST_CMD_XBOX_INPUT 0x20 // Xbox gamepad: buttons + triggers -#define FAST_CMD_XBOX_STICK_L 0x22 // Xbox left stick X/Y -#define FAST_CMD_XBOX_STICK_R 0x23 // Xbox right stick X/Y -#define FAST_CMD_XBOX_RELEASE 0x27 // Xbox clear all injection overrides -#define FAST_CMD_XBOX_STATUS 0x28 // Xbox console mode status report -#define FAST_CMD_SYNC 0xFC // Clock synchronization -#define FAST_CMD_RESPONSE 0xFD // Generic response -#define FAST_CMD_PING 0xFE // Keepalive ping -#define FAST_CMD_PACKET_SIZE 8 // Fixed 8-byte packet size - -// Timed move command: 0x0A (for clock-synchronized injection) -typedef struct __attribute__((packed, aligned(4))) { - uint8_t cmd; // 0x0A - int16_t x; // X movement - int16_t y; // Y movement - uint16_t time_us; // Execution time offset (microseconds from sync) - uint8_t mode; // Injection mode -} fast_cmd_timed_t; - -_Static_assert(sizeof(fast_cmd_timed_t) == 8, "fast_cmd_timed_t must be 8 bytes"); - -// Mouse move command: 0x01 -typedef struct __attribute__((packed)) { - uint8_t cmd; // 0x01 - int16_t x, y; // Movement - uint8_t buttons; // Button state - int8_t wheel; // Wheel movement - uint8_t pad[2]; // Padding to 8 bytes -} fast_cmd_move_t; - -typedef fast_cmd_move_t fast_cmd_mouse_move_t; // Alias for compatibility - -// Multi-move command: 0x0B -typedef struct __attribute__((packed)) { - uint8_t cmd; // 0x0B - int8_t x1, y1; // First movement - int8_t x2, 
y2; // Second movement - int8_t x3, y3; // Third movement - uint8_t pad; // Padding to 8 bytes -} fast_cmd_multi_t; - -// Click command: 0x02 -typedef struct __attribute__((packed)) { - uint8_t cmd; // 0x02 - uint8_t button; // Button mask - uint8_t count; // Click count - uint8_t pad[5]; // Padding to 8 bytes -} fast_cmd_click_t; - -// Key press/combo command: 0x0C -typedef struct __attribute__((packed)) { - uint8_t cmd; // 0x0C - uint8_t modifiers; // Modifier keys - uint8_t keycode; // Primary keycode (for single key) - uint8_t keys[5]; // Additional keycodes (for combo) -} fast_cmd_key_t; - -typedef fast_cmd_key_t fast_cmd_combo_t; // Alias for compatibility - -// Smooth move command: 0x07 -typedef struct __attribute__((packed)) { - uint8_t cmd; // 0x07 - int16_t x; // X movement - int16_t y; // Y movement - uint8_t mode; // Injection mode - uint8_t pad[2]; // Padding to 8 bytes -} fast_cmd_smooth_t; - -// Config command: 0x08 -typedef struct __attribute__((packed)) { - uint8_t cmd; // 0x08 - uint8_t max_per_frame; // Max pixels per frame - uint8_t vel_match; // Velocity matching enable - uint8_t pad[5]; // Padding to 8 bytes -} fast_cmd_config_t; - -// Sync command: 0xFC -typedef struct __attribute__((packed)) { - uint8_t cmd; // 0xFC - uint32_t timestamp; // PC timestamp - uint16_t seq_num; // Sequence number - uint8_t pad; // Padding to 8 bytes -} fast_cmd_sync_t; +// Fast command IDs, packed structs, and packet builders — shared with bridge +#include "fast_protocol.h" #define DEBUG_OUTPUT_USB_CDC 0 // Always disable debug output over USB CDC @@ -317,6 +223,9 @@ typedef struct __attribute__((packed)) { #define USB_STACK_ERROR_THRESHOLD 50 // Number of consecutive errors before reset // USB descriptor configuration +#define MAX_DEVICE_HID_INTERFACES 4 // Max HID interfaces to mirror (matches CFG_TUD_HID) +#define MIRROR_ITF_DESC_MAX 512 // Max HID report descriptor per non-mouse interface +#define DESC_CONFIG_RUNTIME_MAX 256 // Max runtime config descriptor 
(9 + 4*32 = 137 typical) #define CONFIG_TOTAL_LEN (TUD_CONFIG_DESC_LEN + TUD_HID_DESC_LEN) #define EPNUM_HID HID_ENDPOINT_ADDRESS @@ -338,9 +247,11 @@ typedef struct __attribute__((packed)) { #define HID_KEYBOARD_KEYCODE_COUNT 6 // Number of simultaneous keycodes supported #define HID_CONSUMER_CONTROL_SIZE 2 // Consumer control report size in bytes -// Activity tracking -#define KEYBOARD_ACTIVITY_THROTTLE 50 // Trigger keyboard activity flash every 50 reports -#define MOUSE_ACTIVITY_THROTTLE 100 // Trigger mouse activity flash every 100 reports +// Activity tracking (power-of-2 for bitmask instead of modulo division) +#define KEYBOARD_ACTIVITY_THROTTLE 64 // Trigger keyboard activity flash every 64 reports +#define KEYBOARD_ACTIVITY_MASK (KEYBOARD_ACTIVITY_THROTTLE - 1) +#define MOUSE_ACTIVITY_THROTTLE 128 // Trigger mouse activity flash every 128 reports +#define MOUSE_ACTIVITY_MASK (MOUSE_ACTIVITY_THROTTLE - 1) //--------------------------------------------------------------------+ // MOUSE CONFIGURATION diff --git a/humanization_fpu.c b/humanization_fpu.c index df8f3a9..ae3eb1d 100644 --- a/humanization_fpu.c +++ b/humanization_fpu.c @@ -33,14 +33,45 @@ static inline float jitter_next(void) { g_jitter_lfsr ^= g_jitter_lfsr << 13; g_jitter_lfsr ^= g_jitter_lfsr >> 17; g_jitter_lfsr ^= g_jitter_lfsr << 5; - + // FIX: Balanced [-1.0, 1.0] conversion // Use top 24 bits mapped symmetrically to avoid DC bias - // Old code: (float)(int32_t)g_jitter_lfsr / (float)INT32_MAX had asymmetric range int32_t balanced = (int32_t)(g_jitter_lfsr >> 8) - 0x800000; return (float)balanced * (1.0f / 8388608.0f); // 1/2^23 } +//--------------------------------------------------------------------+ +// Fast sine approximation (~10 FPU cycles vs ~80 for libm sinf) +// +// Degree-5 minimax polynomial on [-pi, pi]: +// sin(x) ≈ x * (1 - x²/6 * (1 - x²/20 * (1 - x²/42))) +// Max error: ~2.5e-5 — more than adequate for tremor noise. 
+// +// Input is reduced to [-pi, pi] by subtracting multiples of 2*pi. +// Uses M33 VFMA.F32 for fused multiply-add chains. +//--------------------------------------------------------------------+ + +static const float TWO_PI = 6.28318530718f; +static const float INV_2PI = 0.15915494309f; // 1/(2*pi) +static const float PI_F = 3.14159265359f; + +static inline float fast_sinf(float x) { + // Range reduction to [-pi, pi] via round-to-nearest + // floorf(x * INV_2PI + 0.5f) gives nearest integer + float n = x * INV_2PI; + // Round to nearest integer: add 0.5 then truncate + // (faster than floorf on M33 which lacks VRINTM) + n = (float)(int32_t)(n + (n >= 0.0f ? 0.5f : -0.5f)); + x -= n * TWO_PI; + + // Horner form: x * (1 + x²*(-1/6 + x²*(1/120 + x²*(-1/5040)))) + float x2 = x * x; + float r = fmaf(x2, -1.984126984e-4f, 8.333333333e-3f); // -1/5040, 1/120 + r = fmaf(x2, r, -1.666666667e-1f); // -1/6 + r = fmaf(x2, r, 1.0f); // 1 + return x * r; +} + //--------------------------------------------------------------------+ // Public API //--------------------------------------------------------------------+ @@ -56,15 +87,12 @@ void humanization_fpu_init(uint32_t seed) { void humanization_get_tremor(float scale, float *perp_x, float *perp_y) { g_tremor_phase++; - - // FIX: Wrap phase to prevent sinf() precision loss at large arguments. - // After 100 seconds, t*19.1*2*PI = 11,999 — single-precision sinf loses - // fractional accuracy above ~4096. Wrapping at 100000 (~100s) keeps - // arguments well within float32 precision. + + // Wrap phase to prevent precision loss at large arguments. + // Wrapping at 100000 (~100s) keeps arguments within float32 precision. float t = (float)(g_tremor_phase % 100000u) * 0.001f; - + // Pre-computed angular frequency constants (2*PI*freq) - // Eliminates 6 runtime multiplications per call at 240MHz. // M33 FPU executes fmaf() as single VFMA.F32 instruction. 
static const float W_X1 = 8.7f * (2.0f * (float)M_PI); // ~54.67 rad/s static const float W_X2 = 12.3f * (2.0f * (float)M_PI); // ~77.28 rad/s @@ -72,36 +100,31 @@ void humanization_get_tremor(float scale, float *perp_x, float *perp_y) { static const float W_Y1 = 9.4f * (2.0f * (float)M_PI); // ~59.06 rad/s static const float W_Y2 = 13.7f * (2.0f * (float)M_PI); // ~86.08 rad/s static const float W_Y3 = 17.8f * (2.0f * (float)M_PI); // ~111.84 rad/s - - // FIX: Use fixed offsets for X/Y decorrelation instead of accumulating - // secondary phases. The old approach doubled the effective frequency - // because both t and g_tremor_phase_x advanced by 0.001 per call. - // + // X-axis tremor: Three incommensurate frequencies - // Physiological hand tremor is 8-25Hz, these ratios are irrational - // so the composite waveform has no clean repeat period + // Uses fast_sinf (~10 cycles) instead of libm sinf (~80 cycles) + // 6 calls: ~60 cycles total vs ~480 cycles before float tx = t + 0.7f; // Fixed offset for X channel - float tremor_x = sinf(tx * W_X1) * 0.40f // ~8.7Hz primary - + sinf(tx * W_X2) * 0.25f // ~12.3Hz secondary - + sinf(tx * W_X3) * 0.15f; // ~19.1Hz tertiary - + float tremor_x = fast_sinf(tx * W_X1) * 0.40f // ~8.7Hz primary + + fast_sinf(tx * W_X2) * 0.25f // ~12.3Hz secondary + + fast_sinf(tx * W_X3) * 0.15f; // ~19.1Hz tertiary + // Add LFSR noise component (breaks any remaining periodicity) - // Use fmaf for fused multiply-add -> single VFMA.F32 on M33 tremor_x = fmaf(jitter_next(), 0.3f, tremor_x); - + // Y-axis tremor: Different fixed offset for decorrelation float ty = t + 1.3f; // Different offset than X - float tremor_y = sinf(ty * W_Y1) * 0.40f // ~9.4Hz primary - + sinf(ty * W_Y2) * 0.25f // ~13.7Hz secondary - + sinf(ty * W_Y3) * 0.15f; // ~17.8Hz tertiary - + float tremor_y = fast_sinf(ty * W_Y1) * 0.40f // ~9.4Hz primary + + fast_sinf(ty * W_Y2) * 0.25f // ~13.7Hz secondary + + fast_sinf(ty * W_Y3) * 0.15f; // ~17.8Hz tertiary + // Add 
independent LFSR noise (fused multiply-add) tremor_y = fmaf(jitter_next(), 0.3f, tremor_y); - + // Apply scale and clamp to reasonable range *perp_x = tremor_x * scale; *perp_y = tremor_y * scale; - + // Clamp to ±3.0px to prevent extreme outliers if (*perp_x > 3.0f) *perp_x = 3.0f; if (*perp_x < -3.0f) *perp_x = -3.0f; diff --git a/humanization_fpu.h b/humanization_fpu.h index 9dae8e4..99c7c06 100644 --- a/humanization_fpu.h +++ b/humanization_fpu.h @@ -55,26 +55,6 @@ static inline float humanization_jitter_scale(float magnitude) { return fmaxf(0.4f, 0.7f - (magnitude - 120.0f) * 0.0015f); } -/** - * Ease-in-out cubic (FPU direct computation) - */ -static inline float ease_in_out_cubic(float t) { - if (t < 0.5f) { - return 4.0f * t * t * t; - } else { - float x = -2.0f * t + 2.0f; - return 1.0f - x * x * x * 0.5f; - } -} - -/** - * Ease-out quadratic (FPU direct computation) - */ -static inline float ease_out_quad(float t) { - float x = 1.0f - t; - return 1.0f - x * x; -} - /** * Minimum-jerk velocity profile * Natural acceleration/deceleration curve @@ -84,20 +64,4 @@ static inline float min_jerk_velocity(float t) { return 30.0f * t * t * one_minus_t * one_minus_t; } -/** - * Convert progress (0-1) to eased progress based on mode - */ -static inline float apply_easing_fpu(float t, uint8_t mode) { - switch (mode) { - case 0: // EASING_LINEAR - return t; - case 1: // EASING_EASE_IN_OUT - return ease_in_out_cubic(t); - case 2: // EASING_EASE_OUT - return ease_out_quad(t); - default: - return t; - } -} - #endif // HUMANIZATION_FPU_H diff --git a/humanization_lut.h b/humanization_lut.h index d0c3361..554e726 100644 --- a/humanization_lut.h +++ b/humanization_lut.h @@ -174,42 +174,6 @@ extern const int32_t g_frame_spread_by_movement_lut[FRAME_SPREAD_LUT_SIZE]; // Fast Lookup Functions (inline for zero overhead) //--------------------------------------------------------------------+ -/** - * Fast easing lookup with interpolation - * @param t Progress in 16.16 
fixed-point [0, SMOOTH_FP_ONE] - * @param mode Easing mode - * @return Eased progress in 16.16 fixed-point - */ -static inline int32_t lut_apply_easing(int32_t t, easing_mode_t mode) { - // Clamp t to valid range - if (t <= 0) return 0; - if (t >= SMOOTH_FP_ONE) return SMOOTH_FP_ONE; - - // Convert t to table index (0-255) - // t is 16.16, we want 8-bit index - uint32_t index = (uint32_t)t >> (SMOOTH_FP_SHIFT - EASING_LUT_SHIFT); - if (index >= EASING_LUT_SIZE - 1) index = EASING_LUT_SIZE - 2; - - // Select table based on mode - const int32_t *table; - switch (mode) { - case EASING_LINEAR: table = g_ease_linear_lut; break; - case EASING_EASE_IN_OUT: table = g_ease_in_out_cubic_lut; break; - case EASING_EASE_OUT: table = g_ease_out_quad_lut; break; - default: table = g_ease_linear_lut; break; // Fallback for safety - } - - // Linear interpolation between table entries for smoothness - int32_t v0 = table[index]; - int32_t v1 = table[index + 1]; - - // Fractional part for interpolation (lower 8 bits of shifted t) - uint32_t frac = ((uint32_t)t >> (SMOOTH_FP_SHIFT - EASING_LUT_SHIFT - 8)) & 0xFF; - - // Interpolate: v0 + (v1 - v0) * frac / 256 - return v0 + (((v1 - v0) * (int32_t)frac) >> 8); -} - /** * Fast progress lookup (no division needed) * @param total_frames Total frames for movement diff --git a/kmbox_serial_handler.c b/kmbox_serial_handler.c index ae1518c..b3999e5 100644 --- a/kmbox_serial_handler.c +++ b/kmbox_serial_handler.c @@ -212,32 +212,29 @@ static void __not_in_flash_func(on_uart_irq)(void) { // Non-blocking TX: Use DMA if available, fall back to IRQ ring buffer static bool uart_send_bytes(const uint8_t *data, size_t len) { if (!data || len == 0) return true; - - // DMA TX path: zero-CPU transmission + + // DMA TX path: zero-CPU, zero-wait transmission if (uart_tx_dma_chan >= 0) { - // Wait for previous DMA transfer to complete (should be very fast at 3Mbaud) - // Reduced timeout to 500µs to prevent starving tud_task() under burst traffic - // At 
3Mbaud, 256 bytes takes ~0.85ms max; most packets are 8 bytes (~27µs) - uint32_t timeout = time_us_32() + 500; // 500µs safety timeout - while (dma_tx_busy && time_us_32() < timeout) { - tight_loop_contents(); - } + // Non-blocking: if DMA is still sending the previous packet, drop this one. + // At 3Mbaud, an 8-byte packet takes ~27µs — the previous DMA is almost + // always done by the next main loop iteration. The old 500µs spin-wait + // blocked tud_task() and hid_device_task(), adding up to 500µs of input latency. + // Bridge retries on missed responses, so dropping is safe. if (dma_tx_busy) { - // DMA still busy after timeout — drop the data to avoid blocking g_uart_tx_dropped += len; return false; } - + // Copy data to DMA TX staging buffer size_t to_send = (len > DMA_TX_BUFFER_SIZE) ? DMA_TX_BUFFER_SIZE : len; memcpy(dma_tx_buffer, data, to_send); dma_tx_len = to_send; dma_tx_busy = true; - + // Fire DMA transfer dma_channel_set_read_addr(uart_tx_dma_chan, dma_tx_buffer, false); dma_channel_set_trans_count(uart_tx_dma_chan, to_send, true); - + if (to_send < len) { g_uart_tx_dropped += (len - to_send); return false; @@ -395,14 +392,25 @@ static uint8_t __not_in_flash_func(process_bridge_packet)(const uint8_t *data, s extern void process_mouse_report(const hid_mouse_report_t *report); extern void process_kbd_report(const hid_keyboard_report_t *report); +// 256-bit bitmap: O(1) single-load lookup replaces chained range comparisons. +// Bit N is set if byte value N is a valid fast command start byte. +// Excludes 0x0A (\n) and 0x0D (\r) to avoid text protocol conflict. 
+static const uint32_t g_fast_cmd_bitmap[8] = { + // Bits 0-31: commands 0x01-0x0F (excluding 0x0A, 0x0D) + // 0x01 MOUSE_MOVE, 0x02 MOUSE_CLICK, 0x07 SMOOTH_MOVE, 0x08 SMOOTH_CONFIG, + // 0x09 SMOOTH_CLEAR, 0x0B MULTI_MOVE, 0x0C KEY_COMBO, 0x0E INFO_EXT, 0x0F CYCLE_HUMAN + (1u << 0x01) | (1u << 0x02) | (1u << 0x07) | (1u << 0x08) | + (1u << 0x09) | (1u << 0x0B) | (1u << 0x0C) | (1u << 0x0E) | (1u << 0x0F), + // Bits 32-63: Xbox commands 0x20-0x28 + (1u << (0x20 - 32)) | (1u << (0x22 - 32)) | (1u << (0x23 - 32)) | + (1u << (0x27 - 32)) | (1u << (0x28 - 32)), + 0, 0, 0, 0, 0, // Bits 64-223: none + // Bits 224-255: 0xFC SYNC, 0xFE PING + (1u << (0xFC - 224)) | (1u << (0xFE - 224)), +}; + static __force_inline bool is_fast_cmd_start(uint8_t byte) { - // Exclude 0x0A (\n) and 0x0D (\r) — these overlap with FAST_CMD_TIMED_MOVE - // and FAST_CMD_INFO but the text protocol uses them as line terminators. - // TODO: Reassign FAST_CMD_TIMED_MOVE to 0x10+ to avoid this conflict. - if (__builtin_expect(byte == 0x0A || byte == 0x0D, 0)) return false; - return (byte >= FAST_CMD_MOUSE_MOVE && byte <= FAST_CMD_CYCLE_HUMAN) || - (byte >= FAST_CMD_XBOX_INPUT && byte <= FAST_CMD_XBOX_STATUS) || - byte == FAST_CMD_PING; + return (g_fast_cmd_bitmap[byte >> 5] >> (byte & 31)) & 1; } static bool __not_in_flash_func(process_fast_command)(const uint8_t *pkt) { @@ -463,11 +471,15 @@ static bool __not_in_flash_func(process_fast_command)(const uint8_t *pkt) { } case FAST_CMD_MULTI_MOVE: { + // Direct accumulator path — same as FAST_CMD_MOUSE_MOVE. + // Bridge moves are pre-transformed, so skip transform/velocity tracking + // that process_mouse_report() would apply. Sum all 3 sub-moves into + // a single kmbox_add_mouse_movement() call (1 spinlock instead of 3). 
const fast_cmd_multi_t *m = (const fast_cmd_multi_t *)pkt; - hid_mouse_report_t report = {0}; - if (m->x1 || m->y1) { report.x = m->x1; report.y = m->y1; process_mouse_report(&report); } - if (m->x2 || m->y2) { report.x = m->x2; report.y = m->y2; process_mouse_report(&report); } - if (m->x3 || m->y3) { report.x = m->x3; report.y = m->y3; process_mouse_report(&report); } + int16_t sum_x = m->x1 + m->x2 + m->x3; + int16_t sum_y = m->y1 + m->y2 + m->y3; + if (sum_x || sum_y) kmbox_add_mouse_movement(sum_x, sum_y); + neopixel_signal_activity(COLOR_BRIDGE_ACTIVE); fast_cmd_count++; return true; } diff --git a/lib/fast-protocol/CMakeLists.txt b/lib/fast-protocol/CMakeLists.txt new file mode 100644 index 0000000..1e8b833 --- /dev/null +++ b/lib/fast-protocol/CMakeLists.txt @@ -0,0 +1,3 @@ +add_library(fast_protocol INTERFACE) +target_include_directories(fast_protocol INTERFACE include) +target_link_libraries(fast_protocol INTERFACE hid_defs) diff --git a/lib/fast-protocol/include/fast_protocol.h b/lib/fast-protocol/include/fast_protocol.h new file mode 100644 index 0000000..73c468f --- /dev/null +++ b/lib/fast-protocol/include/fast_protocol.h @@ -0,0 +1,196 @@ +/** + * Fast Binary Command Protocol + * + * Shared header for the 8-byte fixed-size binary command protocol used + * between Bridge and KMBox over UART. Included by both firmware targets. 
+ * + * Contains: + * - Command IDs (FAST_CMD_*) + * - Injection mode constants (INJECT_MODE_*) + * - Packed struct typedefs for type-punned parsing + * - Inline packet builders + * - HID button masks (via hid_defs.h) + */ + +#ifndef FAST_PROTOCOL_H +#define FAST_PROTOCOL_H + +#include +#include +#include +#include "hid_defs.h" + +// ============================================================================ +// Command IDs (8-byte fixed-size packets, Bridge <-> KMBox UART) +// ============================================================================ + +#define FAST_CMD_MOUSE_MOVE 0x01 // Direct accumulator (buttons + wheel + move) +#define FAST_CMD_MOUSE_CLICK 0x02 // Button click with repeat count +#define FAST_CMD_SMOOTH_MOVE 0x07 // Smooth injection queue (humanized) +#define FAST_CMD_SMOOTH_CONFIG 0x08 // Configure smooth injection +#define FAST_CMD_SMOOTH_CLEAR 0x09 // Clear smooth injection queue +#define FAST_CMD_TIMED_MOVE 0x0A // Movement with timestamp for sync +#define FAST_CMD_MULTI_MOVE 0x0B // Multiple movements in one packet +#define FAST_CMD_KEY_COMBO 0x0C // Keyboard key combination +#define FAST_CMD_KEY_PRESS 0x0C // Single key press (alias for KEY_COMBO) +#define FAST_CMD_INFO 0x0D // Request system info +#define FAST_CMD_INFO_EXT 0x0E // Request extended stats +#define FAST_CMD_CYCLE_HUMAN 0x0F // Cycle humanization mode +#define FAST_CMD_XBOX_INPUT 0x20 // Xbox gamepad: buttons + triggers +#define FAST_CMD_XBOX_STICK_L 0x22 // Xbox left stick X/Y +#define FAST_CMD_XBOX_STICK_R 0x23 // Xbox right stick X/Y +#define FAST_CMD_XBOX_RELEASE 0x27 // Xbox clear all injection overrides +#define FAST_CMD_XBOX_STATUS 0x28 // Xbox console mode status report +#define FAST_CMD_SYNC 0xFC // Clock synchronization +#define FAST_CMD_RESPONSE 0xFD // Generic response +#define FAST_CMD_PING 0xFE // Keepalive ping +#define FAST_CMD_PACKET_SIZE 8 // Fixed 8-byte packet size + +// ============================================================================ +// 
Injection Modes +// ============================================================================ +// On the KMBox side, smooth_injection.h provides inject_mode_t as an enum +// with the same values. It sets _INJECT_MODES_DEFINED to suppress these +// macros so the enum and macros don't collide in the same translation unit. + +#ifndef _INJECT_MODES_DEFINED +#define INJECT_MODE_IMMEDIATE 0 +#define INJECT_MODE_SMOOTH 1 +#define INJECT_MODE_VELOCITY_MATCHED 2 +#define INJECT_MODE_MICRO 3 +#endif + +// ============================================================================ +// Packed Struct Typedefs (for type-punned parsing on KMBox side) +// ============================================================================ + +// Timed move command: 0x0A (for clock-synchronized injection) +typedef struct __attribute__((packed, aligned(4))) { + uint8_t cmd; // 0x0A + int16_t x; // X movement + int16_t y; // Y movement + uint16_t time_us; // Execution time offset (microseconds from sync) + uint8_t mode; // Injection mode +} fast_cmd_timed_t; + +_Static_assert(sizeof(fast_cmd_timed_t) == 8, "fast_cmd_timed_t must be 8 bytes"); + +// Mouse move command: 0x01 +typedef struct __attribute__((packed)) { + uint8_t cmd; // 0x01 + int16_t x, y; // Movement + uint8_t buttons; // Button state + int8_t wheel; // Wheel movement + uint8_t pad[2]; // Padding to 8 bytes +} fast_cmd_move_t; + +typedef fast_cmd_move_t fast_cmd_mouse_move_t; // Alias for compatibility + +// Multi-move command: 0x0B +typedef struct __attribute__((packed)) { + uint8_t cmd; // 0x0B + int8_t x1, y1; // First movement + int8_t x2, y2; // Second movement + int8_t x3, y3; // Third movement + uint8_t pad; // Padding to 8 bytes +} fast_cmd_multi_t; + +// Click command: 0x02 +typedef struct __attribute__((packed)) { + uint8_t cmd; // 0x02 + uint8_t button; // Button mask + uint8_t count; // Click count + uint8_t pad[5]; // Padding to 8 bytes +} fast_cmd_click_t; + +// Key press/combo command: 0x0C +typedef struct 
__attribute__((packed)) { + uint8_t cmd; // 0x0C + uint8_t modifiers; // Modifier keys + uint8_t keycode; // Primary keycode (for single key) + uint8_t keys[5]; // Additional keycodes (for combo) +} fast_cmd_key_t; + +typedef fast_cmd_key_t fast_cmd_combo_t; // Alias for compatibility + +// Smooth move command: 0x07 +typedef struct __attribute__((packed)) { + uint8_t cmd; // 0x07 + int16_t x; // X movement + int16_t y; // Y movement + uint8_t mode; // Injection mode + uint8_t pad[2]; // Padding to 8 bytes +} fast_cmd_smooth_t; + +// Config command: 0x08 +typedef struct __attribute__((packed)) { + uint8_t cmd; // 0x08 + uint8_t max_per_frame; // Max pixels per frame + uint8_t vel_match; // Velocity matching enable + uint8_t pad[5]; // Padding to 8 bytes +} fast_cmd_config_t; + +// Sync command: 0xFC +typedef struct __attribute__((packed)) { + uint8_t cmd; // 0xFC + uint32_t timestamp; // PC timestamp + uint16_t seq_num; // Sequence number + uint8_t pad; // Padding to 8 bytes +} fast_cmd_sync_t; + +// ============================================================================ +// Inline Packet Builders +// ============================================================================ + +/** + * Build FAST_CMD_SMOOTH_MOVE (0x07) packet. + * KMBox routes this through smooth_inject_movement() which applies + * humanization (easing, subdivision, tremor, overshoot) automatically. + */ +static inline size_t fast_build_smooth_move(uint8_t *buf, int16_t x, int16_t y, uint8_t mode) { + buf[0] = FAST_CMD_SMOOTH_MOVE; + buf[1] = (uint8_t)(x & 0xFF); + buf[2] = (uint8_t)((x >> 8) & 0xFF); + buf[3] = (uint8_t)(y & 0xFF); + buf[4] = (uint8_t)((y >> 8) & 0xFF); + buf[5] = mode; + buf[6] = 0; + buf[7] = 0; + return FAST_CMD_PACKET_SIZE; +} + +/** + * Build FAST_CMD_MOUSE_MOVE (0x01) packet. + * KMBox routes this through kmbox_add_mouse_movement() (direct accumulator). + * No smooth queue / humanization subdivision. 
+ */ +static inline size_t fast_build_mouse_move(uint8_t *buf, int16_t x, int16_t y, + uint8_t buttons, int8_t wheel) { + buf[0] = FAST_CMD_MOUSE_MOVE; + buf[1] = (uint8_t)(x & 0xFF); + buf[2] = (uint8_t)((x >> 8) & 0xFF); + buf[3] = (uint8_t)(y & 0xFF); + buf[4] = (uint8_t)((y >> 8) & 0xFF); + buf[5] = buttons; + buf[6] = (uint8_t)wheel; + buf[7] = 0; + return FAST_CMD_PACKET_SIZE; +} + +/** + * Build FAST_CMD_MOUSE_CLICK (0x02) packet. + * KMBox generates press + release pairs internally. + */ +static inline size_t fast_build_mouse_click(uint8_t *buf, uint8_t button, uint8_t count) { + buf[0] = FAST_CMD_MOUSE_CLICK; + buf[1] = button; + buf[2] = count; + buf[3] = 0; + buf[4] = 0; + buf[5] = 0; + buf[6] = 0; + buf[7] = 0; + return FAST_CMD_PACKET_SIZE; +} + +#endif // FAST_PROTOCOL_H diff --git a/lib/hid-defs/CMakeLists.txt b/lib/hid-defs/CMakeLists.txt new file mode 100644 index 0000000..a700156 --- /dev/null +++ b/lib/hid-defs/CMakeLists.txt @@ -0,0 +1,2 @@ +add_library(hid_defs INTERFACE) +target_include_directories(hid_defs INTERFACE include) diff --git a/lib/hid-defs/include/hid_defs.h b/lib/hid-defs/include/hid_defs.h new file mode 100644 index 0000000..8e4d810 --- /dev/null +++ b/lib/hid-defs/include/hid_defs.h @@ -0,0 +1,20 @@ +/** + * Shared HID Definitions + * + * Canonical HID mouse button bit masks used by both KMBox firmware, + * bridge firmware, wire protocol, and fast command protocol. + * + * Values follow the USB HID Usage Table, Button Page (0x09). 
+ */ + +#ifndef HID_DEFS_H +#define HID_DEFS_H + +// HID mouse button bit masks +#define HID_BTN_LEFT 0x01 +#define HID_BTN_RIGHT 0x02 +#define HID_BTN_MIDDLE 0x04 +#define HID_BTN_BACK 0x08 +#define HID_BTN_FORWARD 0x10 + +#endif // HID_DEFS_H diff --git a/lib/kmbox-commands/CMakeLists.txt b/lib/kmbox-commands/CMakeLists.txt index 20e06e7..ede7109 100644 --- a/lib/kmbox-commands/CMakeLists.txt +++ b/lib/kmbox-commands/CMakeLists.txt @@ -11,4 +11,5 @@ target_include_directories(kmbox_commands PUBLIC # Link with pico_stdlib for time functions target_link_libraries(kmbox_commands pico_stdlib + hid_defs ) \ No newline at end of file diff --git a/lib/kmbox-commands/kmbox_commands.c b/lib/kmbox-commands/kmbox_commands.c index 77f0bf4..dc9e8fd 100644 --- a/lib/kmbox-commands/kmbox_commands.c +++ b/lib/kmbox-commands/kmbox_commands.c @@ -1363,6 +1363,71 @@ uint8_t kmbox_get_current_buttons(void) (g_kmbox_state.buttons[KMBOX_BUTTON_SIDE2].is_pressed ? 0x10 : 0); } +bool kmbox_try_drain_mouse_16(uint8_t last_sent_buttons, + uint8_t *buttons, int16_t *x, int16_t *y, + int8_t *wheel, int8_t *pan) +{ + // Build button byte (no lock needed — single-core writes) + uint8_t button_byte = + (g_kmbox_state.buttons[KMBOX_BUTTON_LEFT].is_pressed ? 0x01 : 0) | + (g_kmbox_state.buttons[KMBOX_BUTTON_RIGHT].is_pressed ? 0x02 : 0) | + (g_kmbox_state.buttons[KMBOX_BUTTON_MIDDLE].is_pressed ? 0x04 : 0) | + (g_kmbox_state.buttons[KMBOX_BUTTON_SIDE1].is_pressed ? 0x08 : 0) | + (g_kmbox_state.buttons[KMBOX_BUTTON_SIDE2].is_pressed ? 
0x10 : 0); + + *buttons = button_byte; + bool buttons_changed = (button_byte != last_sent_buttons); + + // Single spinlock: check pending + drain in one shot + uint32_t irq = spin_lock_blocking(g_acc_spinlock); + + bool pending = (g_kmbox_state.mouse_x_accumulator != 0 || + g_kmbox_state.mouse_y_accumulator != 0 || + g_kmbox_state.wheel_accumulator != 0); + + if (!pending && !buttons_changed) { + // Nothing to do — fast unlock + spin_unlock(g_acc_spinlock, irq); + *x = 0; *y = 0; *wheel = 0; *pan = 0; + return false; + } + + // Drain movement accumulators + *x = (int16_t)g_kmbox_state.mouse_x_accumulator; + g_kmbox_state.mouse_x_accumulator = 0; + *y = (int16_t)g_kmbox_state.mouse_y_accumulator; + g_kmbox_state.mouse_y_accumulator = 0; + + // Drain wheel (clamp to int8 range, keep remainder) + int16_t w_acc = g_kmbox_state.wheel_accumulator; + if (w_acc > 127) { + *wheel = 127; + g_kmbox_state.wheel_accumulator = w_acc - 127; + } else if (w_acc < -128) { + *wheel = -128; + g_kmbox_state.wheel_accumulator = w_acc + 128; + } else { + *wheel = (int8_t)w_acc; + g_kmbox_state.wheel_accumulator = 0; + } + + // Drain pan (clamp to int8 range, keep remainder) + int16_t p_acc = g_kmbox_state.pan_accumulator; + if (p_acc > 127) { + *pan = 127; + g_kmbox_state.pan_accumulator = p_acc - 127; + } else if (p_acc < -128) { + *pan = -128; + g_kmbox_state.pan_accumulator = p_acc + 128; + } else { + *pan = (int8_t)p_acc; + g_kmbox_state.pan_accumulator = 0; + } + + spin_unlock(g_acc_spinlock, irq); + return true; +} + void kmbox_set_axis_lock(bool lock_x, bool lock_y) { g_kmbox_state.lock_mx = lock_x; diff --git a/lib/kmbox-commands/kmbox_commands.h b/lib/kmbox-commands/kmbox_commands.h index 3220b90..1a7cdf8 100644 --- a/lib/kmbox-commands/kmbox_commands.h +++ b/lib/kmbox-commands/kmbox_commands.h @@ -14,12 +14,14 @@ // Button Definitions //--------------------------------------------------------------------+ -// HID Button bit masks (compatible with FAST_BTN_* protocol) -#define 
KMBOX_HID_BTN_LEFT 0x01 -#define KMBOX_HID_BTN_RIGHT 0x02 -#define KMBOX_HID_BTN_MIDDLE 0x04 -#define KMBOX_HID_BTN_BACK 0x08 -#define KMBOX_HID_BTN_FORWARD 0x10 +#include "hid_defs.h" + +// Legacy aliases — prefer HID_BTN_* from hid_defs.h in new code +#define KMBOX_HID_BTN_LEFT HID_BTN_LEFT +#define KMBOX_HID_BTN_RIGHT HID_BTN_RIGHT +#define KMBOX_HID_BTN_MIDDLE HID_BTN_MIDDLE +#define KMBOX_HID_BTN_BACK HID_BTN_BACK +#define KMBOX_HID_BTN_FORWARD HID_BTN_FORWARD typedef enum { KMBOX_BUTTON_LEFT = 0, @@ -152,6 +154,14 @@ void kmbox_start_button_click(kmbox_button_t button, uint32_t current_time_ms); // Used to detect button-only state changes that need an immediate report. uint8_t kmbox_get_current_buttons(void); +// Atomic check-and-drain: single spinlock acquire to test pending + drain accumulators. +// Returns true if any movement/wheel/pan/button-change was pending and writes drained values. +// Replaces separate kmbox_has_pending_movement() + kmbox_get_mouse_report_16() calls +// to eliminate double spinlock acquisition in the hot path. 
+bool kmbox_try_drain_mouse_16(uint8_t last_sent_buttons, + uint8_t *buttons, int16_t *x, int16_t *y, + int8_t *wheel, int8_t *pan); + // Get button name string for debugging const char* kmbox_get_button_name(kmbox_button_t button); diff --git a/lib/wire-protocol/CMakeLists.txt b/lib/wire-protocol/CMakeLists.txt index 7254499..46095da 100644 --- a/lib/wire-protocol/CMakeLists.txt +++ b/lib/wire-protocol/CMakeLists.txt @@ -1,2 +1,3 @@ add_library(wire_protocol INTERFACE) target_include_directories(wire_protocol INTERFACE include) +target_link_libraries(wire_protocol INTERFACE hid_defs) diff --git a/lib/wire-protocol/include/wire_protocol.h b/lib/wire-protocol/include/wire_protocol.h index c5aa6df..2c03b0a 100644 --- a/lib/wire-protocol/include/wire_protocol.h +++ b/lib/wire-protocol/include/wire_protocol.h @@ -18,6 +18,7 @@ #include #include +#include "hid_defs.h" // ============================================================================ // Command Bytes @@ -52,14 +53,16 @@ #define WIRE_MAX_PACKET 8 // Maximum packet size in bytes -// Button masks (match HID standard) -#define WIRE_BTN_LEFT 0x01 -#define WIRE_BTN_RIGHT 0x02 -#define WIRE_BTN_MIDDLE 0x04 -#define WIRE_BTN_BACK 0x08 -#define WIRE_BTN_FORWARD 0x10 +// Button masks — canonical definitions in hid_defs.h (HID_BTN_*) +// Legacy aliases for existing wire-protocol consumers +#define WIRE_BTN_LEFT HID_BTN_LEFT +#define WIRE_BTN_RIGHT HID_BTN_RIGHT +#define WIRE_BTN_MIDDLE HID_BTN_MIDDLE +#define WIRE_BTN_BACK HID_BTN_BACK +#define WIRE_BTN_FORWARD HID_BTN_FORWARD -// Smooth injection modes +// Injection modes — canonical definitions in fast_protocol.h (INJECT_MODE_*) +// Legacy aliases for existing wire-protocol consumers #define WIRE_INJECT_IMMEDIATE 0 #define WIRE_INJECT_SMOOTH 1 #define WIRE_INJECT_VELOCITY_MATCHED 2 diff --git a/smooth_injection.c b/smooth_injection.c index eaa61e0..d556fd1 100644 --- a/smooth_injection.c +++ b/smooth_injection.c @@ -113,9 +113,12 @@ static inline int32_t 
__not_in_flash_func(fp_mul)(int32_t a, int32_t b) { return (int32_t)((uint32_t)(hi << SMOOTH_FP_SHIFT) | (lo >> SMOOTH_FP_SHIFT)); } +// Fixed-point division via M33 FPU float path (~14 cycles VDIV.F32) +// instead of 64-bit software division (__aeabi_ldivmod, ~60-100 cycles). +// Precision: float carries a 24-bit mantissa, so wider operands/quotients are rounded; the result is cast to int32 without a range check. static __force_inline int32_t fp_div(int32_t a, int32_t b) { if (b == 0) return 0; - return (int32_t)(((int64_t)a << SMOOTH_FP_SHIFT) / b); + return (int32_t)((float)a / (float)b * (float)SMOOTH_FP_ONE); } static __force_inline int32_t int_to_fp(int16_t val) { @@ -132,18 +135,59 @@ static __force_inline int16_t fp_to_int(int32_t fp_val) { return (int16_t)((fp_val + SMOOTH_FP_HALF) >> SMOOTH_FP_SHIFT); } -static __force_inline int8_t clamp_i8(int32_t val) { - if (val > 127) return 127; - if (val < -128) return -128; - return (int8_t)val; +static __force_inline int16_t clamp_i16(int32_t val) { + if (val > 32767) return 32767; + if (val < -32768) return -32768; + return (int16_t)val; } //--------------------------------------------------------------------+ -// Easing Curves - FPU direct computation (no LUTs on M33) +// Velocity IIR Filter Helpers //--------------------------------------------------------------------+ -// Easing is now computed directly using apply_easing_fpu() inline -// M33 FPU makes direct computation faster than LUT lookups +// Fixed-point square root using hardware VSQRT.F32 (14 cycles on M33) +static inline int32_t __not_in_flash_func(fp_sqrt)(int32_t x) { + if (x <= 0) return 0; + float f = (float)x / (float)SMOOTH_FP_ONE; + float r = sqrtf(f); + return (int32_t)(r * SMOOTH_FP_ONE); +} + +// Soft saturation using Padé approximant: max * x * (27 + x²) / (27 + 9x²) +// Smooth compression instead of a hard clamp. Inputs up to max/2 pass through unchanged, so the curve starts with a small step (~3% of max); output stays <= max only while |input| <= 3*max. +static inline int32_t __not_in_flash_func(soft_saturate_fp)(int32_t input, int32_t max_fp) { + if (max_fp <= 0) return input; + int32_t abs_input = input >= 0 ? 
input : -input; + if (abs_input <= max_fp / 2) return input; // Linear region, no saturation needed + + // x = abs_input / max_fp (normalized) + int32_t x = fp_div(abs_input, max_fp); + // x² in fixed-point + int32_t x2 = fp_mul(x, x); + // numerator = x * (27 + x²) + int32_t twenty_seven = 27 * SMOOTH_FP_ONE; + int32_t num = fp_mul(x, twenty_seven + x2); + // denominator = 27 + 9*x² + int32_t den = twenty_seven + 9 * x2; + if (den == 0) return input; + // result = max * num / den + int32_t result = fp_mul(max_fp, fp_div(num, den)); + return input >= 0 ? result : -result; +} + +// Adaptive alpha: high accel → alpha near alpha_max (responsive); +// low accel → alpha near alpha_min (smooth) +// Formula: alpha_min + range - range / (1 + accel_mag * sensitivity) +static inline int32_t __not_in_flash_func(compute_adaptive_alpha)( + int32_t accel_mag, int32_t alpha_min, int32_t alpha_max, int32_t sensitivity) { + int32_t range = alpha_max - alpha_min; + if (range <= 0) return alpha_max; + int32_t product = fp_mul(accel_mag, sensitivity); + int32_t denom = SMOOTH_FP_ONE + product; + if (denom <= 0) denom = 1; + int32_t decay = fp_div(range, denom); + return alpha_min + range - decay; +} //--------------------------------------------------------------------+ // Velocity Tracking @@ -153,26 +197,32 @@ static __force_inline int8_t clamp_i8(int32_t val) { static int32_t g_velocity_sum_x_fp = 0; static int32_t g_velocity_sum_y_fp = 0; +// SMOOTH_VELOCITY_WINDOW must be power of 2 for bitmask; use shift for division +_Static_assert((SMOOTH_VELOCITY_WINDOW & (SMOOTH_VELOCITY_WINDOW - 1)) == 0, + "SMOOTH_VELOCITY_WINDOW must be power of 2"); +#define VELOCITY_WINDOW_MASK (SMOOTH_VELOCITY_WINDOW - 1) +#define VELOCITY_WINDOW_SHIFT 3 // log2(8) = 3 + static void velocity_update(int16_t x, int16_t y) { velocity_tracker_t *v = &g_smooth.velocity; - + const uint8_t idx = v->history_index; + // Get old value that will be replaced - int16_t old_x = v->x_history[v->history_index]; - 
int16_t old_y = v->y_history[v->history_index]; - + int16_t old_x = v->x_history[idx]; + int16_t old_y = v->y_history[idx]; + // Store new values in history - v->x_history[v->history_index] = x; - v->y_history[v->history_index] = y; - v->history_index = (v->history_index + 1) % SMOOTH_VELOCITY_WINDOW; - + v->x_history[idx] = x; + v->y_history[idx] = y; + v->history_index = (idx + 1) & VELOCITY_WINDOW_MASK; // Bitmask instead of modulo + // Update running sum in fixed-point (remove old, add new) - O(1) - // Keep accumulators in fixed-point to preserve precision across cycles - g_velocity_sum_x_fp = g_velocity_sum_x_fp - int_to_fp(old_x) + int_to_fp(x); - g_velocity_sum_y_fp = g_velocity_sum_y_fp - int_to_fp(old_y) + int_to_fp(y); - - // Store as fixed-point average (no precision loss) - v->avg_velocity_x_fp = g_velocity_sum_x_fp / SMOOTH_VELOCITY_WINDOW; - v->avg_velocity_y_fp = g_velocity_sum_y_fp / SMOOTH_VELOCITY_WINDOW; + g_velocity_sum_x_fp += int_to_fp(x) - int_to_fp(old_x); + g_velocity_sum_y_fp += int_to_fp(y) - int_to_fp(old_y); + + // Arithmetic right shift by 3 = divide by 8 (power of 2, no division instruction) + v->avg_velocity_x_fp = g_velocity_sum_x_fp >> VELOCITY_WINDOW_SHIFT; + v->avg_velocity_y_fp = g_velocity_sum_y_fp >> VELOCITY_WINDOW_SHIFT; } // smooth state accessor for external use @@ -374,23 +424,6 @@ static bool queue_single_substep(int32_t x_fp, int32_t y_fp, inject_mode_t mode, g_smooth.humanization.onset_jitter_max); } - // Fix #4: Fix easing curve selection bias - single RNG draw, correct distribution - // Old code: two draws created biased distribution (33% ease-out, then 25% of remaining) - // New code: one draw with explicit probability buckets - easing_mode_t easing = EASING_LINEAR; - if (max_component > int_to_fp(10)) { - // Larger sub-steps: smooth easing - easing = EASING_EASE_IN_OUT; - } else { - uint32_t r = rng_next() % 12; - if (r < 4) { - easing = EASING_EASE_OUT; // 33% chance - } else if (r < 7) { - easing = 
EASING_EASE_IN_OUT; // 25% chance - } - // else: 42% linear (default) - } - // For velocity-matched mode, adjust based on current velocity if (mode == INJECT_MODE_VELOCITY_MATCHED && g_smooth.velocity_matching_enabled) { int32_t vel_mag = g_smooth.velocity.avg_velocity_x_fp; @@ -423,7 +456,6 @@ static bool queue_single_substep(int32_t x_fp, int32_t y_fp, inject_mode_t mode, entry->frames_left = frames; entry->total_frames = frames; entry->mode = mode; - entry->easing = easing; entry->active = true; entry->onset_delay = onset_delay; // Fix #1: onset jitter entry->will_overshoot = false; @@ -454,8 +486,8 @@ static bool queue_single_substep(int32_t x_fp, int32_t y_fp, inject_mode_t mode, #define SUBSTEP_MIN_MOVEMENT_PX 3 // Number of sub-steps to split into (base value, randomized) -#define SUBSTEP_COUNT_BASE 4 -#define SUBSTEP_COUNT_EXTRA_MAX 4 // Up to +4 extra = 4-8 total +#define SUBSTEP_COUNT_BASE 2 +#define SUBSTEP_COUNT_EXTRA_MAX 2 // Up to +2 extra = 2-4 total // Frame delay between consecutive sub-steps (randomized) #define SUBSTEP_DELAY_MIN 1 @@ -540,7 +572,7 @@ bool smooth_inject_movement_fp(int32_t x_fp, int32_t y_fp, inject_mode_t mode) { //--------------------------------------------------------------------+ // Determine number of sub-steps (only reached in FULL mode) - uint8_t num_substeps = SUBSTEP_COUNT_BASE + (uint8_t)rng_range(0, 3); // 4-7 sub-steps + uint8_t num_substeps = SUBSTEP_COUNT_BASE + (uint8_t)rng_range(0, 2); // 2-4 sub-steps // Clamp to available queue space uint8_t available = SMOOTH_QUEUE_SIZE - g_smooth.queue_count; @@ -670,11 +702,13 @@ void smooth_record_physical_movement(int16_t x, int16_t y) { velocity_update(x, y); } -void __not_in_flash_func(smooth_process_frame)(int8_t *out_x, int8_t *out_y) { - // Super-fast path for empty queue with no accumulator - if (g_smooth.queue_count == 0 && - g_smooth.x_accumulator_fp == 0 && - g_smooth.y_accumulator_fp == 0) { +void __not_in_flash_func(smooth_process_frame)(int16_t *out_x, int16_t 
*out_y) { + // Super-fast path for empty queue with no accumulator and no filter debt + if (g_smooth.queue_count == 0 && + g_smooth.x_accumulator_fp == 0 && + g_smooth.y_accumulator_fp == 0 && + g_smooth.filtered_vx_fp == 0 && + g_smooth.filtered_vy_fp == 0) { *out_x = 0; *out_y = 0; g_smooth.frames_processed++; @@ -692,17 +726,17 @@ void __not_in_flash_func(smooth_process_frame)(int8_t *out_x, int8_t *out_y) { int32_t frame_x_fp = 0; int32_t frame_y_fp = 0; - // Early exit if no active entries + // Early exit if no active entries (still run IIR filter to release debt) if (g_smooth.queue_count == 0) { - goto apply_accumulator; + goto apply_vel_filter; } - + // Safety: if queue_count > 0 but linked list is empty, reset to prevent hang if (g_active_head == NULL) { g_smooth.queue_count = 0; g_free_bitmap = 0xFFFFFFFFFFFFFFFFULL; g_active_node_bitmap = 0ULL; - goto apply_accumulator; + goto apply_vel_filter; } // Process active entries using linked list (O(n) where n = active count, not queue size) @@ -722,22 +756,13 @@ void __not_in_flash_func(smooth_process_frame)(int8_t *out_x, int8_t *out_y) { } if (entry->frames_left > 0) { - // Calculate progress using FPU (faster than LUT on M33) - // Offset by +1 so the first frame (frames_elapsed=0) produces - // non-zero output. Without this, progress_delta is always 0 on - // the first frame, which means no HID report is sent when the - // physical mouse is idle — the cursor won't move until frame 2. 
- uint8_t frames_elapsed = entry->total_frames - entry->frames_left; - float progress_flt = (float)(frames_elapsed + 1) / (float)(entry->total_frames + 1); - float prev_progress = (float)frames_elapsed / (float)(entry->total_frames + 1); - - // Apply easing curve using FPU (3 cycles for multiply on M33) - float eased_progress = apply_easing_fpu(progress_flt, entry->easing); - float prev_eased = apply_easing_fpu(prev_progress, entry->easing); - float progress_delta_flt = eased_progress - prev_eased; - - // Convert to fixed-point for accumulator - int32_t progress_delta = (int32_t)(progress_delta_flt * SMOOTH_FP_ONE); + // Linear progress: equal fraction per frame (IIR filter handles smoothing) + int32_t progress_delta; + if (entry->total_frames <= 1) { + progress_delta = SMOOTH_FP_ONE; + } else { + progress_delta = SMOOTH_FP_ONE / entry->total_frames; + } // === MOVEMENT DELTA (tracked - affects remaining) === int32_t movement_dx_fp = fp_mul(entry->x_fp, progress_delta); @@ -776,7 +801,6 @@ void __not_in_flash_func(smooth_process_frame)(int8_t *out_x, int8_t *out_y) { correction->frames_left = (uint8_t)rng_range(2, 4); // Correct over 2-4 frames correction->total_frames = correction->frames_left; correction->mode = INJECT_MODE_SMOOTH; - correction->easing = EASING_EASE_OUT; // Quick correction correction->active = true; correction->will_overshoot = false; corrections_this_frame++; @@ -809,26 +833,73 @@ void __not_in_flash_func(smooth_process_frame)(int8_t *out_x, int8_t *out_y) { frame_y_fp = 0; } -apply_accumulator: +apply_vel_filter: + // Velocity IIR filter: debt-based smoothing across command boundaries. + // Raw queue output accumulates as "debt"; each frame releases a fraction (alpha). + // Total movement is conserved — no tracking loss. 
+ if (g_smooth.humanization.mode == HUMANIZATION_FULL) { + int32_t raw_vx = frame_x_fp, raw_vy = frame_y_fp; + + // Acceleration magnitude (for adaptive alpha) + int32_t ax = raw_vx - g_smooth.prev_raw_vx_fp; + int32_t ay = raw_vy - g_smooth.prev_raw_vy_fp; + int32_t accel_mag = fp_sqrt(fp_mul(ax, ax) + fp_mul(ay, ay)); + g_smooth.prev_raw_vx_fp = raw_vx; + g_smooth.prev_raw_vy_fp = raw_vy; + + // Adaptive alpha: high accel → fast release, low accel → slow release + int32_t alpha = compute_adaptive_alpha( + accel_mag, + g_smooth.humanization.vel_filter_alpha_min_fp, + g_smooth.humanization.vel_filter_alpha_max_fp, + g_smooth.humanization.vel_filter_accel_sens_fp); + + // Accumulate raw queue output into velocity debt + g_smooth.filtered_vx_fp += raw_vx; + g_smooth.filtered_vy_fp += raw_vy; + + // Release portion of debt (alpha controls release rate) + int32_t release_x = fp_mul(alpha, g_smooth.filtered_vx_fp); + int32_t release_y = fp_mul(alpha, g_smooth.filtered_vy_fp); + g_smooth.filtered_vx_fp -= release_x; + g_smooth.filtered_vy_fp -= release_y; + + frame_x_fp = release_x; + frame_y_fp = release_y; + + // Soft saturation (replaces hard clamp for FULL mode) + int32_t max_fp = int_to_fp(g_smooth.max_per_frame); + int32_t sat_x = soft_saturate_fp(frame_x_fp, max_fp); + int32_t sat_y = soft_saturate_fp(frame_y_fp, max_fp); + // Return any capped excess back to debt (conserves movement) + g_smooth.filtered_vx_fp += frame_x_fp - sat_x; + g_smooth.filtered_vy_fp += frame_y_fp - sat_y; + frame_x_fp = sat_x; + frame_y_fp = sat_y; + } + // Add sub-pixel accumulator frame_x_fp += g_smooth.x_accumulator_fp; frame_y_fp += g_smooth.y_accumulator_fp; - + // Convert to integer with sub-pixel tracking int16_t out_x_int = fp_to_int(frame_x_fp); int16_t out_y_int = fp_to_int(frame_y_fp); - - // Apply per-frame rate limiting - if (out_x_int > g_smooth.max_per_frame) { - out_x_int = g_smooth.max_per_frame; - } else if (out_x_int < -g_smooth.max_per_frame) { - out_x_int = 
-g_smooth.max_per_frame; - } - - if (out_y_int > g_smooth.max_per_frame) { - out_y_int = g_smooth.max_per_frame; - } else if (out_y_int < -g_smooth.max_per_frame) { - out_y_int = -g_smooth.max_per_frame; + + // Apply per-frame rate limiting (hard clamp for non-FULL modes; + // FULL mode uses soft saturation from IIR filter above) + if (g_smooth.humanization.mode != HUMANIZATION_FULL) { + if (out_x_int > g_smooth.max_per_frame) { + out_x_int = g_smooth.max_per_frame; + } else if (out_x_int < -g_smooth.max_per_frame) { + out_x_int = -g_smooth.max_per_frame; + } + + if (out_y_int > g_smooth.max_per_frame) { + out_y_int = g_smooth.max_per_frame; + } else if (out_y_int < -g_smooth.max_per_frame) { + out_y_int = -g_smooth.max_per_frame; + } } // Update sub-pixel accumulator with remainder @@ -850,8 +921,8 @@ void __not_in_flash_func(smooth_process_frame)(int8_t *out_x, int8_t *out_y) { } // Output - *out_x = clamp_i8(out_x_int); - *out_y = clamp_i8(out_y_int); + *out_x = clamp_i16(out_x_int); + *out_y = clamp_i16(out_y_int); // Fix: When queue is fully drained and output rounds to zero, flush // sub-pixel accumulator residuals. Without this, tiny residuals (<1px) @@ -860,6 +931,20 @@ void __not_in_flash_func(smooth_process_frame)(int8_t *out_x, int8_t *out_y) { if (g_smooth.queue_count == 0 && out_x_int == 0 && out_y_int == 0) { g_smooth.x_accumulator_fp = 0; g_smooth.y_accumulator_fp = 0; + + // Flush velocity filter state to prevent residual drift + if (g_smooth.humanization.mode == HUMANIZATION_FULL) { + // Only flush if filtered velocity is sub-quarter-pixel + int32_t quarter_px = SMOOTH_FP_ONE / 4; + int32_t abs_fvx = g_smooth.filtered_vx_fp >= 0 ? g_smooth.filtered_vx_fp : -g_smooth.filtered_vx_fp; + int32_t abs_fvy = g_smooth.filtered_vy_fp >= 0 ? 
g_smooth.filtered_vy_fp : -g_smooth.filtered_vy_fp; + if (abs_fvx < quarter_px && abs_fvy < quarter_px) { + g_smooth.filtered_vx_fp = 0; + g_smooth.filtered_vy_fp = 0; + g_smooth.prev_raw_vx_fp = 0; + g_smooth.prev_raw_vy_fp = 0; + } + } } g_smooth.frames_processed++; @@ -892,6 +977,11 @@ void smooth_clear_queue(void) { // Also reset velocity tracking accumulators for consistency g_velocity_sum_x_fp = 0; g_velocity_sum_y_fp = 0; + // Reset velocity IIR filter state + g_smooth.filtered_vx_fp = 0; + g_smooth.filtered_vy_fp = 0; + g_smooth.prev_raw_vx_fp = 0; + g_smooth.prev_raw_vy_fp = 0; } void smooth_get_stats(uint32_t *total_injected, uint32_t *frames_processed, @@ -906,6 +996,9 @@ bool smooth_has_pending(void) { if (g_smooth.queue_count > 0) return true; if (g_smooth.x_accumulator_fp != 0) return true; if (g_smooth.y_accumulator_fp != 0) return true; + // Velocity filter debt: movement received but not yet output + if (g_smooth.filtered_vx_fp != 0) return true; + if (g_smooth.filtered_vy_fp != 0) return true; return false; } @@ -921,7 +1014,7 @@ static void smooth_set_humanization_mode_internal(humanization_mode_t mode, bool switch (mode) { case HUMANIZATION_OFF: // Disable all humanization - pure digital pass-through - g_smooth.max_per_frame = 16; // Fix #2: fixed + g_smooth.max_per_frame = 32767; // No artificial limit g_smooth.velocity_matching_enabled = true; g_smooth.humanization.jitter_enabled = false; g_smooth.humanization.jitter_amount_fp = 0; @@ -933,12 +1026,15 @@ static void smooth_set_humanization_mode_internal(humanization_mode_t mode, bool g_smooth.humanization.accum_clamp_fp = 0; // Fix #13: no clamp (unlimited) g_smooth.humanization.onset_jitter_min = 0; // Fix #1: no onset delay g_smooth.humanization.onset_jitter_max = 0; + g_smooth.humanization.vel_filter_alpha_min_fp = SMOOTH_FP_ONE; // Passthrough + g_smooth.humanization.vel_filter_alpha_max_fp = SMOOTH_FP_ONE; + g_smooth.humanization.vel_filter_accel_sens_fp = 0; break; - + case 
HUMANIZATION_MICRO: // Micro-noise only — for pre-humanized input // Only adds sub-pixel tremor + sensor noise below the PC's correction threshold - g_smooth.max_per_frame = 16; // Fixed — don't alter delivery rate + g_smooth.max_per_frame = 32767; // No artificial limit g_smooth.velocity_matching_enabled = false; // Input already has natural velocity g_smooth.humanization.jitter_enabled = true; g_smooth.humanization.jitter_amount_fp = int_to_fp(1) / 2; // 0.5px base tremor @@ -947,17 +1043,20 @@ static void smooth_set_humanization_mode_internal(humanization_mode_t mode, bool g_smooth.humanization.vel_slow_threshold_fp = int_to_fp(2); g_smooth.humanization.vel_fast_threshold_fp = int_to_fp(10); g_smooth.humanization.delivery_error_fp = SMOOTH_FP_ONE / 100; // ±1% sensor noise - g_smooth.humanization.accum_clamp_fp = int_to_fp(8); // ±8px — generous clamp prevents + g_smooth.humanization.accum_clamp_fp = int_to_fp(1000); // Generous clamp prevents // unbounded drift from delivery error // residuals while trusting input g_smooth.humanization.onset_jitter_min = 0; // No onset delay g_smooth.humanization.onset_jitter_max = 0; + g_smooth.humanization.vel_filter_alpha_min_fp = SMOOTH_FP_ONE; // Passthrough + g_smooth.humanization.vel_filter_alpha_max_fp = SMOOTH_FP_ONE; + g_smooth.humanization.vel_filter_accel_sens_fp = 0; break; case HUMANIZATION_FULL: // Full humanization — for raw/robotic input - // Subdivision, easing, onset delay, overshoot — the works - g_smooth.max_per_frame = (int16_t)rng_range(12, 20); // Per-session variation + // Subdivision, IIR velocity filter, onset delay, overshoot — the works + g_smooth.max_per_frame = (int16_t)rng_range(10000, 12000); // High-DPI: ~650 IPS at 26000 DPI g_smooth.velocity_matching_enabled = true; g_smooth.humanization.jitter_enabled = true; g_smooth.humanization.jitter_amount_fp = int_to_fp(1); // 1.0px base tremor @@ -966,11 +1065,14 @@ static void smooth_set_humanization_mode_internal(humanization_mode_t mode, bool 
g_smooth.humanization.vel_slow_threshold_fp = int_to_fp(rng_range(1, 4)); g_smooth.humanization.vel_fast_threshold_fp = int_to_fp(rng_range(8, 14)); g_smooth.humanization.delivery_error_fp = SMOOTH_FP_ONE / 50; // ±2% - g_smooth.humanization.accum_clamp_fp = int_to_fp(16); // ±16px — matches max_per_frame, + g_smooth.humanization.accum_clamp_fp = int_to_fp(12000); // Upper end of the max_per_frame range, // lets accumulator drain naturally // when queue overflow dumps to accum g_smooth.humanization.onset_jitter_min = 1; // 1-4 frames g_smooth.humanization.onset_jitter_max = 4; + g_smooth.humanization.vel_filter_alpha_min_fp = SMOOTH_FP_ONE * 50 / 100; // 0.50 — low-accel floor: release half the debt per frame + g_smooth.humanization.vel_filter_alpha_max_fp = SMOOTH_FP_ONE * 90 / 100; // 0.90 — high-accel ceiling: near-instant release + g_smooth.humanization.vel_filter_accel_sens_fp = SMOOTH_FP_ONE * 25 / 100; // 0.25 — how quickly alpha rises with acceleration break; default: @@ -978,6 +1080,12 @@ static void smooth_set_humanization_mode_internal(humanization_mode_t mode, bool return; } + // Reset velocity IIR filter state on mode change + g_smooth.filtered_vx_fp = 0; + g_smooth.filtered_vy_fp = 0; + g_smooth.prev_raw_vx_fp = 0; + g_smooth.prev_raw_vy_fp = 0; + // NOTE: Runtime flash saves are DISABLED to prevent device hangs. // flash_safe_execute() pauses Core1 (USB host) via multicore_lockout // during the ~100ms flash erase/program, which causes the USB host diff --git a/smooth_injection.h b/smooth_injection.h index 307ae1c..78e4f6b 100644 --- a/smooth_injection.h +++ b/smooth_injection.h @@ -24,8 +24,8 @@ //--------------------------------------------------------------------+ // Maximum movement per HID frame (prevents jarring jumps) -// At 125Hz (8ms), 16 pixels/frame = ~2000 pixels/sec max smooth speed -#define SMOOTH_MAX_PER_FRAME 16 +// Mode configs override this default. Sized for high-DPI mice (26000+ DPI). 
+#define SMOOTH_MAX_PER_FRAME 127 // Movement queue size (number of pending inject operations) #define SMOOTH_QUEUE_SIZE 64 @@ -42,30 +42,31 @@ // Injection Modes //--------------------------------------------------------------------+ +// This enum is the canonical typed definition on the KMBox side. +// fast_protocol.h also defines INJECT_MODE_* as macros for the bridge +// side (which doesn't include this header). We undef any existing macros +// (in case fast_protocol.h was included first via defines.h) and set +// _INJECT_MODES_DEFINED to prevent fast_protocol.h from (re)defining them. +#undef INJECT_MODE_IMMEDIATE +#undef INJECT_MODE_SMOOTH +#undef INJECT_MODE_VELOCITY_MATCHED +#undef INJECT_MODE_MICRO +#define _INJECT_MODES_DEFINED + typedef enum { // Immediate: Add directly to accumulator (legacy behavior) INJECT_MODE_IMMEDIATE = 0, - + // Smooth: Spread movement across frames to match max per-frame rate INJECT_MODE_SMOOTH, - + // Velocity-matched: Blend with current mouse velocity INJECT_MODE_VELOCITY_MATCHED, - + // Micro: For tiny sub-pixel adjustments (anti-recoil, aim correction) INJECT_MODE_MICRO, } inject_mode_t; -//--------------------------------------------------------------------+ -// Easing Modes for Natural Movement -//--------------------------------------------------------------------+ - -typedef enum { - EASING_LINEAR = 0, // No easing (constant velocity) - EASING_EASE_IN_OUT, // Slow start, fast middle, slow end (natural) - EASING_EASE_OUT, // Quick start, slow end (corrections) -} easing_mode_t; - //--------------------------------------------------------------------+ // Humanization Modes //--------------------------------------------------------------------+ @@ -87,9 +88,8 @@ typedef struct { int32_t x_remaining_fp; // Remaining X to inject int32_t y_remaining_fp; // Remaining Y to inject uint8_t frames_left; // Frames remaining for this movement - uint8_t total_frames; // Total frames for this movement (for easing calc) + uint8_t 
total_frames; // Total frames for this movement (for linear progress calc) inject_mode_t mode; // Injection mode - easing_mode_t easing; // Easing curve to apply bool active; // Is this entry in use? uint8_t onset_delay; // Frames to wait before starting delivery (onset jitter) @@ -143,6 +143,12 @@ typedef struct { // Configuration int16_t max_per_frame; bool velocity_matching_enabled; + + // Velocity IIR filter state (output-stage smoothing) + int32_t filtered_vx_fp; // Unreleased movement debt X awaiting output (16.16) + int32_t filtered_vy_fp; // Unreleased movement debt Y awaiting output (16.16) + int32_t prev_raw_vx_fp; // Previous raw velocity X for acceleration calc + int32_t prev_raw_vy_fp; // Previous raw velocity Y for acceleration calc // Humanization settings struct { @@ -157,6 +163,10 @@ typedef struct { int32_t accum_clamp_fp; // Max accumulator magnitude (mode-dependent) uint8_t onset_jitter_min; // Min onset delay frames uint8_t onset_jitter_max; // Max onset delay frames + // Velocity IIR filter parameters + int32_t vel_filter_alpha_min_fp; // Alpha floor: debt fraction released per frame at low accel (smoothest); 1.0 = passthrough + int32_t vel_filter_alpha_max_fp; // Alpha ceiling: release fraction approached at high accel (most responsive) + int32_t vel_filter_accel_sens_fp; // Acceleration sensitivity: how quickly alpha rises from floor toward ceiling (16.16) } humanization; } smooth_injection_state_t; @@ -205,7 +215,7 @@ void smooth_record_physical_movement(int16_t x, int16_t y); * @param out_x Output X movement for this frame * @param out_y Output Y movement for this frame */ -void smooth_process_frame(int8_t *out_x, int8_t *out_y); +void smooth_process_frame(int16_t *out_x, int16_t *out_y); /** * Get current average velocity (for velocity-matched injection) diff --git a/tusb_config.h b/tusb_config.h index a34767d..32a6c69 100644 --- a/tusb_config.h +++ b/tusb_config.h @@ -81,7 +81,11 @@ #endif //------------- CLASS -------------// -#define CFG_TUD_HID 1 +// Support up to 4 HID interfaces for faithful multi-interface device mirroring. 
+// Gaming mice expose 2-4 HID interfaces (mouse, keyboard-macros, vendor). +// We mirror all of them on the device side so the downstream PC sees an +// identical interface layout. +#define CFG_TUD_HID 4 // HID buffer size - should be sufficient to hold ID (if any) + Data #define CFG_TUD_HID_EP_BUFSIZE 64 diff --git a/usb_hid.c b/usb_hid.c index 4bb9454..a271388 100644 --- a/usb_hid.c +++ b/usb_hid.c @@ -165,10 +165,118 @@ static struct { bool valid; } host_config_info = { .bmAttributes = TUSB_DESC_CONFIG_ATT_REMOTE_WAKEUP, .bMaxPower = USB_CONFIG_POWER_MA / 2, .bInterfaceProtocol = HID_ITF_PROTOCOL_NONE, .bInterfaceSubClass = 0, .wMaxPacketSize = CFG_TUD_HID_EP_BUFSIZE, .bInterval = HID_POLLING_INTERVAL_MS, .valid = false }; -// Runtime configuration descriptor buffer (mutable so we can patch it) -static uint8_t desc_configuration_runtime[TUD_CONFIG_DESC_LEN + TUD_HID_DESC_LEN]; +// Runtime configuration descriptor buffer (large enough for multi-interface) +static uint8_t desc_configuration_runtime[DESC_CONFIG_RUNTIME_MAX]; static bool desc_config_runtime_valid = false; +//--------------------------------------------------------------------+ +// Multi-interface mirroring infrastructure +//--------------------------------------------------------------------+ + +// Per-interface state captured from host device for faithful mirroring. +// Gaming mice expose 2-4 HID interfaces (mouse, keyboard-macros, vendor). +// We capture all of them and present matching interfaces on the device side. 
+typedef struct { + // Interface properties (from host config descriptor) + uint8_t itf_subclass; + uint8_t itf_protocol; + uint16_t ep_in_max_packet; + uint8_t ep_in_interval; + bool has_ep_out; + uint16_t ep_out_max_packet; + uint8_t ep_out_interval; + + // Runtime state (populated during tuh_hid_mount_cb) + uint8_t host_dev_addr; + uint8_t host_instance; + bool is_mouse; // Interface we inject mouse/keyboard/consumer into + + // HID report descriptor (non-mouse only; mouse uses desc_hid_report_runtime) + uint8_t report_desc[MIRROR_ITF_DESC_MAX]; + uint16_t report_desc_len; + + bool active; +} mirrored_interface_t; + +static mirrored_interface_t mirrored_itfs[MAX_DEVICE_HID_INTERFACES]; +static uint8_t mirrored_itf_count = 0; // Active mirrored interfaces +static uint8_t expected_hid_itf_count = 0; // Expected from config descriptor +static uint8_t mounted_hid_itf_count = 0; // Mounted so far + +// Which device-side HID instance carries the composite descriptor (keyboard + +// mouse + consumer). All mouse/keyboard/consumer reports must be sent on this +// instance. Defaults to 0 for single-interface mode. +static uint8_t mouse_device_instance = 0; + +// Vendor report passthrough queue (Core1 producer → Core0 consumer). +// When the host mouse sends vendor reports (e.g. Logitech HID++, Razer), +// Core1 queues them here and Core0 drains them via tud_hid_report(). +#define VENDOR_QUEUE_SIZE 8 +#define VENDOR_QUEUE_MASK (VENDOR_QUEUE_SIZE - 1) +#define VENDOR_REPORT_MAX_LEN 64 + +typedef struct { + uint8_t device_instance; // Which device-side HID instance to send on + uint8_t report_id; + uint8_t data[VENDOR_REPORT_MAX_LEN]; + uint8_t len; +} vendor_report_entry_t; + +static struct { + vendor_report_entry_t entries[VENDOR_QUEUE_SIZE]; + volatile uint8_t head; // Written by Core1 (producer) + volatile uint8_t tail; // Read by Core0 (consumer) +} vendor_fwd_queue; + +// SET_REPORT passthrough queue (Core0 producer → Core1 consumer). 
+// When the downstream PC sends SET_REPORT to vendor interfaces, Core0 +// queues them here and Core1 forwards to the real mouse. +typedef struct { + uint8_t host_dev_addr; + uint8_t host_instance; + uint8_t report_id; + uint8_t report_type; + uint8_t data[VENDOR_REPORT_MAX_LEN]; + uint8_t len; +} set_report_entry_t; + +static struct { + set_report_entry_t entries[VENDOR_QUEUE_SIZE]; + volatile uint8_t head; // Written by Core0 (producer) + volatile uint8_t tail; // Read by Core1 (consumer) +} set_report_queue; + +// Extended string descriptor cache. +// Gaming mice may use string indices beyond the standard 1-3 (manufacturer, +// product, serial). Interface strings, HID class strings, etc. +#define MAX_CACHED_STRINGS 8 +#define CACHED_STRING_MAX_LEN 64 + +typedef struct { + uint8_t index; + char str[CACHED_STRING_MAX_LEN]; + bool valid; +} cached_string_t; + +static cached_string_t extra_strings[MAX_CACHED_STRINGS]; +static uint8_t extra_string_count = 0; +static uint8_t max_string_index_seen = 3; // Track highest string index from host + +// GET_REPORT cache: stores the last received report per (instance, report_id) +// so that tud_hid_get_report_cb can respond to the host (macOS IOKit sends +// GET_REPORT during device open to verify responsiveness). +// Written by Core1 (in queue_vendor_report), read by Core0 (in get_report_cb). +#define REPORT_CACHE_SLOTS_PER_ITF 8 + +typedef struct { + uint8_t report_id; + uint8_t data[VENDOR_REPORT_MAX_LEN]; + uint8_t len; + bool valid; +} cached_report_t; + +static cached_report_t report_cache[MAX_DEVICE_HID_INTERFACES][REPORT_CACHE_SLOTS_PER_ITF]; + // Function to fetch string descriptors from attached device static void fetch_device_string_descriptors(uint8_t dev_addr) { // Reset string descriptors @@ -434,6 +542,16 @@ static uint8_t host_mouse_report_id = 0; // incoming reports with kmbox/smooth deltas injected in-place. // // Layout populated by parse_mouse_report_layout() during tuh_hid_mount_cb. 
+ +// Fast-path classification for forward_raw_mouse_report(): +// Most gaming mice use byte-aligned 8-bit or 16-bit XY. Classifying the +// layout at parse time lets the hot path skip complex bitwise extraction. +typedef enum { + LAYOUT_GENERIC, // Arbitrary bit-width / non-aligned — full extraction needed + LAYOUT_FAST_8BIT, // buttons[1] + X[i8] + Y[i8] + optional wheel/pan — all byte-aligned + LAYOUT_FAST_16BIT, // buttons[1-2] + X[i16 LE] + Y[i16 LE] — all byte-aligned +} layout_class_t; + typedef struct { // Total expected report size (excluding report-ID prefix byte) uint8_t report_size; @@ -471,6 +589,7 @@ typedef struct { bool has_report_id; bool valid; // true once successfully parsed + layout_class_t layout_class; // fast-path classification (set by classify_layout) } mouse_report_layout_t; static mouse_report_layout_t host_mouse_layout = { .valid = false }; @@ -516,7 +635,7 @@ static void reset_device_string_descriptors(void) { memset(attached_serial, 0, sizeof(attached_serial)); string_descriptors_fetched = false; attached_has_serial = false; - + // Reset cloned descriptor state host_device_info.valid = false; host_config_info.valid = false; @@ -525,11 +644,28 @@ static void reset_device_string_descriptors(void) { host_mouse_has_report_id = false; host_mouse_report_id = 0; cloned_dev_addr = 0; - + + // Reset multi-interface mirroring state + mirrored_itf_count = 0; + expected_hid_itf_count = 0; + mounted_hid_itf_count = 0; + mouse_device_instance = 0; + memset(mirrored_itfs, 0, sizeof(mirrored_itfs)); + + // Reset vendor report queues and GET_REPORT cache + vendor_fwd_queue.head = vendor_fwd_queue.tail = 0; + set_report_queue.head = set_report_queue.tail = 0; + memset(report_cache, 0, sizeof(report_cache)); + + // Reset extra string descriptor cache + extra_string_count = 0; + max_string_index_seen = 3; + memset(extra_strings, 0, sizeof(extra_strings)); + // Reset runtime report IDs to defaults runtime_kbd_report_id = REPORT_ID_KEYBOARD; 
runtime_consumer_report_id = REPORT_ID_CONSUMER_CONTROL; - + // Rebuild config descriptor with defaults build_runtime_hid_report_with_mouse(NULL, 0); rebuild_configuration_descriptor(); @@ -1248,6 +1384,17 @@ bool usb_hid_init(void) // Initialize per-instance HID tracking memset(hid_instances, 0, sizeof(hid_instances)); + // Initialize multi-interface mirroring state + memset(mirrored_itfs, 0, sizeof(mirrored_itfs)); + mirrored_itf_count = 0; + expected_hid_itf_count = 0; + mounted_hid_itf_count = 0; + mouse_device_instance = 0; + vendor_fwd_queue.head = vendor_fwd_queue.tail = 0; + set_report_queue.head = set_report_queue.tail = 0; + memset(report_cache, 0, sizeof(report_cache)); + extra_string_count = 0; + // Seed output-stage PRNG from hardware TRNG hid_rng_seed(get_rand_32()); @@ -1364,13 +1511,13 @@ static bool __not_in_flash_func(process_keyboard_report_internal)(const hid_keyb // CRITICAL FIX: Check readiness before attempting to send // If endpoint is busy, return true anyway to avoid blocking the HID report pipeline // The keyboard state will be sent with the next available opportunity - if (!tud_hid_ready()) + if (!tud_hid_n_ready(mouse_device_instance)) { return true; // Endpoint busy, continue processing without blocking } // Fast path: send report immediately if endpoint is ready - bool success = tud_hid_report(runtime_kbd_report_id, report, sizeof(hid_keyboard_report_t)); + bool success = tud_hid_n_report(mouse_device_instance, runtime_kbd_report_id, report, sizeof(hid_keyboard_report_t)); if (success) { // Skip error counter reset for performance @@ -1447,6 +1594,26 @@ static inline void build_raw_mouse_report(uint8_t *buf, uint8_t sz, buf[L->pan_offset] = (uint8_t)pan; } +// Classify a parsed layout for fast-path dispatch in forward_raw_mouse_report(). +// Called once after parse (+ any fixups), not on the hot path. 
+static void classify_mouse_layout(mouse_report_layout_t *L) { + if (!L->valid) { L->layout_class = LAYOUT_GENERIC; return; } + + // Check byte-alignment: all axes must start on byte boundary + bool x_aligned = (L->x_bit_in_byte == 0); + bool y_aligned = (L->y_bit_in_byte == 0); + + if (x_aligned && y_aligned && L->x_bits == 16 && L->y_bits == 16 && + L->x_is_16bit && L->y_is_16bit && L->buttons_bits <= 8) { + L->layout_class = LAYOUT_FAST_16BIT; + } else if (x_aligned && y_aligned && L->x_bits == 8 && L->y_bits == 8 && + !L->x_is_16bit && !L->y_is_16bit && L->buttons_bits <= 8) { + L->layout_class = LAYOUT_FAST_8BIT; + } else { + L->layout_class = LAYOUT_GENERIC; + } +} + /** * Core1 accumulate-only mouse report handler. * @@ -1461,11 +1628,36 @@ static void __not_in_flash_func(forward_raw_mouse_report)(const uint8_t *raw, ui { const mouse_report_layout_t *L = &host_mouse_layout; - // --- Extract physical movement from the raw report --- int16_t phys_x = 0, phys_y = 0; int8_t phys_wheel = 0, phys_pan = 0; uint8_t phys_buttons = 0; + // --- Fast paths for common byte-aligned layouts (95%+ of gaming mice) --- + // Avoids all the bitwise extraction, bounds checks, and branches below. + if (__builtin_expect(L->layout_class == LAYOUT_FAST_16BIT, 1)) { + // 16-bit XY, byte-aligned, <=8-bit buttons + if (__builtin_expect(raw_len >= L->y_offset + 2, 1)) { + phys_buttons = raw[L->buttons_offset] & ((L->buttons_bits >= 8) ? 
0xFF : ((1u << L->buttons_bits) - 1)); + phys_x = (int16_t)(raw[L->x_offset] | (raw[L->x_offset + 1] << 8)); + phys_y = (int16_t)(raw[L->y_offset] | (raw[L->y_offset + 1] << 8)); + if (L->has_wheel && L->wheel_offset < raw_len) phys_wheel = (int8_t)raw[L->wheel_offset]; + if (L->has_pan && L->pan_offset < raw_len) phys_pan = (int8_t)raw[L->pan_offset]; + goto accumulate; + } + } else if (L->layout_class == LAYOUT_FAST_8BIT) { + // 8-bit XY, byte-aligned, <=8-bit buttons + if (__builtin_expect(raw_len >= L->y_offset + 1, 1)) { + phys_buttons = raw[L->buttons_offset] & ((L->buttons_bits >= 8) ? 0xFF : ((1u << L->buttons_bits) - 1)); + phys_x = (int8_t)raw[L->x_offset]; + phys_y = (int8_t)raw[L->y_offset]; + if (L->has_wheel && L->wheel_offset < raw_len) phys_wheel = (int8_t)raw[L->wheel_offset]; + if (L->has_pan && L->pan_offset < raw_len) phys_pan = (int8_t)raw[L->pan_offset]; + goto accumulate; + } + } + + // --- Generic path: arbitrary bit-width and non-aligned fields --- + // Extract buttons (handle both 8-bit and 16-bit button fields) if (L->buttons_offset < raw_len) { if (L->buttons_bits > 8 && L->buttons_offset + 1 < raw_len) { @@ -1533,6 +1725,7 @@ static void __not_in_flash_func(forward_raw_mouse_report)(const uint8_t *raw, ui phys_pan = (int8_t)raw[L->pan_offset]; } +accumulate: // --- Accumulate into shared state (spinlock-protected inside each call) --- kmbox_update_physical_buttons(phys_buttons & 0x1F); @@ -1600,7 +1793,7 @@ void __not_in_flash_func(process_kbd_report)(const hid_keyboard_report_t *report } static uint32_t activity_counter = 0; - if (++activity_counter % KEYBOARD_ACTIVITY_THROTTLE == 0) + if ((++activity_counter & KEYBOARD_ACTIVITY_MASK) == 0) { neopixel_trigger_activity_flash_color(COLOR_KEYBOARD_ACTIVITY); } @@ -1620,7 +1813,7 @@ void __not_in_flash_func(process_mouse_report)(const hid_mouse_report_t *report) } static uint32_t activity_counter = 0; - if (++activity_counter % MOUSE_ACTIVITY_THROTTLE == 0) + if ((++activity_counter & 
MOUSE_ACTIVITY_MASK) == 0) { neopixel_trigger_activity_flash_color(0x000000FF); // Blue for mouse activity } @@ -1711,31 +1904,29 @@ void hid_device_task(void) // ARCHITECTURE: Core0 is the ONLY core that calls tud_hid_report(). // Core1 (physical mouse callbacks) accumulates into kmbox accumulators // and sets was_active=true. We drain everything here. - if (tud_hid_ready()) + if (tud_hid_n_ready(mouse_device_instance)) { - bool has_kmbox = kmbox_has_pending_movement(); + // Cache humanization mode for this frame — avoid 3 function calls per frame + const humanization_mode_t frame_human_mode = smooth_get_humanization_mode(); + + // Atomic check-and-drain: single spinlock instead of separate + // kmbox_has_pending_movement() + kmbox_get_mouse_report_16() (was 2 spinlock roundtrips) + uint8_t buttons; + int16_t x, y; + int8_t wheel, pan; + bool has_kmbox = kmbox_try_drain_mouse_16(last_sent_buttons, + &buttons, &x, &y, &wheel, &pan); bool has_smooth = smooth_has_pending(); - // Cheaply read current button byte without draining accumulators - uint8_t current_buttons = kmbox_get_current_buttons(); - bool buttons_changed = (current_buttons != last_sent_buttons); + bool buttons_changed = (buttons != last_sent_buttons); bool has_pending = has_kmbox || has_smooth || buttons_changed; if (!has_pending) goto check_idle; - // Drain accumulators (spinlock-protected, safe from both cores) - uint8_t buttons; - int16_t x, y; - int8_t wheel, pan; - kmbox_get_mouse_report_16(&buttons, &x, &y, &wheel, &pan); - - // Process smooth injection + // Process smooth injection (int16_t for high-DPI support) int16_t smooth_x = 0, smooth_y = 0; if (has_smooth) { - int8_t sx8 = 0, sy8 = 0; - smooth_process_frame(&sx8, &sy8); - smooth_x = sx8; - smooth_y = sy8; + smooth_process_frame(&smooth_x, &smooth_y); x += smooth_x; y += smooth_y; } @@ -1756,7 +1947,7 @@ void hid_device_task(void) // All output-stage noise is scaled by movement magnitude to prevent // overwhelming low-speed signals. 
At 1-2 counts, ±1 noise is 50-100% // perturbation, creating chaotic scribble. - if (smooth_get_humanization_mode() != HUMANIZATION_OFF && + if (frame_human_mode != HUMANIZATION_OFF && (x != 0 || y != 0)) { float out_mag = sqrtf((float)x * x + (float)y * y); float noise_gate = fminf(1.0f, out_mag / 4.0f); // ramp 0→1 over [0,4]px @@ -1790,7 +1981,7 @@ void hid_device_task(void) uint8_t raw[16]; build_raw_mouse_report(raw, sizeof(raw), &output_mouse_layout_16bit, buttons, x, y, wheel, pan); - tud_hid_report(REPORT_ID_MOUSE, raw, output_mouse_layout_16bit.report_size); + tud_hid_n_report(mouse_device_instance, REPORT_ID_MOUSE, raw, output_mouse_layout_16bit.report_size); } else if (host_mouse_layout.valid && host_mouse_desc_len > 0) { uint8_t raw[64]; uint8_t sz = host_mouse_layout.report_size; @@ -1800,12 +1991,12 @@ void hid_device_task(void) buttons, x, y, wheel, pan); uint8_t rid = host_mouse_layout.has_report_id ? host_mouse_layout.mouse_report_id : REPORT_ID_MOUSE; - tud_hid_report(rid, raw, sz); + tud_hid_n_report(mouse_device_instance, rid, raw, sz); } else { // Clamp to int8 for standard HID mouse report int8_t cx = (x > 127) ? 127 : ((x < -128) ? -128 : (int8_t)x); int8_t cy = (y > 127) ? 127 : ((y < -128) ? -128 : (int8_t)y); - tud_hid_mouse_report(REPORT_ID_MOUSE, buttons, cx, cy, wheel, pan); + tud_hid_n_mouse_report(mouse_device_instance, REPORT_ID_MOUSE, buttons, cx, cy, wheel, pan); } last_sent_buttons = buttons; was_active = true; @@ -1814,7 +2005,7 @@ void hid_device_task(void) // Always jitter when humanization is active — perfectly regular 1ms // intervals are a fingerprint regardless of physical mouse state. // Real USB polling has crystal oscillator drift + OS scheduling jitter. - if (smooth_get_humanization_mode() != HUMANIZATION_OFF) { + if (frame_human_mode != HUMANIZATION_OFF) { // Gaussian jitter, stddev ~350us → CV ≈ 0.35 on 1000us base. 
// Smaller than before to avoid excessive clamping; range is // roughly ±700us (95th percentile), keeping interval in [500, 2000]. @@ -1830,7 +2021,7 @@ void hid_device_task(void) // --- Active → idle edge: send one final zero-delta stop report --- // Real mice send a last report with zero deltas (confirming the stop) // before they begin NAKing idle polls. Mirror that behavior here. - if (was_active && tud_hid_ready()) + if (was_active && tud_hid_n_ready(mouse_device_instance)) { uint8_t current_buttons = kmbox_get_current_buttons(); if (host_mouse_layout.valid && host_mouse_desc_len > 0) { @@ -1842,9 +2033,9 @@ void hid_device_task(void) current_buttons, 0, 0, 0, 0); uint8_t rid = host_mouse_layout.has_report_id ? host_mouse_layout.mouse_report_id : REPORT_ID_MOUSE; - tud_hid_report(rid, raw, sz); + tud_hid_n_report(mouse_device_instance, rid, raw, sz); } else { - tud_hid_mouse_report(REPORT_ID_MOUSE, current_buttons, 0, 0, 0, 0); + tud_hid_n_mouse_report(mouse_device_instance, REPORT_ID_MOUSE, current_buttons, 0, 0, 0, 0); } last_sent_buttons = current_buttons; was_active = false; @@ -1863,6 +2054,19 @@ void hid_device_task(void) { send_hid_report(REPORT_ID_MOUSE); } + + // --- Drain vendor report queue (Core1 → Core0 passthrough) --- + // Forward vendor/non-mouse reports from the host device to the downstream PC. + // This enables software like Logitech G Hub / Razer Synapse to communicate + // through the proxy for battery status, DPI changes, lighting, etc. 
+ while (vendor_fwd_queue.tail != vendor_fwd_queue.head) { + vendor_report_entry_t *e = &vendor_fwd_queue.entries[vendor_fwd_queue.tail]; + if (e->device_instance < CFG_TUD_HID && tud_hid_n_ready(e->device_instance)) { + tud_hid_n_report(e->device_instance, e->report_id, e->data, e->len); + } + __dmb(); + vendor_fwd_queue.tail = (vendor_fwd_queue.tail + 1) & VENDOR_QUEUE_MASK; + } } void send_hid_report(uint8_t report_id) @@ -1891,11 +2095,11 @@ void send_hid_report(uint8_t report_id) if (!connection_state.keyboard_connected) { // Check device readiness before each report - if (tud_hid_ready()) + if (tud_hid_n_ready(mouse_device_instance)) { // Use static array to avoid stack allocation overhead static const uint8_t empty_keycode[HID_KEYBOARD_KEYCODE_COUNT] = {0}; - tud_hid_keyboard_report(runtime_kbd_report_id, 0, empty_keycode); + tud_hid_n_keyboard_report(mouse_device_instance, runtime_kbd_report_id, 0, empty_keycode); } } break; @@ -1905,7 +2109,7 @@ void send_hid_report(uint8_t report_id) if (!connection_state.mouse_connected) { // Check device readiness before each report - if (tud_hid_ready()) + if (tud_hid_n_ready(mouse_device_instance)) { static bool prev_button_state = true; // true = not pressed (active low) bool current_button_state = gpio_get(PIN_BUTTON); @@ -1913,14 +2117,14 @@ void send_hid_report(uint8_t report_id) if (!current_button_state) { // button pressed (active low) // Mouse move up (negative Y direction) - tud_hid_mouse_report(REPORT_ID_MOUSE, MOUSE_BUTTON_NONE, + tud_hid_n_mouse_report(mouse_device_instance, REPORT_ID_MOUSE, MOUSE_BUTTON_NONE, MOUSE_NO_MOVEMENT, MOUSE_BUTTON_MOVEMENT_DELTA, MOUSE_NO_MOVEMENT, MOUSE_NO_MOVEMENT); } else if (prev_button_state != current_button_state) { // Send stop movement when button is released - tud_hid_mouse_report(REPORT_ID_MOUSE, MOUSE_BUTTON_NONE, + tud_hid_n_mouse_report(mouse_device_instance, REPORT_ID_MOUSE, MOUSE_BUTTON_NONE, MOUSE_NO_MOVEMENT, MOUSE_NO_MOVEMENT, MOUSE_NO_MOVEMENT, 
MOUSE_NO_MOVEMENT); } @@ -1933,10 +2137,10 @@ void send_hid_report(uint8_t report_id) case REPORT_ID_CONSUMER_CONTROL: { // CRITICAL: Check device readiness before each report - if (tud_hid_ready()) + if (tud_hid_n_ready(mouse_device_instance)) { static const uint16_t empty_key = 0; - tud_hid_report(runtime_consumer_report_id, &empty_key, HID_CONSUMER_CONTROL_SIZE); + tud_hid_n_report(mouse_device_instance, runtime_consumer_report_id, &empty_key, HID_CONSUMER_CONTROL_SIZE); } break; } @@ -1948,8 +2152,18 @@ void send_hid_report(uint8_t report_id) void hid_host_task(void) { - // This function can be called from core0 if needed for additional host processing - // The main host task runs on core1 in PIOKMbox.c + // Drain SET_REPORT passthrough queue (Core0 → Core1). + // Forward vendor SET_REPORT requests from the downstream PC to the real mouse. + // Process at most 1 per call — tuh_hid_set_report() may block on USB + // control transfer, and draining the full queue could stall Core1's PIO USB. + if (set_report_queue.tail != set_report_queue.head) { + set_report_entry_t *e = &set_report_queue.entries[set_report_queue.tail]; + tuh_hid_set_report(e->host_dev_addr, e->host_instance, + e->report_id, e->report_type, + (void*)e->data, e->len); + __dmb(); + set_report_queue.tail = (set_report_queue.tail + 1) & VENDOR_QUEUE_MASK; + } } // Device callbacks with improved error handling @@ -2016,24 +2230,25 @@ void tuh_umount_cb(uint8_t dev_addr) neopixel_update_status(); } -// HID host callbacks with improved validation +// HID host callbacks — multi-interface mirroring with vendor report passthrough void tuh_hid_mount_cb(uint8_t dev_addr, uint8_t instance, const uint8_t *desc_report, uint16_t desc_len) { uint16_t vid, pid; tuh_vid_pid_get(dev_addr, &vid, &pid); // === DEVICE-LEVEL CLONING (once per physical device) === - // Composite devices (e.g. Razer Basilisk V3 = 4 HID interfaces) trigger - // tuh_hid_mount_cb once per interface. 
Device/config descriptors and strings - // are device-level, so we only need to fetch them once. bool first_interface_for_device = (cloned_dev_addr != dev_addr); - + if (first_interface_for_device) { cloned_dev_addr = dev_addr; - + mounted_hid_itf_count = 0; + mirrored_itf_count = 0; + mouse_device_instance = 0; + memset(mirrored_itfs, 0, sizeof(mirrored_itfs)); + // Fetch string descriptors from the attached device fetch_device_string_descriptors(dev_addr); - + // Capture full device descriptor for identity cloning tusb_desc_device_t host_dev_desc; if (tuh_descriptor_get_device_sync(dev_addr, &host_dev_desc, sizeof(host_dev_desc)) == XFER_RESULT_SUCCESS) { @@ -2044,159 +2259,172 @@ void tuh_hid_mount_cb(uint8_t dev_addr, uint8_t instance, const uint8_t *desc_re host_device_info.bMaxPacketSize0 = host_dev_desc.bMaxPacketSize0; host_device_info.bcdDevice = host_dev_desc.bcdDevice; host_device_info.valid = true; + + // Track string indices from device descriptor + if (host_dev_desc.iManufacturer > max_string_index_seen) max_string_index_seen = host_dev_desc.iManufacturer; + if (host_dev_desc.iProduct > max_string_index_seen) max_string_index_seen = host_dev_desc.iProduct; + if (host_dev_desc.iSerialNumber > max_string_index_seen) max_string_index_seen = host_dev_desc.iSerialNumber; } - - // Capture configuration descriptor (contains all interfaces + endpoints) - uint8_t cfg_buf[256]; + + // Capture and parse full configuration descriptor (all interfaces + endpoints) + uint8_t cfg_buf[512]; if (tuh_descriptor_get_configuration_sync(dev_addr, 0, cfg_buf, sizeof(cfg_buf)) == XFER_RESULT_SUCCESS) { - parse_host_config_descriptor(cfg_buf, sizeof(cfg_buf)); + uint16_t cfg_total = cfg_buf[2] | (cfg_buf[3] << 8); + if (cfg_total > sizeof(cfg_buf)) cfg_total = sizeof(cfg_buf); + parse_host_config_descriptor(cfg_buf, cfg_total); + } + + // Fetch any extra string descriptors beyond the standard 3 (manufacturer, product, serial) + for (uint8_t si = 4; si <= max_string_index_seen 
&& extra_string_count < MAX_CACHED_STRINGS; si++) { + uint16_t tmp_buf[48]; + memset(tmp_buf, 0, sizeof(tmp_buf)); + if (tuh_descriptor_get_string_sync(dev_addr, si, LANGUAGE_ID, tmp_buf, sizeof(tmp_buf)) == XFER_RESULT_SUCCESS) { + cached_string_t *cs = &extra_strings[extra_string_count]; + cs->index = si; + utf16_to_utf8(tmp_buf, sizeof(tmp_buf), cs->str, sizeof(cs->str)); + cs->valid = (strlen(cs->str) > 0); + if (cs->valid) extra_string_count++; + } } } + mounted_hid_itf_count++; + // === DETERMINE EFFECTIVE PROTOCOL === uint8_t const itf_protocol = tuh_hid_interface_protocol(dev_addr, instance); hid_instance_info_t *inst_info = alloc_hid_instance(dev_addr, instance); uint8_t effective_protocol = itf_protocol; - + if (itf_protocol == HID_ITF_PROTOCOL_NONE && desc_report != NULL && desc_len > 0) { - // Non-boot protocol device — detect usage from report descriptor effective_protocol = detect_usage_from_report_descriptor(desc_report, desc_len, inst_info); } - if (inst_info) { inst_info->effective_protocol = effective_protocol; } - // === MOUSE INTERFACE: Capture HID report descriptor === - // CRITICAL: Only capture the mouse descriptor from the mouse interface! - // Composite devices have multiple interfaces with different descriptors. - // e.g. Razer Basilisk V3: - // Interface 0: Boot Mouse (protocol=2), 79 byte desc — THIS IS THE MOUSE - // Interface 1: Keyboard (protocol=1), 159 byte desc — macro keys - // Interface 2: Keyboard (protocol=1), 61 byte desc — more keys - // Interface 3: Vendor (protocol=0), 22 byte desc — lighting control - // We MUST NOT let interfaces 1-3 overwrite the mouse descriptor from interface 0. bool is_mouse_interface = (effective_protocol == HID_ITF_PROTOCOL_MOUSE); - - if (is_mouse_interface && desc_report != NULL && desc_len > 0) { - // Capture this interface's HID report descriptor as the mouse descriptor. 
- // Logitech Unifying receivers include HID++ vendor collections (Report IDs - // 0x10/0x11, Usage Page 0xFF00) alongside the mouse collection. Strip - // these before storing — they confuse host HID drivers and can trigger - // installation of vendor filter drivers that fight the proxy. - size_t copy_len = desc_len; - if (copy_len > sizeof(host_mouse_desc)) - copy_len = sizeof(host_mouse_desc); - host_mouse_desc_len = strip_vendor_collections(desc_report, copy_len, - host_mouse_desc, sizeof(host_mouse_desc)); - - // Parse the mouse report layout to discover field offsets for raw forwarding - parse_mouse_report_layout(host_mouse_desc, host_mouse_desc_len, &host_mouse_layout); - - // CRITICAL FIX: Detect OS descriptor mismatches (macOS/Windows sometimes expose - // 8-bit descriptors for 16-bit mice). Check if descriptor claims 8-bit X/Y but - // report is large enough for 16-bit data (8+ bytes = 2 btn + 2 X + 2 Y + wheel + pan). - if (host_mouse_layout.valid && - host_mouse_layout.x_bits == 8 && - host_mouse_layout.y_bits == 8 && - host_mouse_layout.report_size >= 8 && - host_mouse_layout.buttons_bits >= 8) { - // Descriptor says 8-bit but structure suggests 16-bit - // Override to 16-bit layout (common with Logitech Lightspeed, etc.) - host_mouse_layout.x_bits = 16; - host_mouse_layout.y_bits = 16; - host_mouse_layout.x_is_16bit = true; - host_mouse_layout.y_is_16bit = true; - host_mouse_layout.report_size = 8; + + // === ALLOCATE MIRRORED INTERFACE SLOT === + // Each host HID interface gets a corresponding device-side interface. + uint8_t mirror_idx = mirrored_itf_count; + if (mirror_idx < MAX_DEVICE_HID_INTERFACES) { + mirrored_interface_t *mitf = &mirrored_itfs[mirror_idx]; + mitf->active = true; + mitf->host_dev_addr = dev_addr; + mitf->host_instance = instance; + mitf->is_mouse = is_mouse_interface; + + // Endpoint config was already parsed from config descriptor; + // the Nth HID interface in the config maps to mirrored_itfs[N]. 
+ // (parse_host_config_descriptor pre-populated subclass/protocol/ep_*) + + // Track which device-side instance is the mouse (composite descriptor) + if (is_mouse_interface) { + mouse_device_instance = mirror_idx; } - - // Use the layout-parsed report ID (extracted from the mouse collection context) - // instead of blindly scanning for the first 0x85 tag - if (host_mouse_layout.valid && host_mouse_layout.has_report_id) { - host_mouse_has_report_id = true; - host_mouse_report_id = host_mouse_layout.mouse_report_id; - } else { - // Fallback: scan for any Report ID tag in the descriptor - host_mouse_has_report_id = false; - host_mouse_report_id = 0; - for (size_t i = 0; i + 1 < host_mouse_desc_len; ++i) { - if (host_mouse_desc[i] == 0x85) { - host_mouse_has_report_id = true; - host_mouse_report_id = host_mouse_desc[i + 1]; - break; + + if (is_mouse_interface && desc_report != NULL && desc_len > 0) { + // --- MOUSE INTERFACE --- + // Store full descriptor (including vendor collections) for the mouse interface. 
+ size_t copy_len = desc_len; + if (copy_len > sizeof(host_mouse_desc)) copy_len = sizeof(host_mouse_desc); + memcpy(host_mouse_desc, desc_report, copy_len); + host_mouse_desc_len = copy_len; + + // Parse mouse report layout for raw forwarding + parse_mouse_report_layout(host_mouse_desc, host_mouse_desc_len, &host_mouse_layout); + + // Detect OS descriptor mismatches (8-bit desc for 16-bit mouse) + if (host_mouse_layout.valid && + host_mouse_layout.x_bits == 8 && host_mouse_layout.y_bits == 8 && + host_mouse_layout.report_size >= 8 && host_mouse_layout.buttons_bits >= 8) { + host_mouse_layout.x_bits = 16; + host_mouse_layout.y_bits = 16; + host_mouse_layout.x_is_16bit = true; + host_mouse_layout.y_is_16bit = true; + host_mouse_layout.report_size = 8; + } + + // Classify layout for fast-path dispatch (after all fixups) + classify_mouse_layout(&host_mouse_layout); + + // Extract mouse report ID + if (host_mouse_layout.valid && host_mouse_layout.has_report_id) { + host_mouse_has_report_id = true; + host_mouse_report_id = host_mouse_layout.mouse_report_id; + } else { + host_mouse_has_report_id = false; + host_mouse_report_id = 0; + for (size_t i = 0; i + 1 < host_mouse_desc_len; ++i) { + if (host_mouse_desc[i] == 0x85) { + host_mouse_has_report_id = true; + host_mouse_report_id = host_mouse_desc[i + 1]; + break; + } } } - } - - // Also update instance info with the correct mouse report ID - if (inst_info) { - inst_info->has_report_id = host_mouse_has_report_id; - inst_info->mouse_report_id = host_mouse_report_id; - } - // Build runtime HID report descriptor (keyboard + mouse + consumer) - build_runtime_hid_report_with_mouse(host_mouse_desc, host_mouse_desc_len); - rebuild_configuration_descriptor(); - - // CRITICAL: set_attached_device_vid_pid() triggers force_usb_reenumeration() - // which disconnects/reconnects the device stack. This MUST happen AFTER all - // descriptors are fully rebuilt. It also only triggers if VID/PID changed. 
- set_attached_device_vid_pid(vid, pid); - } - else if (first_interface_for_device && !is_mouse_interface) { - // First interface mounted but it's not the mouse — still need to set VID/PID - // so we present the correct identity. The mouse descriptor will be captured - // when the mouse interface mounts (if it exists). - // Don't overwrite host_mouse_desc or host_mouse_layout here! - - // If we haven't seen a mouse interface yet for this device, build - // descriptors with defaults. They'll be rebuilt when mouse mounts. - if (host_mouse_desc_len == 0) { - build_runtime_hid_report_with_mouse(NULL, 0); - rebuild_configuration_descriptor(); - set_attached_device_vid_pid(vid, pid); + if (inst_info) { + inst_info->has_report_id = host_mouse_has_report_id; + inst_info->mouse_report_id = host_mouse_report_id; + } + + // Build composite HID report descriptor (keyboard + mouse + consumer) + build_runtime_hid_report_with_mouse(host_mouse_desc, host_mouse_desc_len); + + } else if (desc_report != NULL && desc_len > 0) { + // --- NON-MOUSE INTERFACE (keyboard-macros, vendor, etc.) --- + // Store verbatim descriptor for faithful mirroring + size_t copy_len = desc_len; + if (copy_len > MIRROR_ITF_DESC_MAX) copy_len = MIRROR_ITF_DESC_MAX; + memcpy(mitf->report_desc, desc_report, copy_len); + mitf->report_desc_len = copy_len; } + + mirrored_itf_count++; } // Handle HID device connection using effective protocol handle_hid_device_connection(dev_addr, effective_protocol); - // Start receiving reports — but only from interfaces we actually want. - // - // Composite gaming mice (e.g. 
Razer Basilisk V3) expose multiple HID - // interfaces on a single dev_addr: - // Interface 0: Boot Mouse (protocol=2) — WE WANT THIS - // Interface 1: Keyboard (protocol=1) — macro keys, NOT a real keyboard - // Interface 2: Keyboard (protocol=1) — media keys, NOT a real keyboard - // Interface 3: Vendor (protocol=0) — lighting control - // - // If we receive reports from the non-mouse interfaces, their periodic - // status/idle reports get forwarded as keyboard input, causing garbage - // keypresses (e.g. '#' flood). - // - // Rule: Only receive from mouse interfaces, OR from keyboard/vendor - // interfaces on a DIFFERENT device (i.e. a standalone keyboard). + // === START RECEIVING REPORTS === + // Receive from ALL interfaces on the mouse device for vendor report passthrough. + // Standalone keyboards on a different device are also received. + // Filtering of composite macro-keyboard garbage happens in report_received_cb. bool should_receive = false; - + if (is_mouse_interface) { - // Always receive from mouse interfaces should_receive = true; } else if (dev_addr != connection_state.mouse_dev_addr) { - // This interface is on a different physical device than the mouse, - // so it's a standalone keyboard — receive its reports + // Standalone keyboard on different device + should_receive = true; + } else { + // Non-mouse interface on same device — receive for vendor report passthrough should_receive = true; } - // else: non-mouse interface on the same device as the mouse → skip if (should_receive) { if (!tuh_hid_receive_report(dev_addr, instance)) { neopixel_trigger_activity_flash_color(COLOR_USB_DISCONNECTION); - } else { - neopixel_update_status(); } - } else { - // Skip receiving from non-mouse interfaces on the same device } + + // === DEFERRED RE-ENUMERATION === + // Wait until all expected HID interfaces have mounted before triggering + // re-enumeration. This ensures the config descriptor includes ALL + // interfaces, not just the first one. 
+ if (mounted_hid_itf_count >= expected_hid_itf_count) { + // All interfaces captured — build final descriptors + if (host_mouse_desc_len == 0) { + // No mouse interface found; use defaults + build_runtime_hid_report_with_mouse(NULL, 0); + } + rebuild_configuration_descriptor(); + + // Trigger re-enumeration (only fires if VID/PID actually changed) + set_attached_device_vid_pid(vid, pid); + } + neopixel_update_status(); } @@ -2307,6 +2535,61 @@ static void __not_in_flash_func(parse_and_forward_mouse_report)(const uint8_t *d process_mouse_report(&mouse_report_local); } +// Queue a vendor/non-mouse report for Core0 to send on the device side. +// Called from Core1 — must not call any tud_* functions. +static void __not_in_flash_func(queue_vendor_report)(uint8_t device_instance, uint8_t report_id, + const uint8_t *data, uint8_t data_len) +{ + uint8_t capped_len = (data_len > VENDOR_REPORT_MAX_LEN) ? VENDOR_REPORT_MAX_LEN : data_len; + + // Update GET_REPORT cache so tud_hid_get_report_cb can respond to macOS IOKit. + if (device_instance < MAX_DEVICE_HID_INTERFACES) { + cached_report_t *slots = report_cache[device_instance]; + int slot = -1; + int empty = -1; + for (int i = 0; i < REPORT_CACHE_SLOTS_PER_ITF; i++) { + if (slots[i].valid && slots[i].report_id == report_id) { slot = i; break; } + if (!slots[i].valid && empty < 0) empty = i; + } + if (slot < 0) slot = (empty >= 0) ? 
empty : 0; // Evict first slot if full + slots[slot].report_id = report_id; + slots[slot].len = capped_len; + memcpy(slots[slot].data, data, capped_len); + __dmb(); + slots[slot].valid = true; + } + + // Queue for Core0 to forward via interrupt IN endpoint + uint8_t next_head = (vendor_fwd_queue.head + 1) & VENDOR_QUEUE_MASK; + if (next_head == vendor_fwd_queue.tail) return; // Queue full, drop + + vendor_report_entry_t *e = &vendor_fwd_queue.entries[vendor_fwd_queue.head]; + e->device_instance = device_instance; + e->report_id = report_id; + e->len = capped_len; + memcpy(e->data, data, capped_len); + + __dmb(); // Ensure data written before head advances + vendor_fwd_queue.head = next_head; +} + +// Find which mirrored interface slot corresponds to a host (dev_addr, instance). +static mirrored_interface_t* find_mirrored_interface(uint8_t dev_addr, uint8_t instance) { + for (uint8_t i = 0; i < mirrored_itf_count; i++) { + if (mirrored_itfs[i].active && + mirrored_itfs[i].host_dev_addr == dev_addr && + mirrored_itfs[i].host_instance == instance) { + return &mirrored_itfs[i]; + } + } + return NULL; +} + +// Get the device-side instance index for a mirrored interface. +static uint8_t mirrored_device_instance(const mirrored_interface_t *mitf) { + return (uint8_t)(mitf - mirrored_itfs); +} + void __not_in_flash_func(tuh_hid_report_received_cb)(uint8_t dev_addr, uint8_t instance, const uint8_t *report, uint16_t len) { if (report == NULL || len == 0) @@ -2315,45 +2598,46 @@ void __not_in_flash_func(tuh_hid_report_received_cb)(uint8_t dev_addr, uint8_t i return; } - // Look up effective protocol from our per-instance tracking - // This handles non-boot-protocol devices (Logitech, gaming mice, etc.) 
+ // Look up effective protocol from per-instance tracking uint8_t effective_protocol = tuh_hid_interface_protocol(dev_addr, instance); hid_instance_info_t *inst_info = find_hid_instance(dev_addr, instance); bool has_report_id = false; - uint8_t mouse_report_id = 0; - + uint8_t mouse_report_id_local = 0; + if (inst_info) { effective_protocol = inst_info->effective_protocol; has_report_id = inst_info->has_report_id; - mouse_report_id = inst_info->mouse_report_id; + mouse_report_id_local = inst_info->mouse_report_id; } - // Direct processing without extra copying for better performance - // SAFETY: Only forward keyboard reports from standalone keyboard devices, - // not from keyboard interfaces on composite mouse devices (e.g. Razer - // Basilisk V3 exposes macro/media key interfaces that send garbage data). switch (effective_protocol) { case HID_ITF_PROTOCOL_KEYBOARD: { - // Only forward if this is a standalone keyboard (different device than mouse) + // Only forward keyboard from standalone devices (different device than mouse) if (dev_addr == connection_state.mouse_dev_addr) { - // This keyboard interface is on the same device as the mouse — - // it's a composite gaming mouse's macro/media keys, skip it + // Composite gaming mouse macro/media keys — queue for vendor passthrough + // instead of interpreting as keyboard input + mirrored_interface_t *mitf = find_mirrored_interface(dev_addr, instance); + if (mitf) { + uint8_t dev_inst = mirrored_device_instance(mitf); + uint8_t rid = (has_report_id && len > 0) ? report[0] : 0; + const uint8_t *data = (has_report_id && len > 0) ? report + 1 : report; + uint8_t dlen = (has_report_id && len > 0) ? 
(uint8_t)(len - 1) : (uint8_t)len; + queue_vendor_report(dev_inst, rid, data, dlen); + } break; } - + const uint8_t *kbd_data = report; uint16_t kbd_len = len; - - // Strip report ID prefix if present + if (has_report_id && kbd_len > 0) { kbd_data++; kbd_len--; } - - if (kbd_len >= (int)sizeof(hid_keyboard_report_t)) - { + + if (kbd_len >= (int)sizeof(hid_keyboard_report_t)) { process_kbd_report((const hid_keyboard_report_t*)kbd_data); } } @@ -2363,26 +2647,40 @@ void __not_in_flash_func(tuh_hid_report_received_cb)(uint8_t dev_addr, uint8_t i { const uint8_t *mouse_data = report; uint16_t mouse_len = len; - - // For composite/report-ID devices, the first byte is the report ID - // We need to strip it before parsing the mouse data + if (has_report_id && mouse_len > 0) { uint8_t received_id = mouse_data[0]; - // Only process if this report ID matches the mouse report ID - if (received_id != mouse_report_id) { - // Not a mouse report from this composite device - skip + if (received_id != mouse_report_id_local) { + // Not the mouse report — this is a vendor report on the mouse + // interface (e.g. Logitech HID++ on same interface as mouse). + // Queue for passthrough to device side. + mirrored_interface_t *mitf = find_mirrored_interface(dev_addr, instance); + if (mitf) { + uint8_t dev_inst = mirrored_device_instance(mitf); + queue_vendor_report(dev_inst, received_id, mouse_data + 1, (uint8_t)(mouse_len - 1)); + } break; } mouse_data++; mouse_len--; } - + parse_and_forward_mouse_report(mouse_data, mouse_len); } break; default: - // Unknown HID protocol - ignore + { + // Unknown/vendor protocol — forward entire report for passthrough + mirrored_interface_t *mitf = find_mirrored_interface(dev_addr, instance); + if (mitf) { + uint8_t dev_inst = mirrored_device_instance(mitf); + uint8_t rid = (has_report_id && len > 0) ? report[0] : 0; + const uint8_t *data = (has_report_id && len > 0) ? report + 1 : report; + uint8_t dlen = (has_report_id && len > 0) ? 
(uint8_t)(len - 1) : (uint8_t)len; + queue_vendor_report(dev_inst, rid, data, dlen); + } + } break; } @@ -2393,25 +2691,35 @@ void __not_in_flash_func(tuh_hid_report_received_cb)(uint8_t dev_addr, uint8_t i // HID device callbacks with improved validation uint16_t tud_hid_get_report_cb(uint8_t instance, uint8_t report_id, hid_report_type_t report_type, uint8_t *buffer, uint16_t reqlen) { - (void)instance; - (void)report_id; (void)report_type; - (void)buffer; - (void)reqlen; - return 0; + + if (instance >= MAX_DEVICE_HID_INTERFACES || !buffer || reqlen == 0) return 0; + + // Look up cached report from the real device + cached_report_t *slots = report_cache[instance]; + __dmb(); // Ensure we see Core1's latest cache writes + for (int i = 0; i < REPORT_CACHE_SLOTS_PER_ITF; i++) { + if (slots[i].valid && slots[i].report_id == report_id) { + uint16_t copy_len = (slots[i].len < reqlen) ? slots[i].len : reqlen; + memcpy(buffer, slots[i].data, copy_len); + return copy_len; + } + } + + // No cached data yet — return zeros so the host sees the device as responsive. + // macOS IOKit sends GET_REPORT during device open; returning 0 (no data) causes + // "open failed". Returning a zeroed buffer keeps the open path happy. + uint16_t fill_len = (reqlen > VENDOR_REPORT_MAX_LEN) ? VENDOR_REPORT_MAX_LEN : reqlen; + memset(buffer, 0, fill_len); + return fill_len; } void tud_hid_set_report_cb(uint8_t instance, uint8_t report_id, hid_report_type_t report_type, const uint8_t *buffer, uint16_t bufsize) { - (void)instance; - + // Handle keyboard LED output reports (caps lock, etc.) 
if (report_type == HID_REPORT_TYPE_OUTPUT && report_id == runtime_kbd_report_id) { - // Validate buffer - if (buffer == NULL || bufsize < MIN_BUFFER_SIZE) - { - return; - } + if (buffer == NULL || bufsize < MIN_BUFFER_SIZE) return; uint8_t const kbd_leds = buffer[0]; bool new_caps_state = (kbd_leds & KEYBOARD_LED_CAPSLOCK) != 0; @@ -2419,9 +2727,29 @@ void tud_hid_set_report_cb(uint8_t instance, uint8_t report_id, hid_report_type_ if (new_caps_state != caps_lock_state) { caps_lock_state = new_caps_state; - // Indicate caps lock change with LED flash instead of console logging neopixel_trigger_caps_lock_flash(); } + return; + } + + // Forward all other SET_REPORT requests to the real mouse for vendor passthrough. + // This enables Logitech G Hub, Razer Synapse, etc. to configure DPI, lighting, + // macros, battery queries through the proxy. + if (buffer != NULL && bufsize > 0 && instance < mirrored_itf_count && mirrored_itfs[instance].active) { + mirrored_interface_t *mitf = &mirrored_itfs[instance]; + + uint8_t next_head = (set_report_queue.head + 1) & VENDOR_QUEUE_MASK; + if (next_head != set_report_queue.tail) { + set_report_entry_t *e = &set_report_queue.entries[set_report_queue.head]; + e->host_dev_addr = mitf->host_dev_addr; + e->host_instance = mitf->host_instance; + e->report_id = report_id; + e->report_type = (uint8_t)report_type; + e->len = (bufsize > VENDOR_REPORT_MAX_LEN) ? VENDOR_REPORT_MAX_LEN : (uint8_t)bufsize; + memcpy(e->data, buffer, e->len); + __dmb(); + set_report_queue.head = next_head; + } } } @@ -2586,16 +2914,28 @@ uint8_t const * tud_descriptor_device_cb(void) return (uint8_t const *)&desc_device; } -// HID Report Descriptor +// HID Report Descriptor — per-instance for multi-interface mirroring. +// Instance 0 is typically the mouse interface (composite: keyboard + mouse + consumer). +// Other instances return verbatim cloned descriptors from the host device. 
uint8_t const *tud_hid_descriptor_report_cb(uint8_t instance) { - (void)instance; - if (desc_hid_runtime_valid) - { + // Multi-interface mode: return the correct descriptor for each instance + if (mirrored_itf_count > 0 && instance < mirrored_itf_count && mirrored_itfs[instance].active) { + if (mirrored_itfs[instance].is_mouse) { + // Mouse interface returns composite descriptor (keyboard + mouse + consumer) + return desc_hid_runtime_valid ? desc_hid_report_runtime : desc_hid_report; + } + // Non-mouse interface returns verbatim host descriptor + if (mirrored_itfs[instance].report_desc_len > 0) { + return mirrored_itfs[instance].report_desc; + } + } + + // Single-interface fallback + if (instance == 0 && desc_hid_runtime_valid) { return desc_hid_report_runtime; } - // Fallback to static concatenation if runtime descriptor not ready - return desc_hid_report_runtime; // still points to buffer (may contain defaults) + return desc_hid_report; } // Configuration Descriptor - now dynamic for cloning @@ -2605,108 +2945,217 @@ enum ITF_NUM_TOTAL }; -// Offsets within the configuration descriptor for fields we patch -#define CFG_DESC_OFFSET_BMATTRIBUTES 7 -#define CFG_DESC_OFFSET_BMAXPOWER 8 -#define HID_ITF_OFFSET_SUBCLASS (TUD_CONFIG_DESC_LEN + 6) // bInterfaceSubClass -#define HID_ITF_OFFSET_PROTOCOL (TUD_CONFIG_DESC_LEN + 7) // bInterfaceProtocol -#define HID_DESC_OFFSET_REPORT_LEN_LO (TUD_CONFIG_DESC_LEN + 9 + 7) // wDescriptorLength low byte -#define HID_DESC_OFFSET_REPORT_LEN_HI (TUD_CONFIG_DESC_LEN + 9 + 8) // wDescriptorLength high byte -#define HID_EP_OFFSET_MAXPACKET_LO (TUD_CONFIG_DESC_LEN + 9 + 9 + 4) // wMaxPacketSize low byte -#define HID_EP_OFFSET_MAXPACKET_HI (TUD_CONFIG_DESC_LEN + 9 + 9 + 5) // wMaxPacketSize high byte -#define HID_EP_OFFSET_INTERVAL (TUD_CONFIG_DESC_LEN + 9 + 9 + 6) // bInterval - -// Build the configuration descriptor from current runtime state +// Helper: write one HID interface block (interface desc + HID desc + endpoint(s)) +// into a 
buffer. Returns number of bytes written. +static uint16_t write_hid_interface_desc(uint8_t *buf, uint16_t buf_max, + uint8_t itf_num, uint8_t subclass, + uint8_t protocol, uint16_t report_desc_len, + uint8_t ep_in_addr, uint16_t ep_in_size, + uint8_t ep_in_interval, + bool has_ep_out, uint8_t ep_out_interval) +{ + uint16_t pos = 0; + uint8_t num_eps = has_ep_out ? 2 : 1; + + // Interface descriptor (9 bytes) + if (pos + 9 > buf_max) return 0; + buf[pos++] = 9; + buf[pos++] = TUSB_DESC_INTERFACE; + buf[pos++] = itf_num; + buf[pos++] = 0; // bAlternateSetting + buf[pos++] = num_eps; + buf[pos++] = TUSB_CLASS_HID; + buf[pos++] = subclass; + buf[pos++] = protocol; + buf[pos++] = 0; // iInterface + + // HID descriptor (9 bytes) + if (pos + 9 > buf_max) return 0; + buf[pos++] = 9; + buf[pos++] = HID_DESC_TYPE_HID; + buf[pos++] = 0x11; // bcdHID low (1.11) + buf[pos++] = 0x01; // bcdHID high + buf[pos++] = 0; // bCountryCode + buf[pos++] = 1; // bNumDescriptors + buf[pos++] = HID_DESC_TYPE_REPORT; + buf[pos++] = (uint8_t)(report_desc_len & 0xFF); + buf[pos++] = (uint8_t)((report_desc_len >> 8) & 0xFF); + + // IN endpoint descriptor (7 bytes) + if (pos + 7 > buf_max) return 0; + buf[pos++] = 7; + buf[pos++] = TUSB_DESC_ENDPOINT; + buf[pos++] = ep_in_addr; + buf[pos++] = TUSB_XFER_INTERRUPT; + buf[pos++] = (uint8_t)(ep_in_size & 0xFF); + buf[pos++] = (uint8_t)((ep_in_size >> 8) & 0xFF); + buf[pos++] = ep_in_interval; + + // OUT endpoint descriptor (7 bytes, optional) + if (has_ep_out) { + if (pos + 7 > buf_max) return 0; + buf[pos++] = 7; + buf[pos++] = TUSB_DESC_ENDPOINT; + buf[pos++] = ep_in_addr & 0x0F; // Same EP number, OUT direction + buf[pos++] = TUSB_XFER_INTERRUPT; + buf[pos++] = (uint8_t)(ep_in_size & 0xFF); + buf[pos++] = (uint8_t)((ep_in_size >> 8) & 0xFF); + buf[pos++] = ep_out_interval; + } + + return pos; +} + +// Build the configuration descriptor from current runtime state. +// Supports 1-4 HID interfaces mirroring the host device's layout. 
static void rebuild_configuration_descriptor(void) { - // Determine actual report descriptor length - size_t report_desc_len = desc_hid_runtime_valid ? desc_hid_runtime_len : sizeof(desc_hid_report); - - // Clone ALL configuration descriptor fields from the host mouse. - // The goal is to present as the exact same device to the downstream PC. + uint8_t num_itfs = (mirrored_itf_count > 0) ? mirrored_itf_count : 1; + if (num_itfs > MAX_DEVICE_HID_INTERFACES) num_itfs = MAX_DEVICE_HID_INTERFACES; + uint8_t cfg_attributes = TU_BIT(7) | (host_config_info.valid ? host_config_info.bmAttributes : TUSB_DESC_CONFIG_ATT_REMOTE_WAKEUP); uint8_t cfg_max_power = host_config_info.valid ? host_config_info.bMaxPower : (USB_CONFIG_POWER_MA / 2); - uint8_t itf_sub_class = host_config_info.valid ? host_config_info.bInterfaceSubClass : 0; - uint8_t itf_protocol = host_config_info.valid ? host_config_info.bInterfaceProtocol : HID_ITF_PROTOCOL_NONE; - // CRITICAL: wMaxPacketSize must be our actual EP buffer size, NOT the host mouse's. - // We re-encode reports through tud_hid_mouse_report() / tud_hid_keyboard_report(), - // and our keyboard report (1 + 8 = 9 bytes) may exceed a small mouse EP size (e.g. 8). - // The downstream PC uses this to allocate its receive buffer — it must fit our largest report. - uint16_t ep_max_packet = CFG_TUD_HID_EP_BUFSIZE; - uint8_t ep_interval = host_config_info.valid ? 
host_config_info.bInterval : HID_POLLING_INTERVAL_MS; - - // Build using the TinyUSB macros as a base template, then patch - uint8_t template[] = { - TUD_CONFIG_DESCRIPTOR(1, ITF_NUM_TOTAL, 0, CONFIG_TOTAL_LEN, TUSB_DESC_CONFIG_ATT_REMOTE_WAKEUP, USB_CONFIG_POWER_MA), - TUD_HID_DESCRIPTOR(ITF_NUM_HID, 0, HID_ITF_PROTOCOL_NONE, sizeof(desc_hid_report), EPNUM_HID, CFG_TUD_HID_EP_BUFSIZE, HID_POLLING_INTERVAL_MS) - }; - - _Static_assert(sizeof(template) == TUD_CONFIG_DESC_LEN + TUD_HID_DESC_LEN, "config descriptor size mismatch"); - memcpy(desc_configuration_runtime, template, sizeof(template)); - - // Patch config descriptor fields - desc_configuration_runtime[CFG_DESC_OFFSET_BMATTRIBUTES] = cfg_attributes; - desc_configuration_runtime[CFG_DESC_OFFSET_BMAXPOWER] = cfg_max_power; - - // Patch HID interface descriptor fields - desc_configuration_runtime[HID_ITF_OFFSET_SUBCLASS] = itf_sub_class; - desc_configuration_runtime[HID_ITF_OFFSET_PROTOCOL] = itf_protocol; - - // Patch HID report descriptor length (critical — must match what tud_hid_descriptor_report_cb returns) - desc_configuration_runtime[HID_DESC_OFFSET_REPORT_LEN_LO] = (uint8_t)(report_desc_len & 0xFF); - desc_configuration_runtime[HID_DESC_OFFSET_REPORT_LEN_HI] = (uint8_t)((report_desc_len >> 8) & 0xFF); - - // Patch endpoint descriptor fields - desc_configuration_runtime[HID_EP_OFFSET_MAXPACKET_LO] = (uint8_t)(ep_max_packet & 0xFF); - desc_configuration_runtime[HID_EP_OFFSET_MAXPACKET_HI] = (uint8_t)((ep_max_packet >> 8) & 0xFF); - desc_configuration_runtime[HID_EP_OFFSET_INTERVAL] = ep_interval; - + + // Leave 9 bytes for config header, fill interfaces after + uint16_t pos = 9; + + for (uint8_t i = 0; i < num_itfs; i++) { + uint16_t report_len; + uint8_t subclass, protocol, ep_interval; + bool has_out = false; + uint8_t ep_out_interval_val = 1; + + if (mirrored_itf_count > 0 && i < mirrored_itf_count && mirrored_itfs[i].active) { + mirrored_interface_t *itf = &mirrored_itfs[i]; + if (itf->is_mouse) { + 
report_len = desc_hid_runtime_valid ? desc_hid_runtime_len : sizeof(desc_hid_report); + } else { + report_len = itf->report_desc_len; + } + subclass = itf->itf_subclass; + protocol = itf->itf_protocol; + ep_interval = itf->ep_in_interval ? itf->ep_in_interval : HID_POLLING_INTERVAL_MS; + has_out = itf->has_ep_out; + ep_out_interval_val = itf->ep_out_interval ? itf->ep_out_interval : ep_interval; + } else { + // Default single-interface fallback + report_len = desc_hid_runtime_valid ? desc_hid_runtime_len : sizeof(desc_hid_report); + subclass = host_config_info.valid ? host_config_info.bInterfaceSubClass : 0; + protocol = host_config_info.valid ? host_config_info.bInterfaceProtocol : HID_ITF_PROTOCOL_NONE; + ep_interval = host_config_info.valid ? host_config_info.bInterval : HID_POLLING_INTERVAL_MS; + } + + // CRITICAL: wMaxPacketSize must be our buffer size, not the host's. + // The downstream PC uses this to allocate receive buffers. + uint16_t written = write_hid_interface_desc( + &desc_configuration_runtime[pos], DESC_CONFIG_RUNTIME_MAX - pos, + i, // bInterfaceNumber + subclass, protocol, + report_len, + 0x81 + i, // EP IN address: 0x81, 0x82, 0x83, 0x84 + CFG_TUD_HID_EP_BUFSIZE, + ep_interval, + has_out, ep_out_interval_val + ); + if (written == 0) break; // Buffer full + pos += written; + } + + // Fill config descriptor header (first 9 bytes) + desc_configuration_runtime[0] = 9; // bLength + desc_configuration_runtime[1] = TUSB_DESC_CONFIGURATION; // bDescriptorType + desc_configuration_runtime[2] = (uint8_t)(pos & 0xFF); // wTotalLength low + desc_configuration_runtime[3] = (uint8_t)((pos >> 8) & 0xFF); // wTotalLength high + desc_configuration_runtime[4] = num_itfs; // bNumInterfaces + desc_configuration_runtime[5] = 1; // bConfigurationValue + desc_configuration_runtime[6] = 0; // iConfiguration + desc_configuration_runtime[7] = cfg_attributes; // bmAttributes + desc_configuration_runtime[8] = cfg_max_power; // bMaxPower + desc_config_runtime_valid = true; 
} -// Parse host configuration descriptor to extract endpoint size, interval, power, etc. +// Parse host configuration descriptor to extract ALL HID interfaces and their endpoints. +// Populates mirrored_itfs[] with per-interface endpoint configs, and tracks string indices. static void parse_host_config_descriptor(const uint8_t *cfg_desc, uint16_t cfg_len) { if (!cfg_desc || cfg_len < TUD_CONFIG_DESC_LEN) return; - + // Extract config-level fields host_config_info.bmAttributes = cfg_desc[7] & 0x7F; // Mask off reserved bit 7 (we add it back) host_config_info.bMaxPower = cfg_desc[8]; - - // Walk the descriptor chain to find HID interface + endpoint + + // Reset per-interface state + expected_hid_itf_count = 0; + + // Walk the descriptor chain to find ALL HID interfaces + their endpoints uint16_t offset = 0; - bool found_hid_interface = false; - + int current_hid_idx = -1; // Index into mirrored_itfs for current HID interface + while (offset + 1 < cfg_len) { uint8_t desc_length = cfg_desc[offset]; uint8_t desc_type = cfg_desc[offset + 1]; - + if (desc_length == 0) break; // Prevent infinite loop on malformed descriptors - + // Interface descriptor if (desc_type == TUSB_DESC_INTERFACE && desc_length >= 9 && offset + 8 < cfg_len) { uint8_t itf_class = cfg_desc[offset + 5]; - if (itf_class == TUSB_CLASS_HID) { - host_config_info.bInterfaceSubClass = cfg_desc[offset + 6]; - host_config_info.bInterfaceProtocol = cfg_desc[offset + 7]; - found_hid_interface = true; + if (itf_class == TUSB_CLASS_HID && expected_hid_itf_count < MAX_DEVICE_HID_INTERFACES) { + current_hid_idx = expected_hid_itf_count; + mirrored_itfs[current_hid_idx].itf_subclass = cfg_desc[offset + 6]; + mirrored_itfs[current_hid_idx].itf_protocol = cfg_desc[offset + 7]; + mirrored_itfs[current_hid_idx].has_ep_out = false; + + // Track interface string index for faithful string mirroring + uint8_t iInterface = cfg_desc[offset + 8]; + if (iInterface > 0 && iInterface > max_string_index_seen) + 
max_string_index_seen = iInterface; + + // Back-compat: populate legacy host_config_info from first HID interface + if (expected_hid_itf_count == 0) { + host_config_info.bInterfaceSubClass = cfg_desc[offset + 6]; + host_config_info.bInterfaceProtocol = cfg_desc[offset + 7]; + } + + expected_hid_itf_count++; + } else { + current_hid_idx = -1; // Non-HID interface, ignore endpoints } } - - // Endpoint descriptor (IN endpoint after HID interface) - if (found_hid_interface && desc_type == TUSB_DESC_ENDPOINT && desc_length >= 7 && offset + 6 < cfg_len) { + + // Endpoint descriptor (after a HID interface) + if (current_hid_idx >= 0 && desc_type == TUSB_DESC_ENDPOINT && desc_length >= 7 && offset + 6 < cfg_len) { uint8_t ep_addr = cfg_desc[offset + 2]; uint8_t ep_attr = cfg_desc[offset + 3]; - - // Only capture IN interrupt endpoint (direction bit 7 set, transfer type = interrupt) - if ((ep_addr & 0x80) && (ep_attr & 0x03) == TUSB_XFER_INTERRUPT) { - host_config_info.wMaxPacketSize = cfg_desc[offset + 4] | (cfg_desc[offset + 5] << 8); - host_config_info.bInterval = cfg_desc[offset + 6]; - host_config_info.valid = true; - break; // Found what we need + uint16_t ep_size = cfg_desc[offset + 4] | (cfg_desc[offset + 5] << 8); + uint8_t ep_interval = cfg_desc[offset + 6]; + + if ((ep_attr & 0x03) == TUSB_XFER_INTERRUPT) { + if (ep_addr & 0x80) { + // IN endpoint + mirrored_itfs[current_hid_idx].ep_in_max_packet = ep_size; + mirrored_itfs[current_hid_idx].ep_in_interval = ep_interval; + + // Back-compat: populate legacy host_config_info from first IN endpoint + if (!host_config_info.valid) { + host_config_info.wMaxPacketSize = ep_size; + host_config_info.bInterval = ep_interval; + host_config_info.valid = true; + } + } else { + // OUT endpoint + mirrored_itfs[current_hid_idx].has_ep_out = true; + mirrored_itfs[current_hid_idx].ep_out_max_packet = ep_size; + mirrored_itfs[current_hid_idx].ep_out_interval = ep_interval; + } } } - + offset += desc_length; } + + if 
(expected_hid_itf_count == 0) { + expected_hid_itf_count = 1; // At least 1 interface expected + } } // Static fallback (used only for initial sizeof reference) @@ -2791,21 +3240,9 @@ uint16_t const *tud_descriptor_string_cb(uint8_t index, uint16_t langid) } else { - // Note: the 0xEE index string is a Microsoft OS 1.0 Descriptors. - // https://docs.microsoft.com/en-us/windows-hardware/drivers/usbcon/microsoft-defined-usb-descriptors - - if (!(index < sizeof(string_desc_arr) / sizeof(string_desc_arr[BUFFER_FIRST_ELEMENT_INDEX]))) - { - return NULL; - } + const char *str = NULL; - const char *str = string_desc_arr[index]; - if (str == NULL) - { - return NULL; - } - - // Use dynamic string descriptors if available + // Standard indices 1-3: manufacturer, product, serial if (string_descriptors_fetched) { switch (index) { case STRING_DESC_MANUFACTURER_IDX: @@ -2822,17 +3259,34 @@ uint16_t const *tud_descriptor_string_cb(uint8_t index, uint16_t langid) } break; default: - // Use default for other indices break; } - } else { - // Fallback to default strings if not fetched yet - if (index == STRING_DESC_SERIAL_IDX) { - str = get_dynamic_serial_string(); + } + + // Check extra string cache for higher indices (interface strings, etc.) + if (str == NULL && index > STRING_DESC_SERIAL_IDX) { + for (uint8_t i = 0; i < extra_string_count; i++) { + if (extra_strings[i].valid && extra_strings[i].index == index) { + str = extra_strings[i].str; + break; + } + } + } + + // Fallback to static array or defaults + if (str == NULL) { + if (index < sizeof(string_desc_arr) / sizeof(string_desc_arr[0])) { + str = string_desc_arr[index]; } } - // Convert ASCII string to UTF-16 and get character count + // Final fallback for serial + if (str == NULL && index == STRING_DESC_SERIAL_IDX) { + str = get_dynamic_serial_string(); + } + + if (str == NULL) return NULL; + chr_count = convert_string_to_utf16(str, _desc_str); }