From 55a575923c2e4a0e25b4da4afa12697f88581e3a Mon Sep 17 00:00:00 2001 From: Karem Date: Sun, 28 Jun 2026 03:34:30 +0300 Subject: [PATCH] perf(dnd): stream dropped files as base64 to cut peak memory on large videos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit build_drop_payload previously read the whole file into a Vec, base64-encoded it into a String, copied that into a serde_json::Value, then serialized the Value to the output String — ~4 simultaneous copies, so a 100 MB video drop peaked near half a gigabyte and could OOM or stall the window thread. Now each file is read in 48 KiB chunks and its base64 is written once, directly into the output JSON buffer (manual JSON assembly; serde_json still escapes the name/type strings). A 0-2 byte carry keeps base64 emission aligned to whole 3-byte groups across reads (only the final group is padded), and a mark/truncate rollback keeps `out` valid JSON if a read fails mid-stream. Peak allocation drops from ~4x the file size to roughly the output buffer itself — about 1.33x the combined size of the accepted files (the base64 expansion) — plus a fixed 48 KiB read buffer. Adds tests: streaming-vs-one-shot parity across the read-chunk boundary for each length-mod-3 case, empty file, full payload round-trip (name/type/b64), and empty input. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_016o9cWBaPy4zU4BAurUVoTp --- src-tauri/src/window.rs | 191 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 175 insertions(+), 16 deletions(-) diff --git a/src-tauri/src/window.rs b/src-tauri/src/window.rs index fce4cbd..0e031fc 100644 --- a/src-tauri/src/window.rs +++ b/src-tauri/src/window.rs @@ -122,8 +122,16 @@ fn register_drop_handler(win: &WebviewWindow) { /// Read the dropped files into a JSON array `[{name,type,b64}]` for the page-side /// injector. Skips anything too large, non-regular, or unreadable (logging each skip). 
+/// +/// Memory: each file's base64 is streamed straight into the shared output buffer (see +/// [`append_file_base64`]), reading the file in bounded chunks. A large video is therefore +/// never simultaneously resident as raw bytes *and* a base64 `String` *and* a +/// `serde_json::Value` *and* the serialized output (the old path held ~4 full copies — a +/// 100 MB drop peaked near half a gigabyte). Peak allocation is now roughly the output buffer +/// itself (~1.33x the combined size of the accepted files) plus a fixed 48 KiB read buffer. fn build_drop_payload(paths: &[std::path::PathBuf]) -> Option { - let mut items: Vec = Vec::new(); + let mut out = String::from("["); + let mut wrote_any = false; for p in paths.iter().take(MAX_DROP_FILES) { let name = p .file_name() @@ -148,19 +156,36 @@ fn build_drop_payload(paths: &[std::path::PathBuf]) -> Option { )); continue; } - match std::fs::read(p) { - Ok(bytes) => { - crate::dlog::log(&format!("dragdrop: read '{name}' ({} bytes)", bytes.len())); - items.push(serde_json::json!({ - "name": name, - "type": mime_for(&name), - "b64": base64_encode(&bytes), - })); + // Rollback point: if the file read fails partway through streaming its base64, we + // truncate the half-written object (and its leading separator) so `out` stays valid + // JSON. serde_json escapes the name/type strings; base64's alphabet (A-Za-z0-9+/=) + // needs no JSON escaping, so it is written raw between the quotes. 
+ let mark = out.len(); + if wrote_any { + out.push(','); + } + out.push_str("{\"name\":"); + out.push_str(&serde_json::to_string(&name).unwrap_or_else(|_| "\"file\"".to_string())); + out.push_str(",\"type\":"); + out.push_str( + &serde_json::to_string(mime_for(&name)) + .unwrap_or_else(|_| "\"application/octet-stream\"".to_string()), + ); + out.push_str(",\"b64\":\""); + match append_file_base64(&mut out, p) { + Ok(n) => { + out.push_str("\"}"); + wrote_any = true; + crate::dlog::log(&format!("dragdrop: read '{name}' ({n} bytes)")); + } + Err(e) => { + out.truncate(mark); + crate::dlog::log(&format!("dragdrop: read '{name}' failed: {e}")); } - Err(e) => crate::dlog::log(&format!("dragdrop: read '{name}' failed: {e}")), } } - serde_json::to_string(&items).ok() + out.push(']'); + Some(out) } /// Best-effort MIME from the file extension, so WhatsApp routes images/videos/docs to @@ -200,11 +225,16 @@ fn mime_for(name: &str) -> &'static str { } } -/// Standard base64 (RFC 4648, with `=` padding). Hand-rolled to avoid pulling a crate -/// into this otherwise lean dependency tree; only used to ferry dropped bytes to the page. -fn base64_encode(data: &[u8]) -> String { +/// Append the standard base64 (RFC 4648, with `=` padding) of `data` to `out`. Hand-rolled +/// to avoid pulling a crate into this otherwise lean dependency tree. +/// +/// Encodes per 3-byte group, padding only a final partial group. Callers that feed data +/// across multiple calls (streaming) MUST pass whole 3-byte groups on every call except the +/// last — otherwise an interior partial group would be padded mid-stream. [`append_file_base64`] +/// upholds that contract via a small carry buffer. 
+fn base64_encode_into(out: &mut String, data: &[u8]) { const T: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - let mut out = String::with_capacity(data.len().div_ceil(3) * 4); + out.reserve(data.len().div_ceil(3) * 4); for chunk in data.chunks(3) { let b0 = chunk[0] as u32; let b1 = *chunk.get(1).unwrap_or(&0) as u32; @@ -223,9 +253,69 @@ fn base64_encode(data: &[u8]) -> String { '=' }); } +} + +/// Standard base64 of `data` as an owned `String`. Thin wrapper over [`base64_encode_into`]; +/// retained for callers/tests that want the whole encoding at once. +fn base64_encode(data: &[u8]) -> String { + let mut out = String::with_capacity(data.len().div_ceil(3) * 4); + base64_encode_into(&mut out, data); out } +/// Append the base64 of the file at `path` to `out`, reading in bounded 48 KiB chunks so the +/// file is never fully resident in memory — the key to dropping a large *video* without a +/// half-gigabyte spike. Returns the number of bytes read. +/// +/// base64 must be emitted in whole 3-byte groups (only the final group is padded), but a +/// `read` can return any number of bytes, so a 0–2 byte `carry` holds the bytes that don't +/// yet complete a group and rolls them into the next read; the EOF flush pads whatever +/// remains. Every encode call but the EOF flush is therefore a multiple of three bytes. +/// +/// The 48 KiB stack buffer is already large, so we read the `File` directly rather than +/// wrapping it in a `BufReader` (which would only add a redundant intermediate copy here). 
+fn append_file_base64(out: &mut String, path: &std::path::Path) -> std::io::Result { + use std::io::Read; + let mut file = std::fs::File::open(path)?; + let mut buf = [0u8; 48 * 1024]; // 49152 = an exact number of 3-byte groups + let mut carry = [0u8; 3]; + let mut carry_len = 0usize; + let mut total: u64 = 0; + loop { + let n = file.read(&mut buf)?; + if n == 0 { + break; + } + total += n as u64; + let data = &buf[..n]; + let mut i = 0; + // 1) Top up a carried partial group from the front of this read, then flush it. + while carry_len > 0 && carry_len < 3 && i < n { + carry[carry_len] = data[i]; + carry_len += 1; + i += 1; + } + if carry_len == 3 { + base64_encode_into(out, &carry); // a full group → no padding + carry_len = 0; + } + // 2) Bulk-encode the complete 3-byte groups remaining in this read. + let remaining = n - i; + let groups = remaining - (remaining % 3); + if groups > 0 { + base64_encode_into(out, &data[i..i + groups]); + } + // 3) Stash the trailing 0–2 bytes as the new carry. + for &b in &data[i + groups..n] { + carry[carry_len] = b; + carry_len += 1; + } + } + // EOF: encode whatever is left in the carry, padding the final partial group. + base64_encode_into(out, &carry[..carry_len]); + Ok(total) +} + /// Track the last-focused account window in `ActiveAccount`. Registered once per /// window inside `open_account_window`, so startup *and* dynamically-added windows /// get it exactly once. @@ -492,7 +582,9 @@ pub fn open_settings_window(app: &AppHandle) { #[cfg(test)] mod tests { - use super::{base64_encode, mime_for, toggle_decision, ToggleAct}; + use super::{ + append_file_base64, base64_encode, build_drop_payload, mime_for, toggle_decision, ToggleAct, + }; #[test] fn base64_matches_rfc4648_vectors() { @@ -512,6 +604,73 @@ mod tests { assert_eq!(base64_encode(&[0x00]), "AA=="); } + // Write `bytes` to a unique temp file and return its path. Caller removes it. 
+ fn write_temp(tag: &str, bytes: &[u8]) -> std::path::PathBuf { + use std::io::Write; + let p = std::env::temp_dir().join(format!( + "whatrust_test_{}_{}_{tag}", + std::process::id(), + bytes.len() + )); + std::fs::File::create(&p).unwrap().write_all(bytes).unwrap(); + p + } + + #[test] + fn streaming_base64_matches_oneshot_across_chunk_boundary() { + // append_file_base64 reads in 48 KiB chunks and carries 0..2 bytes between reads. + // Exercise a size just past one chunk for each length-mod-3 case so the carry/padding + // path is covered, and confirm it byte-for-byte matches the one-shot encoder. + for extra in [0usize, 1, 2] { + let len = 48 * 1024 + 3 + extra; + let data: Vec = (0..len).map(|i| (i.wrapping_mul(31).wrapping_add(7)) as u8).collect(); + let path = write_temp(&format!("stream{extra}.bin"), &data); + let mut streamed = String::from("prefix:"); // also proves it APPENDS, not overwrites + let n = append_file_base64(&mut streamed, &path).unwrap(); + let _ = std::fs::remove_file(&path); + assert_eq!(n, len as u64); + assert_eq!(streamed, format!("prefix:{}", base64_encode(&data)), "mismatch at extra={extra}"); + } + } + + #[test] + fn empty_file_streams_to_empty_base64() { + let path = write_temp("empty.bin", b""); + let mut s = String::new(); + let n = append_file_base64(&mut s, &path).unwrap(); + let _ = std::fs::remove_file(&path); + assert_eq!(n, 0); + assert_eq!(s, ""); + } + + #[test] + fn build_drop_payload_roundtrips_name_type_b64() { + // A small image + a video spanning the read-chunk boundary: the JSON must parse, and + // each entry's name/type/b64 must round-trip (b64 == one-shot encoding of the bytes). 
+ let img_bytes: Vec = vec![0x89, 0x50, 0x4e, 0x47, 1, 2, 3, 4, 5]; + let vid_bytes: Vec = (0..(48 * 1024 + 5)).map(|i| (i % 251) as u8).collect(); + let img = write_temp("shot.png", &img_bytes); + let vid = write_temp("clip.mp4", &vid_bytes); + let json = build_drop_payload(&[img.clone(), vid.clone()]).unwrap(); + let _ = std::fs::remove_file(&img); + let _ = std::fs::remove_file(&vid); + + let v: serde_json::Value = serde_json::from_str(&json).expect("payload must be valid JSON"); + let arr = v.as_array().unwrap(); + assert_eq!(arr.len(), 2); + assert_eq!(arr[0]["type"], "image/png"); + assert_eq!(arr[0]["b64"], base64_encode(&img_bytes)); + assert!(arr[0]["name"].as_str().unwrap().ends_with(".png")); + assert_eq!(arr[1]["type"], "video/mp4"); + assert_eq!(arr[1]["b64"], base64_encode(&vid_bytes)); + assert!(arr[1]["name"].as_str().unwrap().ends_with(".mp4")); + } + + #[test] + fn build_drop_payload_empty_for_no_files() { + assert_eq!(build_drop_payload(&[]).unwrap(), "[]"); + } + #[test] fn mime_is_extension_and_case_insensitive() { assert_eq!(mime_for("Photo.JPG"), "image/jpeg");