diff --git a/crates/forkd-cli/src/hub.rs b/crates/forkd-cli/src/hub.rs index 8eaf3e2..84f84e5 100644 --- a/crates/forkd-cli/src/hub.rs +++ b/crates/forkd-cli/src/hub.rs @@ -99,6 +99,33 @@ pub struct Manifest { /// relative to `/` inside the tar. #[serde(default, skip_serializing_if = "Vec::is_empty")] pub chain: Vec, + /// #242: the rootfs block image this snapshot's vmstate was frozen + /// against. It lives OUTSIDE the snapshot dir (`from-image` keeps it + /// at `/var/cache/forkd/.ext4`), so it never travels inside + /// the `.tar.zst`. Instead it ships as a content-addressed sidecar + /// asset (`.rootfs.zst`, a sibling of the pack) and the + /// puller places it back at `target_path` — the exact path FC + /// reopens at restore. `None` for snapshots packed before this + /// existed, or whose rootfs path wasn't recorded (those packs are + /// only restorable on the packing host). Additive: older readers + /// `#[serde(default)]` ignore it, so no pack-version bump. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub rootfs: Option, +} + +/// Reference to a rootfs sidecar shipped alongside (not inside) a pack. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RootfsRef { + /// Absolute path Firecracker reopens at restore. The puller places + /// the decompressed rootfs here. Reproducible for `from-image` + /// bakes (`/var/cache/forkd/.ext4`). + pub target_path: String, + /// sha256 of the **uncompressed** rootfs. Used both to name the + /// sidecar (content-addressing → dedup across packs sharing a base) + /// and to skip re-placing when `target_path` already matches. + pub sha256: String, + /// Uncompressed size in bytes (for progress + sanity). + pub size: u64, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -208,6 +235,10 @@ pub fn pack( }); } + // #242: ship the rootfs as a content-addressed sidecar next to the + // pack (it lives outside the snap dir, so it isn't in `files`). 
/// zstd compression level for rootfs sidecars. Level 19 (vs the pack's
/// default 3) buys ~25% on a real filesystem image — worth it because a
/// sidecar is compressed once at pack time and downloaded by everyone.
const ROOTFS_SIDECAR_ZSTD_LEVEL: i32 = 19;

/// Sidecar filename for a rootfs with the given uncompressed sha256.
///
/// Content-addressed: the name is the first 16 hex digits of the digest
/// plus `.rootfs.zst`, so two packs sharing a base rootfs reference —
/// and the puller caches — one asset.
///
/// The digest may come from an untrusted manifest on the pull path, so
/// only ASCII hex digits are kept (lowercased). That guarantees the
/// result is a single path segment: no `/`, no `..`, and no panic when a
/// multi-byte character straddles the 16-byte cut. For a well-formed
/// lowercase digest the result is unchanged: its first 16 characters.
pub fn rootfs_sidecar_name(sha256: &str) -> String {
    let prefix: String = sha256
        .chars()
        .filter(char::is_ascii_hexdigit)
        .take(16)
        .map(|c| c.to_ascii_lowercase())
        .collect();
    format!("{prefix}.rootfs.zst")
}
+fn emit_rootfs_sidecar(snap_dir: &Path, pack_path: &Path) -> Result> { + let rootfs_path = match read_local_snapshot_meta(snap_dir).and_then(|m| m.rootfs) { + Some(p) => p, + None => { + eprintln!( + "⚠ {}/snapshot.json records no rootfs path — this pack will only \ + restore on the host that packed it (re-bake with a current forkd \ + to make it portable; see #242)", + snap_dir.display() + ); + return Ok(None); + } + }; + if !rootfs_path.exists() { + eprintln!( + "⚠ rootfs {} referenced by the snapshot is missing — packing without it; \ + the pack won't be portable", + rootfs_path.display() + ); + return Ok(None); + } + + let sha = sha256_file(&rootfs_path)?; + let size = std::fs::metadata(&rootfs_path)?.len(); + let sidecar = pack_path + .parent() + .unwrap_or_else(|| Path::new(".")) + .join(rootfs_sidecar_name(&sha)); + + if sidecar.exists() { + eprintln!( + "==> rootfs sidecar {} already present — dedup hit, skipping recompress", + sidecar.file_name().unwrap_or_default().to_string_lossy() + ); + } else { + eprintln!( + "==> compressing rootfs sidecar {} (zstd -{ROOTFS_SIDECAR_ZSTD_LEVEL}, {} → once per base)...", + sidecar.file_name().unwrap_or_default().to_string_lossy(), + human_bytes(size), + ); + let t = Instant::now(); + compress_file(&rootfs_path, &sidecar, ROOTFS_SIDECAR_ZSTD_LEVEL)?; + let csize = std::fs::metadata(&sidecar).map(|m| m.len()).unwrap_or(0); + eprintln!( + " {} → {} ({:.1}× ) in {:.1}s", + human_bytes(size), + human_bytes(csize), + if csize > 0 { + size as f64 / csize as f64 + } else { + 0.0 + }, + t.elapsed().as_secs_f64(), + ); + } + + Ok(Some(RootfsRef { + target_path: rootfs_path.to_string_lossy().into_owned(), + sha256: sha, + size, + })) +} + +/// zstd-compress `src` → `dst` at `level`. Used for rootfs sidecars. 
+fn compress_file(src: &Path, dst: &Path, level: i32) -> Result<()> { + let mut input = File::open(src).with_context(|| format!("open {}", src.display()))?; + let output = File::create(dst).with_context(|| format!("create {}", dst.display()))?; + let mut enc = zstd::Encoder::new(output, level).context("init zstd encoder")?; + io::copy(&mut input, &mut enc).with_context(|| format!("compress {}", src.display()))?; + enc.finish().context("finish zstd stream")?; + Ok(()) +} + +/// Decompress a `.rootfs.zst` sidecar (`src`) → `dst`, verifying the +/// uncompressed bytes hash to `expected_sha`. Atomic: writes to a temp +/// sibling and renames, so an interrupted pull can't leave a truncated +/// rootfs that looks complete. +pub fn place_rootfs_sidecar(src: &Path, dst: &Path, expected_sha: &str) -> Result<()> { + if let Some(parent) = dst.parent() { + std::fs::create_dir_all(parent) + .with_context(|| format!("create rootfs dir {}", parent.display()))?; + } + let tmp = dst.with_extension("ext4.partial"); + { + let input = File::open(src).with_context(|| format!("open sidecar {}", src.display()))?; + let mut dec = zstd::Decoder::new(input).context("init zstd decoder on sidecar")?; + let mut out = File::create(&tmp).with_context(|| format!("create {}", tmp.display()))?; + io::copy(&mut dec, &mut out) + .with_context(|| format!("decompress sidecar to {}", tmp.display()))?; + } + let actual = sha256_file(&tmp)?; + if !actual.eq_ignore_ascii_case(expected_sha) { + let _ = std::fs::remove_file(&tmp); + bail!( + "rootfs sidecar integrity check failed: decompressed sha256={actual} \ + but manifest says {expected_sha}" + ); + } + std::fs::rename(&tmp, dst) + .with_context(|| format!("move {} → {}", tmp.display(), dst.display()))?; + Ok(()) +} + /// Render a list-of-local-snapshots line for `forkd images list`. Walks /// `snapshots/` under the data dir and reports tag + total size + /// memory.bin size + dir mtime. 
@@ -1063,6 +1221,7 @@ mod tests { sha256: "abc".into(), }], chain: Vec::new(), + rootfs: None, }; let s = toml::to_string_pretty(&m).unwrap(); let m2: Manifest = toml::from_str(&s).unwrap(); @@ -1111,6 +1270,7 @@ mod tests { }], }, ], + rootfs: None, }; let s = toml::to_string_pretty(&m).unwrap(); let m2: Manifest = toml::from_str(&s).unwrap(); @@ -1173,6 +1333,7 @@ mod tests { volumes: Vec::new(), parent_tag: None, parent_content_hash: None, + rootfs: None, }; std::fs::write( base_dir.join("snapshot.json"), @@ -1191,6 +1352,7 @@ mod tests { volumes: Vec::new(), parent_tag: Some("py-base".to_string()), parent_content_hash: Some(base_hash), + rootfs: None, }; std::fs::write( head_dir.join("snapshot.json"), @@ -1215,6 +1377,7 @@ mod tests { volumes: Vec::new(), parent_tag: None, parent_content_hash: None, + rootfs: None, }; std::fs::write( base_dir.join("snapshot.json"), @@ -1316,6 +1479,7 @@ mod tests { parent_content_hash: None, files: vec![], }], + rootfs: None, }; let manifest_toml = toml::to_string_pretty(&evil).unwrap(); let out_file = File::create(&pack_out).unwrap(); diff --git a/crates/forkd-cli/src/main.rs b/crates/forkd-cli/src/main.rs index 472a870..d03e91a 100644 --- a/crates/forkd-cli/src/main.rs +++ b/crates/forkd-cli/src/main.rs @@ -1178,7 +1178,99 @@ fn unpack_cmd(path: PathBuf, tag: Option, force: bool) -> Result<()> { // no-op; otherwise it removes the half-extracted scratch dir. let _ = std::fs::remove_dir_all(&tmp); } - result + // #242: a local unpack finds the rootfs sidecar next to the pack. + if let Ok(Some(rootfs)) = &result { + satisfy_rootfs(rootfs, SidecarSource::LocalSibling(&path))?; + } + result.map(|_| ()) +} + +/// Where to find a pack's rootfs sidecar (`.rootfs.zst`). +enum SidecarSource<'a> { + /// Sibling file of a local pack: `/`. + LocalSibling(&'a std::path::Path), + /// Sibling URL of a downloaded pack: `/`. 
+ RemoteSibling(&'a str), +} + +/// #242: ensure the rootfs a pulled/unpacked snapshot needs is present +/// at the absolute path Firecracker will reopen at restore. Skips when +/// the target already exists with a matching sha (dedup across packs +/// sharing a base); warns (does not fail) when the sidecar can't be +/// found, so the user gets an actionable message at pull time instead +/// of a cryptic block-device error at first fork. +fn satisfy_rootfs(rootfs: &hub::RootfsRef, source: SidecarSource) -> Result<()> { + let dst = PathBuf::from(&rootfs.target_path); + if dst.exists() { + if let Ok(existing) = hub::sha256_file(&dst) { + if existing.eq_ignore_ascii_case(&rootfs.sha256) { + eprintln!( + "✓ rootfs already present at {} (sha match) — reusing", + dst.display() + ); + return Ok(()); + } + } + } + let name = hub::rootfs_sidecar_name(&rootfs.sha256); + match source { + SidecarSource::LocalSibling(pack) => { + let sc = pack + .parent() + .unwrap_or_else(|| std::path::Path::new(".")) + .join(&name); + if !sc.exists() { + eprintln!( + "⚠ rootfs sidecar {name} not found next to the pack.\n \ + This snapshot needs {} to restore. Place the sidecar beside \ + the pack, or rebuild locally with `forkd from-image`. 
(#242)", + rootfs.target_path + ); + return Ok(()); + } + eprintln!( + "==> placing rootfs → {} (from {})", + dst.display(), + sc.display() + ); + hub::place_rootfs_sidecar(&sc, &dst, &rootfs.sha256)?; + eprintln!("✓ rootfs ready at {}", dst.display()); + } + SidecarSource::RemoteSibling(pack_url) => { + let sc_url = sibling_url(pack_url, &name); + eprintln!( + "==> downloading rootfs sidecar {name} ({})", + hub::human_bytes(rootfs.size) + ); + let tmp = std::env::temp_dir().join(format!("forkd-rootfs-{}.zst", std::process::id())); + match hub::download(&sc_url, &tmp) { + Ok(_) => { + let r = hub::place_rootfs_sidecar(&tmp, &dst, &rootfs.sha256); + let _ = std::fs::remove_file(&tmp); + r?; + eprintln!("✓ rootfs ready at {}", dst.display()); + } + Err(e) => { + let _ = std::fs::remove_file(&tmp); + eprintln!( + "⚠ couldn't fetch rootfs sidecar from {sc_url}: {e}\n \ + This snapshot needs {} to restore — rebuild locally with \ + `forkd from-image` if the sidecar isn't published. (#242)", + rootfs.target_path + ); + } + } + } + } + Ok(()) +} + +/// Replace the last path segment of `url` with `name`. +fn sibling_url(url: &str, name: &str) -> String { + match url.rfind('/') { + Some(i) => format!("{}/{}", &url[..i], name), + None => name.to_string(), + } } fn unpack_into( @@ -1186,11 +1278,12 @@ fn unpack_into( tmp: &std::path::Path, tag: Option, force: bool, -) -> Result<()> { +) -> Result> { let manifest = hub::unpack(path, tmp)?; + let rootfs = manifest.rootfs.clone(); if !manifest.chain.is_empty() { - return unpack_chain_into(tmp, manifest, tag, force); + return unpack_chain_into(tmp, manifest, tag, force).map(|()| rootfs); } // v1 layout (legacy single-snapshot pack). 
Validate the @@ -1231,7 +1324,7 @@ fn unpack_into( hub::rewrite_snapshot_paths(&dest)?; eprintln!("✓ unpacked tag '{final_tag}' at {}", dest.display()); eprintln!(" next: forkd fork --tag {final_tag} -n "); - Ok(()) + Ok(rootfs) } /// v0.5 Phase 3: materialize a v2 chain pack — every link in @@ -1400,7 +1493,20 @@ fn pull_cmd(target: String, tag: Option, force: bool, hub: Option.rootfs.zst` next to the (now-deleted) temp pack; + // for the GitHub-release hub, the portable-publish flow is + // `forkd pack --out /X.tar.zst` (emits both files) then upload + // both to the release. Warn so a presigned-PUT push doesn't silently + // publish an unrestorable pack. + if let Some(rootfs) = &manifest.rootfs { + let name = hub::rootfs_sidecar_name(&rootfs.sha256); + eprintln!( + "⚠ this snapshot has a rootfs sidecar ({name}, {}). `push` uploaded only \ + the pack — also upload the sidecar to the SAME directory as a sibling \ + ({}), or pullers won't be able to restore. Prefer `forkd pack` + a \ + release upload of both files for hub publishing. (#242)", + hub::human_bytes(rootfs.size), + sibling_url(&url, &name), + ); + } Ok(()) } @@ -2337,9 +2461,17 @@ fn snapshot_cmd( eprintln!("==> snapshotting to {}...", snap_dir.display()); let t = Instant::now(); - let snap = vm + let mut snap = vm .snapshot_to(vmstate, memory, volumes) .context("snapshot create")?; + // Record the rootfs path Firecracker froze into the vmstate so + // `pack` / `pull` can ship + relocate it (issue #242). Canonicalize + // to the absolute path FC actually reopens at restore. 
+ snap.rootfs = Some( + cfg.rootfs + .canonicalize() + .unwrap_or_else(|_| cfg.rootfs.clone()), + ); eprintln!(" snapshot took {} ms", t.elapsed().as_millis()); // Persist Snapshot metadata so subsequent `forkd fork` / `forkd run` @@ -2410,6 +2542,7 @@ fn load_snapshot_meta(snap_dir: &std::path::Path) -> Result { volumes: Vec::new(), parent_tag: None, parent_content_hash: None, + rootfs: None, }) } diff --git a/crates/forkd-controller/src/http.rs b/crates/forkd-controller/src/http.rs index 9e3ac23..13d18cb 100644 --- a/crates/forkd-controller/src/http.rs +++ b/crates/forkd-controller/src/http.rs @@ -630,6 +630,9 @@ async fn compact_snapshot( volumes: head_snapshot.volumes.clone(), parent_tag: None, parent_content_hash: None, + // Compaction flattens but doesn't change the rootfs — inherit + // the head's so the compacted base stays hub-portable (#242). + rootfs: head_snapshot.rootfs.clone(), }; let staging_meta_path = staging.join("snapshot.json"); let new_meta_json = match serde_json::to_vec_pretty(&new_meta) { @@ -707,6 +710,7 @@ fn load_snapshot_with_fallback(snap_dir: &std::path::Path) -> forkd_vmm::Snapsho volumes: Vec::new(), parent_tag: None, parent_content_hash: None, + rootfs: None, }) } @@ -1427,6 +1431,7 @@ async fn branch_sandbox( volumes: source_volumes, parent_tag: None, parent_content_hash: None, + rootfs: None, }); } #[cfg(not(target_os = "linux"))] @@ -1557,6 +1562,7 @@ async fn branch_sandbox( volumes: diff_snap.volumes, parent_tag: None, parent_content_hash: None, + rootfs: None, } } else { let snap = vm.snapshot_to( @@ -2246,6 +2252,7 @@ fn spawn_one_for_workspace( volumes: Vec::new(), parent_tag: None, parent_content_hash: None, + rootfs: None, }, }; let netns_offset = if per_child_netns { @@ -2477,6 +2484,7 @@ async fn suspend_workspace( volumes: diff_snap.volumes, parent_tag: None, parent_content_hash: None, + rootfs: None, } } else { let snap = vm.snapshot_to( @@ -2650,6 +2658,7 @@ mod tests { volumes: Vec::new(), parent_tag: None, 
parent_content_hash: None, + rootfs: None, }; std::fs::write( dir.join("snapshot.json"), @@ -2676,6 +2685,7 @@ mod tests { volumes: Vec::new(), parent_tag: Some(parent_tag.to_string()), parent_content_hash: Some(parent_hash.to_string()), + rootfs: None, }; std::fs::write( dir.join("snapshot.json"), diff --git a/crates/forkd-vmm/src/chain.rs b/crates/forkd-vmm/src/chain.rs index 2b98faa..e77958d 100644 --- a/crates/forkd-vmm/src/chain.rs +++ b/crates/forkd-vmm/src/chain.rs @@ -381,6 +381,7 @@ mod tests { volumes: Vec::::new(), parent_tag: parent.map(String::from), parent_content_hash: hash.map(String::from), + rootfs: None, } } diff --git a/crates/forkd-vmm/src/lib.rs b/crates/forkd-vmm/src/lib.rs index 1a569ac..0ebc2c5 100644 --- a/crates/forkd-vmm/src/lib.rs +++ b/crates/forkd-vmm/src/lib.rs @@ -408,6 +408,17 @@ pub struct Snapshot { /// bytes. Always set when `parent_tag` is set; `None` for bases. #[serde(default, skip_serializing_if = "Option::is_none")] pub parent_content_hash: Option, + /// Absolute path to the rootfs block image this snapshot's vmstate + /// was frozen against. Firecracker bakes this path into the vmstate + /// and reopens it verbatim at restore, so a snapshot only restores + /// on a host where this exact path exists. Recorded here (v0.5.3+) + /// so `pack` / `pull` can ship the rootfs as a content-addressed + /// sidecar and place it back at this path on the puller — see + /// `forkd pack` / issue #242. `None` for snapshots written before + /// this field existed, and for daemon-side branches that inherit + /// the source's rootfs. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub rootfs: Option, } /// Result of a Diff snapshot. 
`memory_diff` is a sparse file the same @@ -1134,6 +1145,7 @@ impl Vm { volumes, parent_tag: None, parent_content_hash: None, + rootfs: None, }) } @@ -1242,6 +1254,7 @@ impl Vm { volumes, parent_tag: None, parent_content_hash: None, + rootfs: None, }) } @@ -1979,6 +1992,7 @@ mod tests { }], parent_tag: None, parent_content_hash: None, + rootfs: None, }; let json = serde_json::to_string(&s).unwrap(); let back: Snapshot = serde_json::from_str(&json).unwrap(); @@ -2005,6 +2019,7 @@ mod tests { volumes: Vec::new(), parent_tag: Some("python-numpy".to_string()), parent_content_hash: Some("a".repeat(64)), + rootfs: None, }; let json = serde_json::to_string(&s).unwrap(); let back: Snapshot = serde_json::from_str(&json).unwrap();