From 1261fb1521bd4baf22c10815114da49ef5540e73 Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Tue, 26 May 2026 19:31:36 -0400 Subject: [PATCH 01/30] config: introduce top-level `permissions:` block for rootfs/initramfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users and groups are now declared in a top-level `permissions:` map and referenced by name from `rootfs.<name>.permissions` / `initramfs.<name>.permissions` (or inlined), instead of buried inside one extension. This puts identity provisioning at the image layer where a single coherent passwd/shadow/group makes sense, lets the same block be reused across rootfs and initramfs, and leaves room to grow into directory perms or sudoers without further grammar churn. When no `permissions:` is set on an image, no script section is emitted — the base packages' generic /etc/passwd/shadow/group are left untouched. Extensions that still declare `users:` / `groups:` continue to work but emit a deprecation warning; that path will be removed in a future release. The script generator was extracted from ext/build.rs into a shared `utils::permissions::render_users_groups_script` helper so the legacy extension path and the new rootfs/initramfs path share one implementation. 
--- src/commands/ext/build.rs | 479 ++----------------- src/commands/initramfs/image.rs | 19 + src/commands/rootfs/image.rs | 19 + src/commands/runtime/build.rs | 28 +- src/utils/config.rs | 243 ++++++++++ src/utils/mod.rs | 1 + src/utils/permissions.rs | 476 ++++++++++++++++++ src/utils/runtime.rs | 1 + src/utils/target.rs | 3 + tests/fixtures/configs/with-permissions.yaml | 35 ++ 10 files changed, 857 insertions(+), 447 deletions(-) create mode 100644 src/utils/permissions.rs create mode 100644 tests/fixtures/configs/with-permissions.yaml diff --git a/src/commands/ext/build.rs b/src/commands/ext/build.rs index 82a2e511..4346645d 100644 --- a/src/commands/ext/build.rs +++ b/src/commands/ext/build.rs @@ -8,6 +8,7 @@ use crate::utils::config::{ComposedConfig, Config, ExtensionLocation}; use crate::utils::container::{RunConfig, SdkContainer, TuiContext}; use crate::utils::lockfile::LockFile; use crate::utils::output::{print_error, print_info, print_success, print_warning, OutputLevel}; +use crate::utils::permissions::render_users_groups_script; use crate::utils::stamps::{ compute_ext_input_hash, generate_batch_read_stamps_script, generate_write_stamp_script, resolve_required_stamps, validate_stamps_batch, Stamp, StampCommand, StampComponent, @@ -438,9 +439,25 @@ impl ExtBuildCommand { .and_then(|v| v.as_bool()) .unwrap_or(false); - // Get users and groups configuration + // Get users and groups configuration. These are now deprecated on + // extensions — they should be declared in a top-level `permissions:` + // block and referenced from rootfs/initramfs. Continue to honor + // them during the deprecation window, but surface a warning so + // users have time to migrate. let users_config = ext_config.get("users").and_then(|v| v.as_mapping()); let groups_config = ext_config.get("groups").and_then(|v| v.as_mapping()); + if users_config.is_some() || groups_config.is_some() { + print_warning( + &format!( + "[DEPRECATED] extension '{}' declares `users:` / `groups:`. 
\ + These are deprecated on extensions and will be removed in a future release. \ + Declare users and groups in a top-level `permissions:` block and reference \ + it from your rootfs/initramfs entries instead.", + self.extension + ), + OutputLevel::Normal, + ); + } // Validate that confext is present if enable_services is used if !enable_services.is_empty() && !ext_types.contains(&"confext") { @@ -1284,454 +1301,22 @@ fi } /// Creates a script section for handling user and group configuration - /// This will copy passwd/shadow/group files and create/modify users and groups + /// Thin wrapper around [`render_users_groups_script`] that targets the + /// extension's sysroot `/etc` and seeds it from the rootfs `/etc`. Kept + /// for the legacy `extensions..users`/`groups` path during the + /// deprecation period; new code should call the shared helper directly. fn create_users_script_section( &self, users_config: Option<&serde_yaml::Mapping>, groups_config: Option<&serde_yaml::Mapping>, ) -> String { - // If neither users nor groups are configured, return empty string - if users_config.is_none() && groups_config.is_none() { - return String::new(); - } - - let mut script_lines = Vec::new(); - let mut has_valid_users = false; - script_lines.push("\n# Copy and manage user authentication files".to_string()); - - // Copy authentication files from rootfs - script_lines.push(format!( - r#" -# Copy authentication files from rootfs to extension -echo "Copying /etc/passwd, /etc/shadow, and /etc/group from rootfs to extension" -mkdir -p "$AVOCADO_EXT_SYSROOTS/{}/etc" -cp "$AVOCADO_PREFIX/rootfs/etc/passwd" "$AVOCADO_EXT_SYSROOTS/{}/etc/passwd" -cp "$AVOCADO_PREFIX/rootfs/etc/shadow" "$AVOCADO_EXT_SYSROOTS/{}/etc/shadow" -cp "$AVOCADO_PREFIX/rootfs/etc/group" "$AVOCADO_EXT_SYSROOTS/{}/etc/group" -"#, - self.extension, self.extension, self.extension, self.extension - )); - - // Auto-incrementing counters for uid/gid starting at 1000 - script_lines.push( - "# Auto-incrementing 
counters for uid/gid\nCURRENT_UID=1000\nCURRENT_GID=1000\n" - .to_string(), - ); - - // Process groups first (they might be referenced by users) - if let Some(groups) = groups_config { - script_lines.push("\n# Create groups".to_string()); - - for (groupname_val, group_config) in groups { - // Convert groupname from Value to String - let groupname = match groupname_val.as_str() { - Some(name) => name, - None => continue, // Skip if groupname is not a string - }; - - if let Some(group_table) = group_config.as_mapping() { - // Parse comprehensive group configuration with defaults - let gid = if let Some(gid_value) = group_table.get("gid") { - if let Some(gid_num) = gid_value.as_i64() { - gid_num.to_string() - } else if let Some(gid_num) = gid_value.as_u64() { - gid_num.to_string() - } else { - "$CURRENT_GID".to_string() - } - } else { - "$CURRENT_GID".to_string() - }; - - let system_group = group_table - .get("system") - .and_then(|s| s.as_bool()) - .unwrap_or(false); - - let password = group_table - .get("password") - .and_then(|p| p.as_str()) - .unwrap_or(""); // Default: no group password - - let members = if let Some(members_value) = group_table.get("members") { - if let Some(members_array) = members_value.as_sequence() { - members_array - .iter() - .filter_map(|m| m.as_str()) - .collect::>() - .join(",") - } else { - "".to_string() - } - } else { - "".to_string() - }; - - let _admins = if let Some(admins_value) = group_table.get("admins") { - if let Some(admins_array) = admins_value.as_sequence() { - admins_array - .iter() - .filter_map(|a| a.as_str()) - .collect::>() - } else { - vec![] - } - } else { - vec![] - }; - - // Escape password for potential gshadow entry - let _escaped_group_password = password.replace("/", "\\/").replace("&", "\\&"); - - let system_type = if system_group { " (system group)" } else { "" }; - let password_note = if !password.is_empty() { - " with password" - } else { - "" - }; - let members_msg = if !members.is_empty() { - format!(" 
and members: {members}") - } else { - "".to_string() - }; - let password_config = if !password.is_empty() { - format!("\n# Set group password for '{groupname}'\necho \"Note: Group password configured for '{groupname}'\"") - } else { - "".to_string() - }; - - script_lines.push(format!( - r#" -# Create group '{}'{} -echo "Creating group '{}'"{} -if ! grep -q "^{}:" "$AVOCADO_EXT_SYSROOTS/{}/etc/group"; then - echo "{}:x:{}:{}" >> "$AVOCADO_EXT_SYSROOTS/{}/etc/group" - echo "Group '{}' created with GID {}{}" - if [ "{}" = "$CURRENT_GID" ]; then - CURRENT_GID=$((CURRENT_GID + 1)) - fi -else - echo "Group '{}' already exists, updating members" - # Update members if specified - if [ -n "{}" ]; then - sed -i "s|^{}:x:{}:.*$|{}:x:{}:{}|" "$AVOCADO_EXT_SYSROOTS/{}/etc/group" - echo "Updated members for group '{}'" - fi -fi{}"#, - groupname, - system_type, - groupname, - password_note, - groupname, - self.extension, - groupname, - gid, - members, - self.extension, - groupname, - gid, - members_msg, - gid, - groupname, - members, - groupname, - gid, - groupname, - gid, - members, - self.extension, - groupname, - password_config - )); - } else { - // Simple group with just GID auto-assignment - script_lines.push(format!( - r#" -# Create group '{}' -echo "Creating group '{}'" -if ! 
grep -q "^{}:" "$AVOCADO_EXT_SYSROOTS/{}/etc/group"; then - echo "{}:x:$CURRENT_GID:" >> "$AVOCADO_EXT_SYSROOTS/{}/etc/group" - echo "Group '{}' created with GID $CURRENT_GID" - CURRENT_GID=$((CURRENT_GID + 1)) -else - echo "Group '{}' already exists" -fi"#, - groupname, - groupname, - groupname, - self.extension, - groupname, - self.extension, - groupname, - groupname - )); - } - } - } - - // Process users - if let Some(users) = users_config { - let mut user_script_lines = Vec::new(); - - for (username_val, user_config) in users { - // Convert username from Value to String - let username = match username_val.as_str() { - Some(name) => name, - None => continue, // Skip if username is not a string - }; - - if let Some(user_table) = user_config.as_mapping() { - // Check if user has password field - if not, create with disabled login - let password = user_table - .get("password") - .and_then(|p| p.as_str()) - .unwrap_or("*"); // Default to no login allowed - - has_valid_users = true; - - // Parse comprehensive user configuration with defaults - let uid = if let Some(uid_value) = user_table.get("uid") { - if let Some(uid_num) = uid_value.as_i64() { - uid_num.to_string() - } else { - "$CURRENT_UID".to_string() - } - } else { - "$CURRENT_UID".to_string() - }; - - let gid = if let Some(gid_value) = user_table.get("gid") { - if let Some(gid_num) = gid_value.as_i64() { - gid_num.to_string() - } else { - "$CURRENT_UID".to_string() // Default to same as UID for user private groups - } - } else { - "$CURRENT_UID".to_string() - }; - - let gecos = user_table - .get("gecos") - .and_then(|g| g.as_str()) - .unwrap_or(username); // Default to username - - let default_home = format!("/home/{username}"); - let home = user_table - .get("home") - .and_then(|h| h.as_str()) - .unwrap_or(&default_home); // Default to /home/username - - let shell = user_table - .get("shell") - .and_then(|s| s.as_str()) - .unwrap_or("/bin/sh"); // Default shell - - let groups = if let Some(groups_value) = 
user_table.get("groups") { - if let Some(groups_array) = groups_value.as_sequence() { - groups_array - .iter() - .filter_map(|g| g.as_str()) - .map(|s| s.to_string()) - .collect::>() - } else { - vec![username.to_string()] // Default to user's own group - } - } else { - vec![username.to_string()] // Default to user's own group - }; - - let _primary_group = groups.first().map(|s| s.as_str()).unwrap_or(username); - - // Shadow file attributes with defaults - let last_change = user_table - .get("last_change") - .and_then(|l| l.as_i64()) - .unwrap_or(19000); // Default to a reasonable epoch day - - let min_days = user_table - .get("min_days") - .and_then(|m| m.as_i64()) - .unwrap_or(0); // Default: no minimum - - let max_days = user_table - .get("max_days") - .and_then(|m| m.as_i64()) - .unwrap_or(99999); // Default: no maximum - - let warn_days = user_table - .get("warn_days") - .and_then(|w| w.as_i64()) - .unwrap_or(7); // Default: warn 7 days before - - let inactive_days = user_table - .get("inactive_days") - .and_then(|i| i.as_i64()) - .map(|i| i.to_string()) - .unwrap_or_else(|| "".to_string()); // Default: no inactive period - - let expire_date = user_table - .get("expire_date") - .and_then(|e| e.as_i64()) - .map(|e| e.to_string()) - .unwrap_or_else(|| "".to_string()); // Default: no expiration - - let disabled = user_table - .get("disabled") - .and_then(|d| d.as_bool()) - .unwrap_or(false); - - let system_user = user_table - .get("system") - .and_then(|s| s.as_bool()) - .unwrap_or(false); - - // Escape special characters in password for sed - // Note: We use | as sed delimiter to avoid conflicts with / in passwords - // We only need to escape characters that have special meaning in sed replacement strings - let escaped_password = password - .replace("\\", "\\\\") // Escape backslashes first - .replace("&", "\\&") // Escape ampersands (sed replacement reference) - .replace("$", "\\$"); // Escape dollar signs (sed end-of-line anchor) - - let warning_message = if 
password.is_empty() { - format!("\necho \"[WARNING] User '{username}' will be able to login with NO PASSWORD\"") - } else { - String::new() - }; - - // Create user in passwd file - user_script_lines.push(format!( - r#" -# Create user '{}' -echo "Creating user '{}'{}"{} -if ! grep -q "^{}:" "$AVOCADO_EXT_SYSROOTS/{}/etc/passwd"; then - # Add user to passwd file with comprehensive attributes - echo "{}:x:{}:{}:{}:{}:{}" >> "$AVOCADO_EXT_SYSROOTS/{}/etc/passwd" - echo "User '{}' created with UID {}, GID {}, home '{}', shell '{}'" - - if [ "{}" = "$CURRENT_UID" ]; then - CURRENT_UID=$((CURRENT_UID + 1)) - fi -else - echo "User '{}' already exists, updating attributes" -fi"#, - username, - username, - if system_user { " (system user)" } else { "" }, - warning_message, - username, - self.extension, - username, - uid, - gid, - gecos, - home, - shell, - self.extension, - username, - uid, - gid, - home, - shell, - uid, - username - )); - - // Create/update user in shadow file with comprehensive attributes - user_script_lines.push(format!( - r#" -# Set password and shadow attributes for user '{}' -echo "Setting password and aging policy for user '{}'" -if grep -q "^{}:" "$AVOCADO_EXT_SYSROOTS/{}/etc/shadow"; then - # Update existing user's shadow entry completely - sed -i "s|^{}:.*$|{}:{}:{}:{}:{}:{}:{}:{}:|" "$AVOCADO_EXT_SYSROOTS/{}/etc/shadow" - echo "Updated shadow entry for existing user '{}'" -else - # Add new user to shadow file with full attributes - echo "{}:{}:{}:{}:{}:{}:{}:{}:" >> "$AVOCADO_EXT_SYSROOTS/{}/etc/shadow" - echo "Added new user '{}' to shadow file" -fi{}"#, - username, - username, - username, - self.extension, - username, - username, - escaped_password, - last_change, - min_days, - max_days, - warn_days, - inactive_days, - expire_date, - self.extension, - username, - username, - escaped_password, - last_change, - min_days, - max_days, - warn_days, - inactive_days, - expire_date, - self.extension, - username, - if disabled { - "\necho \"Note: User 
account is marked as disabled\"" - } else { - "" - } - )); - - // Add user to additional groups if specified - if groups.len() > 1 { - user_script_lines.push(format!( - r#" -# Add user '{username}' to additional groups"# - )); - - for group in &groups[1..] { - // Skip primary group - user_script_lines.push(format!( - r#" -if grep -q "^{}:" "$AVOCADO_EXT_SYSROOTS/{}/etc/group"; then - # Add user to group if not already present - if ! grep "^{}:" "$AVOCADO_EXT_SYSROOTS/{}/etc/group" | grep -q "{}"; then - sed -i "s|^{}:\([^:]*\):\([^:]*\):\(.*\)$|{}:\1:\2:\3,{}|" "$AVOCADO_EXT_SYSROOTS/{}/etc/group" - echo "Added user '{}' to group '{}'" - fi -else - echo "Warning: Group '{}' not found, cannot add user '{}'" -fi"#, - group, self.extension, group, self.extension, username, group, group, username, self.extension, username, group, group, username - )); - } - } - } - } - - // Add user scripts to main script if there are valid users - if has_valid_users { - script_lines.push("\n# Create and configure users".to_string()); - script_lines.extend(user_script_lines); - } - } - - // Set proper permissions only if we processed any users or groups - if groups_config.is_some() || has_valid_users { - script_lines.push(format!( - r#" -# Set proper ownership and permissions for authentication files -chown root:root "$AVOCADO_EXT_SYSROOTS/{}/etc/passwd" "$AVOCADO_EXT_SYSROOTS/{}/etc/shadow" "$AVOCADO_EXT_SYSROOTS/{}/etc/group" -chmod 644 "$AVOCADO_EXT_SYSROOTS/{}/etc/passwd" -chmod 640 "$AVOCADO_EXT_SYSROOTS/{}/etc/shadow" -chmod 644 "$AVOCADO_EXT_SYSROOTS/{}/etc/group" -echo "Set proper permissions on authentication files""#, - self.extension, self.extension, self.extension, self.extension, self.extension, self.extension - )); - } - - script_lines.join("") + let etc_dir = format!("$AVOCADO_EXT_SYSROOTS/{}/etc", self.extension); + render_users_groups_script( + users_config, + groups_config, + &etc_dir, + Some("$AVOCADO_PREFIX/rootfs/etc"), + ) } /// Run the extension's `post_build` 
script inside the SDK container. @@ -3022,8 +2607,10 @@ mod tests { // Verify the users script section contains the expected commands assert!(script.contains("# Copy and manage user authentication files")); - assert!(script - .contains("Copying /etc/passwd, /etc/shadow, and /etc/group from rootfs to extension")); + assert!(script.contains( + "Copying /etc/passwd, /etc/shadow, and /etc/group from \ + $AVOCADO_PREFIX/rootfs/etc to $AVOCADO_EXT_SYSROOTS/avocado-dev/etc" + )); assert!(script.contains("mkdir -p \"$AVOCADO_EXT_SYSROOTS/avocado-dev/etc\"")); assert!(script.contains("cp \"$AVOCADO_PREFIX/rootfs/etc/passwd\" \"$AVOCADO_EXT_SYSROOTS/avocado-dev/etc/passwd\"")); assert!(script.contains("cp \"$AVOCADO_PREFIX/rootfs/etc/shadow\" \"$AVOCADO_EXT_SYSROOTS/avocado-dev/etc/shadow\"")); diff --git a/src/commands/initramfs/image.rs b/src/commands/initramfs/image.rs index a9a291e0..b256bdac 100644 --- a/src/commands/initramfs/image.rs +++ b/src/commands/initramfs/image.rs @@ -11,6 +11,7 @@ use crate::utils::{ host_copy::copy_volume_path_to_host, kab_wrap::generate_kab_wrap_script, output::{print_error, print_info, print_success, OutputLevel}, + permissions::{mapping_from_hashmap, render_users_groups_script}, runs_on::RunsOnContext, target::resolve_target_required, }; @@ -62,6 +63,7 @@ pub fn generate_initramfs_build_script( namespace_uuid: &str, initramfs_filesystem: &str, post_install: Option<&str>, + permissions_section: &str, ) -> String { let post = resolve_install_hooks(post_install, DEFAULT_INITRAMFS_POST_INSTALL); let post_install_block = render_hook_block("post_install", &post); @@ -80,6 +82,7 @@ if [ -d "$INITRAMFS_SYSROOT/usr" ]; then mkdir -p "$(dirname "$INITRAMFS_WORK")" rm -rf "$INITRAMFS_WORK" cp -a "$INITRAMFS_SYSROOT" "$INITRAMFS_WORK" +{permissions_section} {post_install_block} @@ -130,6 +133,7 @@ fi"#, namespace_uuid = namespace_uuid, initramfs_filesystem = initramfs_filesystem, post_install_block = post_install_block, + permissions_section = 
permissions_section, ) } @@ -216,10 +220,25 @@ impl InitramfsImageCommand { let initramfs_filesystem = config.get_initramfs_filesystem(); let initramfs_node = composed.merged_value.get("initramfs"); let post_install = get_post_install(initramfs_node); + let permissions_section = config + .initramfs_default() + .and_then(|img| config.resolve_image_permissions(img)) + .map(|p| { + let users = mapping_from_hashmap(p.users.as_ref()); + let groups = mapping_from_hashmap(p.groups.as_ref()); + render_users_groups_script( + users.as_ref(), + groups.as_ref(), + "$INITRAMFS_WORK/etc", + None, + ) + }) + .unwrap_or_default(); let build_section = generate_initramfs_build_script( NAMESPACE_UUID, &initramfs_filesystem, post_install.as_deref(), + &permissions_section, ); // Same kab-wrap pipeline as rootfs/image.rs — see comments diff --git a/src/commands/rootfs/image.rs b/src/commands/rootfs/image.rs index 9241b721..661dd9ba 100644 --- a/src/commands/rootfs/image.rs +++ b/src/commands/rootfs/image.rs @@ -11,6 +11,7 @@ use crate::utils::{ host_copy::copy_volume_path_to_host, kab_wrap::generate_kab_wrap_script, output::{print_error, print_info, print_success, OutputLevel}, + permissions::{mapping_from_hashmap, render_users_groups_script}, runs_on::RunsOnContext, target::resolve_target_required, }; @@ -119,6 +120,7 @@ pub fn generate_rootfs_build_script( namespace_uuid: &str, rootfs_filesystem: &str, post_install: Option<&str>, + permissions_section: &str, ) -> String { let post = resolve_install_hooks(post_install, DEFAULT_ROOTFS_POST_INSTALL); let post_install_block = render_hook_block("post_install", &post); @@ -138,6 +140,7 @@ if [ -d "$ROOTFS_SYSROOT/usr" ]; then mkdir -p "$(dirname "$ROOTFS_WORK")" rm -rf "$ROOTFS_WORK" cp -a "$ROOTFS_SYSROOT" "$ROOTFS_WORK" +{permissions_section} {post_install_block} @@ -199,6 +202,7 @@ fi"#, namespace_uuid = namespace_uuid, rootfs_filesystem = rootfs_filesystem, post_install_block = post_install_block, + permissions_section = 
permissions_section, ) } @@ -285,10 +289,25 @@ impl RootfsImageCommand { let rootfs_filesystem = config.get_rootfs_filesystem(); let rootfs_node = composed.merged_value.get("rootfs"); let post_install = get_post_install(rootfs_node); + let permissions_section = config + .rootfs_default() + .and_then(|img| config.resolve_image_permissions(img)) + .map(|p| { + let users = mapping_from_hashmap(p.users.as_ref()); + let groups = mapping_from_hashmap(p.groups.as_ref()); + render_users_groups_script( + users.as_ref(), + groups.as_ref(), + "$ROOTFS_WORK/etc", + None, + ) + }) + .unwrap_or_default(); let build_section = generate_rootfs_build_script( NAMESPACE_UUID, &rootfs_filesystem, post_install.as_deref(), + &permissions_section, ); // If the avocado.yaml asks for a kab-wrapped rootfs, validate the diff --git a/src/commands/runtime/build.rs b/src/commands/runtime/build.rs index 9de590ed..51da4098 100644 --- a/src/commands/runtime/build.rs +++ b/src/commands/runtime/build.rs @@ -3,9 +3,10 @@ use crate::commands::rootfs::image::{generate_rootfs_build_script, NAMESPACE_UUI use crate::commands::sdk::SdkCompileCommand; use crate::utils::config::get_post_install; use crate::utils::{ - config::{ComposedConfig, Config}, + config::{ComposedConfig, Config, ImageConfig}, container::{RunConfig, SdkContainer, TuiContext}, output::{print_error, print_info, print_success, OutputLevel}, + permissions::{mapping_from_hashmap, render_users_groups_script}, runs_on::RunsOnContext, stamps::{ compute_runtime_input_hash, generate_batch_read_stamps_script, generate_write_stamp_script, @@ -2220,18 +2221,43 @@ echo "Docker image priming complete.""#, } }; + // Helper closure: given the rootfs/initramfs ImageConfig the runtime + // resolves to, render the users/groups script that will edit the + // image's work dir /etc/{passwd,shadow,group} in place. 
Returns an + // empty string when no permissions are configured on the image — + // the base packages (avocado-pkg-rootfs / avocado-pkg-initramfs) + // ship a generic passwd/shadow/group that we leave untouched. + let render_perms = |image: Option<&ImageConfig>, etc_dir: &str| -> String { + let Some(perms) = image.and_then(|img| config.resolve_image_permissions(img)) else { + return String::new(); + }; + let users = mapping_from_hashmap(perms.users.as_ref()); + let groups = mapping_from_hashmap(perms.groups.as_ref()); + render_users_groups_script(users.as_ref(), groups.as_ref(), etc_dir, None) + }; + let rootfs_post_install = get_post_install(parsed.get("rootfs")); + let rootfs_permissions_section = render_perms( + config.resolve_runtime_rootfs(&self.runtime_name), + "$ROOTFS_WORK/etc", + ); let rootfs_build_section = generate_rootfs_build_script( NAMESPACE_UUID, &config.get_rootfs_filesystem(), rootfs_post_install.as_deref(), + &rootfs_permissions_section, ); let initramfs_post_install = get_post_install(parsed.get("initramfs")); + let initramfs_permissions_section = render_perms( + config.resolve_runtime_initramfs(&self.runtime_name), + "$INITRAMFS_WORK/etc", + ); let initramfs_build_section = generate_initramfs_build_script( NAMESPACE_UUID, &config.get_initramfs_filesystem(), initramfs_post_install.as_deref(), + &initramfs_permissions_section, ); let script = format!( diff --git a/src/utils/config.rs b/src/utils/config.rs index c5bf169a..c46c002b 100644 --- a/src/utils/config.rs +++ b/src/utils/config.rs @@ -154,6 +154,7 @@ where "overlay", "image", "post_install", + "permissions", ], "rootfs", ) @@ -175,11 +176,25 @@ where "overlay", "image", "post_install", + "permissions", ], "initramfs", ) } +/// Custom deserializer for top-level `permissions:` field. Accepts either a +/// singleton form (`permissions: { users: ..., groups: ... }` — synthesized +/// as the implicit `default` entry) or a named-map form +/// (`permissions: { main: { users: ... } }`). 
+fn deserialize_permissions_map<'de, D>( + deserializer: D, +) -> Result>, D::Error> +where + D: serde::Deserializer<'de>, +{ + named_or_single_deserializer::deserialize(deserializer, &["users", "groups"], "permissions") +} + /// Custom deserializer module for container_args mod container_args_deserializer { use serde::{Deserialize, Deserializer}; @@ -596,6 +611,16 @@ pub enum ImageRef { Inline(Box), } +/// A permissions reference on a rootfs/initramfs image: either a name +/// pointing at a top-level `permissions.` entry, or an inline +/// anonymous block. Same untagged shape as [`KernelRef`] / [`ImageRef`]. +#[derive(Debug, Clone, Deserialize, Serialize)] +#[serde(untagged)] +pub enum PermissionsRef { + Named(String), + Inline(Box), +} + /// Runtime configuration section #[derive(Debug, Clone, Deserialize, Serialize)] pub struct RuntimeConfig { @@ -746,6 +771,22 @@ pub struct ImageConfig { /// the defaults run. #[serde(skip_serializing_if = "Option::is_none")] pub post_install: Option, + /// Optional permissions reference. Either a string name pointing at a + /// top-level `permissions.` entry, or an inline `PermissionsConfig`. + /// When present, users/groups are provisioned into this image's work + /// directory (`/etc/passwd`, `/etc/shadow`, `/etc/group`) during build. + #[serde(skip_serializing_if = "Option::is_none")] + pub permissions: Option, +} + +/// Permissions block: users and groups to provision into a built image +/// (rootfs or initramfs). Values are kept as raw YAML so the existing +/// dynamic field parser in [`crate::utils::permissions`] can consume them +/// without re-typing every shadow attribute. +#[derive(Debug, Clone, Deserialize, Serialize, Default)] +pub struct PermissionsConfig { + pub users: Option>, + pub groups: Option>, } /// Provision profile configuration @@ -1315,6 +1356,14 @@ pub struct Config { /// applied across runtimes that don't pin their own. 
#[serde(default, deserialize_with = "deserialize_kernels")] pub kernel: Option>, + /// Top-level permissions definition(s). Accepts either a singleton + /// `PermissionsConfig` (synthesized as the implicit `default` entry) or a + /// `name → PermissionsConfig` map. Referenced by name from rootfs or + /// initramfs entries via their `permissions:` field. Users/groups + /// declared here are baked into the corresponding image's + /// `/etc/passwd|shadow|group` during build. + #[serde(default, deserialize_with = "deserialize_permissions_map")] + pub permissions: Option>, } impl Config { @@ -1776,6 +1825,7 @@ impl Config { signing_keys: None, connect: None, kernel: None, + permissions: None, }); // Resolve target: CLI arg > env var > config default @@ -3389,9 +3439,65 @@ impl Config { } } } + + // Validate permissions refs on top-level rootfs/initramfs entries. + let check_perms = + |owner_kind: &str, entries: Option<&HashMap>| -> Result<()> { + let Some(entries) = entries else { + return Ok(()); + }; + for (entry_name, img) in entries { + let Some(PermissionsRef::Named(pname)) = img.permissions.as_ref() else { + continue; + }; + let map = self.permissions.as_ref(); + if map.is_none_or(|m| !m.contains_key(pname)) { + let available = map + .map(|m| { + let mut names: Vec<&str> = m.keys().map(|s| s.as_str()).collect(); + names.sort_unstable(); + names.join(", ") + }) + .unwrap_or_default(); + return Err(if available.is_empty() { + anyhow::anyhow!( + "{owner_kind} '{entry_name}' references permissions '{pname}', \ + but no top-level `permissions:` map is defined in avocado.yaml" + ) + } else { + anyhow::anyhow!( + "{owner_kind} '{entry_name}' references permissions '{pname}', \ + which is not defined in the top-level `permissions:` map. 
\ + Available: {available}" + ) + }); + } + } + Ok(()) + }; + check_perms("rootfs", self.rootfs.as_ref())?; + check_perms("initramfs", self.initramfs.as_ref())?; + Ok(()) } + /// Resolve an [`ImageConfig`]'s `permissions:` reference to a borrowed + /// [`PermissionsConfig`]. Returns the inline body for `Inline(_)`, looks + /// up the top-level map for `Named(_)`, and `None` when no permissions + /// are configured on the image. Assumes `validate_runtime_refs` has + /// already run — an unresolved named ref returns `None` here rather + /// than erroring (callers in build paths treat absence as "no + /// permissions to apply"). + pub fn resolve_image_permissions<'a>( + &'a self, + image: &'a ImageConfig, + ) -> Option<&'a PermissionsConfig> { + match image.permissions.as_ref()? { + PermissionsRef::Inline(b) => Some(b.as_ref()), + PermissionsRef::Named(name) => self.permissions.as_ref()?.get(name), + } + } + /// Load configuration from a YAML string /// Used primarily in tests #[allow(dead_code)] @@ -10937,6 +11043,143 @@ default_target: qemux86-64 assert!(config.runtimes.is_none()); } + // --- permissions: top-level + per-image ref tests --- + + #[test] + fn test_permissions_singleton_form_synthesizes_default() { + let yaml = r#" +permissions: + users: + root: + password: "" +"#; + let config = Config::load_from_yaml_str(yaml).unwrap(); + let perms = config.permissions.as_ref().expect("permissions parsed"); + assert_eq!(perms.len(), 1); + assert!(perms.contains_key("default")); + let entry = perms.get("default").unwrap(); + let users = entry.users.as_ref().expect("users present"); + assert!(users.contains_key("root")); + } + + #[test] + fn test_permissions_named_map_form_with_multiple_entries() { + let yaml = r#" +permissions: + main: + users: + root: + password: "" + service: + users: + avocado: + uid: 1000 +"#; + let config = Config::load_from_yaml_str(yaml).unwrap(); + let perms = config.permissions.as_ref().unwrap(); + assert_eq!(perms.len(), 2); + 
assert!(perms.contains_key("main")); + assert!(perms.contains_key("service")); + } + + #[test] + fn test_image_permissions_named_ref_resolves() { + let yaml = r#" +permissions: + main: + users: + root: + password: "" +rootfs: + default: + packages: { avocado-pkg-rootfs: "*" } + permissions: main +"#; + let config = Config::load_from_yaml_str(yaml).unwrap(); + let rootfs = config.rootfs_default().expect("rootfs default present"); + let resolved = config + .resolve_image_permissions(rootfs) + .expect("permissions resolved"); + assert!(resolved.users.as_ref().unwrap().contains_key("root")); + } + + #[test] + fn test_image_permissions_inline_form() { + let yaml = r#" +rootfs: + default: + packages: { avocado-pkg-rootfs: "*" } + permissions: + users: + root: + password: "" +"#; + let config = Config::load_from_yaml_str(yaml).unwrap(); + let rootfs = config.rootfs_default().expect("rootfs default present"); + let resolved = config + .resolve_image_permissions(rootfs) + .expect("inline permissions resolved"); + assert!(resolved.users.as_ref().unwrap().contains_key("root")); + } + + #[test] + fn test_validate_runtime_refs_rejects_unresolved_rootfs_permissions() { + let yaml = r#" +permissions: + main: + users: + root: + password: "" +rootfs: + base: + packages: { avocado-pkg-rootfs: "*" } + permissions: nope +runtimes: + prod: + rootfs: base +"#; + let err = Config::load_from_yaml_str(yaml).unwrap_err().to_string(); + assert!( + err.contains("rootfs 'base'") + && err.contains("'nope'") + && err.contains("Available: main"), + "expected unresolved-permissions-ref error mentioning the rootfs entry; got: {err}" + ); + } + + #[test] + fn test_validate_runtime_refs_rejects_unresolved_initramfs_permissions() { + let yaml = r#" +initramfs: + base: + packages: { avocado-pkg-initramfs: "*" } + permissions: ghost +runtimes: + prod: + initramfs: base +"#; + let err = Config::load_from_yaml_str(yaml).unwrap_err().to_string(); + assert!( + err.contains("initramfs 'base'") + && 
err.contains("'ghost'") + && err.contains("no top-level"), + "got: {err}" + ); + } + + #[test] + fn test_image_permissions_absent_is_none() { + let yaml = r#" +rootfs: + default: + packages: { avocado-pkg-rootfs: "*" } +"#; + let config = Config::load_from_yaml_str(yaml).unwrap(); + let rootfs = config.rootfs_default().unwrap(); + assert!(rootfs.permissions.is_none()); + assert!(config.resolve_image_permissions(rootfs).is_none()); + } + #[test] fn test_validate_runtime_refs_rejects_unresolved_initramfs_ref() { let yaml = r#" diff --git a/src/utils/mod.rs b/src/utils/mod.rs index ebd176f2..bd54339b 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -16,6 +16,7 @@ pub mod lockfile; pub mod nfs_server; pub mod output; pub mod output_format; +pub mod permissions; pub mod pkcs11_devices; pub mod prerequisites; pub mod provision_result; diff --git a/src/utils/permissions.rs b/src/utils/permissions.rs new file mode 100644 index 00000000..cc746679 --- /dev/null +++ b/src/utils/permissions.rs @@ -0,0 +1,476 @@ +//! Shared shell-script generator for baking users and groups into an +//! image's `/etc/passwd`, `/etc/shadow`, and `/etc/group`. +//! +//! Used by: +//! - Extension builds (`ext build`) — legacy path, copies passwd/shadow/group +//! from `$AVOCADO_PREFIX/rootfs/etc/` into the extension sysroot, then +//! adds users/groups. Will be removed once the deprecation period ends. +//! - Rootfs / initramfs builds (`runtime build`) — new path, edits the +//! files in the image's work directory in place (the base-passwd / shadow +//! packages have already staged them there). +//! +//! The function consumes raw `serde_yaml::Mapping`s for users/groups so the +//! existing dynamic field handling (uid/gid/gecos/shell/home/groups/shadow +//! attributes) keeps working without re-typing every field. + +use serde_yaml::Mapping; + +/// Render the shell-script section that creates/updates users and groups +/// inside `etc_dir`. 
+/// +/// * `users` — the `users:` mapping (username → attribute map), or `None`. +/// * `groups` — the `groups:` mapping (groupname → attribute map), or `None`. +/// * `etc_dir` — shell expression pointing at the target `/etc` directory. +/// Examples: `"$AVOCADO_EXT_SYSROOTS/myext/etc"`, `"$ROOTFS_WORK/etc"`. +/// Embedded verbatim into the script — the caller is responsible for +/// ensuring it resolves correctly at script-run time. +/// * `copy_from` — when `Some(dir)`, the script begins by copying +/// `passwd`, `shadow`, `group` from `dir` into `etc_dir`. When `None`, +/// the files are assumed to already exist at `etc_dir` (the package +/// install staged them). +/// +/// Returns an empty string when both `users` and `groups` are `None`. +pub fn render_users_groups_script( + users: Option<&Mapping>, + groups: Option<&Mapping>, + etc_dir: &str, + copy_from: Option<&str>, +) -> String { + if users.is_none() && groups.is_none() { + return String::new(); + } + + let mut script_lines = Vec::new(); + let mut has_valid_users = false; + script_lines.push("\n# Copy and manage user authentication files".to_string()); + + // Optional copy of base passwd/shadow/group from a source dir + // (e.g. the rootfs sysroot's /etc) into the target /etc. 
+ if let Some(src) = copy_from { + script_lines.push(format!( + r#" +# Copy authentication files into target /etc +echo "Copying /etc/passwd, /etc/shadow, and /etc/group from {src} to {etc_dir}" +mkdir -p "{etc_dir}" +cp "{src}/passwd" "{etc_dir}/passwd" +cp "{src}/shadow" "{etc_dir}/shadow" +cp "{src}/group" "{etc_dir}/group" +"# + )); + } + + // Auto-incrementing counters for uid/gid starting at 1000 + script_lines.push( + "# Auto-incrementing counters for uid/gid\nCURRENT_UID=1000\nCURRENT_GID=1000\n" + .to_string(), + ); + + // Process groups first (they might be referenced by users) + if let Some(groups) = groups { + script_lines.push("\n# Create groups".to_string()); + + for (groupname_val, group_config) in groups { + let groupname = match groupname_val.as_str() { + Some(name) => name, + None => continue, + }; + + if let Some(group_table) = group_config.as_mapping() { + let gid = if let Some(gid_value) = group_table.get("gid") { + if let Some(gid_num) = gid_value.as_i64() { + gid_num.to_string() + } else if let Some(gid_num) = gid_value.as_u64() { + gid_num.to_string() + } else { + "$CURRENT_GID".to_string() + } + } else { + "$CURRENT_GID".to_string() + }; + + let system_group = group_table + .get("system") + .and_then(|s| s.as_bool()) + .unwrap_or(false); + + let password = group_table + .get("password") + .and_then(|p| p.as_str()) + .unwrap_or(""); + + let members = if let Some(members_value) = group_table.get("members") { + if let Some(members_array) = members_value.as_sequence() { + members_array + .iter() + .filter_map(|m| m.as_str()) + .collect::<Vec<_>>() + .join(",") + } else { + String::new() + } + } else { + String::new() + }; + + let system_type = if system_group { " (system group)" } else { "" }; + let password_note = if !password.is_empty() { + " with password" + } else { + "" + }; + let members_msg = if !members.is_empty() { + format!(" and members: {members}") + } else { + String::new() + }; + let password_config = if !password.is_empty() { + 
format!("\n# Set group password for '{groupname}'\necho \"Note: Group password configured for '{groupname}'\"") + } else { + String::new() + }; + + script_lines.push(format!( + r#" +# Create group '{groupname}'{system_type} +echo "Creating group '{groupname}'"{password_note} +if ! grep -q "^{groupname}:" "{etc_dir}/group"; then + echo "{groupname}:x:{gid}:{members}" >> "{etc_dir}/group" + echo "Group '{groupname}' created with GID {gid}{members_msg}" + if [ "{gid}" = "$CURRENT_GID" ]; then + CURRENT_GID=$((CURRENT_GID + 1)) + fi +else + echo "Group '{groupname}' already exists, updating members" + if [ -n "{members}" ]; then + sed -i "s|^{groupname}:x:{gid}:.*$|{groupname}:x:{gid}:{members}|" "{etc_dir}/group" + echo "Updated members for group '{groupname}'" + fi +fi{password_config}"# + )); + } else { + // Simple group with just GID auto-assignment + script_lines.push(format!( + r#" +# Create group '{groupname}' +echo "Creating group '{groupname}'" +if ! grep -q "^{groupname}:" "{etc_dir}/group"; then + echo "{groupname}:x:$CURRENT_GID:" >> "{etc_dir}/group" + echo "Group '{groupname}' created with GID $CURRENT_GID" + CURRENT_GID=$((CURRENT_GID + 1)) +else + echo "Group '{groupname}' already exists" +fi"# + )); + } + } + } + + // Process users + if let Some(users) = users { + let mut user_script_lines = Vec::new(); + + for (username_val, user_config) in users { + let username = match username_val.as_str() { + Some(name) => name, + None => continue, + }; + + if let Some(user_table) = user_config.as_mapping() { + let password = user_table + .get("password") + .and_then(|p| p.as_str()) + .unwrap_or("*"); + + has_valid_users = true; + + let uid = if let Some(uid_value) = user_table.get("uid") { + if let Some(uid_num) = uid_value.as_i64() { + uid_num.to_string() + } else { + "$CURRENT_UID".to_string() + } + } else { + "$CURRENT_UID".to_string() + }; + + let gid = if let Some(gid_value) = user_table.get("gid") { + if let Some(gid_num) = gid_value.as_i64() { + 
gid_num.to_string() + } else { + "$CURRENT_UID".to_string() + } + } else { + "$CURRENT_UID".to_string() + }; + + let gecos = user_table + .get("gecos") + .and_then(|g| g.as_str()) + .unwrap_or(username); + + let default_home = format!("/home/{username}"); + let home = user_table + .get("home") + .and_then(|h| h.as_str()) + .unwrap_or(&default_home); + + let shell = user_table + .get("shell") + .and_then(|s| s.as_str()) + .unwrap_or("/bin/sh"); + + let groups_list = if let Some(groups_value) = user_table.get("groups") { + if let Some(groups_array) = groups_value.as_sequence() { + groups_array + .iter() + .filter_map(|g| g.as_str()) + .map(|s| s.to_string()) + .collect::<Vec<_>>() + } else { + vec![username.to_string()] + } + } else { + vec![username.to_string()] + }; + + let last_change = user_table + .get("last_change") + .and_then(|l| l.as_i64()) + .unwrap_or(19000); + + let min_days = user_table + .get("min_days") + .and_then(|m| m.as_i64()) + .unwrap_or(0); + + let max_days = user_table + .get("max_days") + .and_then(|m| m.as_i64()) + .unwrap_or(99999); + + let warn_days = user_table + .get("warn_days") + .and_then(|w| w.as_i64()) + .unwrap_or(7); + + let inactive_days = user_table + .get("inactive_days") + .and_then(|i| i.as_i64()) + .map(|i| i.to_string()) + .unwrap_or_default(); + + let expire_date = user_table + .get("expire_date") + .and_then(|e| e.as_i64()) + .map(|e| e.to_string()) + .unwrap_or_default(); + + let disabled = user_table + .get("disabled") + .and_then(|d| d.as_bool()) + .unwrap_or(false); + + let system_user = user_table + .get("system") + .and_then(|s| s.as_bool()) + .unwrap_or(false); + + // We use | as sed delimiter to avoid conflicts with / in + // password hashes; we still need to escape the chars that + // are special inside a sed replacement string itself. 
+ let escaped_password = password + .replace("\\", "\\\\") + .replace("&", "\\&") + .replace("$", "\\$"); + + let system_label = if system_user { " (system user)" } else { "" }; + let warning_message = if password.is_empty() { + format!("\necho \"[WARNING] User '{username}' will be able to login with NO PASSWORD\"") + } else { + String::new() + }; + let disabled_note = if disabled { + "\necho \"Note: User account is marked as disabled\"" + } else { + "" + }; + + // Create user in passwd file + user_script_lines.push(format!( + r#" +# Create user '{username}' +echo "Creating user '{username}'{system_label}"{warning_message} +if ! grep -q "^{username}:" "{etc_dir}/passwd"; then + echo "{username}:x:{uid}:{gid}:{gecos}:{home}:{shell}" >> "{etc_dir}/passwd" + echo "User '{username}' created with UID {uid}, GID {gid}, home '{home}', shell '{shell}'" + + if [ "{uid}" = "$CURRENT_UID" ]; then + CURRENT_UID=$((CURRENT_UID + 1)) + fi +else + echo "User '{username}' already exists, updating attributes" +fi"# + )); + + // Create/update user in shadow file with comprehensive attributes + user_script_lines.push(format!( + r#" +# Set password and shadow attributes for user '{username}' +echo "Setting password and aging policy for user '{username}'" +if grep -q "^{username}:" "{etc_dir}/shadow"; then + sed -i "s|^{username}:.*$|{username}:{escaped_password}:{last_change}:{min_days}:{max_days}:{warn_days}:{inactive_days}:{expire_date}:|" "{etc_dir}/shadow" + echo "Updated shadow entry for existing user '{username}'" +else + echo "{username}:{escaped_password}:{last_change}:{min_days}:{max_days}:{warn_days}:{inactive_days}:{expire_date}:" >> "{etc_dir}/shadow" + echo "Added new user '{username}' to shadow file" +fi{disabled_note}"# + )); + + // Add user to additional groups if specified + if groups_list.len() > 1 { + user_script_lines.push(format!( + r#" +# Add user '{username}' to additional groups"# + )); + + for group in &groups_list[1..] 
{ + user_script_lines.push(format!( + r#" +if grep -q "^{group}:" "{etc_dir}/group"; then + if ! grep "^{group}:" "{etc_dir}/group" | grep -q "{username}"; then + sed -i "s|^{group}:\([^:]*\):\([^:]*\):\(.*\)$|{group}:\1:\2:\3,{username}|" "{etc_dir}/group" + echo "Added user '{username}' to group '{group}'" + fi +else + echo "Warning: Group '{group}' not found, cannot add user '{username}'" +fi"# + )); + } + } + } + } + + if has_valid_users { + script_lines.push("\n# Create and configure users".to_string()); + script_lines.extend(user_script_lines); + } + } + + // Set proper permissions only if we processed any users or groups + if groups.is_some() || has_valid_users { + script_lines.push(format!( + r#" +# Set proper ownership and permissions for authentication files +chown root:root "{etc_dir}/passwd" "{etc_dir}/shadow" "{etc_dir}/group" +chmod 644 "{etc_dir}/passwd" +chmod 640 "{etc_dir}/shadow" +chmod 644 "{etc_dir}/group" +echo "Set proper permissions on authentication files""# + )); + } + + script_lines.join("") +} + +/// Convert an `Option<&HashMap<String, serde_yaml::Value>>` (the shape +/// stored in [`crate::utils::config::PermissionsConfig`]) into an owned +/// `serde_yaml::Mapping` ref appropriate for [`render_users_groups_script`]. +/// +/// Returns `None` if the input is `None` or empty. 
+pub fn mapping_from_hashmap( + src: Option<&std::collections::HashMap<String, serde_yaml::Value>>, +) -> Option<Mapping> { + let map = src?; + if map.is_empty() { + return None; + } + let mut out = Mapping::new(); + for (k, v) in map { + out.insert(serde_yaml::Value::String(k.clone()), v.clone()); + } + Some(out) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn user(password: &str) -> serde_yaml::Value { + let mut m = Mapping::new(); + m.insert( + serde_yaml::Value::String("password".to_string()), + serde_yaml::Value::String(password.to_string()), + ); + serde_yaml::Value::Mapping(m) + } + + #[test] + fn empty_inputs_produce_empty_script() { + assert_eq!(render_users_groups_script(None, None, "/etc", None), ""); + } + + #[test] + fn target_etc_is_substituted_verbatim() { + let mut users = Mapping::new(); + users.insert(serde_yaml::Value::String("root".to_string()), user("")); + let script = render_users_groups_script(Some(&users), None, "$ROOTFS_WORK/etc", None); + assert!(script.contains("$ROOTFS_WORK/etc/passwd")); + assert!(script.contains("$ROOTFS_WORK/etc/shadow")); + assert!(script.contains("$ROOTFS_WORK/etc/group")); + // No copy preamble when copy_from is None. 
+ assert!(!script.contains("cp \"")); + } + + #[test] + fn copy_from_emits_preamble() { + let mut users = Mapping::new(); + users.insert(serde_yaml::Value::String("root".to_string()), user("")); + let script = render_users_groups_script( + Some(&users), + None, + "$AVOCADO_EXT_SYSROOTS/myext/etc", + Some("$AVOCADO_PREFIX/rootfs/etc"), + ); + assert!(script.contains("cp \"$AVOCADO_PREFIX/rootfs/etc/passwd\"")); + assert!(script.contains("cp \"$AVOCADO_PREFIX/rootfs/etc/shadow\"")); + assert!(script.contains("cp \"$AVOCADO_PREFIX/rootfs/etc/group\"")); + assert!(script.contains("$AVOCADO_EXT_SYSROOTS/myext/etc")); + } + + #[test] + fn empty_password_emits_no_login_warning() { + let mut users = Mapping::new(); + users.insert(serde_yaml::Value::String("root".to_string()), user("")); + let script = render_users_groups_script(Some(&users), None, "/etc", None); + assert!(script.contains("[WARNING] User 'root' will be able to login with NO PASSWORD")); + } + + #[test] + fn hashed_password_does_not_warn() { + let mut users = Mapping::new(); + users.insert( + serde_yaml::Value::String("alice".to_string()), + user("$6$salt$hash"), + ); + let script = render_users_groups_script(Some(&users), None, "/etc", None); + assert!(!script.contains("[WARNING]")); + assert!(script.contains("alice:\\$6\\$salt\\$hash")); + } + + #[test] + fn groups_only_still_runs_chown() { + let mut groups = Mapping::new(); + let mut docker = Mapping::new(); + docker.insert( + serde_yaml::Value::String("gid".to_string()), + serde_yaml::Value::Number(999.into()), + ); + groups.insert( + serde_yaml::Value::String("docker".to_string()), + serde_yaml::Value::Mapping(docker), + ); + let script = render_users_groups_script(None, Some(&groups), "/etc", None); + assert!(script.contains("Creating group 'docker'")); + assert!(script.contains("chown root:root \"/etc/passwd\"")); + } +} diff --git a/src/utils/runtime.rs b/src/utils/runtime.rs index b01b5a64..019b0fd0 100644 --- a/src/utils/runtime.rs +++ 
b/src/utils/runtime.rs @@ -228,6 +228,7 @@ mod tests { rootfs: None, initramfs: None, kernel: None, + permissions: None, } } diff --git a/src/utils/target.rs b/src/utils/target.rs index 0e4cf051..e9969c57 100644 --- a/src/utils/target.rs +++ b/src/utils/target.rs @@ -255,6 +255,7 @@ mod tests { rootfs: None, initramfs: None, kernel: None, + permissions: None, } } @@ -277,6 +278,7 @@ mod tests { rootfs: None, initramfs: None, kernel: None, + permissions: None, } } @@ -299,6 +301,7 @@ mod tests { rootfs: None, initramfs: None, kernel: None, + permissions: None, } } diff --git a/tests/fixtures/configs/with-permissions.yaml b/tests/fixtures/configs/with-permissions.yaml new file mode 100644 index 00000000..b8458f64 --- /dev/null +++ b/tests/fixtures/configs/with-permissions.yaml @@ -0,0 +1,35 @@ +default_target: qemux86-64 +sdk: + image: ghcr.io/avocado-framework/avocado-sdk:latest +runtimes: + default: + target: x86_64-unknown-linux-gnu + +permissions: + main: + users: + root: + password: '' + avocado: + uid: 1000 + groups: + - avocado + password: $6$9YieAo4LtYEIqB6K$og/ykbnIiXP21yc6WHAVKkkIMNE5jaho8Ijj6zFo0UlOxWGpH9xrduFc0P9UYBtQXz2LrJjx7DK7/XAObLoqh0 + gecos: Avocado service user + home: /home/avocado + shell: /bin/bash + groups: + avocado: + gid: 1000 + +rootfs: + default: + packages: + avocado-pkg-rootfs: '*' + permissions: main + +initramfs: + default: + packages: + avocado-pkg-initramfs: '*' + permissions: main From b887f2298f455cfd7f2573e8f7f8c0cbf2cd812d Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Tue, 26 May 2026 20:23:21 -0400 Subject: [PATCH 02/30] stamps: split input hashes per build step to fix over-invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, ext install/build/image all shared a single `compute_ext_input_hash` and runtime install/build shared a single `compute_runtime_input_hash`. Editing a field that only affects the build (e.g. 
ext `image:` kabtool args, `var_files:`, runtime `var:`, runtime `post_build:`) invalidated the install stamp too, which cascaded into the install step being re-run via the dependency chain. Per-step hash functions now cover exactly what each step uses: ext install -> packages, types, source ext build -> install inputs + image, overlay, post_build (path + content) ext image -> build inputs + var_files, subvolumes, filesystem runtime install -> packages, target runtime build -> install inputs + narrowed kernel, var, var_files, post_build (path + content), rootfs/initramfs filesystem, ext docker_images sdk install -> sdk.packages/image/repo_url/repo_release (no longer includes rootfs/initramfs.packages — those have their own install stamps) rootfs install -> rootfs.packages, rootfs.overlay, narrowed kernel, post_install (path + content) initramfs install -> same shape as rootfs The `kernel:` block is now hashed via a narrow {package, version, compile, install} mapping at every call site, so cosmetic edits (metadata, new fields) don't invalidate stamps that don't actually consume them. The `post_build` / `post_install` hooks now hash script *contents* in addition to the path, so editing the script body invalidates the stamp without `--no-stamps`. `validate_stamps_batch` now accepts a slice of (component, command, hash) triples so each requirement is compared against the matching step's hash instead of one shared hash applied to all stamps for a component. STAMP_VERSION bumped 1 -> 2; older stamps invalidate on first run after upgrade, then the new narrower hashes apply going forward. Adds 14 negative-invalidation tests locking the new shape in place ("X must NOT invalidate Y" for each step+field pair we untangled). 
--- src/commands/build.rs | 3 +- src/commands/ext/build.rs | 39 +- src/commands/ext/checkout.rs | 10 +- src/commands/ext/clean.rs | 3 +- src/commands/ext/image.rs | 52 +- src/commands/ext/install.rs | 4 +- src/commands/hitl/server.rs | 12 +- src/commands/rootfs/install.rs | 4 +- src/commands/runtime/build.rs | 38 +- src/commands/runtime/clean.rs | 6 +- src/commands/runtime/deploy.rs | 3 +- src/commands/runtime/install.rs | 4 +- src/commands/runtime/provision.rs | 2 +- src/commands/runtime/sign.rs | 3 +- src/commands/sdk/clean.rs | 2 +- src/commands/sdk/compile.rs | 10 +- src/commands/sdk/package.rs | 2 +- src/utils/config.rs | 14 + src/utils/prerequisites.rs | 2 +- src/utils/stamps.rs | 893 +++++++++++++++++++++++++----- 20 files changed, 895 insertions(+), 211 deletions(-) diff --git a/src/commands/build.rs b/src/commands/build.rs index 30cfde0f..c7af3269 100644 --- a/src/commands/build.rs +++ b/src/commands/build.rs @@ -197,8 +197,7 @@ impl BuildCommand { let output = container_helper .run_in_container_with_output(run_config) .await?; - let validation = - validate_stamps_batch(&required, output.as_deref().unwrap_or(""), None); + let validation = validate_stamps_batch(&required, output.as_deref().unwrap_or(""), &[]); if !validation.is_satisfied() { let error = diff --git a/src/commands/ext/build.rs b/src/commands/ext/build.rs index 4346645d..efffaced 100644 --- a/src/commands/ext/build.rs +++ b/src/commands/ext/build.rs @@ -10,9 +10,9 @@ use crate::utils::lockfile::LockFile; use crate::utils::output::{print_error, print_info, print_success, print_warning, OutputLevel}; use crate::utils::permissions::render_users_groups_script; use crate::utils::stamps::{ - compute_ext_input_hash, generate_batch_read_stamps_script, generate_write_stamp_script, - resolve_required_stamps, validate_stamps_batch, Stamp, StampCommand, StampComponent, - StampOutputs, + compute_ext_build_input_hash, compute_ext_install_input_hash, + generate_batch_read_stamps_script, 
generate_write_stamp_script, resolve_required_stamps, + validate_stamps_batch, Stamp, StampCommand, StampComponent, StampOutputs, }; use crate::utils::target::resolve_target_required; use crate::utils::tui::{TaskId, TuiGuard}; @@ -266,17 +266,22 @@ impl ExtBuildCommand { .run_in_container_with_output(run_config) .await?; - // Compute current inputs from composed config for staleness detection. - // This ensures that changes to path-based extension packages are detected. - // Only compare against Extension stamps — SDK/compile-deps stamps use their own hash. - let current_inputs = compute_ext_input_hash(parsed, &self.extension).ok(); - let validation = validate_stamps_batch( - &required, - output.as_deref().unwrap_or(""), - current_inputs - .as_ref() - .map(|i| (&StampComponent::Extension, i)), - ); + // Compute step-scoped current inputs for staleness detection. + // Install + build stamps have different input hashes — each requirement + // is matched against the entry for its (component, command) pair. + let project_root = config.project_root(&self.config_path); + let install_inputs = compute_ext_install_input_hash(parsed, &self.extension).ok(); + let build_inputs = + compute_ext_build_input_hash(parsed, &self.extension, &project_root).ok(); + let mut current_inputs: Vec> = Vec::new(); + if let Some(ref i) = install_inputs { + current_inputs.push((StampComponent::Extension, StampCommand::Install, i)); + } + if let Some(ref i) = build_inputs { + current_inputs.push((StampComponent::Extension, StampCommand::Build, i)); + } + let validation = + validate_stamps_batch(&required, output.as_deref().unwrap_or(""), &current_inputs); if !validation.is_satisfied() { let err = @@ -706,7 +711,11 @@ impl ExtBuildCommand { // Use the composed/merged config (which includes remote extension configs) // rather than re-reading the raw local file, so that path-based extension // packages are included in the hash for proper staleness detection. 
- let inputs = compute_ext_input_hash(parsed, &self.extension)?; + let inputs = compute_ext_build_input_hash( + parsed, + &self.extension, + &config.project_root(&self.config_path), + )?; let outputs = StampOutputs::default(); let stamp = Stamp::ext_build(&self.extension, &target, inputs, outputs); let stamp_script = generate_write_stamp_script(&stamp)?; diff --git a/src/commands/ext/checkout.rs b/src/commands/ext/checkout.rs index 672a4942..b121ff28 100644 --- a/src/commands/ext/checkout.rs +++ b/src/commands/ext/checkout.rs @@ -136,7 +136,7 @@ impl ExtCheckoutCommand { .await?; let validation = - validate_stamps_batch(&requirements, output.as_deref().unwrap_or(""), None); + validate_stamps_batch(&requirements, output.as_deref().unwrap_or(""), &[]); if !validation.is_satisfied() { validation @@ -676,7 +676,7 @@ mod tests { install_json ); - let result = validate_stamps_batch(&requirements, &output, None); + let result = validate_stamps_batch(&requirements, &output, &[]); // Should pass without needing build stamp assert!(result.is_satisfied()); @@ -708,7 +708,7 @@ mod tests { sdk_json ); - let result = validate_stamps_batch(&requirements, &output, None); + let result = validate_stamps_batch(&requirements, &output, &[]); assert!(!result.is_satisfied()); assert_eq!(result.missing.len(), 1); @@ -752,7 +752,7 @@ mod tests { install_json ); - let result_before = validate_stamps_batch(&requirements, &output_before, None); + let result_before = validate_stamps_batch(&requirements, &output_before, &[]); assert!(result_before.is_satisfied(), "Should pass before clean"); // After ext clean: SDK still there, ext stamp gone @@ -762,7 +762,7 @@ mod tests { sdk_json ); - let result_after = validate_stamps_batch(&requirements, &output_after, None); + let result_after = validate_stamps_batch(&requirements, &output_after, &[]); assert!(!result_after.is_satisfied(), "Should fail after ext clean"); assert_eq!(result_after.missing.len(), 1); } diff --git a/src/commands/ext/clean.rs 
b/src/commands/ext/clean.rs index 9682d703..6fd13c7a 100644 --- a/src/commands/ext/clean.rs +++ b/src/commands/ext/clean.rs @@ -235,8 +235,7 @@ impl ExtCleanCommand { .run_in_container_with_output(run_config) .await?; - let validation = - validate_stamps_batch(&requirements, output.as_deref().unwrap_or(""), None); + let validation = validate_stamps_batch(&requirements, output.as_deref().unwrap_or(""), &[]); if !validation.is_satisfied() { validation diff --git a/src/commands/ext/image.rs b/src/commands/ext/image.rs index 8d0fb715..d16075c1 100644 --- a/src/commands/ext/image.rs +++ b/src/commands/ext/image.rs @@ -9,9 +9,9 @@ use crate::utils::container::{RunConfig, SdkContainer, TuiContext}; use crate::utils::lockfile::LockFile; use crate::utils::output::{print_info, print_success, print_warning, OutputLevel}; use crate::utils::stamps::{ - compute_ext_input_hash, compute_ext_input_hash_with_fs, generate_batch_read_stamps_script, - generate_write_stamp_script, resolve_required_stamps, validate_stamps_batch, Stamp, - StampCommand, StampComponent, StampOutputs, + compute_ext_build_input_hash, compute_ext_image_input_hash, compute_ext_install_input_hash, + generate_batch_read_stamps_script, generate_write_stamp_script, resolve_required_stamps, + validate_stamps_batch, CurrentInput, Stamp, StampCommand, StampComponent, StampOutputs, }; use crate::utils::target::resolve_target_required; use crate::utils::tui::{TaskId, TuiGuard}; @@ -267,17 +267,33 @@ impl ExtImageCommand { .run_in_container_with_output(run_config) .await?; - // Compute current inputs from composed config for staleness detection. - // Use the base hash (without filesystem) to match what install/build stamps wrote. - // The filesystem-aware hash is only used when writing/reading the image stamp itself. 
- let current_inputs = compute_ext_input_hash(parsed, &self.extension).ok(); - let validation = validate_stamps_batch( - &required, - output.as_deref().unwrap_or(""), - current_inputs - .as_ref() - .map(|i| (&StampComponent::Extension, i)), - ); + // Compute step-scoped current inputs. Each Extension stamp + // (install / build / image) has its own narrow hash; pass all + // three so validate_stamps_batch can match each requirement + // against the matching step's hash. + let project_root = config.project_root(&self.config_path); + let install_inputs = compute_ext_install_input_hash(parsed, &self.extension).ok(); + let build_inputs = + compute_ext_build_input_hash(parsed, &self.extension, &project_root).ok(); + let image_inputs = compute_ext_image_input_hash( + parsed, + &self.extension, + Some(effective_fs), + &project_root, + ) + .ok(); + let mut current_inputs: Vec<CurrentInput<'_>> = Vec::new(); + if let Some(ref i) = install_inputs { + current_inputs.push((StampComponent::Extension, StampCommand::Install, i)); + } + if let Some(ref i) = build_inputs { + current_inputs.push((StampComponent::Extension, StampCommand::Build, i)); + } + if let Some(ref i) = image_inputs { + current_inputs.push((StampComponent::Extension, StampCommand::Image, i)); + } + let validation = + validate_stamps_batch(&required, output.as_deref().unwrap_or(""), &current_inputs); if !validation.is_satisfied() { validation @@ -631,8 +647,12 @@ impl ExtImageCommand { // Write extension image stamp (unless --no-stamps) if !self.no_stamps { - let inputs = - compute_ext_input_hash_with_fs(parsed, &self.extension, Some(filesystem))?; + let inputs = compute_ext_image_input_hash( + parsed, + &self.extension, + Some(filesystem), + &config.project_root(&self.config_path), + )?; let outputs = StampOutputs::default(); let stamp = Stamp::ext_image(&self.extension, &target, inputs, outputs); let stamp_script = generate_write_stamp_script(&stamp)?; diff --git a/src/commands/ext/install.rs b/src/commands/ext/install.rs index 
790a485e..e025ba31 100644 --- a/src/commands/ext/install.rs +++ b/src/commands/ext/install.rs @@ -13,7 +13,7 @@ use crate::utils::lockfile::{build_package_spec_with_lock, LockFile, SysrootType use crate::utils::output::{print_debug, print_error, print_info, print_success, OutputLevel}; use crate::utils::runs_on::RunsOnContext; use crate::utils::stamps::{ - compute_ext_input_hash, generate_write_stamp_script, Stamp, StampOutputs, + compute_ext_install_input_hash, generate_write_stamp_script, Stamp, StampOutputs, }; use crate::utils::target::resolve_target_required; use crate::utils::tui::{TaskId, TuiGuard}; @@ -412,7 +412,7 @@ impl ExtInstallCommand { ctx.renderer .append_output(&ctx.task_id, "Writing install stamp...".to_string()); } - let inputs = compute_ext_input_hash(parsed, ext_name)?; + let inputs = compute_ext_install_input_hash(parsed, ext_name)?; let outputs = StampOutputs::default(); let stamp = Stamp::ext_install(ext_name, target, inputs, outputs); let stamp_script = generate_write_stamp_script(&stamp)?; diff --git a/src/commands/hitl/server.rs b/src/commands/hitl/server.rs index 3cbc3dc6..579ac439 100644 --- a/src/commands/hitl/server.rs +++ b/src/commands/hitl/server.rs @@ -130,7 +130,7 @@ impl HitlServerCommand { // Validate all stamps from batch output let validation = - validate_stamps_batch(&requirements, output.as_deref().unwrap_or(""), None); + validate_stamps_batch(&requirements, output.as_deref().unwrap_or(""), &[]); if !validation.is_satisfied() { validation @@ -526,7 +526,7 @@ mod tests { build_json ); - let result = validate_stamps_batch(&requirements, &output, None); + let result = validate_stamps_batch(&requirements, &output, &[]); assert!(result.is_satisfied()); } @@ -567,7 +567,7 @@ mod tests { install_json ); - let result = validate_stamps_batch(&requirements, &output, None); + let result = validate_stamps_batch(&requirements, &output, &[]); assert!(!result.is_satisfied()); assert_eq!(result.missing.len(), 1); 
assert_eq!(result.missing[0].relative_path(), "ext/app/build.stamp"); @@ -618,7 +618,7 @@ mod tests { build_json ); - let result_before = validate_stamps_batch(&requirements, &output_before, None); + let result_before = validate_stamps_batch(&requirements, &output_before, &[]); assert!(result_before.is_satisfied(), "Should pass before clean"); // After ext clean network-driver: SDK still there, ext stamps gone @@ -628,7 +628,7 @@ mod tests { sdk_json ); - let result_after = validate_stamps_batch(&requirements, &output_after, None); + let result_after = validate_stamps_batch(&requirements, &output_after, &[]); assert!(!result_after.is_satisfied(), "Should fail after ext clean"); assert_eq!( result_after.missing.len(), @@ -698,7 +698,7 @@ mod tests { ext_b_build_json ); - let result = validate_stamps_batch(&requirements, &output_partial, None); + let result = validate_stamps_batch(&requirements, &output_partial, &[]); assert!( !result.is_satisfied(), "Should fail when one extension is cleaned" diff --git a/src/commands/rootfs/install.rs b/src/commands/rootfs/install.rs index e7207f2f..5095a5dc 100644 --- a/src/commands/rootfs/install.rs +++ b/src/commands/rootfs/install.rs @@ -816,12 +816,12 @@ $DNF_SDK_HOST $DNF_SDK_TARGET_REPO_CONF \ if let Some(parsed) = params.parsed { let stamp_result = match params.sysroot_type { SysrootType::Rootfs => { - let inputs = compute_rootfs_input_hash(parsed)?; + let inputs = compute_rootfs_input_hash(parsed, params.src_dir)?; let outputs = StampOutputs::default(); Ok(Stamp::rootfs_install(params.target, inputs, outputs)) } SysrootType::Initramfs => { - let inputs = compute_initramfs_input_hash(parsed)?; + let inputs = compute_initramfs_input_hash(parsed, params.src_dir)?; let outputs = StampOutputs::default(); Ok(Stamp::initramfs_install(params.target, inputs, outputs)) } diff --git a/src/commands/runtime/build.rs b/src/commands/runtime/build.rs index 51da4098..b247653a 100644 --- a/src/commands/runtime/build.rs +++ 
b/src/commands/runtime/build.rs @@ -9,9 +9,10 @@ use crate::utils::{ permissions::{mapping_from_hashmap, render_users_groups_script}, runs_on::RunsOnContext, stamps::{ - compute_runtime_input_hash, generate_batch_read_stamps_script, generate_write_stamp_script, - resolve_required_stamps_for_runtime_build, validate_stamps_batch, Stamp, StampComponent, - StampOutputs, + compute_runtime_build_input_hash, compute_runtime_install_input_hash, + generate_batch_read_stamps_script, generate_write_stamp_script, + resolve_required_stamps_for_runtime_build, validate_stamps_batch, CurrentInput, Stamp, + StampCommand, StampComponent, StampOutputs, }, target::resolve_target_required, tui::{TaskId, TuiGuard}, @@ -271,16 +272,22 @@ impl RuntimeBuildCommand { .get_merged_runtime_config(&self.runtime_name, target_arch, &self.config_path) .ok() .flatten(); - let current_inputs = merged_runtime + let project_root = config.project_root(&self.config_path); + let install_inputs = merged_runtime .as_ref() - .and_then(|mr| compute_runtime_input_hash(mr, &self.runtime_name, parsed).ok()); - let validation = validate_stamps_batch( - &required, - output.as_deref().unwrap_or(""), - current_inputs - .as_ref() - .map(|i| (&StampComponent::Runtime, i)), - ); + .and_then(|mr| compute_runtime_install_input_hash(mr, &self.runtime_name).ok()); + let build_inputs = merged_runtime.as_ref().and_then(|mr| { + compute_runtime_build_input_hash(mr, &self.runtime_name, parsed, &project_root).ok() + }); + let mut current_inputs: Vec<CurrentInput<'_>> = Vec::new(); + if let Some(ref i) = install_inputs { + current_inputs.push((StampComponent::Runtime, StampCommand::Install, i)); + } + if let Some(ref i) = build_inputs { + current_inputs.push((StampComponent::Runtime, StampCommand::Build, i)); + } + let validation = + validate_stamps_batch(&required, output.as_deref().unwrap_or(""), &current_inputs); if !validation.is_satisfied() { validation @@ -736,7 +743,12 @@ impl RuntimeBuildCommand { let merged_runtime = config 
.get_merged_runtime_config(&self.runtime_name, target_arch, &self.config_path)? .unwrap_or_default(); - let inputs = compute_runtime_input_hash(&merged_runtime, &self.runtime_name, parsed)?; + let inputs = compute_runtime_build_input_hash( + &merged_runtime, + &self.runtime_name, + parsed, + &config.project_root(&self.config_path), + )?; let outputs = StampOutputs::default(); let stamp = Stamp::runtime_build(&self.runtime_name, target_arch, inputs, outputs); let stamp_script = generate_write_stamp_script(&stamp)?; diff --git a/src/commands/runtime/clean.rs b/src/commands/runtime/clean.rs index 6b6bb0d0..c36129be 100644 --- a/src/commands/runtime/clean.rs +++ b/src/commands/runtime/clean.rs @@ -359,7 +359,7 @@ mod tests { rt_json ); - let result_before = validate_stamps_batch(&requirements, &output_before, None); + let result_before = validate_stamps_batch(&requirements, &output_before, &[]); assert!(result_before.is_satisfied()); // After runtime clean: SDK still there, runtime stamps gone @@ -369,7 +369,7 @@ mod tests { sdk_json ); - let result_after = validate_stamps_batch(&requirements, &output_after, None); + let result_after = validate_stamps_batch(&requirements, &output_after, &[]); assert!(!result_after.is_satisfied()); assert_eq!(result_after.missing.len(), 1); assert_eq!( @@ -434,7 +434,7 @@ mod tests { ext_build_json ); - let result = validate_stamps_batch(&requirements, &output_after, None); + let result = validate_stamps_batch(&requirements, &output_after, &[]); assert!(!result.is_satisfied()); // Only runtime stamp should be missing assert_eq!(result.satisfied.len(), 3); diff --git a/src/commands/runtime/deploy.rs b/src/commands/runtime/deploy.rs index 8e08b1f4..b234c75c 100644 --- a/src/commands/runtime/deploy.rs +++ b/src/commands/runtime/deploy.rs @@ -270,8 +270,7 @@ impl RuntimeDeployCommand { } }; - let validation = - validate_stamps_batch(&required, output.as_deref().unwrap_or(""), None); + let validation = validate_stamps_batch(&required, 
output.as_deref().unwrap_or(""), &[]); if !validation.is_satisfied() { let msg = format!("Cannot deploy runtime '{}'", self.runtime_name); diff --git a/src/commands/runtime/install.rs b/src/commands/runtime/install.rs index 3312b32e..38ca4cf9 100644 --- a/src/commands/runtime/install.rs +++ b/src/commands/runtime/install.rs @@ -13,7 +13,7 @@ use crate::utils::lockfile::{build_package_spec_with_lock, LockFile, SysrootType use crate::utils::output::{print_debug, print_error, print_info, print_success, OutputLevel}; use crate::utils::runs_on::RunsOnContext; use crate::utils::stamps::{ - compute_runtime_input_hash, generate_write_stamp_script, Stamp, StampOutputs, + compute_runtime_install_input_hash, generate_write_stamp_script, Stamp, StampOutputs, }; use crate::utils::target::resolve_target_required; use crate::utils::tui::{TaskId, TuiGuard}; @@ -314,7 +314,7 @@ impl RuntimeInstallCommand { &target_arch, &self.config_path, )? { - let inputs = compute_runtime_input_hash(&merged_runtime, runtime_name, parsed)?; + let inputs = compute_runtime_install_input_hash(&merged_runtime, runtime_name)?; let outputs = StampOutputs::default(); let stamp = Stamp::runtime_install(runtime_name, &target_arch, inputs, outputs); let stamp_script = generate_write_stamp_script(&stamp)?; diff --git a/src/commands/runtime/provision.rs b/src/commands/runtime/provision.rs index fde9c1b1..8b681155 100644 --- a/src/commands/runtime/provision.rs +++ b/src/commands/runtime/provision.rs @@ -180,7 +180,7 @@ impl RuntimeProvisionCommand { }; // Validate all stamps from batch output - let validation = validate_stamps_batch(&required, output_str, None); + let validation = validate_stamps_batch(&required, output_str, &[]); if !validation.is_satisfied() { // Include the --runs-on target in error message for SDK install hints diff --git a/src/commands/runtime/sign.rs b/src/commands/runtime/sign.rs index 29e45fec..032b5e59 100644 --- a/src/commands/runtime/sign.rs +++ b/src/commands/runtime/sign.rs @@ 
-122,8 +122,7 @@ impl RuntimeSignCommand { .await?; // Validate all stamps from batch output - let validation = - validate_stamps_batch(&required, output.as_deref().unwrap_or(""), None); + let validation = validate_stamps_batch(&required, output.as_deref().unwrap_or(""), &[]); if !validation.is_satisfied() { validation diff --git a/src/commands/sdk/clean.rs b/src/commands/sdk/clean.rs index d6acfce2..903d146e 100644 --- a/src/commands/sdk/clean.rs +++ b/src/commands/sdk/clean.rs @@ -132,7 +132,7 @@ impl SdkCleanCommand { .await?; let validation = - validate_stamps_batch(&requirements, output.as_deref().unwrap_or(""), None); + validate_stamps_batch(&requirements, output.as_deref().unwrap_or(""), &[]); if !validation.is_satisfied() { validation diff --git a/src/commands/sdk/compile.rs b/src/commands/sdk/compile.rs index 2cbb8272..6efc2857 100644 --- a/src/commands/sdk/compile.rs +++ b/src/commands/sdk/compile.rs @@ -154,7 +154,7 @@ impl SdkCompileCommand { .await?; let validation = - validate_stamps_batch(&requirements, output.as_deref().unwrap_or(""), None); + validate_stamps_batch(&requirements, output.as_deref().unwrap_or(""), &[]); if !validation.is_satisfied() { validation @@ -533,7 +533,7 @@ dependencies = { gcc = "*" } // SDK stamp missing let output = format!("sdk/{}/install.stamp:::null", get_local_arch()); - let result = validate_stamps_batch(&requirements, &output, None); + let result = validate_stamps_batch(&requirements, &output, &[]); assert!(!result.is_satisfied()); assert_eq!(result.missing.len(), 1); @@ -558,7 +558,7 @@ dependencies = { gcc = "*" } let sdk_json = serde_json::to_string(&sdk_stamp).unwrap(); let output = format!("sdk/{}/install.stamp:::{}", get_local_arch(), sdk_json); - let result = validate_stamps_batch(&requirements, &output, None); + let result = validate_stamps_batch(&requirements, &output, &[]); assert!(result.is_satisfied()); assert_eq!(result.satisfied.len(), 1); @@ -582,12 +582,12 @@ dependencies = { gcc = "*" } let sdk_json = 
serde_json::to_string(&sdk_stamp).unwrap(); let output_before = format!("sdk/{}/install.stamp:::{}", get_local_arch(), sdk_json); - let result_before = validate_stamps_batch(&requirements, &output_before, None); + let result_before = validate_stamps_batch(&requirements, &output_before, &[]); assert!(result_before.is_satisfied(), "Should pass before clean"); // After clean --stamps: SDK stamp gone (simulating rm -rf .stamps/) let output_after = format!("sdk/{}/install.stamp:::null", get_local_arch()); - let result_after = validate_stamps_batch(&requirements, &output_after, None); + let result_after = validate_stamps_batch(&requirements, &output_after, &[]); assert!( !result_after.is_satisfied(), "Should fail after clean --stamps" diff --git a/src/commands/sdk/package.rs b/src/commands/sdk/package.rs index 1c732b28..9aea2aec 100644 --- a/src/commands/sdk/package.rs +++ b/src/commands/sdk/package.rs @@ -123,7 +123,7 @@ impl SdkPackageCommand { .await?; let validation = - validate_stamps_batch(&requirements, output.as_deref().unwrap_or(""), None); + validate_stamps_batch(&requirements, output.as_deref().unwrap_or(""), &[]); if !validation.is_satisfied() { validation diff --git a/src/utils/config.rs b/src/utils/config.rs index c46c002b..44a73a2e 100644 --- a/src/utils/config.rs +++ b/src/utils/config.rs @@ -3867,6 +3867,20 @@ impl Config { } /// Get the resolved source directory path + /// Best-effort project root for resolving project-relative paths + /// (`post_install` / `post_build` scripts, etc.) when reading them off + /// disk on the host. Uses the resolved `src_dir` when set, otherwise + /// falls back to the directory containing the config file. 
+ pub fn project_root<P: AsRef<Path>>(&self, config_path: P) -> PathBuf { + self.get_resolved_src_dir(&config_path).unwrap_or_else(|| { + config_path + .as_ref() + .parent() + .unwrap_or_else(|| Path::new(".")) + .to_path_buf() + }) + } + /// If src_dir is configured, it resolves relative paths relative to the config file /// If not configured, returns None (use default behavior) pub fn get_resolved_src_dir<P: AsRef<Path>>(&self, config_path: P) -> Option<PathBuf> { diff --git a/src/utils/prerequisites.rs b/src/utils/prerequisites.rs index 375c2fe4..6b79c1e3 100644 --- a/src/utils/prerequisites.rs +++ b/src/utils/prerequisites.rs @@ -58,7 +58,7 @@ pub async fn check_prerequisites( .context("Failed to run prerequisite stamp check")? .unwrap_or_default(); - let validation = validate_stamps_batch(&requirements, &stdout, None); + let validation = validate_stamps_batch(&requirements, &stdout, &[]); if !validation.is_satisfied() { validation diff --git a/src/utils/stamps.rs b/src/utils/stamps.rs index f0828757..9ca1090e 100644 --- a/src/utils/stamps.rs +++ b/src/utils/stamps.rs @@ -7,14 +7,12 @@ //! 2. Detects staleness via content-addressable hashing (config + package list) //! 3. Enforces command ordering with dependency resolution from config -// Allow deprecated variants for backward compatibility during migration -#![allow(deprecated)] - use anyhow::{Context, Result}; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; use std::fmt; +use std::path::Path; /// Get the local machine's CPU architecture /// @@ -35,8 +33,11 @@ pub fn get_local_arch() -> &'static str { } } -/// Current stamp format version -pub const STAMP_VERSION: u32 = 1; +/// Current stamp format version. Bumped from 1 → 2 in the per-step input-hash +/// rework: each component step now has its own narrow hash, so old stamps +/// written under the broader shared hashes cannot be compared with current +/// inputs. Any stamp at an older version is treated as stale. 
+pub const STAMP_VERSION: u32 = 2; /// Command types that can have stamps #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] @@ -357,6 +358,13 @@ impl Stamp { /// Check if the stamp inputs match the current inputs pub fn is_current(&self, current_inputs: &StampInputs) -> bool { + // Stamp format version must match — older stamps were written + // against the pre-split shared hash functions and cannot be + // compared against the new narrower per-step hashes. + if self.version != STAMP_VERSION { + return false; + } + // Config hash must always match if self.inputs.config_hash != current_inputs.config_hash { return false; @@ -814,12 +822,90 @@ pub fn compute_config_hash(value: &serde_yaml::Value) -> Result { Ok(compute_hash(&json)) } +// ─── Per-step input-hash helpers ──────────────────────────────────────── +// +// The hash functions below split each component's inputs into narrow, +// step-scoped subsets. Adding a field to `runtime build`'s hash should NOT +// invalidate `runtime install`'s stamp; this is enforced via separate +// `compute___input_hash` functions, each pulling only the +// keys that actually affect that step. +// +// `narrow_kernel_for_hash` and `hash_script_at` are shared building blocks +// to keep the hash-data construction consistent across components. + +/// Extract the subset of a `kernel:` YAML block that actually affects what +/// gets installed or built. Returns a fresh mapping with only `package`, +/// `version`, `compile`, `install` keys (when present). Unknown / new +/// fields are deliberately ignored so cosmetic kernel-block edits +/// (comments, metadata, future additions that don't drive selection) do +/// not invalidate stamps. 
+fn narrow_kernel_for_hash(kernel: &serde_yaml::Value) -> serde_yaml::Value { + let mut out = serde_yaml::Mapping::new(); + for key in ["package", "version", "compile", "install"] { + if let Some(v) = kernel.get(key) { + out.insert(serde_yaml::Value::String(key.to_string()), v.clone()); + } + } + serde_yaml::Value::Mapping(out) +} + +/// Hash the contents of a project-relative script file. The returned +/// string is embedded into a hash mapping alongside the original relative +/// path so the stamp invalidates on either (a) path changes, or (b) +/// script-content edits. +/// +/// Missing files hash to the literal `"missing"` sentinel — that way, a +/// stamp written when the file existed will invalidate if the file is +/// later removed, and adding the file later (path unchanged) invalidates +/// the old "missing" stamp. +fn hash_script_at(project_root: &Path, rel_path: &str) -> String { + let abs = project_root.join(rel_path); + match std::fs::read(&abs) { + Ok(bytes) => { + let mut hasher = Sha256::new(); + hasher.update(&bytes); + let result = hasher.finalize(); + let mut hex = String::with_capacity(result.len() * 2); + for b in result.iter() { + use std::fmt::Write; + let _ = write!(hex, "{b:02x}"); + } + format!("sha256:{hex}") + } + Err(_) => "missing".to_string(), + } +} + +/// Build the `{path, content_sha256}` mapping that we embed into input +/// hashes for `post_build` / `post_install` hooks. Both fields go into +/// the parent mapping so a path swap OR a content edit invalidates. 
+fn script_hash_value(project_root: &Path, rel_path: &str) -> serde_yaml::Value { + let mut m = serde_yaml::Mapping::new(); + m.insert( + serde_yaml::Value::String("path".to_string()), + serde_yaml::Value::String(rel_path.to_string()), + ); + m.insert( + serde_yaml::Value::String("content_sha256".to_string()), + serde_yaml::Value::String(hash_script_at(project_root, rel_path)), + ); + serde_yaml::Value::Mapping(m) +} + /// Compute input hash for SDK install -/// Includes: sdk.dependencies, sdk.image, repo URLs +/// +/// Includes only inputs that affect the SDK toolchain install itself: +/// `sdk.packages`, `sdk.image`, `sdk.repo_url`, `sdk.repo_release`. +/// +/// **Does NOT include `rootfs.packages` / `initramfs.packages`** — +/// the rootfs and initramfs sysroots are populated by separate +/// `rootfs install` / `initramfs install` steps with their own stamps. +/// The orchestrating `avocado sdk install` command writes each of those +/// stamps independently, so a rootfs-package change invalidates only +/// the rootfs-install stamp and not the entire SDK toolchain install. 
pub fn compute_sdk_input_hash(config: &serde_yaml::Value) -> Result<StampInputs> { let mut hash_data = serde_yaml::Mapping::new(); - // Include sdk.dependencies if let Some(sdk) = config.get("sdk") { if let Some(deps) = sdk.get("packages") { hash_data.insert( @@ -847,26 +933,6 @@ pub fn compute_sdk_input_hash(config: &serde_yaml::Value) -> Result<StampInputs> } } - // Include rootfs.packages (affects rootfs sysroot installed during sdk install) - if let Some(rootfs) = config.get("rootfs") { - if let Some(packages) = rootfs.get("packages") { - hash_data.insert( - serde_yaml::Value::String("rootfs.packages".to_string()), - packages.clone(), - ); - } - } - - // Include initramfs.packages (affects initramfs sysroot installed during sdk install) - if let Some(initramfs) = config.get("initramfs") { - if let Some(packages) = initramfs.get("packages") { - hash_data.insert( - serde_yaml::Value::String("initramfs.packages".to_string()), - packages.clone(), - ); - } - } - let config_hash = compute_config_hash(&serde_yaml::Value::Mapping(hash_data))?; Ok(StampInputs::new(config_hash)) } @@ -918,22 +984,22 @@ pub fn compute_compile_deps_input_hash( Ok(StampInputs::new(config_hash)) } -pub fn compute_ext_input_hash(config: &serde_yaml::Value, ext_name: &str) -> Result<StampInputs> { - compute_ext_input_hash_with_fs(config, ext_name, None) -} - -/// Compute input hash for an extension, including an optional resolved filesystem format. -/// When `filesystem` is `Some`, it is included in the hash so that changing the image -/// format (e.g. squashfs → erofs-lz4) invalidates the stamp. The caller is responsible -/// for resolving the effective value (explicit per-extension override or rootfs default). -pub fn compute_ext_input_hash_with_fs( +/// Compute input hash for **extension install**. 
+/// +/// Includes only inputs that affect the package-install step: +/// - `ext..packages` (what gets installed) +/// - `ext..types` (sysext/confext drives a small set of auto-included packages) +/// - `ext..source` (where the extension is fetched from) +/// +/// Deliberately excludes `image`, `var_files`, `subvolumes`, `post_build`, +/// `filesystem`, `permissions`, `overlay`, `version`, and all merge/service +/// fields — those affect build/image output, not what gets installed. +pub fn compute_ext_install_input_hash( config: &serde_yaml::Value, ext_name: &str, - filesystem: Option<&str>, ) -> Result { let mut hash_data = serde_yaml::Mapping::new(); - // Include ext..dependencies if let Some(ext) = config.get("extensions").and_then(|e| e.get(ext_name)) { if let Some(deps) = ext.get("packages") { hash_data.insert( @@ -941,67 +1007,145 @@ pub fn compute_ext_input_hash_with_fs( deps.clone(), ); } - // Also include types as they affect build if let Some(types) = ext.get("types") { hash_data.insert( serde_yaml::Value::String(format!("ext.{ext_name}.types")), types.clone(), ); } - // Include var_files as they affect which files are excluded from the .raw image + if let Some(source) = ext.get("source") { + hash_data.insert( + serde_yaml::Value::String(format!("ext.{ext_name}.source")), + source.clone(), + ); + } + } + + let config_hash = compute_config_hash(&serde_yaml::Value::Mapping(hash_data))?; + Ok(StampInputs::new(config_hash)) +} + +/// Compute input hash for **extension build**. +/// +/// Includes the install inputs (so a package change invalidates build too) +/// plus build-only inputs: `image` (kabtool args), `overlay`, and the +/// `post_build` hook (both the relative path and its file content). +/// +/// Excludes `var_files`, `subvolumes`, and the resolved `filesystem` — +/// those only affect the image step. 
+pub fn compute_ext_build_input_hash( + config: &serde_yaml::Value, + ext_name: &str, + project_root: &Path, +) -> Result { + let hash_data = ext_build_hash_data(config, ext_name, project_root); + let config_hash = compute_config_hash(&serde_yaml::Value::Mapping(hash_data))?; + Ok(StampInputs::new(config_hash)) +} + +/// Compute input hash for **extension image**. +/// +/// Includes the build inputs plus image-only inputs: `var_files`, +/// `subvolumes`, and the resolved `filesystem` format. +pub fn compute_ext_image_input_hash( + config: &serde_yaml::Value, + ext_name: &str, + filesystem: Option<&str>, + project_root: &Path, +) -> Result { + let mut hash_data = ext_build_hash_data(config, ext_name, project_root); + + if let Some(ext) = config.get("extensions").and_then(|e| e.get(ext_name)) { if let Some(var_files) = ext.get("var_files") { hash_data.insert( serde_yaml::Value::String(format!("ext.{ext_name}.var_files")), var_files.clone(), ); } - // Include subvolumes as they affect var image creation flags if let Some(subvolumes) = ext.get("subvolumes") { hash_data.insert( serde_yaml::Value::String(format!("ext.{ext_name}.subvolumes")), subvolumes.clone(), ); } - // Include image config as it determines output format and kabtool args + } + if let Some(fs) = filesystem { + hash_data.insert( + serde_yaml::Value::String(format!("ext.{ext_name}.filesystem")), + serde_yaml::Value::String(fs.to_string()), + ); + } + + let config_hash = compute_config_hash(&serde_yaml::Value::Mapping(hash_data))?; + Ok(StampInputs::new(config_hash)) +} + +/// Shared mapping construction for `ext build` (and the subset used by +/// `ext image`). Keeping both steps' shared inputs in one place avoids +/// drift between the two hash functions. 
+fn ext_build_hash_data( + config: &serde_yaml::Value, + ext_name: &str, + project_root: &Path, +) -> serde_yaml::Mapping { + let mut hash_data = serde_yaml::Mapping::new(); + + if let Some(ext) = config.get("extensions").and_then(|e| e.get(ext_name)) { + // Install-time inputs are also build-time inputs — a package change + // invalidates everything downstream. + if let Some(deps) = ext.get("packages") { + hash_data.insert( + serde_yaml::Value::String(format!("ext.{ext_name}.dependencies")), + deps.clone(), + ); + } + if let Some(types) = ext.get("types") { + hash_data.insert( + serde_yaml::Value::String(format!("ext.{ext_name}.types")), + types.clone(), + ); + } + if let Some(source) = ext.get("source") { + hash_data.insert( + serde_yaml::Value::String(format!("ext.{ext_name}.source")), + source.clone(), + ); + } + // Build-only inputs. if let Some(image) = ext.get("image") { hash_data.insert( serde_yaml::Value::String(format!("ext.{ext_name}.image")), image.clone(), ); } - // Include post_build so adding/removing/changing the hook re-runs the build. - // Note: this hashes the *path*, not the script's contents — re-run with - // --no-stamps to pick up edits to the script itself. - if let Some(post_build) = ext.get("post_build") { + if let Some(overlay) = ext.get("overlay") { + hash_data.insert( + serde_yaml::Value::String(format!("ext.{ext_name}.overlay")), + overlay.clone(), + ); + } + if let Some(post_build) = ext.get("post_build").and_then(|v| v.as_str()) { hash_data.insert( serde_yaml::Value::String(format!("ext.{ext_name}.post_build")), - post_build.clone(), + script_hash_value(project_root, post_build), ); } } - // Include the resolved filesystem format when provided — determines the image - // format (.raw contents) and must invalidate the stamp when it changes. 
- if let Some(fs) = filesystem { - hash_data.insert( - serde_yaml::Value::String(format!("ext.{ext_name}.filesystem")), - serde_yaml::Value::String(fs.to_string()), - ); - } - - let config_hash = compute_config_hash(&serde_yaml::Value::Mapping(hash_data))?; - Ok(StampInputs::new(config_hash)) + hash_data } -/// Compute input hash for rootfs install -/// Includes: rootfs.packages, top-level kernel config +/// Compute input hash for **rootfs install**. /// -/// The kernel block matters because rootfs install auto-appends -/// `kernel-image-` and `packagegroup-avocado-rootfs-modules-` -/// based on the resolved kernel version. Changing `kernel.version` -/// changes what gets installed even though `rootfs.packages` is unchanged, -/// so the stamp must invalidate when the kernel block changes. -pub fn compute_rootfs_input_hash(config: &serde_yaml::Value) -> Result { +/// Includes `rootfs.packages`, `rootfs.overlay`, and the narrowed kernel +/// selection (`package`/`version`/`compile`/`install` only — adding an +/// unrelated `kernel.metadata` field does NOT invalidate). Also includes +/// the `post_install` hook path and its file contents so an in-place +/// script edit invalidates without `--no-stamps`. +pub fn compute_rootfs_input_hash( + config: &serde_yaml::Value, + project_root: &Path, +) -> Result { let mut hash_data = serde_yaml::Mapping::new(); if let Some(rootfs) = config.get("rootfs") { @@ -1017,12 +1161,18 @@ pub fn compute_rootfs_input_hash(config: &serde_yaml::Value) -> Result Result` based -/// on the resolved kernel version. -pub fn compute_initramfs_input_hash(config: &serde_yaml::Value) -> Result { +/// Same shape as [`compute_rootfs_input_hash`] — narrowed kernel block, +/// `post_install` content hashed alongside its path. 
+pub fn compute_initramfs_input_hash( + config: &serde_yaml::Value, + project_root: &Path, +) -> Result { let mut hash_data = serde_yaml::Mapping::new(); if let Some(initramfs) = config.get("initramfs") { @@ -1052,12 +1203,18 @@ pub fn compute_initramfs_input_hash(config: &serde_yaml::Value) -> Result Result.dependencies (merged with target), kernel config, -/// extension docker_images (affects var partition priming) -pub fn compute_runtime_input_hash( +/// Compute input hash for **runtime install**. +/// +/// Includes only the inputs that affect the package-install step for the +/// runtime sysroot: `runtime..packages` (merged with per-target +/// overrides) and `runtime..target`. Excludes kernel, var, var_files, +/// post_build, rootfs/initramfs filesystem, and extension docker_images — +/// those affect the build step, not what gets installed for the runtime +/// itself. +pub fn compute_runtime_install_input_hash( merged_runtime: &serde_yaml::Value, runtime_name: &str, - parsed: &serde_yaml::Value, ) -> Result { let mut hash_data = serde_yaml::Mapping::new(); - // Include the merged dependencies section if let Some(deps) = merged_runtime.get("packages") { hash_data.insert( serde_yaml::Value::String(format!("runtime.{runtime_name}.dependencies")), deps.clone(), ); } + if let Some(target) = merged_runtime.get("target") { + hash_data.insert( + serde_yaml::Value::String(format!("runtime.{runtime_name}.target")), + target.clone(), + ); + } - // Include target if specified + let config_hash = compute_config_hash(&serde_yaml::Value::Mapping(hash_data))?; + Ok(StampInputs::new(config_hash)) +} + +/// Compute input hash for **runtime build**. 
+/// +/// Includes the install inputs plus build-only inputs: the narrowed +/// kernel selection (`package`/`version`/`compile`/`install` only), the +/// runtime-level `var` and `var_files` config, the `post_build` hook +/// (path + content), the rootfs/initramfs filesystem formats this +/// runtime consumes, and any extension `docker_images` that this runtime +/// needs primed at build time. +pub fn compute_runtime_build_input_hash( + merged_runtime: &serde_yaml::Value, + runtime_name: &str, + parsed: &serde_yaml::Value, + project_root: &Path, +) -> Result { + let mut hash_data = serde_yaml::Mapping::new(); + + // Install inputs are also build inputs. + if let Some(deps) = merged_runtime.get("packages") { + hash_data.insert( + serde_yaml::Value::String(format!("runtime.{runtime_name}.dependencies")), + deps.clone(), + ); + } if let Some(target) = merged_runtime.get("target") { hash_data.insert( serde_yaml::Value::String(format!("runtime.{runtime_name}.target")), @@ -1091,16 +1283,14 @@ pub fn compute_runtime_input_hash( ); } - // Include kernel config if specified (changes to kernel config should trigger rebuild) + // Build-only inputs. 
if let Some(kernel) = merged_runtime.get("kernel") { hash_data.insert( serde_yaml::Value::String(format!("runtime.{runtime_name}.kernel")), - kernel.clone(), + narrow_kernel_for_hash(kernel), ); } - // Include docker_images from extensions in this runtime - // (changes to extension docker_images should trigger runtime rebuild to re-prime images) if let Some(ext_list) = merged_runtime .get("extensions") .and_then(|e| e.as_sequence()) @@ -1124,33 +1314,25 @@ pub fn compute_runtime_input_hash( } } - // Include runtime-level var_files if specified if let Some(var_files) = merged_runtime.get("var_files") { hash_data.insert( serde_yaml::Value::String(format!("runtime.{runtime_name}.var_files")), var_files.clone(), ); } - - // Include runtime-level var config (subvolumes, compression) if specified if let Some(var) = merged_runtime.get("var") { hash_data.insert( serde_yaml::Value::String(format!("runtime.{runtime_name}.var")), var.clone(), ); } - - // Include post_build so adding/removing/changing the hook re-runs the build. - // Note: this hashes the *path*, not the script's contents — re-run with - // --no-stamps to pick up edits to the script itself. - if let Some(post_build) = merged_runtime.get("post_build") { + if let Some(post_build) = merged_runtime.get("post_build").and_then(|v| v.as_str()) { hash_data.insert( serde_yaml::Value::String(format!("runtime.{runtime_name}.post_build")), - post_build.clone(), + script_hash_value(project_root, post_build), ); } - // Include rootfs/initramfs filesystem formats (changes should trigger rebuild) if let Some(rootfs) = parsed.get("rootfs") { if let Some(fs) = rootfs.get("filesystem") { hash_data.insert( @@ -1271,16 +1453,23 @@ pub fn parse_batch_stamps_output( result } +/// A (component, command) key paired with the freshly computed input +/// hash for that specific step. Passed into [`validate_stamps_batch`] +/// so each requirement is compared against the correct step-scoped hash. 
+pub type CurrentInput<'a> = (StampComponent, StampCommand, &'a StampInputs); + /// Validate all stamp requirements from batch output in a single pass. /// -/// `current_inputs` is an optional (component, hash) pair used for staleness detection. -/// The hash is only compared against stamps matching the specified component type. -/// Dependency stamps (e.g., SDK stamps when building an extension) are validated -/// for existence only — their content hash was verified when they were created. +/// `current_inputs` is a slice of (component, command, hash) triples +/// used for staleness detection. A requirement is matched against the +/// triple whose component AND command both match it. Requirements with +/// no matching entry are validated for existence only — appropriate for +/// dependency stamps (e.g. SDK stamps when building an extension) whose +/// content hash was verified when they were created. pub fn validate_stamps_batch( requirements: &[StampRequirement], batch_output: &str, - current_inputs: Option<(&StampComponent, &StampInputs)>, + current_inputs: &[CurrentInput<'_>], ) -> StampValidationResult { let stamp_data = parse_batch_stamps_output(batch_output); let mut validation = StampValidationResult::new(); @@ -1289,10 +1478,10 @@ pub fn validate_stamps_batch( let stamp_path = req.relative_path(); let json_content = stamp_data.get(&stamp_path).and_then(|v| v.as_ref()); - // Only apply current_inputs to stamps matching the specified component type. 
let inputs_for_req = current_inputs - .filter(|(component, _)| req.component == **component) - .map(|(_, inputs)| inputs); + .iter() + .find(|(component, command, _)| req.component == *component && req.command == *command) + .map(|(_, _, inputs)| *inputs); check_stamp_requirement( req, @@ -2099,7 +2288,7 @@ ext/my-ext/build.stamp:::null"# ext_json ); - let result = validate_stamps_batch(&requirements, &output, None); + let result = validate_stamps_batch(&requirements, &output, &[]); assert!(result.is_satisfied()); assert_eq!(result.satisfied.len(), 2); @@ -2129,7 +2318,7 @@ ext/my-ext/build.stamp:::null"# sdk_json ); - let result = validate_stamps_batch(&requirements, &output, None); + let result = validate_stamps_batch(&requirements, &output, &[]); assert!(!result.is_satisfied()); assert_eq!(result.satisfied.len(), 1); @@ -2144,7 +2333,7 @@ ext/my-ext/build.stamp:::null"# StampRequirement::ext_install("my-ext"), ]; - let result = validate_stamps_batch(&requirements, "", None); + let result = validate_stamps_batch(&requirements, "", &[]); assert!(!result.is_satisfied()); assert!(result.satisfied.is_empty()); @@ -2364,7 +2553,7 @@ ext/my-ext/build.stamp:::null"# sdk_json, ext_json ); - let result_before = validate_stamps_batch(&requirements, &output_before, None); + let result_before = validate_stamps_batch(&requirements, &output_before, &[]); assert!(result_before.is_satisfied()); // After ext clean: SDK still there, ext stamps gone @@ -2373,7 +2562,7 @@ ext/my-ext/build.stamp:::null"# get_local_arch(), sdk_json ); - let result_after = validate_stamps_batch(&requirements, &output_after_ext_clean, None); + let result_after = validate_stamps_batch(&requirements, &output_after_ext_clean, &[]); assert!(!result_after.is_satisfied()); assert_eq!(result_after.missing.len(), 1); assert_eq!( @@ -2403,7 +2592,7 @@ runtime/my-runtime/build.stamp:::null"#, get_local_arch() ); - let result = validate_stamps_batch(&requirements, &output, None); + let result = 
validate_stamps_batch(&requirements, &output, &[]); assert!(!result.is_satisfied()); assert!(result.satisfied.is_empty()); @@ -2435,7 +2624,11 @@ runtime/my-runtime/build.stamp:::null"#, let result = validate_stamps_batch( &requirements, &output, - Some((&StampComponent::Extension, &changed_inputs)), + &[( + StampComponent::Extension, + StampCommand::Install, + &changed_inputs, + )], ); assert!(!result.is_satisfied()); @@ -2483,7 +2676,11 @@ runtime/my-runtime/build.stamp:::null"#, let result = validate_stamps_batch( &requirements, &output, - Some((&StampComponent::Extension, &changed_inputs)), + &[( + StampComponent::Extension, + StampCommand::Install, + &changed_inputs, + )], ); assert!(!result.is_satisfied()); @@ -2675,9 +2872,20 @@ kernel: .unwrap(); let empty_parsed = serde_yaml::Value::Mapping(serde_yaml::Mapping::new()); - let hash_without = - compute_runtime_input_hash(&without_kernel, "dev", &empty_parsed).unwrap(); - let hash_with = compute_runtime_input_hash(&with_kernel, "dev", &empty_parsed).unwrap(); + let hash_without = compute_runtime_build_input_hash( + &without_kernel, + "dev", + &empty_parsed, + std::path::Path::new("."), + ) + .unwrap(); + let hash_with = compute_runtime_build_input_hash( + &with_kernel, + "dev", + &empty_parsed, + std::path::Path::new("."), + ) + .unwrap(); // Hashes should differ when kernel config is added assert_ne!(hash_without.config_hash, hash_with.config_hash); @@ -2708,10 +2916,20 @@ kernel: .unwrap(); let empty_parsed = serde_yaml::Value::Mapping(serde_yaml::Mapping::new()); - let hash_package = - compute_runtime_input_hash(&kernel_package, "dev", &empty_parsed).unwrap(); - let hash_compile = - compute_runtime_input_hash(&kernel_compile, "dev", &empty_parsed).unwrap(); + let hash_package = compute_runtime_build_input_hash( + &kernel_package, + "dev", + &empty_parsed, + std::path::Path::new("."), + ) + .unwrap(); + let hash_compile = compute_runtime_build_input_hash( + &kernel_compile, + "dev", + &empty_parsed, + 
std::path::Path::new("."), + ) + .unwrap(); // Switching kernel mode should produce a different hash assert_ne!(hash_package.config_hash, hash_compile.config_hash); @@ -2745,8 +2963,16 @@ extensions: ) .unwrap(); - let hash_without = compute_ext_input_hash(&config_without, "my-ext").unwrap(); - let hash_with = compute_ext_input_hash(&config_with, "my-ext").unwrap(); + let hash_without = compute_ext_image_input_hash( + &config_without, + "my-ext", + None, + std::path::Path::new("."), + ) + .unwrap(); + let hash_with = + compute_ext_image_input_hash(&config_with, "my-ext", None, std::path::Path::new(".")) + .unwrap(); assert_ne!( hash_without.config_hash, hash_with.config_hash, @@ -2790,8 +3016,20 @@ extensions: ) .unwrap(); - let hash_without = compute_runtime_input_hash(&runtime, "dev", &parsed_without).unwrap(); - let hash_with = compute_runtime_input_hash(&runtime, "dev", &parsed_with).unwrap(); + let hash_without = compute_runtime_build_input_hash( + &runtime, + "dev", + &parsed_without, + std::path::Path::new("."), + ) + .unwrap(); + let hash_with = compute_runtime_build_input_hash( + &runtime, + "dev", + &parsed_with, + std::path::Path::new("."), + ) + .unwrap(); assert_ne!( hash_without.config_hash, hash_with.config_hash, @@ -2821,9 +3059,20 @@ var_files: .unwrap(); let empty_parsed = serde_yaml::Value::Mapping(serde_yaml::Mapping::new()); - let hash_without = - compute_runtime_input_hash(&runtime_without, "dev", &empty_parsed).unwrap(); - let hash_with = compute_runtime_input_hash(&runtime_with, "dev", &empty_parsed).unwrap(); + let hash_without = compute_runtime_build_input_hash( + &runtime_without, + "dev", + &empty_parsed, + std::path::Path::new("."), + ) + .unwrap(); + let hash_with = compute_runtime_build_input_hash( + &runtime_with, + "dev", + &empty_parsed, + std::path::Path::new("."), + ) + .unwrap(); assert_ne!( hash_without.config_hash, hash_with.config_hash, @@ -2861,8 +3110,16 @@ extensions: ) .unwrap(); - let hash_without = 
compute_ext_input_hash(&config_without, "my-ext").unwrap(); - let hash_with = compute_ext_input_hash(&config_with, "my-ext").unwrap(); + let hash_without = compute_ext_image_input_hash( + &config_without, + "my-ext", + None, + std::path::Path::new("."), + ) + .unwrap(); + let hash_with = + compute_ext_image_input_hash(&config_with, "my-ext", None, std::path::Path::new(".")) + .unwrap(); assert_ne!( hash_without.config_hash, hash_with.config_hash, @@ -2894,13 +3151,389 @@ var: .unwrap(); let empty_parsed = serde_yaml::Value::Mapping(serde_yaml::Mapping::new()); - let hash_without = - compute_runtime_input_hash(&runtime_without, "dev", &empty_parsed).unwrap(); - let hash_with = compute_runtime_input_hash(&runtime_with, "dev", &empty_parsed).unwrap(); + let hash_without = compute_runtime_build_input_hash( + &runtime_without, + "dev", + &empty_parsed, + std::path::Path::new("."), + ) + .unwrap(); + let hash_with = compute_runtime_build_input_hash( + &runtime_with, + "dev", + &empty_parsed, + std::path::Path::new("."), + ) + .unwrap(); assert_ne!( hash_without.config_hash, hash_with.config_hash, "Adding var config should change the runtime input hash" ); } + + // ──────────────────────────────────────────────────────────────────── + // Negative-invalidation tests + // + // Each test asserts that mutating a field that the step does NOT care + // about leaves the step's input hash unchanged. Without these, the + // per-step split is one refactor away from regressing back to the + // shared-hash over-invalidation behavior. 
+ // ──────────────────────────────────────────────────────────────────── + + fn ext_with_extras(extras: &str) -> serde_yaml::Value { + let yaml = format!( + r#" +extensions: + my-ext: + packages: + foo: "*" + types: [sysext] +{extras} +"# + ); + serde_yaml::from_str(&yaml).unwrap() + } + + fn ext_install_hash(value: &serde_yaml::Value) -> String { + compute_ext_install_input_hash(value, "my-ext") + .unwrap() + .config_hash + } + + fn ext_build_hash(value: &serde_yaml::Value) -> String { + compute_ext_build_input_hash(value, "my-ext", std::path::Path::new(".")) + .unwrap() + .config_hash + } + + fn ext_image_hash(value: &serde_yaml::Value) -> String { + compute_ext_image_input_hash(value, "my-ext", None, std::path::Path::new(".")) + .unwrap() + .config_hash + } + + #[test] + fn ext_install_unaffected_by_image_field() { + let base = ext_with_extras(""); + let with_image = ext_with_extras(" image:\n type: kab\n args: \"-v 1.0.0\""); + assert_eq!(ext_install_hash(&base), ext_install_hash(&with_image)); + } + + #[test] + fn ext_install_unaffected_by_var_files() { + let base = ext_with_extras(""); + let with_var = ext_with_extras(" var_files:\n - \"var/lib/docker/**\""); + assert_eq!(ext_install_hash(&base), ext_install_hash(&with_var)); + } + + #[test] + fn ext_install_unaffected_by_subvolumes_and_post_build() { + let base = ext_with_extras(""); + let with = ext_with_extras( + " subvolumes:\n lib/docker:\n nodatacow: true\n post_build: scripts/build.sh", + ); + assert_eq!(ext_install_hash(&base), ext_install_hash(&with)); + } + + #[test] + fn ext_install_unaffected_by_metadata_and_runtime_fields() { + let base = ext_with_extras(""); + let with = ext_with_extras( + " version: \"1.0.0\"\n scopes: [system]\n enable_services: [foo.service]\n \ + on_merge: [\"echo hi\"]\n on_unmerge: [\"echo bye\"]", + ); + assert_eq!(ext_install_hash(&base), ext_install_hash(&with)); + } + + #[test] + fn ext_build_unaffected_by_var_files_and_subvolumes() { + let base = ext_with_extras(""); 
+ let with = ext_with_extras( + " var_files:\n - \"var/lib/docker/**\"\n subvolumes:\n lib/x:\n nodatacow: true", + ); + assert_eq!(ext_build_hash(&base), ext_build_hash(&with)); + } + + #[test] + fn ext_build_unaffected_by_filesystem_override() { + // The filesystem field is image-only — build must not see it. + let base = ext_with_extras(""); + let with_fs = ext_with_extras(" filesystem: erofs-zst"); + assert_eq!(ext_build_hash(&base), ext_build_hash(&with_fs)); + } + + #[test] + fn ext_image_includes_var_files_and_subvolumes() { + let base = ext_with_extras(""); + let with = ext_with_extras( + " var_files:\n - \"var/lib/docker/**\"\n subvolumes:\n lib/x:\n nodatacow: true", + ); + assert_ne!(ext_image_hash(&base), ext_image_hash(&with)); + } + + #[test] + fn ext_build_content_changes_invalidate_when_post_build_set() { + let tmp = tempfile::TempDir::new().unwrap(); + let script = tmp.path().join("build.sh"); + std::fs::write(&script, b"#!/bin/sh\necho original\n").unwrap(); + + let config = ext_with_extras(" post_build: build.sh"); + let h1 = compute_ext_build_input_hash(&config, "my-ext", tmp.path()) + .unwrap() + .config_hash; + + std::fs::write(&script, b"#!/bin/sh\necho edited\n").unwrap(); + let h2 = compute_ext_build_input_hash(&config, "my-ext", tmp.path()) + .unwrap() + .config_hash; + + assert_ne!( + h1, h2, + "editing post_build script body should invalidate the build hash" + ); + } + + fn runtime(yaml: &str) -> serde_yaml::Value { + serde_yaml::from_str(yaml).unwrap() + } + + #[test] + fn runtime_install_unaffected_by_build_only_fields() { + let base = runtime( + r#" +packages: + avocado-runtime: "*" +target: "x86_64" +"#, + ); + let with_build_only = runtime( + r#" +packages: + avocado-runtime: "*" +target: "x86_64" +kernel: + version: "6.6.*" +var: + compression: zstd +var_files: + - source: "files/x" + dest: "lib/x" +post_build: scripts/post.sh +"#, + ); + let h1 = compute_runtime_install_input_hash(&base, "dev") + .unwrap() + .config_hash; + let h2 
= compute_runtime_install_input_hash(&with_build_only, "dev") + .unwrap() + .config_hash; + assert_eq!(h1, h2); + } + + #[test] + fn runtime_install_unaffected_by_top_level_rootfs_initramfs_filesystem() { + let runtime_node = runtime( + r#" +packages: + avocado-runtime: "*" +target: "x86_64" +"#, + ); + let parsed_a: serde_yaml::Value = serde_yaml::from_str( + r#" +rootfs: + filesystem: erofs-lz4 +initramfs: + filesystem: cpio.zst +"#, + ) + .unwrap(); + let parsed_b: serde_yaml::Value = serde_yaml::from_str( + r#" +rootfs: + filesystem: erofs-zst +initramfs: + filesystem: cpio +"#, + ) + .unwrap(); + // install hash ignores the parsed/top-level filesystem entirely. + let h_a = compute_runtime_install_input_hash(&runtime_node, "dev") + .unwrap() + .config_hash; + let h_b = compute_runtime_install_input_hash(&runtime_node, "dev") + .unwrap() + .config_hash; + assert_eq!(h_a, h_b); + // sanity: build hash DOES include filesystem + let b_a = compute_runtime_build_input_hash( + &runtime_node, + "dev", + &parsed_a, + std::path::Path::new("."), + ) + .unwrap() + .config_hash; + let b_b = compute_runtime_build_input_hash( + &runtime_node, + "dev", + &parsed_b, + std::path::Path::new("."), + ) + .unwrap() + .config_hash; + assert_ne!( + b_a, b_b, + "runtime build SHOULD invalidate on filesystem swap" + ); + } + + #[test] + fn sdk_install_unaffected_by_rootfs_initramfs_packages() { + let base: serde_yaml::Value = serde_yaml::from_str( + r#" +sdk: + image: my-sdk:1 + packages: + sdk-deps: "*" +rootfs: + packages: + pkg-a: "*" +initramfs: + packages: + pkg-b: "*" +"#, + ) + .unwrap(); + let bumped: serde_yaml::Value = serde_yaml::from_str( + r#" +sdk: + image: my-sdk:1 + packages: + sdk-deps: "*" +rootfs: + packages: + pkg-a: ">=2.0" +initramfs: + packages: + pkg-b: ">=3.0" +"#, + ) + .unwrap(); + let h_base = compute_sdk_input_hash(&base).unwrap().config_hash; + let h_bumped = compute_sdk_input_hash(&bumped).unwrap().config_hash; + assert_eq!( + h_base, h_bumped, + 
"rootfs/initramfs package bumps must not invalidate the SDK install stamp" + ); + } + + #[test] + fn rootfs_install_ignores_unrelated_kernel_fields() { + let base: serde_yaml::Value = serde_yaml::from_str( + r#" +rootfs: + packages: + avocado-pkg-rootfs: "*" +kernel: + version: "6.6.*" + package: kernel-image +"#, + ) + .unwrap(); + let with_metadata: serde_yaml::Value = serde_yaml::from_str( + r#" +rootfs: + packages: + avocado-pkg-rootfs: "*" +kernel: + version: "6.6.*" + package: kernel-image + metadata: cosmetic + description: "added later" +"#, + ) + .unwrap(); + let h_base = compute_rootfs_input_hash(&base, std::path::Path::new(".")) + .unwrap() + .config_hash; + let h_extra = compute_rootfs_input_hash(&with_metadata, std::path::Path::new(".")) + .unwrap() + .config_hash; + assert_eq!( + h_base, h_extra, + "adding unrelated keys under `kernel:` must not invalidate the rootfs install stamp" + ); + } + + #[test] + fn rootfs_install_invalidates_on_kernel_version_change() { + let v1: serde_yaml::Value = serde_yaml::from_str( + r#" +rootfs: + packages: + avocado-pkg-rootfs: "*" +kernel: + version: "6.6.*" +"#, + ) + .unwrap(); + let v2: serde_yaml::Value = serde_yaml::from_str( + r#" +rootfs: + packages: + avocado-pkg-rootfs: "*" +kernel: + version: "6.7.*" +"#, + ) + .unwrap(); + let h_v1 = compute_rootfs_input_hash(&v1, std::path::Path::new(".")) + .unwrap() + .config_hash; + let h_v2 = compute_rootfs_input_hash(&v2, std::path::Path::new(".")) + .unwrap() + .config_hash; + assert_ne!(h_v1, h_v2); + } + + #[test] + fn rootfs_install_post_install_content_change_invalidates() { + let tmp = tempfile::TempDir::new().unwrap(); + let script = tmp.path().join("post.sh"); + std::fs::write(&script, b"#!/bin/sh\necho v1\n").unwrap(); + + let config: serde_yaml::Value = serde_yaml::from_str( + r#" +rootfs: + packages: + avocado-pkg-rootfs: "*" + post_install: post.sh +"#, + ) + .unwrap(); + let h1 = compute_rootfs_input_hash(&config, tmp.path()) + .unwrap() + .config_hash; 
+ + std::fs::write(&script, b"#!/bin/sh\necho v2\n").unwrap(); + let h2 = compute_rootfs_input_hash(&config, tmp.path()) + .unwrap() + .config_hash; + + assert_ne!(h1, h2); + } + + #[test] + fn stamp_version_bump_invalidates_old_stamps() { + let inputs = StampInputs::new("sha256:abc".to_string()); + let mut stamp = Stamp::sdk_install("x86_64", inputs.clone(), StampOutputs::default()); + // Forge an older version. + stamp.version = STAMP_VERSION - 1; + assert!( + !stamp.is_current(&inputs), + "older stamp version should be reported as stale" + ); + } } From 3332402da486485c1e4afa51a842a3fc2c258afc Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Tue, 26 May 2026 21:39:07 -0400 Subject: [PATCH 03/30] runtime build: fall back to default rootfs/initramfs for permissions resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a runtime has no explicit `rootfs:` / `initramfs:` ref (the common case for projects that define images at the top level), the resolver returned None and the permissions section came out empty — meaning the root user's shadow entry never got rewritten, root login was silently broken on the resulting image. Fix: in runtime/build.rs, fall back to `config.rootfs_default()` / `config.initramfs_default()` when the runtime-level ref is unset, same fallback the image build itself uses for filesystem/post_install. Adds a regression test in `utils::config::tests` that mirrors the test project shape (top-level rootfs/initramfs with `permissions: dev`, runtime declares no rootfs/initramfs of its own) and asserts the fallback path picks up the permissions block. Verified end-to-end: after rebuild, the rootfs erofs image's /etc/shadow now carries `root::19000:...` (empty password) instead of the inherited `root:*:...` from the sysroot. 
--- src/commands/runtime/build.rs | 23 +++++++++++------- src/utils/config.rs | 44 +++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 8 deletions(-) diff --git a/src/commands/runtime/build.rs b/src/commands/runtime/build.rs index b247653a..5e4ee1bf 100644 --- a/src/commands/runtime/build.rs +++ b/src/commands/runtime/build.rs @@ -2248,11 +2248,21 @@ echo "Docker image priming complete.""#, render_users_groups_script(users.as_ref(), groups.as_ref(), etc_dir, None) }; + // When the runtime doesn't declare its own `rootfs:` / `initramfs:` + // ref, fall back to the top-level default entry — same fallback the + // image build itself uses to find filesystem/post_install settings. + // Without this fallback, projects that only set permissions at the + // top level (the common case) silently get no permissions baked + // into the work dir. + let resolved_rootfs = config + .resolve_runtime_rootfs(&self.runtime_name) + .or_else(|| config.rootfs_default()); + let resolved_initramfs = config + .resolve_runtime_initramfs(&self.runtime_name) + .or_else(|| config.initramfs_default()); + let rootfs_post_install = get_post_install(parsed.get("rootfs")); - let rootfs_permissions_section = render_perms( - config.resolve_runtime_rootfs(&self.runtime_name), - "$ROOTFS_WORK/etc", - ); + let rootfs_permissions_section = render_perms(resolved_rootfs, "$ROOTFS_WORK/etc"); let rootfs_build_section = generate_rootfs_build_script( NAMESPACE_UUID, &config.get_rootfs_filesystem(), @@ -2261,10 +2271,7 @@ echo "Docker image priming complete.""#, ); let initramfs_post_install = get_post_install(parsed.get("initramfs")); - let initramfs_permissions_section = render_perms( - config.resolve_runtime_initramfs(&self.runtime_name), - "$INITRAMFS_WORK/etc", - ); + let initramfs_permissions_section = render_perms(resolved_initramfs, "$INITRAMFS_WORK/etc"); let initramfs_build_section = generate_initramfs_build_script( NAMESPACE_UUID, &config.get_initramfs_filesystem(), diff --git 
a/src/utils/config.rs b/src/utils/config.rs index 44a73a2e..0e4e3fe1 100644 --- a/src/utils/config.rs +++ b/src/utils/config.rs @@ -11136,6 +11136,50 @@ rootfs: assert!(resolved.users.as_ref().unwrap().contains_key("root")); } + /// Regression: when a runtime has no explicit `rootfs:` / `initramfs:` + /// ref, runtime build must still pick up the top-level default entry's + /// `permissions:`. Previously `resolve_runtime_rootfs` returned None + /// here, the permissions section came out empty, and root login was + /// silently broken on the resulting image. + #[test] + fn test_rootfs_default_fallback_carries_permissions() { + let yaml = r#" +default_target: qemuarm64 +permissions: + dev: + users: + root: + password: "" +rootfs: + permissions: dev +initramfs: + permissions: dev +runtimes: + dev: + target: aarch64-unknown-linux-gnu + packages: { avocado-runtime: "*" } +"#; + let config = Config::load_from_yaml_str(yaml).unwrap(); + // Runtime has no explicit rootfs/initramfs ref — must fall back + // to the top-level default entry to pick up permissions. 
+ assert!(config.resolve_runtime_rootfs("dev").is_none()); + assert!(config.resolve_runtime_initramfs("dev").is_none()); + let default_rootfs = config + .rootfs_default() + .expect("rootfs default present from singleton form"); + let perms = config + .resolve_image_permissions(default_rootfs) + .expect("permissions resolve via default rootfs"); + assert!(perms.users.as_ref().unwrap().contains_key("root")); + let default_initramfs = config + .initramfs_default() + .expect("initramfs default present from singleton form"); + let perms = config + .resolve_image_permissions(default_initramfs) + .expect("permissions resolve via default initramfs"); + assert!(perms.users.as_ref().unwrap().contains_key("root")); + } + #[test] fn test_validate_runtime_refs_rejects_unresolved_rootfs_permissions() { let yaml = r#" From f93b9ca16f76796a943849538e1972a2d9dc3f66 Mon Sep 17 00:00:00 2001 From: nicksinas Date: Sun, 31 May 2026 03:27:42 -0500 Subject: [PATCH 04/30] docs: plan for avocado deploy port forwarding on macOS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Investigate why `avocado runtime deploy` fails on macOS and design the fix. Root cause: the deploy script runs inside the SDK container, which runs inside the slirp-NAT'd avocado-vm, so the TUF repo HTTP server (:8585) it starts is unreachable by the target device, and the script's host-IP autodetect returns container/VM addresses. Plan: a per-deploy QMP hostfwd (bound 0.0.0.0, opened only during deploy) + publishing the container repo port to the VM + setting AVOCADO_DEPLOY_REPO_HOST to the macOS LAN IP (get_local_ip_for_remote), surfaced as a reusable `avocado vm port-forward` primitive. No desktop change — the CLI owns the qemu lifecycle. Plan only; no behavior change yet. 
--- docs/features/macos-deploy-port-forwarding.md | 190 ++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 docs/features/macos-deploy-port-forwarding.md diff --git a/docs/features/macos-deploy-port-forwarding.md b/docs/features/macos-deploy-port-forwarding.md new file mode 100644 index 00000000..acac37b8 --- /dev/null +++ b/docs/features/macos-deploy-port-forwarding.md @@ -0,0 +1,190 @@ +# `avocado deploy` on macOS: VM port forwarding + +Status: **proposal / plan**. Investigation + design for making +`avocado runtime deploy` work on macOS, where the build/deploy runs +inside the slirp-NAT'd avocado-vm. + +## 1. Symptom + +`avocado deploy <device>` on macOS fails: the device can't fetch the +TUF repo the deploy serves, so the final +`avocadoctl runtime add --url http://<host>:8585` step on the device errors +out (connection refused / timeout / wrong host). + +## 2. How deploy works (today) + +`runtime/deploy.rs` builds a shell script and runs it **inside the SDK +container** (`run_in_container`, `create_deploy_script`): + +1. Assembles a TUF repo under `/tmp/avocado-deploy-repo` (metadata + + symlinked image `.raw` targets). +2. `python3 -m http.server 8585 --bind 0.0.0.0` to serve it. +3. Auto-detects the IP the device should fetch from + (`ip route get <device>` / `ip -4 addr show scope global`), or honors + `AVOCADO_DEPLOY_REPO_HOST` if set. +4. SSHes to the device and runs + `avocadoctl runtime add --url http://<host>:8585`. + +It already anticipates this problem — the script comment says +`AVOCADO_DEPLOY_REPO_HOST` is "useful for QEMU user-mode networking where +the host is at 10.0.2.2" — but nothing wires it up on macOS. + +## 3. 
Why it breaks on macOS (the topology) + +``` +macOS host (LAN: 192.168.x.y, reachable by the device) + │ qemu, slirp user-mode NAT + ▼ +avocado-vm (guest 10.0.2.15; host alias 10.0.2.2; NOT inbound-reachable from LAN) + │ dockerd + ▼ +SDK container ← the deploy script runs HERE + • python3 http.server :8585 (bound 0.0.0.0 *inside the container*) + • ip addr / ip route → container/VM addresses (docker bridge, 10.0.2.15) + • ssh → device (outbound via slirp NAT: OK) +``` + +Three independent gaps, all from the server living inside the NAT'd VM: + +1. **Repo host IP is wrong.** The script's autodetect runs in the + container and returns a docker-bridge / `10.0.2.15` address. The + device is handed `http://10.0.2.15:8585`, which is meaningless on the + LAN. (`10.0.2.2` is only meaningful *inside* the guest, so that's + wrong too.) +2. **No inbound path to the server.** slirp does not let a LAN device + reach the guest. Even with the right IP, nothing forwards the device's + request into the VM/container. The existing qemu `hostfwd` is + `tcp:127.0.0.1:<port>-:22` — **loopback-only, SSH-only**. +3. **Container port isn't exposed to the VM.** The http.server binds + `0.0.0.0` *in the container's* netns (docker bridge), not the VM's + `:8585`. (`deploy.rs` adds no `--net=host` / `-p`, unlike the HITL + server which does.) + +Outbound SSH from the container to the device works (slirp NAT), so the +control path is fine; only the device→repo fetch is broken. + +## 4. Proposed design + +Reuse the in-container HTTP server (keeps the repo files where they're +staged) and bridge the device→server path with a **per-deploy port +forward** plus the correct repo host. Three pieces: + +### 4a. 
A reusable VM port-forward primitive (QMP) + +Add dynamic slirp forwarding via the existing QMP client +([`src/utils/vm/qmp.rs`](../../src/utils/vm/qmp.rs)), using +`human-monitor-command`: + +``` +hostfwd_add net0 tcp:0.0.0.0:8585-:8585 # open (bind 0.0.0.0 → LAN-reachable) +hostfwd_remove net0 tcp:0.0.0.0:8585 # close +``` + +- `0.0.0.0` (not `127.0.0.1`) so a LAN device can reach `macOS:8585`. + This is the key difference from the SSH forward. +- Forwards `macOS:8585 → guest 10.0.2.15:8585`. +- Surface it as `avocado vm port-forward add|remove|list <bind>:<host-port>-:<guest-port>` + for general use, and have deploy call it internally. (A general + primitive is the "properly support port forwarding" the feature asks + for; deploy is its first consumer.) +- Alternative: a **static** `hostfwd=tcp:0.0.0.0:8585-:8585` baked into + `qemu.rs` at VM start. Simpler, but leaves a LAN port open for the + VM's whole lifetime — rejected in favor of open-only-during-deploy. + +### 4b. Expose the container's repo port to the VM + +In `deploy.rs`, when routing through the VM, publish the repo port from +the SDK container to the VM host so the qemu forward lands on it: + +- Add `-p 8585:8585` to the deploy container args (or `--net=host`, + matching the HITL server's pattern). Then + `macOS:8585 → (hostfwd) → VM:8585 → (docker -p) → container:8585`. + +### 4c. Hand the device the right host IP + +Set `AVOCADO_DEPLOY_REPO_HOST` to the macOS host's LAN IP, reusing +[`get_local_ip_for_remote`](../../src/utils/remote.rs) (resolves the +local interface IP that can reach a given device). `deploy.rs` already +forwards `AVOCADO_DEPLOY_REPO_HOST` into the container env — so the +device is told `http://<macOS-LAN-IP>:8585`, which routes back through +the qemu forward. 
+ +### End-to-end (external device on the LAN — the primary case) + +``` +device ── http GET ──► macOS-LAN-IP:8585 + └─(qemu hostfwd 0.0.0.0:8585→10.0.2.15:8585) + └─ VM:8585 ─(docker -p)─► container http.server +container ── ssh ──► device (outbound slirp NAT) +``` + +## 5. Orchestration (where the glue lives) + +`avocado deploy` runs on the **host**, so the host-side command wraps the +deploy with the forward lifecycle. On macOS + VM-routing active: + +1. Resolve macOS LAN IP via `get_local_ip_for_remote(device_host)`. +2. QMP `hostfwd_add net0 tcp:0.0.0.0:8585-:8585` against the VM's + `qmp.sock` (path from `VmPaths`). +3. Run the deploy container with `AVOCADO_DEPLOY_REPO_HOST=` and + `-p 8585:8585`. +4. On completion (success or error), QMP `hostfwd_remove` to close the + LAN port. Best-effort; also reconcile stale forwards on next `vm + start`. + +Gate all of this on macOS + `route::resolve_mode() == Apply` (the same +signal that says "we're talking to the avocado-vm's docker"). On Linux +with a real local docker / `--runs-on`, deploy already works as-is — +leave it untouched. + +## 6. Scenarios + +- **External board on the LAN (primary):** fully solved by §4. +- **Deploy to the avocado-vm itself as the device (testing):** + degenerate — the deploy script runs in a container *inside* the same + VM and would SSH to the VM and fetch from itself. Out of scope here; + document that the device must be a reachable address and the VM-as- + target needs a separate path (e.g. SSH to the bridge gateway), if we + want it at all. + +## 7. Alternatives considered + +- **Run the TUF http server on the macOS host** instead of the + container. No qemu forward needed (host is already on the LAN). But the + repo targets (manifest + `.raw` images) are staged in the container's + avocado prefix (an NFS/virtiofs-backed volume in the VM) and symlinked; + exposing them to a host-side server is more invasive than forwarding a + port. Rejected for now. 
+- **Static hostfwd at VM start** (§4a) — simpler, but a permanently-open + LAN port. Rejected. + +## 8. Implementation steps + +1. `src/utils/vm/qmp.rs`: add `hostfwd_add` / `hostfwd_remove` / + `hostfwd_list` helpers (wrap `human-monitor-command`). +2. New `avocado vm port-forward` subcommand (`src/commands/vm/`) using + those, for general use + tests. +3. `src/commands/runtime/deploy.rs`: on macOS+VM-routing, resolve LAN IP, + open the forward, inject `AVOCADO_DEPLOY_REPO_HOST` + `-p 8585:8585`, + and close the forward in a guaranteed-cleanup path. +4. Honor `AVOCADO_DEPLOY_REPO_PORT` end-to-end (forward + publish + URL) + so a non-default port still works. +5. Tests: QMP command formatting; deploy wiring sets the env + container + args on macOS and leaves Linux/`--runs-on` paths unchanged. + +## 9. Open questions / risks + +- **macOS firewall:** opening `0.0.0.0:8585` on the host may prompt the + application firewall. qemu is the listener; confirm the prompt/behavior. +- **Security:** the forward exposes the repo to the LAN for the deploy's + duration. Acceptable (TUF metadata is signed; it's transient), but + document it. Could bind to the specific LAN interface instead of + `0.0.0.0` if we want to narrow it. +- **Port conflict:** if `8585` is taken on the host, the forward fails — + pick a free port and thread it through `AVOCADO_DEPLOY_REPO_PORT`. +- **QMP `hostfwd_add` availability:** confirm the bundled qemu's slirp + build supports runtime `hostfwd_add` (it's standard, but verify on the + pinned qemu). +- **No desktop change needed:** the CLI owns the qemu lifecycle on macOS; + the desktop app drives `avocado` and is unaffected. A future Devices/UI + affordance could call `vm port-forward`, but it's out of scope. 
From 7a6f1c0d351717fcb05cf303703478dd9daa784f Mon Sep 17 00:00:00 2001 From: nicksinas Date: Sun, 31 May 2026 20:16:31 -0500 Subject: [PATCH 05/30] deploy: make `avocado deploy` work on macOS via VM port forwarding On macOS the deploy container runs inside the slirp-NAT'd avocado-vm, so the TUF repo HTTP server it starts (:8585) was unreachable by the target device and the in-container host-IP autodetect returned VM-internal addresses. Bridge the device->repo path: - qmp: add human_monitor_command + hostfwd_add/hostfwd_remove (runtime slirp port forwarding via the QEMU monitor), with unit tests. - deploy: on macOS/Windows (is_docker_desktop), set AVOCADO_DEPLOY_REPO_HOST to this host's LAN IP and publish the repo port; on the avocado-vm (is_vm_routing_active) also open a `hostfwd 0.0.0.0:PORT->guest:PORT` for the deploy and tear it down afterward. Skip `-p` when the SDK container uses host networking (docker discards it and the hostfwd already reaches the VM-bound port). Linux (native docker) is untouched. Validated end-to-end to a LAN Raspberry Pi 4: device fetched the repo metadata over the forward (HTTP 200). See docs/features/macos-deploy-port-forwarding.md. --- docs/features/macos-deploy-port-forwarding.md | 107 +++++++--- src/commands/runtime/deploy.rs | 185 +++++++++++++++++- src/utils/vm/qmp.rs | 132 +++++++++++++ 3 files changed, 393 insertions(+), 31 deletions(-) diff --git a/docs/features/macos-deploy-port-forwarding.md b/docs/features/macos-deploy-port-forwarding.md index acac37b8..8b58ad8d 100644 --- a/docs/features/macos-deploy-port-forwarding.md +++ b/docs/features/macos-deploy-port-forwarding.md @@ -1,8 +1,8 @@ # `avocado deploy` on macOS: VM port forwarding -Status: **proposal / plan**. Investigation + design for making +Status: **implemented + validated** (avocado-vm path). Makes `avocado runtime deploy` work on macOS, where the build/deploy runs -inside the slirp-NAT'd avocado-vm. +inside the slirp-NAT'd avocado-vm. Validation notes in §12. ## 1. 
Symptom @@ -91,14 +91,16 @@ hostfwd_remove net0 tcp:0.0.0.0:8585 # close `qemu.rs` at VM start. Simpler, but leaves a LAN port open for the VM's whole lifetime — rejected in favor of open-only-during-deploy. -### 4b. Expose the container's repo port to the VM +### 4b. Expose the container's repo port out of its VM -In `deploy.rs`, when routing through the VM, publish the repo port from -the SDK container to the VM host so the qemu forward lands on it: +On macOS (both contexts) publish the repo port from the SDK container so +it escapes the container netns: -- Add `-p 8585:8585` to the deploy container args (or `--net=host`, - matching the HITL server's pattern). Then - `macOS:8585 → (hostfwd) → VM:8585 → (docker -p) → container:8585`. +- Add `-p 8585:8585` to the deploy container args. +- **avocado-vm:** `-p` publishes onto the VM's interfaces; the qemu + `hostfwd` (§4a) then carries `macOS:8585 → VM:8585 → container:8585`. +- **Docker Desktop:** `-p` is forwarded straight to the macOS host by + Docker Desktop's vpnkit — no qemu step. ### 4c. Hand the device the right host IP @@ -118,24 +120,40 @@ device ── http GET ──► macOS-LAN-IP:8585 container ── ssh ──► device (outbound slirp NAT) ``` -## 5. Orchestration (where the glue lives) - -`avocado deploy` runs on the **host**, so the host-side command wraps the -deploy with the forward lifecycle. On macOS + VM-routing active: - -1. Resolve macOS LAN IP via `get_local_ip_for_remote(device_host)`. -2. QMP `hostfwd_add net0 tcp:0.0.0.0:8585-:8585` against the VM's - `qmp.sock` (path from `VmPaths`). -3. Run the deploy container with `AVOCADO_DEPLOY_REPO_HOST=` and - `-p 8585:8585`. -4. On completion (success or error), QMP `hostfwd_remove` to close the - LAN port. Best-effort; also reconcile stale forwards on next `vm - start`. - -Gate all of this on macOS + `route::resolve_mode() == Apply` (the same -signal that says "we're talking to the avocado-vm's docker"). 
On Linux -with a real local docker / `--runs-on`, deploy already works as-is — -leave it untouched. +## 5. Detection & orchestration (where the glue lives) + +Deploy fails on **both** macOS contexts, because in either one the deploy +container runs inside a Linux VM — the avocado-vm *or* Docker Desktop's +LinuxKit VM — so its in-container `ip route`/`ip addr` autodetect returns +a VM-internal address the device can't reach. Linux runs the container on +native docker with no VM in between, so it already works and must stay +untouched. + +So the gate is **two-tier**, mirroring the split the HITL server already +uses (`is_docker_desktop()` → publish vs host-net): + +- **`is_docker_desktop()`** (`cfg!(macos) || cfg!(windows)`) — the deploy + container is inside a Linux VM. Apply the fixes common to both Mac + contexts: + 1. `AVOCADO_DEPLOY_REPO_HOST` = macOS LAN IP via + `get_local_ip_for_remote(device_host)` (overrides the broken + in-container autodetect). Respect an explicit user-set value. + 2. Publish the repo port from the container (`-p :`). + - **+ `is_vm_routing_active()`** (DOCKER_HOST → avocado-vm socket): + *also* open a qemu `hostfwd` (`tcp:0.0.0.0:-:`) via QMP + against the VM's `qmp.sock` (`VmPaths`), because raw slirp doesn't + auto-expose the published port to the host LAN. Removed on completion + (success or error); reconcile stale forwards on next `vm start`. + - **else (Docker Desktop)**: no qemu step — Docker Desktop's `-p` + already forwards the container port to the macOS host (vpnkit). +- **Linux** — `is_docker_desktop()` false → skip everything; current + behavior preserved (works today, no VM, native docker). + +Key correction over an earlier draft: the discriminator is +`is_docker_desktop()`, **not** `is_vm_routing_active()` alone — the +Docker-Desktop-on-Mac case is broken too. The qemu `hostfwd` is the +*only* avocado-vm-specific piece; the LAN-IP injection + port publish are +shared by both Mac contexts. ## 6. 
Scenarios @@ -188,3 +206,40 @@ leave it untouched. - **No desktop change needed:** the CLI owns the qemu lifecycle on macOS; the desktop app drives `avocado` and is unaffected. A future Devices/UI affordance could call `vm port-forward`, but it's out of scope. + +## 12. Validation (2026-06-01) + +Validated end-to-end on the **avocado-vm path** deploying to a real +Raspberry Pi 4 on the LAN: + +- The repo was served at the **macOS host LAN IP** (`AVOCADO_DEPLOY_REPO_HOST` + injection working), and the device successfully fetched + `GET /metadata/timestamp.json → 200` through the qemu `hostfwd` → VM → + container. The device→repo reachability that was previously impossible + on macOS now works. +- `qemu hostfwd_add` is accepted by the pinned qemu (open question §9 + resolved). + +Findings folded back into the implementation: + +- **Host-networking SDK containers:** when the SDK container runs + `--network=host` (e.g. projects that set it), docker discards the `-p` + publish with a "Published ports are discarded when using host network + mode" warning — and it's unnecessary there, since the container already + shares the VM's `:8585` that the `hostfwd` targets. The shim now skips + `-p` when host networking is detected. + +Out of scope / separate concerns surfaced during testing: + +- **Device trust:** sideload deploy then fails at the device with + `Signature verification failed … got 0, need 1` unless the device's + installed TUF root matches the project's signing key — i.e. the device + must be provisioned/flashed from an image built with the same + `signing-keys`. This is a provisioning/trust matter, independent of the + port-forwarding fix. 
+- **Docker Desktop path** (`--no-vm-auto-start`): the LAN-IP + `-p` half + applies, but it's **not yet validated**, and a project using + `--network=host` won't expose the port to macOS under Docker Desktop + (host net there is the LinuxKit VM, not the host) — needs the project + to use bridge networking, plus confirmation that Docker Desktop's `-p` + binds a LAN-reachable address. diff --git a/src/commands/runtime/deploy.rs b/src/commands/runtime/deploy.rs index b234c75c..f3c188fb 100644 --- a/src/commands/runtime/deploy.rs +++ b/src/commands/runtime/deploy.rs @@ -1,6 +1,6 @@ use crate::utils::{ config::{ComposedConfig, Config}, - container::{RunConfig, SdkContainer}, + container::{is_docker_desktop, is_vm_routing_active, RunConfig, SdkContainer}, lockfile::LockFile, output::{print_info, print_success, print_warning, OutputLevel}, output_format::{emit_json_event, is_json_output_active}, @@ -472,6 +472,36 @@ impl RuntimeDeployCommand { env_vars.insert("AVOCADO_DEPLOY_REPO_PORT".to_string(), repo_port); } + // On macOS/Windows the deploy container runs inside a Linux VM, so + // the TUF repo server it starts is unreachable by the device unless + // we hand the device this host's LAN IP, publish the repo port out + // of the container, and (avocado-vm only) open a qemu slirp forward. + // No-op on Linux (native docker), where deploy already works. + let repo_port: u16 = std::env::var("AVOCADO_DEPLOY_REPO_PORT") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(DEFAULT_DEPLOY_REPO_PORT); + let mut container_args = config.merge_sdk_container_args(self.container_args.as_ref()); + // If the SDK container runs with host networking, the published-port + // (`-p`) trick is both unnecessary and discarded by docker, so the + // shim skips it (see prepare_mac_deploy_net). 
+ let host_net = container_args + .as_deref() + .map(|args| { + args.iter() + .any(|a| a == "--network=host" || a == "--net=host") + }) + .unwrap_or(false); + let MacDeployNet { + env: mac_env, + container_args: mac_args, + forward, + } = prepare_mac_deploy_net(&self.device, repo_port, host_net, self.verbose).await; + env_vars.extend(mac_env); + if !mac_args.is_empty() { + container_args.get_or_insert_with(Vec::new).extend(mac_args); + } + let run_config = RunConfig { container_image: container_image.to_string(), target: target_arch.clone(), @@ -480,16 +510,20 @@ impl RuntimeDeployCommand { source_environment: true, interactive: false, env_vars: Some(env_vars), - container_args: config.merge_sdk_container_args(self.container_args.as_ref()), + container_args, dnf_args: self.dnf_args.clone(), sdk_arch: self.sdk_arch.clone(), ..Default::default() }; - let deploy_result = match container_helper + let run_outcome = container_helper .run_in_container(run_config) .await - .context("Failed to deploy runtime") - { + .context("Failed to deploy runtime"); + // Tear down the VM port forward (best-effort) regardless of outcome. + if let Some(fwd) = forward { + fwd.close().await; + } + let deploy_result = match run_outcome { Ok(r) => r, Err(e) => { let _ = std::fs::remove_dir_all(&staging_dir); @@ -740,6 +774,147 @@ fi } } +/// Host-side networking shim for `avocado deploy` on macOS/Windows, where +/// the deploy container runs inside a Linux VM (avocado-vm or Docker +/// Desktop). The TUF repo HTTP server it starts is otherwise trapped in +/// the container netns, and the script's in-container host-IP autodetect +/// returns a VM-internal address the device can't reach. No-op on Linux. +/// See docs/features/macos-deploy-port-forwarding.md. +struct MacDeployNet { + /// Extra env for the deploy container (AVOCADO_DEPLOY_REPO_HOST). + env: HashMap, + /// Extra `docker run` args (publish the repo port: `-p :`). 
+ container_args: Vec, + /// A qemu slirp forward to tear down afterward (avocado-vm only). + forward: Option, +} + +/// A qemu slirp host-forward opened for the duration of one deploy. Teardown +/// is async, so it's closed explicitly by the caller rather than via `Drop`. +struct OpenForward { + qmp_socket: std::path::PathBuf, + host_port: u16, +} + +impl OpenForward { + async fn close(self) { + if let Ok(mut c) = crate::utils::vm::qmp::QmpClient::connect(&self.qmp_socket).await { + let _ = c.hostfwd_remove("net0", "0.0.0.0", self.host_port).await; + } + } +} + +/// Build the macOS/Windows deploy networking shim. Returns an empty (no-op) +/// value on Linux so the native-docker path is left exactly as-is. +async fn prepare_mac_deploy_net( + device: &str, + port: u16, + host_net: bool, + verbose: bool, +) -> MacDeployNet { + let mut net = MacDeployNet { + env: HashMap::new(), + container_args: Vec::new(), + forward: None, + }; + + // Linux runs the container on native docker — no VM in between, deploy + // already works. Leave it untouched. + if !is_docker_desktop() { + return net; + } + + // Publish the repo port out of the container's netns — but only when it + // has its own. With `--network=host` the container already shares the + // VM's network (so the qemu hostfwd below reaches `:{port}` directly), + // and docker discards `-p` with a "Published ports are discarded when + // using host network mode" warning, so skip it. + if !host_net { + net.container_args.push("-p".to_string()); + net.container_args.push(format!("{port}:{port}")); + } + + // Both Mac contexts: tell the device the host LAN IP it can reach us on, + // overriding the broken in-container autodetect. Respect a user override. 
+ if std::env::var("AVOCADO_DEPLOY_REPO_HOST").is_err() { + let host = DeviceSpec::parse(device) + .map(|s| s.host) + .unwrap_or_default(); + match crate::utils::remote::get_local_ip_for_remote(&host).await { + Ok(ip) => { + if verbose { + print_info( + &format!("deploy: repo host IP {ip} (reachable by device {host})"), + OutputLevel::Normal, + ); + } + net.env + .insert("AVOCADO_DEPLOY_REPO_HOST".to_string(), ip.to_string()); + } + Err(e) => print_warning( + &format!( + "deploy: could not determine a LAN IP the device can reach this host on \ + ({e}). Set AVOCADO_DEPLOY_REPO_HOST to the right address." + ), + OutputLevel::Normal, + ), + } + } + + // avocado-vm only: open a slirp hostfwd so the LAN device can reach the + // published port through qemu. Docker Desktop forwards `-p` to the host + // itself (vpnkit), so it needs no qemu step. + if is_vm_routing_active() { + let sock = match crate::utils::vm::state::VmPaths::resolve() { + Ok(p) => p.qmp_socket(), + Err(e) => { + print_warning( + &format!("deploy: can't locate the avocado-vm for port forwarding ({e})."), + OutputLevel::Normal, + ); + return net; + } + }; + match crate::utils::vm::qmp::QmpClient::connect(&sock).await { + Ok(mut c) => { + // Clear any stale forward from a prior interrupted deploy, then add. + let _ = c.hostfwd_remove("net0", "0.0.0.0", port).await; + match c.hostfwd_add("net0", "0.0.0.0", port, port).await { + Ok(()) => { + if verbose { + print_info( + &format!( + "deploy: opened VM port forward 0.0.0.0:{port} → guest :{port}" + ), + OutputLevel::Normal, + ); + } + net.forward = Some(OpenForward { + qmp_socket: sock, + host_port: port, + }); + } + Err(e) => print_warning( + &format!( + "deploy: failed to open the VM port forward for {port} ({e}); the \ + device may be unable to fetch the repo. Is the avocado-vm running?" + ), + OutputLevel::Normal, + ), + } + } + Err(e) => print_warning( + &format!( + "deploy: couldn't reach the avocado-vm QMP socket for port forwarding ({e})." 
+ ), + OutputLevel::Normal, + ), + } + } + + net +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/utils/vm/qmp.rs b/src/utils/vm/qmp.rs index 68d81dc6..60db50df 100644 --- a/src/utils/vm/qmp.rs +++ b/src/utils/vm/qmp.rs @@ -70,6 +70,64 @@ impl QmpClient { Ok(v.get("return").cloned().unwrap_or(Value::Null)) } + /// Run an HMP (human monitor) command via `human-monitor-command`. + /// HMP errors come back *inside* the returned string, not the QMP + /// `error` field, so callers must inspect the returned text. + pub async fn human_monitor_command(&mut self, command_line: &str) -> Result { + let v = self + .execute( + "human-monitor-command", + Some(serde_json::json!({ "command-line": command_line })), + ) + .await?; + Ok(v.as_str().unwrap_or_default().to_string()) + } + + /// Add a slirp host-forwarding rule to a `user` netdev at runtime: + /// `host_addr:host_port` on the host → `:guest_port` in the guest. + /// Use `host_addr = "0.0.0.0"` to make the forward reachable from the + /// LAN (not just loopback). TCP only — that's all deploy needs. + pub async fn hostfwd_add( + &mut self, + netdev: &str, + host_addr: &str, + host_port: u16, + guest_port: u16, + ) -> Result<()> { + // qemu hostfwd_add form: ` tcp::-:` + let out = self + .human_monitor_command(&format!( + "hostfwd_add {netdev} tcp:{host_addr}:{host_port}-:{guest_port}" + )) + .await?; + let out = out.trim(); + if !out.is_empty() { + bail!("hostfwd_add failed: {out}"); + } + Ok(()) + } + + /// Remove a previously-added slirp host-forwarding rule. The remove + /// form omits the guest side: ` tcp::`. + /// A "not found" reply is tolerated so cleanup is idempotent. 
+ pub async fn hostfwd_remove( + &mut self, + netdev: &str, + host_addr: &str, + host_port: u16, + ) -> Result<()> { + let out = self + .human_monitor_command(&format!( + "hostfwd_remove {netdev} tcp:{host_addr}:{host_port}" + )) + .await?; + let out = out.trim(); + if !out.is_empty() && !out.contains("not found") { + bail!("hostfwd_remove failed: {out}"); + } + Ok(()) + } + /// Read lines until we see one that has a `return` or `error` key /// (i.e. a command response). Events are skipped. async fn read_until_response(&mut self, dur: Duration) -> Result { @@ -188,4 +246,78 @@ mod tests { let err = client.execute("device_add", None).await.unwrap_err(); assert!(format!("{err:#}").contains("nope")); } + + #[tokio::test] + async fn hostfwd_add_ok_on_empty_return() { + let socket = spawn_mock(|mut rh, mut wh| async move { + wh.write_all(b"{\"QMP\":{\"version\":{}}}\n").await.unwrap(); + let mut line = String::new(); + rh.read_line(&mut line).await.unwrap(); + wh.write_all(b"{\"return\":{}}\n").await.unwrap(); + line.clear(); + rh.read_line(&mut line).await.unwrap(); + // The HMP command is carried inside human-monitor-command. + assert!(line.contains("human-monitor-command")); + assert!(line.contains("hostfwd_add net0 tcp:0.0.0.0:8585-:8585")); + // Success: hostfwd_add prints nothing. + wh.write_all(b"{\"return\":\"\"}\n").await.unwrap(); + }) + .await; + + let mut client = QmpClient::connect(&socket).await.unwrap(); + client + .hostfwd_add("net0", "0.0.0.0", 8585, 8585) + .await + .unwrap(); + } + + #[tokio::test] + async fn hostfwd_add_errors_on_nonempty_return() { + let socket = spawn_mock(|mut rh, mut wh| async move { + wh.write_all(b"{\"QMP\":{\"version\":{}}}\n").await.unwrap(); + let mut line = String::new(); + rh.read_line(&mut line).await.unwrap(); + wh.write_all(b"{\"return\":{}}\n").await.unwrap(); + line.clear(); + rh.read_line(&mut line).await.unwrap(); + // HMP errors come back in the return string, not the QMP error field. 
+ wh.write_all(b"{\"return\":\"Could not set up host forwarding rule\\n\"}\n") + .await + .unwrap(); + }) + .await; + + let mut client = QmpClient::connect(&socket).await.unwrap(); + let err = client + .hostfwd_add("net0", "0.0.0.0", 8585, 8585) + .await + .unwrap_err(); + assert!(format!("{err:#}").contains("hostfwd_add failed")); + } + + #[tokio::test] + async fn hostfwd_remove_tolerates_not_found() { + let socket = spawn_mock(|mut rh, mut wh| async move { + wh.write_all(b"{\"QMP\":{\"version\":{}}}\n").await.unwrap(); + let mut line = String::new(); + rh.read_line(&mut line).await.unwrap(); + wh.write_all(b"{\"return\":{}}\n").await.unwrap(); + line.clear(); + rh.read_line(&mut line).await.unwrap(); + assert!(line.contains("hostfwd_remove net0 tcp:0.0.0.0:8585")); + // A stale-cleanup "not found" reply must not be treated as an error. + wh.write_all( + b"{\"return\":\"host forwarding rule for tcp:0.0.0.0:8585 not found\\n\"}\n", + ) + .await + .unwrap(); + }) + .await; + + let mut client = QmpClient::connect(&socket).await.unwrap(); + client + .hostfwd_remove("net0", "0.0.0.0", 8585) + .await + .unwrap(); + } } From c49931d1d07332d0654436cff1fbc8e3c1187c88 Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Sun, 31 May 2026 00:30:20 -0400 Subject: [PATCH 06/30] repo TLS: support custom CA + insecure mode across all dnf phases Lets the SDK trust a self-signed / private-CA package endpoint (e.g. an internal Pulp behind package-ca). Centralized so it covers EVERY dnf invocation - sdk bootstrap, sdk packages, ext, runtime, rootfs, initramfs, and the per-module 'dnf' subcommands, host AND target repo confs: - config: distro.repo.ca + distro.repo.tls_verify; resolvers get_repo_ca()/get_repo_insecure() (env AVOCADO_REPO_CA / AVOCADO_REPO_INSECURE win over config). promote_repo_tls_env() pushes config values to the process env at load so the container env-builders pick them up uniformly. 
- container: inject_repo_tls_env() adds AVOCADO_REPO_CA_B64 (base64 of the CA file) + AVOCADO_REPO_INSECURE to the container env at the env-builder chokepoints. REPO_TLS_SETUP_SNIPPET appends the CA to the SDK trust bundle (which SSL_CERT_FILE/CURL_CA_BUNDLE and every explicit sslcacert point at) and, for insecure, adds --setopt=sslverify=0 to DNF_SDK_HOST (base of every dnf call). Emitted by both entrypoint generators. - sdk bootstrap: snippet appended to the bootstrap command so the FIRST dnf (target pkg from sdk/all) is covered too. --- src/commands/sdk/install.rs | 8 ++++- src/utils/config.rs | 59 +++++++++++++++++++++++++++++++++++ src/utils/container.rs | 61 +++++++++++++++++++++++++++++++++++++ 3 files changed, 127 insertions(+), 1 deletion(-) diff --git a/src/commands/sdk/install.rs b/src/commands/sdk/install.rs index 71687ef4..3fba6f25 100644 --- a/src/commands/sdk/install.rs +++ b/src/commands/sdk/install.rs @@ -1599,7 +1599,13 @@ fi let run_config = RunConfig { container_image: container_image.to_string(), target: target.to_string(), - command: env_setup_command.to_string(), + // Append the shared repo-TLS setup so the bootstrap's FIRST dnf (installing the + // target package from the sdk/all repo) trusts a custom CA / honors insecure. + command: format!( + "{}{}", + env_setup_command, + crate::utils::container::REPO_TLS_SETUP_SNIPPET + ), verbose: self.verbose, source_environment: true, interactive: false, diff --git a/src/utils/config.rs b/src/utils/config.rs index 0e4e3fe1..e6d8944a 100644 --- a/src/utils/config.rs +++ b/src/utils/config.rs @@ -860,6 +860,12 @@ pub struct DistroRepoConfig { pub url: Option, /// Explicit releasever override. When not set, derived from distro.release/channel. pub releasever: Option, + /// Path to a CA cert (PEM) to trust for the repo endpoint (self-signed / private CA). + /// Env override: AVOCADO_REPO_CA. Appended to the SDK trust bundle for all dnf phases. 
+ pub ca: Option, + /// TLS verification toggle for the repo endpoint. Set false to skip verification + /// (testing only). Env override: AVOCADO_REPO_INSECURE=1. Default: verify. + pub tls_verify: Option, } /// Reference to a Docker image for priming on the var partition at build time. @@ -1659,6 +1665,10 @@ impl Config { config.validate_cli_requirement()?; + // Promote config-file repo TLS settings (distro.repo.ca / tls_verify) to the process + // env so the container env-builders pick them up the same as the env-var form. + config.promote_repo_tls_env(); + Ok(ComposedConfig { config, merged_value: main_config, @@ -3290,6 +3300,10 @@ impl Config { config.synthesize_implicit_default_runtime(); config.validate_runtime_refs()?; + // Promote config-file repo TLS settings (distro.repo.ca / tls_verify) to the process + // env so the container env-builders pick them up the same as the env-var form. + config.promote_repo_tls_env(); + Ok(config) } @@ -3580,6 +3594,51 @@ impl Config { self.sdk.as_ref()?.repo_url.as_ref().cloned() } + /// Path to a CA cert to trust for the repo endpoint. + /// Priority: AVOCADO_REPO_CA (env) > distro.repo.ca (config). + pub fn get_repo_ca(&self) -> Option { + if let Ok(p) = env::var("AVOCADO_REPO_CA") { + if !p.is_empty() { + return Some(p); + } + } + self.distro + .as_ref() + .and_then(|d| d.repo.as_ref()) + .and_then(|r| r.ca.as_ref()) + .cloned() + } + + /// Whether to skip TLS verification for the repo endpoint (testing only). + /// Priority: AVOCADO_REPO_INSECURE (env, truthy) > distro.repo.tls_verify == false. 
+ pub fn get_repo_insecure(&self) -> bool { + if let Ok(v) = env::var("AVOCADO_REPO_INSECURE") { + return matches!(v.to_ascii_lowercase().as_str(), "1" | "true" | "yes"); + } + self.distro + .as_ref() + .and_then(|d| d.repo.as_ref()) + .and_then(|r| r.tls_verify) + .map(|verify| !verify) + .unwrap_or(false) + } + + /// Promote config-file repo TLS settings to the process env so the container + /// env-builders (which don't see the avocado Config) pick them up uniformly with + /// the env-var form. Env values already set always win; we never override them. + pub fn promote_repo_tls_env(&self) { + // When the env var isn't already set, get_repo_ca()/get_repo_insecure() return the + // config-file value; promote it so the container env-builders see it uniformly. + if env::var_os("AVOCADO_REPO_CA").is_none() { + if let Some(ca) = self.get_repo_ca() { + env::set_var("AVOCADO_REPO_CA", ca); + } + } + if env::var_os("AVOCADO_REPO_INSECURE").is_none() && self.get_repo_insecure() { + env::set_var("AVOCADO_REPO_INSECURE", "1"); + } + } + /// Get the releasever for DNF --releasever. /// Priority: AVOCADO_RELEASEVER > AVOCADO_SDK_REPO_RELEASE (legacy) /// > distro.repo.releasever > sdk.repo_release (legacy) diff --git a/src/utils/container.rs b/src/utils/container.rs index 439b963b..2d82a502 100644 --- a/src/utils/container.rs +++ b/src/utils/container.rs @@ -353,6 +353,55 @@ fn add_security_opts(container_cmd: &mut Vec) { } } +/// Shared bash that wires a custom repo CA / insecure TLS into EVERY dnf phase. Centralized: +/// every dnf call trusts `$SSL_CERT_FILE` / `$CURL_CA_BUNDLE` (the SDK bundle) — and explicit +/// `--setopt=sslcacert=$SSL_CERT_FILE` sites point at the same file — so appending the CA to +/// that bundle covers sdk/ext/runtime/rootfs/initramfs, the `dnf` subcommands, and host AND +/// target repo confs. Likewise every call starts with `$DNF_SDK_HOST`, so adding +/// `--setopt=sslverify=0` there disables verification everywhere when insecure. 
+/// Must run AFTER `AVOCADO_SDK_PREFIX`, `SSL_CERT_FILE`, and `DNF_SDK_HOST` are defined, and +/// before the first dnf call. Driven by env: AVOCADO_REPO_CA_B64 (base64 PEM) / AVOCADO_REPO_INSECURE. +pub const REPO_TLS_SETUP_SNIPPET: &str = r##" +# --- custom repo CA / insecure TLS (AVOCADO_REPO_CA / AVOCADO_REPO_INSECURE) --- +if [ -n "${AVOCADO_REPO_CA_B64:-}" ]; then + _avocado_ca_bundle="${AVOCADO_SDK_PREFIX}/etc/ssl/certs/ca-certificates.crt" + mkdir -p "$(dirname "$_avocado_ca_bundle")" + if ! grep -q "BEGIN AVOCADO_REPO_CA" "$_avocado_ca_bundle" 2>/dev/null; then + { echo "# BEGIN AVOCADO_REPO_CA"; printf '%s' "$AVOCADO_REPO_CA_B64" | base64 -d; echo; echo "# END AVOCADO_REPO_CA"; } >> "$_avocado_ca_bundle" + echo "[INFO] Added custom repo CA to the SDK trust bundle." + fi +fi +if [ "${AVOCADO_REPO_INSECURE:-}" = "1" ]; then + export DNF_SDK_HOST="${DNF_SDK_HOST} --setopt=sslverify=0" + echo "[WARN] AVOCADO_REPO_INSECURE=1: TLS verification DISABLED for all dnf operations." +fi +"##; + +/// Inject the repo-TLS transport env (read from the process env) into a container's env map. +/// `AVOCADO_REPO_CA` is a host path; we read it and pass the PEM as base64 (`AVOCADO_REPO_CA_B64`) +/// so the multi-line cert survives as a single env value. Insecure is a passthrough flag. 
+pub fn inject_repo_tls_env(env_vars: &mut std::collections::HashMap) { + if let Ok(path) = std::env::var("AVOCADO_REPO_CA") { + if !path.is_empty() { + match std::fs::read(&path) { + Ok(bytes) => { + use base64::Engine; + let b64 = base64::engine::general_purpose::STANDARD.encode(bytes); + env_vars.insert("AVOCADO_REPO_CA_B64".to_string(), b64); + } + Err(e) => { + eprintln!("[WARN] AVOCADO_REPO_CA={path} could not be read ({e}); repo CA not applied."); + } + } + } + } + if let Ok(v) = std::env::var("AVOCADO_REPO_INSECURE") { + if matches!(v.to_ascii_lowercase().as_str(), "1" | "true" | "yes") { + env_vars.insert("AVOCADO_REPO_INSECURE".to_string(), "1".to_string()); + } + } +} + /// Configuration for running commands in containers #[derive(Debug, Clone)] pub struct RunConfig { @@ -618,6 +667,8 @@ impl SdkContainer { if let Some(release) = &config.repo_release { env_vars.insert("AVOCADO_SDK_REPO_RELEASE".to_string(), release.clone()); } + // Custom repo CA / insecure TLS, applied across all dnf phases (see REPO_TLS_SETUP_SNIPPET). + inject_repo_tls_env(&mut env_vars); if let Some(dnf_args) = &config.dnf_args { env_vars.insert("AVOCADO_DNF_ARGS".to_string(), dnf_args.join(" ")); } @@ -793,6 +844,8 @@ impl SdkContainer { if let Some(release) = &config.repo_release { env_vars.insert("AVOCADO_SDK_REPO_RELEASE".to_string(), release.clone()); } + // Custom repo CA / insecure TLS, applied across all dnf phases (see REPO_TLS_SETUP_SNIPPET). + inject_repo_tls_env(&mut env_vars); if let Some(dnf_args) = &config.dnf_args { env_vars.insert("AVOCADO_DNF_ARGS".to_string(), dnf_args.join(" ")); } @@ -1172,6 +1225,8 @@ impl SdkContainer { if let Some(release) = &config.repo_release { env_vars.insert("AVOCADO_SDK_REPO_RELEASE".to_string(), release.clone()); } + // Custom repo CA / insecure TLS, applied across all dnf phases (see REPO_TLS_SETUP_SNIPPET). 
+ inject_repo_tls_env(&mut env_vars); if let Some(dnf_args) = &config.dnf_args { env_vars.insert("AVOCADO_DNF_ARGS".to_string(), dnf_args.join(" ")); } @@ -1416,6 +1471,8 @@ impl SdkContainer { if let Some(release) = &config.repo_release { env_vars.insert("AVOCADO_SDK_REPO_RELEASE".to_string(), release.clone()); } + // Custom repo CA / insecure TLS, applied across all dnf phases (see REPO_TLS_SETUP_SNIPPET). + inject_repo_tls_env(&mut env_vars); if let Some(dnf_args) = &config.dnf_args { env_vars.insert("AVOCADO_DNF_ARGS".to_string(), dnf_args.join(" ")); } @@ -2187,6 +2244,8 @@ if [ -f "${AVOCADO_SDK_PREFIX}/etc/ssl/certs/ca-certificates.crt" ]; then fi "#, ); + // Custom repo CA / insecure TLS, applied across all dnf phases. + script.push_str(REPO_TLS_SETUP_SNIPPET); } script @@ -2476,6 +2535,8 @@ if [ -f "${AVOCADO_SDK_PREFIX}/etc/ssl/certs/ca-certificates.crt" ]; then fi "#, ); + // Custom repo CA / insecure TLS, applied across all dnf phases. + script.push_str(REPO_TLS_SETUP_SNIPPET); } script From bf82c96d047470bee92b33b8f4ee3c3a1ab4973b Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Sun, 31 May 2026 17:58:43 -0400 Subject: [PATCH 07/30] feat(snapshots): reproducible channel snapshot pinning in the lock file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pin each target to an immutable point-in-time snapshot of its feed channel so a clean + rebuild reproduces exactly, even after the live channel head advances or evicts the NEVRAs the lock file references. Mechanism: every dnf baseurl is ${repo_url}/$releasever/... with releasever = {release}/{channel}; pinning injects one segment -> {release}/{channel}/ snapshots/, exposed via AVOCADO_RELEASEVER (which get_releasever() already honors first), so all sysroots freeze together with no per-call-site plumbing. - Lock file v7: per-target `repo-snapshot` (RepoSnapshot). Additive — v6 reads as v7 with no pin (= track head), fully backward-compatible. 
merge adopts a disk pin when the writer has none; unlock (clear_all) drops it. - utils/snapshot.rs: resolve-and-apply runs once per command — reuse a matching pin, auto-pin to the channel's latest snapshot on first fetch, pre-flight a pinned snapshot and emit an actionable "run avocado update" error if it was GC'd, warn + track head on a stale release/channel, degrade to head if the feed serves no snapshots (snapshots-latest.json 404s). Honors repo CA / TLS. - Wired into install (umbrella) + fetch + sdk/rootfs/runtime/ext/initramfs install; fetch stays the reproducible metadata cache. - avocado update: Cargo-style move-forward — advance the snapshot pin to newest and clear package/kernel pins so the next install re-resolves + re-locks. - Tests: v6->v7 migration, round-trip, clear-on-unlock, merge-adopts-disk-pin, plus pure releasever/pin-status/url transforms. --- src/commands/ext/install.rs | 6 +- src/commands/fetch.rs | 6 + src/commands/initramfs/install.rs | 2 + src/commands/install.rs | 6 + src/commands/mod.rs | 1 + src/commands/rootfs/install.rs | 2 + src/commands/runtime/install.rs | 4 + src/commands/sdk/install.rs | 5 + src/commands/update.rs | 122 +++++++++ src/main.rs | 23 ++ src/utils/lockfile.rs | 186 +++++++++++++- src/utils/mod.rs | 1 + src/utils/snapshot.rs | 394 ++++++++++++++++++++++++++++++ 13 files changed, 753 insertions(+), 5 deletions(-) create mode 100644 src/commands/update.rs create mode 100644 src/utils/snapshot.rs diff --git a/src/commands/ext/install.rs b/src/commands/ext/install.rs index e025ba31..426b7e52 100644 --- a/src/commands/ext/install.rs +++ b/src/commands/ext/install.rs @@ -174,10 +174,14 @@ impl ExtInstallCommand { // Merge container args from config and CLI (similar to SDK commands) let merged_container_args = config.merge_sdk_container_args(self.container_args.as_ref()); + // Resolve target and apply the reproducible snapshot pin before reading + // repo_release, so it reflects the pinned channel snapshot. 
+ let target = resolve_target_required(self.target.as_deref(), config)?; + crate::utils::snapshot::resolve_and_apply_for(config, &self.config_path, &target).await?; + // Get repo_url and repo_release from config let repo_url = config.get_sdk_repo_url(); let repo_release = config.get_sdk_repo_release(); - let target = resolve_target_required(self.target.as_deref(), config)?; // Determine which extensions to install (with their locations) let extensions_to_install: Vec<(String, ExtensionLocation)> = diff --git a/src/commands/fetch.rs b/src/commands/fetch.rs index 1190c4ce..915a1fba 100644 --- a/src/commands/fetch.rs +++ b/src/commands/fetch.rs @@ -84,6 +84,12 @@ impl FetchCommand { // Resolve target architecture let target_arch = resolve_target_required(self.target.as_deref(), config)?; + // Resolve & apply the reproducible snapshot pin before refreshing + // metadata, so `fetch` caches the pinned snapshot's repodata (and + // auto-pins on first run) rather than the advancing live head. + crate::utils::snapshot::resolve_and_apply_for(config, &self.config_path, &target_arch) + .await?; + // Get container configuration from interpolated config let container_image = config .get_sdk_image() diff --git a/src/commands/initramfs/install.rs b/src/commands/initramfs/install.rs index d34ec233..57726a1e 100644 --- a/src/commands/initramfs/install.rs +++ b/src/commands/initramfs/install.rs @@ -87,6 +87,8 @@ impl InitramfsInstallCommand { let config = &composed.config; let target = validate_and_log_target(self.target.as_deref(), config)?; + // Apply the reproducible snapshot pin before any repo_release is read. 
+ crate::utils::snapshot::resolve_and_apply_for(config, &self.config_path, &target).await?; let merged_container_args = config.merge_sdk_container_args(self.container_args.as_ref()); let container_image = config.get_sdk_image().ok_or_else(|| { anyhow::anyhow!("No container image specified in config under 'sdk.image'") diff --git a/src/commands/install.rs b/src/commands/install.rs index cf757964..42e9c1de 100644 --- a/src/commands/install.rs +++ b/src/commands/install.rs @@ -121,6 +121,12 @@ impl InstallCommand { let _parsed = &composed.merged_value; let _target = validate_and_log_target(self.target.as_deref(), config)?; + // Resolve & apply the reproducible snapshot pin once, before any sysroot + // fetch. This exposes AVOCADO_RELEASEVER so every sub-install (SDK, + // rootfs, initramfs, extensions, runtimes) freezes to the same immutable + // channel snapshot — auto-pinning on first fetch, reusing the pin after. + crate::utils::snapshot::resolve_and_apply_for(config, &self.config_path, &_target).await?; + // Compute target runtimes early so we can show a useful start message. let initial_runtimes = self.find_target_relevant_runtimes(config, _parsed, &_target)?; if initial_runtimes.len() == 1 { diff --git a/src/commands/mod.rs b/src/commands/mod.rs index 94a558a4..b9f4d3d2 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -20,5 +20,6 @@ pub mod sdk; pub mod sign; pub mod signing_keys; pub mod unlock; +pub mod update; pub mod upgrade; pub mod vm; diff --git a/src/commands/rootfs/install.rs b/src/commands/rootfs/install.rs index 5095a5dc..cb21bdbf 100644 --- a/src/commands/rootfs/install.rs +++ b/src/commands/rootfs/install.rs @@ -946,6 +946,8 @@ impl RootfsInstallCommand { let config = &composed.config; let target = validate_and_log_target(self.target.as_deref(), config)?; + // Apply the reproducible snapshot pin before any repo_release is read. 
+ crate::utils::snapshot::resolve_and_apply_for(config, &self.config_path, &target).await?; let merged_container_args = config.merge_sdk_container_args(self.container_args.as_ref()); let container_image = config.get_sdk_image().ok_or_else(|| { anyhow::anyhow!("No container image specified in config under 'sdk.image'") diff --git a/src/commands/runtime/install.rs b/src/commands/runtime/install.rs index 38ca4cf9..d57c36c0 100644 --- a/src/commands/runtime/install.rs +++ b/src/commands/runtime/install.rs @@ -122,6 +122,10 @@ impl RuntimeInstallCommand { // Merge container args from config and CLI (similar to SDK commands) let merged_container_args = config.merge_sdk_container_args(self.container_args.as_ref()); + // Apply the reproducible snapshot pin before reading repo_release. + let target = resolve_target_required(self.target.as_deref(), config)?; + crate::utils::snapshot::resolve_and_apply_for(config, &self.config_path, &target).await?; + // Get repo_url and repo_release from config let repo_url = config.get_sdk_repo_url(); let repo_release = config.get_sdk_repo_release(); diff --git a/src/commands/sdk/install.rs b/src/commands/sdk/install.rs index 3fba6f25..1b16e7f5 100644 --- a/src/commands/sdk/install.rs +++ b/src/commands/sdk/install.rs @@ -153,6 +153,11 @@ impl SdkInstallCommand { let config = &composed.config; let target = validate_and_log_target(self.target.as_deref(), config)?; + // Apply the reproducible snapshot pin (auto-pin on first fetch) before + // any repo_release is read, so the SDK + target sysroots fetch against + // the frozen channel snapshot. 
+ crate::utils::snapshot::resolve_and_apply_for(config, &self.config_path, &target).await?; + // Merge container args from config with CLI args let merged_container_args = config.merge_sdk_container_args(self.container_args.as_ref()); diff --git a/src/commands/update.rs b/src/commands/update.rs new file mode 100644 index 00000000..f3ee096f --- /dev/null +++ b/src/commands/update.rs @@ -0,0 +1,122 @@ +use anyhow::{Context, Result}; +use std::path::Path; + +use crate::utils::{ + config::Config, + lockfile::LockFile, + output::{print_info, print_success, OutputLevel}, + snapshot, + target::resolve_target_required, +}; + +/// `avocado update` — move a target forward to the latest feed state. +/// +/// Cargo-style: re-resolves the lock against the newest published snapshot. +/// Concretely it (1) advances the target's snapshot pin to the channel's +/// current `latest` snapshot, and (2) clears the package + kernel version pins +/// so the next `avocado install`/`fetch` re-selects the latest versions within +/// that new snapshot and re-locks them. +/// +/// Everyday `install`/`fetch` stay reproducible (they reuse the pins); this is +/// the deliberate, explicit "move forward" action. 
+pub struct UpdateCommand { + config_path: String, + target: Option, + verbose: bool, +} + +impl UpdateCommand { + pub fn new(config_path: String, target: Option, verbose: bool) -> Self { + Self { + config_path, + target, + verbose, + } + } + + pub async fn execute(&self) -> Result<()> { + let config = Config::load(&self.config_path) + .with_context(|| format!("Failed to load config from {}", self.config_path))?; + let target = resolve_target_required(self.target.as_deref(), &config)?; + + let src_dir = config + .get_resolved_src_dir(&self.config_path) + .unwrap_or_else(|| { + Path::new(&self.config_path) + .parent() + .unwrap_or(Path::new(".")) + .to_path_buf() + }); + + let mut lock_file = LockFile::load(&src_dir) + .with_context(|| format!("Failed to load lock file from {}", src_dir.display()))?; + let old_snapshot = lock_file + .get_repo_snapshot(&target) + .map(|s| s.snapshot.clone()); + + // Resolve the channel's current latest snapshot (no env/lock side effects). + let latest = snapshot::resolve_latest(&config, &target).await?; + + // Re-resolve packages to latest by dropping the existing package + + // kernel pins (and the old snapshot pin); the next build re-selects and + // re-locks within the new snapshot. 
+ lock_file.clear_all(&target); + + match latest { + Some(new_pin) => { + let new_id = new_pin.snapshot.clone(); + let feed = format!("{}/{}", new_pin.release, new_pin.channel); + lock_file.set_repo_snapshot(&target, new_pin); + lock_file + .save_replacing(&src_dir) + .with_context(|| "Failed to save lock file")?; + + match old_snapshot { + Some(old) if old == new_id => print_info( + &format!("Already on the latest {feed} snapshot '{new_id}'."), + OutputLevel::Normal, + ), + Some(old) => print_info( + &format!("Advanced {feed} snapshot '{old}' -> '{new_id}' for '{target}'."), + OutputLevel::Normal, + ), + None => print_info( + &format!("Pinned {feed} to latest snapshot '{new_id}' for '{target}'."), + OutputLevel::Normal, + ), + } + print_success( + &format!( + "Updated '{target}'. Run 'avocado install' to resolve and lock the latest \ + package versions within snapshot '{new_id}'." + ), + OutputLevel::Normal, + ); + } + None => { + // No snapshot to advance to (feed serves no snapshots, or + // releasever is manually overridden). Still honor the + // "move to latest" intent for packages: cleared pins mean the + // next build resolves the latest available head. + lock_file + .save_replacing(&src_dir) + .with_context(|| "Failed to save lock file")?; + if self.verbose { + print_info( + "Feed serves no snapshots (or releasever is overridden); no snapshot pin to advance.", + OutputLevel::Normal, + ); + } + print_success( + &format!( + "Cleared package pins for '{target}'. Run 'avocado install' to resolve and \ + lock the latest available versions." 
+ ), + OutputLevel::Normal, + ); + } + } + + Ok(()) + } +} diff --git a/src/main.rs b/src/main.rs index 8e7a1d2d..192215e9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -73,6 +73,7 @@ use commands::signing_keys::{ SigningKeysCreateCommand, SigningKeysListCommand, SigningKeysRemoveCommand, }; use commands::unlock::UnlockCommand; +use commands::update::UpdateCommand; use commands::upgrade::UpgradeCommand; #[derive(Parser)] @@ -550,6 +551,19 @@ enum Commands { #[arg(long)] initramfs: bool, }, + /// Move a target forward: advance to the latest feed snapshot and re-resolve + /// packages to their latest versions on the next install (rewrites the lock). + Update { + /// Path to avocado.yaml configuration file + #[arg(short = 'C', long, default_value = "avocado.yaml")] + config: String, + /// Enable verbose output + #[arg(short, long)] + verbose: bool, + /// Target architecture + #[arg(short, long)] + target: Option, + }, /// Avocado Connect platform commands (auth, upload) Connect { #[command(subcommand)] @@ -2340,6 +2354,15 @@ async fn main() -> Result<()> { unlock_cmd.execute()?; Ok(()) } + Commands::Update { + config, + verbose, + target, + } => { + let update_cmd = UpdateCommand::new(config, target.or(cli.target), verbose); + update_cmd.execute().await?; + Ok(()) + } Commands::Runtime { command } => match command { RuntimeCommands::Install { name, diff --git a/src/utils/lockfile.rs b/src/utils/lockfile.rs index 3b312b80..992550b0 100644 --- a/src/utils/lockfile.rs +++ b/src/utils/lockfile.rs @@ -36,7 +36,11 @@ static LOCKFILE_SAVE_GATE: Mutex<()> = Mutex::new(()); /// `RuntimeLock` carrying both `packages` and per-runtime /// `extensions`. Migration from v5 wraps the old flat map as /// `{ packages: , extensions: {} }`. -const LOCKFILE_VERSION: u32 = 6; +/// Version 7: Adds per-target `repo-snapshot`, the immutable channel snapshot +/// the target's packages were resolved against. 
Additive — v6 +/// lockfiles read as v7 with `repo_snapshot: None` and behave +/// exactly as before (track the live channel head). +const LOCKFILE_VERSION: u32 = 7; /// Lock file name const LOCKFILE_NAME: &str = "lock.json"; @@ -235,6 +239,29 @@ pub type PackageVersions = HashMap; /// Used for SDK (keyed by host arch) and runtimes (keyed by name) pub type NestedPackageVersions = HashMap; +/// The immutable channel snapshot a target's packages were resolved against. +/// +/// Recorded on the first fetch that resolves a snapshot (auto-pin). Subsequent +/// fetches — including after `avocado clean` — re-resolve against this exact +/// snapshot subtree (`{release}/{channel}/snapshots/{snapshot}`) so the build +/// reproduces even after the live channel head advances or evicts the NEVRAs +/// this lockfile pins. `avocado update` advances it; `avocado unlock` clears it. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct RepoSnapshot { + /// Distro release (feed year) the snapshot belongs to, e.g. "2026". + pub release: String, + /// Channel the snapshot belongs to, e.g. "edge". + pub channel: String, + /// Snapshot id — the immutable `snapshots/` path segment. + pub snapshot: String, + /// Provenance: the base repo URL resolved against at pin time. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub repo_url: Option, + /// Provenance: snapshot mint time (from `snapshots-latest.json`). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub created: Option, +} + /// Source metadata for a fetched extension in the lock file #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ExtensionSourceLock { @@ -404,6 +431,18 @@ pub struct TargetLocks { #[serde(default, skip_serializing_if = "boot_record_is_empty")] pub boot: BootRecord, + /// The immutable channel snapshot this target's packages were resolved + /// against (lockfile v7+). 
Auto-pinned on first fetch; reused on every + /// later fetch so a clean+rebuild reproduces exactly. `None` means the + /// target tracks the live channel head (pre-v7 behavior, or a feed that + /// doesn't serve snapshots). + #[serde( + default, + skip_serializing_if = "Option::is_none", + rename = "repo-snapshot" + )] + pub repo_snapshot: Option, + /// In-memory only: sysroot sections explicitly cleared during this process /// run (e.g., "rootfs" / "initramfs" after a kernel-pin-change clean). /// Not persisted. `merge_with` skips re-inserting disk packages for any @@ -485,9 +524,13 @@ impl LockFile { if lock_file.version == LOCKFILE_VERSION { return Ok(lock_file); } - if lock_file.version == 3 || lock_file.version == 4 { - // v3 → v4 → v5 → v6: empty runtimes maps parse equivalently; - // new fields default-empty via serde. + if lock_file.version == 3 || lock_file.version == 4 || lock_file.version == 6 { + // v3 → v4 → v5 → v6 → v7: each of these parses cleanly as the + // current shape — the intervening additions (kernel-versions, + // kernels/boot, repo-snapshot) are all `#[serde(default)]`, and + // v6's runtime shape is already the current one. Only the + // `version` field differs, bumped here. (v5's runtime map shape + // is incompatible, so it still falls through to migration.) lock_file.version = LOCKFILE_VERSION; return Ok(lock_file); } @@ -908,6 +951,13 @@ impl LockFile { { self_target.boot = other_target.boot; } + // Repo snapshot — adopt disk's pin only when self has none, so a + // concurrent writer's freshly-resolved pin isn't dropped. An + // explicit clear (unlock) goes through `save_replacing`, which + // never merges, so a cleared pin stays cleared. + if self_target.repo_snapshot.is_none() { + self_target.repo_snapshot = other_target.repo_snapshot; + } } self @@ -929,6 +979,20 @@ impl LockFile { } } + /// Get the recorded repo snapshot pin for a target, if any. 
+ pub fn get_repo_snapshot(&self, target: &str) -> Option<&RepoSnapshot> { + self.targets.get(target)?.repo_snapshot.as_ref() + } + + /// Record (or replace) the repo snapshot pin for a target. Used by the + /// auto-pin-on-first-fetch path and by `avocado update`. + pub fn set_repo_snapshot(&mut self, target: &str, snapshot: RepoSnapshot) { + self.targets + .entry(target.to_string()) + .or_default() + .repo_snapshot = Some(snapshot); + } + /// Get the locked version for a package in a specific target and sysroot pub fn get_locked_version( &self, @@ -1417,6 +1481,9 @@ impl LockFile { target_locks.runtimes.clear(); target_locks.kernel_versions.clear(); target_locks.kernels.clear(); + // Drop the snapshot pin too: unlock means "re-pick latest snapshot + // on the next fetch", mirroring the kernel-pin reset above. + target_locks.repo_snapshot = None; } } @@ -3002,4 +3069,115 @@ avocado-sdk-toolchain 0.1.0-r0.x86_64_avocadosdk assert!(target.kernels.is_empty()); assert!(target.boot.is_empty()); } + + #[test] + fn test_migrate_v6_to_v7_additive() { + use tempfile::TempDir; + + // A v6 lockfile parses directly as v7 (repo-snapshot is #[serde(default)]), + // with only the version bumped and no snapshot pin recorded. + let v6_json = r#"{"version":6,"distro_release":"2026","targets":{"qemux86-64":{"rootfs":{"avocado-pkg-rootfs":"1.0.0-r0"},"runtimes":{"dev":{"packages":{"base":"2.0.0-r0"}}}}}} +"#; + let temp_dir = TempDir::new().unwrap(); + let lock_dir = temp_dir.path().join(LOCKFILE_DIR); + fs::create_dir_all(&lock_dir).unwrap(); + fs::write(lock_dir.join(LOCKFILE_NAME), v6_json).unwrap(); + + let loaded = LockFile::load(temp_dir.path()).unwrap(); + // Version bumped to v7. + assert_eq!(loaded.version, LOCKFILE_VERSION); + // Existing v6 state preserved. + assert_eq!( + loaded.get_locked_version("qemux86-64", &SysrootType::Rootfs, "avocado-pkg-rootfs"), + Some(&"1.0.0-r0".to_string()) + ); + // New v7 field defaults to None — behaves as "track head". 
+ assert!(loaded.get_repo_snapshot("qemux86-64").is_none()); + } + + #[test] + fn test_repo_snapshot_round_trip() { + use tempfile::TempDir; + + let temp_dir = TempDir::new().unwrap(); + let mut lock = LockFile::new(); + lock.set_repo_snapshot( + "qemux86-64", + RepoSnapshot { + release: "2026".to_string(), + channel: "edge".to_string(), + snapshot: "20260531T120000Z-qemux86-64".to_string(), + repo_url: Some("https://repo.example.com".to_string()), + created: Some("2026-05-31T12:00:00Z".to_string()), + }, + ); + lock.save(temp_dir.path()).unwrap(); + + // Persisted under the kebab-case key and reloads intact. + let raw = fs::read_to_string(LockFile::get_path(temp_dir.path())).unwrap(); + assert!(raw.contains("\"repo-snapshot\"")); + + let loaded = LockFile::load(temp_dir.path()).unwrap(); + let snap = loaded.get_repo_snapshot("qemux86-64").unwrap(); + assert_eq!(snap.release, "2026"); + assert_eq!(snap.channel, "edge"); + assert_eq!(snap.snapshot, "20260531T120000Z-qemux86-64"); + } + + #[test] + fn test_clear_all_clears_repo_snapshot() { + let mut lock = LockFile::new(); + lock.set_repo_snapshot( + "qemux86-64", + RepoSnapshot { + release: "2026".to_string(), + channel: "edge".to_string(), + snapshot: "SNAP".to_string(), + repo_url: None, + created: None, + }, + ); + assert!(lock.get_repo_snapshot("qemux86-64").is_some()); + // Unlock semantics: clearing a target drops the snapshot pin too. + lock.clear_all("qemux86-64"); + assert!(lock.get_repo_snapshot("qemux86-64").is_none()); + } + + #[test] + fn test_merge_adopts_disk_snapshot_when_self_unset() { + use tempfile::TempDir; + + // Disk has a pin; an in-memory writer that didn't touch the pin must + // not drop it on save() (merge adopts disk's value). 
+ let temp_dir = TempDir::new().unwrap(); + let mut on_disk = LockFile::new(); + on_disk.set_repo_snapshot( + "qemux86-64", + RepoSnapshot { + release: "2026".to_string(), + channel: "edge".to_string(), + snapshot: "DISK".to_string(), + repo_url: None, + created: None, + }, + ); + on_disk.save(temp_dir.path()).unwrap(); + + // A fresh writer recording an unrelated package, no snapshot in hand. + let mut writer = LockFile::new(); + writer.set_locked_version("qemux86-64", &SysrootType::Rootfs, "curl", "8.0.0-r0"); + writer.save(temp_dir.path()).unwrap(); + + let loaded = LockFile::load(temp_dir.path()).unwrap(); + assert_eq!( + loaded + .get_repo_snapshot("qemux86-64") + .map(|s| s.snapshot.as_str()), + Some("DISK") + ); + assert_eq!( + loaded.get_locked_version("qemux86-64", &SysrootType::Rootfs, "curl"), + Some(&"8.0.0-r0".to_string()) + ); + } } diff --git a/src/utils/mod.rs b/src/utils/mod.rs index bd54339b..13e41cbb 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -28,6 +28,7 @@ pub mod scheduler; pub mod signing_keys; #[cfg(unix)] pub mod signing_service; +pub mod snapshot; pub mod stamps; pub mod target; pub mod tui; diff --git a/src/utils/snapshot.rs b/src/utils/snapshot.rs new file mode 100644 index 00000000..95c6a216 --- /dev/null +++ b/src/utils/snapshot.rs @@ -0,0 +1,394 @@ +//! Reproducible channel snapshots — auto-pinning the lock file to an immutable +//! point-in-time view of a feed channel. +//! +//! Background: every dnf baseurl is composed as `${repo_url}/$releasever/...` +//! where `$releasever` is `{release}/{channel}` (e.g. `2026/edge`). The serving +//! side publishes an immutable copy of each channel's metadata under +//! `{release}/{channel}/snapshots//...` (sharing the content-addressed +//! `_pkgs` pool) plus a small mutable pointer +//! `{release}/{channel}/target//snapshots-latest.json` naming the +//! newest snapshot. +//! +//! Snapshot pinning therefore reduces to injecting one path segment into +//! 
`releasever`: `2026/edge` -> `2026/edge/snapshots/`. We resolve the pin +//! once per command and expose it via the `AVOCADO_RELEASEVER` env var, which +//! [`Config::get_releasever`] already honors ahead of the derived +//! `{release}/{channel}` — so every downstream sysroot fetch freezes together +//! with no per-call-site plumbing (mirrors [`Config::promote_repo_tls_env`]). +//! +//! Behavior (confirmed in the feature plan): +//! - **Auto-pin on first fetch**: with no pin recorded, resolve the channel's +//! current `latest` snapshot, record it in the lock file, and fetch against +//! it. A later `avocado clean` + rebuild reproduces it exactly. +//! - **Reuse on later fetches**: a recorded pin is reused verbatim. +//! - **Feed without snapshots**: if the pointer 404s, fall back to tracking the +//! live head (pre-snapshot behavior) and record nothing. +//! - **Manual releasever override**: if the user pins `releasever` explicitly +//! (config or env), we never auto-pin — they own resolution. +//! - **Changed release/channel**: a stale pin (config moved to a different +//! feed) is ignored with a warning telling the user to run `avocado update`. + +use anyhow::{Context, Result}; +use std::env; +use std::path::Path; + +use crate::utils::config::Config; +use crate::utils::lockfile::{LockFile, RepoSnapshot}; +use crate::utils::output::{print_info, OutputLevel}; + +/// The mutable pointer published per (channel, target) naming the newest +/// immutable snapshot. Served at +/// `{release}/{channel}/target//snapshots-latest.json`. +#[derive(Debug, Clone, serde::Deserialize)] +pub struct SnapshotPointer { + /// Snapshot id — the immutable `snapshots/` path segment. + pub id: String, + /// Snapshot mint time (provenance only). + #[serde(default)] + pub created: Option, +} + +/// Whether a recorded pin still applies to the configured feed. +#[derive(Debug, PartialEq, Eq)] +pub enum PinStatus { + /// No pin recorded for this target. 
+ None, + /// Pin matches the configured release+channel — reuse it. + Matches, + /// Pin is for a different release/channel than the config now names. + Mismatch, +} + +/// Classify a recorded pin against the configured feed. Pure — unit-tested. +pub fn pin_status(pin: Option<&RepoSnapshot>, release: &str, channel: &str) -> PinStatus { + match pin { + None => PinStatus::None, + Some(p) if p.release == release && p.channel == channel => PinStatus::Matches, + Some(_) => PinStatus::Mismatch, + } +} + +/// The releasever path segment for a pinned snapshot. Pure — unit-tested. +pub fn effective_releasever(release: &str, channel: &str, snapshot: &str) -> String { + format!("{release}/{channel}/snapshots/{snapshot}") +} + +/// Machine short name as it appears in feed paths (`target//...`). +/// Mirrors `avocado-arch-utils.bbclass`: strip a leading `avocado-`. +fn machine_short(target: &str) -> &str { + target.strip_prefix("avocado-").unwrap_or(target) +} + +/// URL of the per-(channel, target) latest-snapshot pointer. +pub fn pointer_url(repo_url: &str, release: &str, channel: &str, target: &str) -> String { + let base = repo_url.trim_end_matches('/'); + let machine = machine_short(target); + format!("{base}/{release}/{channel}/target/{machine}/snapshots-latest.json") +} + +/// URL of a snapshot's target repomd — used to pre-flight a recorded pin so a +/// GC'd snapshot produces an actionable error rather than a raw dnf failure. +pub fn repomd_url( + repo_url: &str, + release: &str, + channel: &str, + target: &str, + snapshot: &str, +) -> String { + let base = repo_url.trim_end_matches('/'); + let machine = machine_short(target); + format!("{base}/{release}/{channel}/snapshots/{snapshot}/target/{machine}/repodata/repomd.xml") +} + +/// True when the user has taken explicit control of `releasever` (config or +/// env), in which case we must not auto-pin. 
This also covers the +/// already-applied case: a parent command that set `AVOCADO_RELEASEVER` to a +/// snapshot path makes children no-op. +fn releasever_is_overridden(config: &Config) -> bool { + if env::var_os("AVOCADO_RELEASEVER").is_some() + || env::var_os("AVOCADO_SDK_REPO_RELEASE").is_some() + { + return true; + } + let distro_override = config + .distro + .as_ref() + .and_then(|d| d.repo.as_ref()) + .and_then(|r| r.releasever.as_ref()) + .is_some(); + let sdk_override = config + .sdk + .as_ref() + .and_then(|s| s.repo_release.as_ref()) + .is_some(); + distro_override || sdk_override +} + +/// Build an HTTP client honoring the repo's CA bundle / insecure setting, +/// matching the TLS posture dnf uses for the same endpoint. +fn build_client(config: &Config) -> Result { + let mut builder = reqwest::ClientBuilder::new() + .timeout(std::time::Duration::from_secs(20)) + .user_agent(concat!("avocado-cli/", env!("CARGO_PKG_VERSION"))); + if config.get_repo_insecure() { + builder = builder.danger_accept_invalid_certs(true); + } + if let Some(ca_path) = config.get_repo_ca() { + let pem = std::fs::read(&ca_path) + .with_context(|| format!("Failed to read repo CA bundle: {ca_path}"))?; + // A bundle may carry multiple certs; add each. + for cert in reqwest::Certificate::from_pem_bundle(&pem) + .with_context(|| format!("Failed to parse repo CA bundle: {ca_path}"))? + { + builder = builder.add_root_certificate(cert); + } + } + builder.build().context("Failed to build HTTP client") +} + +/// Outcome of resolving the channel's latest snapshot. +enum LatestResult { + /// Pointer present — the named snapshot id (+ provenance). + Found(SnapshotPointer), + /// Pointer 404 — the feed does not serve snapshots. + Unsupported, +} + +/// GET the latest-snapshot pointer. 404 -> `Unsupported` (degrade to head); +/// transport/other errors propagate (don't silently lose reproducibility). 
+async fn fetch_latest( + client: &reqwest::Client, + repo_url: &str, + release: &str, + channel: &str, + target: &str, +) -> Result { + let url = pointer_url(repo_url, release, channel, target); + let resp = client + .get(&url) + .send() + .await + .with_context(|| format!("Failed to fetch snapshot pointer: {url}"))?; + if resp.status() == reqwest::StatusCode::NOT_FOUND { + return Ok(LatestResult::Unsupported); + } + let resp = resp + .error_for_status() + .with_context(|| format!("Snapshot pointer request failed: {url}"))?; + let pointer: SnapshotPointer = resp + .json() + .await + .with_context(|| format!("Failed to parse snapshot pointer: {url}"))?; + Ok(LatestResult::Found(pointer)) +} + +/// Build a [`RepoSnapshot`] pin from a resolved pointer. +fn build_pin( + release: &str, + channel: &str, + repo_url: &str, + pointer: &SnapshotPointer, +) -> RepoSnapshot { + RepoSnapshot { + release: release.to_string(), + channel: channel.to_string(), + snapshot: pointer.id.clone(), + repo_url: Some(repo_url.to_string()), + created: pointer.created.clone(), + } +} + +/// Resolve the channel's current latest snapshot into a pin, without touching +/// the lock file or the process env. Used by `avocado update` to advance the +/// pin. Returns `None` when releasever is overridden, when the config names no +/// release/channel or repo URL, or when the feed serves no snapshots (pointer 404s). +pub async fn resolve_latest(config: &Config, target: &str) -> Result> { + if releasever_is_overridden(config) { + return Ok(None); + } + let (Some(release), Some(channel)) = (config.get_distro_release(), config.get_distro_channel()) + else { + return Ok(None); + }; + let Some(repo_url) = config.get_repo_url() else { + return Ok(None); + }; + let client = build_client(config)?; + match fetch_latest(&client, &repo_url, &release, &channel, target).await? 
{ + LatestResult::Unsupported => Ok(None), + LatestResult::Found(pointer) => { + Ok(Some(build_pin(&release, &channel, &repo_url, &pointer))) + } + } +} + +/// Pre-flight a recorded pin: confirm the snapshot's repomd is still served. +/// A definitive 404 means the snapshot aged out of retention -> actionable +/// error. Transport errors (offline) are tolerated so a cached/offline rebuild +/// against a still-valid pin isn't blocked. +async fn verify_pin_available( + client: &reqwest::Client, + repo_url: &str, + snap: &RepoSnapshot, + target: &str, +) -> Result<()> { + let url = repomd_url( + repo_url, + &snap.release, + &snap.channel, + target, + &snap.snapshot, + ); + match client.head(&url).send().await { + Ok(resp) if resp.status() == reqwest::StatusCode::NOT_FOUND => anyhow::bail!( + "Snapshot '{}' for {}/{} is no longer available (retention horizon). \ + Run 'avocado update' to re-pin to the latest snapshot.", + snap.snapshot, + snap.release, + snap.channel + ), + // Reachable (2xx/redirect) or non-404 status: proceed. + Ok(_) => Ok(()), + // Transport error (offline, DNS, timeout): don't block a pinned rebuild. + Err(_) => Ok(()), + } +} + +/// Resolve the snapshot pin for `target` and, when one applies, expose it via +/// `AVOCADO_RELEASEVER` so every downstream `get_releasever()` fetches against +/// the frozen snapshot subtree. Auto-pins (and persists) on first fetch. +/// +/// Call once at the entry of feed-touching commands; idempotent across the +/// in-process install task graph (children see the parent's env and no-op). +pub async fn resolve_and_apply(config: &Config, src_dir: &Path, target: &str) -> Result<()> { + if releasever_is_overridden(config) { + return Ok(()); + } + let (Some(release), Some(channel)) = (config.get_distro_release(), config.get_distro_channel()) + else { + // No release/channel to derive a feed from — nothing to pin. 
+ return Ok(()); + }; + let Some(repo_url) = config.get_repo_url() else { + return Ok(()); + }; + + let mut lock = LockFile::load(src_dir) + .with_context(|| format!("Failed to load lock file from {}", src_dir.display()))?; + let client = build_client(config)?; + + let effective = match pin_status(lock.get_repo_snapshot(target), &release, &channel) { + PinStatus::Matches => { + let snap = lock.get_repo_snapshot(target).expect("matched"); + verify_pin_available(&client, &repo_url, snap, target).await?; + effective_releasever(&snap.release, &snap.channel, &snap.snapshot) + } + PinStatus::Mismatch => { + let snap = lock.get_repo_snapshot(target).expect("mismatch"); + print_info( + &format!( + "[WARNING] Lock file is pinned to snapshot for {}/{} but config now names {}/{}. \ + Tracking the live channel head; run 'avocado update' to re-pin.", + snap.release, snap.channel, release, channel + ), + OutputLevel::Normal, + ); + return Ok(()); + } + PinStatus::None => { + match fetch_latest(&client, &repo_url, &release, &channel, target).await? { + LatestResult::Unsupported => return Ok(()), + LatestResult::Found(pointer) => { + let snap = build_pin(&release, &channel, &repo_url, &pointer); + let eff = effective_releasever(&snap.release, &snap.channel, &snap.snapshot); + lock.set_repo_snapshot(target, snap); + lock.save(src_dir) + .with_context(|| "Failed to record snapshot pin in lock file")?; + print_info( + &format!("Pinned {release}/{channel} to snapshot '{}'.", pointer.id), + OutputLevel::Normal, + ); + eff + } + } + } + }; + + env::set_var("AVOCADO_RELEASEVER", effective); + Ok(()) +} + +/// Convenience entry for commands that hold a `config_path` string: resolves +/// `src_dir` the same way the install/clean commands do, then delegates to +/// [`resolve_and_apply`]. One line per command call site. 
+pub async fn resolve_and_apply_for(config: &Config, config_path: &str, target: &str) -> Result<()> { + let src_dir = config.get_resolved_src_dir(config_path).unwrap_or_else(|| { + Path::new(config_path) + .parent() + .unwrap_or(Path::new(".")) + .to_path_buf() + }); + resolve_and_apply(config, &src_dir, target).await +} + +#[cfg(test)] +mod tests { + use super::*; + + fn snap(release: &str, channel: &str, id: &str) -> RepoSnapshot { + RepoSnapshot { + release: release.to_string(), + channel: channel.to_string(), + snapshot: id.to_string(), + repo_url: None, + created: None, + } + } + + #[test] + fn effective_releasever_injects_snapshot_segment() { + assert_eq!( + effective_releasever("2026", "edge", "20260531T120000Z-qemux86-64"), + "2026/edge/snapshots/20260531T120000Z-qemux86-64" + ); + } + + #[test] + fn pin_status_none_when_unpinned() { + assert_eq!(pin_status(None, "2026", "edge"), PinStatus::None); + } + + #[test] + fn pin_status_matches_same_feed() { + let p = snap("2026", "edge", "X"); + assert_eq!(pin_status(Some(&p), "2026", "edge"), PinStatus::Matches); + } + + #[test] + fn pin_status_mismatch_on_channel_change() { + let p = snap("2026", "edge", "X"); + assert_eq!(pin_status(Some(&p), "2026", "stable"), PinStatus::Mismatch); + assert_eq!(pin_status(Some(&p), "2027", "edge"), PinStatus::Mismatch); + } + + #[test] + fn pointer_url_strips_avocado_prefix_and_trailing_slash() { + assert_eq!( + pointer_url( + "https://repo.example.com/", + "2026", + "edge", + "avocado-qemux86-64" + ), + "https://repo.example.com/2026/edge/target/qemux86-64/snapshots-latest.json" + ); + } + + #[test] + fn repomd_url_points_into_snapshot_subtree() { + assert_eq!( + repomd_url("https://r.io", "2026", "edge", "qemux86-64", "SNAP"), + "https://r.io/2026/edge/snapshots/SNAP/target/qemux86-64/repodata/repomd.xml" + ); + } +} From 191c47e6f86e971b91c9a1c2dc90307d71956a7d Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Sun, 31 May 2026 19:54:49 -0400 Subject: [PATCH 08/30] 
fix(snapshots): auto-pin against the default feed; single-source the repo URL The snapshot resolver early-returned when distro.repo.url was unset, so projects relying on the baked default feed (no explicit repo.url) never recorded a repo-snapshot pin even though their dnf fetch hit that default. Fix by deriving the same default the container uses. Single source of truth: add Config::DEFAULT_REPO_URL + Config::effective_repo_url() in config.rs. The snapshot resolver uses effective_repo_url(); the container env-builder always sets AVOCADO_SDK_REPO_URL from the same const, so the shell's duplicated literal default is removed (it just consumes the env now). --- src/utils/config.rs | 16 ++++++++++ src/utils/container.rs | 70 +++++++++++++++++++++++++++--------------- src/utils/snapshot.rs | 8 ++--- 3 files changed, 64 insertions(+), 30 deletions(-) diff --git a/src/utils/config.rs b/src/utils/config.rs index e6d8944a..401d852e 100644 --- a/src/utils/config.rs +++ b/src/utils/config.rs @@ -1373,6 +1373,13 @@ pub struct Config { } impl Config { + /// Default prod package-feed base URL, used when neither config + /// (`distro.repo.url`/`sdk.repo_url`) nor env sets one. Single source of + /// truth: the container env-builder and the snapshot resolver both derive + /// the default from here, so the resolver pins against the same feed the + /// container's dnf fetches from. + pub const DEFAULT_REPO_URL: &'static str = "https://repo.avocadolinux.org"; + /// Validate that the running CLI version satisfies the `cli_requirement` if set. pub fn validate_cli_requirement(&self) -> Result<()> { if let Some(ref requirement) = self.cli_requirement { @@ -3594,6 +3601,15 @@ impl Config { self.sdk.as_ref()?.repo_url.as_ref().cloned() } + /// Effective repo base URL: the configured value, or [`Self::DEFAULT_REPO_URL`] + /// when none is set. 
Single source of truth for the prod-feed default — used by + /// the snapshot resolver and the container env-builder so the resolver pins + /// against the same feed the container's dnf actually fetches from. + pub fn effective_repo_url(&self) -> String { + self.get_repo_url() + .unwrap_or_else(|| Self::DEFAULT_REPO_URL.to_string()) + } + /// Path to a CA cert to trust for the repo endpoint. /// Priority: AVOCADO_REPO_CA (env) > distro.repo.ca (config). pub fn get_repo_ca(&self) -> Option { diff --git a/src/utils/container.rs b/src/utils/container.rs index 2d82a502..a701be3c 100644 --- a/src/utils/container.rs +++ b/src/utils/container.rs @@ -661,9 +661,16 @@ impl SdkContainer { host_platform.to_string(), ); - if let Some(url) = &config.repo_url { - env_vars.insert("AVOCADO_SDK_REPO_URL".to_string(), url.clone()); - } + // Always provide the repo URL (configured value or the prod default) so + // the container shell never needs its own literal default — single source + // of truth is Config::DEFAULT_REPO_URL. + env_vars.insert( + "AVOCADO_SDK_REPO_URL".to_string(), + config + .repo_url + .clone() + .unwrap_or_else(|| crate::utils::config::Config::DEFAULT_REPO_URL.to_string()), + ); if let Some(release) = &config.repo_release { env_vars.insert("AVOCADO_SDK_REPO_RELEASE".to_string(), release.clone()); } @@ -838,9 +845,16 @@ impl SdkContainer { // Set host platform - the remote is running the container env_vars.insert("AVOCADO_HOST_PLATFORM".to_string(), "linux".to_string()); - if let Some(url) = &config.repo_url { - env_vars.insert("AVOCADO_SDK_REPO_URL".to_string(), url.clone()); - } + // Always provide the repo URL (configured value or the prod default) so + // the container shell never needs its own literal default — single source + // of truth is Config::DEFAULT_REPO_URL. 
+ env_vars.insert( + "AVOCADO_SDK_REPO_URL".to_string(), + config + .repo_url + .clone() + .unwrap_or_else(|| crate::utils::config::Config::DEFAULT_REPO_URL.to_string()), + ); if let Some(release) = &config.repo_release { env_vars.insert("AVOCADO_SDK_REPO_RELEASE".to_string(), release.clone()); } @@ -1219,9 +1233,16 @@ impl SdkContainer { host_platform.to_string(), ); - if let Some(url) = &config.repo_url { - env_vars.insert("AVOCADO_SDK_REPO_URL".to_string(), url.clone()); - } + // Always provide the repo URL (configured value or the prod default) so + // the container shell never needs its own literal default — single source + // of truth is Config::DEFAULT_REPO_URL. + env_vars.insert( + "AVOCADO_SDK_REPO_URL".to_string(), + config + .repo_url + .clone() + .unwrap_or_else(|| crate::utils::config::Config::DEFAULT_REPO_URL.to_string()), + ); if let Some(release) = &config.repo_release { env_vars.insert("AVOCADO_SDK_REPO_RELEASE".to_string(), release.clone()); } @@ -1465,9 +1486,16 @@ impl SdkContainer { // Set host platform - the remote is running the container env_vars.insert("AVOCADO_HOST_PLATFORM".to_string(), "linux".to_string()); - if let Some(url) = &config.repo_url { - env_vars.insert("AVOCADO_SDK_REPO_URL".to_string(), url.clone()); - } + // Always provide the repo URL (configured value or the prod default) so + // the container shell never needs its own literal default — single source + // of truth is Config::DEFAULT_REPO_URL. 
+ env_vars.insert( + "AVOCADO_SDK_REPO_URL".to_string(), + config + .repo_url + .clone() + .unwrap_or_else(|| crate::utils::config::Config::DEFAULT_REPO_URL.to_string()), + ); if let Some(release) = &config.repo_release { env_vars.insert("AVOCADO_SDK_REPO_RELEASE".to_string(), release.clone()); } @@ -2058,12 +2086,9 @@ if [ -n "$AVOCADO_EXT_PATH_MOUNTS" ]; then done fi -# Get repo url from environment or default to prod -if [ -n "$AVOCADO_SDK_REPO_URL" ]; then - REPO_URL="$AVOCADO_SDK_REPO_URL" -else - REPO_URL="https://repo.avocadolinux.org" -fi +# Repo URL is always supplied by the CLI env-builder (Config::DEFAULT_REPO_URL +# when unset), so there is no literal default to drift here. +REPO_URL="$AVOCADO_SDK_REPO_URL" if [ -n "$AVOCADO_VERBOSE" ]; then echo "[INFO] Using repo URL: '$REPO_URL'"; fi @@ -2349,12 +2374,9 @@ if [ -n "$AVOCADO_EXT_PATH_MOUNTS" ]; then done fi -# Get repo url from environment or default to prod -if [ -n "$AVOCADO_SDK_REPO_URL" ]; then - REPO_URL="$AVOCADO_SDK_REPO_URL" -else - REPO_URL="https://repo.avocadolinux.org" -fi +# Repo URL is always supplied by the CLI env-builder (Config::DEFAULT_REPO_URL +# when unset), so there is no literal default to drift here. +REPO_URL="$AVOCADO_SDK_REPO_URL" if [ -n "$AVOCADO_VERBOSE" ]; then echo "[INFO] Using repo URL: '$REPO_URL'"; fi diff --git a/src/utils/snapshot.rs b/src/utils/snapshot.rs index 95c6a216..213eb7df 100644 --- a/src/utils/snapshot.rs +++ b/src/utils/snapshot.rs @@ -210,9 +210,7 @@ pub async fn resolve_latest(config: &Config, target: &str) -> Result Ok(None), @@ -269,9 +267,7 @@ pub async fn resolve_and_apply(config: &Config, src_dir: &Path, target: &str) -> // No release/channel to derive a feed from — nothing to pin. 
return Ok(()); }; - let Some(repo_url) = config.get_repo_url() else { - return Ok(()); - }; + let repo_url = config.effective_repo_url(); let mut lock = LockFile::load(src_dir) .with_context(|| format!("Failed to load lock file from {}", src_dir.display()))?; From 178017b5c9e7a855418cabc9803baf202e4cf6c5 Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Tue, 2 Jun 2026 10:44:24 -0400 Subject: [PATCH 09/30] feat(ext): nested extension layout for a shared includes installroot Packaged extensions now nest their content under /<name>/ and self-describe the layout via `Provides: avocado-ext-layout(nested)`. ext_fetch repoqueries that provide (repo metadata, no download) and installs nested packages into the SHARED $AVOCADO_PREFIX/includes installroot, so one rpmdb tracks every installed extension with no cross-extension file collisions. Legacy packages lacking the provide keep the per-extension installroot. Either way the final content lands at includes/<name>/, so consumers are unchanged. --- src/commands/ext/package.rs | 19 +++++++++----- src/utils/ext_fetch.rs | 51 +++++++++++++++++++++++++------------ 2 files changed, 48 insertions(+), 22 deletions(-) diff --git a/src/commands/ext/package.rs b/src/commands/ext/package.rs index c89b20e6..fa0bed65 100644 --- a/src/commands/ext/package.rs +++ b/src/commands/ext/package.rs @@ -625,7 +625,10 @@ if [ "$FILE_COUNT" -eq 0 ]; then fi # Create spec file -# Package root (/) maps to the extension's src_dir +# The extension's src_dir maps to a top-level // directory in the package, so +# that installing into a SHARED includes installroot lands its content at +# includes// without colliding with other extensions' files (and one rpmdb +# tracks all installed extensions).
cat > SPECS/package.spec << SPEC_EOF %define _buildhost reproducible AutoReqProv: no @@ -637,12 +640,16 @@ Summary: {summary} License: {license} Vendor: {vendor} Group: {group}{url_line} +# Self-describe the on-disk layout so the CLI knows how to install this package: content +# is nested under //, so it installs into the SHARED includes installroot. +# Legacy packages (content at /) lack this provide and use the per-ext installroot. +Provides: avocado-ext-layout(nested) %description {description} %files -/* +/{name} %prep # No prep needed @@ -651,10 +658,10 @@ Group: {group}{url_line} # No build needed %install -mkdir -p %{{buildroot}} -# Copy staged files to buildroot root -# This allows installation to \$AVOCADO_PREFIX/includes// -cp -r "$STAGING_DIR"/* %{{buildroot}}/ +# Nest the staged files under // so a shared includes installroot yields +# includes//... (collision-free, one rpmdb per includes root). +mkdir -p %{{buildroot}}/{name} +cp -r "$STAGING_DIR"/* %{{buildroot}}/{name}/ %clean # Skip clean section - not needed for our use case diff --git a/src/utils/ext_fetch.rs b/src/utils/ext_fetch.rs index a28620f3..df3bdda5 100644 --- a/src/utils/ext_fetch.rs +++ b/src/utils/ext_fetch.rs @@ -218,9 +218,11 @@ impl ExtensionFetcher { /// Fetch an extension from the avocado package repository /// - /// Installs the extension package into a per-extension installroot at - /// `$AVOCADO_PREFIX/includes/` using DNF with `--installroot`. - /// This gives proper RPM tracking, clean upgrades, and version management. + /// Installs the extension package into the SHARED `$AVOCADO_PREFIX/includes` + /// installroot using DNF with `--installroot`. Packages nest their content under a + /// top-level `//` dir, so the content lands at `includes//` and a + /// single rpmdb tracks every installed extension (proper tracking, clean upgrades, + /// version management, no cross-extension file collisions). 
async fn fetch_from_repo( &self, ext_name: &str, @@ -251,15 +253,14 @@ impl ExtensionFetcher { let repo_arg = repo_name.map(|r| format!("--repo={r}")).unwrap_or_default(); - // Use container path $AVOCADO_PREFIX/includes/ as the installroot - let installroot = format!("$AVOCADO_PREFIX/includes/{ext_name}"); - - // Force mode: clean the installroot for a fresh install - let force_clean = if force { - format!(r#"rm -rf "{installroot}""#) - } else { - String::new() - }; + // The package self-describes its layout via `Provides: avocado-ext-layout(nested)`. + // - NESTED (new): content under // -> install into the SHARED includes + // installroot, so it lands at includes// with one rpmdb tracking all exts. + // - LEGACY (no such provide): content at / -> per-extension installroot includes/. + // Either way the final content is includes//, so consumers are unchanged. The + // installroot is chosen at run time by repoquerying the package's provides. + let ext_dir = format!("$AVOCADO_PREFIX/includes/{ext_name}"); + let force_str = if force { "true" } else { "false" }; // Install the extension package using DNF with --installroot // Uses $DNF_SDK_COMBINED_REPO_CONF to access both SDK and target-specific repos @@ -267,21 +268,39 @@ impl ExtensionFetcher { r#" set -e -{force_clean} +# Detect the package's on-disk layout from its provides (repo metadata, no download). 
+if RPM_CONFIGDIR=$AVOCADO_SDK_PREFIX/usr/lib/rpm RPM_ETCCONFIGDIR=$AVOCADO_SDK_PREFIX \ + $DNF_SDK_HOST $DNF_SDK_HOST_OPTS $DNF_SDK_COMBINED_REPO_CONF {repo_arg} \ + repoquery --provides {package_spec} 2>/dev/null | grep -q 'avocado-ext-layout(nested)'; then + INSTALLROOT="$AVOCADO_PREFIX/includes" + echo "Extension '{ext_name}': nested layout -> shared includes installroot" +else + INSTALLROOT="{ext_dir}" + echo "Extension '{ext_name}': legacy layout -> per-extension installroot" +fi + +# Force: remove just this extension (rpmdb entry + content dir) for a clean reinstall, +# without disturbing other extensions sharing the installroot. +if [ "{force_str}" = "true" ]; then + RPM_CONFIGDIR=$AVOCADO_SDK_PREFIX/usr/lib/rpm RPM_ETCCONFIGDIR=$AVOCADO_SDK_PREFIX \ + $DNF_SDK_HOST $DNF_SDK_HOST_OPTS --installroot="$INSTALLROOT" -y remove {package_name} 2>/dev/null || true + rm -rf "{ext_dir}" +fi + +mkdir -p "$INSTALLROOT" -# Install the extension package into the per-extension installroot RPM_CONFIGDIR=$AVOCADO_SDK_PREFIX/usr/lib/rpm \ RPM_ETCCONFIGDIR=$AVOCADO_SDK_PREFIX \ $DNF_SDK_HOST \ $DNF_SDK_HOST_OPTS \ $DNF_SDK_COMBINED_REPO_CONF \ {repo_arg} \ - --installroot={installroot} \ + --installroot="$INSTALLROOT" \ -y \ install \ {package_spec} -echo "Successfully fetched extension '{ext_name}' (package: {package_spec}) to {installroot}" +echo "Successfully installed extension '{ext_name}' (package: {package_spec}) to {ext_dir}" "# ); From 9d4a7b252d5dbf40fae0561e6a1b2659f20572a9 Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Tue, 2 Jun 2026 10:44:30 -0400 Subject: [PATCH 10/30] feat(connect): add `connect ext` publish/status/list (super-admin) Build-once publish of a packaged extension RPM to the feed, plus status and list of published versions. Adds the commands::connect::ext module and wires the ConnectExtCommands subcommands and dispatch in main.rs. 
--- src/commands/connect/ext.rs | 302 ++++++++++++++++++++++++++++++++++++ src/commands/connect/mod.rs | 1 + src/main.rs | 134 ++++++++++++++++ 3 files changed, 437 insertions(+) create mode 100644 src/commands/connect/ext.rs diff --git a/src/commands/connect/ext.rs b/src/commands/connect/ext.rs new file mode 100644 index 00000000..0bbdd731 --- /dev/null +++ b/src/commands/connect/ext.rs @@ -0,0 +1,302 @@ +//! `avocado connect ext` — publish a packaged extension to the feed via avocado-connect. +//! +//! Flow (see avocado-connect docs/ext-publish.md): reserve a version + get a presigned +//! staging URL, PUT the RPM straight to storage, confirm (connect verifies + enqueues the +//! cluster ingest). Additive + safe: a taken version is rejected, never overwritten. + +use anyhow::{Context, Result}; +use sha2::{Digest, Sha256}; +use std::path::Path; + +use crate::commands::connect::client; +use crate::utils::output::{print_info, print_success, OutputLevel}; + +fn http_client() -> Result { + reqwest::Client::builder() + .use_rustls_tls() + .build() + .context("Failed to build HTTP client") +} + +/// Map connect API errors to plain, actionable messages. +fn api_error(status: u16, body: &str) -> anyhow::Error { + let msg = match status { + 409 => { + "that extension version is already taken — bump the version and republish".to_string() + } + 422 => format!("the request was rejected as invalid: {body}"), + 401 => "not authenticated — run 'avocado connect auth login'".to_string(), + 403 => "not authorized — extension publish is super-admin only for now".to_string(), + 404 => "not found".to_string(), + _ => format!("HTTP {status}: {body}"), + }; + anyhow::anyhow!(msg) +} + +fn sha256_hex(bytes: &[u8]) -> String { + let mut h = Sha256::new(); + h.update(bytes); + h.finalize().iter().map(|b| format!("{b:02x}")).collect() +} + +/// Parse name/version/release/arch from an RPM filename +/// (`<name>-<version>-<release>.<arch>.rpm`). version/release must be dash-free.
+fn parse_nevra(path: &Path) -> Result<(String, String, String, String)> { + let fname = path + .file_name() + .and_then(|n| n.to_str()) + .context("invalid RPM path")?; + let stem = fname.strip_suffix(".rpm").context("not an .rpm file")?; + let (nvr, arch) = stem + .rsplit_once('.') + .context("RPM filename missing .")?; + let (nv, release) = nvr + .rsplit_once('-') + .context("RPM filename missing -")?; + let (name, version) = nv + .rsplit_once('-') + .context("RPM filename missing -")?; + Ok(( + name.to_string(), + version.to_string(), + release.to_string(), + arch.to_string(), + )) +} + +pub struct ExtPublishCommand { + pub config: String, + pub org: Option, + pub profile: Option, + pub rpm: String, + pub name: Option, + pub version: Option, + pub release: Option, + pub arch: Option, + pub target_release: String, + pub target_channel: String, + pub targets: String, +} + +impl ExtPublishCommand { + pub async fn execute(&self) -> Result<()> { + // --org is optional for publish: omit it (and connect.org) to target the + // platform (Peridio) org, which connect fills in server-side for super-admins. + // When given (flag or connect.org), it publishes into that tenant org and + // selects a matching auth profile. + let org = self.org.clone().or_else(|| { + std::path::Path::new(&self.config) + .exists() + .then(|| crate::utils::config::load_config(&self.config).ok()) + .flatten() + .and_then(|c| c.connect) + .and_then(|c| c.org) + }); + let cfg = client::load_config()? + .context("Not logged in. 
Run 'avocado connect auth login' first.")?; + let (_name, profile) = cfg.resolve_profile(self.profile.as_deref(), org.as_deref())?; + let api = profile.api_url.trim_end_matches('/').to_string(); + let token = profile.token.clone(); + + let path = Path::new(&self.rpm); + let bytes = std::fs::read(path).with_context(|| format!("Failed to read {}", self.rpm))?; + let size = bytes.len() as u64; + let sha = sha256_hex(&bytes); + + let (pn, pv, pr, pa) = parse_nevra(path).unwrap_or_default(); + let name = self.name.clone().unwrap_or(pn); + let version = self.version.clone().unwrap_or(pv); + let release = self + .release + .clone() + .unwrap_or(if pr.is_empty() { "r0".into() } else { pr }); + let arch = self + .arch + .clone() + .unwrap_or(if pa.is_empty() { "noarch".into() } else { pa }); + if name.is_empty() || version.is_empty() { + anyhow::bail!("could not determine extension name/version — pass --name and --version"); + } + let machines: Vec = self + .targets + .split(',') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect(); + + let client = http_client()?; + + // 1. 
reserve version + get presigned staging URL (409 if taken) + let org_label = org.as_deref().unwrap_or("platform"); + print_info( + &format!("Publishing {name}-{version}-{release}.{arch} to {api} (org {org_label})..."), + OutputLevel::Normal, + ); + let res = client + .post(format!("{api}/api/admin/extensions/publish")) + .bearer_auth(&token) + .json(&serde_json::json!({ + "organization_id": org, + "name": name, + "version": version, + "release": release, + "arch": arch, + "sha256": sha, + "size_bytes": size, + "target_release": self.target_release, + "target_channel": self.target_channel, + "target_machines": machines, + })) + .send() + .await + .context("publish request failed")?; + let status = res.status().as_u16(); + let body = res.text().await.unwrap_or_default(); + if !(200..300).contains(&status) { + return Err(api_error(status, &body)); + } + let data: serde_json::Value = + serde_json::from_str(&body).context("failed to parse publish response")?; + let id = data["data"]["id"] + .as_str() + .context("publish response missing version id")? + .to_string(); + let upload_url = data["data"]["upload_url"] + .as_str() + .context("publish response missing upload_url")? + .to_string(); + + // 2. PUT the RPM straight to staging (bytes never pass through connect) + print_info("Uploading package to staging...", OutputLevel::Normal); + let put = client + .put(&upload_url) + .body(bytes) + .send() + .await + .context("staging upload failed")?; + if !put.status().is_success() { + let s = put.status().as_u16(); + let b = put.text().await.unwrap_or_default(); + anyhow::bail!("staging upload failed (HTTP {s}): {b}"); + } + + // 3. 
confirm -> connect verifies bytes and enqueues the cluster ingest + let conf = client + .post(format!("{api}/api/admin/extensions/{id}/confirm")) + .bearer_auth(&token) + .send() + .await + .context("confirm request failed")?; + let cs = conf.status().as_u16(); + let cb = conf.text().await.unwrap_or_default(); + if !(200..300).contains(&cs) { + return Err(api_error(cs, &cb)); + } + + print_success( + &format!("Published {name}-{version}; ingest queued."), + OutputLevel::Normal, + ); + print_info( + &format!("Track it: avocado connect ext status {id}"), + OutputLevel::Normal, + ); + Ok(()) + } +} + +pub struct ExtStatusCommand { + pub config: String, + pub org: Option, + pub profile: Option, + pub id: String, +} + +impl ExtStatusCommand { + pub async fn execute(&self) -> Result<()> { + let (api, token) = api_and_token(self.org.clone(), &self.config, self.profile.as_deref())?; + let client = http_client()?; + let res = client + .get(format!("{api}/api/admin/extensions/{}", self.id)) + .bearer_auth(&token) + .send() + .await + .context("status request failed")?; + let status = res.status().as_u16(); + let body = res.text().await.unwrap_or_default(); + if !(200..300).contains(&status) { + return Err(api_error(status, &body)); + } + let data: serde_json::Value = serde_json::from_str(&body).unwrap_or_default(); + println!( + "{}", + serde_json::to_string_pretty(&data["data"]).unwrap_or(body) + ); + Ok(()) + } +} + +pub struct ExtListCommand { + pub config: String, + pub org: Option, + pub profile: Option, + pub name: Option, +} + +impl ExtListCommand { + pub async fn execute(&self) -> Result<()> { + let (api, token) = api_and_token(self.org.clone(), &self.config, self.profile.as_deref())?; + let mut url = format!("{api}/api/admin/extensions"); + if let Some(n) = &self.name { + url.push_str(&format!("?name={n}")); + } + let client = http_client()?; + let res = client + .get(&url) + .bearer_auth(&token) + .send() + .await + .context("list request failed")?; + let status = 
res.status().as_u16(); + let body = res.text().await.unwrap_or_default(); + if !(200..300).contains(&status) { + return Err(api_error(status, &body)); + } + let data: serde_json::Value = serde_json::from_str(&body).unwrap_or_default(); + if let Some(items) = data["data"].as_array() { + for v in items { + println!( + "{:<28} {:<12} {:<10} {}", + v["package"].as_str().unwrap_or("?"), + v["version"].as_str().unwrap_or("?"), + v["status"].as_str().unwrap_or("?"), + v["nevra"].as_str().unwrap_or("") + ); + } + } else { + println!("{}", serde_json::to_string_pretty(&data).unwrap_or(body)); + } + Ok(()) + } +} + +fn api_and_token( + org: Option, + config_path: &str, + profile: Option<&str>, +) -> Result<(String, String)> { + // --org is optional here too: fall back to connect.org, then to the + // default/--profile auth, rather than hard-requiring an org. + let org = org.or_else(|| { + std::path::Path::new(config_path) + .exists() + .then(|| crate::utils::config::load_config(config_path).ok()) + .flatten() + .and_then(|c| c.connect) + .and_then(|c| c.org) + }); + let cfg = + client::load_config()?.context("Not logged in. 
Run 'avocado connect auth login' first.")?; + let (_name, p) = cfg.resolve_profile(profile, org.as_deref())?; + Ok((p.api_url.trim_end_matches('/').to_string(), p.token.clone())) +} diff --git a/src/commands/connect/mod.rs b/src/commands/connect/mod.rs index 97cc46c6..b5228838 100644 --- a/src/commands/connect/mod.rs +++ b/src/commands/connect/mod.rs @@ -6,6 +6,7 @@ pub mod cohorts; pub mod deploy; pub mod device_reclaim; pub mod devices; +pub mod ext; pub mod init; pub mod keys; pub mod orgs; diff --git a/src/main.rs b/src/main.rs index 192215e9..715386ac 100644 --- a/src/main.rs +++ b/src/main.rs @@ -613,6 +613,11 @@ enum ConnectCommands { #[command(subcommand)] command: ConnectOrgsCommands, }, + /// Publish extensions to the feed (super-admin) + Ext { + #[command(subcommand)] + command: ConnectExtCommands, + }, /// Manage projects Projects { #[command(subcommand)] @@ -855,6 +860,74 @@ enum ConnectOrgsCommands { }, } +#[derive(Subcommand)] +enum ConnectExtCommands { + /// Build-once publish a packaged extension RPM to the feed (super-admin) + Publish { + /// Path to the extension RPM (from `avocado ext package`) + rpm: String, + /// Organization ID (or set connect.org in avocado.yaml) + #[arg(long)] + org: Option, + /// Extension name (default: parsed from the RPM filename) + #[arg(long)] + name: Option, + /// Extension version (default: parsed from the RPM filename) + #[arg(long)] + version: Option, + /// Extension release (default: parsed, else r0) + #[arg(long)] + release: Option, + /// Extension arch (default: parsed, else noarch) + #[arg(long)] + arch: Option, + /// Target feed release + #[arg(long, default_value = "2026")] + target_release: String, + /// Target feed channel + #[arg(long, default_value = "edge")] + target_channel: String, + /// Comma-separated target machines + #[arg(long, default_value = "qemux86-64,qemuarm64")] + targets: String, + /// Path to avocado.yaml configuration file + #[arg(short = 'C', long, default_value = "avocado.yaml")] + 
config: String, + /// Profile name (defaults to the active default profile) + #[arg(long)] + profile: Option, + }, + /// Show the status of a published extension version + Status { + /// Version id + id: String, + /// Organization ID (or set connect.org in avocado.yaml) + #[arg(long)] + org: Option, + /// Path to avocado.yaml configuration file + #[arg(short = 'C', long, default_value = "avocado.yaml")] + config: String, + /// Profile name (defaults to the active default profile) + #[arg(long)] + profile: Option, + }, + /// List published extension versions + List { + /// Filter by package name + #[arg(long)] + name: Option, + /// Organization ID (or set connect.org in avocado.yaml) + #[arg(long)] + org: Option, + /// Path to avocado.yaml configuration file + #[arg(short = 'C', long, default_value = "avocado.yaml")] + config: String, + /// Profile name (defaults to the active default profile) + #[arg(long)] + profile: Option, + }, +} + #[derive(Subcommand)] enum ConnectProjectsCommands { /// List projects in an organization @@ -3318,6 +3391,67 @@ async fn main() -> Result<()> { Ok(()) } }, + ConnectCommands::Ext { command } => match command { + ConnectExtCommands::Publish { + rpm, + org, + name, + version, + release, + arch, + target_release, + target_channel, + targets, + config, + profile, + } => { + let cmd = commands::connect::ext::ExtPublishCommand { + config, + org, + profile, + rpm, + name, + version, + release, + arch, + target_release, + target_channel, + targets, + }; + cmd.execute().await?; + Ok(()) + } + ConnectExtCommands::Status { + id, + org, + config, + profile, + } => { + let cmd = commands::connect::ext::ExtStatusCommand { + config, + org, + profile, + id, + }; + cmd.execute().await?; + Ok(()) + } + ConnectExtCommands::List { + name, + org, + config, + profile, + } => { + let cmd = commands::connect::ext::ExtListCommand { + config, + org, + profile, + name, + }; + cmd.execute().await?; + Ok(()) + } + }, ConnectCommands::Projects { command } => 
match command { ConnectProjectsCommands::List { org, From feced96e7cb88d255f4db7c75a580d75434c3fef Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Tue, 2 Jun 2026 10:44:59 -0400 Subject: [PATCH 11/30] build(ext): package avocado-cli as an extension via `ext package` Add the avocado.yaml manifest plus compile/install/clean helper scripts that build avocado-cli into the avocado-ext-cli extension, and gitignore the transient /.cargo/ cross-compile config that avocado-cli-compile.sh writes during the build. --- .gitignore | 3 +++ avocado-cli-clean.sh | 14 ++++++++++++ avocado-cli-compile.sh | 45 ++++++++++++++++++++++++++++++++++++ avocado-cli-install.sh | 23 +++++++++++++++++++ avocado.yaml | 52 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 137 insertions(+) create mode 100755 avocado-cli-clean.sh create mode 100755 avocado-cli-compile.sh create mode 100755 avocado-cli-install.sh create mode 100644 avocado.yaml diff --git a/.gitignore b/.gitignore index 1f11dcad..8f560e57 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,9 @@ /target/ **/*.rs.bk +# Transient cross-compile config written by avocado-cli-compile.sh during `ext package` +/.cargo/ + # Cargo lock file (uncomment if this is a library) # Cargo.lock diff --git a/avocado-cli-clean.sh b/avocado-cli-clean.sh new file mode 100755 index 00000000..aaa4f7cd --- /dev/null +++ b/avocado-cli-clean.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -e + +echo "Cleaning avocado-cli build artifacts" + +cd "$(dirname "$0")" + +# Remove Cargo build artifacts +cargo clean + +# Remove any generated config +rm -rf .cargo + +echo "Clean complete" diff --git a/avocado-cli-compile.sh b/avocado-cli-compile.sh new file mode 100755 index 00000000..fb0f7436 --- /dev/null +++ b/avocado-cli-compile.sh @@ -0,0 +1,45 @@ +#!/bin/bash +set -e + +# Find the Rust target from RUST_TARGET_PATH +for json_file in "$RUST_TARGET_PATH"/*.json; do + if [ -f "$json_file" ]; then + json_name=$(basename "$json_file" .json) + if [[ "$json_name" == 
"${OECORE_TARGET_ARCH}-"* ]]; then + RUST_TARGET="$json_name" + break + fi + fi +done + +if [ -z "$RUST_TARGET" ]; then + echo "Error: Could not find Rust target for $OECORE_TARGET_ARCH" + exit 1 +fi + +echo "Building avocado-cli for target: $RUST_TARGET" + +cd "$(dirname "$0")" + +# Clear any rustflags that might cause conflicts with our .cargo/config.toml. +# The SDK env exports CARGO_TARGET__RUSTFLAGS carrying its own --sysroot; +# left set, cargo merges it with the config below and rustc gets --sysroot twice +# ("Option 'sysroot' given more than once"). Unset every target's flavor, not just +# one hardcoded triple, so this works for x86_64 and aarch64 targets alike. +unset RUSTFLAGS +unset CARGO_BUILD_RUSTFLAGS +for var in $(env | grep -o 'CARGO_TARGET_[A-Z0-9_]*_RUSTFLAGS'); do + unset "$var" +done + +# Remove any existing config that might conflict +rm -rf .cargo + +# Create config.toml with cross-compilation settings +mkdir -p .cargo +cat > .cargo/config.toml << EOF +[target.$RUST_TARGET] +rustflags = ["--sysroot=$SDKTARGETSYSROOT/usr", "-C", "link-arg=--sysroot=$SDKTARGETSYSROOT"] +EOF + +cargo build --release --target "$RUST_TARGET" diff --git a/avocado-cli-install.sh b/avocado-cli-install.sh new file mode 100755 index 00000000..249c368f --- /dev/null +++ b/avocado-cli-install.sh @@ -0,0 +1,23 @@ +#!/bin/bash +set -e + +# Find the Rust target from RUST_TARGET_PATH +for json_file in "$RUST_TARGET_PATH"/*.json; do + if [ -f "$json_file" ]; then + json_name=$(basename "$json_file" .json) + if [[ "$json_name" == "${OECORE_TARGET_ARCH}-"* ]]; then + RUST_TARGET="$json_name" + break + fi + fi +done + +BINARY_PATH="$(dirname "$0")/target/$RUST_TARGET/release/avocado" + +if [ ! 
-f "$BINARY_PATH" ]; then + echo "Error: Binary not found at $BINARY_PATH" + exit 1 +fi + +install -D -m 755 "$BINARY_PATH" "$AVOCADO_BUILD_EXT_SYSROOT/usr/bin/avocado" +echo "Installed: $(file "$AVOCADO_BUILD_EXT_SYSROOT/usr/bin/avocado")" diff --git a/avocado.yaml b/avocado.yaml new file mode 100644 index 00000000..cd5f813a --- /dev/null +++ b/avocado.yaml @@ -0,0 +1,52 @@ +supported_targets: '*' + +extensions: + avocado-ext-cli: + # Version is injected at build time: `AVOCADO_EXT_VERSION= avocado ext package …` + # (CI sets it from the release/tag; unset => empty + a warning). + version: '{{ env.AVOCADO_EXT_VERSION }}' + release: r0 + summary: Avocado build system command line interface + description: Avocado build system command line interface for managing the system + license: Apache-2.0 + url: https://github.com/avocadolinux/avocado-cli + vendor: Avocado Linux + # Stage the Rust source (the repo root IS the project) plus the packaging + # artifacts, so `ext package` can compile it. configs/ is required: + # src references include_str!("../../configs/default.yaml"). 
+ package_files: + - avocado.yaml + - avocado-cli-*.sh + - Cargo.toml + - Cargo.lock + - build.rs + - src + - configs + + packages: + bash: '*' + avocado-cli-bin: + compile: avocado-cli-compile + install: avocado-cli-install.sh + + sdk: + packages: + nativesdk-binutils: '*' + nativesdk-cargo: '*' + nativesdk-gcc: '*' + nativesdk-glibc-dev: '*' + nativesdk-libgcc-dev: '*' + nativesdk-rust: '*' + nativesdk-git: '*' + packagegroup-rust-cross-canadian-avocado-{{ avocado.target }}: '*' + +sdk: + image: docker.io/avocadolinux/sdk:{{ env.AVOCADO_DISTRO_RELEASE }}-{{ env.AVOCADO_DISTRO_CHANNEL }} + + compile: + avocado-cli-compile: + compile: avocado-cli-compile.sh + clean: avocado-cli-clean.sh + packages: + libstd-rs: '*' + libstd-rs-dev: '*' From 57c82e960b4922694e3d7c7af8530961d52d6ee0 Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Tue, 2 Jun 2026 10:45:20 -0400 Subject: [PATCH 12/30] fix(tui): route unset-env-var warning through the output module `{{ env.VAR }}` interpolation of an unset variable emitted its warning with a raw eprintln!, which lands inside the TaskRenderer's live cursor region without being counted in rendered_lines. The next redraw's MoveUp/Clear then cleared one line too few and stranded a task line, showing as stacked "sdk bootstrap" spinner lines during installs that fetch remote extensions (whose configs use `{{ env.AVOCADO_EXT_VERSION }}`). Route it through print_warning, which is suppressed while a TUI/JSON renderer is active and still prints in plain/CI runs. --- src/utils/interpolation/env.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/utils/interpolation/env.rs b/src/utils/interpolation/env.rs index a003d428..7edaa7ea 100644 --- a/src/utils/interpolation/env.rs +++ b/src/utils/interpolation/env.rs @@ -10,6 +10,8 @@ use anyhow::Result; use std::env; +use crate::utils::output::{print_warning, OutputLevel}; + /// Resolve an environment variable template. 
/// /// # Arguments @@ -30,8 +32,16 @@ pub fn resolve(var_name: &str) -> Result> { match env::var(var_name) { Ok(value) => Ok(Some(value)), Err(_) => { - eprintln!( - "[WARNING] Environment variable '{var_name}' is not set, replacing with empty string" + // Route through `print_warning` (not a raw `eprintln!`) so the + // message is suppressed while a TUI renderer is active. A direct + // stderr write here lands inside the renderer's cursor region + // without being counted in `rendered_lines`, so the next redraw's + // MoveUp/Clear clears one line too few and strands a task line. + print_warning( + &format!( + "Environment variable '{var_name}' is not set, replacing with empty string" + ), + OutputLevel::Normal, ); Ok(Some(String::new())) } From 7f15259ba294e640ff98cfaa69553cecee14bcff Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Tue, 2 Jun 2026 11:18:06 -0400 Subject: [PATCH 13/30] fix(deploy): gate avocado-vm QMP port forwarding behind cfg(unix) runtime/deploy.rs referenced crate::utils::vm::qmp::QmpClient unconditionally, but the qmp module is `#[cfg(unix)]` (unix-socket transport). That broke the Windows `cargo check` (E0433: cannot find `qmp` in `vm`). Gate the port-forward setup and teardown behind cfg(unix) with a non-unix no-op; avocado-vm routing only occurs on unix hosts, so there is no behavior change on unix. --- src/commands/runtime/deploy.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/commands/runtime/deploy.rs b/src/commands/runtime/deploy.rs index f3c188fb..618bd11c 100644 --- a/src/commands/runtime/deploy.rs +++ b/src/commands/runtime/deploy.rs @@ -798,9 +798,15 @@ struct OpenForward { impl OpenForward { async fn close(self) { + // The qmp module is unix-only (unix-socket transport), so this teardown + // only exists on unix. On other platforms OpenForward is never built + // (the qmp block below is cfg'd out), so this is unreachable there. 
+ #[cfg(unix)] if let Ok(mut c) = crate::utils::vm::qmp::QmpClient::connect(&self.qmp_socket).await { let _ = c.hostfwd_remove("net0", "0.0.0.0", self.host_port).await; } + #[cfg(not(unix))] + let _ = (&self.qmp_socket, self.host_port); } } @@ -875,6 +881,10 @@ async fn prepare_mac_deploy_net( return net; } }; + // QMP rides a unix-socket transport (the qmp module is unix-only), and + // avocado-vm routing only happens on unix hosts — so the port-forward + // setup is compiled in on unix only. Elsewhere it's a no-op. + #[cfg(unix)] match crate::utils::vm::qmp::QmpClient::connect(&sock).await { Ok(mut c) => { // Clear any stale forward from a prior interrupted deploy, then add. @@ -910,6 +920,8 @@ async fn prepare_mac_deploy_net( OutputLevel::Normal, ), } + #[cfg(not(unix))] + let _ = &sock; } net From 5a533620b4faffd9d6978cb106712d18ea0a9ed8 Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Tue, 2 Jun 2026 20:38:00 -0400 Subject: [PATCH 14/30] perf(vm): splice PSCI idle-states into virt-machine DTB QEMU's `-machine virt` doesn't emit `cpu-idle-states` device-tree bindings, so CONFIG_ARM_PSCI_CPUIDLE never binds and idle CPUs fall back to bare WFI. Under HVF that pattern bounces the vCPU thread through vmexit/vmenter instead of blocking on the WFI handler's pthread_cond_timedwait, costing ~80% host CPU per vCPU at guest idle. On arm64 launches we now dump QEMU's auto-generated DTB once (via `-machine virt,dumpdtb=`), splice in `/idle-states/cpu-sleep-0` plus per-CPU `cpu-idle-states` properties, cache the patched copy under `~/.avocado/vm/dtb/` keyed by (smp, memory, qemu_version), and pass it back with `-dtb`. Cache hits on subsequent launches. Measured on smp=8 idle: 670% -> 275-344% host CPU. State1 stays cosmetic on HVF (PSCI CPU_SUSPEND isn't deeper than WFI) but the framework binding alone fixes the vmexit-loop pattern. Pure-Rust FDT v17 parse/serialize in fdt.rs; no external dtc dependency. 
Failures degrade gracefully to the previous auto-generated DTB path. `AVOCADO_VM_DTB` env var preserved as a debug override. --- src/utils/vm/fdt.rs | 490 ++++++++++++++++++++++++++++++++++++++++++ src/utils/vm/mod.rs | 1 + src/utils/vm/qemu.rs | 134 +++++++++++- src/utils/vm/state.rs | 6 + 4 files changed, 630 insertions(+), 1 deletion(-) create mode 100644 src/utils/vm/fdt.rs diff --git a/src/utils/vm/fdt.rs b/src/utils/vm/fdt.rs new file mode 100644 index 00000000..88519783 --- /dev/null +++ b/src/utils/vm/fdt.rs @@ -0,0 +1,490 @@ +//! Minimal pure-Rust FDT v17 parser/emitter, scoped to the one DT mutation +//! we need: injecting PSCI `idle-states` into a QEMU-generated `virt` +//! machine DTB. +//! +//! QEMU's `-machine virt` does not emit `idle-states` or per-CPU +//! `cpu-idle-states` properties, so the kernel's PSCI cpuidle driver +//! (`drivers/cpuidle/cpuidle-psci.c`) never binds — even with +//! `CONFIG_ARM_PSCI_CPUIDLE=y`. Without a cpuidle driver, arm64 idle +//! falls back to bare WFI, which under HVF lets the vCPU thread bounce +//! through vmexit/vmenter rather than blocking on the WFI handler's +//! `pthread_cond_timedwait`. End result: a fully-idle 8-vCPU guest burns +//! ~670% host CPU. +//! +//! We dump QEMU's auto-generated DTB once (via `-machine virt,dumpdtb=`), +//! splice in the missing nodes, cache the patched copy, and pass it back +//! on the real launch with `-dtb`. With the driver bound, idle drops by +//! ~50% (the deeper PSCI suspend state stays cosmetic because HVF doesn't +//! implement CPU_SUSPEND any deeper than WFI today, but state0/WFI +//! through cpuidle is enough — the framework binding alone fixes the +//! vmexit-loop pattern). 
+ +use anyhow::{bail, Context, Result}; + +const FDT_MAGIC: u32 = 0xd00d_feed; +const FDT_VERSION_OUT: u32 = 17; +const FDT_LAST_COMP_VERSION_OUT: u32 = 16; +const FDT_BEGIN_NODE: u32 = 0x1; +const FDT_END_NODE: u32 = 0x2; +const FDT_PROP: u32 = 0x3; +const FDT_NOP: u32 = 0x4; +const FDT_END: u32 = 0x9; + +#[derive(Debug, Clone)] +pub struct Property { + pub name: String, + pub value: Vec, +} + +#[derive(Debug, Clone)] +pub struct Node { + pub name: String, + pub props: Vec, + pub children: Vec, +} + +impl Node { + pub fn new(name: impl Into) -> Self { + Self { name: name.into(), props: Vec::new(), children: Vec::new() } + } + + pub fn set_prop(&mut self, name: &str, value: Vec) { + if let Some(p) = self.props.iter_mut().find(|p| p.name == name) { + p.value = value; + } else { + self.props.push(Property { name: name.to_string(), value }); + } + } + + pub fn child_mut(&mut self, name: &str) -> Option<&mut Node> { + self.children.iter_mut().find(|c| c.name == name) + } +} + +/// Parsed DTB: the root node tree, plus the original memory reservation +/// block (preserved verbatim on round-trip) and the original +/// `boot_cpuid_phys` header field. +pub struct Fdt { + pub root: Node, + pub mem_rsv: Vec<(u64, u64)>, + pub boot_cpuid_phys: u32, +} + +pub fn parse(data: &[u8]) -> Result { + if data.len() < 40 { + bail!("DTB too short: {} bytes", data.len()); + } + let read_u32 = |off: usize| -> Result { + let slice = data + .get(off..off + 4) + .with_context(|| format!("DTB header truncated at offset {off}"))?; + Ok(u32::from_be_bytes(slice.try_into().unwrap())) + }; + let magic = read_u32(0)?; + if magic != FDT_MAGIC { + bail!("bad DTB magic {magic:#x}, expected {FDT_MAGIC:#x}"); + } + let totalsize = read_u32(4)? as usize; + let off_dt_struct = read_u32(8)? as usize; + let off_dt_strings = read_u32(12)? as usize; + let off_mem_rsvmap = read_u32(16)? as usize; + let version = read_u32(20)?; + let boot_cpuid_phys = read_u32(28)?; + let size_dt_strings = read_u32(32)? 
as usize; + let size_dt_struct = read_u32(36)? as usize; + if version < 16 { + bail!("unsupported DTB version {version} (need v16+)"); + } + if data.len() < totalsize { + bail!("DTB truncated: header says {totalsize} bytes, got {}", data.len()); + } + if off_dt_struct + size_dt_struct > data.len() + || off_dt_strings + size_dt_strings > data.len() + { + bail!("DTB struct/strings offsets out of bounds"); + } + + let mut mem_rsv = Vec::new(); + let mut p = off_mem_rsvmap; + loop { + if p + 16 > data.len() { + bail!("DTB memory reservation block truncated"); + } + let addr = u64::from_be_bytes(data[p..p + 8].try_into().unwrap()); + let size = u64::from_be_bytes(data[p + 8..p + 16].try_into().unwrap()); + p += 16; + if addr == 0 && size == 0 { + break; + } + mem_rsv.push((addr, size)); + } + + let mut parser = Parser { data, pos: off_dt_struct, strings_base: off_dt_strings }; + let first = parser.read_u32()?; + if first != FDT_BEGIN_NODE { + bail!("DTB struct block must start with BEGIN_NODE, got {first:#x}"); + } + let root = parser.read_node()?; + let last = parser.read_u32()?; + if last != FDT_END { + bail!("DTB struct block missing FDT_END terminator, got {last:#x}"); + } + Ok(Fdt { root, mem_rsv, boot_cpuid_phys }) +} + +struct Parser<'a> { + data: &'a [u8], + pos: usize, + strings_base: usize, +} + +impl<'a> Parser<'a> { + fn read_u32(&mut self) -> Result { + let slice = self + .data + .get(self.pos..self.pos + 4) + .with_context(|| format!("DTB truncated reading u32 at {}", self.pos))?; + self.pos += 4; + Ok(u32::from_be_bytes(slice.try_into().unwrap())) + } + + fn read_cstr(&mut self) -> Result { + let start = self.pos; + while self.pos < self.data.len() && self.data[self.pos] != 0 { + self.pos += 1; + } + if self.pos >= self.data.len() { + bail!("DTB cstr unterminated at offset {start}"); + } + let s = std::str::from_utf8(&self.data[start..self.pos]) + .with_context(|| format!("non-utf8 name at offset {start}"))? 
+ .to_string(); + self.pos += 1; + while self.pos % 4 != 0 { + self.pos += 1; + } + Ok(s) + } + + fn read_string_at(&self, off: usize) -> Result { + let start = self.strings_base + off; + let mut end = start; + while end < self.data.len() && self.data[end] != 0 { + end += 1; + } + if end >= self.data.len() { + bail!("DTB strings entry unterminated at offset {off}"); + } + Ok(std::str::from_utf8(&self.data[start..end]) + .with_context(|| format!("non-utf8 prop name at strings offset {off}"))? + .to_string()) + } + + fn read_node(&mut self) -> Result { + let name = self.read_cstr()?; + let mut node = Node::new(name); + loop { + let tok = self.read_u32()?; + match tok { + FDT_PROP => { + let len = self.read_u32()? as usize; + let nameoff = self.read_u32()? as usize; + let val = self + .data + .get(self.pos..self.pos + len) + .with_context(|| { + format!("DTB prop value truncated at offset {}", self.pos) + })? + .to_vec(); + self.pos += len; + while self.pos % 4 != 0 { + self.pos += 1; + } + node.props.push(Property { + name: self.read_string_at(nameoff)?, + value: val, + }); + } + FDT_BEGIN_NODE => { + let child = self.read_node()?; + node.children.push(child); + } + FDT_END_NODE => return Ok(node), + FDT_NOP => {} + other => bail!("unexpected DTB token {other:#x} at pos {}", self.pos - 4), + } + } + } +} + +struct Emitter { + structs: Vec, + strings: Vec, +} + +impl Emitter { + fn new() -> Self { Self { structs: Vec::new(), strings: Vec::new() } } + + fn intern(&mut self, name: &str) -> u32 { + let bytes = name.as_bytes(); + let mut i = 0; + while i < self.strings.len() { + let mut j = i; + while j < self.strings.len() && self.strings[j] != 0 { + j += 1; + } + if &self.strings[i..j] == bytes { + return i as u32; + } + i = j + 1; + } + let off = self.strings.len() as u32; + self.strings.extend_from_slice(bytes); + self.strings.push(0); + off + } + + fn push_u32(&mut self, v: u32) { self.structs.extend_from_slice(&v.to_be_bytes()); } + + fn pad4(&mut self) { + while 
self.structs.len() % 4 != 0 { + self.structs.push(0); + } + } + + fn emit_node(&mut self, node: &Node) { + self.push_u32(FDT_BEGIN_NODE); + self.structs.extend_from_slice(node.name.as_bytes()); + self.structs.push(0); + self.pad4(); + for p in &node.props { + self.push_u32(FDT_PROP); + self.push_u32(p.value.len() as u32); + let off = self.intern(&p.name); + self.push_u32(off); + self.structs.extend_from_slice(&p.value); + self.pad4(); + } + for c in &node.children { + self.emit_node(c); + } + self.push_u32(FDT_END_NODE); + } +} + +pub fn serialize(fdt: &Fdt) -> Vec { + let mut em = Emitter::new(); + em.emit_node(&fdt.root); + em.push_u32(FDT_END); + + let mut rsvbuf = Vec::new(); + for (a, s) in &fdt.mem_rsv { + rsvbuf.extend_from_slice(&a.to_be_bytes()); + rsvbuf.extend_from_slice(&s.to_be_bytes()); + } + rsvbuf.extend_from_slice(&[0u8; 16]); // terminator + + let header_size = 40usize; + let off_mem_rsvmap = header_size; + let off_dt_struct = off_mem_rsvmap + rsvbuf.len(); + let off_dt_strings = off_dt_struct + em.structs.len(); + let totalsize = off_dt_strings + em.strings.len(); + + let mut out = Vec::with_capacity(totalsize); + out.extend_from_slice(&FDT_MAGIC.to_be_bytes()); + out.extend_from_slice(&(totalsize as u32).to_be_bytes()); + out.extend_from_slice(&(off_dt_struct as u32).to_be_bytes()); + out.extend_from_slice(&(off_dt_strings as u32).to_be_bytes()); + out.extend_from_slice(&(off_mem_rsvmap as u32).to_be_bytes()); + out.extend_from_slice(&FDT_VERSION_OUT.to_be_bytes()); + out.extend_from_slice(&FDT_LAST_COMP_VERSION_OUT.to_be_bytes()); + out.extend_from_slice(&fdt.boot_cpuid_phys.to_be_bytes()); + out.extend_from_slice(&(em.strings.len() as u32).to_be_bytes()); + out.extend_from_slice(&(em.structs.len() as u32).to_be_bytes()); + out.extend_from_slice(&rsvbuf); + out.extend_from_slice(&em.structs); + out.extend_from_slice(&em.strings); + out +} + +fn max_phandle(node: &Node) -> u32 { + let mut max = 0; + fn walk(n: &Node, max: &mut u32) { + for p in 
&n.props { + if (p.name == "phandle" || p.name == "linux,phandle") && p.value.len() == 4 { + let v = u32::from_be_bytes(p.value.as_slice().try_into().unwrap()); + if v > *max && v != u32::MAX { + *max = v; + } + } + } + for c in &n.children { + walk(c, max); + } + } + walk(node, &mut max); + max +} + +fn be32(v: u32) -> Vec { v.to_be_bytes().to_vec() } +fn strprop(s: &str) -> Vec { + let mut v = s.as_bytes().to_vec(); + v.push(0); + v +} + +/// Splice a single PSCI idle-state node into the root and add +/// `cpu-idle-states = ` to each `/cpus/cpu@N` for N in 0..smp. +/// +/// Latency values are intentionally conservative. With aggressive thresholds +/// (entry=10, exit=20, min-residency=100) the kernel falls into a polling +/// code path for sub-100us idles and host CPU goes *up*, not down. +/// entry=100/exit=250/min-residency=1000 keeps cpuidle going through plain +/// WFI which HVF blocks cleanly on `pthread_cond_timedwait`. Confirmed +/// empirically: 670% → 275% on smp=8 idle. +/// +/// Returns the phandle assigned to the new state node, for diagnostics. +pub fn patch_idle_states(fdt: &mut Fdt, smp: u32) -> Result { + let phandle = max_phandle(&fdt.root) + 1; + + let mut idle_states = Node::new("idle-states"); + idle_states.set_prop("entry-method", strprop("psci")); + + let mut sleep = Node::new("cpu-sleep-0"); + sleep.set_prop("compatible", strprop("arm,idle-state")); + sleep.set_prop("idle-state-name", strprop("cpu-sleep")); + // PSCI v0.2+ power_state encoding for a CPU-level powerdown: + // bit 16 = StateType (1 = powerdown), bits 25:24 = AffinityLevel (0 = CPU).
+ sleep.set_prop("arm,psci-suspend-param", be32(0x0001_0000)); + sleep.set_prop("entry-latency-us", be32(100)); + sleep.set_prop("exit-latency-us", be32(250)); + sleep.set_prop("min-residency-us", be32(1000)); + sleep.set_prop("local-timer-stop", Vec::new()); + sleep.set_prop("phandle", be32(phandle)); + idle_states.children.push(sleep); + fdt.root.children.push(idle_states); + + let cpus = fdt + .root + .child_mut("cpus") + .context("DTB has no /cpus node — not a virt machine?")?; + let mut patched = 0u32; + for i in 0..smp { + let name = format!("cpu@{i}"); + if let Some(cpu) = cpus.child_mut(&name) { + cpu.set_prop("cpu-idle-states", be32(phandle)); + patched += 1; + } + } + if patched == 0 { + bail!("DTB has no /cpus/cpu@N nodes — refusing to patch"); + } + Ok(phandle) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Build a minimal synthetic DTB with /cpus/cpu@0..cpu@N, round-trip + /// it through parse → patch → serialize → parse, verify shape. + fn synth_dtb(smp: u32) -> Vec { + let mut root = Node::new(""); + root.set_prop("#address-cells", be32(2)); + root.set_prop("#size-cells", be32(2)); + let mut cpus = Node::new("cpus"); + cpus.set_prop("#address-cells", be32(1)); + cpus.set_prop("#size-cells", be32(0)); + for i in 0..smp { + let mut cpu = Node::new(format!("cpu@{i}")); + cpu.set_prop("device_type", strprop("cpu")); + cpu.set_prop("compatible", strprop("arm,armv8")); + cpu.set_prop("reg", be32(i)); + cpu.set_prop("enable-method", strprop("psci")); + cpu.set_prop("phandle", be32(0x8000 + i)); + cpus.children.push(cpu); + } + root.children.push(cpus); + let fdt = Fdt { root, mem_rsv: vec![], boot_cpuid_phys: 0 }; + serialize(&fdt) + } + + #[test] + fn round_trip_synthetic() { + let bytes = synth_dtb(4); + let fdt = parse(&bytes).unwrap(); + assert_eq!(fdt.root.children[0].name, "cpus"); + assert_eq!(fdt.root.children[0].children.len(), 4); + assert_eq!(fdt.boot_cpuid_phys, 0); + } + + #[test] + fn patch_adds_idle_states_and_cpu_props() { + let 
bytes = synth_dtb(4); + let mut fdt = parse(&bytes).unwrap(); + let phandle = patch_idle_states(&mut fdt, 4).unwrap(); + // existing phandles go up to 0x8003 → new one should be 0x8004 + assert_eq!(phandle, 0x8004); + + let idle = fdt + .root + .children + .iter() + .find(|c| c.name == "idle-states") + .expect("idle-states node missing"); + assert_eq!(idle.children[0].name, "cpu-sleep-0"); + + let cpus = fdt.root.children.iter().find(|c| c.name == "cpus").unwrap(); + for cpu in &cpus.children { + let cis = cpu + .props + .iter() + .find(|p| p.name == "cpu-idle-states") + .expect("cpu-idle-states missing on cpu node"); + assert_eq!(u32::from_be_bytes(cis.value.as_slice().try_into().unwrap()), phandle); + } + } + + #[test] + fn patch_then_serialize_then_reparse_matches() { + let bytes = synth_dtb(2); + let mut fdt = parse(&bytes).unwrap(); + patch_idle_states(&mut fdt, 2).unwrap(); + let out = serialize(&fdt); + let rt = parse(&out).unwrap(); + assert!(rt.root.children.iter().any(|c| c.name == "idle-states")); + let cpus = rt.root.children.iter().find(|c| c.name == "cpus").unwrap(); + assert!(cpus.children.iter().all(|c| c.props.iter().any(|p| p.name == "cpu-idle-states"))); + } + + #[test] + fn parse_rejects_bad_magic() { + let mut bytes = synth_dtb(1); + bytes[0] = 0; + assert!(parse(&bytes).is_err()); + } + + #[test] + fn patch_fails_when_no_cpus_node() { + let mut root = Node::new(""); + root.set_prop("#address-cells", be32(2)); + let fdt_in = Fdt { root, mem_rsv: vec![], boot_cpuid_phys: 0 }; + let bytes = serialize(&fdt_in); + let mut fdt = parse(&bytes).unwrap(); + assert!(patch_idle_states(&mut fdt, 4).is_err()); + } + + #[test] + fn boot_cpuid_phys_preserved() { + let mut root = Node::new(""); + let mut cpus = Node::new("cpus"); + let mut cpu = Node::new("cpu@0"); + cpu.set_prop("reg", be32(0)); + cpus.children.push(cpu); + root.children.push(cpus); + let fdt_in = Fdt { root, mem_rsv: vec![], boot_cpuid_phys: 0x42 }; + let bytes = serialize(&fdt_in); + let 
parsed = parse(&bytes).unwrap(); + assert_eq!(parsed.boot_cpuid_phys, 0x42); + } +} diff --git a/src/utils/vm/mod.rs b/src/utils/vm/mod.rs index 261e3538..672e66d1 100644 --- a/src/utils/vm/mod.rs +++ b/src/utils/vm/mod.rs @@ -25,6 +25,7 @@ pub mod channel; #[cfg(target_os = "macos")] pub mod client; pub mod config; +pub mod fdt; pub mod forward; pub mod lifecycle; pub mod manifest; diff --git a/src/utils/vm/qemu.rs b/src/utils/vm/qemu.rs index 7e0910a7..8f8e3632 100644 --- a/src/utils/vm/qemu.rs +++ b/src/utils/vm/qemu.rs @@ -6,9 +6,10 @@ //! layer can stop later. use anyhow::{bail, Context, Result}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::process::Stdio; +use super::fdt; use super::manifest::Manifest; use super::state::VmPaths; @@ -221,9 +222,140 @@ pub fn build_qemu_args( args.push("-pidfile".into()); args.push(paths.pid_file().to_string_lossy().into_owned()); + // On arm64, splice PSCI idle-states into the DTB so the in-guest + // cpuidle driver actually binds. See fdt.rs for the why; the short + // version is "QEMU virt doesn't emit cpu-idle-states bindings, so + // CONFIG_ARM_PSCI_CPUIDLE has nothing to attach to and idle CPUs spin + // through HVF vmexit/vmenter at ~80% host each." + // + // Failures degrade gracefully: log + skip, kernel falls back to the + // auto-generated DTB it would have used anyway. 
+ if matches!(arch.as_str(), "arm64" | "aarch64") { + let dtb_override = std::env::var("AVOCADO_VM_DTB").ok().filter(|s| !s.is_empty()); + match dtb_override { + Some(path) => { + args.push("-dtb".into()); + args.push(path); + } + None => match ensure_idle_states_dtb(paths, cfg) { + Ok(path) => { + args.push("-dtb".into()); + args.push(path.to_string_lossy().into_owned()); + } + Err(e) => { + eprintln!( + "warn: PSCI idle-states DTB preparation failed ({e}); booting with \ + auto-generated DTB (expect higher host CPU at guest idle)" + ); + } + }, + } + } + Ok(args) } +/// Produce a DTB patched with PSCI `idle-states` for the current QEMU +/// config. Cached under `~/.avocado/vm/dtb/`, keyed by parameters that +/// affect the DT layout (memory range and cpu count change DT nodes; +/// QEMU version may change auto-generated property shapes). +/// +/// The cache miss path runs `qemu-system-aarch64 -machine virt,dumpdtb=…` +/// to capture QEMU's auto-generated DTB, splices in the missing nodes, +/// then atomically renames into place. Cost is ~500ms per cache miss, +/// hidden under the rest of VM boot. Cache hits return immediately. 
+fn ensure_idle_states_dtb(paths: &VmPaths, cfg: &QemuConfig) -> Result { + let qemu_version = qemu_version_tag("qemu-system-aarch64")?; + let cache_dir = paths.dtb_cache_dir(); + std::fs::create_dir_all(&cache_dir) + .with_context(|| format!("failed to create {}", cache_dir.display()))?; + let cache_path = cache_dir.join(format!( + "virt-smp{}-m{}-q{}.dtb", + cfg.cpus, cfg.memory_mib, qemu_version + )); + if cache_path.is_file() { + return Ok(cache_path); + } + let tmp = tempfile::NamedTempFile::new_in(&cache_dir) + .context("failed to create temp file for DTB dump")?; + dump_base_dtb("qemu-system-aarch64", cfg, tmp.path()) + .context("failed to dump base DTB from QEMU")?; + let raw = std::fs::read(tmp.path()) + .with_context(|| format!("failed to read dumped DTB at {}", tmp.path().display()))?; + let mut fdt = fdt::parse(&raw).context("failed to parse QEMU-generated DTB")?; + fdt::patch_idle_states(&mut fdt, cfg.cpus) + .context("failed to splice idle-states into DTB")?; + let patched = fdt::serialize(&fdt); + std::fs::write(tmp.path(), &patched) + .with_context(|| format!("failed to write patched DTB to {}", tmp.path().display()))?; + tmp.persist(&cache_path) + .with_context(|| format!("failed to install patched DTB at {}", cache_path.display()))?; + Ok(cache_path) +} + +/// Run `qemu-system-aarch64 -machine virt,dumpdtb=PATH` and let QEMU +/// write its auto-generated DTB, then exit. We pass the same +/// `-machine`, `-smp`, `-m`, `-cpu`, `-accel` flags that affect DT +/// generation so the dumped tree matches what the real launch would see. 
+fn dump_base_dtb(qemu_bin: &str, cfg: &QemuConfig, out: &Path) -> Result<()> { + let machine = format!("virt,dumpdtb={}", out.display()); + let status = std::process::Command::new(qemu_bin) + .args([ + "-machine", + &machine, + "-accel", + accel_flag(), + "-cpu", + cpu_for("aarch64"), + "-smp", + &cfg.cpus.to_string(), + "-m", + &format!("{}M", cfg.memory_mib), + "-nographic", + ]) + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::piped()) + .output() + .with_context(|| format!("failed to spawn {qemu_bin} for dumpdtb"))?; + if !status.status.success() { + let stderr = String::from_utf8_lossy(&status.stderr); + bail!( + "{qemu_bin} dumpdtb exited with {}: {}", + status.status, + stderr.trim() + ); + } + Ok(()) +} + +/// Stable, filename-safe identifier for the QEMU binary's version, used +/// in the DTB cache key. First line of `--version` looks like +/// `QEMU emulator version 11.0.0` — we slugify the version token. +fn qemu_version_tag(qemu_bin: &str) -> Result { + let output = std::process::Command::new(qemu_bin) + .arg("--version") + .output() + .with_context(|| format!("failed to run `{qemu_bin} --version`"))?; + if !output.status.success() { + bail!("{qemu_bin} --version exited with {}", output.status); + } + let first = String::from_utf8_lossy(&output.stdout) + .lines() + .next() + .unwrap_or("") + .to_string(); + // "QEMU emulator version 11.0.0" -> "11.0.0" + let version = first + .split_whitespace() + .find(|tok| tok.chars().next().is_some_and(|c| c.is_ascii_digit())) + .unwrap_or("unknown"); + Ok(version + .chars() + .map(|c| if c.is_ascii_alphanumeric() || c == '.' || c == '-' { c } else { '_' }) + .collect()) +} + /// Spawn QEMU detached from the controlling terminal. Returns the child pid. /// The child writes its own pidfile thanks to `-pidfile`; we also capture /// the spawn-time pid so the caller can `kill` it directly if needed. 
diff --git a/src/utils/vm/state.rs b/src/utils/vm/state.rs index bcab64fb..00b49804 100644 --- a/src/utils/vm/state.rs +++ b/src/utils/vm/state.rs @@ -171,6 +171,12 @@ impl VmPaths { pub fn config_file(&self) -> PathBuf { self.root.join("config.yaml") } + /// Cache directory for patched DTBs (one per `(smp, memory_mib, + /// qemu_version)` combination). Tiny files (~10 KB each), no eviction + /// needed. + pub fn dtb_cache_dir(&self) -> PathBuf { + self.root.join("dtb") + } } /// Find the user's home directory. Wraps the `directories` crate so callers From ea850e07e4729a4eadf299af7d2dd9076a17bba3 Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Tue, 2 Jun 2026 21:22:42 -0400 Subject: [PATCH 15/30] perf(vm): hibernation supervisor with wake-on-connect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a long-lived `avocado vm supervise` process spawned alongside QEMU. Owns the user-facing SSH port and docker socket; QEMU's hostfwd moves to a loopback-only internal port. The supervisor: - Proxies inbound TCP to QEMU's internal hostfwd. On accept, sends QMP `cont` if the VM is paused; SSH handshake then continues against the freshly-resumed guest. - Owns `~/.avocado/vm/docker.sock`. On accept, wakes the VM and lazily spawns the ssh -L tunnel to /run/docker.sock in the guest (cached for the awake-window, torn down on pause so QEMU can sleep cleanly). - Tracks active connections + idle timer. With no inbound activity for `idle.hibernate_after_secs` (default 10s for testing), sends QMP `stop`. Host CPU on QEMU drops to ~0% while RAM stays resident. Any subsequent SSH or docker connection wakes it transparently. Cache key for the DTB also switches from `qemu --version` to the QEMU binary mtime — saves ~300-500ms of subprocess overhead on every VM start. Mtime naturally invalidates on `brew upgrade qemu`.
Known limitations (deferred): - Docker forwarder lifecycle is now supervisor-owned when hibernation is enabled (idle_after_secs > 0); legacy long-lived forwarder still used when disabled to avoid regressing existing non-hibernating setups. - CPU hotplug for awake-but-idle floor: QMP `device_add` returns "machine does not support hot-plugging CPUs" on QEMU 11 + HVF + ARM virt. Defer; Linux CPU offline is a fallback path if needed later. --- src/commands/vm/mod.rs | 1 + src/commands/vm/supervise.rs | 40 +++ src/main.rs | 48 ++++ src/utils/vm/config.rs | 17 ++ src/utils/vm/lifecycle.rs | 195 +++++++++++++-- src/utils/vm/mod.rs | 1 + src/utils/vm/qemu.rs | 52 ++-- src/utils/vm/state.rs | 28 +++ src/utils/vm/supervisor.rs | 467 +++++++++++++++++++++++++++++++++++ 9 files changed, 806 insertions(+), 43 deletions(-) create mode 100644 src/commands/vm/supervise.rs create mode 100644 src/utils/vm/supervisor.rs diff --git a/src/commands/vm/mod.rs b/src/commands/vm/mod.rs index daf97cb2..585cd74a 100644 --- a/src/commands/vm/mod.rs +++ b/src/commands/vm/mod.rs @@ -12,4 +12,5 @@ pub mod shell; pub mod start; pub mod status; pub mod stop; +pub mod supervise; pub mod update; diff --git a/src/commands/vm/supervise.rs b/src/commands/vm/supervise.rs new file mode 100644 index 00000000..ce38e4d0 --- /dev/null +++ b/src/commands/vm/supervise.rs @@ -0,0 +1,40 @@ +//! `avocado vm supervise` — long-lived host-side hibernation supervisor. +//! +//! Spawned by `avocado vm start` after QEMU is reachable. Not intended +//! to be run by users directly (hidden in CLI help); the lifecycle +//! layer owns the argv. See [`crate::utils::vm::supervisor`] for the +//! actual loop. 
+ +use anyhow::Result; +use std::path::PathBuf; + +use crate::utils::vm::supervisor::{run, RunArgs}; + +pub struct SuperviseCommand { + pub user_port: u16, + pub internal_port: u16, + pub qmp_socket: PathBuf, + pub idle_after_secs: u64, + pub pid_file: PathBuf, + pub docker_socket: PathBuf, + pub docker_socket_internal: PathBuf, + pub ssh_key: PathBuf, + pub known_hosts: PathBuf, +} + +impl SuperviseCommand { + pub async fn execute(self) -> Result<()> { + run(RunArgs { + user_port: self.user_port, + internal_port: self.internal_port, + qmp_socket: self.qmp_socket, + idle_after_secs: self.idle_after_secs, + pid_file: self.pid_file, + docker_socket: self.docker_socket, + docker_socket_internal: self.docker_socket_internal, + ssh_key: self.ssh_key, + known_hosts: self.known_hosts, + }) + .await + } +} diff --git a/src/main.rs b/src/main.rs index 715386ac..28f4ce07 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3080,6 +3080,29 @@ async fn main() -> Result<()> { cmd.execute().await } VmCommands::Stop { force } => commands::vm::stop::StopCommand { force }.execute().await, + VmCommands::Supervise { + user_port, + internal_port, + qmp_socket, + idle_after_secs, + pid_file, + docker_socket, + docker_socket_internal, + ssh_key, + known_hosts, + } => commands::vm::supervise::SuperviseCommand { + user_port, + internal_port, + qmp_socket, + idle_after_secs, + pid_file, + docker_socket, + docker_socket_internal, + ssh_key, + known_hosts, + } + .execute() + .await, VmCommands::Status => commands::vm::status::StatusCommand.execute().await, VmCommands::Shell { command } => { commands::vm::shell::ShellCommand { command } @@ -4542,6 +4565,31 @@ enum VmCommands { #[command(subcommand)] command: VmConfigCommands, }, + /// Long-lived hibernation supervisor. Internal — spawned by `vm start`, + /// not for direct use. Owns the user-facing SSH port AND docker + /// socket, proxies to QEMU's internal hostfwd / SSH tunnel, and + /// sends QMP stop/cont on the idle timeout. 
+ #[command(hide = true)] + Supervise { + #[arg(long)] + user_port: u16, + #[arg(long)] + internal_port: u16, + #[arg(long)] + qmp_socket: std::path::PathBuf, + #[arg(long)] + idle_after_secs: u64, + #[arg(long)] + pid_file: std::path::PathBuf, + #[arg(long)] + docker_socket: std::path::PathBuf, + #[arg(long)] + docker_socket_internal: std::path::PathBuf, + #[arg(long)] + ssh_key: std::path::PathBuf, + #[arg(long)] + known_hosts: std::path::PathBuf, + }, /// Check for and apply VM image updates from the release channel. /// Stops + restarts the VM if it was running. Preserves the existing /// `var` partition; use `vm reset` to wipe state. diff --git a/src/utils/vm/config.rs b/src/utils/vm/config.rs index a01c9aac..55d14631 100644 --- a/src/utils/vm/config.rs +++ b/src/utils/vm/config.rs @@ -26,6 +26,9 @@ pub struct VmConfig { #[serde(default, skip_serializing_if = "Option::is_none")] pub runtime: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub idle: Option, + /// Forward-compat bucket for keys this CLI version doesn't know about. /// Preserved verbatim on save so a newer desktop's settings survive an /// older CLI's round-trip. @@ -48,6 +51,20 @@ pub struct RuntimeConfig { pub extra: BTreeMap, } +/// Hibernation knobs. The supervisor process (`avocado vm supervise`) +/// proxies the user-facing SSH port to QEMU's internal hostfwd; after +/// `hibernate_after_secs` of no proxied activity, it sends QMP `stop` +/// to halt the vCPUs. Wake happens automatically on the next incoming +/// connection. Set `hibernate_after_secs` to 0 to disable; if omitted, the CLI default applies. +#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct IdleConfig { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub hibernate_after_secs: Option, + + #[serde(flatten)] + pub extra: BTreeMap, +} + #[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct NetworkConfig { /// Override the guest's DNS resolvers.
Applied post-boot via diff --git a/src/utils/vm/lifecycle.rs b/src/utils/vm/lifecycle.rs index 921af48a..ddeddd97 100644 --- a/src/utils/vm/lifecycle.rs +++ b/src/utils/vm/lifecycle.rs @@ -163,6 +163,14 @@ pub async fn start(opts: StartOptions) -> Result { }; state::write_ssh_port(&paths, ssh_port)?; + // Loopback-only port QEMU's hostfwd binds to. The supervisor + // listens on the user-facing `ssh_port` and proxies through to + // this one; downstream callers (vm shell, forward.rs, Avocado.app) + // only ever see `ssh_port`. + let internal_ssh_port = qemu::pick_free_port()?; + std::fs::write(paths.internal_ssh_port_file(), internal_ssh_port.to_string()) + .with_context(|| format!("writing {}", paths.internal_ssh_port_file().display()))?; + // Now that the port is known, write the ssh-config + wire it into // ~/.ssh/config. This is required for `DOCKER_HOST=ssh://avocado-vm` // to resolve in any subprocess we spawn — Docker's ssh transport reads @@ -178,7 +186,7 @@ pub async fn start(opts: StartOptions) -> Result { let cfg = QemuConfig { memory_mib, cpus, - ssh_port, + ssh_port: internal_ssh_port, cmdline_extra: opts.cmdline_extra, artifact_dir: artifact_dir.clone(), workspace: workspace.clone(), @@ -200,6 +208,14 @@ pub async fn start(opts: StartOptions) -> Result { p }; + // Spawn the hibernation supervisor. Owns the user-facing SSH port + // and proxies through to QEMU's internal hostfwd. After + // `idle_after_secs` of no proxied activity, sends QMP `stop` to + // halt the vCPUs; wakes on the next incoming TCP. boot_sync below + // goes through the proxy, which is why we spawn before waiting. + let idle_after_secs = resolve_idle_after_secs(&paths); + spawn_supervisor(&paths, ssh_port, internal_ssh_port, idle_after_secs).await?; + // Wait for the guest to become ready — first signal wins (qga vs SSH). 
let signal = super::boot_sync::wait_for_guest_ready(&paths.qga_socket(), ssh_port, None) .await @@ -239,18 +255,24 @@ pub async fn start(opts: StartOptions) -> Result { ); } - // Bring up the docker-socket SSH forward so DOCKER_HOST=unix://… works - // from the host without touching the user's ~/.ssh/config. Non-fatal - // on error: a working VM with just SSH access is still useful for - // debugging. - if let Err(e) = super::forward::start(&paths, ssh_port).await { - crate::utils::output::print_warning( - &format!( - "docker socket forward failed: {e:#}. Local DOCKER_HOST routing won't work until you start it. \ - (`avocado vm stop && avocado vm start` retries.)" - ), - crate::utils::output::OutputLevel::Normal, - ); + // Docker socket. With hibernation enabled (supervisor running), the + // supervisor owns `docker.sock` directly and manages an SSH `-L` + // tunnel internally with VM wake/pause lifecycle. Without + // hibernation (idle_after_secs == 0), keep the legacy long-lived + // forwarder behavior so existing setups don't regress. + // + // Non-fatal on error: a working VM with just SSH access is still + // useful for debugging. + if idle_after_secs == 0 { + if let Err(e) = super::forward::start(&paths, ssh_port).await { + crate::utils::output::print_warning( + &format!( + "docker socket forward failed: {e:#}. Local DOCKER_HOST routing won't work until you start it. \ + (`avocado vm stop && avocado vm start` retries.)" + ), + crate::utils::output::OutputLevel::Normal, + ); + } } notify_desktop( @@ -281,9 +303,12 @@ pub async fn stop(force: bool) -> Result<()> { async fn stop_inner(force: bool) -> Result<()> { let paths = VmPaths::resolve()?; - // Always try to tear down the docker socket forward first — the SSH - // process can outlive QEMU if we shut down the VM by signal, leaving - // a stale `docker.sock` on the host. + // Tear down auxiliary host-side processes BEFORE QEMU. 
The supervisor + // owns the user-facing SSH port; if we left it running after QEMU + // exited, the next `vm start` would race against a still-bound port. + // The docker socket forwarder is an SSH child that can outlive QEMU + // if we shut down by signal, leaving a stale `docker.sock`. + stop_supervisor(&paths); let _ = super::forward::stop(&paths).await; let pid = match state::read_pid(&paths)? { @@ -731,3 +756,141 @@ fn write_ssh_config(paths: &VmPaths, ssh_port: u16) -> Result<()> { .with_context(|| format!("writing {}", paths.ssh_config().display()))?; Ok(()) } + +/// Default idle timeout in seconds when neither config nor env var sets +/// one. Aggressive for testing while the hibernation supervisor is new +/// — production should land on a more user-friendly default (multiple +/// minutes) once the wake-on-connect path has been exercised in real +/// workflows. +const DEFAULT_IDLE_AFTER_SECS: u64 = 10; + +/// Resolve the hibernate timeout. Env var wins (one-shot override for +/// experimentation), else the persisted `idle.hibernate_after_secs`, +/// else the default. `0` disables hibernation while keeping the proxy +/// up — useful for isolating proxy issues from QMP issues. +fn resolve_idle_after_secs(paths: &VmPaths) -> u64 { + if let Ok(raw) = std::env::var("AVOCADO_VM_IDLE_HIBERNATE_SECS") { + if let Ok(parsed) = raw.parse::() { + return parsed; + } + } + if let Ok(cfg) = super::config::VmConfig::load(paths) { + if let Some(idle) = &cfg.idle { + if let Some(v) = idle.hibernate_after_secs { + return v; + } + } + } + DEFAULT_IDLE_AFTER_SECS +} + +/// Spawn `avocado vm supervise` as a detached child. Same daemonization +/// pattern as QEMU (setsid + null stdio), pid recorded so `stop_inner` +/// can take it down before QEMU. We re-exec the running binary +/// (`std::env::current_exe`) rather than expecting an installed +/// `avocado` on PATH — that way a `cargo run` or out-of-tree binary +/// supervises itself instead of pulling in a stale system copy. 
+/// Best-effort SIGTERM → SIGKILL on the supervisor pid, then remove +/// its pidfile + internal-ssh-port marker. Idempotent — missing +/// pidfile / dead pid is a no-op. +fn stop_supervisor(paths: &VmPaths) { + let pidfile = paths.supervisor_pid(); + if let Ok(raw) = std::fs::read_to_string(&pidfile) { + if let Ok(pid) = raw.trim().parse::() { + if state::pid_alive(pid) { + send_signal(pid, SIGTERM); + for _ in 0..20 { + if !state::pid_alive(pid) { + break; + } + std::thread::sleep(Duration::from_millis(50)); + } + if state::pid_alive(pid) { + send_signal(pid, SIGKILL); + } + } + } + } + let _ = std::fs::remove_file(pidfile); + let _ = std::fs::remove_file(paths.internal_ssh_port_file()); +} + +async fn spawn_supervisor( + paths: &VmPaths, + user_port: u16, + internal_port: u16, + idle_after_secs: u64, +) -> Result<()> { + let exe = std::env::current_exe().context("locating current avocado binary")?; + let mut cmd = tokio::process::Command::new(&exe); + cmd.args([ + "vm", + "supervise", + "--user-port", + &user_port.to_string(), + "--internal-port", + &internal_port.to_string(), + "--qmp-socket", + &paths.qmp_socket().to_string_lossy(), + "--idle-after-secs", + &idle_after_secs.to_string(), + "--pid-file", + &paths.supervisor_pid().to_string_lossy(), + "--docker-socket", + &paths.docker_socket().to_string_lossy(), + "--docker-socket-internal", + &paths.docker_socket_internal().to_string_lossy(), + "--ssh-key", + &paths.ssh_key().to_string_lossy(), + "--known-hosts", + &paths.known_hosts().to_string_lossy(), + ]); + // Append the supervisor's stderr to ~/.avocado/vm/supervisor.log so + // pause/resume events are recoverable post-mortem. `tail -F` is + // robust to the file appearing only after first launch. 
+ let log = std::fs::OpenOptions::new() + .create(true) + .append(true) + .open(paths.supervisor_log()) + .with_context(|| format!("opening {}", paths.supervisor_log().display()))?; + let log_dup = log.try_clone().context("cloning supervisor log handle")?; + cmd.stdin(std::process::Stdio::null()); + cmd.stdout(log); + cmd.stderr(log_dup); + #[cfg(unix)] + unsafe { + cmd.pre_exec(|| { + let _ = libc::setsid(); + Ok(()) + }); + } + let child = cmd + .spawn() + .with_context(|| format!("failed to spawn supervisor: {}", exe.display()))?; + let spawn_pid = child.id().unwrap_or(0); + drop(child); + + // Poll briefly for the supervisor's listener to come up — proves the + // proxy is ready before boot_sync starts pumping connections through. + let deadline = std::time::Instant::now() + Duration::from_secs(5); + loop { + if tokio::net::TcpStream::connect(("127.0.0.1", user_port)).await.is_ok() { + return Ok(()); + } + if std::time::Instant::now() >= deadline { + // Don't fail the whole boot — log + carry on with whatever + // the supervisor managed to do. Worst case the user-facing + // port refuses connections and the user sees a normal SSH + // connection error. + crate::utils::output::print_warning( + &format!( + "hibernation supervisor (pid {spawn_pid}) didn't bind 127.0.0.1:{user_port} within 5s; \ + proxy may be down. SSH may not work until you restart with `vm stop && vm start`." 
+ ), + crate::utils::output::OutputLevel::Normal, + ); + return Ok(()); + } + tokio::time::sleep(Duration::from_millis(50)).await; + } +} diff --git a/src/utils/vm/mod.rs b/src/utils/vm/mod.rs index 672e66d1..0d09870c 100644 --- a/src/utils/vm/mod.rs +++ b/src/utils/vm/mod.rs @@ -39,3 +39,4 @@ pub mod share; pub mod ssh; pub mod staging; pub mod state; +pub mod supervisor; diff --git a/src/utils/vm/qemu.rs b/src/utils/vm/qemu.rs index 8f8e3632..1f750631 100644 --- a/src/utils/vm/qemu.rs +++ b/src/utils/vm/qemu.rs @@ -265,13 +265,18 @@ pub fn build_qemu_args( /// then atomically renames into place. Cost is ~500ms per cache miss, /// hidden under the rest of VM boot. Cache hits return immediately. fn ensure_idle_states_dtb(paths: &VmPaths, cfg: &QemuConfig) -> Result { - let qemu_version = qemu_version_tag("qemu-system-aarch64")?; + // Cache key uses the QEMU binary's mtime instead of `--version` to + // avoid shelling out on every launch. A `brew upgrade qemu` bumps + // the mtime, which naturally invalidates the cache. The mtime stat + // is microseconds; the `--version` subprocess pays the full dyld + // load cost (~300-500 ms on macOS) for libsnappy/libpng/libfdt. + let qemu_tag = qemu_binary_tag("qemu-system-aarch64")?; let cache_dir = paths.dtb_cache_dir(); std::fs::create_dir_all(&cache_dir) .with_context(|| format!("failed to create {}", cache_dir.display()))?; let cache_path = cache_dir.join(format!( "virt-smp{}-m{}-q{}.dtb", - cfg.cpus, cfg.memory_mib, qemu_version + cfg.cpus, cfg.memory_mib, qemu_tag )); if cache_path.is_file() { return Ok(cache_path); @@ -329,31 +334,24 @@ fn dump_base_dtb(qemu_bin: &str, cfg: &QemuConfig, out: &Path) -> Result<()> { Ok(()) } -/// Stable, filename-safe identifier for the QEMU binary's version, used -/// in the DTB cache key. First line of `--version` looks like -/// `QEMU emulator version 11.0.0` — we slugify the version token. 
-fn qemu_version_tag(qemu_bin: &str) -> Result { - let output = std::process::Command::new(qemu_bin) - .arg("--version") - .output() - .with_context(|| format!("failed to run `{qemu_bin} --version`"))?; - if !output.status.success() { - bail!("{qemu_bin} --version exited with {}", output.status); - } - let first = String::from_utf8_lossy(&output.stdout) - .lines() - .next() - .unwrap_or("") - .to_string(); - // "QEMU emulator version 11.0.0" -> "11.0.0" - let version = first - .split_whitespace() - .find(|tok| tok.chars().next().is_some_and(|c| c.is_ascii_digit())) - .unwrap_or("unknown"); - Ok(version - .chars() - .map(|c| if c.is_ascii_alphanumeric() || c == '.' || c == '-' { c } else { '_' }) - .collect()) +/// Stable, filename-safe identifier for the QEMU binary, used in the +/// DTB cache key. Uses the binary's mtime (seconds since epoch) rather +/// than `--version` so we don't spawn a subprocess on every launch. +/// `brew upgrade qemu` bumps the mtime, naturally invalidating the +/// cache; a binary that hasn't been touched produces the same key +/// indefinitely. +fn qemu_binary_tag(qemu_bin: &str) -> Result { + let path = which_on_path(qemu_bin) + .with_context(|| format!("{qemu_bin} not found on $PATH"))?; + let meta = std::fs::metadata(&path) + .with_context(|| format!("stat {}", path.display()))?; + let mtime = meta + .modified() + .ok() + .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok()) + .map(|d| d.as_secs()) + .unwrap_or(0); + Ok(format!("m{mtime}")) } /// Spawn QEMU detached from the controlling terminal. Returns the child pid. diff --git a/src/utils/vm/state.rs b/src/utils/vm/state.rs index 00b49804..6c161118 100644 --- a/src/utils/vm/state.rs +++ b/src/utils/vm/state.rs @@ -154,10 +154,35 @@ impl VmPaths { pub fn docker_socket(&self) -> PathBuf { self.root.join("docker.sock") } + /// Internal-only landing point for the supervisor-managed SSH `-L` + /// tunnel to `/run/docker.sock`. 
Lives next to `docker.sock` but + /// distinct so the user-facing socket can stay alive (owned by the + /// supervisor) while this one comes and goes with VM wake/pause. + pub fn docker_socket_internal(&self) -> PathBuf { + self.root.join("docker.sock.internal") + } /// PID of the SSH process maintaining the docker socket forward. pub fn forwarder_pid(&self) -> PathBuf { self.root.join("forwarder.pid") } + /// PID of the hibernation supervisor (TCP proxy + QMP stop/cont). + /// Distinct from `pid_file` (QEMU) so the lifecycle layer can tear + /// down the supervisor before QEMU on shutdown. + pub fn supervisor_pid(&self) -> PathBuf { + self.root.join("supervisor.pid") + } + /// Append-only log for the supervisor's pause/resume events. Tail + /// this while reproducing a hibernation issue — every QMP stop / + /// cont and every accept hits this file. + pub fn supervisor_log(&self) -> PathBuf { + self.root.join("supervisor.log") + } + /// Loopback-only port QEMU's SSH hostfwd binds to. The supervisor + /// listens on the user-facing `ssh-port` and proxies to this one; + /// callers never connect here directly. + pub fn internal_ssh_port_file(&self) -> PathBuf { + self.root.join("internal-ssh-port") + } /// Absolute path to the artifact directory that was last used for `vm /// start`. The macOS Avocado.app reads this when launched without an /// AVOCADO_VM_DIR env var (Finder/Dock launches inherit a sanitized env @@ -254,7 +279,10 @@ pub fn cleanup_transient(paths: &VmPaths) { paths.ssh_port_file(), paths.lock_file(), paths.docker_socket(), + paths.docker_socket_internal(), paths.forwarder_pid(), + paths.supervisor_pid(), + paths.internal_ssh_port_file(), ] { let _ = std::fs::remove_file(&p); } diff --git a/src/utils/vm/supervisor.rs b/src/utils/vm/supervisor.rs new file mode 100644 index 00000000..0055903e --- /dev/null +++ b/src/utils/vm/supervisor.rs @@ -0,0 +1,467 @@ +//! Host-side hibernation supervisor for the helper VM. +//! +//! 
Architecturally a small proxy server with QMP-driven lifecycle. +//! QEMU is launched with its SSH hostfwd bound to a loopback-only +//! "internal" port; the supervisor listens on the user-facing port +//! (the one in `~/.avocado/vm/ssh-port`) and pipes accepted +//! connections through to the internal port. Doing it this way means +//! *we* see every incoming connection, which gives us: +//! +//! 1. **Idle detection** — when no proxied connection has been active +//! for `idle_after_secs`, we send QMP `stop` to halt all vCPU +//! threads. Host CPU drops to ~0%; guest RAM stays resident. +//! 2. **Wake-on-connect** — on the next incoming TCP, we send QMP +//! `cont` *before* opening the inner connection. The guest resumes +//! in-place and the SSH handshake completes ~100ms later than it +//! would on a live VM. +//! +//! The supervisor also owns the user-facing **docker socket** +//! (`~/.avocado/vm/docker.sock`). On any incoming docker client +//! connection it ensures (a) the VM is awake and (b) a single +//! supervisor-managed `ssh -L` tunnel is running between an internal +//! sock (`docker.sock.internal`) and `/run/docker.sock` in the guest, +//! then pipes the client through. The tunnel comes up lazily on the first +//! docker connection after a wake; it is torn down on pause so QEMU can sleep cleanly. +//! +//! Lifecycle: spawned by `lifecycle::start` once QEMU is launched and +//! before the guest-ready wait; killed by `lifecycle::stop` before QEMU. +//! The subcommand entry point lives in `commands::vm::supervise` — this +//! module is the loop it runs. 
+ +use anyhow::{Context, Result}; +use std::path::{Path, PathBuf}; +use std::process::Stdio; +use std::sync::atomic::{AtomicBool, AtomicI64, AtomicUsize, Ordering}; +use std::sync::Arc; +use std::time::Duration; +use tokio::io::AsyncWriteExt; +use tokio::net::{TcpListener, TcpStream, UnixListener, UnixStream}; +use tokio::sync::Mutex; + +use super::qmp::QmpClient; +use super::state; + +/// Arguments passed from the `avocado vm supervise` subcommand into the +/// supervisor loop. Plain owned data so the caller can construct it from +/// clap-parsed flags without leaking lifetimes. +#[derive(Debug, Clone)] +pub struct RunArgs { + /// External TCP port the supervisor listens on. Today this is the + /// SSH port that everything else (`vm shell`, Avocado.app) + /// connects to. + pub user_port: u16, + /// Loopback port QEMU's `hostfwd` binds to. Only the supervisor + /// connects here. + pub internal_port: u16, + /// QMP control socket. + pub qmp_socket: PathBuf, + /// How long with no active connections before we halt the vCPUs. + pub idle_after_secs: u64, + /// Path to write our pid so the lifecycle layer can kill us later. + pub pid_file: PathBuf, + /// Host path for the user-facing docker socket. Supervisor owns it. + pub docker_socket: PathBuf, + /// Host path the supervisor's SSH `-L` tunnel binds to; only the + /// docker proxy connects here. + pub docker_socket_internal: PathBuf, + /// SSH private key for tunneling to the guest. + pub ssh_key: PathBuf, + /// known_hosts file the SSH tunnel uses. + pub known_hosts: PathBuf, +} + +struct State { + paused: AtomicBool, + active_conns: AtomicUsize, + last_activity_ms: AtomicI64, + qmp_socket: PathBuf, + idle_threshold_ms: i64, + args: RunArgs, + /// SSH `-L` tunnel child pid, if running. Mutex serializes + /// spawn/kill so a pause/wake race doesn't leak a child. 
+ tunnel: Mutex>, + /// Serializes QMP stop/cont so racing wake-and-pause attempts + /// can't leave the supervisor's `paused` flag out of sync with + /// QEMU's actual state. + qmp_lock: Mutex<()>, +} + +impl State { + fn touch(&self) { + self.last_activity_ms.store(now_ms(), Ordering::Relaxed); + } + + /// QMP `cont` only — bring vCPUs back to running. Idempotent and + /// fast (single QMP round-trip). Does NOT touch the SSH tunnel: + /// TCP-proxy callers don't need it, and bundling it would make + /// every SSH probe wait 8s on tunnel spawn during boot. + async fn wake(self: &Arc) -> Result<()> { + let _guard = self.qmp_lock.lock().await; + if self.paused.load(Ordering::Relaxed) { + qmp_send(&self.qmp_socket, "cont", None) + .await + .context("QMP cont")?; + self.paused.store(false, Ordering::Relaxed); + eprintln!("supervisor: resumed VM on incoming connection"); + } + Ok(()) + } + + /// Halt the VM and tear down the tunnel so QEMU isn't holding any + /// kernel-side state that the guest can't service while paused. + async fn pause(self: &Arc) -> Result<()> { + let _guard = self.qmp_lock.lock().await; + if self.paused.load(Ordering::Relaxed) { + return Ok(()); + } + // Tear down tunnel first; its SSH keepalives would otherwise + // timeout while QEMU is stopped and the child would die in + // a way we can't tell apart from a real failure. + self.kill_tunnel().await; + qmp_send(&self.qmp_socket, "stop", None) + .await + .context("QMP stop")?; + self.paused.store(true, Ordering::Relaxed); + Ok(()) + } + + /// Spawn the SSH `-L` tunnel if it's not already running. Polls + /// briefly for the local socket to appear so callers can proceed + /// to `connect()` immediately on return. 
+ async fn ensure_tunnel(self: &Arc) -> Result<()> { + let mut lock = self.tunnel.lock().await; + if let Some(pid) = *lock { + if state::pid_alive(pid) && self.args.docker_socket_internal.exists() { + return Ok(()); + } + // stale handle; clean up before respawning + send_signal(pid, SIGTERM); + let _ = std::fs::remove_file(&self.args.docker_socket_internal); + } + let pid = spawn_ssh_tunnel(&self.args)?; + // Wait for the local sock to materialize — ssh -L creates it + // only after authentication completes. + let deadline = std::time::Instant::now() + Duration::from_secs(8); + loop { + if self.args.docker_socket_internal.exists() { + *lock = Some(pid); + eprintln!("supervisor: docker tunnel up (pid {pid})"); + return Ok(()); + } + if !state::pid_alive(pid) { + return Err(anyhow::anyhow!( + "ssh tunnel exited before docker socket appeared" + )); + } + if std::time::Instant::now() >= deadline { + send_signal(pid, SIGTERM); + return Err(anyhow::anyhow!( + "timed out waiting for docker tunnel to come up" + )); + } + tokio::time::sleep(Duration::from_millis(50)).await; + } + } + + async fn kill_tunnel(self: &Arc) { + let mut lock = self.tunnel.lock().await; + if let Some(pid) = lock.take() { + send_signal(pid, SIGTERM); + // Don't block long; ssh dies quickly on SIGTERM. 
+ for _ in 0..20 { + if !state::pid_alive(pid) { + break; + } + tokio::time::sleep(Duration::from_millis(25)).await; + } + if state::pid_alive(pid) { + send_signal(pid, SIGKILL); + } + } + let _ = std::fs::remove_file(&self.args.docker_socket_internal); + } +} + +fn now_ms() -> i64 { + use std::time::{SystemTime, UNIX_EPOCH}; + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_millis() as i64) + .unwrap_or(0) +} + +const SIGTERM: libc::c_int = 15; +const SIGKILL: libc::c_int = 9; + +fn send_signal(pid: u32, sig: libc::c_int) { + #[cfg(unix)] + unsafe { + libc::kill(pid as libc::pid_t, sig); + } + #[cfg(not(unix))] + { + let _ = (pid, sig); + } +} + +/// Run the supervisor loop until killed. +pub async fn run(args: RunArgs) -> Result<()> { + std::fs::write(&args.pid_file, std::process::id().to_string()) + .with_context(|| format!("writing {}", args.pid_file.display()))?; + + let state = Arc::new(State { + paused: AtomicBool::new(false), + active_conns: AtomicUsize::new(0), + last_activity_ms: AtomicI64::new(now_ms()), + qmp_socket: args.qmp_socket.clone(), + idle_threshold_ms: (args.idle_after_secs.saturating_mul(1000)) as i64, + tunnel: Mutex::new(None), + qmp_lock: Mutex::new(()), + args: args.clone(), + }); + + // Tunnel comes up lazily on first docker conn (handle_docker calls + // ensure_tunnel). Spawning eagerly here would race against guest + // sshd boot: the SSH handshake fails for ~30s after QEMU starts, + // and during that time the supervisor's TCP listener wouldn't bind + // (this function blocks on tunnel polling), making the whole boot + // cascade fail. + + let tcp_listener = TcpListener::bind(("127.0.0.1", args.user_port)) + .await + .with_context(|| format!("binding 127.0.0.1:{}", args.user_port))?; + eprintln!( + "supervisor: TCP listening on 127.0.0.1:{} → 127.0.0.1:{} (idle {} s)", + args.user_port, args.internal_port, args.idle_after_secs + ); + + // Stale Unix socket would refuse bind; ours is owned by us across restarts. 
+ let _ = std::fs::remove_file(&args.docker_socket); + let unix_listener = UnixListener::bind(&args.docker_socket) + .with_context(|| format!("binding {}", args.docker_socket.display()))?; + eprintln!( + "supervisor: Unix listening on {} → SSH→/run/docker.sock", + args.docker_socket.display() + ); + + if args.idle_after_secs > 0 { + let state_t = state.clone(); + tokio::spawn(async move { + idle_watcher(state_t).await; + }); + } + + // Signal handler: on SIGTERM/SIGINT, restore the VM to a usable + // state (resumed + tunnel down) so the next start doesn't trip + // over a paused VM with no supervisor to wake it. + let state_sig = state.clone(); + tokio::spawn(async move { + if let Err(e) = wait_for_term().await { + eprintln!("supervisor: signal handler error: {e:#}"); + return; + } + let _ = state_sig.wake().await; // ensure VM is resumed before we exit + state_sig.kill_tunnel().await; + std::process::exit(0); + }); + + // Main accept loop: select between TCP and Unix listeners. Spawned + // tasks own their connection through close. + loop { + tokio::select! 
{ + res = tcp_listener.accept() => { + let (sock, peer) = match res { + Ok(v) => v, + Err(e) => { eprintln!("supervisor: TCP accept error: {e:#}"); continue; } + }; + let s = state.clone(); + let internal_port = args.internal_port; + tokio::spawn(async move { + if let Err(e) = handle_tcp(sock, internal_port, s).await { + eprintln!("supervisor: TCP conn {peer} error: {e:#}"); + } + }); + } + res = unix_listener.accept() => { + let (sock, _peer) = match res { + Ok(v) => v, + Err(e) => { eprintln!("supervisor: Unix accept error: {e:#}"); continue; } + }; + let s = state.clone(); + tokio::spawn(async move { + if let Err(e) = handle_docker(sock, s).await { + eprintln!("supervisor: docker conn error: {e:#}"); + } + }); + } + } + } +} + +async fn handle_tcp(mut incoming: TcpStream, internal_port: u16, state: Arc) -> Result<()> { + state.active_conns.fetch_add(1, Ordering::Relaxed); + state.touch(); + + if let Err(e) = state.wake().await { + eprintln!("supervisor: wake failed: {e}"); + } + + let mut inner = TcpStream::connect(("127.0.0.1", internal_port)) + .await + .with_context(|| format!("connecting to internal port {internal_port}"))?; + let res = tokio::io::copy_bidirectional(&mut incoming, &mut inner).await; + let _ = incoming.shutdown().await; + let _ = inner.shutdown().await; + + state.active_conns.fetch_sub(1, Ordering::Relaxed); + state.touch(); + classify_close(res) +} + +async fn handle_docker(mut client: UnixStream, state: Arc) -> Result<()> { + state.active_conns.fetch_add(1, Ordering::Relaxed); + state.touch(); + + // Wake VM first (QMP cont). Then bring the SSH tunnel up — the + // tunnel's auth handshake needs guest sshd running, which is only + // true post-wake. 
+ state.wake().await.context("waking VM for docker conn")?; + state + .ensure_tunnel() + .await + .context("bringing docker tunnel up")?; + + let mut backend = UnixStream::connect(&state.args.docker_socket_internal) + .await + .with_context(|| { + format!( + "connecting to docker tunnel sock {}", + state.args.docker_socket_internal.display() + ) + })?; + let res = tokio::io::copy_bidirectional(&mut client, &mut backend).await; + let _ = client.shutdown().await; + let _ = backend.shutdown().await; + + state.active_conns.fetch_sub(1, Ordering::Relaxed); + state.touch(); + classify_close(res) +} + +/// Filter expected close patterns. SSH probe (boot_sync), `vm shell` +/// exit, docker client disconnect, any client that closes without +/// TCP-FIN — all show up as ECONNRESET / BrokenPipe / UnexpectedEof +/// here. Real I/O faults still propagate. +fn classify_close(res: std::io::Result<(u64, u64)>) -> Result<()> { + match res { + Ok(_) => Ok(()), + Err(e) => match e.kind() { + std::io::ErrorKind::ConnectionReset + | std::io::ErrorKind::BrokenPipe + | std::io::ErrorKind::UnexpectedEof + | std::io::ErrorKind::NotConnected => Ok(()), + _ => Err(e).context("bidirectional copy failed"), + }, + } +} + +async fn idle_watcher(state: Arc) { + loop { + tokio::time::sleep(Duration::from_secs(1)).await; + if state.paused.load(Ordering::Relaxed) { + continue; + } + if state.active_conns.load(Ordering::Relaxed) > 0 { + continue; + } + let since = now_ms() - state.last_activity_ms.load(Ordering::Relaxed); + if since >= state.idle_threshold_ms { + match state.pause().await { + Ok(_) => eprintln!("supervisor: paused VM after {since} ms idle"), + Err(e) => { + eprintln!("supervisor: pause failed: {e}"); + state.touch(); // back off + } + } + } + } +} + +/// Spawn an `ssh -N -L :/run/docker.sock` to the guest. +/// Same flag set as the original `forward.rs`; managed by the +/// supervisor instead of `lifecycle::start`. 
+fn spawn_ssh_tunnel(args: &RunArgs) -> Result { + let _ = std::fs::remove_file(&args.docker_socket_internal); + let mut cmd = std::process::Command::new("ssh"); + cmd.args([ + "-N", + "-T", + "-o", + "ConnectTimeout=10", + "-o", + "ExitOnForwardFailure=yes", + "-o", + "ServerAliveInterval=30", + "-o", + "ServerAliveCountMax=3", + "-o", + "StrictHostKeyChecking=no", + "-o", + &format!("UserKnownHostsFile={}", args.known_hosts.display()), + "-o", + "PasswordAuthentication=no", + "-o", + "BatchMode=yes", + "-o", + "LogLevel=ERROR", + "-i", + args.ssh_key.to_str().context("ssh key path utf-8")?, + "-p", + &args.internal_port.to_string(), + "-L", + &format!( + "{}:/run/docker.sock", + args.docker_socket_internal.display() + ), + "root@127.0.0.1", + ]); + cmd.stdin(Stdio::null()); + cmd.stdout(Stdio::null()); + cmd.stderr(Stdio::null()); + #[cfg(unix)] + unsafe { + use std::os::unix::process::CommandExt; + cmd.pre_exec(|| { + let _ = libc::setsid(); + Ok(()) + }); + } + let child = cmd.spawn().context("spawning ssh -L tunnel")?; + Ok(child.id()) +} + +/// Thin one-shot QMP command runner. Open + close per call because +/// stop/cont happen at most a few times per minute and the QmpClient +/// holds its own connection state. +async fn qmp_send(socket: &Path, cmd: &str, args: Option) -> Result<()> { + let mut client = QmpClient::connect(socket).await?; + let _ = client.execute(cmd, args).await?; + Ok(()) +} + +#[cfg(unix)] +async fn wait_for_term() -> Result<()> { + use tokio::signal::unix::{signal, SignalKind}; + let mut term = signal(SignalKind::terminate()).context("install SIGTERM handler")?; + let mut intr = signal(SignalKind::interrupt()).context("install SIGINT handler")?; + tokio::select! 
{ + _ = term.recv() => {} + _ = intr.recv() => {} + } + Ok(()) +} + +#[cfg(not(unix))] +async fn wait_for_term() -> Result<()> { + tokio::signal::ctrl_c().await.context("install ctrl-c handler") +} From 50412005e6428c57bac679b2fc01bcdf71513a9e Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Tue, 2 Jun 2026 21:24:08 -0400 Subject: [PATCH 16/30] perf(vm): default hibernation idle timeout from 10s to 60s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 10s default was useful while iterating on the supervisor — short enough to verify pause/wake every few minutes of testing. For real use, 10s pauses mid-SSH-session whenever the user pauses to think, which adds noticeable wake latency on every command. 60s is comfortable for normal interactive work while still freeing host CPU within a minute of stepping away. Users who want either extreme can override via `avocado vm config set idle.hibernate_after_secs N` or the `AVOCADO_VM_IDLE_HIBERNATE_SECS` env var. --- src/utils/vm/lifecycle.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/utils/vm/lifecycle.rs b/src/utils/vm/lifecycle.rs index ddeddd97..5d4bc702 100644 --- a/src/utils/vm/lifecycle.rs +++ b/src/utils/vm/lifecycle.rs @@ -758,11 +758,11 @@ fn write_ssh_config(paths: &VmPaths, ssh_port: u16) -> Result<()> { } /// Default idle timeout in seconds when neither config nor env var sets -/// one. Aggressive for testing while the hibernation supervisor is new -/// — production should land on a more user-friendly default (multiple -/// minutes) once the wake-on-connect path has been exercised in real -/// workflows. -const DEFAULT_IDLE_AFTER_SECS: u64 = 10; +/// one. One minute strikes a balance between freeing host CPU promptly +/// when the user steps away from active work and not pausing mid-pause +/// during normal SSH/docker bursts. Users with snappier wake budgets +/// can lower via `avocado vm config set idle.hibernate_after_secs N`. 
+const DEFAULT_IDLE_AFTER_SECS: u64 = 60; /// Resolve the hibernate timeout. Env var wins (one-shot override for /// experimentation), else the persisted `idle.hibernate_after_secs`, From fcaab593490ea38a8185be172e235bfab05d98ae Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Tue, 2 Jun 2026 21:29:46 -0400 Subject: [PATCH 17/30] chore: satisfy fmt + clippy for the supervisor/DTB work `cargo fmt --check` and `cargo clippy --all-targets --all-features -- -D warnings` were both failing on the just-merged supervisor and DTB changes. Auto-applies rustfmt and rewrites three `pos % 4 != 0` clippy::manual_is_multiple_of sites in fdt.rs to `!pos.is_multiple_of(4)`. No behavior changes. --- src/main.rs | 26 +++++++------ src/utils/vm/fdt.rs | 80 +++++++++++++++++++++++++++++--------- src/utils/vm/lifecycle.rs | 12 ++++-- src/utils/vm/qemu.rs | 13 +++---- src/utils/vm/supervisor.rs | 9 ++--- 5 files changed, 95 insertions(+), 45 deletions(-) diff --git a/src/main.rs b/src/main.rs index 28f4ce07..96d433ad 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3090,19 +3090,21 @@ async fn main() -> Result<()> { docker_socket_internal, ssh_key, known_hosts, - } => commands::vm::supervise::SuperviseCommand { - user_port, - internal_port, - qmp_socket, - idle_after_secs, - pid_file, - docker_socket, - docker_socket_internal, - ssh_key, - known_hosts, + } => { + commands::vm::supervise::SuperviseCommand { + user_port, + internal_port, + qmp_socket, + idle_after_secs, + pid_file, + docker_socket, + docker_socket_internal, + ssh_key, + known_hosts, + } + .execute() + .await } - .execute() - .await, VmCommands::Status => commands::vm::status::StatusCommand.execute().await, VmCommands::Shell { command } => { commands::vm::shell::ShellCommand { command } diff --git a/src/utils/vm/fdt.rs b/src/utils/vm/fdt.rs index 88519783..66201a01 100644 --- a/src/utils/vm/fdt.rs +++ b/src/utils/vm/fdt.rs @@ -45,14 +45,21 @@ pub struct Node { impl Node { pub fn new(name: impl Into) -> Self { - Self { 
name: name.into(), props: Vec::new(), children: Vec::new() } + Self { + name: name.into(), + props: Vec::new(), + children: Vec::new(), + } } pub fn set_prop(&mut self, name: &str, value: Vec) { if let Some(p) = self.props.iter_mut().find(|p| p.name == name) { p.value = value; } else { - self.props.push(Property { name: name.to_string(), value }); + self.props.push(Property { + name: name.to_string(), + value, + }); } } @@ -96,10 +103,12 @@ pub fn parse(data: &[u8]) -> Result { bail!("unsupported DTB version {version} (need v16+)"); } if data.len() < totalsize { - bail!("DTB truncated: header says {totalsize} bytes, got {}", data.len()); + bail!( + "DTB truncated: header says {totalsize} bytes, got {}", + data.len() + ); } - if off_dt_struct + size_dt_struct > data.len() - || off_dt_strings + size_dt_strings > data.len() + if off_dt_struct + size_dt_struct > data.len() || off_dt_strings + size_dt_strings > data.len() { bail!("DTB struct/strings offsets out of bounds"); } @@ -119,7 +128,11 @@ pub fn parse(data: &[u8]) -> Result { mem_rsv.push((addr, size)); } - let mut parser = Parser { data, pos: off_dt_struct, strings_base: off_dt_strings }; + let mut parser = Parser { + data, + pos: off_dt_struct, + strings_base: off_dt_strings, + }; let first = parser.read_u32()?; if first != FDT_BEGIN_NODE { bail!("DTB struct block must start with BEGIN_NODE, got {first:#x}"); @@ -129,7 +142,11 @@ pub fn parse(data: &[u8]) -> Result { if last != FDT_END { bail!("DTB struct block missing FDT_END terminator, got {last:#x}"); } - Ok(Fdt { root, mem_rsv, boot_cpuid_phys }) + Ok(Fdt { + root, + mem_rsv, + boot_cpuid_phys, + }) } struct Parser<'a> { @@ -160,7 +177,7 @@ impl<'a> Parser<'a> { .with_context(|| format!("non-utf8 name at offset {start}"))? .to_string(); self.pos += 1; - while self.pos % 4 != 0 { + while !self.pos.is_multiple_of(4) { self.pos += 1; } Ok(s) @@ -197,7 +214,7 @@ impl<'a> Parser<'a> { })? 
.to_vec(); self.pos += len; - while self.pos % 4 != 0 { + while !self.pos.is_multiple_of(4) { self.pos += 1; } node.props.push(Property { @@ -223,7 +240,12 @@ struct Emitter { } impl Emitter { - fn new() -> Self { Self { structs: Vec::new(), strings: Vec::new() } } + fn new() -> Self { + Self { + structs: Vec::new(), + strings: Vec::new(), + } + } fn intern(&mut self, name: &str) -> u32 { let bytes = name.as_bytes(); @@ -244,10 +266,12 @@ impl Emitter { off } - fn push_u32(&mut self, v: u32) { self.structs.extend_from_slice(&v.to_be_bytes()); } + fn push_u32(&mut self, v: u32) { + self.structs.extend_from_slice(&v.to_be_bytes()); + } fn pad4(&mut self) { - while self.structs.len() % 4 != 0 { + while !self.structs.len().is_multiple_of(4) { self.structs.push(0); } } @@ -326,7 +350,9 @@ fn max_phandle(node: &Node) -> u32 { max } -fn be32(v: u32) -> Vec { v.to_be_bytes().to_vec() } +fn be32(v: u32) -> Vec { + v.to_be_bytes().to_vec() +} fn strprop(s: &str) -> Vec { let mut v = s.as_bytes().to_vec(); v.push(0); @@ -405,7 +431,11 @@ mod tests { cpus.children.push(cpu); } root.children.push(cpus); - let fdt = Fdt { root, mem_rsv: vec![], boot_cpuid_phys: 0 }; + let fdt = Fdt { + root, + mem_rsv: vec![], + boot_cpuid_phys: 0, + }; serialize(&fdt) } @@ -441,7 +471,10 @@ mod tests { .iter() .find(|p| p.name == "cpu-idle-states") .expect("cpu-idle-states missing on cpu node"); - assert_eq!(u32::from_be_bytes(cis.value.as_slice().try_into().unwrap()), phandle); + assert_eq!( + u32::from_be_bytes(cis.value.as_slice().try_into().unwrap()), + phandle + ); } } @@ -454,7 +487,10 @@ mod tests { let rt = parse(&out).unwrap(); assert!(rt.root.children.iter().any(|c| c.name == "idle-states")); let cpus = rt.root.children.iter().find(|c| c.name == "cpus").unwrap(); - assert!(cpus.children.iter().all(|c| c.props.iter().any(|p| p.name == "cpu-idle-states"))); + assert!(cpus + .children + .iter() + .all(|c| c.props.iter().any(|p| p.name == "cpu-idle-states"))); } #[test] @@ -468,7 +504,11 
@@ mod tests { fn patch_fails_when_no_cpus_node() { let mut root = Node::new(""); root.set_prop("#address-cells", be32(2)); - let fdt_in = Fdt { root, mem_rsv: vec![], boot_cpuid_phys: 0 }; + let fdt_in = Fdt { + root, + mem_rsv: vec![], + boot_cpuid_phys: 0, + }; let bytes = serialize(&fdt_in); let mut fdt = parse(&bytes).unwrap(); assert!(patch_idle_states(&mut fdt, 4).is_err()); @@ -482,7 +522,11 @@ mod tests { cpu.set_prop("reg", be32(0)); cpus.children.push(cpu); root.children.push(cpus); - let fdt_in = Fdt { root, mem_rsv: vec![], boot_cpuid_phys: 0x42 }; + let fdt_in = Fdt { + root, + mem_rsv: vec![], + boot_cpuid_phys: 0x42, + }; let bytes = serialize(&fdt_in); let parsed = parse(&bytes).unwrap(); assert_eq!(parsed.boot_cpuid_phys, 0x42); diff --git a/src/utils/vm/lifecycle.rs b/src/utils/vm/lifecycle.rs index 5d4bc702..cd5253ea 100644 --- a/src/utils/vm/lifecycle.rs +++ b/src/utils/vm/lifecycle.rs @@ -168,8 +168,11 @@ pub async fn start(opts: StartOptions) -> Result { // this one; downstream callers (vm shell, forward.rs, Avocado.app) // only ever see `ssh_port`. let internal_ssh_port = qemu::pick_free_port()?; - std::fs::write(paths.internal_ssh_port_file(), internal_ssh_port.to_string()) - .with_context(|| format!("writing {}", paths.internal_ssh_port_file().display()))?; + std::fs::write( + paths.internal_ssh_port_file(), + internal_ssh_port.to_string(), + ) + .with_context(|| format!("writing {}", paths.internal_ssh_port_file().display()))?; // Now that the port is known, write the ssh-config + wire it into // ~/.ssh/config. This is required for `DOCKER_HOST=ssh://avocado-vm` @@ -874,7 +877,10 @@ async fn spawn_supervisor( // proxy is ready before boot_sync starts pumping connections through. 
let deadline = std::time::Instant::now() + Duration::from_secs(5); loop { - if tokio::net::TcpStream::connect(("127.0.0.1", user_port)).await.is_ok() { + if tokio::net::TcpStream::connect(("127.0.0.1", user_port)) + .await + .is_ok() + { return Ok(()); } if std::time::Instant::now() >= deadline { diff --git a/src/utils/vm/qemu.rs b/src/utils/vm/qemu.rs index 1f750631..1046b7a9 100644 --- a/src/utils/vm/qemu.rs +++ b/src/utils/vm/qemu.rs @@ -231,7 +231,9 @@ pub fn build_qemu_args( // Failures degrade gracefully: log + skip, kernel falls back to the // auto-generated DTB it would have used anyway. if matches!(arch.as_str(), "arm64" | "aarch64") { - let dtb_override = std::env::var("AVOCADO_VM_DTB").ok().filter(|s| !s.is_empty()); + let dtb_override = std::env::var("AVOCADO_VM_DTB") + .ok() + .filter(|s| !s.is_empty()); match dtb_override { Some(path) => { args.push("-dtb".into()); @@ -288,8 +290,7 @@ fn ensure_idle_states_dtb(paths: &VmPaths, cfg: &QemuConfig) -> Result let raw = std::fs::read(tmp.path()) .with_context(|| format!("failed to read dumped DTB at {}", tmp.path().display()))?; let mut fdt = fdt::parse(&raw).context("failed to parse QEMU-generated DTB")?; - fdt::patch_idle_states(&mut fdt, cfg.cpus) - .context("failed to splice idle-states into DTB")?; + fdt::patch_idle_states(&mut fdt, cfg.cpus).context("failed to splice idle-states into DTB")?; let patched = fdt::serialize(&fdt); std::fs::write(tmp.path(), &patched) .with_context(|| format!("failed to write patched DTB to {}", tmp.path().display()))?; @@ -341,10 +342,8 @@ fn dump_base_dtb(qemu_bin: &str, cfg: &QemuConfig, out: &Path) -> Result<()> { /// cache; a binary that hasn't been touched produces the same key /// indefinitely. 
fn qemu_binary_tag(qemu_bin: &str) -> Result { - let path = which_on_path(qemu_bin) - .with_context(|| format!("{qemu_bin} not found on $PATH"))?; - let meta = std::fs::metadata(&path) - .with_context(|| format!("stat {}", path.display()))?; + let path = which_on_path(qemu_bin).with_context(|| format!("{qemu_bin} not found on $PATH"))?; + let meta = std::fs::metadata(&path).with_context(|| format!("stat {}", path.display()))?; let mtime = meta .modified() .ok() diff --git a/src/utils/vm/supervisor.rs b/src/utils/vm/supervisor.rs index 0055903e..9134e55f 100644 --- a/src/utils/vm/supervisor.rs +++ b/src/utils/vm/supervisor.rs @@ -419,10 +419,7 @@ fn spawn_ssh_tunnel(args: &RunArgs) -> Result { "-p", &args.internal_port.to_string(), "-L", - &format!( - "{}:/run/docker.sock", - args.docker_socket_internal.display() - ), + &format!("{}:/run/docker.sock", args.docker_socket_internal.display()), "root@127.0.0.1", ]); cmd.stdin(Stdio::null()); @@ -463,5 +460,7 @@ async fn wait_for_term() -> Result<()> { #[cfg(not(unix))] async fn wait_for_term() -> Result<()> { - tokio::signal::ctrl_c().await.context("install ctrl-c handler") + tokio::signal::ctrl_c() + .await + .context("install ctrl-c handler") } From abda0c57f543c153f7efea48cd69c0ff58b3a86b Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Tue, 2 Jun 2026 21:41:16 -0400 Subject: [PATCH 18/30] chore: cfg(unix) gate the supervisor for Windows build The hibernation supervisor uses tokio's UnixListener/UnixStream for the docker socket path and tokio::signal::unix for graceful shutdown, neither of which exist on Windows. Without gating, `cargo check --target x86_64-pc-windows-gnu` fails with E0432 (unresolved UnixListener/UnixStream imports). 
Gated unix-only: - `pub mod supervisor` in utils/vm/mod.rs - `pub mod supervise` in commands/vm/mod.rs - `VmCommands::Supervise` variant + dispatch in main.rs - `spawn_supervisor` / `stop_supervisor` / `resolve_idle_after_secs` / `DEFAULT_IDLE_AFTER_SECS` in lifecycle.rs - The internal-port pick + ssh_port file write in `start` On Windows the hibernation feature is unavailable: QEMU binds the user-facing port directly (today's pre-supervisor behavior), the legacy long-lived docker forwarder runs, and the VM never auto-pauses. --- src/commands/vm/mod.rs | 1 + src/main.rs | 5 ++++- src/utils/vm/lifecycle.rs | 36 +++++++++++++++++++++++++++--------- src/utils/vm/mod.rs | 1 + 4 files changed, 33 insertions(+), 10 deletions(-) diff --git a/src/commands/vm/mod.rs b/src/commands/vm/mod.rs index 585cd74a..27a9746e 100644 --- a/src/commands/vm/mod.rs +++ b/src/commands/vm/mod.rs @@ -12,5 +12,6 @@ pub mod shell; pub mod start; pub mod status; pub mod stop; +#[cfg(unix)] pub mod supervise; pub mod update; diff --git a/src/main.rs b/src/main.rs index 96d433ad..61f42c4f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3080,6 +3080,7 @@ async fn main() -> Result<()> { cmd.execute().await } VmCommands::Stop { force } => commands::vm::stop::StopCommand { force }.execute().await, + #[cfg(unix)] VmCommands::Supervise { user_port, internal_port, @@ -4570,7 +4571,9 @@ enum VmCommands { /// Long-lived hibernation supervisor. Internal — spawned by `vm start`, /// not for direct use. Owns the user-facing SSH port AND docker /// socket, proxies to QEMU's internal hostfwd / SSH tunnel, and - /// sends QMP stop/cont on the idle timeout. + /// sends QMP stop/cont on the idle timeout. Unix-only because the + /// docker socket path requires UnixListener. 
+ #[cfg(unix)] #[command(hide = true)] Supervise { #[arg(long)] diff --git a/src/utils/vm/lifecycle.rs b/src/utils/vm/lifecycle.rs index cd5253ea..fca9e73b 100644 --- a/src/utils/vm/lifecycle.rs +++ b/src/utils/vm/lifecycle.rs @@ -166,13 +166,18 @@ pub async fn start(opts: StartOptions) -> Result { // Loopback-only port QEMU's hostfwd binds to. The supervisor // listens on the user-facing `ssh_port` and proxies through to // this one; downstream callers (vm shell, forward.rs, Avocado.app) - // only ever see `ssh_port`. - let internal_ssh_port = qemu::pick_free_port()?; - std::fs::write( - paths.internal_ssh_port_file(), - internal_ssh_port.to_string(), - ) - .with_context(|| format!("writing {}", paths.internal_ssh_port_file().display()))?; + // only ever see `ssh_port`. On non-unix the supervisor is + // unavailable (uses tokio's UnixListener), so QEMU binds the + // user-facing port directly — pre-supervisor behavior. + #[cfg(unix)] + let qemu_hostfwd_port = { + let internal = qemu::pick_free_port()?; + std::fs::write(paths.internal_ssh_port_file(), internal.to_string()) + .with_context(|| format!("writing {}", paths.internal_ssh_port_file().display()))?; + internal + }; + #[cfg(not(unix))] + let qemu_hostfwd_port = ssh_port; // Now that the port is known, write the ssh-config + wire it into // ~/.ssh/config. This is required for `DOCKER_HOST=ssh://avocado-vm` @@ -189,7 +194,7 @@ pub async fn start(opts: StartOptions) -> Result { let cfg = QemuConfig { memory_mib, cpus, - ssh_port: internal_ssh_port, + ssh_port: qemu_hostfwd_port, cmdline_extra: opts.cmdline_extra, artifact_dir: artifact_dir.clone(), workspace: workspace.clone(), @@ -216,8 +221,16 @@ pub async fn start(opts: StartOptions) -> Result { // `idle_after_secs` of no proxied activity, sends QMP `stop` to // halt the vCPUs; wakes on the next incoming TCP. boot_sync below // goes through the proxy, which is why we spawn before waiting. 
+ // Unix-only because the supervisor uses tokio's UnixListener for + // the docker-socket path; on Windows the supervisor is absent + // (idle_after_secs forced to 0) and we fall through to the legacy + // long-lived docker forwarder below. + #[cfg(unix)] let idle_after_secs = resolve_idle_after_secs(&paths); - spawn_supervisor(&paths, ssh_port, internal_ssh_port, idle_after_secs).await?; + #[cfg(not(unix))] + let idle_after_secs: u64 = 0; + #[cfg(unix)] + spawn_supervisor(&paths, ssh_port, qemu_hostfwd_port, idle_after_secs).await?; // Wait for the guest to become ready — first signal wins (qga vs SSH). let signal = super::boot_sync::wait_for_guest_ready(&paths.qga_socket(), ssh_port, None) @@ -311,6 +324,7 @@ async fn stop_inner(force: bool) -> Result<()> { // exited, the next `vm start` would race against a still-bound port. // The docker socket forwarder is an SSH child that can outlive QEMU // if we shut down by signal, leaving a stale `docker.sock`. + #[cfg(unix)] stop_supervisor(&paths); let _ = super::forward::stop(&paths).await; @@ -765,12 +779,14 @@ fn write_ssh_config(paths: &VmPaths, ssh_port: u16) -> Result<()> { /// when the user steps away from active work and not pausing mid-pause /// during normal SSH/docker bursts. Users with snappier wake budgets /// can lower via `avocado vm config set idle.hibernate_after_secs N`. +#[cfg(unix)] const DEFAULT_IDLE_AFTER_SECS: u64 = 60; /// Resolve the hibernate timeout. Env var wins (one-shot override for /// experimentation), else the persisted `idle.hibernate_after_secs`, /// else the default. `0` disables hibernation while keeping the proxy /// up — useful for isolating proxy issues from QMP issues. 
+#[cfg(unix)] fn resolve_idle_after_secs(paths: &VmPaths) -> u64 { if let Ok(raw) = std::env::var("AVOCADO_VM_IDLE_HIBERNATE_SECS") { if let Ok(parsed) = raw.parse::() { @@ -796,6 +812,7 @@ fn resolve_idle_after_secs(paths: &VmPaths) -> u64 { /// Best-effort SIGTERM → SIGKILL on the supervisor pid, then remove /// its pidfile + internal-ssh-port marker. Idempotent — missing /// pidfile / dead pid is a no-op. +#[cfg(unix)] fn stop_supervisor(paths: &VmPaths) { let pidfile = paths.supervisor_pid(); if let Ok(raw) = std::fs::read_to_string(&pidfile) { @@ -818,6 +835,7 @@ fn stop_supervisor(paths: &VmPaths) { let _ = std::fs::remove_file(paths.internal_ssh_port_file()); } +#[cfg(unix)] async fn spawn_supervisor( paths: &VmPaths, user_port: u16, diff --git a/src/utils/vm/mod.rs b/src/utils/vm/mod.rs index 0d09870c..2b0001c3 100644 --- a/src/utils/vm/mod.rs +++ b/src/utils/vm/mod.rs @@ -39,4 +39,5 @@ pub mod share; pub mod ssh; pub mod staging; pub mod state; +#[cfg(unix)] pub mod supervisor; From 0d1e209f398ca0c2e722851af46d3bae02b5d3a9 Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Tue, 2 Jun 2026 22:10:30 -0400 Subject: [PATCH 19/30] feat(vm): `vm status` reports hibernated state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hibernation supervisor halts vCPUs via QMP `stop` after the configured idle timeout — a running pid alone no longer tells us whether the guest is actively executing. Probe QMP `query-status` from `lifecycle::status` (500ms timeout so a wedged QEMU doesn't hang the command) and surface a "(hibernated — wakes on next ssh/docker call)" suffix when paused. `VmStatus` gains a `paused: Option<bool>` where `Some(true)` = paused, `Some(false)` = confirmed running, `None` = couldn't probe (no QMP socket, non-unix host, supervisor down). Lets desktop / other consumers distinguish the three explicitly.
--- src/commands/vm/status.rs | 6 ++++- src/utils/vm/lifecycle.rs | 47 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/src/commands/vm/status.rs b/src/commands/vm/status.rs index 1ec01d9d..8edff1b0 100644 --- a/src/commands/vm/status.rs +++ b/src/commands/vm/status.rs @@ -10,8 +10,12 @@ impl StatusCommand { pub async fn execute(self) -> Result<()> { let s = lifecycle::status().await?; if s.running { + let state_tag = match s.paused { + Some(true) => " (hibernated — wakes on next ssh/docker call)", + _ => "", + }; println!( - "avocado-vm running (pid {}, ssh 127.0.0.1:{})", + "avocado-vm running (pid {}, ssh 127.0.0.1:{}){state_tag}", s.pid.unwrap_or(0), s.ssh_port.unwrap_or(0), ); diff --git a/src/utils/vm/lifecycle.rs b/src/utils/vm/lifecycle.rs index fca9e73b..7cae01f6 100644 --- a/src/utils/vm/lifecycle.rs +++ b/src/utils/vm/lifecycle.rs @@ -89,6 +89,12 @@ pub struct VmStatus { pub manifest_platform: Option, pub manifest_arch: Option, pub paths: VmPaths, + /// `Some(true)` when QEMU has been paused by the hibernation + /// supervisor (vCPUs halted, host CPU ~0%, RAM resident — wakes + /// on next inbound SSH/docker connection). `Some(false)` when + /// confirmed running. `None` when liveness couldn't be probed (no + /// QMP socket, supervisor down, non-unix host, etc.). + pub paused: Option, } /// Start the VM. Errors if one is already running. Performs manifest sha256 @@ -303,6 +309,8 @@ pub async fn start(opts: StartOptions) -> Result { manifest_platform: Some(manifest.platform), manifest_arch: Some(manifest.architecture), paths, + // We just finished boot — definitely not paused. + paused: Some(false), }) } @@ -393,6 +401,15 @@ pub async fn status() -> Result { (None, None) }; + // Probe QMP for paused state — the hibernation supervisor halts + // vCPUs via QMP `stop` when idle, so a running pid does not + // necessarily mean the guest is actively executing. 
`query-status` + // returns `{ status: "running" | "paused", ... }`. Failures here + // (no socket, QMP unreachable on non-unix, etc.) leave the field + // as `None` so callers can distinguish "couldn't tell" from + // "definitely paused". + let paused = if running { probe_paused(&paths).await } else { None }; + Ok(VmStatus { running, pid: if running { pid } else { None }, @@ -400,6 +417,7 @@ pub async fn status() -> Result { manifest_platform: platform, manifest_arch: arch, paths, + paused, }) } @@ -774,6 +792,35 @@ fn write_ssh_config(paths: &VmPaths, ssh_port: u16) -> Result<()> { Ok(()) } +/// Ask QEMU via QMP whether the VM is currently paused. Returns +/// `Some(true)` for paused, `Some(false)` for any non-paused running +/// state, and `None` if the QMP socket is unreachable. Short timeout +/// (~500ms) so a wedged QEMU doesn't make `vm status` hang. +async fn probe_paused(paths: &VmPaths) -> Option { + #[cfg(unix)] + { + if !paths.qmp_socket().exists() { + return None; + } + let probe = async { + let mut client = QmpClient::connect(&paths.qmp_socket()).await.ok()?; + let v = client.execute("query-status", None).await.ok()?; + v.get("status") + .and_then(|s| s.as_str()) + .map(|s| s == "paused") + }; + tokio::time::timeout(Duration::from_millis(500), probe) + .await + .ok() + .flatten() + } + #[cfg(not(unix))] + { + let _ = paths; + None + } +} + /// Default idle timeout in seconds when neither config nor env var sets /// one. One minute strikes a balance between freeing host CPU promptly /// when the user steps away from active work and not pausing mid-pause From 270af2e8e0a2052eb39e361812f5da3383c704af Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Tue, 2 Jun 2026 23:10:43 -0400 Subject: [PATCH 20/30] perf(vm): infrastructure lane for idle-exempt telemetry + timestamps Adds a second TCP listener (`infra-ssh-port`) and a second docker socket (`docker-stream.sock`) on the supervisor. 
Connections via these "infra" lanes are proxied identically to the user-facing counterparts but do NOT count toward the idle-hibernation timer, AND do NOT wake the VM if it's paused. Without this, long-lived telemetry channels (the desktop app's agent SSH tunnel, a future docker `/events` subscription, etc.) would either pin the VM awake forever (when held open during Running) or respawn-loop wake the VM as soon as it tries to hibernate (when the backing SSH tunnel dies on pause and the consumer reconnects). Semantics: - User-facing: `wake()` on accept, `ensure_tunnel()` on docker conn, counts toward idle while open. - Infra: no wake, no `ensure_tunnel`. Fast-fail if the tunnel isn't already up (caller backs off and retries). Tunnel comes up only when real user activity happens. Net result: hibernation actually sticks when the desktop is open. Background telemetry sees the VM as available iff a human is using it; otherwise it accepts the timer-driven hibernation. Also adds UTC RFC3339 timestamps to `supervisor.log` via a `slog!` macro so pause/wake cycles are legible without correlating against shell history. Wired into the CLI Supervise subcommand via two new flags (`--infra-port`, `--docker-socket-stream`). `state.rs` gains `infra_ssh_port_file()` and `docker_socket_stream()` accessors; `lifecycle::start` picks the infra port + passes paths through. 
--- src/commands/vm/supervise.rs | 4 + src/main.rs | 8 ++ src/utils/vm/lifecycle.rs | 31 +++++- src/utils/vm/state.rs | 18 ++++ src/utils/vm/supervisor.rs | 178 ++++++++++++++++++++++++++++------- 5 files changed, 204 insertions(+), 35 deletions(-) diff --git a/src/commands/vm/supervise.rs b/src/commands/vm/supervise.rs index ce38e4d0..740bace0 100644 --- a/src/commands/vm/supervise.rs +++ b/src/commands/vm/supervise.rs @@ -13,11 +13,13 @@ use crate::utils::vm::supervisor::{run, RunArgs}; pub struct SuperviseCommand { pub user_port: u16, pub internal_port: u16, + pub infra_port: u16, pub qmp_socket: PathBuf, pub idle_after_secs: u64, pub pid_file: PathBuf, pub docker_socket: PathBuf, pub docker_socket_internal: PathBuf, + pub docker_socket_stream: PathBuf, pub ssh_key: PathBuf, pub known_hosts: PathBuf, } @@ -27,11 +29,13 @@ impl SuperviseCommand { run(RunArgs { user_port: self.user_port, internal_port: self.internal_port, + infra_port: self.infra_port, qmp_socket: self.qmp_socket, idle_after_secs: self.idle_after_secs, pid_file: self.pid_file, docker_socket: self.docker_socket, docker_socket_internal: self.docker_socket_internal, + docker_socket_stream: self.docker_socket_stream, ssh_key: self.ssh_key, known_hosts: self.known_hosts, }) diff --git a/src/main.rs b/src/main.rs index 61f42c4f..cd09b625 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3084,22 +3084,26 @@ async fn main() -> Result<()> { VmCommands::Supervise { user_port, internal_port, + infra_port, qmp_socket, idle_after_secs, pid_file, docker_socket, docker_socket_internal, + docker_socket_stream, ssh_key, known_hosts, } => { commands::vm::supervise::SuperviseCommand { user_port, internal_port, + infra_port, qmp_socket, idle_after_secs, pid_file, docker_socket, docker_socket_internal, + docker_socket_stream, ssh_key, known_hosts, } @@ -4581,6 +4585,8 @@ enum VmCommands { #[arg(long)] internal_port: u16, #[arg(long)] + infra_port: u16, + #[arg(long)] qmp_socket: std::path::PathBuf, #[arg(long)] 
idle_after_secs: u64, @@ -4591,6 +4597,8 @@ enum VmCommands { #[arg(long)] docker_socket_internal: std::path::PathBuf, #[arg(long)] + docker_socket_stream: std::path::PathBuf, + #[arg(long)] ssh_key: std::path::PathBuf, #[arg(long)] known_hosts: std::path::PathBuf, diff --git a/src/utils/vm/lifecycle.rs b/src/utils/vm/lifecycle.rs index 7cae01f6..f1995289 100644 --- a/src/utils/vm/lifecycle.rs +++ b/src/utils/vm/lifecycle.rs @@ -185,6 +185,22 @@ pub async fn start(opts: StartOptions) -> Result { #[cfg(not(unix))] let qemu_hostfwd_port = ssh_port; + // Infrastructure SSH port — second user-facing port that the + // supervisor proxies identically to `ssh_port`, but connections to + // it do NOT count toward the idle-hibernation timer. Long-lived + // telemetry channels (Avocado.app's agent SSH tunnel, docker + // /events subscriptions) connect here so they wake the VM on + // attach but don't pin it awake forever. + #[cfg(unix)] + let infra_ssh_port = { + let port = qemu::pick_free_port()?; + std::fs::write(paths.infra_ssh_port_file(), port.to_string()) + .with_context(|| format!("writing {}", paths.infra_ssh_port_file().display()))?; + port + }; + #[cfg(not(unix))] + let infra_ssh_port = ssh_port; + // Now that the port is known, write the ssh-config + wire it into // ~/.ssh/config. This is required for `DOCKER_HOST=ssh://avocado-vm` // to resolve in any subprocess we spawn — Docker's ssh transport reads @@ -236,7 +252,14 @@ pub async fn start(opts: StartOptions) -> Result { #[cfg(not(unix))] let idle_after_secs: u64 = 0; #[cfg(unix)] - spawn_supervisor(&paths, ssh_port, qemu_hostfwd_port, idle_after_secs).await?; + spawn_supervisor( + &paths, + ssh_port, + qemu_hostfwd_port, + infra_ssh_port, + idle_after_secs, + ) + .await?; // Wait for the guest to become ready — first signal wins (qga vs SSH). 
let signal = super::boot_sync::wait_for_guest_ready(&paths.qga_socket(), ssh_port, None) @@ -880,6 +903,7 @@ fn stop_supervisor(paths: &VmPaths) { } let _ = std::fs::remove_file(pidfile); let _ = std::fs::remove_file(paths.internal_ssh_port_file()); + let _ = std::fs::remove_file(paths.infra_ssh_port_file()); } #[cfg(unix)] @@ -887,6 +911,7 @@ async fn spawn_supervisor( paths: &VmPaths, user_port: u16, internal_port: u16, + infra_port: u16, idle_after_secs: u64, ) -> Result<()> { let exe = std::env::current_exe().context("locating current avocado binary")?; @@ -898,6 +923,8 @@ async fn spawn_supervisor( &user_port.to_string(), "--internal-port", &internal_port.to_string(), + "--infra-port", + &infra_port.to_string(), "--qmp-socket", &paths.qmp_socket().to_string_lossy(), "--idle-after-secs", @@ -908,6 +935,8 @@ async fn spawn_supervisor( &paths.docker_socket().to_string_lossy(), "--docker-socket-internal", &paths.docker_socket_internal().to_string_lossy(), + "--docker-socket-stream", + &paths.docker_socket_stream().to_string_lossy(), "--ssh-key", &paths.ssh_key().to_string_lossy(), "--known-hosts", diff --git a/src/utils/vm/state.rs b/src/utils/vm/state.rs index 6c161118..19edfb0b 100644 --- a/src/utils/vm/state.rs +++ b/src/utils/vm/state.rs @@ -183,6 +183,22 @@ impl VmPaths { pub fn internal_ssh_port_file(&self) -> PathBuf { self.root.join("internal-ssh-port") } + /// Supervisor's "infrastructure" SSH lane — a second TCP listener + /// that proxies to QEMU's internal hostfwd identically to the + /// user-facing port, BUT does not count toward the idle-hibernation + /// activity tracker. Long-lived telemetry channels (Avocado.app's + /// agent SSH tunnel, future event-stream consumers) connect here + /// so they neither wake a paused VM nor pin it awake forever. + pub fn infra_ssh_port_file(&self) -> PathBuf { + self.root.join("infra-ssh-port") + } + /// "Infrastructure" docker socket.
Same backing SSH tunnel as + /// `docker_socket()`, but connections here don't count toward idle + /// — meant for streaming subscriptions like `GET /events` that + /// stay open for the VM's lifetime. + pub fn docker_socket_stream(&self) -> PathBuf { + self.root.join("docker-stream.sock") + } /// Absolute path to the artifact directory that was last used for `vm /// start`. The macOS Avocado.app reads this when launched without an /// AVOCADO_VM_DIR env var (Finder/Dock launches inherit a sanitized env @@ -280,9 +296,11 @@ pub fn cleanup_transient(paths: &VmPaths) { paths.lock_file(), paths.docker_socket(), paths.docker_socket_internal(), + paths.docker_socket_stream(), paths.forwarder_pid(), paths.supervisor_pid(), paths.internal_ssh_port_file(), + paths.infra_ssh_port_file(), ] { let _ = std::fs::remove_file(&p); } diff --git a/src/utils/vm/supervisor.rs b/src/utils/vm/supervisor.rs index 9134e55f..dcb911bf 100644 --- a/src/utils/vm/supervisor.rs +++ b/src/utils/vm/supervisor.rs @@ -41,6 +41,15 @@ use tokio::sync::Mutex; use super::qmp::QmpClient; use super::state; +/// Log a supervisor event with a UTC timestamp prefix. Timestamps make +/// pause/wake cycles in `~/.avocado/vm/supervisor.log` legible without +/// having to correlate against shell history. +macro_rules! slog { + ($($arg:tt)*) => {{ + eprintln!("[{}] supervisor: {}", chrono::Utc::now().to_rfc3339(), format_args!($($arg)*)) + }}; +} + /// Arguments passed from the `avocado vm supervise` subcommand into the /// supervisor loop. Plain owned data so the caller can construct it from /// clap-parsed flags without leaking lifetimes. @@ -64,6 +73,16 @@ pub struct RunArgs { /// Host path the supervisor's SSH `-L` tunnel binds to; only the /// docker proxy connects here. pub docker_socket_internal: PathBuf, + /// "Infrastructure" TCP lane — second user-facing port that + /// wakes the VM on connect but does NOT count toward idle. 
+ /// Used by long-lived telemetry channels (Avocado.app's agent SSH + /// tunnel) so they don't pin the VM awake. + pub infra_port: u16, + /// "Infrastructure" docker socket. Same SSH `-L` tunnel as + /// `docker_socket`, but accepted connections here don't count + /// toward idle — meant for `GET /events` style streaming + /// subscriptions. + pub docker_socket_stream: PathBuf, /// SSH private key for tunneling to the guest. pub ssh_key: PathBuf, /// known_hosts file the SSH tunnel uses. @@ -102,7 +121,7 @@ impl State { .await .context("QMP cont")?; self.paused.store(false, Ordering::Relaxed); - eprintln!("supervisor: resumed VM on incoming connection"); + slog!("resumed VM on incoming connection"); } Ok(()) } @@ -145,7 +164,7 @@ impl State { loop { if self.args.docker_socket_internal.exists() { *lock = Some(pid); - eprintln!("supervisor: docker tunnel up (pid {pid})"); + slog!("docker tunnel up (pid {pid})"); return Ok(()); } if !state::pid_alive(pid) { @@ -244,6 +263,28 @@ pub async fn run(args: RunArgs) -> Result<()> { args.docker_socket.display() ); + // Infrastructure TCP lane — wakes the VM on connect, proxies to the + // same internal hostfwd, but does NOT count toward idle. Long-lived + // telemetry channels (desktop's agent SSH tunnel, future event-stream + // consumers) connect here so they don't pin the VM awake. + let infra_tcp_listener = TcpListener::bind(("127.0.0.1", args.infra_port)) + .await + .with_context(|| format!("binding 127.0.0.1:{}", args.infra_port))?; + eprintln!( + "supervisor: infra TCP listening on 127.0.0.1:{} → 127.0.0.1:{} (idle-exempt)", + args.infra_port, args.internal_port + ); + + // Infrastructure docker socket — same SSH tunnel, doesn't count toward + // idle. Meant for `GET /events` streaming subscriptions. 
+ let _ = std::fs::remove_file(&args.docker_socket_stream); + let infra_unix_listener = UnixListener::bind(&args.docker_socket_stream) + .with_context(|| format!("binding {}", args.docker_socket_stream.display()))?; + eprintln!( + "supervisor: infra Unix listening on {} → SSH→/run/docker.sock (idle-exempt)", + args.docker_socket_stream.display() + ); + if args.idle_after_secs > 0 { let state_t = state.clone(); tokio::spawn(async move { @@ -257,7 +298,7 @@ pub async fn run(args: RunArgs) -> Result<()> { let state_sig = state.clone(); tokio::spawn(async move { if let Err(e) = wait_for_term().await { - eprintln!("supervisor: signal handler error: {e:#}"); + slog!("signal handler error: {e:#}"); return; } let _ = state_sig.wake().await; // ensure VM is resumed before we exit @@ -265,32 +306,58 @@ pub async fn run(args: RunArgs) -> Result<()> { std::process::exit(0); }); - // Main accept loop: select between TCP and Unix listeners. Spawned + // Main accept loop: select between TCP/Unix user-facing listeners + // (counted) and the two infra listeners (idle-exempt). Spawned // tasks own their connection through close. loop { tokio::select! 
{ res = tcp_listener.accept() => { let (sock, peer) = match res { Ok(v) => v, - Err(e) => { eprintln!("supervisor: TCP accept error: {e:#}"); continue; } + Err(e) => { slog!("TCP accept error: {e:#}"); continue; } + }; + let s = state.clone(); + let internal_port = args.internal_port; + tokio::spawn(async move { + if let Err(e) = handle_tcp(sock, internal_port, s, /* count */ true).await { + slog!("TCP conn {peer} error: {e:#}"); + } + }); + } + res = infra_tcp_listener.accept() => { + let (sock, peer) = match res { + Ok(v) => v, + Err(e) => { slog!("infra TCP accept error: {e:#}"); continue; } }; let s = state.clone(); let internal_port = args.internal_port; tokio::spawn(async move { - if let Err(e) = handle_tcp(sock, internal_port, s).await { - eprintln!("supervisor: TCP conn {peer} error: {e:#}"); + if let Err(e) = handle_tcp(sock, internal_port, s, /* count */ false).await { + slog!("infra TCP conn {peer} error: {e:#}"); } }); } res = unix_listener.accept() => { let (sock, _peer) = match res { Ok(v) => v, - Err(e) => { eprintln!("supervisor: Unix accept error: {e:#}"); continue; } + Err(e) => { slog!("Unix accept error: {e:#}"); continue; } + }; + let s = state.clone(); + tokio::spawn(async move { + if let Err(e) = handle_docker(sock, s, /* count */ true).await { + slog!("docker conn error: {e:#}"); + } + }); + } + res = infra_unix_listener.accept() => { + let (sock, _peer) = match res { + Ok(v) => v, + Err(e) => { slog!("infra Unix accept error: {e:#}"); continue; } }; let s = state.clone(); tokio::spawn(async move { - if let Err(e) = handle_docker(sock, s).await { - eprintln!("supervisor: docker conn error: {e:#}"); + if let Err(e) = handle_docker(sock, s, /* count */ false).await { + slog!("infra docker conn error: {e:#}"); } }); } @@ -298,12 +365,32 @@ pub async fn run(args: RunArgs) -> Result<()> { } } -async fn handle_tcp(mut incoming: TcpStream, internal_port: u16, state: Arc) -> Result<()> { - state.active_conns.fetch_add(1, Ordering::Relaxed); - 
state.touch(); - - if let Err(e) = state.wake().await { - eprintln!("supervisor: wake failed: {e}"); +/// Proxy a TCP connection from a user-facing or infra listener to QEMU's +/// internal hostfwd. +/// +/// `count`-true: user-facing traffic (a real SSH session, etc.) — +/// bumps `active_conns` + activity time, and calls `wake()` to bring +/// the VM out of hibernation. Drives the VM lifecycle. +/// +/// `count`-false: infrastructure (long-lived telemetry like the +/// desktop's agent SSH tunnel) — does NOT touch activity counters and +/// does NOT wake the VM. Just opportunistically uses the VM if it's +/// already running. Otherwise the inner connect to `internal_port` +/// succeeds (QEMU slirp accepts) but bytes queue without delivery; the +/// caller times out and retries with backoff. Keeps hibernation +/// intact: only real user activity can wake the VM. +async fn handle_tcp( + mut incoming: TcpStream, + internal_port: u16, + state: Arc, + count: bool, +) -> Result<()> { + if count { + state.active_conns.fetch_add(1, Ordering::Relaxed); + state.touch(); + if let Err(e) = state.wake().await { + slog!("wake failed: {e}"); + } } let mut inner = TcpStream::connect(("127.0.0.1", internal_port)) @@ -313,23 +400,44 @@ async fn handle_tcp(mut incoming: TcpStream, internal_port: u16, state: Arc) -> Result<()> { - state.active_conns.fetch_add(1, Ordering::Relaxed); - state.touch(); - - // Wake VM first (QMP cont). Then bring the SSH tunnel up — the - // tunnel's auth handshake needs guest sshd running, which is only - // true post-wake. - state.wake().await.context("waking VM for docker conn")?; - state - .ensure_tunnel() - .await - .context("bringing docker tunnel up")?; +/// Proxy a docker client to the supervisor-managed SSH tunnel. +/// +/// `count`-true: user-facing docker call (avocado build, docker ps from +/// the user shell, etc.) — wakes VM + brings tunnel up if needed, +/// counts toward idle. The VM stays awake until the call finishes. 
+/// +/// `count`-false: infrastructure (containers watcher's `/events` stream, +/// snapshot refreshes) — does NOT wake VM and does NOT ensure the +/// tunnel. Just connects to the existing tunnel socket; if it's down +/// (paused VM, boot still in progress), returns a fast error. Caller +/// backs off and retries. This is what lets hibernation actually stick +/// when the desktop is open: the watcher's reconnect attempts can't +/// pin the VM awake by themselves, only user activity can. +async fn handle_docker(mut client: UnixStream, state: Arc, count: bool) -> Result<()> { + if count { + state.active_conns.fetch_add(1, Ordering::Relaxed); + state.touch(); + state.wake().await.context("waking VM for docker conn")?; + state + .ensure_tunnel() + .await + .context("bringing docker tunnel up")?; + } else if !state.args.docker_socket_internal.exists() { + // Infra: tunnel not currently up. Fail fast — caller (likely + // ContainersWatcher) backs off and retries; a future + // user-driven docker call will bring the tunnel up and the + // next retry will succeed. 
+ return Err(anyhow::anyhow!( + "docker tunnel not up (VM paused or still booting)" + )); + } let mut backend = UnixStream::connect(&state.args.docker_socket_internal) .await @@ -343,8 +451,10 @@ async fn handle_docker(mut client: UnixStream, state: Arc) -> Result<()> let _ = client.shutdown().await; let _ = backend.shutdown().await; - state.active_conns.fetch_sub(1, Ordering::Relaxed); - state.touch(); + if count { + state.active_conns.fetch_sub(1, Ordering::Relaxed); + state.touch(); + } classify_close(res) } @@ -377,9 +487,9 @@ async fn idle_watcher(state: Arc) { let since = now_ms() - state.last_activity_ms.load(Ordering::Relaxed); if since >= state.idle_threshold_ms { match state.pause().await { - Ok(_) => eprintln!("supervisor: paused VM after {since} ms idle"), + Ok(_) => slog!("paused VM after {since} ms idle"), Err(e) => { - eprintln!("supervisor: pause failed: {e}"); + slog!("pause failed: {e}"); state.touch(); // back off } } From 5c4e2118c763d3eea500d9fb6ac641b6c3a58ce5 Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Thu, 4 Jun 2026 16:05:36 -0400 Subject: [PATCH 21/30] chore: rustfmt lifecycle.rs (unblock CI) The query-status `paused` probe line in `build_status` was over the formatter's single-line `if`/`else` width. CI's `cargo fmt --all -- --check` rejected the multiline-rewrite diff. --- src/utils/vm/lifecycle.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/utils/vm/lifecycle.rs b/src/utils/vm/lifecycle.rs index f1995289..cd8bc6b4 100644 --- a/src/utils/vm/lifecycle.rs +++ b/src/utils/vm/lifecycle.rs @@ -431,7 +431,11 @@ pub async fn status() -> Result { // (no socket, QMP unreachable on non-unix, etc.) leave the field // as `None` so callers can distinguish "couldn't tell" from // "definitely paused". 
- let paused = if running { probe_paused(&paths).await } else { None }; + let paused = if running { + probe_paused(&paths).await + } else { + None + }; Ok(VmStatus { running, From cd52b0df98d4c75ddcdfa636f33248904252a6fd Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Thu, 4 Jun 2026 16:05:53 -0400 Subject: [PATCH 22/30] feat(vm): notify desktop on hibernation wake for USB re-attach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the hibernation supervisor sends QMP `cont` to resume a paused VM, fire `vm.notify.woke` to Avocado.app over the existing best-effort IPC channel. The desktop side uses this to re-issue device_available for every USB device it thinks is attached, since the host-side USB/IP helper times out reading from a frozen guest during pause and drops its IOUSBHost claim — leaving vhci_hcd holding a dead socket FD that the agent can't notice on its own. Dispatched via `spawn_blocking` so the sync `client::notify` (with its 100ms timeout) can't briefly stall the supervisor's accept loop on a slow/missing desktop. Notification only fires on the paused→running edge; repeat wakes against an already-running VM are no-ops. --- src/utils/vm/supervisor.rs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/utils/vm/supervisor.rs b/src/utils/vm/supervisor.rs index dcb911bf..54a1dcfe 100644 --- a/src/utils/vm/supervisor.rs +++ b/src/utils/vm/supervisor.rs @@ -114,6 +114,14 @@ impl State { /// fast (single QMP round-trip). Does NOT touch the SSH tunnel: /// TCP-proxy callers don't need it, and bundling it would make /// every SSH probe wait 8s on tunnel spawn during boot. 
+ /// + /// On macOS, fires `vm.notify.woke` to Avocado.app after the + /// successful `cont` so the desktop can re-attach USB devices that + /// the host-side USB/IP helper may have dropped while the VM was + /// paused (the helper times out reading from a frozen guest, drops + /// the IOUSBHost claim, and leaves vhci_hcd holding a dead FD). + /// The notification is best-effort; if the desktop isn't running, + /// it's a silent no-op. async fn wake(self: &Arc) -> Result<()> { let _guard = self.qmp_lock.lock().await; if self.paused.load(Ordering::Relaxed) { @@ -122,6 +130,7 @@ impl State { .context("QMP cont")?; self.paused.store(false, Ordering::Relaxed); slog!("resumed VM on incoming connection"); + notify_woke().await; } Ok(()) } @@ -556,6 +565,23 @@ async fn qmp_send(socket: &Path, cmd: &str, args: Option) -> Ok(()) } +/// Fire `vm.notify.woke` to Avocado.app via the shared CLI/desktop +/// notify channel. Dispatched via `spawn_blocking` because the underlying +/// `client::notify` uses sync std-net I/O with a 100ms timeout; running +/// it directly on the runtime thread would briefly block the supervisor's +/// accept loop on a slow/missing desktop. +#[cfg(target_os = "macos")] +async fn notify_woke() { + tokio::task::spawn_blocking(|| { + super::client::notify("vm.notify.woke", serde_json::json!({})); + }) + .await + .ok(); +} + +#[cfg(not(target_os = "macos"))] +async fn notify_woke() {} + #[cfg(unix)] async fn wait_for_term() -> Result<()> { use tokio::signal::unix::{signal, SignalKind}; From a9994a7b963b2d5816f92feb5f76e2c47768a072 Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Tue, 9 Jun 2026 16:41:12 -0400 Subject: [PATCH 23/30] feat(init): generate permissions/rootfs/initramfs in new projects Switch the `avocado init` template away from a standalone `config` confext that set an empty root password, to the top-level `permissions:` model: a `dev` profile referenced by explicit `rootfs:`/`initramfs:` blocks (matching the avocado-vm reference layout). 
The images and permissions sections are placed after extensions and before the sdk. Bump the generated `cli_requirement` to >=0.41.0, the first release carrying the top-level permissions schema, and update the init tests to assert the new structure. --- configs/default.yaml | 30 ++++++++++++++++++++---------- src/commands/init.rs | 8 ++++++-- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/configs/default.yaml b/configs/default.yaml index 3b0a6c7b..208deea1 100644 --- a/configs/default.yaml +++ b/configs/default.yaml @@ -1,4 +1,4 @@ -cli_requirement: ">=0.26.0" +cli_requirement: ">=0.41.0" default_target: "{target}" @@ -24,7 +24,6 @@ runtimes: - avocado-ext-dev - avocado-ext-sshd-dev - avocado-bsp-{{ avocado.target.board }} - - config - app packages: avocado-runtime: "*" @@ -62,15 +61,26 @@ extensions: #curl: "*" #iperf3: "*" - # Generated default config extension - # Use or modify this to configure "real" user accounts (passwd, shadow, group) - # or configure other system services - config: - types: - - confext - version: "0.1.0" +## +## Images +## + +rootfs: + permissions: dev - # NOT FOR PRODUCTION: Set root password to empty string +initramfs: + permissions: dev + +## +## Permissions +## + +# NOT FOR PRODUCTION: the `dev` profile sets an empty root password and is +# referenced by rootfs/initramfs above. Define additional profiles with real +# users, groups, and hashed passwords, then point rootfs/initramfs at them via +# `permissions: `. 
+permissions: + dev: users: root: password: "" diff --git a/src/commands/init.rs b/src/commands/init.rs index 7f4d7c3f..a00d3ac3 100644 --- a/src/commands/init.rs +++ b/src/commands/init.rs @@ -1127,7 +1127,7 @@ mod tests { let content = fs::read_to_string(&config_path).unwrap(); let expected_target = InitCommand::get_default_target(); assert!(content.contains(&format!("default_target: \"{expected_target}\""))); - assert!(content.contains("cli_requirement: \">=0.26.0\"")); + assert!(content.contains("cli_requirement: \">=0.41.0\"")); assert!(content.contains("distro:")); assert!(content.contains("channel: edge")); assert!(content.contains("release: 2024")); @@ -1140,11 +1140,15 @@ mod tests { assert!( content.contains("image: \"docker.io/avocadolinux/sdk:{{ config.distro.release }}-{{ config.distro.channel }}\"") ); + // Empty root password now comes from the `dev` permissions profile + // referenced by rootfs/initramfs, not a standalone `config` confext. + assert!(content.contains("permissions:")); + assert!(content.contains("rootfs:")); + assert!(content.contains("initramfs:")); assert!(content.contains("extensions:")); assert!(content.contains("app:")); assert!(content.contains("- sysext")); assert!(content.contains("- confext")); - assert!(content.contains("config:")); assert!(content.contains("avocado-sdk-toolchain: \"*\"")); } From 3093eca03118a50270017c21c100ad90098c44df Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Tue, 9 Jun 2026 16:41:29 -0400 Subject: [PATCH 24/30] chore(init): drop channel suffix from generated SDK image tag The generated SDK image now pins only the distro release (docker.io/avocadolinux/sdk:{{ config.distro.release }}); the channel no longer participates in the image tag. 
--- configs/default.yaml | 2 +- src/commands/init.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/default.yaml b/configs/default.yaml index 208deea1..ebc205d3 100644 --- a/configs/default.yaml +++ b/configs/default.yaml @@ -90,7 +90,7 @@ permissions: ## sdk: - image: "docker.io/avocadolinux/sdk:{{ config.distro.release }}-{{ config.distro.channel }}" + image: "docker.io/avocadolinux/sdk:{{ config.distro.release }}" container_args: - --privileged diff --git a/src/commands/init.rs b/src/commands/init.rs index a00d3ac3..e56cf036 100644 --- a/src/commands/init.rs +++ b/src/commands/init.rs @@ -1138,7 +1138,7 @@ mod tests { assert!(content.contains("avocado-ext-dev:")); assert!(content.contains("type: package")); assert!( - content.contains("image: \"docker.io/avocadolinux/sdk:{{ config.distro.release }}-{{ config.distro.channel }}\"") + content.contains("image: \"docker.io/avocadolinux/sdk:{{ config.distro.release }}\"") ); // Empty root password now comes from the `dev` permissions profile // referenced by rootfs/initramfs, not a standalone `config` confext. 
From fa036b3ba145370fe9c9cbeb06e38f59f0b98efa Mon Sep 17 00:00:00 2001 From: nicksinas Date: Wed, 10 Jun 2026 08:37:52 -0500 Subject: [PATCH 25/30] Add Connect runtime listing and connect-signed deploy support --- src/commands/connect/clean.rs | 18 ++++ src/commands/connect/cohorts.rs | 13 +++ src/commands/connect/deploy.rs | 123 +++++++++++++++---------- src/commands/connect/init.rs | 60 +++++++++--- src/commands/connect/mod.rs | 1 + src/commands/connect/orgs.rs | 14 +++ src/commands/connect/projects.rs | 11 +++ src/commands/connect/runtimes.rs | 66 ++++++++++++++ src/commands/connect/upload.rs | 97 ++++++++++++++++---- src/commands/runtime/deploy.rs | 151 ++++++++++++++++++++++++++++--- src/main.rs | 93 ++++++++++++++++++- 11 files changed, 547 insertions(+), 100 deletions(-) create mode 100644 src/commands/connect/runtimes.rs diff --git a/src/commands/connect/clean.rs b/src/commands/connect/clean.rs index 8c207137..68110fc7 100644 --- a/src/commands/connect/clean.rs +++ b/src/commands/connect/clean.rs @@ -1,12 +1,15 @@ use anyhow::{Context, Result}; +use serde_json::json; use std::path::Path; use crate::utils::config_edit; use crate::utils::output::{print_info, print_success, print_warning, OutputLevel}; +use crate::utils::output_format::{emit_json_object, OutputFormat}; pub struct ConnectCleanCommand { pub runtime: String, pub config_path: String, + pub output: OutputFormat, } impl ConnectCleanCommand { @@ -21,6 +24,9 @@ impl ConnectCleanCommand { let config_dir = config_path.parent().unwrap_or(Path::new(".")); let mut any_changes = false; + let mut removed_connect_section = false; + let mut removed_extension = false; + let mut removed_overlay = false; // 1. 
Remove connect: section from avocado.yaml match config_edit::remove_connect_fields(config_path) { @@ -30,6 +36,7 @@ impl ConnectCleanCommand { OutputLevel::Normal, ); any_changes = true; + removed_connect_section = true; } Ok(false) => { print_info( @@ -53,6 +60,7 @@ impl ConnectCleanCommand { OutputLevel::Normal, ); any_changes = true; + removed_extension = true; } Ok(false) => { print_info( @@ -78,6 +86,7 @@ impl ConnectCleanCommand { OutputLevel::Normal, ); any_changes = true; + removed_overlay = true; } else { print_info( &format!("{} does not exist, skipping.", overlay_conn_dir.display()), @@ -85,6 +94,15 @@ impl ConnectCleanCommand { ); } + if self.output.is_json() { + emit_json_object(&json!({ + "removed_connect_section": removed_connect_section, + "removed_extension": removed_extension, + "removed_overlay": removed_overlay, + })); + return Ok(()); + } + if any_changes { println!(); print_success("Connect configuration cleaned.", OutputLevel::Normal); diff --git a/src/commands/connect/cohorts.rs b/src/commands/connect/cohorts.rs index d7b18718..54810736 100644 --- a/src/commands/connect/cohorts.rs +++ b/src/commands/connect/cohorts.rs @@ -1,14 +1,17 @@ use anyhow::Result; +use serde_json::json; use crate::commands::connect::client::{ self, ConnectClient, CreateCohortParams, CreateCohortRequest, }; use crate::utils::output::{print_info, print_success, OutputLevel}; +use crate::utils::output_format::{emit_json_object, OutputFormat}; pub struct ConnectCohortsListCommand { pub org: String, pub project: String, pub profile: Option, + pub output: OutputFormat, } impl ConnectCohortsListCommand { @@ -20,6 +23,16 @@ impl ConnectCohortsListCommand { let cohorts = client.list_cohorts(&self.org, &self.project).await?; + if self.output.is_json() { + emit_json_object(&json!({ + "cohorts": cohorts.iter().map(|c| json!({ + "id": c.id, + "name": c.name, + })).collect::>() + })); + return Ok(()); + } + if cohorts.is_empty() { print_info("No cohorts found.", 
OutputLevel::Normal); return Ok(()); diff --git a/src/commands/connect/deploy.rs b/src/commands/connect/deploy.rs index 62fb6059..37e5566c 100644 --- a/src/commands/connect/deploy.rs +++ b/src/commands/connect/deploy.rs @@ -1,11 +1,15 @@ use anyhow::{Context, Result}; use chrono::Utc; +use serde_json::json; use crate::commands::connect::client::{ self, CohortInfo, ConnectClient, CreateDeploymentParams, CreateDeploymentRequest, RuntimeListItem, }; use crate::utils::output::{print_info, print_success, print_warning, OutputLevel}; +use crate::utils::output_format::{ + emit_json_event, is_json_output_active, JsonOutputGuard, OutputFormat, +}; pub struct ConnectDeployCommand { pub org: String, @@ -17,22 +21,30 @@ pub struct ConnectDeployCommand { pub tags: Vec, pub activate: bool, pub profile: Option, + pub output: OutputFormat, } impl ConnectDeployCommand { pub async fn execute(&self) -> Result<()> { + if self.output.is_json() { + if self.runtime.is_none() { + anyhow::bail!("--runtime is required when using --output json"); + } + if self.cohort.is_none() { + anyhow::bail!("--cohort is required when using --output json"); + } + } + + let _json_guard = self.output.is_json().then(JsonOutputGuard::enable); + let config = client::load_config()? .ok_or_else(|| anyhow::anyhow!("Not logged in. 
Run 'avocado connect auth login'"))?; let (_, profile) = config.resolve_profile(self.profile.as_deref(), Some(&self.org))?; let client = ConnectClient::from_profile(profile)?; - // Select runtime (fetches full record even when --runtime is passed) let selected_runtime = self.resolve_runtime(&client).await?; - - // Select cohort (fetches full record even when --cohort is passed) let selected_cohort = self.resolve_cohort(&client).await?; - // Generate deployment name if not provided let version_display = selected_runtime .display_version .as_deref() @@ -42,7 +54,6 @@ impl ConnectDeployCommand { format!("{version_display}-{timestamp}") }); - // Create deployment print_info( &format!("Creating deployment '{deploy_name}'..."), OutputLevel::Normal, @@ -62,38 +73,13 @@ impl ConnectDeployCommand { .create_deployment(&self.org, &self.project, &req) .await?; - // Always print ID first — if activation fails, user still has the deployment ID - println!(); - print_success( - &format!( - "Deployment '{}' created (id: {})", - deploy_name, deployment.id - ), - OutputLevel::Normal, - ); - println!(" Runtime: {} ({})", version_display, selected_runtime.id); - if !selected_cohort.name.is_empty() { - println!( - " Cohort: {} ({})", - selected_cohort.name, selected_cohort.id - ); - } else { - println!(" Cohort: {}", selected_cohort.id); - } - if !self.tags.is_empty() { - println!(" Tags: {}", self.tags.join(", ")); - } - - // Optionally activate - if self.activate { + let final_status = if self.activate { print_info("Activating deployment...", OutputLevel::Normal); match client .activate_deployment(&self.org, &self.project, &deployment.id) .await { - Ok(activated) => { - println!(" Status: {}", activated.status); - } + Ok(activated) => activated.status, Err(e) => { print_warning( &format!( @@ -102,10 +88,44 @@ impl ConnectDeployCommand { ), OutputLevel::Normal, ); + deployment.status.clone() } } } else { - println!(" Status: {}", deployment.status); + deployment.status.clone() + }; + + 
if self.output.is_json() { + emit_json_event(&json!({ + "event": "complete", + "deployment_id": deployment.id, + "deployment_name": deploy_name, + "runtime_id": selected_runtime.id, + "cohort_id": selected_cohort.id, + "status": final_status, + })); + } else { + println!(); + print_success( + &format!( + "Deployment '{}' created (id: {})", + deploy_name, deployment.id + ), + OutputLevel::Normal, + ); + println!(" Runtime: {} ({})", version_display, selected_runtime.id); + if !selected_cohort.name.is_empty() { + println!( + " Cohort: {} ({})", + selected_cohort.name, selected_cohort.id + ); + } else { + println!(" Cohort: {}", selected_cohort.id); + } + if !self.tags.is_empty() { + println!(" Tags: {}", self.tags.join(", ")); + } + println!(" Status: {final_status}"); } Ok(()) @@ -229,24 +249,13 @@ pub async fn deploy_after_upload(params: &DeployAfterUploadParams<'_>) -> Result let deployment = client.create_deployment(org, project, &req).await?; - // Always print ID first - print_success( - &format!( - "Deployment '{}' created (id: {})", - deploy_name, deployment.id - ), - OutputLevel::Normal, - ); - - if *activate { + let final_status = if *activate { print_info("Activating deployment...", OutputLevel::Normal); match client .activate_deployment(org, project, &deployment.id) .await { - Ok(activated) => { - println!(" Status: {}", activated.status); - } + Ok(activated) => activated.status, Err(e) => { print_warning( &format!( @@ -255,10 +264,30 @@ pub async fn deploy_after_upload(params: &DeployAfterUploadParams<'_>) -> Result ), OutputLevel::Normal, ); + deployment.status.clone() } } } else { - println!(" Status: {}", deployment.status); + deployment.status.clone() + }; + + if is_json_output_active() { + emit_json_event(&json!({ + "event": "deployed", + "deployment_id": deployment.id, + "deployment_name": deploy_name, + "cohort_id": cohort_id, + "status": final_status, + })); + } else { + print_success( + &format!( + "Deployment '{}' created (id: {})", + 
deploy_name, deployment.id + ), + OutputLevel::Normal, + ); + println!(" Status: {final_status}"); } Ok(()) diff --git a/src/commands/connect/init.rs b/src/commands/connect/init.rs index 0be29356..ef87ab80 100644 --- a/src/commands/connect/init.rs +++ b/src/commands/connect/init.rs @@ -1,5 +1,6 @@ use anyhow::{Context, Result}; use chrono::Utc; +use serde_json::json; use std::path::Path; use crate::commands::connect::client::{ @@ -8,6 +9,7 @@ use crate::commands::connect::client::{ }; use crate::utils::config_edit; use crate::utils::output::{print_info, print_success, print_warning, OutputLevel}; +use crate::utils::output_format::{emit_json_event, JsonOutputGuard, OutputFormat}; pub struct ConnectInitCommand { pub org: Option, @@ -16,10 +18,23 @@ pub struct ConnectInitCommand { pub runtime: String, pub config_path: String, pub profile: Option, + pub output: OutputFormat, } impl ConnectInitCommand { pub async fn execute(&self) -> Result<()> { + if self.output.is_json() { + if self.org.is_none() { + anyhow::bail!("--org is required when using --output json"); + } + if self.project.is_none() { + anyhow::bail!("--project is required when using --output json"); + } + } + + // Suppress all prose output in JSON mode — callers read the NDJSON stream. + let _json_guard = self.output.is_json().then(JsonOutputGuard::enable); + // 1. Verify login let mut config = client::load_config()? .ok_or_else(|| anyhow::anyhow!("Not logged in. Run 'avocado connect auth login'"))?; @@ -41,7 +56,6 @@ impl ConnectInitCommand { // 2. Select organization let selected_org = if let Some(ref org_flag) = self.org { - // Non-interactive: use provided org me.organizations .iter() .find(|o| o.id == *org_flag) @@ -71,7 +85,6 @@ impl ConnectInitCommand { // 3. Ensure we have an org-scoped profile for the selected org (unless --profile was explicit) if self.profile.is_none() { if let Some((_, org_profile)) = config.find_profile_by_org(&selected_org.id) { - // Reuse existing org-scoped profile. 
print_info( &format!( "Using existing org-scoped profile for '{}'.", @@ -81,7 +94,6 @@ impl ConnectInitCommand { ); client = ConnectClient::from_profile(org_profile)?; } else { - // Create a new org-scoped token and profile. let hostname = std::env::var("HOSTNAME") .or_else(|_| std::env::var("COMPUTERNAME")) .unwrap_or_else(|_| "unknown".to_string()); @@ -97,7 +109,6 @@ impl ConnectInitCommand { .create_org_token(&selected_org.id, &token_name) .await?; - // Derive a profile name from the org name. let profile_name = selected_org.name.to_lowercase().replace(' ', "-"); let new_profile = Profile { api_url: initial_api_url.clone(), @@ -185,6 +196,12 @@ impl ConnectInitCommand { ); Some(cohort) } else if cohorts.len() > 1 { + if self.output.is_json() { + anyhow::bail!( + "--cohort is required when using --output json and multiple cohorts exist. Available: {}", + cohorts.iter().map(|c| format!("{} ({})", c.name, c.id)).collect::>().join(", ") + ); + } Some(prompt_select_cohort(&cohorts)?) } else { print_info( @@ -238,7 +255,8 @@ impl ConnectInitCommand { let overlay_path = config_dir.join(&overlay_dir); let config_toml_path = overlay_path.join("etc/avocado-conn/config.toml"); - if config_toml_path.exists() { + // In JSON mode, auto-overwrite existing config. In interactive mode, prompt. + if config_toml_path.exists() && !self.output.is_json() { print_warning( "Device already has connect configuration at:", OutputLevel::Normal, @@ -335,16 +353,28 @@ keepalive_secs = 30 OutputLevel::Normal, ); - // 13. Print summary - print_final_summary( - &selected_org, - &selected_project, - &selected_cohort, - &server_key.public_key_hex, - &server_key.keyid, - Some(&token_name), - &self.config_path, - ); + // 13. 
Print summary or emit JSON completion event + if self.output.is_json() { + emit_json_event(&json!({ + "event": "complete", + "org": selected_org.id, + "org_name": selected_org.name, + "project": selected_project.id, + "project_name": selected_project.name, + "cohort": selected_cohort.as_ref().map(|c| c.id.as_str()), + "claim_token": token_name, + })); + } else { + print_final_summary( + &selected_org, + &selected_project, + &selected_cohort, + &server_key.public_key_hex, + &server_key.keyid, + Some(&token_name), + &self.config_path, + ); + } Ok(()) } diff --git a/src/commands/connect/mod.rs b/src/commands/connect/mod.rs index b5228838..15be09d2 100644 --- a/src/commands/connect/mod.rs +++ b/src/commands/connect/mod.rs @@ -11,6 +11,7 @@ pub mod init; pub mod keys; pub mod orgs; pub mod projects; +pub mod runtimes; pub mod server_key; pub mod trust; pub mod upload; diff --git a/src/commands/connect/orgs.rs b/src/commands/connect/orgs.rs index 8618a72a..a30d2169 100644 --- a/src/commands/connect/orgs.rs +++ b/src/commands/connect/orgs.rs @@ -1,10 +1,13 @@ use anyhow::Result; +use serde_json::json; use crate::commands::connect::client::{self, ConnectClient}; use crate::utils::output::{print_info, OutputLevel}; +use crate::utils::output_format::{emit_json_object, OutputFormat}; pub struct ConnectOrgsListCommand { pub profile: Option, + pub output: OutputFormat, } impl ConnectOrgsListCommand { @@ -16,6 +19,17 @@ impl ConnectOrgsListCommand { let me = client.get_me_full().await?; + if self.output.is_json() { + emit_json_object(&json!({ + "organizations": me.organizations.iter().map(|o| json!({ + "id": o.id, + "name": o.name, + "role": o.role, + })).collect::>() + })); + return Ok(()); + } + if me.organizations.is_empty() { print_info("No organizations found.", OutputLevel::Normal); return Ok(()); diff --git a/src/commands/connect/projects.rs b/src/commands/connect/projects.rs index 761443cc..fd14e707 100644 --- a/src/commands/connect/projects.rs +++ 
b/src/commands/connect/projects.rs @@ -10,6 +10,7 @@ use crate::utils::output_format::{emit_json_object, OutputFormat}; pub struct ConnectProjectsListCommand { pub org: String, pub profile: Option, + pub output: OutputFormat, } impl ConnectProjectsListCommand { @@ -21,6 +22,16 @@ impl ConnectProjectsListCommand { let projects = client.list_projects(&self.org).await?; + if self.output.is_json() { + emit_json_object(&json!({ + "projects": projects.iter().map(|p| json!({ + "id": p.id, + "name": p.name, + })).collect::>() + })); + return Ok(()); + } + if projects.is_empty() { print_info( &format!("No projects found in org '{}'.", self.org), diff --git a/src/commands/connect/runtimes.rs b/src/commands/connect/runtimes.rs new file mode 100644 index 00000000..d52c8085 --- /dev/null +++ b/src/commands/connect/runtimes.rs @@ -0,0 +1,66 @@ +use anyhow::Result; +use serde_json::json; + +use crate::commands::connect::client::{self, ConnectClient}; +use crate::utils::output::{print_info, OutputLevel}; +use crate::utils::output_format::{emit_json_object, OutputFormat}; + +pub struct ConnectRuntimesListCommand { + pub org: String, + pub project: String, + pub profile: Option, + pub output: OutputFormat, +} + +impl ConnectRuntimesListCommand { + pub async fn execute(&self) -> Result<()> { + let config = client::load_config()? + .ok_or_else(|| anyhow::anyhow!("Not logged in. 
Run 'avocado connect auth login'"))?; + let (_, profile) = config.resolve_profile(self.profile.as_deref(), Some(&self.org))?; + let client = ConnectClient::from_profile(profile)?; + + let runtimes = client.list_runtimes(&self.org, &self.project).await?; + + if self.output.is_json() { + emit_json_object(&json!({ + "runtimes": runtimes.iter().map(|r| json!({ + "id": r.id, + "version": r.version, + "display_version": r.display_version, + "status": r.status, + })).collect::>() + })); + return Ok(()); + } + + if runtimes.is_empty() { + print_info("No runtimes found.", OutputLevel::Normal); + return Ok(()); + } + + let max_version = runtimes + .iter() + .map(|r| r.display_version.as_deref().unwrap_or(&r.version).len()) + .max() + .unwrap_or(0); + + println!( + "{:, pub deploy_tags: Vec, pub deploy_activate: bool, + pub output: OutputFormat, } struct ArtifactInfo { @@ -55,6 +57,8 @@ impl TaskPrerequisites for ConnectUploadCommand { impl ConnectUploadCommand { pub async fn execute(&self) -> Result<()> { + let _json_guard = self.output.is_json().then(JsonOutputGuard::enable); + // 0. 
Validate deploy-after-upload flags super::deploy::validate_deploy_flags( &self.deploy_cohort, @@ -341,7 +345,7 @@ impl ConnectUploadCommand { OutputLevel::Normal, ); - if self.publish { + let final_status = if self.publish { print_info("Publishing runtime...", OutputLevel::Normal); let published = connect .publish_runtime(&self.org, &self.project, &runtime.id) @@ -353,6 +357,18 @@ impl ConnectUploadCommand { ), OutputLevel::Normal, ); + published.status + } else { + runtime.status.clone() + }; + + if is_json_output_active() { + emit_json_event(&serde_json::json!({ + "event": "complete", + "runtime_id": runtime.id, + "version": runtime.version, + "status": final_status, + })); } if let Some(ref cohort_id) = self.deploy_cohort { @@ -411,7 +427,7 @@ impl ConnectUploadCommand { OutputLevel::Normal, ); - if self.publish { + let final_status = if self.publish { print_info("Publishing runtime...", OutputLevel::Normal); let published = connect .publish_runtime(&self.org, &self.project, &runtime.id) @@ -423,6 +439,18 @@ impl ConnectUploadCommand { ), OutputLevel::Normal, ); + published.status + } else { + result.status.clone() + }; + + if is_json_output_active() { + emit_json_event(&serde_json::json!({ + "event": "complete", + "runtime_id": runtime.id, + "version": result.version, + "status": final_status, + })); } if let Some(ref cohort_id) = self.deploy_cohort { @@ -560,6 +588,7 @@ impl ConnectUploadCommand { let volume_manager = VolumeManager::new(container.container_tool.clone(), false); let volume_state = volume_manager.get_or_create_volume(&container.cwd).await?; + let json_mode = is_json_output_active(); let multi = MultiProgress::new(); let mut all_completed = Vec::new(); @@ -575,14 +604,19 @@ impl ConnectUploadCommand { ) })?; - let pb = multi.add(ProgressBar::new(artifact.size_bytes)); - pb.set_style( - ProgressStyle::with_template( - " {msg} [{wide_bar:.cyan/blue}] {bytes}/{total_bytes} ({bytes_per_sec})", - )? 
- .progress_chars("#>-"), - ); - pb.set_message(artifact.name.clone()); + let pb = if json_mode { + ProgressBar::hidden() + } else { + let pb = multi.add(ProgressBar::new(artifact.size_bytes)); + pb.set_style( + ProgressStyle::with_template( + " {msg} [{wide_bar:.cyan/blue}] {bytes}/{total_bytes} ({bytes_per_sec})", + )? + .progress_chars("#>-"), + ); + pb.set_message(artifact.name.clone()); + pb + }; // Convert container path (/opt/_avocado/...) to volume-relative path let volume_path = artifact @@ -740,7 +774,16 @@ impl ConnectUploadCommand { } }; - pb.finish_with_message(format!("{} (done)", artifact.name)); + if json_mode { + emit_json_event(&serde_json::json!({ + "event": "artifact_uploaded", + "artifact": artifact.name, + "image_id": artifact.image_id, + "bytes": artifact.size_bytes, + })); + } else { + pb.finish_with_message(format!("{} (done)", artifact.name)); + } // Ensure container exits cleanly let status = child.wait().await?; @@ -1002,6 +1045,7 @@ async fn upload_artifacts( OutputLevel::Normal, ); + let json_mode = is_json_output_active(); let multi = MultiProgress::new(); for spec in &to_upload { @@ -1010,14 +1054,19 @@ async fn upload_artifacts( .find(|a| a.image_id == spec.image_id) .with_context(|| format!("API returned unknown image_id '{}'", spec.image_id))?; - let pb = multi.add(ProgressBar::new(artifact.size_bytes)); - pb.set_style( - ProgressStyle::with_template( - " {msg} [{wide_bar:.cyan/blue}] {bytes}/{total_bytes} ({bytes_per_sec})", - )? - .progress_chars("#>-"), - ); - pb.set_message(format!("{}...", &spec.image_id[..8])); + let pb = if json_mode { + ProgressBar::hidden() + } else { + let pb = multi.add(ProgressBar::new(artifact.size_bytes)); + pb.set_style( + ProgressStyle::with_template( + " {msg} [{wide_bar:.cyan/blue}] {bytes}/{total_bytes} ({bytes_per_sec})", + )? 
+ .progress_chars("#>-"), + ); + pb.set_message(format!("{}...", &spec.image_id[..8])); + pb + }; let mut file = tokio::fs::File::open(&artifact.path).await?; let mut completed_parts = Vec::new(); @@ -1118,7 +1167,15 @@ async fn upload_artifacts( pb.set_position(std::cmp::min(offset + PART_SIZE, artifact.size_bytes)); } - pb.finish_with_message(format!("{}... (done)", &spec.image_id[..8])); + if json_mode { + emit_json_event(&serde_json::json!({ + "event": "artifact_uploaded", + "image_id": spec.image_id, + "bytes": artifact.size_bytes, + })); + } else { + pb.finish_with_message(format!("{}... (done)", &spec.image_id[..8])); + } all_completed.push(BlobParts { image_id: spec.image_id.clone(), diff --git a/src/commands/runtime/deploy.rs b/src/commands/runtime/deploy.rs index 618bd11c..b678dc80 100644 --- a/src/commands/runtime/deploy.rs +++ b/src/commands/runtime/deploy.rs @@ -482,22 +482,27 @@ impl RuntimeDeployCommand { .and_then(|s| s.parse().ok()) .unwrap_or(DEFAULT_DEPLOY_REPO_PORT); let mut container_args = config.merge_sdk_container_args(self.container_args.as_ref()); - // If the SDK container runs with host networking, the published-port - // (`-p`) trick is both unnecessary and discarded by docker, so the - // shim skips it (see prepare_mac_deploy_net). + // Whether the SDK container requests host networking. On avocado-vm the + // published-port (`-p`) trick is unnecessary and discarded by docker, + // so the shim skips it; on Docker Desktop host networking traps the + // repo port in the LinuxKit VM, so the shim strips it instead (see + // prepare_mac_deploy_net). 
let host_net = container_args .as_deref() - .map(|args| { - args.iter() - .any(|a| a == "--network=host" || a == "--net=host") - }) + .map(args_have_host_network) .unwrap_or(false); let MacDeployNet { env: mac_env, container_args: mac_args, + strip_host_net, forward, } = prepare_mac_deploy_net(&self.device, repo_port, host_net, self.verbose).await; env_vars.extend(mac_env); + if strip_host_net { + if let Some(args) = container_args.as_mut() { + strip_host_network_args(args); + } + } if !mac_args.is_empty() { container_args.get_or_insert_with(Vec::new).extend(mac_args); } @@ -785,10 +790,53 @@ struct MacDeployNet { env: HashMap, /// Extra `docker run` args (publish the repo port: `-p :`). container_args: Vec, + /// Strip `--network=host` from the merged container args before running. + /// Docker Desktop traps a host-networked container's ports inside the + /// LinuxKit VM (unreachable from the Mac/LAN), and `-p` publishing is + /// incompatible with host networking — so the deploy container must run on + /// its own bridge netns. The caller applies this to the merged args. + strip_host_net: bool, /// A qemu slirp forward to tear down afterward (avocado-vm only). forward: Option, } +/// True if `args` request host networking, in either the `--network=host` / +/// `--net=host` form or the space-separated `--network host` / `--net host` +/// form. +fn args_have_host_network(args: &[String]) -> bool { + args.iter().enumerate().any(|(i, a)| { + a == "--network=host" + || a == "--net=host" + || ((a == "--network" || a == "--net") + && args.get(i + 1).map(|v| v == "host").unwrap_or(false)) + }) +} + +/// Remove host-networking flags from container args so a published port (`-p`) +/// takes effect. Handles both the `--network=host` / `--net=host` and +/// space-separated `--network host` / `--net host` forms. See +/// [`args_have_host_network`] and `prepare_mac_deploy_net`. 
+fn strip_host_network_args(args: &mut Vec) { + let mut out = Vec::with_capacity(args.len()); + let mut i = 0; + while i < args.len() { + let a = &args[i]; + if a == "--network=host" || a == "--net=host" { + i += 1; + continue; + } + if (a == "--network" || a == "--net") + && args.get(i + 1).map(|v| v == "host").unwrap_or(false) + { + i += 2; + continue; + } + out.push(a.clone()); + i += 1; + } + *args = out; +} + /// A qemu slirp host-forward opened for the duration of one deploy. Teardown /// is async, so it's closed explicitly by the caller rather than via `Drop`. struct OpenForward { @@ -821,6 +869,7 @@ async fn prepare_mac_deploy_net( let mut net = MacDeployNet { env: HashMap::new(), container_args: Vec::new(), + strip_host_net: false, forward: None, }; @@ -830,14 +879,29 @@ async fn prepare_mac_deploy_net( return net; } - // Publish the repo port out of the container's netns — but only when it - // has its own. With `--network=host` the container already shares the - // VM's network (so the qemu hostfwd below reaches `:{port}` directly), - // and docker discards `-p` with a "Published ports are discarded when - // using host network mode" warning, so skip it. - if !host_net { + let vm_routing = is_vm_routing_active(); + + // Expose the in-container TUF repo server to the LAN device. The two macOS + // backends need different bridges: + // + // * Docker Desktop (no avocado-vm): vpnkit only forwards *published* + // (`-p`) ports to the Mac, and `-p` is incompatible with host + // networking — a host-networked container's ports stay trapped in the + // LinuxKit VM, unreachable from the Mac/LAN (the "Connection refused" + // people hit). So force the deploy container onto its own bridge netns + // (strip `--network=host`) and publish the port on 0.0.0.0. Deploy only + // needs outbound SSH + inbound HTTP, both of which work on a bridge. 
+ // + // * avocado-vm: the container stays host-networked *inside qemu* and the + // qemu slirp hostfwd below bridges the port to the Mac; docker discards + // `-p` under host networking, so only publish when it has its own netns. + let publish = if vm_routing { !host_net } else { true }; + if !vm_routing && host_net { + net.strip_host_net = true; + } + if publish { net.container_args.push("-p".to_string()); - net.container_args.push(format!("{port}:{port}")); + net.container_args.push(format!("0.0.0.0:{port}:{port}")); } // Both Mac contexts: tell the device the host LAN IP it can reach us on, @@ -870,7 +934,7 @@ async fn prepare_mac_deploy_net( // avocado-vm only: open a slirp hostfwd so the LAN device can reach the // published port through qemu. Docker Desktop forwards `-p` to the host // itself (vpnkit), so it needs no qemu step. - if is_vm_routing_active() { + if vm_routing { let sock = match crate::utils::vm::state::VmPaths::resolve() { Ok(p) => p.qmp_socket(), Err(e) => { @@ -1284,4 +1348,61 @@ mod tests { assert!(script.contains("trap cleanup EXIT")); assert!(script.contains("kill \"$HTTP_PID\"")); } + + // --- host-networking detection / stripping (Docker Desktop deploy) --- + + fn sv(args: &[&str]) -> Vec { + args.iter().map(|s| s.to_string()).collect() + } + + #[test] + fn test_args_have_host_network_eq_form() { + assert!(args_have_host_network(&sv(&["--privileged", "--network=host"]))); + assert!(args_have_host_network(&sv(&["--net=host"]))); + } + + #[test] + fn test_args_have_host_network_space_form() { + assert!(args_have_host_network(&sv(&["--network", "host"]))); + assert!(args_have_host_network(&sv(&["--net", "host"]))); + } + + #[test] + fn test_args_have_host_network_none() { + assert!(!args_have_host_network(&sv(&["--privileged"]))); + // `--network` followed by something other than host is not host networking. 
+ assert!(!args_have_host_network(&sv(&["--network", "bridge"]))); + assert!(!args_have_host_network(&sv(&[]))); + } + + #[test] + fn test_strip_host_network_eq_form() { + let mut args = sv(&["--privileged", "--network=host", "-v", "/x:/y"]); + strip_host_network_args(&mut args); + assert_eq!(args, sv(&["--privileged", "-v", "/x:/y"])); + assert!(!args_have_host_network(&args)); + } + + #[test] + fn test_strip_host_network_space_form() { + let mut args = sv(&["--net", "host", "--privileged"]); + strip_host_network_args(&mut args); + assert_eq!(args, sv(&["--privileged"])); + } + + #[test] + fn test_strip_host_network_preserves_non_host_network() { + // A non-host `--network ` must survive untouched. + let mut args = sv(&["--network", "mynet", "--privileged"]); + strip_host_network_args(&mut args); + assert_eq!(args, sv(&["--network", "mynet", "--privileged"])); + } + + #[test] + fn test_strip_host_network_noop_when_absent() { + let mut args = sv(&["--privileged", "-p", "8585:8585"]); + let before = args.clone(); + strip_host_network_args(&mut args); + assert_eq!(args, before); + } } diff --git a/src/main.rs b/src/main.rs index cd09b625..4125b889 100644 --- a/src/main.rs +++ b/src/main.rs @@ -43,6 +43,7 @@ use commands::connect::server_key::ConnectServerKeyCommand; use commands::connect::trust::{ ConnectTrustPromoteRootCommand, ConnectTrustRotateServerKeyCommand, ConnectTrustStatusCommand, }; +use commands::connect::runtimes::ConnectRuntimesListCommand; use commands::connect::upload::ConnectUploadCommand; use commands::ext::{ ExtBuildCommand, ExtCheckoutCommand, ExtCleanCommand, ExtDepsCommand, ExtDnfCommand, @@ -598,6 +599,9 @@ enum ConnectCommands { /// Profile name (defaults to the active default profile) #[arg(long)] profile: Option, + /// Output format (human prose or NDJSON event stream) + #[arg(long, value_enum, default_value_t = crate::utils::output_format::OutputFormat::Human)] + output: crate::utils::output_format::OutputFormat, }, /// Remove connect 
configuration (connect section, connect-config extension, and device config overlay) Clean { @@ -607,6 +611,9 @@ enum ConnectCommands { /// Path to avocado.yaml configuration file #[arg(short = 'C', long, default_value = "avocado.yaml")] config: String, + /// Output format (human prose or single JSON object) + #[arg(long, value_enum, default_value_t = crate::utils::output_format::OutputFormat::Human)] + output: crate::utils::output_format::OutputFormat, }, /// Manage organizations Orgs { @@ -633,6 +640,11 @@ enum ConnectCommands { #[command(subcommand)] command: ConnectCohortsCommands, }, + /// List uploaded runtimes on the Connect platform + Runtimes { + #[command(subcommand)] + command: ConnectRuntimesCommands, + }, /// Manage claim tokens ClaimTokens { #[command(subcommand)] @@ -681,6 +693,9 @@ enum ConnectCommands { /// Deploy after upload: activate immediately (skip draft) #[arg(long)] deploy_activate: bool, + /// Output format (human prose or NDJSON event stream) + #[arg(long, value_enum, default_value_t = crate::utils::output_format::OutputFormat::Human)] + output: crate::utils::output_format::OutputFormat, }, /// Deploy a runtime to a cohort Deploy { @@ -714,6 +729,9 @@ enum ConnectCommands { /// Profile name (defaults to the active default profile) #[arg(long)] profile: Option, + /// Output format (human prose or NDJSON event stream) + #[arg(long, value_enum, default_value_t = crate::utils::output_format::OutputFormat::Human)] + output: crate::utils::output_format::OutputFormat, }, /// Retrieve the Connect server's TUF signing public key ServerKey { @@ -857,6 +875,9 @@ enum ConnectOrgsCommands { /// Profile name (defaults to the active default profile) #[arg(long)] profile: Option, + /// Output format (human prose or single JSON object) + #[arg(long, value_enum, default_value_t = crate::utils::output_format::OutputFormat::Human)] + output: crate::utils::output_format::OutputFormat, }, } @@ -941,6 +962,9 @@ enum ConnectProjectsCommands { /// Profile name 
(defaults to the active default profile) #[arg(long)] profile: Option, + /// Output format (human prose or single JSON object) + #[arg(long, value_enum, default_value_t = crate::utils::output_format::OutputFormat::Human)] + output: crate::utils::output_format::OutputFormat, }, /// Create a new project Create { @@ -1177,6 +1201,9 @@ enum ConnectCohortsCommands { /// Profile name (defaults to the active default profile) #[arg(long)] profile: Option, + /// Output format (human prose or single JSON object) + #[arg(long, value_enum, default_value_t = crate::utils::output_format::OutputFormat::Human)] + output: crate::utils::output_format::OutputFormat, }, /// Create a new cohort Create { @@ -1222,6 +1249,28 @@ enum ConnectCohortsCommands { }, } +#[derive(Subcommand)] +enum ConnectRuntimesCommands { + /// List runtimes uploaded to the Connect platform + List { + /// Organization ID (or set connect.org in avocado.yaml) + #[arg(long)] + org: Option, + /// Project ID (or set connect.project in avocado.yaml) + #[arg(long)] + project: Option, + /// Path to avocado.yaml configuration file + #[arg(short = 'C', long, default_value = "avocado.yaml")] + config: String, + /// Profile name (defaults to the active default profile) + #[arg(long)] + profile: Option, + /// Output format (human prose or single JSON object) + #[arg(long, value_enum, default_value_t = crate::utils::output_format::OutputFormat::Human)] + output: crate::utils::output_format::OutputFormat, + }, +} + #[derive(Subcommand)] enum ConnectClaimTokensCommands { /// List claim tokens in an organization @@ -3394,6 +3443,7 @@ async fn main() -> Result<()> { runtime, config, profile, + output, } => { let cmd = ConnectInitCommand { org, @@ -3402,21 +3452,23 @@ async fn main() -> Result<()> { runtime, config_path: config, profile, + output, }; cmd.execute().await?; Ok(()) } - ConnectCommands::Clean { runtime, config } => { + ConnectCommands::Clean { runtime, config, output } => { let cmd = ConnectCleanCommand { runtime, 
config_path: config, + output, }; cmd.execute()?; Ok(()) } ConnectCommands::Orgs { command } => match command { - ConnectOrgsCommands::List { profile } => { - let cmd = ConnectOrgsListCommand { profile }; + ConnectOrgsCommands::List { profile, output } => { + let cmd = ConnectOrgsListCommand { profile, output }; cmd.execute().await?; Ok(()) } @@ -3487,6 +3539,7 @@ async fn main() -> Result<()> { org, config, profile, + output, } => { let profile_org = commands::connect::profile_organization_id(profile.as_deref())?; @@ -3494,6 +3547,7 @@ async fn main() -> Result<()> { let cmd = ConnectProjectsListCommand { org: resolved_org, profile, + output, }; cmd.execute().await?; Ok(()) @@ -3684,6 +3738,7 @@ async fn main() -> Result<()> { project, config, profile, + output, } => { let profile_org = commands::connect::profile_organization_id(profile.as_deref())?; @@ -3698,6 +3753,7 @@ async fn main() -> Result<()> { org: resolved_org, project: resolved_project, profile, + output, }; cmd.execute().await?; Ok(()) @@ -3757,6 +3813,33 @@ async fn main() -> Result<()> { Ok(()) } }, + ConnectCommands::Runtimes { command } => match command { + ConnectRuntimesCommands::List { + org, + project, + config, + profile, + output, + } => { + let profile_org = + commands::connect::profile_organization_id(profile.as_deref())?; + let (resolved_org, resolved_project) = + commands::connect::resolve_org_and_project( + org, + project, + &config, + profile_org, + )?; + let cmd = ConnectRuntimesListCommand { + org: resolved_org, + project: resolved_project, + profile, + output, + }; + cmd.execute().await?; + Ok(()) + } + }, ConnectCommands::ClaimTokens { command } => match command { ConnectClaimTokensCommands::List { org, @@ -3835,6 +3918,7 @@ async fn main() -> Result<()> { deploy_name, deploy_tag, deploy_activate, + output, } => { let profile_org = commands::connect::profile_organization_id(profile.as_deref())?; let (resolved_org, resolved_project) = commands::connect::resolve_org_and_project( @@ 
-3859,6 +3943,7 @@ async fn main() -> Result<()> { deploy_name, deploy_tags: deploy_tag, deploy_activate, + output, }; cmd.execute().await?; Ok(()) @@ -3874,6 +3959,7 @@ async fn main() -> Result<()> { activate, config, profile, + output, } => { let profile_org = commands::connect::profile_organization_id(profile.as_deref())?; let (resolved_org, resolved_project) = @@ -3888,6 +3974,7 @@ async fn main() -> Result<()> { tags: tag, activate, profile, + output, }; cmd.execute().await?; Ok(()) From dd5a351c220e1f15d334f81a22863e9a5dd869cb Mon Sep 17 00:00:00 2001 From: nicksinas Date: Wed, 10 Jun 2026 16:35:30 -0500 Subject: [PATCH 26/30] Tested and working support for connect integration in desktop. Added emits and adjusted output format. --- src/commands/connect/deploy.rs | 66 +++++++++++-- src/commands/connect/upload.rs | 163 ++++++++++++++++++++++----------- src/main.rs | 38 +++++++- src/utils/output_format.rs | 40 ++++++++ 4 files changed, 247 insertions(+), 60 deletions(-) diff --git a/src/commands/connect/deploy.rs b/src/commands/connect/deploy.rs index 37e5566c..c43cf882 100644 --- a/src/commands/connect/deploy.rs +++ b/src/commands/connect/deploy.rs @@ -8,9 +8,32 @@ use crate::commands::connect::client::{ }; use crate::utils::output::{print_info, print_success, print_warning, OutputLevel}; use crate::utils::output_format::{ - emit_json_event, is_json_output_active, JsonOutputGuard, OutputFormat, + emit_json_event, emit_step, emit_step_error, emit_task_registered, is_json_output_active, + JsonOutputGuard, OutputFormat, }; +// Step names for the desktop per-step strip. +const PHASE_RESOLVE: &str = "resolve"; +const PHASE_CREATE: &str = "create-deployment"; +const PHASE_ACTIVATE: &str = "activate"; + +/// Run a phase, emitting `running` → `success`/`failed` (+ `step_error`) so the +/// desktop strip tracks fleet-deploy progress like build/install. 
+async fn run_phase(name: &str, fut: impl std::future::Future>) -> Result { + emit_step(name, "running"); + match fut.await { + Ok(v) => { + emit_step(name, "success"); + Ok(v) + } + Err(e) => { + emit_step_error(name, &format!("{e:#}")); + emit_step(name, "failed"); + Err(e) + } + } +} + pub struct ConnectDeployCommand { pub org: String, pub project: String, @@ -36,14 +59,32 @@ impl ConnectDeployCommand { } let _json_guard = self.output.is_json().then(JsonOutputGuard::enable); + let result = self.execute_inner().await; + if let Err(e) = &result { + if is_json_output_active() { + emit_json_event(&json!({ "event": "error", "message": format!("{e:#}") })); + } + } + result + } + + async fn execute_inner(&self) -> Result<()> { + // Register steps up front so the desktop shows the full list. + emit_task_registered(PHASE_RESOLVE, "Resolve runtime + cohort"); + emit_task_registered(PHASE_CREATE, "Create deployment"); + emit_task_registered(PHASE_ACTIVATE, "Activate deployment"); let config = client::load_config()? .ok_or_else(|| anyhow::anyhow!("Not logged in. 
Run 'avocado connect auth login'"))?; let (_, profile) = config.resolve_profile(self.profile.as_deref(), Some(&self.org))?; let client = ConnectClient::from_profile(profile)?; - let selected_runtime = self.resolve_runtime(&client).await?; - let selected_cohort = self.resolve_cohort(&client).await?; + let (selected_runtime, selected_cohort) = run_phase(PHASE_RESOLVE, async { + let r = self.resolve_runtime(&client).await?; + let c = self.resolve_cohort(&client).await?; + Ok((r, c)) + }) + .await?; let version_display = selected_runtime .display_version @@ -69,18 +110,28 @@ impl ConnectDeployCommand { }, }; - let deployment = client - .create_deployment(&self.org, &self.project, &req) - .await?; + let deployment = run_phase( + PHASE_CREATE, + client.create_deployment(&self.org, &self.project, &req), + ) + .await?; let final_status = if self.activate { print_info("Activating deployment...", OutputLevel::Normal); + emit_step(PHASE_ACTIVATE, "running"); match client .activate_deployment(&self.org, &self.project, &deployment.id) .await { - Ok(activated) => activated.status, + Ok(activated) => { + emit_step(PHASE_ACTIVATE, "success"); + activated.status + } Err(e) => { + // Activation failure is non-fatal: the deployment exists in + // draft. Surface it as a step error but don't fail the run. 
+ emit_step_error(PHASE_ACTIVATE, &format!("{e}")); + emit_step(PHASE_ACTIVATE, "failed"); print_warning( &format!( "Deployment created but activation failed: {e}\n \ @@ -92,6 +143,7 @@ impl ConnectDeployCommand { } } } else { + emit_step(PHASE_ACTIVATE, "skipped"); deployment.status.clone() }; diff --git a/src/commands/connect/upload.rs b/src/commands/connect/upload.rs index 7fba1e18..fb294356 100644 --- a/src/commands/connect/upload.rs +++ b/src/commands/connect/upload.rs @@ -11,19 +11,62 @@ use crate::commands::connect::client::{ }; use crate::utils::config::{load_config, Config}; use crate::utils::container::{RunConfig, SdkContainer}; -use crate::utils::output::{print_info, print_success, print_warning, OutputLevel}; -use crate::utils::output_format::{emit_json_event, is_json_output_active, JsonOutputGuard, OutputFormat}; +use crate::utils::output::{print_success, print_warning, tui_is_active, OutputLevel}; +use crate::utils::output_format::{ + emit_json_event, emit_step, emit_step_error, emit_task_registered, is_json_output_active, + JsonOutputGuard, OutputFormat, +}; use crate::utils::prerequisites::{check_prerequisites, TaskPrerequisites}; use crate::utils::stamps::StampRequirement; use crate::utils::target::resolve_target_required; const PART_SIZE: u64 = 52_428_800; // 50 MiB, matching API's @part_size +/// Progress line for the upload flow. In `--output json` mode (which the +/// desktop uses), `print_info` is suppressed, so emit an `output` event the +/// desktop appends to the run's terminal instead. Outside JSON mode this +/// mirrors `print_info` exactly. +fn progress(msg: &str, _level: OutputLevel) { + if is_json_output_active() { + emit_json_event(&serde_json::json!({ "event": "output", "line": msg })); + } else if !tui_is_active() { + println!("\x1b[94m[INFO]\x1b[0m {msg}"); + } +} + +// Step names for the desktop per-step strip (container upload path). 
+const PHASE_PREREQ: &str = "prerequisites"; +const PHASE_DISCOVER: &str = "discover"; +const PHASE_CREATE: &str = "create-runtime"; +const PHASE_UPLOAD: &str = "upload-artifacts"; +const PHASE_FINALIZE: &str = "finalize"; + +/// Run one phase of the upload, emitting `running` → `success`/`failed` step +/// events around it (and a `step_error` with the reason on failure) so the +/// desktop strip tracks progress like build/install. +async fn run_phase( + name: &str, + fut: impl std::future::Future>, +) -> Result { + emit_step(name, "running"); + match fut.await { + Ok(v) => { + emit_step(name, "success"); + Ok(v) + } + Err(e) => { + emit_step_error(name, &format!("{e:#}")); + emit_step(name, "failed"); + Err(e) + } + } +} + pub struct ConnectUploadCommand { pub org: String, pub project: String, pub runtime: String, - pub version: Option, + pub version: String, pub description: Option, pub config_path: String, pub target: Option, @@ -57,8 +100,26 @@ impl TaskPrerequisites for ConnectUploadCommand { impl ConnectUploadCommand { pub async fn execute(&self) -> Result<()> { + // Guard stays alive across the error-emit below so the event lands + // on the JSON stream the desktop is parsing. let _json_guard = self.output.is_json().then(JsonOutputGuard::enable); + let result = self.execute_inner().await; + if let Err(e) = &result { + // `print_info` is suppressed in JSON mode and the command's only + // other signal is the process exit code — emit the failure reason + // as an `error` event so the desktop surfaces it instead of a bare + // "avocado exited 1". + if is_json_output_active() { + emit_json_event(&serde_json::json!({ + "event": "error", + "message": format!("{e:#}"), + })); + } + } + result + } + async fn execute_inner(&self) -> Result<()> { // 0. 
Validate deploy-after-upload flags super::deploy::validate_deploy_flags( &self.deploy_cohort, @@ -96,20 +157,17 @@ impl ConnectUploadCommand { let (artifacts_dir, _tmp_dir) = self.get_artifacts_dir().await?; let manifest = read_manifest(&artifacts_dir)?; - let version = self - .version - .clone() - .unwrap_or_else(|| format_version_from_manifest(&manifest)); + let version = self.version.clone(); let build_id = manifest .get("id") .and_then(|v| v.as_str()) .map(|s| s.to_string()); - print_info("Discovering artifacts...", OutputLevel::Normal); + progress("Discovering artifacts...", OutputLevel::Normal); let artifact_infos = discover_artifacts(&artifacts_dir, &manifest)?; for info in &artifact_infos { - print_info( + progress( &format!(" {} ({})", info.image_id, format_bytes(info.size_bytes)), OutputLevel::Normal, ); @@ -181,22 +239,30 @@ impl ConnectUploadCommand { connect: &ConnectClient, project_config: &Config, ) -> Result<()> { + // Register the steps up front so the desktop shows the full list. 
+ emit_task_registered(PHASE_PREREQ, "Verify build prerequisites"); + emit_task_registered(PHASE_DISCOVER, "Discover artifacts"); + emit_task_registered(PHASE_CREATE, "Create runtime"); + emit_task_registered(PHASE_UPLOAD, "Upload artifacts"); + emit_task_registered(PHASE_FINALIZE, "Finalize"); + // Validate prerequisites (runtime has been built) let target = resolve_target_required(self.target.as_deref(), project_config)?; let container_image = project_config .get_sdk_image() .context("No SDK container image specified in configuration")?; let container = SdkContainer::from_config(&self.config_path, project_config)?; - check_prerequisites(self, &target, &container, container_image).await?; + run_phase( + PHASE_PREREQ, + check_prerequisites(self, &target, &container, container_image), + ) + .await?; // Phase A: Discover artifacts inside the container - let discovery = self.discover_in_container(project_config).await?; + let discovery = run_phase(PHASE_DISCOVER, self.discover_in_container(project_config)).await?; let manifest = &discovery.manifest; - let version = self - .version - .clone() - .unwrap_or_else(|| format_version_from_manifest(manifest)); + let version = self.version.clone(); let build_id = manifest .get("id") .and_then(|v| v.as_str()) @@ -220,8 +286,9 @@ impl ConnectUploadCommand { }; // Phase B: Create runtime via API - let (runtime, num_artifacts) = self - .create_runtime_api( + let (runtime, num_artifacts) = run_phase( + PHASE_CREATE, + self.create_runtime_api( connect, version, build_id, @@ -238,23 +305,33 @@ impl ConnectUploadCommand { }) .collect::>(), delegation_refs, - ) - .await?; + ), + ) + .await?; if runtime.status == "draft" { + // Nothing to upload (all artifacts already present) — mark the + // upload/finalize steps skipped so the strip doesn't look stuck. 
+ emit_step(PHASE_UPLOAD, "skipped"); + emit_step(PHASE_FINALIZE, "skipped"); self.handle_draft_status(connect, &runtime, num_artifacts) .await?; return Ok(()); } // Phase C: Stream artifacts from Docker volume and upload with progress - let completed_parts = self - .upload_in_container(project_config, connect, &runtime.artifacts, &discovery) - .await?; + let completed_parts = run_phase( + PHASE_UPLOAD, + self.upload_in_container(project_config, connect, &runtime.artifacts, &discovery), + ) + .await?; // Phase D: Complete and finalize - self.complete_and_finalize(connect, &runtime, completed_parts, num_artifacts) - .await + run_phase( + PHASE_FINALIZE, + self.complete_and_finalize(connect, &runtime, completed_parts, num_artifacts), + ) + .await } /// Create a runtime via the Connect API. Returns the runtime data and @@ -268,7 +345,7 @@ impl ConnectUploadCommand { artifacts: &[ArtifactParam], delegation: Option<(&String, &String, &String)>, ) -> Result<(client::RuntimeCreateData, usize)> { - print_info( + progress( &format!("Creating runtime {version}..."), OutputLevel::Normal, ); @@ -346,7 +423,7 @@ impl ConnectUploadCommand { ); let final_status = if self.publish { - print_info("Publishing runtime...", OutputLevel::Normal); + progress("Publishing runtime...", OutputLevel::Normal); let published = connect .publish_runtime(&self.org, &self.project, &runtime.id) .await?; @@ -407,7 +484,7 @@ impl ConnectUploadCommand { return Ok(()); } - print_info("Completing upload...", OutputLevel::Normal); + progress("Completing upload...", OutputLevel::Normal); let result = connect .complete_runtime( &self.org, @@ -428,7 +505,7 @@ impl ConnectUploadCommand { ); let final_status = if self.publish { - print_info("Publishing runtime...", OutputLevel::Normal); + progress("Publishing runtime...", OutputLevel::Normal); let published = connect .publish_runtime(&self.org, &self.project, &runtime.id) .await?; @@ -501,7 +578,7 @@ impl ConnectUploadCommand { .context("No SDK container image 
specified in configuration")?; let container = SdkContainer::from_config(&self.config_path, config)?; - print_info( + progress( "Discovering artifacts in build volume...", OutputLevel::Normal, ); @@ -531,7 +608,7 @@ impl ConnectUploadCommand { })?; for info in &result.artifacts { - print_info( + progress( &format!(" {} ({})", info.name, format_bytes(info.size_bytes)), OutputLevel::Normal, ); @@ -561,12 +638,12 @@ impl ConnectUploadCommand { .collect(); if to_upload.is_empty() { - print_info("All artifact(s) already uploaded.", OutputLevel::Normal); + progress("All artifact(s) already uploaded.", OutputLevel::Normal); return Ok(Vec::new()); } let skipped = upload_specs.len() - to_upload.len(); - print_info( + progress( &format!( "Uploading {} artifact(s){}...", to_upload.len(), @@ -855,22 +932,6 @@ fn read_manifest(dir: &Path) -> Result { ) } -fn format_version_from_manifest(manifest: &serde_json::Value) -> String { - if let Some(runtime) = manifest.get("runtime") { - if let (Some(name), Some(ver)) = ( - runtime.get("name").and_then(|v| v.as_str()), - runtime.get("version").and_then(|v| v.as_str()), - ) { - return format!("{name}-{ver}"); - } - } - manifest - .get("id") - .and_then(|v| v.as_str()) - .unwrap_or("unknown") - .to_string() -} - // --------------------------------------------------------------------------- // Artifact discovery (manifest-driven, with SHA256 hashing for TUF) // --------------------------------------------------------------------------- @@ -1012,7 +1073,7 @@ async fn upload_artifacts( for spec in upload_specs { if spec.parts.is_empty() { - print_info( + progress( &format!(" {} (already uploaded, skipping)", spec.image_id), OutputLevel::Normal, ); @@ -1024,7 +1085,7 @@ async fn upload_artifacts( if to_upload.is_empty() { if skipped > 0 { - print_info( + progress( &format!("All {skipped} artifact(s) already uploaded."), OutputLevel::Normal, ); @@ -1032,7 +1093,7 @@ async fn upload_artifacts( return Ok(all_completed); } - print_info( + 
progress( &format!( "Uploading {} artifact(s){}...", to_upload.len(), diff --git a/src/main.rs b/src/main.rs index 4125b889..4ed05ca4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -660,9 +660,9 @@ enum ConnectCommands { project: Option, /// Runtime name to upload runtime: String, - /// Version string (defaults to runtime name-version from manifest) + /// Human-readable version for this upload (e.g. v0.0.2-dev) #[arg(long)] - version: Option, + version: String, /// Description for the upload #[arg(long)] description: Option, @@ -1964,6 +1964,13 @@ fn needs_vm_routing(cmd: &Commands) -> bool { | Commands::Clean { .. } | Commands::Sign { .. } | Commands::Deploy { .. } + // `connect upload` runs a prerequisite stamp check + in-container + // artifact discovery, both of which need Docker. The other connect + // subcommands are pure Connect-API calls, so they stay off the VM + // (routing can auto-start it) — gate on the subcommand, not the group. + | Commands::Connect { + command: ConnectCommands::Upload { .. } + } ) } @@ -4880,4 +4887,31 @@ mod tests { let result = build_env_vars(None, Some(&empty_vec)); assert_eq!(result, None); } + + /// `connect upload` talks to Docker (prerequisite stamp check + + /// in-container artifact discovery), so it must route to the avocado-vm. + /// The other `connect` subcommands are pure Connect-API calls and must + /// NOT route — routing can auto-start the VM, which is wrong for e.g. + /// `connect auth`/`deploy`. Pins the Docker-socket fix against regressions + /// (dropping the upload arm, or over-broadly routing all of `Connect`). 
+ #[test] + fn needs_vm_routing_gates_connect_upload_only() { + let cmd = |args: &[&str]| Cli::try_parse_from(args).expect("args should parse").command; + + // connect upload → routes + assert!(needs_vm_routing(&cmd(&[ + "avocado", "connect", "upload", "dev", "--version", "v0.0.1", + ]))); + + // other connect subcommands → do NOT route + assert!(!needs_vm_routing(&cmd(&[ + "avocado", "connect", "deploy", "--runtime", "r", "--cohort", "c", + ]))); + assert!(!needs_vm_routing(&cmd(&["avocado", "connect", "clean"]))); + + // sanity: an existing docker command still routes + assert!(needs_vm_routing(&cmd(&[ + "avocado", "build", "--target", "qemux86-64", + ]))); + } } diff --git a/src/utils/output_format.rs b/src/utils/output_format.rs index 64f8ab5c..3dcf7297 100644 --- a/src/utils/output_format.rs +++ b/src/utils/output_format.rs @@ -55,6 +55,46 @@ pub fn emit_json_object(value: &serde_json::Value) { emit_json_event(value) } +// --------------------------------------------------------------------------- +// Step-event helpers — the same NDJSON vocabulary the task renderer emits for +// `build`/`install` (`task_registered` / `step` / `step_error`), so the +// desktop's per-step strip works identically for imperative commands +// (`connect upload`, `connect deploy`) that don't run the task scheduler. +// All are no-ops outside JSON output mode. + +/// Register a step up front so the desktop shows it (pending) before it runs. +pub fn emit_task_registered(name: &str, label: &str) { + if is_json_output_active() { + emit_json_event(&serde_json::json!({ + "event": "task_registered", + "name": name, + "label": label, + })); + } +} + +/// Transition a step to `pending` / `running` / `success` / `failed` / `skipped`. +pub fn emit_step(name: &str, status: &str) { + if is_json_output_active() { + emit_json_event(&serde_json::json!({ + "event": "step", + "name": name, + "status": status, + })); + } +} + +/// Attach an error message to a step so the desktop can show it inline. 
+pub fn emit_step_error(name: &str, message: &str) { + if is_json_output_active() { + emit_json_event(&serde_json::json!({ + "event": "step_error", + "name": name, + "message": message, + })); + } +} + // --------------------------------------------------------------------------- // Process-wide "JSON output active" flag. // From 3b4dc8c1929bff593d2f2d64172ac5933a125182 Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Fri, 12 Jun 2026 18:10:52 -0400 Subject: [PATCH 27/30] style: apply rustfmt to connect upload/deploy and main --- src/commands/connect/upload.rs | 8 +++----- src/commands/runtime/deploy.rs | 5 ++++- src/main.rs | 34 ++++++++++++++++++++++++++++------ 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/src/commands/connect/upload.rs b/src/commands/connect/upload.rs index fb294356..4ef2876c 100644 --- a/src/commands/connect/upload.rs +++ b/src/commands/connect/upload.rs @@ -44,10 +44,7 @@ const PHASE_FINALIZE: &str = "finalize"; /// Run one phase of the upload, emitting `running` → `success`/`failed` step /// events around it (and a `step_error` with the reason on failure) so the /// desktop strip tracks progress like build/install. 
-async fn run_phase( - name: &str, - fut: impl std::future::Future>, -) -> Result { +async fn run_phase(name: &str, fut: impl std::future::Future>) -> Result { emit_step(name, "running"); match fut.await { Ok(v) => { @@ -259,7 +256,8 @@ impl ConnectUploadCommand { .await?; // Phase A: Discover artifacts inside the container - let discovery = run_phase(PHASE_DISCOVER, self.discover_in_container(project_config)).await?; + let discovery = + run_phase(PHASE_DISCOVER, self.discover_in_container(project_config)).await?; let manifest = &discovery.manifest; let version = self.version.clone(); diff --git a/src/commands/runtime/deploy.rs b/src/commands/runtime/deploy.rs index b678dc80..a318155e 100644 --- a/src/commands/runtime/deploy.rs +++ b/src/commands/runtime/deploy.rs @@ -1357,7 +1357,10 @@ mod tests { #[test] fn test_args_have_host_network_eq_form() { - assert!(args_have_host_network(&sv(&["--privileged", "--network=host"]))); + assert!(args_have_host_network(&sv(&[ + "--privileged", + "--network=host" + ]))); assert!(args_have_host_network(&sv(&["--net=host"]))); } diff --git a/src/main.rs b/src/main.rs index 4ed05ca4..234e48f3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -39,11 +39,11 @@ use commands::connect::orgs::ConnectOrgsListCommand; use commands::connect::projects::{ ConnectProjectsCreateCommand, ConnectProjectsDeleteCommand, ConnectProjectsListCommand, }; +use commands::connect::runtimes::ConnectRuntimesListCommand; use commands::connect::server_key::ConnectServerKeyCommand; use commands::connect::trust::{ ConnectTrustPromoteRootCommand, ConnectTrustRotateServerKeyCommand, ConnectTrustStatusCommand, }; -use commands::connect::runtimes::ConnectRuntimesListCommand; use commands::connect::upload::ConnectUploadCommand; use commands::ext::{ ExtBuildCommand, ExtCheckoutCommand, ExtCleanCommand, ExtDepsCommand, ExtDnfCommand, @@ -3464,7 +3464,11 @@ async fn main() -> Result<()> { cmd.execute().await?; Ok(()) } - ConnectCommands::Clean { runtime, config, output } 
=> { + ConnectCommands::Clean { + runtime, + config, + output, + } => { let cmd = ConnectCleanCommand { runtime, config_path: config, @@ -4896,22 +4900,40 @@ mod tests { /// (dropping the upload arm, or over-broadly routing all of `Connect`). #[test] fn needs_vm_routing_gates_connect_upload_only() { - let cmd = |args: &[&str]| Cli::try_parse_from(args).expect("args should parse").command; + let cmd = |args: &[&str]| { + Cli::try_parse_from(args) + .expect("args should parse") + .command + }; // connect upload → routes assert!(needs_vm_routing(&cmd(&[ - "avocado", "connect", "upload", "dev", "--version", "v0.0.1", + "avocado", + "connect", + "upload", + "dev", + "--version", + "v0.0.1", ]))); // other connect subcommands → do NOT route assert!(!needs_vm_routing(&cmd(&[ - "avocado", "connect", "deploy", "--runtime", "r", "--cohort", "c", + "avocado", + "connect", + "deploy", + "--runtime", + "r", + "--cohort", + "c", ]))); assert!(!needs_vm_routing(&cmd(&["avocado", "connect", "clean"]))); // sanity: an existing docker command still routes assert!(needs_vm_routing(&cmd(&[ - "avocado", "build", "--target", "qemux86-64", + "avocado", + "build", + "--target", + "qemux86-64", ]))); } } From b0526f5bde867fd987b2d4c5bf6cd9eb87f927bc Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Fri, 12 Jun 2026 18:59:25 -0400 Subject: [PATCH 28/30] ext: derive supported targets from avocado.yaml, stamp as RPM provides avocado ext package now stamps a Provides: avocado-target() per supported_targets entry (wildcard/unset -> avocado-target(*)). These land in the feed's repo metadata (primary.xml), so the CLI can query target compatibility without downloading the RPM and the feed server can route the package to the correct per-target -ext feed(s). avocado connect ext publish no longer defaults --targets to a hardcoded qemux86-64,qemuarm64 list: --targets becomes an explicit override, and target machines are otherwise derived from the project's supported_targets. 
A wildcard supported_targets yields no machine list (the RPM's avocado-target(*) provide drives server-side expansion). --- src/commands/connect/ext.rs | 24 +++++++++++++++++------- src/commands/ext/package.rs | 20 ++++++++++++++++++++ src/main.rs | 7 ++++--- 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/src/commands/connect/ext.rs b/src/commands/connect/ext.rs index 0bbdd731..56285900 100644 --- a/src/commands/connect/ext.rs +++ b/src/commands/connect/ext.rs @@ -75,7 +75,7 @@ pub struct ExtPublishCommand { pub arch: Option, pub target_release: String, pub target_channel: String, - pub targets: String, + pub targets: Option, } impl ExtPublishCommand { @@ -117,12 +117,22 @@ impl ExtPublishCommand { if name.is_empty() || version.is_empty() { anyhow::bail!("could not determine extension name/version — pass --name and --version"); } - let machines: Vec = self - .targets - .split(',') - .map(|s| s.trim().to_string()) - .filter(|s| !s.is_empty()) - .collect(); + // Target machines: an explicit --targets overrides; otherwise derive from the + // project's avocado.yaml `supported_targets` (never a hardcoded list). A wildcard or + // unset `supported_targets` ("all targets") yields an empty list here — the RPM still + // carries an `avocado-target(*)` provide, and the feed server expands it to every + // known target feed on ingest. 
+ let machines: Vec = match &self.targets { + Some(t) => t + .split(',') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect(), + None => crate::utils::config::load_config(&self.config) + .ok() + .and_then(|c| c.get_supported_targets()) + .unwrap_or_default(), + }; let client = http_client()?; diff --git a/src/commands/ext/package.rs b/src/commands/ext/package.rs index fa0bed65..2cc6d73f 100644 --- a/src/commands/ext/package.rs +++ b/src/commands/ext/package.rs @@ -554,6 +554,20 @@ impl ExtPackageCommand { // Convert package_files to a space-separated string for the shell script let package_files_str = package_files.join(" "); + // Stamp the supported targets into the RPM as `avocado-target()` provides, + // derived from avocado.yaml `supported_targets` (never a hardcoded list). The feed + // server reads these to route the package to the right per-target `-ext` feed(s). + // `get_supported_targets()` returns the explicit list, or None for `*`/unset — both + // of which mean "all targets", which we record as the single wildcard `avocado-target(*)`. + let target_provides = match config.get_supported_targets() { + Some(targets) if !targets.is_empty() => targets + .iter() + .map(|t| format!("Provides: avocado-target({t})")) + .collect::>() + .join("\n"), + _ => "Provides: avocado-target(*)".to_string(), + }; + // Create RPM using rpmbuild in container // Package root (/) maps to the extension's src_dir contents let rpm_build_script = format!( @@ -644,6 +658,11 @@ Group: {group}{url_line} # is nested under //, so it installs into the SHARED includes installroot. # Legacy packages (content at /) lack this provide and use the per-ext installroot. Provides: avocado-ext-layout(nested) +# Self-describe which targets this extension supports, derived from avocado.yaml +# supported_targets. 
Surfaced in the feed's repo metadata (primary.xml) so the CLI can +# query target compatibility without downloading the RPM, and so the feed server can route +# it to the correct per-target -ext feed(s). "*" means all targets (cross-target). +{target_provides} %description {description} @@ -699,6 +718,7 @@ rm -rf "$TMPDIR" }, description = metadata.description, arch = metadata.arch, + target_provides = target_provides, rpm_filename = rpm_filename, container_src_dir = container_src_dir, package_files_str = package_files_str, diff --git a/src/main.rs b/src/main.rs index 234e48f3..83ffcbe7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -908,9 +908,10 @@ enum ConnectExtCommands { /// Target feed channel #[arg(long, default_value = "edge")] target_channel: String, - /// Comma-separated target machines - #[arg(long, default_value = "qemux86-64,qemuarm64")] - targets: String, + /// Override the target machines (comma-separated). Defaults to the project's + /// `supported_targets` from avocado.yaml; only pass this to override that. + #[arg(long)] + targets: Option, /// Path to avocado.yaml configuration file #[arg(short = 'C', long, default_value = "avocado.yaml")] config: String, From dc1f1971a9aca8acbc23629ed408bb2c6bcf58c3 Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Sat, 13 Jun 2026 20:06:26 -0400 Subject: [PATCH 29/30] connect ext publish: derive target feed from distro config; drop --target-release/--target-channel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The publish feed was a separate --target-release/--target-channel (hard-defaulting to 2026/edge, no env binding), which could silently diverge from the distro you build against. Remove the flags and derive the target feed from get_distro_release()/get_distro_channel() (AVOCADO_DISTRO_RELEASE/ CHANNEL env > distro.release/channel in avocado.yaml) — one source of truth for build AND publish. To publish into a different feed, change the distro config/env. 
Errors clearly if unset. --- src/commands/connect/ext.rs | 46 ++++++++++++++++++++++++++----------- src/main.rs | 10 -------- 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/src/commands/connect/ext.rs b/src/commands/connect/ext.rs index 56285900..899b1a8b 100644 --- a/src/commands/connect/ext.rs +++ b/src/commands/connect/ext.rs @@ -73,8 +73,6 @@ pub struct ExtPublishCommand { pub version: Option, pub release: Option, pub arch: Option, - pub target_release: String, - pub target_channel: String, pub targets: Option, } @@ -84,13 +82,14 @@ impl ExtPublishCommand { // platform (Peridio) org, which connect fills in server-side for super-admins. // When given (flag or connect.org), it publishes into that tenant org and // selects a matching auth profile. + // Load the project config once — it drives the org fallback, the target machines, AND + // the target feed (release/channel) below. + let project_cfg = crate::utils::config::load_config(&self.config).ok(); let org = self.org.clone().or_else(|| { - std::path::Path::new(&self.config) - .exists() - .then(|| crate::utils::config::load_config(&self.config).ok()) - .flatten() - .and_then(|c| c.connect) - .and_then(|c| c.org) + project_cfg + .as_ref() + .and_then(|c| c.connect.as_ref()) + .and_then(|c| c.org.clone()) }); let cfg = client::load_config()? .context("Not logged in. Run 'avocado connect auth login' first.")?; @@ -128,18 +127,39 @@ impl ExtPublishCommand { .map(|s| s.trim().to_string()) .filter(|s| !s.is_empty()) .collect(), - None => crate::utils::config::load_config(&self.config) - .ok() + None => project_cfg + .as_ref() .and_then(|c| c.get_supported_targets()) .unwrap_or_default(), }; + // Target feed = the configured distro release/channel (AVOCADO_DISTRO_RELEASE/CHANNEL + // env > distro.release/channel in avocado.yaml). There is no separate publish flag: the + // feed you publish into always matches the distro you're configured to build, so the two + // can't silently diverge. 
To publish into a different feed, change the distro config/env. + let target_release = project_cfg + .as_ref() + .and_then(|c| c.get_distro_release()) + .or_else(|| std::env::var("AVOCADO_DISTRO_RELEASE").ok()) + .context( + "no target feed release — set distro.release in avocado.yaml (or AVOCADO_DISTRO_RELEASE)", + )?; + let target_channel = project_cfg + .as_ref() + .and_then(|c| c.get_distro_channel()) + .or_else(|| std::env::var("AVOCADO_DISTRO_CHANNEL").ok()) + .context( + "no target feed channel — set distro.channel in avocado.yaml (or AVOCADO_DISTRO_CHANNEL)", + )?; + let client = http_client()?; // 1. reserve version + get presigned staging URL (409 if taken) let org_label = org.as_deref().unwrap_or("platform"); print_info( - &format!("Publishing {name}-{version}-{release}.{arch} to {api} (org {org_label})..."), + &format!( + "Publishing {name}-{version}-{release}.{arch} to {target_release}/{target_channel} via {api} (org {org_label})..." + ), OutputLevel::Normal, ); let res = client @@ -153,8 +173,8 @@ impl ExtPublishCommand { "arch": arch, "sha256": sha, "size_bytes": size, - "target_release": self.target_release, - "target_channel": self.target_channel, + "target_release": target_release, + "target_channel": target_channel, "target_machines": machines, })) .send() diff --git a/src/main.rs b/src/main.rs index 83ffcbe7..45f63858 100644 --- a/src/main.rs +++ b/src/main.rs @@ -902,12 +902,6 @@ enum ConnectExtCommands { /// Extension arch (default: parsed, else noarch) #[arg(long)] arch: Option, - /// Target feed release - #[arg(long, default_value = "2026")] - target_release: String, - /// Target feed channel - #[arg(long, default_value = "edge")] - target_channel: String, /// Override the target machines (comma-separated). Defaults to the project's /// `supported_targets` from avocado.yaml; only pass this to override that. 
#[arg(long)] @@ -3493,8 +3487,6 @@ async fn main() -> Result<()> { version, release, arch, - target_release, - target_channel, targets, config, profile, @@ -3508,8 +3500,6 @@ async fn main() -> Result<()> { version, release, arch, - target_release, - target_channel, targets, }; cmd.execute().await?; From 1bd0589f5e981a46ed41d9cb15274daac95ff46b Mon Sep 17 00:00:00 2001 From: Justin Schneck Date: Tue, 2 Jun 2026 11:27:54 -0400 Subject: [PATCH 30/30] release: bump to 0.41.0 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d530a751..eecd58a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -130,7 +130,7 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "avocado-cli" -version = "0.40.2" +version = "0.41.0" dependencies = [ "anyhow", "base64", diff --git a/Cargo.toml b/Cargo.toml index 2dc0d143..3b05098a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "avocado-cli" -version = "0.40.2" +version = "0.41.0" edition = "2021" description = "Command line interface for Avocado." authors = ["Avocado"]