From fb286f3138c3080eae56d5e77a269335991ee586 Mon Sep 17 00:00:00 2001 From: sidneychang <2190206983@qq.com> Date: Sat, 23 May 2026 17:50:30 +0800 Subject: [PATCH 1/6] feat(config): add rootfs view host opt-in Add UruncRootfsView and rootfs_view.enabled to urunc.toml so the shim can opt in to per-container read-only rootfs views on block/devmapper rootfs. Signed-off-by: sidneychang <2190206983@qq.com> --- deployment/urunc-deploy/config.toml | 3 +++ docs/configuration.md | 26 ++++++++++++++++++++++++++ pkg/unikontainers/urunc_config.go | 11 +++++++++++ 3 files changed, 40 insertions(+) diff --git a/deployment/urunc-deploy/config.toml b/deployment/urunc-deploy/config.toml index 8eeffaf4e..0330dba96 100644 --- a/deployment/urunc-deploy/config.toml +++ b/deployment/urunc-deploy/config.toml @@ -7,6 +7,9 @@ syslog = false [timestamps] enabled = false +[rootfs_view] +enabled = false + [monitors.qemu] default_memory_mb = 256 default_vcpus = 1 diff --git a/docs/configuration.md b/docs/configuration.md index a2daa4ba4..4111badd7 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -38,6 +38,9 @@ default_vcpus = 1 [extra_binaries.virtiofsd] path = "/usr/libexec/virtiofsd" options = "--sandbox none" + +[rootfs_view] +enabled = false ``` ## Configuration Sections @@ -89,6 +92,23 @@ destination = "/tmp/urunc-timestamps.log" When enabled, `urunc` will log performance timestamps to help with debugging and optimization. +### Rootfs View Configuration + +The `[rootfs_view]` section controls whether the urunc shim prepares a +per-container containerd rootfs view at task Create (for `devmapper` / +`blockfile` snapshotters). 
+ +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `enabled` | boolean | `false` | Prepare rootfs views for container block rootfs after shim task Create | + +**Example:** + +```toml +[rootfs_view] +enabled = true +``` + ### Monitor Configuration The `[monitors]` section allows you to configure default settings for different @@ -201,6 +221,9 @@ To create a configuration file, you can: [monitors.spt] default_memory_mb = 256 default_vcpus = 1 + + [rootfs_view] + enabled = false EOF ``` @@ -244,6 +267,9 @@ default_vcpus = 1 default_memory_mb = 256 default_vcpus = 1 # path is not set by default - urunc will search in PATH + +[rootfs_view] +enabled = false ``` ## Notes diff --git a/pkg/unikontainers/urunc_config.go b/pkg/unikontainers/urunc_config.go index 5f21d106e..36436fbdd 100644 --- a/pkg/unikontainers/urunc_config.go +++ b/pkg/unikontainers/urunc_config.go @@ -34,9 +34,15 @@ type UruncTimestamps struct { Destination string `toml:"destination"` // Used to specify a file for timestamps } +// UruncRootfsView configures shim-side per-container rootfs views (devmapper/blockfile). 
+type UruncRootfsView struct { + Enabled bool `toml:"enabled"` +} + type UruncConfig struct { Log UruncLog `toml:"log"` Timestamps UruncTimestamps `toml:"timestamps"` + RootfsView UruncRootfsView `toml:"rootfs_view"` Monitors map[string]types.MonitorConfig `toml:"monitors"` ExtraBins map[string]types.ExtraBinConfig `toml:"extra_binaries"` } @@ -94,10 +100,15 @@ func defaultExtraBinConfig() map[string]types.ExtraBinConfig { } } +func defaultRootfsViewConfig() UruncRootfsView { + return UruncRootfsView{Enabled: false} +} + func defaultUruncConfig() *UruncConfig { return &UruncConfig{ Log: defaultLogConfig(), Timestamps: defaultTimestampsConfig(), + RootfsView: defaultRootfsViewConfig(), Monitors: defaultMonitorsConfig(), ExtraBins: defaultExtraBinConfig(), } From fa05bde484cb317d331856ad74becc0c03e3ef85 Mon Sep 17 00:00:00 2001 From: sidneychang <2190206983@qq.com> Date: Sat, 23 May 2026 18:00:39 +0800 Subject: [PATCH 2/6] feat: add rootfs view accessor and runtime boot binds Introduce RootfsViewAccessor to create read-only containerd view snapshots with leases and persist state in bundle config.json under com.urunc.internal.rootfs.view. Bind kernel, initrd, and urunc.json from the view in block rootfs after prepareRoot(), with legacy extract fallback. 
Signed-off-by: sidneychang <2190206983@qq.com> --- pkg/containerd-shim/containerd/rootfs_view.go | 332 ++++++++++++++++++ pkg/containerd-shim/containerd/session.go | 2 - pkg/unikontainers/block.go | 71 ++-- pkg/unikontainers/rootfs_view_boot.go | 219 ++++++++++++ pkg/unikontainers/unikontainers.go | 44 ++- 5 files changed, 634 insertions(+), 34 deletions(-) create mode 100644 pkg/containerd-shim/containerd/rootfs_view.go create mode 100644 pkg/unikontainers/rootfs_view_boot.go diff --git a/pkg/containerd-shim/containerd/rootfs_view.go b/pkg/containerd-shim/containerd/rootfs_view.go new file mode 100644 index 000000000..bf8929094 --- /dev/null +++ b/pkg/containerd-shim/containerd/rootfs_view.go @@ -0,0 +1,332 @@ +// Copyright (c) 2023-2026, Nubificus LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package containerd
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+
+	leasesapi "github.com/containerd/containerd/api/services/leases/v1"
+	snapshotsapi "github.com/containerd/containerd/api/services/snapshots/v1"
+	cntrtypes "github.com/containerd/containerd/api/types"
+	"github.com/containerd/containerd/errdefs"
+	"github.com/containerd/containerd/mount"
+	"github.com/sirupsen/logrus"
+	"github.com/urunc-dev/urunc/pkg/unikontainers"
+	"github.com/urunc-dev/urunc/pkg/unikontainers/types"
+	"google.golang.org/grpc/metadata"
+)
+
+const (
+	rootfsViewKeyPrefix   = "urunc-rootfs-view-"
+	rootfsViewLeasePrefix = "urunc-rootfs-view-lease-"
+	rootfsViewAnnotation  = "com.urunc.internal.rootfs.view"
+)
+
+var (
+	// ErrRootfsViewNotPrepared is returned when the bundle carries no rootfs
+	// view state (missing annotation or empty snapshotter).
+	ErrRootfsViewNotPrepared = errors.New("rootfs view not prepared")
+	rootfsViewLog            = logrus.WithField("subsystem", "containerd-shim-rootfs-view")
+)
+
+// rootfsViewState is the JSON payload persisted in the bundle's config.json
+// under rootfsViewAnnotation.
+type rootfsViewState struct {
+	Snapshotter string        `json:"snapshotter"`
+	Mounts      []mount.Mount `json:"mounts,omitempty"`
+}
+
+// RootfsViewAccessor bundles the identifiers and containerd clients used to
+// create and remove the rootfs view snapshot and lease of one container.
+type RootfsViewAccessor struct {
+	namespace   string
+	containerID string
+	snapshotter string
+	snapshotKey string
+	snapshots   snapshotsapi.SnapshotsClient
+	leases      leasesapi.LeasesClient
+}
+
+// NewRootfsViewAccessor builds an accessor from the session's namespace,
+// container ID and snapshot/lease clients. The snapshotter and snapshot key
+// are filled in only when the session has a container with a non-empty
+// snapshot key.
+func NewRootfsViewAccessor(s *Session) *RootfsViewAccessor {
+	a := &RootfsViewAccessor{
+		namespace:   s.namespace,
+		containerID: s.containerID,
+		snapshots:   s.snapshotsClient(),
+		leases:      s.leasesClient(),
+	}
+	ctr := s.GetContainer()
+	if ctr != nil && ctr.GetSnapshotKey() != "" {
+		a.snapshotter = ctr.GetSnapshotter()
+		a.snapshotKey = ctr.GetSnapshotKey()
+	}
+	return a
+}
+
+// getViewKey returns the snapshot key of this container's rootfs view. It
+// delegates to rootfsViewKey so Prepare and cleanup always agree on the key.
+func (a *RootfsViewAccessor) getViewKey() string {
+	return rootfsViewKey(a.containerID)
+}
+
+// getLeaseID returns the lease ID of this container's rootfs view. It
+// delegates to rootfsViewLeaseID so Prepare and cleanup always agree on the ID.
+func (a *RootfsViewAccessor) getLeaseID() string {
+	return rootfsViewLeaseID(a.containerID)
+}
+
+// ShouldPrepare reports whether a rootfs view should be prepared. It returns
+// true only when the accessor knows the snapshotter and snapshot key, the
+// snapshotter is "devmapper" or "blockfile", the chosen rootfs is of type
+// "block" with a non-empty MountedPath, and the urunc config has
+// RootfsView.Enabled set. A config load failure is logged and treated as
+// disabled.
+func (a *RootfsViewAccessor) ShouldPrepare(rootfs types.RootfsParams) bool {
+	if a == nil ||
+		a.snapshotter == "" ||
+		a.snapshotKey == "" ||
+		(a.snapshotter != "devmapper" && a.snapshotter != "blockfile") ||
+		rootfs.Type != "block" ||
+		rootfs.MountedPath == "" {
+		return false
+	}
+
+	uruncCfg, cfgErr := unikontainers.LoadUruncConfig(unikontainers.UruncConfigPath)
+	if cfgErr != nil {
+		rootfsViewLog.WithError(cfgErr).Warn("failed to load urunc config; rootfs view disabled")
+		return false
+	}
+	return uruncCfg.RootfsView.Enabled
+}
+
+// Prepare records a read-only view of the committed rootfs snapshot for runtime use.
+//
+// It resolves the committed base of the container snapshot, creates the view
+// lease (an already existing lease is accepted), creates the view snapshot
+// with the lease ID attached to the outgoing request metadata, and persists
+// the resulting mounts into the bundle via PatchBundleRootfsView.
+//
+// On view creation failure the lease is deleted again; on persistence failure
+// both the view and the lease are removed. The original error is returned in
+// either case.
+func (a *RootfsViewAccessor) Prepare(ctx context.Context, bundle string) error {
+	if a == nil {
+		return fmt.Errorf("rootfs view accessor is nil")
+	}
+
+	snapshotKey, err := a.resolveCommittedSnapshotBase(ctx, a.snapshotter, a.snapshotKey)
+	if err != nil {
+		return err
+	}
+
+	viewKey := a.getViewKey()
+	leaseID := a.getLeaseID()
+
+	nsCtx := withNamespace(ctx, a.namespace)
+	if _, err := a.leases.Create(nsCtx, &leasesapi.CreateRequest{ID: leaseID}); err != nil {
+		err = containerdErr(err)
+		// A lease left behind by a retry or partial prepare is reused.
+		if err != nil && !errdefs.IsAlreadyExists(err) {
+			return fmt.Errorf("create rootfs view lease %s: %w", leaseID, err)
+		}
+	}
+
+	leaseCtx := metadata.AppendToOutgoingContext(nsCtx, "containerd-lease", leaseID)
+	mounts, err := a.createRootfsView(leaseCtx, viewKey, snapshotKey)
+	if err != nil {
+		// Best-effort lease removal; the view creation error is what the
+		// caller needs to see, so a cleanup failure is only logged.
+		if cleanupErr := cleanupRootfsViewLease(ctx, a.namespace, leaseID, a.leases); cleanupErr != nil {
+			rootfsViewLog.WithError(cleanupErr).Warn("failed to clean up rootfs view lease after prepare failure")
+		}
+		return err
+	}
+
+	state := &rootfsViewState{
+		Snapshotter: a.snapshotter,
+		Mounts:      mounts,
+	}
+
+	encoded, err := json.Marshal(state)
+	if err != nil {
+		return fmt.Errorf("marshal rootfs view state: %w", err)
+	}
+	if err := unikontainers.PatchBundleRootfsView(bundle, string(encoded)); err != nil {
+		if cleanupErr := cleanupRootfsView(ctx, a.namespace, a.containerID, a.snapshotter, a.snapshots, a.leases); cleanupErr != nil {
+			rootfsViewLog.WithError(cleanupErr).Warn("failed to clean up rootfs view after state persistence failure")
+			return fmt.Errorf("persist rootfs view state: %w (cleanup also failed: %v)", err, cleanupErr)
+		}
+		return fmt.Errorf("persist rootfs view state: %w", err)
+	}
+
+	return nil
+}
+
+// Cleanup removes this container's rootfs view snapshot from the given
+// snapshotter and deletes the associated lease. It fails when the accessor is
+// nil or when the container ID or snapshotter is empty.
+func (a *RootfsViewAccessor) Cleanup(ctx context.Context, snapshotter string) error {
+	if a == nil {
+		return fmt.Errorf("rootfs view accessor is nil")
+	}
+	if a.containerID == "" {
+		return fmt.Errorf("container id is empty")
+	}
+	if snapshotter == "" {
+		return fmt.Errorf("snapshotter is empty")
+	}
+	return cleanupRootfsView(ctx, a.namespace, a.containerID, snapshotter, a.snapshots, a.leases)
+}
+
+// CleanupFromBundle reads the snapshotter from the view state stored in the
+// bundle and runs Cleanup with it. A bundle without prepared view state is not
+// an error: nil is returned and nothing is removed.
+func (a *RootfsViewAccessor) CleanupFromBundle(ctx context.Context, bundle string) error {
+	if a == nil {
+		return fmt.Errorf("rootfs view accessor is nil")
+	}
+	snapshotter, err := GetSnapshotterFromBundle(bundle)
+	if err != nil {
+		if errors.Is(err, ErrRootfsViewNotPrepared) {
+			return nil
+		}
+		return err
+	}
+	return a.Cleanup(ctx, snapshotter)
+}
+
+// statSnapshot stats a snapshot and returns its parent key and whether its
+// kind is COMMITTED.
+func (a *RootfsViewAccessor) statSnapshot(ctx context.Context, snapshotter, key string) (parent string, committed bool, err error) {
+	resp, err := a.snapshots.Stat(withNamespace(ctx, a.namespace), &snapshotsapi.StatSnapshotRequest{
+		Snapshotter: snapshotter,
+		Key:         key,
+	})
+	if err = containerdErr(err); err != nil {
+		return "", false, err
+	}
+	info := resp.GetInfo()
+	if info == nil {
+		return "", false, fmt.Errorf("stat snapshot %s (%s): empty info", key, snapshotter)
+	}
+	return info.GetParent(), info.GetKind() == snapshotsapi.Kind_COMMITTED, nil
+}
+
+// resolveCommittedSnapshotBase returns the key the view should be created
+// from. snapshotKey itself is returned when it is committed or has no parent;
+// otherwise the parent chain is walked until a committed snapshot is found.
+// An error is returned when the chain ends without a committed snapshot.
+func (a *RootfsViewAccessor) resolveCommittedSnapshotBase(ctx context.Context, snapshotter, snapshotKey string) (string, error) {
+	parent, committed, err := a.statSnapshot(ctx, snapshotter, snapshotKey)
+	if err != nil {
+		return "", fmt.Errorf("stat snapshot %s (%s): %w", snapshotKey, snapshotter, err)
+	}
+	if committed {
+		return snapshotKey, nil
+	}
+	if parent == "" {
+		return snapshotKey, nil
+	}
+
+	current := parent
+	for {
+		parent, committed, err = a.statSnapshot(ctx, snapshotter, current)
+		if err != nil {
+			return "", fmt.Errorf("stat snapshot %s (%s parent walk): %w", current, snapshotter, err)
+		}
+		if committed {
+			return current, nil
+		}
+		if parent == "" {
+			return "", fmt.Errorf("%s snapshot %s has no committed parent in chain", snapshotter, snapshotKey)
+		}
+		current = parent
+	}
+}
+
+// createRootfsView creates the view snapshot viewKey on top of parentKey and
+// returns its mounts. When the view already exists, the mounts of the existing
+// snapshot are fetched and returned instead.
+func (a *RootfsViewAccessor) createRootfsView(ctx context.Context, viewKey, parentKey string) ([]mount.Mount, error) {
+	nsCtx := withNamespace(ctx, a.namespace)
+	viewResp, err := a.snapshots.View(nsCtx, &snapshotsapi.ViewSnapshotRequest{
+		Snapshotter: a.snapshotter,
+		Key:         viewKey,
+		Parent:      parentKey,
+	})
+	if err = containerdErr(err); err == nil {
+		return protoMountsToMounts(viewResp.GetMounts()), nil
+	}
+	if !errdefs.IsAlreadyExists(err) {
+		return nil, fmt.Errorf("create rootfs view %s from %s: %w", viewKey, parentKey, err)
+	}
+
+	// Reuse an existing view left by a retry or partial prepare.
+	mountsResp, err := a.snapshots.Mounts(nsCtx, &snapshotsapi.MountsRequest{
+		Snapshotter: a.snapshotter,
+		Key:         viewKey,
+	})
+	if err = containerdErr(err); err != nil {
+		return nil, fmt.Errorf("create rootfs view %s from %s: %w", viewKey, parentKey, err)
+	}
+	return protoMountsToMounts(mountsResp.GetMounts()), nil
+}
+
+// protoMountsToMounts converts API mount messages into mount.Mount values,
+// preserving order.
+func protoMountsToMounts(mm []*cntrtypes.Mount) []mount.Mount {
+	out := make([]mount.Mount, len(mm))
+	for i, m := range mm {
+		out[i] = mount.Mount{
+			Type:    m.Type,
+			Source:  m.Source,
+			Target:  m.Target,
+			Options: m.Options,
+		}
+	}
+	return out
+}
+
+// GetSnapshotterFromBundle returns the snapshotter recorded in the bundle's
+// rootfs view state. It returns ErrRootfsViewNotPrepared when the bundle has
+// no view state or the recorded snapshotter is empty.
+func GetSnapshotterFromBundle(bundle string) (string, error) {
+	raw, err := unikontainers.ReadBundleRootfsView(bundle)
+	if err != nil {
+		return "", err
+	}
+	if raw == "" {
+		return "", ErrRootfsViewNotPrepared
+	}
+	var state rootfsViewState
+	if err := json.Unmarshal([]byte(raw), &state); err != nil {
+		return "", fmt.Errorf("unmarshal rootfs view state %s: %w", rootfsViewAnnotation, err)
+	}
+	if state.Snapshotter == "" {
+		return "", ErrRootfsViewNotPrepared
+	}
+	return state.Snapshotter, nil
+}
+
+// ShouldCleanupRootfsView reports whether the bundle has prepared rootfs view
+// state and, if so, which snapshotter holds the view. A bundle without view
+// state yields (false, "", nil).
+func ShouldCleanupRootfsView(bundle string) (bool, string, error) {
+	snapshotter, err := GetSnapshotterFromBundle(bundle)
+	if err != nil {
+		if errors.Is(err, ErrRootfsViewNotPrepared) {
+			return false, "", nil
+		}
+		return false, "", err
+	}
+	return true, snapshotter, nil
+}
+
+// cleanupRootfsView removes the container's view snapshot and then deletes its
+// lease. A missing snapshot is not an error. The call is a no-op when
+// containerID or snapshotter is empty.
+func cleanupRootfsView(
+	ctx context.Context,
+	namespace, containerID, snapshotter string,
+	snapshots snapshotsapi.SnapshotsClient,
+	leases leasesapi.LeasesClient,
+) error {
+	if containerID == "" || snapshotter == "" {
+		return nil
+	}
+	nsCtx := withNamespace(ctx, namespace)
+	_, err := snapshots.Remove(nsCtx, &snapshotsapi.RemoveSnapshotRequest{
+		Snapshotter: snapshotter,
+		Key:         rootfsViewKey(containerID),
+	})
+	if err = containerdErr(err); err != nil && !errdefs.IsNotFound(err) {
+		rootfsViewLog.WithError(err).Warn("failed to remove rootfs view from containerd")
+		return err
+	}
+	return cleanupRootfsViewLease(ctx, namespace, rootfsViewLeaseID(containerID), leases)
+}
+
+// cleanupRootfsViewLease deletes the given lease. A missing lease is not an
+// error, and an empty leaseID is a no-op.
+func cleanupRootfsViewLease(ctx context.Context, namespace, leaseID string, leases leasesapi.LeasesClient) error {
+	if leaseID == "" {
+		return nil
+	}
+	_, err := leases.Delete(withNamespace(ctx, namespace), &leasesapi.DeleteRequest{ID: leaseID})
+	if err = containerdErr(err); err != nil && !errdefs.IsNotFound(err) {
+		rootfsViewLog.WithError(err).Warn("failed to remove rootfs view lease from containerd")
+		return err
+	}
+	return nil
+}
+
+// rootfsViewKey returns the view snapshot key for a container.
+func rootfsViewKey(containerID string) string {
+	return rootfsViewKeyPrefix + containerID
+}
+
+// rootfsViewLeaseID returns the view lease ID for a container.
+func rootfsViewLeaseID(containerID string) string {
+	return rootfsViewLeasePrefix + containerID
+}
diff --git a/pkg/containerd-shim/containerd/session.go b/pkg/containerd-shim/containerd/session.go
index e7168ffa1..c3ff02ce1 100644
--- a/pkg/containerd-shim/containerd/session.go
+++ b/pkg/containerd-shim/containerd/session.go
@@ -158,12 +158,10 @@ func (s *Session) contentClient() contentapi.ContentClient {
 	return contentapi.NewContentClient(s.conn)
 }
 
-//nolint:unused // Used by follow-up feature-specific access constructors.
 func (s *Session) snapshotsClient() snapshotsapi.SnapshotsClient {
 	return snapshotsapi.NewSnapshotsClient(s.conn)
 }
 
-//nolint:unused // Used by follow-up feature-specific access constructors.
func (s *Session) leasesClient() leasesapi.LeasesClient { return leasesapi.NewLeasesClient(s.conn) } diff --git a/pkg/unikontainers/block.go b/pkg/unikontainers/block.go index 1b7bf7892..a45f1f90a 100644 --- a/pkg/unikontainers/block.go +++ b/pkg/unikontainers/block.go @@ -36,15 +36,16 @@ const tmpfsSizeForBlockRootfs = "65536k" var ErrMountpoint = errors.New("no FS is mounted in this mountpoint") type blockRootfs struct { - mounts []specs.Mount - monRootfs string - mountedPath string - path string - kernelPath string - initrdPath string - uruncJSONPath string - guestType string - guest types.Unikernel + mounts []specs.Mount + monRootfs string + mountedPath string + path string + kernelPath string + initrdPath string + uruncJSONPath string + guestType string + guest types.Unikernel + rootfsViewState *rootfsViewState } // getMountInfo determines whether the provided path is a mount point @@ -122,8 +123,6 @@ func getMountInfo(path string) (types.BlockDevParams, error) { // extractUnikernelFromBlock moves unikernel binary, initrd and urunc.json // files from old rootfsPath to newRootfsPath -// FIXME: This approach fills up /run with unikernel binaries, initrds and urunc.json -// files for each unikernel we run func extractBootFiles(rootfsPath string, newRootfsPath string, unikernel string, uruncJSON string, initrd string) error { currentUnikernelPath := filepath.Join(rootfsPath, unikernel) targetUnikernelPath := filepath.Join(newRootfsPath, unikernel) @@ -148,7 +147,6 @@ func extractBootFiles(rootfsPath string, newRootfsPath string, unikernel string, if err != nil { return fmt.Errorf("could not move %s to %s: %w", currentConfigPath, newRootfsPath, err) } - return nil } @@ -226,30 +224,59 @@ func getBlockVolumes(monRootfs string, mounts []specs.Mount, ukernel types.Unike } func (b blockRootfs) preSetup() error { + // Preserve main's propagation fix: consume boot artifacts and unmount the + // container rootfs before prepareRoot() makes the mount tree private/slave. 
if b.mountedPath == "" { return nil } - err := copyMountfiles(b.mountedPath, b.mounts) - if err != nil { - return fmt.Errorf("failed to copy files from mount list: %w", err) + useViewPath := b.rootfsViewState != nil + if useViewPath { + // Probe only; the real bind must happen after prepareRoot. + useView, err := probeRootfsViewBootArtifacts(b.rootfsViewState, b.kernelPath, b.initrdPath, b.uruncJSONPath) + if err != nil { + return err + } + if !useView { + useViewPath = false + } } - // FIXME: This approach fills up /run with unikernel binaries and - // urunc.json files for each unikernel instance we run - err = extractBootFiles(b.mountedPath, b.monRootfs, b.kernelPath, b.uruncJSONPath, b.initrdPath) - if err != nil { - return fmt.Errorf("failed to extract boot files from rootfs: %w", err) + if !useViewPath { + err := extractBootFiles(b.mountedPath, b.monRootfs, b.kernelPath, b.uruncJSONPath, b.initrdPath) + if err != nil { + return fmt.Errorf("failed to extract boot files from rootfs: %w", err) + } } - err = mount.Unmount(b.mountedPath) - if err != nil { + if err := copyMountfiles(b.mountedPath, b.mounts); err != nil { + return fmt.Errorf("failed to copy files from mount list: %w", err) + } + + if err := mount.Unmount(b.mountedPath); err != nil { return fmt.Errorf("failed to unmount rootfs: %w", err) } return nil } +// rebindRootfsViewBootAfterPrepareRoot binds boot artifacts into the rootfs +// tree that qemu sees after chroot. 
+func (b blockRootfs) rebindRootfsViewBootAfterPrepareRoot() error { + if b.rootfsViewState == nil { + return nil + } + + useView, err := prepareRootfsViewBootBinds(b.rootfsViewState, b.monRootfs, b.kernelPath, b.initrdPath, b.uruncJSONPath) + if err != nil { + return err + } + if !useView { + return fmt.Errorf("rootfs view boot artifact bind failed after prepareRoot (container rootfs already unmounted)") + } + return nil +} + func (b blockRootfs) postSetup() error { if b.mountedPath != "" { err := setupDev(b.monRootfs, b.path) diff --git a/pkg/unikontainers/rootfs_view_boot.go b/pkg/unikontainers/rootfs_view_boot.go new file mode 100644 index 000000000..4b88f4c7c --- /dev/null +++ b/pkg/unikontainers/rootfs_view_boot.go @@ -0,0 +1,219 @@ +// Copyright (c) 2023-2026, Nubificus LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package unikontainers + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/containerd/containerd/mount" + "golang.org/x/sys/unix" +) + +const rootfsViewAnnotation = "com.urunc.internal.rootfs.view" + +type rootfsViewState struct { + Snapshotter string `json:"snapshotter"` + Mounts []mount.Mount `json:"mounts,omitempty"` +} + +// PatchBundleRootfsView writes shim-prepared view state into bundle config.json. 
+func PatchBundleRootfsView(bundleDir, rootfsViewJSON string) error { + configPath := filepath.Join(bundleDir, configFilename) + fi, err := os.Stat(configPath) + if err != nil { + return fmt.Errorf("stat config.json: %w", err) + } + + spec, err := loadSpec(bundleDir) + if err != nil { + return fmt.Errorf("load bundle spec: %w", err) + } + if spec.Annotations == nil { + spec.Annotations = make(map[string]string) + } + if rootfsViewJSON == "" { + delete(spec.Annotations, rootfsViewAnnotation) + } else { + spec.Annotations[rootfsViewAnnotation] = rootfsViewJSON + } + + patched, err := json.MarshalIndent(spec, "", " ") + if err != nil { + return fmt.Errorf("marshal config.json: %w", err) + } + if err := os.WriteFile(configPath, patched, fi.Mode()); err != nil { + return fmt.Errorf("write config.json: %w", err) + } + return nil +} + +func ReadBundleRootfsView(bundleDir string) (string, error) { + configPath := filepath.Join(bundleDir, configFilename) + if _, err := os.Stat(configPath); err != nil { + if os.IsNotExist(err) { + return "", nil + } + return "", fmt.Errorf("stat config.json: %w", err) + } + + spec, err := loadSpec(bundleDir) + if err != nil { + return "", err + } + if spec.Annotations == nil { + return "", nil + } + return spec.Annotations[rootfsViewAnnotation], nil +} + +func rootfsViewBootArtifactBindPaths(viewRoot, monRootfs, unikernelPath, initrdPath, uruncJSON string) []struct{ src, target string } { + artifactPaths := []string{unikernelPath, uruncJSON} + if initrdPath != "" { + artifactPaths = append(artifactPaths, initrdPath) + } + files := make([]struct{ src, target string }, 0, len(artifactPaths)) + for _, artifactPath := range artifactPaths { + rootfsRelPath := strings.TrimPrefix(filepath.Clean(artifactPath), "/") + files = append(files, struct{ src, target string }{ + src: filepath.Join(viewRoot, rootfsRelPath), + target: filepath.Join(monRootfs, rootfsRelPath), + }) + } + return files +} + +func bindBootArtifactsFromView(viewRoot, monRootfs, 
unikernelPath, initrdPath, uruncJSON string, bindTargets *[]string) error { + for _, f := range rootfsViewBootArtifactBindPaths(viewRoot, monRootfs, unikernelPath, initrdPath, uruncJSON) { + dstPath := f.target + dstDir := filepath.Dir(dstPath) + if err := bindMountFile(f.src, dstDir, dstPath, 0, unix.MS_BIND, false); err != nil { + rollbackRootfsViewBinds(*bindTargets) + *bindTargets = nil + return fmt.Errorf("bind view %s -> %s: %w", f.src, f.target, err) + } + *bindTargets = append(*bindTargets, dstPath) + } + return nil +} + +func rollbackRootfsViewBinds(targets []string) { + for i := len(targets) - 1; i >= 0; i-- { + if err := unmountRootfsViewBind(targets[i]); err != nil { + uniklog.WithError(err).WithField("target", filepath.Clean(targets[i])).Warn("failed to roll back rootfs view bind mount") + } + } +} + +// probeRootfsViewBootArtifacts keeps the legacy extract fallback available: +// preSetup still has mountedPath, but does not keep boot bind mounts. +func probeRootfsViewBootArtifacts(rootfsViewState *rootfsViewState, unikernelPath, initrdPath, uruncJSON string) (useView bool, err error) { + if rootfsViewState == nil { + return false, nil + } + + mountpoint, err := os.MkdirTemp("", "urunc-rootfs-view-") + if err != nil { + return false, fmt.Errorf("create temporary rootfs view mountpoint: %w", err) + } + defer os.RemoveAll(mountpoint) + + if err := mount.All(rootfsViewState.Mounts, mountpoint); err != nil { + uniklog.WithError(err).Warn("rootfs view unavailable; falling back to legacy boot file extraction") + return false, nil + } + + var probeErr error + for _, f := range rootfsViewBootArtifactBindPaths(mountpoint, "", unikernelPath, initrdPath, uruncJSON) { + if _, err := os.Stat(f.src); err != nil { + probeErr = fmt.Errorf("stat rootfs view boot artifact %s: %w", f.src, err) + break + } + } + + if uerr := mount.Unmount(mountpoint, 0); uerr != nil && !os.IsNotExist(uerr) && uerr != unix.EINVAL { + if probeErr != nil { + 
uniklog.WithError(probeErr).Warn("rootfs view boot artifact probe failed") + } + return false, fmt.Errorf("unmount temporary rootfs view mountpoint: %w", uerr) + } + + if probeErr != nil { + uniklog.WithError(probeErr).Warn("rootfs view unavailable; falling back to legacy boot file extraction") + return false, nil + } + return true, nil +} + +// prepareRootfsViewBootBinds runs after prepareRoot, so the binds live in the +// monitor mount namespace and are released with it. +func prepareRootfsViewBootBinds(rootfsViewState *rootfsViewState, monRootfs, unikernelPath, initrdPath, uruncJSON string) (useView bool, err error) { + if rootfsViewState == nil { + return false, nil + } + + var bindTargets []string + keepBinds := false + defer func() { + if !keepBinds { + rollbackRootfsViewBinds(bindTargets) + } + }() + + mountpoint, err := os.MkdirTemp("", "urunc-rootfs-view-") + if err != nil { + return false, fmt.Errorf("create temporary rootfs view mountpoint: %w", err) + } + defer os.RemoveAll(mountpoint) + + if err := mount.All(rootfsViewState.Mounts, mountpoint); err != nil { + uniklog.WithError(err).Warn("rootfs view unavailable; falling back to legacy boot file extraction") + return false, nil + } + + bindErr := bindBootArtifactsFromView(mountpoint, monRootfs, unikernelPath, initrdPath, uruncJSON, &bindTargets) + + uerr := mount.Unmount(mountpoint, 0) + if uerr != nil && !os.IsNotExist(uerr) && uerr != unix.EINVAL { + if bindErr == nil { + bindErr = uerr + } else { + uniklog.WithError(uerr).WithField("path", mountpoint).Warn("failed to unmount temporary rootfs view mount") + } + } + + if bindErr != nil { + if len(bindTargets) > 0 { + return false, fmt.Errorf("rootfs view boot artifact bind completed but cleanup failed: %w", bindErr) + } + uniklog.WithError(bindErr).Warn("rootfs view unavailable; falling back to legacy boot file extraction") + return false, nil + } + + keepBinds = true + return true, nil +} + +func unmountRootfsViewBind(target string) error { + target = 
filepath.Clean(target) + err := unix.Unmount(target, unix.MNT_DETACH) + if err == nil || err == unix.EINVAL || err == unix.ENOENT || os.IsNotExist(err) { + return nil + } + return fmt.Errorf("failed to unmount rootfs view bind %s: %w", target, err) +} diff --git a/pkg/unikontainers/unikontainers.go b/pkg/unikontainers/unikontainers.go index a3a00bb5f..7b0a52fe3 100644 --- a/pkg/unikontainers/unikontainers.go +++ b/pkg/unikontainers/unikontainers.go @@ -306,11 +306,21 @@ func ChooseRootfs(bundle, specRoot string, annot map[string]string, cfg *UruncCo func (u *Unikontainer) Exec(metrics m.Writer) error { metrics.Capture(m.TS15) + // Reload annotations written by the shim after Create. + spec, err := loadSpec(u.State.Bundle) + if err != nil { + return fmt.Errorf("reload bundle spec: %w", err) + } + if spec == nil || spec.Linux == nil { + return fmt.Errorf("invalid OCI spec: linux section is required") + } + u.Spec = spec + // container Paths // Make sure paths are clean bundleDir := filepath.Clean(u.State.Bundle) rootfsDir := filepath.Clean(u.Spec.Root.Path) - rootfsDir, err := resolveAgainstBase(bundleDir, rootfsDir) + rootfsDir, err = resolveAgainstBase(bundleDir, rootfsDir) if err != nil { uniklog.Errorf("could not resolve rootfs directory %s: %v", rootfsDir, err) return err @@ -461,16 +471,24 @@ func (u *Unikontainer) Exec(metrics m.Writer) error { var rfsBuilder rootfsBuilder switch rootfsParams.Type { case "block": + var view *rootfsViewState + if rootfsViewJSON := u.Spec.Annotations[rootfsViewAnnotation]; rootfsViewJSON != "" { + view = &rootfsViewState{} + if err := json.Unmarshal([]byte(rootfsViewJSON), view); err != nil { + return fmt.Errorf("could not decode guest rootfs view: %w", err) + } + } rfsBuilder = blockRootfs{ - mounts: u.Spec.Mounts, - monRootfs: rootfsParams.MonRootfs, - mountedPath: rootfsParams.MountedPath, - path: rootfsParams.Path, - kernelPath: unikernelPath, - initrdPath: initrdPath, - uruncJSONPath: uruncJSONFilename, - guestType: 
unikernelType, - guest: unikernel, + mounts: u.Spec.Mounts, + monRootfs: rootfsParams.MonRootfs, + mountedPath: rootfsParams.MountedPath, + path: rootfsParams.Path, + kernelPath: unikernelPath, + initrdPath: initrdPath, + uruncJSONPath: uruncJSONFilename, + guestType: unikernelType, + guest: unikernel, + rootfsViewState: view, } case "initrd": rfsBuilder = initrdRootfs{ @@ -517,6 +535,12 @@ func (u *Unikontainer) Exec(metrics m.Writer) error { return err } + if b, ok := rfsBuilder.(blockRootfs); ok { + if err := b.rebindRootfsViewBootAfterPrepareRoot(); err != nil { + return fmt.Errorf("boot artifact setup after prepareRoot failed: %w", err) + } + } + // Setup the rootfs for the monitor execution, creating necessary // devices and the monitor's binary. err = prepareMonRootfs(rootfsParams.MonRootfs, vmm.Path(), u.UruncCfg.Monitors[vmmType].DataPath, vmm.UsesKVM(), withTUNTAP) From 3f5300184a8c79eb9e83d50869812799fef42aa5 Mon Sep 17 00:00:00 2001 From: sidneychang <2190206983@qq.com> Date: Sat, 23 May 2026 18:00:45 +0800 Subject: [PATCH 3/6] feat(shim): wire rootfs view into task lifecycle Prepare a read-only rootfs view after guest rootfs choice on Create and tear down containerd view resources on task Delete when the shim process is still alive. 
Signed-off-by: sidneychang <2190206983@qq.com> --- docs/configuration.md | 11 ++++ docs/package/index.md | 10 ++++ pkg/containerd-shim/guest_rootfs.go | 27 +++++---- pkg/containerd-shim/task_plugin.go | 9 +++ pkg/containerd-shim/task_service.go | 90 +++++++++++++++++++++++++++-- 5 files changed, 131 insertions(+), 16 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 4111badd7..75dd87f1c 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -102,6 +102,17 @@ per-container containerd rootfs view at task Create (for `devmapper` / |--------|------|---------|-------------| | `enabled` | boolean | `false` | Prepare rootfs views for container block rootfs after shim task Create | +When `enabled = true`, the shim first lets the wrapped task service create the +task so the bundle rootfs is mounted. It then runs `ChooseRootfs` and prepares a +view only if **all** of the following hold: + +1. The container snapshotter is block-based (`devmapper` or `blockfile`). +2. Shim `ChooseRootfs` selected **container block rootfs** (`type=block` with a + non-empty `MountedPath`). + +This matches the block-rootfs boot-artifact path: kernel/initrd are read from a +read-only view instead of being copied out of the container rootfs before attach. + **Example:** ```toml diff --git a/docs/package/index.md b/docs/package/index.md index 2e772414a..b61f0be64 100644 --- a/docs/package/index.md +++ b/docs/package/index.md @@ -73,6 +73,16 @@ Except of the above, `urunc` accepts the following optional annotations: requests from `urunc` to mount the container's image rootfs in the unikernel (either as a block device or through shared-fs). +Per-container rootfs views are controlled by `[rootfs_view] enabled` in +`/etc/urunc/config.toml`. See +[configuration](../configuration.md#rootfs-view-configuration). 
When enabled, +the container must also use `com.urunc.unikernel.mountRootfs=true` (typically +from image annotations merged into `config.json` before shim task Create). +Supported snapshotters include `devmapper` and `blockfile`. After the wrapped +task service creates the task and mounts the bundle rootfs, the shim runs +`ChooseRootfs` and prepares a view only when that selection is container block +rootfs. + Due to the fact that [Docker](https://www.docker.com/) and some high-level container runtimes do not pass the image annotations to the underlying container runtime, `urunc` can also read the above information from a file inside the diff --git a/pkg/containerd-shim/guest_rootfs.go b/pkg/containerd-shim/guest_rootfs.go index f8982ecf1..f1fb5612f 100644 --- a/pkg/containerd-shim/guest_rootfs.go +++ b/pkg/containerd-shim/guest_rootfs.go @@ -24,6 +24,7 @@ import ( taskAPI "github.com/containerd/containerd/api/runtime/task/v2" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/urunc-dev/urunc/pkg/unikontainers" + "github.com/urunc-dev/urunc/pkg/unikontainers/types" ) const annotRootfsParams = "com.urunc.internal.rootfs.params" @@ -33,35 +34,35 @@ var errGuestRootfsChoiceSkipped = errors.New("guest rootfs choice skipped") // chooseGuestRootfs runs the same ChooseRootfs logic as runtime Exec after inner // task Create (#684) and records the result in annotRootfsParams so Exec knows // selection already happened. 
-func chooseGuestRootfs(r *taskAPI.CreateTaskRequest) error { +func chooseGuestRootfs(r *taskAPI.CreateTaskRequest) (types.RootfsParams, error) { configPath := filepath.Join(r.Bundle, "config.json") info, err := os.Stat(configPath) if err != nil { - return fmt.Errorf("stat config.json: %w", err) + return types.RootfsParams{}, fmt.Errorf("stat config.json: %w", err) } data, err := os.ReadFile(configPath) if err != nil { - return fmt.Errorf("read config.json: %w", err) + return types.RootfsParams{}, fmt.Errorf("read config.json: %w", err) } var spec specs.Spec if err := json.Unmarshal(data, &spec); err != nil { - return fmt.Errorf("unmarshal config.json: %w", err) + return types.RootfsParams{}, fmt.Errorf("unmarshal config.json: %w", err) } if spec.Root == nil { - return fmt.Errorf("invalid OCI spec: root section is required") + return types.RootfsParams{}, fmt.Errorf("invalid OCI spec: root section is required") } config, err := unikontainers.GetUnikernelConfig(filepath.Clean(r.Bundle), &spec) if err != nil { - return fmt.Errorf("%w: %w", errGuestRootfsChoiceSkipped, err) + return types.RootfsParams{}, fmt.Errorf("%w: %w", errGuestRootfsChoiceSkipped, err) } annotations := config.Map() uruncCfg, err := unikontainers.LoadUruncConfig(unikontainers.UruncConfigPath) if err != nil && uruncCfg == nil { - return err + return types.RootfsParams{}, err } rootfsParams, err := unikontainers.ChooseRootfs( @@ -71,12 +72,12 @@ func chooseGuestRootfs(r *taskAPI.CreateTaskRequest) error { uruncCfg, ) if err != nil { - return err + return types.RootfsParams{}, err } encoded, err := json.Marshal(rootfsParams) if err != nil { - return err + return types.RootfsParams{}, err } if spec.Annotations == nil { spec.Annotations = make(map[string]string) @@ -85,8 +86,10 @@ func chooseGuestRootfs(r *taskAPI.CreateTaskRequest) error { patched, err := json.MarshalIndent(spec, "", " ") if err != nil { - return fmt.Errorf("marshal config.json: %w", err) + return types.RootfsParams{}, 
fmt.Errorf("marshal config.json: %w", err) } - - return os.WriteFile(configPath, patched, info.Mode()) + if err := os.WriteFile(configPath, patched, info.Mode()); err != nil { + return types.RootfsParams{}, err + } + return rootfsParams, nil } diff --git a/pkg/containerd-shim/task_plugin.go b/pkg/containerd-shim/task_plugin.go index 85226f383..54dfabddf 100644 --- a/pkg/containerd-shim/task_plugin.go +++ b/pkg/containerd-shim/task_plugin.go @@ -15,6 +15,9 @@ package containerdshim import ( + "os" + "path/filepath" + "github.com/containerd/containerd/pkg/shutdown" "github.com/containerd/containerd/plugin" runcTask "github.com/containerd/containerd/runtime/v2/runc/task" @@ -45,9 +48,15 @@ func init() { return nil, err } + cwd, err := os.Getwd() + if err != nil { + return nil, err + } + return &taskService{ TaskService: inner, containerdAddress: ic.Address, + stateRoot: filepath.Dir(filepath.Dir(cwd)), }, nil }, }) diff --git a/pkg/containerd-shim/task_service.go b/pkg/containerd-shim/task_service.go index fb126c3f0..3b28a8034 100644 --- a/pkg/containerd-shim/task_service.go +++ b/pkg/containerd-shim/task_service.go @@ -17,8 +17,11 @@ package containerdshim import ( "context" "errors" + "fmt" + "path/filepath" taskAPI "github.com/containerd/containerd/api/runtime/task/v2" + "github.com/containerd/containerd/namespaces" "github.com/containerd/log" "github.com/containerd/ttrpc" containerdShim "github.com/urunc-dev/urunc/pkg/containerd-shim/containerd" @@ -31,6 +34,8 @@ type taskService struct { taskAPI.TaskService containerdAddress string + // Used on Delete, where cwd may no longer be the bundle. 
+ stateRoot string } func (s *taskService) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (*taskAPI.CreateTaskResponse, error) { @@ -53,9 +58,8 @@ func (s *taskService) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) return resp, err } - // ChooseRootfs after inner task Create so bundle rootfs is mounted; - // params are persisted in bundle config.json for runtime Exec. - if err := chooseGuestRootfs(r); err != nil { + rootfsChoice, err := chooseGuestRootfs(r) + if err != nil { if errors.Is(err, errGuestRootfsChoiceSkipped) { log.G(ctx).WithError(err).Debug("urunc(shim): guest rootfs choice skipped") return resp, nil @@ -64,14 +68,92 @@ func (s *taskService) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) return nil, err } + log.G(ctx).WithFields(map[string]any{ + "rootfs_type": rootfsChoice.Type, + "rootfs_path": rootfsChoice.Path, + "mon_rootfs": rootfsChoice.MonRootfs, + }).Debug("urunc(shim): guest rootfs chosen") + + if session != nil { + rootfsViewAccessor := containerdShim.NewRootfsViewAccessor(session) + if rootfsViewAccessor.ShouldPrepare(rootfsChoice) { + if err := rootfsViewAccessor.Prepare(ctx, r.Bundle); err != nil { + log.G(ctx).WithError(err).Warn("urunc(shim): failed to prepare rootfs view; falling back to legacy boot artifact extraction") + } else { + log.G(ctx).Debug("urunc(shim): rootfs view prepared") + } + } else { + log.G(ctx).WithField("rootfs_type", rootfsChoice.Type).Debug("urunc(shim): rootfs view prepare skipped") + } + } + return resp, nil } func (s *taskService) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (*taskAPI.DeleteResponse, error) { - return s.TaskService.Delete(ctx, r) + shouldCleanup := false + snapshotter := "" + var loadErr error + + if r.ExecID == "" { + bundle, err := s.bundlePathFor(ctx, r.ID) + if err != nil { + log.G(ctx).WithError(err).Warn("urunc(shim): resolve bundle path during Delete failed") + loadErr = err + } else { + // Read view state before inner Delete; snapshotter is 
taken from bundle + // (written at Prepare) because container metadata may be gone after Delete. + shouldCleanup, snapshotter, loadErr = containerdShim.ShouldCleanupRootfsView(bundle) + } + } + + // Delete tears down the monitor namespace before removing the view it may pin. + resp, err := s.TaskService.Delete(ctx, r) + + if loadErr != nil { + if err != nil { + return resp, err + } + return resp, loadErr + } + + if shouldCleanup { + session, sessionErr := containerdShim.OpenSession(ctx, s.containerdAddress, r.ID) + if sessionErr != nil { + log.G(ctx).WithError(sessionErr).Warn("urunc(shim): open containerd session for rootfs view cleanup failed") + if err == nil { + err = sessionErr + } + } else { + defer func() { + if err := session.Close(); err != nil { + log.G(ctx).WithError(err).Warn("urunc(shim): failed to close containerd session after rootfs view cleanup") + } + }() + if cleanupErr := containerdShim.NewRootfsViewAccessor(session).Cleanup(ctx, snapshotter); cleanupErr != nil { + log.G(ctx).WithError(cleanupErr).Warn("urunc(shim): delete rootfs view during Delete failed") + if err == nil { + err = cleanupErr + } + } + } + } + + return resp, err } func (s *taskService) RegisterTTRPC(server *ttrpc.Server) error { taskAPI.RegisterTaskService(server, s) return nil } + +func (s *taskService) bundlePathFor(ctx context.Context, containerID string) (string, error) { + if s.stateRoot == "" { + return "", fmt.Errorf("task service state root is empty (shim cwd layout assumption violated)") + } + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return "", fmt.Errorf("namespace required: %w", err) + } + return filepath.Join(s.stateRoot, ns, containerID), nil +} From 4cc2c6e1618047549961fd67ce04f8025d4ab95b Mon Sep 17 00:00:00 2001 From: sidneychang <2190206983@qq.com> Date: Sat, 23 May 2026 18:00:51 +0800 Subject: [PATCH 4/6] feat(shim): clean up rootfs views on dead-shim delete path Wrap the runc shim manager so containerd shim delete subcommand removes 
rootfs view resources before the bundle is torn down. Signed-off-by: sidneychang <2190206983@qq.com> --- cmd/containerd-shim-urunc-v2/main.go | 4 +- pkg/containerd-shim/shim_manager.go | 80 ++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 pkg/containerd-shim/shim_manager.go diff --git a/cmd/containerd-shim-urunc-v2/main.go b/cmd/containerd-shim-urunc-v2/main.go index dac1451c2..70d2398ea 100644 --- a/cmd/containerd-shim-urunc-v2/main.go +++ b/cmd/containerd-shim-urunc-v2/main.go @@ -17,11 +17,11 @@ package main import ( "context" - "github.com/containerd/containerd/runtime/v2/runc/manager" "github.com/containerd/containerd/runtime/v2/shim" _ "github.com/urunc-dev/urunc/pkg/containerd-shim" + containerdshim "github.com/urunc-dev/urunc/pkg/containerd-shim" ) func main() { - shim.RunManager(context.Background(), manager.NewShimManager("io.containerd.urunc.v2")) + shim.RunManager(context.Background(), containerdshim.NewShimManager("io.containerd.urunc.v2")) } diff --git a/pkg/containerd-shim/shim_manager.go b/pkg/containerd-shim/shim_manager.go new file mode 100644 index 000000000..c8193f8df --- /dev/null +++ b/pkg/containerd-shim/shim_manager.go @@ -0,0 +1,80 @@ +// Copyright (c) 2023-2026, Nubificus LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package containerdshim + +import ( + "context" + "os" + + "github.com/containerd/containerd/runtime/v2/runc/manager" + "github.com/containerd/containerd/runtime/v2/shim" + "github.com/containerd/log" + containerdShim "github.com/urunc-dev/urunc/pkg/containerd-shim/containerd" +) + +const containerdGRPCAddressEnv = "GRPC_ADDRESS" + +func containerdGRPCAddress() string { + return os.Getenv(containerdGRPCAddressEnv) +} + +type shimManager struct { + shim.Manager +} + +func NewShimManager(runtime string) shim.Manager { + return &shimManager{Manager: manager.NewShimManager(runtime)} +} + +func (m *shimManager) Stop(ctx context.Context, id string) (shim.StopStatus, error) { + bundle, err := os.Getwd() + if err != nil { + log.G(ctx).WithError(err).Warn("urunc(shim): getwd during delete failed") + return m.Manager.Stop(ctx, id) + } + + shouldCleanup, snapshotter, err := containerdShim.ShouldCleanupRootfsView(bundle) + if err != nil { + log.G(ctx).WithError(err).Warn("urunc(shim): read rootfs view cleanup state from bundle during delete failed") + return m.Manager.Stop(ctx, id) + } + if !shouldCleanup { + return m.Manager.Stop(ctx, id) + } + + address := containerdGRPCAddress() + if address == "" { + log.G(ctx).Warn("urunc(shim): containerd gRPC address unset during delete; rootfs view cleanup skipped") + return m.Manager.Stop(ctx, id) + } + + session, err := containerdShim.OpenSession(ctx, address, id) + if err != nil { + log.G(ctx).WithError(err).Warn("urunc(shim): open containerd session for rootfs view cleanup failed") + return m.Manager.Stop(ctx, id) + } + defer func() { + if err := session.Close(); err != nil { + log.G(ctx).WithError(err).Warn("urunc(shim): failed to close containerd session after rootfs view cleanup") + } + }() + + // snapshotter from bundle view state; shim cwd may outlive task Delete. 
+ if err := containerdShim.NewRootfsViewAccessor(session).Cleanup(ctx, snapshotter); err != nil { + log.G(ctx).WithError(err).Warn("urunc(shim): rootfs view cleanup during delete failed") + } + + return m.Manager.Stop(ctx, id) +} From fa21893381d0d80cd05106fe58223a7e72cd35e6 Mon Sep 17 00:00:00 2001 From: sidneychang <2190206983@qq.com> Date: Sun, 24 May 2026 22:14:28 +0800 Subject: [PATCH 5/6] refactor(shim): persist create annotations in one config.json write Defer writing com.urunc.internal.rootfs.params and .view until after guest rootfs choice and optional view prepare, then patch both via a single PatchConfigJSON call instead of separate config.json updates. Signed-off-by: sidneychang <2190206983@qq.com> --- pkg/containerd-shim/containerd/annotations.go | 8 +-- pkg/containerd-shim/containerd/rootfs_view.go | 56 ++++++++----------- pkg/containerd-shim/guest_rootfs.go | 41 ++++---------- pkg/containerd-shim/task_service.go | 48 ++++++++++++---- pkg/unikontainers/rootfs_view_boot.go | 32 ----------- 5 files changed, 76 insertions(+), 109 deletions(-) diff --git a/pkg/containerd-shim/containerd/annotations.go b/pkg/containerd-shim/containerd/annotations.go index 5d980c961..099c34639 100644 --- a/pkg/containerd-shim/containerd/annotations.go +++ b/pkg/containerd-shim/containerd/annotations.go @@ -86,7 +86,7 @@ func InjectUruncAnnotations(ctx context.Context, session *Session, bundlePath st return nil } - return patchConfigJSON(bundlePath, annotations) + return PatchConfigJSON(bundlePath, annotations) } func (f *annotationFetcher) fetchUruncAnnotations(ctx context.Context) (map[string]string, error) { @@ -152,12 +152,12 @@ func readBlob(ctx context.Context, namespace string, contentClient contentapi.Co return raw, nil } -// patchConfigJSON injects missing annotations into the OCI runtime spec -// stored in the bundle's config.json. +// PatchConfigJSON injects missing annotations into the OCI runtime spec stored in +// the bundle's config.json. 
// // Existing annotations in config.json are preserved. Only annotation keys that // are not already present in the runtime spec are added. -func patchConfigJSON(bundlePath string, annotations map[string]string) error { +func PatchConfigJSON(bundlePath string, annotations map[string]string) error { configPath := filepath.Join(bundlePath, "config.json") fi, err := os.Stat(configPath) diff --git a/pkg/containerd-shim/containerd/rootfs_view.go b/pkg/containerd-shim/containerd/rootfs_view.go index bf8929094..37b217484 100644 --- a/pkg/containerd-shim/containerd/rootfs_view.go +++ b/pkg/containerd-shim/containerd/rootfs_view.go @@ -25,7 +25,6 @@ import ( cntrtypes "github.com/containerd/containerd/api/types" "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/mount" - "github.com/sirupsen/logrus" "github.com/urunc-dev/urunc/pkg/unikontainers" "github.com/urunc-dev/urunc/pkg/unikontainers/types" "google.golang.org/grpc/metadata" @@ -37,10 +36,7 @@ const ( rootfsViewAnnotation = "com.urunc.internal.rootfs.view" ) -var ( - ErrRootfsViewNotPrepared = errors.New("rootfs view not prepared") - rootfsViewLog = logrus.WithField("subsystem", "containerd-shim-rootfs-view") -) +var ErrRootfsViewNotPrepared = errors.New("rootfs view not prepared") type rootfsViewState struct { Snapshotter string `json:"snapshotter"` @@ -79,33 +75,34 @@ func (a *RootfsViewAccessor) getLeaseID() string { return rootfsViewLeasePrefix + a.containerID } -func (a *RootfsViewAccessor) ShouldPrepare(rootfs types.RootfsParams) bool { +func (a *RootfsViewAccessor) ShouldPrepare(rootfs types.RootfsParams) (bool, error) { if a == nil || a.snapshotter == "" || a.snapshotKey == "" || (a.snapshotter != "devmapper" && a.snapshotter != "blockfile") || rootfs.Type != "block" || rootfs.MountedPath == "" { - return false + return false, nil } uruncCfg, cfgErr := unikontainers.LoadUruncConfig(unikontainers.UruncConfigPath) if cfgErr != nil { - rootfsViewLog.WithError(cfgErr).Warn("failed to 
load urunc config; rootfs view disabled") - return false + return false, cfgErr } - return uruncCfg.RootfsView.Enabled + return uruncCfg.RootfsView.Enabled, nil } // Prepare records a read-only view of the committed rootfs snapshot for runtime use. -func (a *RootfsViewAccessor) Prepare(ctx context.Context, bundle string) error { +// On success it returns JSON-encoded view state for the caller to persist in bundle +// config.json together with other shim Create annotations. +func (a *RootfsViewAccessor) Prepare(ctx context.Context) (string, error) { if a == nil { - return fmt.Errorf("rootfs view accessor is nil") + return "", fmt.Errorf("rootfs view accessor is nil") } snapshotKey, err := a.resolveCommittedSnapshotBase(ctx, a.snapshotter, a.snapshotKey) if err != nil { - return err + return "", err } viewKey := a.getViewKey() @@ -115,22 +112,15 @@ func (a *RootfsViewAccessor) Prepare(ctx context.Context, bundle string) error { if _, err := a.leases.Create(nsCtx, &leasesapi.CreateRequest{ID: leaseID}); err != nil { err = containerdErr(err) if err != nil && !errdefs.IsAlreadyExists(err) { - return fmt.Errorf("create rootfs view lease %s: %w", leaseID, err) + return "", fmt.Errorf("create rootfs view lease %s: %w", leaseID, err) } } leaseCtx := metadata.AppendToOutgoingContext(nsCtx, "containerd-lease", leaseID) mounts, err := a.createRootfsView(leaseCtx, viewKey, snapshotKey) if err != nil { -<<<<<<< HEAD - if cleanupErr := cleanupRootfsViewLease(ctx, a.namespace, a.leaseID(), a.leases); cleanupErr != nil { - rootfsViewLog.WithError(cleanupErr).Warn("failed to clean up rootfs view lease after prepare failure") - } - return err -======= _ = cleanupRootfsViewLease(ctx, a.namespace, a.getLeaseID(), a.leases) return "", err ->>>>>>> 5f95296 (fixup! 
feat: add rootfs view accessor and runtime boot binds) } state := &rootfsViewState{ @@ -140,19 +130,18 @@ func (a *RootfsViewAccessor) Prepare(ctx context.Context, bundle string) error { encoded, err := json.Marshal(state) if err != nil { - return fmt.Errorf("marshal rootfs view state: %w", err) + return "", fmt.Errorf("marshal rootfs view state: %w", err) } - if err := unikontainers.PatchBundleRootfsView(bundle, string(encoded)); err != nil { - if cleanupErr := cleanupRootfsView(ctx, a.namespace, a.containerID, a.snapshotter, a.snapshots, a.leases); cleanupErr != nil { - rootfsViewLog.WithError(cleanupErr).Warn("failed to clean up rootfs view after state persistence failure") - return fmt.Errorf("persist rootfs view state: %w (cleanup also failed: %v)", err, cleanupErr) - } - return fmt.Errorf("persist rootfs view state: %w", err) - } - - return nil + return string(encoded), nil } +// Cleanup removes the per-container rootfs view snapshot and lease. +// snapshotter names the devmapper/blockfile plugin that owns the view; callers +// on Delete should pass the value persisted in bundle config.json at Prepare +// time (see ShouldCleanupRootfsView). After inner task Delete, containerd +// container metadata may no longer be loadable, so do not rely on a.snapshotter +// alone on that path. When snapshotter is empty, a.snapshotter from the session +// is used (Create-time rollback while the container record still exists). 
func (a *RootfsViewAccessor) Cleanup(ctx context.Context, snapshotter string) error { if a == nil { return fmt.Errorf("rootfs view accessor is nil") @@ -160,6 +149,9 @@ func (a *RootfsViewAccessor) Cleanup(ctx context.Context, snapshotter string) er if a.containerID == "" { return fmt.Errorf("container id is empty") } + if snapshotter == "" { + snapshotter = a.snapshotter + } if snapshotter == "" { return fmt.Errorf("snapshotter is empty") } @@ -305,7 +297,6 @@ func cleanupRootfsView( Key: rootfsViewKey(containerID), }) if err = containerdErr(err); err != nil && !errdefs.IsNotFound(err) { - rootfsViewLog.WithError(err).Warn("failed to remove rootfs view from containerd") return err } return cleanupRootfsViewLease(ctx, namespace, rootfsViewLeaseID(containerID), leases) @@ -317,7 +308,6 @@ func cleanupRootfsViewLease(ctx context.Context, namespace, leaseID string, leas } _, err := leases.Delete(withNamespace(ctx, namespace), &leasesapi.DeleteRequest{ID: leaseID}) if err = containerdErr(err); err != nil && !errdefs.IsNotFound(err) { - rootfsViewLog.WithError(err).Warn("failed to remove rootfs view lease from containerd") return err } return nil diff --git a/pkg/containerd-shim/guest_rootfs.go b/pkg/containerd-shim/guest_rootfs.go index f1fb5612f..496ed362d 100644 --- a/pkg/containerd-shim/guest_rootfs.go +++ b/pkg/containerd-shim/guest_rootfs.go @@ -27,42 +27,35 @@ import ( "github.com/urunc-dev/urunc/pkg/unikontainers/types" ) -const annotRootfsParams = "com.urunc.internal.rootfs.params" - var errGuestRootfsChoiceSkipped = errors.New("guest rootfs choice skipped") // chooseGuestRootfs runs the same ChooseRootfs logic as runtime Exec after inner -// task Create (#684) and records the result in annotRootfsParams so Exec knows -// selection already happened. -func chooseGuestRootfs(r *taskAPI.CreateTaskRequest) (types.RootfsParams, error) { +// task Create (#684). The caller persists the JSON-encoded result in bundle +// config.json so Exec can reuse the selection. 
+func chooseGuestRootfs(r *taskAPI.CreateTaskRequest) (types.RootfsParams, string, error) { configPath := filepath.Join(r.Bundle, "config.json") - info, err := os.Stat(configPath) - if err != nil { - return types.RootfsParams{}, fmt.Errorf("stat config.json: %w", err) - } - data, err := os.ReadFile(configPath) if err != nil { - return types.RootfsParams{}, fmt.Errorf("read config.json: %w", err) + return types.RootfsParams{}, "", fmt.Errorf("read config.json: %w", err) } var spec specs.Spec if err := json.Unmarshal(data, &spec); err != nil { - return types.RootfsParams{}, fmt.Errorf("unmarshal config.json: %w", err) + return types.RootfsParams{}, "", fmt.Errorf("unmarshal config.json: %w", err) } if spec.Root == nil { - return types.RootfsParams{}, fmt.Errorf("invalid OCI spec: root section is required") + return types.RootfsParams{}, "", fmt.Errorf("invalid OCI spec: root section is required") } config, err := unikontainers.GetUnikernelConfig(filepath.Clean(r.Bundle), &spec) if err != nil { - return types.RootfsParams{}, fmt.Errorf("%w: %w", errGuestRootfsChoiceSkipped, err) + return types.RootfsParams{}, "", fmt.Errorf("%w: %w", errGuestRootfsChoiceSkipped, err) } annotations := config.Map() uruncCfg, err := unikontainers.LoadUruncConfig(unikontainers.UruncConfigPath) if err != nil && uruncCfg == nil { - return types.RootfsParams{}, err + return types.RootfsParams{}, "", err } rootfsParams, err := unikontainers.ChooseRootfs( @@ -72,24 +65,12 @@ func chooseGuestRootfs(r *taskAPI.CreateTaskRequest) (types.RootfsParams, error) uruncCfg, ) if err != nil { - return types.RootfsParams{}, err + return types.RootfsParams{}, "", err } encoded, err := json.Marshal(rootfsParams) if err != nil { - return types.RootfsParams{}, err - } - if spec.Annotations == nil { - spec.Annotations = make(map[string]string) - } - spec.Annotations[annotRootfsParams] = string(encoded) - - patched, err := json.MarshalIndent(spec, "", " ") - if err != nil { - return types.RootfsParams{}, 
fmt.Errorf("marshal config.json: %w", err) - } - if err := os.WriteFile(configPath, patched, info.Mode()); err != nil { - return types.RootfsParams{}, err + return types.RootfsParams{}, "", err } - return rootfsParams, nil + return rootfsParams, string(encoded), nil } diff --git a/pkg/containerd-shim/task_service.go b/pkg/containerd-shim/task_service.go index 3b28a8034..ffcfd65d2 100644 --- a/pkg/containerd-shim/task_service.go +++ b/pkg/containerd-shim/task_service.go @@ -27,6 +27,12 @@ import ( containerdShim "github.com/urunc-dev/urunc/pkg/containerd-shim/containerd" ) +// Internal bundle annotations (duplicated in unikontainers; keep in sync). +const ( + annotRootfsParams = "com.urunc.internal.rootfs.params" + annotRootfsView = "com.urunc.internal.rootfs.view" +) + // taskService is urunc's shim-side wrapper around containerd's runc task // service. It wires urunc task setup before forwarding calls to the wrapped // service. @@ -58,7 +64,7 @@ func (s *taskService) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) return resp, err } - rootfsChoice, err := chooseGuestRootfs(r) + rootfsChoice, rootfsParamsJSON, err := chooseGuestRootfs(r) if err != nil { if errors.Is(err, errGuestRootfsChoiceSkipped) { log.G(ctx).WithError(err).Debug("urunc(shim): guest rootfs choice skipped") @@ -68,17 +74,18 @@ func (s *taskService) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) return nil, err } - log.G(ctx).WithFields(map[string]any{ - "rootfs_type": rootfsChoice.Type, - "rootfs_path": rootfsChoice.Path, - "mon_rootfs": rootfsChoice.MonRootfs, - }).Debug("urunc(shim): guest rootfs chosen") - + rootfsViewJSON := "" + var rootfsViewAccessor *containerdShim.RootfsViewAccessor if session != nil { - rootfsViewAccessor := containerdShim.NewRootfsViewAccessor(session) - if rootfsViewAccessor.ShouldPrepare(rootfsChoice) { - if err := rootfsViewAccessor.Prepare(ctx, r.Bundle); err != nil { + rootfsViewAccessor = containerdShim.NewRootfsViewAccessor(session) + 
shouldPrepare, shouldPrepareErr := rootfsViewAccessor.ShouldPrepare(rootfsChoice) + if shouldPrepareErr != nil { + log.G(ctx).WithError(shouldPrepareErr).Warn("urunc(shim): failed to load urunc config; rootfs view disabled") + } else if shouldPrepare { + rootfsViewJSON, err = rootfsViewAccessor.Prepare(ctx) + if err != nil { log.G(ctx).WithError(err).Warn("urunc(shim): failed to prepare rootfs view; falling back to legacy boot artifact extraction") + rootfsViewJSON = "" } else { log.G(ctx).Debug("urunc(shim): rootfs view prepared") } @@ -87,6 +94,27 @@ func (s *taskService) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) } } + var shimAnnotations map[string]string + if rootfsViewJSON != "" { + shimAnnotations = map[string]string{ + annotRootfsParams: rootfsParamsJSON, + annotRootfsView: rootfsViewJSON, + } + } else { + shimAnnotations = map[string]string{ + annotRootfsParams: rootfsParamsJSON, + } + } + if err := containerdShim.PatchConfigJSON(r.Bundle, shimAnnotations); err != nil { + if rootfsViewJSON != "" && rootfsViewAccessor != nil { + if cleanupErr := rootfsViewAccessor.Cleanup(ctx, ""); cleanupErr != nil { + log.G(ctx).WithError(cleanupErr).Warn("urunc(shim): failed to clean up rootfs view after annotation persistence failure") + } + } + log.G(ctx).WithError(err).Warn("urunc(shim): failed to persist shim create annotations") + return nil, err + } + return resp, nil } diff --git a/pkg/unikontainers/rootfs_view_boot.go b/pkg/unikontainers/rootfs_view_boot.go index 4b88f4c7c..cad3c63ff 100644 --- a/pkg/unikontainers/rootfs_view_boot.go +++ b/pkg/unikontainers/rootfs_view_boot.go @@ -15,7 +15,6 @@ package unikontainers import ( - "encoding/json" "fmt" "os" "path/filepath" @@ -32,37 +31,6 @@ type rootfsViewState struct { Mounts []mount.Mount `json:"mounts,omitempty"` } -// PatchBundleRootfsView writes shim-prepared view state into bundle config.json. 
-func PatchBundleRootfsView(bundleDir, rootfsViewJSON string) error { - configPath := filepath.Join(bundleDir, configFilename) - fi, err := os.Stat(configPath) - if err != nil { - return fmt.Errorf("stat config.json: %w", err) - } - - spec, err := loadSpec(bundleDir) - if err != nil { - return fmt.Errorf("load bundle spec: %w", err) - } - if spec.Annotations == nil { - spec.Annotations = make(map[string]string) - } - if rootfsViewJSON == "" { - delete(spec.Annotations, rootfsViewAnnotation) - } else { - spec.Annotations[rootfsViewAnnotation] = rootfsViewJSON - } - - patched, err := json.MarshalIndent(spec, "", " ") - if err != nil { - return fmt.Errorf("marshal config.json: %w", err) - } - if err := os.WriteFile(configPath, patched, fi.Mode()); err != nil { - return fmt.Errorf("write config.json: %w", err) - } - return nil -} - func ReadBundleRootfsView(bundleDir string) (string, error) { configPath := filepath.Join(bundleDir, configFilename) if _, err := os.Stat(configPath); err != nil { From 7981cf27cd32e75e48496627420d4be396ff2fa1 Mon Sep 17 00:00:00 2001 From: sidneychang <2190206983@qq.com> Date: Sat, 30 May 2026 20:28:24 +0800 Subject: [PATCH 6/6] refactor(rootfs-view): persist view state in bundle rootfs-view.json Pass RootfsViewState between shim and runtime via rootfs-view.json instead of the com.urunc.internal.rootfs.view annotation. Share the struct in pkg/unikontainers/types and return errors from prepareRootfsViewBootBinds after prepareRoot, where legacy boot file extraction is no longer possible. 
Signed-off-by: sidneychang <2190206983@qq.com> --- pkg/containerd-shim/containerd/rootfs_view.go | 149 ++++++------------ pkg/containerd-shim/task_service.go | 47 +++--- pkg/unikontainers/block.go | 19 +-- pkg/unikontainers/rootfs_view_boot.go | 132 ++++++++-------- pkg/unikontainers/types/types.go | 11 +- pkg/unikontainers/unikontainers.go | 13 +- pkg/unikontainers/utils.go | 7 +- 7 files changed, 155 insertions(+), 223 deletions(-) diff --git a/pkg/containerd-shim/containerd/rootfs_view.go b/pkg/containerd-shim/containerd/rootfs_view.go index 37b217484..a43648b39 100644 --- a/pkg/containerd-shim/containerd/rootfs_view.go +++ b/pkg/containerd-shim/containerd/rootfs_view.go @@ -16,8 +16,6 @@ package containerd import ( "context" - "encoding/json" - "errors" "fmt" leasesapi "github.com/containerd/containerd/api/services/leases/v1" @@ -33,16 +31,8 @@ import ( const ( rootfsViewKeyPrefix = "urunc-rootfs-view-" rootfsViewLeasePrefix = "urunc-rootfs-view-lease-" - rootfsViewAnnotation = "com.urunc.internal.rootfs.view" ) -var ErrRootfsViewNotPrepared = errors.New("rootfs view not prepared") - -type rootfsViewState struct { - Snapshotter string `json:"snapshotter"` - Mounts []mount.Mount `json:"mounts,omitempty"` -} - type RootfsViewAccessor struct { namespace string containerID string @@ -52,14 +42,14 @@ type RootfsViewAccessor struct { leases leasesapi.LeasesClient } -func NewRootfsViewAccessor(s *Session) *RootfsViewAccessor { +func NewRootfsViewAccessor(session *Session) *RootfsViewAccessor { a := &RootfsViewAccessor{ - namespace: s.namespace, - containerID: s.containerID, - snapshots: s.snapshotsClient(), - leases: s.leasesClient(), + namespace: session.GetNamespace(), + containerID: session.GetContainerID(), + snapshots: session.snapshotsClient(), + leases: session.leasesClient(), } - ctr := s.GetContainer() + ctr := session.GetContainer() if ctr != nil && ctr.GetSnapshotKey() != "" { a.snapshotter = ctr.GetSnapshotter() a.snapshotKey = ctr.GetSnapshotKey() @@ 
-67,14 +57,6 @@ func NewRootfsViewAccessor(s *Session) *RootfsViewAccessor { return a } -func (a *RootfsViewAccessor) getViewKey() string { - return rootfsViewKeyPrefix + a.containerID -} - -func (a *RootfsViewAccessor) getLeaseID() string { - return rootfsViewLeasePrefix + a.containerID -} - func (a *RootfsViewAccessor) ShouldPrepare(rootfs types.RootfsParams) (bool, error) { if a == nil || a.snapshotter == "" || @@ -93,55 +75,50 @@ func (a *RootfsViewAccessor) ShouldPrepare(rootfs types.RootfsParams) (bool, err } // Prepare records a read-only view of the committed rootfs snapshot for runtime use. -// On success it returns JSON-encoded view state for the caller to persist in bundle -// config.json together with other shim Create annotations. -func (a *RootfsViewAccessor) Prepare(ctx context.Context) (string, error) { +// On success it returns view state for the caller to persist in bundle rootfs-view.json. +func (a *RootfsViewAccessor) Prepare(ctx context.Context) (types.RootfsViewState, error) { if a == nil { - return "", fmt.Errorf("rootfs view accessor is nil") + return types.RootfsViewState{}, fmt.Errorf("rootfs view accessor is nil") } snapshotKey, err := a.resolveCommittedSnapshotBase(ctx, a.snapshotter, a.snapshotKey) if err != nil { - return "", err + return types.RootfsViewState{}, err } - viewKey := a.getViewKey() - leaseID := a.getLeaseID() + viewKey := rootfsViewKeyPrefix + a.containerID + leaseID := rootfsViewLeasePrefix + a.containerID nsCtx := withNamespace(ctx, a.namespace) if _, err := a.leases.Create(nsCtx, &leasesapi.CreateRequest{ID: leaseID}); err != nil { err = containerdErr(err) if err != nil && !errdefs.IsAlreadyExists(err) { - return "", fmt.Errorf("create rootfs view lease %s: %w", leaseID, err) + return types.RootfsViewState{}, fmt.Errorf("create rootfs view lease %s: %w", leaseID, err) } } leaseCtx := metadata.AppendToOutgoingContext(nsCtx, "containerd-lease", leaseID) mounts, err := a.createRootfsView(leaseCtx, viewKey, snapshotKey) 
if err != nil { - _ = cleanupRootfsViewLease(ctx, a.namespace, a.getLeaseID(), a.leases) - return "", err + _ = deleteRootfsViewLease(ctx, a.namespace, leaseID, a.leases) + return types.RootfsViewState{}, err } - state := &rootfsViewState{ + return types.RootfsViewState{ Snapshotter: a.snapshotter, Mounts: mounts, - } - - encoded, err := json.Marshal(state) - if err != nil { - return "", fmt.Errorf("marshal rootfs view state: %w", err) - } - return string(encoded), nil + }, nil } -// Cleanup removes the per-container rootfs view snapshot and lease. -// snapshotter names the devmapper/blockfile plugin that owns the view; callers -// on Delete should pass the value persisted in bundle config.json at Prepare -// time (see ShouldCleanupRootfsView). After inner task Delete, containerd -// container metadata may no longer be loadable, so do not rely on a.snapshotter -// alone on that path. When snapshotter is empty, a.snapshotter from the session -// is used (Create-time rollback while the container record still exists). +// Rootfs view cleanup (call chain): +// +// Delete / Stop: ShouldCleanupRootfsView(bundle) → Cleanup(ctx, snapshotter from bundle) +// Create rollback: Cleanup(ctx, "") — snapshotter comes from container metadata on the accessor +// +// Cleanup → removeRootfsViewSnapshotAndLease (view snapshot + lease in containerd) +// Prepare failure after lease create → deleteRootfsViewLease (lease only) + +// Cleanup removes the per-container rootfs view snapshot and its containerd lease. 
func (a *RootfsViewAccessor) Cleanup(ctx context.Context, snapshotter string) error { if a == nil { return fmt.Errorf("rootfs view accessor is nil") @@ -149,27 +126,18 @@ func (a *RootfsViewAccessor) Cleanup(ctx context.Context, snapshotter string) er if a.containerID == "" { return fmt.Errorf("container id is empty") } - if snapshotter == "" { - snapshotter = a.snapshotter - } - if snapshotter == "" { - return fmt.Errorf("snapshotter is empty") - } - return cleanupRootfsView(ctx, a.namespace, a.containerID, snapshotter, a.snapshots, a.leases) -} -func (a *RootfsViewAccessor) CleanupFromBundle(ctx context.Context, bundle string) error { - if a == nil { - return fmt.Errorf("rootfs view accessor is nil") + effectiveSnapshotter := snapshotter + if effectiveSnapshotter == "" { + effectiveSnapshotter = a.snapshotter } - snapshotter, err := GetSnapshotterFromBundle(bundle) - if err != nil { - if errors.Is(err, ErrRootfsViewNotPrepared) { - return nil - } - return err + if effectiveSnapshotter == "" { + return fmt.Errorf("snapshotter name required for rootfs view cleanup") } - return a.Cleanup(ctx, snapshotter) + + return removeRootfsViewSnapshotAndLease( + ctx, a.namespace, a.containerID, effectiveSnapshotter, a.snapshots, a.leases, + ) } func (a *RootfsViewAccessor) statSnapshot(ctx context.Context, snapshotter, key string) (parent string, committed bool, err error) { @@ -253,36 +221,20 @@ func protoMountsToMounts(mm []*cntrtypes.Mount) []mount.Mount { return out } -func GetSnapshotterFromBundle(bundle string) (string, error) { - raw, err := unikontainers.ReadBundleRootfsView(bundle) - if err != nil { - return "", err - } - if raw == "" { - return "", ErrRootfsViewNotPrepared - } - var state rootfsViewState - if err := json.Unmarshal([]byte(raw), &state); err != nil { - return "", fmt.Errorf("unmarshal rootfs view state %s: %w", rootfsViewAnnotation, err) - } - if state.Snapshotter == "" { - return "", ErrRootfsViewNotPrepared - } - return state.Snapshotter, nil -} - 
+// ShouldCleanupRootfsView reports whether bundle rootfs-view.json exists and returns its snapshotter. func ShouldCleanupRootfsView(bundle string) (bool, string, error) { - snapshotter, err := GetSnapshotterFromBundle(bundle) + state, err := unikontainers.LoadBundleRootfsView(bundle) if err != nil { - if errors.Is(err, ErrRootfsViewNotPrepared) { - return false, "", nil - } return false, "", err } - return true, snapshotter, nil + if state == nil || state.Snapshotter == "" { + return false, "", nil + } + return true, state.Snapshotter, nil } -func cleanupRootfsView( +// removeRootfsViewSnapshotAndLease deletes the view snapshot and its lease in containerd. +func removeRootfsViewSnapshotAndLease( ctx context.Context, namespace, containerID, snapshotter string, snapshots snapshotsapi.SnapshotsClient, @@ -294,15 +246,16 @@ func cleanupRootfsView( nsCtx := withNamespace(ctx, namespace) _, err := snapshots.Remove(nsCtx, &snapshotsapi.RemoveSnapshotRequest{ Snapshotter: snapshotter, - Key: rootfsViewKey(containerID), + Key: rootfsViewKeyPrefix + containerID, }) if err = containerdErr(err); err != nil && !errdefs.IsNotFound(err) { return err } - return cleanupRootfsViewLease(ctx, namespace, rootfsViewLeaseID(containerID), leases) + return deleteRootfsViewLease(ctx, namespace, rootfsViewLeasePrefix+containerID, leases) } -func cleanupRootfsViewLease(ctx context.Context, namespace, leaseID string, leases leasesapi.LeasesClient) error { +// deleteRootfsViewLease removes only the containerd lease (Prepare rollback after lease create). 
+func deleteRootfsViewLease(ctx context.Context, namespace, leaseID string, leases leasesapi.LeasesClient) error { if leaseID == "" { return nil } @@ -312,11 +265,3 @@ func cleanupRootfsViewLease(ctx context.Context, namespace, leaseID string, leas } return nil } - -func rootfsViewKey(containerID string) string { - return rootfsViewKeyPrefix + containerID -} - -func rootfsViewLeaseID(containerID string) string { - return rootfsViewLeasePrefix + containerID -} diff --git a/pkg/containerd-shim/task_service.go b/pkg/containerd-shim/task_service.go index ffcfd65d2..698719013 100644 --- a/pkg/containerd-shim/task_service.go +++ b/pkg/containerd-shim/task_service.go @@ -25,13 +25,12 @@ import ( "github.com/containerd/log" "github.com/containerd/ttrpc" containerdShim "github.com/urunc-dev/urunc/pkg/containerd-shim/containerd" + "github.com/urunc-dev/urunc/pkg/unikontainers" + "github.com/urunc-dev/urunc/pkg/unikontainers/types" ) -// Internal bundle annotations (duplicated in unikontainers; keep in sync). -const ( - annotRootfsParams = "com.urunc.internal.rootfs.params" - annotRootfsView = "com.urunc.internal.rootfs.view" -) +// Internal bundle annotation (duplicated in unikontainers; keep in sync). +const annotRootfsParams = "com.urunc.internal.rootfs.params" // taskService is urunc's shim-side wrapper around containerd's runc task // service. 
It wires urunc task setup before forwarding calls to the wrapped @@ -74,7 +73,8 @@ func (s *taskService) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) return nil, err } - rootfsViewJSON := "" + rootfsViewPrepared := false + var rootfsViewState types.RootfsViewState var rootfsViewAccessor *containerdShim.RootfsViewAccessor if session != nil { rootfsViewAccessor = containerdShim.NewRootfsViewAccessor(session) @@ -82,11 +82,11 @@ func (s *taskService) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) if shouldPrepareErr != nil { log.G(ctx).WithError(shouldPrepareErr).Warn("urunc(shim): failed to load urunc config; rootfs view disabled") } else if shouldPrepare { - rootfsViewJSON, err = rootfsViewAccessor.Prepare(ctx) + rootfsViewState, err = rootfsViewAccessor.Prepare(ctx) if err != nil { log.G(ctx).WithError(err).Warn("urunc(shim): failed to prepare rootfs view; falling back to legacy boot artifact extraction") - rootfsViewJSON = "" } else { + rootfsViewPrepared = true log.G(ctx).Debug("urunc(shim): rootfs view prepared") } } else { @@ -94,27 +94,30 @@ func (s *taskService) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) } } - var shimAnnotations map[string]string - if rootfsViewJSON != "" { - shimAnnotations = map[string]string{ - annotRootfsParams: rootfsParamsJSON, - annotRootfsView: rootfsViewJSON, - } - } else { - shimAnnotations = map[string]string{ - annotRootfsParams: rootfsParamsJSON, - } - } - if err := containerdShim.PatchConfigJSON(r.Bundle, shimAnnotations); err != nil { - if rootfsViewJSON != "" && rootfsViewAccessor != nil { + if err := containerdShim.PatchConfigJSON(r.Bundle, map[string]string{ + annotRootfsParams: rootfsParamsJSON, + }); err != nil { + if rootfsViewPrepared && rootfsViewAccessor != nil { if cleanupErr := rootfsViewAccessor.Cleanup(ctx, ""); cleanupErr != nil { - log.G(ctx).WithError(cleanupErr).Warn("urunc(shim): failed to clean up rootfs view after annotation persistence failure") + 
log.G(ctx).WithError(cleanupErr).Warn("urunc(shim): failed to clean up rootfs view after rootfs params persistence failure") } } log.G(ctx).WithError(err).Warn("urunc(shim): failed to persist shim create annotations") return nil, err } + if rootfsViewPrepared { + if err := unikontainers.WriteBundleRootfsView(r.Bundle, rootfsViewState); err != nil { + if rootfsViewAccessor != nil { + if cleanupErr := rootfsViewAccessor.Cleanup(ctx, ""); cleanupErr != nil { + log.G(ctx).WithError(cleanupErr).Warn("urunc(shim): failed to clean up rootfs view after state persistence failure") + } + } + log.G(ctx).WithError(err).Warn("urunc(shim): failed to persist rootfs view state") + return nil, err + } + } + return resp, nil } diff --git a/pkg/unikontainers/block.go b/pkg/unikontainers/block.go index a45f1f90a..c429d0aa5 100644 --- a/pkg/unikontainers/block.go +++ b/pkg/unikontainers/block.go @@ -45,7 +45,7 @@ type blockRootfs struct { uruncJSONPath string guestType string guest types.Unikernel - rootfsViewState *rootfsViewState + rootfsViewState *types.RootfsViewState } // getMountInfo determines whether the provided path is a mount point @@ -260,23 +260,6 @@ func (b blockRootfs) preSetup() error { return nil } -// rebindRootfsViewBootAfterPrepareRoot binds boot artifacts into the rootfs -// tree that qemu sees after chroot. 
-func (b blockRootfs) rebindRootfsViewBootAfterPrepareRoot() error { - if b.rootfsViewState == nil { - return nil - } - - useView, err := prepareRootfsViewBootBinds(b.rootfsViewState, b.monRootfs, b.kernelPath, b.initrdPath, b.uruncJSONPath) - if err != nil { - return err - } - if !useView { - return fmt.Errorf("rootfs view boot artifact bind failed after prepareRoot (container rootfs already unmounted)") - } - return nil -} - func (b blockRootfs) postSetup() error { if b.mountedPath != "" { err := setupDev(b.monRootfs, b.path) diff --git a/pkg/unikontainers/rootfs_view_boot.go b/pkg/unikontainers/rootfs_view_boot.go index cad3c63ff..bd9fbe768 100644 --- a/pkg/unikontainers/rootfs_view_boot.go +++ b/pkg/unikontainers/rootfs_view_boot.go @@ -15,39 +15,48 @@ package unikontainers import ( + "encoding/json" "fmt" "os" "path/filepath" "strings" "github.com/containerd/containerd/mount" + "github.com/urunc-dev/urunc/pkg/unikontainers/types" "golang.org/x/sys/unix" ) -const rootfsViewAnnotation = "com.urunc.internal.rootfs.view" - -type rootfsViewState struct { - Snapshotter string `json:"snapshotter"` - Mounts []mount.Mount `json:"mounts,omitempty"` +// WriteBundleRootfsView persists shim-prepared view state in the bundle. +func WriteBundleRootfsView(bundleDir string, state types.RootfsViewState) error { + bundleDir = filepath.Clean(bundleDir) + data, err := json.Marshal(state) + if err != nil { + return fmt.Errorf("marshal %s: %w", rootfsViewFilename, err) + } + path := filepath.Join(bundleDir, rootfsViewFilename) + if err := os.WriteFile(path, data, 0o644); err != nil { //nolint:gosec // bundle metadata, same as state.json + return fmt.Errorf("write %s: %w", path, err) + } + return nil } -func ReadBundleRootfsView(bundleDir string) (string, error) { - configPath := filepath.Join(bundleDir, configFilename) - if _, err := os.Stat(configPath); err != nil { +// LoadBundleRootfsView reads view state written by the shim at task Create. 
+// Returns (nil, nil) when the file is absent. +func LoadBundleRootfsView(bundleDir string) (*types.RootfsViewState, error) { + bundleDir = filepath.Clean(bundleDir) + path := filepath.Join(bundleDir, rootfsViewFilename) + data, err := os.ReadFile(path) + if err != nil { if os.IsNotExist(err) { - return "", nil + return nil, nil } - return "", fmt.Errorf("stat config.json: %w", err) + return nil, fmt.Errorf("read %s: %w", path, err) } - - spec, err := loadSpec(bundleDir) - if err != nil { - return "", err + var state types.RootfsViewState + if err := json.Unmarshal(data, &state); err != nil { + return nil, fmt.Errorf("unmarshal %s: %w", path, err) } - if spec.Annotations == nil { - return "", nil - } - return spec.Annotations[rootfsViewAnnotation], nil + return &state, nil } func rootfsViewBootArtifactBindPaths(viewRoot, monRootfs, unikernelPath, initrdPath, uruncJSON string) []struct{ src, target string } { @@ -66,20 +75,6 @@ func rootfsViewBootArtifactBindPaths(viewRoot, monRootfs, unikernelPath, initrdP return files } -func bindBootArtifactsFromView(viewRoot, monRootfs, unikernelPath, initrdPath, uruncJSON string, bindTargets *[]string) error { - for _, f := range rootfsViewBootArtifactBindPaths(viewRoot, monRootfs, unikernelPath, initrdPath, uruncJSON) { - dstPath := f.target - dstDir := filepath.Dir(dstPath) - if err := bindMountFile(f.src, dstDir, dstPath, 0, unix.MS_BIND, false); err != nil { - rollbackRootfsViewBinds(*bindTargets) - *bindTargets = nil - return fmt.Errorf("bind view %s -> %s: %w", f.src, f.target, err) - } - *bindTargets = append(*bindTargets, dstPath) - } - return nil -} - func rollbackRootfsViewBinds(targets []string) { for i := len(targets) - 1; i >= 0; i-- { if err := unmountRootfsViewBind(targets[i]); err != nil { @@ -88,10 +83,10 @@ func rollbackRootfsViewBinds(targets []string) { } } -// probeRootfsViewBootArtifacts keeps the legacy extract fallback available: -// preSetup still has mountedPath, but does not keep boot bind mounts. 
-func probeRootfsViewBootArtifacts(rootfsViewState *rootfsViewState, unikernelPath, initrdPath, uruncJSON string) (useView bool, err error) { - if rootfsViewState == nil { +// probeRootfsViewBootArtifacts checks that boot artifacts can be bind-mounted +// from the view. preSetup still has mountedPath; binds are rolled back immediately. +func probeRootfsViewBootArtifacts(view *types.RootfsViewState, unikernelPath, initrdPath, uruncJSON string) (useView bool, err error) { + if view == nil { return false, nil } @@ -101,38 +96,40 @@ func probeRootfsViewBootArtifacts(rootfsViewState *rootfsViewState, unikernelPat } defer os.RemoveAll(mountpoint) - if err := mount.All(rootfsViewState.Mounts, mountpoint); err != nil { + if err := mount.All(view.Mounts, mountpoint); err != nil { uniklog.WithError(err).Warn("rootfs view unavailable; falling back to legacy boot file extraction") return false, nil } - var probeErr error - for _, f := range rootfsViewBootArtifactBindPaths(mountpoint, "", unikernelPath, initrdPath, uruncJSON) { - if _, err := os.Stat(f.src); err != nil { - probeErr = fmt.Errorf("stat rootfs view boot artifact %s: %w", f.src, err) - break + probeRoot, err := os.MkdirTemp("", "urunc-rootfs-view-probe-") + if err != nil { + return false, fmt.Errorf("create temporary rootfs view probe mountpoint: %w", err) + } + defer os.RemoveAll(probeRoot) + + var bindTargets []string + defer rollbackRootfsViewBinds(bindTargets) + + for _, f := range rootfsViewBootArtifactBindPaths(mountpoint, probeRoot, unikernelPath, initrdPath, uruncJSON) { + dstPath := f.target + if err := bindMountFile(f.src, filepath.Dir(dstPath), dstPath, 0, unix.MS_BIND, false); err != nil { + return false, fmt.Errorf("bind view %s -> %s: %w", f.src, f.target, err) } + bindTargets = append(bindTargets, dstPath) } if uerr := mount.Unmount(mountpoint, 0); uerr != nil && !os.IsNotExist(uerr) && uerr != unix.EINVAL { - if probeErr != nil { - uniklog.WithError(probeErr).Warn("rootfs view boot artifact probe 
failed") - } return false, fmt.Errorf("unmount temporary rootfs view mountpoint: %w", uerr) } - if probeErr != nil { - uniklog.WithError(probeErr).Warn("rootfs view unavailable; falling back to legacy boot file extraction") - return false, nil - } return true, nil } // prepareRootfsViewBootBinds runs after prepareRoot, so the binds live in the // monitor mount namespace and are released with it. -func prepareRootfsViewBootBinds(rootfsViewState *rootfsViewState, monRootfs, unikernelPath, initrdPath, uruncJSON string) (useView bool, err error) { - if rootfsViewState == nil { - return false, nil +func prepareRootfsViewBootBinds(view *types.RootfsViewState, monRootfs, unikernelPath, initrdPath, uruncJSON string) error { + if view == nil { + return nil } var bindTargets []string @@ -145,36 +142,33 @@ func prepareRootfsViewBootBinds(rootfsViewState *rootfsViewState, monRootfs, uni mountpoint, err := os.MkdirTemp("", "urunc-rootfs-view-") if err != nil { - return false, fmt.Errorf("create temporary rootfs view mountpoint: %w", err) + return fmt.Errorf("create temporary rootfs view mountpoint: %w", err) } defer os.RemoveAll(mountpoint) - if err := mount.All(rootfsViewState.Mounts, mountpoint); err != nil { - uniklog.WithError(err).Warn("rootfs view unavailable; falling back to legacy boot file extraction") - return false, nil + if err := mount.All(view.Mounts, mountpoint); err != nil { + return fmt.Errorf("mount rootfs view: %w", err) } - bindErr := bindBootArtifactsFromView(mountpoint, monRootfs, unikernelPath, initrdPath, uruncJSON, &bindTargets) - - uerr := mount.Unmount(mountpoint, 0) - if uerr != nil && !os.IsNotExist(uerr) && uerr != unix.EINVAL { - if bindErr == nil { - bindErr = uerr - } else { - uniklog.WithError(uerr).WithField("path", mountpoint).Warn("failed to unmount temporary rootfs view mount") + for _, f := range rootfsViewBootArtifactBindPaths(mountpoint, monRootfs, unikernelPath, initrdPath, uruncJSON) { + dstPath := f.target + if err := 
bindMountFile(f.src, filepath.Dir(dstPath), dstPath, 0, unix.MS_BIND, false); err != nil { + return fmt.Errorf("bind view %s -> %s: %w", f.src, f.target, err) } + bindTargets = append(bindTargets, dstPath) } - if bindErr != nil { + bindErr := mount.Unmount(mountpoint, 0) + + if bindErr != nil && !os.IsNotExist(bindErr) && bindErr != unix.EINVAL { if len(bindTargets) > 0 { - return false, fmt.Errorf("rootfs view boot artifact bind completed but cleanup failed: %w", bindErr) + return fmt.Errorf("rootfs view boot artifact bind completed but cleanup failed: %w", bindErr) } - uniklog.WithError(bindErr).Warn("rootfs view unavailable; falling back to legacy boot file extraction") - return false, nil + return fmt.Errorf("unmount temporary rootfs view mountpoint: %w", bindErr) } keepBinds = true - return true, nil + return nil } func unmountRootfsViewBind(target string) error { diff --git a/pkg/unikontainers/types/types.go b/pkg/unikontainers/types/types.go index c6388e2cc..6c796dbb7 100644 --- a/pkg/unikontainers/types/types.go +++ b/pkg/unikontainers/types/types.go @@ -15,7 +15,10 @@ //revive:disable:var-naming package types -import "golang.org/x/sys/unix" +import ( + "github.com/containerd/containerd/mount" + "golang.org/x/sys/unix" +) type Unikernel interface { Init(UnikernelParams) error @@ -72,6 +75,12 @@ type RootfsParams struct { MonRootfs string // The rootfs for the monitor process } +// RootfsViewState is passed from shim to runtime via bundle rootfs-view.json. 
+type RootfsViewState struct { + Snapshotter string `json:"snapshotter"` + Mounts []mount.Mount `json:"mounts,omitempty"` +} + // Specific to Linux type ProcessConfig struct { UID uint32 // The uid of the process inside the guest diff --git a/pkg/unikontainers/unikontainers.go b/pkg/unikontainers/unikontainers.go index 7b0a52fe3..c3c3bf942 100644 --- a/pkg/unikontainers/unikontainers.go +++ b/pkg/unikontainers/unikontainers.go @@ -471,12 +471,9 @@ func (u *Unikontainer) Exec(metrics m.Writer) error { var rfsBuilder rootfsBuilder switch rootfsParams.Type { case "block": - var view *rootfsViewState - if rootfsViewJSON := u.Spec.Annotations[rootfsViewAnnotation]; rootfsViewJSON != "" { - view = &rootfsViewState{} - if err := json.Unmarshal([]byte(rootfsViewJSON), view); err != nil { - return fmt.Errorf("could not decode guest rootfs view: %w", err) - } + view, err := LoadBundleRootfsView(bundleDir) + if err != nil { + return fmt.Errorf("could not load guest rootfs view: %w", err) } rfsBuilder = blockRootfs{ mounts: u.Spec.Mounts, @@ -535,8 +532,8 @@ func (u *Unikontainer) Exec(metrics m.Writer) error { return err } - if b, ok := rfsBuilder.(blockRootfs); ok { - if err := b.rebindRootfsViewBootAfterPrepareRoot(); err != nil { + if b, ok := rfsBuilder.(blockRootfs); ok && b.rootfsViewState != nil { + if err := prepareRootfsViewBootBinds(b.rootfsViewState, b.monRootfs, b.kernelPath, b.initrdPath, b.uruncJSONPath); err != nil { return fmt.Errorf("boot artifact setup after prepareRoot failed: %w", err) } } diff --git a/pkg/unikontainers/utils.go b/pkg/unikontainers/utils.go index c53c0fc05..668aab722 100644 --- a/pkg/unikontainers/utils.go +++ b/pkg/unikontainers/utils.go @@ -35,9 +35,10 @@ import ( ) const ( - configFilename = "config.json" - stateFilename = "state.json" - initPidFilename = "init.pid" + configFilename = "config.json" + stateFilename = "state.json" + rootfsViewFilename = "rootfs-view.json" + initPidFilename = "init.pid" uruncJSONFilename = "urunc.json" 
rootfsDirName = "rootfs" )