Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmd/containerd-shim-urunc-v2/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ package main
import (
"context"

"github.com/containerd/containerd/runtime/v2/runc/manager"
"github.com/containerd/containerd/runtime/v2/shim"
_ "github.com/urunc-dev/urunc/pkg/containerd-shim"
containerdshim "github.com/urunc-dev/urunc/pkg/containerd-shim"
)

func main() {
shim.RunManager(context.Background(), manager.NewShimManager("io.containerd.urunc.v2"))
shim.RunManager(context.Background(), containerdshim.NewShimManager("io.containerd.urunc.v2"))
}
3 changes: 3 additions & 0 deletions deployment/urunc-deploy/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ syslog = false
[timestamps]
enabled = false

[rootfs_view]
enabled = false

[monitors.qemu]
default_memory_mb = 256
default_vcpus = 1
Expand Down
37 changes: 37 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ default_vcpus = 1
[extra_binaries.virtiofsd]
path = "/usr/libexec/virtiofsd"
options = "--sandbox none"

[rootfs_view]
enabled = false
```

## Configuration Sections
Expand Down Expand Up @@ -89,6 +92,34 @@ destination = "/tmp/urunc-timestamps.log"

When enabled, `urunc` will log performance timestamps to help with debugging and optimization.

### Rootfs View Configuration

The `[rootfs_view]` section controls whether the urunc shim prepares a
per-container containerd rootfs view at task Create (for `devmapper` /
`blockfile` snapshotters).

| Option | Type | Default | Description |
|--------|------|---------|-------------|
| `enabled` | boolean | `false` | Prepare rootfs views for container block rootfs after shim task Create |

When `enabled = true`, the shim first lets the wrapped task service create the
task so the bundle rootfs is mounted. It then runs `ChooseRootfs` and prepares a
view only if **all** of the following hold:

1. The container snapshotter is block-based (`devmapper` or `blockfile`).
2. Shim `ChooseRootfs` selected **container block rootfs** (`type=block` with a
non-empty `MountedPath`).

This matches the block-rootfs boot-artifact path: kernel/initrd are read from a
read-only view instead of being copied out of the container rootfs before attach.

**Example:**

```toml
[rootfs_view]
enabled = true
```

### Monitor Configuration

The `[monitors]` section allows you to configure default settings for different
Expand Down Expand Up @@ -201,6 +232,9 @@ To create a configuration file, you can:
[monitors.spt]
default_memory_mb = 256
default_vcpus = 1

[rootfs_view]
enabled = false
EOF
```

Expand Down Expand Up @@ -244,6 +278,9 @@ default_vcpus = 1
default_memory_mb = 256
default_vcpus = 1
# path is not set by default - urunc will search in PATH

[rootfs_view]
enabled = false
```

## Notes
Expand Down
10 changes: 10 additions & 0 deletions docs/package/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,16 @@ Except of the above, `urunc` accepts the following optional annotations:
requests from `urunc` to mount the container's image rootfs in the unikernel
(either as a block device or through shared-fs).

Per-container rootfs views are controlled by `[rootfs_view] enabled` in
`/etc/urunc/config.toml`. See
[configuration](../configuration.md#rootfs-view-configuration). When enabled,
the container must also use `com.urunc.unikernel.mountRootfs=true` (typically
from image annotations merged into `config.json` before shim task Create).
The only supported snapshotters are `devmapper` and `blockfile`. After the wrapped
task service creates the task and mounts the bundle rootfs, the shim runs
`ChooseRootfs` and prepares a view only when that selection is container block
rootfs.

Due to the fact that [Docker](https://www.docker.com/) and some high-level
container runtimes do not pass the image annotations to the underlying container
runtime, `urunc` can also read the above information from a file inside the
Expand Down
8 changes: 4 additions & 4 deletions pkg/containerd-shim/containerd/annotations.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func InjectUruncAnnotations(ctx context.Context, session *Session, bundlePath st
return nil
}

return patchConfigJSON(bundlePath, annotations)
return PatchConfigJSON(bundlePath, annotations)
}

func (f *annotationFetcher) fetchUruncAnnotations(ctx context.Context) (map[string]string, error) {
Expand Down Expand Up @@ -152,12 +152,12 @@ func readBlob(ctx context.Context, namespace string, contentClient contentapi.Co
return raw, nil
}

// patchConfigJSON injects missing annotations into the OCI runtime spec
// stored in the bundle's config.json.
// PatchConfigJSON injects missing annotations into the OCI runtime spec stored in
// the bundle's config.json.
//
// Existing annotations in config.json are preserved. Only annotation keys that
// are not already present in the runtime spec are added.
func patchConfigJSON(bundlePath string, annotations map[string]string) error {
func PatchConfigJSON(bundlePath string, annotations map[string]string) error {
configPath := filepath.Join(bundlePath, "config.json")

fi, err := os.Stat(configPath)
Expand Down
267 changes: 267 additions & 0 deletions pkg/containerd-shim/containerd/rootfs_view.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
// Copyright (c) 2023-2026, Nubificus LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package containerd

import (
"context"
"fmt"

leasesapi "github.com/containerd/containerd/api/services/leases/v1"
snapshotsapi "github.com/containerd/containerd/api/services/snapshots/v1"
cntrtypes "github.com/containerd/containerd/api/types"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/mount"
"github.com/urunc-dev/urunc/pkg/unikontainers"
"github.com/urunc-dev/urunc/pkg/unikontainers/types"
"google.golang.org/grpc/metadata"
)

const (
// rootfsViewKeyPrefix is prepended to the container ID to form the key of
// the per-container rootfs view snapshot.
rootfsViewKeyPrefix = "urunc-rootfs-view-"
// rootfsViewLeasePrefix is prepended to the container ID to form the ID of
// the containerd lease associated with that view.
rootfsViewLeasePrefix = "urunc-rootfs-view-lease-"
)

// RootfsViewAccessor prepares and cleans up the per-container rootfs view
// snapshot and its lease through the containerd snapshots and leases clients.
type RootfsViewAccessor struct {
// namespace is passed to withNamespace for every containerd request.
namespace string
// containerID is appended to the view key and lease ID prefixes.
containerID string
// snapshotter and snapshotKey come from the session's container metadata;
// both stay empty when the container has no snapshot key.
snapshotter string
snapshotKey string
// snapshots and leases are the containerd clients taken from the session.
snapshots snapshotsapi.SnapshotsClient
leases leasesapi.LeasesClient
}

// NewRootfsViewAccessor builds an accessor from the session's namespace,
// container ID and snapshot/lease clients. The snapshotter name and snapshot
// key are copied from the session's container only when that container exists
// and reports a non-empty snapshot key; otherwise both fields stay empty.
func NewRootfsViewAccessor(session *Session) *RootfsViewAccessor {
	accessor := new(RootfsViewAccessor)
	accessor.namespace = session.GetNamespace()
	accessor.containerID = session.GetContainerID()
	accessor.snapshots = session.snapshotsClient()
	accessor.leases = session.leasesClient()

	container := session.GetContainer()
	if container == nil {
		return accessor
	}
	if key := container.GetSnapshotKey(); key != "" {
		accessor.snapshotter = container.GetSnapshotter()
		accessor.snapshotKey = key
	}
	return accessor
}

// ShouldPrepare reports whether a rootfs view should be prepared for rootfs.
//
// It returns false without reading the configuration when the accessor is nil,
// the snapshotter is not "devmapper" or "blockfile", the snapshot key is empty,
// or rootfs is not a block rootfs with a non-empty MountedPath. Otherwise it
// loads the urunc configuration from unikontainers.UruncConfigPath and returns
// its RootfsView.Enabled flag; a load failure is returned as the error.
func (a *RootfsViewAccessor) ShouldPrepare(rootfs types.RootfsParams) (bool, error) {
	if a == nil {
		return false, nil
	}

	// Only block-based snapshotters qualify; an empty name falls through to
	// the default branch as well.
	switch a.snapshotter {
	case "devmapper", "blockfile":
	default:
		return false, nil
	}

	if a.snapshotKey == "" || rootfs.Type != "block" || rootfs.MountedPath == "" {
		return false, nil
	}

	cfg, err := unikontainers.LoadUruncConfig(unikontainers.UruncConfigPath)
	if err != nil {
		return false, err
	}
	return cfg.RootfsView.Enabled, nil
}

// Prepare records a read-only view of the committed rootfs snapshot for runtime use.
// On success it returns view state for the caller to persist in bundle rootfs-view.json.
//
// The view key and lease ID are derived from the container ID, so an empty
// container ID is rejected (as Cleanup does) instead of creating a view and
// lease named with the bare prefix. An empty snapshotter or snapshot key is
// rejected up front for the same reason: there is nothing to base the view on.
//
// If creating the view fails, the lease is deleted again. A failure of that
// rollback is appended to the returned error rather than dropped, so a leaked
// lease is visible to the caller; the view-creation error stays the wrapped one.
func (a *RootfsViewAccessor) Prepare(ctx context.Context) (types.RootfsViewState, error) {
	if a == nil {
		return types.RootfsViewState{}, fmt.Errorf("rootfs view accessor is nil")
	}
	if a.containerID == "" {
		return types.RootfsViewState{}, fmt.Errorf("container id is empty")
	}
	if a.snapshotter == "" || a.snapshotKey == "" {
		return types.RootfsViewState{}, fmt.Errorf("snapshotter and snapshot key required for rootfs view prepare")
	}

	snapshotKey, err := a.resolveCommittedSnapshotBase(ctx, a.snapshotter, a.snapshotKey)
	if err != nil {
		return types.RootfsViewState{}, err
	}

	viewKey := rootfsViewKeyPrefix + a.containerID
	leaseID := rootfsViewLeasePrefix + a.containerID

	nsCtx := withNamespace(ctx, a.namespace)
	if _, err := a.leases.Create(nsCtx, &leasesapi.CreateRequest{ID: leaseID}); err != nil {
		// An existing lease (retry or partial prepare) is reused.
		err = containerdErr(err)
		if err != nil && !errdefs.IsAlreadyExists(err) {
			return types.RootfsViewState{}, fmt.Errorf("create rootfs view lease %s: %w", leaseID, err)
		}
	}

	// Attach the lease to the view request via the containerd-lease header.
	leaseCtx := metadata.AppendToOutgoingContext(nsCtx, "containerd-lease", leaseID)
	mounts, err := a.createRootfsView(leaseCtx, viewKey, snapshotKey)
	if err != nil {
		if rbErr := deleteRootfsViewLease(ctx, a.namespace, leaseID, a.leases); rbErr != nil {
			return types.RootfsViewState{}, fmt.Errorf("%w (rollback of lease %s also failed: %v)", err, leaseID, rbErr)
		}
		return types.RootfsViewState{}, err
	}

	return types.RootfsViewState{
		Snapshotter: a.snapshotter,
		Mounts:      mounts,
	}, nil
}

// Rootfs view cleanup (call chain):
//
// Delete / Stop: ShouldCleanupRootfsView(bundle) → Cleanup(ctx, snapshotter from bundle)
// Create rollback: Cleanup(ctx, "") — snapshotter comes from container metadata on the accessor
//
// Cleanup → removeRootfsViewSnapshotAndLease (view snapshot + lease in containerd)
// Prepare failure after lease create → deleteRootfsViewLease (lease only)

// Cleanup removes the per-container rootfs view snapshot and its containerd lease.
//
// snapshotter names the snapshotter holding the view; when it is empty the
// accessor's own snapshotter is used instead. An error is returned when the
// accessor is nil, the container ID is empty, or no snapshotter name is
// available from either source.
func (a *RootfsViewAccessor) Cleanup(ctx context.Context, snapshotter string) error {
	switch {
	case a == nil:
		return fmt.Errorf("rootfs view accessor is nil")
	case a.containerID == "":
		return fmt.Errorf("container id is empty")
	}

	name := snapshotter
	if name == "" {
		name = a.snapshotter
	}
	if name == "" {
		return fmt.Errorf("snapshotter name required for rootfs view cleanup")
	}

	return removeRootfsViewSnapshotAndLease(ctx, a.namespace, a.containerID, name, a.snapshots, a.leases)
}

// statSnapshot stats key on snapshotter in the accessor's namespace and returns
// the snapshot's parent key and whether its kind is COMMITTED. The RPC error is
// passed through containerdErr; a response without info is reported as an error.
func (a *RootfsViewAccessor) statSnapshot(ctx context.Context, snapshotter, key string) (parent string, committed bool, err error) {
	req := &snapshotsapi.StatSnapshotRequest{
		Snapshotter: snapshotter,
		Key:         key,
	}
	resp, statErr := a.snapshots.Stat(withNamespace(ctx, a.namespace), req)
	if statErr = containerdErr(statErr); statErr != nil {
		return "", false, statErr
	}

	info := resp.GetInfo()
	if info == nil {
		return "", false, fmt.Errorf("stat snapshot %s (%s): empty info", key, snapshotter)
	}

	isCommitted := info.GetKind() == snapshotsapi.Kind_COMMITTED
	return info.GetParent(), isCommitted, nil
}

// resolveCommittedSnapshotBase returns the snapshot key to use as the parent of
// the rootfs view.
//
// snapshotKey itself is returned when it is committed or has no parent.
// Otherwise the parent chain is walked upwards and the first committed
// ancestor is returned; reaching an uncommitted ancestor without a parent is an
// error, as is any failing stat along the way.
func (a *RootfsViewAccessor) resolveCommittedSnapshotBase(ctx context.Context, snapshotter, snapshotKey string) (string, error) {
	next, isCommitted, err := a.statSnapshot(ctx, snapshotter, snapshotKey)
	if err != nil {
		return "", fmt.Errorf("stat snapshot %s (%s): %w", snapshotKey, snapshotter, err)
	}
	if isCommitted || next == "" {
		return snapshotKey, nil
	}

	// Climb one ancestor per iteration; every branch below returns except the
	// "uncommitted with a parent" case, which advances to that parent.
	for candidate := next; ; candidate = next {
		next, isCommitted, err = a.statSnapshot(ctx, snapshotter, candidate)
		switch {
		case err != nil:
			return "", fmt.Errorf("stat snapshot %s (%s parent walk): %w", candidate, snapshotter, err)
		case isCommitted:
			return candidate, nil
		case next == "":
			return "", fmt.Errorf("%s snapshot %s has no committed parent in chain", snapshotter, snapshotKey)
		}
	}
}

// createRootfsView asks the accessor's snapshotter for a view named viewKey on
// top of parentKey and returns its mounts. When the view already exists, the
// mounts of the existing view are fetched and returned instead; any other
// failure is wrapped with the view and parent keys.
func (a *RootfsViewAccessor) createRootfsView(ctx context.Context, viewKey, parentKey string) ([]mount.Mount, error) {
	nsCtx := withNamespace(ctx, a.namespace)

	created, err := a.snapshots.View(nsCtx, &snapshotsapi.ViewSnapshotRequest{
		Snapshotter: a.snapshotter,
		Key:         viewKey,
		Parent:      parentKey,
	})
	err = containerdErr(err)
	switch {
	case err == nil:
		return protoMountsToMounts(created.GetMounts()), nil
	case !errdefs.IsAlreadyExists(err):
		return nil, fmt.Errorf("create rootfs view %s from %s: %w", viewKey, parentKey, err)
	}

	// The view is already there (retry or partial prepare): reuse it.
	existing, err := a.snapshots.Mounts(nsCtx, &snapshotsapi.MountsRequest{
		Snapshotter: a.snapshotter,
		Key:         viewKey,
	})
	if err = containerdErr(err); err != nil {
		return nil, fmt.Errorf("create rootfs view %s from %s: %w", viewKey, parentKey, err)
	}
	return protoMountsToMounts(existing.GetMounts()), nil
}

// protoMountsToMounts converts containerd API mounts into mount.Mount values,
// copying the type, source, target and options of each entry in order. The
// result is always a non-nil slice with one element per input mount.
func protoMountsToMounts(mm []*cntrtypes.Mount) []mount.Mount {
	converted := make([]mount.Mount, 0, len(mm))
	for _, pm := range mm {
		converted = append(converted, mount.Mount{
			Type:    pm.Type,
			Source:  pm.Source,
			Target:  pm.Target,
			Options: pm.Options,
		})
	}
	return converted
}

// ShouldCleanupRootfsView loads the rootfs view state stored for bundle and
// reports whether a view needs cleaning up. It returns true together with the
// recorded snapshotter when state is present and names a snapshotter, false
// with an empty name when there is no such state, and the load error otherwise.
func ShouldCleanupRootfsView(bundle string) (bool, string, error) {
	state, err := unikontainers.LoadBundleRootfsView(bundle)
	if err != nil {
		return false, "", err
	}
	if state != nil && state.Snapshotter != "" {
		return true, state.Snapshotter, nil
	}
	return false, "", nil
}

// removeRootfsViewSnapshotAndLease deletes the view snapshot and its lease in containerd.
//
// It is a no-op when containerID or snapshotter is empty. A snapshot or lease
// that is already gone (NotFound) is not an error.
//
// The lease is deleted even when removing the snapshot fails: a lease left
// behind would keep referencing the view, whereas dropping it lets containerd
// garbage-collect a snapshot that could not be removed explicitly. The snapshot
// error is returned in that case; if the lease delete fails too, it is appended
// to the message while the snapshot error remains the wrapped one.
func removeRootfsViewSnapshotAndLease(
	ctx context.Context,
	namespace, containerID, snapshotter string,
	snapshots snapshotsapi.SnapshotsClient,
	leases leasesapi.LeasesClient,
) error {
	if containerID == "" || snapshotter == "" {
		return nil
	}

	nsCtx := withNamespace(ctx, namespace)
	_, removeErr := snapshots.Remove(nsCtx, &snapshotsapi.RemoveSnapshotRequest{
		Snapshotter: snapshotter,
		Key:         rootfsViewKeyPrefix + containerID,
	})
	removeErr = containerdErr(removeErr)
	if removeErr != nil && errdefs.IsNotFound(removeErr) {
		removeErr = nil
	}

	leaseErr := deleteRootfsViewLease(ctx, namespace, rootfsViewLeasePrefix+containerID, leases)

	switch {
	case removeErr != nil && leaseErr != nil:
		return fmt.Errorf("%w (delete rootfs view lease also failed: %v)", removeErr, leaseErr)
	case removeErr != nil:
		return removeErr
	default:
		return leaseErr
	}
}

// deleteRootfsViewLease deletes the containerd lease leaseID in namespace and
// nothing else (used by Prepare to roll back after the lease was created).
// An empty leaseID is a no-op, and a lease that no longer exists is treated as
// success.
func deleteRootfsViewLease(ctx context.Context, namespace, leaseID string, leases leasesapi.LeasesClient) error {
	if leaseID == "" {
		return nil
	}

	nsCtx := withNamespace(ctx, namespace)
	_, err := leases.Delete(nsCtx, &leasesapi.DeleteRequest{ID: leaseID})
	err = containerdErr(err)
	if err == nil || errdefs.IsNotFound(err) {
		return nil
	}
	return err
}
Loading