Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/28097.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
scheduler: Fixed a bug where setting `sticky` on a static host volume could fail the evaluation instead of being rejected during feasibility checking
```
25 changes: 18 additions & 7 deletions scheduler/feasible/feasible.go
Original file line number Diff line number Diff line change
Expand Up @@ -355,12 +355,13 @@ func (h *HostVolumeChecker) hasVolumes(n *structs.Node) bool {
}

if req.Sticky {
// the node is feasible if there are no remaining claims to
// the volume is feasible if there are no remaining claims to
// fulfill or if there's an exact match
if len(h.unmetClaims) == 0 {
return true
continue
}

matched := false
for _, c := range h.unmetClaims {
if c.VolumeID == vol.ID {
// if we have a match for a volume claim, delete this
Expand All @@ -372,15 +373,25 @@ func (h *HostVolumeChecker) hasVolumes(n *structs.Node) bool {
func(claim *structs.TaskGroupHostVolumeClaim) bool {
return claim.VolumeID == vol.ID
})
return true
matched = true
break
}
}
if !matched {
return false
}
}
} else {
// this is a static host volume

// sticky is only supported on dynamic host volumes
if req.Sticky {
return false
}
} else if !req.ReadOnly {
// this is a static host volume and can only be mounted ReadOnly,
// validate that no requests for it are ReadWrite.
if volCfg.ReadOnly {

// a static host volume can only be mounted ReadOnly, so validate
// that no requests for it are ReadWrite
if !req.ReadOnly && volCfg.ReadOnly {
return false
}
}
Expand Down
106 changes: 106 additions & 0 deletions scheduler/feasible/feasible_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,112 @@ func TestHostVolumeChecker_Static(t *testing.T) {
}
}

func TestHostVolumeChecker_StaticSticky(t *testing.T) {
ci.Parallel(t)

_, ctx := MockContext(t)

node := mock.Node()
node.HostVolumes = map[string]*structs.ClientHostVolumeConfig{
"foo": {}, // static host volume: ID is empty
}

// sticky is only supported on dynamic host volumes, so requesting it on a
// static host volume must fail feasibility regardless of ReadOnly.
cases := []struct {
name string
req *structs.VolumeRequest
result bool
}{
{
name: "sticky read-write",
req: &structs.VolumeRequest{Type: "host", Source: "foo", Sticky: true},
result: false,
},
{
name: "sticky read-only",
req: &structs.VolumeRequest{Type: "host", Source: "foo", Sticky: true, ReadOnly: true},
result: false,
},
{
name: "not sticky",
req: &structs.VolumeRequest{Type: "host", Source: "foo"},
result: true,
},
}

checker := NewHostVolumeChecker(ctx)
alloc := mock.Alloc()
alloc.NodeID = node.ID
job := mock.Job()
taskGroup := job.TaskGroups[0]

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
checker.SetVolumes(alloc.Name, structs.DefaultNamespace, job.ID, taskGroup.Name,
map[string]*structs.VolumeRequest{"foo": tc.req})
must.Eq(t, tc.result, checker.Feasible(node))
})
}
}

func TestHostVolumeChecker_StaticStickyMultiVolume(t *testing.T) {
ci.Parallel(t)

store, ctx := MockContext(t)

// the node has one dynamic host volume (ID set) and one static host volume
// (ID empty)
dhvID := uuid.Generate()
node := mock.Node()
node.HostVolumes = map[string]*structs.ClientHostVolumeConfig{
"dyn": {ID: dhvID},
"stat": {},
}
must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 1000, node))

dhv := &structs.HostVolume{
Namespace: structs.DefaultNamespace,
ID: dhvID,
Name: "dyn",
NodeID: node.ID,
RequestedCapabilities: []*structs.HostVolumeCapability{{
AttachmentMode: structs.HostVolumeAttachmentModeFilesystem,
AccessMode: structs.HostVolumeAccessModeSingleNodeSingleWriter,
}},
State: structs.HostVolumeStateReady,
}
must.NoError(t, store.UpsertHostVolume(1000, dhv))

// both volumes are requested sticky. The dynamic one is available with no
// outstanding claims, so on its own it satisfies the per-volume check; the
// static one is invalid for sticky. The node must be infeasible no matter
// which volume is checked first, and volumeReqs is built from a map so the
// order is not deterministic.
reqs := map[string]*structs.VolumeRequest{
"dyn": {
Type: "host",
Source: "dyn",
Sticky: true,
AccessMode: structs.HostVolumeAccessModeSingleNodeSingleWriter,
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
},
"stat": {Type: "host", Source: "stat", Sticky: true},
}

checker := NewHostVolumeChecker(ctx)
alloc := mock.Alloc()
alloc.NodeID = node.ID
job := mock.Job()
taskGroup := job.TaskGroups[0]

for i := 0; i < 100; i++ {
checker.SetVolumes(alloc.Name, structs.DefaultNamespace, job.ID, taskGroup.Name, reqs)
must.False(t, checker.Feasible(node),
must.Sprint("sticky on a static host volume must be infeasible regardless of volume order"))
}
}

func TestHostVolumeChecker_Dynamic(t *testing.T) {
ci.Parallel(t)

Expand Down
60 changes: 60 additions & 0 deletions scheduler/generic_sched_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/scheduler/feasible"
"github.com/hashicorp/nomad/scheduler/reconciler"
sstructs "github.com/hashicorp/nomad/scheduler/structs"
"github.com/hashicorp/nomad/scheduler/tests"
Expand Down Expand Up @@ -476,6 +477,65 @@ func TestServiceSched_JobRegister_StickyHostVolumes(t *testing.T) {
must.SliceLen(t, 10, newPlanned)
}

func TestServiceSched_JobRegister_StickyStaticHostVolume(t *testing.T) {
ci.Parallel(t)

h := tests.NewHarness(t)

// A node with a static host volume (its ID is empty).
node := mock.Node()
node.HostVolumes = map[string]*structs.ClientHostVolumeConfig{"foo": {Name: "foo"}}
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))

// A job that requests the static volume with sticky, which is only valid for
// dynamic host volumes.
job := mock.Job()
job.TaskGroups[0].Count = 1
job.TaskGroups[0].Volumes = map[string]*structs.VolumeRequest{
"foo": {
Type: "host",
Source: "foo",
Sticky: true,
},
}
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job))

eval := &structs.Evaluation{
Namespace: structs.DefaultNamespace,
ID: uuid.Generate(),
Priority: job.Priority,
TriggeredBy: structs.EvalTriggerJobRegister,
JobID: job.ID,
Status: structs.EvalStatusPending,
}
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))

// Eval must process cleanly. Before the fix the alloc was placed and plan
// apply failed inserting the claim with an empty volume ID.
must.NoError(t, h.Process(NewServiceScheduler, eval))

// Nothing is placed: sticky on a static host volume is infeasible.
placed := 0
for _, plan := range h.Plans {
for _, allocs := range plan.NodeAllocation {
placed += len(allocs)
}
}
must.Zero(t, placed, must.Sprint("sticky on a static host volume must not place"))

// The unplaced allocation spawns a blocked eval, and the node is filtered
// out for the host volume constraint.
must.SliceLen(t, 1, h.CreateEvals)
must.Eq(t, structs.EvalStatusBlocked, h.CreateEvals[0].Status)

must.SliceLen(t, 1, h.Evals)
outEval := h.Evals[0]
must.MapLen(t, 1, outEval.FailedTGAllocs)
metrics := outEval.FailedTGAllocs[job.TaskGroups[0].Name]
must.NotNil(t, metrics)
must.MapContainsKey(t, metrics.ConstraintFiltered, feasible.FilterConstraintHostVolumes)
}

func TestServiceSched_JobRegister_DiskConstraints(t *testing.T) {
ci.Parallel(t)

Expand Down
Loading