From 08e2398800421e501201a98ac5012140d76ab675 Mon Sep 17 00:00:00 2001 From: Tehut Getahun Date: Tue, 3 Mar 2026 10:51:02 -0800 Subject: [PATCH 1/7] Squashed nmd_1234 commit history add will_share to task device block fixes to get share working make sure WillShare != nil add helpers and test to exercise changes to createOffers add test helpers to generate shared devices and allocations add TestDeviceAccounter_AllocateAndReserveSharedDevices add willShare functionality to deviceAccounter.AddReserved fix bug that prevented reservation & add cases to TestDeviceChecker add cases to rank tests consolidate WillShare maps to AllocatedDeviceResource.WillShare refactor Task.WillShare to Task.ShareDevices fix comments tidying make plugin & structs DeviceSharing struct match proto generated struct simplify AddAllocs_Collision test and add nil check before using String() function in AddAllocs fix unkeyed literal Add dai.GetSharedByID helper and some refactor cleanup TEMP: plugin helper tidy DeviceSharing in structs.go to more closely mirror device/device.go bring api.go DeviceSharing struct into line with plugins/device.go replace GpuId with SharedDeviceId update GPU_ID references replace plugin device.DeviceSharing with Shared proto enum and cascade through api & structs fields Pass Shared by value now that its a typed string regression fix: not sure why i made the happy path unreachable --- api/resources.go | 35 +++ client/devicemanager/utils.go | 14 ++ command/agent/job_endpoint.go | 20 +- nomad/mock/node.go | 39 +++ nomad/structs/devices.go | 77 ++++-- nomad/structs/devices_test.go | 166 ++++++++++++- nomad/structs/structs.go | 48 +++- nomad/structs/testing.go | 29 +++ plugins/device/cmd/example/cmd/main.go | 5 +- plugins/device/cmd/example/device.go | 322 +++++++++++++++++++------ plugins/device/device.go | 23 ++ plugins/device/proto/device.pb.go | 220 ++++++++++++----- plugins/device/proto/device.proto | 15 ++ plugins/device/util.go | 15 ++ scheduler/feasible/device.go | 
62 ++++- scheduler/feasible/device_test.go | 107 ++++++++ scheduler/feasible/feasible_test.go | 48 +++- scheduler/feasible/rank.go | 1 - scheduler/feasible/rank_test.go | 239 +++++++++++++----- 19 files changed, 1249 insertions(+), 236 deletions(-) diff --git a/api/resources.go b/api/resources.go index 415f0a7613c..e1d2bdd8e2e 100644 --- a/api/resources.go +++ b/api/resources.go @@ -245,6 +245,10 @@ type NodeDevice struct { // Locality stores HW locality information for the node to optionally be // used when making placement decisions. Locality *NodeDeviceLocality + + // Shared mirrors a string enum on device.DetectedDevice that some + // devices use to report status and presence of sharing subsystems + Shared Shared } // Attribute is used to describe the value of an attribute, optionally @@ -289,6 +293,23 @@ func (a Attribute) String() string { } } +type Shared string + +// DeviceSharing mirrors the plugin.DeviceSharing struct found +// on Devices.DetectedDevice. It holds a string that some +// devices use to report the status and presence of sharing +// subsystems +type DeviceSharing struct { + Shared Shared +} + +const ( + DeviceSharingUnset Shared = "" + DeviceSharingIneligible Shared = "ineligible" + DeviceSharingActive Shared = "active" + DeviceSharingInactive Shared = "inactive" +) + // NodeDeviceLocality stores information about the devices hardware locality on // the node. type NodeDeviceLocality struct { @@ -296,6 +317,17 @@ type NodeDeviceLocality struct { PciBusID string } +// ShareDevices indicates whether the task should be placed on a shared device +type ShareDevices struct { + // Enabled + Enabled bool `hcl:"enabled"` + // SharedDeviceID is an optional field for use in environments with + // multiple shared devices, to make the shared device ID available to + // the plugin. If in use alongside the device.id constraint, the two must + // match or the job will not be placed. 
+ SharedDeviceId string `hcl:"shared_device_id,optional"` +} + // RequestedDevice is used to request a device for a task. type RequestedDevice struct { // Name is the request name. The possible values are as follows: @@ -319,6 +351,9 @@ type RequestedDevice struct { // Affinities are a set of affinites to apply when selecting the device // to use. Affinities []*Affinity `hcl:"affinity,block"` + + // ShareDevices reports whether the task should be placed on a shared device + ShareDevices *ShareDevices `hcl:"share_devices,block"` } func (d *RequestedDevice) Canonicalize() { diff --git a/client/devicemanager/utils.go b/client/devicemanager/utils.go index 12e5ab5c1e1..2b9bd129698 100644 --- a/client/devicemanager/utils.go +++ b/client/devicemanager/utils.go @@ -82,6 +82,7 @@ func convertDevice(dev *device.Device) *structs.NodeDevice { Healthy: dev.Healthy, HealthDescription: dev.HealthDesc, Locality: convertHwLocality(dev.HwLocality), + Shared: convertShared(dev.Shared), } } @@ -94,3 +95,16 @@ func convertHwLocality(l *device.DeviceLocality) *structs.NodeDeviceLocality { PciBusID: l.PciBusID, } } + +func convertShared(s device.Shared) structs.Shared { + switch s { + case device.SharingIneligible: + return structs.DeviceSharingIneligible + case device.SharingActive: + return structs.DeviceSharingActive + case device.SharingInactive: + return structs.DeviceSharingInactive + default: + } + return structs.DeviceSharingUnset +} diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index 6774c11bbdc..81e07c1ee71 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -1625,10 +1625,11 @@ func ApiResourcesToStructs(in *api.Resources) *structs.Resources { out.Devices = []*structs.RequestedDevice{} for _, d := range in.Devices { out.Devices = append(out.Devices, &structs.RequestedDevice{ - Name: d.Name, - Count: *d.Count, - Constraints: ApiConstraintsToStructs(d.Constraints), - Affinities: ApiAffinitiesToStructs(d.Affinities), + 
Name: d.Name, + Count: *d.Count, + Constraints: ApiConstraintsToStructs(d.Constraints), + Affinities: ApiAffinitiesToStructs(d.Affinities), + ShareDevices: ApiSharingToStructs(d.ShareDevices), }) } } @@ -2330,3 +2331,14 @@ func validateEvalPriorityOpt(priority int) HTTPCodedError { } return nil } + +func ApiSharingToStructs(in *api.ShareDevices) *structs.ShareDevices { + if in == nil { + return nil + } + return &structs.ShareDevices{ + Enabled: in.Enabled, + SharedDeviceId: in.SharedDeviceId, + } + +} diff --git a/nomad/mock/node.go b/nomad/mock/node.go index 2b2e88498c5..96cf3e719db 100644 --- a/nomad/mock/node.go +++ b/nomad/mock/node.go @@ -148,3 +148,42 @@ func NvidiaNode() *structs.Node { _ = n.ComputeClass() return n } + +// SharedNvidiaNode returns a node with two sharing enabled instances of an Nvidia GPU +func SharedNvidiaNode() *structs.Node { + n := Node() + n.NodeResources.Processors.Topology = structs.MockWorkstationTopology() + n.NodeResources.Devices = []*structs.NodeDeviceResource{ + { + Type: "gpu", + Vendor: "nvidia", + Name: "1080ti", + Attributes: map[string]*psstructs.Attribute{ + "memory": psstructs.NewIntAttribute(11, psstructs.UnitGiB), + "cuda_cores": psstructs.NewIntAttribute(3584, ""), + "graphics_clock": psstructs.NewIntAttribute(1480, psstructs.UnitMHz), + "memory_bandwidth": psstructs.NewIntAttribute(11, psstructs.UnitGBPerS), + }, + Instances: []*structs.NodeDevice{ + { + ID: uuid.Generate(), + Healthy: true, + Locality: &structs.NodeDeviceLocality{ + PciBusID: "0000:02:00.1", // node 0 + }, + Shared: structs.DeviceSharingActive, + }, + { + ID: uuid.Generate(), + Healthy: true, + Locality: &structs.NodeDeviceLocality{ + PciBusID: "0000:02:01.1", // node 0 + }, + Shared: structs.DeviceSharingActive, + }, + }, + }, + } + _ = n.ComputeClass() + return n +} diff --git a/nomad/structs/devices.go b/nomad/structs/devices.go index a1049208a9e..3096a175642 100644 --- a/nomad/structs/devices.go +++ b/nomad/structs/devices.go @@ -3,7 +3,9 @@ 
package structs -import "maps" +import ( + "maps" +) // DeviceAccounter is used to account for device usage on a node. It can detect // when a node is oversubscribed and can be used for deciding what devices are @@ -44,6 +46,20 @@ func (dai *DeviceAccounterInstance) Copy() *DeviceAccounterInstance { } } +// GetSharedByID returns the underlying Shared string value of the instance +// with the given instanceID. +// +// If no instance matching the instanceID is found, an empty string +// (equivalent to DeviceSharingUnset) is returned. +func (dai *DeviceAccounterInstance) GetSharedByID(instanceID string) Shared { + for _, instance := range dai.Device.Instances { + if instance.ID == instanceID { + return instance.Shared + } + } + return "" +} + // NewDeviceAccounter returns a new device accounter. The node is used to // populate the set of available devices based on what healthy device instances // exist on the node. @@ -90,7 +106,8 @@ func (d *DeviceAccounter) Copy() *DeviceAccounter { // AddAllocs takes a set of allocations and internally marks which devices are // used. If a device is used more than once by the set of passed allocations, -// the collision will be returned as true. +// the collision will be returned as true unless it has been placed on a +// device that explicitly allows sharing. func (d *DeviceAccounter) AddAllocs(allocs []*Allocation) (collision bool) { for _, a := range allocs { // Filter any terminal allocation @@ -109,20 +126,23 @@ func (d *DeviceAccounter) AddAllocs(allocs []*Allocation) (collision bool) { for _, tr := range a.AllocatedResources.Tasks { // Go through each assigned device group - for _, device := range tr.Devices { - devID := device.ID() + for _, allocatedDeviceGroup := range tr.Devices { + devID := allocatedDeviceGroup.ID() // Go through each assigned device - for _, instanceID := range device.DeviceIDs { + for _, instanceID := range allocatedDeviceGroup.DeviceIDs { // Mark that we are using the device. 
It may not be in the // map if the device is no longer being fingerprinted, is // unhealthy, etc. - if devInst, ok := d.Devices[*devID]; ok { - if i, ok := devInst.Instances[instanceID]; ok { + if devAccounter, ok := d.Devices[*devID]; ok { + if i, ok := devAccounter.Instances[instanceID]; ok { // Mark that the device is in use - devInst.Instances[instanceID]++ - + devAccounter.Instances[instanceID]++ + shared := devAccounter.GetSharedByID(instanceID) + if shared == DeviceSharingActive { + continue + } if i != 0 { collision = true } @@ -136,30 +156,55 @@ func (d *DeviceAccounter) AddAllocs(allocs []*Allocation) (collision bool) { return } +// willingToShare is called in the loop that marks each reserved instance as used +// in the accounter. It takes a deviceID string and uses it to look up +// whether the task requesting the device is willing to share. +func willingToShare(res *AllocatedDeviceResource, deviceID string) bool { + // res.WillShare is nil => return false as default and do reservation as usual + if res.WillShare == nil { + return false + } + // does exist, is true => this is the shared device, it will share => return true + if exists, willing := res.WillShare[deviceID]; exists && willing { + + return true + } + // In all remaining cases we return false + return false +} + // AddReserved marks the device instances in the passed device reservation as -// used and returns if there is a collision. +// used, checks the res.WillShare map to see if the createOffer expected the device +// to share. If the device will share we do not report a collision even if it +// has already been used. func (d *DeviceAccounter) AddReserved(res *AllocatedDeviceResource) (collision bool) { - // Lookup the device. 
- devInst, ok := d.Devices[*res.ID()] + // Lookup the deviceAccounter + devAccounter, ok := d.Devices[*res.ID()] if !ok { return false } // For each reserved instance, mark it as used for _, id := range res.DeviceIDs { - cur, ok := devInst.Instances[id] + cur, ok := devAccounter.Instances[id] if !ok { continue } - // It has already been used, so mark that there is a collision + // if offer expects device will share, mark device as used + // and continue without marking collision + if willingToShare(res, id) { + devAccounter.Instances[id]++ + continue + } + + // mark collision if device will not share and has already been used if cur != 0 { collision = true } + devAccounter.Instances[id]++ - devInst.Instances[id]++ } - return } diff --git a/nomad/structs/devices_test.go b/nomad/structs/devices_test.go index 694d98a534a..d6d83176bf6 100644 --- a/nomad/structs/devices_test.go +++ b/nomad/structs/devices_test.go @@ -32,6 +32,13 @@ func nvidiaAlloc() *Allocation { return a } +// sets the supplied DeviceSharing on the node and returns the node and 1st deviceID +func sharedNodeWithDeviceID(node *Node, sharingStatus Shared) (*Node, string) { + node.NodeResources.Devices[0].Instances[0].Shared = sharingStatus + deviceID := node.NodeResources.Devices[0].Instances[0].ID + return node, deviceID +} + // devNode returns a node containing two devices, an nvidia gpu and an intel // FPGA. 
func devNode() *Node { @@ -150,20 +157,157 @@ func TestDeviceAccounter_AddAllocs_UnknownID(t *testing.T) { func TestDeviceAccounter_AddAllocs_Collision(t *testing.T) { ci.Parallel(t) - require := require.New(t) - n := devNode() - d := NewDeviceAccounter(n) - require.NotNil(d) + for _, tc := range []struct { + name string + shared bool + expCollision bool + }{ + { + name: "standard", + shared: false, + expCollision: true, + }, { + name: "sharedNode", + shared: true, + expCollision: false, + }, + } { + t.Run(tc.name, func(t *testing.T) { + + n := devNode() + if tc.shared { + n.NodeResources.Devices[0].Instances[0].Shared = DeviceSharingActive + n.NodeResources.Devices[0].Instances[1].Shared = DeviceSharingActive + } + d := NewDeviceAccounter(n) + must.NotNil(t, d) + // Create two allocations, both with the same device + a1, a2 := nvidiaAlloc(), nvidiaAlloc() + + nvidiaDev0ID := n.NodeResources.Devices[0].Instances[0].ID + a1.AllocatedResources.Tasks["web"].Devices[0].DeviceIDs = []string{nvidiaDev0ID} + a2.AllocatedResources.Tasks["web"].Devices[0].DeviceIDs = []string{nvidiaDev0ID} + + allocs := []*Allocation{a1, a2} + must.Eq(t, tc.expCollision, d.AddAllocs(allocs)) + + }) + } +} - // Create two allocations, both with the same device - a1, a2 := nvidiaAlloc(), nvidiaAlloc() +// Tests that allocs on any shared devices can be double scheduled +// if device and request both agree to share +func TestDeviceAccounter_AllocateAndReserveSharedDevices(t *testing.T) { + ci.Parallel(t) - nvidiaDev0ID := n.NodeResources.Devices[0].Instances[0].ID - a1.AllocatedResources.Tasks["web"].Devices[0].DeviceIDs = []string{nvidiaDev0ID} - a2.AllocatedResources.Tasks["web"].Devices[0].DeviceIDs = []string{nvidiaDev0ID} + nvidiaNode, nvidiaSharedDeviceId := sharedNodeWithDeviceID(MockNvidiaNode(), DeviceSharingUnset) + sharedNvidiaNode, sharedNvidiaSharedDeviceId := sharedNodeWithDeviceID(MockNvidiaNode(), DeviceSharingActive) + sharedIntelNode, sharedIntelNodeSharedDeviceId := 
sharedNodeWithDeviceID(MockIntelNode(), DeviceSharingActive) + genNvidiaOrIntelAllocs := func(isNvidia bool, willShare bool, count int, sharedSharedDeviceId string) []*Allocation { + var ( + allocs []*Allocation + allocated *AllocatedDeviceResource + ) + if isNvidia { + allocated = &AllocatedDeviceResource{ + Type: "gpu", + Vendor: "nvidia", + Name: "1080ti", + } + } else { + allocated = &AllocatedDeviceResource{ + Type: "fpga", + Vendor: "intel", + Name: "F100", + } + } + // function to generate a single intel or nvidia allocation + genAlloc := func(ID string, allocated *AllocatedDeviceResource, willShare bool) *Allocation { + var SharedDeviceId string + if len(ID) == 0 { + SharedDeviceId = uuid.Generate() + } else { + SharedDeviceId = ID + } + allocated.DeviceIDs = []string{SharedDeviceId} + allocated.WillShare = map[string]bool{SharedDeviceId: willShare} + + a := MockAlloc() + a.AllocatedResources.Tasks["web"].Devices = []*AllocatedDeviceResource{allocated} + a.ClientStatus = AllocClientStatusPending + return a + } + + // build []*Allocation + for range count { + allocs = append(allocs, genAlloc(sharedSharedDeviceId, allocated, willShare)) + } + + return allocs + + } + for _, tc := range []struct { + name string + node *Node + SharedDeviceId string + allocs []*Allocation + allocWillCollide bool + reserveWillCollide bool + expectedCount int + }{ + { + name: "shared device- alloc passes, willing request- reservation passes", + node: sharedNvidiaNode, + allocs: genNvidiaOrIntelAllocs(true, true, 2, sharedNvidiaSharedDeviceId), + SharedDeviceId: sharedNvidiaSharedDeviceId, + allocWillCollide: false, + reserveWillCollide: false, + expectedCount: 3, + }, + { + name: "intel , reservation passes", + node: sharedIntelNode, + allocs: genNvidiaOrIntelAllocs(false, true, 2, sharedIntelNodeSharedDeviceId), + SharedDeviceId: sharedIntelNodeSharedDeviceId, + allocWillCollide: false, + reserveWillCollide: false, + expectedCount: 3, + }, + { + name: "unshared device- alloc 
collides, unsharing request- reservation collides", + node: nvidiaNode, + allocs: genNvidiaOrIntelAllocs(true, false, 2, nvidiaSharedDeviceId), + SharedDeviceId: nvidiaSharedDeviceId, + allocWillCollide: true, + reserveWillCollide: true, + expectedCount: 3, + }, + { + name: "shared device- alloc passes, unsharing request - reservation collides", + node: sharedNvidiaNode, + allocs: genNvidiaOrIntelAllocs(true, false, 2, sharedNvidiaSharedDeviceId), + SharedDeviceId: sharedNvidiaSharedDeviceId, + allocWillCollide: false, + reserveWillCollide: true, + expectedCount: 3, + }, + } { + t.Run(tc.name, func(t *testing.T) { + d := NewDeviceAccounter(tc.node) + // create allocations + collision := d.AddAllocs(tc.allocs) + + must.Eq(t, tc.allocWillCollide, collision) + // attempt to reserve one of the previously allocated devices + device := tc.allocs[0].AllocatedResources.Tasks["web"].Devices[0] + + deviceName := DeviceIdTuple{device.Vendor, device.Type, device.Name} + must.Eq(t, tc.reserveWillCollide, d.AddReserved(device)) + //demonstrate the Instance counter was incremented at each attempt + must.Eq(t, tc.expectedCount, d.Devices[deviceName].Instances[tc.SharedDeviceId]) + }) + } - allocs := []*Allocation{a1, a2} - require.True(d.AddAllocs(allocs)) } // Assert that devices are not freed when an alloc's ServerTerminalStatus is diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 589548bc49e..e291e5b9968 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -3038,6 +3038,16 @@ func (ns Networks) Modes() *set.Set[string] { }) } +// ShareDevices indicates whether the task should be placed on a shared device +type ShareDevices struct { + Enabled bool + // SharedDeviceID is an optional field for use in environments with + // multiple shared devices, to make the shared device ID available to + // the plugin. If in use alongside the device.id constraint, the two must + // match or the job will not be placed. 
+ SharedDeviceId string +} + // RequestedDevice is used to request a device for a task. type RequestedDevice struct { // Name is the request name. The possible values are as follows: @@ -3061,6 +3071,10 @@ type RequestedDevice struct { // Affinities are a set of affinities to apply when selecting the device // to use. Affinities Affinities + + // ShareDevices indicates whether the job should be placed on a shared device + // and is willing to share + ShareDevices *ShareDevices } func (r *RequestedDevice) String() string { @@ -3575,6 +3589,28 @@ func (n *NodeDeviceResource) Equal(o *NodeDeviceResource) bool { return true } +type Shared string + +func (s Shared) String() string { + switch s { + case DeviceSharingInactive: + return "inactive" + case DeviceSharingIneligible: + return "inelegible" + case DeviceSharingActive: + return "active" + default: + return "unset" + } +} + +const ( + DeviceSharingUnset Shared = "" + DeviceSharingIneligible Shared = "ineligible" + DeviceSharingActive Shared = "active" + DeviceSharingInactive Shared = "inactive" +) + // NodeDevice is an instance of a particular device. type NodeDevice struct { // ID is the ID of the device. @@ -3590,6 +3626,10 @@ type NodeDevice struct { // Locality stores HW locality information for the node to optionally be // used when making placement decisions. 
Locality *NodeDeviceLocality + + // Shared mirrors a string enum on device.DetectedDevice that some + // devices use to report status and presence of sharing subsystems + Shared Shared } func (n *NodeDevice) Equal(o *NodeDevice) bool { @@ -3609,6 +3649,8 @@ func (n *NodeDevice) Equal(o *NodeDevice) bool { return false } else if !n.Locality.Equal(o.Locality) { return false + } else if n.Shared != o.Shared { + return false } return true @@ -3624,7 +3666,6 @@ func (n *NodeDevice) Copy() *NodeDevice { // Copy the locality nn.Locality = nn.Locality.Copy() - return &nn } @@ -4176,6 +4217,10 @@ type AllocatedDeviceResource struct { // DeviceIDs is the set of allocated devices DeviceIDs []string + + // WillShare maps each allocated device ID to whether the + // requesting task is willing to share the device + WillShare map[string]bool } func (a *AllocatedDeviceResource) ID() *DeviceIdTuple { @@ -4208,6 +4253,7 @@ func (a *AllocatedDeviceResource) Copy() *AllocatedDeviceResource { // Copy the devices na.DeviceIDs = make([]string, len(a.DeviceIDs)) copy(na.DeviceIDs, a.DeviceIDs) + na.WillShare = make(map[string]bool, len(a.DeviceIDs)) return &na } diff --git a/nomad/structs/testing.go b/nomad/structs/testing.go index 1c37ae0ccb9..03cd6b78b97 100644 --- a/nomad/structs/testing.go +++ b/nomad/structs/testing.go @@ -256,6 +256,35 @@ func MockNvidiaNode() *Node { return n } +// MockIntelNode returns a node with one Intel FPGA device group holding two instances, one healthy and one unhealthy +func MockIntelNode() *Node { + n := MockNode() + n.NodeResources.Devices = []*NodeDeviceResource{ + { + Type: "fpga", + Vendor: "intel", + Name: "F100", + Attributes: map[string]*psstructs.Attribute{ + "memory": psstructs.NewIntAttribute(4, psstructs.UnitGiB), + }, + Instances: []*NodeDevice{ + { + ID: uuid.Generate(), + Healthy: true, + }, + { + ID: uuid.Generate(), + Healthy: false, + }, + }, + }, + } + err := n.ComputeClass() + if err != nil { + panic(fmt.Sprintf("failed to compute 
node class: %v", err)) + } + return n +} func 
MockJob() *Job { job := &Job{ Region: "global", diff --git a/plugins/device/cmd/example/cmd/main.go b/plugins/device/cmd/example/cmd/main.go index 15f9c496c5d..593427b74c0 100644 --- a/plugins/device/cmd/example/cmd/main.go +++ b/plugins/device/cmd/example/cmd/main.go @@ -4,6 +4,8 @@ package main import ( + "context" + log "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/plugins" @@ -17,5 +19,6 @@ func main() { // factory returns a new instance of our example device plugin func factory(log log.Logger) interface{} { - return example.NewExampleDevice(log) + ctx := context.Background() + return example.NewNvidiaDevice(ctx, log) } diff --git a/plugins/device/cmd/example/device.go b/plugins/device/cmd/example/device.go index 60d1d43ca06..0d9236bec51 100644 --- a/plugins/device/cmd/example/device.go +++ b/plugins/device/cmd/example/device.go @@ -5,6 +5,7 @@ package example import ( "context" + "errors" "fmt" "io/ioutil" "os" @@ -12,41 +13,82 @@ import ( "sync" "time" - log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/helper/pluginutils/loader" "github.com/hashicorp/nomad/plugins/base" "github.com/hashicorp/nomad/plugins/device" "github.com/hashicorp/nomad/plugins/shared/hclspec" "github.com/hashicorp/nomad/plugins/shared/structs" - "github.com/kr/pretty" + "github.com/hashicorp/nomad/version" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" ) const ( // pluginName is the name of the plugin - pluginName = "example-fs-device" + pluginName = "nvidia-example" // vendor is the vendor providing the devices - vendor = "nomad" + vendor = "nvidia" // deviceType is the type of device being returned - deviceType = "file" + deviceType = device.DeviceTypeGPU - // deviceName is the name of the devices being exposed - deviceName = "mock" + // notAvailable value is returned to nomad server in case some properties were + // undetected by nvml driver + notAvailable = "N/A" + + // Nvidia-container-runtime 
environment variable names + NvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES" + + // MPS runtime environment variables + MpsPipeDirectoryKey = "MPS_PIPE_DIRECTORY" + MpsLogDirectoryKey = "MPS_LOG_DIRECTORY" + CustomMpsUserKey = "MPS_USER" + + DefaultMpsSockFileAddr = "control" + + deviceName1 = "T4" + deviceName2 = "T4" ) var ( + // PluginID is the nvidia plugin metadata registered in the plugin + // catalog. + PluginID = loader.PluginID{ + Name: pluginName, + PluginType: base.PluginTypeDevice, + } + + // PluginConfig is the nvidia factory function registered in the + // plugin catalog. + PluginConfig = &loader.InternalPluginConfig{ + Factory: func(ctx context.Context, l hclog.Logger) interface{} { return NewNvidiaDevice(ctx, l) }, + } + // pluginInfo describes the plugin pluginInfo = &base.PluginInfoResponse{ Type: base.PluginTypeDevice, PluginApiVersions: []string{device.ApiVersion010}, - PluginVersion: "v0.1.0", + PluginVersion: version.Version, Name: pluginName, } // configSpec is the specification of the plugin's configuration configSpec = hclspec.NewObject(map[string]*hclspec.Spec{ + "enabled": hclspec.NewDefault( + hclspec.NewAttr("enabled", "bool", false), + hclspec.NewLiteral("true"), + ), + + "ignored_gpu_ids": hclspec.NewDefault( + hclspec.NewAttr("ignored_gpu_ids", "list(string)", false), + hclspec.NewLiteral("[]"), + ), + "fingerprint_period": hclspec.NewDefault( + hclspec.NewAttr("fingerprint_period", "string", false), + hclspec.NewLiteral("\"1m\""), + ), "dir": hclspec.NewDefault( hclspec.NewAttr("dir", "string", false), hclspec.NewLiteral("\".\""), @@ -59,22 +101,62 @@ var ( hclspec.NewAttr("unhealthy_perm", "string", false), hclspec.NewLiteral("\"-rwxrwxrwx\""), ), + "mps": hclspec.NewBlock("mps", false, + hclspec.NewObject(map[string]*hclspec.Spec{ + "enabled": hclspec.NewAttr("enabled", "bool", true), + "mps_user": hclspec.NewAttr("mps_user", "string", false), + "mps_pipe_directory": hclspec.NewAttr("mps_pipe_directory", "string", false), + 
"mps_log_directory": hclspec.NewAttr("mps_log_directory", "string", false), + "mps_sock_addr": hclspec.NewAttr("mps_sock_addr", "string", false), + "device_specific_mps_config": hclspec.NewBlockList("device_specific_mps_config", + hclspec.NewObject(map[string]*hclspec.Spec{ + "uuid": hclspec.NewAttr("uuid", "string", true), + "mps_pipe_directory": hclspec.NewAttr("mps_pipe_directory", "string", true), + "mps_log_directory": hclspec.NewAttr("mps_log_directory", "string", true), + }), + ), + }), + ), }) ) // Config contains configuration information for the plugin. type Config struct { - Dir string `codec:"dir"` - ListPeriod string `codec:"list_period"` - UnhealthyPerm string `codec:"unhealthy_perm"` + Enabled bool `codec:"enabled"` + IgnoredGPUIDs []string `codec:"ignored_gpu_ids"` + FingerprintPeriod string `codec:"fingerprint_period"` + MpsConfig *MpsConfig `codec:"mps"` + Dir string `codec:"dir"` + ListPeriod string `codec:"list_period"` + UnhealthyPerm string `codec:"unhealthy_perm"` } -// FsDevice is an example device plugin. The device plugin exposes files as -// devices and periodically polls the directory for new files. If a file has a -// given file permission, it is considered unhealthy. This device plugin is -// purely for use as an example. 
-type FsDevice struct { - logger log.Logger +// MpsConfig contains configuration for mps sharing +type MpsConfig struct { + MpsUser string `codec:"mps_user"` + MpsSockFile string `codec:"mps_sock_addr"` + MpsPipeDirectory string `codec:"mps_pipe_directory"` + MpsLogDirectory string `codec:"mps_log_directory"` + DeviceConfig []DeviceMpsConfig `codec:"device_specific_mps_config"` + DeviceMpsConfig map[string]DeviceMpsConfig +} + +// DeviceMpsConfig contains configuration GPU level mps sharing +type DeviceMpsConfig struct { + UUID string `codec:"uuid"` + MpsPipeDirectory string `codec:"mps_pipe_directory"` + MpsLogDirectory string `codec:"mps_log_directory"` +} + +type NvidiaDevice struct { + // enabled indicates whether the plugin should be enabled + enabled bool + + //nvmlClient nvml.NvmlClient + + // initErr holds an error retrieved during + // nvmlClient initialization + //initErr error // deviceDir is the directory we expose as devices deviceDir string @@ -86,37 +168,104 @@ type FsDevice struct { // devices listPeriod time.Duration - // devices is the set of detected devices and maps whether they are healthy - devices map[string]bool + // ignoredGPUIDs is a set of UUIDs that would not be exposed to nomad + ignoredGPUIDs map[string]struct{} + + // fingerprintPeriod is how often we should call nvml to get list of devices + //fingerprintPeriod time.Duration + + //MpsConfig holds a pointer to the MPS configuration + MpsConfig *MpsConfig + + // devices is the set of detected eligible devices + devices map[string]device.Shared deviceLock sync.RWMutex + + logger hclog.Logger } -// NewExampleDevice returns a new example device plugin. -func NewExampleDevice(log log.Logger) *FsDevice { - return &FsDevice{ - logger: log.Named(pluginName), - devices: make(map[string]bool), +// NewNvidiaDevice returns a new nvidia device plugin. 
+func NewNvidiaDevice(_ context.Context, log hclog.Logger) *NvidiaDevice { + //nvmlClient, err := nvml.NewNvmlClient() + logger := log.Named(pluginName) + return &NvidiaDevice{ + logger: logger, + devices: make(map[string]device.Shared), + ignoredGPUIDs: make(map[string]struct{}), } } // PluginInfo returns information describing the plugin. -func (d *FsDevice) PluginInfo() (*base.PluginInfoResponse, error) { +func (d *NvidiaDevice) PluginInfo() (*base.PluginInfoResponse, error) { return pluginInfo, nil } // ConfigSchema returns the plugins configuration schema. -func (d *FsDevice) ConfigSchema() (*hclspec.Spec, error) { +func (d *NvidiaDevice) ConfigSchema() (*hclspec.Spec, error) { return configSpec, nil } +func checkAndSetDefault(c string, d string) string { + if config := c; config != "" { + return c + } + return d +} + // SetConfig is used to set the configuration of the plugin. -func (d *FsDevice) SetConfig(c *base.Config) error { +func (d *NvidiaDevice) SetConfig(cfg *base.Config) error { var config Config - if err := base.MsgPackDecode(c.PluginConfig, &config); err != nil { - return err + if len(cfg.PluginConfig) != 0 { + if err := base.MsgPackDecode(cfg.PluginConfig, &config); err != nil { + return err + } + } + d.enabled = config.Enabled + // set MPS config values + if config.MpsConfig != nil { + d.MpsConfig = &MpsConfig{} + d.logger.Info("not nil", "config", fmt.Sprintf("%+v", config.MpsConfig)) + // ensure only global or device specific config are set + if (config.MpsConfig.MpsPipeDirectory != "" || config.MpsConfig.MpsLogDirectory != "") && + len(config.MpsConfig.DeviceMpsConfig) != 0 { + return errors.New("only global mps variables or device_specific_mps_config block may be set ") + } + // set straightforward value on device + d.MpsConfig.MpsUser = checkAndSetDefault(config.MpsConfig.MpsUser, "unset") + d.MpsConfig.MpsSockFile = checkAndSetDefault(config.MpsConfig.MpsSockFile, DefaultMpsSockFileAddr) + + // if present set device specific mps config, 
otherwise set top level config + if len(config.MpsConfig.DeviceMpsConfig) != 0 { + + // build map of device UUIDs to config + deviceConfigMap := make(map[string]DeviceMpsConfig, len(config.MpsConfig.DeviceMpsConfig)) + for _, devConfig := range config.MpsConfig.DeviceMpsConfig { + deviceConfigMap[devConfig.UUID] = DeviceMpsConfig{ + UUID: devConfig.UUID, + MpsPipeDirectory: devConfig.MpsPipeDirectory, + MpsLogDirectory: devConfig.MpsLogDirectory, + } + } + // set device specific mpsConfig + d.MpsConfig.DeviceMpsConfig = deviceConfigMap + } else { + // set top level mps directories if no device specific config + // we have defaults so always use config values + d.MpsConfig.MpsPipeDirectory = config.MpsConfig.MpsPipeDirectory + d.MpsConfig.MpsLogDirectory = config.MpsConfig.MpsLogDirectory + if pipe_dir := config.MpsConfig.MpsPipeDirectory; pipe_dir != "" { + d.MpsConfig.MpsPipeDirectory = pipe_dir + } else { + d.MpsConfig.MpsPipeDirectory = checkAndSetDefault(config.MpsConfig.MpsPipeDirectory, "/tmp/nvidia-mps") + + d.MpsConfig.MpsLogDirectory = checkAndSetDefault(config.MpsConfig.MpsLogDirectory, "/var/log/nvidia-mps") + } + } + } + for _, ignoredGPUId := range config.IgnoredGPUIDs { + d.ignoredGPUIDs[ignoredGPUId] = struct{}{} } - // Save the device directory and the unhealthy permissions d.deviceDir = config.Dir d.unhealthyPerm = config.UnhealthyPerm @@ -127,16 +276,14 @@ func (d *FsDevice) SetConfig(c *base.Config) error { } d.listPeriod = period - d.logger.Debug("test debug") - d.logger.Info("config set", "config", log.Fmt("% #v", pretty.Formatter(config))) return nil } // Fingerprint streams detected devices. If device changes are detected or the // devices health changes, messages will be emitted. 
-func (d *FsDevice) Fingerprint(ctx context.Context) (<-chan *device.FingerprintResponse, error) { - if d.deviceDir == "" { - return nil, status.New(codes.Internal, "device directory not set in config").Err() +func (d *NvidiaDevice) Fingerprint(ctx context.Context) (<-chan *device.FingerprintResponse, error) { + if !d.enabled { + return nil, device.ErrPluginDisabled } outCh := make(chan *device.FingerprintResponse) @@ -145,7 +292,7 @@ func (d *FsDevice) Fingerprint(ctx context.Context) (<-chan *device.FingerprintR } // fingerprint is the long running goroutine that detects hardware -func (d *FsDevice) fingerprint(ctx context.Context, devices chan *device.FingerprintResponse) { +func (d *NvidiaDevice) fingerprint(ctx context.Context, devices chan *device.FingerprintResponse) { defer close(devices) // Create a timer that will fire immediately for the first detection @@ -159,7 +306,7 @@ func (d *FsDevice) fingerprint(ctx context.Context, devices chan *device.Fingerp ticker.Reset(d.listPeriod) } - d.logger.Trace("scanning for changes") + d.logger.Info("scanning for changes") files, err := ioutil.ReadDir(d.deviceDir) if err != nil { @@ -167,97 +314,122 @@ func (d *FsDevice) fingerprint(ctx context.Context, devices chan *device.Fingerp devices <- device.NewFingerprintError(err) return } - - detected := d.diffFiles(files) - if len(detected) == 0 { - continue + deviceGroups := make([]*device.DeviceGroup, 0) + shared, inactive := d.diffFiles(files) + if len(inactive) != 0 { + deviceGroups = append(deviceGroups, d.getDeviceGroup(inactive, deviceName2)) } - devices <- device.NewFingerprint(getDeviceGroup(detected)) + if len(shared) != 0 { + deviceGroups = append(deviceGroups, d.getDeviceGroup(shared, deviceName1)) + } + d.logger.Info("files to fingerprint", "inactive files", len(inactive), "active files", len(shared)) + devices <- device.NewFingerprint(deviceGroups...) 
} } - -func (d *FsDevice) diffFiles(files []os.FileInfo) []*device.Device { +func (d *NvidiaDevice) diffFiles(files []os.FileInfo) ([]*device.Device, []*device.Device) { d.deviceLock.Lock() defer d.deviceLock.Unlock() // Build an unhealthy message unhealthyDesc := fmt.Sprintf("Device has bad permissions %q", d.unhealthyPerm) - var changes bool + //var changes bool fnames := make(map[string]struct{}) for _, f := range files { name := f.Name() fnames[name] = struct{}{} if f.IsDir() { - d.logger.Trace("skipping directory", "directory", name) + d.logger.Info("skipping directory", "directory", name) continue } // Determine the health perms := f.Mode().Perm().String() - healthy := perms != d.unhealthyPerm - d.logger.Trace("checking health", "file perm", perms, "unhealthy perms", d.unhealthyPerm, "healthy", healthy) + //turn health into sharing status + healthBool := perms != d.unhealthyPerm + var healthy string + if healthBool { + healthy = device.SharingActive.String() + } else { + healthy = device.SharingInactive.String() + } + d.logger.Info("checking health", "file perm", perms, "unhealthy perms", d.unhealthyPerm, "healthy", healthy) - // See if we alreay have the device + // See if we already have the device oldHealth, ok := d.devices[name] - if ok && oldHealth == healthy { + if ok && oldHealth.String() == healthy { continue } // Health has changed or we have a new object - changes = true - d.devices[name] = healthy + //changes = true + + d.devices[name] = device.Shared(healthy) } for id := range d.devices { if _, ok := fnames[id]; !ok { delete(d.devices, id) - changes = true + //changes = true } } - // Nothing to do - if !changes { - return nil - } - // Build the devices - detected := make([]*device.Device, 0, len(d.devices)) + shared := make([]*device.Device, 0, len(d.devices)) + inactive := make([]*device.Device, 0, len(d.devices)) + for name, healthy := range d.devices { var desc string - if !healthy { + if healthy != device.SharingActive { desc = unhealthyDesc + 
inactive = append(inactive, &device.Device{ + ID: name, + Shared: healthy, + HealthDesc: desc, + Healthy: true, + }) + continue } - - detected = append(detected, &device.Device{ + shared = append(shared, &device.Device{ ID: name, - Healthy: healthy, - HealthDesc: desc, + Shared: healthy, + HealthDesc: "healthy", + Healthy: true, }) } - return detected + return shared, inactive } // getDeviceGroup is a helper to build the DeviceGroup given a set of devices. -func getDeviceGroup(devices []*device.Device) *device.DeviceGroup { +func (d *NvidiaDevice) getDeviceGroup(devices []*device.Device, name string) *device.DeviceGroup { + //d.logger.Error("getDeviceGroup", "device count", len(devices)) + var shared string + for _, v := range devices { + if shared == "" { + shared = v.Shared.String() + } + //d.logger.Error("getDeviceGroup", "loop", n, "deviceID", v.ID, "shared", v.Shared.String()) + } return &device.DeviceGroup{ Vendor: vendor, Type: deviceType, - Name: deviceName, + Name: name, Devices: devices, Attributes: map[string]*structs.Attribute{ + "cool-attribute": { String: new("attribute-wearing-sunglasses"), }, }, } + } // Reserve returns information on how to mount the given devices. 
-func (d *FsDevice) Reserve(deviceIDs []string) (*device.ContainerReservation, error) { +func (d *NvidiaDevice) Reserve(deviceIDs []string) (*device.ContainerReservation, error) { if len(deviceIDs) == 0 { return nil, status.New(codes.InvalidArgument, "no device ids given").Err() } @@ -268,12 +440,16 @@ func (d *FsDevice) Reserve(deviceIDs []string) (*device.ContainerReservation, er } resp := &device.ContainerReservation{} - + resp.Envs = make(map[string]string) for _, id := range deviceIDs { // Check if the device is known if _, ok := d.devices[id]; !ok { return nil, status.Newf(codes.InvalidArgument, "unknown device %q", id).Err() } + if d.MpsConfig != nil && d.devices[id] == device.SharingActive { // MpsConfig is nil when no mps block was configured + resp.Envs[MpsPipeDirectoryKey] = d.MpsConfig.MpsPipeDirectory + resp.Envs[MpsLogDirectoryKey] = d.MpsConfig.MpsLogDirectory + } // Add a mount resp.Mounts = append(resp.Mounts, &device.Mount{ @@ -287,14 +463,14 @@ func (d *FsDevice) Reserve(deviceIDs []string) (*device.ContainerReservation, er } // Stats streams statistics for the detected devices.
-func (d *FsDevice) Stats(ctx context.Context, interval time.Duration) (<-chan *device.StatsResponse, error) { +func (d *NvidiaDevice) Stats(ctx context.Context, interval time.Duration) (<-chan *device.StatsResponse, error) { outCh := make(chan *device.StatsResponse) go d.stats(ctx, outCh, interval) return outCh, nil } // stats is the long running goroutine that streams device statistics -func (d *FsDevice) stats(ctx context.Context, stats chan *device.StatsResponse, interval time.Duration) { +func (d *NvidiaDevice) stats(ctx context.Context, stats chan *device.StatsResponse, interval time.Duration) { defer close(stats) // Create a timer that will fire immediately for the first detection @@ -325,7 +501,7 @@ func (d *FsDevice) stats(ctx context.Context, stats chan *device.StatsResponse, } } -func (d *FsDevice) collectStats() (*device.DeviceGroupStats, error) { +func (d *NvidiaDevice) collectStats() (*device.DeviceGroupStats, error) { d.deviceLock.RLock() defer d.deviceLock.RUnlock() l := len(d.devices) @@ -337,7 +513,7 @@ func (d *FsDevice) collectStats() (*device.DeviceGroupStats, error) { group := &device.DeviceGroupStats{ Vendor: vendor, Type: deviceType, - Name: deviceName, + Name: deviceName1, InstanceStats: make(map[string]*device.DeviceStats, l), } diff --git a/plugins/device/device.go b/plugins/device/device.go index cf1df4bfd4a..ea90499aa1d 100644 --- a/plugins/device/device.go +++ b/plugins/device/device.go @@ -16,8 +16,28 @@ import ( const ( // DeviceTypeGPU is a canonical device type for a GPU. 
DeviceTypeGPU = "gpu" + + SharingUnset Shared = "" + SharingIneligible Shared = "ineligible" + SharingActive Shared = "active" + SharingInactive Shared = "inactive" ) +type Shared string // Shared is the device sharing state reported for a device + +func (s Shared) String() string { + switch s { + case SharingInactive: + return "inactive" + case SharingIneligible: + return "ineligible" // must spell the same value as the SharingIneligible constant + case SharingActive: + return "active" + default: + return "unset" + } +} + var ( // ErrPluginDisabled indicates that the device plugin is disabled ErrPluginDisabled = fmt.Errorf("device is not enabled") @@ -133,6 +153,9 @@ type Device struct { // HwLocality captures hardware locality information for the device. HwLocality *DeviceLocality + + // Shared marks whether Device Sharing is enabled + Shared Shared } // Validate validates that the device is valid diff --git a/plugins/device/proto/device.pb.go b/plugins/device/proto/device.pb.go index 815268efc44..a255ad71e39 100644 --- a/plugins/device/proto/device.pb.go +++ b/plugins/device/proto/device.pb.go @@ -27,6 +27,37 @@ var _ = math.Inf // proto package needs to be updated. const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package +type Shared int32 + +const ( + Shared_SHARED_UNSET Shared = 0 + Shared_SHARED_ACTIVE Shared = 1 + Shared_SHARED_INACTIVE Shared = 2 + Shared_SHARED_INELIGIBLE Shared = 3 +) + +var Shared_name = map[int32]string{ + 0: "SHARED_UNSET", + 1: "SHARED_ACTIVE", + 2: "SHARED_INACTIVE", + 3: "SHARED_INELIGIBLE", +} + +var Shared_value = map[string]int32{ + "SHARED_UNSET": 0, + "SHARED_ACTIVE": 1, + "SHARED_INACTIVE": 2, + "SHARED_INELIGIBLE": 3, +} + +func (x Shared) String() string { + return proto.EnumName(Shared_name, int32(x)) +} + +func (Shared) EnumDescriptor() ([]byte, []int) { + return fileDescriptor_5edb0c35c07fa415, []int{0} +} + // FingerprintRequest is used to request for devices to be fingerprinted.
type FingerprintRequest struct { XXX_NoUnkeyedLiteral struct{} `json:"-"` @@ -193,10 +224,12 @@ type DetectedDevice struct { HealthDescription string `protobuf:"bytes,3,opt,name=health_description,json=healthDescription,proto3" json:"health_description,omitempty"` // hw_locality is optionally set to expose hardware locality information for // more optimal placement decisions. - HwLocality *DeviceLocality `protobuf:"bytes,4,opt,name=hw_locality,json=hwLocality,proto3" json:"hw_locality,omitempty"` - XXX_NoUnkeyedLiteral struct{} `json:"-"` - XXX_unrecognized []byte `json:"-"` - XXX_sizecache int32 `json:"-"` + HwLocality *DeviceLocality `protobuf:"bytes,4,opt,name=hw_locality,json=hwLocality,proto3" json:"hw_locality,omitempty"` + // shared reports on the presence and state of a device sharing daemon + Shared Shared `protobuf:"varint,5,opt,name=shared,proto3,enum=hashicorp.nomad.plugins.device.Shared" json:"shared,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` } func (m *DetectedDevice) Reset() { *m = DetectedDevice{} } @@ -252,6 +285,13 @@ func (m *DetectedDevice) GetHwLocality() *DeviceLocality { return nil } +func (m *DetectedDevice) GetShared() Shared { + if m != nil { + return m.Shared + } + return Shared_SHARED_UNSET +} + // DeviceLocality is used to expose HW locality information about a device. type DeviceLocality struct { // pci_bus_id is the PCI bus ID for the device. 
If reported, it @@ -770,7 +810,48 @@ func (m *DeviceStats) GetTimestamp() *timestamp.Timestamp { return nil } +// DeviceSharing is a representation of the DeviceSharing string enum +type DeviceSharing struct { + Shared Shared `protobuf:"varint,1,opt,name=shared,proto3,enum=hashicorp.nomad.plugins.device.Shared" json:"shared,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *DeviceSharing) Reset() { *m = DeviceSharing{} } +func (m *DeviceSharing) String() string { return proto.CompactTextString(m) } +func (*DeviceSharing) ProtoMessage() {} +func (*DeviceSharing) Descriptor() ([]byte, []int) { + return fileDescriptor_5edb0c35c07fa415, []int{14} +} + +func (m *DeviceSharing) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_DeviceSharing.Unmarshal(m, b) +} +func (m *DeviceSharing) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_DeviceSharing.Marshal(b, m, deterministic) +} +func (m *DeviceSharing) XXX_Merge(src proto.Message) { + xxx_messageInfo_DeviceSharing.Merge(m, src) +} +func (m *DeviceSharing) XXX_Size() int { + return xxx_messageInfo_DeviceSharing.Size(m) +} +func (m *DeviceSharing) XXX_DiscardUnknown() { + xxx_messageInfo_DeviceSharing.DiscardUnknown(m) +} + +var xxx_messageInfo_DeviceSharing proto.InternalMessageInfo + +func (m *DeviceSharing) GetShared() Shared { + if m != nil { + return m.Shared + } + return Shared_SHARED_UNSET +} + func init() { + proto.RegisterEnum("hashicorp.nomad.plugins.device.Shared", Shared_name, Shared_value) proto.RegisterType((*FingerprintRequest)(nil), "hashicorp.nomad.plugins.device.FingerprintRequest") proto.RegisterType((*FingerprintResponse)(nil), "hashicorp.nomad.plugins.device.FingerprintResponse") proto.RegisterType((*DeviceGroup)(nil), "hashicorp.nomad.plugins.device.DeviceGroup") @@ -788,6 +869,7 @@ func init() { proto.RegisterType((*DeviceGroupStats)(nil), 
"hashicorp.nomad.plugins.device.DeviceGroupStats") proto.RegisterMapType((map[string]*DeviceStats)(nil), "hashicorp.nomad.plugins.device.DeviceGroupStats.InstanceStatsEntry") proto.RegisterType((*DeviceStats)(nil), "hashicorp.nomad.plugins.device.DeviceStats") + proto.RegisterType((*DeviceSharing)(nil), "hashicorp.nomad.plugins.device.DeviceSharing") } func init() { @@ -795,68 +877,74 @@ func init() { } var fileDescriptor_5edb0c35c07fa415 = []byte{ - // 965 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xa4, 0x56, 0xef, 0x8e, 0xdb, 0x44, - 0x10, 0x27, 0xc9, 0xe5, 0x92, 0x4c, 0xee, 0xae, 0x65, 0x7b, 0x42, 0xc6, 0x40, 0x7b, 0x58, 0x42, - 0x3a, 0x41, 0xeb, 0x94, 0x14, 0x89, 0x0a, 0x04, 0x52, 0xdb, 0x94, 0x5e, 0xf8, 0xd3, 0xab, 0xb6, - 0x15, 0x52, 0x8b, 0x84, 0xb5, 0x67, 0x2f, 0xf1, 0xb6, 0xf6, 0xda, 0xec, 0xae, 0x53, 0x99, 0x4f, - 0x3c, 0x0e, 0x5f, 0x78, 0x01, 0x1e, 0x86, 0x0f, 0x3c, 0x09, 0xf2, 0xee, 0x3a, 0xf1, 0xfd, 0xe9, - 0x25, 0x81, 0x4f, 0xde, 0x9d, 0x99, 0xdf, 0xcc, 0xec, 0xcc, 0x6f, 0x67, 0x0d, 0x1f, 0xe6, 0x49, - 0x31, 0x63, 0x5c, 0x8e, 0x22, 0x3a, 0x67, 0x21, 0x1d, 0xe5, 0x22, 0x53, 0x99, 0xdd, 0xf8, 0x7a, - 0x83, 0xae, 0xc7, 0x44, 0xc6, 0x2c, 0xcc, 0x44, 0xee, 0xf3, 0x2c, 0x25, 0x91, 0x6f, 0x21, 0xbe, - 0xb1, 0x72, 0x6f, 0xcc, 0xb2, 0x6c, 0x96, 0x58, 0xe8, 0x49, 0xf1, 0xcb, 0x48, 0xb1, 0x94, 0x4a, - 0x45, 0xd2, 0xdc, 0x38, 0x70, 0xaf, 0x9f, 0x35, 0x88, 0x0a, 0x41, 0x14, 0xcb, 0xb8, 0xd5, 0xdf, - 0xac, 0x73, 0x90, 0x31, 0x11, 0x34, 0x1a, 0x49, 0x25, 0x8a, 0x50, 0x49, 0x9b, 0x0b, 0x51, 0x4a, - 0xb0, 0x93, 0x42, 0xd9, 0x74, 0xdc, 0xc3, 0x4b, 0xad, 0xa5, 0x22, 0x4a, 0x1a, 0x4b, 0x6f, 0x1f, - 0xd0, 0x37, 0x8c, 0xcf, 0xa8, 0xc8, 0x05, 0xe3, 0x0a, 0xd3, 0x5f, 0x0b, 0x2a, 0x95, 0x47, 0xe1, - 0xda, 0x29, 0xa9, 0xcc, 0x33, 0x2e, 0x29, 0x7a, 0x0c, 0x3b, 0xe6, 0x3c, 0xc1, 0x4c, 0x64, 0x45, - 0xee, 0xb4, 0x0e, 0x3a, 0x87, 0xc3, 0xf1, 0x27, 0xfe, 0xe5, 0x87, 0xf7, 0x27, 0xfa, 0xf3, 0xa8, - 0x82, 0xe0, 0x61, 0xb4, 
0xdc, 0x78, 0xbf, 0x77, 0x60, 0xd8, 0x50, 0xa2, 0x77, 0x60, 0x7b, 0x4e, - 0x79, 0x94, 0x09, 0xa7, 0x75, 0xd0, 0x3a, 0x1c, 0x60, 0xbb, 0x43, 0x37, 0xc0, 0xc2, 0x02, 0x55, - 0xe6, 0xd4, 0x69, 0x6b, 0x25, 0x18, 0xd1, 0xb3, 0x32, 0xa7, 0x0d, 0x03, 0x4e, 0x52, 0xea, 0x74, - 0x9a, 0x06, 0x8f, 0x49, 0x4a, 0xd1, 0x11, 0xf4, 0xcc, 0x4e, 0x3a, 0x5b, 0x3a, 0x69, 0x7f, 0x75, - 0xd2, 0x8a, 0x86, 0x8a, 0x46, 0x26, 0x3f, 0x5c, 0xc3, 0xd1, 0x4f, 0x00, 0x8b, 0x6a, 0x4b, 0xa7, - 0xab, 0x9d, 0x7d, 0xb9, 0x41, 0x05, 0xfc, 0x7b, 0x0b, 0xf4, 0x43, 0xae, 0x44, 0x89, 0x1b, 0xee, - 0xdc, 0x1c, 0xae, 0x9c, 0x51, 0xa3, 0xab, 0xd0, 0x79, 0x45, 0x4b, 0x5b, 0x90, 0x6a, 0x89, 0x1e, - 0x41, 0x77, 0x4e, 0x92, 0xc2, 0xd4, 0x61, 0x38, 0xfe, 0xf4, 0x8d, 0xc1, 0x4d, 0xf3, 0x7d, 0xdb, - 0xfc, 0x65, 0x60, 0x6c, 0xf0, 0x5f, 0xb4, 0xef, 0xb6, 0xbc, 0xbf, 0x5a, 0xb0, 0x77, 0xfa, 0xa8, - 0x68, 0x0f, 0xda, 0xd3, 0x89, 0x0d, 0xd8, 0x9e, 0x4e, 0x90, 0x03, 0xbd, 0x98, 0x92, 0x44, 0xc5, - 0xa5, 0x8e, 0xd8, 0xc7, 0xf5, 0x16, 0xdd, 0x02, 0x64, 0x96, 0x41, 0x44, 0x65, 0x28, 0x58, 0x5e, - 0x11, 0xd6, 0x56, 0xff, 0x6d, 0xa3, 0x99, 0x2c, 0x15, 0xe8, 0x18, 0x86, 0xf1, 0xeb, 0x20, 0xc9, - 0x42, 0x92, 0x30, 0x55, 0x3a, 0x5b, 0x3a, 0x7d, 0x7f, 0xbd, 0xda, 0x7d, 0x6f, 0x51, 0x18, 0xe2, - 0xd7, 0xf5, 0xda, 0xf3, 0xab, 0xdc, 0x9b, 0x5a, 0xf4, 0x3e, 0x40, 0x1e, 0xb2, 0xe0, 0xa4, 0x90, - 0x01, 0x8b, 0xec, 0x19, 0xfa, 0x79, 0xc8, 0xee, 0x17, 0x72, 0x1a, 0x79, 0x23, 0xd8, 0xc3, 0x54, - 0x52, 0x31, 0xa7, 0x96, 0xe8, 0xe8, 0x03, 0xb0, 0x2c, 0x09, 0x58, 0x24, 0x35, 0x9f, 0x07, 0x78, - 0x60, 0x24, 0xd3, 0x48, 0x7a, 0x09, 0x5c, 0x59, 0x00, 0xec, 0x1d, 0x78, 0x0e, 0xbb, 0x61, 0xc6, - 0x15, 0x61, 0x9c, 0x8a, 0x40, 0x50, 0xa9, 0x83, 0x0c, 0xc7, 0x9f, 0xad, 0x3a, 0xc6, 0x83, 0x1a, - 0x64, 0x1c, 0xea, 0xbb, 0x8d, 0x77, 0xc2, 0x86, 0xd4, 0xfb, 0xa3, 0x0d, 0xfb, 0x17, 0x99, 0x21, - 0x0c, 0x5b, 0x94, 0xcf, 0xa5, 0xbd, 0x6f, 0x5f, 0xff, 0x97, 0x50, 0xfe, 0x43, 0x3e, 0xb7, 0x84, - 0xd3, 0xbe, 0xd0, 0x57, 0xb0, 0x9d, 0x66, 0x05, 0x57, 0xd2, 
0x69, 0x6b, 0xaf, 0x1f, 0xad, 0xf2, - 0xfa, 0x43, 0x65, 0x8d, 0x2d, 0x08, 0x4d, 0x96, 0x17, 0xaa, 0xa3, 0xf1, 0x1f, 0xaf, 0xd7, 0xc7, - 0xa7, 0x39, 0x0d, 0x17, 0x97, 0xc9, 0xfd, 0x1c, 0x06, 0x8b, 0xbc, 0x2e, 0x60, 0xfa, 0x7e, 0x93, - 0xe9, 0x83, 0x26, 0x6d, 0x7f, 0x86, 0xae, 0xce, 0x07, 0xbd, 0x07, 0x03, 0x45, 0xe4, 0xab, 0x20, - 0x27, 0x2a, 0xae, 0xfb, 0x5d, 0x09, 0x9e, 0x10, 0x15, 0x57, 0xca, 0x38, 0x93, 0xca, 0x28, 0x8d, - 0x8f, 0x7e, 0x25, 0xa8, 0x95, 0x82, 0x92, 0x28, 0xc8, 0x78, 0x52, 0x6a, 0xce, 0xf6, 0x71, 0xbf, - 0x12, 0x1c, 0xf3, 0xa4, 0xf4, 0x62, 0x80, 0x65, 0xbe, 0xff, 0x23, 0xc8, 0x01, 0x0c, 0x73, 0x2a, - 0x52, 0x26, 0x25, 0xcb, 0xb8, 0xb4, 0x57, 0xa3, 0x29, 0xf2, 0x5e, 0xc0, 0xce, 0xd3, 0x6a, 0x1e, - 0xd7, 0x8c, 0xfc, 0x16, 0xae, 0x85, 0x59, 0x92, 0xd0, 0xb0, 0xea, 0x5a, 0xc0, 0xb8, 0xaa, 0x3a, - 0x98, 0x58, 0x96, 0xbd, 0xeb, 0x9b, 0x67, 0xc2, 0xaf, 0x9f, 0x09, 0x7f, 0x62, 0x9f, 0x09, 0x8c, - 0x96, 0xa8, 0xa9, 0x05, 0x79, 0xcf, 0x61, 0xd7, 0xfa, 0xb6, 0xe4, 0x3d, 0x82, 0x6d, 0x3d, 0xb9, - 0x6b, 0x2a, 0xdd, 0xde, 0x60, 0x70, 0x19, 0x4f, 0x16, 0xef, 0xfd, 0xd9, 0x86, 0xab, 0x67, 0x95, - 0x6f, 0x9c, 0xdf, 0x08, 0xb6, 0x1a, 0x83, 0x5b, 0xaf, 0x2b, 0x59, 0x63, 0x56, 0xeb, 0x35, 0x7a, - 0x09, 0x7b, 0x8c, 0x4b, 0x45, 0x78, 0x48, 0x03, 0xfd, 0x48, 0xd9, 0x61, 0xfd, 0x60, 0xd3, 0x34, - 0xfd, 0xa9, 0x75, 0xa3, 0x77, 0x86, 0xf6, 0xbb, 0xac, 0x29, 0x73, 0x53, 0x40, 0xe7, 0x8d, 0x2e, - 0xe0, 0xe0, 0xbd, 0xd3, 0xd3, 0x76, 0xcd, 0xc7, 0xce, 0x14, 0xab, 0x41, 0xd8, 0xbf, 0x5b, 0xf5, - 0x53, 0x67, 0x4a, 0xf5, 0x1d, 0xf4, 0x64, 0x91, 0xa6, 0x44, 0x94, 0xb6, 0xb5, 0x6b, 0x8f, 0xf1, - 0x0a, 0xff, 0x63, 0xe5, 0x17, 0xd7, 0x1e, 0xd0, 0x11, 0x74, 0x4d, 0xb9, 0x4c, 0x8e, 0xe3, 0x4d, - 0x5c, 0x1d, 0x9f, 0xbc, 0xa4, 0xa1, 0xc2, 0xc6, 0x01, 0xba, 0x0b, 0x83, 0xc5, 0x9f, 0x89, 0x6e, - 0xcd, 0x70, 0xec, 0x9e, 0xe3, 0xdc, 0xb3, 0xda, 0x02, 0x2f, 0x8d, 0xc7, 0xff, 0xb4, 0x61, 0xc7, - 0x1c, 0xf0, 0x89, 0x0e, 0x86, 0x7e, 0x83, 0x61, 0xe3, 0x1f, 0x02, 0x8d, 0x57, 0x15, 0xee, 0xfc, - 
0x6f, 0x88, 0x7b, 0x67, 0x23, 0x8c, 0xe1, 0xb8, 0xf7, 0xd6, 0xed, 0x16, 0x4a, 0xa0, 0x67, 0xe7, - 0x36, 0x5a, 0xf9, 0xbe, 0x9c, 0x7e, 0x11, 0xdc, 0xd1, 0xda, 0xf6, 0x75, 0x3c, 0x14, 0x43, 0xd7, - 0x34, 0xf5, 0xe6, 0x2a, 0x6c, 0xf3, 0xa6, 0xbb, 0xb7, 0xd6, 0xb4, 0x5e, 0x9e, 0xeb, 0x7e, 0xef, - 0x45, 0xd7, 0x74, 0x61, 0x5b, 0x7f, 0xee, 0xfc, 0x1b, 0x00, 0x00, 0xff, 0xff, 0x11, 0xd4, 0x56, - 0x04, 0x9b, 0x0a, 0x00, 0x00, + // 1061 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xa4, 0x56, 0xdd, 0x6e, 0x1b, 0x45, + 0x14, 0xae, 0xed, 0xd8, 0x89, 0x8f, 0xf3, 0xe3, 0x4c, 0x02, 0x32, 0x06, 0xda, 0xb0, 0x12, 0x28, + 0x2a, 0xed, 0xba, 0xb8, 0x48, 0x54, 0x20, 0x2a, 0x25, 0xb1, 0x49, 0x0c, 0x21, 0xa9, 0x26, 0xa1, + 0x52, 0x8a, 0xc4, 0x6a, 0xb2, 0x3b, 0x78, 0xa7, 0xdd, 0x9d, 0x5d, 0x76, 0x66, 0x5d, 0x99, 0x2b, + 0x1e, 0x87, 0x1b, 0x5e, 0x89, 0x0b, 0x1e, 0x83, 0x2b, 0xb4, 0x33, 0xb3, 0xf6, 0xe6, 0xa7, 0xb5, + 0x5d, 0xae, 0x76, 0xe6, 0x9c, 0xf3, 0x7d, 0x73, 0xe6, 0xcc, 0xf9, 0x59, 0xf8, 0x24, 0x0e, 0xd2, + 0x21, 0xe3, 0xa2, 0xe3, 0xd1, 0x11, 0x73, 0x69, 0x27, 0x4e, 0x22, 0x19, 0x99, 0x8d, 0xad, 0x36, + 0xe8, 0xae, 0x4f, 0x84, 0xcf, 0xdc, 0x28, 0x89, 0x6d, 0x1e, 0x85, 0xc4, 0xb3, 0x0d, 0xc4, 0xd6, + 0x56, 0xed, 0x7b, 0xc3, 0x28, 0x1a, 0x06, 0x06, 0x7a, 0x99, 0xfe, 0xda, 0x91, 0x2c, 0xa4, 0x42, + 0x92, 0x30, 0xd6, 0x04, 0xed, 0xbb, 0xd7, 0x0d, 0xbc, 0x34, 0x21, 0x92, 0x45, 0xdc, 0xe8, 0x1f, + 0xe4, 0x3e, 0x08, 0x9f, 0x24, 0xd4, 0xeb, 0x08, 0x99, 0xa4, 0xae, 0x14, 0xc6, 0x17, 0x22, 0x65, + 0xc2, 0x2e, 0x53, 0x69, 0xdc, 0x69, 0xef, 0xbe, 0xd5, 0x5a, 0x48, 0x22, 0x85, 0xb6, 0xb4, 0xb6, + 0x01, 0x7d, 0xc7, 0xf8, 0x90, 0x26, 0x71, 0xc2, 0xb8, 0xc4, 0xf4, 0xb7, 0x94, 0x0a, 0x69, 0x51, + 0xd8, 0xba, 0x22, 0x15, 0x71, 0xc4, 0x05, 0x45, 0x27, 0xb0, 0xaa, 0xef, 0xe3, 0x0c, 0x93, 0x28, + 0x8d, 0x5b, 0xa5, 0x9d, 0xca, 0x6e, 0xa3, 0xfb, 0xb9, 0xfd, 0xf6, 0xcb, 0xdb, 0x3d, 0xf5, 0x39, + 0xcc, 0x20, 0xb8, 0xe1, 0x4d, 0x37, 0xd6, 0x1f, 0x15, 
0x68, 0x14, 0x94, 0xe8, 0x7d, 0xa8, 0x8d, + 0x28, 0xf7, 0xa2, 0xa4, 0x55, 0xda, 0x29, 0xed, 0xd6, 0xb1, 0xd9, 0xa1, 0x7b, 0x60, 0x60, 0x8e, + 0x1c, 0xc7, 0xb4, 0x55, 0x56, 0x4a, 0xd0, 0xa2, 0xf3, 0x71, 0x4c, 0x0b, 0x06, 0x9c, 0x84, 0xb4, + 0x55, 0x29, 0x1a, 0x9c, 0x90, 0x90, 0xa2, 0x23, 0x58, 0xd6, 0x3b, 0xd1, 0x5a, 0x52, 0x4e, 0xdb, + 0xb3, 0x9d, 0x96, 0xd4, 0x95, 0xd4, 0xd3, 0xfe, 0xe1, 0x1c, 0x8e, 0x7e, 0x06, 0x98, 0x44, 0x5b, + 0xb4, 0xaa, 0x8a, 0xec, 0x9b, 0x05, 0x22, 0x60, 0xef, 0x4d, 0xd0, 0x7d, 0x2e, 0x93, 0x31, 0x2e, + 0xd0, 0xb5, 0x63, 0xd8, 0xb8, 0xa6, 0x46, 0x4d, 0xa8, 0xbc, 0xa2, 0x63, 0x13, 0x90, 0x6c, 0x89, + 0x0e, 0xa1, 0x3a, 0x22, 0x41, 0xaa, 0xe3, 0xd0, 0xe8, 0x7e, 0xf1, 0xc6, 0xc3, 0xf5, 0xe3, 0xdb, + 0xe6, 0xf1, 0xa7, 0x07, 0x63, 0x8d, 0xff, 0xba, 0xfc, 0xa4, 0x64, 0xfd, 0x5b, 0x82, 0xf5, 0xab, + 0x57, 0x45, 0xeb, 0x50, 0x1e, 0xf4, 0xcc, 0x81, 0xe5, 0x41, 0x0f, 0xb5, 0x60, 0xd9, 0xa7, 0x24, + 0x90, 0xfe, 0x58, 0x9d, 0xb8, 0x82, 0xf3, 0x2d, 0x7a, 0x08, 0x48, 0x2f, 0x1d, 0x8f, 0x0a, 0x37, + 0x61, 0x71, 0x96, 0xb0, 0x26, 0xfa, 0x9b, 0x5a, 0xd3, 0x9b, 0x2a, 0xd0, 0x29, 0x34, 0xfc, 0xd7, + 0x4e, 0x10, 0xb9, 0x24, 0x60, 0x72, 0xdc, 0x5a, 0x52, 0xee, 0xdb, 0xf3, 0xc5, 0xee, 0xd8, 0xa0, + 0x30, 0xf8, 0xaf, 0xf3, 0x35, 0x7a, 0x0a, 0x35, 0x7d, 0xc7, 0x56, 0x75, 0xa7, 0xb4, 0xbb, 0xde, + 0xfd, 0x6c, 0x16, 0xd7, 0x99, 0xb2, 0xc6, 0x06, 0x65, 0xd9, 0xd9, 0xdd, 0x8b, 0xec, 0xe8, 0x23, + 0x80, 0xd8, 0x65, 0xce, 0x65, 0x2a, 0x1c, 0xe6, 0x99, 0x18, 0xac, 0xc4, 0x2e, 0xdb, 0x4f, 0xc5, + 0xc0, 0xb3, 0x3a, 0xb0, 0x8e, 0xa9, 0xa0, 0xc9, 0x88, 0x9a, 0x42, 0x41, 0x1f, 0x83, 0xc9, 0x32, + 0x87, 0x79, 0x42, 0xd5, 0x43, 0x1d, 0xd7, 0xb5, 0x64, 0xe0, 0x09, 0x2b, 0x80, 0x8d, 0x09, 0xc0, + 0xd4, 0xd0, 0x05, 0xac, 0xb9, 0x11, 0x97, 0x84, 0x71, 0x9a, 0x38, 0x09, 0x15, 0xea, 0x90, 0x46, + 0xf7, 0xcb, 0x59, 0xae, 0x1f, 0xe4, 0x20, 0x4d, 0xa8, 0x7a, 0x03, 0x5e, 0x75, 0x0b, 0x52, 0xeb, + 0xcf, 0x32, 0x6c, 0xdf, 0x66, 0x86, 0x30, 0x2c, 0x51, 0x3e, 0x12, 0xa6, 0x5e, 0x9f, 0xbe, 
0xcb, + 0x51, 0x76, 0x9f, 0x8f, 0x4c, 0xc2, 0x2a, 0x2e, 0xf4, 0x2d, 0xd4, 0xc2, 0x28, 0xe5, 0x52, 0xb4, + 0xca, 0x8a, 0xf5, 0xd3, 0x59, 0xac, 0x3f, 0x66, 0xd6, 0xd8, 0x80, 0x50, 0x6f, 0x5a, 0x90, 0x15, + 0x85, 0xbf, 0x3f, 0x5f, 0x1e, 0x9c, 0xc5, 0xd4, 0x9d, 0x14, 0x63, 0xfb, 0x2b, 0xa8, 0x4f, 0xfc, + 0xba, 0xa5, 0x52, 0xb6, 0x8b, 0x95, 0x52, 0x2f, 0xa6, 0xfd, 0x2f, 0x50, 0x55, 0xfe, 0xa0, 0x0f, + 0xa1, 0x2e, 0x89, 0x78, 0xe5, 0xc4, 0x44, 0xfa, 0xf9, 0x7b, 0x67, 0x82, 0x67, 0x44, 0xfa, 0x99, + 0xd2, 0x8f, 0x84, 0xd4, 0x4a, 0xcd, 0xb1, 0x92, 0x09, 0x72, 0x65, 0x42, 0x89, 0xe7, 0x44, 0x3c, + 0x18, 0xab, 0x9c, 0x5f, 0xc1, 0x2b, 0x99, 0xe0, 0x94, 0x07, 0x63, 0xcb, 0x07, 0x98, 0xfa, 0xfb, + 0x3f, 0x0e, 0xd9, 0x81, 0x46, 0x4c, 0x93, 0x90, 0x09, 0xc1, 0x22, 0x2e, 0x4c, 0x69, 0x15, 0x45, + 0xd6, 0x0b, 0x58, 0x3d, 0xcb, 0xfa, 0x79, 0x9e, 0x91, 0xdf, 0xc3, 0x96, 0x1b, 0x05, 0x01, 0x75, + 0xb3, 0x57, 0x73, 0x18, 0x97, 0xd9, 0x0b, 0x06, 0x26, 0xcb, 0x3e, 0xb0, 0xf5, 0x98, 0xb1, 0xf3, + 0x31, 0x63, 0xf7, 0xcc, 0x98, 0xc1, 0x68, 0x8a, 0x1a, 0x18, 0x90, 0x75, 0x01, 0x6b, 0x86, 0xdb, + 0x24, 0xef, 0x11, 0xd4, 0x54, 0xe7, 0xcf, 0x53, 0xe9, 0xd1, 0x02, 0x8d, 0x4f, 0x33, 0x19, 0xbc, + 0xf5, 0x57, 0x19, 0x9a, 0xd7, 0x95, 0x6f, 0xec, 0xff, 0x08, 0x96, 0x0a, 0x8d, 0x5f, 0xad, 0x33, + 0x59, 0xa1, 0xd7, 0xab, 0x35, 0x7a, 0x09, 0xeb, 0x8c, 0x0b, 0x49, 0xb8, 0x4b, 0x1d, 0x35, 0xe4, + 0x4c, 0xb3, 0x3f, 0x58, 0xd4, 0x4d, 0x7b, 0x60, 0x68, 0xd4, 0x4e, 0xa7, 0xfd, 0x1a, 0x2b, 0xca, + 0xda, 0x21, 0xa0, 0x9b, 0x46, 0xb7, 0xe4, 0xe0, 0xde, 0xd5, 0x6e, 0x3d, 0xe7, 0xb0, 0xd4, 0xc1, + 0x2a, 0x24, 0xec, 0xdf, 0xa5, 0x7c, 0x54, 0xea, 0x50, 0xfd, 0x00, 0xcb, 0x22, 0x0d, 0x43, 0x92, + 0x8c, 0xcd, 0xd3, 0xce, 0x3d, 0x06, 0x32, 0xfc, 0xf3, 0x8c, 0x17, 0xe7, 0x0c, 0xe8, 0x08, 0xaa, + 0x3a, 0x5c, 0xda, 0xc7, 0xee, 0x22, 0x54, 0xa7, 0x97, 0x2f, 0xa9, 0x2b, 0xb1, 0x26, 0x40, 0x4f, + 0xa0, 0x3e, 0xf9, 0xb3, 0x51, 0x4f, 0xd3, 0xe8, 0xb6, 0x6f, 0xe4, 0xdc, 0x79, 0x6e, 0x81, 0xa7, + 0xc6, 0xd6, 0x29, 0xac, 0x99, 
0xfb, 0xf9, 0x24, 0x61, 0x7c, 0x58, 0x68, 0xee, 0xa5, 0x77, 0x69, + 0xee, 0xf7, 0x2f, 0xa0, 0xa6, 0x25, 0xa8, 0x09, 0xab, 0x67, 0x47, 0x7b, 0xb8, 0xdf, 0x73, 0x7e, + 0x3a, 0x39, 0xeb, 0x9f, 0x37, 0xef, 0xa0, 0x4d, 0x58, 0x33, 0x92, 0xbd, 0x83, 0xf3, 0xc1, 0xf3, + 0x7e, 0xb3, 0x84, 0xb6, 0x60, 0xc3, 0x88, 0x06, 0x27, 0x46, 0x58, 0x46, 0xef, 0xc1, 0xe6, 0x44, + 0xd8, 0x3f, 0x1e, 0x1c, 0x0e, 0xf6, 0x8f, 0xfb, 0xcd, 0x4a, 0xf7, 0x9f, 0x32, 0xac, 0x6a, 0x67, + 0x9f, 0x29, 0x17, 0xd0, 0xef, 0xd0, 0x28, 0xfc, 0x2f, 0xa1, 0xee, 0x2c, 0x57, 0x6f, 0xfe, 0x72, + 0xb5, 0x1f, 0x2f, 0x84, 0xd1, 0xf5, 0x68, 0xdd, 0x79, 0x54, 0x42, 0x01, 0x2c, 0x9b, 0x19, 0x83, + 0x66, 0xce, 0xd2, 0xab, 0xd3, 0xab, 0xdd, 0x99, 0xdb, 0x3e, 0x3f, 0x0f, 0xf9, 0x50, 0xd5, 0x09, + 0xf8, 0x60, 0xe6, 0x73, 0x14, 0xba, 0x52, 0xfb, 0xe1, 0x9c, 0xd6, 0xd3, 0x7b, 0xed, 0x2f, 0xbf, + 0xa8, 0xea, 0x8c, 0xa9, 0xa9, 0xcf, 0xe3, 0xff, 0x02, 0x00, 0x00, 0xff, 0xff, 0x32, 0x18, 0xe8, + 0x73, 0x87, 0x0b, 0x00, 0x00, } // Reference imports to suppress errors if they are not otherwise used. diff --git a/plugins/device/proto/device.proto b/plugins/device/proto/device.proto index 2387371dfdb..87db0828870 100644 --- a/plugins/device/proto/device.proto +++ b/plugins/device/proto/device.proto @@ -74,6 +74,9 @@ message DetectedDevice { // hw_locality is optionally set to expose hardware locality information for // more optimal placement decisions. DeviceLocality hw_locality = 4; + + // shared reports on the presence and state of a device sharing daemon + Shared shared = 5; } // DeviceLocality is used to expose HW locality information about a device. @@ -177,3 +180,15 @@ message DeviceStats { // timestamp is the time the statistics were collected. 
google.protobuf.Timestamp timestamp = 3; } + +// DeviceSharing is a representation of the DeviceSharing string enum +message DeviceSharing { + Shared shared = 1; +} + +enum Shared { + SHARED_UNSET = 0; + SHARED_ACTIVE = 1; + SHARED_INACTIVE = 2; + SHARED_INELIGIBLE = 3; +} diff --git a/plugins/device/util.go b/plugins/device/util.go index 24e99516e0c..5a1b273edeb 100644 --- a/plugins/device/util.go +++ b/plugins/device/util.go @@ -63,6 +63,7 @@ func convertProtoDevice(in *proto.DetectedDevice) *Device { Healthy: in.Healthy, HealthDesc: in.HealthDescription, HwLocality: convertProtoDeviceLocality(in.HwLocality), + Shared: convertProtoShared(in.GetShared()), } } @@ -199,6 +200,7 @@ func convertStructDevice(in *Device) *proto.DetectedDevice { Healthy: in.Healthy, HealthDescription: in.HealthDesc, HwLocality: convertStructDeviceLocality(in.HwLocality), + Shared: convertShared(in.Shared), } } @@ -388,3 +390,32 @@ func convertStructDeviceStats(in *DeviceStats) *proto.DeviceStats { Timestamp: ts, } } + +// convertShared maps the string-typed Shared state onto its proto enum value; +// unset or unrecognised values map to SHARED_UNSET. +func convertShared(s Shared) proto.Shared { + switch s { + case SharingIneligible: + return proto.Shared_SHARED_INELIGIBLE + case SharingActive: + return proto.Shared_SHARED_ACTIVE + case SharingInactive: + return proto.Shared_SHARED_INACTIVE + } + return proto.Shared_SHARED_UNSET +} + +// convertProtoShared maps the proto Shared enum onto the string-typed Shared +// state. A direct Shared(enum) conversion would turn the enum's integer value +// into a one-rune string instead of the matching Sharing* constant. +func convertProtoShared(s proto.Shared) Shared { + switch s { + case proto.Shared_SHARED_INELIGIBLE: + return SharingIneligible + case proto.Shared_SHARED_ACTIVE: + return SharingActive + case proto.Shared_SHARED_INACTIVE: + return SharingInactive + } + return SharingUnset +} diff --git a/scheduler/feasible/device.go b/scheduler/feasible/device.go index 66f083f7c92..3b06062676b 100644 --- a/scheduler/feasible/device.go +++ b/scheduler/feasible/device.go @@ -127,17 +127,30 @@ func (d *deviceAllocator) createOffer(mem *memoryNodeMatcher, ask *structs.Reque // Check if we have enough unused instances to use this assignable := []string{} + willShare := make(map[string]bool) + for instanceID, v := range devInst.Instances { - if v != 0 { + var instanceSharedStatus structs.Shared + // mark shareable if we find a single shareable device + instanceSharedStatus = devInst.GetSharedByID(instanceID) + if instanceSharedStatus == structs.DeviceSharingActive { + d.ctx.Logger().Trace("device
sharing value", "value", instanceSharedStatus) + } + + if v != 0 && instanceSharedStatus != structs.DeviceSharingActive { continue } + if !mem.Matches(instanceID, devInst.Device) { continue } - if d.deviceIDMatchesConstraint(instanceID, ask.Constraints, devInst.Device) { + + if d.deviceIDConstraintAndSharingChecks(instanceID, ask.Constraints, ask.ShareDevices, devInst.Device) { assignable = append(assignable, instanceID) + if ask.ShareDevices != nil { + willShare[instanceID] = ask.ShareDevices.Enabled //only update willShare map if assignable + } } - // Don't assign more than the ask if len(assignable) == int(ask.Count) { break @@ -193,6 +206,7 @@ func (d *deviceAllocator) createOffer(mem *memoryNodeMatcher, ask *structs.Reque Type: id.Type, Name: id.Name, DeviceIDs: assignable, + WillShare: willShare, } } @@ -200,7 +214,6 @@ func (d *deviceAllocator) createOffer(mem *memoryNodeMatcher, ask *structs.Reque if offer == nil { return nil, 0.0, fmt.Errorf("no devices match request") } - return offer, matchedWeights, nil } @@ -234,3 +247,44 @@ func (d *deviceAllocator) deviceIDMatchesConstraint(id string, constraints struc return true } + +// deviceIDAllowsSharing checks a device instance ID against the +// device's Shared status to ensure we're only assigning devices that +// are set up to be shared. 
+func (d *deviceAllocator) deviceIDAllowsSharing(id string, sharing *structs.ShareDevices, device *structs.NodeDeviceResource) bool { + canShare := false + for _, dev := range device.Instances { + // if the device has sharing active + if dev.ID == id { + if sharing.Enabled == true && dev.Shared.String() == structs.DeviceSharingActive.String() { + canShare = true + } else { + continue + } + } + } + // if the device and task are sharable and we're targeting a specific GPU + // confirm it's the one we want + if len(sharing.SharedDeviceId) != 0 { + if sharing.SharedDeviceId != id { + canShare = false + } + } + + return canShare +} + +// deviceIDConstraintAndSharingChecks returns a single boolean to report whether +// device ID matches all of the constraints and if applicable all of the +// requested sharing modes +func (d *deviceAllocator) deviceIDConstraintAndSharingChecks(id string, constraints structs.Constraints, sharing *structs.ShareDevices, device *structs.NodeDeviceResource) bool { + if passesConstraint := d.deviceIDMatchesConstraint(id, constraints, device); !passesConstraint { + return false + } + if sharing != nil { + if passesSharing := d.deviceIDAllowsSharing(id, sharing, device); !passesSharing { + return false + } + } + return true +} diff --git a/scheduler/feasible/device_test.go b/scheduler/feasible/device_test.go index 14f52cfb13d..756bcde70e4 100644 --- a/scheduler/feasible/device_test.go +++ b/scheduler/feasible/device_test.go @@ -35,6 +35,19 @@ func deviceRequest(name string, count uint64, } } +// sharedDeviceRequest takes the name, count and potential constraints and affinities +// and returns a device request. 
+func sharedDeviceRequest(name string, count uint64, + constraints []*structs.Constraint, affinities []*structs.Affinity, shareDevices *structs.ShareDevices) *structs.RequestedDevice { + return &structs.RequestedDevice{ + Name: name, + Count: count, + Constraints: constraints, + Affinities: affinities, + ShareDevices: shareDevices, + } +} + // devNode returns a node containing two devices, an nvidia gpu and an intel // FPGA. func devNode() *structs.Node { @@ -615,3 +628,97 @@ func Test_memoryNodeMatcher(t *testing.T) { }) } } + +func TestDeviceAllocator_Allocate_SharedDevices(t *testing.T) { + ci.Parallel(t) + + n := mock.SharedNvidiaNode() + nvidia0 := n.NodeResources.Devices[0] + SharedDeviceId0 := n.NodeResources.Devices[0].Instances[0] + SharedDeviceId1 := n.NodeResources.Devices[0].Instances[1] + _, ctx := MockContext(t) + d := newDeviceAllocator(ctx, n) + must.NotNil(t, d) + mem := &memoryNodeMatcher{ + memoryNode: -1, // we are not testing + } + + for _, tc := range []struct { + name string + deviceName string + deviceID string + shareDevices *structs.ShareDevices + count uint64 + expectedErr string + }{ + { + name: "happy path", + deviceName: "nvidia/gpu", + deviceID: SharedDeviceId0.ID, + shareDevices: &structs.ShareDevices{Enabled: true}, + count: 1, + }, + { + name: "structs.ShareDevices can be nil", + deviceName: "nvidia/gpu", + deviceID: SharedDeviceId0.ID, + shareDevices: nil, + count: 1, + }, + { + name: "if present, shareDevices must match device", + deviceName: "nvidia/gpu", + deviceID: SharedDeviceId0.ID, + shareDevices: &structs.ShareDevices{Enabled: false}, + count: 1, + expectedErr: "no devices match request", + }, + { + name: "if present, gpu_id must match device", + deviceName: "nvidia/gpu", + deviceID: SharedDeviceId0.ID, + shareDevices: &structs.ShareDevices{Enabled: false, SharedDeviceId: SharedDeviceId1.ID}, + count: 1, + expectedErr: "no devices match request", + }, + { + name: "sharing passes, constraint doesn't match", + deviceName: 
"nvidia/gpu", + deviceID: "notanID", + shareDevices: &structs.ShareDevices{Enabled: true}, + count: 1, + expectedErr: "no devices match request", + }, + } { + t.Run(tc.name, func(t *testing.T) { + testConstraints := []*structs.Constraint{ + { + LTarget: "${device.ids}", + Operand: "set_contains", + RTarget: tc.deviceID, + }, + } + ask := sharedDeviceRequest(tc.deviceName, tc.count, testConstraints, nil, tc.shareDevices) + + out, _, err := d.createOffer(mem, ask) + if len(tc.expectedErr) != 0 { + must.ErrorContains(t, err, tc.expectedErr) + must.Nil(t, out) + return + } + must.NoError(t, err) + must.NotNil(t, out) + must.Len(t, 1, out.DeviceIDs) + // validate expected instance and device IDs + must.SliceContains(t, collectInstanceIDs(nvidia0), out.DeviceIDs[0]) + must.SliceContains(t, out.DeviceIDs, nvidia0.Instances[0].ID) + must.Eq(t, tc.deviceID, out.DeviceIDs[0]) + + if tc.shareDevices != nil { + must.MapContainsKey(t, out.WillShare, out.DeviceIDs[0]) + } + + }) + } + +} diff --git a/scheduler/feasible/feasible_test.go b/scheduler/feasible/feasible_test.go index 1db3b2631b2..97265f2a9d8 100644 --- a/scheduler/feasible/feasible_test.go +++ b/scheduler/feasible/feasible_test.go @@ -3193,7 +3193,23 @@ func TestDeviceChecker(t *testing.T) { }, } } - + // will create a taskgroup with with len(devices) tasks, each task will request + // all devices + getSharedTg := func(devices ...*structs.RequestedDevice) *structs.TaskGroup { + var tasks []*structs.Task + + for range devices { + tasks = append(tasks, &structs.Task{ + Resources: &structs.Resources{ + Devices: devices, + }, + }) + } + return &structs.TaskGroup{ + Name: "example", + Tasks: tasks, + } + } // Just type gpuTypeReq := &structs.RequestedDevice{ Name: "gpu", @@ -3235,7 +3251,6 @@ func TestDeviceChecker(t *testing.T) { n.NodeResources.Devices = devices return n } - nvidia_A := &structs.NodeDeviceResource{ Vendor: "nvidia", Type: "gpu", @@ -3277,7 +3292,12 @@ func TestDeviceChecker(t *testing.T) { }, }, } - + 
makeDeviceSharable := func(device *structs.NodeDeviceResource) *structs.NodeDeviceResource { + for _, v := range device.Instances { + v.Shared = structs.DeviceSharingActive + } + return device + } nvidiaUnhealthy := &structs.NodeDeviceResource{ Vendor: "nvidia", Type: "gpu", @@ -3299,6 +3319,7 @@ func TestDeviceChecker(t *testing.T) { Result bool NodeDevices []*structs.NodeDeviceResource RequestedDevices []*structs.RequestedDevice + isShared bool }{ { Name: "no devices on node", @@ -3360,6 +3381,20 @@ func TestDeviceChecker(t *testing.T) { NodeDevices: []*structs.NodeDeviceResource{nvidia_A}, RequestedDevices: []*structs.RequestedDevice{gpuTypeHighCountReq}, }, + { + Name: "shared device and two tasks", + Result: true, + NodeDevices: []*structs.NodeDeviceResource{makeDeviceSharable(nvidia_A)}, + RequestedDevices: []*structs.RequestedDevice{gpuTypeReq, gpuTypeReq}, + isShared: true, + }, + { + Name: "unshared device and two tasks", + Result: true, + NodeDevices: []*structs.NodeDeviceResource{nvidia_A}, + RequestedDevices: []*structs.RequestedDevice{gpuTypeReq, gpuTypeReq}, + isShared: true, + }, { Name: "meets constraints requirement", Result: true, @@ -3570,6 +3605,13 @@ func TestDeviceChecker(t *testing.T) { t.Run(c.Name, func(t *testing.T) { _, ctx := MockContext(t) checker := NewDeviceChecker(ctx) + var tg *structs.TaskGroup + tg = getTg(c.RequestedDevices...) + if c.isShared { + getSharedTg(c.RequestedDevices...) + } + checker.SetTaskGroup(tg) + checker.SetTaskGroup(getTg(c.RequestedDevices...)) if act := checker.Feasible(getNode(c.NodeDevices...)); act != c.Result { t.Fatalf("got %v; want %v", act, c.Result) diff --git a/scheduler/feasible/rank.go b/scheduler/feasible/rank.go index 294ad58ca82..add4f587570 100644 --- a/scheduler/feasible/rank.go +++ b/scheduler/feasible/rank.go @@ -224,7 +224,6 @@ NEXTNODE: iter.ctx.Logger().Named("binpack").Error("failed retrieving proposed allocations", "error", err) continue } - // Index the existing network usage. 
// This should never collide, since it represents the current state of // the node. If it does collide though, it means we found a bug! So diff --git a/scheduler/feasible/rank_test.go b/scheduler/feasible/rank_test.go index 42e57e5b869..050d384eb47 100644 --- a/scheduler/feasible/rank_test.go +++ b/scheduler/feasible/rank_test.go @@ -1648,6 +1648,20 @@ func TestBinPackIterator_Devices(t *testing.T) { }, } + sharedNvidiaNode := mock.SharedNvidiaNode() + sharedDevs := sharedNvidiaNode.NodeResources.Devices[0].Instances + sharedNvidiaDevices := []string{sharedDevs[0].ID, sharedDevs[1].ID} + + sharedNvidiaDev0 := mock.Alloc() + sharedNvidiaDev0.AllocatedResources.Tasks["web"].Devices = []*structs.AllocatedDeviceResource{ + { + Type: "gpu", + Vendor: "nvidia", + Name: "1080ti", + DeviceIDs: []string{sharedNvidiaDevices[0]}, + WillShare: map[string]bool{sharedNvidiaDevices[0]: true}, + }, + } type devPlacementTuple struct { Count int ExcludeIDs []string @@ -1887,6 +1901,66 @@ func TestBinPackIterator_Devices(t *testing.T) { }, PlannedAllocs: []*structs.Allocation{nvidiaDev0}, }, + { + Name: "shared request with planned uses", + Node: sharedNvidiaNode, + TaskGroup: &structs.TaskGroup{ + EphemeralDisk: &structs.EphemeralDisk{}, + Tasks: []*structs.Task{ + { + Name: "web2", + Resources: &structs.Resources{ + CPU: 1024, + MemoryMB: 1024, + Devices: []*structs.RequestedDevice{ + { + Name: "nvidia/gpu/1080ti", + Count: 1, + ShareDevices: &structs.ShareDevices{Enabled: true}, + }, + }, + }, + }, + { + Name: "web3", + Resources: &structs.Resources{ + CPU: 1024, + MemoryMB: 1024, + Devices: []*structs.RequestedDevice{ + { + Name: "nvidia/gpu/1080ti", + Count: 1, + ShareDevices: &structs.ShareDevices{Enabled: true}, + }, + }, + }, + }, + }, + }, + ExpectedPlacements: map[string]map[structs.DeviceIdTuple]devPlacementTuple{ + "web2": { + { + Vendor: "nvidia", + Type: "gpu", + Name: "1080ti", + }: { + Count: 1, + ExcludeIDs: []string{sharedNvidiaDevices[1]}, + }, + }, + "web3": { + { 
+ Vendor: "nvidia", + Type: "gpu", + Name: "1080ti", + }: { + Count: 1, + ExcludeIDs: []string{sharedNvidiaDevices[1]}, + }, + }, + }, + PlannedAllocs: []*structs.Allocation{sharedNvidiaDev0}, + }, } for _, c := range cases { @@ -1958,9 +2032,9 @@ func TestBinPackIterator_Devices(t *testing.T) { } // Tests that bin packing iterator fails due to overprovisioning of devices +// when devices are not shared. Demonstrates shared devices do not fail // This test has devices at task level func TestBinPackIterator_Device_Failure_With_Eviction(t *testing.T) { - _, ctx := MockContext(t) nodes := []*RankedNode{ { Node: &structs.Node{ @@ -1999,71 +2073,124 @@ func TestBinPackIterator_Device_Failure_With_Eviction(t *testing.T) { }, } - // Add a planned alloc that takes up a gpu - plan := ctx.Plan() - plan.NodeAllocation[nodes[0].Node.ID] = []*structs.Allocation{ + for _, tc := range []struct { + name string + nodes []*RankedNode + deviceShared bool + taskWillShare bool + allocWillShare bool + rankedNodes int + exhaustedNodes int + }{ { - AllocatedResources: &structs.AllocatedResources{ - Tasks: map[string]*structs.AllocatedTaskResources{ - "web": { - Cpu: structs.AllocatedCpuResources{ - CpuShares: 2048, - }, - Memory: structs.AllocatedMemoryResources{ - MemoryMB: 2048, - }, - Networks: []*structs.NetworkResource{}, - Devices: []*structs.AllocatedDeviceResource{ - { - Vendor: "nvidia", - Type: "gpu", - Name: "SOME-GPU", - DeviceIDs: []string{"1"}, + name: "expect failure", + nodes: nodes, + rankedNodes: 0, + exhaustedNodes: 1, + }, + { + name: "shared device, expect success", + nodes: nodes, + deviceShared: true, + rankedNodes: 1, + exhaustedNodes: 0, + }, + } { + t.Run(tc.name, func(t *testing.T) { + _, ctx := MockContext(t) + nodes := tc.nodes + // , existing allocation, and task + if tc.deviceShared { + var n []*RankedNode + + //mark gpu as SharingActive on RankedNodes + for _, v := range tc.nodes { + newNode := v.Node.Copy() + 
newNode.NodeResources.Devices[0].Instances[0].Shared = structs.DeviceSharingActive + n = append(n, &RankedNode{ + Node: newNode, + }) + } + //overwrite RankedNodes + nodes = n + } + + plan := ctx.Plan() + plan.NodeAllocation[nodes[0].Node.ID] = []*structs.Allocation{ + { + AllocatedResources: &structs.AllocatedResources{ + Tasks: map[string]*structs.AllocatedTaskResources{ + "web": { + Cpu: structs.AllocatedCpuResources{ + CpuShares: 2048, + }, + Memory: structs.AllocatedMemoryResources{ + MemoryMB: 2048, + }, + Networks: []*structs.NetworkResource{}, + Devices: []*structs.AllocatedDeviceResource{ + { + Vendor: "nvidia", + Type: "gpu", + Name: "SOME-GPU", + DeviceIDs: []string{"1"}, + }, + }, }, }, + Shared: structs.AllocatedSharedResources{}, }, }, - Shared: structs.AllocatedSharedResources{}, - }, - }, - } - static := NewStaticRankIterator(ctx, nodes) - - // Create a task group with gpu device specified - taskGroup := &structs.TaskGroup{ - EphemeralDisk: &structs.EphemeralDisk{}, - Tasks: []*structs.Task{ - { - Name: "web", - Resources: &structs.Resources{ - CPU: 1024, - MemoryMB: 1024, - Networks: []*structs.NetworkResource{}, - Devices: structs.ResourceDevices{ - { - Name: "nvidia/gpu", - Count: 1, + } + tg := &structs.TaskGroup{ + EphemeralDisk: &structs.EphemeralDisk{}, + Tasks: []*structs.Task{ + { + Name: "web", + Resources: &structs.Resources{ + CPU: 1024, + MemoryMB: 1024, + Networks: []*structs.NetworkResource{}, + Devices: structs.ResourceDevices{ + { + Name: "nvidia/gpu", + Count: 1, + }, + }, + NUMA: &structs.NUMA{Affinity: structs.NoneNUMA}, + }, + }, + { + Name: "web", + Resources: &structs.Resources{ + CPU: 1024, + MemoryMB: 1024, + Networks: []*structs.NetworkResource{}, + Devices: structs.ResourceDevices{ + { + Name: "nvidia/gpu", + Count: 1, + }, + }, + NUMA: &structs.NUMA{Affinity: structs.NoneNUMA}, }, }, - NUMA: &structs.NUMA{Affinity: structs.NoneNUMA}, }, - }, - }, - Networks: []*structs.NetworkResource{}, - } - - binp := 
NewBinPackIterator(ctx, static, true, 0) - binp.SetTaskGroup(taskGroup) - binp.SetSchedulerConfiguration(testSchedulerConfig) - - scoreNorm := NewScoreNormalizationIterator(ctx, binp) + Networks: []*structs.NetworkResource{}, + } + static := NewStaticRankIterator(ctx, nodes) + binp := NewBinPackIterator(ctx, static, true, 0) + binp.SetTaskGroup(tg) + binp.SetSchedulerConfiguration(testSchedulerConfig) - out := collectRanked(scoreNorm) + scoreNorm := NewScoreNormalizationIterator(ctx, binp) + out := collectRanked(scoreNorm) - // We expect a placement failure because we need 1 GPU device - // and the other one is taken - must.SliceEmpty(t, out) - must.Eq(t, 1, ctx.metrics.DimensionExhausted["devices: no devices match request"]) + // check if we get the expected number of rankedNodes (0 or 1) + must.SliceLen(t, tc.rankedNodes, out) + must.Eq(t, tc.exhaustedNodes, ctx.metrics.DimensionExhausted["devices: no devices match request"]) + }) + } } func TestBinPackIterator_Device_Preemption_MultipleDeviceRequests(t *testing.T) { From 97e1696ed22a7337a301592197ef0c01e29d9d8b Mon Sep 17 00:00:00 2001 From: Chris Boulton Date: Tue, 20 Jan 2026 22:55:23 -0800 Subject: [PATCH 2/7] devices: add support for first_available device priotisation --- GNUmakefile | 9 + api/resources.go | 63 +++- api/resources_test.go | 46 +++ command/agent/job_endpoint.go | 30 ++ e2e/devices/basic_test.go | 327 ++++++++++++++++++ e2e/devices/doc.go | 7 + e2e/devices/first_available_test.go | 193 +++++++++++ .../input/device_constraint_no_match.hcl | 36 ++ e2e/devices/input/device_count_only.hcl | 30 ++ e2e/devices/input/device_with_affinity.hcl | 36 ++ e2e/devices/input/device_with_constraint.hcl | 35 ++ .../device_with_constraint_and_affinity.hcl | 41 +++ .../input/first_available_no_match.hcl | 44 +++ .../first_available_with_base_constraint.hcl | 51 +++ .../input/first_available_with_basic.hcl | 49 +++ e2e/e2e_test.go | 1 + e2e/terraform/main.tf | 1 + e2e/terraform/provision-infra/nomad.tf | 2 + 
.../etc/nomad.d/client-linux.hcl | 8 + .../provision-nomad/install-linux.tf | 31 ++ .../provision-nomad/variables.tf | 6 + e2e/terraform/provision-infra/variables.tf | 6 + e2e/terraform/terraform.tfvars | 4 + e2e/terraform/variables.tf | 6 + nomad/structs/constraint.go | 1 + nomad/structs/structs.go | 133 ++++++- nomad/structs/structs_test.go | 174 ++++++++++ scheduler/feasible/device.go | 113 +++++- scheduler/feasible/device_test.go | 298 +++++++++++++++- scheduler/feasible/feasible.go | 90 ++++- scheduler/feasible/feasible_test.go | 151 ++++++++ scheduler/feasible/rank.go | 27 +- 32 files changed, 1995 insertions(+), 54 deletions(-) create mode 100644 e2e/devices/basic_test.go create mode 100644 e2e/devices/doc.go create mode 100644 e2e/devices/first_available_test.go create mode 100644 e2e/devices/input/device_constraint_no_match.hcl create mode 100644 e2e/devices/input/device_count_only.hcl create mode 100644 e2e/devices/input/device_with_affinity.hcl create mode 100644 e2e/devices/input/device_with_constraint.hcl create mode 100644 e2e/devices/input/device_with_constraint_and_affinity.hcl create mode 100644 e2e/devices/input/first_available_no_match.hcl create mode 100644 e2e/devices/input/first_available_with_base_constraint.hcl create mode 100644 e2e/devices/input/first_available_with_basic.hcl diff --git a/GNUmakefile b/GNUmakefile index afdd84c5632..62023fbb915 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -108,6 +108,15 @@ endif pkg/windows_%/nomad: GO_OUT = $@.exe pkg/windows_%/nomad: GO_TAGS += timetzdata +# Build the example device plugin for e2e device tests +pkg/%/nomad-device-example: GO_OUT ?= $@ +pkg/%/nomad-device-example: ## Build the example device plugin for GOOS_GOARCH + @echo "==> Building $@..." 
+ @CGO_ENABLED=0 \ + GOOS=$(firstword $(subst _, ,$*)) \ + GOARCH=$(lastword $(subst _, ,$*)) \ + go build -trimpath -o $(GO_OUT) ./plugins/device/cmd/example/cmd + # Define package targets for each of the build targets we actually have on this system define makePackageTarget diff --git a/api/resources.go b/api/resources.go index e1d2bdd8e2e..9a16bc25dee 100644 --- a/api/resources.go +++ b/api/resources.go @@ -328,6 +328,42 @@ type ShareDevices struct { SharedDeviceId string `hcl:"shared_device_id,optional"` } +type DevicePreferences struct { + // Enabled + Enabled bool `hcl:"enabled"` + // SharedDeviceID is an optional field for use in environments with + // multiple shared devices, to make the shared device ID available to + // the plugin. If in use alongside the device.id constraint, the two must + // match or the job will not be placed. + SharedDeviceId string `hcl:"shared_device_id,optional"` + + // FirstAvailable specifies a prioritized list of device options. The + // scheduler will attempt to satisfy each option in order, selecting the + // first one that can be fulfilled. Mutually exclusive with Count. + FirstAvailable []*DeviceOption `hcl:"first_available,block"` +} + +// DeviceOption represents a single option in a first_available device selection. +// Each option specifies a count and optional constraints that must be satisfied +// for this option to be selected. +type DeviceOption struct { + // Count is the number of requested devices for this option + Count *uint64 `hcl:"count,optional"` + + // Constraints are a set of constraints to apply when selecting the device + // to use for this option. + Constraints []*Constraint `hcl:"constraint,block"` +} + +func (o *DeviceOption) Canonicalize() { + if o == nil { + return + } + if o.Count == nil { + o.Count = pointerOf(uint64(1)) + } +} + // RequestedDevice is used to request a device for a task. type RequestedDevice struct { // Name is the request name. 
The possible values are as follows: @@ -341,23 +377,40 @@ type RequestedDevice struct { // * "nvidia/gpu/GTX2080Ti" Name string `hcl:",label"` - // Count is the number of requested devices + // Count is the number of requested devices. Mutually exclusive with + // FirstAvailable. Count *uint64 `hcl:"count,optional"` // Constraints are a set of constraints to apply when selecting the device - // to use. + // to use. When FirstAvailable is specified, these constraints are applied + // as base constraints that all options must also satisfy. Constraints []*Constraint `hcl:"constraint,block"` - // Affinities are a set of affinites to apply when selecting the device - // to use. + // Affinities are a set of affinities to apply when selecting the device + // to use. When FirstAvailable is specified, these affinities are applied + // as base affinities for all options. Affinities []*Affinity `hcl:"affinity,block"` + //Device Preferences + DevicePreferences *DevicePreferences `hcl:"device_preferences,block"` + // ShareDevices reports whether the task should be placed on a shared device ShareDevices *ShareDevices `hcl:"share_devices,block"` + + // FirstAvailable specifies a prioritized list of device options. The + // scheduler will attempt to satisfy each option in order, selecting the + // first one that can be fulfilled. Mutually exclusive with Count. 
+ FirstAvailable []*DeviceOption `hcl:"first_available,block"` } func (d *RequestedDevice) Canonicalize() { - if d.Count == nil { + // If using first_available, canonicalize each option but don't set default count + if len(d.FirstAvailable) > 0 { + for _, opt := range d.FirstAvailable { + opt.Canonicalize() + } + } else if d.Count == nil { + // Only set default count when not using first_available d.Count = pointerOf(uint64(1)) } diff --git a/api/resources_test.go b/api/resources_test.go index 608c6f7833e..b58d39af6fb 100644 --- a/api/resources_test.go +++ b/api/resources_test.go @@ -116,3 +116,49 @@ func TestNUMAResource_Canonicalize(t *testing.T) { n3.Canonicalize() must.Eq(t, &NUMAResource{Affinity: "require", Devices: nil}, n3) } + +func TestDeviceOption_Canonicalize(t *testing.T) { + testutil.Parallel(t) + + // Nil option + var opt *DeviceOption + opt.Canonicalize() // should not panic + + // Count defaults to 1 + opt2 := &DeviceOption{} + opt2.Canonicalize() + must.Eq(t, uint64(1), *opt2.Count) + + // Explicit count preserved + opt3 := &DeviceOption{Count: pointerOf(uint64(4))} + opt3.Canonicalize() + must.Eq(t, uint64(4), *opt3.Count) +} + +func TestRequestedDevice_Canonicalize_FirstAvailable(t *testing.T) { + testutil.Parallel(t) + + // With FirstAvailable, Count should NOT be set to default + rd := &RequestedDevice{ + Name: "nvidia/gpu", + FirstAvailable: []*DeviceOption{ + {Count: pointerOf(uint64(2))}, + {}, // no count set + }, + } + rd.Canonicalize() + + // Count should remain nil when using FirstAvailable + must.Nil(t, rd.Count) + + // FirstAvailable options should be canonicalized + must.Eq(t, uint64(2), *rd.FirstAvailable[0].Count) + must.Eq(t, uint64(1), *rd.FirstAvailable[1].Count) // defaulted to 1 + + // Without FirstAvailable, Count defaults to 1 + rd2 := &RequestedDevice{ + Name: "nvidia/gpu", + } + rd2.Canonicalize() + must.Eq(t, uint64(1), *rd2.Count) +} diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index 
81e07c1ee71..844dda60563 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -1631,6 +1631,36 @@ func ApiResourcesToStructs(in *api.Resources) *structs.Resources { Affinities: ApiAffinitiesToStructs(d.Affinities), ShareDevices: ApiSharingToStructs(d.ShareDevices), }) + rd := &structs.RequestedDevice{ + Name: d.Name, + Constraints: ApiConstraintsToStructs(d.Constraints), + Affinities: ApiAffinitiesToStructs(d.Affinities), + } + // Only set Count if not using FirstAvailable + if d.Count != nil { + rd.Count = *d.Count + } + // Convert FirstAvailable options + if len(d.FirstAvailable) > 0 { + rd.FirstAvailable = make([]*structs.DeviceOption, len(d.FirstAvailable)) + for i, opt := range d.FirstAvailable { + rd.FirstAvailable[i] = &structs.DeviceOption{ + Constraints: ApiConstraintsToStructs(opt.Constraints), + } + if opt.Count != nil { + rd.FirstAvailable[i].Count = *opt.Count + } + } + } + //TODO: temp build out make better soon + if d.ShareDevices != nil || len(d.FirstAvailable) > 0 { + rd.DevicePreferences = &structs.DevicePreferences{FirstAvailable: rd.FirstAvailable} + } + if d.ShareDevices != nil { + rd.DevicePreferences.Enabled = d.ShareDevices.Enabled + rd.DevicePreferences.SharedDeviceId = d.ShareDevices.SharedDeviceId + } + out.Devices = append(out.Devices, rd) } } diff --git a/e2e/devices/basic_test.go b/e2e/devices/basic_test.go new file mode 100644 index 00000000000..66a4af5f94e --- /dev/null +++ b/e2e/devices/basic_test.go @@ -0,0 +1,327 @@ +// Copyright IBM Corp. 2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package devices + +import ( + "fmt" + "strings" + "testing" + "time" + + "github.com/hashicorp/nomad/api" + "github.com/hashicorp/nomad/e2e/e2eutil" + "github.com/hashicorp/nomad/helper/uuid" + "github.com/hashicorp/nomad/testutil" + "github.com/shoenig/test/must" +) + +// TestDeviceScheduling runs end-to-end tests for traditional device scheduling +// (count, constraint, affinity without first_available). 
These tests require:
+//   - A Nomad cluster with at least one Linux client
+//   - The example device plugin (nomad/file/mock) installed and configured
+//   - Mock device files created in the configured directory
+//
+// See plugins/device/cmd/example/README.md for setup instructions.
+func TestDeviceScheduling(t *testing.T) {
+	client := e2eutil.NomadClient(t)
+	e2eutil.WaitForLeader(t, client)
+	e2eutil.WaitForNodesReady(t, client, 1)
+
+	// Skip the whole suite when no node reports the mock device
+	if !hasDevicePlugin(t, client, "nomad/file/mock") {
+		t.Skip("skipping: no nodes with nomad/file/mock device plugin")
+	}
+
+	t.Run("testDeviceCountOnly", testDeviceCountOnly)
+	t.Run("testDeviceWithConstraint", testDeviceWithConstraint)
+	t.Run("testDeviceWithAffinity", testDeviceWithAffinity)
+	t.Run("testDeviceWithConstraintAndAffinity", testDeviceWithConstraintAndAffinity)
+	t.Run("testDeviceConstraintNoMatch", testDeviceConstraintNoMatch)
+}
+
+// hasDevicePlugin reports whether any node in the cluster exposes a device
+// whose vendor/type/name string, or bare name, contains deviceName.
+func hasDevicePlugin(t *testing.T, client *api.Client, deviceName string) bool {
+	t.Helper()
+
+	stubs, _, err := client.Nodes().List(nil)
+	must.NoError(t, err)
+
+	for _, stub := range stubs {
+		node, _, err := client.Nodes().Info(stub.ID, nil)
+		must.NoError(t, err)
+
+		if node.NodeResources == nil || node.NodeResources.Devices == nil {
+			continue
+		}
+		for _, dev := range node.NodeResources.Devices {
+			qualified := dev.Vendor + "/" + dev.Type + "/" + dev.Name
+			if strings.Contains(qualified, deviceName) || strings.Contains(dev.Name, deviceName) {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// testDeviceCountOnly tests that a job with only device count specified
+// can be successfully scheduled.
+func testDeviceCountOnly(t *testing.T) {
+	client := e2eutil.NomadClient(t)
+
+	id := "device-count-" + uuid.Short()
+	ids := []string{id}
+	t.Cleanup(e2eutil.CleanupJobsAndGC(t, &ids))
+
+	stubs := e2eutil.RegisterAndWaitForAllocs(t, client, "./input/device_count_only.hcl", id, "")
+	must.Len(t, 1, stubs, must.Sprint("expected 1 allocation"))
+
+	info, _, err := client.Allocations().Info(stubs[0].ID, nil)
+	must.NoError(t, err)
+	must.Eq(t, api.AllocClientStatusRunning, info.ClientStatus,
+		must.Sprintf("allocation status: %s, description: %s",
+			info.ClientStatus, info.ClientDescription))
+
+	// The "sleep" task must have been handed at least one device group
+	must.NotNil(t, info.AllocatedResources)
+	taskRes := info.AllocatedResources.Tasks["sleep"]
+	must.NotNil(t, taskRes)
+	must.SliceNotEmpty(t, taskRes.Devices,
+		must.Sprint("expected devices to be allocated"))
+
+	// Sum instance IDs across device groups; exactly one is expected
+	granted := 0
+	for _, group := range taskRes.Devices {
+		granted += len(group.DeviceIDs)
+	}
+	must.Eq(t, 1, granted, must.Sprint("expected exactly 1 device"))
+}
+
+// testDeviceWithConstraint tests that a job with device count and constraint
+// can be successfully scheduled when the constraint is satisfied.
+func testDeviceWithConstraint(t *testing.T) {
+	client := e2eutil.NomadClient(t)
+
+	id := "device-constraint-" + uuid.Short()
+	ids := []string{id}
+	t.Cleanup(e2eutil.CleanupJobsAndGC(t, &ids))
+
+	stubs := e2eutil.RegisterAndWaitForAllocs(t, client, "./input/device_with_constraint.hcl", id, "")
+	must.Len(t, 1, stubs, must.Sprint("expected 1 allocation"))
+
+	info, _, err := client.Allocations().Info(stubs[0].ID, nil)
+	must.NoError(t, err)
+	must.Eq(t, api.AllocClientStatusRunning, info.ClientStatus,
+		must.Sprintf("allocation status: %s, description: %s",
+			info.ClientStatus, info.ClientDescription))
+
+	// The "sleep" task must have been handed at least one device group
+	must.NotNil(t, info.AllocatedResources)
+	taskRes := info.AllocatedResources.Tasks["sleep"]
+	must.NotNil(t, taskRes)
+	must.SliceNotEmpty(t, taskRes.Devices,
+		must.Sprint("expected devices to be allocated"))
+}
+
+// testDeviceWithAffinity tests that a job with device count and affinity
+// can be successfully scheduled.
+func testDeviceWithAffinity(t *testing.T) {
+	client := e2eutil.NomadClient(t)
+
+	id := "device-affinity-" + uuid.Short()
+	ids := []string{id}
+	t.Cleanup(e2eutil.CleanupJobsAndGC(t, &ids))
+
+	stubs := e2eutil.RegisterAndWaitForAllocs(t, client, "./input/device_with_affinity.hcl", id, "")
+	must.Len(t, 1, stubs, must.Sprint("expected 1 allocation"))
+
+	info, _, err := client.Allocations().Info(stubs[0].ID, nil)
+	must.NoError(t, err)
+	must.Eq(t, api.AllocClientStatusRunning, info.ClientStatus,
+		must.Sprintf("allocation status: %s, description: %s",
+			info.ClientStatus, info.ClientDescription))
+
+	// The "sleep" task must have been handed at least one device group
+	must.NotNil(t, info.AllocatedResources)
+	taskRes := info.AllocatedResources.Tasks["sleep"]
+	must.NotNil(t, taskRes)
+	must.SliceNotEmpty(t, taskRes.Devices,
+		must.Sprint("expected devices to be allocated"))
+}
+
+// testDeviceWithConstraintAndAffinity tests that a job with device count,
+// constraint, and affinity can be successfully scheduled.
+func testDeviceWithConstraintAndAffinity(t *testing.T) {
+	client := e2eutil.NomadClient(t)
+
+	id := "device-both-" + uuid.Short()
+	ids := []string{id}
+	t.Cleanup(e2eutil.CleanupJobsAndGC(t, &ids))
+
+	stubs := e2eutil.RegisterAndWaitForAllocs(t, client, "./input/device_with_constraint_and_affinity.hcl", id, "")
+	must.Len(t, 1, stubs, must.Sprint("expected 1 allocation"))
+
+	info, _, err := client.Allocations().Info(stubs[0].ID, nil)
+	must.NoError(t, err)
+	must.Eq(t, api.AllocClientStatusRunning, info.ClientStatus,
+		must.Sprintf("allocation status: %s, description: %s",
+			info.ClientStatus, info.ClientDescription))
+
+	// The "sleep" task must have been handed at least one device group
+	must.NotNil(t, info.AllocatedResources)
+	taskRes := info.AllocatedResources.Tasks["sleep"]
+	must.NotNil(t, taskRes)
+	must.SliceNotEmpty(t, taskRes.Devices,
+		must.Sprint("expected devices to be allocated"))
+
+	// Sum instance IDs across device groups; exactly two are expected
+	granted := 0
+	for _, group := range taskRes.Devices {
+		granted += len(group.DeviceIDs)
+	}
+	must.Eq(t, 2, granted, must.Sprint("expected exactly 2 devices"))
+}
+
+// testDeviceConstraintNoMatch tests that when a device constraint cannot be
+// satisfied, the job fails to schedule with appropriate error messages.
+func testDeviceConstraintNoMatch(t *testing.T) {
+	client := e2eutil.NomadClient(t)
+
+	id := "device-nomatch-" + uuid.Short()
+	ids := []string{id}
+	t.Cleanup(e2eutil.CleanupJobsAndGC(t, &ids))
+
+	// Load the job spec and submit it under the generated ID
+	spec, err := e2eutil.Parse2(t, "./input/device_constraint_no_match.hcl")
+	must.NoError(t, err)
+	spec.ID = &id
+
+	registered, _, err := client.Jobs().Register(spec, nil)
+	must.NoError(t, err)
+
+	evalID := registered.EvalID
+
+	// Poll until the evaluation is complete or blocked (placement should fail)
+	var ev *api.Evaluation
+	testutil.WaitForResultRetries(30, func() (bool, error) {
+		time.Sleep(500 * time.Millisecond)
+		ev, _, err = client.Evaluations().Info(evalID, nil)
+		if err != nil {
+			return false, err
+		}
+		if ev.Status == api.EvalStatusComplete || ev.Status == api.EvalStatusBlocked {
+			return true, nil
+		}
+		return false, fmt.Errorf("eval status: %s", ev.Status)
+	}, func(err error) {
+		must.NoError(t, err)
+	})
+
+	// At least one task group must be recorded as unplaceable
+	must.MapNotEmpty(t, ev.FailedTGAllocs,
+		must.Sprint("expected failed task group allocations"))
+
+	// Each failure must be explained by exhaustion or constraint filtering
+	for _, m := range ev.FailedTGAllocs {
+		explained := m.NodesExhausted > 0 ||
+			len(m.DimensionExhausted) > 0 ||
+			len(m.ConstraintFiltered) > 0
+		must.True(t, explained,
+			must.Sprintf("expected device exhaustion, got metrics: %+v", m))
+	}
+}
+
+// TestDeviceParsing tests that traditional device configurations (count,
+// constraint, affinity) are parsed correctly. These are unit-style tests
+// that don't require a running Nomad cluster.
+func TestDeviceParsing(t *testing.T) {
+	t.Run("testParseDeviceCountOnly", testParseDeviceCountOnly)
+	t.Run("testParseDeviceWithConstraint", testParseDeviceWithConstraint)
+	t.Run("testParseDeviceWithAffinity", testParseDeviceWithAffinity)
+	t.Run("testParseDeviceWithConstraintAndAffinity", testParseDeviceWithConstraintAndAffinity)
+}
+
+// testParseDeviceCountOnly checks that a device block carrying only a count
+func testParseDeviceCountOnly(t *testing.T) {
+	parsed, err := e2eutil.Parse2(t, "./input/device_count_only.hcl")
+	must.NoError(t, err)
+	must.NotNil(t, parsed)
+
+	must.Len(t, 1, parsed.TaskGroups)
+	first := parsed.TaskGroups[0].Tasks[0]
+	must.NotNil(t, first.Resources)
+	must.Len(t, 1, first.Resources.Devices)
+
+	dev := first.Resources.Devices[0]
+	must.Eq(t, "nomad/file/mock", dev.Name)
+	must.Eq(t, uint64(1), *dev.Count)
+	must.Len(t, 0, dev.Constraints)
+	must.Len(t, 0, dev.Affinities)
+	must.Len(t, 0, dev.FirstAvailable)
+}
+
+// testParseDeviceWithConstraint verifies parsing of a device with count and constraint.
+func testParseDeviceWithConstraint(t *testing.T) {
+	parsed, err := e2eutil.Parse2(t, "./input/device_with_constraint.hcl")
+	must.NoError(t, err)
+	must.NotNil(t, parsed)
+
+	// only the first device of the first task is inspected
+	dev := parsed.TaskGroups[0].Tasks[0].Resources.Devices[0]
+
+	must.Eq(t, "nomad/file/mock", dev.Name)
+	must.Eq(t, uint64(1), *dev.Count)
+	must.Len(t, 1, dev.Constraints)
+	must.Eq(t, "${device.attr.type}", dev.Constraints[0].LTarget)
+	must.Eq(t, "file", dev.Constraints[0].RTarget)
+	must.Len(t, 0, dev.Affinities)
+	must.Len(t, 0, dev.FirstAvailable)
+}
+
+// testParseDeviceWithAffinity verifies parsing of a device with count and affinity.
+func testParseDeviceWithAffinity(t *testing.T) {
+	parsed, err := e2eutil.Parse2(t, "./input/device_with_affinity.hcl")
+	must.NoError(t, err)
+	must.NotNil(t, parsed)
+
+	// only the first device of the first task is inspected
+	dev := parsed.TaskGroups[0].Tasks[0].Resources.Devices[0]
+
+	must.Eq(t, "nomad/file/mock", dev.Name)
+	must.Eq(t, uint64(1), *dev.Count)
+	must.Len(t, 0, dev.Constraints)
+	must.Len(t, 1, dev.Affinities)
+	must.Eq(t, "${device.attr.priority}", dev.Affinities[0].LTarget)
+	must.Eq(t, "high", dev.Affinities[0].RTarget)
+	must.Eq(t, int8(100), *dev.Affinities[0].Weight)
+	must.Len(t, 0, dev.FirstAvailable)
+}
+
+// testParseDeviceWithConstraintAndAffinity verifies parsing of a device with
+// count, constraint, and affinity.
+func testParseDeviceWithConstraintAndAffinity(t *testing.T) {
+	parsed, err := e2eutil.Parse2(t, "./input/device_with_constraint_and_affinity.hcl")
+	must.NoError(t, err)
+	must.NotNil(t, parsed)
+
+	// only the first device of the first task is inspected
+	dev := parsed.TaskGroups[0].Tasks[0].Resources.Devices[0]
+
+	must.Eq(t, "nomad/file/mock", dev.Name)
+	must.Eq(t, uint64(2), *dev.Count)
+
+	// Exactly one constraint, on the device type attribute
+	must.Len(t, 1, dev.Constraints)
+	must.Eq(t, "${device.attr.type}", dev.Constraints[0].LTarget)
+	must.Eq(t, "file", dev.Constraints[0].RTarget)
+
+	// Exactly one affinity, on the priority attribute with weight 50
+	must.Len(t, 1, dev.Affinities)
+	must.Eq(t, "${device.attr.priority}", dev.Affinities[0].LTarget)
+	must.Eq(t, "high", dev.Affinities[0].RTarget)
+	must.Eq(t, int8(50), *dev.Affinities[0].Weight)
+
+	// The first_available list must be empty
+	must.Len(t, 0, dev.FirstAvailable)
+}
diff --git a/e2e/devices/doc.go b/e2e/devices/doc.go
new file mode 100644
index 00000000000..85336e16234
--- /dev/null
+++ b/e2e/devices/doc.go
@@ -0,0 +1,7 @@
+// Copyright IBM Corp. 2015, 2025
+// SPDX-License-Identifier: BUSL-1.1
+
+// Package devices provides end-to-end tests for Nomad's device scheduling
+// functionality, including the first_available feature for flexible device
+// selection.
+package devices diff --git a/e2e/devices/first_available_test.go b/e2e/devices/first_available_test.go new file mode 100644 index 00000000000..aa288f6f4e2 --- /dev/null +++ b/e2e/devices/first_available_test.go @@ -0,0 +1,193 @@ +// Copyright IBM Corp. 2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package devices + +import ( + "fmt" + "testing" + "time" + + "github.com/hashicorp/nomad/api" + "github.com/hashicorp/nomad/e2e/e2eutil" + "github.com/hashicorp/nomad/helper/uuid" + "github.com/hashicorp/nomad/testutil" + "github.com/shoenig/test/must" +) + +// TestDeviceFirstAvailable runs end-to-end tests for the first_available +// device scheduling feature. These tests require: +// - A Nomad cluster with at least one Linux client +// - The example device plugin (nomad/file/mock) installed and configured +// - Mock device files created in the configured directory +// +// See plugins/device/cmd/example/README.md for setup instructions. +func TestDeviceFirstAvailable(t *testing.T) { + nomadClient := e2eutil.NomadClient(t) + e2eutil.WaitForLeader(t, nomadClient) + e2eutil.WaitForNodesReady(t, nomadClient, 1) + + // Check if any nodes have mock devices available + if !hasDevicePlugin(t, nomadClient, "nomad/file/mock") { + t.Skip("skipping: no nodes with nomad/file/mock device plugin") + } + + t.Run("testFirstAvailableSelectsCorrectOption", testFirstAvailableSelectsCorrectOption) + t.Run("testFirstAvailableNoMatch", testFirstAvailableNoMatch) +} + +// testFirstAvailableSelectsCorrectOption tests that first_available correctly +// evaluates options in order and selects the appropriate one. The first option +// has an impossible constraint (should fail), so the scheduler must fall back +// to the second option. We verify by checking that exactly 2 devices were +// allocated (second option's count), not 1 (first option's count) or 3 (third +// option's count). 
+func testFirstAvailableSelectsCorrectOption(t *testing.T) { + nomadClient := e2eutil.NomadClient(t) + + jobID := "device-fa-second-" + uuid.Short() + jobIDs := []string{jobID} + t.Cleanup(e2eutil.CleanupJobsAndGC(t, &jobIDs)) + + // Register the job - first option has impossible constraint (should fail), + // second option requests 2 devices (should be selected) + allocs := e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "./input/first_available_with_basic.hcl", jobID, "") + must.Len(t, 1, allocs, must.Sprint("expected 1 allocation")) + + // Verify the allocation is running (fallback to second option succeeded) + alloc, _, err := nomadClient.Allocations().Info(allocs[0].ID, nil) + must.NoError(t, err) + must.Eq(t, api.AllocClientStatusRunning, alloc.ClientStatus, + must.Sprintf("allocation status: %s, description: %s", + alloc.ClientStatus, alloc.ClientDescription)) + + // Verify devices were allocated + must.NotNil(t, alloc.AllocatedResources) + taskResources := alloc.AllocatedResources.Tasks["sleep"] + must.NotNil(t, taskResources) + must.SliceNotEmpty(t, taskResources.Devices, + must.Sprint("expected devices to be allocated")) + + // Count total devices allocated - should be 2 (second option), not 1 (first option) + totalDevices := 0 + for _, deviceResource := range taskResources.Devices { + totalDevices += len(deviceResource.DeviceIDs) + } + must.Eq(t, 2, totalDevices, + must.Sprint("expected exactly 2 devices from SECOND option, got different count indicating wrong option selected")) +} + +// testFirstAvailableNoMatch tests that when no first_available options can be +// satisfied, the job fails to schedule with appropriate error messages. 
+func testFirstAvailableNoMatch(t *testing.T) { + nomadClient := e2eutil.NomadClient(t) + + jobID := "device-fa-nomatch-" + uuid.Short() + jobIDs := []string{jobID} + t.Cleanup(e2eutil.CleanupJobsAndGC(t, &jobIDs)) + + // Parse and register the job + job, err := e2eutil.Parse2(t, "./input/first_available_no_match.hcl") + must.NoError(t, err) + job.ID = &jobID + + resp, _, err := nomadClient.Jobs().Register(job, nil) + must.NoError(t, err) + + evalID := resp.EvalID + + // Wait for the evaluation to complete (it should fail to place) + var eval *api.Evaluation + testutil.WaitForResultRetries(30, func() (bool, error) { + time.Sleep(500 * time.Millisecond) + eval, _, err = nomadClient.Evaluations().Info(evalID, nil) + if err != nil { + return false, err + } + // Wait until eval is complete or blocked + if eval.Status == api.EvalStatusComplete || eval.Status == api.EvalStatusBlocked { + return true, nil + } + return false, fmt.Errorf("eval status: %s", eval.Status) + }, func(err error) { + must.NoError(t, err) + }) + + // The evaluation should have failed task group allocations + must.MapNotEmpty(t, eval.FailedTGAllocs, + must.Sprint("expected failed task group allocations")) + + // Check that the failure is due to device exhaustion + for _, metrics := range eval.FailedTGAllocs { + // Should see nodes exhausted or constraint filtered + exhausted := metrics.NodesExhausted > 0 || + len(metrics.DimensionExhausted) > 0 || + len(metrics.ConstraintFiltered) > 0 + must.True(t, exhausted, + must.Sprintf("expected device exhaustion, got metrics: %+v", metrics)) + } +} + +// TestDeviceFirstAvailableParsing tests that jobs with first_available blocks +// are parsed correctly. These are unit-style tests that don't require a +// running Nomad cluster. 
+func TestDeviceFirstAvailableParsing(t *testing.T) { + t.Run("testParseFirstAvailable", testParseFirstAvailable) + t.Run("testParseWithBaseConstraint", testParseWithBaseConstraint) +} + +// testParseFirstAvailable verifies parsing of first_available with multiple +// options including constraints. +func testParseFirstAvailable(t *testing.T) { + job, err := e2eutil.Parse2(t, "./input/first_available_with_basic.hcl") + must.NoError(t, err) + must.NotNil(t, job) + + // Verify the structure was parsed correctly + must.Len(t, 1, job.TaskGroups) + task := job.TaskGroups[0].Tasks[0] + must.NotNil(t, task.Resources) + must.Len(t, 1, task.Resources.Devices) + + device := task.Resources.Devices[0] + must.Eq(t, "nomad/file/mock", device.Name) + must.Len(t, 3, device.FirstAvailable, + must.Sprint("expected 3 first_available options")) + + // Verify first option: count=1, with impossible constraint + opt1 := device.FirstAvailable[0] + must.Eq(t, uint64(1), *opt1.Count) + must.Len(t, 1, opt1.Constraints) + must.Eq(t, "${device.attr.impossible_attr}", opt1.Constraints[0].LTarget) + must.Eq(t, "impossible_value", opt1.Constraints[0].RTarget) + + // Verify second option: count=2, no constraints + opt2 := device.FirstAvailable[1] + must.Eq(t, uint64(2), *opt2.Count) + must.Len(t, 0, opt2.Constraints) + + // Verify third option: count=3, no constraints + opt3 := device.FirstAvailable[2] + must.Eq(t, uint64(3), *opt3.Count) + must.Len(t, 0, opt3.Constraints) +} + +// testParseWithBaseConstraint verifies parsing with base and option constraints. 
+func testParseWithBaseConstraint(t *testing.T) { + job, err := e2eutil.Parse2(t, "./input/first_available_with_base_constraint.hcl") + must.NoError(t, err) + must.NotNil(t, job) + + task := job.TaskGroups[0].Tasks[0] + device := task.Resources.Devices[0] + + // Verify base constraint exists + must.Len(t, 1, device.Constraints, + must.Sprint("expected 1 base constraint")) + must.Eq(t, "${device.attr.cool-attribute}", device.Constraints[0].LTarget) + + // Verify first_available options also have their own constraints + must.Len(t, 2, device.FirstAvailable) + must.Len(t, 1, device.FirstAvailable[0].Constraints) + must.Len(t, 1, device.FirstAvailable[1].Constraints) +} diff --git a/e2e/devices/input/device_constraint_no_match.hcl b/e2e/devices/input/device_constraint_no_match.hcl new file mode 100644 index 00000000000..e7271618a04 --- /dev/null +++ b/e2e/devices/input/device_constraint_no_match.hcl @@ -0,0 +1,36 @@ +# Copyright IBM Corp. 2015, 2025 +# SPDX-License-Identifier: BUSL-1.1 + +# Test for device constraint that cannot be satisfied. +# The job should fail to schedule because no device matches the constraint. + +job "device-constraint-nomatch" { + type = "batch" + + group "test" { + count = 1 + + task "sleep" { + driver = "raw_exec" + + config { + command = "sleep" + args = ["30"] + } + + resources { + cpu = 10 + memory = 64 + + device "nomad/file/mock" { + count = 1 + + constraint { + attribute = "${device.attr.cool-attribute}" + value = "impossible-value-that-will-never-match" + } + } + } + } + } +} diff --git a/e2e/devices/input/device_count_only.hcl b/e2e/devices/input/device_count_only.hcl new file mode 100644 index 00000000000..43192495293 --- /dev/null +++ b/e2e/devices/input/device_count_only.hcl @@ -0,0 +1,30 @@ +# Copyright IBM Corp. 2015, 2025 +# SPDX-License-Identifier: BUSL-1.1 + +# Basic test for device scheduling with only count specified. 
+ +job "device-count-only" { + type = "batch" + + group "test" { + count = 1 + + task "sleep" { + driver = "raw_exec" + + config { + command = "sleep" + args = ["30"] + } + + resources { + cpu = 10 + memory = 64 + + device "nomad/file/mock" { + count = 1 + } + } + } + } +} diff --git a/e2e/devices/input/device_with_affinity.hcl b/e2e/devices/input/device_with_affinity.hcl new file mode 100644 index 00000000000..c2d0eee88d7 --- /dev/null +++ b/e2e/devices/input/device_with_affinity.hcl @@ -0,0 +1,36 @@ +# Copyright IBM Corp. 2015, 2025 +# SPDX-License-Identifier: BUSL-1.1 + +# Test for device scheduling with count and affinity. + +job "device-with-affinity" { + type = "batch" + + group "test" { + count = 1 + + task "sleep" { + driver = "raw_exec" + + config { + command = "sleep" + args = ["30"] + } + + resources { + cpu = 10 + memory = 64 + + device "nomad/file/mock" { + count = 1 + + affinity { + attribute = "${device.attr.cool-attribute}" + value = "high" + weight = 100 + } + } + } + } + } +} diff --git a/e2e/devices/input/device_with_constraint.hcl b/e2e/devices/input/device_with_constraint.hcl new file mode 100644 index 00000000000..38bb79b5892 --- /dev/null +++ b/e2e/devices/input/device_with_constraint.hcl @@ -0,0 +1,35 @@ +# Copyright IBM Corp. 2015, 2025 +# SPDX-License-Identifier: BUSL-1.1 + +# Test for device scheduling with count and constraint. 
+ +job "device-with-constraint" { + type = "batch" + + group "test" { + count = 1 + + task "sleep" { + driver = "raw_exec" + + config { + command = "sleep" + args = ["30"] + } + + resources { + cpu = 10 + memory = 64 + + device "nomad/file/mock" { + count = 1 + + constraint { + attribute = "${device.attr.cool-attribute}" + value = "attribute-wearing-sunglasses" + } + } + } + } + } +} diff --git a/e2e/devices/input/device_with_constraint_and_affinity.hcl b/e2e/devices/input/device_with_constraint_and_affinity.hcl new file mode 100644 index 00000000000..410f5c51c77 --- /dev/null +++ b/e2e/devices/input/device_with_constraint_and_affinity.hcl @@ -0,0 +1,41 @@ +# Copyright IBM Corp. 2015, 2025 +# SPDX-License-Identifier: BUSL-1.1 + +# Test for device scheduling with count, constraint, and affinity combined. + +job "device-constraint-affinity" { + type = "batch" + + group "test" { + count = 1 + + task "sleep" { + driver = "raw_exec" + + config { + command = "sleep" + args = ["30"] + } + + resources { + cpu = 10 + memory = 64 + + device "nomad/file/mock" { + count = 2 + + constraint { + attribute = "${device.attr.cool-attribute}" + value = "attribute-wearing-sunglasses" + } + + affinity { + attribute = "${device.attr.priority}" + value = "high" + weight = 50 + } + } + } + } + } +} diff --git a/e2e/devices/input/first_available_no_match.hcl b/e2e/devices/input/first_available_no_match.hcl new file mode 100644 index 00000000000..92bfd9c0465 --- /dev/null +++ b/e2e/devices/input/first_available_no_match.hcl @@ -0,0 +1,44 @@ +# Copyright IBM Corp. 2015, 2025 +# SPDX-License-Identifier: BUSL-1.1 + +# Test for first_available when no options can be satisfied. +# All options have impossible constraints, so the job should fail to schedule. 
+ +job "device-first-available-nomatch" { + type = "batch" + + group "test" { + count = 1 + + task "sleep" { + driver = "raw_exec" + + config { + command = "sleep" + args = ["30"] + } + + resources { + cpu = 10 + memory = 64 + + device "nomad/file/mock" { + first_available { + count = 100 + constraint { + attribute = "${device.attr.nonexistent1}" + value = "impossible1" + } + } + first_available { + count = 100 + constraint { + attribute = "${device.attr.nonexistent2}" + value = "impossible2" + } + } + } + } + } + } +} diff --git a/e2e/devices/input/first_available_with_base_constraint.hcl b/e2e/devices/input/first_available_with_base_constraint.hcl new file mode 100644 index 00000000000..8c3fd57f45a --- /dev/null +++ b/e2e/devices/input/first_available_with_base_constraint.hcl @@ -0,0 +1,51 @@ +# Copyright IBM Corp. 2015, 2025 +# SPDX-License-Identifier: BUSL-1.1 + +# Test for first_available with base constraints. +# The device block has a base constraint that all options must satisfy, +# plus each first_available option can have additional constraints. 
+ +job "device-first-available-base" { + type = "batch" + + group "test" { + count = 1 + + task "sleep" { + driver = "raw_exec" + + config { + command = "sleep" + args = ["30"] + } + + resources { + cpu = 10 + memory = 64 + + device "nomad/file/mock" { + # Base constraint applied to all first_available options + constraint { + attribute = "${device.attr.cool-attribute}" + value = "attribute-wearing-sunglasses" + } + + first_available { + count = 2 + constraint { + attribute = "${device.attr.type}" + value = "premium" + } + } + first_available { + count = 1 + constraint { + attribute = "${device.attr.type}" + value = "standard" + } + } + } + } + } + } +} diff --git a/e2e/devices/input/first_available_with_basic.hcl b/e2e/devices/input/first_available_with_basic.hcl new file mode 100644 index 00000000000..4cde776897a --- /dev/null +++ b/e2e/devices/input/first_available_with_basic.hcl @@ -0,0 +1,49 @@ +# Copyright IBM Corp. 2015, 2025 +# SPDX-License-Identifier: BUSL-1.1 + +# Test that the SECOND option is selected when the first cannot be satisfied. +# Option 1: 1 device with impossible constraint (should fail) +# Option 2: 2 devices with no constraints (should be selected) +# +# We verify by checking that exactly 2 devices were allocated. 
+ +job "device-first-available-second" { + type = "batch" + + group "test" { + count = 1 + + task "sleep" { + driver = "raw_exec" + + config { + command = "sleep" + args = ["30"] + } + + resources { + cpu = 10 + memory = 64 + + device "nomad/file/mock" { + # First option: impossible constraint (should fail) + first_available { + count = 1 + constraint { + attribute = "${device.attr.impossible_attr}" + value = "impossible_value" + } + } + # Second option: request 2 devices (should be selected) + first_available { + count = 2 + } + # Third option: request 3 devices (should not be selected) + first_available { + count = 3 + } + } + } + } + } +} diff --git a/e2e/e2e_test.go b/e2e/e2e_test.go index 61cb37ee822..fb385c47b6d 100644 --- a/e2e/e2e_test.go +++ b/e2e/e2e_test.go @@ -16,6 +16,7 @@ import ( _ "github.com/hashicorp/nomad/e2e/consul" _ "github.com/hashicorp/nomad/e2e/csi" _ "github.com/hashicorp/nomad/e2e/deployment" + _ "github.com/hashicorp/nomad/e2e/devices" _ "github.com/hashicorp/nomad/e2e/eval_priority" _ "github.com/hashicorp/nomad/e2e/events" _ "github.com/hashicorp/nomad/e2e/lifecycle" diff --git a/e2e/terraform/main.tf b/e2e/terraform/main.tf index acafb1fc967..76c5ae9f15b 100644 --- a/e2e/terraform/main.tf +++ b/e2e/terraform/main.tf @@ -16,6 +16,7 @@ module "provision-infra" { nomad_local_binary = var.nomad_local_binary nomad_local_binary_client_ubuntu_jammy = var.nomad_local_binary_client_ubuntu_jammy nomad_local_binary_client_windows_2022 = var.nomad_local_binary_client_windows_2022 + device_plugin_local_binary = var.device_plugin_local_binary nomad_license = var.nomad_license consul_license = var.consul_license nomad_region = var.nomad_region diff --git a/e2e/terraform/provision-infra/nomad.tf b/e2e/terraform/provision-infra/nomad.tf index 23dc7813499..43086de7178 100644 --- a/e2e/terraform/provision-infra/nomad.tf +++ b/e2e/terraform/provision-infra/nomad.tf @@ -55,6 +55,8 @@ module "nomad_client_ubuntu_jammy" { nomad_region = var.nomad_region 
nomad_local_binary = local.linux_binary + device_plugin_local_binary = var.device_plugin_local_binary + tls_ca_key = tls_private_key.ca.private_key_pem tls_ca_cert = tls_self_signed_cert.ca.cert_pem diff --git a/e2e/terraform/provision-infra/provision-nomad/etc/nomad.d/client-linux.hcl b/e2e/terraform/provision-infra/provision-nomad/etc/nomad.d/client-linux.hcl index 20f24261a8a..1dfd5e9000a 100644 --- a/e2e/terraform/provision-infra/provision-nomad/etc/nomad.d/client-linux.hcl +++ b/e2e/terraform/provision-infra/provision-nomad/etc/nomad.d/client-linux.hcl @@ -51,3 +51,11 @@ plugin "nomad-driver-exec2" { unveil_paths = ["r:/etc/mime.types"] } } + +plugin "nomad-device-example" { + config { + dir = "/tmp/nomad-device" + list_period = "1s" + unhealthy_perm = "-rwxrwxrwx" + } +} diff --git a/e2e/terraform/provision-infra/provision-nomad/install-linux.tf b/e2e/terraform/provision-infra/provision-nomad/install-linux.tf index c99e9f02c48..afc6a733a85 100644 --- a/e2e/terraform/provision-infra/provision-nomad/install-linux.tf +++ b/e2e/terraform/provision-infra/provision-nomad/install-linux.tf @@ -31,6 +31,37 @@ resource "null_resource" "install_nomad_binary_linux" { } } +resource "null_resource" "install_device_plugin_linux" { + count = var.platform == "linux" && var.role == "client" && var.device_plugin_local_binary != "" ? 
1 : 0 + + connection { + type = "ssh" + user = var.connection.user + host = var.instance.public_ip + port = var.connection.port + private_key = file(var.connection.private_key) + timeout = "5m" + } + + provisioner "file" { + source = var.device_plugin_local_binary + destination = "/tmp/nomad-device-example" + } + provisioner "remote-exec" { + inline = [ + "sudo mv /tmp/nomad-device-example /opt/nomad/plugins/nomad-device-example", + "sudo chmod +x /opt/nomad/plugins/nomad-device-example", + # Create mock device directory and files for e2e device tests + "sudo mkdir -p /tmp/nomad-device", + "sudo touch /tmp/nomad-device/device01", + "sudo touch /tmp/nomad-device/device02", + "sudo touch /tmp/nomad-device/device03", + # Mark device01 as unhealthy + "sudo chmod 0777 /tmp/nomad-device/device01", + ] + } +} + resource "null_resource" "install_consul_configs_linux" { count = var.platform == "linux" ? 1 : 0 diff --git a/e2e/terraform/provision-infra/provision-nomad/variables.tf b/e2e/terraform/provision-infra/provision-nomad/variables.tf index b64fd37cca4..927f41e9d2e 100644 --- a/e2e/terraform/provision-infra/provision-nomad/variables.tf +++ b/e2e/terraform/provision-infra/provision-nomad/variables.tf @@ -99,3 +99,9 @@ variable "keys_dir" { description = "Directory where all the configuration TLS and SSH keys and certificates will be stored for provisioning" default = "" } + +variable "device_plugin_local_binary" { + type = string + description = "Path to the example device plugin binary for e2e device tests" + default = "" +} diff --git a/e2e/terraform/provision-infra/variables.tf b/e2e/terraform/provision-infra/variables.tf index 44345a5a18f..d343fcd0ede 100644 --- a/e2e/terraform/provision-infra/variables.tf +++ b/e2e/terraform/provision-infra/variables.tf @@ -125,3 +125,9 @@ variable "nomad_local_binary_client_windows_2022" { type = string default = "" } + +variable "device_plugin_local_binary" { + description = "Path to the example device plugin binary for e2e 
device tests" + type = string + default = "" +} diff --git a/e2e/terraform/terraform.tfvars b/e2e/terraform/terraform.tfvars index a454167079b..eb0fbbd5549 100644 --- a/e2e/terraform/terraform.tfvars +++ b/e2e/terraform/terraform.tfvars @@ -5,6 +5,10 @@ # with `make dev` or similar (../../ = this repository root) # before running `terraform apply` and created the /pkg/goos_goarch/binary # folder +# +# For the device e2e tests, also build the example device plugin: +# make pkg/linux_amd64/nomad-device-example nomad_local_binary = "../../pkg/linux_amd64/nomad" nomad_local_binary_client_windows_2022 = "../../pkg/windows_amd64/nomad.exe" +device_plugin_local_binary = "../../pkg/linux_amd64/nomad-device-example" diff --git a/e2e/terraform/variables.tf b/e2e/terraform/variables.tf index 7b586013648..e2af8ed5fee 100644 --- a/e2e/terraform/variables.tf +++ b/e2e/terraform/variables.tf @@ -122,3 +122,9 @@ variable "nomad_local_binary_client_windows_2022" { type = string default = "" } + +variable "device_plugin_local_binary" { + description = "Path to the example device plugin binary for e2e device tests" + type = string + default = "" +} diff --git a/nomad/structs/constraint.go b/nomad/structs/constraint.go index d48fe1fe17f..e84389d3903 100644 --- a/nomad/structs/constraint.go +++ b/nomad/structs/constraint.go @@ -25,6 +25,7 @@ var ( "${attr.", "${device.", "${meta.", + "${device.attr.", } ) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index e291e5b9968..e0ee7f63ffc 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -3038,14 +3038,85 @@ func (ns Networks) Modes() *set.Set[string] { }) } +// ShareDevices indicates whether the task should be placed on a shared device // ShareDevices indicates whether the task should be placed on a shared device type ShareDevices struct { + // Enabled + Enabled bool `hcl:"enabled"` + // SharedDeviceID is an optional field for use in environments with + // multiple shared devices, to make the shared 
device ID available to + // the plugin. If in use alongside the device.id constraint, the two must + // match or the job will not be placed. + SharedDeviceId string `hcl:"shared_device_id,optional"` +} +type DevicePreferences struct { Enabled bool // SharedDeviceID is an optional field for use in environments with // multiple shared devices, to make the shared device ID available to // the plugin. If in use alongside the device.id constraint, the two must // match or the job will not be placed. SharedDeviceId string + + // FirstAvailable specifies a prioritized list of device options. The + // scheduler will attempt to satisfy each option in order, selecting the + // first one that can be fulfilled. Mutually exclusive with Count. + FirstAvailable []*DeviceOption +} + +// DeviceOption represents a single option in a first_available device selection. +// Each option specifies a count and optional constraints that must be satisfied +// for this option to be selected. +type DeviceOption struct { + // Count is the number of requested devices for this option + Count uint64 + + // Constraints are a set of constraints to apply when selecting the device + // to use for this option. 
+ Constraints Constraints +} + +func (o *DeviceOption) Equal(other *DeviceOption) bool { + if o == other { + return true + } + if o == nil || other == nil { + return false + } + return o.Count == other.Count && + o.Constraints.Equal(&other.Constraints) +} + +func (o *DeviceOption) Copy() *DeviceOption { + if o == nil { + return nil + } + return &DeviceOption{ + Count: o.Count, + Constraints: CopySliceConstraints(o.Constraints), + } +} + +func (o *DeviceOption) Validate() error { + if o == nil { + return nil + } + + var mErr multierror.Error + for idx, constr := range o.Constraints { + // Ensure that the constraint doesn't use an operand we do not allow + switch constr.Operand { + case ConstraintDistinctHosts, ConstraintDistinctProperty: + outer := fmt.Errorf("Constraint %d validation failed: using unsupported operand %q", idx+1, constr.Operand) + _ = multierror.Append(&mErr, outer) + default: + if err := constr.Validate(); err != nil { + outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) + _ = multierror.Append(&mErr, outer) + } + } + } + + return mErr.ErrorOrNil() } // RequestedDevice is used to request a device for a task. @@ -3061,20 +3132,30 @@ type RequestedDevice struct { // * "nvidia/gpu/GTX2080Ti" Name string - // Count is the number of requested devices + // Count is the number of requested devices. Mutually exclusive with + // FirstAvailable. Count uint64 // Constraints are a set of constraints to apply when selecting the device - // to use. + // to use. When FirstAvailable is specified, these constraints are applied + // as base constraints that all options must also satisfy. Constraints Constraints // Affinities are a set of affinities to apply when selecting the device - // to use. + // to use. When FirstAvailable is specified, these affinities are applied + // as base affinities for all options. 
Affinities Affinities + //Device Preferences + DevicePreferences *DevicePreferences + // ShareDevices indicates whether the job should be placed on a shared device // and is willing to share ShareDevices *ShareDevices + // FirstAvailable specifies a prioritized list of device options. The + // scheduler will attempt to satisfy each option in order, selecting the + // first one that can be fulfilled. Mutually exclusive with Count. + FirstAvailable []*DeviceOption } func (r *RequestedDevice) String() string { @@ -3088,10 +3169,21 @@ func (r *RequestedDevice) Equal(o *RequestedDevice) bool { if r == nil || o == nil { return false } - return r.Name == o.Name && - r.Count == o.Count && - r.Constraints.Equal(&o.Constraints) && - r.Affinities.Equal(&o.Affinities) + if r.Name != o.Name || r.Count != o.Count { + return false + } + if !r.Constraints.Equal(&o.Constraints) || !r.Affinities.Equal(&o.Affinities) { + return false + } + if len(r.FirstAvailable) != len(o.FirstAvailable) { + return false + } + for i, opt := range r.FirstAvailable { + if !opt.Equal(o.FirstAvailable[i]) { + return false + } + } + return true } func (r *RequestedDevice) Copy() *RequestedDevice { @@ -3103,6 +3195,13 @@ func (r *RequestedDevice) Copy() *RequestedDevice { nr.Constraints = CopySliceConstraints(nr.Constraints) nr.Affinities = CopySliceAffinities(nr.Affinities) + if len(r.FirstAvailable) > 0 { + nr.FirstAvailable = make([]*DeviceOption, len(r.FirstAvailable)) + for i, opt := range r.FirstAvailable { + nr.FirstAvailable[i] = opt.Copy() + } + } + return &nr } @@ -3141,6 +3240,12 @@ func (r *RequestedDevice) Validate() error { _ = multierror.Append(&mErr, errors.New("device name must be given as one of the following: type, vendor/type, or vendor/type/name")) } + // Count and FirstAvailable are mutually exclusive + if r.Count > 0 && len(r.FirstAvailable) > 0 { + _ = multierror.Append(&mErr, errors.New("'count' and 'first_available' are mutually exclusive")) + } + + // Validate base constraints 
for idx, constr := range r.Constraints { // Ensure that the constraint doesn't use an operand we do not allow switch constr.Operand { @@ -3154,6 +3259,8 @@ func (r *RequestedDevice) Validate() error { } } } + + // Validate base affinities for idx, affinity := range r.Affinities { if err := affinity.Validate(); err != nil { outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) @@ -3161,6 +3268,18 @@ func (r *RequestedDevice) Validate() error { } } + // Validate each first_available option + for idx, opt := range r.FirstAvailable { + if opt == nil { + _ = multierror.Append(&mErr, fmt.Errorf("first_available %d is nil", idx+1)) + continue + } + if err := opt.Validate(); err != nil { + outer := fmt.Errorf("first_available %d validation failed: %s", idx+1, err) + _ = multierror.Append(&mErr, outer) + } + } + return mErr.ErrorOrNil() } diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 2f66c9c2de0..9eae90cd5ce 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -6780,6 +6780,180 @@ func TestDevicesEquals(t *testing.T) { } } +func TestDeviceOption_Equal(t *testing.T) { + ci.Parallel(t) + + must.Equal[*DeviceOption](t, nil, nil) + must.NotEqual[*DeviceOption](t, nil, new(DeviceOption)) + + opt1 := &DeviceOption{ + Count: 2, + Constraints: []*Constraint{ + {LTarget: "${attr.kernel.name}", Operand: "=", RTarget: "linux"}, + }, + } + + // Equal copy + opt2 := opt1.Copy() + must.True(t, opt1.Equal(opt2)) + + // Different count + opt3 := opt1.Copy() + opt3.Count = 4 + must.False(t, opt1.Equal(opt3)) + + // Different constraints + opt4 := opt1.Copy() + opt4.Constraints = []*Constraint{ + {LTarget: "${attr.kernel.name}", Operand: "=", RTarget: "darwin"}, + } + must.False(t, opt1.Equal(opt4)) +} + +func TestDeviceOption_Copy(t *testing.T) { + ci.Parallel(t) + + // Nil copy + var nilOpt *DeviceOption + must.Nil(t, nilOpt.Copy()) + + opt := &DeviceOption{ + Count: 2, + Constraints: []*Constraint{ + {LTarget: 
"${attr.kernel.name}", Operand: "=", RTarget: "linux"}, + }, + } + + cp := opt.Copy() + must.True(t, opt.Equal(cp)) + + // Modify original, copy should be unchanged + opt.Count = 10 + opt.Constraints[0].RTarget = "darwin" + must.Eq(t, uint64(2), cp.Count) + must.Eq(t, "linux", cp.Constraints[0].RTarget) +} + +func TestDeviceOption_Validate(t *testing.T) { + ci.Parallel(t) + + // Valid option (no constraints - device constraints use ${device.*} which + // is validated at scheduler time, not job submission time) + opt := &DeviceOption{ + Count: 2, + } + must.NoError(t, opt.Validate()) + + // Invalid constraint operand (distinct_hosts not allowed for devices) + opt2 := &DeviceOption{ + Count: 1, + Constraints: []*Constraint{ + {LTarget: "${attr.kernel.name}", Operand: ConstraintDistinctHosts, RTarget: "true"}, + }, + } + err := opt2.Validate() + must.Error(t, err) + must.StrContains(t, err.Error(), "unsupported operand") +} + +func TestRequestedDevice_FirstAvailable_Equal(t *testing.T) { + ci.Parallel(t) + + rd1 := &RequestedDevice{ + Name: "nvidia/gpu", + FirstAvailable: []*DeviceOption{ + {Count: 2}, + {Count: 1}, + }, + } + + // Equal copy + rd2 := rd1.Copy() + must.True(t, rd1.Equal(rd2)) + + // Different number of options + rd3 := rd1.Copy() + rd3.FirstAvailable = rd3.FirstAvailable[:1] + must.False(t, rd1.Equal(rd3)) + + // Different option content + rd4 := rd1.Copy() + rd4.FirstAvailable[0].Count = 4 + must.False(t, rd1.Equal(rd4)) +} + +func TestRequestedDevice_FirstAvailable_Copy(t *testing.T) { + ci.Parallel(t) + + rd := &RequestedDevice{ + Name: "nvidia/gpu", + FirstAvailable: []*DeviceOption{ + {Count: 2, Constraints: []*Constraint{{LTarget: "${attr.kernel.name}", Operand: "=", RTarget: "linux"}}}, + }, + } + + cp := rd.Copy() + must.True(t, rd.Equal(cp)) + + // Modify original, copy should be unchanged + rd.FirstAvailable[0].Count = 10 + rd.FirstAvailable[0].Constraints[0].RTarget = "darwin" + must.Eq(t, uint64(2), cp.FirstAvailable[0].Count) + must.Eq(t, 
"linux", cp.FirstAvailable[0].Constraints[0].RTarget) +} + +func TestRequestedDevice_FirstAvailable_Validate(t *testing.T) { + ci.Parallel(t) + + // Valid first_available request (no constraints - device constraints use + // ${device.*} which is validated at scheduler time, not job submission time) + rd := &RequestedDevice{ + Name: "nvidia/gpu", + FirstAvailable: []*DeviceOption{ + {Count: 2}, + {Count: 1}, + }, + } + must.NoError(t, rd.Validate()) + + // Count and FirstAvailable are mutually exclusive + rd2 := &RequestedDevice{ + Name: "nvidia/gpu", + Count: 2, + FirstAvailable: []*DeviceOption{ + {Count: 1}, + }, + } + err := rd2.Validate() + must.Error(t, err) + must.StrContains(t, err.Error(), "mutually exclusive") + + // Invalid option in FirstAvailable (distinct_hosts not allowed) + rd3 := &RequestedDevice{ + Name: "nvidia/gpu", + FirstAvailable: []*DeviceOption{ + { + Count: 1, + Constraints: []*Constraint{ + {LTarget: "${attr.kernel.name}", Operand: ConstraintDistinctHosts, RTarget: "true"}, + }, + }, + }, + } + err = rd3.Validate() + must.Error(t, err) + must.StrContains(t, err.Error(), "first_available 1 validation failed") + + // Nil option in FirstAvailable + rd4 := &RequestedDevice{ + Name: "nvidia/gpu", + FirstAvailable: []*DeviceOption{nil}, + } + err = rd4.Validate() + must.Error(t, err) + must.StrContains(t, err.Error(), "is nil") +} + func TestAllocatedPortMapping_Equal(t *testing.T) { ci.Parallel(t) diff --git a/scheduler/feasible/device.go b/scheduler/feasible/device.go index 3b06062676b..7ba3789c1aa 100644 --- a/scheduler/feasible/device.go +++ b/scheduler/feasible/device.go @@ -102,26 +102,75 @@ func (m *memoryNodeMatcher) Matches(instanceID string, device *structs.NodeDevic // createOffer takes a device request and returns an assignment as well as a // score for the assignment. If no assignment is possible, an error is -// returned explaining why. 
-func (d *deviceAllocator) createOffer(mem *memoryNodeMatcher, ask *structs.RequestedDevice) (out *structs.AllocatedDeviceResource, score float64, err error) { +// returned explaining why. The returned sumMatchedAffinityWeights is the sum +// of affinity weights that matched, and totalAffinityWeight is the sum of +// absolute values of all affinity weights considered (for normalization). +func (d *deviceAllocator) createOffer(mem *memoryNodeMatcher, ask *structs.RequestedDevice) (out *structs.AllocatedDeviceResource, sumMatchedAffinityWeights float64, totalAffinityWeight float64, err error) { // Try to hot path if len(d.Devices) == 0 { - return nil, 0.0, fmt.Errorf("no devices available") + return nil, 0.0, 0.0, fmt.Errorf("no devices available") } + + // Handle first_available selection + if len(ask.FirstAvailable) > 0 { + return d.createOfferFirstAvailable(mem, ask) + } + if ask.Count == 0 { - return nil, 0.0, fmt.Errorf("invalid request of zero devices") + return nil, 0.0, 0.0, fmt.Errorf("invalid request of zero devices") + } + + return d.createOfferWithParams(mem, ask.ID(), ask.Count, ask.Constraints, ask.Affinities, ask.ShareDevices) +} + +// createOfferFirstAvailable tries each option in the FirstAvailable list in order, +// returning the first successful offer. +func (d *deviceAllocator) createOfferFirstAvailable(mem *memoryNodeMatcher, ask *structs.RequestedDevice) (out *structs.AllocatedDeviceResource, sumMatchedAffinityWeights float64, totalAffinityWeight float64, err error) { + var lastErr error + + for _, opt := range ask.FirstAvailable { + if opt.Count == 0 { + continue + } + + // Combine base constraints with option-specific constraints + combinedConstraints := make(structs.Constraints, 0, len(ask.Constraints)+len(opt.Constraints)) + combinedConstraints = append(combinedConstraints, ask.Constraints...) + combinedConstraints = append(combinedConstraints, opt.Constraints...) 
+ + offer, matchedWeights, totalWeight, offerErr := d.createOfferWithParams(mem, ask.ID(), opt.Count, combinedConstraints, ask.Affinities, ask.ShareDevices) + if offer != nil { + return offer, matchedWeights, totalWeight, nil + } + lastErr = offerErr } + // None of the options could be satisfied + if lastErr != nil { + return nil, 0.0, 0.0, fmt.Errorf("no first_available option could be satisfied: %v", lastErr) + } + return nil, 0.0, 0.0, fmt.Errorf("no first_available options defined") +} + +// createOfferWithParams is the core offer creation logic that can be used for both +// standard requests and first_available options. +func (d *deviceAllocator) createOfferWithParams(mem *memoryNodeMatcher, deviceID *structs.DeviceIdTuple, count uint64, constraints structs.Constraints, affinities structs.Affinities, shareDevices *structs.ShareDevices) (out *structs.AllocatedDeviceResource, sumMatchedAffinityWeights float64, totalAffinityWeight float64, err error) { // Hold the current best offer var offer *structs.AllocatedDeviceResource var offerScore float64 var matchedWeights float64 + // Calculate the total weight of all affinities (for normalization purposes) + var totalWeight float64 + for _, a := range affinities { + totalWeight += math.Abs(float64(a.Weight)) + } + // Determine the devices that are feasible based on availability and // constraints for id, devInst := range d.Devices { - // Check if the device works - if !nodeDeviceMatches(d.ctx, devInst.Device, ask) { + // Check if the device works (name/type match and constraints) + if !d.deviceMatchesWithConstraints(devInst.Device, deviceID, constraints) { continue } @@ -144,21 +193,24 @@ func (d *deviceAllocator) createOffer(mem *memoryNodeMatcher, ask *structs.Reque if !mem.Matches(instanceID, devInst.Device) { continue } + if d.deviceIDMatchesConstraint(instanceID, constraints, devInst.Device) { + assignable = append(assignable, instanceID) + } - if d.deviceIDConstraintAndSharingChecks(instanceID, ask.Constraints, 
ask.ShareDevices, devInst.Device) { + if d.deviceIDConstraintAndSharingChecks(instanceID, constraints, shareDevices, devInst.Device) { assignable = append(assignable, instanceID) - if ask.ShareDevices != nil { - willShare[instanceID] = ask.ShareDevices.Enabled //only update willShare map if assignable + if shareDevices != nil { + willShare[instanceID] = shareDevices.Enabled //only update willShare map if assignable } } // Don't assign more than the ask - if len(assignable) == int(ask.Count) { + if len(assignable) == int(count) { break } } // This device doesn't have enough instances - if len(assignable) < int(ask.Count) { + if len(assignable) < int(count) { continue } @@ -168,15 +220,12 @@ func (d *deviceAllocator) createOffer(mem *memoryNodeMatcher, ask *structs.Reque // Track the sum of matched affinity weights in a separate variable // We return this if this device had the best score compared to other devices considered var sumMatchedWeights float64 - if l := len(ask.Affinities); l != 0 { - totalWeight := 0.0 - for _, a := range ask.Affinities { + if len(affinities) != 0 { + for _, a := range affinities { // Resolve the targets lVal, lOk := resolveDeviceTarget(a.LTarget, devInst.Device) rVal, rOk := resolveDeviceTarget(a.RTarget, devInst.Device) - totalWeight += math.Abs(float64(a.Weight)) - // Check if satisfied if !checkAttributeAffinity(d.ctx, a.Operand, lVal, rVal, lOk, rOk) { continue @@ -186,7 +235,9 @@ func (d *deviceAllocator) createOffer(mem *memoryNodeMatcher, ask *structs.Reque } // normalize - choiceScore /= totalWeight + if totalWeight > 0 { + choiceScore /= totalWeight + } } // Only use the device if it is a higher score than we have already seen @@ -212,9 +263,33 @@ func (d *deviceAllocator) createOffer(mem *memoryNodeMatcher, ask *structs.Reque // Failed to find a match if offer == nil { - return nil, 0.0, fmt.Errorf("no devices match request") + return nil, 0.0, 0.0, fmt.Errorf("no devices match request") + } + + return offer, matchedWeights, 
totalWeight, nil +} + +// deviceMatchesWithConstraints checks if a device matches the given device ID +// and constraints. This is used for offer creation where we have explicit +// parameters rather than a full RequestedDevice. +func (d *deviceAllocator) deviceMatchesWithConstraints(device *structs.NodeDeviceResource, deviceID *structs.DeviceIdTuple, constraints structs.Constraints) bool { + if !device.ID().Matches(deviceID) { + return false } - return offer, matchedWeights, nil + + // Check constraints + for _, c := range constraints { + // Resolve the targets + lVal, lOk := resolveDeviceTarget(c.LTarget, device) + rVal, rOk := resolveDeviceTarget(c.RTarget, device) + + // Check if satisfied + if !checkAttributeConstraint(d.ctx, c.Operand, lVal, rVal, lOk, rOk) { + return false + } + } + + return true } // deviceIDMatchesConstraint checks a device instance ID against the constraints diff --git a/scheduler/feasible/device_test.go b/scheduler/feasible/device_test.go index 756bcde70e4..4c25c37717d 100644 --- a/scheduler/feasible/device_test.go +++ b/scheduler/feasible/device_test.go @@ -125,7 +125,7 @@ func TestDeviceAllocator_Allocate_GenericRequest(t *testing.T) { ask := deviceRequest("gpu", 1, nil, nil) mem := anyMemoryNodeMatcher() - out, score, err := d.createOffer(mem, ask) + out, score, _, err := d.createOffer(mem, ask) must.NotNil(t, out) must.Zero(t, score) must.NoError(t, err) @@ -148,7 +148,7 @@ func TestDeviceAllocator_Allocate_FullyQualifiedRequest(t *testing.T) { ask := deviceRequest("intel/fpga/F100", 1, nil, nil) mem := anyMemoryNodeMatcher() - out, score, err := d.createOffer(mem, ask) + out, score, _, err := d.createOffer(mem, ask) must.NotNil(t, out) must.Zero(t, score) must.NoError(t, err) @@ -171,7 +171,7 @@ func TestDeviceAllocator_Allocate_NotEnoughInstances(t *testing.T) { ask := deviceRequest("gpu", 4, nil, nil) mem := anyMemoryNodeMatcher() - out, _, err := d.createOffer(mem, ask) + out, _, _, err := d.createOffer(mem, ask) must.Nil(t, out) 
must.ErrorContains(t, err, "no devices match request") } @@ -190,7 +190,7 @@ func TestDeviceAllocator_Allocate_NUMA_available(t *testing.T) { topology: structs.MockWorkstationTopology(), devices: set.From([]string{"nvidia/gpu/1080ti"}), } - out, _, err := d.createOffer(mem, ask) + out, _, _, err := d.createOffer(mem, ask) must.NoError(t, err) must.SliceLen(t, 2, out.DeviceIDs) // DeviceIDs are actually instance ids } @@ -223,7 +223,7 @@ func TestDeviceAllocator_Allocate_NUMA_node1(t *testing.T) { topology: structs.MockWorkstationTopology(), devices: set.From([]string{"xilinx/fpga/7XA"}), } - out, _, err := d.createOffer(mem, ask) + out, _, _, err := d.createOffer(mem, ask) must.NoError(t, err) must.SliceLen(t, 1, out.DeviceIDs) } @@ -345,7 +345,7 @@ func TestDeviceAllocate_Constraints_NoMemoryMatch(t *testing.T) { ask := deviceRequest(c.Name, 1, c.Constraints, nil) mem := anyMemoryNodeMatcher() - out, score, err := d.createOffer(mem, ask) + out, score, _, err := d.createOffer(mem, ask) if c.NoPlacement { must.Nil(t, out) } else { @@ -393,7 +393,7 @@ func TestDeviceAllocate_Constraints_MemoryMatch(t *testing.T) { }, devices: set.From([]string{nvidia0.ID().String()}), } - out, _, err := d.createOffer(mem, ask) + out, _, _, err := d.createOffer(mem, ask) // the first memoryNodeMatcher does not have the correct memoryNode must.ErrorContains(t, err, "no devices match") @@ -401,7 +401,7 @@ func TestDeviceAllocate_Constraints_MemoryMatch(t *testing.T) { // change to the correct node mem.memoryNode = 2 - out, _, err = d.createOffer(mem, ask) + out, _, _, err = d.createOffer(mem, ask) must.NoError(t, err) must.Len(t, 1, out.DeviceIDs) @@ -498,7 +498,7 @@ func TestDeviceAllocator_Affinities(t *testing.T) { ask := deviceRequest(c.Name, 1, nil, c.Affinities) mem := anyMemoryNodeMatcher() - out, score, err := d.createOffer(mem, ask) + out, score, _, err := d.createOffer(mem, ask) must.NotNil(t, out) must.NoError(t, err) if c.ZeroScore { @@ -514,6 +514,286 @@ func 
TestDeviceAllocator_Affinities(t *testing.T) { } } +// Test FirstAvailable: first option is selected when it can be satisfied +func TestDeviceAllocator_FirstAvailable_SelectsFirstOption(t *testing.T) { + ci.Parallel(t) + + _, ctx := MockContext(t) + n := multipleNvidiaNode() + d := newDeviceAllocator(ctx, n) + must.NotNil(t, d) + + nvidia0 := n.NodeResources.Devices[0] // 1080ti with 2 instances + nvidia1 := n.NodeResources.Devices[1] // 2080ti with 2 instances + + // Build a request that prefers 1080ti first, then falls back to 2080ti + ask := &structs.RequestedDevice{ + Name: "nvidia/gpu", + FirstAvailable: []*structs.DeviceOption{ + { + Count: 1, + Constraints: []*structs.Constraint{ + { + LTarget: "${device.model}", + Operand: "=", + RTarget: "1080ti", + }, + }, + }, + { + Count: 1, + Constraints: []*structs.Constraint{ + { + LTarget: "${device.model}", + Operand: "=", + RTarget: "2080ti", + }, + }, + }, + }, + } + + mem := anyMemoryNodeMatcher() + out, _, _, err := d.createOffer(mem, ask) + must.NoError(t, err) + must.NotNil(t, out) + + // Should select 1080ti (first option) + must.Eq(t, "1080ti", out.Name) + must.SliceLen(t, 1, out.DeviceIDs) + must.SliceContains(t, collectInstanceIDs(nvidia0), out.DeviceIDs[0]) + _ = nvidia1 // silence unused warning +} + +// Test FirstAvailable: falls back to second option when first cannot be satisfied +func TestDeviceAllocator_FirstAvailable_FallsBackToSecondOption(t *testing.T) { + ci.Parallel(t) + + _, ctx := MockContext(t) + n := multipleNvidiaNode() + d := newDeviceAllocator(ctx, n) + must.NotNil(t, d) + + nvidia1 := n.NodeResources.Devices[1] // 2080ti with 2 instances + + // Build a request where first option cannot be satisfied (no H100) + // but second option can (2080ti exists) + ask := &structs.RequestedDevice{ + Name: "nvidia/gpu", + FirstAvailable: []*structs.DeviceOption{ + { + Count: 1, + Constraints: []*structs.Constraint{ + { + LTarget: "${device.model}", + Operand: "=", + RTarget: "H100", // doesn't exist 
+ }, + }, + }, + { + Count: 1, + Constraints: []*structs.Constraint{ + { + LTarget: "${device.model}", + Operand: "=", + RTarget: "2080ti", + }, + }, + }, + }, + } + + mem := anyMemoryNodeMatcher() + out, _, _, err := d.createOffer(mem, ask) + must.NoError(t, err) + must.NotNil(t, out) + + // Should select 2080ti (second option since first failed) + must.Eq(t, "2080ti", out.Name) + must.SliceLen(t, 1, out.DeviceIDs) + must.SliceContains(t, collectInstanceIDs(nvidia1), out.DeviceIDs[0]) +} + +// Test FirstAvailable: count requirements are respected +func TestDeviceAllocator_FirstAvailable_CountRequirements(t *testing.T) { + ci.Parallel(t) + + _, ctx := MockContext(t) + n := multipleNvidiaNode() + d := newDeviceAllocator(ctx, n) + must.NotNil(t, d) + + nvidia1 := n.NodeResources.Devices[1] // 2080ti with 2 instances + + // Build a request where first option needs 4 GPUs (not available) + // but second option only needs 2 + ask := &structs.RequestedDevice{ + Name: "nvidia/gpu", + FirstAvailable: []*structs.DeviceOption{ + { + Count: 4, // can't satisfy - not enough instances + Constraints: []*structs.Constraint{ + { + LTarget: "${device.model}", + Operand: "=", + RTarget: "1080ti", + }, + }, + }, + { + Count: 2, // can satisfy + Constraints: []*structs.Constraint{ + { + LTarget: "${device.model}", + Operand: "=", + RTarget: "2080ti", + }, + }, + }, + }, + } + + mem := anyMemoryNodeMatcher() + out, _, _, err := d.createOffer(mem, ask) + must.NoError(t, err) + must.NotNil(t, out) + + // Should select 2080ti with 2 instances + must.Eq(t, "2080ti", out.Name) + must.SliceLen(t, 2, out.DeviceIDs) + must.SliceContainsSubset(t, collectInstanceIDs(nvidia1), out.DeviceIDs) +} + +// Test FirstAvailable: base constraints are applied to all options +func TestDeviceAllocator_FirstAvailable_BaseConstraints(t *testing.T) { + ci.Parallel(t) + + _, ctx := MockContext(t) + n := devNode() // has nvidia/gpu/1080ti and intel/fpga/F100 + d := newDeviceAllocator(ctx, n) + must.NotNil(t, d) + 
+ // Build a request with a base constraint that limits to nvidia vendor + // First option asks for a model that doesn't exist, second asks for 1080ti + ask := &structs.RequestedDevice{ + Name: "gpu", + // Base constraint: must be nvidia + Constraints: []*structs.Constraint{ + { + LTarget: "${device.vendor}", + Operand: "=", + RTarget: "nvidia", + }, + }, + FirstAvailable: []*structs.DeviceOption{ + { + Count: 1, + Constraints: []*structs.Constraint{ + { + LTarget: "${device.model}", + Operand: "=", + RTarget: "H100", // doesn't exist + }, + }, + }, + { + Count: 1, + // No additional constraints - should match nvidia/gpu/1080ti + }, + }, + } + + mem := anyMemoryNodeMatcher() + out, _, _, err := d.createOffer(mem, ask) + must.NoError(t, err) + must.NotNil(t, out) + + // Should select nvidia device (second option) + must.Eq(t, "nvidia", out.Vendor) +} + +// Test FirstAvailable: all options fail returns error +func TestDeviceAllocator_FirstAvailable_AllOptionsFail(t *testing.T) { + ci.Parallel(t) + + _, ctx := MockContext(t) + n := devNode() + d := newDeviceAllocator(ctx, n) + must.NotNil(t, d) + + // Build a request where no option can be satisfied + ask := &structs.RequestedDevice{ + Name: "nvidia/gpu", + FirstAvailable: []*structs.DeviceOption{ + { + Count: 1, + Constraints: []*structs.Constraint{ + { + LTarget: "${device.model}", + Operand: "=", + RTarget: "H100", // doesn't exist + }, + }, + }, + { + Count: 1, + Constraints: []*structs.Constraint{ + { + LTarget: "${device.model}", + Operand: "=", + RTarget: "GH200", // doesn't exist either + }, + }, + }, + }, + } + + mem := anyMemoryNodeMatcher() + out, _, _, err := d.createOffer(mem, ask) + must.Nil(t, out) + must.ErrorContains(t, err, "no first_available option could be satisfied") +} + +// Test FirstAvailable: base affinities are applied to all options +func TestDeviceAllocator_FirstAvailable_BaseAffinities(t *testing.T) { + ci.Parallel(t) + + _, ctx := MockContext(t) + n := multipleNvidiaNode() + d := 
newDeviceAllocator(ctx, n) + must.NotNil(t, d) + + // Build a request with base affinities that apply to all first_available options + ask := &structs.RequestedDevice{ + Name: "nvidia/gpu", + // Base affinity applies to whichever option is selected + Affinities: []*structs.Affinity{ + { + LTarget: "${device.attr.memory}", + Operand: ">", + RTarget: "10 GiB", + Weight: 50, + }, + }, + FirstAvailable: []*structs.DeviceOption{ + { + Count: 1, + }, + }, + } + + mem := anyMemoryNodeMatcher() + out, sumMatched, totalWeight, err := d.createOffer(mem, ask) + must.NoError(t, err) + must.NotNil(t, out) + + // Base affinity should have been considered + must.Eq(t, 50.0, totalWeight) + // sumMatched depends on which device was selected and matched + must.True(t, sumMatched >= 0) +} + func Test_equalBusID(t *testing.T) { must.True(t, equalBusID("0000:03:00.1", "00000000:03:00.1")) must.False(t, equalBusID("0000:03:00.1", "0000:03:00.0")) diff --git a/scheduler/feasible/feasible.go b/scheduler/feasible/feasible.go index 91f4cfcd5da..1e9c70d021e 100644 --- a/scheduler/feasible/feasible.go +++ b/scheduler/feasible/feasible.go @@ -1567,7 +1567,16 @@ func (c *DeviceChecker) hasDevices(option *structs.Node) bool { // Go through the required devices trying to find matches OUTER: for _, req := range c.required { - // Determine how many there are to place + // Handle first_available selection + if len(req.FirstAvailable) > 0 { + if c.canSatisfyFirstAvailable(req, available) { + continue OUTER + } + // None of the first_available options could be satisfied + return false + } + + // Standard device request - determine how many there are to place desiredCount := req.Count // Go through the device resources and see if we have a match @@ -1599,6 +1608,85 @@ OUTER: return true } +// canSatisfyFirstAvailable checks if any of the first_available options can be +// satisfied given the available devices. It tries each option in order and +// returns true if any option can be satisfied. 
If an option is satisfied, the +// available counts are decremented accordingly. +func (c *DeviceChecker) canSatisfyFirstAvailable(req *structs.RequestedDevice, available map[*structs.NodeDeviceResource]uint64) bool { + for _, opt := range req.FirstAvailable { + // Try to satisfy this option + if c.canSatisfyDeviceOption(req, opt, available) { + return true + } + } + return false +} + +// canSatisfyDeviceOption checks if a single device option can be satisfied. +// It combines the base constraints from the request with the option-specific +// constraints and checks if enough devices match. +func (c *DeviceChecker) canSatisfyDeviceOption(req *structs.RequestedDevice, opt *structs.DeviceOption, available map[*structs.NodeDeviceResource]uint64) bool { + desiredCount := opt.Count + + // Create a snapshot of available counts to restore if this option fails + snapshot := make(map[*structs.NodeDeviceResource]uint64, len(available)) + for k, v := range available { + snapshot[k] = v + } + + for d, unused := range available { + if unused == 0 { + continue + } + + // Check if device matches base requirements (name/type) + if !d.ID().Matches(req.ID()) { + continue + } + + // Check base constraints from the RequestedDevice + if !deviceMatchesConstraints(c.ctx, d, req.Constraints) { + continue + } + + // Check option-specific constraints + if !deviceMatchesConstraints(c.ctx, d, opt.Constraints) { + continue + } + + // This device type matches, consume instances + for desiredCount > 0 && available[d] > 0 { + available[d] -= 1 + desiredCount -= 1 + } + + if desiredCount == 0 { + return true + } + } + + // Failed to satisfy this option - restore available counts + for k, v := range snapshot { + available[k] = v + } + return false +} + +// deviceMatchesConstraints checks if a device satisfies a set of constraints. 
+func deviceMatchesConstraints(ctx Context, d *structs.NodeDeviceResource, constraints structs.Constraints) bool { + for _, c := range constraints { + // Resolve the targets + lVal, lOk := resolveDeviceTarget(c.LTarget, d) + rVal, rOk := resolveDeviceTarget(c.RTarget, d) + + // Check if satisfied + if !checkAttributeConstraint(ctx, c.Operand, lVal, rVal, lOk, rOk) { + return false + } + } + return true +} + // nodeDeviceMatches checks if the device matches the request and its // constraints. It doesn't check the count. func nodeDeviceMatches(ctx Context, d *structs.NodeDeviceResource, req *structs.RequestedDevice) bool { diff --git a/scheduler/feasible/feasible_test.go b/scheduler/feasible/feasible_test.go index 97265f2a9d8..bba10029810 100644 --- a/scheduler/feasible/feasible_test.go +++ b/scheduler/feasible/feasible_test.go @@ -3599,6 +3599,157 @@ func TestDeviceChecker(t *testing.T) { }, }, }, + { + Name: "first_available first option satisfied", + Result: true, + NodeDevices: []*structs.NodeDeviceResource{nvidia_A}, + RequestedDevices: []*structs.RequestedDevice{ + { + Name: "nvidia/gpu", + FirstAvailable: []*structs.DeviceOption{ + { + Count: 1, + Constraints: []*structs.Constraint{ + { + Operand: "=", + LTarget: "${device.model}", + RTarget: "1080ti", + }, + }, + }, + { + Count: 1, + Constraints: []*structs.Constraint{ + { + Operand: "=", + LTarget: "${device.model}", + RTarget: "2080ti", + }, + }, + }, + }, + }, + }, + }, + { + Name: "first_available fallback to second option", + Result: true, + NodeDevices: []*structs.NodeDeviceResource{nvidia_B}, // only has 2080ti + RequestedDevices: []*structs.RequestedDevice{ + { + Name: "nvidia/gpu", + FirstAvailable: []*structs.DeviceOption{ + { + Count: 1, + Constraints: []*structs.Constraint{ + { + Operand: "=", + LTarget: "${device.model}", + RTarget: "1080ti", // not available + }, + }, + }, + { + Count: 1, + Constraints: []*structs.Constraint{ + { + Operand: "=", + LTarget: "${device.model}", + RTarget: "2080ti", 
// available + }, + }, + }, + }, + }, + }, + }, + { + Name: "first_available no options satisfy", + Result: false, + NodeDevices: []*structs.NodeDeviceResource{nvidia_A}, + RequestedDevices: []*structs.RequestedDevice{ + { + Name: "nvidia/gpu", + FirstAvailable: []*structs.DeviceOption{ + { + Count: 1, + Constraints: []*structs.Constraint{ + { + Operand: "=", + LTarget: "${device.model}", + RTarget: "H100", // not available + }, + }, + }, + { + Count: 1, + Constraints: []*structs.Constraint{ + { + Operand: "=", + LTarget: "${device.model}", + RTarget: "GH200", // not available + }, + }, + }, + }, + }, + }, + }, + { + Name: "first_available with base constraint applied", + Result: true, + NodeDevices: []*structs.NodeDeviceResource{nvidia_A, nvidia_B}, + RequestedDevices: []*structs.RequestedDevice{ + { + Name: "nvidia/gpu", + // Base constraint that must be satisfied + Constraints: []*structs.Constraint{ + { + Operand: ">", + LTarget: "${device.attr.memory}", + RTarget: "3 GiB", + }, + }, + FirstAvailable: []*structs.DeviceOption{ + { + Count: 2, // need 2 devices + }, + }, + }, + }, + }, + { + Name: "first_available count not satisfiable falls back", + Result: true, + NodeDevices: []*structs.NodeDeviceResource{nvidia_A}, // only has 2 instances + RequestedDevices: []*structs.RequestedDevice{ + { + Name: "nvidia/gpu", + FirstAvailable: []*structs.DeviceOption{ + { + Count: 4, // can't satisfy - need 4 but only 2 available + Constraints: []*structs.Constraint{ + { + Operand: "=", + LTarget: "${device.model}", + RTarget: "1080ti", + }, + }, + }, + { + Count: 1, // can satisfy + Constraints: []*structs.Constraint{ + { + Operand: "=", + LTarget: "${device.model}", + RTarget: "1080ti", + }, + }, + }, + }, + }, + }, + }, } for _, c := range cases { diff --git a/scheduler/feasible/rank.go b/scheduler/feasible/rank.go index add4f587570..d43c5c47965 100644 --- a/scheduler/feasible/rank.go +++ b/scheduler/feasible/rank.go @@ -508,7 +508,8 @@ NEXTNODE: var offer 
*structs.AllocatedDeviceResource var sumAffinities float64 - offer, sumAffinities, err = devAllocator.createOffer(memory, device) + var deviceTotalWeight float64 + offer, sumAffinities, deviceTotalWeight, err = devAllocator.createOffer(memory, device) if offer == nil || err != nil { devAllocator = devAllocatorSnapshot taskResources.Devices = taskResourcesSnapshot @@ -521,11 +522,10 @@ NEXTNODE: devAllocator.AddReserved(offer) taskResources.Devices = append(taskResources.Devices, offer) - // Add the scores - if len(device.Affinities) != 0 { - for _, a := range device.Affinities { - totalDeviceAffinityWeight += math.Abs(float64(a.Weight)) - } + // Add the scores - use the weights returned by createOffer, which cover + // the base affinities for both standard and first_available requests + if deviceTotalWeight > 0 { + totalDeviceAffinityWeight += deviceTotalWeight sumMatchingAffinities += sumAffinities } count++ @@ -595,7 +595,7 @@ NEXTNODE: devices: set.From(task.Resources.NUMA.GetDevices()), } - offer, sumAffinities, err := devAllocator.createOffer(memory, device) + offer, sumAffinities, deviceTotalWeight, err := devAllocator.createOffer(memory, device) if offer == nil { offerErr = err @@ -630,7 +630,7 @@ NEXTNODE: devAllocatorEvict.AddAllocs(proposed) // attempt the offer again - offerEvict, sumAffinitiesEvict, err := devAllocatorEvict.createOffer(memory, device) + offerEvict, sumAffinitiesEvict, deviceTotalWeightEvict, err := devAllocatorEvict.createOffer(memory, device) if offerEvict == nil || err != nil { // we cannot acquire this device even with preemption iter.ctx.Logger().Named("binpack").Debug("unexpected error, unable to create device offer after considering preemption", "error", err) @@ -640,18 +640,19 @@ NEXTNODE: offer = offerEvict sumAffinities = sumAffinitiesEvict + //TODO: investigate and make sure this works as expected still devAllocator = devAllocatorEvict + deviceTotalWeight = deviceTotalWeightEvict } // assign the offer for this device to our allocator
devAllocator.AddReserved(offer) taskResources.Devices = append(taskResources.Devices, offer) - // Add the scores - if len(device.Affinities) != 0 { - for _, a := range device.Affinities { - totalDeviceAffinityWeight += math.Abs(float64(a.Weight)) - } + // Add the scores - use the weights returned by createOffer, which cover + // the base affinities for both standard and first_available requests + if deviceTotalWeight > 0 { + totalDeviceAffinityWeight += deviceTotalWeight sumMatchingAffinities += sumAffinities } count++ From 5f8b3a94ee7834536dad0ccd55597f6e3f2fcc8d Mon Sep 17 00:00:00 2001 From: Tehut Getahun Date: Mon, 1 Jun 2026 10:48:05 -0700 Subject: [PATCH 3/7] Add ShareDevices logic --- api/resources.go | 43 +++++--------- command/agent/job_endpoint.go | 47 +++++++--------- nomad/structs/devices.go | 5 +- nomad/structs/structs.go | 41 +++++++------- plugins/device/cmd/example/device.go | 55 ++++++++++++------ plugins/device/util.go | 16 +++++- scheduler/feasible/device.go | 84 ++++++++++++---------------- scheduler/feasible/device_test.go | 4 +- scheduler/feasible/rank.go | 7 +-- 9 files changed, 144 insertions(+), 158 deletions(-) diff --git a/api/resources.go b/api/resources.go index 9a16bc25dee..7a29d641a78 100644 --- a/api/resources.go +++ b/api/resources.go @@ -293,15 +293,11 @@ func (a Attribute) String() string { } } -type Shared string +// Shared mirrors the plugin.Shared string enum found +// on Devices.DetectedDevice that some devices use to +// report the status and presence of sharing subsystems -// DeviceSharing mirrors the plugin.DeviceSharing struct found -// on Devices.DetectedDevice.
It holds a string that some -// devices use to report the status and presence of sharing -// subsystems -type DeviceSharing struct { - Shared Shared -} +type Shared string const ( DeviceSharingUnset Shared = "" @@ -317,7 +313,7 @@ type NodeDeviceLocality struct { PciBusID string } -// ShareDevices indicates whether the task should be placed on a shared device +// ShareDevices indicates whether the task is willing to share its device type ShareDevices struct { // Enabled Enabled bool `hcl:"enabled"` @@ -328,21 +324,6 @@ type ShareDevices struct { SharedDeviceId string `hcl:"shared_device_id,optional"` } -type DevicePreferences struct { - // Enabled - Enabled bool `hcl:"enabled"` - // SharedDeviceID is an optional field for use in environments with - // multiple shared devices, to make the shared device ID available to - // the plugin. If in use alongside the device.id constraint, the two must - // match or the job will not be placed. - SharedDeviceId string `hcl:"shared_device_id,optional"` - - // FirstAvailable specifies a prioritized list of device options. The - // scheduler will attempt to satisfy each option in order, selecting the - // first one that can be fulfilled. Mutually exclusive with Count. - FirstAvailable []*DeviceOption `hcl:"first_available,block"` -} - // DeviceOption represents a single option in a first_available device selection. // Each option specifies a count and optional constraints that must be satisfied // for this option to be selected. @@ -353,6 +334,11 @@ type DeviceOption struct { // Constraints are a set of constraints to apply when selecting the device // to use for this option.
Constraints []*Constraint `hcl:"constraint,block"` + + // ShareDevices indicates whether this device option is willing to share + // TODO: determine if ShareDevices should be inherited or if, like count, + // it should only be set on one or the other + ShareDevices *ShareDevices `hcl:"share_devices,block"` } func (o *DeviceOption) Canonicalize() { @@ -391,15 +377,12 @@ type RequestedDevice struct { // as base affinities for all options. Affinities []*Affinity `hcl:"affinity,block"` - //Device Preferences - DevicePreferences *DevicePreferences `hcl:"device_preferences,block"` - // ShareDevices reports whether the task should be placed on a shared device ShareDevices *ShareDevices `hcl:"share_devices,block"` - // FirstAvailable specifies a prioritized list of device options. The - // scheduler will attempt to satisfy each option in order, selecting the - // first one that can be fulfilled. Mutually exclusive with Count. + //// FirstAvailable specifies a prioritized list of device options. The + //// scheduler will attempt to satisfy each option in order, selecting the + //// first one that can be fulfilled. Mutually exclusive with Count. 
FirstAvailable []*DeviceOption `hcl:"first_available,block"` } diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index 844dda60563..0b35da48106 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -1623,24 +1623,21 @@ func ApiResourcesToStructs(in *api.Resources) *structs.Resources { if len(in.Devices) > 0 { out.Devices = []*structs.RequestedDevice{} + for _, d := range in.Devices { - out.Devices = append(out.Devices, &structs.RequestedDevice{ - Name: d.Name, - Count: *d.Count, - Constraints: ApiConstraintsToStructs(d.Constraints), - Affinities: ApiAffinitiesToStructs(d.Affinities), - ShareDevices: ApiSharingToStructs(d.ShareDevices), - }) rd := &structs.RequestedDevice{ Name: d.Name, Constraints: ApiConstraintsToStructs(d.Constraints), Affinities: ApiAffinitiesToStructs(d.Affinities), } // Only set Count if not using FirstAvailable - if d.Count != nil { + if d.Count != nil && len(d.FirstAvailable) == 0 { rd.Count = *d.Count } - // Convert FirstAvailable options + if d.ShareDevices != nil && len(d.FirstAvailable) == 0 { + rd.ShareDevices = ApiShareDevicesToStructs(d.ShareDevices) + } + //// Convert FirstAvailable options if len(d.FirstAvailable) > 0 { rd.FirstAvailable = make([]*structs.DeviceOption, len(d.FirstAvailable)) for i, opt := range d.FirstAvailable { @@ -1650,16 +1647,11 @@ func ApiResourcesToStructs(in *api.Resources) *structs.Resources { if opt.Count != nil { rd.FirstAvailable[i].Count = *opt.Count } + if opt.ShareDevices != nil { + rd.FirstAvailable[i].ShareDevices = ApiShareDevicesToStructs(opt.ShareDevices) + } } } - //TODO: temp build out make better soon - if d.ShareDevices != nil || len(d.FirstAvailable) > 0 { - rd.DevicePreferences = &structs.DevicePreferences{FirstAvailable: rd.FirstAvailable} - } - if d.ShareDevices != nil { - rd.DevicePreferences.Enabled = d.ShareDevices.Enabled - rd.DevicePreferences.SharedDeviceId = d.ShareDevices.SharedDeviceId - } out.Devices = append(out.Devices, rd) 
} } @@ -1677,6 +1669,16 @@ func ApiResourcesToStructs(in *api.Resources) *structs.Resources { return out } +func ApiShareDevicesToStructs(in *api.ShareDevices) *structs.ShareDevices { + if in == nil { + return nil + } + return &structs.ShareDevices{ + Enabled: in.Enabled, + SharedDeviceId: in.SharedDeviceId, + } + +} func ApiNetworkResourceToStructs(in []*api.NetworkResource) []*structs.NetworkResource { var out []*structs.NetworkResource @@ -2361,14 +2363,3 @@ func validateEvalPriorityOpt(priority int) HTTPCodedError { } return nil } - -func ApiSharingToStructs(in *api.ShareDevices) *structs.ShareDevices { - if in == nil { - return nil - } - return &structs.ShareDevices{ - Enabled: in.Enabled, - SharedDeviceId: in.SharedDeviceId, - } - -} diff --git a/nomad/structs/devices.go b/nomad/structs/devices.go index 3096a175642..78061251fe4 100644 --- a/nomad/structs/devices.go +++ b/nomad/structs/devices.go @@ -124,14 +124,12 @@ func (d *DeviceAccounter) AddAllocs(allocs []*Allocation) (collision bool) { // Go through each task resource for _, tr := range a.AllocatedResources.Tasks { - // Go through each assigned device group for _, allocatedDeviceGroup := range tr.Devices { devID := allocatedDeviceGroup.ID() // Go through each assigned device for _, instanceID := range allocatedDeviceGroup.DeviceIDs { - // Mark that we are using the device. It may not be in the // map if the device is no longer being fingerprinted, is // unhealthy, etc. 
@@ -165,8 +163,7 @@ func willingToShare(res *AllocatedDeviceResource, deviceID string) bool { return false } // does exist, is true = > this is the shared device, it will share => return true - if exists, willing := res.WillShare[deviceID]; exists && willing { - + if willing, exists := res.WillShare[deviceID]; willing && exists { return true } // In all remaining cases we return false diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index e0ee7f63ffc..0cbb2e2e70f 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -3049,19 +3049,6 @@ type ShareDevices struct { // match or the job will not be placed. SharedDeviceId string `hcl:"shared_device_id,optional"` } -type DevicePreferences struct { - Enabled bool - // SharedDeviceID is an optional field for use in environments with - // multiple shared devices, to make the shared device ID available to - // the plugin. If in use alongside the device.id constraint, the two must - // match or the job will not be placed. - SharedDeviceId string - - // FirstAvailable specifies a prioritized list of device options. The - // scheduler will attempt to satisfy each option in order, selecting the - // first one that can be fulfilled. Mutually exclusive with Count. - FirstAvailable []*DeviceOption -} // DeviceOption represents a single option in a first_available device selection. // Each option specifies a count and optional constraints that must be satisfied @@ -3073,6 +3060,11 @@ type DeviceOption struct { // Constraints are a set of constraints to apply when selecting the device // to use for this option. 
Constraints Constraints + + // ShareDevices indicates whether this device option is willing to share + // TODO: determine if ShareDevices should be inherited or if, like count, + // it should only be set on one or the other + ShareDevices *ShareDevices `hcl:"share_devices,optional"` } func (o *DeviceOption) Equal(other *DeviceOption) bool { @@ -3091,8 +3083,9 @@ func (o *DeviceOption) Copy() *DeviceOption { return nil } return &DeviceOption{ - Count: o.Count, - Constraints: CopySliceConstraints(o.Constraints), + Count: o.Count, + ShareDevices: o.ShareDevices, + Constraints: CopySliceConstraints(o.Constraints), } } @@ -3146,12 +3139,10 @@ type RequestedDevice struct { // as base affinities for all options. Affinities Affinities - //Device Preferences - DevicePreferences *DevicePreferences - // ShareDevices indicates whether the job should be placed on a shared device // and is willing to share ShareDevices *ShareDevices + // FirstAvailable specifies a prioritized list of device options. The // scheduler will attempt to satisfy each option in order, selecting the // first one that can be fulfilled. Mutually exclusive with Count. 
@@ -3175,6 +3166,10 @@ func (r *RequestedDevice) Equal(o *RequestedDevice) bool { if !r.Constraints.Equal(&o.Constraints) || !r.Affinities.Equal(&o.Affinities) { return false } + + if r.ShareDevices != o.ShareDevices { + return false + } if len(r.FirstAvailable) != len(o.FirstAvailable) { return false } @@ -3708,6 +3703,9 @@ func (n *NodeDeviceResource) Equal(o *NodeDeviceResource) bool { return true } +// Shared mirrors the plugin.Shared string enum found +// on Devices.DetectedDevice that some devices use to +// report the status and presence of sharing subsystems type Shared string func (s Shared) String() string { @@ -3715,12 +3713,13 @@ func (s Shared) String() string { case DeviceSharingInactive: return "inactive" case DeviceSharingIneligible: - return "inelegible" + return "inelgible" case DeviceSharingActive: return "active" - default: - return "unset" + case DeviceSharingUnset: + return "" } + return "" } const ( diff --git a/plugins/device/cmd/example/device.go b/plugins/device/cmd/example/device.go index 0d9236bec51..67be6763bc3 100644 --- a/plugins/device/cmd/example/device.go +++ b/plugins/device/cmd/example/device.go @@ -50,6 +50,11 @@ const ( deviceName1 = "T4" deviceName2 = "T4" + deviceName3 = "P100" + deviceName4 = "P100" + deviceName5 = "P100" + deviceName6 = "A2" + deviceName7 = "A2" ) var ( @@ -118,6 +123,8 @@ var ( }), ), }) + + dSlice = []string{deviceName1, deviceName2, deviceName3, deviceName4, deviceName5, deviceName6, deviceName7} ) // Config contains configuration information for the plugin. 
@@ -315,14 +322,23 @@ func (d *NvidiaDevice) fingerprint(ctx context.Context, devices chan *device.Fin return } deviceGroups := make([]*device.DeviceGroup, 0) + //all := make([]*device.Device, len(dSlice)) shared, inactive := d.diffFiles(files) - if len(inactive) != 0 { - deviceGroups = append(deviceGroups, d.getDeviceGroup(inactive, deviceName2)) - } - if len(shared) != 0 { - deviceGroups = append(deviceGroups, d.getDeviceGroup(shared, deviceName1)) + for k, v := range shared { + deviceGroups = append(deviceGroups, d.getDeviceGroup([]*device.Device{v}, dSlice[k], "shared")) + } + for k, v := range inactive { + deviceGroups = append(deviceGroups, d.getDeviceGroup([]*device.Device{v}, dSlice[k], "inactive")) } + //if len(inactive) != 0 { + //deviceGroups = append(deviceGroups, d.getDeviceGroup(inactive)) + //} + + //if len(shared) != 0 { + // deviceGroups = append(deviceGroups, d.getDeviceGroup(shared)) + //} + d.logger.Info("files to fingerprint", "inactive files", len(inactive), "active files", len(shared)) devices <- device.NewFingerprint(deviceGroups...) @@ -355,7 +371,6 @@ func (d *NvidiaDevice) diffFiles(files []os.FileInfo) ([]*device.Device, []*devi } else { healthy = device.SharingInactive.String() } - d.logger.Info("checking health", "file perm", perms, "unhealthy perms", d.unhealthyPerm, "healthy", healthy) // See if we already have the device oldHealth, ok := d.devices[name] @@ -404,24 +419,30 @@ func (d *NvidiaDevice) diffFiles(files []os.FileInfo) ([]*device.Device, []*devi } // getDeviceGroup is a helper to build the DeviceGroup given a set of devices. 
-func (d *NvidiaDevice) getDeviceGroup(devices []*device.Device, name string) *device.DeviceGroup { +func (d *NvidiaDevice) getDeviceGroup(devices []*device.Device, name string, isShared string) *device.DeviceGroup { //d.logger.Error("getDeviceGroup", "device count", len(devices)) - var shared string - for _, v := range devices { - if shared == "" { - shared = v.Shared.String() - } - //d.logger.Error("getDeviceGroup", "loop", n, "deviceID", v.ID, "shared", v.Shared.String()) - } + //var ( + // shared string + //) + //shared = devices[0].Shared.String() + //for _, v := range devices { + // if shared == "" { + // shared = v.Shared.String() + // } + // //dName = dSlice[n] + // //d.logger.Error("getDeviceGroup", "loop", n, "deviceID", v.ID, "dSlice length", len(dSlice)) + //} return &device.DeviceGroup{ Vendor: vendor, Type: deviceType, Name: name, Devices: devices, Attributes: map[string]*structs.Attribute{ - - "cool-attribute": { - String: new("attribute-wearing-sunglasses"), + "shared": { + String: &isShared, + }, + "model": { + String: &name, }, }, } diff --git a/plugins/device/util.go b/plugins/device/util.go index 5a1b273edeb..234b41bb9b7 100644 --- a/plugins/device/util.go +++ b/plugins/device/util.go @@ -63,7 +63,7 @@ func convertProtoDevice(in *proto.DetectedDevice) *Device { Healthy: in.Healthy, HealthDesc: in.HealthDescription, HwLocality: convertProtoDeviceLocality(in.HwLocality), - Shared: Shared(in.GetShared()), + Shared: convertProtoDeviceShared(in.GetShared()), } } @@ -78,6 +78,19 @@ func convertProtoDeviceLocality(in *proto.DeviceLocality) *DeviceLocality { } } +// convertProtoDeviceShared converts between a proto device.Shared and structs.Shared +func convertProtoDeviceShared(in proto.Shared) Shared { + switch in { + case proto.Shared_SHARED_INELIGIBLE: + return "ineligible" + case proto.Shared_SHARED_ACTIVE: + return "active" + case proto.Shared_SHARED_INACTIVE: + return "inactive" + } + return "" +} + // convertProtoContainerReservation is used to 
convert between a proto and struct // ContainerReservation func convertProtoContainerReservation(in *proto.ContainerReservation) *ContainerReservation { @@ -392,7 +405,6 @@ func convertStructDeviceStats(in *DeviceStats) *proto.DeviceStats { } func convertShared(s Shared) proto.Shared { - switch s.String() { case "ineligible": return proto.Shared_SHARED_INELIGIBLE diff --git a/scheduler/feasible/device.go b/scheduler/feasible/device.go index 7ba3789c1aa..5352e7d3801 100644 --- a/scheduler/feasible/device.go +++ b/scheduler/feasible/device.go @@ -110,7 +110,6 @@ func (d *deviceAllocator) createOffer(mem *memoryNodeMatcher, ask *structs.Reque if len(d.Devices) == 0 { return nil, 0.0, 0.0, fmt.Errorf("no devices available") } - // Handle first_available selection if len(ask.FirstAvailable) > 0 { return d.createOfferFirstAvailable(mem, ask) @@ -138,7 +137,8 @@ func (d *deviceAllocator) createOfferFirstAvailable(mem *memoryNodeMatcher, ask combinedConstraints = append(combinedConstraints, ask.Constraints...) combinedConstraints = append(combinedConstraints, opt.Constraints...) - offer, matchedWeights, totalWeight, offerErr := d.createOfferWithParams(mem, ask.ID(), opt.Count, combinedConstraints, ask.Affinities, ask.ShareDevices) + offer, matchedWeights, totalWeight, offerErr := d.createOfferWithParams(mem, ask.ID(), opt.Count, + combinedConstraints, ask.Affinities, opt.ShareDevices) if offer != nil { return offer, matchedWeights, totalWeight, nil } @@ -154,7 +154,8 @@ func (d *deviceAllocator) createOfferFirstAvailable(mem *memoryNodeMatcher, ask // createOfferWithParams is the core offer creation logic that can be used for both // standard requests and first_available options. 
-func (d *deviceAllocator) createOfferWithParams(mem *memoryNodeMatcher, deviceID *structs.DeviceIdTuple, count uint64, constraints structs.Constraints, affinities structs.Affinities, shareDevices *structs.ShareDevices) (out *structs.AllocatedDeviceResource, sumMatchedAffinityWeights float64, totalAffinityWeight float64, err error) { +func (d *deviceAllocator) createOfferWithParams(mem *memoryNodeMatcher, deviceID *structs.DeviceIdTuple, count uint64, + constraints structs.Constraints, affinities structs.Affinities, shareDevices *structs.ShareDevices) (out *structs.AllocatedDeviceResource, sumMatchedAffinityWeights float64, totalAffinityWeight float64, err error) { // Hold the current best offer var offer *structs.AllocatedDeviceResource var offerScore float64 @@ -178,37 +179,34 @@ func (d *deviceAllocator) createOfferWithParams(mem *memoryNodeMatcher, deviceID assignable := []string{} willShare := make(map[string]bool) - for instanceID, v := range devInst.Instances { - var instanceSharedStatus structs.Shared - // mark shareable if we find a single shareable device - instanceSharedStatus = devInst.GetSharedByID(instanceID) - if instanceSharedStatus == structs.DeviceSharingActive { - d.ctx.Logger().Error("device sharing value", "value", instanceSharedStatus) + for instanceID, claimCount := range devInst.Instances { + if claimCount != 0 && devInst.GetSharedByID(instanceID) != structs.DeviceSharingActive { + continue } - if v != 0 && instanceSharedStatus != structs.DeviceSharingActive { + if !mem.Matches(instanceID, devInst.Device) { continue } - if !mem.Matches(instanceID, devInst.Device) { + if !d.deviceIDMatchesConstraint(instanceID, constraints, devInst.Device) { continue } - if d.deviceIDMatchesConstraint(instanceID, constraints, devInst.Device) { - assignable = append(assignable, instanceID) + // if we're targeting a specific GPU confirm it' the one we want + if shareDevices.SharedDeviceId != "" && shareDevices.SharedDeviceId != instanceID { + continue } + // if 
the task is willing to share, document in deviceAllocator + if d.deviceIDAllowsSharing(instanceID, shareDevices, devInst.Device) { + //only update willShare map if assignable & willing to share + willShare[instanceID] = true - if d.deviceIDConstraintAndSharingChecks(instanceID, constraints, shareDevices, devInst.Device) { - assignable = append(assignable, instanceID) - if shareDevices != nil { - willShare[instanceID] = shareDevices.Enabled //only update willShare map if assignable - } } + assignable = append(assignable, instanceID) // Don't assign more than the ask if len(assignable) == int(count) { break } } - // This device doesn't have enough instances if len(assignable) < int(count) { continue @@ -225,7 +223,6 @@ func (d *deviceAllocator) createOfferWithParams(mem *memoryNodeMatcher, deviceID // Resolve the targets lVal, lOk := resolveDeviceTarget(a.LTarget, devInst.Device) rVal, rOk := resolveDeviceTarget(a.RTarget, devInst.Device) - // Check if satisfied if !checkAttributeAffinity(d.ctx, a.Operand, lVal, rVal, lOk, rOk) { continue @@ -239,7 +236,6 @@ func (d *deviceAllocator) createOfferWithParams(mem *memoryNodeMatcher, deviceID choiceScore /= totalWeight } } - // Only use the device if it is a higher score than we have already seen if offer != nil && choiceScore < offerScore { continue @@ -323,43 +319,33 @@ func (d *deviceAllocator) deviceIDMatchesConstraint(id string, constraints struc return true } -// deviceIDAllowsSharing checks a device instance ID against the -// device's Shared status to ensure we're only assigning devices that -// are set up to be shared. 
-func (d *deviceAllocator) deviceIDAllowsSharing(id string, sharing *structs.ShareDevices, device *structs.NodeDeviceResource) bool { +// deviceIDAllowsSharing checks a device instance ID against the device's +// Shared status to ensure we're only assigning devices that can share +func (d *deviceAllocator) deviceIDAllowsSharing(id string, shareDevices *structs.ShareDevices, device *structs.NodeDeviceResource) bool { canShare := false + if shareDevices == nil { + return canShare + } for _, dev := range device.Instances { - // if the device has sharing active - if dev.ID == id { - if sharing.Enabled == true && dev.Shared.String() == structs.DeviceSharingActive.String() { - canShare = true - } else { - continue - } + if dev.ID != id { + continue } - } - // if the device and task are sharable and we're targeting a specific GPU - // confirm it's the one we want - if len(sharing.SharedDeviceId) != 0 { - if sharing.SharedDeviceId != id { - canShare = false + // return true if the device has sharing active and the task will share + if shareDevices.Enabled && dev.Shared == structs.DeviceSharingActive { + canShare = true } + } return canShare } - -// deviceIDConstraintAndSharingChecks returns a single boolean to report whether -// device ID matches all of the constraints and if applicable all of the -// requested sharing modes -func (d *deviceAllocator) deviceIDConstraintAndSharingChecks(id string, constraints structs.Constraints, sharing *structs.ShareDevices, device *structs.NodeDeviceResource) bool { - if passesConstraint := d.deviceIDMatchesConstraint(id, constraints, device); !passesConstraint { - return false +func (d *deviceAllocator) sharedDeviceIDMatches(instanceID string, shareDevices *structs.ShareDevices) bool { + if shareDevices == nil { + return true } - if sharing != nil { - if passesSharing := d.deviceIDAllowsSharing(id, sharing, device); !passesSharing { - return false - } + // if we're targeting a specific GPU confirm its the one we want + if 
shareDevices.SharedDeviceId != "" && shareDevices.SharedDeviceId != instanceID { + return false } return true } diff --git a/scheduler/feasible/device_test.go b/scheduler/feasible/device_test.go index 4c25c37717d..96017d7225c 100644 --- a/scheduler/feasible/device_test.go +++ b/scheduler/feasible/device_test.go @@ -980,8 +980,8 @@ func TestDeviceAllocator_Allocate_SharedDevices(t *testing.T) { } ask := sharedDeviceRequest(tc.deviceName, tc.count, testConstraints, nil, tc.shareDevices) - out, _, err := d.createOffer(mem, ask) - if len(tc.expectedErr) != 0 { + out, _, _, err := d.createOffer(mem, ask) + if tc.expectedErr != "" { must.ErrorContains(t, err, tc.expectedErr) must.Nil(t, out) return diff --git a/scheduler/feasible/rank.go b/scheduler/feasible/rank.go index d43c5c47965..5656dfd8d00 100644 --- a/scheduler/feasible/rank.go +++ b/scheduler/feasible/rank.go @@ -208,7 +208,6 @@ func (iter *BinPackIterator) SetSchedulerConfiguration(schedConfig *structs.Sche } func (iter *BinPackIterator) Next() *RankedNode { - NEXTNODE: for { // Get the next potential option @@ -285,7 +284,6 @@ NEXTNODE: currentPreemptions = append(currentPreemptions, allocs...) } preemptor.SetPreemptions(currentPreemptions) - // Check if we need task group network resource if len(iter.taskGroup.Networks) > 0 { ask := iter.taskGroup.Networks[0].Copy() @@ -554,7 +552,6 @@ NEXTNODE: // and devices WITH leveraging preemption. We will have already // made attempts without preemption. - // If preemption is not enabled, then this node is exhausted. 
if !iter.evict { // surface err from createOffer() iter.ctx.Metrics().ExhaustedNode(option.Node, fmt.Sprintf("devices: %s", err)) @@ -640,7 +637,6 @@ NEXTNODE: offer = offerEvict sumAffinities = sumAffinitiesEvict - //TODO: investigate and make sure this works as expected still devAllocator = devAllocatorEvict deviceTotalWeight = deviceTotalWeightEvict } @@ -1002,7 +998,8 @@ type ScoreNormalizationIterator struct { func NewScoreNormalizationIterator(ctx Context, source RankIterator) *ScoreNormalizationIterator { return &ScoreNormalizationIterator{ ctx: ctx, - source: source} + source: source, + } } func (iter *ScoreNormalizationIterator) Reset() { From ca9aec5ab2549da54f3217c387c03757edc47d59 Mon Sep 17 00:00:00 2001 From: Tehut Getahun Date: Wed, 10 Jun 2026 10:54:41 -0700 Subject: [PATCH 4/7] include device sharing considerations in feasibility check --- scheduler/feasible/device_test.go | 12 +++++----- scheduler/feasible/feasible.go | 39 +++++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/scheduler/feasible/device_test.go b/scheduler/feasible/device_test.go index 96017d7225c..3eab15f54a6 100644 --- a/scheduler/feasible/device_test.go +++ b/scheduler/feasible/device_test.go @@ -38,13 +38,13 @@ func deviceRequest(name string, count uint64, // sharedDeviceRequest takes the name, count and potential constraints and affinities // and returns a device request. 
func sharedDeviceRequest(name string, count uint64, - constraints []*structs.Constraint, affinities []*structs.Affinity, shareDevices *structs.ShareDevices) *structs.RequestedDevice { + constraints []*structs.Constraint, affinities []*structs.Affinity, firstAvailable []*structs.DeviceOption) *structs.RequestedDevice { return &structs.RequestedDevice{ - Name: name, - Count: count, - Constraints: constraints, - Affinities: affinities, - ShareDevices: shareDevices, + Name: name, + Count: count, + Constraints: constraints, + Affinities: affinities, + FirstAvailable: firstAvailable, } } diff --git a/scheduler/feasible/feasible.go b/scheduler/feasible/feasible.go index 1e9c70d021e..4e37ecfff71 100644 --- a/scheduler/feasible/feasible.go +++ b/scheduler/feasible/feasible.go @@ -1578,10 +1578,21 @@ OUTER: // Standard device request - determine how many there are to place desiredCount := req.Count + var willShare bool + if req.ShareDevices != nil { + willShare = req.ShareDevices.Enabled + } // Go through the device resources and see if we have a match for d, unused := range available { - if unused == 0 { + sharable := false + if willShare { + s, ok := d.Attributes["shared"].GetString() + if ok && s == "active" { + sharable = true + } + } + if unused == 0 { // don't need to change this because we only decrement if device & task are not sharable // Depleted continue } @@ -1589,13 +1600,17 @@ OUTER: // Check the constraints if nodeDeviceMatches(c.ctx, d, req) { for desiredCount > 0 && available[d] > 0 { - available[d] -= 1 desiredCount -= 1 + // consume device if not sharable + if !sharable { + available[d] -= 1 + } } if desiredCount == 0 { continue OUTER } + } } @@ -1627,7 +1642,10 @@ func (c *DeviceChecker) canSatisfyFirstAvailable(req *structs.RequestedDevice, a // constraints and checks if enough devices match. 
func (c *DeviceChecker) canSatisfyDeviceOption(req *structs.RequestedDevice, opt *structs.DeviceOption, available map[*structs.NodeDeviceResource]uint64) bool { desiredCount := opt.Count - + var willShare bool + if opt.ShareDevices != nil { + willShare = opt.ShareDevices.Enabled + } // Create a snapshot of available counts to restore if this option fails snapshot := make(map[*structs.NodeDeviceResource]uint64, len(available)) for k, v := range available { @@ -1635,7 +1653,15 @@ func (c *DeviceChecker) canSatisfyDeviceOption(req *structs.RequestedDevice, opt } for d, unused := range available { - if unused == 0 { + sharable := false + if willShare { + s, ok := d.Attributes["shared"].GetString() + if ok && s == "active" { + sharable = true + } + } + if unused == 0 { // don't need to change this because we only decrement if device & task are not sharable + // Depleted continue } @@ -1656,13 +1682,16 @@ func (c *DeviceChecker) canSatisfyDeviceOption(req *structs.RequestedDevice, opt // This device type matches, consume instances for desiredCount > 0 && available[d] > 0 { - available[d] -= 1 desiredCount -= 1 + if !sharable { + available[d] -= 1 + } } if desiredCount == 0 { return true } + } // Failed to satisfy this option - restore available counts From 21731e1cb0aa8c453a439cfb1150dc87454a338b Mon Sep 17 00:00:00 2001 From: Tehut Getahun Date: Thu, 11 Jun 2026 08:57:53 -0700 Subject: [PATCH 5/7] Add sharedDeviceIDMatches helper for createOffer --- command/agent/job_endpoint.go | 1 + scheduler/feasible/device.go | 18 +++++++++++++++-- scheduler/feasible/device_test.go | 32 +++++++++++++------------------ 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index 0b35da48106..586eb29f8fc 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -1634,6 +1634,7 @@ func ApiResourcesToStructs(in *api.Resources) *structs.Resources { if d.Count != nil && len(d.FirstAvailable) == 
0 { rd.Count = *d.Count } + // Only set ShareDevices if not using FirstAvailable if d.ShareDevices != nil && len(d.FirstAvailable) == 0 { rd.ShareDevices = ApiShareDevicesToStructs(d.ShareDevices) } diff --git a/scheduler/feasible/device.go b/scheduler/feasible/device.go index 5352e7d3801..6c8588595a2 100644 --- a/scheduler/feasible/device.go +++ b/scheduler/feasible/device.go @@ -191,8 +191,7 @@ func (d *deviceAllocator) createOfferWithParams(mem *memoryNodeMatcher, deviceID if !d.deviceIDMatchesConstraint(instanceID, constraints, devInst.Device) { continue } - // if we're targeting a specific GPU confirm it' the one we want - if shareDevices.SharedDeviceId != "" && shareDevices.SharedDeviceId != instanceID { + if !d.sharedDeviceIDMatches(instanceID, shareDevices) { continue } // if the task is willing to share, document in deviceAllocator @@ -349,3 +348,18 @@ func (d *deviceAllocator) sharedDeviceIDMatches(instanceID string, shareDevices } return true } + +//// deviceIDConstraintAndSharingChecks returns a single boolean to report whether +//// device ID matches all of the constraints and if applicable all of the +//// requested sharing modes +//func (d *deviceAllocator) deviceIDConstraintAndSharingChecks(id string, constraints structs.Constraints, sharing *structs.ShareDevices, device *structs.NodeDeviceResource) bool { +// if passesConstraint := d.deviceIDMatchesConstraint(id, constraints, device); !passesConstraint { +// return false +// } +// if sharing != nil { +// if passesSharing := d.deviceIDAllowsSharing(id, sharing, device); !passesSharing { +// return false +// } +// } +// return true +//} diff --git a/scheduler/feasible/device_test.go b/scheduler/feasible/device_test.go index 3eab15f54a6..c4536560840 100644 --- a/scheduler/feasible/device_test.go +++ b/scheduler/feasible/device_test.go @@ -35,19 +35,6 @@ func deviceRequest(name string, count uint64, } } -// sharedDeviceRequest takes the name, count and potential constraints and affinities -// and 
returns a device request. -func sharedDeviceRequest(name string, count uint64, - constraints []*structs.Constraint, affinities []*structs.Affinity, firstAvailable []*structs.DeviceOption) *structs.RequestedDevice { - return &structs.RequestedDevice{ - Name: name, - Count: count, - Constraints: constraints, - Affinities: affinities, - FirstAvailable: firstAvailable, - } -} - // devNode returns a node containing two devices, an nvidia gpu and an intel // FPGA. func devNode() *structs.Node { @@ -946,18 +933,17 @@ func TestDeviceAllocator_Allocate_SharedDevices(t *testing.T) { count: 1, }, { - name: "if present, shareDevices must match device", + name: "if shareDevices enabled, device must be sharable", deviceName: "nvidia/gpu", deviceID: SharedDeviceId0.ID, - shareDevices: &structs.ShareDevices{Enabled: false}, + shareDevices: &structs.ShareDevices{Enabled: true}, count: 1, - expectedErr: "no devices match request", }, { - name: "if present, gpu_id must match device", + name: "if present, SharedDeviceID must match allocated device", deviceName: "nvidia/gpu", deviceID: SharedDeviceId0.ID, - shareDevices: &structs.ShareDevices{Enabled: false, SharedDeviceId: SharedDeviceId1.ID}, + shareDevices: &structs.ShareDevices{Enabled: true, SharedDeviceId: SharedDeviceId1.ID}, count: 1, expectedErr: "no devices match request", }, @@ -978,7 +964,12 @@ func TestDeviceAllocator_Allocate_SharedDevices(t *testing.T) { RTarget: tc.deviceID, }, } - ask := sharedDeviceRequest(tc.deviceName, tc.count, testConstraints, nil, tc.shareDevices) + ask := &structs.RequestedDevice{ + Name: tc.deviceName, + Count: tc.count, + Constraints: testConstraints, + ShareDevices: tc.shareDevices, + } out, _, _, err := d.createOffer(mem, ask) if tc.expectedErr != "" { @@ -996,6 +987,9 @@ func TestDeviceAllocator_Allocate_SharedDevices(t *testing.T) { if tc.shareDevices != nil { must.MapContainsKey(t, out.WillShare, out.DeviceIDs[0]) + if tc.shareDevices.SharedDeviceId != "" { + must.SliceContains(t, 
out.DeviceIDs, tc.shareDevices.SharedDeviceId) + } } }) From 919955fa759f7dbe58df83a31aa25816bd86f275 Mon Sep 17 00:00:00 2001 From: Tehut Getahun Date: Thu, 11 Jun 2026 09:16:41 -0700 Subject: [PATCH 6/7] remove mock-nvidia customization from file plugin --- plugins/device/cmd/example/cmd/main.go | 5 +- plugins/device/cmd/example/device.go | 345 ++++++------------------- 2 files changed, 75 insertions(+), 275 deletions(-) diff --git a/plugins/device/cmd/example/cmd/main.go b/plugins/device/cmd/example/cmd/main.go index 593427b74c0..15f9c496c5d 100644 --- a/plugins/device/cmd/example/cmd/main.go +++ b/plugins/device/cmd/example/cmd/main.go @@ -4,8 +4,6 @@ package main import ( - "context" - log "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/plugins" @@ -19,6 +17,5 @@ func main() { // factory returns a new instance of our example device plugin func factory(log log.Logger) interface{} { - ctx := context.Background() - return example.NewNvidiaDevice(ctx, log) + return example.NewExampleDevice(log) } diff --git a/plugins/device/cmd/example/device.go b/plugins/device/cmd/example/device.go index 67be6763bc3..60d1d43ca06 100644 --- a/plugins/device/cmd/example/device.go +++ b/plugins/device/cmd/example/device.go @@ -5,7 +5,6 @@ package example import ( "context" - "errors" "fmt" "io/ioutil" "os" @@ -13,87 +12,41 @@ import ( "sync" "time" - "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/helper/pluginutils/loader" + log "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/plugins/base" "github.com/hashicorp/nomad/plugins/device" "github.com/hashicorp/nomad/plugins/shared/hclspec" "github.com/hashicorp/nomad/plugins/shared/structs" - "github.com/hashicorp/nomad/version" + "github.com/kr/pretty" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" ) const ( // pluginName is the name of the plugin - pluginName = "nvidia-example" + pluginName = "example-fs-device" // vendor is the vendor providing the devices - vendor = "nvidia" + 
vendor = "nomad" // deviceType is the type of device being returned - deviceType = device.DeviceTypeGPU + deviceType = "file" - // notAvailable value is returned to nomad server in case some properties were - // undetected by nvml driver - notAvailable = "N/A" - - // Nvidia-container-runtime environment variable names - NvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES" - - // MPS runtime environment variables - MpsPipeDirectoryKey = "MPS_PIPE_DIRECTORY" - MpsLogDirectoryKey = "MPS_LOG_DIRECTORY" - CustomMpsUserKey = "MPS_USER" - - DefaultMpsSockFileAddr = "control" - - deviceName1 = "T4" - deviceName2 = "T4" - deviceName3 = "P100" - deviceName4 = "P100" - deviceName5 = "P100" - deviceName6 = "A2" - deviceName7 = "A2" + // deviceName is the name of the devices being exposed + deviceName = "mock" ) var ( - // PluginID is the nvidia plugin metadata registered in the plugin - // catalog. - PluginID = loader.PluginID{ - Name: pluginName, - PluginType: base.PluginTypeDevice, - } - - // PluginConfig is the nvidia factory function registered in the - // plugin catalog. 
- PluginConfig = &loader.InternalPluginConfig{ - Factory: func(ctx context.Context, l hclog.Logger) interface{} { return NewNvidiaDevice(ctx, l) }, - } - // pluginInfo describes the plugin pluginInfo = &base.PluginInfoResponse{ Type: base.PluginTypeDevice, PluginApiVersions: []string{device.ApiVersion010}, - PluginVersion: version.Version, + PluginVersion: "v0.1.0", Name: pluginName, } // configSpec is the specification of the plugin's configuration configSpec = hclspec.NewObject(map[string]*hclspec.Spec{ - "enabled": hclspec.NewDefault( - hclspec.NewAttr("enabled", "bool", false), - hclspec.NewLiteral("true"), - ), - - "ignored_gpu_ids": hclspec.NewDefault( - hclspec.NewAttr("ignored_gpu_ids", "list(string)", false), - hclspec.NewLiteral("[]"), - ), - "fingerprint_period": hclspec.NewDefault( - hclspec.NewAttr("fingerprint_period", "string", false), - hclspec.NewLiteral("\"1m\""), - ), "dir": hclspec.NewDefault( hclspec.NewAttr("dir", "string", false), hclspec.NewLiteral("\".\""), @@ -106,64 +59,22 @@ var ( hclspec.NewAttr("unhealthy_perm", "string", false), hclspec.NewLiteral("\"-rwxrwxrwx\""), ), - "mps": hclspec.NewBlock("mps", false, - hclspec.NewObject(map[string]*hclspec.Spec{ - "enabled": hclspec.NewAttr("enabled", "bool", true), - "mps_user": hclspec.NewAttr("mps_user", "string", false), - "mps_pipe_directory": hclspec.NewAttr("mps_pipe_directory", "string", false), - "mps_log_directory": hclspec.NewAttr("mps_log_directory", "string", false), - "mps_sock_addr": hclspec.NewAttr("mps_sock_addr", "string", false), - "device_specific_mps_config": hclspec.NewBlockList("device_specific_mps_config", - hclspec.NewObject(map[string]*hclspec.Spec{ - "uuid": hclspec.NewAttr("uuid", "string", true), - "mps_pipe_directory": hclspec.NewAttr("mps_pipe_directory", "string", true), - "mps_log_directory": hclspec.NewAttr("mps_log_directory", "string", true), - }), - ), - }), - ), }) - - dSlice = []string{deviceName1, deviceName2, deviceName3, deviceName4, deviceName5, 
deviceName6, deviceName7} ) // Config contains configuration information for the plugin. type Config struct { - Enabled bool `codec:"enabled"` - IgnoredGPUIDs []string `codec:"ignored_gpu_ids"` - FingerprintPeriod string `codec:"fingerprint_period"` - MpsConfig *MpsConfig `codec:"mps"` - Dir string `codec:"dir"` - ListPeriod string `codec:"list_period"` - UnhealthyPerm string `codec:"unhealthy_perm"` -} - -// MpsConfig contains configuration for mps sharing -type MpsConfig struct { - MpsUser string `codec:"mps_user"` - MpsSockFile string `codec:"mps_sock_addr"` - MpsPipeDirectory string `codec:"mps_pipe_directory"` - MpsLogDirectory string `codec:"mps_log_directory"` - DeviceConfig []DeviceMpsConfig `codec:"device_specific_mps_config"` - DeviceMpsConfig map[string]DeviceMpsConfig + Dir string `codec:"dir"` + ListPeriod string `codec:"list_period"` + UnhealthyPerm string `codec:"unhealthy_perm"` } -// DeviceMpsConfig contains configuration GPU level mps sharing -type DeviceMpsConfig struct { - UUID string `codec:"uuid"` - MpsPipeDirectory string `codec:"mps_pipe_directory"` - MpsLogDirectory string `codec:"mps_log_directory"` -} - -type NvidiaDevice struct { - // enabled indicates whether the plugin should be enabled - enabled bool - - //nvmlClient nvml.NvmlClient - - // initErr holds an error retrieved during - // nvmlClient initialization - //initErr error +// FsDevice is an example device plugin. The device plugin exposes files as +// devices and periodically polls the directory for new files. If a file has a +// given file permission, it is considered unhealthy. This device plugin is +// purely for use as an example. 
+type FsDevice struct { + logger log.Logger // deviceDir is the directory we expose as devices deviceDir string @@ -175,104 +86,37 @@ type NvidiaDevice struct { // devices listPeriod time.Duration - // ignoredGPUIDs is a set of UUIDs that would not be exposed to nomad - ignoredGPUIDs map[string]struct{} - - // fingerprintPeriod is how often we should call nvml to get list of devices - //fingerprintPeriod time.Duration - - //MpsConfig holds a pointer to the MPS configuration - MpsConfig *MpsConfig - - // devices is the set of detected eligible devices - devices map[string]device.Shared + // devices is the set of detected devices and maps whether they are healthy + devices map[string]bool deviceLock sync.RWMutex - - logger hclog.Logger } -// NewNvidiaDevice returns a new nvidia device plugin. -func NewNvidiaDevice(_ context.Context, log hclog.Logger) *NvidiaDevice { - //nvmlClient, err := nvml.NewNvmlClient() - logger := log.Named(pluginName) - return &NvidiaDevice{ - logger: logger, - devices: make(map[string]device.Shared), - ignoredGPUIDs: make(map[string]struct{}), +// NewExampleDevice returns a new example device plugin. +func NewExampleDevice(log log.Logger) *FsDevice { + return &FsDevice{ + logger: log.Named(pluginName), + devices: make(map[string]bool), } } // PluginInfo returns information describing the plugin. -func (d *NvidiaDevice) PluginInfo() (*base.PluginInfoResponse, error) { +func (d *FsDevice) PluginInfo() (*base.PluginInfoResponse, error) { return pluginInfo, nil } // ConfigSchema returns the plugins configuration schema. -func (d *NvidiaDevice) ConfigSchema() (*hclspec.Spec, error) { +func (d *FsDevice) ConfigSchema() (*hclspec.Spec, error) { return configSpec, nil } -func checkAndSetDefault(c string, d string) string { - if config := c; config != "" { - return c - } - return d -} - // SetConfig is used to set the configuration of the plugin. 
-func (d *NvidiaDevice) SetConfig(cfg *base.Config) error { +func (d *FsDevice) SetConfig(c *base.Config) error { var config Config - if len(cfg.PluginConfig) != 0 { - if err := base.MsgPackDecode(cfg.PluginConfig, &config); err != nil { - return err - } - } - d.enabled = config.Enabled - // set MPS config values - if config.MpsConfig != nil { - d.MpsConfig = &MpsConfig{} - d.logger.Info("not nil", "config", fmt.Sprintf("%+v", config.MpsConfig)) - // ensure only global or device specific config are set - if (config.MpsConfig.MpsPipeDirectory != "" || config.MpsConfig.MpsLogDirectory != "") && - len(config.MpsConfig.DeviceMpsConfig) != 0 { - return errors.New("only global mps variables or device_specific_mps_config block may be set ") - } - // set straightforward value on device - d.MpsConfig.MpsUser = checkAndSetDefault(config.MpsConfig.MpsUser, "unset") - d.MpsConfig.MpsSockFile = checkAndSetDefault(config.MpsConfig.MpsSockFile, DefaultMpsSockFileAddr) - - // if present set device specific mps config, otherwise set top level config - if len(config.MpsConfig.DeviceMpsConfig) != 0 { - - // build map of device UUIDs to config - deviceConfigMap := make(map[string]DeviceMpsConfig, len(config.MpsConfig.DeviceMpsConfig)) - for _, devConfig := range config.MpsConfig.DeviceMpsConfig { - deviceConfigMap[devConfig.UUID] = DeviceMpsConfig{ - UUID: devConfig.UUID, - MpsPipeDirectory: devConfig.MpsPipeDirectory, - MpsLogDirectory: devConfig.MpsLogDirectory, - } - } - // set device specific mpsConfig - d.MpsConfig.DeviceMpsConfig = deviceConfigMap - } else { - // set top level mps directories if no device specific config - // we have defaults so always use config values - d.MpsConfig.MpsPipeDirectory = config.MpsConfig.MpsPipeDirectory - d.MpsConfig.MpsLogDirectory = config.MpsConfig.MpsLogDirectory - if pipe_dir := config.MpsConfig.MpsPipeDirectory; pipe_dir != "" { - d.MpsConfig.MpsPipeDirectory = pipe_dir - } else { - d.MpsConfig.MpsPipeDirectory = 
checkAndSetDefault(config.MpsConfig.MpsPipeDirectory, "/tmp/nvidia-mps") - - d.MpsConfig.MpsLogDirectory = checkAndSetDefault(config.MpsConfig.MpsLogDirectory, "/var/log/nvidia-mps") - } - } - } - for _, ignoredGPUId := range config.IgnoredGPUIDs { - d.ignoredGPUIDs[ignoredGPUId] = struct{}{} + if err := base.MsgPackDecode(c.PluginConfig, &config); err != nil { + return err } + // Save the device directory and the unhealthy permissions d.deviceDir = config.Dir d.unhealthyPerm = config.UnhealthyPerm @@ -283,14 +127,16 @@ func (d *NvidiaDevice) SetConfig(cfg *base.Config) error { } d.listPeriod = period + d.logger.Debug("test debug") + d.logger.Info("config set", "config", log.Fmt("% #v", pretty.Formatter(config))) return nil } // Fingerprint streams detected devices. If device changes are detected or the // devices health changes, messages will be emitted. -func (d *NvidiaDevice) Fingerprint(ctx context.Context) (<-chan *device.FingerprintResponse, error) { - if !d.enabled { - return nil, device.ErrPluginDisabled +func (d *FsDevice) Fingerprint(ctx context.Context) (<-chan *device.FingerprintResponse, error) { + if d.deviceDir == "" { + return nil, status.New(codes.Internal, "device directory not set in config").Err() } outCh := make(chan *device.FingerprintResponse) @@ -299,7 +145,7 @@ func (d *NvidiaDevice) Fingerprint(ctx context.Context) (<-chan *device.Fingerpr } // fingerprint is the long running goroutine that detects hardware -func (d *NvidiaDevice) fingerprint(ctx context.Context, devices chan *device.FingerprintResponse) { +func (d *FsDevice) fingerprint(ctx context.Context, devices chan *device.FingerprintResponse) { defer close(devices) // Create a timer that will fire immediately for the first detection @@ -313,7 +159,7 @@ func (d *NvidiaDevice) fingerprint(ctx context.Context, devices chan *device.Fin ticker.Reset(d.listPeriod) } - d.logger.Info("scanning for changes") + d.logger.Trace("scanning for changes") files, err := ioutil.ReadDir(d.deviceDir) 
if err != nil { @@ -321,136 +167,97 @@ func (d *NvidiaDevice) fingerprint(ctx context.Context, devices chan *device.Fin devices <- device.NewFingerprintError(err) return } - deviceGroups := make([]*device.DeviceGroup, 0) - //all := make([]*device.Device, len(dSlice)) - shared, inactive := d.diffFiles(files) - for k, v := range shared { - deviceGroups = append(deviceGroups, d.getDeviceGroup([]*device.Device{v}, dSlice[k], "shared")) - } - for k, v := range inactive { - deviceGroups = append(deviceGroups, d.getDeviceGroup([]*device.Device{v}, dSlice[k], "inactive")) + detected := d.diffFiles(files) + if len(detected) == 0 { + continue } - //if len(inactive) != 0 { - //deviceGroups = append(deviceGroups, d.getDeviceGroup(inactive)) - //} - - //if len(shared) != 0 { - // deviceGroups = append(deviceGroups, d.getDeviceGroup(shared)) - //} - d.logger.Info("files to fingerprint", "inactive files", len(inactive), "active files", len(shared)) - devices <- device.NewFingerprint(deviceGroups...) + devices <- device.NewFingerprint(getDeviceGroup(detected)) } } -func (d *NvidiaDevice) diffFiles(files []os.FileInfo) ([]*device.Device, []*device.Device) { + +func (d *FsDevice) diffFiles(files []os.FileInfo) []*device.Device { d.deviceLock.Lock() defer d.deviceLock.Unlock() // Build an unhealthy message unhealthyDesc := fmt.Sprintf("Device has bad permissions %q", d.unhealthyPerm) - //var changes bool + var changes bool fnames := make(map[string]struct{}) for _, f := range files { name := f.Name() fnames[name] = struct{}{} if f.IsDir() { - d.logger.Info("skipping directory", "directory", name) + d.logger.Trace("skipping directory", "directory", name) continue } // Determine the health perms := f.Mode().Perm().String() - //turn health into sharing status - healthBool := perms != d.unhealthyPerm - var healthy string - if healthBool { - healthy = device.SharingActive.String() - } else { - healthy = device.SharingInactive.String() - } + healthy := perms != d.unhealthyPerm + 
d.logger.Trace("checking health", "file perm", perms, "unhealthy perms", d.unhealthyPerm, "healthy", healthy) - // See if we already have the device + // See if we already have the device oldHealth, ok := d.devices[name] - if ok && oldHealth.String() == healthy { + if ok && oldHealth == healthy { continue } // Health has changed or we have a new object - //changes = true - - d.devices[name] = device.Shared(healthy) + changes = true + d.devices[name] = healthy } for id := range d.devices { if _, ok := fnames[id]; !ok { delete(d.devices, id) - //changes = true + changes = true } } - // Build the devices - shared := make([]*device.Device, 0, len(d.devices)) - inactive := make([]*device.Device, 0, len(d.devices)) + // Nothing to do + if !changes { + return nil + } + // Build the devices + detected := make([]*device.Device, 0, len(d.devices)) for name, healthy := range d.devices { var desc string - if healthy != device.SharingActive { + if !healthy { desc = unhealthyDesc - inactive = append(inactive, &device.Device{ - ID: name, - Shared: healthy, - HealthDesc: desc, - Healthy: true, - }) - continue } - shared = append(shared, &device.Device{ + + detected = append(detected, &device.Device{ ID: name, - Shared: healthy, - HealthDesc: "healthy", - Healthy: true, + Healthy: healthy, + HealthDesc: desc, }) } - return shared, inactive + return detected } // getDeviceGroup is a helper to build the DeviceGroup given a set of devices.
-func (d *NvidiaDevice) getDeviceGroup(devices []*device.Device, name string, isShared string) *device.DeviceGroup { - //d.logger.Error("getDeviceGroup", "device count", len(devices)) - //var ( - // shared string - //) - //shared = devices[0].Shared.String() - //for _, v := range devices { - // if shared == "" { - // shared = v.Shared.String() - // } - // //dName = dSlice[n] - // //d.logger.Error("getDeviceGroup", "loop", n, "deviceID", v.ID, "dSlice length", len(dSlice)) - //} +func getDeviceGroup(devices []*device.Device) *device.DeviceGroup { return &device.DeviceGroup{ Vendor: vendor, Type: deviceType, - Name: name, + Name: deviceName, Devices: devices, Attributes: map[string]*structs.Attribute{ - "shared": { - String: &isShared, - }, - "model": { - String: &name, + "cool-attribute": { + String: new("attribute-wearing-sunglasses"), }, }, } - } // Reserve returns information on how to mount the given devices. -func (d *NvidiaDevice) Reserve(deviceIDs []string) (*device.ContainerReservation, error) { +func (d *FsDevice) Reserve(deviceIDs []string) (*device.ContainerReservation, error) { if len(deviceIDs) == 0 { return nil, status.New(codes.InvalidArgument, "no device ids given").Err() } @@ -461,16 +268,12 @@ func (d *NvidiaDevice) Reserve(deviceIDs []string) (*device.ContainerReservation } resp := &device.ContainerReservation{} - containerEnvs := make(map[string]string) + for _, id := range deviceIDs { // Check if the device is known if _, ok := d.devices[id]; !ok { return nil, status.Newf(codes.InvalidArgument, "unknown device %q", id).Err() } - if d.devices[id] == device.SharingActive { - containerEnvs[MpsPipeDirectoryKey] = d.MpsConfig.MpsPipeDirectory - containerEnvs[MpsLogDirectoryKey] = d.MpsConfig.MpsLogDirectory - } // Add a mount resp.Mounts = append(resp.Mounts, &device.Mount{ @@ -484,14 +287,14 @@ func (d *NvidiaDevice) Reserve(deviceIDs []string) (*device.ContainerReservation } // Stats streams statistics for the detected devices. 
-func (d *NvidiaDevice) Stats(ctx context.Context, interval time.Duration) (<-chan *device.StatsResponse, error) { +func (d *FsDevice) Stats(ctx context.Context, interval time.Duration) (<-chan *device.StatsResponse, error) { outCh := make(chan *device.StatsResponse) go d.stats(ctx, outCh, interval) return outCh, nil } // stats is the long running goroutine that streams device statistics -func (d *NvidiaDevice) stats(ctx context.Context, stats chan *device.StatsResponse, interval time.Duration) { +func (d *FsDevice) stats(ctx context.Context, stats chan *device.StatsResponse, interval time.Duration) { defer close(stats) // Create a timer that will fire immediately for the first detection @@ -522,7 +325,7 @@ func (d *NvidiaDevice) stats(ctx context.Context, stats chan *device.StatsRespon } } -func (d *NvidiaDevice) collectStats() (*device.DeviceGroupStats, error) { +func (d *FsDevice) collectStats() (*device.DeviceGroupStats, error) { d.deviceLock.RLock() defer d.deviceLock.RUnlock() l := len(d.devices) @@ -534,7 +337,7 @@ func (d *NvidiaDevice) collectStats() (*device.DeviceGroupStats, error) { group := &device.DeviceGroupStats{ Vendor: vendor, Type: deviceType, - Name: deviceName1, + Name: deviceName, InstanceStats: make(map[string]*device.DeviceStats, l), } From 19b0850b4aed96489f60013e978bd8b3ae04a971 Mon Sep 17 00:00:00 2001 From: Tehut Getahun Date: Thu, 11 Jun 2026 19:39:14 -0700 Subject: [PATCH 7/7] device constraints pass without this --- nomad/structs/constraint.go | 1 - nomad/structs/constraint_test.go | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/nomad/structs/constraint.go b/nomad/structs/constraint.go index e84389d3903..d48fe1fe17f 100644 --- a/nomad/structs/constraint.go +++ b/nomad/structs/constraint.go @@ -25,7 +25,6 @@ var ( "${attr.", "${device.", "${meta.", - "${device.attr.", } ) diff --git a/nomad/structs/constraint_test.go b/nomad/structs/constraint_test.go index 716ef167dfd..524faea8d5f 100644 --- 
a/nomad/structs/constraint_test.go +++ b/nomad/structs/constraint_test.go @@ -87,6 +87,11 @@ func TestValidateConstraintTarget(t *testing.T) { inputTarget: "${device.type}", expectedErrorMsg: "", }, + { + name: "valid device.attr", + inputTarget: "${device.attr.model}", + expectedErrorMsg: "", + }, { name: "missing closing brace", inputTarget: "${node.datacenter",