diff --git a/charts/topograph/values.schema.json b/charts/topograph/values.schema.json index 09466be5..2338c563 100644 --- a/charts/topograph/values.schema.json +++ b/charts/topograph/values.schema.json @@ -50,6 +50,10 @@ "type": "string", "description": "Scheduler-output engine. Must match a registered engine in pkg/registry/registry.go.", "enum": ["graph", "k8s", "slinky", "slurm"] + }, + "params": { + "type": "object", + "description": "Engine-specific parameters. For slinky, useGpuCliqueLabel=true reads nvidia.com/gpu.clique as the topology/block domain source." } }, "required": ["name"] diff --git a/charts/topograph/values.slinky.block-example.yaml b/charts/topograph/values.slinky.block-example.yaml index 41b270e3..44ae23f5 100644 --- a/charts/topograph/values.slinky.block-example.yaml +++ b/charts/topograph/values.slinky.block-example.yaml @@ -20,6 +20,7 @@ global: app.kubernetes.io/component: compute plugin: topology/block blockSizes: [4] + # useGpuCliqueLabel: true topologyConfigPath: topology.conf topologyConfigmapName: slurm-config useDynamicNodes: false diff --git a/charts/topograph/values.slinky.ib.block-example.yaml b/charts/topograph/values.slinky.ib.block-example.yaml new file mode 100644 index 00000000..ebee9993 --- /dev/null +++ b/charts/topograph/values.slinky.ib.block-example.yaml @@ -0,0 +1,67 @@ +# Default values for topology-generator. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +global: + provider: + # name: "aws", "oci", "gcp", "nebius", "netq", "infiniband-k8s", "dra" or "test" + name: infiniband-k8s + params: + nodeSelector: + slurmCluster: my-cluster + useGpuCliqueLabel: true + engine: + name: slinky + params: + namespace: slurm + nodeSelector: + slurmCluster: my-cluster + podSelector: + matchLabels: + app.kubernetes.io/component: compute + plugin: topology/block + blockSizes: [4] + useGpuCliqueLabel: true + topologyConfigPath: topology.conf + topologyConfigmapName: slurm-config + +nodeSelector: + dedicated: user-workload + +tolerations: + - operator: Exists + +node-observer: + nodeSelector: + dedicated: user-workload + topograph: + trigger: + podSelector: + matchLabels: + app.kubernetes.io/component: compute + +node-data-broker: + image: + repository: ghcr.io/nvidia/topograph/ib + pullPolicy: IfNotPresent + tag: main + initc: + enabled: true + verbosity: 4 + securityContext: + privileged: true + nodeSelector: + node.dgxc.nvidia.com/has-gpu: "true" + volumeMounts: + - name: sys-class-volume + mountPath: /sys/class + volumes: + - name: sys-class-volume + hostPath: + path: /sys/class + type: Directory + tolerations: + - key: "nvidia.com/gpu" + operator: "Equal" + value: "present" + effect: "NoSchedule" diff --git a/charts/topograph/values.slinky.partition-example.yaml b/charts/topograph/values.slinky.partition-example.yaml index bce30344..67ede8e6 100644 --- a/charts/topograph/values.slinky.partition-example.yaml +++ b/charts/topograph/values.slinky.partition-example.yaml @@ -44,6 +44,7 @@ global: clusterDefault: true topologyConfigPath: topology.conf topologyConfigmapName: slurm-config + # useGpuCliqueLabel: true useDynamicNodes: true configUpdateMode: skeleton-only diff --git a/charts/topograph/values.yaml b/charts/topograph/values.yaml index bb181ee9..7b79a78c 100644 --- a/charts/topograph/values.yaml +++ b/charts/topograph/values.yaml @@ -14,6 +14,10 @@ global: engine: # name: "k8s", "slinky", "slurm" or "graph" name: k8s + # 
params: + # # For slinky topology/block output, use the GPU Operator's + # # nvidia.com/gpu.clique node label as the block-domain source. + # useGpuCliqueLabel: true service: type: ClusterIP diff --git a/docs/api.md b/docs/api.md index 1c1e8636..47a02834 100644 --- a/docs/api.md +++ b/docs/api.md @@ -92,6 +92,7 @@ Topograph exposes three endpoints for interacting with the service. Below are th - **nodeSelector**: (optional) Used in: [`k8s`, `slinky`]. A Kubernetes node label map that filters which nodes participate in topology generation. - **topologyConfigmapName**: Used in: [`slinky`]. The required name of the ConfigMap containing the topology config. - **useDynamicNodes**: (optional) Used in: [`slinky`]. If `true`, Kubernetes nodes matched by the Node Selector will be annotated with the topology spec. + - **useGpuCliqueLabel**: (optional) Used in: [`slinky`]. If `true`, `topology/block` domains are built from the GPU Operator's `nvidia.com/gpu.clique` node label instead of provider accelerator-domain data. - **configUpdateMode**: (optional) Used in: [`slinky`]. By default, the full topology YAML is written in the Slurm ConfigMap. `skeleton-only` overrides to include switches or blocks only (no node lines); `none` skips updating the topology key in the ConfigMap. - **nodes**: (optional) Supplies the cluster nodes used for topology generation as an array of regions mapping instance IDs to node names. diff --git a/docs/engines/slinky.md b/docs/engines/slinky.md index e6bc62a8..15f9ee73 100644 --- a/docs/engines/slinky.md +++ b/docs/engines/slinky.md @@ -76,6 +76,30 @@ global: clusterDefault: true # no podSelector, no nodes → scontrol fallback ``` +### Using `nvidia.com/gpu.clique` for block topology + +On Multi-Node NVLink (MNNVL) Kubernetes clusters, the NVIDIA GPU Operator can label nodes with `nvidia.com/gpu.clique`. 
When `useGpuCliqueLabel` is enabled, the Slinky engine uses that label as the source for `topology/block` domains instead of the accelerator domains returned by the provider. This is useful with cloud API providers whose `InstanceTopology.AcceleratorID` describes a broader provider domain than the GPU Operator clique label. + +The option only affects block topology. Tree topology still comes from the selected provider, and the engine still maps Kubernetes nodes to Slurm nodes through the configured slurmd pod selector. + +```yaml +global: + engine: + name: slinky + params: + namespace: ns-slinky + podSelector: + matchLabels: + app.kubernetes.io/component: compute + plugin: topology/block + blockSizes: [8, 16] + topologyConfigmapName: slurm-config + topologyConfigPath: topology.conf + useGpuCliqueLabel: true +``` + +If `useGpuCliqueLabel` is enabled for a block topology and no matching nodes have the `nvidia.com/gpu.clique` label plus the Topograph instance annotation, topology generation fails with a `502` error instead of falling back to provider accelerator domains. + ## ConfigMap Annotations Slinky automatically adds metadata annotations to managed ConfigMaps for improved observability: diff --git a/docs/reference/node-labels.md b/docs/reference/node-labels.md index 8f0665a8..593ed45c 100644 --- a/docs/reference/node-labels.md +++ b/docs/reference/node-labels.md @@ -33,7 +33,7 @@ Not all providers produce both topology types: | `infiniband-bm` | Yes (`ClusterUUID.CliqueId`) | Yes (IB switch hierarchy) | | `infiniband-k8s` | Yes (`ClusterUUID.CliqueId`) | Yes (IB switch hierarchy) | -**Relationship to `nvidia.com/gpu.clique`**: The GPU Operator device plugin sets `nvidia.com/gpu.clique` on nodes with Multi-Node NVLink (MNNVL) GPUs. The k8s engine treats that label as authoritative when present and does not write Topograph's configured accelerator label for that node, regardless of whether the selected provider also returned an accelerator domain from API data. 
For `infiniband-k8s`, setting `global.provider.params.useGpuCliqueLabel: true` also makes the provider read that existing node label instead of collecting the same value through `nvidia-smi`. The `netq` provider uses a `DomainUUID` from the NMX management API — a different identifier that refers to the same physical domain but cannot be compared as a string. +**Relationship to `nvidia.com/gpu.clique`**: The GPU Operator device plugin sets `nvidia.com/gpu.clique` on nodes with Multi-Node NVLink (MNNVL) GPUs. The k8s engine treats that label as authoritative when present and does not write Topograph's configured accelerator label for that node, regardless of whether the selected provider also returned an accelerator domain from API data. For Slinky block topology, setting `global.engine.params.useGpuCliqueLabel: true` makes the Slinky engine build `topology/block` domains from `nvidia.com/gpu.clique` instead of provider accelerator-domain data. For `infiniband-k8s`, setting `global.provider.params.useGpuCliqueLabel: true` also makes the provider read that existing node label instead of collecting the same value through `nvidia-smi`. The `netq` provider uses a `DomainUUID` from the NMX management API — a different identifier that refers to the same physical domain but cannot be compared as a string. [NVIDIA Fabric Manager](https://docs.nvidia.com/datacenter/tesla/fabric-manager-user-guide/) runs at node init on MNNVL-capable hardware, discovers the NVLink fabric across GPUs, and registers each GPU with [NVML](https://docs.nvidia.com/deploy/nvml-api/) (NVIDIA Management Library — a C API that exposes per-GPU state). The GPU Operator's IMEX labeler writes `nvidia.com/gpu.clique` only once NVML reports the node's fabric state as `GPU_FABRIC_STATE_COMPLETED` — meaning Fabric Manager finished initialization successfully and the node is part of an NVLink domain. 
diff --git a/pkg/engines/slinky/engine.go b/pkg/engines/slinky/engine.go index 0e80c5bf..57b89e2a 100644 --- a/pkg/engines/slinky/engine.go +++ b/pkg/engines/slinky/engine.go @@ -58,6 +58,11 @@ type SlinkyEngine struct { params *Params } +type clusterNodes struct { + nodes *corev1.NodeList + nodeMap map[string]string +} + type Params struct { slurm.BaseParams `mapstructure:",squash"` // Namespace specifies the namespace where Slinky cluster is deployed @@ -72,6 +77,9 @@ type Params struct { ConfigPath string `mapstructure:"topologyConfigPath"` // UseDynamicNodes specifies whether to use dynamic nodes for reporting: true or false UseDynamicNodes bool `mapstructure:"useDynamicNodes" default:"false"` + // UseGPUCliqueLabel uses the GPU Operator's nvidia.com/gpu.clique node label + // as the block-domain source for topology/block output. + UseGPUCliqueLabel bool `mapstructure:"useGpuCliqueLabel"` // ConfigUpdateMode specifies the mode for updating the slurm config: valid values {"none", "skeleton-only"} ConfigUpdateMode string `mapstructure:"configUpdateMode,omitempty"` // Topologies specifies per-partition topology configuration @@ -172,24 +180,27 @@ func isEmptySelector(sel *metav1.LabelSelector) bool { } func (eng *SlinkyEngine) GetComputeInstances(ctx context.Context, _ any) ([]topology.ComputeInstances, *httperr.Error) { - - nodes, nodeMap, err := eng.getClusterNodes(ctx) + clusterNodes, err := eng.getClusterNodes(ctx) if err != nil { return nil, err } - return getComputeInstances(nodes, nodeMap) + return getComputeInstances(clusterNodes.nodes, clusterNodes.nodeMap) } -func (eng *SlinkyEngine) getClusterNodes(ctx context.Context) (*corev1.NodeList, map[string]string, *httperr.Error) { +// getClusterNodes returns the Kubernetes nodes selected for topology generation +// and a map from Kubernetes node name to Slurm node name. 
The mapping is built +// from Ready slurmd pods in the configured namespace and pod selector, using the +// slurm.node.name label when present and falling back to pod.spec.hostname. +func (eng *SlinkyEngine) getClusterNodes(ctx context.Context) (*clusterNodes, *httperr.Error) { nodes, err := k8s.GetNodes(ctx, eng.client, eng.params.nodeListOpt) if err != nil { - return nil, nil, httperr.NewError(http.StatusBadGateway, err.Error()) + return nil, httperr.NewError(http.StatusBadGateway, err.Error()) } pods, err := eng.client.CoreV1().Pods(eng.params.Namespace).List(ctx, *eng.params.podListOpt) if err != nil { - return nil, nil, httperr.NewError(http.StatusBadGateway, + return nil, httperr.NewError(http.StatusBadGateway, fmt.Sprintf("failed to list SLURM pods in the cluster: %v", err)) } @@ -208,7 +219,10 @@ func (eng *SlinkyEngine) getClusterNodes(ctx context.Context) (*corev1.NodeList, klog.V(4).Infof("Mapping k8s node %s to SLURM node %s", pod.Spec.NodeName, host) nodeMap[pod.Spec.NodeName] = host } - return nodes, nodeMap, nil + return &clusterNodes{ + nodes: nodes, + nodeMap: nodeMap, + }, nil } func getComputeInstances(nodes *corev1.NodeList, nodeMap map[string]string) ([]topology.ComputeInstances, *httperr.Error) { @@ -246,6 +260,64 @@ func getComputeInstances(nodes *corev1.NodeList, nodeMap map[string]string) ([]t return cis, nil } +func withGPUCliqueDomains(graph *topology.Graph, clusterNodes *clusterNodes) (*topology.Graph, *httperr.Error) { + domains := topology.NewDomainMap() + for _, node := range clusterNodes.nodes.Items { + slurmName, ok := clusterNodes.nodeMap[node.Name] + if !ok || slurmName == "" { + klog.V(4).Infof("Skipping node %s as it does not have a corresponding SLURM name", node.Name) + continue + } + + gpuClique := strings.TrimSpace(node.Labels[topology.KeyNvidiaGPUClique]) + if gpuClique == "" { + continue + } + + instance, ok := node.Annotations[topology.KeyNodeInstance] + if !ok { + klog.Warningf("missing %q annotation in node %s", 
topology.KeyNodeInstance, node.Name) + continue + } + + domains.AddHost(gpuClique, instance, slurmName) + } + + if len(domains) == 0 { + return nil, httperr.NewError(http.StatusBadGateway, + fmt.Sprintf("useGpuCliqueLabel=true but no matching nodes found; check label %q and annotation %q", + topology.KeyNvidiaGPUClique, topology.KeyNodeInstance)) + } + + if graph == nil { + graph = &topology.Graph{} + } else { + cloned := *graph + graph = &cloned + } + graph.Domains = domains + + return graph, nil +} + +func usesBlockTopology(cfg *translate.Config) bool { + if cfg == nil { + return false + } + + if cfg.Plugin == topology.TopologyBlock { + return true + } + + for _, spec := range cfg.Topologies { + if spec != nil && spec.Plugin == topology.TopologyBlock { + return true + } + } + + return false +} + // generateConfigMapAnnotations creates metadata annotations for ConfigMaps func (eng *SlinkyEngine) generateConfigMapAnnotations() map[string]string { annotations := map[string]string{ @@ -283,6 +355,27 @@ func (eng *SlinkyEngine) GenerateOutput(ctx context.Context, graph *topology.Gra return nil, httperr.NewError(http.StatusInternalServerError, err.Error()) } + var clusterNodeData *clusterNodes + loadClusterNodes := func() (*clusterNodes, *httperr.Error) { + if clusterNodeData != nil { + return clusterNodeData, nil + } + var httpErr *httperr.Error + clusterNodeData, httpErr = eng.getClusterNodes(ctx) + return clusterNodeData, httpErr + } + + if p.UseGPUCliqueLabel && usesBlockTopology(cfg) { + clusterNodeData, httpErr := loadClusterNodes() + if httpErr != nil { + return nil, httpErr + } + graph, httpErr = withGPUCliqueDomains(graph, clusterNodeData) + if httpErr != nil { + return nil, httpErr + } + } + nt, err := translate.NewNetworkTopology(graph, cfg) if err != nil { return nil, httperr.NewError(http.StatusBadRequest, err.Error()) @@ -306,7 +399,11 @@ func (eng *SlinkyEngine) GenerateOutput(ctx context.Context, graph *topology.Gra // For dynamic mode, perform 
reconciliation using the latest topology information from the provider (root) and the cluster (nodes and their annotations) if p.UseDynamicNodes { - httpErr := eng.performReconciliation(ctx, nt, topologies) + clusterNodeData, httpErr := loadClusterNodes() + if httpErr != nil { + return nil, httpErr + } + httpErr = eng.performReconciliation(ctx, nt, topologies, clusterNodeData) if httpErr != nil { return nil, httpErr } @@ -466,17 +563,11 @@ func (eng *SlinkyEngine) getPartitionNodes(ctx context.Context, partition string return "", fmt.Errorf("no running pods with labels %v", labels) } -func (eng *SlinkyEngine) performReconciliation(ctx context.Context, nt *translate.NetworkTopology, topologies []*translate.TopologyUnit) *httperr.Error { - - nodes, nodeMap, err := eng.getClusterNodes(ctx) - if err != nil { - return err - } - +func (eng *SlinkyEngine) performReconciliation(ctx context.Context, nt *translate.NetworkTopology, topologies []*translate.TopologyUnit, clusterNodes *clusterNodes) *httperr.Error { // Update node annotations based on the desired topology and the current cluster state. // This will trigger Slinky to reconfigure the nodes accordingly. 
- for _, node := range nodes.Items { - slurmName, ok := nodeMap[node.Name] + for _, node := range clusterNodes.nodes.Items { + slurmName, ok := clusterNodes.nodeMap[node.Name] if !ok { klog.V(4).Infof("Skipping node %s as it does not have a corresponding SLURM name", node.Name) continue diff --git a/pkg/engines/slinky/engine_test.go b/pkg/engines/slinky/engine_test.go index 9d56ec54..a1bbd694 100644 --- a/pkg/engines/slinky/engine_test.go +++ b/pkg/engines/slinky/engine_test.go @@ -33,6 +33,7 @@ import ( "github.com/NVIDIA/topograph/pkg/engines/slurm" "github.com/NVIDIA/topograph/pkg/models" "github.com/NVIDIA/topograph/pkg/topology" + "github.com/NVIDIA/topograph/pkg/translate" ) func TestGetParameters(t *testing.T) { @@ -202,6 +203,24 @@ func TestGetParameters(t *testing.T) { nodeListOpt: &metav1.ListOptions{LabelSelector: "key=value"}, }, }, + { + name: "Case 10: use GPU clique label", + params: map[string]any{ + topology.KeyNamespace: "namespace", + topology.KeyPodSelector: podSelector, + topology.KeyTopoConfigPath: "path", + topology.KeyTopoConfigmapName: "name", + "useGpuCliqueLabel": true, + }, + ret: &Params{ + Namespace: "namespace", + PodSelector: labelSelector, + ConfigPath: "path", + ConfigMapName: "name", + UseGPUCliqueLabel: true, + podListOpt: &metav1.ListOptions{LabelSelector: "key=value"}, + }, + }, } for _, tc := range testCases { @@ -281,6 +300,223 @@ func TestGetComputeInstances(t *testing.T) { } } +func TestWithGPUCliqueDomains(t *testing.T) { + ctx := context.Background() + client := fake.NewSimpleClientset() + + nodes := []*corev1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "k8s-node-0", + Labels: map[string]string{topology.KeyNvidiaGPUClique: "clique-a"}, + Annotations: map[string]string{topology.KeyNodeInstance: "instance-0"}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "k8s-node-1", + Labels: map[string]string{topology.KeyNvidiaGPUClique: " clique-b "}, + Annotations: map[string]string{topology.KeyNodeInstance: 
"instance-1"}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "k8s-node-no-instance", + Labels: map[string]string{topology.KeyNvidiaGPUClique: "clique-c"}, + Annotations: map[string]string{}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "k8s-node-no-pod", + Labels: map[string]string{topology.KeyNvidiaGPUClique: "clique-d"}, + Annotations: map[string]string{topology.KeyNodeInstance: "instance-3"}, + }, + }, + } + for _, node := range nodes { + _, err := client.CoreV1().Nodes().Create(ctx, node, metav1.CreateOptions{}) + require.NoError(t, err) + } + + for _, pod := range []*corev1.Pod{ + makeReadySlurmdPod("pod-0", "k8s-node-0", "slurm-0"), + makeReadySlurmdPod("pod-1", "k8s-node-1", "slurm-1"), + makeReadySlurmdPod("pod-no-instance", "k8s-node-no-instance", "slurm-no-instance"), + } { + _, err := client.CoreV1().Pods("test-ns").Create(ctx, pod, metav1.CreateOptions{}) + require.NoError(t, err) + } + + existingDomains := topology.NewDomainMap() + existingDomains.AddHost("provider-domain", "provider-instance", "provider-node") + graph := &topology.Graph{ + Tiers: &topology.Vertex{ID: "root"}, + Domains: existingDomains, + } + eng := &SlinkyEngine{ + client: client, + params: &Params{ + Namespace: "test-ns", + podListOpt: &metav1.ListOptions{LabelSelector: "app=slinky"}, + }, + } + + clusterNodes, httpErr := eng.getClusterNodes(ctx) + require.Nil(t, httpErr) + got, httpErr := withGPUCliqueDomains(graph, clusterNodes) + require.Nil(t, httpErr) + require.NotSame(t, graph, got) + require.Same(t, graph.Tiers, got.Tiers) + require.Equal(t, topology.DomainMap{ + "clique-a": map[string]string{"slurm-0": "instance-0"}, + "clique-b": map[string]string{"slurm-1": "instance-1"}, + }, got.Domains) + require.Equal(t, topology.DomainMap{ + "provider-domain": map[string]string{"provider-node": "provider-instance"}, + }, graph.Domains) +} + +func TestWithGPUCliqueDomainsNoMatchingNodes(t *testing.T) { + ctx := context.Background() + client := 
fake.NewSimpleClientset() + + _, err := client.CoreV1().Nodes().Create(ctx, &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "k8s-node-0", + Annotations: map[string]string{topology.KeyNodeInstance: "instance-0"}, + }, + }, metav1.CreateOptions{}) + require.NoError(t, err) + + _, err = client.CoreV1().Pods("test-ns").Create(ctx, makeReadySlurmdPod("pod-0", "k8s-node-0", "slurm-0"), metav1.CreateOptions{}) + require.NoError(t, err) + + eng := &SlinkyEngine{ + client: client, + params: &Params{ + Namespace: "test-ns", + podListOpt: &metav1.ListOptions{LabelSelector: "app=slinky"}, + }, + } + + clusterNodes, httpErr := eng.getClusterNodes(ctx) + require.Nil(t, httpErr) + got, httpErr := withGPUCliqueDomains(&topology.Graph{}, clusterNodes) + require.Nil(t, got) + require.ErrorContains(t, httpErr, "useGpuCliqueLabel=true but no matching nodes found") +} + +func TestGenerateOutputUsesGPUCliqueDomains(t *testing.T) { + ctx := context.Background() + client := fake.NewSimpleClientset() + + for _, node := range []*corev1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "k8s-node-0", + Labels: map[string]string{topology.KeyNvidiaGPUClique: "clique-a"}, + Annotations: map[string]string{topology.KeyNodeInstance: "instance-0"}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "k8s-node-1", + Labels: map[string]string{topology.KeyNvidiaGPUClique: "clique-b"}, + Annotations: map[string]string{topology.KeyNodeInstance: "instance-1"}, + }, + }, + } { + _, err := client.CoreV1().Nodes().Create(ctx, node, metav1.CreateOptions{}) + require.NoError(t, err) + } + + for _, pod := range []*corev1.Pod{ + makeReadySlurmdPod("pod-0", "k8s-node-0", "alpha"), + makeReadySlurmdPod("pod-1", "k8s-node-1", "beta"), + } { + _, err := client.CoreV1().Pods("test-ns").Create(ctx, pod, metav1.CreateOptions{}) + require.NoError(t, err) + } + + providerDomains := topology.NewDomainMap() + providerDomains.AddHost("provider-domain", "instance-0", "alpha") + 
providerDomains.AddHost("provider-domain", "instance-1", "beta") + + eng := &SlinkyEngine{ + client: client, + params: &Params{ + BaseParams: slurm.BaseParams{ + Plugin: topology.TopologyBlock, + BlockSizes: []int{1}, + }, + Namespace: "test-ns", + ConfigMapName: "slurm-config", + ConfigPath: "topology.conf", + UseGPUCliqueLabel: true, + podListOpt: &metav1.ListOptions{LabelSelector: "app=slinky"}, + }, + } + + result, httpErr := eng.GenerateOutput(ctx, &topology.Graph{Domains: providerDomains}, nil) + require.Nil(t, httpErr) + require.Equal(t, []byte("OK\n"), result) + + cm, err := client.CoreV1().ConfigMaps("test-ns").Get(ctx, "slurm-config", metav1.GetOptions{}) + require.NoError(t, err) + require.Equal(t, `# block001=clique-a +BlockName=block001 Nodes=alpha +# block002=clique-b +BlockName=block002 Nodes=beta +BlockSizes=1 +`, cm.Data["topology.conf"]) +} + +func TestUsesBlockTopology(t *testing.T) { + require.False(t, usesBlockTopology(nil)) + require.False(t, usesBlockTopology(&translate.Config{Plugin: topology.TopologyTree})) + require.True(t, usesBlockTopology(&translate.Config{Plugin: topology.TopologyBlock})) + require.True(t, usesBlockTopology(&translate.Config{ + Topologies: map[string]*translate.TopologySpec{ + "block": {Plugin: topology.TopologyBlock}, + }, + })) + require.False(t, usesBlockTopology(&translate.Config{ + Topologies: map[string]*translate.TopologySpec{ + "flat": {Plugin: topology.TopologyFlat}, + "nil": nil, + }, + })) +} + +func makeReadySlurmdPod(name, nodeName, slurmName string) *corev1.Pod { + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: "test-ns", + Labels: map[string]string{ + "app": "slinky", + topology.KeySlurmNodeName: slurmName, + }, + }, + Spec: corev1.PodSpec{ + NodeName: nodeName, + Containers: []corev1.Container{ + {Name: "test", Image: "test"}, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: 
corev1.ConditionTrue, + }, + }, + }, + } +} + // Helper for annotation checks func requireAnnotation(t *testing.T, annotations map[string]string, key, expected string) { val, ok := annotations[key] diff --git a/tests/charts/values.slinky.ib.block-example.yaml.golden.yaml b/tests/charts/values.slinky.ib.block-example.yaml.golden.yaml new file mode 100644 index 00000000..360523f4 --- /dev/null +++ b/tests/charts/values.slinky.ib.block-example.yaml.golden.yaml @@ -0,0 +1,495 @@ +--- +# Source: topograph/charts/node-data-broker/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: chart-ci-node-data-broker + labels: + helm.sh/chart: node-data-broker-0.4.0 + app.kubernetes.io/name: node-data-broker + app.kubernetes.io/instance: chart-ci + app.kubernetes.io/version: "v0.4.0" + app.kubernetes.io/managed-by: Helm +automountServiceAccountToken: true +--- +# Source: topograph/charts/node-observer/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: chart-ci-node-observer + labels: + helm.sh/chart: node-observer-0.4.0 + app.kubernetes.io/name: node-observer + app.kubernetes.io/instance: chart-ci + app.kubernetes.io/version: "v0.4.0" + app.kubernetes.io/managed-by: Helm +automountServiceAccountToken: true +--- +# Source: topograph/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: chart-ci-topograph + labels: + helm.sh/chart: topograph-0.4.0 + app.kubernetes.io/name: topograph + app.kubernetes.io/instance: chart-ci + app.kubernetes.io/version: "v0.4.0" + app.kubernetes.io/managed-by: Helm +automountServiceAccountToken: true +--- +# Source: topograph/charts/node-observer/templates/configmap.yml +apiVersion: v1 +kind: ConfigMap +metadata: + name: chart-ci-node-observer + labels: + helm.sh/chart: node-observer-0.4.0 + app.kubernetes.io/name: node-observer + app.kubernetes.io/instance: chart-ci + app.kubernetes.io/version: "v0.4.0" + app.kubernetes.io/managed-by: Helm +data: + 
node-observer-config.yaml: |- + generateTopologyUrl: "http://chart-ci.topograph.svc.cluster.local:49021/v1/generate" + provider: + name: infiniband-k8s + params: + nodeSelector: + slurmCluster: my-cluster + useGpuCliqueLabel: true + engine: + name: slinky + params: + blockSizes: + - 4 + namespace: slurm + nodeSelector: + slurmCluster: my-cluster + plugin: topology/block + podSelector: + matchLabels: + app.kubernetes.io/component: compute + topologyConfigPath: topology.conf + topologyConfigmapName: slurm-config + useGpuCliqueLabel: true + trigger: + podSelector: + matchLabels: + app.kubernetes.io/component: compute +--- +# Source: topograph/templates/configmap.yml +apiVersion: v1 +kind: ConfigMap +metadata: + name: chart-ci-topograph + labels: + helm.sh/chart: topograph-0.4.0 + app.kubernetes.io/name: topograph + app.kubernetes.io/instance: chart-ci + app.kubernetes.io/version: "v0.4.0" + app.kubernetes.io/managed-by: Helm +data: + topograph-config.yaml: |- + http: + port: 49021 + ssl: false + requestAggregationDelay: 15s +--- +# Source: topograph/charts/node-data-broker/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: chart-ci-node-data-broker +rules: +- apiGroups: [""] + resources: [nodes] + verbs: [get,list,update] +--- +# Source: topograph/charts/node-observer/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: chart-ci-node-observer +rules: +- apiGroups: [""] + resources: [nodes,pods] + verbs: [get,list,watch] +--- +# Source: topograph/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: chart-ci-topograph +rules: +- apiGroups: [""] + resources: [pods] + verbs: [get,list] +- apiGroups: [""] + resources: [pods/exec] + verbs: [create] +- apiGroups: [""] + resources: [nodes] + verbs: [get,list,update] +- apiGroups: [apps] + resources: [daemonsets] + verbs: [get,list] +- apiGroups: [""] + resources: [configmaps] + verbs: 
[create,get,list,update] +--- +# Source: topograph/charts/node-data-broker/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: chart-ci-node-data-broker +subjects: +- kind: ServiceAccount + name: chart-ci-node-data-broker + namespace: topograph + apiGroup: "" +roleRef: + kind: ClusterRole + name: chart-ci-node-data-broker + apiGroup: "" +--- +# Source: topograph/charts/node-observer/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: chart-ci-node-observer +subjects: +- kind: ServiceAccount + name: chart-ci-node-observer + namespace: topograph + apiGroup: "" +roleRef: + kind: ClusterRole + name: chart-ci-node-observer + apiGroup: "" +--- +# Source: topograph/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: chart-ci-topograph +subjects: +- kind: ServiceAccount + name: chart-ci-topograph + namespace: topograph + apiGroup: "" +roleRef: + kind: ClusterRole + name: chart-ci-topograph + apiGroup: "" +--- +# Source: topograph/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: chart-ci-topograph + labels: + helm.sh/chart: topograph-0.4.0 + app.kubernetes.io/name: topograph + app.kubernetes.io/instance: chart-ci + app.kubernetes.io/version: "v0.4.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 49021 + targetPort: http + protocol: TCP + name: http + selector: + app.kubernetes.io/name: topograph + app.kubernetes.io/instance: chart-ci +--- +# Source: topograph/charts/node-data-broker/templates/daemonset.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: chart-ci-node-data-broker + labels: + helm.sh/chart: node-data-broker-0.4.0 + app.kubernetes.io/name: node-data-broker + app.kubernetes.io/instance: chart-ci + app.kubernetes.io/version: "v0.4.0" + app.kubernetes.io/managed-by: Helm +spec: + selector: + matchLabels: + app.kubernetes.io/name: 
node-data-broker + app.kubernetes.io/instance: chart-ci + template: + metadata: + labels: + helm.sh/chart: node-data-broker-0.4.0 + app.kubernetes.io/name: node-data-broker + app.kubernetes.io/instance: chart-ci + app.kubernetes.io/version: "v0.4.0" + app.kubernetes.io/managed-by: Helm + spec: + serviceAccountName: chart-ci-node-data-broker + securityContext: + {} + initContainers: + - name: init-node-labels + image: "ghcr.io/nvidia/topograph:v0.4.0" + imagePullPolicy: IfNotPresent + command: + - /usr/local/bin/node-data-broker-initc + args: + - --provider=infiniband-k8s + - -v=4 + - --set=useGpuCliqueLabel=true + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - mountPath: /sys/class + name: sys-class-volume + containers: + - name: node-data-broker + securityContext: + privileged: true + image: "ghcr.io/nvidia/topograph/ib:main" + imagePullPolicy: IfNotPresent + command: + + - tail + - -f + - /dev/null + resources: + limits: + cpu: 100m + memory: 128Mi + requests: + cpu: 100m + memory: 128Mi + volumeMounts: + - mountPath: /sys/class + name: sys-class-volume + volumes: + - hostPath: + path: /sys/class + type: Directory + name: sys-class-volume + nodeSelector: + node.dgxc.nvidia.com/has-gpu: "true" + tolerations: + - effect: NoSchedule + key: nvidia.com/gpu + operator: Equal + value: present +--- +# Source: topograph/charts/node-observer/templates/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chart-ci-node-observer + labels: + helm.sh/chart: node-observer-0.4.0 + app.kubernetes.io/name: node-observer + app.kubernetes.io/instance: chart-ci + app.kubernetes.io/version: "v0.4.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: node-observer + app.kubernetes.io/instance: chart-ci + template: + metadata: + labels: + helm.sh/chart: node-observer-0.4.0 + app.kubernetes.io/name: node-observer + app.kubernetes.io/instance: chart-ci + 
app.kubernetes.io/version: "v0.4.0" + app.kubernetes.io/managed-by: Helm + spec: + serviceAccountName: chart-ci-node-observer + securityContext: + {} + initContainers: + - name: wait + image: curlimages/curl:8.13.0 + imagePullPolicy: IfNotPresent + command: + - sh + args: + - -c + - | + until curl -sf "http://chart-ci.topograph.svc.cluster.local:49021/healthz" ; do + echo "Waiting for topograph to start ..." + sleep 2 + done + containers: + - name: node-observer + securityContext: + {} + image: "ghcr.io/nvidia/topograph:v0.4.0" + imagePullPolicy: IfNotPresent + command: + - /usr/local/bin/node-observer + args: + - -v=3 + resources: + limits: + cpu: 400m + memory: 512Mi + requests: + cpu: 250m + memory: 256Mi + volumeMounts: + - name: config-volume + mountPath: /etc/topograph + volumes: + - name: config-volume + configMap: + defaultMode: 420 + name: chart-ci-node-observer + nodeSelector: + dedicated: user-workload +--- +# Source: topograph/templates/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chart-ci-topograph + labels: + helm.sh/chart: topograph-0.4.0 + app.kubernetes.io/name: topograph + app.kubernetes.io/instance: chart-ci + app.kubernetes.io/version: "v0.4.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: topograph + app.kubernetes.io/instance: chart-ci + template: + metadata: + labels: + helm.sh/chart: topograph-0.4.0 + app.kubernetes.io/name: topograph + app.kubernetes.io/instance: chart-ci + app.kubernetes.io/version: "v0.4.0" + app.kubernetes.io/managed-by: Helm + spec: + serviceAccountName: chart-ci-topograph + securityContext: + {} + containers: + - name: topograph + securityContext: + {} + image: "ghcr.io/nvidia/topograph:v0.4.0" + imagePullPolicy: IfNotPresent + command: + - /usr/local/bin/topograph + args: + - -v=3 + env: + - name: NODE_DATA_BROKER_NAME + value: chart-ci-node-data-broker + - name: NODE_DATA_BROKER_NAMESPACE + value: topograph + ports: + - 
name: http + containerPort: 49021 + protocol: TCP + livenessProbe: + httpGet: + path: /healthz + port: http + readinessProbe: + httpGet: + path: /healthz + port: http + resources: + limits: + cpu: 400m + memory: 512Mi + requests: + cpu: 250m + memory: 256Mi + volumeMounts: + - name: config-volume + mountPath: /etc/topograph + volumes: + - name: config-volume + configMap: + defaultMode: 420 + name: chart-ci-topograph + nodeSelector: + dedicated: user-workload + tolerations: + - operator: Exists +--- +# Source: topograph/templates/tests/test-healthz.yaml +apiVersion: v1 +kind: Pod +metadata: + name: "chart-ci-topograph-test-healthz" + labels: + helm.sh/chart: topograph-0.4.0 + app.kubernetes.io/name: topograph + app.kubernetes.io/instance: chart-ci + app.kubernetes.io/version: "v0.4.0" + app.kubernetes.io/managed-by: Helm + annotations: + "helm.sh/hook": test + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + restartPolicy: Never + securityContext: + {} + containers: + - name: healthz + securityContext: + {} + image: "ghcr.io/nvidia/topograph:v0.4.0" + imagePullPolicy: IfNotPresent + command: ["sh", "-c"] + args: + - | + set -eu + wget -q -O /dev/null -T 10 \ + "http://chart-ci-topograph:49021/healthz" +--- +# Source: topograph/templates/tests/test-metrics.yaml +apiVersion: v1 +kind: Pod +metadata: + name: "chart-ci-topograph-test-metrics" + labels: + helm.sh/chart: topograph-0.4.0 + app.kubernetes.io/name: topograph + app.kubernetes.io/instance: chart-ci + app.kubernetes.io/version: "v0.4.0" + app.kubernetes.io/managed-by: Helm + annotations: + "helm.sh/hook": test + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + restartPolicy: Never + securityContext: + {} + containers: + - name: metrics + securityContext: + {} + image: "ghcr.io/nvidia/topograph:v0.4.0" + imagePullPolicy: IfNotPresent + command: ["sh", "-c"] + args: + - | + set -eu + body=$(wget -q -O - -T 10 \ + "http://chart-ci-topograph:49021/metrics") + echo 
"$body" | grep -q '^topograph_version'