From 6cc76f6bbef0778d31841af4d63ead60210fc49d Mon Sep 17 00:00:00 2001 From: ghostloda <78798447@qq.com> Date: Thu, 15 May 2025 10:34:43 +0800 Subject: [PATCH] fix: Update Prometheus query for Ascend NPU device health status Signed-off-by: ghostloda <78798447@qq.com> --- server/internal/provider/ascend/provider.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/internal/provider/ascend/provider.go b/server/internal/provider/ascend/provider.go index 6c84d83a..efe8fbed 100644 --- a/server/internal/provider/ascend/provider.go +++ b/server/internal/provider/ascend/provider.go @@ -43,7 +43,7 @@ type DeviceMeta struct { func (a *Ascend) GetDevicesFromPrometheus(node *corev1.Node) map[string]*util.DeviceInfo { device := make(map[string]*util.DeviceInfo) - queryString := fmt.Sprintf("npu_chip_info_health_status{node=\"%s\"}", node.Name) + queryString := fmt.Sprintf("npu_chip_info_health_status{} * on(namespace, pod) group_left(node) kube_pod_info{node=\"%s\"}", node.Name) vs, err := a.prom.Query(context.Background(), queryString) if err != nil { a.log.Warnf("query %s failed", queryString)