From f2d19ebe0a6fac570bea724a21b52f1c7d94f2b5 Mon Sep 17 00:00:00 2001
From: nate stephany
Date: Tue, 10 Mar 2026 09:20:18 +0100
Subject: [PATCH] ocp4_workload_nvidia_gpu_operator: Wait for ClusterPolicy CRD before applying

The install_operator role considers installation complete when the CSV
reaches Succeeded state. However, the NVIDIA GPU Operator registers its
ClusterPolicy CRD after the CSV succeeds, causing a race condition where
the ClusterPolicy task fails to resolve nvidia.com/v1.ClusterPolicy.

Add an explicit wait for clusterpolicies.nvidia.com CRD availability
(30 retries x 10s = 5 min) before applying the ClusterPolicy resource.
The wait requires the CRD to both exist and report the Established
condition, since a CRD object can be present before its API is served.
---
 .../tasks/workload.yml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/roles/ocp4_workload_nvidia_gpu_operator/tasks/workload.yml b/roles/ocp4_workload_nvidia_gpu_operator/tasks/workload.yml
index 1e55735..e23b3a6 100644
--- a/roles/ocp4_workload_nvidia_gpu_operator/tasks/workload.yml
+++ b/roles/ocp4_workload_nvidia_gpu_operator/tasks/workload.yml
@@ -32,6 +32,24 @@
     install_operator_catalogsource_image: "{{ ocp4_workload_nvidia_gpu_operator_catalog_snapshot_image }}"
     install_operator_catalogsource_image_tag: "{{ ocp4_workload_nvidia_gpu_operator_catalog_snapshot_image_tag }}"
 
+# The operator CSV can reach Succeeded before the ClusterPolicy CRD is
+# registered; poll until the CRD exists and reports Established.
+- name: Wait for NVIDIA GPU Operator ClusterPolicy CRD to be available
+  kubernetes.core.k8s_info:
+    api_version: apiextensions.k8s.io/v1
+    kind: CustomResourceDefinition
+    name: clusterpolicies.nvidia.com
+  register: r_clusterpolicy_crd
+  retries: 30
+  delay: 10
+  until:
+    - r_clusterpolicy_crd.resources | length > 0
+    - >-
+      r_clusterpolicy_crd.resources[0].status.conditions | default([])
+      | selectattr('type', 'equalto', 'Established')
+      | selectattr('status', 'equalto', 'True')
+      | list | length > 0
+
 - name: Setup NVIDIA GPU Cluster Policy
   kubernetes.core.k8s:
     state: present