From efbc004e49f5a7b33b121bfa080c50bd624ecc6d Mon Sep 17 00:00:00 2001 From: Martin Gencur Date: Thu, 25 Jun 2026 11:04:18 +0200 Subject: [PATCH 1/4] test(healthcheck): add unit tests for AWS identity provider validation Extract validateAWSIdentityProvider from awsHealthCheckIdentityProvider to enable testing with a mock EC2 client. Add test cases covering DescribeVpcEndpoints error paths (WebIdentityErr, other API errors, non-API errors) and the success path that were previously untested. Signed-off-by: Martin Gencur Co-Authored-By: Claude Opus 4.6 --- .../controllers/healthcheck/aws.go | 5 + .../controllers/healthcheck/aws_test.go | 127 +++++++++++++++--- 2 files changed, 115 insertions(+), 17 deletions(-) diff --git a/control-plane-operator/controllers/healthcheck/aws.go b/control-plane-operator/controllers/healthcheck/aws.go index a99463c0a392..552f206c58c7 100644 --- a/control-plane-operator/controllers/healthcheck/aws.go +++ b/control-plane-operator/controllers/healthcheck/aws.go @@ -7,6 +7,7 @@ import ( hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" "github.com/openshift/hypershift/control-plane-operator/controllers/hostedcontrolplane" + "github.com/openshift/hypershift/support/awsapi" "github.com/aws/aws-sdk-go-v2/service/ec2" "github.com/aws/smithy-go" @@ -33,6 +34,10 @@ func awsHealthCheckIdentityProvider(ctx context.Context, hcp *hyperv1.HostedCont } ec2Client, _ := hostedcontrolplane.GetEC2Client(ctx) + return validateAWSIdentityProvider(ctx, hcp, ec2Client) +} + +func validateAWSIdentityProvider(ctx context.Context, hcp *hyperv1.HostedControlPlane, ec2Client awsapi.EC2API) error { if ec2Client == nil { // EC2 client is not available (token minting may have failed) condition := metav1.Condition{ diff --git a/control-plane-operator/controllers/healthcheck/aws_test.go b/control-plane-operator/controllers/healthcheck/aws_test.go index c7c21330745a..67e6e703ef1f 100644 --- a/control-plane-operator/controllers/healthcheck/aws_test.go +++ b/control-plane-operator/controllers/healthcheck/aws_test.go @@ -1,34 +1,42 @@ package healthcheck import ( + "fmt" "testing" hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" + "github.com/openshift/hypershift/support/awsapi" + + "github.com/aws/aws-sdk-go-v2/service/ec2" + "github.com/aws/smithy-go" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "go.uber.org/mock/gomock" ) // TestAWSHealthCheckIdentityProviderConditionLogic tests the condition setting logic -// when KAS is not available. This validates that the function correctly sets the -// ValidAWSIdentityProvider condition to Unknown when it cannot perform validation. +// for the ValidAWSIdentityProvider condition across different code paths. func TestAWSHealthCheckIdentityProviderConditionLogic(t *testing.T) { testCases := []struct { name string kasCondition *metav1.Condition + setupEC2Mock func(*gomock.Controller) awsapi.EC2API expectedStatus metav1.ConditionStatus expectedReason string expectedMessage string + expectError bool }{ { - name: "KAS not available - condition missing", + name: "When KAS condition is missing, it should set ValidAWSIdentityProvider to Unknown", kasCondition: nil, expectedStatus: metav1.ConditionUnknown, expectedReason: hyperv1.StatusUnknownReason, expectedMessage: "Cannot validate AWS identity provider while KubeAPIServer is not available", }, { - name: "KAS not available - condition False", + name: "When KAS condition is False, it should set ValidAWSIdentityProvider to Unknown", kasCondition: &metav1.Condition{ Type: string(hyperv1.KubeAPIServerAvailable), Status: metav1.ConditionFalse, @@ -38,7 +46,7 @@ func TestAWSHealthCheckIdentityProviderConditionLogic(t *testing.T) { expectedMessage: "Cannot validate AWS identity provider while KubeAPIServer is not available", }, { - name: "KAS not available - condition Unknown", + name: "When KAS condition is Unknown, it should set ValidAWSIdentityProvider to Unknown", kasCondition: &metav1.Condition{ Type: string(hyperv1.KubeAPIServerAvailable), Status: metav1.ConditionUnknown, @@ -48,7 +56,7 @@ func TestAWSHealthCheckIdentityProviderConditionLogic(t *testing.T) { expectedMessage: "Cannot validate AWS identity provider while KubeAPIServer is not available", }, { - name: "KAS available", + name: "When KAS is available but EC2 client is nil, it should set ValidAWSIdentityProvider to Unknown", kasCondition: &metav1.Condition{ Type: string(hyperv1.KubeAPIServerAvailable), Status: metav1.ConditionTrue, @@ -57,11 +65,83 @@ func TestAWSHealthCheckIdentityProviderConditionLogic(t *testing.T) { expectedReason: hyperv1.StatusUnknownReason, expectedMessage: "AWS EC2 client is not available", }, + { + name: "When DescribeVpcEndpoints returns WebIdentityErr, it should set ValidAWSIdentityProvider to False", + kasCondition: &metav1.Condition{ + Type: string(hyperv1.KubeAPIServerAvailable), + Status: metav1.ConditionTrue, + }, + setupEC2Mock: func(ctrl *gomock.Controller) awsapi.EC2API { + m := awsapi.NewMockEC2API(ctrl) + m.EXPECT().DescribeVpcEndpoints(gomock.Any(), gomock.Any()). + Return(nil, &smithy.GenericAPIError{ + Code: "WebIdentityErr", + Message: "some AWS request details", + }) + return m + }, + expectedStatus: metav1.ConditionFalse, + expectedReason: hyperv1.InvalidIdentityProvider, + expectedMessage: "WebIdentityErr", + expectError: true, + }, + { + name: "When DescribeVpcEndpoints returns a non-WebIdentityErr API error, it should set ValidAWSIdentityProvider to Unknown", + kasCondition: &metav1.Condition{ + Type: string(hyperv1.KubeAPIServerAvailable), + Status: metav1.ConditionTrue, + }, + setupEC2Mock: func(ctrl *gomock.Controller) awsapi.EC2API { + m := awsapi.NewMockEC2API(ctrl) + m.EXPECT().DescribeVpcEndpoints(gomock.Any(), gomock.Any()). + Return(nil, &smithy.GenericAPIError{ + Code: "UnauthorizedAccess", + Message: "access denied", + }) + return m + }, + expectedStatus: metav1.ConditionUnknown, + expectedReason: hyperv1.AWSErrorReason, + expectedMessage: "UnauthorizedAccess", + expectError: true, + }, + { + name: "When DescribeVpcEndpoints returns a non-API error, it should set ValidAWSIdentityProvider to Unknown", + kasCondition: &metav1.Condition{ + Type: string(hyperv1.KubeAPIServerAvailable), + Status: metav1.ConditionTrue, + }, + setupEC2Mock: func(ctrl *gomock.Controller) awsapi.EC2API { + m := awsapi.NewMockEC2API(ctrl) + m.EXPECT().DescribeVpcEndpoints(gomock.Any(), gomock.Any()). + Return(nil, fmt.Errorf("network timeout")) + return m + }, + expectedStatus: metav1.ConditionUnknown, + expectedReason: hyperv1.StatusUnknownReason, + expectedMessage: "network timeout", + expectError: true, + }, + { + name: "When DescribeVpcEndpoints succeeds, it should set ValidAWSIdentityProvider to True", + kasCondition: &metav1.Condition{ + Type: string(hyperv1.KubeAPIServerAvailable), + Status: metav1.ConditionTrue, + }, + setupEC2Mock: func(ctrl *gomock.Controller) awsapi.EC2API { + m := awsapi.NewMockEC2API(ctrl) + m.EXPECT().DescribeVpcEndpoints(gomock.Any(), gomock.Any()). + Return(&ec2.DescribeVpcEndpointsOutput{}, nil) + return m + }, + expectedStatus: metav1.ConditionTrue, + expectedReason: hyperv1.AsExpectedReason, + expectedMessage: hyperv1.AllIsWellMessage, + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - // Create a test HCP hcp := &hyperv1.HostedControlPlane{ ObjectMeta: metav1.ObjectMeta{ Name: "test-hcp", @@ -73,37 +153,50 @@ func TestAWSHealthCheckIdentityProviderConditionLogic(t *testing.T) { }, } - // Add KAS condition if specified if tc.kasCondition != nil { meta.SetStatusCondition(&hcp.Status.Conditions, *tc.kasCondition) } - err := awsHealthCheckIdentityProvider(t.Context(), hcp) - if err != nil { - t.Errorf("Expected no error but got: %v", err) + kasAvailable := meta.FindStatusCondition(hcp.Status.Conditions, string(hyperv1.KubeAPIServerAvailable)) + if kasAvailable != nil && kasAvailable.Status == metav1.ConditionTrue { + var ec2Client awsapi.EC2API + if tc.setupEC2Mock != nil { + mockCtrl := gomock.NewController(t) + ec2Client = tc.setupEC2Mock(mockCtrl) + } + err := validateAWSIdentityProvider(t.Context(), hcp, ec2Client) + if tc.expectError && err == nil { + t.Error("expected error but got nil") + } + if !tc.expectError && err != nil { + t.Errorf("expected no error but got: %v", err) + } + } else { + err := awsHealthCheckIdentityProvider(t.Context(), hcp) + if err != nil { + t.Errorf("expected no error but got: %v", err) + } } - // Verify the ValidAWSIdentityProvider condition was set correctly condition := meta.FindStatusCondition(hcp.Status.Conditions, string(hyperv1.ValidAWSIdentityProvider)) if condition == nil { t.Fatal("ValidAWSIdentityProvider condition was not set") - return } if condition.Status != tc.expectedStatus { - t.Errorf("Expected status %v, got %v", tc.expectedStatus, condition.Status) + t.Errorf("expected status %v, got %v", tc.expectedStatus, condition.Status) } if condition.Reason != tc.expectedReason { - t.Errorf("Expected reason %v, got %v", tc.expectedReason, condition.Reason) + t.Errorf("expected reason %v, got %v", tc.expectedReason, condition.Reason) } if tc.expectedMessage != "" && condition.Message != tc.expectedMessage { - t.Errorf("Expected message %q, got %q", tc.expectedMessage, condition.Message) + t.Errorf("expected message %q, got %q", tc.expectedMessage, condition.Message) } if condition.ObservedGeneration != hcp.Generation { - t.Errorf("Expected ObservedGeneration %v, got %v", hcp.Generation, condition.ObservedGeneration) + t.Errorf("expected ObservedGeneration %v, got %v", hcp.Generation, condition.ObservedGeneration) } }) } From c404aaac96e4efbbc488cb4fdf2f7034eb3eb2d2 Mon Sep 17 00:00:00 2001 From: Martin Gencur Date: Thu, 25 Jun 2026 14:59:57 +0200 Subject: [PATCH 2/4] test(aws): add unit tests for identity provider deletion chain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tests for the AWS identity provider deletion path (OCP-60484): - TestCleanupAWSOIDCBucketData: S3 cleanup, error handling, NoSuchBucket tolerance - TestDeleteAWSEndpointServices: CPO finalizer removal for invalid creds and expired grace period - TestDeleteOrphanedMachines: AWSMachine finalizer cleanup based on credential status These tests exercise the condition chain: OIDC upload fails → no finalizer → cleanup no-op → GetCredentialStatus=Invalid → deleteAWSEndpointServices removes CPO finalizer → DeleteOrphanedMachines clears AWSMachine finalizers → deletion proceeds. Improve existing TestGetCredentialStatus: standardize test names to "it should" convention, replace &[]T{v}[0] with ptr.To(), and extract hostedClusterWithCredentialConditions helper to reduce duplication. Co-Authored-By: Claude Opus 4.6 --- .../aws_endpoint_services_test.go | 123 ++++++++++++ .../hostedcluster/aws_oidc_test.go | 136 +++++++++++++ .../internal/platform/aws/aws_test.go | 190 +++++++++++++++--- 3 files changed, 421 insertions(+), 28 deletions(-) create mode 100644 hypershift-operator/controllers/hostedcluster/aws_endpoint_services_test.go create mode 100644 hypershift-operator/controllers/hostedcluster/aws_oidc_test.go diff --git a/hypershift-operator/controllers/hostedcluster/aws_endpoint_services_test.go b/hypershift-operator/controllers/hostedcluster/aws_endpoint_services_test.go new file mode 100644 index 000000000000..4c7f5425d50b --- /dev/null +++ b/hypershift-operator/controllers/hostedcluster/aws_endpoint_services_test.go @@ -0,0 +1,123 @@ +package hostedcluster + +import ( + "testing" + "time" + + . "github.com/onsi/gomega" + + hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" + "github.com/openshift/hypershift/support/api" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + crclient "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func hostedClusterWithCredentialConditions(oidcStatus, awsIdpStatus metav1.ConditionStatus) *hyperv1.HostedCluster { + hc := &hyperv1.HostedCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "clusters"}, + } + meta.SetStatusCondition(&hc.Status.Conditions, metav1.Condition{ + Type: string(hyperv1.ValidOIDCConfiguration), + Status: oidcStatus, + Reason: "test", + }) + meta.SetStatusCondition(&hc.Status.Conditions, metav1.Condition{ + Type: string(hyperv1.ValidAWSIdentityProvider), + Status: awsIdpStatus, + Reason: "test", + }) + return hc +} + +func TestDeleteAWSEndpointServices(t *testing.T) { + cpoFinalizer := "hypershift.openshift.io/control-plane-operator-finalizer" + namespace := "clusters-test" + + tests := []struct { + name string + hc *hyperv1.HostedCluster + endpoints []hyperv1.AWSEndpointService + expectErr bool + expectPending bool + expectFinalizerRemoved bool + }{ + { + name: "When endpoint is deleting with invalid creds, it should remove CPO finalizer", + hc: hostedClusterWithCredentialConditions(metav1.ConditionFalse, metav1.ConditionTrue), + endpoints: []hyperv1.AWSEndpointService{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "ep-1", + Namespace: namespace, + DeletionTimestamp: &metav1.Time{Time: time.Now().Add(-1 * time.Minute)}, + Finalizers: []string{cpoFinalizer}, + }, + }, + }, + expectPending: true, + expectFinalizerRemoved: true, + }, + { + name: "When endpoint is deleting with valid creds past grace period, it should remove CPO finalizer", + hc: hostedClusterWithCredentialConditions(metav1.ConditionTrue, metav1.ConditionTrue), + endpoints: []hyperv1.AWSEndpointService{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "ep-1", + Namespace: namespace, + DeletionTimestamp: &metav1.Time{Time: time.Now().Add(-15 * time.Minute)}, + Finalizers: []string{cpoFinalizer}, + }, + }, + }, + expectPending: true, + expectFinalizerRemoved: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + g := NewGomegaWithT(t) + + objects := []crclient.Object{} + for i := range tc.endpoints { + objects = append(objects, &tc.endpoints[i]) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(api.Scheme). + WithObjects(objects...). + Build() + + pending, err := deleteAWSEndpointServices(t.Context(), fakeClient, tc.hc, namespace) + + if tc.expectErr { + g.Expect(err).To(HaveOccurred()) + } else { + g.Expect(err).ToNot(HaveOccurred()) + } + + g.Expect(pending).To(Equal(tc.expectPending)) + + if tc.expectFinalizerRemoved { + for _, ep := range tc.endpoints { + updatedEP := &hyperv1.AWSEndpointService{} + err := fakeClient.Get(t.Context(), crclient.ObjectKey{ + Namespace: ep.Namespace, + Name: ep.Name, + }, updatedEP) + if apierrors.IsNotFound(err) { + continue + } + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(updatedEP.Finalizers).ToNot(ContainElement(cpoFinalizer)) + } + } + }) + } +} diff --git a/hypershift-operator/controllers/hostedcluster/aws_oidc_test.go b/hypershift-operator/controllers/hostedcluster/aws_oidc_test.go new file mode 100644 index 000000000000..41f9c2a43653 --- /dev/null +++ b/hypershift-operator/controllers/hostedcluster/aws_oidc_test.go @@ -0,0 +1,136 @@ +package hostedcluster + +import ( + "fmt" + "testing" + + . "github.com/onsi/gomega" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/s3" + s3types "github.com/aws/aws-sdk-go-v2/service/s3/types" + + hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" + "github.com/openshift/hypershift/support/api" + "github.com/openshift/hypershift/support/awsapi" + + "go.uber.org/mock/gomock" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + ctrl "sigs.k8s.io/controller-runtime" + crclient "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestCleanupAWSOIDCBucketData(t *testing.T) { + tests := []struct { + name string + hcluster *hyperv1.HostedCluster + setupS3Mock func(*gomock.Controller) awsapi.S3API + bucketName string + expectErr bool + expectErrMsg string + expectFinalizer bool + }{ + { + name: "When cleanup succeeds, it should delete objects and remove finalizer", + hcluster: &hyperv1.HostedCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "clusters", + Finalizers: []string{oidcDocumentsFinalizer}, + }, + Spec: hyperv1.HostedClusterSpec{InfraID: "test-infra"}, + }, + setupS3Mock: func(ctrl *gomock.Controller) awsapi.S3API { + m := awsapi.NewMockS3API(ctrl) + m.EXPECT().DeleteObjects(gomock.Any(), gomock.Any()). + Return(&s3.DeleteObjectsOutput{}, nil) + return m + }, + bucketName: "my-bucket", + expectFinalizer: false, + }, + { + name: "When DeleteObjects fails with a non-NoSuchBucket error, it should return error and keep finalizer", + hcluster: &hyperv1.HostedCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "clusters", + Finalizers: []string{oidcDocumentsFinalizer}, + }, + Spec: hyperv1.HostedClusterSpec{InfraID: "test-infra"}, + }, + setupS3Mock: func(ctrl *gomock.Controller) awsapi.S3API { + m := awsapi.NewMockS3API(ctrl) + m.EXPECT().DeleteObjects(gomock.Any(), gomock.Any()). + Return(nil, fmt.Errorf("access denied")) + return m + }, + bucketName: "my-bucket", + expectErr: true, + expectErrMsg: "failed to delete OIDC objects", + expectFinalizer: true, + }, + { + name: "When DeleteObjects fails with NoSuchBucket, it should succeed and remove finalizer", + hcluster: &hyperv1.HostedCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "clusters", + Finalizers: []string{oidcDocumentsFinalizer}, + }, + Spec: hyperv1.HostedClusterSpec{InfraID: "test-infra"}, + }, + setupS3Mock: func(ctrl *gomock.Controller) awsapi.S3API { + m := awsapi.NewMockS3API(ctrl) + m.EXPECT().DeleteObjects(gomock.Any(), gomock.Any()). + Return(nil, &s3types.NoSuchBucket{Message: aws.String("bucket not found")}) + return m + }, + bucketName: "my-bucket", + expectFinalizer: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + g := NewGomegaWithT(t) + + fakeClient := fake.NewClientBuilder(). + WithScheme(api.Scheme). + WithObjects(tc.hcluster). + Build() + + mockCtrl := gomock.NewController(t) + var s3Client awsapi.S3API + if tc.setupS3Mock != nil { + s3Client = tc.setupS3Mock(mockCtrl) + } + + r := &HostedClusterReconciler{ + Client: fakeClient, + S3Client: s3Client, + OIDCStorageProviderS3BucketName: tc.bucketName, + } + + err := r.cleanupOIDCBucketData(t.Context(), ctrl.Log, tc.hcluster) + + if tc.expectErr { + g.Expect(err).To(HaveOccurred()) + g.Expect(err.Error()).To(ContainSubstring(tc.expectErrMsg)) + } else { + g.Expect(err).ToNot(HaveOccurred()) + } + + updatedHC := &hyperv1.HostedCluster{} + g.Expect(fakeClient.Get(t.Context(), crclient.ObjectKeyFromObject(tc.hcluster), updatedHC)).To(Succeed()) + + if tc.expectFinalizer { + g.Expect(updatedHC.Finalizers).To(ContainElement(oidcDocumentsFinalizer)) + } else { + g.Expect(updatedHC.Finalizers).ToNot(ContainElement(oidcDocumentsFinalizer)) + } + }) + } +} diff --git a/hypershift-operator/controllers/hostedcluster/internal/platform/aws/aws_test.go b/hypershift-operator/controllers/hostedcluster/internal/platform/aws/aws_test.go index 1634286ffb3f..0021fd9170c1 100644 --- a/hypershift-operator/controllers/hostedcluster/internal/platform/aws/aws_test.go +++ b/hypershift-operator/controllers/hostedcluster/internal/platform/aws/aws_test.go @@ -3,14 +3,19 @@ package aws import ( "testing" + "github.com/google/go-cmp/cmp" + . "github.com/onsi/gomega" + hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" + "github.com/openshift/hypershift/support/api" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" capiaws "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" - - "github.com/google/go-cmp/cmp" + crclient "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" ) func TestReconcileAWSCluster(t *testing.T) { @@ -104,61 +109,61 @@ func TestGetCredentialStatus(t *testing.T) { expectedResult CredentialStatus }{ { - name: "When both conditions are True, return Valid", - ValidOIDCConfigurationConditionStatus: &[]metav1.ConditionStatus{metav1.ConditionTrue}[0], - ValidAWSIdentityProviderConditionStatus: &[]metav1.ConditionStatus{metav1.ConditionTrue}[0], + name: "When both conditions are True, it should return Valid", + ValidOIDCConfigurationConditionStatus: ptr.To(metav1.ConditionTrue), + ValidAWSIdentityProviderConditionStatus: ptr.To(metav1.ConditionTrue), expectedResult: CredentialStatusValid, }, { - name: "When OIDC is False, return Invalid", - ValidOIDCConfigurationConditionStatus: &[]metav1.ConditionStatus{metav1.ConditionFalse}[0], - ValidAWSIdentityProviderConditionStatus: &[]metav1.ConditionStatus{metav1.ConditionTrue}[0], + name: "When OIDC is False, it should return Invalid", + ValidOIDCConfigurationConditionStatus: ptr.To(metav1.ConditionFalse), + ValidAWSIdentityProviderConditionStatus: ptr.To(metav1.ConditionTrue), expectedResult: CredentialStatusInvalid, }, { - name: "When AWS Identity Provider is False, return Invalid", - ValidOIDCConfigurationConditionStatus: &[]metav1.ConditionStatus{metav1.ConditionTrue}[0], - ValidAWSIdentityProviderConditionStatus: &[]metav1.ConditionStatus{metav1.ConditionFalse}[0], + name: "When AWS Identity Provider is False, it should return Invalid", + ValidOIDCConfigurationConditionStatus: ptr.To(metav1.ConditionTrue), + ValidAWSIdentityProviderConditionStatus: ptr.To(metav1.ConditionFalse), expectedResult: CredentialStatusInvalid, }, { - name: "When both are False, return Invalid", - ValidOIDCConfigurationConditionStatus: &[]metav1.ConditionStatus{metav1.ConditionFalse}[0], - ValidAWSIdentityProviderConditionStatus: &[]metav1.ConditionStatus{metav1.ConditionFalse}[0], + name: "When both are False, it should return Invalid", + ValidOIDCConfigurationConditionStatus: ptr.To(metav1.ConditionFalse), + ValidAWSIdentityProviderConditionStatus: ptr.To(metav1.ConditionFalse), expectedResult: CredentialStatusInvalid, }, { - name: "When OIDC is Unknown, return Unknown", - ValidOIDCConfigurationConditionStatus: &[]metav1.ConditionStatus{metav1.ConditionUnknown}[0], - ValidAWSIdentityProviderConditionStatus: &[]metav1.ConditionStatus{metav1.ConditionTrue}[0], + name: "When OIDC is Unknown, it should return Unknown", + ValidOIDCConfigurationConditionStatus: ptr.To(metav1.ConditionUnknown), + ValidAWSIdentityProviderConditionStatus: ptr.To(metav1.ConditionTrue), expectedResult: CredentialStatusUnknown, }, { - name: "When AWS Identity Provider is Unknown, return Unknown", - ValidOIDCConfigurationConditionStatus: &[]metav1.ConditionStatus{metav1.ConditionTrue}[0], - ValidAWSIdentityProviderConditionStatus: &[]metav1.ConditionStatus{metav1.ConditionUnknown}[0], + name: "When AWS Identity Provider is Unknown, it should return Unknown", + ValidOIDCConfigurationConditionStatus: ptr.To(metav1.ConditionTrue), + ValidAWSIdentityProviderConditionStatus: ptr.To(metav1.ConditionUnknown), expectedResult: CredentialStatusUnknown, }, { - name: "When both are Unknown, return Unknown", - ValidOIDCConfigurationConditionStatus: &[]metav1.ConditionStatus{metav1.ConditionUnknown}[0], - ValidAWSIdentityProviderConditionStatus: &[]metav1.ConditionStatus{metav1.ConditionUnknown}[0], + name: "When both are Unknown, it should return Unknown", + ValidOIDCConfigurationConditionStatus: ptr.To(metav1.ConditionUnknown), + ValidAWSIdentityProviderConditionStatus: ptr.To(metav1.ConditionUnknown), expectedResult: CredentialStatusUnknown, }, { - name: "When OIDC condition is missing, return Unknown", + name: "When OIDC condition is missing, it should return Unknown", ValidOIDCConfigurationConditionStatus: nil, - ValidAWSIdentityProviderConditionStatus: &[]metav1.ConditionStatus{metav1.ConditionTrue}[0], + ValidAWSIdentityProviderConditionStatus: ptr.To(metav1.ConditionTrue), expectedResult: CredentialStatusUnknown, }, { - name: "When AWS Identity Provider condition is missing, return Unknown", - ValidOIDCConfigurationConditionStatus: &[]metav1.ConditionStatus{metav1.ConditionTrue}[0], + name: "When AWS Identity Provider condition is missing, it should return Unknown", + ValidOIDCConfigurationConditionStatus: ptr.To(metav1.ConditionTrue), ValidAWSIdentityProviderConditionStatus: nil, expectedResult: CredentialStatusUnknown, }, { - name: "When both conditions are missing, return Unknown", + name: "When both conditions are missing, it should return Unknown", ValidOIDCConfigurationConditionStatus: nil, ValidAWSIdentityProviderConditionStatus: nil, expectedResult: CredentialStatusUnknown, @@ -249,3 +254,132 @@ region = us-east-1 }) } } + +func hostedClusterWithCredentialConditions(oidcStatus, awsIdpStatus metav1.ConditionStatus) *hyperv1.HostedCluster { + hc := &hyperv1.HostedCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "clusters"}, + } + meta.SetStatusCondition(&hc.Status.Conditions, metav1.Condition{ + Type: string(hyperv1.ValidOIDCConfiguration), + Status: oidcStatus, + Reason: "test", + }) + meta.SetStatusCondition(&hc.Status.Conditions, metav1.Condition{ + Type: string(hyperv1.ValidAWSIdentityProvider), + Status: awsIdpStatus, + Reason: "test", + }) + return hc +} + +func TestDeleteOrphanedMachines(t *testing.T) { + namespace := "clusters-test" + deletionTime := metav1.Now() + + tests := []struct { + name string + hc *hyperv1.HostedCluster + machines []capiaws.AWSMachine + expectErr bool + expectFinalizersCleared bool + }{ + { + name: "When credentials are valid, it should skip cleanup", + hc: hostedClusterWithCredentialConditions(metav1.ConditionTrue, metav1.ConditionTrue), + machines: []capiaws.AWSMachine{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: namespace, + DeletionTimestamp: &deletionTime, + Finalizers: []string{"test-finalizer"}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-2", + Namespace: namespace, + Finalizers: []string{"test-finalizer"}, + }, + }, + }, + expectFinalizersCleared: false, + }, + { + name: "When credentials are invalid and machines have deletion timestamps, it should clear finalizers", + hc: hostedClusterWithCredentialConditions(metav1.ConditionFalse, metav1.ConditionTrue), + machines: []capiaws.AWSMachine{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: namespace, + DeletionTimestamp: &deletionTime, + Finalizers: []string{"test-finalizer"}, + }, + }, + }, + expectFinalizersCleared: true, + }, + { + name: "When credentials are invalid and machines have no deletion timestamps, it should not modify them", + hc: hostedClusterWithCredentialConditions(metav1.ConditionFalse, metav1.ConditionTrue), + machines: []capiaws.AWSMachine{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: namespace, + Finalizers: []string{"test-finalizer"}, + }, + }, + }, + expectFinalizersCleared: false, + }, + { + name: "When no AWSMachines exist, it should return nil", + hc: hostedClusterWithCredentialConditions(metav1.ConditionFalse, metav1.ConditionTrue), + machines: []capiaws.AWSMachine{}, + expectFinalizersCleared: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + g := NewGomegaWithT(t) + + objects := []crclient.Object{} + for i := range tc.machines { + objects = append(objects, &tc.machines[i]) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(api.Scheme). + WithObjects(objects...). + Build() + + a := AWS{} + err := a.DeleteOrphanedMachines(t.Context(), fakeClient, tc.hc, namespace) + + if tc.expectErr { + g.Expect(err).To(HaveOccurred()) + } else { + g.Expect(err).ToNot(HaveOccurred()) + } + + if tc.expectFinalizersCleared { + var machineList capiaws.AWSMachineList + g.Expect(fakeClient.List(t.Context(), &machineList, crclient.InNamespace(namespace))).To(Succeed()) + for _, m := range machineList.Items { + g.Expect(m.Finalizers).To(BeEmpty(), "expected finalizers to be cleared on machine %s", m.Name) + } + } + + if !tc.expectFinalizersCleared && len(tc.machines) > 0 { + var machineList capiaws.AWSMachineList + g.Expect(fakeClient.List(t.Context(), &machineList, crclient.InNamespace(namespace))).To(Succeed()) + for _, m := range machineList.Items { + g.Expect(m.Finalizers).To(ContainElement("test-finalizer"), "expected finalizers to be preserved on machine %s", m.Name) + } + } + }) + } +} From e63956274d812372845ee3161af0b38129774bb2 Mon Sep 17 00:00:00 2001 From: Martin Gencur Date: Fri, 26 Jun 2026 09:41:26 +0200 Subject: [PATCH 3/4] fix(aws): distinguish grace period expiry from invalid credentials in endpoint service log Differentiate between "no valid credentials" and "grace period expired" when logging CPO finalizer removal for AWSEndpointService resources. Co-Authored-By: Claude Opus 4.6 --- .../controllers/hostedcluster/hostedcluster_controller.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go b/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go index b3a6c3828d75..0a21de964999 100644 --- a/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go +++ b/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go @@ -3579,7 +3579,12 @@ func deleteAWSEndpointServices(ctx context.Context, c client.Client, hc *hyperv1 return false, fmt.Errorf("failed to remove finalizer from awsendpointservice: %w", err) } } - log.Info("Removed CPO finalizer for awsendpointservice because the HC has no valid aws credentials", "name", ep.Name, "endpoint-id", ep.Status.EndpointID) + reason := "the HC has no valid aws credentials" + if platformaws.GetCredentialStatus(hc) == platformaws.CredentialStatusValid { + reason = "deletion grace period expired" + } + + log.Info("Removed CPO finalizer for awsendpointservice because "+reason, "name", ep.Name, "endpoint-id", ep.Status.EndpointID) continue } From 5293291b91e768e25538d5534dba2d5f7517fb11 Mon Sep 17 00:00:00 2001 From: Martin Gencur Date: Fri, 26 Jun 2026 09:52:46 +0200 Subject: [PATCH 4/4] fix(aws): check S3 DeleteObjects partial failures during OIDC cleanup The S3 DeleteObjects API can return (output, nil) with per-object errors in output.Errors, meaning some objects failed to delete while the call itself succeeded. Previously the output was discarded, so partial failures silently removed the finalizer, leaving orphaned S3 objects with no retry path. Capture the output and return an error on partial failure, keeping the finalizer in place so the controller retries on the next reconcile. Co-Authored-By: Claude Opus 4.6 --- .../hostedcluster/aws_oidc_test.go | 40 ++++++++++++++++--- .../hostedcluster/hostedcluster_controller.go | 9 ++++- .../internal/platform/aws/aws_test.go | 3 +- 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/hypershift-operator/controllers/hostedcluster/aws_oidc_test.go b/hypershift-operator/controllers/hostedcluster/aws_oidc_test.go index 41f9c2a43653..aa6f8a55870d 100644 --- a/hypershift-operator/controllers/hostedcluster/aws_oidc_test.go +++ b/hypershift-operator/controllers/hostedcluster/aws_oidc_test.go @@ -6,20 +6,21 @@ import ( . "github.com/onsi/gomega" - "github.com/aws/aws-sdk-go-v2/aws" - "github.com/aws/aws-sdk-go-v2/service/s3" - s3types "github.com/aws/aws-sdk-go-v2/service/s3/types" - hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" "github.com/openshift/hypershift/support/api" "github.com/openshift/hypershift/support/awsapi" - "go.uber.org/mock/gomock" + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/s3" + s3types "github.com/aws/aws-sdk-go-v2/service/s3/types" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ctrl "sigs.k8s.io/controller-runtime" crclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "go.uber.org/mock/gomock" ) func TestCleanupAWSOIDCBucketData(t *testing.T) { @@ -91,6 +92,35 @@ func TestCleanupAWSOIDCBucketData(t *testing.T) { bucketName: "my-bucket", expectFinalizer: false, }, + { + name: "When DeleteObjects returns partial failure, it should return error and keep finalizer", + hcluster: &hyperv1.HostedCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "clusters", + Finalizers: []string{oidcDocumentsFinalizer}, + }, + Spec: hyperv1.HostedClusterSpec{InfraID: "test-infra"}, + }, + setupS3Mock: func(ctrl *gomock.Controller) awsapi.S3API { + m := awsapi.NewMockS3API(ctrl) + m.EXPECT().DeleteObjects(gomock.Any(), gomock.Any()). + Return(&s3.DeleteObjectsOutput{ + Errors: []s3types.Error{ + { + Key: aws.String("test-infra/.well-known/openid-configuration"), + Code: aws.String("AccessDenied"), + Message: aws.String("Access Denied"), + }, + }, + }, nil) + return m + }, + bucketName: "my-bucket", + expectErr: true, + expectErrMsg: "partial failure deleting OIDC objects", + expectFinalizer: true, + }, } for _, tc := range tests { diff --git a/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go b/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go index 0a21de964999..17d71b6b6614 100644 --- a/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go +++ b/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go @@ -4852,14 +4852,19 @@ func (r *HostedClusterReconciler) cleanupOIDCBucketData(ctx context.Context, log }) } - if _, err := r.S3Client.DeleteObjects(ctx, &s3.DeleteObjectsInput{ + output, err := r.S3Client.DeleteObjects(ctx, &s3.DeleteObjectsInput{ Bucket: aws.String(r.OIDCStorageProviderS3BucketName), Delete: &s3types.Delete{Objects: objectsToDelete}, - }); err != nil { + }) + if err != nil { var nsbErr *s3types.NoSuchBucket if !errors.As(err, &nsbErr) { return fmt.Errorf("failed to delete OIDC objects from %s S3 bucket: %w", r.OIDCStorageProviderS3BucketName, err) } + } else if output != nil && len(output.Errors) > 0 { + return fmt.Errorf("partial failure deleting OIDC objects from %s S3 bucket: %d of %d objects failed, first error: %s", + r.OIDCStorageProviderS3BucketName, len(output.Errors), len(objectsToDelete), + aws.ToString(output.Errors[0].Message)) } controllerutil.RemoveFinalizer(hcluster, oidcDocumentsFinalizer) diff --git a/hypershift-operator/controllers/hostedcluster/internal/platform/aws/aws_test.go b/hypershift-operator/controllers/hostedcluster/internal/platform/aws/aws_test.go index 0021fd9170c1..b7758387d63e 100644 --- a/hypershift-operator/controllers/hostedcluster/internal/platform/aws/aws_test.go +++ b/hypershift-operator/controllers/hostedcluster/internal/platform/aws/aws_test.go @@ -3,7 +3,6 @@ package aws import ( "testing" - "github.com/google/go-cmp/cmp" . "github.com/onsi/gomega" hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" @@ -16,6 +15,8 @@ import ( capiaws "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" crclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/google/go-cmp/cmp" ) func TestReconcileAWSCluster(t *testing.T) {