Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions api/hypershift/v1beta1/nodepool_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,8 +264,10 @@ type NodePoolSpec struct {

const (
// OSImageStreamRHEL9 is the OS image stream name for RHEL 9.
// This value must match support/releaseinfo.StreamRHEL9.
OSImageStreamRHEL9 = "rhel-9"
// OSImageStreamRHEL10 is the OS image stream name for RHEL 10.
// This value must match support/releaseinfo.StreamRHEL10.
OSImageStreamRHEL10 = "rhel-10"
)

Expand Down
31 changes: 22 additions & 9 deletions hypershift-operator/controllers/nodepool/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ func isSpotEnabled(nodePool *hyperv1.NodePool) bool {
return false
}

func awsMachineTemplateSpec(infraName string, hostedCluster *hyperv1.HostedCluster, nodePool *hyperv1.NodePool, defaultSG bool, releaseImage *releaseinfo.ReleaseImage) (*capiaws.AWSMachineTemplateSpec, error) {
ami, err := resolveAWSAMI(hostedCluster, nodePool, releaseImage)
func awsMachineTemplateSpec(infraName string, hostedCluster *hyperv1.HostedCluster, nodePool *hyperv1.NodePool, defaultSG bool, releaseImage *releaseinfo.ReleaseImage, rhelStream string) (*capiaws.AWSMachineTemplateSpec, error) {
ami, err := resolveAWSAMI(hostedCluster, nodePool, releaseImage, rhelStream)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -118,7 +118,7 @@ func awsMachineTemplateSpec(infraName string, hostedCluster *hyperv1.HostedClust
return awsMachineTemplateSpec, nil
}

func resolveAWSAMI(hostedCluster *hyperv1.HostedCluster, nodePool *hyperv1.NodePool, releaseImage *releaseinfo.ReleaseImage) (string, error) {
func resolveAWSAMI(hostedCluster *hyperv1.HostedCluster, nodePool *hyperv1.NodePool, releaseImage *releaseinfo.ReleaseImage, rhelStream string) (string, error) {
// TODO: Should the region be included in the NodePool platform information?
region := hostedCluster.Spec.Platform.AWS.Region
arch := nodePool.Spec.Arch
Expand All @@ -134,8 +134,7 @@ func resolveAWSAMI(hostedCluster *hyperv1.HostedCluster, nodePool *hyperv1.NodeP
return ami, nil
}
// Default behavior for Linux/RHCOS AMIs
// TODO(CNTRLPLANE-3553): resolve streamName via GetRHELStream once osImageStream API field is available
ami, err := defaultNodePoolAMI(region, arch, "", releaseImage)
ami, err := defaultNodePoolAMI(region, arch, rhelStream, releaseImage)
if err != nil {
return "", fmt.Errorf("couldn't discover an AMI for release image: %w", err)
}
Expand Down Expand Up @@ -271,7 +270,7 @@ func awsAdditionalTags(nodePool *hyperv1.NodePool, hostedCluster *hyperv1.Hosted
}

func (c *CAPI) awsMachineTemplate(ctx context.Context, templateNameGenerator func(spec any) (string, error)) (*capiaws.AWSMachineTemplate, error) {
desiredSpec, err := awsMachineTemplateSpec(c.capiClusterName, c.hostedCluster, c.nodePool, c.cpoCapabilities.CreateDefaultAWSSecurityGroup, c.releaseImage)
desiredSpec, err := awsMachineTemplateSpec(c.capiClusterName, c.hostedCluster, c.nodePool, c.cpoCapabilities.CreateDefaultAWSSecurityGroup, c.releaseImage, c.resolvedRHELStreamForBootImage)
if err != nil {
return nil, fmt.Errorf("failed to generate AWSMachineTemplateSpec: %w", err)
}
Expand Down Expand Up @@ -361,9 +360,23 @@ func (r *NodePoolReconciler) setAWSConditions(_ context.Context, nodePool *hyper
ObservedGeneration: nodePool.Generation,
})
} else {
// Default behavior for Linux/RHCOS AMIs
// TODO(CNTRLPLANE-3553): resolve streamName via GetRHELStream once osImageStream API field is available
ami, err := defaultNodePoolAMI(hcluster.Spec.Platform.AWS.Region, nodePool.Spec.Arch, "", releaseImage)
// Default behavior for Linux/RHCOS AMIs.
// Use getRHELStreamForBootImage so that the AMI lookup is consistent
// with the CAPI path: version-derived default (rhel-9 for OCP < 5.0,
// rhel-10 for OCP >= 5.0) for unset osImageStream, or a validated
// stream name for explicit osImageStream.
rhelStream, err := getRHELStreamForBootImage(nodePool, releaseImage)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this aws specific? how about the other platforms?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Last week I opened #8834 for other platforms. May we not block this one?

if err != nil {
SetStatusCondition(&nodePool.Status.Conditions, hyperv1.NodePoolCondition{
Type: hyperv1.NodePoolValidPlatformImageType,
Status: corev1.ConditionFalse,
Reason: hyperv1.NodePoolValidationFailedReason,
Message: fmt.Sprintf("Couldn't resolve RHEL stream for release image %q: %s", nodePool.Spec.Release.Image, err.Error()),
ObservedGeneration: nodePool.Generation,
})
return fmt.Errorf("couldn't resolve RHEL stream for release image: %w", err)
}
ami, err := defaultNodePoolAMI(hcluster.Spec.Platform.AWS.Region, nodePool.Spec.Arch, rhelStream, releaseImage)
if err != nil {
SetStatusCondition(&nodePool.Status.Conditions, hyperv1.NodePoolCondition{
Type: hyperv1.NodePoolValidPlatformImageType,
Expand Down
196 changes: 182 additions & 14 deletions hypershift-operator/controllers/nodepool/aws_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ func TestAWSMachineTemplateSpec(t *testing.T) {
},
true,
releaseImage,
"",
)
if tc.checkError != nil {
tc.checkError(t, err)
Expand Down Expand Up @@ -1090,19 +1091,7 @@ func TestSetAWSConditions(t *testing.T) {
ImageStream: &v1.ImageStream{
ObjectMeta: metav1.ObjectMeta{Name: "4.17.0"},
},
StreamMetadata: &stream.Stream{
Architectures: map[string]stream.Arch{
"x86_64": {
Images: stream.Images{
Aws: &stream.AwsImage{
Regions: map[string]stream.SingleImage{
"us-east-1": {Release: "4.17.0", Image: "ami-linux-us-east-1"},
},
},
},
},
},
},
StreamMetadata: testAWSStreamWithRelease("x86_64", "us-east-1", "ami-linux-us-east-1", "4.17.0"),
}

testCases := []struct {
Expand Down Expand Up @@ -1175,6 +1164,28 @@ func TestSetAWSConditions(t *testing.T) {
expectedCondType: string(hyperv1.NodePoolValidPlatformImageType),
expectedCondValue: corev1.ConditionFalse,
},
{
name: "When osImageStream is invalid for the release version it should set ValidPlatformImage to false",
nodePool: &hyperv1.NodePool{
Spec: hyperv1.NodePoolSpec{
Arch: hyperv1.ArchitectureAMD64,
Platform: hyperv1.NodePoolPlatform{Type: hyperv1.AWSPlatform, AWS: &hyperv1.AWSNodePoolPlatform{}},
OSImageStream: hyperv1.OSImageStreamReference{Name: "rhel-10"},
},
},
hostedCluster: &hyperv1.HostedCluster{
Spec: hyperv1.HostedClusterSpec{
Platform: hyperv1.PlatformSpec{AWS: &hyperv1.AWSPlatformSpec{Region: "us-east-1"}},
},
Status: hyperv1.HostedClusterStatus{
Platform: &hyperv1.PlatformStatus{AWS: &hyperv1.AWSPlatformStatus{DefaultWorkerSecurityGroupID: "sg-123"}},
},
},
releaseImage: releaseImageWithStreams, // 4.17.0 — rhel-10 is not valid
expectError: true,
expectedCondType: string(hyperv1.NodePoolValidPlatformImageType),
expectedCondValue: corev1.ConditionFalse,
},
{
name: "When HostedCluster has no AWS platform it should return error",
nodePool: &hyperv1.NodePool{
Expand Down Expand Up @@ -1250,6 +1261,7 @@ func TestResolveAWSAMI(t *testing.T) {
hostedCluster *hyperv1.HostedCluster
nodePool *hyperv1.NodePool
releaseImage *releaseinfo.ReleaseImage
rhelStream string
expectedAMI string
expectError bool
}{
Expand Down Expand Up @@ -1335,12 +1347,100 @@ func TestResolveAWSAMI(t *testing.T) {
},
expectError: true,
},
{
name: "When rhelStream is rhel-9 with single-stream payload (OSStreams nil), it should fall back to StreamMetadata",

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice test cases!

hostedCluster: &hyperv1.HostedCluster{
Spec: hyperv1.HostedClusterSpec{
Platform: hyperv1.PlatformSpec{AWS: &hyperv1.AWSPlatformSpec{Region: "us-east-1"}},
},
},
nodePool: &hyperv1.NodePool{
Spec: hyperv1.NodePoolSpec{
Arch: hyperv1.ArchitectureAMD64,
Platform: hyperv1.NodePoolPlatform{AWS: &hyperv1.AWSNodePoolPlatform{}},
},
},
rhelStream: "rhel-9",
releaseImage: &releaseinfo.ReleaseImage{
ImageStream: &v1.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "4.17.0"}},
StreamMetadata: testAWSStreamWithRelease("x86_64", "us-east-1", "ami-fallback-stream-metadata", "4.17.0"),
OSStreams: nil,
},
expectedAMI: "ami-fallback-stream-metadata",
},
{
name: "When rhelStream is rhel-9 with multi-stream payload, it should use OSStreams rhel-9",
hostedCluster: &hyperv1.HostedCluster{
Spec: hyperv1.HostedClusterSpec{
Platform: hyperv1.PlatformSpec{AWS: &hyperv1.AWSPlatformSpec{Region: "us-east-1"}},
},
},
nodePool: &hyperv1.NodePool{
Spec: hyperv1.NodePoolSpec{
Arch: hyperv1.ArchitectureAMD64,
Platform: hyperv1.NodePoolPlatform{AWS: &hyperv1.AWSNodePoolPlatform{}},
},
},
rhelStream: "rhel-9",
releaseImage: &releaseinfo.ReleaseImage{
ImageStream: &v1.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "5.0.0"}},
StreamMetadata: testAWSStreamWithRelease("x86_64", "us-east-1", "ami-default-stream", "5.0.0"),
OSStreams: map[string]*stream.Stream{
"rhel-9": testAWSStreamWithRelease("x86_64", "us-east-1", "ami-rhel9-osstreams", "5.0.0"),
},
},
expectedAMI: "ami-rhel9-osstreams",
},
{
name: "When rhelStream is rhel-10 with multi-stream payload, it should use OSStreams rhel-10",
hostedCluster: &hyperv1.HostedCluster{
Spec: hyperv1.HostedClusterSpec{
Platform: hyperv1.PlatformSpec{AWS: &hyperv1.AWSPlatformSpec{Region: "us-east-1"}},
},
},
nodePool: &hyperv1.NodePool{
Spec: hyperv1.NodePoolSpec{
Arch: hyperv1.ArchitectureAMD64,
Platform: hyperv1.NodePoolPlatform{AWS: &hyperv1.AWSNodePoolPlatform{}},
},
},
rhelStream: "rhel-10",
releaseImage: &releaseinfo.ReleaseImage{
ImageStream: &v1.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "5.0.0"}},
StreamMetadata: testAWSStreamWithRelease("x86_64", "us-east-1", "ami-default-stream", "5.0.0"),
OSStreams: map[string]*stream.Stream{
"rhel-10": testAWSStreamWithRelease("x86_64", "us-east-1", "ami-rhel10-osstreams", "5.0.0"),
},
},
expectedAMI: "ami-rhel10-osstreams",
},
{
name: "When rhelStream is rhel-10 with single-stream payload (OSStreams nil), it should fall back to StreamMetadata",
hostedCluster: &hyperv1.HostedCluster{
Spec: hyperv1.HostedClusterSpec{
Platform: hyperv1.PlatformSpec{AWS: &hyperv1.AWSPlatformSpec{Region: "us-east-1"}},
},
},
nodePool: &hyperv1.NodePool{
Spec: hyperv1.NodePoolSpec{
Arch: hyperv1.ArchitectureAMD64,
Platform: hyperv1.NodePoolPlatform{AWS: &hyperv1.AWSNodePoolPlatform{}},
},
},
rhelStream: "rhel-10",
releaseImage: &releaseinfo.ReleaseImage{
ImageStream: &v1.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "4.18.0"}},
StreamMetadata: testAWSStreamWithRelease("x86_64", "us-east-1", "ami-legacy-fallback", "4.18.0"),
OSStreams: nil,
},
expectedAMI: "ami-legacy-fallback",
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
g := NewWithT(t)
ami, err := resolveAWSAMI(tc.hostedCluster, tc.nodePool, tc.releaseImage)
ami, err := resolveAWSAMI(tc.hostedCluster, tc.nodePool, tc.releaseImage, tc.rhelStream)
if tc.expectError {
g.Expect(err).To(HaveOccurred())
} else {
Expand Down Expand Up @@ -1864,3 +1964,71 @@ func TestApplyAWSPlacementOptions(t *testing.T) {
})
}
}

// TestAWSMachineTemplateSpec_StreamSelection verifies that on a multi-stream
// OCP 5.0+ payload, passing different rhelStream values to awsMachineTemplateSpec
// selects different AMIs and produces different machine template name hashes,
// confirming that a stream switch triggers a CAPI node rollout.
func TestAWSMachineTemplateSpec_StreamSelection(t *testing.T) {
g := NewWithT(t)

const (
legacyAMI = "ami-legacy-rhel9"
rhel10AMI = "ami-rhel10-new"
region = "us-east-1"
)

releaseImage := &releaseinfo.ReleaseImage{
ImageStream: &v1.ImageStream{ObjectMeta: metav1.ObjectMeta{Name: "5.0.0"}},
StreamMetadata: testAWSStreamWithRelease("x86_64", region, legacyAMI, "5.0.0"),
OSStreams: map[string]*stream.Stream{
"rhel-9": testAWSStreamWithRelease("x86_64", region, legacyAMI, "5.0.0"),
"rhel-10": testAWSStreamWithRelease("x86_64", region, rhel10AMI, "5.0.0"),
},
}

hostedCluster := &hyperv1.HostedCluster{
Spec: hyperv1.HostedClusterSpec{
Platform: hyperv1.PlatformSpec{
AWS: &hyperv1.AWSPlatformSpec{Region: region},
},
},
Status: hyperv1.HostedClusterStatus{
Platform: &hyperv1.PlatformStatus{
AWS: &hyperv1.AWSPlatformStatus{DefaultWorkerSecurityGroupID: "sg-default"},
},
},
}

nodePool := &hyperv1.NodePool{
ObjectMeta: metav1.ObjectMeta{Name: "test-nodepool"},
Spec: hyperv1.NodePoolSpec{
Arch: hyperv1.ArchitectureAMD64,
Platform: hyperv1.NodePoolPlatform{
Type: hyperv1.AWSPlatform,
AWS: &hyperv1.AWSNodePoolPlatform{},
},
},
}

// Legacy path (empty stream) should select the RHEL-9 AMI from StreamMetadata.
legacySpec, err := awsMachineTemplateSpec(infraName, hostedCluster, nodePool, true, releaseImage, "")
g.Expect(err).ToNot(HaveOccurred())
g.Expect(*legacySpec.Template.Spec.AMI.ID).To(Equal(legacyAMI))

// Concrete "rhel-10" stream should select the RHEL-10 AMI from OSStreams.
rhel10Spec, err := awsMachineTemplateSpec(infraName, hostedCluster, nodePool, true, releaseImage, "rhel-10")
g.Expect(err).ToNot(HaveOccurred())
g.Expect(*rhel10Spec.Template.Spec.AMI.ID).To(Equal(rhel10AMI))

// Different AMIs should produce different machine template name hashes,
// causing CAPI to create a new infrastructure ref and trigger node replacement.
legacyJSON, err := json.Marshal(legacySpec)
g.Expect(err).ToNot(HaveOccurred())
rhel10JSON, err := json.Marshal(rhel10Spec)
g.Expect(err).ToNot(HaveOccurred())

g.Expect(generateMachineTemplateName(nodePool, legacyJSON)).
ToNot(Equal(generateMachineTemplateName(nodePool, rhel10JSON)),
"different streams should produce different machine template names")
}
2 changes: 2 additions & 0 deletions hypershift-operator/controllers/nodepool/azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ func defaultAzureNodePoolImage(nodePool *hyperv1.NodePool, releaseImage *release

// getAzureMarketplaceMetadata extracts Azure Marketplace metadata from the release payload
func getAzureMarketplaceMetadata(releaseImage *releaseinfo.ReleaseImage, arch string) (*azureMarketplaceMetadata, error) {
// TODO(CNTRLPLANE-3553): use releaseImage.StreamForName(rhelStream) instead of
// accessing StreamMetadata directly, to support dual-stream payloads.
if releaseImage.StreamMetadata == nil {
return nil, nil // No stream metadata available
}
Expand Down
15 changes: 15 additions & 0 deletions hypershift-operator/controllers/nodepool/conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,20 @@ func (r *NodePoolReconciler) validMachineConfigCondition(ctx context.Context, no
return &ctrl.Result{}, nil
}

// Validate osImageStream before expensive config generation to fail fast.
// TODO(CNTRLPLANE-3553): add integration test covering this condition path
// (invalid osImageStream.Name → ValidMachineConfig condition False + error return).
if err := validateOSImageStream(nodePool, releaseImage); err != nil {
Comment thread
sdminonne marked this conversation as resolved.
SetStatusCondition(&nodePool.Status.Conditions, hyperv1.NodePoolCondition{
Type: hyperv1.NodePoolValidMachineConfigConditionType,
Status: corev1.ConditionFalse,
Reason: hyperv1.NodePoolValidationFailedReason,
Message: err.Error(),
ObservedGeneration: nodePool.Generation,
})
return &ctrl.Result{}, fmt.Errorf("failed to validate osImageStream: %w", err)
}

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This runs after `NewConfigGenerator` succeeds — meaning we do expensive MCO config generation (listing core configs, parsing MachineConfigs) before checking a simple string allowlist. Moving `validateOSImageStream` before the config generation would fail fast for invalid stream names.

Not blocking, but worth reordering.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. validateOSImageStream is now called before NewConfigGenerator in conditions.go, so invalid stream names fail fast before expensive MCO config generation — see 39be8a3.

haproxyRawConfig, err := r.generateHAProxyRawConfig(ctx, nodePool, hcluster, releaseImage)
if err != nil {
return &ctrl.Result{}, fmt.Errorf("failed to generate HAProxy raw config: %w", err)
Expand All @@ -385,6 +399,7 @@ func (r *NodePoolReconciler) validMachineConfigCondition(ctx context.Context, no
})
return &ctrl.Result{}, fmt.Errorf("failed to generate config: %w", err)
}

SetStatusCondition(&nodePool.Status.Conditions, hyperv1.NodePoolCondition{
Type: hyperv1.NodePoolValidMachineConfigConditionType,
Status: corev1.ConditionTrue,
Expand Down
Loading