Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 21 additions & 10 deletions hypershift-operator/controllers/nodepool/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ func isSpotEnabled(nodePool *hyperv1.NodePool) bool {
return false
}

func awsMachineTemplateSpec(infraName string, hostedCluster *hyperv1.HostedCluster, nodePool *hyperv1.NodePool, defaultSG bool, releaseImage *releaseinfo.ReleaseImage) (*capiaws.AWSMachineTemplateSpec, error) {
ami, err := resolveAWSAMI(hostedCluster, nodePool, releaseImage)
func awsMachineTemplateSpec(infraName string, hostedCluster *hyperv1.HostedCluster, nodePool *hyperv1.NodePool, defaultSG bool, releaseImage *releaseinfo.ReleaseImage, rhelStream string) (*capiaws.AWSMachineTemplateSpec, error) {
ami, err := resolveAWSAMI(hostedCluster, nodePool, releaseImage, rhelStream)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -118,7 +118,7 @@ func awsMachineTemplateSpec(infraName string, hostedCluster *hyperv1.HostedClust
return awsMachineTemplateSpec, nil
}

func resolveAWSAMI(hostedCluster *hyperv1.HostedCluster, nodePool *hyperv1.NodePool, releaseImage *releaseinfo.ReleaseImage) (string, error) {
func resolveAWSAMI(hostedCluster *hyperv1.HostedCluster, nodePool *hyperv1.NodePool, releaseImage *releaseinfo.ReleaseImage, rhelStream string) (string, error) {
// TODO: Should the region be included in the NodePool platform information?
region := hostedCluster.Spec.Platform.AWS.Region
arch := nodePool.Spec.Arch
Expand All @@ -134,8 +134,7 @@ func resolveAWSAMI(hostedCluster *hyperv1.HostedCluster, nodePool *hyperv1.NodeP
return ami, nil
}
// Default behavior for Linux/RHCOS AMIs
// TODO(CNTRLPLANE-3553): resolve streamName via GetRHELStream once osImageStream API field is available
ami, err := defaultNodePoolAMI(region, arch, "", releaseImage)
ami, err := defaultNodePoolAMI(region, arch, rhelStream, releaseImage)
if err != nil {
return "", fmt.Errorf("couldn't discover an AMI for release image: %w", err)
}
Expand Down Expand Up @@ -271,7 +270,7 @@ func awsAdditionalTags(nodePool *hyperv1.NodePool, hostedCluster *hyperv1.Hosted
}

func (c *CAPI) awsMachineTemplate(ctx context.Context, templateNameGenerator func(spec any) (string, error)) (*capiaws.AWSMachineTemplate, error) {
desiredSpec, err := awsMachineTemplateSpec(c.capiClusterName, c.hostedCluster, c.nodePool, c.cpoCapabilities.CreateDefaultAWSSecurityGroup, c.releaseImage)
desiredSpec, err := awsMachineTemplateSpec(c.capiClusterName, c.hostedCluster, c.nodePool, c.cpoCapabilities.CreateDefaultAWSSecurityGroup, c.releaseImage, c.resolvedRHELStream)
if err != nil {
return nil, fmt.Errorf("failed to generate AWSMachineTemplateSpec: %w", err)
}
Expand Down Expand Up @@ -332,7 +331,7 @@ func (c *CAPI) reconcileAWSMachines(ctx context.Context) error {
return errors.NewAggregate(errs)
}

func (r *NodePoolReconciler) setAWSConditions(_ context.Context, nodePool *hyperv1.NodePool, hcluster *hyperv1.HostedCluster, _ string, releaseImage *releaseinfo.ReleaseImage) error {
func (r *NodePoolReconciler) setAWSConditions(ctx context.Context, nodePool *hyperv1.NodePool, hcluster *hyperv1.HostedCluster, _ string, releaseImage *releaseinfo.ReleaseImage) error {
if nodePool.Spec.Platform.Type == hyperv1.AWSPlatform {
if hcluster.Spec.Platform.AWS == nil {
return fmt.Errorf("the HostedCluster for this NodePool has no .Spec.Platform.AWS, this is unsupported")
Expand Down Expand Up @@ -361,9 +360,21 @@ func (r *NodePoolReconciler) setAWSConditions(_ context.Context, nodePool *hyper
ObservedGeneration: nodePool.Generation,
})
} else {
// Default behavior for Linux/RHCOS AMIs
// TODO(CNTRLPLANE-3553): resolve streamName via GetRHELStream once osImageStream API field is available
ami, err := defaultNodePoolAMI(hcluster.Spec.Platform.AWS.Region, nodePool.Spec.Arch, "", releaseImage)
// Default behavior for Linux/RHCOS AMIs.
// Use the resolved stream (via GetRHELStream) rather than the raw spec field,
// so that the AMI lookup is consistent with the CAPI path.
rhelStream, err := getRHELStream(ctx, r.Client, nodePool, releaseImage)
if err != nil {
SetStatusCondition(&nodePool.Status.Conditions, hyperv1.NodePoolCondition{
Type: hyperv1.NodePoolValidPlatformImageType,
Status: corev1.ConditionFalse,
Reason: hyperv1.NodePoolValidationFailedReason,
Message: fmt.Sprintf("Couldn't resolve RHEL stream for release image %q: %s", nodePool.Spec.Release.Image, err.Error()),
ObservedGeneration: nodePool.Generation,
})
return fmt.Errorf("couldn't resolve RHEL stream for release image: %w", err)
}
ami, err := defaultNodePoolAMI(hcluster.Spec.Platform.AWS.Region, nodePool.Spec.Arch, rhelStream, releaseImage)
if err != nil {
SetStatusCondition(&nodePool.Status.Conditions, hyperv1.NodePoolCondition{
Type: hyperv1.NodePoolValidPlatformImageType,
Expand Down
3 changes: 2 additions & 1 deletion hypershift-operator/controllers/nodepool/aws_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ func TestAWSMachineTemplateSpec(t *testing.T) {
},
true,
releaseImage,
"",
)
if tc.checkError != nil {
tc.checkError(t, err)
Expand Down Expand Up @@ -1340,7 +1341,7 @@ func TestResolveAWSAMI(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
g := NewWithT(t)
ami, err := resolveAWSAMI(tc.hostedCluster, tc.nodePool, tc.releaseImage)
ami, err := resolveAWSAMI(tc.hostedCluster, tc.nodePool, tc.releaseImage, "")
if tc.expectError {
g.Expect(err).To(HaveOccurred())
} else {
Expand Down
2 changes: 1 addition & 1 deletion hypershift-operator/controllers/nodepool/capi_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2648,7 +2648,7 @@ func TestReconcileMachineDeploymentStatus(t *testing.T) {
expectedReadyConditionSet bool
}{
{
name: "When MachineDeployment is complete, it should update nodePool version and annotations",
name: "When MachineDeployment is complete, it should update nodePool version",
machineDeployment: &capiv1.MachineDeployment{
ObjectMeta: metav1.ObjectMeta{Generation: 1},
Spec: capiv1.MachineDeploymentSpec{
Expand Down
13 changes: 13 additions & 0 deletions hypershift-operator/controllers/nodepool/conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,18 @@ func (r *NodePoolReconciler) validMachineConfigCondition(ctx context.Context, no
return &ctrl.Result{}, nil
}

// Validate osImageStream before expensive config generation to fail fast.
if err := validateOSImageStream(ctx, r.Client, nodePool, releaseImage); err != nil {
SetStatusCondition(&nodePool.Status.Conditions, hyperv1.NodePoolCondition{
Type: hyperv1.NodePoolValidMachineConfigConditionType,
Status: corev1.ConditionFalse,
Reason: hyperv1.NodePoolValidationFailedReason,
Message: err.Error(),
ObservedGeneration: nodePool.Generation,
})
return &ctrl.Result{}, fmt.Errorf("failed to validate osImageStream: %w", err)
}

haproxyRawConfig, err := r.generateHAProxyRawConfig(ctx, nodePool, hcluster, releaseImage)
if err != nil {
return &ctrl.Result{}, fmt.Errorf("failed to generate HAProxy raw config: %w", err)
Expand All @@ -385,6 +397,7 @@ func (r *NodePoolReconciler) validMachineConfigCondition(ctx context.Context, no
})
return &ctrl.Result{}, fmt.Errorf("failed to generate config: %w", err)
}

SetStatusCondition(&nodePool.Status.Conditions, hyperv1.NodePoolCondition{
Type: hyperv1.NodePoolValidMachineConfigConditionType,
Status: corev1.ConditionTrue,
Expand Down
52 changes: 50 additions & 2 deletions hypershift-operator/controllers/nodepool/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ import (
"k8s.io/apimachinery/pkg/util/yaml"

"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/blang/semver"
)

// ConfigGenerator knows how to:
Expand All @@ -42,6 +44,15 @@ type ConfigGenerator struct {
hostedCluster *hyperv1.HostedCluster
nodePool *hyperv1.NodePool
controlplaneNamespace string
// resolvedRHELStream is the explicit RHEL stream name resolved via
// getRHELStream. Unlike rhelStream (which may be empty after normalization
// for hash stability), this always holds a concrete stream name (e.g.
// "rhel-9", "rhel-10"). It must be used by CAPI/Token paths that pass
// the stream to StreamForName, which requires an explicit value once
// the legacy DefaultStream is removed (OCP > 5.3).
// This field is intentionally outside rolloutConfig because it does not
// participate in the config hash that drives rollouts.
resolvedRHELStream string
*rolloutConfig
}

Expand All @@ -60,6 +71,13 @@ type rolloutConfig struct {
// TODO(alberto): consider let haproxyRawConfig be an implementation detail of ConfigGenerator.
// For now, it's a required input to keep the haproxy business logic and files outside the scope of this initial refactor.
haproxyRawConfig string
// rhelStream is the OS image stream name used for hash computation.
// It is set from spec.osImageStream.Name but normalized: if the explicit
// value matches the version-derived default it is kept empty so that
// setting the default stream does not change the hash.
// Only a non-default stream (e.g. "rhel-10" on a 4.x release) produces
// a non-empty value here and triggers a rollout.
rhelStream string
}

// NewConfigGenerator is the contract to create a new ConfigGenerator.
Expand All @@ -77,16 +95,46 @@ func NewConfigGenerator(ctx context.Context, client client.Client, hostedCluster
return nil, err
}

// Resolve the explicit RHEL stream for image lookup and token plumbing.
resolvedRHELStream, err := getRHELStream(ctx, client, nodePool, releaseImage)
if err != nil {
return nil, fmt.Errorf("failed to resolve RHEL stream: %w", err)
}

// Normalize rhelStream for the config hash: when the resolved stream
// matches the version-derived default, keep it empty so that setting the
// default explicitly does not change the hash and trigger a spurious rollout.
rhelStream := nodePool.Spec.OSImageStream.Name
if rhelStream != "" {
version, err := semver.Parse(releaseImage.Version())
if err != nil {
return nil, fmt.Errorf("failed to parse release image version %q: %w", releaseImage.Version(), err)
}
usesRunc, err := usesRuncRuntime(ctx, client, nodePool)
if err != nil {
return nil, fmt.Errorf("failed to detect container runtime: %w", err)
}
defaultStream, err := GetRHELStream("", version, usesRunc)
if err != nil {
return nil, fmt.Errorf("failed to resolve default RHEL stream: %w", err)
}
if rhelStream == defaultStream {
rhelStream = ""
}
}

cg := &ConfigGenerator{
Client: client,
hostedCluster: hostedCluster,
nodePool: nodePool,
controlplaneNamespace: controlPlaneNamespace,
resolvedRHELStream: resolvedRHELStream,
rolloutConfig: &rolloutConfig{
releaseImage: releaseImage,
pullSecretName: hostedCluster.Spec.PullSecret.Name,
globalConfig: globalConfig,
haproxyRawConfig: haproxyRawConfig,
rhelStream: rhelStream,
},
}

Expand Down Expand Up @@ -118,15 +166,15 @@ func (cg *ConfigGenerator) CompressedAndEncoded() (*bytes.Buffer, error) {
// TODO(alberto): hash the struct directly instead of the string representation field by field.
// This is kept like this for now to contain the scope of the refactor and avoid backward compatibility issues.
func (cg *ConfigGenerator) Hash() string {
return supportutil.HashSimple(cg.mcoRawConfig + cg.releaseImage.Version() + cg.pullSecretName + cg.additionalTrustBundleName + cg.globalConfig)
return supportutil.HashSimple(cg.mcoRawConfig + cg.releaseImage.Version() + cg.pullSecretName + cg.additionalTrustBundleName + cg.globalConfig + cg.rhelStream)
}

// HashWithOutVersion is like Hash but doesn't compute the release version.
// This is only used to signal if a rollout is driven by a new release or by something else.
// TODO(alberto): This was left inconsistent in https://github.com/openshift/hypershift/pull/3795/files. It should also contain cg.globalConfig.
// This is kept like this for now to contain the scope of the refactor and avoid backward compatibility issues.
func (cg *ConfigGenerator) HashWithoutVersion() string {
return supportutil.HashSimple(cg.mcoRawConfig + cg.pullSecretName + cg.additionalTrustBundleName)
return supportutil.HashSimple(cg.mcoRawConfig + cg.pullSecretName + cg.additionalTrustBundleName + cg.rhelStream)
}

func (cg *ConfigGenerator) Version() string {
Expand Down
Loading
Loading