Skip to content

Commit 435aefa

Browse files
committed
Do not override condition in buffers limits translator
Limits translator overrides NotReadyForProvisioning condition, which contains an error message from the previous translators. That hides the error from the user, making it harder to troubleshoot issues.
1 parent 458b438 commit 435aefa

3 files changed

Lines changed: 73 additions & 22 deletions

File tree

cluster-autoscaler/capacitybuffer/testutil/testutil.go

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,10 +96,15 @@ func GetBufferStatus(podTempRef *v1.LocalObjectRef, replicas *int32, podTemplate
9696

9797
// GetConditionReady returns a list of conditions with a condition ready and empty message, should be used for testing purposes only
9898
func GetConditionReady() []metav1.Condition {
99+
return GetConditionReadyWithMessage("")
100+
}
101+
102+
// GetConditionReadyWithMessage returns a list of conditions with a condition ready and the specified message
103+
func GetConditionReadyWithMessage(message string) []metav1.Condition {
99104
readyCondition := metav1.Condition{
100105
Type: capacitybuffer.ReadyForProvisioningCondition,
101106
Status: metav1.ConditionTrue,
102-
Message: "",
107+
Message: message,
103108
Reason: capacitybuffer.AttributesSetSuccessfullyReason,
104109
LastTransitionTime: metav1.Time{},
105110
}
@@ -108,10 +113,15 @@ func GetConditionReady() []metav1.Condition {
108113

109114
// GetConditionNotReady returns a list of conditions with a condition not ready and empty message, should be used for testing purposes only
110115
func GetConditionNotReady() []metav1.Condition {
116+
return GetConditionNotReadyWithMessage("")
117+
}
118+
119+
// GetConditionNotReadyWithMessage returns a list of condition with a condition not ready and specified message.
120+
func GetConditionNotReadyWithMessage(message string) []metav1.Condition {
111121
notReadyCondition := metav1.Condition{
112122
Type: capacitybuffer.ReadyForProvisioningCondition,
113123
Status: metav1.ConditionFalse,
114-
Message: "",
124+
Message: message,
115125
Reason: "error",
116126
LastTransitionTime: metav1.Time{},
117127
}
@@ -155,6 +165,13 @@ func WithReplicas(replicas int32) BufferOption {
155165
}
156166
}
157167

168+
// WithLimits sets the Spec.Limits
169+
func WithLimits(limits v1.ResourceList) BufferOption {
170+
return func(b *v1.CapacityBuffer) {
171+
b.Spec.Limits = &limits
172+
}
173+
}
174+
158175
// WithStatusPodTemplateRef sets the Status.PodTemplateRef
159176
func WithStatusPodTemplateRef(name string) BufferOption {
160177
return func(b *v1.CapacityBuffer) {

cluster-autoscaler/capacitybuffer/translators/resource_limits_translator.go

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,13 @@ limitations under the License.
1717
package translator
1818

1919
import (
20+
"errors"
2021
"fmt"
2122

2223
corev1 "k8s.io/api/core/v1"
24+
"k8s.io/apimachinery/pkg/api/meta"
2325
api_v1 "k8s.io/autoscaler/cluster-autoscaler/apis/capacitybuffer/autoscaling.x-k8s.io/v1beta1"
26+
"k8s.io/autoscaler/cluster-autoscaler/capacitybuffer"
2427
cbclient "k8s.io/autoscaler/cluster-autoscaler/capacitybuffer/client"
2528
"k8s.io/autoscaler/cluster-autoscaler/capacitybuffer/common"
2629
)
@@ -39,36 +42,35 @@ func NewResourceLimitsTranslator(client *cbclient.CapacityBufferClient) *resourc
3942

4043
// Translate translates buffers processors into pod capacity.
4144
func (t *resourceLimitsTranslator) Translate(buffers []*api_v1.CapacityBuffer) []error {
42-
errors := []error{}
45+
var errs []error
4346
for _, buffer := range buffers {
4447
if isResourcesLimitsDefinedInBuffer(buffer) {
45-
if buffer.Status.PodTemplateRef == nil {
46-
err := fmt.Errorf("Can't get pod template, PodTemplateRef is nil")
47-
common.SetBufferAsNotReadyForProvisioning(buffer, nil, nil, nil, buffer.Spec.ProvisioningStrategy, err)
48-
errors = append(errors, err)
48+
if buffer.Status.PodTemplateRef == nil || buffer.Status.PodTemplateGeneration == nil || meta.IsStatusConditionFalse(buffer.Status.Conditions, capacitybuffer.ReadyForProvisioningCondition) {
49+
// that means that previous translators failed to resolve the pod template, we don't want to override
50+
// the condition here in order to keep the error message from previous translators.
4951
continue
5052
}
5153
podTemplate, err := t.client.GetPodTemplate(buffer.Namespace, buffer.Status.PodTemplateRef.Name)
5254
if err != nil {
5355
err = fmt.Errorf("Couldn't get pod template, error: %v", err)
5456
common.SetBufferAsNotReadyForProvisioning(buffer, nil, nil, nil, buffer.Spec.ProvisioningStrategy, err)
55-
errors = append(errors, err)
57+
errs = append(errs, err)
5658
continue
5759
}
5860
numberOfPods := limitNumberOfPodsForResource(podTemplate, *buffer.Spec.Limits)
5961
if numberOfPods == nil {
60-
err := fmt.Errorf("Couldn't calculate number of pods for buffer %v based on provided resource limits %v", buffer.Name, *buffer.Spec.Limits)
62+
err := errors.New("couldn't calculate number of pods for buffer based on provided resource limits. Check if the pod template requests at least one limited resource")
6163
common.SetBufferAsNotReadyForProvisioning(buffer, nil, nil, nil, buffer.Spec.ProvisioningStrategy, err)
62-
errors = append(errors, err)
64+
// this error is expected when the buffer is misconfigured, so we don't want it to trigger requeue
6365
continue
6466
}
6567
if buffer.Status.Replicas != nil {
6668
numberOfPods = pointerToInt32(min(*buffer.Status.Replicas, *numberOfPods))
6769
}
68-
common.SetBufferAsReadyForProvisioning(buffer, buffer.Status.PodTemplateRef, &podTemplate.Generation, numberOfPods, buffer.Spec.ProvisioningStrategy)
70+
common.SetBufferAsReadyForProvisioning(buffer, buffer.Status.PodTemplateRef, buffer.Status.PodTemplateGeneration, numberOfPods, buffer.Spec.ProvisioningStrategy)
6971
}
7072
}
71-
return errors
73+
return errs
7274
}
7375

7476
func limitNumberOfPodsForResource(podTemplate *corev1.PodTemplate, limits api_v1.ResourceList) *int32 {

cluster-autoscaler/capacitybuffer/translators/resource_limits_translator_test.go

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,17 @@ package translator
1919
import (
2020
"testing"
2121

22+
"github.com/google/go-cmp/cmp"
23+
"github.com/google/go-cmp/cmp/cmpopts"
24+
"github.com/stretchr/testify/assert"
2225
corev1 "k8s.io/api/core/v1"
2326
"k8s.io/apimachinery/pkg/api/resource"
2427
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
25-
fakeClient "k8s.io/client-go/kubernetes/fake"
26-
27-
"github.com/stretchr/testify/assert"
2828
v1 "k8s.io/autoscaler/cluster-autoscaler/apis/capacitybuffer/autoscaling.x-k8s.io/v1beta1"
2929
cbclient "k8s.io/autoscaler/cluster-autoscaler/capacitybuffer/client"
3030
"k8s.io/autoscaler/cluster-autoscaler/capacitybuffer/testutil"
31+
fakeClient "k8s.io/client-go/kubernetes/fake"
32+
"k8s.io/utils/ptr"
3133
)
3234

3335
func TestResourceLimitsTranslator(t *testing.T) {
@@ -46,6 +48,7 @@ func TestResourceLimitsTranslator(t *testing.T) {
4648
})
4749
fakeClient := fakeClient.NewSimpleClientset(podTemp4mem100cpu, podTemp8mem200cpu, podTemp4gpu)
4850
fakeCapacityBuffersClient, _ := cbclient.NewCapacityBufferClient(nil, fakeClient, nil, nil, nil, nil, nil, nil, nil, nil, nil)
51+
noResourcesSetMessage := "couldn't calculate number of pods for buffer based on provided resource limits. Check if the pod template requests at least one limited resource"
4952

5053
tests := []struct {
5154
name string
@@ -71,9 +74,9 @@ func TestResourceLimitsTranslator(t *testing.T) {
7174
}),
7275
},
7376
expectedStatus: []*v1.CapacityBufferStatus{
74-
testutil.GetBufferStatus(nil, nil, nil, &testutil.ProvisioningStrategy, testutil.GetConditionNotReady()),
77+
testutil.GetBufferStatus(nil, nil, nil, &testutil.ProvisioningStrategy, nil),
7578
},
76-
expectedNumberOfErrors: 1,
79+
expectedNumberOfErrors: 0,
7780
},
7881
{
7982
name: "Limits exist and no replicas, buffer filtered",
@@ -185,27 +188,56 @@ func TestResourceLimitsTranslator(t *testing.T) {
185188
}),
186189
},
187190
expectedStatus: []*v1.CapacityBufferStatus{
188-
testutil.GetBufferStatus(nil, nil, nil, &testutil.ProvisioningStrategy, testutil.GetConditionNotReady()),
191+
testutil.GetBufferStatus(
192+
nil, nil, nil, &testutil.ProvisioningStrategy, testutil.GetConditionNotReadyWithMessage(noResourcesSetMessage)),
189193
},
190-
expectedNumberOfErrors: 1,
194+
expectedNumberOfErrors: 0,
195+
},
196+
{
197+
name: "conditions are not overridden if buffer is not ready",
198+
buffers: []*v1.CapacityBuffer{
199+
testutil.NewBuffer(
200+
testutil.WithPodTemplateRef(podTemp4mem100cpu.Name),
201+
testutil.WithLimits(v1.ResourceList{
202+
"nvidia.com/gpu": resource.MustParse("100m"),
203+
}),
204+
testutil.WithActiveProvisioningStrategy(),
205+
func(buffer *v1.CapacityBuffer) {
206+
buffer.Status.Conditions = testutil.GetConditionNotReadyWithMessage("test error")
207+
},
208+
),
209+
},
210+
expectedStatus: []*v1.CapacityBufferStatus{
211+
testutil.GetBufferStatus(nil, nil, nil, nil, testutil.GetConditionNotReadyWithMessage("test error")),
212+
},
213+
expectedNumberOfErrors: 0,
191214
},
192215
}
193216
for _, test := range tests {
194217
t.Run(test.name, func(t *testing.T) {
195218
translator := NewResourceLimitsTranslator(fakeCapacityBuffersClient)
196219
errors := translator.Translate(test.buffers)
197220
assert.Equal(t, len(errors), test.expectedNumberOfErrors)
198-
assert.ElementsMatch(t, test.expectedStatus, testutil.SanitizeBuffersStatus(test.buffers))
221+
for i, buffer := range test.buffers {
222+
wantStatus := test.expectedStatus[i]
223+
if diff := cmp.Diff(wantStatus, &buffer.Status, cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime")); diff != "" {
224+
t.Errorf("buffer status mismatch (-want +got):\n%s", diff)
225+
}
226+
}
199227
})
200228
}
201229
}
202230

203231
func getTestBufferWithLimits(podTemplateRef *v1.LocalObjectRef, replicas *int32, limits *v1.ResourceList) *v1.CapacityBuffer {
204-
return testutil.GetBuffer(&testutil.ProvisioningStrategy, nil, nil, podTemplateRef, replicas, nil, nil, limits)
232+
var podTemplateGeneration *int64
233+
if podTemplateRef != nil {
234+
podTemplateGeneration = ptr.To[int64](1)
235+
}
236+
return testutil.GetBuffer(&testutil.ProvisioningStrategy, nil, nil, podTemplateRef, replicas, podTemplateGeneration, nil, limits)
205237
}
206238

207239
func getTestBufferStatusWithReplicas(podTemplateRef *v1.LocalObjectRef, replicas int32) *v1.CapacityBufferStatus {
208-
return testutil.GetBufferStatus(podTemplateRef, &replicas, pointerToInt64(1), &testutil.ProvisioningStrategy, testutil.GetConditionReady())
240+
return testutil.GetBufferStatus(podTemplateRef, &replicas, ptr.To[int64](1), &testutil.ProvisioningStrategy, testutil.GetConditionReadyWithMessage("ready"))
209241
}
210242

211243
func getPodTemplateWithResources(name string, resources corev1.ResourceList) *corev1.PodTemplate {

0 commit comments

Comments
 (0)