From 4238aa18870b77bedde202b49d5e96c0d5759d30 Mon Sep 17 00:00:00 2001 From: Arnob Kumar Saha Date: Fri, 12 Jun 2026 00:52:04 +0600 Subject: [PATCH] Introduce new sidekick phase: degraded, for pod failures Signed-off-by: Arnob Kumar Saha --- apis/apps/v1alpha1/sidekick_types.go | 5 ++++- crds/apps.k8s.appscode.com_sidekicks.yaml | 1 + pkg/controllers/apps/distributed.go | 23 ++++++++++++++++++++++ pkg/controllers/apps/sidekick.go | 24 +++++++++++++++++++++++ 4 files changed, 52 insertions(+), 1 deletion(-) diff --git a/apis/apps/v1alpha1/sidekick_types.go b/apis/apps/v1alpha1/sidekick_types.go index cfcad6c0..28de7558 100644 --- a/apis/apps/v1alpha1/sidekick_types.go +++ b/apis/apps/v1alpha1/sidekick_types.go @@ -40,7 +40,7 @@ const ( PodSelectionPolicyLast LeaderSelectionPolicy = "Last" ) -// +kubebuilder:validation:Enum=Pending;Current;Failed;Succeeded +// +kubebuilder:validation:Enum=Pending;Current;Failed;Succeeded;Degraded type SideKickPhase string const ( @@ -48,6 +48,9 @@ const ( SideKickPhaseFailed SideKickPhase = "Failed" SidekickPhaseSucceeded SideKickPhase = "Succeeded" SideKickPhasePending SideKickPhase = "Pending" + // SideKickPhaseDegraded means the sidekick pod is expected to run but is + // currently missing, pending, failed (retryable) or has non-running containers. 
+ SideKickPhaseDegraded SideKickPhase = "Degraded" ) type LeaderSpec struct { diff --git a/crds/apps.k8s.appscode.com_sidekicks.yaml b/crds/apps.k8s.appscode.com_sidekicks.yaml index 07eea674..95c8726f 100644 --- a/crds/apps.k8s.appscode.com_sidekicks.yaml +++ b/crds/apps.k8s.appscode.com_sidekicks.yaml @@ -8098,6 +8098,7 @@ spec: - Current - Failed - Succeeded + - Degraded type: string pod: description: PodPhase is a label for the condition of a pod at the diff --git a/pkg/controllers/apps/distributed.go b/pkg/controllers/apps/distributed.go index 41ba36a9..08bd61cd 100644 --- a/pkg/controllers/apps/distributed.go +++ b/pkg/controllers/apps/distributed.go @@ -652,6 +652,9 @@ func (r *SidekickReconciler) getDistributedSidekickPhase(sidekick *appsv1alpha1. // if restartPolicy is always, we will always try to keep a pod running // if pod.status.phase == failed, then we will start a new pod if sidekick.Spec.RestartPolicy == corev1.RestartPolicyAlways { + if !isDistributedSidekickPodRunning(mw) { + return appsv1alpha1.SideKickPhaseDegraded + } return appsv1alpha1.SideKickPhaseCurrent } if sidekick.Status.Phase == appsv1alpha1.SidekickPhaseSucceeded { @@ -683,9 +686,29 @@ func (r *SidekickReconciler) getDistributedSidekickPhase(sidekick *appsv1alpha1. if backOffCounts > *sidekick.Spec.BackoffLimit { return appsv1alpha1.SideKickPhaseFailed } + if !isDistributedSidekickPodRunning(mw) { + return appsv1alpha1.SideKickPhaseDegraded + } return appsv1alpha1.SideKickPhaseCurrent } +// isDistributedSidekickPodRunning reports whether the ManifestWork feedback +// shows the sidekick pod in Running phase. Container-level state is not +// available via ManifestWork feedback, so pod phase is the only signal. 
+func isDistributedSidekickPodRunning(mw *apiworkv1.ManifestWork) bool {
+	if mw == nil || mw.DeletionTimestamp != nil {
+		return false // no ManifestWork, or it is being deleted
+	}
+	for _, manifest := range mw.Status.ResourceStatus.Manifests {
+		for _, fb := range manifest.StatusFeedbacks.Values {
+			if fb.Name == "PodPhase" && fb.Value.String != nil {
+				return *fb.Value.String == string(corev1.PodRunning) // first PodPhase feedback decides
+			}
+		}
+	}
+	return false // no usable PodPhase feedback in any manifest
+}
+
 func (r *SidekickReconciler) getDistributedPodNamespace(ctx context.Context, mwName string) (string, error) {
 	// Get all namespaces
 	var err error
diff --git a/pkg/controllers/apps/sidekick.go b/pkg/controllers/apps/sidekick.go
index b7c4ae22..eec47824 100644
--- a/pkg/controllers/apps/sidekick.go
+++ b/pkg/controllers/apps/sidekick.go
@@ -123,6 +123,9 @@ func (r *SidekickReconciler) getSidekickPhase(sidekick *appsv1alpha1.Sidekick, p
 	// if pod.status.phase == failed, then we will start a new pod
 	// TODO: which of these two should come first?
 	if sidekick.Spec.RestartPolicy == corev1.RestartPolicyAlways {
+		if !isSidekickPodRunning(pod) {
+			return appsv1alpha1.SideKickPhaseDegraded
+		}
 		return appsv1alpha1.SideKickPhaseCurrent
 	}
 	if sidekick.Status.Phase == appsv1alpha1.SidekickPhaseSucceeded {
@@ -144,9 +147,30 @@ func (r *SidekickReconciler) getSidekickPhase(sidekick *appsv1alpha1.Sidekick, p
 	if backOffCounts > *sidekick.Spec.BackoffLimit {
 		return appsv1alpha1.SideKickPhaseFailed
 	}
+	if !isSidekickPodRunning(pod) {
+		return appsv1alpha1.SideKickPhaseDegraded
+	}
 	return appsv1alpha1.SideKickPhaseCurrent
 }
 
+// isSidekickPodRunning reports whether the sidekick pod exists, is not being
+// deleted, is in Running phase and all its containers are actually running
+// (a pod stuck in CrashLoopBackOff stays in Running phase with waiting containers).
+func isSidekickPodRunning(pod *corev1.Pod) bool {
+	switch {
+	case pod == nil, pod.GetUID() == "", pod.DeletionTimestamp != nil:
+		return false // pod is nil, has no UID, or is being deleted
+	case pod.Status.Phase != corev1.PodRunning:
+		return false // pod is not in Running phase
+	}
+	for i := range pod.Status.ContainerStatuses {
+		if pod.Status.ContainerStatuses[i].State.Running == nil {
+			return false // Running phase, but this container is not running
+		}
+	}
+	return true
+}
+
 func (r *SidekickReconciler) updateSidekickStatus(ctx context.Context, sidekick *appsv1alpha1.Sidekick) error {
 	_, err := cu.PatchStatus(ctx, r.Client, sidekick, func(obj client.Object) client.Object {
 		sk := obj.(*appsv1alpha1.Sidekick)