From 369cdc33d42a365b3e4edb9a555ef659179d3c5f Mon Sep 17 00:00:00 2001 From: Linying Assad Date: Mon, 25 May 2026 16:29:29 +0800 Subject: [PATCH 1/2] Support DNS-ready graceful shutdown for Doris pods --- pkg/common/utils/resource/pod.go | 128 +++++++++++++----- pkg/common/utils/resource/pod_test.go | 22 +++ pkg/controller/sub_controller/be/pod.go | 8 +- pkg/controller/sub_controller/be/pod_test.go | 9 +- pkg/controller/sub_controller/cn/pod.go | 1 + pkg/controller/sub_controller/cn/pod_test.go | 60 ++++++++ .../computegroups/statefulset.go | 6 +- .../computegroups/statefulset_test.go | 52 +++++++ .../disaggregated_fe/statefulset.go | 3 + .../disaggregated_fe/statefulset_test.go | 53 ++++++++ pkg/controller/sub_controller/fe/pod.go | 1 + pkg/controller/sub_controller/fe/pod_test.go | 9 +- 12 files changed, 308 insertions(+), 44 deletions(-) create mode 100644 pkg/controller/sub_controller/cn/pod_test.go create mode 100644 pkg/controller/sub_controller/disaggregated_cluster/computegroups/statefulset_test.go create mode 100644 pkg/controller/sub_controller/disaggregated_cluster/disaggregated_fe/statefulset_test.go diff --git a/pkg/common/utils/resource/pod.go b/pkg/common/utils/resource/pod.go index 4cac6118..50a0d859 100644 --- a/pkg/common/utils/resource/pod.go +++ b/pkg/common/utils/resource/pod.go @@ -56,23 +56,27 @@ const ( BROKER_PRESTOP = "/opt/apache-doris/broker_prestop.sh" //keys for pod env variables - POD_NAME = "POD_NAME" - POD_IP = "POD_IP" - HOST_IP = "HOST_IP" - POD_NAMESPACE = "POD_NAMESPACE" - ADMIN_USER = "USER" - ADMIN_PASSWD = "PASSWD" - DORIS_ROOT_KEY = "DORIS_ROOT" + POD_NAME = "POD_NAME" + POD_IP = "POD_IP" + HOST_IP = "HOST_IP" + POD_NAMESPACE = "POD_NAMESPACE" + ADMIN_USER = "USER" + ADMIN_PASSWD = "PASSWD" + DORIS_ROOT_KEY = "DORIS_ROOT" + DNS_READY_TIMEOUT = "DNS_READY_TIMEOUT" + DNS_READY_INTERVAL = "DNS_READY_INTERVAL" KRB5_MOUNT_PATH = "KRB5_MOUNT_PATH" KRB5_CONFIG = "KRB5_CONFIG" KEYTAB_MOUNT_PATH = "KEYTAB_MOUNT_PATH" KEYTAB_FINAL_USED_PATH = "KEYTAB_FINAL_USED_PATH" - DEFAULT_ADMIN_USER = "root" - DEFAULT_ROOT_PATH = "/opt/apache-doris" - POD_INFO_PATH = "/etc/podinfo" - POD_INFO_VOLUME_NAME = "podinfo" + DEFAULT_ADMIN_USER = "root" + DEFAULT_ROOT_PATH = "/opt/apache-doris" + DEFAULT_DNS_READY_TIMEOUT = "120" + DEFAULT_DNS_READY_INTERVAL = "2" + POD_INFO_PATH = "/etc/podinfo" + POD_INFO_VOLUME_NAME = "podinfo" NODE_TOPOLOGYKEY = "kubernetes.io/hostname" @@ -90,6 +94,9 @@ const ( DISAGGREGATED_FE_MAIN_CONTAINER_NAME = "fe" DISAGGREGATED_BE_MAIN_CONTAINER_NAME = "compute" DISAGGREGATED_MS_MAIN_CONTAINER_NAME = "metaservice" + + DEFAULT_FE_TERMINATION_GRACE_PERIOD_SECONDS int64 = 330 + DEFAULT_BE_TERMINATION_GRACE_PERIOD_SECONDS int64 = 200 ) type ProbeType string @@ -100,6 +107,39 @@ var ( Exec ProbeType = "exec" ) +func buildFQDNReadinessExecProbe(enableTLS string, config map[string]interface{}, port int32, path string, policy *v1.ReadinessProbePolicy) *corev1.Probe { + host := "$(hostname -f)" + var curlCmd string + if enableTLS == "true" { + caCert := GetString(config, TLS_CA_CERTIFICATE_PATH_KEY) + clientCert := GetString(config, TLS_CERTIFICATE_PATH_KEY) + clientKey := GetString(config, TLS_PRIVATE_KEY_PATH_KEY) + curlCmd = fmt.Sprintf( + "host=%s; (getent hosts \"$host\" >/dev/null 2>&1 || nslookup \"$host\" >/dev/null 2>&1) && curl --fail --silent --output /dev/null --cacert %s --cert %s --key %s https://$host:%d%s", + host, caCert, clientCert, clientKey, port, path, + ) + } else { + curlCmd = fmt.Sprintf( + "host=%s; (getent hosts \"$host\" >/dev/null 2>&1 || nslookup \"$host\" >/dev/null 2>&1) && curl --fail --silent --output /dev/null http://$host:%d%s", + host, port, path, + ) + } + + probe := &corev1.Probe{ + PeriodSeconds: 5, + FailureThreshold: 3, + ProbeHandler: corev1.ProbeHandler{ + Exec: &corev1.ExecAction{ + Command: []string{"bash", "-c", curlCmd}, + }, + }, + } + if policy != nil { + applyReadinessProbePolicy(probe, policy.TimeoutSeconds, policy.FailureThreshold, policy.PeriodSeconds) + } + return probe +} + func NewPodTemplateSpec(dcr *v1.DorisCluster, config map[string]interface{}, componentType v1.ComponentType) corev1.PodTemplateSpec { spec := getBaseSpecFromCluster(dcr, componentType) var volumes []corev1.Volume @@ -439,7 +479,7 @@ func NewBaseMainContainer(dcr *v1.DorisCluster, config map[string]interface{}, c skipInit = dcr.Spec.BeSpec.SkipDefaultSystemInit case v1.Component_CN: spec = dcr.Spec.CnSpec.BaseSpec - skipInit = dcr.Spec.BeSpec.SkipDefaultSystemInit + skipInit = dcr.Spec.CnSpec.SkipDefaultSystemInit case v1.Component_Broker: spec = dcr.Spec.BrokerSpec.BaseSpec default: @@ -550,27 +590,31 @@ func NewBaseMainContainer(dcr *v1.DorisCluster, config map[string]interface{}, c // client certificates. Kubernetes HTTPGet probes cannot provide client certs, so we use // an Exec probe with curl instead. This is compatible with both TLS and mTLS modes. enableTLS := GetString(config, ENABLE_TLS_KEY) - if enableTLS == "true" && c.ReadinessProbe != nil && c.ReadinessProbe.HTTPGet != nil { - caCert := GetString(config, TLS_CA_CERTIFICATE_PATH_KEY) - clientCert := GetString(config, TLS_CERTIFICATE_PATH_KEY) - clientKey := GetString(config, TLS_PRIVATE_KEY_PATH_KEY) - curlCmd := fmt.Sprintf( - "curl --fail --silent --output /dev/null --cacert %s --cert %s --key %s https://localhost:%d%s", - caCert, clientCert, clientKey, readnessPort, health_api_path, - ) - tlsProbe := &corev1.Probe{ - PeriodSeconds: 5, - FailureThreshold: 3, - ProbeHandler: corev1.ProbeHandler{ - Exec: &corev1.ExecAction{ - Command: []string{"bash", "-c", curlCmd}, + if c.ReadinessProbe != nil && c.ReadinessProbe.HTTPGet != nil { + if GetStartMode(config) == START_MODEL_FQDN && (componentType == v1.Component_FE || componentType == v1.Component_BE || componentType == v1.Component_CN) { + c.ReadinessProbe = buildFQDNReadinessExecProbe(enableTLS, config, readnessPort, health_api_path, spec.ReadinessProbePolicy) + } else if enableTLS == "true" { + caCert := GetString(config, TLS_CA_CERTIFICATE_PATH_KEY) + clientCert := GetString(config, TLS_CERTIFICATE_PATH_KEY) + clientKey := GetString(config, TLS_PRIVATE_KEY_PATH_KEY) + curlCmd := fmt.Sprintf( + "curl --fail --silent --output /dev/null --cacert %s --cert %s --key %s https://localhost:%d%s", + caCert, clientCert, clientKey, readnessPort, health_api_path, + ) + tlsProbe := &corev1.Probe{ + PeriodSeconds: 5, + FailureThreshold: 3, + ProbeHandler: corev1.ProbeHandler{ + Exec: &corev1.ExecAction{ + Command: []string{"bash", "-c", curlCmd}, + }, }, - }, - } - if spec.ReadinessProbePolicy != nil { - applyReadinessProbePolicy(tlsProbe, spec.ReadinessProbePolicy.TimeoutSeconds, spec.ReadinessProbePolicy.FailureThreshold, spec.ReadinessProbePolicy.PeriodSeconds) + } + if spec.ReadinessProbePolicy != nil { + applyReadinessProbePolicy(tlsProbe, spec.ReadinessProbePolicy.TimeoutSeconds, spec.ReadinessProbePolicy.FailureThreshold, spec.ReadinessProbePolicy.PeriodSeconds) + } + c.ReadinessProbe = tlsProbe } - c.ReadinessProbe = tlsProbe } c.Lifecycle = lifeCycle(prestopScript) @@ -694,6 +738,14 @@ func buildEnvFromPod() []corev1.EnvVar { Name: config_env_name, Value: config_env_path, }, + { + Name: DNS_READY_TIMEOUT, + Value: DEFAULT_DNS_READY_TIMEOUT, + }, + { + Name: DNS_READY_INTERVAL, + Value: DEFAULT_DNS_READY_INTERVAL, + }, } } @@ -1033,8 +1085,8 @@ func ReadinessProbe(port int32, path string, commands []string, pt ProbeType, po // StartupProbe returns a startup probe. func startupProbe(port, timeout int32, path string, commands []string, pt ProbeType) *corev1.Probe { var failurethreshold int32 - if timeout < 300 { - timeout = 300 + if timeout < 360 { + timeout = 360 } failurethreshold = timeout / 5 @@ -1119,6 +1171,16 @@ func LifeCycleWithPreStopScript(lc *corev1.Lifecycle, preStopScript string) *cor return lc } +func AddTerminationGracePeriodSeconds(tplSpec *corev1.PodTemplateSpec, config map[string]interface{}, defaultSeconds int64) { + seconds := GetTerminationGracePeriodSeconds(config) + if seconds <= 0 { + seconds = defaultSeconds + } + if seconds > 0 { + tplSpec.Spec.TerminationGracePeriodSeconds = &seconds + } +} + // getProbe describe a health check. func getProbe(port int32, path string, commands []string, pt ProbeType) corev1.ProbeHandler { switch pt { diff --git a/pkg/common/utils/resource/pod_test.go b/pkg/common/utils/resource/pod_test.go index dd589387..33164679 100644 --- a/pkg/common/utils/resource/pod_test.go +++ b/pkg/common/utils/resource/pod_test.go @@ -144,6 +144,28 @@ func Test_LifeCycleWithPreStopScript(t *testing.T) { } } +func Test_AddTerminationGracePeriodSeconds_Default(t *testing.T) { + pts := &corev1.PodTemplateSpec{} + AddTerminationGracePeriodSeconds(pts, map[string]interface{}{}, DEFAULT_BE_TERMINATION_GRACE_PERIOD_SECONDS) + if pts.Spec.TerminationGracePeriodSeconds == nil { + t.Fatalf("expected terminationGracePeriodSeconds") + } + if *pts.Spec.TerminationGracePeriodSeconds != DEFAULT_BE_TERMINATION_GRACE_PERIOD_SECONDS { + t.Errorf("expected default terminationGracePeriodSeconds=%d, got %d", DEFAULT_BE_TERMINATION_GRACE_PERIOD_SECONDS, *pts.Spec.TerminationGracePeriodSeconds) + } +} + +func Test_AddTerminationGracePeriodSeconds_ConfigOverride(t *testing.T) { + pts := &corev1.PodTemplateSpec{} + AddTerminationGracePeriodSeconds(pts, map[string]interface{}{GRACE_SHUTDOWN_WAIT_SECONDS: "60"}, DEFAULT_BE_TERMINATION_GRACE_PERIOD_SECONDS) + if pts.Spec.TerminationGracePeriodSeconds == nil { + t.Fatalf("expected terminationGracePeriodSeconds") + } + if *pts.Spec.TerminationGracePeriodSeconds != 60 { + t.Errorf("expected configured terminationGracePeriodSeconds=60, got %d", *pts.Spec.TerminationGracePeriodSeconds) + } +} + func Test_BuildDisaggregatedProbe(t *testing.T) { c := &corev1.Container{} cs := &dv1.CommonSpec{ diff --git a/pkg/controller/sub_controller/be/pod.go b/pkg/controller/sub_controller/be/pod.go index fb58297b..80f9ab43 100644 --- a/pkg/controller/sub_controller/be/pod.go +++ b/pkg/controller/sub_controller/be/pod.go @@ -153,13 +153,7 @@ func (be *Controller) beContainer(dcr *v1.DorisCluster) corev1.Container { return c } -// Only configure the TerminationGracePeriodSeconds when grace_shutdown_wait_seconds configured in be.conf func (be *Controller) addTerminationGracePeriodSeconds(dcr *v1.DorisCluster, tplSpec *corev1.PodTemplateSpec) { config, _ := be.GetConfig(context.Background(), &dcr.Spec.BeSpec.ConfigMapInfo, dcr.Namespace, v1.Component_BE) - seconds := resource.GetTerminationGracePeriodSeconds(config) - if seconds > 0 { - tplSpec.Spec.TerminationGracePeriodSeconds = &seconds - return - } - return + resource.AddTerminationGracePeriodSeconds(tplSpec, config, resource.DEFAULT_BE_TERMINATION_GRACE_PERIOD_SECONDS) } diff --git a/pkg/controller/sub_controller/be/pod_test.go b/pkg/controller/sub_controller/be/pod_test.go index 054afd3a..aa6f25d9 100644 --- a/pkg/controller/sub_controller/be/pod_test.go +++ b/pkg/controller/sub_controller/be/pod_test.go @@ -19,6 +19,7 @@ package be import ( "encoding/json" v1 "github.com/apache/doris-operator/api/doris/v1" + "github.com/apache/doris-operator/pkg/common/utils/resource" "testing" ) @@ -57,7 +58,13 @@ func Test_buildBEPodTemplateSpec(t *testing.T) { } be := &Controller{} - be.buildBEPodTemplateSpec(dcr, map[string]interface{}{}) + pts := be.buildBEPodTemplateSpec(dcr, map[string]interface{}{}) + if pts.Spec.TerminationGracePeriodSeconds == nil { + t.Fatalf("expected BE terminationGracePeriodSeconds") + } + if *pts.Spec.TerminationGracePeriodSeconds != resource.DEFAULT_BE_TERMINATION_GRACE_PERIOD_SECONDS { + t.Errorf("expected BE terminationGracePeriodSeconds=%d, got %d", resource.DEFAULT_BE_TERMINATION_GRACE_PERIOD_SECONDS, *pts.Spec.TerminationGracePeriodSeconds) + } } func Test_buildBEPodTemplateSpecWithFEAffinity(t *testing.T) { diff --git a/pkg/controller/sub_controller/cn/pod.go b/pkg/controller/sub_controller/cn/pod.go index 70dc8e92..3401263a 100644 --- a/pkg/controller/sub_controller/cn/pod.go +++ b/pkg/controller/sub_controller/cn/pod.go @@ -28,6 +28,7 @@ import ( func (cn *Controller) buildCnPodTemplateSpec(dcr *v1.DorisCluster, config map[string]interface{}) corev1.PodTemplateSpec { podTemplateSpec := resource.NewPodTemplateSpec(dcr, config, v1.Component_CN) + resource.AddTerminationGracePeriodSeconds(&podTemplateSpec, config, resource.DEFAULT_BE_TERMINATION_GRACE_PERIOD_SECONDS) var containers []corev1.Container containers = append(containers, podTemplateSpec.Spec.Containers...) cnContainer := cn.cnContainer(dcr) diff --git a/pkg/controller/sub_controller/cn/pod_test.go b/pkg/controller/sub_controller/cn/pod_test.go new file mode 100644 index 00000000..28b170cb --- /dev/null +++ b/pkg/controller/sub_controller/cn/pod_test.go @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +package cn + +import ( + "encoding/json" + "testing" + + v1 "github.com/apache/doris-operator/api/doris/v1" + "github.com/apache/doris-operator/pkg/common/utils/resource" +) + +func Test_buildCnPodTemplateSpec(t *testing.T) { + dcrJsonStr := `{ + "apiVersion": "doris.selectdb.com/v1", + "kind": "DorisCluster", + "metadata": { + "name": "doriscluster-sample", + "namespace": "default" + }, + "spec": { + "cnSpec": { + "image": "selectdb/doris.be-ubuntu:2.1.6", + "replicas": 3 + }, + "feSpec": { + "image": "selectdb/doris.fe-ubuntu:2.1.6", + "replicas": 3 + } + } +}` + + dcr := &v1.DorisCluster{} + if err := json.Unmarshal([]byte(dcrJsonStr), dcr); err != nil { + t.Errorf("the buildCnPodTemplateSpec unmarshal failed, err=%s", err.Error()) + } + + cn := &Controller{} + pts := cn.buildCnPodTemplateSpec(dcr, map[string]interface{}{}) + if pts.Spec.TerminationGracePeriodSeconds == nil { + t.Fatalf("expected CN terminationGracePeriodSeconds") + } + if *pts.Spec.TerminationGracePeriodSeconds != resource.DEFAULT_BE_TERMINATION_GRACE_PERIOD_SECONDS { + t.Errorf("expected CN terminationGracePeriodSeconds=%d, got %d", resource.DEFAULT_BE_TERMINATION_GRACE_PERIOD_SECONDS, *pts.Spec.TerminationGracePeriodSeconds) + } +} diff --git a/pkg/controller/sub_controller/disaggregated_cluster/computegroups/statefulset.go b/pkg/controller/sub_controller/disaggregated_cluster/computegroups/statefulset.go index 1d8dc359..527537b1 100644 --- a/pkg/controller/sub_controller/disaggregated_cluster/computegroups/statefulset.go +++ b/pkg/controller/sub_controller/disaggregated_cluster/computegroups/statefulset.go @@ -85,6 +85,7 @@ func (dcgs *DisaggregatedComputeGroupsController) NewStatefulset(ddc *dv1.DorisD func (dcgs *DisaggregatedComputeGroupsController) NewPodTemplateSpec(ddc *dv1.DorisDisaggregatedCluster, selector map[string]string, cvs map[string]interface{}, cg *dv1.ComputeGroup) corev1.PodTemplateSpec { pts := resource.NewPodTemplateSpecWithCommonSpec(cg.SkipDefaultSystemInit, &cg.CommonSpec, dv1.DisaggregatedBE) + resource.AddTerminationGracePeriodSeconds(&pts, cvs, resource.DEFAULT_BE_TERMINATION_GRACE_PERIOD_SECONDS) //pod template metadata. func() { l := (resource.Labels)(selector) @@ -143,7 +144,7 @@ func (dcgs *DisaggregatedComputeGroupsController) NewCGContainer(ddc *dv1.DorisD c.Ports = resource.GetDisaggregatedContainerPorts(cvs, dv1.DisaggregatedBE) c.Env = cg.CommonSpec.EnvVars c.Env = append(c.Env, resource.GetPodDefaultEnv()...) - c.Env = append(c.Env, dcgs.newSpecificEnvs(ddc, cg)...) + c.Env = append(c.Env, dcgs.newSpecificEnvs(ddc, cg, cvs)...) if cg.SkipDefaultSystemInit { // Only works when the doris version is higher than 2.1.8 or 3.0.4 @@ -182,7 +183,7 @@ func (dcgs *DisaggregatedComputeGroupsController) NewCGContainer(ddc *dv1.DorisD } // add specific envs for be, the env will used by be_disaggregated_entrypoint script. -func (dcgs *DisaggregatedComputeGroupsController) newSpecificEnvs(ddc *dv1.DorisDisaggregatedCluster, cg *dv1.ComputeGroup) []corev1.EnvVar { +func (dcgs *DisaggregatedComputeGroupsController) newSpecificEnvs(ddc *dv1.DorisDisaggregatedCluster, cg *dv1.ComputeGroup, cvs map[string]interface{}) []corev1.EnvVar { var cgEnvs []corev1.EnvVar stsName := ddc.GetCGStatefulsetName(cg) @@ -195,6 +196,7 @@ func (dcgs *DisaggregatedComputeGroupsController) newSpecificEnvs(ddc *dv1.Doris cgEnvs = append(cgEnvs, corev1.EnvVar{Name: resource.STATEFULSET_NAME, Value: stsName}, corev1.EnvVar{Name: resource.COMPUTE_GROUP_NAME, Value: ddc.GetCGName(cg)}, + corev1.EnvVar{Name: "HOST_TYPE", Value: resource.GetStartMode(cvs)}, corev1.EnvVar{Name: resource.ENV_FE_ADDR, Value: feAddr}, corev1.EnvVar{Name: resource.ENV_FE_PORT, Value: fqpStr}) diff --git a/pkg/controller/sub_controller/disaggregated_cluster/computegroups/statefulset_test.go b/pkg/controller/sub_controller/disaggregated_cluster/computegroups/statefulset_test.go new file mode 100644 index 00000000..0186f436 --- /dev/null +++ b/pkg/controller/sub_controller/disaggregated_cluster/computegroups/statefulset_test.go @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package computegroups + +import ( + "testing" + + dv1 "github.com/apache/doris-operator/api/disaggregated/v1" + "github.com/apache/doris-operator/pkg/common/utils/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/pointer" +) + +func Test_NewPodTemplateSpec_TerminationGracePeriodSeconds(t *testing.T) { + ddc := &dv1.DorisDisaggregatedCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-ddc", + Namespace: "default", + }, + } + cg := &dv1.ComputeGroup{ + UniqueId: "cg1", + CommonSpec: dv1.CommonSpec{ + Replicas: pointer.Int32(1), + Image: "selectdb/doris.be-ubuntu:latest", + }, + } + + dcgs := &DisaggregatedComputeGroupsController{} + pts := dcgs.NewPodTemplateSpec(ddc, map[string]string{}, map[string]interface{}{}, cg) + if pts.Spec.TerminationGracePeriodSeconds == nil { + t.Fatalf("expected BE terminationGracePeriodSeconds") + } + if *pts.Spec.TerminationGracePeriodSeconds != resource.DEFAULT_BE_TERMINATION_GRACE_PERIOD_SECONDS { + t.Errorf("expected BE terminationGracePeriodSeconds=%d, got %d", resource.DEFAULT_BE_TERMINATION_GRACE_PERIOD_SECONDS, *pts.Spec.TerminationGracePeriodSeconds) + } +} diff --git a/pkg/controller/sub_controller/disaggregated_cluster/disaggregated_fe/statefulset.go b/pkg/controller/sub_controller/disaggregated_cluster/disaggregated_fe/statefulset.go index f007abf8..eb0b24b5 100644 --- a/pkg/controller/sub_controller/disaggregated_cluster/disaggregated_fe/statefulset.go +++ b/pkg/controller/sub_controller/disaggregated_cluster/disaggregated_fe/statefulset.go @@ -90,6 +90,7 @@ func (dfc *DisaggregatedFEController) getFEPodLabels(ddc *v1.DorisDisaggregatedC func (dfc *DisaggregatedFEController) NewPodTemplateSpec(ddc *v1.DorisDisaggregatedCluster, confMap map[string]interface{}) corev1.PodTemplateSpec { pts := resource.NewPodTemplateSpecWithCommonSpec(false, &ddc.Spec.FeSpec.CommonSpec, v1.DisaggregatedFE) + resource.AddTerminationGracePeriodSeconds(&pts, confMap, resource.DEFAULT_FE_TERMINATION_GRACE_PERIOD_SECONDS) //pod template metadata. labels := dfc.getFEPodLabels(ddc) pts.Labels = labels @@ -173,6 +174,8 @@ func (dfc *DisaggregatedFEController) newSpecificEnvs(ddc *v1.DorisDisaggregated corev1.EnvVar{Name: MS_ENDPOINT, Value: msEndpoint}, corev1.EnvVar{Name: CLUSTER_ID, Value: fmt.Sprintf("%d", ddc.GetInstanceHashId())}, corev1.EnvVar{Name: STATEFULSET_NAME, Value: stsName}, + corev1.EnvVar{Name: resource.DNS_READY_TIMEOUT, Value: resource.DEFAULT_DNS_READY_TIMEOUT}, + corev1.EnvVar{Name: resource.DNS_READY_INTERVAL, Value: resource.DEFAULT_DNS_READY_INTERVAL}, corev1.EnvVar{Name: resource.ENV_FE_ADDR, Value: ddc.GetFEVIPAddresss()}, corev1.EnvVar{Name: resource.ENV_FE_ELECT_NUMBER, Value: strconv.FormatInt(int64(ddc.GetElectionNumber()), 10)}, ) diff --git a/pkg/controller/sub_controller/disaggregated_cluster/disaggregated_fe/statefulset_test.go b/pkg/controller/sub_controller/disaggregated_cluster/disaggregated_fe/statefulset_test.go new file mode 100644 index 00000000..c96a7ece --- /dev/null +++ b/pkg/controller/sub_controller/disaggregated_cluster/disaggregated_fe/statefulset_test.go @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package disaggregated_fe + +import ( + "testing" + + dv1 "github.com/apache/doris-operator/api/disaggregated/v1" + "github.com/apache/doris-operator/pkg/common/utils/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/pointer" +) + +func Test_NewPodTemplateSpec_TerminationGracePeriodSeconds(t *testing.T) { + ddc := &dv1.DorisDisaggregatedCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-ddc", + Namespace: "default", + }, + Spec: dv1.DorisDisaggregatedClusterSpec{ + FeSpec: dv1.FeSpec{ + CommonSpec: dv1.CommonSpec{ + Replicas: pointer.Int32(1), + Image: "selectdb/doris.fe-ubuntu:latest", + }, + }, + }, + } + + dfc := &DisaggregatedFEController{} + pts := dfc.NewPodTemplateSpec(ddc, map[string]interface{}{}) + if pts.Spec.TerminationGracePeriodSeconds == nil { + t.Fatalf("expected FE terminationGracePeriodSeconds") + } + if *pts.Spec.TerminationGracePeriodSeconds != resource.DEFAULT_FE_TERMINATION_GRACE_PERIOD_SECONDS { + t.Errorf("expected FE terminationGracePeriodSeconds=%d, got %d", resource.DEFAULT_FE_TERMINATION_GRACE_PERIOD_SECONDS, *pts.Spec.TerminationGracePeriodSeconds) + } +} diff --git a/pkg/controller/sub_controller/fe/pod.go b/pkg/controller/sub_controller/fe/pod.go index 2b176a2c..541a34aa 100644 --- a/pkg/controller/sub_controller/fe/pod.go +++ b/pkg/controller/sub_controller/fe/pod.go @@ -27,6 +27,7 @@ import ( func (fc *Controller) buildFEPodTemplateSpec(dcr *v1.DorisCluster, config map[string]interface{}) corev1.PodTemplateSpec { podTemplateSpec := resource.NewPodTemplateSpec(dcr, config, v1.Component_FE) + resource.AddTerminationGracePeriodSeconds(&podTemplateSpec, config, resource.DEFAULT_FE_TERMINATION_GRACE_PERIOD_SECONDS) var containers []corev1.Container //containers = append(containers, podTemplateSpec.Spec.Containers...) feContainer := fc.feContainer(dcr, config) diff --git a/pkg/controller/sub_controller/fe/pod_test.go b/pkg/controller/sub_controller/fe/pod_test.go index 108a61b8..c4fe6329 100644 --- a/pkg/controller/sub_controller/fe/pod_test.go +++ b/pkg/controller/sub_controller/fe/pod_test.go @@ -19,6 +19,7 @@ package fe import ( "encoding/json" v1 "github.com/apache/doris-operator/api/doris/v1" + "github.com/apache/doris-operator/pkg/common/utils/resource" "testing" ) @@ -65,5 +66,11 @@ func Test_buildFEPodTemplateSpec(t *testing.T) { t.Errorf("Test_buildBEStatefulSet unmarshal failed, err=%s", err.Error()) } fc := &Controller{} - fc.buildFEPodTemplateSpec(dcr, map[string]interface{}{}) + pts := fc.buildFEPodTemplateSpec(dcr, map[string]interface{}{}) + if pts.Spec.TerminationGracePeriodSeconds == nil { + t.Fatalf("expected FE terminationGracePeriodSeconds") + } + if *pts.Spec.TerminationGracePeriodSeconds != resource.DEFAULT_FE_TERMINATION_GRACE_PERIOD_SECONDS { + t.Errorf("expected FE terminationGracePeriodSeconds=%d, got %d", resource.DEFAULT_FE_TERMINATION_GRACE_PERIOD_SECONDS, *pts.Spec.TerminationGracePeriodSeconds) + } } From 878d99273347a0fbba298b90c882e1d4141db50f Mon Sep 17 00:00:00 2001 From: Linying Assad Date: Wed, 27 May 2026 14:28:19 +0800 Subject: [PATCH 2/2] Fix disaggregated BE host type for FQDN registration --- .../computegroups/statefulset.go | 2 +- .../computegroups/statefulset_test.go | 32 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/pkg/controller/sub_controller/disaggregated_cluster/computegroups/statefulset.go b/pkg/controller/sub_controller/disaggregated_cluster/computegroups/statefulset.go index 527537b1..591fe1d9 100644 --- a/pkg/controller/sub_controller/disaggregated_cluster/computegroups/statefulset.go +++ b/pkg/controller/sub_controller/disaggregated_cluster/computegroups/statefulset.go @@ -196,7 +196,7 @@ func (dcgs *DisaggregatedComputeGroupsController) newSpecificEnvs(ddc *dv1.Doris cgEnvs = append(cgEnvs, corev1.EnvVar{Name: resource.STATEFULSET_NAME, Value: stsName}, corev1.EnvVar{Name: resource.COMPUTE_GROUP_NAME, Value: ddc.GetCGName(cg)}, - corev1.EnvVar{Name: "HOST_TYPE", Value: resource.GetStartMode(cvs)}, + corev1.EnvVar{Name: "HOST_TYPE", Value: resource.START_MODEL_FQDN}, corev1.EnvVar{Name: resource.ENV_FE_ADDR, Value: feAddr}, corev1.EnvVar{Name: resource.ENV_FE_PORT, Value: fqpStr}) diff --git a/pkg/controller/sub_controller/disaggregated_cluster/computegroups/statefulset_test.go b/pkg/controller/sub_controller/disaggregated_cluster/computegroups/statefulset_test.go index 0186f436..382392b8 100644 --- a/pkg/controller/sub_controller/disaggregated_cluster/computegroups/statefulset_test.go +++ b/pkg/controller/sub_controller/disaggregated_cluster/computegroups/statefulset_test.go @@ -22,6 +22,7 @@ import ( dv1 "github.com/apache/doris-operator/api/disaggregated/v1" "github.com/apache/doris-operator/pkg/common/utils/resource" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/pointer" ) @@ -50,3 +51,34 @@ func Test_NewPodTemplateSpec_TerminationGracePeriodSeconds(t *testing.T) { t.Errorf("expected BE terminationGracePeriodSeconds=%d, got %d", resource.DEFAULT_BE_TERMINATION_GRACE_PERIOD_SECONDS, *pts.Spec.TerminationGracePeriodSeconds) } } + +func Test_newSpecificEnvs_AlwaysUseFQDNHostType(t *testing.T) { + ddc := &dv1.DorisDisaggregatedCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-ddc", + Namespace: "default", + }, + } + cg := &dv1.ComputeGroup{ + UniqueId: "cg1", + } + cvs := map[string]interface{}{ + resource.ENABLE_FQDN: "false", + } + + dcgs := &DisaggregatedComputeGroupsController{} + envs := dcgs.newSpecificEnvs(ddc, cg, cvs) + + if got := findEnvValue(envs, "HOST_TYPE"); got != resource.START_MODEL_FQDN { + t.Fatalf("expected HOST_TYPE=%s, got %s", resource.START_MODEL_FQDN, got) + } +} + +func findEnvValue(envs []corev1.EnvVar, name string) string { + for _, env := range envs { + if env.Name == name { + return env.Value + } + } + return "" +}