@@ -20,11 +20,9 @@ import (
2020 "context"
2121 "fmt"
2222 "os"
23- "strings"
2423 "time"
2524
2625 "github.com/spf13/pflag"
27- corev1 "k8s.io/api/core/v1"
2826 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2927 "k8s.io/apimachinery/pkg/util/uuid"
3028 "k8s.io/client-go/informers"
@@ -34,40 +32,25 @@ import (
3432 componentbaseconfig "k8s.io/component-base/config"
3533 componentbaseoptions "k8s.io/component-base/config/options"
3634 "k8s.io/klog/v2"
37- resourceclient "k8s.io/metrics/pkg/client/clientset/versioned/typed/metrics/v1beta1"
3835
3936 "k8s.io/autoscaler/vertical-pod-autoscaler/common"
4037 vpa_clientset "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned"
41- "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/checkpoint"
4238 recommender_config "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/config"
43- "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/input"
44- "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/input/history"
45- input_metrics "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/input/metrics"
46- "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/logic"
47- "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model"
4839 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/routines"
49- "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target"
50- controllerfetcher "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/controller_fetcher"
5140 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics"
5241 metrics_quality "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/quality"
5342 metrics_recommender "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/recommender"
5443 metrics_resources "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/resources"
5544 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/server"
56- vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa"
5745)
5846
47+ var config * recommender_config.RecommenderConfig
48+
5949const (
60- // aggregateContainerStateGCInterval defines how often expired AggregateContainerStates are garbage collected.
61- aggregateContainerStateGCInterval = 1 * time .Hour
62- scaleCacheEntryLifetime time.Duration = time .Hour
63- scaleCacheEntryFreshnessTime time.Duration = 10 * time .Minute
64- scaleCacheEntryJitterFactor float64 = 1.
65- scaleCacheLoopPeriod = 7 * time .Second
66- defaultResyncPeriod time.Duration = 10 * time .Minute
50+ // defaultResyncPeriod is the default resync period for shared informer factories.
51+ defaultResyncPeriod time.Duration = 10 * time .Minute
6752)
6853
69- var config * recommender_config.RecommenderConfig
70-
7154func main () {
7255 // Leader election needs to be initialized before any other flag, because it may be used in other flag's validation.
7356 leaderElection := defaultLeaderElectionConfiguration ()
@@ -86,7 +69,7 @@ func main() {
8669 server .Initialize (& config .CommonFlags .EnableProfiling , healthCheck , & config .Address )
8770
8871 if ! leaderElection .LeaderElect {
89- run (ctx , healthCheck , config . CommonFlags )
72+ run (ctx , healthCheck , config )
9073 } else {
9174 id , err := os .Hostname ()
9275 if err != nil {
@@ -121,7 +104,7 @@ func main() {
121104 ReleaseOnCancel : true ,
122105 Callbacks : leaderelection.LeaderCallbacks {
123106 OnStartedLeading : func (_ context.Context ) {
124- run (ctx , healthCheck , config . CommonFlags )
107+ run (ctx , healthCheck , config )
125108 },
126109 OnStoppedLeading : func () {
127110 klog .Fatal ("lost master" )
@@ -150,16 +133,30 @@ func defaultLeaderElectionConfiguration() componentbaseconfig.LeaderElectionConf
150133 }
151134}
152135
153- func run (ctx context.Context , healthCheck * metrics.HealthCheck , commonFlag * common. CommonFlags ) {
136+ func run (ctx context.Context , healthCheck * metrics.HealthCheck , config * recommender_config. RecommenderConfig ) {
154137 // Create a stop channel that will be used to signal shutdown
155138 stopCh := make (chan struct {})
156139 defer close (stopCh )
157- kubeConfig := common .CreateKubeConfigOrDie (commonFlag . KubeConfig , float32 (commonFlag . KubeApiQps ), int (commonFlag .KubeApiBurst ))
140+ kubeConfig := common .CreateKubeConfigOrDie (config . CommonFlags . KubeConfig , float32 (config . CommonFlags . KubeApiQps ), int (config . CommonFlags .KubeApiBurst ))
158141 kubeClient := kube_client .NewForConfigOrDie (kubeConfig )
159- clusterState := model .NewClusterState (aggregateContainerStateGCInterval )
160- factory := informers .NewSharedInformerFactoryWithOptions (kubeClient , defaultResyncPeriod , informers .WithNamespace (commonFlag .VpaObjectNamespace ))
161- controllerFetcher := controllerfetcher .NewControllerFetcher (kubeConfig , kubeClient , factory , scaleCacheEntryFreshnessTime , scaleCacheEntryLifetime , scaleCacheEntryJitterFactor )
162- podLister , oomObserver := input .NewPodListerAndOOMObserver (ctx , kubeClient , commonFlag .VpaObjectNamespace , stopCh )
142+ vpaClient := vpa_clientset .NewForConfigOrDie (kubeConfig )
143+ factory := informers .NewSharedInformerFactoryWithOptions (kubeClient , defaultResyncPeriod , informers .WithNamespace (config .CommonFlags .VpaObjectNamespace ))
144+
145+ controller , err := routines .NewRecommenderController (
146+ ctx ,
147+ kubeConfig ,
148+ kubeClient ,
149+ vpaClient ,
150+ factory ,
151+ config ,
152+ healthCheck ,
153+ stopCh ,
154+ )
155+
156+ if err != nil {
157+ klog .ErrorS (err , "Failed to create recommender controller" )
158+ klog .FlushAndExit (klog .ExitFlushTimeout , 1 )
159+ }
163160
164161 factory .Start (stopCh )
165162 informerMap := factory .WaitForCacheSync (stopCh )
@@ -170,142 +167,8 @@ func run(ctx context.Context, healthCheck *metrics.HealthCheck, commonFlag *comm
170167 }
171168 }
172169
173- model .InitializeAggregationsConfig (model .NewAggregationsConfig (config .MemoryAggregationInterval , config .MemoryAggregationIntervalCount , config .MemoryHistogramDecayHalfLife , config .CpuHistogramDecayHalfLife , config .OOMBumpUpRatio , config .OOMMinBumpUp ))
174-
175- useCheckpoints := config .Storage != "prometheus"
176-
177- var postProcessors []routines.RecommendationPostProcessor
178- if config .PostProcessorCPUasInteger {
179- postProcessors = append (postProcessors , & routines.IntegerCPUPostProcessor {})
180- }
181-
182- globalMaxAllowed := initGlobalMaxAllowed ()
183- // CappingPostProcessor, should always come in the last position for post-processing
184- postProcessors = append (postProcessors , routines .NewCappingRecommendationProcessor (globalMaxAllowed ))
185- var source input_metrics.PodMetricsLister
186- if config .UseExternalMetrics {
187- resourceMetrics := map [corev1.ResourceName ]string {}
188- if config .ExternalCpuMetric != "" {
189- resourceMetrics [corev1 .ResourceCPU ] = config .ExternalCpuMetric
190- }
191- if config .ExternalMemoryMetric != "" {
192- resourceMetrics [corev1 .ResourceMemory ] = config .ExternalMemoryMetric
193- }
194- externalClientOptions := & input_metrics.ExternalClientOptions {ResourceMetrics : resourceMetrics , ContainerNameLabel : config .CtrNameLabel }
195- klog .V (1 ).InfoS ("Using External Metrics" , "options" , externalClientOptions )
196- source = input_metrics .NewExternalClient (kubeConfig , clusterState , * externalClientOptions )
197- } else {
198- klog .V (1 ).InfoS ("Using Metrics Server" )
199- source = input_metrics .NewPodMetricsesSource (resourceclient .NewForConfigOrDie (kubeConfig ))
200- }
201-
202- ignoredNamespaces := strings .Split (commonFlag .IgnoredVpaObjectNamespaces , "," )
203-
204- clusterStateFeeder := input.ClusterStateFeederFactory {
205- PodLister : podLister ,
206- OOMObserver : oomObserver ,
207- KubeClient : kubeClient ,
208- MetricsClient : input_metrics .NewMetricsClient (source , commonFlag .VpaObjectNamespace , "default-metrics-client" ),
209- VpaCheckpointClient : vpa_clientset .NewForConfigOrDie (kubeConfig ).AutoscalingV1 (),
210- VpaLister : vpa_api_util .NewVpasLister (vpa_clientset .NewForConfigOrDie (kubeConfig ), make (chan struct {}), commonFlag .VpaObjectNamespace ),
211- VpaCheckpointLister : vpa_api_util .NewVpaCheckpointLister (vpa_clientset .NewForConfigOrDie (kubeConfig ), make (chan struct {}), commonFlag .VpaObjectNamespace ),
212- ClusterState : clusterState ,
213- SelectorFetcher : target .NewVpaTargetSelectorFetcher (kubeConfig , kubeClient , factory ),
214- MemorySaveMode : config .MemorySaver ,
215- ControllerFetcher : controllerFetcher ,
216- RecommenderName : config .RecommenderName ,
217- IgnoredNamespaces : ignoredNamespaces ,
218- VpaObjectNamespace : commonFlag .VpaObjectNamespace ,
219- }.Make ()
220- controllerFetcher .Start (ctx , scaleCacheLoopPeriod )
221-
222- recommender := routines.RecommenderFactory {
223- ClusterState : clusterState ,
224- ClusterStateFeeder : clusterStateFeeder ,
225- ControllerFetcher : controllerFetcher ,
226- CheckpointWriter : checkpoint .NewCheckpointWriter (clusterState , vpa_clientset .NewForConfigOrDie (kubeConfig ).AutoscalingV1 ()),
227- VpaClient : vpa_clientset .NewForConfigOrDie (kubeConfig ).AutoscalingV1 (),
228- PodResourceRecommender : logic .CreatePodResourceRecommender (logic.RecommendationConfig {
229- SafetyMarginFraction : config .SafetyMarginFraction ,
230- PodMinCPUMillicores : config .PodMinCPUMillicores ,
231- PodMinMemoryMb : config .PodMinMemoryMb ,
232- TargetCPUPercentile : config .TargetCPUPercentile ,
233- LowerBoundCPUPercentile : config .LowerBoundCPUPercentile ,
234- UpperBoundCPUPercentile : config .UpperBoundCPUPercentile ,
235- ConfidenceIntervalCPU : config .ConfidenceIntervalCPU ,
236- TargetMemoryPercentile : config .TargetMemoryPercentile ,
237- LowerBoundMemoryPercentile : config .LowerBoundMemoryPercentile ,
238- UpperBoundMemoryPercentile : config .UpperBoundMemoryPercentile ,
239- ConfidenceIntervalMemory : config .ConfidenceIntervalMemory ,
240- }),
241- RecommendationFormat : logic.RecommendationFormat {
242- HumanizeMemory : config .HumanizeMemory ,
243- RoundCPUMillicores : config .RoundCPUMillicores ,
244- RoundMemoryBytes : config .RoundMemoryBytes ,
245- },
246- RecommendationPostProcessors : postProcessors ,
247- CheckpointsGCInterval : config .CheckpointsGCInterval ,
248- CheckpointsWriteTimeout : config .CheckpointsWriteTimeout ,
249- UseCheckpoints : useCheckpoints ,
250- UpdateWorkerCount : config .UpdateWorkerCount ,
251- }.Make ()
252-
253- promQueryTimeout , err := time .ParseDuration (config .QueryTimeout )
254- if err != nil {
255- klog .ErrorS (err , "Could not parse --prometheus-query-timeout as a time.Duration" )
170+ if err := controller .Run (ctx ); err != nil {
171+ klog .ErrorS (err , "Recommender controller exited with error" )
256172 klog .FlushAndExit (klog .ExitFlushTimeout , 1 )
257173 }
258-
259- if useCheckpoints {
260- recommender .GetClusterStateFeeder ().InitFromCheckpoints (ctx )
261- } else {
262- config := history.PrometheusHistoryProviderConfig {
263- Address : config .PrometheusAddress ,
264- Insecure : config .PrometheusInsecure ,
265- QueryTimeout : promQueryTimeout ,
266- HistoryLength : config .HistoryLength ,
267- HistoryResolution : config .HistoryResolution ,
268- PodLabelPrefix : config .PodLabelPrefix ,
269- PodLabelsMetricName : config .PodLabelsMetricName ,
270- PodNamespaceLabel : config .PodNamespaceLabel ,
271- PodNameLabel : config .PodNameLabel ,
272- CtrNamespaceLabel : config .CtrNamespaceLabel ,
273- CtrPodNameLabel : config .CtrPodNameLabel ,
274- CtrNameLabel : config .CtrNameLabel ,
275- CadvisorMetricsJobName : config .PrometheusJobName ,
276- Namespace : commonFlag .VpaObjectNamespace ,
277- Authentication : history.PrometheusCredentials {
278- BearerToken : config .PrometheusBearerToken ,
279- Username : config .Username ,
280- Password : config .Password ,
281- },
282- }
283- provider , err := history .NewPrometheusHistoryProvider (config )
284- if err != nil {
285- klog .ErrorS (err , "Could not initialize history provider" )
286- klog .FlushAndExit (klog .ExitFlushTimeout , 1 )
287- }
288- recommender .GetClusterStateFeeder ().InitFromHistoryProvider (provider )
289- }
290-
291- // Start updating health check endpoint.
292- healthCheck .StartMonitoring ()
293-
294- ticker := time .Tick (config .MetricsFetcherInterval )
295- for range ticker {
296- recommender .RunOnce ()
297- healthCheck .UpdateLastActivity ()
298- }
299- }
300-
301- func initGlobalMaxAllowed () corev1.ResourceList {
302- result := make (corev1.ResourceList )
303- if ! config .MaxAllowedCPU .IsZero () {
304- result [corev1 .ResourceCPU ] = config .MaxAllowedCPU .Quantity
305- }
306- if ! config .MaxAllowedMemory .IsZero () {
307- result [corev1 .ResourceMemory ] = config .MaxAllowedMemory .Quantity
308- }
309-
310- return result
311174}
0 commit comments