From 8306131a5f0a68a8abdb0b571f8497b40f408376 Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Fri, 3 Jul 2026 19:19:18 -0700
Subject: [PATCH] statistics: reduce TopN/buckets collected for non-predicate
 columns

Add the global variable tidb_analyze_non_predicate_column_ratio (default
0.1, range [0,1]). During ANALYZE v2, columns that are not predicate
columns collect only ratio times the configured TopN and bucket numbers,
truncated to an integer (TopN may drop to 0; at least 1 bucket is always
kept). Columns that keep the configured numbers are:

- predicate columns recorded in mysql.column_stats_usage, when any exist;
- otherwise the handle column and the first column of each index;
- columns explicitly specified in ANALYZE TABLE ... COLUMNS.

Index statistics are never reduced. The full-stats column set is decided
at plan-build time and carried on AnalyzeColumnsTask to the analyze
executor, so auto-analyze picks it up as well. Setting the ratio to 1
disables the reduction.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 pkg/executor/analyze_col.go                   |  6 ++
 pkg/executor/analyze_col_sampling.go          | 11 ++-
 pkg/executor/builder.go                       |  2 +
 pkg/executor/test/analyzetest/analyze_test.go |  9 ++
 .../test/analyzetest/columns/BUILD.bazel      |  2 +-
 .../columns/analyze_columns_with_test.go      | 89 ++++++++++++++++++
 .../options/analyze_saved_options_test.go     |  6 ++
 pkg/planner/cardinality/selectivity_test.go   |  6 ++
 .../casetest/planstats/plan_stats_test.go     |  3 +
 pkg/planner/core/common_plans.go              |  8 ++
 pkg/planner/core/planbuilder.go               | 93 ++++++++++++++++---
 pkg/sessionctx/vardef/tidb_vars.go            | 38 +++++---
 pkg/sessionctx/variable/sysvar.go             | 15 +++
 .../handle/handletest/handle_test.go          |  3 +
 14 files changed, 260 insertions(+), 31 deletions(-)

diff --git a/pkg/executor/analyze_col.go b/pkg/executor/analyze_col.go
index f6b8227d580c9..961bd0b328367 100644
--- a/pkg/executor/analyze_col.go
+++ b/pkg/executor/analyze_col.go
@@ -55,6 +55,12 @@ type AnalyzeColumnsExec struct {
 	// concurrent lookup across partition workers.
 	samplingStatsConcurrency int
 
+	// fullStatsCols holds the IDs of the columns that keep the configured TopN/bucket
+	// numbers; the other columns only collect nonPredicateColRatio times the configured
+	// numbers. A nil map disables the reduction. See AnalyzeColumnsTask.FullStatsCols.
+	fullStatsCols        map[int64]struct{}
+	nonPredicateColRatio float64
+
 	memTracker *memory.Tracker
 }
 
diff --git a/pkg/executor/analyze_col_sampling.go b/pkg/executor/analyze_col_sampling.go
index b306cb159acb6..38428924fce9d 100644
--- a/pkg/executor/analyze_col_sampling.go
+++ b/pkg/executor/analyze_col_sampling.go
@@ -867,7 +867,16 @@ workLoop:
 				failpoint.InjectCall("analyzeSamplingBuildAfterReleaseCollectorMemory", collectorMemSize, e.memTracker.BytesConsumed())
 			}
 			numTopN := int(e.opts[ast.AnalyzeOptNumTopN])
+			numBuckets := int(e.opts[ast.AnalyzeOptNumBuckets])
 			if task.isColumn {
+				if e.fullStatsCols != nil {
+					if _, ok := e.fullStatsCols[e.colsInfo[task.slicePos].ID]; !ok {
+						// The column is not a predicate column, so only collect
+						// nonPredicateColRatio times the configured TopN/bucket numbers.
+						numTopN = int(float64(numTopN) * e.nonPredicateColRatio)
+						numBuckets = max(1, int(float64(numBuckets)*e.nonPredicateColRatio))
+					}
+				}
 				if e.tableInfo != nil && isColumnCoveredBySingleColUniqueIndex(e.tableInfo, e.colsInfo[task.slicePos].Offset) {
 					numTopN = 0
 				}
@@ -877,7 +886,7 @@ workLoop:
 					numTopN = 0
 				}
 			}
-			hist, topn, err := statistics.BuildHistAndTopN(e.ctx, int(e.opts[ast.AnalyzeOptNumBuckets]), numTopN, task.id, collector, task.tp, task.isColumn, e.memTracker)
+			hist, topn, err := statistics.BuildHistAndTopN(e.ctx, numBuckets, numTopN, task.id, collector, task.tp, task.isColumn, e.memTracker)
 			if err != nil {
 				resultCh <- err
 				releaseCollectorMemory()
diff --git a/pkg/executor/builder.go b/pkg/executor/builder.go
index 7644929184311..b6d47eebf724e 100644
--- a/pkg/executor/builder.go
+++ b/pkg/executor/builder.go
@@ -3228,6 +3228,8 @@ func (b *executorBuilder) buildAnalyzeSamplingPushdown(
 		schemaForVirtualColEval: schemaForVirtualColEval,
 		baseCount:               count,
 		baseModifyCnt:           modifyCount,
+		fullStatsCols:           task.FullStatsCols,
+		nonPredicateColRatio:    task.NonPredicateColRatio,
 	}
 	e.analyzePB.ColReq = &tipb.AnalyzeColumnsReq{
 		BucketSize:   int64(opts[ast.AnalyzeOptNumBuckets]),
diff --git a/pkg/executor/test/analyzetest/analyze_test.go b/pkg/executor/test/analyzetest/analyze_test.go
index 770d0f88f0fbf..98ef4bdc47ac9 100644
--- a/pkg/executor/test/analyzetest/analyze_test.go
+++ b/pkg/executor/test/analyzetest/analyze_test.go
@@ -636,6 +636,9 @@ func TestAnalyzeColumnsAfterAnalyzeAll(t *testing.T) {
 			tk.MustExec("use test")
 			tk.MustExec("drop table if exists t")
 			tk.MustExec("set @@tidb_analyze_version = 2")
+			// Keep the configured TopN/bucket numbers for all columns; the reduction for
+			// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
+			tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 			tk.MustExec("create table t (a int, b int)")
 			tk.MustExec("insert into t (a,b) values (1,1), (1,1), (2,2), (2,2), (3,3), (4,4)")
 			tk.MustExec("flush stats_delta *.*")
@@ -1061,6 +1064,9 @@ func TestAnalyzePartitionTableWithDynamicMode(t *testing.T) {
 
 	tk.MustExec("use test")
 	tk.MustExec("set @@session.tidb_analyze_version = 2")
+	// Keep the configured TopN/bucket numbers for all columns; the reduction for
+	// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
+	tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 	tk.MustExec("set @@session.tidb_stats_load_sync_wait = 20000") // to stabilise test
 	tk.MustExec("set @@session.tidb_partition_prune_mode = 'dynamic'")
 	createTable := `CREATE TABLE t (a int, b int, c varchar(10), d int, primary key(a), index idx(b))
@@ -1155,6 +1161,9 @@ func TestAnalyzePartitionTableStaticToDynamic(t *testing.T) {
 
 	tk.MustExec("use test")
 	tk.MustExec("set @@session.tidb_analyze_version = 2")
+	// Keep the configured TopN/bucket numbers for all columns; the reduction for
+	// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
+	tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 	tk.MustExec("set @@session.tidb_stats_load_sync_wait = 20000") // to stabilise test
 	tk.MustExec("set @@session.tidb_partition_prune_mode = 'static'")
 	createTable := `CREATE TABLE t (a int, b int, c varchar(10), d int, primary key(a), index idx(b))
diff --git a/pkg/executor/test/analyzetest/columns/BUILD.bazel b/pkg/executor/test/analyzetest/columns/BUILD.bazel
index eed1fc5a8fd1f..392be07b76f2b 100644
--- a/pkg/executor/test/analyzetest/columns/BUILD.bazel
+++ b/pkg/executor/test/analyzetest/columns/BUILD.bazel
@@ -8,7 +8,7 @@ go_test(
         "main_test.go",
     ],
     flaky = True,
-    shard_count = 6,
+    shard_count = 7,
     deps = [
         "//pkg/config",
         "//pkg/parser/ast",
diff --git a/pkg/executor/test/analyzetest/columns/analyze_columns_with_test.go b/pkg/executor/test/analyzetest/columns/analyze_columns_with_test.go
index 156945fca66c1..c52587b299c02 100644
--- a/pkg/executor/test/analyzetest/columns/analyze_columns_with_test.go
+++ b/pkg/executor/test/analyzetest/columns/analyze_columns_with_test.go
@@ -35,6 +35,9 @@ func TestAnalyzeColumnsWithPrimaryKey(t *testing.T) {
 			tk.MustExec("use test")
 			tk.MustExec("drop table if exists t")
 			tk.MustExec("set @@tidb_analyze_version = 2")
+			// Pin the ratio so every analyzed column collects the configured TopN/bucket
+			// numbers; the reduction behavior is covered by TestAnalyzeNonPredicateColumnRatio.
+			tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 			tk.MustExec("create table t (a int, b int, c int primary key)")
 			statstestutil.HandleNextDDLEventWithTxn(h)
 			tk.MustExec("insert into t values (1,1,1), (1,1,2), (2,2,3), (2,2,4), (3,3,5), (4,3,6), (5,4,7), (6,4,8), (null,null,9)")
@@ -103,6 +106,9 @@ func TestAnalyzeColumnsWithIndex(t *testing.T) {
 			tk.MustExec("use test")
 			tk.MustExec("drop table if exists t")
 			tk.MustExec("set @@tidb_analyze_version = 2")
+			// Pin the ratio so every analyzed column collects the configured TopN/bucket
+			// numbers; the reduction behavior is covered by TestAnalyzeNonPredicateColumnRatio.
+			tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 			tk.MustExec("create table t (a int, b int, c int, d int, index idx_b_d(b, d))")
 			statstestutil.HandleNextDDLEventWithTxn(h)
 			tk.MustExec("insert into t values (1,1,null,1), (2,1,9,1), (1,1,8,1), (2,2,7,2), (1,3,7,3), (2,4,6,4), (1,4,6,5), (2,4,6,5), (1,5,6,5)")
@@ -180,6 +186,9 @@ func TestAnalyzeColumnsWithClusteredIndex(t *testing.T) {
 			tk.MustExec("use test")
 			tk.MustExec("drop table if exists t")
 			tk.MustExec("set @@tidb_analyze_version = 2")
+			// Pin the ratio so every analyzed column collects the configured TopN/bucket
+			// numbers; the reduction behavior is covered by TestAnalyzeNonPredicateColumnRatio.
+			tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 			tk.MustExec("create table t (a int, b int, c int, d int, primary key(b, d) clustered)")
 			statstestutil.HandleNextDDLEventWithTxn(h)
 			tk.MustExec("insert into t values (1,1,null,1), (2,2,9,2), (1,3,8,3), (2,4,7,4), (1,5,7,5), (2,6,6,6), (1,7,6,7), (2,8,6,8), (1,9,6,9)")
@@ -257,6 +266,9 @@ func TestAnalyzeColumnsWithDynamicPartitionTable(t *testing.T) {
 			tk.MustExec("use test")
 			tk.MustExec("drop table if exists t")
 			tk.MustExec("set @@tidb_analyze_version = 2")
+			// Pin the ratio so every analyzed column collects the configured TopN/bucket
+			// numbers; the reduction behavior is covered by TestAnalyzeNonPredicateColumnRatio.
+			tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 			tk.MustExec("set @@tidb_partition_prune_mode = 'dynamic'")
 			tk.MustExec("create table t (a int, b int, c int, index idx(c)) partition by range (a) (partition p0 values less than (10), partition p1 values less than maxvalue)")
 			statstestutil.HandleNextDDLEventWithTxn(h)
@@ -383,6 +395,9 @@ func TestAnalyzeColumnsWithStaticPartitionTable(t *testing.T) {
 			tk.MustExec("use test")
 			tk.MustExec("drop table if exists t")
 			tk.MustExec("set @@tidb_analyze_version = 2")
+			// Pin the ratio so every analyzed column collects the configured TopN/bucket
+			// numbers; the reduction behavior is covered by TestAnalyzeNonPredicateColumnRatio.
+			tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 			tk.MustExec("set @@tidb_partition_prune_mode = 'static'")
 			tk.MustExec("create table t (a int, b int, c int, index idx(c)) partition by range (a) (partition p0 values less than (10), partition p1 values less than maxvalue)")
 			statstestutil.HandleNextDDLEventWithTxn(h)
@@ -494,6 +509,9 @@ func TestAnalyzeColumnsWithVirtualColumnIndex(t *testing.T) {
 			tk.MustExec("use test")
 			tk.MustExec("drop table if exists t")
 			tk.MustExec("set @@tidb_analyze_version = 2")
+			// Pin the ratio so every analyzed column collects the configured TopN/bucket
+			// numbers; the reduction behavior is covered by TestAnalyzeNonPredicateColumnRatio.
+			tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 			tk.MustExec("create table t (a int, b int, c int as (b+1), index idx(c))")
 			statstestutil.HandleNextDDLEventWithTxn(h)
 			tk.MustExec("insert into t (a,b) values (1,1), (2,2), (3,3), (4,4), (5,4), (6,5), (7,5), (8,5), (null,null)")
@@ -550,3 +568,74 @@ func TestAnalyzeColumnsWithVirtualColumnIndex(t *testing.T) {
 		}(val)
 	}
 }
+
+func TestAnalyzeNonPredicateColumnRatio(t *testing.T) {
+	store, dom := testkit.CreateMockStoreAndDomain(t)
+
+	tk := testkit.NewTestKit(t, store)
+	h := dom.StatsHandle()
+	tk.MustExec("use test")
+	// The reduction is enabled by default with a ratio of 0.1.
+	tk.MustQuery("select @@global.tidb_analyze_non_predicate_column_ratio").Check(testkit.Rows("0.1"))
+	tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 0.5")
+	defer tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = default")
+	tk.MustExec("set @@tidb_analyze_version = 2")
+
+	checkStatsSize := func(tblName, colName string, isIndex, numTopN, numBuckets int) {
+		cond := fmt.Sprintf(
+			"where db_name = 'test' and table_name = '%s' and column_name = '%s' and is_index = %d",
+			tblName, colName, isIndex,
+		)
+		require.Len(t, tk.MustQuery("show stats_topn "+cond).Rows(), numTopN,
+			"unexpected topn size for %s.%s", tblName, colName)
+		require.Len(t, tk.MustQuery("show stats_buckets "+cond).Rows(), numBuckets,
+			"unexpected bucket count for %s.%s", tblName, colName)
+	}
+	prepareTable := func(tblName string) {
+		tk.MustExec(fmt.Sprintf("create table %s (a int, b int, c int, index ib (b))", tblName))
+		statstestutil.HandleNextDDLEventWithTxn(h)
+		// Every column gets 8 distinct values: 1~4 appear twice and 5~8 appear once, so
+		// with `with 2 topn, 2 buckets` a full-stats column collects 2 TopN values and 2
+		// buckets while a reduced column (ratio 0.5) collects 1 TopN value and 1 bucket.
+		for i := 1; i <= 4; i++ {
+			tk.MustExec(fmt.Sprintf("insert into %s values (%[2]d, %[2]d, %[2]d), (%[2]d, %[2]d, %[2]d)", tblName, i))
+		}
+		for i := 5; i <= 8; i++ {
+			tk.MustExec(fmt.Sprintf("insert into %s values (%[2]d, %[2]d, %[2]d)", tblName, i))
+		}
+		tk.MustExec("flush stats_delta *.*")
+	}
+
+	// Case 1: column a is a predicate column, so it keeps the configured numbers while
+	// the other columns (including the indexed column b) collect the reduced numbers.
+	prepareTable("t1")
+	tk.MustExec("select * from t1 where a > 0")
+	require.NoError(t, h.DumpColStatsUsageToKV())
+	tk.MustExec("analyze table t1 all columns with 2 topn, 2 buckets")
+	checkStatsSize("t1", "a", 0, 2, 2)
+	checkStatsSize("t1", "b", 0, 1, 1)
+	checkStatsSize("t1", "c", 0, 1, 1)
+	// Index stats always keep the configured numbers.
+	checkStatsSize("t1", "ib", 1, 2, 2)
+
+	// Case 2: no predicate column has been collected for the table, so the first column
+	// of the index keeps the configured numbers while the other columns are reduced.
+	prepareTable("t2")
+	tk.MustExec("analyze table t2 all columns with 2 topn, 2 buckets")
+	checkStatsSize("t2", "a", 0, 1, 1)
+	checkStatsSize("t2", "b", 0, 2, 2)
+	checkStatsSize("t2", "c", 0, 1, 1)
+
+	// Case 3: columns specified in ANALYZE ... COLUMNS keep the configured numbers.
+	prepareTable("t3")
+	tk.MustExec("analyze table t3 columns a, b with 2 topn, 2 buckets")
+	checkStatsSize("t3", "a", 0, 2, 2)
+	checkStatsSize("t3", "b", 0, 2, 2)
+
+	// Case 4: a ratio of 1 disables the reduction, so re-analyzing t1 gives all columns full stats.
+	tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
+	tk.MustExec("analyze table t1 all columns with 2 topn, 2 buckets")
+	checkStatsSize("t1", "a", 0, 2, 2)
+	checkStatsSize("t1", "b", 0, 2, 2)
+	checkStatsSize("t1", "c", 0, 2, 2)
+}
diff --git a/pkg/executor/test/analyzetest/options/analyze_saved_options_test.go b/pkg/executor/test/analyzetest/options/analyze_saved_options_test.go
index 04c5db4c8c0e2..806f9b73ffb23 100644
--- a/pkg/executor/test/analyzetest/options/analyze_saved_options_test.go
+++ b/pkg/executor/test/analyzetest/options/analyze_saved_options_test.go
@@ -37,6 +37,9 @@ func TestSavedAnalyzeOptions(t *testing.T) {
 		tk.MustExec(fmt.Sprintf("set global tidb_persist_analyze_options = %v", originalVal1))
 	}()
 	tk.MustExec("set global tidb_persist_analyze_options = true")
+	// Keep the configured TopN/bucket numbers for all columns; the reduction for
+	// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
+	tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 	originalVal2 := tk.MustQuery("select @@tidb_auto_analyze_ratio").Rows()[0][0].(string)
 	defer func() {
 		tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_ratio = %v", originalVal2))
@@ -136,6 +139,9 @@ func TestSavedPartitionAnalyzeOptions(t *testing.T) {
 		tk.MustExec(fmt.Sprintf("set global tidb_persist_analyze_options = %v", originalVal))
 	}()
 	tk.MustExec("set global tidb_persist_analyze_options = true")
+	// Keep the configured TopN/bucket numbers for all columns; the reduction for
+	// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
+	tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 
 	tk.MustExec("use test")
 	tk.MustExec("set @@session.tidb_analyze_version = 2")
diff --git a/pkg/planner/cardinality/selectivity_test.go b/pkg/planner/cardinality/selectivity_test.go
index 45eaa12931484..8bcd4481a600a 100644
--- a/pkg/planner/cardinality/selectivity_test.go
+++ b/pkg/planner/cardinality/selectivity_test.go
@@ -856,6 +856,9 @@ func TestNewIndexWithColumnStats(t *testing.T) {
 	store, dom := testkit.CreateMockStoreAndDomain(t)
 	testKit := testkit.NewTestKit(t, store)
 	testKit.MustExec("use test")
+	// Keep the configured TopN/bucket numbers for all columns; the reduction for
+	// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
+	testKit.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 	testKit.MustExec("drop table if exists t")
 	testKit.MustExec("drop table if exists t2")
 	testKit.MustExec("create table t(a int)")
@@ -1508,6 +1511,9 @@ func testTopNAssistedEstimationInner(t *testing.T, input []string, output []outp
 	h.Clear()
 	tk := testkit.NewTestKit(t, store)
 	tk.MustExec("use test")
+	// Keep the configured TopN/bucket numbers for all columns; the reduction for
+	// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
+	tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 	tk.MustExec("drop table if exists t")
 	tk.MustExec("set @@tidb_default_string_match_selectivity = 0")
 	tk.MustExec("set @@tidb_stats_load_sync_wait = 3000")
diff --git a/pkg/planner/core/casetest/planstats/plan_stats_test.go b/pkg/planner/core/casetest/planstats/plan_stats_test.go
index f8c32c8ef3075..5a245161600e5 100644
--- a/pkg/planner/core/casetest/planstats/plan_stats_test.go
+++ b/pkg/planner/core/casetest/planstats/plan_stats_test.go
@@ -591,6 +591,9 @@ func TestStatsAnalyzedInDDL(t *testing.T) {
 	testkit.RunTestUnderCascadesWithDomain(t, func(t *testing.T, testKit *testkit.TestKit, dom *domain.Domain, cascades, caller string) {
 		testKit.MustExec("use test")
 		testKit.MustExec("set session tidb_stats_update_during_ddl = 1")
+		// Keep the configured TopN/bucket numbers for all columns; the reduction for
+		// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
+		testKit.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 		// test normal table
 		testKit.MustExec("create table t(a int, b int, c int, primary key(a), key idx(b))")
 
diff --git a/pkg/planner/core/common_plans.go b/pkg/planner/core/common_plans.go
index ae124a35819e5..74952868070f4 100644
--- a/pkg/planner/core/common_plans.go
+++ b/pkg/planner/core/common_plans.go
@@ -395,6 +395,14 @@ type AnalyzeColumnsTask struct {
 	SkipColsInfo     []*model.ColumnInfo
 	TblInfo          *model.TableInfo
 	Indexes          []*model.IndexInfo
+	// FullStatsCols holds the IDs of the columns that keep the configured TopN/bucket
+	// numbers when tidb_analyze_non_predicate_column_ratio < 1. Columns absent from the
+	// set only collect NonPredicateColRatio times the configured numbers. A nil map
+	// disables the reduction and every column keeps the configured numbers.
+	FullStatsCols map[int64]struct{}
+	// NonPredicateColRatio is the value of tidb_analyze_non_predicate_column_ratio
+	// captured when the plan was built.
+	NonPredicateColRatio float64
 	AnalyzeInfo
 }
 
diff --git a/pkg/planner/core/planbuilder.go b/pkg/planner/core/planbuilder.go
index 72dc07b04ad3e..d4390e73b04a1 100644
--- a/pkg/planner/core/planbuilder.go
+++ b/pkg/planner/core/planbuilder.go
@@ -2302,7 +2302,9 @@ func (b *PlanBuilder) getMustAnalyzedColumns(tbl *resolve.TableNameW, cols *calc
 }
 
 // getPredicateColumns gets the columns used in predicates.
-func (b *PlanBuilder) getPredicateColumns(tbl *resolve.TableNameW, cols *calcOnceMap) (map[int64]struct{}, error) {
+// When warnEmpty is true and no predicate column has been collected for the table,
+// it appends a warning about falling back to analyzing only indexed columns.
+func (b *PlanBuilder) getPredicateColumns(tbl *resolve.TableNameW, cols *calcOnceMap, warnEmpty bool) (map[int64]struct{}, error) {
 	// Already calculated in the previous call.
 	if cols.calculated {
 		return cols.data, nil
@@ -2316,13 +2318,15 @@ func (b *PlanBuilder) getPredicateColumns(tbl *resolve.TableNameW, cols *calcOnc
 		return nil, err
 	}
 	if len(colList) == 0 {
-		b.ctx.GetSessionVars().StmtCtx.AppendWarning(
-			errors.NewNoStackErrorf(
-				"No predicate column has been collected yet for table %s.%s, so only indexes and the columns composing the indexes will be analyzed",
-				tbl.Schema.L,
-				tbl.Name.L,
-			),
-		)
+		if warnEmpty {
+			b.ctx.GetSessionVars().StmtCtx.AppendWarning(
+				errors.NewNoStackErrorf(
+					"No predicate column has been collected yet for table %s.%s, so only indexes and the columns composing the indexes will be analyzed",
+					tbl.Schema.L,
+					tbl.Name.L,
+				),
+			)
+		}
 	} else {
 		// Some predicate columns are generated columns so we also need to add the columns that make up those generated columns.
 		err := b.addColumnsWithVirtualExprs(tbl, cols, func(columns []*expression.Column) []expression.Expression {
@@ -2449,7 +2453,7 @@ func (b *PlanBuilder) getColumnsBasedOnPredicateColumns(
 	if rewriteAllStatsNeeded {
 		return tbl.TableInfo.Columns, nil
 	}
-	predicate, err := b.getPredicateColumns(tbl, predicateCols)
+	predicate, err := b.getPredicateColumns(tbl, predicateCols, true)
 	if err != nil {
 		return nil, err
 	}
@@ -2461,6 +2465,58 @@ func (b *PlanBuilder) getColumnsBasedOnPredicateColumns(
 	return getColumnListFromSet(tbl.TableInfo.Columns, colSet), nil
 }
 
+// getFullStatsColsAndRatio decides which columns keep the configured (full) TopN/bucket
+// numbers when tidb_analyze_non_predicate_column_ratio is smaller than 1:
+//   - Predicate columns always keep the full numbers.
+//   - Columns explicitly specified in ANALYZE TABLE ... COLUMNS keep the full numbers.
+//   - When no predicate column has been collected for the table yet, the handle column and
+//     the first column of each public index keep the full numbers instead.
+//
+// Every other column only collects ratio times the configured TopN/bucket numbers. A nil
+// map is returned when the reduction is disabled (ratio >= 1): no column is reduced.
+func (b *PlanBuilder) getFullStatsColsAndRatio(
+	tbl *resolve.TableNameW,
+	predicateCols *calcOnceMap,
+	specifiedCols []*model.ColumnInfo,
+) (map[int64]struct{}, float64, error) {
+	ratio := vardef.AnalyzeNonPredicateColumnRatio.Load()
+	if ratio >= 1 {
+		return nil, 1, nil
+	}
+	predicate, err := b.getPredicateColumns(tbl, predicateCols, false)
+	if err != nil {
+		return nil, 1, err
+	}
+	var fullStatsCols map[int64]struct{}
+	if len(predicate) > 0 {
+		fullStatsCols = make(map[int64]struct{}, len(predicate)+len(specifiedCols))
+		maps.Copy(fullStatsCols, predicate)
+	} else {
+		// getPredicateColumns returns early once predicateCols is marked calculated, so
+		// un-mark the empty result: a later warnEmpty=true lookup can then still warn.
+		predicateCols.calculated = false
+		// Fall back to full stats for the handle column and each index's first column.
+		tblInfo := tbl.TableInfo
+		fullStatsCols = make(map[int64]struct{}, len(tblInfo.Indices)+len(specifiedCols)+1)
+		if tblInfo.PKIsHandle {
+			if pkCol := tblInfo.GetPkColInfo(); pkCol != nil {
+				fullStatsCols[pkCol.ID] = struct{}{}
+			}
+		}
+		fullStatsCols[model.ExtraHandleID] = struct{}{}
+		for _, idx := range tblInfo.Indices {
+			if idx.State != model.StatePublic || len(idx.Columns) == 0 {
+				continue
+			}
+			fullStatsCols[tblInfo.Columns[idx.Columns[0].Offset].ID] = struct{}{}
+		}
+	}
+	for _, col := range specifiedCols {
+		fullStatsCols[col.ID] = struct{}{}
+	}
+	return fullStatsCols, ratio, nil
+}
+
 // Helper function to combine two column sets.
 func combineColumnSets(sets ...map[int64]struct{}) map[int64]struct{} {
 	result := make(map[int64]struct{})
@@ -2737,6 +2793,11 @@ func (b *PlanBuilder) buildAnalyzeFullSamplingTask(
 		return err
 	}
 
+	fullStatsCols, nonPredicateColRatio, err := b.getFullStatsColsAndRatio(tbl, &predicateCols, astColList)
+	if err != nil {
+		return err
+	}
+
 	optionsMap, colsInfoMap, err := b.genV2AnalyzeOptions(persistOpts, tbl, isAnalyzeTable, physicalIDs, astOpts, as.ColumnChoice, astColList, &predicateCols, &mustAnalyzedCols, mustAllColumns)
 	if err != nil {
 		return err
@@ -2777,12 +2838,14 @@ func (b *PlanBuilder) buildAnalyzeFullSamplingTask(
 			indexes, independentIndexes, specialGlobalIndexes = getModifiedIndexesInfoForAnalyze(b.ctx, tbl.TableInfo, allColumns, execColsInfo)
 			handleCols := BuildHandleColsForAnalyze(b.ctx, tbl.TableInfo, allColumns, execColsInfo)
 			newTask := AnalyzeColumnsTask{
-				HandleCols:   handleCols,
-				ColsInfo:     execColsInfo,
-				AnalyzeInfo:  info,
-				TblInfo:      tbl.TableInfo,
-				Indexes:      indexes,
-				SkipColsInfo: skipColsInfo,
+				HandleCols:           handleCols,
+				ColsInfo:             execColsInfo,
+				AnalyzeInfo:          info,
+				TblInfo:              tbl.TableInfo,
+				Indexes:              indexes,
+				SkipColsInfo:         skipColsInfo,
+				FullStatsCols:        fullStatsCols,
+				NonPredicateColRatio: nonPredicateColRatio,
 			}
 			if newTask.HandleCols == nil {
 				extraCol := model.NewExtraHandleColInfo()
diff --git a/pkg/sessionctx/vardef/tidb_vars.go b/pkg/sessionctx/vardef/tidb_vars.go
index 23ab58a02678e..0480574e30627 100644
--- a/pkg/sessionctx/vardef/tidb_vars.go
+++ b/pkg/sessionctx/vardef/tidb_vars.go
@@ -1171,6 +1171,11 @@ const (
 	// `PREDICATE`: Analyze only the columns that are used in the predicates of the query.
 	// `ALL`: Analyze all columns in the table.
 	TiDBAnalyzeColumnOptions = "tidb_analyze_column_options"
+	// TiDBAnalyzeNonPredicateColumnRatio scales down the number of TopN values and histogram buckets
+	// that ANALYZE collects for columns that are not predicate columns: such a column collects only
+	// `ratio * (the configured TopN/bucket numbers)`, with at least one bucket. Columns named in
+	// ANALYZE TABLE ... COLUMNS are never reduced. Setting it to 1 disables the reduction.
+	TiDBAnalyzeNonPredicateColumnRatio = "tidb_analyze_non_predicate_column_ratio"
 	// TiDBDisableColumnTrackingTime records the last time TiDBEnableColumnTracking is set off.
 	// It is used to invalidate the collected predicate columns after turning off TiDBEnableColumnTracking, which avoids physical deletion.
 	// It doesn't have cache in memory, and we directly get/set the variable value from/to mysql.tidb.
@@ -1676,6 +1681,7 @@ const (
 	DefTiDBEnableAutoAnalyze                          = true
 	DefTiDBEnableAutoAnalyzePriorityQueue             = true
 	DefTiDBAnalyzeColumnOptions                       = "ALL"
+	DefTiDBAnalyzeNonPredicateColumnRatio             = 0.1
 	DefTiDBMemOOMAction                               = "CANCEL"
 	DefTiDBMaxAutoAnalyzeTime                         = 12 * 60 * 60
 	DefTiDBAutoAnalyzeConcurrency                     = 3
@@ -1849,20 +1855,24 @@ var (
 	//    the value of `tidb_analyze_column_options` determines the behavior of the analyze operation.
 	// 2. If `tidb_persist_analyze_options` is disabled, `tidb_analyze_column_options` is used directly to decide
 	//    whether to analyze all columns or just the predicate columns.
-	AnalyzeColumnOptions           = atomic.NewString(DefTiDBAnalyzeColumnOptions)
-	GlobalLogMaxDays               = atomic.NewInt32(int32(config.GetGlobalConfig().Log.File.MaxDays))
-	QueryLogMaxLen                 = atomic.NewInt32(DefTiDBQueryLogMaxLen)
-	EnablePProfSQLCPU              = atomic.NewBool(false)
-	EnableBatchDML                 = atomic.NewBool(false)
-	EnableTmpStorageOnOOM          = atomic.NewBool(DefTiDBEnableTmpStorageOnOOM)
-	DDLReorgWorkerCounter    int32 = DefTiDBDDLReorgWorkerCount
-	DDLReorgBatchSize        int32 = DefTiDBDDLReorgBatchSize
-	DDLFlashbackConcurrency  int32 = DefTiDBDDLFlashbackConcurrency
-	DDLErrorCountLimit       int64 = DefTiDBDDLErrorCountLimit
-	DDLReorgRowFormat        int64 = DefTiDBRowFormatV2
-	DDLReorgMaxWriteSpeed          = atomic.NewInt64(DefTiDBDDLReorgMaxWriteSpeed)
-	MaxDeltaSchemaCount      int64 = DefTiDBMaxDeltaSchemaCount
-	GlobalSlowLogRateLimiter       = rate.NewLimiter(rate.Inf, 1)
+	AnalyzeColumnOptions = atomic.NewString(DefTiDBAnalyzeColumnOptions)
+	// AnalyzeNonPredicateColumnRatio is a global variable that scales down the TopN/bucket numbers
+	// collected by ANALYZE for columns that are not predicate columns. See
+	// TiDBAnalyzeNonPredicateColumnRatio for the detailed behavior.
+	AnalyzeNonPredicateColumnRatio       = atomic.NewFloat64(DefTiDBAnalyzeNonPredicateColumnRatio)
+	GlobalLogMaxDays                     = atomic.NewInt32(int32(config.GetGlobalConfig().Log.File.MaxDays))
+	QueryLogMaxLen                       = atomic.NewInt32(DefTiDBQueryLogMaxLen)
+	EnablePProfSQLCPU                    = atomic.NewBool(false)
+	EnableBatchDML                       = atomic.NewBool(false)
+	EnableTmpStorageOnOOM                = atomic.NewBool(DefTiDBEnableTmpStorageOnOOM)
+	DDLReorgWorkerCounter          int32 = DefTiDBDDLReorgWorkerCount
+	DDLReorgBatchSize              int32 = DefTiDBDDLReorgBatchSize
+	DDLFlashbackConcurrency        int32 = DefTiDBDDLFlashbackConcurrency
+	DDLErrorCountLimit             int64 = DefTiDBDDLErrorCountLimit
+	DDLReorgRowFormat              int64 = DefTiDBRowFormatV2
+	DDLReorgMaxWriteSpeed                = atomic.NewInt64(DefTiDBDDLReorgMaxWriteSpeed)
+	MaxDeltaSchemaCount            int64 = DefTiDBMaxDeltaSchemaCount
+	GlobalSlowLogRateLimiter             = rate.NewLimiter(rate.Inf, 1)
 	// DDLSlowOprThreshold is the threshold for ddl slow operations, uint is millisecond.
 	DDLSlowOprThreshold = config.GetGlobalConfig().Instance.DDLSlowOprThreshold
 	GlobalSlowLogRules  = atomic.NewPointer[slowlogrule.GlobalSlowLogRules](
diff --git a/pkg/sessionctx/variable/sysvar.go b/pkg/sessionctx/variable/sysvar.go
index 714884a49b360..3e9cd88f7ab8a 100644
--- a/pkg/sessionctx/variable/sysvar.go
+++ b/pkg/sessionctx/variable/sysvar.go
@@ -1207,6 +1207,21 @@ var defaultSysVars = []*SysVar{
 			return normalizedValue, nil
 		},
 	},
+	{
+		Scope:    vardef.ScopeGlobal,
+		Name:     vardef.TiDBAnalyzeNonPredicateColumnRatio,
+		Value:    strconv.FormatFloat(vardef.DefTiDBAnalyzeNonPredicateColumnRatio, 'f', -1, 64),
+		Type:     vardef.TypeFloat,
+		MinValue: 0,
+		MaxValue: 1,
+		GetGlobal: func(_ context.Context, s *SessionVars) (string, error) {
+			return strconv.FormatFloat(vardef.AnalyzeNonPredicateColumnRatio.Load(), 'f', -1, 64), nil
+		},
+		SetGlobal: func(_ context.Context, s *SessionVars, val string) error {
+			vardef.AnalyzeNonPredicateColumnRatio.Store(tidbOptFloat64(val, vardef.DefTiDBAnalyzeNonPredicateColumnRatio))
+			return nil
+		},
+	},
 	{
 		Scope: vardef.ScopeGlobal,
 		Name:  vardef.TiDBEnableAutoAnalyzePriorityQueue,
diff --git a/pkg/statistics/handle/handletest/handle_test.go b/pkg/statistics/handle/handletest/handle_test.go
index b04b4f7b9c470..61aa54923d34f 100644
--- a/pkg/statistics/handle/handletest/handle_test.go
+++ b/pkg/statistics/handle/handletest/handle_test.go
@@ -925,6 +925,9 @@ func TestInitStatsLiteRecordsSynthesizedColumnStats(t *testing.T) {
 	store, dom := testkit.CreateMockStoreAndDomain(t)
 	tk := testkit.NewTestKit(t, store)
 	tk.MustExec("use test")
+	// Keep the configured TopN/bucket numbers for all columns; the reduction for
+	// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
+	tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 	tk.MustExec("create table t(a int)")
 
 	h := dom.StatsHandle()