pingcap · terry1purcell · Jul 4, 2026
diff --git a/pkg/executor/analyze_col.go b/pkg/executor/analyze_col.go
@@ -55,6 +55,12 @@ type AnalyzeColumnsExec struct {
 	// concurrent lookup across partition workers.
 	samplingStatsConcurrency int
 
+	// fullStatsCols holds the IDs of the columns that keep the configured TopN/bucket
+	// numbers; the other columns only collect nonPredicateColRatio times the configured
+	// numbers. A nil map disables the reduction. See AnalyzeColumnsTask.FullStatsCols.
+	fullStatsCols        map[int64]struct{}
+	nonPredicateColRatio float64
+
 	memTracker *memory.Tracker
 }
 

diff --git a/pkg/executor/analyze_col_sampling.go b/pkg/executor/analyze_col_sampling.go
@@ -867,7 +867,16 @@ workLoop:
 				failpoint.InjectCall("analyzeSamplingBuildAfterReleaseCollectorMemory", collectorMemSize, e.memTracker.BytesConsumed())
 			}
 			numTopN := int(e.opts[ast.AnalyzeOptNumTopN])
+			numBuckets := int(e.opts[ast.AnalyzeOptNumBuckets])
 			if task.isColumn {
+				if e.fullStatsCols != nil {
+					if _, ok := e.fullStatsCols[e.colsInfo[task.slicePos].ID]; !ok {
+						// The column is not in fullStatsCols, so scale the configured numbers by
+						// nonPredicateColRatio; TopN may truncate to 0, buckets are floored at 1.
+						numTopN = int(float64(numTopN) * e.nonPredicateColRatio)
+						numBuckets = max(1, int(float64(numBuckets)*e.nonPredicateColRatio))
+					}
+				}
 				if e.tableInfo != nil && isColumnCoveredBySingleColUniqueIndex(e.tableInfo, e.colsInfo[task.slicePos].Offset) {
 					numTopN = 0
 				}
@@ -877,7 +886,7 @@ workLoop:
 					numTopN = 0
 				}
 			}
-			hist, topn, err := statistics.BuildHistAndTopN(e.ctx, int(e.opts[ast.AnalyzeOptNumBuckets]), numTopN, task.id, collector, task.tp, task.isColumn, e.memTracker)
+			hist, topn, err := statistics.BuildHistAndTopN(e.ctx, numBuckets, numTopN, task.id, collector, task.tp, task.isColumn, e.memTracker)
 			if err != nil {
 				resultCh <- err
 				releaseCollectorMemory()

diff --git a/pkg/executor/builder.go b/pkg/executor/builder.go
@@ -3228,6 +3228,8 @@ func (b *executorBuilder) buildAnalyzeSamplingPushdown(
 		schemaForVirtualColEval: schemaForVirtualColEval,
 		baseCount:               count,
 		baseModifyCnt:           modifyCount,
+		fullStatsCols:           task.FullStatsCols,
+		nonPredicateColRatio:    task.NonPredicateColRatio,
 	}
 	e.analyzePB.ColReq = &tipb.AnalyzeColumnsReq{
 		BucketSize:   int64(opts[ast.AnalyzeOptNumBuckets]),

diff --git a/pkg/executor/test/analyzetest/analyze_test.go b/pkg/executor/test/analyzetest/analyze_test.go
@@ -636,6 +636,9 @@ func TestAnalyzeColumnsAfterAnalyzeAll(t *testing.T) {
 			tk.MustExec("use test")
 			tk.MustExec("drop table if exists t")
 			tk.MustExec("set @@tidb_analyze_version = 2")
+			// Keep the configured TopN/bucket numbers for all columns; the reduction for
+			// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
+			tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 			tk.MustExec("create table t (a int, b int)")
 			tk.MustExec("insert into t (a,b) values (1,1), (1,1), (2,2), (2,2), (3,3), (4,4)")
 			tk.MustExec("flush stats_delta *.*")
@@ -1061,6 +1064,9 @@ func TestAnalyzePartitionTableWithDynamicMode(t *testing.T) {
 
 	tk.MustExec("use test")
 	tk.MustExec("set @@session.tidb_analyze_version = 2")
+	// Keep the configured TopN/bucket numbers for all columns; the reduction for
+	// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
+	tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 	tk.MustExec("set @@session.tidb_stats_load_sync_wait = 20000") // to stabilise test
 	tk.MustExec("set @@session.tidb_partition_prune_mode = 'dynamic'")
 	createTable := `CREATE TABLE t (a int, b int, c varchar(10), d int, primary key(a), index idx(b))
@@ -1155,6 +1161,9 @@ func TestAnalyzePartitionTableStaticToDynamic(t *testing.T) {
 
 	tk.MustExec("use test")
 	tk.MustExec("set @@session.tidb_analyze_version = 2")
+	// Keep the configured TopN/bucket numbers for all columns; the reduction for
+	// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
+	tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 	tk.MustExec("set @@session.tidb_stats_load_sync_wait = 20000") // to stabilise test
 	tk.MustExec("set @@session.tidb_partition_prune_mode = 'static'")
 	createTable := `CREATE TABLE t (a int, b int, c varchar(10), d int, primary key(a), index idx(b))

diff --git a/pkg/executor/test/analyzetest/columns/BUILD.bazel b/pkg/executor/test/analyzetest/columns/BUILD.bazel
@@ -8,7 +8,7 @@ go_test(
         "main_test.go",
     ],
     flaky = True,
-    shard_count = 6,
+    shard_count = 7,
     deps = [
         "//pkg/config",
         "//pkg/parser/ast",

diff --git a/pkg/executor/test/analyzetest/columns/analyze_columns_with_test.go b/pkg/executor/test/analyzetest/columns/analyze_columns_with_test.go
@@ -35,6 +35,9 @@ func TestAnalyzeColumnsWithPrimaryKey(t *testing.T) {
 			tk.MustExec("use test")
 			tk.MustExec("drop table if exists t")
 			tk.MustExec("set @@tidb_analyze_version = 2")
+			// Pin the ratio so every analyzed column collects the configured TopN/bucket
+			// numbers; the reduction behavior is covered by TestAnalyzeNonPredicateColumnRatio.
+			tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 			tk.MustExec("create table t (a int, b int, c int primary key)")
 			statstestutil.HandleNextDDLEventWithTxn(h)
 			tk.MustExec("insert into t values (1,1,1), (1,1,2), (2,2,3), (2,2,4), (3,3,5), (4,3,6), (5,4,7), (6,4,8), (null,null,9)")
@@ -103,6 +106,9 @@ func TestAnalyzeColumnsWithIndex(t *testing.T) {
 			tk.MustExec("use test")
 			tk.MustExec("drop table if exists t")
 			tk.MustExec("set @@tidb_analyze_version = 2")
+			// Pin the ratio so every analyzed column collects the configured TopN/bucket
+			// numbers; the reduction behavior is covered by TestAnalyzeNonPredicateColumnRatio.
+			tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 			tk.MustExec("create table t (a int, b int, c int, d int, index idx_b_d(b, d))")
 			statstestutil.HandleNextDDLEventWithTxn(h)
 			tk.MustExec("insert into t values (1,1,null,1), (2,1,9,1), (1,1,8,1), (2,2,7,2), (1,3,7,3), (2,4,6,4), (1,4,6,5), (2,4,6,5), (1,5,6,5)")
@@ -180,6 +186,9 @@ func TestAnalyzeColumnsWithClusteredIndex(t *testing.T) {
 			tk.MustExec("use test")
 			tk.MustExec("drop table if exists t")
 			tk.MustExec("set @@tidb_analyze_version = 2")
+			// Pin the ratio so every analyzed column collects the configured TopN/bucket
+			// numbers; the reduction behavior is covered by TestAnalyzeNonPredicateColumnRatio.
+			tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 			tk.MustExec("create table t (a int, b int, c int, d int, primary key(b, d) clustered)")
 			statstestutil.HandleNextDDLEventWithTxn(h)
 			tk.MustExec("insert into t values (1,1,null,1), (2,2,9,2), (1,3,8,3), (2,4,7,4), (1,5,7,5), (2,6,6,6), (1,7,6,7), (2,8,6,8), (1,9,6,9)")
@@ -257,6 +266,9 @@ func TestAnalyzeColumnsWithDynamicPartitionTable(t *testing.T) {
 			tk.MustExec("use test")
 			tk.MustExec("drop table if exists t")
 			tk.MustExec("set @@tidb_analyze_version = 2")
+			// Pin the ratio so every analyzed column collects the configured TopN/bucket
+			// numbers; the reduction behavior is covered by TestAnalyzeNonPredicateColumnRatio.
+			tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 			tk.MustExec("set @@tidb_partition_prune_mode = 'dynamic'")
 			tk.MustExec("create table t (a int, b int, c int, index idx(c)) partition by range (a) (partition p0 values less than (10), partition p1 values less than maxvalue)")
 			statstestutil.HandleNextDDLEventWithTxn(h)
@@ -383,6 +395,9 @@ func TestAnalyzeColumnsWithStaticPartitionTable(t *testing.T) {
 			tk.MustExec("use test")
 			tk.MustExec("drop table if exists t")
 			tk.MustExec("set @@tidb_analyze_version = 2")
+			// Pin the ratio so every analyzed column collects the configured TopN/bucket
+			// numbers; the reduction behavior is covered by TestAnalyzeNonPredicateColumnRatio.
+			tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 			tk.MustExec("set @@tidb_partition_prune_mode = 'static'")
 			tk.MustExec("create table t (a int, b int, c int, index idx(c)) partition by range (a) (partition p0 values less than (10), partition p1 values less than maxvalue)")
 			statstestutil.HandleNextDDLEventWithTxn(h)
@@ -494,6 +509,9 @@ func TestAnalyzeColumnsWithVirtualColumnIndex(t *testing.T) {
 			tk.MustExec("use test")
 			tk.MustExec("drop table if exists t")
 			tk.MustExec("set @@tidb_analyze_version = 2")
+			// Pin the ratio so every analyzed column collects the configured TopN/bucket
+			// numbers; the reduction behavior is covered by TestAnalyzeNonPredicateColumnRatio.
+			tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 			tk.MustExec("create table t (a int, b int, c int as (b+1), index idx(c))")
 			statstestutil.HandleNextDDLEventWithTxn(h)
 			tk.MustExec("insert into t (a,b) values (1,1), (2,2), (3,3), (4,4), (5,4), (6,5), (7,5), (8,5), (null,null)")
@@ -550,3 +568,74 @@ func TestAnalyzeColumnsWithVirtualColumnIndex(t *testing.T) {
 		}(val)
 	}
 }
+
+func TestAnalyzeNonPredicateColumnRatio(t *testing.T) {
+	store, dom := testkit.CreateMockStoreAndDomain(t)
+
+	tk := testkit.NewTestKit(t, store)
+	h := dom.StatsHandle()
+	tk.MustExec("use test")
+	// The reduction is enabled by default with a ratio of 0.1.
+	tk.MustQuery("select @@global.tidb_analyze_non_predicate_column_ratio").Check(testkit.Rows("0.1"))
+	tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 0.5")
+	defer tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = default")
+	tk.MustExec("set @@tidb_analyze_version = 2")
+
+	checkStatsSize := func(tblName, colName string, isIndex, numTopN, numBuckets int) {
+		cond := fmt.Sprintf(
+			"where db_name = 'test' and table_name = '%s' and column_name = '%s' and is_index = %d",
+			tblName, colName, isIndex,
+		)
+		require.Len(t, tk.MustQuery("show stats_topn "+cond).Rows(), numTopN,
+			"unexpected topn size for %s.%s", tblName, colName)
+		require.Len(t, tk.MustQuery("show stats_buckets "+cond).Rows(), numBuckets,
+			"unexpected bucket count for %s.%s", tblName, colName)
+	}
+	prepareTable := func(tblName string) {
+		tk.MustExec(fmt.Sprintf("create table %s (a int, b int, c int, index ib (b))", tblName))
+		statstestutil.HandleNextDDLEventWithTxn(h)
+		// Every column gets 8 distinct values: 1-4 appear twice and 5-8 appear once, so
+		// under `with 2 topn, 2 buckets` a full-stats column collects 2 TopN values and 2
+		// buckets, while a reduced column (ratio 0.5) collects 1 TopN value and 1 bucket.
+		for i := 1; i <= 4; i++ {
+			tk.MustExec(fmt.Sprintf("insert into %s values (%[2]d, %[2]d, %[2]d), (%[2]d, %[2]d, %[2]d)", tblName, i))
+		}
+		for i := 5; i <= 8; i++ {
+			tk.MustExec(fmt.Sprintf("insert into %s values (%[2]d, %[2]d, %[2]d)", tblName, i))
+		}
+		tk.MustExec("flush stats_delta *.*")
+	}
+
+	// Case 1: column a is a predicate column, so it keeps the configured numbers while
+	// the other columns (including the indexed column b) collect the reduced numbers.
+	prepareTable("t1")
+	tk.MustExec("select * from t1 where a > 0")
+	require.NoError(t, h.DumpColStatsUsageToKV())
+	tk.MustExec("analyze table t1 all columns with 2 topn, 2 buckets")
+	checkStatsSize("t1", "a", 0, 2, 2)
+	checkStatsSize("t1", "b", 0, 1, 1)
+	checkStatsSize("t1", "c", 0, 1, 1)
+	// Index stats always keep the configured numbers.
+	checkStatsSize("t1", "ib", 1, 2, 2)
+
+	// Case 2: no predicate column has been collected for the table, so b, the first
+	// column of index ib, keeps the configured numbers while the other columns are reduced.
+	prepareTable("t2")
+	tk.MustExec("analyze table t2 all columns with 2 topn, 2 buckets")
+	checkStatsSize("t2", "a", 0, 1, 1)
+	checkStatsSize("t2", "b", 0, 2, 2)
+	checkStatsSize("t2", "c", 0, 1, 1)
+
+	// Case 3: columns specified in ANALYZE ... COLUMNS keep the configured numbers.
+	prepareTable("t3")
+	tk.MustExec("analyze table t3 columns a, b with 2 topn, 2 buckets")
+	checkStatsSize("t3", "a", 0, 2, 2)
+	checkStatsSize("t3", "b", 0, 2, 2)
+
+	// Case 4: setting the ratio to 1 disables the reduction.
+	tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
+	tk.MustExec("analyze table t1 all columns with 2 topn, 2 buckets")
+	checkStatsSize("t1", "a", 0, 2, 2)
+	checkStatsSize("t1", "b", 0, 2, 2)
+	checkStatsSize("t1", "c", 0, 2, 2)
+}
diff --git a/pkg/executor/test/analyzetest/options/analyze_saved_options_test.go b/pkg/executor/test/analyzetest/options/analyze_saved_options_test.go
@@ -37,6 +37,9 @@ func TestSavedAnalyzeOptions(t *testing.T) {
 		tk.MustExec(fmt.Sprintf("set global tidb_persist_analyze_options = %v", originalVal1))
 	}()
 	tk.MustExec("set global tidb_persist_analyze_options = true")
+	// Keep the configured TopN/bucket numbers for all columns; the reduction for
+	// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
+	tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 	originalVal2 := tk.MustQuery("select @@tidb_auto_analyze_ratio").Rows()[0][0].(string)
 	defer func() {
 		tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_ratio = %v", originalVal2))
@@ -136,6 +139,9 @@ func TestSavedPartitionAnalyzeOptions(t *testing.T) {
 		tk.MustExec(fmt.Sprintf("set global tidb_persist_analyze_options = %v", originalVal))
 	}()
 	tk.MustExec("set global tidb_persist_analyze_options = true")
+	// Keep the configured TopN/bucket numbers for all columns; the reduction for
+	// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
+	tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 
 	tk.MustExec("use test")
 	tk.MustExec("set @@session.tidb_analyze_version = 2")

diff --git a/pkg/planner/cardinality/selectivity_test.go b/pkg/planner/cardinality/selectivity_test.go
@@ -856,6 +856,9 @@ func TestNewIndexWithColumnStats(t *testing.T) {
 	store, dom := testkit.CreateMockStoreAndDomain(t)
 	testKit := testkit.NewTestKit(t, store)
 	testKit.MustExec("use test")
+	// Keep the configured TopN/bucket numbers for all columns; the reduction for
+	// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
+	testKit.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 	testKit.MustExec("drop table if exists t")
 	testKit.MustExec("drop table if exists t2")
 	testKit.MustExec("create table t(a int)")
@@ -1508,6 +1511,9 @@ func testTopNAssistedEstimationInner(t *testing.T, input []string, output []outp
 	h.Clear()
 	tk := testkit.NewTestKit(t, store)
 	tk.MustExec("use test")
+	// Keep the configured TopN/bucket numbers for all columns; the reduction for
+	// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
+	tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 	tk.MustExec("drop table if exists t")
 	tk.MustExec("set @@tidb_default_string_match_selectivity = 0")
 	tk.MustExec("set @@tidb_stats_load_sync_wait = 3000")

diff --git a/pkg/planner/core/casetest/planstats/plan_stats_test.go b/pkg/planner/core/casetest/planstats/plan_stats_test.go
@@ -591,6 +591,9 @@ func TestStatsAnalyzedInDDL(t *testing.T) {
 	testkit.RunTestUnderCascadesWithDomain(t, func(t *testing.T, testKit *testkit.TestKit, dom *domain.Domain, cascades, caller string) {
 		testKit.MustExec("use test")
 		testKit.MustExec("set session tidb_stats_update_during_ddl = 1")
+		// Keep the configured TopN/bucket numbers for all columns; the reduction for
+		// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
+		testKit.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
 		// test normal table
 		testKit.MustExec("create table t(a int, b int, c int, primary key(a), key idx(b))")
 

diff --git a/pkg/planner/core/common_plans.go b/pkg/planner/core/common_plans.go
@@ -395,6 +395,14 @@ type AnalyzeColumnsTask struct {
 	SkipColsInfo     []*model.ColumnInfo
 	TblInfo          *model.TableInfo
 	Indexes          []*model.IndexInfo
+	// FullStatsCols holds the IDs of the columns that keep the configured TopN/bucket
+	// numbers when tidb_analyze_non_predicate_column_ratio < 1. Columns absent from the
+	// set only collect NonPredicateColRatio times the configured numbers. A nil map
+	// disables the reduction and every column keeps the configured numbers.
+	FullStatsCols map[int64]struct{}
+	// NonPredicateColRatio is the value of tidb_analyze_non_predicate_column_ratio
+	// captured when the plan was built.
+	NonPredicateColRatio float64
 	AnalyzeInfo
 }