Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions pkg/executor/analyze_col.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ type AnalyzeColumnsExec struct {
// concurrent lookup across partition workers.
samplingStatsConcurrency int

// fullStatsCols holds the IDs of the columns that keep the configured TopN/bucket
// numbers; the other columns only collect nonPredicateColRatio times the configured
// numbers. A nil map disables the reduction. See AnalyzeColumnsTask.FullStatsCols.
fullStatsCols map[int64]struct{}
nonPredicateColRatio float64

memTracker *memory.Tracker
}

Expand Down
11 changes: 10 additions & 1 deletion pkg/executor/analyze_col_sampling.go
Original file line number Diff line number Diff line change
Expand Up @@ -867,7 +867,16 @@ workLoop:
failpoint.InjectCall("analyzeSamplingBuildAfterReleaseCollectorMemory", collectorMemSize, e.memTracker.BytesConsumed())
}
numTopN := int(e.opts[ast.AnalyzeOptNumTopN])
numBuckets := int(e.opts[ast.AnalyzeOptNumBuckets])
if task.isColumn {
if e.fullStatsCols != nil {
if _, ok := e.fullStatsCols[e.colsInfo[task.slicePos].ID]; !ok {
// The column is not a predicate column, so only collect
// nonPredicateColRatio times the configured TopN/bucket numbers.
numTopN = int(float64(numTopN) * e.nonPredicateColRatio)
numBuckets = max(1, int(float64(numBuckets)*e.nonPredicateColRatio))
}
}
if e.tableInfo != nil && isColumnCoveredBySingleColUniqueIndex(e.tableInfo, e.colsInfo[task.slicePos].Offset) {
numTopN = 0
}
Expand All @@ -877,7 +886,7 @@ workLoop:
numTopN = 0
}
}
hist, topn, err := statistics.BuildHistAndTopN(e.ctx, int(e.opts[ast.AnalyzeOptNumBuckets]), numTopN, task.id, collector, task.tp, task.isColumn, e.memTracker)
hist, topn, err := statistics.BuildHistAndTopN(e.ctx, numBuckets, numTopN, task.id, collector, task.tp, task.isColumn, e.memTracker)
if err != nil {
resultCh <- err
releaseCollectorMemory()
Expand Down
2 changes: 2 additions & 0 deletions pkg/executor/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -3228,6 +3228,8 @@ func (b *executorBuilder) buildAnalyzeSamplingPushdown(
schemaForVirtualColEval: schemaForVirtualColEval,
baseCount: count,
baseModifyCnt: modifyCount,
fullStatsCols: task.FullStatsCols,
nonPredicateColRatio: task.NonPredicateColRatio,
}
e.analyzePB.ColReq = &tipb.AnalyzeColumnsReq{
BucketSize: int64(opts[ast.AnalyzeOptNumBuckets]),
Expand Down
9 changes: 9 additions & 0 deletions pkg/executor/test/analyzetest/analyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,9 @@ func TestAnalyzeColumnsAfterAnalyzeAll(t *testing.T) {
tk.MustExec("use test")
tk.MustExec("drop table if exists t")
tk.MustExec("set @@tidb_analyze_version = 2")
// Keep the configured TopN/bucket numbers for all columns; the reduction for
// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
tk.MustExec("create table t (a int, b int)")
tk.MustExec("insert into t (a,b) values (1,1), (1,1), (2,2), (2,2), (3,3), (4,4)")
tk.MustExec("flush stats_delta *.*")
Expand Down Expand Up @@ -1061,6 +1064,9 @@ func TestAnalyzePartitionTableWithDynamicMode(t *testing.T) {

tk.MustExec("use test")
tk.MustExec("set @@session.tidb_analyze_version = 2")
// Keep the configured TopN/bucket numbers for all columns; the reduction for
// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
tk.MustExec("set @@session.tidb_stats_load_sync_wait = 20000") // to stabilise test
tk.MustExec("set @@session.tidb_partition_prune_mode = 'dynamic'")
createTable := `CREATE TABLE t (a int, b int, c varchar(10), d int, primary key(a), index idx(b))
Expand Down Expand Up @@ -1155,6 +1161,9 @@ func TestAnalyzePartitionTableStaticToDynamic(t *testing.T) {

tk.MustExec("use test")
tk.MustExec("set @@session.tidb_analyze_version = 2")
// Keep the configured TopN/bucket numbers for all columns; the reduction for
// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
tk.MustExec("set @@session.tidb_stats_load_sync_wait = 20000") // to stabilise test
tk.MustExec("set @@session.tidb_partition_prune_mode = 'static'")
createTable := `CREATE TABLE t (a int, b int, c varchar(10), d int, primary key(a), index idx(b))
Expand Down
2 changes: 1 addition & 1 deletion pkg/executor/test/analyzetest/columns/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ go_test(
"main_test.go",
],
flaky = True,
shard_count = 6,
shard_count = 7,
deps = [
"//pkg/config",
"//pkg/parser/ast",
Expand Down
89 changes: 89 additions & 0 deletions pkg/executor/test/analyzetest/columns/analyze_columns_with_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ func TestAnalyzeColumnsWithPrimaryKey(t *testing.T) {
tk.MustExec("use test")
tk.MustExec("drop table if exists t")
tk.MustExec("set @@tidb_analyze_version = 2")
// Pin the ratio so every analyzed column collects the configured TopN/bucket
// numbers; the reduction behavior is covered by TestAnalyzeNonPredicateColumnRatio.
tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
tk.MustExec("create table t (a int, b int, c int primary key)")
statstestutil.HandleNextDDLEventWithTxn(h)
tk.MustExec("insert into t values (1,1,1), (1,1,2), (2,2,3), (2,2,4), (3,3,5), (4,3,6), (5,4,7), (6,4,8), (null,null,9)")
Expand Down Expand Up @@ -103,6 +106,9 @@ func TestAnalyzeColumnsWithIndex(t *testing.T) {
tk.MustExec("use test")
tk.MustExec("drop table if exists t")
tk.MustExec("set @@tidb_analyze_version = 2")
// Pin the ratio so every analyzed column collects the configured TopN/bucket
// numbers; the reduction behavior is covered by TestAnalyzeNonPredicateColumnRatio.
tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
tk.MustExec("create table t (a int, b int, c int, d int, index idx_b_d(b, d))")
statstestutil.HandleNextDDLEventWithTxn(h)
tk.MustExec("insert into t values (1,1,null,1), (2,1,9,1), (1,1,8,1), (2,2,7,2), (1,3,7,3), (2,4,6,4), (1,4,6,5), (2,4,6,5), (1,5,6,5)")
Expand Down Expand Up @@ -180,6 +186,9 @@ func TestAnalyzeColumnsWithClusteredIndex(t *testing.T) {
tk.MustExec("use test")
tk.MustExec("drop table if exists t")
tk.MustExec("set @@tidb_analyze_version = 2")
// Pin the ratio so every analyzed column collects the configured TopN/bucket
// numbers; the reduction behavior is covered by TestAnalyzeNonPredicateColumnRatio.
tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
tk.MustExec("create table t (a int, b int, c int, d int, primary key(b, d) clustered)")
statstestutil.HandleNextDDLEventWithTxn(h)
tk.MustExec("insert into t values (1,1,null,1), (2,2,9,2), (1,3,8,3), (2,4,7,4), (1,5,7,5), (2,6,6,6), (1,7,6,7), (2,8,6,8), (1,9,6,9)")
Expand Down Expand Up @@ -257,6 +266,9 @@ func TestAnalyzeColumnsWithDynamicPartitionTable(t *testing.T) {
tk.MustExec("use test")
tk.MustExec("drop table if exists t")
tk.MustExec("set @@tidb_analyze_version = 2")
// Pin the ratio so every analyzed column collects the configured TopN/bucket
// numbers; the reduction behavior is covered by TestAnalyzeNonPredicateColumnRatio.
tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
tk.MustExec("set @@tidb_partition_prune_mode = 'dynamic'")
tk.MustExec("create table t (a int, b int, c int, index idx(c)) partition by range (a) (partition p0 values less than (10), partition p1 values less than maxvalue)")
statstestutil.HandleNextDDLEventWithTxn(h)
Expand Down Expand Up @@ -383,6 +395,9 @@ func TestAnalyzeColumnsWithStaticPartitionTable(t *testing.T) {
tk.MustExec("use test")
tk.MustExec("drop table if exists t")
tk.MustExec("set @@tidb_analyze_version = 2")
// Pin the ratio so every analyzed column collects the configured TopN/bucket
// numbers; the reduction behavior is covered by TestAnalyzeNonPredicateColumnRatio.
tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
tk.MustExec("set @@tidb_partition_prune_mode = 'static'")
tk.MustExec("create table t (a int, b int, c int, index idx(c)) partition by range (a) (partition p0 values less than (10), partition p1 values less than maxvalue)")
statstestutil.HandleNextDDLEventWithTxn(h)
Expand Down Expand Up @@ -494,6 +509,9 @@ func TestAnalyzeColumnsWithVirtualColumnIndex(t *testing.T) {
tk.MustExec("use test")
tk.MustExec("drop table if exists t")
tk.MustExec("set @@tidb_analyze_version = 2")
// Pin the ratio so every analyzed column collects the configured TopN/bucket
// numbers; the reduction behavior is covered by TestAnalyzeNonPredicateColumnRatio.
tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
tk.MustExec("create table t (a int, b int, c int as (b+1), index idx(c))")
statstestutil.HandleNextDDLEventWithTxn(h)
tk.MustExec("insert into t (a,b) values (1,1), (2,2), (3,3), (4,4), (5,4), (6,5), (7,5), (8,5), (null,null)")
Expand Down Expand Up @@ -550,3 +568,74 @@ func TestAnalyzeColumnsWithVirtualColumnIndex(t *testing.T) {
}(val)
}
}

// TestAnalyzeNonPredicateColumnRatio exercises the global variable
// tidb_analyze_non_predicate_column_ratio: with the ratio at 0.5 and
// `with 2 topn, 2 buckets`, the test expects some columns to keep 2 TopN
// values / 2 buckets while the remaining columns end up with 1 / 1, and
// expects every column to keep 2 / 2 again once the ratio is set to 1.
func TestAnalyzeNonPredicateColumnRatio(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)
	statsHandle := dom.StatsHandle()

	tk.MustExec("use test")
	// The variable is expected to default to 0.1, i.e. the reduction is on.
	tk.MustQuery("select @@global.tidb_analyze_non_predicate_column_ratio").Check(testkit.Rows("0.1"))
	tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 0.5")
	// Put the global variable back to its default when the test returns.
	defer tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = default")
	tk.MustExec("set @@tidb_analyze_version = 2")

	// expectedSize is the number of TopN rows and bucket rows that
	// `show stats_topn` / `show stats_buckets` should report for one
	// column (isIndex = 0) or one index (isIndex = 1).
	type expectedSize struct {
		name    string
		isIndex int
		topN    int
		buckets int
	}

	// assertStatsSizes verifies, entry by entry, the TopN row count first and
	// the bucket row count second for the given table.
	assertStatsSizes := func(tblName string, expectations ...expectedSize) {
		for _, want := range expectations {
			filter := fmt.Sprintf(
				"where db_name = 'test' and table_name = '%s' and column_name = '%s' and is_index = %d",
				tblName, want.name, want.isIndex,
			)
			topNRows := tk.MustQuery("show stats_topn " + filter).Rows()
			require.Len(t, topNRows, want.topN,
				"unexpected topn size for %s.%s", tblName, want.name)
			bucketRows := tk.MustQuery("show stats_buckets " + filter).Rows()
			require.Len(t, bucketRows, want.buckets,
				"unexpected bucket count for %s.%s", tblName, want.name)
		}
	}

	// createAndFill creates a three-column table with an index on b and loads
	// 8 distinct values into every column: 1~4 are inserted twice and 5~8 once.
	// With `with 2 topn, 2 buckets` a full-stats column is expected to collect
	// 2 TopN values and 2 buckets, a reduced column 1 TopN value and 1 bucket.
	createAndFill := func(tblName string) {
		tk.MustExec(fmt.Sprintf("create table %s (a int, b int, c int, index ib (b))", tblName))
		statstestutil.HandleNextDDLEventWithTxn(statsHandle)
		for v := 1; v <= 8; v++ {
			if v <= 4 {
				// Values 1~4 appear twice: one statement inserting two identical rows.
				tk.MustExec(fmt.Sprintf("insert into %s values (%[2]d, %[2]d, %[2]d), (%[2]d, %[2]d, %[2]d)", tblName, v))
				continue
			}
			// Values 5~8 appear once.
			tk.MustExec(fmt.Sprintf("insert into %s values (%[2]d, %[2]d, %[2]d)", tblName, v))
		}
		tk.MustExec("flush stats_delta *.*")
	}

	// Case 1: a is used in a predicate before ANALYZE, so it keeps the
	// configured numbers; b (although indexed) and c get the reduced numbers.
	// The stats of index ib itself keep the configured numbers.
	createAndFill("t1")
	tk.MustExec("select * from t1 where a > 0")
	require.NoError(t, statsHandle.DumpColStatsUsageToKV())
	tk.MustExec("analyze table t1 all columns with 2 topn, 2 buckets")
	assertStatsSizes("t1",
		expectedSize{name: "a", isIndex: 0, topN: 2, buckets: 2},
		expectedSize{name: "b", isIndex: 0, topN: 1, buckets: 1},
		expectedSize{name: "c", isIndex: 0, topN: 1, buckets: 1},
		expectedSize{name: "ib", isIndex: 1, topN: 2, buckets: 2},
	)

	// Case 2: no predicate column usage has been recorded for this table; the
	// first column of the index (b) keeps the configured numbers while a and c
	// are reduced.
	createAndFill("t2")
	tk.MustExec("analyze table t2 all columns with 2 topn, 2 buckets")
	assertStatsSizes("t2",
		expectedSize{name: "a", isIndex: 0, topN: 1, buckets: 1},
		expectedSize{name: "b", isIndex: 0, topN: 2, buckets: 2},
		expectedSize{name: "c", isIndex: 0, topN: 1, buckets: 1},
	)

	// Case 3: columns listed explicitly in ANALYZE ... COLUMNS keep the
	// configured numbers.
	createAndFill("t3")
	tk.MustExec("analyze table t3 columns a, b with 2 topn, 2 buckets")
	assertStatsSizes("t3",
		expectedSize{name: "a", isIndex: 0, topN: 2, buckets: 2},
		expectedSize{name: "b", isIndex: 0, topN: 2, buckets: 2},
	)

	// Case 4: a ratio of 1 turns the reduction off, so re-analyzing t1 gives
	// every column the configured numbers.
	tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
	tk.MustExec("analyze table t1 all columns with 2 topn, 2 buckets")
	assertStatsSizes("t1",
		expectedSize{name: "a", isIndex: 0, topN: 2, buckets: 2},
		expectedSize{name: "b", isIndex: 0, topN: 2, buckets: 2},
		expectedSize{name: "c", isIndex: 0, topN: 2, buckets: 2},
	)
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ func TestSavedAnalyzeOptions(t *testing.T) {
tk.MustExec(fmt.Sprintf("set global tidb_persist_analyze_options = %v", originalVal1))
}()
tk.MustExec("set global tidb_persist_analyze_options = true")
// Keep the configured TopN/bucket numbers for all columns; the reduction for
// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
originalVal2 := tk.MustQuery("select @@tidb_auto_analyze_ratio").Rows()[0][0].(string)
defer func() {
tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_ratio = %v", originalVal2))
Expand Down Expand Up @@ -136,6 +139,9 @@ func TestSavedPartitionAnalyzeOptions(t *testing.T) {
tk.MustExec(fmt.Sprintf("set global tidb_persist_analyze_options = %v", originalVal))
}()
tk.MustExec("set global tidb_persist_analyze_options = true")
// Keep the configured TopN/bucket numbers for all columns; the reduction for
// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")

tk.MustExec("use test")
tk.MustExec("set @@session.tidb_analyze_version = 2")
Expand Down
6 changes: 6 additions & 0 deletions pkg/planner/cardinality/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,9 @@ func TestNewIndexWithColumnStats(t *testing.T) {
store, dom := testkit.CreateMockStoreAndDomain(t)
testKit := testkit.NewTestKit(t, store)
testKit.MustExec("use test")
// Keep the configured TopN/bucket numbers for all columns; the reduction for
// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
testKit.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
testKit.MustExec("drop table if exists t")
testKit.MustExec("drop table if exists t2")
testKit.MustExec("create table t(a int)")
Expand Down Expand Up @@ -1508,6 +1511,9 @@ func testTopNAssistedEstimationInner(t *testing.T, input []string, output []outp
h.Clear()
tk := testkit.NewTestKit(t, store)
tk.MustExec("use test")
// Keep the configured TopN/bucket numbers for all columns; the reduction for
// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
tk.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
tk.MustExec("drop table if exists t")
tk.MustExec("set @@tidb_default_string_match_selectivity = 0")
tk.MustExec("set @@tidb_stats_load_sync_wait = 3000")
Expand Down
3 changes: 3 additions & 0 deletions pkg/planner/core/casetest/planstats/plan_stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,9 @@ func TestStatsAnalyzedInDDL(t *testing.T) {
testkit.RunTestUnderCascadesWithDomain(t, func(t *testing.T, testKit *testkit.TestKit, dom *domain.Domain, cascades, caller string) {
testKit.MustExec("use test")
testKit.MustExec("set session tidb_stats_update_during_ddl = 1")
// Keep the configured TopN/bucket numbers for all columns; the reduction for
// non-predicate columns is covered by TestAnalyzeNonPredicateColumnRatio.
testKit.MustExec("set global tidb_analyze_non_predicate_column_ratio = 1")
// test normal table
testKit.MustExec("create table t(a int, b int, c int, primary key(a), key idx(b))")

Expand Down
8 changes: 8 additions & 0 deletions pkg/planner/core/common_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,14 @@ type AnalyzeColumnsTask struct {
SkipColsInfo []*model.ColumnInfo
TblInfo *model.TableInfo
Indexes []*model.IndexInfo
// FullStatsCols holds the IDs of the columns that keep the configured TopN/bucket
// numbers when tidb_analyze_non_predicate_column_ratio < 1. Columns absent from the
// set only collect NonPredicateColRatio times the configured numbers. A nil map
// disables the reduction and every column keeps the configured numbers.
FullStatsCols map[int64]struct{}
// NonPredicateColRatio is the value of tidb_analyze_non_predicate_column_ratio
// captured when the plan was built.
NonPredicateColRatio float64
AnalyzeInfo
}

Expand Down
Loading
Loading