From 04ad0894a7dedcb2735e179c3e2296f42de0dfa1 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 22 Apr 2026 10:45:38 +0000 Subject: [PATCH 01/41] Backport #102360 to 26.3: fix data race for ast in database memory on alter and format --- src/Databases/DatabaseMemory.cpp | 13 ++-- ...04093_database_memory_alter_race.reference | 1 + .../04093_database_memory_alter_race.sh | 63 +++++++++++++++++++ 3 files changed, 73 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/04093_database_memory_alter_race.reference create mode 100755 tests/queries/0_stateless/04093_database_memory_alter_race.sh diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index 9f18f0723139..57e8a422b3e6 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -153,7 +153,6 @@ void DatabaseMemory::drop(ContextPtr local_context) void DatabaseMemory::alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata, const bool validate_new_create_query) { - /// NOTE: It is safe to modify AST without lock since alterTable() is called under IStorage::lockForShare() ASTPtr create_query; { std::lock_guard lock{mutex}; @@ -165,17 +164,23 @@ void DatabaseMemory::alterTable(ContextPtr local_context, const StorageID & tabl if (it_query == create_queries.end() || !it_query->second) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot alter: There is no metadata of table {}", table_id.getNameForLogs()); - create_query = it_query->second; + create_query = it_query->second->clone(); } - /// Apply metadata changes without holding a lock to avoid possible deadlock - /// (i.e. when ALTER contains IN (table)) + /// Apply metadata changes to the cloned AST without holding a lock to avoid possible deadlock + /// (i.e. when ALTER contains IN (table)). 
applyMetadataChangesToCreateQuery(create_query, metadata, local_context, validate_new_create_query); /// The create query of the table has been just changed, we need to update dependencies too. auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), create_query, local_context->getCurrentDatabase()); auto loading_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), create_query); DatabaseCatalog::instance().checkTableCanBeAddedWithNoCyclicDependencies(table_id.getQualifiedName(), ref_dependencies.dependencies, loading_dependencies); + + { + std::lock_guard lock{mutex}; + create_queries[table_id.table_name] = create_query; + } + DatabaseCatalog::instance().updateDependencies(table_id, ref_dependencies.dependencies, loading_dependencies, ref_dependencies.mv_from_dependency ? TableNamesSet{ref_dependencies.mv_from_dependency->getQualifiedName()} : TableNamesSet{}); } diff --git a/tests/queries/0_stateless/04093_database_memory_alter_race.reference b/tests/queries/0_stateless/04093_database_memory_alter_race.reference new file mode 100644 index 000000000000..d86bac9de59a --- /dev/null +++ b/tests/queries/0_stateless/04093_database_memory_alter_race.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/04093_database_memory_alter_race.sh b/tests/queries/0_stateless/04093_database_memory_alter_race.sh new file mode 100755 index 000000000000..6f691ebb9436 --- /dev/null +++ b/tests/queries/0_stateless/04093_database_memory_alter_race.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash +# Tags: race, no-parallel +# no-parallel: runs 20 concurrent workers (10 ALTER + 10 SELECT) for 30 s — too heavy to share the server +# with other tests; under asan_ubsan the server hits the docker memory limit + +# Regression test for a data race in DatabaseMemory::alterTable. 
+# alterTable() used to take a raw (non-cloned) pointer to the stored +# create_query AST, release the mutex, then mutate the AST in place. +# Concurrently, system.tables queries call getCreateTableQueryImpl() +# which clones that same AST — reading children while they are being mutated. + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +DB="test_04093_${CLICKHOUSE_DATABASE}" + +$CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS ${DB}" +$CLICKHOUSE_CLIENT -q "CREATE DATABASE ${DB} ENGINE = Memory" + +# Wide initial schema increases the race window during applyMetadataChangesToCreateQuery +COLS=$(seq 0 19 | awk '{printf "%scol%d String", (NR>1?", ":""), $1}') +$CLICKHOUSE_CLIENT -q "CREATE TABLE ${DB}.t (${COLS}) ENGINE = Memory" + +function alter_worker() +{ + local tid=$1 + local TIMELIMIT=$((SECONDS + TIMEOUT)) + local idx=0 + while [ $SECONDS -lt "$TIMELIMIT" ]; do + $CLICKHOUSE_CLIENT -q "ALTER TABLE ${DB}.t ADD COLUMN dyn_${tid}_${idx} UInt64" 2>/dev/null || true + if [ "$idx" -gt 3 ]; then + $CLICKHOUSE_CLIENT -q "ALTER TABLE ${DB}.t DROP COLUMN dyn_${tid}_$(( idx - 3 ))" 2>/dev/null || true + fi + idx=$(( idx + 1 )) + done +} + +function read_worker() +{ + local TIMELIMIT=$((SECONDS + TIMEOUT)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + $CLICKHOUSE_CLIENT -q "SELECT create_table_query FROM system.tables WHERE database = '${DB}' AND name = 't'" > /dev/null 2>&1 || true + done +} + +TIMEOUT=30 + +# Keep worker count low so the cgroup memory limit (10 GB under asan_ubsan) +# is not exceeded by many concurrent clickhouse-client ASan processes. 
+for i in {1..10}; do + alter_worker "$i" 2>/dev/null & +done + +for _ in {1..10}; do + read_worker 2>/dev/null & +done + +wait + +$CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS ${DB}" + +echo 'OK' From efce815033c952428141b34404aa761598358f35 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 2 May 2026 10:35:10 +0000 Subject: [PATCH 02/41] Backport #103858 to 26.3: Fix data part check and consistency check for tables with JSON column --- .../MergeTree/MergeTreeDataPartWide.cpp | 185 +++++++++++++----- src/Storages/MergeTree/checkDataPart.cpp | 73 +++++-- .../04109_check_table_json_wide.reference | 8 + .../04109_check_table_json_wide.sql | 27 +++ 4 files changed, 229 insertions(+), 64 deletions(-) create mode 100644 tests/queries/0_stateless/04109_check_table_json_wide.reference create mode 100644 tests/queries/0_stateless/04109_check_table_json_wide.sql diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 915fc4250e69..44f62e46b48e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -345,31 +345,77 @@ void MergeTreeDataPartWide::doCheckConsistency(bool require_part_metadata) const { if (require_part_metadata) { - for (const auto & name_type : columns) + const auto & cols_substreams = getColumnsSubstreams(); + if (!cols_substreams.empty()) { - getSerialization(name_type.name)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) + /// Use columns_substreams.txt which contains the exact list of substream + /// file names written at part creation time. This is more reliable than + /// enumerateStreams for types with complex serialization (e.g. JSON) + /// where enumerateStreams needs deserialization state to enumerate + /// the correct streams. + size_t col_idx = 0; + for (const auto & name_type : columns) { - /// Skip ephemeral subcolumns that don't store any real data. 
- if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) - return; - - auto stream_name = getStreamNameForColumn(name_type, substream_path, DATA_FILE_EXTENSION, checksums, storage.getSettings()); - if (!stream_name) - throw Exception( - ErrorCodes::NO_FILE_IN_DATA_PART, - "No stream ({}{}) file checksum for column {} in part {}", - ISerialization::getFileNameForStream(name_type, substream_path, ISerialization::StreamFileNameSettings(*storage.getSettings())), - DATA_FILE_EXTENSION, - name_type.name, - getDataPartStorage().getFullPath()); - - auto mrk_file_name = *stream_name + marks_file_extension; - if (!checksums.files.contains(mrk_file_name)) - throw Exception( - ErrorCodes::NO_FILE_IN_DATA_PART, - "No {} file checksum for column {} in part {} ", - mrk_file_name, name_type.name, getDataPartStorage().getFullPath()); - }); + const auto & substreams = cols_substreams.getColumnSubstreams(col_idx); + for (const auto & substream_name : substreams) + { + auto bin_file_name = getStreamNameOrHash(substream_name, DATA_FILE_EXTENSION, checksums); + if (!bin_file_name) + throw Exception( + ErrorCodes::NO_FILE_IN_DATA_PART, + "No stream ({}{}) file checksum for column {} in part {}", + substream_name, + DATA_FILE_EXTENSION, + name_type.name, + getDataPartStorage().getFullPath()); + + auto mrk_file_name = *bin_file_name + marks_file_extension; + if (!checksums.files.contains(mrk_file_name)) + throw Exception( + ErrorCodes::NO_FILE_IN_DATA_PART, + "No {} file checksum for column {} in part {} ", + mrk_file_name, name_type.name, getDataPartStorage().getFullPath()); + } + ++col_idx; + } + } + else + { + /// Fallback for old parts without columns_substreams.txt. + /// Disable enumerate_dynamic_streams because without deserialization state + /// we don't know the correct serialization version for types like JSON, + /// and enumerating dynamic streams with wrong defaults would produce + /// incorrect stream names leading to false positive errors. 
+ for (const auto & name_type : columns) + { + auto serialization = getSerialization(name_type.name); + ISerialization::EnumerateStreamsSettings settings; + settings.enumerate_dynamic_streams = false; + auto data = ISerialization::SubstreamData(serialization).withType(name_type.type).withColumn(name_type.type->createColumn()); + serialization->enumerateStreams(settings, [&](const ISerialization::SubstreamPath & substream_path) + { + /// Skip ephemeral subcolumns that don't store any real data. + if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) + return; + + auto stream_name = getStreamNameForColumn(name_type, substream_path, DATA_FILE_EXTENSION, checksums, storage.getSettings()); + if (!stream_name) + throw Exception( + ErrorCodes::NO_FILE_IN_DATA_PART, + "No stream ({}{}) file checksum for column {} in part {}", + ISerialization::getFileNameForStream(name_type, substream_path, ISerialization::StreamFileNameSettings(*storage.getSettings())), + DATA_FILE_EXTENSION, + name_type.name, + getDataPartStorage().getFullPath()); + + auto mrk_file_name = *stream_name + marks_file_extension; + if (!checksums.files.contains(mrk_file_name)) + throw Exception( + ErrorCodes::NO_FILE_IN_DATA_PART, + "No {} file checksum for column {} in part {} ", + mrk_file_name, name_type.name, getDataPartStorage().getFullPath()); + }, data); + } } } } @@ -377,33 +423,76 @@ void MergeTreeDataPartWide::doCheckConsistency(bool require_part_metadata) const { /// Check that all marks are nonempty and have the same size. 
std::optional marks_size; - for (const auto & name_type : columns) + + const auto & cols_substreams = getColumnsSubstreams(); + if (!cols_substreams.empty()) { - getSerialization(name_type.name)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) + for (size_t col_idx = 0; col_idx != columns.size(); ++col_idx) { - auto stream_name = getStreamNameForColumn(name_type, substream_path, marks_file_extension, getDataPartStorage(), storage.getSettings()); - - /// Missing file is Ok for case when new column was added. - if (!stream_name) - return; - - auto file_path = *stream_name + marks_file_extension; - UInt64 file_size = getDataPartStorage().getFileSize(file_path); - - if (!file_size) - throw Exception( - ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, - "Part {} is broken: {} is empty.", - getDataPartStorage().getFullPath(), - std::string(fs::path(getDataPartStorage().getFullPath()) / file_path)); - - if (!marks_size) - marks_size = file_size; - else if (file_size != *marks_size) - throw Exception( - ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, - "Part {} is broken: marks have different sizes.", getDataPartStorage().getFullPath()); - }); + const auto & substreams = cols_substreams.getColumnSubstreams(col_idx); + for (const auto & substream_name : substreams) + { + auto stream_name = getStreamNameOrHash(substream_name, marks_file_extension, getDataPartStorage()); + + /// Missing file is Ok for case when new column was added. 
+ if (!stream_name) + continue; + + auto file_path = *stream_name + marks_file_extension; + UInt64 file_size = getDataPartStorage().getFileSize(file_path); + + if (!file_size) + throw Exception( + ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, + "Part {} is broken: {} is empty.", + getDataPartStorage().getFullPath(), + std::string(fs::path(getDataPartStorage().getFullPath()) / file_path)); + + if (!marks_size) + marks_size = file_size; + else if (file_size != *marks_size) + throw Exception( + ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, + "Part {} is broken: marks have different sizes.", getDataPartStorage().getFullPath()); + } + } + } + else + { + /// Fallback for old parts without columns_substreams.txt. + /// Disable enumerate_dynamic_streams (see comment above). + for (const auto & name_type : columns) + { + auto serialization = getSerialization(name_type.name); + ISerialization::EnumerateStreamsSettings settings; + settings.enumerate_dynamic_streams = false; + auto data = ISerialization::SubstreamData(serialization).withType(name_type.type).withColumn(name_type.type->createColumn()); + serialization->enumerateStreams(settings, [&](const ISerialization::SubstreamPath & substream_path) + { + auto stream_name = getStreamNameForColumn(name_type, substream_path, marks_file_extension, getDataPartStorage(), storage.getSettings()); + + /// Missing file is Ok for case when new column was added. 
+ if (!stream_name) + return; + + auto file_path = *stream_name + marks_file_extension; + UInt64 file_size = getDataPartStorage().getFileSize(file_path); + + if (!file_size) + throw Exception( + ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, + "Part {} is broken: {} is empty.", + getDataPartStorage().getFullPath(), + std::string(fs::path(getDataPartStorage().getFullPath()) / file_path)); + + if (!marks_size) + marks_size = file_size; + else if (file_size != *marks_size) + throw Exception( + ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, + "Part {} is broken: marks have different sizes.", getDataPartStorage().getFullPath()); + }, data); + } } } } diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index e11fe4abcc8e..1e0ce8c8a09e 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -245,24 +245,65 @@ static IMergeTreeDataPart::Checksums checkDataPart( } else if (part_type == MergeTreeDataPartType::Wide) { - for (const auto & column : columns_list) + const auto & columns_substreams = data_part->getColumnsSubstreams(); + if (!columns_substreams.empty()) { - get_serialization(column)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) + /// Use columns_substreams.txt which contains the exact list of substream + /// file names written at part creation time. This is more reliable than + /// enumerateStreams for types with complex serialization (e.g. JSON) + /// where enumerateStreams needs deserialization state to enumerate + /// the correct streams. + size_t col_idx = 0; + for (const auto & column : columns_list) { - /// Skip ephemeral subcolumns that don't store any real data. 
- if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) - return; - - auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(column, substream_path, ".bin", data_part_storage, data_part->storage.getSettings()); - - if (!stream_name) - throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, - "There is no file for column '{}' in data part '{}'", - column.name, data_part->name); - - auto file_name = *stream_name + ".bin"; - checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); - }, column.type, data_part->getColumnSample(column)); + const auto & substreams = columns_substreams.getColumnSubstreams(col_idx); + for (const auto & substream_name : substreams) + { + auto stream_name = IMergeTreeDataPart::getStreamNameOrHash(substream_name, ".bin", data_part_storage); + + if (!stream_name) + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, + "There is no file for column '{}' (substream '{}') in data part '{}'", + column.name, substream_name, data_part->name); + + auto file_name = *stream_name + ".bin"; + checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); + } + ++col_idx; + } + } + else + { + /// Fallback for old parts without columns_substreams.txt. + /// Disable enumerate_dynamic_streams because without deserialization state + /// we don't know the correct dynamic structure and serialization version for types like JSON, + /// and enumerating dynamic streams with wrong defaults would produce + /// incorrect stream names leading to false positive errors. + /// The files for dynamic streams will still be checked against checksums.txt + /// by the subsequent iteration over all files in the part directory. 
+ for (const auto & column : columns_list) + { + auto serialization = get_serialization(column); + ISerialization::EnumerateStreamsSettings settings; + settings.enumerate_dynamic_streams = false; + auto data = ISerialization::SubstreamData(serialization).withType(column.type).withColumn(column.type->createColumn()); + serialization->enumerateStreams(settings, [&](const ISerialization::SubstreamPath & substream_path) + { + /// Skip ephemeral subcolumns that don't store any real data. + if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) + return; + + auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(column, substream_path, ".bin", data_part_storage, data_part->storage.getSettings()); + + if (!stream_name) + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, + "There is no file for column '{}' in data part '{}'", + column.name, data_part->name); + + auto file_name = *stream_name + ".bin"; + checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); + }, data); + } } } else diff --git a/tests/queries/0_stateless/04109_check_table_json_wide.reference b/tests/queries/0_stateless/04109_check_table_json_wide.reference new file mode 100644 index 000000000000..5439f6a8092b --- /dev/null +++ b/tests/queries/0_stateless/04109_check_table_json_wide.reference @@ -0,0 +1,8 @@ +1 +1 +1 1 +2 2 +3 3 +1 1 +2 2 +3 3 diff --git a/tests/queries/0_stateless/04109_check_table_json_wide.sql b/tests/queries/0_stateless/04109_check_table_json_wide.sql new file mode 100644 index 000000000000..1abd7094d17f --- /dev/null +++ b/tests/queries/0_stateless/04109_check_table_json_wide.sql @@ -0,0 +1,27 @@ +-- Tags: no-fasttest + +DROP TABLE IF EXISTS test_check_json_wide; + +CREATE TABLE test_check_json_wide (id UInt64, data JSON) +ENGINE = MergeTree ORDER BY id +SETTINGS min_rows_for_wide_part=1, min_bytes_for_wide_part=1; + +INSERT INTO test_check_json_wide VALUES (1, '{"a": 1, "b": "hello"}'); +INSERT INTO 
test_check_json_wide VALUES (2, '{"a": 2, "c": [1, 2, 3]}'); +INSERT INTO test_check_json_wide VALUES (3, '{"a": 3, "b": "world", "d": [{"nested": true}]}'); + +CHECK TABLE test_check_json_wide SETTINGS check_query_single_value_result = 1; + +OPTIMIZE TABLE test_check_json_wide FINAL; + +CHECK TABLE test_check_json_wide SETTINGS check_query_single_value_result = 1; + +SELECT id, data.a FROM test_check_json_wide ORDER BY id; + +-- Test that DETACH/ATTACH works (checkConsistency is called during attach). +ALTER TABLE test_check_json_wide DETACH PARTITION tuple(); +ALTER TABLE test_check_json_wide ATTACH PARTITION tuple(); + +SELECT id, data.a FROM test_check_json_wide ORDER BY id; + +DROP TABLE test_check_json_wide; From be638bc970e81f3d1064585d10da509af0197794 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 4 May 2026 16:49:12 +0000 Subject: [PATCH 03/41] Backport #100758 to 26.3: Erase sorted_dynamic_paths entries before dynamic_paths ones --- src/Columns/ColumnObject.cpp | 2 + src/Columns/tests/gtest_column_object.cpp | 47 +++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index 4312dd65a674..df8aea6251e8 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -545,6 +545,7 @@ bool ColumnObject::tryInsert(const Field & x) for (const auto & path : new_dynamic_paths) { dynamic_paths_ptrs.erase(path); + sorted_dynamic_paths.erase(path); dynamic_paths.erase(path); } @@ -580,6 +581,7 @@ bool ColumnObject::tryInsert(const Field & x) } else if (auto * dynamic_path_column = tryToAddNewDynamicPath(path)) { + new_dynamic_paths.insert(String(path)); if (!dynamic_path_column->tryInsert(value_field)) { restore_sizes(); diff --git a/src/Columns/tests/gtest_column_object.cpp b/src/Columns/tests/gtest_column_object.cpp index fd6adb05f52d..2a191f5a79af 100644 --- a/src/Columns/tests/gtest_column_object.cpp +++ b/src/Columns/tests/gtest_column_object.cpp @@ -444,3 
+444,50 @@ TEST(ColumnObject, RepairDuplicatesInDynamicPathsAndSharedData) ASSERT_EQ((*column_object)[2], (Object{{"b", Field(1u)}, {"c", Field(1u)}, {"d", Field(1u)}})); ASSERT_EQ((*column_object)[3], (Object{{"d", Field(1u)}})); } + +TEST(ColumnObject, TryInsertRestoresSortedDynamicPaths) +{ + /// "b" is a typed UInt32 path; everything else becomes a dynamic path. + auto type = DataTypeFactory::instance().get("JSON(max_dynamic_types=10, max_dynamic_paths=10, b UInt32)"); + auto col = type->createColumn(); + auto & col_object = assert_cast(*col); + const auto & dynamic_paths = col_object.getDynamicPaths(); + + /// One valid row so we have something to serialize later. + col_object.insert(Object{{"b", Field{5u}}}); + ASSERT_EQ(col_object.size(), 1u); + ASSERT_EQ(dynamic_paths.size(), 0u); + + /// tryInsert with Object{"a_new": 1, "b": "not_a_number"}. + /// Fields are processed in alphabetical order ("a_new" before "b"), so: + /// 1. "a_new" is new → tryToAddNewDynamicPath succeeds, "a_new" is added to all + /// three structures including sorted_dynamic_paths. + /// 2. ColumnDynamic::tryInsert(1u) for "a_new" succeeds. + /// 3. ColumnUInt32::tryInsert(String) for "b" returns false. + /// 4. restore_sizes() is called. + /// BUG 1: new_dynamic_paths was never populated, so the loop that removes + /// newly-added paths is a no-op — "a_new" is left in dynamic_paths, + /// dynamic_paths_ptrs, and sorted_dynamic_paths with a rolled-back size. + /// BUG 2: even after populating new_dynamic_paths, the old code erased from + /// dynamic_paths before sorted_dynamic_paths, leaving a dangling view. + /// FIX: record the path in new_dynamic_paths immediately after + /// tryToAddNewDynamicPath succeeds, and erase sorted_dynamic_paths + /// before dynamic_paths. 
+ bool result = col_object.tryInsert(Object{{"a_new", Field{1u}}, {"b", Field{String("not_a_number")}}}); + ASSERT_FALSE(result); + + ASSERT_EQ(col_object.size(), 1u); + ASSERT_EQ(dynamic_paths.size(), 0u); /// "a_new" must be fully rolled back + + /// serializeValueIntoArena iterates sorted_dynamic_paths. + /// Without the fix the stale "a_new" entry causes undefined behavior here (see comment above). + Arena arena; + const char * begin = nullptr; + auto ref = col_object.serializeValueIntoArena(0, arena, begin, nullptr); + + /// Round-trip sanity check. + ReadBufferFromMemory buf(ref.data(), ref.size()); + col_object.deserializeAndInsertFromArena(buf, nullptr); + ASSERT_EQ(col_object.size(), 2u); + ASSERT_EQ(col_object[1], col_object[0]); +} From 67caa16abb3485be1d5d4a4d4e23efb937348c7e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 4 May 2026 17:47:03 +0000 Subject: [PATCH 04/41] Backport #102884 to 26.3: Optimization for deferring row policy and PREWHERE --- .../QueryPlan/ReadFromMergeTree.cpp | 30 ++++- ...742_apply_row_policy_after_final.reference | 12 +- .../03742_apply_row_policy_after_final.sql | 33 ++++- ...ly_row_policy_after_final_safety.reference | 21 +++ ...43_apply_row_policy_after_final_safety.sql | 120 ++++++++++++++++++ 5 files changed, 207 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/03743_apply_row_policy_after_final_safety.reference create mode 100644 tests/queries/0_stateless/03743_apply_row_policy_after_final_safety.sql diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index c4131f6f2d95..3e8c9bbbf5a2 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -97,7 +97,8 @@ size_t countPartitions(const RangesInDataParts & parts_with_ranges) return countPartitions(parts_with_ranges, get_partition_id); } -/// check if a DAG node only depends on sorting key columns (ActionsDAG version of 
isExpressionOverSortingKey) +/// check if a DAG node only depends on sorting key columns +/// (ActionsDAG version of isExpressionOverSortingKey) bool isNodeOverSortingKey(const ActionsDAG::Node * node, const NameSet & sorting_key_set) { if (sorting_key_set.contains(node->result_name)) @@ -112,6 +113,17 @@ bool isNodeOverSortingKey(const ActionsDAG::Node * node, const NameSet & sorting return true; } +bool isNodeDeterministic(const ActionsDAG::Node * node) +{ + if (node->type == ActionsDAG::ActionType::FUNCTION + && node->function_base && !node->function_base->isDeterministic()) + return false; + for (const auto * child : node->children) + if (!isNodeDeterministic(child)) + return false; + return true; +} + bool restoreDAGInputs(ActionsDAG & dag, const NameSet & inputs) { std::unordered_set outputs(dag.getOutputs().begin(), dag.getOutputs().end()); @@ -2140,15 +2152,25 @@ void ReadFromMergeTree::deferFiltersAfterFinalIfNeeded() bool defer_row_policy = settings[Setting::apply_row_policy_after_final] && query_info.row_level_filter; bool defer_prewhere = settings[Setting::apply_prewhere_after_final] && query_info.prewhere_info; - /// If row policy touches non-sorting-key columns, prewhere must be deferred too - if (defer_row_policy && query_info.prewhere_info) + if (defer_row_policy) { const auto & sorting_key_columns = storage_snapshot->metadata->getSortingKeyColumns(); NameSet sorting_key_set(sorting_key_columns.begin(), sorting_key_columns.end()); const auto * filter_output = &query_info.row_level_filter->actions.findInOutputs( query_info.row_level_filter->column_name); - if (!isNodeOverSortingKey(filter_output, sorting_key_set)) + + /// Safe to apply before FINAL only if the policy is SK-only (verdict + /// is the same for every row of a dedup group) and deterministic + /// (no `rand`/`now` flipping the winner) + bool row_policy_over_sk = + isNodeOverSortingKey(filter_output, sorting_key_set) + && isNodeDeterministic(filter_output); + + if (row_policy_over_sk) + 
defer_row_policy = false; + + if (!row_policy_over_sk && query_info.prewhere_info) defer_prewhere = true; } diff --git a/tests/queries/0_stateless/03742_apply_row_policy_after_final.reference b/tests/queries/0_stateless/03742_apply_row_policy_after_final.reference index 11f3630e3ab0..d540daaf6b39 100644 --- a/tests/queries/0_stateless/03742_apply_row_policy_after_final.reference +++ b/tests/queries/0_stateless/03742_apply_row_policy_after_final.reference @@ -57,9 +57,7 @@ 2 bbb 1 = row policy on toDate(time) with ORDER BY toDate(time) — prewhere should NOT be deferred = ---- toDate(time) row policy: only row filter deferred, not prewhere - Deferred filters (applied after FINAL) - Deferred row level filter column: equals(toDate(time), \'2024-01-01\'_String) +--- toDate(time) row policy: neither row policy nor prewhere deferred = compound row policy: sorting-key atom should be used for index analysis = --- FINAL: x>1 atom should still participate in primary key analysis 2 eee 2 @@ -141,3 +139,11 @@ Expression (Project names) --- EXPLAIN actions: prewhere should be deferred Deferred filters (applied after FINAL) Deferred prewhere filter column: and(and(notEquals(y, \'eee\'_String), greater(x, 1_UInt8)), less(x, 5_UInt8)) + += row policy on non-SK column + PREWHERE on SK column: both must be deferred = +--- data correctness: PREWHERE x = 2 with row policy deleted = 0 +2 eee 0 2 +--- EXPLAIN: both row policy and PREWHERE deferred + Deferred filters (applied after FINAL) + Deferred row level filter column: equals(deleted, 0_UInt8) + Deferred prewhere filter column: equals(x, 2_UInt8) diff --git a/tests/queries/0_stateless/03742_apply_row_policy_after_final.sql b/tests/queries/0_stateless/03742_apply_row_policy_after_final.sql index dc2c3e339c0d..c1b226c03a04 100644 --- a/tests/queries/0_stateless/03742_apply_row_policy_after_final.sql +++ b/tests/queries/0_stateless/03742_apply_row_policy_after_final.sql @@ -213,8 +213,8 @@ INSERT INTO tab_todate_policy VALUES 
('2024-01-01 11:00:00', 'ccc', 2), ('2024-0 CREATE ROW POLICY pol_todate ON tab_todate_policy USING toDate(time) = '2024-01-01' TO ALL; SET apply_row_policy_after_final = 1; --- rp is over sorting key toDate(time), so only row policy itself should be deferred, not prewhere -SELECT '--- toDate(time) row policy: only row filter deferred, not prewhere'; +-- rp is over sorting key toDate(time), so neither row policy nor prewhere should be deferred +SELECT '--- toDate(time) row policy: neither row policy nor prewhere deferred'; SELECT explain FROM (EXPLAIN actions=1 SELECT * FROM tab_todate_policy FINAL PREWHERE y != 'ddd' ORDER BY time) WHERE explain LIKE '%Deferred%' SETTINGS enable_analyzer=1; DROP ROW POLICY pol_todate ON tab_todate_policy; @@ -322,3 +322,32 @@ SELECT explain FROM (EXPLAIN actions=1 SELECT * FROM tab_nested_and_pw FINAL PRE SET apply_prewhere_after_final = 0; DROP TABLE tab_nested_and_pw; + +SELECT ''; +SELECT '= row policy on non-SK column + PREWHERE on SK column: both must be deferred ='; + +DROP TABLE IF EXISTS tab_sk_prewhere; +DROP ROW POLICY IF EXISTS pol_sk_pw ON tab_sk_prewhere; + +CREATE TABLE tab_sk_prewhere (x UInt32, y String, deleted Int8, version UInt32) +ENGINE = ReplacingMergeTree(version) ORDER BY x; + +INSERT INTO tab_sk_prewhere VALUES (1, 'aaa', 0, 1), (2, 'bbb', 0, 1), (3, 'ccc', 1, 1); +INSERT INTO tab_sk_prewhere VALUES (1, 'ddd', 1, 2), (2, 'eee', 0, 2); + +CREATE ROW POLICY pol_sk_pw ON tab_sk_prewhere USING deleted = 0 TO ALL; + +SET apply_row_policy_after_final = 1; + +SELECT '--- data correctness: PREWHERE x = 2 with row policy deleted = 0'; +-- After FINAL: (1,'ddd',1,2), (2,'eee',0,2), (3,'ccc',1,1) +-- Row policy deleted=0: (2,'eee',0,2) +-- PREWHERE x=2: (2,'eee',0,2) +SELECT * FROM tab_sk_prewhere FINAL PREWHERE x = 2 ORDER BY x; + +SELECT '--- EXPLAIN: both row policy and PREWHERE deferred'; +SELECT explain FROM (EXPLAIN actions=1 SELECT * FROM tab_sk_prewhere FINAL PREWHERE x = 2 ORDER BY x) WHERE explain LIKE 
'%Deferred%' SETTINGS enable_analyzer=1; + +DROP ROW POLICY pol_sk_pw ON tab_sk_prewhere; +SET apply_row_policy_after_final = 0; +DROP TABLE tab_sk_prewhere; diff --git a/tests/queries/0_stateless/03743_apply_row_policy_after_final_safety.reference b/tests/queries/0_stateless/03743_apply_row_policy_after_final_safety.reference new file mode 100644 index 000000000000..0dd03dae2e8b --- /dev/null +++ b/tests/queries/0_stateless/03743_apply_row_policy_after_final_safety.reference @@ -0,0 +1,21 @@ += non-deterministic row policy that structurally only uses SK columns must remain deferred = +--- non-deterministic row policy stays deferred (count of "Deferred row level filter" lines) +1 + += row policy over SK column without PREWHERE: row policy itself should not be deferred = +--- data correctness +2 eee 2 +3 ccc 1 +--- no Deferred filters expected (count of any "Deferred" lines) +0 + += regression: PREWHERE on SK column with non-SK row policy must remain deferred (no side-channel leak) = +--- throwIf in PREWHERE must not observe hidden rows +1 public + +--- EXPLAIN actions=1, apply_row_policy_after_final=1: deferred filter lines + Deferred filters (applied after FINAL) + Deferred row level filter column: equals(deleted, 0_UInt8) + Deferred prewhere filter column: equals(key, 12345_UInt16) +--- EXPLAIN actions=1, apply_row_policy_after_final=0: no deferral expected +0 diff --git a/tests/queries/0_stateless/03743_apply_row_policy_after_final_safety.sql b/tests/queries/0_stateless/03743_apply_row_policy_after_final_safety.sql new file mode 100644 index 000000000000..8167fd2b61a6 --- /dev/null +++ b/tests/queries/0_stateless/03743_apply_row_policy_after_final_safety.sql @@ -0,0 +1,120 @@ +-- Edge cases for row-policy / PREWHERE deferral with FINAL. Three things to guard: +-- 1. SK-only-but-non-deterministic row policy must stay deferred +-- 2. SK-only deterministic row policy without PREWHERE skips deferral +-- 3. 
Non-SK row policy keeps PREWHERE deferred even when PREWHERE is +-- itself SK-only — otherwise `throwIf` could leak hidden rows + +SET enable_analyzer = 1; + +SELECT '= non-deterministic row policy that structurally only uses SK columns must remain deferred ='; + +DROP TABLE IF EXISTS tab_nondet_policy; +DROP ROW POLICY IF EXISTS pol_nondet ON tab_nondet_policy; + +CREATE TABLE tab_nondet_policy (x UInt32, y String, version UInt32) +ENGINE = ReplacingMergeTree(version) ORDER BY x; + +INSERT INTO tab_nondet_policy VALUES (1, 'aaa', 1), (2, 'bbb', 1); + +-- Reads only `x` (SK) but contains `rand` — two evaluations can disagree, +-- so it must stay deferred. +CREATE ROW POLICY pol_nondet ON tab_nondet_policy USING (rand() % (x + 1)) = 0 TO ALL; + +SET apply_row_policy_after_final = 1; + +SELECT '--- non-deterministic row policy stays deferred (count of "Deferred row level filter" lines)'; +SELECT count() +FROM (EXPLAIN actions = 1 SELECT * FROM tab_nondet_policy FINAL ORDER BY x) +WHERE explain LIKE '%Deferred row level filter%' +SETTINGS enable_analyzer = 1; + +DROP ROW POLICY pol_nondet ON tab_nondet_policy; +SET apply_row_policy_after_final = 0; +DROP TABLE tab_nondet_policy; + +SELECT ''; +SELECT '= row policy over SK column without PREWHERE: row policy itself should not be deferred ='; + +DROP TABLE IF EXISTS tab_sk_no_pw; +DROP ROW POLICY IF EXISTS pol_sk_no_pw ON tab_sk_no_pw; + +CREATE TABLE tab_sk_no_pw (x UInt32, y String, version UInt32) +ENGINE = ReplacingMergeTree(version) ORDER BY x; + +INSERT INTO tab_sk_no_pw VALUES (1, 'aaa', 1), (2, 'bbb', 1), (3, 'ccc', 1); +INSERT INTO tab_sk_no_pw VALUES (1, 'ddd', 2), (2, 'eee', 2); + +-- SK-only and deterministic — safe before FINAL, even without PREWHERE. 
+CREATE ROW POLICY pol_sk_no_pw ON tab_sk_no_pw USING x > 1 TO ALL; + +SET apply_row_policy_after_final = 1; + +SELECT '--- data correctness'; +-- FINAL groups: (1, 'ddd', 2), (2, 'eee', 2), (3, 'ccc', 1) +-- row policy x > 1 keeps: (2, 'eee', 2), (3, 'ccc', 1) +SELECT * FROM tab_sk_no_pw FINAL ORDER BY x; + +SELECT '--- no Deferred filters expected (count of any "Deferred" lines)'; +SELECT count() +FROM (EXPLAIN actions = 1 SELECT * FROM tab_sk_no_pw FINAL ORDER BY x) +WHERE explain LIKE '%Deferred%' +SETTINGS enable_analyzer = 1; + +DROP ROW POLICY pol_sk_no_pw ON tab_sk_no_pw; +SET apply_row_policy_after_final = 0; +DROP TABLE tab_sk_no_pw; + +SELECT ''; +SELECT '= regression: PREWHERE on SK column with non-SK row policy must remain deferred (no side-channel leak) ='; + +DROP TABLE IF EXISTS tab_sidechannel; +DROP ROW POLICY IF EXISTS pol_sidechannel ON tab_sidechannel; + +CREATE TABLE tab_sidechannel (id UInt32, secret String, version UInt32) +ENGINE = ReplacingMergeTree(version) ORDER BY id; + +-- id = 1 is visible; id = 2 is hidden by the row policy on `secret`. +INSERT INTO tab_sidechannel VALUES (1, 'public', 1), (2, 'private', 1); + +CREATE ROW POLICY pol_sidechannel ON tab_sidechannel USING secret = 'public' TO ALL; + +SET apply_row_policy_after_final = 1; + +-- If PREWHERE ran before the row policy, `throwIf(id = 2)` would fire and +-- leak the hidden row's existence. 
+SELECT '--- throwIf in PREWHERE must not observe hidden rows'; +SELECT id, secret FROM tab_sidechannel FINAL PREWHERE throwIf(id = 2, 'leaked existence of hidden row') = 0 ORDER BY id; + +DROP ROW POLICY pol_sidechannel ON tab_sidechannel; +SET apply_row_policy_after_final = 0; +DROP TABLE tab_sidechannel; + +SELECT ''; + +DROP TABLE IF EXISTS repro; +DROP ROW POLICY IF EXISTS repro_policy ON repro; + +CREATE TABLE repro (id Int64, key Int64, ts DateTime64(6), data String, deleted Int8, ver Int64) +ENGINE = ReplacingMergeTree(ver) ORDER BY (key, id); + +CREATE ROW POLICY repro_policy ON repro AS restrictive FOR SELECT USING deleted = 0 TO ALL; + +INSERT INTO repro +SELECT number, rand64() % 25000, toDateTime64('2024-01-01', 6) + number, repeat('x', 200), 0, 1 +FROM numbers(1000); + +-- Row policy is on `deleted` (non-SK), PREWHERE is on `key` (part of SK `(key, id)`) +-- Both filters must be deferred — earlier revisions of this PR let PREWHERE +-- skip deferral here, which is the side channel `throwIf` would exploit +SELECT '--- EXPLAIN actions=1, apply_row_policy_after_final=1: deferred filter lines'; +SELECT explain +FROM (EXPLAIN actions=1 SELECT count() FROM repro FINAL PREWHERE key = 12345 SETTINGS apply_row_policy_after_final=1) +WHERE explain LIKE '%Deferred%'; + +SELECT '--- EXPLAIN actions=1, apply_row_policy_after_final=0: no deferral expected'; +SELECT count() +FROM (EXPLAIN actions=1 SELECT count() FROM repro FINAL PREWHERE key = 12345 SETTINGS apply_row_policy_after_final=0) +WHERE explain LIKE '%Deferred%'; + +DROP ROW POLICY repro_policy ON repro; +DROP TABLE repro; From 48492923833eb0c8b2df05b6eb89c5fc8858ee7b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 7 May 2026 11:30:46 +0000 Subject: [PATCH 05/41] Update autogenerated version to 26.3.10.62 and contributors --- cmake/autogenerated_versions.txt | 10 +++++----- .../System/StorageSystemContributors.generated.cpp | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git 
a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index d2edda671281..022b3cbb03c3 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54517) +SET(VERSION_REVISION 54518) SET(VERSION_MAJOR 26) SET(VERSION_MINOR 3) -SET(VERSION_PATCH 10) -SET(VERSION_GITHASH 0d82c1998e3b4d54e110dd8a77a64d247c4bff4a) -SET(VERSION_DESCRIBE v26.3.10.1-lts) -SET(VERSION_STRING 26.3.10.1) +SET(VERSION_PATCH 11) +SET(VERSION_GITHASH e1c11930c28196f954a93287e43c1aa112c8c607) +SET(VERSION_DESCRIBE v26.3.11.1-lts) +SET(VERSION_STRING 26.3.11.1) # end of autochange diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index d7acf4215f8c..05c0a60f9d59 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -1204,6 +1204,7 @@ const char * auto_contributors[] { "Rafael David Tinoco", "Rafael Roquetto", "Rahul", + "Rahul Nair", "Rajakavitha Kodhandapani", "Rajkumar", "Rajkumar Varada", From 1592c7402e3aa20b34e08b196e18b88ea919aad0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 8 May 2026 07:48:12 +0000 Subject: [PATCH 06/41] Backport #104133 to 26.3: Fix numerous semantic inconsistency with `optimize_inverse_dictionary_lookup` --- .../Passes/InverseDictionaryLookupPass.cpp | 149 +++++- ..._inverse_dictionary_lookup_basic.reference | 148 ++++-- ...timize_inverse_dictionary_lookup_basic.sql | 171 ++++++- ...ary_lookup_composite_and_layouts.reference | 22 + ...ictionary_lookup_composite_and_layouts.sql | 45 +- ...dictionary_lookup_dictget_family.reference | 50 +- ...verse_dictionary_lookup_dictget_family.sql | 121 +++-- ...ookup_setting_rewrite_in_to_join.reference | 3 + 
...nary_lookup_setting_rewrite_in_to_join.sql | 9 +- ...nverse_dictionary_lookup_remote_shards.sql | 3 +- ...rse_dictionary_lookup_edge_cases.reference | 344 +++++++++++++ ...1_inverse_dictionary_lookup_edge_cases.sql | 475 ++++++++++++++++++ ...ary_lookup_pruning_key_condition.reference | 63 +++ ...ictionary_lookup_pruning_key_condition.sql | 49 ++ 14 files changed, 1515 insertions(+), 137 deletions(-) create mode 100644 tests/queries/0_stateless/04201_inverse_dictionary_lookup_edge_cases.reference create mode 100644 tests/queries/0_stateless/04201_inverse_dictionary_lookup_edge_cases.sql create mode 100644 tests/queries/0_stateless/04202_inverse_dictionary_lookup_pruning_key_condition.reference create mode 100644 tests/queries/0_stateless/04202_inverse_dictionary_lookup_pruning_key_condition.sql diff --git a/src/Analyzer/Passes/InverseDictionaryLookupPass.cpp b/src/Analyzer/Passes/InverseDictionaryLookupPass.cpp index 6434fa6b5f78..85fc0ce4b10d 100644 --- a/src/Analyzer/Passes/InverseDictionaryLookupPass.cpp +++ b/src/Analyzer/Passes/InverseDictionaryLookupPass.cpp @@ -13,6 +13,11 @@ #include +#include +#include +#include + +#include #include #include @@ -26,6 +31,9 @@ namespace DB namespace Setting { +extern const SettingsUInt64 max_bytes_in_set; +extern const SettingsUInt64 max_rows_in_set; +extern const SettingsOverflowMode set_overflow_mode; extern const SettingsBool optimize_inverse_dictionary_lookup; extern const SettingsBool rewrite_in_to_join; } @@ -68,8 +76,7 @@ bool isSupportedDictGetFunction(const String & name) "dictGetDateTime", "dictGetUUID", "dictGetIPv4", - "dictGetIPv6", - "dictGetOrNull"}; + "dictGetIPv6"}; return supported_functions.contains(name); } @@ -127,6 +134,98 @@ void resolveNode(const Node & node, const ContextPtr & context) QueryAnalysisPass(/*only_analyze*/ false).run(querytree_node, context); } +bool hasNullableComponentInComplexKey(const QueryTreeNodePtr & key_expr_node) +{ + auto type = 
removeNullable(key_expr_node->getResultType()); + const auto * tuple_type = typeid_cast(type.get()); + if (!tuple_type) + return false; + + for (const auto & element : tuple_type->getElements()) + { + if (isNullableOrLowCardinalityNullable(element)) + return true; + } + return false; +} + +bool isRewriteSemanticallySafe( + const DataTypePtr & dict_attr_type, + const DataTypePtr & dictget_result_type, + const Field & attr_null_value, + const ConstantNode & const_arg_node, + bool default_is_lhs, + const String & attr_comparison_function_name, + const ContextPtr & context) +{ + /// Same underlying type after stripping `Nullable` / `LowCardinality` needed. If attribute `n` + /// is `UInt32`, `dictGetUInt16(..., 'n', id) = 42` throws because the underlying types differ (`UInt32` vs `UInt16`) + const bool stripped_types_match + = removeLowCardinalityAndNullable(dict_attr_type)->equals(*removeLowCardinalityAndNullable(dictget_result_type)); + if (!stripped_types_match) + return false; + + /// `dictGet` and `IN` don't have the same stored-NULL attribute semantics. + /// Example: if dictionary has `id = 1, name = NULL`, `dictGet(..., 1) = 'x'` gives + /// `NULL`. The `IN` rewrite uses `WHERE name = 'x'`, so the row is filtered out and + /// `1 IN (...)` gives `0`. This is visible in projection or `isNull(predicate)`. + /// Skip optimization when the attribute can contain `NULL`, including + /// `LowCardinality(Nullable(...))`. 
+ if (isNullableOrLowCardinalityNullable(dict_attr_type)) + return false; + + const DataTypePtr const_arg_type = const_arg_node.getResultType(); + const Field & const_arg_value = const_arg_node.getValue(); + + auto default_column = ColumnWithTypeAndName(dict_attr_type->createColumnConst(1, attr_null_value), dict_attr_type, "default_value"); + auto const_arg_column = ColumnWithTypeAndName(const_arg_type->createColumnConst(1, const_arg_value), const_arg_type, "const_value"); + + ColumnsWithTypeAndName comparison_arguments; + if (default_is_lhs) + comparison_arguments = {std::move(default_column), std::move(const_arg_column)}; + else + comparison_arguments = {std::move(const_arg_column), std::move(default_column)}; + + /// `dictGet` and `IN` don't have the same missing-key default semantics. + /// E.g. `dictGet(..., id) = ''` vs `id IN (SELECT id FROM dictionary(...) WHERE name = '')` + /// Example: if dictionary has one row `id = 1, name = 'x'`, data has `id = 2`, and + /// attribute `DEFAULT` is `''`, `dictGet(..., 2)` returns `''`, so + /// `dictGet(..., id) = ''` is true for `id = 2`. The `IN` rewrite scans only + /// dictionary keys, so the subquery has no `id = 2` and `2 IN (...)` is false. + /// + /// One of the alternatives is to add `OR id NOT IN (SELECT id FROM dictionary(...))` when the + /// predicate is true for `DEFAULT`, but it requires another set with all dictionary keys. + /// This can be expensive to materialize, so skip the optimization in such cases. + /// + /// As a result, the current rewrite is semantically correct only when comparing `DEFAULT` + /// with the constant yields false. 
+ Field comparison_result; + try + { + auto function_resolver = FunctionFactory::instance().get(attr_comparison_function_name, context); + auto comparison_function_base = function_resolver->build(comparison_arguments); + auto comparison_result_column + = comparison_function_base->execute(comparison_arguments, comparison_function_base->getResultType(), 1, /* dry_run = */ false); + comparison_result = (*comparison_result_column)[0]; + } + catch (const Exception &) + { + /// The constant fold runs during optimization and can throw for values that runtime + /// would not evaluate. Example: `match('', '(')` throws `CANNOT_COMPILE_REGEXP`, but + /// `id < 0 AND match(dictGetString(...), '(')` can skip the `match` branch due to + /// short-circuit evaluation. If we throw here, the optimization breaks a query that + /// works without it. Skip the optimization in such cases. + return false; + } + + if (comparison_result.isNull()) + return false; + + /// Check that comparing `DEFAULT` with the constant yields false + UInt64 comparison_result_uint = 0; + return comparison_result.tryGet(comparison_result_uint) && comparison_result_uint == 0; +} + class InverseDictionaryLookupVisitor : public InDepthQueryTreeVisitorWithContext { @@ -142,6 +241,14 @@ class InverseDictionaryLookupVisitor : public InDepthQueryTreeVisitorWithContext if (getSettings()[Setting::rewrite_in_to_join]) return; + /// We build an `IN` set from the dictionary subquery, which respects `max_rows_in_set`, + /// `max_bytes_in_set` and `set_overflow_mode`. With `set_overflow_mode = 'break'`, the set + /// can be truncated and not contain all required elements, so the optimization can produce + /// wrong results. Skip the optimization in such cases. 
+ if ((getSettings()[Setting::max_rows_in_set] != 0 || getSettings()[Setting::max_bytes_in_set] != 0) + && getSettings()[Setting::set_overflow_mode] == OverflowMode::BREAK) + return; + auto * node_function = node->as(); if (!node_function) @@ -226,12 +333,37 @@ class InverseDictionaryLookupVisitor : public InDepthQueryTreeVisitorWithContext return; } + /// For complex-key dictionaries, `dictGet` and `IN` don't have the same `NULL` key semantics. + /// e.g: `dictGet(..., (k1, k2))` vs `(k1, k2) IN (SELECT k1, k2 FROM dictionary(...))` + /// Example: if `k1` is `Nullable(UInt64)` and the dictionary has `(NULL, 'a')`, + /// `dictGet(..., (k1, k2))` can match it, but `(NULL, 'a') IN (...)` is not a match + /// with `transform_null_in = 0`. + /// Single-key dictionaries are not affected. Example: if `id` is `Nullable(UInt64)`, + /// `dictGet(..., id) = 'x'` gives `NULL` for `id = NULL`, and `id IN (...)` also gives + /// `NULL` for `id = NULL`. + if (dict_structure.key && hasNullableComponentInComplexKey(dictget_function_info.key_expr_node)) + return; + const String attr_col_name = dictget_function_info.attr_col_name_node->getValue().safeGet(); if (!dict_structure.hasAttribute(attr_col_name)) return; - DataTypePtr dict_attr_col_type = dict_structure.getAttribute(attr_col_name).type; + const DictionaryAttribute & attr = dict_structure.getAttribute(attr_col_name); + DataTypePtr dict_attr_col_type = attr.type; + + const auto * const_arg_node = (dict_side == Side::LHS) ? arguments[1]->as() : arguments[0]->as(); + + /// Skip rewrites that would change query behavior. Details are in the function. 
+ if (!isRewriteSemanticallySafe( + dict_attr_col_type, + dictget_function_info.return_type, + attr.null_value, + *const_arg_node, + dict_side == Side::LHS, + attr_comparison_function_name, + getContext())) + return; auto dict_table_function = std::make_shared("dictionary"); dict_table_function->getArguments().getNodes().push_back(dictget_function_info.dict_name_node); @@ -240,23 +372,16 @@ class InverseDictionaryLookupVisitor : public InDepthQueryTreeVisitorWithContext NameAndTypePair attr_col{attr_col_name, dict_attr_col_type}; auto attr_col_node = std::make_shared(attr_col, dict_table_function); - /// Needed for dictGet functions like `dictGetString`, `dictGetInt32`, etc. - QueryTreeNodePtr attr_col_node_casted = attr_col_node; - if (!attr_col_node->getResultType()->equals(*dictget_function_info.return_type)) - { - attr_col_node_casted = createCastFunction(attr_col_node, dictget_function_info.return_type, getContext()); - } - auto attr_comparison_function_node = std::static_pointer_cast(node_function->clone()); attr_comparison_function_node->markAsOperator(); if (dict_side == Side::LHS) { - attr_comparison_function_node->getArguments().getNodes() = { attr_col_node_casted, arguments[1] }; + attr_comparison_function_node->getArguments().getNodes() = { attr_col_node, arguments[1] }; } else { - attr_comparison_function_node->getArguments().getNodes() = { arguments[0], attr_col_node_casted }; + attr_comparison_function_node->getArguments().getNodes() = { arguments[0], attr_col_node }; } resolveOrdinaryFunctionNodeByName(*attr_comparison_function_node, attr_comparison_function_name, getContext()); diff --git a/tests/queries/0_stateless/03701_optimize_inverse_dictionary_lookup_basic.reference b/tests/queries/0_stateless/03701_optimize_inverse_dictionary_lookup_basic.reference index c1d3ebf27d93..57c3e035ce9c 100644 --- a/tests/queries/0_stateless/03701_optimize_inverse_dictionary_lookup_basic.reference +++ 
b/tests/queries/0_stateless/03701_optimize_inverse_dictionary_lookup_basic.reference @@ -14,6 +14,9 @@ ORDER BY Equality, LHS 1 a 3 c +Equality, LHS, opt off +1 a +3 c Equality, RHS - plan SELECT __table1.color_id AS color_id, @@ -30,60 +33,47 @@ ORDER BY Equality, RHS 1 a 3 c -Inequality <, LHS - plan +Equality, RHS, opt off +1 a +3 c +Inequality <, LHS, no rewrite (default 0 < 10) - plan SELECT __table1.color_id AS color_id, __table1.payload AS payload FROM default.t AS __table1 -WHERE __table1.color_id IN ( - SELECT __table1.id AS id - FROM dictionary(\'colors\') AS __table1 - WHERE __table1.n < 10 -) +WHERE dictGetUInt64(\'colors\', \'n\', __table1.color_id) < 10 ORDER BY __table1.color_id ASC, __table1.payload ASC -Inequality <, LHS +Inequality <, LHS, no rewrite (default 0 < 10) 1 a 2 b 4 d 5 R -Inequality <, RHS - plan +Inequality <, RHS, no rewrite (default 0 < 10) - plan SELECT __table1.color_id AS color_id, __table1.payload AS payload FROM default.t AS __table1 -WHERE __table1.color_id IN ( - SELECT __table1.id AS id - FROM dictionary(\'colors\') AS __table1 - WHERE 10 > __table1.n -) +WHERE 10 > dictGetUInt64(\'colors\', \'n\', __table1.color_id) ORDER BY __table1.color_id ASC, __table1.payload ASC -Inequality <, RHS +Inequality <, RHS, no rewrite (default 0 < 10) 1 a 2 b 4 d 5 R -Type variant cast, >= Int32 - plan +Type mismatch not allowed, >= Int32 - plan SELECT __table1.color_id AS color_id, __table1.payload AS payload FROM default.t AS __table1 -WHERE __table1.color_id IN ( - SELECT __table1.id AS id - FROM dictionary(\'colors\') AS __table1 - WHERE _CAST(__table1.n, \'Int32\') >= 2 -) +WHERE dictGetInt32(\'colors\', \'n\', __table1.color_id) >= 2 ORDER BY __table1.color_id ASC, __table1.payload ASC -Type variant cast, >= Int32 -1 a -2 b -3 c -5 R +Type mismatch not allowed, >= Int32 LIKE - plan SELECT __table1.color_id AS color_id, @@ -100,6 +90,9 @@ ORDER BY LIKE 1 a 3 c +LIKE, opt off +1 a +3 c ILIKE - plan SELECT __table1.color_id AS color_id, 
@@ -117,6 +110,10 @@ ILIKE 1 a 3 c 5 R +ILIKE, opt off +1 a +3 c +5 R equals() - plan SELECT __table1.color_id AS color_id FROM default.t AS __table1 @@ -129,54 +126,45 @@ ORDER BY __table1.color_id ASC equals() 1 3 -notEquals - plan +equals(), opt off +1 +3 +notEquals, no rewrite (default empty string != red) - plan SELECT __table1.color_id AS color_id, __table1.payload AS payload FROM default.t AS __table1 -WHERE __table1.color_id IN ( - SELECT __table1.id AS id - FROM dictionary(\'colors\') AS __table1 - WHERE __table1.name != \'red\' -) +WHERE dictGetString(\'colors\', \'name\', __table1.color_id) != \'red\' ORDER BY __table1.color_id ASC, __table1.payload ASC -notEquals +notEquals, no rewrite (default empty string != red) 2 b 4 d 5 R -NOT LIKE r% - plan +NOT LIKE r%, no rewrite (default empty string NOT LIKE r%) - plan SELECT __table1.color_id AS color_id, __table1.payload AS payload FROM default.t AS __table1 -WHERE __table1.color_id IN ( - SELECT __table1.id AS id - FROM dictionary(\'colors\') AS __table1 - WHERE __table1.name NOT LIKE \'r%\' -) +WHERE dictGetString(\'colors\', \'name\', __table1.color_id) NOT LIKE \'r%\' ORDER BY __table1.color_id ASC, __table1.payload ASC -NOT LIKE r% +NOT LIKE r%, no rewrite (default empty string NOT LIKE r%) 2 b 4 d 5 R -NOT ILIKE r% - plan +NOT ILIKE r%, no rewrite (default empty string NOT ILIKE r%) - plan SELECT __table1.color_id AS color_id, __table1.payload AS payload FROM default.t AS __table1 -WHERE __table1.color_id IN ( - SELECT __table1.id AS id - FROM dictionary(\'colors\') AS __table1 - WHERE __table1.name NOT ILIKE \'r%\' -) +WHERE dictGetString(\'colors\', \'name\', __table1.color_id) NOT ILIKE \'r%\' ORDER BY __table1.color_id ASC, __table1.payload ASC -NOT ILIKE r% +NOT ILIKE r%, no rewrite (default empty string NOT ILIKE r%) 2 b 4 d match ^r - plan @@ -195,6 +183,9 @@ ORDER BY match ^r 1 a 3 c +match ^r, opt off +1 a +3 c NOT recursion - plan SELECT __table1.color_id AS color_id, @@ -212,6 +203,10 @@ NOT 
recursion 2 b 4 d 5 R +NOT recursion, opt off +2 b +4 d +5 R AND/OR recursion - plan SELECT __table1.color_id AS color_id, @@ -221,11 +216,7 @@ WHERE ((__table1.color_id IN ( SELECT __table1.id AS id FROM dictionary(\'colors\') AS __table1 WHERE __table1.name = \'red\' -)) AND (__table1.color_id IN ( - SELECT __table1.id AS id - FROM dictionary(\'colors\') AS __table1 - WHERE __table1.n < 10 -))) OR (__table1.color_id IN ( +)) AND (dictGetUInt64(\'colors\', \'n\', __table1.color_id) < 10)) OR (__table1.color_id IN ( SELECT __table1.id AS id FROM dictionary(\'colors\') AS __table1 WHERE __table1.name = \'green\' @@ -236,7 +227,10 @@ ORDER BY AND/OR recursion 1 a 4 d -NULL constant - plan +AND/OR recursion, opt off +1 a +4 d +NULL constant, no rewrite - plan SELECT __table1.color_id AS color_id, __table1.payload AS payload @@ -245,7 +239,7 @@ WHERE _CAST(NULL, \'Nullable(Nothing)\') ORDER BY __table1.color_id ASC, __table1.payload ASC -NULL constant +NULL constant, no rewrite PREWHERE - plan SELECT __table1.color_id AS color_id FROM default.t AS __table1 @@ -258,6 +252,9 @@ ORDER BY __table1.color_id ASC PREWHERE 1 3 +PREWHERE, opt off +1 +3 QUALIFY - plan SELECT __table1.color_id AS color_id, @@ -274,6 +271,9 @@ ORDER BY QUALIFY 1 1 3 3 +QUALIFY, opt off +1 1 +3 3 Empty result set - plan SELECT __table1.color_id AS color_id FROM default.t AS __table1 @@ -284,6 +284,7 @@ WHERE __table1.color_id IN ( ) ORDER BY __table1.color_id ASC Empty result set +Empty result set, opt off HAVING - plan SELECT __table1.color_id AS color_id, @@ -301,6 +302,9 @@ ORDER BY HAVING 1 1 3 1 +HAVING, opt off +1 1 +3 1 JOIN ON (INNER) - plan SELECT __table1.color_id AS color_id, @@ -319,6 +323,9 @@ ORDER BY JOIN ON (INNER) 1 a a 3 c c +JOIN ON (INNER), opt off +1 a a +3 c c JOIN ON (LEFT) - plan SELECT __table1.color_id AS color_id, @@ -340,6 +347,12 @@ JOIN ON (LEFT) 3 c c 4 d 5 R +JOIN ON (LEFT), opt off +1 a a +2 b +3 c c +4 d +5 R SELECT multiIf - plan SELECT __table1.color_id AS 
color_id, @@ -364,6 +377,12 @@ SELECT multiIf 3 c match 4 d no_match 5 R no_match +SELECT multiIf, opt off +1 a match +2 b no_match +3 c match +4 d no_match +5 R no_match countIf - plan SELECT countIf(__table1.color_id IN ( SELECT __table1.id AS id @@ -373,6 +392,8 @@ SELECT countIf(__table1.color_id IN ( FROM default.t AS __table1 countIf 2 +countIf, opt off +2 sumIf - plan SELECT sumIf(__table1.color_id, (__table1.color_id IN ( SELECT __table1.id AS id @@ -382,6 +403,8 @@ SELECT sumIf(__table1.color_id, (__table1.color_id IN ( FROM default.t AS __table1 sumIf 4 +sumIf, opt off +4 ORDER BY - plan SELECT __table1.color_id AS color_id, @@ -401,6 +424,12 @@ ORDER BY 2 b 4 d 5 R +ORDER BY, opt off +1 a +3 c +2 b +4 d +5 R GROUP BY - plan SELECT __table1.color_id IN ( @@ -425,6 +454,9 @@ ORDER BY GROUP BY 0 3 1 2 +GROUP BY, opt off +0 3 +1 2 LIMIT BY - plan SELECT __table1.color_id AS color_id, @@ -441,6 +473,9 @@ LIMIT _CAST(1, \'UInt64\') BY __table1.color_id IN ( LIMIT BY 1 a 2 b +LIMIT BY, opt off +1 a +2 b WINDOW PARTITION BY - plan SELECT __table1.color_id AS color_id, @@ -463,9 +498,16 @@ WINDOW PARTITION BY 3 2 4 2 5 3 +WINDOW PARTITION BY, opt off +1 1 +2 1 +3 2 +4 2 +5 3 Negative: non-constant RHS - plan SELECT __table1.color_id AS color_id FROM default.t AS __table1 WHERE dictGetString(\'colors\', \'name\', __table1.color_id) = __table1.payload ORDER BY __table1.color_id ASC Negative: non-constant RHS +Negative: non-constant RHS, opt off diff --git a/tests/queries/0_stateless/03701_optimize_inverse_dictionary_lookup_basic.sql b/tests/queries/0_stateless/03701_optimize_inverse_dictionary_lookup_basic.sql index 191756168d6f..8ce427ed7b2f 100644 --- a/tests/queries/0_stateless/03701_optimize_inverse_dictionary_lookup_basic.sql +++ b/tests/queries/0_stateless/03701_optimize_inverse_dictionary_lookup_basic.sql @@ -1,5 +1,6 @@ -- Tags: no-replicated-database, no-parallel-replicas --- no-parallel, no-parallel-replicas: Dictionary is not created in parallel 
replicas. +-- no-replicated-database: EXPLAIN output differs for replicated database. +-- no-parallel-replicas: Dictionary is not available on parallel-replica workers. SET enable_analyzer = 1; SET optimize_inverse_dictionary_lookup = 1; @@ -64,6 +65,12 @@ SELECT color_id, payload FROM t WHERE dictGetString('colors', 'name', color_id) = 'red' ORDER BY color_id, payload; +SELECT 'Equality, LHS, opt off'; +SELECT color_id, payload +FROM t +WHERE dictGetString('colors', 'name', color_id) = 'red' +ORDER BY color_id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'Equality, RHS - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -77,45 +84,51 @@ SELECT color_id, payload FROM t WHERE 'red' = dictGetString('colors', 'name', color_id) ORDER BY color_id, payload; +SELECT 'Equality, RHS, opt off'; +SELECT color_id, payload +FROM t +WHERE 'red' = dictGetString('colors', 'name', color_id) +ORDER BY color_id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; -SELECT 'Inequality <, LHS - plan'; +SELECT 'Inequality <, LHS, no rewrite (default 0 < 10) - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 SELECT color_id, payload FROM t WHERE dictGetUInt64('colors', 'n', color_id) < 10 ORDER BY color_id, payload; -SELECT 'Inequality <, LHS'; +SELECT 'Inequality <, LHS, no rewrite (default 0 < 10)'; SELECT color_id, payload FROM t WHERE dictGetUInt64('colors', 'n', color_id) < 10 ORDER BY color_id, payload; -SELECT 'Inequality <, RHS - plan'; +SELECT 'Inequality <, RHS, no rewrite (default 0 < 10) - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 SELECT color_id, payload FROM t WHERE 10 > dictGetUInt64('colors', 'n', color_id) ORDER BY color_id, payload; -SELECT 'Inequality <, RHS'; +SELECT 'Inequality <, RHS, no rewrite (default 0 < 10)'; SELECT color_id, payload FROM t WHERE 10 > dictGetUInt64('colors', 'n', color_id) ORDER BY color_id, payload; -SELECT 'Type variant cast, >= Int32 - plan'; +SELECT 'Type mismatch not allowed, >= Int32 - plan'; EXPLAIN SYNTAX 
run_query_tree_passes=1 SELECT color_id, payload FROM t WHERE dictGetInt32('colors', 'n', color_id) >= 2 ORDER BY color_id, payload; -SELECT 'Type variant cast, >= Int32'; +SELECT 'Type mismatch not allowed, >= Int32'; SELECT color_id, payload FROM t WHERE dictGetInt32('colors', 'n', color_id) >= 2 -ORDER BY color_id, payload; +ORDER BY color_id, payload; -- { serverError TYPE_MISMATCH } SELECT 'LIKE - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -129,6 +142,12 @@ SELECT color_id, payload FROM t WHERE dictGetString('colors', 'name', color_id) LIKE 'r%' ORDER BY color_id, payload; +SELECT 'LIKE, opt off'; +SELECT color_id, payload +FROM t +WHERE dictGetString('colors', 'name', color_id) LIKE 'r%' +ORDER BY color_id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'ILIKE - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -142,6 +161,12 @@ SELECT color_id, payload FROM t WHERE dictGetString('colors', 'name', color_id) ILIKE 'r%' ORDER BY color_id, payload; +SELECT 'ILIKE, opt off'; +SELECT color_id, payload +FROM t +WHERE dictGetString('colors', 'name', color_id) ILIKE 'r%' +ORDER BY color_id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'equals() - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -155,41 +180,47 @@ SELECT color_id FROM t WHERE equals(dictGetString('colors','name', color_id), 'red') ORDER BY color_id; +SELECT 'equals(), opt off'; +SELECT color_id +FROM t +WHERE equals(dictGetString('colors','name', color_id), 'red') +ORDER BY color_id +SETTINGS optimize_inverse_dictionary_lookup = 0; -SELECT 'notEquals - plan'; +SELECT 'notEquals, no rewrite (default empty string != red) - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 SELECT color_id, payload FROM t WHERE dictGetString('colors','name', color_id) != 'red' ORDER BY color_id, payload; -SELECT 'notEquals'; +SELECT 'notEquals, no rewrite (default empty string != red)'; SELECT color_id, payload FROM t WHERE dictGetString('colors','name', color_id) != 'red' ORDER BY 
color_id, payload; -SELECT 'NOT LIKE r% - plan'; +SELECT 'NOT LIKE r%, no rewrite (default empty string NOT LIKE r%) - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 SELECT color_id, payload FROM t WHERE dictGetString('colors','name', color_id) NOT LIKE 'r%' ORDER BY color_id, payload; -SELECT 'NOT LIKE r%'; +SELECT 'NOT LIKE r%, no rewrite (default empty string NOT LIKE r%)'; SELECT color_id, payload FROM t WHERE dictGetString('colors','name', color_id) NOT LIKE 'r%' ORDER BY color_id, payload; -SELECT 'NOT ILIKE r% - plan'; +SELECT 'NOT ILIKE r%, no rewrite (default empty string NOT ILIKE r%) - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 SELECT color_id, payload FROM t WHERE dictGetString('colors','name', color_id) NOT ILIKE 'r%' ORDER BY color_id, payload; -SELECT 'NOT ILIKE r%'; +SELECT 'NOT ILIKE r%, no rewrite (default empty string NOT ILIKE r%)'; SELECT color_id, payload FROM t WHERE dictGetString('colors','name', color_id) NOT ILIKE 'r%' @@ -207,6 +238,12 @@ SELECT color_id, payload FROM t WHERE match(dictGetString('colors','name', color_id), '^r') ORDER BY color_id, payload; +SELECT 'match ^r, opt off'; +SELECT color_id, payload +FROM t +WHERE match(dictGetString('colors','name', color_id), '^r') +ORDER BY color_id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'NOT recursion - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -220,6 +257,12 @@ SELECT color_id, payload FROM t WHERE NOT (dictGetString('colors', 'name', color_id) = 'red') ORDER BY color_id, payload; +SELECT 'NOT recursion, opt off'; +SELECT color_id, payload +FROM t +WHERE NOT (dictGetString('colors', 'name', color_id) = 'red') +ORDER BY color_id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'AND/OR recursion - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -235,15 +278,22 @@ FROM t WHERE (dictGetString('colors', 'name', color_id) = 'red' AND dictGetUInt64('colors', 'n', color_id) < 10) OR dictGetString('colors', 'name', color_id) = 'green' ORDER BY 
color_id, payload; +SELECT 'AND/OR recursion, opt off'; +SELECT color_id, payload +FROM t +WHERE (dictGetString('colors', 'name', color_id) = 'red' AND dictGetUInt64('colors', 'n', color_id) < 10) + OR dictGetString('colors', 'name', color_id) = 'green' +ORDER BY color_id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; -SELECT 'NULL constant - plan'; +SELECT 'NULL constant, no rewrite - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 SELECT color_id, payload FROM t WHERE dictGetString('colors', 'name', color_id) = NULL ORDER BY color_id, payload; -SELECT 'NULL constant'; +SELECT 'NULL constant, no rewrite'; SELECT color_id, payload FROM t WHERE dictGetString('colors', 'name', color_id) = NULL @@ -261,6 +311,12 @@ SELECT color_id FROM t PREWHERE dictGetString('colors', 'name', color_id) = 'red' ORDER BY color_id; +SELECT 'PREWHERE, opt off'; +SELECT color_id +FROM t +PREWHERE dictGetString('colors', 'name', color_id) = 'red' +ORDER BY color_id +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'QUALIFY - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -274,6 +330,12 @@ SELECT color_id, row_number() OVER (PARTITION BY 1 ORDER BY color_id) AS rn FROM t QUALIFY dictGetString('colors', 'name', color_id) = 'red' ORDER BY color_id, rn; +SELECT 'QUALIFY, opt off'; +SELECT color_id, row_number() OVER (PARTITION BY 1 ORDER BY color_id) AS rn +FROM t +QUALIFY dictGetString('colors', 'name', color_id) = 'red' +ORDER BY color_id, rn +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'Empty result set - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -287,6 +349,12 @@ SELECT color_id FROM t WHERE dictGetString('colors', 'name', color_id) = 'nonexistent_color' ORDER BY color_id; +SELECT 'Empty result set, opt off'; +SELECT color_id +FROM t +WHERE dictGetString('colors', 'name', color_id) = 'nonexistent_color' +ORDER BY color_id +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'HAVING - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -302,6 +370,13 
@@ FROM t GROUP BY color_id HAVING dictGetString('colors','name', color_id) = 'red' ORDER BY color_id, c; +SELECT 'HAVING, opt off'; +SELECT color_id, count() AS c +FROM t +GROUP BY color_id +HAVING dictGetString('colors','name', color_id) = 'red' +ORDER BY color_id, c +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'JOIN ON (INNER) - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -319,6 +394,14 @@ INNER JOIN t AS t2 ON t1.color_id = t2.color_id AND dictGetString('colors','name', t1.color_id) = 'red' ORDER BY t1.color_id, t1.payload, payload2; +SELECT 'JOIN ON (INNER), opt off'; +SELECT t1.color_id, t1.payload, t2.payload AS payload2 +FROM t AS t1 +INNER JOIN t AS t2 + ON t1.color_id = t2.color_id + AND dictGetString('colors','name', t1.color_id) = 'red' +ORDER BY t1.color_id, t1.payload, payload2 +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'JOIN ON (LEFT) - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -336,6 +419,14 @@ LEFT JOIN t AS t2 ON t1.color_id = t2.color_id AND dictGetString('colors','name', t1.color_id) = 'red' ORDER BY t1.color_id, t1.payload, payload2; +SELECT 'JOIN ON (LEFT), opt off'; +SELECT t1.color_id, t1.payload, t2.payload AS payload2 +FROM t AS t1 +LEFT JOIN t AS t2 + ON t1.color_id = t2.color_id + AND dictGetString('colors','name', t1.color_id) = 'red' +ORDER BY t1.color_id, t1.payload, payload2 +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'SELECT multiIf - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -349,6 +440,12 @@ SELECT color_id, payload, multiIf(dictGetString('colors','name', color_id) = 'red', 'match', 'no_match') AS tag FROM t ORDER BY color_id, payload, tag; +SELECT 'SELECT multiIf, opt off'; +SELECT color_id, payload, + multiIf(dictGetString('colors','name', color_id) = 'red', 'match', 'no_match') AS tag +FROM t +ORDER BY color_id, payload, tag +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'countIf - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -358,6 +455,10 @@ FROM t; 
SELECT 'countIf'; SELECT countIf(dictGetString('colors','name', color_id) = 'red') AS cnt FROM t; +SELECT 'countIf, opt off'; +SELECT countIf(dictGetString('colors','name', color_id) = 'red') AS cnt +FROM t +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'sumIf - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -367,6 +468,10 @@ FROM t; SELECT 'sumIf'; SELECT sumIf(color_id, dictGetString('colors','name', color_id) = 'red') AS sum_id_match FROM t; +SELECT 'sumIf, opt off'; +SELECT sumIf(color_id, dictGetString('colors','name', color_id) = 'red') AS sum_id_match +FROM t +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'ORDER BY - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -378,6 +483,11 @@ SELECT 'ORDER BY'; SELECT color_id, payload FROM t ORDER BY (dictGetString('colors','name', color_id) = 'red') DESC, color_id, payload; +SELECT 'ORDER BY, opt off'; +SELECT color_id, payload +FROM t +ORDER BY (dictGetString('colors','name', color_id) = 'red') DESC, color_id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'GROUP BY - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -391,6 +501,12 @@ SELECT (dictGetString('colors','name', color_id) = 'red') AS is_red, count() AS FROM t GROUP BY (dictGetString('colors','name', color_id) = 'red') ORDER BY is_red, c; +SELECT 'GROUP BY, opt off'; +SELECT (dictGetString('colors','name', color_id) = 'red') AS is_red, count() AS c +FROM t +GROUP BY (dictGetString('colors','name', color_id) = 'red') +ORDER BY is_red, c +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'LIMIT BY - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -404,6 +520,12 @@ SELECT color_id, payload FROM t ORDER BY color_id, payload LIMIT 1 BY (dictGetString('colors','name', color_id) = 'red'); +SELECT 'LIMIT BY, opt off'; +SELECT color_id, payload +FROM t +ORDER BY color_id, payload +LIMIT 1 BY (dictGetString('colors','name', color_id) = 'red') +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'WINDOW PARTITION BY 
- plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -423,6 +545,15 @@ SELECT color_id, ) AS rn FROM t ORDER BY color_id, rn; +SELECT 'WINDOW PARTITION BY, opt off'; +SELECT color_id, + row_number() OVER ( + PARTITION BY (dictGetString('colors','name', color_id) = 'red') + ORDER BY color_id + ) AS rn +FROM t +ORDER BY color_id, rn +SETTINGS optimize_inverse_dictionary_lookup = 0; -- Negative: non-constant RHS, expect no rewrite SELECT 'Negative: non-constant RHS - plan'; @@ -437,6 +568,12 @@ SELECT color_id FROM t WHERE dictGetString('colors', 'name', color_id) = payload ORDER BY color_id; +SELECT 'Negative: non-constant RHS, opt off'; +SELECT color_id +FROM t +WHERE dictGetString('colors', 'name', color_id) = payload +ORDER BY color_id +SETTINGS optimize_inverse_dictionary_lookup = 0; -- Validation of attribute name @@ -493,4 +630,4 @@ LAYOUT(HASHED()); SELECT DISTINCT 13, *, or(-32 = dictGetInt32(toFixedString('dictionary_all', toLowCardinality(14)), toFixedString('i32', 3), id), isNotDistinctFrom(1, 9223372036854775806), toLowCardinality(19), not(equals(payload, 9223372036854775806))), isNotNull('dictGetFloat64 - plan'), id, isNotNull(1) FROM tab__fuzz_24 PREWHERE equals(9223372036854775806, payload) WHERE isNotDistinctFrom(id, isNotDistinctFrom(9223372036854775806, equals(1, isNotNull(9223372036854775806)))) QUALIFY and(NULL, equals(1, isZeroOrNull(1))) -ORDER BY payload DESC; +ORDER BY payload DESC; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/03702_optimize_inverse_dictionary_lookup_composite_and_layouts.reference b/tests/queries/0_stateless/03702_optimize_inverse_dictionary_lookup_composite_and_layouts.reference index 233bcde45a12..527f584c36fa 100644 --- a/tests/queries/0_stateless/03702_optimize_inverse_dictionary_lookup_composite_and_layouts.reference +++ b/tests/queries/0_stateless/03702_optimize_inverse_dictionary_lookup_composite_and_layouts.reference @@ -17,6 +17,8 @@ ORDER BY __table1.payload ASC ComplexKeyHashed 1 
a x +ComplexKeyHashed, opt off +1 a x ComplexHashedArray - plan SELECT __table1.k1 AS k1, @@ -36,6 +38,8 @@ ORDER BY __table1.payload ASC ComplexHashedArray 1 a x +ComplexHashedArray, opt off +1 a x ComplexKeySparseHashed - plan SELECT __table1.k1 AS k1, @@ -55,6 +59,8 @@ ORDER BY __table1.payload ASC ComplexKeySparseHashed 1 a x +ComplexKeySparseHashed, opt off +1 a x Flat - plan SELECT __table1.id AS id, @@ -72,6 +78,10 @@ Flat 1 u 1 x 3 z +Flat, opt off +1 u +1 x +3 z Hashed - plan SELECT __table1.id AS id, @@ -89,6 +99,10 @@ Hashed 1 u 1 x 3 z +Hashed, opt off +1 u +1 x +3 z HashedArray - plan SELECT __table1.id AS id, @@ -106,6 +120,10 @@ HashedArray 1 u 1 x 3 z +HashedArray, opt off +1 u +1 x +3 z SparseHashed - plan SELECT __table1.id AS id, @@ -123,3 +141,7 @@ SparseHashed 1 u 1 x 3 z +SparseHashed, opt off +1 u +1 x +3 z diff --git a/tests/queries/0_stateless/03702_optimize_inverse_dictionary_lookup_composite_and_layouts.sql b/tests/queries/0_stateless/03702_optimize_inverse_dictionary_lookup_composite_and_layouts.sql index 3f112233907d..1d928b6b7962 100644 --- a/tests/queries/0_stateless/03702_optimize_inverse_dictionary_lookup_composite_and_layouts.sql +++ b/tests/queries/0_stateless/03702_optimize_inverse_dictionary_lookup_composite_and_layouts.sql @@ -1,5 +1,6 @@ -- Tags: no-replicated-database, no-parallel-replicas --- no-parallel, no-parallel-replicas: Dictionary is not created in parallel replicas. +-- no-replicated-database: EXPLAIN output differs for replicated database. +-- no-parallel-replicas: Dictionary is not available on parallel-replica workers. 
SET enable_analyzer = 1; SET optimize_inverse_dictionary_lookup = 1; @@ -156,6 +157,12 @@ SELECT k1, k2, payload FROM f WHERE dictGet('dict_prices_ckh', 'tag', (k1, k2)) = 'pro' ORDER BY k1, k2, payload; +SELECT 'ComplexKeyHashed, opt off'; +SELECT k1, k2, payload +FROM f +WHERE dictGet('dict_prices_ckh', 'tag', (k1, k2)) = 'pro' +ORDER BY k1, k2, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'ComplexHashedArray - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -169,6 +176,12 @@ SELECT k1, k2, payload FROM f WHERE dictGet('dict_prices_ch_array', 'tag', (k1, k2)) = 'pro' ORDER BY k1, k2, payload; +SELECT 'ComplexHashedArray, opt off'; +SELECT k1, k2, payload +FROM f +WHERE dictGet('dict_prices_ch_array', 'tag', (k1, k2)) = 'pro' +ORDER BY k1, k2, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'ComplexKeySparseHashed - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -181,6 +194,12 @@ SELECT k1, k2, payload FROM f WHERE dictGet('dict_prices_ck_sparse_hashed', 'tag', (k1, k2)) = 'pro' ORDER BY k1, k2, payload; +SELECT 'ComplexKeySparseHashed, opt off'; +SELECT k1, k2, payload +FROM f +WHERE dictGet('dict_prices_ck_sparse_hashed', 'tag', (k1, k2)) = 'pro' +ORDER BY k1, k2, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'Flat - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -194,6 +213,12 @@ SELECT id, payload FROM f WHERE dictGet('dict_items_flat', 'name', id) = 'alpha' ORDER BY id, payload; +SELECT 'Flat, opt off'; +SELECT id, payload +FROM f +WHERE dictGet('dict_items_flat', 'name', id) = 'alpha' +ORDER BY id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'Hashed - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -207,6 +232,12 @@ SELECT id, payload FROM f WHERE dictGet('dict_items_hashed', 'name', id) = 'alpha' ORDER BY id, payload; +SELECT 'Hashed, opt off'; +SELECT id, payload +FROM f +WHERE dictGet('dict_items_hashed', 'name', id) = 'alpha' +ORDER BY id, payload +SETTINGS 
optimize_inverse_dictionary_lookup = 0; SELECT 'HashedArray - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -220,6 +251,12 @@ SELECT id, payload FROM f WHERE dictGet('dict_items_hashed_array', 'name', id) = 'alpha' ORDER BY id, payload; +SELECT 'HashedArray, opt off'; +SELECT id, payload +FROM f +WHERE dictGet('dict_items_hashed_array', 'name', id) = 'alpha' +ORDER BY id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'SparseHashed - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -233,3 +270,9 @@ SELECT id, payload FROM f WHERE dictGet('dict_items_sparse_hashed', 'name', id) = 'alpha' ORDER BY id, payload; +SELECT 'SparseHashed, opt off'; +SELECT id, payload +FROM f +WHERE dictGet('dict_items_sparse_hashed', 'name', id) = 'alpha' +ORDER BY id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; diff --git a/tests/queries/0_stateless/03703_optimize_inverse_dictionary_lookup_dictget_family.reference b/tests/queries/0_stateless/03703_optimize_inverse_dictionary_lookup_dictget_family.reference index e192bdd67093..88cbc97389a4 100644 --- a/tests/queries/0_stateless/03703_optimize_inverse_dictionary_lookup_dictget_family.reference +++ b/tests/queries/0_stateless/03703_optimize_inverse_dictionary_lookup_dictget_family.reference @@ -13,6 +13,8 @@ ORDER BY __table1.payload ASC dictGet (generic) 1 x +dictGet (generic), opt off +1 x dictGetString - plan SELECT __table1.id AS id, @@ -28,6 +30,8 @@ ORDER BY __table1.payload ASC dictGetString 1 x +dictGetString, opt off +1 x dictGetInt32 - plan SELECT __table1.id AS id, @@ -36,13 +40,15 @@ FROM default.tab AS __table1 WHERE __table1.id IN ( SELECT __table1.id AS id FROM dictionary(\'dictionary_all\') AS __table1 - WHERE _CAST(__table1.i32, \'Int32\') = -32 + WHERE __table1.i32 = -32 ) ORDER BY __table1.id ASC, __table1.payload ASC dictGetInt32 1 x +dictGetInt32, opt off +1 x dictGetUInt64 - plan SELECT __table1.id AS id, @@ -51,13 +57,15 @@ FROM default.tab AS __table1 WHERE __table1.id IN ( SELECT 
__table1.id AS id FROM dictionary(\'dictionary_all\') AS __table1 - WHERE _CAST(__table1.u64, \'UInt64\') = 64 + WHERE __table1.u64 = 64 ) ORDER BY __table1.id ASC, __table1.payload ASC dictGetUInt64 1 x +dictGetUInt64, opt off +1 x dictGetFloat64 - plan SELECT __table1.id AS id, @@ -66,13 +74,15 @@ FROM default.tab AS __table1 WHERE __table1.id IN ( SELECT __table1.id AS id FROM dictionary(\'dictionary_all\') AS __table1 - WHERE _CAST(__table1.f64, \'Float64\') = 20. + WHERE __table1.f64 = 20. ) ORDER BY __table1.id ASC, __table1.payload ASC dictGetFloat64 1 x +dictGetFloat64, opt off +1 x dictGetDate - plan SELECT __table1.id AS id, @@ -81,13 +91,15 @@ FROM default.tab AS __table1 WHERE __table1.id IN ( SELECT __table1.id AS id FROM dictionary(\'dictionary_all\') AS __table1 - WHERE _CAST(__table1.d, \'Date\') = _CAST(\'2025-01-01\', \'Date\') + WHERE __table1.d = _CAST(\'2025-01-01\', \'Date\') ) ORDER BY __table1.id ASC, __table1.payload ASC dictGetDate 1 x +dictGetDate, opt off +1 x dictGetDateTime - plan SELECT __table1.id AS id, @@ -96,13 +108,15 @@ FROM default.tab AS __table1 WHERE __table1.id IN ( SELECT __table1.id AS id FROM dictionary(\'dictionary_all\') AS __table1 - WHERE _CAST(__table1.dt, \'DateTime\') = _CAST(\'2025-01-01 10:00:00\', \'DateTime\') + WHERE __table1.dt = _CAST(\'2025-01-01 10:00:00\', \'DateTime\') ) ORDER BY __table1.id ASC, __table1.payload ASC dictGetDateTime 1 x +dictGetDateTime, opt off +1 x dictGetUUID - plan SELECT __table1.id AS id, @@ -111,13 +125,15 @@ FROM default.tab AS __table1 WHERE __table1.id IN ( SELECT __table1.id AS id FROM dictionary(\'dictionary_all\') AS __table1 - WHERE _CAST(__table1.uid, \'UUID\') = _CAST(\'00000000-0000-0000-0000-000000000001\', \'UUID\') + WHERE __table1.uid = _CAST(\'00000000-0000-0000-0000-000000000001\', \'UUID\') ) ORDER BY __table1.id ASC, __table1.payload ASC dictGetUUID 1 x +dictGetUUID, opt off +1 x dictGetIPv4 - plan SELECT __table1.id AS id, @@ -126,13 +142,15 @@ FROM default.tab 
AS __table1 WHERE __table1.id IN ( SELECT __table1.id AS id FROM dictionary(\'dictionary_all\') AS __table1 - WHERE _CAST(__table1.ip4, \'IPv4\') = _CAST(\'192.168.0.1\', \'IPv4\') + WHERE __table1.ip4 = _CAST(\'192.168.0.1\', \'IPv4\') ) ORDER BY __table1.id ASC, __table1.payload ASC dictGetIPv4 1 x +dictGetIPv4, opt off +1 x dictGetIPv6 - plan SELECT __table1.id AS id, @@ -141,29 +159,27 @@ FROM default.tab AS __table1 WHERE __table1.id IN ( SELECT __table1.id AS id FROM dictionary(\'dictionary_all\') AS __table1 - WHERE _CAST(__table1.ip6, \'IPv6\') = _CAST(\'2001:db8::1\', \'IPv6\') + WHERE __table1.ip6 = _CAST(\'2001:db8::1\', \'IPv6\') ) ORDER BY __table1.id ASC, __table1.payload ASC dictGetIPv6 1 x -dictGetOrNull(String) - plan +dictGetIPv6, opt off +1 x +dictGetOrNull(String), no rewrite (dictGetOrNull is not supported by the optimization) - plan SELECT __table1.id AS id, __table1.payload AS payload FROM default.tab AS __table1 -WHERE _CAST((__table1.id IN ( - SELECT __table1.id AS id - FROM dictionary(\'dictionary_all\') AS __table1 - WHERE _CAST(__table1.name, \'Nullable(String)\') = \'alpha\' -)), \'Nullable(UInt8)\') +WHERE dictGetOrNull(\'dictionary_all\', \'name\', __table1.id) = \'alpha\' ORDER BY __table1.id ASC, __table1.payload ASC -dictGetOrNull(String) +dictGetOrNull(String), no rewrite (dictGetOrNull is not supported by the optimization) 1 x -dictGetOrNull(String) IS NULL - plan +dictGetOrNull(String) IS NULL, no rewrite (dictGetOrNull is not supported by the optimization) - plan SELECT __table1.id AS id, __table1.payload AS payload @@ -172,5 +188,5 @@ WHERE isNull(dictGetOrNull(\'dictionary_all\', \'name\', __table1.id)) ORDER BY __table1.id ASC, __table1.payload ASC -dictGetOrNull(String) IS NULL +dictGetOrNull(String) IS NULL, no rewrite (dictGetOrNull is not supported by the optimization) 99 z diff --git a/tests/queries/0_stateless/03703_optimize_inverse_dictionary_lookup_dictget_family.sql 
b/tests/queries/0_stateless/03703_optimize_inverse_dictionary_lookup_dictget_family.sql index fbdbc37dd9ca..da4c43217871 100644 --- a/tests/queries/0_stateless/03703_optimize_inverse_dictionary_lookup_dictget_family.sql +++ b/tests/queries/0_stateless/03703_optimize_inverse_dictionary_lookup_dictget_family.sql @@ -1,5 +1,6 @@ -- Tags: no-replicated-database, no-parallel-replicas --- no-parallel, no-parallel-replicas: Dictionary is not created in parallel replicas. +-- no-replicated-database: EXPLAIN output differs for replicated database. +-- no-parallel-replicas: Dictionary is not available on parallel-replica workers. SET enable_analyzer = 1; SET optimize_inverse_dictionary_lookup = 1; @@ -13,21 +14,21 @@ CREATE TABLE ref_table_all ( id UInt64, name String, - i8 String, - i16 String, - i32 String, - i64 String, - u8 String, - u16 String, - u32 String, - u64 String, - f32 String, - f64 String, - d String, - dt String, - uid String, - ip4 String, - ip6 String + i8 Int8, + i16 Int16, + i32 Int32, + i64 Int64, + u8 UInt8, + u16 UInt16, + u32 UInt32, + u64 UInt64, + f32 Float32, + f64 Float64, + d Date, + dt DateTime, + uid UUID, + ip4 IPv4, + ip6 IPv6 ) ENGINE = MergeTree ORDER BY id; @@ -46,21 +47,21 @@ CREATE DICTIONARY dictionary_all ( id UInt64, name String, - i8 String, - i16 String, - i32 String, - i64 String, - u8 String, - u16 String, - u32 String, - u64 String, - f32 String, - f64 String, - d String, - dt String, - uid String, - ip4 String, - ip6 String + i8 Int8, + i16 Int16, + i32 Int32, + i64 Int64, + u8 UInt8, + u16 UInt16, + u32 UInt32, + u64 UInt64, + f32 Float32, + f64 Float64, + d Date, + dt DateTime, + uid UUID, + ip4 IPv4, + ip6 IPv6 ) PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'ref_table_all')) @@ -87,6 +88,11 @@ SELECT 'dictGet (generic)'; SELECT id, payload FROM tab WHERE dictGet('dictionary_all', 'name', id) = 'alpha' ORDER BY id, payload; +SELECT 'dictGet (generic), opt off'; +SELECT id, payload FROM tab +WHERE dictGet('dictionary_all', 'name', 
id) = 'alpha' +ORDER BY id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'dictGetString - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -98,6 +104,11 @@ SELECT 'dictGetString'; SELECT id, payload FROM tab WHERE dictGetString('dictionary_all', 'name', id) = 'alpha' ORDER BY id, payload; +SELECT 'dictGetString, opt off'; +SELECT id, payload FROM tab +WHERE dictGetString('dictionary_all', 'name', id) = 'alpha' +ORDER BY id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'dictGetInt32 - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -109,6 +120,11 @@ SELECT 'dictGetInt32'; SELECT id, payload FROM tab WHERE dictGetInt32('dictionary_all', 'i32', id) = -32 ORDER BY id, payload; +SELECT 'dictGetInt32, opt off'; +SELECT id, payload FROM tab +WHERE dictGetInt32('dictionary_all', 'i32', id) = -32 +ORDER BY id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'dictGetUInt64 - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -120,6 +136,11 @@ SELECT 'dictGetUInt64'; SELECT id, payload FROM tab WHERE dictGetUInt64('dictionary_all', 'u64', id) = 64 ORDER BY id, payload; +SELECT 'dictGetUInt64, opt off'; +SELECT id, payload FROM tab +WHERE dictGetUInt64('dictionary_all', 'u64', id) = 64 +ORDER BY id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'dictGetFloat64 - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -131,6 +152,11 @@ SELECT 'dictGetFloat64'; SELECT id, payload FROM tab WHERE dictGetFloat64('dictionary_all', 'f64', id) = 20.0 ORDER BY id, payload; +SELECT 'dictGetFloat64, opt off'; +SELECT id, payload FROM tab +WHERE dictGetFloat64('dictionary_all', 'f64', id) = 20.0 +ORDER BY id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'dictGetDate - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -142,6 +168,11 @@ SELECT 'dictGetDate'; SELECT id, payload FROM tab WHERE dictGetDate('dictionary_all', 'd', id) = toDate('2025-01-01') ORDER BY id, payload; +SELECT 'dictGetDate, opt off'; 
+SELECT id, payload FROM tab +WHERE dictGetDate('dictionary_all', 'd', id) = toDate('2025-01-01') +ORDER BY id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'dictGetDateTime - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -153,6 +184,11 @@ SELECT 'dictGetDateTime'; SELECT id, payload FROM tab WHERE dictGetDateTime('dictionary_all', 'dt', id) = toDateTime('2025-01-01 10:00:00') ORDER BY id, payload; +SELECT 'dictGetDateTime, opt off'; +SELECT id, payload FROM tab +WHERE dictGetDateTime('dictionary_all', 'dt', id) = toDateTime('2025-01-01 10:00:00') +ORDER BY id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'dictGetUUID - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -164,6 +200,11 @@ SELECT 'dictGetUUID'; SELECT id, payload FROM tab WHERE dictGetUUID('dictionary_all', 'uid', id) = toUUID('00000000-0000-0000-0000-000000000001') ORDER BY id, payload; +SELECT 'dictGetUUID, opt off'; +SELECT id, payload FROM tab +WHERE dictGetUUID('dictionary_all', 'uid', id) = toUUID('00000000-0000-0000-0000-000000000001') +ORDER BY id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'dictGetIPv4 - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -175,6 +216,11 @@ SELECT 'dictGetIPv4'; SELECT id, payload FROM tab WHERE dictGetIPv4('dictionary_all', 'ip4', id) = toIPv4('192.168.0.1') ORDER BY id, payload; +SELECT 'dictGetIPv4, opt off'; +SELECT id, payload FROM tab +WHERE dictGetIPv4('dictionary_all', 'ip4', id) = toIPv4('192.168.0.1') +ORDER BY id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; SELECT 'dictGetIPv6 - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 @@ -186,25 +232,30 @@ SELECT 'dictGetIPv6'; SELECT id, payload FROM tab WHERE dictGetIPv6('dictionary_all', 'ip6', id) = toIPv6('2001:db8::1') ORDER BY id, payload; +SELECT 'dictGetIPv6, opt off'; +SELECT id, payload FROM tab +WHERE dictGetIPv6('dictionary_all', 'ip6', id) = toIPv6('2001:db8::1') +ORDER BY id, payload +SETTINGS 
optimize_inverse_dictionary_lookup = 0; -SELECT 'dictGetOrNull(String) - plan'; +SELECT 'dictGetOrNull(String), no rewrite (dictGetOrNull is not supported by the optimization) - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id, payload FROM tab WHERE dictGetOrNull('dictionary_all', 'name', id) = 'alpha' ORDER BY id, payload; -SELECT 'dictGetOrNull(String)'; +SELECT 'dictGetOrNull(String), no rewrite (dictGetOrNull is not supported by the optimization)'; SELECT id, payload FROM tab WHERE dictGetOrNull('dictionary_all', 'name', id) = 'alpha' ORDER BY id, payload; -SELECT 'dictGetOrNull(String) IS NULL - plan'; +SELECT 'dictGetOrNull(String) IS NULL, no rewrite (dictGetOrNull is not supported by the optimization) - plan'; EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id, payload FROM tab WHERE isNull(dictGetOrNull('dictionary_all','name', id)) ORDER BY id, payload; -SELECT 'dictGetOrNull(String) IS NULL'; +SELECT 'dictGetOrNull(String) IS NULL, no rewrite (dictGetOrNull is not supported by the optimization)'; SELECT id, payload FROM tab WHERE isNull(dictGetOrNull('dictionary_all','name', id)) ORDER BY id, payload; diff --git a/tests/queries/0_stateless/03713_optimize_inverse_dictionary_lookup_setting_rewrite_in_to_join.reference b/tests/queries/0_stateless/03713_optimize_inverse_dictionary_lookup_setting_rewrite_in_to_join.reference index b77801083f21..d854aa4e79d0 100644 --- a/tests/queries/0_stateless/03713_optimize_inverse_dictionary_lookup_setting_rewrite_in_to_join.reference +++ b/tests/queries/0_stateless/03713_optimize_inverse_dictionary_lookup_setting_rewrite_in_to_join.reference @@ -10,3 +10,6 @@ ORDER BY Equality, LHS 1 a 3 c +Equality, LHS, opt off +1 a +3 c diff --git a/tests/queries/0_stateless/03713_optimize_inverse_dictionary_lookup_setting_rewrite_in_to_join.sql b/tests/queries/0_stateless/03713_optimize_inverse_dictionary_lookup_setting_rewrite_in_to_join.sql index cf313561f94f..4a43adbc6fcb 100644 --- 
a/tests/queries/0_stateless/03713_optimize_inverse_dictionary_lookup_setting_rewrite_in_to_join.sql +++ b/tests/queries/0_stateless/03713_optimize_inverse_dictionary_lookup_setting_rewrite_in_to_join.sql @@ -1,5 +1,6 @@ -- Tags: no-replicated-database, no-parallel-replicas --- no-parallel, no-parallel-replicas: Dictionary is not created in parallel replicas. +-- no-replicated-database: EXPLAIN output differs for replicated database. +-- no-parallel-replicas: Dictionary is not available on parallel-replica workers. SET enable_analyzer = 1; SET optimize_inverse_dictionary_lookup = 1; @@ -66,3 +67,9 @@ SELECT color_id, payload FROM t WHERE dictGetString('colors', 'name', color_id) = 'red' ORDER BY color_id, payload; +SELECT 'Equality, LHS, opt off'; +SELECT color_id, payload +FROM t +WHERE dictGetString('colors', 'name', color_id) = 'red' +ORDER BY color_id, payload +SETTINGS optimize_inverse_dictionary_lookup = 0; diff --git a/tests/queries/0_stateless/03787_optimize_inverse_dictionary_lookup_remote_shards.sql b/tests/queries/0_stateless/03787_optimize_inverse_dictionary_lookup_remote_shards.sql index ad870629e51f..197c435389c1 100644 --- a/tests/queries/0_stateless/03787_optimize_inverse_dictionary_lookup_remote_shards.sql +++ b/tests/queries/0_stateless/03787_optimize_inverse_dictionary_lookup_remote_shards.sql @@ -1,5 +1,6 @@ -- Tags: no-replicated-database, no-parallel-replicas --- no-parallel, no-parallel-replicas: Dictionary is not created in parallel replicas. +-- no-replicated-database: EXPLAIN output differs for replicated database. +-- no-parallel-replicas: Dictionary is not available on parallel-replica workers. 
SET enable_analyzer = 1; SET rewrite_in_to_join = 0; diff --git a/tests/queries/0_stateless/04201_inverse_dictionary_lookup_edge_cases.reference b/tests/queries/0_stateless/04201_inverse_dictionary_lookup_edge_cases.reference new file mode 100644 index 000000000000..60e646078f70 --- /dev/null +++ b/tests/queries/0_stateless/04201_inverse_dictionary_lookup_edge_cases.reference @@ -0,0 +1,344 @@ +dictGet = default, missing key, no rewrite (default -1 = -1) - plan +SELECT + __table1.id AS id, + dictGet(\'dict_int_default_minus1\', \'from\', toUInt64(__table1.id)) AS a, + dictGet(\'dict_int_default_minus1\', \'from\', toUInt64(__table1.id)) = -1 AS pred +FROM default.data_int AS __table1 +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +dictGet = default, missing key, no rewrite (default -1 = -1) +53 -1 1 +dictGet = default, Nullable source, no rewrite (default -1 = -1) - plan +SELECT + __table1.id AS id, + dictGet(\'dict_int_nullable_default_minus1\', \'from\', toUInt64(__table1.id)) AS a, + dictGet(\'dict_int_nullable_default_minus1\', \'from\', toUInt64(__table1.id)) = -1 AS pred +FROM default.data_int AS __table1 +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +dictGet = default, Nullable source, no rewrite (default -1 = -1) +53 -1 1 +dictGetOrNull = const, no rewrite (dictGetOrNull is not supported by the optimization) - plan +SELECT + __table1.id AS id, + dictGetOrNull(\'dict_string\', \'name\', __table1.id) = \'abc\' AS pred +FROM default.data_str AS __table1 +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +dictGetOrNull = const, no rewrite (dictGetOrNull is not supported by the optimization) +1 1 +2 \N +3 \N +dictGet != non-default, no rewrite (default empty string != abc) - plan +SELECT count() AS `count()` +FROM default.data_str AS __table1 +WHERE dictGet(\'dict_string\', \'name\', __table1.id) != \'abc\' +SETTINGS optimize_inverse_dictionary_lookup = 1 +dictGet != non-default, no 
rewrite (default empty string != abc) +2 +dictGet LIKE %, no rewrite (default empty string LIKE %) - plan +SELECT count() AS `count()` +FROM default.data_str AS __table1 +WHERE dictGet(\'dict_string\', \'name\', __table1.id) LIKE \'%\' +SETTINGS optimize_inverse_dictionary_lookup = 1 +dictGet LIKE %, no rewrite (default empty string LIKE %) +3 +Nullable key, dictGetString != abc, no rewrite (default empty string != abc) - plan +SELECT count() AS `count()` +FROM default.data_str_nullable AS __table1 +WHERE dictGetString(\'dict_string\', \'name\', __table1.id) != \'abc\' +SETTINGS optimize_inverse_dictionary_lookup = 1 +Nullable key, dictGetString != abc, no rewrite (default empty string != abc) +1 +LC(Nullable) simple key, dictGetString != abc, no rewrite (default empty string != abc) - plan +SELECT count() AS `count()` +FROM default.data_str_lc_nullable AS __table1 +WHERE dictGetString(\'dict_string\', \'name\', __table1.id) != \'abc\' +SETTINGS optimize_inverse_dictionary_lookup = 1 +LC(Nullable) simple key, dictGetString != abc, no rewrite (default empty string != abc) +1 +Composite key, Nullable component, no rewrite (skip to preserve dictGet throw on NULL component) - plan +SELECT + __table1.k1 AS k1, + dictGetString(\'dict_complex_str\', \'name\', (__table1.k1, __table1.k2)) = \'abc\' AS pred +FROM default.data_complex AS __table1 +ORDER BY __table1.k1 ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +Composite key, Nullable component, no rewrite (skip to preserve dictGet throw on NULL component) +1 1 +2 0 +Composite key, LC(Nullable) component, no rewrite (skip to preserve throw) - plan +SELECT + __table1.k1 AS k1, + dictGetString(\'dict_complex_str\', \'name\', (__table1.k1, __table1.k2)) = \'abc\' AS pred +FROM default.data_complex_lc_nullable AS __table1 +ORDER BY __table1.k1 ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +Composite key, LC(Nullable) component, no rewrite (skip to preserve throw) +1 1 +2 0 +Composite key, dict declares Nullable 
key, Nullable data, no rewrite - plan +SELECT count() AS `count()` +FROM default.data_complex_nullable_dict_key AS __table1 +WHERE dictGetString(\'dict_complex_nullable_dict_key\', \'name\', (__table1.k1, __table1.k2)) = \'nullhit\' +SETTINGS optimize_inverse_dictionary_lookup = 1 +Composite key, dict declares Nullable key, Nullable data, no rewrite +1 +Nullable(Tuple(Nullable(K))), no rewrite - plan +SELECT count() AS `count()` +FROM default.data_outer_nullable_tuple AS __table1 +WHERE dictGetString(\'dict_complex_nullable_dict_key\', \'name\', __table1.t) = \'nullhit\' +SETTINGS optimize_inverse_dictionary_lookup = 1 +Nullable(Tuple(Nullable(K))), no rewrite +1 +Nullable attr with stored NULL, isNull(predicate), no rewrite - plan +SELECT __table1.id AS id +FROM default.data_str AS __table1 +WHERE isNull(dictGet(\'dict_nullable_attr_stored\', \'name\', __table1.id) = \'abc\') +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +Nullable attr with stored NULL, isNull(predicate), no rewrite +2 +op equals, no rewrite +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE dictGetUInt64(\'dict_ops\', \'n\', __table1.id) = 0 +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +op notEquals, no rewrite +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE dictGetString(\'dict_ops\', \'name\', __table1.id) != \'zzz\' +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +op less, no rewrite +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE dictGetUInt64(\'dict_ops\', \'n\', __table1.id) < 100 +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +op lessOrEquals, no rewrite +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE dictGetUInt64(\'dict_ops\', \'n\', __table1.id) <= 0 +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +op greater, no rewrite +SELECT __table1.id AS id +FROM default.data_ops AS 
__table1 +WHERE \'apple\' > dictGetString(\'dict_ops\', \'name\', __table1.id) +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +op greaterOrEquals, no rewrite +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE dictGetUInt64(\'dict_ops\', \'n\', __table1.id) >= 0 +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +op like, no rewrite +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE dictGetString(\'dict_ops\', \'name\', __table1.id) LIKE \'%\' +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +op notLike, no rewrite +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE dictGetString(\'dict_ops\', \'name\', __table1.id) NOT LIKE \'a%\' +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +op ilike, no rewrite +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE dictGetString(\'dict_ops\', \'name\', __table1.id) ILIKE \'%\' +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +op notILike, no rewrite +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE dictGetString(\'dict_ops\', \'name\', __table1.id) NOT ILIKE \'A%\' +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +op match, no rewrite +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE match(dictGetString(\'dict_ops\', \'name\', __table1.id), \'.*\') +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +layout Flat, no rewrite +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE dictGetString(\'dict_flat\', \'name\', __table1.id) LIKE \'%\' +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +layout Hashed, no rewrite +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE dictGetString(\'dict_hashed\', \'name\', __table1.id) LIKE \'%\' +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup 
= 1 +layout HashedArray, no rewrite +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE dictGetString(\'dict_hashed_array\', \'name\', __table1.id) LIKE \'%\' +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +layout SparseHashed, no rewrite +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE dictGetString(\'dict_sparse_hashed\', \'name\', __table1.id) LIKE \'%\' +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +layout ComplexKeyHashed, no rewrite +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE dictGetString(\'dict_complex\', \'name\', (__table1.id, \'a\')) LIKE \'%\' +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +layout ComplexHashedArray, no rewrite +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE dictGetString(\'dict_complex_array\', \'name\', (__table1.id, \'a\')) LIKE \'%\' +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +layout ComplexKeySparseHashed, no rewrite +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE dictGetString(\'dict_complex_sparse\', \'name\', (__table1.id, \'a\')) LIKE \'%\' +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +rewrite: equals on non-default - plan +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE __table1.id IN ( + SELECT __table1.id AS id + FROM dictionary(\'dict_hashed\') AS __table1 + WHERE __table1.name = \'red\' +) +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +rewrite: equals on non-default +1 +rewrite: equals on non-default, opt off +1 +rewrite: greater than non-default - plan +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE __table1.id IN ( + SELECT __table1.id AS id + FROM dictionary(\'dict_ops\') AS __table1 + WHERE __table1.n > 100 +) +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +rewrite: greater than 
non-default +rewrite: greater than non-default, opt off +rewrite: != default (numeric) - plan +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE __table1.id IN ( + SELECT __table1.id AS id + FROM dictionary(\'dict_ops\') AS __table1 + WHERE __table1.n != 0 +) +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +rewrite: != default (numeric) +1 +rewrite: != default (numeric), opt off +1 +rewrite: NOT LIKE default (string) - plan +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE __table1.id IN ( + SELECT __table1.id AS id + FROM dictionary(\'dict_ops\') AS __table1 + WHERE __table1.name NOT LIKE \'\' +) +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +rewrite: NOT LIKE default (string) +1 +rewrite: NOT LIKE default (string), opt off +1 +rewrite: NOT ILIKE default (string) - plan +SELECT __table1.id AS id +FROM default.data_ops AS __table1 +WHERE __table1.id IN ( + SELECT __table1.id AS id + FROM dictionary(\'dict_ops\') AS __table1 + WHERE __table1.name NOT ILIKE \'\' +) +ORDER BY __table1.id ASC +SETTINGS optimize_inverse_dictionary_lookup = 1 +rewrite: NOT ILIKE default (string) +1 +rewrite: NOT ILIKE default (string), opt off +1 +rewrite: plain key - plan +SELECT count() AS `count()` +FROM default.data_ops AS __table1 +WHERE __table1.id IN ( + SELECT __table1.id AS id + FROM dictionary(\'dict_ops\') AS __table1 + WHERE __table1.name = \'apple\' +) +SETTINGS optimize_inverse_dictionary_lookup = 1 +rewrite: plain key +1 +rewrite: plain key, opt off +1 +rewrite: Nullable(K) key - plan +SELECT count() AS `count()` +FROM default.data_str_nullable AS __table1 +WHERE __table1.id IN ( + SELECT __table1.id AS id + FROM dictionary(\'dict_string\') AS __table1 + WHERE __table1.name = \'abc\' +) +SETTINGS optimize_inverse_dictionary_lookup = 1 +rewrite: Nullable(K) key +1 +rewrite: Nullable(K) key, opt off +1 +rewrite: LC(Nullable(K)) key - plan +SELECT count() AS `count()` +FROM 
default.data_str_lc_nullable AS __table1 +WHERE _CAST((__table1.id IN ( + SELECT __table1.id AS id + FROM dictionary(\'dict_string\') AS __table1 + WHERE __table1.name = \'abc\' +)), \'LowCardinality(Nullable(UInt8))\') +SETTINGS optimize_inverse_dictionary_lookup = 1 +rewrite: LC(Nullable(K)) key +1 +rewrite: LC(Nullable(K)) key, opt off +1 +short-circuited bad-regex match: optimization on, returns 0 +0 +short-circuited bad-regex match: optimization on, returns 0, opt off +0 +short-circuited bad-regex match, no rewrite - plan +SELECT count() AS `count()` +FROM default.data_ops AS __table1 +WHERE (__table1.id < 0) AND match(dictGetString(\'dict_ops\', \'name\', __table1.id), \'[unclosed\') +SETTINGS optimize_inverse_dictionary_lookup = 1 +max_rows_in_set + break, no rewrite - plan +SELECT groupArray(__table1.id) AS `groupArray(id)` +FROM default.data_set_limit AS __table1 +WHERE dictGetString(\'dict_set_limit\', \'name\', __table1.id) = \'hit\' +SETTINGS optimize_inverse_dictionary_lookup = 1, max_rows_in_set = 1, set_overflow_mode = \'break\' +max_rows_in_set + break, no rewrite +[1,2] +max_rows_in_set + throw, rewrite - plan +SELECT groupArray(__table1.id) AS `groupArray(id)` +FROM default.data_set_limit AS __table1 +WHERE __table1.id IN ( + SELECT __table1.id AS id + FROM dictionary(\'dict_set_limit\') AS __table1 + WHERE __table1.name = \'hit\' +) +SETTINGS optimize_inverse_dictionary_lookup = 1, max_rows_in_set = 1, set_overflow_mode = \'throw\' +max_rows_in_set + throw: execution raises SET_SIZE_LIMIT_EXCEEDED +max_bytes_in_set + break, no rewrite +[1,2] diff --git a/tests/queries/0_stateless/04201_inverse_dictionary_lookup_edge_cases.sql b/tests/queries/0_stateless/04201_inverse_dictionary_lookup_edge_cases.sql new file mode 100644 index 000000000000..992cad27fc05 --- /dev/null +++ b/tests/queries/0_stateless/04201_inverse_dictionary_lookup_edge_cases.sql @@ -0,0 +1,475 @@ +-- Tags: no-replicated-database, no-parallel-replicas +-- no-replicated-database: 
EXPLAIN output differs for replicated database. +-- no-parallel-replicas: Dictionary is not available on parallel-replica workers. + +SET optimize_or_like_chain = 0; +SET optimize_rewrite_like_perfect_affix = 0; + +DROP DICTIONARY IF EXISTS dict_int_default_minus1; +DROP DICTIONARY IF EXISTS dict_int_nullable_default_minus1; +DROP DICTIONARY IF EXISTS dict_string; +DROP DICTIONARY IF EXISTS dict_complex_str; +DROP DICTIONARY IF EXISTS dict_nullable_attr_stored; +DROP DICTIONARY IF EXISTS dict_complex_nullable_dict_key; +DROP DICTIONARY IF EXISTS dict_set_limit; +DROP DICTIONARY IF EXISTS dict_ops; +DROP DICTIONARY IF EXISTS dict_flat; +DROP DICTIONARY IF EXISTS dict_hashed; +DROP DICTIONARY IF EXISTS dict_hashed_array; +DROP DICTIONARY IF EXISTS dict_sparse_hashed; +DROP DICTIONARY IF EXISTS dict_complex; +DROP DICTIONARY IF EXISTS dict_complex_array; +DROP DICTIONARY IF EXISTS dict_complex_sparse; +DROP TABLE IF EXISTS ref_int; +DROP TABLE IF EXISTS ref_int_nullable; +DROP TABLE IF EXISTS ref_str; +DROP TABLE IF EXISTS ref_complex_str; +DROP TABLE IF EXISTS ref_nullable_attr_stored; +DROP TABLE IF EXISTS ref_complex_nullable_dict_key; +DROP TABLE IF EXISTS ref_set_limit; +DROP TABLE IF EXISTS ref_ops; +DROP TABLE IF EXISTS ref_layouts_simple; +DROP TABLE IF EXISTS ref_layouts_complex; +DROP TABLE IF EXISTS data_int; +DROP TABLE IF EXISTS data_str; +DROP TABLE IF EXISTS data_str_nullable; +DROP TABLE IF EXISTS data_str_lc_nullable; +DROP TABLE IF EXISTS data_complex; +DROP TABLE IF EXISTS data_complex_lc_nullable; +DROP TABLE IF EXISTS data_complex_single_lc; +DROP TABLE IF EXISTS data_complex_nullable_dict_key; +DROP TABLE IF EXISTS data_outer_nullable_tuple; +DROP TABLE IF EXISTS data_set_limit; +DROP TABLE IF EXISTS data_ops; + +CREATE TABLE ref_int (`to` UInt64, `from` Int32) ENGINE = MergeTree ORDER BY `to`; +INSERT INTO ref_int VALUES (42, 9289150); + +CREATE TABLE ref_int_nullable (`to` UInt64, `from` Nullable(Int32)) ENGINE = MergeTree ORDER BY `to`; 
+INSERT INTO ref_int_nullable VALUES (42, 9289150); + +CREATE TABLE ref_str (id UInt64, name String) ENGINE = MergeTree ORDER BY id; +INSERT INTO ref_str VALUES (1, 'abc'); + +CREATE DICTIONARY dict_int_default_minus1 (`to` UInt64, `from` Int32 DEFAULT -1) +PRIMARY KEY `to` SOURCE(CLICKHOUSE(TABLE 'ref_int')) LAYOUT(HASHED()) LIFETIME(0); + +CREATE DICTIONARY dict_int_nullable_default_minus1 (`to` UInt64, `from` Int32 DEFAULT -1) +PRIMARY KEY `to` SOURCE(CLICKHOUSE(TABLE 'ref_int_nullable')) LAYOUT(HASHED()) LIFETIME(0); + +CREATE DICTIONARY dict_string (id UInt64, name String) +PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'ref_str')) LAYOUT(HASHED()) LIFETIME(0); + +CREATE TABLE data_int (id Int32) ENGINE = MergeTree ORDER BY id; +INSERT INTO data_int VALUES (53); + +CREATE TABLE data_str (id UInt64) ENGINE = MergeTree ORDER BY id; +INSERT INTO data_str VALUES (1), (2), (3); + +CREATE TABLE data_str_nullable (id Nullable(UInt64)) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO data_str_nullable VALUES (1), (2), (NULL); + +CREATE TABLE data_str_lc_nullable (id LowCardinality(Nullable(UInt64))) ENGINE = MergeTree ORDER BY tuple() +SETTINGS allow_suspicious_low_cardinality_types = 1; +INSERT INTO data_str_lc_nullable VALUES (1), (2), (NULL); + + +SELECT 'dictGet = default, missing key, no rewrite (default -1 = -1) - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 +SELECT id, dictGet('dict_int_default_minus1', 'from', toUInt64(id)) AS a, a = -1 AS pred FROM data_int ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'dictGet = default, missing key, no rewrite (default -1 = -1)'; +SELECT id, dictGet('dict_int_default_minus1', 'from', toUInt64(id)) AS a, a = -1 AS pred FROM data_int ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'dictGet = default, Nullable source, no rewrite (default -1 = -1) - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 +SELECT id, dictGet('dict_int_nullable_default_minus1', 'from', toUInt64(id)) AS a, a = 
-1 AS pred FROM data_int ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'dictGet = default, Nullable source, no rewrite (default -1 = -1)'; +SELECT id, dictGet('dict_int_nullable_default_minus1', 'from', toUInt64(id)) AS a, a = -1 AS pred FROM data_int ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'dictGetOrNull = const, no rewrite (dictGetOrNull is not supported by the optimization) - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 +SELECT id, dictGetOrNull('dict_string', 'name', id) = 'abc' AS pred FROM data_str ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'dictGetOrNull = const, no rewrite (dictGetOrNull is not supported by the optimization)'; +SELECT id, dictGetOrNull('dict_string', 'name', id) = 'abc' AS pred FROM data_str ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'dictGet != non-default, no rewrite (default empty string != abc) - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 +SELECT count() FROM data_str WHERE dictGet('dict_string', 'name', id) != 'abc' +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'dictGet != non-default, no rewrite (default empty string != abc)'; +SELECT count() FROM data_str WHERE dictGet('dict_string', 'name', id) != 'abc' +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'dictGet LIKE %, no rewrite (default empty string LIKE %) - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 +SELECT count() FROM data_str WHERE dictGet('dict_string', 'name', id) LIKE '%' +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'dictGet LIKE %, no rewrite (default empty string LIKE %)'; +SELECT count() FROM data_str WHERE dictGet('dict_string', 'name', id) LIKE '%' +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'Nullable key, dictGetString != abc, no rewrite (default empty string != abc) - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 +SELECT count() FROM data_str_nullable WHERE dictGetString('dict_string', 
'name', id) != 'abc' +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'Nullable key, dictGetString != abc, no rewrite (default empty string != abc)'; +SELECT count() FROM data_str_nullable WHERE dictGetString('dict_string', 'name', id) != 'abc' +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'LC(Nullable) simple key, dictGetString != abc, no rewrite (default empty string != abc) - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 +SELECT count() FROM data_str_lc_nullable WHERE dictGetString('dict_string', 'name', id) != 'abc' +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'LC(Nullable) simple key, dictGetString != abc, no rewrite (default empty string != abc)'; +SELECT count() FROM data_str_lc_nullable WHERE dictGetString('dict_string', 'name', id) != 'abc' +SETTINGS optimize_inverse_dictionary_lookup = 1; + +CREATE TABLE ref_complex_str (k1 UInt64, k2 String, name String) ENGINE = MergeTree ORDER BY (k1, k2); +INSERT INTO ref_complex_str VALUES (1, 'a', 'abc'); +CREATE DICTIONARY dict_complex_str (k1 UInt64, k2 String, name String) +PRIMARY KEY k1, k2 SOURCE(CLICKHOUSE(TABLE 'ref_complex_str')) LAYOUT(COMPLEX_KEY_HASHED()) LIFETIME(0); +CREATE TABLE data_complex (k1 Nullable(UInt64), k2 String) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO data_complex VALUES (1, 'a'), (2, 'a'); + +SELECT 'Composite key, Nullable component, no rewrite (skip to preserve dictGet throw on NULL component) - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 +SELECT k1, dictGetString('dict_complex_str', 'name', (k1, k2)) = 'abc' AS pred FROM data_complex ORDER BY k1 +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'Composite key, Nullable component, no rewrite (skip to preserve dictGet throw on NULL component)'; +SELECT k1, dictGetString('dict_complex_str', 'name', (k1, k2)) = 'abc' AS pred FROM data_complex ORDER BY k1 +SETTINGS optimize_inverse_dictionary_lookup = 1; + +CREATE TABLE data_complex_lc_nullable (k1 
LowCardinality(Nullable(UInt64)), k2 String) ENGINE = MergeTree ORDER BY tuple() +SETTINGS allow_suspicious_low_cardinality_types = 1; +INSERT INTO data_complex_lc_nullable VALUES (1, 'a'), (2, 'a'); + +SELECT 'Composite key, LC(Nullable) component, no rewrite (skip to preserve throw) - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 +SELECT k1, dictGetString('dict_complex_str', 'name', (k1, k2)) = 'abc' AS pred FROM data_complex_lc_nullable ORDER BY k1 +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'Composite key, LC(Nullable) component, no rewrite (skip to preserve throw)'; +SELECT k1, dictGetString('dict_complex_str', 'name', (k1, k2)) = 'abc' AS pred FROM data_complex_lc_nullable ORDER BY k1 +SETTINGS optimize_inverse_dictionary_lookup = 1; + +CREATE TABLE ref_complex_nullable_dict_key (k1 Nullable(UInt64), k2 String, name String) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO ref_complex_nullable_dict_key VALUES (NULL, 'x', 'nullhit'), (1, 'x', 'one'); +CREATE DICTIONARY dict_complex_nullable_dict_key (k1 Nullable(UInt64), k2 String, name String) +PRIMARY KEY k1, k2 SOURCE(CLICKHOUSE(TABLE 'ref_complex_nullable_dict_key')) LAYOUT(COMPLEX_KEY_HASHED()) LIFETIME(0); +CREATE TABLE data_complex_nullable_dict_key (k1 Nullable(UInt64), k2 String) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO data_complex_nullable_dict_key VALUES (NULL, 'x'), (1, 'x'), (2, 'x'); + +SELECT 'Composite key, dict declares Nullable key, Nullable data, no rewrite - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 +SELECT count() FROM data_complex_nullable_dict_key WHERE dictGetString('dict_complex_nullable_dict_key', 'name', (k1, k2)) = 'nullhit' +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'Composite key, dict declares Nullable key, Nullable data, no rewrite'; +SELECT count() FROM data_complex_nullable_dict_key WHERE dictGetString('dict_complex_nullable_dict_key', 'name', (k1, k2)) = 'nullhit' +SETTINGS optimize_inverse_dictionary_lookup = 1; + +CREATE 
TABLE data_outer_nullable_tuple (t Nullable(Tuple(Nullable(UInt64), String))) ENGINE = MergeTree ORDER BY tuple() +SETTINGS allow_experimental_nullable_tuple_type = 1; +INSERT INTO data_outer_nullable_tuple VALUES ((NULL, 'x')), ((1, 'x')); + +SELECT 'Nullable(Tuple(Nullable(K))), no rewrite - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 +SELECT count() FROM data_outer_nullable_tuple WHERE dictGetString('dict_complex_nullable_dict_key', 'name', t) = 'nullhit' +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'Nullable(Tuple(Nullable(K))), no rewrite'; +SELECT count() FROM data_outer_nullable_tuple WHERE dictGetString('dict_complex_nullable_dict_key', 'name', t) = 'nullhit' +SETTINGS optimize_inverse_dictionary_lookup = 1; + +CREATE TABLE ref_nullable_attr_stored (id UInt64, name Nullable(String)) ENGINE = MergeTree ORDER BY id; +INSERT INTO ref_nullable_attr_stored VALUES (1, 'abc'), (2, NULL); +CREATE DICTIONARY dict_nullable_attr_stored (id UInt64, name Nullable(String) DEFAULT 'missing') +PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'ref_nullable_attr_stored')) LAYOUT(HASHED()) LIFETIME(0); + +SELECT 'Nullable attr with stored NULL, isNull(predicate), no rewrite - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 +SELECT id FROM data_str WHERE isNull(dictGet('dict_nullable_attr_stored', 'name', id) = 'abc') ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'Nullable attr with stored NULL, isNull(predicate), no rewrite'; +SELECT id FROM data_str WHERE isNull(dictGet('dict_nullable_attr_stored', 'name', id) = 'abc') ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +-- Per-operator no-rewrite examples (default = '' for String, default = 0 for UInt64) +CREATE TABLE ref_ops (id UInt64, name String, n UInt64) ENGINE = MergeTree ORDER BY id; +INSERT INTO ref_ops VALUES (1, 'apple', 5); +CREATE DICTIONARY dict_ops (id UInt64, name String, n UInt64) +PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'ref_ops')) LAYOUT(HASHED()) LIFETIME(0); 
+CREATE TABLE data_ops (id UInt64) ENGINE = MergeTree ORDER BY id; +INSERT INTO data_ops VALUES (1), (2); + +SELECT 'op equals, no rewrite'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetUInt64('dict_ops', 'n', id) = 0 ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'op notEquals, no rewrite'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetString('dict_ops', 'name', id) != 'zzz' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'op less, no rewrite'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetUInt64('dict_ops', 'n', id) < 100 ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'op lessOrEquals, no rewrite'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetUInt64('dict_ops', 'n', id) <= 0 ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'op greater, no rewrite'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE 'apple' > dictGetString('dict_ops', 'name', id) ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'op greaterOrEquals, no rewrite'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetUInt64('dict_ops', 'n', id) >= 0 ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'op like, no rewrite'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetString('dict_ops', 'name', id) LIKE '%' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'op notLike, no rewrite'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetString('dict_ops', 'name', id) NOT LIKE 'a%' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'op ilike, no rewrite'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetString('dict_ops', 'name', id) ILIKE '%' ORDER BY id +SETTINGS 
optimize_inverse_dictionary_lookup = 1; + +SELECT 'op notILike, no rewrite'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetString('dict_ops', 'name', id) NOT ILIKE 'A%' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'op match, no rewrite'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE match(dictGetString('dict_ops', 'name', id), '.*') ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + + +-- Per-layout no-rewrite examples (predicate `dictGetString... LIKE '%'` always skips) +CREATE TABLE ref_layouts_simple (id UInt64, name String) ENGINE = MergeTree ORDER BY id; +INSERT INTO ref_layouts_simple VALUES (1, 'red'); +CREATE TABLE ref_layouts_complex (k1 UInt64, k2 String, name String) ENGINE = MergeTree ORDER BY (k1, k2); +INSERT INTO ref_layouts_complex VALUES (1, 'a', 'red'); + +CREATE DICTIONARY dict_flat (id UInt64, name String) PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'ref_layouts_simple')) LAYOUT(FLAT()) LIFETIME(0); +CREATE DICTIONARY dict_hashed (id UInt64, name String) PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'ref_layouts_simple')) LAYOUT(HASHED()) LIFETIME(0); +CREATE DICTIONARY dict_hashed_array (id UInt64, name String) PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'ref_layouts_simple')) LAYOUT(HASHED_ARRAY()) LIFETIME(0); +CREATE DICTIONARY dict_sparse_hashed (id UInt64, name String) PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'ref_layouts_simple')) LAYOUT(SPARSE_HASHED()) LIFETIME(0); +CREATE DICTIONARY dict_complex (k1 UInt64, k2 String, name String) PRIMARY KEY k1, k2 SOURCE(CLICKHOUSE(TABLE 'ref_layouts_complex')) LAYOUT(COMPLEX_KEY_HASHED()) LIFETIME(0); +CREATE DICTIONARY dict_complex_array (k1 UInt64, k2 String, name String) PRIMARY KEY k1, k2 SOURCE(CLICKHOUSE(TABLE 'ref_layouts_complex')) LAYOUT(COMPLEX_KEY_HASHED_ARRAY()) LIFETIME(0); +CREATE DICTIONARY dict_complex_sparse (k1 UInt64, k2 String, name String) PRIMARY KEY k1, k2 SOURCE(CLICKHOUSE(TABLE 
'ref_layouts_complex')) LAYOUT(COMPLEX_KEY_SPARSE_HASHED()) LIFETIME(0); + +SELECT 'layout Flat, no rewrite'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetString('dict_flat', 'name', id) LIKE '%' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'layout Hashed, no rewrite'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetString('dict_hashed', 'name', id) LIKE '%' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'layout HashedArray, no rewrite'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetString('dict_hashed_array', 'name', id) LIKE '%' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'layout SparseHashed, no rewrite'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetString('dict_sparse_hashed', 'name', id) LIKE '%' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'layout ComplexKeyHashed, no rewrite'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetString('dict_complex', 'name', (id, 'a')) LIKE '%' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'layout ComplexHashedArray, no rewrite'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetString('dict_complex_array', 'name', (id, 'a')) LIKE '%' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'layout ComplexKeySparseHashed, no rewrite'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetString('dict_complex_sparse', 'name', (id, 'a')) LIKE '%' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + + +SELECT 'rewrite: equals on non-default - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetString('dict_hashed', 'name', id) = 'red' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'rewrite: equals on non-default'; 
+SELECT id FROM data_ops WHERE dictGetString('dict_hashed', 'name', id) = 'red' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; +SELECT 'rewrite: equals on non-default, opt off'; +SELECT id FROM data_ops WHERE dictGetString('dict_hashed', 'name', id) = 'red' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 0; + +SELECT 'rewrite: greater than non-default - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetUInt64('dict_ops', 'n', id) > 100 ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'rewrite: greater than non-default'; +SELECT id FROM data_ops WHERE dictGetUInt64('dict_ops', 'n', id) > 100 ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; +SELECT 'rewrite: greater than non-default, opt off'; +SELECT id FROM data_ops WHERE dictGetUInt64('dict_ops', 'n', id) > 100 ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 0; + +SELECT 'rewrite: != default (numeric) - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetUInt64('dict_ops', 'n', id) != 0 ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'rewrite: != default (numeric)'; +SELECT id FROM data_ops WHERE dictGetUInt64('dict_ops', 'n', id) != 0 ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; +SELECT 'rewrite: != default (numeric), opt off'; +SELECT id FROM data_ops WHERE dictGetUInt64('dict_ops', 'n', id) != 0 ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 0; + +SELECT 'rewrite: NOT LIKE default (string) - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetString('dict_ops', 'name', id) NOT LIKE '' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'rewrite: NOT LIKE default (string)'; +SELECT id FROM data_ops WHERE dictGetString('dict_ops', 'name', id) NOT LIKE '' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; +SELECT 'rewrite: NOT LIKE default (string), opt off'; 
+SELECT id FROM data_ops WHERE dictGetString('dict_ops', 'name', id) NOT LIKE '' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 0; + +SELECT 'rewrite: NOT ILIKE default (string) - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT id FROM data_ops WHERE dictGetString('dict_ops', 'name', id) NOT ILIKE '' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'rewrite: NOT ILIKE default (string)'; +SELECT id FROM data_ops WHERE dictGetString('dict_ops', 'name', id) NOT ILIKE '' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 1; +SELECT 'rewrite: NOT ILIKE default (string), opt off'; +SELECT id FROM data_ops WHERE dictGetString('dict_ops', 'name', id) NOT ILIKE '' ORDER BY id +SETTINGS optimize_inverse_dictionary_lookup = 0; + + +SELECT 'rewrite: plain key - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT count() FROM data_ops WHERE dictGetString('dict_ops', 'name', id) = 'apple' +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'rewrite: plain key'; +SELECT count() FROM data_ops WHERE dictGetString('dict_ops', 'name', id) = 'apple' +SETTINGS optimize_inverse_dictionary_lookup = 1; +SELECT 'rewrite: plain key, opt off'; +SELECT count() FROM data_ops WHERE dictGetString('dict_ops', 'name', id) = 'apple' +SETTINGS optimize_inverse_dictionary_lookup = 0; + +SELECT 'rewrite: Nullable(K) key - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 SELECT count() FROM data_str_nullable WHERE dictGetString('dict_string', 'name', id) = 'abc' +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'rewrite: Nullable(K) key'; +SELECT count() FROM data_str_nullable WHERE dictGetString('dict_string', 'name', id) = 'abc' +SETTINGS optimize_inverse_dictionary_lookup = 1; +SELECT 'rewrite: Nullable(K) key, opt off'; +SELECT count() FROM data_str_nullable WHERE dictGetString('dict_string', 'name', id) = 'abc' +SETTINGS optimize_inverse_dictionary_lookup = 0; + +SELECT 'rewrite: LC(Nullable(K)) key - plan'; +EXPLAIN SYNTAX 
run_query_tree_passes=1 SELECT count() FROM data_str_lc_nullable WHERE dictGetString('dict_string', 'name', id) = 'abc' +SETTINGS optimize_inverse_dictionary_lookup = 1; + +SELECT 'rewrite: LC(Nullable(K)) key'; +SELECT count() FROM data_str_lc_nullable WHERE dictGetString('dict_string', 'name', id) = 'abc' +SETTINGS optimize_inverse_dictionary_lookup = 1; +SELECT 'rewrite: LC(Nullable(K)) key, opt off'; +SELECT count() FROM data_str_lc_nullable WHERE dictGetString('dict_string', 'name', id) = 'abc' +SETTINGS optimize_inverse_dictionary_lookup = 0; + + +SELECT 'short-circuited bad-regex match: optimization on, returns 0'; +SELECT count() FROM data_ops WHERE id < 0 AND match(dictGetString('dict_ops', 'name', id), '[unclosed') +SETTINGS optimize_inverse_dictionary_lookup = 1; +SELECT 'short-circuited bad-regex match: optimization on, returns 0, opt off'; +SELECT count() FROM data_ops WHERE id < 0 AND match(dictGetString('dict_ops', 'name', id), '[unclosed') +SETTINGS optimize_inverse_dictionary_lookup = 0; + +SELECT 'short-circuited bad-regex match, no rewrite - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 +SELECT count() FROM data_ops WHERE id < 0 AND match(dictGetString('dict_ops', 'name', id), '[unclosed') +SETTINGS optimize_inverse_dictionary_lookup = 1; + + +-- max_rows_in_set / max_bytes_in_set with set_overflow_mode = 'break': skip to avoid silent truncation +CREATE TABLE ref_set_limit (id UInt64, name String) ENGINE = MergeTree ORDER BY id; +INSERT INTO ref_set_limit VALUES (1, 'hit'), (2, 'hit'), (3, 'miss'); +CREATE DICTIONARY dict_set_limit (id UInt64, name String) +PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'ref_set_limit')) LAYOUT(HASHED()) LIFETIME(0); +CREATE TABLE data_set_limit (id UInt64) ENGINE = MergeTree ORDER BY id; +INSERT INTO data_set_limit VALUES (1), (2), (3); + +SELECT 'max_rows_in_set + break, no rewrite - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 +SELECT groupArray(id) FROM data_set_limit WHERE dictGetString('dict_set_limit', 'name', 
id) = 'hit' +SETTINGS optimize_inverse_dictionary_lookup = 1, max_rows_in_set = 1, set_overflow_mode = 'break'; + +SELECT 'max_rows_in_set + break, no rewrite'; +SELECT groupArray(id) FROM data_set_limit WHERE dictGetString('dict_set_limit', 'name', id) = 'hit' +SETTINGS optimize_inverse_dictionary_lookup = 1, max_rows_in_set = 1, set_overflow_mode = 'break'; +SELECT 'max_rows_in_set + throw, rewrite - plan'; +EXPLAIN SYNTAX run_query_tree_passes=1 +SELECT groupArray(id) FROM data_set_limit WHERE dictGetString('dict_set_limit', 'name', id) = 'hit' +SETTINGS optimize_inverse_dictionary_lookup = 1, max_rows_in_set = 1, set_overflow_mode = 'throw'; + +SELECT 'max_rows_in_set + throw: execution raises SET_SIZE_LIMIT_EXCEEDED'; +SELECT groupArray(id) FROM data_set_limit WHERE dictGetString('dict_set_limit', 'name', id) = 'hit' +SETTINGS optimize_inverse_dictionary_lookup = 1, max_rows_in_set = 1, set_overflow_mode = 'throw'; -- { serverError SET_SIZE_LIMIT_EXCEEDED } + +SELECT 'max_bytes_in_set + break, no rewrite'; +SELECT groupArray(id) FROM data_set_limit WHERE dictGetString('dict_set_limit', 'name', id) = 'hit' +SETTINGS optimize_inverse_dictionary_lookup = 1, max_bytes_in_set = 1, set_overflow_mode = 'break'; + + +DROP DICTIONARY dict_int_default_minus1; +DROP DICTIONARY dict_int_nullable_default_minus1; +DROP DICTIONARY dict_string; +DROP DICTIONARY dict_complex_str; +DROP DICTIONARY dict_nullable_attr_stored; +DROP DICTIONARY dict_complex_nullable_dict_key; +DROP DICTIONARY dict_set_limit; +DROP DICTIONARY dict_ops; +DROP DICTIONARY dict_flat; +DROP DICTIONARY dict_hashed; +DROP DICTIONARY dict_hashed_array; +DROP DICTIONARY dict_sparse_hashed; +DROP DICTIONARY dict_complex; +DROP DICTIONARY dict_complex_array; +DROP DICTIONARY dict_complex_sparse; +DROP TABLE ref_int; +DROP TABLE ref_int_nullable; +DROP TABLE ref_str; +DROP TABLE ref_complex_str; +DROP TABLE ref_nullable_attr_stored; +DROP TABLE ref_complex_nullable_dict_key; +DROP TABLE ref_set_limit; +DROP 
TABLE ref_ops; +DROP TABLE ref_layouts_simple; +DROP TABLE ref_layouts_complex; +DROP TABLE data_int; +DROP TABLE data_str; +DROP TABLE data_str_nullable; +DROP TABLE data_str_lc_nullable; +DROP TABLE data_complex; +DROP TABLE data_complex_lc_nullable; +DROP TABLE data_complex_nullable_dict_key; +DROP TABLE data_outer_nullable_tuple; +DROP TABLE data_set_limit; +DROP TABLE data_ops; diff --git a/tests/queries/0_stateless/04202_inverse_dictionary_lookup_pruning_key_condition.reference b/tests/queries/0_stateless/04202_inverse_dictionary_lookup_pruning_key_condition.reference new file mode 100644 index 000000000000..34de5e124215 --- /dev/null +++ b/tests/queries/0_stateless/04202_inverse_dictionary_lookup_pruning_key_condition.reference @@ -0,0 +1,63 @@ +-- { echo } + +SET enable_analyzer = 1; +SET optimize_or_like_chain = 0; +DROP TABLE IF EXISTS pruning_ref; +DROP TABLE IF EXISTS pruning_data; +DROP DICTIONARY IF EXISTS pruning_dict; +CREATE TABLE pruning_ref (id UInt64, name String) ENGINE = MergeTree ORDER BY id; +INSERT INTO pruning_ref VALUES (4242, 'match'); +CREATE DICTIONARY pruning_dict (id UInt64, name String) +PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'pruning_ref')) LAYOUT(HASHED()) LIFETIME(0); +CREATE TABLE pruning_data (id UInt64) ENGINE = MergeTree ORDER BY id +SETTINGS index_granularity = 100, add_minmax_index_for_numeric_columns = 0; +SYSTEM STOP MERGES pruning_data; +INSERT INTO pruning_data SELECT number FROM numbers(10000); +-- With optimization: predicate becomes `id IN (SELECT id FROM dictionary(...) WHERE name = 'match')`. 
+SET optimize_inverse_dictionary_lookup = 1; +EXPLAIN indexes = 1 +SELECT count() FROM pruning_data +WHERE dictGetString('pruning_dict', 'name', id) = 'match'; +CreatingSets (Create sets before main query execution) + Expression ((Project names + Projection)) + Aggregating + Expression (Before GROUP BY) + Filter ((WHERE + Change column names to column identifiers)) + ReadFromMergeTree (default.pruning_data) + Indexes: + PrimaryKey + Keys: + id + Condition: (id in 1-element set) + Parts: 1/1 + Granules: 1/100 + Search Algorithm: binary search + Ranges: 1 +-- Without optimization: `dictGet(...)` stays, so all granules are scanned. +SET optimize_inverse_dictionary_lookup = 0; +EXPLAIN indexes = 1 +SELECT count() FROM pruning_data +WHERE dictGetString('pruning_dict', 'name', id) = 'match'; +Expression ((Project names + Projection)) + Aggregating + Expression (Before GROUP BY) + Filter ((WHERE + Change column names to column identifiers)) + ReadFromMergeTree (default.pruning_data) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 100/100 + Ranges: 1 +-- Results are the same +SET optimize_inverse_dictionary_lookup = 1; +SELECT count() FROM pruning_data +WHERE dictGetString('pruning_dict', 'name', id) = 'match'; +1 +SET optimize_inverse_dictionary_lookup = 0; +SELECT count() FROM pruning_data +WHERE dictGetString('pruning_dict', 'name', id) = 'match'; +1 +DROP DICTIONARY pruning_dict; +DROP TABLE pruning_data; +DROP TABLE pruning_ref; diff --git a/tests/queries/0_stateless/04202_inverse_dictionary_lookup_pruning_key_condition.sql b/tests/queries/0_stateless/04202_inverse_dictionary_lookup_pruning_key_condition.sql new file mode 100644 index 000000000000..54b6478f586b --- /dev/null +++ b/tests/queries/0_stateless/04202_inverse_dictionary_lookup_pruning_key_condition.sql @@ -0,0 +1,49 @@ +-- Tags: no-replicated-database, no-parallel-replicas, no-random-merge-tree-settings +-- no-replicated-database: EXPLAIN output differs for replicated database. 
+-- no-parallel-replicas: Dictionary is not available on parallel-replica workers. + +-- { echo } + +SET enable_analyzer = 1; +SET optimize_or_like_chain = 0; + +DROP TABLE IF EXISTS pruning_ref; +DROP TABLE IF EXISTS pruning_data; +DROP DICTIONARY IF EXISTS pruning_dict; + +CREATE TABLE pruning_ref (id UInt64, name String) ENGINE = MergeTree ORDER BY id; +INSERT INTO pruning_ref VALUES (4242, 'match'); + +CREATE DICTIONARY pruning_dict (id UInt64, name String) +PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'pruning_ref')) LAYOUT(HASHED()) LIFETIME(0); + +CREATE TABLE pruning_data (id UInt64) ENGINE = MergeTree ORDER BY id +SETTINGS index_granularity = 100, add_minmax_index_for_numeric_columns = 0; + +SYSTEM STOP MERGES pruning_data; +INSERT INTO pruning_data SELECT number FROM numbers(10000); + +-- With optimization: predicate becomes `id IN (SELECT id FROM dictionary(...) WHERE name = 'match')`. +SET optimize_inverse_dictionary_lookup = 1; +EXPLAIN indexes = 1 +SELECT count() FROM pruning_data +WHERE dictGetString('pruning_dict', 'name', id) = 'match'; + +-- Without optimization: `dictGet(...)` stays, so all granules are scanned. 
+SET optimize_inverse_dictionary_lookup = 0; +EXPLAIN indexes = 1 +SELECT count() FROM pruning_data +WHERE dictGetString('pruning_dict', 'name', id) = 'match'; + +-- Results are the same +SET optimize_inverse_dictionary_lookup = 1; +SELECT count() FROM pruning_data +WHERE dictGetString('pruning_dict', 'name', id) = 'match'; + +SET optimize_inverse_dictionary_lookup = 0; +SELECT count() FROM pruning_data +WHERE dictGetString('pruning_dict', 'name', id) = 'match'; + +DROP DICTIONARY pruning_dict; +DROP TABLE pruning_data; +DROP TABLE pruning_ref; From 7f43038915ad8730b48cd6753732209f116ebd02 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 8 May 2026 11:45:53 +0000 Subject: [PATCH 07/41] Backport #103708 to 26.3: Fix skip-index matching for ALIAS columns with lambda + constants --- src/Storages/MergeTree/RPNBuilder.cpp | 106 ++++++++++-- ...ndex_alias_lambda_with_constants.reference | 10 ++ ...text_index_alias_lambda_with_constants.sql | 161 ++++++++++++++++++ 3 files changed, 261 insertions(+), 16 deletions(-) create mode 100644 tests/queries/0_stateless/04141_text_index_alias_lambda_with_constants.reference create mode 100644 tests/queries/0_stateless/04141_text_index_alias_lambda_with_constants.sql diff --git a/src/Storages/MergeTree/RPNBuilder.cpp b/src/Storages/MergeTree/RPNBuilder.cpp index 08fc2d2f208c..b7857abdefb9 100644 --- a/src/Storages/MergeTree/RPNBuilder.cpp +++ b/src/Storages/MergeTree/RPNBuilder.cpp @@ -47,7 +47,89 @@ namespace ErrorCodes namespace { -void appendColumnNameWithoutAlias(const ActionsDAG::Node & node, WriteBuffer & out, const ContextPtr & context, bool use_analyzer, bool legacy = false) +void appendColumnNameWithoutAlias(const ActionsDAG::Node & node, WriteBuffer & out, const ContextPtr & context, bool use_analyzer, bool legacy = false); + +/// Produces the lambda's column name in the AST format `lambda(tuple(args), body)`. 
+/// Used both for live FUNCTION nodes wrapping `ExecutableFunctionCapture`/`FunctionCapture` and for +/// constant-folded COLUMN nodes that hold a `ColumnConst` (e.g. when all captured +/// arguments are constants and `removeUnusedActions` collapsed the FUNCTION into a COLUMN). +void appendLambdaColumnName( + const LambdaCapture & capture, + ActionsDAG capture_dag, + WriteBuffer & out, + const ContextPtr & context, + bool use_analyzer, + bool legacy) +{ + writeString("lambda(tuple(", out); + bool first = true; + for (const auto & arg : capture.lambda_arguments) + { + if (!first) + writeCString(", ", out); + first = false; + + writeString(arg.name, out); + } + writeString("), ", out); + + ActionsDAGWithInversionPushDown inverted_capture_dag(capture_dag.getOutputs().at(0), context); + appendColumnNameWithoutAlias(*inverted_capture_dag.predicate, out, context, use_analyzer, legacy); + writeChar(')', out); +} + +/// For a constant-folded lambda (`ColumnConst` wrapping `ColumnFunction`), reconstruct the lambda +/// AST-format name. Returns true on success and writes the name to `out`. +bool tryAppendConstantFunctionColumnName( + const ActionsDAG::Node & node, + WriteBuffer & out, + const ContextPtr & context, + bool use_analyzer, + bool legacy) +{ + const auto * column_const = typeid_cast(node.column.get()); + if (!column_const) + return false; + + const auto * column_function = typeid_cast(&column_const->getDataColumn()); + if (!column_function) + return false; + + const auto * function_expression = typeid_cast(column_function->getFunction().get()); + if (!function_expression) + return false; + + const auto & capture = function_expression->getCapture(); + auto capture_dag = function_expression->getAcionsDAG().clone(); + + /// Stitch the captured constant columns into the body DAG so the body's input nodes + /// are resolved to actual constants. 
After `ActionsDAGWithInversionPushDown` rewrites + /// the constant column names to their AST form, the resulting name will match the one + /// produced for the index sample block (which was built via the old analyzer). + const auto & captured_columns = column_function->getCapturedColumns(); + if (!captured_columns.empty()) + { + if (captured_columns.size() != capture.captured_names.size()) + return false; + + ActionsDAG captured_columns_dag; + auto & outputs = captured_columns_dag.getOutputs(); + outputs.reserve(captured_columns.size()); + for (size_t i = 0; i < captured_columns.size(); ++i) + { + const auto & captured_node = captured_columns_dag.addColumn(captured_columns[i]); + const auto & alias_node = captured_columns_dag.addAlias(captured_node, capture.captured_names[i]); + outputs.push_back(&alias_node); + } + + capture_dag = ActionsDAG::merge(std::move(captured_columns_dag), std::move(capture_dag)); + } + + appendLambdaColumnName(capture, std::move(capture_dag), out, context, use_analyzer, legacy); + return true; +} + +void appendColumnNameWithoutAlias(const ActionsDAG::Node & node, WriteBuffer & out, const ContextPtr & context, bool use_analyzer, bool legacy) { switch (node.type) { @@ -56,6 +138,12 @@ void appendColumnNameWithoutAlias(const ActionsDAG::Node & node, WriteBuffer & o break; case ActionsDAG::ActionType::COLUMN: { + /// A constant-folded lambda is a `ColumnConst` of a `ColumnFunction`. Recover the + /// `lambda(tuple(args), body)` AST form so the name aligns with what the index sample + /// block produced for the same expression (the index goes through the old analyzer). + if (tryAppendConstantFunctionColumnName(node, out, context, use_analyzer, legacy)) + break; + /// If it was created from ASTLiteral, then result_name can be an alias. /// We need to convert value back to string here. 
const auto * column_const = typeid_cast(node.column.get()); @@ -89,21 +177,7 @@ void appendColumnNameWithoutAlias(const ActionsDAG::Node & node, WriteBuffer & o capture_dag = ActionsDAG::merge(std::move(captured_columns_dag), std::move(capture_dag)); } - writeString("lambda(tuple(", out); - bool first = true; - for (const auto & arg : capture->lambda_arguments) - { - if (!first) - writeCString(", ", out); - first = false; - - writeString(arg.name, out); - } - writeString("), ", out); - - ActionsDAGWithInversionPushDown inverted_capture_dag(capture_dag.getOutputs().at(0), context); - appendColumnNameWithoutAlias(*inverted_capture_dag.predicate, out, context, use_analyzer, legacy); - writeChar(')', out); + appendLambdaColumnName(*capture, std::move(capture_dag), out, context, use_analyzer, legacy); break; } else diff --git a/tests/queries/0_stateless/04141_text_index_alias_lambda_with_constants.reference b/tests/queries/0_stateless/04141_text_index_alias_lambda_with_constants.reference new file mode 100644 index 000000000000..9e75e50b57de --- /dev/null +++ b/tests/queries/0_stateless/04141_text_index_alias_lambda_with_constants.reference @@ -0,0 +1,10 @@ +splitByChar 1 +arrayMap 1 +concat-prefix 1 +bloom_filter arrayMap 1 +bloom_filter concat-prefix 1 +quoted lambda args 1 +keyword lambda arg text 1 +keyword lambda arg bloom_filter 1 +concatWithSeparator text 1 +concatWithSeparator bloom_filter 1 diff --git a/tests/queries/0_stateless/04141_text_index_alias_lambda_with_constants.sql b/tests/queries/0_stateless/04141_text_index_alias_lambda_with_constants.sql new file mode 100644 index 000000000000..5a928df30ebb --- /dev/null +++ b/tests/queries/0_stateless/04141_text_index_alias_lambda_with_constants.sql @@ -0,0 +1,161 @@ +-- Regression test: text index on an ALIAS column whose expression contains a +-- lambda with captured constants must be usable in queries. +-- +-- The lambda body materializes captured constants as a constant `ColumnFunction` +-- (e.g. 
`arrayMap((k, v) -> concat(k, '=', v), ...)` captures `'='`). The new +-- analyzer's column name for that constant carries an `_String` suffix and the +-- whole lambda is rendered as `k String, v String -> ...`, while the index +-- `sample_block` (built via the old analyzer) uses the AST form +-- `lambda(tuple(k, v), ...)` with bare literals. Without the fix +-- `header.has(...)` failed and the index was not used. + +DROP TABLE IF EXISTS user_favorites; + +CREATE TABLE user_favorites +( + user_id UInt64, + colors String, + color_map Map(String, String), + colors_text Array(String) ALIAS splitByChar(',', colors), + colors_kv Array(String) ALIAS arrayMap((k, v) -> concat(k, '=', v), mapKeys(color_map), mapValues(color_map)), + INDEX idx_colors_text colors_text TYPE text(tokenizer = 'array') GRANULARITY 100000000, + INDEX idx_colors_kv_text colors_kv TYPE text(tokenizer = 'array') GRANULARITY 100000000 +) +ENGINE = MergeTree +ORDER BY user_id; + +INSERT INTO user_favorites VALUES (1, 'favorite=red,second=blue', {'favorite': 'red', 'second': 'blue'}); +INSERT INTO user_favorites VALUES (2, 'favorite=green,favorite=blue', {'favorite': 'green', 'second': 'blue'}); + +-- The simple alias works (no lambda, no constants captured). +SELECT 'splitByChar', user_id FROM user_favorites WHERE has(colors_text, 'favorite=red') +SETTINGS force_data_skipping_indices = 'idx_colors_text'; + +-- The arrayMap alias used to be broken because of the captured `'='` constant. +SELECT 'arrayMap', user_id FROM user_favorites WHERE has(colors_kv, 'favorite=red') +SETTINGS force_data_skipping_indices = 'idx_colors_kv_text'; + +DROP TABLE user_favorites; + +-- Minimal reproducer: a lambda body that captures a constant breaks index matching +-- whenever the lambda is folded into a constant `ColumnFunction`. 
+DROP TABLE IF EXISTS t_arr; + +CREATE TABLE t_arr +( + id UInt64, + arr Array(String), + arr_prefixed Array(String) ALIAS arrayMap(s -> concat('-', s), arr), + INDEX idx_prefixed arr_prefixed TYPE text(tokenizer = 'array') GRANULARITY 100000000 +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO t_arr VALUES (1, ['hello', 'world']); + +SELECT 'concat-prefix', id FROM t_arr WHERE has(arr_prefixed, '-hello') +SETTINGS force_data_skipping_indices = 'idx_prefixed'; + +DROP TABLE t_arr; + +-- Same column-name-matching path is taken by `bloom_filter`-family indices, so +-- guard them too: with the same captured-constant lambda an `ALIAS` column +-- bound to a `bloom_filter` index must still be selectable. +DROP TABLE IF EXISTS t_bf; + +CREATE TABLE t_bf +( + user_id UInt64, + color_map Map(String, String), + colors_kv Array(String) ALIAS arrayMap((k, v) -> concat(k, '=', v), mapKeys(color_map), mapValues(color_map)), + INDEX idx_kv_bf colors_kv TYPE bloom_filter GRANULARITY 100000000 +) +ENGINE = MergeTree +ORDER BY user_id; + +INSERT INTO t_bf VALUES (1, {'favorite': 'red', 'second': 'blue'}); + +SELECT 'bloom_filter arrayMap', user_id FROM t_bf WHERE has(colors_kv, 'favorite=red') +SETTINGS force_data_skipping_indices = 'idx_kv_bf'; + +DROP TABLE t_bf; + +DROP TABLE IF EXISTS t_bf_arr; + +CREATE TABLE t_bf_arr +( + id UInt64, + arr Array(String), + arr_prefixed Array(String) ALIAS arrayMap(s -> concat('-', s), arr), + INDEX idx_prefixed_bf arr_prefixed TYPE bloom_filter GRANULARITY 100000000 +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO t_bf_arr VALUES (1, ['hello', 'world']); + +SELECT 'bloom_filter concat-prefix', id FROM t_bf_arr WHERE has(arr_prefixed, '-hello') +SETTINGS force_data_skipping_indices = 'idx_prefixed_bf'; + +DROP TABLE t_bf_arr; + +-- Lambda argument names that require backquoting (contain whitespace, or clash +-- with SQL keywords) must produce identical column names on the AST and DAG +-- sides, so index matching keeps working. 
+DROP TABLE IF EXISTS t_quoted_lambda; + +CREATE TABLE t_quoted_lambda +( + user_id UInt64, + color_map Map(String, String), + colors_kv Array(String) ALIAS arrayMap((`my key`, `my value`) -> concat(`my key`, '=', `my value`), mapKeys(color_map), mapValues(color_map)), + arr Array(String), + arr_keyword Array(String) ALIAS arrayMap(`select` -> concat('-', `select`), arr), + INDEX idx_kv_quoted colors_kv TYPE text(tokenizer = 'array') GRANULARITY 100000000, + INDEX idx_kw_text arr_keyword TYPE text(tokenizer = 'array') GRANULARITY 100000000, + INDEX idx_kw_bf arr_keyword TYPE bloom_filter GRANULARITY 100000000 +) +ENGINE = MergeTree +ORDER BY user_id; + +INSERT INTO t_quoted_lambda VALUES (1, {'favorite': 'red', 'second': 'blue'}, ['hello', 'world']); + +SELECT 'quoted lambda args', user_id FROM t_quoted_lambda WHERE has(colors_kv, 'favorite=red') +SETTINGS force_data_skipping_indices = 'idx_kv_quoted'; + +SELECT 'keyword lambda arg text', user_id FROM t_quoted_lambda WHERE has(arr_keyword, '-hello') +SETTINGS force_data_skipping_indices = 'idx_kw_text'; + +SELECT 'keyword lambda arg bloom_filter', user_id FROM t_quoted_lambda WHERE has(arr_keyword, '-hello') +SETTINGS force_data_skipping_indices = 'idx_kw_bf'; + +DROP TABLE t_quoted_lambda; + +-- A lambda with more than three captured arguments and a captured-constant +-- separator (`concatWithSeparator('-', x1, x2, x3, x4)`) exercises the same +-- folding path with a wider capture list and a different higher-order function. 
+DROP TABLE IF EXISTS t_cws; + +CREATE TABLE t_cws +( + id UInt64, + a Array(String), + b Array(String), + c Array(String), + d Array(String), + joined Array(String) ALIAS arrayMap((x1, x2, x3, x4) -> concatWithSeparator('-', x1, x2, x3, x4), a, b, c, d), + INDEX idx_joined_text joined TYPE text(tokenizer = 'array') GRANULARITY 100000000, + INDEX idx_joined_bf joined TYPE bloom_filter GRANULARITY 100000000 +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO t_cws VALUES (1, ['hello', 'foo'], ['world', 'bar'], ['a', 'baz'], ['b', 'qux']); + +SELECT 'concatWithSeparator text', id FROM t_cws WHERE has(joined, 'hello-world-a-b') +SETTINGS force_data_skipping_indices = 'idx_joined_text'; + +SELECT 'concatWithSeparator bloom_filter', id FROM t_cws WHERE has(joined, 'hello-world-a-b') +SETTINGS force_data_skipping_indices = 'idx_joined_bf'; + +DROP TABLE t_cws; From 1269501babcc795368700abcdb1a068efcfb55e8 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 8 May 2026 12:50:46 +0000 Subject: [PATCH 08/41] Update autogenerated version to 26.3.10.60 and contributors --- cmake/autogenerated_versions.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 022b3cbb03c3..c5044472bdfa 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -6,7 +6,7 @@ SET(VERSION_REVISION 54518) SET(VERSION_MAJOR 26) SET(VERSION_MINOR 3) SET(VERSION_PATCH 11) -SET(VERSION_GITHASH e1c11930c28196f954a93287e43c1aa112c8c607) +SET(VERSION_GITHASH 6a6d2d137dfc14972ab1f77412a9e2ada5e0698f) SET(VERSION_DESCRIBE v26.3.11.1-lts) SET(VERSION_STRING 26.3.11.1) # end of autochange From f07e5995d7d72f85a7ea2ebb7d44391ed4531fb8 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 8 May 2026 16:10:21 +0200 Subject: [PATCH 09/41] Revert "Update autogenerated version to 26.3.10.60 and contributors" This reverts commit 1269501babcc795368700abcdb1a068efcfb55e8. 
--- cmake/autogenerated_versions.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index c5044472bdfa..022b3cbb03c3 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -6,7 +6,7 @@ SET(VERSION_REVISION 54518) SET(VERSION_MAJOR 26) SET(VERSION_MINOR 3) SET(VERSION_PATCH 11) -SET(VERSION_GITHASH 6a6d2d137dfc14972ab1f77412a9e2ada5e0698f) +SET(VERSION_GITHASH e1c11930c28196f954a93287e43c1aa112c8c607) SET(VERSION_DESCRIBE v26.3.11.1-lts) SET(VERSION_STRING 26.3.11.1) # end of autochange From c2c7c0add94a980f02a9aba88bd1631387cd3795 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 8 May 2026 14:50:30 +0000 Subject: [PATCH 10/41] Backport #103384 to 26.3: Make a decision to skip backup of the target RMV table based on snapshot --- src/Backups/BackupEntriesCollector.cpp | 65 ++++++++++++++++++-------- src/Backups/BackupEntriesCollector.h | 7 ++- src/Backups/BackupUtils.cpp | 17 ------- src/Backups/BackupUtils.h | 5 -- 4 files changed, 51 insertions(+), 43 deletions(-) diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 1698c6032f3d..f9e0c368325d 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -11,9 +11,12 @@ #include #include #include +#include #include #include #include +#include +#include #include #include #include @@ -524,6 +527,14 @@ void BackupEntriesCollector::gatherTablesMetadata() checkIsQueryCancelled(); table_infos.clear(); + + /// Collect target tables of refreshable materialized views that use the REPLACE + /// refresh strategy (APPEND is excluded) among the tables being backed up. + /// We build this snapshot from the storages we've already found, so the decision + /// in `shouldBackupTableData` doesn't need to query `DatabaseCatalog` again and + /// is scoped to the tables within the backup. 
+ std::unordered_set rmv_replace_target_ids; + for (const auto & [database_name, database_info] : database_infos) { std::vector> db_tables = findTablesInDatabase(database_name); @@ -551,7 +562,6 @@ void BackupEntriesCollector::gatherTablesMetadata() / escapeForFileName(table_name_in_backup.table); } - /// Add information to `table_infos`. const auto qualified_name = QualifiedTableName{database_name, table_name}; auto & res_table_info = table_infos[qualified_name]; res_table_info.database = database_info.database; @@ -559,27 +569,41 @@ void BackupEntriesCollector::gatherTablesMetadata() res_table_info.create_table_query = create_table_query; res_table_info.metadata_path_in_backup = metadata_path_in_backup; res_table_info.data_path_in_backup = data_path_in_backup; - res_table_info.should_backup_data = shouldBackupTableData(qualified_name, storage); - if (res_table_info.should_backup_data) + if (const auto * mv = typeid_cast(storage.get())) { - auto it = database_info.tables.find(table_name); - if (it != database_info.tables.end()) - { - const auto & partitions = it->second.partitions; - if (partitions && storage && !storage->supportsBackupPartition()) - { - throw Exception( - ErrorCodes::CANNOT_BACKUP_TABLE, - "Table engine {} doesn't support partitions, cannot backup {}", - storage->getName(), - tableNameWithTypeToString(database_name, table_name, false)); - } - res_table_info.partitions = partitions; - } + if (mv->isRefreshable() && !mv->isAppendRefreshStrategy()) + rmv_replace_target_ids.insert(mv->getTargetTableId()); } } } + + /// Second pass: now that we have the full snapshot of tables and RMV targets, + /// decide whether the data of each table should be backed up and validate + /// partition-related constraints. 
+ for (auto & [qualified_name, res_table_info] : table_infos) + { + res_table_info.should_backup_data = shouldBackupTableData(qualified_name, res_table_info.storage, rmv_replace_target_ids); + + if (!res_table_info.should_backup_data) + continue; + + const auto & database_info = database_infos.at(qualified_name.database); + auto it = database_info.tables.find(qualified_name.table); + if (it == database_info.tables.end()) + continue; + + const auto & partitions = it->second.partitions; + if (partitions && res_table_info.storage && !res_table_info.storage->supportsBackupPartition()) + { + throw Exception( + ErrorCodes::CANNOT_BACKUP_TABLE, + "Table engine {} doesn't support partitions, cannot backup {}", + res_table_info.storage->getName(), + tableNameWithTypeToString(qualified_name.database, qualified_name.table, false)); + } + res_table_info.partitions = partitions; + } } std::vector> BackupEntriesCollector::findTablesInDatabase(const String & database_name) const @@ -857,7 +881,10 @@ void BackupEntriesCollector::makeBackupEntriesForTableData(const QualifiedTableN } } -bool BackupEntriesCollector::shouldBackupTableData(const QualifiedTableName & table_name, const StoragePtr & storage) const +bool BackupEntriesCollector::shouldBackupTableData( + const QualifiedTableName & table_name, + const StoragePtr & storage, + const std::unordered_set & rmv_replace_target_ids) const { if (backup_settings.structure_only) return false; @@ -866,7 +893,7 @@ bool BackupEntriesCollector::shouldBackupTableData(const QualifiedTableName & ta return true; if (!backup_settings.backup_data_from_refreshable_materialized_view_targets - && BackupUtils::isTargetForReplaceRefreshableMaterializedView(storage->getStorageID(), context)) + && rmv_replace_target_ids.contains(storage->getStorageID())) { LOG_TRACE(log, "Skipping table data for {} (a target of a refreshable materialized view)", table_name.getFullName()); return false; diff --git a/src/Backups/BackupEntriesCollector.h 
b/src/Backups/BackupEntriesCollector.h index 5cf0275d1212..a7b54226acda 100644 --- a/src/Backups/BackupEntriesCollector.h +++ b/src/Backups/BackupEntriesCollector.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -20,7 +21,6 @@ using BackupEntries = std::vector>; class IBackupCoordination; class IDatabase; using DatabasePtr = std::shared_ptr; -struct StorageID; struct IAccessEntity; using AccessEntityPtr = std::shared_ptr; class QueryStatus; @@ -97,7 +97,10 @@ class BackupEntriesCollector : private boost::noncopyable void makeBackupEntriesForTablesDefs(); void makeBackupEntriesForTablesData(); void makeBackupEntriesForTableData(const QualifiedTableName & table_name); - bool shouldBackupTableData(const QualifiedTableName & table_name, const StoragePtr & storage) const; + bool shouldBackupTableData( + const QualifiedTableName & table_name, + const StoragePtr & storage, + const std::unordered_set & rmv_replace_target_ids) const; void addBackupEntryUnlocked(const String & file_name, BackupEntryPtr backup_entry); diff --git a/src/Backups/BackupUtils.cpp b/src/Backups/BackupUtils.cpp index 14953a2930a7..87b4031c3414 100644 --- a/src/Backups/BackupUtils.cpp +++ b/src/Backups/BackupUtils.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include @@ -131,20 +130,4 @@ bool isInnerTable(const String & /* database_name */, const String & table_name) return table_name.starts_with(".inner.") || table_name.starts_with(".inner_id.") || table_name.starts_with(".tmp.inner.") || table_name.starts_with(".tmp.inner_id."); } -bool isTargetForReplaceRefreshableMaterializedView(const StorageID & storage_id, const ContextPtr & context) -{ - auto dependents = DatabaseCatalog::instance().getReferentialDependents(storage_id); - - auto is_rmv_targeting_table = [&](const StorageID & mv_candidate, const StorageID & target_id) -> bool - { - auto table = DatabaseCatalog::instance().tryGetTable(mv_candidate, context); - if (!table || table->getName() != 
"MaterializedView") - return false; - - const auto * mv = typeid_cast(table.get()); - return mv && mv->isRefreshable() && !mv->isAppendRefreshStrategy() && mv->getTargetTableId() == target_id; - }; - return std::any_of( - dependents.begin(), dependents.end(), [&](const auto & dependent) { return is_rmv_targeting_table(dependent, storage_id); }); -} } diff --git a/src/Backups/BackupUtils.h b/src/Backups/BackupUtils.h index 4b3fb6412a7a..ba889eccc48e 100644 --- a/src/Backups/BackupUtils.h +++ b/src/Backups/BackupUtils.h @@ -9,7 +9,6 @@ namespace DB class IBackup; class AccessRightsElements; class DDLRenamingMap; -struct StorageID; struct QualifiedTableName; namespace BackupUtils @@ -29,10 +28,6 @@ bool compareRestoredDatabaseDef(const IAST & restored_database_create_query, con bool isInnerTable(const QualifiedTableName & table_name); bool isInnerTable(const String & database_name, const String & table_name); -/// Returns true if the table is a target of a refreshable materialized view -/// using the REPLACE refresh strategy. -/// Targets with the APPEND strategy are excluded. 
-bool isTargetForReplaceRefreshableMaterializedView(const StorageID & storage_id, const ContextPtr & context); } } From 25a39b7ce91be78c8e62ef3ace84a825fef48c89 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 8 May 2026 16:46:47 +0000 Subject: [PATCH 11/41] Backport #101504 to 26.3: Fix join reorder pushing INNER JOIN conditions into outer join ON clause --- .../QueryPlan/Optimizations/optimizeJoin.cpp | 14 +++++- ...order_outer_join_filter_pushdown.reference | 4 ++ ...oin_reorder_outer_join_filter_pushdown.sql | 43 +++++++++++++++++++ 3 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/04075_join_reorder_outer_join_filter_pushdown.reference create mode 100644 tests/queries/0_stateless/04075_join_reorder_outer_join_filter_pushdown.sql diff --git a/src/Processors/QueryPlan/Optimizations/optimizeJoin.cpp b/src/Processors/QueryPlan/Optimizations/optimizeJoin.cpp index d3425b283f2a..eec6001aec3d 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeJoin.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeJoin.cpp @@ -735,15 +735,27 @@ void buildQueryGraph(QueryGraphBuilder & query_graph, QueryPlan::Node & node, Qu else { auto sources = edge.getSourceRelations(); + bool should_pin = false; + for (auto rel_id : sources) { auto it = query_graph.join_kinds.find(rel_id); if (it != query_graph.join_kinds.end()) { - query_graph.pinned[edge] = total_inputs - 1; + should_pin = true; break; } } + + /// If a condition references only the preserved side of an outer join, + /// it must not be placed in that outer join's ON clause, because + /// ON-clause conditions on the preserved side only affect matching, + /// not filtering — rows from the preserved side are kept regardless. 
+ should_pin = should_pin || std::ranges::any_of(query_graph.join_kinds | std::views::values, + [&sources](const auto & partner_info) { return isSubsetOf(sources, partner_info.first); }); + + if (should_pin) + query_graph.pinned[edge] = total_inputs - 1; } } diff --git a/tests/queries/0_stateless/04075_join_reorder_outer_join_filter_pushdown.reference b/tests/queries/0_stateless/04075_join_reorder_outer_join_filter_pushdown.reference new file mode 100644 index 000000000000..98fb6a686563 --- /dev/null +++ b/tests/queries/0_stateless/04075_join_reorder_outer_join_filter_pushdown.reference @@ -0,0 +1,4 @@ +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/04075_join_reorder_outer_join_filter_pushdown.sql b/tests/queries/0_stateless/04075_join_reorder_outer_join_filter_pushdown.sql new file mode 100644 index 000000000000..514956337ed4 --- /dev/null +++ b/tests/queries/0_stateless/04075_join_reorder_outer_join_filter_pushdown.sql @@ -0,0 +1,43 @@ +-- Regression test: join reorder must not push INNER JOIN filter conditions +-- into an outer join's ON clause, where they only affect matching +-- but do not filter preserved-side rows. + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; + +CREATE TABLE t1 (id UInt64, value String) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE t2 (id UInt64, value String) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE t3 (id UInt64, value String) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t1 VALUES (0, 'a'), (1, 'b'), (2, 'c'); +INSERT INTO t2 VALUES (0, 'x'), (1, 'y'), (3, 'z'); +INSERT INTO t3 VALUES (0, 'p'), (1, 'q'), (4, 'r'); + +-- The condition `t2.value = 'x'` is part of the INNER JOIN. +-- It must NOT be pushed into the RIGHT JOIN's ON clause by join reorder, +-- because that changes the semantics (preserved-side conditions in an +-- outer join ON clause only affect matching, not filtering). 
+ +SELECT count() +FROM t1 RIGHT JOIN t2 ON t1.id = t2.id AND t1.value = 'a' +INNER JOIN t3 ON t2.id = t3.id AND t2.value = 'x'; + +SELECT count() +FROM t1 RIGHT JOIN t2 ON t1.id = t2.id AND t1.value = 'a' +INNER JOIN t3 ON t2.id = t3.id AND t2.value = 'x' +SETTINGS query_plan_split_filter = 0; + +-- Same for LEFT JOIN (condition on preserved left side from parent INNER JOIN) +SELECT count() +FROM t1 LEFT JOIN t2 ON t1.id = t2.id AND t2.value = 'x' +INNER JOIN t3 ON t1.id = t3.id AND t1.value = 'a'; + +SELECT count() +FROM t1 LEFT JOIN t2 ON t1.id = t2.id AND t2.value = 'x' +INNER JOIN t3 ON t1.id = t3.id AND t1.value = 'a' +SETTINGS query_plan_split_filter = 0; + +DROP TABLE t1; +DROP TABLE t2; +DROP TABLE t3; From a19fe3c0d35a627d9cb62975fca9ad52bcd3d4c9 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 9 May 2026 17:32:26 +0000 Subject: [PATCH 12/41] Backport #100375 to 26.3: Fix Not-ready Set exception when IN subquery is moved to PREWHERE --- src/Interpreters/misc.h | 7 ++- .../QueryPlan/ReadFromMergeTree.cpp | 17 ++++++ .../MergeTree/MergeTreeWhereOptimizer.cpp | 7 +++ src/Storages/VirtualColumnUtils.cpp | 53 +++++++++++++++++++ src/Storages/VirtualColumnUtils.h | 5 ++ ...llel_replicas_joins_and_analyzer.reference | 16 ------ ...zer_distributed_filter_push_down.reference | 24 ++++----- ...03457_move_global_in_to_prewhere.reference | 4 -- ...0_analyzer_distributed_global_in.reference | 4 +- ..._subquery_prewhere_not_ready_set.reference | 0 ...053_in_subquery_prewhere_not_ready_set.sql | 32 +++++++++++ ...4070_global_in_subquery_prewhere.reference | 0 .../04070_global_in_subquery_prewhere.sql | 32 +++++++++++ 13 files changed, 166 insertions(+), 35 deletions(-) create mode 100644 tests/queries/0_stateless/04053_in_subquery_prewhere_not_ready_set.reference create mode 100644 tests/queries/0_stateless/04053_in_subquery_prewhere_not_ready_set.sql create mode 100644 tests/queries/0_stateless/04070_global_in_subquery_prewhere.reference create mode 100644 
tests/queries/0_stateless/04070_global_in_subquery_prewhere.sql diff --git a/src/Interpreters/misc.h b/src/Interpreters/misc.h index b77fc5aee1ec..5526b85be82c 100644 --- a/src/Interpreters/misc.h +++ b/src/Interpreters/misc.h @@ -16,9 +16,14 @@ inline bool functionIsInOperator(const std::string & name) return name == "in" || name == "notIn" || name == "nullIn" || name == "notNullIn"; } +inline bool functionIsGlobalInOperator(const std::string & name) +{ + return name == "globalIn" || name == "globalNotIn" || name == "globalNullIn" || name == "globalNotNullIn"; +} + inline bool functionIsInOrGlobalInOperator(const std::string & name) { - return functionIsInOperator(name) || name == "globalIn" || name == "globalNotIn" || name == "globalNullIn" || name == "globalNotNullIn"; + return functionIsInOperator(name) || functionIsGlobalInOperator(name); } inline bool functionIsLikeOperator(const std::string & name) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index c4131f6f2d95..4eb8094fa17c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -2822,6 +2822,23 @@ void ReadFromMergeTree::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info { query_info.prewhere_info = prewhere_info_value; + /// Build sets for the new PREWHERE synchronously. PREWHERE is evaluated at the + /// storage level during data reading, before the pipeline-level CreatingSetsStep + /// has a chance to execute. If a condition with IN (subquery) was moved to PREWHERE + /// by optimizePrewhere after applyFilters already ran, the set would remain unbuilt + /// and cause a "Not-ready Set" error. + /// We must skip sets used in GLOBAL IN functions because ReadFromRemote needs to + /// attach external tables to those sets before they are built. Building them here + /// would cause "Trying to attach external table to a ready set" errors. 
+ /// Only build sets when applyFilters has already been called for this step (indicated by + /// `indexes` being populated). The plan built by `considerEnablingParallelReplicas` for + /// statistics collection runs `optimizePrewhere` without `optimizePrimaryKeyConditionAndLimit`, + /// so `applyFilters` is skipped there and sets must not be built — the original plan's + /// `CreatingSetsStep` (added later via `addStepsToBuildSets`) handles them. Building here + /// would re-execute the IN-subquery and double-count its rows against `max_rows_to_read`. + if (query_info.prewhere_info && indexes.has_value()) + VirtualColumnUtils::buildSetsForDAGExcludingGlobalIn(query_info.prewhere_info->prewhere_actions, context); + output_header = std::make_shared(MergeTreeSelectProcessor::transformHeader( storage_snapshot->getSampleBlockForColumns(all_column_names), query_info.row_level_filter, diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 460f6ebf22ee..5bd0d6094de6 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -602,6 +602,13 @@ bool MergeTreeWhereOptimizer::cannotBeMoved(const RPNBuilderTreeNode & node, con if (function_name == "arrayJoin") return true; + /// Disallow GLOBAL IN conditions from being moved to PREWHERE. + /// GLOBAL IN sets are populated via external tables attached by `ReadFromRemote`; + /// they cannot be built synchronously during PREWHERE evaluation, which runs + /// before the pipeline-level `CreatingSetsStep` has a chance to execute. 
+ if (functionIsGlobalInOperator(function_name)) + return true; + size_t arguments_size = function_node.getArgumentsSize(); for (size_t i = 0; i < arguments_size; ++i) { diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index cce131a5b959..c07e2e4b712b 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -1,5 +1,6 @@ #include #include +#include #include @@ -7,6 +8,7 @@ #include #include +#include #include #include #include @@ -94,6 +96,57 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context) buildSetsForDagImpl(dag, context, /* ordered = */ false); } +void buildSetsForDAGExcludingGlobalIn(const ActionsDAG & dag, const ContextPtr & context) +{ + /// Collect ColumnSet nodes that are arguments to globalIn/globalNotIn functions. + /// These sets must NOT be built synchronously here because ReadFromRemote needs to + /// attach external tables to them first (via setExternalTable). Building them early + /// would make the set "created" without explicit elements, causing a LOGICAL_ERROR. 
+ std::unordered_set global_in_set_nodes; + for (const auto & node : dag.getNodes()) + { + if (node.type == ActionsDAG::ActionType::FUNCTION && node.function_base) + { + auto name = node.function_base->getName(); + if (functionIsGlobalInOperator(name)) + { + /// The set is the second argument (index 1) + if (node.children.size() >= 2) + global_in_set_nodes.insert(node.children[1]); + } + } + } + + for (const auto & node : dag.getNodes()) + { + if (node.type == ActionsDAG::ActionType::COLUMN && !global_in_set_nodes.contains(&node)) + { + const ColumnSet * column_set = checkAndGetColumnConstData(node.column.get()); + if (!column_set) + column_set = checkAndGetColumn(node.column.get()); + + if (column_set) + { + auto future_set = column_set->getData(); + if (!future_set->get()) + { + if (auto * set_from_subquery = typeid_cast(future_set.get())) + { + /// Prefer ordered build so that the set retains explicit elements, + /// which `KeyCondition` and skip-index analysis require to use the set + /// for primary-key / skip-index filtering (via `buildOrderedSetInplace`). + /// If `use_index_for_in_with_subqueries` is disabled, the ordered build + /// returns `nullptr` without building; fall back to unordered so the set + /// is still ready when PREWHERE is evaluated at read time. + if (!set_from_subquery->buildOrderedSetInplace(context)) + set_from_subquery->buildSetInplace(context); + } + } + } + } + } +} + void buildOrderedSetsForDAG(const ActionsDAG & dag, const ContextPtr & context) { buildSetsForDagImpl(dag, context, /* ordered = */ true); diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index bcd3b9974b1d..631ab9250d26 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -45,6 +45,11 @@ void filterBlockWithExpression(const ExpressionActionsPtr & actions, Block & blo /// Builds sets used by ActionsDAG inplace. 
void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context); +/// Builds sets used by ActionsDAG inplace, but skips sets that are arguments to +/// GLOBAL IN functions (globalIn, globalNotIn, globalNullIn, globalNotNullIn). +/// Those sets need external tables set up by ReadFromRemote before they can be built. +void buildSetsForDAGExcludingGlobalIn(const ActionsDAG & dag, const ContextPtr & context); + /// Builds ordered sets used by ActionsDAG inplace. void buildOrderedSetsForDAG(const ActionsDAG & dag, const ContextPtr & context); diff --git a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference index 6be4160043ac..72f729ad0a4d 100644 --- a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference +++ b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference @@ -407,10 +407,6 @@ Expression Expression Expression ReadFromMergeTree - CreatingSet - Expression - Filter - ReadFromSystemNumbers Expression Expression ReadFromMemoryStorage @@ -466,10 +462,6 @@ Expression Expression Expression ReadFromMergeTree - CreatingSet - Expression - Filter - ReadFromSystemNumbers Expression Union Expression @@ -894,10 +886,6 @@ Expression Expression Expression ReadFromMergeTree - CreatingSet - Expression - Filter - ReadFromSystemNumbers Expression Expression Expression @@ -955,10 +943,6 @@ Expression Expression Expression ReadFromMergeTree - CreatingSet - Expression - Filter - ReadFromSystemNumbers Expression Union Expression diff --git a/tests/queries/0_stateless/03302_analyzer_distributed_filter_push_down.reference b/tests/queries/0_stateless/03302_analyzer_distributed_filter_push_down.reference index 8f349489e516..b805deabd02d 100644 --- a/tests/queries/0_stateless/03302_analyzer_distributed_filter_push_down.reference +++ b/tests/queries/0_stateless/03302_analyzer_distributed_filter_push_down.reference @@ -312,7 +312,7 
@@ CreatingSets (Create sets before main query execution) ReadFromRemote (Read from remote replica) CreatingSets (Create sets before main query execution) Expression ((Project names + Projection)) - Expression ((WHERE + Change column names to column identifiers)) + Filter ((WHERE + Change column names to column identifiers)) ReadFromMergeTree (default.tab0) Indexes: PrimaryKey @@ -331,7 +331,7 @@ CreatingSets (Create sets before main query execution) Expression ((Project names + Projection)) Aggregating Expression (Before GROUP BY) - Expression ((WHERE + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) + Filter ((WHERE + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) ReadFromMergeTree (default.tab0) Indexes: PrimaryKey @@ -351,7 +351,7 @@ CreatingSets (Create sets before main query execution) ReadFromRemote (Read from remote replica) CreatingSets (Create sets before main query execution) Expression ((Project names + Projection)) - Expression ((WHERE + Change column names to column identifiers)) + Filter ((WHERE + Change column names to column identifiers)) ReadFromMergeTree (default.tab0) Indexes: PrimaryKey @@ -364,7 +364,7 @@ CreatingSets (Create sets before main query execution) Ranges: 0 CreatingSets (Create sets before main query execution) Expression ((Project names + Projection)) - Expression ((WHERE + Change column names to column identifiers)) + Filter ((WHERE + Change column names to column identifiers)) ReadFromMergeTree (default.tab0) Indexes: PrimaryKey @@ -384,7 +384,7 @@ CreatingSets (Create sets before main query execution) Aggregating Union Expression (Before GROUP BY) - Expression ((WHERE + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) + Filter ((WHERE + (Change column names to column identifiers + (Project names + 
(Projection + Change column names to column identifiers))))) ReadFromMergeTree (default.tab0) Indexes: PrimaryKey @@ -400,7 +400,7 @@ CreatingSets (Create sets before main query execution) ReadFromRemote (Read from remote replica) CreatingSets (Create sets before main query execution) Expression ((Project names + Projection)) - Expression ((WHERE + Change column names to column identifiers)) + Filter ((WHERE + Change column names to column identifiers)) ReadFromMergeTree (default.tab0) Indexes: PrimaryKey @@ -417,7 +417,7 @@ CreatingSets (Create sets before main query execution) Aggregating Union Expression (Before GROUP BY) - Expression ((WHERE + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) + Filter ((WHERE + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) ReadFromMergeTree (default.tab0) Indexes: PrimaryKey @@ -433,7 +433,7 @@ CreatingSets (Create sets before main query execution) ReadFromRemote (Read from remote replica) CreatingSets (Create sets before main query execution) Expression ((Project names + Projection)) - Expression ((WHERE + Change column names to column identifiers)) + Filter ((WHERE + Change column names to column identifiers)) ReadFromMergeTree (default.tab0) Indexes: PrimaryKey @@ -446,7 +446,7 @@ CreatingSets (Create sets before main query execution) Ranges: 1 CreatingSets (Create sets before main query execution) Expression ((Project names + Projection)) - Expression ((WHERE + Change column names to column identifiers)) + Filter ((WHERE + Change column names to column identifiers)) ReadFromMergeTree (default.tab0) Indexes: PrimaryKey @@ -463,7 +463,7 @@ CreatingSets (Create sets before main query execution) Aggregating Union Expression (Before GROUP BY) - Expression ((WHERE + (Change column names to column identifiers + (Project names + (Projection + Change column names to column 
identifiers))))) + Filter ((WHERE + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) ReadFromMergeTree (default.tab0) Indexes: PrimaryKey @@ -476,7 +476,7 @@ CreatingSets (Create sets before main query execution) ReadFromRemote (Read from remote replica) CreatingSets (Create sets before main query execution) Expression ((Project names + Projection)) - Expression ((WHERE + Change column names to column identifiers)) + Filter ((WHERE + Change column names to column identifiers)) ReadFromMergeTree (default.tab0) Indexes: PrimaryKey @@ -489,7 +489,7 @@ CreatingSets (Create sets before main query execution) ReadFromMemoryStorage CreatingSets (Create sets before main query execution) Expression ((Project names + Projection)) - Expression ((WHERE + Change column names to column identifiers)) + Filter ((WHERE + Change column names to column identifiers)) ReadFromMergeTree (default.tab0) Indexes: PrimaryKey diff --git a/tests/queries/0_stateless/03457_move_global_in_to_prewhere.reference b/tests/queries/0_stateless/03457_move_global_in_to_prewhere.reference index 6b3f681fc90b..f7486d4b5aca 100644 --- a/tests/queries/0_stateless/03457_move_global_in_to_prewhere.reference +++ b/tests/queries/0_stateless/03457_move_global_in_to_prewhere.reference @@ -1,17 +1,13 @@ 3 2048 23 2048 -Prewhere filter column: globalIn(key, ) (removed) 3 2048 -Prewhere filter column: globalIn(key, ) (removed) 0 2048 1 2048 2 2048 4 2048 5 2048 -Prewhere filter column: globalNotIn(key, ) (removed) 0 2048 1 2048 2 2048 4 2048 5 2048 -Prewhere filter column: globalNotIn(key, ) (removed) diff --git a/tests/queries/0_stateless/03620_analyzer_distributed_global_in.reference b/tests/queries/0_stateless/03620_analyzer_distributed_global_in.reference index 070fc644d1c5..689e784cc037 100644 --- a/tests/queries/0_stateless/03620_analyzer_distributed_global_in.reference +++ 
b/tests/queries/0_stateless/03620_analyzer_distributed_global_in.reference @@ -22,7 +22,7 @@ CreatingSets (Create sets before main query execution) Expression ((Project names + Projection)) Aggregating Expression (Before GROUP BY) - Expression ((WHERE + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) + Filter ((WHERE + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) ReadFromMergeTree (default.tab0) Indexes: PrimaryKey @@ -44,7 +44,7 @@ CreatingSets (Create sets before main query execution) ReadFromRemote (Read from remote replica) CreatingSets (Create sets before main query execution) Expression ((Project names + Projection)) - Expression ((WHERE + Change column names to column identifiers)) + Filter ((WHERE + Change column names to column identifiers)) ReadFromMergeTree (default.tab0) Indexes: PrimaryKey diff --git a/tests/queries/0_stateless/04053_in_subquery_prewhere_not_ready_set.reference b/tests/queries/0_stateless/04053_in_subquery_prewhere_not_ready_set.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/04053_in_subquery_prewhere_not_ready_set.sql b/tests/queries/0_stateless/04053_in_subquery_prewhere_not_ready_set.sql new file mode 100644 index 000000000000..78898228fc4b --- /dev/null +++ b/tests/queries/0_stateless/04053_in_subquery_prewhere_not_ready_set.sql @@ -0,0 +1,32 @@ +-- Regression test for "Not-ready Set" error when IN (subquery) condition +-- gets moved to PREWHERE by optimizePrewhere after applyFilters already ran. +-- https://github.com/ClickHouse/ClickHouse/issues/100318 + +-- Pin the optimizer settings that trigger the rewrite this test exercises; +-- otherwise randomized runs may disable PREWHERE move and skip the fixed path. 
+SET query_plan_optimize_prewhere = 1; +SET optimize_move_to_prewhere = 1; + +CREATE TABLE t_100318_log (v0 UInt32) ENGINE = Log; +CREATE TABLE t_100318_mt (v0 UInt32, v1 UInt32, v2 DateTime, PRIMARY KEY(v1)) ENGINE = SummingMergeTree; +CREATE TABLE t_100318_rmt (v0 UInt32, v1 UInt32, PRIMARY KEY(v0)) ENGINE = ReplacingMergeTree; + +INSERT INTO t_100318_mt VALUES (13, 23000, '2100-01-05'); +INSERT INTO t_100318_mt VALUES (16, 26000, '2066-10-07'); +INSERT INTO t_100318_rmt VALUES (91, 101000); + +SELECT 1 FROM (SELECT 1 FROM t_100318_log) +WHERE EXISTS ( + SELECT 1 + UNION ALL + SELECT ref_4.v0 FROM ( + SELECT row_number() OVER (PARTITION BY t_100318_mt.v0) AS c_1 + FROM t_100318_mt + WHERE t_100318_mt.v2 IN (SELECT 1 FROM t_100318_log) + ) AS ref_3 + INNER JOIN t_100318_rmt AS ref_4 ON (ref_3.c_1 = ref_4.v0) +); + +DROP TABLE t_100318_log; +DROP TABLE t_100318_mt; +DROP TABLE t_100318_rmt; diff --git a/tests/queries/0_stateless/04070_global_in_subquery_prewhere.reference b/tests/queries/0_stateless/04070_global_in_subquery_prewhere.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/04070_global_in_subquery_prewhere.sql b/tests/queries/0_stateless/04070_global_in_subquery_prewhere.sql new file mode 100644 index 000000000000..2833b5580c1d --- /dev/null +++ b/tests/queries/0_stateless/04070_global_in_subquery_prewhere.sql @@ -0,0 +1,32 @@ +-- Regression test: GLOBAL IN (subquery) must not be moved to PREWHERE, +-- because GLOBAL IN sets are populated via external tables attached by ReadFromRemote +-- and cannot be built synchronously during PREWHERE evaluation. +-- Also covers null-aware variants (globalNullIn/globalNotNullIn) via transform_null_in. +-- https://github.com/ClickHouse/ClickHouse/pull/100375 + +-- Pin the optimizer settings that drive PREWHERE assignment so that randomized +-- runs which disable them do not bypass the `cannotBeMoved` guard under test. 
+SET query_plan_optimize_prewhere = 1; +SET optimize_move_to_prewhere = 1; +SET transform_null_in = 1; + +CREATE TABLE t_100375_mt (v0 UInt32, v1 UInt32, v2 Nullable(DateTime), PRIMARY KEY(v1)) ENGINE = SummingMergeTree; +CREATE TABLE t_100375_log (v0 UInt32) ENGINE = Log; + +INSERT INTO t_100375_mt VALUES (13, 23000, '2100-01-05'); +INSERT INTO t_100375_mt VALUES (16, 26000, '2066-10-07'); + +SELECT 1 FROM (SELECT 1 FROM t_100375_log) +WHERE EXISTS ( + SELECT 1 + UNION ALL + SELECT ref_4.v0 FROM ( + SELECT row_number() OVER (PARTITION BY t_100375_mt.v0) AS c_1 + FROM t_100375_mt + WHERE t_100375_mt.v2 GLOBAL IN (SELECT 1 FROM t_100375_log) + ) AS ref_3 + INNER JOIN t_100375_mt AS ref_4 ON (ref_3.c_1 = ref_4.v0) +); + +DROP TABLE t_100375_mt; +DROP TABLE t_100375_log; From 1b5dd2992722e93b89a0a39539037539e0996a2d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 9 May 2026 18:36:22 +0000 Subject: [PATCH 13/41] Backport #104317 to 26.3: Fix projection matching regression from `removeTrivialWrappers` in `appendExpression` --- .../Optimizations/projectionsCommon.cpp | 4 +- ..._projection_prewhere_materialize.reference | 2 + .../04202_projection_prewhere_materialize.sql | 103 ++++++++++++++++++ 3 files changed, 108 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/04202_projection_prewhere_materialize.reference create mode 100644 tests/queries/0_stateless/04202_projection_prewhere_materialize.sql diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index 4555226b606a..0c0d77713726 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -105,7 +105,6 @@ PartitionIdToMaxBlockPtr getMaxAddedBlocks(ReadFromMergeTree * reading) void QueryDAG::appendExpression(const ActionsDAG & expression) { auto cloned = expression.clone(); - cloned.removeTrivialWrappers(); if (dag) 
dag->mergeInplace(std::move(cloned)); @@ -250,6 +249,9 @@ bool QueryDAG::build(QueryPlan::Node & node) outputs.insert(outputs.begin(), filter_node); } + if (dag) + dag->removeTrivialWrappers(); + return true; } diff --git a/tests/queries/0_stateless/04202_projection_prewhere_materialize.reference b/tests/queries/0_stateless/04202_projection_prewhere_materialize.reference new file mode 100644 index 000000000000..e27f187e08bd --- /dev/null +++ b/tests/queries/0_stateless/04202_projection_prewhere_materialize.reference @@ -0,0 +1,2 @@ +12.5 +a4f08805-17ee-44bd-bd12-53a909221f5e diff --git a/tests/queries/0_stateless/04202_projection_prewhere_materialize.sql b/tests/queries/0_stateless/04202_projection_prewhere_materialize.sql new file mode 100644 index 000000000000..4b58d9d6f6a3 --- /dev/null +++ b/tests/queries/0_stateless/04202_projection_prewhere_materialize.sql @@ -0,0 +1,103 @@ +-- Test for issues #104117, #104235, #104256 +-- Regression from PR #88798: removeTrivialWrappers in appendExpression +-- breaks incremental DAG merging when materialize() wrappers are stripped +-- before the full DAG is assembled. 
+ +SET optimize_use_projections = 1; +SET force_optimize_projection = 1; + +-- Issue #104117: NOT_FOUND_COLUMN_IN_BLOCK with UNION ALL view + projection + WHERE on aggregated column +DROP TABLE IF EXISTS t1_04202; +DROP TABLE IF EXISTS t2_04202; +DROP VIEW IF EXISTS v_04202; + +CREATE TABLE t1_04202 ( + id Int64, + ts DateTime64(6), + grp String, + val Int32, + PROJECTION proj_filter (SELECT grp, ts, val ORDER BY grp, ts) +) ENGINE = MergeTree ORDER BY ts; + +CREATE TABLE t2_04202 ( + id Int64, + ts DateTime64(6), + grp String, + val Int32, + PROJECTION proj_filter (SELECT grp, ts, val ORDER BY grp, ts) +) ENGINE = MergeTree ORDER BY ts; + +INSERT INTO t1_04202 VALUES (1, '2026-04-01 00:00:00', 'a', 5), (2, '2026-04-02 00:00:00', 'b', 10); +INSERT INTO t2_04202 VALUES (3, '2026-02-01 00:00:00', 'a', 15), (4, '2026-02-02 00:00:00', 'b', 20); + +CREATE VIEW v_04202 AS +SELECT toInt64(0) AS id, ts, grp, val FROM t1_04202 WHERE ts >= '2026-03-05 21:00:00' +UNION ALL +SELECT id, ts, grp, val FROM t2_04202 WHERE ts < '2026-03-05 21:00:00'; + +SELECT avg(val) FROM v_04202 WHERE val > 0; + +DROP VIEW v_04202; +DROP TABLE t1_04202; +DROP TABLE t2_04202; + +-- Issue #104235: Block structure mismatch with window functions + projection + WHERE +DROP TABLE IF EXISTS t_window_04202; + +CREATE TABLE t_window_04202 ( + id UUID, + session_id UUID, + role String, + ts DateTime64(6), + PROJECTION proj_session_id (SELECT * ORDER BY session_id) +) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_window_04202 VALUES + (generateUUIDv4(), generateUUIDv4(), 'assistant', '2026-01-01 00:00:00'), + (generateUUIDv4(), generateUUIDv4(), 'user', '2026-01-01 00:00:01'); + +SELECT + session_id, + ts, + row_number() OVER (PARTITION BY session_id ORDER BY ts) AS rn +FROM t_window_04202 +WHERE role = 'assistant' +SETTINGS query_plan_remove_unused_columns = 0 +FORMAT Null; + +DROP TABLE t_window_04202; + +-- Issue #104256: AMBIGUOUS_COLUMN_NAME when alias collides with source column + projection +-- 
The regression is the exception itself (AMBIGUOUS_COLUMN_NAME / block structure mismatch), +-- not the empty result — the empty result is pre-existing analyzer behavior where alias +-- shadows the column name in WHERE clause. Use prefer_column_name_to_alias = 1 to fix that. +DROP TABLE IF EXISTS t_alias_04202; + +CREATE TABLE t_alias_04202 ( + id UInt64, + session_id UUID, + role String, + PROJECTION proj_session_id (SELECT * ORDER BY session_id) +) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_alias_04202 VALUES + (1, 'a4f08805-17ee-44bd-bd12-53a909221f5e', 'assistant'), + (2, generateUUIDv4(), 'user'); + +-- Without prefer_column_name_to_alias, alias shadows column in WHERE → empty result (pre-existing). +-- Previously threw AMBIGUOUS_COLUMN_NAME with projection + query_plan_remove_unused_columns = 0. +SELECT toString(session_id) AS session_id +FROM t_alias_04202 +WHERE session_id IN _CAST(['a4f08805-17ee-44bd-bd12-53a909221f5e'], 'Array(UUID)') +ORDER BY role +SETTINGS query_plan_remove_unused_columns = 0 +FORMAT Null; + +-- With prefer_column_name_to_alias = 1, WHERE references the source column correctly. +SELECT toString(session_id) AS session_id +FROM t_alias_04202 +WHERE session_id IN _CAST(['a4f08805-17ee-44bd-bd12-53a909221f5e'], 'Array(UUID)') +ORDER BY role +SETTINGS query_plan_remove_unused_columns = 0, prefer_column_name_to_alias = 1; + +DROP TABLE t_alias_04202; From b4dfe08b6581ee6f4f1b90b4414d6ed7cabf3ad2 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 11 May 2026 11:32:57 +0000 Subject: [PATCH 14/41] Backport fix for IPv6StringToNumOrDefault to 26.3 Fix `IPv6StringToNumOrDefault` not properly zeroing output bytes on parse failure. Only one byte was zeroed (`vec_res[i] = 0`) instead of all 16 bytes (`std::fill_n(&vec_res[out_offset], offset_inc, 0)`), causing stale data from previous successful parses to leak into default results. 
Backport of https://github.com/ClickHouse/ClickHouse/pull/93543 --- src/Functions/FunctionsCodingIP.h | 2 +- .../04206_ipv6_string_to_num_or_default_bug.reference | 2 ++ .../0_stateless/04206_ipv6_string_to_num_or_default_bug.sql | 6 ++++++ 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/04206_ipv6_string_to_num_or_default_bug.reference create mode 100644 tests/queries/0_stateless/04206_ipv6_string_to_num_or_default_bug.sql diff --git a/src/Functions/FunctionsCodingIP.h b/src/Functions/FunctionsCodingIP.h index 00b1deb1a3d5..9ec94dc2d628 100644 --- a/src/Functions/FunctionsCodingIP.h +++ b/src/Functions/FunctionsCodingIP.h @@ -180,7 +180,7 @@ namespace detail if constexpr (exception_mode == IPStringToNumExceptionMode::Throw) throw Exception(ErrorCodes::CANNOT_PARSE_IPV6, "Invalid IPv6 value"); else if constexpr (exception_mode == IPStringToNumExceptionMode::Default) - vec_res[i] = 0; + std::fill_n(&vec_res[out_offset], offset_inc, 0); else if constexpr (exception_mode == IPStringToNumExceptionMode::Null) (*vec_null_map_to)[i] = true; } diff --git a/tests/queries/0_stateless/04206_ipv6_string_to_num_or_default_bug.reference b/tests/queries/0_stateless/04206_ipv6_string_to_num_or_default_bug.reference new file mode 100644 index 000000000000..0c8995e09979 --- /dev/null +++ b/tests/queries/0_stateless/04206_ipv6_string_to_num_or_default_bug.reference @@ -0,0 +1,2 @@ +:: 50 +::ffff:104.30.2.197 50 diff --git a/tests/queries/0_stateless/04206_ipv6_string_to_num_or_default_bug.sql b/tests/queries/0_stateless/04206_ipv6_string_to_num_or_default_bug.sql new file mode 100644 index 000000000000..7b37931f50b3 --- /dev/null +++ b/tests/queries/0_stateless/04206_ipv6_string_to_num_or_default_bug.sql @@ -0,0 +1,6 @@ +-- Verify that IPv6StringToNumOrDefault properly zeroes all 16 bytes on failure. 
+-- Bug: only one byte was zeroed (vec_res[i] = 0 instead of filling all 16 bytes), +-- causing stale parse data to leak into "default" results. +-- https://github.com/ClickHouse/ClickHouse/pull/93543 + +SELECT IPv6NumToString(IPv6StringToNumOrDefault(if(number % 2 = 0, '::ffff:104.30.2.197', 'invalid'))) AS ip, count() AS c FROM numbers(100) GROUP BY ip ORDER BY ip; From 68d5adc0cb24cf95f7031fa43eaf25bd0819d161 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 11 May 2026 17:09:22 +0000 Subject: [PATCH 15/41] Backport #103277 to 26.3: Fix data race in dictionaries --- .../ClickHouseDictionarySource.cpp | 6 ++--- src/Dictionaries/ClickHouseDictionarySource.h | 3 ++- src/Dictionaries/InvalidateQueryResponse.cpp | 21 ++++++++++++++++ src/Dictionaries/InvalidateQueryResponse.h | 24 +++++++++++++++++++ src/Dictionaries/MySQLDictionarySource.cpp | 6 +---- src/Dictionaries/MySQLDictionarySource.h | 3 ++- .../PostgreSQLDictionarySource.cpp | 4 +--- src/Dictionaries/PostgreSQLDictionarySource.h | 3 ++- src/Dictionaries/XDBCDictionarySource.cpp | 4 +--- src/Dictionaries/XDBCDictionarySource.h | 3 ++- 10 files changed, 58 insertions(+), 19 deletions(-) create mode 100644 src/Dictionaries/InvalidateQueryResponse.cpp create mode 100644 src/Dictionaries/InvalidateQueryResponse.h diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index e37b5704467f..e771691db3fe 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -145,10 +145,8 @@ bool ClickHouseDictionarySource::isModified() const if (!configuration.invalidate_query.empty()) { auto response = doInvalidateQuery(configuration.invalidate_query); - LOG_TRACE(log, "Invalidate query has returned: {}, previous value: {}", response, invalidate_query_response); - if (invalidate_query_response == response) - return false; - invalidate_query_response = response; + LOG_TRACE(log, "Invalidate query has 
returned: {}", response); + return invalidate_query_response.updateAndCheckModified(response); } return true; } diff --git a/src/Dictionaries/ClickHouseDictionarySource.h b/src/Dictionaries/ClickHouseDictionarySource.h index 91e4bbad88bc..6b9440c80172 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.h +++ b/src/Dictionaries/ClickHouseDictionarySource.h @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB @@ -82,7 +83,7 @@ class ClickHouseDictionarySource final : public IDictionarySource std::chrono::time_point update_time; const DictionaryStructure dict_struct; const Configuration configuration; - mutable std::string invalidate_query_response; + mutable InvalidateQueryResponse invalidate_query_response; ExternalQueryBuilderPtr query_builder; Block sample_block; ContextMutablePtr context; diff --git a/src/Dictionaries/InvalidateQueryResponse.cpp b/src/Dictionaries/InvalidateQueryResponse.cpp new file mode 100644 index 000000000000..7ed91254673b --- /dev/null +++ b/src/Dictionaries/InvalidateQueryResponse.cpp @@ -0,0 +1,21 @@ +#include + +namespace DB +{ + +InvalidateQueryResponse::InvalidateQueryResponse(const InvalidateQueryResponse & other) +{ + std::lock_guard lock(other.mutex); + response = other.response; +} + +bool InvalidateQueryResponse::updateAndCheckModified(const std::string & new_response) +{ + std::lock_guard lock(mutex); + if (response == new_response) + return false; + response = new_response; + return true; +} + +} diff --git a/src/Dictionaries/InvalidateQueryResponse.h b/src/Dictionaries/InvalidateQueryResponse.h new file mode 100644 index 000000000000..5ad836ea196a --- /dev/null +++ b/src/Dictionaries/InvalidateQueryResponse.h @@ -0,0 +1,24 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/// Thread-safe holder for the last value returned by a dictionary source's invalidate query. 
+class InvalidateQueryResponse +{ +public: + InvalidateQueryResponse() = default; + InvalidateQueryResponse(const InvalidateQueryResponse & other); + InvalidateQueryResponse & operator=(const InvalidateQueryResponse &) = delete; + + bool updateAndCheckModified(const std::string & new_response); + +private: + mutable std::mutex mutex; + std::string response; +}; + +} diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index 5ed6947f4617..89fcc53c151a 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -301,11 +301,7 @@ bool MySQLDictionarySource::isModified() const { LOG_TRACE(log, "Executing invalidate query: {}", configuration.invalidate_query); auto response = doInvalidateQuery(configuration.invalidate_query); - if (response == invalidate_query_response) - return false; - - invalidate_query_response = response; - return true; + return invalidate_query_response.updateAndCheckModified(response); } return true; diff --git a/src/Dictionaries/MySQLDictionarySource.h b/src/Dictionaries/MySQLDictionarySource.h index 46f1adaf2e08..ab3a8559904d 100644 --- a/src/Dictionaries/MySQLDictionarySource.h +++ b/src/Dictionaries/MySQLDictionarySource.h @@ -10,6 +10,7 @@ # include # include # include +# include # include namespace Poco @@ -89,7 +90,7 @@ class MySQLDictionarySource final : public IDictionarySource Block sample_block; ExternalQueryBuilder query_builder; const std::string load_all_query; - mutable std::string invalidate_query_response; + mutable InvalidateQueryResponse invalidate_query_response; const StreamSettings settings; }; diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index 99f5502c8157..8c459d38ced0 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -138,9 +138,7 @@ bool PostgreSQLDictionarySource::isModified() const if 
(!configuration.invalidate_query.empty()) { auto response = doInvalidateQuery(configuration.invalidate_query); - if (response == invalidate_query_response) - return false; - invalidate_query_response = response; + return invalidate_query_response.updateAndCheckModified(response); } return true; } diff --git a/src/Dictionaries/PostgreSQLDictionarySource.h b/src/Dictionaries/PostgreSQLDictionarySource.h index d390a1b8407f..8e72c3e82ebc 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.h +++ b/src/Dictionaries/PostgreSQLDictionarySource.h @@ -3,6 +3,7 @@ #include "config.h" #include #include +#include #if USE_LIBPQXX #include @@ -65,7 +66,7 @@ class PostgreSQLDictionarySource final : public IDictionarySource ExternalQueryBuilder query_builder; const std::string load_all_query; std::chrono::time_point update_time; - mutable std::string invalidate_query_response; + mutable InvalidateQueryResponse invalidate_query_response; }; diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index 0c89f1ae79da..15697d9c86e3 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -193,9 +193,7 @@ bool XDBCDictionarySource::isModified() const if (!configuration.invalidate_query.empty()) { auto response = doInvalidateQuery(configuration.invalidate_query); - if (invalidate_query_response == response) - return false; - invalidate_query_response = response; + return invalidate_query_response.updateAndCheckModified(response); } return true; } diff --git a/src/Dictionaries/XDBCDictionarySource.h b/src/Dictionaries/XDBCDictionarySource.h index 7f09bd4d83cc..7de694e9c715 100644 --- a/src/Dictionaries/XDBCDictionarySource.h +++ b/src/Dictionaries/XDBCDictionarySource.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace Poco @@ -83,7 +84,7 @@ class XDBCDictionarySource final : public IDictionarySource, WithContext Block sample_block; ExternalQueryBuilder query_builder; const 
std::string load_all_query; - mutable std::string invalidate_query_response; + mutable InvalidateQueryResponse invalidate_query_response; BridgeHelperPtr bridge_helper; Poco::URI bridge_url; From b8baac1bba65cbc45fce9c4b682a50f4d8e19751 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 12 May 2026 11:01:59 +0000 Subject: [PATCH 16/41] Backport #104065 to 26.3: Refreshable MV: relax sanity checks in SECONDARY_CREATE --- src/Storages/StorageMaterializedView.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 884c39189e01..b1d88c21060f 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -197,7 +197,7 @@ StorageMaterializedView::StorageMaterializedView( } } - if (mode < LoadingStrictnessLevel::ATTACH && !fixed_uuid) + if (mode < LoadingStrictnessLevel::SECONDARY_CREATE && !fixed_uuid) { /// Sanity-check the table engine. String inner_engine; From 4e2c591126a4dc8b62627e6fcc0151696879c17e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 12 May 2026 12:58:41 +0000 Subject: [PATCH 17/41] Backport #104229 to 26.3: Fix int32_t overflow in `lowerUTF8`/`upperUTF8` for large buffers --- src/Functions/LowerUpperUTF8Impl.h | 41 ++++++++++++---- ...04_lower_upper_utf8_large_buffer.reference | 25 ++++++++++ .../04204_lower_upper_utf8_large_buffer.sql | 47 +++++++++++++++++++ 3 files changed, 105 insertions(+), 8 deletions(-) create mode 100644 tests/queries/0_stateless/04204_lower_upper_utf8_large_buffer.reference create mode 100644 tests/queries/0_stateless/04204_lower_upper_utf8_large_buffer.sql diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h index 6c15a472c180..2a7ae337e393 100644 --- a/src/Functions/LowerUpperUTF8Impl.h +++ b/src/Functions/LowerUpperUTF8Impl.h @@ -13,6 +13,8 @@ # include # include +# include + namespace DB { @@ -61,22 +63,43 @@ struct 
LowerUpperUTF8Impl const auto * src = reinterpret_cast(&data[offsets[row_i - 1]]); size_t src_size = offsets[row_i] - offsets[row_i - 1]; + /// ICU APIs accept `int32_t` for buffer sizes and return the required output + /// length as `int32_t` on `U_BUFFER_OVERFLOW_ERROR`. Unicode full case mapping + /// (Unicode `SpecialCasing.txt`, e.g. `U+0390` maps to 3 code points / 6 bytes + /// from a 2-byte input) expands UTF-8 output by at most 3x. Reject inputs + /// whose worst-case case-mapped output could exceed `INT32_MAX` — the retry + /// path could otherwise receive an overflowed `dst_size` and corrupt `res_data`. + if (static_cast(src_size) * 3 > INT32_MAX) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "String size {} exceeds the maximum supported length for {}: " + "case mapping could produce output larger than the 2 GiB ICU API limit", + src_size, + upper ? "upperUTF8" : "lowerUTF8"); + + /// `res_data` accumulates output for all rows and may exceed `INT32_MAX`. Cap + /// the destination capacity passed to ICU; the `U_BUFFER_OVERFLOW_ERROR` retry + /// path enlarges `res_data` to fit and the guard above keeps the per-row + /// requested length representable as `int32_t`. 
+ auto safe_dest_capacity = static_cast(std::min(res_data.size() - curr_offset, INT32_MAX)); + auto safe_src_size = static_cast(src_size); + int32_t dst_size; if constexpr (upper) dst_size = ucasemap_utf8ToUpper( case_map, reinterpret_cast(&res_data[curr_offset]), - static_cast(res_data.size() - curr_offset), + safe_dest_capacity, src, - static_cast(src_size), + safe_src_size, &error_code); else dst_size = ucasemap_utf8ToLower( case_map, reinterpret_cast(&res_data[curr_offset]), - static_cast(res_data.size() - curr_offset), + safe_dest_capacity, src, - static_cast(src_size), + safe_src_size, &error_code); if (error_code == U_BUFFER_OVERFLOW_ERROR) @@ -84,22 +107,24 @@ struct LowerUpperUTF8Impl size_t new_size = curr_offset + dst_size; res_data.resize(new_size); + safe_dest_capacity = static_cast(std::min(res_data.size() - curr_offset, INT32_MAX)); + error_code = U_ZERO_ERROR; if constexpr (upper) dst_size = ucasemap_utf8ToUpper( case_map, reinterpret_cast(&res_data[curr_offset]), - static_cast(res_data.size() - curr_offset), + safe_dest_capacity, src, - static_cast(src_size), + safe_src_size, &error_code); else dst_size = ucasemap_utf8ToLower( case_map, reinterpret_cast(&res_data[curr_offset]), - static_cast(res_data.size() - curr_offset), + safe_dest_capacity, src, - static_cast(src_size), + safe_src_size, &error_code); } diff --git a/tests/queries/0_stateless/04204_lower_upper_utf8_large_buffer.reference b/tests/queries/0_stateless/04204_lower_upper_utf8_large_buffer.reference new file mode 100644 index 000000000000..c4613bbc133b --- /dev/null +++ b/tests/queries/0_stateless/04204_lower_upper_utf8_large_buffer.reference @@ -0,0 +1,25 @@ +münchen +MÜNCHEN +привет мир +ПРИВЕТ МИР +北京hello +北京HELLO +ähren +über +straße +naïve +café +ñoño +ÄHREN +ÜBER +STRASSE +NAÏVE +CAFÉ +ÑOÑO +ü +Ü +α 2 Α 2 +ʼn 2 ʼN 3 +և 2 ԵՒ 4 +ΐ 2 Ϊ́ 6 +ΰ 2 Ϋ́ 6 diff --git a/tests/queries/0_stateless/04204_lower_upper_utf8_large_buffer.sql 
b/tests/queries/0_stateless/04204_lower_upper_utf8_large_buffer.sql new file mode 100644 index 000000000000..876d9f6cfa1a --- /dev/null +++ b/tests/queries/0_stateless/04204_lower_upper_utf8_large_buffer.sql @@ -0,0 +1,47 @@ +-- Tags: no-fasttest +-- Reason: lowerUTF8 / upperUTF8 are only available with ICU support, which is disabled in the fast-test build. + +-- Test that `lowerUTF8`/`upperUTF8` handle the ICU code path without int32_t overflow. +-- The root cause was that destCapacity passed to ICU's `ucasemap_utf8ToLower/Upper` +-- overflowed int32_t when the accumulated output buffer exceeded ~2 GB, causing +-- U_ILLEGAL_ARGUMENT_ERROR. We cannot allocate 2 GB in a stateless test, but we +-- exercise the ICU branch with non-ASCII inputs (which bypass the all-ASCII fast path) +-- so any future regression in the wrapping/casting logic surfaces as a wrong result +-- or exception here. + +-- Basic non-ASCII correctness +SELECT lowerUTF8('MÜNCHEN'); +SELECT upperUTF8('münchen'); +SELECT lowerUTF8('ПРИВЕТ МИР'); +SELECT upperUTF8('привет мир'); +SELECT lowerUTF8('北京HELLO'); +SELECT upperUTF8('北京hello'); + +-- Batch of non-ASCII rows to exercise the per-row loop +SELECT lowerUTF8(s) FROM ( + SELECT arrayJoin(['ÄHREN', 'ÜBER', 'STRAẞE', 'NAÏVE', 'CAFÉ', 'ÑOÑO']) AS s +); + +SELECT upperUTF8(s) FROM ( + SELECT arrayJoin(['ähren', 'über', 'straße', 'naïve', 'café', 'ñoño']) AS s +); + +-- Single-character non-ASCII edge cases +SELECT lowerUTF8('Ü'); +SELECT upperUTF8('ü'); + +-- UTF-8 byte-expansion ratios under full case mapping (Unicode `SpecialCasing.txt`). +-- Each row prints input, input bytes, upper-cased output, output bytes. The 3.0x rows +-- are the worst case and the binding constraint behind the BAD_ARGUMENTS guard: +-- a single-row input > INT32_MAX / 3 ≈ 683 MiB of these would produce > INT32_MAX bytes, +-- which ICU cannot return as int32_t.
+-- 1.0x: α (U+03B1) -> Α (U+0391): 2 bytes -> 2 bytes +-- 1.5x: ʼn (U+0149) -> ʼN (U+02BC U+004E): 2 bytes -> 3 bytes +-- 2.0x: և (U+0587) -> ԵՒ (U+0535 U+0552): 2 bytes -> 4 bytes +-- 3.0x: ΐ (U+0390) -> Ϊ́ (U+0399 U+0308 U+0301): 2 bytes -> 6 bytes +-- 3.0x: ΰ (U+03B0) -> Ϋ́ (U+03A5 U+0308 U+0301): 2 bytes -> 6 bytes +SELECT 'α' AS input, length('α') AS in_bytes, upperUTF8('α') AS upper, length(upperUTF8('α')) AS out_bytes; +SELECT 'ʼn' AS input, length('ʼn') AS in_bytes, upperUTF8('ʼn') AS upper, length(upperUTF8('ʼn')) AS out_bytes; +SELECT 'և' AS input, length('և') AS in_bytes, upperUTF8('և') AS upper, length(upperUTF8('և')) AS out_bytes; +SELECT 'ΐ' AS input, length('ΐ') AS in_bytes, upperUTF8('ΐ') AS upper, length(upperUTF8('ΐ')) AS out_bytes; +SELECT 'ΰ' AS input, length('ΰ') AS in_bytes, upperUTF8('ΰ') AS upper, length(upperUTF8('ΰ')) AS out_bytes; From c72d063dc7f61dbd9dda47706e588964e0bc14e0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 12 May 2026 17:01:50 +0000 Subject: [PATCH 18/41] Backport #104678 to 26.3: Revert "Fix data part check and consistency check for tables with JSON column" --- .../MergeTree/MergeTreeDataPartWide.cpp | 185 +++++------------- src/Storages/MergeTree/checkDataPart.cpp | 73 ++----- .../04109_check_table_json_wide.reference | 8 - .../04109_check_table_json_wide.sql | 27 --- 4 files changed, 64 insertions(+), 229 deletions(-) delete mode 100644 tests/queries/0_stateless/04109_check_table_json_wide.reference delete mode 100644 tests/queries/0_stateless/04109_check_table_json_wide.sql diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 44f62e46b48e..915fc4250e69 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -345,77 +345,31 @@ void MergeTreeDataPartWide::doCheckConsistency(bool require_part_metadata) const { if (require_part_metadata) { - const auto & cols_substreams = 
getColumnsSubstreams(); - if (!cols_substreams.empty()) - { - /// Use columns_substreams.txt which contains the exact list of substream - /// file names written at part creation time. This is more reliable than - /// enumerateStreams for types with complex serialization (e.g. JSON) - /// where enumerateStreams needs deserialization state to enumerate - /// the correct streams. - size_t col_idx = 0; - for (const auto & name_type : columns) - { - const auto & substreams = cols_substreams.getColumnSubstreams(col_idx); - for (const auto & substream_name : substreams) - { - auto bin_file_name = getStreamNameOrHash(substream_name, DATA_FILE_EXTENSION, checksums); - if (!bin_file_name) - throw Exception( - ErrorCodes::NO_FILE_IN_DATA_PART, - "No stream ({}{}) file checksum for column {} in part {}", - substream_name, - DATA_FILE_EXTENSION, - name_type.name, - getDataPartStorage().getFullPath()); - - auto mrk_file_name = *bin_file_name + marks_file_extension; - if (!checksums.files.contains(mrk_file_name)) - throw Exception( - ErrorCodes::NO_FILE_IN_DATA_PART, - "No {} file checksum for column {} in part {} ", - mrk_file_name, name_type.name, getDataPartStorage().getFullPath()); - } - ++col_idx; - } - } - else + for (const auto & name_type : columns) { - /// Fallback for old parts without columns_substreams.txt. - /// Disable enumerate_dynamic_streams because without deserialization state - /// we don't know the correct serialization version for types like JSON, - /// and enumerating dynamic streams with wrong defaults would produce - /// incorrect stream names leading to false positive errors. 
- for (const auto & name_type : columns) + getSerialization(name_type.name)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { - auto serialization = getSerialization(name_type.name); - ISerialization::EnumerateStreamsSettings settings; - settings.enumerate_dynamic_streams = false; - auto data = ISerialization::SubstreamData(serialization).withType(name_type.type).withColumn(name_type.type->createColumn()); - serialization->enumerateStreams(settings, [&](const ISerialization::SubstreamPath & substream_path) - { - /// Skip ephemeral subcolumns that don't store any real data. - if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) - return; - - auto stream_name = getStreamNameForColumn(name_type, substream_path, DATA_FILE_EXTENSION, checksums, storage.getSettings()); - if (!stream_name) - throw Exception( - ErrorCodes::NO_FILE_IN_DATA_PART, - "No stream ({}{}) file checksum for column {} in part {}", - ISerialization::getFileNameForStream(name_type, substream_path, ISerialization::StreamFileNameSettings(*storage.getSettings())), - DATA_FILE_EXTENSION, - name_type.name, - getDataPartStorage().getFullPath()); - - auto mrk_file_name = *stream_name + marks_file_extension; - if (!checksums.files.contains(mrk_file_name)) - throw Exception( - ErrorCodes::NO_FILE_IN_DATA_PART, - "No {} file checksum for column {} in part {} ", - mrk_file_name, name_type.name, getDataPartStorage().getFullPath()); - }, data); - } + /// Skip ephemeral subcolumns that don't store any real data. 
+ if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) + return; + + auto stream_name = getStreamNameForColumn(name_type, substream_path, DATA_FILE_EXTENSION, checksums, storage.getSettings()); + if (!stream_name) + throw Exception( + ErrorCodes::NO_FILE_IN_DATA_PART, + "No stream ({}{}) file checksum for column {} in part {}", + ISerialization::getFileNameForStream(name_type, substream_path, ISerialization::StreamFileNameSettings(*storage.getSettings())), + DATA_FILE_EXTENSION, + name_type.name, + getDataPartStorage().getFullPath()); + + auto mrk_file_name = *stream_name + marks_file_extension; + if (!checksums.files.contains(mrk_file_name)) + throw Exception( + ErrorCodes::NO_FILE_IN_DATA_PART, + "No {} file checksum for column {} in part {} ", + mrk_file_name, name_type.name, getDataPartStorage().getFullPath()); + }); } } } @@ -423,76 +377,33 @@ void MergeTreeDataPartWide::doCheckConsistency(bool require_part_metadata) const { /// Check that all marks are nonempty and have the same size. std::optional marks_size; - - const auto & cols_substreams = getColumnsSubstreams(); - if (!cols_substreams.empty()) + for (const auto & name_type : columns) { - for (size_t col_idx = 0; col_idx != columns.size(); ++col_idx) + getSerialization(name_type.name)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { - const auto & substreams = cols_substreams.getColumnSubstreams(col_idx); - for (const auto & substream_name : substreams) - { - auto stream_name = getStreamNameOrHash(substream_name, marks_file_extension, getDataPartStorage()); - - /// Missing file is Ok for case when new column was added. 
- if (!stream_name) - continue; - - auto file_path = *stream_name + marks_file_extension; - UInt64 file_size = getDataPartStorage().getFileSize(file_path); - - if (!file_size) - throw Exception( - ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, - "Part {} is broken: {} is empty.", - getDataPartStorage().getFullPath(), - std::string(fs::path(getDataPartStorage().getFullPath()) / file_path)); - - if (!marks_size) - marks_size = file_size; - else if (file_size != *marks_size) - throw Exception( - ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, - "Part {} is broken: marks have different sizes.", getDataPartStorage().getFullPath()); - } - } - } - else - { - /// Fallback for old parts without columns_substreams.txt. - /// Disable enumerate_dynamic_streams (see comment above). - for (const auto & name_type : columns) - { - auto serialization = getSerialization(name_type.name); - ISerialization::EnumerateStreamsSettings settings; - settings.enumerate_dynamic_streams = false; - auto data = ISerialization::SubstreamData(serialization).withType(name_type.type).withColumn(name_type.type->createColumn()); - serialization->enumerateStreams(settings, [&](const ISerialization::SubstreamPath & substream_path) - { - auto stream_name = getStreamNameForColumn(name_type, substream_path, marks_file_extension, getDataPartStorage(), storage.getSettings()); - - /// Missing file is Ok for case when new column was added. 
- if (!stream_name) - return; - - auto file_path = *stream_name + marks_file_extension; - UInt64 file_size = getDataPartStorage().getFileSize(file_path); - - if (!file_size) - throw Exception( - ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, - "Part {} is broken: {} is empty.", - getDataPartStorage().getFullPath(), - std::string(fs::path(getDataPartStorage().getFullPath()) / file_path)); - - if (!marks_size) - marks_size = file_size; - else if (file_size != *marks_size) - throw Exception( - ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, - "Part {} is broken: marks have different sizes.", getDataPartStorage().getFullPath()); - }, data); - } + auto stream_name = getStreamNameForColumn(name_type, substream_path, marks_file_extension, getDataPartStorage(), storage.getSettings()); + + /// Missing file is Ok for case when new column was added. + if (!stream_name) + return; + + auto file_path = *stream_name + marks_file_extension; + UInt64 file_size = getDataPartStorage().getFileSize(file_path); + + if (!file_size) + throw Exception( + ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, + "Part {} is broken: {} is empty.", + getDataPartStorage().getFullPath(), + std::string(fs::path(getDataPartStorage().getFullPath()) / file_path)); + + if (!marks_size) + marks_size = file_size; + else if (file_size != *marks_size) + throw Exception( + ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, + "Part {} is broken: marks have different sizes.", getDataPartStorage().getFullPath()); + }); } } } diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 1e0ce8c8a09e..e11fe4abcc8e 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -245,65 +245,24 @@ static IMergeTreeDataPart::Checksums checkDataPart( } else if (part_type == MergeTreeDataPartType::Wide) { - const auto & columns_substreams = data_part->getColumnsSubstreams(); - if (!columns_substreams.empty()) + for (const auto & column : columns_list) { - /// Use 
columns_substreams.txt which contains the exact list of substream - /// file names written at part creation time. This is more reliable than - /// enumerateStreams for types with complex serialization (e.g. JSON) - /// where enumerateStreams needs deserialization state to enumerate - /// the correct streams. - size_t col_idx = 0; - for (const auto & column : columns_list) + get_serialization(column)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { - const auto & substreams = columns_substreams.getColumnSubstreams(col_idx); - for (const auto & substream_name : substreams) - { - auto stream_name = IMergeTreeDataPart::getStreamNameOrHash(substream_name, ".bin", data_part_storage); - - if (!stream_name) - throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, - "There is no file for column '{}' (substream '{}') in data part '{}'", - column.name, substream_name, data_part->name); - - auto file_name = *stream_name + ".bin"; - checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); - } - ++col_idx; - } - } - else - { - /// Fallback for old parts without columns_substreams.txt. - /// Disable enumerate_dynamic_streams because without deserialization state - /// we don't know the correct dynamic structure and serialization version for types like JSON, - /// and enumerating dynamic streams with wrong defaults would produce - /// incorrect stream names leading to false positive errors. - /// The files for dynamic streams will still be checked against checksums.txt - /// by the subsequent iteration over all files in the part directory. 
- for (const auto & column : columns_list) - { - auto serialization = get_serialization(column); - ISerialization::EnumerateStreamsSettings settings; - settings.enumerate_dynamic_streams = false; - auto data = ISerialization::SubstreamData(serialization).withType(column.type).withColumn(column.type->createColumn()); - serialization->enumerateStreams(settings, [&](const ISerialization::SubstreamPath & substream_path) - { - /// Skip ephemeral subcolumns that don't store any real data. - if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) - return; - - auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(column, substream_path, ".bin", data_part_storage, data_part->storage.getSettings()); - - if (!stream_name) - throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, - "There is no file for column '{}' in data part '{}'", - column.name, data_part->name); - - auto file_name = *stream_name + ".bin"; - checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); - }, data); - } + /// Skip ephemeral subcolumns that don't store any real data. 
+ if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) + return; + + auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(column, substream_path, ".bin", data_part_storage, data_part->storage.getSettings()); + + if (!stream_name) + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, + "There is no file for column '{}' in data part '{}'", + column.name, data_part->name); + + auto file_name = *stream_name + ".bin"; + checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); + }, column.type, data_part->getColumnSample(column)); } } else diff --git a/tests/queries/0_stateless/04109_check_table_json_wide.reference b/tests/queries/0_stateless/04109_check_table_json_wide.reference deleted file mode 100644 index 5439f6a8092b..000000000000 --- a/tests/queries/0_stateless/04109_check_table_json_wide.reference +++ /dev/null @@ -1,8 +0,0 @@ -1 -1 -1 1 -2 2 -3 3 -1 1 -2 2 -3 3 diff --git a/tests/queries/0_stateless/04109_check_table_json_wide.sql b/tests/queries/0_stateless/04109_check_table_json_wide.sql deleted file mode 100644 index 1abd7094d17f..000000000000 --- a/tests/queries/0_stateless/04109_check_table_json_wide.sql +++ /dev/null @@ -1,27 +0,0 @@ --- Tags: no-fasttest - -DROP TABLE IF EXISTS test_check_json_wide; - -CREATE TABLE test_check_json_wide (id UInt64, data JSON) -ENGINE = MergeTree ORDER BY id -SETTINGS min_rows_for_wide_part=1, min_bytes_for_wide_part=1; - -INSERT INTO test_check_json_wide VALUES (1, '{"a": 1, "b": "hello"}'); -INSERT INTO test_check_json_wide VALUES (2, '{"a": 2, "c": [1, 2, 3]}'); -INSERT INTO test_check_json_wide VALUES (3, '{"a": 3, "b": "world", "d": [{"nested": true}]}'); - -CHECK TABLE test_check_json_wide SETTINGS check_query_single_value_result = 1; - -OPTIMIZE TABLE test_check_json_wide FINAL; - -CHECK TABLE test_check_json_wide SETTINGS check_query_single_value_result = 1; - -SELECT id, data.a FROM test_check_json_wide ORDER BY id; - --- Test that DETACH/ATTACH 
works (checkConsistency is called during attach). -ALTER TABLE test_check_json_wide DETACH PARTITION tuple(); -ALTER TABLE test_check_json_wide ATTACH PARTITION tuple(); - -SELECT id, data.a FROM test_check_json_wide ORDER BY id; - -DROP TABLE test_check_json_wide; From f68d64b9b80b3c5f53a0f12c311fe0189f880901 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 12 May 2026 20:48:02 +0000 Subject: [PATCH 19/41] Backport #104322 to 26.3: fixing a possible underflow while parsing postgres array values --- src/Core/PostgreSQL/insertPostgreSQLValue.cpp | 3 + .../tests/gtest_insertPostgreSQLValue.cpp | 96 +++++++++++++++++++ .../test_storage_postgresql/test.py | 43 +++++++++ 3 files changed, 142 insertions(+) create mode 100644 src/Core/tests/gtest_insertPostgreSQLValue.cpp diff --git a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp index da6d6597cb5b..84b666989c15 100644 --- a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp +++ b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp @@ -149,6 +149,9 @@ void insertPostgreSQLValue( { max_dimension = std::max(max_dimension, dimension); + if (dimension == 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected array closing bracket"); + --dimension; if (dimension == 0) break; diff --git a/src/Core/tests/gtest_insertPostgreSQLValue.cpp b/src/Core/tests/gtest_insertPostgreSQLValue.cpp new file mode 100644 index 000000000000..f37e64b43c52 --- /dev/null +++ b/src/Core/tests/gtest_insertPostgreSQLValue.cpp @@ -0,0 +1,96 @@ +#include "config.h" + +#if USE_LIBPQXX + +#include + +#include +#include +#include +#include +#include +#include + + +using namespace DB; + +namespace DB::ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +/// Regression test for dimension underflow in PostgreSQL array parser. +/// When pqxx::array_parser emits row_end before any row_start (e.g. malformed +/// input starting with '}'), the dimension counter must not underflow from 0. 
+/// See https://github.com/ClickHouse/clickhouse-core-incidents/issues/1693 + +TEST(InsertPostgreSQLValue, MalformedArrayClosingBracketThrows) +{ + auto nested_type = std::make_shared(); + auto array_type = std::make_shared(nested_type); + auto column = ColumnArray::create(ColumnInt32::create()); + + std::unordered_map array_info; + preparePostgreSQLArrayInfo(array_info, 0, array_type); + + /// Input "}" causes row_end at dimension 0 — must throw BAD_ARGUMENTS, + /// not underflow size_t to SIZE_MAX and crash. + try + { + insertPostgreSQLValue( + *column, "}", + ExternalResultDescription::ValueType::vtArray, + array_type, array_info, 0); + FAIL() << "Expected BAD_ARGUMENTS exception for malformed array '}'"; + } + catch (const Exception & e) + { + EXPECT_EQ(e.code(), ErrorCodes::BAD_ARGUMENTS); + } +} + +TEST(InsertPostgreSQLValue, MalformedArrayClosingThenOpeningThrows) +{ + auto nested_type = std::make_shared(); + auto array_type = std::make_shared(nested_type); + auto column = ColumnArray::create(ColumnInt32::create()); + + std::unordered_map array_info; + preparePostgreSQLArrayInfo(array_info, 0, array_type); + + /// Input "}{" also starts with row_end at dimension 0. + try + { + insertPostgreSQLValue( + *column, "}{", + ExternalResultDescription::ValueType::vtArray, + array_type, array_info, 0); + FAIL() << "Expected BAD_ARGUMENTS exception for malformed array '}{'" ; + } + catch (const Exception & e) + { + EXPECT_EQ(e.code(), ErrorCodes::BAD_ARGUMENTS); + } +} + +TEST(InsertPostgreSQLValue, WellFormedArraySucceeds) +{ + auto nested_type = std::make_shared(); + auto array_type = std::make_shared(nested_type); + auto column = ColumnArray::create(ColumnInt32::create()); + + std::unordered_map array_info; + preparePostgreSQLArrayInfo(array_info, 0, array_type); + + /// Well-formed "{1,2,3}" must succeed without exceptions. 
+ EXPECT_NO_THROW( + insertPostgreSQLValue( + *column, "{1,2,3}", + ExternalResultDescription::ValueType::vtArray, + array_type, array_info, 0)); + + /// Verify the column now has one row with 3 elements. + ASSERT_EQ(column->size(), 1u); +} + +#endif diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index fb89d7c5e7b3..b60c3bded396 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -1011,6 +1011,49 @@ def test_postgres_date32_array(started_cluster): cursor.execute("DROP TABLE test_date32_array") +def test_postgres_array_parser_dimension_underflow(started_cluster): + """Regression test for `size_t` underflow in the PostgreSQL array parser. + + When `pqxx::array_parser` emits `row_end` while the parser's `dimension` + counter is 0 (for example, an array text starting with `}`), the previous + code decremented `dimension` past 0 — a `size_t` underflow to `SIZE_MAX` — + and then indexed `dimensions[SIZE_MAX]`, which is out-of-bounds. The fix + throws a `BAD_ARGUMENTS` exception in this case instead. + + PostgreSQL itself validates array literals at INSERT time, so the bug is + unreachable via a column declared as `boolean[]`/`integer[]` on the + PostgreSQL side. The reproducer below stores the malformed payload in a + PostgreSQL `text` column and declares the same column as `Array(Int32)` on + the ClickHouse side via the `PostgreSQL` table engine. ClickHouse then + dispatches the raw `'}'` value through the `vtArray` branch of + `insertPostgreSQLValue`, which calls `pqxx::array_parser` on it and + reproduces the bug. 
+ """ + cursor = started_cluster.postgres_conn.cursor() + cursor.execute("DROP TABLE IF EXISTS test_array_underflow") + cursor.execute( + "CREATE TABLE test_array_underflow (id integer, payload text)" + ) + cursor.execute("INSERT INTO test_array_underflow VALUES (1, '}')") + started_cluster.postgres_conn.commit() + + node1.query("DROP TABLE IF EXISTS pg_array_underflow") + node1.query( + f"CREATE TABLE pg_array_underflow (id Int32, payload Array(Int32)) " + f"ENGINE = PostgreSQL(" + f"'{started_cluster.postgres_ip}:{started_cluster.postgres_port}', " + f"'postgres', 'test_array_underflow', 'postgres', '{pg_pass}')" + ) + + error = node1.query_and_get_error("SELECT id, payload FROM pg_array_underflow") + assert "Unexpected array closing bracket" in error, ( + f"Expected BAD_ARGUMENTS('Unexpected array closing bracket'), got: {error}" + ) + + node1.query("DROP TABLE pg_array_underflow") + cursor.execute("DROP TABLE test_array_underflow") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") From 31d98474457796d740b26b6d277e7f941fef13f6 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 13 May 2026 13:02:47 +0000 Subject: [PATCH 20/41] Backport #104751 to 26.3: Fix use-after-free in `AvroConfluentRowInputFormat` --- .../Formats/Impl/AvroRowInputFormat.cpp | 2 +- .../Formats/Impl/AvroRowInputFormat.h | 6 +++ .../test_format_avro_confluent/test.py | 41 +++++++++++++++++++ 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index fd31a1d90913..2f40576da654 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -1260,7 +1260,7 @@ const AvroDeserializer & AvroConfluentRowInputFormat::getOrCreateDeserializer(Sc auto schema = schema_registry->getSchema(schema_id); AvroDeserializer deserializer( output.getHeader(), schema, 
format_settings.avro.allow_missing_fields, format_settings.null_as_default, format_settings); - it = deserializer_cache.emplace(schema_id, deserializer).first; + it = deserializer_cache.emplace(schema_id, std::move(deserializer)).first; } return it->second; } diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.h b/src/Processors/Formats/Impl/AvroRowInputFormat.h index 81f25c36ca05..b9acc525676a 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.h @@ -51,6 +51,12 @@ class AvroDeserializer public: AvroDeserializer(const Block & header, avro::ValidSchema schema, bool allow_missing_fields, bool null_as_default_, const FormatSettings & settings_); AvroDeserializer(DataTypePtr data_type, const std::string & column_name, avro::ValidSchema schema, bool allow_missing_fields, bool null_as_default_, const FormatSettings & settings_); + + AvroDeserializer(const AvroDeserializer &) = delete; + AvroDeserializer & operator=(const AvroDeserializer &) = delete; + AvroDeserializer(AvroDeserializer &&) = default; + AvroDeserializer & operator=(AvroDeserializer &&) = delete; + void deserializeRow(MutableColumns & columns, avro::Decoder & decoder, RowReadExtension & ext) const; using DeserializeFn = std::function; diff --git a/tests/integration/test_format_avro_confluent/test.py b/tests/integration/test_format_avro_confluent/test.py index b6d49c60c01f..587498fa5dad 100644 --- a/tests/integration/test_format_avro_confluent/test.py +++ b/tests/integration/test_format_avro_confluent/test.py @@ -85,6 +85,47 @@ def test_select(started_cluster): ] +def test_select_skip_symbolic(started_cluster): + # type: (ClickHouseCluster) -> None + + reg_url = "http://localhost:{}".format(started_cluster.schema_registry_port) + schema_registry_client = CachedSchemaRegistryClient({"url": reg_url}) + serializer = MessageSerializer(schema_registry_client) + + schema = avro.schema.make_avsc_object( + { + "name": "Node", + "type": 
"record", + "fields": [ + {"name": "value", "type": "long"}, + {"name": "next", "type": ["null", "Node"]}, + ], + } + ) + + record = {"value": 0, "next": {"value": 1, "next": {"value": 2, "next": None}}} + data = serializer.encode_record_with_schema( + "test_subject_skip_symbolic", schema, record + ) + + instance = started_cluster.instances["dummy"] # type: ClickHouseInstance + schema_registry_url = "http://{}:{}".format( + started_cluster.schema_registry_host, started_cluster.schema_registry_port + ) + settings = {"format_avro_schema_registry_url": schema_registry_url} + run_query( + instance, + "create table avro_data_skip_symbolic(value Int64) engine = Memory()", + ) + run_query( + instance, + "insert into avro_data_skip_symbolic format AvroConfluent", + data, + settings, + ) + assert run_query(instance, "select value from avro_data_skip_symbolic").strip() == "0" + + def test_select_auth(started_cluster): # type: (ClickHouseCluster) -> None From 965bab70352c23591672c2d789ba2d2853279aed Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 14 May 2026 11:49:33 +0000 Subject: [PATCH 21/41] Backport #101484 to 26.3: Add missing Keeper component tracking guard to no-arg loadStatistics --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 834f88941093..4ac9e406e56a 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1088,6 +1088,8 @@ ColumnsStatistics IMergeTreeDataPart::loadStatisticsWide(const NameSet & require ColumnsStatistics IMergeTreeDataPart::loadStatistics() const { + auto component_guard = Coordination::setCurrentComponent("IMergeTreeDataPart::loadStatistics"); + if (auto * reader = getStatisticsPackedReader()) return loadStatisticsPacked(*reader, {}); From 0fb5eeba6e348b07cc3c9c433242c6005af4281d Mon Sep 17 00:00:00 2001 From: robot-clickhouse 
Date: Thu, 14 May 2026 16:05:31 +0000 Subject: [PATCH 22/41] Backport #104673 to 26.3: Fix data race in FutureSetFromTuple --- src/Interpreters/PreparedSets.cpp | 18 ++++++++---------- src/Interpreters/PreparedSets.h | 4 ++++ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index ae777d5308d6..932fd6f312e0 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -114,30 +114,28 @@ FutureSetFromTuple::FutureSetFromTuple( DataTypes FutureSetFromTuple::getTypes() const { return set->getElementsTypes(); } FutureSet::Hash FutureSetFromTuple::getHash() const { return hash; } -Columns FutureSetFromTuple::getKeyColumns() +void FutureSetFromTuple::fillSetElementsOnce() { - if (!set->hasExplicitSetElements()) + callOnce(fill_set_elements_once, [this] { set->fillSetElements(); set->appendSetElements(set_key_columns); - } + }); +} +Columns FutureSetFromTuple::getKeyColumns() +{ + fillSetElementsOnce(); return set->getSetElements(); } SetPtr FutureSetFromTuple::buildOrderedSetInplace(const ContextPtr & context) { - if (set->hasExplicitSetElements()) - return set; - const auto & settings = context->getSettingsRef(); size_t max_values = settings[Setting::use_index_for_in_with_subqueries_max_values]; bool too_many_values = max_values && max_values < set->getTotalRowCount(); if (!too_many_values) - { - set->fillSetElements(); - set->appendSetElements(set_key_columns); - } + fillSetElementsOnce(); return set; } diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index 85ecc55a840f..e60ba05d0044 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -105,10 +106,13 @@ class FutureSetFromTuple final : public FutureSet ASTPtr getSourceAST() const override { return ast; } Columns getKeyColumns(); private: + void fillSetElementsOnce(); 
+ Hash hash; ASTPtr ast; SetPtr set; SetKeyColumns set_key_columns; + OnceFlag fill_set_elements_once; }; using FutureSetFromTuplePtr = std::shared_ptr; From 54ccc452d2758247b11d0ffffefaa54a1faeac15 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 15 May 2026 11:02:07 +0000 Subject: [PATCH 23/41] Backport #98827 to 26.3: Implement http header validation for DataLakeCatalog --- src/Databases/DataLake/DatabaseDataLake.cpp | 18 ++++++++++ .../integration/test_database_iceberg/test.py | 21 +++++++++++- .../test.py | 21 +++++++++++- .../test.py | 23 +++++++++++-- ...alake_restful_catalog_bad_format.reference | 2 ++ ...913_datalake_restful_catalog_bad_format.sh | 33 +++++++++++++++---- 6 files changed, 108 insertions(+), 10 deletions(-) diff --git a/src/Databases/DataLake/DatabaseDataLake.cpp b/src/Databases/DataLake/DatabaseDataLake.cpp index 2bdb55ba9886..20c605e95dae 100644 --- a/src/Databases/DataLake/DatabaseDataLake.cpp +++ b/src/Databases/DataLake/DatabaseDataLake.cpp @@ -47,6 +47,7 @@ #include #include #include +#include namespace DB { @@ -936,6 +937,23 @@ void registerDatabaseDataLake(DatabaseFactory & factory) if (database_engine_define->settings) database_settings.loadFromQuery(*database_engine_define, args.create_query.attach); + const auto & auth_header_str = database_settings[DatabaseDataLakeSetting::auth_header].value; + if (!auth_header_str.empty()) + { + /// Validate `auth_header` against the forbidden HTTP header filter at creation time. + /// Only headers with a valid `name: value` format are accepted. + auto pos = auth_header_str.find(':'); + if (pos != std::string::npos) + { + DB::HTTPHeaderEntries header_entries{{auth_header_str.substr(0, pos), auth_header_str.substr(pos + 1)}}; + args.context->getGlobalContext()->getHTTPHeaderFilter().checkAndNormalizeHeaders(header_entries); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid auth header format. 
Expected 'HeaderName: HeaderValue'"); + } + } + auto catalog_type = database_settings[DB::DatabaseDataLakeSetting::catalog_type].value; /// Glue catalog is one per region, so it's fully identified by aws keys and region /// There is no URL you need to provide in constructor, even if we would want it diff --git a/tests/integration/test_database_iceberg/test.py b/tests/integration/test_database_iceberg/test.py index c75dfce46d5a..e979a99c18b8 100644 --- a/tests/integration/test_database_iceberg/test.py +++ b/tests/integration/test_database_iceberg/test.py @@ -384,7 +384,7 @@ def test_hide_sensitive_info(started_cluster): started_cluster, node, CATALOG_NAME, - additional_settings={"auth_header": "SECRET_2"}, + additional_settings={"auth_header": "Authorization: SECRET_2"}, ) assert "SECRET_2" not in node.query(f"SHOW CREATE DATABASE {CATALOG_NAME}") @@ -787,3 +787,22 @@ def test_gcs(started_cluster): """ ) assert "Google cloud storage converts to S3" in str(err.value) + + +def test_invalid_auth_header_format(started_cluster): + node = started_cluster.instances["node1"] + + node.query(f"DROP DATABASE IF EXISTS {CATALOG_NAME};") + with pytest.raises(Exception) as err: + node.query( + f""" + SET allow_database_iceberg = 1; + CREATE DATABASE {CATALOG_NAME} + ENGINE = DataLakeCatalog('{BASE_URL}', 'minio', 'dummy') + SETTINGS + catalog_type = 'rest', + warehouse = 'demo', + auth_header = 'wrong.header' + """ + ) + assert "Invalid auth header format" in str(err.value) diff --git a/tests/integration/test_database_iceberg_lakekeeper_catalog/test.py b/tests/integration/test_database_iceberg_lakekeeper_catalog/test.py index d670b4997cde..9a9cea39fd09 100644 --- a/tests/integration/test_database_iceberg_lakekeeper_catalog/test.py +++ b/tests/integration/test_database_iceberg_lakekeeper_catalog/test.py @@ -309,7 +309,7 @@ def test_hide_sensitive_info(started_cluster): started_cluster, node, CATALOG_NAME, - additional_settings={"auth_header": "SECRET_2"}, + 
additional_settings={"auth_header": "Authorization: SECRET_2"}, ) show_result = node.query(f"SHOW CREATE DATABASE {CATALOG_NAME}") assert "SECRET_2" not in show_result @@ -365,3 +365,22 @@ def test_tables_with_same_location(started_cluster): f"SELECT symbol FROM {CATALOG_NAME}.`{namespace[0]}.{table_name_2}`" ).strip() + +def test_invalid_auth_header_format(started_cluster): + node = started_cluster.instances["node1"] + + node.query(f"DROP DATABASE IF EXISTS {CATALOG_NAME};") + with pytest.raises(Exception) as err: + node.query( + f""" + SET allow_experimental_database_iceberg = 1; + CREATE DATABASE {CATALOG_NAME} + ENGINE = DataLakeCatalog('{BASE_URL}', 'minio', 'dummy') + SETTINGS + catalog_type = 'rest', + warehouse = 'demo', + auth_header = 'wrong.header' + """ + ) + assert "Invalid auth header format" in str(err.value) + diff --git a/tests/integration/test_database_iceberg_nessie_catalog/test.py b/tests/integration/test_database_iceberg_nessie_catalog/test.py index fe595233a851..3e0e8b3658ba 100644 --- a/tests/integration/test_database_iceberg_nessie_catalog/test.py +++ b/tests/integration/test_database_iceberg_nessie_catalog/test.py @@ -296,7 +296,7 @@ def test_hide_sensitive_info(started_cluster): started_cluster, node, CATALOG_NAME, - additional_settings={"auth_header": "SECRET_2"}, + additional_settings={"auth_header": "Authorization: SECRET_2"}, ) show_result = node.query(f"SHOW CREATE DATABASE {CATALOG_NAME}") assert "SECRET_2" not in show_result @@ -501,4 +501,23 @@ def test_drop_table(started_cluster): create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME) result = node.query(f"SHOW TABLES FROM {CATALOG_NAME}") - assert test_table_name not in result \ No newline at end of file + assert test_table_name not in result + + +def test_invalid_auth_header_format(started_cluster): + node = started_cluster.instances["node1"] + + node.query(f"DROP DATABASE IF EXISTS {CATALOG_NAME};") + with pytest.raises(Exception) as err: + node.query( + f""" + 
SET allow_experimental_database_iceberg = 1; + CREATE DATABASE {CATALOG_NAME} + ENGINE = DataLakeCatalog('{BASE_URL}', 'minio', 'dummy') + SETTINGS + catalog_type = 'rest', + warehouse = 'warehouse', + auth_header = 'wrong.header' + """ + ) + assert "Invalid auth header format" in str(err.value) \ No newline at end of file diff --git a/tests/queries/0_stateless/03913_datalake_restful_catalog_bad_format.reference b/tests/queries/0_stateless/03913_datalake_restful_catalog_bad_format.reference index e69de29bb2d1..523a2aa661eb 100644 --- a/tests/queries/0_stateless/03913_datalake_restful_catalog_bad_format.reference +++ b/tests/queries/0_stateless/03913_datalake_restful_catalog_bad_format.reference @@ -0,0 +1,2 @@ +is forbidden +0 diff --git a/tests/queries/0_stateless/03913_datalake_restful_catalog_bad_format.sh b/tests/queries/0_stateless/03913_datalake_restful_catalog_bad_format.sh index b7f78f733d34..fb32a9b0c162 100755 --- a/tests/queries/0_stateless/03913_datalake_restful_catalog_bad_format.sh +++ b/tests/queries/0_stateless/03913_datalake_restful_catalog_bad_format.sh @@ -9,23 +9,44 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -NEW_DB_NAME="${CLICKHOUSE_DATABASE}_03913_DATALKE" +# Verify that creating a database with incorrect credentials in auth_header succeeds, and that +# querying system.tables on such a database does not cause an exception (the error is caught lazily). 
+NEW_DB_NAME="${CLICKHOUSE_DATABASE}_03913_DATALAKE" $CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS ${NEW_DB_NAME};" $CLICKHOUSE_CLIENT -q " SET allow_experimental_database_iceberg = 1; CREATE DATABASE ${NEW_DB_NAME} ENGINE = DataLakeCatalog('http://rest:8181/v1', 'admin', 'password') -SETTINGS - catalog_type = 'rest', - auth_header = 'wrong.header', - storage_endpoint = 'http://minio:9000/lakehouse', +SETTINGS + catalog_type = 'rest', + auth_header = 'Authorization: Wrong header', + storage_endpoint = 'http://minio:9000/lakehouse', warehouse = 'demo'; " $CLICKHOUSE_CLIENT -q " -select database || '.' || name FROM system.tables where database = '${NEW_DB_NAME}' and engine = 'MergeTree' +SELECT database || '.' || name FROM system.tables WHERE database = '${NEW_DB_NAME}' AND engine = 'MergeTree' SETTINGS show_data_lake_catalogs_in_system_tables = 1; " $CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS ${NEW_DB_NAME};" + +# Verify that a forbidden header (configured via http_forbid_headers) is rejected at CREATE DATABASE time. +# The values for exact_header are defined as forbidden in tests/config/config.d/forbidden_headers.xml. 
+NEW_DB_FORBIDDEN="${CLICKHOUSE_DATABASE}_03913_FORBIDDEN" + +$CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS ${NEW_DB_FORBIDDEN};" +$CLICKHOUSE_CLIENT -q " +SET allow_experimental_database_iceberg = 1; +CREATE DATABASE ${NEW_DB_FORBIDDEN} +ENGINE = DataLakeCatalog('http://localhost:8181/v1') +SETTINGS + catalog_type = 'rest', + auth_header = 'exact_header: some_value', + warehouse = 'demo'; +" 2>&1 | grep -o 'is forbidden' + +$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.databases WHERE name = '${NEW_DB_FORBIDDEN}';" + +$CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS ${NEW_DB_FORBIDDEN};" From 29e7e946bf13e31e5c0b0c3cf89f1caf02f4bcef Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 15 May 2026 17:45:16 +0000 Subject: [PATCH 24/41] Backport #104705 to 26.3: Add `defer_partition_pruning_after_final` setting to gate the 26.3 FINAL pruning regression --- src/Core/Settings.cpp | 21 ++++ src/Core/SettingsChangesHistory.cpp | 1 + .../QueryPlan/ReadFromMergeTree.cpp | 15 ++- ...er_partition_pruning_after_final.reference | 6 + ...29_defer_partition_pruning_after_final.sql | 104 ++++++++++++++++++ 5 files changed, 144 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/04229_defer_partition_pruning_after_final.reference create mode 100644 tests/queries/0_stateless/04229_defer_partition_pruning_after_final.sql diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 5a8338356a98..d336fd66505e 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -1525,6 +1525,27 @@ This can be useful when PREWHERE references columns that may have different valu and you want FINAL to select the winning row before filtering. When disabled, PREWHERE is applied during reading. Note: If apply_row_level_security_after_final is enabled and row policy uses non-sorting-key columns, PREWHERE will also be deferred to maintain correct execution order (row policy must be applied before PREWHERE). 
+)", 0) \ + DECLARE(Bool, defer_partition_pruning_after_final, true, R"( +When enabled (default), partition pruning is skipped for `FINAL` queries on tables whose +partition-key columns are not part of the sorting key. This is the correctness-safe behavior +introduced in 26.3: `FINAL` may need to deduplicate rows that share a primary key but live +in different partitions, and partition pruning would silently exclude such rows from the +deduplication input. + +When disabled, partition pruning is applied even with `FINAL`, restoring the pre-26.3 +behavior. This can be substantially faster for queries with `WHERE` predicates on the +partition column, but is only correct when rows with the same primary key cannot exist +in different partitions — e.g. event-log tables whose partition column is set at insert +time and never changes. + +This setting only affects partitioned tables whose partition-key columns are not contained +in the sorting key; for other tables partition pruning is always applied. + +Possible values: + +- 0 — Apply partition pruning before `FINAL` (pre-26.3 behavior, faster but unsafe in the general case). +- 1 — Defer partition pruning to after `FINAL` (default, correctness-safe). )", 0) \ \ DECLARE(UInt64, mysql_max_rows_to_insert, 65536, R"( diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 900dd95a75ac..5734aaff44fb 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -41,6 +41,7 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory() /// Note: please check if the key already exists to prevent duplicate entries. addSettingsChanges(settings_changes_history, "26.3", { + {"defer_partition_pruning_after_final", false, true, "Gates the FINAL planner's unconditional skipping of partition pruning when the partition-key column is not in the sorting key. 
The behavior change itself shipped silently in 26.3 via https://github.com/ClickHouse/ClickHouse/pull/98242; this entry retroactively documents it so `compatibility = '26.2'` restores the pre-regression behavior (0 = prune before FINAL, fast; 1 = defer pruning, correctness-safe)."}, {"http_max_fields", 1000000, 1000, "Reduce default to limit pre-authentication memory usage by HTTP connections."}, {"http_max_field_name_size", 131072, 4096, "Reduce default to limit pre-authentication memory usage by HTTP connections."}, {"http_max_request_header_size", 0, 10485760, "New setting to limit total HTTP request header size before authentication."}, diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 4eb8094fa17c..11ebdd67a98b 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -218,6 +218,7 @@ namespace Setting extern const SettingsUInt64 query_plan_max_step_description_length; extern const SettingsBool apply_row_policy_after_final; extern const SettingsBool apply_prewhere_after_final; + extern const SettingsBool defer_partition_pruning_after_final; } namespace MergeTreeSetting @@ -2157,9 +2158,17 @@ void ReadFromMergeTree::deferFiltersAfterFinalIfNeeded() if (defer_prewhere) deferred_prewhere_info = query_info.prewhere_info; - /// don't prune partitions unless the partition key is determined by the sorting key - /// matters when FINAL merges across partitions - if (!doNotMergePartsAcrossPartitionsFinal() && storage_snapshot->metadata->hasPartitionKey()) + /// Don't prune partitions unless the partition key is determined by the sorting key: + /// when FINAL merges across partitions, rows with the same primary key in different + /// partitions must all participate in deduplication, so partition pruning would drop + /// rows that affect the FINAL result. + /// + /// Users whose data structure guarantees same-PK rows cannot span partitions (e.g. 
event-log + /// tables whose partition column is set at insert time and never changes) can opt out via + /// `defer_partition_pruning_after_final = 0` to restore pre-26.3 performance. + if (settings[Setting::defer_partition_pruning_after_final] + && !doNotMergePartsAcrossPartitionsFinal() + && storage_snapshot->metadata->hasPartitionKey()) { const auto & partition_key = storage_snapshot->metadata->getPartitionKey(); const auto & sorting_key_columns = storage_snapshot->metadata->getSortingKeyColumns(); diff --git a/tests/queries/0_stateless/04229_defer_partition_pruning_after_final.reference b/tests/queries/0_stateless/04229_defer_partition_pruning_after_final.reference new file mode 100644 index 000000000000..042b663ef74a --- /dev/null +++ b/tests/queries/0_stateless/04229_defer_partition_pruning_after_final.reference @@ -0,0 +1,6 @@ +default-defer count 1 +opt-out count 1 +default-defer partition pruning 1 3 0 +opt-out partition pruning 1 0 1 +pathological default-defer 0 +pathological opt-out 1 diff --git a/tests/queries/0_stateless/04229_defer_partition_pruning_after_final.sql b/tests/queries/0_stateless/04229_defer_partition_pruning_after_final.sql new file mode 100644 index 000000000000..8a51088e53c5 --- /dev/null +++ b/tests/queries/0_stateless/04229_defer_partition_pruning_after_final.sql @@ -0,0 +1,104 @@ +-- Tags: no-parallel-replicas +-- +-- Regression test for https://github.com/ClickHouse/ClickHouse/issues/104263 +-- +-- PR #98242 (commit ebc5cb49baa594b87f4631850ae9902424414113, 26.3) introduced an +-- unconditional `skip_partition_pruning = !exprs_match && !columns_match` branch in +-- `ReadFromMergeTree::deferFiltersAfterFinalIfNeeded`. The intent was correctness for +-- FINAL queries that may need to deduplicate same-primary-key rows across partitions. +-- Side effect: all FINAL queries whose partition-key column is not in the sorting key +-- stop using partition pruning, even when the WHERE references only the partition column. 
+-- +-- Setting `defer_partition_pruning_after_final` (default 1) gates the new branch: +-- 1 -> deferred pruning, correctness-safe (default; 26.3+ behavior) +-- 0 -> pre-26.3 pruning before FINAL, valid when same-PK rows cannot span partitions + +DROP TABLE IF EXISTS repro_104263 SYNC; + +CREATE TABLE repro_104263 +( + pk UUID, + event_time DateTime64(3, 'UTC'), + version UInt64 +) +ENGINE = ReplacingMergeTree(version) +PARTITION BY toYYYYMM(event_time) +ORDER BY pk; + +INSERT INTO repro_104263 SELECT generateUUIDv4(), toDateTime64('2026-01-15 00:00:00', 3, 'UTC'), 1 FROM numbers(1); +INSERT INTO repro_104263 SELECT generateUUIDv4(), toDateTime64('2026-02-15 00:00:00', 3, 'UTC'), 1 FROM numbers(1); +INSERT INTO repro_104263 SELECT generateUUIDv4(), toDateTime64('2026-03-15 00:00:00', 3, 'UTC'), 1 FROM numbers(1); +INSERT INTO repro_104263 SELECT generateUUIDv4(), toDateTime64('2026-04-15 00:00:00', 3, 'UTC'), 1 FROM numbers(1); +INSERT INTO repro_104263 SELECT generateUUIDv4(), toDateTime64('2026-05-15 00:00:00', 3, 'UTC'), 1 FROM numbers(1); + +-- Both modes return the same row count (one PK in March matches the filter). +SELECT 'default-defer count', count() FROM repro_104263 FINAL +WHERE event_time >= '2026-03-01' AND event_time <= '2026-03-31'; + +SELECT 'opt-out count', count() FROM repro_104263 FINAL +WHERE event_time >= '2026-03-01' AND event_time <= '2026-03-31' +SETTINGS defer_partition_pruning_after_final = 0; + +-- The behavioral difference shows up in the `Partition Min-Max` index step of EXPLAIN. +-- Default reads all 5 parts (pruning disabled). Opt-out prunes to 1. 
+SELECT 'default-defer partition pruning', + countIf(explain LIKE '%Partition Min-Max%') AS has_partition_step, + countIf(explain LIKE '%Parts: 5/5%') AS no_pruning, + countIf(explain LIKE '%Parts: 1/5%') AS pruned +FROM +( + EXPLAIN indexes = 1 + SELECT count() FROM repro_104263 FINAL + WHERE event_time >= '2026-03-01' AND event_time <= '2026-03-31' +); + +SELECT 'opt-out partition pruning', + countIf(explain LIKE '%Partition Min-Max%') AS has_partition_step, + countIf(explain LIKE '%Parts: 5/5%') AS no_pruning, + countIf(explain LIKE '%Parts: 1/5%') AS pruned +FROM +( + EXPLAIN indexes = 1 + SELECT count() FROM repro_104263 FINAL + WHERE event_time >= '2026-03-01' AND event_time <= '2026-03-31' + SETTINGS defer_partition_pruning_after_final = 0 +); + +DROP TABLE repro_104263 SYNC; + + +-- Pathological case from the issue discussion: same primary key in two partitions with +-- reversed versions. Strict semantics (default) requires reading both partitions so +-- FINAL can pick the cross-partition winner. Opt-out trusts the user's assertion that +-- same-PK rows do not span partitions and prunes before FINAL. +-- +-- Default (defer = 1): +-- read both -> FINAL picks (Jan, v=2) -> filter `event_time >= Feb` rejects -> 0 rows. +-- Opt-out (defer = 0): +-- prune to Feb only -> FINAL on {(Feb, v=1)} -> filter accepts -> 1 row. 
+ +DROP TABLE IF EXISTS repro_104263_path SYNC; + +CREATE TABLE repro_104263_path +( + pk UInt64, + event_time DateTime, + version UInt64 +) +ENGINE = ReplacingMergeTree(version) +PARTITION BY toYYYYMM(event_time) +ORDER BY pk; + +INSERT INTO repro_104263_path VALUES (1, '2026-01-15 00:00:00', 2); +INSERT INTO repro_104263_path VALUES (1, '2026-02-15 00:00:00', 1); + +SELECT 'pathological default-defer', count() +FROM repro_104263_path FINAL +WHERE event_time >= '2026-02-01'; + +SELECT 'pathological opt-out', count() +FROM repro_104263_path FINAL +WHERE event_time >= '2026-02-01' +SETTINGS defer_partition_pruning_after_final = 0; + +DROP TABLE repro_104263_path SYNC; From 37c84de778e4f4dcaca4657b5957381edcee5fdb Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 15 May 2026 19:47:48 +0000 Subject: [PATCH 25/41] Backport #103890 to 26.3: Fix use-after-free in KeeperHandlingConsumer::setKeeper (StorageKafka2) --- src/Storages/Kafka/KeeperHandlingConsumer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Storages/Kafka/KeeperHandlingConsumer.cpp b/src/Storages/Kafka/KeeperHandlingConsumer.cpp index 496b0e23cb5b..22a6b363bfe7 100644 --- a/src/Storages/Kafka/KeeperHandlingConsumer.cpp +++ b/src/Storages/Kafka/KeeperHandlingConsumer.cpp @@ -131,12 +131,14 @@ bool KeeperHandlingConsumer::needsNewKeeper() const void KeeperHandlingConsumer::setKeeper(const std::shared_ptr & keeper_) { - keeper = keeper_; + /// Drop the lock holders before replacing `keeper` -- same use-after-free hazard as + /// `replica_is_active_node` in `StorageKafka2::partialShutdown` (see comment there). 
{ std::lock_guard lock(topic_partition_locks_mutex); permanent_locks.clear(); tmp_locks.clear(); } + keeper = keeper_; tmp_locks_quota = 0; assigned_topic_partitions.clear(); topic_partition_index_to_consume_from = 0; From 363bc1e87496f49523f8e2bac1df4860b183a57d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 16 May 2026 01:00:04 +0000 Subject: [PATCH 26/41] Backport #105048 to 26.3: JIT: register __fixunssfti / __fixunsdfti for toUInt128(Float) --- src/Interpreters/JIT/CHJIT.cpp | 15 ++++++ ...40_jit_float_to_big_int_libcalls.reference | 38 ++++++++++++++ .../04240_jit_float_to_big_int_libcalls.sql | 50 +++++++++++++++++++ 3 files changed, 103 insertions(+) create mode 100644 tests/queries/0_stateless/04240_jit_float_to_big_int_libcalls.reference create mode 100644 tests/queries/0_stateless/04240_jit_float_to_big_int_libcalls.sql diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp index 9993e7d87d47..1c50b36a9a6b 100644 --- a/src/Interpreters/JIT/CHJIT.cpp +++ b/src/Interpreters/JIT/CHJIT.cpp @@ -42,6 +42,12 @@ namespace ErrorCodes /// These functions will be provided to the linker of JIT code, /// so it can call them to work with big integers on platforms without native support. +/// +/// IMPORTANT: every libcall is registered as a signed/unsigned pair. LLVM picks the +/// signed or unsigned variant based on whether the IR uses `fptosi`/`sitofp`/`sdiv`/`srem` +/// or `fptoui`/`uitofp`/`udiv`/`urem`. Forgetting the unsigned half compiles fine but +/// fails the JIT link at run time with `Could not find symbol __fixunsdfti` (or similar). +/// Keep the lists below symmetric. 
#pragma clang diagnostic push #pragma clang diagnostic ignored "-Wbit-int-extension" #pragma clang diagnostic ignored "-Wreserved-identifier" @@ -52,9 +58,13 @@ using BitUInt128 = unsigned _BitInt(128); /// NOLINTBEGIN extern "C" BitInt128 __divti3(BitInt128, BitInt128); extern "C" BitInt128 __modti3(BitInt128, BitInt128); +extern "C" BitUInt128 __udivti3(BitUInt128, BitUInt128); +extern "C" BitUInt128 __umodti3(BitUInt128, BitUInt128); extern "C" BitInt128 __fixsfti(float); extern "C" BitInt128 __fixdfti(double); +extern "C" BitUInt128 __fixunssfti(float); +extern "C" BitUInt128 __fixunsdfti(double); extern "C" float __floattisf(BitInt128); extern "C" float __floatuntisf(BitUInt128); @@ -394,11 +404,16 @@ CHJIT::CHJIT() symbol_resolver->registerSymbol("memcmp", reinterpret_cast(&memcmp)); symbol_resolver->registerSymbol("fmod", reinterpret_cast(static_cast(&fmod))); + /// Signed and unsigned variants must be kept together: see the comment above the extern declarations. symbol_resolver->registerSymbol("__divti3", reinterpret_cast(&__divti3)); symbol_resolver->registerSymbol("__modti3", reinterpret_cast(&__modti3)); + symbol_resolver->registerSymbol("__udivti3", reinterpret_cast(&__udivti3)); + symbol_resolver->registerSymbol("__umodti3", reinterpret_cast(&__umodti3)); symbol_resolver->registerSymbol("__fixsfti", reinterpret_cast(&__fixsfti)); symbol_resolver->registerSymbol("__fixdfti", reinterpret_cast(&__fixdfti)); + symbol_resolver->registerSymbol("__fixunssfti", reinterpret_cast(&__fixunssfti)); + symbol_resolver->registerSymbol("__fixunsdfti", reinterpret_cast(&__fixunsdfti)); symbol_resolver->registerSymbol("__floattisf", reinterpret_cast(&__floattisf)); symbol_resolver->registerSymbol("__floatuntisf", reinterpret_cast(&__floatuntisf)); diff --git a/tests/queries/0_stateless/04240_jit_float_to_big_int_libcalls.reference b/tests/queries/0_stateless/04240_jit_float_to_big_int_libcalls.reference new file mode 100644 index 000000000000..67a5306eb758 --- /dev/null 
+++ b/tests/queries/0_stateless/04240_jit_float_to_big_int_libcalls.reference @@ -0,0 +1,38 @@ +--- JIT --- +--- Float -> 128-bit / 256-bit integers --- +2 +2 +2 +2 +2 +2 +2 +2 +--- 128-bit / 256-bit integers -> Float --- +2 +2 +2 +2 +2 +2 +2 +2 +--- no JIT --- +--- Float -> 128-bit / 256-bit integers --- +2 +2 +2 +2 +2 +2 +2 +2 +--- 128-bit / 256-bit integers -> Float --- +2 +2 +2 +2 +2 +2 +2 +2 diff --git a/tests/queries/0_stateless/04240_jit_float_to_big_int_libcalls.sql b/tests/queries/0_stateless/04240_jit_float_to_big_int_libcalls.sql new file mode 100644 index 000000000000..54923031e8c2 --- /dev/null +++ b/tests/queries/0_stateless/04240_jit_float_to_big_int_libcalls.sql @@ -0,0 +1,50 @@ +-- Tags: no-fasttest +-- no-fasttest: JIT compilation is not available in fasttest + +-- https://github.com/ClickHouse/ClickHouse/issues/105031 + +SELECT '--- JIT ---'; +SET compile_expressions = 1, min_count_to_compile_expression = 0; + +SELECT '--- Float -> 128-bit / 256-bit integers ---'; +SELECT toInt128 (materialize(1.5) + materialize(0.5)); +SELECT toUInt128(materialize(1.5) + materialize(0.5)); +SELECT toInt256 (materialize(1.5) + materialize(0.5)); +SELECT toUInt256(materialize(1.5) + materialize(0.5)); +SELECT toInt128 (materialize(1.5)::Float32 + materialize(0.5)::Float32); +SELECT toUInt128(materialize(1.5)::Float32 + materialize(0.5)::Float32); +SELECT toInt256 (materialize(1.5)::Float32 + materialize(0.5)::Float32); +SELECT toUInt256(materialize(1.5)::Float32 + materialize(0.5)::Float32); + +SELECT '--- 128-bit / 256-bit integers -> Float ---'; +SELECT toFloat32(materialize(2::Int128) + materialize(0::Int128)); +SELECT toFloat64(materialize(2::Int128) + materialize(0::Int128)); +SELECT toFloat32(materialize(2::UInt128) + materialize(0::UInt128)); +SELECT toFloat64(materialize(2::UInt128) + materialize(0::UInt128)); +SELECT toFloat32(materialize(2::Int256) + materialize(0::Int256)); +SELECT toFloat64(materialize(2::Int256) + materialize(0::Int256)); +SELECT 
toFloat32(materialize(2::UInt256) + materialize(0::UInt256)); +SELECT toFloat64(materialize(2::UInt256) + materialize(0::UInt256)); + +SELECT '--- no JIT ---'; +SET compile_expressions = 0; + +SELECT '--- Float -> 128-bit / 256-bit integers ---'; +SELECT toInt128 (materialize(1.5) + materialize(0.5)); +SELECT toUInt128(materialize(1.5) + materialize(0.5)); +SELECT toInt256 (materialize(1.5) + materialize(0.5)); +SELECT toUInt256(materialize(1.5) + materialize(0.5)); +SELECT toInt128 (materialize(1.5)::Float32 + materialize(0.5)::Float32); +SELECT toUInt128(materialize(1.5)::Float32 + materialize(0.5)::Float32); +SELECT toInt256 (materialize(1.5)::Float32 + materialize(0.5)::Float32); +SELECT toUInt256(materialize(1.5)::Float32 + materialize(0.5)::Float32); + +SELECT '--- 128-bit / 256-bit integers -> Float ---'; +SELECT toFloat32(materialize(2::Int128) + materialize(0::Int128)); +SELECT toFloat64(materialize(2::Int128) + materialize(0::Int128)); +SELECT toFloat32(materialize(2::UInt128) + materialize(0::UInt128)); +SELECT toFloat64(materialize(2::UInt128) + materialize(0::UInt128)); +SELECT toFloat32(materialize(2::Int256) + materialize(0::Int256)); +SELECT toFloat64(materialize(2::Int256) + materialize(0::Int256)); +SELECT toFloat32(materialize(2::UInt256) + materialize(0::UInt256)); +SELECT toFloat64(materialize(2::UInt256) + materialize(0::UInt256)); From 1ce7df422dc599d0c0f28feec978e4f94139b094 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 18 May 2026 09:30:14 +0000 Subject: [PATCH 27/41] Backport #101580 to 26.3: Fix flaky test 03706_statistics_preserve_checksums_on_mutations --- .../03706_statistics_preserve_checksums_on_mutations.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/queries/0_stateless/03706_statistics_preserve_checksums_on_mutations.sh b/tests/queries/0_stateless/03706_statistics_preserve_checksums_on_mutations.sh index adb2ee8027d0..593d2d93b026 100755 --- 
a/tests/queries/0_stateless/03706_statistics_preserve_checksums_on_mutations.sh +++ b/tests/queries/0_stateless/03706_statistics_preserve_checksums_on_mutations.sh @@ -14,6 +14,12 @@ create table mt (key Int, value String) engine=MergeTree() order by key settings min_bytes_for_wide_part=0, -- otherwise sparse info will be different, since for INSERTs the sparse ratio is calculated for the whole block, while for mutations for each granula (FIXME?) ratio_of_defaults_for_sparse_serialization=1, + map_serialization_version='basic', + map_serialization_version_for_zero_level_parts='basic', + -- Pin serialization_info_version to 'basic': with 'with_types', the INSERT and mutation + -- paths may produce different serialization.json content (different type version metadata), + -- causing checksum mismatches that are not actual data corruption. + serialization_info_version='basic', -- This uncovers the bug auto_statistics_types='uniq,minmax,countmin,tdigest' ; From 4b6bf90fc3af0d6257f90a3b52185ac62456698e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 18 May 2026 11:38:42 +0000 Subject: [PATCH 28/41] Backport #103536 to 26.3: Fix heap-use-after-free in `executeAggregateMultiply` self-merge --- src/Functions/FunctionBinaryArithmetic.h | 24 +++++++++- ...06_reservoir_sample_self_merging.reference | 2 +- ...te_state_multiply_self_merge_uaf.reference | 13 +++++ ...ggregate_state_multiply_self_merge_uaf.sql | 47 ++++++++++++++++++ ...ultiply_self_merge_uaf_0988_2212.reference | 11 +++++ ...impl_multiply_self_merge_uaf_0988_2212.sql | 48 +++++++++++++++++++ 6 files changed, 143 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/04117_aggregate_state_multiply_self_merge_uaf.reference create mode 100644 tests/queries/0_stateless/04117_aggregate_state_multiply_self_merge_uaf.sql create mode 100644 tests/queries/0_stateless/04235_moving_impl_multiply_self_merge_uaf_0988_2212.reference create mode 100644 
tests/queries/0_stateless/04235_moving_impl_multiply_self_merge_uaf_0988_2212.sql diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 56e8926b9054..14c34f11c745 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -1187,6 +1187,21 @@ class FunctionBinaryArithmetic : public IFunction /// We use exponentiation by squaring algorithm to perform multiplying aggregate states by N in O(log(N)) operations /// https://en.wikipedia.org/wiki/Exponentiation_by_squaring + /// + /// The squaring step computes vec_from[i] := vec_from[i] + vec_from[i]. + /// Calling `function->merge(state, state)` directly is undefined behaviour: + /// many aggregate function `merge` implementations append the source's + /// internal storage to the destination's. When `src == dst` and the + /// destination reallocates, the source iterators (pointing to the freed + /// buffer) are dereferenced, producing a heap-use-after-free. We avoid + /// the alias by copying `vec_from` into an independent column first. + /// + /// The temporary column is constructed inside the squaring branch so + /// that its arena is released at the end of each iteration. Re-using a + /// single column across iterations would leak per-iteration state + /// memory into the column's monotonic arena (`popBack` decrements the + /// row count but does not free arena allocations), causing the + /// temporary footprint to grow with the multiplier. 
while (m) { if (m % 2) @@ -1197,8 +1212,15 @@ class FunctionBinaryArithmetic : public IFunction } else { + auto column_temp = ColumnAggregateFunction::create(function); + column_temp->reserve(size); for (size_t i = 0; i < size; ++i) - function->merge(vec_from[i], vec_from[i], arena.get()); + column_temp->insertFrom(vec_from[i]); + auto & vec_temp = column_temp->getData(); + + for (size_t i = 0; i < size; ++i) + function->merge(vec_from[i], vec_temp[i], arena.get()); + m /= 2; } } diff --git a/tests/queries/0_stateless/03406_reservoir_sample_self_merging.reference b/tests/queries/0_stateless/03406_reservoir_sample_self_merging.reference index 15e0c9417a7c..1538e93e2abf 100644 --- a/tests/queries/0_stateless/03406_reservoir_sample_self_merging.reference +++ b/tests/queries/0_stateless/03406_reservoir_sample_self_merging.reference @@ -1,3 +1,3 @@ 4.5 -499.5 +495 50501.5 diff --git a/tests/queries/0_stateless/04117_aggregate_state_multiply_self_merge_uaf.reference b/tests/queries/0_stateless/04117_aggregate_state_multiply_self_merge_uaf.reference new file mode 100644 index 000000000000..cb68ed1c2e92 --- /dev/null +++ b/tests/queries/0_stateless/04117_aggregate_state_multiply_self_merge_uaf.reference @@ -0,0 +1,13 @@ +50 +100 +200 +400 +800 +12800 +150 +250 +350 +1 +200 +200 +4900 diff --git a/tests/queries/0_stateless/04117_aggregate_state_multiply_self_merge_uaf.sql b/tests/queries/0_stateless/04117_aggregate_state_multiply_self_merge_uaf.sql new file mode 100644 index 000000000000..49996f0db012 --- /dev/null +++ b/tests/queries/0_stateless/04117_aggregate_state_multiply_self_merge_uaf.sql @@ -0,0 +1,47 @@ +-- Tags: no-fasttest +-- ^ The reproducer needs Address Sanitizer / Memory Sanitizer to surface the +-- bug; release builds happen to read freed memory without aborting, so a +-- fast-test-only run would be silent. 
+ +-- Regression test for STID 0988-40af: heap-use-after-free in +-- FunctionBinaryArithmetic::executeAggregateMultiply, triggered when the +-- exponentiation-by-squaring loop calls func->merge(state, state) on an +-- aggregate function whose merge implementation appends the source's internal +-- storage into the destination's. With src == dst, the source iterators are +-- invalidated by the destination's reallocation, producing a heap-use-after- +-- free. The fix copies the state to an independent column before each +-- squaring step. + +-- groupArrayState's merge appends rhs.value to lhs.value (a PODArray). With +-- src == dst the source iterator points to a buffer that is reallocated mid- +-- merge, which deterministically reproduces the heap-use-after-free under +-- Address Sanitizer. The result length grows linearly with the multiplier: +SELECT length(finalizeAggregation(state * 1)) FROM (SELECT groupArrayState(number) AS state FROM numbers(50)); +SELECT length(finalizeAggregation(state * 2)) FROM (SELECT groupArrayState(number) AS state FROM numbers(50)); +SELECT length(finalizeAggregation(state * 4)) FROM (SELECT groupArrayState(number) AS state FROM numbers(50)); +SELECT length(finalizeAggregation(state * 8)) FROM (SELECT groupArrayState(number) AS state FROM numbers(50)); +SELECT length(finalizeAggregation(state * 16)) FROM (SELECT groupArrayState(number) AS state FROM numbers(50)); +SELECT length(finalizeAggregation(state * 256)) FROM (SELECT groupArrayState(number) AS state FROM numbers(50)); + +-- Pure-odd and mixed multipliers still exercise the squaring branch (e.g. +-- 7 = 4 + 2 + 1 squares twice). 
+SELECT length(finalizeAggregation(state * 3)) FROM (SELECT groupArrayState(number) AS state FROM numbers(50)); +SELECT length(finalizeAggregation(state * 5)) FROM (SELECT groupArrayState(number) AS state FROM numbers(50)); +SELECT length(finalizeAggregation(state * 7)) FROM (SELECT groupArrayState(number) AS state FROM numbers(50)); + +-- quantilesExactState is the variant the production fuzzer caught (its +-- internal PODArray is what the original sanitizer +-- report flagged). The aggregate result is a fixed-length quantiles array, +-- but the squaring path still allocates / merges the underlying buffer the +-- same way. +SELECT length(finalizeAggregation(state * 8)) FROM (SELECT quantilesExactState(0.5)(number) AS state FROM numbers(50)); + +-- Constant column path: a single state value replicated across multiple +-- rows. agg_state_is_const takes the size=1 branch in executeAggregateMultiply. +SELECT length(finalizeAggregation(arrayJoin([state, state]) * 4)) +FROM (SELECT groupArrayState(number) AS state FROM numbers(50)); + +-- Numerical correctness: multiplying the state by N should produce N*size +-- elements; sum-aggregating them gives N * sum([0..49]) = N * 1225. 
+SELECT sum(arrayJoin(finalizeAggregation(state * 4))) +FROM (SELECT groupArrayState(number) AS state FROM numbers(50)); diff --git a/tests/queries/0_stateless/04235_moving_impl_multiply_self_merge_uaf_0988_2212.reference b/tests/queries/0_stateless/04235_moving_impl_multiply_self_merge_uaf_0988_2212.reference new file mode 100644 index 000000000000..20740609668d --- /dev/null +++ b/tests/queries/0_stateless/04235_moving_impl_multiply_self_merge_uaf_0988_2212.reference @@ -0,0 +1,11 @@ +50 +100 +150 +200 +400 +800 +200 +200 +200 +200 +200 diff --git a/tests/queries/0_stateless/04235_moving_impl_multiply_self_merge_uaf_0988_2212.sql b/tests/queries/0_stateless/04235_moving_impl_multiply_self_merge_uaf_0988_2212.sql new file mode 100644 index 000000000000..a59b7f6a4a21 --- /dev/null +++ b/tests/queries/0_stateless/04235_moving_impl_multiply_self_merge_uaf_0988_2212.sql @@ -0,0 +1,48 @@ +-- Tags: no-fasttest +-- ^ The reproducer surfaces as a `Segmentation fault` / `use-after-poison` +-- on AddressSanitizer / MemorySanitizer builds, and as a PODArray +-- `insertPrepare` assertion (`assertNotIntersects`) on Debug builds. +-- On Release builds the corruption can be silent, so the no-fasttest tag +-- restricts the test to builds where the regression is observable. + +-- Regression test for STID 0988-2212: `Segmentation fault` inside +-- `DB::MovingImpl::merge` (`AggregateFunctionGroupArrayMoving`) reached via +-- `FunctionBinaryArithmetic::executeAggregateMultiply` during +-- the analyzer dry-run. Same bug class as STID 0988-40af / 0988-3351 — the +-- exponentiation-by-squaring loop in `executeAggregateMultiply` invoked +-- `function->merge(state, state)`, aliasing the source and destination +-- aggregate state. 
`MovingImpl::merge` then called +-- `cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.end(), arena)` +-- where `rhs_elems` referenced the same `PODArray` as `cur_elems.value`, so +-- the source iterators pointed into the buffer that `insertPrepare` was +-- about to reallocate. The fix removes the self-alias at the call site by +-- copying `vec_from` into an independent column before each squaring step. + +-- Length grows linearly with the multiplier: each squaring concatenates the +-- backing prefix-sum array. For 50 input rows, `state * N` materialises +-- `N * 50` prefix sums. +SELECT length(finalizeAggregation(state * 1)) FROM (SELECT groupArrayMovingSumState(number) AS state FROM numbers(50)); +SELECT length(finalizeAggregation(state * 2)) FROM (SELECT groupArrayMovingSumState(number) AS state FROM numbers(50)); +SELECT length(finalizeAggregation(state * 3)) FROM (SELECT groupArrayMovingSumState(number) AS state FROM numbers(50)); +SELECT length(finalizeAggregation(state * 4)) FROM (SELECT groupArrayMovingSumState(number) AS state FROM numbers(50)); +SELECT length(finalizeAggregation(state * 8)) FROM (SELECT groupArrayMovingSumState(number) AS state FROM numbers(50)); +SELECT length(finalizeAggregation(state * 16)) FROM (SELECT groupArrayMovingSumState(number) AS state FROM numbers(50)); + +-- groupArrayMovingAvg exercises the same `MovingImpl::merge` code path with +-- a different `Data::Accumulator` (Float64 / Decimal). +SELECT length(finalizeAggregation(state * 4)) FROM (SELECT groupArrayMovingAvgState(number) AS state FROM numbers(50)); + +-- Decimal accumulator path: `MovingImpl<..., MovingSumData>>::merge` +-- — the variant caught on PR #101062 (STID 0988-2212 on amd_debug). The +-- squaring branch exercises the same self-alias hazard. 
+SELECT length(finalizeAggregation(state * 4)) FROM (SELECT groupArrayMovingSumState(toDecimal64(number, 2)) AS state FROM numbers(50)); + +-- Windowed variant `groupArrayMovingSum(window_size)`: parameterised template +-- (`LimitNumElements::value = true`) exercises the same merge path, with a +-- different `MovingImpl` instantiation. +SELECT length(finalizeAggregation(state * 4)) FROM (SELECT groupArrayMovingSumState(5)(number) AS state FROM numbers(50)); + +-- Constant column path: `executeAggregateMultiply` takes the `size = 1` +-- branch through `agg_state_is_const` and produces a `ColumnConst`. +SELECT length(finalizeAggregation(arrayJoin([state, state]) * 4)) +FROM (SELECT groupArrayMovingSumState(number) AS state FROM numbers(50)); From 55fb72fc955acfd737d8b2172557981d5e29b6c4 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 18 May 2026 15:21:05 +0000 Subject: [PATCH 29/41] Backport #104663 to 26.3: Fix Keeper termination on get `/keeper/availability_zone` with `quorum_reads=true` --- src/Coordination/KeeperStorage.cpp | 3 +- .../__init__.py | 0 .../configs/keeper_config.xml | 36 +++++++ .../configs/keeper_config_configured.xml | 41 ++++++++ .../configs/keeper_config_local_reads.xml | 38 +++++++ .../test.py | 98 +++++++++++++++++++ 6 files changed, 214 insertions(+), 2 deletions(-) create mode 100644 tests/integration/test_keeper_availability_zone_quorum_reads/__init__.py create mode 100644 tests/integration/test_keeper_availability_zone_quorum_reads/configs/keeper_config.xml create mode 100644 tests/integration/test_keeper_availability_zone_quorum_reads/configs/keeper_config_configured.xml create mode 100644 tests/integration/test_keeper_availability_zone_quorum_reads/configs/keeper_config_local_reads.xml create mode 100644 tests/integration/test_keeper_availability_zone_quorum_reads/test.py diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index b184ce27ef84..6c6943c3837a 100644 --- 
a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -1851,8 +1851,7 @@ std::list preprocess( ProfileEvents::increment(ProfileEvents::KeeperGetRequest); if (zk_request.path == Coordination::keeper_api_feature_flags_path - || zk_request.path == Coordination::keeper_config_path - || zk_request.path == Coordination::keeper_availability_zone_path) + || zk_request.path == Coordination::keeper_config_path) return {}; if (!storage.uncommitted_state.getNode(zk_request.path)) diff --git a/tests/integration/test_keeper_availability_zone_quorum_reads/__init__.py b/tests/integration/test_keeper_availability_zone_quorum_reads/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integration/test_keeper_availability_zone_quorum_reads/configs/keeper_config.xml b/tests/integration/test_keeper_availability_zone_quorum_reads/configs/keeper_config.xml new file mode 100644 index 000000000000..24095f39136e --- /dev/null +++ b/tests/integration/test_keeper_availability_zone_quorum_reads/configs/keeper_config.xml @@ -0,0 +1,36 @@ + + + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + 5000 + 10000 + trace + true + + + + 1 + node + 2888 + + + + + + + node + 9181 + + + diff --git a/tests/integration/test_keeper_availability_zone_quorum_reads/configs/keeper_config_configured.xml b/tests/integration/test_keeper_availability_zone_quorum_reads/configs/keeper_config_configured.xml new file mode 100644 index 000000000000..cce9700173aa --- /dev/null +++ b/tests/integration/test_keeper_availability_zone_quorum_reads/configs/keeper_config_configured.xml @@ -0,0 +1,41 @@ + + + + + 0 + az-configured + + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + 5000 + 10000 + trace + true + + + + 1 + node_configured + 2888 + + + + + + + node_configured + 9181 + + + diff --git 
a/tests/integration/test_keeper_availability_zone_quorum_reads/configs/keeper_config_local_reads.xml b/tests/integration/test_keeper_availability_zone_quorum_reads/configs/keeper_config_local_reads.xml new file mode 100644 index 000000000000..2497feede885 --- /dev/null +++ b/tests/integration/test_keeper_availability_zone_quorum_reads/configs/keeper_config_local_reads.xml @@ -0,0 +1,38 @@ + + + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + 5000 + 10000 + trace + false + + + + 1 + node_local + 2888 + + + + + + + node_local + 9181 + + + diff --git a/tests/integration/test_keeper_availability_zone_quorum_reads/test.py b/tests/integration/test_keeper_availability_zone_quorum_reads/test.py new file mode 100644 index 000000000000..c2ddc1cd0486 --- /dev/null +++ b/tests/integration/test_keeper_availability_zone_quorum_reads/test.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +"""Regression test for keeper termination when `Get /keeper/availability_zone` +is sent to a keeper that was started without `` and runs with +`quorum_reads=true`. + +Before the fix the keeper called `onStorageInconsistency` (=> `std::terminate`) +on commit, and the request entry persisted in the raft log, so log replay +re-triggered the termination on every restart. + +The companion test under `quorum_reads=false` exercises the local read path +(`processLocalRequests` -> `processImpl`), which already returned +`ZNONODE` gracefully before the fix. Running both ensures the two read modes +return the same response for an unconfigured AZ. A third case pins the +configured branch: with `` set, a `quorum_reads=true` `Get` should +return the AZ value normally. 
+""" + +import pytest + +import helpers.keeper_utils as keeper_utils +from helpers.cluster import ClickHouseCluster +from helpers.keeper_utils import KeeperClient, KeeperException + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + main_configs=["configs/keeper_config.xml"], + stay_alive=True, +) + +node_local = cluster.add_instance( + "node_local", + main_configs=["configs/keeper_config_local_reads.xml"], + stay_alive=True, +) + +node_configured = cluster.add_instance( + "node_configured", + main_configs=["configs/keeper_config_configured.xml"], + stay_alive=True, +) + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def _keeper_alive(target) -> bool: + return keeper_utils.send_4lw_cmd(cluster, target, "ruok") == "imok" + + +def _assert_znonode(target, instance_name): + keeper_utils.wait_until_connected(cluster, target) + assert _keeper_alive(target) + + with KeeperClient.from_cluster(cluster, instance_name, port=9181) as client: + with pytest.raises(KeeperException) as ex: + client.get("/keeper/availability_zone") + + error = str(ex.value) + assert "node doesn't exist" in error + assert "/keeper/availability_zone" in error + + # The keeper should still be alive — no SIGABRT, no raft-log poisoning. + assert _keeper_alive(target) + + +def test_get_availability_zone_returns_znonode_under_quorum_reads(): + """Keeper without `` should reply with `ZNONODE` for the AZ + path even when `quorum_reads=true` routes the read through the raft + commit path. The keeper must remain alive afterwards.""" + _assert_znonode(node, "node") + + +def test_get_availability_zone_returns_znonode_under_local_reads(): + """Same path, `quorum_reads=false` (the default). 
The read goes through + `processLocalRequests` and `processImpl` and should also return + `ZNONODE`, matching the response of the quorum-read path after the fix.""" + _assert_znonode(node_local, "node_local") + + +def test_get_availability_zone_returns_value_when_configured(): + """When `` is configured, a `quorum_reads=true` `Get` must + return the configured AZ value. This pins the configured branch of the + fix: the container lookup still succeeds under the raft commit path.""" + keeper_utils.wait_until_connected(cluster, node_configured) + assert _keeper_alive(node_configured) + + with KeeperClient.from_cluster(cluster, "node_configured", port=9181) as client: + assert client.get("/keeper/availability_zone") == "az-configured" + + assert _keeper_alive(node_configured) From fac697a493f30516fdf8f5c2d4b654b2e3219ce3 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 18 May 2026 18:54:27 +0000 Subject: [PATCH 30/41] Backport #104888 to 26.3: Fix data part consistency checks for types with dynamic structure and detect corrupted columns_substreams.txt --- src/Storages/MergeTree/ColumnsSubstreams.cpp | 43 ++++ src/Storages/MergeTree/ColumnsSubstreams.h | 7 + src/Storages/MergeTree/IMergeTreeDataPart.cpp | 31 +++ .../MergeTree/MergeTreeDataPartWide.cpp | 187 +++++++++++++----- src/Storages/MergeTree/checkDataPart.cpp | 76 +++++-- .../04234_check_table_json_wide.reference | 8 + .../04234_check_table_json_wide.sql | 34 ++++ ...ted_columns_substreams_detection.reference | 19 ++ ..._corrupted_columns_substreams_detection.sh | 124 ++++++++++++ 9 files changed, 461 insertions(+), 68 deletions(-) create mode 100644 tests/queries/0_stateless/04234_check_table_json_wide.reference create mode 100644 tests/queries/0_stateless/04234_check_table_json_wide.sql create mode 100644 tests/queries/0_stateless/04235_corrupted_columns_substreams_detection.reference create mode 100755 tests/queries/0_stateless/04235_corrupted_columns_substreams_detection.sh diff --git 
a/src/Storages/MergeTree/ColumnsSubstreams.cpp b/src/Storages/MergeTree/ColumnsSubstreams.cpp index 69b8f88b30d6..42c2ab93af86 100644 --- a/src/Storages/MergeTree/ColumnsSubstreams.cpp +++ b/src/Storages/MergeTree/ColumnsSubstreams.cpp @@ -1,4 +1,6 @@ #include +#include +#include #include #include @@ -231,4 +233,45 @@ void ColumnsSubstreams::validateColumns(const std::vector & columns) con } } +/// Check if substream name has a valid prefix: it must be exactly the prefix +/// or start with prefix followed by '.' or '%2E' (escaped dot used for Tuple element substreams). +static bool hasValidPrefix(const String & substream, const String & escaped_prefix) +{ + if (!substream.starts_with(escaped_prefix)) + return false; + /// Must be exactly the prefix, or followed by '.' or '%2E' separator. + /// Tuple element substreams use escapeForFileName(".element_name") which produces "%2Eelement_name". + if (substream.size() == escaped_prefix.size()) + return true; + if (substream[escaped_prefix.size()] == '.') + return true; + if (substream.substr(escaped_prefix.size(), 3) == "%2E") + return true; + return false; +} + +std::pair ColumnsSubstreams::findInvalidSubstreamName() const +{ + for (const auto & [column_name, substreams] : columns_substreams) + { + auto escaped_column_name = escapeForFileName(column_name); + auto nested_table_name = Nested::extractTableName(column_name); + auto escaped_nested_table_name = escapeForFileName(nested_table_name); + bool has_nested_prefix = (escaped_nested_table_name != escaped_column_name); + + for (const auto & substream : substreams) + { + if (hasValidPrefix(substream, escaped_column_name)) + continue; + + if (has_nested_prefix && hasValidPrefix(substream, escaped_nested_table_name)) + continue; + + return {substream, column_name}; + } + } + + return {}; +} + } diff --git a/src/Storages/MergeTree/ColumnsSubstreams.h b/src/Storages/MergeTree/ColumnsSubstreams.h index e4c18361a04a..50f00c23777f 100644 --- 
a/src/Storages/MergeTree/ColumnsSubstreams.h +++ b/src/Storages/MergeTree/ColumnsSubstreams.h @@ -46,6 +46,13 @@ class ColumnsSubstreams /// Check that we have substreams for all columns and they have the same order as in provided list. void validateColumns(const std::vector & columns) const; + /// Check that all substream names have valid prefixes matching their column names. + /// Every substream for a column must start with escapeForFileName(column_name) (or + /// escapeForFileName(Nested::extractTableName(column_name)) for shared Nested offsets), + /// followed by '.', '%2E', or end-of-string. + /// Returns {invalid_substream, column_name} pair, or empty strings if all are valid. + std::pair findInvalidSubstreamName() const; + /// Merge 2 sets of columns substreams with specified columns order. /// If some column exists in both left and right we keep only substreams from the left. static ColumnsSubstreams merge(const ColumnsSubstreams & left, const ColumnsSubstreams & right, const std::vector & columns_order); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 4ac9e406e56a..c59095f13a6b 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -2022,6 +2022,37 @@ void IMergeTreeDataPart::loadColumnsSubstreams() if (auto in = readFileIfExists(COLUMNS_SUBSTREAMS_FILE_NAME)) { columns_substreams.readText(*in); + + /// Validate that all substream names have valid prefixes matching their column names. + /// This detects a specific corruption caused by a bug in getFileNameForRenamedColumnStream + /// (fixed in https://github.com/ClickHouse/ClickHouse/pull/102689) where renaming a column + /// produced wrong substream names in columns_substreams.txt. + /// For Wide parts this file is not mandatory, so we can safely discard it and proceed + /// as if it didn't exist. 
+ if (part_type == MergeTreeDataPartType::Wide) + { + auto [invalid_substream, invalid_column] = columns_substreams.findInvalidSubstreamName(); + if (!invalid_substream.empty()) + { + LOG_WARNING( + storage.log, + "Ignoring corrupted {} in part {}: substream '{}' has invalid prefix for column '{}' " + "(expected prefix '{}' or '{}' followed by '.' or '%2E'). " + "The file was likely corrupted by a bug in column rename. " + "The part will work correctly without this file.", + COLUMNS_SUBSTREAMS_FILE_NAME, + name, + invalid_substream, + invalid_column, + escapeForFileName(invalid_column), + escapeForFileName(Nested::extractTableName(invalid_column))); + + columns_substreams = {}; + return; + } + } + + columns_substreams.validateColumns(getColumns().getNames()); } /// In Compact part with marks for substreams we must have substreams file. For other cases it's not mandatory. else if (part_type == MergeTreeDataPartType::Compact && index_granularity_info.mark_type.with_substreams) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 915fc4250e69..952479486441 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -340,70 +340,157 @@ MergeTreeDataPartWide::~MergeTreeDataPartWide() void MergeTreeDataPartWide::doCheckConsistency(bool require_part_metadata) const { std::string marks_file_extension = index_granularity_info.mark_type.getFileExtension(); + const auto & cols_substreams = getColumnsSubstreams(); if (!checksums.empty()) { if (require_part_metadata) { - for (const auto & name_type : columns) + if (!cols_substreams.empty()) { - getSerialization(name_type.name)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) + /// Use columns_substreams.txt as the source of truth for substream file names. 
+ /// This is more reliable than enumerateStreams for types with dynamic structure (JSON, Dynamic) + /// because enumerateStreams requires deserialization state to correctly enumerate dynamic substreams. + size_t col_idx = 0; + for (const auto & name_type : columns) { - /// Skip ephemeral subcolumns that don't store any real data. - if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) - return; - - auto stream_name = getStreamNameForColumn(name_type, substream_path, DATA_FILE_EXTENSION, checksums, storage.getSettings()); - if (!stream_name) - throw Exception( - ErrorCodes::NO_FILE_IN_DATA_PART, - "No stream ({}{}) file checksum for column {} in part {}", - ISerialization::getFileNameForStream(name_type, substream_path, ISerialization::StreamFileNameSettings(*storage.getSettings())), - DATA_FILE_EXTENSION, - name_type.name, - getDataPartStorage().getFullPath()); - - auto mrk_file_name = *stream_name + marks_file_extension; - if (!checksums.files.contains(mrk_file_name)) - throw Exception( - ErrorCodes::NO_FILE_IN_DATA_PART, - "No {} file checksum for column {} in part {} ", - mrk_file_name, name_type.name, getDataPartStorage().getFullPath()); - }); + const auto & substreams = cols_substreams.getColumnSubstreams(col_idx); + for (const auto & substream : substreams) + { + auto stream_name = getStreamNameOrHash(substream, DATA_FILE_EXTENSION, checksums); + if (!stream_name) + throw Exception( + ErrorCodes::NO_FILE_IN_DATA_PART, + "No stream ({}{}) file checksum for column {} (substream {}) in part {}", + substream, + DATA_FILE_EXTENSION, + name_type.name, + substream, + getDataPartStorage().getFullPath()); + + auto mrk_file_name = *stream_name + marks_file_extension; + if (!checksums.files.contains(mrk_file_name)) + throw Exception( + ErrorCodes::NO_FILE_IN_DATA_PART, + "No {} file checksum for column {} in part {}", + mrk_file_name, name_type.name, getDataPartStorage().getFullPath()); + } + ++col_idx; + } + } + else + { + /// Fallback for 
old parts without columns_substreams.txt. + /// Don't enumerate dynamic streams because we don't have the proper deserialization state, + /// so enumerateStreams may produce incorrect stream names for types with dynamic structure. + /// Dynamic stream files will still be verified by the subsequent directory-level check + /// against checksums.txt. + ISerialization::EnumerateStreamsSettings settings; + settings.enumerate_dynamic_streams = false; + for (const auto & name_type : columns) + { + auto serialization = getSerialization(name_type.name); + auto data = ISerialization::SubstreamData(serialization) + .withType(name_type.type) + .withColumn(getColumnSample(name_type)); + serialization->enumerateStreams(settings, [&](const ISerialization::SubstreamPath & substream_path) + { + if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) + return; + + auto stream_name = getStreamNameForColumn(name_type, substream_path, DATA_FILE_EXTENSION, checksums, storage.getSettings()); + if (!stream_name) + throw Exception( + ErrorCodes::NO_FILE_IN_DATA_PART, + "No stream ({}{}) file checksum for column {} in part {}", + ISerialization::getFileNameForStream(name_type, substream_path, ISerialization::StreamFileNameSettings(*storage.getSettings())), + DATA_FILE_EXTENSION, + name_type.name, + getDataPartStorage().getFullPath()); + + auto mrk_file_name = *stream_name + marks_file_extension; + if (!checksums.files.contains(mrk_file_name)) + throw Exception( + ErrorCodes::NO_FILE_IN_DATA_PART, + "No {} file checksum for column {} in part {}", + mrk_file_name, name_type.name, getDataPartStorage().getFullPath()); + }, data); + } } } } else { - /// Check that all marks are nonempty and have the same size. 
- std::optional marks_size; - for (const auto & name_type : columns) + if (!cols_substreams.empty()) { - getSerialization(name_type.name)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) + /// Use columns_substreams.txt as the source of truth. + std::optional marks_size; + for (size_t col_idx = 0; col_idx != columns.size(); ++col_idx) { - auto stream_name = getStreamNameForColumn(name_type, substream_path, marks_file_extension, getDataPartStorage(), storage.getSettings()); - - /// Missing file is Ok for case when new column was added. - if (!stream_name) - return; - - auto file_path = *stream_name + marks_file_extension; - UInt64 file_size = getDataPartStorage().getFileSize(file_path); - - if (!file_size) - throw Exception( - ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, - "Part {} is broken: {} is empty.", - getDataPartStorage().getFullPath(), - std::string(fs::path(getDataPartStorage().getFullPath()) / file_path)); - - if (!marks_size) - marks_size = file_size; - else if (file_size != *marks_size) - throw Exception( - ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, - "Part {} is broken: marks have different sizes.", getDataPartStorage().getFullPath()); - }); + const auto & substreams = cols_substreams.getColumnSubstreams(col_idx); + for (const auto & substream : substreams) + { + auto stream_name = getStreamNameOrHash(substream, marks_file_extension, getDataPartStorage()); + if (!stream_name) + continue; + + auto file_path = *stream_name + marks_file_extension; + UInt64 file_size = getDataPartStorage().getFileSize(file_path); + + if (!file_size) + throw Exception( + ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, + "Part {} is broken: {} is empty.", + getDataPartStorage().getFullPath(), + std::string(fs::path(getDataPartStorage().getFullPath()) / file_path)); + + if (!marks_size) + marks_size = file_size; + else if (file_size != *marks_size) + throw Exception( + ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, + "Part {} is broken: marks have different 
sizes.", getDataPartStorage().getFullPath()); + } + } + } + else + { + /// Fallback: check that all marks are nonempty and have the same size. + ISerialization::EnumerateStreamsSettings settings; + settings.enumerate_dynamic_streams = false; + std::optional marks_size; + for (const auto & name_type : columns) + { + auto serialization = getSerialization(name_type.name); + auto data = ISerialization::SubstreamData(serialization) + .withType(name_type.type) + .withColumn(getColumnSample(name_type)); + serialization->enumerateStreams(settings, [&](const ISerialization::SubstreamPath & substream_path) + { + auto stream_name = getStreamNameForColumn(name_type, substream_path, marks_file_extension, getDataPartStorage(), storage.getSettings()); + + /// Missing file is Ok for case when new column was added. + if (!stream_name) + return; + + auto file_path = *stream_name + marks_file_extension; + UInt64 file_size = getDataPartStorage().getFileSize(file_path); + + if (!file_size) + throw Exception( + ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, + "Part {} is broken: {} is empty.", + getDataPartStorage().getFullPath(), + std::string(fs::path(getDataPartStorage().getFullPath()) / file_path)); + + if (!marks_size) + marks_size = file_size; + else if (file_size != *marks_size) + throw Exception( + ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, + "Part {} is broken: marks have different sizes.", getDataPartStorage().getFullPath()); + }, data); + } } } } diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index e11fe4abcc8e..2fd47973cf98 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -245,24 +245,60 @@ static IMergeTreeDataPart::Checksums checkDataPart( } else if (part_type == MergeTreeDataPartType::Wide) { - for (const auto & column : columns_list) + const auto & cols_substreams = data_part->getColumnsSubstreams(); + if (!cols_substreams.empty()) { - 
get_serialization(column)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) + /// Use columns_substreams.txt as the source of truth for substream file names. + /// This is more reliable than enumerateStreams for types with dynamic structure (JSON, Dynamic) + /// because enumerateStreams requires deserialization state to correctly enumerate dynamic substreams. + size_t col_idx = 0; + for (const auto & column : columns_list) { - /// Skip ephemeral subcolumns that don't store any real data. - if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) - return; - - auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(column, substream_path, ".bin", data_part_storage, data_part->storage.getSettings()); - - if (!stream_name) - throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, - "There is no file for column '{}' in data part '{}'", - column.name, data_part->name); - - auto file_name = *stream_name + ".bin"; - checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); - }, column.type, data_part->getColumnSample(column)); + const auto & substreams = cols_substreams.getColumnSubstreams(col_idx); + for (const auto & substream : substreams) + { + auto stream_name = IMergeTreeDataPart::getStreamNameOrHash(substream, ".bin", data_part_storage); + if (!stream_name) + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, + "There is no file for column '{}' (substream {}) in data part '{}'", + column.name, substream, data_part->name); + + auto file_name = *stream_name + ".bin"; + checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); + } + ++col_idx; + } + } + else + { + /// Fallback for old parts without columns_substreams.txt. + /// Don't enumerate dynamic streams because we don't have the proper deserialization state. + /// Dynamic stream files will still be verified by the subsequent directory-level check + /// against checksums.txt. 
+ ISerialization::EnumerateStreamsSettings settings; + settings.enumerate_dynamic_streams = false; + for (const auto & column : columns_list) + { + auto serialization = get_serialization(column); + auto data = ISerialization::SubstreamData(serialization) + .withType(column.type) + .withColumn(data_part->getColumnSample(column)); + serialization->enumerateStreams(settings, [&](const ISerialization::SubstreamPath & substream_path) + { + if (ISerialization::isEphemeralSubcolumn(substream_path, substream_path.size())) + return; + + auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(column, substream_path, ".bin", data_part_storage, data_part->storage.getSettings()); + + if (!stream_name) + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, + "There is no file for column '{}' in data part '{}'", + column.name, data_part->name); + + auto file_name = *stream_name + ".bin"; + checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); + }, data); + } } } else @@ -298,9 +334,13 @@ static IMergeTreeDataPart::Checksums checkDataPart( if (checksum_it == checksums_data.files.end() && !files_without_checksums.contains(file_name)) { auto txt_checksum_it = checksums_txt_files.find(file_name); - if ((txt_checksum_it != checksums_txt_files.end() && txt_checksum_it->second.is_compressed)) + if ((txt_checksum_it != checksums_txt_files.end() && txt_checksum_it->second.is_compressed) + || file_name.ends_with(".bin")) { - /// If we have both compressed and uncompressed in txt or its .cmrk(2/3) or .cidx, then calculate them + /// If we know from checksums.txt that the file is compressed, or it has the .bin extension + /// (all .bin files in MergeTree are compressed), compute both compressed and uncompressed checksums. + /// The .bin check is important for dynamic stream files that may not be visited + /// during enumerateStreams when columns_substreams.txt is absent. 
checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); } else diff --git a/tests/queries/0_stateless/04234_check_table_json_wide.reference b/tests/queries/0_stateless/04234_check_table_json_wide.reference new file mode 100644 index 000000000000..5439f6a8092b --- /dev/null +++ b/tests/queries/0_stateless/04234_check_table_json_wide.reference @@ -0,0 +1,8 @@ +1 +1 +1 1 +2 2 +3 3 +1 1 +2 2 +3 3 diff --git a/tests/queries/0_stateless/04234_check_table_json_wide.sql b/tests/queries/0_stateless/04234_check_table_json_wide.sql new file mode 100644 index 000000000000..dfd43d31f57d --- /dev/null +++ b/tests/queries/0_stateless/04234_check_table_json_wide.sql @@ -0,0 +1,34 @@ +-- Tags: no-fasttest + +SET allow_experimental_json_type = 1; + +DROP TABLE IF EXISTS t_check_json_wide; + +CREATE TABLE t_check_json_wide (id UInt64, data JSON) +ENGINE = MergeTree ORDER BY id +SETTINGS min_rows_for_wide_part = 1, min_bytes_for_wide_part = 1; + +-- Insert rows with different JSON structures to create parts with varying dynamic substreams. +INSERT INTO t_check_json_wide VALUES (1, '{"a": 1}'); +INSERT INTO t_check_json_wide VALUES (2, '{"a": 2, "b": [1, 2, 3]}'); +INSERT INTO t_check_json_wide VALUES (3, '{"a": 3, "c": {"d": "hello"}}'); + +-- CHECK TABLE should succeed on multiple Wide parts with different dynamic substreams. +CHECK TABLE t_check_json_wide SETTINGS check_query_single_value_result = 1; + +-- Merge parts and check again. +OPTIMIZE TABLE t_check_json_wide FINAL; + +CHECK TABLE t_check_json_wide SETTINGS check_query_single_value_result = 1; + +-- Verify data correctness. +SELECT id, data.a FROM t_check_json_wide ORDER BY id; + +-- DETACH/ATTACH triggers doCheckConsistency. +ALTER TABLE t_check_json_wide DETACH PARTITION tuple(); +ALTER TABLE t_check_json_wide ATTACH PARTITION tuple(); + +-- Verify data after reattach. 
+SELECT id, data.a FROM t_check_json_wide ORDER BY id; + +DROP TABLE t_check_json_wide; diff --git a/tests/queries/0_stateless/04235_corrupted_columns_substreams_detection.reference b/tests/queries/0_stateless/04235_corrupted_columns_substreams_detection.reference new file mode 100644 index 000000000000..a3be30c427ec --- /dev/null +++ b/tests/queries/0_stateless/04235_corrupted_columns_substreams_detection.reference @@ -0,0 +1,19 @@ +Data before corruption: +10 45 20 +Data after attach with corrupted file: +10 45 20 +CHECK TABLE result: +1 +Data after partition reattach: +10 45 20 +JSON data before corruption: +1 1 +2 2 +JSON data after attach with corrupted file: +1 1 +2 2 +JSON CHECK TABLE result: +1 +JSON data after partition reattach: +1 1 +2 2 diff --git a/tests/queries/0_stateless/04235_corrupted_columns_substreams_detection.sh b/tests/queries/0_stateless/04235_corrupted_columns_substreams_detection.sh new file mode 100755 index 000000000000..c22992161980 --- /dev/null +++ b/tests/queries/0_stateless/04235_corrupted_columns_substreams_detection.sh @@ -0,0 +1,124 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-shared-merge-tree, no-object-storage + +# Test that corrupted columns_substreams.txt (from a historical rename bug) is detected +# and safely discarded at load time, allowing the part to work correctly without it. +# Tests both simple types (Array) and types with dynamic substreams (JSON). + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# ---- Test 1: Array(UInt32) column ---- + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS t_corrupted_substreams" + +${CLICKHOUSE_CLIENT} --query " + CREATE TABLE t_corrupted_substreams + ( + id UInt64, + arr Array(UInt32) + ) + ENGINE = MergeTree ORDER BY id + SETTINGS min_rows_for_wide_part = 1, min_bytes_for_wide_part = 1, + enable_block_number_column = 0, enable_block_offset_column = 0, + replace_long_file_name_to_hash = 0, ratio_of_defaults_for_sparse_serialization = 1; +" + +${CLICKHOUSE_CLIENT} --query "INSERT INTO t_corrupted_substreams SELECT number, [number, number + 1] FROM numbers(10)" + +echo "Data before corruption:" +${CLICKHOUSE_CLIENT} --query "SELECT count(), sum(id), sum(length(arr)) FROM t_corrupted_substreams" + +# Get the data path of the active part. +DATA_PATH=$(${CLICKHOUSE_CLIENT} --query "SELECT path FROM system.parts WHERE database = currentDatabase() AND table = 't_corrupted_substreams' AND active") + +# Detach the table so we can modify files on disk. +${CLICKHOUSE_CLIENT} --query "DETACH TABLE t_corrupted_substreams" + +# Corrupt columns_substreams.txt by writing substream names that simulate the rename bug: +# substream names like "arrwrong" instead of "arr" or "arr.size0". +cat > "${DATA_PATH}columns_substreams.txt" << 'EOF' +columns substreams version: 1 +2 columns: +1 substreams for column `id`: + id +1 substreams for column `arr`: + arrwrongprefix +EOF + +# Attach the table - this triggers loadColumnsSubstreams which should detect the corruption, +# log a warning, and discard the corrupted data. +${CLICKHOUSE_CLIENT} --query "ATTACH TABLE t_corrupted_substreams" 2>/dev/null + +echo "Data after attach with corrupted file:" +${CLICKHOUSE_CLIENT} --query "SELECT count(), sum(id), sum(length(arr)) FROM t_corrupted_substreams" + +# CHECK TABLE should also work (falls back to enumerateStreams since columns_substreams was discarded). 
+echo "CHECK TABLE result:" +${CLICKHOUSE_CLIENT} --query "CHECK TABLE t_corrupted_substreams SETTINGS check_query_single_value_result = 1" + +# DETACH/ATTACH partition should also work. +${CLICKHOUSE_CLIENT} --query "ALTER TABLE t_corrupted_substreams DETACH PARTITION tuple()" +${CLICKHOUSE_CLIENT} --query "ALTER TABLE t_corrupted_substreams ATTACH PARTITION tuple()" 2>/dev/null + +echo "Data after partition reattach:" +${CLICKHOUSE_CLIENT} --query "SELECT count(), sum(id), sum(length(arr)) FROM t_corrupted_substreams" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE t_corrupted_substreams" + +# ---- Test 2: JSON column (dynamic substreams, exercises enumerate_dynamic_streams = false fallback) ---- + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS t_corrupted_substreams_json" + +${CLICKHOUSE_CLIENT} --query " + SET allow_experimental_json_type = 1; + CREATE TABLE t_corrupted_substreams_json + ( + id UInt64, + data JSON + ) + ENGINE = MergeTree ORDER BY id + SETTINGS min_rows_for_wide_part = 1, min_bytes_for_wide_part = 1, + enable_block_number_column = 0, enable_block_offset_column = 0, + replace_long_file_name_to_hash = 0, ratio_of_defaults_for_sparse_serialization = 1; +" + +${CLICKHOUSE_CLIENT} --query "INSERT INTO t_corrupted_substreams_json VALUES (1, '{\"a\": 1, \"b\": \"hello\"}'), (2, '{\"a\": 2, \"c\": [1, 2, 3]}')" + +echo "JSON data before corruption:" +${CLICKHOUSE_CLIENT} --query "SELECT id, data.a FROM t_corrupted_substreams_json ORDER BY id" + +# Get the data path of the active part. +DATA_PATH_JSON=$(${CLICKHOUSE_CLIENT} --query "SELECT path FROM system.parts WHERE database = currentDatabase() AND table = 't_corrupted_substreams_json' AND active") + +# Detach the table so we can modify files on disk. +${CLICKHOUSE_CLIENT} --query "DETACH TABLE t_corrupted_substreams_json" + +# Corrupt columns_substreams.txt by writing a wrong prefix for the data column substreams. 
+cat > "${DATA_PATH_JSON}columns_substreams.txt" << 'EOF' +columns substreams version: 1 +2 columns: +1 substreams for column `id`: + id +1 substreams for column `data`: + datawrongprefix.object_structure +EOF + +# Attach the table - corruption detected, file discarded, falls back to enumerate_dynamic_streams = false. +${CLICKHOUSE_CLIENT} --query "ATTACH TABLE t_corrupted_substreams_json" 2>/dev/null + +echo "JSON data after attach with corrupted file:" +${CLICKHOUSE_CLIENT} --query "SELECT id, data.a FROM t_corrupted_substreams_json ORDER BY id" + +echo "JSON CHECK TABLE result:" +${CLICKHOUSE_CLIENT} --query "CHECK TABLE t_corrupted_substreams_json SETTINGS check_query_single_value_result = 1" + +# DETACH/ATTACH partition should also work. +${CLICKHOUSE_CLIENT} --query "ALTER TABLE t_corrupted_substreams_json DETACH PARTITION tuple()" +${CLICKHOUSE_CLIENT} --query "ALTER TABLE t_corrupted_substreams_json ATTACH PARTITION tuple()" 2>/dev/null + +echo "JSON data after partition reattach:" +${CLICKHOUSE_CLIENT} --query "SELECT id, data.a FROM t_corrupted_substreams_json ORDER BY id" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE t_corrupted_substreams_json" From 173a7ccd24af32e6e4fe7e09d1f53d8402fe46c0 Mon Sep 17 00:00:00 2001 From: robot-ch-test-poll1 <47390204+robot-ch-test-poll1@users.noreply.github.com> Date: Tue, 19 May 2026 06:07:17 +0200 Subject: [PATCH 31/41] Backport #104009 to 26.3: Fix SQL injection in ExternalQueryBuilder via incorrect string escaping (#105185) Co-authored-by: robot-clickhouse --- src/Dictionaries/ExternalQueryBuilder.cpp | 4 + src/Dictionaries/ExternalQueryBuilder.h | 2 +- .../tests/gtest_external_query_builder.cpp | 85 +++++++++++++++++++ .../test_dictionaries_mysql/test.py | 72 ++++++++++++++++ .../test_dictionaries_postgresql/test.py | 65 ++++++++++++++ 5 files changed, 227 insertions(+), 1 deletion(-) create mode 100644 src/Dictionaries/tests/gtest_external_query_builder.cpp diff --git a/src/Dictionaries/ExternalQueryBuilder.cpp 
b/src/Dictionaries/ExternalQueryBuilder.cpp index 4d3f9cd63ff7..2df29bd2c638 100644 --- a/src/Dictionaries/ExternalQueryBuilder.cpp +++ b/src/Dictionaries/ExternalQueryBuilder.cpp @@ -41,6 +41,10 @@ ExternalQueryBuilder::ExternalQueryBuilder( , where(where_) , quoting_style(quoting_style_) { + // SQL-standard DBs (PostgreSQL, Cassandra, etc.) treat '\' as a literal character, so use '' escaping. + if (quoting_style == IdentifierQuotingStyle::DoubleQuotes) + format_settings.values.escape_quote_with_quote = true; + if (table.empty() && query.empty()) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Setting `table` or `query` must be non empty"); diff --git a/src/Dictionaries/ExternalQueryBuilder.h b/src/Dictionaries/ExternalQueryBuilder.h index f09ce545428d..a6da98ebfdfd 100644 --- a/src/Dictionaries/ExternalQueryBuilder.h +++ b/src/Dictionaries/ExternalQueryBuilder.h @@ -65,7 +65,7 @@ struct ExternalQueryBuilder protected: - const FormatSettings format_settings = {}; + FormatSettings format_settings; virtual void composeLoadAllQuery(WriteBuffer & out) const; diff --git a/src/Dictionaries/tests/gtest_external_query_builder.cpp b/src/Dictionaries/tests/gtest_external_query_builder.cpp new file mode 100644 index 000000000000..ac94a069b5d0 --- /dev/null +++ b/src/Dictionaries/tests/gtest_external_query_builder.cpp @@ -0,0 +1,85 @@ +#include + +#include + +#include +#include +#include +#include +#include +#include + +using namespace DB; + +/// Minimal dictionary structure: one complex String key, no attributes. +static DictionaryStructure makeSingleStringKeyStructure() +{ + const char * xml = R"( + + + + + + k + String + + + + +)"; + std::istringstream stream(xml); // STYLE_CHECK_ALLOW_STD_STRING_STREAM + auto config = Poco::AutoPtr(new Poco::Util::XMLConfiguration(stream)); + return DictionaryStructure(*config, "dictionary"); +} + +/// Returns the WHERE clause of a composeLoadKeysQuery call for the given values. 
+static std::string buildWhereClause(IdentifierQuotingStyle style, const std::vector & values) +{ + auto dict_struct = makeSingleStringKeyStructure(); + ExternalQueryBuilder builder(dict_struct, "", "", "t", "", "", style); + + auto col = ColumnString::create(); + for (const auto & v : values) + col->insert(v); + + Columns key_columns; + key_columns.push_back(std::move(col)); + + VectorWithMemoryTracking rows; + for (size_t i = 0; i < values.size(); ++i) + rows.push_back(i); + + return builder.composeLoadKeysQuery(key_columns, rows, ExternalQueryBuilder::AND_OR_CHAIN); +} + +/// DoubleQuotes (PostgreSQL / Cassandra): '' for single quotes, \ left intact. +TEST(ExternalQueryBuilderEscaping, DoubleQuotesEscapesSingleQuoteWithDoubling) +{ + std::string sql = buildWhereClause(IdentifierQuotingStyle::DoubleQuotes, {"it's"}); + EXPECT_NE(sql.find("'it''s'"), std::string::npos) << "SQL: " << sql; + EXPECT_EQ(sql.find("it\\'s"), std::string::npos) << "Must not use backslash escaping. SQL: " << sql; +} + +TEST(ExternalQueryBuilderEscaping, DoubleQuotesLeavesBackslashUnchanged) +{ + // PostgreSQL/Cassandra treat '\' as a literal character; the value must round-trip correctly. + std::string sql = buildWhereClause(IdentifierQuotingStyle::DoubleQuotes, {"foo\\bar"}); + EXPECT_NE(sql.find("'foo\\bar'"), std::string::npos) << "SQL: " << sql; + // Must NOT double the backslash (that would store two backslashes in the DB). + EXPECT_EQ(sql.find("'foo\\\\bar'"), std::string::npos) << "Must not double backslash. SQL: " << sql; +} + +/// Backticks (ClickHouse / MySQL): \' for single quotes, \\ for backslashes. +TEST(ExternalQueryBuilderEscaping, BackticksEscapesSingleQuoteWithBackslash) +{ + std::string sql = buildWhereClause(IdentifierQuotingStyle::Backticks, {"it's"}); + EXPECT_NE(sql.find("\\'"), std::string::npos) << "SQL: " << sql; + EXPECT_EQ(sql.find("''"), std::string::npos) << "Must not use quote-doubling. 
SQL: " << sql; +} + +TEST(ExternalQueryBuilderEscaping, BackticksDoublesBackslash) +{ + // ClickHouse/MySQL treat '\' as an escape character; backslashes must be doubled. + std::string sql = buildWhereClause(IdentifierQuotingStyle::Backticks, {"foo\\bar"}); + EXPECT_NE(sql.find("foo\\\\bar"), std::string::npos) << "SQL: " << sql; +} diff --git a/tests/integration/test_dictionaries_mysql/test.py b/tests/integration/test_dictionaries_mysql/test.py index f71bfa7cab23..8381773d0add 100644 --- a/tests/integration/test_dictionaries_mysql/test.py +++ b/tests/integration/test_dictionaries_mysql/test.py @@ -251,6 +251,78 @@ def test_mysql_dictionaries_custom_query_partial_load_complex_key(started_cluste execute_mysql_query(mysql_connection, "DROP TABLE test.test_table_2;") +def test_mysql_dict_complex_key_with_special_chars(started_cluster): + """Regression test: ExternalQueryBuilder uses backslash escaping for MySQL backend. + + MySQL uses IdentifierQuotingStyle::Backticks, so escape_quote_with_quote stays false + (backslash escaping: ' -> \\', \\ -> \\\\). Verify that keys containing single quotes + and backslashes are looked up correctly via dictGet. + """ + mysql_connection = get_mysql_conn(started_cluster) + + execute_mysql_query( + mysql_connection, + "CREATE TABLE IF NOT EXISTS test.test_mysql_escape (key_col TEXT, value_col TEXT);", + ) + # Single-quote key: use double-quote MySQL string literal to avoid escaping. + execute_mysql_query( + mysql_connection, + "INSERT INTO test.test_mysql_escape VALUES (\"it's\", 'quote');", + ) + # Backslash key: use CHAR(92) to insert a literal backslash without SQL escaping confusion. 
+ execute_mysql_query( + mysql_connection, + "INSERT INTO test.test_mysql_escape VALUES (CONCAT('foo', CHAR(92), 'bar'), 'backslash');", + ) + execute_mysql_query( + mysql_connection, + "INSERT INTO test.test_mysql_escape VALUES ('normal', 'normal value');", + ) + + query = instance.query + query( + f""" + CREATE DICTIONARY test_dict_mysql_escape + ( + key_col String, + value_col String DEFAULT '' + ) + PRIMARY KEY key_col + LAYOUT(COMPLEX_KEY_DIRECT()) + SOURCE(MYSQL( + HOST 'mysql80' + PORT 3306 + USER 'root' + PASSWORD '{mysql_pass}' + DB 'test' + TABLE 'test_mysql_escape')) + """ + ) + + result = query( + "SELECT dictGet('test_dict_mysql_escape', 'value_col', tuple('it\\'s'))" + ) + assert result == "quote\n", f"Unexpected result: {result!r}" + + result = query( + "SELECT dictGet('test_dict_mysql_escape', 'value_col', tuple('foo\\\\bar'))" + ) + assert result == "backslash\n", f"Unexpected result: {result!r}" + + result = query( + "SELECT dictGet('test_dict_mysql_escape', 'value_col', tuple('normal'))" + ) + assert result == "normal value\n", f"Unexpected result: {result!r}" + + result = query( + "SELECT dictGet('test_dict_mysql_escape', 'value_col', tuple('missing'))" + ) + assert result == "\n", f"Unexpected result: {result!r}" + + query("DROP DICTIONARY test_dict_mysql_escape;") + execute_mysql_query(mysql_connection, "DROP TABLE test.test_mysql_escape;") + + def test_predefined_connection_configuration(started_cluster): mysql_connection = get_mysql_conn(started_cluster) diff --git a/tests/integration/test_dictionaries_postgresql/test.py b/tests/integration/test_dictionaries_postgresql/test.py index 091ec313a755..88de097bbc1c 100644 --- a/tests/integration/test_dictionaries_postgresql/test.py +++ b/tests/integration/test_dictionaries_postgresql/test.py @@ -273,6 +273,71 @@ def test_postgres_dictionaries_custom_query_partial_load_complex_key(started_clu cursor.execute("DROP TABLE test_table_1;") +def 
test_postgres_dict_complex_key_with_single_quote(started_cluster): + """Regression test: string keys containing single quotes must not cause SQL injection. + + ExternalQueryBuilder previously used backslash escaping (\\'), but PostgreSQL + (standard_conforming_strings=on by default since 9.1) treats the backslash as a literal character, + so the quote that follows ends the string literal early and allows injection. + The fix switches to SQL-standard quote doubling (''). + """ + conn = get_postgres_conn( + ip=started_cluster.postgres_ip, + database=True, + port=started_cluster.postgres_port, + ) + cursor = conn.cursor() + + cursor.execute( + "CREATE TABLE IF NOT EXISTS test_single_quote (key Text PRIMARY KEY, value Text);" + ) + cursor.execute("INSERT INTO test_single_quote VALUES ('it''s a key', 'found it');") + cursor.execute( + "INSERT INTO test_single_quote VALUES ('normal', 'normal value');" + ) + + query = node1.query + query( + f""" + CREATE DICTIONARY test_dict_single_quote + ( + key String, + value String DEFAULT '' + ) + PRIMARY KEY key + LAYOUT(COMPLEX_KEY_DIRECT()) + SOURCE(PostgreSQL( + DB 'postgres_database' + HOST '{started_cluster.postgres_ip}' + PORT {started_cluster.postgres_port} + USER 'postgres' + PASSWORD '{pg_pass}' + TABLE 'test_single_quote')) + """ + ) + + # Key containing a single quote: must return the correct value, not an error. + result = query( + "SELECT dictGet('test_dict_single_quote', 'value', tuple('it''s a key'))" + ) + assert result == "found it\n", f"Unexpected result: {result!r}" + + # Key without a quote: must still work after the escaping change. + result = query( + "SELECT dictGet('test_dict_single_quote', 'value', tuple('normal'))" + ) + assert result == "normal value\n", f"Unexpected result: {result!r}" + + # Missing key must return the default, not raise an exception. 
+ result = query( + "SELECT dictGet('test_dict_single_quote', 'value', tuple('no such key'))" + ) + assert result == "\n", f"Unexpected result: {result!r}" + + query("DROP DICTIONARY test_dict_single_quote;") + cursor.execute("DROP TABLE test_single_quote;") + + def test_invalidate_query(started_cluster): conn = get_postgres_conn( ip=started_cluster.postgres_ip, From be8e4daa64969be71f87d34f752f728e0126d540 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 19 May 2026 17:15:34 +0000 Subject: [PATCH 32/41] Backport #104610 to 26.3: Use restore Keeper retries for restored parts --- src/Backups/RestorerFromBackup.h | 1 + src/Common/FailPoint.cpp | 1 + src/Storages/MergeTree/MergeTreeData.cpp | 13 ++++-- src/Storages/MergeTree/MergeTreeData.h | 3 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 25 ++++++++--- .../MergeTree/ReplicatedMergeTreeSink.h | 5 ++- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageMergeTree.h | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 5 ++- src/Storages/StorageReplicatedMergeTree.h | 2 +- ...tach_uses_restore_keeper_retries.reference | 1 + ...ted_attach_uses_restore_keeper_retries.sql | 45 +++++++++++++++++++ 12 files changed, 89 insertions(+), 16 deletions(-) create mode 100644 tests/queries/0_stateless/04218_restore_replicated_attach_uses_restore_keeper_retries.reference create mode 100644 tests/queries/0_stateless/04218_restore_replicated_attach_uses_restore_keeper_retries.sql diff --git a/src/Backups/RestorerFromBackup.h b/src/Backups/RestorerFromBackup.h index 7493d5ed974d..42c621a4ee14 100644 --- a/src/Backups/RestorerFromBackup.h +++ b/src/Backups/RestorerFromBackup.h @@ -57,6 +57,7 @@ class RestorerFromBackup : public BackupMetadataFinder bool isNonEmptyTableAllowed() const { return getRestoreSettings().allow_non_empty_tables; } std::shared_ptr getRestoreCoordination() const { return restore_coordination; } ContextMutablePtr getContext() const { return context; } + const ZooKeeperRetriesInfo & 
getZooKeeperRetriesInfo() const { return zookeeper_retries_info; } /// Adds a data restore task which will be later returned by getDataRestoreTasks(). /// This function can be called by implementations of IStorage::restoreFromBackup() in inherited storage classes. diff --git a/src/Common/FailPoint.cpp b/src/Common/FailPoint.cpp index 56a4a55d07a5..dc63090931c9 100644 --- a/src/Common/FailPoint.cpp +++ b/src/Common/FailPoint.cpp @@ -84,6 +84,7 @@ static struct InitFiu REGULAR(smt_outdated_parts_exception_response) \ REGULAR(object_storage_queue_fail_in_the_middle_of_file) \ PAUSEABLE_ONCE(replicated_merge_tree_insert_retry_pause) \ + ONCE(replicated_merge_tree_restore_attach_retry) \ PAUSEABLE_ONCE(finish_set_quorum_failed_parts) \ PAUSEABLE_ONCE(finish_clean_quorum_failed_parts) \ PAUSEABLE_ONCE(smt_wait_next_mutation) \ diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d1c4c984a5f5..88617b4231b9 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6878,8 +6878,11 @@ void MergeTreeData::restoreDataFromBackup(RestorerFromBackup & restorer, const S class MergeTreeData::RestoredPartsHolder { public: - RestoredPartsHolder(const std::shared_ptr & storage_, const BackupPtr & backup_) - : storage(storage_), backup(backup_) + RestoredPartsHolder( + const std::shared_ptr & storage_, + const BackupPtr & backup_, + const ZooKeeperRetriesInfo & zookeeper_retries_info_) + : storage(storage_), backup(backup_), zookeeper_retries_info(zookeeper_retries_info_) { } @@ -6936,7 +6939,7 @@ class MergeTreeData::RestoredPartsHolder parts.end(), [](const MutableDataPartPtr & lhs, const MutableDataPartPtr & rhs) { return lhs->info.min_block < rhs->info.min_block; }); - storage->attachRestoredParts(std::move(parts)); + storage->attachRestoredParts(std::move(parts), zookeeper_retries_info); parts.clear(); temp_part_dirs.clear(); num_parts = 0; @@ -6944,6 +6947,7 @@ class 
MergeTreeData::RestoredPartsHolder const std::shared_ptr storage; const BackupPtr backup; + const ZooKeeperRetriesInfo zookeeper_retries_info; size_t num_parts = 0; size_t num_broken_parts = 0; MutableDataPartsVector parts; @@ -6963,7 +6967,8 @@ void MergeTreeData::restorePartsFromBackup(RestorerFromBackup & restorer, const bool restore_broken_parts_as_detached = restorer.getRestoreSettings().restore_broken_parts_as_detached; - auto restored_parts_holder = std::make_shared(std::static_pointer_cast(shared_from_this()), backup); + auto restored_parts_holder = std::make_shared( + std::static_pointer_cast(shared_from_this()), backup, restorer.getZooKeeperRetriesInfo()); fs::path data_path_in_backup_fs = data_path_in_backup; size_t num_parts = 0; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index c2b1a4a50fde..81d993628fef 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -60,6 +60,7 @@ class Context; struct JobAndPool; class MergeTreeTransaction; struct ZeroCopyLock; +struct ZooKeeperRetriesInfo; class IBackupEntry; using BackupEntries = std::vector>>; @@ -1742,7 +1743,7 @@ class MergeTreeData : public WithMutableContext, public IStorage, public IBackgr MutableDataPartPtr loadPartRestoredFromBackup(const String & part_name, const DiskPtr & disk, const String & temp_part_dir, bool detach_if_broken) const; /// Attaches restored parts to the storage. 
- virtual void attachRestoredParts(MutableDataPartsVector && parts) = 0; + virtual void attachRestoredParts(MutableDataPartsVector && parts, const std::optional & zookeeper_retries_info) = 0; void resetSerializationHints(const DataPartsLock & lock); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 2242211a4fbf..f9a45e1593a4 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -73,6 +73,7 @@ namespace FailPoints extern const char replicated_merge_tree_insert_quorum_fail_0[]; extern const char replicated_merge_tree_commit_zk_fail_when_recovering_from_hw_fault[]; extern const char replicated_merge_tree_insert_retry_pause[]; + extern const char replicated_merge_tree_restore_attach_retry[]; extern const char rmt_delay_commit_part[]; } @@ -114,7 +115,8 @@ ReplicatedMergeTreeSink::ReplicatedMergeTreeSink( bool majority_quorum, ContextPtr context_, bool is_attach_, - bool allow_attach_while_readonly_) + bool allow_attach_while_readonly_, + std::optional keeper_retries_info_) : SinkToStorage(std::make_shared(metadata_snapshot_->getSampleBlock())) , storage(storage_) , metadata_snapshot(metadata_snapshot_) @@ -131,6 +133,7 @@ ReplicatedMergeTreeSink::ReplicatedMergeTreeSink( , log(getLogger(storage.getLogName() + " (Replicated OutputStream)")) , context(context_) , storage_snapshot(storage.getStorageSnapshotWithoutData(metadata_snapshot, context_)) + , keeper_retries_info(std::move(keeper_retries_info_)) , is_async_insert(async_insert_) , insert_deduplication_version(context->getServerSettings()[ServerSetting::insert_deduplication_version].value) { @@ -708,13 +711,15 @@ std::vector ReplicatedMergeTreeSink::commitPart( CommitRetryContext retry_context; const auto & settings = context->getSettingsRef(); + ZooKeeperRetriesInfo retries_info = keeper_retries_info.value_or(ZooKeeperRetriesInfo{ + 
settings[Setting::insert_keeper_max_retries], + settings[Setting::insert_keeper_retry_initial_backoff_ms], + settings[Setting::insert_keeper_retry_max_backoff_ms], + context->getProcessListElement()}); ZooKeeperRetriesControl retries_ctl( "commitPart", log, - {settings[Setting::insert_keeper_max_retries], - settings[Setting::insert_keeper_retry_initial_backoff_ms], - settings[Setting::insert_keeper_retry_max_backoff_ms], - context->getProcessListElement()}); + retries_info); auto resolve_duplicate_stage = [&] () -> CommitRetryContext::Stages { @@ -836,6 +841,16 @@ std::vector ReplicatedMergeTreeSink::commitPart( auto commit_new_part_stage = [&]() -> CommitRetryContext::Stages { + if (is_attach) + { + fiu_do_on(FailPoints::replicated_merge_tree_restore_attach_retry, + { + retries_ctl.setUserError( + Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Injected read-only error while attaching restored part")); + return CommitRetryContext::LOCK_AND_COMMIT; + }); + } + if (storage.is_readonly) { /// stop retries if in shutdown diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 39c5c6df2767..0f4964415d88 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -67,7 +68,8 @@ class ReplicatedMergeTreeSink : public SinkToStorage // special flag to determine the ALTER TABLE ATTACH PART without the query context, // needed to set the special LogEntryType::ATTACH_PART bool is_attach_ = false, - bool allow_attach_while_readonly_ = false); + bool allow_attach_while_readonly_ = false, + std::optional keeper_retries_info_ = std::nullopt); ~ReplicatedMergeTreeSink() override; @@ -148,6 +150,7 @@ class ReplicatedMergeTreeSink : public SinkToStorage ContextPtr context; StorageSnapshotPtr storage_snapshot; + std::optional keeper_retries_info; bool is_async_insert = true; InsertDeduplicationVersions 
insert_deduplication_version = InsertDeduplicationVersions::NEW_UNIFIED_HASHES; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 8a57eaaa2482..14dcd83d62bf 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2974,7 +2974,7 @@ BackupEntries StorageMergeTree::backupMutations(UInt64 version, const String & d } -void StorageMergeTree::attachRestoredParts(MutableDataPartsVector && parts) +void StorageMergeTree::attachRestoredParts(MutableDataPartsVector && parts, const std::optional &) { for (auto part : parts) { diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 9bceb139c4dc..861a537a2423 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -295,7 +295,7 @@ class StorageMergeTree final : public MergeTreeData BackupEntries backupMutations(UInt64 version, const String & data_path_in_backup) const; /// Attaches restored parts to the storage. - void attachRestoredParts(MutableDataPartsVector && parts) override; + void attachRestoredParts(MutableDataPartsVector && parts, const std::optional & zookeeper_retries_info) override; std::unique_ptr getDefaultSettings() const override; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index feb80e0c9b78..213d6d259ad3 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -11559,14 +11559,15 @@ void StorageReplicatedMergeTree::restoreDataFromBackup(RestorerFromBackup & rest restorePartsFromBackup(restorer, data_path_in_backup, partitions); } -void StorageReplicatedMergeTree::attachRestoredParts(MutableDataPartsVector && parts) +void StorageReplicatedMergeTree::attachRestoredParts( + MutableDataPartsVector && parts, const std::optional & zookeeper_retries_info) { auto component_guard = Coordination::setCurrentComponent("StorageReplicatedMergeTree::attachRestoredParts"); auto 
metadata_snapshot = getInMemoryMetadataPtr(); auto sink = std::make_shared( /* async_insert */ false, *this, metadata_snapshot, /* quorum */ 0, /* quorum_timeout_ms */ 0, /* max_parts_per_block */ 0, /* quorum_parallel */ false, - /* majority_quorum */ false, getContext(), /* is_attach */ true, /* allow_attach_while_readonly */ false); + /* majority_quorum */ false, getContext(), /* is_attach */ true, /* allow_attach_while_readonly */ false, zookeeper_retries_info); for (auto part : parts) sink->writeExistingPart(part); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 030c79ba3636..fa38283fd6cc 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -961,7 +961,7 @@ class StorageReplicatedMergeTree final : public MergeTreeData void startBackgroundMovesIfNeeded() override; /// Attaches restored parts to the storage. - void attachRestoredParts(MutableDataPartsVector && parts) override; + void attachRestoredParts(MutableDataPartsVector && parts, const std::optional & zookeeper_retries_info) override; std::unique_ptr getDefaultSettings() const override; diff --git a/tests/queries/0_stateless/04218_restore_replicated_attach_uses_restore_keeper_retries.reference b/tests/queries/0_stateless/04218_restore_replicated_attach_uses_restore_keeper_retries.reference new file mode 100644 index 000000000000..0cfbf08886fc --- /dev/null +++ b/tests/queries/0_stateless/04218_restore_replicated_attach_uses_restore_keeper_retries.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/04218_restore_replicated_attach_uses_restore_keeper_retries.sql b/tests/queries/0_stateless/04218_restore_replicated_attach_uses_restore_keeper_retries.sql new file mode 100644 index 000000000000..2699e89a725e --- /dev/null +++ b/tests/queries/0_stateless/04218_restore_replicated_attach_uses_restore_keeper_retries.sql @@ -0,0 +1,45 @@ +-- Tags: no-parallel +-- Uses a global one-shot 
failpoint in `ReplicatedMergeTreeSink::commitPart`. + +SYSTEM DISABLE FAILPOINT replicated_merge_tree_restore_attach_retry; + +DROP TABLE IF EXISTS src_04218 SYNC; +DROP TABLE IF EXISTS dst_04218_fail SYNC; +DROP TABLE IF EXISTS dst_04218_ok SYNC; + +CREATE TABLE src_04218 (x UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/04218_restore_retries/src', 'r1') +ORDER BY x; + +INSERT INTO src_04218 VALUES (1), (2); + +BACKUP TABLE src_04218 TO Memory('04218_restore_retries') FORMAT Null; + +CREATE TABLE dst_04218_fail (x UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/04218_restore_retries/dst_fail', 'r1') +ORDER BY x; + +SYSTEM ENABLE FAILPOINT replicated_merge_tree_restore_attach_retry; + +RESTORE TABLE src_04218 AS dst_04218_fail FROM Memory('04218_restore_retries') +SETTINGS allow_different_table_def = 1, backup_restore_keeper_max_retries = 0; -- { serverError TABLE_IS_READ_ONLY } + +DROP TABLE dst_04218_fail SYNC; + +CREATE TABLE dst_04218_ok (x UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/04218_restore_retries/dst_ok', 'r1') +ORDER BY x; + +SYSTEM ENABLE FAILPOINT replicated_merge_tree_restore_attach_retry; + +RESTORE TABLE src_04218 AS dst_04218_ok FROM Memory('04218_restore_retries') +SETTINGS allow_different_table_def = 1, + backup_restore_keeper_max_retries = 1, + backup_restore_keeper_retry_initial_backoff_ms = 1, + backup_restore_keeper_retry_max_backoff_ms = 1 +FORMAT Null; + +SELECT count() FROM dst_04218_ok; + +DROP TABLE src_04218 SYNC; +DROP TABLE dst_04218_ok SYNC; From 9b5a0a066a6a35a995d38b094ea9a6c89e00649e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 19 May 2026 19:50:45 +0000 Subject: [PATCH 33/41] Backport #102064 to 26.3: Fix incorrect results for RIGHT ANY JOIN --- src/Interpreters/HashJoin/HashJoinResult.cpp | 2 +- .../04075_right_any_join_incorrect.reference | 2 + .../04075_right_any_join_incorrect.sql | 43 +++++++++++++++++++ 3 files changed, 46 insertions(+), 1 
deletion(-) create mode 100644 tests/queries/0_stateless/04075_right_any_join_incorrect.reference create mode 100644 tests/queries/0_stateless/04075_right_any_join_incorrect.sql diff --git a/src/Interpreters/HashJoin/HashJoinResult.cpp b/src/Interpreters/HashJoin/HashJoinResult.cpp index a5fe394bc9c2..a785e2ae4e39 100644 --- a/src/Interpreters/HashJoin/HashJoinResult.cpp +++ b/src/Interpreters/HashJoin/HashJoinResult.cpp @@ -500,7 +500,7 @@ IJoinResult::JoinResultBlock HashJoinResult::next() /// Copy data from the original columns to preserve columns size in the block. rhs_columns.reserve(columns.size()); for (auto & column : columns) - rhs_columns.push_back(column->cut(start_row, num_rhs_rows)->assumeMutable()); + rhs_columns.push_back(column->cut(prev_offset, num_rhs_rows)->assumeMutable()); if (is_last) columns.clear(); diff --git a/tests/queries/0_stateless/04075_right_any_join_incorrect.reference b/tests/queries/0_stateless/04075_right_any_join_incorrect.reference new file mode 100644 index 000000000000..0292dbf87eac --- /dev/null +++ b/tests/queries/0_stateless/04075_right_any_join_incorrect.reference @@ -0,0 +1,2 @@ +4 4 PASS +36000 36000 PASS diff --git a/tests/queries/0_stateless/04075_right_any_join_incorrect.sql b/tests/queries/0_stateless/04075_right_any_join_incorrect.sql new file mode 100644 index 000000000000..4589eafec54d --- /dev/null +++ b/tests/queries/0_stateless/04075_right_any_join_incorrect.sql @@ -0,0 +1,43 @@ +DROP TABLE IF EXISTS fact; +DROP TABLE IF EXISTS dim; +DROP TABLE IF EXISTS rt; +DROP TABLE IF EXISTS lk1; +DROP TABLE IF EXISTS lk2; +DROP TABLE IF EXISTS lk3; + +CREATE TABLE fact (id UInt32, grp String, fy String, cc String, x2 String) ENGINE = MergeTree() ORDER BY (grp, fy); +CREATE TABLE dim (grp String, fy String, cc String, dt String, cl String) ENGINE = MergeTree() ORDER BY (grp, fy); +CREATE TABLE rt (jk String, cc String, dt String, cl String) ENGINE = MergeTree() ORDER BY (jk, cc); +CREATE TABLE lk1 (dt String, d String, cl 
String) ENGINE = MergeTree() ORDER BY dt; +CREATE TABLE lk2 (cc String, d String, cl String) ENGINE = MergeTree() ORDER BY cc; +CREATE TABLE lk3 (x2 String, d String) ENGINE = MergeTree() ORDER BY x2; + +INSERT INTO fact SELECT number+1, toString(intDiv(number,5)+1), '2025', toString((intDiv(number,5)%10)+1), toString((number%20)+1) FROM numbers(36000); + +INSERT INTO dim SELECT toString(number+1), '2025', toString((number%10)+1), toString((number%5)+1), '100' FROM numbers(7200); +INSERT INTO rt SELECT concat(toString(number+1),'2025'), toString((number%10)+1), toString((number%5)+1), '100' FROM numbers(7200); +INSERT INTO lk1 SELECT toString(number+1), 'l1', '100' FROM numbers(5); +INSERT INTO lk2 SELECT toString(number+1), 'l2', '100' FROM numbers(10); +INSERT INTO lk3 SELECT toString(number+1), 'l3' FROM numbers(20); + +SELECT count(), uniqExact(r_id), if(uniqExact(r_id) = 4, 'PASS', 'FAIL') + FROM (SELECT number AS l_key FROM numbers(2)) AS l + RIGHT ANY JOIN (SELECT intDiv(number, 2) AS r_key, number AS r_id FROM numbers(4)) AS r + ON l.l_key = r.r_key +SETTINGS max_joined_block_size_rows = 2, join_algorithm = 'hash'; + +SET query_plan_optimize_join_order_limit = 0; +SET query_plan_join_swap_table = 0; +SET join_algorithm = 'hash'; + +SELECT count(), uniqExact(id), if(uniqExact(id) = 36000, 'PASS', 'FAIL') AS result +FROM rt AS rt +RIGHT ANY JOIN ( + SELECT fact.id AS id, fact.x2 AS x2, dim.cc AS cc, dim.cl AS cl, dim.grp AS grp, dim.fy AS fy + FROM fact AS fact + INNER JOIN dim AS dim ON dim.grp = fact.grp AND dim.fy = fact.fy +) AS fd ON rt.jk = concat(fd.grp, fd.fy) AND rt.cc = fd.cc +LEFT JOIN lk1 AS l1 ON l1.dt = rt.dt AND l1.cl = rt.cl +LEFT JOIN lk2 AS l2 ON l2.cc = fd.cc AND l2.cl = fd.cl +LEFT JOIN lk3 AS l3 ON l3.x2 = fd.x2 +; From 8fc1a97bb233589778559e0a48e5aa734dd4147a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 19 May 2026 20:51:31 +0000 Subject: [PATCH 34/41] Backport #102417 to 26.3: Check for stack overflow in Avro reader --- 
contrib/avro | 2 +- .../Formats/Impl/AvroRowInputFormat.cpp | 18 +++++++++++++++--- .../Formats/Impl/AvroRowInputFormat.h | 6 ++++++ .../Common/AvroForIcebergDeserializer.cpp | 2 +- ...eply_nested_schema_stack_overflow.reference | 0 ...avro_deeply_nested_schema_stack_overflow.sh | 17 +++++++++++++++++ .../data_avro/deeply_nested_schema.avro | 1 + 7 files changed, 41 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/04093_avro_deeply_nested_schema_stack_overflow.reference create mode 100755 tests/queries/0_stateless/04093_avro_deeply_nested_schema_stack_overflow.sh create mode 100644 tests/queries/0_stateless/data_avro/deeply_nested_schema.avro diff --git a/contrib/avro b/contrib/avro index b32d5f3c7858..477ed40c656b 160000 --- a/contrib/avro +++ b/contrib/avro @@ -1 +1 @@ -Subproject commit b32d5f3c7858e32e51d3bc905bf5943d1cddca53 +Subproject commit 477ed40c656b1646a093d663668fb76dd0e75e47 diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 2f40576da654..d545b15c9742 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -3,6 +3,7 @@ #include +#include #include #include @@ -255,6 +256,8 @@ static bool canBeDeserializedFromFixed(const DataTypePtr & target_type, size_t f AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro::NodePtr & root_node, const DataTypePtr & target_type) { + checkStackSize(); + if (target_type->lowCardinality()) { const auto * lc_type = assert_cast(target_type.get()); @@ -730,6 +733,8 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(const avro::NodePtr & root_node) { + checkStackSize(); + switch (root_node->type()) { case avro::AVRO_STRING: @@ -872,6 +877,8 @@ static inline std::string concatPath(const std::string & a, const std::string & AvroDeserializer::Action 
AvroDeserializer::createAction(const Block & header, const avro::NodePtr & node, const std::string & current_path) { + checkStackSize(); + if (node->type() == avro::AVRO_SYMBOLIC) { /// continue traversal only if some column name starts with current_path @@ -1035,7 +1042,8 @@ AvroRowInputFormat::AvroRowInputFormat(SharedHeader header_, ReadBuffer & in_, P void AvroRowInputFormat::readPrefix() { - file_reader_ptr = std::make_unique(std::make_unique(*in)); + file_reader_ptr = std::make_unique( + std::make_unique(*in), MAX_AVRO_SCHEMA_DEPTH); deserializer_ptr = std::make_unique( output.getHeader(), file_reader_ptr->dataSchema(), format_settings.avro.allow_missing_fields, format_settings.null_as_default, format_settings); file_reader_ptr->init(); @@ -1139,7 +1147,7 @@ class AvroConfluentRowInputFormat::SchemaRegistry auto schema = json_body->getValue("schema"); LOG_TRACE((getLogger("AvroConfluentRowInputFormat")), "Successfully fetched schema id = {}\n{}", id, schema); - return avro::compileJsonSchemaFromString(schema); + return avro::compileJsonSchemaFromString(schema, MAX_AVRO_SCHEMA_DEPTH); } catch (const Exception &) { @@ -1280,7 +1288,8 @@ NamesAndTypesList AvroSchemaReader::readSchema() } else { - auto file_reader_ptr = std::make_unique(std::make_unique(in)); + auto file_reader_ptr = std::make_unique( + std::make_unique(in), MAX_AVRO_SCHEMA_DEPTH); root_node = file_reader_ptr->dataSchema().root(); } @@ -1296,6 +1305,9 @@ NamesAndTypesList AvroSchemaReader::readSchema() DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node) { + checkStackSize(); + + switch (node->type()) { case avro::Type::AVRO_INT: diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.h b/src/Processors/Formats/Impl/AvroRowInputFormat.h index b9acc525676a..520802802cb6 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.h @@ -29,6 +29,12 @@ namespace ErrorCodes class Block; +/// Maximum nesting depth for Avro 
schemas. Passed to the Avro library to +/// prevent stack overflow on deeply nested schemas (e.g. crafted inputs with +/// thousands of nested arrays/records). Real-world schemas rarely exceed 10-20 +/// levels, so 256 is more than enough. +static constexpr size_t MAX_AVRO_SCHEMA_DEPTH = 256; + class AvroInputStreamReadBufferAdapter : public avro::InputStream { public: diff --git a/src/Storages/ObjectStorage/DataLakes/Common/AvroForIcebergDeserializer.cpp b/src/Storages/ObjectStorage/DataLakes/Common/AvroForIcebergDeserializer.cpp index 3c07bbb8ab4f..be74e84205fa 100644 --- a/src/Storages/ObjectStorage/DataLakes/Common/AvroForIcebergDeserializer.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Common/AvroForIcebergDeserializer.cpp @@ -34,7 +34,7 @@ try , manifest_file_path(manifest_file_path_) { auto manifest_file_reader - = std::make_unique(std::make_unique(*buffer)); + = std::make_unique(std::make_unique(*buffer), MAX_AVRO_SCHEMA_DEPTH); avro::NodePtr root_node = manifest_file_reader->dataSchema().root(); auto data_type = AvroSchemaReader::avroNodeToDataType(root_node); diff --git a/tests/queries/0_stateless/04093_avro_deeply_nested_schema_stack_overflow.reference b/tests/queries/0_stateless/04093_avro_deeply_nested_schema_stack_overflow.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/04093_avro_deeply_nested_schema_stack_overflow.sh b/tests/queries/0_stateless/04093_avro_deeply_nested_schema_stack_overflow.sh new file mode 100755 index 000000000000..d06e6ec65418 --- /dev/null +++ b/tests/queries/0_stateless/04093_avro_deeply_nested_schema_stack_overflow.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Use a pre-generated deeply nested Avro file (2000 levels of nested arrays) +# that would cause stack overflow without proper recursion depth checks. 
+AVRO_FILE="${CURDIR}/data_avro/deeply_nested_schema.avro" + +# Should not crash (SIGSEGV). May succeed or fail with TOO_DEEP_RECURSION depending on build type. +${CLICKHOUSE_LOCAL} -q "SELECT * FROM file('${AVRO_FILE}', 'Avro') FORMAT Null" > /dev/null 2>&1 +[ $? -lt 128 ] || echo "CRASHED" + +${CLICKHOUSE_LOCAL} -q "DESC file('${AVRO_FILE}', 'Avro')" > /dev/null 2>&1 +[ $? -lt 128 ] || echo "CRASHED" diff --git a/tests/queries/0_stateless/data_avro/deeply_nested_schema.avro b/tests/queries/0_stateless/data_avro/deeply_nested_schema.avro new file mode 100644 index 000000000000..51af4042ced9 --- /dev/null +++ b/tests/queries/0_stateless/data_avro/deeply_nested_schema.avro @@ -0,0 +1 @@ +Objavro.codecnullavro.schema{"type": "record", "name": "test", "fields": [{"type": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": 
{"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": {"type": "array", "items": "int"}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}, "name": "nested"}]}VD1>TVD1 \ No newline at end of file From e6bc9b7d24d6bc4da10b74fff6e61bbb3a66edcd Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 20 May 2026 14:02:57 +0000 Subject: [PATCH 35/41] Fix `contrib/avro` submodule pointer for stack overflow protection The cherry-pick of #102417 resolved the avro submodule conflict to the wrong commit (477ed40c656b, old API) instead of 3b5d52bdf9f9 (which adds the `max_depth` parameter to `DataFileReaderBase` and `compileJsonSchemaFromString`). 
Co-Authored-By: Claude Opus 4.6 --- contrib/avro | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/avro b/contrib/avro index 477ed40c656b..3b5d52bdf9f9 160000 --- a/contrib/avro +++ b/contrib/avro @@ -1 +1 @@ -Subproject commit 477ed40c656b1646a093d663668fb76dd0e75e47 +Subproject commit 3b5d52bdf9f9f66121b663741f4270b33c91c1a6 From 26e346585fd35e6eb04a8c02e2a76d00a3db0682 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 21 May 2026 18:53:42 +0000 Subject: [PATCH 36/41] Backport #104881 to 26.3: Use explicit flag for secondary on cluster queries --- src/Backups/BackupsWorker.cpp | 4 ++-- src/Databases/DatabaseReplicated.cpp | 6 +++--- src/Interpreters/ClientInfo.h | 2 +- src/Interpreters/Context.cpp | 1 + src/Interpreters/Context.h | 9 +++++++++ src/Interpreters/DDLTask.cpp | 4 ++-- src/Interpreters/InterpreterCreateQuery.cpp | 12 ++++++------ src/Interpreters/InterpreterDropQuery.cpp | 4 ++-- src/Interpreters/InterpreterSystemQuery.cpp | 2 +- src/Storages/Kafka/StorageKafkaUtils.cpp | 4 ++-- src/Storages/MergeTree/registerStorageMergeTree.cpp | 2 +- .../ObjectStorageQueueMetadata.cpp | 2 +- .../ObjectStorageQueue/registerQueueStorage.cpp | 4 ++-- src/Storages/TableZnodeInfo.cpp | 4 ++-- 14 files changed, 35 insertions(+), 25 deletions(-) diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 641eb67a5c07..384931961568 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -399,7 +399,7 @@ struct BackupsWorker::BackupStarter /// The "internal" option can only be used by a query that was initiated by another query (e.g., ON CLUSTER query). /// It should not be allowed for an initial query explicitly specified by a user. 
- if (is_internal_backup && (query_context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY)) + if (is_internal_backup && !query_context->isDDLOrOnClusterInternal()) throw Exception(ErrorCodes::ACCESS_DENIED, "Setting 'internal' cannot be set explicitly"); on_cluster = !backup_query->cluster.empty() || is_internal_backup; @@ -875,7 +875,7 @@ struct BackupsWorker::RestoreStarter /// The "internal" option can only be used by a query that was initiated by another query (e.g., ON CLUSTER query). /// It should not be allowed for an initial query explicitly specified by a user. - if (is_internal_restore && (query_context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY)) + if (is_internal_restore && !query_context->isDDLOrOnClusterInternal()) throw Exception(ErrorCodes::ACCESS_DENIED, "Setting 'internal' cannot be set explicitly"); /// RESTORE is a write operation, it should be forbidden in strict readonly mode (readonly=1). diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 7598a4e414ef..70138c7c1237 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1392,7 +1392,7 @@ BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, Contex host_fqdn_id = ddl_worker->getCommonHostID(); } - if (!flags.internal && (query_context->getClientInfo().query_kind != ClientInfo::QueryKind::INITIAL_QUERY)) + if (!flags.internal && query_context->isDDLOrOnClusterInternal()) throw Exception(ErrorCodes::INCORRECT_QUERY, "It's not initial query. 
ON CLUSTER is not allowed for Replicated database."); checkQueryValid(query, query_context); @@ -1546,7 +1546,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep { auto query_context = Context::createCopy(getContext()); query_context->makeQueryContext(); - query_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY); + query_context->setDDLOrOnClusterInternal(true); query_context->setQueryKindReplicatedDatabaseInternal(); query_context->setCurrentDatabase(getDatabaseName()); query_context->setCurrentQueryId({}); @@ -2618,7 +2618,7 @@ void registerDatabaseReplicated(DatabaseFactory & factory) info.expand_for_database = true; info.table_id.database_name = args.database_name; - const bool is_on_cluster = args.context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; + const bool is_on_cluster = args.context->isDDLOrOnClusterInternal(); /// Allow implicit {uuid} macros only for zookeeper_path in ON CLUSTER queries /// and if UUID was explicitly passed in CREATE DATABASE (like for ATTACH) bool allow_uuid_macro = is_on_cluster || args.create_query.attach || args.create_query.has_uuid; diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h index a5702a1fa332..02b8c7734de0 100644 --- a/src/Interpreters/ClientInfo.h +++ b/src/Interpreters/ClientInfo.h @@ -53,7 +53,7 @@ class ClientInfo { NO_QUERY = 0, /// Uninitialized object. INITIAL_QUERY = 1, - SECONDARY_QUERY = 2, /// Query that was initiated by another query for distributed or ON CLUSTER query execution. + SECONDARY_QUERY = 2, /// Query that was initiated by another query for distributed query execution. 
}; ClientInfo(); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index a9b1a948f2ea..837b2b385a55 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1239,6 +1239,7 @@ ContextData::ContextData(const ContextData &o) : buffer_context(o.buffer_context), is_internal_query(o.is_internal_query), is_background_operation(o.is_background_operation), + is_ddl_or_on_cluster_internal(o.is_ddl_or_on_cluster_internal), temp_data_on_disk(o.temp_data_on_disk), classifier(o.classifier), prepared_sets_cache(o.prepared_sets_cache), diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 9d24b4e9f187..c2758f2b7cff 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -562,6 +562,11 @@ class ContextData bool is_internal_query = false; /// A flag, used to detect sub-operations of background operations - in this case we won't need to build another background contexts bool is_background_operation = false; + /// Set for queries created internally by the server for DDL replication (ON CLUSTER, DatabaseReplicated) + /// and internal backup coordination. + /// Unlike query_kind == SECONDARY_QUERY (which comes from the client and can be spoofed), + /// this flag can only be set server-side and is safe to use for security-sensitive checks. 
+ bool is_ddl_or_on_cluster_internal = false; inline static ContextPtr global_context_instance; inline static ContextPtr background_context_instance; /// Global holder to maintain ownership of background_context @@ -1598,6 +1603,10 @@ class Context: public ContextData, public std::enable_shared_from_this bool isInternalQuery() const { return is_internal_query; } void setInternalQuery(bool internal) { is_internal_query = internal; } + bool isDDLOrOnClusterInternal() const { return is_ddl_or_on_cluster_internal; } + void setDDLOrOnClusterInternal(bool value) { is_ddl_or_on_cluster_internal = value; } + + ActionLocksManagerPtr getActionLocksManager() const; enum class ApplicationType : uint8_t diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 6b613e140825..a559e9f0ca91 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -291,7 +291,7 @@ ContextMutablePtr DDLTaskBase::makeQueryContext(ContextPtr from_context, const Z auto query_context = Context::createCopy(from_context); query_context->makeQueryContext(); query_context->setCurrentQueryId(""); // generate random query_id - query_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY); + query_context->setDDLOrOnClusterInternal(true); const bool preserve_user = from_context->getServerSettings()[ServerSetting::distributed_ddl_use_initial_user_and_roles]; if (preserve_user && !entry.initiator_user.empty()) @@ -662,7 +662,7 @@ void DatabaseReplicatedTask::parseQueryFromEntry(ContextPtr context) ContextMutablePtr DatabaseReplicatedTask::makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & zookeeper) { auto query_context = DDLTaskBase::makeQueryContext(from_context, zookeeper); - query_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY); + query_context->setDDLOrOnClusterInternal(true); query_context->setQueryKindReplicatedDatabaseInternal(); query_context->setCurrentDatabase(database->getDatabaseName()); diff --git 
a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 8cffff74a894..427f2a2cfbcb 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -294,7 +294,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) } else { - bool is_on_cluster = getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; + bool is_on_cluster = getContext()->isDDLOrOnClusterInternal(); if (create.uuid != UUIDHelpers::Nil && !is_on_cluster && !internal) throw Exception(ErrorCodes::INCORRECT_QUERY, "Ordinary database engine does not support UUID"); @@ -1399,7 +1399,7 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data const auto * kind_upper = create.is_dictionary ? "DICTIONARY" : "TABLE"; bool is_replicated_database_internal = database->getEngineName() == "Replicated" && getContext()->getClientInfo().is_replicated_database_internal; bool from_path = create.has_attach_from_path; - bool is_on_cluster = getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; + bool is_on_cluster = getContext()->isDDLOrOnClusterInternal(); if (database->getEngineName() == "Replicated" && create.uuid != UUIDHelpers::Nil && !is_replicated_database_internal && !internal && !is_on_cluster && !create.attach) { @@ -1613,7 +1613,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) fs::path user_files = fs::path(getContext()->getUserFilesPath()).lexically_normal(); fs::path root_path = fs::path(getContext()->getPath()).lexically_normal(); - if (getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) + if (!getContext()->isDDLOrOnClusterInternal()) { fs::path data_path = fs::path(create.attach_from_path).lexically_normal(); if (data_path.is_relative()) @@ -1633,7 +1633,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) "Data directory {} must be inside 
{} to attach it", String(data_path), String(user_files)); } } - else if (create.attach && !create.attach_short_syntax && getContext()->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY) + else if (create.attach && !create.attach_short_syntax && !getContext()->isDDLOrOnClusterInternal()) { auto log = getLogger("InterpreterCreateQuery"); LOG_WARNING(log, "ATTACH TABLE query with full table definition is not recommended: " @@ -2011,9 +2011,9 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, /// Before actually creating the table, check if it will lead to cyclic dependencies. checkTableCanBeAddedWithNoCyclicDependencies(create, query_ptr, getContext()); - /// Initial queries in Replicated database at this point have query_kind = ClientInfo::QueryKind::SECONNDARY_QUERY, + /// Initial queries in Replicated database at this point have is_ddl_or_on_cluster_internal = true, /// so we need to check whether the query is initial through getZooKeeperMetadataTransaction()->isInitialQuery() - bool is_initial_query = getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY || + bool is_initial_query = !getContext()->isDDLOrOnClusterInternal() || (getContext()->getZooKeeperMetadataTransaction() && getContext()->getZooKeeperMetadataTransaction()->isInitialQuery()); bool is_predefined_database = DatabaseCatalog::isPredefinedDatabase(create.getDatabase()); if (!internal && is_initial_query && !is_predefined_database) diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 3773725b7749..2e829ec8dc72 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -198,7 +198,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(const ContextPtr & context_, AS "Table {} is not a Dictionary", table_id.getNameForLogs()); - bool secondary_query = getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; + bool 
secondary_query = getContext()->isDDLOrOnClusterInternal(); if (!secondary_query && settings[Setting::ignore_drop_queries_probability] != 0 && ast_drop_query.kind == ASTDropQuery::Kind::Drop && std::uniform_real_distribution<>(0.0, 1.0)(thread_local_rng) <= settings[Setting::ignore_drop_queries_probability]) { @@ -828,7 +828,7 @@ void InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind kind, ContextPtr if (ignore_sync_setting) drop_context->setSetting("database_atomic_wait_for_drop_and_detach_synchronously", false); - drop_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY); + drop_context->setDDLOrOnClusterInternal(true); if (auto txn = current_context->getZooKeeperMetadataTransaction()) { /// For Replicated database diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 8602cf1c36be..8fb576f38bad 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -1597,7 +1597,7 @@ DatabasePtr InterpreterSystemQuery::restoreDatabaseFromKeeperPath( { auto query_context = Context::createCopy(getContext()); query_context->makeQueryContext(); - query_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY); + query_context->setDDLOrOnClusterInternal(true); query_context->setCurrentDatabase(restoring_database_name); query_context->setCurrentQueryId(""); diff --git a/src/Storages/Kafka/StorageKafkaUtils.cpp b/src/Storages/Kafka/StorageKafkaUtils.cpp index cb7e5bb85c27..79bea6964321 100644 --- a/src/Storages/Kafka/StorageKafkaUtils.cpp +++ b/src/Storages/Kafka/StorageKafkaUtils.cpp @@ -263,8 +263,8 @@ void registerStorageKafka(StorageFactory & factory) ErrorCodes::BAD_ARGUMENTS, "To store committed offsets in Keeper both kafka_keeper_path and kafka_replica_name must be specified"); - const auto is_on_cluster = args.getLocalContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; - const auto is_replicated_database = 
args.getLocalContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY + const auto is_on_cluster = args.getLocalContext()->isDDLOrOnClusterInternal(); + const auto is_replicated_database = args.getLocalContext()->isDDLOrOnClusterInternal() && DatabaseCatalog::instance().getDatabase(args.table_id.database_name)->getEngineName() == "Replicated"; // UUID macro is only allowed: diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index e63605989cde..127ad0dc9420 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -248,7 +248,7 @@ static TableZnodeInfo extractZooKeeperPathAndReplicaNameFromEngineArgs( if (has_valid_arguments) { - bool is_replicated_database = local_context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY && + bool is_replicated_database = local_context->isDDLOrOnClusterInternal() && DatabaseCatalog::instance().getDatabase(table_id.database_name)->getEngineName() == "Replicated"; /// Get path and name from engine arguments diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.cpp index 87b109d4db60..b440b9ad03a3 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.cpp @@ -329,7 +329,7 @@ ObjectStorageQueueMetadata::tryAcquireBucket(const Bucket & bucket) void ObjectStorageQueueMetadata::alterSettings(const SettingsChanges & changes, const ContextPtr & context) { - bool is_initial_query = context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY || + bool is_initial_query = !context->isDDLOrOnClusterInternal() || (context->getZooKeeperMetadataTransaction() && context->getZooKeeperMetadataTransaction()->isInitialQuery()); const fs::path alter_settings_lock_path = zookeeper_path / 
"alter_settings_lock"; diff --git a/src/Storages/ObjectStorageQueue/registerQueueStorage.cpp b/src/Storages/ObjectStorageQueue/registerQueueStorage.cpp index eba3d81a0e3d..27f3c6004559 100644 --- a/src/Storages/ObjectStorageQueue/registerQueueStorage.cpp +++ b/src/Storages/ObjectStorageQueue/registerQueueStorage.cpp @@ -64,8 +64,8 @@ StoragePtr createQueueStorage(const StorageFactory::Arguments & args) auto database = DatabaseCatalog::instance().tryGetDatabase(args.table_id.database_name); const String database_engine = database ? database->getEngineName() : ""; - bool is_on_cluster = args.getLocalContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; - bool is_replicated_database = args.getLocalContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY && + bool is_on_cluster = args.getLocalContext()->isDDLOrOnClusterInternal(); + bool is_replicated_database = args.getLocalContext()->isDDLOrOnClusterInternal() && database_engine == "Replicated"; /// Allow implicit {uuid} macros only for keeper_path in ON CLUSTER queries diff --git a/src/Storages/TableZnodeInfo.cpp b/src/Storages/TableZnodeInfo.cpp index b829b25dfeb3..7eb07a637d95 100644 --- a/src/Storages/TableZnodeInfo.cpp +++ b/src/Storages/TableZnodeInfo.cpp @@ -19,8 +19,8 @@ namespace ErrorCodes TableZnodeInfo TableZnodeInfo::resolve(const String & requested_path, const String & requested_replica_name, const StorageID & table_id, const ASTCreateQuery & query, LoadingStrictnessLevel mode, const ContextPtr & context) { - bool is_on_cluster = context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; - bool is_replicated_database = context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY && + bool is_on_cluster = context->isDDLOrOnClusterInternal(); + bool is_replicated_database = context->isDDLOrOnClusterInternal() && DatabaseCatalog::instance().getDatabase(table_id.database_name)->getEngineName() == "Replicated"; /// Allow 
implicit {uuid} macros only for zookeeper_path in ON CLUSTER queries From 705eb74d8aa7ed2b397c7b5aad2fdf1f07433b0c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 21 May 2026 22:42:31 +0000 Subject: [PATCH 37/41] Backport #105492 to 26.3: Skip test_numbers_check on release branches --- ci/jobs/check_style.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ci/jobs/check_style.py b/ci/jobs/check_style.py index f27bf263b0b4..26bb13f6f762 100644 --- a/ci/jobs/check_style.py +++ b/ci/jobs/check_style.py @@ -6,6 +6,7 @@ from concurrent.futures import ProcessPoolExecutor from pathlib import Path +from praktika.info import Info from praktika.result import Result from praktika.utils import Shell, Utils @@ -573,7 +574,13 @@ def parse_args(): ) ) testname = "test_numbers_check" - if testpattern.lower() in testname.lower(): + # Skip on release branches and backport PRs: backports cherry-pick a small + # subset of test files, which legitimately leaves large gaps in the numbering. 
+ info = Info() + release_branch_re = re.compile(r"^\d{2}\.\d+$") + branch_to_check = (info.base_branch or info.git_branch or "").removeprefix("release/") + is_release_branch = bool(release_branch_re.match(branch_to_check)) + if testpattern.lower() in testname.lower() and not is_release_branch: results.append( Result.from_commands_run( name=testname, From 3a04d41a5eda1eac37a152aae87a903b137eecc7 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 22 May 2026 01:10:06 +0000 Subject: [PATCH 38/41] Backport #103148 to 26.3: Fix backward compatibility break in sendProgress for INSERT queries --- src/Core/ProtocolDefines.h | 4 +++- src/Server/TCPHandler.cpp | 4 ++-- tests/integration/test_old_versions/test.py | 2 +- tests/queries/0_stateless/02010_lc_native.python | 3 --- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index 45e6463a4225..2846cf098815 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -125,6 +125,8 @@ static constexpr auto DBMS_MIN_REVISION_WITH_REPLICATED_SERIALIZATION = 54482; static constexpr auto DBMS_MIN_REVISION_WITH_NULLABLE_SPARSE_SERIALIZATION = 54483; +static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_PROGRESS_IN_ASYNC_INSERT = 54484; + /// Version of ClickHouse TCP protocol. /// @@ -133,5 +135,5 @@ static constexpr auto DBMS_MIN_REVISION_WITH_NULLABLE_SPARSE_SERIALIZATION = 544 /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, /// later is just a number for server version (one number instead of commit SHA) /// for simplicity (sometimes it may be more convenient in some use cases). 
-static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54483; +static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54484; } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index f024bd1780d7..8240427cc89d 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1395,7 +1395,8 @@ void TCPHandler::processInsertQuery(QueryState & state) { std::lock_guard lock(*callback_mutex); - sendProgress(state); + if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_PROGRESS_IN_ASYNC_INSERT) + sendProgress(state); sendInsertProfileEvents(state); } return; @@ -1419,7 +1420,6 @@ void TCPHandler::processInsertQuery(QueryState & state) } std::lock_guard lock(*callback_mutex); - sendProgress(state); sendInsertProfileEvents(state); } diff --git a/tests/integration/test_old_versions/test.py b/tests/integration/test_old_versions/test.py index e2ad4fc7c649..4769014adab4 100644 --- a/tests/integration/test_old_versions/test.py +++ b/tests/integration/test_old_versions/test.py @@ -7,7 +7,7 @@ node_oldest = cluster.add_instance( "node_oldest", image="clickhouse/clickhouse-server", - tag="25.12", + tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, with_installed_binary=True, main_configs=["configs/config.d/test_cluster.xml"], ) diff --git a/tests/queries/0_stateless/02010_lc_native.python b/tests/queries/0_stateless/02010_lc_native.python index 5e9bcb81097a..589ea81e1cdf 100755 --- a/tests/queries/0_stateless/02010_lc_native.python +++ b/tests/queries/0_stateless/02010_lc_native.python @@ -254,9 +254,6 @@ def insertValidLowCardinalityRow(): # Fin block sendEmptyBlock(s) - assertPacket(readVarUInt(s), 3) # Progress - readProgress(s) - assertPacket(readVarUInt(s), 5) # End of stream s.close() From 207874d51203e4173401cf22d8f4aef5f6eee83e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 22 May 2026 09:23:29 +0000 Subject: [PATCH 39/41] Backport #105449 to 26.3: Fix heap-buffer-overflow and null-pointer dereference in Arrow geo/string column readers --- 
.../Formats/Impl/ArrowColumnToCHColumn.cpp | 79 +++++++++- .../04259_arrow_corrupted_offsets.reference | 4 + .../04259_arrow_corrupted_offsets.sh | 139 ++++++++++++++++++ ...4260_arrow_geo_corrupted_offsets.reference | 3 + .../04260_arrow_geo_corrupted_offsets.sh | 91 ++++++++++++ 5 files changed, 309 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/04259_arrow_corrupted_offsets.reference create mode 100755 tests/queries/0_stateless/04259_arrow_corrupted_offsets.sh create mode 100644 tests/queries/0_stateless/04260_arrow_geo_corrupted_offsets.reference create mode 100755 tests/queries/0_stateless/04260_arrow_geo_corrupted_offsets.sh diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 1ed34febcd15..8d8e58b89a9e 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -119,17 +119,61 @@ static ColumnWithTypeAndName readColumnWithStringData(const std::shared_ptr & column_chars = assert_cast(*internal_column).getChars(); PaddedPODArray & column_offsets = assert_cast(*internal_column).getOffsets(); + /// Pre-flight: validate every non-null row's offset+length against the actual buffer and + /// accumulate an exact chars_size. This prevents both the source read overflow and the + /// destination write overflow via insert_assume_reserved. + /// + /// Using buffer->size() as the bound (rather than value_offset(N-1)+value_length(N-1)) + /// is deliberate: corrupted offsets can make intermediate value_length(i) entries much + /// larger than the buffer while keeping the last entry's value look correct. Counting + /// per-row also handles non-monotonic (rewinding) offsets at null rows, which would let + /// later non-null rows re-read the same range and exceed a buffer-size-based reserve. 
+ /// + /// Casting a negative signed offset/length to size_t wraps to a huge value, so the + /// two-part check (safe_offset > buf || safe_length > buf - safe_offset) catches negative + /// values without a separate signed test. size_t chars_size = 0; for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { ArrowArray & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); + std::shared_ptr buffer = chunk.value_data(); const size_t chunk_length = chunk.length(); + const size_t buffer_size = buffer ? static_cast(buffer->size()) : 0; - if (chunk_length > 0) + if (chunk.null_count() == 0) { - chars_size += chunk.value_offset(chunk_length - 1) + chunk.value_length(chunk_length - 1); - chars_size += chunk_length; + for (size_t i = 0; i < chunk_length; ++i) + { + const size_t safe_offset = static_cast(chunk.value_offset(i)); + const size_t safe_length = static_cast(chunk.value_length(i)); + if (unlikely(safe_offset > buffer_size || safe_length > buffer_size - safe_offset)) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Arrow BinaryArray offsets exceed data buffer bounds: " + "row {} has offset {} and length {} but buffer is {} bytes", + i, chunk.value_offset(i), chunk.value_length(i), buffer_size); + chars_size += safe_length; + } } + else + { + for (size_t i = 0; i < chunk_length; ++i) + { + if (!chunk.IsNull(i)) + { + const size_t safe_offset = static_cast(chunk.value_offset(i)); + const size_t safe_length = static_cast(chunk.value_length(i)); + if (unlikely(safe_offset > buffer_size || safe_length > buffer_size - safe_offset)) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Arrow BinaryArray offsets exceed data buffer bounds: " + "row {} has offset {} and length {} but buffer is {} bytes", + i, chunk.value_offset(i), chunk.value_length(i), buffer_size); + chars_size += safe_length; + } + } + } + chars_size += chunk_length; // one null terminator per row } column_chars.reserve(chars_size); @@ -146,8 +190,12 @@ static 
ColumnWithTypeAndName readColumnWithStringData(const std::shared_ptrdata() + chunk.value_offset(offset_i); - column_chars.insert_assume_reserved(raw_data, raw_data + chunk.value_length(offset_i)); + const auto value_length = chunk.value_length(offset_i); + if (value_length > 0) + { + const auto * raw_data = buffer->data() + chunk.value_offset(offset_i); + column_chars.insert_assume_reserved(raw_data, raw_data + value_length); + } column_offsets.emplace_back(column_chars.size()); } } @@ -531,16 +579,33 @@ static ColumnWithTypeAndName readColumnWithGeoData(const std::shared_ptr(*(arrow_column->chunk(chunk_i))); std::shared_ptr buffer = chunk.value_data(); const size_t chunk_length = chunk.length(); + const size_t buffer_size = buffer ? static_cast(buffer->size()) : 0; for (size_t offset_i = 0; offset_i != chunk_length; ++offset_i) { - auto * raw_data = buffer->mutable_data() + chunk.value_offset(offset_i); if (chunk.IsNull(offset_i)) { column->insertDefault(); continue; } - ReadBuffer in_buffer(reinterpret_cast(raw_data), chunk.value_length(offset_i), 0); + + if (!buffer) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Arrow BinaryArray has no data buffer for non-null geo row {}", + offset_i); + + const size_t safe_offset = static_cast(chunk.value_offset(offset_i)); + const size_t safe_length = static_cast(chunk.value_length(offset_i)); + if (unlikely(safe_offset > buffer_size || safe_length > buffer_size - safe_offset)) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Arrow BinaryArray offsets exceed data buffer bounds: " + "row {} has offset {} and length {} but buffer is {} bytes", + offset_i, chunk.value_offset(offset_i), chunk.value_length(offset_i), buffer_size); + + const auto * raw_data = buffer->data() + safe_offset; + ReadBuffer in_buffer(const_cast(reinterpret_cast(raw_data)), safe_length, 0); GeometricObject result_object; switch (geo_metadata.encoding) { diff --git a/tests/queries/0_stateless/04259_arrow_corrupted_offsets.reference 
b/tests/queries/0_stateless/04259_arrow_corrupted_offsets.reference new file mode 100644 index 000000000000..530e25cd51cd --- /dev/null +++ b/tests/queries/0_stateless/04259_arrow_corrupted_offsets.reference @@ -0,0 +1,4 @@ +INCORRECT_DATA +INCORRECT_DATA +INCORRECT_DATA +3 3 diff --git a/tests/queries/0_stateless/04259_arrow_corrupted_offsets.sh b/tests/queries/0_stateless/04259_arrow_corrupted_offsets.sh new file mode 100755 index 000000000000..9f82bf5fc5e7 --- /dev/null +++ b/tests/queries/0_stateless/04259_arrow_corrupted_offsets.sh @@ -0,0 +1,139 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Regression test for heap-buffer-overflow in readColumnWithStringData when an +# Arrow file carries corrupted intermediate offsets that make individual +# value_length(i) much larger than the actual values buffer. + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +DATA_FILE="${CLICKHOUSE_TMP}/${CLICKHOUSE_TEST_UNIQUE_NAME}.arrow" +DATA_FILE_STREAM="${CLICKHOUSE_TMP}/${CLICKHOUSE_TEST_UNIQUE_NAME}_stream.arrow" +DATA_FILE_NULLABLE="${CLICKHOUSE_TMP}/${CLICKHOUSE_TEST_UNIQUE_NAME}_nullable.arrow" +trap 'rm -f "$DATA_FILE" "$DATA_FILE_STREAM" "$DATA_FILE_NULLABLE"' EXIT + +# Build a well-formed Arrow file with 4 binary rows of 10 bytes each, +# then corrupt offset[1] from 10 to 0x40000000 (1 GiB). +# A vulnerable build would attempt a 1 GiB memcpy into a ~40-byte heap buffer; +# the fixed build must reject the file with INCORRECT_DATA. +python3 - "$DATA_FILE" <<'EOF' +import struct +import sys +import pyarrow as pa +import pyarrow.ipc as ipc + +path = sys.argv[1] + +arr = pa.array([b'A' * 10, b'B' * 10, b'C' * 10, b'D' * 10], type=pa.binary()) +tbl = pa.Table.from_arrays([arr], names=['x']) +with pa.OSFile(path, 'wb') as f: + w = ipc.new_file(f, tbl.schema) + w.write_table(tbl) + w.close() + +# Patch offsets [0, 10, 20, 30, 40] -> [0, 0x40000000, 20, 30, 40]. 
+# value_length(0) = offset[1] - offset[0] = 1 GiB; the last entry is unchanged +# so the pre-flight reserve stays small, triggering a write overflow on the old code. +data = bytearray(open(path, 'rb').read()) +needle = struct.pack('= 0, "could not locate offsets array" +data[idx + 4 : idx + 8] = struct.pack('&1 \ + | grep -oF 'INCORRECT_DATA' || echo 'FAIL: expected INCORRECT_DATA' + +# Same payload via ArrowStream format. +python3 - "$DATA_FILE_STREAM" <<'EOF' +import struct +import sys +import pyarrow as pa +import pyarrow.ipc as ipc + +path = sys.argv[1] + +arr = pa.array([b'A' * 10, b'B' * 10, b'C' * 10, b'D' * 10], type=pa.binary()) +tbl = pa.Table.from_arrays([arr], names=['x']) +with pa.OSFile(path, 'wb') as f: + w = ipc.new_stream(f, tbl.schema) + w.write_table(tbl) + w.close() + +data = bytearray(open(path, 'rb').read()) +needle = struct.pack('= 0, "could not locate offsets array" +data[idx + 4 : idx + 8] = struct.pack('&1 \ + | grep -oF 'INCORRECT_DATA' || echo 'FAIL: expected INCORRECT_DATA' + +# Nullable column: corrupt the last offset on a non-null row so its value_length +# becomes 1 GiB. The pre-flight must validate non-null rows in the nullable path. +python3 - "$DATA_FILE_NULLABLE" <<'EOF' +import struct +import sys +import pyarrow as pa +import pyarrow.ipc as ipc + +path = sys.argv[1] + +# 3 rows: non-null / null / non-null. Well-formed offsets: [0, 10, 10, 20]. +arr = pa.array([b'A' * 10, None, b'C' * 10], type=pa.binary()) +tbl = pa.Table.from_arrays([arr], names=['x']) +with pa.OSFile(path, 'wb') as f: + w = ipc.new_file(f, tbl.schema) + w.write_table(tbl) + w.close() + +# Corrupt offsets[3]: 20 -> 0x40000000. +# Row 2 (non-null): value_offset = offsets[2] = 10, value_length = 0x40000000 - 10 = 1 GiB. +# Pre-flight must reject this with INCORRECT_DATA. 
+data = bytearray(open(path, 'rb').read()) +needle = struct.pack('= 0, "could not locate offsets array" +data[idx + 12 : idx + 16] = struct.pack('&1 \ + | grep -oF 'INCORRECT_DATA' || echo 'FAIL: expected INCORRECT_DATA' + +# Write loop path: non-null empty strings with absent values buffer (IPC body offset=-1). +# This exercises the null_count==0 write path when buffer->data() may be null. +# The fixed build must read all rows as empty strings without crashing. +DATA_FILE_EMPTY="${CLICKHOUSE_TMP}/${CLICKHOUSE_TEST_UNIQUE_NAME}_empty.arrow" +trap 'rm -f "$DATA_FILE" "$DATA_FILE_STREAM" "$DATA_FILE_NULLABLE" "$DATA_FILE_EMPTY"' EXIT +python3 - "$DATA_FILE_EMPTY" <<'EOF' +import struct +import sys +import pyarrow as pa +import pyarrow.ipc as ipc + +path = sys.argv[1] + +arr = pa.array([b'', b'', b''], type=pa.binary()) +tbl = pa.Table.from_arrays([arr], names=['x']) +with pa.OSFile(path, 'wb') as f: + w = ipc.new_file(f, tbl.schema) + w.write_table(tbl) + w.close() + +# Set values buffer entry offset to -1 to mark it as absent in the IPC body. +# The buffer entry for values is {offset: int64, length: int64}; for 3 empty +# strings the values buffer has length=0. Setting offset=-1 signals absence. 
+data = bytearray(open(path, 'rb').read()) +target = struct.pack('= 0, "could not locate values buffer entry" +data[idx : idx + 8] = struct.pack('&1 diff --git a/tests/queries/0_stateless/04260_arrow_geo_corrupted_offsets.reference b/tests/queries/0_stateless/04260_arrow_geo_corrupted_offsets.reference new file mode 100644 index 000000000000..aadb02b069d8 --- /dev/null +++ b/tests/queries/0_stateless/04260_arrow_geo_corrupted_offsets.reference @@ -0,0 +1,3 @@ +(1,2) +(1,2) +INCORRECT_DATA diff --git a/tests/queries/0_stateless/04260_arrow_geo_corrupted_offsets.sh b/tests/queries/0_stateless/04260_arrow_geo_corrupted_offsets.sh new file mode 100755 index 000000000000..b1cba5812e3c --- /dev/null +++ b/tests/queries/0_stateless/04260_arrow_geo_corrupted_offsets.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Regression test for heap-buffer-overflow in readColumnWithGeoData when an +# Arrow file carries corrupted intermediate offsets, and for the crash caused +# by using buffer->mutable_data() (which returns nullptr for read-only IPC +# buffers) instead of buffer->data(). + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +DATA_FILE="${CLICKHOUSE_TMP}/${CLICKHOUSE_TEST_UNIQUE_NAME}_geo.arrow" +DATA_FILE_VALID="${CLICKHOUSE_TMP}/${CLICKHOUSE_TEST_UNIQUE_NAME}_geo_valid.arrow" +trap 'rm -f "$DATA_FILE" "$DATA_FILE_VALID"' EXIT + +# Build a valid Arrow IPC file with WKB Point rows and geo schema metadata. +# Before the fix, buffer->mutable_data() returned nullptr for read-only IPC +# buffers, causing a null-pointer dereference on any geo-tagged Arrow column. 
+python3 - "$DATA_FILE_VALID" <<'EOF' +import struct +import sys +import json +import pyarrow as pa +import pyarrow.ipc as ipc + +path = sys.argv[1] + +# WKB Point(1.0, 2.0): byte_order=1(LE), type=1(Point), x=1.0, y=2.0 +wkb_point = struct.pack('metadata()), not to the +# IPC footer (file_reader->metadata()), which is what ArrowBlockInputFormat reads. +meta = tbl.schema.metadata or {} +meta[b'geo'] = geo_meta.encode() +tbl = tbl.replace_schema_metadata(meta) + +with pa.OSFile(path, 'wb') as f: + w = ipc.new_file(f, tbl.schema) + w.write_table(tbl) + w.close() +EOF + +$CLICKHOUSE_LOCAL --input_format_parquet_allow_geoparquet_parser=1 \ + --query "SELECT x FROM file('${DATA_FILE_VALID}', 'Arrow')" 2>&1 + +# Build an Arrow IPC file with WKB binary rows, geo schema metadata, and a +# corrupted offset that makes value_length(0) = 1 GiB. +# Before the fix, this caused a heap-buffer-overflow in parseWKBFormat. +# After the fix, it must be rejected with INCORRECT_DATA. +python3 - "$DATA_FILE" <<'EOF' +import struct +import sys +import json +import pyarrow as pa +import pyarrow.ipc as ipc + +path = sys.argv[1] + +wkb_point = struct.pack(' [0, 0x40000000, 42, 63]. +# value_length(0) = offset[1] - offset[0] = 1 GiB; the read would overflow +# the ~63-byte data buffer. +data = bytearray(open(path, 'rb').read()) +needle = struct.pack('= 0, "could not locate offsets array" +data[idx + 4 : idx + 8] = struct.pack('&1 \ + | grep -oF 'INCORRECT_DATA' || echo 'FAIL: expected INCORRECT_DATA' From c13b94c02126af83a5c845126cf711bad7634875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 22 May 2026 14:33:27 +0000 Subject: [PATCH 40/41] Fix geoparquet metadata handling in Arrow IPC format Arrow IPC stores schema-level metadata in `schema->metadata()`, not in `file_reader->metadata()` (which returns IPC footer custom metadata). 
The Arrow reader was passing the wrong metadata to `ArrowColumnToCHColumn`, so the `geo` key was never found and geoparquet parsing never activated for Arrow IPC files. Backport of 14122ed29df7bc630e89c0e3c2e75bcc8f625dfd. Required for test `04260_arrow_geo_corrupted_offsets` (added by #105449) to pass. --- src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 6cce6530d789..e155c74bbac0 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -103,7 +103,8 @@ Chunk ArrowBlockInputFormat::read() /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? &block_missing_values : nullptr; - res = arrow_column_to_ch_column->arrowTableToCHChunk(*table_result, (*table_result)->num_rows(), file_reader ? file_reader->metadata() : nullptr, block_missing_values_ptr); + auto schema_metadata = stream ? stream_reader->schema()->metadata() : file_reader->schema()->metadata(); + res = arrow_column_to_ch_column->arrowTableToCHChunk(*table_result, (*table_result)->num_rows(), schema_metadata, block_missing_values_ptr); /// There is no easy way to get original record batch size from Arrow metadata. /// Let's just use the number of bytes read from read buffer. @@ -259,7 +260,7 @@ NamesAndTypesList ArrowSchemaReader::readSchema() auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( *schema, - file_reader ? file_reader->metadata() : nullptr, + schema->metadata(), stream ? 
"ArrowStream" : "Arrow", format_settings, format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference, From e44ae6e9587dfebc3dfe814248baa24fe494b0cb Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 22 May 2026 15:28:42 +0000 Subject: [PATCH 41/41] Update autogenerated version to 26.3.11.36 and contributors --- cmake/autogenerated_versions.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 022b3cbb03c3..7a74a4b3c27c 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54518) +SET(VERSION_REVISION 54519) SET(VERSION_MAJOR 26) SET(VERSION_MINOR 3) -SET(VERSION_PATCH 11) -SET(VERSION_GITHASH e1c11930c28196f954a93287e43c1aa112c8c607) -SET(VERSION_DESCRIBE v26.3.11.1-lts) -SET(VERSION_STRING 26.3.11.1) +SET(VERSION_PATCH 12) +SET(VERSION_GITHASH fa3aa24e79104d29f8bfde078fc586ec6ac3a565) +SET(VERSION_DESCRIBE v26.3.12.1-lts) +SET(VERSION_STRING 26.3.12.1) # end of autochange