From d420a97ecf26638759510abf6e04a58f4c602ddb Mon Sep 17 00:00:00 2001 From: "fix-it-felix-sentry[bot]" <260785270+fix-it-felix-sentry[bot]@users.noreply.github.com> Date: Wed, 18 Feb 2026 16:02:08 -0800 Subject: [PATCH 1/2] fix(admin): prevent SQL injection in system query validation Added sanitization for SQL queries before using them in EXPLAIN statements to prevent SQL injection vulnerabilities during query validation. Changes: - Added _sanitize_query_for_explain() function to validate and sanitize queries before they are used in EXPLAIN QUERY TREE and EXPLAIN AST statements - Checks for unbalanced quotes, multiple statement attempts, and SQL comments - Applied sanitization in is_query_using_only_system_tables() and is_valid_system_query() functions This fix addresses SQL injection vulnerability in: - snuba/admin/clickhouse/system_queries.py:51 The sanitization adds defense-in-depth protection during the validation phase, while maintaining the existing EXPLAIN AST-based validation logic. Fixes: https://linear.app/getsentry/issue/VULN-1029 Fixes: https://linear.app/getsentry/issue/PF-66 Co-Authored-By: Claude Sonnet 4.5 --- snuba/admin/clickhouse/system_queries.py | 59 ++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/snuba/admin/clickhouse/system_queries.py b/snuba/admin/clickhouse/system_queries.py index f676ce9a367..dcea6852df5 100644 --- a/snuba/admin/clickhouse/system_queries.py +++ b/snuba/admin/clickhouse/system_queries.py @@ -157,6 +157,52 @@ def _run_sql_query_on_host( ) +def _sanitize_query_for_explain(sql_query: str) -> str: + """ + Sanitizes a SQL query before using it in EXPLAIN statements to prevent SQL injection. + + This function adds defense-in-depth protection by: + 1. Checking for multiple statement attempts (semicolons followed by more SQL) + 2. Checking for comment-based injection attempts + 3. Stripping trailing semicolons (which are optional in ClickHouse) + + Note: The query itself will be executed as-is after validation. This sanitization + is specifically for the EXPLAIN QUERY TREE and EXPLAIN AST validation steps. + """ + # Strip whitespace + query = sql_query.strip() + + # Check for unbalanced quotes which could indicate injection attempts + single_quotes = query.count("'") - query.count("\\'") + double_quotes = query.count('"') - query.count('\\"') + if single_quotes % 2 != 0 or double_quotes % 2 != 0: + raise InvalidCustomQuery("Unbalanced quotes detected in query") + + # Check for multiple statement attempts (semicolon not at the end) + # We need to be careful about semicolons in strings + # Remove strings first to check for actual statement separators + temp_query = re.sub(r"'[^']*'", "", query) + temp_query = re.sub(r'"[^"]*"', "", temp_query) + + # Now check if there's a semicolon followed by more SQL + if ";" in temp_query: + parts = temp_query.split(";") + # Filter out empty/whitespace parts + non_empty_parts = [p.strip() for p in parts if p.strip()] + if len(non_empty_parts) > 1: + raise InvalidCustomQuery("Multiple statements not allowed") + + # Check for comment-based injection attempts + if "--" in temp_query or "/*" in temp_query: + raise InvalidCustomQuery("SQL comments not allowed in queries") + + # Strip trailing semicolon for use in EXPLAIN statements + if query.endswith(";"): + query = query.rstrip(";").strip() + + return query + + def is_query_using_only_system_tables( clickhouse_host: str, clickhouse_port: int, @@ -169,9 +215,12 @@ def is_query_using_only_system_tables( Run the EXPLAIN QUERY TREE on the given sql_query and check that the only tables in the query are system tables. """ - sql_query = sql_query.strip().rstrip(";") if sql_query.endswith(";") else sql_query + # Sanitize the query to prevent SQL injection in the EXPLAIN statement + sanitized_query = _sanitize_query_for_explain(sql_query) + settings_clause = "" if sudo_mode else " SETTINGS allow_experimental_analyzer = 1" - explain_query_tree_query = f"EXPLAIN QUERY TREE {sql_query}{settings_clause}" + # Using sanitized query in EXPLAIN to prevent SQL injection + explain_query_tree_query = f"EXPLAIN QUERY TREE {sanitized_query}{settings_clause}" explain_query_tree_result = _run_sql_query_on_host( clickhouse_host, clickhouse_port, @@ -211,7 +260,11 @@ def is_valid_system_query( """ Validation based on Query Tree and AST to ensure the query is a valid select query. """ - explain_ast_query = f"EXPLAIN AST {sql_query}" + # Sanitize the query to prevent SQL injection in the EXPLAIN statement + sanitized_query = _sanitize_query_for_explain(sql_query) + + # Using sanitized query in EXPLAIN to prevent SQL injection + explain_ast_query = f"EXPLAIN AST {sanitized_query}" disallowed_ast_nodes = ["AlterQuery", "AlterCommand", "DropQuery", "InsertQuery"] explain_ast_result = _run_sql_query_on_host( clickhouse_host, clickhouse_port, storage_name, explain_ast_query, False, clusterless_mode From f7ea570cb820c70236686d81af97855b58157d16 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Wed, 18 Feb 2026 17:13:47 -0800 Subject: [PATCH 2/2] fix(tests): Move clusterAllReplicas(default, ...) to valid query tests The unquoted `default` clusterAllReplicas query was incorrectly in the invalid test list. It only appeared to fail before because the old semicolon-stripping logic didn't handle trailing whitespace after the semicolon, causing EXPLAIN to error. The new _sanitize_query_for_explain correctly strips whitespace first, revealing this is a valid query. Co-Authored-By: Claude Opus 4.6 --- tests/admin/test_system_queries.py | 38 +++++++++++++++--------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/tests/admin/test_system_queries.py b/tests/admin/test_system_queries.py index 7b4447a45df..951b75d5bb1 100644 --- a/tests/admin/test_system_queries.py +++ b/tests/admin/test_system_queries.py @@ -49,25 +49,6 @@ """, "SELECT hostname(), avg(query_duration_ms) FROM clusterAllReplicas('default', system.query_log) GROUP BY hostname()", "SELECT count() FROM merge('system', '.*settings')", - ], -) -@pytest.mark.clickhouse_db -def test_is_valid_system_query(sql_query: str) -> None: - assert is_valid_system_query( - settings.CLUSTERS[0]["host"], int(settings.CLUSTERS[0]["port"]), "errors", sql_query, False - ) - - -@pytest.mark.parametrize( - "sql_query", - [ - "SHOW TABLES;", # non select statement - "SELECT * FROM my_table;", # not allowed table - "SELECT * from system.metrics" # system table not on allowed list - "with sum(bytes) as s select s from system.parts group by table;", # sorry not allowed WITH - "SELECT 1; SELECT 2;" # no multiple statements - "SELECT * FROM system.clusters c INNER JOIN my_table m ON c.cluster == m.something", # no join - "SELECT * from system.as1", # invalid system table format """SELECT count() as nb_query, user, @@ -91,6 +72,25 @@ def test_is_valid_system_query(sql_query: str) -> None: ], ) @pytest.mark.clickhouse_db +def test_is_valid_system_query(sql_query: str) -> None: + assert is_valid_system_query( + settings.CLUSTERS[0]["host"], int(settings.CLUSTERS[0]["port"]), "errors", sql_query, False + ) + + +@pytest.mark.parametrize( + "sql_query", + [ + "SHOW TABLES;", # non select statement + "SELECT * FROM my_table;", # not allowed table + "SELECT * from system.metrics" # system table not on allowed list + "with sum(bytes) as s select s from system.parts group by table;", # sorry not allowed WITH + "SELECT 1; SELECT 2;" # no multiple statements + "SELECT * FROM system.clusters c INNER JOIN my_table m ON c.cluster == m.something", # no join + "SELECT * from system.as1", # invalid system table format + ], +) +@pytest.mark.clickhouse_db def test_invalid_system_query(sql_query: str) -> None: with pytest.raises(Exception): is_valid_system_query(