diff --git a/snuba/admin/clickhouse/system_queries.py b/snuba/admin/clickhouse/system_queries.py index 4db2123db41..5c5abfae515 100644 --- a/snuba/admin/clickhouse/system_queries.py +++ b/snuba/admin/clickhouse/system_queries.py @@ -168,6 +168,47 @@ def _run_sql_query_on_host( ) +def _sanitize_query_for_explain(sql_query: str) -> str: + """ + Lightweight defense-in-depth check before embedding an admin-supplied query + in an EXPLAIN statement for validation. + + The query is SQL typed by an authenticated admin user and, if it passes the + EXPLAIN AST / EXPLAIN QUERY TREE validation below, is executed as the + read-only user, so this is not the primary security control. It only: + + 1. Strips the trailing semicolon (optional in ClickHouse) so the query can + be embedded in "EXPLAIN ... ". + 2. Rejects obvious statement-chaining (e.g. "SELECT ...; DROP TABLE ..."). + 3. Rejects SQL comments. + + String literals are removed before the chaining/comment scan so that + semicolons, quotes or comment-like sequences inside string values don't + cause false-positive rejections of legitimate queries. + """ + query = sql_query.strip() + + # Trailing semicolons are optional in ClickHouse; drop one so the query can + # be embedded in an EXPLAIN statement. + if query.endswith(";"): + query = query[:-1].strip() + + # Strip string literals (handling '' / \\' / "" / \\" escapes) so that + # quotes, semicolons or comment sequences inside string values are ignored. + without_strings = re.sub(r"'(?:[^'\\]|\\.|'')*'", "", query) + without_strings = re.sub(r'"(?:[^"\\]|\\.|"")*"', "", without_strings) + + # Reject statement chaining outside of string literals. + if ";" in without_strings: + raise InvalidCustomQuery("Multiple statements are not allowed") + + # Reject comment-based injection attempts. + if "--" in without_strings or "/*" in without_strings: + raise InvalidCustomQuery("SQL comments are not allowed in queries") + + return query + + def is_query_using_only_system_tables( clickhouse_host: str, clickhouse_port: int, @@ -180,9 +221,12 @@ def is_query_using_only_system_tables( Run the EXPLAIN QUERY TREE on the given sql_query and check that the only tables in the query are system tables. """ - sql_query = sql_query.strip().rstrip(";") if sql_query.endswith(";") else sql_query + # Sanitize the query to prevent SQL injection in the EXPLAIN statement + sanitized_query = _sanitize_query_for_explain(sql_query) + settings_clause = "" if sudo_mode else " SETTINGS allow_experimental_analyzer = 1" - explain_query_tree_query = f"EXPLAIN QUERY TREE {sql_query}{settings_clause}" + # Using sanitized query in EXPLAIN to prevent SQL injection + explain_query_tree_query = f"EXPLAIN QUERY TREE {sanitized_query}{settings_clause}" explain_query_tree_result = _run_sql_query_on_host( clickhouse_host, clickhouse_port, @@ -222,7 +266,11 @@ def is_valid_system_query( """ Validation based on Query Tree and AST to ensure the query is a valid select query. """ - explain_ast_query = f"EXPLAIN AST {sql_query}" + # Sanitize the query to prevent SQL injection in the EXPLAIN statement + sanitized_query = _sanitize_query_for_explain(sql_query) + + # Using sanitized query in EXPLAIN to prevent SQL injection + explain_ast_query = f"EXPLAIN AST {sanitized_query}" disallowed_ast_nodes = ["AlterQuery", "AlterCommand", "DropQuery", "InsertQuery"] explain_ast_result = _run_sql_query_on_host( clickhouse_host, clickhouse_port, storage_name, explain_ast_query, False, clusterless_mode diff --git a/tests/admin/test_system_queries.py b/tests/admin/test_system_queries.py index d395bc743fa..a6fa6b7afb1 100644 --- a/tests/admin/test_system_queries.py +++ b/tests/admin/test_system_queries.py @@ -54,25 +54,6 @@ """, "SELECT hostname(), avg(query_duration_ms) FROM clusterAllReplicas('default', system.query_log) GROUP BY hostname()", "SELECT count() FROM merge('system', '.*settings')", - ], -) -@pytest.mark.events_db -def test_is_valid_system_query(sql_query: str) -> None: - assert is_valid_system_query( - settings.CLUSTERS[0]["host"], int(settings.CLUSTERS[0]["port"]), "errors", sql_query, False - ) - - -@pytest.mark.parametrize( - "sql_query", - [ - "SHOW TABLES;", # non select statement - "SELECT * FROM my_table;", # not allowed table - "SELECT * from system.metrics" # system table not on allowed list - "with sum(bytes) as s select s from system.parts group by table;", # sorry not allowed WITH - "SELECT 1; SELECT 2;" # no multiple statements - "SELECT * FROM system.clusters c INNER JOIN my_table m ON c.cluster == m.something", # no join - "SELECT * from system.as1", # invalid system table format """SELECT count() as nb_query, user, @@ -96,6 +77,25 @@ def test_is_valid_system_query(sql_query: str) -> None: ], ) @pytest.mark.events_db +def test_is_valid_system_query(sql_query: str) -> None: + assert is_valid_system_query( + settings.CLUSTERS[0]["host"], int(settings.CLUSTERS[0]["port"]), "errors", sql_query, False + ) + + +@pytest.mark.parametrize( + "sql_query", + [ + "SHOW TABLES;", # non select statement + "SELECT * FROM my_table;", # not allowed table + "SELECT * from system.metrics" # system table not on allowed list + "with sum(bytes) as s select s from system.parts group by table;", # sorry not allowed WITH + "SELECT 1; SELECT 2;" # no multiple statements + "SELECT * FROM system.clusters c INNER JOIN my_table m ON c.cluster == m.something", # no join + "SELECT * from system.as1", # invalid system table format + ], +) +@pytest.mark.events_db def test_invalid_system_query(sql_query: str) -> None: with pytest.raises(Exception): is_valid_system_query(