Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 51 additions & 3 deletions snuba/admin/clickhouse/system_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,47 @@ def _run_sql_query_on_host(
)


def _sanitize_query_for_explain(sql_query: str) -> str:
"""
Lightweight defense-in-depth check before embedding an admin-supplied query
in an EXPLAIN statement for validation.

The query is SQL typed by an authenticated admin user and, if it passes the
EXPLAIN AST / EXPLAIN QUERY TREE validation below, is executed as the
read-only user, so this is not the primary security control. It only:

1. Strips the trailing semicolon (optional in ClickHouse) so the query can
be embedded in "EXPLAIN ... <query>".
2. Rejects obvious statement-chaining (e.g. "SELECT ...; DROP TABLE ...").
3. Rejects SQL comments.

String literals are removed before the chaining/comment scan so that
semicolons, quotes or comment-like sequences inside string values don't
cause false-positive rejections of legitimate queries.
"""
query = sql_query.strip()

# Trailing semicolons are optional in ClickHouse; drop one so the query can
# be embedded in an EXPLAIN statement.
if query.endswith(";"):
query = query[:-1].strip()

# Strip string literals (handling '' / \\' / "" / \\" escapes) so that
# quotes, semicolons or comment sequences inside string values are ignored.
without_strings = re.sub(r"'(?:[^'\\]|\\.|'')*'", "", query)
without_strings = re.sub(r'"(?:[^"\\]|\\.|"")*"', "", without_strings)

# Reject statement chaining outside of string literals.
if ";" in without_strings:
raise InvalidCustomQuery("Multiple statements are not allowed")

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Double trailing semicolon rejected

Low Severity

_sanitize_query_for_explain removes only one trailing semicolon, then treats any remaining ; as multiple statements. Queries that end with more than one semicolon (previously normalized via rstrip(";")) now raise InvalidCustomQuery with a misleading “Multiple statements” message even when the SQL is a single statement.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 21ee79f. Configure here.


# Reject comment-based injection attempts.
if "--" in without_strings or "/*" in without_strings:
raise InvalidCustomQuery("SQL comments are not allowed in queries")

return query


def is_query_using_only_system_tables(
clickhouse_host: str,
clickhouse_port: int,
Expand All @@ -180,9 +221,12 @@ def is_query_using_only_system_tables(
Run the EXPLAIN QUERY TREE on the given sql_query and check that the only tables
in the query are system tables.
"""
sql_query = sql_query.strip().rstrip(";") if sql_query.endswith(";") else sql_query
# Sanitize the query to prevent SQL injection in the EXPLAIN statement
sanitized_query = _sanitize_query_for_explain(sql_query)

settings_clause = "" if sudo_mode else " SETTINGS allow_experimental_analyzer = 1"
explain_query_tree_query = f"EXPLAIN QUERY TREE {sql_query}{settings_clause}"
# Using sanitized query in EXPLAIN to prevent SQL injection
explain_query_tree_query = f"EXPLAIN QUERY TREE {sanitized_query}{settings_clause}"
explain_query_tree_result = _run_sql_query_on_host(
clickhouse_host,
clickhouse_port,
Expand Down Expand Up @@ -222,7 +266,11 @@ def is_valid_system_query(
"""
Validation based on Query Tree and AST to ensure the query is a valid select query.
"""
explain_ast_query = f"EXPLAIN AST {sql_query}"
# Sanitize the query to prevent SQL injection in the EXPLAIN statement
sanitized_query = _sanitize_query_for_explain(sql_query)

# Using sanitized query in EXPLAIN to prevent SQL injection
explain_ast_query = f"EXPLAIN AST {sanitized_query}"
disallowed_ast_nodes = ["AlterQuery", "AlterCommand", "DropQuery", "InsertQuery"]
explain_ast_result = _run_sql_query_on_host(
clickhouse_host, clickhouse_port, storage_name, explain_ast_query, False, clusterless_mode
Expand Down
38 changes: 19 additions & 19 deletions tests/admin/test_system_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,25 +54,6 @@
""",
"SELECT hostname(), avg(query_duration_ms) FROM clusterAllReplicas('default', system.query_log) GROUP BY hostname()",
"SELECT count() FROM merge('system', '.*settings')",
],
)
@pytest.mark.events_db
def test_is_valid_system_query(sql_query: str) -> None:
assert is_valid_system_query(
settings.CLUSTERS[0]["host"], int(settings.CLUSTERS[0]["port"]), "errors", sql_query, False
)


@pytest.mark.parametrize(
"sql_query",
[
"SHOW TABLES;", # non select statement
"SELECT * FROM my_table;", # not allowed table
"SELECT * from system.metrics" # system table not on allowed list
"with sum(bytes) as s select s from system.parts group by table;", # sorry not allowed WITH
"SELECT 1; SELECT 2;" # no multiple statements
"SELECT * FROM system.clusters c INNER JOIN my_table m ON c.cluster == m.something", # no join
"SELECT * from system.as1", # invalid system table format
"""SELECT
count() as nb_query,
user,
Expand All @@ -96,6 +77,25 @@ def test_is_valid_system_query(sql_query: str) -> None:
],
)
@pytest.mark.events_db
def test_is_valid_system_query(sql_query: str) -> None:
assert is_valid_system_query(
settings.CLUSTERS[0]["host"], int(settings.CLUSTERS[0]["port"]), "errors", sql_query, False
)


@pytest.mark.parametrize(
"sql_query",
[
"SHOW TABLES;", # non select statement
"SELECT * FROM my_table;", # not allowed table
"SELECT * from system.metrics" # system table not on allowed list
"with sum(bytes) as s select s from system.parts group by table;", # sorry not allowed WITH
"SELECT 1; SELECT 2;" # no multiple statements
"SELECT * FROM system.clusters c INNER JOIN my_table m ON c.cluster == m.something", # no join
"SELECT * from system.as1", # invalid system table format
],
)
@pytest.mark.events_db
def test_invalid_system_query(sql_query: str) -> None:
with pytest.raises(Exception):
is_valid_system_query(
Expand Down
Loading