getsentry · phacops · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026
@@ -18,3 +18,4 @@ gocd/templates/vendor/
 gocd/generated-pipelines/
 Brewfile.lock.json
 .zed/
+dump.rdb
@@ -4,6 +4,7 @@ version = "26.7.0.dev0"
 dependencies = [
     "blinker>=1.9",
     "click>=8.1.7",
+    "clickhouse-connect>=0.8.0",
     "clickhouse-driver>=0.2.10",
     "confluent-kafka>=2.7.0",
     "datadog>=0.49.1",
@@ -125,6 +126,8 @@ exclude = ["^rust_snuba/", "^tests/datasets/", "^tests/query/"]
 [[tool.mypy.overrides]]
 module = [
     "_strptime",
+    "clickhouse_connect",
+    "clickhouse_connect.*",
     "clickhouse_driver",
     "clickhouse_driver.errors",
     "confluent_kafka",

@@ -7,7 +7,13 @@
 
 from snuba import settings
 from snuba.clickhouse.native import ClickhousePool
-from snuba.clusters.cluster import ClickhouseClientSettings, ClickhouseCluster
+from snuba.clusters.cluster import (
+    ClickhouseClientSettings,
+    ClickhouseCluster,
+    ClickhouseNode,
+    connection_cache,
+    use_clickhouse_connect_driver,
+)
 from snuba.datasets.storage import ReadableTableStorage
 from snuba.datasets.storages.factory import get_storage
 from snuba.datasets.storages.storage_key import StorageKey
@@ -43,7 +49,7 @@ def is_valid_node(host: str, port: int, cluster: ClickhouseCluster, storage_name
             },
         )
 
-    return any(node.host_name == host and node.port == port for node in nodes)
+    return any(node.host_name == host and node.native_port == port for node in nodes)
 
 
 def _get_storage(storage_name: str) -> ReadableTableStorage:
@@ -71,7 +77,7 @@ def _validate_node(
                 "host": clickhouse_host,
                 "port": clickhouse_port,
                 "query_host": cluster.get_query_node().host_name,
-                "query_port": cluster.get_query_node().port,
+                "query_port": cluster.get_query_node().native_port,
             },
         )
 
@@ -89,24 +95,38 @@ def _build_validated_pool(
     password: str,
     client_settings: ClickhouseClientSettings,
 ) -> ClickhousePool:
-    # Single chokepoint for admin ClickhousePool construction. ClickhousePool
-    # ships the user/password in the first hello packet of the native protocol,
-    # so an unvalidated host means credentials reach whatever listener answers.
-    # All admin helpers must go through here — never call ClickhousePool
-    # directly from this module. The regression test
-    # test_no_direct_clickhouse_pool_construction_in_admin enforces this.
+    # Single chokepoint for admin ClickhousePool acquisition. A pool ships the
+    # user/password to the node (the native protocol's first hello packet, or
+    # the HTTP auth header), so an unvalidated host means credentials reach
+    # whatever listener answers. All admin helpers must go through here — never
+    # acquire a pool from the connection cache directly in this module. The
+    # regression test test_no_direct_clickhouse_pool_construction_in_admin
+    # enforces this.
     _validate_node(clickhouse_host, clickhouse_port, cluster, storage_name)
-    return ClickhousePool(
-        clickhouse_host,
-        clickhouse_port,
+    # Go through the shared connection cache so the driver (native vs
+    # clickhouse-connect/HTTP) is selected by the runtime config, behind the
+    # abstract ClickhousePool type, just like the cluster's own connections.
+    return connection_cache.get_node_connection(
+        client_settings,
+        ClickhouseNode(clickhouse_host, clickhouse_port, http_port=cluster.get_http_port()),
         username,
         password,
         database,
-        max_pool_size=2,
-        client_settings=client_settings.value.settings,
+        secure=False,
+        ca_certs=None,
+        verify=False,
     )
 
 
+def _driver_cache_token() -> str:
+    # Returns "connect" if use_clickhouse_connect_driver() is truthy, else
+    # "native". The admin getters in this module append the token to their
+    # NODE_CONNECTIONS / CLUSTER_CONNECTIONS keys, so flipping the flag misses
+    # the entry cached under the other driver and resolves a new pool.
+    # NOTE(review): entries for the previous driver appear to stay cached.
+    return "connect" if use_clickhouse_connect_driver() else "native"
+
+
 def get_ro_node_connection(
     clickhouse_host: str,
     clickhouse_port: int,
@@ -115,7 +135,7 @@ def get_ro_node_connection(
 ) -> ClickhousePool:
     storage = _get_storage(storage_name)
 
-    key = f"{storage.get_storage_key()}-{clickhouse_host}"
+    key = f"{storage.get_storage_key()}-{clickhouse_host}-{_driver_cache_token()}"
     if key in NODE_CONNECTIONS:
         return NODE_CONNECTIONS[key]
 
@@ -162,8 +182,9 @@ def get_ro_node_connection(
 def get_ro_query_node_connection(
     storage_name: str, client_settings: ClickhouseClientSettings
 ) -> ClickhousePool:
-    if storage_name in CLUSTER_CONNECTIONS:
-        return CLUSTER_CONNECTIONS[storage_name]
+    key = f"{storage_name}-{_driver_cache_token()}"
+    if key in CLUSTER_CONNECTIONS:
+        return CLUSTER_CONNECTIONS[key]
 
     storage = _get_storage(storage_name)
     cluster = storage.get_cluster()
@@ -172,7 +193,7 @@ def get_ro_query_node_connection(
         connection_id.hostname, connection_id.tcp_port, storage_name, client_settings
     )
 
-    CLUSTER_CONNECTIONS[storage_name] = connection
+    CLUSTER_CONNECTIONS[key] = connection
     return connection
 
 
@@ -184,7 +205,7 @@ def get_sudo_node_connection(
 ) -> ClickhousePool:
     storage = _get_storage(storage_name)
 
-    key = f"{storage.get_storage_key()}-{clickhouse_host}-sudo"
+    key = f"{storage.get_storage_key()}-{clickhouse_host}-sudo-{_driver_cache_token()}"
     if key in NODE_CONNECTIONS:
         return NODE_CONNECTIONS[key]
 
@@ -216,7 +237,7 @@ def get_clusterless_node_connection(
     cluster = storage.get_cluster()
     database = cluster.get_database()
 
-    key = f"{storage.get_storage_key()}-{clickhouse_host}-clusterless-{database}"
+    key = f"{storage.get_storage_key()}-{clickhouse_host}-clusterless-{database}-{_driver_cache_token()}"
     if key in NODE_CONNECTIONS:
         return NODE_CONNECTIONS[key]
 
@@ -245,7 +266,7 @@ def get_ro_clusterless_node_connection(
     cluster = storage.get_cluster()
     database = cluster.get_database()
 
-    key = f"{storage.get_storage_key()}-{clickhouse_host}-clusterless-ro-{database}"
+    key = f"{storage.get_storage_key()}-{clickhouse_host}-clusterless-ro-{database}-{_driver_cache_token()}"
     if key in NODE_CONNECTIONS:
         return NODE_CONNECTIONS[key]
 

@@ -128,7 +128,9 @@ def copy_tables(
     skip_on_cluster: bool = False,
     cluster_name_override: Optional[str] = None,
 ) -> CopyTablesResponse:
-    settings = ClickhouseClientSettings.QUERY
+    # Table copies can run long, so use the unbounded INTERNAL profile rather
+    # than the 30s user-read QUERY profile.
+    settings = ClickhouseClientSettings.INTERNAL
     source_connection = get_clusterless_node_connection(
         source_host, 9000, storage_name, client_settings=settings
     )

@@ -48,7 +48,7 @@ def _get_nodes(storage_key: StorageKey, local: bool = True) -> Sequence[Node]:
             return []
         else:
             return [
-                {"host": node.host_name, "port": node.port}
+                {"host": node.host_name, "port": node.native_port}
                 for node in (
                     cluster.get_local_nodes() if local else cluster.get_distributed_nodes()
                 )
@@ -62,7 +62,7 @@ def _get_query_node(storage_key: StorageKey) -> Optional[Node]:
     try:
         cluster = get_storage(storage_key).get_cluster()
         query_node = cluster.get_query_node()
-        return {"host": query_node.host_name, "port": query_node.port}
+        return {"host": query_node.host_name, "port": query_node.native_port}
     except (AssertionError, KeyError, UndefinedClickhouseCluster):
         return None
 

@@ -70,6 +70,7 @@ def cleanup(
 
     from snuba.cleanup import logger, run_cleanup
     from snuba.clickhouse.native import ClickhousePool
+    from snuba.clusters.cluster import ClickhouseNode, connection_cache
 
     storage = get_writable_storage(StorageKey(storage_name))
 
@@ -81,16 +82,20 @@ def cleanup(
     cluster = storage.get_cluster()
     database = cluster.get_database()
 
+    connection: ClickhousePool
     if clickhouse_host and clickhouse_port:
-        connection = ClickhousePool(
-            clickhouse_host,
-            clickhouse_port,
+        # Go through the shared connection cache so the driver (native vs
+        # clickhouse-connect/HTTP) is selected by the runtime config, behind
+        # the abstract ClickhousePool type.
+        connection = connection_cache.get_node_connection(
+            ClickhouseClientSettings.CLEANUP,
+            ClickhouseNode(clickhouse_host, clickhouse_port, http_port=cluster.get_http_port()),
             clickhouse_user,
             clickhouse_password,
             database,
-            clickhouse_secure,
-            clickhouse_ca_certs,
-            clickhouse_verify,
+            secure=clickhouse_secure,
+            ca_certs=clickhouse_ca_certs,
+            verify=clickhouse_verify,
         )
     elif not cluster.is_single_node():
         raise click.ClickException("Provide ClickHouse host and port for cleanup")

@@ -80,6 +80,7 @@ def optimize(
 
     from snuba.clickhouse.native import ClickhousePool
     from snuba.clickhouse.optimize.optimize import logger
+    from snuba.clusters.cluster import ClickhouseNode, connection_cache
 
     setup_logging(log_level)
     setup_sentry()
@@ -100,17 +101,25 @@ def optimize(
     # passing this information won't be necessary, and running this command once
     # will ensure that optimize is performed on all of the individual nodes for
     # that cluster.
+    connection: ClickhousePool
     if clickhouse_host and clickhouse_port:
-        connection = ClickhousePool(
-            clickhouse_host,
-            clickhouse_port,
+        # Go through the shared connection cache so the driver (native vs
+        # clickhouse-connect/HTTP) is selected by the runtime config, behind
+        # the abstract ClickhousePool type. The OPTIMIZE timeout is carried by
+        # the client settings profile the cache reads.
+        connection = connection_cache.get_node_connection(
+            ClickhouseClientSettings.OPTIMIZE,
+            ClickhouseNode(
+                clickhouse_host,
+                clickhouse_port,
+                http_port=storage.get_cluster().get_http_port(),
+            ),
             clickhouse_user,
             clickhouse_password,
             database,
-            clickhouse_secure,
-            clickhouse_ca_certs,
-            clickhouse_verify,
-            send_receive_timeout=ClickhouseClientSettings.OPTIMIZE.value.timeout,
+            secure=clickhouse_secure,
+            ca_certs=clickhouse_ca_certs,
+            verify=clickhouse_verify,
         )
     elif not storage.get_cluster().is_single_node():
         raise click.ClickException("Provide Clickhouse host and port for optimize")

@@ -1,4 +1,5 @@
 import csv
+import os
 from datetime import datetime
 from typing import NamedTuple, Optional, Sequence, Tuple
 
@@ -7,8 +8,12 @@
 
 from snuba import settings
 from snuba.admin.notifications.slack.client import SlackClient
-from snuba.clickhouse.native import ClickhousePool
-from snuba.clusters.cluster import ClickhouseClientSettings
+from snuba.clusters.cluster import (
+    DEFAULT_CLICKHOUSE_HTTP_PORT,
+    ClickhouseClientSettings,
+    ClickhouseNode,
+    connection_cache,
+)
 from snuba.environment import setup_logging, setup_sentry
 
 logger = structlog.get_logger().bind(module=__name__)
@@ -160,13 +165,21 @@ def querylog_to_csv(
     query = get_query_results(event_type, [database], tables, start_time, end_time)
 
     (clickhouse_user, clickhouse_password) = get_credentials()
-    connection = ClickhousePool(
-        host=clickhouse_host,
-        port=clickhouse_port,
-        user=clickhouse_user,
-        password=clickhouse_password,
-        database=database,
-        client_settings=ClickhouseClientSettings.QUERY.value.settings,
+    # Go through the shared connection cache so the driver (native vs
+    # clickhouse-connect/HTTP) is selected by the runtime config, behind the
+    # abstract ClickhousePool type. There is no cluster here to read an
+    # http_port from, so use the configured CLICKHOUSE_HTTP_PORT (the same env
+    # var the cluster config reads), defaulting to the well-known port.
+    http_port = int(os.environ.get("CLICKHOUSE_HTTP_PORT", DEFAULT_CLICKHOUSE_HTTP_PORT))
+    connection = connection_cache.get_node_connection(
+        ClickhouseClientSettings.QUERY,
+        ClickhouseNode(clickhouse_host, clickhouse_port, http_port=http_port),
+        clickhouse_user,
+        clickhouse_password,
+        database,
+        secure=False,
+        ca_certs=None,
+        verify=False,
     )
     results = connection.execute(query)
     filename = format_filename(table)