From 0e30cb05e0d7d40b734ff02a4dfc92084f1ddb69 Mon Sep 17 00:00:00 2001
From: Pierre Massat <pierre.massat@sentry.io>
Date: Wed, 17 Jun 2026 16:40:20 -0700
Subject: [PATCH 1/8] feat(eap-items): use indexed_name column for
 sentry.op/sentry.metric.name filters

Filtering on the per-item-type primary name attribute (sentry.op for spans,
sentry.metric.name for metrics) was translated into an unindexed
attributes_string bucket map lookup (arrayElement(attributes_string_N, ...)).
The same value is also stored in the dedicated indexed_name String column, which
has a bloom filter index (migration 0057).

Add an IndexedNameOptimizer logical query processor that rewrites
attributes_string['sentry.op'|'sentry.metric.name'] SubscriptableReferences to
the indexed_name column so the bloom filter index is used. Because indexed_name
is item-type specific, the rewrite is only applied when the query is
unambiguously scoped to the owning item type via an equals(item_type, N)
condition; otherwise it safely falls back to the bucket lookup.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01GXBnLMCV79qaAkVRkQu7Nw

Agent transcript: https://claudescope.sentry.dev/share/cdjPeZ3xwrigGK-uGIWE0P3ZX1YWtOxOZwNTOWPmZMo
---
 .../entities/eap_items.yaml                   |   1 +
 .../logical/indexed_name_optimizer.py         |  91 +++++++++
 .../processors/test_indexed_name_optimizer.py | 173 ++++++++++++++++++
 3 files changed, 265 insertions(+)
 create mode 100644 snuba/query/processors/logical/indexed_name_optimizer.py
 create mode 100644 tests/query/processors/test_indexed_name_optimizer.py

diff --git a/snuba/datasets/configuration/events_analytics_platform/entities/eap_items.yaml b/snuba/datasets/configuration/events_analytics_platform/entities/eap_items.yaml
index d47659d1555..bab229eb410 100644
--- a/snuba/datasets/configuration/events_analytics_platform/entities/eap_items.yaml
+++ b/snuba/datasets/configuration/events_analytics_platform/entities/eap_items.yaml
@@ -109,6 +109,7 @@ storage_selector:
   selector: EAPItemsStorageSelector
 
 query_processors:
+  - processor: IndexedNameOptimizer
   - processor: HashBucketFunctionTransformer
     args:
       hash_bucket_names:
diff --git a/snuba/query/processors/logical/indexed_name_optimizer.py b/snuba/query/processors/logical/indexed_name_optimizer.py
new file mode 100644
index 00000000000..a44a9948959
--- /dev/null
+++ b/snuba/query/processors/logical/indexed_name_optimizer.py
@@ -0,0 +1,91 @@
+from typing import Optional
+
+from sentry_protos.snuba.v1.request_common_pb2 import TraceItemType
+
+from snuba.query.conditions import get_first_level_and_conditions
+from snuba.query.expressions import (
+    Column,
+    Expression,
+    FunctionCall,
+    Literal,
+    SubscriptableReference,
+)
+from snuba.query.logical import Query
+from snuba.query.processors.logical import LogicalQueryProcessor
+from snuba.query.query_settings import QuerySettings
+
+
+class IndexedNameOptimizer(LogicalQueryProcessor):
+    """
+    In eap_items, the per-item-type primary name attribute is promoted to a
+    dedicated ``indexed_name`` String column with a bloom filter index:
+    ``sentry.op`` for spans and ``sentry.metric.name`` for metrics. The same
+    value is still written to the hashed ``attributes_string`` buckets.
+
+    Filtering ``attributes_string['sentry.op']`` (resp.
+    ``attributes_string['sentry.metric.name']``) would otherwise be translated
+    into a bucket map lookup (``arrayElement(attributes_string_N, ...)``) which
+    has no index. This processor rewrites those subscriptable accesses into a
+    reference to the indexed ``indexed_name`` column so the bloom filter index
+    can be used.
+
+    The promotion is item-type specific, so the rewrite is only applied when the
+    query is unambiguously scoped (via an ``item_type`` equality condition) to
+    the item type that owns the attribute. Otherwise the access is left as is and
+    falls back to the bucket lookup.
+    """
+
+    # item_type proto enum value -> attribute promoted into ``indexed_name``
+    INDEXED_NAME_KEY_BY_ITEM_TYPE: dict[int, str] = {
+        TraceItemType.TRACE_ITEM_TYPE_SPAN: "sentry.op",
+        TraceItemType.TRACE_ITEM_TYPE_METRIC: "sentry.metric.name",
+    }
+
+    INDEXED_NAME_COLUMN = "indexed_name"
+    ATTRIBUTES_STRING_COLUMN = "attributes_string"
+
+    def _indexed_name_key(self, query: Query) -> Optional[str]:
+        """Return the attribute name promoted into ``indexed_name`` for this
+        query, or ``None`` if the query is not unambiguously scoped to a single
+        supported item type."""
+        condition = query.get_condition()
+        if condition is None:
+            return None
+
+        item_types: set[int] = set()
+        for cond in get_first_level_and_conditions(condition):
+            if not isinstance(cond, FunctionCall) or cond.function_name != "equals":
+                continue
+            if len(cond.parameters) != 2:
+                continue
+            lhs, rhs = cond.parameters
+            if not isinstance(lhs, Column) or lhs.column_name != "item_type":
+                continue
+            if not isinstance(rhs, Literal) or not isinstance(rhs.value, int):
+                continue
+            item_types.add(rhs.value)
+
+        if len(item_types) != 1:
+            return None
+        return self.INDEXED_NAME_KEY_BY_ITEM_TYPE.get(item_types.pop())
+
+    def process_query(self, query: Query, query_settings: QuerySettings) -> None:
+        key = self._indexed_name_key(query)
+        if key is None:
+            return
+
+        def transform(exp: Expression) -> Expression:
+            if (
+                isinstance(exp, SubscriptableReference)
+                and exp.column.column_name == self.ATTRIBUTES_STRING_COLUMN
+                and isinstance(exp.key, Literal)
+                and exp.key.value == key
+            ):
+                return Column(
+                    alias=exp.alias,
+                    table_name=exp.column.table_name,
+                    column_name=self.INDEXED_NAME_COLUMN,
+                )
+            return exp
+
+        query.transform_expressions(transform)
diff --git a/tests/query/processors/test_indexed_name_optimizer.py b/tests/query/processors/test_indexed_name_optimizer.py
new file mode 100644
index 00000000000..34438a59565
--- /dev/null
+++ b/tests/query/processors/test_indexed_name_optimizer.py
@@ -0,0 +1,173 @@
+from copy import deepcopy
+
+import pytest
+
+from snuba.clickhouse.columns import ColumnSet
+from snuba.datasets.entities.entity_key import EntityKey
+from snuba.query import SelectedExpression
+from snuba.query.data_source.simple import Entity as QueryEntity
+from snuba.query.dsl import and_cond, column, equals, literal
+from snuba.query.expressions import (
+    Column,
+    Expression,
+    Literal,
+    SubscriptableReference,
+)
+from snuba.query.logical import Query
+from snuba.query.processors.logical.indexed_name_optimizer import (
+    IndexedNameOptimizer,
+)
+from snuba.query.query_settings import HTTPQuerySettings
+
+SPAN = 1
+METRIC = 8
+LOG = 2
+
+
+def _attr_str(key: str, alias: str | None = None) -> SubscriptableReference:
+    return SubscriptableReference(
+        alias=alias,
+        column=column("attributes_string"),
+        key=literal(key),
+    )
+
+
+def _query(
+    condition: Expression,
+    selected_columns: list[SelectedExpression] | None = None,
+) -> Query:
+    return Query(
+        QueryEntity(EntityKey.EAP_ITEMS, ColumnSet([])),
+        selected_columns=selected_columns or [SelectedExpression("c", column("project_id"))],
+        condition=condition,
+    )
+
+
+test_data = [
+    pytest.param(
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                equals(_attr_str("sentry.op"), literal("db.query")),
+            )
+        ),
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                equals(Column(None, None, "indexed_name"), literal("db.query")),
+            )
+        ),
+        id="span sentry.op rewritten to indexed_name",
+    ),
+    pytest.param(
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(METRIC)),
+                equals(_attr_str("sentry.metric.name"), literal("my.metric")),
+            )
+        ),
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(METRIC)),
+                equals(Column(None, None, "indexed_name"), literal("my.metric")),
+            )
+        ),
+        id="metric sentry.metric.name rewritten to indexed_name",
+    ),
+    pytest.param(
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                equals(_attr_str("sentry.metric.name"), literal("my.metric")),
+            )
+        ),
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                equals(_attr_str("sentry.metric.name"), literal("my.metric")),
+            )
+        ),
+        id="span query with metric key is left untouched",
+    ),
+    pytest.param(
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                equals(_attr_str("foo"), literal("bar")),
+            )
+        ),
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                equals(_attr_str("foo"), literal("bar")),
+            )
+        ),
+        id="non-indexed attribute is left untouched",
+    ),
+    pytest.param(
+        _query(equals(_attr_str("sentry.op"), literal("db.query"))),
+        _query(equals(_attr_str("sentry.op"), literal("db.query"))),
+        id="no item_type condition leaves bucket lookup",
+    ),
+    pytest.param(
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(LOG)),
+                equals(_attr_str("sentry.op"), literal("db.query")),
+            )
+        ),
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(LOG)),
+                equals(_attr_str("sentry.op"), literal("db.query")),
+            )
+        ),
+        id="unsupported item_type leaves bucket lookup",
+    ),
+    pytest.param(
+        _query(
+            condition=equals(column("item_type"), literal(SPAN)),
+            selected_columns=[
+                SelectedExpression("op", _attr_str("sentry.op", alias="op")),
+            ],
+        ),
+        _query(
+            condition=equals(column("item_type"), literal(SPAN)),
+            selected_columns=[
+                SelectedExpression("op", Column("op", None, "indexed_name")),
+            ],
+        ),
+        id="select preserves alias on rewrite",
+    ),
+]
+
+
+@pytest.mark.parametrize("pre_format, expected_query", test_data)
+def test_indexed_name_optimizer(pre_format: Query, expected_query: Query) -> None:
+    copy = deepcopy(pre_format)
+    IndexedNameOptimizer().process_query(copy, HTTPQuerySettings())
+    assert copy.get_selected_columns() == expected_query.get_selected_columns()
+    assert copy.get_condition() == expected_query.get_condition()
+
+
+def test_contradictory_item_types_left_untouched() -> None:
+    query = _query(
+        and_cond(
+            equals(column("item_type"), literal(SPAN)),
+            and_cond(
+                equals(column("item_type"), literal(METRIC)),
+                equals(_attr_str("sentry.op"), literal("db.query")),
+            ),
+        )
+    )
+    copy = deepcopy(query)
+    IndexedNameOptimizer().process_query(copy, HTTPQuerySettings())
+    # ambiguous item_type -> no rewrite; the subscriptable access survives
+    condition = copy.get_condition()
+    assert condition is not None
+    assert any(
+        isinstance(e, SubscriptableReference)
+        and isinstance(e.key, Literal)
+        and e.key.value == "sentry.op"
+        for e in condition
+    )

From 32ce6d81b081c69750efaf6f7cc19812c3846bf5 Mon Sep 17 00:00:00 2001
From: Pierre Massat <pierre.massat@sentry.io>
Date: Wed, 17 Jun 2026 17:05:02 -0700
Subject: [PATCH 2/8] test: use protobuf TraceItemType enum instead of
 hardcoded item_type values

Keeps the optimizer tests in sync with the sentry_protos enum values they
key off of, so a divergence in the upstream enum fails the tests instead of
silently degrading the optimization.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01GXBnLMCV79qaAkVRkQu7Nw
---
 tests/query/processors/test_indexed_name_optimizer.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tests/query/processors/test_indexed_name_optimizer.py b/tests/query/processors/test_indexed_name_optimizer.py
index 34438a59565..79d4745e99f 100644
--- a/tests/query/processors/test_indexed_name_optimizer.py
+++ b/tests/query/processors/test_indexed_name_optimizer.py
@@ -1,6 +1,7 @@
 from copy import deepcopy
 
 import pytest
+from sentry_protos.snuba.v1.request_common_pb2 import TraceItemType
 
 from snuba.clickhouse.columns import ColumnSet
 from snuba.datasets.entities.entity_key import EntityKey
@@ -19,9 +20,11 @@
 )
 from snuba.query.query_settings import HTTPQuerySettings
 
-SPAN = 1
-METRIC = 8
-LOG = 2
+# Use the protobuf enum values directly so the tests stay in sync with the
+# production dependency the optimizer keys off of.
+SPAN = TraceItemType.TRACE_ITEM_TYPE_SPAN
+METRIC = TraceItemType.TRACE_ITEM_TYPE_METRIC
+LOG = TraceItemType.TRACE_ITEM_TYPE_LOG
 
 
 def _attr_str(key: str, alias: str | None = None) -> SubscriptableReference:

From 10147ea38db37291c647cc076ef906c404b2c9b2 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 19 Jun 2026 03:13:41 +0000
Subject: [PATCH 3/8] ref(eap-items): use ConditionFunctions.EQ instead of
 hardcoded "equals"

Address review nit: reference the ConditionFunctions.EQ constant rather
than the hardcoded "equals" string in IndexedNameOptimizer.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01CajnvSCYTxvJ9Hw5jELRPH
---
 .../query/processors/logical/indexed_name_optimizer.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/snuba/query/processors/logical/indexed_name_optimizer.py b/snuba/query/processors/logical/indexed_name_optimizer.py
index a44a9948959..c2187b3abdb 100644
--- a/snuba/query/processors/logical/indexed_name_optimizer.py
+++ b/snuba/query/processors/logical/indexed_name_optimizer.py
@@ -2,7 +2,10 @@
 
 from sentry_protos.snuba.v1.request_common_pb2 import TraceItemType
 
-from snuba.query.conditions import get_first_level_and_conditions
+from snuba.query.conditions import (
+    ConditionFunctions,
+    get_first_level_and_conditions,
+)
 from snuba.query.expressions import (
     Column,
     Expression,
@@ -54,7 +57,10 @@ def _indexed_name_key(self, query: Query) -> Optional[str]:
 
         item_types: set[int] = set()
         for cond in get_first_level_and_conditions(condition):
-            if not isinstance(cond, FunctionCall) or cond.function_name != "equals":
+            if (
+                not isinstance(cond, FunctionCall)
+                or cond.function_name != ConditionFunctions.EQ
+            ):
                 continue
             if len(cond.parameters) != 2:
                 continue

From 77b3cde305c5cac19fbd9fb7a293705681a81fb2 Mon Sep 17 00:00:00 2001
From: "getsantry[bot]" <66042841+getsantry[bot]@users.noreply.github.com>
Date: Fri, 19 Jun 2026 03:18:33 +0000
Subject: [PATCH 4/8] [getsentry/action-github-commit] Auto commit

---
 snuba/query/processors/logical/indexed_name_optimizer.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/snuba/query/processors/logical/indexed_name_optimizer.py b/snuba/query/processors/logical/indexed_name_optimizer.py
index c2187b3abdb..2de19c1613c 100644
--- a/snuba/query/processors/logical/indexed_name_optimizer.py
+++ b/snuba/query/processors/logical/indexed_name_optimizer.py
@@ -57,10 +57,7 @@ def _indexed_name_key(self, query: Query) -> Optional[str]:
 
         item_types: set[int] = set()
         for cond in get_first_level_and_conditions(condition):
-            if (
-                not isinstance(cond, FunctionCall)
-                or cond.function_name != ConditionFunctions.EQ
-            ):
+            if not isinstance(cond, FunctionCall) or cond.function_name != ConditionFunctions.EQ:
                 continue
             if len(cond.parameters) != 2:
                 continue

From 147b34d587d5f6be0525806e8b86e51a05324796 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 19 Jun 2026 04:25:04 +0000
Subject: [PATCH 5/8] feat(eap-items): keep attributes_string fallback and
 match the real RPC filter shape

The IndexedNameOptimizer matched SubscriptableReference, but the EAP RPC
builder emits map-backed filters as arrayElement(attributes_string, key)
(since #8035), so the optimizer never fired on real traffic. It also
replaced the bucket lookup outright, which would drop rows whose
indexed_name is empty (migration 0057 adds the column with no backfill).

Rewrite a WHERE-clause equals/in filter on the indexed attribute into
or(<op on indexed_name>, <original op on the bucket>): the bloom filter
index can prune granules via indexed_name while the bucket lookup keeps
results correct for non-backfilled rows. Only non-empty string literals
are pushed onto the index (indexed_name = '' would over-match every
non-backfilled row), and SELECT/GROUP BY references keep reading the
bucket.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01Hax3wseAtUEKY7GfpcJNTx
---
 .../logical/indexed_name_optimizer.py         | 148 +++++++++++--
 .../processors/test_indexed_name_optimizer.py | 204 ++++++++++++++++--
 2 files changed, 310 insertions(+), 42 deletions(-)

diff --git a/snuba/query/processors/logical/indexed_name_optimizer.py b/snuba/query/processors/logical/indexed_name_optimizer.py
index 2de19c1613c..a9a3e3a3735 100644
--- a/snuba/query/processors/logical/indexed_name_optimizer.py
+++ b/snuba/query/processors/logical/indexed_name_optimizer.py
@@ -4,8 +4,10 @@
 
 from snuba.query.conditions import (
     ConditionFunctions,
+    combine_and_conditions,
     get_first_level_and_conditions,
 )
+from snuba.query.dsl import or_cond
 from snuba.query.expressions import (
     Column,
     Expression,
@@ -25,12 +27,41 @@ class IndexedNameOptimizer(LogicalQueryProcessor):
     ``sentry.op`` for spans and ``sentry.metric.name`` for metrics. The same
     value is still written to the hashed ``attributes_string`` buckets.
 
-    Filtering ``attributes_string['sentry.op']`` (resp.
-    ``attributes_string['sentry.metric.name']``) would otherwise be translated
-    into a bucket map lookup (``arrayElement(attributes_string_N, ...)``) which
-    has no index. This processor rewrites those subscriptable accesses into a
-    reference to the indexed ``indexed_name`` column so the bloom filter index
-    can be used.
+    A filter on ``attributes_string['sentry.op']`` (resp.
+    ``attributes_string['sentry.metric.name']``) is otherwise translated into a
+    bucket map lookup (``arrayElement(attributes_string_N, ...)``) which has no
+    index. This processor rewrites such a filter into an ``OR`` that also probes
+    the indexed ``indexed_name`` column, so the bloom filter index can prune
+    granules::
+
+        equals(arrayElement(attributes_string, 'sentry.op'), 'db.query')
+        ->
+        or(
+            equals(indexed_name, 'db.query'),
+            equals(arrayElement(attributes_string, 'sentry.op'), 'db.query'),
+        )
+
+    The bucket lookup is intentionally kept as a fallback: ``indexed_name`` was
+    added (with no backfill) by migration ``0057_add_name_column_and_index``, so
+    rows written before that migration — or before the value started being
+    populated — have an empty ``indexed_name`` while still carrying the value in
+    ``attributes_string``. Filtering on ``indexed_name`` alone would silently
+    drop those rows; the ``OR`` keeps the query correct while still letting the
+    index help for the rows that do have it.
+
+    Only the WHERE clause is rewritten (the bloom filter index only helps
+    filtering), and only the positive ``equals`` / ``in`` operators against
+    non-empty string literals are touched — for those the ``OR`` fallback is
+    correctness-preserving (see ``_is_index_safe_value`` for why the empty-string
+    default is excluded). Everything else (negations, guarded forms, comparisons
+    to the column default, SELECT/GROUP BY references) keeps reading the bucket
+    and is left untouched.
+
+    The EAP RPC builder emits the ``arrayElement(attributes_string, key)`` form
+    (see ``snuba.web.rpc.common.common._map_backed_operands``); the equivalent
+    hand-written SnQL ``SubscriptableReference`` form is handled too. This runs
+    before ``HashBucketFunctionTransformer`` so the bucket branch still gets
+    mapped to its hashed bucket column downstream.
 
     The promotion is item-type specific, so the rewrite is only applied when the
     query is unambiguously scoped (via an ``item_type`` equality condition) to
@@ -47,6 +78,10 @@ class IndexedNameOptimizer(LogicalQueryProcessor):
     INDEXED_NAME_COLUMN = "indexed_name"
     ATTRIBUTES_STRING_COLUMN = "attributes_string"
 
+    # Operators whose granule pruning the ``indexed_name`` bloom filter can serve
+    # and for which an OR fallback to the bucket lookup is correctness-preserving.
+    OPTIMIZABLE_OPS = frozenset({ConditionFunctions.EQ, ConditionFunctions.IN})
+
     def _indexed_name_key(self, query: Query) -> Optional[str]:
         """Return the attribute name promoted into ``indexed_name`` for this
         query, or ``None`` if the query is not unambiguously scoped to a single
@@ -72,23 +107,94 @@ def _indexed_name_key(self, query: Query) -> Optional[str]:
             return None
         return self.INDEXED_NAME_KEY_BY_ITEM_TYPE.get(item_types.pop())
 
+    def _indexed_name_ref(self, exp: Expression, key: str) -> Optional[Column]:
+        """If ``exp`` is an ``attributes_string[key]`` access — either the
+        ``arrayElement(attributes_string, key)`` form emitted by the EAP RPC
+        builder or the hand-written ``SubscriptableReference`` form — return the
+        equivalent reference to the ``indexed_name`` column (carrying over the
+        table name). Otherwise return ``None``."""
+        if (
+            isinstance(exp, FunctionCall)
+            and exp.function_name == "arrayElement"
+            and len(exp.parameters) == 2
+            and isinstance(exp.parameters[0], Column)
+            and exp.parameters[0].column_name == self.ATTRIBUTES_STRING_COLUMN
+            and isinstance(exp.parameters[1], Literal)
+            and exp.parameters[1].value == key
+        ):
+            return Column(None, exp.parameters[0].table_name, self.INDEXED_NAME_COLUMN)
+
+        if (
+            isinstance(exp, SubscriptableReference)
+            and exp.column.column_name == self.ATTRIBUTES_STRING_COLUMN
+            and isinstance(exp.key, Literal)
+            and exp.key.value == key
+        ):
+            return Column(None, exp.column.table_name, self.INDEXED_NAME_COLUMN)
+
+        return None
+
+    def _is_index_safe_value(self, function_name: str, rhs: Expression) -> bool:
+        """An empty string is the ``String`` column default, which non-backfilled
+        rows (empty ``indexed_name``) also read as — so ``indexed_name = ''`` would
+        match every such row regardless of its real value. Only probe
+        ``indexed_name`` for non-empty string literals, where a match means the
+        value was actually populated; the bucket fallback still covers the rest.
+        This mirrors ``_comparison_can_match_column_default`` on the RPC side: the
+        cases it guards are exactly the ones we must not push onto the index."""
+
+        def safe_literal(exp: Expression) -> bool:
+            return isinstance(exp, Literal) and isinstance(exp.value, str) and exp.value != ""
+
+        if function_name == ConditionFunctions.EQ:
+            return safe_literal(rhs)
+        # IN: the right-hand side is an ``array(...)`` of literals.
+        if function_name == ConditionFunctions.IN:
+            return (
+                isinstance(rhs, FunctionCall)
+                and rhs.function_name == "array"
+                and len(rhs.parameters) > 0
+                and all(safe_literal(p) for p in rhs.parameters)
+            )
+        return False
+
+    def _maybe_rewrite_conjunct(self, conjunct: Expression, key: str) -> Expression:
+        """Rewrite a top-level ``equals``/``in`` filter on the indexed attribute
+        into ``or(<op on indexed_name>, <original op on the bucket>)``. Any other
+        conjunct is returned unchanged."""
+        if (
+            not isinstance(conjunct, FunctionCall)
+            or conjunct.function_name not in self.OPTIMIZABLE_OPS
+            or len(conjunct.parameters) != 2
+        ):
+            return conjunct
+
+        indexed_ref = self._indexed_name_ref(conjunct.parameters[0], key)
+        if indexed_ref is None:
+            return conjunct
+
+        if not self._is_index_safe_value(conjunct.function_name, conjunct.parameters[1]):
+            return conjunct
+
+        indexed_branch = FunctionCall(
+            None,
+            conjunct.function_name,
+            (indexed_ref, conjunct.parameters[1]),
+        )
+        return or_cond(indexed_branch, conjunct)
+
     def process_query(self, query: Query, query_settings: QuerySettings) -> None:
         key = self._indexed_name_key(query)
         if key is None:
             return
 
-        def transform(exp: Expression) -> Expression:
-            if (
-                isinstance(exp, SubscriptableReference)
-                and exp.column.column_name == self.ATTRIBUTES_STRING_COLUMN
-                and isinstance(exp.key, Literal)
-                and exp.key.value == key
-            ):
-                return Column(
-                    alias=exp.alias,
-                    table_name=exp.column.table_name,
-                    column_name=self.INDEXED_NAME_COLUMN,
-                )
-            return exp
-
-        query.transform_expressions(transform)
+        condition = query.get_condition()
+        if condition is None:
+            return
+
+        conjuncts = list(get_first_level_and_conditions(condition))
+        rewritten = [self._maybe_rewrite_conjunct(cond, key) for cond in conjuncts]
+        if rewritten == conjuncts:
+            return
+
+        query.set_ast_condition(combine_and_conditions(rewritten))
diff --git a/tests/query/processors/test_indexed_name_optimizer.py b/tests/query/processors/test_indexed_name_optimizer.py
index 79d4745e99f..2dd9a074ade 100644
--- a/tests/query/processors/test_indexed_name_optimizer.py
+++ b/tests/query/processors/test_indexed_name_optimizer.py
@@ -7,10 +7,20 @@
 from snuba.datasets.entities.entity_key import EntityKey
 from snuba.query import SelectedExpression
 from snuba.query.data_source.simple import Entity as QueryEntity
-from snuba.query.dsl import and_cond, column, equals, literal
+from snuba.query.dsl import (
+    and_cond,
+    arrayElement,
+    column,
+    equals,
+    in_cond,
+    literal,
+    literals_array,
+    or_cond,
+)
 from snuba.query.expressions import (
     Column,
     Expression,
+    FunctionCall,
     Literal,
     SubscriptableReference,
 )
@@ -27,7 +37,14 @@
 LOG = TraceItemType.TRACE_ITEM_TYPE_LOG
 
 
+def _attr_arr(key: str) -> FunctionCall:
+    """The ``arrayElement(attributes_string, key)`` form the EAP RPC builder
+    emits for a map-backed string filter (see ``_map_backed_operands``)."""
+    return arrayElement(None, column("attributes_string"), literal(key))
+
+
 def _attr_str(key: str, alias: str | None = None) -> SubscriptableReference:
+    """The hand-written SnQL ``attributes_string[key]`` form."""
     return SubscriptableReference(
         alias=alias,
         column=column("attributes_string"),
@@ -47,6 +64,24 @@ def _query(
 
 
 test_data = [
+    pytest.param(
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                equals(_attr_arr("sentry.op"), literal("db.query")),
+            )
+        ),
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                or_cond(
+                    equals(column("indexed_name"), literal("db.query")),
+                    equals(_attr_arr("sentry.op"), literal("db.query")),
+                ),
+            )
+        ),
+        id="span sentry.op (arrayElement) gets an indexed_name OR fallback",
+    ),
     pytest.param(
         _query(
             and_cond(
@@ -57,37 +92,70 @@ def _query(
         _query(
             and_cond(
                 equals(column("item_type"), literal(SPAN)),
-                equals(Column(None, None, "indexed_name"), literal("db.query")),
+                or_cond(
+                    equals(column("indexed_name"), literal("db.query")),
+                    equals(_attr_str("sentry.op"), literal("db.query")),
+                ),
             )
         ),
-        id="span sentry.op rewritten to indexed_name",
+        id="span sentry.op (SubscriptableReference) gets an indexed_name OR fallback",
     ),
     pytest.param(
         _query(
             and_cond(
                 equals(column("item_type"), literal(METRIC)),
-                equals(_attr_str("sentry.metric.name"), literal("my.metric")),
+                equals(_attr_arr("sentry.metric.name"), literal("my.metric")),
             )
         ),
         _query(
             and_cond(
                 equals(column("item_type"), literal(METRIC)),
-                equals(Column(None, None, "indexed_name"), literal("my.metric")),
+                or_cond(
+                    equals(column("indexed_name"), literal("my.metric")),
+                    equals(_attr_arr("sentry.metric.name"), literal("my.metric")),
+                ),
+            )
+        ),
+        id="metric sentry.metric.name gets an indexed_name OR fallback",
+    ),
+    pytest.param(
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                in_cond(
+                    _attr_arr("sentry.op"),
+                    literals_array(None, [literal("db.query"), literal("http.client")]),
+                ),
+            )
+        ),
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                or_cond(
+                    in_cond(
+                        column("indexed_name"),
+                        literals_array(None, [literal("db.query"), literal("http.client")]),
+                    ),
+                    in_cond(
+                        _attr_arr("sentry.op"),
+                        literals_array(None, [literal("db.query"), literal("http.client")]),
+                    ),
+                ),
             )
         ),
-        id="metric sentry.metric.name rewritten to indexed_name",
+        id="span sentry.op IN gets an indexed_name OR fallback",
     ),
     pytest.param(
         _query(
             and_cond(
                 equals(column("item_type"), literal(SPAN)),
-                equals(_attr_str("sentry.metric.name"), literal("my.metric")),
+                equals(_attr_arr("sentry.metric.name"), literal("my.metric")),
             )
         ),
         _query(
             and_cond(
                 equals(column("item_type"), literal(SPAN)),
-                equals(_attr_str("sentry.metric.name"), literal("my.metric")),
+                equals(_attr_arr("sentry.metric.name"), literal("my.metric")),
             )
         ),
         id="span query with metric key is left untouched",
@@ -96,37 +164,128 @@ def _query(
         _query(
             and_cond(
                 equals(column("item_type"), literal(SPAN)),
-                equals(_attr_str("foo"), literal("bar")),
+                equals(_attr_arr("foo"), literal("bar")),
             )
         ),
         _query(
             and_cond(
                 equals(column("item_type"), literal(SPAN)),
-                equals(_attr_str("foo"), literal("bar")),
+                equals(_attr_arr("foo"), literal("bar")),
             )
         ),
         id="non-indexed attribute is left untouched",
     ),
     pytest.param(
-        _query(equals(_attr_str("sentry.op"), literal("db.query"))),
-        _query(equals(_attr_str("sentry.op"), literal("db.query"))),
+        _query(equals(_attr_arr("sentry.op"), literal("db.query"))),
+        _query(equals(_attr_arr("sentry.op"), literal("db.query"))),
         id="no item_type condition leaves bucket lookup",
     ),
     pytest.param(
         _query(
             and_cond(
                 equals(column("item_type"), literal(LOG)),
-                equals(_attr_str("sentry.op"), literal("db.query")),
+                equals(_attr_arr("sentry.op"), literal("db.query")),
             )
         ),
         _query(
             and_cond(
                 equals(column("item_type"), literal(LOG)),
-                equals(_attr_str("sentry.op"), literal("db.query")),
+                equals(_attr_arr("sentry.op"), literal("db.query")),
             )
         ),
         id="unsupported item_type leaves bucket lookup",
     ),
+    pytest.param(
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                FunctionCall(
+                    None,
+                    "notEquals",
+                    (_attr_arr("sentry.op"), literal("db.query")),
+                ),
+            )
+        ),
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                FunctionCall(
+                    None,
+                    "notEquals",
+                    (_attr_arr("sentry.op"), literal("db.query")),
+                ),
+            )
+        ),
+        id="negated filter is left untouched (OR fallback would be incorrect)",
+    ),
+    pytest.param(
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                equals(_attr_arr("sentry.op"), literal("")),
+            )
+        ),
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                equals(_attr_arr("sentry.op"), literal("")),
+            )
+        ),
+        id="empty-string equals is left untouched (indexed_name='' over-matches)",
+    ),
+    pytest.param(
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                in_cond(
+                    _attr_arr("sentry.op"),
+                    literals_array(None, [literal("db.query"), literal("")]),
+                ),
+            )
+        ),
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                in_cond(
+                    _attr_arr("sentry.op"),
+                    literals_array(None, [literal("db.query"), literal("")]),
+                ),
+            )
+        ),
+        id="IN containing the empty string is left untouched",
+    ),
+    pytest.param(
+        # The guarded form the RPC emits when the value is the column default:
+        # and(mapContains(...), equals(arrayElement(...), '')). The empty value
+        # makes the indexed_name probe unsafe, so the whole thing is left alone.
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                and_cond(
+                    FunctionCall(
+                        None,
+                        "mapContains",
+                        (column("attributes_string"), literal("sentry.op")),
+                    ),
+                    equals(_attr_arr("sentry.op"), literal("")),
+                ),
+            )
+        ),
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                and_cond(
+                    FunctionCall(
+                        None,
+                        "mapContains",
+                        (column("attributes_string"), literal("sentry.op")),
+                    ),
+                    equals(_attr_arr("sentry.op"), literal("")),
+                ),
+            )
+        ),
+        id="guarded empty-value form is left untouched",
+    ),
     pytest.param(
         _query(
             condition=equals(column("item_type"), literal(SPAN)),
@@ -137,10 +296,10 @@ def _query(
         _query(
             condition=equals(column("item_type"), literal(SPAN)),
             selected_columns=[
-                SelectedExpression("op", Column("op", None, "indexed_name")),
+                SelectedExpression("op", _attr_str("sentry.op", alias="op")),
             ],
         ),
-        id="select preserves alias on rewrite",
+        id="SELECT references are left on the bucket (index only helps WHERE)",
     ),
 ]
 
@@ -159,18 +318,21 @@ def test_contradictory_item_types_left_untouched() -> None:
             equals(column("item_type"), literal(SPAN)),
             and_cond(
                 equals(column("item_type"), literal(METRIC)),
-                equals(_attr_str("sentry.op"), literal("db.query")),
+                equals(_attr_arr("sentry.op"), literal("db.query")),
             ),
         )
     )
     copy = deepcopy(query)
     IndexedNameOptimizer().process_query(copy, HTTPQuerySettings())
-    # ambiguous item_type -> no rewrite; the subscriptable access survives
+    # ambiguous item_type -> no rewrite; no indexed_name reference is introduced.
     condition = copy.get_condition()
     assert condition is not None
+    assert not any(isinstance(e, Column) and e.column_name == "indexed_name" for e in condition)
+    # the original bucket lookup survives untouched.
     assert any(
-        isinstance(e, SubscriptableReference)
-        and isinstance(e.key, Literal)
-        and e.key.value == "sentry.op"
+        isinstance(e, FunctionCall)
+        and e.function_name == "arrayElement"
+        and isinstance(e.parameters[1], Literal)
+        and e.parameters[1].value == "sentry.op"
         for e in condition
     )

From b535ee4d6d4eb7e0c9e10ebe26b329c8601da59a Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 19 Jun 2026 04:38:47 +0000
Subject: [PATCH 6/8] ref(eap-items): gate indexed_name rewrite behind a config
 flag, drop the fallback

Replace the OR bucket fallback with a direct index-only rewrite
(attributes_string[key] -> indexed_name), gated behind the
eap_items_use_indexed_name runtime flag (default off). The fallback forced
ClickHouse to scan every granule (an OR with the unindexed bucket branch is
never prunable), so it negated the bloom filter index it was meant to use.

Index-only filtering is only correct once indexed_name is fully populated
across the retention window (migration 0057 adds the column without a
backfill), so the flag must stay off until then; flipping it on lets the
bloom filter index serve sentry.op / sentry.metric.name filters. The rewrite
still matches the real arrayElement(attributes_string, key) shape emitted by
the RPC builder (and the SnQL SubscriptableReference form).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01Hax3wseAtUEKY7GfpcJNTx
---
 .../logical/indexed_name_optimizer.py         | 125 ++++---------
 .../processors/test_indexed_name_optimizer.py | 169 ++++++------------
 2 files changed, 81 insertions(+), 213 deletions(-)

diff --git a/snuba/query/processors/logical/indexed_name_optimizer.py b/snuba/query/processors/logical/indexed_name_optimizer.py
index a9a3e3a3735..9bc44810d17 100644
--- a/snuba/query/processors/logical/indexed_name_optimizer.py
+++ b/snuba/query/processors/logical/indexed_name_optimizer.py
@@ -2,12 +2,11 @@
 
 from sentry_protos.snuba.v1.request_common_pb2 import TraceItemType
 
+from snuba import state
 from snuba.query.conditions import (
     ConditionFunctions,
-    combine_and_conditions,
     get_first_level_and_conditions,
 )
-from snuba.query.dsl import or_cond
 from snuba.query.expressions import (
     Column,
     Expression,
@@ -30,38 +29,26 @@ class IndexedNameOptimizer(LogicalQueryProcessor):
     A filter on ``attributes_string['sentry.op']`` (resp.
     ``attributes_string['sentry.metric.name']``) is otherwise translated into a
     bucket map lookup (``arrayElement(attributes_string_N, ...)``) which has no
-    index. This processor rewrites such a filter into an ``OR`` that also probes
-    the indexed ``indexed_name`` column, so the bloom filter index can prune
-    granules::
+    index. This processor rewrites that access into a reference to the indexed
+    ``indexed_name`` column so the bloom filter index can be used::
 
         equals(arrayElement(attributes_string, 'sentry.op'), 'db.query')
-        ->
-        or(
-            equals(indexed_name, 'db.query'),
-            equals(arrayElement(attributes_string, 'sentry.op'), 'db.query'),
-        )
-
-    The bucket lookup is intentionally kept as a fallback: ``indexed_name`` was
-    added (with no backfill) by migration ``0057_add_name_column_and_index``, so
-    rows written before that migration — or before the value started being
-    populated — have an empty ``indexed_name`` while still carrying the value in
-    ``attributes_string``. Filtering on ``indexed_name`` alone would silently
-    drop those rows; the ``OR`` keeps the query correct while still letting the
-    index help for the rows that do have it.
-
-    Only the WHERE clause is rewritten (the bloom filter index only helps
-    filtering), and only the positive ``equals`` / ``in`` operators against
-    non-empty string literals are touched — for those the ``OR`` fallback is
-    correctness-preserving (see ``_is_index_safe_value`` for why the empty-string
-    default is excluded). Everything else (negations, guarded forms, comparisons
-    to the column default, SELECT/GROUP BY references) keeps reading the bucket
-    and is left untouched.
+        -> equals(indexed_name, 'db.query')
 
     The EAP RPC builder emits the ``arrayElement(attributes_string, key)`` form
     (see ``snuba.web.rpc.common.common._map_backed_operands``); the equivalent
     hand-written SnQL ``SubscriptableReference`` form is handled too. This runs
-    before ``HashBucketFunctionTransformer`` so the bucket branch still gets
-    mapped to its hashed bucket column downstream.
+    before ``HashBucketFunctionTransformer``, so untouched accesses still get
+    mapped to their hashed bucket column downstream.
+
+    ``indexed_name`` was added (with no backfill) by migration
+    ``0057_add_name_column_and_index``, so rows written before that migration —
+    or before the value started being populated — have an empty ``indexed_name``
+    while still carrying the value in ``attributes_string``. Reading
+    ``indexed_name`` for those rows would silently drop them, so the rewrite is
+    gated behind the ``CONFIG_KEY`` runtime flag (default off) and must only be
+    enabled once ``indexed_name`` is fully populated for the live retention
+    window.
 
     The promotion is item-type specific, so the rewrite is only applied when the
     query is unambiguously scoped (via an ``item_type`` equality condition) to
@@ -69,6 +56,11 @@ class IndexedNameOptimizer(LogicalQueryProcessor):
     falls back to the bucket lookup.
     """
 
+    # Runtime flag gating the rewrite. Off (0) until ``indexed_name`` has been
+    # backfilled across the retention window; flipping it on lets the bloom
+    # filter index serve ``sentry.op`` / ``sentry.metric.name`` filters.
+    CONFIG_KEY = "eap_items_use_indexed_name"
+
     # item_type proto enum value -> attribute promoted into ``indexed_name``
     INDEXED_NAME_KEY_BY_ITEM_TYPE: dict[int, str] = {
         TraceItemType.TRACE_ITEM_TYPE_SPAN: "sentry.op",
@@ -78,10 +70,6 @@ class IndexedNameOptimizer(LogicalQueryProcessor):
     INDEXED_NAME_COLUMN = "indexed_name"
     ATTRIBUTES_STRING_COLUMN = "attributes_string"
 
-    # Operators whose granule pruning the ``indexed_name`` bloom filter can serve
-    # and for which an OR fallback to the bucket lookup is correctness-preserving.
-    OPTIMIZABLE_OPS = frozenset({ConditionFunctions.EQ, ConditionFunctions.IN})
-
     def _indexed_name_key(self, query: Query) -> Optional[str]:
         """Return the attribute name promoted into ``indexed_name`` for this
         query, or ``None`` if the query is not unambiguously scoped to a single
@@ -111,8 +99,8 @@ def _indexed_name_ref(self, exp: Expression, key: str) -> Optional[Column]:
         """If ``exp`` is an ``attributes_string[key]`` access — either the
         ``arrayElement(attributes_string, key)`` form emitted by the EAP RPC
         builder or the hand-written ``SubscriptableReference`` form — return the
-        equivalent reference to the ``indexed_name`` column (carrying over the
-        table name). Otherwise return ``None``."""
+        equivalent reference to the ``indexed_name`` column (preserving the alias
+        and table name). Otherwise return ``None``."""
         if (
             isinstance(exp, FunctionCall)
             and exp.function_name == "arrayElement"
@@ -122,7 +110,7 @@ def _indexed_name_ref(self, exp: Expression, key: str) -> Optional[Column]:
             and isinstance(exp.parameters[1], Literal)
             and exp.parameters[1].value == key
         ):
-            return Column(None, exp.parameters[0].table_name, self.INDEXED_NAME_COLUMN)
+            return Column(exp.alias, exp.parameters[0].table_name, self.INDEXED_NAME_COLUMN)
 
         if (
             isinstance(exp, SubscriptableReference)
@@ -130,71 +118,20 @@ def _indexed_name_ref(self, exp: Expression, key: str) -> Optional[Column]:
             and isinstance(exp.key, Literal)
             and exp.key.value == key
         ):
-            return Column(None, exp.column.table_name, self.INDEXED_NAME_COLUMN)
+            return Column(exp.alias, exp.column.table_name, self.INDEXED_NAME_COLUMN)
 
         return None
 
-    def _is_index_safe_value(self, function_name: str, rhs: Expression) -> bool:
-        """An empty string is the ``String`` column default, which non-backfilled
-        rows (empty ``indexed_name``) also read as — so ``indexed_name = ''`` would
-        match every such row regardless of its real value. Only probe
-        ``indexed_name`` for non-empty string literals, where a match means the
-        value was actually populated; the bucket fallback still covers the rest.
-        This mirrors ``_comparison_can_match_column_default`` on the RPC side: the
-        cases it guards are exactly the ones we must not push onto the index."""
-
-        def safe_literal(exp: Expression) -> bool:
-            return isinstance(exp, Literal) and isinstance(exp.value, str) and exp.value != ""
-
-        if function_name == ConditionFunctions.EQ:
-            return safe_literal(rhs)
-        # IN: the right-hand side is an ``array(...)`` of literals.
-        if function_name == ConditionFunctions.IN:
-            return (
-                isinstance(rhs, FunctionCall)
-                and rhs.function_name == "array"
-                and len(rhs.parameters) > 0
-                and all(safe_literal(p) for p in rhs.parameters)
-            )
-        return False
-
-    def _maybe_rewrite_conjunct(self, conjunct: Expression, key: str) -> Expression:
-        """Rewrite a top-level ``equals``/``in`` filter on the indexed attribute
-        into ``or(<op on indexed_name>, <original op on the bucket>)``. Any other
-        conjunct is returned unchanged."""
-        if (
-            not isinstance(conjunct, FunctionCall)
-            or conjunct.function_name not in self.OPTIMIZABLE_OPS
-            or len(conjunct.parameters) != 2
-        ):
-            return conjunct
-
-        indexed_ref = self._indexed_name_ref(conjunct.parameters[0], key)
-        if indexed_ref is None:
-            return conjunct
-
-        if not self._is_index_safe_value(conjunct.function_name, conjunct.parameters[1]):
-            return conjunct
-
-        indexed_branch = FunctionCall(
-            None,
-            conjunct.function_name,
-            (indexed_ref, conjunct.parameters[1]),
-        )
-        return or_cond(indexed_branch, conjunct)
-
     def process_query(self, query: Query, query_settings: QuerySettings) -> None:
-        key = self._indexed_name_key(query)
-        if key is None:
+        if state.get_int_config(self.CONFIG_KEY, 0) == 0:
             return
 
-        condition = query.get_condition()
-        if condition is None:
+        key = self._indexed_name_key(query)
+        if key is None:
             return
 
-        conjuncts = list(get_first_level_and_conditions(condition))
-        rewritten = [self._maybe_rewrite_conjunct(cond, key) for cond in conjuncts]
-        if rewritten == conjuncts:
-            return
+        def transform(exp: Expression) -> Expression:
+            indexed_ref = self._indexed_name_ref(exp, key)
+            return indexed_ref if indexed_ref is not None else exp
 
-        query.set_ast_condition(combine_and_conditions(rewritten))
+        query.transform_expressions(transform)
diff --git a/tests/query/processors/test_indexed_name_optimizer.py b/tests/query/processors/test_indexed_name_optimizer.py
index 2dd9a074ade..cf8cea31236 100644
--- a/tests/query/processors/test_indexed_name_optimizer.py
+++ b/tests/query/processors/test_indexed_name_optimizer.py
@@ -15,7 +15,6 @@
     in_cond,
     literal,
     literals_array,
-    or_cond,
 )
 from snuba.query.expressions import (
     Column,
@@ -29,6 +28,7 @@
     IndexedNameOptimizer,
 )
 from snuba.query.query_settings import HTTPQuerySettings
+from snuba.state import set_config
 
 # Use the protobuf enum values directly so the tests stay in sync with the
 # production dependency the optimizer keys off of.
@@ -74,13 +74,10 @@ def _query(
         _query(
             and_cond(
                 equals(column("item_type"), literal(SPAN)),
-                or_cond(
-                    equals(column("indexed_name"), literal("db.query")),
-                    equals(_attr_arr("sentry.op"), literal("db.query")),
-                ),
+                equals(column("indexed_name"), literal("db.query")),
             )
         ),
-        id="span sentry.op (arrayElement) gets an indexed_name OR fallback",
+        id="span sentry.op (arrayElement) rewritten to indexed_name",
     ),
     pytest.param(
         _query(
@@ -92,13 +89,10 @@ def _query(
         _query(
             and_cond(
                 equals(column("item_type"), literal(SPAN)),
-                or_cond(
-                    equals(column("indexed_name"), literal("db.query")),
-                    equals(_attr_str("sentry.op"), literal("db.query")),
-                ),
+                equals(column("indexed_name"), literal("db.query")),
             )
         ),
-        id="span sentry.op (SubscriptableReference) gets an indexed_name OR fallback",
+        id="span sentry.op (SubscriptableReference) rewritten to indexed_name",
     ),
     pytest.param(
         _query(
@@ -110,13 +104,10 @@ def _query(
         _query(
             and_cond(
                 equals(column("item_type"), literal(METRIC)),
-                or_cond(
-                    equals(column("indexed_name"), literal("my.metric")),
-                    equals(_attr_arr("sentry.metric.name"), literal("my.metric")),
-                ),
+                equals(column("indexed_name"), literal("my.metric")),
             )
         ),
-        id="metric sentry.metric.name gets an indexed_name OR fallback",
+        id="metric sentry.metric.name rewritten to indexed_name",
     ),
     pytest.param(
         _query(
@@ -131,19 +122,31 @@ def _query(
         _query(
             and_cond(
                 equals(column("item_type"), literal(SPAN)),
-                or_cond(
-                    in_cond(
-                        column("indexed_name"),
-                        literals_array(None, [literal("db.query"), literal("http.client")]),
-                    ),
-                    in_cond(
-                        _attr_arr("sentry.op"),
-                        literals_array(None, [literal("db.query"), literal("http.client")]),
-                    ),
+                in_cond(
+                    column("indexed_name"),
+                    literals_array(None, [literal("db.query"), literal("http.client")]),
                 ),
             )
         ),
-        id="span sentry.op IN gets an indexed_name OR fallback",
+        id="span sentry.op IN rewritten to indexed_name",
+    ),
+    pytest.param(
+        # The rewrite is a value substitution, so it applies to any operator the
+        # access appears under (the index only helps equals/in, but substituting
+        # is always correct once indexed_name is populated).
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                FunctionCall(None, "notEquals", (_attr_arr("sentry.op"), literal("db.query"))),
+            )
+        ),
+        _query(
+            and_cond(
+                equals(column("item_type"), literal(SPAN)),
+                FunctionCall(None, "notEquals", (column("indexed_name"), literal("db.query"))),
+            )
+        ),
+        id="any operator on the access is rewritten (notEquals)",
     ),
     pytest.param(
         _query(
@@ -195,97 +198,6 @@ def _query(
         ),
         id="unsupported item_type leaves bucket lookup",
     ),
-    pytest.param(
-        _query(
-            and_cond(
-                equals(column("item_type"), literal(SPAN)),
-                FunctionCall(
-                    None,
-                    "notEquals",
-                    (_attr_arr("sentry.op"), literal("db.query")),
-                ),
-            )
-        ),
-        _query(
-            and_cond(
-                equals(column("item_type"), literal(SPAN)),
-                FunctionCall(
-                    None,
-                    "notEquals",
-                    (_attr_arr("sentry.op"), literal("db.query")),
-                ),
-            )
-        ),
-        id="negated filter is left untouched (OR fallback would be incorrect)",
-    ),
-    pytest.param(
-        _query(
-            and_cond(
-                equals(column("item_type"), literal(SPAN)),
-                equals(_attr_arr("sentry.op"), literal("")),
-            )
-        ),
-        _query(
-            and_cond(
-                equals(column("item_type"), literal(SPAN)),
-                equals(_attr_arr("sentry.op"), literal("")),
-            )
-        ),
-        id="empty-string equals is left untouched (indexed_name='' over-matches)",
-    ),
-    pytest.param(
-        _query(
-            and_cond(
-                equals(column("item_type"), literal(SPAN)),
-                in_cond(
-                    _attr_arr("sentry.op"),
-                    literals_array(None, [literal("db.query"), literal("")]),
-                ),
-            )
-        ),
-        _query(
-            and_cond(
-                equals(column("item_type"), literal(SPAN)),
-                in_cond(
-                    _attr_arr("sentry.op"),
-                    literals_array(None, [literal("db.query"), literal("")]),
-                ),
-            )
-        ),
-        id="IN containing the empty string is left untouched",
-    ),
-    pytest.param(
-        # The guarded form the RPC emits when the value is the column default:
-        # and(mapContains(...), equals(arrayElement(...), '')). The empty value
-        # makes the indexed_name probe unsafe, so the whole thing is left alone.
-        _query(
-            and_cond(
-                equals(column("item_type"), literal(SPAN)),
-                and_cond(
-                    FunctionCall(
-                        None,
-                        "mapContains",
-                        (column("attributes_string"), literal("sentry.op")),
-                    ),
-                    equals(_attr_arr("sentry.op"), literal("")),
-                ),
-            )
-        ),
-        _query(
-            and_cond(
-                equals(column("item_type"), literal(SPAN)),
-                and_cond(
-                    FunctionCall(
-                        None,
-                        "mapContains",
-                        (column("attributes_string"), literal("sentry.op")),
-                    ),
-                    equals(_attr_arr("sentry.op"), literal("")),
-                ),
-            )
-        ),
-        id="guarded empty-value form is left untouched",
-    ),
     pytest.param(
         _query(
             condition=equals(column("item_type"), literal(SPAN)),
@@ -296,23 +208,42 @@ def _query(
         _query(
             condition=equals(column("item_type"), literal(SPAN)),
             selected_columns=[
-                SelectedExpression("op", _attr_str("sentry.op", alias="op")),
+                SelectedExpression("op", Column("op", None, "indexed_name")),
             ],
         ),
-        id="SELECT references are left on the bucket (index only helps WHERE)",
+        id="select preserves alias on rewrite",
     ),
 ]
 
 
+@pytest.mark.redis_db
 @pytest.mark.parametrize("pre_format, expected_query", test_data)
 def test_indexed_name_optimizer(pre_format: Query, expected_query: Query) -> None:
+    set_config(IndexedNameOptimizer.CONFIG_KEY, 1)
     copy = deepcopy(pre_format)
     IndexedNameOptimizer().process_query(copy, HTTPQuerySettings())
     assert copy.get_selected_columns() == expected_query.get_selected_columns()
     assert copy.get_condition() == expected_query.get_condition()
 
 
+@pytest.mark.redis_db
+def test_disabled_by_default_leaves_query_untouched() -> None:
+    set_config(IndexedNameOptimizer.CONFIG_KEY, 0)
+    query = _query(
+        and_cond(
+            equals(column("item_type"), literal(SPAN)),
+            equals(_attr_arr("sentry.op"), literal("db.query")),
+        )
+    )
+    copy = deepcopy(query)
+    IndexedNameOptimizer().process_query(copy, HTTPQuerySettings())
+    assert copy.get_condition() == query.get_condition()
+    assert copy.get_selected_columns() == query.get_selected_columns()
+
+
+@pytest.mark.redis_db
 def test_contradictory_item_types_left_untouched() -> None:
+    set_config(IndexedNameOptimizer.CONFIG_KEY, 1)
     query = _query(
         and_cond(
             equals(column("item_type"), literal(SPAN)),

From 0ad79ee954068507deacd480df42756ac1f8b2f2 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 19 Jun 2026 04:56:09 +0000
Subject: [PATCH 7/8] fix(eap-items): keep indexed_name key typed as str inside
 the transform closure

mypy widens the narrowed Optional[str] back to Optional[str] inside the nested
transform() closure, failing the pre-commit mypy hook. Bind it to a str-typed
local before the closure.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01Hax3wseAtUEKY7GfpcJNTx
---
 snuba/query/processors/logical/indexed_name_optimizer.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/snuba/query/processors/logical/indexed_name_optimizer.py b/snuba/query/processors/logical/indexed_name_optimizer.py
index 9bc44810d17..bec0de541e7 100644
--- a/snuba/query/processors/logical/indexed_name_optimizer.py
+++ b/snuba/query/processors/logical/indexed_name_optimizer.py
@@ -130,8 +130,12 @@ def process_query(self, query: Query, query_settings: QuerySettings) -> None:
         if key is None:
             return
 
+        # Bind to a str-typed local: mypy widens a narrowed enclosing-scope
+        # variable back to its declared Optional[str] inside the nested closure.
+        indexed_key: str = key
+
         def transform(exp: Expression) -> Expression:
-            indexed_ref = self._indexed_name_ref(exp, key)
+            indexed_ref = self._indexed_name_ref(exp, indexed_key)
             return indexed_ref if indexed_ref is not None else exp
 
         query.transform_expressions(transform)

From 3f6fe40178f752bc7c92c7f87dfdb229b4bc9a71 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 19 Jun 2026 14:45:55 +0000
Subject: [PATCH 8/8] test(eap-items): end-to-end time series test for
 metric-name filter via indexed_name

Add an EndpointTimeSeries test that writes METRIC items and runs a
sentry.metric.name-filtered SUM time series with the IndexedNameOptimizer flag
enabled. Asserts the name filter still returns the correct rows once it's
rewritten onto the indexed_name column, and that results are identical with the
flag off (bucket lookup) and on (indexed_name).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01Hax3wseAtUEKY7GfpcJNTx
---
 .../test_endpoint_time_series_indexed_name.py | 130 ++++++++++++++++++
 1 file changed, 130 insertions(+)
 create mode 100644 tests/web/rpc/v1/test_endpoint_time_series/test_endpoint_time_series_indexed_name.py

diff --git a/tests/web/rpc/v1/test_endpoint_time_series/test_endpoint_time_series_indexed_name.py b/tests/web/rpc/v1/test_endpoint_time_series/test_endpoint_time_series_indexed_name.py
new file mode 100644
index 00000000000..8ed925873f8
--- /dev/null
+++ b/tests/web/rpc/v1/test_endpoint_time_series/test_endpoint_time_series_indexed_name.py
@@ -0,0 +1,130 @@
+from datetime import UTC, datetime, timedelta
+
+import pytest
+from google.protobuf.timestamp_pb2 import Timestamp
+from sentry_protos.snuba.v1.endpoint_time_series_pb2 import (
+    TimeSeriesRequest,
+    TimeSeriesResponse,
+)
+from sentry_protos.snuba.v1.request_common_pb2 import RequestMeta, TraceItemType
+from sentry_protos.snuba.v1.trace_item_attribute_pb2 import (
+    AttributeAggregation,
+    AttributeKey,
+    AttributeValue,
+    ExtrapolationMode,
+    Function,
+)
+from sentry_protos.snuba.v1.trace_item_filter_pb2 import (
+    ComparisonFilter,
+    TraceItemFilter,
+)
+from sentry_protos.snuba.v1.trace_item_pb2 import AnyValue
+
+from snuba.datasets.storages.factory import get_storage
+from snuba.datasets.storages.storage_key import StorageKey
+from snuba.query.processors.logical.indexed_name_optimizer import IndexedNameOptimizer
+from snuba.state import set_config
+from snuba.web.rpc.v1.endpoint_time_series import EndpointTimeSeries
+from tests.base import BaseApiTest
+from tests.helpers import write_raw_unprocessed_events
+from tests.web.rpc.v1.test_utils import gen_item_message
+
+BASE_TIME = datetime.utcnow().replace(
+    hour=8, minute=0, second=0, microsecond=0, tzinfo=UTC
+) - timedelta(hours=24)
+
+GRANULARITY_SECS = 300
+QUERY_DURATION = 3600
+
+# A matching metric is written this many times; each carries value=1.0, so a SUM
+# over the matching items totals MATCHING_COUNT.
+MATCHING_COUNT = 6
+NON_MATCHING_COUNT = 4
+
+
+def _store_metrics() -> None:
+    """Write METRIC items: ``MATCHING_COUNT`` named ``my.metric`` plus
+    ``NON_MATCHING_COUNT`` named ``other.metric``. The rust processor promotes
+    ``sentry.metric.name`` into the ``indexed_name`` column (see
+    ``rust_snuba/src/processors/eap_items.rs``), which is what the optimizer
+    rewrites the name filter onto."""
+    messages: list[bytes] = []
+    for name, count in (("my.metric", MATCHING_COUNT), ("other.metric", NON_MATCHING_COUNT)):
+        for _ in range(count):
+            messages.append(
+                gen_item_message(
+                    start_timestamp=BASE_TIME + timedelta(seconds=60),
+                    type=TraceItemType.TRACE_ITEM_TYPE_METRIC,
+                    remove_default_attributes=True,
+                    attributes={
+                        "sentry.metric.name": AnyValue(string_value=name),
+                        "value": AnyValue(double_value=1.0),
+                    },
+                )
+            )
+    write_raw_unprocessed_events(get_storage(StorageKey("eap_items")), messages)  # type: ignore
+
+
+def _request() -> TimeSeriesRequest:
+    """SUM(value) over metrics named ``my.metric``. The name filter is what the
+    IndexedNameOptimizer rewrites onto the indexed_name column for metrics."""
+    return TimeSeriesRequest(
+        meta=RequestMeta(
+            project_ids=[1],
+            organization_id=1,
+            cogs_category="something",
+            referrer="something",
+            start_timestamp=Timestamp(seconds=int(BASE_TIME.timestamp())),
+            end_timestamp=Timestamp(seconds=int(BASE_TIME.timestamp()) + QUERY_DURATION),
+            trace_item_type=TraceItemType.TRACE_ITEM_TYPE_METRIC,
+        ),
+        filter=TraceItemFilter(
+            comparison_filter=ComparisonFilter(
+                key=AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.metric.name"),
+                op=ComparisonFilter.OP_EQUALS,
+                value=AttributeValue(val_str="my.metric"),
+            )
+        ),
+        aggregations=[
+            AttributeAggregation(
+                aggregate=Function.FUNCTION_SUM,
+                key=AttributeKey(type=AttributeKey.TYPE_FLOAT, name="value"),
+                label="sum",
+                extrapolation_mode=ExtrapolationMode.EXTRAPOLATION_MODE_NONE,
+            ),
+        ],
+        granularity_secs=GRANULARITY_SECS,
+    )
+
+
+def _total(response: TimeSeriesResponse) -> float:
+    return float(sum(dp.data for ts in response.result_timeseries for dp in ts.data_points))
+
+
+@pytest.mark.eap
+@pytest.mark.redis_db
+class TestTimeSeriesIndexedName(BaseApiTest):
+    def test_metric_name_filter_with_indexed_name_enabled(self) -> None:
+        """A metric-name-filtered time series returns the right rows when the
+        optimizer rewrites the name filter onto the indexed_name column."""
+        _store_metrics()
+        set_config(IndexedNameOptimizer.CONFIG_KEY, 1)
+
+        response = EndpointTimeSeries().execute(_request())
+
+        # Only the my.metric items (value=1.0 each) match; other.metric is excluded.
+        assert _total(response) == float(MATCHING_COUNT)
+
+    def test_indexed_name_rewrite_is_result_preserving(self) -> None:
+        """The rewrite must not change results: the same request returns the same
+        time series with the flag off (bucket lookup) and on (indexed_name)."""
+        _store_metrics()
+
+        set_config(IndexedNameOptimizer.CONFIG_KEY, 0)
+        disabled = EndpointTimeSeries().execute(_request())
+
+        set_config(IndexedNameOptimizer.CONFIG_KEY, 1)
+        enabled = EndpointTimeSeries().execute(_request())
+
+        assert _total(disabled) == float(MATCHING_COUNT)
+        assert list(enabled.result_timeseries) == list(disabled.result_timeseries)