From 047522065ee438fcdad6bb6590a8dba0d8941f0d Mon Sep 17 00:00:00 2001 From: Clemens Portele Date: Sat, 27 Jun 2026 16:12:07 +0200 Subject: [PATCH] improve SQL queries for predicates on main-table columns A filter predicate on a scalar column of the main feature table was always encoded as a self-semi-join (A.id IN (SELECT A.id FROM <table>
WHERE ...)). That subquery scans the whole table and the planner does not flatten it, so its cost grew with the table size rather than with the number of selected features. Such predicates are now emitted as a direct conjunct on the main alias. The semi-join form is kept whenever evaluating the predicate requires a join, junction, or array traversal, where it is load-bearing for cardinality. This also fixes a latent bug in the temporal interval (OVERLAPS) encoding, which hardcoded a trailing parenthesis to close the self-semi-join subquery; it now preserves the operand expression's suffix so both the inline and the subquery forms stay balanced. A negated inline predicate is wrapped as NOT (...). Updates the golden SQL specs and query fixtures accordingly. --- .../features/sql/app/FilterEncoderSql.java | 49 ++++++++++-- .../FilterEncoderSqlInResultSetSpec.groovy | 16 ++-- .../sql/app/FilterEncoderSqlSpec.groovy | 80 +++++++++---------- .../sql/app/SqlQueryTemplatesFixtures.groovy | 10 +-- .../sql-queries/self_joins_filter.yml | 6 +- .../resources/sql-queries/simple_filter.yml | 2 +- .../sql-queries/simple_filter_scopes.yml | 2 +- .../simple_filter_scopes_filter.yml | 2 +- 8 files changed, 102 insertions(+), 65 deletions(-) diff --git a/xtraplatform-features-sql/src/main/java/de/ii/xtraplatform/features/sql/app/FilterEncoderSql.java b/xtraplatform-features-sql/src/main/java/de/ii/xtraplatform/features/sql/app/FilterEncoderSql.java index 016749522..054b2e246 100644 --- a/xtraplatform-features-sql/src/main/java/de/ii/xtraplatform/features/sql/app/FilterEncoderSql.java +++ b/xtraplatform-features-sql/src/main/java/de/ii/xtraplatform/features/sql/app/FilterEncoderSql.java @@ -768,6 +768,20 @@ public String visit(Property property, List children) { FilterEncoderSql.this); if (!join.isEmpty()) join += " "; + // When the predicate needs no sub-table join, its operand is a column reachable directly from + // the main table (aliased A). 
Emit it as a direct conjunct instead of a redundant + // self-semi-join (A.id IN (SELECT A.id FROM <table>
WHERE ...)): that subquery scans the whole + // main table and the planner does not flatten it, so it is O(table) per predicate regardless + // of how few rows are selected. The semi-join form is kept whenever a join, junction, or + // array + // traversal is genuinely required (join non-empty) — there it is load-bearing for + // cardinality. + if (join.isEmpty() && !Objects.equals(table.getParentPath(), ImmutableList.of("_route_"))) { + return String.format( + "%%1$s%1$s%%2$s", + getQualifiedColumn(table, propertyName, "A", allowColumnFallback).first()); + } + return String.format( "A.%3$s IN (SELECT %2$s.%3$s FROM %1$s %2$s %4$sWHERE %%1$s%5$s%%2$s)", rootSchema.getName(), @@ -832,7 +846,8 @@ public String visit(de.ii.xtraplatform.cql.domain.Interval interval, List children) { FilterEncoderSql.this); if (!join.isEmpty()) join += " "; + // When the predicate needs no sub-table join, its operand is a column reachable directly from + // the main table (aliased A). Emit it as a direct conjunct instead of a redundant + // self-semi-join (A.id IN (SELECT A.id FROM
WHERE ...)): that subquery scans the whole + // main table and the planner does not flatten it, so it is O(table) per predicate regardless + // of how few rows are selected. The semi-join form is kept whenever a join, junction, or + // array + // traversal is genuinely required (join non-empty) — there it is load-bearing for + // cardinality. + if (join.isEmpty() + && !Objects.equals(table.first().getParentPath(), ImmutableList.of("_route_"))) { + return String.format( + "%%1$s%1$s%%2$s", + getQualifiedColumn( + table.first(), table.second(), propertyName, "A", allowColumnFallback) + .first()); + } + return String.format( "A.%3$s IN (SELECT %2$s.%3$s FROM %1$s %2$s %4$sWHERE %%1$s%5$s%%2$s)", mapping.getMainTable().getName(), @@ -1840,7 +1874,8 @@ public String visit(de.ii.xtraplatform.cql.domain.Interval interval, List SELF_JOINS_FILTER = [ - "SELECT A.id AS SKEY, A.oid FROM building A WHERE (A.id IN (SELECT AA.id FROM building AA WHERE AA.oid > 1)) ORDER BY 1", - "SELECT A.id AS SKEY, B.id AS SKEY_1, B.id FROM building A JOIN building B ON (A.id=B.fk_buildingpart_parent AND (B.id > 100)) WHERE (A.id IN (SELECT AA.id FROM building AA WHERE AA.oid > 1)) ORDER BY 1,2", - "SELECT A.id AS SKEY, B.id AS SKEY_1, B.id FROM building A JOIN building B ON (A.fk_buildingpart_parent=B.id AND (B.id > 1000)) WHERE (A.id IN (SELECT AA.id FROM building AA WHERE AA.oid > 1)) ORDER BY 1,2" + "SELECT A.id AS SKEY, A.oid FROM building A WHERE (A.oid > 1) ORDER BY 1", + "SELECT A.id AS SKEY, B.id AS SKEY_1, B.id FROM building A JOIN building B ON (A.id=B.fk_buildingpart_parent AND (B.id > 100)) WHERE (A.oid > 1) ORDER BY 1,2", + "SELECT A.id AS SKEY, B.id AS SKEY_1, B.id FROM building A JOIN building B ON (A.fk_buildingpart_parent=B.id AND (B.id > 1000)) WHERE (A.oid > 1) ORDER BY 1,2" ] static List SELF_JOIN_NESTED_DUPLICATE = [ diff --git a/xtraplatform-features-sql/src/test/resources/sql-queries/self_joins_filter.yml 
b/xtraplatform-features-sql/src/test/resources/sql-queries/self_joins_filter.yml index 9754d6d0d..f245dd3f8 100644 --- a/xtraplatform-features-sql/src/test/resources/sql-queries/self_joins_filter.yml +++ b/xtraplatform-features-sql/src/test/resources/sql-queries/self_joins_filter.yml @@ -1,17 +1,17 @@ - >- SELECT A.id AS SKEY, A.oid FROM building A - WHERE (A.id IN (SELECT AA.id FROM building AA WHERE AA.oid > 1)) + WHERE (A.oid > 1) ORDER BY 1 - >- SELECT A.id AS SKEY, B.id AS SKEY_1, B.id FROM building A JOIN building B ON (A.id=B.fk_buildingpart_parent AND (B.id > 100)) - WHERE (A.id IN (SELECT AA.id FROM building AA WHERE AA.oid > 1)) + WHERE (A.oid > 1) ORDER BY 1,2 - >- SELECT A.id AS SKEY, B.id AS SKEY_1, B.id FROM building A JOIN building B ON (A.fk_buildingpart_parent=B.id AND (B.id > 1000)) - WHERE (A.id IN (SELECT AA.id FROM building AA WHERE AA.oid > 1)) + WHERE (A.oid > 1) ORDER BY 1,2 \ No newline at end of file diff --git a/xtraplatform-features-sql/src/test/resources/sql-queries/simple_filter.yml b/xtraplatform-features-sql/src/test/resources/sql-queries/simple_filter.yml index d14081e56..f3782f1e1 100644 --- a/xtraplatform-features-sql/src/test/resources/sql-queries/simple_filter.yml +++ b/xtraplatform-features-sql/src/test/resources/sql-queries/simple_filter.yml @@ -1,5 +1,5 @@ - >- SELECT A.id AS SKEY, A.id FROM externalprovider A - WHERE (A.id IN (SELECT AA.id FROM externalprovider AA WHERE AA.type = 1)) + WHERE (A.type = 1) ORDER BY 1 \ No newline at end of file diff --git a/xtraplatform-features-sql/src/test/resources/sql-queries/simple_filter_scopes.yml b/xtraplatform-features-sql/src/test/resources/sql-queries/simple_filter_scopes.yml index f0cfdadd0..92c2d655d 100644 --- a/xtraplatform-features-sql/src/test/resources/sql-queries/simple_filter_scopes.yml +++ b/xtraplatform-features-sql/src/test/resources/sql-queries/simple_filter_scopes.yml @@ -1,5 +1,5 @@ - >- SELECT A.id AS SKEY, A.id FROM coretable A - WHERE (A.id IN (SELECT AA.id FROM 
coretable AA WHERE AA.featuretype = 'BP_BaugebietsTeilFlaeche')) + WHERE (A.featuretype = 'BP_BaugebietsTeilFlaeche') ORDER BY 1 diff --git a/xtraplatform-features-sql/src/test/resources/sql-queries/simple_filter_scopes_filter.yml b/xtraplatform-features-sql/src/test/resources/sql-queries/simple_filter_scopes_filter.yml index cc6ba1724..ebef85882 100644 --- a/xtraplatform-features-sql/src/test/resources/sql-queries/simple_filter_scopes_filter.yml +++ b/xtraplatform-features-sql/src/test/resources/sql-queries/simple_filter_scopes_filter.yml @@ -1,5 +1,5 @@ - >- SELECT A.id AS SKEY, A.id FROM coretable A - WHERE ((A.id IN (SELECT AA.id FROM coretable AA WHERE AA.featuretype = 'BP_BaugebietsTeilFlaeche') AND A.id IN (SELECT AA.id FROM coretable AA JOIN refs AB ON (AA.id=AB.related_id AND (AB.rel = 'texte')) WHERE AB.base_id = 'foo'))) + WHERE ((A.featuretype = 'BP_BaugebietsTeilFlaeche' AND A.id IN (SELECT AA.id FROM coretable AA JOIN refs AB ON (AA.id=AB.related_id AND (AB.rel = 'texte')) WHERE AB.base_id = 'foo'))) ORDER BY 1