apache · wombatu-kun · Jun 18, 2026 · Jun 14, 2026 · Jun 15, 2026 · Jun 15, 2026
diff --git a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiAlluxioCacheFileOperations.java b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiAlluxioCacheFileOperations.java
@@ -17,6 +17,7 @@
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableMultiset;
 import com.google.common.collect.Multiset;
+import io.airlift.units.Duration;
 import io.trino.plugin.hudi.testing.ResourceHudiTablesInitializer;
 import io.trino.plugin.hudi.util.FileOperationUtils.FileOperation;
 import io.trino.testing.AbstractTestQueryFramework;
@@ -36,15 +37,16 @@
 import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE;
 import static io.trino.filesystem.tracing.CacheFileSystemTraceUtils.getCacheOperationSpans;
 import static io.trino.plugin.hudi.testing.ResourceHudiTablesInitializer.TestingTable.HUDI_MULTI_FG_PT_V8_MOR;
-import static io.trino.plugin.hudi.util.FileOperationUtils.FileType.DATA;
 import static io.trino.plugin.hudi.util.FileOperationUtils.FileType.INDEX_DEFINITION;
 import static io.trino.plugin.hudi.util.FileOperationUtils.FileType.LOG;
 import static io.trino.plugin.hudi.util.FileOperationUtils.FileType.METADATA_TABLE;
 import static io.trino.plugin.hudi.util.FileOperationUtils.FileType.METADATA_TABLE_PROPERTIES;
 import static io.trino.plugin.hudi.util.FileOperationUtils.FileType.TABLE_PROPERTIES;
 import static io.trino.plugin.hudi.util.FileOperationUtils.FileType.TIMELINE;
 import static io.trino.testing.MultisetAssertions.assertMultisetsEqual;
+import static io.trino.testing.assertions.Assert.assertEventually;
 import static java.util.stream.Collectors.toCollection;
+import static org.assertj.core.api.Assertions.assertThat;
 
 @ResourceLock("HUDI_CACHE_SYSTEM")
 @Execution(ExecutionMode.SAME_THREAD)
@@ -66,10 +68,11 @@ protected DistributedQueryRunner createQueryRunner()
                 .put("fs.cache.directories", cacheDirectory.toAbsolutePath().toString())
                 .put("fs.cache.max-sizes", "100MB")
                 .put("hudi.metadata.cache.enabled", "false")
-                // Disable async table-statistics refresh: it reads the metadata table on a
-                // background executor whose spans can outlive the query and leak into the next
-                // test's measurement (the symmetric off-by-N flake). Disabling it makes the
-                // file-operation counts deterministic right after the query returns.
+                // Disable the async table-statistics refresh: on the first query it reads the index
+                // definitions and table-property files (and the metadata table) on a background
+                // executor. Those non-metadata-table reads land in the asserted set and their timing
+                // is non-deterministic, so we turn the refresh off and assert only the synchronous
+                // planning-path reads.
                 .put("hudi.table-statistics-enabled", "false")
                 .buildOrThrow();
 
@@ -87,12 +90,6 @@ public void testSelectWithFilter()
         assertFileSystemAccesses(
                 query,
                 ImmutableMultiset.<FileOperation>builder()
-                        .addCopies(new FileOperation("Alluxio.readCached", DATA), 2)
-                        .addCopies(new FileOperation("Alluxio.readCached", METADATA_TABLE), 27)
-                        .addCopies(new FileOperation("Alluxio.readCached", TIMELINE), 4)
-                        .addCopies(new FileOperation("Alluxio.readCached", LOG), 15)
-                        .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 4)
-                        .addCopies(new FileOperation("InputFile.length", METADATA_TABLE), 10)
                         .addCopies(new FileOperation("InputFile.length", TIMELINE), 2)
                         .addCopies(new FileOperation("InputFile.length", LOG), 1)
                         .addCopies(new FileOperation("InputFile.newStream", INDEX_DEFINITION), 2)
@@ -103,12 +100,6 @@ public void testSelectWithFilter()
         assertFileSystemAccesses(
                 query,
                 ImmutableMultiset.<FileOperation>builder()
-                        .addCopies(new FileOperation("Alluxio.readCached", DATA), 2)
-                        .addCopies(new FileOperation("Alluxio.readCached", METADATA_TABLE), 27)
-                        .addCopies(new FileOperation("Alluxio.readCached", TIMELINE), 4)
-                        .addCopies(new FileOperation("Alluxio.readCached", LOG), 15)
-                        .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 4)
-                        .addCopies(new FileOperation("InputFile.length", METADATA_TABLE), 10)
                         .addCopies(new FileOperation("InputFile.length", TIMELINE), 2)
                         .addCopies(new FileOperation("InputFile.length", LOG), 1)
                         .addCopies(new FileOperation("InputFile.newStream", INDEX_DEFINITION), 2)
@@ -127,12 +118,6 @@ public void testJoin()
 
         assertFileSystemAccesses(query,
                 ImmutableMultiset.<FileOperation>builder()
-                        .addCopies(new FileOperation("Alluxio.readCached", DATA), 6)
-                        .addCopies(new FileOperation("Alluxio.readCached", METADATA_TABLE), 215)
-                        .addCopies(new FileOperation("Alluxio.readCached", TIMELINE), 8)
-                        .addCopies(new FileOperation("Alluxio.readCached", LOG), 30)
-                        .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 29)
-                        .addCopies(new FileOperation("InputFile.length", METADATA_TABLE), 69)
                         .addCopies(new FileOperation("InputFile.length", TIMELINE), 4)
                         .addCopies(new FileOperation("InputFile.length", LOG), 2)
                         .addCopies(new FileOperation("InputFile.newStream", INDEX_DEFINITION), 4)
@@ -142,12 +127,6 @@ public void testJoin()
 
         assertFileSystemAccesses(query,
                 ImmutableMultiset.<FileOperation>builder()
-                        .addCopies(new FileOperation("Alluxio.readCached", DATA), 6)
-                        .addCopies(new FileOperation("Alluxio.readCached", METADATA_TABLE), 215)
-                        .addCopies(new FileOperation("Alluxio.readCached", TIMELINE), 8)
-                        .addCopies(new FileOperation("Alluxio.readCached", LOG), 30)
-                        .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 29)
-                        .addCopies(new FileOperation("InputFile.length", METADATA_TABLE), 69)
                         .addCopies(new FileOperation("InputFile.length", TIMELINE), 4)
                         .addCopies(new FileOperation("InputFile.length", LOG), 2)
                         .addCopies(new FileOperation("InputFile.newStream", INDEX_DEFINITION), 4)
@@ -156,6 +135,40 @@ public void testJoin()
                         .build());
     }
 
+    @Test
+    public void testReadsServedFromAlluxioCache()
+    {
+        // The tests above intentionally do not assert exact Alluxio cache hit/miss counts: the cache
+        // write is asynchronous, so the per-query counts flake (a write from one query can still be in
+        // flight when the next query runs). This test instead gives count-independent coverage that the
+        // Alluxio cache is actually engaged: once the cache is warmed, at least one read is served from
+        // it (an "Alluxio.readCached" span). assertEventually re-runs the query until the asynchronous
+        // cache write has landed and a genuine hit is observed, and fails loudly at the deadline if the
+        // cache never serves a read.
+        @Language("SQL") String query = "SELECT * FROM " + HUDI_MULTI_FG_PT_V8_MOR;
+        DistributedQueryRunner queryRunner = getDistributedQueryRunner();
+
+        // Warm the cache; the page write into Alluxio happens on a background thread.
+        queryRunner.executeWithPlan(queryRunner.getDefaultSession(), query);
+
+        assertEventually(
+                Duration.valueOf("30s"),
+                Duration.valueOf("500ms"),
+                () -> {
+                    queryRunner.executeWithPlan(queryRunner.getDefaultSession(), query);
+                    assertThat(countCachedReads(queryRunner))
+                            .as("Alluxio.readCached spans (cache hits)")
+                            .isGreaterThanOrEqualTo(1);
+                });
+    }
+
+    private static long countCachedReads(QueryRunner queryRunner)
+    {
+        return getCacheOperationSpans(queryRunner).stream()
+                .filter(span -> span.getName().equals("Alluxio.readCached"))
+                .count();
+    }
+
     private void assertFileSystemAccesses(@Language("SQL") String query, Multiset<FileOperation> expectedCacheAccesses)
     {
         DistributedQueryRunner queryRunner = getDistributedQueryRunner();
@@ -169,6 +182,14 @@ public static Multiset<FileOperation> getFileOperations(QueryRunner queryRunner)
                 .stream()
                 .filter(span -> !span.getName().startsWith("InputFile.exists"))
                 .map(FileOperation::create)
+                // Metadata-table reads are issued from Hudi background pools (split loading, partition
+                // listing, table-statistics refresh) whose spans can outlive the synchronous query and
+                // land in the next query's measurement window, so their per-query counts are not
+                // deterministic. Alluxio cache hits/misses (Alluxio.*) depend on whether an earlier
+                // asynchronous cache write had already completed, so their counts are not deterministic
+                // either. Both are excluded; only synchronous foreground reads are asserted.
+                .filter(operation -> operation.fileType() != METADATA_TABLE)
+                .filter(operation -> !operation.operationType().startsWith("Alluxio."))
                 .collect(toCollection(HashMultiset::create));
     }
 }
diff --git a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiMemoryCacheFileOperations.java b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiMemoryCacheFileOperations.java
@@ -56,10 +56,11 @@ protected DistributedQueryRunner createQueryRunner()
                 .put("hudi.metadata-enabled", "true")
                 .put("hudi.metadata.cache.enabled", "true")
                 .put("fs.cache.enabled", "false")
-                // Disable async table-statistics refresh: it reads the metadata table on a
-                // background executor whose spans can outlive the query and leak into the next
-                // test's measurement (the symmetric off-by-N flake). Disabling it makes the
-                // file-operation counts deterministic right after the query returns.
+                // Disable the async table-statistics refresh: on the first query it reads the index
+                // definitions and table-property files (and the metadata table) on a background
+                // executor. Those non-metadata-table reads land in the asserted set and their timing
+                // is non-deterministic, so we turn the refresh off and assert only the synchronous
+                // planning-path reads.
                 .put("hudi.table-statistics-enabled", "false")
                 .buildOrThrow();
 
@@ -78,11 +79,8 @@ public void testSelectWithFilter()
                 query,
                 ImmutableMultiset.<FileOperation>builder()
                         .addCopies(new FileOperation("FileSystemCache.cacheInput", DATA), 2)
-                        .addCopies(new FileOperation("FileSystemCache.cacheLength", METADATA_TABLE), 4)
-                        .addCopies(new FileOperation("FileSystemCache.cacheStream", METADATA_TABLE), 6)
                         .addCopies(new FileOperation("FileSystemCache.cacheStream", TIMELINE), 2)
                         .addCopies(new FileOperation("FileSystemCache.cacheStream", LOG), 1)
-                        .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 4)
                         .addCopies(new FileOperation("InputFile.newStream", INDEX_DEFINITION), 2)
                         .add(new FileOperation("InputFile.newStream", METADATA_TABLE_PROPERTIES))
                         .addCopies(new FileOperation("InputFile.newStream", TABLE_PROPERTIES), 2)
@@ -92,11 +90,8 @@ public void testSelectWithFilter()
                 query,
                 ImmutableMultiset.<FileOperation>builder()
                         .addCopies(new FileOperation("FileSystemCache.cacheInput", DATA), 2)
-                        .addCopies(new FileOperation("FileSystemCache.cacheLength", METADATA_TABLE), 4)
-                        .addCopies(new FileOperation("FileSystemCache.cacheStream", METADATA_TABLE), 6)
                         .addCopies(new FileOperation("FileSystemCache.cacheStream", TIMELINE), 2)
                         .addCopies(new FileOperation("FileSystemCache.cacheStream", LOG), 1)
-                        .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 4)
                         .addCopies(new FileOperation("InputFile.newStream", INDEX_DEFINITION), 2)
                         .add(new FileOperation("InputFile.newStream", METADATA_TABLE_PROPERTIES))
                         .addCopies(new FileOperation("InputFile.newStream", TABLE_PROPERTIES), 2)
@@ -114,11 +109,8 @@ public void testJoin()
         assertFileSystemAccesses(query,
                 ImmutableMultiset.<FileOperation>builder()
                         .addCopies(new FileOperation("FileSystemCache.cacheInput", DATA), 6)
-                        .addCopies(new FileOperation("FileSystemCache.cacheLength", METADATA_TABLE), 29)
-                        .addCopies(new FileOperation("FileSystemCache.cacheStream", METADATA_TABLE), 40)
                         .addCopies(new FileOperation("FileSystemCache.cacheStream", TIMELINE), 4)
                         .addCopies(new FileOperation("FileSystemCache.cacheStream", LOG), 2)
-                        .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 29)
                         .addCopies(new FileOperation("InputFile.newStream", INDEX_DEFINITION), 4)
                         .addCopies(new FileOperation("InputFile.newStream", METADATA_TABLE_PROPERTIES), 2)
                         .addCopies(new FileOperation("InputFile.newStream", TABLE_PROPERTIES), 4)
@@ -127,11 +119,8 @@ public void testJoin()
         assertFileSystemAccesses(query,
                 ImmutableMultiset.<FileOperation>builder()
                         .addCopies(new FileOperation("FileSystemCache.cacheInput", DATA), 6)
-                        .addCopies(new FileOperation("FileSystemCache.cacheLength", METADATA_TABLE), 29)
-                        .addCopies(new FileOperation("FileSystemCache.cacheStream", METADATA_TABLE), 40)
                         .addCopies(new FileOperation("FileSystemCache.cacheStream", TIMELINE), 4)
                         .addCopies(new FileOperation("FileSystemCache.cacheStream", LOG), 2)
-                        .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 29)
                         .addCopies(new FileOperation("InputFile.newStream", INDEX_DEFINITION), 4)
                         .addCopies(new FileOperation("InputFile.newStream", METADATA_TABLE_PROPERTIES), 2)
                         .addCopies(new FileOperation("InputFile.newStream", TABLE_PROPERTIES), 4)
@@ -153,6 +142,11 @@ private static Multiset<FileOperation> getFileOperations(QueryRunner queryRunner
                 .filter(span -> !span.getName().startsWith("InputFile.exists"))
                 .filter(span -> !isTrinoSchemaOrPermissions(getFileLocation(span)))
                 .map(FileOperation::create)
+                // Metadata-table reads are issued from Hudi background pools (split loading, partition
+                // listing, table-statistics refresh) whose spans can outlive the synchronous query and
+                // land in the next query's measurement window. Their per-query counts are therefore
+                // non-deterministic, so they are excluded; only synchronous foreground reads are asserted.
+                .filter(operation -> operation.fileType() != METADATA_TABLE)
                 .collect(toCollection(HashMultiset::create));
     }
 }