From ef563eed651df600d3b527afe221ace7d2c95d75 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Sat, 23 May 2026 21:23:01 -0700 Subject: [PATCH 1/5] Spark: Trim row-level test parameter rows from 6 to 3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduces SparkRowLevelOperationsTestBase parameter rows from 6 to 3 in v4.0 / v4.1 (and from 7 to 3 in v3.5), shifting from "test every catalog backend" to "test the catalogs that matter for production": - testhive (Hive) — kept as the established Hive metastore baseline - testrest (REST) — added in place of testhadoop, since REST is the OSS-strategic catalog and testhadoop isn't recommended for prod - spark_catalog (REST-backed) — repointed from Hive to REST so the SessionCatalog row exercises the REST commit path instead of Hive formatVersion 2 covered by the testhive and testrest rows; formatVersion 3 covered by the spark_catalog/REST row, which exercises the DV (deletion-vector) path that validateSnapshot checks via formatVersion >= 3. The trim affects 9 concrete subclasses (TestCopyOnWriteMerge/Update/Delete, TestMergeOnReadMerge/Update/Delete, both *MergeMetrics, and TestMergeSchemaEvolution), each cutting test invocations by 50% (~57% on Spark 3.5). Note: TestCopyOnWriteWithLineage and TestMergeOnReadWithLineage are unaffected — TestRowLevelOperationsWithLineage redeclares parameters() with its own row set. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../SparkRowLevelOperationsTestBase.java | 80 +++---------------- .../SparkRowLevelOperationsTestBase.java | 68 +++------------- .../SparkRowLevelOperationsTestBase.java | 68 +++------------- 3 files changed, 36 insertions(+), 180 deletions(-) diff --git a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java index 893f9931cfa2..dad8b1244e26 100644 --- a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java +++ b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java @@ -46,11 +46,10 @@ import java.util.Arrays; import java.util.List; import java.util.Map; -import java.util.Random; import java.util.Set; import java.util.UUID; -import java.util.concurrent.ThreadLocalRandom; import java.util.stream.Collectors; +import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.DataFile; import org.apache.iceberg.FileFormat; import org.apache.iceberg.Files; @@ -83,8 +82,6 @@ @ExtendWith(ParameterizedTestExtension.class) public abstract class SparkRowLevelOperationsTestBase extends ExtensionsTestBase { - private static final Random RANDOM = ThreadLocalRandom.current(); - @Parameter(index = 3) protected FileFormat fileFormat; @@ -128,86 +125,33 @@ public static Object[][] parameters() { 2 }, { - "testhive", - SparkCatalog.class.getName(), - ImmutableMap.of( - "type", "hive", - "default-namespace", "default"), - FileFormat.PARQUET, - true, - WRITE_DISTRIBUTION_MODE_NONE, - false, - "test", - DISTRIBUTED, - 2 - }, - { - "testhadoop", + "testrest", SparkCatalog.class.getName(), - ImmutableMap.of("type", "hadoop"), - FileFormat.PARQUET, - RANDOM.nextBoolean(), - WRITE_DISTRIBUTION_MODE_HASH, - true, - null, - LOCAL, - 2 - }, - { - "spark_catalog", - SparkSessionCatalog.class.getName(), ImmutableMap.of( - "type", "hive", - "default-namespace", "default", - "clients", "1", - "parquet-enabled", "false", + "type", + "rest", + CatalogProperties.URI, + restCatalog.properties().get(CatalogProperties.URI), "cache-enabled", - "false" // Spark will delete tables using v1, leaving the cache out of sync - ), - FileFormat.AVRO, - false, - WRITE_DISTRIBUTION_MODE_RANGE, - false, - "test", - DISTRIBUTED, - 2 - }, - { - "testhadoop", - SparkCatalog.class.getName(), - ImmutableMap.of("type", "hadoop"), + "false"), FileFormat.PARQUET, true, WRITE_DISTRIBUTION_MODE_HASH, - true, - null, - LOCAL, - 3 - }, - { - "testhadoop", - SparkCatalog.class.getName(), - ImmutableMap.of("type", "hadoop"), - FileFormat.PARQUET, false, - WRITE_DISTRIBUTION_MODE_HASH, - true, null, - LOCAL, - 3 + DISTRIBUTED, + 2 }, { "spark_catalog", SparkSessionCatalog.class.getName(), ImmutableMap.of( "type", - "hive", + "rest", + CatalogProperties.URI, + restCatalog.properties().get(CatalogProperties.URI), "default-namespace", "default", - "clients", - "1", - "parquet-enabled", - "false", "cache-enabled", "false" // Spark will delete tables using v1, leaving the cache out of sync ), diff --git a/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java b/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java index b5d641576314..dad8b1244e26 100644 --- a/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java +++ b/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java @@ -46,11 +46,10 @@ import java.util.Arrays; import java.util.List; import java.util.Map; -import java.util.Random; import java.util.Set; import java.util.UUID; -import java.util.concurrent.ThreadLocalRandom; import java.util.stream.Collectors; +import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.DataFile; import org.apache.iceberg.FileFormat; import org.apache.iceberg.Files; @@ -83,8 +82,6 @@ @ExtendWith(ParameterizedTestExtension.class) public abstract class SparkRowLevelOperationsTestBase extends ExtensionsTestBase { - private static final Random RANDOM = ThreadLocalRandom.current(); - @Parameter(index = 3) protected FileFormat fileFormat; @@ -128,74 +125,33 @@ public static Object[][] parameters() { 2 }, { - "testhive", + "testrest", SparkCatalog.class.getName(), ImmutableMap.of( - "type", "hive", - "default-namespace", "default"), + "type", + "rest", + CatalogProperties.URI, + restCatalog.properties().get(CatalogProperties.URI), + "cache-enabled", + "false"), FileFormat.PARQUET, true, - WRITE_DISTRIBUTION_MODE_NONE, - false, - "test", - DISTRIBUTED, - 2 - }, - { - "testhadoop", - SparkCatalog.class.getName(), - ImmutableMap.of("type", "hadoop"), - FileFormat.PARQUET, - RANDOM.nextBoolean(), WRITE_DISTRIBUTION_MODE_HASH, - true, - null, - LOCAL, - 2 - }, - { - "spark_catalog", - SparkSessionCatalog.class.getName(), - ImmutableMap.of( - "type", "hive", - "default-namespace", "default", - "clients", "1", - "parquet-enabled", "false", - "cache-enabled", - "false" // Spark will delete tables using v1, leaving the cache out of sync - ), - FileFormat.AVRO, false, - WRITE_DISTRIBUTION_MODE_RANGE, - false, - "test", + null, DISTRIBUTED, 2 }, - { - "testhadoop", - SparkCatalog.class.getName(), - ImmutableMap.of("type", "hadoop"), - FileFormat.PARQUET, - RANDOM.nextBoolean(), - WRITE_DISTRIBUTION_MODE_HASH, - true, - null, - LOCAL, - 3 - }, { "spark_catalog", SparkSessionCatalog.class.getName(), ImmutableMap.of( "type", - "hive", + "rest", + CatalogProperties.URI, + restCatalog.properties().get(CatalogProperties.URI), "default-namespace", "default", - "clients", - "1", - "parquet-enabled", - "false", "cache-enabled", "false" // Spark will delete tables using v1, leaving the cache out of sync ), diff --git a/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java b/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java index b5d641576314..dad8b1244e26 100644 --- a/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java +++ b/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java @@ -46,11 +46,10 @@ import java.util.Arrays; import java.util.List; import java.util.Map; -import java.util.Random; import java.util.Set; import java.util.UUID; -import java.util.concurrent.ThreadLocalRandom; import java.util.stream.Collectors; +import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.DataFile; import org.apache.iceberg.FileFormat; import org.apache.iceberg.Files; @@ -83,8 +82,6 @@ @ExtendWith(ParameterizedTestExtension.class) public abstract class SparkRowLevelOperationsTestBase extends ExtensionsTestBase { - private static final Random RANDOM = ThreadLocalRandom.current(); - @Parameter(index = 3) protected FileFormat fileFormat; @@ -128,74 +125,33 @@ public static Object[][] parameters() { 2 }, { - "testhive", + "testrest", SparkCatalog.class.getName(), ImmutableMap.of( - "type", "hive", - "default-namespace", "default"), + "type", + "rest", + CatalogProperties.URI, + restCatalog.properties().get(CatalogProperties.URI), + "cache-enabled", + "false"), FileFormat.PARQUET, true, - WRITE_DISTRIBUTION_MODE_NONE, - false, - "test", - DISTRIBUTED, - 2 - }, - { - "testhadoop", - SparkCatalog.class.getName(), - ImmutableMap.of("type", "hadoop"), - FileFormat.PARQUET, - RANDOM.nextBoolean(), WRITE_DISTRIBUTION_MODE_HASH, - true, - null, - LOCAL, - 2 - }, - { - "spark_catalog", - SparkSessionCatalog.class.getName(), - ImmutableMap.of( - "type", "hive", - "default-namespace", "default", - "clients", "1", - "parquet-enabled", "false", - "cache-enabled", - "false" // Spark will delete tables using v1, leaving the cache out of sync - ), - FileFormat.AVRO, false, - WRITE_DISTRIBUTION_MODE_RANGE, - false, - "test", + null, DISTRIBUTED, 2 }, - { - "testhadoop", - SparkCatalog.class.getName(), - ImmutableMap.of("type", "hadoop"), - FileFormat.PARQUET, - RANDOM.nextBoolean(), - WRITE_DISTRIBUTION_MODE_HASH, - true, - null, - LOCAL, - 3 - }, { "spark_catalog", SparkSessionCatalog.class.getName(), ImmutableMap.of( "type", - "hive", + "rest", + CatalogProperties.URI, + restCatalog.properties().get(CatalogProperties.URI), "default-namespace", "default", - "clients", - "1", - "parquet-enabled", - "false", "cache-enabled", "false" // Spark will delete tables using v1, leaving the cache out of sync ), From be47c2ebddb57791c08623c7283b6c273d8ca840 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Sat, 23 May 2026 21:43:43 -0700 Subject: [PATCH 2/5] Replace non-session REST row with second testhive row Drops the testrest row in favor of restoring testhive as the carrier for the HASH/null/DISTRIBUTED axes that previously sat on testhadoop. REST catalog coverage is preserved by the spark_catalog (REST-backed) row. Rationale: for the row-level operation code paths these tests exercise, a non-session SparkCatalog wrapper around REST adds little beyond what the SessionCatalog wrapper already covers. Both return SparkTable; both commit through the same MergingSnapshotProducer; the differences live in table-resolution paths (DDL/aliasing), not in MERGE/UPDATE/DELETE. Other test classes (TestStructuredStreamingRead3 etc.) already exercise REST as a non-session catalog. Eliminates 2 of the 3 known TestBase.move() / metadata-delete fixture failures from the previous version. The remaining failure (testDeleteWithoutScanningTable on spark_catalog/REST) is a pre-existing TestBase.move() limitation with non-URI paths and needs a follow-up fix. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../extensions/SparkRowLevelOperationsTestBase.java | 10 +++------- .../extensions/SparkRowLevelOperationsTestBase.java | 10 +++------- .../extensions/SparkRowLevelOperationsTestBase.java | 10 +++------- 3 files changed, 9 insertions(+), 21 deletions(-) diff --git a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java index dad8b1244e26..72988ae0ed9e 100644 --- a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java +++ b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java @@ -125,15 +125,11 @@ public static Object[][] parameters() { 2 }, { - "testrest", + "testhive", SparkCatalog.class.getName(), ImmutableMap.of( - "type", - "rest", - CatalogProperties.URI, - restCatalog.properties().get(CatalogProperties.URI), - "cache-enabled", - "false"), + "type", "hive", + "default-namespace", "default"), FileFormat.PARQUET, true, WRITE_DISTRIBUTION_MODE_HASH, diff --git a/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java b/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java index dad8b1244e26..72988ae0ed9e 100644 --- a/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java +++ b/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java @@ -125,15 +125,11 @@ public static Object[][] parameters() { 2 }, { - "testrest", + "testhive", SparkCatalog.class.getName(), ImmutableMap.of( - "type", - "rest", - CatalogProperties.URI, - restCatalog.properties().get(CatalogProperties.URI), - "cache-enabled", - "false"), + "type", "hive", + "default-namespace", "default"), FileFormat.PARQUET, true, WRITE_DISTRIBUTION_MODE_HASH, diff --git a/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java b/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java index dad8b1244e26..72988ae0ed9e 100644 --- a/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java +++ b/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java @@ -125,15 +125,11 @@ public static Object[][] parameters() { 2 }, { - "testrest", + "testhive", SparkCatalog.class.getName(), ImmutableMap.of( - "type", - "rest", - CatalogProperties.URI, - restCatalog.properties().get(CatalogProperties.URI), - "cache-enabled", - "false"), + "type", "hive", + "default-namespace", "default"), FileFormat.PARQUET, true, WRITE_DISTRIBUTION_MODE_HASH, From 5864e065daf53967cbf888267f3dd3224b8acbbb Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Sat, 23 May 2026 22:13:27 -0700 Subject: [PATCH 3/5] Spark: Fix TestBase.move() to handle locations without URI schemes Paths.get(URI.create(location)) requires the URI to carry a scheme. HiveCatalog returns manifest paths with file:// schemes, so the existing move() works. RESTCatalog (as configured by RESTServerExtension in tests) returns plain local paths without a scheme, which makes Paths.get(URI) throw IllegalArgumentException: Missing scheme. Add a small toPath() helper that falls back to Paths.get(location) when URI.create(location).getScheme() is null. This unblocks testDeleteWithoutScanningTable and the equivalent MERGE-side test on spark_catalog/REST rows. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/test/java/org/apache/iceberg/spark/TestBase.java | 9 +++++++-- .../src/test/java/org/apache/iceberg/spark/TestBase.java | 9 +++++++-- .../src/test/java/org/apache/iceberg/spark/TestBase.java | 9 +++++++-- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/TestBase.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/TestBase.java index 5e7e1a1f6193..58437773a5ab 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/TestBase.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/TestBase.java @@ -157,8 +157,8 @@ protected void withUnavailableFiles(Iterable> files, Ac } private void move(String location, String newLocation) { - Path path = Paths.get(URI.create(location)); - Path tempPath = Paths.get(URI.create(newLocation)); + Path path = toPath(location); + Path tempPath = toPath(newLocation); try { Files.move(path, tempPath); @@ -167,6 +167,11 @@ private void move(String location, String newLocation) { } } + private static Path toPath(String location) { + URI uri = URI.create(location); + return uri.getScheme() != null ? Paths.get(uri) : Paths.get(location); + } + protected void withUnavailableLocations(Iterable locations, Action action) { for (String location : locations) { move(location, location + "_temp"); diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/TestBase.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/TestBase.java index 5e7e1a1f6193..58437773a5ab 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/TestBase.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/TestBase.java @@ -157,8 +157,8 @@ protected void withUnavailableFiles(Iterable> files, Ac } private void move(String location, String newLocation) { - Path path = Paths.get(URI.create(location)); - Path tempPath = Paths.get(URI.create(newLocation)); + Path path = toPath(location); + Path tempPath = toPath(newLocation); try { Files.move(path, tempPath); @@ -167,6 +167,11 @@ private void move(String location, String newLocation) { } } + private static Path toPath(String location) { + URI uri = URI.create(location); + return uri.getScheme() != null ? Paths.get(uri) : Paths.get(location); + } + protected void withUnavailableLocations(Iterable locations, Action action) { for (String location : locations) { move(location, location + "_temp"); diff --git a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/TestBase.java b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/TestBase.java index 507d7b313b42..a7639a006a15 100644 --- a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/TestBase.java +++ b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/TestBase.java @@ -158,8 +158,8 @@ protected void withUnavailableFiles(Iterable> files, Ac } private void move(String location, String newLocation) { - Path path = Paths.get(URI.create(location)); - Path tempPath = Paths.get(URI.create(newLocation)); + Path path = toPath(location); + Path tempPath = toPath(newLocation); try { Files.move(path, tempPath); @@ -168,6 +168,11 @@ private void move(String location, String newLocation) { } } + private static Path toPath(String location) { + URI uri = URI.create(location); + return uri.getScheme() != null ? Paths.get(uri) : Paths.get(location); + } + protected void withUnavailableLocations(Iterable locations, Action action) { for (String location : locations) { move(location, location + "_temp"); From 503d2904fd2c75c1067f61c22c902cd58840c0af Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Sun, 24 May 2026 08:25:43 -0700 Subject: [PATCH 4/5] Build: Fix REST catalog test fixture warehouse path scheme Move the URI-scheme handling from TestBase.move() (Spark) to the source of the inconsistency: the REST catalog test fixture's default warehouse path. RESTCatalogServer used getAbsolutePath() which returns plain filesystem paths without a URI scheme; HiveCatalog and HadoopCatalog return file:// paths via Hadoop's Path machinery, so test fixtures that consume catalog paths (e.g. TestBase.move() which calls Paths.get(URI.create(...))) work for those catalogs but break for REST. Switch to toURI().toString() so the REST fixture matches the Hive and Hadoop convention. testDeleteWithoutScanningTable and the equivalent MERGE-side test now pass on the spark_catalog/REST row without needing scheme-handling logic at every consumer. Reverts the TestBase.move() change from the previous commit. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../java/org/apache/iceberg/rest/RESTCatalogServer.java | 2 +- .../src/test/java/org/apache/iceberg/spark/TestBase.java | 9 ++------- .../src/test/java/org/apache/iceberg/spark/TestBase.java | 9 ++------- .../src/test/java/org/apache/iceberg/spark/TestBase.java | 9 ++------- 4 files changed, 7 insertions(+), 22 deletions(-) diff --git a/open-api/src/testFixtures/java/org/apache/iceberg/rest/RESTCatalogServer.java b/open-api/src/testFixtures/java/org/apache/iceberg/rest/RESTCatalogServer.java index 2e4541b50b33..daed482c74a5 100644 --- a/open-api/src/testFixtures/java/org/apache/iceberg/rest/RESTCatalogServer.java +++ b/open-api/src/testFixtures/java/org/apache/iceberg/rest/RESTCatalogServer.java @@ -92,7 +92,7 @@ private CatalogContext initializeBackendCatalog() throws IOException { if (warehouseLocation == null) { File tmp = java.nio.file.Files.createTempDirectory("iceberg_warehouse").toFile(); tmp.deleteOnExit(); - warehouseLocation = new File(tmp, "iceberg_data").getAbsolutePath(); + warehouseLocation = new File(tmp, "iceberg_data").toURI().toString(); catalogProperties.put(CatalogProperties.WAREHOUSE_LOCATION, warehouseLocation); LOG.info("No warehouse location set. Defaulting to temp location: {}", warehouseLocation); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/TestBase.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/TestBase.java index 58437773a5ab..5e7e1a1f6193 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/TestBase.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/TestBase.java @@ -157,8 +157,8 @@ protected void withUnavailableFiles(Iterable> files, Ac } private void move(String location, String newLocation) { - Path path = toPath(location); - Path tempPath = toPath(newLocation); + Path path = Paths.get(URI.create(location)); + Path tempPath = Paths.get(URI.create(newLocation)); try { Files.move(path, tempPath); @@ -167,11 +167,6 @@ private void move(String location, String newLocation) { } } - private static Path toPath(String location) { - URI uri = URI.create(location); - return uri.getScheme() != null ? Paths.get(uri) : Paths.get(location); - } - protected void withUnavailableLocations(Iterable locations, Action action) { for (String location : locations) { move(location, location + "_temp"); diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/TestBase.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/TestBase.java index 58437773a5ab..5e7e1a1f6193 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/TestBase.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/TestBase.java @@ -157,8 +157,8 @@ protected void withUnavailableFiles(Iterable> files, Ac } private void move(String location, String newLocation) { - Path path = toPath(location); - Path tempPath = toPath(newLocation); + Path path = Paths.get(URI.create(location)); + Path tempPath = Paths.get(URI.create(newLocation)); try { Files.move(path, tempPath); @@ -167,11 +167,6 @@ private void move(String location, String newLocation) { } } - private static Path toPath(String location) { - URI uri = URI.create(location); - return uri.getScheme() != null ? Paths.get(uri) : Paths.get(location); - } - protected void withUnavailableLocations(Iterable locations, Action action) { for (String location : locations) { move(location, location + "_temp"); diff --git a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/TestBase.java b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/TestBase.java index a7639a006a15..507d7b313b42 100644 --- a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/TestBase.java +++ b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/TestBase.java @@ -158,8 +158,8 @@ protected void withUnavailableFiles(Iterable> files, Ac } private void move(String location, String newLocation) { - Path path = toPath(location); - Path tempPath = toPath(newLocation); + Path path = Paths.get(URI.create(location)); + Path tempPath = Paths.get(URI.create(newLocation)); try { Files.move(path, tempPath); @@ -168,11 +168,6 @@ private void move(String location, String newLocation) { } } - private static Path toPath(String location) { - URI uri = URI.create(location); - return uri.getScheme() != null ? Paths.get(uri) : Paths.get(location); - } - protected void withUnavailableLocations(Iterable locations, Action action) { for (String location : locations) { move(location, location + "_temp"); From 6f4ed58f25eb51e8e185a433ba828f7f64055747 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Sun, 24 May 2026 10:20:12 -0700 Subject: [PATCH 5/5] Spark: Drop REST-only path workaround in TestRewriteTablePathProcedure The test had a special-case branch that called file.getAbsolutePath() (no scheme) instead of file.toURI().toString() when catalogName was testrest, because the old REST fixture returned warehouse paths without a scheme and the test needed the deletes.parquet path to match. With RESTCatalogServer now using toURI().toString() for the default warehouse, table.location() returns a scheme-prefixed path on REST too, so the workaround is no longer needed - and is now incorrect, since RewriteTablePathUtil.newPositionDeleteEntry validates that delete file paths start with the table location prefix (which now includes file://). Removing the special case lets the simple toURI().toString() path apply uniformly across all catalogs. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../spark/extensions/TestRewriteTablePathProcedure.java | 7 ------- .../spark/extensions/TestRewriteTablePathProcedure.java | 7 ------- .../spark/extensions/TestRewriteTablePathProcedure.java | 7 ------- 3 files changed, 21 deletions(-) diff --git a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteTablePathProcedure.java b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteTablePathProcedure.java index ceb3077c5670..996fb2636ada 100644 --- a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteTablePathProcedure.java +++ b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteTablePathProcedure.java @@ -35,7 +35,6 @@ import org.apache.iceberg.TableUtil; import org.apache.iceberg.data.FileHelpers; import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.spark.SparkCatalogConfig; import org.apache.iceberg.util.Pair; import org.apache.spark.sql.AnalysisException; import org.junit.jupiter.api.AfterEach; @@ -222,12 +221,6 @@ public void testRewriteTablePathWithManifestAndDeleteCounts() throws IOException File file = new File(removePrefix(table.location()) + "/data/deletes.parquet"); String filePath = file.toURI().toString(); - if (SparkCatalogConfig.REST.catalogName().equals(catalogName)) { - // We applied this special handling because the base path for - // matching the RESTCATALOG's Hive BaseLocation is represented - // in the form of an AbsolutePath. - filePath = file.getAbsolutePath().toString(); - } DeleteFile positionDeletes = FileHelpers.writeDeleteFile(table, table.io().newOutputFile(filePath), rowsToDelete) diff --git a/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteTablePathProcedure.java b/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteTablePathProcedure.java index 78f6b80ac948..cab33c8b005c 100644 --- a/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteTablePathProcedure.java +++ b/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteTablePathProcedure.java @@ -35,7 +35,6 @@ import org.apache.iceberg.TableUtil; import org.apache.iceberg.data.FileHelpers; import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.spark.SparkCatalogConfig; import org.apache.iceberg.util.Pair; import org.apache.spark.sql.AnalysisException; import org.junit.jupiter.api.AfterEach; @@ -224,12 +223,6 @@ public void testRewriteTablePathWithManifestAndDeleteCounts() throws IOException File file = new File(removePrefix(table.location()) + "/data/deletes.parquet"); String filePath = file.toURI().toString(); - if (SparkCatalogConfig.REST.catalogName().equals(catalogName)) { - // We applied this special handling because the base path for - // matching the RESTCATALOG's Hive BaseLocation is represented - // in the form of an AbsolutePath. - filePath = file.getAbsolutePath().toString(); - } DeleteFile positionDeletes = FileHelpers.writeDeleteFile(table, table.io().newOutputFile(filePath), rowsToDelete) diff --git a/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteTablePathProcedure.java b/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteTablePathProcedure.java index 78f6b80ac948..cab33c8b005c 100644 --- a/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteTablePathProcedure.java +++ b/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteTablePathProcedure.java @@ -35,7 +35,6 @@ import org.apache.iceberg.TableUtil; import org.apache.iceberg.data.FileHelpers; import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.spark.SparkCatalogConfig; import org.apache.iceberg.util.Pair; import org.apache.spark.sql.AnalysisException; import org.junit.jupiter.api.AfterEach; @@ -224,12 +223,6 @@ public void testRewriteTablePathWithManifestAndDeleteCounts() throws IOException File file = new File(removePrefix(table.location()) + "/data/deletes.parquet"); String filePath = file.toURI().toString(); - if (SparkCatalogConfig.REST.catalogName().equals(catalogName)) { - // We applied this special handling because the base path for - // matching the RESTCATALOG's Hive BaseLocation is represented - // in the form of an AbsolutePath. - filePath = file.getAbsolutePath().toString(); - } DeleteFile positionDeletes = FileHelpers.writeDeleteFile(table, table.io().newOutputFile(filePath), rowsToDelete)