From 456e6dc2c38130628f73798129799aa1de4d5b92 Mon Sep 17 00:00:00 2001 From: Jing chen He Date: Tue, 12 May 2026 11:45:21 -0700 Subject: [PATCH] fix(java): expose updatedFragmentOffsets on Update operation for RewriteColumns commits --- java/lance-jni/src/transaction.rs | 72 +++++++++++++++++-- .../main/java/org/lance/operation/Update.java | 62 +++++++++++++++- .../java/org/lance/operation/UpdateTest.java | 58 +++++++++++++++ 3 files changed, 185 insertions(+), 7 deletions(-) diff --git a/java/lance-jni/src/transaction.rs b/java/lance-jni/src/transaction.rs index 26bc0daf955..ffe179ea329 100644 --- a/java/lance-jni/src/transaction.rs +++ b/java/lance-jni/src/transaction.rs @@ -19,7 +19,7 @@ use jni::sys::{jboolean, jint}; use lance::dataset::CommitBuilder; use lance::dataset::transaction::{ DataReplacementGroup, Operation, RewriteGroup, RewrittenIndex, Transaction, TransactionBuilder, - UpdateMap, UpdateMapEntry, UpdateMode, + UpdateMap, UpdateMapEntry, UpdateMode, UpdatedFragmentOffsets, }; use lance::io::ObjectStoreParams; use lance::io::commit::namespace_manifest::LanceNamespaceExternalManifestStore; @@ -433,7 +433,7 @@ fn convert_to_java_operation_inner<'local>( fields_for_preserving_frag_bitmap, update_mode, inserted_rows_filter: _, - updated_fragment_offsets: _, + updated_fragment_offsets, } => { let removed_ids: Vec> = removed_fragment_ids .iter() @@ -457,9 +457,38 @@ fn convert_to_java_operation_inner<'local>( &[JValue::Object(&update_mode)], )? .l()?; + // Serialize updated_fragment_offsets to Java Map. + // Empty HashMap when None so the Java constructor always receives a non-null map. + // A per-iteration local frame (capacity 4: Long + long[] + put return + slack) + // bounds local-ref growth for large offset maps. + let java_offsets_map = { + let java_map = env.new_object("java/util/HashMap", "()V", &[])?; + if let Some(UpdatedFragmentOffsets(ref map)) = updated_fragment_offsets { + for (frag_id, bitmap) in map { + let offsets: Vec = bitmap.iter().map(|x| x as i64).collect(); + env.with_local_frame(4, |env| { + let java_key = env.new_object( + "java/lang/Long", + "(J)V", + &[JValue::Long(*frag_id as i64)], + )?; + let java_arr = env.new_long_array(offsets.len() as i32)?; + env.set_long_array_region(&java_arr, 0, &offsets)?; + env.call_method( + &java_map, + "put", + "(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;", + &[JValue::Object(&java_key), JValue::Object(&*java_arr)], + )?; + Ok::(JObject::null()) + })?; + } + } + java_map + }; Ok(env.new_object( "org/lance/operation/Update", - "(Ljava/util/List;Ljava/util/List;Ljava/util/List;[J[JLjava/util/Optional;)V", + "(Ljava/util/List;Ljava/util/List;Ljava/util/List;[J[JLjava/util/Optional;Ljava/util/Map;)V", &[ JValue::Object(&removed_fragment_ids_obj), JValue::Object(&updated_fragments_obj), @@ -467,6 +496,7 @@ fn convert_to_java_operation_inner<'local>( JValueGen::Object(&fields_modified), JValueGen::Object(&fields_for_preserving_frag_bitmap), JValue::Object(&update_mode_optional), + JValue::Object(&java_offsets_map), ], )?) } @@ -1214,6 +1244,40 @@ fn convert_to_rust_operation( update_mode.extract_object(env) })?; + let updated_fragment_offsets = { + let offsets_obj = env + .call_method( + java_operation, + "updatedFragmentOffsets", + "()Ljava/util/Map;", + &[], + )? + .l()?; + if offsets_obj.is_null() { + None + } else { + let jmap = JMap::from_env(env, &offsets_obj)?; + let mut iter = jmap.iter(env)?; + let mut offsets: HashMap = HashMap::new(); + env.with_local_frame(32, |env| { + while let Some((key, value)) = iter.next(env)? { + let frag_id = + env.call_method(&key, "longValue", "()J", &[])?.j()? as u64; + let row_offsets: Vec = + JLongArray::from(value).extract_object(env)?; + let bitmap: RoaringBitmap = row_offsets.into_iter().collect(); + offsets.insert(frag_id, bitmap); + } + Ok::<(), Error>(()) + })?; + if offsets.is_empty() { + None + } else { + Some(UpdatedFragmentOffsets(offsets)) + } + } + }; + Operation::Update { removed_fragment_ids, updated_fragments, @@ -1223,7 +1287,7 @@ fn convert_to_rust_operation( fields_for_preserving_frag_bitmap, update_mode, inserted_rows_filter: None, - updated_fragment_offsets: None, + updated_fragment_offsets, } } "DataReplacement" => { diff --git a/java/src/main/java/org/lance/operation/Update.java b/java/src/main/java/org/lance/operation/Update.java index f886942b4b9..7abcac58385 100644 --- a/java/src/main/java/org/lance/operation/Update.java +++ b/java/src/main/java/org/lance/operation/Update.java @@ -20,6 +20,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Optional; @@ -31,19 +32,29 @@ public class Update implements Operation { private final long[] fieldsForPreservingFragBitmap; private final Optional updateMode; + /** + * Per-fragment physical row offsets that matched an update_columns hash join (RewriteColumns). + * Keys are fragment ids; values are the local physical row offsets (0-based) within the fragment + * whose columns were rewritten. Empty map means the caller did not supply offsets and the partial + * last_updated refresh in build_manifest will not activate. + */ + private final Map updatedFragmentOffsets; + private Update( List removedFragmentIds, List updatedFragments, List newFragments, long[] fieldsModified, long[] fieldsForPreservingFragBitmap, - Optional updateMode) { + Optional updateMode, + Map updatedFragmentOffsets) { this.removedFragmentIds = removedFragmentIds; this.updatedFragments = updatedFragments; this.newFragments = newFragments; this.fieldsModified = fieldsModified; this.fieldsForPreservingFragBitmap = fieldsForPreservingFragBitmap; this.updateMode = updateMode; + this.updatedFragmentOffsets = updatedFragmentOffsets; } public static Builder builder() { @@ -74,6 +85,10 @@ public Optional updateMode() { return updateMode; } + public Map updatedFragmentOffsets() { + return updatedFragmentOffsets; + } + @Override public String name() { return "Update"; @@ -87,6 +102,7 @@ public String toString() { .add("fieldsModified", fieldsModified) .add("fieldsForPreservingFragBitmap", fieldsForPreservingFragBitmap) .add("updateMode", updateMode) + .add("updatedFragmentOffsets", updatedFragmentOffsets) .toString(); } @@ -100,7 +116,32 @@ public boolean equals(Object o) { && Objects.equals(newFragments, that.newFragments) && Arrays.equals(fieldsModified, that.fieldsModified) && Arrays.equals(fieldsForPreservingFragBitmap, that.fieldsForPreservingFragBitmap) - && Objects.equals(updateMode, that.updateMode); + && Objects.equals(updateMode, that.updateMode) + && offsetMapsEqual(updatedFragmentOffsets, that.updatedFragmentOffsets); + } + + /** Deep-equality for {@code Map}: keys by value, arrays by content. */ + private static boolean offsetMapsEqual(Map a, Map b) { + if (a == b) return true; + if (a.size() != b.size()) return false; + for (Map.Entry entry : a.entrySet()) { + if (!Arrays.equals(entry.getValue(), b.get(entry.getKey()))) return false; + } + return true; + } + + @Override + public int hashCode() { + int h = Objects.hash(removedFragmentIds, updatedFragments, newFragments, updateMode); + h = 31 * h + Arrays.hashCode(fieldsModified); + h = 31 * h + Arrays.hashCode(fieldsForPreservingFragBitmap); + // Sum entry hashes (XOR key ^ array-content hash) so result is insertion-order-independent. + int mapHash = 0; + for (Map.Entry entry : updatedFragmentOffsets.entrySet()) { + mapHash += Long.hashCode(entry.getKey()) ^ Arrays.hashCode(entry.getValue()); + } + h = 31 * h + mapHash; + return h; } public enum UpdateMode { @@ -115,6 +156,7 @@ public static class Builder { private long[] fieldsModified = new long[0]; private long[] fieldsForPreservingFragBitmap = new long[0]; private Optional updateMode = Optional.empty(); + private Map updatedFragmentOffsets = Collections.emptyMap(); private Builder() {} @@ -148,6 +190,19 @@ public Builder updateMode(Optional updateMode) { return this; } + /** + * Set the per-fragment matched row offsets for a RewriteColumns commit. + * + *

Keys are fragment ids; values are the local physical row offsets (0-based) within the + * fragment that matched the update_columns hash join. When non-empty and update mode is + * RewriteColumns with stable row IDs enabled, build_manifest will call the partial last_updated + * refresh for those offsets only. + */ + public Builder updatedFragmentOffsets(Map updatedFragmentOffsets) { + this.updatedFragmentOffsets = updatedFragmentOffsets; + return this; + } + public Update build() { return new Update( removedFragmentIds, @@ -155,7 +210,8 @@ public Update build() { newFragments, fieldsModified, fieldsForPreservingFragBitmap, - updateMode); + updateMode, + updatedFragmentOffsets); } } } diff --git a/java/src/test/java/org/lance/operation/UpdateTest.java b/java/src/test/java/org/lance/operation/UpdateTest.java index bb39a5f4d12..66e2e284307 100644 --- a/java/src/test/java/org/lance/operation/UpdateTest.java +++ b/java/src/test/java/org/lance/operation/UpdateTest.java @@ -36,10 +36,14 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Optional; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertThrows; public class UpdateTest extends OperationTestBase { @@ -104,6 +108,60 @@ void testUpdate(@TempDir Path tempDir) throws Exception { } } + @Test + void testUpdatedFragmentOffsetsRoundTrip(@TempDir Path tempDir) throws Exception { + String datasetPath = tempDir.resolve("testUpdatedFragmentOffsetsRoundTrip").toString(); + try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) { + TestUtils.SimpleTestDataset testDataset = + new TestUtils.SimpleTestDataset(allocator, datasetPath); + dataset = testDataset.createEmptyDataset(); + + // Append an initial fragment so we have a real fragment id. + FragmentMetadata fragmentMeta = testDataset.createNewFragment(10); + try (Transaction appendTxn = + new Transaction.Builder() + .readVersion(dataset.version()) + .operation( + Append.builder().fragments(Collections.singletonList(fragmentMeta)).build()) + .build()) { + new CommitBuilder(dataset).execute(appendTxn).close(); + } + + dataset = Dataset.open(datasetPath, allocator); + long fragmentId = dataset.getFragments().get(0).getId(); + FragmentMetadata newFragment = testDataset.createNewFragment(10); + + // Build Update with non-empty updatedFragmentOffsets. Values must fit in u32 + // (RoaringBitmap) and are already sorted so the round-trip order is deterministic. + Map offsets = new HashMap<>(); + offsets.put(fragmentId, new long[] {1L, 3L, 5L}); + + try (Transaction updateTxn = + new Transaction.Builder() + .readVersion(dataset.version()) + .operation( + Update.builder() + .removedFragmentIds(Collections.singletonList(fragmentId)) + .newFragments(Collections.singletonList(newFragment)) + .updateMode(Optional.of(UpdateMode.RewriteRows)) + .updatedFragmentOffsets(offsets) + .build()) + .build()) { + try (Dataset committed = new CommitBuilder(dataset).execute(updateTxn)) { + // Read the committed transaction back (exercises the IntoJava JNI path). + try (Transaction readTx = committed.readTransaction().orElseThrow()) { + assertInstanceOf(Update.class, readTx.operation()); + Update readOp = (Update) readTx.operation(); + + Map readOffsets = readOp.updatedFragmentOffsets(); + assertEquals(1, readOffsets.size()); + assertArrayEquals(new long[] {1L, 3L, 5L}, readOffsets.get(fragmentId)); + } + } + } + } + } + @Test void testUpdateColumns(@TempDir Path tempDir) throws Exception { String datasetPath = tempDir.resolve("testUpdateColumns").toString();