Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 68 additions & 4 deletions java/lance-jni/src/transaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use jni::sys::{jboolean, jint};
use lance::dataset::CommitBuilder;
use lance::dataset::transaction::{
DataReplacementGroup, Operation, RewriteGroup, RewrittenIndex, Transaction, TransactionBuilder,
UpdateMap, UpdateMapEntry, UpdateMode,
UpdateMap, UpdateMapEntry, UpdateMode, UpdatedFragmentOffsets,
};
use lance::io::ObjectStoreParams;
use lance::io::commit::namespace_manifest::LanceNamespaceExternalManifestStore;
Expand Down Expand Up @@ -433,7 +433,7 @@ fn convert_to_java_operation_inner<'local>(
fields_for_preserving_frag_bitmap,
update_mode,
inserted_rows_filter: _,
updated_fragment_offsets: _,
updated_fragment_offsets,
} => {
let removed_ids: Vec<JLance<i64>> = removed_fragment_ids
.iter()
Expand All @@ -457,16 +457,46 @@ fn convert_to_java_operation_inner<'local>(
&[JValue::Object(&update_mode)],
)?
.l()?;
// Serialize `updated_fragment_offsets` into a Java `HashMap<Long, long[]>`:
// one entry per fragment id, whose value holds that fragment's bitmap members as longs.
// When the field is `None` the map is left empty, so the Java constructor always
// receives a non-null map.
// Each entry is built inside its own local frame (capacity 4: Long + long[] + put
// return + slack), which bounds local-ref growth for large offset maps.
let java_offsets_map = {
let java_map = env.new_object("java/util/HashMap", "()V", &[])?;
if let Some(UpdatedFragmentOffsets(ref map)) = updated_fragment_offsets {
for (frag_id, bitmap) in map {
// Convert every bitmap member to i64 so the values can be copied into a Java `long[]`.
let offsets: Vec<i64> = bitmap.iter().map(|x| x as i64).collect();
env.with_local_frame(4, |env| {
// NOTE(review): `as i64` reinterprets the fragment id; an id above i64::MAX would
// surface as a negative Long key on the Java side — confirm this cannot occur.
let java_key = env.new_object(
"java/lang/Long",
"(J)V",
&[JValue::Long(*frag_id as i64)],
)?;
// NOTE(review): `as i32` truncates silently when a fragment carries more than
// i32::MAX offsets — presumably unreachable in practice; confirm.
let java_arr = env.new_long_array(offsets.len() as i32)?;
env.set_long_array_region(&java_arr, 0, &offsets)?;
env.call_method(
&java_map,
"put",
"(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;",
&[JValue::Object(&java_key), JValue::Object(&*java_arr)],
)?;
// The frame's result is discarded by the caller; a null object satisfies the
// closure's declared return type.
Ok::<JObject, Error>(JObject::null())
})?;
}
}
java_map
};
Ok(env.new_object(
"org/lance/operation/Update",
"(Ljava/util/List;Ljava/util/List;Ljava/util/List;[J[JLjava/util/Optional;)V",
"(Ljava/util/List;Ljava/util/List;Ljava/util/List;[J[JLjava/util/Optional;Ljava/util/Map;)V",
&[
JValue::Object(&removed_fragment_ids_obj),
JValue::Object(&updated_fragments_obj),
JValue::Object(&new_fragments_obj),
JValueGen::Object(&fields_modified),
JValueGen::Object(&fields_for_preserving_frag_bitmap),
JValue::Object(&update_mode_optional),
JValue::Object(&java_offsets_map),
],
)?)
}
Expand Down Expand Up @@ -1214,6 +1244,40 @@ fn convert_to_rust_operation(
update_mode.extract_object(env)
})?;

// Read the optional per-fragment offsets back from the Java operation.
// `updatedFragmentOffsets()` returns a `java.util.Map`; each key is unboxed with
// `longValue()` and each value (a `long[]`) is extracted and collected into a
// `RoaringBitmap`. A null map and an empty map both become `None`.
let updated_fragment_offsets = {
let offsets_obj = env
.call_method(
java_operation,
"updatedFragmentOffsets",
"()Ljava/util/Map;",
&[],
)?
.l()?;
if offsets_obj.is_null() {
// A null map from Java is tolerated and treated as "no offsets supplied".
None
} else {
let jmap = JMap::from_env(env, &offsets_obj)?;
let mut iter = jmap.iter(env)?;
let mut offsets: HashMap<u64, RoaringBitmap> = HashMap::new();
// NOTE(review): a single local frame (capacity 32) wraps the entire loop, whereas the
// Rust-to-Java direction in this file opens one frame per entry. Presumably every
// `iter.next` yields fresh key/value local refs that stay alive until the frame is
// popped — confirm this is acceptable for maps with many entries.
env.with_local_frame(32, |env| {
while let Some((key, value)) = iter.next(env)? {
// NOTE(review): `as u64` reinterprets a negative Long as a very large fragment
// id — confirm keys are never negative.
let frag_id =
env.call_method(&key, "longValue", "()J", &[])?.j()? as u64;
// NOTE(review): the narrowing from Java `long` to `u32` happens inside
// `extract_object`, which is defined elsewhere; verify how out-of-range
// values are handled.
let row_offsets: Vec<u32> =
JLongArray::from(value).extract_object(env)?;
let bitmap: RoaringBitmap = row_offsets.into_iter().collect();
offsets.insert(frag_id, bitmap);
}
Ok::<(), Error>(())
})?;
// Normalize "present but empty" to `None`.
if offsets.is_empty() {
None
} else {
Some(UpdatedFragmentOffsets(offsets))
}
}
};

Operation::Update {
removed_fragment_ids,
updated_fragments,
Expand All @@ -1223,7 +1287,7 @@ fn convert_to_rust_operation(
fields_for_preserving_frag_bitmap,
update_mode,
inserted_rows_filter: None,
updated_fragment_offsets: None,
updated_fragment_offsets,
}
}
"DataReplacement" => {
Expand Down
62 changes: 59 additions & 3 deletions java/src/main/java/org/lance/operation/Update.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;

Expand All @@ -31,19 +32,29 @@ public class Update implements Operation {
private final long[] fieldsForPreservingFragBitmap;
private final Optional<UpdateMode> updateMode;

/**
* Per-fragment physical row offsets that matched an update_columns hash join (RewriteColumns).
* Keys are fragment ids; values are the local physical row offsets (0-based) within the fragment
* whose columns were rewritten. Empty map means the caller did not supply offsets and the partial
* last_updated refresh in build_manifest will not activate.
*/
private final Map<Long, long[]> updatedFragmentOffsets;

private Update(
List<Long> removedFragmentIds,
List<FragmentMetadata> updatedFragments,
List<FragmentMetadata> newFragments,
long[] fieldsModified,
long[] fieldsForPreservingFragBitmap,
Optional<UpdateMode> updateMode) {
Optional<UpdateMode> updateMode,
Map<Long, long[]> updatedFragmentOffsets) {
this.removedFragmentIds = removedFragmentIds;
this.updatedFragments = updatedFragments;
this.newFragments = newFragments;
this.fieldsModified = fieldsModified;
this.fieldsForPreservingFragBitmap = fieldsForPreservingFragBitmap;
this.updateMode = updateMode;
this.updatedFragmentOffsets = updatedFragmentOffsets;
}

public static Builder builder() {
Expand Down Expand Up @@ -74,6 +85,10 @@ public Optional<UpdateMode> updateMode() {
return updateMode;
}

/**
 * Returns the per-fragment row offsets carried by this operation.
 *
 * <p>Keys are fragment ids; values are the row offsets within that fragment. The map held by
 * this operation is returned directly, not a copy.
 *
 * @return the offsets map stored in this operation
 */
public Map<Long, long[]> updatedFragmentOffsets() {
  return this.updatedFragmentOffsets;
}

@Override
public String name() {
return "Update";
Expand All @@ -87,6 +102,7 @@ public String toString() {
.add("fieldsModified", fieldsModified)
.add("fieldsForPreservingFragBitmap", fieldsForPreservingFragBitmap)
.add("updateMode", updateMode)
.add("updatedFragmentOffsets", updatedFragmentOffsets)
.toString();
}

Expand All @@ -100,7 +116,32 @@ public boolean equals(Object o) {
&& Objects.equals(newFragments, that.newFragments)
&& Arrays.equals(fieldsModified, that.fieldsModified)
&& Arrays.equals(fieldsForPreservingFragBitmap, that.fieldsForPreservingFragBitmap)
&& Objects.equals(updateMode, that.updateMode);
&& Objects.equals(updateMode, that.updateMode)
&& offsetMapsEqual(updatedFragmentOffsets, that.updatedFragmentOffsets);
}

/**
 * Deep-equality for {@code Map<Long, long[]>}: keys by value, arrays by content.
 *
 * <p>Two {@code null} maps are equal, and a {@code null} map never equals a non-null one. A key
 * that is absent from {@code b} is never treated as equal to a key that maps to {@code null}.
 *
 * @param a first map, may be {@code null}
 * @param b second map, may be {@code null}
 * @return {@code true} when both maps hold the same keys with content-equal arrays
 */
private static boolean offsetMapsEqual(Map<Long, long[]> a, Map<Long, long[]> b) {
  if (a == b) {
    return true;
  }
  if (a == null || b == null || a.size() != b.size()) {
    return false;
  }
  for (Map.Entry<Long, long[]> entry : a.entrySet()) {
    Long key = entry.getKey();
    // Arrays.equals(null, null) is true, so a missing key has to be ruled out explicitly;
    // otherwise {k1 -> null} would compare equal to {k2 -> anything}.
    if (!b.containsKey(key) || !Arrays.equals(entry.getValue(), b.get(key))) {
      return false;
    }
  }
  return true;
}

/**
 * Hash consistent with {@code equals}: the two {@code long[]} fields and the offset arrays are
 * hashed by content rather than by identity.
 */
@Override
public int hashCode() {
  // Each entry contributes (key hash XOR array-content hash); the contributions are summed so
  // the map's share of the hash does not depend on iteration order.
  int offsetsHash =
      updatedFragmentOffsets.entrySet().stream()
          .mapToInt(e -> Long.hashCode(e.getKey()) ^ Arrays.hashCode(e.getValue()))
          .sum();

  int result = Objects.hash(removedFragmentIds, updatedFragments, newFragments, updateMode);
  result = 31 * result + Arrays.hashCode(fieldsModified);
  result = 31 * result + Arrays.hashCode(fieldsForPreservingFragBitmap);
  return 31 * result + offsetsHash;
}

public enum UpdateMode {
Expand All @@ -115,6 +156,7 @@ public static class Builder {
private long[] fieldsModified = new long[0];
private long[] fieldsForPreservingFragBitmap = new long[0];
private Optional<UpdateMode> updateMode = Optional.empty();
private Map<Long, long[]> updatedFragmentOffsets = Collections.emptyMap();

private Builder() {}

Expand Down Expand Up @@ -148,14 +190,28 @@ public Builder updateMode(Optional<UpdateMode> updateMode) {
return this;
}

/**
 * Set the per-fragment matched row offsets for a RewriteColumns commit.
 *
 * <p>Keys are fragment ids; values are the local physical row offsets (0-based) within the
 * fragment that matched the update_columns hash join. When non-empty and update mode is
 * RewriteColumns with stable row IDs enabled, build_manifest will call the partial last_updated
 * refresh for those offsets only.
 *
 * <p>A {@code null} argument is treated as "no offsets supplied" and stored as an empty map, so
 * the built operation never holds a {@code null} map.
 *
 * @param updatedFragmentOffsets offsets keyed by fragment id, or {@code null} for none
 * @return this builder
 */
public Builder updatedFragmentOffsets(Map<Long, long[]> updatedFragmentOffsets) {
  // equals() and hashCode() dereference the map unconditionally, so null is normalized here.
  this.updatedFragmentOffsets =
      updatedFragmentOffsets == null
          ? Collections.<Long, long[]>emptyMap()
          : updatedFragmentOffsets;
  return this;
}

public Update build() {
return new Update(
removedFragmentIds,
updatedFragments,
newFragments,
fieldsModified,
fieldsForPreservingFragBitmap,
updateMode);
updateMode,
updatedFragmentOffsets);
}
}
}
58 changes: 58 additions & 0 deletions java/src/test/java/org/lance/operation/UpdateTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,14 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import static org.junit.jupiter.api.Assertions.assertThrows;

public class UpdateTest extends OperationTestBase {
Expand Down Expand Up @@ -104,6 +108,60 @@ void testUpdate(@TempDir Path tempDir) throws Exception {
}
}

/**
 * Commits an Update carrying {@code updatedFragmentOffsets}, reads the committed transaction
 * back, and checks that the offsets come back unchanged.
 */
@Test
void testUpdatedFragmentOffsetsRoundTrip(@TempDir Path tempDir) throws Exception {
  // Values must fit in u32 (RoaringBitmap) and are already sorted so the round-trip order is
  // deterministic.
  final long[] expectedOffsets = {1L, 3L, 5L};
  String datasetPath = tempDir.resolve("testUpdatedFragmentOffsetsRoundTrip").toString();

  try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) {
    TestUtils.SimpleTestDataset testDataset =
        new TestUtils.SimpleTestDataset(allocator, datasetPath);
    dataset = testDataset.createEmptyDataset();

    // Seed the dataset with one fragment so there is a real fragment id to reference.
    FragmentMetadata seedFragment = testDataset.createNewFragment(10);
    try (Transaction appendTxn =
        new Transaction.Builder()
            .readVersion(dataset.version())
            .operation(Append.builder().fragments(Collections.singletonList(seedFragment)).build())
            .build()) {
      new CommitBuilder(dataset).execute(appendTxn).close();
    }

    dataset = Dataset.open(datasetPath, allocator);
    long fragmentId = dataset.getFragments().get(0).getId();
    FragmentMetadata replacementFragment = testDataset.createNewFragment(10);

    Map<Long, long[]> offsetsByFragment = new HashMap<>();
    offsetsByFragment.put(fragmentId, expectedOffsets.clone());

    // Resources close in reverse declaration order, exactly as nested try blocks would.
    try (Transaction updateTxn =
            new Transaction.Builder()
                .readVersion(dataset.version())
                .operation(
                    Update.builder()
                        .removedFragmentIds(Collections.singletonList(fragmentId))
                        .newFragments(Collections.singletonList(replacementFragment))
                        .updateMode(Optional.of(UpdateMode.RewriteRows))
                        .updatedFragmentOffsets(offsetsByFragment)
                        .build())
                .build();
        Dataset committed = new CommitBuilder(dataset).execute(updateTxn);
        // Reading the committed transaction back exercises the IntoJava JNI path.
        Transaction readTx = committed.readTransaction().orElseThrow()) {
      assertInstanceOf(Update.class, readTx.operation());
      Update roundTripped = (Update) readTx.operation();

      Map<Long, long[]> actualOffsets = roundTripped.updatedFragmentOffsets();
      assertEquals(1, actualOffsets.size());
      assertArrayEquals(expectedOffsets, actualOffsets.get(fragmentId));
    }
  }
}

@Test
void testUpdateColumns(@TempDir Path tempDir) throws Exception {
String datasetPath = tempDir.resolve("testUpdateColumns").toString();
Expand Down
Loading