Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 29 additions & 9 deletions hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package org.apache.hudi.io.hfile;

import org.apache.hudi.common.util.Option;
import org.apache.hudi.io.util.IOUtils;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
Expand All @@ -30,6 +31,7 @@

import static org.apache.hudi.io.hfile.DataSize.SIZEOF_BYTE;
import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT16;
import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT32;
import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT64;
import static org.apache.hudi.io.hfile.HFileReader.SEEK_TO_BEFORE_BLOCK_FIRST_KEY;
import static org.apache.hudi.io.hfile.HFileReader.SEEK_TO_FOUND;
Expand Down Expand Up @@ -104,39 +106,57 @@ static HFileDataBlock createDataBlockToWrite(HFileContext context,
int seekTo(HFileCursor cursor, Key key, int blockStartOffsetInFile) {
int relativeOffset = cursor.getOffset() - blockStartOffsetInFile;
int lastRelativeOffset = relativeOffset;
// The key-value cached at the starting position, if any. It is only consulted to re-cache it
// in the cursor when the lookup lands "in range" on the very first comparison; entries scanned
// past are compared directly against the backing buffer below (no per-entry KeyValue/Key
// allocation), so this is emptied after the first iteration and the cursor falls back to a
// deferred read.
Option<KeyValue> lastKeyValue = cursor.getKeyValue();
// The lookup key content is fixed across the scan; hoist it out of the loop. Note the lookup
// key may be a UTF8StringKey, so use the polymorphic content accessors (no 2-byte prefix).
byte[] lookupBytes = key.getBytes();
int lookupContentOffset = key.getContentOffset();
int lookupContentLength = key.getContentLength();

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add some unit tests to validate this?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done 82abf7d. Added a focused test in TestHFileReader that writes a small-block-size HFile (several entries per data block) and validates seekTo across SEEK_TO_FOUND, SEEK_TO_IN_RANGE, SEEK_TO_BEFORE_FILE_FIRST_KEY, and SEEK_TO_EOF; the FOUND/IN_RANGE cases land mid-block so they exercise the new buffer-direct key comparison and the deferred cursor read.

while (relativeOffset < uncompressedContentEndRelativeOffset) {
// Full length is not known yet until parsing
KeyValue kv = readKeyValue(relativeOffset);
int comp = kv.getKey().compareTo(key);
// Compare the entry key against the lookup key directly on the buffer, without materializing
// a KeyValue/Key for every scanned entry. Layout at `relativeOffset`: [int keyLength]
// [int valueLength][short keyContentLength][key content]...; see KeyValue and Key.
int keyContentLength = IOUtils.readShort(byteBuff, relativeOffset + KEY_OFFSET);
int keyContentOffset = relativeOffset + KEY_OFFSET + KEY_LENGTH_LENGTH;
int comp = IOUtils.compareTo(
byteBuff, keyContentOffset, keyContentLength,
lookupBytes, lookupContentOffset, lookupContentLength);
if (comp == 0) {
// The lookup key equals the key `relativeOffset` points to; the key is found.
// Set the cursor to the current offset that points to the exact match
cursor.set(relativeOffset + blockStartOffsetInFile, kv);
// Materialize the KeyValue once and set the cursor to the exact match.
cursor.set(relativeOffset + blockStartOffsetInFile, readKeyValue(relativeOffset));
return SEEK_TO_FOUND;
} else if (comp > 0) {
// There is no matched key (otherwise, the method would have already stopped and returned 0)
// and the key `relativeOffset` points to is already greater than the lookup key.
// So set the cursor to the previous offset, pointing to the greatest key in the file that is
// less than the lookup key.
if (lastKeyValue.isPresent()) {
// If the key-value pair is already, cache it
// The previous key-value was already cached (first iteration); reuse it.
cursor.set(lastRelativeOffset + blockStartOffsetInFile, lastKeyValue.get());
} else {
// Otherwise, defer the read till it's needed
// Otherwise, defer the read till it's needed; getKeyValue() materializes it lazily.
cursor.setOffset(lastRelativeOffset + blockStartOffsetInFile);
}
// If the lookup key is lexicographically smaller than the first key pointed to by
// the cursor, SEEK_TO_BEFORE_BLOCK_FIRST_KEY should be returned, so the caller
// knows that the cursor is ahead of the lookup key in this case.
return isAtFirstKey(relativeOffset) ? SEEK_TO_BEFORE_BLOCK_FIRST_KEY : SEEK_TO_IN_RANGE;
}
int entryKeyLength = IOUtils.readInt(byteBuff, relativeOffset);
int entryValueLength = IOUtils.readInt(byteBuff, relativeOffset + SIZEOF_INT32);
long increment =
(long) KEY_OFFSET + (long) kv.getKeyLength() + (long) kv.getValueLength()
(long) KEY_OFFSET + (long) entryKeyLength + (long) entryValueLength
+ ZERO_TS_VERSION_BYTE_LENGTH;
lastRelativeOffset = relativeOffset;
relativeOffset += increment;
lastKeyValue = Option.of(kv);
// Past entries are not materialized; clear the cache so the "in range" branch above defers.
lastKeyValue = Option.empty();
}
// We reach the end of the block. Set the cursor to the offset of last key.
// In this case, the lookup key is greater than the last key.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
import org.junit.jupiter.params.provider.MethodSource;
import org.junit.jupiter.params.provider.ValueSource;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
Expand Down Expand Up @@ -843,6 +845,61 @@ public void testReadHFileCompatibility(String hfilePrefix) throws IOException {
verifyHFileReadCompatibility(bootstrapIndexFile, 4, Option.empty());
}

/**
 * Exercises {@link HFileDataBlock#seekTo} on an HFile whose data blocks each contain several
 * entries, so a lookup has to scan past earlier entries of a block before it resolves.
 *
 * <p>The file is produced in memory with a 512-byte block size and 64 entries generated by
 * {@code KEY_CREATOR} / {@code VALUE_CREATOR}. Per the sizing note in the method body, the
 * entries are fixed-width and are expected to pack 8 per block. The lookups, issued in ascending
 * key order, cover:
 * <ul>
 *   <li>a key smaller than the first key of the file;</li>
 *   <li>exact matches on the first entry, on entry 7, on a mid-block entry, on an entry in a
 *       later block, and on the last entry of the file;</li>
 *   <li>a key strictly between two adjacent entries, which must report "in range" and resolve
 *       the cursor to the lower of the two entries;</li>
 *   <li>a key greater than the last key of the file.</li>
 * </ul>
 */
@Test
public void testSeekToScanWithinMultiEntryBlocks() throws IOException {
  final int entryCount = 64;
  final int lastIndex = entryCount - 1;
  // Sizing: 59 bytes per entry (18-byte key + 20-byte value + 21 extra bytes), so a 512-byte
  // block holds 8 entries and the 64 entries span 8 data blocks.
  HFileContext smallBlockContext = HFileContext.builder().blockSize(512).build();

  // Write the HFile fully in memory; the writer is closed before the bytes are captured.
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  try (DataOutputStream out = new DataOutputStream(buffer);
       HFileWriter writer = new HFileWriterImpl(smallBlockContext, out)) {
    int index = 0;
    while (index < entryCount) {
      byte[] valueBytes = VALUE_CREATOR.apply(index).getBytes(StandardCharsets.UTF_8);
      writer.append(KEY_CREATOR.apply(index), valueBytes);
      index++;
    }
  }
  byte[] hfileBytes = buffer.toByteArray();

  // Lookups are listed in ascending key order, one scenario per element.
  List<KeyLookUpInfo> lookups = Arrays.asList(
      // Before the first key of the file.
      new KeyLookUpInfo(
          "", SEEK_TO_BEFORE_FILE_FIRST_KEY, KEY_CREATOR.apply(0), VALUE_CREATOR.apply(0)),
      // Exact hit on the very first entry (resolved on the first comparison).
      new KeyLookUpInfo(
          KEY_CREATOR.apply(0), SEEK_TO_FOUND, KEY_CREATOR.apply(0), VALUE_CREATOR.apply(0)),
      // Exact hit on the last entry of a block, after scanning past the 7 entries before it.
      new KeyLookUpInfo(
          KEY_CREATOR.apply(7), SEEK_TO_FOUND, KEY_CREATOR.apply(7), VALUE_CREATOR.apply(7)),
      // Exact hit in the middle of a block.
      new KeyLookUpInfo(
          KEY_CREATOR.apply(25), SEEK_TO_FOUND, KEY_CREATOR.apply(25), VALUE_CREATOR.apply(25)),
      // Strictly between two neighbouring mid-block keys: in range, and the cursor resolves to
      // the lower key through the deferred read (scanned-past key-values are not cached).
      new KeyLookUpInfo(
          KEY_CREATOR.apply(30) + "a",
          SEEK_TO_IN_RANGE,
          KEY_CREATOR.apply(30),
          VALUE_CREATOR.apply(30)),
      // Exact hit in a later block.
      new KeyLookUpInfo(
          KEY_CREATOR.apply(50), SEEK_TO_FOUND, KEY_CREATOR.apply(50), VALUE_CREATOR.apply(50)),
      // Exact hit on the final entry of the file.
      new KeyLookUpInfo(
          KEY_CREATOR.apply(lastIndex),
          SEEK_TO_FOUND,
          KEY_CREATOR.apply(lastIndex),
          VALUE_CREATOR.apply(lastIndex)),
      // Past the last key: end of file.
      new KeyLookUpInfo(KEY_CREATOR.apply(lastIndex) + "a", SEEK_TO_EOF, "", ""));

  ByteArraySeekableDataInputStream input =
      new ByteArraySeekableDataInputStream(new ByteBufferBackedInputStream(hfileBytes));
  try (HFileReader reader = new HFileReaderImpl(input, hfileBytes.length)) {
    reader.initializeMetadata();
    verifyHFileSeekToReads(reader, lookups);
  }
}

public static byte[] readHFileFromResources(String filename) throws IOException {
long size = TestHFileReader.class
.getResource(filename).openConnection().getContentLength();
Expand Down
Loading