diff --git a/internal-api/src/jmh/java/datadog/trace/util/HashtableD1Benchmark.java b/internal-api/src/jmh/java/datadog/trace/util/HashtableD1Benchmark.java new file mode 100644 index 00000000000..f8ba7177e88 --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/util/HashtableD1Benchmark.java @@ -0,0 +1,169 @@ +package datadog.trace.util; + +import static java.util.concurrent.TimeUnit.MICROSECONDS; + +import java.util.HashMap; +import java.util.Map; +import java.util.function.Consumer; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OperationsPerInvocation; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/** + * Compares {@link Hashtable.D1} against equivalent {@link HashMap} usage for add, update, and + * iterate operations. + * + *

Each benchmark thread owns its own map ({@link Scope#Thread}), but a non-trivial thread count + * is used so allocation/GC pressure surfaces in the throughput numbers — that pressure is the main + * thing Hashtable is built to avoid. + * + *

+ * + *

Update is where Hashtable dominates: D1 is ~14x faster, because the HashMap path + * allocates per call (a {@code Long}) and the resulting GC pressure throttles throughput under + * multiple threads. Add is roughly comparable (both allocate one entry per insert). + * Iterate is essentially a wash — both are bucket walks. + * MacBook M1 8 threads (Java 8) + * + * Benchmark Mode Cnt Score Error Units + * HashtableD1Benchmark.add_hashMap thrpt 6 187.883 ± 189.858 ops/us + * HashtableD1Benchmark.add_hashtable thrpt 6 198.710 ± 273.035 ops/us + * + * HashtableD1Benchmark.update_hashMap thrpt 6 127.392 ± 87.482 ops/us + * HashtableD1Benchmark.update_hashtable thrpt 6 1810.244 ± 44.645 ops/us + * + * HashtableD1Benchmark.iterate_hashMap thrpt 6 20.043 ± 0.752 ops/us + * HashtableD1Benchmark.iterate_hashtable thrpt 6 22.208 ± 0.956 ops/us + * + */ +@Fork(2) +@Warmup(iterations = 2) +@Measurement(iterations = 3) +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(MICROSECONDS) +@Threads(8) +public class HashtableD1Benchmark { + + static final int N_KEYS = 64; + static final int CAPACITY = 128; + + static final String[] SOURCE_KEYS = new String[N_KEYS]; + + static { + for (int i = 0; i < N_KEYS; ++i) { + SOURCE_KEYS[i] = "key-" + i; + } + } + + static final class D1Counter extends Hashtable.D1.Entry { + long count; + + D1Counter(String key) { + super(key); + } + } + + /** Reusable iteration consumer — avoids per-call lambda capture allocation. 
*/ + static final class BhD1Consumer implements Consumer { + Blackhole bh; + + @Override + public void accept(D1Counter e) { + bh.consume(e.key); + bh.consume(e.count); + } + } + + @State(Scope.Thread) + public static class D1State { + Hashtable.D1 table; + HashMap hashMap; + String[] keys; + int cursor; + final BhD1Consumer consumer = new BhD1Consumer(); + + @Setup(Level.Iteration) + public void setUp() { + table = new Hashtable.D1<>(CAPACITY); + hashMap = new HashMap<>(CAPACITY); + keys = SOURCE_KEYS; + for (int i = 0; i < N_KEYS; ++i) { + table.insert(new D1Counter(keys[i])); + hashMap.put(keys[i], 0L); + } + cursor = 0; + } + + String nextKey() { + int i = cursor; + cursor = (i + 1) & (N_KEYS - 1); + return keys[i]; + } + } + + @Benchmark + @OperationsPerInvocation(N_KEYS) + public void add_hashtable(D1State s) { + Hashtable.D1 t = s.table; + String[] keys = s.keys; + t.clear(); + for (int i = 0; i < N_KEYS; ++i) { + t.insert(new D1Counter(keys[i])); + } + } + + @Benchmark + @OperationsPerInvocation(N_KEYS) + public void add_hashMap(D1State s) { + HashMap m = s.hashMap; + String[] keys = s.keys; + m.clear(); + for (int i = 0; i < N_KEYS; ++i) { + m.put(keys[i], (long) i); + } + } + + @Benchmark + public long update_hashtable(D1State s) { + D1Counter e = s.table.get(s.nextKey()); + return ++e.count; + } + + @Benchmark + public Long update_hashMap(D1State s) { + return s.hashMap.merge(s.nextKey(), 1L, Long::sum); + } + + @Benchmark + public void iterate_hashtable(D1State s, Blackhole bh) { + s.consumer.bh = bh; + s.table.forEach(s.consumer); + } + + @Benchmark + public void iterate_hashMap(D1State s, Blackhole bh) { + for (Map.Entry entry : s.hashMap.entrySet()) { + bh.consume(entry.getKey()); + bh.consume(entry.getValue()); + } + } +} diff --git a/internal-api/src/jmh/java/datadog/trace/util/HashtableD2Benchmark.java b/internal-api/src/jmh/java/datadog/trace/util/HashtableD2Benchmark.java new file mode 100644 index 00000000000..6f46a702005 --- /dev/null +++ 
b/internal-api/src/jmh/java/datadog/trace/util/HashtableD2Benchmark.java @@ -0,0 +1,209 @@ +package datadog.trace.util; + +import static java.util.concurrent.TimeUnit.MICROSECONDS; + +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.function.Consumer; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OperationsPerInvocation; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/** + * Compares {@link Hashtable.D2} against equivalent {@link HashMap} usage for add, update, and + * iterate operations. + * + *

Each benchmark thread owns its own map ({@link Scope#Thread}), but a non-trivial thread count + * is used so allocation/GC pressure surfaces in the throughput numbers — that pressure is the main + * thing Hashtable is built to avoid. + * + *

+ * + *

The D2 variants additionally pay for a composite-key wrapper allocation in the HashMap path + * (Java has no built-in tuple-as-key) — D2 sidesteps it by taking both key parts directly. + * + *

Update is where Hashtable dominates: D2 is ~26x faster, because the HashMap path + * allocates per call (a {@code Long}, plus a {@code Key2}) and the resulting GC pressure throttles + * throughput under multiple threads. Add is ~3x faster for D2 (Hashtable sidesteps the + * {@code Key2} allocation). Iterate is essentially a wash — both are bucket walks. + * MacBook M1 8 threads (Java 8) + * + * Benchmark Mode Cnt Score Error Units + * HashtableD2Benchmark.add_hashMap thrpt 6 77.082 ± 72.278 ops/us + * HashtableD2Benchmark.add_hashtable thrpt 6 216.813 ± 413.236 ops/us + * + * HashtableD2Benchmark.update_hashMap thrpt 6 56.077 ± 23.716 ops/us + * HashtableD2Benchmark.update_hashtable thrpt 6 1445.868 ± 157.705 ops/us + * + * HashtableD2Benchmark.iterate_hashMap thrpt 6 19.508 ± 0.760 ops/us + * HashtableD2Benchmark.iterate_hashtable thrpt 6 16.968 ± 0.371 ops/us + * + */ +@Fork(2) +@Warmup(iterations = 2) +@Measurement(iterations = 3) +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(MICROSECONDS) +@Threads(8) +public class HashtableD2Benchmark { + + static final int N_KEYS = 64; + static final int CAPACITY = 128; + + static final String[] SOURCE_K1 = new String[N_KEYS]; + static final Integer[] SOURCE_K2 = new Integer[N_KEYS]; + + static { + for (int i = 0; i < N_KEYS; ++i) { + SOURCE_K1[i] = "key-" + i; + SOURCE_K2[i] = i * 31 + 17; + } + } + + static final class D2Counter extends Hashtable.D2.Entry { + long count; + + D2Counter(String k1, Integer k2) { + super(k1, k2); + } + } + + /** Composite key for the HashMap baseline against D2. 
*/ + static final class Key2 { + final String k1; + final Integer k2; + final int hash; + + Key2(String k1, Integer k2) { + this.k1 = k1; + this.k2 = k2; + this.hash = Objects.hash(k1, k2); + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof Key2)) { + return false; + } + Key2 other = (Key2) o; + return Objects.equals(k1, other.k1) && Objects.equals(k2, other.k2); + } + + @Override + public int hashCode() { + return hash; + } + } + + /** Reusable iteration consumer — avoids per-call lambda capture allocation. */ + static final class BhD2Consumer implements Consumer { + Blackhole bh; + + @Override + public void accept(D2Counter e) { + bh.consume(e.key1); + bh.consume(e.key2); + bh.consume(e.count); + } + } + + @State(Scope.Thread) + public static class D2State { + Hashtable.D2 table; + HashMap hashMap; + String[] k1s; + Integer[] k2s; + int cursor; + final BhD2Consumer consumer = new BhD2Consumer(); + + @Setup(Level.Iteration) + public void setUp() { + table = new Hashtable.D2<>(CAPACITY); + hashMap = new HashMap<>(CAPACITY); + k1s = SOURCE_K1; + k2s = SOURCE_K2; + for (int i = 0; i < N_KEYS; ++i) { + table.insert(new D2Counter(k1s[i], k2s[i])); + hashMap.put(new Key2(k1s[i], k2s[i]), 0L); + } + cursor = 0; + } + + int nextIndex() { + int i = cursor; + cursor = (i + 1) & (N_KEYS - 1); + return i; + } + } + + @Benchmark + @OperationsPerInvocation(N_KEYS) + public void add_hashtable(D2State s) { + Hashtable.D2 t = s.table; + String[] k1s = s.k1s; + Integer[] k2s = s.k2s; + t.clear(); + for (int i = 0; i < N_KEYS; ++i) { + t.insert(new D2Counter(k1s[i], k2s[i])); + } + } + + @Benchmark + @OperationsPerInvocation(N_KEYS) + public void add_hashMap(D2State s) { + HashMap m = s.hashMap; + String[] k1s = s.k1s; + Integer[] k2s = s.k2s; + m.clear(); + for (int i = 0; i < N_KEYS; ++i) { + m.put(new Key2(k1s[i], k2s[i]), (long) i); + } + } + + @Benchmark + public long update_hashtable(D2State s) { + int i = s.nextIndex(); + D2Counter e = 
s.table.get(s.k1s[i], s.k2s[i]); + return ++e.count; + } + + @Benchmark + public Long update_hashMap(D2State s) { + int i = s.nextIndex(); + return s.hashMap.merge(new Key2(s.k1s[i], s.k2s[i]), 1L, Long::sum); + } + + @Benchmark + public void iterate_hashtable(D2State s, Blackhole bh) { + s.consumer.bh = bh; + s.table.forEach(s.consumer); + } + + @Benchmark + public void iterate_hashMap(D2State s, Blackhole bh) { + for (Map.Entry entry : s.hashMap.entrySet()) { + bh.consume(entry.getKey()); + bh.consume(entry.getValue()); + } + } +} diff --git a/internal-api/src/main/java/datadog/trace/util/HashingUtils.java b/internal-api/src/main/java/datadog/trace/util/HashingUtils.java index 1522554836a..d975149f433 100644 --- a/internal-api/src/main/java/datadog/trace/util/HashingUtils.java +++ b/internal-api/src/main/java/datadog/trace/util/HashingUtils.java @@ -79,7 +79,7 @@ public static final int hash(int hash0, int hash1, int hash2, int hash3) { } public static final int hash(Object obj0, Object obj1, Object obj2, Object obj3, Object obj4) { - return hash(hashCode(obj0), hashCode(obj1), hashCode(obj2), hashCode(obj3)); + return hash(hashCode(obj0), hashCode(obj1), hashCode(obj2), hashCode(obj3), hashCode(obj4)); } public static final int hash(int hash0, int hash1, int hash2, int hash3, int hash4) { diff --git a/internal-api/src/main/java/datadog/trace/util/Hashtable.java b/internal-api/src/main/java/datadog/trace/util/Hashtable.java new file mode 100644 index 00000000000..8f40e4609bc --- /dev/null +++ b/internal-api/src/main/java/datadog/trace/util/Hashtable.java @@ -0,0 +1,862 @@ +package datadog.trace.util; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.Objects; +import java.util.function.BiConsumer; +import java.util.function.BiFunction; +import java.util.function.Consumer; +import java.util.function.Function; + +/** + * Light weight simple Hashtable system that can be useful when HashMap would be 
unnecessarily + * heavy. + * + *

+ * + * Convenience classes are provided for lower key dimensions. + * + *

For higher key dimensions, client code must implement its own class, but can still use the + * support class to ease the implementation complexity. + * + *

This outer class is a pure namespace -- it can't be instantiated. The actual table types are + * {@link D1}, {@link D2}, and (for higher-arity callers) {@link Support}-driven custom tables. + */ +public final class Hashtable { + private Hashtable() {} + + /** + * Internal base class for entries. Stores the precomputed 64-bit keyHash and the chain-next + * pointer used to link colliding entries within a single bucket. + * + *

Subclasses add the actual key field(s) and a {@code matches(...)} method tailored to their + * key arity. See {@link D1.Entry} and {@link D2.Entry}; for higher arities, client code can + * subclass this directly and use {@link Support} to drive the table mechanics. + */ + public abstract static class Entry { + public final long keyHash; + private Entry next = null; + + protected Entry(long keyHash) { + this.keyHash = keyHash; + } + + public final void setNext(TEntry next) { + this.next = next; + } + + @SuppressWarnings("unchecked") + public final TEntry next() { + return (TEntry) this.next; + } + } + + /** + * Single-key open hash table with chaining. + * + *

The user supplies a {@link D1.Entry} subclass that carries the key and whatever value + * fields they want to mutate in place, then instantiates this class over that entry type. The + * main advantage over {@code HashMap} is that mutating an existing entry's value fields + * requires no allocation: call {@link #get} once and write directly to the returned entry's + * fields. For counter-style workloads this can be several times faster than {@code HashMap} + * and produces effectively zero GC pressure. + * + *

Capacity is fixed at construction. The table does not resize, so the caller is responsible + * for choosing a capacity appropriate to the working set. Actual bucket-array length is rounded + * up to the next power of two. + * + *

Null keys are permitted; they collapse to a single bucket via the sentinel hash {@link + * Long#MIN_VALUE} defined in {@link D1.Entry#hash}. + * + *

Not thread-safe. Concurrent access (including mixing reads with writes) requires + * external synchronization. + * + * @param the key type + * @param the user's {@link D1.Entry D1.Entry<K>} subclass + */ + public static final class D1> { + /** + * Abstract base for {@link D1} entries. Subclass to add value fields you wish to mutate in + * place after retrieving the entry via {@link D1#get}. + * + *

The key is captured at construction and stored alongside its precomputed 64-bit hash.
+     * {@link #matches(Object)} compares with {@link Objects#equals} by default; override it when
+     * different equality semantics are needed (e.g. reference equality for interned keys).
+     *
+     * @param <K> the key type
+     */
+    public abstract static class Entry<K> extends Hashtable.Entry {
+      /** The key this entry was created for; may be {@code null}. */
+      final K key;
+
+      /**
+       * Captures {@code key} and precomputes its lookup hash via {@link #hash(Object)}.
+       *
+       * @param key the key for this entry; {@code null} is allowed
+       */
+      protected Entry(K key) {
+        super(hash(key));
+        this.key = key;
+      }
+
+      /**
+       * Tells whether this entry belongs to {@code key}.
+       *
+       * @param key the key being looked up; may be {@code null}
+       * @return {@code true} if {@code key} equals this entry's key per {@link Objects#equals}
+       */
+      public boolean matches(Object key) {
+        return Objects.equals(this.key, key);
+      }
+
+      /**
+       * Returns the 64-bit lookup hash for {@code key}. Null keys map to {@link Long#MIN_VALUE} so
+       * that they don't collide with a real key that hashes to 0 (e.g. {@code
+       * Integer.hashCode(0)}). The {@code Long.MIN_VALUE} sentinel is safe against any {@code
+       * int}-valued {@code hashCode()} since those widen to a long in the range {@code
+       * [Integer.MIN_VALUE, Integer.MAX_VALUE]}; real-key collisions in chains are resolved by
+       * {@link #matches(Object)}.
+       *
+       * @param key the key to hash; may be {@code null}
+       * @return {@code Long.MIN_VALUE} for a null key, otherwise {@code key.hashCode()} widened
+       *     to {@code long}
+       */
+      public static long hash(Object key) {
+        return (key == null) ? Long.MIN_VALUE : key.hashCode();
+      }
+    }
+
+    // Package-private so iterator tests in the same package can drive Support.bucketIterator and
+    // friends directly against the table's bucket array.
+ final Hashtable.Entry[] buckets; + private int size; + + public D1(int capacity) { + this.buckets = Support.create(capacity); + this.size = 0; + } + + public int size() { + return this.size; + } + + public TEntry get(K key) { + long keyHash = D1.Entry.hash(key); + for (TEntry te = Support.bucket(this.buckets, keyHash); te != null; te = te.next()) { + if (te.keyHash == keyHash && te.matches(key)) { + return te; + } + } + return null; + } + + public TEntry remove(K key) { + long keyHash = D1.Entry.hash(key); + + for (MutatingBucketIterator iter = + Support.mutatingBucketIterator(this.buckets, keyHash); + iter.hasNext(); ) { + TEntry curEntry = iter.next(); + + if (curEntry.matches(key)) { + iter.remove(); + this.size -= 1; + return curEntry; + } + } + + return null; + } + + public void insert(TEntry newEntry) { + Support.insertHeadEntry(this.buckets, newEntry.keyHash, newEntry); + this.size += 1; + } + + public TEntry insertOrReplace(TEntry newEntry) { + for (MutatingBucketIterator iter = + Support.mutatingBucketIterator(this.buckets, newEntry.keyHash); + iter.hasNext(); ) { + TEntry curEntry = iter.next(); + + if (curEntry.matches(newEntry.key)) { + iter.replace(newEntry); + return curEntry; + } + } + + Support.insertHeadEntry(this.buckets, newEntry.keyHash, newEntry); + this.size += 1; + return null; + } + + /** + * Returns the entry for {@code key}, building one via {@code creator} if absent. Computes the + * hash once and reuses it for both the lookup and (on miss) the insert -- avoids the + * double-hash that "{@code get}; if null then {@code insert}" would incur. + * + *

The {@code creator} is expected to build an entry whose {@code keyHash} equals {@link + * Entry#hash(Object) D1.Entry.hash(key)} -- typically by passing {@code key} to a constructor + * that calls {@code super(key)}. A mismatched hash will leave the new entry inserted at a + * bucket that future {@link #get} calls won't probe. + */ + public TEntry getOrCreate(K key, Function creator) { + long keyHash = D1.Entry.hash(key); + for (TEntry te = Support.bucket(this.buckets, keyHash); te != null; te = te.next()) { + if (te.keyHash == keyHash && te.matches(key)) { + return te; + } + } + TEntry newEntry = creator.apply(key); + Support.insertHeadEntry(this.buckets, newEntry.keyHash, newEntry); + this.size += 1; + return newEntry; + } + + public void clear() { + Support.clear(this.buckets); + this.size = 0; + } + + public void forEach(Consumer consumer) { + Support.forEach(this.buckets, consumer); + } + + /** + * Context-passing forEach. Useful for callers that want to avoid a capturing-lambda allocation + * -- pass a non-capturing {@link BiConsumer} (typically a {@code static final}) plus whatever + * side-band state it needs as {@code context}. + */ + public void forEach(T context, BiConsumer consumer) { + Support.forEach(this.buckets, context, consumer); + } + } + + /** + * Two-key (composite-key) hash table with chaining. + * + *

The user supplies a {@link D2.Entry} subclass carrying both key parts and any value fields. + * Compared to a {@code HashMap} keyed by a composite {@code Pair} (or record), this avoids the + * per-lookup key allocation: both key parts are passed directly to {@link #get}, {@link #remove}, + * and {@link #getOrCreate}, while {@link #insert} and {@link #insertOrReplace} take an entry that + * already carries them. Combined with in-place value mutation, this makes {@code D2} + * substantially less GC-intensive than the equivalent {@code HashMap} for counter-style + * workloads. + * + *

Capacity is fixed at construction; the table does not resize. Actual bucket-array length is + * rounded up to the next power of two. + * + *

Key parts are combined into a 64-bit hash via {@link LongHashingUtils}; see {@link + * D2.Entry#hash(Object, Object)}. + * + *

Not thread-safe. + * + * @param first key type + * @param second key type + * @param the user's {@link D2.Entry D2.Entry<K1, K2>} subclass + */ + public static final class D2> { + /** + * Abstract base for {@link D2} entries. Subclass to add value fields you wish to mutate in + * place. + * + *

Both key parts are captured at construction and stored alongside their combined 64-bit + * hash. {@link #matches(Object, Object)} uses {@link Objects#equals} pairwise on the two parts. + * + * @param first key type + * @param second key type + */ + public abstract static class Entry extends Hashtable.Entry { + final K1 key1; + final K2 key2; + + protected Entry(K1 key1, K2 key2) { + super(hash(key1, key2)); + this.key1 = key1; + this.key2 = key2; + } + + public boolean matches(K1 key1, K2 key2) { + return Objects.equals(this.key1, key1) && Objects.equals(this.key2, key2); + } + + /** + * Returns the 64-bit lookup hash combining both key parts via {@link + * LongHashingUtils#hash(Object, Object)}. Null parts contribute {@code 0} (not a sentinel, + * unlike {@link D1.Entry#hash(Object)}): the combined hash can collide with real-key + * combinations whose chained hash equals {@code hash(0, 0) = 0} or similar values. {@link + * #matches(Object, Object)} resolves any such collision. + */ + public static long hash(Object key1, Object key2) { + return LongHashingUtils.hash(key1, key2); + } + } + + // Package-private to match D1.buckets -- available for iterator tests in the same package. 
+ final Hashtable.Entry[] buckets; + private int size; + + public D2(int capacity) { + this.buckets = Support.create(capacity); + this.size = 0; + } + + public int size() { + return this.size; + } + + public TEntry get(K1 key1, K2 key2) { + long keyHash = D2.Entry.hash(key1, key2); + for (TEntry te = Support.bucket(this.buckets, keyHash); te != null; te = te.next()) { + if (te.keyHash == keyHash && te.matches(key1, key2)) { + return te; + } + } + return null; + } + + public TEntry remove(K1 key1, K2 key2) { + long keyHash = D2.Entry.hash(key1, key2); + + for (MutatingBucketIterator iter = + Support.mutatingBucketIterator(this.buckets, keyHash); + iter.hasNext(); ) { + TEntry curEntry = iter.next(); + + if (curEntry.matches(key1, key2)) { + iter.remove(); + this.size -= 1; + return curEntry; + } + } + + return null; + } + + public void insert(TEntry newEntry) { + Support.insertHeadEntry(this.buckets, newEntry.keyHash, newEntry); + this.size += 1; + } + + public TEntry insertOrReplace(TEntry newEntry) { + for (MutatingBucketIterator iter = + Support.mutatingBucketIterator(this.buckets, newEntry.keyHash); + iter.hasNext(); ) { + TEntry curEntry = iter.next(); + + if (curEntry.matches(newEntry.key1, newEntry.key2)) { + iter.replace(newEntry); + return curEntry; + } + } + + Support.insertHeadEntry(this.buckets, newEntry.keyHash, newEntry); + this.size += 1; + return null; + } + + /** + * Two-key analogue of {@link D1#getOrCreate}. Computes the combined hash once and reuses it for + * both lookup and (on miss) insert. The {@code creator} is expected to build an entry whose + * {@code keyHash} equals {@link Entry#hash(Object, Object) D2.Entry.hash(key1, key2)}. 
+ */ + public TEntry getOrCreate( + K1 key1, K2 key2, BiFunction creator) { + long keyHash = D2.Entry.hash(key1, key2); + for (TEntry te = Support.bucket(this.buckets, keyHash); te != null; te = te.next()) { + if (te.keyHash == keyHash && te.matches(key1, key2)) { + return te; + } + } + TEntry newEntry = creator.apply(key1, key2); + Support.insertHeadEntry(this.buckets, newEntry.keyHash, newEntry); + this.size += 1; + return newEntry; + } + + public void clear() { + Support.clear(this.buckets); + this.size = 0; + } + + public void forEach(Consumer consumer) { + Support.forEach(this.buckets, consumer); + } + + /** + * Context-passing forEach. Useful for callers that want to avoid a capturing-lambda allocation + * -- pass a non-capturing {@link BiConsumer} (typically a {@code static final}) plus whatever + * side-band state it needs as {@code context}. + */ + public void forEach(T context, BiConsumer consumer) { + Support.forEach(this.buckets, context, consumer); + } + } + + /** + * Building blocks for hash-table operations. + * + *

Used by {@link D1} and {@link D2}, and available to callers that want to assemble their own + * higher-arity table (3+ key parts) without re-implementing the bucket-array mechanics. The + * typical recipe: + * + *

+ * + *

All bucket arrays produced by {@code create} have a power-of-two length, so {@link
+   * #bucketIndex(Object[], long)} can use a bit mask.
+   */
+  public static final class Support {
+    /** Static helpers only -- like the enclosing {@link Hashtable}, never instantiated. */
+    private Support() {}
+
+    /**
+     * Allocates a bucket array sized to hold {@code requestedSize} entries. Returned length is
+     * {@code requestedSize} rounded up to the next power of two; the largest accepted request is
+     * {@link #MAX_BUCKETS}.
+     *
+     * @param requestedSize number of entries to size for; must be in {@code [0, MAX_BUCKETS]}
+     * @return a new bucket array of power-of-two length with every slot {@code null}
+     * @throws IllegalArgumentException if {@code requestedSize} is negative or greater than
+     *     {@link #MAX_BUCKETS} (raised by {@link #sizeFor(int)})
+     */
+    public static final Hashtable.Entry[] create(int requestedSize) {
+      return new Entry[sizeFor(requestedSize)];
+    }
+
+    /**
+     * Variant of {@link #create(int)} that scales the requested working-set size before sizing the
+     * bucket array. Pair with {@link #MAX_RATIO} to leave headroom over the working set for a
+     * desired load factor; the canonical call is {@code create(n, MAX_RATIO)}.
+     *
+     *

The scaled size is truncated to {@code int} before going through {@link #sizeFor(int)}. + * Truncation rather than {@code ceil} is intentional: {@code sizeFor} rounds up to the next + * power of two anyway, so the fractional part would only matter when float fuzz pushes the + * result across a power-of-two boundary -- {@code ceil} would then double the array size for no + * reason (e.g. {@code 12 * 4/3 = 16.0...0005f -> ceil 17 -> sizeFor 32}). + */ + public static final Hashtable.Entry[] create(int requestedSize, float scale) { + return new Entry[sizeFor((int) (requestedSize * scale))]; + } + + /** Upper bound on the bucket array length returned by {@link #sizeFor(int)}. */ + static final int MAX_BUCKETS = 1 << 30; + + /** + * Inverse of a 75% load factor. Callers that size their bucket array from a target working-set + * size {@code n} should pass {@code create(n, MAX_RATIO)} to leave ~25% headroom in the array. + */ + public static final float MAX_RATIO = 4.0f / 3.0f; + + /** + * Rounds {@code requestedSize} up to the next power of two, capped at {@link #MAX_BUCKETS}. + * Throws {@link IllegalArgumentException} for negative inputs or inputs above the cap. Returns + * the bucket-array length to allocate. 
+     */
+    static final int sizeFor(int requestedSize) {
+      if (requestedSize < 0) {
+        throw new IllegalArgumentException("requestedSize must be non-negative: " + requestedSize);
+      } else if (requestedSize > MAX_BUCKETS) {
+        throw new IllegalArgumentException(
+            "requestedSize exceeds maximum bucket count (" + MAX_BUCKETS + "): " + requestedSize);
+      }
+      // 0 and 1 both get a single bucket; anything larger is rounded up to a power of two by
+      // doubling the highest set bit of (requestedSize - 1).
+      return (requestedSize <= 1) ? 1 : Integer.highestOneBit(requestedSize - 1) << 1;
+    }
+
+    /** Empties a table by setting every bucket head in {@code buckets} to {@code null}. */
+    public static final void clear(Hashtable.Entry[] buckets) {
+      Arrays.fill(buckets, null);
+    }
+
+    /**
+     * Creates a read-only iterator over the entries in {@code keyHash}'s bucket whose stored hash
+     * equals {@code keyHash}.
+     */
+    public static final BucketIterator bucketIterator(Hashtable.Entry[] buckets, long keyHash) {
+      return new BucketIterator(buckets, keyHash);
+    }
+
+    /**
+     * Creates an iterator over the entries in {@code keyHash}'s bucket whose stored hash equals
+     * {@code keyHash}, with support for removing or replacing the current entry.
+     */
+    public static final MutatingBucketIterator mutatingBucketIterator(
+        Hashtable.Entry[] buckets, long keyHash) {
+      return new MutatingBucketIterator(buckets, keyHash);
+    }
+
+    /**
+     * Returns a {@link MutatingTableIterator} over every entry in {@code buckets}. Useful for
+     * sweeps -- eviction, expunge -- that aren't keyed to a specific hash.
+     */
+    public static final MutatingTableIterator mutatingTableIterator(Hashtable.Entry[] buckets) {
+      return new MutatingTableIterator(buckets);
+    }
+
+    /**
+     * Maps {@code keyHash} to a slot of {@code buckets} by masking with {@code length - 1}; this
+     * relies on the array length being a power of two.
+     */
+    public static final int bucketIndex(Object[] buckets, long keyHash) {
+      final int mask = buckets.length - 1;
+      return (int) (keyHash & mask);
+    }
+
+    /**
+     * Splices {@code entry} in as the new head of the chain at {@code bucketIndex}. Caller is
+     * responsible for size accounting -- this method only touches the chain pointers.
+     */
+    public static final void insertHeadEntry(
+        Hashtable.Entry[] buckets, int bucketIndex, Hashtable.Entry entry) {
+      final Hashtable.Entry previousHead = buckets[bucketIndex];
+      entry.setNext(previousHead);
+      buckets[bucketIndex] = entry;
+    }
+
+    /**
+     * Convenience overload of {@link #insertHeadEntry(Hashtable.Entry[], int, Hashtable.Entry)}
+     * that derives the bucket index from {@code keyHash}.
Use this when the caller has the hash but + * not the index; if the index has already been computed for another reason, prefer the + * int-taking overload to avoid the redundant mask. + */ + public static final void insertHeadEntry( + Hashtable.Entry[] buckets, long keyHash, Hashtable.Entry entry) { + insertHeadEntry(buckets, bucketIndex(buckets, keyHash), entry); + } + + /** + * Returns the head entry of the bucket that {@code keyHash} maps to, cast to the caller's + * concrete entry type. The unchecked cast lives here so the chain-walk loop at the call site + * doesn't need to thread a raw {@link Entry} variable through. + */ + @SuppressWarnings("unchecked") + public static final TEntry bucket( + Hashtable.Entry[] buckets, long keyHash) { + return (TEntry) buckets[bucketIndex(buckets, keyHash)]; + } + + /** + * Walks every entry in {@code buckets} and invokes {@code consumer} on it. The unchecked cast + * to {@code TEntry} lives here (mirroring {@link Entry#next()}) so callers don't have to + * sprinkle it across their own forEach loops. + */ + @SuppressWarnings("unchecked") + public static final void forEach( + Hashtable.Entry[] buckets, Consumer consumer) { + for (int i = 0; i < buckets.length; i++) { + for (Hashtable.Entry e = buckets[i]; e != null; e = e.next()) { + consumer.accept((TEntry) e); + } + } + } + + /** + * Context-passing variant of {@link #forEach(Hashtable.Entry[], Consumer)}. Pair a + * non-capturing {@link BiConsumer} (typically a {@code static final}) with side-band state + * passed as {@code context} to avoid a fresh-Consumer allocation each call. 
+ */ + @SuppressWarnings("unchecked") + public static final void forEach( + Hashtable.Entry[] buckets, T context, BiConsumer consumer) { + for (int i = 0; i < buckets.length; i++) { + for (Hashtable.Entry e = buckets[i]; e != null; e = e.next()) { + consumer.accept(context, (TEntry) e); + } + } + } + } + + /** + * Read-only iterator over entries in a single bucket whose {@code keyHash} matches a specific + * search hash. Cheaper than {@link MutatingBucketIterator} because it does not track the + * previous-node pointers required for splicing — use it when you only need to walk the chain. + * + *

For {@code remove} or {@code replace} operations, use {@link MutatingBucketIterator} + * instead. + * + *

The chain-walk work to find the next-match entry happens in {@link #next()} (and in the
+   * constructor for the first match); {@link #hasNext()} is an O(1) field read.
+   *
+   * @param <TEntry> the concrete entry type handed back by {@link #next()}
+   */
+  public static final class BucketIterator<TEntry extends Hashtable.Entry>
+      implements Iterator<TEntry> {
+    /** Hash an entry must carry to be returned by this iterator. */
+    private final long keyHash;
+
+    /** Next matching entry to hand out, or {@code null} once the chain is exhausted. */
+    private Hashtable.Entry nextEntry;
+
+    /**
+     * Positions the iterator on the first entry in {@code keyHash}'s bucket whose stored hash
+     * equals {@code keyHash}.
+     */
+    BucketIterator(Hashtable.Entry[] buckets, long keyHash) {
+      this.keyHash = keyHash;
+      this.nextEntry = firstMatch(buckets[Support.bucketIndex(buckets, keyHash)], keyHash);
+    }
+
+    @Override
+    public boolean hasNext() {
+      return this.nextEntry != null;
+    }
+
+    /**
+     * Returns the next entry whose hash matches and advances to the following match.
+     *
+     * @throws NoSuchElementException if no matching entry remains
+     */
+    @Override
+    @SuppressWarnings("unchecked")
+    public TEntry next() {
+      Hashtable.Entry cur = this.nextEntry;
+      if (cur == null) {
+        throw new NoSuchElementException("no next!");
+      }
+
+      // Look ahead now so hasNext() stays a plain field read.
+      this.nextEntry = firstMatch(cur.next(), this.keyHash);
+
+      return (TEntry) cur;
+    }
+
+    /** Returns the first entry at or after {@code start} whose hash equals {@code keyHash}. */
+    private static Hashtable.Entry firstMatch(Hashtable.Entry start, long keyHash) {
+      Hashtable.Entry cur = start;
+      while (cur != null && cur.keyHash != keyHash) {
+        cur = cur.next();
+      }
+      return cur;
+    }
+  }
+
+  /**
+   * Mutating iterator over entries in a single bucket whose {@code keyHash} matches a specific
+   * search hash. Supports {@link #remove()} and {@link #replace(Entry)} to splice the chain in
+   * place.
+   *
+   *

Carries previous-node pointers for the current entry and the next-match entry so that {@code + * remove} and {@code replace} can fix up the chain in O(1) without re-walking from the bucket + * head. After {@code remove} or {@code replace}, iteration may continue with another {@link + * #next()}. + * + *

The chain-walk work to find the next-match entry happens in {@link #next()} (and in the
+   * constructor for the first match); {@link #hasNext()} is an O(1) field read.
+   *
+   * @param <TEntry> the concrete entry type stored in the bucket array
+   */
+  public static final class MutatingBucketIterator<TEntry extends Hashtable.Entry>
+      implements Iterator<TEntry> {
+    /** Hash an entry must carry to be visited; also selects the bucket being walked. */
+    private final long keyHash;
+
+    /** Bucket array being edited; needed to re-point a bucket head on remove/replace. */
+    private final Hashtable.Entry[] buckets;
+
+    /** The entry prior to the last entry returned by next. Used for mutating operations. */
+    private Hashtable.Entry curPrevEntry;
+
+    /** The entry that was last returned by next; {@code null} before next and after remove. */
+    private Hashtable.Entry curEntry;
+
+    /** The entry prior to the next entry; {@code null} when the next entry is a bucket head. */
+    private Hashtable.Entry nextPrevEntry;
+
+    /** The next entry to be returned by next; {@code null} when iteration is exhausted. */
+    private Hashtable.Entry nextEntry;
+
+    /**
+     * Positions the iterator on the first entry in {@code keyHash}'s bucket whose stored hash
+     * equals {@code keyHash}, remembering that entry's predecessor for later splicing.
+     */
+    MutatingBucketIterator(Hashtable.Entry[] buckets, long keyHash) {
+      this.buckets = buckets;
+      this.keyHash = keyHash;
+
+      // An empty bucket falls straight through the loop with both pointers left null.
+      Hashtable.Entry prev = null;
+      Hashtable.Entry cur = buckets[Support.bucketIndex(buckets, keyHash)];
+      while (cur != null && cur.keyHash != keyHash) {
+        prev = cur;
+        cur = cur.next();
+      }
+      this.nextPrevEntry = prev;
+      this.nextEntry = cur;
+
+      // Nothing has been returned yet, so there is no current entry to remove or replace.
+      this.curEntry = null;
+      this.curPrevEntry = null;
+    }
+
+    @Override
+    public boolean hasNext() {
+      return (this.nextEntry != null);
+    }
+
+    /**
+     * Returns the next entry whose hash matches and advances to the following match.
+     *
+     * @throws NoSuchElementException if no matching entry remains
+     */
+    @Override
+    @SuppressWarnings("unchecked")
+    public TEntry next() {
+      Hashtable.Entry returned = this.nextEntry;
+      if (returned == null) {
+        throw new NoSuchElementException("no next!");
+      }
+
+      this.curEntry = returned;
+      this.curPrevEntry = this.nextPrevEntry;
+
+      // Look ahead for the following match, tracking its predecessor so a later remove/replace
+      // can splice without re-walking the chain.
+      Hashtable.Entry prev = returned;
+      Hashtable.Entry cur = returned.next();
+      while (cur != null && cur.keyHash != this.keyHash) {
+        prev = cur;
+        cur = cur.next();
+      }
+      this.nextPrevEntry = prev;
+      this.nextEntry = cur;
+
+      return (TEntry) returned;
+    }
+
+    /**
+     * Unlinks the entry last returned by {@link #next()} from its chain. Iteration may continue
+     * with another {@link #next()} afterwards.
+     *
+     * @throws IllegalStateException if {@link #next()} has not been called yet, or the current
+     *     entry has already been removed
+     */
+    @Override
+    public void remove() {
+      Hashtable.Entry oldCurEntry = this.curEntry;
+      if (oldCurEntry == null) {
+        throw new IllegalStateException();
+      }
+
+      Hashtable.Entry oldNext = oldCurEntry.next();
+      this.setPrevNext(oldNext);
+      // Detach the removed entry from the chain so stale references can't traverse back into
+      // the live chain and so a now-unreachable tail can be reclaimed by GC.
+      oldCurEntry.setNext(null);
+
+      // If the next match was directly after oldCurEntry, its predecessor is now
+      // curPrevEntry (oldCurEntry was just unlinked from the chain).
+      if (this.nextPrevEntry == oldCurEntry) {
+        this.nextPrevEntry = this.curPrevEntry;
+      }
+      this.curEntry = null;
+    }
+
+    /**
+     * Swaps {@code replacementEntry} into the chain slot of the entry last returned by {@link
+     * #next()}. The replaced entry is detached; the replacement becomes the current entry, so a
+     * following {@link #remove()} removes the replacement.
+     *
+     * <p>Passing the current entry itself is a no-op. The replacement is expected to carry the
+     * same {@code keyHash} this iterator was created with; that is not checked here.
+     *
+     * @param replacementEntry the entry to link in place of the current one
+     * @throws IllegalStateException if {@link #next()} has not been called yet, or the current
+     *     entry has been removed
+     */
+    public void replace(TEntry replacementEntry) {
+      Hashtable.Entry oldCurEntry = this.curEntry;
+      if (oldCurEntry == null) {
+        throw new IllegalStateException();
+      }
+
+      // Self-replacement must not touch the chain: the detach step below would otherwise null
+      // the next pointer of the very entry that was just linked in, dropping the rest of the
+      // bucket's chain.
+      if (replacementEntry == oldCurEntry) {
+        return;
+      }
+
+      Hashtable.Entry oldNext = oldCurEntry.next();
+      replacementEntry.setNext(oldNext);
+      this.setPrevNext(replacementEntry);
+      // Detach the replaced entry from the chain; the replacement now owns the chain slot.
+      oldCurEntry.setNext(null);
+
+      // If the next match was directly after oldCurEntry, its predecessor is now
+      // the replacement entry (which took oldCurEntry's chain slot).
+      if (this.nextPrevEntry == oldCurEntry) {
+        this.nextPrevEntry = replacementEntry;
+      }
+      this.curEntry = replacementEntry;
+    }
+
+    /**
+     * Points whatever precedes the current entry -- the bucket head slot when there is no
+     * predecessor, otherwise the predecessor's next pointer -- at {@code nextEntry}.
+     */
+    void setPrevNext(Hashtable.Entry nextEntry) {
+      if (this.curPrevEntry == null) {
+        Hashtable.Entry[] buckets = this.buckets;
+        buckets[Support.bucketIndex(buckets, this.keyHash)] = nextEntry;
+      } else {
+        this.curPrevEntry.setNext(nextEntry);
+      }
+    }
+  }
+
+  /**
+   * Mutating iterator over every entry in a bucket array, regardless of hash. Supports {@link
+   * #remove()} to unlink the entry last returned by {@link #next()}.
+   *
+   *

Walks buckets in array order; within a bucket, walks the chain head-to-tail. After {@code + * remove}, iteration may continue with another {@link #next()}. + * + *

Use this for sweeps -- eviction, expunge, full-table cleanup -- that aren't keyed to a + * specific hash. For per-bucket walks keyed to a search hash, use {@link MutatingBucketIterator}. + */ + public static final class MutatingTableIterator + implements Iterator { + private final Hashtable.Entry[] buckets; + + /** + * Index of the bucket holding {@link #nextEntry} (or holding {@link #curEntry} after remove). + */ + private int nextBucketIndex; + + /** + * Predecessor of {@link #nextEntry}, or {@code null} when {@code nextEntry} is the bucket head. + */ + private Hashtable.Entry nextPrevEntry; + + /** Next entry to be returned by {@link #next()}, or {@code null} if iteration is exhausted. */ + private Hashtable.Entry nextEntry; + + /** + * Bucket index that held the entry last returned by {@code next}; {@code -1} after {@code + * remove}. + */ + private int curBucketIndex = -1; + + /** + * Predecessor of the entry last returned by {@code next}, or {@code null} if it was the bucket + * head. + */ + private Hashtable.Entry curPrevEntry; + + /** + * Entry last returned by {@code next}; {@code null} before any call and after {@code remove}. 
+ */ + private Hashtable.Entry curEntry; + + MutatingTableIterator(Hashtable.Entry[] buckets) { + this.buckets = buckets; + seekFromBucket(0); + } + + @Override + public boolean hasNext() { + return this.nextEntry != null; + } + + @Override + @SuppressWarnings("unchecked") + public TEntry next() { + Hashtable.Entry e = this.nextEntry; + if (e == null) { + throw new NoSuchElementException("no next!"); + } + + this.curEntry = e; + this.curPrevEntry = this.nextPrevEntry; + this.curBucketIndex = this.nextBucketIndex; + + Hashtable.Entry n = e.next(); + if (n != null) { + this.nextPrevEntry = e; + this.nextEntry = n; + } else { + // walked off the end of this bucket; pick up at the next non-empty bucket + seekFromBucket(this.nextBucketIndex + 1); + } + return (TEntry) e; + } + + @Override + public void remove() { + Hashtable.Entry oldCurEntry = this.curEntry; + if (oldCurEntry == null) { + throw new IllegalStateException(); + } + + Hashtable.Entry oldNext = oldCurEntry.next(); + if (this.curPrevEntry == null) { + this.buckets[this.curBucketIndex] = oldNext; + } else { + this.curPrevEntry.setNext(oldNext); + } + // Detach the removed entry from the chain so stale references can't traverse back into + // the live chain and so a now-unreachable tail can be reclaimed by GC. + oldCurEntry.setNext(null); + + // If the next entry was the immediate chain successor of oldCurEntry, its predecessor is + // now what came before oldCurEntry (oldCurEntry was just unlinked). + if (this.nextPrevEntry == oldCurEntry) { + this.nextPrevEntry = this.curPrevEntry; + } + this.curEntry = null; + } + + /** + * Advance {@code nextBucketIndex} / {@code nextEntry} to the first non-empty bucket >= {@code + * from}. 
+ */ + private void seekFromBucket(int from) { + Hashtable.Entry[] thisBuckets = this.buckets; + for (int i = from; i < thisBuckets.length; i++) { + Hashtable.Entry head = thisBuckets[i]; + if (head != null) { + this.nextBucketIndex = i; + this.nextPrevEntry = null; + this.nextEntry = head; + return; + } + } + this.nextEntry = null; + this.nextPrevEntry = null; + } + } +} diff --git a/internal-api/src/main/java/datadog/trace/util/LongHashingUtils.java b/internal-api/src/main/java/datadog/trace/util/LongHashingUtils.java new file mode 100644 index 00000000000..88104baa8d8 --- /dev/null +++ b/internal-api/src/main/java/datadog/trace/util/LongHashingUtils.java @@ -0,0 +1,162 @@ +package datadog.trace.util; + +/** + * This class is intended to be a drop-in replacement for the hashing portions of java.util.Objects. + * This class provides more convenience methods for hashing primitives and includes overrides for + * hash that take many argument lengths to avoid var-args allocation. + */ +public final class LongHashingUtils { + private LongHashingUtils() {} + + public static final long hash(Object obj) { + return obj == null ? 
Long.MIN_VALUE : obj.hashCode(); + } + + public static final long hash(boolean value) { + return Boolean.hashCode(value); + } + + public static final long hash(char value) { + return Character.hashCode(value); + } + + public static final long hash(byte value) { + return Byte.hashCode(value); + } + + public static final long hash(short value) { + return Short.hashCode(value); + } + + public static final long hash(int value) { + return Integer.hashCode(value); + } + + public static final long hash(long value) { + return value; + } + + public static final long hash(float value) { + return Float.hashCode(value); + } + + public static final long hash(double value) { + return Double.doubleToRawLongBits(value); + } + + public static final long hash(Object obj0, Object obj1) { + return hash(intHash(obj0), intHash(obj1)); + } + + static final long hash(int hash0, int hash1) { + return 31L * hash0 + hash1; + } + + private static final int intHash(Object obj) { + return obj == null ? 0 : obj.hashCode(); + } + + public static final long hash(Object obj0, Object obj1, Object obj2) { + return hash(intHash(obj0), intHash(obj1), intHash(obj2)); + } + + static final long hash(int hash0, int hash1, int hash2) { + // DQH - Micro-optimizing, 31L * 31L will constant fold + // Since there are multiple execution ports for load & store, + // this will make good use of the core. + return 31L * 31L * hash0 + 31L * hash1 + hash2; + } + + public static final long hash(Object obj0, Object obj1, Object obj2, Object obj3) { + return hash(intHash(obj0), intHash(obj1), intHash(obj2), intHash(obj3)); + } + + static final long hash(int hash0, int hash1, int hash2, int hash3) { + // DQH - Micro-optimizing, 31L * 31L will constant fold + // Since there are multiple execution ports for load & store, + // this will make good use of the core. 
+ return 31L * 31L * 31L * hash0 + 31L * 31L * hash1 + 31L * hash2 + hash3; + } + + public static final long hash(Object obj0, Object obj1, Object obj2, Object obj3, Object obj4) { + return hash(intHash(obj0), intHash(obj1), intHash(obj2), intHash(obj3), intHash(obj4)); + } + + static final long hash(int hash0, int hash1, int hash2, int hash3, int hash4) { + // DQH - Micro-optimizing, 31L * 31L will constant fold + // Since there are multiple execution ports for load & store, + // this will make good use of the core. + return 31L * 31L * 31L * 31L * hash0 + + 31L * 31L * 31L * hash1 + + 31L * 31L * hash2 + + 31L * hash3 + + hash4; + } + + @Deprecated + public static final long hash(int[] hashes) { + long result = 0; + for (int hash : hashes) { + result = addToHash(result, hash); + } + return result; + } + + public static final long addToHash(long hash, int value) { + return 31L * hash + value; + } + + public static final long addToHash(long hash, Object obj) { + return addToHash(hash, intHash(obj)); + } + + public static final long addToHash(long hash, boolean value) { + return addToHash(hash, Boolean.hashCode(value)); + } + + public static final long addToHash(long hash, char value) { + return addToHash(hash, Character.hashCode(value)); + } + + public static final long addToHash(long hash, byte value) { + return addToHash(hash, Byte.hashCode(value)); + } + + public static final long addToHash(long hash, short value) { + return addToHash(hash, Short.hashCode(value)); + } + + public static final long addToHash(long hash, long value) { + return addToHash(hash, Long.hashCode(value)); + } + + public static final long addToHash(long hash, float value) { + return addToHash(hash, Float.hashCode(value)); + } + + public static final long addToHash(long hash, double value) { + return addToHash(hash, Double.hashCode(value)); + } + + public static final long hash(Iterable objs) { + long result = 0; + for (Object obj : objs) { + result = addToHash(result, obj); + } + return 
result; + } + + /** + * Calling this var-arg version can result in large amounts of allocation (see HashingBenchmark) + * Rather than calliing this method, add another override of hash that handles a larger number of + * arguments or use calls to addToHash. + */ + @Deprecated + public static final long hash(Object[] objs) { + long result = 0; + for (Object obj : objs) { + result = addToHash(result, obj); + } + return result; + } +} diff --git a/internal-api/src/test/java/datadog/trace/util/HashingUtilsTest.java b/internal-api/src/test/java/datadog/trace/util/HashingUtilsTest.java index 185d5a4f2e4..1f171852866 100644 --- a/internal-api/src/test/java/datadog/trace/util/HashingUtilsTest.java +++ b/internal-api/src/test/java/datadog/trace/util/HashingUtilsTest.java @@ -99,7 +99,7 @@ public void hash5() { String str3 = "foobar"; String str4 = "hello"; - assertNotEquals(0, HashingUtils.hash(str0, str1, str2, str3)); + assertNotEquals(0, HashingUtils.hash(str0, str1, str2, str3, str4)); String clone0 = clone(str0); String clone1 = clone(str1); @@ -110,6 +110,11 @@ public void hash5() { assertEquals( HashingUtils.hash(str0, str1, str2, str3, str4), HashingUtils.hash(clone0, clone1, clone2, clone3, clone4)); + + // The 5th argument must actually affect the hash (regression for a missing-arg bug). 
+ assertNotEquals( + HashingUtils.hash(str0, str1, str2, str3, str4), + HashingUtils.hash(str0, str1, str2, str3, "different")); } @Test diff --git a/internal-api/src/test/java/datadog/trace/util/HashtableD1Test.java b/internal-api/src/test/java/datadog/trace/util/HashtableD1Test.java new file mode 100644 index 00000000000..11cf93fc1dd --- /dev/null +++ b/internal-api/src/test/java/datadog/trace/util/HashtableD1Test.java @@ -0,0 +1,235 @@ +package datadog.trace.util; + +import static datadog.trace.util.HashtableTestEntries.CollidingKey; +import static datadog.trace.util.HashtableTestEntries.CollidingKeyEntry; +import static datadog.trace.util.HashtableTestEntries.StringIntEntry; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; + +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.Test; + +class HashtableD1Test { + + @Test + void emptyTableLookupReturnsNull() { + Hashtable.D1 table = new Hashtable.D1<>(8); + assertNull(table.get("missing")); + assertEquals(0, table.size()); + } + + @Test + void insertedEntryIsRetrievable() { + Hashtable.D1 table = new Hashtable.D1<>(8); + StringIntEntry e = new StringIntEntry("foo", 1); + table.insert(e); + assertEquals(1, table.size()); + assertSame(e, table.get("foo")); + } + + @Test + void multipleInsertsRetrievableSeparately() { + Hashtable.D1 table = new Hashtable.D1<>(16); + StringIntEntry a = new StringIntEntry("alpha", 1); + StringIntEntry b = new StringIntEntry("beta", 2); + StringIntEntry c = new StringIntEntry("gamma", 3); + table.insert(a); + table.insert(b); + table.insert(c); + assertEquals(3, table.size()); + assertSame(a, table.get("alpha")); + assertSame(b, table.get("beta")); + assertSame(c, table.get("gamma")); + } + + @Test + void inPlaceMutationVisibleViaSubsequentGet() { + Hashtable.D1 table 
= new Hashtable.D1<>(8); + table.insert(new StringIntEntry("counter", 0)); + for (int i = 0; i < 10; i++) { + StringIntEntry e = table.get("counter"); + e.value++; + } + assertEquals(10, table.get("counter").value); + } + + @Test + void removeUnlinksEntryAndDecrementsSize() { + Hashtable.D1 table = new Hashtable.D1<>(8); + table.insert(new StringIntEntry("a", 1)); + table.insert(new StringIntEntry("b", 2)); + assertEquals(2, table.size()); + + StringIntEntry removed = table.remove("a"); + assertNotNull(removed); + assertEquals("a", removed.key); + assertEquals(1, table.size()); + assertNull(table.get("a")); + assertNotNull(table.get("b")); + } + + @Test + void removeNonexistentReturnsNullAndDoesNotChangeSize() { + Hashtable.D1 table = new Hashtable.D1<>(8); + table.insert(new StringIntEntry("a", 1)); + assertNull(table.remove("nope")); + assertEquals(1, table.size()); + } + + @Test + void insertOrReplaceReturnsPriorEntryOrNullOnInsert() { + Hashtable.D1 table = new Hashtable.D1<>(8); + StringIntEntry first = new StringIntEntry("k", 1); + assertNull(table.insertOrReplace(first), "fresh insert returns null"); + assertEquals(1, table.size()); + + StringIntEntry second = new StringIntEntry("k", 2); + assertSame(first, table.insertOrReplace(second), "replace returns the prior entry"); + assertEquals(1, table.size()); + assertSame(second, table.get("k"), "new entry visible after replace"); + } + + @Test + void clearEmptiesTheTable() { + Hashtable.D1 table = new Hashtable.D1<>(8); + table.insert(new StringIntEntry("a", 1)); + table.insert(new StringIntEntry("b", 2)); + table.clear(); + assertEquals(0, table.size()); + assertNull(table.get("a")); + // Reinsertion works after clear + table.insert(new StringIntEntry("a", 99)); + assertEquals(99, table.get("a").value); + } + + @Test + void forEachVisitsEveryInsertedEntry() { + Hashtable.D1 table = new Hashtable.D1<>(8); + table.insert(new StringIntEntry("a", 1)); + table.insert(new StringIntEntry("b", 2)); + table.insert(new 
StringIntEntry("c", 3)); + Map seen = new HashMap<>(); + table.forEach(e -> seen.put(e.key, e.value)); + assertEquals(3, seen.size()); + assertEquals(1, seen.get("a")); + assertEquals(2, seen.get("b")); + assertEquals(3, seen.get("c")); + } + + @Test + void forEachWithContextPassesContextToConsumer() { + Hashtable.D1 table = new Hashtable.D1<>(8); + table.insert(new StringIntEntry("a", 10)); + table.insert(new StringIntEntry("b", 20)); + table.insert(new StringIntEntry("c", 30)); + Map seen = new HashMap<>(); + table.forEach(seen, (ctx, e) -> ctx.put(e.key, e.value)); + assertEquals(3, seen.size()); + assertEquals(10, seen.get("a")); + assertEquals(20, seen.get("b")); + assertEquals(30, seen.get("c")); + } + + @Test + void forEachWithContextOnEmptyTableDoesNothing() { + Hashtable.D1 table = new Hashtable.D1<>(8); + Map seen = new HashMap<>(); + table.forEach(seen, (ctx, e) -> ctx.put(e.key, e.value)); + assertEquals(0, seen.size()); + } + + @Test + void nullKeyIsPermittedAndDistinctFromAbsent() { + Hashtable.D1 table = new Hashtable.D1<>(8); + assertNull(table.get(null)); + StringIntEntry nullKeyed = new StringIntEntry(null, 7); + table.insert(nullKeyed); + assertSame(nullKeyed, table.get(null)); + assertEquals(1, table.size()); + assertSame(nullKeyed, table.remove(null)); + assertEquals(0, table.size()); + } + + @Test + void hashCollisionsResolveByEquality() { + // Force two distinct keys with the same hashCode -- the chain must still distinguish them + // via matches(). 
+ Hashtable.D1 table = new Hashtable.D1<>(4); + CollidingKey k1 = new CollidingKey("first", 17); + CollidingKey k2 = new CollidingKey("second", 17); + CollidingKeyEntry e1 = new CollidingKeyEntry(k1, 100); + CollidingKeyEntry e2 = new CollidingKeyEntry(k2, 200); + table.insert(e1); + table.insert(e2); + assertEquals(2, table.size()); + assertSame(e1, table.get(k1)); + assertSame(e2, table.get(k2)); + } + + @Test + void hashCollisionsThenRemoveLeavesOtherIntact() { + Hashtable.D1 table = new Hashtable.D1<>(4); + CollidingKey k1 = new CollidingKey("first", 17); + CollidingKey k2 = new CollidingKey("second", 17); + CollidingKey k3 = new CollidingKey("third", 17); + table.insert(new CollidingKeyEntry(k1, 1)); + table.insert(new CollidingKeyEntry(k2, 2)); + table.insert(new CollidingKeyEntry(k3, 3)); + table.remove(k2); + assertEquals(2, table.size()); + assertNotNull(table.get(k1)); + assertNull(table.get(k2)); + assertNotNull(table.get(k3)); + } + + @Test + void getOrCreateOnMissBuildsEntryViaCreator() { + Hashtable.D1 table = new Hashtable.D1<>(8); + int[] createCount = {0}; + StringIntEntry created = + table.getOrCreate( + "foo", + k -> { + createCount[0]++; + return new StringIntEntry(k, 42); + }); + assertNotNull(created); + assertEquals("foo", created.key); + assertEquals(42, created.value); + assertEquals(1, table.size()); + assertEquals(1, createCount[0]); + assertSame(created, table.get("foo")); + } + + @Test + void getOrCreateOnHitSkipsCreator() { + Hashtable.D1 table = new Hashtable.D1<>(8); + StringIntEntry seeded = new StringIntEntry("foo", 1); + table.insert(seeded); + int[] createCount = {0}; + StringIntEntry got = + table.getOrCreate( + "foo", + k -> { + createCount[0]++; + return new StringIntEntry(k, 999); + }); + assertSame(seeded, got); + assertEquals(1, table.size()); + assertEquals(0, createCount[0]); + } + + @Test + void getOrCreateNullKeyIsPermitted() { + Hashtable.D1 table = new Hashtable.D1<>(8); + StringIntEntry created = 
table.getOrCreate(null, k -> new StringIntEntry(k, 7)); + assertNotNull(created); + assertNull(created.key); + assertEquals(7, created.value); + assertSame(created, table.getOrCreate(null, k -> new StringIntEntry(k, 999))); + assertEquals(1, table.size()); + } +} diff --git a/internal-api/src/test/java/datadog/trace/util/HashtableD2Test.java b/internal-api/src/test/java/datadog/trace/util/HashtableD2Test.java new file mode 100644 index 00000000000..edcb0ad9f74 --- /dev/null +++ b/internal-api/src/test/java/datadog/trace/util/HashtableD2Test.java @@ -0,0 +1,129 @@ +package datadog.trace.util; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.HashSet; +import java.util.Set; +import org.junit.jupiter.api.Test; + +class HashtableD2Test { + + @Test + void pairKeysParticipateInIdentity() { + Hashtable.D2 table = new Hashtable.D2<>(8); + PairEntry ab = new PairEntry("a", 1, 100); + PairEntry ac = new PairEntry("a", 2, 200); + PairEntry bb = new PairEntry("b", 1, 300); + table.insert(ab); + table.insert(ac); + table.insert(bb); + assertEquals(3, table.size()); + assertSame(ab, table.get("a", 1)); + assertSame(ac, table.get("a", 2)); + assertSame(bb, table.get("b", 1)); + assertNull(table.get("a", 3)); + } + + @Test + void removePairUnlinks() { + Hashtable.D2 table = new Hashtable.D2<>(8); + PairEntry ab = new PairEntry("a", 1, 100); + PairEntry ac = new PairEntry("a", 2, 200); + table.insert(ab); + table.insert(ac); + assertSame(ab, table.remove("a", 1)); + assertEquals(1, table.size()); + assertNull(table.get("a", 1)); + assertSame(ac, table.get("a", 2)); + } + + @Test + void insertOrReplaceMatchesOnBothKeys() { + Hashtable.D2 table = new Hashtable.D2<>(8); + PairEntry first = new PairEntry("k", 7, 
1); + assertNull(table.insertOrReplace(first)); + PairEntry second = new PairEntry("k", 7, 2); + assertSame(first, table.insertOrReplace(second)); + // Different second-key: should insert new, not replace + PairEntry third = new PairEntry("k", 8, 3); + assertNull(table.insertOrReplace(third)); + assertEquals(2, table.size()); + } + + @Test + void forEachVisitsBothPairs() { + Hashtable.D2 table = new Hashtable.D2<>(8); + table.insert(new PairEntry("a", 1, 100)); + table.insert(new PairEntry("b", 2, 200)); + Set seen = new HashSet<>(); + table.forEach(e -> seen.add(e.key1 + ":" + e.key2)); + assertEquals(2, seen.size()); + assertTrue(seen.contains("a:1")); + assertTrue(seen.contains("b:2")); + } + + @Test + void forEachWithContextPassesContextToConsumer() { + Hashtable.D2 table = new Hashtable.D2<>(8); + table.insert(new PairEntry("a", 1, 100)); + table.insert(new PairEntry("b", 2, 200)); + Set seen = new HashSet<>(); + table.forEach(seen, (ctx, e) -> ctx.add(e.key1 + ":" + e.key2)); + assertEquals(2, seen.size()); + assertTrue(seen.contains("a:1")); + assertTrue(seen.contains("b:2")); + } + + @Test + void getOrCreateOnMissBuildsEntryViaCreator() { + Hashtable.D2 table = new Hashtable.D2<>(8); + int[] createCount = {0}; + PairEntry created = + table.getOrCreate( + "a", + 1, + (k1, k2) -> { + createCount[0]++; + return new PairEntry(k1, k2, 100); + }); + assertNotNull(created); + assertEquals("a", created.key1); + assertEquals(Integer.valueOf(1), created.key2); + assertEquals(100, created.value); + assertEquals(1, table.size()); + assertEquals(1, createCount[0]); + assertSame(created, table.get("a", 1)); + } + + @Test + void getOrCreateOnHitSkipsCreator() { + Hashtable.D2 table = new Hashtable.D2<>(8); + PairEntry seeded = new PairEntry("a", 1, 100); + table.insert(seeded); + int[] createCount = {0}; + PairEntry got = + table.getOrCreate( + "a", + 1, + (k1, k2) -> { + createCount[0]++; + return new PairEntry(k1, k2, 999); + }); + assertSame(seeded, got); + 
assertEquals(1, table.size()); + assertEquals(0, createCount[0]); + } + + private static final class PairEntry extends Hashtable.D2.Entry { + int value; + + PairEntry(String key1, Integer key2, int value) { + super(key1, key2); + this.value = value; + } + } +} diff --git a/internal-api/src/test/java/datadog/trace/util/HashtableTest.java b/internal-api/src/test/java/datadog/trace/util/HashtableTest.java new file mode 100644 index 00000000000..2992279be6d --- /dev/null +++ b/internal-api/src/test/java/datadog/trace/util/HashtableTest.java @@ -0,0 +1,353 @@ +package datadog.trace.util; + +import static datadog.trace.util.HashtableTestEntries.CollidingKey; +import static datadog.trace.util.HashtableTestEntries.CollidingKeyEntry; +import static datadog.trace.util.HashtableTestEntries.StringIntEntry; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import datadog.trace.util.Hashtable.BucketIterator; +import datadog.trace.util.Hashtable.MutatingBucketIterator; +import datadog.trace.util.Hashtable.MutatingTableIterator; +import datadog.trace.util.Hashtable.Support; +import java.util.HashSet; +import java.util.NoSuchElementException; +import java.util.Set; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +class HashtableTest { + + // ============ Support ============ + + @Nested + class SupportTests { + + @Test + void createRoundsCapacityUpToPowerOfTwo() { + // The Hashtable.D1 / D2 size() reflects entries, but the bucket array length is + // a power of two >= requestedCapacity. We can verify indirectly via bucketIndex masking. 
+ Hashtable.Entry[] buckets = Support.create(5); + // Length must be a power of two >= 5 + int len = buckets.length; + assertTrue(len >= 5); + assertEquals(0, len & (len - 1), "length must be a power of two"); + } + + @Test + void sizeForReturnsAtLeastOne() { + assertEquals(1, Support.sizeFor(0)); + assertEquals(1, Support.sizeFor(1)); + } + + @Test + void sizeForRoundsUpToPowerOfTwo() { + assertEquals(2, Support.sizeFor(2)); + assertEquals(4, Support.sizeFor(3)); + assertEquals(4, Support.sizeFor(4)); + assertEquals(8, Support.sizeFor(5)); + assertEquals(1 << 30, Support.sizeFor(1 << 30)); + } + + @Test + void sizeForRejectsCapacityAboveMax() { + assertThrows(IllegalArgumentException.class, () -> Support.sizeFor((1 << 30) + 1)); + assertThrows(IllegalArgumentException.class, () -> Support.sizeFor(Integer.MAX_VALUE)); + } + + @Test + void sizeForRejectsNegativeCapacity() { + assertThrows(IllegalArgumentException.class, () -> Support.sizeFor(-1)); + assertThrows(IllegalArgumentException.class, () -> Support.sizeFor(Integer.MIN_VALUE)); + } + + @Test + void bucketIndexIsBoundedByArrayLength() { + Hashtable.Entry[] buckets = Support.create(16); + for (long h : new long[] {0L, 1L, -1L, Long.MIN_VALUE, Long.MAX_VALUE, 12345L}) { + int idx = Support.bucketIndex(buckets, h); + assertTrue(idx >= 0 && idx < buckets.length, "bucketIndex out of range for hash " + h); + } + } + + @Test + void clearNullsAllBuckets() { + Hashtable.Entry[] buckets = Support.create(4); + buckets[0] = new StringIntEntry("x", 1); + buckets[1] = new StringIntEntry("y", 2); + Support.clear(buckets); + for (Hashtable.Entry b : buckets) { + assertNull(b); + } + } + + @Test + void maxRatioScalesTargetForLoadFactor() { + // 75% load factor => bucket array sized at requestedSize * 4/3, rounded up to power of 2. + // 12 * (4/3) = 16 entries, rounded up to power-of-2 length = 16. 
+ assertEquals(4.0f / 3.0f, Support.MAX_RATIO); + Hashtable.Entry[] buckets = Support.create(12, Support.MAX_RATIO); + assertEquals(16, buckets.length); + } + + @Test + void createWithScaleRoundsUpToPowerOfTwo() { + // 7 * 1.5 = 10.5 -> (int) 10 -> sizeFor rounds up to next power-of-two = 16 + Hashtable.Entry[] buckets = Support.create(7, 1.5f); + assertEquals(16, buckets.length); + } + + @Test + void insertHeadEntrySplicesAsNewHead() { + Hashtable.Entry[] buckets = Support.create(4); + StringIntEntry a = new StringIntEntry("a", 1); + StringIntEntry b = new StringIntEntry("b", 2); + Support.insertHeadEntry(buckets, 0, a); + assertSame(a, buckets[0]); + assertNull(a.next()); + + Support.insertHeadEntry(buckets, 0, b); + assertSame(b, buckets[0]); + assertSame(a, b.next()); + assertNull(a.next()); + } + } + + // ============ BucketIterator ============ + + @Nested + class BucketIteratorTests { + + @Test + void walksOnlyMatchingHash() { + // Build a bucket array with two entries that share a bucket but have different hashes. + // Use Hashtable.D1 to seed; then call Support.bucketIterator directly with the matching + // hash and verify it only returns the matching entry. + Hashtable.D1 table = new Hashtable.D1<>(4); + CollidingKey k1 = new CollidingKey("first", 17); + CollidingKey k2 = new CollidingKey("second", 17); + CollidingKey k3 = new CollidingKey("third", 17); + table.insert(new CollidingKeyEntry(k1, 1)); + table.insert(new CollidingKeyEntry(k2, 2)); + table.insert(new CollidingKeyEntry(k3, 3)); + // All three share the same hash (17), so a bucket iterator over hash=17 yields all three. 
+ BucketIterator it = Support.bucketIterator(table.buckets, 17L); + int count = 0; + while (it.hasNext()) { + assertNotNull(it.next()); + count++; + } + assertEquals(3, count); + } + + @Test + void exhaustedIteratorThrowsNoSuchElement() { + Hashtable.D1 table = new Hashtable.D1<>(4); + table.insert(new StringIntEntry("only", 1)); + long h = Hashtable.D1.Entry.hash("only"); + BucketIterator it = Support.bucketIterator(table.buckets, h); + it.next(); + assertFalse(it.hasNext()); + assertThrows(NoSuchElementException.class, it::next); + } + } + + // ============ MutatingBucketIterator ============ + + @Nested + class MutatingBucketIteratorTests { + + @Test + void removeFromHeadOfChainUnlinks() { + // Make three entries with the same hash so they chain in one bucket + Hashtable.D1 table = new Hashtable.D1<>(4); + CollidingKey k1 = new CollidingKey("first", 17); + CollidingKey k2 = new CollidingKey("second", 17); + CollidingKey k3 = new CollidingKey("third", 17); + table.insert(new CollidingKeyEntry(k1, 1)); + table.insert(new CollidingKeyEntry(k2, 2)); + table.insert(new CollidingKeyEntry(k3, 3)); + + MutatingBucketIterator it = + Support.mutatingBucketIterator(table.buckets, 17L); + it.next(); // first match (head of chain in insertion-reverse order) + it.remove(); + // Two should remain + int remaining = 0; + while (it.hasNext()) { + it.next(); + remaining++; + } + assertEquals(2, remaining); + // And the table still finds the survivors via get(...) + // (which entry was the head depends on insertion order; we just verify count + that two + // of the three keys are still retrievable.) 
+ int found = 0; + for (CollidingKey k : new CollidingKey[] {k1, k2, k3}) { + if (table.get(k) != null) { + found++; + } + } + assertEquals(2, found); + } + + /** Replaces the first entry the bucket iterator returns; both colliding keys must still resolve and the lookup must see the new value. */ @Test + void replaceSwapsEntryAndPreservesChain() { + Hashtable.D1 table = new Hashtable.D1<>(4); + CollidingKey k1 = new CollidingKey("first", 17); + CollidingKey k2 = new CollidingKey("second", 17); + CollidingKeyEntry e1 = new CollidingKeyEntry(k1, 1); + CollidingKeyEntry e2 = new CollidingKeyEntry(k2, 2); + table.insert(e1); + table.insert(e2); + + MutatingBucketIterator it = + Support.mutatingBucketIterator(table.buckets, 17L); + CollidingKeyEntry first = it.next(); + CollidingKeyEntry replacement = new CollidingKeyEntry(first.key, 999); + it.replace(replacement); + // Both entries still in the chain + assertNotNull(table.get(k1)); + assertNotNull(table.get(k2)); + // The replaced one now has value 999 + assertEquals(999, table.get(first.key).value); + } + + /** Calling remove() on a bucket iterator before any next() must throw IllegalStateException. */ @Test + void removeWithoutNextThrows() { + Hashtable.D1 table = new Hashtable.D1<>(4); + table.insert(new StringIntEntry("a", 1)); + MutatingBucketIterator it = + Support.mutatingBucketIterator(table.buckets, Hashtable.D1.Entry.hash("a")); + assertThrows(IllegalStateException.class, it::remove); + } + } + + // ============ MutatingTableIterator ============ + + /** Tests for the iterator obtained from Support.mutatingTableIterator over a table's buckets. */ @Nested + class MutatingTableIteratorTests { + + /** Iterating a table that holds the keys a, b and c must yield exactly those three keys. */ @Test + void walksEveryEntryAcrossBuckets() { + Hashtable.D1 table = new Hashtable.D1<>(16); + table.insert(new StringIntEntry("a", 1)); + table.insert(new StringIntEntry("b", 2)); + table.insert(new StringIntEntry("c", 3)); + + Set seen = new HashSet<>(); + for (MutatingTableIterator it = Support.mutatingTableIterator(table.buckets); + it.hasNext(); ) { + seen.add(it.next().key); + } + assertEquals(3, seen.size()); + assertTrue(seen.contains("a")); + assertTrue(seen.contains("b")); + assertTrue(seen.contains("c")); + } + + /** On a table with no entries hasNext() must be false and next() must throw NoSuchElementException. */ @Test + void emptyTableIteratorIsExhausted() { + Hashtable.D1 table = new Hashtable.D1<>(8); + MutatingTableIterator it = 
Support.mutatingTableIterator(table.buckets); + assertFalse(it.hasNext()); + assertThrows(NoSuchElementException.class, it::next); + } + + /** Removes the first entry the table iterator returns; the other colliding key must stay retrievable and the removed key must not. */ @Test + void removeUnlinksBucketHead() { + Hashtable.D1 table = new Hashtable.D1<>(4); + CollidingKey k1 = new CollidingKey("first", 17); + CollidingKey k2 = new CollidingKey("second", 17); + table.insert(new CollidingKeyEntry(k1, 1)); + table.insert(new CollidingKeyEntry(k2, 2)); + + // The head of the chain is whichever was inserted last (insert prepends). + MutatingTableIterator it = Support.mutatingTableIterator(table.buckets); + CollidingKeyEntry head = it.next(); + it.remove(); + + // Survivor still reachable via the table; removed one is not. + CollidingKey survivorKey = head.key.equals(k1) ? k2 : k1; + assertNotNull(table.get(survivorKey)); + assertNull(table.get(head.key)); + } + + /** Removes the second of three colliding entries; the other two must stay retrievable and the iterator must yield exactly one more entry. */ @Test + void removeUnlinksMidChainEntry() { + Hashtable.D1 table = new Hashtable.D1<>(4); + CollidingKey k1 = new CollidingKey("first", 17); + CollidingKey k2 = new CollidingKey("second", 17); + CollidingKey k3 = new CollidingKey("third", 17); + table.insert(new CollidingKeyEntry(k1, 1)); + table.insert(new CollidingKeyEntry(k2, 2)); + table.insert(new CollidingKeyEntry(k3, 3)); + + // Walk to the second entry, remove it. + MutatingTableIterator it = Support.mutatingTableIterator(table.buckets); + it.next(); + CollidingKeyEntry victim = it.next(); + it.remove(); + + assertNull(table.get(victim.key)); + // The remaining two keys still resolve. + int remaining = 0; + for (CollidingKey k : new CollidingKey[] {k1, k2, k3}) { + if (table.get(k) != null) { + remaining++; + } + } + assertEquals(2, remaining); + + // Iteration can continue past a remove and yield the third entry. 
+ assertTrue(it.hasNext()); + assertNotNull(it.next()); + assertFalse(it.hasNext()); + } + + /** After removing the first entry returned from a three-entry table, the iterator must yield exactly two more entries. */ @Test + void removeSkipsOverEmptyBuckets() { + // Three distinct keys that land in different buckets (low entry count vs large bucket array + // makes empty buckets between them very likely). Verify the iterator skips empties cleanly + // after a remove. + Hashtable.D1 table = new Hashtable.D1<>(64); + table.insert(new StringIntEntry("alpha", 1)); + table.insert(new StringIntEntry("beta", 2)); + table.insert(new StringIntEntry("gamma", 3)); + + MutatingTableIterator it = Support.mutatingTableIterator(table.buckets); + it.next(); + it.remove(); + int remaining = 0; + while (it.hasNext()) { + it.next(); + remaining++; + } + assertEquals(2, remaining); + } + + /** Calling remove() on a table iterator before any next() must throw IllegalStateException. */ @Test + void removeWithoutNextThrows() { + Hashtable.D1 table = new Hashtable.D1<>(4); + table.insert(new StringIntEntry("a", 1)); + MutatingTableIterator it = Support.mutatingTableIterator(table.buckets); + assertThrows(IllegalStateException.class, it::remove); + } + + /** A second remove() with no next() in between must throw IllegalStateException. */ @Test + void removeTwiceWithoutInterveningNextThrows() { + Hashtable.D1 table = new Hashtable.D1<>(4); + table.insert(new StringIntEntry("a", 1)); + table.insert(new StringIntEntry("b", 2)); + MutatingTableIterator it = Support.mutatingTableIterator(table.buckets); + it.next(); + it.remove(); + assertThrows(IllegalStateException.class, it::remove); + } + } +} diff --git a/internal-api/src/test/java/datadog/trace/util/HashtableTestEntries.java b/internal-api/src/test/java/datadog/trace/util/HashtableTestEntries.java new file mode 100644 index 00000000000..e657028ee8b --- /dev/null +++ b/internal-api/src/test/java/datadog/trace/util/HashtableTestEntries.java @@ -0,0 +1,54 @@ +package datadog.trace.util; + +/** Shared test entry types for {@link HashtableTest}, {@link HashtableD1Test}, and friends. 
*/ +final class HashtableTestEntries { + private HashtableTestEntries() {} /* private constructor: this class only groups the nested entry and key types below */ + + /** Test entry that passes a String key to the Hashtable.D1.Entry constructor and carries a mutable int value. */ static final class StringIntEntry extends Hashtable.D1.Entry { + int value; + + StringIntEntry(String key, int value) { + super(key); + this.value = value; + } + } + + /** Key whose hashCode is fully controllable, to force chain collisions deterministically. */ + static final class CollidingKey { + final String label; + final int hash; + + CollidingKey(String label, int hash) { + this.label = label; + this.hash = hash; + } + + /** Returns the hash value given to the constructor, regardless of the label. */ @Override + public int hashCode() { + return hash; + } + + /** Equal only to another CollidingKey that has both the same hash and the same label. */ @Override + public boolean equals(Object o) { + if (!(o instanceof CollidingKey)) { + return false; + } + CollidingKey that = (CollidingKey) o; + return hash == that.hash && label.equals(that.label); + } + + @Override + public String toString() { + return "CollidingKey(" + label + ", " + hash + ")"; + } + } + + /** Test entry that passes a CollidingKey to the Hashtable.D1.Entry constructor and carries a mutable int value. */ static final class CollidingKeyEntry extends Hashtable.D1.Entry { + int value; + + CollidingKeyEntry(CollidingKey key, int value) { + super(key); + this.value = value; + } + } +} diff --git a/internal-api/src/test/java/datadog/trace/util/LongHashingUtilsTest.java b/internal-api/src/test/java/datadog/trace/util/LongHashingUtilsTest.java new file mode 100644 index 00000000000..795c182df18 --- /dev/null +++ b/internal-api/src/test/java/datadog/trace/util/LongHashingUtilsTest.java @@ -0,0 +1,159 @@ +package datadog.trace.util; + +import static datadog.trace.util.LongHashingUtils.addToHash; +import static datadog.trace.util.LongHashingUtils.hash; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; + +import java.util.Arrays; +import java.util.Objects; +import org.junit.jupiter.api.Test; + +/** Unit tests for the hash and addToHash overloads statically imported from LongHashingUtils. */ class LongHashingUtilsTest { + + // ----- single-value overloads ----- + + /** hash(Object) must equal the hashCode of a non-null argument and Long.MIN_VALUE for a null argument. */ @Test + void hashOfObjectReturnsHashCodeOrSentinelForNull() { + Object o = new Object(); + assertEquals(o.hashCode(), hash(o)); + assertEquals(Long.MIN_VALUE, 
hash((Object) null)); + } + + /** For the sampled values, each primitive hash overload must equal the boxed type's static hashCode; the long overload must return its argument and the double overload must return Double.doubleToRawLongBits. */ @Test + void primitiveOverloadsMatchBoxedHashCodes() { + assertEquals(Boolean.hashCode(true), hash(true)); + assertEquals(Boolean.hashCode(false), hash(false)); + assertEquals(Character.hashCode('x'), hash('x')); + assertEquals(Byte.hashCode((byte) 42), hash((byte) 42)); + assertEquals(Short.hashCode((short) -7), hash((short) -7)); + assertEquals(Integer.hashCode(123456), hash(123456)); + assertEquals(123456L, hash(123456L)); + assertEquals(Float.hashCode(3.14f), hash(3.14f)); + assertEquals(Double.doubleToRawLongBits(2.71828), hash(2.71828)); + } + + // ----- multi-arg Object overloads vs chained addToHash ----- + + /** hash(a, b) must equal folding addToHash over a then b, starting from 0L. */ @Test + void twoArgHashMatchesChainedAddToHash() { + Object a = "alpha"; + Object b = 42; + assertEquals(addToHash(addToHash(0L, a), b), hash(a, b)); + } + + /** hash(a, b, c) must equal folding addToHash over the three arguments in order, starting from 0L. */ @Test + void threeArgHashMatchesChainedAddToHash() { + Object a = "alpha"; + Object b = 42; + Object c = true; + assertEquals(addToHash(addToHash(addToHash(0L, a), b), c), hash(a, b, c)); + } + + /** hash(a, b, c, d) must equal folding addToHash over the four arguments in order, starting from 0L. */ @Test + void fourArgHashMatchesChainedAddToHash() { + Object a = "alpha"; + Object b = 42; + Object c = true; + Object d = 3.14; + assertEquals(addToHash(addToHash(addToHash(addToHash(0L, a), b), c), d), hash(a, b, c, d)); + } + + /** hash(a, b, c, d, e) must equal folding addToHash over the five arguments in order, starting from 0L. */ @Test + void fiveArgHashMatchesChainedAddToHash() { + Object a = "alpha"; + Object b = 42; + Object c = true; + Object d = 3.14; + Object e = 'q'; + assertEquals( + addToHash(addToHash(addToHash(addToHash(addToHash(0L, a), b), c), d), e), + hash(a, b, c, d, e)); + } + + /** A null in first or middle position must hash the same as passing null to addToHash at that step. */ @Test + void multiArgHashHandlesNullsConsistentlyWithChainedAddToHash() { + assertEquals(addToHash(addToHash(0L, (Object) null), "x"), hash(null, "x")); + assertEquals( + addToHash(addToHash(addToHash(0L, "x"), (Object) null), "y"), hash("x", null, "y")); + } + + /** Reordering the sampled arguments must change the resulting hash. */ @Test + void differentInputsProduceDifferentHashes() { + // Sanity: ordering matters, and distinct values produce distinct results in general. 
+ assertNotEquals(hash("a", "b"), hash("b", "a")); + assertNotEquals(hash("a", "b", "c"), hash("a", "c", "b")); + } + + // ----- addToHash primitive overloads ----- + + /** Each primitive addToHash overload must equal addToHash applied to the boxed type's static hashCode of the same value. */ @Test + void addToHashPrimitivesMatchObjectVersion() { + long seed = 100L; + assertEquals(addToHash(seed, Boolean.hashCode(true)), addToHash(seed, true)); + assertEquals(addToHash(seed, Character.hashCode('z')), addToHash(seed, 'z')); + assertEquals(addToHash(seed, Byte.hashCode((byte) 9)), addToHash(seed, (byte) 9)); + assertEquals(addToHash(seed, Short.hashCode((short) 5)), addToHash(seed, (short) 5)); + assertEquals(addToHash(seed, Long.hashCode(999_999L)), addToHash(seed, 999_999L)); + assertEquals(addToHash(seed, Float.hashCode(1.5f)), addToHash(seed, 1.5f)); + assertEquals(addToHash(seed, Double.hashCode(2.5d)), addToHash(seed, 2.5d)); + } + + /** Folding addToHash over the ints 1 to 5 must equal the explicit recurrence that multiplies the running value by 31L and adds each int. */ @Test + void addToHashIsLinearAcrossSteps() { + // 31*h + v formula -- verify by accumulating an explicit sequence. + long expected = 0L; + for (int v : new int[] {1, 2, 3, 4, 5}) { + expected = 31L * expected + v; + } + long actual = 0L; + for (int v : new int[] {1, 2, 3, 4, 5}) { + actual = addToHash(actual, v); + } + assertEquals(expected, actual); + } + + // ----- iterable / array versions ----- + + /** hash(Iterable) must equal folding addToHash over the elements in order, including a null element. */ @Test + void hashIterableMatchesChainedAddToHash() { + Iterable values = Arrays.asList("a", 1, true, null); + long expected = 0L; + for (Object o : values) { + expected = addToHash(expected, o); + } + assertEquals(expected, hash(values)); + } + + /** The int-array hash overload (deprecation warning suppressed here) must equal folding addToHash over the ints in order. */ @Test + @SuppressWarnings("deprecation") + void deprecatedIntArrayHashMatchesChainedAddToHash() { + int[] hashes = new int[] {7, 13, 31, 1024}; + long expected = 0L; + for (int h : hashes) { + expected = addToHash(expected, h); + } + assertEquals(expected, hash(hashes)); + } + + /** The Object-array hash overload (deprecation warning suppressed here) must equal folding addToHash over the elements, including a null element. */ @Test + @SuppressWarnings("deprecation") + void deprecatedObjectArrayHashMatchesChainedAddToHash() { + Object[] objs = new Object[] {"alpha", 7, null, true}; + long expected = 0L; + for (Object o : objs) { + expected = addToHash(expected, 
o); + } + assertEquals(expected, hash(objs)); + } + + // ----- intHash null behavior is observable via multi-arg overloads ----- + + /** hash(null, x) must equal the hashCode of x alone, so the null argument contributes 0 to the fold. */ @Test + void multiArgHashTreatsNullAsZero() { + // hash(Object,Object) feeds intHash(...) which returns 0 for null. + // Verify: hash(null, "x") == 31L*0 + "x".hashCode() + int xHash = Objects.hashCode("x"); + assertEquals(31L * 0 + xHash, hash(null, "x")); + } +}