diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionKeyRangeTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionKeyRangeTest.java new file mode 100644 index 000000000000..339f3f95ad53 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionKeyRangeTest.java @@ -0,0 +1,177 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +package com.azure.cosmos.implementation; + +import com.azure.cosmos.implementation.routing.Range; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.List; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; + +/** + * Tests for {@link PartitionKeyRange}, focused on the memory-saving "strip unused fields" behavior + * applied when constructing from a Jackson {@link ObjectNode}. + * + *

<p>The retained-field set is intentionally kept aligned with + * azure-sdk-for-python#46297 + * (Python's {@code PKRange} namedtuple). These tests pin that contract.</p>

+ */ +public class PartitionKeyRangeTest { + + private static final ObjectMapper MAPPER = Utils.getSimpleObjectMapper(); + + /** Mirrors the JSON shape the Cosmos DB service returns for a partition key range. */ + private static final String FULL_PK_RANGE_JSON = + "{" + + "\"_rid\":\"90t-ALzvP44CAAAAAAAAUA==\"," + + "\"id\":\"0\"," + + "\"_etag\":\"\\\"00001e02-0000-0800-0000-6a2c41690000\\\"\"," + + "\"minInclusive\":\"\"," + + "\"maxExclusive\":\"FF\"," + + "\"ridPrefix\":0," + + "\"_self\":\"dbs/90t-AA==/colls/90t-ALzvP44=/pkranges/90t-ALzvP44CAAAAAAAAUA==/\"," + + "\"throughputFraction\":1," + + "\"status\":\"online\"," + + "\"parents\":[]," + + "\"ownedArchivalPKRangeIds\":[]," + + "\"_ts\":1781285225," + + "\"lsn\":87," + + "\"_lsn\":87" + + "}"; + + /** + * Allow-list kept in heap on every deserialized {@link PartitionKeyRange}. + * + *

<p>Includes Python's {@code PKRange} namedtuple slots + * ({@code id}, {@code minInclusive}, {@code maxExclusive}, {@code parents}, + * {@code status}, {@code throughputFraction}) plus {@code _rid} — Java-specific because + * {@code AddressResolver.isSameCollection} reads {@code getResourceId()} on a + * {@code PartitionKeyRange} during retry target-change detection.</p>

+ */ + private static final List KEPT_FIELDS = Arrays.asList( + "id", "minInclusive", "maxExclusive", "parents", "status", "throughputFraction", "_rid"); + + /** All non-kept fields present in the full payload above; everything here must be stripped. */ + private static final List STRIPPED_FIELDS = Arrays.asList( + "_etag", "ridPrefix", "_self", "ownedArchivalPKRangeIds", "_ts", "lsn", "_lsn"); + + private static ObjectNode fullPkRangeNode() throws Exception { + return (ObjectNode) MAPPER.readTree(FULL_PK_RANGE_JSON); + } + + @Test(groups = "unit") + public void objectNodeConstructor_stripsEverythingNotOnAllowList() throws Exception { + // Pin the allow-list: every field not on Python's PKRange namedtuple must be dropped. + ObjectNode node = fullPkRangeNode(); + PartitionKeyRange range = new PartitionKeyRange(node); + + for (String dropped : STRIPPED_FIELDS) { + assertEquals(range.has(dropped), false, dropped + " must be stripped (not on allow-list)"); + } + } + + @Test(groups = "unit") + public void objectNodeConstructor_preservesAllowListedFields() throws Exception { + // Every field on the allow-list must survive the strip. + ObjectNode node = fullPkRangeNode(); + PartitionKeyRange range = new PartitionKeyRange(node); + + // Routing-map essentials. + assertEquals(range.getId(), "0"); + assertEquals(range.getMinInclusive(), ""); + assertEquals(range.getMaxExclusive(), "FF"); + assertNotNull(range.getParents()); + assertEquals(range.getParents().size(), 0); + + // _rid is on the allow-list specifically so AddressResolver.isSameCollection + // can call getResourceId() on a deserialized PartitionKeyRange during retry + // target-change detection. Without this, ResourceId.parse(null) throws + // "INVALID resource id null" on the first retry after a 410/Gone. 
+ assertEquals(range.getResourceId(), "90t-ALzvP44CAAAAAAAAUA=="); + + for (String kept : KEPT_FIELDS) { + assertTrue(range.has(kept), kept + " must be preserved (on allow-list)"); + } + } + + @Test(groups = "unit") + public void objectNodeConstructor_dropsUnknownFutureField() throws Exception { + // Allow-list (not deny-list) semantics: a new server-side field tomorrow is dropped + // by default so per-instance heap stays bounded against payload growth. Mirrors + // Python's PKRange namedtuple, which has no slot for unknown fields. + String json = "{" + + "\"id\":\"0\",\"minInclusive\":\"\",\"maxExclusive\":\"FF\"," + + "\"futureFieldA\":\"hello\"," + + "\"futureFieldB\":{\"nested\":42}" + + "}"; + ObjectNode node = (ObjectNode) MAPPER.readTree(json); + PartitionKeyRange range = new PartitionKeyRange(node); + + assertEquals(range.getId(), "0"); + assertEquals(range.has("futureFieldA"), false, "unknown future field must be dropped by allow-list"); + assertEquals(range.has("futureFieldB"), false, "unknown future nested field must be dropped by allow-list"); + } + + @Test(groups = "unit") + public void objectNodeConstructor_preservesNonEmptyParents() throws Exception { + // Split-merge bookkeeping uses parents; verify it survives the strip. + String json = "{" + + "\"_rid\":\"X==\",\"id\":\"1\",\"_etag\":\"\\\"e\\\"\"," + + "\"minInclusive\":\"\",\"maxExclusive\":\"FF\"," + + "\"_self\":\"x/\",\"parents\":[\"0\"],\"_ts\":1,\"lsn\":1" + + "}"; + ObjectNode node = (ObjectNode) MAPPER.readTree(json); + PartitionKeyRange range = new PartitionKeyRange(node); + + assertNotNull(range.getParents()); + assertEquals(range.getParents().size(), 1); + assertEquals(range.getParents().get(0), "0"); + // Dropped fields still gone. _rid stays now that it's on the allow-list. 
+ assertEquals(range.has("_self"), false); + assertEquals(range.has("lsn"), false); + assertEquals(range.getResourceId(), "X=="); + } + + @Test(groups = "unit") + public void objectNodeConstructor_equalsAndHashCodeUnchanged() throws Exception { + // PartitionKeyRange#equals / #hashCode use id/min/max only -- the slim instance must + // remain value-equal to a manually-constructed instance with the same identity fields. + ObjectNode node = fullPkRangeNode(); + PartitionKeyRange slim = new PartitionKeyRange(node); + PartitionKeyRange handBuilt = new PartitionKeyRange("0", "", "FF"); + + assertEquals(slim, handBuilt); + assertEquals(slim.hashCode(), handBuilt.hashCode()); + assertEquals(slim.toRange(), new Range<>("", "FF", true, false)); + } + + @Test(groups = "unit") + public void objectNodeConstructor_handlesNull() { + // Defensive: a null ObjectNode argument must not throw; super(null) is the existing + // pre-PR contract and must be preserved. + new PartitionKeyRange((ObjectNode) null); + } + + @Test(groups = "unit") + public void deserializationFunnelStripsForFeedResponsePath() throws Exception { + // JsonSerializable.instantiateFromObjectNodeAndType is the funnel every FeedResponse + // page uses when deserializing pkranges. Confirm it routes through the + // PartitionKeyRange(ObjectNode) ctor and therefore inherits the strip. 
+ ObjectNode node = fullPkRangeNode(); + Object result = + JsonSerializable.instantiateFromObjectNodeAndType(node, PartitionKeyRange.class); + + assertTrue(result instanceof PartitionKeyRange); + PartitionKeyRange range = (PartitionKeyRange) result; + for (String dropped : STRIPPED_FIELDS) { + assertEquals(range.has(dropped), false, dropped + " must be stripped via FeedResponse funnel"); + } + assertEquals(range.getId(), "0"); + } +} diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index 5146e18db2e8..eee4f87437f9 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -9,6 +9,7 @@ #### Bugs Fixed #### Other Changes +* Reduced memory footprint of deserialized `PartitionKeyRange` instances by stripping unused fields in the `PartitionKeyRange(ObjectNode)` constructor - See PR [49513](https://github.com/Azure/azure-sdk-for-java/pull/49513). ### 4.81.0 (2026-06-08) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PartitionKeyRange.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PartitionKeyRange.java index 4488426683d0..6488d1d57b4d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PartitionKeyRange.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PartitionKeyRange.java @@ -6,7 +6,11 @@ import com.azure.cosmos.implementation.routing.Range; import com.fasterxml.jackson.databind.node.ObjectNode; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; import java.util.List; +import java.util.Set; /** * Represent a partition key range in the Azure Cosmos DB database service. 
@@ -16,14 +20,64 @@ public class PartitionKeyRange extends Resource { public static final String MAXIMUM_EXCLUSIVE_EFFECTIVE_PARTITION_KEY = "FF"; public static final String MASTER_PARTITION_KEY_RANGE_ID = "M"; + /** + * Fields of the Cosmos DB service partition key range payload that the SDK retains in heap + * for the lifetime of the {@link com.azure.cosmos.implementation.routing.CollectionRoutingMap}. + * + *

<p>The set is broadly aligned with the equivalent Python optimization in + * azure-sdk-for-python#46297 + * (item #2 — "Strip unused fields → compact PKRange"), which retains + * {@code id}, {@code minInclusive}, {@code maxExclusive}, {@code parents}, + * {@code status}, and {@code throughputFraction}. Java additionally keeps {@code _rid} + * because {@link com.azure.cosmos.implementation.directconnectivity.AddressResolver#isSameCollection} + * calls {@code getResourceId()} on {@code PartitionKeyRange} instances during target-change + * detection on retry — stripping it would surface as {@code "INVALID resource id null"} + * from {@code ResourceId.parse(null)} the next time the SDK retries an address-staleness + * check (e.g. after a 410/Gone). Python's address-resolution path does not have the + * equivalent dependency, so the SDKs intentionally diverge on this one field.</p>

+ * + *

<p>This is an allow-list: any field the service returns that is not in this set + * (including any field added by the service in the future) is dropped at construction. + * That keeps per-instance heap bounded against server-side payload growth. Adding a new + * field to the allow-list is a one-line change here when a consumer needs it.</p>

+ */ + private static final Set KEPT_FIELDS = Collections.unmodifiableSet( + new HashSet<>(Arrays.asList( + Constants.Properties.ID, + "minInclusive", + "maxExclusive", + Constants.Properties.PARENTS, + "status", + "throughputFraction", + Constants.Properties.R_ID + ))); + /** * Constructor. * - * @param objectNode the {@link ObjectNode} that represent the - * {@link JsonSerializable} + *

<p>Fields not listed in {@link #KEPT_FIELDS} are removed from {@code objectNode} as part of + * construction so the resulting instance retains only the fields the SDK actually needs. + * This is the universal funnel for every {@code PartitionKeyRange} the SDK deserializes from + * a service response (see {@link JsonSerializable#instantiateFromObjectNodeAndType}), so the + * memory saving applies to all routing-map cache entries and any other code path that + * consumes deserialized partition key ranges.</p>

+ * + *

<p>The argument is mutated in place. This is safe because every production caller obtains + * {@code objectNode} from Jackson deserialization and does not retain another reference to + * it. Tests that need to preserve a fully-populated source object should use + * {@code objectNode.deepCopy()} before handing it to this constructor.</p>

+ * + * @param objectNode the {@link ObjectNode} that represents the {@link JsonSerializable} */ public PartitionKeyRange(ObjectNode objectNode) { - super(objectNode); + super(stripToKeptFields(objectNode)); + } + + private static ObjectNode stripToKeptFields(ObjectNode objectNode) { + if (objectNode != null) { + objectNode.retain(KEPT_FIELDS); + } + return objectNode; } /** @@ -33,17 +87,6 @@ public PartitionKeyRange() { super(); } - /** - * Initialize a partition key range object from json string. - * - * @param jsonString - * the json string that represents the partition key range - * object. - */ - public PartitionKeyRange(String jsonString) { - super(jsonString); - } - /** * Set id of partition key range * @param id the name of the resource.