From 80ff3596f08e51d4fdea9b31db9b9e2f61ec7eb0 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Mon, 15 Jun 2026 16:12:07 -0700 Subject: [PATCH 01/13] feat: expose extended vector indexing options on createVectorIndex (#2487) Add an `indexingOptions` field to the createVectorIndex command's `definition.options`. It accepts either: - a String naming a predefined profile (expanded by the in-code VectorIndexProfiles registry into a set of SAI options), or - an Object of raw Cassandra SAI indexing options, passed through verbatim using Cassandra's snake_case names (forward-compatible). Anything else is rejected. The existing `metric` / `sourceModel` fields are unchanged and remain the dedicated way to set similarity_function / source_model; those keys are rejected inside the raw options object. Implemented by mirroring the existing ApiTextIndex.analyzer JsonNode pattern. Adds two SchemaException codes (UNKNOWN_VECTOR_INDEXING_PROFILE, INVALID_VECTOR_INDEXING_OPTIONS) with errors.yaml templates. listIndexes renders the resolved options back under indexingOptions (excluding the structural and dedicated-field keys). Note: the new tuning options require the target backend to allow custom SAI HNSW parameters; per the "pass-through" design, the API forwards the options and surfaces the database error on backends that disallow them. 
--- .../indexes/VectorIndexDefinitionDesc.java | 20 +- .../config/constants/VectorConstants.java | 20 ++ .../jsonapi/exception/SchemaException.java | 2 + .../service/schema/tables/ApiVectorIndex.java | 111 +++++++++- .../schema/tables/VectorIndexProfiles.java | 52 +++++ src/main/resources/errors.yaml | 31 ++- .../CreateTableIndexIntegrationTest.java | 66 ++++++ .../schema/tables/ApiVectorIndexTest.java | 191 ++++++++++++++++++ .../tables/VectorIndexProfilesTest.java | 65 ++++++ 9 files changed, 552 insertions(+), 6 deletions(-) create mode 100644 src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java create mode 100644 src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java create mode 100644 src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfilesTest.java diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java index 03d355c0a7..a61efad5f5 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java @@ -3,6 +3,7 @@ import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonPropertyOrder; +import com.fasterxml.jackson.databind.JsonNode; import io.stargate.sgv2.jsonapi.config.constants.TableDescConstants; import io.stargate.sgv2.jsonapi.config.constants.VectorConstants; import io.stargate.sgv2.jsonapi.config.constants.VectorIndexDescDefaults; @@ -34,7 +35,8 @@ public record VectorIndexDefinitionDesc( /** Options for the vector index */ @JsonPropertyOrder({ VectorConstants.VectorColumn.METRIC, - 
VectorConstants.VectorColumn.SOURCE_MODEL + VectorConstants.VectorColumn.SOURCE_MODEL, + VectorConstants.VectorColumn.INDEXING_OPTIONS }) public record VectorIndexDescOptions( @Nullable @@ -59,5 +61,19 @@ public record VectorIndexDescOptions( + EmbeddingSourceModel.ApiConstants.ALL) @JsonInclude(JsonInclude.Include.NON_NULL) @JsonProperty(VectorConstants.VectorColumn.SOURCE_MODEL) - String sourceModel) {} + String sourceModel, + // + @Nullable + @Schema( + description = + """ +Optional additional vector (SAI) indexing configuration: either a String naming a predefined profile \ +(e.g. "small-high-recall") that the API expands into a set of options, or an Object of raw Cassandra \ +indexing options passed through as-is (e.g. {"enable_hierarchy": true, "maximum_node_connections": 32}). \ +The dedicated "metric" and "sourceModel" fields must not be repeated here.\ +""", + type = SchemaType.OBJECT) + @JsonInclude(JsonInclude.Include.NON_NULL) + @JsonProperty(VectorConstants.VectorColumn.INDEXING_OPTIONS) + JsonNode indexingOptions) {} } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java index 753872a95b..b373c85b53 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java @@ -1,17 +1,37 @@ package io.stargate.sgv2.jsonapi.config.constants; +import java.util.Set; + public interface VectorConstants { interface VectorColumn { String DIMENSION = "dimension"; String METRIC = "metric"; String SOURCE_MODEL = "sourceModel"; String SERVICE = ServiceDescConstants.SERVICE; + String INDEXING_OPTIONS = "indexingOptions"; } interface Vectorize extends ServiceDescConstants {} + /** + * Names of the options used in the CQL {@code CREATE CUSTOM INDEX ... WITH OPTIONS = {...}} + * clause for a vector (SAI ANN) index. 
{@link #SOURCE_MODEL} and {@link #SIMILARITY_FUNCTION} + * have dedicated API fields ({@code sourceModel} / {@code metric}); the remaining tuning options + * are exposed via the {@code indexingOptions} field (see {@link VectorColumn#INDEXING_OPTIONS}). + */ interface CQLAnnIndex { String SOURCE_MODEL = "source_model"; String SIMILARITY_FUNCTION = "similarity_function"; + String MAXIMUM_NODE_CONNECTIONS = "maximum_node_connections"; + String CONSTRUCTION_BEAM_WIDTH = "construction_beam_width"; + String NEIGHBORHOOD_OVERFLOW = "neighborhood_overflow"; + String ALPHA = "alpha"; + String ENABLE_HIERARCHY = "enable_hierarchy"; + + /** + * Options that have dedicated API fields ({@code sourceModel} / {@code metric}) and so must not + * be set again through the raw {@code indexingOptions} object. + */ + Set RESERVED_OPTIONS = Set.of(SOURCE_MODEL, SIMILARITY_FUNCTION); } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java b/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java index be262773fb..6de0b2128a 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java @@ -47,6 +47,7 @@ public enum Code implements ErrorCode { INVALID_INDEXING_DEFINITION, INVALID_USAGE_OF_VECTORIZE, // legacy: converted from ErrorCodeV1 INVALID_USER_DEFINED_TYPE_NAME, + INVALID_VECTOR_INDEXING_OPTIONS, LEXICAL_FEATURE_NOT_ENABLED, LEXICAL_NOT_ENABLED_FOR_COLLECTION, MISSING_ALTER_TABLE_OPERATIONS, @@ -76,6 +77,7 @@ public enum Code implements ErrorCode { UNKNOWN_PARTITION_SORT_COLUMNS, UNKNOWN_PRIMITIVE_DATA_TYPE, UNKNOWN_USER_DEFINED_TYPE, + UNKNOWN_VECTOR_INDEXING_PROFILE, UNKNOWN_VECTOR_METRIC, UNKNOWN_VECTOR_SOURCE_MODEL, UNSUPPORTED_DATA_TYPE_TABLE_CREATION, diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java index 
0bcf7a85d8..2472af8d98 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java @@ -6,6 +6,9 @@ import com.datastax.oss.driver.api.core.CqlIdentifier; import com.datastax.oss.driver.api.core.metadata.schema.IndexMetadata; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; import io.stargate.sgv2.jsonapi.api.model.command.table.IndexDesc; import io.stargate.sgv2.jsonapi.api.model.command.table.SchemaDescSource; import io.stargate.sgv2.jsonapi.api.model.command.table.definition.indexes.RegularIndexDefinitionDesc; @@ -17,6 +20,7 @@ import io.stargate.sgv2.jsonapi.service.schema.SimilarityFunction; import io.stargate.sgv2.jsonapi.service.schema.tables.factories.IndexFactoryFromCql; import io.stargate.sgv2.jsonapi.service.schema.tables.factories.IndexFactoryFromIndexDesc; +import io.stargate.sgv2.jsonapi.util.JsonUtil; import java.util.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -54,7 +58,9 @@ public IndexDesc getSchemaDescription( var definitionOptions = new VectorIndexDefinitionDesc.VectorIndexDescOptions( - similarityFunction.apiName(), sourceModel.apiName()); + similarityFunction.apiName(), + sourceModel.apiName(), + renderIndexingOptions(indexOptions)); var definition = new VectorIndexDefinitionDesc(cqlIdentifierToJsonKey(targetColumn), definitionOptions); @@ -76,6 +82,103 @@ public VectorIndexDefinitionDesc definition() { }; } + /** + * Renders the additional tuning options from a CQL index options map (everything other than the + * structural {@code class_name} / {@code target} and the {@code source_model} / {@code + * similarity_function} options, which have dedicated fields) as an {@code indexingOptions} object + * for the public schema description. + * + *
<p>
Profiles are resolved to their concrete options at create time, so the description always + * shows the resolved raw options. Values are rendered as Strings because CQL index options are a + * {@code Map}. + * + * @param indexOptions the CQL index options map + * @return the options object, or null when there are none (so the field is omitted) + */ + static JsonNode renderIndexingOptions(Map indexOptions) { + ObjectNode node = null; + for (var entry : indexOptions.entrySet()) { + var optionName = entry.getKey(); + // Skip the structural SAI options (added by the driver / CQL builder) and the options that + // have dedicated API fields (source_model / similarity_function). + if (CQLSAIIndex.Options.CLASS_NAME.equals(optionName) + || CQLSAIIndex.Options.TARGET.equals(optionName) + || VectorConstants.CQLAnnIndex.RESERVED_OPTIONS.contains(optionName)) { + continue; + } + if (node == null) { + node = JsonNodeFactory.instance.objectNode(); + } + node.put(optionName, entry.getValue()); + } + return node; + } + + /** + * Applies the optional {@code indexingOptions} from the user request into the CQL index options + * map. The value is either a String naming a {@link VectorIndexProfiles profile} (expanded into a + * set of options) or an Object of raw Cassandra indexing options passed through as-is. The {@code + * source_model} / {@code similarity_function} options have dedicated fields and must not be set + * this way. 
+ * + * @param indexOptions the CQL options map being built, mutated in place + * @param indexingOptions the raw node from the request, may be null + */ + static void applyIndexingOptions(Map indexOptions, JsonNode indexingOptions) { + + if (indexingOptions == null || indexingOptions.isNull()) { + // nothing provided, leave the options as they are + return; + } + + if (indexingOptions.isTextual()) { + // String -> named profile + var profileName = indexingOptions.textValue(); + var profileOptions = + VectorIndexProfiles.forName(profileName) + .orElseThrow( + () -> + SchemaException.Code.UNKNOWN_VECTOR_INDEXING_PROFILE.get( + Map.of( + "knownProfiles", + errFmtJoin(VectorIndexProfiles.knownNames()), + "unknownProfile", + profileName))); + indexOptions.putAll(profileOptions); + return; + } + + if (indexingOptions.isObject()) { + // Object -> raw indexing options, passed through as-is using Cassandra's snake_case names. + indexingOptions + .fields() + .forEachRemaining( + entry -> { + var optionName = entry.getKey(); + if (VectorConstants.CQLAnnIndex.RESERVED_OPTIONS.contains(optionName)) { + throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( + Map.of( + "reason", + "The option '%s' must be set using its dedicated field, not indexingOptions." + .formatted(optionName))); + } + // CQL index options are strings: keep a textual value raw, otherwise serialise the + // JSON value (e.g. number 32 -> "32", boolean true -> "true"). + var value = entry.getValue(); + indexOptions.put( + optionName, value.isTextual() ? 
value.textValue() : value.toString()); + }); + return; + } + + // neither String nor Object -> not supported + throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( + Map.of( + "reason", + "indexingOptions must be a String (profile name) or an Object (raw options), but was: " + + JsonUtil.nodeTypeAsString(indexingOptions))); + } + /** * Logic to map from the name of the similarity function, from either the user or the CQL index, * to a {@link SimilarityFunction} enum value. @@ -245,6 +348,12 @@ public ApiVectorIndex create( metricToUse); } + // Apply the optional additional indexing options (a profile name or raw options); the source + // model and metric above have dedicated fields and must not be set this way. + var userIndexingOptions = + (indexDesc.options() == null) ? null : indexDesc.options().indexingOptions(); + applyIndexingOptions(indexOptions, userIndexingOptions); + return new ApiVectorIndex( indexIdentifier, targetIdentifier, indexOptions, metricToUse, sourceModelToUse); } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java new file mode 100644 index 0000000000..a8ce72ee63 --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java @@ -0,0 +1,52 @@ +package io.stargate.sgv2.jsonapi.service.schema.tables; + +import io.stargate.sgv2.jsonapi.config.constants.VectorConstants; +import java.util.Map; +import java.util.Optional; +import java.util.Set; + +/** + * Registry of named vector-index "profiles": each maps a user-facing profile name to a set of + * Cassandra SAI indexing options (the {@code WITH OPTIONS = {...}} entries). Selecting a profile is + * an alternative to passing raw options through the {@code indexingOptions} field on the + * createVectorIndex command. + * + *
<p>
Profiles only set the tuning options; they never set {@code source_model} or {@code + * similarity_function}, which have dedicated API fields ({@code sourceModel} / {@code metric}). + * Values are stored as Strings because CQL index options are a {@code Map}. + * + *
<p>
NOTE: the concrete mappings below are an initial in-code starter set; the values are expected + * to be tuned and eventually externalised to configuration. + */ +public final class VectorIndexProfiles { + + private VectorIndexProfiles() {} + + /** Profile name (lower-cased for case-insensitive lookup) to CQL indexing options. */ + private static final Map> PROFILES = + Map.of( + "small-high-recall", + Map.of( + VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "32", + VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "200"), + "big-low-latency", + Map.of(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "16")); + + /** + * Looks up a profile by name, case-insensitively. + * + * @param name the profile name from the user request, may be null or blank + * @return the CQL options for the profile, or empty if the name is null, blank, or not known + */ + public static Optional> forName(String name) { + if (name == null || name.isBlank()) { + return Optional.empty(); + } + return Optional.ofNullable(PROFILES.get(name.toLowerCase())); + } + + /** Names of all known profiles, for use in error messages. */ + public static Set knownNames() { + return PROFILES.keySet(); + } +} diff --git a/src/main/resources/errors.yaml b/src/main/resources/errors.yaml index 6d267f0457..cf6a11ca11 100644 --- a/src/main/resources/errors.yaml +++ b/src/main/resources/errors.yaml @@ -1811,14 +1811,39 @@ request-errors: title: Vector metric is unknown body: |- The command attempted to create an vector index using a metric that is not known by the API. - + The metric, also known as the similarity function, used to create the vector index is used to compare vectors to find the similar vectors. - + The known metrics are: ${knownMetrics}. The command attempted to use the metric: ${unknownMetric}. - + Resend the command using a known metric. 
+ - scope: SCHEMA + code: UNKNOWN_VECTOR_INDEXING_PROFILE + title: Vector indexing profile is unknown + body: |- + The command attempted to create a vector index using an indexing profile that is not known by the API. + + A profile is selected by setting `indexingOptions` to a String; the API expands it into a set of indexing options. To pass options directly instead, set `indexingOptions` to an Object of raw Cassandra indexing options. + + The known profiles are: ${knownProfiles}. + The command attempted to use the profile: ${unknownProfile}. + + Resend the command using a known profile, or use a raw indexing options Object. + + - scope: SCHEMA + code: INVALID_VECTOR_INDEXING_OPTIONS + title: Vector indexingOptions are invalid + body: |- + The command attempted to create a vector index with invalid `indexingOptions`. + + `indexingOptions` must be either a String naming a predefined profile, or an Object of raw Cassandra indexing options. The dedicated `metric` and `sourceModel` fields must not be repeated inside the raw options Object. + + ${reason} + + Resend the command with corrected `indexingOptions`. 
+ - scope: SCHEMA code: VECTOR_SEARCH_NOT_SUPPORTED title: Vector search not enabled for collection diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java index 59b7622c85..db0c5615aa 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java @@ -1136,6 +1136,72 @@ public void invalidSourceModel() { "The command attempted to use the source model: invalid_source_model."); } + @Test + public void unknownIndexingProfile() { + DataApiCommandSenders.assertTableCommand(keyspaceName, vectorTableName) + .postCreateVectorIndex( + """ + { + "name": "vector_type_7_idx", + "definition": { + "column": "vector_type_7", + "options": { + "indexingOptions": "no-such-profile" + } + } + } + """) + .hasSingleApiError( + SchemaException.Code.UNKNOWN_VECTOR_INDEXING_PROFILE, + SchemaException.class, + "The command attempted to use the profile: no-such-profile."); + } + + @Test + public void rawIndexingOptionsWithReservedOption() { + DataApiCommandSenders.assertTableCommand(keyspaceName, vectorTableName) + .postCreateVectorIndex( + """ + { + "name": "vector_type_7_idx", + "definition": { + "column": "vector_type_7", + "options": { + "indexingOptions": { + "similarity_function": "COSINE" + } + } + } + } + """) + .hasSingleApiError( + SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS, + SchemaException.class, + "The option 'similarity_function' must be set using its dedicated field"); + } + + @Test + public void indexingOptionsNotStringOrObject() { + DataApiCommandSenders.assertTableCommand(keyspaceName, vectorTableName) + .postCreateVectorIndex( + """ + { + "name": "vector_type_7_idx", + "definition": { + "column": "vector_type_7", + "options": { + "indexingOptions": [1, 2, 3] + } + } + } + """) + 
.hasSingleApiError( + SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS, + SchemaException.class, + "indexingOptions must be a String (profile name) or an Object (raw options)", + "but was: Array"); + } + @Test public void createVectorIndexWithUnsupportedIndexType() { assertTableCommand(keyspaceName, vectorTableName) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java new file mode 100644 index 0000000000..d046dd9823 --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java @@ -0,0 +1,191 @@ +package io.stargate.sgv2.jsonapi.service.schema.tables; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import io.stargate.sgv2.jsonapi.config.constants.VectorConstants; +import io.stargate.sgv2.jsonapi.exception.SchemaException; +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +/** + * Unit tests for the {@code indexingOptions} handling on {@link ApiVectorIndex}: how the public + * value (profile name or raw options) is turned into the CQL index options map, and how it is + * rendered back for the schema description. These are deterministic and do not need a database (the + * end-to-end behaviour also depends on the backend allowing custom SAI parameters). 
+ */ +class ApiVectorIndexTest { + + private static final ObjectMapper MAPPER = new ObjectMapper(); + + private static JsonNode json(String raw) { + try { + return MAPPER.readTree(raw); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Nested + class ApplyIndexingOptions { + + @Test + @DisplayName("null / JSON null leaves the options untouched") + void nullValueIsNoOp() { + var options = new HashMap(); + + ApiVectorIndex.applyIndexingOptions(options, null); + ApiVectorIndex.applyIndexingOptions(options, JsonNodeFactory.instance.nullNode()); + + assertThat(options).isEmpty(); + } + + @Test + @DisplayName("empty object leaves the options untouched") + void emptyObjectIsNoOp() { + var options = new HashMap(); + + ApiVectorIndex.applyIndexingOptions(options, json("{}")); + + assertThat(options).isEmpty(); + } + + @Test + @DisplayName("a profile name expands to its CQL options") + void profileExpands() { + var options = new HashMap(); + + ApiVectorIndex.applyIndexingOptions( + options, JsonNodeFactory.instance.textNode("small-high-recall")); + + assertThat(options) + .containsAllEntriesOf(VectorIndexProfiles.forName("small-high-recall").orElseThrow()); + } + + @Test + @DisplayName("an unknown profile name throws UNKNOWN_VECTOR_INDEXING_PROFILE") + void unknownProfileThrows() { + var options = new HashMap(); + + assertThatThrownBy( + () -> + ApiVectorIndex.applyIndexingOptions( + options, JsonNodeFactory.instance.textNode("no-such-profile"))) + .isInstanceOf(SchemaException.class) + .satisfies( + t -> + assertThat(((SchemaException) t).code) + .isEqualTo(SchemaException.Code.UNKNOWN_VECTOR_INDEXING_PROFILE.name())); + } + + @Test + @DisplayName("raw options are passed through, non-text values serialised to Strings") + void rawOptionsPassThrough() { + var options = new HashMap(); + + ApiVectorIndex.applyIndexingOptions( + options, + json( + """ + { + "maximum_node_connections": 32, + "enable_hierarchy": true, + "alpha": "1.2" + } + """)); + + 
assertThat(options) + .containsEntry("maximum_node_connections", "32") + .containsEntry("enable_hierarchy", "true") + .containsEntry("alpha", "1.2"); + } + + @Test + @DisplayName("raw options merge with options already present") + void rawOptionsMergeWithExisting() { + var options = new HashMap(); + options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OTHER"); + + ApiVectorIndex.applyIndexingOptions(options, json("{\"maximum_node_connections\": 16}")); + + assertThat(options) + .containsEntry(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OTHER") + .containsEntry("maximum_node_connections", "16"); + } + + @Test + @DisplayName("a reserved option inside raw options throws INVALID_VECTOR_INDEXING_OPTIONS") + void reservedOptionThrows() { + var options = new HashMap(); + + assertThatThrownBy( + () -> + ApiVectorIndex.applyIndexingOptions( + options, json("{\"similarity_function\": \"COSINE\"}"))) + .isInstanceOf(SchemaException.class) + .satisfies( + t -> + assertThat(((SchemaException) t).code) + .isEqualTo(SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.name())); + } + + @Test + @DisplayName("a value that is neither String nor Object throws INVALID_VECTOR_INDEXING_OPTIONS") + void wrongTypeThrows() { + var options = new HashMap(); + + assertThatThrownBy(() -> ApiVectorIndex.applyIndexingOptions(options, json("[1, 2, 3]"))) + .isInstanceOf(SchemaException.class) + .satisfies( + t -> + assertThat(((SchemaException) t).code) + .isEqualTo(SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.name())); + } + } + + @Nested + class RenderIndexingOptions { + + @Test + @DisplayName("returns null when only structural and dedicated-field options are present") + void nullWhenNoTuningOptions() { + var options = new HashMap(); + options.put(CQLSAIIndex.Options.CLASS_NAME, CQLSAIIndex.SAI_CLASS_NAME); + options.put(CQLSAIIndex.Options.TARGET, "my_vector"); + options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OTHER"); + 
options.put(VectorConstants.CQLAnnIndex.SIMILARITY_FUNCTION, "COSINE"); + + assertThat(ApiVectorIndex.renderIndexingOptions(options)).isNull(); + } + + @Test + @DisplayName( + "returns only the tuning options, excluding structural and dedicated-field options") + void rendersOnlyTuningOptions() { + var options = new HashMap(); + options.put(CQLSAIIndex.Options.CLASS_NAME, CQLSAIIndex.SAI_CLASS_NAME); + options.put(CQLSAIIndex.Options.TARGET, "my_vector"); + options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OPENAI_V3_SMALL"); + options.put(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "32"); + + var rendered = ApiVectorIndex.renderIndexingOptions(options); + + assertThat(rendered).isNotNull(); + assertThat(rendered.size()).isEqualTo(1); + assertThat(rendered.get(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS).asText()) + .isEqualTo("32"); + } + + @Test + @DisplayName("empty map renders null") + void emptyMapRendersNull() { + assertThat(ApiVectorIndex.renderIndexingOptions(Map.of())).isNull(); + } + } +} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfilesTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfilesTest.java new file mode 100644 index 0000000000..17c1203331 --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfilesTest.java @@ -0,0 +1,65 @@ +package io.stargate.sgv2.jsonapi.service.schema.tables; + +import static org.assertj.core.api.Assertions.assertThat; + +import io.stargate.sgv2.jsonapi.config.constants.VectorConstants; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +class VectorIndexProfilesTest { + + @Nested + class ForName { + @Test + @DisplayName("forName returns options for a known profile") + void knownProfile() { + assertThat(VectorIndexProfiles.forName("small-high-recall")) + .isPresent() + .get() + .satisfies(opts -> 
assertThat(opts).isNotEmpty()); + } + + @Test + @DisplayName("forName is case-insensitive") + void caseInsensitive() { + assertThat(VectorIndexProfiles.forName("SMALL-HIGH-RECALL")) + .isEqualTo(VectorIndexProfiles.forName("small-high-recall")); + } + + @Test + @DisplayName("forName returns empty for an unknown profile") + void unknownProfile() { + assertThat(VectorIndexProfiles.forName("does-not-exist")).isEmpty(); + } + + @Test + @DisplayName("forName returns empty for null or blank") + void nullOrBlank() { + assertThat(VectorIndexProfiles.forName(null)).isEmpty(); + assertThat(VectorIndexProfiles.forName(" ")).isEmpty(); + } + } + + @Nested + class KnownNames { + @Test + @DisplayName("knownNames lists the available profiles") + void listsProfiles() { + assertThat(VectorIndexProfiles.knownNames()).contains("small-high-recall", "big-low-latency"); + } + } + + @Nested + class ProfileContents { + @Test + @DisplayName("profiles never set the reserved (dedicated-field) options") + void noReservedOptions() { + for (var name : VectorIndexProfiles.knownNames()) { + var options = VectorIndexProfiles.forName(name).orElseThrow(); + assertThat(options.keySet()) + .doesNotContainAnyElementsOf(VectorConstants.CQLAnnIndex.RESERVED_OPTIONS); + } + } + } +} From 580e5f00b4667a566d59e379628ec0cb3660136e Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Mon, 15 Jun 2026 19:17:19 -0700 Subject: [PATCH 02/13] fix: reject structural class_name/target in raw indexingOptions; review cleanups Address review feedback on #2487: - Reject raw indexingOptions keys class_name/target (set automatically by the API) with INVALID_VECTOR_INDEXING_OPTIONS, symmetric with how renderIndexingOptions filters them on read. Adds unit + IT coverage. - @Schema description for indexingOptions: use concatenated string literals (matching metric/sourceModel) and drop type=OBJECT to match the analyzer precedent for String-or-Object fields. - Mark applyIndexingOptions/renderIndexingOptions @VisibleForTesting. 
- Drop @DisplayName from the new unit tests to match repo convention. - Remove unused CQLAnnIndex constants (neighborhood_overflow, alpha, enable_hierarchy); keep the two used by profiles. - Use bare assertTableCommand in the new IT cases. - errors.yaml: revert unrelated whitespace churn on UNKNOWN_VECTOR_METRIC. --- .../indexes/VectorIndexDefinitionDesc.java | 12 +++---- .../config/constants/VectorConstants.java | 3 -- .../service/schema/tables/ApiVectorIndex.java | 13 +++++++ src/main/resources/errors.yaml | 6 ++-- .../CreateTableIndexIntegrationTest.java | 29 ++++++++++++++-- .../schema/tables/ApiVectorIndexTest.java | 34 ++++++++++++------- .../tables/VectorIndexProfilesTest.java | 7 ---- 7 files changed, 68 insertions(+), 36 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java index a61efad5f5..8033e7ec1b 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java @@ -66,13 +66,11 @@ public record VectorIndexDescOptions( @Nullable @Schema( description = - """ -Optional additional vector (SAI) indexing configuration: either a String naming a predefined profile \ -(e.g. "small-high-recall") that the API expands into a set of options, or an Object of raw Cassandra \ -indexing options passed through as-is (e.g. {"enable_hierarchy": true, "maximum_node_connections": 32}). \ -The dedicated "metric" and "sourceModel" fields must not be repeated here.\ -""", - type = SchemaType.OBJECT) + "Optional additional vector (SAI) indexing configuration: either a String naming a " + + "predefined profile (e.g. 
\"small-high-recall\") that the API expands into a set of " + + "options, or an Object of raw Cassandra indexing options passed through as-is " + + "(e.g. {\"enable_hierarchy\": true, \"maximum_node_connections\": 32}). The dedicated " + + "\"metric\" and \"sourceModel\" fields must not be repeated here.") @JsonInclude(JsonInclude.Include.NON_NULL) @JsonProperty(VectorConstants.VectorColumn.INDEXING_OPTIONS) JsonNode indexingOptions) {} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java index b373c85b53..ae01eb570d 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java @@ -24,9 +24,6 @@ interface CQLAnnIndex { String SIMILARITY_FUNCTION = "similarity_function"; String MAXIMUM_NODE_CONNECTIONS = "maximum_node_connections"; String CONSTRUCTION_BEAM_WIDTH = "construction_beam_width"; - String NEIGHBORHOOD_OVERFLOW = "neighborhood_overflow"; - String ALPHA = "alpha"; - String ENABLE_HIERARCHY = "enable_hierarchy"; /** * Options that have dedicated API fields ({@code sourceModel} / {@code metric}) and so must not diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java index 2472af8d98..39f0fbe8a1 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java @@ -9,6 +9,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.annotations.VisibleForTesting; import io.stargate.sgv2.jsonapi.api.model.command.table.IndexDesc; import 
io.stargate.sgv2.jsonapi.api.model.command.table.SchemaDescSource; import io.stargate.sgv2.jsonapi.api.model.command.table.definition.indexes.RegularIndexDefinitionDesc; @@ -95,6 +96,7 @@ public VectorIndexDefinitionDesc definition() { * @param indexOptions the CQL index options map * @return the options object, or null when there are none (so the field is omitted) */ + @VisibleForTesting static JsonNode renderIndexingOptions(Map indexOptions) { ObjectNode node = null; for (var entry : indexOptions.entrySet()) { @@ -124,6 +126,7 @@ static JsonNode renderIndexingOptions(Map indexOptions) { * @param indexOptions the CQL options map being built, mutated in place * @param indexingOptions the raw node from the request, may be null */ + @VisibleForTesting static void applyIndexingOptions(Map indexOptions, JsonNode indexingOptions) { if (indexingOptions == null || indexingOptions.isNull()) { @@ -162,6 +165,16 @@ static void applyIndexingOptions(Map indexOptions, JsonNode inde "The option '%s' must be set using its dedicated field, not indexingOptions." .formatted(optionName))); } + // class_name / target are structural SAI options set automatically by the API; + // reject them here, mirroring how renderIndexingOptions filters them out. + if (CQLSAIIndex.Options.CLASS_NAME.equals(optionName) + || CQLSAIIndex.Options.TARGET.equals(optionName)) { + throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( + Map.of( + "reason", + "The option '%s' is set automatically and must not be provided in indexingOptions." + .formatted(optionName))); + } // CQL index options are strings: keep a textual value raw, otherwise serialise the // JSON value (e.g. number 32 -> "32", boolean true -> "true"). 
var value = entry.getValue(); diff --git a/src/main/resources/errors.yaml b/src/main/resources/errors.yaml index cf6a11ca11..0a1c90f2e9 100644 --- a/src/main/resources/errors.yaml +++ b/src/main/resources/errors.yaml @@ -1811,12 +1811,12 @@ request-errors: title: Vector metric is unknown body: |- The command attempted to create an vector index using a metric that is not known by the API. - + The metric, also known as the similarity function, used to create the vector index is used to compare vectors to find the similar vectors. - + The known metrics are: ${knownMetrics}. The command attempted to use the metric: ${unknownMetric}. - + Resend the command using a known metric. - scope: SCHEMA diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java index db0c5615aa..159bbaa1b2 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java @@ -1138,7 +1138,7 @@ public void invalidSourceModel() { @Test public void unknownIndexingProfile() { - DataApiCommandSenders.assertTableCommand(keyspaceName, vectorTableName) + assertTableCommand(keyspaceName, vectorTableName) .postCreateVectorIndex( """ { @@ -1159,7 +1159,7 @@ public void unknownIndexingProfile() { @Test public void rawIndexingOptionsWithReservedOption() { - DataApiCommandSenders.assertTableCommand(keyspaceName, vectorTableName) + assertTableCommand(keyspaceName, vectorTableName) .postCreateVectorIndex( """ { @@ -1180,9 +1180,32 @@ public void rawIndexingOptionsWithReservedOption() { "The option 'similarity_function' must be set using its dedicated field"); } + @Test + public void rawIndexingOptionsWithStructuralOption() { + assertTableCommand(keyspaceName, vectorTableName) + .postCreateVectorIndex( + """ + { + "name": "vector_type_7_idx", + 
"definition": { + "column": "vector_type_7", + "options": { + "indexingOptions": { + "class_name": "StorageAttachedIndex" + } + } + } + } + """) + .hasSingleApiError( + SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS, + SchemaException.class, + "The option 'class_name' is set automatically and must not be provided in indexingOptions"); + } + @Test public void indexingOptionsNotStringOrObject() { - DataApiCommandSenders.assertTableCommand(keyspaceName, vectorTableName) + assertTableCommand(keyspaceName, vectorTableName) .postCreateVectorIndex( """ { diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java index d046dd9823..19c4f5813b 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java @@ -10,7 +10,6 @@ import io.stargate.sgv2.jsonapi.exception.SchemaException; import java.util.HashMap; import java.util.Map; -import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; @@ -36,7 +35,6 @@ private static JsonNode json(String raw) { class ApplyIndexingOptions { @Test - @DisplayName("null / JSON null leaves the options untouched") void nullValueIsNoOp() { var options = new HashMap(); @@ -47,7 +45,6 @@ void nullValueIsNoOp() { } @Test - @DisplayName("empty object leaves the options untouched") void emptyObjectIsNoOp() { var options = new HashMap(); @@ -57,7 +54,6 @@ void emptyObjectIsNoOp() { } @Test - @DisplayName("a profile name expands to its CQL options") void profileExpands() { var options = new HashMap(); @@ -69,7 +65,6 @@ void profileExpands() { } @Test - @DisplayName("an unknown profile name throws UNKNOWN_VECTOR_INDEXING_PROFILE") void unknownProfileThrows() { var options = new HashMap(); @@ -85,7 +80,6 @@ void unknownProfileThrows() { } @Test - 
@DisplayName("raw options are passed through, non-text values serialised to Strings") void rawOptionsPassThrough() { var options = new HashMap(); @@ -107,7 +101,6 @@ void rawOptionsPassThrough() { } @Test - @DisplayName("raw options merge with options already present") void rawOptionsMergeWithExisting() { var options = new HashMap(); options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OTHER"); @@ -120,7 +113,6 @@ void rawOptionsMergeWithExisting() { } @Test - @DisplayName("a reserved option inside raw options throws INVALID_VECTOR_INDEXING_OPTIONS") void reservedOptionThrows() { var options = new HashMap(); @@ -136,7 +128,6 @@ options, json("{\"similarity_function\": \"COSINE\"}"))) } @Test - @DisplayName("a value that is neither String nor Object throws INVALID_VECTOR_INDEXING_OPTIONS") void wrongTypeThrows() { var options = new HashMap(); @@ -147,13 +138,33 @@ void wrongTypeThrows() { assertThat(((SchemaException) t).code) .isEqualTo(SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.name())); } + + @Test + void structuralOptionThrows() { + var options = new HashMap(); + + assertThatThrownBy( + () -> ApiVectorIndex.applyIndexingOptions(options, json("{\"class_name\": \"x\"}"))) + .isInstanceOf(SchemaException.class) + .satisfies( + t -> + assertThat(((SchemaException) t).code) + .isEqualTo(SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.name())); + + assertThatThrownBy( + () -> ApiVectorIndex.applyIndexingOptions(options, json("{\"target\": \"y\"}"))) + .isInstanceOf(SchemaException.class) + .satisfies( + t -> + assertThat(((SchemaException) t).code) + .isEqualTo(SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.name())); + } } @Nested class RenderIndexingOptions { @Test - @DisplayName("returns null when only structural and dedicated-field options are present") void nullWhenNoTuningOptions() { var options = new HashMap(); options.put(CQLSAIIndex.Options.CLASS_NAME, CQLSAIIndex.SAI_CLASS_NAME); @@ -165,8 +176,6 @@ void nullWhenNoTuningOptions() { } 
@Test - @DisplayName( - "returns only the tuning options, excluding structural and dedicated-field options") void rendersOnlyTuningOptions() { var options = new HashMap(); options.put(CQLSAIIndex.Options.CLASS_NAME, CQLSAIIndex.SAI_CLASS_NAME); @@ -183,7 +192,6 @@ void rendersOnlyTuningOptions() { } @Test - @DisplayName("empty map renders null") void emptyMapRendersNull() { assertThat(ApiVectorIndex.renderIndexingOptions(Map.of())).isNull(); } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfilesTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfilesTest.java index 17c1203331..188b648897 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfilesTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfilesTest.java @@ -3,7 +3,6 @@ import static org.assertj.core.api.Assertions.assertThat; import io.stargate.sgv2.jsonapi.config.constants.VectorConstants; -import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; @@ -12,7 +11,6 @@ class VectorIndexProfilesTest { @Nested class ForName { @Test - @DisplayName("forName returns options for a known profile") void knownProfile() { assertThat(VectorIndexProfiles.forName("small-high-recall")) .isPresent() @@ -21,20 +19,17 @@ void knownProfile() { } @Test - @DisplayName("forName is case-insensitive") void caseInsensitive() { assertThat(VectorIndexProfiles.forName("SMALL-HIGH-RECALL")) .isEqualTo(VectorIndexProfiles.forName("small-high-recall")); } @Test - @DisplayName("forName returns empty for an unknown profile") void unknownProfile() { assertThat(VectorIndexProfiles.forName("does-not-exist")).isEmpty(); } @Test - @DisplayName("forName returns empty for null or blank") void nullOrBlank() { assertThat(VectorIndexProfiles.forName(null)).isEmpty(); assertThat(VectorIndexProfiles.forName(" ")).isEmpty(); @@ -44,7 +39,6 @@ void 
nullOrBlank() { @Nested class KnownNames { @Test - @DisplayName("knownNames lists the available profiles") void listsProfiles() { assertThat(VectorIndexProfiles.knownNames()).contains("small-high-recall", "big-low-latency"); } @@ -53,7 +47,6 @@ void listsProfiles() { @Nested class ProfileContents { @Test - @DisplayName("profiles never set the reserved (dedicated-field) options") void noReservedOptions() { for (var name : VectorIndexProfiles.knownNames()) { var options = VectorIndexProfiles.forName(name).orElseThrow(); From d47e5b68914aeb05384b521ed1e720955013dfa9 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Tue, 16 Jun 2026 12:04:09 -0700 Subject: [PATCH 03/13] refactor: rename indexingOptions field to vectorIndexing (#2509) Per #2509 (amorton), rename the new createVectorIndex option from `indexingOptions` to `vectorIndexing` before it ships. The field is still unreleased (added in #2487 / #2505), so this is a clean wire-name change with no backwards-compatibility concern. - VectorConstants.VectorColumn: INDEXING_OPTIONS -> VECTOR_INDEXING ("vectorIndexing"); this single constant drives the JSON key. - Rename the record component JsonNode indexingOptions -> vectorIndexing so the Java field matches the wire name (as metric / sourceModel do). - Update all user-visible text: the three INVALID_VECTOR_INDEXING_OPTIONS messages, the errors.yaml bodies (+ retitle "Vector indexing options are invalid"), and javadoc references. - Update IT request bodies and assertion strings. Internal identifiers that describe behavior rather than the wire field are intentionally unchanged: the error codes (INVALID_VECTOR_INDEXING_OPTIONS, UNKNOWN_VECTOR_INDEXING_PROFILE) and the applyIndexingOptions / renderIndexingOptions helpers. Verified: ./mvnw clean test -Dtest=ApiVectorIndexTest,VectorIndexProfilesTest passes (18/18); fmt:check clean. 
--- .../indexes/VectorIndexDefinitionDesc.java | 6 ++-- .../config/constants/VectorConstants.java | 6 ++-- .../service/schema/tables/ApiVectorIndex.java | 32 +++++++++---------- .../schema/tables/VectorIndexProfiles.java | 2 +- src/main/resources/errors.yaml | 10 +++--- .../CreateTableIndexIntegrationTest.java | 12 +++---- .../schema/tables/ApiVectorIndexTest.java | 2 +- 7 files changed, 35 insertions(+), 35 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java index 8033e7ec1b..e863058ab8 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java @@ -36,7 +36,7 @@ public record VectorIndexDefinitionDesc( @JsonPropertyOrder({ VectorConstants.VectorColumn.METRIC, VectorConstants.VectorColumn.SOURCE_MODEL, - VectorConstants.VectorColumn.INDEXING_OPTIONS + VectorConstants.VectorColumn.VECTOR_INDEXING }) public record VectorIndexDescOptions( @Nullable @@ -72,6 +72,6 @@ public record VectorIndexDescOptions( + "(e.g. {\"enable_hierarchy\": true, \"maximum_node_connections\": 32}). 
The dedicated " + "\"metric\" and \"sourceModel\" fields must not be repeated here.") @JsonInclude(JsonInclude.Include.NON_NULL) - @JsonProperty(VectorConstants.VectorColumn.INDEXING_OPTIONS) - JsonNode indexingOptions) {} + @JsonProperty(VectorConstants.VectorColumn.VECTOR_INDEXING) + JsonNode vectorIndexing) {} } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java index ae01eb570d..525e9c472a 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java @@ -8,7 +8,7 @@ interface VectorColumn { String METRIC = "metric"; String SOURCE_MODEL = "sourceModel"; String SERVICE = ServiceDescConstants.SERVICE; - String INDEXING_OPTIONS = "indexingOptions"; + String VECTOR_INDEXING = "vectorIndexing"; } interface Vectorize extends ServiceDescConstants {} @@ -17,7 +17,7 @@ interface Vectorize extends ServiceDescConstants {} * Names of the options used in the CQL {@code CREATE CUSTOM INDEX ... WITH OPTIONS = {...}} * clause for a vector (SAI ANN) index. {@link #SOURCE_MODEL} and {@link #SIMILARITY_FUNCTION} * have dedicated API fields ({@code sourceModel} / {@code metric}); the remaining tuning options - * are exposed via the {@code indexingOptions} field (see {@link VectorColumn#INDEXING_OPTIONS}). + * are exposed via the {@code vectorIndexing} field (see {@link VectorColumn#VECTOR_INDEXING}). */ interface CQLAnnIndex { String SOURCE_MODEL = "source_model"; @@ -27,7 +27,7 @@ interface CQLAnnIndex { /** * Options that have dedicated API fields ({@code sourceModel} / {@code metric}) and so must not - * be set again through the raw {@code indexingOptions} object. + * be set again through the raw {@code vectorIndexing} object. 
*/ Set RESERVED_OPTIONS = Set.of(SOURCE_MODEL, SIMILARITY_FUNCTION); } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java index 39f0fbe8a1..e88c0bbd48 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java @@ -86,7 +86,7 @@ public VectorIndexDefinitionDesc definition() { /** * Renders the additional tuning options from a CQL index options map (everything other than the * structural {@code class_name} / {@code target} and the {@code source_model} / {@code - * similarity_function} options, which have dedicated fields) as an {@code indexingOptions} object + * similarity_function} options, which have dedicated fields) as a {@code vectorIndexing} object * for the public schema description. * *

Profiles are resolved to their concrete options at create time, so the description always @@ -117,26 +117,26 @@ static JsonNode renderIndexingOptions(Map indexOptions) { } /** - * Applies the optional {@code indexingOptions} from the user request into the CQL index options + * Applies the optional {@code vectorIndexing} from the user request into the CQL index options * map. The value is either a String naming a {@link VectorIndexProfiles profile} (expanded into a * set of options) or an Object of raw Cassandra indexing options passed through as-is. The {@code * source_model} / {@code similarity_function} options have dedicated fields and must not be set * this way. * * @param indexOptions the CQL options map being built, mutated in place - * @param indexingOptions the raw node from the request, may be null + * @param vectorIndexing the raw node from the request, may be null */ @VisibleForTesting - static void applyIndexingOptions(Map indexOptions, JsonNode indexingOptions) { + static void applyIndexingOptions(Map indexOptions, JsonNode vectorIndexing) { - if (indexingOptions == null || indexingOptions.isNull()) { + if (vectorIndexing == null || vectorIndexing.isNull()) { // nothing provided, leave the options as they are return; } - if (indexingOptions.isTextual()) { + if (vectorIndexing.isTextual()) { // String -> named profile - var profileName = indexingOptions.textValue(); + var profileName = vectorIndexing.textValue(); var profileOptions = VectorIndexProfiles.forName(profileName) .orElseThrow( @@ -151,9 +151,9 @@ static void applyIndexingOptions(Map indexOptions, JsonNode inde return; } - if (indexingOptions.isObject()) { + if (vectorIndexing.isObject()) { // Object -> raw indexing options, passed through as-is using Cassandra's snake_case names. 
- indexingOptions + vectorIndexing .fields() .forEachRemaining( entry -> { @@ -162,7 +162,7 @@ static void applyIndexingOptions(Map indexOptions, JsonNode inde throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( Map.of( "reason", - "The option '%s' must be set using its dedicated field, not indexingOptions." + "The option '%s' must be set using its dedicated field, not vectorIndexing." .formatted(optionName))); } // class_name / target are structural SAI options set automatically by the API; @@ -172,7 +172,7 @@ static void applyIndexingOptions(Map indexOptions, JsonNode inde throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( Map.of( "reason", - "The option '%s' is set automatically and must not be provided in indexingOptions." + "The option '%s' is set automatically and must not be provided in vectorIndexing." .formatted(optionName))); } // CQL index options are strings: keep a textual value raw, otherwise serialise the @@ -188,8 +188,8 @@ static void applyIndexingOptions(Map indexOptions, JsonNode inde throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( Map.of( "reason", - "indexingOptions must be a String (profile name) or an Object (raw options), but was: " - + JsonUtil.nodeTypeAsString(indexingOptions))); + "vectorIndexing must be a String (profile name) or an Object (raw options), but was: " + + JsonUtil.nodeTypeAsString(vectorIndexing))); } /** @@ -363,9 +363,9 @@ public ApiVectorIndex create( // Apply the optional additional indexing options (a profile name or raw options); the source // model and metric above have dedicated fields and must not be set this way. - var userIndexingOptions = - (indexDesc.options() == null) ? null : indexDesc.options().indexingOptions(); - applyIndexingOptions(indexOptions, userIndexingOptions); + var userVectorIndexing = + (indexDesc.options() == null) ? 
null : indexDesc.options().vectorIndexing(); + applyIndexingOptions(indexOptions, userVectorIndexing); return new ApiVectorIndex( indexIdentifier, targetIdentifier, indexOptions, metricToUse, sourceModelToUse); diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java index a8ce72ee63..cbff58d71f 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java @@ -8,7 +8,7 @@ /** * Registry of named vector-index "profiles": each maps a user-facing profile name to a set of * Cassandra SAI indexing options (the {@code WITH OPTIONS = {...}} entries). Selecting a profile is - * an alternative to passing raw options through the {@code indexingOptions} field on the + * an alternative to passing raw options through the {@code vectorIndexing} field on the * createVectorIndex command. * *

Profiles only set the tuning options; they never set {@code source_model} or {@code diff --git a/src/main/resources/errors.yaml b/src/main/resources/errors.yaml index 0a1c90f2e9..bb1afc18d5 100644 --- a/src/main/resources/errors.yaml +++ b/src/main/resources/errors.yaml @@ -1825,7 +1825,7 @@ request-errors: body: |- The command attempted to create a vector index using an indexing profile that is not known by the API. - A profile is selected by setting `indexingOptions` to a String; the API expands it into a set of indexing options. To pass options directly instead, set `indexingOptions` to an Object of raw Cassandra indexing options. + A profile is selected by setting `vectorIndexing` to a String; the API expands it into a set of indexing options. To pass options directly instead, set `vectorIndexing` to an Object of raw Cassandra indexing options. The known profiles are: ${knownProfiles}. The command attempted to use the profile: ${unknownProfile}. @@ -1834,15 +1834,15 @@ request-errors: - scope: SCHEMA code: INVALID_VECTOR_INDEXING_OPTIONS - title: Vector indexingOptions are invalid + title: Vector indexing options are invalid body: |- - The command attempted to create a vector index with invalid `indexingOptions`. + The command attempted to create a vector index with invalid `vectorIndexing`. - `indexingOptions` must be either a String naming a predefined profile, or an Object of raw Cassandra indexing options. The dedicated `metric` and `sourceModel` fields must not be repeated inside the raw options Object. + `vectorIndexing` must be either a String naming a predefined profile, or an Object of raw Cassandra indexing options. The dedicated `metric` and `sourceModel` fields must not be repeated inside the raw options Object. ${reason} - Resend the command with corrected `indexingOptions`. + Resend the command with corrected `vectorIndexing`. 
- scope: SCHEMA code: VECTOR_SEARCH_NOT_SUPPORTED diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java index 159bbaa1b2..6c03aa3893 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java @@ -1146,7 +1146,7 @@ public void unknownIndexingProfile() { "definition": { "column": "vector_type_7", "options": { - "indexingOptions": "no-such-profile" + "vectorIndexing": "no-such-profile" } } } @@ -1167,7 +1167,7 @@ public void rawIndexingOptionsWithReservedOption() { "definition": { "column": "vector_type_7", "options": { - "indexingOptions": { + "vectorIndexing": { "similarity_function": "COSINE" } } @@ -1190,7 +1190,7 @@ public void rawIndexingOptionsWithStructuralOption() { "definition": { "column": "vector_type_7", "options": { - "indexingOptions": { + "vectorIndexing": { "class_name": "StorageAttachedIndex" } } @@ -1200,7 +1200,7 @@ public void rawIndexingOptionsWithStructuralOption() { .hasSingleApiError( SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS, SchemaException.class, - "The option 'class_name' is set automatically and must not be provided in indexingOptions"); + "The option 'class_name' is set automatically and must not be provided in vectorIndexing"); } @Test @@ -1213,7 +1213,7 @@ public void indexingOptionsNotStringOrObject() { "definition": { "column": "vector_type_7", "options": { - "indexingOptions": [1, 2, 3] + "vectorIndexing": [1, 2, 3] } } } @@ -1221,7 +1221,7 @@ public void indexingOptionsNotStringOrObject() { .hasSingleApiError( SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS, SchemaException.class, - "indexingOptions must be a String (profile name) or an Object (raw options)", + "vectorIndexing must be a String (profile name) or an Object (raw options)", 
"but was: Array"); } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java index 19c4f5813b..2d33a900c0 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java @@ -14,7 +14,7 @@ import org.junit.jupiter.api.Test; /** - * Unit tests for the {@code indexingOptions} handling on {@link ApiVectorIndex}: how the public + * Unit tests for the {@code vectorIndexing} handling on {@link ApiVectorIndex}: how the public * value (profile name or raw options) is turned into the CQL index options map, and how it is * rendered back for the schema description. These are deterministic and do not need a database (the * end-to-end behaviour also depends on the backend allowing custom SAI parameters). From 9c916551fd03a61b486b39cb72f700739aa6d7c2 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Tue, 16 Jun 2026 16:53:43 -0700 Subject: [PATCH 04/13] refactor: structure vectorIndexing as {profile, options} with restricted SAI options Reshape the createVectorIndex `vectorIndexing` field from a polymorphic String|Object into a structured object `{ profile, options }`: - `profile` expands via VectorIndexProfiles; explicit `options` override it - `options` keys are validated against an allow-list (maximum_node_connections, construction_beam_width, neighborhood_overflow, alpha, enable_hierarchy); reserved metric/sourceModel keys, unknown keys, and non-scalar values are rejected - describeIndexingOptions filters to the allow-list and echoes `{ options }` (echoing the profile name back is a follow-up) Persist the chosen profile name + the options it expanded to in the table extensions (VECTOR_INDEX_PROFILES), clobber-safe across all extension writers. 
Also: rename renderIndexingOptions -> describeIndexingOptions, trim comments, normalize numeric option values to plain (non-scientific) strings, and expand unit coverage (request deserialization, apply, describe, round-trip). --- .../indexes/VectorIndexDefinitionDesc.java | 45 ++- .../config/constants/SchemaConstants.java | 2 + .../config/constants/VectorConstants.java | 33 +- .../cqldriver/executor/TableExtensions.java | 47 ++- .../VectorIndexProfileDefinition.java | 71 +++++ .../resolver/AlterTableCommandResolver.java | 22 +- .../CreateVectorIndexCommandResolver.java | 109 +++++-- .../service/schema/tables/ApiVectorIndex.java | 153 +++++---- .../schema/tables/VectorIndexProfiles.java | 20 +- src/main/resources/errors.yaml | 6 +- .../CreateTableIndexIntegrationTest.java | 21 +- .../executor/TableExtensionsTest.java | 74 +++++ .../VectorIndexProfileDefinitionTest.java | 96 ++++++ .../schema/tables/ApiVectorIndexTest.java | 294 +++++++++++++----- 14 files changed, 756 insertions(+), 237 deletions(-) create mode 100644 src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinition.java create mode 100644 src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensionsTest.java create mode 100644 src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinitionTest.java diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java index e863058ab8..cbaed7cb56 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java @@ -3,7 +3,6 @@ import com.fasterxml.jackson.annotation.JsonInclude; import 
com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonPropertyOrder; -import com.fasterxml.jackson.databind.JsonNode; import io.stargate.sgv2.jsonapi.config.constants.TableDescConstants; import io.stargate.sgv2.jsonapi.config.constants.VectorConstants; import io.stargate.sgv2.jsonapi.config.constants.VectorIndexDescDefaults; @@ -12,6 +11,7 @@ import jakarta.annotation.Nullable; import jakarta.validation.constraints.NotNull; import jakarta.validation.constraints.Pattern; +import java.util.Map; import org.eclipse.microprofile.openapi.annotations.enums.SchemaType; import org.eclipse.microprofile.openapi.annotations.media.Schema; @@ -66,12 +66,43 @@ public record VectorIndexDescOptions( @Nullable @Schema( description = - "Optional additional vector (SAI) indexing configuration: either a String naming a " - + "predefined profile (e.g. \"small-high-recall\") that the API expands into a set of " - + "options, or an Object of raw Cassandra indexing options passed through as-is " - + "(e.g. {\"enable_hierarchy\": true, \"maximum_node_connections\": 32}). The dedicated " - + "\"metric\" and \"sourceModel\" fields must not be repeated here.") + "Optional vector (SAI) indexing configuration: an object with an optional " + + "\"profile\" (a predefined name the API expands into options, e.g. " + + "\"small-high-recall\") and an optional \"options\" object of Cassandra SAI " + + "tuning options (e.g. {\"maximum_node_connections\": 32, \"alpha\": 1.2}). " + + "Explicit options override the profile. Set \"metric\" / \"sourceModel\" via " + + "their dedicated fields, not here.", + type = SchemaType.OBJECT) @JsonInclude(JsonInclude.Include.NON_NULL) @JsonProperty(VectorConstants.VectorColumn.VECTOR_INDEXING) - JsonNode vectorIndexing) {} + VectorIndexingDesc vectorIndexing) {} + + /** + * The {@code vectorIndexing} value: an optional profile name plus optional SAI tuning options. 
+ */ + @JsonPropertyOrder({ + VectorConstants.VectorIndexing.PROFILE, + VectorConstants.VectorIndexing.OPTIONS + }) + public record VectorIndexingDesc( + @Nullable + @Schema( + description = + "Optional predefined indexing profile name; the API expands it into SAI options.", + type = SchemaType.STRING) + @JsonInclude(JsonInclude.Include.NON_NULL) + @JsonProperty(VectorConstants.VectorIndexing.PROFILE) + String profile, + // + @Nullable + @Schema( + description = + "Optional Cassandra SAI tuning options (snake_case), restricted to: " + + "maximum_node_connections, construction_beam_width, neighborhood_overflow, " + + "alpha, enable_hierarchy. Values may be string, number, or boolean on input " + + "and are returned as strings in index descriptions.", + type = SchemaType.OBJECT) + @JsonInclude(JsonInclude.Include.NON_NULL) + @JsonProperty(VectorConstants.VectorIndexing.OPTIONS) + Map options) {} } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/SchemaConstants.java b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/SchemaConstants.java index 739a2c619e..3430f7e9cd 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/SchemaConstants.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/SchemaConstants.java @@ -11,6 +11,8 @@ interface MetadataFieldsNames { String SCHEMA_TYPE = "com.datastax.data-api.schema-type"; String SCHEMA_VERSION = "com.datastax.data-api.schema-def-version"; String VECTORIZE_CONFIG = "com.datastax.data-api.vectorize-config"; + // Per vector-index record of the profile it was created with (name + expanded options). 
+ String VECTOR_INDEX_PROFILES = "com.datastax.data-api.vector-index-profiles"; } interface MetadataFieldsValues { diff --git a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java index 525e9c472a..b0c24b9225 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java @@ -11,24 +11,45 @@ interface VectorColumn { String VECTOR_INDEXING = "vectorIndexing"; } + /** Field names inside the {@code vectorIndexing} object. */ + interface VectorIndexing { + String PROFILE = "profile"; + String OPTIONS = "options"; + } + interface Vectorize extends ServiceDescConstants {} /** - * Names of the options used in the CQL {@code CREATE CUSTOM INDEX ... WITH OPTIONS = {...}} - * clause for a vector (SAI ANN) index. {@link #SOURCE_MODEL} and {@link #SIMILARITY_FUNCTION} - * have dedicated API fields ({@code sourceModel} / {@code metric}); the remaining tuning options - * are exposed via the {@code vectorIndexing} field (see {@link VectorColumn#VECTOR_INDEXING}). + * CQL {@code WITH OPTIONS} keys for a vector (SAI) index. {@link #SOURCE_MODEL} and {@link + * #SIMILARITY_FUNCTION} have dedicated API fields ({@code sourceModel} / {@code metric}); the + * rest are tuning options set via {@code vectorIndexing.options}. */ interface CQLAnnIndex { String SOURCE_MODEL = "source_model"; String SIMILARITY_FUNCTION = "similarity_function"; String MAXIMUM_NODE_CONNECTIONS = "maximum_node_connections"; String CONSTRUCTION_BEAM_WIDTH = "construction_beam_width"; + String NEIGHBORHOOD_OVERFLOW = "neighborhood_overflow"; + String ALPHA = "alpha"; + String ENABLE_HIERARCHY = "enable_hierarchy"; /** - * Options that have dedicated API fields ({@code sourceModel} / {@code metric}) and so must not - * be set again through the raw {@code vectorIndexing} object. 
+ * Options with dedicated API fields ({@code metric} / {@code sourceModel}); rejected inside + * {@code vectorIndexing.options}. */ Set RESERVED_OPTIONS = Set.of(SOURCE_MODEL, SIMILARITY_FUNCTION); + + /** + * The SAI tuning options a user may set through {@code vectorIndexing.options}. Excludes the + * dedicated-field options and the structural ones. {@code optimize_for} exists in OSS Cassandra + * but is de-emphasised in DSE 6.9 / HCD, so it is intentionally left out for now. + */ + Set ALLOWED_OPTIONS = + Set.of( + MAXIMUM_NODE_CONNECTIONS, + CONSTRUCTION_BEAM_WIDTH, + NEIGHBORHOOD_OVERFLOW, + ALPHA, + ENABLE_HIERARCHY); } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensions.java b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensions.java index c1095108db..ca8166dfcf 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensions.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensions.java @@ -63,13 +63,25 @@ private static Map uncheckedExtensions(TableMetadata tableMe return (Map) tableMetadata.getOptions().get(TABLE_OPTIONS_EXTENSION_KEY); } + /** As {@link #createCustomProperties(Map, Map, ObjectMapper)} with no vector index profiles. */ + public static Map createCustomProperties( + Map vectorDefs, ObjectMapper objectMapper) { + return createCustomProperties(vectorDefs, Map.of(), objectMapper); + } + /** - * Create custom properties for table metadata, This needs to add schema and table always since - * the command may be altering CQL created tables + * Builds the table extensions payload: schema type/version (always written, since the command may + * be altering a CQL-created table) plus the vectorize config and vector index profiles. + * + *

Extensions are fully replaced on every write, so callers must pass the complete set of defs + * and profiles they want to keep; anything omitted is dropped. */ public static Map createCustomProperties( - Map vectorDefs, ObjectMapper objectMapper) { + Map vectorDefs, + Map indexProfiles, + ObjectMapper objectMapper) { Objects.requireNonNull(vectorDefs, "vectorDefs must not be null"); + Objects.requireNonNull(indexProfiles, "indexProfiles must not be null"); Objects.requireNonNull(objectMapper, "objectMapper must not be null"); Map customProperties = new HashMap<>(); @@ -81,9 +93,7 @@ public static Map createCustomProperties( SchemaConstants.MetadataFieldsNames.SCHEMA_VERSION, SchemaConstants.MetadataFieldsValues.SCHEMA_VERSION_VERSION); - // because the extensions are always fully replaced, we do not need to write the key if there - // are none - // the full map will be replaced, replacing any existing extensions + // Only write a key when it has content (the map is fully replaced anyway). 
if (!vectorDefs.isEmpty()) { // convert to strings for serialisation Map stringKeysDefs = @@ -92,15 +102,24 @@ public static Map createCustomProperties( Collectors.toMap( entry -> cqlIdentifierToJsonKey(entry.getKey()), Map.Entry::getValue)); - try { - customProperties.put( - SchemaConstants.MetadataFieldsNames.VECTORIZE_CONFIG, - objectMapper.writeValueAsString(stringKeysDefs)); - } catch (JsonProcessingException e) { - // this should never happen - throw new RuntimeException(e); - } + customProperties.put( + SchemaConstants.MetadataFieldsNames.VECTORIZE_CONFIG, + writeJson(stringKeysDefs, objectMapper)); + } + if (!indexProfiles.isEmpty()) { + customProperties.put( + SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES, + writeJson(indexProfiles, objectMapper)); } return customProperties; } + + private static String writeJson(Object value, ObjectMapper objectMapper) { + try { + return objectMapper.writeValueAsString(value); + } catch (JsonProcessingException e) { + // this should never happen + throw new RuntimeException(e); + } + } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinition.java b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinition.java new file mode 100644 index 0000000000..b2d67e903f --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinition.java @@ -0,0 +1,71 @@ +package io.stargate.sgv2.jsonapi.service.cqldriver.executor; + +import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.stargate.sgv2.jsonapi.config.constants.SchemaConstants; +import java.util.HashMap; +import java.util.Map; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The profile a vector index was created with: the 
profile name plus the SAI options it expanded + * to. Stored per index name in the table extensions (key {@link + * SchemaConstants.MetadataFieldsNames#VECTOR_INDEX_PROFILES}) so the friendly name is not lost once + * the options are expanded at create time. The options snapshot keeps the record meaningful even if + * the profile definition changes later. + */ +public record VectorIndexProfileDefinition(String profile, Map options) { + + private static final Logger LOGGER = LoggerFactory.getLogger(VectorIndexProfileDefinition.class); + + /** Reads the stored profiles, keyed by index name, from the table extensions. */ + public static Map from( + TableMetadata tableMetadata, ObjectMapper objectMapper) { + var extensions = TableExtensions.getExtensions(tableMetadata); + return fromJson( + extensions.get(SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES), objectMapper); + } + + /** + * Parses the {@code index name -> profile} JSON written into the extensions. Returns a mutable + * map so callers can merge changes before writing it back. Profiles are advisory metadata, so a + * bad blob is logged and skipped rather than failing the read. + */ + static Map fromJson( + String json, ObjectMapper objectMapper) { + Map defs = new HashMap<>(); + if (json == null || json.isBlank()) { + return defs; + } + try { + JsonNode byIndex = objectMapper.readTree(json); + for (Map.Entry entry : byIndex.properties()) { + defs.put( + entry.getKey(), + objectMapper.treeToValue(entry.getValue(), VectorIndexProfileDefinition.class)); + } + } catch (JsonProcessingException | IllegalArgumentException e) { + LOGGER.error("Error parsing vector index profiles, json: {}", json, e); + defs.clear(); + } + return defs; + } + + /** + * Records the profile for {@code indexKey} in {@code profiles}, or removes any stale entry when + * {@code def} is null (no profile was used). Returns true if the map changed, so the caller can + * skip an unnecessary extension write. 
+ */ + public static boolean putOrRemove( + Map profiles, + String indexKey, + VectorIndexProfileDefinition def) { + if (def == null) { + return profiles.remove(indexKey) != null; + } + return !def.equals(profiles.put(indexKey, def)); + } +} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/AlterTableCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/AlterTableCommandResolver.java index 28ea3128f4..e52e6a6cfb 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/AlterTableCommandResolver.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/AlterTableCommandResolver.java @@ -14,6 +14,7 @@ import io.stargate.sgv2.jsonapi.exception.SchemaException; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.DefaultDriverExceptionHandler; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.TableExtensions; +import io.stargate.sgv2.jsonapi.service.cqldriver.executor.VectorIndexProfileDefinition; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.VectorizeDefinition; import io.stargate.sgv2.jsonapi.service.operation.Operation; import io.stargate.sgv2.jsonapi.service.operation.SchemaDBTask; @@ -86,6 +87,15 @@ public Operation resolveTableCommand( taskGroup, SchemaDBTaskPage.accumulator(AlterTableDBTask.class, commandContext)); } + /** + * Existing vector-index profiles for the table. Altering columns/vectorize fully replaces the + * extensions, so these must be carried through or they would be wiped. 
+ */ + private Map existingIndexProfiles( + TableSchemaObject tableSchemaObject) { + return VectorIndexProfileDefinition.from(tableSchemaObject.tableMetadata(), objectMapper); + } + private List handleAddColumns( AlterTableDBTaskBuilder taskBuilder, TableSchemaObject tableSchemaObject, @@ -160,7 +170,8 @@ private List handleAddColumns( // New custom property to be updated var customProperties = - TableExtensions.createCustomProperties(existingVectorizeDef, objectMapper); + TableExtensions.createCustomProperties( + existingVectorizeDef, existingIndexProfiles(tableSchemaObject), objectMapper); // First execute the extension update for add columns // so if we fail to add this we do not end up with a column that has missing vectorize // definition @@ -273,7 +284,8 @@ private List handleDropColumns( if (updateVectorize) { attempts.add( taskBuilder.buildUpdateExtensions( - TableExtensions.createCustomProperties(existingVectorizeDefs, objectMapper))); + TableExtensions.createCustomProperties( + existingVectorizeDefs, existingIndexProfiles(tableSchemaObject), objectMapper))); } return attempts; } @@ -357,7 +369,8 @@ private List handleAddVectorize( return List.of( taskBuilder.buildUpdateExtensions( - TableExtensions.createCustomProperties(existingVectorizeDefs, objectMapper))); + TableExtensions.createCustomProperties( + existingVectorizeDefs, existingIndexProfiles(tableSchemaObject), objectMapper))); } private List handleDropVectorize( @@ -434,7 +447,8 @@ private List handleDropVectorize( return List.of( taskBuilder.buildUpdateExtensions( - TableExtensions.createCustomProperties(existingVectorizeDefs, objectMapper))); + TableExtensions.createCustomProperties( + existingVectorizeDefs, existingIndexProfiles(tableSchemaObject), objectMapper))); } @Override diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java index 
87f24724b8..5d3b8c6564 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java @@ -2,14 +2,21 @@ import static io.stargate.sgv2.jsonapi.exception.ErrorFormatters.errFmtJoin; import static io.stargate.sgv2.jsonapi.util.ApiOptionUtils.getOrDefault; +import static io.stargate.sgv2.jsonapi.util.CqlIdentifierUtil.cqlIdentifierToJsonKey; +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.fasterxml.jackson.databind.ObjectMapper; import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; import io.stargate.sgv2.jsonapi.api.model.command.impl.CreateVectorIndexCommand; import io.stargate.sgv2.jsonapi.config.OperationsConfig; import io.stargate.sgv2.jsonapi.config.constants.TableDescDefaults; import io.stargate.sgv2.jsonapi.exception.SchemaException; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.DefaultDriverExceptionHandler; +import io.stargate.sgv2.jsonapi.service.cqldriver.executor.TableExtensions; +import io.stargate.sgv2.jsonapi.service.cqldriver.executor.VectorIndexProfileDefinition; import io.stargate.sgv2.jsonapi.service.operation.*; +import io.stargate.sgv2.jsonapi.service.operation.tables.AlterTableDBTask; +import io.stargate.sgv2.jsonapi.service.operation.tables.AlterTableExceptionHandler; import io.stargate.sgv2.jsonapi.service.operation.tables.CreateIndexDBTask; import io.stargate.sgv2.jsonapi.service.operation.tables.CreateIndexDBTaskBuilder; import io.stargate.sgv2.jsonapi.service.operation.tables.CreateIndexExceptionHandler; @@ -19,7 +26,9 @@ import io.stargate.sgv2.jsonapi.service.schema.tables.ApiIndexType; import io.stargate.sgv2.jsonapi.service.schema.tables.ApiVectorIndex; import io.stargate.sgv2.jsonapi.service.schema.tables.TableSchemaObject; +import io.stargate.sgv2.jsonapi.service.schema.tables.VectorIndexProfiles; import jakarta.enterprise.context.ApplicationScoped; 
+import jakarta.inject.Inject; import java.time.Duration; import java.util.Map; @@ -27,6 +36,8 @@ @ApplicationScoped public class CreateVectorIndexCommandResolver implements CommandResolver { + @Inject ObjectMapper objectMapper; + @Override public Class getCommandClass() { return CreateVectorIndexCommand.class; @@ -62,39 +73,99 @@ public Operation resolveTableCommand( command.indexType())); } + var schemaObject = commandContext.schemaObject(); + // TODO: we need a centralised way of creating retry attempt. + var schemaRetryPolicy = + new SchemaDBTask.SchemaRetryPolicy( + commandContext.config().get(OperationsConfig.class).databaseConfig().ddlRetries(), + Duration.ofMillis( + commandContext + .config() + .get(OperationsConfig.class) + .databaseConfig() + .ddlRetryDelayMillis())); + CreateIndexDBTaskBuilder taskBuilder = - CreateIndexDBTask.builder(commandContext.schemaObject()) + CreateIndexDBTask.builder(schemaObject) .withIfNotExists( getOrDefault( command.options(), CreateVectorIndexCommand.CreateVectorIndexCommandOptions::ifNotExists, TableDescDefaults.CreateVectorIndexOptionsDefaults.IF_NOT_EXISTS)) - .withSchemaRetryPolicy( - new SchemaDBTask.SchemaRetryPolicy( - commandContext - .config() - .get(OperationsConfig.class) - .databaseConfig() - .ddlRetries(), - Duration.ofMillis( - commandContext - .config() - .get(OperationsConfig.class) - .databaseConfig() - .ddlRetryDelayMillis()))); + .withSchemaRetryPolicy(schemaRetryPolicy); // this will throw APIException if the index is not supported var apiIndex = - ApiVectorIndex.FROM_DESC_FACTORY.create( - commandContext.schemaObject(), indexName, command.definition()); + ApiVectorIndex.FROM_DESC_FACTORY.create(schemaObject, indexName, command.definition()); taskBuilder.withExceptionHandlerFactory( DefaultDriverExceptionHandler.Factory.withIdentifier( CreateIndexExceptionHandler::new, apiIndex.indexName())); - var taskGroup = new TaskGroup<>(taskBuilder.build(apiIndex)); + var createIndexTask = 
taskBuilder.build(apiIndex); + + // If a named profile was used, record the name + the options it expanded to in the table + // extensions so the friendly name survives. Written as a second DDL after the index so a failed + // create leaves no orphan record. (With ifNotExists on an existing index the create is a no-op + // but we still write the latest requested profile.) + var extensionTask = + buildProfileExtensionTask(schemaObject, apiIndex.indexName(), command, schemaRetryPolicy); + if (extensionTask == null) { + return new TaskOperation<>( + new TaskGroup<>(createIndexTask), + SchemaDBTaskPage.accumulator(CreateIndexDBTask.class, commandContext)); + } + + // sequential so the extension write only runs if the index was created + TaskGroup, TableSchemaObject> taskGroup = new TaskGroup<>(true); + taskGroup.add(createIndexTask); + taskGroup.add(extensionTask); + + @SuppressWarnings("unchecked") + Class> taskClass = + (Class>) (Class) SchemaDBTask.class; + return new TaskOperation<>(taskGroup, SchemaDBTaskPage.accumulator(taskClass, commandContext)); + } + + /** + * Builds the ALTER TABLE task that records this index's profile in the table extensions, or null + * when nothing needs to change (no profile used and no stale entry for the index name to clear). + * Existing vectorize config and other profiles are read back and rewritten so they are not lost. + */ + private AlterTableDBTask buildProfileExtensionTask( + TableSchemaObject schemaObject, + CqlIdentifier indexIdentifier, + CreateVectorIndexCommand command, + SchemaDBTask.SchemaRetryPolicy schemaRetryPolicy) { + + var options = command.definition().options(); + var vectorIndexing = (options == null) ? null : options.vectorIndexing(); + // Only a named profile is recorded; bare options carry no name to store. + var profileName = (vectorIndexing == null) ? 
null : vectorIndexing.profile(); + + var indexKey = cqlIdentifierToJsonKey(indexIdentifier); + var profiles = VectorIndexProfileDefinition.from(schemaObject.tableMetadata(), objectMapper); + + VectorIndexProfileDefinition def = null; + if (profileName != null) { + // forName was already validated by the index factory above, so it is present here. + var profileOptions = VectorIndexProfiles.forName(profileName).orElseThrow(); + def = new VectorIndexProfileDefinition(profileName, profileOptions); + } + + if (!VectorIndexProfileDefinition.putOrRemove(profiles, indexKey, def)) { + return null; + } + + var customProperties = + TableExtensions.createCustomProperties( + schemaObject.apiTableDef().allColumns().getVectorizeDefs(), profiles, objectMapper); - return new TaskOperation<>( - taskGroup, SchemaDBTaskPage.accumulator(CreateIndexDBTask.class, commandContext)); + return AlterTableDBTask.builder(schemaObject) + .withRetryPolicy(schemaRetryPolicy) + .withExceptionHandlerFactory( + DefaultDriverExceptionHandler.Factory.withIdentifier( + AlterTableExceptionHandler::new, schemaObject.tableName())) + .buildUpdateExtensions(customProperties); } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java index e88c0bbd48..67b1a125cf 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java @@ -6,9 +6,6 @@ import com.datastax.oss.driver.api.core.CqlIdentifier; import com.datastax.oss.driver.api.core.metadata.schema.IndexMetadata; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.JsonNodeFactory; -import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.annotations.VisibleForTesting; import io.stargate.sgv2.jsonapi.api.model.command.table.IndexDesc; import 
io.stargate.sgv2.jsonapi.api.model.command.table.SchemaDescSource; @@ -21,7 +18,7 @@ import io.stargate.sgv2.jsonapi.service.schema.SimilarityFunction; import io.stargate.sgv2.jsonapi.service.schema.tables.factories.IndexFactoryFromCql; import io.stargate.sgv2.jsonapi.service.schema.tables.factories.IndexFactoryFromIndexDesc; -import io.stargate.sgv2.jsonapi.util.JsonUtil; +import java.math.BigDecimal; import java.util.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -61,7 +58,7 @@ public IndexDesc getSchemaDescription( new VectorIndexDefinitionDesc.VectorIndexDescOptions( similarityFunction.apiName(), sourceModel.apiName(), - renderIndexingOptions(indexOptions)); + describeIndexingOptions(indexOptions)); var definition = new VectorIndexDefinitionDesc(cqlIdentifierToJsonKey(targetColumn), definitionOptions); @@ -84,59 +81,55 @@ public VectorIndexDefinitionDesc definition() { } /** - * Renders the additional tuning options from a CQL index options map (everything other than the - * structural {@code class_name} / {@code target} and the {@code source_model} / {@code - * similarity_function} options, which have dedicated fields) as a {@code vectorIndexing} object - * for the public schema description. - * - *

Profiles are resolved to their concrete options at create time, so the description always - * shows the resolved raw options. Values are rendered as Strings because CQL index options are a - * {@code Map}. + * Builds the {@code vectorIndexing} description from the CQL index options map: the supported + * tuning options (see {@link VectorConstants.CQLAnnIndex#ALLOWED_OPTIONS}) under {@code options}. + * Structural, dedicated-field, and any other (e.g. CQL-only) keys are omitted so the description + * stays symmetric with what the API accepts. The profile name is not reconstructed here (it lives + * in the table extensions), so only {@code options} is set. * * @param indexOptions the CQL index options map - * @return the options object, or null when there are none (so the field is omitted) + * @return the {@code vectorIndexing} description, or null when there are no supported tuning + * options */ @VisibleForTesting - static JsonNode renderIndexingOptions(Map indexOptions) { - ObjectNode node = null; + static VectorIndexDefinitionDesc.VectorIndexingDesc describeIndexingOptions( + Map indexOptions) { + Map options = null; for (var entry : indexOptions.entrySet()) { - var optionName = entry.getKey(); - // Skip the structural SAI options (added by the driver / CQL builder) and the options that - // have dedicated API fields (source_model / similarity_function). - if (CQLSAIIndex.Options.CLASS_NAME.equals(optionName) - || CQLSAIIndex.Options.TARGET.equals(optionName) - || VectorConstants.CQLAnnIndex.RESERVED_OPTIONS.contains(optionName)) { + // Only surface options the API also accepts, so a description round-trips into a request. 
+ if (!VectorConstants.CQLAnnIndex.ALLOWED_OPTIONS.contains(entry.getKey())) { continue; } - if (node == null) { - node = JsonNodeFactory.instance.objectNode(); + if (options == null) { + options = new LinkedHashMap<>(); } - node.put(optionName, entry.getValue()); + options.put(entry.getKey(), entry.getValue()); } - return node; + return options == null ? null : new VectorIndexDefinitionDesc.VectorIndexingDesc(null, options); } /** - * Applies the optional {@code vectorIndexing} from the user request into the CQL index options - * map. The value is either a String naming a {@link VectorIndexProfiles profile} (expanded into a - * set of options) or an Object of raw Cassandra indexing options passed through as-is. The {@code - * source_model} / {@code similarity_function} options have dedicated fields and must not be set - * this way. + * Applies the request's {@code vectorIndexing} into the CQL index options map. An optional {@code + * profile} is expanded via {@link VectorIndexProfiles}; an optional {@code options} object adds + * Cassandra SAI tuning options (validated against {@link + * VectorConstants.CQLAnnIndex#ALLOWED_OPTIONS}) on top, overriding the profile. {@code + * source_model} / {@code similarity_function} have dedicated fields and are rejected here. 
* * @param indexOptions the CQL options map being built, mutated in place - * @param vectorIndexing the raw node from the request, may be null + * @param vectorIndexing the structured request value, may be null */ @VisibleForTesting - static void applyIndexingOptions(Map indexOptions, JsonNode vectorIndexing) { + static void applyIndexingOptions( + Map indexOptions, + VectorIndexDefinitionDesc.VectorIndexingDesc vectorIndexing) { - if (vectorIndexing == null || vectorIndexing.isNull()) { - // nothing provided, leave the options as they are + if (vectorIndexing == null) { return; } - if (vectorIndexing.isTextual()) { - // String -> named profile - var profileName = vectorIndexing.textValue(); + // A profile expands to a base set of options. + var profileName = vectorIndexing.profile(); + if (profileName != null) { var profileOptions = VectorIndexProfiles.forName(profileName) .orElseThrow( @@ -148,48 +141,52 @@ static void applyIndexingOptions(Map indexOptions, JsonNode vect "unknownProfile", profileName))); indexOptions.putAll(profileOptions); - return; } - if (vectorIndexing.isObject()) { - // Object -> raw indexing options, passed through as-is using Cassandra's snake_case names. - vectorIndexing - .fields() - .forEachRemaining( - entry -> { - var optionName = entry.getKey(); - if (VectorConstants.CQLAnnIndex.RESERVED_OPTIONS.contains(optionName)) { - throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( - Map.of( - "reason", - "The option '%s' must be set using its dedicated field, not vectorIndexing." - .formatted(optionName))); - } - // class_name / target are structural SAI options set automatically by the API; - // reject them here, mirroring how renderIndexingOptions filters them out. 
- if (CQLSAIIndex.Options.CLASS_NAME.equals(optionName) - || CQLSAIIndex.Options.TARGET.equals(optionName)) { - throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( - Map.of( - "reason", - "The option '%s' is set automatically and must not be provided in vectorIndexing." - .formatted(optionName))); - } - // CQL index options are strings: keep a textual value raw, otherwise serialise the - // JSON value (e.g. number 32 -> "32", boolean true -> "true"). - var value = entry.getValue(); - indexOptions.put( - optionName, value.isTextual() ? value.textValue() : value.toString()); - }); - return; + // Explicit options are validated against the allow-list and override the profile. + var options = vectorIndexing.options(); + if (options != null) { + for (var entry : options.entrySet()) { + var optionName = entry.getKey(); + if (VectorConstants.CQLAnnIndex.RESERVED_OPTIONS.contains(optionName)) { + var dedicatedField = + VectorConstants.CQLAnnIndex.SOURCE_MODEL.equals(optionName) + ? VectorConstants.VectorColumn.SOURCE_MODEL + : VectorConstants.VectorColumn.METRIC; + throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( + Map.of( + "reason", + "The option '%s' must be set using its dedicated field '%s', not vectorIndexing.options." + .formatted(optionName, dedicatedField))); + } + if (!VectorConstants.CQLAnnIndex.ALLOWED_OPTIONS.contains(optionName)) { + throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( + Map.of( + "reason", + "Unsupported vector indexing option '%s'. Supported options: %s." 
+ .formatted( + optionName, errFmtJoin(VectorConstants.CQLAnnIndex.ALLOWED_OPTIONS)))); + } + indexOptions.put(optionName, optionValueToString(optionName, entry.getValue())); + } } + } - // neither String nor Object -> not supported - throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( - Map.of( - "reason", - "vectorIndexing must be a String (profile name) or an Object (raw options), but was: " - + JsonUtil.nodeTypeAsString(vectorIndexing))); + /** CQL index options are strings; accept scalar JSON values and reject objects, arrays, null. */ + private static String optionValueToString(String optionName, Object value) { + if (value == null || value instanceof Map || value instanceof Iterable) { + throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( + Map.of( + "reason", + "The option '%s' must be a scalar value (string, number, or boolean)." + .formatted(optionName))); + } + // JSON numbers deserialize to BigDecimal; use plain (non-scientific) notation for the CQL + // value. + if (value instanceof BigDecimal number) { + return number.toPlainString(); + } + return String.valueOf(value); } /** @@ -361,8 +358,8 @@ public ApiVectorIndex create( metricToUse); } - // Apply the optional additional indexing options (a profile name or raw options); the source - // model and metric above have dedicated fields and must not be set this way. + // Apply vectorIndexing (optional profile and/or explicit options; options override the + // profile); metric / sourceModel above have dedicated fields. var userVectorIndexing = (indexDesc.options() == null) ? 
null : indexDesc.options().vectorIndexing(); applyIndexingOptions(indexOptions, userVectorIndexing); diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java index cbff58d71f..0a51ef54bf 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java @@ -6,17 +6,14 @@ import java.util.Set; /** - * Registry of named vector-index "profiles": each maps a user-facing profile name to a set of - * Cassandra SAI indexing options (the {@code WITH OPTIONS = {...}} entries). Selecting a profile is - * an alternative to passing raw options through the {@code vectorIndexing} field on the - * createVectorIndex command. + * Named vector-index profiles: each maps a profile name to a set of Cassandra SAI indexing options. + * Selecting a profile is an alternative to passing raw options through {@code vectorIndexing}. * - *

<p>Profiles only set the tuning options; they never set {@code source_model} or {@code - * similarity_function}, which have dedicated API fields ({@code sourceModel} / {@code metric}). - * Values are stored as Strings because CQL index options are a {@code Map<String, String>}. + *

<p>Profiles never set {@code source_model} or {@code similarity_function}; those have the + * dedicated {@code sourceModel} / {@code metric} fields. Values are Strings because CQL index + * options are a {@code Map<String, String>}. * - *

<p>NOTE: the concrete mappings below are an initial in-code starter set; the values are expected - * to be tuned and eventually externalised to configuration. + *

This is an initial in-code set; the values are expected to be tuned and moved to config. */ public final class VectorIndexProfiles { @@ -35,8 +32,7 @@ private VectorIndexProfiles() {} /** * Looks up a profile by name, case-insensitively. * - * @param name the profile name from the user request, may be null or blank - * @return the CQL options for the profile, or empty if the name is null, blank, or not known + * @return the profile's CQL options, or empty if {@code name} is null, blank, or unknown */ public static Optional> forName(String name) { if (name == null || name.isBlank()) { @@ -45,7 +41,7 @@ public static Optional> forName(String name) { return Optional.ofNullable(PROFILES.get(name.toLowerCase())); } - /** Names of all known profiles, for use in error messages. */ + /** Names of all known profiles, for error messages. */ public static Set knownNames() { return PROFILES.keySet(); } diff --git a/src/main/resources/errors.yaml b/src/main/resources/errors.yaml index bb1afc18d5..5371f91824 100644 --- a/src/main/resources/errors.yaml +++ b/src/main/resources/errors.yaml @@ -1825,12 +1825,12 @@ request-errors: body: |- The command attempted to create a vector index using an indexing profile that is not known by the API. - A profile is selected by setting `vectorIndexing` to a String; the API expands it into a set of indexing options. To pass options directly instead, set `vectorIndexing` to an Object of raw Cassandra indexing options. + A profile is selected with `vectorIndexing.profile`; the API expands it into a set of indexing options. To set options directly instead, use `vectorIndexing.options`. The known profiles are: ${knownProfiles}. The command attempted to use the profile: ${unknownProfile}. - Resend the command using a known profile, or use a raw indexing options Object. + Resend the command using a known profile, or set `vectorIndexing.options` directly. 
- scope: SCHEMA code: INVALID_VECTOR_INDEXING_OPTIONS @@ -1838,7 +1838,7 @@ request-errors: body: |- The command attempted to create a vector index with invalid `vectorIndexing`. - `vectorIndexing` must be either a String naming a predefined profile, or an Object of raw Cassandra indexing options. The dedicated `metric` and `sourceModel` fields must not be repeated inside the raw options Object. + `vectorIndexing` is an object with an optional `profile` (a predefined name) and an optional `options` object of Cassandra SAI tuning options. Only the supported tuning options may be set, and the dedicated `metric` / `sourceModel` fields must be used instead of setting them in `options`. ${reason} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java index 6c03aa3893..3c2fef5506 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java @@ -1146,7 +1146,7 @@ public void unknownIndexingProfile() { "definition": { "column": "vector_type_7", "options": { - "vectorIndexing": "no-such-profile" + "vectorIndexing": { "profile": "no-such-profile" } } } } @@ -1158,7 +1158,7 @@ public void unknownIndexingProfile() { } @Test - public void rawIndexingOptionsWithReservedOption() { + public void reservedOptionRejected() { assertTableCommand(keyspaceName, vectorTableName) .postCreateVectorIndex( """ @@ -1168,7 +1168,7 @@ public void rawIndexingOptionsWithReservedOption() { "column": "vector_type_7", "options": { "vectorIndexing": { - "similarity_function": "COSINE" + "options": { "similarity_function": "COSINE" } } } } @@ -1181,7 +1181,7 @@ public void rawIndexingOptionsWithReservedOption() { } @Test - public void rawIndexingOptionsWithStructuralOption() { + public void unsupportedOptionRejected() { 
assertTableCommand(keyspaceName, vectorTableName) .postCreateVectorIndex( """ @@ -1191,7 +1191,7 @@ public void rawIndexingOptionsWithStructuralOption() { "column": "vector_type_7", "options": { "vectorIndexing": { - "class_name": "StorageAttachedIndex" + "options": { "class_name": "StorageAttachedIndex" } } } } @@ -1200,11 +1200,11 @@ public void rawIndexingOptionsWithStructuralOption() { .hasSingleApiError( SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS, SchemaException.class, - "The option 'class_name' is set automatically and must not be provided in vectorIndexing"); + "Unsupported vector indexing option 'class_name'"); } @Test - public void indexingOptionsNotStringOrObject() { + public void nonScalarOptionValueRejected() { assertTableCommand(keyspaceName, vectorTableName) .postCreateVectorIndex( """ @@ -1213,7 +1213,9 @@ public void indexingOptionsNotStringOrObject() { "definition": { "column": "vector_type_7", "options": { - "vectorIndexing": [1, 2, 3] + "vectorIndexing": { + "options": { "alpha": [1, 2, 3] } + } } } } @@ -1221,8 +1223,7 @@ public void indexingOptionsNotStringOrObject() { .hasSingleApiError( SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS, SchemaException.class, - "vectorIndexing must be a String (profile name) or an Object (raw options)", - "but was: Array"); + "The option 'alpha' must be a scalar value"); } @Test diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensionsTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensionsTest.java new file mode 100644 index 0000000000..29a476b5d8 --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensionsTest.java @@ -0,0 +1,74 @@ +package io.stargate.sgv2.jsonapi.service.cqldriver.executor; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.fasterxml.jackson.databind.ObjectMapper; +import 
io.stargate.sgv2.jsonapi.config.constants.SchemaConstants; +import java.util.Map; +import org.junit.jupiter.api.Test; + +class TableExtensionsTest { + + private static final ObjectMapper MAPPER = new ObjectMapper(); + + @Test + void schemaTypeAndVersionAlwaysPresent() { + var props = TableExtensions.createCustomProperties(Map.of(), Map.of(), MAPPER); + + assertThat(props) + .containsEntry( + SchemaConstants.MetadataFieldsNames.SCHEMA_TYPE, + SchemaConstants.MetadataFieldsValues.SCHEMA_TYPE_TABLE_VALUE) + .containsEntry( + SchemaConstants.MetadataFieldsNames.SCHEMA_VERSION, + SchemaConstants.MetadataFieldsValues.SCHEMA_VERSION_VERSION) + .doesNotContainKey(SchemaConstants.MetadataFieldsNames.VECTORIZE_CONFIG) + .doesNotContainKey(SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES); + } + + @Test + void writesIndexProfilesWhenPresent() { + var profiles = + Map.of( + "my_idx", + new VectorIndexProfileDefinition( + "small-high-recall", Map.of("maximum_node_connections", "32"))); + + var props = TableExtensions.createCustomProperties(Map.of(), profiles, MAPPER); + + assertThat(props).containsKey(SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES); + // the written value round-trips back to the same profiles + assertThat( + VectorIndexProfileDefinition.fromJson( + props.get(SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES), MAPPER)) + .isEqualTo(profiles); + } + + @Test + void preservesVectorizeAndProfilesTogether() { + // both keys written in one payload, so an extension rewrite carrying both does not lose either + var vectorDefs = + Map.of( + CqlIdentifier.fromInternal("v"), + new VectorizeDefinition("openai", "text-embedding-3-small", null, null)); + var profiles = + Map.of( + "v_idx", + new VectorIndexProfileDefinition( + "big-low-latency", Map.of("maximum_node_connections", "16"))); + + var props = TableExtensions.createCustomProperties(vectorDefs, profiles, MAPPER); + + assertThat(props) + 
.containsKey(SchemaConstants.MetadataFieldsNames.VECTORIZE_CONFIG) + .containsKey(SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES); + } + + @Test + void twoArgOverloadOmitsProfiles() { + var props = TableExtensions.createCustomProperties(Map.of(), MAPPER); + + assertThat(props).doesNotContainKey(SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES); + } +} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinitionTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinitionTest.java new file mode 100644 index 0000000000..6a94ea1607 --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinitionTest.java @@ -0,0 +1,96 @@ +package io.stargate.sgv2.jsonapi.service.cqldriver.executor; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.fasterxml.jackson.databind.ObjectMapper; +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +class VectorIndexProfileDefinitionTest { + + private static final ObjectMapper MAPPER = new ObjectMapper(); + + @Nested + class FromJson { + + @Test + void nullOrBlankIsEmpty() { + assertThat(VectorIndexProfileDefinition.fromJson(null, MAPPER)).isEmpty(); + assertThat(VectorIndexProfileDefinition.fromJson(" ", MAPPER)).isEmpty(); + } + + @Test + void parsesNameAndOptions() { + var json = + "{\"my_idx\":{\"profile\":\"small-high-recall\"," + + "\"options\":{\"maximum_node_connections\":\"32\"}}}"; + + var defs = VectorIndexProfileDefinition.fromJson(json, MAPPER); + + assertThat(defs).containsOnlyKeys("my_idx"); + assertThat(defs.get("my_idx").profile()).isEqualTo("small-high-recall"); + assertThat(defs.get("my_idx").options()).containsEntry("maximum_node_connections", "32"); + } + + @Test + void malformedJsonIsEmpty() { + // advisory metadata: a bad blob must not fail the read + 
assertThat(VectorIndexProfileDefinition.fromJson("not json", MAPPER)).isEmpty(); + } + + @Test + void roundTripThroughObjectMapper() throws Exception { + Map original = new HashMap<>(); + original.put( + "idx", + new VectorIndexProfileDefinition( + "big-low-latency", Map.of("maximum_node_connections", "16"))); + + var json = MAPPER.writeValueAsString(original); + + assertThat(VectorIndexProfileDefinition.fromJson(json, MAPPER)).isEqualTo(original); + } + } + + @Nested + class PutOrRemove { + + @Test + void putNewReturnsChanged() { + var profiles = new HashMap(); + var def = new VectorIndexProfileDefinition("p", Map.of("a", "1")); + + assertThat(VectorIndexProfileDefinition.putOrRemove(profiles, "idx", def)).isTrue(); + assertThat(profiles).containsEntry("idx", def); + } + + @Test + void putIdenticalReturnsUnchanged() { + var profiles = new HashMap(); + profiles.put("idx", new VectorIndexProfileDefinition("p", Map.of("a", "1"))); + + assertThat( + VectorIndexProfileDefinition.putOrRemove( + profiles, "idx", new VectorIndexProfileDefinition("p", Map.of("a", "1")))) + .isFalse(); + } + + @Test + void removeExistingReturnsChanged() { + var profiles = new HashMap(); + profiles.put("idx", new VectorIndexProfileDefinition("p", Map.of())); + + assertThat(VectorIndexProfileDefinition.putOrRemove(profiles, "idx", null)).isTrue(); + assertThat(profiles).doesNotContainKey("idx"); + } + + @Test + void removeMissingReturnsUnchanged() { + var profiles = new HashMap(); + + assertThat(VectorIndexProfileDefinition.putOrRemove(profiles, "idx", null)).isFalse(); + } + } +} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java index 2d33a900c0..c969377b42 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java @@ -2,53 +2,106 
@@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.assertj.core.api.Assertions.entry; -import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import io.stargate.sgv2.jsonapi.api.model.command.table.definition.indexes.VectorIndexDefinitionDesc.VectorIndexDescOptions; +import io.stargate.sgv2.jsonapi.api.model.command.table.definition.indexes.VectorIndexDefinitionDesc.VectorIndexingDesc; import io.stargate.sgv2.jsonapi.config.constants.VectorConstants; import io.stargate.sgv2.jsonapi.exception.SchemaException; +import java.math.BigDecimal; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.stream.Stream; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; /** - * Unit tests for the {@code vectorIndexing} handling on {@link ApiVectorIndex}: how the public - * value (profile name or raw options) is turned into the CQL index options map, and how it is - * rendered back for the schema description. These are deterministic and do not need a database (the - * end-to-end behaviour also depends on the backend allowing custom SAI parameters). + * Unit tests for the structured {@code vectorIndexing} ({@code {profile, options}}) handling on + * {@link ApiVectorIndex}: that a request body deserializes to the expected object, how it is + * validated and turned into the CQL index options map, and how it is described back. Deterministic; + * needs no database (end-to-end also depends on the backend allowing custom SAI parameters). 
*/ class ApiVectorIndexTest { private static final ObjectMapper MAPPER = new ObjectMapper(); - private static JsonNode json(String raw) { - try { - return MAPPER.readTree(raw); - } catch (Exception e) { - throw new RuntimeException(e); + private static VectorIndexingDesc vi(String profile, Map options) { + return new VectorIndexingDesc(profile, options); + } + + /** Source of every option that has a dedicated field and so is rejected inside options. */ + static Stream reservedOptions() { + return VectorConstants.CQLAnnIndex.RESERVED_OPTIONS.stream(); + } + + /** The request body deserializes into the expected {@code vectorIndexing} object. */ + @Nested + class RequestShape { + + @Test + void deserializesProfileAndOptions() throws Exception { + var opts = + MAPPER.readValue( + """ + { + "vectorIndexing": { + "profile": "small-high-recall", + "options": { "maximum_node_connections": 32, "enable_hierarchy": true } + } + } + """, + VectorIndexDescOptions.class); + + assertThat(opts.vectorIndexing()).isNotNull(); + assertThat(opts.vectorIndexing().profile()).isEqualTo("small-high-recall"); + assertThat(opts.vectorIndexing().options()) + .containsEntry("maximum_node_connections", 32) + .containsEntry("enable_hierarchy", true); + } + + @Test + void deserializesProfileOnly() throws Exception { + var opts = + MAPPER.readValue( + "{\"vectorIndexing\": {\"profile\": \"big-low-latency\"}}", + VectorIndexDescOptions.class); + + assertThat(opts.vectorIndexing().profile()).isEqualTo("big-low-latency"); + assertThat(opts.vectorIndexing().options()).isNull(); + } + + @Test + void absentVectorIndexingIsNull() throws Exception { + var opts = MAPPER.readValue("{\"metric\": \"cosine\"}", VectorIndexDescOptions.class); + + assertThat(opts.vectorIndexing()).isNull(); } } + /** A {@code vectorIndexing} object resolves to the expected CQL index options map. 
*/ @Nested class ApplyIndexingOptions { @Test - void nullValueIsNoOp() { + void nullIsNoOp() { var options = new HashMap(); ApiVectorIndex.applyIndexingOptions(options, null); - ApiVectorIndex.applyIndexingOptions(options, JsonNodeFactory.instance.nullNode()); assertThat(options).isEmpty(); } @Test - void emptyObjectIsNoOp() { + void emptyDescIsNoOp() { var options = new HashMap(); - ApiVectorIndex.applyIndexingOptions(options, json("{}")); + ApiVectorIndex.applyIndexingOptions(options, vi(null, null)); + ApiVectorIndex.applyIndexingOptions(options, vi(null, Map.of())); assertThat(options).isEmpty(); } @@ -57,55 +110,48 @@ void emptyObjectIsNoOp() { void profileExpands() { var options = new HashMap(); - ApiVectorIndex.applyIndexingOptions( - options, JsonNodeFactory.instance.textNode("small-high-recall")); + ApiVectorIndex.applyIndexingOptions(options, vi("small-high-recall", null)); assertThat(options) .containsAllEntriesOf(VectorIndexProfiles.forName("small-high-recall").orElseThrow()); } @Test - void unknownProfileThrows() { + void optionsApplied() { var options = new HashMap(); - assertThatThrownBy( - () -> - ApiVectorIndex.applyIndexingOptions( - options, JsonNodeFactory.instance.textNode("no-such-profile"))) - .isInstanceOf(SchemaException.class) - .satisfies( - t -> - assertThat(((SchemaException) t).code) - .isEqualTo(SchemaException.Code.UNKNOWN_VECTOR_INDEXING_PROFILE.name())); + ApiVectorIndex.applyIndexingOptions( + options, + vi( + null, + Map.of("maximum_node_connections", 32, "enable_hierarchy", true, "alpha", "1.2"))); + + assertThat(options) + .containsEntry("maximum_node_connections", "32") + .containsEntry("enable_hierarchy", "true") + .containsEntry("alpha", "1.2"); } @Test - void rawOptionsPassThrough() { + void optionsOverrideProfile() { var options = new HashMap(); + // small-high-recall sets maximum_node_connections=32, construction_beam_width=200 ApiVectorIndex.applyIndexingOptions( - options, - json( - """ - { - "maximum_node_connections": 
32, - "enable_hierarchy": true, - "alpha": "1.2" - } - """)); + options, vi("small-high-recall", Map.of("maximum_node_connections", 99))); assertThat(options) - .containsEntry("maximum_node_connections", "32") - .containsEntry("enable_hierarchy", "true") - .containsEntry("alpha", "1.2"); + .containsEntry("maximum_node_connections", "99") // explicit option wins + .containsEntry("construction_beam_width", "200"); // inherited from the profile } @Test - void rawOptionsMergeWithExisting() { + void mergesWithExistingOptions() { var options = new HashMap(); options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OTHER"); - ApiVectorIndex.applyIndexingOptions(options, json("{\"maximum_node_connections\": 16}")); + ApiVectorIndex.applyIndexingOptions( + options, vi(null, Map.of("maximum_node_connections", 16))); assertThat(options) .containsEntry(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OTHER") @@ -113,56 +159,96 @@ void rawOptionsMergeWithExisting() { } @Test - void reservedOptionThrows() { + void allAllowedOptionsAccepted() { var options = new HashMap(); - assertThatThrownBy( - () -> - ApiVectorIndex.applyIndexingOptions( - options, json("{\"similarity_function\": \"COSINE\"}"))) - .isInstanceOf(SchemaException.class) - .satisfies( - t -> - assertThat(((SchemaException) t).code) - .isEqualTo(SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.name())); + ApiVectorIndex.applyIndexingOptions( + options, + vi( + null, + Map.of( + "maximum_node_connections", + 16, + "construction_beam_width", + 100, + "neighborhood_overflow", + 1.2, + "alpha", + 1.2, + "enable_hierarchy", + true))); + + assertThat(options.keySet()) + .containsExactlyInAnyOrderElementsOf(VectorConstants.CQLAnnIndex.ALLOWED_OPTIONS); } @Test - void wrongTypeThrows() { - var options = new HashMap(); + void unknownProfileThrows() { + assertSchemaError( + vi("no-such-profile", null), SchemaException.Code.UNKNOWN_VECTOR_INDEXING_PROFILE); + } - assertThatThrownBy(() -> 
ApiVectorIndex.applyIndexingOptions(options, json("[1, 2, 3]"))) - .isInstanceOf(SchemaException.class) - .satisfies( - t -> - assertThat(((SchemaException) t).code) - .isEqualTo(SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.name())); + @Test + void blankProfileThrows() { + assertSchemaError(vi("", null), SchemaException.Code.UNKNOWN_VECTOR_INDEXING_PROFILE); + } + + @ParameterizedTest + @MethodSource( + "io.stargate.sgv2.jsonapi.service.schema.tables.ApiVectorIndexTest#reservedOptions") + void reservedOptionThrows(String reservedOption) { + assertSchemaError( + vi(null, Map.of(reservedOption, "x")), + SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); + } + + @ParameterizedTest + @ValueSource(strings = {"class_name", "target", "optimize_for", "bogus_option"}) + void unsupportedOptionThrows(String optionName) { + assertSchemaError( + vi(null, Map.of(optionName, "x")), SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); } @Test - void structuralOptionThrows() { + void numericOptionsUsePlainString() { var options = new HashMap(); - assertThatThrownBy( - () -> ApiVectorIndex.applyIndexingOptions(options, json("{\"class_name\": \"x\"}"))) - .isInstanceOf(SchemaException.class) - .satisfies( - t -> - assertThat(((SchemaException) t).code) - .isEqualTo(SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.name())); + // JSON numbers arrive as BigDecimal; the CQL value must not use scientific notation. 
+ ApiVectorIndex.applyIndexingOptions( + options, + vi( + null, + Map.of( + "construction_beam_width", new BigDecimal("1E+2"), + "alpha", new BigDecimal("1.5")))); + + assertThat(options) + .containsEntry("construction_beam_width", "100") + .containsEntry("alpha", "1.5"); + } - assertThatThrownBy( - () -> ApiVectorIndex.applyIndexingOptions(options, json("{\"target\": \"y\"}"))) + @Test + void nonScalarOptionValueThrows() { + // "alpha" is an allowed key, so this reaches the scalar-value check + assertSchemaError( + vi(null, Map.of("alpha", List.of(1, 2))), + SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); + assertSchemaError( + vi(null, Map.of("alpha", Map.of("x", 1))), + SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); + } + + private void assertSchemaError(VectorIndexingDesc desc, SchemaException.Code code) { + var options = new HashMap(); + assertThatThrownBy(() -> ApiVectorIndex.applyIndexingOptions(options, desc)) .isInstanceOf(SchemaException.class) - .satisfies( - t -> - assertThat(((SchemaException) t).code) - .isEqualTo(SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.name())); + .satisfies(t -> assertThat(((SchemaException) t).code).isEqualTo(code.name())); } } + /** The CQL index options map describes back to the expected {@code vectorIndexing} object. 
*/ @Nested - class RenderIndexingOptions { + class DescribeIndexingOptions { @Test void nullWhenNoTuningOptions() { @@ -172,28 +258,68 @@ void nullWhenNoTuningOptions() { options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OTHER"); options.put(VectorConstants.CQLAnnIndex.SIMILARITY_FUNCTION, "COSINE"); - assertThat(ApiVectorIndex.renderIndexingOptions(options)).isNull(); + assertThat(ApiVectorIndex.describeIndexingOptions(options)).isNull(); } @Test - void rendersOnlyTuningOptions() { + void describesTuningOptionsUnderOptions() { var options = new HashMap(); options.put(CQLSAIIndex.Options.CLASS_NAME, CQLSAIIndex.SAI_CLASS_NAME); options.put(CQLSAIIndex.Options.TARGET, "my_vector"); options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OPENAI_V3_SMALL"); + options.put(VectorConstants.CQLAnnIndex.SIMILARITY_FUNCTION, "COSINE"); options.put(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "32"); + options.put(VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "200"); + + var described = ApiVectorIndex.describeIndexingOptions(options); + + assertThat(described).isNotNull(); + assertThat(described.profile()).isNull(); + assertThat(described.options()) + .containsOnly( + entry(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "32"), + entry(VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "200")); + } + + @Test + void omitsNonAllowlistedKeys() { + var options = new HashMap(); + options.put(CQLSAIIndex.Options.CLASS_NAME, CQLSAIIndex.SAI_CLASS_NAME); + options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OTHER"); + options.put(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "16"); + // a real SAI option the API does not manage (e.g. 
set directly via CQL); not surfaced + options.put("optimize_for", "recall"); - var rendered = ApiVectorIndex.renderIndexingOptions(options); + var described = ApiVectorIndex.describeIndexingOptions(options); - assertThat(rendered).isNotNull(); - assertThat(rendered.size()).isEqualTo(1); - assertThat(rendered.get(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS).asText()) - .isEqualTo("32"); + assertThat(described).isNotNull(); + assertThat(described.options()) + .containsOnlyKeys(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS); } @Test - void emptyMapRendersNull() { - assertThat(ApiVectorIndex.renderIndexingOptions(Map.of())).isNull(); + void emptyMapDescribesNull() { + assertThat(ApiVectorIndex.describeIndexingOptions(Map.of())).isNull(); + } + } + + /** Applying options then describing them round-trips the tuning options. */ + @Nested + class RoundTrip { + + @Test + void applyThenDescribe() { + var options = new HashMap(); + + ApiVectorIndex.applyIndexingOptions( + options, vi(null, Map.of("maximum_node_connections", 32, "alpha", 1.2))); + var described = ApiVectorIndex.describeIndexingOptions(options); + + assertThat(described).isNotNull(); + assertThat(described.options()) + .containsOnly( + entry(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "32"), + entry(VectorConstants.CQLAnnIndex.ALPHA, "1.2")); } } } From e0b98b56acdd3a87531f06670cd4f75b0350af02 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Tue, 16 Jun 2026 17:14:51 -0700 Subject: [PATCH 05/13] fix: persist effective vector-index options and skip metadata rewrite on no-op create Address review findings on the vectorIndexing profile persistence: - Store the options actually applied to the index (profile expansion plus explicit overrides) in VECTOR_INDEX_PROFILES, not the base profile, so the snapshot matches the live index. Adds ApiVectorIndex.appliedTuningOptions() backed by a shared tuningOptions() allow-list filter also used by describeIndexingOptions. 
- Skip the extension write when the index already exists, so a CREATE ... IF NOT EXISTS no-op no longer rewrites or removes a live index's stored profile. Removing the profile entry on dropIndex is tracked as a follow-up (the drop path is keyspace-scoped and needs a separate cleanup task). --- .../CreateVectorIndexCommandResolver.java | 39 +++++++++++-------- .../service/schema/tables/ApiVectorIndex.java | 31 ++++++++++----- .../schema/tables/ApiVectorIndexTest.java | 30 ++++++++++++++ 3 files changed, 75 insertions(+), 25 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java index 5d3b8c6564..ac08eeba8a 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java @@ -4,7 +4,6 @@ import static io.stargate.sgv2.jsonapi.util.ApiOptionUtils.getOrDefault; import static io.stargate.sgv2.jsonapi.util.CqlIdentifierUtil.cqlIdentifierToJsonKey; -import com.datastax.oss.driver.api.core.CqlIdentifier; import com.fasterxml.jackson.databind.ObjectMapper; import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; import io.stargate.sgv2.jsonapi.api.model.command.impl.CreateVectorIndexCommand; @@ -26,7 +25,6 @@ import io.stargate.sgv2.jsonapi.service.schema.tables.ApiIndexType; import io.stargate.sgv2.jsonapi.service.schema.tables.ApiVectorIndex; import io.stargate.sgv2.jsonapi.service.schema.tables.TableSchemaObject; -import io.stargate.sgv2.jsonapi.service.schema.tables.VectorIndexProfiles; import jakarta.enterprise.context.ApplicationScoped; import jakarta.inject.Inject; import java.time.Duration; @@ -104,12 +102,12 @@ public Operation resolveTableCommand( var createIndexTask = taskBuilder.build(apiIndex); - // If a named profile was used, record the name + the 
options it expanded to in the table + // If a named profile was used, record the name + the options it resolved to in the table // extensions so the friendly name survives. Written as a second DDL after the index so a failed - // create leaves no orphan record. (With ifNotExists on an existing index the create is a no-op - // but we still write the latest requested profile.) + // create leaves no orphan record. Returns null (no extension write) when there is nothing to + // persist or the index already exists (the CREATE IF NOT EXISTS would be a no-op). var extensionTask = - buildProfileExtensionTask(schemaObject, apiIndex.indexName(), command, schemaRetryPolicy); + buildProfileExtensionTask(schemaObject, apiIndex, command, schemaRetryPolicy); if (extensionTask == null) { return new TaskOperation<>( new TaskGroup<>(createIndexTask), @@ -129,29 +127,38 @@ public Operation resolveTableCommand( /** * Builds the ALTER TABLE task that records this index's profile in the table extensions, or null - * when nothing needs to change (no profile used and no stale entry for the index name to clear). - * Existing vectorize config and other profiles are read back and rewritten so they are not lost. + * when nothing needs to change. Returns null when the index already exists (a {@code CREATE ... + * IF NOT EXISTS} would be a no-op, so its stored profile must not be rewritten), or when no + * profile is used and there is no stale entry to clear. The snapshot stores the options actually + * applied to the index (profile expansion plus any explicit overrides); existing vectorize config + * and other profiles are read back and rewritten so they are not lost. 
*/ private AlterTableDBTask buildProfileExtensionTask( TableSchemaObject schemaObject, - CqlIdentifier indexIdentifier, + ApiVectorIndex apiIndex, CreateVectorIndexCommand command, SchemaDBTask.SchemaRetryPolicy schemaRetryPolicy) { + // The create is "IF NOT EXISTS": if the index already exists the create is a no-op, so leave + // its stored profile untouched (it must keep matching the live index options). + if (schemaObject.tableMetadata().getIndexes().containsKey(apiIndex.indexName())) { + return null; + } + var options = command.definition().options(); var vectorIndexing = (options == null) ? null : options.vectorIndexing(); // Only a named profile is recorded; bare options carry no name to store. var profileName = (vectorIndexing == null) ? null : vectorIndexing.profile(); - var indexKey = cqlIdentifierToJsonKey(indexIdentifier); + var indexKey = cqlIdentifierToJsonKey(apiIndex.indexName()); var profiles = VectorIndexProfileDefinition.from(schemaObject.tableMetadata(), objectMapper); - VectorIndexProfileDefinition def = null; - if (profileName != null) { - // forName was already validated by the index factory above, so it is present here. - var profileOptions = VectorIndexProfiles.forName(profileName).orElseThrow(); - def = new VectorIndexProfileDefinition(profileName, profileOptions); - } + // Snapshot the options actually applied to the index (profile expansion plus explicit + // overrides), so the stored metadata matches the live index rather than the base profile. + var def = + (profileName == null) + ? 
null + : new VectorIndexProfileDefinition(profileName, apiIndex.appliedTuningOptions()); if (!VectorIndexProfileDefinition.putOrRemove(profiles, indexKey, def)) { return null; diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java index 67b1a125cf..3c6e28c7bd 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java @@ -94,18 +94,31 @@ public VectorIndexDefinitionDesc definition() { @VisibleForTesting static VectorIndexDefinitionDesc.VectorIndexingDesc describeIndexingOptions( Map indexOptions) { - Map options = null; + var tuning = tuningOptions(indexOptions); + return tuning.isEmpty() + ? null + : new VectorIndexDefinitionDesc.VectorIndexingDesc(null, new LinkedHashMap<>(tuning)); + } + + /** + * The supported SAI tuning options actually applied to this index (profile expansion plus any + * explicit overrides). Used to snapshot the resolved options next to a stored profile name, so + * the snapshot matches the live index rather than the base profile. + */ + public Map appliedTuningOptions() { + return tuningOptions(indexOptions); + } + + /** Keeps only the {@link VectorConstants.CQLAnnIndex#ALLOWED_OPTIONS} from a CQL options map. */ + @VisibleForTesting + static Map tuningOptions(Map indexOptions) { + Map tuning = new LinkedHashMap<>(); for (var entry : indexOptions.entrySet()) { - // Only surface options the API also accepts, so a description round-trips into a request. 
- if (!VectorConstants.CQLAnnIndex.ALLOWED_OPTIONS.contains(entry.getKey())) { - continue; - } - if (options == null) { - options = new LinkedHashMap<>(); + if (VectorConstants.CQLAnnIndex.ALLOWED_OPTIONS.contains(entry.getKey())) { + tuning.put(entry.getKey(), entry.getValue()); } - options.put(entry.getKey(), entry.getValue()); } - return options == null ? null : new VectorIndexDefinitionDesc.VectorIndexingDesc(null, options); + return tuning; } /** diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java index c969377b42..499c0d8dd4 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java @@ -303,6 +303,36 @@ void emptyMapDescribesNull() { } } + /** The persisted profile snapshot reflects the options actually applied, not the base profile. 
*/ + @Nested + class TuningOptionsSnapshot { + + @Test + void keepsAllowlistedAppliedOptionsExcludingReservedAndStructural() { + var indexOptions = new HashMap(); + indexOptions.put(CQLSAIIndex.Options.CLASS_NAME, CQLSAIIndex.SAI_CLASS_NAME); + indexOptions.put(CQLSAIIndex.Options.TARGET, "my_vector"); + indexOptions.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OPENAI_V3_SMALL"); + indexOptions.put(VectorConstants.CQLAnnIndex.SIMILARITY_FUNCTION, "COSINE"); + // small-high-recall's base is 32, but an explicit override applied 99 — snapshot must keep 99 + indexOptions.put(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "99"); + indexOptions.put(VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "200"); + + assertThat(ApiVectorIndex.tuningOptions(indexOptions)) + .containsOnly( + entry(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "99"), + entry(VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "200")); + } + + @Test + void emptyWhenNoTuningOptions() { + var indexOptions = new HashMap(); + indexOptions.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OTHER"); + + assertThat(ApiVectorIndex.tuningOptions(indexOptions)).isEmpty(); + } + } + /** Applying options then describing them round-trips the tuning options. 
*/ @Nested class RoundTrip { From e8e2b6158eb89732c78fa99f18ff7c611641e3ee Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Mon, 22 Jun 2026 10:54:24 -0700 Subject: [PATCH 06/13] fix: remove vector index profile from owning table on dropIndex (#2511) --- .../cqldriver/executor/TableExtensions.java | 58 ++++++++ .../tables/DropVectorIndexProfileDBTask.java | 60 +++++++++ .../DropVectorIndexProfileDBTaskBuilder.java | 56 ++++++++ .../resolver/DropIndexCommandResolver.java | 108 ++++++++++----- .../service/schema/KeyspaceSchemaObject.java | 13 ++ .../executor/TableExtensionsTest.java | 125 ++++++++++++++++++ .../DropVectorIndexProfileDBTaskTest.java | 53 ++++++++ 7 files changed, 443 insertions(+), 30 deletions(-) create mode 100644 src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTask.java create mode 100644 src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskBuilder.java create mode 100644 src/test/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskTest.java diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensions.java b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensions.java index ca8166dfcf..f713e7d892 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensions.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensions.java @@ -4,6 +4,7 @@ import com.datastax.oss.driver.api.core.CqlIdentifier; import com.datastax.oss.driver.api.core.data.ByteUtils; +import com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata; import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; @@ -13,6 +14,7 @@ import java.util.HashMap; import java.util.Map; import java.util.Objects; +import java.util.Optional; 
import java.util.stream.Collectors; import org.slf4j.Logger; @@ -114,6 +116,62 @@ public static Map createCustomProperties( return customProperties; } + /** + * Computes the extensions payload that drops {@code indexName}'s vector-index profile from the + * table that owns it. Used to keep the {@link + * SchemaConstants.MetadataFieldsNames#VECTOR_INDEX_PROFILES} extension in sync when an index is + * dropped, so a profile record does not outlive its index. + * + *

The owning table is found by scanning {@code keyspaceMetadata} for the table whose indexes + * contain {@code indexName}. Returns empty when there is nothing to do — no table owns the index, + * or the owning table has no stored profile for it — so the caller can skip the extra DDL. + * + *

When a rewrite is needed, the existing vectorize config and the other indexes' profiles are + * read back and included so the clobbering extension write does not lose them (the same approach + * as the create side, see {@link #createCustomProperties(Map, Map, ObjectMapper)}). + */ + public static Optional removeIndexProfile( + KeyspaceMetadata keyspaceMetadata, CqlIdentifier indexName, ObjectMapper objectMapper) { + Objects.requireNonNull(keyspaceMetadata, "keyspaceMetadata must not be null"); + Objects.requireNonNull(indexName, "indexName must not be null"); + Objects.requireNonNull(objectMapper, "objectMapper must not be null"); + + var owningTable = + keyspaceMetadata.getTables().values().stream() + .filter(table -> table.getIndexes().containsKey(indexName)) + .findFirst(); + if (owningTable.isEmpty()) { + return Optional.empty(); + } + + var tableMetadata = owningTable.get(); + var profiles = VectorIndexProfileDefinition.from(tableMetadata, objectMapper); + // null def => remove; false return => no entry existed, so there is nothing to rewrite. + if (!VectorIndexProfileDefinition.putOrRemove( + profiles, cqlIdentifierToJsonKey(indexName), null)) { + return Optional.empty(); + } + + // Read the vectorize config back so the full-replace extension write preserves it. The stored + // keys are the column identifiers' internal form, so reconstruct the CqlIdentifier keys that + // createCustomProperties expects. 
+ var vectorDefs = + VectorizeDefinition.from(tableMetadata, objectMapper).entrySet().stream() + .collect( + Collectors.toMap( + entry -> CqlIdentifier.fromInternal(entry.getKey()), Map.Entry::getValue)); + + var customProperties = createCustomProperties(vectorDefs, profiles, objectMapper); + return Optional.of(new IndexProfileRemoval(tableMetadata.getName(), customProperties)); + } + + /** + * The result of {@link #removeIndexProfile}: the table to alter and the complete extensions + * payload to write (with the dropped index's profile removed and everything else preserved). + */ + public record IndexProfileRemoval( + CqlIdentifier tableName, Map customProperties) {} + private static String writeJson(Object value, ObjectMapper objectMapper) { try { return objectMapper.writeValueAsString(value); diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTask.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTask.java new file mode 100644 index 0000000000..c14079814c --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTask.java @@ -0,0 +1,60 @@ +package io.stargate.sgv2.jsonapi.service.operation.tables; + +import static com.datastax.oss.driver.api.querybuilder.SchemaBuilder.alterTable; + +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.datastax.oss.driver.api.core.cql.SimpleStatement; +import io.stargate.sgv2.jsonapi.service.cqldriver.executor.DefaultDriverExceptionHandler; +import io.stargate.sgv2.jsonapi.service.cqldriver.executor.TableExtensions; +import io.stargate.sgv2.jsonapi.service.operation.SchemaDBTask; +import io.stargate.sgv2.jsonapi.service.schema.KeyspaceSchemaObject; +import java.util.Map; +import java.util.Objects; + +/** + * Removes a dropped index's entry from its owning table's vector-index-profiles extension, so a + * profile record does not outlive the index it described. + * + *

This runs as a keyspace-scoped sibling to {@link DropIndexDBTask} so the two can share one + * {@link io.stargate.sgv2.jsonapi.service.operation.tasks.TaskGroup}; a {@link + * io.stargate.sgv2.jsonapi.service.schema.tables.TableSchemaObject}-typed {@link AlterTableDBTask} + * (used by the create side) could not, because a TaskGroup has a single schema-object type. The + * owning table and the rewritten extensions payload are resolved at command-resolve time via {@link + * TableExtensions#removeIndexProfile}; this task only issues the {@code ALTER TABLE ... WITH + * extensions = {...}}. + */ +public class DropVectorIndexProfileDBTask extends SchemaDBTask { + + private final CqlIdentifier tableName; + private final Map customProperties; + + public DropVectorIndexProfileDBTask( + int position, + KeyspaceSchemaObject schemaObject, + SchemaDBTask.SchemaRetryPolicy schemaRetryPolicy, + DefaultDriverExceptionHandler.Factory exceptionHandlerFactory, + CqlIdentifier tableName, + Map customProperties) { + super(position, schemaObject, schemaRetryPolicy, exceptionHandlerFactory); + + this.tableName = Objects.requireNonNull(tableName, "tableName must not be null"); + this.customProperties = + Objects.requireNonNull(customProperties, "customProperties must not be null"); + setStatus(TaskStatus.READY); + } + + public static DropVectorIndexProfileDBTaskBuilder builder(KeyspaceSchemaObject schemaObject) { + return new DropVectorIndexProfileDBTaskBuilder(schemaObject); + } + + @Override + protected SimpleStatement buildStatement() { + + // The owning table lives in this keyspace; take the keyspace from the schema object identifier, + // mirroring DropIndexDBTask which builds its statement the same way. 
+ var extensions = TableExtensions.toExtensions(customProperties); + return alterTable(schemaObject.identifier().keyspace(), tableName) + .withOption(TableExtensions.TABLE_OPTIONS_EXTENSION_KEY.asInternal(), extensions) + .build(); + } +} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskBuilder.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskBuilder.java new file mode 100644 index 0000000000..1f60b57fe8 --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskBuilder.java @@ -0,0 +1,56 @@ +package io.stargate.sgv2.jsonapi.service.operation.tables; + +import com.datastax.oss.driver.api.core.CqlIdentifier; +import io.stargate.sgv2.jsonapi.service.operation.SchemaDBTask; +import io.stargate.sgv2.jsonapi.service.operation.tasks.TaskBuilder; +import io.stargate.sgv2.jsonapi.service.schema.KeyspaceSchemaObject; +import java.util.Map; +import java.util.Objects; + +/** Builds a {@link DropVectorIndexProfileDBTask}. 
*/ +public class DropVectorIndexProfileDBTaskBuilder + extends TaskBuilder< + DropVectorIndexProfileDBTask, KeyspaceSchemaObject, DropVectorIndexProfileDBTaskBuilder> { + + private CqlIdentifier tableName; + private Map customProperties; + private SchemaDBTask.SchemaRetryPolicy schemaRetryPolicy; + + protected DropVectorIndexProfileDBTaskBuilder(KeyspaceSchemaObject schemaObject) { + super(schemaObject); + } + + public DropVectorIndexProfileDBTaskBuilder withTableName(CqlIdentifier tableName) { + this.tableName = Objects.requireNonNull(tableName, "tableName must not be null"); + return this; + } + + public DropVectorIndexProfileDBTaskBuilder withCustomProperties( + Map customProperties) { + this.customProperties = + Objects.requireNonNull(customProperties, "customProperties must not be null"); + return this; + } + + public DropVectorIndexProfileDBTaskBuilder withSchemaRetryPolicy( + SchemaDBTask.SchemaRetryPolicy schemaRetryPolicy) { + this.schemaRetryPolicy = + Objects.requireNonNull(schemaRetryPolicy, "schemaRetryPolicy cannot be null"); + return this; + } + + public DropVectorIndexProfileDBTask build() { + + Objects.requireNonNull(tableName, "tableName must not be null"); + Objects.requireNonNull(customProperties, "customProperties must not be null"); + Objects.requireNonNull(schemaRetryPolicy, "schemaRetryPolicy cannot be null"); + + return new DropVectorIndexProfileDBTask( + nextPosition(), + schemaObject, + schemaRetryPolicy, + getExceptionHandlerFactory(), + tableName, + customProperties); + } +} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/DropIndexCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/DropIndexCommandResolver.java index cf6ff8e511..c324f34910 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/DropIndexCommandResolver.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/DropIndexCommandResolver.java @@ -2,20 +2,26 @@ import static 
io.stargate.sgv2.jsonapi.util.CqlIdentifierUtil.cqlIdentifierFromUserInput; +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.fasterxml.jackson.databind.ObjectMapper; import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; import io.stargate.sgv2.jsonapi.api.model.command.impl.DropIndexCommand; import io.stargate.sgv2.jsonapi.config.OperationsConfig; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.DefaultDriverExceptionHandler; +import io.stargate.sgv2.jsonapi.service.cqldriver.executor.TableExtensions; import io.stargate.sgv2.jsonapi.service.operation.Operation; import io.stargate.sgv2.jsonapi.service.operation.SchemaDBTask; import io.stargate.sgv2.jsonapi.service.operation.SchemaDBTaskPage; +import io.stargate.sgv2.jsonapi.service.operation.keyspaces.KeyspaceDriverExceptionHandler; import io.stargate.sgv2.jsonapi.service.operation.tables.DropIndexDBTask; import io.stargate.sgv2.jsonapi.service.operation.tables.DropIndexExceptionHandler; +import io.stargate.sgv2.jsonapi.service.operation.tables.DropVectorIndexProfileDBTask; import io.stargate.sgv2.jsonapi.service.operation.tasks.TaskGroup; import io.stargate.sgv2.jsonapi.service.operation.tasks.TaskOperation; import io.stargate.sgv2.jsonapi.service.schema.KeyspaceSchemaObject; import io.stargate.sgv2.jsonapi.util.ApiOptionUtils; import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; import java.time.Duration; /** Resolver for the {@link DropIndexCommand}. 
*/ @@ -24,6 +30,8 @@ public class DropIndexCommandResolver implements CommandResolver getCommandClass() { return DropIndexCommand.class; @@ -33,39 +41,79 @@ public Class getCommandClass() { public Operation resolveKeyspaceCommand( CommandContext commandContext, DropIndexCommand command) { + var schemaObject = commandContext.schemaObject(); var indexName = cqlIdentifierFromUserInput(command.name()); // Check if the index exists, we check if columns exist before trying to drop them so do for // indexes as well - var taskBuilder = - DropIndexDBTask.builder(commandContext.schemaObject()) - .withSchemaRetryPolicy( - new SchemaDBTask.SchemaRetryPolicy( - commandContext - .config() - .get(OperationsConfig.class) - .databaseConfig() - .ddlRetries(), - Duration.ofMillis( - commandContext - .config() - .get(OperationsConfig.class) - .databaseConfig() - .ddlRetryDelayMillis()))); - - taskBuilder.withExceptionHandlerFactory( - DefaultDriverExceptionHandler.Factory.withIdentifier( - DropIndexExceptionHandler::new, indexName)); - - taskBuilder - .withIndexName(indexName) - .withIfExists( - ApiOptionUtils.getOrDefault( - command.options(), DropIndexCommand.Options::ifExists, IF_EXISTS_DEFAULT)); - - var taskGroup = new TaskGroup<>(taskBuilder.build()); - - return new TaskOperation<>( - taskGroup, SchemaDBTaskPage.accumulator(DropIndexDBTask.class, commandContext)); + var schemaRetryPolicy = + new SchemaDBTask.SchemaRetryPolicy( + commandContext.config().get(OperationsConfig.class).databaseConfig().ddlRetries(), + Duration.ofMillis( + commandContext + .config() + .get(OperationsConfig.class) + .databaseConfig() + .ddlRetryDelayMillis())); + + var dropIndexTask = + DropIndexDBTask.builder(schemaObject) + .withSchemaRetryPolicy(schemaRetryPolicy) + .withExceptionHandlerFactory( + DefaultDriverExceptionHandler.Factory.withIdentifier( + DropIndexExceptionHandler::new, indexName)) + .withIndexName(indexName) + .withIfExists( + ApiOptionUtils.getOrDefault( + command.options(), 
DropIndexCommand.Options::ifExists, IF_EXISTS_DEFAULT)) + .build(); + + // Also drop the index's vector-index profile (if any) from the owning table's extensions, so + // the profile record does not outlive the index. Null when the keyspace metadata is unknown or + // the owning table has no stored profile for this index, in which case only the drop runs. + var profileCleanupTask = buildProfileCleanupTask(schemaObject, indexName, schemaRetryPolicy); + + if (profileCleanupTask == null) { + return new TaskOperation<>( + new TaskGroup<>(dropIndexTask), + SchemaDBTaskPage.accumulator(DropIndexDBTask.class, commandContext)); + } + + // Sequential so the extension cleanup only runs if the index drop succeeded. + TaskGroup, KeyspaceSchemaObject> taskGroup = + new TaskGroup<>(true); + taskGroup.add(dropIndexTask); + taskGroup.add(profileCleanupTask); + + @SuppressWarnings("unchecked") + Class> taskClass = + (Class>) (Class) SchemaDBTask.class; + return new TaskOperation<>(taskGroup, SchemaDBTaskPage.accumulator(taskClass, commandContext)); + } + + /** + * Builds the cleanup task that removes the dropped index's profile from its owning table's + * extensions, or null when there is nothing to clean up (keyspace metadata unknown, no owning + * table, or no stored profile for this index). 
+ */ + private DropVectorIndexProfileDBTask buildProfileCleanupTask( + KeyspaceSchemaObject schemaObject, + CqlIdentifier indexName, + SchemaDBTask.SchemaRetryPolicy schemaRetryPolicy) { + + return schemaObject + .keyspaceMetadata() + .flatMap( + keyspaceMetadata -> + TableExtensions.removeIndexProfile(keyspaceMetadata, indexName, objectMapper)) + .map( + removal -> + DropVectorIndexProfileDBTask.builder(schemaObject) + .withSchemaRetryPolicy(schemaRetryPolicy) + .withExceptionHandlerFactory(KeyspaceDriverExceptionHandler::new) + .withTableName(removal.tableName()) + .withCustomProperties(removal.customProperties()) + .build()) + .orElse(null); } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/KeyspaceSchemaObject.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/KeyspaceSchemaObject.java index 11bdef3f59..7f86f5d3ad 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/KeyspaceSchemaObject.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/KeyspaceSchemaObject.java @@ -6,6 +6,7 @@ import io.stargate.sgv2.jsonapi.service.cqldriver.executor.IndexUsage; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.VectorConfig; import java.util.Objects; +import java.util.Optional; /** * A Keyspace in the API. @@ -39,6 +40,18 @@ public KeyspaceSchemaObject(Tenant tenant, KeyspaceMetadata keyspaceMetadata) { Objects.requireNonNull(keyspaceMetadata, "keyspaceMetadata must not be null"); } + /** + * The Cassandra metadata for this keyspace, when known. + * + *

Empty when the object was built via the {@link + * #KeyspaceSchemaObject(SchemaObjectIdentifier)} test constructor, which carries no metadata. + * Present for objects built from live schema, where it lets callers reach the keyspace's tables + * and their indexes (e.g. to find the table that owns a named index). + */ + public Optional keyspaceMetadata() { + return Optional.ofNullable(keyspaceMetadata); + } + @Override public VectorConfig vectorConfig() { return VectorConfig.NOT_ENABLED_CONFIG; diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensionsTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensionsTest.java index 29a476b5d8..4ac19cd453 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensionsTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensionsTest.java @@ -1,11 +1,21 @@ package io.stargate.sgv2.jsonapi.service.cqldriver.executor; import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.datastax.oss.driver.api.core.metadata.schema.IndexMetadata; +import com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata; +import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; import com.fasterxml.jackson.databind.ObjectMapper; import io.stargate.sgv2.jsonapi.config.constants.SchemaConstants; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; import java.util.Map; +import java.util.Set; +import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; class TableExtensionsTest { @@ -71,4 +81,119 @@ void twoArgOverloadOmitsProfiles() { assertThat(props).doesNotContainKey(SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES); } + + @Nested + class RemoveIndexProfile { + + private static 
final CqlIdentifier MY_IDX = CqlIdentifier.fromInternal("my_idx"); + + @Test + void emptyWhenNoTableOwnsTheIndex() { + // the only table in the keyspace carries a different index + var keyspace = + keyspace( + table( + "other_table", + Set.of(CqlIdentifier.fromInternal("some_other_idx")), + Map.of( + SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES, + profilesJson("some_other_idx")))); + + assertThat(TableExtensions.removeIndexProfile(keyspace, MY_IDX, MAPPER)).isEmpty(); + } + + @Test + void emptyWhenOwningTableHasNoProfileForTheIndex() { + // the owning table has a profiles blob, but not for the index being dropped + var keyspace = + keyspace( + table( + "my_table", + Set.of(MY_IDX), + Map.of( + SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES, + profilesJson("unrelated_idx")))); + + assertThat(TableExtensions.removeIndexProfile(keyspace, MY_IDX, MAPPER)).isEmpty(); + } + + @Test + void removesProfileAndPreservesOtherProfilesAndVectorize() { + var keyspace = + keyspace( + table( + "my_table", + Set.of(MY_IDX), + Map.of( + SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES, + profilesJson("my_idx", "kept_idx"), + SchemaConstants.MetadataFieldsNames.VECTORIZE_CONFIG, + "{\"v\":{\"provider\":\"openai\",\"modelName\":\"text-embedding-3-small\"}}"))); + + var removal = TableExtensions.removeIndexProfile(keyspace, MY_IDX, MAPPER); + + assertThat(removal).isPresent(); + assertThat(removal.get().tableName()).isEqualTo(CqlIdentifier.fromInternal("my_table")); + + var customProperties = removal.get().customProperties(); + // schema type/version always written + assertThat(customProperties) + .containsKey(SchemaConstants.MetadataFieldsNames.SCHEMA_TYPE) + .containsKey(SchemaConstants.MetadataFieldsNames.SCHEMA_VERSION) + // vectorize config is read back and preserved + .containsKey(SchemaConstants.MetadataFieldsNames.VECTORIZE_CONFIG); + + // the dropped index's profile is gone, the other index's profile is kept + var profiles = + 
VectorIndexProfileDefinition.fromJson( + customProperties.get(SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES), + MAPPER); + assertThat(profiles).containsOnlyKeys("kept_idx"); + } + + /** Builds a {@code {index: {profile, options}}} blob for the given index keys. */ + private static String profilesJson(String... indexKeys) { + var profiles = new HashMap(); + for (var key : indexKeys) { + profiles.put(key, new VectorIndexProfileDefinition("small-high-recall", Map.of())); + } + try { + return MAPPER.writeValueAsString(profiles); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private static KeyspaceMetadata keyspace(TableMetadata... tables) { + var keyspaceMetadata = mock(KeyspaceMetadata.class); + Map tableMap = new HashMap<>(); + for (var table : tables) { + tableMap.put(table.getName(), table); + } + when(keyspaceMetadata.getTables()).thenReturn(tableMap); + return keyspaceMetadata; + } + + private static TableMetadata table( + String name, Set indexNames, Map extensions) { + var tableMetadata = mock(TableMetadata.class); + when(tableMetadata.getName()).thenReturn(CqlIdentifier.fromInternal(name)); + + Map indexes = new HashMap<>(); + for (var indexName : indexNames) { + indexes.put(indexName, mock(IndexMetadata.class)); + } + when(tableMetadata.getIndexes()).thenReturn(indexes); + + Map extensionBuffers = new HashMap<>(); + extensions.forEach( + (key, value) -> + extensionBuffers.put(key, ByteBuffer.wrap(value.getBytes(StandardCharsets.UTF_8)))); + Map options = new HashMap<>(); + options.put(TableExtensions.TABLE_OPTIONS_EXTENSION_KEY, extensionBuffers); + when(tableMetadata.getOptions()).thenReturn(options); + + return tableMetadata; + } + } } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskTest.java new file mode 100644 index 0000000000..fdb75f7040 --- /dev/null +++ 
b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskTest.java @@ -0,0 +1,53 @@ +package io.stargate.sgv2.jsonapi.service.operation.tables; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.stargate.sgv2.jsonapi.service.cqldriver.executor.TableExtensions; +import io.stargate.sgv2.jsonapi.service.cqldriver.executor.VectorIndexProfileDefinition; +import io.stargate.sgv2.jsonapi.service.operation.SchemaDBTask; +import io.stargate.sgv2.jsonapi.service.operation.keyspaces.KeyspaceDriverExceptionHandler; +import io.stargate.sgv2.jsonapi.service.schema.KeyspaceSchemaObject; +import io.stargate.sgv2.jsonapi.service.schema.SchemaObjectIdentifier; +import java.time.Duration; +import java.util.Map; +import org.junit.jupiter.api.Test; + +class DropVectorIndexProfileDBTaskTest { + + private static final ObjectMapper MAPPER = new ObjectMapper(); + + @Test + void buildsAlterTableExtensionsStatementForOwningTable() { + var identifier = mock(SchemaObjectIdentifier.class); + when(identifier.keyspace()).thenReturn(CqlIdentifier.fromInternal("my_ks")); + var schemaObject = mock(KeyspaceSchemaObject.class); + when(schemaObject.identifier()).thenReturn(identifier); + + var customProperties = + TableExtensions.createCustomProperties( + Map.of(), + Map.of("kept_idx", new VectorIndexProfileDefinition("small-high-recall", Map.of())), + MAPPER); + + var task = + DropVectorIndexProfileDBTask.builder(schemaObject) + .withSchemaRetryPolicy(new SchemaDBTask.SchemaRetryPolicy(1, Duration.ofMillis(1))) + .withExceptionHandlerFactory(KeyspaceDriverExceptionHandler::new) + .withTableName(CqlIdentifier.fromInternal("my_table")) + .withCustomProperties(customProperties) + .build(); + + var query = task.buildStatement().getQuery(); + + // ALTER TABLE 
on the owning table in the schema object's keyspace, updating the extensions map. + assertThat(query) + .contains("ALTER TABLE") + .contains("my_ks") + .contains("my_table") + .contains("extensions"); + } +} From cefb4477c9510a12626e9304b4364b554351f9de Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Mon, 22 Jun 2026 12:07:51 -0700 Subject: [PATCH 07/13] fix: match the interface of createIndex --- .../VectorIndexingDescDeserializer.java | 64 +++++++++ .../indexes/VectorIndexDefinitionDesc.java | 65 ++++----- .../config/constants/VectorConstants.java | 6 - .../CreateVectorIndexCommandResolver.java | 9 +- .../service/schema/tables/ApiVectorIndex.java | 24 ++-- src/main/resources/errors.yaml | 6 +- .../CreateTableIndexIntegrationTest.java | 14 +- ...ndexUnknownOptionProbeIntegrationTest.java | 130 ++++++++++++++++++ .../schema/tables/ApiVectorIndexTest.java | 128 +++++++++-------- 9 files changed, 322 insertions(+), 124 deletions(-) create mode 100644 src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/VectorIndexingDescDeserializer.java create mode 100644 src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexUnknownOptionProbeIntegrationTest.java diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/VectorIndexingDescDeserializer.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/VectorIndexingDescDeserializer.java new file mode 100644 index 0000000000..e330f1ddda --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/VectorIndexingDescDeserializer.java @@ -0,0 +1,64 @@ +package io.stargate.sgv2.jsonapi.api.model.command.deserializers; + +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import 
com.fasterxml.jackson.databind.deser.std.StdDeserializer; +import io.stargate.sgv2.jsonapi.api.model.command.table.definition.indexes.VectorIndexDefinitionDesc.VectorIndexingDesc; +import io.stargate.sgv2.jsonapi.exception.SchemaException; +import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * Deserializes the overloaded {@code vectorIndexing} value, which is either: + * + *

    + *
  • a JSON string → a named profile the API expands into SAI options, e.g. + * "vectorIndexing": "small-high-recall" + *
  • a JSON object → raw Cassandra SAI tuning options set directly, e.g. + * "vectorIndexing": { "maximum_node_connections": 32 } + *
+ * + * Anything else (number, boolean, array, null token) is a request error. This mirrors the design in + * #2508: the field is overloaded by + * JSON type rather than carrying separate {@code profile} / {@code options} sub-keys, so a profile + * and raw options are mutually exclusive in a single request. + */ +public class VectorIndexingDescDeserializer extends StdDeserializer { + + private static final TypeReference> OPTIONS_TYPE = + new TypeReference<>() {}; + + public VectorIndexingDescDeserializer() { + super(VectorIndexingDesc.class); + } + + @Override + public VectorIndexingDesc deserialize( + JsonParser jsonParser, DeserializationContext deserializationContext) throws IOException { + JsonNode node = deserializationContext.readTree(jsonParser); + + if (node.isTextual()) { + // "vectorIndexing": "small-high-recall" -> a named profile (validated at apply time). + return VectorIndexingDesc.ofProfile(node.textValue()); + } + if (node.isObject()) { + // "vectorIndexing": { "maximum_node_connections": 32 } -> raw SAI options. convertValue + // honours the mapper config (e.g. float handling) just as a Map field would. 
+ Map options = + ((ObjectMapper) jsonParser.getCodec()).convertValue(node, OPTIONS_TYPE); + return VectorIndexingDesc.ofOptions(options); + } + + throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( + Map.of( + "reason", + "`vectorIndexing` must be either a profile name (string) or an object of indexing " + + "options, but was: " + + node.getNodeType().name().toLowerCase() + + ".")); + } +} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java index cbaed7cb56..2defb26def 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java @@ -3,6 +3,9 @@ import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonPropertyOrder; +import com.fasterxml.jackson.annotation.JsonValue; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import io.stargate.sgv2.jsonapi.api.model.command.deserializers.VectorIndexingDescDeserializer; import io.stargate.sgv2.jsonapi.config.constants.TableDescConstants; import io.stargate.sgv2.jsonapi.config.constants.VectorConstants; import io.stargate.sgv2.jsonapi.config.constants.VectorIndexDescDefaults; @@ -66,43 +69,41 @@ public record VectorIndexDescOptions( @Nullable @Schema( description = - "Optional vector (SAI) indexing configuration: an object with an optional " - + "\"profile\" (a predefined name the API expands into options, e.g. " - + "\"small-high-recall\") and an optional \"options\" object of Cassandra SAI " - + "tuning options (e.g. {\"maximum_node_connections\": 32, \"alpha\": 1.2}). " - + "Explicit options override the profile. 
Set \"metric\" / \"sourceModel\" via " - + "their dedicated fields, not here.", - type = SchemaType.OBJECT) + "Optional vector (SAI) indexing configuration. Either a profile name (string) " + + "the API expands into SAI options, e.g. \"small-high-recall\"; or an object " + + "of Cassandra SAI tuning options (snake_case), restricted to: " + + "maximum_node_connections, construction_beam_width, neighborhood_overflow, " + + "alpha, enable_hierarchy, e.g. {\"maximum_node_connections\": 32, " + + "\"alpha\": 1.2}. A profile and explicit options are mutually exclusive. " + + "Set \"metric\" / \"sourceModel\" via their dedicated fields, not here.") @JsonInclude(JsonInclude.Include.NON_NULL) @JsonProperty(VectorConstants.VectorColumn.VECTOR_INDEXING) VectorIndexingDesc vectorIndexing) {} /** - * The {@code vectorIndexing} value: an optional profile name plus optional SAI tuning options. + * The overloaded {@code vectorIndexing} value: exactly one of a named {@code profile} (a JSON + * string) or raw SAI tuning {@code options} (a JSON object) is set. Deserialized by {@link + * VectorIndexingDescDeserializer} (discriminated by JSON type) and serialized back to the bare + * string or object via {@link #jsonValue()}. */ - @JsonPropertyOrder({ - VectorConstants.VectorIndexing.PROFILE, - VectorConstants.VectorIndexing.OPTIONS - }) + @JsonDeserialize(using = VectorIndexingDescDeserializer.class) public record VectorIndexingDesc( - @Nullable - @Schema( - description = - "Optional predefined indexing profile name; the API expands it into SAI options.", - type = SchemaType.STRING) - @JsonInclude(JsonInclude.Include.NON_NULL) - @JsonProperty(VectorConstants.VectorIndexing.PROFILE) - String profile, - // - @Nullable - @Schema( - description = - "Optional Cassandra SAI tuning options (snake_case), restricted to: " - + "maximum_node_connections, construction_beam_width, neighborhood_overflow, " - + "alpha, enable_hierarchy. 
Values may be string, number, or boolean on input " - + "and are returned as strings in index descriptions.", - type = SchemaType.OBJECT) - @JsonInclude(JsonInclude.Include.NON_NULL) - @JsonProperty(VectorConstants.VectorIndexing.OPTIONS) - Map options) {} + @Nullable String profile, @Nullable Map options) { + + /** A {@code vectorIndexing} that selects a named profile. */ + public static VectorIndexingDesc ofProfile(String profile) { + return new VectorIndexingDesc(profile, null); + } + + /** A {@code vectorIndexing} that sets raw SAI options directly. */ + public static VectorIndexingDesc ofOptions(Map options) { + return new VectorIndexingDesc(null, options); + } + + /** Serializes back to the overloaded shape: the bare profile string or the bare options map. */ + @JsonValue + Object jsonValue() { + return profile != null ? profile : options; + } + } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java index b0c24b9225..9a79d8b23c 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java @@ -11,12 +11,6 @@ interface VectorColumn { String VECTOR_INDEXING = "vectorIndexing"; } - /** Field names inside the {@code vectorIndexing} object. 
*/ - interface VectorIndexing { - String PROFILE = "profile"; - String OPTIONS = "options"; - } - interface Vectorize extends ServiceDescConstants {} /** diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java index ac08eeba8a..ba5121f78a 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java @@ -129,9 +129,9 @@ public Operation resolveTableCommand( * Builds the ALTER TABLE task that records this index's profile in the table extensions, or null * when nothing needs to change. Returns null when the index already exists (a {@code CREATE ... * IF NOT EXISTS} would be a no-op, so its stored profile must not be rewritten), or when no - * profile is used and there is no stale entry to clear. The snapshot stores the options actually - * applied to the index (profile expansion plus any explicit overrides); existing vectorize config - * and other profiles are read back and rewritten so they are not lost. + * profile is used and there is no stale entry to clear. The snapshot stores the options the + * profile expanded to; existing vectorize config and other profiles are read back and rewritten + * so they are not lost. */ private AlterTableDBTask buildProfileExtensionTask( TableSchemaObject schemaObject, @@ -153,8 +153,7 @@ private AlterTableDBTask buildProfileExtensionTask( var indexKey = cqlIdentifierToJsonKey(apiIndex.indexName()); var profiles = VectorIndexProfileDefinition.from(schemaObject.tableMetadata(), objectMapper); - // Snapshot the options actually applied to the index (profile expansion plus explicit - // overrides), so the stored metadata matches the live index rather than the base profile. 
+ // Snapshot the options the profile expanded to, so the stored metadata matches the live index. var def = (profileName == null) ? null diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java index 3c6e28c7bd..f36ded4d2c 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java @@ -97,7 +97,7 @@ static VectorIndexDefinitionDesc.VectorIndexingDesc describeIndexingOptions( var tuning = tuningOptions(indexOptions); return tuning.isEmpty() ? null - : new VectorIndexDefinitionDesc.VectorIndexingDesc(null, new LinkedHashMap<>(tuning)); + : VectorIndexDefinitionDesc.VectorIndexingDesc.ofOptions(new LinkedHashMap<>(tuning)); } /** @@ -122,11 +122,13 @@ static Map tuningOptions(Map indexOptions) { } /** - * Applies the request's {@code vectorIndexing} into the CQL index options map. An optional {@code - * profile} is expanded via {@link VectorIndexProfiles}; an optional {@code options} object adds - * Cassandra SAI tuning options (validated against {@link - * VectorConstants.CQLAnnIndex#ALLOWED_OPTIONS}) on top, overriding the profile. {@code - * source_model} / {@code similarity_function} have dedicated fields and are rejected here. + * Applies the request's {@code vectorIndexing} into the CQL index options map. {@code + * vectorIndexing} is overloaded: either a {@code profile} name expanded via {@link + * VectorIndexProfiles}, or an {@code options} object of Cassandra SAI tuning options (validated + * against {@link VectorConstants.CQLAnnIndex#ALLOWED_OPTIONS}) set directly — the two are + * mutually exclusive (see {@link + * io.stargate.sgv2.jsonapi.api.model.command.deserializers.VectorIndexingDescDeserializer}). + * {@code source_model} / {@code similarity_function} have dedicated fields and are rejected here. 
* * @param indexOptions the CQL options map being built, mutated in place * @param vectorIndexing the structured request value, may be null @@ -140,7 +142,7 @@ static void applyIndexingOptions( return; } - // A profile expands to a base set of options. + // A profile expands to a set of options. var profileName = vectorIndexing.profile(); if (profileName != null) { var profileOptions = @@ -156,7 +158,7 @@ static void applyIndexingOptions( indexOptions.putAll(profileOptions); } - // Explicit options are validated against the allow-list and override the profile. + // Raw options (mutually exclusive with a profile) are validated against the allow-list. var options = vectorIndexing.options(); if (options != null) { for (var entry : options.entrySet()) { @@ -169,7 +171,7 @@ static void applyIndexingOptions( throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( Map.of( "reason", - "The option '%s' must be set using its dedicated field '%s', not vectorIndexing.options." + "The option '%s' must be set using its dedicated field '%s', not as a vectorIndexing option." .formatted(optionName, dedicatedField))); } if (!VectorConstants.CQLAnnIndex.ALLOWED_OPTIONS.contains(optionName)) { @@ -371,8 +373,8 @@ public ApiVectorIndex create( metricToUse); } - // Apply vectorIndexing (optional profile and/or explicit options; options override the - // profile); metric / sourceModel above have dedicated fields. + // Apply vectorIndexing (either a profile name or raw options, mutually exclusive); metric / + // sourceModel above have dedicated fields. var userVectorIndexing = (indexDesc.options() == null) ? 
null : indexDesc.options().vectorIndexing(); applyIndexingOptions(indexOptions, userVectorIndexing); diff --git a/src/main/resources/errors.yaml b/src/main/resources/errors.yaml index 5371f91824..160814476f 100644 --- a/src/main/resources/errors.yaml +++ b/src/main/resources/errors.yaml @@ -1825,12 +1825,12 @@ request-errors: body: |- The command attempted to create a vector index using an indexing profile that is not known by the API. - A profile is selected with `vectorIndexing.profile`; the API expands it into a set of indexing options. To set options directly instead, use `vectorIndexing.options`. + A profile is selected by setting `vectorIndexing` to a profile name (string); the API expands it into a set of indexing options. To set options directly instead, set `vectorIndexing` to an object of indexing options. The known profiles are: ${knownProfiles}. The command attempted to use the profile: ${unknownProfile}. - Resend the command using a known profile, or set `vectorIndexing.options` directly. + Resend the command using a known profile, or set `vectorIndexing` to an object of indexing options. - scope: SCHEMA code: INVALID_VECTOR_INDEXING_OPTIONS @@ -1838,7 +1838,7 @@ request-errors: body: |- The command attempted to create a vector index with invalid `vectorIndexing`. - `vectorIndexing` is an object with an optional `profile` (a predefined name) and an optional `options` object of Cassandra SAI tuning options. Only the supported tuning options may be set, and the dedicated `metric` / `sourceModel` fields must be used instead of setting them in `options`. + `vectorIndexing` is either a profile name (string) or an object of Cassandra SAI tuning options. Only the supported tuning options may be set, and the dedicated `metric` / `sourceModel` fields must be used instead of setting them as options. 
${reason} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java index 3c2fef5506..95bd5e5c65 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java @@ -1146,7 +1146,7 @@ public void unknownIndexingProfile() { "definition": { "column": "vector_type_7", "options": { - "vectorIndexing": { "profile": "no-such-profile" } + "vectorIndexing": "no-such-profile" } } } @@ -1167,9 +1167,7 @@ public void reservedOptionRejected() { "definition": { "column": "vector_type_7", "options": { - "vectorIndexing": { - "options": { "similarity_function": "COSINE" } - } + "vectorIndexing": { "similarity_function": "COSINE" } } } } @@ -1190,9 +1188,7 @@ public void unsupportedOptionRejected() { "definition": { "column": "vector_type_7", "options": { - "vectorIndexing": { - "options": { "class_name": "StorageAttachedIndex" } - } + "vectorIndexing": { "class_name": "StorageAttachedIndex" } } } } @@ -1213,9 +1209,7 @@ public void nonScalarOptionValueRejected() { "definition": { "column": "vector_type_7", "options": { - "vectorIndexing": { - "options": { "alpha": [1, 2, 3] } - } + "vectorIndexing": { "alpha": [1, 2, 3] } } } } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexUnknownOptionProbeIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexUnknownOptionProbeIntegrationTest.java new file mode 100644 index 0000000000..84eea157ed --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexUnknownOptionProbeIntegrationTest.java @@ -0,0 +1,130 @@ +package io.stargate.sgv2.jsonapi.api.v1.tables; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatCode; +import 
static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.datastax.oss.driver.api.core.CqlSession; +import com.datastax.oss.driver.api.core.cql.SimpleStatement; +import io.quarkus.test.junit.QuarkusIntegrationTest; +import io.stargate.sgv2.jsonapi.api.v1.AbstractKeyspaceIntegrationTestBase; +import java.lang.reflect.Method; +import org.apache.commons.lang3.RandomStringUtils; +import org.junit.jupiter.api.MethodOrderer; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestMethodOrder; + +/** + * EMPIRICAL PROBE (issue #2487): Does the backing DB (DSE 6.9 / HCD) accept an UNKNOWN option KEY in + * a vector SAI index's {@code CREATE CUSTOM INDEX ... WITH OPTIONS}? + * + *

This BYPASSES data-api's own {@code ApiVectorIndex.applyIndexingOptions} allow-list by issuing + * RAW CQL directly against the running test container via the admin {@link CqlSession} provided by + * {@link AbstractKeyspaceIntegrationTestBase} (driver session, {@code cassandra/cassandra}). It does + * NOT go through the data-api HTTP command layer. + * + *

Hypothesis: a key SAI has never heard of (here {@code profile}) should be rejected by SAI's + * option validation regardless of {@code SAI_HNSW_ALLOW_CUSTOM_PARAMETERS} (that flag only gates the + * KNOWN custom HNSW tuning params like {@code maximum_node_connections}). The control index, using + * only {@code similarity_function:cosine}, must succeed to prove the table/column/CQL is otherwise + * valid. + * + *

The test is written to ALWAYS PASS while RECORDING the observed behavior to stdout, so the probe + * never fails CI ambiguously; flip {@code EXPECT_UNKNOWN_KEY_REJECTED} to turn it into a hard + * assertion once the empirical answer is known. + */ +@QuarkusIntegrationTest +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +class VectorIndexUnknownOptionProbeIntegrationTest extends AbstractKeyspaceIntegrationTestBase { + + /** Flip to true to turn the probe into a hard assertion that the unknown key is rejected. */ + private static final boolean EXPECT_UNKNOWN_KEY_REJECTED = false; + + private static final String TABLE = + "probe_" + RandomStringUtils.insecure().nextAlphanumeric(12).toLowerCase(); + private static final String VECTOR_COL = "embedding"; + private static final int DIMENSION = 4; + + /** Reflective accessor to the private CqlSession in the base class, for direct error inspection. */ + private CqlSession session() { + try { + Method m = AbstractKeyspaceIntegrationTestBase.class.getDeclaredMethod("createDriverSession"); + m.setAccessible(true); + return (CqlSession) m.invoke(this); + } catch (ReflectiveOperationException e) { + throw new RuntimeException("Could not obtain CqlSession from base class", e); + } + } + + @Test + @Order(1) + void createVectorTable() { + // Raw CQL: keyspace already created by AbstractKeyspaceIntegrationTestBase#createKeyspace. + boolean applied = + executeCqlStatement( + String.format( + "CREATE TABLE IF NOT EXISTS \"%s\".\"%s\" " + + "(id text PRIMARY KEY, %s vector)", + keyspaceName, TABLE, VECTOR_COL, DIMENSION)); + assertThat(applied).as("vector table create applied").isTrue(); + } + + @Test + @Order(2) + void controlIndex_knownGoodOptionsOnly_mustSucceed() { + // CONTROL: only a known-good SAI option. Must succeed -> proves table/column/CQL path is valid + // and that an unknown-key failure in the TEST case is specifically about the unknown key. 
+ String cql = + String.format( + "CREATE CUSTOM INDEX \"idx_control_%s\" ON \"%s\".\"%s\" (%s) " + + "USING 'StorageAttachedIndex' WITH OPTIONS = {'similarity_function':'cosine'}", + TABLE, keyspaceName, TABLE, VECTOR_COL); + + assertThatCode(() -> session().execute(SimpleStatement.newInstance(cql))) + .as("CONTROL index with only {'similarity_function':'cosine'} must be accepted") + .doesNotThrowAnyException(); + } + + @Test + @Order(3) + void testIndex_unknownOptionKey_recordAcceptOrReject() { + // TEST: add an option key SAI does not know about ('profile'). similarity_function is kept so + // the ONLY difference vs the control is the unknown key. + String cql = + String.format( + "CREATE CUSTOM INDEX \"idx_test_%s\" ON \"%s\".\"%s\" (%s) " + + "USING 'StorageAttachedIndex' " + + "WITH OPTIONS = {'similarity_function':'cosine','profile':'small-high-recall'}", + TABLE, keyspaceName, TABLE, VECTOR_COL); + + Throwable thrown = + org.junit.jupiter.api.Assertions.assertDoesNotThrow( + () -> { + try { + session().execute(SimpleStatement.newInstance(cql)); + return (Throwable) null; + } catch (Throwable t) { + return t; + } + }); + + boolean rejected = thrown != null; + System.out.println("================================================================="); + System.out.println("[VECTOR-INDEX-UNKNOWN-OPTION PROBE] unknown key 'profile' rejected=" + + rejected); + if (rejected) { + System.out.println("[PROBE] rejection class : " + thrown.getClass().getName()); + System.out.println("[PROBE] rejection message: " + thrown.getMessage()); + } else { + System.out.println("[PROBE] DB SILENTLY ACCEPTED the unknown 'profile' key."); + } + System.out.println("================================================================="); + + if (EXPECT_UNKNOWN_KEY_REJECTED) { + assertThat(rejected) + .as("DB should reject unknown SAI option key 'profile'") + .isTrue(); + } + } +} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java 
b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java index 499c0d8dd4..6dfcf1b976 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java @@ -21,17 +21,22 @@ import org.junit.jupiter.params.provider.ValueSource; /** - * Unit tests for the structured {@code vectorIndexing} ({@code {profile, options}}) handling on - * {@link ApiVectorIndex}: that a request body deserializes to the expected object, how it is - * validated and turned into the CQL index options map, and how it is described back. Deterministic; - * needs no database (end-to-end also depends on the backend allowing custom SAI parameters). + * Unit tests for the overloaded {@code vectorIndexing} (a profile name string or a raw SAI + * options object) handling on {@link ApiVectorIndex}: that a request body deserializes to the + * expected value, how it is validated and turned into the CQL index options map, and how it is + * described back. Deterministic; needs no database (end-to-end also depends on the backend allowing + * custom SAI parameters). */ class ApiVectorIndexTest { private static final ObjectMapper MAPPER = new ObjectMapper(); - private static VectorIndexingDesc vi(String profile, Map options) { - return new VectorIndexingDesc(profile, options); + private static VectorIndexingDesc profile(String profile) { + return VectorIndexingDesc.ofProfile(profile); + } + + private static VectorIndexingDesc options(Map options) { + return VectorIndexingDesc.ofOptions(options); } /** Source of every option that has a dedicated field and so is rejected inside options. */ @@ -39,40 +44,40 @@ static Stream reservedOptions() { return VectorConstants.CQLAnnIndex.RESERVED_OPTIONS.stream(); } - /** The request body deserializes into the expected {@code vectorIndexing} object. 
*/ + /** + * The overloaded {@code vectorIndexing} deserializes by JSON type: a string is a profile, an + * object is raw options, and anything else is rejected. + */ @Nested class RequestShape { @Test - void deserializesProfileAndOptions() throws Exception { + void stringDeserializesToProfile() throws Exception { var opts = MAPPER.readValue( - """ - { - "vectorIndexing": { - "profile": "small-high-recall", - "options": { "maximum_node_connections": 32, "enable_hierarchy": true } - } - } - """, - VectorIndexDescOptions.class); + "{\"vectorIndexing\": \"small-high-recall\"}", VectorIndexDescOptions.class); assertThat(opts.vectorIndexing()).isNotNull(); assertThat(opts.vectorIndexing().profile()).isEqualTo("small-high-recall"); - assertThat(opts.vectorIndexing().options()) - .containsEntry("maximum_node_connections", 32) - .containsEntry("enable_hierarchy", true); + assertThat(opts.vectorIndexing().options()).isNull(); } @Test - void deserializesProfileOnly() throws Exception { + void objectDeserializesToRawOptions() throws Exception { var opts = MAPPER.readValue( - "{\"vectorIndexing\": {\"profile\": \"big-low-latency\"}}", + """ + { + "vectorIndexing": { "maximum_node_connections": 32, "enable_hierarchy": true } + } + """, VectorIndexDescOptions.class); - assertThat(opts.vectorIndexing().profile()).isEqualTo("big-low-latency"); - assertThat(opts.vectorIndexing().options()).isNull(); + assertThat(opts.vectorIndexing()).isNotNull(); + assertThat(opts.vectorIndexing().profile()).isNull(); + assertThat(opts.vectorIndexing().options()) + .containsEntry("maximum_node_connections", 32) + .containsEntry("enable_hierarchy", true); } @Test @@ -81,9 +86,36 @@ void absentVectorIndexingIsNull() throws Exception { assertThat(opts.vectorIndexing()).isNull(); } + + @ParameterizedTest + @ValueSource(strings = {"123", "true", "[\"small-high-recall\"]"}) + void nonStringNonObjectRejected(String value) { + // Jackson may surface the deserializer's SchemaException directly or wrap it; 
assert that a + // SchemaException with the expected code is somewhere in the chain. + assertThatThrownBy( + () -> + MAPPER.readValue( + "{\"vectorIndexing\": " + value + "}", VectorIndexDescOptions.class)) + .satisfies( + t -> { + var schemaException = findSchemaException(t); + assertThat(schemaException).as("a SchemaException in the cause chain").isNotNull(); + assertThat(schemaException.code) + .isEqualTo(SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.name()); + }); + } + + private SchemaException findSchemaException(Throwable t) { + for (Throwable cause = t; cause != null; cause = cause.getCause()) { + if (cause instanceof SchemaException schemaException) { + return schemaException; + } + } + return null; + } } - /** A {@code vectorIndexing} object resolves to the expected CQL index options map. */ + /** A {@code vectorIndexing} value resolves to the expected CQL index options map. */ @Nested class ApplyIndexingOptions { @@ -97,11 +129,10 @@ void nullIsNoOp() { } @Test - void emptyDescIsNoOp() { + void emptyOptionsIsNoOp() { var options = new HashMap(); - ApiVectorIndex.applyIndexingOptions(options, vi(null, null)); - ApiVectorIndex.applyIndexingOptions(options, vi(null, Map.of())); + ApiVectorIndex.applyIndexingOptions(options, options(Map.of())); assertThat(options).isEmpty(); } @@ -110,7 +141,7 @@ void emptyDescIsNoOp() { void profileExpands() { var options = new HashMap(); - ApiVectorIndex.applyIndexingOptions(options, vi("small-high-recall", null)); + ApiVectorIndex.applyIndexingOptions(options, profile("small-high-recall")); assertThat(options) .containsAllEntriesOf(VectorIndexProfiles.forName("small-high-recall").orElseThrow()); @@ -122,8 +153,7 @@ void optionsApplied() { ApiVectorIndex.applyIndexingOptions( options, - vi( - null, + options( Map.of("maximum_node_connections", 32, "enable_hierarchy", true, "alpha", "1.2"))); assertThat(options) @@ -132,26 +162,12 @@ void optionsApplied() { .containsEntry("alpha", "1.2"); } - @Test - void 
optionsOverrideProfile() { - var options = new HashMap(); - - // small-high-recall sets maximum_node_connections=32, construction_beam_width=200 - ApiVectorIndex.applyIndexingOptions( - options, vi("small-high-recall", Map.of("maximum_node_connections", 99))); - - assertThat(options) - .containsEntry("maximum_node_connections", "99") // explicit option wins - .containsEntry("construction_beam_width", "200"); // inherited from the profile - } - @Test void mergesWithExistingOptions() { var options = new HashMap(); options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OTHER"); - ApiVectorIndex.applyIndexingOptions( - options, vi(null, Map.of("maximum_node_connections", 16))); + ApiVectorIndex.applyIndexingOptions(options, options(Map.of("maximum_node_connections", 16))); assertThat(options) .containsEntry(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OTHER") @@ -164,8 +180,7 @@ void allAllowedOptionsAccepted() { ApiVectorIndex.applyIndexingOptions( options, - vi( - null, + options( Map.of( "maximum_node_connections", 16, @@ -185,12 +200,12 @@ void allAllowedOptionsAccepted() { @Test void unknownProfileThrows() { assertSchemaError( - vi("no-such-profile", null), SchemaException.Code.UNKNOWN_VECTOR_INDEXING_PROFILE); + profile("no-such-profile"), SchemaException.Code.UNKNOWN_VECTOR_INDEXING_PROFILE); } @Test void blankProfileThrows() { - assertSchemaError(vi("", null), SchemaException.Code.UNKNOWN_VECTOR_INDEXING_PROFILE); + assertSchemaError(profile(""), SchemaException.Code.UNKNOWN_VECTOR_INDEXING_PROFILE); } @ParameterizedTest @@ -198,7 +213,7 @@ void blankProfileThrows() { "io.stargate.sgv2.jsonapi.service.schema.tables.ApiVectorIndexTest#reservedOptions") void reservedOptionThrows(String reservedOption) { assertSchemaError( - vi(null, Map.of(reservedOption, "x")), + options(Map.of(reservedOption, "x")), SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); } @@ -206,7 +221,7 @@ void reservedOptionThrows(String reservedOption) { @ValueSource(strings = {"class_name", 
"target", "optimize_for", "bogus_option"}) void unsupportedOptionThrows(String optionName) { assertSchemaError( - vi(null, Map.of(optionName, "x")), SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); + options(Map.of(optionName, "x")), SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); } @Test @@ -216,8 +231,7 @@ void numericOptionsUsePlainString() { // JSON numbers arrive as BigDecimal; the CQL value must not use scientific notation. ApiVectorIndex.applyIndexingOptions( options, - vi( - null, + options( Map.of( "construction_beam_width", new BigDecimal("1E+2"), "alpha", new BigDecimal("1.5")))); @@ -231,10 +245,10 @@ void numericOptionsUsePlainString() { void nonScalarOptionValueThrows() { // "alpha" is an allowed key, so this reaches the scalar-value check assertSchemaError( - vi(null, Map.of("alpha", List.of(1, 2))), + options(Map.of("alpha", List.of(1, 2))), SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); assertSchemaError( - vi(null, Map.of("alpha", Map.of("x", 1))), + options(Map.of("alpha", Map.of("x", 1))), SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); } @@ -342,7 +356,7 @@ void applyThenDescribe() { var options = new HashMap(); ApiVectorIndex.applyIndexingOptions( - options, vi(null, Map.of("maximum_node_connections", 32, "alpha", 1.2))); + options, options(Map.of("maximum_node_connections", 32, "alpha", 1.2))); var described = ApiVectorIndex.describeIndexingOptions(options); assertThat(described).isNotNull(); From 14a8f10941775f660f1836c580563af6e63f423e Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Mon, 22 Jun 2026 12:11:44 -0700 Subject: [PATCH 08/13] style: apply fmt to VectorIndexUnknownOptionProbeIntegrationTest --- ...ndexUnknownOptionProbeIntegrationTest.java | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexUnknownOptionProbeIntegrationTest.java 
b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexUnknownOptionProbeIntegrationTest.java index 84eea157ed..5c3948dbf7 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexUnknownOptionProbeIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexUnknownOptionProbeIntegrationTest.java @@ -2,7 +2,6 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatCode; -import static org.assertj.core.api.Assertions.assertThatThrownBy; import com.datastax.oss.driver.api.core.CqlSession; import com.datastax.oss.driver.api.core.cql.SimpleStatement; @@ -16,22 +15,22 @@ import org.junit.jupiter.api.TestMethodOrder; /** - * EMPIRICAL PROBE (issue #2487): Does the backing DB (DSE 6.9 / HCD) accept an UNKNOWN option KEY in - * a vector SAI index's {@code CREATE CUSTOM INDEX ... WITH OPTIONS}? + * EMPIRICAL PROBE (issue #2487): Does the backing DB (DSE 6.9 / HCD) accept an UNKNOWN option KEY + * in a vector SAI index's {@code CREATE CUSTOM INDEX ... WITH OPTIONS}? * *

This BYPASSES data-api's own {@code ApiVectorIndex.applyIndexingOptions} allow-list by issuing * RAW CQL directly against the running test container via the admin {@link CqlSession} provided by - * {@link AbstractKeyspaceIntegrationTestBase} (driver session, {@code cassandra/cassandra}). It does - * NOT go through the data-api HTTP command layer. + * {@link AbstractKeyspaceIntegrationTestBase} (driver session, {@code cassandra/cassandra}). It + * does NOT go through the data-api HTTP command layer. * *

Hypothesis: a key SAI has never heard of (here {@code profile}) should be rejected by SAI's - * option validation regardless of {@code SAI_HNSW_ALLOW_CUSTOM_PARAMETERS} (that flag only gates the - * KNOWN custom HNSW tuning params like {@code maximum_node_connections}). The control index, using - * only {@code similarity_function:cosine}, must succeed to prove the table/column/CQL is otherwise - * valid. + * option validation regardless of {@code SAI_HNSW_ALLOW_CUSTOM_PARAMETERS} (that flag only gates + * the KNOWN custom HNSW tuning params like {@code maximum_node_connections}). The control index, + * using only {@code similarity_function:cosine}, must succeed to prove the table/column/CQL is + * otherwise valid. * - *

The test is written to ALWAYS PASS while RECORDING the observed behavior to stdout, so the probe - * never fails CI ambiguously; flip {@code EXPECT_UNKNOWN_KEY_REJECTED} to turn it into a hard + *

The test is written to ALWAYS PASS while RECORDING the observed behavior to stdout, so the + * probe never fails CI ambiguously; flip {@code EXPECT_UNKNOWN_KEY_REJECTED} to turn it into a hard * assertion once the empirical answer is known. */ @QuarkusIntegrationTest @@ -46,7 +45,9 @@ class VectorIndexUnknownOptionProbeIntegrationTest extends AbstractKeyspaceInteg private static final String VECTOR_COL = "embedding"; private static final int DIMENSION = 4; - /** Reflective accessor to the private CqlSession in the base class, for direct error inspection. */ + /** + * Reflective accessor to the private CqlSession in the base class, for direct error inspection. + */ private CqlSession session() { try { Method m = AbstractKeyspaceIntegrationTestBase.class.getDeclaredMethod("createDriverSession"); @@ -111,8 +112,8 @@ void testIndex_unknownOptionKey_recordAcceptOrReject() { boolean rejected = thrown != null; System.out.println("================================================================="); - System.out.println("[VECTOR-INDEX-UNKNOWN-OPTION PROBE] unknown key 'profile' rejected=" - + rejected); + System.out.println( + "[VECTOR-INDEX-UNKNOWN-OPTION PROBE] unknown key 'profile' rejected=" + rejected); if (rejected) { System.out.println("[PROBE] rejection class : " + thrown.getClass().getName()); System.out.println("[PROBE] rejection message: " + thrown.getMessage()); @@ -122,9 +123,7 @@ void testIndex_unknownOptionKey_recordAcceptOrReject() { System.out.println("================================================================="); if (EXPECT_UNKNOWN_KEY_REJECTED) { - assertThat(rejected) - .as("DB should reject unknown SAI option key 'profile'") - .isTrue(); + assertThat(rejected).as("DB should reject unknown SAI option key 'profile'").isTrue(); } } } From 495a4592fc70d1bcddf3f35d82f0e466aa5b36e4 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Mon, 22 Jun 2026 12:21:45 -0700 Subject: [PATCH 09/13] docs: tighten and de-AI comments on the vectorIndexing changes 
Comment-only edits across the PR: shorter, plainer wording; drop em-dashes, filler, and obvious narration while keeping the {@link} refs, issue references, and the non-obvious rationale. --- .../VectorIndexingDescDeserializer.java | 20 +++++----- .../indexes/VectorIndexDefinitionDesc.java | 9 ++--- .../config/constants/SchemaConstants.java | 2 +- .../config/constants/VectorConstants.java | 7 ++-- .../cqldriver/executor/TableExtensions.java | 31 ++++++++-------- .../VectorIndexProfileDefinition.java | 15 ++++---- .../tables/DropVectorIndexProfileDBTask.java | 18 ++++----- .../resolver/AlterTableCommandResolver.java | 4 +- .../CreateVectorIndexCommandResolver.java | 22 +++++------ .../resolver/DropIndexCommandResolver.java | 10 ++--- .../service/schema/KeyspaceSchemaObject.java | 7 ++-- .../service/schema/tables/ApiVectorIndex.java | 28 +++++++------- .../schema/tables/VectorIndexProfiles.java | 8 ++-- ...ndexUnknownOptionProbeIntegrationTest.java | 37 ++++++++----------- .../executor/TableExtensionsTest.java | 6 +-- .../VectorIndexProfileDefinitionTest.java | 2 +- .../DropVectorIndexProfileDBTaskTest.java | 2 +- .../schema/tables/ApiVectorIndexTest.java | 21 ++++++----- 18 files changed, 117 insertions(+), 132 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/VectorIndexingDescDeserializer.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/VectorIndexingDescDeserializer.java index e330f1ddda..50b76e9101 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/VectorIndexingDescDeserializer.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/VectorIndexingDescDeserializer.java @@ -13,19 +13,19 @@ import java.util.Map; /** - * Deserializes the overloaded {@code vectorIndexing} value, which is either: + * Deserializes the overloaded {@code vectorIndexing} value, either: * *

    - *
  • a JSON string → a named profile the API expands into SAI options, e.g. + *
  • a JSON string: a named profile expanded into SAI options, e.g. * "vectorIndexing": "small-high-recall" - *
  • a JSON object → raw Cassandra SAI tuning options set directly, e.g. + *
  • a JSON object: raw Cassandra SAI tuning options, e.g. * "vectorIndexing": { "maximum_node_connections": 32 } *
* - * Anything else (number, boolean, array, null token) is a request error. This mirrors the design in - * #2508: the field is overloaded by - * JSON type rather than carrying separate {@code profile} / {@code options} sub-keys, so a profile - * and raw options are mutually exclusive in a single request. + *

Anything else (number, boolean, array, null token) is a request error. Per #2508 the field is overloaded by JSON + * type rather than separate {@code profile} / {@code options} sub-keys, so profile and raw options + * are mutually exclusive in one request. */ public class VectorIndexingDescDeserializer extends StdDeserializer { @@ -42,12 +42,12 @@ public VectorIndexingDesc deserialize( JsonNode node = deserializationContext.readTree(jsonParser); if (node.isTextual()) { - // "vectorIndexing": "small-high-recall" -> a named profile (validated at apply time). + // named profile, validated at apply time return VectorIndexingDesc.ofProfile(node.textValue()); } if (node.isObject()) { - // "vectorIndexing": { "maximum_node_connections": 32 } -> raw SAI options. convertValue - // honours the mapper config (e.g. float handling) just as a Map field would. + // raw SAI options. convertValue applies the mapper config (e.g. float handling), as a + // Map field would Map options = ((ObjectMapper) jsonParser.getCodec()).convertValue(node, OPTIONS_TYPE); return VectorIndexingDesc.ofOptions(options); diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java index 2defb26def..af18eac40c 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java @@ -81,10 +81,9 @@ public record VectorIndexDescOptions( VectorIndexingDesc vectorIndexing) {} /** - * The overloaded {@code vectorIndexing} value: exactly one of a named {@code profile} (a JSON - * string) or raw SAI tuning {@code options} (a JSON object) is set. 
Deserialized by {@link - * VectorIndexingDescDeserializer} (discriminated by JSON type) and serialized back to the bare - * string or object via {@link #jsonValue()}. + * Overloaded {@code vectorIndexing} value: exactly one of a named {@code profile} (JSON string) + * or raw SAI tuning {@code options} (JSON object) is set. {@link VectorIndexingDescDeserializer} + * discriminates by JSON type; {@link #jsonValue()} serializes back to the bare string or object. */ @JsonDeserialize(using = VectorIndexingDescDeserializer.class) public record VectorIndexingDesc( @@ -100,7 +99,7 @@ public static VectorIndexingDesc ofOptions(Map options) { return new VectorIndexingDesc(null, options); } - /** Serializes back to the overloaded shape: the bare profile string or the bare options map. */ + /** Serializes to the bare profile string or the bare options map. */ @JsonValue Object jsonValue() { return profile != null ? profile : options; diff --git a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/SchemaConstants.java b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/SchemaConstants.java index 3430f7e9cd..9ea6a4c00c 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/SchemaConstants.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/SchemaConstants.java @@ -11,7 +11,7 @@ interface MetadataFieldsNames { String SCHEMA_TYPE = "com.datastax.data-api.schema-type"; String SCHEMA_VERSION = "com.datastax.data-api.schema-def-version"; String VECTORIZE_CONFIG = "com.datastax.data-api.vectorize-config"; - // Per vector-index record of the profile it was created with (name + expanded options). + // Per vector-index, the profile it was created with (name + expanded options). 
String VECTOR_INDEX_PROFILES = "com.datastax.data-api.vector-index-profiles"; } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java index 9a79d8b23c..a7d1b19ccc 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java @@ -15,7 +15,7 @@ interface Vectorize extends ServiceDescConstants {} /** * CQL {@code WITH OPTIONS} keys for a vector (SAI) index. {@link #SOURCE_MODEL} and {@link - * #SIMILARITY_FUNCTION} have dedicated API fields ({@code sourceModel} / {@code metric}); the + * #SIMILARITY_FUNCTION} map to dedicated API fields ({@code sourceModel} / {@code metric}); the * rest are tuning options set via {@code vectorIndexing.options}. */ interface CQLAnnIndex { @@ -34,9 +34,8 @@ interface CQLAnnIndex { Set RESERVED_OPTIONS = Set.of(SOURCE_MODEL, SIMILARITY_FUNCTION); /** - * The SAI tuning options a user may set through {@code vectorIndexing.options}. Excludes the - * dedicated-field options and the structural ones. {@code optimize_for} exists in OSS Cassandra - * but is de-emphasised in DSE 6.9 / HCD, so it is intentionally left out for now. + * SAI tuning options settable through {@code vectorIndexing.options}. {@code optimize_for} + * exists in OSS Cassandra but is de-emphasized in DSE 6.9 / HCD, so it is left out for now. 
*/ Set ALLOWED_OPTIONS = Set.of( diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensions.java b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensions.java index f713e7d892..b1546c4e2e 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensions.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensions.java @@ -75,8 +75,8 @@ public static Map createCustomProperties( * Builds the table extensions payload: schema type/version (always written, since the command may * be altering a CQL-created table) plus the vectorize config and vector index profiles. * - *

Extensions are fully replaced on every write, so callers must pass the complete set of defs - * and profiles they want to keep; anything omitted is dropped. + *

Extensions are fully replaced on every write, so callers must pass every def and profile + * they want to keep; anything omitted is dropped. */ public static Map createCustomProperties( Map vectorDefs, @@ -118,17 +118,17 @@ public static Map createCustomProperties( /** * Computes the extensions payload that drops {@code indexName}'s vector-index profile from the - * table that owns it. Used to keep the {@link - * SchemaConstants.MetadataFieldsNames#VECTOR_INDEX_PROFILES} extension in sync when an index is - * dropped, so a profile record does not outlive its index. + * table that owns it, keeping the {@link + * SchemaConstants.MetadataFieldsNames#VECTOR_INDEX_PROFILES} extension in sync so a profile does + * not outlive its index. * - *

The owning table is found by scanning {@code keyspaceMetadata} for the table whose indexes - * contain {@code indexName}. Returns empty when there is nothing to do — no table owns the index, - * or the owning table has no stored profile for it — so the caller can skip the extra DDL. + *

The owning table is the one in {@code keyspaceMetadata} whose indexes contain {@code + * indexName}. Returns empty (so the caller can skip the DDL) when no table owns the index or the + * owning table has no stored profile for it. * - *

When a rewrite is needed, the existing vectorize config and the other indexes' profiles are - * read back and included so the clobbering extension write does not lose them (the same approach - * as the create side, see {@link #createCustomProperties(Map, Map, ObjectMapper)}). + *

On rewrite the existing vectorize config and the other indexes' profiles are read back and + * included so the full-replace write does not lose them, as on the create side (see {@link + * #createCustomProperties(Map, Map, ObjectMapper)}). */ public static Optional removeIndexProfile( KeyspaceMetadata keyspaceMetadata, CqlIdentifier indexName, ObjectMapper objectMapper) { @@ -152,9 +152,8 @@ profiles, cqlIdentifierToJsonKey(indexName), null)) { return Optional.empty(); } - // Read the vectorize config back so the full-replace extension write preserves it. The stored - // keys are the column identifiers' internal form, so reconstruct the CqlIdentifier keys that - // createCustomProperties expects. + // Read the vectorize config back so the full-replace write preserves it. Stored keys are the + // columns' internal form, so rebuild the CqlIdentifier keys createCustomProperties expects. var vectorDefs = VectorizeDefinition.from(tableMetadata, objectMapper).entrySet().stream() .collect( @@ -166,8 +165,8 @@ profiles, cqlIdentifierToJsonKey(indexName), null)) { } /** - * The result of {@link #removeIndexProfile}: the table to alter and the complete extensions - * payload to write (with the dropped index's profile removed and everything else preserved). + * Result of {@link #removeIndexProfile}: the table to alter and the extensions payload to write + * (dropped index's profile removed, everything else preserved). 
*/ public record IndexProfileRemoval( CqlIdentifier tableName, Map customProperties) {} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinition.java b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinition.java index b2d67e903f..e6638d559d 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinition.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinition.java @@ -13,9 +13,8 @@ /** * The profile a vector index was created with: the profile name plus the SAI options it expanded * to. Stored per index name in the table extensions (key {@link - * SchemaConstants.MetadataFieldsNames#VECTOR_INDEX_PROFILES}) so the friendly name is not lost once - * the options are expanded at create time. The options snapshot keeps the record meaningful even if - * the profile definition changes later. + * SchemaConstants.MetadataFieldsNames#VECTOR_INDEX_PROFILES}) to keep the profile name. The options + * snapshot stays valid even if the profile definition changes later. */ public record VectorIndexProfileDefinition(String profile, Map options) { @@ -30,9 +29,9 @@ public static Map from( } /** - * Parses the {@code index name -> profile} JSON written into the extensions. Returns a mutable - * map so callers can merge changes before writing it back. Profiles are advisory metadata, so a - * bad blob is logged and skipped rather than failing the read. + * Parses the {@code index name -> profile} JSON from the extensions. Returns a mutable map so + * callers can merge changes before writing it back. Profiles are advisory metadata, so a bad blob + * is logged and skipped, not failed. 
*/ static Map fromJson( String json, ObjectMapper objectMapper) { @@ -56,8 +55,8 @@ static Map fromJson( /** * Records the profile for {@code indexKey} in {@code profiles}, or removes any stale entry when - * {@code def} is null (no profile was used). Returns true if the map changed, so the caller can - * skip an unnecessary extension write. + * {@code def} is null (no profile was used). Returns true if the map changed, letting the caller + * skip an unneeded extension write. */ public static boolean putOrRemove( Map profiles, diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTask.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTask.java index c14079814c..011b9b601e 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTask.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTask.java @@ -12,14 +12,14 @@ import java.util.Objects; /** - * Removes a dropped index's entry from its owning table's vector-index-profiles extension, so a - * profile record does not outlive the index it described. + * Removes a dropped index's entry from its owning table's vector-index-profiles extension, so the + * profile record does not outlive the index. * - *

This runs as a keyspace-scoped sibling to {@link DropIndexDBTask} so the two can share one - * {@link io.stargate.sgv2.jsonapi.service.operation.tasks.TaskGroup}; a {@link - * io.stargate.sgv2.jsonapi.service.schema.tables.TableSchemaObject}-typed {@link AlterTableDBTask} - * (used by the create side) could not, because a TaskGroup has a single schema-object type. The - * owning table and the rewritten extensions payload are resolved at command-resolve time via {@link + *

Keyspace-scoped so it can share a {@link + * io.stargate.sgv2.jsonapi.service.operation.tasks.TaskGroup} with {@link DropIndexDBTask}; a + * {@link io.stargate.sgv2.jsonapi.service.schema.tables.TableSchemaObject}-typed {@link + * AlterTableDBTask} (the create side) cannot, since a TaskGroup has a single schema-object type. + * Owning table and rewritten extensions payload are resolved at command-resolve time via {@link * TableExtensions#removeIndexProfile}; this task only issues the {@code ALTER TABLE ... WITH * extensions = {...}}. */ @@ -50,8 +50,8 @@ public static DropVectorIndexProfileDBTaskBuilder builder(KeyspaceSchemaObject s @Override protected SimpleStatement buildStatement() { - // The owning table lives in this keyspace; take the keyspace from the schema object identifier, - // mirroring DropIndexDBTask which builds its statement the same way. + // owning table lives in this keyspace; keyspace from the schema object identifier, as + // DropIndexDBTask does var extensions = TableExtensions.toExtensions(customProperties); return alterTable(schemaObject.identifier().keyspace(), tableName) .withOption(TableExtensions.TABLE_OPTIONS_EXTENSION_KEY.asInternal(), extensions) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/AlterTableCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/AlterTableCommandResolver.java index e52e6a6cfb..2de0e51ca9 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/AlterTableCommandResolver.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/AlterTableCommandResolver.java @@ -88,8 +88,8 @@ public Operation resolveTableCommand( } /** - * Existing vector-index profiles for the table. Altering columns/vectorize fully replaces the - * extensions, so these must be carried through or they would be wiped. + * Existing vector-index profiles for the table. 
Altering columns/vectorize replaces the + * extensions wholesale, so these must be carried through to avoid wiping them. */ private Map existingIndexProfiles( TableSchemaObject tableSchemaObject) { diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java index ba5121f78a..48841723ab 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java @@ -102,10 +102,9 @@ public Operation resolveTableCommand( var createIndexTask = taskBuilder.build(apiIndex); - // If a named profile was used, record the name + the options it resolved to in the table - // extensions so the friendly name survives. Written as a second DDL after the index so a failed - // create leaves no orphan record. Returns null (no extension write) when there is nothing to - // persist or the index already exists (the CREATE IF NOT EXISTS would be a no-op). + // Records a named profile (name plus the options it expanded to) in the table extensions, run + // as a second DDL after the index so a failed create leaves no orphan record. Null when there + // is nothing to persist or the index already exists. var extensionTask = buildProfileExtensionTask(schemaObject, apiIndex, command, schemaRetryPolicy); if (extensionTask == null) { @@ -126,12 +125,12 @@ public Operation resolveTableCommand( } /** - * Builds the ALTER TABLE task that records this index's profile in the table extensions, or null - * when nothing needs to change. Returns null when the index already exists (a {@code CREATE ... - * IF NOT EXISTS} would be a no-op, so its stored profile must not be rewritten), or when no - * profile is used and there is no stale entry to clear. 
The snapshot stores the options the - * profile expanded to; existing vectorize config and other profiles are read back and rewritten - * so they are not lost. + * Builds the ALTER TABLE task that records this index's profile in the table extensions. + * + *

Returns null when the index already exists (a {@code CREATE ... IF NOT EXISTS} is a no-op, + * so its stored profile must keep matching the live index), or when no profile is used and there + * is no stale entry to clear. The stored options are what the profile expanded to; existing + * vectorize config and other profiles are read back and rewritten so they are not lost. */ private AlterTableDBTask buildProfileExtensionTask( TableSchemaObject schemaObject, @@ -139,8 +138,7 @@ private AlterTableDBTask buildProfileExtensionTask( CreateVectorIndexCommand command, SchemaDBTask.SchemaRetryPolicy schemaRetryPolicy) { - // The create is "IF NOT EXISTS": if the index already exists the create is a no-op, so leave - // its stored profile untouched (it must keep matching the live index options). + // Index already exists: the IF NOT EXISTS create is a no-op, so leave its stored profile alone. if (schemaObject.tableMetadata().getIndexes().containsKey(apiIndex.indexName())) { return null; } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/DropIndexCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/DropIndexCommandResolver.java index c324f34910..27d7dbca3c 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/DropIndexCommandResolver.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/DropIndexCommandResolver.java @@ -68,9 +68,8 @@ public Operation resolveKeyspaceCommand( command.options(), DropIndexCommand.Options::ifExists, IF_EXISTS_DEFAULT)) .build(); - // Also drop the index's vector-index profile (if any) from the owning table's extensions, so - // the profile record does not outlive the index. Null when the keyspace metadata is unknown or - // the owning table has no stored profile for this index, in which case only the drop runs. + // Drop the index's vector-index profile from the owning table's extensions so it does not + // outlive the index. 
Null when there is no profile to remove, leaving only the drop. var profileCleanupTask = buildProfileCleanupTask(schemaObject, indexName, schemaRetryPolicy); if (profileCleanupTask == null) { @@ -92,9 +91,8 @@ public Operation resolveKeyspaceCommand( } /** - * Builds the cleanup task that removes the dropped index's profile from its owning table's - * extensions, or null when there is nothing to clean up (keyspace metadata unknown, no owning - * table, or no stored profile for this index). + * Task that removes the dropped index's profile from its owning table's extensions. Null when + * there is nothing to clean up: keyspace metadata unknown, no owning table, or no stored profile. */ private DropVectorIndexProfileDBTask buildProfileCleanupTask( KeyspaceSchemaObject schemaObject, diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/KeyspaceSchemaObject.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/KeyspaceSchemaObject.java index 7f86f5d3ad..57f879d3e1 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/KeyspaceSchemaObject.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/KeyspaceSchemaObject.java @@ -41,11 +41,10 @@ public KeyspaceSchemaObject(Tenant tenant, KeyspaceMetadata keyspaceMetadata) { } /** - * The Cassandra metadata for this keyspace, when known. + * The Cassandra metadata for this keyspace. * - *

Empty when the object was built via the {@link - * #KeyspaceSchemaObject(SchemaObjectIdentifier)} test constructor, which carries no metadata. - * Present for objects built from live schema, where it lets callers reach the keyspace's tables + *

Empty for objects built via the {@link #KeyspaceSchemaObject(SchemaObjectIdentifier)} test + * constructor; present for objects built from live schema. Gives access to the keyspace's tables * and their indexes (e.g. to find the table that owns a named index). */ public Optional keyspaceMetadata() { diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java index f36ded4d2c..82711247da 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java @@ -81,13 +81,12 @@ public VectorIndexDefinitionDesc definition() { } /** - * Builds the {@code vectorIndexing} description from the CQL index options map: the supported - * tuning options (see {@link VectorConstants.CQLAnnIndex#ALLOWED_OPTIONS}) under {@code options}. - * Structural, dedicated-field, and any other (e.g. CQL-only) keys are omitted so the description - * stays symmetric with what the API accepts. The profile name is not reconstructed here (it lives - * in the table extensions), so only {@code options} is set. + * Builds the {@code vectorIndexing} description from the CQL index options map, keeping only the + * supported tuning options (see {@link VectorConstants.CQLAnnIndex#ALLOWED_OPTIONS}) under {@code + * options}. Structural, dedicated-field, and CQL-only keys are dropped to stay symmetric with + * what the API accepts. The profile name lives in the table extensions, not here, so only {@code + * options} is set. 
* - * @param indexOptions the CQL index options map * @return the {@code vectorIndexing} description, or null when there are no supported tuning * options */ @@ -101,9 +100,9 @@ static VectorIndexDefinitionDesc.VectorIndexingDesc describeIndexingOptions( } /** - * The supported SAI tuning options actually applied to this index (profile expansion plus any - * explicit overrides). Used to snapshot the resolved options next to a stored profile name, so - * the snapshot matches the live index rather than the base profile. + * The supported SAI tuning options applied to this index (profile expansion plus explicit + * overrides). Snapshotted next to a stored profile name to capture the live index rather than the + * base profile. */ public Map appliedTuningOptions() { return tuningOptions(indexOptions); @@ -123,10 +122,9 @@ static Map tuningOptions(Map indexOptions) { /** * Applies the request's {@code vectorIndexing} into the CQL index options map. {@code - * vectorIndexing} is overloaded: either a {@code profile} name expanded via {@link - * VectorIndexProfiles}, or an {@code options} object of Cassandra SAI tuning options (validated - * against {@link VectorConstants.CQLAnnIndex#ALLOWED_OPTIONS}) set directly — the two are - * mutually exclusive (see {@link + * vectorIndexing} is either a {@code profile} name expanded via {@link VectorIndexProfiles}, or + * an {@code options} object of Cassandra SAI tuning options validated against {@link + * VectorConstants.CQLAnnIndex#ALLOWED_OPTIONS}. The two are mutually exclusive (see {@link * io.stargate.sgv2.jsonapi.api.model.command.deserializers.VectorIndexingDescDeserializer}). * {@code source_model} / {@code similarity_function} have dedicated fields and are rejected here. * @@ -373,8 +371,8 @@ public ApiVectorIndex create( metricToUse); } - // Apply vectorIndexing (either a profile name or raw options, mutually exclusive); metric / - // sourceModel above have dedicated fields. 
+ // vectorIndexing is a profile name or raw options (mutually exclusive); metric / sourceModel + // above use dedicated fields. var userVectorIndexing = (indexDesc.options() == null) ? null : indexDesc.options().vectorIndexing(); applyIndexingOptions(indexOptions, userVectorIndexing); diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java index 0a51ef54bf..978ea95228 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java @@ -6,14 +6,14 @@ import java.util.Set; /** - * Named vector-index profiles: each maps a profile name to a set of Cassandra SAI indexing options. - * Selecting a profile is an alternative to passing raw options through {@code vectorIndexing}. + * Named vector-index profiles, each mapping a profile name to Cassandra SAI indexing options. An + * alternative to passing raw options through {@code vectorIndexing}. * *

Profiles never set {@code source_model} or {@code similarity_function}; those have the * dedicated {@code sourceModel} / {@code metric} fields. Values are Strings because CQL index * options are a {@code Map}. * - *

This is an initial in-code set; the values are expected to be tuned and moved to config. + *

Initial in-code set; values to be tuned and moved to config. */ public final class VectorIndexProfiles { @@ -30,7 +30,7 @@ private VectorIndexProfiles() {} Map.of(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "16")); /** - * Looks up a profile by name, case-insensitively. + * Case-insensitive profile lookup. * * @return the profile's CQL options, or empty if {@code name} is null, blank, or unknown */ diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexUnknownOptionProbeIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexUnknownOptionProbeIntegrationTest.java index 5c3948dbf7..da32ed0b8c 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexUnknownOptionProbeIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexUnknownOptionProbeIntegrationTest.java @@ -15,23 +15,20 @@ import org.junit.jupiter.api.TestMethodOrder; /** - * EMPIRICAL PROBE (issue #2487): Does the backing DB (DSE 6.9 / HCD) accept an UNKNOWN option KEY - * in a vector SAI index's {@code CREATE CUSTOM INDEX ... WITH OPTIONS}? + * Probe (issue #2487): does the backing DB (DSE 6.9 / HCD) accept an unknown option key in a vector + * SAI index's {@code CREATE CUSTOM INDEX ... WITH OPTIONS}? * - *

This BYPASSES data-api's own {@code ApiVectorIndex.applyIndexingOptions} allow-list by issuing - * RAW CQL directly against the running test container via the admin {@link CqlSession} provided by - * {@link AbstractKeyspaceIntegrationTestBase} (driver session, {@code cassandra/cassandra}). It - * does NOT go through the data-api HTTP command layer. + *

Issues raw CQL via the admin {@link CqlSession} from {@link + * AbstractKeyspaceIntegrationTestBase}, bypassing the {@code ApiVectorIndex.applyIndexingOptions} + * allow-list and the data-api HTTP command layer. * - *

Hypothesis: a key SAI has never heard of (here {@code profile}) should be rejected by SAI's - * option validation regardless of {@code SAI_HNSW_ALLOW_CUSTOM_PARAMETERS} (that flag only gates - * the KNOWN custom HNSW tuning params like {@code maximum_node_connections}). The control index, - * using only {@code similarity_function:cosine}, must succeed to prove the table/column/CQL is - * otherwise valid. + *

Hypothesis: an unknown key (here {@code profile}) is rejected by SAI option validation + * regardless of {@code SAI_HNSW_ALLOW_CUSTOM_PARAMETERS}, which only gates known HNSW tuning params + * like {@code maximum_node_connections}. The control index uses only {@code + * similarity_function:cosine} to confirm the table/column/CQL is otherwise valid. * - *

The test is written to ALWAYS PASS while RECORDING the observed behavior to stdout, so the - * probe never fails CI ambiguously; flip {@code EXPECT_UNKNOWN_KEY_REJECTED} to turn it into a hard - * assertion once the empirical answer is known. + *

Always passes, recording observed behavior to stdout. Flip {@code EXPECT_UNKNOWN_KEY_REJECTED} + * to make it a hard assertion once the answer is known. */ @QuarkusIntegrationTest @TestMethodOrder(MethodOrderer.OrderAnnotation.class) @@ -45,9 +42,7 @@ class VectorIndexUnknownOptionProbeIntegrationTest extends AbstractKeyspaceInteg private static final String VECTOR_COL = "embedding"; private static final int DIMENSION = 4; - /** - * Reflective accessor to the private CqlSession in the base class, for direct error inspection. - */ + /** Reflective accessor to the base class's private CqlSession, for direct error inspection. */ private CqlSession session() { try { Method m = AbstractKeyspaceIntegrationTestBase.class.getDeclaredMethod("createDriverSession"); @@ -74,8 +69,8 @@ void createVectorTable() { @Test @Order(2) void controlIndex_knownGoodOptionsOnly_mustSucceed() { - // CONTROL: only a known-good SAI option. Must succeed -> proves table/column/CQL path is valid - // and that an unknown-key failure in the TEST case is specifically about the unknown key. + // Control: only a known-good SAI option. Must succeed, so an unknown-key failure in the test + // case is attributable to the unknown key, not the table/column/CQL path. String cql = String.format( "CREATE CUSTOM INDEX \"idx_control_%s\" ON \"%s\".\"%s\" (%s) " @@ -90,8 +85,8 @@ void controlIndex_knownGoodOptionsOnly_mustSucceed() { @Test @Order(3) void testIndex_unknownOptionKey_recordAcceptOrReject() { - // TEST: add an option key SAI does not know about ('profile'). similarity_function is kept so - // the ONLY difference vs the control is the unknown key. + // Adds an unknown key ('profile'); similarity_function is kept so the only difference vs the + // control is that key. 
String cql = String.format( "CREATE CUSTOM INDEX \"idx_test_%s\" ON \"%s\".\"%s\" (%s) " diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensionsTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensionsTest.java index 4ac19cd453..c1ec67003b 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensionsTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensionsTest.java @@ -48,7 +48,7 @@ void writesIndexProfilesWhenPresent() { var props = TableExtensions.createCustomProperties(Map.of(), profiles, MAPPER); assertThat(props).containsKey(SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES); - // the written value round-trips back to the same profiles + // written value round-trips to the same profiles assertThat( VectorIndexProfileDefinition.fromJson( props.get(SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES), MAPPER)) @@ -57,7 +57,7 @@ void writesIndexProfilesWhenPresent() { @Test void preservesVectorizeAndProfilesTogether() { - // both keys written in one payload, so an extension rewrite carrying both does not lose either + // both keys written in one payload, so a rewrite carrying both loses neither var vectorDefs = Map.of( CqlIdentifier.fromInternal("v"), @@ -140,7 +140,7 @@ void removesProfileAndPreservesOtherProfilesAndVectorize() { assertThat(customProperties) .containsKey(SchemaConstants.MetadataFieldsNames.SCHEMA_TYPE) .containsKey(SchemaConstants.MetadataFieldsNames.SCHEMA_VERSION) - // vectorize config is read back and preserved + // vectorize config read back and preserved .containsKey(SchemaConstants.MetadataFieldsNames.VECTORIZE_CONFIG); // the dropped index's profile is gone, the other index's profile is kept diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinitionTest.java 
b/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinitionTest.java index 6a94ea1607..85782c9c2f 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinitionTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinitionTest.java @@ -36,7 +36,7 @@ void parsesNameAndOptions() { @Test void malformedJsonIsEmpty() { - // advisory metadata: a bad blob must not fail the read + // advisory metadata: bad JSON must not fail the read assertThat(VectorIndexProfileDefinition.fromJson("not json", MAPPER)).isEmpty(); } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskTest.java index fdb75f7040..b336d1152c 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskTest.java @@ -43,7 +43,7 @@ void buildsAlterTableExtensionsStatementForOwningTable() { var query = task.buildStatement().getQuery(); - // ALTER TABLE on the owning table in the schema object's keyspace, updating the extensions map. 
+ // ALTER TABLE on the owning table, in the schema object keyspace, updating extensions assertThat(query) .contains("ALTER TABLE") .contains("my_ks") diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java index 6dfcf1b976..b1d51b5993 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java @@ -21,11 +21,12 @@ import org.junit.jupiter.params.provider.ValueSource; /** - * Unit tests for the overloaded {@code vectorIndexing} (a profile name string or a raw SAI - * options object) handling on {@link ApiVectorIndex}: that a request body deserializes to the - * expected value, how it is validated and turned into the CQL index options map, and how it is - * described back. Deterministic; needs no database (end-to-end also depends on the backend allowing - * custom SAI parameters). + * Unit tests for the overloaded {@code vectorIndexing} on {@link ApiVectorIndex}, where the value + * is either a profile name string or a raw SAI options object. + * + *

Covers deserialization of a request body, validation, the resulting CQL index options map, and + * the describe-back. Needs no database (end-to-end also depends on the backend allowing custom SAI + * parameters). */ class ApiVectorIndexTest { @@ -39,7 +40,7 @@ private static VectorIndexingDesc options(Map options) { return VectorIndexingDesc.ofOptions(options); } - /** Source of every option that has a dedicated field and so is rejected inside options. */ + /** Options with a dedicated field, so rejected inside options. */ static Stream reservedOptions() { return VectorConstants.CQLAnnIndex.RESERVED_OPTIONS.stream(); } @@ -90,8 +91,8 @@ void absentVectorIndexingIsNull() throws Exception { @ParameterizedTest @ValueSource(strings = {"123", "true", "[\"small-high-recall\"]"}) void nonStringNonObjectRejected(String value) { - // Jackson may surface the deserializer's SchemaException directly or wrap it; assert that a - // SchemaException with the expected code is somewhere in the chain. + // Jackson may surface the deserializer's SchemaException directly or wrapped, so assert one + // with the expected code is somewhere in the cause chain. assertThatThrownBy( () -> MAPPER.readValue( @@ -301,7 +302,7 @@ void omitsNonAllowlistedKeys() { options.put(CQLSAIIndex.Options.CLASS_NAME, CQLSAIIndex.SAI_CLASS_NAME); options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OTHER"); options.put(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "16"); - // a real SAI option the API does not manage (e.g. set directly via CQL); not surfaced + // a real SAI option the API does not manage (e.g. 
set directly via CQL), not surfaced options.put("optimize_for", "recall"); var described = ApiVectorIndex.describeIndexingOptions(options); @@ -328,7 +329,7 @@ void keepsAllowlistedAppliedOptionsExcludingReservedAndStructural() { indexOptions.put(CQLSAIIndex.Options.TARGET, "my_vector"); indexOptions.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OPENAI_V3_SMALL"); indexOptions.put(VectorConstants.CQLAnnIndex.SIMILARITY_FUNCTION, "COSINE"); - // small-high-recall's base is 32, but an explicit override applied 99 — snapshot must keep 99 + // small-high-recall's base is 32, but an override applied 99, so the snapshot keeps 99 indexOptions.put(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "99"); indexOptions.put(VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "200"); From 225c0a918cf55de3bd231156e2a4342dcc09fd94 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Mon, 22 Jun 2026 13:12:07 -0700 Subject: [PATCH 10/13] Create VectorIndexProfilePersistenceIntegrationTest.java --- ...ndexProfilePersistenceIntegrationTest.java | 177 ++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexProfilePersistenceIntegrationTest.java diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexProfilePersistenceIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexProfilePersistenceIntegrationTest.java new file mode 100644 index 0000000000..94f6678bdc --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexProfilePersistenceIntegrationTest.java @@ -0,0 +1,177 @@ +package io.stargate.sgv2.jsonapi.api.v1.tables; + +import static io.stargate.sgv2.jsonapi.api.v1.util.DataApiCommandSenders.assertNamespaceCommand; +import static io.stargate.sgv2.jsonapi.api.v1.util.DataApiCommandSenders.assertTableCommand; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assumptions.assumeTrue; 
+ +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.datastax.oss.driver.api.core.CqlSession; +import com.datastax.oss.driver.api.core.cql.SimpleStatement; +import com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata; +import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; +import com.datastax.oss.driver.api.core.servererrors.QueryValidationException; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.quarkus.test.common.WithTestResource; +import io.quarkus.test.junit.QuarkusIntegrationTest; +import io.stargate.sgv2.jsonapi.api.v1.AbstractKeyspaceIntegrationTestBase; +import io.stargate.sgv2.jsonapi.config.constants.SchemaConstants; +import io.stargate.sgv2.jsonapi.service.cqldriver.executor.VectorIndexProfileDefinition; +import io.stargate.sgv2.jsonapi.testresource.DseTestResource; +import java.lang.reflect.Method; +import java.util.Map; +import org.apache.commons.lang3.RandomStringUtils; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +/** + * Happy-path IT for {@code vectorIndexing} profiles (#2487): creating a vector index with a named + * profile records the profile in the table extensions ({@link + * SchemaConstants.MetadataFieldsNames#VECTOR_INDEX_PROFILES}), and dropping the index removes it. + * + *

Creating the index emits custom SAI HNSW params (the profile's expanded options, e.g. {@code + * maximum_node_connections}), which a cluster only accepts with {@code + * SAI_HNSW_ALLOW_CUSTOM_PARAMETERS} enabled. The default {@code dse-server:6.9.21} lane rejects + * them, so a capability probe runs in {@link #setup()} and the test is skipped where unsupported. + * + *

This complements the API-validation cases in {@link CreateTableIndexIntegrationTest}, which + * are backend-agnostic; the create/persist and drop/cleanup DB paths only run where the cluster + * allows custom params. + */ +@QuarkusIntegrationTest +@WithTestResource(value = DseTestResource.class) +class VectorIndexProfilePersistenceIntegrationTest extends AbstractTableIntegrationTestBase { + + private static final ObjectMapper MAPPER = new ObjectMapper(); + + private static final String PROFILE = "small-high-recall"; + // What "small-high-recall" expands to (VectorIndexProfiles registry), stored as the snapshot. + private static final Map EXPECTED_OPTIONS = + Map.of("maximum_node_connections", "32", "construction_beam_width", "200"); + + private final String tableName = + "vix_profile_" + RandomStringUtils.insecure().nextAlphanumeric(8).toLowerCase(); + private final String vectorColumn = "embedding"; + + /** Whether the backing cluster accepts custom SAI HNSW params (probed in {@link #setup()}). */ + private boolean customParamsSupported; + + /** Reflective accessor to the base class's private admin CqlSession, to read schema metadata. 
*/ + private CqlSession session() { + try { + Method m = AbstractKeyspaceIntegrationTestBase.class.getDeclaredMethod("createDriverSession"); + m.setAccessible(true); + return (CqlSession) m.invoke(this); + } catch (ReflectiveOperationException e) { + throw new RuntimeException("Could not obtain CqlSession from base class", e); + } + } + + @BeforeAll + void setup() { + assertNamespaceCommand(keyspaceName) + .templated() + .createTable( + tableName, + Map.ofEntries( + Map.entry("id", Map.of("type", "text")), + Map.entry(vectorColumn, Map.of("type", "vector", "dimension", 4))), + "id") + .wasSuccessful(); + + customParamsSupported = probeCustomSaiParamsSupported(); + } + + @Test + void profilePersistedOnCreateAndRemovedOnDrop() { + assumeTrue( + customParamsSupported, + "cluster does not allow custom SAI HNSW params (SAI_HNSW_ALLOW_CUSTOM_PARAMETERS)"); + + String indexName = tableName + "_idx"; + + assertTableCommand(keyspaceName, tableName) + .postCreateVectorIndex( + """ + { + "name": "%s", + "definition": { + "column": "%s", + "options": { "vectorIndexing": "%s" } + } + } + """ + .formatted(indexName, vectorColumn, PROFILE)) + .wasSuccessful(); + + var afterCreate = readProfiles(); + assertThat(afterCreate).as("profile recorded after create").containsKey(indexName); + assertThat(afterCreate.get(indexName).profile()).isEqualTo(PROFILE); + assertThat(afterCreate.get(indexName).options()) + .as("stored snapshot is the options the profile expanded to") + .isEqualTo(EXPECTED_OPTIONS); + + assertNamespaceCommand(keyspaceName).templated().dropIndex(indexName, false).wasSuccessful(); + + assertThat(readProfiles()).as("profile removed after drop").doesNotContainKey(indexName); + } + + /** + * Probes whether the cluster accepts custom SAI HNSW params by issuing raw CQL on a throwaway + * table: a CREATE CUSTOM INDEX with a known tuning option. Returns false when the cluster rejects + * it ({@link QueryValidationException}); the throwaway table is always dropped. 
Connection or + * other errors propagate so a broken environment fails loudly rather than silently skipping. + */ + private boolean probeCustomSaiParamsSupported() { + String probeTable = + "vix_probe_" + RandomStringUtils.insecure().nextAlphanumeric(8).toLowerCase(); + session() + .execute( + SimpleStatement.newInstance( + String.format( + "CREATE TABLE IF NOT EXISTS \"%s\".\"%s\" " + + "(id text PRIMARY KEY, %s vector)", + keyspaceName, probeTable, vectorColumn))); + try { + session() + .execute( + SimpleStatement.newInstance( + String.format( + "CREATE CUSTOM INDEX \"%s_idx\" ON \"%s\".\"%s\" (%s) " + + "USING 'StorageAttachedIndex' " + + "WITH OPTIONS = {'similarity_function':'cosine'," + + "'maximum_node_connections':'16'}", + probeTable, keyspaceName, probeTable, vectorColumn))); + return true; + } catch (QueryValidationException e) { + return false; + } finally { + session() + .execute( + SimpleStatement.newInstance( + String.format("DROP TABLE IF EXISTS \"%s\".\"%s\"", keyspaceName, probeTable))); + } + } + + /** Reads the VECTOR_INDEX_PROFILES extension off the table after refreshing schema metadata. 
*/ + private Map readProfiles() { + try { + session().refreshSchemaAsync().toCompletableFuture().get(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException("schema refresh interrupted", e); + } catch (Exception e) { + throw new RuntimeException("schema refresh failed", e); + } + KeyspaceMetadata keyspace = + session() + .getMetadata() + .getKeyspace(CqlIdentifier.fromInternal(keyspaceName)) + .orElseThrow(() -> new RuntimeException("keyspace not found: " + keyspaceName)); + TableMetadata table = + keyspace + .getTable(CqlIdentifier.fromInternal(tableName)) + .orElseThrow(() -> new RuntimeException("table not found: " + tableName)); + return VectorIndexProfileDefinition.from(table, MAPPER); + } +} From ab27666d1145ffd1b2b727d6fd7d57223874a0ee Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Mon, 22 Jun 2026 13:32:16 -0700 Subject: [PATCH 11/13] refactor: detect vectorIndexing profile from options instead of persisting it Per discussion on #2508: rather than storing the profile name in table extensions (and cleaning it up on drop), reconstruct it on read-back by matching the index's tuning options against the known profiles. Echo the profile name when they match exactly, otherwise the raw options. Removes the extension-storage path: VectorIndexProfileDefinition, the VECTOR_INDEX_PROFILES extension, the create-side extension write, and the dropIndex profile cleanup (DropVectorIndexProfileDBTask, removeIndexProfile). The request-side API (vectorIndexing field, validation, profile expansion on create) is unchanged. Detection is a stopgap and will likely be replaced before prod. 
--- .../config/constants/SchemaConstants.java | 2 - .../cqldriver/executor/TableExtensions.java | 104 ++------- .../VectorIndexProfileDefinition.java | 70 ------ .../tables/DropVectorIndexProfileDBTask.java | 60 ------ .../DropVectorIndexProfileDBTaskBuilder.java | 56 ----- .../resolver/AlterTableCommandResolver.java | 22 +- .../CreateVectorIndexCommandResolver.java | 113 ++-------- .../resolver/DropIndexCommandResolver.java | 106 +++------- .../service/schema/KeyspaceSchemaObject.java | 12 -- .../service/schema/tables/ApiVectorIndex.java | 30 ++- .../schema/tables/VectorIndexProfiles.java | 18 ++ ...ndexProfilePersistenceIntegrationTest.java | 177 ---------------- .../executor/TableExtensionsTest.java | 199 ------------------ .../VectorIndexProfileDefinitionTest.java | 96 --------- .../DropVectorIndexProfileDBTaskTest.java | 53 ----- .../schema/tables/ApiVectorIndexTest.java | 34 ++- .../tables/VectorIndexProfilesTest.java | 36 ++++ 17 files changed, 162 insertions(+), 1026 deletions(-) delete mode 100644 src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinition.java delete mode 100644 src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTask.java delete mode 100644 src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskBuilder.java delete mode 100644 src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexProfilePersistenceIntegrationTest.java delete mode 100644 src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensionsTest.java delete mode 100644 src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinitionTest.java delete mode 100644 src/test/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskTest.java diff --git a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/SchemaConstants.java 
b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/SchemaConstants.java index 9ea6a4c00c..739a2c619e 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/SchemaConstants.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/SchemaConstants.java @@ -11,8 +11,6 @@ interface MetadataFieldsNames { String SCHEMA_TYPE = "com.datastax.data-api.schema-type"; String SCHEMA_VERSION = "com.datastax.data-api.schema-def-version"; String VECTORIZE_CONFIG = "com.datastax.data-api.vectorize-config"; - // Per vector-index, the profile it was created with (name + expanded options). - String VECTOR_INDEX_PROFILES = "com.datastax.data-api.vector-index-profiles"; } interface MetadataFieldsValues { diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensions.java b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensions.java index b1546c4e2e..c1095108db 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensions.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensions.java @@ -4,7 +4,6 @@ import com.datastax.oss.driver.api.core.CqlIdentifier; import com.datastax.oss.driver.api.core.data.ByteUtils; -import com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata; import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; @@ -14,7 +13,6 @@ import java.util.HashMap; import java.util.Map; import java.util.Objects; -import java.util.Optional; import java.util.stream.Collectors; import org.slf4j.Logger; @@ -65,25 +63,13 @@ private static Map uncheckedExtensions(TableMetadata tableMe return (Map) tableMetadata.getOptions().get(TABLE_OPTIONS_EXTENSION_KEY); } - /** As {@link #createCustomProperties(Map, Map, ObjectMapper)} with no vector index profiles. 
*/ - public static Map createCustomProperties( - Map vectorDefs, ObjectMapper objectMapper) { - return createCustomProperties(vectorDefs, Map.of(), objectMapper); - } - /** - * Builds the table extensions payload: schema type/version (always written, since the command may - * be altering a CQL-created table) plus the vectorize config and vector index profiles. - * - *

Extensions are fully replaced on every write, so callers must pass every def and profile - * they want to keep; anything omitted is dropped. + * Create custom properties for table metadata, This needs to add schema and table always since + * the command may be altering CQL created tables */ public static Map createCustomProperties( - Map vectorDefs, - Map indexProfiles, - ObjectMapper objectMapper) { + Map vectorDefs, ObjectMapper objectMapper) { Objects.requireNonNull(vectorDefs, "vectorDefs must not be null"); - Objects.requireNonNull(indexProfiles, "indexProfiles must not be null"); Objects.requireNonNull(objectMapper, "objectMapper must not be null"); Map customProperties = new HashMap<>(); @@ -95,7 +81,9 @@ public static Map createCustomProperties( SchemaConstants.MetadataFieldsNames.SCHEMA_VERSION, SchemaConstants.MetadataFieldsValues.SCHEMA_VERSION_VERSION); - // Only write a key when it has content (the map is fully replaced anyway). + // because the extensions are always fully replaced, we do not need to write the key if there + // are none + // the full map will be replaced, replacing any existing extensions if (!vectorDefs.isEmpty()) { // convert to strings for serialisation Map stringKeysDefs = @@ -104,79 +92,15 @@ public static Map createCustomProperties( Collectors.toMap( entry -> cqlIdentifierToJsonKey(entry.getKey()), Map.Entry::getValue)); - customProperties.put( - SchemaConstants.MetadataFieldsNames.VECTORIZE_CONFIG, - writeJson(stringKeysDefs, objectMapper)); - } - if (!indexProfiles.isEmpty()) { - customProperties.put( - SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES, - writeJson(indexProfiles, objectMapper)); + try { + customProperties.put( + SchemaConstants.MetadataFieldsNames.VECTORIZE_CONFIG, + objectMapper.writeValueAsString(stringKeysDefs)); + } catch (JsonProcessingException e) { + // this should never happen + throw new RuntimeException(e); + } } return customProperties; } - - /** - * Computes the extensions payload that drops 
{@code indexName}'s vector-index profile from the - * table that owns it, keeping the {@link - * SchemaConstants.MetadataFieldsNames#VECTOR_INDEX_PROFILES} extension in sync so a profile does - * not outlive its index. - * - *

The owning table is the one in {@code keyspaceMetadata} whose indexes contain {@code - * indexName}. Returns empty (so the caller can skip the DDL) when no table owns the index or the - * owning table has no stored profile for it. - * - *

On rewrite the existing vectorize config and the other indexes' profiles are read back and - * included so the full-replace write does not lose them, as on the create side (see {@link - * #createCustomProperties(Map, Map, ObjectMapper)}). - */ - public static Optional removeIndexProfile( - KeyspaceMetadata keyspaceMetadata, CqlIdentifier indexName, ObjectMapper objectMapper) { - Objects.requireNonNull(keyspaceMetadata, "keyspaceMetadata must not be null"); - Objects.requireNonNull(indexName, "indexName must not be null"); - Objects.requireNonNull(objectMapper, "objectMapper must not be null"); - - var owningTable = - keyspaceMetadata.getTables().values().stream() - .filter(table -> table.getIndexes().containsKey(indexName)) - .findFirst(); - if (owningTable.isEmpty()) { - return Optional.empty(); - } - - var tableMetadata = owningTable.get(); - var profiles = VectorIndexProfileDefinition.from(tableMetadata, objectMapper); - // null def => remove; false return => no entry existed, so there is nothing to rewrite. - if (!VectorIndexProfileDefinition.putOrRemove( - profiles, cqlIdentifierToJsonKey(indexName), null)) { - return Optional.empty(); - } - - // Read the vectorize config back so the full-replace write preserves it. Stored keys are the - // columns' internal form, so rebuild the CqlIdentifier keys createCustomProperties expects. - var vectorDefs = - VectorizeDefinition.from(tableMetadata, objectMapper).entrySet().stream() - .collect( - Collectors.toMap( - entry -> CqlIdentifier.fromInternal(entry.getKey()), Map.Entry::getValue)); - - var customProperties = createCustomProperties(vectorDefs, profiles, objectMapper); - return Optional.of(new IndexProfileRemoval(tableMetadata.getName(), customProperties)); - } - - /** - * Result of {@link #removeIndexProfile}: the table to alter and the extensions payload to write - * (dropped index's profile removed, everything else preserved). 
- */ - public record IndexProfileRemoval( - CqlIdentifier tableName, Map customProperties) {} - - private static String writeJson(Object value, ObjectMapper objectMapper) { - try { - return objectMapper.writeValueAsString(value); - } catch (JsonProcessingException e) { - // this should never happen - throw new RuntimeException(e); - } - } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinition.java b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinition.java deleted file mode 100644 index e6638d559d..0000000000 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinition.java +++ /dev/null @@ -1,70 +0,0 @@ -package io.stargate.sgv2.jsonapi.service.cqldriver.executor; - -import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import io.stargate.sgv2.jsonapi.config.constants.SchemaConstants; -import java.util.HashMap; -import java.util.Map; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * The profile a vector index was created with: the profile name plus the SAI options it expanded - * to. Stored per index name in the table extensions (key {@link - * SchemaConstants.MetadataFieldsNames#VECTOR_INDEX_PROFILES}) to keep the profile name. The options - * snapshot stays valid even if the profile definition changes later. - */ -public record VectorIndexProfileDefinition(String profile, Map options) { - - private static final Logger LOGGER = LoggerFactory.getLogger(VectorIndexProfileDefinition.class); - - /** Reads the stored profiles, keyed by index name, from the table extensions. 
*/ - public static Map from( - TableMetadata tableMetadata, ObjectMapper objectMapper) { - var extensions = TableExtensions.getExtensions(tableMetadata); - return fromJson( - extensions.get(SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES), objectMapper); - } - - /** - * Parses the {@code index name -> profile} JSON from the extensions. Returns a mutable map so - * callers can merge changes before writing it back. Profiles are advisory metadata, so a bad blob - * is logged and skipped, not failed. - */ - static Map fromJson( - String json, ObjectMapper objectMapper) { - Map defs = new HashMap<>(); - if (json == null || json.isBlank()) { - return defs; - } - try { - JsonNode byIndex = objectMapper.readTree(json); - for (Map.Entry entry : byIndex.properties()) { - defs.put( - entry.getKey(), - objectMapper.treeToValue(entry.getValue(), VectorIndexProfileDefinition.class)); - } - } catch (JsonProcessingException | IllegalArgumentException e) { - LOGGER.error("Error parsing vector index profiles, json: {}", json, e); - defs.clear(); - } - return defs; - } - - /** - * Records the profile for {@code indexKey} in {@code profiles}, or removes any stale entry when - * {@code def} is null (no profile was used). Returns true if the map changed, letting the caller - * skip an unneeded extension write. 
- */ - public static boolean putOrRemove( - Map profiles, - String indexKey, - VectorIndexProfileDefinition def) { - if (def == null) { - return profiles.remove(indexKey) != null; - } - return !def.equals(profiles.put(indexKey, def)); - } -} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTask.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTask.java deleted file mode 100644 index 011b9b601e..0000000000 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTask.java +++ /dev/null @@ -1,60 +0,0 @@ -package io.stargate.sgv2.jsonapi.service.operation.tables; - -import static com.datastax.oss.driver.api.querybuilder.SchemaBuilder.alterTable; - -import com.datastax.oss.driver.api.core.CqlIdentifier; -import com.datastax.oss.driver.api.core.cql.SimpleStatement; -import io.stargate.sgv2.jsonapi.service.cqldriver.executor.DefaultDriverExceptionHandler; -import io.stargate.sgv2.jsonapi.service.cqldriver.executor.TableExtensions; -import io.stargate.sgv2.jsonapi.service.operation.SchemaDBTask; -import io.stargate.sgv2.jsonapi.service.schema.KeyspaceSchemaObject; -import java.util.Map; -import java.util.Objects; - -/** - * Removes a dropped index's entry from its owning table's vector-index-profiles extension, so the - * profile record does not outlive the index. - * - *

Keyspace-scoped so it can share a {@link - * io.stargate.sgv2.jsonapi.service.operation.tasks.TaskGroup} with {@link DropIndexDBTask}; a - * {@link io.stargate.sgv2.jsonapi.service.schema.tables.TableSchemaObject}-typed {@link - * AlterTableDBTask} (the create side) cannot, since a TaskGroup has a single schema-object type. - * Owning table and rewritten extensions payload are resolved at command-resolve time via {@link - * TableExtensions#removeIndexProfile}; this task only issues the {@code ALTER TABLE ... WITH - * extensions = {...}}. - */ -public class DropVectorIndexProfileDBTask extends SchemaDBTask { - - private final CqlIdentifier tableName; - private final Map customProperties; - - public DropVectorIndexProfileDBTask( - int position, - KeyspaceSchemaObject schemaObject, - SchemaDBTask.SchemaRetryPolicy schemaRetryPolicy, - DefaultDriverExceptionHandler.Factory exceptionHandlerFactory, - CqlIdentifier tableName, - Map customProperties) { - super(position, schemaObject, schemaRetryPolicy, exceptionHandlerFactory); - - this.tableName = Objects.requireNonNull(tableName, "tableName must not be null"); - this.customProperties = - Objects.requireNonNull(customProperties, "customProperties must not be null"); - setStatus(TaskStatus.READY); - } - - public static DropVectorIndexProfileDBTaskBuilder builder(KeyspaceSchemaObject schemaObject) { - return new DropVectorIndexProfileDBTaskBuilder(schemaObject); - } - - @Override - protected SimpleStatement buildStatement() { - - // owning table lives in this keyspace; keyspace from the schema object identifier, as - // DropIndexDBTask does - var extensions = TableExtensions.toExtensions(customProperties); - return alterTable(schemaObject.identifier().keyspace(), tableName) - .withOption(TableExtensions.TABLE_OPTIONS_EXTENSION_KEY.asInternal(), extensions) - .build(); - } -} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskBuilder.java 
b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskBuilder.java deleted file mode 100644 index 1f60b57fe8..0000000000 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskBuilder.java +++ /dev/null @@ -1,56 +0,0 @@ -package io.stargate.sgv2.jsonapi.service.operation.tables; - -import com.datastax.oss.driver.api.core.CqlIdentifier; -import io.stargate.sgv2.jsonapi.service.operation.SchemaDBTask; -import io.stargate.sgv2.jsonapi.service.operation.tasks.TaskBuilder; -import io.stargate.sgv2.jsonapi.service.schema.KeyspaceSchemaObject; -import java.util.Map; -import java.util.Objects; - -/** Builds a {@link DropVectorIndexProfileDBTask}. */ -public class DropVectorIndexProfileDBTaskBuilder - extends TaskBuilder< - DropVectorIndexProfileDBTask, KeyspaceSchemaObject, DropVectorIndexProfileDBTaskBuilder> { - - private CqlIdentifier tableName; - private Map customProperties; - private SchemaDBTask.SchemaRetryPolicy schemaRetryPolicy; - - protected DropVectorIndexProfileDBTaskBuilder(KeyspaceSchemaObject schemaObject) { - super(schemaObject); - } - - public DropVectorIndexProfileDBTaskBuilder withTableName(CqlIdentifier tableName) { - this.tableName = Objects.requireNonNull(tableName, "tableName must not be null"); - return this; - } - - public DropVectorIndexProfileDBTaskBuilder withCustomProperties( - Map customProperties) { - this.customProperties = - Objects.requireNonNull(customProperties, "customProperties must not be null"); - return this; - } - - public DropVectorIndexProfileDBTaskBuilder withSchemaRetryPolicy( - SchemaDBTask.SchemaRetryPolicy schemaRetryPolicy) { - this.schemaRetryPolicy = - Objects.requireNonNull(schemaRetryPolicy, "schemaRetryPolicy cannot be null"); - return this; - } - - public DropVectorIndexProfileDBTask build() { - - Objects.requireNonNull(tableName, "tableName must not be null"); - Objects.requireNonNull(customProperties, "customProperties must not be 
null"); - Objects.requireNonNull(schemaRetryPolicy, "schemaRetryPolicy cannot be null"); - - return new DropVectorIndexProfileDBTask( - nextPosition(), - schemaObject, - schemaRetryPolicy, - getExceptionHandlerFactory(), - tableName, - customProperties); - } -} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/AlterTableCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/AlterTableCommandResolver.java index 2de0e51ca9..28ea3128f4 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/AlterTableCommandResolver.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/AlterTableCommandResolver.java @@ -14,7 +14,6 @@ import io.stargate.sgv2.jsonapi.exception.SchemaException; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.DefaultDriverExceptionHandler; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.TableExtensions; -import io.stargate.sgv2.jsonapi.service.cqldriver.executor.VectorIndexProfileDefinition; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.VectorizeDefinition; import io.stargate.sgv2.jsonapi.service.operation.Operation; import io.stargate.sgv2.jsonapi.service.operation.SchemaDBTask; @@ -87,15 +86,6 @@ public Operation resolveTableCommand( taskGroup, SchemaDBTaskPage.accumulator(AlterTableDBTask.class, commandContext)); } - /** - * Existing vector-index profiles for the table. Altering columns/vectorize replaces the - * extensions wholesale, so these must be carried through to avoid wiping them. 
- */ - private Map existingIndexProfiles( - TableSchemaObject tableSchemaObject) { - return VectorIndexProfileDefinition.from(tableSchemaObject.tableMetadata(), objectMapper); - } - private List handleAddColumns( AlterTableDBTaskBuilder taskBuilder, TableSchemaObject tableSchemaObject, @@ -170,8 +160,7 @@ private List handleAddColumns( // New custom property to be updated var customProperties = - TableExtensions.createCustomProperties( - existingVectorizeDef, existingIndexProfiles(tableSchemaObject), objectMapper); + TableExtensions.createCustomProperties(existingVectorizeDef, objectMapper); // First execute the extension update for add columns // so if we fail to add this we do not end up with a column that has missing vectorize // definition @@ -284,8 +273,7 @@ private List handleDropColumns( if (updateVectorize) { attempts.add( taskBuilder.buildUpdateExtensions( - TableExtensions.createCustomProperties( - existingVectorizeDefs, existingIndexProfiles(tableSchemaObject), objectMapper))); + TableExtensions.createCustomProperties(existingVectorizeDefs, objectMapper))); } return attempts; } @@ -369,8 +357,7 @@ private List handleAddVectorize( return List.of( taskBuilder.buildUpdateExtensions( - TableExtensions.createCustomProperties( - existingVectorizeDefs, existingIndexProfiles(tableSchemaObject), objectMapper))); + TableExtensions.createCustomProperties(existingVectorizeDefs, objectMapper))); } private List handleDropVectorize( @@ -447,8 +434,7 @@ private List handleDropVectorize( return List.of( taskBuilder.buildUpdateExtensions( - TableExtensions.createCustomProperties( - existingVectorizeDefs, existingIndexProfiles(tableSchemaObject), objectMapper))); + TableExtensions.createCustomProperties(existingVectorizeDefs, objectMapper))); } @Override diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java index 
48841723ab..87f24724b8 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateVectorIndexCommandResolver.java @@ -2,20 +2,14 @@ import static io.stargate.sgv2.jsonapi.exception.ErrorFormatters.errFmtJoin; import static io.stargate.sgv2.jsonapi.util.ApiOptionUtils.getOrDefault; -import static io.stargate.sgv2.jsonapi.util.CqlIdentifierUtil.cqlIdentifierToJsonKey; -import com.fasterxml.jackson.databind.ObjectMapper; import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; import io.stargate.sgv2.jsonapi.api.model.command.impl.CreateVectorIndexCommand; import io.stargate.sgv2.jsonapi.config.OperationsConfig; import io.stargate.sgv2.jsonapi.config.constants.TableDescDefaults; import io.stargate.sgv2.jsonapi.exception.SchemaException; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.DefaultDriverExceptionHandler; -import io.stargate.sgv2.jsonapi.service.cqldriver.executor.TableExtensions; -import io.stargate.sgv2.jsonapi.service.cqldriver.executor.VectorIndexProfileDefinition; import io.stargate.sgv2.jsonapi.service.operation.*; -import io.stargate.sgv2.jsonapi.service.operation.tables.AlterTableDBTask; -import io.stargate.sgv2.jsonapi.service.operation.tables.AlterTableExceptionHandler; import io.stargate.sgv2.jsonapi.service.operation.tables.CreateIndexDBTask; import io.stargate.sgv2.jsonapi.service.operation.tables.CreateIndexDBTaskBuilder; import io.stargate.sgv2.jsonapi.service.operation.tables.CreateIndexExceptionHandler; @@ -26,7 +20,6 @@ import io.stargate.sgv2.jsonapi.service.schema.tables.ApiVectorIndex; import io.stargate.sgv2.jsonapi.service.schema.tables.TableSchemaObject; import jakarta.enterprise.context.ApplicationScoped; -import jakarta.inject.Inject; import java.time.Duration; import java.util.Map; @@ -34,8 +27,6 @@ @ApplicationScoped public class CreateVectorIndexCommandResolver implements CommandResolver { - 
@Inject ObjectMapper objectMapper; - @Override public Class getCommandClass() { return CreateVectorIndexCommand.class; @@ -71,105 +62,39 @@ public Operation resolveTableCommand( command.indexType())); } - var schemaObject = commandContext.schemaObject(); - // TODO: we need a centralised way of creating retry attempt. - var schemaRetryPolicy = - new SchemaDBTask.SchemaRetryPolicy( - commandContext.config().get(OperationsConfig.class).databaseConfig().ddlRetries(), - Duration.ofMillis( - commandContext - .config() - .get(OperationsConfig.class) - .databaseConfig() - .ddlRetryDelayMillis())); - CreateIndexDBTaskBuilder taskBuilder = - CreateIndexDBTask.builder(schemaObject) + CreateIndexDBTask.builder(commandContext.schemaObject()) .withIfNotExists( getOrDefault( command.options(), CreateVectorIndexCommand.CreateVectorIndexCommandOptions::ifNotExists, TableDescDefaults.CreateVectorIndexOptionsDefaults.IF_NOT_EXISTS)) - .withSchemaRetryPolicy(schemaRetryPolicy); + .withSchemaRetryPolicy( + new SchemaDBTask.SchemaRetryPolicy( + commandContext + .config() + .get(OperationsConfig.class) + .databaseConfig() + .ddlRetries(), + Duration.ofMillis( + commandContext + .config() + .get(OperationsConfig.class) + .databaseConfig() + .ddlRetryDelayMillis()))); // this will throw APIException if the index is not supported var apiIndex = - ApiVectorIndex.FROM_DESC_FACTORY.create(schemaObject, indexName, command.definition()); + ApiVectorIndex.FROM_DESC_FACTORY.create( + commandContext.schemaObject(), indexName, command.definition()); taskBuilder.withExceptionHandlerFactory( DefaultDriverExceptionHandler.Factory.withIdentifier( CreateIndexExceptionHandler::new, apiIndex.indexName())); - var createIndexTask = taskBuilder.build(apiIndex); - - // Records a named profile (name plus the options it expanded to) in the table extensions, run - // as a second DDL after the index so a failed create leaves no orphan record. Null when there - // is nothing to persist or the index already exists. 
- var extensionTask = - buildProfileExtensionTask(schemaObject, apiIndex, command, schemaRetryPolicy); - if (extensionTask == null) { - return new TaskOperation<>( - new TaskGroup<>(createIndexTask), - SchemaDBTaskPage.accumulator(CreateIndexDBTask.class, commandContext)); - } - - // sequential so the extension write only runs if the index was created - TaskGroup, TableSchemaObject> taskGroup = new TaskGroup<>(true); - taskGroup.add(createIndexTask); - taskGroup.add(extensionTask); - - @SuppressWarnings("unchecked") - Class> taskClass = - (Class>) (Class) SchemaDBTask.class; - return new TaskOperation<>(taskGroup, SchemaDBTaskPage.accumulator(taskClass, commandContext)); - } - - /** - * Builds the ALTER TABLE task that records this index's profile in the table extensions. - * - *

Returns null when the index already exists (a {@code CREATE ... IF NOT EXISTS} is a no-op, - * so its stored profile must keep matching the live index), or when no profile is used and there - * is no stale entry to clear. The stored options are what the profile expanded to; existing - * vectorize config and other profiles are read back and rewritten so they are not lost. - */ - private AlterTableDBTask buildProfileExtensionTask( - TableSchemaObject schemaObject, - ApiVectorIndex apiIndex, - CreateVectorIndexCommand command, - SchemaDBTask.SchemaRetryPolicy schemaRetryPolicy) { - - // Index already exists: the IF NOT EXISTS create is a no-op, so leave its stored profile alone. - if (schemaObject.tableMetadata().getIndexes().containsKey(apiIndex.indexName())) { - return null; - } - - var options = command.definition().options(); - var vectorIndexing = (options == null) ? null : options.vectorIndexing(); - // Only a named profile is recorded; bare options carry no name to store. - var profileName = (vectorIndexing == null) ? null : vectorIndexing.profile(); - - var indexKey = cqlIdentifierToJsonKey(apiIndex.indexName()); - var profiles = VectorIndexProfileDefinition.from(schemaObject.tableMetadata(), objectMapper); - - // Snapshot the options the profile expanded to, so the stored metadata matches the live index. - var def = - (profileName == null) - ? 
null - : new VectorIndexProfileDefinition(profileName, apiIndex.appliedTuningOptions()); - - if (!VectorIndexProfileDefinition.putOrRemove(profiles, indexKey, def)) { - return null; - } - - var customProperties = - TableExtensions.createCustomProperties( - schemaObject.apiTableDef().allColumns().getVectorizeDefs(), profiles, objectMapper); + var taskGroup = new TaskGroup<>(taskBuilder.build(apiIndex)); - return AlterTableDBTask.builder(schemaObject) - .withRetryPolicy(schemaRetryPolicy) - .withExceptionHandlerFactory( - DefaultDriverExceptionHandler.Factory.withIdentifier( - AlterTableExceptionHandler::new, schemaObject.tableName())) - .buildUpdateExtensions(customProperties); + return new TaskOperation<>( + taskGroup, SchemaDBTaskPage.accumulator(CreateIndexDBTask.class, commandContext)); } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/DropIndexCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/DropIndexCommandResolver.java index 27d7dbca3c..cf6ff8e511 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/DropIndexCommandResolver.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/DropIndexCommandResolver.java @@ -2,26 +2,20 @@ import static io.stargate.sgv2.jsonapi.util.CqlIdentifierUtil.cqlIdentifierFromUserInput; -import com.datastax.oss.driver.api.core.CqlIdentifier; -import com.fasterxml.jackson.databind.ObjectMapper; import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; import io.stargate.sgv2.jsonapi.api.model.command.impl.DropIndexCommand; import io.stargate.sgv2.jsonapi.config.OperationsConfig; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.DefaultDriverExceptionHandler; -import io.stargate.sgv2.jsonapi.service.cqldriver.executor.TableExtensions; import io.stargate.sgv2.jsonapi.service.operation.Operation; import io.stargate.sgv2.jsonapi.service.operation.SchemaDBTask; import io.stargate.sgv2.jsonapi.service.operation.SchemaDBTaskPage; -import 
io.stargate.sgv2.jsonapi.service.operation.keyspaces.KeyspaceDriverExceptionHandler; import io.stargate.sgv2.jsonapi.service.operation.tables.DropIndexDBTask; import io.stargate.sgv2.jsonapi.service.operation.tables.DropIndexExceptionHandler; -import io.stargate.sgv2.jsonapi.service.operation.tables.DropVectorIndexProfileDBTask; import io.stargate.sgv2.jsonapi.service.operation.tasks.TaskGroup; import io.stargate.sgv2.jsonapi.service.operation.tasks.TaskOperation; import io.stargate.sgv2.jsonapi.service.schema.KeyspaceSchemaObject; import io.stargate.sgv2.jsonapi.util.ApiOptionUtils; import jakarta.enterprise.context.ApplicationScoped; -import jakarta.inject.Inject; import java.time.Duration; /** Resolver for the {@link DropIndexCommand}. */ @@ -30,8 +24,6 @@ public class DropIndexCommandResolver implements CommandResolver getCommandClass() { return DropIndexCommand.class; @@ -41,77 +33,39 @@ public Class getCommandClass() { public Operation resolveKeyspaceCommand( CommandContext commandContext, DropIndexCommand command) { - var schemaObject = commandContext.schemaObject(); var indexName = cqlIdentifierFromUserInput(command.name()); // Check if the index exists, we check if columns exist before trying to drop them so do for // indexes as well - var schemaRetryPolicy = - new SchemaDBTask.SchemaRetryPolicy( - commandContext.config().get(OperationsConfig.class).databaseConfig().ddlRetries(), - Duration.ofMillis( - commandContext - .config() - .get(OperationsConfig.class) - .databaseConfig() - .ddlRetryDelayMillis())); - - var dropIndexTask = - DropIndexDBTask.builder(schemaObject) - .withSchemaRetryPolicy(schemaRetryPolicy) - .withExceptionHandlerFactory( - DefaultDriverExceptionHandler.Factory.withIdentifier( - DropIndexExceptionHandler::new, indexName)) - .withIndexName(indexName) - .withIfExists( - ApiOptionUtils.getOrDefault( - command.options(), DropIndexCommand.Options::ifExists, IF_EXISTS_DEFAULT)) - .build(); - - // Drop the index's vector-index profile from 
the owning table's extensions so it does not - // outlive the index. Null when there is no profile to remove, leaving only the drop. - var profileCleanupTask = buildProfileCleanupTask(schemaObject, indexName, schemaRetryPolicy); - - if (profileCleanupTask == null) { - return new TaskOperation<>( - new TaskGroup<>(dropIndexTask), - SchemaDBTaskPage.accumulator(DropIndexDBTask.class, commandContext)); - } - - // Sequential so the extension cleanup only runs if the index drop succeeded. - TaskGroup, KeyspaceSchemaObject> taskGroup = - new TaskGroup<>(true); - taskGroup.add(dropIndexTask); - taskGroup.add(profileCleanupTask); - - @SuppressWarnings("unchecked") - Class> taskClass = - (Class>) (Class) SchemaDBTask.class; - return new TaskOperation<>(taskGroup, SchemaDBTaskPage.accumulator(taskClass, commandContext)); - } - - /** - * Task that removes the dropped index's profile from its owning table's extensions. Null when - * there is nothing to clean up: keyspace metadata unknown, no owning table, or no stored profile. 
- */ - private DropVectorIndexProfileDBTask buildProfileCleanupTask( - KeyspaceSchemaObject schemaObject, - CqlIdentifier indexName, - SchemaDBTask.SchemaRetryPolicy schemaRetryPolicy) { - - return schemaObject - .keyspaceMetadata() - .flatMap( - keyspaceMetadata -> - TableExtensions.removeIndexProfile(keyspaceMetadata, indexName, objectMapper)) - .map( - removal -> - DropVectorIndexProfileDBTask.builder(schemaObject) - .withSchemaRetryPolicy(schemaRetryPolicy) - .withExceptionHandlerFactory(KeyspaceDriverExceptionHandler::new) - .withTableName(removal.tableName()) - .withCustomProperties(removal.customProperties()) - .build()) - .orElse(null); + var taskBuilder = + DropIndexDBTask.builder(commandContext.schemaObject()) + .withSchemaRetryPolicy( + new SchemaDBTask.SchemaRetryPolicy( + commandContext + .config() + .get(OperationsConfig.class) + .databaseConfig() + .ddlRetries(), + Duration.ofMillis( + commandContext + .config() + .get(OperationsConfig.class) + .databaseConfig() + .ddlRetryDelayMillis()))); + + taskBuilder.withExceptionHandlerFactory( + DefaultDriverExceptionHandler.Factory.withIdentifier( + DropIndexExceptionHandler::new, indexName)); + + taskBuilder + .withIndexName(indexName) + .withIfExists( + ApiOptionUtils.getOrDefault( + command.options(), DropIndexCommand.Options::ifExists, IF_EXISTS_DEFAULT)); + + var taskGroup = new TaskGroup<>(taskBuilder.build()); + + return new TaskOperation<>( + taskGroup, SchemaDBTaskPage.accumulator(DropIndexDBTask.class, commandContext)); } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/KeyspaceSchemaObject.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/KeyspaceSchemaObject.java index 57f879d3e1..11bdef3f59 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/KeyspaceSchemaObject.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/KeyspaceSchemaObject.java @@ -6,7 +6,6 @@ import io.stargate.sgv2.jsonapi.service.cqldriver.executor.IndexUsage; import 
io.stargate.sgv2.jsonapi.service.cqldriver.executor.VectorConfig; import java.util.Objects; -import java.util.Optional; /** * A Keyspace in the API. @@ -40,17 +39,6 @@ public KeyspaceSchemaObject(Tenant tenant, KeyspaceMetadata keyspaceMetadata) { Objects.requireNonNull(keyspaceMetadata, "keyspaceMetadata must not be null"); } - /** - * The Cassandra metadata for this keyspace. - * - *

Empty for objects built via the {@link #KeyspaceSchemaObject(SchemaObjectIdentifier)} test - * constructor; present for objects built from live schema. Gives access to the keyspace's tables - * and their indexes (e.g. to find the table that owns a named index). - */ - public Optional keyspaceMetadata() { - return Optional.ofNullable(keyspaceMetadata); - } - @Override public VectorConfig vectorConfig() { return VectorConfig.NOT_ENABLED_CONFIG; diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java index 82711247da..a69548389a 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java @@ -82,10 +82,11 @@ public VectorIndexDefinitionDesc definition() { /** * Builds the {@code vectorIndexing} description from the CQL index options map, keeping only the - * supported tuning options (see {@link VectorConstants.CQLAnnIndex#ALLOWED_OPTIONS}) under {@code - * options}. Structural, dedicated-field, and CQL-only keys are dropped to stay symmetric with - * what the API accepts. The profile name lives in the table extensions, not here, so only {@code - * options} is set. + * supported tuning options (see {@link VectorConstants.CQLAnnIndex#ALLOWED_OPTIONS}). When those + * options exactly match a known profile the profile name is echoed; otherwise the raw options + * are. Structural, dedicated-field, and CQL-only keys are dropped to stay symmetric with what the + * API accepts. The profile is not stored, so it is detected from the options (see {@link + * VectorIndexProfiles#detect(Map)}). 
* * @return the {@code vectorIndexing} description, or null when there are no supported tuning * options @@ -94,18 +95,15 @@ public VectorIndexDefinitionDesc definition() { static VectorIndexDefinitionDesc.VectorIndexingDesc describeIndexingOptions( Map indexOptions) { var tuning = tuningOptions(indexOptions); - return tuning.isEmpty() - ? null - : VectorIndexDefinitionDesc.VectorIndexingDesc.ofOptions(new LinkedHashMap<>(tuning)); - } - - /** - * The supported SAI tuning options applied to this index (profile expansion plus explicit - * overrides). Snapshotted next to a stored profile name to capture the live index rather than the - * base profile. - */ - public Map appliedTuningOptions() { - return tuningOptions(indexOptions); + if (tuning.isEmpty()) { + return null; + } + return VectorIndexProfiles.detect(tuning) + .map(VectorIndexDefinitionDesc.VectorIndexingDesc::ofProfile) + .orElseGet( + () -> + VectorIndexDefinitionDesc.VectorIndexingDesc.ofOptions( + new LinkedHashMap<>(tuning))); } /** Keeps only the {@link VectorConstants.CQLAnnIndex#ALLOWED_OPTIONS} from a CQL options map. */ diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java index 978ea95228..4b8a373fc3 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java @@ -45,4 +45,22 @@ public static Optional> forName(String name) { public static Set knownNames() { return PROFILES.keySet(); } + + /** + * Reverse lookup: the profile whose expanded options exactly match {@code options}, used on + * read-back to label an index that was created from a known profile. Exact match only, so an + * index whose options differ from, or are a superset of, a profile reports its raw options + * instead. 
The stored options are not persisted, so this is a best-effort reconstruction. + * + * @return the matching profile name, or empty if {@code options} is null/empty or matches none + */ + public static Optional detect(Map options) { + if (options == null || options.isEmpty()) { + return Optional.empty(); + } + return PROFILES.entrySet().stream() + .filter(entry -> entry.getValue().equals(options)) + .map(Map.Entry::getKey) + .findFirst(); + } } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexProfilePersistenceIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexProfilePersistenceIntegrationTest.java deleted file mode 100644 index 94f6678bdc..0000000000 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexProfilePersistenceIntegrationTest.java +++ /dev/null @@ -1,177 +0,0 @@ -package io.stargate.sgv2.jsonapi.api.v1.tables; - -import static io.stargate.sgv2.jsonapi.api.v1.util.DataApiCommandSenders.assertNamespaceCommand; -import static io.stargate.sgv2.jsonapi.api.v1.util.DataApiCommandSenders.assertTableCommand; -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assumptions.assumeTrue; - -import com.datastax.oss.driver.api.core.CqlIdentifier; -import com.datastax.oss.driver.api.core.CqlSession; -import com.datastax.oss.driver.api.core.cql.SimpleStatement; -import com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata; -import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; -import com.datastax.oss.driver.api.core.servererrors.QueryValidationException; -import com.fasterxml.jackson.databind.ObjectMapper; -import io.quarkus.test.common.WithTestResource; -import io.quarkus.test.junit.QuarkusIntegrationTest; -import io.stargate.sgv2.jsonapi.api.v1.AbstractKeyspaceIntegrationTestBase; -import io.stargate.sgv2.jsonapi.config.constants.SchemaConstants; -import 
io.stargate.sgv2.jsonapi.service.cqldriver.executor.VectorIndexProfileDefinition; -import io.stargate.sgv2.jsonapi.testresource.DseTestResource; -import java.lang.reflect.Method; -import java.util.Map; -import org.apache.commons.lang3.RandomStringUtils; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -/** - * Happy-path IT for {@code vectorIndexing} profiles (#2487): creating a vector index with a named - * profile records the profile in the table extensions ({@link - * SchemaConstants.MetadataFieldsNames#VECTOR_INDEX_PROFILES}), and dropping the index removes it. - * - *

Creating the index emits custom SAI HNSW params (the profile's expanded options, e.g. {@code - * maximum_node_connections}), which a cluster only accepts with {@code - * SAI_HNSW_ALLOW_CUSTOM_PARAMETERS} enabled. The default {@code dse-server:6.9.21} lane rejects - * them, so a capability probe runs in {@link #setup()} and the test is skipped where unsupported. - * - *

This complements the API-validation cases in {@link CreateTableIndexIntegrationTest}, which - * are backend-agnostic; the create/persist and drop/cleanup DB paths only run where the cluster - * allows custom params. - */ -@QuarkusIntegrationTest -@WithTestResource(value = DseTestResource.class) -class VectorIndexProfilePersistenceIntegrationTest extends AbstractTableIntegrationTestBase { - - private static final ObjectMapper MAPPER = new ObjectMapper(); - - private static final String PROFILE = "small-high-recall"; - // What "small-high-recall" expands to (VectorIndexProfiles registry), stored as the snapshot. - private static final Map EXPECTED_OPTIONS = - Map.of("maximum_node_connections", "32", "construction_beam_width", "200"); - - private final String tableName = - "vix_profile_" + RandomStringUtils.insecure().nextAlphanumeric(8).toLowerCase(); - private final String vectorColumn = "embedding"; - - /** Whether the backing cluster accepts custom SAI HNSW params (probed in {@link #setup()}). */ - private boolean customParamsSupported; - - /** Reflective accessor to the base class's private admin CqlSession, to read schema metadata. 
*/ - private CqlSession session() { - try { - Method m = AbstractKeyspaceIntegrationTestBase.class.getDeclaredMethod("createDriverSession"); - m.setAccessible(true); - return (CqlSession) m.invoke(this); - } catch (ReflectiveOperationException e) { - throw new RuntimeException("Could not obtain CqlSession from base class", e); - } - } - - @BeforeAll - void setup() { - assertNamespaceCommand(keyspaceName) - .templated() - .createTable( - tableName, - Map.ofEntries( - Map.entry("id", Map.of("type", "text")), - Map.entry(vectorColumn, Map.of("type", "vector", "dimension", 4))), - "id") - .wasSuccessful(); - - customParamsSupported = probeCustomSaiParamsSupported(); - } - - @Test - void profilePersistedOnCreateAndRemovedOnDrop() { - assumeTrue( - customParamsSupported, - "cluster does not allow custom SAI HNSW params (SAI_HNSW_ALLOW_CUSTOM_PARAMETERS)"); - - String indexName = tableName + "_idx"; - - assertTableCommand(keyspaceName, tableName) - .postCreateVectorIndex( - """ - { - "name": "%s", - "definition": { - "column": "%s", - "options": { "vectorIndexing": "%s" } - } - } - """ - .formatted(indexName, vectorColumn, PROFILE)) - .wasSuccessful(); - - var afterCreate = readProfiles(); - assertThat(afterCreate).as("profile recorded after create").containsKey(indexName); - assertThat(afterCreate.get(indexName).profile()).isEqualTo(PROFILE); - assertThat(afterCreate.get(indexName).options()) - .as("stored snapshot is the options the profile expanded to") - .isEqualTo(EXPECTED_OPTIONS); - - assertNamespaceCommand(keyspaceName).templated().dropIndex(indexName, false).wasSuccessful(); - - assertThat(readProfiles()).as("profile removed after drop").doesNotContainKey(indexName); - } - - /** - * Probes whether the cluster accepts custom SAI HNSW params by issuing raw CQL on a throwaway - * table: a CREATE CUSTOM INDEX with a known tuning option. Returns false when the cluster rejects - * it ({@link QueryValidationException}); the throwaway table is always dropped. 
Connection or - * other errors propagate so a broken environment fails loudly rather than silently skipping. - */ - private boolean probeCustomSaiParamsSupported() { - String probeTable = - "vix_probe_" + RandomStringUtils.insecure().nextAlphanumeric(8).toLowerCase(); - session() - .execute( - SimpleStatement.newInstance( - String.format( - "CREATE TABLE IF NOT EXISTS \"%s\".\"%s\" " - + "(id text PRIMARY KEY, %s vector)", - keyspaceName, probeTable, vectorColumn))); - try { - session() - .execute( - SimpleStatement.newInstance( - String.format( - "CREATE CUSTOM INDEX \"%s_idx\" ON \"%s\".\"%s\" (%s) " - + "USING 'StorageAttachedIndex' " - + "WITH OPTIONS = {'similarity_function':'cosine'," - + "'maximum_node_connections':'16'}", - probeTable, keyspaceName, probeTable, vectorColumn))); - return true; - } catch (QueryValidationException e) { - return false; - } finally { - session() - .execute( - SimpleStatement.newInstance( - String.format("DROP TABLE IF EXISTS \"%s\".\"%s\"", keyspaceName, probeTable))); - } - } - - /** Reads the VECTOR_INDEX_PROFILES extension off the table after refreshing schema metadata. 
*/ - private Map readProfiles() { - try { - session().refreshSchemaAsync().toCompletableFuture().get(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException("schema refresh interrupted", e); - } catch (Exception e) { - throw new RuntimeException("schema refresh failed", e); - } - KeyspaceMetadata keyspace = - session() - .getMetadata() - .getKeyspace(CqlIdentifier.fromInternal(keyspaceName)) - .orElseThrow(() -> new RuntimeException("keyspace not found: " + keyspaceName)); - TableMetadata table = - keyspace - .getTable(CqlIdentifier.fromInternal(tableName)) - .orElseThrow(() -> new RuntimeException("table not found: " + tableName)); - return VectorIndexProfileDefinition.from(table, MAPPER); - } -} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensionsTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensionsTest.java deleted file mode 100644 index c1ec67003b..0000000000 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/TableExtensionsTest.java +++ /dev/null @@ -1,199 +0,0 @@ -package io.stargate.sgv2.jsonapi.service.cqldriver.executor; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import com.datastax.oss.driver.api.core.CqlIdentifier; -import com.datastax.oss.driver.api.core.metadata.schema.IndexMetadata; -import com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata; -import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; -import com.fasterxml.jackson.databind.ObjectMapper; -import io.stargate.sgv2.jsonapi.config.constants.SchemaConstants; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; -import org.junit.jupiter.api.Nested; -import org.junit.jupiter.api.Test; - -class TableExtensionsTest { - - 
private static final ObjectMapper MAPPER = new ObjectMapper(); - - @Test - void schemaTypeAndVersionAlwaysPresent() { - var props = TableExtensions.createCustomProperties(Map.of(), Map.of(), MAPPER); - - assertThat(props) - .containsEntry( - SchemaConstants.MetadataFieldsNames.SCHEMA_TYPE, - SchemaConstants.MetadataFieldsValues.SCHEMA_TYPE_TABLE_VALUE) - .containsEntry( - SchemaConstants.MetadataFieldsNames.SCHEMA_VERSION, - SchemaConstants.MetadataFieldsValues.SCHEMA_VERSION_VERSION) - .doesNotContainKey(SchemaConstants.MetadataFieldsNames.VECTORIZE_CONFIG) - .doesNotContainKey(SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES); - } - - @Test - void writesIndexProfilesWhenPresent() { - var profiles = - Map.of( - "my_idx", - new VectorIndexProfileDefinition( - "small-high-recall", Map.of("maximum_node_connections", "32"))); - - var props = TableExtensions.createCustomProperties(Map.of(), profiles, MAPPER); - - assertThat(props).containsKey(SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES); - // written value round-trips to the same profiles - assertThat( - VectorIndexProfileDefinition.fromJson( - props.get(SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES), MAPPER)) - .isEqualTo(profiles); - } - - @Test - void preservesVectorizeAndProfilesTogether() { - // both keys written in one payload, so a rewrite carrying both loses neither - var vectorDefs = - Map.of( - CqlIdentifier.fromInternal("v"), - new VectorizeDefinition("openai", "text-embedding-3-small", null, null)); - var profiles = - Map.of( - "v_idx", - new VectorIndexProfileDefinition( - "big-low-latency", Map.of("maximum_node_connections", "16"))); - - var props = TableExtensions.createCustomProperties(vectorDefs, profiles, MAPPER); - - assertThat(props) - .containsKey(SchemaConstants.MetadataFieldsNames.VECTORIZE_CONFIG) - .containsKey(SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES); - } - - @Test - void twoArgOverloadOmitsProfiles() { - var props = 
TableExtensions.createCustomProperties(Map.of(), MAPPER); - - assertThat(props).doesNotContainKey(SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES); - } - - @Nested - class RemoveIndexProfile { - - private static final CqlIdentifier MY_IDX = CqlIdentifier.fromInternal("my_idx"); - - @Test - void emptyWhenNoTableOwnsTheIndex() { - // the only table in the keyspace carries a different index - var keyspace = - keyspace( - table( - "other_table", - Set.of(CqlIdentifier.fromInternal("some_other_idx")), - Map.of( - SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES, - profilesJson("some_other_idx")))); - - assertThat(TableExtensions.removeIndexProfile(keyspace, MY_IDX, MAPPER)).isEmpty(); - } - - @Test - void emptyWhenOwningTableHasNoProfileForTheIndex() { - // the owning table has a profiles blob, but not for the index being dropped - var keyspace = - keyspace( - table( - "my_table", - Set.of(MY_IDX), - Map.of( - SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES, - profilesJson("unrelated_idx")))); - - assertThat(TableExtensions.removeIndexProfile(keyspace, MY_IDX, MAPPER)).isEmpty(); - } - - @Test - void removesProfileAndPreservesOtherProfilesAndVectorize() { - var keyspace = - keyspace( - table( - "my_table", - Set.of(MY_IDX), - Map.of( - SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES, - profilesJson("my_idx", "kept_idx"), - SchemaConstants.MetadataFieldsNames.VECTORIZE_CONFIG, - "{\"v\":{\"provider\":\"openai\",\"modelName\":\"text-embedding-3-small\"}}"))); - - var removal = TableExtensions.removeIndexProfile(keyspace, MY_IDX, MAPPER); - - assertThat(removal).isPresent(); - assertThat(removal.get().tableName()).isEqualTo(CqlIdentifier.fromInternal("my_table")); - - var customProperties = removal.get().customProperties(); - // schema type/version always written - assertThat(customProperties) - .containsKey(SchemaConstants.MetadataFieldsNames.SCHEMA_TYPE) - .containsKey(SchemaConstants.MetadataFieldsNames.SCHEMA_VERSION) - // 
vectorize config read back and preserved - .containsKey(SchemaConstants.MetadataFieldsNames.VECTORIZE_CONFIG); - - // the dropped index's profile is gone, the other index's profile is kept - var profiles = - VectorIndexProfileDefinition.fromJson( - customProperties.get(SchemaConstants.MetadataFieldsNames.VECTOR_INDEX_PROFILES), - MAPPER); - assertThat(profiles).containsOnlyKeys("kept_idx"); - } - - /** Builds a {@code {index: {profile, options}}} blob for the given index keys. */ - private static String profilesJson(String... indexKeys) { - var profiles = new HashMap(); - for (var key : indexKeys) { - profiles.put(key, new VectorIndexProfileDefinition("small-high-recall", Map.of())); - } - try { - return MAPPER.writeValueAsString(profiles); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - private static KeyspaceMetadata keyspace(TableMetadata... tables) { - var keyspaceMetadata = mock(KeyspaceMetadata.class); - Map tableMap = new HashMap<>(); - for (var table : tables) { - tableMap.put(table.getName(), table); - } - when(keyspaceMetadata.getTables()).thenReturn(tableMap); - return keyspaceMetadata; - } - - private static TableMetadata table( - String name, Set indexNames, Map extensions) { - var tableMetadata = mock(TableMetadata.class); - when(tableMetadata.getName()).thenReturn(CqlIdentifier.fromInternal(name)); - - Map indexes = new HashMap<>(); - for (var indexName : indexNames) { - indexes.put(indexName, mock(IndexMetadata.class)); - } - when(tableMetadata.getIndexes()).thenReturn(indexes); - - Map extensionBuffers = new HashMap<>(); - extensions.forEach( - (key, value) -> - extensionBuffers.put(key, ByteBuffer.wrap(value.getBytes(StandardCharsets.UTF_8)))); - Map options = new HashMap<>(); - options.put(TableExtensions.TABLE_OPTIONS_EXTENSION_KEY, extensionBuffers); - when(tableMetadata.getOptions()).thenReturn(options); - - return tableMetadata; - } - } -} diff --git 
a/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinitionTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinitionTest.java deleted file mode 100644 index 85782c9c2f..0000000000 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/VectorIndexProfileDefinitionTest.java +++ /dev/null @@ -1,96 +0,0 @@ -package io.stargate.sgv2.jsonapi.service.cqldriver.executor; - -import static org.assertj.core.api.Assertions.assertThat; - -import com.fasterxml.jackson.databind.ObjectMapper; -import java.util.HashMap; -import java.util.Map; -import org.junit.jupiter.api.Nested; -import org.junit.jupiter.api.Test; - -class VectorIndexProfileDefinitionTest { - - private static final ObjectMapper MAPPER = new ObjectMapper(); - - @Nested - class FromJson { - - @Test - void nullOrBlankIsEmpty() { - assertThat(VectorIndexProfileDefinition.fromJson(null, MAPPER)).isEmpty(); - assertThat(VectorIndexProfileDefinition.fromJson(" ", MAPPER)).isEmpty(); - } - - @Test - void parsesNameAndOptions() { - var json = - "{\"my_idx\":{\"profile\":\"small-high-recall\"," - + "\"options\":{\"maximum_node_connections\":\"32\"}}}"; - - var defs = VectorIndexProfileDefinition.fromJson(json, MAPPER); - - assertThat(defs).containsOnlyKeys("my_idx"); - assertThat(defs.get("my_idx").profile()).isEqualTo("small-high-recall"); - assertThat(defs.get("my_idx").options()).containsEntry("maximum_node_connections", "32"); - } - - @Test - void malformedJsonIsEmpty() { - // advisory metadata: bad JSON must not fail the read - assertThat(VectorIndexProfileDefinition.fromJson("not json", MAPPER)).isEmpty(); - } - - @Test - void roundTripThroughObjectMapper() throws Exception { - Map original = new HashMap<>(); - original.put( - "idx", - new VectorIndexProfileDefinition( - "big-low-latency", Map.of("maximum_node_connections", "16"))); - - var json = MAPPER.writeValueAsString(original); - - 
assertThat(VectorIndexProfileDefinition.fromJson(json, MAPPER)).isEqualTo(original); - } - } - - @Nested - class PutOrRemove { - - @Test - void putNewReturnsChanged() { - var profiles = new HashMap(); - var def = new VectorIndexProfileDefinition("p", Map.of("a", "1")); - - assertThat(VectorIndexProfileDefinition.putOrRemove(profiles, "idx", def)).isTrue(); - assertThat(profiles).containsEntry("idx", def); - } - - @Test - void putIdenticalReturnsUnchanged() { - var profiles = new HashMap(); - profiles.put("idx", new VectorIndexProfileDefinition("p", Map.of("a", "1"))); - - assertThat( - VectorIndexProfileDefinition.putOrRemove( - profiles, "idx", new VectorIndexProfileDefinition("p", Map.of("a", "1")))) - .isFalse(); - } - - @Test - void removeExistingReturnsChanged() { - var profiles = new HashMap(); - profiles.put("idx", new VectorIndexProfileDefinition("p", Map.of())); - - assertThat(VectorIndexProfileDefinition.putOrRemove(profiles, "idx", null)).isTrue(); - assertThat(profiles).doesNotContainKey("idx"); - } - - @Test - void removeMissingReturnsUnchanged() { - var profiles = new HashMap(); - - assertThat(VectorIndexProfileDefinition.putOrRemove(profiles, "idx", null)).isFalse(); - } - } -} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskTest.java deleted file mode 100644 index b336d1152c..0000000000 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/tables/DropVectorIndexProfileDBTaskTest.java +++ /dev/null @@ -1,53 +0,0 @@ -package io.stargate.sgv2.jsonapi.service.operation.tables; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import com.datastax.oss.driver.api.core.CqlIdentifier; -import com.fasterxml.jackson.databind.ObjectMapper; -import 
io.stargate.sgv2.jsonapi.service.cqldriver.executor.TableExtensions; -import io.stargate.sgv2.jsonapi.service.cqldriver.executor.VectorIndexProfileDefinition; -import io.stargate.sgv2.jsonapi.service.operation.SchemaDBTask; -import io.stargate.sgv2.jsonapi.service.operation.keyspaces.KeyspaceDriverExceptionHandler; -import io.stargate.sgv2.jsonapi.service.schema.KeyspaceSchemaObject; -import io.stargate.sgv2.jsonapi.service.schema.SchemaObjectIdentifier; -import java.time.Duration; -import java.util.Map; -import org.junit.jupiter.api.Test; - -class DropVectorIndexProfileDBTaskTest { - - private static final ObjectMapper MAPPER = new ObjectMapper(); - - @Test - void buildsAlterTableExtensionsStatementForOwningTable() { - var identifier = mock(SchemaObjectIdentifier.class); - when(identifier.keyspace()).thenReturn(CqlIdentifier.fromInternal("my_ks")); - var schemaObject = mock(KeyspaceSchemaObject.class); - when(schemaObject.identifier()).thenReturn(identifier); - - var customProperties = - TableExtensions.createCustomProperties( - Map.of(), - Map.of("kept_idx", new VectorIndexProfileDefinition("small-high-recall", Map.of())), - MAPPER); - - var task = - DropVectorIndexProfileDBTask.builder(schemaObject) - .withSchemaRetryPolicy(new SchemaDBTask.SchemaRetryPolicy(1, Duration.ofMillis(1))) - .withExceptionHandlerFactory(KeyspaceDriverExceptionHandler::new) - .withTableName(CqlIdentifier.fromInternal("my_table")) - .withCustomProperties(customProperties) - .build(); - - var query = task.buildStatement().getQuery(); - - // ALTER TABLE on the owning table, in the schema object keyspace, updating extensions - assertThat(query) - .contains("ALTER TABLE") - .contains("my_ks") - .contains("my_table") - .contains("extensions"); - } -} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java index b1d51b5993..80bc9c0e2b 100644 --- 
a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java @@ -278,13 +278,14 @@ void nullWhenNoTuningOptions() { @Test void describesTuningOptionsUnderOptions() { + // options that do not match any profile are echoed verbatim under options var options = new HashMap(); options.put(CQLSAIIndex.Options.CLASS_NAME, CQLSAIIndex.SAI_CLASS_NAME); options.put(CQLSAIIndex.Options.TARGET, "my_vector"); options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OPENAI_V3_SMALL"); options.put(VectorConstants.CQLAnnIndex.SIMILARITY_FUNCTION, "COSINE"); options.put(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "32"); - options.put(VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "200"); + options.put(VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "123"); var described = ApiVectorIndex.describeIndexingOptions(options); @@ -293,7 +294,25 @@ void describesTuningOptionsUnderOptions() { assertThat(described.options()) .containsOnly( entry(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "32"), - entry(VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "200")); + entry(VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "123")); + } + + @Test + void detectsKnownProfileFromOptions() { + // options that exactly match small-high-recall's expansion are echoed as the profile name + var options = new HashMap(); + options.put(CQLSAIIndex.Options.CLASS_NAME, CQLSAIIndex.SAI_CLASS_NAME); + options.put(CQLSAIIndex.Options.TARGET, "my_vector"); + options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OPENAI_V3_SMALL"); + options.put(VectorConstants.CQLAnnIndex.SIMILARITY_FUNCTION, "COSINE"); + options.put(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "32"); + options.put(VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "200"); + + var described = ApiVectorIndex.describeIndexingOptions(options); + + assertThat(described).isNotNull(); + 
assertThat(described.profile()).isEqualTo("small-high-recall"); + assertThat(described.options()).isNull(); } @Test @@ -301,7 +320,8 @@ void omitsNonAllowlistedKeys() { var options = new HashMap(); options.put(CQLSAIIndex.Options.CLASS_NAME, CQLSAIIndex.SAI_CLASS_NAME); options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OTHER"); - options.put(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "16"); + // a non-profile value, so the allow-listed key is echoed as raw options + options.put(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "20"); // a real SAI option the API does not manage (e.g. set directly via CQL), not surfaced options.put("optimize_for", "recall"); @@ -318,18 +338,18 @@ void emptyMapDescribesNull() { } } - /** The persisted profile snapshot reflects the options actually applied, not the base profile. */ + /** tuningOptions keeps only the allow-listed tuning options, dropping reserved and structural. */ @Nested - class TuningOptionsSnapshot { + class TuningOptionsFilter { @Test - void keepsAllowlistedAppliedOptionsExcludingReservedAndStructural() { + void keepsAllowlistedOptionsExcludingReservedAndStructural() { var indexOptions = new HashMap(); indexOptions.put(CQLSAIIndex.Options.CLASS_NAME, CQLSAIIndex.SAI_CLASS_NAME); indexOptions.put(CQLSAIIndex.Options.TARGET, "my_vector"); indexOptions.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OPENAI_V3_SMALL"); indexOptions.put(VectorConstants.CQLAnnIndex.SIMILARITY_FUNCTION, "COSINE"); - // small-high-recall's base is 32, but an override applied 99, so the snapshot keeps 99 + // values are kept as-is; the filter only drops keys, it does not interpret them indexOptions.put(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "99"); indexOptions.put(VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "200"); diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfilesTest.java 
b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfilesTest.java index 188b648897..3b45ac05b6 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfilesTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfilesTest.java @@ -3,6 +3,7 @@ import static org.assertj.core.api.Assertions.assertThat; import io.stargate.sgv2.jsonapi.config.constants.VectorConstants; +import java.util.Map; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; @@ -55,4 +56,39 @@ void noReservedOptions() { } } } + + @Nested + class Detect { + @Test + void exactMatchReturnsProfile() { + var smallHighRecall = VectorIndexProfiles.forName("small-high-recall").orElseThrow(); + assertThat(VectorIndexProfiles.detect(smallHighRecall)).contains("small-high-recall"); + } + + @Test + void noMatchWhenOptionsDiffer() { + assertThat( + VectorIndexProfiles.detect( + Map.of(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "20"))) + .isEmpty(); + } + + @Test + void noMatchWhenSupersetOfAProfile() { + // a superset of small-high-recall is not an exact match + assertThat( + VectorIndexProfiles.detect( + Map.of( + VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "32", + VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "200", + VectorConstants.CQLAnnIndex.ALPHA, "1.2"))) + .isEmpty(); + } + + @Test + void emptyOrNull() { + assertThat(VectorIndexProfiles.detect(Map.of())).isEmpty(); + assertThat(VectorIndexProfiles.detect(null)).isEmpty(); + } + } } From 3a49602f4f48260ba6a194d1fcbb19444e1c4f14 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Mon, 22 Jun 2026 14:36:26 -0700 Subject: [PATCH 12/13] fix: validate vectorIndexing option values and address review findings - Reject non-numeric/non-boolean vectorIndexing option values so a quote cannot break out of the CQL WITH OPTIONS literal (the driver renders option values unescaped); every allowed option is numeric or boolean. 
- Remove VectorIndexUnknownOptionProbeIntegrationTest: an always-green stdout probe that asserted nothing and reflected into a private base method, and was failing CI on a connection-init error during setup. - Declare the vectorIndexing @Schema as oneOf {String, Map} so OpenAPI reflects the real string-or-object wire contract. - Add capability-gated create + listIndexes round-trip ITs (profile-name and raw-options echo); they skip via assumption when the backend lacks SAI_HNSW_ALLOW_CUSTOM_PARAMETERS. - Use JsonUtil.nodeTypeAsString in the deserializer error, drop the inaccurate 'null token' Javadoc, and link the profile stopgaps to #2508. --- .../VectorIndexingDescDeserializer.java | 5 +- .../indexes/VectorIndexDefinitionDesc.java | 3 +- .../config/constants/VectorConstants.java | 3 + .../service/schema/tables/ApiVectorIndex.java | 46 +++++-- .../schema/tables/VectorIndexProfiles.java | 6 +- .../CreateTableIndexIntegrationTest.java | 77 ++++++++++- ...ndexUnknownOptionProbeIntegrationTest.java | 124 ------------------ .../schema/tables/ApiVectorIndexTest.java | 29 +++- 8 files changed, 153 insertions(+), 140 deletions(-) delete mode 100644 src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexUnknownOptionProbeIntegrationTest.java diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/VectorIndexingDescDeserializer.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/VectorIndexingDescDeserializer.java index 50b76e9101..6865c9a8ba 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/VectorIndexingDescDeserializer.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/VectorIndexingDescDeserializer.java @@ -8,6 +8,7 @@ import com.fasterxml.jackson.databind.deser.std.StdDeserializer; import io.stargate.sgv2.jsonapi.api.model.command.table.definition.indexes.VectorIndexDefinitionDesc.VectorIndexingDesc; import 
io.stargate.sgv2.jsonapi.exception.SchemaException; +import io.stargate.sgv2.jsonapi.util.JsonUtil; import java.io.IOException; import java.util.LinkedHashMap; import java.util.Map; @@ -22,7 +23,7 @@ * "vectorIndexing": { "maximum_node_connections": 32 } * * - *

Anything else (number, boolean, array, null token) is a request error. Per Anything else (number, boolean, array) is a request error. Per #2508 the field is overloaded by JSON * type rather than separate {@code profile} / {@code options} sub-keys, so profile and raw options * are mutually exclusive in one request. @@ -58,7 +59,7 @@ public VectorIndexingDesc deserialize( "reason", "`vectorIndexing` must be either a profile name (string) or an object of indexing " + "options, but was: " - + node.getNodeType().name().toLowerCase() + + JsonUtil.nodeTypeAsString(node) + ".")); } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java index af18eac40c..3b2aba68e6 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java @@ -75,7 +75,8 @@ public record VectorIndexDescOptions( + "maximum_node_connections, construction_beam_width, neighborhood_overflow, " + "alpha, enable_hierarchy, e.g. {\"maximum_node_connections\": 32, " + "\"alpha\": 1.2}. A profile and explicit options are mutually exclusive. 
" - + "Set \"metric\" / \"sourceModel\" via their dedicated fields, not here.") + + "Set \"metric\" / \"sourceModel\" via their dedicated fields, not here.", + oneOf = {String.class, Map.class}) @JsonInclude(JsonInclude.Include.NON_NULL) @JsonProperty(VectorConstants.VectorColumn.VECTOR_INDEXING) VectorIndexingDesc vectorIndexing) {} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java index a7d1b19ccc..18e0fea8fc 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java @@ -44,5 +44,8 @@ interface CQLAnnIndex { NEIGHBORHOOD_OVERFLOW, ALPHA, ENABLE_HIERARCHY); + + /** Allowed options whose value must be a boolean; the rest are numeric. */ + Set BOOLEAN_OPTIONS = Set.of(ENABLE_HIERARCHY); } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java index a69548389a..f754955fd2 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java @@ -183,21 +183,51 @@ static void applyIndexingOptions( } } - /** CQL index options are strings; accept scalar JSON values and reject objects, arrays, null. */ + /** + * Validates and renders an option value to the CQL string form. CQL index options are a {@code + * Map} that the driver emits unescaped into {@code WITH OPTIONS = {...}}, so a + * raw string would let a quote break out of the literal; every allowed option is numeric or + * boolean, so the value is coerced to that type and anything else is rejected. 
+ */ private static String optionValueToString(String optionName, Object value) { - if (value == null || value instanceof Map || value instanceof Iterable) { - throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( - Map.of( - "reason", - "The option '%s' must be a scalar value (string, number, or boolean)." - .formatted(optionName))); + if (VectorConstants.CQLAnnIndex.BOOLEAN_OPTIONS.contains(optionName)) { + return booleanOptionValue(optionName, value); + } + return numericOptionValue(optionName, value); + } + + private static String booleanOptionValue(String optionName, Object value) { + if (value instanceof Boolean bool) { + return bool.toString(); } + if (value instanceof String text + && ("true".equalsIgnoreCase(text) || "false".equalsIgnoreCase(text))) { + return text.toLowerCase(); + } + throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( + Map.of("reason", "The option '%s' must be true or false.".formatted(optionName))); + } + + private static String numericOptionValue(String optionName, Object value) { // JSON numbers deserialize to BigDecimal; use plain (non-scientific) notation for the CQL // value. if (value instanceof BigDecimal number) { return number.toPlainString(); } - return String.valueOf(value); + if (value instanceof Number number) { + return number.toString(); + } + // A numeric value sent as a JSON string is accepted only if it parses as a number, which also + // rejects any quote/garbage that could break out of the CQL options literal. 
+ if (value instanceof String text) { + try { + return new BigDecimal(text.trim()).toPlainString(); + } catch (NumberFormatException e) { + // fall through to the rejection below + } + } + throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( + Map.of("reason", "The option '%s' must be a number.".formatted(optionName))); } /** diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java index 4b8a373fc3..0dffa05cb9 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java @@ -13,7 +13,8 @@ * dedicated {@code sourceModel} / {@code metric} fields. Values are Strings because CQL index * options are a {@code Map}. * - *

<p>Initial in-code set; values to be tuned and moved to config. + *

Initial in-code set; values to be tuned and moved to config (#2508). */ public final class VectorIndexProfiles { @@ -50,7 +51,8 @@ public static Set knownNames() { * Reverse lookup: the profile whose expanded options exactly match {@code options}, used on * read-back to label an index that was created from a known profile. Exact match only, so an * index whose options differ from, or are a superset of, a profile reports its raw options - * instead. The stored options are not persisted, so this is a best-effort reconstruction. + * instead. The stored options are not persisted, so this is a best-effort reconstruction (#2508). * * @return the matching profile name, or empty if {@code options} is null/empty or matches none */ diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java index 95bd5e5c65..2321e9ccf2 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java @@ -2,6 +2,7 @@ import static io.stargate.sgv2.jsonapi.api.v1.util.DataApiCommandSenders.assertNamespaceCommand; import static io.stargate.sgv2.jsonapi.api.v1.util.DataApiCommandSenders.assertTableCommand; +import static org.assertj.core.api.Assertions.assertThat; import io.quarkus.test.common.WithTestResource; import io.quarkus.test.junit.QuarkusIntegrationTest; @@ -10,6 +11,7 @@ import io.stargate.sgv2.jsonapi.exception.SchemaException; import io.stargate.sgv2.jsonapi.testresource.DseTestResource; import jakarta.ws.rs.core.Response; +import java.util.List; import java.util.Map; import java.util.stream.Stream; import org.junit.jupiter.api.*; @@ -49,6 +51,50 @@ private void verifyCreatedVectorIndex(String indexName) { .hasIndex(indexName); } + /** + * Creates a vector index whose {@code vectorIndexing} sets SAI tuning options. 
Those options need + * a cluster with {@code SAI_HNSW_ALLOW_CUSTOM_PARAMETERS}; where the backend rejects them the + * create returns an error and there is nothing to round-trip, so the test is skipped (assumption) + * rather than failed. When it does apply, the create must succeed. + */ + private void createTunedVectorIndexOrSkip( + String indexName, String column, String vectorIndexingJson) { + var validator = + assertTableCommand(keyspaceName, vectorTableName) + .postCreateVectorIndex( + """ + { + "name": "%s", + "definition": { + "column": "%s", + "options": { "vectorIndexing": %s } + } + } + """ + .formatted(indexName, column, vectorIndexingJson)); + + List errors = validator.response().extract().path("errors"); + Assumptions.assumeTrue( + errors == null || errors.isEmpty(), + () -> + "backend rejected vectorIndexing tuning options (needs SAI_HNSW_ALLOW_CUSTOM_PARAMETERS): " + + errors); + validator.wasSuccessful(); + } + + /** The {@code vectorIndexing} echoed back by listIndexes for the given index (string or map). 
*/ + private Object readBackVectorIndexing(String indexName) { + return assertTableCommand(keyspaceName, vectorTableName) + .templated() + .listIndexes(true) + .wasSuccessful() + .response() + .extract() + .path( + "status.indexes.find { it.name == '%s' }.definition.options.vectorIndexing" + .formatted(indexName)); + } + @BeforeAll public final void createTestTables() { // Create test tables for indexing: first one for "regular" indexes @@ -96,7 +142,9 @@ public final void createTestTables() { Map.entry("vector_type_4", Map.of("type", "vector", "dimension", 1024)), Map.entry("vector_type_5", Map.of("type", "vector", "dimension", 1024)), Map.entry("vector_type_6", Map.of("type", "vector", "dimension", 1024)), - Map.entry("vector_type_7", Map.of("type", "vector", "dimension", 1024))), + Map.entry("vector_type_7", Map.of("type", "vector", "dimension", 1024)), + Map.entry("vector_type_8", Map.of("type", "vector", "dimension", 1024)), + Map.entry("vector_type_9", Map.of("type", "vector", "dimension", 1024))), "id") .wasSuccessful(); @@ -596,6 +644,31 @@ public void createVectorIndexWithCorrectIndexType() { verifyCreatedVectorIndex("vector_type_6_idx"); } + + @Test + public void createVectorIndexWithProfileRoundTrip() { + createTunedVectorIndexOrSkip("vector_type_8_idx", "vector_type_8", "\"small-high-recall\""); + + verifyCreatedVectorIndex("vector_type_8_idx"); + // The profile name is not persisted; read-back detects it from the applied options and + // echoes the name back. + assertThat(readBackVectorIndexing("vector_type_8_idx")).isEqualTo("small-high-recall"); + } + + @Test + @SuppressWarnings("unchecked") + public void createVectorIndexWithRawOptionsRoundTrip() { + createTunedVectorIndexOrSkip( + "vector_type_9_idx", + "vector_type_9", + "{ \"maximum_node_connections\": 24, \"alpha\": 1.5 }"); + + verifyCreatedVectorIndex("vector_type_9_idx"); + // Options that match no profile are echoed back verbatim, as the strings CQL stores. 
+ assertThat((Map) readBackVectorIndexing("vector_type_9_idx")) + .containsEntry("maximum_node_connections", "24") + .containsEntry("alpha", "1.5"); + } } @Nested @@ -1217,7 +1290,7 @@ public void nonScalarOptionValueRejected() { .hasSingleApiError( SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS, SchemaException.class, - "The option 'alpha' must be a scalar value"); + "The option 'alpha' must be a number."); } @Test diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexUnknownOptionProbeIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexUnknownOptionProbeIntegrationTest.java deleted file mode 100644 index da32ed0b8c..0000000000 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/VectorIndexUnknownOptionProbeIntegrationTest.java +++ /dev/null @@ -1,124 +0,0 @@ -package io.stargate.sgv2.jsonapi.api.v1.tables; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatCode; - -import com.datastax.oss.driver.api.core.CqlSession; -import com.datastax.oss.driver.api.core.cql.SimpleStatement; -import io.quarkus.test.junit.QuarkusIntegrationTest; -import io.stargate.sgv2.jsonapi.api.v1.AbstractKeyspaceIntegrationTestBase; -import java.lang.reflect.Method; -import org.apache.commons.lang3.RandomStringUtils; -import org.junit.jupiter.api.MethodOrderer; -import org.junit.jupiter.api.Order; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestMethodOrder; - -/** - * Probe (issue #2487): does the backing DB (DSE 6.9 / HCD) accept an unknown option key in a vector - * SAI index's {@code CREATE CUSTOM INDEX ... WITH OPTIONS}? - * - *

Issues raw CQL via the admin {@link CqlSession} from {@link - * AbstractKeyspaceIntegrationTestBase}, bypassing the {@code ApiVectorIndex.applyIndexingOptions} - * allow-list and the data-api HTTP command layer. - * - *

Hypothesis: an unknown key (here {@code profile}) is rejected by SAI option validation - * regardless of {@code SAI_HNSW_ALLOW_CUSTOM_PARAMETERS}, which only gates known HNSW tuning params - * like {@code maximum_node_connections}. The control index uses only {@code - * similarity_function:cosine} to confirm the table/column/CQL is otherwise valid. - * - *

Always passes, recording observed behavior to stdout. Flip {@code EXPECT_UNKNOWN_KEY_REJECTED} - * to make it a hard assertion once the answer is known. - */ -@QuarkusIntegrationTest -@TestMethodOrder(MethodOrderer.OrderAnnotation.class) -class VectorIndexUnknownOptionProbeIntegrationTest extends AbstractKeyspaceIntegrationTestBase { - - /** Flip to true to turn the probe into a hard assertion that the unknown key is rejected. */ - private static final boolean EXPECT_UNKNOWN_KEY_REJECTED = false; - - private static final String TABLE = - "probe_" + RandomStringUtils.insecure().nextAlphanumeric(12).toLowerCase(); - private static final String VECTOR_COL = "embedding"; - private static final int DIMENSION = 4; - - /** Reflective accessor to the base class's private CqlSession, for direct error inspection. */ - private CqlSession session() { - try { - Method m = AbstractKeyspaceIntegrationTestBase.class.getDeclaredMethod("createDriverSession"); - m.setAccessible(true); - return (CqlSession) m.invoke(this); - } catch (ReflectiveOperationException e) { - throw new RuntimeException("Could not obtain CqlSession from base class", e); - } - } - - @Test - @Order(1) - void createVectorTable() { - // Raw CQL: keyspace already created by AbstractKeyspaceIntegrationTestBase#createKeyspace. - boolean applied = - executeCqlStatement( - String.format( - "CREATE TABLE IF NOT EXISTS \"%s\".\"%s\" " - + "(id text PRIMARY KEY, %s vector)", - keyspaceName, TABLE, VECTOR_COL, DIMENSION)); - assertThat(applied).as("vector table create applied").isTrue(); - } - - @Test - @Order(2) - void controlIndex_knownGoodOptionsOnly_mustSucceed() { - // Control: only a known-good SAI option. Must succeed, so an unknown-key failure in the test - // case is attributable to the unknown key, not the table/column/CQL path. 
- String cql = - String.format( - "CREATE CUSTOM INDEX \"idx_control_%s\" ON \"%s\".\"%s\" (%s) " - + "USING 'StorageAttachedIndex' WITH OPTIONS = {'similarity_function':'cosine'}", - TABLE, keyspaceName, TABLE, VECTOR_COL); - - assertThatCode(() -> session().execute(SimpleStatement.newInstance(cql))) - .as("CONTROL index with only {'similarity_function':'cosine'} must be accepted") - .doesNotThrowAnyException(); - } - - @Test - @Order(3) - void testIndex_unknownOptionKey_recordAcceptOrReject() { - // Adds an unknown key ('profile'); similarity_function is kept so the only difference vs the - // control is that key. - String cql = - String.format( - "CREATE CUSTOM INDEX \"idx_test_%s\" ON \"%s\".\"%s\" (%s) " - + "USING 'StorageAttachedIndex' " - + "WITH OPTIONS = {'similarity_function':'cosine','profile':'small-high-recall'}", - TABLE, keyspaceName, TABLE, VECTOR_COL); - - Throwable thrown = - org.junit.jupiter.api.Assertions.assertDoesNotThrow( - () -> { - try { - session().execute(SimpleStatement.newInstance(cql)); - return (Throwable) null; - } catch (Throwable t) { - return t; - } - }); - - boolean rejected = thrown != null; - System.out.println("================================================================="); - System.out.println( - "[VECTOR-INDEX-UNKNOWN-OPTION PROBE] unknown key 'profile' rejected=" + rejected); - if (rejected) { - System.out.println("[PROBE] rejection class : " + thrown.getClass().getName()); - System.out.println("[PROBE] rejection message: " + thrown.getMessage()); - } else { - System.out.println("[PROBE] DB SILENTLY ACCEPTED the unknown 'profile' key."); - } - System.out.println("================================================================="); - - if (EXPECT_UNKNOWN_KEY_REJECTED) { - assertThat(rejected).as("DB should reject unknown SAI option key 'profile'").isTrue(); - } - } -} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java 
b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java index 80bc9c0e2b..1522a9eb81 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java @@ -244,7 +244,7 @@ void numericOptionsUsePlainString() { @Test void nonScalarOptionValueThrows() { - // "alpha" is an allowed key, so this reaches the scalar-value check + // "alpha" is an allowed key, so this reaches the value-type check assertSchemaError( options(Map.of("alpha", List.of(1, 2))), SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); @@ -253,6 +253,33 @@ void nonScalarOptionValueThrows() { SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); } + @Test + void numericOptionRejectsNonNumericString() { + // Driver renders option values unescaped into WITH OPTIONS = {...}; a quote-bearing value + // would break out of the literal, so a numeric option must parse as a number. 
+ assertSchemaError( + options(Map.of("alpha", "1.2'} AND injected={'x':'y")), + SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); + } + + @Test + void booleanOptionAcceptsBooleanOrString() { + var fromBoolean = new HashMap(); + ApiVectorIndex.applyIndexingOptions(fromBoolean, options(Map.of("enable_hierarchy", true))); + assertThat(fromBoolean).containsEntry("enable_hierarchy", "true"); + + var fromString = new HashMap(); + ApiVectorIndex.applyIndexingOptions(fromString, options(Map.of("enable_hierarchy", "TRUE"))); + assertThat(fromString).containsEntry("enable_hierarchy", "true"); + } + + @Test + void booleanOptionRejectsNonBoolean() { + assertSchemaError( + options(Map.of("enable_hierarchy", "yes")), + SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); + } + private void assertSchemaError(VectorIndexingDesc desc, SchemaException.Code code) { var options = new HashMap(); assertThatThrownBy(() -> ApiVectorIndex.applyIndexingOptions(options, desc)) From 1c53e0b548244f086a62b5d3898bcfe9cfbf4ff2 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Mon, 22 Jun 2026 14:56:24 -0700 Subject: [PATCH 13/13] test: narrow vectorIndexing round-trip skip to the SAI custom-param rejection Per review feedback: the round-trip ITs skipped on any create error, so a regression in deserialization, profile expansion, or option rendering would show as skipped instead of failed. Skip now fires only when the single response error names SAI_HNSW_ALLOW_CUSTOM_PARAMETERS; any other (or no) error is asserted via wasSuccessful(). Also switch the raw-options case from 'alpha' (rejected as 'not understood by StorageAttachedIndex', so never runnable even with the flag) to maximum_node_connections + construction_beam_width, which the backend recognizes and gates behind the flag, making the round-trip meaningful on a flag-enabled cluster. 
--- .../CreateTableIndexIntegrationTest.java | 36 +++++++++++++------ 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java index 2321e9ccf2..179b041e39 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java @@ -51,12 +51,19 @@ private void verifyCreatedVectorIndex(String indexName) { .hasIndex(indexName); } + /** + * Database error a cluster returns for custom SAI HNSW params when the feature is not enabled. + */ + private static final String SAI_CUSTOM_PARAMS_DISABLED = "SAI_HNSW_ALLOW_CUSTOM_PARAMETERS"; + /** * Creates a vector index whose {@code vectorIndexing} sets SAI tuning options. Those options need - * a cluster with {@code SAI_HNSW_ALLOW_CUSTOM_PARAMETERS}; where the backend rejects them the - * create returns an error and there is nothing to round-trip, so the test is skipped (assumption) - * rather than failed. When it does apply, the create must succeed. + * a cluster with {@code SAI_HNSW_ALLOW_CUSTOM_PARAMETERS}; only that specific backend rejection + * is tolerated (skipped via assumption), because there is nothing to round-trip there. Any other + * error — request shape, profile expansion, option rendering, or an unrelated server failure — is + * a real regression and fails the test rather than hiding it as a skip. 
*/ + @SuppressWarnings("unchecked") private void createTunedVectorIndexOrSkip( String indexName, String column, String vectorIndexingJson) { var validator = @@ -73,12 +80,16 @@ private void createTunedVectorIndexOrSkip( """ .formatted(indexName, column, vectorIndexingJson)); - List errors = validator.response().extract().path("errors"); - Assumptions.assumeTrue( - errors == null || errors.isEmpty(), - () -> - "backend rejected vectorIndexing tuning options (needs SAI_HNSW_ALLOW_CUSTOM_PARAMETERS): " - + errors); + List> errors = validator.response().extract().path("errors"); + boolean customParamsDisabled = + errors != null + && errors.size() == 1 + && String.valueOf(errors.get(0).get("message")).contains(SAI_CUSTOM_PARAMS_DISABLED); + Assumptions.assumeFalse( + customParamsDisabled, + () -> "skipping round-trip: cluster has not enabled " + SAI_CUSTOM_PARAMS_DISABLED); + + // Not the tolerated rejection: any other (or no) error must be asserted, not skipped. validator.wasSuccessful(); } @@ -658,16 +669,19 @@ public void createVectorIndexWithProfileRoundTrip() { @Test @SuppressWarnings("unchecked") public void createVectorIndexWithRawOptionsRoundTrip() { + // Both keys are HNSW params the backend recognizes (gated by + // SAI_HNSW_ALLOW_CUSTOM_PARAMETERS), + // and the pair matches no profile, so read-back echoes the raw options rather than a name. createTunedVectorIndexOrSkip( "vector_type_9_idx", "vector_type_9", - "{ \"maximum_node_connections\": 24, \"alpha\": 1.5 }"); + "{ \"maximum_node_connections\": 24, \"construction_beam_width\": 150 }"); verifyCreatedVectorIndex("vector_type_9_idx"); // Options that match no profile are echoed back verbatim, as the strings CQL stores. assertThat((Map) readBackVectorIndexing("vector_type_9_idx")) .containsEntry("maximum_node_connections", "24") - .containsEntry("alpha", "1.5"); + .containsEntry("construction_beam_width", "150"); } }