diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/VectorIndexingDescDeserializer.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/VectorIndexingDescDeserializer.java new file mode 100644 index 0000000000..6865c9a8ba --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/VectorIndexingDescDeserializer.java @@ -0,0 +1,65 @@ +package io.stargate.sgv2.jsonapi.api.model.command.deserializers; + +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.deser.std.StdDeserializer; +import io.stargate.sgv2.jsonapi.api.model.command.table.definition.indexes.VectorIndexDefinitionDesc.VectorIndexingDesc; +import io.stargate.sgv2.jsonapi.exception.SchemaException; +import io.stargate.sgv2.jsonapi.util.JsonUtil; +import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * Deserializes the overloaded {@code vectorIndexing} value, either: + * + * + * + *

Anything else (number, boolean, array) is a request error. Per #2508 the field is overloaded by JSON + * type rather than separate {@code profile} / {@code options} sub-keys, so profile and raw options + * are mutually exclusive in one request. + */ +public class VectorIndexingDescDeserializer extends StdDeserializer { + + private static final TypeReference> OPTIONS_TYPE = + new TypeReference<>() {}; + + public VectorIndexingDescDeserializer() { + super(VectorIndexingDesc.class); + } + + @Override + public VectorIndexingDesc deserialize( + JsonParser jsonParser, DeserializationContext deserializationContext) throws IOException { + JsonNode node = deserializationContext.readTree(jsonParser); + + if (node.isTextual()) { + // named profile, validated at apply time + return VectorIndexingDesc.ofProfile(node.textValue()); + } + if (node.isObject()) { + // raw SAI options. convertValue applies the mapper config (e.g. float handling), as a + // Map field would + Map options = + ((ObjectMapper) jsonParser.getCodec()).convertValue(node, OPTIONS_TYPE); + return VectorIndexingDesc.ofOptions(options); + } + + throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( + Map.of( + "reason", + "`vectorIndexing` must be either a profile name (string) or an object of indexing " + + "options, but was: " + + JsonUtil.nodeTypeAsString(node) + + ".")); + } +} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java index 03d355c0a7..3b2aba68e6 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java @@ -3,6 +3,9 @@ import com.fasterxml.jackson.annotation.JsonInclude; import 
com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonPropertyOrder; +import com.fasterxml.jackson.annotation.JsonValue; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import io.stargate.sgv2.jsonapi.api.model.command.deserializers.VectorIndexingDescDeserializer; import io.stargate.sgv2.jsonapi.config.constants.TableDescConstants; import io.stargate.sgv2.jsonapi.config.constants.VectorConstants; import io.stargate.sgv2.jsonapi.config.constants.VectorIndexDescDefaults; @@ -11,6 +14,7 @@ import jakarta.annotation.Nullable; import jakarta.validation.constraints.NotNull; import jakarta.validation.constraints.Pattern; +import java.util.Map; import org.eclipse.microprofile.openapi.annotations.enums.SchemaType; import org.eclipse.microprofile.openapi.annotations.media.Schema; @@ -34,7 +38,8 @@ public record VectorIndexDefinitionDesc( /** Options for the vector index */ @JsonPropertyOrder({ VectorConstants.VectorColumn.METRIC, - VectorConstants.VectorColumn.SOURCE_MODEL + VectorConstants.VectorColumn.SOURCE_MODEL, + VectorConstants.VectorColumn.VECTOR_INDEXING }) public record VectorIndexDescOptions( @Nullable @@ -59,5 +64,46 @@ public record VectorIndexDescOptions( + EmbeddingSourceModel.ApiConstants.ALL) @JsonInclude(JsonInclude.Include.NON_NULL) @JsonProperty(VectorConstants.VectorColumn.SOURCE_MODEL) - String sourceModel) {} + String sourceModel, + // + @Nullable + @Schema( + description = + "Optional vector (SAI) indexing configuration. Either a profile name (string) " + + "the API expands into SAI options, e.g. \"small-high-recall\"; or an object " + + "of Cassandra SAI tuning options (snake_case), restricted to: " + + "maximum_node_connections, construction_beam_width, neighborhood_overflow, " + + "alpha, enable_hierarchy, e.g. {\"maximum_node_connections\": 32, " + + "\"alpha\": 1.2}. A profile and explicit options are mutually exclusive. 
" + + "Set \"metric\" / \"sourceModel\" via their dedicated fields, not here.", + oneOf = {String.class, Map.class}) + @JsonInclude(JsonInclude.Include.NON_NULL) + @JsonProperty(VectorConstants.VectorColumn.VECTOR_INDEXING) + VectorIndexingDesc vectorIndexing) {} + + /** + * Overloaded {@code vectorIndexing} value: exactly one of a named {@code profile} (JSON string) + * or raw SAI tuning {@code options} (JSON object) is set. {@link VectorIndexingDescDeserializer} + * discriminates by JSON type; {@link #jsonValue()} serializes back to the bare string or object. + */ + @JsonDeserialize(using = VectorIndexingDescDeserializer.class) + public record VectorIndexingDesc( + @Nullable String profile, @Nullable Map options) { + + /** A {@code vectorIndexing} that selects a named profile. */ + public static VectorIndexingDesc ofProfile(String profile) { + return new VectorIndexingDesc(profile, null); + } + + /** A {@code vectorIndexing} that sets raw SAI options directly. */ + public static VectorIndexingDesc ofOptions(Map options) { + return new VectorIndexingDesc(null, options); + } + + /** Serializes to the bare profile string or the bare options map. */ + @JsonValue + Object jsonValue() { + return profile != null ? 
profile : options; + } + } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java index 753872a95b..18e0fea8fc 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java @@ -1,17 +1,51 @@ package io.stargate.sgv2.jsonapi.config.constants; +import java.util.Set; + public interface VectorConstants { interface VectorColumn { String DIMENSION = "dimension"; String METRIC = "metric"; String SOURCE_MODEL = "sourceModel"; String SERVICE = ServiceDescConstants.SERVICE; + String VECTOR_INDEXING = "vectorIndexing"; } interface Vectorize extends ServiceDescConstants {} + /** + * CQL {@code WITH OPTIONS} keys for a vector (SAI) index. {@link #SOURCE_MODEL} and {@link + * #SIMILARITY_FUNCTION} map to dedicated API fields ({@code sourceModel} / {@code metric}); the + * rest are tuning options set via {@code vectorIndexing.options}. + */ interface CQLAnnIndex { String SOURCE_MODEL = "source_model"; String SIMILARITY_FUNCTION = "similarity_function"; + String MAXIMUM_NODE_CONNECTIONS = "maximum_node_connections"; + String CONSTRUCTION_BEAM_WIDTH = "construction_beam_width"; + String NEIGHBORHOOD_OVERFLOW = "neighborhood_overflow"; + String ALPHA = "alpha"; + String ENABLE_HIERARCHY = "enable_hierarchy"; + + /** + * Options with dedicated API fields ({@code metric} / {@code sourceModel}); rejected inside + * {@code vectorIndexing.options}. + */ + Set RESERVED_OPTIONS = Set.of(SOURCE_MODEL, SIMILARITY_FUNCTION); + + /** + * SAI tuning options settable through {@code vectorIndexing.options}. {@code optimize_for} + * exists in OSS Cassandra but is de-emphasized in DSE 6.9 / HCD, so it is left out for now. 
+ */ + Set ALLOWED_OPTIONS = + Set.of( + MAXIMUM_NODE_CONNECTIONS, + CONSTRUCTION_BEAM_WIDTH, + NEIGHBORHOOD_OVERFLOW, + ALPHA, + ENABLE_HIERARCHY); + + /** Allowed options whose value must be a boolean; the rest are numeric. */ + Set BOOLEAN_OPTIONS = Set.of(ENABLE_HIERARCHY); } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java b/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java index be262773fb..6de0b2128a 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java @@ -47,6 +47,7 @@ public enum Code implements ErrorCode { INVALID_INDEXING_DEFINITION, INVALID_USAGE_OF_VECTORIZE, // legacy: converted from ErrorCodeV1 INVALID_USER_DEFINED_TYPE_NAME, + INVALID_VECTOR_INDEXING_OPTIONS, LEXICAL_FEATURE_NOT_ENABLED, LEXICAL_NOT_ENABLED_FOR_COLLECTION, MISSING_ALTER_TABLE_OPERATIONS, @@ -76,6 +77,7 @@ public enum Code implements ErrorCode { UNKNOWN_PARTITION_SORT_COLUMNS, UNKNOWN_PRIMITIVE_DATA_TYPE, UNKNOWN_USER_DEFINED_TYPE, + UNKNOWN_VECTOR_INDEXING_PROFILE, UNKNOWN_VECTOR_METRIC, UNKNOWN_VECTOR_SOURCE_MODEL, UNSUPPORTED_DATA_TYPE_TABLE_CREATION, diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java index 0bcf7a85d8..f754955fd2 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java @@ -6,6 +6,7 @@ import com.datastax.oss.driver.api.core.CqlIdentifier; import com.datastax.oss.driver.api.core.metadata.schema.IndexMetadata; +import com.google.common.annotations.VisibleForTesting; import io.stargate.sgv2.jsonapi.api.model.command.table.IndexDesc; import io.stargate.sgv2.jsonapi.api.model.command.table.SchemaDescSource; import 
io.stargate.sgv2.jsonapi.api.model.command.table.definition.indexes.RegularIndexDefinitionDesc; @@ -17,6 +18,7 @@ import io.stargate.sgv2.jsonapi.service.schema.SimilarityFunction; import io.stargate.sgv2.jsonapi.service.schema.tables.factories.IndexFactoryFromCql; import io.stargate.sgv2.jsonapi.service.schema.tables.factories.IndexFactoryFromIndexDesc; +import java.math.BigDecimal; import java.util.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -54,7 +56,9 @@ public IndexDesc getSchemaDescription( var definitionOptions = new VectorIndexDefinitionDesc.VectorIndexDescOptions( - similarityFunction.apiName(), sourceModel.apiName()); + similarityFunction.apiName(), + sourceModel.apiName(), + describeIndexingOptions(indexOptions)); var definition = new VectorIndexDefinitionDesc(cqlIdentifierToJsonKey(targetColumn), definitionOptions); @@ -76,6 +80,156 @@ public VectorIndexDefinitionDesc definition() { }; } + /** + * Builds the {@code vectorIndexing} description from the CQL index options map, keeping only the + * supported tuning options (see {@link VectorConstants.CQLAnnIndex#ALLOWED_OPTIONS}). When those + * options exactly match a known profile the profile name is echoed; otherwise the raw options + * are. Structural, dedicated-field, and CQL-only keys are dropped to stay symmetric with what the + * API accepts. The profile is not stored, so it is detected from the options (see {@link + * VectorIndexProfiles#detect(Map)}). 
+ * + * @return the {@code vectorIndexing} description, or null when there are no supported tuning + * options + */ + @VisibleForTesting + static VectorIndexDefinitionDesc.VectorIndexingDesc describeIndexingOptions( + Map indexOptions) { + var tuning = tuningOptions(indexOptions); + if (tuning.isEmpty()) { + return null; + } + return VectorIndexProfiles.detect(tuning) + .map(VectorIndexDefinitionDesc.VectorIndexingDesc::ofProfile) + .orElseGet( + () -> + VectorIndexDefinitionDesc.VectorIndexingDesc.ofOptions( + new LinkedHashMap<>(tuning))); + } + + /** Keeps only the {@link VectorConstants.CQLAnnIndex#ALLOWED_OPTIONS} from a CQL options map. */ + @VisibleForTesting + static Map tuningOptions(Map indexOptions) { + Map tuning = new LinkedHashMap<>(); + for (var entry : indexOptions.entrySet()) { + if (VectorConstants.CQLAnnIndex.ALLOWED_OPTIONS.contains(entry.getKey())) { + tuning.put(entry.getKey(), entry.getValue()); + } + } + return tuning; + } + + /** + * Applies the request's {@code vectorIndexing} into the CQL index options map. {@code + * vectorIndexing} is either a {@code profile} name expanded via {@link VectorIndexProfiles}, or + * an {@code options} object of Cassandra SAI tuning options validated against {@link + * VectorConstants.CQLAnnIndex#ALLOWED_OPTIONS}. The two are mutually exclusive (see {@link + * io.stargate.sgv2.jsonapi.api.model.command.deserializers.VectorIndexingDescDeserializer}). + * {@code source_model} / {@code similarity_function} have dedicated fields and are rejected here. + * + * @param indexOptions the CQL options map being built, mutated in place + * @param vectorIndexing the structured request value, may be null + */ + @VisibleForTesting + static void applyIndexingOptions( + Map indexOptions, + VectorIndexDefinitionDesc.VectorIndexingDesc vectorIndexing) { + + if (vectorIndexing == null) { + return; + } + + // A profile expands to a set of options. 
+ var profileName = vectorIndexing.profile(); + if (profileName != null) { + var profileOptions = + VectorIndexProfiles.forName(profileName) + .orElseThrow( + () -> + SchemaException.Code.UNKNOWN_VECTOR_INDEXING_PROFILE.get( + Map.of( + "knownProfiles", + errFmtJoin(VectorIndexProfiles.knownNames()), + "unknownProfile", + profileName))); + indexOptions.putAll(profileOptions); + } + + // Raw options (mutually exclusive with a profile) are validated against the allow-list. + var options = vectorIndexing.options(); + if (options != null) { + for (var entry : options.entrySet()) { + var optionName = entry.getKey(); + if (VectorConstants.CQLAnnIndex.RESERVED_OPTIONS.contains(optionName)) { + var dedicatedField = + VectorConstants.CQLAnnIndex.SOURCE_MODEL.equals(optionName) + ? VectorConstants.VectorColumn.SOURCE_MODEL + : VectorConstants.VectorColumn.METRIC; + throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( + Map.of( + "reason", + "The option '%s' must be set using its dedicated field '%s', not as a vectorIndexing option." + .formatted(optionName, dedicatedField))); + } + if (!VectorConstants.CQLAnnIndex.ALLOWED_OPTIONS.contains(optionName)) { + throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( + Map.of( + "reason", + "Unsupported vector indexing option '%s'. Supported options: %s." + .formatted( + optionName, errFmtJoin(VectorConstants.CQLAnnIndex.ALLOWED_OPTIONS)))); + } + indexOptions.put(optionName, optionValueToString(optionName, entry.getValue())); + } + } + } + + /** + * Validates and renders an option value to the CQL string form. CQL index options are a {@code + * Map} that the driver emits unescaped into {@code WITH OPTIONS = {...}}, so a + * raw string would let a quote break out of the literal; every allowed option is numeric or + * boolean, so the value is coerced to that type and anything else is rejected. 
+ */ + private static String optionValueToString(String optionName, Object value) { + if (VectorConstants.CQLAnnIndex.BOOLEAN_OPTIONS.contains(optionName)) { + return booleanOptionValue(optionName, value); + } + return numericOptionValue(optionName, value); + } + + private static String booleanOptionValue(String optionName, Object value) { + if (value instanceof Boolean bool) { + return bool.toString(); + } + if (value instanceof String text + && ("true".equalsIgnoreCase(text) || "false".equalsIgnoreCase(text))) { + return text.toLowerCase(); + } + throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( + Map.of("reason", "The option '%s' must be true or false.".formatted(optionName))); + } + + private static String numericOptionValue(String optionName, Object value) { + // JSON numbers deserialize to BigDecimal; use plain (non-scientific) notation for the CQL + // value. + if (value instanceof BigDecimal number) { + return number.toPlainString(); + } + if (value instanceof Number number) { + return number.toString(); + } + // A numeric value sent as a JSON string is accepted only if it parses as a number, which also + // rejects any quote/garbage that could break out of the CQL options literal. + if (value instanceof String text) { + try { + return new BigDecimal(text.trim()).toPlainString(); + } catch (NumberFormatException e) { + // fall through to the rejection below + } + } + throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get( + Map.of("reason", "The option '%s' must be a number.".formatted(optionName))); + } + /** * Logic to map from the name of the similarity function, from either the user or the CQL index, * to a {@link SimilarityFunction} enum value. @@ -245,6 +399,12 @@ public ApiVectorIndex create( metricToUse); } + // vectorIndexing is a profile name or raw options (mutually exclusive); metric / sourceModel + // above use dedicated fields. + var userVectorIndexing = + (indexDesc.options() == null) ? 
null : indexDesc.options().vectorIndexing(); + applyIndexingOptions(indexOptions, userVectorIndexing); + return new ApiVectorIndex( indexIdentifier, targetIdentifier, indexOptions, metricToUse, sourceModelToUse); } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java new file mode 100644 index 0000000000..0dffa05cb9 --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java @@ -0,0 +1,68 @@ +package io.stargate.sgv2.jsonapi.service.schema.tables; + +import io.stargate.sgv2.jsonapi.config.constants.VectorConstants; +import java.util.Map; +import java.util.Optional; +import java.util.Set; + +/** + * Named vector-index profiles, each mapping a profile name to Cassandra SAI indexing options. An + * alternative to passing raw options through {@code vectorIndexing}. + * + *

Profiles never set {@code source_model} or {@code similarity_function}; those have the + * dedicated {@code sourceModel} / {@code metric} fields. Values are Strings because CQL index + * options are a {@code Map<String, String>}. + * + *

Initial in-code set; values to be tuned and moved to config (#2508). + */ +public final class VectorIndexProfiles { + + private VectorIndexProfiles() {} + + /** Profile name (lower-cased for case-insensitive lookup) to CQL indexing options. */ + private static final Map> PROFILES = + Map.of( + "small-high-recall", + Map.of( + VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "32", + VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "200"), + "big-low-latency", + Map.of(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "16")); + + /** + * Case-insensitive profile lookup. + * + * @return the profile's CQL options, or empty if {@code name} is null, blank, or unknown + */ + public static Optional> forName(String name) { + if (name == null || name.isBlank()) { + return Optional.empty(); + } + return Optional.ofNullable(PROFILES.get(name.toLowerCase())); + } + + /** Names of all known profiles, for error messages. */ + public static Set knownNames() { + return PROFILES.keySet(); + } + + /** + * Reverse lookup: the profile whose expanded options exactly match {@code options}, used on + * read-back to label an index that was created from a known profile. Exact match only, so an + * index whose options differ from, or are a superset of, a profile reports its raw options + * instead. The stored options are not persisted, so this is a best-effort reconstruction (#2508). 
+ * + * @return the matching profile name, or empty if {@code options} is null/empty or matches none + */ + public static Optional detect(Map options) { + if (options == null || options.isEmpty()) { + return Optional.empty(); + } + return PROFILES.entrySet().stream() + .filter(entry -> entry.getValue().equals(options)) + .map(Map.Entry::getKey) + .findFirst(); + } +} diff --git a/src/main/resources/errors.yaml b/src/main/resources/errors.yaml index 6d267f0457..160814476f 100644 --- a/src/main/resources/errors.yaml +++ b/src/main/resources/errors.yaml @@ -1819,6 +1819,31 @@ request-errors: Resend the command using a known metric. + - scope: SCHEMA + code: UNKNOWN_VECTOR_INDEXING_PROFILE + title: Vector indexing profile is unknown + body: |- + The command attempted to create a vector index using an indexing profile that is not known by the API. + + A profile is selected by setting `vectorIndexing` to a profile name (string); the API expands it into a set of indexing options. To set options directly instead, set `vectorIndexing` to an object of indexing options. + + The known profiles are: ${knownProfiles}. + The command attempted to use the profile: ${unknownProfile}. + + Resend the command using a known profile, or set `vectorIndexing` to an object of indexing options. + + - scope: SCHEMA + code: INVALID_VECTOR_INDEXING_OPTIONS + title: Vector indexing options are invalid + body: |- + The command attempted to create a vector index with invalid `vectorIndexing`. + + `vectorIndexing` is either a profile name (string) or an object of Cassandra SAI tuning options. Only the supported tuning options may be set, and the dedicated `metric` / `sourceModel` fields must be used instead of setting them as options. + + ${reason} + + Resend the command with corrected `vectorIndexing`. 
+ - scope: SCHEMA code: VECTOR_SEARCH_NOT_SUPPORTED title: Vector search not enabled for collection diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java index 59b7622c85..179b041e39 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/CreateTableIndexIntegrationTest.java @@ -2,6 +2,7 @@ import static io.stargate.sgv2.jsonapi.api.v1.util.DataApiCommandSenders.assertNamespaceCommand; import static io.stargate.sgv2.jsonapi.api.v1.util.DataApiCommandSenders.assertTableCommand; +import static org.assertj.core.api.Assertions.assertThat; import io.quarkus.test.common.WithTestResource; import io.quarkus.test.junit.QuarkusIntegrationTest; @@ -10,6 +11,7 @@ import io.stargate.sgv2.jsonapi.exception.SchemaException; import io.stargate.sgv2.jsonapi.testresource.DseTestResource; import jakarta.ws.rs.core.Response; +import java.util.List; import java.util.Map; import java.util.stream.Stream; import org.junit.jupiter.api.*; @@ -49,6 +51,61 @@ private void verifyCreatedVectorIndex(String indexName) { .hasIndex(indexName); } + /** + * Database error a cluster returns for custom SAI HNSW params when the feature is not enabled. + */ + private static final String SAI_CUSTOM_PARAMS_DISABLED = "SAI_HNSW_ALLOW_CUSTOM_PARAMETERS"; + + /** + * Creates a vector index whose {@code vectorIndexing} sets SAI tuning options. Those options need + * a cluster with {@code SAI_HNSW_ALLOW_CUSTOM_PARAMETERS}; only that specific backend rejection + * is tolerated (skipped via assumption), because there is nothing to round-trip there. Any other + * error — request shape, profile expansion, option rendering, or an unrelated server failure — is + * a real regression and fails the test rather than hiding it as a skip. 
+ */ + @SuppressWarnings("unchecked") + private void createTunedVectorIndexOrSkip( + String indexName, String column, String vectorIndexingJson) { + var validator = + assertTableCommand(keyspaceName, vectorTableName) + .postCreateVectorIndex( + """ + { + "name": "%s", + "definition": { + "column": "%s", + "options": { "vectorIndexing": %s } + } + } + """ + .formatted(indexName, column, vectorIndexingJson)); + + List> errors = validator.response().extract().path("errors"); + boolean customParamsDisabled = + errors != null + && errors.size() == 1 + && String.valueOf(errors.get(0).get("message")).contains(SAI_CUSTOM_PARAMS_DISABLED); + Assumptions.assumeFalse( + customParamsDisabled, + () -> "skipping round-trip: cluster has not enabled " + SAI_CUSTOM_PARAMS_DISABLED); + + // Not the tolerated rejection: any other (or no) error must be asserted, not skipped. + validator.wasSuccessful(); + } + + /** The {@code vectorIndexing} echoed back by listIndexes for the given index (string or map). */ + private Object readBackVectorIndexing(String indexName) { + return assertTableCommand(keyspaceName, vectorTableName) + .templated() + .listIndexes(true) + .wasSuccessful() + .response() + .extract() + .path( + "status.indexes.find { it.name == '%s' }.definition.options.vectorIndexing" + .formatted(indexName)); + } + @BeforeAll public final void createTestTables() { // Create test tables for indexing: first one for "regular" indexes @@ -96,7 +153,9 @@ public final void createTestTables() { Map.entry("vector_type_4", Map.of("type", "vector", "dimension", 1024)), Map.entry("vector_type_5", Map.of("type", "vector", "dimension", 1024)), Map.entry("vector_type_6", Map.of("type", "vector", "dimension", 1024)), - Map.entry("vector_type_7", Map.of("type", "vector", "dimension", 1024))), + Map.entry("vector_type_7", Map.of("type", "vector", "dimension", 1024)), + Map.entry("vector_type_8", Map.of("type", "vector", "dimension", 1024)), + Map.entry("vector_type_9", Map.of("type", "vector", 
"dimension", 1024))), "id") .wasSuccessful(); @@ -596,6 +655,34 @@ public void createVectorIndexWithCorrectIndexType() { verifyCreatedVectorIndex("vector_type_6_idx"); } + + @Test + public void createVectorIndexWithProfileRoundTrip() { + createTunedVectorIndexOrSkip("vector_type_8_idx", "vector_type_8", "\"small-high-recall\""); + + verifyCreatedVectorIndex("vector_type_8_idx"); + // The profile name is not persisted; read-back detects it from the applied options and + // echoes the name back. + assertThat(readBackVectorIndexing("vector_type_8_idx")).isEqualTo("small-high-recall"); + } + + @Test + @SuppressWarnings("unchecked") + public void createVectorIndexWithRawOptionsRoundTrip() { + // Both keys are HNSW params the backend recognizes (gated by + // SAI_HNSW_ALLOW_CUSTOM_PARAMETERS), + // and the pair matches no profile, so read-back echoes the raw options rather than a name. + createTunedVectorIndexOrSkip( + "vector_type_9_idx", + "vector_type_9", + "{ \"maximum_node_connections\": 24, \"construction_beam_width\": 150 }"); + + verifyCreatedVectorIndex("vector_type_9_idx"); + // Options that match no profile are echoed back verbatim, as the strings CQL stores. 
+ assertThat((Map) readBackVectorIndexing("vector_type_9_idx")) + .containsEntry("maximum_node_connections", "24") + .containsEntry("construction_beam_width", "150"); + } } @Nested @@ -1136,6 +1223,90 @@ public void invalidSourceModel() { "The command attempted to use the source model: invalid_source_model."); } + @Test + public void unknownIndexingProfile() { + assertTableCommand(keyspaceName, vectorTableName) + .postCreateVectorIndex( + """ + { + "name": "vector_type_7_idx", + "definition": { + "column": "vector_type_7", + "options": { + "vectorIndexing": "no-such-profile" + } + } + } + """) + .hasSingleApiError( + SchemaException.Code.UNKNOWN_VECTOR_INDEXING_PROFILE, + SchemaException.class, + "The command attempted to use the profile: no-such-profile."); + } + + @Test + public void reservedOptionRejected() { + assertTableCommand(keyspaceName, vectorTableName) + .postCreateVectorIndex( + """ + { + "name": "vector_type_7_idx", + "definition": { + "column": "vector_type_7", + "options": { + "vectorIndexing": { "similarity_function": "COSINE" } + } + } + } + """) + .hasSingleApiError( + SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS, + SchemaException.class, + "The option 'similarity_function' must be set using its dedicated field"); + } + + @Test + public void unsupportedOptionRejected() { + assertTableCommand(keyspaceName, vectorTableName) + .postCreateVectorIndex( + """ + { + "name": "vector_type_7_idx", + "definition": { + "column": "vector_type_7", + "options": { + "vectorIndexing": { "class_name": "StorageAttachedIndex" } + } + } + } + """) + .hasSingleApiError( + SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS, + SchemaException.class, + "Unsupported vector indexing option 'class_name'"); + } + + @Test + public void nonScalarOptionValueRejected() { + assertTableCommand(keyspaceName, vectorTableName) + .postCreateVectorIndex( + """ + { + "name": "vector_type_7_idx", + "definition": { + "column": "vector_type_7", + "options": { + "vectorIndexing": { 
"alpha": [1, 2, 3] } + } + } + } + """) + .hasSingleApiError( + SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS, + SchemaException.class, + "The option 'alpha' must be a number."); + } + @Test public void createVectorIndexWithUnsupportedIndexType() { assertTableCommand(keyspaceName, vectorTableName) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java new file mode 100644 index 0000000000..1522a9eb81 --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndexTest.java @@ -0,0 +1,417 @@ +package io.stargate.sgv2.jsonapi.service.schema.tables; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.assertj.core.api.Assertions.entry; + +import com.fasterxml.jackson.databind.ObjectMapper; +import io.stargate.sgv2.jsonapi.api.model.command.table.definition.indexes.VectorIndexDefinitionDesc.VectorIndexDescOptions; +import io.stargate.sgv2.jsonapi.api.model.command.table.definition.indexes.VectorIndexDefinitionDesc.VectorIndexingDesc; +import io.stargate.sgv2.jsonapi.config.constants.VectorConstants; +import io.stargate.sgv2.jsonapi.exception.SchemaException; +import java.math.BigDecimal; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; + +/** + * Unit tests for the overloaded {@code vectorIndexing} on {@link ApiVectorIndex}, where the value + * is either a profile name string or a raw SAI options object. + * + *

Covers deserialization of a request body, validation, the resulting CQL index options map, and + * the describe-back. Needs no database (end-to-end also depends on the backend allowing custom SAI + * parameters). + */ +class ApiVectorIndexTest { + + private static final ObjectMapper MAPPER = new ObjectMapper(); + + private static VectorIndexingDesc profile(String profile) { + return VectorIndexingDesc.ofProfile(profile); + } + + private static VectorIndexingDesc options(Map options) { + return VectorIndexingDesc.ofOptions(options); + } + + /** Options with a dedicated field, so rejected inside options. */ + static Stream reservedOptions() { + return VectorConstants.CQLAnnIndex.RESERVED_OPTIONS.stream(); + } + + /** + * The overloaded {@code vectorIndexing} deserializes by JSON type: a string is a profile, an + * object is raw options, and anything else is rejected. + */ + @Nested + class RequestShape { + + @Test + void stringDeserializesToProfile() throws Exception { + var opts = + MAPPER.readValue( + "{\"vectorIndexing\": \"small-high-recall\"}", VectorIndexDescOptions.class); + + assertThat(opts.vectorIndexing()).isNotNull(); + assertThat(opts.vectorIndexing().profile()).isEqualTo("small-high-recall"); + assertThat(opts.vectorIndexing().options()).isNull(); + } + + @Test + void objectDeserializesToRawOptions() throws Exception { + var opts = + MAPPER.readValue( + """ + { + "vectorIndexing": { "maximum_node_connections": 32, "enable_hierarchy": true } + } + """, + VectorIndexDescOptions.class); + + assertThat(opts.vectorIndexing()).isNotNull(); + assertThat(opts.vectorIndexing().profile()).isNull(); + assertThat(opts.vectorIndexing().options()) + .containsEntry("maximum_node_connections", 32) + .containsEntry("enable_hierarchy", true); + } + + @Test + void absentVectorIndexingIsNull() throws Exception { + var opts = MAPPER.readValue("{\"metric\": \"cosine\"}", VectorIndexDescOptions.class); + + assertThat(opts.vectorIndexing()).isNull(); + } + + @ParameterizedTest 
+ @ValueSource(strings = {"123", "true", "[\"small-high-recall\"]"}) + void nonStringNonObjectRejected(String value) { + // Jackson may surface the deserializer's SchemaException directly or wrapped, so assert one + // with the expected code is somewhere in the cause chain. + assertThatThrownBy( + () -> + MAPPER.readValue( + "{\"vectorIndexing\": " + value + "}", VectorIndexDescOptions.class)) + .satisfies( + t -> { + var schemaException = findSchemaException(t); + assertThat(schemaException).as("a SchemaException in the cause chain").isNotNull(); + assertThat(schemaException.code) + .isEqualTo(SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.name()); + }); + } + + private SchemaException findSchemaException(Throwable t) { + for (Throwable cause = t; cause != null; cause = cause.getCause()) { + if (cause instanceof SchemaException schemaException) { + return schemaException; + } + } + return null; + } + } + + /** A {@code vectorIndexing} value resolves to the expected CQL index options map. 
*/ + @Nested + class ApplyIndexingOptions { + + @Test + void nullIsNoOp() { + var options = new HashMap(); + + ApiVectorIndex.applyIndexingOptions(options, null); + + assertThat(options).isEmpty(); + } + + @Test + void emptyOptionsIsNoOp() { + var options = new HashMap(); + + ApiVectorIndex.applyIndexingOptions(options, options(Map.of())); + + assertThat(options).isEmpty(); + } + + @Test + void profileExpands() { + var options = new HashMap(); + + ApiVectorIndex.applyIndexingOptions(options, profile("small-high-recall")); + + assertThat(options) + .containsAllEntriesOf(VectorIndexProfiles.forName("small-high-recall").orElseThrow()); + } + + @Test + void optionsApplied() { + var options = new HashMap(); + + ApiVectorIndex.applyIndexingOptions( + options, + options( + Map.of("maximum_node_connections", 32, "enable_hierarchy", true, "alpha", "1.2"))); + + assertThat(options) + .containsEntry("maximum_node_connections", "32") + .containsEntry("enable_hierarchy", "true") + .containsEntry("alpha", "1.2"); + } + + @Test + void mergesWithExistingOptions() { + var options = new HashMap(); + options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OTHER"); + + ApiVectorIndex.applyIndexingOptions(options, options(Map.of("maximum_node_connections", 16))); + + assertThat(options) + .containsEntry(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OTHER") + .containsEntry("maximum_node_connections", "16"); + } + + @Test + void allAllowedOptionsAccepted() { + var options = new HashMap(); + + ApiVectorIndex.applyIndexingOptions( + options, + options( + Map.of( + "maximum_node_connections", + 16, + "construction_beam_width", + 100, + "neighborhood_overflow", + 1.2, + "alpha", + 1.2, + "enable_hierarchy", + true))); + + assertThat(options.keySet()) + .containsExactlyInAnyOrderElementsOf(VectorConstants.CQLAnnIndex.ALLOWED_OPTIONS); + } + + @Test + void unknownProfileThrows() { + assertSchemaError( + profile("no-such-profile"), SchemaException.Code.UNKNOWN_VECTOR_INDEXING_PROFILE); + } + + 
@Test + void blankProfileThrows() { + assertSchemaError(profile(""), SchemaException.Code.UNKNOWN_VECTOR_INDEXING_PROFILE); + } + + @ParameterizedTest + @MethodSource( + "io.stargate.sgv2.jsonapi.service.schema.tables.ApiVectorIndexTest#reservedOptions") + void reservedOptionThrows(String reservedOption) { + assertSchemaError( + options(Map.of(reservedOption, "x")), + SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); + } + + @ParameterizedTest + @ValueSource(strings = {"class_name", "target", "optimize_for", "bogus_option"}) + void unsupportedOptionThrows(String optionName) { + assertSchemaError( + options(Map.of(optionName, "x")), SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); + } + + @Test + void numericOptionsUsePlainString() { + var options = new HashMap(); + + // JSON numbers arrive as BigDecimal; the CQL value must not use scientific notation. + ApiVectorIndex.applyIndexingOptions( + options, + options( + Map.of( + "construction_beam_width", new BigDecimal("1E+2"), + "alpha", new BigDecimal("1.5")))); + + assertThat(options) + .containsEntry("construction_beam_width", "100") + .containsEntry("alpha", "1.5"); + } + + @Test + void nonScalarOptionValueThrows() { + // "alpha" is an allowed key, so this reaches the value-type check + assertSchemaError( + options(Map.of("alpha", List.of(1, 2))), + SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); + assertSchemaError( + options(Map.of("alpha", Map.of("x", 1))), + SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); + } + + @Test + void numericOptionRejectsNonNumericString() { + // Driver renders option values unescaped into WITH OPTIONS = {...}; a quote-bearing value + // would break out of the literal, so a numeric option must parse as a number. 
+ assertSchemaError( + options(Map.of("alpha", "1.2'} AND injected={'x':'y")), + SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); + } + + @Test + void booleanOptionAcceptsBooleanOrString() { + var fromBoolean = new HashMap(); + ApiVectorIndex.applyIndexingOptions(fromBoolean, options(Map.of("enable_hierarchy", true))); + assertThat(fromBoolean).containsEntry("enable_hierarchy", "true"); + + var fromString = new HashMap(); + ApiVectorIndex.applyIndexingOptions(fromString, options(Map.of("enable_hierarchy", "TRUE"))); + assertThat(fromString).containsEntry("enable_hierarchy", "true"); + } + + @Test + void booleanOptionRejectsNonBoolean() { + assertSchemaError( + options(Map.of("enable_hierarchy", "yes")), + SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS); + } + + private void assertSchemaError(VectorIndexingDesc desc, SchemaException.Code code) { + var options = new HashMap(); + assertThatThrownBy(() -> ApiVectorIndex.applyIndexingOptions(options, desc)) + .isInstanceOf(SchemaException.class) + .satisfies(t -> assertThat(((SchemaException) t).code).isEqualTo(code.name())); + } + } + + /** The CQL index options map describes back to the expected {@code vectorIndexing} object. 
*/ + @Nested + class DescribeIndexingOptions { + + @Test + void nullWhenNoTuningOptions() { + var options = new HashMap(); + options.put(CQLSAIIndex.Options.CLASS_NAME, CQLSAIIndex.SAI_CLASS_NAME); + options.put(CQLSAIIndex.Options.TARGET, "my_vector"); + options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OTHER"); + options.put(VectorConstants.CQLAnnIndex.SIMILARITY_FUNCTION, "COSINE"); + + assertThat(ApiVectorIndex.describeIndexingOptions(options)).isNull(); + } + + @Test + void describesTuningOptionsUnderOptions() { + // options that do not match any profile are echoed verbatim under options + var options = new HashMap(); + options.put(CQLSAIIndex.Options.CLASS_NAME, CQLSAIIndex.SAI_CLASS_NAME); + options.put(CQLSAIIndex.Options.TARGET, "my_vector"); + options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OPENAI_V3_SMALL"); + options.put(VectorConstants.CQLAnnIndex.SIMILARITY_FUNCTION, "COSINE"); + options.put(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "32"); + options.put(VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "123"); + + var described = ApiVectorIndex.describeIndexingOptions(options); + + assertThat(described).isNotNull(); + assertThat(described.profile()).isNull(); + assertThat(described.options()) + .containsOnly( + entry(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "32"), + entry(VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "123")); + } + + @Test + void detectsKnownProfileFromOptions() { + // options that exactly match small-high-recall's expansion are echoed as the profile name + var options = new HashMap(); + options.put(CQLSAIIndex.Options.CLASS_NAME, CQLSAIIndex.SAI_CLASS_NAME); + options.put(CQLSAIIndex.Options.TARGET, "my_vector"); + options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OPENAI_V3_SMALL"); + options.put(VectorConstants.CQLAnnIndex.SIMILARITY_FUNCTION, "COSINE"); + options.put(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "32"); + 
options.put(VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "200"); + + var described = ApiVectorIndex.describeIndexingOptions(options); + + assertThat(described).isNotNull(); + assertThat(described.profile()).isEqualTo("small-high-recall"); + assertThat(described.options()).isNull(); + } + + @Test + void omitsNonAllowlistedKeys() { + var options = new HashMap(); + options.put(CQLSAIIndex.Options.CLASS_NAME, CQLSAIIndex.SAI_CLASS_NAME); + options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OTHER"); + // a non-profile value, so the allow-listed key is echoed as raw options + options.put(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "20"); + // a real SAI option the API does not manage (e.g. set directly via CQL), not surfaced + options.put("optimize_for", "recall"); + + var described = ApiVectorIndex.describeIndexingOptions(options); + + assertThat(described).isNotNull(); + assertThat(described.options()) + .containsOnlyKeys(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS); + } + + @Test + void emptyMapDescribesNull() { + assertThat(ApiVectorIndex.describeIndexingOptions(Map.of())).isNull(); + } + } + + /** tuningOptions keeps only the allow-listed tuning options, dropping reserved and structural. 
*/ + @Nested + class TuningOptionsFilter { + + @Test + void keepsAllowlistedOptionsExcludingReservedAndStructural() { + var indexOptions = new HashMap(); + indexOptions.put(CQLSAIIndex.Options.CLASS_NAME, CQLSAIIndex.SAI_CLASS_NAME); + indexOptions.put(CQLSAIIndex.Options.TARGET, "my_vector"); + indexOptions.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OPENAI_V3_SMALL"); + indexOptions.put(VectorConstants.CQLAnnIndex.SIMILARITY_FUNCTION, "COSINE"); + // values are kept as-is; the filter only drops keys, it does not interpret them + indexOptions.put(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "99"); + indexOptions.put(VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "200"); + + assertThat(ApiVectorIndex.tuningOptions(indexOptions)) + .containsOnly( + entry(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "99"), + entry(VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "200")); + } + + @Test + void emptyWhenNoTuningOptions() { + var indexOptions = new HashMap(); + indexOptions.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, "OTHER"); + + assertThat(ApiVectorIndex.tuningOptions(indexOptions)).isEmpty(); + } + } + + /** Applying options then describing them round-trips the tuning options. 
*/ + @Nested + class RoundTrip { + + @Test + void applyThenDescribe() { + var options = new HashMap(); + + ApiVectorIndex.applyIndexingOptions( + options, options(Map.of("maximum_node_connections", 32, "alpha", 1.2))); + var described = ApiVectorIndex.describeIndexingOptions(options); + + assertThat(described).isNotNull(); + assertThat(described.options()) + .containsOnly( + entry(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "32"), + entry(VectorConstants.CQLAnnIndex.ALPHA, "1.2")); + } + } +} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfilesTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfilesTest.java new file mode 100644 index 0000000000..3b45ac05b6 --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfilesTest.java @@ -0,0 +1,94 @@ +package io.stargate.sgv2.jsonapi.service.schema.tables; + +import static org.assertj.core.api.Assertions.assertThat; + +import io.stargate.sgv2.jsonapi.config.constants.VectorConstants; +import java.util.Map; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +class VectorIndexProfilesTest { + + @Nested + class ForName { + @Test + void knownProfile() { + assertThat(VectorIndexProfiles.forName("small-high-recall")) + .isPresent() + .get() + .satisfies(opts -> assertThat(opts).isNotEmpty()); + } + + @Test + void caseInsensitive() { + assertThat(VectorIndexProfiles.forName("SMALL-HIGH-RECALL")) + .isEqualTo(VectorIndexProfiles.forName("small-high-recall")); + } + + @Test + void unknownProfile() { + assertThat(VectorIndexProfiles.forName("does-not-exist")).isEmpty(); + } + + @Test + void nullOrBlank() { + assertThat(VectorIndexProfiles.forName(null)).isEmpty(); + assertThat(VectorIndexProfiles.forName(" ")).isEmpty(); + } + } + + @Nested + class KnownNames { + @Test + void listsProfiles() { + assertThat(VectorIndexProfiles.knownNames()).contains("small-high-recall", 
"big-low-latency"); + } + } + + @Nested + class ProfileContents { + @Test + void noReservedOptions() { + for (var name : VectorIndexProfiles.knownNames()) { + var options = VectorIndexProfiles.forName(name).orElseThrow(); + assertThat(options.keySet()) + .doesNotContainAnyElementsOf(VectorConstants.CQLAnnIndex.RESERVED_OPTIONS); + } + } + } + + @Nested + class Detect { + @Test + void exactMatchReturnsProfile() { + var smallHighRecall = VectorIndexProfiles.forName("small-high-recall").orElseThrow(); + assertThat(VectorIndexProfiles.detect(smallHighRecall)).contains("small-high-recall"); + } + + @Test + void noMatchWhenOptionsDiffer() { + assertThat( + VectorIndexProfiles.detect( + Map.of(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "20"))) + .isEmpty(); + } + + @Test + void noMatchWhenSupersetOfAProfile() { + // a superset of small-high-recall is not an exact match + assertThat( + VectorIndexProfiles.detect( + Map.of( + VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "32", + VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "200", + VectorConstants.CQLAnnIndex.ALPHA, "1.2"))) + .isEmpty(); + } + + @Test + void emptyOrNull() { + assertThat(VectorIndexProfiles.detect(Map.of())).isEmpty(); + assertThat(VectorIndexProfiles.detect(null)).isEmpty(); + } + } +}