diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/VectorIndexingDescDeserializer.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/VectorIndexingDescDeserializer.java
new file mode 100644
index 0000000000..6865c9a8ba
--- /dev/null
+++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/VectorIndexingDescDeserializer.java
@@ -0,0 +1,65 @@
+package io.stargate.sgv2.jsonapi.api.model.command.deserializers;
+
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.DeserializationContext;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
+import io.stargate.sgv2.jsonapi.api.model.command.table.definition.indexes.VectorIndexDefinitionDesc.VectorIndexingDesc;
+import io.stargate.sgv2.jsonapi.exception.SchemaException;
+import io.stargate.sgv2.jsonapi.util.JsonUtil;
+import java.io.IOException;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+/**
+ * Deserializes the overloaded {@code vectorIndexing} value, which is either:
+ *
+ * <ul>
+ *   <li>a JSON string: a named profile expanded into SAI options, e.g.
+ *       <pre>"vectorIndexing": "small-high-recall"</pre>
+ *   <li>a JSON object: raw Cassandra SAI tuning options, e.g.
+ *       <pre>"vectorIndexing": { "maximum_node_connections": 32 }</pre>
+ * </ul>
+ *
+ * <p>Anything else (number, boolean, array) is a request error. Per #2508 the field is overloaded
+ * by JSON type rather than separate {@code profile} / {@code options} sub-keys, so profile and raw
+ * options are mutually exclusive in one request.
+ */
+public class VectorIndexingDescDeserializer extends StdDeserializer<VectorIndexingDesc> {
+
+  /** Target type for the object form: option name to its untyped JSON value. */
+  private static final TypeReference<Map<String, Object>> OPTIONS_TYPE =
+      new TypeReference<>() {};
+
+  public VectorIndexingDescDeserializer() {
+    super(VectorIndexingDesc.class);
+  }
+
+  /**
+   * Reads the value as a tree and discriminates on its JSON type.
+   *
+   * @return a profile description for a JSON string, an options description for a JSON object
+   * @throws IOException if the underlying parser fails or the options object cannot be bound
+   */
+  @Override
+  public VectorIndexingDesc deserialize(
+      JsonParser jsonParser, DeserializationContext deserializationContext) throws IOException {
+    JsonNode node = deserializationContext.readTree(jsonParser);
+
+    if (node.isTextual()) {
+      // named profile, validated at apply time
+      return VectorIndexingDesc.ofProfile(node.textValue());
+    }
+    if (node.isObject()) {
+      // raw SAI options. Binding through the active context applies the mapper config (e.g. float
+      // handling), as a Map field would, without assuming the parser's codec is an ObjectMapper
+      // (it is an ObjectReader on some read paths, which made the previous cast fail).
+      Map<String, Object> options =
+          deserializationContext.readTreeAsValue(
+              node, deserializationContext.getTypeFactory().constructType(OPTIONS_TYPE));
+      return VectorIndexingDesc.ofOptions(options);
+    }
+
+    throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get(
+        Map.of(
+            "reason",
+            "`vectorIndexing` must be either a profile name (string) or an object of indexing "
+                + "options, but was: "
+                + JsonUtil.nodeTypeAsString(node)
+                + "."));
+  }
+}
diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java
index 03d355c0a7..3b2aba68e6 100644
--- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java
+++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/table/definition/indexes/VectorIndexDefinitionDesc.java
@@ -3,6 +3,9 @@
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
+import com.fasterxml.jackson.annotation.JsonValue;
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
+import io.stargate.sgv2.jsonapi.api.model.command.deserializers.VectorIndexingDescDeserializer;
import io.stargate.sgv2.jsonapi.config.constants.TableDescConstants;
import io.stargate.sgv2.jsonapi.config.constants.VectorConstants;
import io.stargate.sgv2.jsonapi.config.constants.VectorIndexDescDefaults;
@@ -11,6 +14,7 @@
import jakarta.annotation.Nullable;
import jakarta.validation.constraints.NotNull;
import jakarta.validation.constraints.Pattern;
+import java.util.Map;
import org.eclipse.microprofile.openapi.annotations.enums.SchemaType;
import org.eclipse.microprofile.openapi.annotations.media.Schema;
@@ -34,7 +38,8 @@ public record VectorIndexDefinitionDesc(
/** Options for the vector index */
@JsonPropertyOrder({
VectorConstants.VectorColumn.METRIC,
- VectorConstants.VectorColumn.SOURCE_MODEL
+ VectorConstants.VectorColumn.SOURCE_MODEL,
+ VectorConstants.VectorColumn.VECTOR_INDEXING
})
public record VectorIndexDescOptions(
@Nullable
@@ -59,5 +64,46 @@ public record VectorIndexDescOptions(
+ EmbeddingSourceModel.ApiConstants.ALL)
@JsonInclude(JsonInclude.Include.NON_NULL)
@JsonProperty(VectorConstants.VectorColumn.SOURCE_MODEL)
- String sourceModel) {}
+ String sourceModel,
+ //
+ @Nullable
+ @Schema(
+ description =
+ "Optional vector (SAI) indexing configuration. Either a profile name (string) "
+ + "the API expands into SAI options, e.g. \"small-high-recall\"; or an object "
+ + "of Cassandra SAI tuning options (snake_case), restricted to: "
+ + "maximum_node_connections, construction_beam_width, neighborhood_overflow, "
+ + "alpha, enable_hierarchy, e.g. {\"maximum_node_connections\": 32, "
+ + "\"alpha\": 1.2}. A profile and explicit options are mutually exclusive. "
+ + "Set \"metric\" / \"sourceModel\" via their dedicated fields, not here.",
+ oneOf = {String.class, Map.class})
+ @JsonInclude(JsonInclude.Include.NON_NULL)
+ @JsonProperty(VectorConstants.VectorColumn.VECTOR_INDEXING)
+ VectorIndexingDesc vectorIndexing) {}
+
+  /**
+   * Overloaded {@code vectorIndexing} value: either a named {@code profile} (JSON string) or raw
+   * SAI tuning {@code options} (JSON object). The two factories below each populate one component
+   * and leave the other null. {@link VectorIndexingDescDeserializer} discriminates by JSON type;
+   * {@link #jsonValue()} serializes back to the bare string or object.
+   *
+   * @param profile the profile name, or null when raw options are used
+   * @param options option name to its untyped JSON value, or null when a profile is used
+   */
+  @JsonDeserialize(using = VectorIndexingDescDeserializer.class)
+  public record VectorIndexingDesc(
+      @Nullable String profile, @Nullable Map<String, Object> options) {
+
+    /** A {@code vectorIndexing} that selects a named profile. */
+    public static VectorIndexingDesc ofProfile(String profile) {
+      return new VectorIndexingDesc(profile, null);
+    }
+
+    /** A {@code vectorIndexing} that sets raw SAI options directly. */
+    public static VectorIndexingDesc ofOptions(Map<String, Object> options) {
+      return new VectorIndexingDesc(null, options);
+    }
+
+    /**
+     * Serializes to the bare profile string when a profile is set, otherwise to the options map.
+     */
+    @JsonValue
+    Object jsonValue() {
+      return profile != null ? profile : options;
+    }
+  }
}
diff --git a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java
index 753872a95b..18e0fea8fc 100644
--- a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java
+++ b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/VectorConstants.java
@@ -1,17 +1,51 @@
package io.stargate.sgv2.jsonapi.config.constants;
+import java.util.Set;
+
public interface VectorConstants {
   interface VectorColumn {
     String DIMENSION = "dimension";
     String METRIC = "metric";
     String SOURCE_MODEL = "sourceModel";
     String SERVICE = ServiceDescConstants.SERVICE;
+    String VECTOR_INDEXING = "vectorIndexing";
   }
   interface Vectorize extends ServiceDescConstants {}
+  /**
+   * CQL {@code WITH OPTIONS} keys for a vector (SAI) index. {@link #SOURCE_MODEL} and {@link
+   * #SIMILARITY_FUNCTION} map to dedicated API fields ({@code sourceModel} / {@code metric}); the
+   * rest are tuning options set through the object form of {@code vectorIndexing}.
+   */
   interface CQLAnnIndex {
     String SOURCE_MODEL = "source_model";
     String SIMILARITY_FUNCTION = "similarity_function";
+    String MAXIMUM_NODE_CONNECTIONS = "maximum_node_connections";
+    String CONSTRUCTION_BEAM_WIDTH = "construction_beam_width";
+    String NEIGHBORHOOD_OVERFLOW = "neighborhood_overflow";
+    String ALPHA = "alpha";
+    String ENABLE_HIERARCHY = "enable_hierarchy";
+
+    /**
+     * Options with dedicated API fields ({@code metric} / {@code sourceModel}); rejected when
+     * passed as {@code vectorIndexing} options.
+     */
+    Set<String> RESERVED_OPTIONS = Set.of(SOURCE_MODEL, SIMILARITY_FUNCTION);
+
+    /**
+     * SAI tuning options settable through {@code vectorIndexing} options. {@code optimize_for}
+     * exists in OSS Cassandra but is de-emphasized in DSE 6.9 / HCD, so it is left out for now.
+     */
+    Set<String> ALLOWED_OPTIONS =
+        Set.of(
+            MAXIMUM_NODE_CONNECTIONS,
+            CONSTRUCTION_BEAM_WIDTH,
+            NEIGHBORHOOD_OVERFLOW,
+            ALPHA,
+            ENABLE_HIERARCHY);
+
+    /** Allowed options whose value must be a boolean; the rest are numeric. */
+    Set<String> BOOLEAN_OPTIONS = Set.of(ENABLE_HIERARCHY);
   }
}
diff --git a/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java b/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java
index be262773fb..6de0b2128a 100644
--- a/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java
+++ b/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java
@@ -47,6 +47,7 @@ public enum Code implements ErrorCode {
INVALID_INDEXING_DEFINITION,
INVALID_USAGE_OF_VECTORIZE, // legacy: converted from ErrorCodeV1
INVALID_USER_DEFINED_TYPE_NAME,
+ INVALID_VECTOR_INDEXING_OPTIONS,
LEXICAL_FEATURE_NOT_ENABLED,
LEXICAL_NOT_ENABLED_FOR_COLLECTION,
MISSING_ALTER_TABLE_OPERATIONS,
@@ -76,6 +77,7 @@ public enum Code implements ErrorCode {
UNKNOWN_PARTITION_SORT_COLUMNS,
UNKNOWN_PRIMITIVE_DATA_TYPE,
UNKNOWN_USER_DEFINED_TYPE,
+ UNKNOWN_VECTOR_INDEXING_PROFILE,
UNKNOWN_VECTOR_METRIC,
UNKNOWN_VECTOR_SOURCE_MODEL,
UNSUPPORTED_DATA_TYPE_TABLE_CREATION,
diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java
index 0bcf7a85d8..f754955fd2 100644
--- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java
+++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiVectorIndex.java
@@ -6,6 +6,7 @@
import com.datastax.oss.driver.api.core.CqlIdentifier;
import com.datastax.oss.driver.api.core.metadata.schema.IndexMetadata;
+import com.google.common.annotations.VisibleForTesting;
import io.stargate.sgv2.jsonapi.api.model.command.table.IndexDesc;
import io.stargate.sgv2.jsonapi.api.model.command.table.SchemaDescSource;
import io.stargate.sgv2.jsonapi.api.model.command.table.definition.indexes.RegularIndexDefinitionDesc;
@@ -17,6 +18,7 @@
import io.stargate.sgv2.jsonapi.service.schema.SimilarityFunction;
import io.stargate.sgv2.jsonapi.service.schema.tables.factories.IndexFactoryFromCql;
import io.stargate.sgv2.jsonapi.service.schema.tables.factories.IndexFactoryFromIndexDesc;
+import java.math.BigDecimal;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -54,7 +56,9 @@ public IndexDesc getSchemaDescription(
var definitionOptions =
new VectorIndexDefinitionDesc.VectorIndexDescOptions(
- similarityFunction.apiName(), sourceModel.apiName());
+ similarityFunction.apiName(),
+ sourceModel.apiName(),
+ describeIndexingOptions(indexOptions));
var definition =
new VectorIndexDefinitionDesc(cqlIdentifierToJsonKey(targetColumn), definitionOptions);
@@ -76,6 +80,156 @@ public VectorIndexDefinitionDesc definition() {
};
}
+  /**
+   * Builds the {@code vectorIndexing} description from the CQL index options map, keeping only the
+   * supported tuning options (see {@link VectorConstants.CQLAnnIndex#ALLOWED_OPTIONS}). When
+   * {@link VectorIndexProfiles#detect(Map)} recognises those options as a known profile, the
+   * profile name is echoed; otherwise the raw options are. Structural, dedicated-field, and
+   * CQL-only keys are dropped to stay symmetric with what the API accepts. The profile is not
+   * stored, so it is detected from the options.
+   *
+   * @param indexOptions the CQL index options, option name to its String value
+   * @return the {@code vectorIndexing} description, or null when there are no supported tuning
+   *     options
+   */
+  @VisibleForTesting
+  static VectorIndexDefinitionDesc.VectorIndexingDesc describeIndexingOptions(
+      Map<String, String> indexOptions) {
+    var tuning = tuningOptions(indexOptions);
+    if (tuning.isEmpty()) {
+      return null;
+    }
+    return VectorIndexProfiles.detect(tuning)
+        .map(VectorIndexDefinitionDesc.VectorIndexingDesc::ofProfile)
+        .orElseGet(
+            () ->
+                // copy widens the String values to the Object-valued map the description carries
+                VectorIndexDefinitionDesc.VectorIndexingDesc.ofOptions(
+                    new LinkedHashMap<>(tuning)));
+  }
+
+
+  /**
+   * Keeps only the {@link VectorConstants.CQLAnnIndex#ALLOWED_OPTIONS} entries of a CQL options
+   * map.
+   *
+   * @param indexOptions the CQL index options, option name to its String value
+   * @return a new map holding the allowed entries in the iteration order of {@code indexOptions};
+   *     empty when none are present
+   */
+  @VisibleForTesting
+  static Map<String, String> tuningOptions(Map<String, String> indexOptions) {
+    Map<String, String> tuning = new LinkedHashMap<>();
+    for (var entry : indexOptions.entrySet()) {
+      if (VectorConstants.CQLAnnIndex.ALLOWED_OPTIONS.contains(entry.getKey())) {
+        tuning.put(entry.getKey(), entry.getValue());
+      }
+    }
+    return tuning;
+  }
+
+  /**
+   * Applies the request's {@code vectorIndexing} into the CQL index options map. {@code
+   * vectorIndexing} is either a {@code profile} name expanded via {@link VectorIndexProfiles}, or
+   * an {@code options} object of Cassandra SAI tuning options validated against {@link
+   * VectorConstants.CQLAnnIndex#ALLOWED_OPTIONS}. The two are mutually exclusive (see {@link
+   * io.stargate.sgv2.jsonapi.api.model.command.deserializers.VectorIndexingDescDeserializer}).
+   * {@code source_model} / {@code similarity_function} have dedicated fields and are rejected here.
+   *
+   * <p>Rejections are raised as {@code UNKNOWN_VECTOR_INDEXING_PROFILE} (unknown profile name) or
+   * {@code INVALID_VECTOR_INDEXING_OPTIONS} (reserved, unsupported, or badly typed option).
+   *
+   * @param indexOptions the CQL options map being built, mutated in place
+   * @param vectorIndexing the structured request value, may be null (then nothing is applied)
+   */
+  @VisibleForTesting
+  static void applyIndexingOptions(
+      Map<String, String> indexOptions,
+      VectorIndexDefinitionDesc.VectorIndexingDesc vectorIndexing) {
+
+    if (vectorIndexing == null) {
+      return;
+    }
+
+    // A profile expands to a set of options.
+    var profileName = vectorIndexing.profile();
+    if (profileName != null) {
+      var profileOptions =
+          VectorIndexProfiles.forName(profileName)
+              .orElseThrow(
+                  () ->
+                      SchemaException.Code.UNKNOWN_VECTOR_INDEXING_PROFILE.get(
+                          Map.of(
+                              "knownProfiles",
+                              errFmtJoin(VectorIndexProfiles.knownNames()),
+                              "unknownProfile",
+                              profileName)));
+      indexOptions.putAll(profileOptions);
+    }
+
+    // Raw options (mutually exclusive with a profile) are validated against the allow-list.
+    var options = vectorIndexing.options();
+    if (options != null) {
+      for (var entry : options.entrySet()) {
+        var optionName = entry.getKey();
+        // Reserved keys are checked first so the error can point at the dedicated field.
+        if (VectorConstants.CQLAnnIndex.RESERVED_OPTIONS.contains(optionName)) {
+          var dedicatedField =
+              VectorConstants.CQLAnnIndex.SOURCE_MODEL.equals(optionName)
+                  ? VectorConstants.VectorColumn.SOURCE_MODEL
+                  : VectorConstants.VectorColumn.METRIC;
+          throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get(
+              Map.of(
+                  "reason",
+                  "The option '%s' must be set using its dedicated field '%s', not as a vectorIndexing option."
+                      .formatted(optionName, dedicatedField)));
+        }
+        if (!VectorConstants.CQLAnnIndex.ALLOWED_OPTIONS.contains(optionName)) {
+          throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get(
+              Map.of(
+                  "reason",
+                  "Unsupported vector indexing option '%s'. Supported options: %s."
+                      .formatted(
+                          optionName, errFmtJoin(VectorConstants.CQLAnnIndex.ALLOWED_OPTIONS))));
+        }
+        indexOptions.put(optionName, optionValueToString(optionName, entry.getValue()));
+      }
+    }
+  }
+
+
+  /**
+   * Checks an option value and converts it to the String form stored in the CQL options map. CQL
+   * index options are a {@code Map<String, String>} that the driver emits unescaped into {@code
+   * WITH OPTIONS = {...}}, so a raw string would let a quote break out of the literal. Every
+   * allowed option is numeric or boolean, so the value is coerced to that type and anything else
+   * is rejected.
+   */
+  private static String optionValueToString(String optionName, Object value) {
+    boolean expectsBoolean = VectorConstants.CQLAnnIndex.BOOLEAN_OPTIONS.contains(optionName);
+    return expectsBoolean
+        ? booleanOptionValue(optionName, value)
+        : numericOptionValue(optionName, value);
+  }
+
+  /**
+   * Renders a boolean option value as the CQL literal {@code true} or {@code false}.
+   *
+   * @param optionName option being set, used only in the error message
+   * @param value a {@link Boolean}, or a String equal to "true" / "false" ignoring case
+   * @return exactly {@code "true"} or {@code "false"}
+   */
+  private static String booleanOptionValue(String optionName, Object value) {
+    if (value instanceof Boolean bool) {
+      return bool.toString();
+    }
+    if (value instanceof String text) {
+      // Return the canonical literal rather than echoing the caller's text. equalsIgnoreCase also
+      // matches case-folding look-alikes (e.g. LATIN SMALL LETTER LONG S, U+017F, for 's'), which
+      // a lower-casing of the input would leave untouched and pass through to CQL.
+      if ("true".equalsIgnoreCase(text)) {
+        return "true";
+      }
+      if ("false".equalsIgnoreCase(text)) {
+        return "false";
+      }
+    }
+    throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get(
+        Map.of("reason", "The option '%s' must be true or false.".formatted(optionName)));
+  }
+
+  /**
+   * Renders a numeric option value in plain (non-scientific) decimal notation for CQL.
+   *
+   * <p>Accepts a {@link BigDecimal}, any other {@link Number} (rendered with its own {@code
+   * toString()}), or a String that parses as a {@link BigDecimal} after trimming. Requiring a
+   * parseable number also rejects any quote/garbage that could break out of the CQL options
+   * literal.
+   *
+   * @param optionName option being set, used only in error messages
+   * @param value the raw request value
+   * @return the decimal text to store in the CQL options map
+   */
+  private static String numericOptionValue(String optionName, Object value) {
+    // toPlainString() writes out every digit implied by the exponent, so a few characters of
+    // input such as 1e999999999 would expand to roughly a billion characters. No supported option
+    // needs anything near this scale, so larger magnitudes are rejected before rendering.
+    final long maxPlainScale = 1_000;
+
+    BigDecimal decimal = null;
+    if (value instanceof BigDecimal exact) {
+      decimal = exact;
+    } else if (value instanceof Number other) {
+      return other.toString();
+    } else if (value instanceof String text) {
+      try {
+        decimal = new BigDecimal(text.trim());
+      } catch (NumberFormatException ignored) {
+        // not a number: falls through to the rejection below
+      }
+    }
+
+    if (decimal == null) {
+      throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get(
+          Map.of("reason", "The option '%s' must be a number.".formatted(optionName)));
+    }
+    // scale() is an int; widen before abs so Integer.MIN_VALUE cannot overflow
+    if (Math.abs((long) decimal.scale()) > maxPlainScale) {
+      throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get(
+          Map.of(
+              "reason",
+              "The option '%s' is outside the supported numeric range.".formatted(optionName)));
+    }
+    return decimal.toPlainString();
+  }
+
/**
* Logic to map from the name of the similarity function, from either the user or the CQL index,
* to a {@link SimilarityFunction} enum value.
@@ -245,6 +399,12 @@ public ApiVectorIndex create(
metricToUse);
}
+ // vectorIndexing is a profile name or raw options (mutually exclusive); metric / sourceModel
+ // above use dedicated fields.
+ var userVectorIndexing =
+ (indexDesc.options() == null) ? null : indexDesc.options().vectorIndexing();
+ applyIndexingOptions(indexOptions, userVectorIndexing);
+
return new ApiVectorIndex(
indexIdentifier, targetIdentifier, indexOptions, metricToUse, sourceModelToUse);
}
diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java
new file mode 100644
index 0000000000..0dffa05cb9
--- /dev/null
+++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/VectorIndexProfiles.java
@@ -0,0 +1,68 @@
+package io.stargate.sgv2.jsonapi.service.schema.tables;
+
+import io.stargate.sgv2.jsonapi.config.constants.VectorConstants;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+
+/**
+ * Named vector-index profiles, each mapping a profile name to Cassandra SAI indexing options. An
+ * alternative to passing raw options through {@code vectorIndexing}.
+ *
+ * <p>Profiles never set {@code source_model} or {@code similarity_function}; those have the
+ * dedicated {@code sourceModel} / {@code metric} fields. Values are Strings because CQL index
+ * options are a {@code Map<String, String>}.
+ *
+ * <p>Initial in-code set; values to be tuned and moved to config (#2508).
+ */
+public final class VectorIndexProfiles {
+
+ private VectorIndexProfiles() {} // private: no instances can be created from outside this class
+
+  /**
+   * Profile name (lower-cased for case-insensitive lookup) to CQL indexing options, keyed by the
+   * {@link VectorConstants.CQLAnnIndex} option names with String values.
+   */
+  private static final Map<String, Map<String, String>> PROFILES =
+      Map.of(
+          "small-high-recall",
+          Map.of(
+              VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "32",
+              VectorConstants.CQLAnnIndex.CONSTRUCTION_BEAM_WIDTH, "200"),
+          "big-low-latency",
+          Map.of(VectorConstants.CQLAnnIndex.MAXIMUM_NODE_CONNECTIONS, "16"));
+
+ /**
+ * Case-insensitive profile lookup.
+ *
+ * @return the profile's CQL options, or empty if {@code name} is null, blank, or unknown
+ */
+ public static Optional