Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package io.stargate.sgv2.jsonapi.api.model.command.deserializers;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
import io.stargate.sgv2.jsonapi.api.model.command.table.definition.indexes.VectorIndexDefinitionDesc.VectorIndexingDesc;
import io.stargate.sgv2.jsonapi.exception.SchemaException;
import io.stargate.sgv2.jsonapi.util.JsonUtil;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Map;

/**
* Deserializes the overloaded {@code vectorIndexing} value, either:
*
* <ul>
* <li>a JSON string: a named profile expanded into SAI options, e.g. <code>
* "vectorIndexing": "small-high-recall"</code>
* <li>a JSON object: raw Cassandra SAI tuning options, e.g. <code>
* "vectorIndexing": { "maximum_node_connections": 32 }</code>
* </ul>
*
* <p>Anything else (number, boolean, array) is a request error. Per <a
* href="https://github.com/stargate/data-api/issues/2508">#2508</a> the field is overloaded by JSON
* type rather than separate {@code profile} / {@code options} sub-keys, so profile and raw options
* are mutually exclusive in one request.
*/
public class VectorIndexingDescDeserializer extends StdDeserializer<VectorIndexingDesc> {

  /** Target type used when the object form is converted into a plain options map. */
  private static final TypeReference<LinkedHashMap<String, Object>> OPTIONS_TYPE =
      new TypeReference<>() {};

  /** Creates the deserializer, registered for {@link VectorIndexingDesc}. */
  public VectorIndexingDescDeserializer() {
    super(VectorIndexingDesc.class);
  }

  /**
   * Reads the current JSON value as a tree and maps it by JSON type: a string becomes a profile,
   * an object becomes an options map, and every other type is rejected.
   *
   * @param jsonParser parser positioned at the {@code vectorIndexing} value; its codec is used to
   *     convert the object form
   * @param deserializationContext context used to read the value as a {@link JsonNode}
   * @return the profile or options variant of {@link VectorIndexingDesc}
   * @throws IOException if reading the JSON tree fails
   */
  @Override
  public VectorIndexingDesc deserialize(
      JsonParser jsonParser, DeserializationContext deserializationContext) throws IOException {
    final JsonNode value = deserializationContext.readTree(jsonParser);

    // Reject everything that is neither a string nor an object before doing any conversion.
    if (!value.isTextual() && !value.isObject()) {
      final String reason =
          "`vectorIndexing` must be either a profile name (string) or an object of indexing "
              + "options, but was: "
              + JsonUtil.nodeTypeAsString(value)
              + ".";
      throw SchemaException.Code.INVALID_VECTOR_INDEXING_OPTIONS.get(Map.of("reason", reason));
    }

    if (value.isTextual()) {
      // String form: a named profile; the name itself is not checked here.
      return VectorIndexingDesc.ofProfile(value.textValue());
    }

    // Object form: raw SAI options. Converting through the parser's mapper keeps the mapper
    // configuration (e.g. float handling) in effect, as it would be for a Map<String, Object>
    // field.
    final ObjectMapper mapper = (ObjectMapper) jsonParser.getCodec();
    final Map<String, Object> rawOptions = mapper.convertValue(value, OPTIONS_TYPE);
    return VectorIndexingDesc.ofOptions(rawOptions);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
import com.fasterxml.jackson.annotation.JsonValue;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import io.stargate.sgv2.jsonapi.api.model.command.deserializers.VectorIndexingDescDeserializer;
import io.stargate.sgv2.jsonapi.config.constants.TableDescConstants;
import io.stargate.sgv2.jsonapi.config.constants.VectorConstants;
import io.stargate.sgv2.jsonapi.config.constants.VectorIndexDescDefaults;
Expand All @@ -11,6 +14,7 @@
import jakarta.annotation.Nullable;
import jakarta.validation.constraints.NotNull;
import jakarta.validation.constraints.Pattern;
import java.util.Map;
import org.eclipse.microprofile.openapi.annotations.enums.SchemaType;
import org.eclipse.microprofile.openapi.annotations.media.Schema;

Expand All @@ -34,7 +38,8 @@ public record VectorIndexDefinitionDesc(
/** Options for the vector index */
@JsonPropertyOrder({
VectorConstants.VectorColumn.METRIC,
VectorConstants.VectorColumn.SOURCE_MODEL
VectorConstants.VectorColumn.SOURCE_MODEL,
VectorConstants.VectorColumn.VECTOR_INDEXING
})
public record VectorIndexDescOptions(
@Nullable
Expand All @@ -59,5 +64,46 @@ public record VectorIndexDescOptions(
+ EmbeddingSourceModel.ApiConstants.ALL)
@JsonInclude(JsonInclude.Include.NON_NULL)
@JsonProperty(VectorConstants.VectorColumn.SOURCE_MODEL)
String sourceModel) {}
String sourceModel,
//
@Nullable
@Schema(
description =
"Optional vector (SAI) indexing configuration. Either a profile name (string) "
+ "the API expands into SAI options, e.g. \"small-high-recall\"; or an object "
+ "of Cassandra SAI tuning options (snake_case), restricted to: "
+ "maximum_node_connections, construction_beam_width, neighborhood_overflow, "
+ "alpha, enable_hierarchy, e.g. {\"maximum_node_connections\": 32, "
+ "\"alpha\": 1.2}. A profile and explicit options are mutually exclusive. "
+ "Set \"metric\" / \"sourceModel\" via their dedicated fields, not here.",
oneOf = {String.class, Map.class})
@JsonInclude(JsonInclude.Include.NON_NULL)
@JsonProperty(VectorConstants.VectorColumn.VECTOR_INDEXING)
VectorIndexingDesc vectorIndexing) {}

/**
* Overloaded {@code vectorIndexing} value: exactly one of a named {@code profile} (JSON string)
* or raw SAI tuning {@code options} (JSON object) is set. {@link VectorIndexingDescDeserializer}
* discriminates by JSON type; {@link #jsonValue()} serializes back to the bare string or object.
*/
@JsonDeserialize(using = VectorIndexingDescDeserializer.class)
public record VectorIndexingDesc(
    @Nullable String profile, @Nullable Map<String, Object> options) {

  /**
   * Builds the profile variant.
   *
   * @param profile name of the profile; {@code options} is left {@code null}
   */
  public static VectorIndexingDesc ofProfile(String profile) {
    return new VectorIndexingDesc(profile, null);
  }

  /**
   * Builds the raw-options variant.
   *
   * @param options SAI option map; {@code profile} is left {@code null}
   */
  public static VectorIndexingDesc ofOptions(Map<String, Object> options) {
    return new VectorIndexingDesc(null, options);
  }

  /**
   * Value Jackson writes for this record: the profile string when one is set, otherwise the
   * options map (which may itself be {@code null}).
   */
  @JsonValue
  Object jsonValue() {
    if (profile == null) {
      return options;
    }
    return profile;
  }
}
}
Original file line number Diff line number Diff line change
@@ -1,17 +1,51 @@
package io.stargate.sgv2.jsonapi.config.constants;

import java.util.Set;

/** String constants for vector columns and vector (SAI) index options, grouped by where they are used. */
public interface VectorConstants {
/** Field names used in the API description of a vector column / vector index. */
interface VectorColumn {
String DIMENSION = "dimension";
String METRIC = "metric";
String SOURCE_MODEL = "sourceModel";
// Reuses the shared service field name rather than redefining it here.
String SERVICE = ServiceDescConstants.SERVICE;
String VECTOR_INDEXING = "vectorIndexing";
}

/** Vectorize constants; declares nothing of its own and inherits from {@link ServiceDescConstants}. */
interface Vectorize extends ServiceDescConstants {}

/**
* CQL {@code WITH OPTIONS} keys for a vector (SAI) index. {@link #SOURCE_MODEL} and {@link
* #SIMILARITY_FUNCTION} map to dedicated API fields ({@code sourceModel} / {@code metric}); the
* rest are tuning options set via the object form of {@code vectorIndexing}.
*/
interface CQLAnnIndex {
String SOURCE_MODEL = "source_model";
String SIMILARITY_FUNCTION = "similarity_function";
String MAXIMUM_NODE_CONNECTIONS = "maximum_node_connections";
String CONSTRUCTION_BEAM_WIDTH = "construction_beam_width";
String NEIGHBORHOOD_OVERFLOW = "neighborhood_overflow";
String ALPHA = "alpha";
String ENABLE_HIERARCHY = "enable_hierarchy";

/**
* Options with dedicated API fields ({@code metric} / {@code sourceModel}); rejected inside the
* object form of {@code vectorIndexing}.
*/
Set<String> RESERVED_OPTIONS = Set.of(SOURCE_MODEL, SIMILARITY_FUNCTION);

/**
* SAI tuning options settable through the object form of {@code vectorIndexing}. {@code
* optimize_for} exists in OSS Cassandra but is de-emphasized in DSE 6.9 / HCD, so it is left out
* for now.
*/
Set<String> ALLOWED_OPTIONS =
Set.of(
MAXIMUM_NODE_CONNECTIONS,
CONSTRUCTION_BEAM_WIDTH,
NEIGHBORHOOD_OVERFLOW,
ALPHA,
ENABLE_HIERARCHY);

/** Allowed options whose value must be a boolean; the rest are numeric. */
Set<String> BOOLEAN_OPTIONS = Set.of(ENABLE_HIERARCHY);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ public enum Code implements ErrorCode<SchemaException> {
INVALID_INDEXING_DEFINITION,
INVALID_USAGE_OF_VECTORIZE, // legacy: converted from ErrorCodeV1
INVALID_USER_DEFINED_TYPE_NAME,
INVALID_VECTOR_INDEXING_OPTIONS,
LEXICAL_FEATURE_NOT_ENABLED,
LEXICAL_NOT_ENABLED_FOR_COLLECTION,
MISSING_ALTER_TABLE_OPERATIONS,
Expand Down Expand Up @@ -76,6 +77,7 @@ public enum Code implements ErrorCode<SchemaException> {
UNKNOWN_PARTITION_SORT_COLUMNS,
UNKNOWN_PRIMITIVE_DATA_TYPE,
UNKNOWN_USER_DEFINED_TYPE,
UNKNOWN_VECTOR_INDEXING_PROFILE,
UNKNOWN_VECTOR_METRIC,
UNKNOWN_VECTOR_SOURCE_MODEL,
UNSUPPORTED_DATA_TYPE_TABLE_CREATION,
Expand Down
Loading