Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants;
import io.stargate.sgv2.jsonapi.exception.ErrorCodeV1;
import io.stargate.sgv2.jsonapi.exception.DocumentException;
import io.stargate.sgv2.jsonapi.util.JsonUtil;
import io.stargate.sgv2.jsonapi.util.PathMatch;
import io.stargate.sgv2.jsonapi.util.PathMatchLocator;
Expand Down Expand Up @@ -113,7 +113,10 @@ public UpdateOperationResult updateDocument(ObjectNode doc) {
doc.putNull(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD);
} else if (!newValue.isTextual()) {
// if $vectorize is not textual value
throw ErrorCodeV1.INVALID_VECTORIZE_VALUE_TYPE.toApiException();
throw DocumentException.Code.INVALID_VECTORIZE_VALUE_TYPE.get(
Map.of(
"errorMessage",
"needs to be String, not %s".formatted(JsonUtil.nodeTypeAsString(newValue))));
} else if (newValue.asText().isBlank()) {
// $vectorize is blank text value, set $vector as null value, no need to vectorize
doc.putNull(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import io.stargate.sgv2.jsonapi.api.model.command.CommandName;
import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants;
import io.stargate.sgv2.jsonapi.config.constants.ServiceDescConstants;
import io.stargate.sgv2.jsonapi.exception.ErrorCodeV1;
import io.stargate.sgv2.jsonapi.exception.SchemaException;
import io.stargate.sgv2.jsonapi.service.schema.collections.DocumentPath;
import io.stargate.sgv2.jsonapi.service.schema.naming.NamingRules;
import jakarta.validation.Valid;
Expand Down Expand Up @@ -162,29 +162,29 @@ public record IndexingConfig(

public void validate() {
if (allow() != null && deny() != null) {
throw ErrorCodeV1.INVALID_INDEXING_DEFINITION.toApiException(
"`allow` and `deny` cannot be used together");
throw SchemaException.Code.INVALID_INDEXING_DEFINITION.get(
Map.of("errorMessage", "'allow' and 'deny' cannot be used together"));
}

if (allow() == null && deny() == null) {
throw ErrorCodeV1.INVALID_INDEXING_DEFINITION.toApiException(
"`allow` or `deny` should be provided");
throw SchemaException.Code.INVALID_INDEXING_DEFINITION.get(
Map.of("errorMessage", "'allow' or 'deny' should be provided"));
}

if (allow() != null) {
Set<String> dedupe = new HashSet<>(allow());
if (dedupe.size() != allow().size()) {
throw ErrorCodeV1.INVALID_INDEXING_DEFINITION.toApiException(
"`allow` cannot contain duplicates");
throw SchemaException.Code.INVALID_INDEXING_DEFINITION.get(
Map.of("errorMessage", "'allow' cannot contain duplicates"));
}
validateIndexingPath(allow());
}

if (deny() != null) {
Set<String> dedupe = new HashSet<>(deny());
if (dedupe.size() != deny().size()) {
throw ErrorCodeV1.INVALID_INDEXING_DEFINITION.toApiException(
"`deny` cannot contain duplicates");
throw SchemaException.Code.INVALID_INDEXING_DEFINITION.get(
Map.of("errorMessage", "'deny' cannot contain duplicates"));
}
validateIndexingPath(deny());
}
Expand Down Expand Up @@ -213,23 +213,26 @@ public void validateIndexingPath(List<String> paths) {
for (String path : paths) {
if (!NamingRules.FIELD.apply(path)) {
if (path.isEmpty()) {
throw ErrorCodeV1.INVALID_INDEXING_DEFINITION.toApiException(
"path must be represented as a non-empty string");
throw SchemaException.Code.INVALID_INDEXING_DEFINITION.get(
Map.of("errorMessage", "path must be represented as a non-empty string"));
}
if (path.startsWith("$")) {
// $vector is allowed, otherwise throw error
if (!DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD.equals(path)) {
throw ErrorCodeV1.INVALID_INDEXING_DEFINITION.toApiException(
"path must not start with '$'");
throw SchemaException.Code.INVALID_INDEXING_DEFINITION.get(
Map.of("errorMessage", "path ('%s') must not start with '$'".formatted(path)));
}
}
}

try {
DocumentPath.verifyEncodedPath(path);
} catch (IllegalArgumentException e) {
throw ErrorCodeV1.INVALID_INDEXING_DEFINITION.toApiException(
"indexing path ('%s') is not a valid path. " + e.getMessage(), path);
throw SchemaException.Code.INVALID_INDEXING_DEFINITION.get(
Map.of(
"errorMessage",
"indexing path ('%s') is not a valid path: %s"
.formatted(path, e.getMessage())));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ public enum Code implements ErrorCode<DocumentException> {
LEXICAL_CONTENT_TOO_LONG,

INVALID_COLUMN_VALUES,
INVALID_VECTORIZE_VALUE_TYPE, // converted from ErrorCodeV1
INVALID_VECTOR_LENGTH, // copy from V1 VECTOR_SIZE_MISMATCH("Length of vector parameter
// different from declared '$vector' dimension"),
MISSING_PRIMARY_KEY_COLUMNS,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,6 @@ public enum ErrorCodeV1 {
EMBEDDING_PROVIDER_API_KEY_MISSING("The Embedding Provider API key is missing"),

// CreateCollection error codes:
EMBEDDING_SERVICE_NOT_CONFIGURED(
"Unable to vectorize data, embedding service not configured for the collection "),

INVALID_INDEXING_DEFINITION("Invalid indexing definition"),
INVALID_JSONAPI_COLLECTION_SCHEMA("Not a valid json api collection schema"),
INVALID_VECTORIZE_VALUE_TYPE("$vectorize value needs to be text value"),
TOO_MANY_COLLECTIONS("Too many collections"),

RERANKING_FEATURE_NOT_ENABLED("Reranking feature is not enabled"),
RERANKING_SERVICE_TYPE_UNAVAILABLE("Reranking service unavailable"),
Expand Down Expand Up @@ -64,7 +57,6 @@ public enum ErrorCodeV1 {
VECTORIZE_INVALID_AUTHENTICATION_TYPE("Invalid vectorize authentication type"),

VECTORIZE_CREDENTIAL_INVALID("Invalid credential name for vectorize"),
VECTORIZECONFIG_CHECK_FAIL("Internal server error: VectorizeDefinition check fail"),

HYBRID_FIELD_CONFLICT(
"The '$hybrid' field cannot be used with '$lexical', '$vector', or '$vectorize'."),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,12 @@ public class JsonApiException extends RuntimeException {
private static final Set<ErrorCodeV1> serverFamily =
new HashSet<>() {
{
add(TOO_MANY_COLLECTIONS);
add(VECTOR_SEARCH_NOT_SUPPORTED);
add(VECTORIZE_FEATURE_NOT_AVAILABLE);
add(VECTORIZE_SERVICE_NOT_REGISTERED);
add(VECTORIZE_SERVICE_TYPE_UNAVAILABLE);
add(VECTORIZE_INVALID_AUTHENTICATION_TYPE);
add(VECTORIZE_CREDENTIAL_INVALID);
add(VECTORIZECONFIG_CHECK_FAIL);
}
};

Expand All @@ -51,7 +49,6 @@ public class JsonApiException extends RuntimeException {
add(INVALID_USAGE_OF_VECTORIZE);
add(VECTOR_SEARCH_INVALID_FUNCTION_NAME);
add(VECTOR_SEARCH_TOO_BIG_VALUE);
add(INVALID_INDEXING_DEFINITION);
}
},
ErrorScope.SCHEMA,
Expand All @@ -60,13 +57,7 @@ public class JsonApiException extends RuntimeException {
add(INVALID_REQUEST);
}
},
ErrorScope.EMBEDDING,
new HashSet<>() {
{
add(INVALID_VECTORIZE_VALUE_TYPE);
}
},
ErrorScope.DOCUMENT);
ErrorScope.EMBEDDING);

protected JsonApiException(ErrorCodeV1 errorCode) {
this(errorCode, errorCode.getMessage(), null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,16 @@ public enum Code implements ErrorCode<SchemaException> {
CANNOT_VECTORIZE_UNKNOWN_COLUMNS,
COLLECTION_NOT_EXIST, // converted from ErrorCodeV1
DEPRECATED_AI_MODEL,
EMBEDDING_SERVICE_NOT_CONFIGURED, // converted from ErrorCodeV1
END_OF_LIFE_AI_MODEL,
EXISTING_INDEX_FOR_COLLECTION,
// from ErrorCodeV1 but used by clients DO NOT RENAME:
EXISTING_COLLECTION_DIFFERENT_SETTINGS,

EXISTING_TABLE_NOT_DATA_API_COLLECTION, // converted from ErrorCodeV1
INVALID_CREATE_COLLECTION_OPTIONS,
INVALID_FORMAT_FOR_INDEX_CREATION_COLUMN,
INVALID_INDEXING_DEFINITION,
INVALID_USER_DEFINED_TYPE_NAME,
LEXICAL_NOT_AVAILABLE_FOR_DATABASE,
LEXICAL_NOT_ENABLED_FOR_COLLECTION,
Expand All @@ -50,6 +53,7 @@ public enum Code implements ErrorCode<SchemaException> {
MISSING_DIMENSION_IN_VECTOR_COLUMN,
MISSING_FIELDS_FOR_TYPE_CREATION,
MISSING_PARTITION_COLUMNS,
TOO_MANY_COLLECTIONS, // legacy: converted from ErrorCodeV1
TOO_MANY_INDEXES_FOR_COLLECTION,
UNKNOWN_DATA_TYPE,
UNKNOWN_INDEX_COLUMN,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
import com.github.benmanes.caffeine.cache.Caffeine;
import io.smallrye.mutiny.Uni;
import io.stargate.sgv2.jsonapi.api.request.RequestContext;
import io.stargate.sgv2.jsonapi.exception.ErrorCodeV1;
import io.stargate.sgv2.jsonapi.exception.JsonApiException;
import io.stargate.sgv2.jsonapi.exception.SchemaException;
import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionSchemaObject;
import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionTableMatcher;
Expand Down Expand Up @@ -59,24 +57,14 @@ protected Uni<SchemaObject> getSchemaObject(
.transformToUni(
(result, error) -> {
if (null != error) {
// not a valid collection schema
// TODO: Explain why this changes the error code
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Turns out nothing was actually throwing VECTORIZECONFIG_CHECK_FAIL, so this check is dead code and can simply be removed.

if (error instanceof JsonApiException
&& ((JsonApiException) error).getErrorCode()
== ErrorCodeV1.VECTORIZECONFIG_CHECK_FAIL) {
return Uni.createFrom()
.failure(
ErrorCodeV1.INVALID_JSONAPI_COLLECTION_SCHEMA.toApiException(
"%s", collectionName));
}
// collection does not exist
// TODO: DO NOT do a string starts with, use proper error structures
// again, why is this here, looks like it returns the same error code ?
// Guess: this a driver exception, not Data API's internal one
// ... seems unlikely as driver does not have concept of "Collection" (vs Tables)?
// (that is: "Collection" would refer to column datatype not "funny table"?)
if (error instanceof RuntimeException rte
&& rte.getMessage().startsWith("Collection does not exist")) {
if (error instanceof RuntimeException
&& error.getMessage().startsWith("Collection does not exist")) {
return Uni.createFrom()
.failure(
SchemaException.Code.COLLECTION_NOT_EXIST.get(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,15 @@
import io.stargate.sgv2.jsonapi.api.model.command.clause.sort.SortExpression;
import io.stargate.sgv2.jsonapi.api.request.EmbeddingCredentials;
import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants;
import io.stargate.sgv2.jsonapi.exception.DocumentException;
import io.stargate.sgv2.jsonapi.exception.ErrorCodeV1;
import io.stargate.sgv2.jsonapi.exception.JsonApiException;
import io.stargate.sgv2.jsonapi.exception.*;
import io.stargate.sgv2.jsonapi.service.cqldriver.executor.SchemaObject;
import io.stargate.sgv2.jsonapi.service.cqldriver.executor.VectorColumnDefinition;
import io.stargate.sgv2.jsonapi.service.cqldriver.executor.VectorConfig;
import io.stargate.sgv2.jsonapi.service.embedding.operation.EmbeddingProvider;
import io.stargate.sgv2.jsonapi.service.schema.tables.ApiColumnDef;
import io.stargate.sgv2.jsonapi.service.schema.tables.ApiTypeName;
import io.stargate.sgv2.jsonapi.service.schema.tables.ApiVectorType;
import io.stargate.sgv2.jsonapi.util.JsonUtil;
import java.util.*;

/**
Expand Down Expand Up @@ -81,8 +80,12 @@ public Uni<Boolean> vectorize(List<JsonNode> documents) {
continue;
}
if (!jsonNode.isTextual()) {
throw ErrorCodeV1.INVALID_VECTORIZE_VALUE_TYPE.toApiException(
"issue in document at position %s", (position + 1));
throw DocumentException.Code.INVALID_VECTORIZE_VALUE_TYPE.get(
Map.of(
"errorMessage",
"needs to be String, not %s (issue in document at position %s)"
.formatted(
JsonUtil.nodeTypeAsString(jsonNode), String.valueOf(position + 1))));
}

String vectorizeData = jsonNode.asText();
Expand All @@ -99,8 +102,8 @@ public Uni<Boolean> vectorize(List<JsonNode> documents) {

if (!vectorizeTexts.isEmpty()) {
if (embeddingProvider == null) {
throw ErrorCodeV1.EMBEDDING_SERVICE_NOT_CONFIGURED.toApiException(
schemaObject.name().table());
throw SchemaException.Code.EMBEDDING_SERVICE_NOT_CONFIGURED.get(
Map.of("table", schemaObject.name().table()));
}
Uni<List<float[]>> vectors =
embeddingProvider
Expand Down Expand Up @@ -154,6 +157,8 @@ public Uni<Boolean> vectorize(List<JsonNode> documents) {
return Uni.createFrom().item(true);
} catch (JsonApiException e) {
return Uni.createFrom().failure(e);
} catch (APIException e) {
return Uni.createFrom().failure(e);
}
}

Expand All @@ -166,8 +171,8 @@ public Uni<Boolean> vectorize(List<JsonNode> documents) {
*/
public Uni<float[]> vectorize(String vectorizeContent) {
if (embeddingProvider == null) {
throw ErrorCodeV1.EMBEDDING_SERVICE_NOT_CONFIGURED.toApiException(
schemaObject.name().table());
throw SchemaException.Code.EMBEDDING_SERVICE_NOT_CONFIGURED.get(
Map.of("table", schemaObject.name().table()));
}
Uni<List<float[]>> vectors =
embeddingProvider
Expand Down Expand Up @@ -210,8 +215,8 @@ public Uni<Boolean> vectorize(SortClause sortClause) {
return Uni.createFrom().item(true);
if (sortClause.hasVectorizeSearchClause()) {
if (embeddingProvider == null) {
throw ErrorCodeV1.EMBEDDING_SERVICE_NOT_CONFIGURED.toApiException(
schemaObject.name().table());
throw SchemaException.Code.EMBEDDING_SERVICE_NOT_CONFIGURED.get(
Map.of("table", schemaObject.name().table()));
}
final List<SortExpression> sortExpressions = sortClause.sortExpressions();
SortExpression expression = sortExpressions.getFirst();
Expand Down Expand Up @@ -252,6 +257,8 @@ public Uni<Boolean> vectorize(SortClause sortClause) {
return Uni.createFrom().item(true);
} catch (JsonApiException e) {
return Uni.createFrom().failure(e);
} catch (APIException e) {
return Uni.createFrom().failure(e);
}
}

Expand Down Expand Up @@ -298,8 +305,8 @@ private Uni<List<float[]>> vectorizeTexts(

// Copied from vectorize(List<JsonNode> documents) above leaving as is for now
if (embeddingProvider == null) {
throw ErrorCodeV1.EMBEDDING_SERVICE_NOT_CONFIGURED.toApiException(
schemaObject.name().table());
throw SchemaException.Code.EMBEDDING_SERVICE_NOT_CONFIGURED.get(
Map.of("table", schemaObject.name().table()));
}

return embeddingProvider
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import io.smallrye.mutiny.Uni;
import io.stargate.sgv2.jsonapi.api.request.EmbeddingCredentials;
import io.stargate.sgv2.jsonapi.config.constants.HttpConstants;
import io.stargate.sgv2.jsonapi.exception.ErrorCodeV1;
import io.stargate.sgv2.jsonapi.exception.DocumentException;
import io.stargate.sgv2.jsonapi.service.embedding.configuration.EmbeddingProviderResponseValidation;
import io.stargate.sgv2.jsonapi.service.embedding.configuration.EmbeddingProvidersConfig;
import io.stargate.sgv2.jsonapi.service.embedding.configuration.ServiceConfigStore;
Expand Down Expand Up @@ -117,8 +117,11 @@ public Uni<BatchedEmbeddingResponse> vectorize(
// TODO: This should be IllegalArgumentException

// Temporary fail message: with re-batching will give better information
throw ErrorCodeV1.INVALID_VECTORIZE_VALUE_TYPE.toApiException(
"UpstageAI only supports vectorization of 1 text at a time, got " + texts.size());
throw DocumentException.Code.INVALID_VECTORIZE_VALUE_TYPE.get(
Map.of(
"errorMessage",
"UpstageAI only supports vectorization of 1 text at a time, got %s"
.formatted(texts.size())));
}

// Another oddity: model name used as prefix
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
import io.stargate.sgv2.jsonapi.api.model.command.tracing.RequestTracing;
import io.stargate.sgv2.jsonapi.api.request.RequestContext;
import io.stargate.sgv2.jsonapi.config.DatabaseLimitsConfig;
import io.stargate.sgv2.jsonapi.exception.*;
import io.stargate.sgv2.jsonapi.exception.DatabaseException;
import io.stargate.sgv2.jsonapi.exception.SchemaException;
import io.stargate.sgv2.jsonapi.service.cqldriver.CQLSessionCache;
import io.stargate.sgv2.jsonapi.service.cqldriver.executor.KeyspaceSchemaObject;
import io.stargate.sgv2.jsonapi.service.cqldriver.executor.QueryExecutor;
Expand Down Expand Up @@ -509,9 +510,14 @@ TableMetadata findTableAndValidateLimits(
final long collectionCount = allTables.stream().filter(COLLECTION_MATCHER).count();
final int MAX_COLLECTIONS = dbLimitsConfig.maxCollections();
if (collectionCount >= MAX_COLLECTIONS) {
throw ErrorCodeV1.TOO_MANY_COLLECTIONS.toApiException(
"number of collections in database cannot exceed %d, already have %d",
MAX_COLLECTIONS, collectionCount);
throw SchemaException.Code.TOO_MANY_COLLECTIONS.get(
Map.of(
"table",
tableName,
"collectionCount",
String.valueOf(collectionCount),
"collectionMaxCount",
String.valueOf(MAX_COLLECTIONS)));
}
// And then see how many Indexes have been created, how many available
int saisUsed = allTables.stream().mapToInt(table -> table.getIndexes().size()).sum();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import io.stargate.sgv2.jsonapi.api.model.command.clause.update.*;
import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants;
import io.stargate.sgv2.jsonapi.exception.DocumentException;
import io.stargate.sgv2.jsonapi.exception.ErrorCodeV1;
import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizer;
import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizerService;
import io.stargate.sgv2.jsonapi.util.JsonUtil;
Expand Down Expand Up @@ -118,7 +117,10 @@ private DocumentUpdaterResponse replace(ObjectNode docToUpdate, boolean docInser
replaceDocument.putNull(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD);
} else if (!vectorizeNode.isTextual()) {
// if $vectorize is not textual value
throw ErrorCodeV1.INVALID_VECTORIZE_VALUE_TYPE.toApiException();
throw DocumentException.Code.INVALID_VECTORIZE_VALUE_TYPE.get(
Map.of(
"errorMessage",
"needs to be String, not %s".formatted(JsonUtil.nodeTypeAsString(vectorizeNode))));
} else if (vectorizeNode.asText().isBlank()) {
// $vectorize is blank text value, set $vector as null value, no need to vectorize
replaceDocument.putNull(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD);
Expand Down
Loading