diff --git a/auto-configurations/vector-stores/spring-ai-autoconfigure-vector-store-azure/src/main/java/org/springframework/ai/vectorstore/azure/autoconfigure/AzureVectorStoreAutoConfiguration.java b/auto-configurations/vector-stores/spring-ai-autoconfigure-vector-store-azure/src/main/java/org/springframework/ai/vectorstore/azure/autoconfigure/AzureVectorStoreAutoConfiguration.java index 05342d99468..20f597b429a 100644 --- a/auto-configurations/vector-stores/spring-ai-autoconfigure-vector-store-azure/src/main/java/org/springframework/ai/vectorstore/azure/autoconfigure/AzureVectorStoreAutoConfiguration.java +++ b/auto-configurations/vector-stores/spring-ai-autoconfigure-vector-store-azure/src/main/java/org/springframework/ai/vectorstore/azure/autoconfigure/AzureVectorStoreAutoConfiguration.java @@ -44,6 +44,7 @@ * * @author Christian Tzolov * @author Soby Chacko + * @author Alexandros Pappas */ @AutoConfiguration @ConditionalOnClass({ EmbeddingModel.class, SearchIndexClient.class, AzureVectorStore.class }) @@ -102,6 +103,18 @@ public AzureVectorStore vectorStore(SearchIndexClient searchIndexClient, Embeddi builder.defaultSimilarityThreshold(properties.getDefaultSimilarityThreshold()); } + if (properties.getContentFieldName() != null) { + builder.contentFieldName(properties.getContentFieldName()); + } + + if (properties.getEmbeddingFieldName() != null) { + builder.embeddingFieldName(properties.getEmbeddingFieldName()); + } + + if (properties.getMetadataFieldName() != null) { + builder.metadataFieldName(properties.getMetadataFieldName()); + } + return builder.build(); } diff --git a/auto-configurations/vector-stores/spring-ai-autoconfigure-vector-store-azure/src/main/java/org/springframework/ai/vectorstore/azure/autoconfigure/AzureVectorStoreProperties.java b/auto-configurations/vector-stores/spring-ai-autoconfigure-vector-store-azure/src/main/java/org/springframework/ai/vectorstore/azure/autoconfigure/AzureVectorStoreProperties.java index 519b93ce5bc..4cfd0ee554c 100644 --- a/auto-configurations/vector-stores/spring-ai-autoconfigure-vector-store-azure/src/main/java/org/springframework/ai/vectorstore/azure/autoconfigure/AzureVectorStoreProperties.java +++ b/auto-configurations/vector-stores/spring-ai-autoconfigure-vector-store-azure/src/main/java/org/springframework/ai/vectorstore/azure/autoconfigure/AzureVectorStoreProperties.java @@ -24,6 +24,7 @@ * Configuration properties for Azure Vector Store. * * @author Christian Tzolov + * @author Alexandros Pappas */ @ConfigurationProperties(AzureVectorStoreProperties.CONFIG_PREFIX) public class AzureVectorStoreProperties extends CommonVectorStoreProperties { @@ -42,6 +43,12 @@ public class AzureVectorStoreProperties extends CommonVectorStoreProperties { private boolean useKeylessAuth; + private String contentFieldName; + + private String embeddingFieldName; + + private String metadataFieldName; + public String getUrl() { return this.url; } @@ -90,4 +97,28 @@ public void setUseKeylessAuth(boolean useKeylessAuth) { this.useKeylessAuth = useKeylessAuth; } + public String getContentFieldName() { + return this.contentFieldName; + } + + public void setContentFieldName(String contentFieldName) { + this.contentFieldName = contentFieldName; + } + + public String getEmbeddingFieldName() { + return this.embeddingFieldName; + } + + public void setEmbeddingFieldName(String embeddingFieldName) { + this.embeddingFieldName = embeddingFieldName; + } + + public String getMetadataFieldName() { + return this.metadataFieldName; + } + + public void setMetadataFieldName(String metadataFieldName) { + this.metadataFieldName = metadataFieldName; + } + } diff --git a/vector-stores/spring-ai-azure-store/src/main/java/org/springframework/ai/vectorstore/azure/AzureVectorStore.java b/vector-stores/spring-ai-azure-store/src/main/java/org/springframework/ai/vectorstore/azure/AzureVectorStore.java index b25ecd7c4cc..176ae597832 100644 --- a/vector-stores/spring-ai-azure-store/src/main/java/org/springframework/ai/vectorstore/azure/AzureVectorStore.java +++ b/vector-stores/spring-ai-azure-store/src/main/java/org/springframework/ai/vectorstore/azure/AzureVectorStore.java @@ -77,6 +77,7 @@ * @author Thomas Vitale * @author Soby Chacko * @author Jinwoo Lee + * @author Alexandros Pappas */ public class AzureVectorStore extends AbstractObservationVectorStore implements InitializingBean { @@ -119,6 +120,12 @@ public class AzureVectorStore extends AbstractObservationVectorStore implements */ private final List filterMetadataFields; + private final String contentFieldName; + + private final String embeddingFieldName; + + private final String metadataFieldName; + @Nullable private SearchClient searchClient; @@ -145,6 +152,9 @@ protected AzureVectorStore(Builder builder) { this.defaultTopK = builder.defaultTopK; this.defaultSimilarityThreshold = builder.defaultSimilarityThreshold; this.indexName = builder.indexName; + this.contentFieldName = builder.contentFieldName; + this.embeddingFieldName = builder.embeddingFieldName; + this.metadataFieldName = builder.metadataFieldName; this.filterExpressionConverter = new AzureAiSearchFilterExpressionConverter(this.filterMetadataFields); } @@ -166,9 +176,9 @@ public void doAdd(List documents) { final var searchDocuments = documents.stream().map(document -> { SearchDocument searchDocument = new SearchDocument(); searchDocument.put(ID_FIELD_NAME, document.getId()); - searchDocument.put(EMBEDDING_FIELD_NAME, embeddings.get(documents.indexOf(document))); - searchDocument.put(CONTENT_FIELD_NAME, document.getText()); - searchDocument.put(METADATA_FIELD_NAME, new JSONObject(document.getMetadata()).toJSONString()); + searchDocument.put(this.embeddingFieldName, embeddings.get(documents.indexOf(document))); + searchDocument.put(this.contentFieldName, document.getText()); + searchDocument.put(this.metadataFieldName, new JSONObject(document.getMetadata()).toJSONString()); // Add the filterable metadata fields as top level fields, allowing filler // expressions on them. @@ -223,7 +233,7 @@ public List doSimilaritySearch(SearchRequest request) { .setKNearestNeighborsCount(request.getTopK()) // Set the fields to compare the vector against. This is a comma-delimited // list of field names. - .setFields(EMBEDDING_FIELD_NAME); + .setFields(this.embeddingFieldName); var searchOptions = new SearchOptions() .setVectorSearchOptions(new VectorSearchOptions().setQueries(vectorQuery)); @@ -239,18 +249,19 @@ public List doSimilaritySearch(SearchRequest request) { .filter(result -> result.getScore() >= request.getSimilarityThreshold()) .map(result -> { - final AzureSearchDocument entry = result.getDocument(AzureSearchDocument.class); + SearchDocument document = result.getDocument(SearchDocument.class); + + String id = document.get(ID_FIELD_NAME) != null ? document.get(ID_FIELD_NAME).toString() : ""; + String content = document.get(this.contentFieldName) != null + ? document.get(this.contentFieldName).toString() : ""; + String metadataJson = document.get(this.metadataFieldName) != null + ? document.get(this.metadataFieldName).toString() : ""; - Map metadata = parseMetadataToMutable(entry.metadata()); + Map metadata = parseMetadataToMutable(metadataJson); metadata.put(DocumentMetadata.DISTANCE.value(), 1.0 - result.getScore()); - return Document.builder() - .id(entry.id()) - .text(entry.content) - .metadata(metadata) - .score(result.getScore()) - .build(); + return Document.builder().id(id).text(content).metadata(metadata).score(result.getScore()).build(); }) .collect(Collectors.toList()); } @@ -270,15 +281,15 @@ public void afterPropertiesSet() throws Exception { fields.add(new SearchField(ID_FIELD_NAME, SearchFieldDataType.STRING).setKey(true) .setFilterable(true) .setSortable(true)); - fields.add(new SearchField(EMBEDDING_FIELD_NAME, SearchFieldDataType.collection(SearchFieldDataType.SINGLE)) + fields.add(new SearchField(this.embeddingFieldName, SearchFieldDataType.collection(SearchFieldDataType.SINGLE)) .setSearchable(true) .setHidden(false) .setVectorSearchDimensions(dimensions) // This must match a vector search configuration name. .setVectorSearchProfileName(SPRING_AI_VECTOR_PROFILE)); - fields.add(new SearchField(CONTENT_FIELD_NAME, SearchFieldDataType.STRING).setSearchable(true) + fields.add(new SearchField(this.contentFieldName, SearchFieldDataType.STRING).setSearchable(true) .setFilterable(true)); - fields.add(new SearchField(METADATA_FIELD_NAME, SearchFieldDataType.STRING).setSearchable(true) + fields.add(new SearchField(this.metadataFieldName, SearchFieldDataType.STRING).setSearchable(true) .setFilterable(true)); for (MetadataField filterableMetadataField : this.filterMetadataFields) { @@ -367,13 +378,6 @@ public static MetadataField date(String name) { } - /** - * Internal data structure for retrieving and storing documents. - */ - private record AzureSearchDocument(String id, String content, List embedding, String metadata) { - - } - /** * Builder class for creating {@link AzureVectorStore} instances. *

@@ -395,6 +399,12 @@ public static class Builder extends AbstractVectorStoreBuilder { private String indexName = DEFAULT_INDEX_NAME; + private String contentFieldName = CONTENT_FIELD_NAME; + + private String embeddingFieldName = EMBEDDING_FIELD_NAME; + + private String metadataFieldName = METADATA_FIELD_NAME; + private Builder(SearchIndexClient searchIndexClient, EmbeddingModel embeddingModel) { super(embeddingModel); Assert.notNull(searchIndexClient, "SearchIndexClient must not be null"); @@ -460,6 +470,38 @@ public Builder defaultSimilarityThreshold(Double defaultSimilarityThreshold) { return this; } + /** + * Sets the content field name in the Azure Search index. + * @param contentFieldName the name of the content field (defaults to "content") + * @return the builder instance + */ + public Builder contentFieldName(@Nullable String contentFieldName) { + this.contentFieldName = contentFieldName != null ? contentFieldName : CONTENT_FIELD_NAME; + return this; + } + + /** + * Sets the embedding field name in the Azure Search index. + * @param embeddingFieldName the name of the embedding field (defaults to + * "embedding") + * @return the builder instance + */ + public Builder embeddingFieldName(@Nullable String embeddingFieldName) { + this.embeddingFieldName = embeddingFieldName != null ? embeddingFieldName : EMBEDDING_FIELD_NAME; + return this; + } + + /** + * Sets the metadata field name in the Azure Search index. + * @param metadataFieldName the name of the metadata field (defaults to + * "metadata") + * @return the builder instance + */ + public Builder metadataFieldName(@Nullable String metadataFieldName) { + this.metadataFieldName = metadataFieldName != null ? metadataFieldName : METADATA_FIELD_NAME; + return this; + } + @Override public AzureVectorStore build() { return new AzureVectorStore(this); diff --git a/vector-stores/spring-ai-azure-store/src/test/java/org/springframework/ai/vectorstore/azure/AzureVectorStoreIT.java b/vector-stores/spring-ai-azure-store/src/test/java/org/springframework/ai/vectorstore/azure/AzureVectorStoreIT.java index 1e4bd4aea04..7cd80adf2d7 100644 --- a/vector-stores/spring-ai-azure-store/src/test/java/org/springframework/ai/vectorstore/azure/AzureVectorStoreIT.java +++ b/vector-stores/spring-ai-azure-store/src/test/java/org/springframework/ai/vectorstore/azure/AzureVectorStoreIT.java @@ -56,6 +56,7 @@ /** * @author Christian Tzolov * @author Thomas Vitale + * @author Alexandros Pappas */ @EnabledIfEnvironmentVariable(named = "AZURE_AI_SEARCH_API_KEY", matches = ".+") @EnabledIfEnvironmentVariable(named = "AZURE_AI_SEARCH_ENDPOINT", matches = ".+") @@ -329,6 +330,49 @@ void getNativeClientTest() { }); } + @Test + @EnabledIfEnvironmentVariable(named = "AZURE_AI_SEARCH_INDEX_NAME", matches = ".+") + void customFieldNamesTest() throws Exception { + // Test with existing production index that uses custom field names + String existingIndexName = System.getenv("AZURE_AI_SEARCH_INDEX_NAME"); + String endpoint = System.getenv("AZURE_AI_SEARCH_ENDPOINT"); + String apiKey = System.getenv("AZURE_AI_SEARCH_API_KEY"); + + SearchIndexClient searchIndexClient = new SearchIndexClientBuilder().endpoint(endpoint) + .credential(new AzureKeyCredential(apiKey)) + .buildClient(); + + TransformersEmbeddingModel embeddingModel = new TransformersEmbeddingModel(); + embeddingModel.afterPropertiesSet(); + + // Create vector store with custom field names matching the production index + // Index uses: chunk_text (content), embedding, metadata + VectorStore vectorStore = AzureVectorStore.builder(searchIndexClient, embeddingModel) + .indexName(existingIndexName) + .initializeSchema(false) // Don't create - use existing index + .contentFieldName("chunk_text") // Custom field name! + .embeddingFieldName("embedding") // Standard name + .metadataFieldName("metadata") // Standard name + .build(); + + // Trigger initialization + ((AzureVectorStore) vectorStore).afterPropertiesSet(); + + // Search the existing index + List results = vectorStore + .similaritySearch(SearchRequest.builder().query("Azure Databricks").topK(3).build()); + + // Verify we got results + assertThat(results).isNotEmpty(); + assertThat(results.size()).isLessThanOrEqualTo(3); + + // Verify documents have content (from chunk_text field) + Document firstDoc = results.get(0); + assertThat(firstDoc.getId()).isNotNull(); + assertThat(firstDoc.getText()).isNotEmpty(); + assertThat(firstDoc.getScore()).isNotNull(); + } + @SpringBootConfiguration @EnableAutoConfiguration public static class Config {