Skip to content

Commit 3618524

Browse files
lsh1215ilayaperumalg
authored andcommitted
OpenSearch: omit explicit IDs when manageDocumentIds=false; add unit/ITs; AWS Serverless compat.
- Update OpenSearchVectorStore#doAdd to omit explicit document IDs when manageDocumentIds=false, enabling AWS OpenSearch Serverless compatibility - Add unit tests for document ID management logic in doAdd - Add integration tests covering explicit/non-explicit ID modes and delete-by-ID behavior Closes gh-3818 Signed-off-by: sanghun <vitash1215@gmail.com> Set manageDocumentIds default to true for backward compatibility The manageDocumentIds flag was initially set to false, which would break existing users who rely on explicit document ID management. This change sets the default to true to preserve the current behavior for all existing OpenSearch users. AWS OpenSearch Serverless users can explicitly opt-in by setting manageDocumentIds(false) when they need auto-generated IDs due to the platform's restrictions on custom document IDs. This ensures backward compatibility while still providing the flexibility needed for AWS Serverless environments. Related: gh-3818 Signed-off-by: sanghun <vitash1215@gmail.com> Fix Checkstyle violations in OpenSearchVectorStoreTest Resolved 14 Checkstyle errors that blocked the build process: - Corrected import statement ordering - Added 'this.' qualifier to instance variable references - Added missing newline at end of file This ensures compliance with Spring AI coding standards and enables successful compilation after rebasing onto upstream/main. Signed-off-by: sanghun <vitash1215@gmail.com> OpenSearch: omit explicit IDs when manageDocumentIds=false; add unit/ITs; AWS Serverless compat. - Update OpenSearchVectorStore#doAdd to omit explicit document IDs when manageDocumentIds=false, enabling AWS OpenSearch Serverless compatibility - Add unit tests for document ID management logic in doAdd - Add integration tests covering explicit/non-explicit ID modes and delete-by-ID behavior Closes gh-3818 Set manageDocumentIds default to true for backward compatibility AWS OpenSearch Serverless users can explicitly opt-in by setting manageDocumentIds(false) when they need auto-generated IDs due to the platform's restrictions on custom document IDs. This ensures backward compatibility while still providing the flexibility needed for AWS Serverless environments. Related: gh-3818 Signed-off-by: sanghun <vitash1215@gmail.com>
1 parent 0d89fb9 commit 3618524

File tree

3 files changed

+414
-2
lines changed

3 files changed

+414
-2
lines changed

vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStore.java

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,16 @@
102102
* }</pre>
103103
*
104104
* <p>
105+
* AWS OpenSearch Serverless usage example:
106+
* </p>
107+
* <pre>{@code
108+
* OpenSearchVectorStore vectorStore = OpenSearchVectorStore.builder(openSearchClient, embeddingModel)
109+
* .initializeSchema(true)
110+
* .manageDocumentIds(false) // Required for AWS OpenSearch Serverless
111+
* .build();
112+
* }</pre>
113+
*
114+
* <p>
105115
* Advanced configuration example:
106116
* </p>
107117
* <pre>{@code
@@ -137,6 +147,7 @@
137147
* @author Christian Tzolov
138148
* @author Thomas Vitale
139149
* @author inpink
150+
* @author Sanghun Lee
140151
* @since 1.0.0
141152
*/
142153
public class OpenSearchVectorStore extends AbstractObservationVectorStore implements InitializingBean {
@@ -174,6 +185,8 @@ public class OpenSearchVectorStore extends AbstractObservationVectorStore implem
174185

175186
private final int dimensions;
176187

188+
private final boolean manageDocumentIds;
189+
177190
/**
178191
* Creates a new OpenSearchVectorStore using the builder pattern.
179192
* @param builder The configured builder instance
@@ -193,6 +206,7 @@ protected OpenSearchVectorStore(Builder builder) {
193206
this.initializeSchema = builder.initializeSchema;
194207
this.useApproximateKnn = builder.useApproximateKnn;
195208
this.dimensions = builder.dimensions;
209+
this.manageDocumentIds = builder.manageDocumentIds;
196210
}
197211

198212
/**
@@ -216,14 +230,27 @@ public void doAdd(List<Document> documents) {
216230
for (Document document : documents) {
217231
OpenSearchDocument openSearchDocument = new OpenSearchDocument(document.getId(), document.getText(),
218232
document.getMetadata(), embedding.get(documents.indexOf(document)));
219-
bulkRequestBuilder.operations(op -> op
220-
.index(idx -> idx.index(this.index).id(openSearchDocument.id()).document(openSearchDocument)));
233+
234+
// Conditionally set document ID based on manageDocumentIds flag
235+
if (this.manageDocumentIds) {
236+
bulkRequestBuilder.operations(op -> op
237+
.index(idx -> idx.index(this.index).id(openSearchDocument.id()).document(openSearchDocument)));
238+
}
239+
else {
240+
bulkRequestBuilder
241+
.operations(op -> op.index(idx -> idx.index(this.index).document(openSearchDocument)));
242+
}
221243
}
222244
bulkRequest(bulkRequestBuilder.build());
223245
}
224246

225247
@Override
226248
public void doDelete(List<String> idList) {
249+
if (!this.manageDocumentIds) {
250+
logger.warn("Document ID management is disabled. Delete operations may not work as expected "
251+
+ "since document IDs are auto-generated by OpenSearch. Consider using filter-based deletion instead.");
252+
}
253+
227254
BulkRequest.Builder bulkRequestBuilder = new BulkRequest.Builder();
228255
for (String id : idList) {
229256
bulkRequestBuilder.operations(op -> op.delete(idx -> idx.index(this.index).id(id)));
@@ -481,6 +508,8 @@ public static class Builder extends AbstractVectorStoreBuilder<Builder> {
481508

482509
private int dimensions = 1536;
483510

511+
private boolean manageDocumentIds = true;
512+
484513
/**
485514
* Sets the OpenSearch client.
486515
* @param openSearchClient The OpenSearch client to use
@@ -585,6 +614,28 @@ public Builder dimensions(int dimensions) {
585614
return this;
586615
}
587616

617+
/**
618+
* Sets whether to manage document IDs during indexing operations.
619+
* <p>
620+
* When set to {@code true} (default), document IDs will be explicitly set during
621+
* indexing operations. When set to {@code false}, OpenSearch will auto-generate
622+
* document IDs, which is required for AWS OpenSearch Serverless vector search
623+
* collections.
624+
* </p>
625+
* <p>
626+
* Note: When document ID management is disabled, the {@link #doDelete(List)}
627+
* method may not work as expected since document IDs are auto-generated by
628+
* OpenSearch.
629+
* </p>
630+
* @param manageDocumentIds true to manage document IDs (default), false to let
631+
* OpenSearch auto-generate IDs
632+
* @return The builder instance
633+
*/
634+
public Builder manageDocumentIds(boolean manageDocumentIds) {
635+
this.manageDocumentIds = manageDocumentIds;
636+
return this;
637+
}
638+
588639
/**
589640
* Builds a new OpenSearchVectorStore instance with the configured properties.
590641
* @return A new OpenSearchVectorStore instance

vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStoreIT.java

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,161 @@ public void approximateSearchThresholdTest(String similarityFunction) {
810810
});
811811
}
812812

813+
@ParameterizedTest(name = "manageDocumentIds={0}")
814+
@ValueSource(booleans = { true, false })
815+
void testManageDocumentIdsSetting(boolean manageDocumentIds) {
816+
getContextRunner().run(context -> {
817+
OpenSearchVectorStore vectorStore = context.getBean("vectorStore", OpenSearchVectorStore.class);
818+
819+
// Create a new vector store with specific manageDocumentIds setting
820+
OpenSearchVectorStore testVectorStore = OpenSearchVectorStore
821+
.builder((OpenSearchClient) vectorStore.getNativeClient().orElseThrow(),
822+
context.getBean(EmbeddingModel.class))
823+
.manageDocumentIds(manageDocumentIds)
824+
.index("test_manage_document_ids_" + manageDocumentIds)
825+
.initializeSchema(true)
826+
.build();
827+
828+
// Test documents
829+
List<Document> testDocuments = List.of(new Document("doc1", "Test content 1", Map.of("key1", "value1")),
830+
new Document("doc2", "Test content 2", Map.of("key2", "value2")));
831+
832+
// Add documents
833+
testVectorStore.add(testDocuments);
834+
835+
// Wait for indexing
836+
Awaitility.await()
837+
.until(() -> testVectorStore
838+
.similaritySearch(SearchRequest.builder().query("Test content").topK(2).build()), hasSize(2));
839+
840+
// Search and verify results
841+
List<Document> results = testVectorStore
842+
.similaritySearch(SearchRequest.builder().query("Test content").topK(2).build());
843+
844+
assertThat(results).hasSize(2);
845+
846+
// Verify document content and metadata are preserved
847+
assertThat(results.stream().map(Document::getText).toList()).containsExactlyInAnyOrder("Test content 1",
848+
"Test content 2");
849+
850+
assertThat(results.stream().map(doc -> doc.getMetadata().get("key1")).toList()).contains("value1");
851+
assertThat(results.stream().map(doc -> doc.getMetadata().get("key2")).toList()).contains("value2");
852+
853+
// Clean up
854+
testVectorStore.delete(testDocuments.stream().map(Document::getId).toList());
855+
});
856+
}
857+
858+
@Test
859+
void testManageDocumentIdsFalseForAWSOpenSearchServerless() {
860+
getContextRunner().run(context -> {
861+
OpenSearchVectorStore vectorStore = context.getBean("vectorStore", OpenSearchVectorStore.class);
862+
863+
// Create vector store with manageDocumentIds=false (AWS OpenSearch Serverless
864+
// mode)
865+
OpenSearchVectorStore awsCompatibleVectorStore = OpenSearchVectorStore
866+
.builder((OpenSearchClient) vectorStore.getNativeClient().orElseThrow(),
867+
context.getBean(EmbeddingModel.class))
868+
.manageDocumentIds(false)
869+
.index("test_aws_serverless_compatible")
870+
.initializeSchema(true)
871+
.build();
872+
873+
// Test documents with IDs (these should be ignored when
874+
// manageDocumentIds=false)
875+
List<Document> testDocuments = List.of(
876+
new Document("custom-id-1", "AWS Serverless content 1", Map.of("env", "aws-serverless")),
877+
new Document("custom-id-2", "AWS Serverless content 2", Map.of("env", "aws-serverless")));
878+
879+
// Add documents - should work without explicit document ID errors
880+
awsCompatibleVectorStore.add(testDocuments);
881+
882+
// Wait for indexing
883+
Awaitility.await()
884+
.until(() -> awsCompatibleVectorStore
885+
.similaritySearch(SearchRequest.builder().query("AWS Serverless").topK(2).build()), hasSize(2));
886+
887+
// Search and verify results
888+
List<Document> results = awsCompatibleVectorStore
889+
.similaritySearch(SearchRequest.builder().query("AWS Serverless").topK(2).build());
890+
891+
assertThat(results).hasSize(2);
892+
893+
// Verify content is preserved
894+
assertThat(results.stream().map(Document::getText).toList())
895+
.containsExactlyInAnyOrder("AWS Serverless content 1", "AWS Serverless content 2");
896+
897+
// Verify metadata is preserved
898+
assertThat(results.stream().map(doc -> doc.getMetadata().get("env")).toList())
899+
.containsOnly("aws-serverless");
900+
901+
// Clean up
902+
awsCompatibleVectorStore.delete(List.of("_all"));
903+
});
904+
}
905+
906+
@Test
907+
void testManageDocumentIdsTrueWithExplicitIds() {
908+
getContextRunner().run(context -> {
909+
OpenSearchVectorStore vectorStore = context.getBean("vectorStore", OpenSearchVectorStore.class);
910+
911+
// Create vector store with manageDocumentIds=true (default behavior)
912+
OpenSearchVectorStore explicitIdVectorStore = OpenSearchVectorStore
913+
.builder((OpenSearchClient) vectorStore.getNativeClient().orElseThrow(),
914+
context.getBean(EmbeddingModel.class))
915+
.manageDocumentIds(true)
916+
.index("test_explicit_ids")
917+
.initializeSchema(true)
918+
.build();
919+
920+
// Test documents with specific IDs
921+
List<Document> testDocuments = List.of(
922+
new Document("explicit-id-1", "Explicit ID content 1", Map.of("type", "explicit")),
923+
new Document("explicit-id-2", "Explicit ID content 2", Map.of("type", "explicit")));
924+
925+
// Add documents
926+
explicitIdVectorStore.add(testDocuments);
927+
928+
// Wait for indexing
929+
Awaitility.await()
930+
.until(() -> explicitIdVectorStore
931+
.similaritySearch(SearchRequest.builder().query("Explicit ID").topK(2).build()), hasSize(2));
932+
933+
// Search and verify results
934+
List<Document> results = explicitIdVectorStore
935+
.similaritySearch(SearchRequest.builder().query("Explicit ID").topK(2).build());
936+
937+
assertThat(results).hasSize(2);
938+
939+
// Verify document IDs are preserved
940+
assertThat(results.stream().map(Document::getId).toList()).containsExactlyInAnyOrder("explicit-id-1",
941+
"explicit-id-2");
942+
943+
// Verify content and metadata
944+
assertThat(results.stream().map(Document::getText).toList())
945+
.containsExactlyInAnyOrder("Explicit ID content 1", "Explicit ID content 2");
946+
947+
assertThat(results.stream().map(doc -> doc.getMetadata().get("type")).toList()).containsOnly("explicit");
948+
949+
// Test deletion by specific IDs
950+
explicitIdVectorStore.delete(List.of("explicit-id-1"));
951+
952+
Awaitility.await()
953+
.until(() -> explicitIdVectorStore
954+
.similaritySearch(SearchRequest.builder().query("Explicit ID").topK(2).build()), hasSize(1));
955+
956+
// Verify only one document remains
957+
results = explicitIdVectorStore
958+
.similaritySearch(SearchRequest.builder().query("Explicit ID").topK(2).build());
959+
960+
assertThat(results).hasSize(1);
961+
assertThat(results.get(0).getId()).isEqualTo("explicit-id-2");
962+
963+
// Clean up
964+
explicitIdVectorStore.delete(List.of("explicit-id-2"));
965+
});
966+
}
967+
813968
@SpringBootConfiguration
814969
@EnableAutoConfiguration(exclude = DataSourceAutoConfiguration.class)
815970
public static class TestApplication {

0 commit comments

Comments
 (0)