Skip to content
Merged
5 changes: 5 additions & 0 deletions .changeset/empty-ligers-live.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@langchain/community": patch
---

Add Elasticsearch hybrid search
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import { Client, ClientOptions } from "@elastic/elasticsearch";
import { OpenAIEmbeddings } from "@langchain/openai";
import {
ElasticClientArgs,
ElasticVectorSearch,
HybridRetrievalStrategy,
} from "@langchain/community/vectorstores/elasticsearch";
import { Document } from "@langchain/core/documents";

/**
 * End-to-end example of hybrid retrieval against Elasticsearch.
 *
 * The store is configured with a `HybridRetrievalStrategy`, which is meant to
 * blend three things:
 * - Vector (semantic) search using embeddings
 * - BM25 (lexical) full-text search
 * - Reciprocal Rank Fusion (RRF) to merge both result lists
 *
 * Requirements:
 * - Elasticsearch 8.9+ (for RRF support)
 *   NOTE(review): the store issues RRF through the `retriever` search API,
 *   which may require a newer release than 8.9 — confirm the minimum version.
 * - Run: docker-compose up -d --build (in elasticsearch directory)
 * - Set ELASTIC_URL, ELASTIC_API_KEY (or ELASTIC_USERNAME/ELASTIC_PASSWORD)
 *
 * The script drops the target index if it exists, indexes five sample
 * documents, runs three queries (the last one with a metadata filter), prints
 * the scored hits and finally deletes the index again.
 */
export async function run() {
  const {
    ELASTIC_URL: nodeUrl,
    ELASTIC_API_KEY: apiKey,
    ELASTIC_USERNAME: username,
    ELASTIC_PASSWORD: password,
    ELASTIC_INDEX: indexFromEnv,
  } = process.env;

  // An API key wins over basic auth; with neither present the client is
  // created without credentials.
  const connection: ClientOptions = { node: nodeUrl ?? "http://127.0.0.1:9200" };
  if (apiKey) {
    connection.auth = { apiKey };
  } else if (username && password) {
    connection.auth = { username, password };
  }

  const hybridStrategy = new HybridRetrievalStrategy({
    rankWindowSize: 100,
    rankConstant: 60,
    textField: "text",
  });
  const storeArgs: ElasticClientArgs = {
    client: new Client(connection),
    indexName: indexFromEnv ?? "test_hybrid_search",
    strategy: hybridStrategy,
  };
  const vectorStore = new ElasticVectorSearch(new OpenAIEmbeddings(), storeArgs);

  // Start from a clean index so repeated runs do not accumulate duplicates.
  await vectorStore.deleteIfExists();

  // Sample corpus as [pageContent, category, topic] rows.
  const sampleRows: Array<[string, string, string]> = [
    [
      "Running helps build cardiovascular endurance and strengthens leg muscles.",
      "fitness",
      "running",
    ],
    [
      "Marathon training requires consistent mileage and proper recovery.",
      "fitness",
      "running",
    ],
    [
      "Muscle soreness after exercise is caused by microscopic damage to muscle fibers.",
      "health",
      "recovery",
    ],
    [
      "Stretching and foam rolling can help prevent post-workout muscle pain.",
      "health",
      "recovery",
    ],
    [
      "Python is a popular programming language for data science and machine learning.",
      "technology",
      "programming",
    ],
  ];
  const docs = sampleRows.map(
    ([pageContent, category, topic]) =>
      new Document({ pageContent, metadata: { category, topic } })
  );

  console.log("Adding documents to Elasticsearch...");
  await vectorStore.addDocuments(docs);
  console.log("Documents added successfully!\n");

  // Prints one numbered entry per hit: score, content, then metadata.
  const printHits = (
    hits: Awaited<ReturnType<typeof vectorStore.similaritySearchWithScore>>
  ): void => {
    for (const [position, [hit, relevance]] of hits.entries()) {
      console.log(
        `${position + 1}. [Score: ${relevance.toFixed(4)}] ${hit.pageContent}`
      );
      console.log(` Metadata: ${JSON.stringify(hit.metadata)}\n`);
    }
  };

  // NOTE(review): confirm that similaritySearchWithScore actually reaches the
  // hybrid (RRF) path for the store version this example is run against.

  // Example 1: a query that benefits from both semantic and keyword matching
  console.log("=== Example 1: Hybrid Search ===");
  const query1 = "How to avoid muscle soreness while running?";
  console.log(`Query: "${query1}"\n`);
  printHits(await vectorStore.similaritySearchWithScore(query1, 3));

  // Example 2: a conceptual query with little literal overlap with the corpus
  console.log("\n=== Example 2: Semantic Query ===");
  const query2 = "tips for preventing pain after workouts";
  console.log(`Query: "${query2}"\n`);
  printHits(await vectorStore.similaritySearchWithScore(query2, 2));

  // Example 3: the same kind of search restricted by a metadata filter
  console.log("\n=== Example 3: Hybrid Search with Filters ===");
  const query3 = "fitness advice";
  console.log(`Query: "${query3}"`);
  console.log(`Filter: category = "fitness"\n`);
  printHits(
    await vectorStore.similaritySearchWithScore(query3, 3, {
      category: "fitness",
    })
  );

  // Remove the demo index again.
  console.log("\n=== Cleanup ===");
  await vectorStore.deleteIfExists();
  console.log("Index deleted.");
}
130 changes: 125 additions & 5 deletions libs/langchain-community/src/vectorstores/elasticsearch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { Client, estypes } from "@elastic/elasticsearch";
import type { EmbeddingsInterface } from "@langchain/core/embeddings";
import { VectorStore } from "@langchain/core/vectorstores";
import { Document } from "@langchain/core/documents";
import type { Callbacks } from "@langchain/core/callbacks/manager";
/**
* Type representing the k-nearest neighbors (k-NN) engine used in
* Elasticsearch.
Expand All @@ -24,6 +25,30 @@ interface VectorSearchOptions {
readonly candidates?: number;
}

/**
 * Configuration options for hybrid retrieval strategy.
 *
 * Every field is optional; `HybridRetrievalStrategy` substitutes a default for
 * any field that is omitted.
 */
export interface HybridRetrievalStrategyConfig {
/** Sent as the RRF retriever's `rank_window_size`. Defaults to 100. */
rankWindowSize?: number;
/** Sent as the RRF retriever's `rank_constant`. Defaults to 60. */
rankConstant?: number;
/** Name of the index field the full-text `match` query targets. Defaults to "text". */
textField?: string;
}

/**
 * Hybrid search strategy combining vector and BM25 search using RRF.
 *
 * Holds the resolved settings the vector store reads when it builds a hybrid
 * query: the RRF window size, the RRF rank constant and the name of the text
 * field used for the full-text `match` query.
 */
export class HybridRetrievalStrategy {
  /** Value sent as the RRF retriever's `rank_window_size`. */
  public readonly rankWindowSize: number;

  /** Value sent as the RRF retriever's `rank_constant`. */
  public readonly rankConstant: number;

  /** Index field the full-text `match` query runs against. */
  public readonly textField: string;

  /**
   * @param config Optional overrides; omitted fields fall back to
   *   `rankWindowSize = 100`, `rankConstant = 60`, `textField = "text"`.
   * @throws RangeError if `rankWindowSize` or `rankConstant` is not a positive
   *   integer, or if `textField` is empty or whitespace only.
   */
  constructor(config: HybridRetrievalStrategyConfig = {}) {
    const textField = config.textField ?? "text";
    if (textField.trim().length === 0) {
      throw new RangeError("textField must be a non-empty string");
    }

    // Reject bad values up front instead of letting the search request fail
    // later with a less obvious server-side error.
    this.rankWindowSize = HybridRetrievalStrategy.requirePositiveInteger(
      "rankWindowSize",
      config.rankWindowSize ?? 100
    );
    this.rankConstant = HybridRetrievalStrategy.requirePositiveInteger(
      "rankConstant",
      config.rankConstant ?? 60
    );
    this.textField = textField;
  }

  /** Returns `value` unchanged when it is an integer >= 1, otherwise throws. */
  private static requirePositiveInteger(name: string, value: number): number {
    if (!Number.isInteger(value) || value < 1) {
      throw new RangeError(
        `${name} must be a positive integer, received ${value}`
      );
    }
    return value;
  }
}

/**
* Interface defining the arguments required to create an Elasticsearch
* client.
Expand All @@ -32,6 +57,7 @@ export interface ElasticClientArgs {
readonly client: Client;
readonly indexName?: string;
readonly vectorSearchOptions?: VectorSearchOptions;
readonly strategy?: HybridRetrievalStrategy;
}

/**
Expand All @@ -51,10 +77,23 @@ type ElasticMetadataTerms = {
};

/**
* Class for interacting with an Elasticsearch database. It extends the
* VectorStore base class and provides methods for adding documents and
* vectors to the Elasticsearch database, performing similarity searches,
* deleting documents, and more.
* Elasticsearch vector store supporting vector and hybrid search.
*
* Hybrid search combines kNN vector search with BM25 full-text search
* using RRF. Enable by passing a `HybridRetrievalStrategy` to the constructor.
*
* @example
* ```typescript
* // Vector search (default)
* const vectorStore = new ElasticVectorSearch(embeddings, { client, indexName });
*
* // Hybrid search
* const hybridStore = new ElasticVectorSearch(embeddings, {
* client,
* indexName,
* strategy: new HybridRetrievalStrategy()
* });
* ```
*/
export class ElasticVectorSearch extends VectorStore {
declare FilterType: ElasticFilter;
Expand All @@ -73,6 +112,10 @@ export class ElasticVectorSearch extends VectorStore {

private readonly candidates: number;

private readonly strategy?: HybridRetrievalStrategy;

private lastQueryText?: string;

/**
 * Identifies this vector store implementation.
 * @returns The constant string "elasticsearch".
 */
_vectorstoreType(): string {
return "elasticsearch";
}
Expand All @@ -85,9 +128,14 @@ export class ElasticVectorSearch extends VectorStore {
this.m = args.vectorSearchOptions?.m ?? 16;
this.efConstruction = args.vectorSearchOptions?.efConstruction ?? 100;
this.candidates = args.vectorSearchOptions?.candidates ?? 200;
this.strategy = args.strategy;

const userAgent = this.strategy
? "langchain-js-vs-hybrid/0.0.1"
: "langchain-js-vs/0.0.1";

this.client = args.client.child({
headers: { "user-agent": "langchain-js-vs/0.0.1" },
headers: { "user-agent": userAgent },
});
this.indexName = args.indexName ?? "documents";
}
Expand Down Expand Up @@ -155,6 +203,16 @@ export class ElasticVectorSearch extends VectorStore {
return documentIds;
}

/**
 * Text-based similarity search returning documents only.
 *
 * Without a hybrid strategy (or with an empty query string) this defers to
 * the base-class implementation. With a strategy it goes through
 * `similaritySearchWithScore`, so the query text is handed to the hybrid
 * search as an argument instead of being parked in instance state, where
 * concurrent searches on the same store could overwrite each other's text.
 *
 * @param query Natural-language query text.
 * @param k Number of documents to return. Defaults to 4.
 * @param filter Optional metadata filter.
 * @param _callbacks Optional callbacks, forwarded unchanged.
 * @returns The matching documents, best match first.
 */
async similaritySearch(
  query: string,
  k = 4,
  filter?: ElasticFilter,
  _callbacks?: Callbacks
): Promise<Document[]> {
  if (!this.strategy || !query) {
    return super.similaritySearch(query, k, filter, _callbacks);
  }
  const scored = await this.similaritySearchWithScore(
    query,
    k,
    filter,
    _callbacks
  );
  return scored.map(([doc]) => doc);
}

/**
 * Text-based similarity search returning `[Document, score]` tuples.
 *
 * With a hybrid strategy configured and a non-empty query, the query is
 * embedded once and both the text and the vector are passed to the hybrid
 * (BM25 + kNN, RRF-fused) search. Otherwise the base-class implementation is
 * used.
 *
 * @param query Natural-language query text.
 * @param k Number of results to return. Defaults to 4.
 * @param filter Optional metadata filter.
 * @param _callbacks Optional callbacks, forwarded unchanged on the
 *   non-hybrid path.
 * @returns Scored documents, best match first.
 */
async similaritySearchWithScore(
  query: string,
  k = 4,
  filter?: ElasticFilter,
  _callbacks?: Callbacks
): Promise<[Document, number][]> {
  if (!this.strategy || !query) {
    return super.similaritySearchWithScore(query, k, filter, _callbacks);
  }
  const queryVector = await this.embeddings.embedQuery(query);
  return this.hybridSearchVectorWithScore(query, queryVector, k, filter);
}

/**
* Method to perform a similarity search in the Elasticsearch database
* using a vector. It returns the k most similar documents along with
Expand All @@ -169,6 +227,15 @@ export class ElasticVectorSearch extends VectorStore {
k: number,
filter?: ElasticFilter
): Promise<[Document, number][]> {
if (this.strategy && this.lastQueryText) {
return this.hybridSearchVectorWithScore(
this.lastQueryText,
query,
k,
filter
);
}

const result = await this.client.search({
index: this.indexName,
size: k,
Expand All @@ -191,6 +258,59 @@ export class ElasticVectorSearch extends VectorStore {
]);
}

/**
 * Runs a hybrid search: a BM25 `match` retriever and a kNN retriever are
 * combined with Reciprocal Rank Fusion (RRF) in a single search request.
 *
 * @param queryText Raw query text matched against the strategy's text field.
 * @param queryVector Embedding of the query, searched against the
 *   `embedding` field.
 * @param k Number of hits to return; also used as the kNN retriever's `k`.
 * @param filter Optional metadata filter, converted by `buildMetadataTerms`.
 * @returns `[Document, score]` tuples in the order Elasticsearch returned
 *   them.
 * @throws Error if the store was created without a hybrid strategy.
 */
private async hybridSearchVectorWithScore(
  queryText: string,
  queryVector: number[],
  k: number,
  filter?: ElasticFilter
): Promise<[Document, number][]> {
  const strategy = this.strategy;
  if (!strategy) {
    throw new Error(
      "Hybrid search requires a HybridRetrievalStrategy to be configured."
    );
  }

  const metadataTerms = this.buildMetadataTerms(filter);
  const hasMetadataFilter =
    metadataTerms.must.length > 0 || metadataTerms.must_not.length > 0;
  // A top-level `query` cannot be combined with `retriever` in one search
  // request, so the metadata filter is attached to each sub-retriever. That
  // way it restricts both the BM25 and the kNN candidate sets before fusion.
  const retrieverFilter = hasMetadataFilter
    ? { filter: { bool: metadataTerms } }
    : {};

  const result = await this.client.search({
    index: this.indexName,
    size: k,
    retriever: {
      rrf: {
        retrievers: [
          {
            standard: {
              query: {
                match: {
                  [strategy.textField]: queryText,
                },
              },
              ...retrieverFilter,
            },
          },
          {
            knn: {
              field: "embedding",
              query_vector: queryVector,
              k,
              num_candidates: this.candidates,
              ...retrieverFilter,
            },
          },
        ],
        // The fusion window must not be smaller than the number of hits
        // requested, otherwise fewer than `k` results can be ranked.
        rank_window_size: Math.max(strategy.rankWindowSize, k),
        rank_constant: strategy.rankConstant,
      },
    },
  });

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  return result.hits.hits.map((hit: any) => [
    new Document({
      pageContent: hit._source.text,
      metadata: hit._source.metadata,
    }),
    hit._score,
  ]);
}

/**
* Method to delete documents from the Elasticsearch database.
* @param params Object containing the IDs of the documents to delete.
Expand Down
Loading