From b97013bb1871225bd50f8e8a73e2810d24d5235d Mon Sep 17 00:00:00 2001 From: Ahmed EBEN HASSINE Date: Wed, 15 Oct 2025 13:48:07 +0200 Subject: [PATCH 1/5] Add PostgresHybridStore for hybrid search with RRF Combines pgvector semantic search with PostgreSQL Full-Text Search using Reciprocal Rank Fusion (RRF), following Supabase approach. Features: - Configurable semantic/keyword ratio (0.0 to 1.0) - RRF fusion with customizable k parameter - Multilingual FTS support (default: 'simple') - Optional relevance filtering with defaultMaxScore - All pgvector distance metrics supported --- .../Bridge/Postgres/PostgresHybridStore.php | 348 ++++++++++++ .../Postgres/PostgresHybridStoreTest.php | 499 ++++++++++++++++++ 2 files changed, 847 insertions(+) create mode 100644 src/store/src/Bridge/Postgres/PostgresHybridStore.php create mode 100644 src/store/tests/Bridge/Postgres/PostgresHybridStoreTest.php diff --git a/src/store/src/Bridge/Postgres/PostgresHybridStore.php b/src/store/src/Bridge/Postgres/PostgresHybridStore.php new file mode 100644 index 000000000..1886e9683 --- /dev/null +++ b/src/store/src/Bridge/Postgres/PostgresHybridStore.php @@ -0,0 +1,348 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Store\Bridge\Postgres; + +use Symfony\AI\Platform\Vector\Vector; +use Symfony\AI\Platform\Vector\VectorInterface; +use Symfony\AI\Store\Document\Metadata; +use Symfony\AI\Store\Document\VectorDocument; +use Symfony\AI\Store\Exception\InvalidArgumentException; +use Symfony\AI\Store\ManagedStoreInterface; +use Symfony\AI\Store\StoreInterface; +use Symfony\Component\Uid\Uuid; + +/** + * Hybrid Search Store for PostgreSQL/Supabase + * Combines pgvector (semantic) + PostgreSQL Full-Text Search (ts_rank_cd) using RRF. + * + * Uses Reciprocal Rank Fusion (RRF) to combine vector similarity and full-text search, + * following the same approach as Supabase hybrid search implementation. + * + * Requirements: + * - PostgreSQL with pgvector extension + * - A 'content' text field for full-text search + * + * @see https://supabase.com/docs/guides/ai/hybrid-search + * + * @author Ahmed EBEN HASSINE + */ +final readonly class PostgresHybridStore implements ManagedStoreInterface, StoreInterface +{ + /** + * @param string $vectorFieldName Name of the vector field + * @param string $contentFieldName Name of the text field for FTS + * @param float $semanticRatio Ratio between semantic (vector) and keyword (FTS) search (0.0 to 1.0) + * - 0.0 = 100% keyword search (FTS) + * - 0.5 = balanced hybrid search + * - 1.0 = 100% semantic search (vector only) - default + * @param Distance $distance Distance metric for vector similarity + * @param string $language PostgreSQL text search configuration (default: 'simple') + * - 'simple': Works for ALL languages, no stemming (recommended for multilingual content) + * - 'english', 'french', 'spanish', etc.: Language-specific stemming/stopwords + * @param int $rrfK RRF (Reciprocal Rank Fusion) constant for hybrid search (default: 60) + * Higher values = more equal weighting between results + * @param float|null $defaultMaxScore Default maximum distance threshold for vector search (default: null = no filter) + * Only applies to pure vector search (semanticRatio = 1.0) + * Prevents returning irrelevant results with high distance scores + * Example: 0.8 means only return documents with distance < 0.8 + */ + public function __construct( + private \PDO $connection, + private string $tableName, + private string $vectorFieldName = 'embedding', + private string $contentFieldName = 'content', + private float $semanticRatio = 1.0, + private Distance $distance = Distance::L2, + private string $language = 'simple', + private int $rrfK = 60, + private ?float $defaultMaxScore = null, + ) { + if ($semanticRatio < 0.0 || $semanticRatio > 1.0) { + throw new InvalidArgumentException(\sprintf('The semantic ratio must be between 0.0 and 1.0, "%s" given.', $semanticRatio)); + } + } + + public function setup(array $options = []): void + { + // Enable pgvector extension + $this->connection->exec('CREATE EXTENSION IF NOT EXISTS vector'); + + // Create table with vector field, content field for FTS, and tsvector field + $this->connection->exec( + \sprintf( + 'CREATE TABLE IF NOT EXISTS %s ( + id UUID PRIMARY KEY, + metadata JSONB, + %s TEXT NOT NULL, + %s %s(%d) NOT NULL, + content_tsv tsvector GENERATED ALWAYS AS (to_tsvector(\'%s\', %s)) STORED + )', + $this->tableName, + $this->contentFieldName, + $this->vectorFieldName, + $options['vector_type'] ?? 'vector', + $options['vector_size'] ?? 1536, + $this->language, + $this->contentFieldName, + ), + ); + + // Create vector index + $this->connection->exec( + \sprintf( + 'CREATE INDEX IF NOT EXISTS %s_%s_idx ON %s USING %s (%s %s)', + $this->tableName, + $this->vectorFieldName, + $this->tableName, + $options['index_method'] ?? 'ivfflat', + $this->vectorFieldName, + $options['index_opclass'] ?? 'vector_cosine_ops', + ), + ); + + // Create GIN index for full-text search + $this->connection->exec( + \sprintf( + 'CREATE INDEX IF NOT EXISTS %s_content_tsv_idx ON %s USING gin(content_tsv)', + $this->tableName, + $this->tableName, + ), + ); + } + + public function drop(): void + { + $this->connection->exec(\sprintf('DROP TABLE IF EXISTS %s', $this->tableName)); + } + + public function add(VectorDocument ...$documents): void + { + $statement = $this->connection->prepare( + \sprintf( + 'INSERT INTO %1$s (id, metadata, %2$s, %3$s) + VALUES (:id, :metadata, :content, :vector) + ON CONFLICT (id) DO UPDATE SET + metadata = EXCLUDED.metadata, + %2$s = EXCLUDED.%2$s, + %3$s = EXCLUDED.%3$s', + $this->tableName, + $this->contentFieldName, + $this->vectorFieldName, + ), + ); + + foreach ($documents as $document) { + $operation = [ + 'id' => $document->id->toRfc4122(), + 'metadata' => json_encode($document->metadata->getArrayCopy(), \JSON_THROW_ON_ERROR), + 'content' => $document->metadata->getText() ?? '', + 'vector' => $this->toPgvector($document->vector), + ]; + + $statement->execute($operation); + } + } + + /** + * Hybrid search combining vector similarity and full-text search. + * + * @param array{ + * q?: string, + * semanticRatio?: float, + * limit?: int, + * where?: string, + * params?: array, + * maxScore?: float + * } $options + */ + public function query(Vector $vector, array $options = []): array + { + $semanticRatio = $options['semanticRatio'] ?? $this->semanticRatio; + + if ($semanticRatio < 0.0 || $semanticRatio > 1.0) { + throw new InvalidArgumentException(\sprintf('The semantic ratio must be between 0.0 and 1.0, "%s" given.', $semanticRatio)); + } + + $queryText = $options['q'] ?? ''; + $limit = $options['limit'] ?? 5; + + // Build WHERE clause + $where = []; + $params = []; + + // Only add embedding param if we're doing vector search + if ($semanticRatio > 0.0) { + $params['embedding'] = $this->toPgvector($vector); + } + + // Use maxScore from options, or defaultMaxScore if configured + $maxScore = $options['maxScore'] ?? $this->defaultMaxScore; + + if (null !== $maxScore) { + $where[] = "({$this->vectorFieldName} {$this->distance->getComparisonSign()} :embedding) <= :maxScore"; + $params['maxScore'] = $maxScore; + // Ensure embedding is available if maxScore is used + if (!isset($params['embedding'])) { + $params['embedding'] = $this->toPgvector($vector); + } + } + + if ($options['where'] ?? false) { + $where[] = '('.$options['where'].')'; + } + + $whereClause = $where ? 'WHERE '.implode(' AND ', $where) : ''; + + // Choose query strategy based on semanticRatio and query text + if (1.0 === $semanticRatio || empty($queryText)) { + // Pure vector search + $sql = $this->buildVectorOnlyQuery($whereClause, $limit); + } elseif (0.0 === $semanticRatio) { + // Pure full-text search + $sql = $this->buildFtsOnlyQuery($whereClause, $limit); + $params['query'] = $queryText; + } else { + // Hybrid search with weighted combination + $sql = $this->buildHybridQuery($whereClause, $limit, $semanticRatio); + $params['query'] = $queryText; + } + + $statement = $this->connection->prepare($sql); + $statement->execute([...$params, ...($options['params'] ?? [])]); + + $documents = []; + foreach ($statement->fetchAll(\PDO::FETCH_ASSOC) as $result) { + $documents[] = new VectorDocument( + id: Uuid::fromString($result['id']), + vector: new Vector($this->fromPgvector($result['embedding'])), + metadata: new Metadata(json_decode($result['metadata'] ?? '{}', true, 512, \JSON_THROW_ON_ERROR)), + score: $result['score'], + ); + } + + return $documents; + } + + private function buildVectorOnlyQuery(string $whereClause, int $limit): string + { + return \sprintf(<<vectorFieldName, + $this->vectorFieldName, + $this->distance->getComparisonSign(), + $this->tableName, + $whereClause, + $limit, + ); + } + + private function buildFtsOnlyQuery(string $whereClause, int $limit): string + { + // Add FTS match filter to ensure only relevant documents are returned + $ftsFilter = \sprintf("content_tsv @@ websearch_to_tsquery('%s', :query)", $this->language); + + if ($whereClause) { + // Combine existing WHERE clause with FTS filter + $whereClause = str_replace('WHERE ', "WHERE $ftsFilter AND ", $whereClause); + } else { + $whereClause = "WHERE $ftsFilter"; + } + + return \sprintf(<<vectorFieldName, + $this->language, + $this->tableName, + $whereClause, + $limit, + ); + } + + private function buildHybridQuery(string $whereClause, int $limit, float $semanticRatio): string + { + // Add FTS filter for the fts_scores CTE + $ftsWhereClause = $whereClause; + $ftsFilter = \sprintf("content_tsv @@ websearch_to_tsquery('%s', :query)", $this->language); + + if ($whereClause) { + $ftsWhereClause = str_replace('WHERE ', "WHERE $ftsFilter AND ", $whereClause); + } else { + $ftsWhereClause = "WHERE $ftsFilter"; + } + + // RRF (Reciprocal Rank Fusion) - Same approach as Supabase + // Formula: COALESCE(1.0 / (k + rank), 0.0) * weight + // Lower score is better (like distance) + return \sprintf(<<vectorFieldName, + $this->vectorFieldName, + $this->distance->getComparisonSign(), + $this->tableName, + $whereClause, + $this->language, + $this->tableName, + $ftsWhereClause, + $this->rrfK, + $semanticRatio, + $this->rrfK, + 1.0 - $semanticRatio, + $limit, + ); + } + + private function toPgvector(VectorInterface $vector): string + { + return '['.implode(',', $vector->getData()).']'; + } + + /** + * @return float[] + */ + private function fromPgvector(string $vector): array + { + return json_decode($vector, true, 512, \JSON_THROW_ON_ERROR); + } +} diff --git a/src/store/tests/Bridge/Postgres/PostgresHybridStoreTest.php b/src/store/tests/Bridge/Postgres/PostgresHybridStoreTest.php new file mode 100644 index 000000000..75bde3818 --- /dev/null +++ b/src/store/tests/Bridge/Postgres/PostgresHybridStoreTest.php @@ -0,0 +1,499 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Store\Tests\Bridge\Postgres; + +use PHPUnit\Framework\TestCase; +use Symfony\AI\Platform\Vector\Vector; +use Symfony\AI\Store\Bridge\Postgres\PostgresHybridStore; +use Symfony\AI\Store\Document\Metadata; +use Symfony\AI\Store\Document\VectorDocument; +use Symfony\AI\Store\Exception\InvalidArgumentException; +use Symfony\Component\Uid\Uuid; + +final class PostgresHybridStoreTest extends TestCase +{ + public function testConstructorValidatesSemanticRatio() + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0'); + + $pdo = $this->createMock(\PDO::class); + new PostgresHybridStore($pdo, 'test_table', semanticRatio: 1.5); + } + + public function testConstructorValidatesNegativeSemanticRatio() + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0'); + + $pdo = $this->createMock(\PDO::class); + new PostgresHybridStore($pdo, 'test_table', semanticRatio: -0.5); + } + + public function testSetupCreatesTableWithFullTextSearchSupport() + { + $pdo = $this->createMock(\PDO::class); + $store = new PostgresHybridStore($pdo, 'hybrid_table'); + + $pdo->expects($this->exactly(4)) + ->method('exec') + ->willReturnCallback(function (string $sql): int { + static $callCount = 0; + ++$callCount; + + if (1 === $callCount) { + $this->assertSame('CREATE EXTENSION IF NOT EXISTS vector', $sql); + } elseif (2 === $callCount) { + $this->assertStringContainsString('CREATE TABLE IF NOT EXISTS hybrid_table', $sql); + $this->assertStringContainsString('content TEXT NOT NULL', $sql); + $this->assertStringContainsString('embedding vector(1536) NOT NULL', $sql); + $this->assertStringContainsString('content_tsv tsvector GENERATED ALWAYS AS (to_tsvector(\'simple\', content)) STORED', $sql); + } elseif (3 === $callCount) { + $this->assertStringContainsString('CREATE INDEX IF NOT EXISTS hybrid_table_embedding_idx', $sql); + } else { + $this->assertStringContainsString('CREATE INDEX IF NOT EXISTS hybrid_table_content_tsv_idx', $sql); + $this->assertStringContainsString('USING gin(content_tsv)', $sql); + } + + return 0; + }); + + $store->setup(); + } + + public function testAddDocument() + { + $pdo = $this->createMock(\PDO::class); + $statement = $this->createMock(\PDOStatement::class); + + $store = new PostgresHybridStore($pdo, 'hybrid_table'); + + $expectedSql = 'INSERT INTO hybrid_table (id, metadata, content, embedding) + VALUES (:id, :metadata, :content, :vector) + ON CONFLICT (id) DO UPDATE SET + metadata = EXCLUDED.metadata, + content = EXCLUDED.content, + embedding = EXCLUDED.embedding'; + + $pdo->expects($this->once()) + ->method('prepare') + ->with($this->callback(function ($sql) use ($expectedSql) { + return $this->normalizeQuery($sql) === $this->normalizeQuery($expectedSql); + })) + ->willReturn($statement); + + $uuid = Uuid::v4(); + + $statement->expects($this->once()) + ->method('execute') + ->with([ + 'id' => $uuid->toRfc4122(), + 'metadata' => json_encode(['_text' => 'Test content', 'category' => 'test']), + 'content' => 'Test content', + 'vector' => '[0.1,0.2,0.3]', + ]); + + $metadata = new Metadata(['_text' => 'Test content', 'category' => 'test']); + $document = new VectorDocument($uuid, new Vector([0.1, 0.2, 0.3]), $metadata); + $store->add($document); + } + + public function testPureVectorSearch() + { + $pdo = $this->createMock(\PDO::class); + $statement = $this->createMock(\PDOStatement::class); + + $store = new PostgresHybridStore($pdo, 'hybrid_table', semanticRatio: 1.0); + + $expectedSql = 'SELECT id, embedding AS embedding, metadata, (embedding <-> :embedding) AS score + FROM hybrid_table + + ORDER BY score ASC + LIMIT 5'; + + $pdo->expects($this->once()) + ->method('prepare') + ->with($this->callback(function ($sql) use ($expectedSql) { + return $this->normalizeQuery($sql) === $this->normalizeQuery($expectedSql); + })) + ->willReturn($statement); + + $statement->expects($this->once()) + ->method('execute') + ->with(['embedding' => '[0.1,0.2,0.3]']); + + $uuid = Uuid::v4(); + + $statement->expects($this->once()) + ->method('fetchAll') + ->with(\PDO::FETCH_ASSOC) + ->willReturn([ + [ + 'id' => $uuid->toRfc4122(), + 'embedding' => '[0.1,0.2,0.3]', + 'metadata' => json_encode(['text' => 'Test Document']), + 'score' => 0.05, + ], + ]); + + $results = $store->query(new Vector([0.1, 0.2, 0.3])); + + $this->assertCount(1, $results); + $this->assertInstanceOf(VectorDocument::class, $results[0]); + $this->assertSame(0.05, $results[0]->score); + } + + public function testPureKeywordSearch() + { + $pdo = $this->createMock(\PDO::class); + $statement = $this->createMock(\PDOStatement::class); + + $store = new PostgresHybridStore($pdo, 'hybrid_table', semanticRatio: 0.0); + + $expectedSql = "SELECT id, embedding AS embedding, metadata, + (1.0 / (1.0 + ts_rank_cd(content_tsv, websearch_to_tsquery('simple', :query)))) AS score + FROM hybrid_table + WHERE content_tsv @@ websearch_to_tsquery('simple', :query) + ORDER BY score ASC + LIMIT 5"; + + $pdo->expects($this->once()) + ->method('prepare') + ->with($this->callback(function ($sql) use ($expectedSql) { + return $this->normalizeQuery($sql) === $this->normalizeQuery($expectedSql); + })) + ->willReturn($statement); + + $statement->expects($this->once()) + ->method('execute') + ->with($this->callback(function ($params) { + return isset($params['query']) && 'PostgreSQL' === $params['query']; + })); + + $uuid = Uuid::v4(); + + $statement->expects($this->once()) + ->method('fetchAll') + ->with(\PDO::FETCH_ASSOC) + ->willReturn([ + [ + 'id' => $uuid->toRfc4122(), + 'embedding' => '[0.1,0.2,0.3]', + 'metadata' => json_encode(['text' => 'PostgreSQL is awesome']), + 'score' => 0.5, + ], + ]); + + $results = $store->query(new Vector([0.1, 0.2, 0.3]), ['q' => 'PostgreSQL']); + + $this->assertCount(1, $results); + $this->assertSame(0.5, $results[0]->score); + } + + public function testHybridSearchWithRRF() + { + $pdo = $this->createMock(\PDO::class); + $statement = $this->createMock(\PDOStatement::class); + + $store = new PostgresHybridStore($pdo, 'hybrid_table', semanticRatio: 0.5, rrfK: 60); + + $pdo->expects($this->once()) + ->method('prepare') + ->with($this->callback(function ($sql) { + // Check for RRF CTE structure + $this->assertStringContainsString('WITH vector_scores AS', $sql); + $this->assertStringContainsString('fts_scores AS', $sql); + $this->assertStringContainsString('ROW_NUMBER() OVER', $sql); + $this->assertStringContainsString('COALESCE(1.0 / (60 + v.rank_ix), 0.0)', $sql); + $this->assertStringContainsString('FULL OUTER JOIN', $sql); + $this->assertStringContainsString('ORDER BY score DESC', $sql); + + return true; + })) + ->willReturn($statement); + + $statement->expects($this->once()) + ->method('execute') + ->with($this->callback(function ($params) { + return isset($params['embedding']) && isset($params['query']); + })); + + $uuid = Uuid::v4(); + + $statement->expects($this->once()) + ->method('fetchAll') + ->with(\PDO::FETCH_ASSOC) + ->willReturn([ + [ + 'id' => $uuid->toRfc4122(), + 'embedding' => '[0.1,0.2,0.3]', + 'metadata' => json_encode(['text' => 'PostgreSQL database']), + 'score' => 0.025, + ], + ]); + + $results = $store->query(new Vector([0.1, 0.2, 0.3]), ['q' => 'PostgreSQL', 'semanticRatio' => 0.5]); + + $this->assertCount(1, $results); + $this->assertSame(0.025, $results[0]->score); + } + + public function testQueryWithDefaultMaxScore() + { + $pdo = $this->createMock(\PDO::class); + $statement = $this->createMock(\PDOStatement::class); + + $store = new PostgresHybridStore( + $pdo, + 'hybrid_table', + semanticRatio: 1.0, + defaultMaxScore: 0.8 + ); + + $pdo->expects($this->once()) + ->method('prepare') + ->with($this->callback(function ($sql) { + $this->assertStringContainsString('WHERE (embedding <-> :embedding) <= :maxScore', $sql); + + return true; + })) + ->willReturn($statement); + + $statement->expects($this->once()) + ->method('execute') + ->with($this->callback(function ($params) { + return isset($params['embedding']) + && isset($params['maxScore']) + && 0.8 === $params['maxScore']; + })); + + $statement->expects($this->once()) + ->method('fetchAll') + ->with(\PDO::FETCH_ASSOC) + ->willReturn([]); + + $results = $store->query(new Vector([0.1, 0.2, 0.3])); + + $this->assertCount(0, $results); + } + + public function testQueryWithMaxScoreOverride() + { + $pdo = $this->createMock(\PDO::class); + $statement = $this->createMock(\PDOStatement::class); + + $store = new PostgresHybridStore( + $pdo, + 'hybrid_table', + semanticRatio: 1.0, + defaultMaxScore: 0.8 + ); + + $pdo->expects($this->once()) + ->method('prepare') + ->willReturn($statement); + + $statement->expects($this->once()) + ->method('execute') + ->with($this->callback(function ($params) { + // Should use override value 0.5, not default 0.8 + return isset($params['maxScore']) && 0.5 === $params['maxScore']; + })); + + $statement->expects($this->once()) + ->method('fetchAll') + ->with(\PDO::FETCH_ASSOC) + ->willReturn([]); + + $results = $store->query(new Vector([0.1, 0.2, 0.3]), ['maxScore' => 0.5]); + + $this->assertCount(0, $results); + } + + public function testQueryWithCustomLanguage() + { + $pdo = $this->createMock(\PDO::class); + $statement = $this->createMock(\PDOStatement::class); + + $store = new PostgresHybridStore($pdo, 'hybrid_table', semanticRatio: 0.0, language: 'french'); + + $pdo->expects($this->once()) + ->method('prepare') + ->with($this->callback(function ($sql) { + $this->assertStringContainsString("websearch_to_tsquery('french'", $sql); + + return true; + })) + ->willReturn($statement); + + $statement->expects($this->once()) + ->method('execute'); + + $statement->expects($this->once()) + ->method('fetchAll') + ->with(\PDO::FETCH_ASSOC) + ->willReturn([]); + + $store->query(new Vector([0.1, 0.2, 0.3]), ['q' => 'développement']); + } + + public function testQueryWithCustomRRFK() + { + $pdo = $this->createMock(\PDO::class); + $statement = $this->createMock(\PDOStatement::class); + + $store = new PostgresHybridStore($pdo, 'hybrid_table', semanticRatio: 0.5, rrfK: 100); + + $pdo->expects($this->once()) + ->method('prepare') + ->with($this->callback(function ($sql) { + $this->assertStringContainsString('COALESCE(1.0 / (100 + v.rank_ix), 0.0)', $sql); + $this->assertStringContainsString('COALESCE(1.0 / (100 + f.rank_ix), 0.0)', $sql); + + return true; + })) + ->willReturn($statement); + + $statement->expects($this->once()) + ->method('execute'); + + $statement->expects($this->once()) + ->method('fetchAll') + ->with(\PDO::FETCH_ASSOC) + ->willReturn([]); + + $store->query(new Vector([0.1, 0.2, 0.3]), ['q' => 'test']); + } + + public function testQueryInvalidSemanticRatioInOptions() + { + $pdo = $this->createMock(\PDO::class); + $store = new PostgresHybridStore($pdo, 'hybrid_table'); + + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0'); + + $store->query(new Vector([0.1, 0.2, 0.3]), ['semanticRatio' => 1.5]); + } + + public function testDrop() + { + $pdo = $this->createMock(\PDO::class); + $store = new PostgresHybridStore($pdo, 'hybrid_table'); + + $pdo->expects($this->once()) + ->method('exec') + ->with('DROP TABLE IF EXISTS hybrid_table'); + + $store->drop(); + } + + public function testQueryWithCustomLimit() + { + $pdo = $this->createMock(\PDO::class); + $statement = $this->createMock(\PDOStatement::class); + + $store = new PostgresHybridStore($pdo, 'hybrid_table', semanticRatio: 1.0); + + $pdo->expects($this->once()) + ->method('prepare') + ->with($this->callback(function ($sql) { + $this->assertStringContainsString('LIMIT 10', $sql); + + return true; + })) + ->willReturn($statement); + + $statement->expects($this->once()) + ->method('execute'); + + $statement->expects($this->once()) + ->method('fetchAll') + ->with(\PDO::FETCH_ASSOC) + ->willReturn([]); + + $store->query(new Vector([0.1, 0.2, 0.3]), ['limit' => 10]); + } + + public function testAddMultipleDocuments() + { + $pdo = $this->createMock(\PDO::class); + $statement = $this->createMock(\PDOStatement::class); + + $store = new PostgresHybridStore($pdo, 'hybrid_table'); + + $pdo->expects($this->once()) + ->method('prepare') + ->willReturn($statement); + + $uuid1 = Uuid::v4(); + $uuid2 = Uuid::v4(); + + $statement->expects($this->exactly(2)) + ->method('execute') + ->willReturnCallback(function (array $params) use ($uuid1, $uuid2): bool { + static $callCount = 0; + ++$callCount; + + if (1 === $callCount) { + $this->assertSame($uuid1->toRfc4122(), $params['id']); + $this->assertSame('First document', $params['content']); + } else { + $this->assertSame($uuid2->toRfc4122(), $params['id']); + $this->assertSame('Second document', $params['content']); + } + + return true; + }); + + $metadata1 = new Metadata(['_text' => 'First document']); + $metadata2 = new Metadata(['_text' => 'Second document']); + + $document1 = new VectorDocument($uuid1, new Vector([0.1, 0.2, 0.3]), $metadata1); + $document2 = new VectorDocument($uuid2, new Vector([0.4, 0.5, 0.6]), $metadata2); + + $store->add($document1, $document2); + } + + public function testPureKeywordSearchReturnsEmptyWhenNoMatch() + { + $pdo = $this->createMock(\PDO::class); + $statement = $this->createMock(\PDOStatement::class); + + $store = new PostgresHybridStore($pdo, 'hybrid_table', semanticRatio: 0.0); + + $pdo->expects($this->once()) + ->method('prepare') + ->willReturn($statement); + + $statement->expects($this->once()) + ->method('execute'); + + $statement->expects($this->once()) + ->method('fetchAll') + ->with(\PDO::FETCH_ASSOC) + ->willReturn([]); + + $results = $store->query(new Vector([0.1, 0.2, 0.3]), ['q' => 'zzzzzzzzzzzzz']); + + $this->assertCount(0, $results); + } + + private function normalizeQuery(string $query): string + { + // Remove extra spaces, tabs and newlines + $normalized = preg_replace('/\s+/', ' ', $query); + + // Trim the result + return trim($normalized); + } +} From 5b43dd19fba9ae6dbbbd19554eac39b84481b9cd Mon Sep 17 00:00:00 2001 From: Ahmed EBEN HASSINE Date: Thu, 16 Oct 2025 09:32:27 +0200 Subject: [PATCH 2/5] fix(style): apply php-cs-fixer and fix phpstan alerts --- src/store/src/Bridge/Postgres/PostgresHybridStore.php | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/store/src/Bridge/Postgres/PostgresHybridStore.php b/src/store/src/Bridge/Postgres/PostgresHybridStore.php index 1886e9683..624cc0e96 100644 --- a/src/store/src/Bridge/Postgres/PostgresHybridStore.php +++ b/src/store/src/Bridge/Postgres/PostgresHybridStore.php @@ -160,7 +160,7 @@ public function add(VectorDocument ...$documents): void * semanticRatio?: float, * limit?: int, * where?: string, - * params?: array, + * params?: array, * maxScore?: float * } $options */ @@ -196,14 +196,14 @@ public function query(Vector $vector, array $options = []): array } } - if ($options['where'] ?? false) { + if (isset($options['where']) && '' !== $options['where']) { $where[] = '('.$options['where'].')'; } $whereClause = $where ? 'WHERE '.implode(' AND ', $where) : ''; // Choose query strategy based on semanticRatio and query text - if (1.0 === $semanticRatio || empty($queryText)) { + if (1.0 === $semanticRatio || '' === $queryText) { // Pure vector search $sql = $this->buildVectorOnlyQuery($whereClause, $limit); } elseif (0.0 === $semanticRatio) { @@ -255,7 +255,7 @@ private function buildFtsOnlyQuery(string $whereClause, int $limit): string // Add FTS match filter to ensure only relevant documents are returned $ftsFilter = \sprintf("content_tsv @@ websearch_to_tsquery('%s', :query)", $this->language); - if ($whereClause) { + if ('' !== $whereClause) { // Combine existing WHERE clause with FTS filter $whereClause = str_replace('WHERE ', "WHERE $ftsFilter AND ", $whereClause); } else { @@ -284,7 +284,7 @@ private function buildHybridQuery(string $whereClause, int $limit, float $semant $ftsWhereClause = $whereClause; $ftsFilter = \sprintf("content_tsv @@ websearch_to_tsquery('%s', :query)", $this->language); - if ($whereClause) { + if ('' !== $whereClause) { $ftsWhereClause = str_replace('WHERE ', "WHERE $ftsFilter AND ", $whereClause); } else { $ftsWhereClause = "WHERE $ftsFilter"; From e33b2c20973769abdcf722d33f653ea37defff3f Mon Sep 17 00:00:00 2001 From: Ahmed EBEN HASSINE Date: Thu, 30 Oct 2025 15:23:46 +0100 Subject: [PATCH 3/5] refactor(store): centralize WHERE clause building in PostgresHybridStore - Extract WHERE clause logic into addFilterToWhereClause() helper method - Fix embedding param logic: ensure it's set before maxScore uses it - Replace fragile str_replace() with robust str_starts_with() approach - Remove code duplication between buildFtsOnlyQuery and buildHybridQuery This addresses review feedback about fragile WHERE clause manipulation and centralizes the logic in a single, reusable method. --- .../Bridge/Postgres/PostgresHybridStore.php | 56 +++++++++++-------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/src/store/src/Bridge/Postgres/PostgresHybridStore.php b/src/store/src/Bridge/Postgres/PostgresHybridStore.php index 624cc0e96..a7e1e503c 100644 --- a/src/store/src/Bridge/Postgres/PostgresHybridStore.php +++ b/src/store/src/Bridge/Postgres/PostgresHybridStore.php @@ -33,7 +33,7 @@ * * @see https://supabase.com/docs/guides/ai/hybrid-search * - * @author Ahmed EBEN HASSINE + * @author Ahmed EBEN HASSINE */ final readonly class PostgresHybridStore implements ManagedStoreInterface, StoreInterface { @@ -179,21 +179,17 @@ public function query(Vector $vector, array $options = []): array $where = []; $params = []; - // Only add embedding param if we're doing vector search - if ($semanticRatio > 0.0) { - $params['embedding'] = $this->toPgvector($vector); - } - // Use maxScore from options, or defaultMaxScore if configured $maxScore = $options['maxScore'] ?? $this->defaultMaxScore; + // Ensure embedding param is set if maxScore is used (regardless of semanticRatio) + if ($semanticRatio > 0.0 || null !== $maxScore) { + $params['embedding'] = $this->toPgvector($vector); + } + if (null !== $maxScore) { $where[] = "({$this->vectorFieldName} {$this->distance->getComparisonSign()} :embedding) <= :maxScore"; $params['maxScore'] = $maxScore; - // Ensure embedding is available if maxScore is used - if (!isset($params['embedding'])) { - $params['embedding'] = $this->toPgvector($vector); - } } if (isset($options['where']) && '' !== $options['where']) { @@ -254,13 +250,7 @@ private function buildFtsOnlyQuery(string $whereClause, int $limit): string { // Add FTS match filter to ensure only relevant documents are returned $ftsFilter = \sprintf("content_tsv @@ websearch_to_tsquery('%s', :query)", $this->language); - - if ('' !== $whereClause) { - // Combine existing WHERE clause with FTS filter - $whereClause = str_replace('WHERE ', "WHERE $ftsFilter AND ", $whereClause); - } else { - $whereClause = "WHERE $ftsFilter"; - } + $whereClause = $this->addFilterToWhereClause($whereClause, $ftsFilter); return \sprintf(<<language); - - if ('' !== $whereClause) { - $ftsWhereClause = str_replace('WHERE ', "WHERE $ftsFilter AND ", $whereClause); - } else { - $ftsWhereClause = "WHERE $ftsFilter"; - } + $ftsWhereClause = $this->addFilterToWhereClause($whereClause, $ftsFilter); // RRF (Reciprocal Rank Fusion) - Same approach as Supabase // Formula: COALESCE(1.0 / (k + rank), 0.0) * weight @@ -333,6 +317,30 @@ private function buildHybridQuery(string $whereClause, int $limit, float $semant ); } + /** + * Adds a filter condition to an existing WHERE clause using AND logic. + * + * @param string $whereClause Existing WHERE clause (may be empty or start with 'WHERE ') + * @param string $filter Filter condition to add (without 'WHERE ') + * + * @return string Combined WHERE clause + */ + private function addFilterToWhereClause(string $whereClause, string $filter): string + { + if ('' === $whereClause) { + return "WHERE $filter"; + } + + $whereClause = rtrim($whereClause); + + if (str_starts_with($whereClause, 'WHERE ')) { + return "$whereClause AND $filter"; + } + + // Unexpected format, prepend WHERE + return "WHERE $filter AND ".ltrim($whereClause); + } + private function toPgvector(VectorInterface $vector): string { return '['.implode(',', $vector->getData()).']'; From 954d44dae38628543fff71920868c5c15e2bde69 Mon Sep 17 00:00:00 2001 From: Ahmed EBEN HASSINE Date: Thu, 30 Oct 2025 15:31:49 +0100 Subject: [PATCH 4/5] refactor(store): rename PostgresHybridStore to HybridStore - Rename class from PostgresHybridStore to HybridStore - The namespace already indicates it's Postgres-specific - Add postgres-hybrid.php RAG example demonstrating: * Different semantic ratios (0.0, 0.5, 1.0) * RRF (Reciprocal Rank Fusion) hybrid search * Full-text search with 'q' parameter * Per-query semanticRatio override --- examples/rag/postgres-hybrid.php | 126 ++++++++++++++++++ ...ostgresHybridStore.php => HybridStore.php} | 2 +- ...ybridStoreTest.php => HybridStoreTest.php} | 36 ++--- 3 files changed, 145 insertions(+), 19 deletions(-) create mode 100644 examples/rag/postgres-hybrid.php rename src/store/src/Bridge/Postgres/{PostgresHybridStore.php => HybridStore.php} (99%) rename src/store/tests/Bridge/Postgres/{PostgresHybridStoreTest.php => HybridStoreTest.php} (92%) diff --git a/examples/rag/postgres-hybrid.php b/examples/rag/postgres-hybrid.php new file mode 100644 index 000000000..032fc1678 --- /dev/null +++ b/examples/rag/postgres-hybrid.php @@ -0,0 +1,126 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +use Doctrine\DBAL\DriverManager; +use Doctrine\DBAL\Tools\DsnParser; +use Symfony\AI\Fixtures\Movies; +use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory; +use Symfony\AI\Store\Bridge\Postgres\HybridStore; +use Symfony\AI\Store\Document\Loader\InMemoryLoader; +use Symfony\AI\Store\Document\Metadata; +use Symfony\AI\Store\Document\TextDocument; +use Symfony\AI\Store\Document\Vectorizer; +use Symfony\AI\Store\Indexer; +use Symfony\Component\Uid\Uuid; + +require_once dirname(__DIR__).'/bootstrap.php'; + +echo "=== PostgreSQL Hybrid Search Demo ===\n\n"; +echo "This example demonstrates how to configure the semantic ratio to balance\n"; +echo "between semantic (vector) search and PostgreSQL Full-Text Search.\n\n"; + +// Initialize the hybrid store with balanced search (50/50) +$connection = DriverManager::getConnection((new DsnParser())->parse(env('POSTGRES_URI'))); +$pdo = $connection->getNativeConnection(); + +if (!$pdo instanceof PDO) { + throw new RuntimeException('Unable to get native PDO connection from Doctrine DBAL'); +} + +$store = new HybridStore( + connection: $pdo, + tableName: 'hybrid_movies', + semanticRatio: 0.5, // Balanced hybrid search by default +); + +// Create embeddings and documents +$documents = []; +foreach (Movies::all() as $i => $movie) { + $documents[] = new TextDocument( + id: Uuid::v4(), + content: 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'], + metadata: new Metadata(array_merge($movie, ['content' => 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description']])), + ); +} + +// Initialize the table +$store->setup(); + +// Create embeddings for documents +$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); +$vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); +$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); +$indexer->index($documents); + +// Create a query embedding +$queryText = 'futuristic technology and artificial intelligence'; +echo "Query: \"$queryText\"\n\n"; +$queryEmbedding = $vectorizer->vectorize($queryText); + +// Test different semantic ratios to compare results +$ratios = [ + ['ratio' => 0.0, 'description' => '100% Full-text search (keyword matching)'], + ['ratio' => 0.5, 'description' => 'Balanced hybrid (RRF: 50% semantic + 50% FTS)'], + ['ratio' => 1.0, 'description' => '100% Semantic search (vector similarity)'], +]; + +foreach ($ratios as $config) { + echo "--- {$config['description']} ---\n"; + + // Override the semantic ratio for this specific query + $results = $store->query($queryEmbedding, [ + 'semanticRatio' => $config['ratio'], + 'q' => 'technology', // Full-text search keyword + 'limit' => 3, + ]); + + echo "Top 3 results:\n"; + foreach ($results as $i => $result) { + $metadata = $result->metadata->getArrayCopy(); + echo sprintf( + " %d. %s (Score: %.4f)\n", + $i + 1, + $metadata['title'] ?? 'Unknown', + $result->score ?? 0.0 + ); + } + echo "\n"; +} + +echo "--- Custom query with pure semantic search ---\n"; +echo "Query: Movies about space exploration\n"; +$spaceEmbedding = $vectorizer->vectorize('space exploration and cosmic adventures'); +$results = $store->query($spaceEmbedding, [ + 'semanticRatio' => 1.0, // Pure semantic search + 'limit' => 3, +]); + +echo "Top 3 results:\n"; +foreach ($results as $i => $result) { + $metadata = $result->metadata->getArrayCopy(); + echo sprintf( + " %d. %s (Score: %.4f)\n", + $i + 1, + $metadata['title'] ?? 'Unknown', + $result->score ?? 0.0 + ); +} +echo "\n"; + +// Cleanup +$store->drop(); + +echo "=== Summary ===\n"; +echo "- semanticRatio = 0.0: Best for exact keyword matches (PostgreSQL FTS)\n"; +echo "- semanticRatio = 0.5: Balanced approach using RRF (Reciprocal Rank Fusion)\n"; +echo "- semanticRatio = 1.0: Best for conceptual similarity searches (pgvector)\n"; +echo "\nYou can set the default ratio when instantiating the HybridStore,\n"; +echo "and override it per query using the 'semanticRatio' option.\n"; diff --git a/src/store/src/Bridge/Postgres/PostgresHybridStore.php b/src/store/src/Bridge/Postgres/HybridStore.php similarity index 99% rename from src/store/src/Bridge/Postgres/PostgresHybridStore.php rename to src/store/src/Bridge/Postgres/HybridStore.php index a7e1e503c..5c225fa93 100644 --- a/src/store/src/Bridge/Postgres/PostgresHybridStore.php +++ b/src/store/src/Bridge/Postgres/HybridStore.php @@ -35,7 +35,7 @@ * * @author Ahmed EBEN HASSINE */ -final readonly class PostgresHybridStore implements ManagedStoreInterface, StoreInterface +final readonly class HybridStore implements ManagedStoreInterface, StoreInterface { /** * @param string $vectorFieldName Name of the vector field diff --git a/src/store/tests/Bridge/Postgres/PostgresHybridStoreTest.php b/src/store/tests/Bridge/Postgres/HybridStoreTest.php similarity index 92% rename from src/store/tests/Bridge/Postgres/PostgresHybridStoreTest.php rename to src/store/tests/Bridge/Postgres/HybridStoreTest.php index 75bde3818..2fdd59e2a 100644 --- a/src/store/tests/Bridge/Postgres/PostgresHybridStoreTest.php +++ b/src/store/tests/Bridge/Postgres/HybridStoreTest.php @@ -13,13 +13,13 @@ use PHPUnit\Framework\TestCase; use Symfony\AI\Platform\Vector\Vector; -use Symfony\AI\Store\Bridge\Postgres\PostgresHybridStore; +use Symfony\AI\Store\Bridge\Postgres\HybridStore; use Symfony\AI\Store\Document\Metadata; use Symfony\AI\Store\Document\VectorDocument; use Symfony\AI\Store\Exception\InvalidArgumentException; use Symfony\Component\Uid\Uuid; -final class PostgresHybridStoreTest extends TestCase +final class HybridStoreTest extends TestCase { public function testConstructorValidatesSemanticRatio() { @@ -27,7 +27,7 @@ public function testConstructorValidatesSemanticRatio() $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0'); $pdo = $this->createMock(\PDO::class); - new PostgresHybridStore($pdo, 'test_table', semanticRatio: 1.5); + new HybridStore($pdo, 'test_table', semanticRatio: 1.5); } public function testConstructorValidatesNegativeSemanticRatio() @@ -36,13 +36,13 @@ public function testConstructorValidatesNegativeSemanticRatio() $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0'); $pdo = $this->createMock(\PDO::class); - new PostgresHybridStore($pdo, 'test_table', semanticRatio: -0.5); + new HybridStore($pdo, 'test_table', semanticRatio: -0.5); } public function testSetupCreatesTableWithFullTextSearchSupport() { $pdo = $this->createMock(\PDO::class); - $store = new PostgresHybridStore($pdo, 'hybrid_table'); + $store = new HybridStore($pdo, 'hybrid_table'); $pdo->expects($this->exactly(4)) ->method('exec') @@ -75,7 +75,7 @@ public function testAddDocument() $pdo = $this->createMock(\PDO::class); $statement = $this->createMock(\PDOStatement::class); - $store = new PostgresHybridStore($pdo, 'hybrid_table'); + $store = new HybridStore($pdo, 'hybrid_table'); $expectedSql = 'INSERT INTO hybrid_table (id, metadata, content, embedding) VALUES (:id, :metadata, :content, :vector) @@ -112,7 +112,7 @@ public function testPureVectorSearch() $pdo = $this->createMock(\PDO::class); $statement = $this->createMock(\PDOStatement::class); - $store = new PostgresHybridStore($pdo, 'hybrid_table', semanticRatio: 1.0); + $store = new HybridStore($pdo, 'hybrid_table', semanticRatio: 1.0); $expectedSql = 'SELECT id, embedding AS embedding, metadata, (embedding <-> :embedding) AS score FROM hybrid_table @@ -157,7 +157,7 @@ public function testPureKeywordSearch() $pdo = $this->createMock(\PDO::class); $statement = $this->createMock(\PDOStatement::class); - $store = new PostgresHybridStore($pdo, 'hybrid_table', semanticRatio: 0.0); + $store = new HybridStore($pdo, 'hybrid_table', semanticRatio: 0.0); $expectedSql = "SELECT id, embedding AS embedding, metadata, (1.0 / (1.0 + ts_rank_cd(content_tsv, websearch_to_tsquery('simple', :query)))) AS score @@ -204,7 +204,7 @@ public function testHybridSearchWithRRF() $pdo = $this->createMock(\PDO::class); $statement = $this->createMock(\PDOStatement::class); - $store = new PostgresHybridStore($pdo, 'hybrid_table', semanticRatio: 0.5, rrfK: 60); + $store = new HybridStore($pdo, 'hybrid_table', semanticRatio: 0.5, rrfK: 60); $pdo->expects($this->once()) ->method('prepare') @@ -252,7 +252,7 @@ public function testQueryWithDefaultMaxScore() $pdo = $this->createMock(\PDO::class); $statement = $this->createMock(\PDOStatement::class); - $store = new PostgresHybridStore( + $store = new HybridStore( $pdo, 'hybrid_table', semanticRatio: 1.0, @@ -291,7 +291,7 @@ public function testQueryWithMaxScoreOverride() $pdo = $this->createMock(\PDO::class); $statement = $this->createMock(\PDOStatement::class); - $store = new PostgresHybridStore( + $store = new HybridStore( $pdo, 'hybrid_table', semanticRatio: 1.0, @@ -324,7 +324,7 @@ public function testQueryWithCustomLanguage() $pdo = $this->createMock(\PDO::class); $statement = $this->createMock(\PDOStatement::class); - $store = new PostgresHybridStore($pdo, 'hybrid_table', semanticRatio: 0.0, language: 'french'); + $store = new HybridStore($pdo, 'hybrid_table', semanticRatio: 0.0, language: 'french'); $pdo->expects($this->once()) ->method('prepare') @@ -351,7 +351,7 @@ public function testQueryWithCustomRRFK() $pdo = $this->createMock(\PDO::class); $statement = $this->createMock(\PDOStatement::class); - $store = new PostgresHybridStore($pdo, 'hybrid_table', semanticRatio: 0.5, rrfK: 100); + $store = new HybridStore($pdo, 'hybrid_table', semanticRatio: 0.5, rrfK: 100); $pdo->expects($this->once()) ->method('prepare') @@ -377,7 +377,7 @@ public function testQueryWithCustomRRFK() public function testQueryInvalidSemanticRatioInOptions() { $pdo = $this->createMock(\PDO::class); - $store = new PostgresHybridStore($pdo, 'hybrid_table'); + $store = new HybridStore($pdo, 'hybrid_table'); $this->expectException(InvalidArgumentException::class); $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0'); @@ -388,7 +388,7 @@ public function testQueryInvalidSemanticRatioInOptions() public function testDrop() { $pdo = $this->createMock(\PDO::class); - $store = new PostgresHybridStore($pdo, 'hybrid_table'); + $store = new HybridStore($pdo, 'hybrid_table'); $pdo->expects($this->once()) ->method('exec') @@ -402,7 +402,7 @@ public function testQueryWithCustomLimit() $pdo = $this->createMock(\PDO::class); $statement = $this->createMock(\PDOStatement::class); - $store = new PostgresHybridStore($pdo, 'hybrid_table', semanticRatio: 1.0); + $store = new HybridStore($pdo, 'hybrid_table', semanticRatio: 1.0); $pdo->expects($this->once()) ->method('prepare') @@ -429,7 +429,7 @@ public function testAddMultipleDocuments() $pdo = $this->createMock(\PDO::class); $statement = $this->createMock(\PDOStatement::class); - $store = new PostgresHybridStore($pdo, 'hybrid_table'); + $store = new HybridStore($pdo, 'hybrid_table'); $pdo->expects($this->once()) ->method('prepare') @@ -469,7 +469,7 @@ public function testPureKeywordSearchReturnsEmptyWhenNoMatch() $pdo = $this->createMock(\PDO::class); $statement = $this->createMock(\PDOStatement::class); - $store = new PostgresHybridStore($pdo, 'hybrid_table', semanticRatio: 0.0); + $store = new HybridStore($pdo, 'hybrid_table', semanticRatio: 0.0); $pdo->expects($this->once()) ->method('prepare') From 2c7b49a6c64ed6cd9866d5ce3e511687d166b5e9 Mon Sep 17 00:00:00 2001 From: Ahmed EBEN HASSINE Date: Fri, 7 Nov 2025 14:24:30 +0100 Subject: [PATCH 5/5] Add postgres_hybrid store config and tests --- examples/rag/postgres-hybrid.php | 3 +- src/ai-bundle/config/options.php | 48 ++++++++++++ src/ai-bundle/src/AiBundle.php | 76 +++++++++++++++++++ .../DependencyInjection/AiBundleTest.php | 69 +++++++++++++++++ src/store/src/Bridge/Postgres/HybridStore.php | 2 +- 5 files changed, 196 insertions(+), 2 deletions(-) diff --git a/examples/rag/postgres-hybrid.php b/examples/rag/postgres-hybrid.php index 032fc1678..f71fa24db 100644 --- a/examples/rag/postgres-hybrid.php +++ b/examples/rag/postgres-hybrid.php @@ -18,6 +18,7 @@ use Symfony\AI\Store\Document\Metadata; use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; +use Symfony\AI\Store\Exception\RuntimeException; use Symfony\AI\Store\Indexer; use Symfony\Component\Uid\Uuid; @@ -32,7 +33,7 @@ $pdo = $connection->getNativeConnection(); if (!$pdo instanceof PDO) { - throw new RuntimeException('Unable to get native PDO connection from Doctrine DBAL'); + throw new RuntimeException('Unable to get native PDO connection from Doctrine DBAL.'); } $store = new HybridStore( diff --git a/src/ai-bundle/config/options.php b/src/ai-bundle/config/options.php index bafabd267..0479b6492 100644 --- a/src/ai-bundle/config/options.php +++ b/src/ai-bundle/config/options.php @@ -738,6 +738,54 @@ ->end() ->end() ->end() + ->arrayNode('postgres_hybrid') + ->info('PostgreSQL Hybrid Search combining pgvector (semantic) and Full-Text Search (lexical) using RRF') + ->useAttributeAsKey('name') + ->arrayPrototype() + ->children() + ->stringNode('connection')->cannotBeEmpty()->end() + ->stringNode('dsn')->cannotBeEmpty()->end() + ->stringNode('username')->end() + ->stringNode('password')->end() + ->stringNode('table_name')->isRequired()->end() + ->stringNode('vector_field')->defaultValue('embedding')->end() + ->stringNode('content_field')->defaultValue('content')->end() + ->floatNode('semantic_ratio') + ->info('Ratio between semantic (vector) and keyword (FTS) search. 0.0 = pure FTS, 0.5 = balanced, 1.0 = pure semantic') + ->defaultValue(1.0) + ->min(0.0) + ->max(1.0) + ->end() + ->enumNode('distance') + ->info('Distance metric to use for vector similarity search') + ->enumFqcn(PostgresDistance::class) + ->defaultValue(PostgresDistance::L2) + ->end() + ->stringNode('language') + ->info('PostgreSQL text search configuration (e.g., "simple", "english", "french"). Default: "simple" (multilingual)') + ->defaultValue('simple') + ->end() + ->integerNode('rrf_k') + ->info('RRF (Reciprocal Rank Fusion) constant. Higher = more equal weighting. Default: 60 (Supabase)') + ->defaultValue(60) + ->min(1) + ->end() + ->floatNode('default_max_score') + ->info('Default maximum distance threshold for filtering results (optional)') + ->defaultNull() + ->end() + ->stringNode('dbal_connection')->cannotBeEmpty()->end() + ->end() + ->validate() + ->ifTrue(static fn ($v) => !isset($v['dsn']) && !isset($v['dbal_connection']) && !isset($v['connection'])) + ->thenInvalid('Either "dsn", "dbal_connection", or "connection" must be configured.') + ->end() + ->validate() + ->ifTrue(static fn ($v) => (int) isset($v['dsn']) + (int) isset($v['dbal_connection']) + (int) isset($v['connection']) > 1) + ->thenInvalid('Only one of "dsn", "dbal_connection", or "connection" can be configured.') + ->end() + ->end() + ->end() ->end() ->end() ->arrayNode('message_store') diff --git a/src/ai-bundle/src/AiBundle.php b/src/ai-bundle/src/AiBundle.php index a602b339a..297ad0161 100644 --- a/src/ai-bundle/src/AiBundle.php +++ b/src/ai-bundle/src/AiBundle.php @@ -79,6 +79,7 @@ use Symfony\AI\Store\Bridge\MongoDb\Store as MongoDbStore; use Symfony\AI\Store\Bridge\Neo4j\Store as Neo4jStore; use Symfony\AI\Store\Bridge\Pinecone\Store as PineconeStore; +use Symfony\AI\Store\Bridge\Postgres\HybridStore; use Symfony\AI\Store\Bridge\Postgres\Store as PostgresStore; use Symfony\AI\Store\Bridge\Qdrant\Store as QdrantStore; use Symfony\AI\Store\Bridge\Redis\Store as RedisStore; @@ -1366,6 +1367,81 @@ private function processStoreConfig(string $type, array $stores, ContainerBuilde } } + if ('postgres_hybrid' === $type) { + foreach ($stores as $name => $store) { + $definition = new Definition(HybridStore::class); + + // Handle connection (PDO service reference, DBAL connection, or DSN) + if (\array_key_exists('connection', $store)) { + // Direct PDO service reference + $serviceId = ltrim($store['connection'], '@'); + $connection = new Reference($serviceId); + $arguments = [ + $connection, + $store['table_name'], + ]; + } elseif (\array_key_exists('dbal_connection', $store)) { + // DBAL connection - extract native PDO + $connection = (new Definition(\PDO::class)) + ->setFactory([new Reference($store['dbal_connection']), 'getNativeConnection']); + $arguments = [ + $connection, + $store['table_name'], + ]; + } else { + // Create new PDO instance from DSN + $pdo = new Definition(\PDO::class); + $pdo->setArguments([ + $store['dsn'], + $store['username'] ?? null, + $store['password'] ?? null], + ); + + $arguments = [ + $pdo, + $store['table_name'], + ]; + } + + // Add optional parameters + if (\array_key_exists('vector_field', $store)) { + $arguments[2] = $store['vector_field']; + } + + if (\array_key_exists('content_field', $store)) { + $arguments[3] = $store['content_field']; + } + + if (\array_key_exists('semantic_ratio', $store)) { + $arguments[4] = $store['semantic_ratio']; + } + + if (\array_key_exists('distance', $store)) { + $arguments[5] = $store['distance']; + } + + if (\array_key_exists('language', $store)) { + $arguments[6] = $store['language']; + } + + if (\array_key_exists('rrf_k', $store)) { + $arguments[7] = $store['rrf_k']; + } + + if (\array_key_exists('default_max_score', $store)) { + $arguments[8] = $store['default_max_score']; + } + + $definition + ->addTag('ai.store') + ->setArguments($arguments); + + $container->setDefinition('ai.store.'.$type.'.'.$name, $definition); + $container->registerAliasForArgument('ai.store.'.$type.'.'.$name, StoreInterface::class, $name); + $container->registerAliasForArgument('ai.store.'.$type.'.'.$name, StoreInterface::class, $type.'_'.$name); + } + } + if ('supabase' === $type) { foreach ($stores as $name => $store) { $arguments = [ diff --git a/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php b/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php index 37e515b92..b6cfa100f 100644 --- a/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php +++ b/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php @@ -540,6 +540,75 @@ public function testPostgresStoreWithDifferentConnectionCanBeConfigured() $this->assertInstanceOf(Reference::class, $definition->getArgument(0)); } + public function testPostgresHybridStoreWithDsnCanBeConfigured() + { + $container = $this->buildContainer([ + 'ai' => [ + 'store' => [ + 'postgres_hybrid' => [ + 'hybrid_db' => [ + 'dsn' => 'pgsql:host=localhost;port=5432;dbname=testdb', + 'username' => 'app', + 'password' => 'mypass', + 'table_name' => 'hybrid_vectors', + 'semantic_ratio' => 0.7, + 'language' => 'english', + ], + ], + ], + ], + ]); + + $this->assertTrue($container->hasDefinition('ai.store.postgres_hybrid.hybrid_db')); + $definition = $container->getDefinition('ai.store.postgres_hybrid.hybrid_db'); + $this->assertInstanceOf(Definition::class, $definition->getArgument(0)); + $this->assertSame('hybrid_vectors', $definition->getArgument(1)); + } + + public function testPostgresHybridStoreWithDbalConnectionCanBeConfigured() + { + $container = $this->buildContainer([ + 'ai' => [ + 'store' => [ + 'postgres_hybrid' => [ + 'hybrid_db' => [ + 'dbal_connection' => 'my_connection', + 'table_name' => 'hybrid_vectors', + 'rrf_k' => 100, + ], + ], + ], + ], + ]); + + $this->assertTrue($container->hasDefinition('ai.store.postgres_hybrid.hybrid_db')); + $definition = $container->getDefinition('ai.store.postgres_hybrid.hybrid_db'); + $this->assertInstanceOf(Definition::class, $definition->getArgument(0)); + $this->assertSame('hybrid_vectors', $definition->getArgument(1)); + $this->assertSame(100, $definition->getArgument(7)); + } + + public function testPostgresHybridStoreWithConnectionReferenceCanBeConfigured() + { + $container = $this->buildContainer([ + 'ai' => [ + 'store' => [ + 'postgres_hybrid' => [ + 'hybrid_db' => [ + 'connection' => '@my_pdo_service', + 'table_name' => 'hybrid_vectors', + ], + ], + ], + ], + ]); + + $this->assertTrue($container->hasDefinition('ai.store.postgres_hybrid.hybrid_db')); + $definition = $container->getDefinition('ai.store.postgres_hybrid.hybrid_db'); + $this->assertInstanceOf(Reference::class, $definition->getArgument(0)); + $this->assertSame('my_pdo_service', (string) $definition->getArgument(0)); + } + public function testConfigurationWithUseAttributeAsKeyWorksWithoutNormalizeKeys() { // Test that configurations using useAttributeAsKey work correctly diff --git a/src/store/src/Bridge/Postgres/HybridStore.php b/src/store/src/Bridge/Postgres/HybridStore.php index 5c225fa93..80e1bad9b 100644 --- a/src/store/src/Bridge/Postgres/HybridStore.php +++ b/src/store/src/Bridge/Postgres/HybridStore.php @@ -35,7 +35,7 @@ * * @author Ahmed EBEN HASSINE */ -final readonly class HybridStore implements ManagedStoreInterface, StoreInterface +final class HybridStore implements ManagedStoreInterface, StoreInterface { /** * @param string $vectorFieldName Name of the vector field