Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 127 additions & 0 deletions examples/rag/postgres-hybrid.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use Doctrine\DBAL\DriverManager;
use Doctrine\DBAL\Tools\DsnParser;
use Symfony\AI\Fixtures\Movies;
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
use Symfony\AI\Store\Bridge\Postgres\HybridStore;
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
use Symfony\AI\Store\Document\Metadata;
use Symfony\AI\Store\Document\TextDocument;
use Symfony\AI\Store\Document\Vectorizer;
use Symfony\AI\Store\Exception\RuntimeException;
use Symfony\AI\Store\Indexer;
use Symfony\Component\Uid\Uuid;

require_once dirname(__DIR__).'/bootstrap.php';

echo "=== PostgreSQL Hybrid Search Demo ===\n\n";
echo "This example demonstrates how to configure the semantic ratio to balance\n";
echo "between semantic (vector) search and PostgreSQL Full-Text Search.\n\n";

// Initialize the hybrid store with balanced search (50/50)
$connection = DriverManager::getConnection((new DsnParser())->parse(env('POSTGRES_URI')));
$pdo = $connection->getNativeConnection();

// HybridStore is given the native connection, so bail out early if DBAL did not hand back a PDO instance.
if (!$pdo instanceof PDO) {
    throw new RuntimeException('Unable to get native PDO connection from Doctrine DBAL.');
}

$store = new HybridStore(
    connection: $pdo,
    tableName: 'hybrid_movies',
    semanticRatio: 0.5, // Balanced hybrid search by default
);

// Prints a ranked list of query results: 1-based position, movie title
// (or "Unknown" when the metadata has no title) and score (0.0 when unset).
$printResults = static function (iterable $results): void {
    echo "Top 3 results:\n";
    foreach ($results as $i => $result) {
        $metadata = $result->metadata->getArrayCopy();
        printf(
            " %d. %s (Score: %.4f)\n",
            $i + 1,
            $metadata['title'] ?? 'Unknown',
            $result->score ?? 0.0,
        );
    }
    echo "\n";
};

// Create documents; the same text is used as document content and stored
// under the "content" metadata key, so it is built only once per movie.
$documents = [];
foreach (Movies::all() as $movie) {
    $content = 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'];

    $documents[] = new TextDocument(
        id: Uuid::v4(),
        content: $content,
        metadata: new Metadata(array_merge($movie, ['content' => $content])),
    );
}

// Initialize the table
$store->setup();

// Everything after setup() runs inside try/finally so the demo table is
// dropped even when indexing or querying throws.
try {
    // Create embeddings for documents
    $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
    $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger());
    $indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger());
    $indexer->index($documents);

    // Create a query embedding
    $queryText = 'futuristic technology and artificial intelligence';
    echo "Query: \"$queryText\"\n\n";
    $queryEmbedding = $vectorizer->vectorize($queryText);

    // Test different semantic ratios to compare results
    $ratios = [
        ['ratio' => 0.0, 'description' => '100% Full-text search (keyword matching)'],
        ['ratio' => 0.5, 'description' => 'Balanced hybrid (RRF: 50% semantic + 50% FTS)'],
        ['ratio' => 1.0, 'description' => '100% Semantic search (vector similarity)'],
    ];

    foreach ($ratios as $config) {
        echo "--- {$config['description']} ---\n";

        // Override the semantic ratio for this specific query
        $results = $store->query($queryEmbedding, [
            'semanticRatio' => $config['ratio'],
            'q' => 'technology', // Full-text search keyword
            'limit' => 3,
        ]);

        $printResults($results);
    }

    echo "--- Custom query with pure semantic search ---\n";
    echo "Query: Movies about space exploration\n";
    $spaceEmbedding = $vectorizer->vectorize('space exploration and cosmic adventures');
    $results = $store->query($spaceEmbedding, [
        'semanticRatio' => 1.0, // Pure semantic search
        'limit' => 3,
    ]);

    $printResults($results);
} finally {
    // Cleanup
    $store->drop();
}

echo "=== Summary ===\n";
echo "- semanticRatio = 0.0: Best for exact keyword matches (PostgreSQL FTS)\n";
echo "- semanticRatio = 0.5: Balanced approach using RRF (Reciprocal Rank Fusion)\n";
echo "- semanticRatio = 1.0: Best for conceptual similarity searches (pgvector)\n";
echo "\nYou can set the default ratio when instantiating the HybridStore,\n";
echo "and override it per query using the 'semanticRatio' option.\n";
48 changes: 48 additions & 0 deletions src/ai-bundle/config/options.php
Original file line number Diff line number Diff line change
Expand Up @@ -738,6 +738,54 @@
->end()
->end()
->end()
// "postgres_hybrid" store type: a map of named store entries (the entry name is the array key).
->arrayNode('postgres_hybrid')
->info('PostgreSQL Hybrid Search combining pgvector (semantic) and Full-Text Search (lexical) using RRF')
->useAttributeAsKey('name')
->arrayPrototype()
->children()
// Connection source: exactly one of "connection", "dsn" or "dbal_connection"
// must be set (enforced by the validate() rules at the end of this prototype).
->stringNode('connection')->cannotBeEmpty()->end()
->stringNode('dsn')->cannotBeEmpty()->end()
->stringNode('username')->end()
->stringNode('password')->end()
->stringNode('table_name')->isRequired()->end()
->stringNode('vector_field')->defaultValue('embedding')->end()
->stringNode('content_field')->defaultValue('content')->end()
// Bounded to the [0.0, 1.0] range; defaults to pure semantic search.
->floatNode('semantic_ratio')
->info('Ratio between semantic (vector) and keyword (FTS) search. 0.0 = pure FTS, 0.5 = balanced, 1.0 = pure semantic')
->defaultValue(1.0)
->min(0.0)
->max(1.0)
->end()
->enumNode('distance')
->info('Distance metric to use for vector similarity search')
->enumFqcn(PostgresDistance::class)
->defaultValue(PostgresDistance::L2)
->end()
->stringNode('language')
->info('PostgreSQL text search configuration (e.g., "simple", "english", "french"). Default: "simple" (multilingual)')
->defaultValue('simple')
->end()
// Must be at least 1.
->integerNode('rrf_k')
->info('RRF (Reciprocal Rank Fusion) constant. Higher = more equal weighting. Default: 60 (Supabase)')
->defaultValue(60)
->min(1)
->end()
// Optional: null unless explicitly configured.
->floatNode('default_max_score')
->info('Default maximum distance threshold for filtering results (optional)')
->defaultNull()
->end()
->stringNode('dbal_connection')->cannotBeEmpty()->end()
->end()
// Reject entries that configure no connection source at all.
->validate()
->ifTrue(static fn ($v) => !isset($v['dsn']) && !isset($v['dbal_connection']) && !isset($v['connection']))
->thenInvalid('Either "dsn", "dbal_connection", or "connection" must be configured.')
->end()
// Reject entries that configure more than one connection source.
->validate()
->ifTrue(static fn ($v) => (int) isset($v['dsn']) + (int) isset($v['dbal_connection']) + (int) isset($v['connection']) > 1)
->thenInvalid('Only one of "dsn", "dbal_connection", or "connection" can be configured.')
->end()
->end()
->end()
->end()
->end()
->arrayNode('message_store')
Expand Down
76 changes: 76 additions & 0 deletions src/ai-bundle/src/AiBundle.php
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
use Symfony\AI\Store\Bridge\MongoDb\Store as MongoDbStore;
use Symfony\AI\Store\Bridge\Neo4j\Store as Neo4jStore;
use Symfony\AI\Store\Bridge\Pinecone\Store as PineconeStore;
use Symfony\AI\Store\Bridge\Postgres\HybridStore;
use Symfony\AI\Store\Bridge\Postgres\Store as PostgresStore;
use Symfony\AI\Store\Bridge\Qdrant\Store as QdrantStore;
use Symfony\AI\Store\Bridge\Redis\Store as RedisStore;
Expand Down Expand Up @@ -1366,6 +1367,81 @@ private function processStoreConfig(string $type, array $stores, ContainerBuilde
}
}

if ('postgres_hybrid' === $type) {
    // Optional store settings, each mapped to the HybridStore constructor
    // argument position it fills (positions 0 and 1 are connection and table name).
    $hybridOptionalArguments = [
        'vector_field' => 2,
        'content_field' => 3,
        'semantic_ratio' => 4,
        'distance' => 5,
        'language' => 6,
        'rrf_k' => 7,
        'default_max_score' => 8,
    ];

    foreach ($stores as $name => $store) {
        // Resolve the connection argument from whichever source is configured:
        // a PDO service reference, the native connection of a DBAL connection
        // service, or a new PDO instance built from DSN/username/password.
        $connection = match (true) {
            \array_key_exists('connection', $store) => new Reference(ltrim($store['connection'], '@')),
            \array_key_exists('dbal_connection', $store) => (new Definition(\PDO::class))
                ->setFactory([new Reference($store['dbal_connection']), 'getNativeConnection']),
            default => (new Definition(\PDO::class))->setArguments([
                $store['dsn'],
                $store['username'] ?? null,
                $store['password'] ?? null,
            ]),
        };

        $arguments = [$connection, $store['table_name']];

        // Forward only the optional settings that are present in the store config.
        foreach ($hybridOptionalArguments as $option => $position) {
            if (\array_key_exists($option, $store)) {
                $arguments[$position] = $store[$option];
            }
        }

        $hybridStoreId = 'ai.store.'.$type.'.'.$name;
        $definition = (new Definition(HybridStore::class))
            ->addTag('ai.store')
            ->setArguments($arguments);

        // Register the service plus autowiring aliases for "$name" and "$type_$name".
        $container->setDefinition($hybridStoreId, $definition);
        $container->registerAliasForArgument($hybridStoreId, StoreInterface::class, $name);
        $container->registerAliasForArgument($hybridStoreId, StoreInterface::class, $type.'_'.$name);
    }
}

if ('supabase' === $type) {
foreach ($stores as $name => $store) {
$arguments = [
Expand Down
69 changes: 69 additions & 0 deletions src/ai-bundle/tests/DependencyInjection/AiBundleTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,75 @@ public function testPostgresStoreWithDifferentConnectionCanBeConfigured()
$this->assertInstanceOf(Reference::class, $definition->getArgument(0));
}

/**
 * A DSN-based hybrid store must receive an inline PDO definition built from
 * the DSN and credentials, and the configured options must be forwarded.
 */
public function testPostgresHybridStoreWithDsnCanBeConfigured()
{
    $container = $this->buildContainer([
        'ai' => [
            'store' => [
                'postgres_hybrid' => [
                    'hybrid_db' => [
                        'dsn' => 'pgsql:host=localhost;port=5432;dbname=testdb',
                        'username' => 'app',
                        'password' => 'mypass',
                        'table_name' => 'hybrid_vectors',
                        'semantic_ratio' => 0.7,
                        'language' => 'english',
                    ],
                ],
            ],
        ],
    ]);

    $this->assertTrue($container->hasDefinition('ai.store.postgres_hybrid.hybrid_db'));
    $definition = $container->getDefinition('ai.store.postgres_hybrid.hybrid_db');

    // The connection argument is a PDO definition carrying DSN, username and password.
    $pdoDefinition = $definition->getArgument(0);
    $this->assertInstanceOf(Definition::class, $pdoDefinition);
    $this->assertSame(\PDO::class, $pdoDefinition->getClass());
    $this->assertSame(
        ['pgsql:host=localhost;port=5432;dbname=testdb', 'app', 'mypass'],
        $pdoDefinition->getArguments(),
    );

    $this->assertSame('hybrid_vectors', $definition->getArgument(1));
    // The explicitly configured options land on their argument positions.
    $this->assertSame(0.7, $definition->getArgument(4));
    $this->assertSame('english', $definition->getArgument(6));
}

/**
 * A DBAL-based hybrid store must obtain its PDO through the
 * getNativeConnection() factory of the referenced DBAL connection service.
 */
public function testPostgresHybridStoreWithDbalConnectionCanBeConfigured()
{
    $container = $this->buildContainer([
        'ai' => [
            'store' => [
                'postgres_hybrid' => [
                    'hybrid_db' => [
                        'dbal_connection' => 'my_connection',
                        'table_name' => 'hybrid_vectors',
                        'rrf_k' => 100,
                    ],
                ],
            ],
        ],
    ]);

    $this->assertTrue($container->hasDefinition('ai.store.postgres_hybrid.hybrid_db'));
    $definition = $container->getDefinition('ai.store.postgres_hybrid.hybrid_db');

    // Asserting only "Definition" would not distinguish this from the DSN
    // variant, so the factory wiring is checked as well.
    $pdoDefinition = $definition->getArgument(0);
    $this->assertInstanceOf(Definition::class, $pdoDefinition);
    $factory = $pdoDefinition->getFactory();
    $this->assertIsArray($factory);
    $this->assertInstanceOf(Reference::class, $factory[0]);
    $this->assertSame('my_connection', (string) $factory[0]);
    $this->assertSame('getNativeConnection', $factory[1]);

    $this->assertSame('hybrid_vectors', $definition->getArgument(1));
    $this->assertSame(100, $definition->getArgument(7));
}

/**
 * A "connection" entry must be turned into a service reference (with the
 * leading "@" stripped), and unset options must fall back to their defaults.
 */
public function testPostgresHybridStoreWithConnectionReferenceCanBeConfigured()
{
    $container = $this->buildContainer([
        'ai' => [
            'store' => [
                'postgres_hybrid' => [
                    'hybrid_db' => [
                        'connection' => '@my_pdo_service',
                        'table_name' => 'hybrid_vectors',
                    ],
                ],
            ],
        ],
    ]);

    $this->assertTrue($container->hasDefinition('ai.store.postgres_hybrid.hybrid_db'));
    $definition = $container->getDefinition('ai.store.postgres_hybrid.hybrid_db');
    $this->assertInstanceOf(Reference::class, $definition->getArgument(0));
    $this->assertSame('my_pdo_service', (string) $definition->getArgument(0));
    $this->assertSame('hybrid_vectors', $definition->getArgument(1));
    // Options that were not configured are forwarded with their configuration defaults.
    $this->assertSame('embedding', $definition->getArgument(2));
    $this->assertSame('content', $definition->getArgument(3));
}

public function testConfigurationWithUseAttributeAsKeyWorksWithoutNormalizeKeys()
{
// Test that configurations using useAttributeAsKey work correctly
Expand Down
Loading