Skip to content

Commit 71ef282

Browse files
authored
Use the key module, and get embeddings working (#13)
* port openai_embeddings, refactor for key module * composer update * fix settings fetch * fix settings * get embeddings working with key * use key in openai.module
1 parent 1b3e7df commit 71ef282

File tree

6 files changed

+249
-118
lines changed

6 files changed

+249
-118
lines changed

modules/openai_embeddings/includes/EmbeddingQueueWorker.php

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -18,84 +18,84 @@ public function processItem($data) {
1818
if (!$entity) {
1919
throw new Exception("Could not load entity with ID {$data['entity_id']}.");
2020
}
21-
21+
2222
// Log the entity type.
23-
/* watchdog('openai_embeddings', 'Processing entity ID: @id, Type: @type', [
23+
watchdog('openai_embeddings', 'Processing entity ID: @id, Type: @type', [
2424
'@id' => $entity->nid,
2525
'@type' => $entity->type,
26-
], WATCHDOG_DEBUG); */
27-
26+
], WATCHDOG_DEBUG);
27+
2828
// Get configuration.
2929
$config = config_get('openai_embeddings.settings');
3030
$stopwords = array_map('trim', explode(',', $config['stopwords'] ?? ''));
3131
$model = $config['model'] ?? 'text-embedding-ada-002';
3232
$plugin_id = $config['vector_client_plugin'] ?? NULL;
3333
$allowed_bundles = $config['content_types'] ?? [];
34-
34+
3535
if (!$plugin_id) {
3636
throw new Exception('Vector client plugin ID is not configured.');
3737
}
38-
38+
3939
// Skip entity if its bundle is not allowed.
4040
if (!in_array($entity->type, $allowed_bundles)) {
41-
/* watchdog('openai_embeddings', 'Skipping entity ID: @id because its bundle (@bundle) is not allowed.', [
41+
/*watchdog('openai_embeddings', 'Skipping entity ID: @id because its bundle (@bundle) is not allowed.', [
4242
'@id' => $entity->nid,
4343
'@bundle' => $entity->type,
44-
], WATCHDOG_INFO); */
44+
], WATCHDOG_INFO);*/
4545
return;
4646
}
47-
47+
4848
// Load the vector client.
4949
$vector_client = openai_embeddings_get_vector_client($plugin_id);
50-
50+
5151
// Supported field types.
5252
$supported_field_types = ['string', 'text', 'text_long', 'text_with_summary', 'text_textarea_with_summary'];
53-
53+
5454
// Retrieve field definitions.
5555
$fields = field_info_instances('node', $entity->type);
56-
56+
5757
foreach ($fields as $field_name => $field_info) {
5858
$field_type = $field_info['type'] ?? ($field_info['widget']['type'] ?? 'undefined');
59-
59+
6060
// Check if the field type is supported.
6161
if (!in_array($field_type, $supported_field_types)) {
62-
/* watchdog('openai_embeddings', 'Skipping unsupported field: @field_name, Type: @field_type', [
62+
/*watchdog('openai_embeddings', 'Skipping unsupported field: @field_name, Type: @field_type', [
6363
'@field_name' => $field_name,
6464
'@field_type' => $field_type,
65-
], WATCHDOG_INFO); */
65+
], WATCHDOG_INFO);*/
6666
continue;
6767
}
68-
68+
6969
// Retrieve field values.
7070
$field_items = field_get_items('node', $entity, $field_name);
7171
if (empty($field_items)) {
72-
/* watchdog('openai_embeddings', 'Field @field_name has no items or is empty.', [
72+
/*watchdog('openai_embeddings', 'Field @field_name has no items or is empty.', [
7373
'@field_name' => $field_name,
74-
], WATCHDOG_INFO); */
74+
], WATCHDOG_INFO);*/
7575
continue;
7676
}
77-
77+
7878
foreach ($field_items as $delta => $item) {
7979
if (empty($item['value'])) {
8080
continue;
8181
}
82-
82+
8383
// Prepare text and remove stopwords.
8484
$text = StringHelper::prepareText($item['value'], [], 8000);
8585
foreach ($stopwords as $word) {
8686
$text = $this->removeStopWord($word, $text);
8787
}
88-
88+
8989
// Generate embedding via OpenAI.
9090
$response = openai_client_embed($text, $model);
9191
if (empty($response['data'][0]['embedding'])) {
9292
throw new Exception('Failed to generate embedding.');
9393
}
9494
$embedding = $response['data'][0]['embedding'];
95-
95+
9696
// Dynamically determine the namespace based on entity type.
9797
$collection = $data['entity_type']; // Use the entity type directly.
98-
98+
9999
// Prepare vector metadata.
100100
$unique_id = $this->generateUniqueId($entity, $field_name, $delta);
101101
$vectors = [
@@ -109,13 +109,13 @@ public function processItem($data) {
109109
'field_delta' => $delta,
110110
],
111111
];
112-
112+
113113
// Upsert into vector database.
114114
$vector_client->upsert([
115115
'vectors' => [$vectors],
116116
'collection' => $collection,
117117
]);
118-
118+
119119
// Update the local database.
120120
db_merge('openai_embeddings')
121121
->key([
@@ -139,8 +139,8 @@ public function processItem($data) {
139139
], WATCHDOG_ERROR);
140140
}
141141
}
142-
143-
142+
143+
144144

145145
/**
146146
* Generates a unique ID for the record in the vector database.

modules/openai_embeddings/includes/PineconeVectorClient.php

Lines changed: 142 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,31 @@ class PineconeVectorClient extends VectorClientBase {
1515
* The configured HTTP client for Pinecone.
1616
*/
1717
protected function getPineconeClient(): \GuzzleHttp\Client {
18+
// Retrieve Pinecone API key and hostname from configuration.
1819
$api_key = $this->getEmbeddingConfig('pinecone_api_key');
19-
$hostname = rtrim($this->getEmbeddingConfig('pinecone_hostname'), '/');
20+
$hostname = $this->getEmbeddingConfig('pinecone_hostname');
2021

21-
if (empty($api_key) || empty($hostname)) {
22-
throw new \Exception('Pinecone API key or hostname is not set. Please check your configuration.');
22+
// If the Key module is used, resolve the values.
23+
if (is_array($api_key)) {
24+
$api_key = key_get_key_value($api_key);
25+
}
26+
if (is_array($hostname)) {
27+
$hostname = key_get_key_value($hostname);
28+
}
29+
30+
// Validate and sanitize the values.
31+
if (empty($api_key) || !is_string($api_key)) {
32+
throw new \Exception('Invalid or missing Pinecone API key. Please check your configuration.');
33+
}
34+
if (empty($hostname) || !is_string($hostname)) {
35+
throw new \Exception('Invalid or missing Pinecone hostname. Please check your configuration.');
2336
}
2437

38+
// Trim and sanitize the hostname and API key.
39+
$api_key = trim($api_key);
40+
$hostname = rtrim(trim($hostname), '/');
41+
42+
// Return a configured HTTP client.
2543
return $this->getHttpClient([
2644
'headers' => [
2745
'API-Key' => $api_key,
@@ -81,9 +99,9 @@ public function upsert(array $parameters) {
8199
}
82100

83101
// Log the payload being sent for upsert.
84-
/* watchdog('openai_embeddings', 'Pinecone upsert payload: @payload', [
102+
watchdog('openai_embeddings', 'Pinecone upsert payload: @payload', [
85103
'@payload' => json_encode($payload),
86-
], WATCHDOG_DEBUG); */
104+
], WATCHDOG_DEBUG);
87105

88106
try {
89107
$response = $client->post('/vectors/upsert', ['json' => $payload]);
@@ -140,6 +158,125 @@ public function stats(): array {
140158
return [];
141159
}
142160
}
161+
162+
/**
163+
* Delete records in Pinecone.
164+
*
165+
* @param array $parameters
166+
* An array with at least key 'source_ids'. The key
167+
* 'collection' is required if not using the Pinecone free Starter plan.
168+
*
169+
* @return \Psr\Http\Message\ResponseInterface
170+
* The response object.
171+
*
172+
* @throws \Exception
173+
* If required parameters are missing or an error occurs.
174+
*/
175+
public function delete(array $parameters): ResponseInterface {
176+
// Ensure necessary parameters are provided.
177+
if (empty($parameters['source_ids']) && empty($parameters['filter'])) {
178+
throw new \Exception('Either "source_ids" to delete or a "filter" is required for Pinecone deletion.');
179+
}
180+
181+
if (!empty($parameters['deleteAll'])) {
182+
throw new \Exception('"deleteAll" must be handled by the deleteAll() method.');
183+
}
184+
185+
// Prepare the payload for deletion.
186+
$payload = [];
187+
if (!empty($parameters['source_ids'])) {
188+
$payload['ids'] = $parameters['source_ids'];
189+
}
190+
if (!empty($parameters['collection'])) {
191+
$payload['namespace'] = $parameters['collection'];
192+
}
193+
if (!empty($parameters['filter'])) {
194+
$payload['filter'] = $parameters['filter'];
195+
}
196+
197+
try {
198+
// Execute the delete request.
199+
return $this->getClient()->post('/vectors/delete', ['json' => $payload]);
200+
} catch (RequestException $e) {
201+
watchdog('openai_embeddings', 'Error during Pinecone delete: @message', ['@message' => $e->getMessage()], WATCHDOG_ERROR);
202+
throw $e;
203+
}
204+
}
143205

206+
/**
207+
* Delete all records in Pinecone.
208+
*
209+
* @param array $parameters
210+
* An array with at least the key 'collection'. Full deletion
211+
* is not supported in the Pinecone free Starter plan.
212+
*
213+
* @throws \Exception
214+
* If required parameters are missing or an error occurs.
215+
*/
216+
public function deleteAll(array $parameters): void {
217+
// Use the configuration method provided by the VectorClientBase class.
218+
$disable_namespace = $this->getEmbeddingConfig('disable_namespace');
219+
220+
// Validate the Pinecone plan and required parameters.
221+
if (!empty($disable_namespace)) {
222+
watchdog('openai_embeddings', 'Pinecone free starter plan does not support Delete All.', [], WATCHDOG_WARNING);
223+
throw new \Exception('Pinecone free starter plan does not support full namespace deletion.');
224+
}
225+
if (empty($parameters['collection'])) {
226+
throw new \Exception('Namespace (collection) is required for deleteAll in Pinecone.');
227+
}
228+
229+
// Retrieve Pinecone API key and hostname from configuration.
230+
$api_key = $this->getEmbeddingConfig('pinecone_api_key');
231+
$hostname = $this->getEmbeddingConfig('pinecone_hostname');
232+
233+
// If the Key module is used, resolve the values.
234+
if (is_array($api_key)) {
235+
$api_key = key_get_key_value($api_key);
236+
}
237+
if (is_array($hostname)) {
238+
$hostname = key_get_key_value($hostname);
239+
}
240+
241+
// Validate the resolved values.
242+
if (empty($api_key) || !is_string($api_key)) {
243+
throw new \Exception('Invalid or missing Pinecone API key. Please check your configuration.');
244+
}
245+
if (empty($hostname) || !is_string($hostname)) {
246+
throw new \Exception('Invalid or missing Pinecone hostname. Please check your configuration.');
247+
}
248+
249+
// Trim and sanitize the hostname and API key.
250+
$api_key = trim($api_key);
251+
$hostname = rtrim(trim($hostname), '/');
252+
253+
// Prepare the payload for full deletion.
254+
$payload = [
255+
'deleteAll' => TRUE,
256+
'namespace' => $parameters['collection'],
257+
];
258+
259+
try {
260+
// Create a Guzzle client with the resolved configuration.
261+
$client = $this->getHttpClient([
262+
'headers' => [
263+
'API-Key' => $api_key,
264+
],
265+
'base_uri' => $hostname,
266+
]);
267+
268+
// Execute the deleteAll request.
269+
$client->post('/vectors/delete', ['json' => $payload]);
270+
} catch (RequestException $e) {
271+
watchdog('openai_embeddings', 'Error during Pinecone deleteAll: @message', ['@message' => $e->getMessage()], WATCHDOG_ERROR);
272+
throw $e;
273+
} catch (\Exception $e) {
274+
watchdog('openai_embeddings', 'Unexpected error during Pinecone deleteAll: @message', ['@message' => $e->getMessage()], WATCHDOG_ERROR);
275+
throw $e;
276+
}
277+
}
278+
279+
280+
144281
}
145282

modules/openai_embeddings/includes/VectorClientBase.php

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,17 @@ public function getOpenAIKey(): ?string {
4848
* The configuration value or NULL if not set.
4949
*/
5050
public function getEmbeddingConfig(string $key) {
51-
return $this->embeddingsConfig[$key] ?? NULL;
51+
$value = $this->embeddingsConfig[$key] ?? NULL;
52+
53+
// If this key references a managed key, resolve its value.
54+
if ($key === 'pinecone_api_key' || $key === 'pinecone_hostname') {
55+
return key_get_key_value($value);
56+
}
57+
58+
return $value;
5259
}
5360

61+
5462
/**
5563
* Initialize an HTTP client with appropriate headers and base URI.
5664
*

0 commit comments

Comments
 (0)