added orchestration to call NL2SQL or RAG

yorek · yorek · commit 3bb083d7fb97 · 2025-01-16T12:42:50.000-08:00
diff --git a/db/sql/02-tables.sql b/db/sql/02-tables.sql
@@ -56,6 +56,8 @@ create table dbo.semantic_cache
 (
     [id] int identity primary key nonclustered,    
     [query] nvarchar(max) not null,
+    [action] nvarchar(max) not null,
+    [samples] nvarchar(max) not null,
     [embedding] vector(1536) not null,
     [query_date] datetime2(0) not null,
     [response] nvarchar(max) not null,    
diff --git a/db/sql/04-find_samples.sql b/db/sql/04-find_samples.sql
@@ -1,7 +1,8 @@
-create or alter procedure [web].[find_samples] @text nvarchar(max), @k int = 50
+create or alter procedure [web].[find_samples] @text nvarchar(max), @k int = null
 as
-declare @cached_response nvarchar(max)
-declare @retval int, @response nvarchar(max);
+declare @response nvarchar(max), @cached_response nvarchar(max);
+declare @retval int;
+declare @samples nvarchar(max)
 
 /* Get the embedding for the requested text */
 declare @qv vector(1536)
@@ -10,163 +11,81 @@ if (@retval != 0) return;
 
 /* Check in the semantic cache to see if a similar question has been already answered */
 delete from [dbo].[semantic_cache] where query_date < dateadd(hour, -1, sysdatetime())
+
 select top(1) *, vector_distance('cosine', @qv, embedding) as d 
 into #c 
-from [dbo].[semantic_cache] order by d
-select @cached_response = response from #c where d < 0.3
-
-if (@cached_response is not null)
-begin
-    set @response = @cached_response
-end 
-else 
-begin
-    /* Find the samples most similar to the requested topic */
-    drop table if exists #s;
-    select top(@k) 
-        s.id, [name], [description], [url], [notes], [details],
-        least(
-            vector_distance('cosine', e.[embedding], @qv), 
-            vector_distance('cosine', ne.[embedding], @qv), 
-            vector_distance('cosine', de.[embedding], @qv) 
-        ) as distance_score
-    into
-        #s
-    from 
-        dbo.samples s
-    inner join    
-        dbo.samples_embeddings e on e.id = s.id
-    left join
-        dbo.samples_notes_embeddings ne on e.id = ne.id
-    left join
-        dbo.samples_details_embeddings de on e.id = de.id    
-    order by 
-        distance_score asc
-    --select * from #s
+from [dbo].[semantic_cache] order by d;
 
-    /* Prepare the JSON string with relevant results to be sent to LLM for evaluation */
-    declare @s nvarchar(max) = (
-        select 
-            [id], [name], [description], [notes], [details], 
-            cast((1-distance_score)*100 as int) as similiarity_score
-        from #s 
-        where distance_score < 0.85
-        order by distance_score for json path
-    )
-    --select @s
+select top(1) @cached_response = response from #c where d < 0.3
+if (@cached_response is not null) set @response = @cached_response
 
-    /* Create the prompt for the LLM */
-    declare @p nvarchar(max) = 
-    json_object(
-        'messages': json_array(
-            json_object(
-                'role':'system',
-                'content':'
-                    You as a system assistant who helps users find code samples the user can use to learn the topic they are interested in.
-                    Samples are provided in an assitant message using a JSON Array with the following format: [{id, name, description, note, details, similiarity_score}]. 
-                    Put in sample_summary output property a markdown short summary of the sample using the provided description, notes, and details. 
-                    Use only the provided samples to help you answer the user''s question.                    
-                    Make sure to use details, notes, and description that are provided in each sample are used only with that sample.
-                    If there are related links or repos in the details of a sample that is included in the answer, include them in the short summary. Include links only if they are related to the sample and if they are available in the note or details of that sample.               
-                    If the question cannot be answered by the provided samples, you must say that you don''t know.
-                    If asked question is about topics you don''t know, answer that you don''t know.
-                '
-            ),
-            json_object(
-                'role':'assistant',
-                'content': 'The available samples are the following:'
-                ),
-            json_object(
-                'role':'assistant',
-                'content': coalesce(@s, 'No samples found for the requested search text.')
-                ),
-            json_object(
-                'role':'user',
-                'content': + @text
-            )
-        ),    
-        'temperature': 0.2,
-        'frequency_penalty': 0,
-        'presence_penalty': 0,    
-        'stop': null
-    );
+/* If no cached response is available then generate a fresh answer */
+if (@response is null) begin
+    
+    /* Orchestrate answer */
+    declare @rt varchar(50), @rq nvarchar(max)    
+    exec @retval = [web].[orchestrate_request]  @text, @rt output, @rq output
+    if (@retval != 0) return;
 
-    declare @js nvarchar(max) = N'{
-        "type": "json_schema",
-        "json_schema": {
-            "name": "samples",
-            "strict": true,
-            "schema": {
-                "type": "object",
-                "properties": {
-                    "samples": {
-                        "type": "array",
-                        "items": {
-                            "type": "object",
-                            "properties": {
-                                "result_position": {
-                                    "type": "number"
-                                },
-                                "id": {
-                                    "type": "number"
-                                },
-                                "sample_summary": {
-                                    "type": "string"
-                                },                            
-                                "thoughts": {
-                                    "type": "string"
-                                }
-                            },
-                            "required": [
-                                "id",                            
-                                "sample_summary",                            
-                                "thoughts",
-                                "result_position"
-                            ],
-                            "additionalProperties": false
-                        }
-                    }
-                },
-                "required": ["samples"],
-                "additionalProperties": false
-            }        
-        }        
-    }'
+    --print @rt
+    --print @rq
 
-    set @p = json_modify(@p, '$.response_format', json_query(@js))
-    ---select @p
-    
-    /* Send request to LLM */
-    begin try
-        exec @retval = sp_invoke_external_rest_endpoint
-            @url = '$OPENAI_URL$/openai/deployments/$OPENAI_CHAT_DEPLOYMENT_NAME$/chat/completions?api-version=2024-08-01-preview',
-            @headers = '{"Content-Type":"application/json"}',
-            @method = 'POST',
-            @credential = [$OPENAI_URL$],
-            @timeout = 120,
-            @payload = @p,
-            @response = @response output;
-    end try
-    begin catch
-        select 'REST' as [error], ERROR_NUMBER() as [error_code], ERROR_MESSAGE() as [error_message]
-        return
-    end catch
-    --select @response
+    /* Find the samples most similar to the requested topic */
+    if (@rt = 'SEMANTIC') begin
+        set @k = coalesce(@k, 50)        
+        drop table if exists #s;
+        select top(@k) 
+            s.id, [name], [description], [url], [notes], [details],
+            least(
+                vector_distance('cosine', e.[embedding], @qv), 
+                vector_distance('cosine', ne.[embedding], @qv), 
+                vector_distance('cosine', de.[embedding], @qv) 
+            ) as distance_score
+        into
+            #s
+        from 
+            dbo.samples s
+        inner join    
+            dbo.samples_embeddings e on e.id = s.id
+        left join
+            dbo.samples_notes_embeddings ne on e.id = ne.id
+        left join
+            dbo.samples_details_embeddings de on e.id = de.id    
+        order by 
+            distance_score asc;
 
-    if @retval != 0 begin
-        select 'OpenAI' as [error], @retval as [error_code], @response as [response]
-        return
+        /* Prepare the JSON string with relevant results to be sent to LLM for evaluation */
+        set @samples = (
+            select 
+                [id], [name], [description], [notes], [details], 
+                cast((1-distance_score)*100 as int) as similiarity_score
+            from #s 
+            where distance_score < 0.85
+            order by distance_score for json path
+        )
     end
 
-    declare @refusal nvarchar(max) = (select coalesce(json_value(@response, '$.result.choices[0].refusal'), ''));
+    /* Find the samples using T-SQL */
+    if (@rt = 'SQL') begin
+        declare @trq nvarchar(max) = trim(replace(replace(@rq, char(13), ' '), char(10), ' '));
+        if (@trq like '%INSERT %' or @trq like '%UPDATE %' or @trq like '%DELETE %' or @trq like '%DROP %' or @trq like '%ALTER %' or @trq like '%CREATE %') begin
+            --select @trq
+            select 'NL2SQL' as [error], -1 as [error_code], 'Unauthorized SQL command requested' as [response]
+            return -1
+        end
 
-    if @refusal != '' begin
-        select 'OpenAI/Refusal' as [error], @refusal as [refusal], @response as [response]
-        return
+        declare @q nvarchar(max) = 'SET @s = (' + @rq + ')';
+        exec sp_executesql @q, N'@s NVARCHAR(MAX) OUTPUT', @s = @samples output
+        --print @samples
     end
+    
+    --select @samples;    
+    exec @retval = [web].[generate_answer] @text, @samples, @response output;
+    if (@retval != 0) return;
 
-    insert into dbo.semantic_cache (query, embedding, query_date, response) 
-    values (@text, @qv, sysdatetime(), @response)
+    /* Cache results */
+    insert into dbo.semantic_cache (query, [action], samples, embedding, query_date, response) 
+    values (@text, @rt + isnull(':' + @rq, ''), @samples, @qv, sysdatetime(), @response)
 end
 
 select 
@@ -193,4 +112,5 @@ inner join
     dbo.samples as s on s.id = sr.id
 order by
     sr.result_position
+    
 GO
diff --git a/db/sql/10-generate_answer.sql b/db/sql/10-generate_answer.sql
@@ -0,0 +1,130 @@
+/*
+    Ask the chat model which of the supplied samples answer the user's question.
+    @query_text - the user's question; required.
+    @source     - the candidate samples, sent to the model verbatim; required.
+    @response   - output; raw response text returned by sp_invoke_external_rest_endpoint.
+    Returns 0 on success. On failure emits a one-row error result set and returns
+    -1, or the endpoint's own non-zero return code when the REST call is rejected.
+*/
+create or alter procedure [web].[generate_answer] 
+@query_text nvarchar(max),
+@source nvarchar(max),
+@response nvarchar(max) output
+as
+declare @retval int;
+
+if (@query_text is null) begin
+    select 'Generator' as [error], -1 as [error_code], 'Query not provided' as [error_message]
+    return -1
+end
+
+if (@source is null) begin
+    select 'Generator' as [error], -1 as [error_code], 'Sample list not provided' as [error_message]
+    return -1
+end
+
+/* Create the prompt for the LLM; similiarity_score is spelled exactly as the key emitted by [web].[find_samples] */
+declare @p nvarchar(max) = 
+json_object(
+    'messages': json_array(
+        json_object(
+            'role':'system',
+            'content':'
+                You are a system assistant who helps users find code samples the user can use to learn the topic they are interested in.
+                Samples are provided in an assistant message using a JSON Array with the following format: [{id, name, description, notes, details, similiarity_score}].
+                Put in sample_summary output property a markdown short summary of the sample using the provided description, notes, and details.
+                Use only the provided samples to help you answer the question.
+                Use only the information available in the provided JSON to answer the question.
+                Make sure that the details, notes, and description provided in each sample are used only with that sample.
+                If there are related links or repos in the details of a sample that is included in the answer, include them in the short summary. Include links only if they are related to the sample and if they are available in the notes or details of that sample.
+                If the question cannot be answered by the provided samples, you must say that you don''t know.
+                If asked question is about topics you don''t know, answer that you don''t know.
+                If no samples are provided, say that you cannot answer as no samples have been found.
+            '
+        ),
+        json_object('role':'assistant', 'content': 'The available samples are the following:'),
+        json_object('role':'assistant', 'content': @source),
+        json_object('role':'user', 'content': @query_text)
+    ),    
+    'temperature': 0.2,
+    'frequency_penalty': 0,
+    'presence_penalty': 0,    
+    'stop': null
+);
+
+/* Strict JSON schema sent as response_format: a "samples" array of {result_position, id, sample_summary, thoughts} */
+declare @js nvarchar(max) = N'{
+    "type": "json_schema",
+    "json_schema": {
+        "name": "samples",
+        "strict": true,
+        "schema": {
+            "type": "object",
+            "properties": {
+                "samples": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "result_position": {
+                                "type": "number"
+                            },
+                            "id": {
+                                "type": "number"
+                            },
+                            "sample_summary": {
+                                "type": "string"
+                            },                            
+                            "thoughts": {
+                                "type": "string"
+                            }
+                        },
+                        "required": [
+                            "id",                            
+                            "sample_summary",                            
+                            "thoughts",
+                            "result_position"
+                        ],
+                        "additionalProperties": false
+                    }
+                }
+            },
+            "required": ["samples"],
+            "additionalProperties": false
+        }        
+    }        
+}'
+
+set @p = json_modify(@p, '$.response_format', json_query(@js))
+
+/* Send request to LLM */
+begin try
+    exec @retval = sp_invoke_external_rest_endpoint
+        @url = '$OPENAI_URL$/openai/deployments/$OPENAI_CHAT_DEPLOYMENT_NAME$/chat/completions?api-version=2024-08-01-preview',
+        @headers = '{"Content-Type":"application/json"}',
+        @method = 'POST',
+        @credential = [$OPENAI_URL$],
+        @timeout = 120,
+        @payload = @p,
+        @response = @response output;
+end try
+begin catch
+    select 'Generator:REST' as [error], ERROR_NUMBER() as [error_code], ERROR_MESSAGE() as [error_message]
+    return -1
+end catch
+
+/* A non-zero return value from the endpoint call is reported and propagated to the caller */
+if @retval != 0 begin
+    select 'Generator:OpenAI' as [error], @retval as [error_code], @response as [response]
+    return @retval
+end
+
+/* A non-empty refusal field on the first choice is reported as an error instead of an answer */
+declare @refusal nvarchar(max) = (select coalesce(json_value(@response, '$.result.choices[0].refusal'), ''));
+if @refusal != '' begin
+    select 'Generator:OpenAI/Refusal' as [error], @refusal as [refusal], @response as [response]
+    return -1
+end
+
+return 0
+GO
diff --git a/db/sql/11-orchestrate_request.sql b/db/sql/11-orchestrate_request.sql

Original file line number	Diff line number	Diff line change
`@@ -56,6 +56,8 @@ create table dbo.semantic_cache`
`56`	`56`	`(`
`57`	`57`	`[id] int identity primary key nonclustered,`
`58`	`58`	`[query] nvarchar(max) not null,`
	`59`	`+ [action] nvarchar(max) not null,`
	`60`	`+ [samples] nvarchar(max) not null,`
`59`	`61`	`[embedding] vector(1536) not null,`
`60`	`62`	`[query_date] datetime2(0) not null,`
`61`	`63`	`[response] nvarchar(max) not null,`