Skip to content

Commit d6c4d3b

Browse files
committed
updated scripts to use hybrid search
1 parent d6b9663 commit d6c4d3b

File tree

9 files changed

+347
-17
lines changed

9 files changed

+347
-17
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
-- Registers the "Fabric Conference 2025 Samples" repository by passing its JSON
-- descriptor to dbo.add_sample (defined elsewhere).
-- The descriptor is a T-SQL string literal, so single quotes that belong to the
-- text are doubled ('').
exec dbo.add_sample '
{
    "name": "Fabric Conference 2025 Samples",
    "description": "Demos and samples used at a Fabric Conference 2025 Workshop",
    "notes": "the repo contains samples used both during the ''Building AI applications with SQL: Ground to Cloud to Fabric'' workshop and then ''Operational RAG Solutions with Azure SQL and Microsoft Fabric''",
    "url": "https://github.com/yorek/fabric-conference-2025",
    "details": {
        "author": "Davide Mauri",
        "languages": ["T-SQL", "C#", "Python"],
        "services": ["Azure SQL", "SQL Server", "Fabric SQL"],
        "license": "MIT",
        "libraries": ["Semantic Kernel", "LangChain"],
        "tags": ["RAG", "Vectors", "Retrieval Augmented Generation", "AI", "GenAI"],
        "conferences": ["Fabric Community Conference 2025", "FabCon 2025"]
    }
}';
GO

db/samples/sample-data.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ exec dbo.add_sample '
398398
"NL2SQL",
399399
"Natural Language to SQL"
400400
],
401-
"conferences": ["Live 360 Orlando 2024", "SQL Conf 2025", "SQL Conference 2025"]
401+
"conferences": ["Live 360 Orlando 2024", "SQL Conf 2025", "SQL Conference 2025", "Fabric Community Conference 2025", "FabCon 2025"]
402402
}
403403
}
404404
';

db/sql/02-tables.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ drop table if exists dbo.samples;
66

77
create table dbo.samples
88
(
9-
[id] int identity primary key,
9+
[id] int identity constraint pk__samples primary key,
1010
[name] nvarchar(100) not null,
1111
[description] nvarchar(max) not null,
1212
[notes] nvarchar(max) null,
@@ -63,4 +63,4 @@ create table dbo.semantic_cache
6363
[response] nvarchar(max) not null,
6464
)
6565
go
66-
create clustered index ixc__semantic_cache on dbo.semantic_cache(query_date desc)
66+
create clustered index ixc__semantic_cache on dbo.semantic_cache(query_date desc)

db/sql/02b-fulltext.sql

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
-- Full-text search setup for dbo.samples.
-- Creates the catalog and the full-text index over [description] and [notes],
-- keyed on the pk__samples primary-key index. Every step is guarded so the
-- script can be executed more than once without failing.

-- Create the catalog only when it is missing.
if not exists(select * from sys.fulltext_catalogs where [name] = 'FullTextCatalog')
begin
    create fulltext catalog [FullTextCatalog] as default;
end
go

-- Create the index only when dbo.samples does not have one yet.
-- The catalog is named explicitly so the statement does not depend on
-- [FullTextCatalog] being the database default (it may pre-exist as non-default).
if not exists(select * from sys.fulltext_indexes where [object_id] = object_id('dbo.samples'))
begin
    create fulltext index on dbo.samples ([description], [notes])
        key index pk__samples
        on [FullTextCatalog];
end
go

-- Make sure the index is active even if it already existed in a disabled state.
alter fulltext index on dbo.samples enable;
go

-- Diagnostic output: list the catalogs so the deployment log shows the result.
select * from sys.fulltext_catalogs
go

db/sql/04-find_samples.sql

Lines changed: 77 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
create or alter procedure [web].[find_samples] @text nvarchar(max), @k int = null
1+
create or alter procedure [web].[find_samples] @text nvarchar(1000), @k int = null
22
as
3+
set nocount on;
34
declare @response nvarchar(max), @cached_response nvarchar(max);
45
declare @retval int;
56
declare @samples nvarchar(max)
@@ -45,8 +46,7 @@ if (@response is null) begin
4546
declare @trq nvarchar(max) = trim(replace(replace(@rq, char(13), ' '), char(10), ' '));
4647
if (@trq like '%INSERT %' or @trq like '%UPDATE %' or @trq like '%DELETE %' or @trq like '%DROP %' or @trq like '%ALTER %' or @trq like '%CREATE %') begin
4748
--select @trq
48-
set @error = json_object('error':'NL2SQL', 'error_code':-1, 'response':'Unauthorized SQL command requested')
49-
select @error as error;
49+
select 'NL2SQL' as [error], -1 as [error_code], 'Unauthorized SQL command requested' as [response]
5050
return -1
5151
end
5252
--print @rq
@@ -65,17 +65,19 @@ if (@response is null) begin
6565

6666
/* Find the samples most similar to the requested topic */
6767
if (@rt like '%SEMANTIC%') begin
68-
set @k = coalesce(@k, 50)
69-
drop table if exists #s;
68+
set @k = coalesce(@k, 50)
69+
70+
-- Semantic Search
71+
drop table if exists #ss;
7072
select top(@k)
71-
s.id, [name], [description], [url], [notes], [details],
73+
s.id,
7274
least(
7375
vector_distance('cosine', e.[embedding], @qv),
7476
vector_distance('cosine', ne.[embedding], @qv),
7577
vector_distance('cosine', de.[embedding], @qv)
76-
) as distance_score
78+
) as cosine_distance
7779
into
78-
#s
80+
#ss
7981
from
8082
dbo.samples s
8183
inner join
@@ -85,16 +87,78 @@ if (@response is null) begin
8587
left join
8688
dbo.samples_details_embeddings de on e.id = de.id
8789
order by
88-
distance_score asc;
90+
cosine_distance asc;
91+
--select * from #ss;
92+
93+
-- Fulltext Search
94+
drop table if exists #ks;
95+
select top(@k)
96+
id,
97+
ftt.[RANK] AS ft_rank
98+
into
99+
#ks
100+
from
101+
dbo.samples w
102+
inner join
103+
FREETEXTTABLE(dbo.samples, *, @text) as ftt on w.id = ftt.[KEY]
104+
order by
105+
ft_rank desc;
106+
--select * from #ks;
107+
108+
-- RRF
109+
-- Hybrid ranking: fuse the semantic candidates (#ss) and the full-text
-- candidates (#ks) with Reciprocal Rank Fusion (RRF) and store the result in #s.
drop table if exists #s;
with semantic_search as
(
    -- Position of each semantic candidate; rank 1 = smallest cosine distance.
    -- No TOP here: #ss is already limited to @k rows when it is populated.
    select
        id,
        rank() over (order by cosine_distance) as rank
    from
        #ss
),
keyword_search as
(
    -- Position of each full-text candidate; rank 1 = highest full-text rank.
    -- No TOP here either: #ks is already limited to @k rows. The previous
    -- hard-coded top(50) (without ORDER BY) dropped an arbitrary subset of the
    -- keyword hits whenever @k was greater than 50.
    select
        id,
        rank() over (order by ft_rank desc) as rank
    from
        #ks
),
final_rank as
(
    -- A sample found by only one of the two searches contributes 0 for the
    -- missing side (full outer join + coalesce).
    -- NOTE(review): @k is used both as the result limit and as the RRF
    -- smoothing constant, so the score scale changes with @k -- confirm that
    -- the fixed "similiarity_score > 30" filter applied later is intended for
    -- every @k.
    select top(@k)
        coalesce(ss.id, ks.id) AS id,
        ss.rank AS semantic_rank,
        ks.rank AS keyword_rank,
        1000 * (coalesce(1.0 / (@k + ss.rank), 0.0) +
                coalesce(1.0 / (@k + ks.rank), 0.0)) AS similiarity_score -- Reciprocal Rank Fusion (RRF)
    from
        semantic_search ss
    full outer join
        keyword_search ks on ss.id = ks.id
    order by
        similiarity_score desc
)
-- Attach the sample columns that later statements read from #s.
select top(@k)
    s.[id], [name], [description], [notes], [details],
    semantic_rank,
    keyword_rank,
    [similiarity_score]
into
    #s
from
    dbo.samples s
inner join
    final_rank fr on s.id = fr.id;
152+
--select * from #s where similiarity_score > 30 order by similiarity_score desc;
89153

90154
/* Prepare the JSON string with relevant results to be sent to LLM for evaluation */
91155
set @samples = (
92-
select
156+
select top(10)
93157
[id], [name], [description], [notes], [details],
94-
cast((1-distance_score)*100 as int) as similiarity_score
158+
similiarity_score
95159
from #s
96-
where distance_score < 0.85
97-
order by distance_score for json path
160+
where similiarity_score > 30
161+
order by similiarity_score desc for json path
98162
)
99163
end
100164

@@ -141,4 +205,3 @@ inner join
141205
order by
142206
sr.result_position
143207

144-
go

db/sql/10-generate_answer.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ json_object(
2828
Put in sample_summary output property a markdown short summary of the sample using the provided description, notes, and details.
2929
Use only the provided samples to help you answer the question.
3030
Use only the information available in the provided JSON to answer the question.
31+
Make sure to use the information in the details to answer the question.
32+
Return at least five samples if you can.
3133
Make sure to use details, notes, and description that are provided in each sample are used only with that sample.
3234
If there are related links or repos in the details of a sample that is included in the answer, include them in the short summary. Include links only if they are related to the sample and if they are available in the note or details of that sample.
3335
If the question cannot be answered by the provided samples, you must say that you don''t know.
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO

/*
    web.orchestrate_request

    Asks the chat-completion endpoint whether the user request can be answered
    with a T-SQL query on dbo.samples and reports how the request should be
    handled.

    Parameters
        @text          the user request, sent to the model as the user message
        @result_type   output: the response_type returned by the model (the
                       schema describes it as 'SQL'), or 'SEMANTIC' when no
                       usable query was produced
        @result_query  output: the generated T-SQL query, or the original @text
                       when @result_type is 'SEMANTIC'
        @error         output: JSON error description, set only on failure

    Return value
        0 on success; -1 when the REST call fails, the endpoint returns a
        non-zero status, or the model refuses (details are in @error).
*/
create or alter procedure [web].[orchestrate_request]
@text nvarchar(max),
@result_type varchar(50) output,
@result_query nvarchar(max) output,
@error nvarchar(max) output
as
set nocount on;
declare @retval int, @response nvarchar(max);

/*
    Create the prompt for the LLM

    NOTE(review): the table definition embedded in the prompt lists only
    id, created_on and updated_on, while the prompt asks the model to return
    name, description, notes and details -- confirm the definition is complete.
*/
declare @p nvarchar(max) =
    json_object(
        'messages': json_array(
            json_object(
                'role':'system',
                'content':'
                    You are a SQL Server database assistant. You answer the questions providing the correct T-SQL query to get the result. The user question is provided in the next message.

                    This is the database table you can use:

                    create table dbo.samples
                    (
                        [id] int identity primary key,
                        [created_on] datetime2(0) not null,
                        [updated_on] datetime2(0) not null
                    )

                    The user question is provided in the next message. If the user question cannot be answered using the dbo.samples table and using a T-SQL query only, you should respond with an empty string.
                    Unless otherwise specified by the user, return the top 10 results if you can. Never return more than 50 rows. Do not use semicolon to terminate the T-SQL statement.
                    Only return the following columns: id int, [name] nvarchar(100), [description] nvarchar(max), notes nvarchar(max), details json, distance_score float.
                    You can generate only SELECT statements. If the user is asking something that will generate INSERT, UPDATE, DELETE, CREATE, ALTER or DROP statement, refuse to generate the query.
                '
            ),
            json_object(
                'role':'user',
                'content': @text
            )
        ),
        'temperature': 0.4,
        'frequency_penalty': 0,
        'presence_penalty': 0,
        'stop': null
    );

/* Structured-output schema: the model must answer with { "samples": [ { response_type, sql_query } ] } */
declare @js nvarchar(max) = N'{
    "type": "json_schema",
    "json_schema": {
        "name": "samples",
        "strict": true,
        "schema": {
            "type": "object",
            "properties": {
                "samples": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "response_type": {
                                "type": "string",
                                "description": "SQL if a SQL query is provided, NONE if no SQL query is provided"
                            },
                            "sql_query": {
                                "type": "string",
                                "description": "SQL query to get the result"
                            }
                        },
                        "required": [
                            "response_type",
                            "sql_query"
                        ],
                        "additionalProperties": false
                    }
                }
            },
            "required": ["samples"],
            "additionalProperties": false
        }
    }
}'

/* json_query keeps the schema as a JSON object instead of an escaped string */
set @p = json_modify(@p, '$.response_format', json_query(@js))
--select @p

/* Send request to LLM */
begin try
    exec @retval = sp_invoke_external_rest_endpoint
        @url = '$OPENAI_URL$/openai/deployments/$OPENAI_CHAT_DEPLOYMENT_NAME$/chat/completions?api-version=2024-08-01-preview',
        @headers = '{"Content-Type":"application/json"}',
        @method = 'POST',
        @credential = [$OPENAI_URL$],
        @timeout = 120,
        @payload = @p,
        @response = @response output
        with result sets none;
end try
begin catch
    set @error = json_object('error':'Orchestrator:REST', 'error_code':ERROR_NUMBER(), 'error_message':ERROR_MESSAGE())
    return -1
end catch
--select @response

/* A non-zero return value means the endpoint call did not succeed */
if @retval != 0 begin
    set @error = json_object('error':'Orchestrator:OpenAI', 'error_code':@retval, 'error_message':@response)
    return -1
end

/* The model may refuse to answer; report the refusal as an error */
declare @refusal nvarchar(max) = (select coalesce(json_value(@response, '$.result.choices[0].refusal'), ''));

if @refusal != '' begin
    set @error = json_object('error':'Orchestrator:OpenAI/Refusal', 'refusal':@refusal, 'response':@response)
    return -1
end

/*
    Reset the outputs first: when the response yields no rows the assignment
    below does not run, and values passed in by the caller must not survive.
*/
set @result_type = null
set @result_query = null

/* Read the first entry of the "samples" array from the message content */
select top(1)
    @result_type = sr.response_type,
    @result_query = sr.sql_query
from
    openjson(@response, '$.result.choices[0].message') with (
        content nvarchar(max) '$.content'
    ) m
cross apply
    openjson(m.content, '$.samples') with (
        response_type varchar(10),
        sql_query nvarchar(max)
    ) as sr

/*
    Fall back to semantic search when there is no usable query: the model
    answered NONE, returned nothing that could be parsed, or returned the empty
    string that the prompt asks for when the request cannot be answered.
*/
if (@result_type is null or @result_type = 'NONE' or trim(coalesce(@result_query, '')) = '') begin
    set @result_type = 'SEMANTIC'
    set @result_query = @text
end

return 0
GO

0 commit comments

Comments
 (0)