66
77from azure .search .documents .agent .aio import KnowledgeAgentRetrievalClient
88from azure .search .documents .agent .models import (
9- KnowledgeAgentAzureSearchDocReference ,
10- KnowledgeAgentIndexParams ,
119 KnowledgeAgentMessage ,
1210 KnowledgeAgentMessageTextContent ,
1311 KnowledgeAgentRetrievalRequest ,
1412 KnowledgeAgentRetrievalResponse ,
15- KnowledgeAgentSearchActivityRecord ,
13+ KnowledgeAgentSearchIndexActivityRecord ,
14+ KnowledgeAgentSearchIndexReference ,
15+ SearchIndexKnowledgeSourceParams ,
1616)
1717from azure .search .documents .aio import SearchClient
1818from azure .search .documents .models import (
@@ -162,7 +162,6 @@ def __init__(
162162 openai_host : str ,
163163 prompt_manager : PromptManager ,
164164 reasoning_effort : Optional [str ] = None ,
165- hydrate_references : bool = False ,
166165 multimodal_enabled : bool = False ,
167166 image_embeddings_client : Optional [ImageEmbeddings ] = None ,
168167 global_blob_manager : Optional [BlobManager ] = None ,
@@ -180,7 +179,6 @@ def __init__(
180179 self .openai_host = openai_host
181180 self .prompt_manager = prompt_manager
182181 self .reasoning_effort = reasoning_effort
183- self .hydrate_references = hydrate_references
184182 self .include_token_usage = True
185183 self .multimodal_enabled = multimodal_enabled
186184 self .image_embeddings_client = image_embeddings_client
@@ -276,7 +274,6 @@ async def run_agentic_retrieval(
276274 top : Optional [int ] = None ,
277275 filter_add_on : Optional [str ] = None ,
278276 minimum_reranker_score : Optional [float ] = None ,
279- max_docs_for_reranker : Optional [int ] = None ,
280277 results_merge_strategy : Optional [str ] = None ,
281278 ) -> tuple [KnowledgeAgentRetrievalResponse , list [Document ]]:
282279 # STEP 1: Invoke agentic retrieval
@@ -289,13 +286,10 @@ async def run_agentic_retrieval(
289286 for msg in messages
290287 if msg ["role" ] != "system"
291288 ],
292- target_index_params = [
293- KnowledgeAgentIndexParams (
294- index_name = search_index_name ,
295- reranker_threshold = minimum_reranker_score ,
296- max_docs_for_reranker = max_docs_for_reranker ,
289+ knowledge_source_params = [
290+ SearchIndexKnowledgeSourceParams (
291+ knowledge_source_name = search_index_name ,
297292 filter_add_on = filter_add_on ,
298- include_reference_source_data = True ,
299293 )
300294 ],
301295 )
@@ -305,12 +299,12 @@ async def run_agentic_retrieval(
305299 activities = response .activity
306300 activity_mapping : dict [int , str ] = (
307301 {
308- activity .id : activity .query .search
302+ activity .id : activity .search_index_arguments .search
309303 for activity in activities
310304 if (
311- isinstance (activity , KnowledgeAgentSearchActivityRecord )
312- and activity .query
313- and activity .query .search is not None
305+ isinstance (activity , KnowledgeAgentSearchIndexActivityRecord )
306+ and activity .search_index_arguments
307+ and activity .search_index_arguments .search is not None
314308 )
315309 }
316310 if activities
@@ -322,92 +316,42 @@ async def run_agentic_retrieval(
322316 return response , []
323317
324318 # Extract references
325- refs = [r for r in response .references if isinstance (r , KnowledgeAgentAzureSearchDocReference )]
326-
319+ refs = [r for r in response .references if isinstance (r , KnowledgeAgentSearchIndexReference )]
327320 documents : list [Document ] = []
328-
329- if self .hydrate_references :
330- # Hydrate references to get full documents
331- documents = await self .hydrate_agent_references (
332- references = refs ,
333- top = top ,
334- )
335- else :
336- # Create documents from reference source data
337- for ref in refs :
338- if ref .source_data :
339- documents .append (
340- Document (
341- id = ref .doc_key ,
342- content = ref .source_data .get ("content" ),
343- sourcepage = ref .source_data .get ("sourcepage" ),
344- )
345- )
346- if top and len (documents ) >= top :
347- break
348-
349- # Build mappings for agent queries and sorting
350- ref_to_activity : dict [str , int ] = {}
351321 doc_to_ref_id : dict [str , str ] = {}
322+
323+ # Create documents from reference source data
352324 for ref in refs :
353- if ref .doc_key :
354- ref_to_activity [ref .doc_key ] = ref .activity_source
325+ if ref .source_data and ref .doc_key :
326+ # Note that ref.doc_key is the same as source_data["id"]
327+ documents .append (
328+ Document (
329+ id = ref .doc_key ,
330+ content = ref .source_data .get ("content" ),
331+ category = ref .source_data .get ("category" ),
332+ sourcepage = ref .source_data .get ("sourcepage" ),
333+ sourcefile = ref .source_data .get ("sourcefile" ),
334+ oids = ref .source_data .get ("oids" ),
335+ groups = ref .source_data .get ("groups" ),
336+ reranker_score = ref .reranker_score ,
337+ images = ref .source_data .get ("images" ),
338+ search_agent_query = activity_mapping [ref .activity_source ],
339+ )
340+ )
355341 doc_to_ref_id [ref .doc_key ] = ref .id
342+ if top and len (documents ) >= top :
343+ break
356344
357- # Inject agent search queries into all documents
358- for doc in documents :
359- if doc .id and doc .id in ref_to_activity :
360- activity_id = ref_to_activity [doc .id ]
361- doc .search_agent_query = activity_mapping .get (activity_id , "" )
345+ if minimum_reranker_score is not None :
346+ documents = [doc for doc in documents if (doc .reranker_score or 0 ) >= minimum_reranker_score ]
362347
363- # Apply sorting strategy to the documents
364- if results_merge_strategy == "interleaved" : # Use interleaved reference order
348+ if results_merge_strategy == "interleaved" :
365349 documents = sorted (
366350 documents ,
367351 key = lambda d : int (doc_to_ref_id .get (d .id , 0 )) if d .id and doc_to_ref_id .get (d .id ) else 0 ,
368352 )
369- # else: Default - preserve original order
370-
371353 return response , documents
372354
373- async def hydrate_agent_references (
374- self ,
375- references : list [KnowledgeAgentAzureSearchDocReference ],
376- top : Optional [int ],
377- ) -> list [Document ]:
378- doc_keys : set [str ] = set ()
379-
380- for ref in references :
381- if not ref .doc_key :
382- continue
383- doc_keys .add (ref .doc_key )
384- if top and len (doc_keys ) >= top :
385- break
386-
387- if not doc_keys :
388- return []
389-
390- # Build search filter only on unique doc IDs
391- id_csv = "," .join (doc_keys )
392- id_filter = f"search.in(id, '{ id_csv } ', ',')"
393-
394- # Fetch full documents
395- hydrated_docs : list [Document ] = await self .search (
396- top = len (doc_keys ),
397- query_text = None ,
398- filter = id_filter ,
399- vectors = [],
400- use_text_search = False ,
401- use_vector_search = False ,
402- use_semantic_ranker = False ,
403- use_semantic_captions = False ,
404- minimum_search_score = None ,
405- minimum_reranker_score = None ,
406- use_query_rewriting = False ,
407- )
408-
409- return hydrated_docs
410-
411355 async def get_sources_content (
412356 self ,
413357 results : list [Document ],
0 commit comments