fern-api · tsbhangu · Nov 23, 2025 · Nov 23, 2025 · Nov 23, 2025 · Nov 23, 2025
diff --git a/servers/fai-lambda/fai-chat/src/prompts/system.py b/servers/fai-lambda/fai-chat/src/prompts/system.py
@@ -26,6 +26,15 @@
 Tool calls should only be made when the provided documents clearly lack the necessary information. Always attempt to answer from the documents first before invoking any tools.
 </core_principles>
 
+<retrieval_context>
+The initial documents below are the top results from a hybrid search on the user's query. The search combines vector search over small content chunks with BM25 search over document titles, keywords, and chunks, and merges the two result lists with reciprocal rank fusion (RRF). Vector hits return the full document for the matched chunk. Documents are ordered from most to least relevant; higher scores indicate stronger relevance. Use this ordering and the scores to prioritize evidence.
+Documents are provided in XML under <retrieved_documents> and come from the domain's documentation, related websites, or relevant code examples in the knowledge base. Each <document> has index and document_id attributes (document_id may be empty) and contains title, score, url (if available), product (if available), source (if available), and content elements; text values are wrapped in CDATA sections. Use these fields to understand provenance and relevance before answering.
+</retrieval_context>
+
+<tool_usage>
+Use the documentationSearch tool when the provided documents do not fully answer the question. The tool runs the same hybrid search described above against the knowledge base and returns additional documents with titles and URLs when available. Craft concise, specific queries that capture the missing details you need. Make at most 2 calls and incorporate any new results with citations.
+</tool_usage>
+
 <format_rules>
 Write a well-formatted answer that is clear, structured, and optimized for readability using Markdown headers, lists, and text. Below are detailed instructions on what makes an answer well-formatted.
 
@@ -108,22 +117,35 @@
 
 def format_retrieved_docs(docs: list[RetrievedDocument], domain: str) -> str:
     if not docs:
-        return "No relevant documentation was found."
+        return f'<retrieved_documents domain="{domain}" empty="true" />'
+
+    def to_cdata(value: str) -> str:
+        return f"<![CDATA[{value.replace(']]>', ']]]]><![CDATA[>')}]]>"
 
-    formatted_docs = []
+    lines = [f'<retrieved_documents domain="{domain}">']
     for i, doc in enumerate(docs, 1):
         title = doc.metadata.get("title", "Untitled") if doc.metadata else "Untitled"
         url = doc.metadata.get("url", "") if doc.metadata else ""
-        content = doc.content
-
-        doc_section = f"## Document {i}: {title}"
+        product = doc.metadata.get("product", "") if doc.metadata else ""
+        source = doc.metadata.get("source", "") if doc.metadata else ""
+        score = f"{doc.score:.3f}"
+        document_id = doc.document_id or ""
+        lines.append(f'  <document index="{i}" document_id="{document_id}">')
+        lines.append(f"    <title>{to_cdata(title)}</title>")
+        lines.append(f"    <score>{score}</score>")
         if url:
-            doc_section += f"\nURL: {url}"
-        doc_section += f"\n\n{content}"
-
-        formatted_docs.append(doc_section)
-
-    return "\n\n---\n\n".join(formatted_docs)
+            lines.append(f"    <url>{to_cdata(url)}</url>")
+        if product:
+            lines.append(f"    <product>{to_cdata(product)}</product>")
+        if source:
+            lines.append(f"    <source>{to_cdata(source)}</source>")
+        lines.append("    <content>")
+        lines.append(f"      {to_cdata(doc.content)}")
+        lines.append("    </content>")
+        lines.append("  </document>")
+    lines.append("</retrieved_documents>")
+
+    return "\n".join(lines)
 
 
 def build_messages(

diff --git a/servers/fai-lambda/fai-chat/src/retrieval/turbopuffer_retriever.py b/servers/fai-lambda/fai-chat/src/retrieval/turbopuffer_retriever.py
@@ -22,7 +22,7 @@
     build_turbopuffer_filters,
 )
 
-TURBOPUFFER_INCLUDE_ATTRIBUTES = ["document", "title", "url", "id"]
+TURBOPUFFER_INCLUDE_ATTRIBUTES = ["document", "title", "url", "id", "product", "version"]
 
 
 class TurbopufferRetriever(RAGRetriever):
@@ -196,13 +196,13 @@ async def _hybrid_retrieve(self, query: RetrievalQuery) -> tuple[list[RetrievedD
         multiquery_response = await tpuf_ns.multi_query(
             queries=[
                 {
-                    "top_k": query.top_k * 2,
+                    "top_k": query.top_k * 5,
                     "include_attributes": TURBOPUFFER_INCLUDE_ATTRIBUTES,
                     "rank_by": ("vector", "ANN", embedding),
                     "filters": filters,
                 },
                 {
-                    "top_k": query.top_k * 2,
+                    "top_k": query.top_k * 5,
                     "include_attributes": TURBOPUFFER_INCLUDE_ATTRIBUTES,
                     "rank_by": (
                         "Sum",

diff --git a/servers/fai-lambda/fai-chat/src/routes/chat.py b/servers/fai-lambda/fai-chat/src/routes/chat.py
@@ -101,7 +101,7 @@ async def chat(
         retrieval_query = RetrievalQuery(
             query=user_query,
             domain=domain,
-            top_k=5,
+            top_k=3,
             strategy=RetrievalStrategy.HYBRID,
             filters=query_filters,
         )

diff --git a/servers/fai-lambda/fai-chat/tests/test_auth.py b/servers/fai-lambda/fai-chat/tests/test_auth.py
@@ -1,34 +1,32 @@
-import pytest
-
 from src.auth.roles import create_exploded_roles
 
 
 class TestRoleExplosion:
-    def test_empty_roles(self):
+    def test_empty_roles(self) -> None:
         result = create_exploded_roles([])
         assert result == []
 
-    def test_single_role(self):
+    def test_single_role(self) -> None:
         result = create_exploded_roles(["admin"])
         assert result == ["admin"]
 
-    def test_two_roles(self):
+    def test_two_roles(self) -> None:
         result = create_exploded_roles(["admin", "user"])
         assert set(result) == {"admin", "user", "admin&user"}
 
-    def test_three_roles(self):
+    def test_three_roles(self) -> None:
         result = create_exploded_roles(["a", "b", "c"])
         assert set(result) == {"a", "b", "c", "a&b", "a&c", "b&c", "a&b&c"}
 
-    def test_duplicate_roles_removed(self):
+    def test_duplicate_roles_removed(self) -> None:
         result = create_exploded_roles(["admin", "admin", "user"])
         assert set(result) == {"admin", "user", "admin&user"}
 
-    def test_roles_are_sorted_in_combinations(self):
+    def test_roles_are_sorted_in_combinations(self) -> None:
         result = create_exploded_roles(["z", "a"])
         assert "a&z" in result
 
-    def test_four_roles(self):
+    def test_four_roles(self) -> None:
         result = create_exploded_roles(["a", "b", "c", "d"])
         assert len(result) == 15
         assert "a&b&c&d" in result