diff --git a/servers/fai-lambda/fai-chat/src/prompts/system.py b/servers/fai-lambda/fai-chat/src/prompts/system.py
index 7a28ff6600..21b1caef63 100644
--- a/servers/fai-lambda/fai-chat/src/prompts/system.py
+++ b/servers/fai-lambda/fai-chat/src/prompts/system.py
@@ -26,6 +26,15 @@
Tool calls should only be made when the provided documents clearly lack the necessary information. Always attempt to answer from the documents first before invoking any tools.
+
+The initial documents below are the top 5 results of a hybrid search over the user's query: vector search over small content chunks and BM25 search over document titles, keywords, and chunks, merged with reciprocal rank fusion (RRF). Vector hits return the full document that contains the matched chunk. Documents are ordered from most to least relevant, and higher scores indicate stronger relevance; use this ordering and the scores to prioritize evidence.
+Documents are provided in XML under a <documents> element and come from the domain's documentation, related websites, or relevant code examples in the knowledge base. Each <document> includes: index, title, score, url (if available), product (if available), source (if available), document_id (when available), and content in a CDATA section. Use these fields to understand provenance and relevance before answering.
+
+
+
+Use the documentationSearch tool when the provided documents do not fully answer the question. The tool runs the same hybrid search described above against the knowledge base and returns additional documents with titles and URLs when available. Craft concise, specific queries that capture the missing details you need. Make at most 2 calls and incorporate any new results with citations.
+
+
Write a well-formatted answer that is clear, structured, and optimized for readability using Markdown headers, lists, and text. Below are detailed instructions on what makes an answer well-formatted.
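
The prompt text above names reciprocal rank fusion but does not define it. Below is a minimal sketch of an RRF merge, assuming two ranked lists of document ids (one per search arm) and the conventional k=60 smoothing constant; the function and variable names are illustrative and do not come from this repository.

```python
from collections import defaultdict

def rrf_merge(rankings: list[list[str]], k: int = 60, top_k: int = 5) -> list[str]:
    """Illustrative reciprocal rank fusion: score each id by the sum of 1 / (k + rank)."""
    scores: dict[str, float] = defaultdict(float)
    for ranking in rankings:
        for rank, doc_id in enumerate(ranking, start=1):
            scores[doc_id] += 1.0 / (k + rank)
    # Highest fused score first; ties broken by id for determinism.
    return sorted(scores, key=lambda doc_id: (-scores[doc_id], doc_id))[:top_k]

# Example: fuse a vector ranking with a BM25 ranking (hypothetical ids).
vector_hits = ["doc-a", "doc-b", "doc-c"]
bm25_hits = ["doc-c", "doc-a", "doc-d"]
print(rrf_merge([vector_hits, bm25_hits], top_k=5))
```
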
@@ -108,22 +117,35 @@
def format_retrieved_docs(docs: list[RetrievedDocument], domain: str) -> str:
if not docs:
- return "No relevant documentation was found."
+        return f'<documents domain="{domain}" />'
+
+ def to_cdata(value: str) -> str:
+        return f"<![CDATA[{value.replace(']]>', ']]]]><![CDATA[>')}]]>"
- formatted_docs = []
+    lines = [f'<documents domain="{domain}">']
for i, doc in enumerate(docs, 1):
title = doc.metadata.get("title", "Untitled") if doc.metadata else "Untitled"
url = doc.metadata.get("url", "") if doc.metadata else ""
- content = doc.content
-
- doc_section = f"## Document {i}: {title}"
+ product = doc.metadata.get("product", "") if doc.metadata else ""
+ source = doc.metadata.get("source", "") if doc.metadata else ""
+ score = f"{doc.score:.3f}"
+ document_id = doc.document_id or ""
+        lines.append(f'    <document index="{i}" document_id="{document_id}">')
+        lines.append(f"        <title>{to_cdata(title)}</title>")
+        lines.append(f"        <score>{score}</score>")
if url:
- doc_section += f"\nURL: {url}"
- doc_section += f"\n\n{content}"
-
- formatted_docs.append(doc_section)
-
- return "\n\n---\n\n".join(formatted_docs)
+            lines.append(f"        <url>{to_cdata(url)}</url>")
+        if product:
+            lines.append(f"        <product>{to_cdata(product)}</product>")
+        if source:
+            lines.append(f"        <source>{to_cdata(source)}</source>")
+        lines.append("        <content>")
+        lines.append(f"            {to_cdata(doc.content)}")
+        lines.append("        </content>")
+        lines.append("    </document>")
+    lines.append("</documents>")
+
+ return "\n".join(lines)
def build_messages(
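
The <content> elements built above wrap document text in CDATA, splitting any literal "]]>" so the section cannot terminate early. A standalone sketch of that escaping technique, with names chosen here rather than taken from the codebase, can be sanity-checked by parsing the result back with xml.etree:

```python
import xml.etree.ElementTree as ET

def to_cdata(value: str) -> str:
    # Close the CDATA section before a literal "]]>" and reopen it around the ">".
    return f"<![CDATA[{value.replace(']]>', ']]]]><![CDATA[>')}]]>"

content = 'if tokens == "]]>": print("edge case")'
wrapped = f"<content>{to_cdata(content)}</content>"
# ElementTree folds adjacent CDATA sections back into plain text,
# so the round trip should reproduce the original string exactly.
assert ET.fromstring(wrapped).text == content
print(wrapped)
```
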
diff --git a/servers/fai-lambda/fai-chat/src/retrieval/turbopuffer_retriever.py b/servers/fai-lambda/fai-chat/src/retrieval/turbopuffer_retriever.py
index e41298655d..5d92990a39 100644
--- a/servers/fai-lambda/fai-chat/src/retrieval/turbopuffer_retriever.py
+++ b/servers/fai-lambda/fai-chat/src/retrieval/turbopuffer_retriever.py
@@ -22,7 +22,7 @@
build_turbopuffer_filters,
)
-TURBOPUFFER_INCLUDE_ATTRIBUTES = ["document", "title", "url", "id"]
+TURBOPUFFER_INCLUDE_ATTRIBUTES = ["document", "title", "url", "id", "product", "version"]
class TurbopufferRetriever(RAGRetriever):
@@ -196,13 +196,13 @@ async def _hybrid_retrieve(self, query: RetrievalQuery) -> tuple[list[RetrievedD
multiquery_response = await tpuf_ns.multi_query(
queries=[
{
- "top_k": query.top_k * 2,
+ "top_k": query.top_k * 5,
"include_attributes": TURBOPUFFER_INCLUDE_ATTRIBUTES,
"rank_by": ("vector", "ANN", embedding),
"filters": filters,
},
{
- "top_k": query.top_k * 2,
+ "top_k": query.top_k * 5,
"include_attributes": TURBOPUFFER_INCLUDE_ATTRIBUTES,
"rank_by": (
"Sum",
diff --git a/servers/fai-lambda/fai-chat/src/routes/chat.py b/servers/fai-lambda/fai-chat/src/routes/chat.py
index cf228a83eb..55687e70e5 100644
--- a/servers/fai-lambda/fai-chat/src/routes/chat.py
+++ b/servers/fai-lambda/fai-chat/src/routes/chat.py
@@ -111,7 +111,7 @@ async def chat(
retrieval_query = RetrievalQuery(
query=user_query,
domain=domain,
- top_k=5,
+ top_k=3,
strategy=RetrievalStrategy.HYBRID,
filters=query_filters,
)
diff --git a/servers/fai-lambda/fai-chat/tests/test_auth.py b/servers/fai-lambda/fai-chat/tests/test_auth.py
index 6682779cfc..46f8b65a8f 100644
--- a/servers/fai-lambda/fai-chat/tests/test_auth.py
+++ b/servers/fai-lambda/fai-chat/tests/test_auth.py
@@ -2,31 +2,31 @@
class TestRoleExplosion:
- def test_empty_roles(self):
+ def test_empty_roles(self) -> None:
result = create_exploded_roles([])
assert result == []
- def test_single_role(self):
+ def test_single_role(self) -> None:
result = create_exploded_roles(["admin"])
assert result == ["admin"]
- def test_two_roles(self):
+ def test_two_roles(self) -> None:
result = create_exploded_roles(["admin", "user"])
assert set(result) == {"admin", "user", "admin&user"}
- def test_three_roles(self):
+ def test_three_roles(self) -> None:
result = create_exploded_roles(["a", "b", "c"])
assert set(result) == {"a", "b", "c", "a&b", "a&c", "b&c", "a&b&c"}
- def test_duplicate_roles_removed(self):
+ def test_duplicate_roles_removed(self) -> None:
result = create_exploded_roles(["admin", "admin", "user"])
assert set(result) == {"admin", "user", "admin&user"}
- def test_roles_are_sorted_in_combinations(self):
+ def test_roles_are_sorted_in_combinations(self) -> None:
result = create_exploded_roles(["z", "a"])
assert "a&z" in result
- def test_four_roles(self):
+ def test_four_roles(self) -> None:
result = create_exploded_roles(["a", "b", "c", "d"])
assert len(result) == 15
assert "a&b&c&d" in result
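
The assertions above pin down create_exploded_roles: every non-empty combination of the unique roles, sorted within each combination and joined with "&", giving 2^n - 1 entries (15 for four roles). A hypothetical implementation consistent with these tests, not necessarily the repository's actual code, could look like:

```python
from itertools import combinations

def create_exploded_roles(roles: list[str]) -> list[str]:
    # Deduplicate and sort so each combination is emitted in a stable "a&b" order.
    unique = sorted(set(roles))
    exploded: list[str] = []
    for size in range(1, len(unique) + 1):
        for combo in combinations(unique, size):
            exploded.append("&".join(combo))
    return exploded

assert create_exploded_roles([]) == []
assert set(create_exploded_roles(["z", "a"])) == {"a", "z", "a&z"}
assert len(create_exploded_roles(["a", "b", "c", "d"])) == 15
```
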