Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 33 additions & 11 deletions servers/fai-lambda/fai-chat/src/prompts/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,15 @@
Tool calls should only be made when the provided documents clearly lack the necessary information. Always attempt to answer from the documents first before invoking any tools.
</core_principles>

<retrieval_context>
The initial documents below are the top results from a hybrid search that combines vector search over small content chunks and BM25 search over document titles, keywords, and chunks, merged with reciprocal rank fusion (RRF) using the user's query. Vector hits return the full document for the matched chunk. Documents are ordered from most to least relevant; higher scores indicate stronger relevance. Use this ordering and the scores to prioritize evidence.
Documents are provided in XML under <retrieved_documents> and come from the domain's documentation, related websites, or relevant code examples in the knowledge base. Each <document> includes: index, title, score, url (if available), product (if available), source (if provided), document_id (when available), and content in a CDATA section. Use these fields to understand provenance and relevance before answering.
</retrieval_context>

<tool_usage>
Use the documentationSearch tool when the provided documents do not fully answer the question. The tool runs the same hybrid search described above against the knowledge base and returns additional documents with titles and URLs when available. Craft concise, specific queries that capture the missing details you need. Make at most 2 calls and incorporate any new results with citations.
</tool_usage>

<format_rules>
Write a well-formatted answer that is clear, structured, and optimized for readability using Markdown headers, lists, and text. Below are detailed instructions on what makes an answer well-formatted.

Expand Down Expand Up @@ -108,22 +117,35 @@

def format_retrieved_docs(docs: list[RetrievedDocument], domain: str) -> str:
    """Render retrieved documents as a ``<retrieved_documents>`` XML block.

    Each document becomes a ``<document>`` element carrying its 1-based
    ``index`` and its ``document_id`` as attributes, followed by ``<title>``,
    ``<score>`` (three decimal places), optional ``<url>``, ``<product>`` and
    ``<source>`` elements, and ``<content>``. Free text is wrapped in CDATA
    sections; attribute values are XML-escaped.

    Args:
        docs: Retrieved documents, in the order they should be presented.
        domain: Domain the documents belong to; emitted as the ``domain``
            attribute of the root element.

    Returns:
        The XML text. When ``docs`` is empty, a single self-closing
        ``<retrieved_documents ... empty="true" />`` element.
    """
    from xml.sax.saxutils import quoteattr

    def to_cdata(value: object) -> str:
        # A literal "]]>" would close the CDATA section early, so it is split
        # across two adjacent sections. None is rendered as empty text rather
        # than crashing on ``.replace``.
        text = "" if value is None else str(value)
        return f"<![CDATA[{text.replace(']]>', ']]]]><![CDATA[>')}]]>"

    # quoteattr() escapes &, <, > and quotes and adds the surrounding quotes,
    # so a stray quote in the value cannot break out of the attribute.
    domain_attr = quoteattr(str(domain))

    if not docs:
        return f'<retrieved_documents domain={domain_attr} empty="true" />'

    lines = [f"<retrieved_documents domain={domain_attr}>"]
    for i, doc in enumerate(docs, 1):
        metadata = doc.metadata or {}
        # ``or`` also covers keys that are present but null/empty.
        title = metadata.get("title") or "Untitled"
        url = metadata.get("url") or ""
        product = metadata.get("product") or ""
        source = metadata.get("source") or ""
        # NOTE(review): assumes doc.score is always a number — confirm.
        score = f"{doc.score:.3f}"
        document_id = quoteattr(str(doc.document_id or ""))

        lines.append(f' <document index="{i}" document_id={document_id}>')
        lines.append(f" <title>{to_cdata(title)}</title>")
        lines.append(f" <score>{score}</score>")
        if url:
            lines.append(f" <url>{to_cdata(url)}</url>")
        if product:
            lines.append(f" <product>{to_cdata(product)}</product>")
        if source:
            lines.append(f" <source>{to_cdata(source)}</source>")
        lines.append(" <content>")
        lines.append(f" {to_cdata(doc.content)}")
        lines.append(" </content>")
        lines.append(" </document>")
    lines.append("</retrieved_documents>")

    return "\n".join(lines)


def build_messages(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
build_turbopuffer_filters,
)

# Attributes requested for every hit in each Turbopuffer query.
# NOTE(review): format_retrieved_docs reads a "source" metadata field, but
# "source" is not requested here while "version" is — confirm how metadata is
# populated from these attributes.
TURBOPUFFER_INCLUDE_ATTRIBUTES = ["document", "title", "url", "id", "product", "version"]


class TurbopufferRetriever(RAGRetriever):
Expand Down Expand Up @@ -196,13 +196,13 @@ async def _hybrid_retrieve(self, query: RetrievalQuery) -> tuple[list[RetrievedD
multiquery_response = await tpuf_ns.multi_query(
queries=[
{
"top_k": query.top_k * 2,
"top_k": query.top_k * 5,
"include_attributes": TURBOPUFFER_INCLUDE_ATTRIBUTES,
"rank_by": ("vector", "ANN", embedding),
"filters": filters,
},
{
"top_k": query.top_k * 2,
"top_k": query.top_k * 5,
"include_attributes": TURBOPUFFER_INCLUDE_ATTRIBUTES,
"rank_by": (
"Sum",
Expand Down
2 changes: 1 addition & 1 deletion servers/fai-lambda/fai-chat/src/routes/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ async def chat(
retrieval_query = RetrievalQuery(
query=user_query,
domain=domain,
top_k=5,
top_k=3,
strategy=RetrievalStrategy.HYBRID,
filters=query_filters,
)
Expand Down
16 changes: 7 additions & 9 deletions servers/fai-lambda/fai-chat/tests/test_auth.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,32 @@
import pytest

from src.auth.roles import create_exploded_roles


class TestRoleExplosion:
    """Tests for ``create_exploded_roles``, which expands a role list into "&"-joined combinations."""

    def test_empty_roles(self) -> None:
        """No roles in, no roles out."""
        result = create_exploded_roles([])
        assert result == []

    def test_single_role(self) -> None:
        """A single role yields only itself."""
        result = create_exploded_roles(["admin"])
        assert result == ["admin"]

    def test_two_roles(self) -> None:
        """Two roles yield each role plus their pair."""
        result = create_exploded_roles(["admin", "user"])
        assert set(result) == {"admin", "user", "admin&user"}

    def test_three_roles(self) -> None:
        """Three roles yield all singles, pairs and the triple."""
        result = create_exploded_roles(["a", "b", "c"])
        assert set(result) == {"a", "b", "c", "a&b", "a&c", "b&c", "a&b&c"}

    def test_duplicate_roles_removed(self) -> None:
        """Repeated input roles do not produce extra combinations."""
        result = create_exploded_roles(["admin", "admin", "user"])
        assert set(result) == {"admin", "user", "admin&user"}

    def test_roles_are_sorted_in_combinations(self) -> None:
        """Roles inside a combination appear in sorted order."""
        result = create_exploded_roles(["z", "a"])
        assert "a&z" in result

    def test_four_roles(self) -> None:
        """Four roles yield 15 entries, including the full combination."""
        result = create_exploded_roles(["a", "b", "c", "d"])
        assert len(result) == 15
        assert "a&b&c&d" in result
Loading