Merge ChatReadRetrieveReadApproach into Approach class

pamelafox · pamelafox · commit c57f62fdf392 · 2025-12-05T22:21:16.000-08:00
Since the Ask tab was removed, there is now only one approach implementation.
This refactor merges ChatReadRetrieveReadApproach into the base Approach class,
simplifying the codebase by removing the unnecessary inheritance hierarchy.

Changes:
- Merge all ChatReadRetrieveReadApproach code into approach.py
- Remove ABC base class since there's only one implementation
- Delete chatreadretrieveread.py
- Update app.py to use Approach directly
- Update tests to use new import paths and the renamed QUERY_REWRITE_NO_RESPONSE constant (previously NO_RESPONSE)
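- Update documentation to reflect simplified architecture, including a refreshed architecture diagram (Key Vault and GPT-4 Vision removed) and an updated optional-features list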
diff --git a/AGENTS.md b/AGENTS.md
@@ -10,8 +10,7 @@ If necessary, edit this file to ensure it accurately reflects the current state
 * app: Contains the main application code, including frontend and backend.
   * app/backend: Contains the Python backend code, written with Quart framework.
     * app/backend/approaches: Contains the different approaches
-      * app/backend/approaches/approach.py: Base class for all approaches
-      * app/backend/approaches/chatreadretrieveread.py: Chat approach, includes query rewriting step first
+      * app/backend/approaches/approach.py: Main RAG approach class with query rewriting and retrieval
       * app/backend/approaches/prompts/chat_query_rewrite.prompty: Prompt used to rewrite the query based off search history into a better search query
       * app/backend/approaches/prompts/chat_query_rewrite_tools.json: Tools used by the query rewriting prompt
       * app/backend/approaches/prompts/chat_answer_question.prompty: Prompt used by the Chat approach to actually answer the question based off sources
@@ -86,7 +85,7 @@ When adding a new developer setting, update:
   * app/frontend/src/pages/chat/Chat.tsx: Add the setting to the component, pass it to Settings
 
 * backend:
-  * app/backend/approaches/chatreadretrieveread.py :  Retrieve from overrides parameter
+  * app/backend/approaches/approach.py :  Retrieve from overrides parameter
   * app/backend/app.py: Some settings may need to be sent down in the /config route.
 
 ## When adding tests for a new feature
diff --git a/app/backend/app.py b/app/backend/app.py
@@ -45,7 +45,6 @@
 from quart_cors import cors
 
 from approaches.approach import Approach, DataPoints
-from approaches.chatreadretrieveread import ChatReadRetrieveReadApproach
 from approaches.promptmanager import PromptyManager
 from chat_history.cosmosdb import chat_history_cosmosdb_bp
 from config import (
@@ -702,8 +701,8 @@ async def setup_clients():
 
     prompt_manager = PromptyManager()
 
-    # ChatReadRetrieveReadApproach is used by /chat for multi-turn conversation
-    current_app.config[CONFIG_CHAT_APPROACH] = ChatReadRetrieveReadApproach(
+    # Approach is used by /chat for multi-turn conversation
+    current_app.config[CONFIG_CHAT_APPROACH] = Approach(
         search_client=search_client,
         search_index_name=AZURE_SEARCH_INDEX,
         knowledgebase_model=AZURE_OPENAI_KNOWLEDGEBASE_MODEL,
diff --git a/app/backend/approaches/approach.py b/app/backend/approaches/approach.py
diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py
diff --git a/docs/architecture.md b/docs/architecture.md
@@ -1,6 +1,6 @@
-# RAG Chat: Application Architecture
+# RAG Chat: Application architecture
 
-This document provides a detailed architectural overview of this application, a Retrieval Augmented Generation (RAG) application that creates a ChatGPT-like experience over your own documents. It combines Azure OpenAI Service for AI capabilities with Azure AI Search for document indexing and retrieval.
+This document provides a detailed architectural overview of this application, a Retrieval Augmented Generation (RAG) application that creates a ChatGPT-like experience over your own documents. It combines Azure OpenAI Service for LLM calls with Azure AI Search for document indexing and retrieval.
 
 For getting started with the application, see the main [README](../README.md).
 
@@ -21,20 +21,16 @@ graph TB
         end
 
         subgraph "Backend"
-            API[🐍 Python API<br/>Flask/Quart<br/>Chat Endpoints<br/>Document Upload<br/>Authentication]
-
-            subgraph "Approaches"
-                CRR[ChatReadRetrieveRead<br/>Approach]
-            end
+            API[🐍 Python API<br/>Quart<br/>Chat Endpoints<br/>Document Upload<br/>Authentication<br/>RAG Approach]
         end
     end
 
     subgraph "Azure Services"
         subgraph "AI Services"
-            OpenAI[🤖 Azure OpenAI<br/>GPT-4 Mini<br/>Text Embeddings<br/>GPT-4 Vision]
+            OpenAI[🤖 Azure OpenAI<br/>GPT-4.1 Mini<br/>Text Embeddings]
             Search[🔍 Azure AI Search<br/>Vector Search<br/>Semantic Ranking<br/>Full-text Search]
             DocIntel[📄 Azure Document<br/>Intelligence<br/>Text Extraction<br/>Layout Analysis]
-            Vision2[👁️ Azure AI Vision<br/>optional]
+            Vision[👁️ Azure AI Vision<br/>optional]
             Speech[🎤 Azure Speech<br/>Services optional]
         end
 
@@ -46,7 +42,6 @@ graph TB
         subgraph "Platform Services"
             ContainerApps[📦 Azure Container Apps<br/>or App Service<br/>Application Hosting]
             AppInsights[📊 Application Insights<br/>Monitoring<br/>Telemetry]
-            KeyVault[🔐 Azure Key Vault<br/>Secrets Management]
         end
     end
 
@@ -59,9 +54,6 @@ graph TB
     Browser <--> React
     React <--> API
 
-    %% Backend Processing
-    API --> CRR
-
     %% Azure Service Connections
     API <--> OpenAI
     API <--> Search
@@ -78,7 +70,6 @@ graph TB
     %% Platform Integration
     ContainerApps --> API
     API --> AppInsights
-    API --> KeyVault
 
     %% Styling
     classDef userLayer fill:#e1f5fe
@@ -89,10 +80,10 @@ graph TB
     classDef processing fill:#f1f8e9
 
     class User,Browser userLayer
-    class React,API,CRR appLayer
-    class OpenAI,Search,DocIntel,Vision2,Speech azureAI
+    class React,API appLayer
+    class OpenAI,Search,DocIntel,Vision,Speech azureAI
     class Blob,Cosmos azureStorage
-    class ContainerApps,AppInsights,KeyVault azurePlatform
+    class ContainerApps,AppInsights azurePlatform
     class PrepDocs processing
 ```
 
@@ -148,16 +139,15 @@ sequenceDiagram
 
 ### Frontend (React/TypeScript)
 
-- **Chat Interface**: Main conversational UI
-- **Settings Panel**: Configuration options for AI behavior
-- **Citation Display**: Shows sources and references
+- **Chat interface**: Main conversational UI
+- **Settings panel**: Configuration options for AI behavior
+- **Citation display**: Shows sources and references
 - **Authentication**: Optional user login integration
 
 ### Backend (Python)
 
 - **API Layer**: RESTful endpoints for chat, search, and configuration. See [HTTP Protocol](http_protocol.md) for detailed API documentation.
-- **Approach Patterns**: Different strategies for processing queries
-  - `ChatReadRetrieveRead`: Multi-turn conversation with retrieval
+- **RAG approach**: Multi-turn conversation with retrieval
 - **Authentication**: Optional integration with Azure Active Directory
 
 ### Azure Services Integration
@@ -171,11 +161,14 @@ sequenceDiagram
 
 The architecture supports several optional features that can be enabled. For detailed configuration instructions, see the [optional features guide](deploy_features.md):
 
-- **GPT-4 with Vision**: Process image-heavy documents
-- **Speech Services**: Voice input/output capabilities
-- **Chat History**: Persistent conversation storage in Cosmos DB
-- **Authentication**: User login and access control
-- **Private Endpoints**: Network isolation for enhanced security
+- **Multimodal embeddings and answering**: Use image embeddings for searching and images when answering
+- **Reasoning models**: Use reasoning models like o3/o4-mini for more thoughtful responses
+- **Agentic retrieval**: Use agentic retrieval in place of the Search API
+- **Speech input/output**: Voice input via browser API, voice output via Azure Speech Services
+- **Chat history**: Browser-based (IndexedDB) or persistent storage in Cosmos DB
+- **Authentication**: User login and document-level access control
+- **User document upload**: Allow users to upload and chat with their own documents
+- **Private endpoints**: Network isolation for enhanced security
 
 ## Deployment Options
 
diff --git a/docs/customization.md b/docs/customization.md
@@ -32,7 +32,7 @@ The backend is built using [Quart](https://quart.palletsprojects.com/), a Python
 
 Typically, the primary backend code you'll want to customize is the `app/backend/approaches` folder, which contains the code and prompts powering the RAG flow.
 
-The RAG flow is implemented in [chatreadretrieveread.py](https://github.com/Azure-Samples/azure-search-openai-demo/blob/main/app/backend/approaches/chatreadretrieveread.py).
+The RAG flow is implemented in [approach.py](https://github.com/Azure-Samples/azure-search-openai-demo/blob/main/app/backend/approaches/approach.py).
 
 1. **Query rewriting**: It calls the OpenAI ChatCompletion API to turn the user question into a good search query, using the prompt and tools from [chat_query_rewrite.prompty](https://github.com/Azure-Samples/azure-search-openai-demo/blob/main/app/backend/approaches/prompts/chat_query_rewrite.prompty).
 2. **Search**: It queries Azure AI Search for search results for that query (optionally using the vector embeddings for that query).
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -31,7 +31,7 @@
 
 import app
 import core
-from approaches.chatreadretrieveread import ChatReadRetrieveReadApproach
+from approaches.approach import Approach
 from approaches.promptmanager import PromptyManager
 from core.authentication import AuthenticationHelper
 from prepdocslib.blobmanager import AdlsBlobManager, BlobManager
@@ -1125,7 +1125,7 @@ def mock_user_directory_client(monkeypatch):
 
 @pytest.fixture
 def chat_approach():
-    return ChatReadRetrieveReadApproach(
+    return Approach(
         search_client=SearchClient(endpoint="", index_name="", credential=AzureKeyCredential("")),
         search_index_name=None,
         knowledgebase_model=None,
diff --git a/tests/test_app.py b/tests/test_app.py
@@ -205,7 +205,7 @@ async def test_auth_setup_returns_payload(client):
 @pytest.mark.asyncio
 async def test_chat_handle_exception(client, monkeypatch, snapshot, caplog):
     monkeypatch.setattr(
-        "approaches.chatreadretrieveread.ChatReadRetrieveReadApproach.run",
+        "approaches.approach.Approach.run",
         mock.Mock(side_effect=ZeroDivisionError("something bad happened")),
     )
 
@@ -222,7 +222,7 @@ async def test_chat_handle_exception(client, monkeypatch, snapshot, caplog):
 @pytest.mark.asyncio
 async def test_chat_stream_handle_exception(client, monkeypatch, snapshot, caplog):
     monkeypatch.setattr(
-        "approaches.chatreadretrieveread.ChatReadRetrieveReadApproach.run_stream",
+        "approaches.approach.Approach.run_stream",
         mock.Mock(side_effect=ZeroDivisionError("something bad happened")),
     )
 
@@ -239,7 +239,7 @@ async def test_chat_stream_handle_exception(client, monkeypatch, snapshot, caplo
 @pytest.mark.asyncio
 async def test_chat_handle_exception_contentsafety(client, monkeypatch, snapshot, caplog):
     monkeypatch.setattr(
-        "approaches.chatreadretrieveread.ChatReadRetrieveReadApproach.run",
+        "approaches.approach.Approach.run",
         mock.Mock(side_effect=filtered_response),
     )
 
diff --git a/tests/test_chatapproach.py b/tests/test_chatapproach.py
@@ -8,14 +8,14 @@
 
 from approaches.approach import (
     ActivityDetail,
+    Approach,
     DataPoints,
     Document,
     ExtraInfo,
     SharePointResult,
     ThoughtStep,
     WebResult,
 )
-from approaches.chatreadretrieveread import ChatReadRetrieveReadApproach
 from approaches.promptmanager import PromptyManager
 from prepdocslib.embeddings import ImageEmbeddings
 
@@ -149,7 +149,9 @@ def test_extract_rewritten_query_invalid_json(chat_approach):
     }
     completion = ChatCompletion.model_validate(payload, strict=False)
 
-    result = chat_approach.extract_rewritten_query(completion, "original", no_response_token=chat_approach.NO_RESPONSE)
+    result = chat_approach.extract_rewritten_query(
+        completion, "original", no_response_token=chat_approach.QUERY_REWRITE_NO_RESPONSE
+    )
 
     assert result == "fallback query"
 
@@ -281,7 +283,7 @@ async def mock_create_embedding_for_text(self, q: str):
 async def test_compute_multimodal_embedding_no_client():
     """Test that compute_multimodal_embedding raises ValueError when image_embeddings_client is not set."""
     # Create a chat approach without an image_embeddings_client
-    chat_approach = ChatReadRetrieveReadApproach(
+    chat_approach = Approach(
         search_client=SearchClient(endpoint="", index_name="", credential=AzureKeyCredential("")),
         search_index_name=None,
         knowledgebase_model=None,