From 0294b13a80cf31df128c743e8e0ab77e1332a830 Mon Sep 17 00:00:00 2001
From: Isaac Miller <isaacbmiller@gmail.com>
Date: Wed, 3 Dec 2025 16:35:22 -0500
Subject: [PATCH 1/2] Initial refactor. To Clean

---
 dspy/adapters/base.py               |  76 +++++++--
 dspy/adapters/types/history.py      | 134 +++++++++++++--
 dspy/predict/react.py               | 207 +++++++++++++++++-----
 dspy/utils/inspect_history.py       | 100 +++++++----
 scripts/test_coding_agent.py        |  18 ++
 tests/adapters/test_baml_adapter.py |   3 +-
 tests/adapters/test_chat_adapter.py | 200 +++++++++++++++++++++-
 tests/adapters/test_json_adapter.py |   3 +-
 tests/predict/test_react.py         | 255 ++++++++++++++++++----------
 9 files changed, 799 insertions(+), 197 deletions(-)
 create mode 100644 scripts/test_coding_agent.py
diff --git a/dspy/adapters/base.py b/dspy/adapters/base.py
index 8696697d3a..9ac3146671 100644
--- a/dspy/adapters/base.py
+++ b/dspy/adapters/base.py
@@ -9,6 +9,7 @@
 from dspy.adapters.types.reasoning import Reasoning
 from dspy.adapters.types.tool import Tool, ToolCalls
 from dspy.experimental import Citations
+from dspy.signatures.field import InputField, OutputField
 from dspy.signatures.signature import Signature
 from dspy.utils.callback import BaseCallback, with_callbacks
 
@@ -474,6 +475,23 @@ def _get_tool_call_output_field_name(self, signature: type[Signature]) -> bool:
                 return name
         return None
 
+    def _serialize_kv_value(self, v: Any) -> Any:
+        """Safely serialize values for kv-mode formatting."""
+        if isinstance(v, (str, int, float, bool)) or v is None:
+            return v
+        try:
+            return str(v)
+        except Exception:
+            return f"<unserializable {type(v).__name__}>"
+
+    def _make_dynamic_signature_for_inputs(self, keys: list[str]) -> type[Signature]:
+        """Create a dynamic signature with input fields only (no instructions)."""
+        return Signature({k: InputField() for k in keys}, instructions="")
+
+    def _make_dynamic_signature_for_outputs(self, keys: list[str]) -> type[Signature]:
+        """Create a dynamic signature with output fields only (no instructions)."""
+        return Signature({k: OutputField() for k in keys}, instructions="")
+
     def format_conversation_history(
         self,
         signature: type[Signature],
@@ -483,6 +501,11 @@ def format_conversation_history(
         """Format the conversation history.
 
         This method formats the conversation history and the current input as multiturn messages.
+        Supports four modes:
+        - signature: Dict keys match signature input/output fields → user/assistant pairs
+        - kv: Nested {"input_fields": {...}, "output_fields": {...}} → user/assistant pairs
+        - dict: Arbitrary serializable kv pairs → all in single user message (default)
+        - raw: Direct LM messages with {"role": "user", "content": "..."} → passed through
 
         Args:
             signature: The DSPy signature for which to format the conversation history.
@@ -492,25 +515,50 @@ def format_conversation_history(
         Returns:
             A list of multiturn messages.
         """
-        conversation_history = inputs[history_field_name].messages if history_field_name in inputs else None
-
-        if conversation_history is None:
+        history = inputs.get(history_field_name)
+        if history is None:
             return []
 
         messages = []
-        for message in conversation_history:
-            messages.append(
-                {
+        for msg in history.messages:
+            mode = history._detect_mode(msg)
+
+            if mode == "raw":
+                messages.append(dict(msg))
+
+            elif mode == "kv":
+                if "input_fields" in msg:
+                    input_dict = {k: self._serialize_kv_value(v) for k, v in msg["input_fields"].items()}
+                    sig = self._make_dynamic_signature_for_inputs(list(input_dict.keys()))
+                    messages.append({
+                        "role": "user",
+                        "content": self.format_user_message_content(sig, input_dict),
+                    })
+                if "output_fields" in msg:
+                    output_dict = {k: self._serialize_kv_value(v) for k, v in msg["output_fields"].items()}
+                    sig = self._make_dynamic_signature_for_outputs(list(output_dict.keys()))
+                    messages.append({
+                        "role": "assistant",
+                        "content": self.format_assistant_message_content(sig, output_dict),
+                    })
+
+            elif mode == "signature":
+                messages.append({
                     "role": "user",
-                    "content": self.format_user_message_content(signature, message),
-                }
-            )
-            messages.append(
-                {
+                    "content": self.format_user_message_content(signature, msg),
+                })
+                messages.append({
                     "role": "assistant",
-                    "content": self.format_assistant_message_content(signature, message),
-                }
-            )
+                    "content": self.format_assistant_message_content(signature, msg),
+                })
+
+            else:  # dict mode (default) - all kv pairs go into single user message
+                serialized = {k: self._serialize_kv_value(v) for k, v in msg.items()}
+                sig = self._make_dynamic_signature_for_inputs(list(serialized.keys()))
+                messages.append({
+                    "role": "user",
+                    "content": self.format_user_message_content(sig, serialized),
+                })
 
         # Remove the history field from the inputs
         del inputs[history_field_name]
diff --git a/dspy/adapters/types/history.py b/dspy/adapters/types/history.py
index 2c39d5c4ab..9659a06c30 100644
--- a/dspy/adapters/types/history.py
+++ b/dspy/adapters/types/history.py
@@ -1,4 +1,4 @@
-from typing import Any
+from typing import Any, Literal
 
 import pydantic
 
@@ -6,20 +6,42 @@
 class History(pydantic.BaseModel):
     """Class representing the conversation history.
 
-    The conversation history is a list of messages, each message entity should have keys from the associated signature.
-    For example, if you have the following signature:
-
-    ```
-    class MySignature(dspy.Signature):
-        question: str = dspy.InputField()
-        history: dspy.History = dspy.InputField()
-        answer: str = dspy.OutputField()
-    ```
-
-    Then the history should be a list of dictionaries with keys "question" and "answer".
+    History supports four message formats:
+    
+    1. **Signature mode**: Dict keys match signature input/output fields → user/assistant pairs.
+       Must be explicitly set via mode="signature".
+       ```python
+       history = dspy.History(messages=[
+           {"question": "What is 2+2?", "answer": "4"},
+       ], mode="signature")
+       ```
+    
+    2. **KV mode**: Nested `{"input_fields": {...}, "output_fields": {...}}` → user/assistant pairs.
+       ```python
+       history = dspy.History.from_kv([
+           {"input_fields": {"thought": "...", "tool_name": "search"}, "output_fields": {"observation": "..."}},
+       ])
+       ```
+    
+    3. **Dict mode** (default): Arbitrary serializable key-value pairs → all in single user message.
+       ```python
+       history = dspy.History(messages=[
+           {"thought": "I need to search", "tool_name": "search", "observation": "Results found"},
+       ])
+       ```
+    
+    4. **Raw mode**: Direct LM messages with `{"role": "user", "content": "..."}` → passed through.
+       ```python
+       history = dspy.History.from_raw([
+           {"role": "user", "content": "Hello"},
+           {"role": "assistant", "content": "Hi there!"},
+       ])
+       ```
+
+    The mode is auto-detected separately for each message if not explicitly provided.
 
     Example:
-        ```
+        ```python
         import dspy
 
         dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"))
@@ -41,7 +63,7 @@ class MySignature(dspy.Signature):
         ```
 
     Example of capturing the conversation history:
-        ```
+        ```python
         import dspy
 
         dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"))
@@ -59,6 +81,7 @@ class MySignature(dspy.Signature):
     """
 
     messages: list[dict[str, Any]]
+    mode: Literal["signature", "kv", "dict", "raw"] | None = None
 
     model_config = pydantic.ConfigDict(
         frozen=True,
@@ -66,3 +89,86 @@ class MySignature(dspy.Signature):
         validate_assignment=True,
         extra="forbid",
     )
+
+    def _detect_mode(self, msg: dict) -> str:
+        """Detect the mode for a message based on its structure.
+        
+        Detection rules:
+        - Raw: has "role" and "content" keys, but NOT "input_fields"/"output_fields"
+        - KV: keys are ONLY "input_fields" and/or "output_fields"
+        - Signature: must be explicitly set (requires matching against signature fields)
+        - Dict: everything else (default) - arbitrary kv pairs go into user message
+        """
+        if self.mode:
+            return self.mode
+
+        keys = set(msg.keys())
+
+        if {"role", "content"} <= keys and not ({"input_fields", "output_fields"} & keys):
+            return "raw"
+
+        if keys <= {"input_fields", "output_fields"} and keys:
+            return "kv"
+
+        return "dict"
+
+    @pydantic.model_validator(mode="after")
+    def _validate_messages(self) -> "History":
+        for msg in self.messages:
+            detected = self._detect_mode(msg)
+
+            if detected == "raw":
+                if not isinstance(msg.get("role"), str):
+                    raise ValueError(f"'role' must be a string: {msg}")
+                # content can be None for tool call messages, or string otherwise
+                content = msg.get("content")
+                if content is not None and not isinstance(content, str):
+                    raise ValueError(f"'content' must be a string or None: {msg}")
+
+            elif detected == "kv":
+                if "input_fields" in msg and not isinstance(msg["input_fields"], dict):
+                    raise ValueError(f"'input_fields' must be a dict: {msg}")
+                if "output_fields" in msg and not isinstance(msg["output_fields"], dict):
+                    raise ValueError(f"'output_fields' must be a dict: {msg}")
+
+        return self
+
+    def with_messages(self, messages: list[dict[str, Any]]) -> "History":
+        """Return a new History with additional messages appended.
+        
+        Args:
+            messages: List of messages to append.
+            
+        Returns:
+            A new History instance with the messages appended.
+        """
+        return History(messages=[*self.messages, *messages], mode=self.mode)
+
+    @classmethod
+    def from_kv(cls, messages: list[dict[str, Any]]) -> "History":
+        """Create a History instance with KV mode.
+        
+        KV mode expects messages with "input_fields" and/or "output_fields" keys,
+        each containing a dict of field names to values.
+        
+        Args:
+            messages: List of dicts with "input_fields" and/or "output_fields" keys.
+            
+        Returns:
+            A History instance with mode="kv".
+        """
+        return cls(messages=messages, mode="kv")
+
+    @classmethod
+    def from_raw(cls, messages: list[dict[str, Any]]) -> "History":
+        """Create a History instance with raw mode.
+        
+        Raw mode expects direct LM messages with "role" and "content" keys.
+        
+        Args:
+            messages: List of dicts with "role" and "content" keys.
+            
+        Returns:
+            A History instance with mode="raw".
+        """
+        return cls(messages=messages, mode="raw")
diff --git a/dspy/predict/react.py b/dspy/predict/react.py
index 5f87879f80..b7e1d076cd 100644
--- a/dspy/predict/react.py
+++ b/dspy/predict/react.py
@@ -1,9 +1,12 @@
+import json
 import logging
+import uuid
 from typing import TYPE_CHECKING, Any, Callable, Literal
 
 from litellm import ContextWindowExceededError
 
 import dspy
+from dspy.adapters.types.history import History
 from dspy.adapters.types.tool import Tool
 from dspy.primitives.module import Module
 from dspy.signatures.signature import ensure_signature
@@ -73,115 +76,231 @@ def get_weather(city: str) -> str:
 
         react_signature = (
             dspy.Signature({**signature.input_fields}, "\n".join(instr))
-            .append("trajectory", dspy.InputField(), type_=str)
+            .append("trajectory", dspy.InputField(), type_=History)
             .append("next_thought", dspy.OutputField(), type_=str)
             .append("next_tool_name", dspy.OutputField(), type_=Literal[tuple(tools.keys())])
             .append("next_tool_args", dspy.OutputField(), type_=dict[str, Any])
         )
 
+        output_names = ", ".join(signature.output_fields)  # "{outputs}" was previously sent to the LM unformatted
+        extract_instructions = (
+            f"You are an extraction Agent whose job it is to extract the fields: {output_names} from the given "
+            f"trajectory. The original task was:\n{signature.instructions}"
+            "\nIn trying to solve this task, an executor agent has used tools to generate the conversation below."
+            f"\nGiven this trajectory, your only job is to extract the fields: {output_names}."
+        )
         fallback_signature = dspy.Signature(
             {**signature.input_fields, **signature.output_fields},
-            signature.instructions,
-        ).append("trajectory", dspy.InputField(), type_=str)
+            extract_instructions,
+        ).append("trajectory", dspy.InputField(desc="The history of the conversation. There is enough context to produce the final output"), type_=History)
 
         self.tools = tools
         self.react = dspy.Predict(react_signature)
         self.extract = dspy.ChainOfThought(fallback_signature)
 
-    def _format_trajectory(self, trajectory: dict[str, Any]):
-        adapter = dspy.settings.adapter or dspy.ChatAdapter()
-        trajectory_signature = dspy.Signature(f"{', '.join(trajectory.keys())} -> x")
-        return adapter.format_user_message_content(trajectory_signature, trajectory)
-
     def forward(self, **input_args):
-        trajectory = {}
         max_iters = input_args.pop("max_iters", self.max_iters)
-        for idx in range(max_iters):
+
+        # Check for existing history in input_args, otherwise start empty
+        trajectory = input_args.pop("trajectory", None)
+        if trajectory is None:
+            trajectory = History(messages=[], mode="raw")
+
+        for _ in range(max_iters):
             try:
-                pred = self._call_with_potential_trajectory_truncation(self.react, trajectory, **input_args)
+                pred, trajectory = self._call_with_potential_truncation(self.react, trajectory, **input_args)
             except ValueError as err:
                 logger.warning(f"Ending the trajectory: Agent failed to select a valid tool: {_fmt_exc(err)}")
                 break
 
-            trajectory[f"thought_{idx}"] = pred.next_thought
-            trajectory[f"tool_name_{idx}"] = pred.next_tool_name
-            trajectory[f"tool_args_{idx}"] = pred.next_tool_args
+            # Add the agent's action to trajectory
+            trajectory, tool_call_id = self._append_action(
+                trajectory,
+                thought=pred.next_thought,
+                tool_name=pred.next_tool_name,
+                tool_args=pred.next_tool_args,
+            )
 
+            # Execute tool and get observation
             try:
-                trajectory[f"observation_{idx}"] = self.tools[pred.next_tool_name](**pred.next_tool_args)
+                observation = self.tools[pred.next_tool_name](**pred.next_tool_args)
             except Exception as err:
-                trajectory[f"observation_{idx}"] = f"Execution error in {pred.next_tool_name}: {_fmt_exc(err)}"
+                observation = f"Execution error in {pred.next_tool_name}: {_fmt_exc(err)}"
+
+            # Add observation to trajectory
+            trajectory = self._append_observation(trajectory, observation, tool_call_id)
 
             if pred.next_tool_name == "finish":
                 break
 
-        extract = self._call_with_potential_trajectory_truncation(self.extract, trajectory, **input_args)
+        extract, trajectory = self._call_with_potential_truncation(self.extract, trajectory, **input_args)
+
+        # Add the extract step to the trajectory
+        trajectory = self._append_extract(trajectory, extract)
+
         return dspy.Prediction(trajectory=trajectory, **extract)
 
     async def aforward(self, **input_args):
-        trajectory = {}
         max_iters = input_args.pop("max_iters", self.max_iters)
-        for idx in range(max_iters):
+
+        # Check for existing history in input_args, otherwise start empty
+        trajectory = input_args.pop("trajectory", None)
+        if trajectory is None:
+            trajectory = History(messages=[], mode="raw")
+
+        for _ in range(max_iters):
             try:
-                pred = await self._async_call_with_potential_trajectory_truncation(self.react, trajectory, **input_args)
+                pred, trajectory = await self._async_call_with_potential_truncation(self.react, trajectory, **input_args)
             except ValueError as err:
                 logger.warning(f"Ending the trajectory: Agent failed to select a valid tool: {_fmt_exc(err)}")
                 break
 
-            trajectory[f"thought_{idx}"] = pred.next_thought
-            trajectory[f"tool_name_{idx}"] = pred.next_tool_name
-            trajectory[f"tool_args_{idx}"] = pred.next_tool_args
+            # Add the agent's action to trajectory
+            trajectory, tool_call_id = self._append_action(
+                trajectory,
+                thought=pred.next_thought,
+                tool_name=pred.next_tool_name,
+                tool_args=pred.next_tool_args,
+            )
 
+            # Execute tool and get observation
             try:
-                trajectory[f"observation_{idx}"] = await self.tools[pred.next_tool_name].acall(**pred.next_tool_args)
+                observation = await self.tools[pred.next_tool_name].acall(**pred.next_tool_args)
             except Exception as err:
-                trajectory[f"observation_{idx}"] = f"Execution error in {pred.next_tool_name}: {_fmt_exc(err)}"
+                observation = f"Execution error in {pred.next_tool_name}: {_fmt_exc(err)}"
+
+            # Add observation to trajectory
+            trajectory = self._append_observation(trajectory, observation, tool_call_id)
 
             if pred.next_tool_name == "finish":
                 break
 
-        extract = await self._async_call_with_potential_trajectory_truncation(self.extract, trajectory, **input_args)
+        extract, trajectory = await self._async_call_with_potential_truncation(self.extract, trajectory, **input_args)
+
+        # Add the extract step to the trajectory
+        trajectory = self._append_extract(trajectory, extract)
+
         return dspy.Prediction(trajectory=trajectory, **extract)
 
-    def _call_with_potential_trajectory_truncation(self, module, trajectory, **input_args):
+    def _generate_tool_call_id(self) -> str:
+        """Generate a unique tool call ID."""
+        return f"call_{uuid.uuid4().hex[:24]}"
+
+    def _append_action(self, trajectory: History, thought: str, tool_name: str, tool_args: dict) -> tuple[History, str]:
+        """Append an action (thought + tool call) to the trajectory.
+        
+        Returns:
+            Tuple of (updated trajectory, tool_call_id for matching with observation)
+        """
+        tool_call_id = self._generate_tool_call_id()
+        new_msg = {
+            "role": "assistant",
+            "content": thought,
+            "tool_calls": [
+                {
+                    "id": tool_call_id,
+                    "type": "function",
+                    "function": {
+                        "name": tool_name,
+                        "arguments": json.dumps(tool_args),
+                    },
+                }
+            ],
+        }
+        return trajectory.with_messages([new_msg]), tool_call_id
+
+    def _append_observation(self, trajectory: History, observation: Any, tool_call_id: str) -> History:
+        """Append a tool response to the trajectory."""
+        if isinstance(observation, str):
+            content = observation
+        else:
+            try:
+                content = json.dumps(observation)
+            except (TypeError, ValueError):
+                content = str(observation)
+
+        new_msg = {
+            "role": "tool",
+            "tool_call_id": tool_call_id,
+            "content": content,
+        }
+        return trajectory.with_messages([new_msg])
+
+    def _append_extract(self, trajectory: History, extract) -> History:
+        """Append the extract step (final reasoning and outputs) to the trajectory."""
+        extract_dict = dict(extract)
+        reasoning = extract_dict.pop("reasoning", None)
+
+        content_parts = []
+        if reasoning:
+            content_parts.append(f"Reasoning: {reasoning}")
+        for key, value in extract_dict.items():
+            if isinstance(value, str):
+                content_parts.append(f"{key}: {value}")
+            else:
+                try:
+                    content_parts.append(f"{key}: {json.dumps(value)}")
+                except (TypeError, ValueError):
+                    content_parts.append(f"{key}: {value}")
+
+        new_msg = {
+            "role": "assistant",
+            "content": "\n".join(content_parts),
+        }
+        return trajectory.with_messages([new_msg])
+
+    def _call_with_potential_truncation(self, module, trajectory: History, **input_args) -> tuple[Any, History]:
+        """Call module with trajectory, truncating if context window exceeded.
+        
+        Returns:
+            Tuple of (module result, potentially truncated trajectory). Raises ValueError after 3 failed attempts.
+        """
         for _ in range(3):
             try:
-                return module(
-                    **input_args,
-                    trajectory=self._format_trajectory(trajectory),
-                )
+                return module(**input_args, trajectory=trajectory), trajectory
             except ContextWindowExceededError:
                 logger.warning("Trajectory exceeded the context window, truncating the oldest tool call information.")
                 trajectory = self.truncate_trajectory(trajectory)
+        raise ValueError("The context window was exceeded even after 3 attempts to truncate the trajectory.")
 
-    async def _async_call_with_potential_trajectory_truncation(self, module, trajectory, **input_args):
+    async def _async_call_with_potential_truncation(self, module, trajectory: History, **input_args) -> tuple[Any, History]:
+        """Call module with trajectory, truncating if context window exceeded.
+        
+        Returns:
+            Tuple of (module result, potentially truncated trajectory). Raises ValueError after 3 failed attempts.
+        """
         for _ in range(3):
             try:
-                return await module.acall(
-                    **input_args,
-                    trajectory=self._format_trajectory(trajectory),
-                )
+                return await module.acall(**input_args, trajectory=trajectory), trajectory
             except ContextWindowExceededError:
                 logger.warning("Trajectory exceeded the context window, truncating the oldest tool call information.")
                 trajectory = self.truncate_trajectory(trajectory)
+        raise ValueError("The context window was exceeded even after 3 attempts to truncate the trajectory.")
 
-    def truncate_trajectory(self, trajectory):
+    def truncate_trajectory(self, trajectory: History) -> History:
         """Truncates the trajectory so that it fits in the context window.
 
         Users can override this method to implement their own truncation logic.
+        For tool call format, we remove pairs of messages (assistant + tool) together.
         """
-        keys = list(trajectory.keys())
-        if len(keys) < 4:
-            # Every tool call has 4 keys: thought, tool_name, tool_args, and observation.
+        if len(trajectory.messages) < 2:
             raise ValueError(
                 "The trajectory is too long so your prompt exceeded the context window, but the trajectory cannot be "
                 "truncated because it only has one tool call."
             )
 
-        for key in keys[:4]:
-            trajectory.pop(key)
-
-        return trajectory
+        # Remove the oldest pair (assistant message with tool_calls + tool response)
+        messages = list(trajectory.messages)
+        if messages and messages[0].get("role") == "assistant" and messages[0].get("tool_calls"):
+            # Remove assistant + following tool message(s)
+            messages = messages[1:]
+            while messages and messages[0].get("role") == "tool":
+                messages = messages[1:]
+        else:
+            # Fallback: just remove the first message
+            messages = messages[1:]
+
+        return History(messages=messages, mode="raw")
 
 
 def _fmt_exc(err: BaseException, *, limit: int = 5) -> str:
diff --git a/dspy/utils/inspect_history.py b/dspy/utils/inspect_history.py
index 07934157fd..65a32fab25 100644
--- a/dspy/utils/inspect_history.py
+++ b/dspy/utils/inspect_history.py
@@ -10,6 +10,14 @@ def _blue(text: str, end: str = "\n"):
     return "\x1b[34m" + str(text) + "\x1b[0m" + end
 
 
+def _yellow(text: str, end: str = "\n"):
+    return "\x1b[33m" + str(text) + "\x1b[0m" + end
+
+
+def _cyan(text: str, end: str = "\n"):
+    return "\x1b[36m" + str(text) + "\x1b[0m" + end
+
+
 def pretty_print_history(history, n: int = 1):
     """Prints the last n prompts and their completions."""
 
@@ -22,37 +30,67 @@ def pretty_print_history(history, n: int = 1):
         print("\x1b[34m" + f"[{timestamp}]" + "\x1b[0m" + "\n")
 
         for msg in messages:
-            print(_red(f"{msg['role'].capitalize()} message:"))
-            if isinstance(msg["content"], str):
-                print(msg["content"].strip())
-            else:
-                if isinstance(msg["content"], list):
-                    for c in msg["content"]:
-                        if c["type"] == "text":
-                            print(c["text"].strip())
-                        elif c["type"] == "image_url":
-                            image_str = ""
-                            if "base64" in c["image_url"].get("url", ""):
-                                len_base64 = len(c["image_url"]["url"].split("base64,")[1])
-                                image_str = (
-                                    f"<{c['image_url']['url'].split('base64,')[0]}base64,"
-                                    f"<IMAGE BASE 64 ENCODED({len_base64!s})>"
-                                )
-                            else:
-                                image_str = f"<image_url: {c['image_url']['url']}>"
-                            print(_blue(image_str.strip()))
-                        elif c["type"] == "input_audio":
-                            audio_format = c["input_audio"]["format"]
-                            len_audio = len(c["input_audio"]["data"])
-                            audio_str = f"<audio format='{audio_format}' base64-encoded, length={len_audio}>"
-                            print(_blue(audio_str.strip()))
-                        elif c["type"] == "file" or c["type"] == "input_file":
-                            file = c.get("file", c.get("input_file", {}))
-                            filename = file.get("filename", "")
-                            file_id = file.get("file_id", "")
-                            file_data = file.get("file_data", "")
-                            file_str = f"<file: name:{filename}, id:{file_id}, data_length:{len(file_data)}>"
-                            print(_blue(file_str.strip()))
+            role = msg.get("role", "unknown")
+
+            # Handle tool response messages
+            if role == "tool":
+                tool_call_id = msg.get("tool_call_id", "unknown")
+                print(_yellow(f"Tool response (id: {tool_call_id}):"))
+                content = msg.get("content", "")
+                if content:
+                    print(content.strip() if isinstance(content, str) else str(content))
+                print("\n")
+                continue
+
+            print(_red(f"{role.capitalize()} message:"))
+
+            # Handle tool_calls in assistant messages
+            if role == "assistant" and msg.get("tool_calls"):
+                content = msg.get("content")
+                if content:
+                    print(content.strip() if isinstance(content, str) else str(content))
+                print(_cyan("Tool calls:"))
+                for tool_call in msg["tool_calls"]:
+                    func = tool_call.get("function", {})
+                    tool_id = tool_call.get("id", "unknown")
+                    name = func.get("name", "unknown")
+                    args = func.get("arguments", "{}")
+                    print(_cyan(f"  [{tool_id}] {name}({args})"))
+                print("\n")
+                continue
+
+            content = msg.get("content")
+            if content is None:
+                print("<no content>")
+            elif isinstance(content, str):
+                print(content.strip())
+            elif isinstance(content, list):
+                for c in content:
+                    if c["type"] == "text":
+                        print(c["text"].strip())
+                    elif c["type"] == "image_url":
+                        image_str = ""
+                        if "base64" in c["image_url"].get("url", ""):
+                            len_base64 = len(c["image_url"]["url"].split("base64,")[1])
+                            image_str = (
+                                f"<{c['image_url']['url'].split('base64,')[0]}base64,"
+                                f"<IMAGE BASE 64 ENCODED({len_base64!s})>"
+                            )
+                        else:
+                            image_str = f"<image_url: {c['image_url']['url']}>"
+                        print(_blue(image_str.strip()))
+                    elif c["type"] == "input_audio":
+                        audio_format = c["input_audio"]["format"]
+                        len_audio = len(c["input_audio"]["data"])
+                        audio_str = f"<audio format='{audio_format}' base64-encoded, length={len_audio}>"
+                        print(_blue(audio_str.strip()))
+                    elif c["type"] == "file" or c["type"] == "input_file":
+                        file = c.get("file", c.get("input_file", {}))
+                        filename = file.get("filename", "")
+                        file_id = file.get("file_id", "")
+                        file_data = file.get("file_data", "")
+                        file_str = f"<file: name:{filename}, id:{file_id}, data_length:{len(file_data)}>"
+                        print(_blue(file_str.strip()))
             print("\n")
 
         if isinstance(outputs[0], dict):
diff --git a/scripts/test_coding_agent.py b/scripts/test_coding_agent.py
new file mode 100644
index 0000000000..2002ce4365
--- /dev/null
+++ b/scripts/test_coding_agent.py
@@ -0,0 +1,18 @@
+import dspy
+
+dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"))
+
+interpreter = dspy.PythonInterpreter()
+
+def run_python(code: str) -> str:
+    """Execute Python code and return the output."""
+    return interpreter.execute(code)
+
+agent = dspy.ReAct("request: str -> result: str", tools=[run_python])
+
+try:
+    result = agent(request="Calculate 2+2 and give me the answer")
+    print(f"Result: {result.result}")
+    print(f"Trajectory: {result.trajectory}")
+finally:
+    interpreter.shutdown()
diff --git a/tests/adapters/test_baml_adapter.py b/tests/adapters/test_baml_adapter.py
index eaaa4f0d23..b8c6027678 100644
--- a/tests/adapters/test_baml_adapter.py
+++ b/tests/adapters/test_baml_adapter.py
@@ -361,7 +361,8 @@ class TestSignature(dspy.Signature):
         messages=[
             {"question": "What is the patient's age?", "answer": "45 years old"},
             {"question": "Any allergies?", "answer": "Penicillin allergy"},
-        ]
+        ],
+        mode="signature",
     )
 
     adapter = BAMLAdapter()
diff --git a/tests/adapters/test_chat_adapter.py b/tests/adapters/test_chat_adapter.py
index adb0dfed22..4a1f505f33 100644
--- a/tests/adapters/test_chat_adapter.py
+++ b/tests/adapters/test_chat_adapter.py
@@ -408,7 +408,8 @@ class MySignature(dspy.Signature):
         messages=[
             {"question": "What is the capital of France?", "answer": "Paris"},
             {"question": "What is the capital of Germany?", "answer": "Berlin"},
-        ]
+        ],
+        mode="signature",
     )
 
     adapter = dspy.ChatAdapter()
@@ -710,3 +711,200 @@ class MySignature(dspy.Signature):
 In adhering to this structure, your objective is: 
         Answer the question with multiple answers and scores"""
     assert system_message == expected_system_message
+
+
+class TestHistoryModes:
+    """Tests for History mode detection and adapter formatting."""
+
+    def test_history_mode_detection_dict_default(self):
+        """Messages with arbitrary keys are detected as dict mode (default)."""
+        history = dspy.History(messages=[{"question": "...", "answer": "..."}])
+        assert history._detect_mode(history.messages[0]) == "dict"
+
+    def test_history_mode_detection_kv(self):
+        """Messages with only input_fields/output_fields keys are detected as kv mode."""
+        history = dspy.History(messages=[{"input_fields": {"a": 1}, "output_fields": {"b": 2}}])
+        assert history._detect_mode(history.messages[0]) == "kv"
+
+    def test_history_mode_detection_kv_input_only(self):
+        """Messages with only input_fields are detected as kv mode."""
+        history = dspy.History(messages=[{"input_fields": {"a": 1}}])
+        assert history._detect_mode(history.messages[0]) == "kv"
+
+    def test_history_mode_detection_raw(self):
+        """Messages with role+content are detected as raw mode."""
+        history = dspy.History(messages=[{"role": "user", "content": "hello"}])
+        assert history._detect_mode(history.messages[0]) == "raw"
+
+    def test_history_mode_detection_raw_with_extra_keys(self):
+        """Raw mode allows extra keys like 'name'."""
+        history = dspy.History(messages=[{"role": "user", "content": "hello", "name": "tool"}])
+        assert history._detect_mode(history.messages[0]) == "raw"
+
+    def test_history_mode_detection_dict_with_input_fields_and_extra(self):
+        """Messages with input_fields AND extra keys fallback to dict mode."""
+        history = dspy.History(messages=[{"question": "...", "input_fields": {"a": 1}}])
+        assert history._detect_mode(history.messages[0]) == "dict"
+
+    def test_history_explicit_mode_override(self):
+        """Explicit mode overrides auto-detection."""
+        history = dspy.History(messages=[{"question": "...", "answer": "..."}], mode="signature")
+        assert history._detect_mode(history.messages[0]) == "signature"
+
+    def test_history_validation_kv_non_dict_input_fields(self):
+        """KV mode with non-dict input_fields raises ValueError."""
+        with pytest.raises(ValueError, match="'input_fields' must be a dict"):
+            dspy.History(messages=[{"input_fields": "not a dict"}])
+
+    def test_history_validation_raw_non_string_content(self):
+        """Raw mode with non-string content raises ValueError."""
+        with pytest.raises(ValueError, match="'content' must be a string or None"):
+            dspy.History(messages=[{"role": "user", "content": 123}])
+
+    def test_history_validation_raw_allows_none_content(self):
+        """Raw mode allows None content for tool call messages."""
+        history = dspy.History(messages=[
+            {"role": "assistant", "content": None, "tool_calls": [{"id": "1", "type": "function", "function": {"name": "test", "arguments": "{}"}}]}
+        ])
+        assert history.messages[0]["content"] is None
+
+    def test_history_validation_raw_non_string_role(self):
+        """Raw mode with non-string role raises ValueError."""
+        with pytest.raises(ValueError, match="'role' must be a string"):
+            dspy.History(messages=[{"role": 123, "content": "hello"}])
+
+    def test_history_from_kv_factory(self):
+        """from_kv factory sets mode to kv."""
+        history = dspy.History.from_kv([{"input_fields": {"a": 1}}])
+        assert history.mode == "kv"
+
+    def test_history_from_raw_factory(self):
+        """from_raw factory sets mode to raw."""
+        history = dspy.History.from_raw([{"role": "user", "content": "hello"}])
+        assert history.mode == "raw"
+
+    def test_adapter_formats_kv_mode_history(self):
+        """Adapter correctly formats kv-mode history."""
+        class MySignature(dspy.Signature):
+            question: str = dspy.InputField()
+            history: dspy.History = dspy.InputField()
+            answer: str = dspy.OutputField()
+
+        history = dspy.History.from_kv([
+            {
+                "input_fields": {"thought": "I need to search", "tool_name": "search"},
+                "output_fields": {"observation": "Results found"},
+            }
+        ])
+
+        adapter = dspy.ChatAdapter()
+        messages = adapter.format(MySignature, [], {"question": "test", "history": history})
+
+        # Should have: system, user (thought+tool_name), assistant (observation), user (question)
+        assert len(messages) == 4
+        assert messages[1]["role"] == "user"
+        assert "thought" in messages[1]["content"]
+        assert "tool_name" in messages[1]["content"]
+        assert messages[2]["role"] == "assistant"
+        assert "observation" in messages[2]["content"]
+
+    def test_adapter_formats_raw_mode_history(self):
+        """Adapter correctly formats raw-mode history."""
+        class MySignature(dspy.Signature):
+            question: str = dspy.InputField()
+            history: dspy.History = dspy.InputField()
+            answer: str = dspy.OutputField()
+
+        history = dspy.History.from_raw([
+            {"role": "user", "content": "Hello"},
+            {"role": "assistant", "content": "Hi there!"},
+        ])
+
+        adapter = dspy.ChatAdapter()
+        messages = adapter.format(MySignature, [], {"question": "test", "history": history})
+
+        # Should have: system, user (Hello), assistant (Hi there!), user (test question)
+        assert len(messages) == 4
+        assert messages[1] == {"role": "user", "content": "Hello"}
+        assert messages[2] == {"role": "assistant", "content": "Hi there!"}
+
+    def test_adapter_kv_mode_serializes_complex_values(self):
+        """KV mode serializes non-primitive values to strings."""
+        class MySignature(dspy.Signature):
+            question: str = dspy.InputField()
+            history: dspy.History = dspy.InputField()
+            answer: str = dspy.OutputField()
+
+        history = dspy.History.from_kv([
+            {
+                "input_fields": {"args": {"key": "value"}, "number": 42},
+                "output_fields": {"result": ["a", "b", "c"]},
+            }
+        ])
+
+        adapter = dspy.ChatAdapter()
+        messages = adapter.format(MySignature, [], {"question": "test", "history": history})
+
+        # Values should be serialized to strings
+        assert "args" in messages[1]["content"]
+        assert "number" in messages[1]["content"]
+        assert "result" in messages[2]["content"]
+
+    def test_adapter_kv_mode_input_only(self):
+        """KV mode with only input_fields produces only user message."""
+        class MySignature(dspy.Signature):
+            question: str = dspy.InputField()
+            history: dspy.History = dspy.InputField()
+            answer: str = dspy.OutputField()
+
+        history = dspy.History.from_kv([{"input_fields": {"thought": "Thinking..."}}])
+
+        adapter = dspy.ChatAdapter()
+        messages = adapter.format(MySignature, [], {"question": "test", "history": history})
+
+        # Should have: system, user (thought), user (question)
+        assert len(messages) == 3
+        assert messages[1]["role"] == "user"
+        assert "thought" in messages[1]["content"]
+
+    def test_adapter_formats_dict_mode_history(self):
+        """Dict mode (default) puts all kv pairs in single user message."""
+        class MySignature(dspy.Signature):
+            question: str = dspy.InputField()
+            history: dspy.History = dspy.InputField()
+            answer: str = dspy.OutputField()
+
+        history = dspy.History(messages=[
+            {"thought": "I need to search", "tool_name": "search", "observation": "Results found"},
+        ])
+
+        adapter = dspy.ChatAdapter()
+        messages = adapter.format(MySignature, [], {"question": "test", "history": history})
+
+        # Should have: system, user (all fields), user (question)
+        assert len(messages) == 3
+        assert messages[1]["role"] == "user"
+        assert "thought" in messages[1]["content"]
+        assert "tool_name" in messages[1]["content"]
+        assert "observation" in messages[1]["content"]
+
+    def test_adapter_formats_signature_mode_history(self):
+        """Signature mode produces user/assistant pairs matching signature fields."""
+        class MySignature(dspy.Signature):
+            question: str = dspy.InputField()
+            history: dspy.History = dspy.InputField()
+            answer: str = dspy.OutputField()
+
+        history = dspy.History(messages=[
+            {"question": "What is 2+2?", "answer": "4"},
+        ], mode="signature")
+
+        adapter = dspy.ChatAdapter()
+        messages = adapter.format(MySignature, [], {"question": "test", "history": history})
+
+        # Should have: system, user (question), assistant (answer), user (question)
+        assert len(messages) == 4
+        assert messages[1]["role"] == "user"
+        assert "What is 2+2?" in messages[1]["content"]
+        assert messages[2]["role"] == "assistant"
+        assert "4" in messages[2]["content"]
diff --git a/tests/adapters/test_json_adapter.py b/tests/adapters/test_json_adapter.py
index 373c01c67e..95abacfe0a 100644
--- a/tests/adapters/test_json_adapter.py
+++ b/tests/adapters/test_json_adapter.py
@@ -522,7 +522,8 @@ class MySignature(dspy.Signature):
         messages=[
             {"question": "What is the capital of France?", "answer": "Paris"},
             {"question": "What is the capital of Germany?", "answer": "Berlin"},
-        ]
+        ],
+        mode="signature",
     )
 
     adapter = dspy.JSONAdapter()
diff --git a/tests/predict/test_react.py b/tests/predict/test_react.py
index 09fd1c7c85..e9267e8760 100644
--- a/tests/predict/test_react.py
+++ b/tests/predict/test_react.py
@@ -1,4 +1,3 @@
-import re
 
 import litellm
 import pytest
@@ -46,7 +45,7 @@ def make_images():
     react = dspy.ReAct("question -> answer", tools=[make_images])
     react(question="Draw me something red")
 
-    sigs_with_obs = [sig for sig, inputs in captured_calls if "observation_0" in inputs]
+    sigs_with_obs = [sig for sig, inputs in captured_calls if "observation_0" in str(inputs)]
     assert sigs_with_obs, "Expected ReAct to format a trajectory containing observation_0"
 
     observation_content = lm.history[1]["messages"][1]["content"]
@@ -111,24 +110,41 @@ class InvitationSignature(dspy.Signature):
     )
     assert outputs.invitation_letter == "It's my honor to invite Alice to the Science Fair event on Friday."
 
-    expected_trajectory = {
-        "thought_0": "I need to write an invitation letter for Alice to the Science Fair event.",
-        "tool_name_0": "write_invitation_letter",
-        "tool_args_0": {
-            "participant_name": "Alice",
-            "event_info": {
-                "name": "Science Fair",
-                "date": "Friday",
-                "participants": {"Alice": "female", "Bob": "male"},
-            },
-        },
-        "observation_0": "It's my honor to invite Alice to event Science Fair on Friday",
-        "thought_1": "I have successfully written the invitation letter for Alice to the Science Fair. Now I can finish the task.",
-        "tool_name_1": "finish",
-        "tool_args_1": {},
-        "observation_1": "Completed.",
-    }
-    assert outputs.trajectory == expected_trajectory
+    # Verify trajectory is a History object with raw mode (tool call format)
+    traj = outputs.trajectory
+    assert isinstance(traj, dspy.History)
+    assert traj.mode == "raw"
+    # 2 tool calls (write_invitation_letter, finish), each an assistant + tool pair (4 messages), plus 1 extract = 5 messages
+    assert len(traj.messages) == 5
+
+    # Check first message (tool call - assistant)
+    msg0 = traj.messages[0]
+    assert msg0["role"] == "assistant"
+    assert msg0["content"] == "I need to write an invitation letter for Alice to the Science Fair event."
+    assert len(msg0["tool_calls"]) == 1
+    assert msg0["tool_calls"][0]["function"]["name"] == "write_invitation_letter"
+
+    # Check second message (tool response)
+    msg1 = traj.messages[1]
+    assert msg1["role"] == "tool"
+    assert msg1["tool_call_id"] == msg0["tool_calls"][0]["id"]
+    assert "It's my honor to invite Alice to event Science Fair on Friday" in msg1["content"]
+
+    # Check third message (finish - assistant)
+    msg2 = traj.messages[2]
+    assert msg2["role"] == "assistant"
+    assert msg2["tool_calls"][0]["function"]["name"] == "finish"
+
+    # Check fourth message (finish - tool response)
+    msg3 = traj.messages[3]
+    assert msg3["role"] == "tool"
+    assert msg3["content"] == "Completed."
+
+    # Check last message (extract)
+    msg_extract = traj.messages[-1]
+    assert msg_extract["role"] == "assistant"
+    assert "This is a very rigorous reasoning process, trust me bro!" in msg_extract["content"]
+    assert "invitation_letter" in msg_extract["content"]
 
 
 def test_tool_calling_without_typehint():
@@ -147,20 +163,40 @@ def foo(a, b):
     dspy.configure(lm=lm)
     outputs = react(a=1, b=2)
 
-    expected_trajectory = {
-        "thought_0": "I need to add two numbers.",
-        "tool_name_0": "foo",
-        "tool_args_0": {
-            "a": 1,
-            "b": 2,
-        },
-        "observation_0": 3,
-        "thought_1": "I have the sum, now I can finish.",
-        "tool_name_1": "finish",
-        "tool_args_1": {},
-        "observation_1": "Completed.",
-    }
-    assert outputs.trajectory == expected_trajectory
+    # Verify trajectory is a History object with raw mode
+    traj = outputs.trajectory
+    assert isinstance(traj, dspy.History)
+    assert traj.mode == "raw"
+    # 2 tool calls (each = assistant + tool) + 1 extract = 5 messages
+    assert len(traj.messages) == 5
+
+    # Check first message (tool call - assistant)
+    msg0 = traj.messages[0]
+    assert msg0["role"] == "assistant"
+    assert msg0["content"] == "I need to add two numbers."
+    assert msg0["tool_calls"][0]["function"]["name"] == "foo"
+
+    # Check second message (tool response)
+    msg1 = traj.messages[1]
+    assert msg1["role"] == "tool"
+    assert msg1["content"] == "3"  # JSON serialized
+
+    # Check third message (finish - assistant)
+    msg2 = traj.messages[2]
+    assert msg2["role"] == "assistant"
+    assert msg2["content"] == "I have the sum, now I can finish."
+    assert msg2["tool_calls"][0]["function"]["name"] == "finish"
+
+    # Check fourth message (finish - tool response)
+    msg3 = traj.messages[3]
+    assert msg3["role"] == "tool"
+    assert msg3["content"] == "Completed."
+
+    # Check last message (extract)
+    msg_extract = traj.messages[-1]
+    assert msg_extract["role"] == "assistant"
+    assert "I added the numbers successfully" in msg_extract["content"]
+    assert "c: 3" in msg_extract["content"]
 
 
 def test_trajectory_truncation():
@@ -198,9 +234,19 @@ def mock_react(**kwargs):
     # Call forward and get the result
     result = react(input_text="test input")
 
-    # Verify that older entries in the trajectory were truncated
-    assert "thought_0" not in result.trajectory
-    assert "thought_2" in result.trajectory
+    # Verify trajectory is a History object
+    traj = result.trajectory
+    assert isinstance(traj, dspy.History)
+    assert traj.mode == "raw"
+
+    # Verify that older entries were truncated (first assistant+tool pair removed)
+    # After truncation, we should have messages for: Thought 2 (assistant+tool), finish (assistant+tool), extract
+    assert len(traj.messages) >= 4
+
+    # First message should be Thought 2's assistant message (Thought 1 was truncated)
+    assert traj.messages[0]["role"] == "assistant"
+    assert traj.messages[0]["content"] == "Thought 2"
+
     assert result.output_text == "Final output"
 
 
@@ -232,24 +278,32 @@ def foo(a, b):
     outputs = react(a=1, b=2, max_iters=2)
     traj = outputs.trajectory
 
-    # --- exact-match checks (thoughts + tool calls) -------------------------
-    control_expected = {
-        "thought_0": "I need to add two numbers.",
-        "tool_name_0": "foo",
-        "tool_args_0": {"a": 1, "b": 2},
-        "thought_1": "I need to add two numbers.",
-        "tool_name_1": "foo",
-        "tool_args_1": {"a": 1, "b": 2},
-    }
-    for k, v in control_expected.items():
-        assert traj[k] == v, f"{k} mismatch"
-
-    # --- flexible checks for observations ----------------------------------
-    # We only care that each observation mentions our error string; we ignore
-    # any extra traceback detail or differing prefixes.
-    for i in range(2):
-        obs = traj[f"observation_{i}"]
-        assert re.search(r"\btool error\b", obs), f"unexpected observation_{i!r}: {obs}"
+    # Verify trajectory is a History object with raw mode
+    assert isinstance(traj, dspy.History)
+    assert traj.mode == "raw"
+    # 2 tool calls (each = assistant + tool) + 1 extract = 5 messages
+    assert len(traj.messages) == 5
+
+    # Check tool call messages have the expected structure
+    # Messages 0, 2 are assistant messages with tool_calls
+    # Messages 1, 3 are tool response messages
+    for i in [0, 2]:
+        msg = traj.messages[i]
+        assert msg["role"] == "assistant"
+        assert msg["content"] == "I need to add two numbers."
+        assert msg["tool_calls"][0]["function"]["name"] == "foo"
+
+    for i in [1, 3]:
+        msg = traj.messages[i]
+        assert msg["role"] == "tool"
+        # Observation should contain the error
+        assert "tool error" in msg["content"]
+
+    # Check extract message
+    msg_extract = traj.messages[-1]
+    assert msg_extract["role"] == "assistant"
+    assert "I added the numbers successfully" in msg_extract["content"]
+    assert "c: 3" in msg_extract["content"]
 
 
 @pytest.mark.asyncio
@@ -310,24 +364,37 @@ class InvitationSignature(dspy.Signature):
         )
     assert outputs.invitation_letter == "It's my honor to invite Alice to the Science Fair event on Friday."
 
-    expected_trajectory = {
-        "thought_0": "I need to write an invitation letter for Alice to the Science Fair event.",
-        "tool_name_0": "write_invitation_letter",
-        "tool_args_0": {
-            "participant_name": "Alice",
-            "event_info": {
-                "name": "Science Fair",
-                "date": "Friday",
-                "participants": {"Alice": "female", "Bob": "male"},
-            },
-        },
-        "observation_0": "It's my honor to invite Alice to event Science Fair on Friday",
-        "thought_1": "I have successfully written the invitation letter for Alice to the Science Fair. Now I can finish the task.",
-        "tool_name_1": "finish",
-        "tool_args_1": {},
-        "observation_1": "Completed.",
-    }
-    assert outputs.trajectory == expected_trajectory
+    # Verify trajectory is a History object with raw mode
+    traj = outputs.trajectory
+    assert isinstance(traj, dspy.History)
+    assert traj.mode == "raw"
+    # 2 tool calls (write_invitation_letter, finish), each an assistant + tool pair (4 messages), plus 1 extract = 5 messages
+    assert len(traj.messages) == 5
+
+    # Check first message (tool call - assistant)
+    msg0 = traj.messages[0]
+    assert msg0["role"] == "assistant"
+    assert msg0["tool_calls"][0]["function"]["name"] == "write_invitation_letter"
+
+    # Check second message (tool response)
+    msg1 = traj.messages[1]
+    assert msg1["role"] == "tool"
+    assert "It's my honor to invite Alice to event Science Fair on Friday" in msg1["content"]
+
+    # Check third message (finish - assistant)
+    msg2 = traj.messages[2]
+    assert msg2["role"] == "assistant"
+    assert msg2["tool_calls"][0]["function"]["name"] == "finish"
+
+    # Check fourth message (finish - tool response)
+    msg3 = traj.messages[3]
+    assert msg3["role"] == "tool"
+    assert msg3["content"] == "Completed."
+
+    # Check last message (extract)
+    msg_extract = traj.messages[-1]
+    assert msg_extract["role"] == "assistant"
+    assert "This is a very rigorous reasoning process, trust me bro!" in msg_extract["content"]
 
 
 @pytest.mark.asyncio
@@ -357,21 +424,27 @@ async def foo(a, b):
         outputs = await react.acall(a=1, b=2, max_iters=2)
     traj = outputs.trajectory
 
-    # Exact-match checks (thoughts + tool calls)
-    control_expected = {
-        "thought_0": "I need to add two numbers.",
-        "tool_name_0": "foo",
-        "tool_args_0": {"a": 1, "b": 2},
-        "thought_1": "I need to add two numbers.",
-        "tool_name_1": "foo",
-        "tool_args_1": {"a": 1, "b": 2},
-    }
-    for k, v in control_expected.items():
-        assert traj[k] == v, f"{k} mismatch"
-
-    # Flexible checks for observations
-    # We only care that each observation mentions our error string; we ignore
-    # any extra traceback detail or differing prefixes.
-    for i in range(2):
-        obs = traj[f"observation_{i}"]
-        assert re.search(r"\btool error\b", obs), f"unexpected observation_{i!r}: {obs}"
+    # Verify trajectory is a History object with raw mode
+    assert isinstance(traj, dspy.History)
+    assert traj.mode == "raw"
+    # 2 tool calls (each = assistant + tool) + 1 extract = 5 messages
+    assert len(traj.messages) == 5
+
+    # Check tool call messages have the expected structure
+    for i in [0, 2]:
+        msg = traj.messages[i]
+        assert msg["role"] == "assistant"
+        assert msg["content"] == "I need to add two numbers."
+        assert msg["tool_calls"][0]["function"]["name"] == "foo"
+
+    for i in [1, 3]:
+        msg = traj.messages[i]
+        assert msg["role"] == "tool"
+        # Observation should contain the error
+        assert "tool error" in msg["content"]
+
+    # Check extract message
+    msg_extract = traj.messages[-1]
+    assert msg_extract["role"] == "assistant"
+    assert "I added the numbers successfully" in msg_extract["content"]
+    assert "c: 3" in msg_extract["content"]

From b24b4481584f42fd62dc7e17c68b8dd7fe646e2d Mon Sep 17 00:00:00 2001
From: Isaac Miller <isaacbmiller@gmail.com>
Date: Thu, 4 Dec 2025 10:17:50 -0500
Subject: [PATCH 2/2] Make mode explicit and consistent between messages

---
 dspy/adapters/base.py               | 117 +++++-----
 dspy/adapters/types/history.py      | 169 ++++++++-------
 dspy/predict/react.py               | 317 ++++++++++++----------------
 tests/adapters/test_baml_adapter.py |  11 +-
 tests/adapters/test_chat_adapter.py |  94 +++++----
 tests/adapters/test_json_adapter.py |  11 +-
 6 files changed, 342 insertions(+), 377 deletions(-)

diff --git a/dspy/adapters/base.py b/dspy/adapters/base.py
index 9ac3146671..4208fc18b6 100644
--- a/dspy/adapters/base.py
+++ b/dspy/adapters/base.py
@@ -453,13 +453,13 @@ def format_demos(self, signature: type[Signature], demos: list[dict[str, Any]])
 
         return messages
 
-    def _get_history_field_name(self, signature: type[Signature]) -> bool:
+    def _get_history_field_name(self, signature: type[Signature]) -> str | None:
         for name, field in signature.input_fields.items():
             if field.annotation == History:
                 return name
         return None
 
-    def _get_tool_call_input_field_name(self, signature: type[Signature]) -> bool:
+    def _get_tool_call_input_field_name(self, signature: type[Signature]) -> str | None:
         for name, field in signature.input_fields.items():
             # Look for annotation `list[dspy.Tool]` or `dspy.Tool`
             origin = get_origin(field.annotation)
@@ -469,7 +469,7 @@ def _get_tool_call_input_field_name(self, signature: type[Signature]) -> bool:
                 return name
         return None
 
-    def _get_tool_call_output_field_name(self, signature: type[Signature]) -> bool:
+    def _get_tool_call_output_field_name(self, signature: type[Signature]) -> str | None:
         for name, field in signature.output_fields.items():
             if field.annotation == ToolCalls:
                 return name
@@ -498,72 +498,75 @@ def format_conversation_history(
         history_field_name: str,
         inputs: dict[str, Any],
     ) -> list[dict[str, Any]]:
-        """Format the conversation history.
+        """Format the conversation history as multiturn messages.
 
-        This method formats the conversation history and the current input as multiturn messages.
         Supports four modes:
-        - signature: Dict keys match signature input/output fields → user/assistant pairs
-        - kv: Nested {"input_fields": {...}, "output_fields": {...}} → user/assistant pairs
-        - dict: Arbitrary serializable kv pairs → all in single user message (default)
-        - raw: Direct LM messages with {"role": "user", "content": "..."} → passed through
-
-        Args:
-            signature: The DSPy signature for which to format the conversation history.
-            history_field_name: The name of the history field in the signature.
-            inputs: The input arguments to the DSPy module.
-
-        Returns:
-            A list of multiturn messages.
+        - raw: Direct LM messages → passed through as-is
+        - demo: {"input_fields": {...}, "output_fields": {...}} → user/assistant pairs
+        - flat: Arbitrary kv pairs → single user message per dict (default)
+        - signature: Dict keys match signature fields → user/assistant pairs
         """
         history = inputs.get(history_field_name)
         if history is None:
             return []
 
-        messages = []
-        for msg in history.messages:
-            mode = history._detect_mode(msg)
-
-            if mode == "raw":
-                messages.append(dict(msg))
-
-            elif mode == "kv":
-                if "input_fields" in msg:
-                    input_dict = {k: self._serialize_kv_value(v) for k, v in msg["input_fields"].items()}
-                    sig = self._make_dynamic_signature_for_inputs(list(input_dict.keys()))
-                    messages.append({
-                        "role": "user",
-                        "content": self.format_user_message_content(sig, input_dict),
-                    })
-                if "output_fields" in msg:
-                    output_dict = {k: self._serialize_kv_value(v) for k, v in msg["output_fields"].items()}
-                    sig = self._make_dynamic_signature_for_outputs(list(output_dict.keys()))
-                    messages.append({
-                        "role": "assistant",
-                        "content": self.format_assistant_message_content(sig, output_dict),
-                    })
-
-            elif mode == "signature":
-                messages.append({
+        del inputs[history_field_name]
+
+        if history.mode == "raw":
+            return [dict(msg) for msg in history.messages]
+        if history.mode == "demo":
+            return self._format_demo_history(history.messages)
+        if history.mode == "signature":
+            return self._format_signature_history(signature, history.messages)
+        return self._format_flat_history(history.messages)
+
+    def _format_demo_history(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        """Format demo-mode history (input_fields/output_fields → user/assistant)."""
+        result = []
+        for msg in messages:
+            if "input_fields" in msg:
+                input_dict = {k: self._serialize_kv_value(v) for k, v in msg["input_fields"].items()}
+                sig = self._make_dynamic_signature_for_inputs(list(input_dict.keys()))
+                result.append({
                     "role": "user",
-                    "content": self.format_user_message_content(signature, msg),
+                    "content": self.format_user_message_content(sig, input_dict),
                 })
-                messages.append({
+            if "output_fields" in msg:
+                output_dict = {k: self._serialize_kv_value(v) for k, v in msg["output_fields"].items()}
+                sig = self._make_dynamic_signature_for_outputs(list(output_dict.keys()))
+                result.append({
                     "role": "assistant",
-                    "content": self.format_assistant_message_content(signature, msg),
-                })
-
-            else:  # dict mode (default) - all kv pairs go into single user message
-                serialized = {k: self._serialize_kv_value(v) for k, v in msg.items()}
-                sig = self._make_dynamic_signature_for_inputs(list(serialized.keys()))
-                messages.append({
-                    "role": "user",
-                    "content": self.format_user_message_content(sig, serialized),
+                    "content": self.format_assistant_message_content(sig, output_dict),
                 })
+        return result
 
-        # Remove the history field from the inputs
-        del inputs[history_field_name]
-
-        return messages
+    def _format_signature_history(
+        self, signature: type[Signature], messages: list[dict[str, Any]]
+    ) -> list[dict[str, Any]]:
+        """Format signature-mode history (signature fields → user/assistant pairs)."""
+        result = []
+        for msg in messages:
+            result.append({
+                "role": "user",
+                "content": self.format_user_message_content(signature, msg),
+            })
+            result.append({
+                "role": "assistant",
+                "content": self.format_assistant_message_content(signature, msg),
+            })
+        return result
+
+    def _format_flat_history(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        """Format flat-mode history (all kv pairs in single user message)."""
+        result = []
+        for msg in messages:
+            serialized = {k: self._serialize_kv_value(v) for k, v in msg.items()}
+            sig = self._make_dynamic_signature_for_inputs(list(serialized.keys()))
+            result.append({
+                "role": "user",
+                "content": self.format_user_message_content(sig, serialized),
+            })
+        return result
 
     def parse(self, signature: type[Signature], completion: str) -> dict[str, Any]:
         """Parse the LM output into a dictionary of the output fields.
diff --git a/dspy/adapters/types/history.py b/dspy/adapters/types/history.py
index 9659a06c30..e5738ea402 100644
--- a/dspy/adapters/types/history.py
+++ b/dspy/adapters/types/history.py
@@ -4,42 +4,42 @@
 
 
 class History(pydantic.BaseModel):
-    """Class representing the conversation history.
+    """Class representing conversation history.
 
-    History supports four message formats:
-    
-    1. **Signature mode**: Dict keys match signature input/output fields → user/assistant pairs.
-       Must be explicitly set via mode="signature".
+    History supports four message formats, with one mode per History instance:
+
+    1. **Raw mode**: Direct LM messages with `{"role": "...", "content": "..."}`.
+       Used for ReAct trajectories and native tool calling.
        ```python
-       history = dspy.History(messages=[
-           {"question": "What is 2+2?", "answer": "4"},
-       ], mode="signature")
+       history = dspy.History.from_raw([
+           {"role": "user", "content": "Hello"},
+           {"role": "assistant", "content": "Hi there!"},
+       ])
        ```
-    
-    2. **KV mode**: Nested `{"input_fields": {...}, "output_fields": {...}}` → user/assistant pairs.
+
+    2. **Demo mode**: Nested `{"input_fields": {...}, "output_fields": {...}}` pairs.
+       Used for few-shot demonstrations with explicit input/output separation.
        ```python
-       history = dspy.History.from_kv([
-           {"input_fields": {"thought": "...", "tool_name": "search"}, "output_fields": {"observation": "..."}},
+       history = dspy.History.from_demo([
+           {"input_fields": {"question": "2+2?"}, "output_fields": {"answer": "4"}},
        ])
        ```
-    
-    3. **Dict mode** (default): Arbitrary serializable key-value pairs → all in single user message.
+
+    3. **Flat mode** (default): Arbitrary key-value pairs in a single user message.
        ```python
        history = dspy.History(messages=[
-           {"thought": "I need to search", "tool_name": "search", "observation": "Results found"},
+           {"thought": "I need to search", "tool_name": "search", "observation": "Found it"},
        ])
        ```
-    
-    4. **Raw mode**: Direct LM messages with `{"role": "user", "content": "..."}` → passed through.
+
+    4. **Signature mode**: Dict keys match signature fields → user/assistant pairs.
+       Must be explicitly set.
        ```python
-       history = dspy.History.from_raw([
-           {"role": "user", "content": "Hello"},
-           {"role": "assistant", "content": "Hi there!"},
+       history = dspy.History.from_signature([
+           {"question": "What is 2+2?", "answer": "4"},
        ])
        ```
 
-    The mode is auto-detected from the first message if not explicitly provided.
-
     Example:
         ```python
         import dspy
@@ -51,12 +51,9 @@ class MySignature(dspy.Signature):
             history: dspy.History = dspy.InputField()
             answer: str = dspy.OutputField()
 
-        history = dspy.History(
-            messages=[
-                {"question": "What is the capital of France?", "answer": "Paris"},
-                {"question": "What is the capital of Germany?", "answer": "Berlin"},
-            ]
-        )
+        history = dspy.History.from_signature([
+            {"question": "What is the capital of France?", "answer": "Paris"},
+        ])
 
         predict = dspy.Predict(MySignature)
         outputs = predict(question="What is the capital of France?", history=history)
@@ -81,7 +78,7 @@ class MySignature(dspy.Signature):
     """
 
     messages: list[dict[str, Any]]
-    mode: Literal["signature", "kv", "dict", "raw"] | None = None
+    mode: Literal["signature", "demo", "flat", "raw"] = "flat"
 
     model_config = pydantic.ConfigDict(
         frozen=True,
@@ -90,85 +87,85 @@ class MySignature(dspy.Signature):
         extra="forbid",
     )
 
-    def _detect_mode(self, msg: dict) -> str:
-        """Detect the mode for a message based on its structure.
-        
-        Detection rules:
-        - Raw: has "role" and "content" keys, but NOT "input_fields"/"output_fields"
-        - KV: keys are ONLY "input_fields" and/or "output_fields"
-        - Signature: must be explicitly set (requires matching against signature fields)
-        - Dict: everything else (default) - arbitrary kv pairs go into user message
-        """
-        if self.mode:
-            return self.mode
+    @staticmethod
+    def _infer_mode_from_msg(msg: dict) -> str:
+        """Infer the mode from a message's structure.
 
+        Detection rules (conservative):
+        - Raw: has "role" key and ONLY LM-like keys (role, content, tool_calls, tool_call_id, name)
+        - Demo: keys are ONLY "input_fields" and/or "output_fields"
+        - Flat: everything else (signature mode must be explicit)
+        """
         keys = set(msg.keys())
+        lm_keys = {"role", "content", "tool_calls", "tool_call_id", "name"}
 
-        if {"role", "content"} <= keys and not ({"input_fields", "output_fields"} & keys):
+        if "role" in keys and keys <= lm_keys:
             return "raw"
 
         if keys <= {"input_fields", "output_fields"} and keys:
-            return "kv"
+            return "demo"
+
+        return "flat"
+
+    def _validate_msg_for_mode(self, msg: dict, mode: str) -> None:
+        """Validate a message conforms to the expected mode structure."""
+        if mode == "raw":
+            if not isinstance(msg.get("role"), str):
+                raise ValueError(f"Raw mode: 'role' must be a string: {msg}")
+            content = msg.get("content")
+            if content is not None and not isinstance(content, str):
+                raise ValueError(f"Raw mode: 'content' must be a string or None: {msg}")
 
-        return "dict"
+        elif mode == "demo":
+            if "input_fields" in msg and not isinstance(msg["input_fields"], dict):
+                raise ValueError(f"Demo mode: 'input_fields' must be a dict: {msg}")
+            if "output_fields" in msg and not isinstance(msg["output_fields"], dict):
+                raise ValueError(f"Demo mode: 'output_fields' must be a dict: {msg}")
+
+        elif mode == "signature":
+            if not isinstance(msg, dict) or not msg:
+                raise ValueError(f"Signature mode: messages must be non-empty dicts: {msg}")
 
     @pydantic.model_validator(mode="after")
     def _validate_messages(self) -> "History":
+        if not self.messages:
+            return self
+
+        # "flat" (default or explicit) is upgraded when the first message clearly matches raw or demo
+        if self.mode == "flat":
+            inferred = self._infer_mode_from_msg(self.messages[0])
+            if inferred in {"raw", "demo"}:
+                object.__setattr__(self, "mode", inferred)
+
         for msg in self.messages:
-            detected = self._detect_mode(msg)
-
-            if detected == "raw":
-                if not isinstance(msg.get("role"), str):
-                    raise ValueError(f"'role' must be a string: {msg}")
-                # content can be None for tool call messages, or string otherwise
-                content = msg.get("content")
-                if content is not None and not isinstance(content, str):
-                    raise ValueError(f"'content' must be a string or None: {msg}")
-
-            elif detected == "kv":
-                if "input_fields" in msg and not isinstance(msg["input_fields"], dict):
-                    raise ValueError(f"'input_fields' must be a dict: {msg}")
-                if "output_fields" in msg and not isinstance(msg["output_fields"], dict):
-                    raise ValueError(f"'output_fields' must be a dict: {msg}")
+            self._validate_msg_for_mode(msg, self.mode)
 
         return self
 
     def with_messages(self, messages: list[dict[str, Any]]) -> "History":
-        """Return a new History with additional messages appended.
-        
-        Args:
-            messages: List of messages to append.
-            
-        Returns:
-            A new History instance with the messages appended.
-        """
+        """Return a new History with additional messages appended."""
         return History(messages=[*self.messages, *messages], mode=self.mode)
 
     @classmethod
-    def from_kv(cls, messages: list[dict[str, Any]]) -> "History":
-        """Create a History instance with KV mode.
-        
-        KV mode expects messages with "input_fields" and/or "output_fields" keys,
-        each containing a dict of field names to values.
-        
-        Args:
-            messages: List of dicts with "input_fields" and/or "output_fields" keys.
-            
-        Returns:
-            A History instance with mode="kv".
+    def from_demo(cls, messages: list[dict[str, Any]]) -> "History":
+        """Create a History with demo mode.
+
+        Demo mode expects messages with "input_fields" and/or "output_fields" keys.
         """
-        return cls(messages=messages, mode="kv")
+        return cls(messages=messages, mode="demo")
 
     @classmethod
     def from_raw(cls, messages: list[dict[str, Any]]) -> "History":
-        """Create a History instance with raw mode.
-        
+        """Create a History with raw mode.
+
         Raw mode expects direct LM messages with "role" and "content" keys.
-        
-        Args:
-            messages: List of dicts with "role" and "content" keys.
-            
-        Returns:
-            A History instance with mode="raw".
         """
         return cls(messages=messages, mode="raw")
+
+    @classmethod
+    def from_signature(cls, messages: list[dict[str, Any]]) -> "History":
+        """Create a History with signature mode.
+
+        Signature mode expects dicts with keys matching the signature's fields.
+        """
+        return cls(messages=messages, mode="signature")
diff --git a/dspy/predict/react.py b/dspy/predict/react.py
index b7e1d076cd..3e3b1e1f39 100644
--- a/dspy/predict/react.py
+++ b/dspy/predict/react.py
@@ -18,29 +18,29 @@
 
 
 class ReAct(Module):
-    def __init__(self, signature: type["Signature"], tools: list[Callable], max_iters: int = 10):
-        """
-        ReAct stands for "Reasoning and Acting," a popular paradigm for building tool-using agents.
-        In this approach, the language model is iteratively provided with a list of tools and has
-        to reason about the current situation. The model decides whether to call a tool to gather more
-        information or to finish the task based on its reasoning process. The DSPy version of ReAct is
-        generalized to work over any signature, thanks to signature polymorphism.
+    """ReAct (Reasoning and Acting) agent module.
 
-        Args:
-            signature: The signature of the module, which defines the input and output of the react module.
-            tools (list[Callable]): A list of functions, callable objects, or `dspy.Tool` instances.
-            max_iters (Optional[int]): The maximum number of iterations to run. Defaults to 10.
+    ReAct iteratively reasons about the current situation and takes actions using tools.
+    The trajectory is stored as a History in raw LM message format.
 
-        Example:
+    Args:
+        signature: The signature defining input and output fields.
+        tools: List of callable tools the agent can use.
+        max_iters: Maximum reasoning iterations (default: 10).
 
+    Example:
         ```python
         def get_weather(city: str) -> str:
             return f"The weather in {city} is sunny."
 
-        react = dspy.ReAct(signature="question->answer", tools=[get_weather])
+        react = dspy.ReAct("question -> answer", tools=[get_weather])
         pred = react(question="What is the weather in Tokyo?")
+        print(pred.answer)
+        print(pred.trajectory)  # History object with tool call messages
         ```
-        """
+    """
+
+    def __init__(self, signature: type["Signature"], tools: list[Callable], max_iters: int = 10):
         super().__init__()
         self.signature = signature = ensure_signature(signature)
         self.max_iters = max_iters
@@ -52,21 +52,22 @@ def get_weather(city: str) -> str:
         outputs = ", ".join([f"`{k}`" for k in signature.output_fields.keys()])
         instr = [f"{signature.instructions}\n"] if signature.instructions else []
 
-        instr.extend(
-            [
-                f"You are an Agent. In each episode, you will be given the fields {inputs} as input. And you can see your past trajectory so far.",
-                f"Your goal is to use one or more of the supplied tools to collect any necessary information for producing {outputs}.\n",
-                "To do this, you will interleave next_thought, next_tool_name, and next_tool_args in each turn, and also when finishing the task.",
-                "After each tool call, you receive a resulting observation, which gets appended to your trajectory.\n",
-                "When writing next_thought, you may reason about the current situation and plan for future steps.",
-                "When selecting the next_tool_name and its next_tool_args, the tool must be one of:\n",
-            ]
-        )
+        instr.extend([
+            f"You are an Agent. In each episode, you will be given the fields {inputs} as input. "
+            "And you can see your past trajectory so far.",
+            f"Your goal is to use one or more of the supplied tools to collect any necessary information "
+            f"for producing {outputs}.\n",
+            "To do this, you will interleave next_thought, next_tool_name, and next_tool_args in each turn, "
+            "and also when finishing the task.",
+            "After each tool call, you receive a resulting observation, which gets appended to your trajectory.\n",
+            "When writing next_thought, you may reason about the current situation and plan for future steps.",
+            "When selecting the next_tool_name and its next_tool_args, the tool must be one of:\n",
+        ])
 
         tools["finish"] = Tool(
             func=lambda: "Completed.",
             name="finish",
-            desc=f"Marks the task as complete. That is, signals that all information for producing the outputs, i.e. {outputs}, are now available to be extracted.",
+            desc=f"Marks the task as complete. Signals that all information for producing {outputs} is available.",
             args={},
         )
 
@@ -83,224 +84,183 @@ def get_weather(city: str) -> str:
         )
 
         extract_instructions = (
-            "You are an extraction Agent whose job it is to extract the fields: {outputs} from the given trajectory."
-            + "The original task was:\n"
-            + signature.instructions
-            + "\nIn trying to solve this task, an executor agent with has used tools to generate the conversation below."
-            + "\nGiven this trajectory, your only job is to extract the fields: {outputs}."
+            f"You are an extraction agent. Extract the fields: {outputs} from the given trajectory.\n"
+            f"The original task was:\n{signature.instructions}\n"
+            "An executor agent has used tools to generate the conversation below. "
+            f"Given this trajectory, extract the fields: {outputs}."
         )
         fallback_signature = dspy.Signature(
             {**signature.input_fields, **signature.output_fields},
             extract_instructions,
-        ).append("trajectory", dspy.InputField(desc="The history of the conversation. There is enough context to produce the final output"), type_=History)
+        ).append(
+            "trajectory",
+            dspy.InputField(desc="The conversation history with enough context to produce the output"),
+            type_=History,
+        )
 
         self.tools = tools
         self.react = dspy.Predict(react_signature)
         self.extract = dspy.ChainOfThought(fallback_signature)
 
-    def forward(self, **input_args):
+    def forward(self, *, trajectory: History | None = None, **input_args):
         max_iters = input_args.pop("max_iters", self.max_iters)
-
-        # Check for existing history in input_args, otherwise start empty
-        trajectory = input_args.pop("trajectory", None)
-        if trajectory is None:
-            trajectory = History(messages=[], mode="raw")
+        trajectory = trajectory or History.from_raw([])
 
         for _ in range(max_iters):
             try:
-                pred, trajectory = self._call_with_potential_truncation(self.react, trajectory, **input_args)
-            except ValueError as err:
-                logger.warning(f"Ending the trajectory: Agent failed to select a valid tool: {_fmt_exc(err)}")
+                pred, trajectory = self._call_with_retry(self.react, trajectory, **input_args)
+            except (ValueError, ContextWindowExceededError) as err:
+                logger.warning(f"Ending trajectory: {_fmt_exc(err)}")
                 break
 
-            # Add the agent's action to trajectory
-            trajectory, tool_call_id = self._append_action(
-                trajectory,
-                thought=pred.next_thought,
-                tool_name=pred.next_tool_name,
-                tool_args=pred.next_tool_args,
-            )
-
-            # Execute tool and get observation
-            try:
-                observation = self.tools[pred.next_tool_name](**pred.next_tool_args)
-            except Exception as err:
-                observation = f"Execution error in {pred.next_tool_name}: {_fmt_exc(err)}"
-
-            # Add observation to trajectory
-            trajectory = self._append_observation(trajectory, observation, tool_call_id)
+            observation = self._run_tool(pred.next_tool_name, pred.next_tool_args)
+            trajectory = self._record_step(trajectory, pred, observation)
 
             if pred.next_tool_name == "finish":
                 break
 
-        extract, trajectory = self._call_with_potential_truncation(self.extract, trajectory, **input_args)
-
-        # Add the extract step to the trajectory
-        trajectory = self._append_extract(trajectory, extract)
+        extract, trajectory = self._call_with_retry(self.extract, trajectory, **input_args)
+        trajectory = self._record_extract(trajectory, extract)
 
         return dspy.Prediction(trajectory=trajectory, **extract)
 
-    async def aforward(self, **input_args):
+    async def aforward(self, *, trajectory: History | None = None, **input_args):
         max_iters = input_args.pop("max_iters", self.max_iters)
-
-        # Check for existing history in input_args, otherwise start empty
-        trajectory = input_args.pop("trajectory", None)
-        if trajectory is None:
-            trajectory = History(messages=[], mode="raw")
+        trajectory = trajectory or History.from_raw([])
 
         for _ in range(max_iters):
             try:
-                pred, trajectory = await self._async_call_with_potential_truncation(self.react, trajectory, **input_args)
-            except ValueError as err:
-                logger.warning(f"Ending the trajectory: Agent failed to select a valid tool: {_fmt_exc(err)}")
+                pred, trajectory = await self._acall_with_retry(self.react, trajectory, **input_args)
+            except (ValueError, ContextWindowExceededError) as err:
+                logger.warning(f"Ending trajectory: {_fmt_exc(err)}")
                 break
 
-            # Add the agent's action to trajectory
-            trajectory, tool_call_id = self._append_action(
-                trajectory,
-                thought=pred.next_thought,
-                tool_name=pred.next_tool_name,
-                tool_args=pred.next_tool_args,
-            )
-
-            # Execute tool and get observation
-            try:
-                observation = await self.tools[pred.next_tool_name].acall(**pred.next_tool_args)
-            except Exception as err:
-                observation = f"Execution error in {pred.next_tool_name}: {_fmt_exc(err)}"
-
-            # Add observation to trajectory
-            trajectory = self._append_observation(trajectory, observation, tool_call_id)
+            observation = await self._arun_tool(pred.next_tool_name, pred.next_tool_args)
+            trajectory = self._record_step(trajectory, pred, observation)
 
             if pred.next_tool_name == "finish":
                 break
 
-        extract, trajectory = await self._async_call_with_potential_truncation(self.extract, trajectory, **input_args)
-
-        # Add the extract step to the trajectory
-        trajectory = self._append_extract(trajectory, extract)
+        extract, trajectory = await self._acall_with_retry(self.extract, trajectory, **input_args)
+        trajectory = self._record_extract(trajectory, extract)
 
         return dspy.Prediction(trajectory=trajectory, **extract)
 
-    def _generate_tool_call_id(self) -> str:
-        """Generate a unique tool call ID."""
-        return f"call_{uuid.uuid4().hex[:24]}"
-
-    def _append_action(self, trajectory: History, thought: str, tool_name: str, tool_args: dict) -> tuple[History, str]:
-        """Append an action (thought + tool call) to the trajectory.
-        
-        Returns:
-            Tuple of (updated trajectory, tool_call_id for matching with observation)
-        """
-        tool_call_id = self._generate_tool_call_id()
-        new_msg = {
+    # -------------------------------------------------------------------------
+    # Tool execution
+    # -------------------------------------------------------------------------
+
+    def _run_tool(self, name: str, args: dict) -> str:
+        try:
+            result = self.tools[name](**args)
+            return self._serialize(result)
+        except Exception as err:
+            return f"Execution error in {name}: {_fmt_exc(err)}"
+
+    async def _arun_tool(self, name: str, args: dict) -> str:
+        try:
+            result = await self.tools[name].acall(**args)
+            return self._serialize(result)
+        except Exception as err:
+            return f"Execution error in {name}: {_fmt_exc(err)}"
+
+    def _serialize(self, value: Any) -> str:
+        if isinstance(value, str):
+            return value
+        try:
+            return json.dumps(value)
+        except (TypeError, ValueError):
+            return str(value)
+
+    # -------------------------------------------------------------------------
+    # Trajectory recording
+    # -------------------------------------------------------------------------
+
+    def _record_step(self, trajectory: History, pred, observation: str) -> History:
+        """Record a single agent step (action + observation) to the trajectory."""
+        tool_call_id = f"call_{uuid.uuid4().hex[:24]}"
+
+        action_msg = {
             "role": "assistant",
-            "content": thought,
-            "tool_calls": [
-                {
-                    "id": tool_call_id,
-                    "type": "function",
-                    "function": {
-                        "name": tool_name,
-                        "arguments": json.dumps(tool_args),
-                    },
-                }
-            ],
+            "content": pred.next_thought,
+            "tool_calls": [{
+                "id": tool_call_id,
+                "type": "function",
+                "function": {
+                    "name": pred.next_tool_name,
+                    "arguments": json.dumps(pred.next_tool_args),
+                },
+            }],
         }
-        return trajectory.with_messages([new_msg]), tool_call_id
-
-    def _append_observation(self, trajectory: History, observation: Any, tool_call_id: str) -> History:
-        """Append a tool response to the trajectory."""
-        if isinstance(observation, str):
-            content = observation
-        else:
-            try:
-                content = json.dumps(observation)
-            except (TypeError, ValueError):
-                content = str(observation)
 
-        new_msg = {
+        observation_msg = {
             "role": "tool",
             "tool_call_id": tool_call_id,
-            "content": content,
+            "content": observation,
         }
-        return trajectory.with_messages([new_msg])
 
-    def _append_extract(self, trajectory: History, extract) -> History:
-        """Append the extract step (final reasoning and outputs) to the trajectory."""
+        return trajectory.with_messages([action_msg, observation_msg])
+
+    def _record_extract(self, trajectory: History, extract) -> History:
+        """Record the final extraction result to the trajectory."""
         extract_dict = dict(extract)
         reasoning = extract_dict.pop("reasoning", None)
 
-        content_parts = []
+        parts = []
         if reasoning:
-            content_parts.append(f"Reasoning: {reasoning}")
+            parts.append(f"Reasoning: {reasoning}")
         for key, value in extract_dict.items():
-            if isinstance(value, str):
-                content_parts.append(f"{key}: {value}")
-            else:
-                try:
-                    content_parts.append(f"{key}: {json.dumps(value)}")
-                except (TypeError, ValueError):
-                    content_parts.append(f"{key}: {value}")
-
-        new_msg = {
-            "role": "assistant",
-            "content": "\n".join(content_parts),
-        }
-        return trajectory.with_messages([new_msg])
+            parts.append(f"{key}: {self._serialize(value)}")
 
-    def _call_with_potential_truncation(self, module, trajectory: History, **input_args) -> tuple[Any, History]:
-        """Call module with trajectory, truncating if context window exceeded.
-        
-        Returns:
-            Tuple of (module result, potentially truncated trajectory)
-        """
+        return trajectory.with_messages([{"role": "assistant", "content": "\n".join(parts)}])
+
+    # -------------------------------------------------------------------------
+    # LM calls with truncation retry
+    # -------------------------------------------------------------------------
+
+    def _call_with_retry(self, module, trajectory: History, **input_args) -> tuple[Any, History]:
+        last_err = None
         for _ in range(3):
             try:
                 return module(**input_args, trajectory=trajectory), trajectory
-            except ContextWindowExceededError:
-                logger.warning("Trajectory exceeded the context window, truncating the oldest tool call information.")
+            except ContextWindowExceededError as err:
+                last_err = err
+                logger.warning("Context window exceeded, truncating oldest step.")
                 trajectory = self.truncate_trajectory(trajectory)
-        return None, trajectory
 
-    async def _async_call_with_potential_truncation(self, module, trajectory: History, **input_args) -> tuple[Any, History]:
-        """Call module with trajectory, truncating if context window exceeded.
-        
-        Returns:
-            Tuple of (module result, potentially truncated trajectory)
-        """
+        raise last_err  # re-raise as caught: keeps the original type, fields, and traceback
+
+    async def _acall_with_retry(self, module, trajectory: History, **input_args) -> tuple[Any, History]:
+        last_err = None
         for _ in range(3):
             try:
                 return await module.acall(**input_args, trajectory=trajectory), trajectory
-            except ContextWindowExceededError:
-                logger.warning("Trajectory exceeded the context window, truncating the oldest tool call information.")
+            except ContextWindowExceededError as err:
+                last_err = err
+                logger.warning("Context window exceeded, truncating oldest step.")
                 trajectory = self.truncate_trajectory(trajectory)
-        return None, trajectory
+
+        raise last_err  # re-raise as caught: keeps the original type, fields, and traceback
 
     def truncate_trajectory(self, trajectory: History) -> History:
-        """Truncates the trajectory so that it fits in the context window.
+        """Remove the oldest tool call pair from the trajectory.
 
-        Users can override this method to implement their own truncation logic.
-        For tool call format, we remove pairs of messages (assistant + tool) together.
+        Override this method to implement custom truncation logic.
         """
-        if len(trajectory.messages) < 2:
-            raise ValueError(
-                "The trajectory is too long so your prompt exceeded the context window, but the trajectory cannot be "
-                "truncated because it only has one tool call."
-            )
-
-        # Remove the oldest pair (assistant message with tool_calls + tool response)
         messages = list(trajectory.messages)
-        if messages and messages[0].get("role") == "assistant" and messages[0].get("tool_calls"):
-            # Remove assistant + following tool message(s)
-            messages = messages[1:]
+
+        if len(messages) < 2:
+            raise ValueError("Trajectory too long but cannot truncate: only one step remains.")
+
+        # Remove assistant + following tool response(s)
+        if messages[0].get("role") == "assistant" and messages[0].get("tool_calls"):
+            messages.pop(0)
             while messages and messages[0].get("role") == "tool":
-                messages = messages[1:]
+                messages.pop(0)
         else:
-            # Fallback: just remove the first message
-            messages = messages[1:]
+            messages.pop(0)
 
-        return History(messages=messages, mode="raw")
+        return History(messages=messages, mode=trajectory.mode)
 
 
 def _fmt_exc(err: BaseException, *, limit: int = 5) -> str:
@@ -310,7 +270,6 @@ def _fmt_exc(err: BaseException, *, limit: int = 5) -> str:
     """
 
     import traceback
-
     return "\n" + "".join(traceback.format_exception(type(err), err, err.__traceback__, limit=limit)).strip()
 
 
diff --git a/tests/adapters/test_baml_adapter.py b/tests/adapters/test_baml_adapter.py
index b8c6027678..ebc900c0fb 100644
--- a/tests/adapters/test_baml_adapter.py
+++ b/tests/adapters/test_baml_adapter.py
@@ -357,13 +357,10 @@ class TestSignature(dspy.Signature):
         question: str = dspy.InputField()
         answer: str = dspy.OutputField()
 
-    history = dspy.History(
-        messages=[
-            {"question": "What is the patient's age?", "answer": "45 years old"},
-            {"question": "Any allergies?", "answer": "Penicillin allergy"},
-        ],
-        mode="signature",
-    )
+    history = dspy.History.from_signature([
+        {"question": "What is the patient's age?", "answer": "45 years old"},
+        {"question": "Any allergies?", "answer": "Penicillin allergy"},
+    ])
 
     adapter = BAMLAdapter()
     messages = adapter.format(TestSignature, [], {"history": history, "question": "What medications should we avoid?"})
diff --git a/tests/adapters/test_chat_adapter.py b/tests/adapters/test_chat_adapter.py
index 4a1f505f33..fe9961ae14 100644
--- a/tests/adapters/test_chat_adapter.py
+++ b/tests/adapters/test_chat_adapter.py
@@ -716,54 +716,61 @@ class MySignature(dspy.Signature):
 class TestHistoryModes:
     """Tests for History mode detection and adapter formatting."""
 
-    def test_history_mode_detection_dict_default(self):
-        """Messages with arbitrary keys are detected as dict mode (default)."""
+    def test_history_mode_detection_flat_default(self):
+        """Messages with arbitrary keys are detected as flat mode (default)."""
         history = dspy.History(messages=[{"question": "...", "answer": "..."}])
-        assert history._detect_mode(history.messages[0]) == "dict"
+        assert history.mode == "flat"
 
-    def test_history_mode_detection_kv(self):
-        """Messages with only input_fields/output_fields keys are detected as kv mode."""
+    def test_history_mode_detection_demo(self):
+        """Messages with only input_fields/output_fields keys are detected as demo mode."""
         history = dspy.History(messages=[{"input_fields": {"a": 1}, "output_fields": {"b": 2}}])
-        assert history._detect_mode(history.messages[0]) == "kv"
+        assert history.mode == "demo"
 
-    def test_history_mode_detection_kv_input_only(self):
-        """Messages with only input_fields are detected as kv mode."""
+    def test_history_mode_detection_demo_input_only(self):
+        """Messages with only input_fields are detected as demo mode."""
         history = dspy.History(messages=[{"input_fields": {"a": 1}}])
-        assert history._detect_mode(history.messages[0]) == "kv"
+        assert history.mode == "demo"
 
     def test_history_mode_detection_raw(self):
         """Messages with role+content are detected as raw mode."""
         history = dspy.History(messages=[{"role": "user", "content": "hello"}])
-        assert history._detect_mode(history.messages[0]) == "raw"
+        assert history.mode == "raw"
+
+    def test_history_mode_detection_raw_with_tool_calls(self):
+        """Assistant messages with tool_calls and None content are detected as raw mode."""
+        history = dspy.History(messages=[
+            {"role": "assistant", "content": None, "tool_calls": [{"id": "1", "type": "function", "function": {"name": "test", "arguments": "{}"}}]}
+        ])
+        assert history.mode == "raw"
 
-    def test_history_mode_detection_raw_with_extra_keys(self):
-        """Raw mode allows extra keys like 'name'."""
-        history = dspy.History(messages=[{"role": "user", "content": "hello", "name": "tool"}])
-        assert history._detect_mode(history.messages[0]) == "raw"
+    def test_history_mode_detection_flat_with_extra_keys(self):
+        """Messages with role+content AND extra keys fall back to flat mode."""
+        history = dspy.History(messages=[{"role": "user", "content": "hello", "extra": "data"}])
+        assert history.mode == "flat"
 
-    def test_history_mode_detection_dict_with_input_fields_and_extra(self):
-        """Messages with input_fields AND extra keys fallback to dict mode."""
+    def test_history_mode_detection_flat_with_input_fields_and_extra(self):
+        """Messages with input_fields AND extra keys fall back to flat mode."""
         history = dspy.History(messages=[{"question": "...", "input_fields": {"a": 1}}])
-        assert history._detect_mode(history.messages[0]) == "dict"
+        assert history.mode == "flat"
 
     def test_history_explicit_mode_override(self):
         """Explicit mode overrides auto-detection."""
         history = dspy.History(messages=[{"question": "...", "answer": "..."}], mode="signature")
-        assert history._detect_mode(history.messages[0]) == "signature"
+        assert history.mode == "signature"
 
-    def test_history_validation_kv_non_dict_input_fields(self):
-        """KV mode with non-dict input_fields raises ValueError."""
+    def test_history_validation_demo_non_dict_input_fields(self):
+        """Demo mode with non-dict input_fields raises ValueError."""
         with pytest.raises(ValueError, match="'input_fields' must be a dict"):
-            dspy.History(messages=[{"input_fields": "not a dict"}])
+            dspy.History.from_demo([{"input_fields": "not a dict"}])
 
     def test_history_validation_raw_non_string_content(self):
         """Raw mode with non-string content raises ValueError."""
         with pytest.raises(ValueError, match="'content' must be a string or None"):
-            dspy.History(messages=[{"role": "user", "content": 123}])
+            dspy.History.from_raw([{"role": "user", "content": 123}])
 
     def test_history_validation_raw_allows_none_content(self):
         """Raw mode allows None content for tool call messages."""
-        history = dspy.History(messages=[
+        history = dspy.History.from_raw([
             {"role": "assistant", "content": None, "tool_calls": [{"id": "1", "type": "function", "function": {"name": "test", "arguments": "{}"}}]}
         ])
         assert history.messages[0]["content"] is None
@@ -771,26 +778,31 @@ def test_history_validation_raw_allows_none_content(self):
     def test_history_validation_raw_non_string_role(self):
         """Raw mode with non-string role raises ValueError."""
         with pytest.raises(ValueError, match="'role' must be a string"):
-            dspy.History(messages=[{"role": 123, "content": "hello"}])
+            dspy.History.from_raw([{"role": 123, "content": "hello"}])
 
-    def test_history_from_kv_factory(self):
-        """from_kv factory sets mode to kv."""
-        history = dspy.History.from_kv([{"input_fields": {"a": 1}}])
-        assert history.mode == "kv"
+    def test_history_from_demo_factory(self):
+        """from_demo factory sets mode to demo."""
+        history = dspy.History.from_demo([{"input_fields": {"a": 1}}])
+        assert history.mode == "demo"
 
     def test_history_from_raw_factory(self):
         """from_raw factory sets mode to raw."""
         history = dspy.History.from_raw([{"role": "user", "content": "hello"}])
         assert history.mode == "raw"
 
-    def test_adapter_formats_kv_mode_history(self):
-        """Adapter correctly formats kv-mode history."""
+    def test_history_from_signature_factory(self):
+        """from_signature factory sets mode to signature."""
+        history = dspy.History.from_signature([{"question": "...", "answer": "..."}])
+        assert history.mode == "signature"
+
+    def test_adapter_formats_demo_mode_history(self):
+        """Adapter correctly formats demo-mode history."""
         class MySignature(dspy.Signature):
             question: str = dspy.InputField()
             history: dspy.History = dspy.InputField()
             answer: str = dspy.OutputField()
 
-        history = dspy.History.from_kv([
+        history = dspy.History.from_demo([
             {
                 "input_fields": {"thought": "I need to search", "tool_name": "search"},
                 "output_fields": {"observation": "Results found"},
@@ -828,14 +840,14 @@ class MySignature(dspy.Signature):
         assert messages[1] == {"role": "user", "content": "Hello"}
         assert messages[2] == {"role": "assistant", "content": "Hi there!"}
 
-    def test_adapter_kv_mode_serializes_complex_values(self):
-        """KV mode serializes non-primitive values to strings."""
+    def test_adapter_demo_mode_serializes_complex_values(self):
+        """Demo mode serializes non-primitive values to strings."""
         class MySignature(dspy.Signature):
             question: str = dspy.InputField()
             history: dspy.History = dspy.InputField()
             answer: str = dspy.OutputField()
 
-        history = dspy.History.from_kv([
+        history = dspy.History.from_demo([
             {
                 "input_fields": {"args": {"key": "value"}, "number": 42},
                 "output_fields": {"result": ["a", "b", "c"]},
@@ -850,14 +862,14 @@ class MySignature(dspy.Signature):
         assert "number" in messages[1]["content"]
         assert "result" in messages[2]["content"]
 
-    def test_adapter_kv_mode_input_only(self):
-        """KV mode with only input_fields produces only user message."""
+    def test_adapter_demo_mode_input_only(self):
+        """Demo mode with only input_fields produces only a user message."""
         class MySignature(dspy.Signature):
             question: str = dspy.InputField()
             history: dspy.History = dspy.InputField()
             answer: str = dspy.OutputField()
 
-        history = dspy.History.from_kv([{"input_fields": {"thought": "Thinking..."}}])
+        history = dspy.History.from_demo([{"input_fields": {"thought": "Thinking..."}}])
 
         adapter = dspy.ChatAdapter()
         messages = adapter.format(MySignature, [], {"question": "test", "history": history})
@@ -867,8 +879,8 @@ class MySignature(dspy.Signature):
         assert messages[1]["role"] == "user"
         assert "thought" in messages[1]["content"]
 
-    def test_adapter_formats_dict_mode_history(self):
-        """Dict mode (default) puts all kv pairs in single user message."""
+    def test_adapter_formats_flat_mode_history(self):
+        """Flat mode (default) puts all key-value pairs in a single user message."""
         class MySignature(dspy.Signature):
             question: str = dspy.InputField()
             history: dspy.History = dspy.InputField()
@@ -895,9 +907,9 @@ class MySignature(dspy.Signature):
             history: dspy.History = dspy.InputField()
             answer: str = dspy.OutputField()
 
-        history = dspy.History(messages=[
+        history = dspy.History.from_signature([
             {"question": "What is 2+2?", "answer": "4"},
-        ], mode="signature")
+        ])
 
         adapter = dspy.ChatAdapter()
         messages = adapter.format(MySignature, [], {"question": "test", "history": history})
diff --git a/tests/adapters/test_json_adapter.py b/tests/adapters/test_json_adapter.py
index 95abacfe0a..c2a98a3534 100644
--- a/tests/adapters/test_json_adapter.py
+++ b/tests/adapters/test_json_adapter.py
@@ -518,13 +518,10 @@ class MySignature(dspy.Signature):
         history: dspy.History = dspy.InputField()
         answer: str = dspy.OutputField()
 
-    history = dspy.History(
-        messages=[
-            {"question": "What is the capital of France?", "answer": "Paris"},
-            {"question": "What is the capital of Germany?", "answer": "Berlin"},
-        ],
-        mode="signature",
-    )
+    history = dspy.History.from_signature([
+        {"question": "What is the capital of France?", "answer": "Paris"},
+        {"question": "What is the capital of Germany?", "answer": "Berlin"},
+    ])
 
     adapter = dspy.JSONAdapter()
     messages = adapter.format(MySignature, [], {"question": "What is the capital of France?", "history": history})