Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 83 additions & 32 deletions dspy/adapters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from dspy.adapters.types.reasoning import Reasoning
from dspy.adapters.types.tool import Tool, ToolCalls
from dspy.experimental import Citations
from dspy.signatures.field import InputField, OutputField
from dspy.signatures.signature import Signature
from dspy.utils.callback import BaseCallback, with_callbacks

Expand Down Expand Up @@ -452,13 +453,13 @@ def format_demos(self, signature: type[Signature], demos: list[dict[str, Any]])

return messages

def _get_history_field_name(self, signature: type[Signature]) -> bool:
def _get_history_field_name(self, signature: type[Signature]) -> str | None:
    """Return the name of the first input field annotated as `History`.

    Args:
        signature: The signature whose input fields are scanned.

    Returns:
        The matching field name, or `None` when no input field has a
        `History` annotation.
    """
    matching_names = (
        field_name
        for field_name, field_info in signature.input_fields.items()
        if field_info.annotation == History
    )
    return next(matching_names, None)

def _get_tool_call_input_field_name(self, signature: type[Signature]) -> bool:
def _get_tool_call_input_field_name(self, signature: type[Signature]) -> str | None:
for name, field in signature.input_fields.items():
# Look for annotation `list[dspy.Tool]` or `dspy.Tool`
origin = get_origin(field.annotation)
Expand All @@ -468,54 +469,104 @@ def _get_tool_call_input_field_name(self, signature: type[Signature]) -> bool:
return name
return None

def _get_tool_call_output_field_name(self, signature: type[Signature]) -> bool:
def _get_tool_call_output_field_name(self, signature: type[Signature]) -> str | None:
    """Return the name of the first output field annotated as `ToolCalls`.

    Args:
        signature: The signature whose output fields are scanned.

    Returns:
        The matching field name, or `None` when no output field has a
        `ToolCalls` annotation.
    """
    matching_names = (
        field_name
        for field_name, field_info in signature.output_fields.items()
        if field_info.annotation == ToolCalls
    )
    return next(matching_names, None)

def _serialize_kv_value(self, v: Any) -> Any:
    """Coerce a history value into something safe to render in kv-mode.

    Args:
        v: The value taken from a history message.

    Returns:
        `v` unchanged when it is a string, number, boolean or `None`;
        otherwise `str(v)`. If `str(v)` raises, a placeholder string naming
        the value's type is returned instead.
    """
    if v is None or isinstance(v, (str, int, float, bool)):
        return v
    try:
        rendered = str(v)
    except Exception:
        # Fall back to a placeholder rather than propagate a failing __str__.
        rendered = f"<unserializable {type(v).__name__}>"
    return rendered

def _make_dynamic_signature_for_inputs(self, keys: list[str]) -> type[Signature]:
    """Build a signature whose only fields are inputs named by `keys`.

    Args:
        keys: Field names; each one gets its own fresh `InputField()`.

    Returns:
        A signature created with empty instructions.
    """
    input_fields = {}
    for key in keys:
        input_fields[key] = InputField()
    return Signature(input_fields, instructions="")

def _make_dynamic_signature_for_outputs(self, keys: list[str]) -> type[Signature]:
    """Build a signature whose only fields are outputs named by `keys`.

    Args:
        keys: Field names; each one gets its own fresh `OutputField()`.

    Returns:
        A signature created with empty instructions.
    """
    output_fields = {}
    for key in keys:
        output_fields[key] = OutputField()
    return Signature(output_fields, instructions="")

def format_conversation_history(
self,
signature: type[Signature],
history_field_name: str,
inputs: dict[str, Any],
) -> list[dict[str, Any]]:
"""Format the conversation history.

This method formats the conversation history and the current input as multiturn messages.

Args:
signature: The DSPy signature for which to format the conversation history.
history_field_name: The name of the history field in the signature.
inputs: The input arguments to the DSPy module.
"""Format the conversation history as multiturn messages.

Returns:
A list of multiturn messages.
Supports four modes:
- raw: Direct LM messages → passed through as-is
- demo: {"input_fields": {...}, "output_fields": {...}} → user/assistant pairs
- flat: Arbitrary kv pairs → single user message per dict (default)
- signature: Dict keys match signature fields → user/assistant pairs
"""
conversation_history = inputs[history_field_name].messages if history_field_name in inputs else None

if conversation_history is None:
history = inputs.get(history_field_name)
if history is None:
return []

messages = []
for message in conversation_history:
messages.append(
{
del inputs[history_field_name]

if history.mode == "raw":
return [dict(msg) for msg in history.messages]
if history.mode == "demo":
return self._format_demo_history(history.messages)
if history.mode == "signature":
return self._format_signature_history(signature, history.messages)
return self._format_flat_history(history.messages)

def _format_demo_history(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Format demo-mode history (input_fields/output_fields → user/assistant)."""
result = []
for msg in messages:
if "input_fields" in msg:
input_dict = {k: self._serialize_kv_value(v) for k, v in msg["input_fields"].items()}
sig = self._make_dynamic_signature_for_inputs(list(input_dict.keys()))
result.append({
"role": "user",
"content": self.format_user_message_content(signature, message),
}
)
messages.append(
{
"content": self.format_user_message_content(sig, input_dict),
})
if "output_fields" in msg:
output_dict = {k: self._serialize_kv_value(v) for k, v in msg["output_fields"].items()}
sig = self._make_dynamic_signature_for_outputs(list(output_dict.keys()))
result.append({
"role": "assistant",
"content": self.format_assistant_message_content(signature, message),
}
)

# Remove the history field from the inputs
del inputs[history_field_name]
"content": self.format_assistant_message_content(sig, output_dict),
})
return result

return messages
def _format_signature_history(
    self, signature: type[Signature], messages: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """Render signature-mode history as alternating user/assistant messages.

    Args:
        signature: The signature both message contents are formatted against.
        messages: History entries, one dict per past turn.

    Returns:
        A list with two messages per entry: a "user" message followed by an
        "assistant" message, in the order the entries were given.
    """
    turns: list[dict[str, Any]] = []
    for entry in messages:
        user_turn = {
            "role": "user",
            "content": self.format_user_message_content(signature, entry),
        }
        assistant_turn = {
            "role": "assistant",
            "content": self.format_assistant_message_content(signature, entry),
        }
        turns.extend((user_turn, assistant_turn))
    return turns

def _format_flat_history(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Render flat-mode history as one user message per history entry.

    Every key/value pair of an entry is serialized with
    `_serialize_kv_value` and formatted against a dynamic input-only
    signature built from that entry's keys.

    Args:
        messages: History entries holding arbitrary key/value pairs.

    Returns:
        A list of "user" messages, one per entry, in the original order.
    """

    def as_user_turn(entry: dict[str, Any]) -> dict[str, Any]:
        payload = {key: self._serialize_kv_value(value) for key, value in entry.items()}
        dynamic_sig = self._make_dynamic_signature_for_inputs(list(payload))
        return {
            "role": "user",
            "content": self.format_user_message_content(dynamic_sig, payload),
        }

    return [as_user_turn(entry) for entry in messages]

def parse(self, signature: type[Signature], completion: str) -> dict[str, Any]:
"""Parse the LM output into a dictionary of the output fields.
Expand Down
147 changes: 125 additions & 22 deletions dspy/adapters/types/history.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,47 @@
from typing import Any
from typing import Any, Literal

import pydantic


class History(pydantic.BaseModel):
"""Class representing the conversation history.

The conversation history is a list of messages, each message entity should have keys from the associated signature.
For example, if you have the following signature:

```
class MySignature(dspy.Signature):
question: str = dspy.InputField()
history: dspy.History = dspy.InputField()
answer: str = dspy.OutputField()
```

Then the history should be a list of dictionaries with keys "question" and "answer".
"""Class representing conversation history.

History supports four message formats, with one mode per History instance:

1. **Raw mode**: Direct LM messages with `{"role": "...", "content": "..."}`.
Used for ReAct trajectories and native tool calling.
```python
history = dspy.History.from_raw([
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there!"},
])
```

2. **Demo mode**: Nested `{"input_fields": {...}, "output_fields": {...}}` pairs.
Used for few-shot demonstrations with explicit input/output separation.
```python
history = dspy.History.from_demo([
{"input_fields": {"question": "2+2?"}, "output_fields": {"answer": "4"}},
])
```

3. **Flat mode** (default): Arbitrary key-value pairs in a single user message.
```python
history = dspy.History(messages=[
{"thought": "I need to search", "tool_name": "search", "observation": "Found it"},
])
```

4. **Signature mode**: Dict keys match signature fields → user/assistant pairs.
Must be explicitly set.
```python
history = dspy.History.from_signature([
{"question": "What is 2+2?", "answer": "4"},
])
```

Example:
```
```python
import dspy

dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"))
Expand All @@ -29,19 +51,16 @@ class MySignature(dspy.Signature):
history: dspy.History = dspy.InputField()
answer: str = dspy.OutputField()

history = dspy.History(
messages=[
{"question": "What is the capital of France?", "answer": "Paris"},
{"question": "What is the capital of Germany?", "answer": "Berlin"},
]
)
history = dspy.History.from_signature([
{"question": "What is the capital of France?", "answer": "Paris"},
])

predict = dspy.Predict(MySignature)
outputs = predict(question="What is the capital of France?", history=history)
```

Example of capturing the conversation history:
```
```python
import dspy

dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"))
Expand All @@ -59,10 +78,94 @@ class MySignature(dspy.Signature):
"""

# Conversation entries; the dict shape each entry should have depends on `mode`.
messages: list[dict[str, Any]]
# Message format used by this instance; "flat" is the default.
mode: Literal["signature", "demo", "flat", "raw"] = "flat"

# Pydantic settings: instances are frozen and unknown fields are rejected.
model_config = pydantic.ConfigDict(
frozen=True,
str_strip_whitespace=True,
validate_assignment=True,
extra="forbid",
)

@staticmethod
def _infer_mode_from_msg(msg: dict) -> str:
"""Infer the mode from a message's structure.

Detection rules (conservative):
- Raw: has "role" key and ONLY LM-like keys (role, content, tool_calls, tool_call_id, name)
- Demo: keys are ONLY "input_fields" and/or "output_fields"
- Flat: everything else (signature mode must be explicit)
"""
keys = set(msg.keys())
lm_keys = {"role", "content", "tool_calls", "tool_call_id", "name"}

if "role" in keys and keys <= lm_keys:
return "raw"

if keys <= {"input_fields", "output_fields"} and keys:
return "demo"

return "flat"

def _validate_msg_for_mode(self, msg: dict, mode: str) -> None:
    """Check that one message has the structure required by `mode`.

    Args:
        msg: The message to check.
        mode: The history mode the message is checked against. Modes other
            than "raw", "demo" and "signature" are not checked.

    Raises:
        ValueError: In raw mode, if "role" is not a string or "content" is
            neither a string nor `None`; in demo mode, if a present
            "input_fields"/"output_fields" value is not a dict; in signature
            mode, if the message is not a non-empty dict.
    """
    if mode == "raw":
        if not isinstance(msg.get("role"), str):
            raise ValueError(f"Raw mode: 'role' must be a string: {msg}")
        content = msg.get("content")
        if not (content is None or isinstance(content, str)):
            raise ValueError(f"Raw mode: 'content' must be a string or None: {msg}")
        return

    if mode == "demo":
        if "input_fields" in msg and not isinstance(msg["input_fields"], dict):
            raise ValueError(f"Demo mode: 'input_fields' must be a dict: {msg}")
        if "output_fields" in msg and not isinstance(msg["output_fields"], dict):
            raise ValueError(f"Demo mode: 'output_fields' must be a dict: {msg}")
        return

    if mode == "signature" and not (isinstance(msg, dict) and msg):
        raise ValueError(f"Signature mode: messages must be non-empty dicts: {msg}")

@pydantic.model_validator(mode="after")
def _validate_messages(self) -> "History":
if not self.messages:
return self

# Only infer if mode is the default "flat" and messages clearly match another mode
if self.mode == "flat":
inferred = self._infer_mode_from_msg(self.messages[0])
if inferred in {"raw", "demo"}:
object.__setattr__(self, "mode", inferred)
Comment on lines +134 to +138

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Preserve signature histories instead of flattening

The validator only upgrades the default mode from flat to raw or demo, never to signature, so a typical signature-style history like {"question": ..., "answer": ...} remains in flat mode and _format_flat_history emits a single user message. Multi-turn prompts that previously produced user/assistant pairs (e.g., the default History(messages=[{"question": "...", "answer": "..."}]) used in tests/predict/test_predict.py) now collapse the assistant reply into the next user prompt, dropping prior assistant turns from the LM context. Consider keeping signature as the default or inferring signature mode when both input and output keys are present.

Useful? React with 👍 / 👎.


for msg in self.messages:
self._validate_msg_for_mode(msg, self.mode)

return self

def with_messages(self, messages: list[dict[str, Any]]) -> "History":
    """Return a new History holding the current messages followed by `messages`.

    This instance is left untouched; the new instance is constructed with
    this instance's mode.

    Args:
        messages: Messages to place after the existing ones.

    Returns:
        A newly constructed `History`.
    """
    combined = list(self.messages)
    combined.extend(messages)
    return History(mode=self.mode, messages=combined)

@classmethod
def from_demo(cls, messages: list[dict[str, Any]]) -> "History":
    """Build a History whose mode is explicitly "demo".

    Args:
        messages: Entries carrying "input_fields" and/or "output_fields" keys.

    Returns:
        A new instance of `cls` in demo mode.
    """
    demo_history = cls(mode="demo", messages=messages)
    return demo_history

@classmethod
def from_raw(cls, messages: list[dict[str, Any]]) -> "History":
    """Build a History whose mode is explicitly "raw".

    Args:
        messages: Direct LM messages with "role" and "content" keys.

    Returns:
        A new instance of `cls` in raw mode.
    """
    raw_history = cls(mode="raw", messages=messages)
    return raw_history

@classmethod
def from_signature(cls, messages: list[dict[str, Any]]) -> "History":
    """Build a History whose mode is explicitly "signature".

    Args:
        messages: Dicts whose keys match the fields of the signature the
            history will be used with.

    Returns:
        A new instance of `cls` in signature mode.
    """
    signature_history = cls(mode="signature", messages=messages)
    return signature_history
Loading
Loading