55 changes: 55 additions & 0 deletions docs/models/openrouter.md
@@ -73,3 +73,58 @@ model = OpenRouterModel('openai/gpt-5')
agent = Agent(model, model_settings=settings)
...
```

## Image Generation
> **Collaborator:** We should also make this work with `builtin_tools=[ImageGenerationTool()]` and document it here: https://ai.pydantic.dev/builtin-tools/#image-generation-tool
>
> As with Google, which doesn't expose that as a tool, using that tool or `BinaryImage` in `output_type` should automatically enable the modality.
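For illustration, the suggested usage might look roughly like the sketch below. This is not part of the PR; it assumes `ImageGenerationTool` is importable from `pydantic_ai` as documented for the built-in tools, and that requesting an image (via the tool or `BinaryImage` output) would enable the image modality automatically.

```python {test="skip"}
from pydantic_ai import Agent, BinaryImage, ImageGenerationTool

# Hypothetical usage per the review suggestion: no explicit
# `openrouter_modalities` setting; the image modality would be enabled
# automatically because an image is requested.
agent = Agent(
    'openrouter:google/gemini-2.5-flash-image-preview',
    builtin_tools=[ImageGenerationTool()],
    output_type=str | BinaryImage,
)

result = agent.run_sync('A cat')
assert isinstance(result.output, BinaryImage)
```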


You can generate images with OpenRouter models that support it by enabling the `image` modality via the `openrouter_modalities` setting:

```python {test="skip"}
from pydantic_ai import Agent, BinaryImage
from pydantic_ai.models.openrouter import OpenRouterModelSettings

agent = Agent(
    model='openrouter:google/gemini-2.5-flash-image-preview',
    output_type=str | BinaryImage,
    model_settings=OpenRouterModelSettings(openrouter_modalities=['image', 'text']),
)

result = agent.run_sync('A cat')
assert isinstance(result.output, BinaryImage)
```
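The returned `BinaryImage` can then be handled like any other binary content, for example written to disk. A minimal sketch continuing the example above, assuming `BinaryImage` exposes the raw bytes as `data` (as `BinaryContent` does):

```python {test="skip"}
from pathlib import Path

image = result.output
assert isinstance(image, BinaryImage)
# `media_type` (e.g. 'image/png') tells you the format of the raw bytes.
Path('cat.png').write_bytes(image.data)
```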

You can further customize image generation using `openrouter_image_config`:

```python
from pydantic_ai.models.openrouter import OpenRouterModelSettings

settings = OpenRouterModelSettings(
    openrouter_modalities=['image', 'text'],
    openrouter_image_config={'aspect_ratio': '3:2'},
)
```

> **Collaborator:** I want this to be an option on `ImageGenerationTool` as in #3412.

> **Collaborator (@DouweM, Dec 1, 2025):** I'm OK with it also being a model setting if it supports more keys than `ImageGenerationTool` does currently (or really: after that PR), but if this is the only field supported so far I'd prefer to leave that and the modality settings off.
>
> If you want, you can finish that PR as we're at it to make your life here easier.

> Available aspect ratios: `'1:1'`, `'2:3'`, `'3:2'`, `'3:4'`, `'4:3'`, `'4:5'`, `'5:4'`, `'9:16'`, `'16:9'`, `'21:9'`.

Image generation also works with streaming:

```python {test="skip"}
from pydantic_ai import Agent, BinaryImage
from pydantic_ai.models.openrouter import OpenRouterModelSettings

agent = Agent(
    model='openrouter:google/gemini-2.5-flash-image-preview',
    output_type=str | BinaryImage,
    model_settings=OpenRouterModelSettings(
        openrouter_modalities=['image', 'text'],
        openrouter_image_config={'aspect_ratio': '3:2'},
    ),
)

response = agent.run_stream_sync('A dog')
for output in response.stream_output():
    if isinstance(output, str):
        print(output)
    elif isinstance(output, BinaryImage):
        # Handle the generated image
        print(f'Generated image: {output.media_type}')
```
67 changes: 39 additions & 28 deletions pydantic_ai_slim/pydantic_ai/models/openai.py
@@ -598,28 +598,7 @@ def _process_response(self, response: chat.ChatCompletion | str) -> ModelRespons
            raise UnexpectedModelBehavior(f'Invalid response from {self.system} chat completions endpoint: {e}') from e

        choice = response.choices[0]
        items: list[ModelResponsePart] = []

        if thinking_parts := self._process_thinking(choice.message):
            items.extend(thinking_parts)

        if choice.message.content:
            items.extend(
                (replace(part, id='content', provider_name=self.system) if isinstance(part, ThinkingPart) else part)
                for part in split_content_into_text_and_thinking(choice.message.content, self.profile.thinking_tags)
            )
        if choice.message.tool_calls is not None:
            for c in choice.message.tool_calls:
                if isinstance(c, ChatCompletionMessageFunctionToolCall):
                    part = ToolCallPart(c.function.name, c.function.arguments, tool_call_id=c.id)
                elif isinstance(c, ChatCompletionMessageCustomToolCall):  # pragma: no cover
                    # NOTE: Custom tool calls are not supported.
                    # See <https://github.com/pydantic/pydantic-ai/issues/2513> for more details.
                    raise RuntimeError('Custom tool calls are not supported')
                else:
                    assert_never(c)
                part.tool_call_id = _guard_tool_call_id(part)
                items.append(part)
        items = list(self._process_parts(choice.message))

        return ModelResponse(
            parts=items,
@@ -632,25 +611,57 @@ def _process_response(self, response: chat.ChatCompletion | str) -> ModelRespons
            finish_reason=self._map_finish_reason(choice.finish_reason),
        )

    def _process_thinking(self, message: chat.ChatCompletionMessage) -> list[ThinkingPart] | None:
    def _process_thinking(self, message: chat.ChatCompletionMessage) -> Iterable[ThinkingPart]:
        """Hook that maps reasoning tokens to thinking parts.

        This method may be overridden by subclasses of `OpenAIChatModel` to apply custom mappings.
        """
        items: list[ThinkingPart] = []

        # The `reasoning_content` field is only present in DeepSeek models.
        # https://api-docs.deepseek.com/guides/reasoning_model
        if reasoning_content := getattr(message, 'reasoning_content', None):
            items.append(ThinkingPart(id='reasoning_content', content=reasoning_content, provider_name=self.system))
            yield ThinkingPart(id='reasoning_content', content=reasoning_content, provider_name=self.system)

        # The `reasoning` field is only present in gpt-oss via Ollama and OpenRouter.
        # - https://cookbook.openai.com/articles/gpt-oss/handle-raw-cot#chat-completions-api
        # - https://openrouter.ai/docs/use-cases/reasoning-tokens#basic-usage-with-reasoning-tokens
        if reasoning := getattr(message, 'reasoning', None):
            items.append(ThinkingPart(id='reasoning', content=reasoning, provider_name=self.system))
            yield ThinkingPart(id='reasoning', content=reasoning, provider_name=self.system)

    def _process_content(self, message: chat.ChatCompletionMessage) -> Iterable[TextPart | ThinkingPart]:
        """Hook that maps the message content to thinking or text parts.

        This method may be overridden by subclasses of `OpenAIChatModel` to apply custom mappings.
        """
        if message.content:
            for part in split_content_into_text_and_thinking(message.content, self.profile.thinking_tags):
                yield replace(part, id='content', provider_name=self.system) if isinstance(part, ThinkingPart) else part

        return items
    def _process_tool_calls(self, message: chat.ChatCompletionMessage) -> Iterable[ToolCallPart]:
        """Hook that maps tool calls to tool call parts.

        This method may be overridden by subclasses of `OpenAIChatModel` to apply custom mappings.
        """
        if message.tool_calls is not None:
            for c in message.tool_calls:
                if isinstance(c, ChatCompletionMessageFunctionToolCall):
                    part = ToolCallPart(c.function.name, c.function.arguments, tool_call_id=c.id)
                elif isinstance(c, ChatCompletionMessageCustomToolCall):  # pragma: no cover
                    # NOTE: Custom tool calls are not supported.
                    # See <https://github.com/pydantic/pydantic-ai/issues/2513> for more details.
                    raise RuntimeError('Custom tool calls are not supported')
                else:
                    assert_never(c)
                part.tool_call_id = _guard_tool_call_id(part)
                yield part

    def _process_parts(self, message: chat.ChatCompletionMessage) -> Iterable[ModelResponsePart]:
        """Hook that defines the mappings to transform message contents to response parts.

        This method may be overridden by subclasses of `OpenAIChatModel` to apply custom mappings.
        """
        return itertools.chain(
            self._process_thinking(message), self._process_content(message), self._process_tool_calls(message)
        )

    async def _process_streamed_response(
        self, response: AsyncStream[ChatCompletionChunk], model_request_parameters: ModelRequestParameters
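These new hooks let subclasses of `OpenAIChatModel` (such as `OpenRouterModel` below) customize individual mapping steps instead of reimplementing `_process_response`. As a rough illustration only, not part of the diff (the subclass name and the appended marker part are hypothetical):

```python
import itertools
from collections.abc import Iterable

from openai.types import chat

from pydantic_ai.messages import ModelResponsePart, TextPart
from pydantic_ai.models.openai import OpenAIChatModel


class ExampleChatModel(OpenAIChatModel):
    """Hypothetical subclass showing how the new hooks compose."""

    def _process_parts(self, message: chat.ChatCompletionMessage) -> Iterable[ModelResponsePart]:
        # Reuse the default thinking/content/tool-call mapping, then append an extra part.
        return itertools.chain(super()._process_parts(message), [TextPart('(post-processed)')])
```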
79 changes: 76 additions & 3 deletions pydantic_ai_slim/pydantic_ai/models/openrouter.py
@@ -1,5 +1,6 @@
from __future__ import annotations as _annotations

import itertools
from collections.abc import Iterable
from dataclasses import dataclass, field
from typing import Annotated, Any, Literal, TypeAlias, cast
@@ -9,7 +10,10 @@

from ..exceptions import ModelHTTPError
from ..messages import (
    BinaryImage,
    FilePart,
    FinishReason,
    ModelResponsePart,
    ModelResponseStreamEvent,
    ThinkingPart,
)
@@ -197,6 +201,12 @@ class OpenRouterUsageConfig(TypedDict, total=False):
    include: bool


class OpenRouterImageConfig(TypedDict, total=False):
"""Configuration for OpenRouter image generation."""

aspect_ratio: Literal['1:1', '2:3', '3:2', '3:4', '4:3', '4:5', '5:4', '9:16', '16:9', '21:9']


class OpenRouterModelSettings(ModelSettings, total=False):
"""Settings used for an OpenRouter model request."""

@@ -230,12 +240,24 @@ class OpenRouterModelSettings(ModelSettings, total=False):
    The reasoning config object consolidates settings for controlling reasoning strength across different models. [See more](https://openrouter.ai/docs/use-cases/reasoning-tokens)
    """

    openrouter_modalities: list[Literal['text', 'image']]
    """To control the modalities of the model.

    The modalities config object consolidates settings for controlling the output modalities of the model. [See more](https://openrouter.ai/docs/guides/overview/multimodal/image-generation)
    """

    openrouter_usage: OpenRouterUsageConfig
    """To control the usage of the model.

    The usage config object consolidates settings for enabling detailed usage information. [See more](https://openrouter.ai/docs/use-cases/usage-accounting)
    """

    openrouter_image_config: OpenRouterImageConfig
    """To control the image generation of the model.

    The image config object consolidates settings for controlling the image generation of the model. [See more](https://openrouter.ai/docs/guides/overview/multimodal/image-generation#image-aspect-ratio-configuration)
    """


class _OpenRouterError(BaseModel):
"""Utility class to validate error messages from OpenRouter."""
@@ -366,6 +388,14 @@ class _OpenRouterChatCompletionMessageFunctionToolCall(chat.ChatCompletionMessag
]


class _OpenRouterImageUrl(BaseModel):
    url: str


class _OpenRouterImageGeneration(BaseModel):
    image_url: _OpenRouterImageUrl


class _OpenRouterCompletionMessage(chat.ChatCompletionMessage):
"""Wrapped chat completion message with OpenRouter specific attributes."""

@@ -378,6 +408,9 @@ class _OpenRouterCompletionMessage(chat.ChatCompletionMessage):
    tool_calls: list[_OpenRouterChatCompletionMessageToolCallUnion] | None = None  # type: ignore[reportIncompatibleVariableOverride]
    """The tool calls generated by the model, such as function calls."""

    images: list[_OpenRouterImageGeneration] | None = None
    """The images generated by the model, if any."""


class _OpenRouterChoice(chat_completion.Choice):
"""Wraps OpenAI chat completion choice with OpenRouter specific attributes."""
@@ -488,6 +521,10 @@ def _openrouter_settings_to_openai_settings(model_settings: OpenRouterModelSetti
        extra_body['reasoning'] = reasoning
    if usage := model_settings.pop('openrouter_usage', None):
        extra_body['usage'] = usage
    if modalities := model_settings.pop('openrouter_modalities', None):
        extra_body['modalities'] = modalities
    if image_config := model_settings.pop('openrouter_image_config', None):
        extra_body['image_config'] = image_config

    model_settings['extra_body'] = extra_body
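For reference (commentary, not part of the diff): with the settings shown in the docs above, the request body sent to OpenRouter would carry roughly these extra fields via `extra_body`:

```python
extra_body = {
    'modalities': ['image', 'text'],
    'image_config': {'aspect_ratio': '3:2'},
}
```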

@@ -535,13 +572,27 @@ def _validate_completion(self, response: chat.ChatCompletion) -> _OpenRouterChat
        return response

    @override
    def _process_thinking(self, message: chat.ChatCompletionMessage) -> list[ThinkingPart] | None:
    def _process_thinking(self, message: chat.ChatCompletionMessage) -> Iterable[ThinkingPart]:
        assert isinstance(message, _OpenRouterCompletionMessage)

        if reasoning_details := message.reasoning_details:
            return [_from_reasoning_detail(detail) for detail in reasoning_details]
            for detail in reasoning_details:
                yield _from_reasoning_detail(detail)
        else:
            return super()._process_thinking(message)
            yield from super()._process_thinking(message)

    def _process_image(self, message: chat.ChatCompletionMessage) -> Iterable[FilePart]:
        assert isinstance(message, _OpenRouterCompletionMessage)

        if images := message.images:
            for image in images:
                yield FilePart(
                    content=BinaryImage.from_data_uri(image.image_url.url),
                )

    @override
    def _process_parts(self, message: chat.ChatCompletionMessage) -> Iterable[ModelResponsePart]:
        return itertools.chain(super()._process_parts(message), self._process_image(message))

    @override
    def _process_provider_details(self, response: chat.ChatCompletion) -> dict[str, Any]:
@@ -594,6 +645,9 @@ class _OpenRouterChoiceDelta(chat_completion_chunk.ChoiceDelta):
    reasoning_details: list[_OpenRouterReasoningDetail] | None = None
    """The reasoning details associated with the message, if any."""

    images: list[_OpenRouterImageGeneration] | None = None
    """The images generated by the model, if any."""


class _OpenRouterChunkChoice(chat_completion_chunk.Choice):
"""Wraps OpenAI chat completion chunk choice with OpenRouter specific attributes."""
@@ -661,6 +715,25 @@ def _map_thinking_delta(self, choice: chat_completion_chunk.Choice) -> Iterable[
        else:
            return super()._map_thinking_delta(choice)

    def _map_file_delta(self, choice: chat_completion_chunk.Choice) -> Iterable[ModelResponseStreamEvent]:
        assert isinstance(choice, _OpenRouterChunkChoice)

        if images := choice.delta.images:
            for image in images:
                yield self._parts_manager.handle_part(
                    vendor_part_id=None,
                    part=FilePart(
                        content=BinaryImage.from_data_uri(image.image_url.url),
                    ),
                )

    @override
    def _map_part_delta(self, choice: chat_completion_chunk.Choice) -> Iterable[ModelResponseStreamEvent]:
        return itertools.chain(
            super()._map_part_delta(choice),
            self._map_file_delta(choice),
        )

    @override
    def _map_provider_details(self, chunk: chat.ChatCompletionChunk) -> dict[str, Any] | None:
        assert isinstance(chunk, _OpenRouterChatCompletionChunk)
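For context (commentary, not part of the diff): OpenRouter returns generated images as base64 data URIs under `message.images[*].image_url.url`, which the code above decodes with `BinaryImage.from_data_uri`. A rough sketch of that conversion, assuming `from_data_uri` accepts standard `data:` URIs (the payload below is truncated and purely illustrative):

```python
from pydantic_ai.messages import BinaryImage

# OpenRouter-style image payload: a data URI carrying base64-encoded image bytes.
data_uri = 'data:image/png;base64,iVBORw0KGgo='  # truncated, illustration only

image = BinaryImage.from_data_uri(data_uri)
print(image.media_type)  # -> 'image/png'
```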


38 changes: 38 additions & 0 deletions tests/models/test_openrouter.py
@@ -7,6 +7,7 @@

from pydantic_ai import (
    Agent,
    BinaryImage,
    ModelHTTPError,
    ModelMessage,
    ModelRequest,
@@ -406,3 +407,40 @@ class FindEducationContentFilters(BaseModel):
            }
        ]
    )


async def test_openrouter_image_generation(allow_model_requests: None, openrouter_api_key: str) -> None:
    provider = OpenRouterProvider(api_key=openrouter_api_key)
    model = OpenRouterModel(
        model_name='google/gemini-2.5-flash-image-preview',
        provider=provider,
    )
    settings = OpenRouterModelSettings(openrouter_modalities=['image', 'text'])

    agent = Agent(model=model, output_type=str | BinaryImage, model_settings=settings)

    result = await agent.run('A cat')

    assert result.response.text == snapshot('Here is a cat for you! ')
    assert isinstance(result.output, BinaryImage)


async def test_openrouter_image_generation_streaming(allow_model_requests: None, openrouter_api_key: str) -> None:
    provider = OpenRouterProvider(api_key=openrouter_api_key)
    model = OpenRouterModel(
        model_name='google/gemini-2.5-flash-image-preview',
        provider=provider,
    )
    settings = OpenRouterModelSettings(
        openrouter_modalities=['image', 'text'], openrouter_image_config={'aspect_ratio': '3:2'}
    )

    agent = Agent(model=model, output_type=str | BinaryImage, model_settings=settings)

    async with agent.run_stream('A dog') as result:
        async for output in result.stream_output():
            if isinstance(output, str):
                assert output == snapshot('Here you go: ')
            else:
                assert isinstance(output, BinaryImage)
                assert output.media_type == snapshot('image/png')

> **Collaborator** (on `if isinstance(output, str):`): This means we may never actually get to the image assertions!