55 changes: 55 additions & 0 deletions docs/models/openrouter.md
@@ -73,3 +73,58 @@ model = OpenRouterModel('openai/gpt-5')
agent = Agent(model, model_settings=settings)
...
```

## Image Generation
> **Collaborator:** We should also make this work with `builtin_tools=[ImageGenerationTool()]` and document it here: https://ai.pydantic.dev/builtin-tools/#image-generation-tool
>
> As with Google, which doesn't expose that as a tool, using that tool or `BinaryImage` in `output_type` should automatically enable the modality.
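For illustration, the suggested usage might look roughly like the sketch below. This is not part of the PR; it assumes `ImageGenerationTool` is importable from `pydantic_ai` as documented for the built-in tools, and that requesting an image (via the tool or `BinaryImage` output) would enable the image modality automatically.

```python {test="skip"}
from pydantic_ai import Agent, BinaryImage, ImageGenerationTool

# Hypothetical usage per the review suggestion: no explicit
# `openrouter_modalities` setting; the image modality would be enabled
# automatically because an image is requested.
agent = Agent(
    'openrouter:google/gemini-2.5-flash-image-preview',
    builtin_tools=[ImageGenerationTool()],
    output_type=str | BinaryImage,
)

result = agent.run_sync('A cat')
assert isinstance(result.output, BinaryImage)
```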


You can generate images with OpenRouter models that support it by enabling the `image` modality via the `openrouter_modalities` setting:

```python {test="skip"}
from pydantic_ai import Agent, BinaryImage
from pydantic_ai.models.openrouter import OpenRouterModelSettings

agent = Agent(
    model='openrouter:google/gemini-2.5-flash-image-preview',
    output_type=str | BinaryImage,
    model_settings=OpenRouterModelSettings(openrouter_modalities=['image', 'text']),
)

result = agent.run_sync('A cat')
assert isinstance(result.output, BinaryImage)
```
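The returned `BinaryImage` can then be handled like any other binary content, for example written to disk. A minimal sketch continuing the example above, assuming `BinaryImage` exposes the raw bytes as `data` (as `BinaryContent` does):

```python {test="skip"}
from pathlib import Path

image = result.output
assert isinstance(image, BinaryImage)
# `media_type` (e.g. 'image/png') tells you the format of the raw bytes.
Path('cat.png').write_bytes(image.data)
```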

You can further customize image generation using `openrouter_image_config`:

```python
from pydantic_ai.models.openrouter import OpenRouterModelSettings

settings = OpenRouterModelSettings(
    openrouter_modalities=['image', 'text'],
    openrouter_image_config={'aspect_ratio': '3:2'},
)
```

> **Collaborator:** I want this to be an option on `ImageGenerationTool` as in #3412.

> **Collaborator (@DouweM, Dec 1, 2025):** I'm OK with it also being a model setting if it supports more keys than `ImageGenerationTool` does currently (or really: after that PR), but if this is the only field supported so far I'd prefer to leave that and the modality settings off.
>
> If you want, you can finish that PR as we're at it to make your life here easier.

> Available aspect ratios: `'1:1'`, `'2:3'`, `'3:2'`, `'3:4'`, `'4:3'`, `'4:5'`, `'5:4'`, `'9:16'`, `'16:9'`, `'21:9'`.

Image generation also works with streaming:

```python {test="skip"}
from pydantic_ai import Agent, BinaryImage
from pydantic_ai.models.openrouter import OpenRouterModelSettings

agent = Agent(
    model='openrouter:google/gemini-2.5-flash-image-preview',
    output_type=str | BinaryImage,
    model_settings=OpenRouterModelSettings(
        openrouter_modalities=['image', 'text'],
        openrouter_image_config={'aspect_ratio': '3:2'},
    ),
)

response = agent.run_stream_sync('A dog')
for output in response.stream_output():
    if isinstance(output, str):
        print(output)
    elif isinstance(output, BinaryImage):
        # Handle the generated image
        print(f'Generated image: {output.media_type}')
```
67 changes: 39 additions & 28 deletions pydantic_ai_slim/pydantic_ai/models/openai.py
@@ -598,28 +598,7 @@ def _process_response(self, response: chat.ChatCompletion | str) -> ModelRespons
            raise UnexpectedModelBehavior(f'Invalid response from {self.system} chat completions endpoint: {e}') from e

        choice = response.choices[0]
        items: list[ModelResponsePart] = []

        if thinking_parts := self._process_thinking(choice.message):
            items.extend(thinking_parts)

        if choice.message.content:
            items.extend(
                (replace(part, id='content', provider_name=self.system) if isinstance(part, ThinkingPart) else part)
                for part in split_content_into_text_and_thinking(choice.message.content, self.profile.thinking_tags)
            )
        if choice.message.tool_calls is not None:
            for c in choice.message.tool_calls:
                if isinstance(c, ChatCompletionMessageFunctionToolCall):
                    part = ToolCallPart(c.function.name, c.function.arguments, tool_call_id=c.id)
                elif isinstance(c, ChatCompletionMessageCustomToolCall):  # pragma: no cover
                    # NOTE: Custom tool calls are not supported.
                    # See <https://github.com/pydantic/pydantic-ai/issues/2513> for more details.
                    raise RuntimeError('Custom tool calls are not supported')
                else:
                    assert_never(c)
                part.tool_call_id = _guard_tool_call_id(part)
                items.append(part)
        items = list(self._process_parts(choice.message))

        return ModelResponse(
            parts=items,
@@ -632,25 +611,57 @@ def _process_response(self, response: chat.ChatCompletion | str) -> ModelRespons
            finish_reason=self._map_finish_reason(choice.finish_reason),
        )

    def _process_thinking(self, message: chat.ChatCompletionMessage) -> list[ThinkingPart] | None:
    def _process_thinking(self, message: chat.ChatCompletionMessage) -> Iterable[ThinkingPart]:
        """Hook that maps reasoning tokens to thinking parts.

        This method may be overridden by subclasses of `OpenAIChatModel` to apply custom mappings.
        """
        items: list[ThinkingPart] = []

        # The `reasoning_content` field is only present in DeepSeek models.
        # https://api-docs.deepseek.com/guides/reasoning_model
        if reasoning_content := getattr(message, 'reasoning_content', None):
            items.append(ThinkingPart(id='reasoning_content', content=reasoning_content, provider_name=self.system))
            yield ThinkingPart(id='reasoning_content', content=reasoning_content, provider_name=self.system)

        # The `reasoning` field is only present in gpt-oss via Ollama and OpenRouter.
        # - https://cookbook.openai.com/articles/gpt-oss/handle-raw-cot#chat-completions-api
        # - https://openrouter.ai/docs/use-cases/reasoning-tokens#basic-usage-with-reasoning-tokens
        if reasoning := getattr(message, 'reasoning', None):
            items.append(ThinkingPart(id='reasoning', content=reasoning, provider_name=self.system))
            yield ThinkingPart(id='reasoning', content=reasoning, provider_name=self.system)

    def _process_content(self, message: chat.ChatCompletionMessage) -> Iterable[TextPart | ThinkingPart]:
        """Hook that maps the message content to thinking or text parts.

        This method may be overridden by subclasses of `OpenAIChatModel` to apply custom mappings.
        """
        if message.content:
            for part in split_content_into_text_and_thinking(message.content, self.profile.thinking_tags):
                yield replace(part, id='content', provider_name=self.system) if isinstance(part, ThinkingPart) else part

        return items
    def _process_tool_calls(self, message: chat.ChatCompletionMessage) -> Iterable[ToolCallPart]:
        """Hook that maps tool calls to tool call parts.

        This method may be overridden by subclasses of `OpenAIChatModel` to apply custom mappings.
        """
        if message.tool_calls is not None:
            for c in message.tool_calls:
                if isinstance(c, ChatCompletionMessageFunctionToolCall):
                    part = ToolCallPart(c.function.name, c.function.arguments, tool_call_id=c.id)
                elif isinstance(c, ChatCompletionMessageCustomToolCall):  # pragma: no cover
                    # NOTE: Custom tool calls are not supported.
                    # See <https://github.com/pydantic/pydantic-ai/issues/2513> for more details.
                    raise RuntimeError('Custom tool calls are not supported')
                else:
                    assert_never(c)
                part.tool_call_id = _guard_tool_call_id(part)
                yield part

    def _process_parts(self, message: chat.ChatCompletionMessage) -> Iterable[ModelResponsePart]:
        """Hook that defines the mappings to transform message contents to response parts.

        This method may be overridden by subclasses of `OpenAIChatModel` to apply custom mappings.
        """
        return itertools.chain(
            self._process_thinking(message), self._process_content(message), self._process_tool_calls(message)
        )

    async def _process_streamed_response(
        self, response: AsyncStream[ChatCompletionChunk], model_request_parameters: ModelRequestParameters
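These new hooks let subclasses of `OpenAIChatModel` (such as `OpenRouterModel` below) customize individual mapping steps instead of reimplementing `_process_response`. As a rough illustration only, not part of the diff (the subclass name and the appended marker part are hypothetical):

```python
import itertools
from collections.abc import Iterable

from openai.types import chat

from pydantic_ai.messages import ModelResponsePart, TextPart
from pydantic_ai.models.openai import OpenAIChatModel


class ExampleChatModel(OpenAIChatModel):
    """Hypothetical subclass showing how the new hooks compose."""

    def _process_parts(self, message: chat.ChatCompletionMessage) -> Iterable[ModelResponsePart]:
        # Reuse the default thinking/content/tool-call mapping, then append an extra part.
        return itertools.chain(super()._process_parts(message), [TextPart('(post-processed)')])
```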
79 changes: 76 additions & 3 deletions pydantic_ai_slim/pydantic_ai/models/openrouter.py
@@ -1,5 +1,6 @@
from __future__ import annotations as _annotations

import itertools
from collections.abc import Iterable
from dataclasses import dataclass, field
from typing import Annotated, Any, Literal, TypeAlias, cast
@@ -9,7 +10,10 @@

from ..exceptions import ModelHTTPError
from ..messages import (
    BinaryImage,
    FilePart,
    FinishReason,
    ModelResponsePart,
    ModelResponseStreamEvent,
    ThinkingPart,
)
@@ -197,6 +201,12 @@ class OpenRouterUsageConfig(TypedDict, total=False):
    include: bool


class OpenRouterImageConfig(TypedDict, total=False):
"""Configuration for OpenRouter image generation."""

aspect_ratio: Literal['1:1', '2:3', '3:2', '3:4', '4:3', '4:5', '5:4', '9:16', '16:9', '21:9']


class OpenRouterModelSettings(ModelSettings, total=False):
"""Settings used for an OpenRouter model request."""

@@ -230,12 +240,24 @@ class OpenRouterModelSettings(ModelSettings, total=False):
    The reasoning config object consolidates settings for controlling reasoning strength across different models. [See more](https://openrouter.ai/docs/use-cases/reasoning-tokens)
    """

    openrouter_modalities: list[Literal['text', 'image']]
    """To control the modalities of the model.

    The modalities config object consolidates settings for controlling the output modalities of the model. [See more](https://openrouter.ai/docs/guides/overview/multimodal/image-generation)
    """

    openrouter_usage: OpenRouterUsageConfig
    """To control the usage of the model.

    The usage config object consolidates settings for enabling detailed usage information. [See more](https://openrouter.ai/docs/use-cases/usage-accounting)
    """

    openrouter_image_config: OpenRouterImageConfig
    """To control the image generation of the model.

    The image config object consolidates settings for controlling the image generation of the model. [See more](https://openrouter.ai/docs/guides/overview/multimodal/image-generation#image-aspect-ratio-configuration)
    """


class _OpenRouterError(BaseModel):
"""Utility class to validate error messages from OpenRouter."""
@@ -366,6 +388,14 @@ class _OpenRouterChatCompletionMessageFunctionToolCall(chat.ChatCompletionMessag
]


class _OpenRouterImageUrl(BaseModel):
    url: str


class _OpenRouterImageGeneration(BaseModel):
    image_url: _OpenRouterImageUrl


class _OpenRouterCompletionMessage(chat.ChatCompletionMessage):
"""Wrapped chat completion message with OpenRouter specific attributes."""

@@ -378,6 +408,9 @@ class _OpenRouterCompletionMessage(chat.ChatCompletionMessage):
    tool_calls: list[_OpenRouterChatCompletionMessageToolCallUnion] | None = None  # type: ignore[reportIncompatibleVariableOverride]
    """The tool calls generated by the model, such as function calls."""

    images: list[_OpenRouterImageGeneration] | None = None
    """The images generated by the model, if any."""


class _OpenRouterChoice(chat_completion.Choice):
"""Wraps OpenAI chat completion choice with OpenRouter specific attributes."""
@@ -488,6 +521,10 @@ def _openrouter_settings_to_openai_settings(model_settings: OpenRouterModelSetti
        extra_body['reasoning'] = reasoning
    if usage := model_settings.pop('openrouter_usage', None):
        extra_body['usage'] = usage
    if modalities := model_settings.pop('openrouter_modalities', None):
        extra_body['modalities'] = modalities
    if image_config := model_settings.pop('openrouter_image_config', None):
        extra_body['image_config'] = image_config

    model_settings['extra_body'] = extra_body
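For reference (commentary, not part of the diff): with the settings shown in the docs above, the request body sent to OpenRouter would carry roughly these extra fields via `extra_body`:

```python
extra_body = {
    'modalities': ['image', 'text'],
    'image_config': {'aspect_ratio': '3:2'},
}
```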

@@ -535,13 +572,27 @@ def _validate_completion(self, response: chat.ChatCompletion) -> _OpenRouterChat
        return response

    @override
    def _process_thinking(self, message: chat.ChatCompletionMessage) -> list[ThinkingPart] | None:
    def _process_thinking(self, message: chat.ChatCompletionMessage) -> Iterable[ThinkingPart]:
        assert isinstance(message, _OpenRouterCompletionMessage)

        if reasoning_details := message.reasoning_details:
            return [_from_reasoning_detail(detail) for detail in reasoning_details]
            for detail in reasoning_details:
                yield _from_reasoning_detail(detail)
        else:
            return super()._process_thinking(message)
            yield from super()._process_thinking(message)

    def _process_image(self, message: chat.ChatCompletionMessage) -> Iterable[FilePart]:
        assert isinstance(message, _OpenRouterCompletionMessage)

        if images := message.images:
            for image in images:
                yield FilePart(
                    content=BinaryImage.from_data_uri(image.image_url.url),
                )

    @override
    def _process_parts(self, message: chat.ChatCompletionMessage) -> Iterable[ModelResponsePart]:
        return itertools.chain(super()._process_parts(message), self._process_image(message))

    @override
    def _process_provider_details(self, response: chat.ChatCompletion) -> dict[str, Any]:
@@ -594,6 +645,9 @@ class _OpenRouterChoiceDelta(chat_completion_chunk.ChoiceDelta):
    reasoning_details: list[_OpenRouterReasoningDetail] | None = None
    """The reasoning details associated with the message, if any."""

    images: list[_OpenRouterImageGeneration] | None = None
    """The images generated by the model, if any."""


class _OpenRouterChunkChoice(chat_completion_chunk.Choice):
"""Wraps OpenAI chat completion chunk choice with OpenRouter specific attributes."""
@@ -661,6 +715,25 @@ def _map_thinking_delta(self, choice: chat_completion_chunk.Choice) -> Iterable[
        else:
            return super()._map_thinking_delta(choice)

    def _map_file_delta(self, choice: chat_completion_chunk.Choice) -> Iterable[ModelResponseStreamEvent]:
        assert isinstance(choice, _OpenRouterChunkChoice)

        if images := choice.delta.images:
            for image in images:
                yield self._parts_manager.handle_part(
                    vendor_part_id=None,
                    part=FilePart(
                        content=BinaryImage.from_data_uri(image.image_url.url),
                    ),
                )

    @override
    def _map_part_delta(self, choice: chat_completion_chunk.Choice) -> Iterable[ModelResponseStreamEvent]:
        return itertools.chain(
            super()._map_part_delta(choice),
            self._map_file_delta(choice),
        )

    @override
    def _map_provider_details(self, chunk: chat.ChatCompletionChunk) -> dict[str, Any] | None:
        assert isinstance(chunk, _OpenRouterChatCompletionChunk)
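For context (commentary, not part of the diff): OpenRouter returns generated images as base64 data URIs under `message.images[*].image_url.url`, which the code above decodes with `BinaryImage.from_data_uri`. A rough sketch of that conversion, assuming `from_data_uri` accepts standard `data:` URIs (the payload below is truncated and purely illustrative):

```python
from pydantic_ai.messages import BinaryImage

# OpenRouter-style image payload: a data URI carrying base64-encoded image bytes.
data_uri = 'data:image/png;base64,iVBORw0KGgo='  # truncated, illustration only

image = BinaryImage.from_data_uri(data_uri)
print(image.media_type)  # -> 'image/png'
```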


38 changes: 38 additions & 0 deletions tests/models/test_openrouter.py
@@ -7,6 +7,7 @@

from pydantic_ai import (
    Agent,
    BinaryImage,
    ModelHTTPError,
    ModelMessage,
    ModelRequest,
@@ -406,3 +407,40 @@ class FindEducationContentFilters(BaseModel):
            }
        ]
    )


async def test_openrouter_image_generation(allow_model_requests: None, openrouter_api_key: str) -> None:
    provider = OpenRouterProvider(api_key=openrouter_api_key)
    model = OpenRouterModel(
        model_name='google/gemini-2.5-flash-image-preview',
        provider=provider,
    )
    settings = OpenRouterModelSettings(openrouter_modalities=['image', 'text'])

    agent = Agent(model=model, output_type=str | BinaryImage, model_settings=settings)

    result = await agent.run('A cat')

    assert result.response.text == snapshot('Here is a cat for you! ')
    assert isinstance(result.output, BinaryImage)


async def test_openrouter_image_generation_streaming(allow_model_requests: None, openrouter_api_key: str) -> None:
    provider = OpenRouterProvider(api_key=openrouter_api_key)
    model = OpenRouterModel(
        model_name='google/gemini-2.5-flash-image-preview',
        provider=provider,
    )
    settings = OpenRouterModelSettings(
        openrouter_modalities=['image', 'text'], openrouter_image_config={'aspect_ratio': '3:2'}
    )

    agent = Agent(model=model, output_type=str | BinaryImage, model_settings=settings)

    async with agent.run_stream('A dog') as result:
        async for output in result.stream_output():
            if isinstance(output, str):
                assert output == snapshot('Here you go: ')
            else:
                assert isinstance(output, BinaryImage)
                assert output.media_type == snapshot('image/png')

> **Collaborator** (on `if isinstance(output, str):`): This means we may never actually get to the image assertions!