Skip to content

Commit 01e51c1

Browse files
dicksontsai and claude
authored and committed
Fix subprocess deadlock with MCP servers via stderr redirection (anthropics#103)
## Summary

Fixes a critical deadlock issue that occurs when MCP servers produce verbose stderr output. The SDK would hang indefinitely when the stderr pipe buffer filled up.

## The Problem

The deadlock occurred due to sequential reading of subprocess streams:

1. SDK reads stdout completely before reading stderr
2. When the stderr pipe buffer fills (64KB on Linux, 16KB on macOS), the subprocess blocks on write
3. The blocked subprocess can never get back to writing stdout, while the parent is still waiting on stdout → **DEADLOCK** 🔒

## The Solution

Redirect stderr to a temporary file instead of a pipe:

- **No pipe buffer** = no possibility of deadlock
- Temp file can grow as needed (no 64KB limit)
- Still capture stderr for error reporting (last 100 lines)
- Works consistently across all async backends

## Implementation Details

- `stderr=tempfile.NamedTemporaryFile(...)` (created with `delete=False`) instead of `stderr=PIPE`
- Use `deque(maxlen=100)` to keep only recent stderr lines in memory
- Temp file is closed and deleted on disconnect
- Add `[stderr truncated, showing last 100 lines]` message when buffer is full

## Testing

- Verified no deadlock with 150+ lines of stderr output
- Confirmed stderr is still captured for error reporting
- All existing tests pass
- Works with asyncio, trio, and other anyio backends

## Impact

- Fixes consistent hangs in production with MCP servers
- No functional regression - stderr handling is preserved
- Simpler than concurrent reading alternatives
- More robust than pipe-based solutions

Fixes the issue reported in Slack where the SDK would hang indefinitely when receiving messages from MCP servers with verbose logging.

🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
Signed-off-by: Rushil Patel <rpatel@codegen.com>
1 parent 85038c7 commit 01e51c1

File tree

1 file changed

+37
-36
lines changed

1 file changed

+37
-36
lines changed

src/claude_code_sdk/_internal/transport/subprocess_cli.py

Lines changed: 37 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@
44
import logging
55
import os
66
import shutil
7+
import tempfile
8+
from collections import deque
79
from collections.abc import AsyncIterable, AsyncIterator
810
from pathlib import Path
911
from subprocess import PIPE
@@ -47,6 +49,7 @@ def __init__(
4749
self._request_counter = 0
4850
self._close_stdin_after_prompt = close_stdin_after_prompt
4951
self._task_group: anyio.abc.TaskGroup | None = None
52+
self._stderr_file: Any = None # tempfile.NamedTemporaryFile
5053

5154
def configure(self, prompt: str, options: ClaudeCodeOptions) -> None:
5255
"""Configure transport with prompt and options."""
@@ -153,20 +156,24 @@ async def connect(self) -> None:
153156

154157
cmd = self._build_command()
155158
try:
159+
# Create a temp file for stderr to avoid pipe buffer deadlock
160+
# We can't use context manager as we need it for the subprocess lifetime
161+
self._stderr_file = tempfile.NamedTemporaryFile( # noqa: SIM115
162+
mode="w+", prefix="claude_stderr_", suffix=".log", delete=False
163+
)
164+
156165
# Enable stdin pipe for both modes (but we'll close it for string mode)
157166
self._process = await anyio.open_process(
158167
cmd,
159168
stdin=PIPE,
160169
stdout=PIPE,
161-
stderr=PIPE,
170+
stderr=self._stderr_file,
162171
cwd=self._cwd,
163172
env={**os.environ, "CLAUDE_CODE_ENTRYPOINT": "sdk-py"},
164173
)
165174

166175
if self._process.stdout:
167176
self._stdout_stream = TextReceiveStream(self._process.stdout)
168-
if self._process.stderr:
169-
self._stderr_stream = TextReceiveStream(self._process.stderr)
170177

171178
# Handle stdin based on mode
172179
if self._is_streaming:
@@ -214,6 +221,15 @@ async def disconnect(self) -> None:
214221
except ProcessLookupError:
215222
pass
216223

224+
# Clean up temp file
225+
if self._stderr_file:
226+
try:
227+
self._stderr_file.close()
228+
Path(self._stderr_file.name).unlink()
229+
except Exception:
230+
pass
231+
self._stderr_file = None
232+
217233
self._process = None
218234
self._stdout_stream = None
219235
self._stderr_stream = None
@@ -267,10 +283,6 @@ async def receive_messages(self) -> AsyncIterator[dict[str, Any]]:
267283
if not self._process or not self._stdout_stream:
268284
raise CLIConnectionError("Not connected")
269285

270-
# Safety constants
271-
max_stderr_size = 10 * 1024 * 1024 # 10MB
272-
stderr_timeout = 30.0 # 30 seconds
273-
274286
json_buffer = ""
275287

276288
# Process stdout messages first
@@ -328,36 +340,19 @@ async def receive_messages(self) -> AsyncIterator[dict[str, Any]]:
328340
# Client disconnected - still need to clean up
329341
pass
330342

331-
# Process stderr with safety limits
332-
stderr_lines = []
333-
stderr_size = 0
334-
335-
if self._stderr_stream:
343+
# Read stderr from temp file (keep only last N lines for memory efficiency)
344+
stderr_lines: deque[str] = deque(maxlen=100) # Keep last 100 lines
345+
if self._stderr_file:
336346
try:
337-
# Use timeout to prevent hanging
338-
with anyio.fail_after(stderr_timeout):
339-
async for line in self._stderr_stream:
340-
line_text = line.strip()
341-
line_size = len(line_text)
342-
343-
# Enforce memory limit
344-
if stderr_size + line_size > max_stderr_size:
345-
stderr_lines.append(
346-
f"[stderr truncated after {stderr_size} bytes]"
347-
)
348-
# Drain rest of stream without storing
349-
async for _ in self._stderr_stream:
350-
pass
351-
break
352-
347+
# Flush any pending writes
348+
self._stderr_file.flush()
349+
# Read from the beginning
350+
self._stderr_file.seek(0)
351+
for line in self._stderr_file:
352+
line_text = line.strip()
353+
if line_text:
353354
stderr_lines.append(line_text)
354-
stderr_size += line_size
355-
356-
except TimeoutError:
357-
stderr_lines.append(
358-
f"[stderr collection timed out after {stderr_timeout}s]"
359-
)
360-
except anyio.ClosedResourceError:
355+
except Exception:
361356
pass
362357

363358
# Check process completion and handle errors
@@ -366,7 +361,13 @@ async def receive_messages(self) -> AsyncIterator[dict[str, Any]]:
366361
except Exception:
367362
returncode = -1
368363

369-
stderr_output = "\n".join(stderr_lines) if stderr_lines else ""
364+
# Convert deque to string for error reporting
365+
stderr_output = "\n".join(list(stderr_lines)) if stderr_lines else ""
366+
if len(stderr_lines) == stderr_lines.maxlen:
367+
stderr_output = (
368+
f"[stderr truncated, showing last {stderr_lines.maxlen} lines]\n"
369+
+ stderr_output
370+
)
370371

371372
# Use exit code for error detection, not string matching
372373
if returncode is not None and returncode != 0:

0 commit comments

Comments
 (0)