15 changes: 15 additions & 0 deletions docs/context-management.md
@@ -101,3 +101,18 @@ Remove oldest 50% of messages.
- About as fast as `/clear`
- `/truncate 100` is equivalent to `/clear`
- **Irreversible** - messages are permanently removed

### OpenAI Responses API Limitation

⚠️ **`/truncate` does not work with OpenAI models** due to the Responses API architecture:

- OpenAI's Responses API stores conversation state server-side
- Manual message deletion via `/truncate` doesn't affect the server-side state
- Instead, OpenAI models use **automatic truncation** (`truncation: "auto"`)
- When context exceeds the limit, the API automatically drops messages from the middle of the conversation

**Workarounds for OpenAI:**

- Use `/clear` to start a fresh conversation
- Use `/compact` to intelligently summarize and reduce context
- Rely on automatic truncation (enabled by default)
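
To make the request-level effect concrete, here is a hedged sketch of the kind of Responses API call that ends up carrying the flag. The endpoint and `truncation` field follow the public OpenAI Responses API; the model and input values are placeholders, not taken from this PR.

```typescript
// Illustrative request only; model and input are placeholders.
const response = await fetch("https://api.openai.com/v1/responses", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    Authorization: `Bearer ${process.env.OPENAI_API_KEY ?? ""}`,
  },
  body: JSON.stringify({
    model: "gpt-4o-mini",
    input: [{ role: "user", content: "Summarize our discussion so far." }],
    // With "auto", the API drops items from the middle of the conversation once the
    // context window is exceeded, instead of returning a context-length error.
    truncation: "auto",
  }),
});
```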
83 changes: 79 additions & 4 deletions src/services/aiService.ts
@@ -174,7 +174,10 @@ export class AIService extends EventEmitter {
* constructor, ensuring automatic parity with Vercel AI SDK - any configuration options
* supported by the provider will work without modification.
*/
private createModel(modelString: string): Result<LanguageModel, SendMessageError> {
private createModel(
modelString: string,
options?: { disableAutoTruncation?: boolean }
): Result<LanguageModel, SendMessageError> {
try {
// Parse model string (format: "provider:model-id")
const [providerName, modelId] = modelString.split(":");
@@ -220,10 +223,81 @@ export class AIService extends EventEmitter {
? (providerConfig.fetch as typeof fetch)
: defaultFetchWithUnlimitedTimeout;

// Wrap fetch to force truncation: "auto" for OpenAI Responses API calls.
// This is a temporary override until @ai-sdk/openai supports passing
// truncation via providerOptions. Safe because it only targets the
// OpenAI Responses endpoint and leaves other providers untouched.
// Can be disabled via options for testing purposes.
const disableAutoTruncation = options?.disableAutoTruncation ?? false;
const fetchWithOpenAITruncation = Object.assign(
async (
input: Parameters<typeof fetch>[0],
init?: Parameters<typeof fetch>[1]
): Promise<Response> => {
try {
const urlString = (() => {
if (typeof input === "string") {
return input;
}
if (input instanceof URL) {
return input.toString();
}
if (typeof input === "object" && input !== null && "url" in input) {
const possibleUrl = (input as { url?: unknown }).url;
if (typeof possibleUrl === "string") {
return possibleUrl;
}
}
return "";
})();

const method = (init?.method ?? "GET").toUpperCase();
const isOpenAIResponses = /\/v1\/responses(\?|$)/.test(urlString);

const body = init?.body;
if (
!disableAutoTruncation &&
isOpenAIResponses &&
method === "POST" &&
typeof body === "string"
) {
// Clone headers to avoid mutating caller-provided objects
const headers = new Headers(init?.headers);
// Remove content-length if present, since body will change
headers.delete("content-length");

try {
const json = JSON.parse(body) as Record<string, unknown>;
// Only set if not already present
if (json.truncation === undefined) {
json.truncation = "auto";
}
const newBody = JSON.stringify(json);
const newInit: RequestInit = { ...init, headers, body: newBody };
return fetchToUse(input, newInit);
} catch {
// If body isn't JSON, fall through to normal fetch
return fetchToUse(input, init);
}
}

// Default passthrough
return fetchToUse(input, init);
} catch {
// On any unexpected error, fall back to original fetch
return fetchToUse(input, init);
}
},
"preconnect" in fetchToUse &&
typeof (fetchToUse as typeof fetch).preconnect === "function"
? { preconnect: (fetchToUse as typeof fetch).preconnect.bind(fetchToUse) }
: {}
);

const provider = createOpenAI({
...providerConfig,
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-explicit-any
fetch: fetchToUse as any,
fetch: fetchWithOpenAITruncation as any,
});
// Use Responses API for persistence and built-in tools
const baseModel = provider.responses(modelId);
@@ -267,7 +341,8 @@ export class AIService extends EventEmitter {
toolPolicy?: ToolPolicy,
abortSignal?: AbortSignal,
additionalSystemInstructions?: string,
maxOutputTokens?: number
maxOutputTokens?: number,
disableAutoTruncation?: boolean
): Promise<Result<void, SendMessageError>> {
try {
// DEBUG: Log streamMessage call
@@ -281,7 +356,7 @@ export class AIService extends EventEmitter {
await this.partialService.commitToHistory(workspaceId);

// Create model instance with early API key validation
const modelResult = this.createModel(modelString);
const modelResult = this.createModel(modelString, { disableAutoTruncation });
if (!modelResult.success) {
return Err(modelResult.error);
}
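Outside the diff, the override above reduces to a general "wrap fetch and patch the JSON body for one endpoint" pattern. A minimal standalone sketch of that technique, assuming only the Fetch API; the helper name and parameters below are illustrative and not part of the codebase:

```typescript
// Sketch only: generic fetch wrapper that injects one JSON field into POST bodies
// aimed at a matching endpoint. The production wrapper above additionally preserves
// Bun's fetch.preconnect and falls back to the original fetch on unexpected errors.
function withInjectedField(
  baseFetch: typeof fetch,
  endpointPattern: RegExp,
  field: string,
  value: unknown
): typeof fetch {
  return (async (input: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
    const url =
      typeof input === "string" ? input : input instanceof URL ? input.toString() : input.url;
    const method = (init?.method ?? "GET").toUpperCase();

    if (endpointPattern.test(url) && method === "POST" && typeof init?.body === "string") {
      try {
        const json = JSON.parse(init.body) as Record<string, unknown>;
        if (json[field] === undefined) {
          json[field] = value; // only add the field, never overwrite caller intent
        }
        const headers = new Headers(init.headers);
        headers.delete("content-length"); // body length changed
        return baseFetch(input, { ...init, headers, body: JSON.stringify(json) });
      } catch {
        // Body was not JSON: fall through to the unmodified request
      }
    }
    return baseFetch(input, init);
  }) as typeof fetch;
}

// Usage mirroring the override above:
const patchedFetch = withInjectedField(fetch, /\/v1\/responses(\?|$)/, "truncation", "auto");
```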
6 changes: 5 additions & 1 deletion src/services/ipcMain.ts
@@ -435,6 +435,7 @@ export class IpcMain {
toolPolicy,
additionalSystemInstructions,
maxOutputTokens,
disableAutoTruncation,
} = options ?? {};
log.debug("sendMessage handler: Received", {
workspaceId,
@@ -445,6 +446,7 @@ export class IpcMain {
toolPolicy,
additionalSystemInstructions,
maxOutputTokens,
disableAutoTruncation,
});
try {
// Early exit: empty message = either interrupt (if streaming) or invalid input
@@ -539,6 +541,7 @@ export class IpcMain {
toolPolicy,
additionalSystemInstructions,
maxOutputTokens,
disableAutoTruncation,
});
const streamResult = await this.aiService.streamMessage(
historyResult.data,
@@ -548,7 +551,8 @@ export class IpcMain {
toolPolicy,
undefined,
additionalSystemInstructions,
maxOutputTokens
maxOutputTokens,
disableAutoTruncation
);
log.debug("sendMessage handler: Stream completed");
return streamResult;
1 change: 1 addition & 0 deletions src/types/ipc.ts
@@ -131,6 +131,7 @@ export interface SendMessageOptions {
toolPolicy?: ToolPolicy;
additionalSystemInstructions?: string;
maxOutputTokens?: number;
disableAutoTruncation?: boolean; // For testing truncation behavior
}

// API method signatures (shared between main and preload)
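A minimal call-site sketch of the new option; the `@/types/ipc` import path mirrors the aliases used elsewhere in the repo, and `Partial` is used because any required fields of `SendMessageOptions` outside this hunk are not shown here.

```typescript
import type { SendMessageOptions } from "@/types/ipc";

// Only the fields visible in this hunk are set; anything required elsewhere in the
// interface is outside this diff, hence Partial.
const truncationTestOptions: Partial<SendMessageOptions> = {
  maxOutputTokens: 4096,
  // Opt out of the forced truncation: "auto" so context-overflow errors surface,
  // e.g. in integration tests that assert on the error path.
  disableAutoTruncation: true,
};
```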
20 changes: 19 additions & 1 deletion src/utils/ai/providerOptions.ts
@@ -11,12 +11,29 @@ import { ANTHROPIC_THINKING_BUDGETS, OPENAI_REASONING_EFFORT } from "@/types/thi
import { log } from "@/services/log";
import type { CmuxMessage } from "@/types/message";

/**
* Extended OpenAI Responses provider options to include truncation
*
* NOTE: The SDK types don't yet include this parameter, but it's supported by the OpenAI API.
* However, the @ai-sdk/openai v2.0.40 implementation does NOT pass truncation from provider
* options - it only sets it based on modelConfig.requiredAutoTruncation.
*
* This type extension is prepared for a future SDK update that will properly map the
* truncation parameter from provider options to the API request.
*
* Current behavior: the SDK drops this value from provider options, so AIService instead wraps
* fetch in createModel() to inject truncation: "auto" directly into Responses API requests.
* /clear and /compact remain available for manually reducing conversation history.
*/
type ExtendedOpenAIResponsesProviderOptions = OpenAIResponsesProviderOptions & {
truncation?: "auto" | "disabled";
};

/**
* Provider-specific options structure for AI SDK
*/
type ProviderOptions =
| { anthropic: AnthropicProviderOptions }
| { openai: OpenAIResponsesProviderOptions }
| { openai: ExtendedOpenAIResponsesProviderOptions }
| Record<string, never>; // Empty object for unsupported providers

/**
@@ -111,6 +128,7 @@ export function buildProviderOptions(
parallelToolCalls: true, // Always enable concurrent tool execution
// TODO: allow this to be configured
serviceTier: "priority", // Always use priority tier for best performance
truncation: "auto", // Automatically truncate conversation to fit context window
// Conditionally add reasoning configuration
...(reasoningEffort && {
reasoningEffort,
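For context, a sketch of where the options built by `buildProviderOptions` are consumed, assuming the AI SDK v5 `streamText` call shape; the actual call site in aiService.ts is not part of this diff, and the declared values are placeholders.

```typescript
import { streamText, type LanguageModel, type ModelMessage } from "ai";

declare const model: LanguageModel; // e.g. the provider.responses(modelId) instance from createModel()
declare const messages: ModelMessage[]; // converted conversation history

const result = streamText({
  model,
  messages,
  providerOptions: {
    openai: {
      parallelToolCalls: true,
      serviceTier: "priority",
      // Ignored by @ai-sdk/openai v2.0.40 (see note above); the fetch wrapper in
      // aiService.ts injects truncation: "auto" at the HTTP layer instead.
      truncation: "auto",
    },
  },
});
```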
104 changes: 103 additions & 1 deletion tests/ipcMain/sendMessage.test.ts
@@ -698,12 +698,14 @@ describeIntegration("IpcMain sendMessage integration tests", () => {

// Now try to send a new message - should trigger token limit error
// due to accumulated history
// Disable auto-truncation to force context error
const result = await sendMessageWithModel(
env.mockIpcRenderer,
workspaceId,
"What is the weather?",
provider,
model
model,
{ disableAutoTruncation: true }
);

// IPC call itself should succeed (errors come through stream events)
@@ -956,4 +958,104 @@ describeIntegration("IpcMain sendMessage integration tests", () => {
15000
);
});

// OpenAI auto truncation integration test
// This test verifies that the truncation: "auto" parameter works correctly
// by first forcing a context overflow error, then verifying recovery with auto-truncation
describeIntegration("OpenAI auto truncation integration", () => {
const provider = "openai";
const model = "gpt-4o-mini";

test.concurrent(
"respects disableAutoTruncation flag",
async () => {
const { env, workspaceId, cleanup } = await setupWorkspace(provider);

try {
// Phase 1: Build up large conversation history to exceed context limit
// HACK: Use HistoryService directly to populate history without API calls.
// This is a test-only shortcut. Real application code should NEVER bypass IPC.
const historyService = new HistoryService(env.config);

// gpt-4o-mini has ~128k token context window
// Create ~50k chars per message (~12.5k tokens)
const messageSize = 50_000;
const largeText = "A".repeat(messageSize);

// Need ~150k tokens to exceed 128k context limit
// 12 messages × 12.5k tokens = 150k tokens
const messageCount = 12;

// Build conversation history with alternating user/assistant messages
for (let i = 0; i < messageCount; i++) {
const isUser = i % 2 === 0;
const role = isUser ? "user" : "assistant";
const message = createCmuxMessage(`history-msg-${i}`, role, largeText, {});

const result = await historyService.appendToHistory(workspaceId, message);
expect(result.success).toBe(true);
}

// Now send a new message with auto-truncation disabled - should trigger error
const result = await sendMessageWithModel(
env.mockIpcRenderer,
workspaceId,
"This should trigger a context error",
provider,
model,
{ disableAutoTruncation: true }
);

// IPC call itself should succeed (errors come through stream events)
expect(result.success).toBe(true);

// Wait for either stream-end or stream-error
const collector = createEventCollector(env.sentEvents, workspaceId);
await Promise.race([
collector.waitForEvent("stream-end", 10000),
collector.waitForEvent("stream-error", 10000),
]);

// Should have received error event with context exceeded error
expect(collector.hasError()).toBe(true);

// Check that error message contains context-related keywords
const errorEvents = collector
.getEvents()
.filter((e) => "type" in e && e.type === "stream-error");
expect(errorEvents.length).toBeGreaterThan(0);

const errorEvent = errorEvents[0];
if (errorEvent && "error" in errorEvent) {
const errorStr = String(errorEvent.error).toLowerCase();
expect(
errorStr.includes("context") ||
errorStr.includes("length") ||
errorStr.includes("exceed") ||
errorStr.includes("token")
).toBe(true);
}

// Phase 2: Send message with auto-truncation enabled (should succeed)
env.sentEvents.length = 0;
const successResult = await sendMessageWithModel(
env.mockIpcRenderer,
workspaceId,
"This should succeed with auto-truncation",
provider,
model
// disableAutoTruncation defaults to false (auto-truncation enabled)
);

expect(successResult.success).toBe(true);
const successCollector = createEventCollector(env.sentEvents, workspaceId);
await successCollector.waitForEvent("stream-end", 30000);
assertStreamSuccess(successCollector);
} finally {
await cleanup();
}
},
60000 // 1 minute timeout (much faster since we don't make many API calls)
);
});
});