Skip to content

Commit 7d668fe

Browse files
🤖 feat: force compaction when approaching context limit (#778)
## Summary

Automatically triggers compaction when live token usage approaches the context window limit during streaming. This prevents the AI from hitting context limit errors mid-response.

## Changes

### Force Compaction Logic

- Extended `checkAutoCompaction()` to return `shouldForceCompact` flag
- Uses `liveUsage` (real-time streaming tokens) with fallback to `lastUsage`
- Triggers when remaining tokens ≤ 5200 (2× expected compaction output)
- Works even with empty `usageHistory` (first message streaming)

### AIView Integration

- New `useEffect` monitors `shouldForceCompact` during active streams
- Tracks triggered stream ID to prevent duplicate compactions
- Sends compaction request with "Continue with current task" as follow-up

### Queue & Restore Fixes

- **Moved restore-to-input** from `stream-abort` handler to IPC `interruptStream` handler
- User interrupts (Ctrl+C) still restore queued messages to input
- Internal aborts (compaction flow) preserve queue for follow-up

### Shared Constants

- `DEFAULT_COMPACTION_WORD_TARGET = 2000`
- `WORDS_TO_TOKENS_RATIO = 1.3`
- `FORCE_COMPACTION_TOKEN_BUFFER = 5200` (derived: 2 × 2000 × 1.3)

## Testing

- 31 unit tests for `checkAutoCompaction` including force compaction scenarios
- Manually tested by asking the agent to read a large file in 22 chunks whilst summarising each chunk. The process completed successfully even with a force-compaction triggering mid-task.

---

_Generated with `mux`_
1 parent 36eb40e commit 7d668fe

File tree

7 files changed

+232
-60
lines changed

7 files changed

+232
-60
lines changed

src/browser/components/AIView.tsx

Lines changed: 62 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ import { evictModelFromLRU } from "@/browser/hooks/useModelLRU";
3737
import { QueuedMessage } from "./Messages/QueuedMessage";
3838
import { CompactionWarning } from "./CompactionWarning";
3939
import { checkAutoCompaction } from "@/browser/utils/compaction/autoCompactionCheck";
40+
import { executeCompaction } from "@/browser/utils/chatCommands";
4041
import { useProviderOptions } from "@/browser/hooks/useProviderOptions";
4142
import { useAutoCompactionSettings } from "../hooks/useAutoCompactionSettings";
4243
import { useSendMessageOptions } from "@/browser/hooks/useSendMessageOptions";
@@ -120,6 +121,67 @@ const AIViewInner: React.FC<AIViewProps> = ({
120121
undefined
121122
);
122123

124+
// Use send options for auto-compaction check
125+
const pendingSendOptions = useSendMessageOptions(workspaceId);
126+
127+
// Track if we've already triggered force compaction for this stream
128+
const forceCompactionTriggeredRef = useRef<string | null>(null);
129+
130+
// Extract state from workspace state
131+
const { messages, canInterrupt, isCompacting, loading, currentModel } = workspaceState;
132+
133+
// Get active stream message ID for token counting
134+
const activeStreamMessageId = aggregator.getActiveStreamMessageId();
135+
136+
// Use pending send model for auto-compaction check, not the last stream's model.
137+
// This ensures the threshold is based on the model the user will actually send with,
138+
// preventing context-length errors when switching from a large-context to smaller model.
139+
const pendingModel = pendingSendOptions.model;
140+
141+
const autoCompactionResult = checkAutoCompaction(
142+
workspaceUsage,
143+
pendingModel,
144+
use1M,
145+
autoCompactionEnabled,
146+
autoCompactionThreshold / 100
147+
);
148+
149+
// Show warning when: shouldShowWarning flag is true AND not currently compacting
150+
const shouldShowCompactionWarning = !isCompacting && autoCompactionResult.shouldShowWarning;
151+
152+
// Force compaction when live usage shows we're about to hit context limit
153+
useEffect(() => {
154+
if (
155+
!autoCompactionResult.shouldForceCompact ||
156+
!canInterrupt ||
157+
isCompacting ||
158+
forceCompactionTriggeredRef.current === activeStreamMessageId
159+
) {
160+
return;
161+
}
162+
163+
forceCompactionTriggeredRef.current = activeStreamMessageId ?? null;
164+
void executeCompaction({
165+
workspaceId,
166+
sendMessageOptions: pendingSendOptions,
167+
continueMessage: { text: "Continue with the current task" },
168+
});
169+
}, [
170+
autoCompactionResult.shouldForceCompact,
171+
canInterrupt,
172+
isCompacting,
173+
activeStreamMessageId,
174+
workspaceId,
175+
pendingSendOptions,
176+
]);
177+
178+
// Reset force compaction trigger when stream ends
179+
useEffect(() => {
180+
if (!canInterrupt) {
181+
forceCompactionTriggeredRef.current = null;
182+
}
183+
}, [canInterrupt]);
184+
123185
// Auto-retry state - minimal setter for keybinds and message sent handler
124186
// RetryBarrier manages its own state, but we need this for interrupt keybind
125187
const [, setAutoRetry] = usePersistedState<boolean>(
@@ -144,9 +206,6 @@ const AIViewInner: React.FC<AIViewProps> = ({
144206
markUserInteraction,
145207
} = useAutoScroll();
146208

147-
// Use send options for auto-compaction check
148-
const pendingSendOptions = useSendMessageOptions(workspaceId);
149-
150209
// ChatInput API for focus management
151210
const chatInputAPI = useRef<ChatInputAPI | null>(null);
152211
const handleChatInputReady = useCallback((api: ChatInputAPI) => {
@@ -329,28 +388,6 @@ const AIViewInner: React.FC<AIViewProps> = ({
329388
);
330389
}
331390

332-
// Extract state from workspace state
333-
const { messages, canInterrupt, isCompacting, loading, currentModel } = workspaceState;
334-
335-
// Get active stream message ID for token counting
336-
const activeStreamMessageId = aggregator.getActiveStreamMessageId();
337-
338-
// Use pending send model for auto-compaction check, not the last stream's model.
339-
// This ensures the threshold is based on the model the user will actually send with,
340-
// preventing context-length errors when switching from a large-context to smaller model.
341-
const pendingModel = pendingSendOptions.model;
342-
343-
const autoCompactionResult = checkAutoCompaction(
344-
workspaceUsage,
345-
pendingModel,
346-
use1M,
347-
autoCompactionEnabled,
348-
autoCompactionThreshold / 100
349-
);
350-
351-
// Show warning when: shouldShowWarning flag is true AND not currently compacting
352-
const shouldShowCompactionWarning = !isCompacting && autoCompactionResult.shouldShowWarning;
353-
354391
// Note: We intentionally do NOT reset autoRetry when streams start.
355392
// If user pressed the interrupt key, autoRetry stays false until they manually retry.
356393
// This makes state transitions explicit and predictable.

src/browser/utils/chatCommands.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import { resolveCompactionModel } from "@/browser/utils/messages/compactionModel
2323
import type { ImageAttachment } from "../components/ImageAttachments";
2424
import { dispatchWorkspaceSwitch } from "./workspaceEvents";
2525
import { getRuntimeKey, copyWorkspaceStorage } from "@/common/constants/storage";
26+
import { DEFAULT_COMPACTION_WORD_TARGET, WORDS_TO_TOKENS_RATIO } from "@/common/constants/ui";
2627

2728
// ============================================================================
2829
// Workspace Creation
@@ -572,7 +573,9 @@ export function prepareCompactionMessage(options: CompactionOptions): {
572573
metadata: MuxFrontendMetadata;
573574
sendOptions: SendMessageOptions;
574575
} {
575-
const targetWords = options.maxOutputTokens ? Math.round(options.maxOutputTokens / 1.3) : 2000;
576+
const targetWords = options.maxOutputTokens
577+
? Math.round(options.maxOutputTokens / WORDS_TO_TOKENS_RATIO)
578+
: DEFAULT_COMPACTION_WORD_TARGET;
576579

577580
// Build compaction message with optional continue context
578581
let messageText = `Summarize this conversation into a compact form for a new Assistant to continue helping the user. Focus entirely on the summary of what has happened. Do not suggest next steps or future actions. Use approximately ${targetWords} words.`;

src/browser/utils/compaction/autoCompactionCheck.test.ts

Lines changed: 77 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { checkAutoCompaction } from "./autoCompactionCheck";
33
import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore";
44
import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
55
import { KNOWN_MODELS } from "@/common/constants/knownModels";
6+
import { FORCE_COMPACTION_TOKEN_BUFFER } from "@/common/constants/ui";
67

78
// Helper to create a mock usage entry
89
const createUsageEntry = (
@@ -28,7 +29,8 @@ const createUsageEntry = (
2829
const createMockUsage = (
2930
lastEntryTokens: number,
3031
historicalTokens?: number,
31-
model: string = KNOWN_MODELS.SONNET.id
32+
model: string = KNOWN_MODELS.SONNET.id,
33+
liveUsage?: ChatUsageDisplay
3234
): WorkspaceUsageState => {
3335
const usageHistory: ChatUsageDisplay[] = [];
3436

@@ -40,7 +42,7 @@ const createMockUsage = (
4042
// Add recent usage
4143
usageHistory.push(createUsageEntry(lastEntryTokens, model));
4244

43-
return { usageHistory, totalTokens: 0 };
45+
return { usageHistory, totalTokens: 0, liveUsage };
4446
};
4547

4648
describe("checkAutoCompaction", () => {
@@ -297,4 +299,77 @@ describe("checkAutoCompaction", () => {
297299
expect(result.shouldShowWarning).toBe(true); // Above 60%
298300
});
299301
});
302+
303+
describe("Force Compaction (Live Usage)", () => {
304+
const SONNET_MAX_TOKENS = 200_000;
305+
const BUFFER = FORCE_COMPACTION_TOKEN_BUFFER;
306+
307+
test("shouldForceCompact is false when no liveUsage (falls back to lastUsage with room)", () => {
308+
const usage = createMockUsage(100_000); // 100k remaining - plenty of room
309+
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
310+
311+
expect(result.shouldForceCompact).toBe(false);
312+
});
313+
314+
test("shouldForceCompact is false when currentUsage has plenty of room", () => {
315+
const liveUsage = createUsageEntry(100_000); // 100k remaining
316+
const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
317+
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
318+
319+
expect(result.shouldForceCompact).toBe(false);
320+
});
321+
322+
test("shouldForceCompact is true when remaining <= buffer", () => {
323+
// Exactly at buffer threshold
324+
const liveUsage = createUsageEntry(SONNET_MAX_TOKENS - BUFFER);
325+
const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
326+
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
327+
328+
expect(result.shouldForceCompact).toBe(true);
329+
});
330+
331+
test("shouldForceCompact is true when over context limit", () => {
332+
const liveUsage = createUsageEntry(SONNET_MAX_TOKENS + 5000);
333+
const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
334+
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
335+
336+
expect(result.shouldForceCompact).toBe(true);
337+
});
338+
339+
test("shouldForceCompact is false when just above buffer", () => {
340+
// 1 token above buffer threshold
341+
const liveUsage = createUsageEntry(SONNET_MAX_TOKENS - BUFFER - 1);
342+
const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
343+
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
344+
345+
expect(result.shouldForceCompact).toBe(false);
346+
});
347+
348+
test("shouldForceCompact respects 1M context mode", () => {
349+
// With 1M context, exactly at buffer threshold
350+
const liveUsage = createUsageEntry(1_000_000 - BUFFER);
351+
const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
352+
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, true, true);
353+
354+
expect(result.shouldForceCompact).toBe(true);
355+
});
356+
357+
test("shouldForceCompact triggers with empty history but liveUsage near limit", () => {
358+
// Bug fix: empty history but liveUsage should still trigger
359+
const liveUsage = createUsageEntry(SONNET_MAX_TOKENS - BUFFER);
360+
const usage: WorkspaceUsageState = { usageHistory: [], totalTokens: 0, liveUsage };
361+
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
362+
363+
expect(result.shouldForceCompact).toBe(true);
364+
expect(result.usagePercentage).toBe(0); // No lastUsage for percentage
365+
});
366+
367+
test("shouldForceCompact is false when auto-compaction disabled", () => {
368+
const liveUsage = createUsageEntry(199_000); // Very close to limit
369+
const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
370+
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, false); // disabled
371+
372+
expect(result.shouldForceCompact).toBe(false);
373+
});
374+
});
300375
});

src/browser/utils/compaction/autoCompactionCheck.ts

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,29 @@
1616
*/
1717

1818
import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore";
19+
import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
1920
import { getModelStats } from "@/common/utils/tokens/modelStats";
2021
import { supports1MContext } from "@/common/utils/ai/models";
21-
import { DEFAULT_AUTO_COMPACTION_THRESHOLD } from "@/common/constants/ui";
22+
import {
23+
DEFAULT_AUTO_COMPACTION_THRESHOLD,
24+
FORCE_COMPACTION_TOKEN_BUFFER,
25+
} from "@/common/constants/ui";
26+
27+
/** Sum all token components from a ChatUsageDisplay */
28+
function getTotalTokens(usage: ChatUsageDisplay): number {
29+
return (
30+
usage.input.tokens +
31+
usage.cached.tokens +
32+
usage.cacheCreate.tokens +
33+
usage.output.tokens +
34+
usage.reasoning.tokens
35+
);
36+
}
2237

2338
export interface AutoCompactionCheckResult {
2439
shouldShowWarning: boolean;
40+
/** True when live usage shows ≤FORCE_COMPACTION_TOKEN_BUFFER remaining in context */
41+
shouldForceCompact: boolean;
2542
usagePercentage: number;
2643
thresholdPercentage: number;
2744
}
@@ -54,11 +71,11 @@ export function checkAutoCompaction(
5471
): AutoCompactionCheckResult {
5572
const thresholdPercentage = threshold * 100;
5673

57-
// Short-circuit if auto-compaction is disabled
58-
// Or if no usage data yet
59-
if (!enabled || !model || !usage || usage.usageHistory.length === 0) {
74+
// Short-circuit if auto-compaction is disabled or missing required data
75+
if (!enabled || !model || !usage) {
6076
return {
6177
shouldShowWarning: false,
78+
shouldForceCompact: false,
6279
usagePercentage: 0,
6380
thresholdPercentage,
6481
};
@@ -67,31 +84,44 @@ export function checkAutoCompaction(
6784
// Determine max tokens for this model
6885
const modelStats = getModelStats(model);
6986
const maxTokens = use1M && supports1MContext(model) ? 1_000_000 : modelStats?.max_input_tokens;
70-
const lastUsage = usage.usageHistory[usage.usageHistory.length - 1];
7187

7288
// No max tokens known - safe default (can't calculate percentage)
7389
if (!maxTokens) {
7490
return {
7591
shouldShowWarning: false,
92+
shouldForceCompact: false,
7693
usagePercentage: 0,
7794
thresholdPercentage,
7895
};
7996
}
8097

81-
const currentContextTokens =
82-
lastUsage.input.tokens +
83-
lastUsage.cached.tokens +
84-
lastUsage.cacheCreate.tokens +
85-
lastUsage.output.tokens +
86-
lastUsage.reasoning.tokens;
98+
// Current usage: live when streaming, else last historical (pattern from CostsTab)
99+
const lastUsage = usage.usageHistory[usage.usageHistory.length - 1];
100+
const currentUsage = usage.liveUsage ?? lastUsage;
101+
102+
// Force-compact when approaching context limit (can trigger even with empty history if streaming)
103+
let shouldForceCompact = false;
104+
if (currentUsage) {
105+
const remainingTokens = maxTokens - getTotalTokens(currentUsage);
106+
shouldForceCompact = remainingTokens <= FORCE_COMPACTION_TOKEN_BUFFER;
107+
}
87108

88-
const usagePercentage = (currentContextTokens / maxTokens) * 100;
109+
// Warning/percentage based on lastUsage (completed requests only)
110+
if (!lastUsage) {
111+
return {
112+
shouldShowWarning: false,
113+
shouldForceCompact,
114+
usagePercentage: 0,
115+
thresholdPercentage,
116+
};
117+
}
89118

90-
// Show warning if within advance window (e.g., 60% for 70% threshold with 10% advance)
119+
const usagePercentage = (getTotalTokens(lastUsage) / maxTokens) * 100;
91120
const shouldShowWarning = usagePercentage >= thresholdPercentage - warningAdvancePercent;
92121

93122
return {
94123
shouldShowWarning,
124+
shouldForceCompact,
95125
usagePercentage,
96126
thresholdPercentage,
97127
};

src/common/constants/ui.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,28 @@ export const DEFAULT_AUTO_COMPACTION_THRESHOLD_PERCENT = 70;
2727
* Default threshold as decimal for calculations (0.7 = 70%)
2828
*/
2929
export const DEFAULT_AUTO_COMPACTION_THRESHOLD = DEFAULT_AUTO_COMPACTION_THRESHOLD_PERCENT / 100;
30+
31+
/**
32+
* Default word target for compaction summaries
33+
*/
34+
export const DEFAULT_COMPACTION_WORD_TARGET = 2000;
35+
36+
/**
37+
* Approximate ratio of tokens to words (tokens per word)
38+
* Used for converting between word counts and token counts
39+
*/
40+
export const WORDS_TO_TOKENS_RATIO = 1.3;
41+
42+
/**
43+
* Force-compaction token buffer.
44+
* When auto-compaction is enabled and live usage shows this many tokens or fewer
45+
* remaining in the context window, force a compaction immediately.
46+
* Set to 2x the expected compaction output size to ensure room for the summary.
47+
*/
48+
export const FORCE_COMPACTION_TOKEN_BUFFER = Math.round(
49+
2 * DEFAULT_COMPACTION_WORD_TARGET * WORDS_TO_TOKENS_RATIO
50+
); // = 5200 tokens
51+
3052
/**
3153
* Duration (ms) to show "copied" feedback after copying to clipboard
3254
*/

0 commit comments

Comments
 (0)