Skip to content

Commit 7d668fe

Browse files
🤖 feat: force compaction when approaching context limit (#778)
## Summary

Automatically triggers compaction when live token usage approaches the context window limit during streaming. This prevents the AI from hitting context limit errors mid-response.

## Changes

### Force Compaction Logic

- Extended `checkAutoCompaction()` to return `shouldForceCompact` flag
- Uses `liveUsage` (real-time streaming tokens) with fallback to `lastUsage`
- Triggers when remaining tokens ≤ 5200 (2× expected compaction output)
- Works even with empty `usageHistory` (first message streaming)

### AIView Integration

- New `useEffect` monitors `shouldForceCompact` during active streams
- Tracks triggered stream ID to prevent duplicate compactions
- Sends compaction request with "Continue with current task" as follow-up

### Queue & Restore Fixes

- **Moved restore-to-input** from `stream-abort` handler to IPC `interruptStream` handler
- User interrupts (Ctrl+C) still restore queued messages to input
- Internal aborts (compaction flow) preserve queue for follow-up

### Shared Constants

- `DEFAULT_COMPACTION_WORD_TARGET = 2000`
- `WORDS_TO_TOKENS_RATIO = 1.3`
- `FORCE_COMPACTION_TOKEN_BUFFER = 5200` (derived: 2 × 2000 × 1.3)

## Testing

- 31 unit tests for `checkAutoCompaction` including force compaction scenarios
- Manually tested by asking the agent to read a large file in 22 chunks whilst summarising each chunk. The process completed successfully even with a force-compaction triggering mid-task.

---

_Generated with `mux`_
1 parent 36eb40e commit 7d668fe

File tree

7 files changed

+232
-60
lines changed

7 files changed

+232
-60
lines changed

src/browser/components/AIView.tsx

Lines changed: 62 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ import { evictModelFromLRU } from "@/browser/hooks/useModelLRU";
3737
import { QueuedMessage } from "./Messages/QueuedMessage";
3838
import { CompactionWarning } from "./CompactionWarning";
3939
import { checkAutoCompaction } from "@/browser/utils/compaction/autoCompactionCheck";
40+
import { executeCompaction } from "@/browser/utils/chatCommands";
4041
import { useProviderOptions } from "@/browser/hooks/useProviderOptions";
4142
import { useAutoCompactionSettings } from "../hooks/useAutoCompactionSettings";
4243
import { useSendMessageOptions } from "@/browser/hooks/useSendMessageOptions";
@@ -120,6 +121,67 @@ const AIViewInner: React.FC<AIViewProps> = ({
120121
undefined
121122
);
122123

124+
// Use send options for auto-compaction check
125+
const pendingSendOptions = useSendMessageOptions(workspaceId);
126+
127+
// Track if we've already triggered force compaction for this stream
128+
const forceCompactionTriggeredRef = useRef<string | null>(null);
129+
130+
// Extract state from workspace state
131+
const { messages, canInterrupt, isCompacting, loading, currentModel } = workspaceState;
132+
133+
// Get active stream message ID for token counting
134+
const activeStreamMessageId = aggregator.getActiveStreamMessageId();
135+
136+
// Use pending send model for auto-compaction check, not the last stream's model.
137+
// This ensures the threshold is based on the model the user will actually send with,
138+
// preventing context-length errors when switching from a large-context to smaller model.
139+
const pendingModel = pendingSendOptions.model;
140+
141+
const autoCompactionResult = checkAutoCompaction(
142+
workspaceUsage,
143+
pendingModel,
144+
use1M,
145+
autoCompactionEnabled,
146+
autoCompactionThreshold / 100
147+
);
148+
149+
// Show warning when: shouldShowWarning flag is true AND not currently compacting
150+
const shouldShowCompactionWarning = !isCompacting && autoCompactionResult.shouldShowWarning;
151+
152+
// Force compaction when live usage shows we're about to hit context limit
153+
useEffect(() => {
154+
if (
155+
!autoCompactionResult.shouldForceCompact ||
156+
!canInterrupt ||
157+
isCompacting ||
158+
forceCompactionTriggeredRef.current === activeStreamMessageId
159+
) {
160+
return;
161+
}
162+
163+
forceCompactionTriggeredRef.current = activeStreamMessageId ?? null;
164+
void executeCompaction({
165+
workspaceId,
166+
sendMessageOptions: pendingSendOptions,
167+
continueMessage: { text: "Continue with the current task" },
168+
});
169+
}, [
170+
autoCompactionResult.shouldForceCompact,
171+
canInterrupt,
172+
isCompacting,
173+
activeStreamMessageId,
174+
workspaceId,
175+
pendingSendOptions,
176+
]);
177+
178+
// Reset force compaction trigger when stream ends
179+
useEffect(() => {
180+
if (!canInterrupt) {
181+
forceCompactionTriggeredRef.current = null;
182+
}
183+
}, [canInterrupt]);
184+
123185
// Auto-retry state - minimal setter for keybinds and message sent handler
124186
// RetryBarrier manages its own state, but we need this for interrupt keybind
125187
const [, setAutoRetry] = usePersistedState<boolean>(
@@ -144,9 +206,6 @@ const AIViewInner: React.FC<AIViewProps> = ({
144206
markUserInteraction,
145207
} = useAutoScroll();
146208

147-
// Use send options for auto-compaction check
148-
const pendingSendOptions = useSendMessageOptions(workspaceId);
149-
150209
// ChatInput API for focus management
151210
const chatInputAPI = useRef<ChatInputAPI | null>(null);
152211
const handleChatInputReady = useCallback((api: ChatInputAPI) => {
@@ -329,28 +388,6 @@ const AIViewInner: React.FC<AIViewProps> = ({
329388
);
330389
}
331390

332-
// Extract state from workspace state
333-
const { messages, canInterrupt, isCompacting, loading, currentModel } = workspaceState;
334-
335-
// Get active stream message ID for token counting
336-
const activeStreamMessageId = aggregator.getActiveStreamMessageId();
337-
338-
// Use pending send model for auto-compaction check, not the last stream's model.
339-
// This ensures the threshold is based on the model the user will actually send with,
340-
// preventing context-length errors when switching from a large-context to smaller model.
341-
const pendingModel = pendingSendOptions.model;
342-
343-
const autoCompactionResult = checkAutoCompaction(
344-
workspaceUsage,
345-
pendingModel,
346-
use1M,
347-
autoCompactionEnabled,
348-
autoCompactionThreshold / 100
349-
);
350-
351-
// Show warning when: shouldShowWarning flag is true AND not currently compacting
352-
const shouldShowCompactionWarning = !isCompacting && autoCompactionResult.shouldShowWarning;
353-
354391
// Note: We intentionally do NOT reset autoRetry when streams start.
355392
// If user pressed the interrupt key, autoRetry stays false until they manually retry.
356393
// This makes state transitions explicit and predictable.

src/browser/utils/chatCommands.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import { resolveCompactionModel } from "@/browser/utils/messages/compactionModel
2323
import type { ImageAttachment } from "../components/ImageAttachments";
2424
import { dispatchWorkspaceSwitch } from "./workspaceEvents";
2525
import { getRuntimeKey, copyWorkspaceStorage } from "@/common/constants/storage";
26+
import { DEFAULT_COMPACTION_WORD_TARGET, WORDS_TO_TOKENS_RATIO } from "@/common/constants/ui";
2627

2728
// ============================================================================
2829
// Workspace Creation
@@ -572,7 +573,9 @@ export function prepareCompactionMessage(options: CompactionOptions): {
572573
metadata: MuxFrontendMetadata;
573574
sendOptions: SendMessageOptions;
574575
} {
575-
const targetWords = options.maxOutputTokens ? Math.round(options.maxOutputTokens / 1.3) : 2000;
576+
const targetWords = options.maxOutputTokens
577+
? Math.round(options.maxOutputTokens / WORDS_TO_TOKENS_RATIO)
578+
: DEFAULT_COMPACTION_WORD_TARGET;
576579

577580
// Build compaction message with optional continue context
578581
let messageText = `Summarize this conversation into a compact form for a new Assistant to continue helping the user. Focus entirely on the summary of what has happened. Do not suggest next steps or future actions. Use approximately ${targetWords} words.`;

src/browser/utils/compaction/autoCompactionCheck.test.ts

Lines changed: 77 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { checkAutoCompaction } from "./autoCompactionCheck";
33
import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore";
44
import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
55
import { KNOWN_MODELS } from "@/common/constants/knownModels";
6+
import { FORCE_COMPACTION_TOKEN_BUFFER } from "@/common/constants/ui";
67

78
// Helper to create a mock usage entry
89
const createUsageEntry = (
@@ -28,7 +29,8 @@ const createUsageEntry = (
2829
const createMockUsage = (
2930
lastEntryTokens: number,
3031
historicalTokens?: number,
31-
model: string = KNOWN_MODELS.SONNET.id
32+
model: string = KNOWN_MODELS.SONNET.id,
33+
liveUsage?: ChatUsageDisplay
3234
): WorkspaceUsageState => {
3335
const usageHistory: ChatUsageDisplay[] = [];
3436

@@ -40,7 +42,7 @@ const createMockUsage = (
4042
// Add recent usage
4143
usageHistory.push(createUsageEntry(lastEntryTokens, model));
4244

43-
return { usageHistory, totalTokens: 0 };
45+
return { usageHistory, totalTokens: 0, liveUsage };
4446
};
4547

4648
describe("checkAutoCompaction", () => {
@@ -297,4 +299,77 @@ describe("checkAutoCompaction", () => {
297299
expect(result.shouldShowWarning).toBe(true); // Above 60%
298300
});
299301
});
302+
303+
describe("Force Compaction (Live Usage)", () => {
304+
const SONNET_MAX_TOKENS = 200_000;
305+
const BUFFER = FORCE_COMPACTION_TOKEN_BUFFER;
306+
307+
test("shouldForceCompact is false when no liveUsage (falls back to lastUsage with room)", () => {
308+
const usage = createMockUsage(100_000); // 100k remaining - plenty of room
309+
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
310+
311+
expect(result.shouldForceCompact).toBe(false);
312+
});
313+
314+
test("shouldForceCompact is false when currentUsage has plenty of room", () => {
315+
const liveUsage = createUsageEntry(100_000); // 100k remaining
316+
const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
317+
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
318+
319+
expect(result.shouldForceCompact).toBe(false);
320+
});
321+
322+
test("shouldForceCompact is true when remaining <= buffer", () => {
323+
// Exactly at buffer threshold
324+
const liveUsage = createUsageEntry(SONNET_MAX_TOKENS - BUFFER);
325+
const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
326+
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
327+
328+
expect(result.shouldForceCompact).toBe(true);
329+
});
330+
331+
test("shouldForceCompact is true when over context limit", () => {
332+
const liveUsage = createUsageEntry(SONNET_MAX_TOKENS + 5000);
333+
const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
334+
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
335+
336+
expect(result.shouldForceCompact).toBe(true);
337+
});
338+
339+
test("shouldForceCompact is false when just above buffer", () => {
340+
// 1 token above buffer threshold
341+
const liveUsage = createUsageEntry(SONNET_MAX_TOKENS - BUFFER - 1);
342+
const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
343+
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
344+
345+
expect(result.shouldForceCompact).toBe(false);
346+
});
347+
348+
test("shouldForceCompact respects 1M context mode", () => {
349+
// With 1M context, exactly at buffer threshold
350+
const liveUsage = createUsageEntry(1_000_000 - BUFFER);
351+
const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
352+
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, true, true);
353+
354+
expect(result.shouldForceCompact).toBe(true);
355+
});
356+
357+
test("shouldForceCompact triggers with empty history but liveUsage near limit", () => {
358+
// Bug fix: empty history but liveUsage should still trigger
359+
const liveUsage = createUsageEntry(SONNET_MAX_TOKENS - BUFFER);
360+
const usage: WorkspaceUsageState = { usageHistory: [], totalTokens: 0, liveUsage };
361+
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
362+
363+
expect(result.shouldForceCompact).toBe(true);
364+
expect(result.usagePercentage).toBe(0); // No lastUsage for percentage
365+
});
366+
367+
test("shouldForceCompact is false when auto-compaction disabled", () => {
368+
const liveUsage = createUsageEntry(199_000); // Very close to limit
369+
const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
370+
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, false); // disabled
371+
372+
expect(result.shouldForceCompact).toBe(false);
373+
});
374+
});
300375
});

src/browser/utils/compaction/autoCompactionCheck.ts

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,29 @@
1616
*/
1717

1818
import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore";
19+
import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
1920
import { getModelStats } from "@/common/utils/tokens/modelStats";
2021
import { supports1MContext } from "@/common/utils/ai/models";
21-
import { DEFAULT_AUTO_COMPACTION_THRESHOLD } from "@/common/constants/ui";
22+
import {
23+
DEFAULT_AUTO_COMPACTION_THRESHOLD,
24+
FORCE_COMPACTION_TOKEN_BUFFER,
25+
} from "@/common/constants/ui";
26+
27+
/** Sum all token components from a ChatUsageDisplay */
28+
function getTotalTokens(usage: ChatUsageDisplay): number {
29+
return (
30+
usage.input.tokens +
31+
usage.cached.tokens +
32+
usage.cacheCreate.tokens +
33+
usage.output.tokens +
34+
usage.reasoning.tokens
35+
);
36+
}
2237

2338
export interface AutoCompactionCheckResult {
2439
shouldShowWarning: boolean;
40+
/** True when live usage shows ≤FORCE_COMPACTION_TOKEN_BUFFER remaining in context */
41+
shouldForceCompact: boolean;
2542
usagePercentage: number;
2643
thresholdPercentage: number;
2744
}
@@ -54,11 +71,11 @@ export function checkAutoCompaction(
5471
): AutoCompactionCheckResult {
5572
const thresholdPercentage = threshold * 100;
5673

57-
// Short-circuit if auto-compaction is disabled
58-
// Or if no usage data yet
59-
if (!enabled || !model || !usage || usage.usageHistory.length === 0) {
74+
// Short-circuit if auto-compaction is disabled or missing required data
75+
if (!enabled || !model || !usage) {
6076
return {
6177
shouldShowWarning: false,
78+
shouldForceCompact: false,
6279
usagePercentage: 0,
6380
thresholdPercentage,
6481
};
@@ -67,31 +84,44 @@ export function checkAutoCompaction(
6784
// Determine max tokens for this model
6885
const modelStats = getModelStats(model);
6986
const maxTokens = use1M && supports1MContext(model) ? 1_000_000 : modelStats?.max_input_tokens;
70-
const lastUsage = usage.usageHistory[usage.usageHistory.length - 1];
7187

7288
// No max tokens known - safe default (can't calculate percentage)
7389
if (!maxTokens) {
7490
return {
7591
shouldShowWarning: false,
92+
shouldForceCompact: false,
7693
usagePercentage: 0,
7794
thresholdPercentage,
7895
};
7996
}
8097

81-
const currentContextTokens =
82-
lastUsage.input.tokens +
83-
lastUsage.cached.tokens +
84-
lastUsage.cacheCreate.tokens +
85-
lastUsage.output.tokens +
86-
lastUsage.reasoning.tokens;
98+
// Current usage: live when streaming, else last historical (pattern from CostsTab)
99+
const lastUsage = usage.usageHistory[usage.usageHistory.length - 1];
100+
const currentUsage = usage.liveUsage ?? lastUsage;
101+
102+
// Force-compact when approaching context limit (can trigger even with empty history if streaming)
103+
let shouldForceCompact = false;
104+
if (currentUsage) {
105+
const remainingTokens = maxTokens - getTotalTokens(currentUsage);
106+
shouldForceCompact = remainingTokens <= FORCE_COMPACTION_TOKEN_BUFFER;
107+
}
87108

88-
const usagePercentage = (currentContextTokens / maxTokens) * 100;
109+
// Warning/percentage based on lastUsage (completed requests only)
110+
if (!lastUsage) {
111+
return {
112+
shouldShowWarning: false,
113+
shouldForceCompact,
114+
usagePercentage: 0,
115+
thresholdPercentage,
116+
};
117+
}
89118

90-
// Show warning if within advance window (e.g., 60% for 70% threshold with 10% advance)
119+
const usagePercentage = (getTotalTokens(lastUsage) / maxTokens) * 100;
91120
const shouldShowWarning = usagePercentage >= thresholdPercentage - warningAdvancePercent;
92121

93122
return {
94123
shouldShowWarning,
124+
shouldForceCompact,
95125
usagePercentage,
96126
thresholdPercentage,
97127
};

src/common/constants/ui.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,28 @@ export const DEFAULT_AUTO_COMPACTION_THRESHOLD_PERCENT = 70;
2727
* Default threshold as decimal for calculations (0.7 = 70%)
2828
*/
2929
export const DEFAULT_AUTO_COMPACTION_THRESHOLD = DEFAULT_AUTO_COMPACTION_THRESHOLD_PERCENT / 100;
30+
31+
/**
32+
* Default word target for compaction summaries
33+
*/
34+
export const DEFAULT_COMPACTION_WORD_TARGET = 2000;
35+
36+
/**
37+
* Approximate ratio of tokens to words (tokens per word)
38+
* Used for converting between word counts and token counts
39+
*/
40+
export const WORDS_TO_TOKENS_RATIO = 1.3;
41+
42+
/**
43+
* Force-compaction token buffer.
44+
* When auto-compaction is enabled and live usage shows this many tokens or fewer
45+
* remaining in the context window, force a compaction immediately.
46+
* Set to 2x the expected compaction output size to ensure room for the summary.
47+
*/
48+
export const FORCE_COMPACTION_TOKEN_BUFFER = Math.round(
49+
2 * DEFAULT_COMPACTION_WORD_TARGET * WORDS_TO_TOKENS_RATIO
50+
); // = 5200 tokens
51+
3052
/**
3153
* Duration (ms) to show "copied" feedback after copying to clipboard
3254
*/

0 commit comments

Comments
 (0)