diff --git a/src/browser/components/AIView.tsx b/src/browser/components/AIView.tsx index 394abd8dc..85d8730e1 100644 --- a/src/browser/components/AIView.tsx +++ b/src/browser/components/AIView.tsx @@ -594,6 +594,7 @@ const AIViewInner: React.FC = ({ )} diff --git a/src/browser/components/CompactionWarning.tsx b/src/browser/components/CompactionWarning.tsx index 8c970ef81..e882d08e5 100644 --- a/src/browser/components/CompactionWarning.tsx +++ b/src/browser/components/CompactionWarning.tsx @@ -1,30 +1,50 @@ import React from "react"; +import { FORCE_COMPACTION_BUFFER_PERCENT } from "@/common/constants/ui"; /** * Warning indicator shown when context usage is approaching the compaction threshold. * * Displays as subtle right-aligned text: * - Below threshold: "Auto-Compact in X% usage" (where X = threshold - current) - * - At/above threshold: Bold "Next message will Auto-Compact" + * - At/above threshold (not streaming): Bold "Next message will Auto-Compact" + * - At/above threshold (streaming): "Force-compacting in N%" (where N = force threshold - current usage) * - * Both states are clickable to insert /compact command. + * All states are clickable to insert /compact command. * - * @param usagePercentage - Current token usage as percentage (0-100) + * @param usagePercentage - Current token usage as percentage (0-100), reflects live usage when streaming * @param thresholdPercentage - Auto-compaction trigger threshold (0-100, default 70) + * @param isStreaming - Whether currently streaming a response * @param onCompactClick - Callback when user clicks to trigger manual compaction */ export const CompactionWarning: React.FC<{ usagePercentage: number; thresholdPercentage: number; + isStreaming: boolean; onCompactClick?: () => void; }> = (props) => { // At threshold or above, next message will trigger compaction const willCompactNext = props.usagePercentage >= props.thresholdPercentage; const remaining = props.thresholdPercentage - props.usagePercentage; - const text = willCompactNext - ? "Next message will Auto-Compact" - : `Auto-Compact in ${Math.round(remaining)}% usage`; + // When streaming and above threshold, show countdown to force-compaction + const forceCompactThreshold = props.thresholdPercentage + FORCE_COMPACTION_BUFFER_PERCENT; + const showForceCompactCountdown = + props.isStreaming && willCompactNext && props.usagePercentage < forceCompactThreshold; + const forceCompactRemaining = forceCompactThreshold - props.usagePercentage; + + let text: string; + let isUrgent: boolean; + + if (showForceCompactCountdown) { + text = `Force-compacting in ${Math.round(forceCompactRemaining)}%`; + isUrgent = false; + } else if (willCompactNext) { + text = "Next message will Auto-Compact"; + isUrgent = true; + } else { + text = `Auto-Compact in ${Math.round(remaining)}% usage`; + isUrgent = false; + } return (
@@ -32,7 +52,7 @@ export const CompactionWarning: React.FC<{ type="button" onClick={props.onCompactClick} className={`cursor-pointer hover:underline ${ - willCompactNext ? "text-plan-mode font-semibold" : "text-muted" + isUrgent ? "text-plan-mode font-semibold" : "text-muted" }`} title="Click to insert /compact command" > diff --git a/src/browser/utils/compaction/autoCompactionCheck.test.ts b/src/browser/utils/compaction/autoCompactionCheck.test.ts index a2e0040f1..9900992e1 100644 --- a/src/browser/utils/compaction/autoCompactionCheck.test.ts +++ b/src/browser/utils/compaction/autoCompactionCheck.test.ts @@ -3,7 +3,6 @@ import { checkAutoCompaction } from "./autoCompactionCheck"; import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore"; import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator"; import { KNOWN_MODELS } from "@/common/constants/knownModels"; -import { FORCE_COMPACTION_TOKEN_BUFFER } from "@/common/constants/ui"; // Helper to create a mock usage entry const createUsageEntry = ( @@ -302,63 +301,63 @@ describe("checkAutoCompaction", () => { }); }); - describe("Force Compaction (Live Usage)", () => { - const SONNET_MAX_TOKENS = 200_000; - const BUFFER = FORCE_COMPACTION_TOKEN_BUFFER; + describe("Force Compaction (threshold + 5% buffer)", () => { + // Force-compact triggers at threshold + 5% + // With default 70% threshold, force-compact at 75% - test("shouldForceCompact is false when no liveUsage (falls back to lastUsage with room)", () => { - const usage = createMockUsage(100_000); // 100k remaining - plenty of room + test("shouldForceCompact is false when usage just below force threshold", () => { + // 74% usage, threshold 70%, force at 75% - should NOT trigger + const usage = createMockUsage(148_000); // 74% const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false); expect(result.shouldForceCompact).toBe(false); }); - test("shouldForceCompact is false when currentUsage has plenty of room", () => { - const liveUsage = createUsageEntry(100_000); // 100k remaining - const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage); + test("shouldForceCompact is true when usage at force threshold", () => { + // 75% usage, threshold 70%, force at 75% - should trigger + const usage = createMockUsage(150_000); // 75% const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false); - expect(result.shouldForceCompact).toBe(false); + expect(result.shouldForceCompact).toBe(true); }); - test("shouldForceCompact is true when remaining <= buffer", () => { - // Exactly at buffer threshold - const liveUsage = createUsageEntry(SONNET_MAX_TOKENS - BUFFER); - const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage); + test("shouldForceCompact is true when usage above force threshold", () => { + // 80% usage, threshold 70%, force at 75% - should trigger + const usage = createMockUsage(160_000); // 80% const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false); expect(result.shouldForceCompact).toBe(true); }); - test("shouldForceCompact is true when over context limit", () => { - const liveUsage = createUsageEntry(SONNET_MAX_TOKENS + 5000); - const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage); + test("shouldForceCompact uses liveUsage when available", () => { + // lastUsage at 50%, liveUsage at 75% - should trigger based on live + const liveUsage = createUsageEntry(150_000); // 75% + const usage = createMockUsage(100_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage); const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false); expect(result.shouldForceCompact).toBe(true); + expect(result.usagePercentage).toBe(75); // usagePercentage reflects live when streaming }); - test("shouldForceCompact is false when just above buffer", () => { - // 1 token above buffer threshold - const liveUsage = createUsageEntry(SONNET_MAX_TOKENS - BUFFER - 1); - const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage); - const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false); + test("shouldForceCompact respects custom threshold", () => { + // 55% usage with 50% threshold - force at 55%, should trigger + const usage = createMockUsage(110_000); // 55% + const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, 0.5); - expect(result.shouldForceCompact).toBe(false); + expect(result.shouldForceCompact).toBe(true); }); test("shouldForceCompact respects 1M context mode", () => { - // With 1M context, exactly at buffer threshold - const liveUsage = createUsageEntry(1_000_000 - BUFFER); + // 75% of 1M = 750k tokens + const liveUsage = createUsageEntry(750_000); const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage); const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, true); expect(result.shouldForceCompact).toBe(true); }); - test("shouldForceCompact triggers with empty history but liveUsage near limit", () => { - // Bug fix: empty history but liveUsage should still trigger - const liveUsage = createUsageEntry(SONNET_MAX_TOKENS - BUFFER); + test("shouldForceCompact triggers with empty history but liveUsage at force threshold", () => { + const liveUsage = createUsageEntry(150_000); // 75% const usage: WorkspaceUsageState = { usageHistory: [], totalTokens: 0, @@ -367,12 +366,35 @@ describe("checkAutoCompaction", () => { const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false); expect(result.shouldForceCompact).toBe(true); - expect(result.usagePercentage).toBe(0); // No lastUsage for percentage + expect(result.usagePercentage).toBe(75); // usagePercentage reflects live even with empty history + }); + + test("shouldShowWarning uses live usage when no history exists", () => { + // No lastUsage, liveUsage at 65% - should show warning (65% >= 60%) + const liveUsage = createUsageEntry(130_000); // 65% + const usage: WorkspaceUsageState = { + usageHistory: [], + totalTokens: 0, + liveUsage, + }; + const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false); + + expect(result.shouldShowWarning).toBe(true); + expect(result.shouldForceCompact).toBe(false); // 65% < 75% + }); + + test("shouldShowWarning uses max of last and live usage", () => { + // lastUsage at 50% (below warning), liveUsage at 72% (above warning) + const liveUsage = createUsageEntry(144_000); // 72% + const usage = createMockUsage(100_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage); + const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false); + + expect(result.shouldShowWarning).toBe(true); // 72% >= 60% + expect(result.shouldForceCompact).toBe(false); // 72% < 75% }); test("shouldForceCompact is false when auto-compaction disabled", () => { - const liveUsage = createUsageEntry(199_000); // Very close to limit - const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage); + const usage = createMockUsage(190_000); // 95% - would trigger if enabled const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, 1.0); // disabled expect(result.shouldForceCompact).toBe(false); diff --git a/src/browser/utils/compaction/autoCompactionCheck.ts b/src/browser/utils/compaction/autoCompactionCheck.ts index 80e56ff0a..c38753ec2 100644 --- a/src/browser/utils/compaction/autoCompactionCheck.ts +++ b/src/browser/utils/compaction/autoCompactionCheck.ts @@ -21,7 +21,7 @@ import { getModelStats } from "@/common/utils/tokens/modelStats"; import { supports1MContext } from "@/common/utils/ai/models"; import { DEFAULT_AUTO_COMPACTION_THRESHOLD, - FORCE_COMPACTION_TOKEN_BUFFER, + FORCE_COMPACTION_BUFFER_PERCENT, } from "@/common/constants/ui"; /** Sum all token components from a ChatUsageDisplay */ @@ -37,8 +37,9 @@ function getTotalTokens(usage: ChatUsageDisplay): number { export interface AutoCompactionCheckResult { shouldShowWarning: boolean; - /** True when live usage shows ≤FORCE_COMPACTION_TOKEN_BUFFER remaining in context */ + /** True when usage exceeds threshold + buffer (gives user control before force-compact) */ shouldForceCompact: boolean; + /** Current usage percentage - live when streaming, otherwise last completed */ usagePercentage: number; thresholdPercentage: number; } @@ -94,30 +95,22 @@ export function checkAutoCompaction( }; } - // Current usage: live when streaming, else last historical - // Use lastContextUsage (last step) for accurate context window size + // Current usage: live when streaming, else last completed const lastUsage = usage.lastContextUsage; const currentUsage = usage.liveUsage ?? lastUsage; - // Force-compact when approaching context limit (can trigger even with empty history if streaming) - let shouldForceCompact = false; - if (currentUsage) { - const remainingTokens = maxTokens - getTotalTokens(currentUsage); - shouldForceCompact = remainingTokens <= FORCE_COMPACTION_TOKEN_BUFFER; - } + // Usage percentage from current context (live when streaming, otherwise last completed) + const usagePercentage = currentUsage ? (getTotalTokens(currentUsage) / maxTokens) * 100 : 0; - // Warning/percentage based on lastUsage (completed requests only) - if (!lastUsage) { - return { - shouldShowWarning: false, - shouldForceCompact, - usagePercentage: 0, - thresholdPercentage, - }; - } + // Force-compact when usage exceeds threshold + buffer + const forceCompactThreshold = thresholdPercentage + FORCE_COMPACTION_BUFFER_PERCENT; + const shouldForceCompact = usagePercentage >= forceCompactThreshold; - const usagePercentage = (getTotalTokens(lastUsage) / maxTokens) * 100; - const shouldShowWarning = usagePercentage >= thresholdPercentage - warningAdvancePercent; + // Warning uses max of last completed and current (live when streaming) + // This ensures warning shows when live usage spikes above threshold mid-stream + const lastUsagePercentage = lastUsage ? (getTotalTokens(lastUsage) / maxTokens) * 100 : 0; + const shouldShowWarning = + Math.max(lastUsagePercentage, usagePercentage) >= thresholdPercentage - warningAdvancePercent; return { shouldShowWarning, diff --git a/src/common/constants/ui.ts b/src/common/constants/ui.ts index 119653b0d..5ce8a4de6 100644 --- a/src/common/constants/ui.ts +++ b/src/common/constants/ui.ts @@ -41,14 +41,11 @@ export const DEFAULT_COMPACTION_WORD_TARGET = 2000; export const WORDS_TO_TOKENS_RATIO = 1.3; /** - * Force-compaction token buffer. - * When auto-compaction is enabled and live usage shows this many tokens or fewer - * remaining in the context window, force a compaction immediately. - * Set to 2x the expected compaction output size to ensure room for the summary. - */ -export const FORCE_COMPACTION_TOKEN_BUFFER = Math.round( - 2 * DEFAULT_COMPACTION_WORD_TARGET * WORDS_TO_TOKENS_RATIO -); // = 5200 tokens + * Force-compact this many percentage points after threshold. + * Gives user a buffer zone between warning and force-compaction. + * E.g., with 70% threshold, force-compact triggers at 75%. + */ +export const FORCE_COMPACTION_BUFFER_PERCENT = 5; /** * Duration (ms) to show "copied" feedback after copying to clipboard