Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 28 additions & 7 deletions src/browser/utils/thinking/policy.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,8 @@ describe("getThinkingPolicyForModel", () => {
expect(getThinkingPolicyForModel("openai:gpt-5-pro-2025-10-06")).toEqual(["high"]);
});

test("returns single HIGH for gpt-5-pro with whitespace after colon", () => {
expect(getThinkingPolicyForModel("openai: gpt-5-pro")).toEqual(["high"]);
});

test("returns all levels for gpt-5-pro-mini (not a fixed policy)", () => {
// gpt-5-pro-mini shouldn't match the gpt-5-pro config
expect(getThinkingPolicyForModel("openai:gpt-5-pro-mini")).toEqual([
"off",
"low",
Expand Down Expand Up @@ -48,6 +45,25 @@ describe("getThinkingPolicyForModel", () => {
]);
expect(getThinkingPolicyForModel("google:gemini-3-pro-preview")).toEqual(["low", "high"]);
});

test("returns binary on/off for xAI Grok models", () => {
expect(getThinkingPolicyForModel("xai:grok-4-1-fast")).toEqual(["off", "high"]);
expect(getThinkingPolicyForModel("xai:grok-2-latest")).toEqual(["off", "high"]);
expect(getThinkingPolicyForModel("xai:grok-beta")).toEqual(["off", "high"]);
});

test("grok models with version suffixes also get binary policy", () => {
expect(getThinkingPolicyForModel("xai:grok-4-1-fast-v2")).toEqual(["off", "high"]);
});

test("grok-code does not match grok- prefix, gets default policy", () => {
expect(getThinkingPolicyForModel("xai:grok-code-fast-1")).toEqual([
"off",
"low",
"medium",
"high",
]);
});
});

describe("enforceThinkingPolicy", () => {
Expand All @@ -72,10 +88,15 @@ describe("enforceThinkingPolicy", () => {
expect(enforceThinkingPolicy("anthropic:claude-opus-4", "high")).toBe("high");
});

test("falls back to medium when requested level not allowed", () => {
// Simulating behavior with gpt-5-pro (only allows "high")
// When requesting "low", falls back to first allowed level which is "high"
test("maps non-off levels to highest available when requested level not allowed", () => {
// gpt-5-pro only allows "high"
expect(enforceThinkingPolicy("openai:gpt-5-pro", "low")).toBe("high");
expect(enforceThinkingPolicy("openai:gpt-5-pro", "medium")).toBe("high");

// Grok only allows "off" and "high" - preserve reasoning intent
expect(enforceThinkingPolicy("xai:grok-4-1-fast", "low")).toBe("high");
expect(enforceThinkingPolicy("xai:grok-4-1-fast", "medium")).toBe("high");
expect(enforceThinkingPolicy("xai:grok-4-1-fast", "off")).toBe("off");
});
});
});
Expand Down
68 changes: 50 additions & 18 deletions src/browser/utils/thinking/policy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,46 +13,72 @@
*/

import type { ThinkingLevel } from "@/common/types/thinking";
import modelsData from "@/common/utils/tokens/models.json";

/**
 * Thinking policy is simply the set of allowed thinking levels for a model.
 * Pure subset design - no wrapper object, no discriminated union.
 *
 * Element order matters: enforceThinkingPolicy's last-resort fallback returns
 * the first entry of the allowed list, so list the preferred default first.
 * The array is readonly; callers must not mutate a returned policy.
 */
export type ThinkingPolicy = readonly ThinkingLevel[];

/**
 * Looks up a model's metadata entry in models.json.
 *
 * The model string is split at the first ":" into provider and model name;
 * a string without ":" is treated as a bare model name with no provider.
 * Candidate keys are tried in order: the bare model name, then
 * `provider/modelName` (only when a provider is present).
 *
 * Only the data's own keys are consulted, so model names that collide with
 * inherited `Object.prototype` members (e.g. "constructor", "toString") are
 * reported as not found instead of returning a bogus entry.
 *
 * @param modelString - Model identifier such as "openai:gpt-5-pro".
 * @returns The first matching metadata record, or null when no key matches.
 */
function getModelMetadata(modelString: string): Record<string, unknown> | null {
  const table = modelsData as Record<string, Record<string, unknown>>;

  const colonIndex = modelString.indexOf(":");
  const provider = colonIndex !== -1 ? modelString.slice(0, colonIndex) : "";
  const modelName = colonIndex !== -1 ? modelString.slice(colonIndex + 1) : modelString;

  const lookupKeys = provider ? [modelName, `${provider}/${modelName}`] : [modelName];

  for (const key of lookupKeys) {
    // Guard against inherited members: a plain-object index would otherwise
    // resolve keys like "constructor" through the prototype chain.
    if (!Object.prototype.hasOwnProperty.call(table, key)) {
      continue;
    }
    const data = table[key];
    if (data) {
      return data;
    }
  }

  return null;
}

/**
 * Returns the thinking policy (the allowed thinking levels) for a given model.
 *
 * Rules, checked in order:
 * - openai:gpt-5-pro → ["high"] (only supported level)
 * - any model string containing "gemini-3" → ["low", "high"]
 * - xai:grok-* except grok-code → ["off", "high"] (binary on/off)
 * - models.json entry with `supports_reasoning: false` → ["off"]
 * - default → ["off", "low", "medium", "high"] (all levels selectable)
 *
 * The gpt-5-pro rule tolerates whitespace after the colon and version
 * suffixes (e.g., gpt-5-pro-2025-10-06). It does NOT match gpt-5-pro-mini or
 * any other alphabetic suffix (uses negative lookahead).
 *
 * @param modelString - Model identifier in "provider:model" form.
 * @returns The allowed thinking levels for that model.
 */
export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
  // GPT-5 Pro: always high. The lookahead rejects "-mini" and other text
  // suffixes while still accepting numeric version suffixes.
  if (/^openai:\s*gpt-5-pro(?!-[a-z])/.test(modelString)) {
    return ["high"];
  }

  // Gemini 3: only "low" and "high" reasoning levels
  if (modelString.includes("gemini-3")) {
    return ["low", "high"];
  }

  // Grok: binary on/off (but not grok-code, which falls through)
  if (modelString.startsWith("xai:grok-") && !modelString.includes("grok-code")) {
    return ["off", "high"];
  }

  // Check models.json for models that explicitly lack reasoning support
  const metadata = getModelMetadata(modelString);
  if (metadata?.supports_reasoning === false) {
    return ["off"];
  }

  // Default: all levels
  return ["off", "low", "medium", "high"];
}

/**
* Enforce thinking policy by clamping requested level to allowed set.
*
* Fallback strategy:
* 1. If the requested level is allowed, use it
* 2. Otherwise, if the user wanted reasoning (non-"off"), use the highest
*    allowed non-"off" level ("high", then "medium", then "low")
* 3. Otherwise use the first allowed level
*/
export function enforceThinkingPolicy(
modelString: string,
Expand All @@ -64,6 +90,12 @@ export function enforceThinkingPolicy(
return requested;
}

// Fallback: prefer "medium" if allowed, else use first allowed level
return allowed.includes("medium") ? "medium" : allowed[0];
// If user wanted reasoning, keep it on with the best available level
if (requested !== "off") {
if (allowed.includes("high")) return "high";
if (allowed.includes("medium")) return "medium";
if (allowed.includes("low")) return "low";
}

return allowed[0];
}