coder · kylecarbs · Nov 21, 2025
diff --git a/src/browser/utils/thinking/policy.test.ts b/src/browser/utils/thinking/policy.test.ts
@@ -10,11 +10,8 @@ describe("getThinkingPolicyForModel", () => {
     expect(getThinkingPolicyForModel("openai:gpt-5-pro-2025-10-06")).toEqual(["high"]);
   });
 
-  test("returns single HIGH for gpt-5-pro with whitespace after colon", () => {
-    expect(getThinkingPolicyForModel("openai: gpt-5-pro")).toEqual(["high"]);
-  });
-
   test("returns all levels for gpt-5-pro-mini (not a fixed policy)", () => {
+    // gpt-5-pro-mini shouldn't match the gpt-5-pro config
     expect(getThinkingPolicyForModel("openai:gpt-5-pro-mini")).toEqual([
       "off",
       "low",
@@ -48,6 +45,25 @@ describe("getThinkingPolicyForModel", () => {
     ]);
     expect(getThinkingPolicyForModel("google:gemini-3-pro-preview")).toEqual(["low", "high"]);
   });
+
+  test("returns binary on/off for xAI Grok models", () => {
+    expect(getThinkingPolicyForModel("xai:grok-4-1-fast")).toEqual(["off", "high"]);
+    expect(getThinkingPolicyForModel("xai:grok-2-latest")).toEqual(["off", "high"]);
+    expect(getThinkingPolicyForModel("xai:grok-beta")).toEqual(["off", "high"]);
+  });
+
+  test("grok models with version suffixes also get binary policy", () => {
+    expect(getThinkingPolicyForModel("xai:grok-4-1-fast-v2")).toEqual(["off", "high"]);
+  });
+
+  test("grok-code does not match grok- prefix, gets default policy", () => {
+    expect(getThinkingPolicyForModel("xai:grok-code-fast-1")).toEqual([
+      "off",
+      "low",
+      "medium",
+      "high",
+    ]);
+  });
 });
 
 describe("enforceThinkingPolicy", () => {
@@ -72,10 +88,15 @@ describe("enforceThinkingPolicy", () => {
       expect(enforceThinkingPolicy("anthropic:claude-opus-4", "high")).toBe("high");
     });
 
-    test("falls back to medium when requested level not allowed", () => {
-      // Simulating behavior with gpt-5-pro (only allows "high")
-      // When requesting "low", falls back to first allowed level which is "high"
+    test("maps non-off levels to highest available when requested level not allowed", () => {
+      // gpt-5-pro only allows "high"
       expect(enforceThinkingPolicy("openai:gpt-5-pro", "low")).toBe("high");
+      expect(enforceThinkingPolicy("openai:gpt-5-pro", "medium")).toBe("high");
+
+      // Grok only allows "off" and "high" - preserve reasoning intent
+      expect(enforceThinkingPolicy("xai:grok-4-1-fast", "low")).toBe("high");
+      expect(enforceThinkingPolicy("xai:grok-4-1-fast", "medium")).toBe("high");
+      expect(enforceThinkingPolicy("xai:grok-4-1-fast", "off")).toBe("off");
     });
   });
 });

diff --git a/src/browser/utils/thinking/policy.ts b/src/browser/utils/thinking/policy.ts
@@ -13,46 +13,72 @@
  */
 
 import type { ThinkingLevel } from "@/common/types/thinking";
+import modelsData from "@/common/utils/tokens/models.json";
 
 /**
  * Thinking policy is simply the set of allowed thinking levels for a model.
  * Pure subset design - no wrapper object, no discriminated union.
  */
 export type ThinkingPolicy = readonly ThinkingLevel[];
 
+/**
+ * Finds a model's models.json entry: tries the bare model name, then "provider/model"; else null.
+ */
+function getModelMetadata(modelString: string): Record<string, unknown> | null {
+  const colonIndex = modelString.indexOf(":"); // split at the first ":" only
+  const provider = colonIndex !== -1 ? modelString.slice(0, colonIndex) : "";
+  const modelName = colonIndex !== -1 ? modelString.slice(colonIndex + 1) : modelString;
+
+  const lookupKeys: string[] = [modelName]; // bare model name is tried first
+  if (provider) {
+    lookupKeys.push(`${provider}/${modelName}`); // then the "provider/model" form
+  }
+
+  for (const key of lookupKeys) {
+    const data = (modelsData as Record<string, Record<string, unknown>>)[key];
+    if (data) {
+      return data;
+    }
+  }
+
+  return null;
+}
+
 /**
  * Returns the thinking policy for a given model.
- *
- * Rules:
- * - openai:gpt-5-pro → ["high"] (only supported level)
- * - default → ["off", "low", "medium", "high"] (all levels selectable)
- *
- * Tolerates version suffixes (e.g., gpt-5-pro-2025-10-06).
- * Does NOT match gpt-5-pro-mini (uses negative lookahead).
  */
 export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
-  // Match "openai:" followed by optional whitespace and "gpt-5-pro"
-  // Allow version suffixes like "-2025-10-06" but NOT "-mini" or other text suffixes
-  if (/^openai:\s*gpt-5-pro(?!-[a-z])/.test(modelString)) {
+  // GPT-5 Pro (incl. suffixed ids): "high" only; any id containing "-mini" is excluded
+  if (modelString.startsWith("openai:gpt-5-pro") && !modelString.includes("-mini")) {
     return ["high"];
   }
 
-  // Gemini 3 Pro only supports "low" and "high" reasoning levels
+  // Gemini 3 (any id containing "gemini-3"): only "low" and "high"
   if (modelString.includes("gemini-3")) {
     return ["low", "high"];
   }
 
-  // Default policy: all levels selectable
+  // xAI Grok: binary off/high; ids containing "grok-code" fall through to the checks below
+  if (modelString.startsWith("xai:grok-") && !modelString.includes("grok-code")) {
+    return ["off", "high"];
+  }
+
+  // models.json entries flagged supports_reasoning: false get thinking "off" only
+  const metadata = getModelMetadata(modelString);
+  if (metadata?.supports_reasoning === false) {
+    return ["off"];
+  }
+
+  // Default: all levels
   return ["off", "low", "medium", "high"];
 }
 
 /**
  * Enforce thinking policy by clamping requested level to allowed set.
  *
- * Fallback strategy:
- * 1. If requested level is allowed, use it
- * 2. If "medium" is allowed, use it (reasonable default)
- * 3. Otherwise use first allowed level
+ * If the requested level isn't allowed:
+ * - If user wanted reasoning (non-"off"), pick the highest available non-"off" level
+ * - Otherwise (requested "off", or no non-"off" level is allowed) return the first allowed level
  */
 export function enforceThinkingPolicy(
   modelString: string,
@@ -64,6 +90,12 @@ export function enforceThinkingPolicy(
     return requested;
   }
 
-  // Fallback: prefer "medium" if allowed, else use first allowed level
-  return allowed.includes("medium") ? "medium" : allowed[0];
+  // If user wanted reasoning, keep it on with the best available level
+  if (requested !== "off") {
+    if (allowed.includes("high")) return "high";
+    if (allowed.includes("medium")) return "medium";
+    if (allowed.includes("low")) return "low";
+  }
+
+  return allowed[0];
 }