
Commit 2a6ee89

feat: Enhance Claude Sonnet 4 support with 1M context window and tiered pricing (#196)

Parent: 7618b42

File tree

14 files changed (+3484, -1786 lines)


.changeset/fair-buttons-chew.md

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+---
+"hai-build-code-generator": patch
+---
+
+Enhanced support for Claude Sonnet 4, extending its maximum context window to 1 million tokens and enabling tiered pricing for more flexible usage models.
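For a sense of what "tiered pricing" means here, the sketch below shows how a 1M-context model entry with per-tier rates might be described in TypeScript. The shape mirrors the ModelTier message added to proto/models.proto later in this commit; the interface names, the tier boundaries, and the prices are illustrative assumptions, not the shipped values from @shared/api.

// Illustrative only: shape mirrors the new ModelTier proto fields; the actual
// ModelInfo definition and prices in @shared/api may differ.
interface ModelPricingTier {
	contextWindow: number // requests up to this many input tokens use this tier
	inputPrice?: number // USD per million input tokens
	outputPrice?: number // USD per million output tokens
	cacheWritesPrice?: number
	cacheReadsPrice?: number
}

interface TieredModelInfo {
	maxTokens: number
	contextWindow: number
	tiers?: ModelPricingTier[]
}

// Hypothetical 1M-context Sonnet 4 entry with two pricing tiers:
const claudeSonnet4With1mContext: TieredModelInfo = {
	maxTokens: 8192,
	contextWindow: 1_000_000,
	tiers: [
		{ contextWindow: 200_000, inputPrice: 3.0, outputPrice: 15.0 },
		{ contextWindow: 1_000_000, inputPrice: 6.0, outputPrice: 22.5 },
	],
}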

package-lock.json

Lines changed: 2397 additions & 1449 deletions
(Generated file; diff not rendered.)

package.json

Lines changed: 1 addition & 1 deletion

@@ -386,7 +386,7 @@
 		"@anthropic-ai/bedrock-sdk": "^0.12.4",
 		"@anthropic-ai/sdk": "^0.37.0",
 		"@anthropic-ai/vertex-sdk": "^0.6.4",
-		"@aws-sdk/client-bedrock-runtime": "^3.758.0",
+		"@aws-sdk/client-bedrock-runtime": "^3.873.0",
 		"@bufbuild/protobuf": "^2.2.5",
 		"@google-cloud/vertexai": "^1.9.3",
 		"@google/genai": "^0.13.0",

proto/models.proto

Lines changed: 24 additions & 0 deletions

@@ -35,6 +35,27 @@ message VsCodeLmModel {
   string id = 4;
 }
 
+// Price tier for tiered pricing models
+message PriceTier {
+  int32 token_limit = 1; // Upper limit (inclusive) of input tokens for this price
+  double price = 2; // Price per million tokens for this tier
+}
+
+message ThinkingConfig {
+  optional int32 max_budget = 1; // Max allowed thinking budget tokens
+  optional double output_price = 2; // Output price per million tokens when budget > 0
+  repeated PriceTier output_price_tiers = 3; // Optional: Tiered output price when budget > 0
+}
+
+// Model tier for tiered pricing structures
+message ModelTier {
+  int32 context_window = 1;
+  optional double input_price = 2;
+  optional double output_price = 3;
+  optional double cache_writes_price = 4;
+  optional double cache_reads_price = 5;
+}
+
 // For OpenRouterCompatibleModelInfo structure in OpenRouterModels
 message OpenRouterModelInfo {
   int32 max_tokens = 1;
@@ -46,6 +67,9 @@ message OpenRouterModelInfo {
   double cache_writes_price = 7;
   double cache_reads_price = 8;
   string description = 9;
+  optional ThinkingConfig thinking_config = 10;
+  optional bool supports_global_endpoint = 11;
+  repeated ModelTier tiers = 12;
 }
 
 // Shared response message for model information
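The new PriceTier and ModelTier messages imply a lookup on the consumer side: given a request's input token count, pick the tier that covers it and bill at that tier's rates. A minimal TypeScript sketch of that selection follows, assuming the proto fields map to camelCase properties; the actual lookup code is not part of this diff.

// Assumed camelCase client-side mapping of the ModelTier proto message.
interface ModelTier {
	contextWindow: number
	inputPrice?: number
	outputPrice?: number
	cacheWritesPrice?: number
	cacheReadsPrice?: number
}

// Pick the first tier whose context window covers the prompt; fall back to the
// largest (last) tier for anything bigger.
function selectTier(tiers: ModelTier[], inputTokens: number): ModelTier | undefined {
	if (tiers.length === 0) {
		return undefined
	}
	return tiers.find((tier) => inputTokens <= tier.contextWindow) ?? tiers[tiers.length - 1]
}

// Example: with 200K and 1M tiers, a 300_000-token prompt resolves to the 1M tier
// and is billed at that tier's inputPrice/outputPrice.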

src/api/providers/anthropic.ts

Lines changed: 24 additions & 22 deletions

@@ -1,15 +1,23 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import { Stream as AnthropicStream } from "@anthropic-ai/sdk/streaming"
 import { withRetry } from "../retry"
-import { anthropicDefaultModelId, AnthropicModelId, anthropicModels, ApiHandlerOptions, ModelInfo } from "@shared/api"
+import { anthropicDefaultModelId, AnthropicModelId, anthropicModels, CLAUDE_SONNET_4_1M_SUFFIX, ModelInfo } from "@shared/api"
 import { ApiHandler } from "../index"
 import { ApiStream } from "../transform/stream"
 
+interface AnthropicHandlerOptions {
+	apiKey?: string
+	anthropicBaseUrl?: string
+	apiModelId?: string
+	thinkingBudgetTokens?: number
+	maxRetries?: number
+}
+
 export class AnthropicHandler implements ApiHandler {
-	private options: ApiHandlerOptions
+	private options: AnthropicHandlerOptions
 	private client: Anthropic
 
-	constructor(options: ApiHandlerOptions) {
+	constructor(options: AnthropicHandlerOptions) {
 		this.options = options
 		this.client = new Anthropic({
 			apiKey: this.options.apiKey,
@@ -22,7 +30,10 @@ export class AnthropicHandler implements ApiHandler {
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const model = this.getModel()
 		let stream: AnthropicStream<Anthropic.RawMessageStreamEvent>
-		const modelId = model.id
+		const modelId = model.id.endsWith(CLAUDE_SONNET_4_1M_SUFFIX)
+			? model.id.slice(0, -CLAUDE_SONNET_4_1M_SUFFIX.length)
+			: model.id
+		const enable1mContextWindow = model.id.endsWith(CLAUDE_SONNET_4_1M_SUFFIX)
 
 		const budget_tokens = this.options.thinkingBudgetTokens || 0
 		const reasoningOn = (modelId.includes("3-7") || modelId.includes("4-")) && budget_tokens !== 0 ? true : false
@@ -95,24 +106,15 @@
 					stream: true,
 				},
 				(() => {
-					// prompt caching: https://x.com/alexalbert__/status/1823751995901272068
-					// https://github.com/anthropics/anthropic-sdk-typescript?tab=readme-ov-file#default-headers
-					// https://github.com/anthropics/anthropic-sdk-typescript/commit/c920b77fc67bd839bfeb6716ceab9d7c9bbe7393
-					switch (modelId) {
-						case "claude-sonnet-4-20250514":
-						case "claude-opus-4-20250514":
-						case "claude-3-7-sonnet-20250219":
-						case "claude-3-5-sonnet-20241022":
-						case "claude-3-5-haiku-20241022":
-						case "claude-3-opus-20240229":
-						case "claude-3-haiku-20240307":
-							return {
-								headers: {
-									"anthropic-beta": "prompt-caching-2024-07-31",
-								},
-							}
-						default:
-							return undefined
+					// 1m context window beta header
+					if (enable1mContextWindow) {
+						return {
+							headers: {
+								"anthropic-beta": "context-1m-2025-08-07",
+							},
+						}
+					} else {
+						return undefined
 					}
 				})(),
 			)