Commit 3202b13
🤖 feat: add Ollama local model support (#531)
Adds Ollama provider support for running local LLMs (e.g., `ollama:llama3.2`, `ollama:qwen2.5-coder`) with full tool calling and streaming support.

## Setup

1. Install Ollama from [ollama.com](https://ollama.com)
2. Pull a model: `ollama pull gpt-oss:20b`
3. That's it! Works out-of-the-box with no configuration needed.

Optional: Configure custom URL in `~/.cmux/providers.jsonc`:

```jsonc
{
  "ollama": {
    "baseUrl": "http://your-server:11434/api"
  }
}
```

## Key Changes

- Model string parsing handles Ollama format (`ollama:model-id:tag`)
- Integration with `ollama-ai-provider-v2` from the Vercel AI SDK
- No configuration required - defaults to `http://localhost:11434/api`
- 4 integration tests with CI support (gated by `TEST_OLLAMA=1`)
- Tokenizer support for common Ollama models

## Tests

✅ 4 new integration tests (102s)
✅ 964 unit tests pass
✅ All CI checks pass

---

_Generated with `cmux`_

---------

Co-authored-by: Ammar <ammar+ai@ammar.io>
1 parent 25fb385 commit 3202b13
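The `ollama:model-id:tag` format mentioned above works because model strings are split only on the first colon, so the Ollama tag stays part of the model ID. A minimal sketch of that rule (the real helper, `parseModelString`, appears in the `src/services/aiService.ts` diff below):

```ts
// Sketch: split a cmux model string on the first colon only, so Ollama tags survive.
function splitModelString(modelString: string): [provider: string, modelId: string] {
  const i = modelString.indexOf(":");
  return i === -1 ? [modelString, ""] : [modelString.slice(0, i), modelString.slice(i + 1)];
}

splitModelString("anthropic:claude-sonnet-4-5"); // ["anthropic", "claude-sonnet-4-5"]
splitModelString("ollama:gpt-oss:20b");          // ["ollama", "gpt-oss:20b"]
```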

File tree

16 files changed: +796 −76 lines changed

.github/actions/setup-ollama/action.yml (new file)

Lines changed: 65 additions & 0 deletions

```yaml
name: Setup Ollama
description: Install Ollama binary and restore model cache (tests pull models idempotently)

runs:
  using: composite
  steps:
    - name: Cache Ollama binary
      id: cache-ollama-binary
      uses: actions/cache@v4
      with:
        path: ./.ollama-install
        key: ${{ runner.os }}-ollama-binary-v2

    - name: Cache Ollama models
      id: cache-ollama-models
      uses: actions/cache@v4
      with:
        path: ~/.ollama
        key: ${{ runner.os }}-ollama-models-v2

    - name: Install Ollama binary (cache miss)
      if: steps.cache-ollama-binary.outputs.cache-hit != 'true'
      shell: bash
      run: |
        echo "Downloading Ollama binary..."
        ARCH=$(uname -m)
        case "$ARCH" in
          x86_64) ARCH="amd64" ;;
          aarch64|arm64) ARCH="arm64" ;;
          *) echo "Unsupported architecture: $ARCH"; exit 1 ;;
        esac
        curl -L https://ollama.com/download/ollama-linux-${ARCH}.tgz -o ollama.tgz
        mkdir -p .ollama-install
        tar -C .ollama-install -xzf ollama.tgz
        rm ollama.tgz
        echo "Ollama binary downloaded"

    - name: Add Ollama to PATH
      shell: bash
      run: |
        echo "$(pwd)/.ollama-install/bin" >> $GITHUB_PATH

    - name: Start Ollama server
      shell: bash
      run: |
        echo "Starting Ollama server..."
        ollama start &
        sleep 2
        echo "Ollama server started"

    - name: Verify Ollama
      shell: bash
      run: |
        ollama --version
        echo "Ollama binary ready - tests will pull models idempotently"

    - name: Verify cache status
      shell: bash
      run: |
        if [[ "${{ steps.cache-ollama-models.outputs.cache-hit }}" == "true" ]]; then
          echo "Model cache restored - available for tests"
          ls -lh "$HOME/.ollama" || echo "Warning: .ollama directory not found"
        else
          echo "Model cache miss - tests will pull models on first run"
        fi
```
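The action above only installs the binary and restores the cache; the tests themselves pull models on demand. As an illustration (not the repo's actual test helper), an idempotent pull against Ollama's local HTTP API could look like the following — `ensureModel` and its defaults are hypothetical:

```ts
// Hypothetical helper: make sure a model is present before a test runs.
// Ollama's /api/pull skips layers that are already downloaded, so repeated
// calls are cheap once the model cache has been restored.
async function ensureModel(model: string, baseUrl = "http://localhost:11434/api"): Promise<void> {
  const res = await fetch(`${baseUrl}/pull`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model, stream: false }),
  });
  if (!res.ok) {
    throw new Error(`ollama pull failed for ${model}: ${res.status} ${await res.text()}`);
  }
}

// Usage in a test setup step:
// await ensureModel("gpt-oss:20b");
```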

.github/workflows/ci.yml

Lines changed: 41 additions & 0 deletions

```diff
@@ -104,6 +104,7 @@ jobs:
 
       - name: Run integration tests with coverage
         # --silent suppresses per-test output (17 test files × 32 workers = overwhelming logs)
+        # Ollama tests are skipped automatically (require TEST_OLLAMA=1)
         run: TEST_INTEGRATION=1 bun x jest --coverage --maxWorkers=100% --silent ${{ github.event.inputs.test_filter || 'tests' }}
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -117,6 +118,46 @@
           flags: integration-tests
           fail_ci_if_error: false
 
+  ollama-test:
+    name: Ollama Integration Tests
+    runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-24.04-32' || 'ubuntu-latest' }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # Required for git describe to find tags
+
+      - uses: ./.github/actions/setup-cmux
+
+      - name: Setup Ollama
+        uses: ./.github/actions/setup-ollama
+
+      # Ollama server started by setup-ollama action
+      # Tests will pull models idempotently
+      - name: Verify Ollama server
+        run: |
+          echo "Verifying Ollama server..."
+          timeout 5 sh -c 'until curl -sf http://localhost:11434/api/tags > /dev/null 2>&1; do sleep 0.2; done'
+          echo "Ollama ready - integration tests will pull models on demand"
+
+      - name: Build worker files
+        run: make build-main
+
+      - name: Run Ollama integration tests with coverage
+        # TEST_OLLAMA=1 enables Ollama-specific tests
+        # --silent suppresses verbose test output
+        run: TEST_INTEGRATION=1 TEST_OLLAMA=1 bun x jest --coverage --maxWorkers=100% --silent tests/ipcMain/ollama.test.ts
+        env:
+          OLLAMA_BASE_URL: http://localhost:11434/api
+
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v5
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          files: ./coverage/lcov.info
+          flags: ollama-tests
+          fail_ci_if_error: false
+
   storybook-test:
     name: Storybook Interaction Tests
     runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || 'ubuntu-latest' }}
```
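The test file itself (`tests/ipcMain/ollama.test.ts`) is not part of this diff, so the exact gating code isn't shown; a common Jest pattern for the `TEST_OLLAMA=1` gate used by the job above would be:

```ts
// Only run the Ollama suite when TEST_OLLAMA=1 is set (as the ollama-test CI job does);
// otherwise the whole describe block is skipped.
const describeOllama = process.env.TEST_OLLAMA === "1" ? describe : describe.skip;

describeOllama("ollama integration", () => {
  test("streams a response from a local model", async () => {
    // ...exercise cmux against ollama:gpt-oss:20b here...
  });
});
```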

bun.lock

Lines changed: 3 additions & 0 deletions

```diff
@@ -28,6 +28,7 @@
       "lru-cache": "^11.2.2",
       "markdown-it": "^14.1.0",
       "minimist": "^1.2.8",
+      "ollama-ai-provider-v2": "^1.5.3",
       "rehype-harden": "^1.1.5",
       "shescape": "^2.1.6",
       "source-map-support": "^0.5.21",
@@ -2238,6 +2239,8 @@
 
     "object.values": ["object.values@1.2.1", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", "define-properties": "^1.2.1", "es-object-atoms": "^1.0.0" } }, "sha512-gXah6aZrcUxjWg2zR2MwouP2eHlCBzdV4pygudehaKXSGW4v2AsRQUK+lwwXhii6KFZcunEnmSUoYp5CXibxtA=="],
 
+    "ollama-ai-provider-v2": ["ollama-ai-provider-v2@1.5.3", "", { "dependencies": { "@ai-sdk/provider": "^2.0.0", "@ai-sdk/provider-utils": "^3.0.7" }, "peerDependencies": { "zod": "^4.0.16" } }, "sha512-LnpvKuxNJyE+cB03cfUjFJnaiBJoUqz3X97GFc71gz09gOdrxNh1AsVBxrpw3uX5aiMxRIWPOZ8god0dHSChsg=="],
+
     "on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="],
 
     "once": ["once@1.4.0", "", { "dependencies": { "wrappy": "1" } }, "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w=="],
```

docs/models.md

Lines changed: 76 additions & 5 deletions

```diff
@@ -4,17 +4,88 @@ See also:
 
 - [System Prompt](./system-prompt.md)
 
-Currently we support the Sonnet 4 models and GPT-5 family of models:
+cmux supports multiple AI providers through its flexible provider architecture.
+
+### Supported Providers
+
+#### Anthropic (Cloud)
+
+Best supported provider with full feature support:
 
 - `anthropic:claude-sonnet-4-5`
 - `anthropic:claude-opus-4-1`
+
+#### OpenAI (Cloud)
+
+GPT-5 family of models:
+
 - `openai:gpt-5`
 - `openai:gpt-5-pro`
 - `openai:gpt-5-codex`
 
-And we intend to always support the models used by 90% of the community.
-
-Anthropic models are better supported than GPT-5 class models due to an outstanding issue in the
-Vercel AI SDK.
+**Note:** Anthropic models are better supported than GPT-5 class models due to an outstanding issue in the Vercel AI SDK.
 
 TODO: add issue link here.
+
+#### Ollama (Local)
+
+Run models locally with Ollama. No API key required:
+
+- `ollama:gpt-oss:20b`
+- `ollama:gpt-oss:120b`
+- `ollama:qwen3-coder:30b`
+- Any model from the [Ollama Library](https://ollama.com/library)
+
+**Setup:**
+
+1. Install Ollama from [ollama.com](https://ollama.com)
+2. Pull a model: `ollama pull gpt-oss:20b`
+3. That's it! Ollama works out-of-the-box with no configuration needed.
+
+**Custom Configuration** (optional):
+
+By default, cmux connects to Ollama at `http://localhost:11434/api`. To use a remote instance or custom port, add to `~/.cmux/providers.jsonc`:
+
+```jsonc
+{
+  "ollama": {
+    "baseUrl": "http://your-server:11434/api",
+  },
+}
+```
+
+### Provider Configuration
+
+All providers are configured in `~/.cmux/providers.jsonc`. Example configurations:
+
+```jsonc
+{
+  // Required for Anthropic models
+  "anthropic": {
+    "apiKey": "sk-ant-...",
+  },
+  // Required for OpenAI models
+  "openai": {
+    "apiKey": "sk-...",
+  },
+  // Optional for Ollama (only needed for custom URL)
+  "ollama": {
+    "baseUrl": "http://your-server:11434/api",
+  },
+}
+```
+
+### Model Selection
+
+The quickest way to switch models is with the keyboard shortcut:
+
+- **macOS:** `Cmd+/`
+- **Windows/Linux:** `Ctrl+/`
+
+Alternatively, use the Command Palette (`Cmd+Shift+P` / `Ctrl+Shift+P`):
+
+1. Type "model"
+2. Select "Change Model"
+3. Choose from available models
+
+Models are specified in the format: `provider:model-name`
```

package.json

Lines changed: 1 addition & 0 deletions

```diff
@@ -69,6 +69,7 @@
     "lru-cache": "^11.2.2",
     "markdown-it": "^14.1.0",
     "minimist": "^1.2.8",
+    "ollama-ai-provider-v2": "^1.5.3",
    "rehype-harden": "^1.1.5",
    "shescape": "^2.1.6",
    "source-map-support": "^0.5.21",
```

src/config.ts

Lines changed: 7 additions & 2 deletions

```diff
@@ -426,8 +426,13 @@ export class Config {
     // Example:
     // {
     //   "anthropic": {
-    //     "apiKey": "sk-...",
-    //     "baseUrl": "https://api.anthropic.com"
+    //     "apiKey": "sk-ant-..."
+    //   },
+    //   "openai": {
+    //     "apiKey": "sk-..."
+    //   },
+    //   "ollama": {
+    //     "baseUrl": "http://localhost:11434/api" // Optional - only needed for remote/custom URL
     //   }
     // }
 ${jsonString}`;
```

src/services/aiService.ts

Lines changed: 49 additions & 5 deletions

```diff
@@ -93,15 +93,37 @@ if (typeof globalFetchWithExtras.certificate === "function") {
 
 /**
  * Preload AI SDK provider modules to avoid race conditions in concurrent test environments.
- * This function loads @ai-sdk/anthropic and @ai-sdk/openai eagerly so that subsequent
- * dynamic imports in createModel() hit the module cache instead of racing.
+ * This function loads @ai-sdk/anthropic, @ai-sdk/openai, and ollama-ai-provider-v2 eagerly
+ * so that subsequent dynamic imports in createModel() hit the module cache instead of racing.
  *
  * In production, providers are lazy-loaded on first use to optimize startup time.
  * In tests, we preload them once during setup to ensure reliable concurrent execution.
  */
 export async function preloadAISDKProviders(): Promise<void> {
   // Preload providers to ensure they're in the module cache before concurrent tests run
-  await Promise.all([import("@ai-sdk/anthropic"), import("@ai-sdk/openai")]);
+  await Promise.all([
+    import("@ai-sdk/anthropic"),
+    import("@ai-sdk/openai"),
+    import("ollama-ai-provider-v2"),
+  ]);
+}
+
+/**
+ * Parse provider and model ID from model string.
+ * Handles model IDs with colons (e.g., "ollama:gpt-oss:20b").
+ * Only splits on the first colon to support Ollama model naming convention.
+ *
+ * @param modelString - Model string in format "provider:model-id"
+ * @returns Tuple of [providerName, modelId]
+ * @example
+ * parseModelString("anthropic:claude-opus-4") // ["anthropic", "claude-opus-4"]
+ * parseModelString("ollama:gpt-oss:20b") // ["ollama", "gpt-oss:20b"]
+ */
+function parseModelString(modelString: string): [string, string] {
+  const colonIndex = modelString.indexOf(":");
+  const providerName = colonIndex !== -1 ? modelString.slice(0, colonIndex) : modelString;
+  const modelId = colonIndex !== -1 ? modelString.slice(colonIndex + 1) : "";
+  return [providerName, modelId];
 }
 
 export class AIService extends EventEmitter {
@@ -228,7 +250,8 @@ export class AIService extends EventEmitter {
   ): Promise<Result<LanguageModel, SendMessageError>> {
     try {
       // Parse model string (format: "provider:model-id")
-      const [providerName, modelId] = modelString.split(":");
+      // Parse provider and model ID from model string
+      const [providerName, modelId] = parseModelString(modelString);
 
       if (!providerName || !modelId) {
         return Err({
@@ -372,6 +395,27 @@ export class AIService extends EventEmitter {
         return Ok(model);
       }
 
+      // Handle Ollama provider
+      if (providerName === "ollama") {
+        // Ollama doesn't require API key - it's a local service
+        // Use custom fetch if provided, otherwise default with unlimited timeout
+        const baseFetch =
+          typeof providerConfig.fetch === "function"
+            ? (providerConfig.fetch as typeof fetch)
+            : defaultFetchWithUnlimitedTimeout;
+
+        // Lazy-load Ollama provider to reduce startup time
+        const { createOllama } = await import("ollama-ai-provider-v2");
+        const provider = createOllama({
+          ...providerConfig,
+          // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment
+          fetch: baseFetch as any,
+          // Use strict mode for better compatibility with Ollama API
+          compatibility: "strict",
+        });
+        return Ok(provider(modelId));
+      }
+
       return Err({
         type: "provider_not_supported",
         provider: providerName,
@@ -433,7 +477,7 @@ export class AIService extends EventEmitter {
     log.debug_obj(`${workspaceId}/1_original_messages.json`, messages);
 
     // Extract provider name from modelString (e.g., "anthropic:claude-opus-4-1" -> "anthropic")
-    const [providerName] = modelString.split(":");
+    const [providerName] = parseModelString(modelString);
 
     // Get tool names early for mode transition sentinel (stub config, no workspace context needed)
     const earlyRuntime = createRuntime({ type: "local", srcBaseDir: process.cwd() });
```
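Because `createOllama` returns a standard AI SDK provider, the model it produces can be driven by the same streaming path as the cloud providers. A standalone sketch (assuming the Vercel AI SDK's `streamText` and an Ollama server on the default `http://localhost:11434/api`):

```ts
import { streamText } from "ai";
import { createOllama } from "ollama-ai-provider-v2";

// Defaults to the local Ollama endpoint; pass options here for a remote server.
const ollama = createOllama();

const result = streamText({
  model: ollama("gpt-oss:20b"),
  prompt: "Explain what a composite GitHub Action is in two sentences.",
});

// Stream tokens to stdout as they arrive.
for await (const chunk of result.textStream) {
  process.stdout.write(chunk);
}
```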

src/services/streamManager.ts

Lines changed: 1 addition & 2 deletions

```diff
@@ -627,12 +627,11 @@ export class StreamManager extends EventEmitter {
         // Check if stream was cancelled BEFORE processing any parts
         // This improves interruption responsiveness by catching aborts earlier
         if (streamInfo.abortController.signal.aborted) {
-          log.debug("streamManager: Stream aborted, breaking from loop");
           break;
         }
 
         // Log all stream parts to debug reasoning (commented out - too spammy)
-        // log.debug("streamManager: Stream part", {
+        // console.log("[DEBUG streamManager]: Stream part", {
         //   type: part.type,
         //   hasText: "text" in part,
         //   preview: "text" in part ? (part as StreamPartWithText).text?.substring(0, 50) : undefined,
```

src/types/providerOptions.ts

Lines changed: 9 additions & 0 deletions

```diff
@@ -29,11 +29,20 @@ export interface OpenAIProviderOptions {
   simulateToolPolicyNoop?: boolean;
 }
 
+/**
+ * Ollama-specific options
+ * Currently empty - Ollama is a local service and doesn't require special options.
+ * This interface is provided for future extensibility.
+ */
+// eslint-disable-next-line @typescript-eslint/no-empty-object-type
+export interface OllamaProviderOptions {}
+
 /**
  * Cmux provider options - used by both frontend and backend
  */
 export interface CmuxProviderOptions {
   /** Provider-specific options */
   anthropic?: AnthropicProviderOptions;
   openai?: OpenAIProviderOptions;
+  ollama?: OllamaProviderOptions;
 }
```
