Merged

38 commits
6fbac34
🤖 feat: add support for Ollama local models
ammar-agent Nov 8, 2025
9021ffd
🤖 test: add Ollama integration tests with CI support
ammar-agent Nov 8, 2025
a268216
🤖 ci: add caching for Ollama models
ammar-agent Nov 8, 2025
472270c
🤖 fix: format docs/models.md
ammar-agent Nov 8, 2025
94d4aa9
🤖 refactor: address review comments
ammar-agent Nov 8, 2025
6f8976b
🤖 fix: resolve Ollama integration test timing issues
ammar-agent Nov 8, 2025
6d48ecf
🤖 style: format ollama test file
ammar-agent Nov 8, 2025
5df1cf8
🤖 refactor: cleanup and consistency improvements
ammar-agent Nov 8, 2025
4cd2491
🤖 refactor: remove unused variable from EventCollector
ammar-agent Nov 8, 2025
5081dce
🤖 perf: optimize Ollama CI caching to <5s startup
ammar-agent Nov 8, 2025
1b577db
🤖 feat: add multi-pattern lookup for Ollama model context limits
ammar-agent Nov 8, 2025
f997fbe
🤖 perf: use stable cache key for Ollama (v3 without workflow hash)
ammar-agent Nov 8, 2025
872c6df
🤖 ci: trigger CI after resolving Codex comment
ammar-agent Nov 8, 2025
7fa5c47
🤖 fix: set permissions on Ollama directory for cache saving
ammar-agent Nov 8, 2025
5a4978e
🤖 ci: test warm cache after v3 saved
ammar-agent Nov 8, 2025
c7b245d
🤖 refactor: cache models in /tmp to avoid permission issues
ammar-agent Nov 8, 2025
09015ae
🤖 refactor: simplify Ollama setup (remove caching complexity)
ammar-agent Nov 8, 2025
c10ffcb
🤖 refactor: create setup-ollama action with caching
ammar-agent Nov 8, 2025
4db87ae
🤖 fix: properly stop Ollama process after model pull
ammar-agent Nov 8, 2025
fecacc0
🤖 fix: use absolute path for Ollama cache (~ doesn't expand)
ammar-agent Nov 8, 2025
87a76a7
🤖 debug: add directory listing to verify cache contents
ammar-agent Nov 8, 2025
ffeec29
🤖 debug: check both possible model locations
ammar-agent Nov 8, 2025
2cc309c
🤖 debug: bump cache version to v3 for fresh test
ammar-agent Nov 8, 2025
dfaa011
🤖 debug: remove restore-keys to force cache miss
ammar-agent Nov 8, 2025
75d6c05
🤖 fix: copy models from system location to cacheable location
ammar-agent Nov 8, 2025
7f9c95e
🤖 refactor: simplify ollama caching with binary-only install
ammar-agent Nov 8, 2025
f82f5a7
🤖 fix: bump cache version to v2 to invalidate empty cache
ammar-agent Nov 8, 2025
ab90e9b
🤖 refactor: move Ollama model pull to test-side for better parallelism
ammar-agent Nov 8, 2025
82b51a2
🤖 docs: recommend Ctrl+/ shortcut for model switching
ammar-agent Nov 9, 2025
e90b881
🤖 refactor: extract parseProviderName to eliminate duplication
ammar-agent Nov 9, 2025
f671d7a
🤖 refactor: parseModelString returns provider + model tuple
ammar-agent Nov 9, 2025
c115295
🤖 style: fix prettier formatting in docs/models.md
ammar-agent Nov 9, 2025
de39340
🤖 ci: split Ollama tests into separate job
ammar-agent Nov 9, 2025
94cab2c
🤖 refactor: use TEST_OLLAMA env var to control Ollama tests
ammar-agent Nov 9, 2025
c5305ee
🤖 style: reduce Ollama test log spam in CI
ammar-agent Nov 9, 2025
1c537e7
🤖 test: suppress console output in Ollama tests for CI
ammar-agent Nov 9, 2025
6d1c786
🤖 fix: deduplicate tokenizer warnings to eliminate log spam
ammar-agent Nov 9, 2025
8825ed8
🤖 docs: clarify Ollama works without configuration
ammar-agent Nov 9, 2025
65 changes: 65 additions & 0 deletions .github/actions/setup-ollama/action.yml
@@ -0,0 +1,65 @@
name: Setup Ollama
description: Install Ollama binary and restore model cache (tests pull models idempotently)

runs:
  using: composite
  steps:
    - name: Cache Ollama binary
      id: cache-ollama-binary
      uses: actions/cache@v4
      with:
        path: ./.ollama-install
        key: ${{ runner.os }}-ollama-binary-v2

    - name: Cache Ollama models
      id: cache-ollama-models
      uses: actions/cache@v4
      with:
        path: ~/.ollama
        key: ${{ runner.os }}-ollama-models-v2

    - name: Install Ollama binary (cache miss)
      if: steps.cache-ollama-binary.outputs.cache-hit != 'true'
      shell: bash
      run: |
        echo "Downloading Ollama binary..."
        ARCH=$(uname -m)
        case "$ARCH" in
          x86_64) ARCH="amd64" ;;
          aarch64|arm64) ARCH="arm64" ;;
          *) echo "Unsupported architecture: $ARCH"; exit 1 ;;
        esac
        curl -L https://ollama.com/download/ollama-linux-${ARCH}.tgz -o ollama.tgz
        mkdir -p .ollama-install
        tar -C .ollama-install -xzf ollama.tgz
        rm ollama.tgz
        echo "Ollama binary downloaded"

    - name: Add Ollama to PATH
      shell: bash
      run: |
        echo "$(pwd)/.ollama-install/bin" >> $GITHUB_PATH

    - name: Start Ollama server
      shell: bash
      run: |
        echo "Starting Ollama server..."
        ollama start &
        sleep 2
        echo "Ollama server started"

    - name: Verify Ollama
      shell: bash
      run: |
        ollama --version
        echo "Ollama binary ready - tests will pull models idempotently"

    - name: Verify cache status
      shell: bash
      run: |
        if [[ "${{ steps.cache-ollama-models.outputs.cache-hit }}" == "true" ]]; then
          echo "Model cache restored - available for tests"
          ls -lh "$HOME/.ollama" || echo "Warning: .ollama directory not found"
        else
          echo "Model cache miss - tests will pull models on first run"
        fi
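
The action intentionally stops at installing the binary and restoring the model cache; the tests themselves pull models idempotently. A minimal sketch of what such a pull helper could look like, assuming Ollama's public REST endpoints (`/api/tags`, `/api/pull`) — the helper name is illustrative and not part of this PR:

```typescript
// Hypothetical helper (not part of this PR): ensure a model is present before a test runs.
// Pulling is idempotent - if the model is already in ~/.ollama, this returns immediately.
async function ensureOllamaModel(
  model: string,
  baseUrl = "http://localhost:11434/api"
): Promise<void> {
  // List locally available models via GET /api/tags
  const res = await fetch(`${baseUrl}/tags`);
  const tags = (await res.json()) as { models?: Array<{ name: string }> };
  if (tags.models?.some((m) => m.name === model)) return; // already cached

  // Otherwise pull it via POST /api/pull (blocking, non-streaming)
  const pull = await fetch(`${baseUrl}/pull`, {
    method: "POST",
    body: JSON.stringify({ model, stream: false }),
  });
  if (!pull.ok) throw new Error(`Failed to pull ${model}: ${pull.status}`);
}
```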
41 changes: 41 additions & 0 deletions .github/workflows/ci.yml
@@ -104,6 +104,7 @@ jobs:

      - name: Run integration tests with coverage
        # --silent suppresses per-test output (17 test files × 32 workers = overwhelming logs)
        # Ollama tests are skipped automatically (require TEST_OLLAMA=1)
        run: TEST_INTEGRATION=1 bun x jest --coverage --maxWorkers=100% --silent ${{ github.event.inputs.test_filter || 'tests' }}
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -117,6 +118,46 @@
          flags: integration-tests
          fail_ci_if_error: false

  ollama-test:
    name: Ollama Integration Tests
    runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-24.04-32' || 'ubuntu-latest' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Required for git describe to find tags

      - uses: ./.github/actions/setup-cmux

      - name: Setup Ollama
        uses: ./.github/actions/setup-ollama

      # Ollama server started by setup-ollama action
      # Tests will pull models idempotently
      - name: Verify Ollama server
        run: |
          echo "Verifying Ollama server..."
          timeout 5 sh -c 'until curl -sf http://localhost:11434/api/tags > /dev/null 2>&1; do sleep 0.2; done'
          echo "Ollama ready - integration tests will pull models on demand"

      - name: Build worker files
        run: make build-main

      - name: Run Ollama integration tests with coverage
        # TEST_OLLAMA=1 enables Ollama-specific tests
        # --silent suppresses verbose test output
        run: TEST_INTEGRATION=1 TEST_OLLAMA=1 bun x jest --coverage --maxWorkers=100% --silent tests/ipcMain/ollama.test.ts
        env:
          OLLAMA_BASE_URL: http://localhost:11434/api

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage/lcov.info
          flags: ollama-tests
          fail_ci_if_error: false

  storybook-test:
    name: Storybook Interaction Tests
    runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || 'ubuntu-latest' }}
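The `TEST_OLLAMA=1` gate means the Ollama suite is a no-op in the main integration job and only runs in this dedicated job. A sketch of how that gate might look inside the test file, assuming a Jest-style guard (the actual mechanism in `tests/ipcMain/ollama.test.ts` may differ):

```typescript
// Sketch of an environment-variable gate for the Ollama suite (assumed pattern, not verbatim).
// When TEST_OLLAMA is unset, the whole describe block is skipped automatically.
const describeOllama = process.env.TEST_OLLAMA === "1" ? describe : describe.skip;

describeOllama("Ollama integration", () => {
  const baseUrl = process.env.OLLAMA_BASE_URL ?? "http://localhost:11434/api";

  it("answers a trivial prompt", async () => {
    // ...exercise the local Ollama server at baseUrl...
  });
});
```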
3 changes: 3 additions & 0 deletions bun.lock
@@ -28,6 +28,7 @@
"lru-cache": "^11.2.2",
"markdown-it": "^14.1.0",
"minimist": "^1.2.8",
"ollama-ai-provider-v2": "^1.5.3",
"rehype-harden": "^1.1.5",
"shescape": "^2.1.6",
"source-map-support": "^0.5.21",
@@ -2238,6 +2239,8 @@

"object.values": ["object.values@1.2.1", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", "define-properties": "^1.2.1", "es-object-atoms": "^1.0.0" } }, "sha512-gXah6aZrcUxjWg2zR2MwouP2eHlCBzdV4pygudehaKXSGW4v2AsRQUK+lwwXhii6KFZcunEnmSUoYp5CXibxtA=="],

"ollama-ai-provider-v2": ["ollama-ai-provider-v2@1.5.3", "", { "dependencies": { "@ai-sdk/provider": "^2.0.0", "@ai-sdk/provider-utils": "^3.0.7" }, "peerDependencies": { "zod": "^4.0.16" } }, "sha512-LnpvKuxNJyE+cB03cfUjFJnaiBJoUqz3X97GFc71gz09gOdrxNh1AsVBxrpw3uX5aiMxRIWPOZ8god0dHSChsg=="],

"on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="],

"once": ["once@1.4.0", "", { "dependencies": { "wrappy": "1" } }, "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w=="],
81 changes: 76 additions & 5 deletions docs/models.md
@@ -4,17 +4,88 @@ See also:

- [System Prompt](./system-prompt.md)

Currently we support the Sonnet 4 models and GPT-5 family of models:
cmux supports multiple AI providers through its flexible provider architecture.

### Supported Providers

#### Anthropic (Cloud)

The best-supported provider, with full feature support:

- `anthropic:claude-sonnet-4-5`
- `anthropic:claude-opus-4-1`

#### OpenAI (Cloud)

GPT-5 family of models:

- `openai:gpt-5`
- `openai:gpt-5-pro`
- `openai:gpt-5-codex`

And we intend to always support the models used by 90% of the community.

Anthropic models are better supported than GPT-5 class models due to an outstanding issue in the
Vercel AI SDK.
**Note:** Anthropic models are better supported than GPT-5 class models due to an outstanding issue in the Vercel AI SDK.

TODO: add issue link here.

#### Ollama (Local)

Run models locally with Ollama. No API key required:

- `ollama:gpt-oss:20b`
- `ollama:gpt-oss:120b`
- `ollama:qwen3-coder:30b`
- Any model from the [Ollama Library](https://ollama.com/library)

**Setup:**

1. Install Ollama from [ollama.com](https://ollama.com)
2. Pull a model: `ollama pull gpt-oss:20b`
3. That's it! Ollama works out of the box with no configuration needed.

**Custom Configuration** (optional):

By default, cmux connects to Ollama at `http://localhost:11434/api`. To use a remote instance or custom port, add to `~/.cmux/providers.jsonc`:

```jsonc
{
  "ollama": {
    "baseUrl": "http://your-server:11434/api",
  },
}
```

### Provider Configuration

All providers are configured in `~/.cmux/providers.jsonc`. Example configurations:

```jsonc
{
  // Required for Anthropic models
  "anthropic": {
    "apiKey": "sk-ant-...",
  },
  // Required for OpenAI models
  "openai": {
    "apiKey": "sk-...",
  },
  // Optional for Ollama (only needed for custom URL)
  "ollama": {
    "baseUrl": "http://your-server:11434/api",
  },
}
```

### Model Selection

The quickest way to switch models is with the keyboard shortcut:

- **macOS:** `Cmd+/`
- **Windows/Linux:** `Ctrl+/`

Alternatively, use the Command Palette (`Cmd+Shift+P` / `Ctrl+Shift+P`):

1. Type "model"
2. Select "Change Model"
3. Choose from available models

Models are specified in the format: `provider:model-name`
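
For reference, a few model strings in this format; only the text before the first colon is treated as the provider, so Ollama tags that themselves contain a colon stay intact (this mirrors `parseModelString` added in `src/services/aiService.ts`):

```typescript
// Illustrative model strings in "provider:model-name" form.
const examples = [
  "anthropic:claude-sonnet-4-5", // provider "anthropic", model "claude-sonnet-4-5"
  "openai:gpt-5",                // provider "openai",    model "gpt-5"
  "ollama:gpt-oss:20b",          // provider "ollama",    model "gpt-oss:20b" (colon preserved)
];
```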
1 change: 1 addition & 0 deletions package.json
@@ -69,6 +69,7 @@
"lru-cache": "^11.2.2",
"markdown-it": "^14.1.0",
"minimist": "^1.2.8",
"ollama-ai-provider-v2": "^1.5.3",
"rehype-harden": "^1.1.5",
"shescape": "^2.1.6",
"source-map-support": "^0.5.21",
9 changes: 7 additions & 2 deletions src/config.ts
@@ -426,8 +426,13 @@ export class Config {
// Example:
// {
//   "anthropic": {
//     "apiKey": "sk-...",
//     "baseUrl": "https://api.anthropic.com"
//     "apiKey": "sk-ant-..."
//   },
//   "openai": {
//     "apiKey": "sk-..."
//   },
//   "ollama": {
//     "baseUrl": "http://localhost:11434/api" // Optional - only needed for remote/custom URL
//   }
// }
${jsonString}`;
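
A minimal sketch of the configuration shape this comment describes (the interface name is illustrative, not code from this PR); `ollama.baseUrl` is optional because the local default `http://localhost:11434/api` is used when it is omitted:

```typescript
// Illustrative shape of ~/.cmux/providers.jsonc after parsing (hypothetical type name).
interface ProvidersConfig {
  anthropic?: { apiKey: string; baseUrl?: string };
  openai?: { apiKey: string; baseUrl?: string };
  ollama?: { baseUrl?: string }; // no apiKey; defaults to http://localhost:11434/api when omitted
}
```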
54 changes: 49 additions & 5 deletions src/services/aiService.ts
@@ -93,15 +93,37 @@ if (typeof globalFetchWithExtras.certificate === "function") {

/**
 * Preload AI SDK provider modules to avoid race conditions in concurrent test environments.
 * This function loads @ai-sdk/anthropic and @ai-sdk/openai eagerly so that subsequent
 * dynamic imports in createModel() hit the module cache instead of racing.
 * This function loads @ai-sdk/anthropic, @ai-sdk/openai, and ollama-ai-provider-v2 eagerly
 * so that subsequent dynamic imports in createModel() hit the module cache instead of racing.
 *
 * In production, providers are lazy-loaded on first use to optimize startup time.
 * In tests, we preload them once during setup to ensure reliable concurrent execution.
 */
export async function preloadAISDKProviders(): Promise<void> {
  // Preload providers to ensure they're in the module cache before concurrent tests run
  await Promise.all([import("@ai-sdk/anthropic"), import("@ai-sdk/openai")]);
  await Promise.all([
    import("@ai-sdk/anthropic"),
    import("@ai-sdk/openai"),
    import("ollama-ai-provider-v2"),
  ]);
}

/**
 * Parse provider and model ID from model string.
 * Handles model IDs with colons (e.g., "ollama:gpt-oss:20b").
 * Only splits on the first colon to support Ollama model naming convention.
 *
 * @param modelString - Model string in format "provider:model-id"
 * @returns Tuple of [providerName, modelId]
 * @example
 * parseModelString("anthropic:claude-opus-4") // ["anthropic", "claude-opus-4"]
 * parseModelString("ollama:gpt-oss:20b") // ["ollama", "gpt-oss:20b"]
 */
function parseModelString(modelString: string): [string, string] {
  const colonIndex = modelString.indexOf(":");
  const providerName = colonIndex !== -1 ? modelString.slice(0, colonIndex) : modelString;
  const modelId = colonIndex !== -1 ? modelString.slice(colonIndex + 1) : "";
  return [providerName, modelId];
}

export class AIService extends EventEmitter {
@@ -228,7 +250,8 @@
  ): Promise<Result<LanguageModel, SendMessageError>> {
    try {
      // Parse model string (format: "provider:model-id")
      const [providerName, modelId] = modelString.split(":");
      // Parse provider and model ID from model string
      const [providerName, modelId] = parseModelString(modelString);

      if (!providerName || !modelId) {
        return Err({
@@ -372,6 +395,27 @@
        return Ok(model);
      }

      // Handle Ollama provider
      if (providerName === "ollama") {
        // Ollama doesn't require API key - it's a local service
        // Use custom fetch if provided, otherwise default with unlimited timeout
        const baseFetch =
          typeof providerConfig.fetch === "function"
            ? (providerConfig.fetch as typeof fetch)
            : defaultFetchWithUnlimitedTimeout;

        // Lazy-load Ollama provider to reduce startup time
        const { createOllama } = await import("ollama-ai-provider-v2");
        const provider = createOllama({
          ...providerConfig,
          // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment
          fetch: baseFetch as any,
          // Use strict mode for better compatibility with Ollama API
          compatibility: "strict",
        });
        return Ok(provider(modelId));
      }

      return Err({
        type: "provider_not_supported",
        provider: providerName,
@@ -433,7 +477,7 @@
    log.debug_obj(`${workspaceId}/1_original_messages.json`, messages);

    // Extract provider name from modelString (e.g., "anthropic:claude-opus-4-1" -> "anthropic")
    const [providerName] = modelString.split(":");
    const [providerName] = parseModelString(modelString);

    // Get tool names early for mode transition sentinel (stub config, no workspace context needed)
    const earlyRuntime = createRuntime({ type: "local", srcBaseDir: process.cwd() });
3 changes: 1 addition & 2 deletions src/services/streamManager.ts
@@ -627,12 +627,11 @@ export class StreamManager extends EventEmitter {
        // Check if stream was cancelled BEFORE processing any parts
        // This improves interruption responsiveness by catching aborts earlier
        if (streamInfo.abortController.signal.aborted) {
          log.debug("streamManager: Stream aborted, breaking from loop");
          break;
        }

        // Log all stream parts to debug reasoning (commented out - too spammy)
        // log.debug("streamManager: Stream part", {
        // console.log("[DEBUG streamManager]: Stream part", {
        //   type: part.type,
        //   hasText: "text" in part,
        //   preview: "text" in part ? (part as StreamPartWithText).text?.substring(0, 50) : undefined,
9 changes: 9 additions & 0 deletions src/types/providerOptions.ts
@@ -29,11 +29,20 @@ export interface OpenAIProviderOptions {
  simulateToolPolicyNoop?: boolean;
}

/**
 * Ollama-specific options
 * Currently empty - Ollama is a local service and doesn't require special options.
 * This interface is provided for future extensibility.
 */
// eslint-disable-next-line @typescript-eslint/no-empty-object-type
export interface OllamaProviderOptions {}

/**
 * Cmux provider options - used by both frontend and backend
 */
export interface CmuxProviderOptions {
  /** Provider-specific options */
  anthropic?: AnthropicProviderOptions;
  openai?: OpenAIProviderOptions;
  ollama?: OllamaProviderOptions;
}
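
A short usage sketch (illustrative only, not code from this PR): `ollama` slots into `CmuxProviderOptions` alongside the existing providers and currently carries no settings.

```typescript
// Illustrative only - the empty object reflects the currently empty OllamaProviderOptions.
const providerOptions: CmuxProviderOptions = {
  ollama: {}, // reserved for future Ollama-specific options
};
```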