Changes from all commits
28 commits
d72f5f7
ci : add AMD runners and workflows (#16249)
ggerganov Sep 29, 2025
5f7e166
Fix thinking blocks with quotes + add handling `[THINK]...[/THINK]` b…
ServeurpersoCom Sep 29, 2025
a74a0d6
tests: override test_set_rows::max_nmse_err to allow for occasional r…
jeffbolznv Sep 30, 2025
de41f2b
codeowners: add codeowners for opencl backend (#16344)
lhez Sep 30, 2025
f1eb1cb
kleidiai : fix work size and threads sync for fp16 (#16246)
chaxu01 Sep 30, 2025
3c62aed
common : simplify etag tracking by removing json (#16342)
angt Sep 30, 2025
35fb824
metal : dynamic simdgroups for MV kernels (#16340)
ggerganov Sep 30, 2025
a014310
cuda : Enable CUDA Graph usage for Nemotron Nano v2 (NemotronH) (#16328)
anavp-nvidia Sep 30, 2025
075c015
ggml : bump version to 0.9.4 (ggml/1363)
ggerganov Sep 30, 2025
2df5bcf
ci : disable ccache for android (#16348)
CISC Sep 30, 2025
364a7a6
common : remove common_has_curl() (#16351)
angt Sep 30, 2025
d1c84a6
opencl: support ne3 in get_rows (#15866)
lhez Sep 30, 2025
8d78cd2
ggml webgpu: support for rope,div,sub,glu,scale,cont operators (#16187)
reeselevine Sep 30, 2025
16b0ca0
Chatapi ignore empty sampling (#16330)
ServeurpersoCom Sep 30, 2025
7c156df
opencl: support pad_ext (#15888)
lhez Sep 30, 2025
bf6f3b3
common : disable progress bar without a tty (#16352)
angt Sep 30, 2025
b2ba81d
ci : fix ccache key for ubuntu-cpu-cmake (#16355)
CISC Sep 30, 2025
e74c92e
model : support GLM 4.6 (make a few NextN/MTP tensors not required) (…
bartowski1182 Sep 30, 2025
aa9538a
webui: Remove running `llama-server` within WebUI `dev.sh` script (#1…
allozaur Oct 1, 2025
132d673
vulkan: make ggml_vk_default_dispatcher support older vulkan headers …
netrunnereve Oct 1, 2025
4f15759
Add optional setting for showing "Model used:" information (#16337)
allozaur Oct 1, 2025
1104ca1
ci : use registry cache for docker builds (#16366)
CISC Oct 1, 2025
2a9b633
Improve code block color theming (#16325)
allozaur Oct 1, 2025
7647992
Conversation action dialogs as singletons from Chat Sidebar + apply c…
allozaur Oct 1, 2025
4201dea
common: introduce http.h for httplib-based client (#16373)
angt Oct 1, 2025
1fe4e38
ci: Properly install rocwmma for hip builds (#16305)
IMbackK Oct 1, 2025
ded67b9
llama : parameter conversion and loading fixes for PLaMo2 variants (#…
mitmul Oct 1, 2025
e95fec6
HIP: Disable ROCWMMA fattn on CDNA when compiled against ROCWMMA 2.0.…
IMbackK Oct 1, 2025
14 changes: 5 additions & 9 deletions .devops/rocm.Dockerfile
@@ -1,8 +1,8 @@
ARG UBUNTU_VERSION=24.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=6.4
ARG AMDGPU_VERSION=6.4
ARG ROCM_VERSION=7.0
ARG AMDGPU_VERSION=7.0

# Target the ROCm build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
@@ -13,11 +13,10 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggml-org/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
# gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported
# gfx906 is deprecated
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html
# gfx803, gfx900, gfx906, gfx1032, gfx1101, gfx1102,not officialy supported
# check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html

ARG ROCM_DOCKER_ARCH='gfx803;gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1010;gfx1030;gfx1032;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1151'
ARG ROCM_DOCKER_ARCH='gfx803;gfx900;gfx906;gfx1010;gfx1030;gfx1032;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1151'
#ARG ROCM_DOCKER_ARCH='gfx1151'

# Set ROCm architectures
@@ -36,13 +35,10 @@ WORKDIR /app

COPY . .

RUN git clone https://github.com/rocm/rocwmma --branch develop --depth 1

RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
cmake -S . -B build \
-DGGML_HIP=ON \
-DGGML_HIP_ROCWMMA_FATTN=ON \
-DCMAKE_HIP_FLAGS="-I$(pwd)/rocwmma/library/include/" \
-DAMDGPU_TARGETS="$ROCM_DOCKER_ARCH" \
-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON \
-DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
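A local build of the updated image can be sketched as follows; the image tags are illustrative and not defined by this change, and the fat multi-architecture build can be narrowed through the ROCM_DOCKER_ARCH build argument shown above.

```bash
# Sketch: build the ROCm 7.0 image from the repository root (tag names are arbitrary).
docker build -f .devops/rocm.Dockerfile -t llama-cpp:rocm .

# Optionally restrict the build to a single target, e.g. gfx1151, mirroring the
# commented-out ROCM_DOCKER_ARCH line in the Dockerfile.
docker build -f .devops/rocm.Dockerfile \
    --build-arg ROCM_DOCKER_ARCH='gfx1151' \
    -t llama-cpp:rocm-gfx1151 .
```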
52 changes: 52 additions & 0 deletions .github/workflows/build-amd.yml
@@ -0,0 +1,52 @@
name: CI (AMD)

on:
workflow_dispatch: # allows manual triggering
push:
branches:
- master
paths: [
'.github/workflows/build-amd.yml',
'**/CMakeLists.txt',
'**/.cmake',
'**/*.h',
'**/*.hpp',
'**/*.c',
'**/*.cpp',
'**/*.cu',
'**/*.cuh',
'**/*.comp'
]

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true

jobs:
ggml-ci-x64-amd-vulkan:
runs-on: [self-hosted, Linux, X64, AMD]

steps:
- name: Clone
id: checkout
uses: actions/checkout@v4

- name: Test
id: ggml-ci
run: |
vulkaninfo --summary
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

ggml-ci-x64-amd-rocm:
runs-on: [self-hosted, Linux, X64, AMD]

steps:
- name: Clone
id: checkout
uses: actions/checkout@v4

- name: Test
id: ggml-ci
run: |
amd-smi static
GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
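Because the workflow declares workflow_dispatch, it can also be started by hand. A possible invocation with the GitHub CLI is sketched below; it assumes gh is installed and authenticated and that the self-hosted AMD runners are online.

```bash
# Sketch: manually dispatch the AMD CI workflow on master and follow the resulting run.
gh workflow run build-amd.yml --ref master
gh run list --workflow=build-amd.yml --limit 1   # locate the queued run
gh run watch                                     # pick the run interactively and stream its progress
```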
54 changes: 15 additions & 39 deletions .github/workflows/build.yml
@@ -207,7 +207,7 @@ jobs:
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
with:
key: ubuntu-cpu-cmake
key: ubuntu-cpu-cmake-${{ matrix.build }}
evict-old-files: 1d

- name: Build Dependencies
@@ -487,7 +487,7 @@ jobs:
id: depends
run: |
sudo apt-get update
sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev libcurl4-openssl-dev
sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev libcurl4-openssl-dev rocwmma-dev

- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -1097,10 +1097,12 @@ jobs:
id: checkout
uses: actions/checkout@v4

- name: Clone rocWMMA repository
id: clone_rocwmma
- name: Grab rocWMMA package
id: grab_rocwmma
run: |
git clone https://github.com/rocm/rocwmma --branch rocm-${{ env.ROCM_VERSION }} --depth 1
curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/${{ env.ROCM_VERSION }}/pool/main/r/rocwmma-dev/rocwmma-dev_1.7.0.60402-120~24.04_amd64.deb"
7z x rocwmma.deb
7z x data.tar

- name: Cache ROCm Installation
id: cache-rocm
@@ -1161,8 +1163,9 @@
cmake -G "Unix Makefiles" -B build -S . `
-DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
-DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
-DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/" `
-DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-${{ env.ROCM_VERSION }}/include/" `
-DCMAKE_BUILD_TYPE=Release `
-DROCM_DIR="${env:HIP_PATH}" `
-DGGML_HIP=ON `
-DGGML_HIP_ROCWMMA_FATTN=ON `
-DGGML_RPC=ON `
@@ -1222,11 +1225,12 @@
- name: Clone
uses: actions/checkout@v4

- name: ccache
uses: ggml-org/ccache-action@v1.2.16
with:
key: android-build
evict-old-files: 1d
# Disabled due to size (400MB) and always 0 cache hits
# - name: ccache
# uses: ggml-org/ccache-action@v1.2.16
# with:
# key: android-build
# evict-old-files: 1d

- name: Set up JDK
uses: actions/setup-java@v3
@@ -1461,34 +1465,6 @@ jobs:
run: |
bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

# ggml-ci-x64-amd-vulkan:
# runs-on: [self-hosted, Linux, X64, AMD]
#
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v4
#
# - name: Test
# id: ggml-ci
# run: |
# vulkaninfo --summary
# GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
#
# ggml-ci-x64-amd-rocm:
# runs-on: [self-hosted, Linux, X64, AMD]
#
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v4
#
# - name: Test
# id: ggml-ci
# run: |
# amd-smi static
# GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

ggml-ci-mac-metal:
runs-on: [self-hosted, macOS, ARM64]

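The rocWMMA package grab used by the Windows HIP job can be reproduced outside of Actions roughly as shown below; the ROCM_VERSION value is an assumption here, since the workflow reads it from its environment.

```bash
# Sketch: fetch the pinned rocwmma-dev package and unpack it with 7-Zip, as the workflow does.
ROCM_VERSION=6.4.2   # assumed value of env.ROCM_VERSION in the workflow
curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/${ROCM_VERSION}/pool/main/r/rocwmma-dev/rocwmma-dev_1.7.0.60402-120~24.04_amd64.deb"
7z x rocwmma.deb     # extracts data.tar from the .deb archive
7z x data.tar        # yields opt/rocm-*/include/, which CMAKE_CXX_FLAGS points at
```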
24 changes: 18 additions & 6 deletions .github/workflows/docker.yml
@@ -89,12 +89,15 @@ jobs:
TYPE="-${{ matrix.config.tag }}"
fi
PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
CACHETAGS="${PREFIX}buildcache${TYPE}"
FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}-${{ steps.srctag.outputs.name }}"
LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}-${{ steps.srctag.outputs.name }}"
SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}-${{ steps.srctag.outputs.name }}"
echo "cache_output_tags=$CACHETAGS" >> $GITHUB_OUTPUT
echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT
echo "cache_output_tags=$CACHETAGS" # print out for debugging
echo "full_output_tags=$FULLTAGS" # print out for debugging
echo "light_output_tags=$LIGHTTAGS" # print out for debugging
echo "server_output_tags=$SERVERTAGS" # print out for debugging
@@ -131,11 +134,14 @@ jobs:
target: full
provenance: false
# using github experimental cache
cache-from: type=gha
cache-to: type=gha,mode=max
#cache-from: type=gha
#cache-to: type=gha,mode=max
# return to this if the experimental github cache is having issues
#cache-to: type=local,dest=/tmp/.buildx-cache
#cache-from: type=local,src=/tmp/.buildx-cache
# using registry cache (no storage limit)
cache-from: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }}
cache-to: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }},mode=max

- name: Build and push Light Docker image (tagged + versioned)
if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }}
@@ -150,11 +156,14 @@ jobs:
target: light
provenance: false
# using github experimental cache
cache-from: type=gha
cache-to: type=gha,mode=max
#cache-from: type=gha
#cache-to: type=gha,mode=max
# return to this if the experimental github cache is having issues
#cache-to: type=local,dest=/tmp/.buildx-cache
#cache-from: type=local,src=/tmp/.buildx-cache
# using registry cache (no storage limit)
cache-from: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }}
cache-to: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }},mode=max

- name: Build and push Server Docker image (tagged + versioned)
if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }}
@@ -169,11 +178,14 @@ jobs:
target: server
provenance: false
# using github experimental cache
cache-from: type=gha
cache-to: type=gha,mode=max
#cache-from: type=gha
#cache-to: type=gha,mode=max
# return to this if the experimental github cache is having issues
#cache-to: type=local,dest=/tmp/.buildx-cache
#cache-from: type=local,src=/tmp/.buildx-cache
# using registry cache (no storage limit)
cache-from: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }}
cache-to: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }},mode=max

create_tag:
name: Create and push git tag
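Outside of Actions, the same registry-backed cache can be exercised with docker buildx directly. The sketch below is illustrative: the owner, tags, and the assumption that the Dockerfile defines a server stage are placeholders rather than values fixed by this change, and exporting a registry cache typically requires a docker-container builder with push access.

```bash
# Sketch: build against a registry cache instead of the GitHub Actions (gha) cache.
docker buildx build \
    --target server \
    --cache-from type=registry,ref=ghcr.io/OWNER/llama.cpp:buildcache \
    --cache-to type=registry,ref=ghcr.io/OWNER/llama.cpp:buildcache,mode=max \
    -t ghcr.io/OWNER/llama.cpp:server \
    -f .devops/rocm.Dockerfile .
```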
12 changes: 7 additions & 5 deletions .github/workflows/release.yml
@@ -150,7 +150,7 @@ jobs:
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
with:
key: ubuntu-cpu-cmake
key: ubuntu-cpu-cmake-${{ matrix.build }}
evict-old-files: 1d

- name: Dependencies
@@ -543,10 +543,12 @@ jobs:
id: checkout
uses: actions/checkout@v4

- name: Clone rocWMMA repository
id: clone_rocwmma
- name: Grab rocWMMA package
id: grab_rocwmma
run: |
git clone https://github.com/rocm/rocwmma --branch develop --depth 1
curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/7.0.1/pool/main/r/rocwmma-dev/rocwmma-dev_2.0.0.70001-42~24.04_amd64.deb"
7z x rocwmma.deb
7z x data.tar

- name: Cache ROCm Installation
id: cache-rocm
@@ -601,7 +603,7 @@ jobs:
cmake -G "Unix Makefiles" -B build -S . `
-DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
-DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
-DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/ -Wno-ignored-attributes -Wno-nested-anon-types" `
-DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-7.0.1/include/ -Wno-ignored-attributes -Wno-nested-anon-types" `
-DCMAKE_BUILD_TYPE=Release `
-DGGML_BACKEND_DL=ON `
-DGGML_NATIVE=OFF `
2 changes: 2 additions & 0 deletions CODEOWNERS
@@ -14,6 +14,7 @@
/common/build-info.* @ggerganov
/common/common.* @ggerganov
/common/console.* @ggerganov
/common/http.* @angt
/common/llguidance.* @ggerganov
/common/log.* @ggerganov
/common/sampling.* @ggerganov
@@ -60,6 +61,7 @@
/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
/ggml/src/ggml-impl.h @ggerganov @slaren
/ggml/src/ggml-metal/ @ggerganov
/ggml/src/ggml-opencl/ @lhez @max-krasnyansky
/ggml/src/ggml-opt.cpp @JohannesGaessler
/ggml/src/ggml-quants.* @ggerganov
/ggml/src/ggml-rpc/ @rgerganov
1 change: 1 addition & 0 deletions ci/run.sh
@@ -114,6 +114,7 @@ if [ ! -z ${GG_BUILD_NO_SVE} ]; then
# arm 9 and newer enables sve by default, adjust these flags depending on the cpu used
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm"
fi

## helpers

# download a file if it does not exist or if it is outdated
1 change: 1 addition & 0 deletions common/CMakeLists.txt
@@ -56,6 +56,7 @@ add_library(${TARGET} STATIC
common.h
console.cpp
console.h
http.h
json-partial.cpp
json-partial.h
json-schema-to-grammar.cpp