Skip to content

Commit 576f4cd

Browse files
Merge pull request #208 from menloresearch/update-dev-from-master-2025-08-19-00-12
Sync master with upstream release b6199
2 parents 4b8975c + f08c4c0 commit 576f4cd

File tree

13 files changed

+137
-200
lines changed

13 files changed

+137
-200
lines changed

.devops/cuda.Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,7 @@ RUN apt-get update \
6060
git \
6161
python3 \
6262
python3-pip \
63+
&& pip install --upgrade pip setuptools wheel \
6364
&& pip install --break-system-packages -r requirements.txt \
6465
&& apt autoremove -y \
6566
&& apt clean -y \

CODEOWNERS

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
/tools/server/ @ngxson
66
/ggml/src/ggml-cuda/fattn* @JohannesGaessler
77
/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
8-
/ggml/src/ggml-cuda/mmv.* @JohannesGaessler
98
/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
109
/ggml/src/ggml-opt.cpp @JohannesGaessler
1110
/ggml/src/gguf.cpp @JohannesGaessler

README.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ LLM inference in C/C++
1717

1818
## Hot topics
1919

20+
- **[guide : running gpt-oss with llama.cpp](https://github.com/ggml-org/llama.cpp/discussions/15396)**
2021
- **[[FEEDBACK] Better packaging for llama.cpp to support downstream consumers 🤗](https://github.com/ggml-org/llama.cpp/discussions/15313)**
2122
- Support for the `gpt-oss` model with native MXFP4 format has been added | [PR](https://github.com/ggml-org/llama.cpp/pull/15091) | [Collaboration with NVIDIA](https://blogs.nvidia.com/blog/rtx-ai-garage-openai-oss) | [Comment](https://github.com/ggml-org/llama.cpp/discussions/15095)
2223
- Hot PRs: [All](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Apr+label%3Ahot+) | [Open](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Apr+label%3Ahot+is%3Aopen)

ggml/src/ggml-quants.c

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -566,7 +566,7 @@ static float make_q3_quants(int n, int nmax, const float * GGML_RESTRICT x, int8
566566
for (int i = 0; i < n; ++i) {
567567
L[i] += nmax;
568568
}
569-
return sumlx / suml2;
569+
return suml2 > 0.0f ? sumlx / suml2 : 0.0f;
570570
}
571571
for (int i = 0; i < n; ++i) {
572572
int l = nearest_int(iscale * x[i]);
@@ -901,7 +901,7 @@ static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint
901901
for (int i = 0; i < n; ++i) {
902902
max = MAX(max, x[i]);
903903
}
904-
if (!max) { // all zero
904+
if (max < GROUP_MAX_EPS) { // all zero
905905
for (int i = 0; i < n; ++i) { L[i] = 0; }
906906
return 0.f;
907907
}
@@ -966,7 +966,7 @@ static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint
966966
break;
967967
}
968968
}
969-
return sumlx/suml2;
969+
return suml2 > 0.0f ? sumlx / suml2 : 0.0f;
970970
}
971971

972972
static void quantize_row_q2_K_impl(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int k, const float * GGML_RESTRICT quant_weights) {
@@ -4266,7 +4266,7 @@ static void quantize_row_iq1_s_impl(const float * GGML_RESTRICT x, void * GGML_R
42664266
sumw[j+1] = sumw[j] + weight[i];
42674267
}
42684268
}
4269-
float best_score = -FLT_MIN, scale = max;
4269+
float best_score = -FLT_MAX, scale = max;
42704270
int besti1 = -1, besti2 = -1, best_shift = 0;
42714271
for (int i1 = 0; i1 <= block_size; ++i1) {
42724272
for (int i2 = i1; i2 <= block_size; ++i2) {
@@ -4442,7 +4442,7 @@ static void quantize_row_iq1_m_impl(const float * GGML_RESTRICT x, void * GGML_R
44424442
idx[2*j] = j;
44434443
}
44444444
qsort(pairs, block_size, 2*sizeof(float), iq1_sort_helper);
4445-
float best_score = -FLT_MIN, scale = max;
4445+
float best_score = -FLT_MAX, scale = max;
44464446
int besti1 = -1, besti2 = -1, best_k = -1;
44474447
// 0: +, +
44484448
// 1: +, -

ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -223,7 +223,8 @@ void string_to_spv_func(const std::string& _name, const std::string& in_fname, c
223223
std::string target_env = (name.find("_cm2") != std::string::npos) ? "--target-env=vulkan1.3" : "--target-env=vulkan1.2";
224224

225225
// disable spirv-opt for coopmat shaders for https://github.com/ggerganov/llama.cpp/issues/10734
226-
std::string opt_level = coopmat ? "" : "-O";
226+
// disable spirv-opt for bf16 shaders for https://github.com/ggml-org/llama.cpp/issues/15344
227+
std::string opt_level = (coopmat || name.find("bf16") != std::string::npos) ? "" : "-O";
227228

228229
#ifdef _WIN32
229230
std::vector<std::string> cmd = {GLSLC, "-fshader-stage=compute", target_env, opt_level, "\"" + in_path + "\"", "-o", "\"" + out_fname + "\""};

scripts/sync-ggml-am.sh

Lines changed: 3 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -74,21 +74,7 @@ while read c; do
7474
cmake/common.cmake \
7575
cmake/ggml-config.cmake.in \
7676
src/ggml-cpu/cmake/FindSIMD.cmake \
77-
src/ggml*.h \
78-
src/ggml*.c \
79-
src/ggml*.cpp \
80-
src/gguf*.cpp \
81-
src/ggml-blas/* \
82-
src/ggml-cann/* \
83-
src/ggml-cpu/* \
84-
src/ggml-cuda/* \
85-
src/ggml-hip/* \
86-
src/ggml-metal/* \
87-
src/ggml-musa/* \
88-
src/ggml-opencl/* \
89-
src/ggml-rpc/* \
90-
src/ggml-sycl/* \
91-
src/ggml-vulkan/* \
77+
src/ggml* \
9278
include/ggml*.h \
9379
include/gguf*.h \
9480
tests/test-opt.cpp \
@@ -131,21 +117,7 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
131117
# cmake/ggml-config.cmake.in -> ggml/cmake/ggml-config.cmake.in
132118
# src/ggml-cpu/cmake/FindSIMD.cmake -> ggml/src/ggml-cpu/cmake/FindSIMD.cmake
133119
#
134-
# src/ggml*.c -> ggml/src/ggml*.c
135-
# src/ggml*.cpp -> ggml/src/ggml*.cpp
136-
# src/ggml*.h -> ggml/src/ggml*.h
137-
# src/gguf*.cpp -> ggml/src/gguf*.cpp
138-
# src/ggml-blas/* -> ggml/src/ggml-blas/*
139-
# src/ggml-cann/* -> ggml/src/ggml-cann/*
140-
# src/ggml-cpu/* -> ggml/src/ggml-cpu/*
141-
# src/ggml-cuda/* -> ggml/src/ggml-cuda/*
142-
# src/ggml-hip/* -> ggml/src/ggml-hip/*
143-
# src/ggml-metal/* -> ggml/src/ggml-metal/*
144-
# src/ggml-musa/* -> ggml/src/ggml-musa/*
145-
# src/ggml-opencl/* -> ggml/src/ggml-opencl/*
146-
# src/ggml-rpc/* -> ggml/src/ggml-rpc/*
147-
# src/ggml-sycl/* -> ggml/src/ggml-sycl/*
148-
# src/ggml-vulkan/* -> ggml/src/ggml-vulkan/*
120+
# src/ggml* -> ggml/src/ggml*
149121
#
150122
# include/ggml*.h -> ggml/include/ggml*.h
151123
# include/gguf*.h -> ggml/include/gguf*.h
@@ -163,20 +135,7 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
163135
-e 's/([[:space:]]| [ab]\/)cmake\/common.cmake/\1ggml\/cmake\/common.cmake/g' \
164136
-e 's/([[:space:]]| [ab]\/)cmake\/ggml-config.cmake.in/\1ggml\/cmake\/ggml-config.cmake.in/g' \
165137
-e 's/([[:space:]]| [ab]\/)src\/ggml-cpu\/cmake\/FindSIMD.cmake/\1ggml\/src\/ggml-cpu\/cmake\/FindSIMD.cmake/g' \
166-
-e 's/([[:space:]]| [ab]\/)src\/ggml(.*)\.c/\1ggml\/src\/ggml\2.c/g' \
167-
-e 's/([[:space:]]| [ab]\/)src\/ggml(.*)\.cpp/\1ggml\/src\/ggml\2.cpp/g' \
168-
-e 's/([[:space:]]| [ab]\/)src\/ggml(.*)\.h/\1ggml\/src\/ggml\2.h/g' \
169-
-e 's/([[:space:]]| [ab]\/)src\/gguf(.*)\.cpp/\1ggml\/src\/gguf\2.cpp/g' \
170-
-e 's/([[:space:]]| [ab]\/)src\/ggml-blas\//\1ggml\/src\/ggml-blas\//g' \
171-
-e 's/([[:space:]]| [ab]\/)src\/ggml-cann\//\1ggml\/src\/ggml-cann\//g' \
172-
-e 's/([[:space:]]| [ab]\/)src\/ggml-cpu\//\1ggml\/src\/ggml-cpu\//g' \
173-
-e 's/([[:space:]]| [ab]\/)src\/ggml-cuda\//\1ggml\/src\/ggml-cuda\//g' \
174-
-e 's/([[:space:]]| [ab]\/)src\/ggml-hip\//\1ggml\/src\/ggml-hip\//g' \
175-
-e 's/([[:space:]]| [ab]\/)src\/ggml-metal\//\1ggml\/src\/ggml-metal\//g' \
176-
-e 's/([[:space:]]| [ab]\/)src\/ggml-opencl\//\1ggml\/src\/ggml-opencl\//g' \
177-
-e 's/([[:space:]]| [ab]\/)src\/ggml-rpc\//\1ggml\/src\/ggml-rpc\//g' \
178-
-e 's/([[:space:]]| [ab]\/)src\/ggml-sycl\//\1ggml\/src\/ggml-sycl\//g' \
179-
-e 's/([[:space:]]| [ab]\/)src\/ggml-vulkan\//\1ggml\/src\/ggml-vulkan\//g' \
138+
-e 's/([[:space:]]| [ab]\/)src\/ggml(.*)/\1ggml\/src\/ggml\2/g' \
180139
-e 's/([[:space:]]| [ab]\/)include\/ggml(.*)\.h/\1ggml\/include\/ggml\2.h/g' \
181140
-e 's/([[:space:]]| [ab]\/)include\/gguf(.*)\.h/\1ggml\/include\/gguf\2.h/g' \
182141
-e 's/([[:space:]]| [ab]\/)tests\/(.*)\.cpp/\1tests\/\2.cpp/g' \

scripts/sync-ggml.last

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
b141fc226b68e4af383101c39da90b54ede98850
1+
323951f1bdcdfbd5b5ff3a9a7c3770e63b1a560e

scripts/sync-ggml.sh

Lines changed: 1 addition & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -6,21 +6,7 @@ cp -rpv ../ggml/src/CMakeLists.txt ./ggml/src/CMakeLists.txt
66
cp -rpv ../ggml/cmake/* ./ggml/cmake/
77
cp -rpv ../ggml/src/ggml-cpu/cmake/* ./ggml/src/ggml-cpu/cmake/
88

9-
cp -rpv ../ggml/src/ggml*.c ./ggml/src/
10-
cp -rpv ../ggml/src/ggml*.cpp ./ggml/src/
11-
cp -rpv ../ggml/src/ggml*.h ./ggml/src/
12-
cp -rpv ../ggml/src/gguf*.cpp ./ggml/src/
13-
cp -rpv ../ggml/src/ggml-blas/* ./ggml/src/ggml-blas/
14-
cp -rpv ../ggml/src/ggml-cann/* ./ggml/src/ggml-cann/
15-
cp -rpv ../ggml/src/ggml-cpu/* ./ggml/src/ggml-cpu/
16-
cp -rpv ../ggml/src/ggml-cuda/* ./ggml/src/ggml-cuda/
17-
cp -rpv ../ggml/src/ggml-hip/* ./ggml/src/ggml-hip/
18-
cp -rpv ../ggml/src/ggml-metal/* ./ggml/src/ggml-metal/
19-
cp -rpv ../ggml/src/ggml-musa/* ./ggml/src/ggml-musa/
20-
cp -rpv ../ggml/src/ggml-opencl/* ./ggml/src/ggml-opencl/
21-
cp -rpv ../ggml/src/ggml-rpc/* ./ggml/src/ggml-rpc/
22-
cp -rpv ../ggml/src/ggml-sycl/* ./ggml/src/ggml-sycl/
23-
cp -rpv ../ggml/src/ggml-vulkan/* ./ggml/src/ggml-vulkan/
9+
cp -rpv ../ggml/src/ggml* ./ggml/src/
2410

2511
cp -rpv ../ggml/include/ggml*.h ./ggml/include/
2612
cp -rpv ../ggml/include/gguf*.h ./ggml/include/

0 commit comments

Comments
 (0)