Skip to content

Commit f86216e

Browse files
Merge pull request #321 from janhq/update-dev-from-master-2025-11-12-00-36
Sync master with upstream release b7027
2 parents: 07e4b51 + 7d019cf; commit: f86216e

File tree

29 files changed

+9063
-10109
lines changed

29 files changed

+9063
-10109
lines changed

.devops/nix/package.nix

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434
rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
3535
enableCurl ? true,
3636
useVulkan ? false,
37+
useRpc ? false,
3738
llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
3839

3940
# It's necessary to consistently use backendStdenv when building with CUDA support,
@@ -175,6 +176,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
175176
(cmakeBool "GGML_METAL" useMetalKit)
176177
(cmakeBool "GGML_VULKAN" useVulkan)
177178
(cmakeBool "GGML_STATIC" enableStatic)
179+
(cmakeBool "GGML_RPC" useRpc)
178180
]
179181
++ optionals useCuda [
180182
(

.github/workflows/build.yml

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1651,3 +1651,50 @@ jobs:
16511651
run: |
16521652
GG_BUILD_KLEIDIAI=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
16531653
1654+
ggml-ci-arm64-graviton4-kleidiai:
1655+
runs-on: ah-ubuntu_22_04-c8g_8x
1656+
1657+
steps:
1658+
- name: Clone
1659+
id: checkout
1660+
uses: actions/checkout@v4
1661+
1662+
- name: Dependencies
1663+
id: depends
1664+
run: |
1665+
set -euxo pipefail
1666+
sudo apt-get update
1667+
sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a \
1668+
apt-get install -y \
1669+
build-essential \
1670+
libcurl4-openssl-dev \
1671+
python3-venv \
1672+
gpg \
1673+
wget \
1674+
time \
1675+
git-lfs
1676+
1677+
git lfs install
1678+
1679+
# install the latest cmake
1680+
sudo install -d /usr/share/keyrings
1681+
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc \
1682+
| gpg --dearmor \
1683+
| sudo tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null
1684+
echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ jammy main' \
1685+
| sudo tee /etc/apt/sources.list.d/kitware.list
1686+
sudo apt-get update
1687+
sudo apt-get install -y cmake
1688+
1689+
- name: ccache
1690+
uses: ggml-org/ccache-action@v1.2.16
1691+
with:
1692+
key: ggml-ci-arm64-graviton4-kleidiai
1693+
evict-old-files: 1d
1694+
1695+
- name: Test
1696+
id: ggml-ci
1697+
run: |
1698+
GG_BUILD_KLEIDIAI=1 \
1699+
GG_BUILD_EXTRA_TESTS_0=1 \
1700+
bash ./ci/run.sh ./tmp/results ./tmp/mnt

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -200,6 +200,7 @@ endif()
200200

201201
if (LLAMA_BUILD_COMMON)
202202
add_subdirectory(common)
203+
add_subdirectory(vendor/cpp-httplib)
203204
endif()
204205

205206
if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)

ci/run.sh

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -121,7 +121,12 @@ fi
121121
if [ -n "${GG_BUILD_KLEIDIAI}" ]; then
122122
echo ">>===== Enabling KleidiAI support"
123123

124-
CANDIDATES=("armv9-a+dotprod+i8mm" "armv8.6-a+dotprod+i8mm" "armv8.2-a+dotprod")
124+
CANDIDATES=(
125+
"armv9-a+dotprod+i8mm+sve2"
126+
"armv9-a+dotprod+i8mm"
127+
"armv8.6-a+dotprod+i8mm"
128+
"armv8.2-a+dotprod"
129+
)
125130
CPU=""
126131

127132
for cpu in "${CANDIDATES[@]}"; do

common/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -79,17 +79,21 @@ if (BUILD_SHARED_LIBS)
7979
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
8080
endif()
8181

82+
# TODO: use list(APPEND LLAMA_COMMON_EXTRA_LIBS ...)
8283
set(LLAMA_COMMON_EXTRA_LIBS build_info)
8384

84-
# Use curl to download model url
8585
if (LLAMA_CURL)
86+
# Use curl to download model url
8687
find_package(CURL)
8788
if (NOT CURL_FOUND)
8889
message(FATAL_ERROR "Could NOT find CURL. Hint: to disable this feature, set -DLLAMA_CURL=OFF")
8990
endif()
9091
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
9192
include_directories(${CURL_INCLUDE_DIRS})
9293
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARIES})
94+
else()
95+
# otherwise, use cpp-httplib
96+
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} cpp-httplib)
9397
endif()
9498

9599
if (LLAMA_OPENSSL)

convert_hf_to_gguf.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7354,6 +7354,7 @@ def prepare_tensors(self):
73547354
@ModelBase.register("T5ForConditionalGeneration")
73557355
@ModelBase.register("MT5ForConditionalGeneration")
73567356
@ModelBase.register("UMT5ForConditionalGeneration")
7357+
@ModelBase.register("UMT5Model")
73577358
class T5Model(TextModel):
73587359
model_arch = gguf.MODEL_ARCH.T5
73597360

ggml/src/CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -211,6 +211,11 @@ add_library(ggml-base
211211
ggml-quants.h
212212
gguf.cpp)
213213

214+
set_target_properties(ggml-base PROPERTIES
215+
VERSION ${GGML_VERSION}
216+
SOVERSION ${GGML_VERSION_MAJOR}
217+
)
218+
214219
target_include_directories(ggml-base PRIVATE .)
215220
if (GGML_BACKEND_DL)
216221
target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)
@@ -220,6 +225,11 @@ add_library(ggml
220225
ggml-backend-reg.cpp)
221226
add_library(ggml::ggml ALIAS ggml)
222227

228+
set_target_properties(ggml PROPERTIES
229+
VERSION ${GGML_VERSION}
230+
SOVERSION ${GGML_VERSION_MAJOR}
231+
)
232+
223233
if (GGML_BACKEND_DIR)
224234
if (NOT GGML_BACKEND_DL)
225235
message(FATAL_ERROR "GGML_BACKEND_DIR requires GGML_BACKEND_DL")
@@ -259,6 +269,12 @@ function(ggml_add_backend_library backend)
259269
target_compile_definitions(${backend} PUBLIC GGML_BACKEND_SHARED)
260270
endif()
261271

272+
# Set versioning properties for all backend libraries
273+
set_target_properties(${backend} PROPERTIES
274+
VERSION ${GGML_VERSION}
275+
SOVERSION ${GGML_VERSION_MAJOR}
276+
)
277+
262278
if(NOT GGML_AVAILABLE_BACKENDS)
263279
set(GGML_AVAILABLE_BACKENDS "${backend}"
264280
CACHE INTERNAL "List of backends for cmake package")

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -590,6 +590,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
590590
${KLEIDIAI_SRC}/kai/ukernels/
591591
${KLEIDIAI_SRC}/kai/ukernels/matmul/
592592
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/
593+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/
593594
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/
594595
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/)
595596

@@ -608,23 +609,34 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
608609
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p4x8sb_f32_neon.c
609610
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.c
610611
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c
611-
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c)
612+
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c
613+
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qai8dxp_f32.c
614+
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi8cxp_qsi8cx_neon.c)
612615

613616
if (NOT DOTPROD_ENABLED MATCHES -1)
614617
list(APPEND GGML_KLEIDIAI_SOURCES
615618
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c
616619
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod.c
617-
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod.c)
620+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod.c
621+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod.c
622+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.c
623+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.c)
618624
endif()
619625

620626
if (NOT I8MM_ENABLED MATCHES -1)
621-
list(APPEND GGML_KLEIDIAI_SOURCES ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm.c)
627+
list(APPEND GGML_KLEIDIAI_SOURCES
628+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm.c
629+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm.c)
622630
endif()
623631

624632
if (NOT SME_ENABLED MATCHES -1)
625633
list(APPEND GGML_KLEIDIAI_SOURCES
626634
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.c
627635
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.c
636+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa.c
637+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa_asm.S
638+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot.c
639+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot_asm.S
628640
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa.c
629641
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa_asm.S
630642
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p2vlx2_f32_sme.c

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3274,6 +3274,13 @@ void ggml_cpu_fp16_to_fp32(const ggml_fp16_t * x, float * y, int64_t n) {
32743274
__m128 y_vec = _mm_cvtph_ps(x_vec);
32753275
_mm_storeu_ps(y + i, y_vec);
32763276
}
3277+
#elif defined(__riscv_zvfh)
3278+
for (int vl; i < n; i += vl) {
3279+
vl = __riscv_vsetvl_e16m1(n - i);
3280+
vfloat16m1_t vx = __riscv_vle16_v_f16m1((_Float16 *)&x[i], vl);
3281+
vfloat32m2_t vy = __riscv_vfwcvt_f_f_v_f32m2(vx, vl);
3282+
__riscv_vse32_v_f32m2(&y[i], vy, vl);
3283+
}
32773284
#endif
32783285

32793286
for (; i < n; ++i) {

0 commit comments

Comments
 (0)