
Commit 397a75b

Merge pull request #322 from janhq/update-dev-from-master-2025-11-13-00-35

Sync master with upstream release b7039

2 parents f86216e + 374fe09

32 files changed: +3689 −1982 lines

.github/workflows/check-vendor.yml

Lines changed: 52 additions & 0 deletions

@@ -0,0 +1,52 @@
+name: Check vendor
+
+on:
+  workflow_dispatch: # allows manual triggering
+  push:
+    branches:
+      - master
+    paths: [
+      'vendor/**',
+      'scripts/sync_vendor.py'
+    ]
+
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: [
+      'vendor/**',
+      'scripts/sync_vendor.py'
+    ]
+
+jobs:
+  check-vendor:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+
+      - name: Run vendor sync
+        run: |
+          set -euo pipefail
+          python3 scripts/sync_vendor.py
+
+      - name: Check for changes
+        run: |
+          set -euo pipefail
+          # detect modified or untracked files
+          changed=$(git status --porcelain --untracked-files=all || true)
+          if [ -n "$changed" ]; then
+            echo "Vendor sync modified files:"
+            echo "$changed" | awk '{ print $2 }' | sed '/^$/d'
+            echo "Failing because vendor files mismatch. Please update scripts/sync_vendor.py"
+            exit 1
+          else
+            echo "Vendor files are up-to-date."
+          fi

.github/workflows/server.yml

Lines changed: 1 addition & 1 deletion

@@ -209,7 +209,7 @@ jobs:
         working-directory: tools/server/webui

       - name: Run UI tests
-        run: npm run test:ui
+        run: npm run test:ui -- --testTimeout=60000
         working-directory: tools/server/webui

       - name: Run E2E tests

CMakeLists.txt

Lines changed: 4 additions & 1 deletion

@@ -92,6 +92,7 @@ option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_

 # 3rd party libs
 option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
+option(LLAMA_HTTPLIB "llama: if libcurl is disabled, use httplib to download model from an URL" ON)
 option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" OFF)
 option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)

@@ -200,7 +201,9 @@ endif()

 if (LLAMA_BUILD_COMMON)
     add_subdirectory(common)
-    add_subdirectory(vendor/cpp-httplib)
+    if (LLAMA_HTTPLIB)
+        add_subdirectory(vendor/cpp-httplib)
+    endif()
 endif()

 if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
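
The net effect of the new option, together with the common/CMakeLists.txt and common/download.cpp hunks below, is a three-way compile-time selection of the download backend. A condensed sketch of the resulting preprocessor structure (not a literal excerpt from the commit):

// CMake defines at most one of LLAMA_USE_CURL / LLAMA_USE_HTTPLIB.
#if defined(LLAMA_USE_CURL)
    // downloads go through libcurl (preferred when LLAMA_CURL=ON and curl is found)
#elif defined(LLAMA_USE_HTTPLIB)
    // downloads go through the vendored cpp-httplib (LLAMA_HTTPLIB=ON fallback)
#else
    // no HTTP backend compiled in: stub entry points that throw at runtime
#endif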

build-xcframework.sh

Lines changed: 4 additions & 0 deletions

@@ -454,6 +454,8 @@ cmake -B build-visionos -G Xcode \
     -DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
     -DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
     -DLLAMA_CURL=OFF \
+    -DLLAMA_HTTPLIB=OFF \
+    -DLLAMA_BUILD_SERVER=OFF \
     -S .
 cmake --build build-visionos --config Release -- -quiet

@@ -468,6 +470,8 @@ cmake -B build-visionos-sim -G Xcode \
     -DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
     -DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
     -DLLAMA_CURL=OFF \
+    -DLLAMA_HTTPLIB=OFF \
+    -DLLAMA_BUILD_SERVER=OFF \
     -S .
 cmake --build build-visionos-sim --config Release -- -quiet

common/CMakeLists.txt

Lines changed: 2 additions & 37 deletions

@@ -91,47 +91,12 @@ if (LLAMA_CURL)
     target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
     include_directories(${CURL_INCLUDE_DIRS})
     set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARIES})
-else()
+elseif (LLAMA_HTTPLIB)
     # otherwise, use cpp-httplib
+    target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_HTTPLIB)
     set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} cpp-httplib)
 endif()

-if (LLAMA_OPENSSL)
-    find_package(OpenSSL)
-    if (OpenSSL_FOUND)
-        include(CheckCSourceCompiles)
-        set(SAVED_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES})
-        set(CMAKE_REQUIRED_INCLUDES ${OPENSSL_INCLUDE_DIR})
-        check_c_source_compiles("
-            #include <openssl/opensslv.h>
-            #if defined(OPENSSL_IS_BORINGSSL) || defined(LIBRESSL_VERSION_NUMBER)
-            #  if OPENSSL_VERSION_NUMBER < 0x1010107f
-            #    error bad version
-            #  endif
-            #else
-            #  if OPENSSL_VERSION_NUMBER < 0x30000000L
-            #    error bad version
-            #  endif
-            #endif
-            int main() { return 0; }
-        " OPENSSL_VERSION_SUPPORTED)
-        set(CMAKE_REQUIRED_INCLUDES ${SAVED_CMAKE_REQUIRED_INCLUDES})
-        if (OPENSSL_VERSION_SUPPORTED)
-            message(STATUS "OpenSSL found: ${OPENSSL_VERSION}")
-            target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_OPENSSL_SUPPORT)
-            target_link_libraries(${TARGET} PUBLIC OpenSSL::SSL OpenSSL::Crypto)
-            if (APPLE AND CMAKE_SYSTEM_NAME STREQUAL "Darwin")
-                target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN)
-                find_library(CORE_FOUNDATION_FRAMEWORK CoreFoundation REQUIRED)
-                find_library(SECURITY_FRAMEWORK Security REQUIRED)
-                target_link_libraries(${TARGET} PUBLIC ${CORE_FOUNDATION_FRAMEWORK} ${SECURITY_FRAMEWORK})
-            endif()
-        endif()
-    else()
-        message(STATUS "OpenSSL not found, SSL support disabled")
-    endif()
-endif()
-
 if (LLAMA_LLGUIDANCE)
     include(ExternalProject)
     set(LLGUIDANCE_SRC ${CMAKE_BINARY_DIR}/llguidance/source)

common/download.cpp

Lines changed: 47 additions & 29 deletions

@@ -20,7 +20,7 @@
 #if defined(LLAMA_USE_CURL)
 #include <curl/curl.h>
 #include <curl/easy.h>
-#else
+#elif defined(LLAMA_USE_HTTPLIB)
 #include "http.h"
 #endif

@@ -467,7 +467,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
     return { res_code, std::move(res_buffer) };
 }

-#else
+#elif defined(LLAMA_USE_HTTPLIB)

 static bool is_output_a_tty() {
 #if defined(_WIN32)

@@ -713,6 +713,8 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string

 #endif // LLAMA_USE_CURL

+#if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB)
+
 static bool common_download_file_single(const std::string & url,
                                         const std::string & path,
                                         const std::string & bearer_token,

@@ -907,33 +909,6 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
     return { hf_repo, ggufFile, mmprojFile };
 }

-std::vector<common_cached_model_info> common_list_cached_models() {
-    std::vector<common_cached_model_info> models;
-    const std::string cache_dir = fs_get_cache_directory();
-    const std::vector<common_file_info> files = fs_list_files(cache_dir);
-    for (const auto & file : files) {
-        if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
-            common_cached_model_info model_info;
-            model_info.manifest_path = file.path;
-            std::string fname = file.name;
-            string_replace_all(fname, ".json", ""); // remove extension
-            auto parts = string_split<std::string>(fname, '=');
-            if (parts.size() == 4) {
-                // expect format: manifest=<user>=<model>=<tag>=<other>
-                model_info.user = parts[1];
-                model_info.model = parts[2];
-                model_info.tag = parts[3];
-            } else {
-                // invalid format
-                continue;
-            }
-            model_info.size = 0; // TODO: get GGUF size, not manifest size
-            models.push_back(model_info);
-        }
-    }
-    return models;
-}
-
 //
 // Docker registry functions
 //

@@ -1052,3 +1027,46 @@ std::string common_docker_resolve_model(const std::string & docker) {
         throw;
     }
 }
+
+#else
+
+common_hf_file_res common_get_hf_file(const std::string &, const std::string &, bool) {
+    throw std::runtime_error("download functionality is not enabled in this build");
+}
+
+bool common_download_model(const common_params_model &, const std::string &, bool) {
+    throw std::runtime_error("download functionality is not enabled in this build");
+}
+
+std::string common_docker_resolve_model(const std::string &) {
+    throw std::runtime_error("download functionality is not enabled in this build");
+}
+
+#endif // LLAMA_USE_CURL || LLAMA_USE_HTTPLIB
+
+std::vector<common_cached_model_info> common_list_cached_models() {
+    std::vector<common_cached_model_info> models;
+    const std::string cache_dir = fs_get_cache_directory();
+    const std::vector<common_file_info> files = fs_list_files(cache_dir);
+    for (const auto & file : files) {
+        if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
+            common_cached_model_info model_info;
+            model_info.manifest_path = file.path;
+            std::string fname = file.name;
+            string_replace_all(fname, ".json", ""); // remove extension
+            auto parts = string_split<std::string>(fname, '=');
+            if (parts.size() == 4) {
+                // expect format: manifest=<user>=<model>=<tag>=<other>
+                model_info.user = parts[1];
+                model_info.model = parts[2];
+                model_info.tag = parts[3];
+            } else {
+                // invalid format
+                continue;
+            }
+            model_info.size = 0; // TODO: get GGUF size, not manifest size
+            models.push_back(model_info);
+        }
+    }
+    return models;
+}
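
With neither macro defined, the stubs above turn every download entry point into a std::runtime_error at runtime. A minimal caller-side sketch of what that contract implies — resolve_or_local is a hypothetical helper, not part of the commit:

#include <cstdio>
#include <stdexcept>
#include <string>

// declaration matching the stub shown above
std::string common_docker_resolve_model(const std::string & docker);

// hypothetical helper: fall back to a local file when downloads are compiled out
static std::string resolve_or_local(const std::string & ref, const std::string & local_path) {
    try {
        return common_docker_resolve_model(ref);
    } catch (const std::runtime_error & e) {
        std::fprintf(stderr, "download disabled: %s\n", e.what());
        return local_path;
    }
}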

docs/backend/CANN.md

Lines changed: 6 additions & 1 deletion

@@ -313,7 +313,12 @@ Converting the matmul weight format from ND to NZ to improve performance. Enable

 ### GGML_CANN_ACL_GRAPH

-Operators are executed using ACL graph execution, rather than in op-by-op (eager) mode. Enabled by default.
+Operators are executed using ACL graph execution, rather than in op-by-op (eager) mode. Enabled by default. This option is only effective if `USE_ACL_GRAPH` was enabled at compilation time. To enable it, recompile using:
+
+```sh
+cmake -B build -DGGML_CANN=on -DCMAKE_BUILD_TYPE=release -DUSE_ACL_GRAPH=ON
+cmake --build build --config release
+```

 ### GGML_CANN_GRAPH_CACHE_CAPACITY

ggml/src/ggml-cann/aclnn_ops.cpp

Lines changed: 29 additions & 0 deletions

@@ -448,6 +448,35 @@ void ggml_cann_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
     ggml_cann_release_resources(ctx, norm, acl_src, acl_dst);
 }

+void ggml_cann_l2_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
+    ggml_tensor * src = dst->src[0];
+
+    aclTensor * acl_src = ggml_cann_create_tensor(src);
+    aclTensor * acl_dst = ggml_cann_create_tensor(dst);
+
+    size_t type_size = ggml_type_size(src->type);
+    int64_t n_bytes = src->ne[3] * src->ne[2] * src->ne[1] * type_size;
+    ggml_cann_pool_alloc temp_buffer_allocator(ctx.pool(), n_bytes);
+    void * buffer = temp_buffer_allocator.get();
+
+    int64_t div_ne[] = {1, src->ne[1], src->ne[2], src->ne[3]};
+    size_t div_nb[GGML_MAX_DIMS];
+    div_nb[0] = sizeof(float);
+    for (int i = 1; i < GGML_MAX_DIMS; ++i) {
+        div_nb[i] = div_nb[i - 1] * div_ne[i - 1];
+    }
+    aclTensor * acl_div = ggml_cann_create_tensor(buffer, ACL_FLOAT, type_size, div_ne, div_nb, GGML_MAX_DIMS);
+
+    std::vector<int64_t> norm_dims = { 3 };
+    aclIntArray * dims_array = aclCreateIntArray(norm_dims.data(), norm_dims.size());
+
+    float p_value = 2.0f;
+    aclScalar * p_scalar = aclCreateScalar(&p_value, aclDataType::ACL_FLOAT);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Norm, acl_src, p_scalar, dims_array, true, acl_div);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Div, acl_src, acl_div, acl_dst);
+    ggml_cann_release_resources(ctx, dims_array, p_scalar, acl_src, acl_dst, acl_div);
+}
+
 void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
     ggml_tensor * src = dst->src[0];


ggml/src/ggml-cann/aclnn_ops.h

Lines changed: 24 additions & 0 deletions

@@ -46,6 +46,7 @@
 #include <aclnnop/aclnn_cos.h>
 #include <aclnnop/aclnn_log.h>
 #include <aclnnop/aclnn_sign.h>
+#include <aclnnop/aclnn_norm.h>
 #include "acl_tensor.h"
 #include "common.h"

@@ -187,6 +188,29 @@ void ggml_cann_argsort(ggml_backend_cann_context & ctx, ggml_tensor * dst);
  */
 void ggml_cann_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);

+/**
+ * @brief Computes the L2 Normalization for a ggml tensor using the CANN
+ * backend.
+ *
+ * @details This function applies the L2 Normalization operation on the
+ * input tensor `src` and stores the result in the destination tensor
+ * `dst`. L2 Normalization scales the input tensor such that the
+ * L2 norm along the specified dimension equals 1. This operation
+ * is commonly used in neural networks for feature normalization
+ * and vector scaling.
+ * The operation is defined as:
+ * \f[
+ *     \text{out} = \frac{x}{\sqrt{\sum{x^2}}}
+ * \f]
+ * The normalization is performed along the last dimension by default.
+ *
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the normalized values will be stored.
+ * @attention The normalization is performed along the last dimension of the
+ * input tensor by default.
+ */
+void ggml_cann_l2_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
+
 /**
  * @brief Computes the Group Normalization for a ggml tensor using the CANN
  * backend.
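
As a plain-CPU reference for the formula in the new doc comment — a sketch for illustration only; the CANN kernel in aclnn_ops.cpp realizes the same computation with aclnnNorm (p = 2, keepdim) followed by aclnnDiv:

#include <cmath>
#include <cstdint>

// normalize each row of a contiguous [n_rows x row_len] float buffer in place:
// out = x / sqrt(sum(x^2)), taken along the innermost dimension
static void l2_norm_rows(float * x, int64_t n_rows, int64_t row_len) {
    for (int64_t r = 0; r < n_rows; ++r) {
        float * row = x + r * row_len;
        float sum_sq = 0.0f;
        for (int64_t i = 0; i < row_len; ++i) {
            sum_sq += row[i] * row[i];
        }
        const float inv_norm = sum_sq > 0.0f ? 1.0f / std::sqrt(sum_sq) : 0.0f;
        for (int64_t i = 0; i < row_len; ++i) {
            row[i] *= inv_norm;
        }
    }
}

For example, the row {3, 4} maps to {0.6, 0.8}, which has unit L2 norm.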

ggml/src/ggml-cann/ggml-cann.cpp

Lines changed: 4 additions & 0 deletions

@@ -1777,6 +1777,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context & ctx, struct gg
         case GGML_OP_GROUP_NORM:
             ggml_cann_group_norm(ctx, dst);
             break;
+        case GGML_OP_L2_NORM:
+            ggml_cann_l2_norm(ctx, dst);
+            break;
         case GGML_OP_CONCAT:
             ggml_cann_concat(ctx, dst);
             break;

@@ -2515,6 +2518,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
                 // value of paddingW should be at most half of kernelW
                 return (p0 <= (k0 / 2)) && (p1 <= (k1 / 2));
             }
+        case GGML_OP_L2_NORM:
         case GGML_OP_DUP:
         case GGML_OP_SUM:
        case GGML_OP_IM2COL:
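
A hedged end-to-end sketch of what the new dispatch case serves, driven through the public ggml graph API. This assumes the ggml_l2_norm(ctx, a, eps) builder that emits GGML_OP_L2_NORM and the CPU compute helper from ggml-cpu.h; on a CANN device the same op is routed to ggml_cann_l2_norm above:

#include "ggml.h"
#include "ggml-cpu.h"
#include <cstdio>

int main() {
    // small scratch context; no_alloc = false so tensor data lives in it
    ggml_init_params params = { 16 * 1024 * 1024, nullptr, false };
    ggml_context * ctx = ggml_init(params);

    ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 2);
    ((float *) x->data)[0] = 3.0f;
    ((float *) x->data)[1] = 4.0f;

    // out = x / sqrt(sum(x^2)) along the innermost dimension
    ggml_tensor * y = ggml_l2_norm(ctx, x, 1e-12f);

    ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, y);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);

    const float * yd = (const float *) y->data;
    printf("%.2f %.2f\n", yd[0], yd[1]); // expect 0.60 0.80

    ggml_free(ctx);
    return 0;
}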
