52 changes: 52 additions & 0 deletions .github/workflows/check-vendor.yml
@@ -0,0 +1,52 @@
name: Check vendor

on:
workflow_dispatch: # allows manual triggering
push:
branches:
- master
paths: [
'vendor/**',
'scripts/sync_vendor.py'
]

pull_request:
types: [opened, synchronize, reopened]
paths: [
'vendor/**',
'scripts/sync_vendor.py'
]

jobs:
check-vendor:
runs-on: ubuntu-latest

steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: '3.x'

- name: Run vendor sync
run: |
set -euo pipefail
python3 scripts/sync_vendor.py

- name: Check for changes
run: |
set -euo pipefail
# detect modified or untracked files
changed=$(git status --porcelain --untracked-files=all || true)
if [ -n "$changed" ]; then
echo "Vendor sync modified files:"
echo "$changed" | awk '{ print $2 }' | sed '/^$/d'
echo "Failing because vendor files mismatch. Please update scripts/sync_vendor.py"
exit 1
else
echo "Vendor files are up-to-date."
fi
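
The failure condition above is simply "the working tree is dirty after running the sync script", so the check can be reproduced locally before pushing. A minimal sketch using the same commands as the workflow:

```sh
# regenerate the vendored sources
python3 scripts/sync_vendor.py

# list modified or untracked files; empty output means vendor/ is in sync
git status --porcelain --untracked-files=all
```

Non-empty output from the last command corresponds to the failing case in CI.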
2 changes: 1 addition & 1 deletion .github/workflows/server.yml
@@ -209,7 +209,7 @@ jobs:
working-directory: tools/server/webui

- name: Run UI tests
run: npm run test:ui
run: npm run test:ui -- --testTimeout=60000
working-directory: tools/server/webui

- name: Run E2E tests
5 changes: 4 additions & 1 deletion CMakeLists.txt
@@ -92,6 +92,7 @@ option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_

# 3rd party libs
option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
option(LLAMA_HTTPLIB "llama: if libcurl is disabled, use httplib to download model from an URL" ON)
option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" OFF)
option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)

@@ -200,7 +201,9 @@ endif()

if (LLAMA_BUILD_COMMON)
add_subdirectory(common)
add_subdirectory(vendor/cpp-httplib)
if (LLAMA_HTTPLIB)
add_subdirectory(vendor/cpp-httplib)
endif()
endif()

if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
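
The two options give a configure-time fallback chain for model downloads. A sketch of the resulting combinations, with flag names taken from the options above (`LLAMA_OPENSSL` is the pre-existing HTTPS switch):

```sh
# default: libcurl handles downloads (LLAMA_CURL=ON, so LLAMA_HTTPLIB is unused)
cmake -B build

# libcurl disabled: fall back to the vendored cpp-httplib
cmake -B build -DLLAMA_CURL=OFF

# cpp-httplib with HTTPS support via OpenSSL
cmake -B build -DLLAMA_CURL=OFF -DLLAMA_OPENSSL=ON

# both backends disabled: no download support is compiled in
cmake -B build -DLLAMA_CURL=OFF -DLLAMA_HTTPLIB=OFF
```

The `build-xcframework.sh` change below uses the last combination, since the Apple targets are built without download support.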
4 changes: 4 additions & 0 deletions build-xcframework.sh
@@ -454,6 +454,8 @@ cmake -B build-visionos -G Xcode \
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
-DLLAMA_CURL=OFF \
-DLLAMA_HTTPLIB=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-S .
cmake --build build-visionos --config Release -- -quiet

@@ -468,6 +470,8 @@ cmake -B build-visionos-sim -G Xcode \
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
-DLLAMA_CURL=OFF \
-DLLAMA_HTTPLIB=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-S .
cmake --build build-visionos-sim --config Release -- -quiet

39 changes: 2 additions & 37 deletions common/CMakeLists.txt
@@ -91,47 +91,12 @@ if (LLAMA_CURL)
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
include_directories(${CURL_INCLUDE_DIRS})
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARIES})
else()
elseif (LLAMA_HTTPLIB)
# otherwise, use cpp-httplib
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_HTTPLIB)
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} cpp-httplib)
endif()

if (LLAMA_OPENSSL)
find_package(OpenSSL)
if (OpenSSL_FOUND)
include(CheckCSourceCompiles)
set(SAVED_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES})
set(CMAKE_REQUIRED_INCLUDES ${OPENSSL_INCLUDE_DIR})
check_c_source_compiles("
#include <openssl/opensslv.h>
#if defined(OPENSSL_IS_BORINGSSL) || defined(LIBRESSL_VERSION_NUMBER)
# if OPENSSL_VERSION_NUMBER < 0x1010107f
# error bad version
# endif
#else
# if OPENSSL_VERSION_NUMBER < 0x30000000L
# error bad version
# endif
#endif
int main() { return 0; }
" OPENSSL_VERSION_SUPPORTED)
set(CMAKE_REQUIRED_INCLUDES ${SAVED_CMAKE_REQUIRED_INCLUDES})
if (OPENSSL_VERSION_SUPPORTED)
message(STATUS "OpenSSL found: ${OPENSSL_VERSION}")
target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_OPENSSL_SUPPORT)
target_link_libraries(${TARGET} PUBLIC OpenSSL::SSL OpenSSL::Crypto)
if (APPLE AND CMAKE_SYSTEM_NAME STREQUAL "Darwin")
target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN)
find_library(CORE_FOUNDATION_FRAMEWORK CoreFoundation REQUIRED)
find_library(SECURITY_FRAMEWORK Security REQUIRED)
target_link_libraries(${TARGET} PUBLIC ${CORE_FOUNDATION_FRAMEWORK} ${SECURITY_FRAMEWORK})
endif()
endif()
else()
message(STATUS "OpenSSL not found, SSL support disabled")
endif()
endif()

if (LLAMA_LLGUIDANCE)
include(ExternalProject)
set(LLGUIDANCE_SRC ${CMAKE_BINARY_DIR}/llguidance/source)
76 changes: 47 additions & 29 deletions common/download.cpp
@@ -20,7 +20,7 @@
#if defined(LLAMA_USE_CURL)
#include <curl/curl.h>
#include <curl/easy.h>
#else
#elif defined(LLAMA_USE_HTTPLIB)
#include "http.h"
#endif

@@ -467,7 +467,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
return { res_code, std::move(res_buffer) };
}

#else
#elif defined(LLAMA_USE_HTTPLIB)

static bool is_output_a_tty() {
#if defined(_WIN32)
@@ -713,6 +713,8 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string

#endif // LLAMA_USE_CURL

#if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB)

static bool common_download_file_single(const std::string & url,
const std::string & path,
const std::string & bearer_token,
@@ -907,33 +909,6 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
return { hf_repo, ggufFile, mmprojFile };
}

std::vector<common_cached_model_info> common_list_cached_models() {
std::vector<common_cached_model_info> models;
const std::string cache_dir = fs_get_cache_directory();
const std::vector<common_file_info> files = fs_list_files(cache_dir);
for (const auto & file : files) {
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
common_cached_model_info model_info;
model_info.manifest_path = file.path;
std::string fname = file.name;
string_replace_all(fname, ".json", ""); // remove extension
auto parts = string_split<std::string>(fname, '=');
if (parts.size() == 4) {
// expect format: manifest=<user>=<model>=<tag>=<other>
model_info.user = parts[1];
model_info.model = parts[2];
model_info.tag = parts[3];
} else {
// invalid format
continue;
}
model_info.size = 0; // TODO: get GGUF size, not manifest size
models.push_back(model_info);
}
}
return models;
}

//
// Docker registry functions
//
@@ -1052,3 +1027,46 @@ std::string common_docker_resolve_model(const std::string & docker) {
throw;
}
}

#else

common_hf_file_res common_get_hf_file(const std::string &, const std::string &, bool) {
throw std::runtime_error("download functionality is not enabled in this build");
}

bool common_download_model(const common_params_model &, const std::string &, bool) {
throw std::runtime_error("download functionality is not enabled in this build");
}

std::string common_docker_resolve_model(const std::string &) {
throw std::runtime_error("download functionality is not enabled in this build");
}

#endif // LLAMA_USE_CURL || LLAMA_USE_HTTPLIB

std::vector<common_cached_model_info> common_list_cached_models() {
std::vector<common_cached_model_info> models;
const std::string cache_dir = fs_get_cache_directory();
const std::vector<common_file_info> files = fs_list_files(cache_dir);
for (const auto & file : files) {
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
common_cached_model_info model_info;
model_info.manifest_path = file.path;
std::string fname = file.name;
string_replace_all(fname, ".json", ""); // remove extension
auto parts = string_split<std::string>(fname, '=');
if (parts.size() == 4) {
// expect format: manifest=<user>=<model>=<tag>=<other>
model_info.user = parts[1];
model_info.model = parts[2];
model_info.tag = parts[3];
} else {
// invalid format
continue;
}
model_info.size = 0; // TODO: get GGUF size, not manifest size
models.push_back(model_info);
}
}
return models;
}
7 changes: 6 additions & 1 deletion docs/backend/CANN.md
@@ -313,7 +313,12 @@ Converting the matmul weight format from ND to NZ to improve performance. Enable

### GGML_CANN_ACL_GRAPH

Operators are executed using ACL graph execution, rather than in op-by-op (eager) mode. Enabled by default.
Operators are executed using ACL graph execution, rather than in op-by-op (eager) mode. Enabled by default. This option is only effective if `USE_ACL_GRAPH` was enabled at compilation time. To enable it, recompile using:

```sh
cmake -B build -DGGML_CANN=on -DCMAKE_BUILD_TYPE=release -DUSE_ACL_GRAPH=ON
cmake --build build --config release
```
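
At runtime the switch itself is an environment variable. A hypothetical invocation, assuming the option is read from the environment like the other `GGML_CANN_*` settings documented here (binary and model paths are placeholders):

```sh
# disable ACL graph execution for one run, falling back to op-by-op (eager) mode
GGML_CANN_ACL_GRAPH=0 ./build/bin/llama-cli -m model.gguf -p "hello"
```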

### GGML_CANN_GRAPH_CACHE_CAPACITY

29 changes: 29 additions & 0 deletions ggml/src/ggml-cann/aclnn_ops.cpp
@@ -448,6 +448,35 @@ void ggml_cann_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
ggml_cann_release_resources(ctx, norm, acl_src, acl_dst);
}

void ggml_cann_l2_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
ggml_tensor * src = dst->src[0];

aclTensor * acl_src = ggml_cann_create_tensor(src);
aclTensor * acl_dst = ggml_cann_create_tensor(dst);

size_t type_size = ggml_type_size(src->type);
int64_t n_bytes = src->ne[3] * src->ne[2] * src->ne[1] * type_size;
ggml_cann_pool_alloc temp_buffer_allocator(ctx.pool(), n_bytes);
void * buffer = temp_buffer_allocator.get();

int64_t div_ne[] = {1, src->ne[1], src->ne[2], src->ne[3]};
size_t div_nb[GGML_MAX_DIMS];
div_nb[0] = sizeof(float);
for (int i = 1; i < GGML_MAX_DIMS; ++i) {
div_nb[i] = div_nb[i - 1] * div_ne[i - 1];
}
aclTensor * acl_div = ggml_cann_create_tensor(buffer, ACL_FLOAT, type_size, div_ne, div_nb, GGML_MAX_DIMS);

std::vector<int64_t> norm_dims = { 3 };
aclIntArray * dims_array = aclCreateIntArray(norm_dims.data(), norm_dims.size());

float p_value = 2.0f;
aclScalar * p_scalar = aclCreateScalar(&p_value, aclDataType::ACL_FLOAT);
GGML_CANN_CALL_ACLNN_OP(ctx, Norm, acl_src, p_scalar, dims_array, true, acl_div);
GGML_CANN_CALL_ACLNN_OP(ctx, Div, acl_src, acl_div, acl_dst);
ggml_cann_release_resources(ctx, dims_array, p_scalar, acl_src, acl_dst, acl_div);
}

void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
ggml_tensor * src = dst->src[0];

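
In `ggml_cann_l2_norm` above, the two ACLNN calls implement the usual row-wise L2 normalization: `Norm` with $p = 2$ reduces along the innermost dimension (kept in the output shape, so it broadcasts), and `Div` rescales the input by it. For a row $x$ of `src`:

$$d = \lVert x \rVert_2 = \sqrt{\sum_i x_i^2}, \qquad \text{dst} = \frac{x}{d}$$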
24 changes: 24 additions & 0 deletions ggml/src/ggml-cann/aclnn_ops.h
@@ -46,6 +46,7 @@
#include <aclnnop/aclnn_cos.h>
#include <aclnnop/aclnn_log.h>
#include <aclnnop/aclnn_sign.h>
#include <aclnnop/aclnn_norm.h>
#include "acl_tensor.h"
#include "common.h"

@@ -187,6 +188,29 @@ void ggml_cann_argsort(ggml_backend_cann_context & ctx, ggml_tensor * dst);
*/
void ggml_cann_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);

/**
* @brief Computes the L2 Normalization for a ggml tensor using the CANN
* backend.
*
* @details This function applies the L2 Normalization operation on the
* input tensor `src` and stores the result in the destination tensor
* `dst`. L2 Normalization scales the input tensor such that the
* L2 norm along the specified dimension equals 1. This operation
* is commonly used in neural networks for feature normalization
* and vector scaling.
* The operation is defined as:
* \f[
* \text{out} = \frac{x}{\sqrt{\sum{x^2}}}
* \f]
* The normalization is performed along the last dimension by default.
*
* @param ctx The CANN context used for operations.
* @param dst The destination tensor where the normalized values will be stored.
* @attention The normalization is performed along the last dimension of the
* input tensor by default.
*/
void ggml_cann_l2_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);

/**
* @brief Computes the Group Normalization for a ggml tensor using the CANN
* backend.
4 changes: 4 additions & 0 deletions ggml/src/ggml-cann/ggml-cann.cpp
@@ -1777,6 +1777,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context & ctx, struct gg
case GGML_OP_GROUP_NORM:
ggml_cann_group_norm(ctx, dst);
break;
case GGML_OP_L2_NORM:
ggml_cann_l2_norm(ctx, dst);
break;
case GGML_OP_CONCAT:
ggml_cann_concat(ctx, dst);
break;
@@ -2515,6 +2518,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
// value of paddingW should be at most half of kernelW
return (p0 <= (k0 / 2)) && (p1 <= (k1 / 2));
}
case GGML_OP_L2_NORM:
case GGML_OP_DUP:
case GGML_OP_SUM:
case GGML_OP_IM2COL: