Commit d006f35 (2 parents: c195af2 + 9d02299)

Merge remote-tracking branch 'origin/master' into ggml-cumsum-tri

* origin/master:
  server: strip content-length header on proxy (ggml-org#17734)
  server: move msg diffs tracking to HTTP thread (ggml-org#17740)
  examples : add missing code block end marker [no ci] (ggml-org#17756)
  common : skip model validation when --help is requested (ggml-org#17755)
  ggml-cpu : remove asserts always evaluating to false (ggml-org#17728)
  convert: use existing local chat_template if mistral-format model has one. (ggml-org#17749)
  cmake : simplify build info detection using standard variables (ggml-org#17423)
  ci : disable ggml-ci-x64-amd-* (ggml-org#17753)
  common: use native MultiByteToWideChar (ggml-org#17738)
  metal : use params per pipeline instance (ggml-org#17739)
  llama : fix sanity checks during quantization (ggml-org#17721)
  build : move _WIN32_WINNT definition to headers (ggml-org#17736)
  build: enable parallel builds in msbuild using MTT (ggml-org#17708)
  ggml-cpu: remove duplicate conditional check 'iid' (ggml-org#17650)
  Add a couple of file types to the text section (ggml-org#17670)
  convert : support latest mistral-common (fix conversion with --mistral-format) (ggml-org#17712)
  Use OpenAI-compatible `/v1/models` endpoint by default (ggml-org#17689)
  webui: Fix zero pasteLongTextToFileLen to disable conversion being overridden (ggml-org#17445)

41 files changed: +891, -892 lines

.github/workflows/build.yml

Lines changed: 27 additions & 27 deletions

@@ -1602,33 +1602,33 @@ jobs:
        run: |
          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
-  ggml-ci-x64-amd-vulkan:
-    runs-on: [self-hosted, Linux, X64, AMD]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          vulkaninfo --summary
-          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
-
-  ggml-ci-x64-amd-rocm:
-    runs-on: [self-hosted, Linux, X64, AMD]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          amd-smi static
-          GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+  # ggml-ci-x64-amd-vulkan:
+  #   runs-on: [self-hosted, Linux, X64, AMD]
+
+  #   steps:
+  #     - name: Clone
+  #       id: checkout
+  #       uses: actions/checkout@v4
+
+  #     - name: Test
+  #       id: ggml-ci
+  #       run: |
+  #         vulkaninfo --summary
+  #         GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  # ggml-ci-x64-amd-rocm:
+  #   runs-on: [self-hosted, Linux, X64, AMD]
+
+  #   steps:
+  #     - name: Clone
+  #       id: checkout
+  #       uses: actions/checkout@v4
+
+  #     - name: Test
+  #       id: ggml-ci
+  #       run: |
+  #         amd-smi static
+  #         GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
   ggml-ci-mac-metal:
     runs-on: [self-hosted, macOS, ARM64]

CMakeLists.txt

Lines changed: 6 additions & 5 deletions

@@ -72,6 +72,12 @@ if (MSVC)
     add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/bigobj>")
 endif()
 
+if (LLAMA_STANDALONE)
+    # enable parallel builds for msbuild
+    list(APPEND CMAKE_VS_GLOBALS UseMultiToolTask=true)
+    list(APPEND CMAKE_VS_GLOBALS EnforceProcessCountAcrossBuilds=true)
+endif()
+
 if (CMAKE_SYSTEM_NAME STREQUAL "iOS")
     set(LLAMA_TOOLS_INSTALL_DEFAULT OFF)
 else()

@@ -193,11 +199,6 @@ if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
     # ... otherwise assume ggml is added by a parent CMakeLists.txt
 endif()
 
-if (MINGW)
-    # Target Windows 8 for PrefetchVirtualMemory
-    add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
-endif()
-
 #
 # build the library
 #

cmake/build-info.cmake

Lines changed: 5 additions & 21 deletions

@@ -39,26 +39,10 @@ if(Git_FOUND)
     endif()
 endif()
 
-if(MSVC)
-    set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
-    if (CMAKE_VS_PLATFORM_NAME)
-        set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
-    else()
-        set(BUILD_TARGET "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}")
-    endif()
-else()
-    execute_process(
-        COMMAND ${CMAKE_C_COMPILER} --version
-        OUTPUT_VARIABLE OUT
-        OUTPUT_STRIP_TRAILING_WHITESPACE
-    )
-    string(REGEX REPLACE " *\n.*" "" OUT "${OUT}")
-    set(BUILD_COMPILER ${OUT})
+set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
 
-    execute_process(
-        COMMAND ${CMAKE_C_COMPILER} -dumpmachine
-        OUTPUT_VARIABLE OUT
-        OUTPUT_STRIP_TRAILING_WHITESPACE
-    )
-    set(BUILD_TARGET ${OUT})
+if(CMAKE_VS_PLATFORM_NAME)
+    set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
+else()
+    set(BUILD_TARGET "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}")
 endif()

common/arg.cpp

Lines changed: 1 addition & 1 deletion

@@ -427,7 +427,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_contex
 
     // model is required (except for server)
    // TODO @ngxson : maybe show a list of available models in CLI in this case
-    if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER) {
+    if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER && !params.usage) {
         throw std::invalid_argument("error: --model is required\n");
     }

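This is the fix from `common : skip model validation when --help is requested (ggml-org#17755)`: once `params.usage` is set by `-h`/`--help`, the required-model check no longer fires, so the usage text can print even without `--model`. A minimal sketch of the pattern, using hypothetical stand-in names (`cli_params` and `validate` are not the real llama.cpp types):

```cpp
#include <stdexcept>
#include <string>

// Hypothetical stand-ins for the real common_params / parsing context.
struct cli_params {
    std::string model_path;
    bool        usage = false; // set when -h/--help was parsed
};

// Required-argument validation runs only when the user actually wants a run,
// not when they merely asked for the usage text.
static void validate(const cli_params & params, bool is_server) {
    if (params.model_path.empty() && !is_server && !params.usage) {
        throw std::invalid_argument("error: --model is required\n");
    }
}

int main() {
    cli_params p;
    p.usage = true;                   // simulates passing --help
    validate(p, /*is_server=*/false); // no throw despite the empty model path
    return 0;
}
```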
common/common.cpp

Lines changed: 20 additions & 2 deletions

@@ -786,11 +786,29 @@ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
 #include <iostream>
 
 
+#ifdef _WIN32
+static std::wstring utf8_to_wstring(const std::string & str) {
+    if (str.empty()) {
+        return std::wstring();
+    }
+
+    int size = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), NULL, 0);
+
+    if (size <= 0) {
+        return std::wstring();
+    }
+
+    std::wstring wstr(size, 0);
+    MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), &wstr[0], size);
+
+    return wstr;
+}
+#endif
+
 // returns true if successful, false otherwise
 bool fs_create_directory_with_parents(const std::string & path) {
 #ifdef _WIN32
-    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
-    std::wstring wpath = converter.from_bytes(path);
+    std::wstring wpath = utf8_to_wstring(path);
 
     // if the path already exists, check whether it's a directory
     const DWORD attributes = GetFileAttributesW(wpath.c_str());

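This is `common: use native MultiByteToWideChar (ggml-org#17738)`: the new helper replaces `std::wstring_convert`/`std::codecvt_utf8`, which have been deprecated since C++17, with the standard two-call Win32 idiom — the first `MultiByteToWideChar` call (NULL output buffer) returns the required UTF-16 length, the second performs the conversion. A self-contained, Windows-only usage sketch; the helper body is taken from the diff above, while the example path is made up:

```cpp
#include <windows.h>

#include <string>

static std::wstring utf8_to_wstring(const std::string & str) {
    if (str.empty()) {
        return std::wstring();
    }
    // first call: NULL buffer -> returns the required length in UTF-16 code units
    int size = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), NULL, 0);
    if (size <= 0) {
        return std::wstring(); // invalid UTF-8 or conversion failure
    }
    // second call: convert into the correctly sized buffer
    std::wstring wstr(size, 0);
    MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), &wstr[0], size);
    return wstr;
}

int main() {
    // "models/übung" spelled as explicit UTF-8 bytes, so source encoding does not matter
    const std::string path = "models/\xC3\xBC" "bung";
    const std::wstring wpath = utf8_to_wstring(path);
    CreateDirectoryW(wpath.c_str(), NULL); // wide-char Win32 APIs expect UTF-16
    return 0;
}
```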
common/common.h

Lines changed: 4 additions & 0 deletions

@@ -12,6 +12,10 @@
 #include <vector>
 #include <map>
 
+#if defined(_WIN32) && !defined(_WIN32_WINNT)
+#define _WIN32_WINNT 0x0A00
+#endif
+
 #ifdef _WIN32
 #define DIRECTORY_SEPARATOR '\\'
 #else

convert_hf_to_gguf.py

Lines changed: 16 additions & 4 deletions

@@ -2341,19 +2341,31 @@ def _set_vocab_mistral(self):
         self.gguf_writer.add_add_bos_token(True)
         self.gguf_writer.add_add_eos_token(False)
 
-        template_dir = Path(__file__).parent / "models/templates/"
+        local_template_file_path = self.dir_model / "chat_template.jinja"
+
+        if self.is_mistral_format and local_template_file_path.is_file():
+            # Ministral-3 and other new Mistral models come with chat templates.
+            # ref: https://huggingface.co/mistralai/Ministral-3-14B-Instruct-2512/tree/main
+            logger.info("Using an existing Mistral local chat template.")
+
+            with open(local_template_file_path, "r", encoding="utf-8") as f:
+                template = f.read()
+        elif not self.is_mistral_format or not self.disable_mistral_community_chat_template:
+            template_dir = Path(__file__).parent / "models/templates/"
 
-        if not self.is_mistral_format or not self.disable_mistral_community_chat_template:
             # Log only for Mistral format that the official tokenization and detokenization is via `mistral-common`.
             if self.is_mistral_format:
                 logger.info(
                     "Using a Mistral community chat template. These templates can be subject to errors in early days or weeks after a release. "
                     "Mistral recommends to use `mistral-common` to perform tokenization and detokenization."
                 )
             template = MistralModel.get_community_chat_template(vocab, template_dir, self.is_mistral_format)
-            self.gguf_writer.add_chat_template(template)
         else:
-            logger.info("Not using a Mistral community chat template. Ensure to perform the tokenization and detokenization via `mistral-common`.")
+            logger.info("Not using a Mistral local or community chat template. Ensure to perform the tokenization and detokenization via `mistral-common`.")
+            template = None
+
+        if template is not None:
+            self.gguf_writer.add_chat_template(template)
 
     def set_vocab(self):
         if self.is_mistral_format:

examples/simple-cmake-pkg/README.md

Lines changed: 1 addition & 0 deletions

@@ -18,6 +18,7 @@ cd llama.cpp
 cmake -S . -B build
 cmake --build build
 cmake --install build --prefix inst
+```
 
 ### Build simple-cmake-pkg
 

ggml/CMakeLists.txt

Lines changed: 0 additions & 5 deletions

@@ -175,11 +175,6 @@ option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requi
 set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
 set(GGML_CPU_POWERPC_CPUTYPE "" CACHE STRING "ggml: CPU type for PowerPC")
 
-
-if (MINGW)
-    set(GGML_WIN_VER "0xA00" CACHE STRING "ggml: Windows version")
-endif()
-
 # ggml core
 set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism")
 option(GGML_CPU "ggml: enable CPU backend" ON)

ggml/include/ggml.h

Lines changed: 4 additions & 0 deletions

@@ -204,6 +204,10 @@
 #    define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
 #endif
 
+#if defined(_WIN32) && !defined(_WIN32_WINNT)
+#    define _WIN32_WINNT 0x0A00
+#endif
+
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>

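Together with the identical guard added to common/common.h above, this completes `build : move _WIN32_WINNT definition to headers (ggml-org#17736)`: instead of the MINGW-only `GGML_WIN_VER` CMake cache variable, the headers themselves now default `_WIN32_WINNT` to `0x0A00` (Windows 10), and the `!defined` guard still lets an embedding project pick a different value by defining the macro first. A small Windows-only sketch of what the macro gates; the Windows 8 (`0x0602`) requirement for `PrefetchVirtualMemory` comes from the removed CMake comment, while the `static_assert` is illustrative and not part of the commit:

```cpp
// _WIN32_WINNT tells the Windows SDK headers which API surface to declare.
#if defined(_WIN32) && !defined(_WIN32_WINNT)
#define _WIN32_WINNT 0x0A00 // Windows 10; define it earlier to override
#endif

#ifdef _WIN32
#include <windows.h>

// PrefetchVirtualMemory (the reason the old MINGW block targeted Windows 8)
// is only declared for _WIN32_WINNT >= 0x0602.
static_assert(_WIN32_WINNT >= 0x0602,
              "PrefetchVirtualMemory needs _WIN32_WINNT >= 0x0602");
#endif

int main() { return 0; }
```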