
Commit ab2474d

Merge branch 'master' into xsn/mistral_large_moe

2 parents 646e47d + bd4ef13


50 files changed: +1884 −1568 lines

.github/workflows/build.yml

Lines changed: 27 additions & 27 deletions
@@ -1602,33 +1602,33 @@ jobs:
       run: |
         bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

-  ggml-ci-x64-amd-vulkan:
-    runs-on: [self-hosted, Linux, X64, AMD]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          vulkaninfo --summary
-          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
-
-  ggml-ci-x64-amd-rocm:
-    runs-on: [self-hosted, Linux, X64, AMD]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          amd-smi static
-          GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+  # ggml-ci-x64-amd-vulkan:
+  #   runs-on: [self-hosted, Linux, X64, AMD]
+
+  #   steps:
+  #     - name: Clone
+  #       id: checkout
+  #       uses: actions/checkout@v4
+
+  #     - name: Test
+  #       id: ggml-ci
+  #       run: |
+  #         vulkaninfo --summary
+  #         GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  # ggml-ci-x64-amd-rocm:
+  #   runs-on: [self-hosted, Linux, X64, AMD]
+
+  #   steps:
+  #     - name: Clone
+  #       id: checkout
+  #       uses: actions/checkout@v4
+
+  #     - name: Test
+  #       id: ggml-ci
+  #       run: |
+  #         amd-smi static
+  #         GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

   ggml-ci-mac-metal:
     runs-on: [self-hosted, macOS, ARM64]

CMakeLists.txt

Lines changed: 6 additions & 5 deletions
@@ -72,6 +72,12 @@ if (MSVC)
     add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/bigobj>")
 endif()

+if (LLAMA_STANDALONE)
+    # enable parallel builds for msbuild
+    list(APPEND CMAKE_VS_GLOBALS UseMultiToolTask=true)
+    list(APPEND CMAKE_VS_GLOBALS EnforceProcessCountAcrossBuilds=true)
+endif()
+
 if (CMAKE_SYSTEM_NAME STREQUAL "iOS")
     set(LLAMA_TOOLS_INSTALL_DEFAULT OFF)
 else()
@@ -193,11 +199,6 @@ if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
     # ... otherwise assume ggml is added by a parent CMakeLists.txt
 endif()

-if (MINGW)
-    # Target Windows 8 for PrefetchVirtualMemory
-    add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
-endif()
-
 #
 # build the library
 #

cmake/build-info.cmake

Lines changed: 5 additions & 21 deletions
@@ -39,26 +39,10 @@ if(Git_FOUND)
     endif()
 endif()

-if(MSVC)
-    set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
-    if (CMAKE_VS_PLATFORM_NAME)
-        set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
-    else()
-        set(BUILD_TARGET "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}")
-    endif()
-else()
-    execute_process(
-        COMMAND ${CMAKE_C_COMPILER} --version
-        OUTPUT_VARIABLE OUT
-        OUTPUT_STRIP_TRAILING_WHITESPACE
-    )
-    string(REGEX REPLACE " *\n.*" "" OUT "${OUT}")
-    set(BUILD_COMPILER ${OUT})
+set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")

-    execute_process(
-        COMMAND ${CMAKE_C_COMPILER} -dumpmachine
-        OUTPUT_VARIABLE OUT
-        OUTPUT_STRIP_TRAILING_WHITESPACE
-    )
-    set(BUILD_TARGET ${OUT})
+if(CMAKE_VS_PLATFORM_NAME)
+    set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
+else()
+    set(BUILD_TARGET "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}")
 endif()

common/arg.cpp

Lines changed: 1 addition & 1 deletion
@@ -427,7 +427,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context

     // model is required (except for server)
     // TODO @ngxson : maybe show a list of available models in CLI in this case
-    if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER) {
+    if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER && !params.usage) {
         throw std::invalid_argument("error: --model is required\n");
     }

common/chat.cpp

Lines changed: 21 additions & 13 deletions
@@ -85,29 +85,36 @@ json common_chat_msg::to_json_oaicompat() const
     return message;
 }

-std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg) {
+std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) {
     std::vector<common_chat_msg_diff> diffs;
-    if (previous_msg.reasoning_content != new_msg.reasoning_content) {
+    if (msg_new.tool_calls.size() > msg_prv.tool_calls.size()) {
+        diffs.reserve(msg_new.tool_calls.size() - msg_prv.tool_calls.size() + 3);
+    } else {
+        diffs.reserve(3);
+    }
+
+    // TODO: these can become expensive for long messages - how to optimize?
+    if (msg_prv.reasoning_content != msg_new.reasoning_content) {
         auto & diff = diffs.emplace_back();
-        diff.reasoning_content_delta = string_diff(previous_msg.reasoning_content, new_msg.reasoning_content);
+        diff.reasoning_content_delta = string_diff(msg_prv.reasoning_content, msg_new.reasoning_content);
     }
-    if (previous_msg.content != new_msg.content) {
+    if (msg_prv.content != msg_new.content) {
         auto & diff = diffs.emplace_back();
-        diff.content_delta = string_diff(previous_msg.content, new_msg.content);
+        diff.content_delta = string_diff(msg_prv.content, msg_new.content);
     }

-    if (new_msg.tool_calls.size() < previous_msg.tool_calls.size()) {
+    if (msg_new.tool_calls.size() < msg_prv.tool_calls.size()) {
         throw std::runtime_error("Invalid diff: now finding less tool calls!");
     }

-    if (!previous_msg.tool_calls.empty()) {
-        auto idx = previous_msg.tool_calls.size() - 1;
-        const auto & pref = previous_msg.tool_calls[idx];
-        const auto & newf = new_msg.tool_calls[idx];
+    if (!msg_prv.tool_calls.empty()) {
+        const auto idx = msg_prv.tool_calls.size() - 1;
+        const auto & pref = msg_prv.tool_calls[idx];
+        const auto & newf = msg_new.tool_calls[idx];
         if (pref.name != newf.name) {
             throw std::runtime_error("Invalid diff: tool call mismatch!");
         }
-        auto args_diff = string_diff(pref.arguments, newf.arguments);
+        const auto args_diff = string_diff(pref.arguments, newf.arguments);
         if (!args_diff.empty() || pref.id != newf.id) {
             auto & diff = diffs.emplace_back();
             diff.tool_call_index = idx;
@@ -118,11 +125,12 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
             diff.tool_call_delta.arguments = args_diff;
         }
     }
-    for (size_t idx = previous_msg.tool_calls.size(); idx < new_msg.tool_calls.size(); ++idx) {
+    for (size_t idx = msg_prv.tool_calls.size(); idx < msg_new.tool_calls.size(); ++idx) {
         auto & diff = diffs.emplace_back();
         diff.tool_call_index = idx;
-        diff.tool_call_delta = new_msg.tool_calls[idx];
+        diff.tool_call_delta = msg_new.tool_calls[idx];
     }
+
     return diffs;
 }

common/chat.h

Lines changed: 1 addition & 1 deletion
@@ -77,7 +77,7 @@ struct common_chat_msg_diff {
     size_t tool_call_index = std::string::npos;
     common_chat_tool_call tool_call_delta;

-    static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg);
+    static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new);

     bool operator==(const common_chat_msg_diff & other) const {
         return content_delta == other.content_delta

common/common.cpp

Lines changed: 20 additions & 2 deletions
@@ -786,11 +786,29 @@ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
 #include <iostream>


+#ifdef _WIN32
+static std::wstring utf8_to_wstring(const std::string & str) {
+    if (str.empty()) {
+        return std::wstring();
+    }
+
+    int size = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), NULL, 0);
+
+    if (size <= 0) {
+        return std::wstring();
+    }
+
+    std::wstring wstr(size, 0);
+    MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), &wstr[0], size);
+
+    return wstr;
+}
+#endif
+
 // returns true if successful, false otherwise
 bool fs_create_directory_with_parents(const std::string & path) {
 #ifdef _WIN32
-    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
-    std::wstring wpath = converter.from_bytes(path);
+    std::wstring wpath = utf8_to_wstring(path);

     // if the path already exists, check whether it's a directory
     const DWORD attributes = GetFileAttributesW(wpath.c_str());
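The new utf8_to_wstring helper replaces std::wstring_convert with std::codecvt_utf8, both deprecated since C++17, with a direct Win32 call. For symmetry, a reverse conversion using the same pattern could look like the sketch below; wstring_to_utf8 is hypothetical and not part of this commit:

// hypothetical reverse helper mirroring utf8_to_wstring above; not in this commit
#ifdef _WIN32
#include <windows.h>
#include <string>

static std::string wstring_to_utf8(const std::wstring & wstr) {
    if (wstr.empty()) {
        return std::string();
    }

    // first call computes the required buffer size in bytes
    const int size = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), (int) wstr.size(), NULL, 0, NULL, NULL);
    if (size <= 0) {
        return std::string();
    }

    // second call performs the actual conversion into the buffer
    std::string str(size, 0);
    WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), (int) wstr.size(), &str[0], size, NULL, NULL);
    return str;
}
#endif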

common/common.h

Lines changed: 4 additions & 0 deletions
@@ -12,6 +12,10 @@
 #include <vector>
 #include <map>

+#if defined(_WIN32) && !defined(_WIN32_WINNT)
+#define _WIN32_WINNT 0x0A00
+#endif
+
 #ifdef _WIN32
 #define DIRECTORY_SEPARATOR '\\'
 #else

convert_hf_to_gguf.py

Lines changed: 16 additions & 4 deletions
@@ -1571,19 +1571,31 @@ def _set_vocab_mistral(self):
         self.gguf_writer.add_add_bos_token(True)
         self.gguf_writer.add_add_eos_token(False)

-        template_dir = Path(__file__).parent / "models/templates/"
+        local_template_file_path = self.dir_model / "chat_template.jinja"
+
+        if self.is_mistral_format and local_template_file_path.is_file():
+            # Ministral-3 and other new Mistral models come with chat templates.
+            # ref: https://huggingface.co/mistralai/Ministral-3-14B-Instruct-2512/tree/main
+            logger.info("Using an existing Mistral local chat template.")
+
+            with open(local_template_file_path, "r", encoding="utf-8") as f:
+                template = f.read()
+        elif not self.is_mistral_format or not self.disable_mistral_community_chat_template:
+            template_dir = Path(__file__).parent / "models/templates/"

-        if not self.is_mistral_format or not self.disable_mistral_community_chat_template:
             # Log only for Mistral format that the official tokenization and detokenization is via `mistral-common`.
             if self.is_mistral_format:
                 logger.info(
                     "Using a Mistral community chat template. These templates can be subject to errors in early days or weeks after a release. "
                     "Mistral recommends to use `mistral-common` to perform tokenization and detokenization."
                 )
             template = MistralModel.get_community_chat_template(vocab, template_dir, self.is_mistral_format)
-            self.gguf_writer.add_chat_template(template)
         else:
-            logger.info("Not using a Mistral community chat template. Ensure to perform the tokenization and detokenization via `mistral-common`.")
+            logger.info("Not using a Mistral local or community chat template. Ensure to perform the tokenization and detokenization via `mistral-common`.")
+            template = None
+
+        if template is not None:
+            self.gguf_writer.add_chat_template(template)


 class MmprojModel(ModelBase):

ggml/CMakeLists.txt

Lines changed: 0 additions & 5 deletions
@@ -175,11 +175,6 @@ option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requi
 set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
 set(GGML_CPU_POWERPC_CPUTYPE "" CACHE STRING "ggml: CPU type for PowerPC")

-
-if (MINGW)
-    set(GGML_WIN_VER "0xA00" CACHE STRING "ggml: Windows version")
-endif()
-
 # ggml core
 set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism")
 option(GGML_CPU "ggml: enable CPU backend" ON)
