Commit d006f35 (2 parents: c195af2 + 9d02299)

Merge remote-tracking branch 'origin/master' into ggml-cumsum-tri

* origin/master:
  server: strip content-length header on proxy (ggml-org#17734)
  server: move msg diffs tracking to HTTP thread (ggml-org#17740)
  examples : add missing code block end marker [no ci] (ggml-org#17756)
  common : skip model validation when --help is requested (ggml-org#17755)
  ggml-cpu : remove asserts always evaluating to false (ggml-org#17728)
  convert: use existing local chat_template if mistral-format model has one. (ggml-org#17749)
  cmake : simplify build info detection using standard variables (ggml-org#17423)
  ci : disable ggml-ci-x64-amd-* (ggml-org#17753)
  common: use native MultiByteToWideChar (ggml-org#17738)
  metal : use params per pipeline instance (ggml-org#17739)
  llama : fix sanity checks during quantization (ggml-org#17721)
  build : move _WIN32_WINNT definition to headers (ggml-org#17736)
  build: enable parallel builds in msbuild using MTT (ggml-org#17708)
  ggml-cpu: remove duplicate conditional check 'iid' (ggml-org#17650)
  Add a couple of file types to the text section (ggml-org#17670)
  convert : support latest mistral-common (fix conversion with --mistral-format) (ggml-org#17712)
  Use OpenAI-compatible `/v1/models` endpoint by default (ggml-org#17689)
  webui: Fix zero pasteLongTextToFileLen to disable conversion being overridden (ggml-org#17445)

41 files changed: +891, -892 lines

.github/workflows/build.yml

Lines changed: 27 additions & 27 deletions

@@ -1602,33 +1602,33 @@ jobs:
        run: |
          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
-  ggml-ci-x64-amd-vulkan:
-    runs-on: [self-hosted, Linux, X64, AMD]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          vulkaninfo --summary
-          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
-
-  ggml-ci-x64-amd-rocm:
-    runs-on: [self-hosted, Linux, X64, AMD]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          amd-smi static
-          GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+  # ggml-ci-x64-amd-vulkan:
+  #   runs-on: [self-hosted, Linux, X64, AMD]
+
+  #   steps:
+  #     - name: Clone
+  #       id: checkout
+  #       uses: actions/checkout@v4
+
+  #     - name: Test
+  #       id: ggml-ci
+  #       run: |
+  #         vulkaninfo --summary
+  #         GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  # ggml-ci-x64-amd-rocm:
+  #   runs-on: [self-hosted, Linux, X64, AMD]
+
+  #   steps:
+  #     - name: Clone
+  #       id: checkout
+  #       uses: actions/checkout@v4
+
+  #     - name: Test
+  #       id: ggml-ci
+  #       run: |
+  #         amd-smi static
+  #         GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
   ggml-ci-mac-metal:
     runs-on: [self-hosted, macOS, ARM64]

CMakeLists.txt

Lines changed: 6 additions & 5 deletions

@@ -72,6 +72,12 @@ if (MSVC)
     add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/bigobj>")
 endif()
 
+if (LLAMA_STANDALONE)
+    # enable parallel builds for msbuild
+    list(APPEND CMAKE_VS_GLOBALS UseMultiToolTask=true)
+    list(APPEND CMAKE_VS_GLOBALS EnforceProcessCountAcrossBuilds=true)
+endif()
+
 if (CMAKE_SYSTEM_NAME STREQUAL "iOS")
     set(LLAMA_TOOLS_INSTALL_DEFAULT OFF)
 else()

@@ -193,11 +199,6 @@ if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
     # ... otherwise assume ggml is added by a parent CMakeLists.txt
 endif()
 
-if (MINGW)
-    # Target Windows 8 for PrefetchVirtualMemory
-    add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
-endif()
-
 #
 # build the library
 #

cmake/build-info.cmake

Lines changed: 5 additions & 21 deletions

@@ -39,26 +39,10 @@ if(Git_FOUND)
     endif()
 endif()
 
-if(MSVC)
-    set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
-    if (CMAKE_VS_PLATFORM_NAME)
-        set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
-    else()
-        set(BUILD_TARGET "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}")
-    endif()
-else()
-    execute_process(
-        COMMAND ${CMAKE_C_COMPILER} --version
-        OUTPUT_VARIABLE OUT
-        OUTPUT_STRIP_TRAILING_WHITESPACE
-    )
-    string(REGEX REPLACE " *\n.*" "" OUT "${OUT}")
-    set(BUILD_COMPILER ${OUT})
+set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
 
-    execute_process(
-        COMMAND ${CMAKE_C_COMPILER} -dumpmachine
-        OUTPUT_VARIABLE OUT
-        OUTPUT_STRIP_TRAILING_WHITESPACE
-    )
-    set(BUILD_TARGET ${OUT})
+if(CMAKE_VS_PLATFORM_NAME)
+    set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
+else()
+    set(BUILD_TARGET "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}")
 endif()

common/arg.cpp

Lines changed: 1 addition & 1 deletion

@@ -427,7 +427,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_contex
 
     // model is required (except for server)
    // TODO @ngxson : maybe show a list of available models in CLI in this case
-    if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER) {
+    if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER && !params.usage) {
         throw std::invalid_argument("error: --model is required\n");
     }

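This is the fix from `common : skip model validation when --help is requested (ggml-org#17755)`: once `params.usage` is set by `-h`/`--help`, the required-model check no longer fires, so the usage text can print even without `--model`. A minimal sketch of the pattern, using hypothetical stand-in names (`cli_params` and `validate` are not the real llama.cpp types):

```cpp
#include <stdexcept>
#include <string>

// Hypothetical stand-ins for the real common_params / parsing context.
struct cli_params {
    std::string model_path;
    bool        usage = false; // set when -h/--help was parsed
};

// Required-argument validation runs only when the user actually wants a run,
// not when they merely asked for the usage text.
static void validate(const cli_params & params, bool is_server) {
    if (params.model_path.empty() && !is_server && !params.usage) {
        throw std::invalid_argument("error: --model is required\n");
    }
}

int main() {
    cli_params p;
    p.usage = true;                   // simulates passing --help
    validate(p, /*is_server=*/false); // no throw despite the empty model path
    return 0;
}
```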
common/common.cpp

Lines changed: 20 additions & 2 deletions

@@ -786,11 +786,29 @@ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
 #include <iostream>
 
 
+#ifdef _WIN32
+static std::wstring utf8_to_wstring(const std::string & str) {
+    if (str.empty()) {
+        return std::wstring();
+    }
+
+    int size = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), NULL, 0);
+
+    if (size <= 0) {
+        return std::wstring();
+    }
+
+    std::wstring wstr(size, 0);
+    MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), &wstr[0], size);
+
+    return wstr;
+}
+#endif
+
 // returns true if successful, false otherwise
 bool fs_create_directory_with_parents(const std::string & path) {
 #ifdef _WIN32
-    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
-    std::wstring wpath = converter.from_bytes(path);
+    std::wstring wpath = utf8_to_wstring(path);
 
     // if the path already exists, check whether it's a directory
     const DWORD attributes = GetFileAttributesW(wpath.c_str());

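This is `common: use native MultiByteToWideChar (ggml-org#17738)`: the new helper replaces `std::wstring_convert`/`std::codecvt_utf8`, which have been deprecated since C++17, with the standard two-call Win32 idiom — the first `MultiByteToWideChar` call (NULL output buffer) returns the required UTF-16 length, the second performs the conversion. A self-contained, Windows-only usage sketch; the helper body is taken from the diff above, while the example path is made up:

```cpp
#include <windows.h>

#include <string>

static std::wstring utf8_to_wstring(const std::string & str) {
    if (str.empty()) {
        return std::wstring();
    }
    // first call: NULL buffer -> returns the required length in UTF-16 code units
    int size = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), NULL, 0);
    if (size <= 0) {
        return std::wstring(); // invalid UTF-8 or conversion failure
    }
    // second call: convert into the correctly sized buffer
    std::wstring wstr(size, 0);
    MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), &wstr[0], size);
    return wstr;
}

int main() {
    // "models/übung" spelled as explicit UTF-8 bytes, so source encoding does not matter
    const std::string path = "models/\xC3\xBC" "bung";
    const std::wstring wpath = utf8_to_wstring(path);
    CreateDirectoryW(wpath.c_str(), NULL); // wide-char Win32 APIs expect UTF-16
    return 0;
}
```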
common/common.h

Lines changed: 4 additions & 0 deletions

@@ -12,6 +12,10 @@
 #include <vector>
 #include <map>
 
+#if defined(_WIN32) && !defined(_WIN32_WINNT)
+#define _WIN32_WINNT 0x0A00
+#endif
+
 #ifdef _WIN32
 #define DIRECTORY_SEPARATOR '\\'
 #else

convert_hf_to_gguf.py

Lines changed: 16 additions & 4 deletions

@@ -2341,19 +2341,31 @@ def _set_vocab_mistral(self):
         self.gguf_writer.add_add_bos_token(True)
         self.gguf_writer.add_add_eos_token(False)
 
-        template_dir = Path(__file__).parent / "models/templates/"
+        local_template_file_path = self.dir_model / "chat_template.jinja"
+
+        if self.is_mistral_format and local_template_file_path.is_file():
+            # Ministral-3 and other new Mistral models come with chat templates.
+            # ref: https://huggingface.co/mistralai/Ministral-3-14B-Instruct-2512/tree/main
+            logger.info("Using an existing Mistral local chat template.")
+
+            with open(local_template_file_path, "r", encoding="utf-8") as f:
+                template = f.read()
+        elif not self.is_mistral_format or not self.disable_mistral_community_chat_template:
+            template_dir = Path(__file__).parent / "models/templates/"
 
-        if not self.is_mistral_format or not self.disable_mistral_community_chat_template:
             # Log only for Mistral format that the official tokenization and detokenization is via `mistral-common`.
             if self.is_mistral_format:
                 logger.info(
                     "Using a Mistral community chat template. These templates can be subject to errors in early days or weeks after a release. "
                     "Mistral recommends to use `mistral-common` to perform tokenization and detokenization."
                 )
             template = MistralModel.get_community_chat_template(vocab, template_dir, self.is_mistral_format)
-            self.gguf_writer.add_chat_template(template)
         else:
-            logger.info("Not using a Mistral community chat template. Ensure to perform the tokenization and detokenization via `mistral-common`.")
+            logger.info("Not using a Mistral local or community chat template. Ensure to perform the tokenization and detokenization via `mistral-common`.")
+            template = None
+
+        if template is not None:
+            self.gguf_writer.add_chat_template(template)
 
     def set_vocab(self):
         if self.is_mistral_format:

examples/simple-cmake-pkg/README.md

Lines changed: 1 addition & 0 deletions

@@ -18,6 +18,7 @@ cd llama.cpp
 cmake -S . -B build
 cmake --build build
 cmake --install build --prefix inst
+```
 
 ### Build simple-cmake-pkg
 

ggml/CMakeLists.txt

Lines changed: 0 additions & 5 deletions

@@ -175,11 +175,6 @@ option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requi
 set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
 set(GGML_CPU_POWERPC_CPUTYPE "" CACHE STRING "ggml: CPU type for PowerPC")
 
-
-if (MINGW)
-    set(GGML_WIN_VER "0xA00" CACHE STRING "ggml: Windows version")
-endif()
-
 # ggml core
 set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism")
 option(GGML_CPU "ggml: enable CPU backend" ON)

ggml/include/ggml.h

Lines changed: 4 additions & 0 deletions

@@ -204,6 +204,10 @@
 #    define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
 #endif
 
+#if defined(_WIN32) && !defined(_WIN32_WINNT)
+#    define _WIN32_WINNT 0x0A00
+#endif
+
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>

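Together with the identical guard added to common/common.h above, this completes `build : move _WIN32_WINNT definition to headers (ggml-org#17736)`: instead of the MINGW-only `GGML_WIN_VER` CMake cache variable, the headers themselves now default `_WIN32_WINNT` to `0x0A00` (Windows 10), and the `!defined` guard still lets an embedding project pick a different value by defining the macro first. A small Windows-only sketch of what the macro gates; the Windows 8 (`0x0602`) requirement for `PrefetchVirtualMemory` comes from the removed CMake comment, while the `static_assert` is illustrative and not part of the commit:

```cpp
// _WIN32_WINNT tells the Windows SDK headers which API surface to declare.
#if defined(_WIN32) && !defined(_WIN32_WINNT)
#define _WIN32_WINNT 0x0A00 // Windows 10; define it earlier to override
#endif

#ifdef _WIN32
#include <windows.h>

// PrefetchVirtualMemory (the reason the old MINGW block targeted Windows 8)
// is only declared for _WIN32_WINNT >= 0x0602.
static_assert(_WIN32_WINNT >= 0x0602,
              "PrefetchVirtualMemory needs _WIN32_WINNT >= 0x0602");
#endif

int main() { return 0; }
```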