From 3b195d301ade9f7c1798ef588ea2464781319e78 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen <son@huggingface.co>
Date: Thu, 20 Nov 2025 17:31:55 +0100
Subject: [PATCH 1/2] grammar: fix regression caused by #17381

---
 src/llama-grammar.cpp | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/llama-grammar.cpp b/src/llama-grammar.cpp
index bdd337e9526..84ea140ac24 100644
--- a/src/llama-grammar.cpp
+++ b/src/llama-grammar.cpp
@@ -347,9 +347,10 @@ const char * llama_grammar_parser::parse_sequence(
     size_t last_sym_start = rule.size();
     const char * pos = src;
 
-    // use UINT64_MAX as the empty value because we aligned to the proper unsigned long type so -1 can't be used
+    // use UINT64_MAX as the empty value because we aligned to the proper uint64_t type so -1 can't be used
     // (though it's technically the same as -1 now)
-    auto handle_repetitions = [&](unsigned long min_times, unsigned long max_times) {
+    // ref: https://github.com/ggml-org/llama.cpp/pull/17381
+    auto handle_repetitions = [&](uint64_t min_times, uint64_t max_times) {
 
         if (last_sym_start == rule.size()) {
             throw std::runtime_error(std::string("expecting preceding item to */+/?/{ at ") + pos);
@@ -377,7 +378,7 @@ const char * llama_grammar_parser::parse_sequence(
             rule.resize(last_sym_start);
         } else {
             // Repeat the previous elements (min_times - 1) times
-            for (unsigned long i = 1; i < min_times; i++) {
+            for (uint64_t i = 1; i < min_times; i++) {
                 rule.insert(rule.end(), prev_rule.begin(), prev_rule.end());
             }
         }
@@ -386,7 +387,7 @@ const char * llama_grammar_parser::parse_sequence(
         auto n_opt = max_times == UINT64_MAX ? 1 : max_times - min_times;
 
         llama_grammar_rule rec_rule(prev_rule);
-        for (unsigned long i = 0; i < n_opt; i++) {
+        for (uint64_t i = 0; i < n_opt; i++) {
             rec_rule.resize(prev_rule.size());
             uint32_t rec_rule_id = generate_symbol_id( rule_name);
             if (i > 0 || max_times == UINT64_MAX) {
@@ -482,10 +483,10 @@ const char * llama_grammar_parser::parse_sequence(
                 throw std::runtime_error(std::string("expecting an int at ") + pos);
             }
             const char * int_end = parse_int(pos);
-            unsigned long min_times = std::stoul(std::string(pos, int_end - pos));
+            uint64_t min_times = std::stoul(std::string(pos, int_end - pos));
             pos = parse_space(int_end, is_nested);
 
-            unsigned long max_times = UINT64_MAX;
+            uint64_t max_times = UINT64_MAX;
 
             if (*pos == '}') {
                 max_times = min_times;
@@ -506,7 +507,7 @@ const char * llama_grammar_parser::parse_sequence(
             } else {
                 throw std::runtime_error(std::string("expecting ',' at ") + pos);
             }
-            if (min_times > MAX_REPETITION_THRESHOLD || (max_times != UINT64_MAX && max_times > MAX_REPETITION_THRESHOLD)) {
+            if (min_times > MAX_REPETITION_THRESHOLD || max_times > MAX_REPETITION_THRESHOLD) {
                 throw std::runtime_error(std::string("number of repetitions exceeds sane defaults, please reduce the number of repetitions"));
             }
             handle_repetitions(min_times, max_times);

From c0b9903a1ae5e32e4bff4fedfacae7a575a1faf7 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen <son@huggingface.co>
Date: Thu, 20 Nov 2025 17:45:37 +0100
Subject: [PATCH 2/2] grammar: name the no-max sentinel checks (no_max/has_max) and restore the has_max guard

---
 src/llama-grammar.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/llama-grammar.cpp b/src/llama-grammar.cpp
index 84ea140ac24..b3c5eb57174 100644
--- a/src/llama-grammar.cpp
+++ b/src/llama-grammar.cpp
@@ -349,9 +349,8 @@ const char * llama_grammar_parser::parse_sequence(
 
     // use UINT64_MAX as the empty value because we aligned to the proper uint64_t type so -1 can't be used
     // (though it's technically the same as -1 now)
-    // ref: https://github.com/ggml-org/llama.cpp/pull/17381
     auto handle_repetitions = [&](uint64_t min_times, uint64_t max_times) {
-
+        bool no_max = max_times == UINT64_MAX;
         if (last_sym_start == rule.size()) {
             throw std::runtime_error(std::string("expecting preceding item to */+/?/{ at ") + pos);
         }
@@ -384,14 +383,14 @@ const char * llama_grammar_parser::parse_sequence(
         }
 
         uint32_t last_rec_rule_id = 0;
-        auto n_opt = max_times == UINT64_MAX ? 1 : max_times - min_times;
+        auto n_opt = no_max ? 1 : max_times - min_times;
 
         llama_grammar_rule rec_rule(prev_rule);
         for (uint64_t i = 0; i < n_opt; i++) {
             rec_rule.resize(prev_rule.size());
             uint32_t rec_rule_id = generate_symbol_id( rule_name);
-            if (i > 0 || max_times == UINT64_MAX) {
-                rec_rule.push_back({LLAMA_GRETYPE_RULE_REF, max_times == UINT64_MAX ? rec_rule_id : last_rec_rule_id});
+            if (i > 0 || no_max) {
+                rec_rule.push_back({LLAMA_GRETYPE_RULE_REF, no_max ? rec_rule_id : last_rec_rule_id});
             }
             rec_rule.push_back({LLAMA_GRETYPE_ALT, 0});
             rec_rule.push_back({LLAMA_GRETYPE_END, 0});
@@ -486,7 +485,7 @@ const char * llama_grammar_parser::parse_sequence(
             uint64_t min_times = std::stoul(std::string(pos, int_end - pos));
             pos = parse_space(int_end, is_nested);
 
-            uint64_t max_times = UINT64_MAX;
+            uint64_t max_times = UINT64_MAX; // default: no max limit
 
             if (*pos == '}') {
                 max_times = min_times;
@@ -507,7 +506,8 @@ const char * llama_grammar_parser::parse_sequence(
             } else {
                 throw std::runtime_error(std::string("expecting ',' at ") + pos);
             }
-            if (min_times > MAX_REPETITION_THRESHOLD || max_times > MAX_REPETITION_THRESHOLD) {
+            bool has_max = max_times != UINT64_MAX;
+            if (min_times > MAX_REPETITION_THRESHOLD || (has_max && max_times > MAX_REPETITION_THRESHOLD)) {
                 throw std::runtime_error(std::string("number of repetitions exceeds sane defaults, please reduce the number of repetitions"));
             }
             handle_repetitions(min_times, max_times);