1 parent 9d715f4 commit 960bb52
src/llama-context.cpp
@@ -1387,7 +1387,11 @@ void llama_context::output_reorder() {
 //
 
 uint32_t llama_context::graph_max_nodes(uint32_t n_tokens) const {
-    if (model.arch == LLM_ARCH_QWEN3NEXT) {
+    if (model.arch == LLM_ARCH_QWEN3NEXT ||
+        model.arch == LLM_ARCH_GRANITE_HYBRID ||
+        model.arch == LLM_ARCH_MAMBA2 ||
+        model.arch == LLM_ARCH_FALCON_H1 ||
+        model.arch == LLM_ARCH_NEMOTRON_H) {
         return std::max<uint32_t>(n_tokens * 40, 32u * model.n_tensors());
     }
 
     return std::max<uint32_t>(1024u, 8u*model.n_tensors());
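
For context, a sketch of how graph_max_nodes reads after this change: the listed architectures get a node budget that scales with the token count of the batch, while every other model keeps the previous fixed floor. Only the identifiers visible in the diff (model.arch, model.n_tensors(), the LLM_ARCH_* constants) are taken from the source; the comments and the closing brace are assumptions about the surrounding file.

// Sketch of the function as it stands after this commit (assumed context,
// not a verbatim copy of llama-context.cpp beyond the diff above).
uint32_t llama_context::graph_max_nodes(uint32_t n_tokens) const {
    // These hybrid/recurrent architectures build larger compute graphs per
    // token, so their node budget scales with the batch size.
    if (model.arch == LLM_ARCH_QWEN3NEXT ||
        model.arch == LLM_ARCH_GRANITE_HYBRID ||
        model.arch == LLM_ARCH_MAMBA2 ||
        model.arch == LLM_ARCH_FALCON_H1 ||
        model.arch == LLM_ARCH_NEMOTRON_H) {
        return std::max<uint32_t>(n_tokens * 40, 32u * model.n_tensors());
    }

    // All other architectures keep the fixed lower bound of 1024 nodes.
    return std::max<uint32_t>(1024u, 8u*model.n_tensors());
}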