Skip to content

Commit 960bb52

Browse files
committed
fix: Increase max nodes for models known to use mamba2
Branch: Mamba2SSD Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
1 parent 9d715f4 commit 960bb52

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

src/llama-context.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1387,7 +1387,11 @@ void llama_context::output_reorder() {
 //

 uint32_t llama_context::graph_max_nodes(uint32_t n_tokens) const {
-    if (model.arch == LLM_ARCH_QWEN3NEXT) {
+    if (model.arch == LLM_ARCH_QWEN3NEXT ||
+        model.arch == LLM_ARCH_GRANITE_HYBRID ||
+        model.arch == LLM_ARCH_MAMBA2 ||
+        model.arch == LLM_ARCH_FALCON_H1 ||
+        model.arch == LLM_ARCH_NEMOTRON_H) {
         return std::max<uint32_t>(n_tokens * 40, 32u * model.n_tensors());
     }
     return std::max<uint32_t>(1024u, 8u*model.n_tensors());

0 commit comments

Comments
 (0)