1 parent 9d715f4 commit 960bb52
src/llama-context.cpp
@@ -1387,7 +1387,11 @@ void llama_context::output_reorder() {
 //
 
 uint32_t llama_context::graph_max_nodes(uint32_t n_tokens) const {
-    if (model.arch == LLM_ARCH_QWEN3NEXT) {
+    if (model.arch == LLM_ARCH_QWEN3NEXT ||
+        model.arch == LLM_ARCH_GRANITE_HYBRID ||
+        model.arch == LLM_ARCH_MAMBA2 ||
+        model.arch == LLM_ARCH_FALCON_H1 ||
+        model.arch == LLM_ARCH_NEMOTRON_H) {
         return std::max<uint32_t>(n_tokens * 40, 32u * model.n_tensors());
     }
 
     return std::max<uint32_t>(1024u, 8u*model.n_tensors());
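
For context, a sketch of how graph_max_nodes reads after this change: the listed architectures get a node budget that scales with the token count of the batch, while every other model keeps the previous fixed floor. Only the identifiers visible in the diff (model.arch, model.n_tensors(), the LLM_ARCH_* constants) are taken from the source; the comments and the closing brace are assumptions about the surrounding file.

// Sketch of the function as it stands after this commit (assumed context,
// not a verbatim copy of llama-context.cpp beyond the diff above).
uint32_t llama_context::graph_max_nodes(uint32_t n_tokens) const {
    // These hybrid/recurrent architectures build larger compute graphs per
    // token, so their node budget scales with the batch size.
    if (model.arch == LLM_ARCH_QWEN3NEXT ||
        model.arch == LLM_ARCH_GRANITE_HYBRID ||
        model.arch == LLM_ARCH_MAMBA2 ||
        model.arch == LLM_ARCH_FALCON_H1 ||
        model.arch == LLM_ARCH_NEMOTRON_H) {
        return std::max<uint32_t>(n_tokens * 40, 32u * model.n_tensors());
    }

    // All other architectures keep the fixed lower bound of 1024 nodes.
    return std::max<uint32_t>(1024u, 8u*model.n_tensors());
}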