
Commit 0040a42

Revert "Update llama.cpp"
This reverts commit 4250fb4.
1 parent: 4250fb4

File tree

1 file changed: +7, -8 lines


src/llama.cpp

Lines changed: 7 additions & 8 deletions
@@ -1439,7 +1439,6 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
             { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
             { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
             { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
-            { LLM_TENSOR_FFN_EXP_PROBS_B,    "blk.%d.exp_probs_b" },
             // NextN/MTP tensors - preserved but unused (in final layer, dynamic layer number)
             { LLM_TENSOR_NEXTN_EH_PROJ,      "blk.%d.eh_proj" },
             { LLM_TENSOR_NEXTN_EMBED_TOKENS, "blk.%d.embed_tokens" },
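Each entry in this map pairs a tensor enum with a printf-style name pattern; the layer index is substituted into the "%d" when tensors are resolved by name during loading. A minimal sketch of that expansion, using a hypothetical helper for illustration rather than the tn() helper used further down:

// Hypothetical helper, for illustration only -- not the tn() used in llm_load_tensors.
#include <cstdio>
#include <string>

static std::string format_tensor_name(const char * pattern, int layer, const char * suffix = nullptr) {
    char buf[256];
    std::snprintf(buf, sizeof(buf), pattern, layer);  // "blk.%d.exp_probs_b" -> "blk.12.exp_probs_b"
    std::string name(buf);
    if (suffix != nullptr) {
        name += ".";
        name += suffix;                               // e.g. "blk.12.ffn_up_shexp.weight"
    }
    return name;
}

// format_tensor_name("blk.%d.exp_probs_b", 12)            == "blk.12.exp_probs_b"
// format_tensor_name("blk.%d.ffn_up_shexp", 12, "weight") == "blk.12.ffn_up_shexp.weight"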
@@ -9036,9 +9035,9 @@ static bool llm_load_tensors(
                     model.output = create_tensor(ctx_output, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, llama_model_loader::TENSOR_DUPLICATED);
                 }

-                // NextN/MTP tensors (preserved but unused) - only in final layer (46 for Air, 92 for GLM-4.5)
+                // --- NextN / MTP tensors (preserved but unused), on the final layer ---
                 {
-                    const int final_layer = n_layer - 1; // NextN tensors are in the last layer only
+                    const int final_layer = n_layer - 1;
                     // EH_PROJ: [2*embd, embd]
                     create_tensor(ctx_for_layer(final_layer),
                         tn(LLM_TENSOR_NEXTN_EH_PROJ, final_layer),
@@ -9082,9 +9081,9 @@ static bool llm_load_tensors(
                     layer.wq = create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_Q, "weight", i), { n_embd, n_embd_head_k * n_head }, 0);
                     layer.wk = create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_K, "weight", i), { n_embd, n_embd_k_gqa }, 0);
                     layer.wv = create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_V, "weight", i), { n_embd, n_embd_v_gqa }, 0);
-                    layer.bq = create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_Q, "bias", i), { n_embd_head_k * n_head }, 0);
-                    layer.bk = create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_K, "bias", i), { n_embd_k_gqa }, 0);
-                    layer.bv = create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_V, "bias", i), { n_embd_v_gqa }, 0);
+                    layer.bq = create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_Q, "bias", i), { n_embd_head_k * n_head }, llama_model_loader::TENSOR_NOT_REQUIRED);
+                    layer.bk = create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_K, "bias", i), { n_embd_k_gqa }, llama_model_loader::TENSOR_NOT_REQUIRED);
+                    layer.bv = create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_V, "bias", i), { n_embd_v_gqa }, llama_model_loader::TENSOR_NOT_REQUIRED);

                     layer.wo = create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_embd_head_k * n_head, n_embd }, 0);
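The attention bias tensors above go from required (flag 0) back to llama_model_loader::TENSOR_NOT_REQUIRED, which by llama.cpp convention leaves the member as nullptr when the tensor is missing from the GGUF instead of aborting the load. A minimal sketch of how such an optional bias is then consumed, with the guard on nullptr (illustrative only; the function and its parameter names are assumptions, not code from this commit):

#include "ggml.h"

// Apply an optional bias only if the loader actually found it; a missing
// optional tensor is simply skipped rather than treated as a load error.
static struct ggml_tensor * apply_optional_bias(
        struct ggml_context * ctx,
        struct ggml_tensor  * cur,     // e.g. the output of the Q projection
        struct ggml_tensor  * bias) {  // e.g. layer.bq; nullptr when not loaded
    if (bias == nullptr) {
        return cur;                    // tensor absent from the GGUF -> no-op
    }
    return ggml_add(ctx, cur, bias);   // otherwise add the bias as usual
}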

@@ -9107,8 +9106,8 @@ static bool llm_load_tensors(
                         create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), { n_embd, n_expert }, 0);
                     // gate bias
                     layer.ffn_exp_probs_b =
-                        create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_EXP_PROBS_B, i), { n_expert },
-                            0);
+                        create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE_INP, "bias", i), { n_expert },
+                            llama_model_loader::TENSOR_NOT_REQUIRED);

                     if (n_expert == 0) {
                         GGML_ASSERT(hparams.n_expert > 0 && "n_expert must be > 0 for GLM4_MOE MoE layers");
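This hunk switches the expert-gate bias back to an optional lookup under the FFN_GATE_INP tensor's "bias" suffix, instead of the dedicated LLM_TENSOR_FFN_EXP_PROBS_B name removed in the first hunk. Below is a quick diagnostic sketch for checking which of the two names a given GGUF file actually contains; it assumes the usual "blk.%d.ffn_gate_inp" pattern for LLM_TENSOR_FFN_GATE_INP and is not part of this commit:

#include "ggml.h"   // gguf_* API is declared here in llama.cpp trees of this vintage
#include <cstdio>

static void report_gate_bias_name(const struct gguf_context * gguf, int il) {
    char new_name[128], old_name[128];
    std::snprintf(new_name, sizeof(new_name), "blk.%d.ffn_gate_inp.bias", il);  // name looked up after this revert
    std::snprintf(old_name, sizeof(old_name), "blk.%d.exp_probs_b",       il);  // name used by the reverted change
    if (gguf_find_tensor(gguf, new_name) >= 0) {
        std::printf("layer %d: gate bias stored as %s\n", il, new_name);
    } else if (gguf_find_tensor(gguf, old_name) >= 0) {
        std::printf("layer %d: gate bias stored as %s\n", il, old_name);
    } else {
        std::printf("layer %d: no gate bias tensor (it is optional)\n", il);
    }
}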
