Skip to content

Commit f4def9b

Browse files
ikawrakow (Iwan Kawrakow)
and authored
Don't split the output tensor (#1038)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
1 parent b43801a commit f4def9b

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

src/llama-load-tensors.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,7 @@ void create_tensors_helper::create_std_ffn(int i, const LLM_TN & tn, llama_layer
388388

389389
bool create_tensors_helper::create_llama_tensors(const LLM_TN & tn) {
390390
LOADING_PRELUDE
391-
create_embd_output(tn, n_embd, n_vocab, true, true);
391+
create_embd_output(tn, n_embd, n_vocab, true, false); //true);
392392

393393
for (int i = 0; i < n_layer; ++i) {
394394
ggml_context * ctx_layer = ctx_for_layer(i);
@@ -1843,7 +1843,7 @@ bool create_tensors_helper::create_glm4_moe_tensors(const LLM_TN & tn) {
18431843
GGML_ASSERT(hparams.n_expert > 0 && "n_expert must be > 0 for GLM4_MOE MoE layers");
18441844
GGML_ASSERT(hparams.n_expert_used > 0 && "n_expert_used must be > 0 for GLM4_MOE MoE layers");
18451845

1846-
create_embd_output(tn, n_embd, n_vocab, true, true);
1846+
create_embd_output(tn, n_embd, n_vocab, true, false); //true);
18471847

18481848
for (int i = 0; i < n_layer; ++i) {
18491849
ggml_context * ctx_layer = ctx_for_layer(i);

0 commit comments

Comments (0)