Skip to content

Commit 5001dee

Browse files
committed
fix
1 parent f298a83 commit 5001dee

File tree

1 file changed

+4
-5
lines changed

1 file changed

+4
-5
lines changed

src/llama.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,6 @@ static struct ggml_tensor * llm_build_kqv(
             padded_v = ggml_pad(ctx, v, 0, k->ne[0] - v->ne[1], 0, 0);
             cb(padded_v, "padded_v", il);
             n_embd_head_v_out = n_embd_head_k;
-            padded_v = ggml_cont(ctx, padded_v);
         }

         cur = ggml_flash_attn_ext(ctx, q, k, padded_v, kq_mask, kq_scale, hparams.f_max_alibi_bias,
@@ -604,11 +603,11 @@ static struct ggml_tensor * llm_build_kqv(
         ggml_flash_attn_ext_set_prec(cur, GGML_PREC_F32);

         if (n_embd_head_v < n_embd_head_k) {
-            cur = ggml_cont(ctx, cur);
+            cur = ggml_reshape_3d(ctx, cur, n_embd_head_v, n_head, n_tokens);
             cur = ggml_cont(ctx, ggml_view_3d(ctx, cur, n_embd_head_v, n_head, n_tokens,
-                        cur->nb[1],
-                        cur->nb[2],
-                        0));
+                        ggml_row_size(cur->type, n_embd_head_v_out),
+                        ggml_row_size(cur->type, n_embd_head_v_out * n_head),
+                        0));
         }

         cur = ggml_reshape_2d(ctx, cur, n_embd_head_v*n_head, n_tokens);

0 commit comments

Comments
 (0)