@@ -595,7 +595,6 @@ static struct ggml_tensor * llm_build_kqv(
595595 padded_v = ggml_pad (ctx, v, 0 , k->ne [0 ] - v->ne [1 ], 0 , 0 );
596596 cb (padded_v, " padded_v" , il);
597597 n_embd_head_v_out = n_embd_head_k;
598- padded_v = ggml_cont (ctx, padded_v);
599598 }
600599
601600 cur = ggml_flash_attn_ext (ctx, q, k, padded_v, kq_mask, kq_scale, hparams.f_max_alibi_bias ,
@@ -604,11 +603,11 @@ static struct ggml_tensor * llm_build_kqv(
604603 ggml_flash_attn_ext_set_prec (cur, GGML_PREC_F32);
605604
606605 if (n_embd_head_v < n_embd_head_k) {
607- cur = ggml_cont (ctx, cur);
606+ cur = ggml_reshape_3d (ctx, cur, n_embd_head_v, n_head, n_tokens );
608607 cur = ggml_cont (ctx, ggml_view_3d (ctx, cur, n_embd_head_v, n_head, n_tokens,
609- cur->nb [ 1 ] ,
610- cur->nb [ 2 ] ,
611- 0 ));
608+ ggml_row_size ( cur->type , n_embd_head_v_out) ,
609+ ggml_row_size ( cur->type , n_embd_head_v_out * n_head) ,
610+ 0 ));
612611 }
613612
614613 cur = ggml_reshape_2d (ctx, cur, n_embd_head_v*n_head, n_tokens);
0 commit comments