Skip to content

Commit 97677f7

Browse files
committed
fix
1 parent f8f5be1 commit 97677f7

File tree

2 files changed

+1
-2
lines changed

2 files changed

+1
-2
lines changed

ggml/src/ggml-cuda/pad.cu

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ static void pad_f32_cuda(const float * x, float * dst,
5757
dim3 gridDim(num_blocks, ne1, ne2*ne3);
5858
pad_f32<<<gridDim, CUDA_PAD_BLOCK_SIZE, 0, stream>>>(x, dst, ne0, ne00, ne01, ne02, ne03);
5959
}
60-
#include "ggml-impl.h"
6160

6261
static void pad_f16_cuda(const half * x, half * dst,
6362
const int ne00, const int ne01, const int ne02, const int ne03,

src/llama.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9568,7 +9568,7 @@ struct llama_context * llama_init_from_model(
95689568
}
95699569

95709570
if (params.flash_attn && model->hparams.n_embd_head_k != model->hparams.n_embd_head_v) {
9571-
LLAMA_LOG_WARN("%s: flash_attn requires n_embd_head_k != n_embd_head_v - forcing off\n", __func__);
9571+
LLAMA_LOG_WARN("%s: flash_attn requires n_embd_head_k == n_embd_head_v - forcing off\n", __func__);
95729572
params.flash_attn = false;
95739573
}
95749574

0 commit comments

Comments (0)