Skip to content

Commit 394ced5

Browse files
adjust kernel selection logic
1 parent 13500e8 commit 394ced5

File tree

1 file changed

+3
-0
lines changed

1 file changed

+3
-0
lines changed

ggml/src/ggml-cuda/fattn.cu

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -306,6 +306,9 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
306 306
while (gqa_ratio % (2*gqa_ratio_eff) == 0 && gqa_ratio_eff < ncols2_max) {
307 307
gqa_ratio_eff *= 2;
308 308
}
309+
if (Q->ne[1] * gqa_ratio_eff <= 2) {
310+
return BEST_FATTN_KERNEL_VEC;
311+
}
309 312
if (Q->ne[1] * gqa_ratio_eff <= 16) {
310 313
return BEST_FATTN_KERNEL_TILE; // On Volta tensor cores are only faster for sufficiently large matrices.
311 314
}

0 commit comments

Comments
 (0)