Skip to content

Commit 2fd9264

Browse files
Update ggml/src/ggml-cuda/solve_tri.cu
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
1 parent c55b5bf commit 2fd9264

File tree

1 file changed: +1 -1 lines changed

1 file changed: +1 -1 lines changed

ggml/src/ggml-cuda/solve_tri.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ static __global__ void solve_tri_f32_fast(const float * __restrict__ A,
70 70   for (int row = 0; row < nrows_low; ++row) {
71 71   float sum = 0.0f;
72 72   if (lane < row) {
73    - sum = fmaf(sA[row * n + lane], x_low, sum);
   73 + sum += sA[row * n + lane] * x_low;
74 74   }
75 75   sum = warp_reduce_sum(sum);
76 76

0 commit comments

Comments (0)