We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e9a02fd commit e324bb0 · Copy full SHA for e324bb0
ggml/src/ggml-hexagon/htp/rope-ops.c
@@ -344,13 +344,8 @@ static void rope_hex_f32(struct rope_th_ctx * rope_ctx,
344
dst_data_loc += (is_neox ? half_dims : 0);
345
}
346
347
- for (uint32_t i0 = rope_ctx->n_dims; i0 < ne0; i0 += 2) {
348
- dst_data_loc[0] = src_loc[0];
349
- dst_data_loc[1] = src_loc[1];
350
-
351
- src_loc += 2;
352
- dst_data_loc += 2;
353
- }
+ // TODO: use SIMD to speed up copying the remaining elements
+ memcpy(dst_data_loc, src_loc, (ne0 - rope_ctx->n_dims) * sizeof(float));
354
355
356
0 commit comments