Skip to content

Commit e324bb0

Browse files
committed
fix: optimize element copying in rope_hex_f32 using memcpy
1 parent e9a02fd commit e324bb0

File tree

1 file changed

+2
-7
lines changed

1 file changed

+2
-7
lines changed

ggml/src/ggml-hexagon/htp/rope-ops.c

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -344,13 +344,8 @@ static void rope_hex_f32(struct rope_th_ctx * rope_ctx,
                 dst_data_loc += (is_neox ? half_dims : 0);
             }
 
-            for (uint32_t i0 = rope_ctx->n_dims; i0 < ne0; i0 += 2) {
-                dst_data_loc[0] = src_loc[0];
-                dst_data_loc[1] = src_loc[1];
-
-                src_loc += 2;
-                dst_data_loc += 2;
-            }
+            // TODO: use simd to speed up the remaining elements copy
+            memcpy(dst_data_loc, src_loc, (ne0 - rope_ctx->n_dims) * sizeof(float));
         }
     }
 }

0 commit comments

Comments
 (0)