Skip to content

Commit 0121291

Browse files
committed
fix: optimize loop boundaries in rope_hex_f32 for better performance
1 parent e324bb0 commit 0121291

File tree

1 file changed

+3
-10
lines changed

1 file changed

+3
-10
lines changed

ggml/src/ggml-hexagon/htp/rope-ops.c

Lines changed: 3 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -282,23 +282,16 @@ static void rope_hex_f32(struct rope_th_ctx * rope_ctx,
282282
freq_factors = (const float *) src2->data;
283283
}
284284

285-
int ir = 0;
286-
const int32_t half_dims = rope_ctx->n_dims / 2;
285+
const uint32_t i0_end = MIN(ir1, ne1);
286+
const int32_t half_dims = rope_ctx->n_dims / 2;
287287
for (uint32_t i3 = 0; i3 < ne3; i3++) { // batch
288288
for (uint32_t i2 = 0; i2 < ne2; i2++) { // seq-len
289289
const int32_t p = pos[i2];
290290

291291
rope_cache_init(p, rope_ctx->freq_scale, freq_factors, rope_ctx->corr_dims, ne0, rope_ctx->ext_factor,
292292
rope_ctx->attn_factor, wp0, rope_ctx->theta_scale);
293293

294-
for (uint32_t i1 = 0; i1 < ne1; i1++) { // attn-heads
295-
if (ir++ < ir0) {
296-
continue;
297-
}
298-
if (ir > ir1) {
299-
break;
300-
}
301-
294+
for (uint32_t i1 = ir0; i1 < i0_end; i1++) { // attn-heads
302295
const float * src = (float *) ((char *) src0->data + i3 * nb03 + i2 * nb02 + i1 * nb01);
303296
float * dst_data = (float *) ((char *) dst->data + i3 * nb3 + i2 * nb2 + i1 * nb1);
304297

0 commit comments

Comments (0)