Skip to content

Commit b567413

Browse files
committed
feat: add profiling macros for performance measurement in operations
1 parent 8abecfa commit b567413

File tree

2 files changed

+10
-6
lines changed

2 files changed

+10
-6
lines changed

ggml/src/ggml-hexagon/htp/ops-utils.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,4 +146,11 @@ static inline void htp_dump_f16(char * pref, const __fp16 * x, uint32_t n) {
146146
}
147147
}
148148

149+
// PROFILER_START(name): begin a timed region identified by `name`.
// Expands to a declaration of a `const uint64_t` local called
// `<name>_start_cycles`, initialised from HAP_perf_get_qtimer_count().
// The expansion carries no trailing semicolon, so the caller supplies it, and
// it must appear where a declaration is permitted. PROFILER_END(name, ...)
// reads `<name>_start_cycles`, so it has to be used with the same `name` in a
// scope where this variable is still visible.
#define PROFILER_START(name) const uint64_t name##_start_cycles = HAP_perf_get_qtimer_count()
150+
// PROFILER_END(name, fmt, ...): close the timed region opened by
// PROFILER_START(name) and log the elapsed time.
//   name - same identifier that was passed to PROFILER_START; the macro reads
//          `<name>_start_cycles` and declares `<name>_end_cycles` inside its
//          own do/while scope.
//   ...  - a format string followed by its arguments; at least the format
//          string is required, because the expansion places a comma right
//          after __VA_ARGS__.
// The elapsed tick count (end - start) is converted with
// HAP_perf_qtimer_count_to_us(), cast to `unsigned`, and appended as the LAST
// argument of the FARF(HIGH, ...) call. The format string must therefore end
// with one extra conversion for an `unsigned` value (e.g. "%u").
// NOTE(review): the logged value is presumably in microseconds, judging by the
// converter's name - confirm against the HAP_perf documentation.
// Wrapped in do { ... } while (0) so the macro acts as a single statement.
#define PROFILER_END(name, ...) \
151+
do { \
152+
const uint64_t name##_end_cycles = HAP_perf_get_qtimer_count(); \
153+
FARF(HIGH, __VA_ARGS__, (unsigned) HAP_perf_qtimer_count_to_us(name##_end_cycles - name##_start_cycles)); \
154+
} while (0)
155+
149156
#endif /* OPS_UTILS_H */

ggml/src/ggml-hexagon/htp/rope-ops.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -365,8 +365,7 @@ static void rope_job_f32_per_thread(struct rope_th_ctx * rope_ctx, int nth, int
365365
return;
366366
}
367367

368-
uint64_t t1, t2;
369-
t1 = HAP_perf_get_qtimer_count();
368+
PROFILER_START(rope_job_f32);
370369

371370
int is_aligned = 1;
372371
int opt_path = 0;
@@ -381,10 +380,8 @@ static void rope_job_f32_per_thread(struct rope_th_ctx * rope_ctx, int nth, int
381380

382381
rope_hex_f32(rope_ctx, src0_start_row, src0_end_row, nth, ith, opt_path);
383382

384-
t2 = HAP_perf_get_qtimer_count();
385-
386-
FARF(HIGH, "rope-f32: %d/%d/%d: (%u:%u) usec %u\n", ith, nth, opt_path, src0_start_row, src0_end_row,
387-
(unsigned) HAP_perf_qtimer_count_to_us(t2 - t1));
383+
PROFILER_END(rope_job_f32, "rope-f32: %d/%d/%d: (%u:%u) usec %u\n", ith, nth, opt_path, src0_start_row,
384+
src0_end_row);
388385
}
389386

390387
static void rope_job_dispatcher_f32(unsigned int n, unsigned int i, void * data) {

0 commit comments

Comments
 (0)