@@ -193,11 +193,6 @@ typedef pthread_t ggml_thread_t;
193193#include <TargetConditionals.h>
194194#endif
195195
196- #include <stdatomic.h>
197-
198- static _Atomic uint64_t ggml_op_us [GGML_OP_COUNT ];
199- static _Atomic uint64_t ggml_op_calls [GGML_OP_COUNT ];
200-
201196static const struct ggml_type_traits_cpu type_traits_cpu [GGML_TYPE_COUNT ] = {
202197 [GGML_TYPE_F32 ] = {
203198 .from_float = (ggml_from_float_t ) ggml_cpu_fp32_to_fp32 ,
@@ -2869,44 +2864,6 @@ struct ggml_cplan ggml_graph_plan(
28692864 return cplan ;
28702865}
28712866
2872- // static thread_ret_t ggml_graph_compute_thread(void * data) {
2873- // struct ggml_compute_state * state = (struct ggml_compute_state *) data;
2874- // struct ggml_threadpool * tp = state->threadpool;
2875- //
2876- // const struct ggml_cgraph * cgraph = tp->cgraph;
2877- // const struct ggml_cplan * cplan = tp->cplan;
2878- //
2879- // set_numa_thread_affinity(state->ith);
2880- //
2881- // struct ggml_compute_params params = {
2882- // /*.ith =*/ state->ith,
2883- // /*.nth =*/ atomic_load_explicit(&tp->n_threads_cur, memory_order_relaxed),
2884- // /*.wsize =*/ cplan->work_size,
2885- // /*.wdata =*/ cplan->work_data,
2886- // /*.threadpool=*/ tp,
2887- // };
2888- //
2889- // for (int node_n = 0; node_n < cgraph->n_nodes && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node_n; node_n++) {
2890- // struct ggml_tensor * node = cgraph->nodes[node_n];
2891- //
2892- // ggml_compute_forward(&params, node);
2893- //
2894- // if (state->ith == 0 && cplan->abort_callback &&
2895- // cplan->abort_callback(cplan->abort_callback_data)) {
2896- // atomic_store_explicit(&tp->abort, node_n + 1, memory_order_relaxed);
2897- // tp->ec = GGML_STATUS_ABORTED;
2898- // }
2899- //
2900- // if (node_n + 1 < cgraph->n_nodes) {
2901- // ggml_barrier(state->threadpool);
2902- // }
2903- // }
2904- //
2905- // ggml_barrier(state->threadpool);
2906- //
2907- // return 0;
2908- // }
2909-
29102867static thread_ret_t ggml_graph_compute_thread (void * data ) {
29112868 struct ggml_compute_state * state = (struct ggml_compute_state * ) data ;
29122869 struct ggml_threadpool * tp = state -> threadpool ;
@@ -2927,25 +2884,21 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
29272884 for (int node_n = 0 ; node_n < cgraph -> n_nodes && atomic_load_explicit (& tp -> abort , memory_order_relaxed ) != node_n ; node_n ++ ) {
29282885 struct ggml_tensor * node = cgraph -> nodes [node_n ];
29292886
2930- uint64_t t0 = ggml_time_us ();
29312887 ggml_compute_forward (& params , node );
2932- uint64_t dt = ggml_time_us () - t0 ;
2933-
2934- atomic_fetch_add_explicit (& ggml_op_us [node -> op ], dt , memory_order_relaxed );
2935- atomic_fetch_add_explicit (& ggml_op_calls [node -> op ], 1 , memory_order_relaxed );
29362888
29372889 if (state -> ith == 0 && cplan -> abort_callback &&
29382890 cplan -> abort_callback (cplan -> abort_callback_data )) {
29392891 atomic_store_explicit (& tp -> abort , node_n + 1 , memory_order_relaxed );
29402892 tp -> ec = GGML_STATUS_ABORTED ;
2941- }
2893+ }
29422894
29432895 if (node_n + 1 < cgraph -> n_nodes ) {
29442896 ggml_barrier (state -> threadpool );
29452897 }
29462898 }
29472899
29482900 ggml_barrier (state -> threadpool );
2901+
29492902 return 0 ;
29502903}
29512904
@@ -3248,33 +3201,6 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
32483201 ggml_threadpool_free (threadpool );
32493202 }
32503203
3251- // printf("\n========= GGML OP PERF =========\n");
3252- // for (int i = 0; i < GGML_OP_COUNT; i++) {
3253- // uint64_t us = atomic_load(&ggml_op_us[i]);
3254- // uint64_t calls = atomic_load(&ggml_op_calls[i]);
3255- // if (calls == 0) continue;
3256- //
3257- // printf("%-16s : %8llu us %6llu calls avg %6llu us\n",
3258- // ggml_op_name(i),
3259- // (unsigned long long)us,
3260- // (unsigned long long)calls,
3261- // (unsigned long long)(us / calls));
3262- // }
3263- // printf("================================\n\n");
3264-
3265- // printf("\n");
3266- // for (int i = 0; i < GGML_OP_COUNT; i++) {
3267- // uint64_t us = atomic_load(&ggml_op_us[i]);
3268- // uint64_t calls = atomic_load(&ggml_op_calls[i]);
3269- // if (calls == 0) continue;
3270- //
3271- // printf("%-16s,%8llu us,%6llu,%6llu us,",
3272- // ggml_op_name(i),
3273- // (unsigned long long)us,
3274- // (unsigned long long)calls,
3275- // (unsigned long long)(us / calls));
3276- // }
3277-
32783204 return ret ;
32793205}
32803206
0 commit comments