@@ -135,6 +135,84 @@ static void test_active(int n_threads, int n_rounds) {
135135 ggml_free (ctx);
136136}
137137
138+ static void test_multi_graph (int n_threads, int n_rounds) {
139+ struct ggml_init_params params = {
140+ /* .mem_size = */ 1024 *1024 *1024 ,
141+ /* .mem_buffer = */ NULL ,
142+ /* .no_alloc = */ false ,
143+ };
144+
145+ struct ggml_context * ctx = ggml_init (params);
146+
147+ // Create graphs
148+ struct ggml_cgraph * gf0 = ggml_new_graph (ctx);
149+ {
150+ // Small graph with parallel ops with barriers
151+ struct ggml_tensor * out = ggml_new_tensor_1d (ctx, GGML_TYPE_F32, 64 );
152+ for (int i = 0 ; i < 2 ; i++) {
153+ struct ggml_tensor * a = ggml_new_tensor_2d (ctx, GGML_TYPE_Q4_0, 64 , 128 );
154+ out = ggml_mul_mat (ctx, a, out);
155+
156+ struct ggml_tensor * d = ggml_new_tensor_2d (ctx, GGML_TYPE_Q4_0, 128 , 64 );
157+ out = ggml_mul_mat (ctx, d, out);
158+ }
159+
160+ ggml_build_forward_expand (gf0, out);
161+ }
162+
163+ struct ggml_cgraph * gf1 = ggml_new_graph (ctx);
164+ {
165+ // Small graph with parallel ops with barriers
166+ // Use larger tensors to make sure work_data size is larger than gf0
167+ struct ggml_tensor * out = ggml_new_tensor_1d (ctx, GGML_TYPE_F32, 256 );
168+ for (int i = 0 ; i < 4 ; i++) {
169+ struct ggml_tensor * a = ggml_new_tensor_2d (ctx, GGML_TYPE_Q4_0, 256 , 128 );
170+ out = ggml_mul_mat (ctx, a, out);
171+
172+ struct ggml_tensor * d = ggml_new_tensor_2d (ctx, GGML_TYPE_Q4_0, 128 , 256 );
173+ out = ggml_mul_mat (ctx, d, out);
174+ }
175+
176+ ggml_build_forward_expand (gf1, out);
177+ }
178+
179+
180+ // Create threadpool
181+ struct ggml_threadpool_params tpp = ggml_threadpool_params_default (n_threads);
182+ struct ggml_threadpool * threadpool = ggml_threadpool_new (&tpp);
183+ if (!threadpool) {
184+ fprintf (stderr, " threadpool create failed : n_threads %d\n " , n_threads);
185+ exit (1 );
186+ }
187+
188+ std::cerr << " graph-compute with"
189+ << " \n gf0 n_nodes: " << ggml_graph_n_nodes (gf0)
190+ << " \n gf1 n_nodes: " << ggml_graph_n_nodes (gf1)
191+ << " \n n_threads: " << n_threads
192+ << " \n n_rounds: " << n_rounds
193+ << " \n " ;
194+
195+ // In this test we keep changing the number of threads every 4th iteration
196+ // and we compute two graphs back to back to test graph frequent graph switching
197+
198+ for (int i=0 ; i < n_rounds; i++) {
199+ struct ggml_cplan cplan0 = ggml_graph_plan (gf0, (i % 4 ) == 0 ? 1 : n_threads, threadpool);
200+ std::vector<uint8_t > work_data0 (cplan0.work_size );
201+ cplan0.work_data = work_data0.data ();
202+
203+ struct ggml_cplan cplan1 = ggml_graph_plan (gf1, (i % 4 ) == 0 ? 1 : n_threads, threadpool);
204+ std::vector<uint8_t > work_data1 (cplan1.work_size );
205+ cplan1.work_data = work_data1.data ();
206+
207+ ggml_graph_compute (gf0, &cplan0);
208+ ggml_graph_compute (gf1, &cplan1);
209+ }
210+
211+ ggml_threadpool_free (threadpool);
212+ ggml_free (ctx);
213+ }
214+
215+
138216int main (int argc, char *argv[]) {
139217
140218 int n_threads = std::max (1 , std::min (4 , (int ) std::thread::hardware_concurrency ()));
@@ -152,5 +230,7 @@ int main(int argc, char *argv[]) {
152230
153231 test_active (n_threads, n_rounds * 100 );
154232
233+ test_multi_graph (n_threads, n_rounds * 10 );
234+
155235 return 0 ;
156236}
0 commit comments