Skip to content

Commit 222c9f8

Browse files
tests: add multi-graph test for test_barrier
1 parent 8b7c68f commit 222c9f8

File tree

1 file changed

+80
-0
lines changed

1 file changed

+80
-0
lines changed

tests/test-barrier.cpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,84 @@ static void test_active(int n_threads, int n_rounds) {
135135
ggml_free(ctx);
136136
}
137137

138+
static void test_multi_graph(int n_threads, int n_rounds) {
139+
struct ggml_init_params params = {
140+
/* .mem_size = */ 1024*1024*1024,
141+
/* .mem_buffer = */ NULL,
142+
/* .no_alloc = */ false,
143+
};
144+
145+
struct ggml_context * ctx = ggml_init(params);
146+
147+
// Create graphs
148+
struct ggml_cgraph * gf0 = ggml_new_graph(ctx);
149+
{
150+
// Small graph with parallel ops with barriers
151+
struct ggml_tensor * out = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 64);
152+
for (int i = 0; i < 2; i++) {
153+
struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, 64, 128);
154+
out = ggml_mul_mat(ctx, a, out);
155+
156+
struct ggml_tensor * d = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, 128, 64);
157+
out = ggml_mul_mat(ctx, d, out);
158+
}
159+
160+
ggml_build_forward_expand(gf0, out);
161+
}
162+
163+
struct ggml_cgraph * gf1 = ggml_new_graph(ctx);
164+
{
165+
// Small graph with parallel ops with barriers
166+
// Use larger tensors to make sure work_data size is larger than gf0
167+
struct ggml_tensor * out = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 256);
168+
for (int i = 0; i < 4; i++) {
169+
struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, 256, 128);
170+
out = ggml_mul_mat(ctx, a, out);
171+
172+
struct ggml_tensor * d = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, 128, 256);
173+
out = ggml_mul_mat(ctx, d, out);
174+
}
175+
176+
ggml_build_forward_expand(gf1, out);
177+
}
178+
179+
180+
// Create threadpool
181+
struct ggml_threadpool_params tpp = ggml_threadpool_params_default(n_threads);
182+
struct ggml_threadpool* threadpool = ggml_threadpool_new(&tpp);
183+
if (!threadpool) {
184+
fprintf(stderr, "threadpool create failed : n_threads %d\n", n_threads);
185+
exit(1);
186+
}
187+
188+
std::cerr << "graph-compute with"
189+
<< "\n gf0 n_nodes: " << ggml_graph_n_nodes(gf0)
190+
<< "\n gf1 n_nodes: " << ggml_graph_n_nodes(gf1)
191+
<< "\n n_threads: " << n_threads
192+
<< "\n n_rounds: " << n_rounds
193+
<< "\n";
194+
195+
// In this test we keep changing the number of threads every 4th iteration
196+
// and we compute two graphs back to back to test graph frequent graph switching
197+
198+
for (int i=0; i < n_rounds; i++) {
199+
struct ggml_cplan cplan0 = ggml_graph_plan(gf0, (i % 4) == 0 ? 1 : n_threads, threadpool);
200+
std::vector<uint8_t> work_data0(cplan0.work_size);
201+
cplan0.work_data = work_data0.data();
202+
203+
struct ggml_cplan cplan1 = ggml_graph_plan(gf1, (i % 4) == 0 ? 1 : n_threads, threadpool);
204+
std::vector<uint8_t> work_data1(cplan1.work_size);
205+
cplan1.work_data = work_data1.data();
206+
207+
ggml_graph_compute(gf0, &cplan0);
208+
ggml_graph_compute(gf1, &cplan1);
209+
}
210+
211+
ggml_threadpool_free(threadpool);
212+
ggml_free(ctx);
213+
}
214+
215+
138216
int main(int argc, char *argv[]) {
139217

140218
int n_threads = std::max(1, std::min(4, (int) std::thread::hardware_concurrency()));
@@ -152,5 +230,7 @@ int main(int argc, char *argv[]) {
152230

153231
test_active(n_threads, n_rounds * 100);
154232

233+
test_multi_graph(n_threads, n_rounds * 10);
234+
155235
return 0;
156236
}

0 commit comments

Comments
 (0)