@@ -63,53 +63,39 @@ void multiply(celerity::distr_queue queue, celerity::buffer<T, 2> mat_a, celerit
6363int main (int argc, char * argv[]) {
6464 bool verification_passed = true ;
6565
66- celerity::runtime::init (&argc, &argv);
67-
68- int rank;
69- MPI_Comm_rank (MPI_COMM_WORLD, &rank);
70-
71- celerity::experimental::bench::log_user_config ({{" matSize" , std::to_string (MAT_SIZE)}});
72-
73- {
74- celerity::distr_queue queue;
75-
76- auto range = celerity::range<2 >(MAT_SIZE, MAT_SIZE);
77- celerity::buffer<float , 2 > mat_a_buf (range);
78- celerity::buffer<float , 2 > mat_b_buf (range);
79- celerity::buffer<float , 2 > mat_c_buf (range);
80-
81- set_identity (queue, mat_a_buf);
82- set_identity (queue, mat_b_buf);
83-
84- celerity::experimental::bench::begin (" main program" );
85-
86- multiply (queue, mat_a_buf, mat_b_buf, mat_c_buf);
87- multiply (queue, mat_b_buf, mat_c_buf, mat_a_buf);
88-
89- queue.submit (celerity::allow_by_ref, [&](celerity::handler& cgh) {
90- celerity::accessor result{mat_a_buf, cgh, celerity::access::one_to_one{}, celerity::read_only_host_task};
91-
92- cgh.host_task (range, [=, &verification_passed](celerity::partition<2 > part) {
93- celerity::experimental::bench::end (" main program" );
94-
95- auto sr = part.get_subrange ();
96- for (size_t i = sr.offset [0 ]; i < sr.offset [0 ] + sr.range [0 ]; ++i) {
97- for (size_t j = sr.offset [0 ]; j < sr.offset [0 ] + sr.range [0 ]; ++j) {
98- const float kernel_value = result[{i, j}];
99- const float host_value = i == j;
100- if (kernel_value != host_value) {
101- fprintf (stderr, " rank %d: VERIFICATION FAILED for element %zu,%zu: %f (received) != %f (expected)\n " , rank, i, j, kernel_value,
102- host_value);
103- verification_passed = false ;
104- break ;
105- }
66+ celerity::distr_queue queue;
67+
68+ auto range = celerity::range<2 >(MAT_SIZE, MAT_SIZE);
69+ celerity::buffer<float , 2 > mat_a_buf (range);
70+ celerity::buffer<float , 2 > mat_b_buf (range);
71+ celerity::buffer<float , 2 > mat_c_buf (range);
72+
73+ set_identity (queue, mat_a_buf);
74+ set_identity (queue, mat_b_buf);
75+
76+ multiply (queue, mat_a_buf, mat_b_buf, mat_c_buf);
77+ multiply (queue, mat_b_buf, mat_c_buf, mat_a_buf);
78+
79+ queue.submit (celerity::allow_by_ref, [&](celerity::handler& cgh) {
80+ celerity::accessor result{mat_a_buf, cgh, celerity::access::one_to_one{}, celerity::read_only_host_task};
81+
82+ cgh.host_task (range, [=, &verification_passed](celerity::partition<2 > part) {
83+ auto sr = part.get_subrange ();
84+ for (size_t i = sr.offset [0 ]; i < sr.offset [0 ] + sr.range [0 ]; ++i) {
85+ for (size_t j = sr.offset [0 ]; j < sr.offset [0 ] + sr.range [0 ]; ++j) {
86+ const float received = result[{i, j}];
87+ const float expected = float (i == j);
88+ if (expected != received) {
89+ fprintf (stderr, " VERIFICATION FAILED for element %zu,%zu: %f (received) != %f (expected)\n " , i, j, received, expected);
90+ verification_passed = false ;
91+ break ;
10692 }
107- if (!verification_passed) { break ; }
10893 }
109- if (verification_passed) { printf (" rank %d: VERIFICATION PASSED!\n " , rank); }
110- });
94+ if (!verification_passed) { break ; }
95+ }
96+ if (verification_passed) { printf (" VERIFICATION PASSED!\n " ); }
11197 });
112- }
98+ });
11399
114100 return verification_passed ? EXIT_SUCCESS : EXIT_FAILURE;
115101}
0 commit comments