@@ -230,9 +230,34 @@ inline bool APRConverter<ImageType>::get_apr_method(APR<ImageType> &aAPR, PixelD
230230 method_timer.start_timer (" compute_levels" );
231231 computeLevels (grad_temp, local_scale_temp, (*apr).level_max (), par.rel_error , par.dx , par.dy , par.dz );
232232 method_timer.stop_timer ();
233+
234+ method_timer.start_timer (" initialize_particle_cell_tree" );
235+ iPullingScheme.initialize_particle_cell_tree (aAPR.apr_access );
236+ method_timer.stop_timer ();
237+
238+ method_timer.start_timer (" compute_local_particle_set" );
239+ get_local_particle_cell_set (local_scale_temp, local_scale_temp2);
240+ method_timer.stop_timer ();
241+
242+ method_timer.start_timer (" compute_pulling_scheme" );
243+ iPullingScheme.pulling_scheme_main ();
244+ method_timer.stop_timer ();
245+
246+ method_timer.start_timer (" downsample_pyramid" );
247+ std::vector<PixelData<T>> downsampled_img;
248+ // Down-sample the image for particle intensity estimation
249+ downsamplePyrmaid (input_image, downsampled_img, aAPR.level_max (), aAPR.level_min ());
250+ method_timer.stop_timer ();
251+
252+ method_timer.start_timer (" compute_apr_datastructure" );
253+ aAPR.apr_access .initialize_structure_from_particle_cell_tree (aAPR.parameters , iPullingScheme.getParticleCellTree ());
254+ method_timer.stop_timer ();
255+
256+ method_timer.start_timer (" sample_particles" );
257+ aAPR.get_parts_from_img (downsampled_img,aAPR.particles_intensities );
258+ method_timer.stop_timer ();
233259#else
234260 method_timer.start_timer (" compute_gradient_magnitude_using_bsplines and local instensity scale CUDA" );
235- // getFullPipeline(image_temp, grad_temp, local_scale_temp, local_scale_temp2, bspline_offset, par, (*apr).level_max());
236261 APRTimer t (true );
237262 APRTimer d (true );
238263 t.start_timer (" =========== ALL" );
@@ -241,35 +266,57 @@ inline bool APRConverter<ImageType>::get_apr_method(APR<ImageType> &aAPR, PixelD
241266
242267 std::vector<GpuProcessingTask<ImageType>> gpts;
243268
244- int n = 3 ;
245- int rep = 5 ;
246- for (int i = 0 ; i < n; ++i) {
269+ int numOfStreams = 1 ;
270+ int repetitionsPerStream = 1 ;
271+
272+ // Create the streams and send each one its initial task
273+ for (int i = 0 ; i < numOfStreams; ++i) {
247274 gpts.emplace_back (GpuProcessingTask<ImageType>(image_temp, local_scale_temp, par, bspline_offset, (*apr).level_max ()));
248275 gpts.back ().sendDataToGpu ();
249276 gpts.back ().processOnGpu ();
250277 }
251278
252- for (int i = 0 ; i < n * rep; ++i) {
253- int c = i % n;
279+ for (int i = 0 ; i < numOfStreams * repetitionsPerStream; ++i) {
280+ int c = i % numOfStreams;
281+
282+ // get data from previous task
254283 gpts[c].getDataFromGpu ();
255284
256285 // in theory we would get new data here and send it to the task
257- if (i < n * (rep - 1 )) { gpts[c].sendDataToGpu ();
258- gpts[c].processOnGpu ();
259- }
260- std::cout << " --------- start CPU processing ---------- " << i << std::endl;
261- d.start_timer (" CPU processing" );
286+ if (i < numOfStreams * (repetitionsPerStream - 1 )) {
287+ gpts[c].sendDataToGpu ();
288+ gpts[c].processOnGpu ();
289+ }
290+
291+ // Postprocess on CPU
292+ std::cout << " --------- start CPU processing ---------- " << i << std::endl;
262293 init_apr (aAPR, input_image);
294+ d.start_timer (" 1" );
263295 iPullingScheme.initialize_particle_cell_tree (aAPR.apr_access );
296+ d.stop_timer ();
297+ d.start_timer (" 2" );
264298 PixelData<float > lst (local_scale_temp, true );
299+ d.stop_timer ();
300+ d.start_timer (" 3" );
265301 get_local_particle_cell_set (lst, local_scale_temp2);
302+ d.stop_timer ();
303+ d.start_timer (" 4" );
266304 iPullingScheme.pulling_scheme_main ();
305+ d.stop_timer ();
306+ d.start_timer (" 5" );
267307 PixelData<T> inImg (input_image, true );
308+ d.stop_timer ();
309+ d.start_timer (" 6" );
268310 std::vector<PixelData<T>> downsampled_img;
269311 downsamplePyrmaid (inImg, downsampled_img, aAPR.level_max (), aAPR.level_min ());
312+ d.stop_timer ();
313+ d.start_timer (" 7" );
270314 aAPR.apr_access .initialize_structure_from_particle_cell_tree (aAPR.parameters , iPullingScheme.getParticleCellTree ());
315+ d.stop_timer ();
316+ d.start_timer (" 8" );
271317 aAPR.get_parts_from_img (downsampled_img, aAPR.particles_intensities );
272318 d.stop_timer ();
319+
273320 }
274321 std::cout << " Total n ENDED" << std::endl;
275322
@@ -278,32 +325,6 @@ inline bool APRConverter<ImageType>::get_apr_method(APR<ImageType> &aAPR, PixelD
278325 method_timer.stop_timer ();
279326#endif
280327
281- // method_timer.start_timer("initialize_particle_cell_tree");
282- // iPullingScheme.initialize_particle_cell_tree(aAPR.apr_access);
283- // method_timer.stop_timer();
284- //
285- // method_timer.start_timer("compute_local_particle_set");
286- // get_local_particle_cell_set(local_scale_temp, local_scale_temp2);
287- // method_timer.stop_timer();
288- //
289- // method_timer.start_timer("compute_pulling_scheme");
290- // iPullingScheme.pulling_scheme_main();
291- // method_timer.stop_timer();
292- //
293- // method_timer.start_timer("downsample_pyramid");
294- // std::vector<PixelData<T>> downsampled_img;
295- // //Down-sample the image for particle intensity estimation
296- // downsamplePyrmaid(input_image, downsampled_img, aAPR.level_max(), aAPR.level_min());
297- // method_timer.stop_timer();
298- //
299- // method_timer.start_timer("compute_apr_datastructure");
300- // aAPR.apr_access.initialize_structure_from_particle_cell_tree(aAPR.parameters, iPullingScheme.getParticleCellTree());
301- // method_timer.stop_timer();
302- //
303- // method_timer.start_timer("sample_particles");
304- // aAPR.get_parts_from_img(downsampled_img,aAPR.particles_intensities);
305- // method_timer.stop_timer();
306-
307328 computation_timer.stop_timer ();
308329
309330 total_timer.stop_timer ();
0 commit comments