@@ -234,27 +234,31 @@ inline bool APRConverter<ImageType>::get_apr_method(APR<ImageType> &aAPR, PixelD
234234 method_timer.start_timer (" compute_gradient_magnitude_using_bsplines and local instensity scale CUDA" );
235235// getFullPipeline(image_temp, grad_temp, local_scale_temp, local_scale_temp2, bspline_offset, par, (*apr).level_max());
236236 APRTimer t (true );
237+ APRTimer d (true );
237238 t.start_timer (" =========== ALL" );
238239 {
239240
240241
241242 std::vector<GpuProcessingTask<ImageType>> gpts;
242243
243244 int n = 3 ;
245+ int rep = 5 ;
244246 for (int i = 0 ; i < n; ++i) {
245247 gpts.emplace_back (GpuProcessingTask<ImageType>(image_temp, local_scale_temp, par, bspline_offset, (*apr).level_max ()));
246248 gpts.back ().sendDataToGpu ();
247249 gpts.back ().processOnGpu ();
248250 }
249251
250- for (int i = 0 ; i < n * 2 ; ++i) {
252+ for (int i = 0 ; i < n * rep ; ++i) {
251253 int c = i % n;
252254 gpts[c].getDataFromGpu ();
253255
254256 // in theory we get new data and send them to task
255- gpts[c].sendDataToGpu ();
257+ if (i < n * (rep - 1 )) { gpts[c].sendDataToGpu ();
256258 gpts[c].processOnGpu ();
257-
259+ }
260+ std::cout << " --------- start CPU processing ---------- " << i << std::endl;
261+ d.start_timer (" CPU processing" );
258262 init_apr (aAPR, input_image);
259263 iPullingScheme.initialize_particle_cell_tree (aAPR.apr_access );
260264 PixelData<float > lst (local_scale_temp, true );
@@ -265,6 +269,7 @@ inline bool APRConverter<ImageType>::get_apr_method(APR<ImageType> &aAPR, PixelD
265269 downsamplePyrmaid (inImg, downsampled_img, aAPR.level_max (), aAPR.level_min ());
266270 aAPR.apr_access .initialize_structure_from_particle_cell_tree (aAPR.parameters , iPullingScheme.getParticleCellTree ());
267271 aAPR.get_parts_from_img (downsampled_img, aAPR.particles_intensities );
272+ d.stop_timer ();
268273 }
269274 std::cout << " Total n ENDED" << std::endl;
270275
0 commit comments