@@ -134,7 +134,7 @@ namespace {
134134 };
135135 }
136136
137- auto transferSpline (BsplineParams &aParams, cudaStream_t aStream) {
137+ auto transferSpline (const BsplineParams &aParams, cudaStream_t aStream) {
138138 ScopedCudaMemHandler<float *, H2D> bc1 (aParams.bc1 .get (), aParams.k0 , aStream);
139139 ScopedCudaMemHandler<float *, H2D> bc2 (aParams.bc2 .get (), aParams.k0 , aStream);
140140 ScopedCudaMemHandler<float *, H2D> bc3 (aParams.bc3 .get (), aParams.k0 , aStream);
@@ -267,11 +267,13 @@ class GpuProcessingTask<U>::GpuProcessingTaskImpl {
267267
268268 // bspline stuff
269269 const float tolerance = 0.0001 ;
270- BsplineParams params;
271- ScopedCudaMemHandler<float *, H2D> bc1;
272- ScopedCudaMemHandler<float *, H2D> bc2;
273- ScopedCudaMemHandler<float *, H2D> bc3;
274- ScopedCudaMemHandler<float *, H2D> bc4;
270+ std::pair<BsplineParamsCuda, BsplineParamsCudaMemoryHandlers> cudax;
271+ std::pair<BsplineParamsCuda, BsplineParamsCudaMemoryHandlers> cuday;
272+ std::pair<BsplineParamsCuda, BsplineParamsCudaMemoryHandlers> cudaz;
273+ BsplineParamsCuda splineCudaX;
274+ BsplineParamsCuda splineCudaY;
275+ BsplineParamsCuda splineCudaZ;
276+
275277 const size_t boundaryLen;
276278 ScopedCudaMemHandler<float *, JUST_ALLOC> boundary;
277279
@@ -306,19 +308,18 @@ public:
306308 iAprInfo (iCpuImage.getDimension()),
307309 iBsplineOffset (bspline_offset),
308310 iMaxLevel (maxLevel),
309- // TODO: This is wrong and done only for compile. BsplineParams has to be computed seperately for each dimension.
310- // Should be fixed when other parts of pipeline are ready.
311- // params(prepareBsplineStuff((size_t)inputImage.x_num, parameters.lambda, tolerance)),
312- // bc1(params.bc1.get(), params.k0, iStream),
313- // bc2(params.bc2.get(), params.k0, iStream),
314- // bc3(params.bc3.get(), params.k0, iStream),
315- // bc4(params.bc4.get(), params.k0, iStream),
311+ cudax (transferSpline(prepareBsplineStuff(iCpuImage.x_num, iParameters.lambda, tolerance), iStream)),
312+ cuday (transferSpline(prepareBsplineStuff(iCpuImage.y_num, iParameters.lambda, tolerance), iStream)),
313+ cudaz (transferSpline(prepareBsplineStuff(iCpuImage.z_num, iParameters.lambda, tolerance), iStream)),
316314 boundaryLen{(2 /* two first elements*/ + 2 /* two last elements */ ) * (size_t )inputImage.x_num * (size_t )inputImage.z_num },
317315 boundary{nullptr , boundaryLen, iStream},
318316 pctc (iAprInfo, iStream),
319317 y_vec (nullptr , iAprInfo.getSize(), iStream)
320318 {
321- // std::cout << "\n=============== GpuProcessingTaskImpl ===================\n\n";
319+ splineCudaX = cudax.first ;
320+ splineCudaY = cuday.first ;
321+ splineCudaZ = cudaz.first ;
322+ std::cout << " \n =============== GpuProcessingTaskImpl ===================" << iStream << " \n\n " ;
322323// std::cout << iCpuImage << std::endl;
323324// std::cout << iCpuLevels << std::endl;
324325 }
@@ -332,47 +333,42 @@ public:
332333 }
333334
334335 LinearAccessCudaStructs getDataFromGpu () {
335- // CurrentTime ct;
336- // uint64_t start = ct.microseconds();
337- // local_scale_temp.copyD2H();
338- // checkCuda(cudaStreamSynchronize(iStream));
339- // std::cout << "RCV time: " << ct.microseconds() - start << std::endl;
336+ // TODO: Temporarily turned off here since synchronized already in computeLinearStructureCuda
337+ // checkCuda(cudaStreamSynchronize(iStream));
338+
340339 return std::move (lacs);
341340 }
342341
343342 void processOnGpu () {
344- CurrentTime ct;
343+ // image.copyH2D();
344+ CurrentTime ct{};
345345 uint64_t start = ct.microseconds ();
346346
347- // TODO: temporarily bspline params are generated here
348- // In principle this is OK and correct but would be faster (for processing series of same size images) if
349- // they would be calculated in constructor of GpuProcessingTaskImpl class (once).
350- BsplineParams px = prepareBsplineStuff (iCpuImage.x_num , iParameters.lambda , tolerance);
351- auto cudax = transferSpline (px, iStream);
352- auto splineCudaX = cudax.first ;
353- BsplineParams py = prepareBsplineStuff (iCpuImage.y_num , iParameters.lambda , tolerance);
354- auto cuday = transferSpline (py, iStream);
355- auto splineCudaY = cuday.first ;
356- BsplineParams pz = prepareBsplineStuff (iCpuImage.z_num , iParameters.lambda , tolerance);
357- auto cudaz = transferSpline (pz, iStream);
358- auto splineCudaZ = cudaz.first ;
359-
347+ CudaTimer time (false , " PIPELINE" );
348+ time.start_timer (" getgradient" );
360349 getGradientCuda (iCpuImage, iCpuLevels, image.get (), gradient.get (), local_scale_temp.get (),
361350 splineCudaX, splineCudaY, splineCudaZ, boundary.get (),
362351 iBsplineOffset, iParameters, iStream);
352+ time.stop_timer ();
353+ time.start_timer (" intensity" );
363354 runLocalIntensityScalePipeline (iCpuLevels, iParameters, local_scale_temp.get (), local_scale_temp2.get (), iStream);
355+ time.stop_timer ();
356+
364357
365358 // Apply parameters from APRConverter:
359+ time.start_timer (" runs...." );
366360 runThreshold (local_scale_temp2.get (), gradient.get (), iCpuLevels.x_num , iCpuLevels.y_num , iCpuLevels.z_num , iParameters.Ip_th + iBsplineOffset, iStream);
367361 runRescaleAndThreshold (local_scale_temp.get (), iCpuLevels.mesh .size (), iParameters.sigma_th , iParameters.sigma_th_max , iStream);
368362 runThreshold (gradient.get (), gradient.get (), iCpuLevels.x_num , iCpuLevels.y_num , iCpuLevels.z_num , iParameters.grad_th , iStream);
369363 // TODO: automatic parameters are not implemented for GPU pipeline (yet)
364+ time.stop_timer ();
370365
366+ time.start_timer (" compute lev" );
371367 float min_dim = std::min (iParameters.dy , std::min (iParameters.dx , iParameters.dz ));
372368 float level_factor = pow (2 , iMaxLevel) * min_dim;
373369 const float mult_const = level_factor/iParameters.rel_error ;
374370 runComputeLevels (gradient.get (), local_scale_temp.get (), iCpuLevels.mesh .size (), mult_const, iStream);
375-
371+ time. stop_timer ();
376372 computeOvpcCuda (local_scale_temp.get (), pctc, iAprInfo, iStream);
377373 computeLinearStructureCuda (y_vec.get (), pctc, iAprInfo, iParameters, lacs, iStream);
378374 }
0 commit comments