@@ -174,22 +174,22 @@ void getGradientCuda(const PixelData<ImgType> &image, PixelData<float> &local_sc
174174 isErrorDetected = false ;
175175 isErrorDetectedCuda.copyH2D ();
176176 if (image.y_num > 2 ) runBsplineYdir (cudaImage, image.getDimension (), py, boundary, isErrorDetectedCuda.get (), aStream);
177- if (image.x_num > 2 ) runBsplineXdir (cudaImage, image.getDimension (), px, aStream);
178- if (image.z_num > 2 ) runBsplineZdir (cudaImage, image.getDimension (), pz, aStream);
179- isErrorDetectedCuda.copyD2H ();
180- if (isErrorDetected) {
181- throw std::invalid_argument (" integer under-/overflow encountered in CUDA bspline(XYZ)dir - "
182- " try squashing the input image to a narrower range or use APRConverter<float>" );
183- }
184-
185-
186- runKernelGradient (cudaImage, cudaGrad, image.getDimension (), local_scale_temp.getDimension (), par.dx , par.dy , par.dz , aStream);
187-
188- runDownsampleMean (cudaImage, cudalocal_scale_temp, image.x_num , image.y_num , image.z_num , aStream);
189-
190- if (image.y_num > 2 ) runInvBsplineYdir (cudalocal_scale_temp, local_scale_temp.x_num , local_scale_temp.y_num , local_scale_temp.z_num , aStream);
191- if (image.x_num > 2 ) runInvBsplineXdir (cudalocal_scale_temp, local_scale_temp.x_num , local_scale_temp.y_num , local_scale_temp.z_num , aStream);
192- if (image.z_num > 2 ) runInvBsplineZdir (cudalocal_scale_temp, local_scale_temp.x_num , local_scale_temp.y_num , local_scale_temp.z_num , aStream);
177+ if (image.x_num > 2 ) runBsplineXdir (cudaImage, image.getDimension (), px, isErrorDetectedCuda. get (), aStream);
178+ // if (image.z_num > 2) runBsplineZdir(cudaImage, image.getDimension(), pz, aStream);
179+ // isErrorDetectedCuda.copyD2H();
180+ // if (isErrorDetected) {
181+ // throw std::invalid_argument("integer under-/overflow encountered in CUDA bspline(XYZ)dir - "
182+ // "try squashing the input image to a narrower range or use APRConverter<float>");
183+ // }
184+ //
185+ //
186+ // runKernelGradient(cudaImage, cudaGrad, image.getDimension(), local_scale_temp.getDimension(), par.dx, par.dy, par.dz, aStream);
187+ //
188+ // runDownsampleMean(cudaImage, cudalocal_scale_temp, image.x_num, image.y_num, image.z_num, aStream);
189+ //
190+ // if (image.y_num > 2) runInvBsplineYdir(cudalocal_scale_temp, local_scale_temp.x_num, local_scale_temp.y_num, local_scale_temp.z_num, aStream);
191+ // if (image.x_num > 2) runInvBsplineXdir(cudalocal_scale_temp, local_scale_temp.x_num, local_scale_temp.y_num, local_scale_temp.z_num, aStream);
192+ // if (image.z_num > 2) runInvBsplineZdir(cudalocal_scale_temp, local_scale_temp.x_num, local_scale_temp.y_num, local_scale_temp.z_num, aStream);
193193}
194194
195195class CurrentTime {
@@ -361,27 +361,27 @@ public:
361361 splineCudaX, splineCudaY, splineCudaZ, boundary.get (), isErrorDetected, isErrorDetectedCuda,
362362 iBsplineOffset, iParameters, iStream);
363363 time.stop_timer ();
364- time.start_timer (" intensity" );
365- runLocalIntensityScalePipeline (iCpuLevels, iParameters, local_scale_temp.get (), local_scale_temp2.get (), iStream);
366- time.stop_timer ();
367-
368-
369- // Apply parameters from APRConverter:
370- time.start_timer (" runs...." );
371- runThreshold (local_scale_temp2.get (), gradient.get (), iCpuLevels.x_num , iCpuLevels.y_num , iCpuLevels.z_num , iParameters.Ip_th + iBsplineOffset, iStream);
372- runRescaleAndThreshold (local_scale_temp.get (), iCpuLevels.mesh .size (), iParameters.sigma_th , iParameters.sigma_th_max , iStream);
373- runThreshold (gradient.get (), gradient.get (), iCpuLevels.x_num , iCpuLevels.y_num , iCpuLevels.z_num , iParameters.grad_th , iStream);
374- // TODO: automatic parameters are not implemented for GPU pipeline (yet)
375- time.stop_timer ();
376-
377- time.start_timer (" compute lev" );
378- float min_dim = std::min (iParameters.dy , std::min (iParameters.dx , iParameters.dz ));
379- float level_factor = pow (2 , iMaxLevel) * min_dim;
380- const float mult_const = level_factor/iParameters.rel_error ;
381- runComputeLevels (gradient.get (), local_scale_temp.get (), iCpuLevels.mesh .size (), mult_const, iStream);
382- time.stop_timer ();
383- computeOvpcCuda (local_scale_temp.get (), pctc, iAprInfo, iStream);
384- computeLinearStructureCuda (y_vec.get (), pctc, iAprInfo, iParameters, lacs, iStream);
364+ // time.start_timer("intensity");
365+ // runLocalIntensityScalePipeline(iCpuLevels, iParameters, local_scale_temp.get(), local_scale_temp2.get(), iStream);
366+ // time.stop_timer();
367+ //
368+ //
369+ // // Apply parameters from APRConverter:
370+ // time.start_timer("runs....");
371+ // runThreshold(local_scale_temp2.get(), gradient.get(), iCpuLevels.x_num, iCpuLevels.y_num, iCpuLevels.z_num, iParameters.Ip_th + iBsplineOffset, iStream);
372+ // runRescaleAndThreshold(local_scale_temp.get(), iCpuLevels.mesh.size(), iParameters.sigma_th, iParameters.sigma_th_max, iStream);
373+ // runThreshold(gradient.get(), gradient.get(), iCpuLevels.x_num, iCpuLevels.y_num, iCpuLevels.z_num, iParameters.grad_th, iStream);
374+ // // TODO: automatic parameters are not implemented for GPU pipeline (yet)
375+ // time.stop_timer();
376+ //
377+ // time.start_timer("compute lev");
378+ // float min_dim = std::min(iParameters.dy, std::min(iParameters.dx, iParameters.dz));
379+ // float level_factor = pow(2, iMaxLevel) * min_dim;
380+ // const float mult_const = level_factor/iParameters.rel_error;
381+ // runComputeLevels(gradient.get(), local_scale_temp.get(), iCpuLevels.mesh.size(), mult_const, iStream);
382+ // time.stop_timer();
383+ // computeOvpcCuda(local_scale_temp.get(), pctc, iAprInfo, iStream);
384+ // computeLinearStructureCuda(y_vec.get(), pctc, iAprInfo, iParameters, lacs, iStream);
385385 }
386386
387387 ~GpuProcessingTaskImpl () {
@@ -446,7 +446,7 @@ void cudaFilterBsplineFull(PixelData<ImgType> &input, float lambda, float tolera
446446 BsplineParams p = prepareBsplineStuff ((size_t )input.x_num , lambda, tolerance, maxFilterLen);
447447 auto cuda = transferSpline (p, aStream);
448448 auto splineCuda = cuda.first ;
449- runBsplineXdir (cudaInput.get (), input.getDimension (), splineCuda, aStream);
449+ runBsplineXdir (cudaInput.get (), input.getDimension (), splineCuda, error. get (), aStream);
450450 }
451451 if (flags & BSPLINE_Z_DIR) {
452452 BsplineParams p = prepareBsplineStuff ((size_t )input.z_num , lambda, tolerance, maxFilterLen);
0 commit comments