@@ -57,6 +57,7 @@ namespace {
5757 }
5858
5959 BsplineParams prepareBsplineStuff (size_t dimLen, float lambda, float tol, int maxFilterLen = -1 ) {
60+
6061 // Recursive Filter Implementation for Smoothing BSplines
6162 // B-Spline Signal Processing: Part II - Efficient Design and Applications, Unser 1993
6263
@@ -79,8 +80,8 @@ namespace {
7980
8081 const float norm_factor = powf ((1 - 2.0 * rho * cosf (omg) + powf (rho, 2 )), 2 );
8182
82- // std::cout << std::fixed << std::setprecision(9) << "GPU: xi=" << xi << " rho=" << rho << " omg=" << omg << " gamma=" << gamma << " b1=" << b1
83- // << " b2=" << b2 << " k0=" << k0 << " minLen=" << minLen << " norm_factor=" << norm_factor << std::endl;
83+ // std::cout << std::fixed << std::setprecision(9) << "GPU: xi=" << xi << " rho=" << rho << " omg=" << omg << " gamma=" << gamma << " b1=" << b1
84+ // << " b2=" << b2 << " k0=" << k0 << " minLen=" << minLen << " norm_factor=" << norm_factor << " lambda=" << lambda << " tol=" << tol << std::endl;
8485
8586 // ------- Calculating boundary conditions
8687
@@ -169,18 +170,18 @@ void getGradientCuda(const PixelData<ImgType> &image, PixelData<float> &local_sc
169170
170171 // TODO: Used PixelDataDim in all methods below and change input parameter from image to imageDim
171172
172- runBsplineYdir (cudaImage, image.getDimension (), py, boundary, aStream);
173- runBsplineXdir (cudaImage, image.getDimension (), px, aStream);
174- runBsplineZdir (cudaImage, image.getDimension (), pz, aStream);
173+ if (image. y_num > 2 ) runBsplineYdir (cudaImage, image.getDimension (), py, boundary, aStream);
174+ if (image. x_num > 2 ) runBsplineXdir (cudaImage, image.getDimension (), px, aStream);
175+ if (image. z_num > 2 ) runBsplineZdir (cudaImage, image.getDimension (), pz, aStream);
175176
176177
177178 runKernelGradient (cudaImage, cudaGrad, image.getDimension (), local_scale_temp.getDimension (), par.dx , par.dy , par.dz , aStream);
178179
179180 runDownsampleMean (cudaImage, cudalocal_scale_temp, image.x_num , image.y_num , image.z_num , aStream);
180181
181- runInvBsplineYdir (cudalocal_scale_temp, local_scale_temp.x_num , local_scale_temp.y_num , local_scale_temp.z_num , aStream);
182- runInvBsplineXdir (cudalocal_scale_temp, local_scale_temp.x_num , local_scale_temp.y_num , local_scale_temp.z_num , aStream);
183- runInvBsplineZdir (cudalocal_scale_temp, local_scale_temp.x_num , local_scale_temp.y_num , local_scale_temp.z_num , aStream);
182+ if (image. y_num > 2 ) runInvBsplineYdir (cudalocal_scale_temp, local_scale_temp.x_num , local_scale_temp.y_num , local_scale_temp.z_num , aStream);
183+ if (image. x_num > 2 ) runInvBsplineXdir (cudalocal_scale_temp, local_scale_temp.x_num , local_scale_temp.y_num , local_scale_temp.z_num , aStream);
184+ if (image. z_num > 2 ) runInvBsplineZdir (cudalocal_scale_temp, local_scale_temp.x_num , local_scale_temp.y_num , local_scale_temp.z_num , aStream);
184185}
185186
186187class CurrentTime {
@@ -202,6 +203,49 @@ public:
202203};
203204
204205
/**
 * Zeroes every element of output whose corresponding input element does not exceed thresholdLevel.
 * Output elements whose input is above thresholdLevel are left untouched.
 * Each thread handles exactly one element (its flat 1D global index); threads whose index
 * falls past the end of the arrays do nothing.
 * @param input - values compared against thresholdLevel
 * @param output - array modified in place; a thread reads and writes only its own index
 * @param length - number of elements in input/output arrays
 * @param thresholdLevel - input values <= thresholdLevel cause the output element to be set to 0
 */
template <typename T, typename S>
__global__ void threshold(const T *input, S *output, size_t length, float thresholdLevel) {
    const size_t globalIdx = threadIdx.x + (size_t)blockDim.x * blockIdx.x;

    // The grid is rounded up to whole blocks, so the tail threads have nothing to process.
    if (globalIdx >= length) return;

    const bool atOrBelowLevel = input[globalIdx] <= thresholdLevel;
    if (atOrBelowLevel) {
        output[globalIdx] = 0;
    }
}
220+
/**
 * Launches the threshold kernel on aStream over a volume of x_num * y_num * z_num elements,
 * using 64 threads per block and enough blocks to cover every element.
 * The launch is asynchronous; this function does not synchronize the stream.
 * Nothing is launched for an empty volume.
 * @param cudaImage - device array passed to the kernel as its input
 * @param cudaGrad - device array passed to the kernel as its output (modified in place)
 * @param x_num - number of elements in x direction
 * @param y_num - number of elements in y direction
 * @param z_num - number of elements in z direction
 * @param Ip_th - threshold level passed to the kernel
 * @param aStream - CUDA stream on which the kernel is enqueued
 */
template <typename ImgType, typename T>
void runThreshold(ImgType *cudaImage, T *cudaGrad, size_t x_num, size_t y_num, size_t z_num, float Ip_th, cudaStream_t aStream) {
    const size_t numOfElements = x_num * y_num * z_num;

    // A grid of zero blocks is an invalid launch configuration and would leave a pending
    // launch error behind, so an empty volume is handled by simply doing nothing.
    if (numOfElements == 0) return;

    dim3 threadsPerBlock(64);
    dim3 numBlocks(static_cast<unsigned int>((numOfElements + threadsPerBlock.x - 1) / threadsPerBlock.x));
    threshold<<<numBlocks, threadsPerBlock, 0, aStream>>>(cudaImage, cudaGrad, numOfElements, Ip_th);
}
227+
/**
 * Replaces, in place, every element of data that is smaller than sigmaThreshold:
 *  - if the element is also smaller than sigmaThresholdMax it becomes 60000,
 *  - otherwise it becomes sigmaThreshold.
 * Elements that are >= sigmaThreshold keep their value.
 * Each thread handles exactly one element (its flat 1D global index); threads whose index
 * falls past the end of the array do nothing.
 * @param data - array modified in place
 * @param len - number of elements in data
 * @param sigmaThreshold - values below this level get replaced
 * @param sigmaThresholdMax - replaced values below this level are set to 60000 instead of sigmaThreshold
 */
template <typename T>
__global__ void rescaleAndThreshold(T *data, size_t len, float sigmaThreshold, float sigmaThresholdMax) {
    const size_t pos = threadIdx.x + (size_t)blockIdx.x * blockDim.x;

    // The grid is rounded up to whole blocks, so the tail threads have nothing to process.
    if (pos >= len) return;

    const float max_th = 60000.0f;

    float value = data[pos];
    if (value < sigmaThreshold) {
        if (value < sigmaThresholdMax) {
            value = max_th;
        } else {
            value = sigmaThreshold;
        }
    }

    // The element is stored back (after a round trip through float) even when it was not replaced.
    data[pos] = value;
}
240+
/**
 * Launches the rescaleAndThreshold kernel on aStream over len elements of data,
 * using 64 threads per block and enough blocks to cover every element.
 * The launch is asynchronous; this function does not synchronize the stream.
 * Nothing is launched when len is 0.
 * @param data - device array modified in place by the kernel
 * @param len - number of elements in data
 * @param sigma - passed to the kernel as sigmaThreshold
 * @param sigmaMax - passed to the kernel as sigmaThresholdMax
 * @param aStream - CUDA stream on which the kernel is enqueued
 */
template <typename T>
void runRescaleAndThreshold(T *data, size_t len, float sigma, float sigmaMax, cudaStream_t aStream) {
    // A grid of zero blocks is an invalid launch configuration and would leave a pending
    // launch error behind, so an empty array is handled by simply doing nothing.
    if (len == 0) return;

    dim3 threadsPerBlock(64);
    dim3 numBlocks(static_cast<unsigned int>((len + threadsPerBlock.x - 1) / threadsPerBlock.x));
    rescaleAndThreshold<<<numBlocks, threadsPerBlock, 0, aStream>>>(data, len, sigma, sigmaMax);
}
247+
248+
205249template <typename U>
206250template <typename ImgType>
207251class GpuProcessingTask <U>::GpuProcessingTaskImpl {
@@ -264,11 +308,11 @@ public:
264308 iMaxLevel (maxLevel),
265309 // TODO: This is wrong and done only for compile. BsplineParams has to be computed separately for each dimension.
266310 // Should be fixed when other parts of pipeline are ready.
267- params (prepareBsplineStuff((size_t )inputImage.x_num, parameters.lambda, tolerance)),
268- bc1 (params.bc1.get(), params.k0, iStream),
269- bc2 (params.bc2.get(), params.k0, iStream),
270- bc3 (params.bc3.get(), params.k0, iStream),
271- bc4 (params.bc4.get(), params.k0, iStream),
311+ // params(prepareBsplineStuff((size_t)inputImage.x_num, parameters.lambda, tolerance)),
312+ // bc1(params.bc1.get(), params.k0, iStream),
313+ // bc2(params.bc2.get(), params.k0, iStream),
314+ // bc3(params.bc3.get(), params.k0, iStream),
315+ // bc4(params.bc4.get(), params.k0, iStream),
272316 boundaryLen{(2 /* two first elements*/ + 2 /* two last elements */ ) * (size_t )inputImage.x_num * (size_t )inputImage.z_num },
273317 boundary{nullptr , boundaryLen, iStream},
274318 pctc (iAprInfo, iStream),
@@ -317,6 +361,13 @@ public:
317361 splineCudaX, splineCudaY, splineCudaZ, boundary.get (),
318362 iBsplineOffset, iParameters, iStream);
319363 runLocalIntensityScalePipeline (iCpuLevels, iParameters, local_scale_temp.get (), local_scale_temp2.get (), iStream);
364+
365+ // Apply parameters from APRConverter:
366+ runThreshold (local_scale_temp2.get (), gradient.get (), iCpuLevels.x_num , iCpuLevels.y_num , iCpuLevels.z_num , iParameters.Ip_th + iBsplineOffset, iStream);
367+ runRescaleAndThreshold (local_scale_temp.get (), iCpuLevels.mesh .size (), iParameters.sigma_th , iParameters.sigma_th_max , iStream);
368+ runThreshold (gradient.get (), gradient.get (), iCpuLevels.x_num , iCpuLevels.y_num , iCpuLevels.z_num , iParameters.grad_th , iStream);
369+ // TODO: automatic parameters are not implemented for GPU pipeline (yet)
370+
320371 float min_dim = std::min (iParameters.dy , std::min (iParameters.dx , iParameters.dz ));
321372 float level_factor = pow (2 , iMaxLevel) * min_dim;
322373 const float mult_const = level_factor/iParameters.rel_error ;
0 commit comments