@@ -144,8 +144,7 @@ struct SchoenbergCubicSplineKernel{NDIMS} <: AbstractSmoothingKernel{NDIMS} end
144144 q = r / h
145145
146146 # We do not use `+=` or `-=` since these are not recognized by MuladdMacro.jl.
147- # Use `//` to preserve the type of `q`.
148- result = 1 // 4 * (2 - q)^ 3
147+ result = 1 * (2 - q)^ 3 / 4
149148 result = result - (q < 1 ) * (1 - q)^ 3
150149
151150 # Zero out result if q >= 2
159158 q = r * inner_deriv
160159
161160 # We do not use `+=` or `-=` since these are not recognized by MuladdMacro.jl
162- # Use `//` to preserve the type of `q`.
163- result = - 3 // 4 * (2 - q)^ 2
161+ result = - 3 * (2 - q)^ 2 / 4
164162 result = result + 3 * (q < 1 ) * (1 - q)^ 2
165163
166164 # Zero out result if q >= 2
172170
173171@inline compact_support (:: SchoenbergCubicSplineKernel , h) = 2 * h
174172
175- @inline normalization_factor ( :: SchoenbergCubicSplineKernel{1} , h) = 2 / 3 h
176- # `7 * pi` is always `Float64`. `pi * h^2 * 7` preserves the type of `h`.
173+ # Note that ` 2 // 3 / h` saves one instruction but is significantly slower on GPUs (for now)
174+ @inline normalization_factor ( :: SchoenbergCubicSplineKernel{1} , h) = 2 / ( 3 * h)
177175@inline normalization_factor (:: SchoenbergCubicSplineKernel{2} , h) = 10 / (pi * h^ 2 * 7 )
178176@inline normalization_factor (:: SchoenbergCubicSplineKernel{3} , h) = 1 / (pi * h^ 3 )
179177
262260 return result
263261end
264262
265- @inline compact_support (:: SchoenbergQuarticSplineKernel , h) = 5 // 2 * h
263+ @inline compact_support (:: SchoenbergQuarticSplineKernel , h) = 5 * h / 2
266264
267- @inline normalization_factor (:: SchoenbergQuarticSplineKernel{1} , h) = 1 / 24 h
268- # `1199 * pi` is always `Float64`. `pi * h^2 * 1199` preserves the type of `h`.
265+ @inline normalization_factor (:: SchoenbergQuarticSplineKernel{1} , h) = 1 / (24 * h)
269266@inline normalization_factor (:: SchoenbergQuarticSplineKernel{2} , h) = 96 / (pi * h^ 2 * 1199 )
270267@inline normalization_factor (:: SchoenbergQuarticSplineKernel{3} , h) = 1 / (pi * h^ 3 * 20 )
271268
@@ -343,15 +340,14 @@ end
343340
344341@inline compact_support (:: SchoenbergQuinticSplineKernel , h) = 3 * h
345342
346- @inline normalization_factor (:: SchoenbergQuinticSplineKernel{1} , h) = 1 / 120 h
347- # `478 * pi` is always `Float64`. `pi * h^2 * 478` preserves the type of `h`.
343+ @inline normalization_factor (:: SchoenbergQuinticSplineKernel{1} , h) = 1 / (120 * h)
348344@inline normalization_factor (:: SchoenbergQuinticSplineKernel{2} , h) = 7 / (pi * h^ 2 * 478 )
349345@inline normalization_factor (:: SchoenbergQuinticSplineKernel{3} , h) = 1 / (pi * h^ 3 * 120 )
350346
351347abstract type AbstractWendlandKernel{NDIMS} <: AbstractSmoothingKernel{NDIMS} end
352348
353349# Compact support for all Wendland kernels
354- @inline compact_support (:: AbstractWendlandKernel , h) = 2 h
350+ @inline compact_support (:: AbstractWendlandKernel , h) = 2 * h
355351
356352@doc raw """
357353 WendlandC2Kernel{NDIMS}()
@@ -390,24 +386,19 @@ struct WendlandC2Kernel{NDIMS} <: AbstractWendlandKernel{NDIMS} end
390386@fastpow @inline function kernel (kernel:: WendlandC2Kernel , r:: Real , h)
391387 q = r / h
392388
393- result = (1 - q / 2 )^ 4 * (2 q + 1 )
389+ result = (1 - q / 2 )^ 4 * (2 * q + 1 )
394390
395391 # Zero out result if q >= 2
396392 result = ifelse (q < 2 , normalization_factor (kernel, h) * result, zero (q))
397393
398394 return result
399395end
400396
401- @fastpow @muladd @ inline function kernel_deriv (kernel:: WendlandC2Kernel , r:: Real , h)
397+ @inline function kernel_deriv (kernel:: WendlandC2Kernel , r:: Real , h)
402398 inner_deriv = 1 / h
403399 q = r * inner_deriv
404400
405- q1_3 = (1 - q / 2 )^ 3
406- q1_4 = (1 - q / 2 )^ 4
407-
408- # We do not use `+=` or `-=` since these are not recognized by MuladdMacro.jl
409- result = - 2 * q1_3 * (2 q + 1 )
410- result = result + q1_4 * 2
401+ result = - 5 * (1 - q / 2 )^ 3 * q
411402
412403 # Zero out result if q >= 2
413404 result = ifelse (q < 2 ,
416407 return result
417408end
418409
419- @inline normalization_factor ( :: WendlandC2Kernel{2} , h) = 7 / ( pi * h ^ 2 ) / 4
420- # `2 * pi` is always `Float64`. ` pi * h^3 * 2` preserves the type of `h`.
421- @inline normalization_factor (:: WendlandC2Kernel{3} , h) = 21 / (pi * h^ 3 * 2 ) / 8
410+ # Note that ` 7 // 4` saves one instruction but is significantly slower on GPUs (for now)
411+ @inline normalization_factor ( :: WendlandC2Kernel{2} , h) = 7 / ( pi * h^ 2 * 4 )
412+ @inline normalization_factor (:: WendlandC2Kernel{3} , h) = 21 / (pi * h^ 3 * 16 )
422413
423414@doc raw """
424415 WendlandC4Kernel{NDIMS}()
@@ -457,21 +448,18 @@ struct WendlandC4Kernel{NDIMS} <: AbstractWendlandKernel{NDIMS} end
457448@fastpow @inline function kernel (kernel:: WendlandC4Kernel , r:: Real , h)
458449 q = r / h
459450
460- result = (1 - q / 2 )^ 6 * (35 q ^ 2 / 12 + 3 q + 1 )
451+ result = (1 - q / 2 )^ 6 * (35 * q ^ 2 / 12 + 3 * q + 1 )
461452
462453 # Zero out result if q >= 2
463454 result = ifelse (q < 2 , normalization_factor (kernel, h) * result, zero (q))
464455
465456 return result
466457end
467458
468- @fastpow @muladd @ inline function kernel_deriv (kernel:: WendlandC4Kernel , r:: Real , h)
459+ @fastpow @inline function kernel_deriv (kernel:: WendlandC4Kernel , r:: Real , h)
469460 q = r / h
470461
471- # Use `//` to preserve the type of `q`
472- term1 = (1 - q / 2 )^ 6 * (3 + 35 // 6 * q)
473- term2 = 3 * (1 - q / 2 )^ 5 * (1 + 3 q + 35 // 12 * q^ 2 )
474- derivative = term1 - term2
462+ derivative = - 7 * q / 3 * (2 + 5 * q) * (1 - q / 2 )^ 5
475463
476464 # Zero out result if q >= 2
477465 result = ifelse (q < 2 , normalization_factor (kernel, h) * derivative / h,
480468 return result
481469end
482470
483- @inline normalization_factor (:: WendlandC4Kernel{2} , h) = 9 / (pi * h^ 2 ) / 4
484- # `32 * pi` is always `Float64`. `pi * h^2 * 32` preserves the type of `h`.
485- @inline normalization_factor (:: WendlandC4Kernel{3} , h) = 495 / (pi * h^ 3 * 32 ) / 8
471+ @inline normalization_factor (:: WendlandC4Kernel{2} , h) = 9 / (pi * h^ 2 * 4 )
472+ @inline normalization_factor (:: WendlandC4Kernel{3} , h) = 495 / (pi * h^ 3 * 256 )
486473
487474@doc raw """
488475 WendlandC6Kernel{NDIMS}()
@@ -521,7 +508,7 @@ struct WendlandC6Kernel{NDIMS} <: AbstractWendlandKernel{NDIMS} end
521508@fastpow @inline function kernel (kernel:: WendlandC6Kernel , r:: Real , h)
522509 q = r / h
523510
524- result = (1 - q / 2 )^ 8 * (4 q ^ 3 + 25 q ^ 2 / 4 + 4 q + 1 )
511+ result = (1 - q / 2 )^ 8 * (4 * q ^ 3 + 25 * q ^ 2 / 4 + 4 * q + 1 )
525512
526513 # Zero out result if q >= 2
527514 result = ifelse (q < 2 , normalization_factor (kernel, h) * result, zero (q))
531518
532519@fastpow @muladd @inline function kernel_deriv (kernel:: WendlandC6Kernel , r:: Real , h)
533520 q = r / h
534- term1 = - 4 * (1 - q / 2 )^ 7 * (4 q^ 3 + 25 q^ 2 / 4 + 4 q + 1 )
535- term2 = (1 - q / 2 )^ 8 * (12 q^ 2 + 50 q / 4 + 4 )
536- derivative = term1 + term2
521+
522+ derivative = - 11 * q / 4 * (8 * q^ 2 + 7 * q + 2 ) * (1 - q / 2 )^ 7
537523
538524 # Zero out result if q >= 2
539525 result = ifelse (q < 2 , normalization_factor (kernel, h) * derivative / h,
542528 return result
543529end
544530
545- # `7 * pi` is always `Float64`. `pi * h^2 * 7` preserves the type of `h`.
546- @inline normalization_factor (:: WendlandC6Kernel{2} , h) = 78 / (pi * h^ 2 * 7 ) / 4
547- @inline normalization_factor (:: WendlandC6Kernel{3} , h) = 1365 / (pi * h^ 3 * 64 ) / 8
531+ @inline normalization_factor (:: WendlandC6Kernel{2} , h) = 39 / (pi * h^ 2 * 14 )
532+ @inline normalization_factor (:: WendlandC6Kernel{3} , h) = 1365 / (pi * h^ 3 * 512 )
548533
549534@doc raw """
550535 Poly6Kernel{NDIMS}()
609594
610595@inline compact_support (:: Poly6Kernel , h) = h
611596
597+ # Note that `315 // 64` saves one instruction but is significantly slower on GPUs (for now)
612598@inline normalization_factor (:: Poly6Kernel{2} , h) = 4 / (pi * h^ 2 )
613- # `64 * pi` is always `Float64`. `pi * h^3 * 64` preserves the type of `h`.
614599@inline normalization_factor (:: Poly6Kernel{3} , h) = 315 / (pi * h^ 3 * 64 )
615600
616601@doc raw """
0 commit comments