Skip to content

Commit fff6a8c

Browse files
author
LasNikas
committed
Merge branch 'main' into fsi-aorta
2 parents b9e633c + e3da2b9 commit fff6a8c

File tree

5 files changed

+37
-49
lines changed

5 files changed

+37
-49
lines changed

Project.toml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "TrixiParticles"
22
uuid = "66699cd8-9c01-4e9d-a059-b96c86d16b3a"
3-
authors = ["erik.faulhaber <44124897+efaulhaber@users.noreply.github.com>"]
43
version = "0.4.2"
4+
authors = ["erik.faulhaber <44124897+efaulhaber@users.noreply.github.com>"]
55

66
[deps]
77
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
@@ -34,7 +34,6 @@ TrixiBase = "9a0f1c46-06d5-4909-a5a3-ce25d3fa3284"
3434
WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192"
3535

3636
[weakdeps]
37-
IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
3837
OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
3938
OrdinaryDiffEqCore = "bbf590c4-e513-4bbe-9b18-05decba2e5d8"
4039

@@ -51,7 +50,6 @@ FastPow = "0.1"
5150
FileIO = "1"
5251
ForwardDiff = "1"
5352
GPUArraysCore = "0.2"
54-
IntervalSets = "0.7 - 0.7.11"
5553
JSON = "1"
5654
KernelAbstractions = "0.9"
5755
MuladdMacro = "0.2"

src/general/smoothing_kernels.jl

Lines changed: 25 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,7 @@ struct SchoenbergCubicSplineKernel{NDIMS} <: AbstractSmoothingKernel{NDIMS} end
144144
q = r / h
145145

146146
# We do not use `+=` or `-=` since these are not recognized by MuladdMacro.jl.
147-
# Use `//` to preserve the type of `q`.
148-
result = 1 // 4 * (2 - q)^3
147+
result = 1 * (2 - q)^3 / 4
149148
result = result - (q < 1) * (1 - q)^3
150149

151150
# Zero out result if q >= 2
@@ -159,8 +158,7 @@ end
159158
q = r * inner_deriv
160159

161160
# We do not use `+=` or `-=` since these are not recognized by MuladdMacro.jl
162-
# Use `//` to preserve the type of `q`.
163-
result = -3 // 4 * (2 - q)^2
161+
result = -3 * (2 - q)^2 / 4
164162
result = result + 3 * (q < 1) * (1 - q)^2
165163

166164
# Zero out result if q >= 2
@@ -172,8 +170,8 @@ end
172170

173171
@inline compact_support(::SchoenbergCubicSplineKernel, h) = 2 * h
174172

175-
@inline normalization_factor(::SchoenbergCubicSplineKernel{1}, h) = 2 / 3h
176-
# `7 * pi` is always `Float64`. `pi * h^2 * 7` preserves the type of `h`.
173+
# Note that `2 // 3 / h` saves one instruction but is significantly slower on GPUs (for now)
174+
@inline normalization_factor(::SchoenbergCubicSplineKernel{1}, h) = 2 / (3 * h)
177175
@inline normalization_factor(::SchoenbergCubicSplineKernel{2}, h) = 10 / (pi * h^2 * 7)
178176
@inline normalization_factor(::SchoenbergCubicSplineKernel{3}, h) = 1 / (pi * h^3)
179177

@@ -262,10 +260,9 @@ end
262260
return result
263261
end
264262

265-
@inline compact_support(::SchoenbergQuarticSplineKernel, h) = 5 // 2 * h
263+
@inline compact_support(::SchoenbergQuarticSplineKernel, h) = 5 * h / 2
266264

267-
@inline normalization_factor(::SchoenbergQuarticSplineKernel{1}, h) = 1 / 24h
268-
# `1199 * pi` is always `Float64`. `pi * h^2 * 1199` preserves the type of `h`.
265+
@inline normalization_factor(::SchoenbergQuarticSplineKernel{1}, h) = 1 / (24 * h)
269266
@inline normalization_factor(::SchoenbergQuarticSplineKernel{2}, h) = 96 / (pi * h^2 * 1199)
270267
@inline normalization_factor(::SchoenbergQuarticSplineKernel{3}, h) = 1 / (pi * h^3 * 20)
271268

@@ -343,15 +340,14 @@ end
343340

344341
@inline compact_support(::SchoenbergQuinticSplineKernel, h) = 3 * h
345342

346-
@inline normalization_factor(::SchoenbergQuinticSplineKernel{1}, h) = 1 / 120h
347-
# `478 * pi` is always `Float64`. `pi * h^2 * 478` preserves the type of `h`.
343+
@inline normalization_factor(::SchoenbergQuinticSplineKernel{1}, h) = 1 / (120 * h)
348344
@inline normalization_factor(::SchoenbergQuinticSplineKernel{2}, h) = 7 / (pi * h^2 * 478)
349345
@inline normalization_factor(::SchoenbergQuinticSplineKernel{3}, h) = 1 / (pi * h^3 * 120)
350346

351347
abstract type AbstractWendlandKernel{NDIMS} <: AbstractSmoothingKernel{NDIMS} end
352348

353349
# Compact support for all Wendland kernels
354-
@inline compact_support(::AbstractWendlandKernel, h) = 2h
350+
@inline compact_support(::AbstractWendlandKernel, h) = 2 * h
355351

356352
@doc raw"""
357353
WendlandC2Kernel{NDIMS}()
@@ -390,24 +386,19 @@ struct WendlandC2Kernel{NDIMS} <: AbstractWendlandKernel{NDIMS} end
390386
@fastpow @inline function kernel(kernel::WendlandC2Kernel, r::Real, h)
391387
q = r / h
392388

393-
result = (1 - q / 2)^4 * (2q + 1)
389+
result = (1 - q / 2)^4 * (2 * q + 1)
394390

395391
# Zero out result if q >= 2
396392
result = ifelse(q < 2, normalization_factor(kernel, h) * result, zero(q))
397393

398394
return result
399395
end
400396

401-
@fastpow @muladd @inline function kernel_deriv(kernel::WendlandC2Kernel, r::Real, h)
397+
@inline function kernel_deriv(kernel::WendlandC2Kernel, r::Real, h)
402398
inner_deriv = 1 / h
403399
q = r * inner_deriv
404400

405-
q1_3 = (1 - q / 2)^3
406-
q1_4 = (1 - q / 2)^4
407-
408-
# We do not use `+=` or `-=` since these are not recognized by MuladdMacro.jl
409-
result = -2 * q1_3 * (2q + 1)
410-
result = result + q1_4 * 2
401+
result = -5 * (1 - q / 2)^3 * q
411402

412403
# Zero out result if q >= 2
413404
result = ifelse(q < 2,
@@ -416,9 +407,9 @@ end
416407
return result
417408
end
418409

419-
@inline normalization_factor(::WendlandC2Kernel{2}, h) = 7 / (pi * h^2) / 4
420-
# `2 * pi` is always `Float64`. `pi * h^3 * 2` preserves the type of `h`.
421-
@inline normalization_factor(::WendlandC2Kernel{3}, h) = 21 / (pi * h^3 * 2) / 8
410+
# Note that `7 // 4` saves one instruction but is significantly slower on GPUs (for now)
411+
@inline normalization_factor(::WendlandC2Kernel{2}, h) = 7 / (pi * h^2 * 4)
412+
@inline normalization_factor(::WendlandC2Kernel{3}, h) = 21 / (pi * h^3 * 16)
422413

423414
@doc raw"""
424415
WendlandC4Kernel{NDIMS}()
@@ -457,21 +448,18 @@ struct WendlandC4Kernel{NDIMS} <: AbstractWendlandKernel{NDIMS} end
457448
@fastpow @inline function kernel(kernel::WendlandC4Kernel, r::Real, h)
458449
q = r / h
459450

460-
result = (1 - q / 2)^6 * (35q^2 / 12 + 3q + 1)
451+
result = (1 - q / 2)^6 * (35 * q^2 / 12 + 3 * q + 1)
461452

462453
# Zero out result if q >= 2
463454
result = ifelse(q < 2, normalization_factor(kernel, h) * result, zero(q))
464455

465456
return result
466457
end
467458

468-
@fastpow @muladd @inline function kernel_deriv(kernel::WendlandC4Kernel, r::Real, h)
459+
@fastpow @inline function kernel_deriv(kernel::WendlandC4Kernel, r::Real, h)
469460
q = r / h
470461

471-
# Use `//` to preserve the type of `q`
472-
term1 = (1 - q / 2)^6 * (3 + 35 // 6 * q)
473-
term2 = 3 * (1 - q / 2)^5 * (1 + 3q + 35 // 12 * q^2)
474-
derivative = term1 - term2
462+
derivative = -7 * q / 3 * (2 + 5 * q) * (1 - q / 2)^5
475463

476464
# Zero out result if q >= 2
477465
result = ifelse(q < 2, normalization_factor(kernel, h) * derivative / h,
@@ -480,9 +468,8 @@ end
480468
return result
481469
end
482470

483-
@inline normalization_factor(::WendlandC4Kernel{2}, h) = 9 / (pi * h^2) / 4
484-
# `32 * pi` is always `Float64`. `pi * h^2 * 32` preserves the type of `h`.
485-
@inline normalization_factor(::WendlandC4Kernel{3}, h) = 495 / (pi * h^3 * 32) / 8
471+
@inline normalization_factor(::WendlandC4Kernel{2}, h) = 9 / (pi * h^2 * 4)
472+
@inline normalization_factor(::WendlandC4Kernel{3}, h) = 495 / (pi * h^3 * 256)
486473

487474
@doc raw"""
488475
WendlandC6Kernel{NDIMS}()
@@ -521,7 +508,7 @@ struct WendlandC6Kernel{NDIMS} <: AbstractWendlandKernel{NDIMS} end
521508
@fastpow @inline function kernel(kernel::WendlandC6Kernel, r::Real, h)
522509
q = r / h
523510

524-
result = (1 - q / 2)^8 * (4q^3 + 25q^2 / 4 + 4q + 1)
511+
result = (1 - q / 2)^8 * (4 * q^3 + 25 * q^2 / 4 + 4 * q + 1)
525512

526513
# Zero out result if q >= 2
527514
result = ifelse(q < 2, normalization_factor(kernel, h) * result, zero(q))
@@ -531,9 +518,8 @@ end
531518

532519
@fastpow @muladd @inline function kernel_deriv(kernel::WendlandC6Kernel, r::Real, h)
533520
q = r / h
534-
term1 = -4 * (1 - q / 2)^7 * (4q^3 + 25q^2 / 4 + 4q + 1)
535-
term2 = (1 - q / 2)^8 * (12q^2 + 50q / 4 + 4)
536-
derivative = term1 + term2
521+
522+
derivative = -11 * q / 4 * (8 * q^2 + 7 * q + 2) * (1 - q / 2)^7
537523

538524
# Zero out result if q >= 2
539525
result = ifelse(q < 2, normalization_factor(kernel, h) * derivative / h,
@@ -542,9 +528,8 @@ end
542528
return result
543529
end
544530

545-
# `7 * pi` is always `Float64`. `pi * h^2 * 7` preserves the type of `h`.
546-
@inline normalization_factor(::WendlandC6Kernel{2}, h) = 78 / (pi * h^2 * 7) / 4
547-
@inline normalization_factor(::WendlandC6Kernel{3}, h) = 1365 / (pi * h^3 * 64) / 8
531+
@inline normalization_factor(::WendlandC6Kernel{2}, h) = 39 / (pi * h^2 * 14)
532+
@inline normalization_factor(::WendlandC6Kernel{3}, h) = 1365 / (pi * h^3 * 512)
548533

549534
@doc raw"""
550535
Poly6Kernel{NDIMS}()
@@ -609,8 +594,8 @@ end
609594

610595
@inline compact_support(::Poly6Kernel, h) = h
611596

597+
# Note that `315 // 64` saves one instruction but is significantly slower on GPUs (for now)
612598
@inline normalization_factor(::Poly6Kernel{2}, h) = 4 / (pi * h^2)
613-
# `64 * pi` is always `Float64`. `pi * h^3 * 64` preserves the type of `h`.
614599
@inline normalization_factor(::Poly6Kernel{3}, h) = 315 / (pi * h^3 * 64)
615600

616601
@doc raw"""

src/schemes/fluid/shifting_techniques.jl

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -410,11 +410,13 @@ end
410410
# means `compute_v_max=true`
411411
function v_max(shifting::ParticleShiftingTechnique{<:Any, <:Any, <:Any, true},
412412
v, system)
413-
# This has similar performance to `maximum(..., eachparticle(system))`,
413+
# This has similar performance as `maximum(..., eachparticle(system))`,
414414
# but is GPU-compatible.
415-
v_max = maximum(x -> sqrt(dot(x, x)),
416-
reinterpret(reshape, SVector{ndims(system), eltype(v)},
417-
current_velocity(v, system)))
415+
v_max2 = maximum(x -> dot(x, x),
416+
reinterpret(reshape, SVector{ndims(system), eltype(v)},
417+
current_velocity(v, system)))
418+
v_max = sqrt(v_max2)
419+
418420
return shifting.v_factor * v_max
419421
end
420422

test/examples/examples.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@
118118
maxiters=500,
119119
extra_callback=split_integration) [
120120
"┌ Warning: Instability detected. Aborting\n",
121+
r".*dt was forced below floating point epsilon.*\n",
121122
r"└ @ SciMLBase.*\n"
122123
]
123124
@test sol.retcode == ReturnCode.Unstable

test/examples/gpu.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,15 +153,17 @@ end
153153
parallelization_backend=Main.parallelization_backend)
154154

155155
# Note that this simulation only takes 42 time steps on the CPU.
156-
# TODO This takes 43 time steps on Metal.
156+
# TODO This takes 44 time steps on Metal.
157157
# Maybe related to https://github.com/JuliaGPU/Metal.jl/issues/549
158+
ismetal = nameof(typeof(Main.parallelization_backend)) == :MetalBackend
159+
maxiters = ismetal ? 44 : 42
158160
trixi_include_changeprecision(Float32, @__MODULE__,
159161
joinpath(examples_dir(), "fluid",
160162
"dam_break_3d.jl"),
161163
tspan=(0.0f0, 0.1f0),
162164
fluid_particle_spacing=0.1,
163165
semi=semi_fullgrid,
164-
maxiters=43)
166+
maxiters=maxiters)
165167
@test sol.retcode == ReturnCode.Success
166168
backend = TrixiParticles.KernelAbstractions.get_backend(sol.u[end].x[1])
167169
@test backend == Main.parallelization_backend

0 commit comments

Comments
 (0)