Merge branch 'main' into fsi-aorta

LasNikas · LasNikas · commit fff6a8c4e2f8 · 2025-11-27T07:50:18.000+01:00
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "TrixiParticles"
 uuid = "66699cd8-9c01-4e9d-a059-b96c86d16b3a"
-authors = ["erik.faulhaber <44124897+efaulhaber@users.noreply.github.com>"]
 version = "0.4.2"
+authors = ["erik.faulhaber <44124897+efaulhaber@users.noreply.github.com>"]
 
 [deps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
@@ -34,7 +34,6 @@ TrixiBase = "9a0f1c46-06d5-4909-a5a3-ce25d3fa3284"
 WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192"
 
 [weakdeps]
-IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
 OrdinaryDiffEqCore = "bbf590c4-e513-4bbe-9b18-05decba2e5d8"
 
@@ -51,7 +50,6 @@ FastPow = "0.1"
 FileIO = "1"
 ForwardDiff = "1"
 GPUArraysCore = "0.2"
-IntervalSets = "0.7 - 0.7.11"
 JSON = "1"
 KernelAbstractions = "0.9"
 MuladdMacro = "0.2"
diff --git a/src/general/smoothing_kernels.jl b/src/general/smoothing_kernels.jl
@@ -144,8 +144,7 @@ struct SchoenbergCubicSplineKernel{NDIMS} <: AbstractSmoothingKernel{NDIMS} end
     q = r / h
 
     # We do not use `+=` or `-=` since these are not recognized by MuladdMacro.jl.
-    # Use `//` to preserve the type of `q`.
-    result = 1 // 4 * (2 - q)^3
+    result = 1 * (2 - q)^3 / 4
     result = result - (q < 1) * (1 - q)^3
 
     # Zero out result if q >= 2
@@ -159,8 +158,7 @@ end
     q = r * inner_deriv
 
     # We do not use `+=` or `-=` since these are not recognized by MuladdMacro.jl
-    # Use `//` to preserve the type of `q`.
-    result = -3 // 4 * (2 - q)^2
+    result = -3 * (2 - q)^2 / 4
     result = result + 3 * (q < 1) * (1 - q)^2
 
     # Zero out result if q >= 2
@@ -172,8 +170,8 @@ end
 
 @inline compact_support(::SchoenbergCubicSplineKernel, h) = 2 * h
 
-@inline normalization_factor(::SchoenbergCubicSplineKernel{1}, h) = 2 / 3h
-# `7 * pi` is always `Float64`. `pi * h^2 * 7` preserves the type of `h`.
+# Note that `2 // 3 / h` saves one instruction but is significantly slower on GPUs (for now)
+@inline normalization_factor(::SchoenbergCubicSplineKernel{1}, h) = 2 / (3 * h)
 @inline normalization_factor(::SchoenbergCubicSplineKernel{2}, h) = 10 / (pi * h^2 * 7)
 @inline normalization_factor(::SchoenbergCubicSplineKernel{3}, h) = 1 / (pi * h^3)
 
@@ -262,10 +260,9 @@ end
     return result
 end
 
-@inline compact_support(::SchoenbergQuarticSplineKernel, h) = 5 // 2 * h
+@inline compact_support(::SchoenbergQuarticSplineKernel, h) = 5 * h / 2
 
-@inline normalization_factor(::SchoenbergQuarticSplineKernel{1}, h) = 1 / 24h
-# `1199 * pi` is always `Float64`. `pi * h^2 * 1199` preserves the type of `h`.
+@inline normalization_factor(::SchoenbergQuarticSplineKernel{1}, h) = 1 / (24 * h)
 @inline normalization_factor(::SchoenbergQuarticSplineKernel{2}, h) = 96 / (pi * h^2 * 1199)
 @inline normalization_factor(::SchoenbergQuarticSplineKernel{3}, h) = 1 / (pi * h^3 * 20)
 
@@ -343,15 +340,14 @@ end
 
 @inline compact_support(::SchoenbergQuinticSplineKernel, h) = 3 * h
 
-@inline normalization_factor(::SchoenbergQuinticSplineKernel{1}, h) = 1 / 120h
-# `478 * pi` is always `Float64`. `pi * h^2 * 478` preserves the type of `h`.
+@inline normalization_factor(::SchoenbergQuinticSplineKernel{1}, h) = 1 / (120 * h)
 @inline normalization_factor(::SchoenbergQuinticSplineKernel{2}, h) = 7 / (pi * h^2 * 478)
 @inline normalization_factor(::SchoenbergQuinticSplineKernel{3}, h) = 1 / (pi * h^3 * 120)
 
 abstract type AbstractWendlandKernel{NDIMS} <: AbstractSmoothingKernel{NDIMS} end
 
 # Compact support for all Wendland kernels
-@inline compact_support(::AbstractWendlandKernel, h) = 2h
+@inline compact_support(::AbstractWendlandKernel, h) = 2 * h
 
 @doc raw"""
     WendlandC2Kernel{NDIMS}()
@@ -390,24 +386,19 @@ struct WendlandC2Kernel{NDIMS} <: AbstractWendlandKernel{NDIMS} end
 @fastpow @inline function kernel(kernel::WendlandC2Kernel, r::Real, h)
     q = r / h
 
-    result = (1 - q / 2)^4 * (2q + 1)
+    result = (1 - q / 2)^4 * (2 * q + 1)
 
     # Zero out result if q >= 2
     result = ifelse(q < 2, normalization_factor(kernel, h) * result, zero(q))
 
     return result
 end
 
-@fastpow @muladd @inline function kernel_deriv(kernel::WendlandC2Kernel, r::Real, h)
+@inline function kernel_deriv(kernel::WendlandC2Kernel, r::Real, h)
     inner_deriv = 1 / h
     q = r * inner_deriv
 
-    q1_3 = (1 - q / 2)^3
-    q1_4 = (1 - q / 2)^4
-
-    # We do not use `+=` or `-=` since these are not recognized by MuladdMacro.jl
-    result = -2 * q1_3 * (2q + 1)
-    result = result + q1_4 * 2
+    result = -5 * (1 - q / 2)^3 * q
 
     # Zero out result if q >= 2
     result = ifelse(q < 2,
@@ -416,9 +407,9 @@ end
     return result
 end
 
-@inline normalization_factor(::WendlandC2Kernel{2}, h) = 7 / (pi * h^2) / 4
-# `2 * pi` is always `Float64`. `pi * h^3 * 2` preserves the type of `h`.
-@inline normalization_factor(::WendlandC2Kernel{3}, h) = 21 / (pi * h^3 * 2) / 8
+# Note that `7 // 4` saves one instruction but is significantly slower on GPUs (for now)
+@inline normalization_factor(::WendlandC2Kernel{2}, h) = 7 / (pi * h^2 * 4)
+@inline normalization_factor(::WendlandC2Kernel{3}, h) = 21 / (pi * h^3 * 16)
 
 @doc raw"""
     WendlandC4Kernel{NDIMS}()
@@ -457,21 +448,18 @@ struct WendlandC4Kernel{NDIMS} <: AbstractWendlandKernel{NDIMS} end
 @fastpow @inline function kernel(kernel::WendlandC4Kernel, r::Real, h)
     q = r / h
 
-    result = (1 - q / 2)^6 * (35q^2 / 12 + 3q + 1)
+    result = (1 - q / 2)^6 * (35 * q^2 / 12 + 3 * q + 1)
 
     # Zero out result if q >= 2
     result = ifelse(q < 2, normalization_factor(kernel, h) * result, zero(q))
 
     return result
 end
 
-@fastpow @muladd @inline function kernel_deriv(kernel::WendlandC4Kernel, r::Real, h)
+@fastpow @inline function kernel_deriv(kernel::WendlandC4Kernel, r::Real, h)
     q = r / h
 
-    # Use `//` to preserve the type of `q`
-    term1 = (1 - q / 2)^6 * (3 + 35 // 6 * q)
-    term2 = 3 * (1 - q / 2)^5 * (1 + 3q + 35 // 12 * q^2)
-    derivative = term1 - term2
+    derivative = -7 * q / 3 * (2 + 5 * q) * (1 - q / 2)^5
 
     # Zero out result if q >= 2
     result = ifelse(q < 2, normalization_factor(kernel, h) * derivative / h,
@@ -480,9 +468,8 @@ end
     return result
 end
 
-@inline normalization_factor(::WendlandC4Kernel{2}, h) = 9 / (pi * h^2) / 4
-# `32 * pi` is always `Float64`. `pi * h^2 * 32` preserves the type of `h`.
-@inline normalization_factor(::WendlandC4Kernel{3}, h) = 495 / (pi * h^3 * 32) / 8
+@inline normalization_factor(::WendlandC4Kernel{2}, h) = 9 / (pi * h^2 * 4)
+@inline normalization_factor(::WendlandC4Kernel{3}, h) = 495 / (pi * h^3 * 256)
 
 @doc raw"""
     WendlandC6Kernel{NDIMS}()
@@ -521,7 +508,7 @@ struct WendlandC6Kernel{NDIMS} <: AbstractWendlandKernel{NDIMS} end
 @fastpow @inline function kernel(kernel::WendlandC6Kernel, r::Real, h)
     q = r / h
 
-    result = (1 - q / 2)^8 * (4q^3 + 25q^2 / 4 + 4q + 1)
+    result = (1 - q / 2)^8 * (4 * q^3 + 25 * q^2 / 4 + 4 * q + 1)
 
     # Zero out result if q >= 2
     result = ifelse(q < 2, normalization_factor(kernel, h) * result, zero(q))
@@ -531,9 +518,8 @@ end
 
 @fastpow @muladd @inline function kernel_deriv(kernel::WendlandC6Kernel, r::Real, h)
     q = r / h
-    term1 = -4 * (1 - q / 2)^7 * (4q^3 + 25q^2 / 4 + 4q + 1)
-    term2 = (1 - q / 2)^8 * (12q^2 + 50q / 4 + 4)
-    derivative = term1 + term2
+
+    derivative = -11 * q / 4 * (8 * q^2 + 7 * q + 2) * (1 - q / 2)^7
 
     # Zero out result if q >= 2
     result = ifelse(q < 2, normalization_factor(kernel, h) * derivative / h,
@@ -542,9 +528,8 @@ end
     return result
 end
 
-# `7 * pi` is always `Float64`. `pi * h^2 * 7` preserves the type of `h`.
-@inline normalization_factor(::WendlandC6Kernel{2}, h) = 78 / (pi * h^2 * 7) / 4
-@inline normalization_factor(::WendlandC6Kernel{3}, h) = 1365 / (pi * h^3 * 64) / 8
+@inline normalization_factor(::WendlandC6Kernel{2}, h) = 39 / (pi * h^2 * 14)
+@inline normalization_factor(::WendlandC6Kernel{3}, h) = 1365 / (pi * h^3 * 512)
 
 @doc raw"""
     Poly6Kernel{NDIMS}()
@@ -609,8 +594,8 @@ end
 
 @inline compact_support(::Poly6Kernel, h) = h
 
+# Note that `315 // 64` saves one instruction but is significantly slower on GPUs (for now)
 @inline normalization_factor(::Poly6Kernel{2}, h) = 4 / (pi * h^2)
-# `64 * pi` is always `Float64`. `pi * h^3 * 64` preserves the type of `h`.
 @inline normalization_factor(::Poly6Kernel{3}, h) = 315 / (pi * h^3 * 64)
 
 @doc raw"""
diff --git a/src/schemes/fluid/shifting_techniques.jl b/src/schemes/fluid/shifting_techniques.jl
@@ -410,11 +410,13 @@ end
 # means `compute_v_max=true`
 function v_max(shifting::ParticleShiftingTechnique{<:Any, <:Any, <:Any, true},
                v, system)
-    # This has similar performance to `maximum(..., eachparticle(system))`,
+    # This has similar performance as `maximum(..., eachparticle(system))`,
     # but is GPU-compatible.
-    v_max = maximum(x -> sqrt(dot(x, x)),
-                    reinterpret(reshape, SVector{ndims(system), eltype(v)},
-                                current_velocity(v, system)))
+    v_max2 = maximum(x -> dot(x, x),
+                     reinterpret(reshape, SVector{ndims(system), eltype(v)},
+                                 current_velocity(v, system)))
+    v_max = sqrt(v_max2)
+
     return shifting.v_factor * v_max
 end
 
diff --git a/test/examples/examples.jl b/test/examples/examples.jl
@@ -118,6 +118,7 @@
                                              maxiters=500,
                                              extra_callback=split_integration) [
                 "┌ Warning: Instability detected. Aborting\n",
+                r".*dt was forced below floating point epsilon.*\n",
                 r"└ @ SciMLBase.*\n"
             ]
             @test sol.retcode == ReturnCode.Unstable
diff --git a/test/examples/gpu.jl b/test/examples/gpu.jl
@@ -153,15 +153,17 @@ end
                                                parallelization_backend=Main.parallelization_backend)
 
             # Note that this simulation only takes 42 time steps on the CPU.
-            # TODO This takes 43 time steps on Metal.
+            # TODO This takes 44 time steps on Metal.
             # Maybe related to https://github.com/JuliaGPU/Metal.jl/issues/549
+            ismetal = nameof(typeof(Main.parallelization_backend)) == :MetalBackend
+            maxiters = ismetal ? 44 : 42
             trixi_include_changeprecision(Float32, @__MODULE__,
                                           joinpath(examples_dir(), "fluid",
                                                    "dam_break_3d.jl"),
                                           tspan=(0.0f0, 0.1f0),
                                           fluid_particle_spacing=0.1,
                                           semi=semi_fullgrid,
-                                          maxiters=43)
+                                          maxiters=maxiters)
             @test sol.retcode == ReturnCode.Success
             backend = TrixiParticles.KernelAbstractions.get_backend(sol.u[end].x[1])
             @test backend == Main.parallelization_backend

Original file line number	Diff line number	Diff line change
`@@ -118,6 +118,7 @@`
`118`	`118`	`maxiters=500,`
`119`	`119`	`extra_callback=split_integration) [`
`120`	`120`	`"┌ Warning: Instability detected. Aborting\n",`
	`121`	`+ r".*dt was forced below floating point epsilon.*\n",`
`121`	`122`	`r"└ @ SciMLBase.*\n"`
`122`	`123`	`]`
`123`	`124`	`@test sol.retcode == ReturnCode.Unstable`