Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
f195f0c
Make interpolation with different smoothing length work on the GPU
efaulhaber Oct 13, 2025
6633e2d
update
svchb Oct 13, 2025
10c5835
update
svchb Oct 13, 2025
1e368e2
Merge remote-tracking branch 'efaulhaber/interpolation-gpu' into add_…
svchb Oct 13, 2025
c36730e
update
svchb Oct 14, 2025
d94896c
Allow a different eltype only for the coordinates
efaulhaber Oct 15, 2025
410faa8
Merge remote-tracking branch 'efaulhaber/coordinates-eltype' into pap…
svchb Oct 16, 2025
322b9c4
set to parallelUpdate
svchb Oct 16, 2025
9df5f4a
Workaround for postprocess callback
efaulhaber Oct 16, 2025
a9a7844
Merge remote-tracking branch 'efaulhaber/coordinates-eltype' into pap…
svchb Oct 16, 2025
3083bf0
update DS Setup
svchb Oct 17, 2025
7d80fa7
update
svchb Oct 17, 2025
65ca42f
update
svchb Oct 23, 2025
3adc532
update
svchb Oct 27, 2025
915ae69
Merge branch 'main' into paper_test_dam
svchb Oct 27, 2025
ff64b87
Merge branch 'main' into paper_test_dam
svchb Nov 1, 2025
a88f00a
Improve GPU performance with shifting
efaulhaber Nov 5, 2025
3abae10
Improve update callback timers
efaulhaber Nov 5, 2025
0547955
Merge branch 'main' into shifting-gpu-performance
efaulhaber Nov 11, 2025
e852aac
Merge branch 'main' into shifting-gpu-performance
efaulhaber Nov 12, 2025
9e6533c
Merge branch 'main' into shifting-gpu-performance
efaulhaber Nov 17, 2025
0e90d26
Fix TVF
efaulhaber Nov 17, 2025
2795dea
Merge branch 'shifting-gpu-performance' of github.com:efaulhaber/Trix…
efaulhaber Nov 17, 2025
1fc0359
Merge branch 'main' into paper_test_dam
svchb Nov 17, 2025
e7f54cc
Merge branch 'pr-974' into paper_test_dam
svchb Nov 17, 2025
9c9eb37
Merge branch 'main' into paper_test_dam
svchb Nov 25, 2025
e96abe2
Merge branch 'paper_test_dam' of https://github.com/svchb/TrixiPartic…
svchb Nov 25, 2025
adc4819
update
svchb Nov 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "TrixiParticles"
uuid = "66699cd8-9c01-4e9d-a059-b96c86d16b3a"
authors = ["erik.faulhaber <44124897+efaulhaber@users.noreply.github.com>"]
version = "0.4.2"
authors = ["erik.faulhaber <44124897+efaulhaber@users.noreply.github.com>"]

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
Expand Down Expand Up @@ -51,7 +51,7 @@ FastPow = "0.1"
FileIO = "1"
ForwardDiff = "1"
GPUArraysCore = "0.2"
IntervalSets = "0.7 - 0.7.11"
IntervalSets = "0.7.13"
JSON = "1"
KernelAbstractions = "0.9"
MuladdMacro = "0.2"
Expand Down
27 changes: 18 additions & 9 deletions examples/fluid/dam_break_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ tspan = (0.0, 5.7 / sqrt(gravity / H))

# Boundary geometry and initial fluid particle positions
initial_fluid_size = (W, H)
tank_size = (floor(5.366 * H / boundary_particle_spacing) * boundary_particle_spacing, 4.0)
tank_size = (floor(5.366 * H / boundary_particle_spacing) * boundary_particle_spacing, 2.0+10*fluid_particle_spacing)

fluid_density = 1000.0
sound_speed = 20 * sqrt(gravity * H)
Expand All @@ -61,16 +61,17 @@ viscosity_fluid = ArtificialViscosityMonaghan(alpha=alpha, beta=0.0)
# The density diffusion model by Molteni and Colagrossi shows unphysical effects at the
# free surface in long-running simulations, but is significantly faster than the model
# by Antuono. This simulation is short enough to use the faster model.
density_diffusion = DensityDiffusionMolteniColagrossi(delta=0.1)
# density_diffusion = DensityDiffusionAntuono(tank.fluid, delta=0.1)
# density_diffusion = DensityDiffusionMolteniColagrossi(delta=0.1)
density_diffusion = DensityDiffusionAntuono(tank.fluid, delta=0.1)

fluid_system = WeaklyCompressibleSPHSystem(tank.fluid, fluid_density_calculator,
state_equation, smoothing_kernel,
smoothing_length, viscosity=viscosity_fluid,
density_diffusion=density_diffusion,
acceleration=(0.0, -gravity), correction=nothing,
surface_tension=nothing,
reference_particle_spacing=0)
reference_particle_spacing=fluid_particle_spacing,
shifting_technique=nothing)

# ==========================================================================================
# ==== Boundary
Expand All @@ -83,7 +84,7 @@ boundary_model = BoundaryModelDummyParticles(tank.boundary.density, tank.boundar
boundary_density_calculator,
smoothing_kernel, smoothing_length,
correction=nothing,
reference_particle_spacing=0,
reference_particle_spacing=fluid_particle_spacing,
viscosity=viscosity_wall)

boundary_system = WallBoundarySystem(tank.boundary, boundary_model,
Expand All @@ -93,15 +94,17 @@ boundary_system = WallBoundarySystem(tank.boundary, boundary_model,
# ==== Simulation
# `nothing` will automatically choose the best update strategy. This is only to be able
# to change this with `trixi_include`.
semi = Semidiscretization(fluid_system, boundary_system,
extra_system = nothing
extra_system2 = nothing
semi = Semidiscretization(fluid_system, boundary_system, extra_system, extra_system2,
neighborhood_search=GridNeighborhoodSearch{2}(update_strategy=nothing),
parallelization_backend=PolyesterBackend())
ode = semidiscretize(semi, tspan)

info_callback = InfoCallback(interval=100)

solution_prefix = ""
saving_callback = SolutionSavingCallback(dt=0.02, prefix=solution_prefix)
saving_callback = SolutionSavingCallback(dt=0.01, prefix=solution_prefix)

# This can be overwritten with `trixi_include`
extra_callback = nothing
Expand All @@ -117,6 +120,12 @@ callbacks = CallbackSet(info_callback, saving_callback, stepsize_callback, extra
extra_callback2, density_reinit_cb)

time_integration_scheme = CarpenterKennedy2N54(williamson_condition=false)
sol = solve(ode, time_integration_scheme,
dt=1.0, # This is overwritten by the stepsize callback
# sol = solve(ode, time_integration_scheme,
# dt=1.0, # This is overwritten by the stepsize callback
# save_everystep=false, callback=callbacks);
sol = solve(ode, RDPK3SpFSAL35(),
abstol=1e-5, # Default abstol is 1e-6 (may need to be tuned to prevent boundary penetration)
reltol=1e-4, # Default reltol is 1e-3 (may need to be tuned to prevent boundary penetration)
dtmax=1e-2, # Limit stepsize to prevent crashing
maxiters=1e7,
save_everystep=false, callback=callbacks);
6 changes: 4 additions & 2 deletions examples/fluid/dam_break_2d_gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@ trixi_include(@__MODULE__,
joinpath(examples_dir(), "fluid", "dam_break_2d.jl"),
neighborhood_search=neighborhood_search,
fluid_particle_spacing=fluid_particle_spacing,
tspan=tspan, smoothing_length=smoothing_length,
tspan=tspan,
smoothing_length=smoothing_length,
density_diffusion=density_diffusion,
boundary_layers=boundary_layers, spacing_ratio=spacing_ratio,
boundary_layers=boundary_layers,
spacing_ratio=spacing_ratio,
boundary_model=boundary_model,
parallelization_backend=PolyesterBackend(),
boundary_density_calculator=boundary_density_calculator)
38 changes: 38 additions & 0 deletions examples/fluid/dam_break_ds.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
using TrixiParticles
using CUDA

fluid_particle_spacing = 0.001

# Load setup from dam break example
trixi_include(@__MODULE__,
joinpath(examples_dir(), "fluid", "dam_break_2d.jl"),
fluid_particle_spacing=fluid_particle_spacing,
tank_size=(4.0, 3.0), W=1.0, H=2.0,
spacing_ratio=1, boundary_layers=1,
sol=nothing, ode=nothing)

tank.fluid.coordinates .+= 0.005
tank.boundary.coordinates .+= 0.005

# Define a GPU-compatible neighborhood search
min_corner = minimum(tank.boundary.coordinates, dims=2) .- 1
max_corner = maximum(tank.boundary.coordinates, dims=2) .+ 1
cell_list = FullGridCellList(; min_corner, max_corner, max_points_per_cell=30)
neighborhood_search = GridNeighborhoodSearch{2}(; cell_list,
update_strategy=ParallelUpdate())

# Run the dam break simulation with this neighborhood search
trixi_include(@__MODULE__,
joinpath(examples_dir(), "fluid", "dam_break_2d.jl"),
tank=tank,
smoothing_length=1.414216 * fluid_particle_spacing,
time_integration_scheme=SymplecticPositionVerlet(),
boundary_density_calculator=ContinuityDensity(),
fluid_particle_spacing=fluid_particle_spacing,
tank_size=(4.0, 3.0), W=1.0, H=2.0,
spacing_ratio=1, boundary_layers=1,
tspan=(0.0, 0.05), cfl=0.2,
neighborhood_search=neighborhood_search,
viscosity_wall=viscosity_fluid,
dt=1e-5, stepsize_callback=nothing, saving_callback=nothing,
parallelization_backend=CUDABackend())
42 changes: 42 additions & 0 deletions examples/paper/dam_break_2d_val_600.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# This file computes the pressure sensor data of the dam break setup described in
#
# J. J. De Courcy, T. C. S. Rendall, L. Constantin, B. Titurus, J. E. Cooper.
# "Incompressible δ-SPH via artificial compressibility".
# In: Computer Methods in Applied Mechanics and Engineering, Volume 420 (2024),
# https://doi.org/10.1016/j.cma.2023.116700

using TrixiParticles
using TrixiParticles.JSON
using CUDA

# When using data center CPUs with large numbers of cores, especially on multi-socket
# systems with multiple NUMA nodes, pinning threads to cores can significantly
# improve performance, even for low resolutions.
# using ThreadPinning
# pinthreads(:numa)

# `resolution` in this case is set relative to `H`, the initial height of the fluid.
# Use 40, 80 or 400 for validation.
# Note: 400 takes about 30 minutes on a large data center CPU (much longer with serial update)
resolution = 600

min_corner = (-1.5, -1.5)
max_corner = (6.5, 5.0)
cell_list = FullGridCellList(; min_corner, max_corner, max_points_per_cell=30)

neighborhood_search = GridNeighborhoodSearch{2}(; cell_list, update_strategy=ParallelUpdate())
# neighborhood_search = GridNeighborhoodSearch{2}(; cell_list)


# ==========================================================================================
# ==== WCSPH simulation
trixi_include_changeprecision(Float32, @__MODULE__,
joinpath(validation_dir(), "dam_break_2d",
"setup_marrone_2011.jl"),
use_edac=false,
particles_per_height=resolution,
sound_speed=50 * sqrt(9.81 * 0.6), # This is used by De Courcy et al. (2024)
alpha=0.01, # This is used by De Courcy et al. (2024)
tspan=(0.0, 7 / sqrt(9.81 / 0.6)), # This is used by De Courcy et al. (2024)
parallelization_backend=CUDABackend(),
neighborhood_search=neighborhood_search)
49 changes: 49 additions & 0 deletions examples/paper/dam_break_2d_val_600_phys_viscosity.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# This file computes the pressure sensor data of the dam break setup described in
#
# J. J. De Courcy, T. C. S. Rendall, L. Constantin, B. Titurus, J. E. Cooper.
# "Incompressible δ-SPH via artificial compressibility".
# In: Computer Methods in Applied Mechanics and Engineering, Volume 420 (2024),
# https://doi.org/10.1016/j.cma.2023.116700

using TrixiParticles
using TrixiParticles.JSON
using CUDA

# When using data center CPUs with large numbers of cores, especially on multi-socket
# systems with multiple NUMA nodes, pinning threads to cores can significantly
# improve performance, even for low resolutions.
# using ThreadPinning
# pinthreads(:numa)

# `resolution` in this case is set relative to `H`, the initial height of the fluid.
# Use 40, 80 or 400 for validation.
# Note: 400 takes about 30 minutes on a large data center CPU (much longer with serial update)
resolution = 600

min_corner = (-1.5, -1.5)
max_corner = (6.5, 5.0)
cell_list = FullGridCellList(; min_corner, max_corner, max_points_per_cell=30)

neighborhood_search = GridNeighborhoodSearch{2}(; cell_list, update_strategy=ParallelUpdate())
# neighborhood_search = GridNeighborhoodSearch{2}(; cell_list)

# switch to physical viscosity model
viscosity_fluid = ViscosityMorris(nu = 8.9E-7)
# viscosity_fluid = ViscosityAdami(nu = 8.9E-7)
# viscosity_fluid = nothing


# ==========================================================================================
# ==== WCSPH simulation
trixi_include(@__MODULE__,
joinpath(validation_dir(), "dam_break_2d",
"setup_marrone_2011.jl"),
use_edac=false,
extra_string = "_phys_viscosity",
viscosity_fluid=viscosity_fluid,
particles_per_height=resolution,
sound_speed=50 * sqrt(9.81 * 0.6), # This is used by De Courcy et al. (2024)
tspan=(0.0, 7 / sqrt(9.81 / 0.6)), # This is used by De Courcy et al. (2024)
# tspan=(0.0, 0.0001), # This is used by De Courcy et al. (2024)
parallelization_backend=CUDABackend(),
neighborhood_search=neighborhood_search)
131 changes: 131 additions & 0 deletions examples/paper/dam_break_2d_val_600_phys_viscosity_2ph.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# This file computes the pressure sensor data of the dam break setup described in
#
# J. J. De Courcy, T. C. S. Rendall, L. Constantin, B. Titurus, J. E. Cooper.
# "Incompressible δ-SPH via artificial compressibility".
# In: Computer Methods in Applied Mechanics and Engineering, Volume 420 (2024),
# https://doi.org/10.1016/j.cma.2023.116700

using TrixiParticles
using TrixiParticles.JSON
using CUDA

# When using data center CPUs with large numbers of cores, especially on multi-socket
# systems with multiple NUMA nodes, pinning threads to cores can significantly
# improve performance, even for low resolutions.
# using ThreadPinning
# pinthreads(:numa)

#TODO: duplicated
# Size parameters
H = 0.6
# W = 2 * H

# `resolution` in this case is set relative to `H`, the initial height of the fluid.
# Use 40, 80 or 400 for validation.
# Note: 400 takes about 30 minutes on a large data center CPU (much longer with serial update)
resolution = 200

#TODO: duplicated
fluid_particle_spacing = H / resolution

trixi_include(@__MODULE__, joinpath(examples_dir(), "fluid", "dam_break_2d.jl"),
fluid_particle_spacing=fluid_particle_spacing,
sol=nothing,
ode=nothing)

# tank_size = (floor(5.366 * H / fluid_particle_spacing) * fluid_particle_spacing, 4.0)


min_corner = (-1.5, -1.5)
max_corner = (6.5, 5.0)
cell_list = FullGridCellList(; min_corner, max_corner, max_points_per_cell=30)

neighborhood_search = GridNeighborhoodSearch{2}(; cell_list, update_strategy=ParallelUpdate())
# neighborhood_search = GridNeighborhoodSearch{2}(; cell_list)

# physical values
nu_water = 8.9E-7*10
nu_air = 1.544E-5*10

# switch to physical viscosity model
viscosity_fluid = ViscosityMorris(nu = nu_water)
# viscosity_fluid = ViscosityAdami(nu = 8.9E-7)
# viscosity_fluid = nothing

# set air viscosity model
viscosity_air = ViscosityMorris(nu = nu_air)

#TODO: duplicated
smoothing_length = 2 * fluid_particle_spacing
smoothing_kernel = WendlandC2Kernel{2}()
fluid_density_calculator = ContinuityDensity()


# ==========================================================================================
# ==== Setup air_system layer

air_size = (tank_size[1], tank_size[2]-H)
air_size2 = (tank_size[1] - W, tank_size[2])
air_density = 1.0

# Air above the initial water
air_system = RectangularShape(fluid_particle_spacing,
round.(Int, air_size ./ fluid_particle_spacing),
zeros(length(air_size)), density=air_density)

# Air for the rest of the empty volume
air_system2 = RectangularShape(fluid_particle_spacing,
round.(Int, air_size2 ./ fluid_particle_spacing),
(W, 0.0), density=air_density)

# move on top of the water
for i in axes(air_system.coordinates, 2)
air_system.coordinates[:, i] .+= [0.0, H]
end

air_system = union(air_system, air_system2)

#TODO: duplicated
sound_speed=50 * sqrt(9.81 * 0.6)
gravity = 9.81

air_eos = StateEquationCole(; sound_speed, reference_density=air_density, exponent=1,
clip_negative_pressure=false, background_pressure=10.0)

air_system_system = WeaklyCompressibleSPHSystem(air_system, fluid_density_calculator,
air_eos, smoothing_kernel, smoothing_length,
viscosity=viscosity_air,
acceleration=(0.0, -gravity))

tank_air_density = fill!(similar(tank.boundary.density), air_density)
tank_air_mass = fill!(similar(tank.boundary.mass), air_density*fluid_particle_spacing^2)

air_boundary_model = BoundaryModelDummyParticles(tank_air_density,
tank_air_mass,
state_equation=air_eos,
boundary_density_calculator,
smoothing_kernel,
smoothing_length,
correction=nothing,
reference_particle_spacing=fluid_particle_spacing,
viscosity=viscosity_wall)

air_boundary_system = WallBoundarySystem(tank.boundary, air_boundary_model,
adhesion_coefficient=0.0)


trixi_include(@__MODULE__,
joinpath(validation_dir(), "dam_break_2d",
"setup_marrone_2011.jl"),
use_edac=false,
extra_string = "_phys_viscosity_2ph_v2_pa10",
viscosity_fluid=viscosity_fluid,
particles_per_height=resolution,
sound_speed=50 * sqrt(9.81 * 0.6), # This is used by De Courcy et al. (2024) (120)
tspan=(0.0, 7 / sqrt(9.81 / 0.6)), # This is used by De Courcy et al. (2024)
# tspan=(0.0, 0.0001),
cfl=0.5,
parallelization_backend=PolyesterBackend(),
neighborhood_search=neighborhood_search,
extra_system=air_system_system,
extra_system2=air_boundary_system)
17 changes: 17 additions & 0 deletions examples/paper/dam_break_2d_val_600_phys_viscosity_2ph_gpu.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# This file computes the pressure sensor data of the dam break setup described in
#
# J. J. De Courcy, T. C. S. Rendall, L. Constantin, B. Titurus, J. E. Cooper.
# "Incompressible δ-SPH via artificial compressibility".
# In: Computer Methods in Applied Mechanics and Engineering, Volume 420 (2024),
# https://doi.org/10.1016/j.cma.2023.116700

using TrixiParticles
using TrixiParticles.JSON
using CUDA

# ==========================================================================================
# ==== WCSPH simulation
trixi_include_changeprecision(Float32, @__MODULE__,
joinpath(examples_dir(), "paper",
"dam_break_2d_val_600_phys_viscosity_2ph.jl"),
parallelization_backend=CUDABackend())
Loading
Loading