Skip to content

Commit 81a1fe1

Browse files
Merge pull request #50 from SciML/u/release
Prepare the package for release
2 parents 3a3fc70 + b8b0b3c commit 81a1fe1

File tree

15 files changed

+89
-57
lines changed

15 files changed

+89
-57
lines changed

.buildkite/runtests.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ steps:
44
setup:
55
version:
66
- "1"
7+
- "1.10"
78
env:
89
GROUP: CUDA
910
plugins:
@@ -30,7 +31,7 @@ steps:
3031
matrix:
3132
setup:
3233
version:
33-
- "1"
34+
- "1.10"
3435
env:
3536
GROUP: AMDGPU
3637
plugins:

Project.toml

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,30 @@ version = "1.0.0-DEV"
55

66
[deps]
77
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
8-
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
98
DiffEqGPU = "071ae1c0-96b5-11e9-1965-c90190d839ea"
109
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
1110
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
1211
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
13-
MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
14-
NonlinearSolve = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
1512
Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
1613
QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b"
1714
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
1815
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
1916
SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
2017
Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
21-
SimpleChains = "de6bee2f-e2f4-4ec7-b6ed-219cc6f6e9e5"
2218
SimpleNonlinearSolve = "727e6d20-b764-4bd8-a329-72de5adea6c7"
2319
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
2420

2521
[compat]
26-
julia = "1.6"
22+
Adapt = "4.3"
23+
DiffEqGPU = "3.4"
24+
Enzyme = "<0.13.35"
25+
ForwardDiff = "0.10"
26+
KernelAbstractions = "<0.9.30"
27+
Optimization = "4.1"
28+
QuasiMonteCarlo = "0.3"
29+
Reexport = "1.2"
30+
SciMLBase = "2.79"
31+
Setfield = "1.1"
32+
SimpleNonlinearSolve = "2.2"
33+
StaticArrays = "1.9"
34+
julia = "1.10"

benchmarks/CPU_vs_GPU/benchmark.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@ device!(2)
99

1010
N = 10
1111
function rosenbrock(x, p)
12-
sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))
12+
res = zero(eltype(x))
13+
for i in 1:(length(x) - 1)
14+
res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
15+
end
16+
res
1317
end
1418
x0 = @SArray zeros(Float32, N)
1519
p = @SArray Float32[1.0, 100.0]

benchmarks/CPU_vs_GPU/wp_algs.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@ device!(2)
99

1010
N = 10
1111
function rosenbrock(x, p)
12-
sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))
12+
res = zero(eltype(x))
13+
for i in 1:(length(x) - 1)
14+
res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
15+
end
16+
res
1317
end
1418
# x0 = @SArray zeros(Float32, N)
1519

src/PSOGPU.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ using SciMLBase, StaticArrays, Setfield, KernelAbstractions
44
using QuasiMonteCarlo, Optimization, SimpleNonlinearSolve, ForwardDiff
55
import Adapt
66
import Adapt: adapt
7-
import Enzyme: autodiff_deferred, Active, Reverse
7+
import Enzyme: autodiff_deferred, Active, Reverse, Const
88
import KernelAbstractions: @atomic, @atomicreplace, @atomicswap
99
using QuasiMonteCarlo
1010
import DiffEqGPU: GPUTsit5, make_prob_compatible, vectorized_solve, vectorized_asolve

src/hybrid.jl

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
i = @index(Global, Linear)
33
nlcache = remake(nlprob; u0 = x0s[i])
44
sol = solve(nlcache, opt; maxiters, abstol, reltol)
5-
result[i] = sol.u
5+
@inbounds result[i] = sol.u
66
end
77

88
function SciMLBase.solve!(
@@ -19,13 +19,14 @@ function SciMLBase.solve!(
1919
backend = opt.backend
2020

2121
prob = remake(cache.prob, lb = nothing, ub = nothing)
22-
f = Base.Fix2(prob.f.f, prob.p)
23-
∇f = instantiate_gradient(f, prob.f.adtype)
2422

25-
kernel = simplebfgs_run!(backend)
2623
result = cache.start_points
2724
copyto!(result, x0s)
28-
nlprob = NonlinearProblem{false}(∇f, prob.u0)
25+
26+
∇f = instantiate_gradient(prob.f.f, prob.f.adtype)
27+
28+
kernel = simplebfgs_run!(backend)
29+
nlprob = SimpleNonlinearSolve.ImmutableNonlinearProblem{false}(∇f, prob.u0, prob.p)
2930

3031
nlalg = LocalOpt isa LBFGS ?
3132
SimpleLimitedMemoryBroyden(;

src/utils.jl

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
import SciMLBase: @add_kwonly, AbstractNonlinearProblem, AbstractNonlinearFunction,
2+
AbstractODEFunction, AbstractODEProblem, warn_paramtype, ConstructionBase,
3+
NullParameters, StandardNonlinearProblem, @reset, updated_u0_p,
4+
remake_initialization_data, maybe_eager_initialize_problem
5+
16
@inbounds function uniform_itr(
27
dim::Int, lb::AbstractArray{T}, ub::AbstractArray{T}) where {T}
38
(rand(T) * (ub[i] - lb[i]) + lb[i] for i in 1:dim)
@@ -342,10 +347,12 @@ Based on the paper: Particle swarm optimization method for constrained optimizat
342347
penalty
343348
end
344349

350+
#TODO: Possible migration to DifferentiationInterface.jl,
351+
# however I cannot compile GPU-compatible gradients with Enzyme as of Mar 2025
345352
@inline function instantiate_gradient(f, adtype::AutoForwardDiff)
346-
(θ, p) -> ForwardDiff.gradient(f, θ)
353+
(θ, p) -> ForwardDiff.gradient(x -> f(x, p), θ)
347354
end
348355

349356
@inline function instantiate_gradient(f, adtype::AutoEnzyme)
350-
(θ, p) -> autodiff_deferred(Reverse, f, Active, Active(θ))[1][1]
357+
(θ, p) -> autodiff_deferred(Reverse, Const(x -> f(x, p)), Active, Active(θ))[1][1]
351358
end

test/Project.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
[deps]
2-
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
32
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
43
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
54
Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"

test/constraints.jl

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,6 @@
11
using PSOGPU, StaticArrays, SciMLBase, Test, LinearAlgebra, Random
22

3-
DEVICE = get(ENV, "GROUP", "CUDA")
4-
5-
@eval using $(Symbol(DEVICE))
6-
7-
if DEVICE == "CUDA"
8-
backend = CUDABackend()
9-
elseif DEVICE == "AMDGPU"
10-
backend = ROCBackend()
11-
end
3+
include("./utils.jl")
124

135
Random.seed!(1234)
146

test/gpu.jl

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,6 @@
11
using PSOGPU, StaticArrays, SciMLBase, Test, LinearAlgebra, Random
22

3-
DEVICE = get(ENV, "GROUP", "CUDA")
4-
5-
@eval using $(Symbol(DEVICE))
6-
7-
if DEVICE == "CUDA"
8-
backend = CUDABackend()
9-
elseif DEVICE == "AMDGPU"
10-
backend = ROCBackend()
11-
end
3+
include("./utils.jl")
124

135
@testset "Rosenbrock GPU tests $(N)" for N in 2:4
146
Random.seed!(1234)
@@ -19,7 +11,11 @@ end
1911
ub = @SArray fill(Float32(10.0), N)
2012

2113
function rosenbrock(x, p)
22-
sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))
14+
res = zero(eltype(x))
15+
for i in 1:(length(x) - 1)
16+
res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
17+
end
18+
res
2319
end
2420

2521
x0 = @SArray zeros(Float32, N)

0 commit comments

Comments
 (0)