Skip to content

Commit 84dd97e

Browse files
committed
Prepare the package for release
1 parent be7d98f commit 84dd97e

File tree

13 files changed

+75
-52
lines changed

13 files changed

+75
-52
lines changed

Project.toml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,11 @@ version = "1.0.0-DEV"
55

66
[deps]
77
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
8-
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
98
DiffEqGPU = "071ae1c0-96b5-11e9-1965-c90190d839ea"
109
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
1110
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
1211
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
1312
MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
14-
NonlinearSolve = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
1513
Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
1614
QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b"
1715
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -23,4 +21,5 @@ SimpleNonlinearSolve = "727e6d20-b764-4bd8-a329-72de5adea6c7"
2321
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
2422

2523
[compat]
26-
julia = "1.6"
24+
KernelAbstractions = "<0.9.30"
25+
julia = "1.6"

benchmarks/CPU_vs_GPU/benchmark.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@ device!(2)
99

1010
N = 10
1111
function rosenbrock(x, p)
12-
sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))
12+
res = zero(eltype(x))
13+
for i in 1:(length(x) - 1)
14+
res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
15+
end
16+
res
1317
end
1418
x0 = @SArray zeros(Float32, N)
1519
p = @SArray Float32[1.0, 100.0]

benchmarks/CPU_vs_GPU/wp_algs.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@ device!(2)
99

1010
N = 10
1111
function rosenbrock(x, p)
12-
sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))
12+
res = zero(eltype(x))
13+
for i in 1:(length(x) - 1)
14+
res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
15+
end
16+
res
1317
end
1418
# x0 = @SArray zeros(Float32, N)
1519

src/PSOGPU.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ using SciMLBase, StaticArrays, Setfield, KernelAbstractions
44
using QuasiMonteCarlo, Optimization, SimpleNonlinearSolve, ForwardDiff
55
import Adapt
66
import Adapt: adapt
7-
import Enzyme: autodiff_deferred, Active, Reverse
7+
import Enzyme: autodiff_deferred, Active, Reverse, Const
88
import KernelAbstractions: @atomic, @atomicreplace, @atomicswap
99
using QuasiMonteCarlo
1010
import DiffEqGPU: GPUTsit5, make_prob_compatible, vectorized_solve, vectorized_asolve

src/hybrid.jl

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
i = @index(Global, Linear)
33
nlcache = remake(nlprob; u0 = x0s[i])
44
sol = solve(nlcache, opt; maxiters, abstol, reltol)
5-
result[i] = sol.u
5+
@inbounds result[i] = sol.u
66
end
77

88
function SciMLBase.solve!(
@@ -19,13 +19,14 @@ function SciMLBase.solve!(
1919
backend = opt.backend
2020

2121
prob = remake(cache.prob, lb = nothing, ub = nothing)
22-
f = Base.Fix2(prob.f.f, prob.p)
23-
∇f = instantiate_gradient(f, prob.f.adtype)
2422

25-
kernel = simplebfgs_run!(backend)
2623
result = cache.start_points
2724
copyto!(result, x0s)
28-
nlprob = NonlinearProblem{false}(∇f, prob.u0)
25+
26+
∇f = instantiate_gradient(prob.f.f, prob.f.adtype)
27+
28+
kernel = simplebfgs_run!(backend)
29+
nlprob = SimpleNonlinearSolve.ImmutableNonlinearProblem{false}(∇f, prob.u0, prob.p)
2930

3031
nlalg = LocalOpt isa LBFGS ?
3132
SimpleLimitedMemoryBroyden(;

src/utils.jl

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
import SciMLBase: @add_kwonly, AbstractNonlinearProblem, AbstractNonlinearFunction,
2+
AbstractODEFunction, AbstractODEProblem, warn_paramtype, ConstructionBase,
3+
NullParameters, StandardNonlinearProblem, @reset, updated_u0_p,
4+
remake_initialization_data, maybe_eager_initialize_problem
5+
16
@inbounds function uniform_itr(
27
dim::Int, lb::AbstractArray{T}, ub::AbstractArray{T}) where {T}
38
(rand(T) * (ub[i] - lb[i]) + lb[i] for i in 1:dim)
@@ -342,10 +347,12 @@ Based on the paper: Particle swarm optimization method for constrained optimizat
342347
penalty
343348
end
344349

350+
#TODO: Possible migration to DifferentiationInterface.jl,
351+
# however I cannot compile GPU-compatible gradients with Enzyme as of Mar 2025
345352
@inline function instantiate_gradient(f, adtype::AutoForwardDiff)
346-
(θ, p) -> ForwardDiff.gradient(f, θ)
353+
(θ, p) -> ForwardDiff.gradient(x -> f(x, p), θ)
347354
end
348355

349356
@inline function instantiate_gradient(f, adtype::AutoEnzyme)
350-
(θ, p) -> autodiff_deferred(Reverse, f, Active, Active(θ))[1][1]
357+
(θ, p) -> autodiff_deferred(Reverse, Const(x -> f(x, p)), Active, Active(θ))[1][1]
351358
end

test/constraints.jl

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,6 @@
11
using PSOGPU, StaticArrays, SciMLBase, Test, LinearAlgebra, Random
22

3-
DEVICE = get(ENV, "GROUP", "CUDA")
4-
5-
@eval using $(Symbol(DEVICE))
6-
7-
if DEVICE == "CUDA"
8-
backend = CUDABackend()
9-
elseif DEVICE == "AMDGPU"
10-
backend = ROCBackend()
11-
end
3+
include("./utils.jl")
124

135
Random.seed!(1234)
146

test/gpu.jl

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,6 @@
11
using PSOGPU, StaticArrays, SciMLBase, Test, LinearAlgebra, Random
22

3-
DEVICE = get(ENV, "GROUP", "CUDA")
4-
5-
@eval using $(Symbol(DEVICE))
6-
7-
if DEVICE == "CUDA"
8-
backend = CUDABackend()
9-
elseif DEVICE == "AMDGPU"
10-
backend = ROCBackend()
11-
end
3+
include("./utils.jl")
124

135
@testset "Rosenbrock GPU tests $(N)" for N in 2:4
146
Random.seed!(1234)
@@ -19,7 +11,11 @@ end
1911
ub = @SArray fill(Float32(10.0), N)
2012

2113
function rosenbrock(x, p)
22-
sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))
14+
res = zero(eltype(x))
15+
for i in 1:(length(x) - 1)
16+
res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
17+
end
18+
res
2319
end
2420

2521
x0 = @SArray zeros(Float32, N)

test/lbfgs.jl

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,6 @@
11
using PSOGPU, Optimization, StaticArrays
22

3-
DEVICE = get(ENV, "GROUP", "CUDA")
4-
5-
@eval using $(Symbol(DEVICE))
6-
7-
if DEVICE == "CUDA"
8-
backend = CUDABackend()
9-
elseif DEVICE == "AMDGPU"
10-
backend = ROCBackend()
11-
end
3+
include("./utils.jl")
124

135
function objf(x, p)
146
return 1 - x[1]^2 - x[2]^2
@@ -25,7 +17,11 @@ sol = Optimization.solve(prob,
2517

2618
N = 10
2719
function rosenbrock(x, p)
28-
sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))
20+
res = zero(eltype(x))
21+
for i in 1:(length(x) - 1)
22+
res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
23+
end
24+
res
2925
end
3026
x0 = @SArray rand(Float32, N)
3127
p = @SArray Float32[1.0, 100.0]

test/regression.jl

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@ using QuasiMonteCarlo
99
ub = @SArray fill(Float32(10.0), N)
1010

1111
function rosenbrock(x, p)
12-
sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))
12+
res = zero(eltype(x))
13+
for i in 1:(length(x) - 1)
14+
res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
15+
end
16+
res
1317
end
1418

1519
x0 = @SArray zeros(Float32, N)
@@ -157,7 +161,11 @@ end
157161
ub = @SArray fill(Float32(10.0), N)
158162

159163
function rosenbrock(x, p)
160-
sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))
164+
res = zero(eltype(x))
165+
for i in 1:(length(x) - 1)
166+
res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
167+
end
168+
res
161169
end
162170

163171
x0 = @SArray zeros(Float32, N)

0 commit comments

Comments
 (0)