Skip to content

Commit 1359b80

Browse files
committed
Update solvers to run CPU backend
1 parent 437ff77 commit 1359b80

File tree

3 files changed

+43
-9
lines changed

3 files changed

+43
-9
lines changed

benchmarks/CPU_vs_GPU/benchmark.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ using CUDA
33

44
device!(2)
55

6-
N = 10
6+
N = 3
77
# Generalized Rosenbrock objective used by the benchmark.
#
# Arguments:
#   x - indexable collection of coordinates (1-based); consecutive pairs
#       `x[i]`, `x[i + 1]` are combined.
#   p - indexable parameters; only `p[1]` and `p[2]` are read.
#
# Returns the sum over `i in 1:(length(x) - 1)` of
#   p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
# When `x` has fewer than two entries there are no pairs to sum, so the cost is
# `zero(eltype(x))` instead of an empty-reduction error.
function rosenbrock(x, p)
    # Guard the degenerate case explicitly; the non-degenerate path below is
    # the same reduction as before.
    length(x) < 2 && return zero(eltype(x))
    return sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
               for i in 1:(length(x) - 1))
end

src/kernels.jl

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,19 +48,18 @@ end
4848
particle = @private SPSOParticle{T1, T2} 1
4949

5050
@inbounds particle[1] = gpu_particles[i]
51-
@inbounds gbest = gbest_ref[1]
52-
5351
# Initialize cost to be Inf
5452
if tidx == 1
55-
fill!(best_queue, SPSOGBest(gbest.position, convert(typeof(gbest.cost), Inf)))
53+
fill!(best_queue,
54+
SPSOGBest(particle[1].position, convert(typeof(particle[1].cost), Inf)))
5655
queue_num[1] = UInt32(0)
5756
end
5857

5958
@synchronize
6059

6160
@inbounds particle[1] = update_particle_state(particle[1],
6261
prob,
63-
gbest,
62+
gbest_ref[1],
6463
w,
6564
c1,
6665
c2,
@@ -69,8 +68,7 @@ end
6968

7069
@synchronize
7170

72-
gbest = @inbounds gbest_ref[1]
73-
if particle[1].best_cost < gbest.cost
71+
@inbounds if particle[1].best_cost < gbest_ref[1].cost
7472
queue_idx = @atomic queue_num[1] += UInt32(1)
7573
@inbounds best_queue[queue_idx] = SPSOGBest(particle[1].best_position,
7674
particle[1].best_cost)
@@ -96,8 +94,7 @@ end
9694
end
9795

9896
# Update global best fit
99-
gbest = @inbounds gbest_ref[1]
100-
@inbounds if best_queue[1].cost < gbest.cost
97+
@inbounds if best_queue[1].cost < gbest_ref[1].cost
10198
gbest_ref[1] = best_queue[1]
10299
end
103100

@@ -153,6 +150,18 @@ end
153150
@inbounds gpu_particles[i] = particle
154151
end
155152

153+
# Kernel method selected when `opt` is a `ParallelSyncPSOKernel` whose first
# type parameter is a `CPU` backend.
#
# Each work-item handles the particle at its global linear index: it reads
# that particle, passes it to `update_particle_state` together with `prob`,
# `gbest`, `w`, `c1`, `c2`, the index and `opt`, and writes the returned
# particle back into the same slot of `gpu_particles`.
#
# Keywords:
#   c1, c2 - coefficients forwarded to `update_particle_state`
#            (both default to 1.4962f0).
@kernel function update_particle_states!(prob, gpu_particles, gbest, w,
        opt::ParallelSyncPSOKernel{Backend, T, G, H}; c1 = 1.4962f0,
        c2 = 1.4962f0) where {Backend <: CPU, T, G, H}
    idx = @index(Global, Linear)

    # Load this work-item's particle.
    @inbounds current = gpu_particles[idx]

    # The update itself runs outside `@inbounds`, as in the load/store split.
    advanced = update_particle_state(current, prob, gbest, w, c1, c2, idx, opt)

    # Store the updated particle back in place.
    @inbounds gpu_particles[idx] = advanced
end
164+
156165
@kernel function update_particle_states_async!(prob,
157166
gpu_particles,
158167
gbest_ref,

src/lowerlevel_solve.jl

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,31 @@ function vectorized_solve!(prob,
2828
return gbest, gpu_particles
2929
end
3030

31+
# Synchronous particle-swarm driver for a `ParallelSyncPSOKernel` configured
# with a `CPU` backend.
#
# Arguments:
#   prob          - problem object forwarded to the particle-update kernel.
#   gbest         - initial global best; rebuilt after every iteration.
#   gpu_particles - particle array, updated in place by the kernel.
#   opt           - solver options; selects this method via its backend type.
#
# Keywords:
#   maxiters - number of update sweeps to run (default 100).
#   w        - initial inertia weight passed to the kernel (default 0.7298f0).
#   wdamp    - factor `w` is multiplied by after each sweep (default 1.0f0).
#   debug    - accepted for interface compatibility; not used by this method.
#
# Returns `(gbest, gpu_particles)`.
function vectorized_solve!(prob,
        gbest,
        gpu_particles, opt::ParallelSyncPSOKernel{Backend, T, G, H};
        maxiters = 100,
        w = 0.7298f0,
        wdamp = 1.0f0,
        debug = false) where {Backend <: CPU, T, G, H}
    backend = get_backend(gpu_particles)

    update_particle_kernel = update_particle_states!(backend)

    for _ in 1:maxiters
        update_particle_kernel(prob,
            gpu_particles,
            gbest,
            w, opt;
            ndrange = length(gpu_particles))

        # NOTE(review): `minimum` relies on the ordering defined for the particle
        # type elsewhere in the project - confirm it ranks particles by cost.
        best_particle = minimum(gpu_particles)

        # `best_cost` is the cost recorded for `best_position`, so the global best
        # must pair those two fields; pairing it with the particle's current
        # `position` would store a point that does not correspond to the cost.
        gbest = SPSOGBest(best_particle.best_position, best_particle.best_cost)

        w = w * wdamp
    end

    return gbest, gpu_particles
end
55+
3156
function vectorized_solve!(prob,
3257
gbest,
3358
gpu_particles, opt::ParallelPSOKernel, ::Val{true};

0 commit comments

Comments
 (0)