|
33 | 33 | particle |
34 | 34 | end |
35 | 35 |
|
36 | | -@kernel function update_particle_states!(prob, gpu_particles::AbstractArray{SPSOParticle{T1,T2}}, gbest_ref, w, |
| 36 | +@kernel function update_particle_states!(prob, |
| 37 | + gpu_particles::AbstractArray{SPSOParticle{T1, T2}}, gbest_ref, w, |
37 | 38 | opt::ParallelPSOKernel, lock; c1 = 1.4962f0, |
38 | | - c2 = 1.4962f0) where {T1,T2} |
| 39 | + c2 = 1.4962f0) where {T1, T2} |
39 | 40 | i = @index(Global, Linear) |
40 | 41 | # FIXME: Determine the right amount of shmem to use |
41 | | - best_queue = @localmem SPSOGBest{T1,T2} 1024 |
| 42 | + |
| 43 | + @uniform gs = @groupsize()[1] |
| 44 | + |
| 45 | + best_queue = @localmem SPSOGBest{T1, T2} (gs) |
42 | 46 | queue_num = @localmem UInt32 1 |
43 | 47 |
|
44 | 48 | @inbounds gbest = gbest_ref[1] |
45 | 49 | @inbounds particle = gpu_particles[i] |
46 | 50 |
|
| 51 | + # Initialize cost to be Inf |
| 52 | + for bq_idx in 1:gs |
| 53 | + best_queue[bq_idx] = SPSOGBest(particle.best_position, |
| 54 | + convert(typeof(particle.cost), Inf)) |
| 55 | + end |
| 56 | + |
| 57 | + @synchronize |
| 58 | + |
47 | 59 | particle = update_particle_state(particle, prob, gbest, w, c1, c2, i, opt) |
48 | 60 | @inbounds gpu_particles[i] = particle |
49 | 61 |
|
|
52 | 64 | if particle.best_cost < gbest.cost |
53 | 65 | queue_idx = @atomic queue_num[1] += UInt32(1) |
54 | 66 | @inbounds best_queue[queue_idx] = SPSOGBest(particle.best_position, |
55 | | - particle.best_cost) |
| 67 | + particle.best_cost) |
56 | 68 | end |
57 | | - |
58 | 69 | @synchronize |
59 | | - |
60 | 70 | if i <= first(@ndrange()) |
61 | 71 | tidx = @index(Local, Linear) |
62 | 72 | if tidx == 1 |
|
0 commit comments