Skip to content

Commit 12d1929

Browse files
authored
Merge pull request #66 from cheesema/neigh_iteration_optimization
performance improvement using by level, and slight iterator impovement
2 parents 357cb1e + 2eb249d commit 12d1929

File tree

2 files changed

+134
-93
lines changed

2 files changed

+134
-93
lines changed

examples/Example_apr_neighbour_access.cpp

Lines changed: 61 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -60,34 +60,38 @@ int main(int argc, char **argv) {
6060
//Basic serial iteration over all particles
6161
uint64_t particle_number;
6262
//Basic serial iteration over all particles
63-
for (particle_number = 0; particle_number < apr_iterator.total_number_particles(); ++particle_number) {
64-
//This step is required for all loops to set the iterator by the particle number
65-
apr_iterator.set_iterator_to_particle_by_number(particle_number);
63+
for (unsigned int level = apr_iterator.level_min(); level <= apr_iterator.level_max(); ++level) {
64+
for (particle_number = apr_iterator.particles_level_begin(level);
65+
particle_number < apr_iterator.particles_level_end(level); ++particle_number) {
6666

67-
//now we only update the neighbours, and directly access them through a neighbour iterator
67+
//This step is required for all loops to set the iterator by the particle number
68+
apr_iterator.set_iterator_to_particle_by_number(particle_number);
6869

69-
float counter = 0;
70-
float temp = 0;
70+
//now we only update the neighbours, and directly access them through a neighbour iterator
7171

72-
//loop over all the neighbours and set the neighbour iterator to it
73-
for (int direction = 0; direction < 6; ++direction) {
74-
// Neighbour Particle Cell Face definitions [+y,-y,+x,-x,+z,-z] = [0,1,2,3,4,5]
75-
apr_iterator.find_neighbours_in_direction(direction);
72+
float counter = 0;
73+
float temp = 0;
7674

77-
for (int index = 0; index < apr_iterator.number_neighbours_in_direction(direction); ++index) {
78-
// on each face, there can be 0-4 neighbours accessed by index
79-
if(neighbour_iterator.set_neighbour_iterator(apr_iterator, direction, index)){
80-
//will return true if there is a neighbour defined
75+
//loop over all the neighbours and set the neighbour iterator to it
76+
for (int direction = 0; direction < 6; ++direction) {
77+
// Neighbour Particle Cell Face definitions [+y,-y,+x,-x,+z,-z] = [0,1,2,3,4,5]
78+
apr_iterator.find_neighbours_in_direction(direction);
8179

82-
temp += apr.particles_intensities[neighbour_iterator];
83-
counter++;
80+
for (int index = 0; index < apr_iterator.number_neighbours_in_direction(direction); ++index) {
81+
// on each face, there can be 0-4 neighbours accessed by index
82+
if (neighbour_iterator.set_neighbour_iterator(apr_iterator, direction, index)) {
83+
//will return true if there is a neighbour defined
8484

85+
temp += apr.particles_intensities[neighbour_iterator];
86+
counter++;
87+
88+
}
8589
}
8690
}
87-
}
8891

89-
neigh_avg[apr_iterator] = temp/counter;
92+
neigh_avg[apr_iterator] = temp / counter;
9093

94+
}
9195
}
9296

9397
timer.stop_timer();
@@ -104,22 +108,26 @@ int main(int argc, char **argv) {
104108

105109
timer.start_timer("APR parallel iterator neighbour loop");
106110

111+
for (unsigned int level = apr_iterator.level_min(); level <= apr_iterator.level_max(); ++level) {
107112
#ifdef HAVE_OPENMP
108-
#pragma omp parallel for schedule(static) private(particle_number) firstprivate(apr_iterator,neighbour_iterator)
113+
#pragma omp parallel for schedule(static) private(particle_number) firstprivate(apr_iterator, neighbour_iterator)
109114
#endif
110-
for (particle_number = 0; particle_number < apr_iterator.total_number_particles(); ++particle_number) {
111-
//needed step for any loop (update to the next part)
112-
apr_iterator.set_iterator_to_particle_by_number(particle_number);
113-
114-
//loop over all the neighbours and set the neighbour iterator to it
115-
for (int direction = 0; direction < 6; ++direction) {
116-
apr_iterator.find_neighbours_in_direction(direction);
117-
// Neighbour Particle Cell Face definitions [+y,-y,+x,-x,+z,-z] = [0,1,2,3,4,5]
118-
for (int index = 0; index < apr_iterator.number_neighbours_in_direction(direction); ++index) {
119-
120-
if(neighbour_iterator.set_neighbour_iterator(apr_iterator, direction, index)){
121-
//neighbour_iterator works just like apr, and apr_parallel_iterator (you could also call neighbours)
122-
neigh_xm[apr_iterator] += apr.particles_intensities[neighbour_iterator]*(apr_iterator.y() - neighbour_iterator.y());
115+
for (particle_number = apr_iterator.particles_level_begin(level);
116+
particle_number < apr_iterator.particles_level_end(level); ++particle_number) {
117+
//needed step for any loop (update to the next part)
118+
apr_iterator.set_iterator_to_particle_by_number(particle_number);
119+
120+
//loop over all the neighbours and set the neighbour iterator to it
121+
for (int direction = 0; direction < 6; ++direction) {
122+
apr_iterator.find_neighbours_in_direction(direction);
123+
// Neighbour Particle Cell Face definitions [+y,-y,+x,-x,+z,-z] = [0,1,2,3,4,5]
124+
for (int index = 0; index < apr_iterator.number_neighbours_in_direction(direction); ++index) {
125+
126+
if (neighbour_iterator.set_neighbour_iterator(apr_iterator, direction, index)) {
127+
//neighbour_iterator works just like apr, and apr_parallel_iterator (you could also call neighbours)
128+
neigh_xm[apr_iterator] += apr.particles_intensities[neighbour_iterator] *
129+
(apr_iterator.y() - neighbour_iterator.y());
130+
}
123131
}
124132
}
125133
}
@@ -137,27 +145,32 @@ int main(int argc, char **argv) {
137145
//need to initialize the neighbour iterator with the APR you are iterating over.
138146

139147
timer.start_timer("APR parallel iterator neighbours loop only -x face neighbours");
148+
for (unsigned int level = apr_iterator.level_min(); level <= apr_iterator.level_max(); ++level) {
140149
#ifdef HAVE_OPENMP
141-
#pragma omp parallel for schedule(static) private(particle_number) firstprivate(apr_iterator,neighbour_iterator)
150+
#pragma omp parallel for schedule(static) private(particle_number) firstprivate(apr_iterator, neighbour_iterator)
142151
#endif
143-
for (particle_number = 0; particle_number < apr_iterator.total_number_particles(); ++particle_number) {
144-
//needed step for any loop (update to the next part)
145-
apr_iterator.set_iterator_to_particle_by_number(particle_number);
146-
147-
const unsigned int direction = 3;
148-
//now we only update the neighbours, and directly access them through a neighbour iterator
149-
apr_iterator.find_neighbours_in_direction(direction); // 3 = -x face
150-
151-
for (int index = 0; index < apr_iterator.number_neighbours_in_direction(direction); ++index) {
152-
// from 0 to 4 neighbours
153-
if(neighbour_iterator.set_neighbour_iterator(apr_iterator, direction, index)){
154-
//access data and perform a conditional sum (neighbour_iterator has all access like the normal iterator)
155-
if((neighbour_iterator.type() == 1) & (neighbour_iterator.level() <= neighbour_iterator.level_max())){
156-
type_sum[apr_iterator] += apr.particles_intensities[neighbour_iterator]*apr_iterator.type();
152+
for (particle_number = apr_iterator.particles_level_begin(level);
153+
particle_number < apr_iterator.particles_level_end(level); ++particle_number) {
154+
//needed step for any loop (update to the next part)
155+
apr_iterator.set_iterator_to_particle_by_number(particle_number);
156+
157+
const unsigned int direction = 3;
158+
//now we only update the neighbours, and directly access them through a neighbour iterator
159+
apr_iterator.find_neighbours_in_direction(direction); // 3 = -x face
160+
161+
for (int index = 0; index < apr_iterator.number_neighbours_in_direction(direction); ++index) {
162+
// from 0 to 4 neighbours
163+
if (neighbour_iterator.set_neighbour_iterator(apr_iterator, direction, index)) {
164+
//access data and perform a conditional sum (neighbour_iterator has all access like the normal iterator)
165+
if ((neighbour_iterator.type() == 1) &
166+
(neighbour_iterator.level() <= neighbour_iterator.level_max())) {
167+
type_sum[apr_iterator] +=
168+
apr.particles_intensities[neighbour_iterator] * apr_iterator.type();
169+
}
170+
}
157171
}
158172
}
159173
}
160-
}
161174

162175
timer.stop_timer();
163176

src/data_structures/APR/APRIterator.hpp

Lines changed: 73 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -233,69 +233,97 @@ class APRIterator {
233233

234234
bool find_neighbours_in_direction(const uint8_t& direction){
235235

236-
bool found = false;
237-
238236
//the three cases
239237
if(current_particle_cell.level == apr_access->level_max){
240-
for (int l = 0; l < 2; ++l) {
241-
level_delta = level_check_max[l];
242-
apr_access->get_neighbour_coordinate(current_particle_cell,neighbour_particle_cell,direction,level_delta,0);
243-
244-
if(check_neighbours_particle_cell_in_bounds()){
245-
if(apr_access->find_particle_cell(neighbour_particle_cell,apr_access->get_local_iterator(local_iterators,
246-
level_delta, direction,0))){
247-
//found the neighbour! :D
248-
found=true;
249-
break;
250-
}
251-
};
238+
//for (int l = 0; l < 2; ++l) {
239+
//level_delta = level_check_max[l];
240+
apr_access->get_neighbour_coordinate(current_particle_cell,neighbour_particle_cell,direction,_LEVEL_SAME,0);
241+
242+
if(check_neighbours_particle_cell_in_bounds()){
243+
if(apr_access->find_particle_cell(neighbour_particle_cell,local_iterators.same_level[direction])){
244+
//found the neighbour! :D
245+
level_delta = _LEVEL_SAME;
246+
return true;
247+
}
248+
};
252249

253-
}
250+
apr_access->get_neighbour_coordinate(current_particle_cell,neighbour_particle_cell,direction,_LEVEL_DECREASE,0);
251+
252+
if(check_neighbours_particle_cell_in_bounds()){
253+
if(apr_access->find_particle_cell(neighbour_particle_cell,local_iterators.parent_level[direction])){
254+
level_delta = _LEVEL_DECREASE;
255+
return true;
256+
}
257+
};
258+
259+
//}
254260

255261
} else if(current_particle_cell.level == apr_access->level_min){
256-
for (int l = 0; l < 2; ++l) {
257-
level_delta = level_check_min[l];
258-
apr_access->get_neighbour_coordinate(current_particle_cell,neighbour_particle_cell,direction,level_delta,0);
259-
260-
if(check_neighbours_particle_cell_in_bounds()){
261-
if(apr_access->find_particle_cell(neighbour_particle_cell,apr_access->get_local_iterator(local_iterators,
262-
level_delta, direction,0))){
263-
//found the neighbour! :D
264-
found=true;
265-
break;
266-
}
267-
};
262+
//for (int l = 0; l < 2; ++l) {
263+
//level_delta = level_check_min[l];
264+
apr_access->get_neighbour_coordinate(current_particle_cell,neighbour_particle_cell,direction,_LEVEL_SAME,0);
265+
266+
if(check_neighbours_particle_cell_in_bounds()){
267+
if(apr_access->find_particle_cell(neighbour_particle_cell,local_iterators.same_level[direction])){
268+
//found the neighbour! :D
269+
level_delta = _LEVEL_SAME;
270+
return true;
271+
}
272+
};
268273

269-
}
274+
apr_access->get_neighbour_coordinate(current_particle_cell,neighbour_particle_cell,direction,_LEVEL_INCREASE,0);
275+
276+
if(check_neighbours_particle_cell_in_bounds()){
277+
if(apr_access->find_particle_cell(neighbour_particle_cell,local_iterators.child_level[direction][0])){
278+
level_delta = _LEVEL_INCREASE;
279+
return true;
280+
}
281+
};
282+
283+
//}
270284
} else {
271-
for (int l = 0; l < 3; ++l) {
272-
level_delta = level_check_middle[l];
273-
apr_access->get_neighbour_coordinate(current_particle_cell,neighbour_particle_cell,direction,level_delta,0);
274-
275-
if(check_neighbours_particle_cell_in_bounds()){
276-
if(apr_access->find_particle_cell(neighbour_particle_cell,apr_access->get_local_iterator(local_iterators,
277-
level_delta, direction,0))){
278-
//found the neighbour! :D
279-
found=true;
280-
break;
281-
}
282-
};
285+
//for (int l = 0; l < 3; ++l) {
286+
apr_access->get_neighbour_coordinate(current_particle_cell,neighbour_particle_cell,direction,_LEVEL_SAME,0);
287+
288+
if(check_neighbours_particle_cell_in_bounds()){
289+
if(apr_access->find_particle_cell(neighbour_particle_cell,local_iterators.same_level[direction])){
290+
//found the neighbour! :D
291+
level_delta = _LEVEL_SAME;
292+
return true;
293+
}
294+
};
295+
296+
apr_access->get_neighbour_coordinate(current_particle_cell,neighbour_particle_cell,direction,_LEVEL_DECREASE,0);
297+
298+
if(check_neighbours_particle_cell_in_bounds()){
299+
if(apr_access->find_particle_cell(neighbour_particle_cell,local_iterators.parent_level[direction])){
300+
level_delta = _LEVEL_DECREASE;
301+
return true;
302+
}
303+
};
304+
apr_access->get_neighbour_coordinate(current_particle_cell,neighbour_particle_cell,direction,_LEVEL_INCREASE,0);
305+
306+
if(check_neighbours_particle_cell_in_bounds()){
307+
if(apr_access->find_particle_cell(neighbour_particle_cell,local_iterators.child_level[direction][0])){
308+
level_delta = _LEVEL_INCREASE;
309+
return true;
310+
}
311+
};
283312

284-
}
285313

286-
}
287314

288-
if(!found){
289-
level_delta=_NO_NEIGHBOUR;
290315
}
291316

292-
return found;
317+
level_delta=_NO_NEIGHBOUR;
318+
319+
return false;
293320

294321
}
295322

296323

297324

298325

326+
299327
bool set_neighbour_iterator(APRIterator<ImageType> &original_iterator, const uint8_t& direction, const uint8_t& index){
300328
//
301329
// This is sets the this iterator, to the neighbour of the particle cell that original_iterator is pointing to

0 commit comments

Comments
 (0)