Skip to content

Commit 238ab82

Browse files
committed
Merge pull request #433 from haahh/dev_reverse_copy
reverse_copy() performance improvement
2 parents aa15cd6 + 5aace2d commit 238ab82

File tree

7 files changed

+202
-4
lines changed

7 files changed

+202
-4
lines changed

include/boost/compute/algorithm/reverse_copy.hpp

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,35 @@
2020

2121
namespace boost {
2222
namespace compute {
23+
namespace detail {
24+
25+
template<class Iterator, class OutputIterator>
26+
struct reverse_copy_kernel : public meta_kernel
27+
{
28+
reverse_copy_kernel(Iterator first, Iterator last, OutputIterator result)
29+
: meta_kernel("reverse_copy")
30+
{
31+
typedef typename std::iterator_traits<Iterator>::value_type value_type;
32+
33+
// store size of the range
34+
m_size = detail::iterator_range_size(first, last);
35+
add_set_arg<const cl_uint>("size", static_cast<const cl_uint>(m_size));
36+
37+
*this <<
38+
decl<cl_uint>("i") << " = get_global_id(0);\n" <<
39+
decl<cl_uint>("j") << " = size - get_global_id(0) - 1;\n" <<
40+
result[var<cl_uint>("j")] << "=" << first[var<cl_uint>("i")] << ";\n";
41+
}
42+
43+
void exec(command_queue &queue)
44+
{
45+
exec_1d(queue, 0, m_size);
46+
}
47+
48+
size_t m_size;
49+
};
50+
51+
} // end detail namespace
2352

2453
/// Copies the elements in the range [\p first, \p last) in reversed
2554
/// order to the range beginning at \p result.
@@ -36,11 +65,11 @@ reverse_copy(InputIterator first,
3665

3766
difference_type count = std::distance(first, last);
3867

39-
// copy data to result
40-
::boost::compute::copy(first, last, result, queue);
68+
detail::reverse_copy_kernel<InputIterator, OutputIterator>
69+
kernel(first, last, result);
4170

42-
// reverse result
43-
::boost::compute::reverse(result, result + count, queue);
71+
// run kernel
72+
kernel.exec(queue);
4473

4574
// return iterator to the end of result
4675
return result + count;

perf/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ set(BENCHMARKS
4141
partition_point
4242
prev_permutation
4343
reverse
44+
reverse_copy
4445
rotate
4546
rotate_copy
4647
host_sort
@@ -80,6 +81,7 @@ set(STL_BENCHMARKS
8081
stl_partition
8182
stl_prev_permutation
8283
stl_reverse
84+
stl_reverse_copy
8385
stl_rotate
8486
stl_rotate_copy
8587
stl_saxpy
@@ -123,6 +125,7 @@ if(${BOOST_COMPUTE_HAVE_CUDA})
123125
thrust_partial_sum
124126
thrust_partition
125127
thrust_reverse
128+
thrust_reverse_copy
126129
thrust_rotate
127130
thrust_saxpy
128131
thrust_set_difference

perf/perf.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ def run_benchmark(name, sizes, vs=[]):
123123
"partial_sum",
124124
"partition",
125125
"reverse",
126+
"reverse_copy",
126127
"rotate",
127128
"saxpy",
128129
"sort",
@@ -149,6 +150,7 @@ def run_benchmark(name, sizes, vs=[]):
149150
"partition_point",
150151
"prev_permutation",
151152
"reverse",
153+
"reverse_copy",
152154
"rotate",
153155
"rotate_copy",
154156
"saxpy",

perf/perf_reverse_copy.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
//---------------------------------------------------------------------------//
2+
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
3+
//
4+
// Distributed under the Boost Software License, Version 1.0
5+
// See accompanying file LICENSE_1_0.txt or copy at
6+
// http://www.boost.org/LICENSE_1_0.txt
7+
//
8+
// See http://kylelutz.github.com/compute for more information.
9+
//---------------------------------------------------------------------------//
10+
11+
#include <algorithm>
12+
#include <iostream>
13+
#include <numeric>
14+
#include <vector>
15+
16+
#include <boost/compute/system.hpp>
17+
#include <boost/compute/algorithm/reverse_copy.hpp>
18+
#include <boost/compute/container/vector.hpp>
19+
20+
#include "perf.hpp"
21+
22+
// Returns a pseudo-random integer obtained by scaling rand() from
// [0, RAND_MAX] onto [0, 25] and truncating toward zero.
int rand_int()
{
    const double unit_fraction = rand() / double(RAND_MAX);
    return static_cast<int>(unit_fraction * 25.0);
}
26+
27+
int main(int argc, char *argv[])
28+
{
29+
perf_parse_args(argc, argv);
30+
std::cout << "size: " << PERF_N << std::endl;
31+
32+
// setup context and queue for the default device
33+
boost::compute::device device = boost::compute::system::default_device();
34+
boost::compute::context context(device);
35+
boost::compute::command_queue queue(context, device);
36+
std::cout << "device: " << device.name() << std::endl;
37+
38+
// create vector of random numbers on the host
39+
std::vector<int> host_vector(PERF_N);
40+
std::generate(host_vector.begin(), host_vector.end(), rand_int);
41+
42+
// create vector on the device and copy the data
43+
boost::compute::vector<int> device_vector(PERF_N, context);
44+
boost::compute::copy(
45+
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
46+
);
47+
48+
// create vector on the device for reversed data
49+
boost::compute::vector<int> device_reversed_vector(PERF_N, context);
50+
51+
perf_timer t;
52+
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
53+
t.start();
54+
boost::compute::reverse_copy(
55+
device_vector.begin(), device_vector.end(),
56+
device_reversed_vector.begin(),
57+
queue
58+
);
59+
queue.finish();
60+
t.stop();
61+
}
62+
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
63+
64+
return 0;
65+
}

perf/perf_stl_reverse_copy.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
//---------------------------------------------------------------------------//
2+
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
3+
//
4+
// Distributed under the Boost Software License, Version 1.0
5+
// See accompanying file LICENSE_1_0.txt or copy at
6+
// http://www.boost.org/LICENSE_1_0.txt
7+
//
8+
// See http://kylelutz.github.com/compute for more information.
9+
//---------------------------------------------------------------------------//
10+
11+
#include <algorithm>
12+
#include <iostream>
13+
#include <numeric>
14+
#include <vector>
15+
16+
#include "perf.hpp"
17+
18+
// Produces a pseudo-random integer by mapping rand() from [0, RAND_MAX]
// onto [0, 25] and dropping the fractional part.
int rand_int()
{
    const double scaled = (rand() / double(RAND_MAX)) * 25.0;
    return static_cast<int>(scaled);
}
22+
23+
int main(int argc, char *argv[])
24+
{
25+
perf_parse_args(argc, argv);
26+
std::cout << "size: " << PERF_N << std::endl;
27+
28+
// create vector of random numbers on the host
29+
std::vector<int> host_vector(PERF_N);
30+
std::generate(host_vector.begin(), host_vector.end(), rand_int);
31+
32+
// create vector for reversed data
33+
std::vector<int> host_reversed_vector(PERF_N);
34+
35+
perf_timer t;
36+
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
37+
t.start();
38+
std::reverse_copy(host_vector.begin(), host_vector.end(),
39+
host_reversed_vector.begin());
40+
t.stop();
41+
}
42+
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
43+
44+
return 0;
45+
}

perf/perf_thrust_reverse_copy.cu

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
//---------------------------------------------------------------------------//
2+
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
3+
//
4+
// Distributed under the Boost Software License, Version 1.0
5+
// See accompanying file LICENSE_1_0.txt or copy at
6+
// http://www.boost.org/LICENSE_1_0.txt
7+
//
8+
// See http://kylelutz.github.com/compute for more information.
9+
//---------------------------------------------------------------------------//
10+
11+
#include <algorithm>
12+
#include <cstdlib>
13+
14+
#include <thrust/copy.h>
15+
#include <thrust/device_vector.h>
16+
#include <thrust/generate.h>
17+
#include <thrust/host_vector.h>
18+
#include <thrust/reverse.h>
19+
20+
#include "perf.hpp"
21+
22+
int main(int argc, char *argv[])
23+
{
24+
perf_parse_args(argc, argv);
25+
26+
std::cout << "size: " << PERF_N << std::endl;
27+
thrust::host_vector<int> h_vec = generate_random_vector<int>(PERF_N);
28+
29+
// transfer data to the device
30+
thrust::device_vector<int> d_vec;
31+
d_vec = h_vec;
32+
33+
// device vector for reversed data
34+
thrust::device_vector<int> d_reversed_vec(PERF_N);
35+
36+
perf_timer t;
37+
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
38+
t.start();
39+
thrust::reverse_copy(d_vec.begin(), d_vec.end(), d_reversed_vec.begin());
40+
cudaDeviceSynchronize();
41+
t.stop();
42+
}
43+
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
44+
45+
return 0;
46+
}

test/test_reverse.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,14 @@ BOOST_AUTO_TEST_CASE(reverse_copy_int)
6868
bc::reverse_copy(a.begin(), a.end(), b.begin());
6969
BOOST_CHECK(iter == b.end());
7070
CHECK_RANGE_EQUAL(int, 5, b, (4, 3, 2, 1, 0));
71+
72+
iter = bc::reverse_copy(b.begin() + 1, b.end(), a.begin() + 1);
73+
BOOST_CHECK(iter == a.end());
74+
CHECK_RANGE_EQUAL(int, 5, a, (0, 0, 1, 2, 3));
75+
76+
iter = bc::reverse_copy(a.begin(), a.end() - 1, b.begin());
77+
BOOST_CHECK(iter == (b.end() - 1));
78+
CHECK_RANGE_EQUAL(int, 5, b, (2, 1, 0, 0, 0));
7179
}
7280

7381
BOOST_AUTO_TEST_CASE(reverse_copy_counting_iterator)

0 commit comments

Comments
 (0)