Skip to content

Commit 99c2821

Browse files
committed
Benchmarks and version bump
1 parent 55eb6a4 commit 99c2821

File tree

9 files changed

+421
-1
lines changed

9 files changed

+421
-1
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "AcceleratedKernels"
22
uuid = "6a4ca0a5-0e36-4168-a932-d9be78d558f1"
33
authors = ["Andrei-Leonard Nicusan <leonard@evophase.co.uk> and contributors"]
4-
version = "0.4.0"
4+
version = "0.4.1"
55

66
[deps]
77
ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197"

benchmark/Project.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[deps]
2+
AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1"
3+
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"

benchmark/accumulate_1d.jl

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import AcceleratedKernels as AK
2+
using KernelAbstractions
3+
4+
using BenchmarkTools
5+
using Random
6+
Random.seed!(0)
7+
8+
9+
# Choose the Array backend:
10+
#
11+
# using CUDA
12+
# const ArrayType = CuArray
13+
#
14+
# using AMDGPU
15+
# const ArrayType = ROCArray
16+
#
17+
# using oneAPI
18+
# const ArrayType = oneArray
19+
#
20+
# using Metal
21+
# const ArrayType = MtlArray
22+
#
23+
# using OpenCL
24+
# const ArrayType = CLArray
25+
#
26+
const ArrayType = Array
27+
28+
29+
println("Using ArrayType: ", ArrayType)
30+
31+
32+
n = 1_000_000
33+
34+
35+
println("\n===\nBenchmarking accumulate(+) on $n UInt32 - Base vs. AK")
36+
display(@benchmark Base.accumulate(+, v, init=UInt32(0)) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n))))
37+
display(@benchmark AK.accumulate(+, v, init=UInt32(0)) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n))))
38+
39+
40+
println("\n===\nBenchmarking accumulate(+) on $n Int64 - Base vs. AK")
41+
display(@benchmark Base.accumulate(+, v, init=Int64(0)) setup=(v = ArrayType(rand(Int64(1):Int64(100), n))))
42+
display(@benchmark AK.accumulate(+, v, init=Int64(0)) setup=(v = ArrayType(rand(Int64(1):Int64(100), n))))
43+
44+
45+
println("\n===\nBenchmarking accumulate(+) on $n Float32 - Base vs. AK")
46+
display(@benchmark Base.accumulate(+, v, init=Float32(0)) setup=(v = ArrayType(rand(Float32, n))))
47+
display(@benchmark AK.accumulate(+, v, init=Float32(0)) setup=(v = ArrayType(rand(Float32, n))))
48+
49+
50+
println("\n===\nBenchmarking accumulate((x, y) -> sin(x) + cos(y)) on $n Float32 - Base vs. AK")
51+
display(@benchmark Base.accumulate((x, y) -> sin(x) + cos(y), v, init=Float32(0)) setup=(v = ArrayType(rand(Float32, n))))
52+
display(@benchmark AK.accumulate((x, y) -> sin(x) + cos(y), v, init=Float32(0), neutral=Float32(0)) setup=(v = ArrayType(rand(Float32, n))))
53+

benchmark/accumulate_nd.jl

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import AcceleratedKernels as AK
2+
using KernelAbstractions
3+
4+
using BenchmarkTools
5+
using Random
6+
Random.seed!(0)
7+
8+
9+
# Choose the Array backend:
10+
#
11+
# using CUDA
12+
# const ArrayType = CuArray
13+
#
14+
# using AMDGPU
15+
# const ArrayType = ROCArray
16+
#
17+
# using oneAPI
18+
# const ArrayType = oneArray
19+
#
20+
# using Metal
21+
# const ArrayType = MtlArray
22+
#
23+
# using OpenCL
24+
# const ArrayType = CLArray
25+
#
26+
const ArrayType = Array
27+
28+
29+
println("Using ArrayType: ", ArrayType)
30+
31+
32+
n1 = 3
33+
n2 = 1_000_000
34+
35+
36+
println("\n===\nBenchmarking accumulate(+, dims=1) on $n1 × $n2 UInt32 - Base vs. AK")
37+
display(@benchmark Base.accumulate(+, v, init=UInt32(0), dims=1) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n1, n2))))
38+
display(@benchmark AK.accumulate(+, v, init=UInt32(0), dims=1) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n1, n2))))
39+
40+
println("\n===\nBenchmarking accumulate(+, dims=2) on $n1 × $n2 UInt32 - Base vs. AK")
41+
display(@benchmark Base.accumulate(+, v, init=UInt32(0), dims=2) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n1, n2))))
42+
display(@benchmark AK.accumulate(+, v, init=UInt32(0), dims=2) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n1, n2))))
43+
44+
45+
46+
47+
println("\n===\nBenchmarking accumulate(+, dims=1) on $n1 × $n2 Int64 - Base vs. AK")
48+
display(@benchmark Base.accumulate(+, v, init=Int64(0), dims=1) setup=(v = ArrayType(rand(Int64(1):Int64(100), n1, n2))))
49+
display(@benchmark AK.accumulate(+, v, init=Int64(0), dims=1) setup=(v = ArrayType(rand(Int64(1):Int64(100), n1, n2))))
50+
51+
println("\n===\nBenchmarking accumulate(+, dims=2) on $n1 × $n2 Int64 - Base vs. AK")
52+
display(@benchmark Base.accumulate(+, v, init=Int64(0), dims=2) setup=(v = ArrayType(rand(Int64(1):Int64(100), n1, n2))))
53+
display(@benchmark AK.accumulate(+, v, init=Int64(0), dims=2) setup=(v = ArrayType(rand(Int64(1):Int64(100), n1, n2))))
54+
55+
56+
57+
58+
println("\n===\nBenchmarking accumulate(+, dims=1) on $n1 × $n2 Float32 - Base vs. AK")
59+
display(@benchmark Base.accumulate(+, v, init=Float32(0), dims=1) setup=(v = ArrayType(rand(Float32, n1, n2))))
60+
display(@benchmark AK.accumulate(+, v, init=Float32(0), dims=1) setup=(v = ArrayType(rand(Float32, n1, n2))))
61+
62+
println("\n===\nBenchmarking accumulate(+, dims=2) on $n1 × $n2 Float32 - Base vs. AK")
63+
display(@benchmark Base.accumulate(+, v, init=Float32(0), dims=2) setup=(v = ArrayType(rand(Float32, n1, n2))))
64+
display(@benchmark AK.accumulate(+, v, init=Float32(0), dims=2) setup=(v = ArrayType(rand(Float32, n1, n2))))
65+
66+
67+
68+
69+
println("\n===\nBenchmarking accumulate((x, y) -> sin(x) + cos(y), dims=1) on $n1 × $n2 Float32 - Base vs. AK")
70+
display(@benchmark Base.accumulate((x, y) -> sin(x) + cos(y), v, init=Float32(0), dims=1) setup=(v = ArrayType(rand(Float32, n1, n2))))
71+
display(@benchmark AK.accumulate((x, y) -> sin(x) + cos(y), v, init=Float32(0), neutral=Float32(0), dims=1) setup=(v = ArrayType(rand(Float32, n1, n2))))
72+
73+
println("\n===\nBenchmarking accumulate((x, y) -> sin(x) + cos(y), dims=2) on $n1 × $n2 Float32 - Base vs. AK")
74+
display(@benchmark Base.accumulate((x, y) -> sin(x) + cos(y), v, init=Float32(0), dims=2) setup=(v = ArrayType(rand(Float32, n1, n2))))
75+
display(@benchmark AK.accumulate((x, y) -> sin(x) + cos(y), v, init=Float32(0), neutral=Float32(0), dims=2) setup=(v = ArrayType(rand(Float32, n1, n2))))

benchmark/map.jl

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import AcceleratedKernels as AK
2+
using KernelAbstractions
3+
4+
using BenchmarkTools
5+
using Random
6+
Random.seed!(0)
7+
8+
9+
# Choose the Array backend:
10+
#
11+
# using CUDA
12+
# const ArrayType = CuArray
13+
#
14+
# using AMDGPU
15+
# const ArrayType = ROCArray
16+
#
17+
# using oneAPI
18+
# const ArrayType = oneArray
19+
#
20+
# using Metal
21+
# const ArrayType = MtlArray
22+
#
23+
# using OpenCL
24+
# const ArrayType = CLArray
25+
#
26+
const ArrayType = Array
27+
28+
29+
println("Using ArrayType: ", ArrayType)
30+
31+
32+
n = 1_000_000
33+
f(x) = typeof(x)(2) * x
34+
35+
36+
println("\n===\nBenchmarking map(x->2x) on $n UInt32 - Base vs. AK")
37+
display(@benchmark Base.map(f, v) setup=(v = ArrayType(rand(UInt32(1):UInt32(1_000_000), n))))
38+
display(@benchmark AK.map(f, v) setup=(v = ArrayType(rand(UInt32(1):UInt32(1_000_000), n))))
39+
40+
41+
println("\n===\nBenchmarking map(x->2x) on $n Int64 - Base vs. AK")
42+
display(@benchmark Base.map(f, v) setup=(v = ArrayType(rand(Int64(1):Int64(1_000_000), n))))
43+
display(@benchmark AK.map(f, v) setup=(v = ArrayType(rand(Int64(1):Int64(1_000_000), n))))
44+
45+
46+
println("\n===\nBenchmarking map(x->2x) on $n Float32 - Base vs. AK")
47+
display(@benchmark Base.map(f, v) setup=(v = ArrayType(rand(Float32, n))))
48+
display(@benchmark AK.map(f, v) setup=(v = ArrayType(rand(Float32, n))))
49+
50+
51+
println("\n===\nBenchmarking map(x->sin(x)) on $n Float32 - Base vs. AK")
52+
display(@benchmark Base.map(sin, v) setup=(v = ArrayType(rand(Float32, n))))
53+
display(@benchmark AK.map(sin, v) setup=(v = ArrayType(rand(Float32, n))))
54+

benchmark/mapreduce_1d.jl

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import AcceleratedKernels as AK
2+
using KernelAbstractions
3+
4+
using BenchmarkTools
5+
using Random
6+
Random.seed!(0)
7+
8+
9+
# Choose the Array backend:
10+
#
11+
# using CUDA
12+
# const ArrayType = CuArray
13+
#
14+
# using AMDGPU
15+
# const ArrayType = ROCArray
16+
#
17+
# using oneAPI
18+
# const ArrayType = oneArray
19+
#
20+
# using Metal
21+
# const ArrayType = MtlArray
22+
#
23+
# using OpenCL
24+
# const ArrayType = CLArray
25+
#
26+
const ArrayType = Array
27+
28+
29+
println("Using ArrayType: ", ArrayType)
30+
31+
32+
n = 1_000_000
33+
34+
35+
println("\n===\nBenchmarking mapreduce(identity, +) on $n UInt32 - Base vs. AK")
36+
display(@benchmark Base.reduce(+, v, init=UInt32(0)) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n))))
37+
display(@benchmark AK.reduce(+, v, init=UInt32(0)) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n))))
38+
39+
40+
println("\n===\nBenchmarking mapreduce(identity, +) on $n Int64 - Base vs. AK")
41+
display(@benchmark Base.reduce(+, v, init=Int64(0)) setup=(v = ArrayType(rand(Int64(1):Int64(100), n))))
42+
display(@benchmark AK.reduce(+, v, init=Int64(0)) setup=(v = ArrayType(rand(Int64(1):Int64(100), n))))
43+
44+
45+
println("\n===\nBenchmarking mapreduce(identity, +) on $n Float32 - Base vs. AK")
46+
display(@benchmark Base.reduce(+, v, init=Float32(0)) setup=(v = ArrayType(rand(Float32, n))))
47+
display(@benchmark AK.reduce(+, v, init=Float32(0)) setup=(v = ArrayType(rand(Float32, n))))
48+
49+
50+
println("\n===\nBenchmarking mapreduce(sin, +) on $n Float32 - Base vs. AK")
51+
display(@benchmark Base.mapreduce(sin, +, v, init=Float32(0)) setup=(v = ArrayType(rand(Float32, n))))
52+
display(@benchmark AK.mapreduce(sin, +, v, init=Float32(0)) setup=(v = ArrayType(rand(Float32, n))))
53+

benchmark/mapreduce_nd.jl

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import AcceleratedKernels as AK
2+
using KernelAbstractions
3+
4+
using BenchmarkTools
5+
using Random
6+
Random.seed!(0)
7+
8+
9+
# Choose the Array backend:
10+
#
11+
# using CUDA
12+
# const ArrayType = CuArray
13+
#
14+
# using AMDGPU
15+
# const ArrayType = ROCArray
16+
#
17+
# using oneAPI
18+
# const ArrayType = oneArray
19+
#
20+
# using Metal
21+
# const ArrayType = MtlArray
22+
#
23+
# using OpenCL
24+
# const ArrayType = CLArray
25+
#
26+
const ArrayType = Array
27+
28+
29+
println("Using ArrayType: ", ArrayType)
30+
31+
32+
n1 = 3
33+
n2 = 1_000_000
34+
35+
36+
println("\n===\nBenchmarking mapreduce(identity, +, dims=1) on $n1 × $n2 UInt32 - Base vs. AK")
37+
display(@benchmark Base.reduce(+, v, init=UInt32(0), dims=1) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n1, n2))))
38+
display(@benchmark AK.reduce(+, v, init=UInt32(0), dims=1) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n1, n2))))
39+
40+
println("\n===\nBenchmarking mapreduce(identity, +, dims=2) on $n1 × $n2 UInt32 - Base vs. AK")
41+
display(@benchmark Base.reduce(+, v, init=UInt32(0), dims=2) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n1, n2))))
42+
display(@benchmark AK.reduce(+, v, init=UInt32(0), dims=2) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n1, n2))))
43+
44+
45+
46+
47+
println("\n===\nBenchmarking mapreduce(identity, +, dims=1) on $n1 × $n2 Int64 - Base vs. AK")
48+
display(@benchmark Base.reduce(+, v, init=Int64(0), dims=1) setup=(v = ArrayType(rand(Int64(1):Int64(100), n1, n2))))
49+
display(@benchmark AK.reduce(+, v, init=Int64(0), dims=1) setup=(v = ArrayType(rand(Int64(1):Int64(100), n1, n2))))
50+
51+
println("\n===\nBenchmarking mapreduce(identity, +, dims=2) on $n1 × $n2 Int64 - Base vs. AK")
52+
display(@benchmark Base.reduce(+, v, init=Int64(0), dims=2) setup=(v = ArrayType(rand(Int64(1):Int64(100), n1, n2))))
53+
display(@benchmark AK.reduce(+, v, init=Int64(0), dims=2) setup=(v = ArrayType(rand(Int64(1):Int64(100), n1, n2))))
54+
55+
56+
57+
58+
println("\n===\nBenchmarking mapreduce(identity, +, dims=1) on $n1 × $n2 Float32 - Base vs. AK")
59+
display(@benchmark Base.reduce(+, v, init=Float32(0), dims=1) setup=(v = ArrayType(rand(Float32, n1, n2))))
60+
display(@benchmark AK.reduce(+, v, init=Float32(0), dims=1) setup=(v = ArrayType(rand(Float32, n1, n2))))
61+
62+
println("\n===\nBenchmarking mapreduce(identity, +, dims=2) on $n1 × $n2 Float32 - Base vs. AK")
63+
display(@benchmark Base.reduce(+, v, init=Float32(0), dims=2) setup=(v = ArrayType(rand(Float32, n1, n2))))
64+
display(@benchmark AK.reduce(+, v, init=Float32(0), dims=2) setup=(v = ArrayType(rand(Float32, n1, n2))))
65+
66+
67+
68+
69+
println("\n===\nBenchmarking mapreduce(sin, +, dims=1) on $n1 × $n2 Float32 - Base vs. AK")
70+
display(@benchmark Base.mapreduce(sin, +, v, init=Float32(0), dims=1) setup=(v = ArrayType(rand(Float32, n1, n2))))
71+
display(@benchmark AK.mapreduce(sin, +, v, init=Float32(0), dims=1) setup=(v = ArrayType(rand(Float32, n1, n2))))
72+
73+
println("\n===\nBenchmarking mapreduce(sin, +, dims=2) on $n1 × $n2 Float32 - Base vs. AK")
74+
display(@benchmark Base.mapreduce(sin, +, v, init=Float32(0), dims=2) setup=(v = ArrayType(rand(Float32, n1, n2))))
75+
display(@benchmark AK.mapreduce(sin, +, v, init=Float32(0), dims=2) setup=(v = ArrayType(rand(Float32, n1, n2))))

benchmark/sort.jl

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import AcceleratedKernels as AK
2+
using KernelAbstractions
3+
4+
using BenchmarkTools
5+
using Random
6+
Random.seed!(0)
7+
8+
9+
# Choose the Array backend:
10+
#
11+
# using CUDA
12+
# const ArrayType = CuArray
13+
#
14+
# using AMDGPU
15+
# const ArrayType = ROCArray
16+
#
17+
# using oneAPI
18+
# const ArrayType = oneArray
19+
#
20+
# using Metal
21+
# const ArrayType = MtlArray
22+
#
23+
# using OpenCL
24+
# const ArrayType = CLArray
25+
#
26+
const ArrayType = Array
27+
28+
29+
println("Using ArrayType: ", ArrayType)
30+
31+
32+
n = 1_000_000
33+
34+
35+
println("\n===\nBenchmarking sort! on $n UInt32 - Base vs. AK")
36+
display(@benchmark Base.sort!(v) setup=(v = ArrayType(rand(UInt32(1):UInt32(1_000_000), n))))
37+
display(@benchmark AK.sort!(v) setup=(v = ArrayType(rand(UInt32(1):UInt32(1_000_000), n))))
38+
39+
40+
println("\n===\nBenchmarking sort! on $n Int64 - Base vs. AK")
41+
display(@benchmark Base.sort!(v) setup=(v = ArrayType(rand(Int64(1):Int64(1_000_000), n))))
42+
display(@benchmark AK.sort!(v) setup=(v = ArrayType(rand(Int64(1):Int64(1_000_000), n))))
43+
44+
45+
println("\n===\nBenchmarking sort! on $n Float32 - Base vs. AK")
46+
display(@benchmark Base.sort!(v) setup=(v = ArrayType(rand(Float32, n))))
47+
display(@benchmark AK.sort!(v) setup=(v = ArrayType(rand(Float32, n))))
48+
49+
50+
println("\n===\nBenchmarking sort!(by=sin) on $n Float32 - Base vs. AK")
51+
display(@benchmark Base.sort!(v, by=sin) setup=(v = ArrayType(rand(Float32, n))))
52+
display(@benchmark AK.sort!(v, by=sin) setup=(v = ArrayType(rand(Float32, n))))
53+

0 commit comments

Comments
 (0)