Skip to content

Commit 1d9d964

Browse files
Merge pull request #4 from thomvet/more-flexible-chunk-resizing
More flexible chunk resizing
2 parents 6ee3863 + 39d8a6e commit 1d9d964

File tree

9 files changed

+290
-72
lines changed

9 files changed

+290
-72
lines changed

.buildkite/pipeline.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
steps:
2+
- label: "Julia 1"
3+
plugins:
4+
- JuliaCI/julia#v1:
5+
version: "1"
6+
- JuliaCI/julia-test#v1:
7+
coverage: false # 1000x slowdown
8+
agents:
9+
queue: "juliagpu"
10+
cuda: "*"
11+
timeout_in_minutes: 30
12+
# Don't run Buildkite if the commit message includes the text [skip tests]
13+
if: build.message !~ /\[skip tests\]/
14+
15+
env:
16+
GROUP: GPU
17+
JULIA_PKG_SERVER: "" # it often struggles with our large artifacts
18+
# SECRET_CODECOV_TOKEN: "..."

Project.toml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@ authors = ["Chris Rackauckas <accounts@chrisrackauckas.com>"]
44
version = "0.1.1"
55

66
[deps]
7+
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
78
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
8-
LabelledArrays = "2ee39098-c373-598a-b85f-a56591580800"
99
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
10+
LabelledArrays = "2ee39098-c373-598a-b85f-a56591580800"
1011

1112
[compat]
1213
ArrayInterface = "2.6, 3.0"
@@ -18,6 +19,9 @@ julia = "1.6"
1819
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1920
OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
2021
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
22+
RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd"
23+
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
24+
SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
2125

2226
[targets]
23-
test = ["LinearAlgebra", "OrdinaryDiffEq", "Test"]
27+
test = ["LinearAlgebra", "OrdinaryDiffEq", "Test", "RecursiveArrayTools", "Pkg", "SafeTestsets"]

src/PreallocationTools.jl

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,49 @@
11
module PreallocationTools
22

3-
using ForwardDiff, ArrayInterface, LabelledArrays
3+
using ForwardDiff, ArrayInterface, LabelledArrays, Adapt
44

55
struct DiffCache{T<:AbstractArray, S<:AbstractArray}
66
du::T
77
dual_du::S
88
end
99

10-
function DiffCache(u::AbstractArray{T}, siz, ::Type{Val{chunk_size}}) where {T, chunk_size}
11-
x = ArrayInterface.restructure(u,zeros(ForwardDiff.Dual{nothing,T,chunk_size}, siz...))
10+
function DiffCache(u::AbstractArray{T}, siz, chunk_size) where {T}
11+
x = adapt(ArrayInterface.parameterless_type(u), zeros(T,(chunk_size+1)*prod(siz)))
1212
DiffCache(u, x)
1313
end
1414

1515
"""
1616
17-
`dualcache(u::AbstractArray, N = Val{default_cache_size(length(u))})`
17+
`dualcache(u::AbstractArray, N = default_cache_size(length(u)))`
1818
1919
Builds a `DualCache` object that stores both a version of the cache for `u`
2020
and for the `Dual` version of `u`, allowing use of pre-cached vectors with
2121
forward-mode automatic differentiation.
2222
2323
"""
24-
dualcache(u::AbstractArray, N=Val{ForwardDiff.pickchunksize(length(u))}) = DiffCache(u, size(u), N)
24+
dualcache(u::AbstractArray, N=ForwardDiff.pickchunksize(length(u))) = DiffCache(u, size(u), N)
2525

26-
chunksize(::Type{ForwardDiff.Dual{T,V,N}}) where {T,V,N} = N
26+
"""
27+
28+
`get_tmp(dc::DiffCache, u)`
29+
30+
Returns the `Dual` or normal cache array stored in `dc` based on the type of `u`.
2731
32+
"""
2833
function get_tmp(dc::DiffCache, u::T) where T<:ForwardDiff.Dual
29-
x = reinterpret(T, dc.dual_du)
30-
if chunksize(T) === chunksize(eltype(dc.dual_du))
31-
x
32-
else
33-
@view x[axes(dc.du)...]
34-
end
34+
nelem = div(sizeof(T), sizeof(eltype(dc.dual_du)))*length(dc.du)
35+
ArrayInterface.restructure(dc.du, reinterpret(T, view(dc.dual_du, 1:nelem)))
3536
end
3637

3738
function get_tmp(dc::DiffCache, u::AbstractArray{T}) where T<:ForwardDiff.Dual
38-
x = reinterpret(T, dc.dual_du)
39-
if chunksize(T) === chunksize(eltype(dc.dual_du))
40-
x
41-
else
42-
@view x[axes(dc.du)...]
43-
end
39+
nelem = div(sizeof(T), sizeof(eltype(dc.dual_du)))*length(dc.du)
40+
ArrayInterface.restructure(dc.du, reinterpret(T, view(dc.dual_du, 1:nelem)))
4441
end
4542

4643
function get_tmp(dc::DiffCache, u::LabelledArrays.LArray{T,N,D,Syms}) where {T,N,D,Syms}
47-
x = reinterpret(T, dc.dual_du.__x)
48-
_x = if chunksize(T) === chunksize(eltype(dc.dual_du))
49-
x
50-
else
51-
@view x[axes(dc.du)...]
52-
end
53-
LabelledArrays.LArray{T,N,D,Syms}(_x)
44+
nelem = div(sizeof(T), sizeof(eltype(dc.dual_du)))*length(dc.du)
45+
_x = ArrayInterface.restructure(dc.du, reinterpret(T, view(dc.dual_du, 1:nelem)))
46+
LabelledArrays.LArray{T,N,D,Syms}(_x)
5447
end
5548

5649
get_tmp(dc::DiffCache, u::Number) = dc.du
@@ -62,6 +55,7 @@ get_tmp(dc::DiffCache, u::AbstractArray) = dc.du
6255
A lazily allocated buffer object. Given a vector `u`, `b[u]` returns a `Vector` of the
6356
same element type and length `f(length(u))` (defaulting to the same length), which is
6457
allocated as needed and then cached within `b` for subsequent usage.
58+
6559
"""
6660
struct LazyBufferCache{F<:Function}
6761
bufs::Dict # a dictionary mapping types to buffers

test/GPU/Project.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[deps]
2+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
3+
PreallocationTools = "d236fae5-4411-538c-8e31-a6e3d9e00b46"

test/core_dispatch.jl

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
using LinearAlgebra, OrdinaryDiffEq, Test, PreallocationTools, ForwardDiff, LabelledArrays, RecursiveArrayTools
2+
3+
#Base Array tests
4+
chunk_size = 5
5+
u0_B = ones(5, 5)
6+
dual_B = zeros(ForwardDiff.Dual{ForwardDiff.Tag{typeof(something), Float64}, Float64, chunk_size}, 2, 2)
7+
cache_B = dualcache(u0_B, chunk_size)
8+
tmp_du_BA = get_tmp(cache_B, u0_B)
9+
tmp_dual_du_BA = get_tmp(cache_B, dual_B)
10+
tmp_du_BN = get_tmp(cache_B, u0_B[1])
11+
tmp_dual_du_BN = get_tmp(cache_B, dual_B[1])
12+
@test size(tmp_du_BA) == size(u0_B)
13+
@test typeof(tmp_du_BA) == typeof(u0_B)
14+
@test eltype(tmp_du_BA) == eltype(u0_B)
15+
@test size(tmp_dual_du_BA) == size(u0_B)
16+
@test typeof(tmp_dual_du_BA) == typeof(dual_B)
17+
@test eltype(tmp_dual_du_BA) == eltype(dual_B)
18+
@test size(tmp_du_BN) == size(u0_B)
19+
@test typeof(tmp_du_BN) == typeof(u0_B)
20+
@test eltype(tmp_du_BN) == eltype(u0_B)
21+
@test size(tmp_dual_du_BN) == size(u0_B)
22+
@test typeof(tmp_dual_du_BN) == typeof(dual_B)
23+
@test eltype(tmp_dual_du_BN) == eltype(dual_B)
24+
25+
#LArray tests
26+
chunk_size = 4
27+
u0_L = LArray((2,2); a=1.0, b=1.0, c=1.0, d=1.0)
28+
zerodual = zero(ForwardDiff.Dual{ForwardDiff.Tag{typeof(something), Float64}, Float64, chunk_size})
29+
dual_L = LArray((2,2); a=zerodual, b=zerodual, c=zerodual, d=zerodual)
30+
cache_L = dualcache(u0_L, chunk_size)
31+
tmp_du_LA = get_tmp(cache_L, u0_L)
32+
tmp_dual_du_LA = get_tmp(cache_L, dual_L)
33+
tmp_du_LN = get_tmp(cache_L, u0_L[1])
34+
tmp_dual_du_LN = get_tmp(cache_L, dual_L[1])
35+
@test size(tmp_du_LA) == size(u0_L)
36+
@test typeof(tmp_du_LA) == typeof(u0_L)
37+
@test eltype(tmp_du_LA) == eltype(u0_L)
38+
@test size(tmp_dual_du_LA) == size(u0_L)
39+
@test typeof(tmp_dual_du_LA) == typeof(dual_L)
40+
@test eltype(tmp_dual_du_LA) == eltype(dual_L)
41+
@test size(tmp_du_LN) == size(u0_L)
42+
@test typeof(tmp_du_LN) == typeof(u0_L)
43+
@test eltype(tmp_du_LN) == eltype(u0_L)
44+
@test size(tmp_dual_du_LN) == size(u0_L)
45+
@test typeof(tmp_dual_du_LN) == typeof(dual_L)
46+
@test eltype(tmp_dual_du_LN) == eltype(dual_L)
47+
48+
#ArrayPartition tests
49+
u0_AP = ArrayPartition(ones(2,2), ones(3,3))
50+
dual_a = zeros(ForwardDiff.Dual{ForwardDiff.Tag{typeof(something), Float64}, Float64, chunk_size}, 2, 2)
51+
dual_b = zeros(ForwardDiff.Dual{ForwardDiff.Tag{typeof(something), Float64}, Float64, chunk_size}, 3, 3)
52+
dual_AP = ArrayPartition(dual_a, dual_b)
53+
cache_AP = dualcache(u0_AP, chunk_size)
54+
tmp_du_APA = get_tmp(cache_AP, u0_AP)
55+
tmp_dual_du_APA = get_tmp(cache_AP, dual_AP)
56+
tmp_du_APN = get_tmp(cache_AP, u0_AP[1])
57+
tmp_dual_du_APN = get_tmp(cache_AP, dual_AP[1])
58+
@test size(tmp_du_APA) == size(u0_AP)
59+
@test typeof(tmp_du_APA) == typeof(u0_AP)
60+
@test eltype(tmp_du_APA) == eltype(u0_AP)
61+
@test size(tmp_dual_du_APA) == size(u0_AP)
62+
@test typeof(tmp_dual_du_APA) == typeof(dual_AP)
63+
@test eltype(tmp_dual_du_APA) == eltype(dual_AP)
64+
@test size(tmp_du_APN) == size(u0_AP)
65+
@test typeof(tmp_du_APN) == typeof(u0_AP)
66+
@test eltype(tmp_du_APN) == eltype(u0_AP)
67+
@test size(tmp_dual_du_APN) == size(u0_AP)
68+
@test typeof(tmp_dual_du_APN) == typeof(dual_AP)
69+
@test eltype(tmp_dual_du_APN) == eltype(dual_AP)

test/core_odes.jl

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
using LinearAlgebra, OrdinaryDiffEq, Test, PreallocationTools, LabelledArrays, RecursiveArrayTools
2+
3+
#Base array
4+
function foo(du, u, (A, tmp), t)
5+
tmp = get_tmp(tmp, u)
6+
mul!(tmp, A, u)
7+
@. du = u + tmp
8+
nothing
9+
end
10+
#with defined chunk_size
11+
chunk_size = 5
12+
u0 = ones(5, 5)
13+
A = ones(5,5)
14+
cache = dualcache(zeros(5,5), chunk_size)
15+
prob = ODEProblem(foo, u0, (0., 1.0), (A, cache))
16+
sol = solve(prob, TRBDF2(chunk_size=chunk_size))
17+
@test sol.retcode == :Success
18+
19+
#with auto-detected chunk_size
20+
prob = ODEProblem(foo, ones(5, 5), (0., 1.0), (ones(5,5), dualcache(zeros(5,5))))
21+
sol = solve(prob, TRBDF2())
22+
@test sol.retcode == :Success
23+
24+
#Base array with LBC
25+
function foo(du, u, (A, lbc), t)
26+
tmp = lbc[u]
27+
mul!(tmp, A, u)
28+
@. du = u + tmp
29+
nothing
30+
end
31+
prob = ODEProblem(foo, ones(5, 5), (0., 1.0), (ones(5,5), LazyBufferCache()))
32+
sol = solve(prob, TRBDF2())
33+
@test sol.retcode == :Success
34+
35+
#LArray
36+
A = LArray((2,2); a=1.0, b=1.0, c=1.0, d=1.0)
37+
c = LArray((2,2); a=0.0, b=0.0, c=0.0, d=0.0)
38+
u0 = LArray((2,2); a=1.0, b=1.0, c=1.0, d=1.0)
39+
function foo(du, u, (A, tmp), t)
40+
tmp = get_tmp(tmp, u)
41+
mul!(tmp, A, u)
42+
@. du = u + tmp
43+
nothing
44+
end
45+
#with specified chunk_size
46+
chunk_size = 4
47+
prob = ODEProblem(foo, u0, (0., 1.0), (A, dualcache(c, chunk_size)))
48+
sol = solve(prob, TRBDF2(chunk_size = chunk_size))
49+
@test sol.retcode == :Success
50+
#with auto-detected chunk_size
51+
prob = ODEProblem(foo, u0, (0., 1.0), (A, dualcache(c)))
52+
sol = solve(prob, TRBDF2())
53+
@test sol.retcode == :Success

test/core_resizing.jl

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
using Test, PreallocationTools, ForwardDiff
2+
3+
randmat = rand(5, 3)
4+
sto = similar(randmat)
5+
stod = dualcache(sto)
6+
7+
function claytonsample!(sto, τ, α; randmat=randmat)
8+
sto = get_tmp(sto, τ)
9+
sto .= randmat
10+
τ == 0 && return sto
11+
12+
n = size(sto, 1)
13+
for i in 1:n
14+
v = sto[i, 2]
15+
u = sto[i, 1]
16+
sto[i, 1] = (1 - u^(-τ) + u^(-τ)*v^(-/(1 + τ))))^(-1/τ)*α
17+
sto[i, 2] = (1 - u^(-τ) + u^(-τ)*v^(-/(1 + τ))))^(-1/τ)
18+
end
19+
return sto
20+
end
21+
22+
#taking the derivative of claytonsample! with respect to τ only
23+
df1 = ForwardDiff.derivative-> claytonsample!(stod, τ, 0.0), 0.3)
24+
@test size(randmat) == size(df1)
25+
26+
#calculating the jacobian of claytonsample! with respect to τ and α
27+
df2 = ForwardDiff.jacobian(x -> claytonsample!(stod, x[1], x[2]), [0.3; 0.0]) #should give a 15x2 array,
28+
#because ForwardDiff flattens the output of jacobian, see: https://juliadiff.org/ForwardDiff.jl/stable/user/api/#ForwardDiff.jacobian
29+
30+
@test (length(randmat), 2) == size(df2)
31+
@test df1[1:5,2] df2[6:10,1]

test/gpu_all.jl

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
using LinearAlgebra, OrdinaryDiffEq, Test, PreallocationTools, CUDA
2+
3+
#Dispatch tests
4+
u0_CU = cu(ones(5,5))
5+
dual_CU = cu(zeros(ForwardDiff.Dual{ForwardDiff.Tag{typeof(something), Float64}, Float64, chunk_size}, 2, 2))
6+
cache_CU = dualcache(u0_CU, chunk_size)
7+
tmp_du_CUA = get_tmp(cache_CU, u0_CU)
8+
tmp_dual_du_CUA = get_tmp(cache_CU, dual_CU)
9+
tmp_du_CUN = get_tmp(cache_CU, u0_CU[1])
10+
tmp_dual_du_CUN = get_tmp(cache_CU, dual_CU[1])
11+
@test typeof(cache_CU.dual_du) == typeof(u0_CU) #check that dual cache array is a GPU array for performance reasons.
12+
@test size(tmp_du_CUA) == size(u0_CU)
13+
@test typeof(tmp_du_CUA) == typeof(u0_CU)
14+
@test eltype(tmp_du_CUA) == eltype(u0_CU)
15+
@test size(tmp_dual_du_CUA) == size(u0_CU)
16+
@test typeof(tmp_dual_du_CUA) == typeof(dual_CU)
17+
@test eltype(tmp_dual_du_CUA) == eltype(dual_CU)
18+
@test size(tmp_du_CUN) == size(u0_CU)
19+
@test typeof(tmp_du_CUN) == typeof(u0_CU)
20+
@test eltype(tmp_du_CUN) == eltype(u0_CU)
21+
@test size(tmp_dual_du_CUN) == size(u0_CU)
22+
@test typeof(tmp_dual_du_CUN) == typeof(dual_CU)
23+
@test eltype(tmp_dual_du_CUN) == eltype(dual_CU)
24+
25+
#ODE tests
26+
function foo(du, u, (A, tmp), t)
27+
tmp = get_tmp(tmp, u)
28+
mul!(tmp, A, u)
29+
@. du = u + tmp
30+
nothing
31+
end
32+
#with specified chunk_size
33+
chunk_size = 10
34+
u0 = cu(rand(10,10)) #example kept small for test purposes.
35+
A = cu(-randn(10,10))
36+
cache = dualcache(A, chunk_size)
37+
prob = ODEProblem(foo, u0, (0.0f0,1.0f0), (A, cache))
38+
sol = solve(prob, TRBDF2(chunk_size = chunk_size))
39+
@test sol.retcode == :Success
40+
41+
#with auto-detected chunk_size
42+
u0 = cu(rand(10,10)) #example kept small for test purposes.
43+
A = cu(-randn(10,10))
44+
cache = dualcache(A)
45+
prob = ODEProblem(foo, u0, (0.0f0,1.0f0), (A, cache))
46+
sol = solve(prob, TRBDF2())
47+
@test sol.retcode == :Success
48+
49+
randmat = cu(rand(5, 3))
50+
sto = similar(randmat)
51+
stod = dualcache(sto)
52+
function claytonsample!(sto, τ, α; randmat=randmat)
53+
sto = get_tmp(sto, τ)
54+
sto .= randmat
55+
τ == 0 && return sto
56+
n = size(sto, 1)
57+
for i in 1:n
58+
v = sto[i, 2]
59+
u = sto[i, 1]
60+
sto[i, 1] = (1 - u^(-τ) + u^(-τ)*v^(-/(1 + τ))))^(-1/τ)*α
61+
sto[i, 2] = (1 - u^(-τ) + u^(-τ)*v^(-/(1 + τ))))^(-1/τ)
62+
end
63+
return sto
64+
end
65+
66+
#resizing tests
67+
#taking the derivative of claytonsample! with respect to τ only
68+
df1 = ForwardDiff.derivative-> claytonsample!(stod, τ, 0.0), 0.3)
69+
@test size(randmat) == size(df1)
70+
71+
#calculating the jacobian of claytonsample! with respect to τ and α
72+
df2 = ForwardDiff.jacobian(x -> claytonsample!(stod, x[1], x[2]), [0.3; 0.0]) #should give a 15x2 array,
73+
#because ForwardDiff flattens the output of jacobian, see: https://juliadiff.org/ForwardDiff.jl/stable/user/api/#ForwardDiff.jacobian
74+
75+
@test (length(randmat), 2) == size(df2)
76+
@test df1[1:5,2] df2[6:10,1]

0 commit comments

Comments
 (0)