Merge pull request #4 from thomvet/more-flexible-chunk-resizing

ChrisRackauckas · web-flow · commit 1d9d9646606b · 2021-11-02T08:52:48.000-04:00
More flexible chunk resizing
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
@@ -0,0 +1,18 @@
+steps:
+  - label: "Julia 1"
+    plugins:
+      - JuliaCI/julia#v1:
+          version: "1"
+      - JuliaCI/julia-test#v1:
+           coverage: false # 1000x slowdown
+    agents:
+      queue: "juliagpu"
+      cuda: "*"
+    timeout_in_minutes: 30
+    # Don't run Buildkite if the commit message includes the text [skip tests]
+    if: build.message !~ /\[skip tests\]/
+
+env:
+  GROUP: GPU
+  JULIA_PKG_SERVER: "" # it often struggles with our large artifacts
+  # SECRET_CODECOV_TOKEN: "..."
diff --git a/Project.toml b/Project.toml
@@ -4,9 +4,10 @@ authors = ["Chris Rackauckas <accounts@chrisrackauckas.com>"]
 version = "0.1.1"
 
 [deps]
+Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
-LabelledArrays = "2ee39098-c373-598a-b85f-a56591580800"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+LabelledArrays = "2ee39098-c373-598a-b85f-a56591580800"
 
 [compat]
 ArrayInterface = "2.6, 3.0"
@@ -18,6 +19,9 @@ julia = "1.6"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd"
+Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
 
 [targets]
-test = ["LinearAlgebra", "OrdinaryDiffEq", "Test"]
+test = ["LinearAlgebra", "OrdinaryDiffEq", "Test", "RecursiveArrayTools", "Pkg", "SafeTestsets"]
diff --git a/src/PreallocationTools.jl b/src/PreallocationTools.jl
@@ -1,56 +1,49 @@
 module PreallocationTools
 
-using ForwardDiff, ArrayInterface, LabelledArrays
+using ForwardDiff, ArrayInterface, LabelledArrays, Adapt
 
 struct DiffCache{T<:AbstractArray, S<:AbstractArray}
     du::T
     dual_du::S
 end
 
-function DiffCache(u::AbstractArray{T}, siz, ::Type{Val{chunk_size}}) where {T, chunk_size}
-    x = ArrayInterface.restructure(u,zeros(ForwardDiff.Dual{nothing,T,chunk_size}, siz...))
+function DiffCache(u::AbstractArray{T}, siz, chunk_size) where {T}
+    x = adapt(ArrayInterface.parameterless_type(u), zeros(T,(chunk_size+1)*prod(siz))) # flat buffer of T with (chunk_size+1) slots per entry of an array of size `siz`, adapted to u's parameterless array type
     DiffCache(u, x)
 end
 
 """
 
-`dualcache(u::AbstractArray, N = Val{default_cache_size(length(u))})`
+`dualcache(u::AbstractArray, N = ForwardDiff.pickchunksize(length(u)))`
 
 Builds a `DualCache` object that stores both a version of the cache for `u`
 and for the `Dual` version of `u`, allowing use of pre-cached vectors with
 forward-mode automatic differentiation.
 
 """
-dualcache(u::AbstractArray, N=Val{ForwardDiff.pickchunksize(length(u))}) = DiffCache(u, size(u), N)
+dualcache(u::AbstractArray, N=ForwardDiff.pickchunksize(length(u))) = DiffCache(u, size(u), N)
 
-chunksize(::Type{ForwardDiff.Dual{T,V,N}}) where {T,V,N} = N
+"""
+
+`get_tmp(dc::DiffCache, u)`
+
+Returns the `Dual` or normal cache array stored in `dc` based on the type of `u`. 
 
+"""
 function get_tmp(dc::DiffCache, u::T) where T<:ForwardDiff.Dual
-  x = reinterpret(T, dc.dual_du)
-  if chunksize(T) === chunksize(eltype(dc.dual_du))
-      x
-  else
-      @view x[axes(dc.du)...]
-  end
+    nelem = div(sizeof(T), sizeof(eltype(dc.dual_du)))*length(dc.du) # number of dual_du entries needed to hold length(dc.du) values of type T
+    ArrayInterface.restructure(dc.du, reinterpret(T, view(dc.dual_du, 1:nelem))) # view the leading nelem entries as T and give them the structure of dc.du
 end
 
 function get_tmp(dc::DiffCache, u::AbstractArray{T}) where T<:ForwardDiff.Dual
-  x = reinterpret(T, dc.dual_du)
-  if chunksize(T) === chunksize(eltype(dc.dual_du))
-      x
-  else
-      @view x[axes(dc.du)...]
-  end
+    nelem = div(sizeof(T), sizeof(eltype(dc.dual_du)))*length(dc.du) # number of dual_du entries needed to hold length(dc.du) values of type T
+    ArrayInterface.restructure(dc.du, reinterpret(T, view(dc.dual_du, 1:nelem))) # view the leading nelem entries as T and give them the structure of dc.du
 end
 
 function get_tmp(dc::DiffCache, u::LabelledArrays.LArray{T,N,D,Syms}) where {T,N,D,Syms}
-  x = reinterpret(T, dc.dual_du.__x)
-  _x = if chunksize(T) === chunksize(eltype(dc.dual_du))
-      x
-  else
-      @view x[axes(dc.du)...]
-  end
-  LabelledArrays.LArray{T,N,D,Syms}(_x)
+    nelem = div(sizeof(T), sizeof(eltype(dc.dual_du)))*length(dc.du) # number of dual_du entries needed to hold length(dc.du) values of type T
+    _x = ArrayInterface.restructure(dc.du, reinterpret(T, view(dc.dual_du, 1:nelem))) # view the leading nelem entries as T and give them the structure of dc.du
+    LabelledArrays.LArray{T,N,D,Syms}(_x) # re-wrap using the LArray type parameters (including Syms) taken from u
 end
 
 get_tmp(dc::DiffCache, u::Number) = dc.du
@@ -62,6 +55,7 @@ get_tmp(dc::DiffCache, u::AbstractArray) = dc.du
 A lazily allocated buffer object.  Given a vector `u`, `b[u]` returns a `Vector` of the
 same element type and length `f(length(u))` (defaulting to the same length), which is
 allocated as needed and then cached within `b` for subsequent usage.
+
 """
 struct LazyBufferCache{F<:Function}
     bufs::Dict # a dictionary mapping types to buffers
diff --git a/test/GPU/Project.toml b/test/GPU/Project.toml
@@ -0,0 +1,3 @@
+[deps]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+PreallocationTools = "d236fae5-4411-538c-8e31-a6e3d9e00b46"
diff --git a/test/core_dispatch.jl b/test/core_dispatch.jl
@@ -0,0 +1,69 @@
+using LinearAlgebra, OrdinaryDiffEq, Test, PreallocationTools, ForwardDiff, LabelledArrays, RecursiveArrayTools
+
+#Base Array tests
+chunk_size = 5
+u0_B = ones(5, 5)
+dual_B = zeros(ForwardDiff.Dual{ForwardDiff.Tag{typeof(something), Float64}, Float64, chunk_size}, 2, 2)
+cache_B = dualcache(u0_B, chunk_size)
+tmp_du_BA = get_tmp(cache_B, u0_B)
+tmp_dual_du_BA = get_tmp(cache_B, dual_B)
+tmp_du_BN = get_tmp(cache_B, u0_B[1])
+tmp_dual_du_BN = get_tmp(cache_B, dual_B[1])
+@test size(tmp_du_BA) == size(u0_B)
+@test typeof(tmp_du_BA) == typeof(u0_B)
+@test eltype(tmp_du_BA) == eltype(u0_B)
+@test size(tmp_dual_du_BA) == size(u0_B)
+@test typeof(tmp_dual_du_BA) == typeof(dual_B)
+@test eltype(tmp_dual_du_BA) == eltype(dual_B) 
+@test size(tmp_du_BN) == size(u0_B) 
+@test typeof(tmp_du_BN) == typeof(u0_B)
+@test eltype(tmp_du_BN) == eltype(u0_B)
+@test size(tmp_dual_du_BN) == size(u0_B)
+@test typeof(tmp_dual_du_BN) == typeof(dual_B)
+@test eltype(tmp_dual_du_BN) == eltype(dual_B)
+
+#LArray tests
+chunk_size = 4
+u0_L = LArray((2,2); a=1.0, b=1.0, c=1.0, d=1.0)
+zerodual = zero(ForwardDiff.Dual{ForwardDiff.Tag{typeof(something), Float64}, Float64, chunk_size})
+dual_L = LArray((2,2); a=zerodual, b=zerodual, c=zerodual, d=zerodual) 
+cache_L = dualcache(u0_L, chunk_size)
+tmp_du_LA = get_tmp(cache_L, u0_L)
+tmp_dual_du_LA = get_tmp(cache_L, dual_L)
+tmp_du_LN = get_tmp(cache_L, u0_L[1])
+tmp_dual_du_LN = get_tmp(cache_L, dual_L[1])
+@test size(tmp_du_LA) == size(u0_L)
+@test typeof(tmp_du_LA) == typeof(u0_L)
+@test eltype(tmp_du_LA) == eltype(u0_L)
+@test size(tmp_dual_du_LA) == size(u0_L)
+@test typeof(tmp_dual_du_LA) == typeof(dual_L)
+@test eltype(tmp_dual_du_LA) == eltype(dual_L) 
+@test size(tmp_du_LN) == size(u0_L) 
+@test typeof(tmp_du_LN) == typeof(u0_L)
+@test eltype(tmp_du_LN) == eltype(u0_L)
+@test size(tmp_dual_du_LN) == size(u0_L)
+@test typeof(tmp_dual_du_LN) == typeof(dual_L)
+@test eltype(tmp_dual_du_LN) == eltype(dual_L) 
+
+#ArrayPartition tests
+u0_AP = ArrayPartition(ones(2,2), ones(3,3))
+dual_a = zeros(ForwardDiff.Dual{ForwardDiff.Tag{typeof(something), Float64}, Float64, chunk_size}, 2, 2)
+dual_b = zeros(ForwardDiff.Dual{ForwardDiff.Tag{typeof(something), Float64}, Float64, chunk_size}, 3, 3)
+dual_AP = ArrayPartition(dual_a, dual_b) 
+cache_AP = dualcache(u0_AP, chunk_size)
+tmp_du_APA = get_tmp(cache_AP, u0_AP)
+tmp_dual_du_APA = get_tmp(cache_AP, dual_AP)
+tmp_du_APN = get_tmp(cache_AP, u0_AP[1])
+tmp_dual_du_APN = get_tmp(cache_AP, dual_AP[1])
+@test size(tmp_du_APA) == size(u0_AP)
+@test typeof(tmp_du_APA) == typeof(u0_AP)
+@test eltype(tmp_du_APA) == eltype(u0_AP)
+@test size(tmp_dual_du_APA) == size(u0_AP)
+@test typeof(tmp_dual_du_APA) == typeof(dual_AP)
+@test eltype(tmp_dual_du_APA) == eltype(dual_AP) 
+@test size(tmp_du_APN) == size(u0_AP) 
+@test typeof(tmp_du_APN) == typeof(u0_AP)
+@test eltype(tmp_du_APN) == eltype(u0_AP)
+@test size(tmp_dual_du_APN) == size(u0_AP)
+@test typeof(tmp_dual_du_APN) == typeof(dual_AP)
+@test eltype(tmp_dual_du_APN) == eltype(dual_AP)  
diff --git a/test/core_odes.jl b/test/core_odes.jl
@@ -0,0 +1,53 @@
+using LinearAlgebra, OrdinaryDiffEq, Test, PreallocationTools, LabelledArrays, RecursiveArrayTools
+
+#Base array
+function foo(du, u, (A, tmp), t)
+    tmp = get_tmp(tmp, u)
+    mul!(tmp, A, u)
+    @. du = u + tmp
+    nothing
+end
+#with defined chunk_size
+chunk_size = 5
+u0 = ones(5, 5)
+A = ones(5,5)
+cache = dualcache(zeros(5,5), chunk_size)
+prob = ODEProblem(foo, u0, (0., 1.0), (A, cache))
+sol = solve(prob, TRBDF2(chunk_size=chunk_size))
+@test sol.retcode == :Success
+
+#with auto-detected chunk_size
+prob = ODEProblem(foo, ones(5, 5), (0., 1.0), (ones(5,5), dualcache(zeros(5,5))))
+sol = solve(prob, TRBDF2())
+@test sol.retcode == :Success
+
+#Base array with LBC
+function foo(du, u, (A, lbc), t)
+    tmp = lbc[u]
+    mul!(tmp, A, u)
+    @. du = u + tmp
+nothing
+end
+prob = ODEProblem(foo, ones(5, 5), (0., 1.0), (ones(5,5), LazyBufferCache()))
+sol = solve(prob, TRBDF2())
+@test sol.retcode == :Success
+
+#LArray
+A = LArray((2,2); a=1.0, b=1.0, c=1.0, d=1.0)
+c = LArray((2,2); a=0.0, b=0.0, c=0.0, d=0.0)
+u0 = LArray((2,2); a=1.0, b=1.0, c=1.0, d=1.0)
+function foo(du, u, (A, tmp), t)
+    tmp = get_tmp(tmp, u)
+    mul!(tmp, A, u)
+    @. du = u + tmp
+    nothing
+end
+#with specified chunk_size
+chunk_size = 4
+prob = ODEProblem(foo, u0, (0., 1.0), (A, dualcache(c, chunk_size)))
+sol = solve(prob, TRBDF2(chunk_size = chunk_size))
+@test sol.retcode == :Success
+#with auto-detected chunk_size
+prob = ODEProblem(foo, u0, (0., 1.0), (A, dualcache(c)))
+sol = solve(prob, TRBDF2())
+@test sol.retcode == :Success
diff --git a/test/core_resizing.jl b/test/core_resizing.jl
@@ -0,0 +1,31 @@
+using Test, PreallocationTools, ForwardDiff
+
+randmat = rand(5, 3)
+sto = similar(randmat)
+stod = dualcache(sto)
+
+function claytonsample!(sto, τ, α; randmat=randmat)
+    sto = get_tmp(sto, τ)
+    sto .= randmat
+    τ == 0 && return sto
+
+    n = size(sto, 1)
+    for i in 1:n
+        v = sto[i, 2]
+        u = sto[i, 1]
+        sto[i, 1] = (1 - u^(-τ) + u^(-τ)*v^(-(τ/(1 + τ))))^(-1/τ)*α
+        sto[i, 2] = (1 - u^(-τ) + u^(-τ)*v^(-(τ/(1 + τ))))^(-1/τ)
+    end
+    return sto
+end
+
+#taking the derivative of claytonsample! with respect to τ only
+df1 = ForwardDiff.derivative(τ -> claytonsample!(stod, τ, 0.0), 0.3)
+@test size(randmat) == size(df1)
+
+#calculating the jacobian of claytonsample! with respect to τ and α
+df2 = ForwardDiff.jacobian(x -> claytonsample!(stod, x[1], x[2]), [0.3; 0.0]) #should give a 15x2 array,
+#because ForwardDiff flattens the output of jacobian, see: https://juliadiff.org/ForwardDiff.jl/stable/user/api/#ForwardDiff.jacobian
+
+@test (length(randmat), 2) == size(df2)
+@test df1[1:5,2] ≈ df2[6:10,1]
diff --git a/test/gpu_all.jl b/test/gpu_all.jl
@@ -0,0 +1,77 @@
+using LinearAlgebra, OrdinaryDiffEq, Test, PreallocationTools, CUDA, ForwardDiff
+
+#Dispatch tests
+chunk_size = 5 #chunk size for the dispatch tests; must exist before its first use (reassigned for the ODE tests further down)
+u0_CU = cu(ones(5,5))
+dual_CU = cu(zeros(ForwardDiff.Dual{ForwardDiff.Tag{typeof(something), Float64}, Float64, chunk_size}, 2, 2))
+cache_CU = dualcache(u0_CU, chunk_size)
+tmp_du_CUA = get_tmp(cache_CU, u0_CU)
+tmp_dual_du_CUA = get_tmp(cache_CU, dual_CU)
+tmp_du_CUN = get_tmp(cache_CU, u0_CU[1])
+tmp_dual_du_CUN = get_tmp(cache_CU, dual_CU[1])
+@test typeof(cache_CU.dual_du) == typeof(u0_CU) #check that dual cache array is a GPU array for performance reasons.
+@test size(tmp_du_CUA) == size(u0_CU)                
+@test typeof(tmp_du_CUA) == typeof(u0_CU)
+@test eltype(tmp_du_CUA) == eltype(u0_CU)
+@test size(tmp_dual_du_CUA) == size(u0_CU)
+@test typeof(tmp_dual_du_CUA) == typeof(dual_CU)
+@test eltype(tmp_dual_du_CUA) == eltype(dual_CU) 
+@test size(tmp_du_CUN) == size(u0_CU) 
+@test typeof(tmp_du_CUN) == typeof(u0_CU)
+@test eltype(tmp_du_CUN) == eltype(u0_CU)
+@test size(tmp_dual_du_CUN) == size(u0_CU)
+@test typeof(tmp_dual_du_CUN) == typeof(dual_CU)
+@test eltype(tmp_dual_du_CUN) == eltype(dual_CU) 
+
+#ODE tests
+function foo(du, u, (A, tmp), t)
+    tmp = get_tmp(tmp, u)
+    mul!(tmp, A, u)
+    @. du = u + tmp
+    nothing
+end
+#with specified chunk_size
+chunk_size = 10
+u0 = cu(rand(10,10)) #example kept small for test purposes.
+A  = cu(-randn(10,10))                  
+cache = dualcache(A, chunk_size)
+prob = ODEProblem(foo, u0, (0.0f0,1.0f0), (A, cache))
+sol = solve(prob, TRBDF2(chunk_size = chunk_size))
+@test sol.retcode == :Success
+
+#with auto-detected chunk_size
+u0 = cu(rand(10,10)) #example kept small for test purposes.
+A  = cu(-randn(10,10))                  
+cache = dualcache(A)
+prob = ODEProblem(foo, u0, (0.0f0,1.0f0), (A, cache))
+sol = solve(prob, TRBDF2())
+@test sol.retcode == :Success
+
+randmat = cu(rand(5, 3))
+sto = similar(randmat)
+stod = dualcache(sto)
+function claytonsample!(sto, τ, α; randmat=randmat)
+    sto = get_tmp(sto, τ)
+        sto .= randmat
+    τ == 0 && return sto
+    n = size(sto, 1)
+    for i in 1:n
+        v = sto[i, 2]
+        u = sto[i, 1]
+        sto[i, 1] = (1 - u^(-τ) + u^(-τ)*v^(-(τ/(1 + τ))))^(-1/τ)*α
+        sto[i, 2] = (1 - u^(-τ) + u^(-τ)*v^(-(τ/(1 + τ))))^(-1/τ)
+    end
+    return sto
+end
+
+#resizing tests
+#taking the derivative of claytonsample! with respect to τ only
+df1 = ForwardDiff.derivative(τ -> claytonsample!(stod, τ, 0.0), 0.3)
+@test size(randmat) == size(df1)
+
+#calculating the jacobian of claytonsample! with respect to τ and α
+df2 = ForwardDiff.jacobian(x -> claytonsample!(stod, x[1], x[2]), [0.3; 0.0]) #should give a 15x2 array,
+#because ForwardDiff flattens the output of jacobian, see: https://juliadiff.org/ForwardDiff.jl/stable/user/api/#ForwardDiff.jacobian
+
+@test (length(randmat), 2) == size(df2)
+@test df1[1:5,2] ≈ df2[6:10,1]
diff --git a/test/runtests.jl b/test/runtests.jl

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+[deps]`
	`2`	`+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"`
	`3`	`+PreallocationTools = "d236fae5-4411-538c-8e31-a6e3d9e00b46"`