
Commit ff66ab9

lecture 11
1 parent 2144f89 commit ff66ab9

15 files changed: 1523 additions & 0 deletions

docs/make.jl

Lines changed: 6 additions & 0 deletions
@@ -97,6 +97,11 @@ lecture_10 = [
     "Lab" => "./lecture_10/lab.md"
     "Homework" => "./lecture_10/hw.md"
 ]
+
+lecture_11 = [
+    "Lecture" => "./lecture_11/lecture.md"
+    "Lab" => "./lecture_11/lab.md"
+]
 lecture_12 = [
     "Lecture" => "./lecture_12/lecture.md"
     "Lab" => "./lecture_12/lab.md"
@@ -130,6 +135,7 @@ makedocs(;
         "8: Automatic differentiation" => lecture_08,
         "9: Intermediate representation" => lecture_09,
         "10: Parallel programming" => lecture_10,
+        "11: GPU programming" => lecture_11,
         "12: Uncertainty propagation in ODE" => lecture_12
     ],
 )

docs/src/lecture_11/cudanative.png

289 KB

docs/src/lecture_11/ffnn.jl

Lines changed: 204 additions & 0 deletions
@@ -0,0 +1,204 @@
using GLMakie

# Generate the "flower" toy dataset: `npetals` rotated clusters in 2D with one-hot labels.
function flower(n; npetals = 8)
    n = div(n, npetals)
    x = mapreduce(hcat, (1:npetals) .* (2π/npetals)) do θ
        ct = cos(θ)
        st = sin(θ)

        x0 = tanh.(randn(1, n) .- 1) .+ 4.0 .+ 0.05 .* randn(1, n)
        y0 = randn(1, n) .* 0.3

        x₁ = x0 * ct .- y0 * st
        x₂ = x0 * st .+ y0 * ct
        vcat(x₁, x₂)
    end
    _y = mapreduce(i -> fill(i, n), vcat, 1:npetals)
    y = zeros(npetals, length(_y))
    foreach(i -> y[_y[i], i] = 1, 1:length(_y))
    Float32.(x), Float32.(y)
end

x, y = flower(900)
scatter(x[1,:], x[2,:], color = mapslices(argmax, y, dims = 1)[:])

#######
# Define a Tracked Array for operator overloading AD
#######
# `value` holds the data, `grad` the accumulated gradient, and `tape` a list of
# (result, pullback) pairs recorded by every operation the array took part in.
struct TrackedArray{T,N,V<:AbstractArray{T,N}} <: AbstractArray{T,N}
    value::V
    grad::Union{Nothing,V}
    tape::Vector{Any}
end

TrackedArray(a::AbstractArray) = TrackedArray(a, similar(a) .= 0, [])
TrackedMatrix{T,V} = TrackedArray{T,2,V} where {T,V<:AbstractMatrix{T}}
TrackedVector{T,V} = TrackedArray{T,1,V} where {T,V<:AbstractVector{T}}
Base.size(a::TrackedArray) = size(a.value)
Base.show(io::IO, ::MIME"text/plain", a::TrackedArray) = show(io, a)
Base.show(io::IO, a::TrackedArray) = print(io, "TrackedArray($(size(a.value)))")
value(A::TrackedArray) = A.value
resetgrad!(A::TrackedArray) = (A.grad .= 0; empty!(A.tape))
value(A) = A
track(A) = TrackedArray(A)
track(a::Number) = TrackedArray(reshape([a], 1, 1))

# Pull gradients back through the tape: accumulate each child's gradient,
# apply the recorded pullback, and clear the tape.
function accum!(A::TrackedArray)
    isempty(A.tape) && return(A.grad)
    A.grad .= sum(g(accum!(r)) for (r, g) in A.tape)
    empty!(A.tape)
    A.grad
end

#######
# Define AD rules for the few operations appearing in an FFNN
#######
import Base: +, *
import Base.Broadcast: broadcasted
function *(A::TrackedMatrix, B::TrackedMatrix)
    a, b = value.((A, B))
    C = track(a * b)
    push!(A.tape, (C, Δ -> Δ * b'))
    push!(B.tape, (C, Δ -> a' * Δ))
    C
end

function *(A::TrackedMatrix, B::AbstractMatrix)
    a, b = value.((A, B))
    C = track(a * b)
    push!(A.tape, (C, Δ -> Δ * b'))
    C
end

function broadcasted(::typeof(+), A::TrackedMatrix, B::TrackedVector)
    C = track(value(A) .+ value(B))
    push!(A.tape, (C, Δ -> Δ))
    push!(B.tape, (C, Δ -> sum(Δ, dims = 2)[:]))
    C
end

# Numerically stable logistic sigmoid.
function σ(x::Real)
    t = @fastmath exp(-abs(x))
    y = ifelse(x ≥ 0, inv(1 + t), t / (1 + t))
    ifelse(x > 40, one(y), ifelse(x < -80, zero(y), y))
end

broadcasted(::typeof(identity), A::TrackedArray) = A

function broadcasted(::typeof(σ), A::TrackedArray)
    Ω = σ.(value(A))
    C = track(Ω)
    push!(A.tape, (C, Δ -> Δ .* Ω .* (1 .- Ω)))
    C
end

function mse(A::TrackedMatrix, B::AbstractMatrix)
    n = size(A, 2)
    a = value(A)
    c = similar(a, 1, 1)
    c .= sum((a .- B).^2)/2n
    C = track(c)
    push!(A.tape, (C, Δ -> Δ .* (a .- B) ./ n))
    C
end

mse(x::AbstractMatrix, y::AbstractMatrix) = sum((x - y).^2) / (2*size(x,2))

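# A quick, illustrative sanity check of the rules above (the names Atest, Btest,
# Ctest are made up for this sketch): for Ctest = Atest * Btest seeded with a
# gradient of ones, accum!(Atest) should equal ones * value(Btest)'.
Atest = track(randn(Float32, 2, 3))
Btest = track(randn(Float32, 3, 4))
Ctest = Atest * Btest
fill!(Ctest.grad, 1)
accum!(Atest) ≈ Ctest.grad * value(Btest)'   # expected to be true
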
#######
# Define a Dense layer
#######
struct Dense{F,W,B}
    σ::F
    w::W
    b::B
end

Base.show(io::IO, m::Dense) = print(io, "Dense($(size(m.w,2)) → $(size(m.w,1)))")
Dense(i::Int, o::Int, σ = identity) = Dense(σ, randn(Float32, o, i), randn(Float32, o))
track(m::Dense) = Dense(m.σ, track(m.w), track(m.b))
track(m::ComposedFunction) = track(m.outer) ∘ track(m.inner)
(m::Dense)(x) = m.σ.(m.w * x .+ m.b)
params(m::ComposedFunction) = vcat(params(m.outer), params(m.inner))
params(m::Dense) = [m.w, m.b]

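# Illustrative shape check (the `layer` name is only for this example): a tracked
# Dense(2, 20, σ) maps the 2×900 batch `x` to a 20×900 activation matrix while
# recording the matmul, the bias broadcast and σ on its parameters' tapes.
layer = track(Dense(2, 20, σ))
size(value(layer(x)))   # (20, 900)
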
#######
# Let's try to actually train a model
#######
x, y = flower(900)
function initmodel()
    m₁ = track(Dense(2, 20, σ))
    m₂ = track(Dense(20, 20, σ))
    m₃ = track(Dense(20, size(y,1)))
    m = m₃ ∘ m₂ ∘ m₁
end
m = initmodel()
m(x) |> value

######
# Let's try to learn the parameters
######
α = 0.01
ps = params(m)
@elapsed for i in 1:10000
    foreach(resetgrad!, ps)        # clear gradients and tapes
    loss = mse(m(x), y)            # forward pass records the tapes
    fill!(loss.grad, 1)            # seed the output gradient
    foreach(accum!, ps)            # reverse pass: accumulate parameter gradients
    foreach(x -> x.value .-= α .* x.grad, ps)   # plain gradient-descent step
    mod(i,250) == 0 && println("loss after $(i) iterations = ", sum(value(loss)))
end

all(mapslices(argmax, value(m(x)), dims = 1)[:] .== mapslices(argmax, y, dims = 1)[:])
scatter(x[1,:], x[2,:], color = mapslices(argmax, value(m(x)), dims = 1)[:])

######
# Let's try to move the computation to GPU
######
using CUDA
gpu(x::AbstractArray) = CuArray(x)
gpu(x::TrackedArray) = TrackedArray(CuArray(value(x)))
gpu(m::Dense) = Dense(m.σ, gpu(m.w), gpu(m.b))
gpu(m::ComposedFunction) = gpu(m.outer) ∘ gpu(m.inner)

gx, gy = gpu(x), gpu(y)
m = gpu(m)
ps = params(m)
@elapsed for i in 1:10000
    foreach(resetgrad!, ps)
    loss = mse(m(gx), gy)
    fill!(loss.grad, 1)
    foreach(accum!, ps)
    foreach(x -> x.value .-= α .* x.grad, ps)
    mod(i,250) == 0 && println("loss after $(i) iterations = ", sum(value(loss)))
end

#######
# Why do we see only a small speed-up? Because the problem is small.
#######
using BenchmarkTools
p = randn(Float32, 20, 2)
@benchmark $(p) * $(x)
gp = gpu(p)
@benchmark $(gp) * $(gx)

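# The 20×2 by 2×900 multiply above is far too small to keep the GPU busy, so
# launch overhead dominates. A sketch with larger, made-up dimensions where the
# device should pull ahead; CUDA.@sync makes the timing include the kernel itself:
pbig, xbig = randn(Float32, 2048, 2048), randn(Float32, 2048, 2048)
gpbig, gxbig = gpu(pbig), gpu(xbig)
@benchmark $(pbig) * $(xbig)
@benchmark CUDA.@sync $(gpbig) * $(gxbig)
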
######
# Let's verify the gradients
######
# (This check uses the CPU model and data, i.e. run it before the gpu(...) conversion above.)
using FiniteDifferences
ps = params(m)
map(ps) do p
    foreach(resetgrad!, ps)
    loss = mse(m(x), y)
    fill!(loss.grad, 1)
    foreach(accum!, ps)
    accum!(p)
    θ = deepcopy(value(p))
    Δθ = deepcopy(p.grad)
    f = θ -> begin
        p.value .= θ
        sum(value(mse(m(x), y)))   # FiniteDifferences.grad expects a scalar-valued function
    end
    sum(abs2.(grad(central_fdm(5, 1), f, θ)[1] - Δθ))
end

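# The map above returns one number per parameter array (six in total); values
# close to zero mean the hand-written pullbacks agree with the central
# finite-difference estimates.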

0 commit comments
