Skip to content

Commit 99dcf89

Browse files
committed
Add lecture 10
1 parent 5202893 commit 99dcf89

File tree

10 files changed

+2038
-5
lines changed

10 files changed

+2038
-5
lines changed

docs_vitepress/make.jl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,11 @@ pages = [
7575
# "Lecture" => "lecture.md",
7676
# "Lab" => "lab.md",
7777
# ]),
78-
# "10: Parallel programming" => add_prefix("lecture_10", [
79-
# "Lecture" => "lecture.md",
80-
# "Lab" => "lab.md",
81-
# "Homework" => "hw.md",
82-
# ]),
78+
"10: Parallel programming" => add_prefix("lecture_10", [
79+
"Lecture" => "lecture.md",
80+
"Lab" => "lab.md",
81+
"Homework" => "hw.md",
82+
]),
8383
"11: GPU programming" => add_prefix("lecture_11", [
8484
"Lecture" => "lecture.md",
8585
"Lab" => "lab.md",
171 KB
Loading
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
@everywhere begin
2+
"""
3+
sample_all_installed_pkgs(path::AbstractString)
4+
5+
Returns root folders of all installed packages in the system. Package version is sampled.
6+
"""
7+
function sample_all_installed_pkgs(path::AbstractString)
8+
pkgs = readdir(path)
9+
# [rand(readdir(joinpath(path, p), join=true)) for p in pkgs] # sampling version
10+
[readdir(joinpath(path, p), join=true)[1] for p in pkgs if isdir(joinpath(path, p))] # deterministic version
11+
end
12+
13+
"""
14+
filter_jl(path)
15+
16+
Recursively walks the directory structure to obtain all `.jl` files.
17+
"""
18+
filter_jl(path) = reduce(vcat, joinpath.(rootpath, filter(endswith(".jl"), files)) for (rootpath, dirs, files) in walkdir(path))
19+
20+
"""
21+
tokenize(jl_path)
22+
23+
Parses a ".jl" file located at `jl_path` and extracts all symbols and expression heads from the extracted AST.
24+
"""
25+
function tokenize(jl_path)
26+
_extract_symbols(x) = Symbol[]
27+
_extract_symbols(x::Symbol) = [x]
28+
function _extract_symbols(x::Expr)
29+
if length(x.args) > 0
30+
Symbol.(vcat(x.head, reduce(vcat, _extract_symbols(arg) for arg in x.args)))
31+
else
32+
Symbol[]
33+
end
34+
end
35+
36+
scode = "begin\n" * read(jl_path, String) * "end\n"
37+
try
38+
code = Meta.parse(scode)
39+
_extract_symbols(code)
40+
catch e
41+
if ~isa(e, Meta.ParseError)
42+
rethrow(e)
43+
end
44+
Symbol[]
45+
end
46+
end
47+
48+
49+
"""
    histtokens!(h, filename::AbstractString)

Accumulate the token counts of the `.jl` file `filename` into the
histogram dictionary `h` and return `h`.
"""
function histtokens!(h, filename::AbstractString)
    foreach(tok -> h[tok] = get(h, tok, 0) + 1, tokenize(filename))
    return h
end
55+
56+
"""
    dohistogram(chnl)

Drain file names from channel `chnl` and build a `Dict{Symbol, Int}`
token histogram over all of them.
"""
function dohistogram(chnl)
    hist = Dict{Symbol, Int}()
    # Keep consuming until no more work is available in the channel.
    while isready(chnl)
        histtokens!(hist, take!(chnl))
    end
    return hist
end
64+
end
65+
66+
# Producer: feed the path of every `.jl` file of every installed package
# into a remote channel that all workers can drain.
chnl = RemoteChannel() do
    Channel(typemax(Int)) do ch
        # Use the active depot instead of a hard-coded user-specific path,
        # so the script runs on any machine/user account.
        for package in sample_all_installed_pkgs(joinpath(first(DEPOT_PATH), "packages"))
            foreach(c -> put!(ch, c), filter_jl(package))
        end
    end
end

# Consumers: each worker builds a partial histogram; merge them by
# summing counts per token.
mapreduce(fetch, mergewith(+), [@spawnat i dohistogram(chnl) for i in workers()])
75+
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# [Homework 9: Accelerating 1D convolution with threads](@id hw09)
2+
3+
## How to submit
4+
5+
Put all the code inside `hw.jl`. Zip only this file (not its parent folder) and upload it to BRUTE. You should not import anything but `Base.Threads` or just `Threads`.
6+
7+
::: danger Homework (2 points)
8+
9+
Implement a *multithreaded* discrete 1D convolution operator[^1] without padding (the output will be shorter than the input). The required function signature: `thread_conv1d(x, w)`, where `x` is the signal array and `w` the kernel. For testing the correctness of the implementation you can use the following example of a step function and its derivative realized by the kernel `[-1, 1]`:
10+
11+
```julia
12+
using Test
13+
@test all(thread_conv1d(vcat([0.0, 0.0, 1.0, 1.0, 0.0, 0.0]), [-1.0, 1.0]) .≈ [0.0, -1.0, 0.0, 1.0, 0.0])
14+
```
15+
16+
[^1]: Discrete convolution with finite support [https://en.wikipedia.org/wiki/Convolution#Discrete\_convolution](https://en.wikipedia.org/wiki/Convolution#Discrete_convolution)
17+
18+
Your parallel implementation will be tested both in sequential and two threaded mode with the following inputs
19+
20+
```julia
21+
using Random
22+
Random.seed!(42)
23+
x = rand(10_000_000)
24+
w = [1.0, 2.0, 4.0, 2.0, 1.0]
25+
@btime thread_conv1d($x, $w);
26+
```
27+
28+
On your local machine you should be able to achieve a `0.6x` reduction in execution time with two threads; however, the automatic eval system is a noisy environment and therefore we require only a `0.8x` reduction therein. This being said, please reach out to us if you encounter any issues.
29+
30+
**HINTS**:
31+
32+
- start with single threaded implementation
33+
- don't forget to reverse the kernel
34+
- `@threads` macro should be all you need
35+
- for testing purposes create a simple script, that you can run with `julia -t 1` and `julia -t 2`
36+
37+
:::
38+
39+
::: details Show solution
40+
41+
Nothing to see here
42+
43+
:::
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
using Pkg
2+
Pkg.activate(@__DIR__)
3+
using GLMakie
4+
using BenchmarkTools
5+
using Distributed
6+
using SharedArrays
7+
8+
# Escape-time iteration for a single pixel of the Julia set: count the
# number of steps (capped at 255) until |z| exceeds 2, starting from z₀
# with parameter c. Returns the count as a UInt8 grey value.
function juliaset_pixel(z₀, c)
    z = z₀
    for step in 1:255
        if abs2(z) > 4.0
            return (step - 1) % UInt8
        end
        z = z * z + c
    end
    return UInt8(255)
end
16+
17+
# Render column `j` of an n×n Julia set into column `colj` of `img`.
# Pixel indices map linearly onto the square [-2, 2] × [-2, 2].
function juliaset_column!(img, c, n, colj, j)
    re = -2.0 + (j - 1) * 4.0 / (n - 1)
    for row in 1:n
        imag_part = -2.0 + (row - 1) * 4.0 / (n - 1)
        @inbounds img[row, colj] = juliaset_pixel(complex(re, imag_part), c)
    end
    nothing
end
25+
26+
# Render the requested `columns` of an n×n Julia set into a fresh image,
# one output column per requested column index.
function juliaset_columns(c, n, columns)
    img = Matrix{UInt8}(undef, n, length(columns))
    foreach(enumerate(columns)) do (colj, j)
        juliaset_column!(img, c, n, colj, j)
    end
    img
end
33+
34+
# Render an n×n Julia set for c = x + y*im by distributing contiguous
# column ranges to the worker pool via `pmap` and stitching the returned
# slices back together with `hcat`.
function juliaset_distributed(x, y, partitions = nworkers(), n = 1000)
    c = complex(x, y)
    ranges = Iterators.partition(1:n, div(n, partitions))
    pieces = pmap(r -> juliaset_columns(c, n, r), ranges)
    reduce(hcat, pieces)
end
40+
41+
# @btime juliaset_distributed(-0.79, 0.15)
42+
43+
# frac = juliaset_distributed(-0.79, 0.15)
44+
# plot(heatmap(1:size(frac,1), 1:size(frac,2), frac, color=:Spectral))
45+
46+
47+
####
48+
# Let's work out the shared array approach
49+
####
50+
# In-place render of column `j` of an n×n Julia set directly into `img`
# (used with a SharedArray so every worker writes to the same image).
function juliaset_column!(img, c, n, j)
    re = -2.0 + (j - 1) * 4.0 / (n - 1)
    for row in 1:n
        imag_part = -2.0 + (row - 1) * 4.0 / (n - 1)
        @inbounds img[row, j] = juliaset_pixel(complex(re, imag_part), c)
    end
    nothing
end
58+
59+
# Render a contiguous range of columns in place, one column at a time.
function juliaset_range!(img, c, n, columns)
    foreach(j -> juliaset_column!(img, c, n, j), columns)
    nothing
end
65+
66+
# Render an n×n Julia set into a SharedArray visible to all workers.
# Each worker fills its own contiguous range of columns in place; the
# `pmap` call is used purely for scheduling/synchronization, so its
# return value (all `nothing`s) is discarded rather than bound to an
# unused variable.
function juliaset_shared(x, y, partitions = nworkers(), n = 1000)
    c = x + y*im
    columns = Iterators.partition(1:n, div(n, partitions))
    img = SharedArray{UInt8,2}((n, n))
    pmap(cols -> juliaset_range!(img, c, n, cols), columns)
    img
end
73+
74+
# juliaset_shared(-0.79, 0.15)
75+
# juliaset_shared(-0.79, 0.15, 16)
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
using Plots, BenchmarkTools
2+
Threads.nthreads()
3+
# Escape-time count for one Julia-set pixel: iterate z ← z² + c from z₀
# and return how many steps (0–255) it takes for |z| to exceed 2;
# points that never escape within 255 steps get the value 255.
function juliaset_pixel(z₀, c)
    z = z₀
    k = 0
    while k < 255
        abs2(z) > 4.0 && return k % UInt8
        z = z * z + c
        k += 1
    end
    return UInt8(255)
end
11+
12+
# Render column `j` of an n×n Julia set in place; pixel indices map
# linearly onto the square [-2, 2] × [-2, 2].
function juliaset_column!(img, c, n, j)
    re = -2.0 + (j - 1) * 4.0 / (n - 1)
    for row in 1:n
        imag_part = -2.0 + (row - 1) * 4.0 / (n - 1)
        @inbounds img[row, j] = juliaset_pixel(complex(re, imag_part), c)
    end
    nothing
end
20+
21+
# Sequential baseline: render all n columns one after another.
function juliaset_single!(img, c, n)
    foreach(j -> juliaset_column!(img, c, n, j), 1:n)
    nothing
end
27+
28+
# Render an n×n Julia-set image for c = x + y*im. `method` is the filler
# that paints the preallocated image (sequential by default); `extra...`
# is forwarded to it (e.g. a chunk/thread count).
function juliaset(x, y, n = 1000, method = juliaset_single!, extra...)
    c = complex(x, y)
    img = Matrix{UInt8}(undef, n, n)
    method(img, c, n, extra...)
    return img
end
34+
35+
# frac = juliaset(-0.79, 0.15)
36+
# plot(heatmap(1:size(frac,1),1:size(frac,2), frac, color=:Spectral))
37+
38+
39+
@btime juliaset(-0.79, 0.15);
40+
41+
# Threaded renderer using `Threads.@threads`: the column range is split
# across the available threads, one independent column per iteration.
function juliaset_static!(img, c, n)
    Threads.@threads for col in 1:n
        juliaset_column!(img, c, n, col)
    end
    nothing
end
47+
48+
@btime juliaset(-0.79, 0.15, 1000, juliaset_static!);
49+
50+
51+
using Folds
52+
# Task-parallel renderer using Folds.jl: `Folds.foreach` schedules the
# per-column work over its default threaded executor.
function juliaset_folds!(img, c, n)
    Folds.foreach(j -> juliaset_column!(img, c, n, j), 1:n)
    nothing
end

# REPL benchmark result, kept as a comment so the file stays parseable
# (the raw `julia>` prompt and timing output were pasted into the source):
# julia> @btime juliaset(-0.79, 0.15, 1000, juliaset_folds!);
#   16.267 ms (25 allocations: 978.20 KiB)
58+
59+
# Chunked variant: split the column range into `nt` contiguous parts and
# let Folds.jl process one chunk per task (less scheduling overhead than
# one task per column).
function juliaset_folds!(img, c, n, nt)
    parts = collect(Iterators.partition(1:n, cld(n, nt)))
    Folds.foreach(parts) do cols
        foreach(j -> juliaset_column!(img, c, n, j), cols)
    end
    nothing
end

# REPL benchmark result, kept as a comment so the file stays parseable
# (the raw `julia>` prompt and timing output were pasted into the source):
# julia> @btime juliaset(-0.79, 0.15, 1000, (args...) -> juliaset_folds!(args..., 16));
#   16.716 ms (25 allocations: 978.61 KiB)
68+
69+
70+
using FLoops, FoldsThreads
71+
# Threaded renderer driven by FLoops; `ThreadedEx(basesize = 2)` chunks
# the column range into groups of two columns per task.
function juliaset_folds!(img, c, n)
    @floop ThreadedEx(basesize = 2) for col in 1:n
        juliaset_column!(img, c, n, col)
    end
    nothing
end
77+
@btime juliaset(-0.79, 0.15, 1000, juliaset_folds!);

0 commit comments

Comments
 (0)