Skip to content

Commit a4d5352

Browse files
committed
GPU puzzles
1 parent d40af23 commit a4d5352

File tree

4 files changed

+1093
-0
lines changed

4 files changed

+1093
-0
lines changed

gpu_puzzles/add_10.ipynb

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"source": [
6+
"!curl -ssL https://magic.modular.com/ | bash"
7+
],
8+
"metadata": {
9+
"id": "A8X6phvz7ZoQ"
10+
},
11+
"execution_count": null,
12+
"outputs": []
13+
},
14+
{
15+
"cell_type": "code",
16+
"source": [
17+
"import os\n",
18+
"os.environ['PATH'] += ':/root/.modular/bin'"
19+
],
20+
"metadata": {
21+
"id": "n7zS_6gK7fnB"
22+
},
23+
"execution_count": null,
24+
"outputs": []
25+
},
26+
{
27+
"cell_type": "code",
28+
"source": [
29+
"!magic init gpu_puzzles --format mojoproject"
30+
],
31+
"metadata": {
32+
"id": "Zlg5BNMn7j64"
33+
},
34+
"execution_count": null,
35+
"outputs": []
36+
},
37+
{
38+
"cell_type": "code",
39+
"source": [
40+
"%cd gpu_puzzles/"
41+
],
42+
"metadata": {
43+
"id": "mO77-mj17lsA"
44+
},
45+
"execution_count": null,
46+
"outputs": []
47+
},
48+
{
49+
"cell_type": "code",
50+
"source": [
51+
"%%writefile add_10.mojo\n",
52+
"\n",
53+
"### Add 10\n",
54+
"### Implement a kernel that adds 10 to each position of vector a and stores it in vector out.\n",
55+
"\n",
56+
"from gpu.host import DeviceContext\n",
57+
"from memory import UnsafePointer\n",
58+
"from gpu import thread_idx\n",
59+
"\n",
60+
"alias SIZE = 4\n",
61+
"alias BLOCKS_PER_GRID = 1\n",
62+
"alias THREADS_PER_BLOCK = SIZE\n",
63+
"alias dtype = DType.float32\n",
64+
"\n",
65+
"\n",
66+
"fn add_10(\n",
67+
" out: UnsafePointer[Scalar[dtype]], array: UnsafePointer[Scalar[dtype]]\n",
68+
"):\n",
69+
" tid = thread_idx.x\n",
70+
" out[tid] = array[tid] + 10\n",
71+
"\n",
72+
"\n",
73+
"fn main() raises:\n",
74+
" ctx = DeviceContext()\n",
75+
" d_array_buff = ctx.enqueue_create_buffer[dtype](SIZE)\n",
76+
" expected = ctx.enqueue_create_buffer[dtype](SIZE)\n",
77+
" d_out_buff = ctx.enqueue_create_buffer[dtype](SIZE)\n",
78+
"\n",
79+
" _ = d_out_buff.enqueue_fill(0)\n",
80+
"\n",
81+
" with d_array_buff.map_to_host() as h_array_buff:\n",
82+
" for i in range(SIZE):\n",
83+
" h_array_buff[i] = i\n",
84+
"\n",
85+
" ctx.enqueue_function[add_10](\n",
86+
" d_out_buff.unsafe_ptr(),\n",
87+
" d_array_buff.unsafe_ptr(),\n",
88+
" grid_dim=BLOCKS_PER_GRID,\n",
89+
" block_dim=THREADS_PER_BLOCK,\n",
90+
" )\n",
91+
"\n",
92+
" ctx.synchronize()\n",
93+
"\n",
94+
" with d_out_buff.map_to_host() as h_out_buff:\n",
95+
" print(h_out_buff)\n"
96+
],
97+
"metadata": {
98+
"id": "UT3V1O2M7txw",
99+
"colab": {
100+
"base_uri": "https://localhost:8080/"
101+
},
102+
"outputId": "6557f1f2-e6ff-4850-84c6-f7cc69954f57"
103+
},
104+
"execution_count": 25,
105+
"outputs": [
106+
{
107+
"output_type": "stream",
108+
"name": "stdout",
109+
"text": [
110+
"Overwriting add_10.mojo\n"
111+
]
112+
}
113+
]
114+
},
115+
{
116+
"cell_type": "code",
117+
"source": [
118+
"!magic run mojo add_10.mojo"
119+
],
120+
"metadata": {
121+
"id": "CkjRGISm7y1Q",
122+
"outputId": "458efc1c-a004-43e8-b540-f973f7f26027",
123+
"colab": {
124+
"base_uri": "https://localhost:8080/"
125+
}
126+
},
127+
"execution_count": 26,
128+
"outputs": [
129+
{
130+
"output_type": "stream",
131+
"name": "stdout",
132+
"text": [
133+
"\u001b[32m⠁\u001b[0m \r\u001b[2K\u001b[32m⠁\u001b[0m activating environment \r\u001b[2K\u001b[32m⠁\u001b[0m activating environment \r\u001b[2KHostBuffer([10.0, 11.0, 12.0, 13.0])\n"
134+
]
135+
}
136+
]
137+
},
138+
{
139+
"cell_type": "code",
140+
"source": [
141+
"!magic run mojo format add_10.mojo"
142+
],
143+
"metadata": {
144+
"colab": {
145+
"base_uri": "https://localhost:8080/"
146+
},
147+
"id": "Cc2XVTrevpy5",
148+
"outputId": "7608d036-835a-446b-96eb-68ffea96ba1e"
149+
},
150+
"execution_count": 27,
151+
"outputs": [
152+
{
153+
"output_type": "stream",
154+
"name": "stdout",
155+
"text": [
156+
"\u001b[32m⠁\u001b[0m \r\u001b[2K\u001b[32m⠁\u001b[0m activating environment \r\u001b[2K\u001b[32m⠁\u001b[0m activating environment \r\u001b[2K\u001b[1mreformatted add_10.mojo\u001b[0m\n",
157+
"\n",
158+
"\u001b[1mAll done! ✨ 🍰 ✨\u001b[0m\n",
159+
"\u001b[34m\u001b[1m1 file \u001b[0m\u001b[1mreformatted\u001b[0m.\n"
160+
]
161+
}
162+
]
163+
}
164+
],
165+
"metadata": {
166+
"colab": {
167+
"provenance": [],
168+
"gpuType": "T4"
169+
},
170+
"kernelspec": {
171+
"display_name": "Python 3",
172+
"name": "python3"
173+
},
174+
"accelerator": "GPU"
175+
},
176+
"nbformat": 4,
177+
"nbformat_minor": 0
178+
}

gpu_puzzles/add_10.mojo

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
### Add 10
2+
### Implement a kernel that adds 10 to each element of vector `array` and stores the result in vector `out`.
3+
4+
from gpu.host import DeviceContext
5+
from memory import UnsafePointer
6+
from gpu import thread_idx
7+
8+
alias SIZE = 4
9+
alias BLOCKS_PER_GRID = 1
10+
alias THREADS_PER_BLOCK = SIZE
11+
alias dtype = DType.float32
12+
13+
14+
fn add_10(
    out: UnsafePointer[Scalar[dtype]], array: UnsafePointer[Scalar[dtype]]
):
    """Write `array[i] + 10` into `out[i]` for the calling thread's index.

    Each thread handles the single element selected by `thread_idx.x`.
    There is no bounds check, so the launch must not use more threads than
    the buffers have elements.

    Args:
        out: Destination pointer; one element is written per thread.
        array: Source pointer; one element is read per thread.
    """
    # One element per thread, addressed by the thread's x index.
    idx = thread_idx.x
    shifted = array[idx] + 10
    out[idx] = shifted
19+
20+
21+
fn main() raises:
    """Run the `add_10` kernel on a SIZE-element vector and print the output.

    Steps: create input and output device buffers, zero-fill the output,
    write 0..SIZE-1 into the input through a host mapping, launch `add_10`
    with BLOCKS_PER_GRID block(s) of THREADS_PER_BLOCK threads, synchronize
    the context, then print the host-mapped output buffer.

    Raises:
        Declared `raises`; errors from the device-context calls below
        propagate to the caller.
    """
    ctx = DeviceContext()
    # Only the input and output buffers are needed; the previously allocated
    # `expected` buffer was never read or written, so it has been dropped.
    d_array_buff = ctx.enqueue_create_buffer[dtype](SIZE)
    d_out_buff = ctx.enqueue_create_buffer[dtype](SIZE)

    # Zero-fill the output buffer before the kernel launch.
    _ = d_out_buff.enqueue_fill(0)

    # Populate the input from the host: array[i] = i.
    with d_array_buff.map_to_host() as h_array_buff:
        for i in range(SIZE):
            h_array_buff[i] = i

    # Argument order matches the kernel signature: (out, array).
    ctx.enqueue_function[add_10](
        d_out_buff.unsafe_ptr(),
        d_array_buff.unsafe_ptr(),
        grid_dim=BLOCKS_PER_GRID,
        block_dim=THREADS_PER_BLOCK,
    )

    # Synchronize the context before mapping the output back to the host.
    ctx.synchronize()

    with d_out_buff.map_to_host() as h_out_buff:
        print(h_out_buff)
44+

0 commit comments

Comments
 (0)