Skip to content

Commit 57cc364

Browse files
committed
Add 10 to 2D Layout tensor
1 parent 9f274bc commit 57cc364

File tree

2 files changed

+275
-0
lines changed

2 files changed

+275
-0
lines changed

gpu_puzzles/add_10_2dlayout.ipynb

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {
7+
"id": "buOgxm25ONit"
8+
},
9+
"outputs": [],
10+
"source": [
11+
"!curl -ssL https://magic.modular.com/ | bash"
12+
]
13+
},
14+
{
15+
"cell_type": "code",
16+
"execution_count": null,
17+
"metadata": {
18+
"id": "FVZvyhRiONiw"
19+
},
20+
"outputs": [],
21+
"source": [
22+
"import os\n",
23+
"os.environ['PATH'] +=':/root/.modular/bin'"
24+
]
25+
},
26+
{
27+
"cell_type": "code",
28+
"execution_count": null,
29+
"metadata": {
30+
"id": "TqFD0EK0ONiw"
31+
},
32+
"outputs": [],
33+
"source": [
34+
"!magic init gpu_puzzles --format mojoproject"
35+
]
36+
},
37+
{
38+
"cell_type": "code",
39+
"execution_count": null,
40+
"metadata": {
41+
"id": "k3Ddb6GcONiw"
42+
},
43+
"outputs": [],
44+
"source": [
45+
"%cd gpu_puzzles/"
46+
]
47+
},
48+
{
49+
"cell_type": "code",
50+
"execution_count": 16,
51+
"metadata": {
52+
"colab": {
53+
"base_uri": "https://localhost:8080/"
54+
},
55+
"id": "IaxB1auxONix",
56+
"outputId": "3474befe-bbb9-459a-ddd5-e9855ca305b5"
57+
},
58+
"outputs": [
59+
{
60+
"output_type": "stream",
61+
"name": "stdout",
62+
"text": [
63+
"Overwriting add_10_2dlayout.mojo\n"
64+
]
65+
}
66+
],
67+
"source": [
68+
"%%writefile add_10_2dlayout.mojo\n",
69+
"\n",
70+
"### Add constant to 2D Layout tensor\n",
71+
"### Implement a kernel that adds 10 to each position of 2D LayoutTensor a and stores it in 2D LayoutTensor out.\n",
72+
"\n",
73+
"from gpu.host import DeviceContext\n",
74+
"from gpu import thread_idx\n",
75+
"from layout import Layout, LayoutTensor\n",
76+
"from math import iota\n",
77+
"\n",
78+
"\n",
79+
"alias SIZE = 2\n",
80+
"alias BLOCKS_PER_GRID = 1\n",
81+
"alias THREADS_PER_BLOCK = (3, 3)\n",
82+
"alias dtype = DType.float32\n",
83+
"alias layout = Layout.row_major(SIZE, SIZE)\n",
84+
"\n",
85+
"\n",
86+
"fn add_10_2dlayout(\n",
87+
" out: LayoutTensor[mut=True, dtype, layout],\n",
88+
" a: LayoutTensor[mut=True, dtype, layout],\n",
89+
" size: Int,\n",
90+
"):\n",
91+
" row = thread_idx.y\n",
92+
" col = thread_idx.x\n",
93+
" # FILL ME IN (roughly 2 lines)\n",
94+
" if row < size and col < size:\n",
95+
" out[row, col] = a[row, col] + 10\n",
96+
"\n",
97+
"\n",
98+
"fn main():\n",
99+
" try:\n",
100+
" ctx = DeviceContext()\n",
101+
"\n",
102+
" buffer_a = ctx.enqueue_create_buffer[dtype](SIZE * SIZE).enqueue_fill(\n",
103+
" 0.0\n",
104+
" )\n",
105+
" buffer_out = ctx.enqueue_create_buffer[dtype](SIZE * SIZE).enqueue_fill(\n",
106+
" 0.0\n",
107+
" )\n",
108+
"\n",
109+
" with buffer_a.map_to_host() as h_buffer_a:\n",
110+
" iota(h_buffer_a.unsafe_ptr(), SIZE * SIZE)\n",
111+
"\n",
112+
" out = LayoutTensor[mut=True, dtype, layout](buffer_out)\n",
113+
" a = LayoutTensor[mut=True, dtype, layout](buffer_a)\n",
114+
"\n",
115+
" ctx.enqueue_function[add_10_2dlayout](\n",
116+
" out,\n",
117+
" a,\n",
118+
" SIZE,\n",
119+
" grid_dim=(BLOCKS_PER_GRID, BLOCKS_PER_GRID),\n",
120+
" block_dim=THREADS_PER_BLOCK,\n",
121+
" )\n",
122+
"\n",
123+
" ctx.synchronize()\n",
124+
"\n",
125+
" with buffer_out.map_to_host() as h_buffer_out:\n",
126+
" print(h_buffer_out)\n",
127+
" except e:\n",
128+
" print(e)\n"
129+
]
130+
},
131+
{
132+
"cell_type": "code",
133+
"execution_count": 17,
134+
"metadata": {
135+
"colab": {
136+
"base_uri": "https://localhost:8080/"
137+
},
138+
"id": "h2k9wkDaONiz",
139+
"outputId": "3cb08451-43dd-4400-bab3-fcb435191c05"
140+
},
141+
"outputs": [
142+
{
143+
"output_type": "stream",
144+
"name": "stdout",
145+
"text": [
146+
"\u001b[32m⠁\u001b[0m \r\u001b[2K\u001b[32m⠁\u001b[0m activating environment \r\u001b[2K\u001b[32m⠁\u001b[0m activating environment \r\u001b[2KHostBuffer([10.0, 11.0, 12.0, 13.0])\n"
147+
]
148+
}
149+
],
150+
"source": [
151+
"!magic run mojo add_10_2dlayout.mojo"
152+
]
153+
},
154+
{
155+
"cell_type": "code",
156+
"execution_count": 15,
157+
"metadata": {
158+
"colab": {
159+
"base_uri": "https://localhost:8080/"
160+
},
161+
"id": "bSglX7bNONi0",
162+
"outputId": "4f7126e7-85f3-4ef3-fed9-79673b282bc4"
163+
},
164+
"outputs": [
165+
{
166+
"output_type": "stream",
167+
"name": "stdout",
168+
"text": [
169+
"\u001b[32m⠁\u001b[0m \r\u001b[2K\u001b[32m⠁\u001b[0m activating environment \r\u001b[2K\u001b[32m⠁\u001b[0m activating environment \r\u001b[2K\u001b[1mreformatted add_10_2dlayout.mojo\u001b[0m\n",
170+
"\n",
171+
"\u001b[1mAll done! ✨ 🍰 ✨\u001b[0m\n",
172+
"\u001b[34m\u001b[1m1 file \u001b[0m\u001b[1mreformatted\u001b[0m.\n"
173+
]
174+
}
175+
],
176+
"source": [
177+
"!magic run mojo format add_10_2dlayout.mojo"
178+
]
179+
}
180+
],
181+
"metadata": {
182+
"accelerator": "GPU",
183+
"colab": {
184+
"gpuType": "T4",
185+
"provenance": []
186+
},
187+
"kaggle": {
188+
"accelerator": "nvidiaTeslaT4",
189+
"dataSources": [],
190+
"dockerImageVersionId": 31041,
191+
"isGpuEnabled": true,
192+
"isInternetEnabled": true,
193+
"language": "python",
194+
"sourceType": "notebook"
195+
},
196+
"kernelspec": {
197+
"display_name": "Python 3",
198+
"name": "python3"
199+
},
200+
"language_info": {
201+
"codemirror_mode": {
202+
"name": "ipython",
203+
"version": 3
204+
},
205+
"file_extension": ".py",
206+
"mimetype": "text/x-python",
207+
"name": "python",
208+
"nbconvert_exporter": "python",
209+
"pygments_lexer": "ipython3",
210+
"version": "3.11.11"
211+
}
212+
},
213+
"nbformat": 4,
214+
"nbformat_minor": 0
215+
}

gpu_puzzles/add_10_2dlayout.mojo

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
### Add constant to 2D Layout tensor
### Implement a kernel that adds 10 to each position of 2D LayoutTensor a and stores it in 2D LayoutTensor out.

from gpu import thread_idx
from gpu.host import DeviceContext
from layout import Layout, LayoutTensor
from math import iota


# Problem configuration: a SIZE x SIZE row-major tensor, launched from a
# single block that is deliberately larger (3 x 3 threads) than the 2 x 2
# data, so the kernel's bounds guard is exercised.
alias SIZE = 2
alias BLOCKS_PER_GRID = 1
alias THREADS_PER_BLOCK = (3, 3)
alias dtype = DType.float32
alias layout = Layout.row_major(SIZE, SIZE)
fn add_10_2dlayout(
    out: LayoutTensor[mut=True, dtype, layout],
    a: LayoutTensor[mut=True, dtype, layout],
    size: Int,
):
    """Add 10 to every element of `a`, writing the result into `out`.

    Each thread handles at most one element: thread (x, y) maps to
    column x / row y. Threads outside the size x size extent (the launch
    uses a 3 x 3 block for a 2 x 2 tensor) do nothing.
    """
    # NOTE(review): `a` is only read here; mut=True is presumably kept to
    # match the puzzle's given signature — confirm before tightening.
    col = thread_idx.x
    row = thread_idx.y
    if col < size and row < size:
        out[row, col] = a[row, col] + 10


fn main():
    # Host driver: fill the input buffer with 0..SIZE*SIZE-1, launch the
    # kernel over a single (3, 3) block, then print the device result.
    try:
        device = DeviceContext()

        # Zero-initialised device buffers for input and output.
        in_buf = device.enqueue_create_buffer[dtype](SIZE * SIZE).enqueue_fill(
            0.0
        )
        out_buf = device.enqueue_create_buffer[dtype](SIZE * SIZE).enqueue_fill(
            0.0
        )

        # Write 0, 1, 2, ... into the input from the host side.
        with in_buf.map_to_host() as host_in:
            iota(host_in.unsafe_ptr(), SIZE * SIZE)

        # 2D row-major views over the flat device buffers.
        a = LayoutTensor[mut=True, dtype, layout](in_buf)
        out = LayoutTensor[mut=True, dtype, layout](out_buf)

        device.enqueue_function[add_10_2dlayout](
            out,
            a,
            SIZE,
            grid_dim=(BLOCKS_PER_GRID, BLOCKS_PER_GRID),
            block_dim=THREADS_PER_BLOCK,
        )

        device.synchronize()

        # Expect HostBuffer([10.0, 11.0, 12.0, 13.0]).
        with out_buf.map_to_host() as host_out:
            print(host_out)
    except e:
        print(e)

0 commit comments

Comments
 (0)