Skip to content

Commit 3deae99

Browse files
committed
Add vectors
1 parent a4d5352 commit 3deae99

File tree

3 files changed

+306
-7
lines changed

3 files changed

+306
-7
lines changed

gpu_puzzles/add_10.ipynb

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
"from gpu.host import DeviceContext\n",
5757
"from memory import UnsafePointer\n",
5858
"from gpu import thread_idx\n",
59+
"from testing import assert_equal\n",
5960
"\n",
6061
"alias SIZE = 4\n",
6162
"alias BLOCKS_PER_GRID = 1\n",
@@ -73,9 +74,8 @@
7374
"fn main() raises:\n",
7475
" ctx = DeviceContext()\n",
7576
" d_array_buff = ctx.enqueue_create_buffer[dtype](SIZE)\n",
76-
" expected = ctx.enqueue_create_buffer[dtype](SIZE)\n",
7777
" d_out_buff = ctx.enqueue_create_buffer[dtype](SIZE)\n",
78-
"\n",
78+
" expected = ctx.enqueue_create_host_buffer[dtype](SIZE)\n",
7979
" _ = d_out_buff.enqueue_fill(0)\n",
8080
"\n",
8181
" with d_array_buff.map_to_host() as h_array_buff:\n",
@@ -91,17 +91,22 @@
9191
"\n",
9292
" ctx.synchronize()\n",
9393
"\n",
94+
" for i in range(SIZE):\n",
95+
" expected[i] = i + 10\n",
96+
"\n",
9497
" with d_out_buff.map_to_host() as h_out_buff:\n",
95-
" print(h_out_buff)\n"
98+
" print(h_out_buff)\n",
99+
" for i in range(SIZE):\n",
100+
" assert_equal(h_out_buff[i], expected[i])\n"
96101
],
97102
"metadata": {
98103
"id": "UT3V1O2M7txw",
99104
"colab": {
100105
"base_uri": "https://localhost:8080/"
101106
},
102-
"outputId": "6557f1f2-e6ff-4850-84c6-f7cc69954f57"
107+
"outputId": "30d37b0f-737b-4857-8b30-72cc17964d25"
103108
},
104-
"execution_count": 25,
109+
"execution_count": 30,
105110
"outputs": [
106111
{
107112
"output_type": "stream",
@@ -119,12 +124,12 @@
119124
],
120125
"metadata": {
121126
"id": "CkjRGISm7y1Q",
122-
"outputId": "458efc1c-a004-43e8-b540-f973f7f26027",
127+
"outputId": "4b8f6798-4e2f-4228-d899-9492e1901abe",
123128
"colab": {
124129
"base_uri": "https://localhost:8080/"
125130
}
126131
},
127-
"execution_count": 26,
132+
"execution_count": 31,
128133
"outputs": [
129134
{
130135
"output_type": "stream",

gpu_puzzles/add_vectors.ipynb

Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"source": [
6+
"!curl -ssL https://magic.modular.com/ | bash"
7+
],
8+
"metadata": {
9+
"id": "A8X6phvz7ZoQ",
10+
"outputId": "21c85116-eb45-4bff-fe32-ba59589954be",
11+
"colab": {
12+
"base_uri": "https://localhost:8080/"
13+
}
14+
},
15+
"execution_count": 1,
16+
"outputs": [
17+
{
18+
"output_type": "stream",
19+
"name": "stdout",
20+
"text": [
21+
"Installing the latest version of Magic...\n",
22+
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
23+
" Dload Upload Total Spent Left Speed\n",
24+
" 0 0 0 0 0 0 0 0 --:--:-- 0:00:03 --:--:-- 0\n",
25+
"100 49.9M 100 49.9M 0 0 9574k 0 0:00:05 0:00:05 --:--:-- 72.0M\n",
26+
"Done. The 'magic' binary is in '/root/.modular/bin'\n",
27+
"\n",
28+
"Two more steps:\n",
29+
"1. To use 'magic', run this command so it's in your PATH:\n",
30+
"source /root/.bashrc\n",
31+
"2. To build with MAX and Mojo, go to http://modul.ar/get-started\n"
32+
]
33+
}
34+
]
35+
},
36+
{
37+
"cell_type": "code",
38+
"source": [
39+
"import os\n",
40+
"os.environ['PATH'] += ':/root/.modular/bin'"
41+
],
42+
"metadata": {
43+
"id": "n7zS_6gK7fnB"
44+
},
45+
"execution_count": 2,
46+
"outputs": []
47+
},
48+
{
49+
"cell_type": "code",
50+
"source": [
51+
"!magic init gpu_puzzles --format mojoproject"
52+
],
53+
"metadata": {
54+
"id": "Zlg5BNMn7j64",
55+
"outputId": "d8c7e15e-af56-4a3e-8340-984d2df2c267",
56+
"colab": {
57+
"base_uri": "https://localhost:8080/"
58+
}
59+
},
60+
"execution_count": 3,
61+
"outputs": [
62+
{
63+
"output_type": "stream",
64+
"name": "stdout",
65+
"text": [
66+
"\u001b[32m✔ \u001b[0mCreated /content/gpu_puzzles/mojoproject.toml\n"
67+
]
68+
}
69+
]
70+
},
71+
{
72+
"cell_type": "code",
73+
"source": [
74+
"%cd gpu_puzzles/"
75+
],
76+
"metadata": {
77+
"id": "mO77-mj17lsA",
78+
"outputId": "669fb4cf-44e7-44f7-bb8a-bf7c0ebb2832",
79+
"colab": {
80+
"base_uri": "https://localhost:8080/"
81+
}
82+
},
83+
"execution_count": 4,
84+
"outputs": [
85+
{
86+
"output_type": "stream",
87+
"name": "stdout",
88+
"text": [
89+
"/content/gpu_puzzles\n"
90+
]
91+
}
92+
]
93+
},
94+
{
95+
"cell_type": "code",
96+
"source": [
97+
"%%writefile add_vectors.mojo\n",
98+
"\n",
99+
"### Add vectors\n",
100+
"### Mojo kernel for adding corresponding elements of vectors a and b, store in out.\n",
101+
"\n",
102+
"from gpu.host import DeviceContext\n",
103+
"from memory import UnsafePointer\n",
104+
"from gpu import thread_idx, block_idx, block_dim\n",
105+
"from testing import assert_equal\n",
106+
"\n",
107+
"alias SIZE = 4\n",
108+
"alias BLOCKS_PER_GRID = 1\n",
109+
"alias THREADS_PER_BLOCK = SIZE\n",
110+
"alias dtype = DType.float32\n",
111+
"\n",
112+
"\n",
113+
"fn add(\n",
114+
" out: UnsafePointer[Scalar[dtype]],\n",
115+
" a: UnsafePointer[Scalar[dtype]],\n",
116+
" b: UnsafePointer[Scalar[dtype]],\n",
117+
"):\n",
118+
" tid = block_idx.x * block_dim.x + thread_idx.x\n",
119+
" if tid < SIZE:\n",
120+
" out[tid] = a[tid] + b[tid]\n",
121+
"\n",
122+
"\n",
123+
"fn main() raises:\n",
124+
" ctx = DeviceContext()\n",
125+
" d_array_buff_1 = ctx.enqueue_create_buffer[dtype](SIZE)\n",
126+
" d_array_buff_2 = ctx.enqueue_create_buffer[dtype](SIZE)\n",
127+
" d_out_buff = ctx.enqueue_create_buffer[dtype](SIZE)\n",
128+
" expected = ctx.enqueue_create_host_buffer[dtype](SIZE)\n",
129+
" _ = d_out_buff.enqueue_fill(0)\n",
130+
" _ = expected.enqueue_fill(SIZE - 1)\n",
131+
"\n",
132+
" with d_array_buff_1.map_to_host() as h_array_buff_1:\n",
133+
" for i in range(SIZE):\n",
134+
" h_array_buff_1[i] = i\n",
135+
"\n",
136+
" with d_array_buff_2.map_to_host() as h_array_buff_2:\n",
137+
" for i in range(SIZE - 1, -1, -1):\n",
138+
" h_array_buff_2[SIZE - 1 - i] = i\n",
139+
"\n",
140+
" ctx.enqueue_function[add](\n",
141+
" d_out_buff.unsafe_ptr(),\n",
142+
" d_array_buff_1.unsafe_ptr(),\n",
143+
" d_array_buff_2.unsafe_ptr(),\n",
144+
" grid_dim=BLOCKS_PER_GRID,\n",
145+
" block_dim=THREADS_PER_BLOCK,\n",
146+
" )\n",
147+
"\n",
148+
" ctx.synchronize()\n",
149+
"\n",
150+
" with d_out_buff.map_to_host() as h_out_buff:\n",
151+
" print(h_out_buff)\n",
152+
" for i in range(SIZE):\n",
153+
" assert_equal(h_out_buff[i], expected[i])\n"
154+
],
155+
"metadata": {
156+
"id": "UT3V1O2M7txw",
157+
"colab": {
158+
"base_uri": "https://localhost:8080/"
159+
},
160+
"outputId": "786e9c23-3d97-4238-fcac-117ba6e8555f"
161+
},
162+
"execution_count": 27,
163+
"outputs": [
164+
{
165+
"output_type": "stream",
166+
"name": "stdout",
167+
"text": [
168+
"Overwriting add_vectors.mojo\n"
169+
]
170+
}
171+
]
172+
},
173+
{
174+
"cell_type": "code",
175+
"source": [
176+
"!magic run mojo add_vectors.mojo"
177+
],
178+
"metadata": {
179+
"id": "CkjRGISm7y1Q",
180+
"outputId": "d05fe142-8743-4ecc-9401-48e532b5ef0a",
181+
"colab": {
182+
"base_uri": "https://localhost:8080/"
183+
}
184+
},
185+
"execution_count": 26,
186+
"outputs": [
187+
{
188+
"output_type": "stream",
189+
"name": "stdout",
190+
"text": [
191+
"\u001b[32m⠁\u001b[0m \r\u001b[2K\u001b[32m⠁\u001b[0m activating environment \r\u001b[2K\u001b[32m⠁\u001b[0m activating environment \r\u001b[2KHostBuffer([3.0, 3.0, 3.0, 3.0])\n"
192+
]
193+
}
194+
]
195+
},
196+
{
197+
"cell_type": "code",
198+
"source": [
199+
"!magic run mojo format add_vectors.mojo"
200+
],
201+
"metadata": {
202+
"colab": {
203+
"base_uri": "https://localhost:8080/"
204+
},
205+
"id": "Cc2XVTrevpy5",
206+
"outputId": "112f97cd-da96-4c3a-831e-f9f4eae633d9"
207+
},
208+
"execution_count": 24,
209+
"outputs": [
210+
{
211+
"output_type": "stream",
212+
"name": "stdout",
213+
"text": [
214+
"\u001b[32m⠁\u001b[0m \r\u001b[2K\u001b[32m⠁\u001b[0m activating environment \r\u001b[2K\u001b[32m⠁\u001b[0m activating environment \r\u001b[2K\u001b[1mreformatted add_vectors.mojo\u001b[0m\n",
215+
"\n",
216+
"\u001b[1mAll done! ✨ 🍰 ✨\u001b[0m\n",
217+
"\u001b[34m\u001b[1m1 file \u001b[0m\u001b[1mreformatted\u001b[0m.\n"
218+
]
219+
}
220+
]
221+
}
222+
],
223+
"metadata": {
224+
"colab": {
225+
"name": "Welcome To Colab",
226+
"provenance": [],
227+
"gpuType": "T4"
228+
},
229+
"kernelspec": {
230+
"display_name": "Python 3",
231+
"name": "python3"
232+
},
233+
"accelerator": "GPU"
234+
},
235+
"nbformat": 4,
236+
"nbformat_minor": 0
237+
}

gpu_puzzles/add_vectors.mojo

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
2+
### Add vectors
3+
### Mojo kernel that adds corresponding elements of vectors a and b and stores the result in out.
4+
5+
from gpu.host import DeviceContext
6+
from memory import UnsafePointer
7+
from gpu import thread_idx, block_idx, block_dim
8+
from testing import assert_equal
9+
10+
alias SIZE = 4
11+
alias BLOCKS_PER_GRID = 1
12+
alias THREADS_PER_BLOCK = SIZE
13+
alias dtype = DType.float32
14+
15+
16+
fn add(
    out: UnsafePointer[Scalar[dtype]],
    a: UnsafePointer[Scalar[dtype]],
    b: UnsafePointer[Scalar[dtype]],
):
    """Element-wise vector addition kernel: out[i] = a[i] + b[i].

    Each thread derives one index from its block index, block dimension and
    thread index, and handles the element at that index.

    Args:
        out: Pointer to the destination elements.
        a: Pointer to the first operand's elements.
        b: Pointer to the second operand's elements.
    """
    global_idx = thread_idx.x + block_dim.x * block_idx.x

    # Threads whose index is at or beyond SIZE have no element to process.
    if global_idx >= SIZE:
        return

    out[global_idx] = a[global_idx] + b[global_idx]
24+
25+
26+
fn main() raises:
    """Launch the `add` kernel on two SIZE-element vectors and check the sums.

    The first input is filled with 0, 1, ..., SIZE - 1 and the second with the
    same values in descending order, so every element-wise sum equals
    SIZE - 1, which is the value the reference buffer is filled with.

    Raises:
        Any error propagated from the device context calls, or from
        `assert_equal` when an output element differs from the reference.
    """
    gpu = DeviceContext()

    # Two input buffers and one output buffer created through the device
    # context, plus a host buffer holding the reference values.
    lhs_dev = gpu.enqueue_create_buffer[dtype](SIZE)
    rhs_dev = gpu.enqueue_create_buffer[dtype](SIZE)
    result_dev = gpu.enqueue_create_buffer[dtype](SIZE)
    reference = gpu.enqueue_create_host_buffer[dtype](SIZE)
    _ = result_dev.enqueue_fill(0)
    _ = reference.enqueue_fill(SIZE - 1)

    # First operand: ascending values 0 .. SIZE - 1.
    with lhs_dev.map_to_host() as lhs_host:
        for idx in range(SIZE):
            lhs_host[idx] = idx

    # Second operand: descending values SIZE - 1 .. 0.
    with rhs_dev.map_to_host() as rhs_host:
        for idx in range(SIZE):
            rhs_host[idx] = SIZE - 1 - idx

    gpu.enqueue_function[add](
        result_dev.unsafe_ptr(),
        lhs_dev.unsafe_ptr(),
        rhs_dev.unsafe_ptr(),
        grid_dim=BLOCKS_PER_GRID,
        block_dim=THREADS_PER_BLOCK,
    )

    # Synchronize the context before reading the output back.
    gpu.synchronize()

    with result_dev.map_to_host() as result_host:
        print(result_host)
        for idx in range(SIZE):
            assert_equal(result_host[idx], reference[idx])
57+

0 commit comments

Comments
 (0)