Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions problems/p03/p03.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ alias dtype = DType.float32
fn add_10_guard(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
size: Int,
size: UInt,
):
i = thread_idx.x
# FILL ME IN (roughly 2 lines)


# ANCHOR_END: add_10_guard

Expand Down
2 changes: 1 addition & 1 deletion problems/p04/p04.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ alias dtype = DType.float32
fn add_10_2d(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
size: Int,
size: UInt,
):
row = thread_idx.y
col = thread_idx.x
Expand Down
2 changes: 1 addition & 1 deletion problems/p04/p04_layout_tensor.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ alias layout = Layout.row_major(SIZE, SIZE)
fn add_10_2d(
output: LayoutTensor[mut=True, dtype, layout],
a: LayoutTensor[mut=True, dtype, layout],
size: Int,
size: UInt,
):
row = thread_idx.y
col = thread_idx.x
Expand Down
2 changes: 1 addition & 1 deletion problems/p05/p05.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ fn broadcast_add(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
b: UnsafePointer[Scalar[dtype]],
size: Int,
size: UInt,
):
row = thread_idx.y
col = thread_idx.x
Expand Down
3 changes: 1 addition & 2 deletions problems/p05/p05_layout_tensor.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,12 @@ fn broadcast_add[
output: LayoutTensor[mut=True, dtype, out_layout],
a: LayoutTensor[mut=False, dtype, a_layout],
b: LayoutTensor[mut=False, dtype, b_layout],
size: Int,
size: UInt,
):
row = thread_idx.y
col = thread_idx.x
# FILL ME IN (roughly 2 lines)


# ANCHOR_END: broadcast_add_layout_tensor
def main():
with DeviceContext() as ctx:
Expand Down
2 changes: 1 addition & 1 deletion problems/p06/p06.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ alias dtype = DType.float32
fn add_10_blocks(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
size: Int,
size: UInt,
):
i = block_dim.x * block_idx.x + thread_idx.x
# FILL ME IN (roughly 2 lines)
Expand Down
2 changes: 1 addition & 1 deletion problems/p07/p07.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ alias dtype = DType.float32
fn add_10_blocks_2d(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
size: Int,
size: UInt,
):
row = block_dim.y * block_idx.y + thread_idx.y
col = block_dim.x * block_idx.x + thread_idx.x
Expand Down
2 changes: 1 addition & 1 deletion problems/p07/p07_layout_tensor.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fn add_10_blocks_2d[
](
output: LayoutTensor[mut=True, dtype, out_layout],
a: LayoutTensor[mut=False, dtype, a_layout],
size: Int,
size: UInt,
):
row = block_dim.y * block_idx.y + thread_idx.y
col = block_dim.x * block_idx.x + thread_idx.x
Expand Down
2 changes: 1 addition & 1 deletion problems/p08/p08.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ alias dtype = DType.float32
fn add_10_shared(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
size: Int,
size: UInt,
):
shared = stack_allocation[
TPB,
Expand Down
2 changes: 1 addition & 1 deletion problems/p08/p08_layout_tensor.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ fn add_10_shared_layout_tensor[
](
output: LayoutTensor[mut=True, dtype, layout],
a: LayoutTensor[mut=True, dtype, layout],
size: Int,
size: UInt,
):
# Allocate shared memory using LayoutTensor with explicit address_space
shared = LayoutTensor[
Expand Down
4 changes: 2 additions & 2 deletions problems/p10/p10.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ alias layout = Layout.row_major(SIZE, SIZE)
fn shared_memory_race(
output: LayoutTensor[mut=True, dtype, layout],
a: LayoutTensor[mut=False, dtype, layout],
size: Int,
size: UInt,
):
row = thread_idx.y
col = thread_idx.x
Expand Down Expand Up @@ -45,7 +45,7 @@ fn shared_memory_race(
fn add_10_2d(
output: LayoutTensor[mut=True, dtype, layout],
a: LayoutTensor[mut=True, dtype, layout],
size: Int,
size: UInt,
):
row = thread_idx.y
col = thread_idx.x
Expand Down
2 changes: 1 addition & 1 deletion problems/p11/p11.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ alias dtype = DType.float32
fn pooling(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
size: Int,
size: UInt,
):
shared = stack_allocation[
TPB,
Expand Down
2 changes: 1 addition & 1 deletion problems/p11/p11_layout_tensor.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fn pooling[
](
output: LayoutTensor[mut=True, dtype, layout],
a: LayoutTensor[mut=True, dtype, layout],
size: Int,
size: UInt,
):
# Allocate shared memory using tensor builder
shared = LayoutTensor[
Expand Down
2 changes: 1 addition & 1 deletion problems/p12/p12.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ fn dot_product(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
b: UnsafePointer[Scalar[dtype]],
size: Int,
size: UInt,
):
# FILL ME IN (roughly 13 lines)
...
Expand Down
2 changes: 1 addition & 1 deletion problems/p12/p12_layout_tensor.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ fn dot_product[
output: LayoutTensor[mut=True, dtype, out_layout],
a: LayoutTensor[mut=True, dtype, in_layout],
b: LayoutTensor[mut=True, dtype, in_layout],
size: Int,
size: UInt,
):
# FILL ME IN (roughly 13 lines)
...
Expand Down
4 changes: 2 additions & 2 deletions problems/p14/p14.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ fn prefix_sum_simple[
](
output: LayoutTensor[mut=True, dtype, layout],
a: LayoutTensor[mut=False, dtype, layout],
size: Int,
size: UInt,
):
global_i = block_dim.x * block_idx.x + thread_idx.x
local_i = thread_idx.x
Expand All @@ -43,7 +43,7 @@ fn prefix_sum_local_phase[
](
output: LayoutTensor[mut=True, dtype, out_layout],
a: LayoutTensor[mut=False, dtype, in_layout],
size: Int,
size: UInt,
):
global_i = block_dim.x * block_idx.x + thread_idx.x
local_i = thread_idx.x
Expand Down
2 changes: 1 addition & 1 deletion problems/p15/p15.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn axis_sum[
](
output: LayoutTensor[mut=True, dtype, out_layout],
a: LayoutTensor[mut=False, dtype, in_layout],
size: Int,
size: UInt,
):
global_i = block_dim.x * block_idx.x + thread_idx.x
local_i = thread_idx.x
Expand Down
8 changes: 4 additions & 4 deletions problems/p17/op/conv1d.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ fn conv1d_kernel[
in_layout: Layout,
out_layout: Layout,
conv_layout: Layout,
input_size: Int,
conv_size: Int,
input_size: UInt,
conv_size: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[mut=True, dtype, out_layout],
Expand Down Expand Up @@ -80,8 +80,8 @@ struct Conv1DCustomOp:
fn execute[
# The kind of device this will be run on: "cpu" or "gpu"
target: StaticString,
input_size: Int,
conv_size: Int,
input_size: UInt,
conv_size: UInt,
dtype: DType = DType.float32,
](
output: OutputTensor[rank=1],
Expand Down
8 changes: 4 additions & 4 deletions problems/p18/op/softmax.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ alias BLOCK_DIM_X = 1 << log2_ceil(SIZE)


fn softmax_gpu_kernel[
layout: Layout,
input_size: Int,
layout: Layout,
input_size: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[mut=True, dtype, layout],
Expand All @@ -35,7 +35,7 @@ fn softmax_gpu_kernel[
# ANCHOR: softmax_cpu_kernel
fn softmax_cpu_kernel[
layout: Layout,
input_size: Int,
input_size: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[dtype, layout, MutableAnyOrigin],
Expand All @@ -57,7 +57,7 @@ struct SoftmaxCustomOp:
@staticmethod
fn execute[
target: StaticString, # "cpu" or "gpu"
input_size: Int,
input_size: UInt,
dtype: DType = DType.float32,
](
output: OutputTensor[rank=1],
Expand Down
20 changes: 10 additions & 10 deletions problems/p19/op/attention.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ fn matmul_idiomatic_tiled[
a_layout: Layout,
b_layout: Layout,
out_layout: Layout,
rows: Int,
cols: Int,
inner: Int,
rows: UInt,
cols: UInt,
inner: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[mut=True, dtype, out_layout, MutableAnyOrigin],
Expand Down Expand Up @@ -120,8 +120,8 @@ fn matmul_idiomatic_tiled[
fn transpose_kernel[
layout_in: Layout, # Layout for input matrix (seq_len, d)
layout_out: Layout, # Layout for output matrix (d, seq_len)
rows: Int,
cols: Int,
rows: UInt,
cols: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[mut=True, dtype, layout_out, MutableAnyOrigin],
Expand All @@ -137,7 +137,7 @@ fn transpose_kernel[
# Apply softmax to attention scores taken from p16
fn softmax_gpu_kernel[
layout: Layout,
input_size: Int,
input_size: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[mut=True, dtype, layout],
Expand Down Expand Up @@ -209,8 +209,8 @@ fn attention_cpu_kernel[
layout_k: Layout,
layout_v: Layout,
layout_out: Layout,
seq_len: Int,
d: Int,
seq_len: UInt,
d: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[dtype, layout_out, MutableAnyOrigin],
Expand Down Expand Up @@ -259,8 +259,8 @@ struct AttentionCustomOp:
@staticmethod
fn execute[
target: StaticString, # "cpu" or "gpu"
seq_len: Int,
d: Int,
seq_len: UInt,
d: UInt,
dtype: DType = DType.float32,
](
output: OutputTensor[rank=1], # Output vector (d,)
Expand Down
4 changes: 2 additions & 2 deletions problems/p20/op/conv1d.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ fn conv1d_kernel[
in_layout: Layout,
out_layout: Layout,
conv_layout: Layout,
input_size: Int,
conv_size: Int,
input_size: UInt,
conv_size: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[mut=True, dtype, out_layout],
Expand Down
32 changes: 16 additions & 16 deletions problems/p21/op/embedding.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ fn embedding_kernel_coalesced[
indices_layout: Layout,
weights_layout: Layout,
out_layout: Layout,
batch_size: Int,
seq_len: Int,
vocab_size: Int,
embed_dim: Int,
batch_size: UInt,
seq_len: UInt,
vocab_size: UInt,
embed_dim: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[mut=True, dtype, out_layout],
Expand Down Expand Up @@ -57,10 +57,10 @@ fn embedding_kernel_2d[
indices_layout: Layout,
weights_layout: Layout,
out_layout: Layout,
batch_size: Int,
seq_len: Int,
vocab_size: Int,
embed_dim: Int,
batch_size: UInt,
seq_len: UInt,
vocab_size: UInt,
embed_dim: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[mut=True, dtype, out_layout],
Expand Down Expand Up @@ -108,10 +108,10 @@ struct EmbeddingCustomOp:
@staticmethod
fn execute[
target: StaticString,
batch_size: Int,
seq_len: Int,
vocab_size: Int,
embed_dim: Int,
batch_size: UInt,
seq_len: UInt,
vocab_size: UInt,
embed_dim: UInt,
](
output: OutputTensor[
dtype = DType.float32, rank=3
Expand Down Expand Up @@ -194,10 +194,10 @@ struct Embedding2DCustomOp:
@staticmethod
fn execute[
target: StaticString,
batch_size: Int,
seq_len: Int,
vocab_size: Int,
embed_dim: Int,
batch_size: UInt,
seq_len: UInt,
vocab_size: UInt,
embed_dim: UInt,
](
output: OutputTensor[
dtype = DType.float32, rank=3
Expand Down
Loading
Loading