From 491fac222ce0b30780bdec1bfe75f097b176e5b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alonso=20Mart=C3=ADnez=20Cisneros?=
Date: Sat, 4 Oct 2025 17:24:40 +0200
Subject: [PATCH 1/2] Initial fixes to support Metal.jl

Add a TullioMetalExt package extension that defines the threader and
gradient-threader methods for MtlArray, generate a KernelAbstractions
actor using MetalBackend() when Metal and MtlArray are defined in the
calling module, and add Metal tests that run only when Metal.jl loads
and reports a functional GPU.
---
Review notes (this section is ignored by `git am`):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. Context-line whitespace is a best guess, so applying may
  need `git apply --ignore-whitespace`; the `index` hashes are stale.
- src/macro.jl: the Metal actor is now generated in its own `if` instead of an
  `elseif` of the preceding (CUDA) branch, so having both backends in scope no
  longer suppresses the MtlArray method. Please confirm against the full file.
- ext/: dropped the `isdefined(@__MODULE__, :Metal)` / `:CUDA` warnings. Those
  names are never bound inside the extension modules, so the warnings could
  not fire. ext/TullioCUDAExt.jl is therefore left untouched.
- test/group-2.jl: the bare `try ... catch end` swallowed every error raised
  while running metal.jl. Only `using Metal` is guarded now, and the tests are
  gated on `Metal.functional()`. Whitespace-only hunks were dropped.

 Project.toml          | 11 +++++----
 ext/TullioMetalExt.jl | 20 ++++++++++++++++
 src/macro.jl          | 16 +++++++++++++
 test/group-2.jl       | 16 +++++++++++++
 test/metal.jl         | 54 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 113 insertions(+), 4 deletions(-)
 create mode 100644 ext/TullioMetalExt.jl
 create mode 100644 test/metal.jl

diff --git a/Project.toml b/Project.toml
index e39acb4..993b63f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -11,15 +11,17 @@ Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 
 [weakdeps]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
+ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
-ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 
 [extensions]
 TullioCUDAExt = "CUDA"
+TullioMetalExt = "Metal"
+TullioChainRulesCoreExt = "ChainRulesCore"
 TullioFillArraysExt = "FillArrays"
 TullioTrackerExt = "Tracker"
-TullioChainRulesCoreExt = "ChainRulesCore"
 
 [compat]
 CUDA = "4, 5"
@@ -29,6 +31,7 @@ FillArrays = "0.11, 0.12, 0.13, 1"
 ForwardDiff = "0.10, 1.0"
 KernelAbstractions = "0.9"
 LoopVectorization = "0.12.101"
+Metal = "1.8.1"
 NamedDims = "0.2, 1"
 OffsetArrays = "1"
 Requires = "1"
@@ -39,8 +42,8 @@ Zygote = "0.6.33, 0.7"
 julia = "1.10" # note that this is the minimum Julia version, 1.11 & 1.12 etc. are allowed & should work.
 
 [extras]
-ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
@@ -58,4 +61,4 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-test = ["Test", "CUDA", "FillArrays", "ForwardDiff", "KernelAbstractions", "LinearAlgebra", "LoopVectorization", "NamedDims", "OffsetArrays", "Pkg", "Printf", "Random", "TensorOperations", "Tracker", "VectorizationBase", "Zygote"]
+test = ["Test", "CUDA", "Metal", "FillArrays", "ForwardDiff", "KernelAbstractions", "LinearAlgebra", "LoopVectorization", "NamedDims", "OffsetArrays", "Pkg", "Printf", "Random", "TensorOperations", "Tracker", "VectorizationBase", "Zygote"]
diff --git a/ext/TullioMetalExt.jl b/ext/TullioMetalExt.jl
new file mode 100644
index 0000000..235f167
--- /dev/null
+++ b/ext/TullioMetalExt.jl
@@ -0,0 +1,20 @@
+module TullioMetalExt
+if !isdefined(Base, :get_extension)
+    using ..Tullio, ..Metal
+else
+    using Tullio, Metal
+end
+
+# When the dispatch type `T` is a Metal array type, call the actor `fun!` directly,
+# exactly once; `redfun` and `block` are accepted but not used by this method.
+Tullio.threader(fun!::F, ::Type{T},
+    Z::AbstractArray, As::Tuple, Is::Tuple, Js::Tuple,
+    redfun, block=0, keep=nothing) where {F<:Function, T<:Metal.MtlArray} =
+    fun!(T, Z, As..., Is..., Js..., keep)
+
+# Gradient counterpart of the method above: one direct call, `block` is not used.
+Tullio.∇threader(fun!::F, ::Type{T},
+    As::Tuple, Is::Tuple, Js::Tuple, block=0) where {F<:Function, T<:Metal.MtlArray} =
+    fun!(T, As..., Is..., Js...,)
+
+end
diff --git a/src/macro.jl b/src/macro.jl
index b8173d7..93446a8 100644
--- a/src/macro.jl
+++ b/src/macro.jl
@@ -1175,6 +1175,22 @@ function make_many_actors(act!, args, ex1, outer::Vector, ex3, inner::Vector, ex
             end
             store.verbose==2 && @info "=====KA===== KernelAbstractions CUDA actor $note" verbosetidy(kex2)
             push!(store.outpre, kex2)
+        end
+        # Separate `if`, not `elseif`: emit the Metal actor even if the branch above ran.
+        if isdefined(store.mod, :Metal) && isdefined(store.mod, :MtlArray)
+            info2 = store.verbose>0 ? :(@info "running KernelAbstractions + Metal actor $($note)" maxlog=3 _id=$(hash(store))) : nothing
+            kex2 = quote
+
+                local @inline function $act!(::Type{<:MtlArray}, $(args...), $KEEP=nothing, $FINAL=true) where {$TYP}
+                    $info2
+                    mtl_kern! = $kernel(MetalBackend())
+                    $(asserts...)
+                    $ACC = mtl_kern!($(args...), $KEEP, $FINAL; ndrange=tuple($(sizes...)), workgroupsize=$workgroupsize)
+                end
+
+            end
+            store.verbose==2 && @info "=====KA===== KernelAbstractions Metal actor $note" verbosetidy(kex2)
+            push!(store.outpre, kex2)
         end
         info3 = store.verbose>0 ? :(@info "running KernelAbstractions CPU actor $($note)" maxlog=3 _id=$(hash(store))) : nothing
         kex3 = quote
diff --git a/test/group-2.jl b/test/group-2.jl
index 6381206..d09e0f5 100644
--- a/test/group-2.jl
+++ b/test/group-2.jl
@@ -23,6 +23,22 @@ _gradient(x...) = Tracker.gradient(x...)
     end
 end
 
+# Only the package load is guarded: errors raised while running metal.jl must
+# surface as test errors instead of being swallowed by a bare `catch`.
+metal_loaded = try
+    using Metal
+    true
+catch err
+    @info "===== Metal.jl could not be loaded, skipping Metal tests =====" exception=err
+    false
+end
+if metal_loaded && Metal.functional()
+    @info "===== found an Apple GPU, starting Metal tests ====="
+    @testset "===== Metal tests on GPU =====" begin
+        include("metal.jl")
+    end
+end
+
 using CUDA
 
 if is_buildkite
diff --git a/test/metal.jl b/test/metal.jl
new file mode 100644
index 0000000..7c9df6a
--- /dev/null
+++ b/test/metal.jl
@@ -0,0 +1,54 @@
+using Tullio, Test
+using Metal, KernelAbstractions
+using Tracker, ForwardDiff
+@tullio grad=Base
+
+# matmul
+mul(A, B) = @tullio C[i,k] := A[i,j] * B[j,k]
+
+A = rand(3,40); B = rand(40,500);
+@test A * B ≈ mul(A, B)
+@test mtl(A * B) ≈ mul(mtl(A), mtl(B))
+
+# gradient
+# FIXME: these gradient tests are suspected to be broken on Metal; not yet confirmed
+ΔA = Tracker.gradient((A,B) -> sum(mul(A, B)), A, B)[1]
+@test ΔA ≈ ones(3,500) * B'
+@test mtl(ΔA) ≈ Tracker.gradient((A,B) -> sum(mul(A, B)), mtl(A), mtl(B))[1]
+
+# shifts
+@tullio D[i,j] := A[i,j+k] k in 0:10
+@test axes(D) == (1:3, 1:30)
+@tullio D_dev[i,j] := mtl(A)[i,j+k] k in 0:10
+@test D_dev isa MtlArray
+@test D_dev ≈ mtl(D)
+
+# product
+@tullio (*) F[j] := A[i,j]
+@test F ≈ vec(prod(A, dims=1))
+@tullio (*) F_dev[j] := mtl(A)[i,j]
+@test F_dev ≈ mtl(F)
+
+# maximum
+g(A) = @tullio (max) G[j] := A[i,j]
+@test g(A) == vec(maximum(A, dims=1))
+A0 = zero(A);
+A0[findmax(A, dims=1)[2]] .= 1
+@test A0 ≈ Tracker.gradient(sum∘g, A)[1]
+@test g(mtl(A)) isa MtlArray
+@test g(mtl(A)) ≈ mtl(g(A))
+@test mtl(A0) ≈ Tracker.gradient(sum∘g, mtl(A))[1]
+
+# functions
+h(A) = @tullio H[j] := exp(A[i,j]) / log(A[i,j])
+@test h(mtl(A)) isa MtlArray
+@test h(mtl(A)) ≈ mtl(h(A))
+A1 = Tracker.gradient(sum∘h, A)[1]
+@test mtl(A1) ≈ Tracker.gradient(sum∘h, mtl(A))[1]
+
+A, B, C = Metal.rand(2, 2, 2), Metal.rand(2, 2), Metal.rand(2, 2, 2);
+@tullio A[k,i,a] = tanh(B[i,a] + C[k,i,a])
+A2 = similar(A)
+struct Bee{T}; B::T; end
+B2 = Bee(B)
+@test A ≈ @tullio A2[k,i,a] = tanh(B2.B[i,a] + C[k,i,a])

From 8feb9215dc6bc524badf87173dff758e3ed95193 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alonso=20Mart=C3=ADnez=20Cisneros?=
Date: Sun, 5 Oct 2025 10:42:41 +0200
Subject: [PATCH 2/2] CI updates

---
 .buildkite/pipeline.yml  | 4 ++--
 .github/workflows/ci.yml | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index e8baa59..149ca82 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -3,10 +3,10 @@ env:
   # SECRET_CODECOV_TOKEN: "..."
 
 steps:
-  - label: "Julia 1.8"
+  - label: "Julia 1.11"
     plugins:
       - JuliaCI/julia#v0.5:
-          version: "1.8"
+          version: "1.11"
       - JuliaCI/julia-test#v0.3: ~
       # - JuliaCI/julia-coverage#v0.3:
       #     codecov: true
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c022019..33947b4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -30,12 +30,12 @@ jobs:
           - '1' # automatically expands to the latest stable 1.x release of Julia
           # - 'nightly'
     steps:
-      - uses: actions/checkout@v2
-      - uses: julia-actions/setup-julia@v1
+      - uses: actions/checkout@v5
+      - uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
-      - uses: actions/cache@v1
+      - uses: actions/cache@v4
         env:
           cache-name: cache-artifacts
         with: