From 89d50591c06f084b80a5ce527781e07f8dc32be6 Mon Sep 17 00:00:00 2001 From: Manuel Drehwald Date: Fri, 21 Nov 2025 00:14:03 -0800 Subject: [PATCH 1/3] Replace the first of 4 binary invocations for offload --- compiler/rustc_codegen_llvm/src/back/write.rs | 7 +++ compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 3 ++ .../rustc_llvm/llvm-wrapper/RustWrapper.cpp | 47 +++++++++++++++++++ 3 files changed, 57 insertions(+) diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index fde7dd6ef7a85..78b11c458d5aa 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -765,6 +765,13 @@ pub(crate) unsafe fn llvm_optimize( llvm_plugins.len(), ) }; + + if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Enable) { + unsafe { + llvm::LLVMRustBundleImages(module.module_llvm.llmod(), module.module_llvm.tm.raw()); + } + } + result.into_result().unwrap_or_else(|()| llvm_err(dcx, LlvmError::RunLlvmPasses)) } diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index ca64d96c2a33c..34f0e4b953381 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -1641,6 +1641,9 @@ unsafe extern "C" { Name: *const c_char, ) -> &'a Value; + /// Processes the module and writes it in an offload compatible way into a "host.out" file. + pub(crate) fn LLVMRustBundleImages<'a>(M: &'a Module, TM: &'a TargetMachine) -> bool; + /// Writes a module to the specified path. Returns 0 on success. 
pub(crate) fn LLVMWriteBitcodeToFile(M: &Module, Path: *const c_char) -> c_int; diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp index 8823c83922822..ba17aef92d0d5 100644 --- a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "llvm/Object/COFFImportFile.h" +#include "llvm/Object/OffloadBinary.h" #include "llvm/Remarks/RemarkFormat.h" #include "llvm/Remarks/RemarkSerializer.h" #include "llvm/Remarks/RemarkStreamer.h" @@ -35,6 +36,7 @@ #include "llvm/Support/Signals.h" #include "llvm/Support/Timer.h" #include "llvm/Support/ToolOutputFile.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include @@ -144,6 +146,51 @@ extern "C" void LLVMRustPrintStatistics(RustStringRef OutBuf) { llvm::PrintStatistics(OS); } +static Error writeFile(StringRef Filename, StringRef Data) { + Expected> OutputOrErr = + FileOutputBuffer::create(Filename, Data.size()); + if (!OutputOrErr) + return OutputOrErr.takeError(); + std::unique_ptr Output = std::move(*OutputOrErr); + llvm::copy(Data, Output->getBufferStart()); + if (Error E = Output->commit()) + return E; + return Error::success(); +} + +// This is the first of many steps in creating a binary using llvm offload, +// to run code on the gpu. Concretely, it replaces the following binary use: +// clang-offload-packager -o host.out +// --image=file=device.bc,triple=amdgcn-amd-amdhsa,arch=gfx90a,kind=openmp +// The input module is the rust code compiled for a gpu target like amdgpu. 
+// Based on clang/tools/clang-offload-packager/ClangOffloadPackager.cpp +extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM) { + std::string Storage; + llvm::raw_string_ostream OS1(Storage); + llvm::WriteBitcodeToFile(*unwrap(M), OS1); + OS1.flush(); + auto MB = llvm::MemoryBuffer::getMemBufferCopy(Storage, "module.bc"); + + SmallVector BinaryData; + raw_svector_ostream OS2(BinaryData); + + OffloadBinary::OffloadingImage ImageBinary{}; + ImageBinary.TheImageKind = object::IMG_Bitcode; + ImageBinary.Image = std::move(MB); + ImageBinary.TheOffloadKind = object::OFK_OpenMP; + ImageBinary.StringData["triple"] = TM.getTargetTriple().str(); + ImageBinary.StringData["arch"] = TM.getTargetCPU(); + llvm::SmallString<0> Buffer = OffloadBinary::write(ImageBinary); + if (Buffer.size() % OffloadBinary::getAlignment() != 0) + // Offload binary has invalid size alignment + return false; + OS2 << Buffer; + // errorToBool() consumes a failed Error; letting one go out of scope + // unhandled aborts when LLVM is built with ABI-breaking checks enabled. + return !errorToBool( + writeFile("host.out", StringRef(BinaryData.begin(), BinaryData.size()))); +} + extern "C" void LLVMRustOffloadMapper(LLVMValueRef OldFn, LLVMValueRef NewFn) { llvm::Function *oldFn = llvm::unwrap(OldFn); llvm::Function *newFn = llvm::unwrap(NewFn); From 5fbe5dae4264816c04f34711c9f53a6775d75505 Mon Sep 17 00:00:00 2001 From: Manuel Drehwald Date: Sat, 22 Nov 2025 22:15:26 -0800 Subject: [PATCH 2/3] Only try to link against offload functions if llvm.enzyme is enabled --- compiler/rustc/Cargo.toml | 1 + compiler/rustc_codegen_llvm/Cargo.toml | 1 + compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 35 ++++++++++++++++--- compiler/rustc_driver_impl/Cargo.toml | 1 + compiler/rustc_interface/Cargo.toml | 1 + compiler/rustc_llvm/build.rs | 4 +++ .../rustc_llvm/llvm-wrapper/RustWrapper.cpp | 15 ++++++-- src/bootstrap/src/core/build_steps/compile.rs | 3 ++ src/bootstrap/src/lib.rs | 3 ++ 9 files changed, 58 insertions(+), 6 deletions(-) diff --git a/compiler/rustc/Cargo.toml b/compiler/rustc/Cargo.toml index
9ef8fa75062a2..d68c1c9249f89 100644 --- a/compiler/rustc/Cargo.toml +++ b/compiler/rustc/Cargo.toml @@ -31,6 +31,7 @@ check_only = ['rustc_driver_impl/check_only'] jemalloc = ['dep:tikv-jemalloc-sys'] llvm = ['rustc_driver_impl/llvm'] llvm_enzyme = ['rustc_driver_impl/llvm_enzyme'] +llvm_offload = ['rustc_driver_impl/llvm_offload'] max_level_info = ['rustc_driver_impl/max_level_info'] rustc_randomized_layouts = ['rustc_driver_impl/rustc_randomized_layouts'] # tidy-alphabetical-end diff --git a/compiler/rustc_codegen_llvm/Cargo.toml b/compiler/rustc_codegen_llvm/Cargo.toml index 67bd1e59bb0c2..0544a94fd59fe 100644 --- a/compiler/rustc_codegen_llvm/Cargo.toml +++ b/compiler/rustc_codegen_llvm/Cargo.toml @@ -47,5 +47,6 @@ tracing = "0.1" # tidy-alphabetical-start check_only = ["rustc_llvm/check_only"] llvm_enzyme = [] +llvm_offload = [] # tidy-alphabetical-end diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 34f0e4b953381..09978dc6f873d 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -1641,9 +1641,6 @@ unsafe extern "C" { Name: *const c_char, ) -> &'a Value; - /// Processes the module and writes it in an offload compatible way into a "host.out" file. - pub(crate) fn LLVMRustBundleImages<'a>(M: &'a Module, TM: &'a TargetMachine) -> bool; - /// Writes a module to the specified path. Returns 0 on success. pub(crate) fn LLVMWriteBitcodeToFile(M: &Module, Path: *const c_char) -> c_int; @@ -1721,6 +1718,37 @@ unsafe extern "C" { ) -> &'a Value; } +#[cfg(feature = "llvm_offload")] +pub(crate) use self::Offload::*; + +#[cfg(feature = "llvm_offload")] +mod Offload { + use super::*; + unsafe extern "C" { + /// Processes the module and writes it in an offload compatible way into a "host.out" file. 
+ pub(crate) fn LLVMRustBundleImages<'a>(M: &'a Module, TM: &'a TargetMachine) -> bool; + pub(crate) fn LLVMRustOffloadMapper<'a>(OldFn: &'a Value, NewFn: &'a Value); + } +} + +#[cfg(not(feature = "llvm_offload"))] +pub(crate) use self::Offload_fallback::*; + +#[cfg(not(feature = "llvm_offload"))] +mod Offload_fallback { + use super::*; + /// Stand-in used when the `llvm_offload` feature is disabled; calling it always panics. + /// Marked as unsafe to match the real offload wrapper which is unsafe due to FFI. + #[allow(unused_unsafe)] + pub(crate) unsafe fn LLVMRustBundleImages<'a>(_M: &'a Module, _TM: &'a TargetMachine) -> bool { + unimplemented!("This rustc version was not built with LLVM Offload support!"); + } + #[allow(unused_unsafe)] + pub(crate) unsafe fn LLVMRustOffloadMapper<'a>(_OldFn: &'a Value, _NewFn: &'a Value) { + unimplemented!("This rustc version was not built with LLVM Offload support!"); + } +} + // FFI bindings for `DIBuilder` functions in the LLVM-C API. // Try to keep these in the same order as in `llvm/include/llvm-c/DebugInfo.h`. 
// @@ -2028,7 +2056,6 @@ unsafe extern "C" { ) -> &Attribute; // Operations on functions - pub(crate) fn LLVMRustOffloadMapper<'a>(Fn: &'a Value, Fn: &'a Value); pub(crate) fn LLVMRustGetOrInsertFunction<'a>( M: &'a Module, Name: *const c_char, diff --git a/compiler/rustc_driver_impl/Cargo.toml b/compiler/rustc_driver_impl/Cargo.toml index 46efa50cff364..531b9e0c8ff72 100644 --- a/compiler/rustc_driver_impl/Cargo.toml +++ b/compiler/rustc_driver_impl/Cargo.toml @@ -75,6 +75,7 @@ ctrlc = "3.4.4" check_only = ['rustc_interface/check_only'] llvm = ['rustc_interface/llvm'] llvm_enzyme = ['rustc_interface/llvm_enzyme'] +llvm_offload = ['rustc_interface/llvm_offload'] max_level_info = ['rustc_log/max_level_info'] rustc_randomized_layouts = [ 'rustc_index/rustc_randomized_layouts', diff --git a/compiler/rustc_interface/Cargo.toml b/compiler/rustc_interface/Cargo.toml index f0836c47740a2..d785030b5f2c8 100644 --- a/compiler/rustc_interface/Cargo.toml +++ b/compiler/rustc_interface/Cargo.toml @@ -59,4 +59,5 @@ rustc_abi = { path = "../rustc_abi" } check_only = ['rustc_codegen_llvm?/check_only'] llvm = ['dep:rustc_codegen_llvm'] llvm_enzyme = ['rustc_builtin_macros/llvm_enzyme', 'rustc_codegen_llvm/llvm_enzyme'] +llvm_offload = ['rustc_codegen_llvm/llvm_offload'] # tidy-alphabetical-end diff --git a/compiler/rustc_llvm/build.rs b/compiler/rustc_llvm/build.rs index d5c43c4fa0661..c58dd64cca5f7 100644 --- a/compiler/rustc_llvm/build.rs +++ b/compiler/rustc_llvm/build.rs @@ -214,6 +214,10 @@ fn main() { cfg.define("ENZYME", None); } + if tracked_env_var_os("LLVM_OFFLOAD").is_some() { + cfg.define("OFFLOAD", None); + } + if tracked_env_var_os("LLVM_RUSTLLVM").is_some() { cfg.define("LLVM_RUSTLLVM", None); } diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp index ba17aef92d0d5..8d95b7f3aa407 100644 --- a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp @@ -25,7 
+25,6 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "llvm/Object/COFFImportFile.h" -#include "llvm/Object/OffloadBinary.h" #include "llvm/Remarks/RemarkFormat.h" #include "llvm/Remarks/RemarkSerializer.h" #include "llvm/Remarks/RemarkStreamer.h" @@ -36,11 +35,18 @@ #include "llvm/Support/Signals.h" #include "llvm/Support/Timer.h" #include "llvm/Support/ToolOutputFile.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include +// Some of the functions below rely on LLVM modules that may not always be +// available. As such, we only try to build it in the first place, if +// llvm.offload is enabled. +#ifdef OFFLOAD +#include "llvm/Object/OffloadBinary.h" +#include "llvm/Target/TargetMachine.h" +#endif + // for raw `write` in the bad-alloc handler #ifdef _MSC_VER #include @@ -146,6 +152,10 @@ extern "C" void LLVMRustPrintStatistics(RustStringRef OutBuf) { llvm::PrintStatistics(OS); } +// Some of the functions here rely on LLVM modules that may not always be +// available. As such, we only try to build it in the first place, if +// llvm.offload is enabled. 
+#ifdef OFFLOAD static Error writeFile(StringRef Filename, StringRef Data) { Expected> OutputOrErr = FileOutputBuffer::create(Filename, Data.size()); @@ -210,6 +220,7 @@ extern "C" void LLVMRustOffloadMapper(LLVMValueRef OldFn, LLVMValueRef NewFn) { llvm::CloneFunctionChangeType::LocalChangesOnly, returns); } +#endif extern "C" LLVMValueRef LLVMRustGetNamedValue(LLVMModuleRef M, const char *Name, size_t NameLen) { diff --git a/src/bootstrap/src/core/build_steps/compile.rs b/src/bootstrap/src/core/build_steps/compile.rs index 6857a40ada81b..d0ba6c5e896c5 100644 --- a/src/bootstrap/src/core/build_steps/compile.rs +++ b/src/bootstrap/src/core/build_steps/compile.rs @@ -1436,6 +1436,9 @@ fn rustc_llvm_env(builder: &Builder<'_>, cargo: &mut Cargo, target: TargetSelect if builder.config.llvm_enzyme { cargo.env("LLVM_ENZYME", "1"); } + if builder.config.llvm_offload { + cargo.env("LLVM_OFFLOAD", "1"); + } let llvm::LlvmResult { host_llvm_config, .. } = builder.ensure(llvm::Llvm { target }); cargo.env("LLVM_CONFIG", &host_llvm_config); diff --git a/src/bootstrap/src/lib.rs b/src/bootstrap/src/lib.rs index f4f467e013258..a31eb0c1c8012 100644 --- a/src/bootstrap/src/lib.rs +++ b/src/bootstrap/src/lib.rs @@ -873,6 +873,9 @@ impl Build { if self.config.llvm_enzyme { features.push("llvm_enzyme"); } + if self.config.llvm_offload { + features.push("llvm_offload"); + } // keep in sync with `bootstrap/compile.rs:rustc_cargo_env` if self.config.rust_randomize_layout && check("rustc_randomized_layouts") { features.push("rustc_randomized_layouts"); From b31005e381492763ad994661bef864a39ef58e01 Mon Sep 17 00:00:00 2001 From: Manuel Drehwald Date: Fri, 21 Nov 2025 01:47:47 -0800 Subject: [PATCH 3/3] update dev guide --- src/doc/rustc-dev-guide/src/offload/usage.md | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/doc/rustc-dev-guide/src/offload/usage.md b/src/doc/rustc-dev-guide/src/offload/usage.md index 7abf90aa6e0b6..8350fb5777fba 100644 --- 
a/src/doc/rustc-dev-guide/src/offload/usage.md +++ b/src/doc/rustc-dev-guide/src/offload/usage.md @@ -79,19 +79,8 @@ Now we generate the device code. Replace the target-cpu with the right code for RUSTFLAGS="-Ctarget-cpu=gfx90a --emit=llvm-bc,llvm-ir -Zoffload=Enable -Zunstable-options" cargo +offload build -Zunstable-options -r -v --target amdgcn-amd-amdhsa -Zbuild-std=core ``` -Now find the `.ll` under target/amdgcn-amd-amdhsa folder and copy it to a device.ll file (or adjust the file names below). -If you work on an NVIDIA or Intel gpu, please adjust the names acordingly and open an issue to share your results (either if you succeed or fail). -First we compile our .ll files (good for manual inspections) to .bc files and clean up leftover artifacts. The cleanup is important, otherwise caching might interfere on following runs. -``` -opt lib.ll -o lib.bc -opt device.ll -o device.bc -rm *.o -rm bare.amdgcn.gfx90a.img* -``` ``` -"clang-offload-packager" "-o" "host.out" "--image=file=device.bc,triple=amdgcn-amd-amdhsa,arch=gfx90a,kind=openmp" - "clang-21" "-cc1" "-triple" "x86_64-unknown-linux-gnu" "-S" "-save-temps=cwd" "-disable-free" "-clear-ast-before-backend" "-main-file-name" "lib.rs" "-mrelocation-model" "pic" "-pic-level" "2" "-pic-is-pie" "-mframe-pointer=all" "-fmath-errno" "-ffp-contract=on" "-fno-rounding-math" "-mconstructor-aliases" "-funwind-tables=2" "-target-cpu" "x86-64" "-tune-cpu" "generic" "-resource-dir" "//rust/build/x86_64-unknown-linux-gnu/llvm/lib/clang/21" "-ferror-limit" "19" "-fopenmp" "-fopenmp-offload-mandatory" "-fgnuc-version=4.2.1" "-fskip-odr-check-in-gmf" "-fembed-offload-object=host.out" "-fopenmp-targets=amdgcn-amd-amdhsa" "-faddrsig" "-D__GCC_HAVE_DWARF2_CFI_ASM=1" "-o" "host.s" "-x" "ir" "lib.bc" "clang-21" "-cc1as" "-triple" "x86_64-unknown-linux-gnu" "-filetype" "obj" "-main-file-name" "lib.rs" "-target-cpu" "x86-64" "-mrelocation-model" "pic" "-o" "host.o" "host.s" @@ -99,7 +88,8 @@ rm bare.amdgcn.gfx90a.img* 
"clang-linker-wrapper" "--should-extract=gfx90a" "--device-compiler=amdgcn-amd-amdhsa=-g" "--device-compiler=amdgcn-amd-amdhsa=-save-temps=cwd" "--device-linker=amdgcn-amd-amdhsa=-lompdevice" "--host-triple=x86_64-unknown-linux-gnu" "--save-temps" "--linker-path=/ABSOlUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/lld/bin/ld.lld" "--hash-style=gnu" "--eh-frame-hdr" "-m" "elf_x86_64" "-pie" "-dynamic-linker" "/lib64/ld-linux-x86-64.so.2" "-o" "bare" "/lib/../lib64/Scrt1.o" "/lib/../lib64/crti.o" "/ABSOLUTE_PATH_TO/crtbeginS.o" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/bin/../lib/x86_64-unknown-linux-gnu" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/lib/clang/21/lib/x86_64-unknown-linux-gnu" "-L/lib/../lib64" "-L/usr/lib64" "-L/lib" "-L/usr/lib" "host.o" "-lstdc++" "-lm" "-lomp" "-lomptarget" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/lib" "-lgcc_s" "-lgcc" "-lpthread" "-lc" "-lgcc_s" "-lgcc" "/ABSOLUTE_PATH_TO/crtendS.o" "/lib/../lib64/crtn.o" ``` -Especially for the last command I recommend to not fix the paths, but rather just re-generate them by copying a bare-mode openmp example and compiling it with your clang. By adding `-###` to your clang invocation, you can see the invidual steps. +Especially for the last three commands I recommend not fixing the paths, but rather just re-generating them by copying a bare-mode openmp example and compiling it with your clang. By adding `-###` to your clang invocation, you can see the individual steps. +You can ignore other steps, e.g. the invocation of a "clang-offload-packager". ``` myclang++ -fuse-ld=lld -O3 -fopenmp -fopenmp-offload-mandatory --offload-arch=gfx90a omp_bare.cpp -o main -### ```