diff --git a/compiler/rustc_codegen_llvm/src/base.rs b/compiler/rustc_codegen_llvm/src/base.rs index 388118f9b4f17..d00e70638b45a 100644 --- a/compiler/rustc_codegen_llvm/src/base.rs +++ b/compiler/rustc_codegen_llvm/src/base.rs @@ -93,8 +93,13 @@ pub(crate) fn compile_codegen_unit( // They are necessary for correct offload execution. We do this here to simplify the // `offload` intrinsic, avoiding the need for tracking whether it's the first // intrinsic call or not. - let has_host_offload = - cx.sess().opts.unstable_opts.offload.iter().any(|o| matches!(o, Offload::Host(_))); + let has_host_offload = cx + .sess() + .opts + .unstable_opts + .offload + .iter() + .any(|o| matches!(o, Offload::Host(_) | Offload::Test)); if has_host_offload && !cx.sess().target.is_like_gpu { cx.offload_globals.replace(Some(OffloadGlobals::declare(&cx))); } diff --git a/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs b/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs index fba92f996aa6b..b8eb4f0382167 100644 --- a/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs +++ b/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs @@ -49,8 +49,9 @@ impl<'ll> OffloadGlobals<'ll> { let bin_desc = cx.type_named_struct("struct.__tgt_bin_desc"); cx.set_struct_body(bin_desc, &tgt_bin_desc_ty, false); - let register_lib = declare_offload_fn(&cx, "__tgt_register_lib", mapper_fn_ty); - let unregister_lib = declare_offload_fn(&cx, "__tgt_unregister_lib", mapper_fn_ty); + let reg_lib_decl = cx.type_func(&[cx.type_ptr()], cx.type_void()); + let register_lib = declare_offload_fn(&cx, "__tgt_register_lib", reg_lib_decl); + let unregister_lib = declare_offload_fn(&cx, "__tgt_unregister_lib", reg_lib_decl); let init_ty = cx.type_func(&[], cx.type_void()); let init_rtls = declare_offload_fn(cx, "__tgt_init_all_rtls", init_ty); diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs index fe96dabf63302..8c492fcf8f15d 100644 --- a/compiler/rustc_session/src/config.rs +++ b/compiler/rustc_session/src/config.rs @@ -196,6 +196,8 @@ pub enum Offload { Device, /// Second step in the offload pipeline, generates the host code to call kernels. Host(String), + /// Test is similar to Host, but allows testing without a device artifact. + Test, } /// The different settings that the `-Z autodiff` flag can have. diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index f11ad12fb9ddf..21fa3321a30ad 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -794,7 +794,8 @@ mod desc { pub(crate) const parse_list_with_polarity: &str = "a comma-separated list of strings, with elements beginning with + or -"; pub(crate) const parse_autodiff: &str = "a comma separated list of settings: `Enable`, `PrintSteps`, `PrintTA`, `PrintTAFn`, `PrintAA`, `PrintPerf`, `PrintModBefore`, `PrintModAfter`, `PrintModFinal`, `PrintPasses`, `NoPostopt`, `LooseTypes`, `Inline`, `NoTT`"; - pub(crate) const parse_offload: &str = "a comma separated list of settings: `Enable`"; + pub(crate) const parse_offload: &str = + "a comma separated list of settings: `Host=`, `Device`, `Test`"; pub(crate) const parse_comma_list: &str = "a comma-separated list of strings"; pub(crate) const parse_opt_comma_list: &str = parse_comma_list; pub(crate) const parse_number: &str = "a number"; @@ -1471,6 +1472,13 @@ pub mod parse { } Offload::Device } + "Test" => { + if let Some(_) = arg { + // Test does not accept a value + return false; + } + Offload::Test + } _ => { // FIXME(ZuseZ4): print an error saying which value is not recognized return false; diff --git a/tests/codegen-llvm/gpu_offload/gpu_host.rs b/tests/codegen-llvm/gpu_offload/gpu_host.rs index b4d17143720a7..dcbd65b144277 100644 --- a/tests/codegen-llvm/gpu_offload/gpu_host.rs +++ b/tests/codegen-llvm/gpu_offload/gpu_host.rs @@ -1,15 +1,10 @@ -//@ compile-flags: -Zoffload=Enable -Zunstable-options -C opt-level=3 -Clto=fat +//@ compile-flags: -Zoffload=Test -Zunstable-options -C opt-level=3 -Clto=fat //@ no-prefer-dynamic -//@ needs-enzyme +//@ needs-offload // This test is verifying that we generate __tgt_target_data_*_mapper before and after a call to the // kernel_1. Better documentation to what each global or variable means is available in the gpu -// offlaod code, or the LLVM offload documentation. This code does not launch any GPU kernels yet, -// and will be rewritten once a proper offload frontend has landed. -// -// We currently only handle memory transfer for specific calls to functions named `kernel_{num}`, -// when inside of a function called main. This, too, is a temporary workaround for not having a -// frontend. +// offload code, or the LLVM offload documentation. #![feature(rustc_attrs)] #![feature(core_intrinsics)] @@ -22,6 +17,20 @@ fn main() { core::hint::black_box(&x); } +#[unsafe(no_mangle)] +#[inline(never)] +pub fn kernel_1(x: &mut [f32; 256]) { + core::intrinsics::offload(_kernel_1, [256, 1, 1], [32, 1, 1], (x,)) +} + +#[unsafe(no_mangle)] +#[inline(never)] +pub fn _kernel_1(x: &mut [f32; 256]) { + for i in 0..256 { + x[i] = 21.0; + } +} + // CHECK: %struct.ident_t = type { i32, i32, i32, i32, ptr } // CHECK: %struct.__tgt_offload_entry = type { i64, i16, i16, i32, ptr, ptr, i64, i64, ptr } // CHECK: %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr } @@ -36,8 +45,9 @@ fn main() { // CHECK: @.offloading.entry_name._kernel_1 = internal unnamed_addr constant [10 x i8] c"_kernel_1\00", section ".llvm.rodata.offloading", align 1 // CHECK: @.offloading.entry._kernel_1 = internal constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @._kernel_1.region_id, ptr @.offloading.entry_name._kernel_1, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8 -// CHECK: Function Attrs: nounwind // CHECK: declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr) +// CHECK: declare void @__tgt_register_lib(ptr) local_unnamed_addr +// CHECK: declare void @__tgt_unregister_lib(ptr) local_unnamed_addr // CHECK: define{{( dso_local)?}} void @main() // CHECK-NEXT: start: @@ -94,17 +104,3 @@ fn main() { // CHECK-NEXT: call void @__tgt_unregister_lib(ptr nonnull %EmptyDesc) // CHECK-NEXT: ret void // CHECK-NEXT: } - -#[unsafe(no_mangle)] -#[inline(never)] -pub fn kernel_1(x: &mut [f32; 256]) { - core::intrinsics::offload(_kernel_1, [256, 1, 1], [32, 1, 1], (x,)) -} - -#[unsafe(no_mangle)] -#[inline(never)] -pub fn _kernel_1(x: &mut [f32; 256]) { - for i in 0..256 { - x[i] = 21.0; - } -}