forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CUDA][HIP] Make template implicitly host device (llvm#70369)
Added option -foffload-implicit-host-device-templates which is off by default. When the option is on, template functions and specializations without host/device attributes have implicit host device attributes. They can be overridden by device template functions with the same signagure. They are emitted on device side only if they are used on device side. This feature is added as an extension. `__has_extension(cuda_implicit_host_device_templates)` can be used to check whether it is enabled. This is to facilitate using standard C++ headers for device. Fixes: llvm#69956 Fixes: SWDEV-428314
- Loading branch information
Showing
12 changed files
with
241 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu \ | ||
// RUN: -foffload-implicit-host-device-templates \ | ||
// RUN: -emit-llvm -o - -x hip %s 2>&1 | \ | ||
// RUN: FileCheck -check-prefixes=COMM,HOST %s | ||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ | ||
// RUN: -target-cpu gfx1100 \ | ||
// RUN: -foffload-implicit-host-device-templates \ | ||
// RUN: -emit-llvm -o - -x hip %s 2>&1 | \ | ||
// RUN: FileCheck -check-prefixes=COMM,DEV %s | ||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ | ||
// RUN: -target-cpu gfx1100 \ | ||
// RUN: -foffload-implicit-host-device-templates \ | ||
// RUN: -emit-llvm -o - -x hip %s 2>&1 | \ | ||
// RUN: FileCheck -check-prefixes=DEV-NEG %s | ||
|
||
#include "Inputs/cuda.h" | ||
|
||
// Implicit host device template not overloaded by device template. | ||
// Used by both device and host function. | ||
// Emitted on both host and device. | ||
|
||
// COMM-LABEL: define {{.*}}@_Z20template_no_overloadIiET_S0_( | ||
// COMM: ret i32 1 | ||
template<typename T> | ||
T template_no_overload(T x) { | ||
return 1; | ||
} | ||
|
||
// Implicit host device template overloaded by device template. | ||
// Used by both device and host function. | ||
// Implicit host device template emitted on host. | ||
// Device template emitted on device. | ||
|
||
// COMM-LABEL: define {{.*}}@_Z22template_with_overloadIiET_S0_( | ||
// HOST: ret i32 2 | ||
// DEV: ret i32 3 | ||
template<typename T> | ||
T template_with_overload(T x) { | ||
return 2; | ||
} | ||
|
||
template<typename T> | ||
__device__ T template_with_overload(T x) { | ||
return 3; | ||
} | ||
|
||
// Implicit host device template used by host function only. | ||
// Emitted on host only. | ||
// HOST-LABEL: define {{.*}}@_Z21template_used_by_hostIiET_S0_( | ||
// DEV-NEG-NOT: define {{.*}}@_Z21template_used_by_hostIiET_S0_( | ||
// HOST: ret i32 10 | ||
template<typename T> | ||
T template_used_by_host(T x) { | ||
return 10; | ||
} | ||
|
||
// Implicit host device template indirectly used by host function only. | ||
// Emitted on host only. | ||
// HOST-LABEL: define {{.*}}@_Z32template_indirectly_used_by_hostIiET_S0_( | ||
// DEV-NEG-NOT: define {{.*}}@_Z32template_indirectly_used_by_hostIiET_S0_( | ||
// HOST: ret i32 11 | ||
template<typename T> | ||
T template_indirectly_used_by_host(T x) { | ||
return 11; | ||
} | ||
|
||
template<typename T> | ||
T template_in_middle_by_host(T x) { | ||
template_indirectly_used_by_host(x); | ||
return 12; | ||
} | ||
|
||
// Implicit host device template indirectly used by device function only. | ||
// Emitted on device. | ||
// DEVICE-LABEL: define {{.*}}@_Z34template_indirectly_used_by_deviceIiET_S0_( | ||
// DEVICE: ret i32 21 | ||
template<typename T> | ||
T template_indirectly_used_by_device(T x) { | ||
return 21; | ||
} | ||
|
||
template<typename T> | ||
T template_in_middle_by_device(T x) { | ||
template_indirectly_used_by_device(x); | ||
return 22; | ||
} | ||
|
||
// Implicit host device template indirectly used by host device function only. | ||
// Emitted on host and device. | ||
// COMMON-LABEL: define {{.*}}@_Z39template_indirectly_used_by_host_deviceIiET_S0_( | ||
// COMMON: ret i32 31 | ||
template<typename T> | ||
T template_indirectly_used_by_host_device(T x) { | ||
return 31; | ||
} | ||
|
||
template<typename T> | ||
T template_in_middle_by_host_device(T x) { | ||
template_indirectly_used_by_host_device(x); | ||
return 32; | ||
} | ||
|
||
void host_fun() { | ||
template_no_overload(0); | ||
template_with_overload(0); | ||
template_used_by_host(0); | ||
template_in_middle_by_host(0); | ||
} | ||
|
||
__device__ void device_fun() { | ||
template_no_overload(0); | ||
template_with_overload(0); | ||
template_in_middle_by_device(0); | ||
} | ||
|
||
__host__ __device__ void host_device_fun() { | ||
template_in_middle_by_host_device(0); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
// RUN: %clang_cc1 -E -triple x86_64-linux-gnu %s -o - \ | ||
// RUN: | FileCheck -check-prefix=NOHDT %s | ||
// RUN: %clang_cc1 -E -triple x86_64-linux-gnu %s -o - \ | ||
// RUN: -foffload-implicit-host-device-templates \ | ||
// RUN: | FileCheck -check-prefix=HDT %s | ||
|
||
// NOHDT: no_implicit_host_device_templates | ||
// HDT: has_implicit_host_device_templates | ||
#if __has_extension(cuda_implicit_host_device_templates) | ||
int has_implicit_host_device_templates(); | ||
#else | ||
int no_implicit_host_device_templates(); | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
// RUN: %clang_cc1 -isystem %S/Inputs -fsyntax-only %s | ||
// RUN: %clang_cc1 -isystem %S/Inputs -fcuda-is-device -fsyntax-only %s | ||
// RUN: %clang_cc1 -isystem %S/Inputs -foffload-implicit-host-device-templates -fsyntax-only %s | ||
// RUN: %clang_cc1 -isystem %S/Inputs -foffload-implicit-host-device-templates -fcuda-is-device -fsyntax-only %s | ||
|
||
#include <cuda.h> | ||
|
||
template<typename T> | ||
void tempf(T x) { | ||
} | ||
|
||
template<typename T> | ||
__device__ void tempf(T x) { | ||
} | ||
|
||
void host_fun() { | ||
tempf(1); | ||
} | ||
|
||
__device__ void device_fun() { | ||
tempf(1); | ||
} |