Skip to content

Commit 3c0532d

Browse files
LU-JOHNmaarquitos14
LU-JOHN
andauthored
Connect support for dynamic linking to user options (#14575)
Add option "-fsycl-allow-device-dependencies" to enable support for dynamic linking. Also: 1. No functions are importable without "-fsycl-allow-device-dependencies" 2. Deal with SYCL_EXTERNAL header decls that have lost SYCL_EXTERNAL attribute in LLVM IR 3. SPIRV/SYCL/ESIMD builtins cannot be an imported function Tested in three E2E test cases. Minor change: Change SYCL-EXTERNAL to SYCL_EXTERNAL in testcase comment. --------- Signed-off-by: Lu, John <john.lu@intel.com> Co-authored-by: Marcos Maronas <marcos.maronas@intel.com>
1 parent 13ef711 commit 3c0532d

File tree

19 files changed

+243
-15
lines changed

19 files changed

+243
-15
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4189,6 +4189,10 @@ def fsycl_remove_unused_external_funcs : Flag<["-"], "fsycl-remove-unused-extern
41894189
Group<sycl_Group>, HelpText<"Allow removal of unused `SYCL_EXTERNAL` functions (default)">;
41904190
def fno_sycl_remove_unused_external_funcs : Flag<["-"], "fno-sycl-remove-unused-external-funcs">,
41914191
Group<sycl_Group>, HelpText<"Prevent removal of unused `SYCL_EXTERNAL` functions">;
4192+
def fsycl_allow_device_dependencies : Flag<["-"], "fsycl-allow-device-dependencies">,
4193+
Group<sycl_Group>, HelpText<"Allow dependencies between device code images">;
4194+
def fno_sycl_allow_device_dependencies : Flag<["-"], "fno-sycl-allow-device-dependencies">,
4195+
Group<sycl_Group>, HelpText<"Do not allow dependencies between device code images (default)">;
41924196

41934197
def fsave_optimization_record : Flag<["-"], "fsave-optimization-record">,
41944198
Visibility<[ClangOption, FlangOption]>,

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10703,6 +10703,14 @@ static void addArgs(ArgStringList &DstArgs, const llvm::opt::ArgList &Alloc,
1070310703
}
1070410704
}
1070510705

10706+
/// Returns true when dynamic linking of device code has been requested via
/// -fsycl-allow-device-dependencies. The result is whatever
/// ArgList::hasFlag reports for the positive/negative flag pair, with `false`
/// as the default when neither -fsycl-allow-device-dependencies nor
/// -fno-sycl-allow-device-dependencies is present.
static bool supportDynamicLinking(const llvm::opt::ArgList &TCArgs) {
  // hasFlag already yields the boolean we need; no need to branch on it.
  return TCArgs.hasFlag(options::OPT_fsycl_allow_device_dependencies,
                        options::OPT_fno_sycl_allow_device_dependencies,
                        /*Default=*/false);
}
10713+
1070610714
static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC,
1070710715
const JobAction &JA,
1070810716
const llvm::opt::ArgList &TCArgs,
@@ -10729,6 +10737,9 @@ static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC,
1072910737
if (TCArgs.hasFlag(options::OPT_fno_sycl_esimd_force_stateless_mem,
1073010738
options::OPT_fsycl_esimd_force_stateless_mem, false))
1073110739
addArgs(PostLinkArgs, TCArgs, {"-lower-esimd-force-stateless-mem=false"});
10740+
10741+
if (supportDynamicLinking(TCArgs))
10742+
addArgs(PostLinkArgs, TCArgs, {"-support-dynamic-linking"});
1073210743
}
1073310744

1073410745
// Add any sycl-post-link options that rely on a specific Triple in addition
@@ -10776,6 +10787,8 @@ static void getTripleBasedSYCLPostLinkOpts(const ToolChain &TC,
1077610787
options::OPT_fsycl_remove_unused_external_funcs,
1077710788
false) &&
1077810789
!isSYCLNativeCPU(TC)) &&
10790+
// When supporting dynamic linking, non-kernels in a device image can be called
10791+
!supportDynamicLinking(TCArgs) &&
1077910792
!Triple.isNVPTX() && !Triple.isAMDGPU())
1078010793
addArgs(PostLinkArgs, TCArgs, {"-emit-only-kernels-as-entry-points"});
1078110794

clang/test/Driver/sycl-offload-old-model.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,9 +174,12 @@
174174
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen %s 2>&1 | FileCheck -check-prefix=CHECK_SYCL_POST_LINK_OPT_PASS %s
175175
// CHECK_SYCL_POST_LINK_OPT_PASS: sycl-post-link{{.*}}emit-only-kernels-as-entry-points
176176
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen -fno-sycl-remove-unused-external-funcs %s 2>&1 | FileCheck -check-prefix=CHECK_SYCL_POST_LINK_OPT_NO_PASS %s
177+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen -fsycl-allow-device-dependencies %s 2>&1 | FileCheck -check-prefix=CHECK_SYCL_POST_LINK_OPT_NO_PASS %s
177178
// CHECK_SYCL_POST_LINK_OPT_NO_PASS-NOT: sycl-post-link{{.*}}emit-only-kernels-as-entry-points
178179

179180
/// Check selective passing of -support-dynamic-linking to sycl-post-link tool
181+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_fpga -fsycl-allow-device-dependencies %s 2>&1 | FileCheck -check-prefix=CHECK_SYCL_POST_LINK_SHARED_PASS %s
182+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen -fsycl-allow-device-dependencies %s 2>&1 | FileCheck -check-prefix=CHECK_SYCL_POST_LINK_SHARED_PASS %s
180183
// TODO: Enable when SYCL RT supports dynamic linking
181184
// RUNx: %clang -### -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_fpga -shared %s 2>&1 | FileCheck -check-prefix=CHECK_SYCL_POST_LINK_SHARED_PASS %s
182185
// RUNx: %clang -### -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen -shared %s 2>&1 | FileCheck -check-prefix=CHECK_SYCL_POST_LINK_SHARED_PASS %s

llvm/lib/SYCLLowerIR/ModuleSplitter.cpp

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -182,11 +182,8 @@ class DependencyGraph {
182182
FuncTypeToFuncsMap[F.getFunctionType()].insert(&F);
183183
}
184184

185-
// We add every function into the graph except if
186-
// SupportDynamicLinking is true
187185
for (const auto &F : M.functions()) {
188-
189-
if (SupportDynamicLinking && canBeImportedFunction(F))
186+
if (canBeImportedFunction(F))
190187
continue;
191188

192189
// case (1), see comment above the class definition
@@ -1312,8 +1309,26 @@ splitSYCLModule(std::unique_ptr<Module> M, ModuleSplitterSettings Settings) {
13121309
}
13131310

13141311
bool canBeImportedFunction(const Function &F) {
1312+
// It may be theoretically possible to determine what is importable
1313+
// based solely on function F, but the "SYCL/imported symbols"
1314+
// property list MUST NOT have any imported symbols that are not supplied
1315+
// by the exported symbols from another device image. Violating this leads to a
1316+
// runtime crash "No device image found for external symbol". Generating
1317+
// precise "SYCL/imported symbols" can be difficult because there exist
1318+
// functions that may look like they can be imported, but are supplied outside
1319+
// of user device code (e.g. _Z38__spirv_JointMatrixWorkItemLength...). In
1320+
// order to be safe and not require perfect name analysis just start with this
1321+
// simple check.
1322+
if (!SupportDynamicLinking)
1323+
return false;
1324+
1325+
// SYCL_EXTERNAL property is not recorded for a declaration
1326+
// in a header file. Thus SYCL IR that is a declaration
1327+
// will be considered as SYCL_EXTERNAL for the purposes of
1328+
// this function.
13151329
if (F.isIntrinsic() || F.getName().starts_with("__") ||
1316-
!llvm::sycl::utils::isSYCLExternalFunction(&F))
1330+
isSpirvSyclBuiltin(F.getName()) || isESIMDBuiltin(F.getName()) ||
1331+
(!F.isDeclaration() && !llvm::sycl::utils::isSYCLExternalFunction(&F)))
13171332
return false;
13181333

13191334
bool ReturnValue = true;

llvm/test/tools/sycl-post-link/emit_imported_symbols.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
; This test checks that the -emit-imported-symbols option generates a list of imported symbols
22
; Function names were chosen so that no function with 'inside' in its name is imported
3-
;
3+
; Note that -emit-imported-symbols will not emit any imported symbols without -support-dynamic-linking.
44

55
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
66
; Test with -split=kernel
77
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
88

9-
; RUN: sycl-post-link -properties -symbols -emit-imported-symbols -split=kernel -S < %s -o %t_kernel.table
9+
; RUN: sycl-post-link -properties -symbols -support-dynamic-linking -emit-imported-symbols -split=kernel -S < %s -o %t_kernel.table
1010

1111
; RUN: FileCheck %s -input-file=%t_kernel_0.sym --check-prefixes CHECK-KERNEL-SYM-0
1212
; RUN: FileCheck %s -input-file=%t_kernel_1.sym --check-prefixes CHECK-KERNEL-SYM-1
@@ -23,29 +23,29 @@
2323

2424
; CHECK-KERNEL-SYM-1: foo
2525
; CHECK-KERNEL-IMPORTED-SYM-1: [SYCL/imported symbols]
26+
; CHECK-KERNEL-IMPORTED-SYM-1-NEXT: middle
2627
; CHECK-KERNEL-IMPORTED-SYM-1-NEXT: childA
2728
; CHECK-KERNEL-IMPORTED-SYM-1-NEXT: childC
28-
; CHECK-KERNEL-IMPORTED-SYM-1-NEXT: childD
2929
; CHECK-KERNEL-IMPORTED-SYM-1-EMPTY:
3030

3131

3232
; CHECK-KERNEL-SYM-2: bar
3333
; CHECK-KERNEL-IMPORTED-SYM-2: [SYCL/imported symbols]
34+
; CHECK-KERNEL-IMPORTED-SYM-2-NEXT: middle
3435
; CHECK-KERNEL-IMPORTED-SYM-2-NEXT: childB
3536
; CHECK-KERNEL-IMPORTED-SYM-2-NEXT: childC
36-
; CHECK-KERNEL-IMPORTED-SYM-2-NEXT: childD
3737
; CHECK-KERNEL-IMPORTED-SYM-2-NEXT: _Z7outsidev
3838
; CHECK-KERNEL-IMPORTED-SYM-2-EMPTY:
3939

4040
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4141
; Test with -split=source
4242
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4343

44-
; RUN: sycl-post-link -properties -symbols -emit-imported-symbols -split=source -S < %s -o %t_source.table
44+
; RUN: sycl-post-link -properties -symbols -support-dynamic-linking -emit-imported-symbols -split=source -S < %s -o %t_source.table
4545
; RUN: FileCheck %s -input-file=%t_source_0.sym --check-prefixes CHECK-SOURCE-SYM-0
4646
; RUN: FileCheck %s -input-file=%t_source_0.prop --check-prefixes CHECK-SOURCE-IMPORTED-SYM-0
4747

48-
; RUN: sycl-post-link -properties -symbols -emit-imported-symbols -split=source -S < %s -o %t_source.table -O0
48+
; RUN: sycl-post-link -properties -symbols -support-dynamic-linking -emit-imported-symbols -split=source -S < %s -o %t_source.table -O0
4949
; RUN: FileCheck %s -input-file=%t_source_0.sym --check-prefixes CHECK-SOURCE-SYM-0
5050
; RUN: FileCheck %s -input-file=%t_source_0.prop --check-prefixes CHECK-SOURCE-IMPORTED-SYM-0
5151

@@ -73,7 +73,7 @@ define weak_odr spir_kernel void @foo() #0 {
7373
}
7474

7575
define weak_odr spir_kernel void @bar() #0 {
76-
;; Functions that are not SYCL External (i.e. they have no sycl-module-id) cannot be imported
76+
;; Functions whose names start with '__' cannot be imported
7777
call spir_func void @__itt_offload_wi_start_wrapper()
7878

7979
call void @childB()

llvm/test/tools/sycl-post-link/internalize_functions.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; Test that when -support-dynamic-linking is used
2-
; non SYCL-EXTERNAL functions are internalized.
2+
; non SYCL_EXTERNAL functions are internalized.
33
; Variables must not be internalized.
44

55
; RUN: sycl-post-link -symbols -support-dynamic-linking -split=kernel -S < %s -o %t.table
@@ -8,8 +8,8 @@
88

99
; CHECK-SYM-0: foo0
1010

11-
; Non SYCL-EXTERNAL Functions are internalized
12-
; foo0 is a SYCL-EXTERNAL function
11+
; Non SYCL_EXTERNAL Functions are internalized
12+
; foo0 is a SYCL_EXTERNAL function
1313
; CHECK-LL-0-DAG: define weak_odr spir_kernel void @foo0() #0 {
1414
; Internalize does not change available_externally
1515
; CHECK-LL-0-DAG: define available_externally spir_func void @internalA() {
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#include <iostream>
2+
#include "a.hpp"
3+
#include "b.hpp"
4+
5+
// First link of the levelA -> levelB -> levelC -> levelD call chain.
// Passes val through levelB and then ORs in this level's marker (0xA << 0).
// When __SYCL_DEVICE_ONLY__ is not defined, the function instead prints a
// diagnostic to std::cerr and returns 0.
SYCL_EXTERNAL int levelA(int val) {
#ifndef __SYCL_DEVICE_ONLY__
  // Not a device-only compilation: report it and bail out with 0.
  std::cerr << "Host symbol used" << std::endl;
  return 0;
#endif
  const int fromLowerLevels = levelB(val);
  return fromLowerLevels | (0xA << 0);
}
13+
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#include <sycl/detail/core.hpp>
2+
3+
SYCL_EXTERNAL int levelA(int val);
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#include <iostream>
2+
#include "b.hpp"
3+
#include "c.hpp"
4+
5+
// Second link of the call chain: passes val through levelC and then ORs in
// this level's marker (0xB << 4).
// When __SYCL_DEVICE_ONLY__ is not defined, the function instead prints a
// diagnostic to std::cerr and returns 0.
SYCL_EXTERNAL int levelB(int val) {
#ifndef __SYCL_DEVICE_ONLY__
  // Not a device-only compilation: report it and bail out with 0.
  std::cerr << "Host symbol used" << std::endl;
  return 0;
#endif
  const int fromLowerLevels = levelC(val);
  return fromLowerLevels | (0xB << 4);
}
13+
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#include <sycl/detail/core.hpp>
2+
3+
SYCL_EXTERNAL int levelB(int val);
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#include <iostream>
2+
#include "c.hpp"
3+
#include "d.hpp"
4+
5+
// Third link of the call chain: passes val through levelD and then ORs in
// this level's marker (0xC << 8).
// When __SYCL_DEVICE_ONLY__ is not defined, the function instead prints a
// diagnostic to std::cerr and returns 0.
SYCL_EXTERNAL int levelC(int val) {
#ifndef __SYCL_DEVICE_ONLY__
  // Not a device-only compilation: report it and bail out with 0.
  std::cerr << "Host symbol used" << std::endl;
  return 0;
#endif
  const int fromLowerLevels = levelD(val);
  return fromLowerLevels | (0xC << 8);
}
13+
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#include <sycl/detail/core.hpp>
2+
3+
SYCL_EXTERNAL int levelC(int val);
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#include <iostream>
2+
#include "d.hpp"
3+
4+
// Last link of the call chain: ORs this level's marker (0xD << 12) into val
// and returns the result; it calls no further level.
// When __SYCL_DEVICE_ONLY__ is not defined, the function instead prints a
// diagnostic to std::cerr and returns 0.
SYCL_EXTERNAL int levelD(int val) {
#ifndef __SYCL_DEVICE_ONLY__
  // Not a device-only compilation: report it and bail out with 0.
  std::cerr << "Host symbol used" << std::endl;
  return 0;
#endif
  return val | (0xD << 12);
}
11+
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#include <sycl/detail/core.hpp>
2+
3+
SYCL_EXTERNAL int levelD(int val);
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#include <sycl/detail/core.hpp>
2+
#include "a.hpp"
3+
#include <iostream>
4+
#define EXPORT
5+
#include "wrapper.hpp"
6+
7+
using namespace sycl;
8+
9+
class ExeKernel;
10+
11+
int wrapper() {
12+
int val = 0;
13+
{
14+
buffer<int, 1> buf(&val, range<1>(1));
15+
queue q;
16+
q.submit([&](handler &cgh) {
17+
auto acc = buf.get_access(cgh);
18+
cgh.single_task<ExeKernel>([=]() {acc[0] = levelA(acc[0]);});
19+
});
20+
}
21+
22+
std::cout << "val=" << std::hex << val << "\n";
23+
if (val!=0xDCBA)
24+
return (1);
25+
return(0);
26+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#if defined(_WIN32)
2+
#ifdef EXPORT
3+
__declspec(dllexport)
4+
#else
5+
__declspec(dllimport)
6+
#endif
7+
#endif
8+
int wrapper();
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Test -fsycl-allow-device-dependencies with dynamic libraries.
2+
3+
// REQUIRES: linux
4+
// UNSUPPORTED: cuda || hip
5+
6+
// RUN: %clangxx -fsycl -fPIC -shared -fsycl-allow-device-dependencies %S/Inputs/a.cpp -I %S/Inputs -o %T/libdevice_a.so
7+
// RUN: %clangxx -fsycl -fPIC -shared -fsycl-allow-device-dependencies %S/Inputs/b.cpp -I %S/Inputs -o %T/libdevice_b.so
8+
// RUN: %clangxx -fsycl -fPIC -shared -fsycl-allow-device-dependencies %S/Inputs/c.cpp -I %S/Inputs -o %T/libdevice_c.so
9+
// RUN: %clangxx -fsycl -fPIC -shared -fsycl-allow-device-dependencies %S/Inputs/d.cpp -I %S/Inputs -o %T/libdevice_d.so
10+
// RUN: %{build} -fsycl-allow-device-dependencies -L%T -ldevice_a -ldevice_b -ldevice_c -ldevice_d -I %S/Inputs -o %t.out -Wl,-rpath=%T
11+
// RUN: %{run} %t.out
12+
13+
#include <sycl/detail/core.hpp>
14+
#include "a.hpp"
15+
#include <iostream>
16+
17+
using namespace sycl;
18+
19+
class ExeKernel;
20+
21+
int main() {
22+
int val = 0;
23+
{
24+
buffer<int, 1> buf(&val, range<1>(1));
25+
queue q;
26+
q.submit([&](handler &cgh) {
27+
auto acc = buf.get_access(cgh);
28+
cgh.single_task<ExeKernel>([=]() {acc[0] = levelA(acc[0]);});
29+
});
30+
}
31+
32+
std::cout << "val=" << std::hex << val << "\n";
33+
if (val!=0xDCBA)
34+
return (1);
35+
return(0);
36+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// Test -fsycl-allow-device-dependencies with objects.
2+
3+
// UNSUPPORTED: cuda || hip
4+
5+
// RUN: %clangxx -fsycl %S/Inputs/a.cpp -I %S/Inputs -c -o %t_a.o
6+
// RUN: %clangxx -fsycl %S/Inputs/b.cpp -I %S/Inputs -c -o %t_b.o
7+
// RUN: %clangxx -fsycl %S/Inputs/c.cpp -I %S/Inputs -c -o %t_c.o
8+
// RUN: %clangxx -fsycl %S/Inputs/d.cpp -I %S/Inputs -c -o %t_d.o
9+
// RUN: %{build} -fsycl-allow-device-dependencies %t_a.o %t_b.o %t_c.o %t_d.o -I %S/Inputs -o %t.out
10+
// RUN: %{run} %t.out
11+
12+
#include <sycl/detail/core.hpp>
13+
#include "a.hpp"
14+
#include <iostream>
15+
16+
using namespace sycl;
17+
18+
class ExeKernel;
19+
20+
int main() {
21+
int val = 0;
22+
{
23+
buffer<int, 1> buf(&val, range<1>(1));
24+
queue q;
25+
q.submit([&](handler &cgh) {
26+
auto acc = buf.get_access(cgh);
27+
cgh.single_task<ExeKernel>([=]() {acc[0] = levelA(acc[0]);});
28+
});
29+
}
30+
31+
std::cout << "val=" << std::hex << val << "\n";
32+
if (val!=0xDCBA)
33+
return (1);
34+
return(0);
35+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// Test -fsycl-allow-device-dependencies with a single dynamic library on Windows
2+
// and Linux.
3+
4+
// UNSUPPORTED: cuda || hip
5+
6+
// RUN: %clangxx -fsycl %fPIC %shared_lib -fsycl-allow-device-dependencies -I %S/Inputs \
7+
// RUN: %S/Inputs/a.cpp \
8+
// RUN: %S/Inputs/b.cpp \
9+
// RUN: %S/Inputs/c.cpp \
10+
// RUN: %S/Inputs/d.cpp \
11+
// RUN: %S/Inputs/wrapper.cpp \
12+
// RUN: -o %if windows %{%T/device_single.dll%} %else %{%T/libdevice_single.so%}
13+
14+
// RUN: %{build} -I%S/Inputs -o %t.out \
15+
// RUN: %if windows \
16+
// RUN: %{%T/device_single.lib%} \
17+
// RUN: %else \
18+
// RUN: %{-L%T -ldevice_single -Wl,-rpath=%T%}
19+
20+
// RUN: %{run} %t.out
21+
22+
#include "wrapper.hpp"
23+
24+
int main() {
25+
return(wrapper());
26+
}

0 commit comments

Comments
 (0)