Skip to content

Commit f44dd13

Browse files
committed
Merge remote-tracking branch 'intel_llvm/sycl' into esimd_L1_L2_rework_L3_assert
2 parents 5fc647c + f8abcb7 commit f44dd13

File tree

11 files changed

+36
-27
lines changed

11 files changed

+36
-27
lines changed

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,8 @@ static llvm::SmallVector<StringRef, 16> SYCLDeviceLibList {
371371
"bfloat16", "crt", "cmath", "cmath-fp64", "complex", "complex-fp64",
372372
#if defined(_WIN32)
373373
"msvc-math",
374+
#else
375+
"sanitizer",
374376
#endif
375377
"imf", "imf-fp64", "itt-compiler-wrappers", "itt-stubs",
376378
"itt-user-wrappers", "fallback-cassert", "fallback-cstring",

clang/test/Driver/sycl-device-lib.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle"
3737
// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle"
3838
// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-bf16.o" "-output={{.*}}libsycl-fallback-imf-bf16-{{.*}}.o" "-unbundle"
39+
3940
/// ###########################################################################
4041
/// test sycl fallback device libraries are not linked by default
4142
// RUN: %clangxx -fsycl -fsycl-device-lib-jit-link %s --sysroot=%S/Inputs/SYCL -### 2>&1 \
@@ -251,3 +252,4 @@
251252
// SYCL_DEVICE_ASAN_MACRO-SAME: "USE_SYCL_DEVICE_ASAN"
252253
// SYCL_DEVICE_ASAN_MACRO-NEXT: llvm-link{{.*}}
253254
// SYCL_DEVICE_ASAN_MACRO: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-sanitizer.o" "-output={{.*}}libsycl-sanitizer-{{.*}}.o" "-unbundle"
255+
// SYCL_DEVICE_ASAN_MACRO: llvm-link{{.*}} "-only-needed" "{{.*}}" "-o" "{{.*}}.bc" "--suppress-warnings"

libdevice/include/spir_global_var.hpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,6 @@
1111
// Treat this header as a system header to work around the frontend's restriction
1212
#pragma clang system_header
1313

14-
#ifndef SPIR_GLOBAL_VAR
15-
#ifdef __SYCL_DEVICE_ONLY__
16-
#define SPIR_GLOBAL_VAR __attribute__((sycl_global_var))
17-
#else
18-
#warning "SPIR_GLOBAL_VAR not defined in host mode. Defining as empty macro."
19-
#define SPIR_GLOBAL_VAR
20-
#endif
21-
#endif
22-
2314
#define __SYCL_GLOBAL__ __attribute__((opencl_global))
2415
#define __SYCL_LOCAL__ __attribute__((opencl_local))
2516
#define __SYCL_PRIVATE__ __attribute__((opencl_private))

sycl/include/sycl/ext/intel/experimental/grf_size_properties.hpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ inline constexpr grf_size_key::value_t<Size> grf_size;
3636
inline constexpr grf_size_automatic_key::value_t grf_size_automatic;
3737

3838
} // namespace ext::intel::experimental
39-
namespace ext::oneapi::experimental {
39+
namespace ext::oneapi::experimental::detail {
4040
template <unsigned int Size>
4141
struct PropertyMetaInfo<
4242
sycl::ext::intel::experimental::grf_size_key::value_t<Size>> {
@@ -79,7 +79,6 @@ struct ConflictingProperties<sycl::detail::register_alloc_mode_key, Properties>
7979
sycl::ext::intel::experimental::grf_size_automatic_key,
8080
Properties>::value> {};
8181

82-
} // namespace detail
83-
} // namespace ext::oneapi::experimental
82+
} // namespace ext::oneapi::experimental::detail
8483
} // namespace _V1
8584
} // namespace sycl

sycl/source/detail/event_impl.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,10 @@ class event_impl {
289289
return MEventFromSubmittedExecCommandBuffer;
290290
}
291291

292+
const std::vector<EventImplPtr> &getPostCompleteEvents() const {
293+
return MPostCompleteEvents;
294+
}
295+
292296
protected:
293297
// When instrumentation is enabled emits trace event for event wait begin and
294298
// returns the telemetry event generated for the wait

sycl/source/detail/graph_impl.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -764,7 +764,9 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
764764
sycl::detail::CG::StorageInitHelper CGData) {
765765
WriteLock Lock(MMutex);
766766

767-
std::vector<sycl::detail::EventImplPtr> PartitionEvents;
767+
// Map of the partitions to their execution events
768+
std::unordered_map<std::shared_ptr<partition>, sycl::detail::EventImplPtr>
769+
PartitionsExecutionEvents;
768770

769771
auto CreateNewEvent([&]() {
770772
auto NewEvent = std::make_shared<sycl::detail::event_impl>(Queue);
@@ -787,7 +789,7 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
787789
}
788790

789791
for (auto const &DepPartition : CurrentPartition->MPredecessors) {
790-
CGData.MEvents.push_back(MPartitionsExecutionEvents[DepPartition]);
792+
CGData.MEvents.push_back(PartitionsExecutionEvents[DepPartition]);
791793
}
792794

793795
auto CommandBuffer =
@@ -819,7 +821,13 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
819821
sycl::backend::ext_oneapi_level_zero) {
820822
Event->wait(Event);
821823
} else {
824+
auto &AttachedEventsList = Event->getPostCompleteEvents();
825+
CGData.MEvents.reserve(AttachedEventsList.size() + 1);
822826
CGData.MEvents.push_back(Event);
827+
// Add events of the previous execution of all graph partitions.
828+
for (auto &AttachedEvent : AttachedEventsList) {
829+
CGData.MEvents.push_back(AttachedEvent);
830+
}
823831
}
824832
++It;
825833
} else {
@@ -929,15 +937,15 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
929937
NewEvent->setStateIncomplete();
930938
NewEvent->getPreparedDepsEvents() = ScheduledEvents;
931939
}
932-
MPartitionsExecutionEvents[CurrentPartition] = NewEvent;
940+
PartitionsExecutionEvents[CurrentPartition] = NewEvent;
933941
}
934942

935943
// Keep track of this execution event so we can make sure it's completed in
936944
// the destructor.
937945
MExecutionEvents.push_back(NewEvent);
938946
// Attach events of previous partitions to ensure that when the returned event
939947
// is complete, all execution associated with the graph has been completed.
940-
for (auto const &Elem : MPartitionsExecutionEvents) {
948+
for (auto const &Elem : PartitionsExecutionEvents) {
941949
if (Elem.second != NewEvent) {
942950
NewEvent->attachEventToComplete(Elem.second);
943951
}

sycl/source/detail/graph_impl.hpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1190,9 +1190,6 @@ class exec_graph_impl {
11901190
std::vector<sycl::detail::EventImplPtr> MExecutionEvents;
11911191
/// List of the partitions that compose the exec graph.
11921192
std::vector<std::shared_ptr<partition>> MPartitions;
1193-
/// Map of the partitions to their execution events
1194-
std::unordered_map<std::shared_ptr<partition>, sycl::detail::EventImplPtr>
1195-
MPartitionsExecutionEvents;
11961193
/// Storage for copies of nodes from the original modifiable graph.
11971194
std::vector<std::shared_ptr<node_impl>> MNodeStorage;
11981195
};

sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,11 @@
88
// REQUIRES: matrix
99
// REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943
1010

11-
// RUN: %{build} -o %t.out -ffp-model=precise
12-
// RUN: %{run} %t.out
11+
// RUN: %{build} -o %t_gpu.out -ffp-model=precise
12+
// RUN: %if gpu %{ %{run} %t_gpu.out %}
13+
14+
// RUN: %{build} -ffp-model=precise -o %t_cpu.out -DtM=16 -DtK=32 -DNCACHE1=32 -DKCACHE1=32
15+
// RUN: %if cpu %{ %{run} %t_cpu.out %}
1316

1417
// -ffp-model=precise is added to not depend on compiler defaults.
1518

sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,11 @@
77
//===----------------------------------------------------------------------===//
88
// REQUIRES: matrix
99

10-
// RUN: %{build} -o %t.out -ffp-model=precise
11-
// RUN: %{run} %t.out
10+
// RUN: %{build} -o %t_gpu.out -ffp-model=precise
11+
// RUN: %if gpu %{ %{run} %t_gpu.out %}
12+
13+
// RUN: %{build} -ffp-model=precise -o %t_cpu.out -DtM=16 -DtK=32 -DNCACHE1=32 -DKCACHE1=32
14+
// RUN: %if cpu %{ %{run} %t_cpu.out %}
1215

1316
// -ffp-model=precise is added to not depend on compiler defaults.
1417

sycl/test-e2e/Matrix/joint_matrix_prefetch_impl.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,15 +69,16 @@ int main() {
6969
.get_info<sycl::ext::oneapi::experimental::info::device::
7070
matrix_combinations>();
7171
bool support_p = false;
72-
for (int i = 0; i < combinations.size(); i++) {
73-
if (combinations[i].atype == matrix_type::tf32) {
72+
// joint_matrix_prefetch is not supported on DG2
73+
for (unsigned int i = 0; i < combinations.size(); i++) {
74+
if (combinations[i].nsize == 0 || combinations[i].nsize == 16) {
7475
support_p = true;
7576
break;
7677
}
7778
}
7879
if (!support_p) {
7980
std::cout << "Prefetch not supported on this device" << std::endl;
80-
// Once the test is not marke as XFAIL, this should change to return 0;
81+
// Once the test is not marked as XFAIL, this should change to return 0;
8182
return 1;
8283
}
8384
static constexpr size_t M = TM * 2;

sycl/test-e2e/Plugin/level_zero_usm_residency.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ int main(int argc, char *argv[]) {
2323
// DEVICE: ---> piextUSMDeviceAlloc
2424
// DEVICE: ZE ---> zeMemAllocDevice
2525
// DEVICE: ZE ---> zeContextMakeMemoryResident
26-
// DEVICE-NOT: ZE ---> zeContextMakeMemoryResident
2726

2827
auto ptr2 = malloc_shared<int>(1, Q);
2928
// SHARED: ---> piextUSMSharedAlloc

0 commit comments

Comments
 (0)