From 98af36bcf4f9b6924f06027bad5cd4a16f3cc27c Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Tue, 30 Apr 2024 12:57:42 +0100 Subject: [PATCH 1/6] [CTS] Ensure GoogleTest does not output color The match files depend on the output from our GoogleTest test suites to not include ANSI color escape sequences but this setting can be controlled via environment variable `GTEST_COLOR=yes`. This can cause tests to fail even though they should not. This patch explicitly disables color output by setting `GTEST_COLOR=no` in the test environment. --- test/conformance/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/conformance/CMakeLists.txt b/test/conformance/CMakeLists.txt index 13466a027a..14875a117a 100644 --- a/test/conformance/CMakeLists.txt +++ b/test/conformance/CMakeLists.txt @@ -20,8 +20,12 @@ function(add_test_adapter name adapter) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) + set(testEnv + UR_ADAPTERS_FORCE_LOAD="$" + GTEST_COLOR=no + ) set_tests_properties(${TEST_NAME} PROPERTIES - ENVIRONMENT "UR_ADAPTERS_FORCE_LOAD=\"$\"" + ENVIRONMENT "${testEnv}" LABELS "conformance;${adapter}") endfunction() From 5bd6d94f85c3cff9c56b65dddda160378c47f151 Mon Sep 17 00:00:00 2001 From: Luke Drummond Date: Thu, 7 Mar 2024 17:22:00 +0000 Subject: [PATCH 2/6] Introduce a utility to walk polymorphic linked lists Add `find_stype_node` for walking polymorphic linked lists looking for a particular type. The implementation here works with an auto-generated compile time map, that links a given type to the structure enumeration tag. The implementation simply walks the list looking for the .stype implied by the template parameter type and casts it to the expected type, then returns it. It's one of those unfortunate cases where you can write pretty nasty implementation code that makes the user code much much nicer. 
In this case the user doesn't need to worry about the `.types` at all, and the const-void casts can be eliminated from user code. --- scripts/generate_code.py | 18 +++- scripts/templates/stype_map_helpers.hpp.mako | 22 +++++ source/adapters/hip/usm.cpp | 15 +-- source/common/stype_map_helpers.def | 98 ++++++++++++++++++++ source/common/ur_util.hpp | 43 +++++++++ 5 files changed, 182 insertions(+), 14 deletions(-) create mode 100644 scripts/templates/stype_map_helpers.hpp.mako create mode 100644 source/common/stype_map_helpers.def diff --git a/scripts/generate_code.py b/scripts/generate_code.py index bc891f62e0..b8bfa97ba5 100644 --- a/scripts/generate_code.py +++ b/scripts/generate_code.py @@ -411,11 +411,25 @@ def generate_layers(path, section, namespace, tags, version, specs, meta): generates common utilities for unified_runtime """ def generate_common(path, section, namespace, tags, version, specs, meta): + template = "stype_map_helpers.hpp.mako" + fin = os.path.join("templates", template) + + filename = "stype_map_helpers.def" layer_dstpath = os.path.join(path, "common") os.makedirs(layer_dstpath, exist_ok=True) + fout = os.path.join(layer_dstpath, filename) + + print("Generating %s..." % fout) + + loc = util.makoWrite( + fin, fout, + ver=version, + namespace=namespace, + tags=tags, + specs=specs, + meta=meta) + print("COMMON Generated %s lines of code.\n" % loc) - loc = 0 - print("COMMON Generated %s lines of code.\n"%loc) """ Entry-point: diff --git a/scripts/templates/stype_map_helpers.hpp.mako b/scripts/templates/stype_map_helpers.hpp.mako new file mode 100644 index 0000000000..26aff00cd5 --- /dev/null +++ b/scripts/templates/stype_map_helpers.hpp.mako @@ -0,0 +1,22 @@ +<%! 
+import re +from templates import helper as th +%><% + n=namespace + N=n.upper() + x=tags['$x'] + X=x.upper() +%> +// This file is autogenerated from the template at ${self.template.filename} + +%for obj in th.extract_objs(specs, r"enum"): + %if obj["name"] == '$x_structure_type_t': + %for etor in obj['etors']: + %if 'UINT32' not in etor['name']: +template <> +struct stype_map<${x}_${etor['desc'][3:]}> : stype_map_impl<${X}_${etor['name'][3:]}> {}; + %endif + %endfor + %endif +%endfor + diff --git a/source/adapters/hip/usm.cpp b/source/adapters/hip/usm.cpp index f29fab7b92..4068c1d865 100644 --- a/source/adapters/hip/usm.cpp +++ b/source/adapters/hip/usm.cpp @@ -327,24 +327,15 @@ ur_result_t USMHostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, ur_usm_pool_desc_t *PoolDesc) : Context(Context) { - const void *pNext = PoolDesc->pNext; - while (pNext != nullptr) { - const ur_base_desc_t *BaseDesc = static_cast(pNext); - switch (BaseDesc->stype) { - case UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC: { - const ur_usm_pool_limits_desc_t *Limits = - reinterpret_cast(BaseDesc); + if (PoolDesc) { + if (auto *Limits = find_stype_node(PoolDesc)) { for (auto &config : DisjointPoolConfigs.Configs) { config.MaxPoolableSize = Limits->maxPoolableSize; config.SlabMinSize = Limits->minDriverAllocSize; } - break; - } - default: { + } else { throw UsmAllocationException(UR_RESULT_ERROR_INVALID_ARGUMENT); } - } - pNext = BaseDesc->pNext; } auto MemProvider = diff --git a/source/common/stype_map_helpers.def b/source/common/stype_map_helpers.def new file mode 100644 index 0000000000..0c3e5b1cc1 --- /dev/null +++ b/source/common/stype_map_helpers.def @@ -0,0 +1,98 @@ + +// This file is autogenerated from the template at templates/stype_map_helpers.hpp.mako + +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl 
{}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> 
+struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; + diff --git a/source/common/ur_util.hpp b/source/common/ur_util.hpp index 94e306f48f..9cecdbec1e 100644 --- a/source/common/ur_util.hpp +++ b/source/common/ur_util.hpp @@ -281,6 +281,49 @@ inline ur_result_t exceptionToResult(std::exception_ptr eptr) { template inline constexpr bool ur_always_false_t = false; +namespace { +// Compile-time map, mapping a UR list node type, to the enum tag type +// These are helpers for the `find_stype_node` helper below +template struct stype_map_impl { + static constexpr ur_structure_type_t value = val; +}; + +template struct stype_map {}; +// contains definitions of the map specializations e.g. +// template <> struct stype_map : +// stype_map_impl {}; +#include "stype_map_helpers.def" + +template constexpr int as_stype() { return stype_map::value; }; + +/// Walk a generic UR linked list looking for a node of the given type. If it's +/// found, its address is returned, otherwise `nullptr`. e.g. to find out whether +/// a `ur_usm_host_desc_t` exists in the given polymorphic list, `mylist`: +/// +/// ```cpp +/// auto *node = find_stype_node(mylist); +/// if (!node) +/// printf("node of expected type not found!\n"); +/// ``` +template +typename std::conditional_t>, + const T *, T *> +find_stype_node(P list_head) noexcept { + auto *list = reinterpret_cast(list_head); + for (const auto *next = reinterpret_cast(list); next; + next = reinterpret_cast(next->pNext)) { + if (next->stype == as_stype()) { + if constexpr (!std::is_const_v

) { + return const_cast(next); + } else { + return next; + } + } + } + return nullptr; +} +} // namespace + namespace ur { [[noreturn]] inline void unreachable() { #ifdef _MSC_VER From d5e3ded1683796cb60a0a96a7ac0fe2a0b237fc6 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Sun, 28 Apr 2024 11:52:48 +0100 Subject: [PATCH 3/6] Add option to enable spec generation fast-mode The new `--fast-mode` option to the `run.py` script disables the print API section of the API listing mako template which causes the `generate` CMake target to take an order of magnitude longer to complete. Before these changes: ``` ninja generate 199.22s user 1.80s system 99% cpu 3:21.67 total ``` After these changes: ``` ninja generate 37.63s user 1.00s system 99% cpu 38.893 total ``` This greatly improves the iterative spec writing experience and can be opted into by setting the CMake option `UR_ENABLE_FAST_SPEC_MODE=ON`. --- CMakeLists.txt | 6 +++++- README.md | 1 + scripts/generate_docs.py | 12 +++++++----- scripts/run.py | 4 +++- scripts/templates/api_listing.mako | 2 ++ 5 files changed, 18 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f15b575e67..4fcd74e729 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,6 +29,7 @@ option(UR_BUILD_TESTS "Build unit tests." 
ON) option(UR_BUILD_TOOLS "build ur tools" ON) option(UR_FORMAT_CPP_STYLE "format code style of C++ sources" OFF) option(UR_DEVELOPER_MODE "enable developer checks, treats warnings as errors" OFF) +option(UR_ENABLE_FAST_SPEC_MODE "enable fast specification generation mode" OFF) option(UR_USE_ASAN "enable AddressSanitizer" OFF) option(UR_USE_UBSAN "enable UndefinedBehaviorSanitizer" OFF) option(UR_USE_MSAN "enable MemorySanitizer" OFF) @@ -292,7 +293,10 @@ if(UR_FORMAT_CPP_STYLE) # Generate source from the specification add_custom_target(generate-code USES_TERMINAL WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/scripts - COMMAND ${Python3_EXECUTABLE} run.py --api-json ${API_JSON_FILE} --clang-format=${CLANG_FORMAT} + COMMAND ${Python3_EXECUTABLE} run.py + --api-json ${API_JSON_FILE} + --clang-format=${CLANG_FORMAT} + $<$:--fast-mode> COMMAND ${Python3_EXECUTABLE} json2src.py --api-json ${API_JSON_FILE} ${PROJECT_SOURCE_DIR} ) diff --git a/README.md b/README.md index 49f7f764ba..cb43c380b9 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,7 @@ List of options provided by CMake: | UR_BUILD_TOOLS | Build tools | ON/OFF | ON | | UR_FORMAT_CPP_STYLE | Format code style | ON/OFF | OFF | | UR_DEVELOPER_MODE | Treat warnings as errors and enables additional checks | ON/OFF | OFF | +| UR_ENABLE_FAST_SPEC_MODE | Enable fast specification generation mode | ON/OFF | OFF | | UR_USE_ASAN | Enable AddressSanitizer | ON/OFF | OFF | | UR_USE_TSAN | Enable ThreadSanitizer | ON/OFF | OFF | | UR_USE_UBSAN | Enable UndefinedBehavior Sanitizer | ON/OFF | OFF | diff --git a/scripts/generate_docs.py b/scripts/generate_docs.py index f4d281e9ad..5c14305898 100644 --- a/scripts/generate_docs.py +++ b/scripts/generate_docs.py @@ -86,7 +86,7 @@ def _make_ref(symbol, symbol_type, meta): """ generate a valid reStructuredText file """ -def _generate_valid_rst(fin, fout, namespace, tags, ver, rev, meta): +def _generate_valid_rst(fin, fout, namespace, tags, ver, rev, meta, fast_mode): ver=float(ver) 
enable = True code_block = False @@ -185,13 +185,14 @@ def _generate_valid_rst(fin, fout, namespace, tags, ver, rev, meta): ver=ver, namespace=namespace, tags=tags, - meta=meta) + meta=meta, + fast_mode=fast_mode) """ Entry-point: generate restructuredtext documents from templates """ -def generate_rst(docpath, section, namespace, tags, ver, rev, specs, meta): +def generate_rst(docpath, section, namespace, tags, ver, rev, specs, meta, fast_mode): srcpath = os.path.join("./", section) dstpath = os.path.join(docpath, "source", section) @@ -200,7 +201,7 @@ def generate_rst(docpath, section, namespace, tags, ver, rev, specs, meta): util.removeFiles(dstpath, "*.rst") for fin in util.findFiles(srcpath, "*.rst"): fout = os.path.join(dstpath, os.path.basename(fin)) - loc += _generate_valid_rst(os.path.abspath(fin), fout, namespace, tags, ver, rev, meta) + loc += _generate_valid_rst(os.path.abspath(fin), fout, namespace, tags, ver, rev, meta, fast_mode) print("Generated %s lines of reStructuredText (rst).\n"%loc) @@ -215,7 +216,8 @@ def generate_rst(docpath, section, namespace, tags, ver, rev, specs, meta): rev=rev, tags=tags, meta=meta, - specs=specs) + specs=specs, + fast_mode=fast_mode) """ Entry-point: diff --git a/scripts/run.py b/scripts/run.py index ecd60741f8..b5a9ccc8b8 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 """ Copyright (C) 2022 Intel Corporation @@ -120,6 +121,7 @@ def main(): required=False, help="specification version to generate.") parser.add_argument("--api-json", type=str, default="unified_runtime.json", required=False, help="json output file for the spec") parser.add_argument("--clang-format", type=str, default="clang-format", required=False, help="path to clang-format executable") + parser.add_argument('--fast-mode', action='store_true', help='Disable sections which are slow to render') args = vars(parser.parse_args()) args['rev'] = revision() @@ -175,7 +177,7 @@ def main(): raise Exception("Failed to 
format ur_api.h") if args['rst']: - generate_docs.generate_rst(docpath, config['name'], config['namespace'], config['tags'], args['ver'], args['rev'], specs, input['meta']) + generate_docs.generate_rst(docpath, config['name'], config['namespace'], config['tags'], args['ver'], args['rev'], specs, input['meta'], args['fast_mode']) if util.makeErrorCount(): print("\n%s Errors found during generation, stopping execution!"%util.makeErrorCount()) diff --git a/scripts/templates/api_listing.mako b/scripts/templates/api_listing.mako index 023c4aa889..aff75df002 100644 --- a/scripts/templates/api_listing.mako +++ b/scripts/templates/api_listing.mako @@ -265,6 +265,7 @@ ${th.make_type_name(n, tags, obj)} %endfor # s in specs +%if not fast_mode: ################################################################# ## Print API not part of the spec, needs to be generated separately ################################################################# @@ -359,3 +360,4 @@ Print Operators :project: UnifiedRuntime :outline: %endfor +%endif From 394874260bfbf8795a2465efd355fd3f2662e952 Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Tue, 5 Mar 2024 17:05:57 +0000 Subject: [PATCH 4/6] Emulated Fill with copy when patternSize is not a power of 2 --- source/adapters/level_zero/memory.cpp | 40 +++++++++++++++++++++------ 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index 82ecd7043b..359161ad41 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include "context.hpp" #include "event.hpp" @@ -183,9 +184,6 @@ static ur_result_t enqueueMemFillHelper(ur_command_t CommandType, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent) { - // Pattern size must be a power of two. 
- UR_ASSERT((PatternSize > 0) && ((PatternSize & (PatternSize - 1)) == 0), - UR_RESULT_ERROR_INVALID_VALUE); auto &Device = Queue->Device; // Make sure that pattern size matches the capability of the copy queues. @@ -237,18 +235,42 @@ static ur_result_t enqueueMemFillHelper(ur_command_t CommandType, const auto &ZeCommandList = CommandList->first; const auto &WaitList = (*Event)->WaitList; - ZE2UR_CALL(zeCommandListAppendMemoryFill, - (ZeCommandList, Ptr, Pattern, PatternSize, Size, ZeEvent, - WaitList.Length, WaitList.ZeEventList)); + // PatternSize must be a power of two for zeCommandListAppendMemoryFill. + // When it's not, the fill is emulated with zeCommandListAppendMemoryCopy. + if (isPowerOf2(PatternSize)) { + ZE2UR_CALL(zeCommandListAppendMemoryFill, + (ZeCommandList, Ptr, Pattern, PatternSize, Size, ZeEvent, + WaitList.Length, WaitList.ZeEventList)); logger::debug("calling zeCommandListAppendMemoryFill() with" " ZeEvent {}", ur_cast(ZeEvent)); printZeEventList(WaitList); - // Execute command list asynchronously, as the event will be used - // to track down its completion. - UR_CALL(Queue->executeCommandList(CommandList, false, OkToBatch)); + // Execute command list asynchronously, as the event will be used + // to track down its completion. + UR_CALL(Queue->executeCommandList(CommandList, false, OkToBatch)); + } else { + // Copy pattern into every entry in memory array pointed by Ptr. + uint32_t NumOfCopySteps = Size / PatternSize; + const void *Src = Pattern; + + for (uint32_t step = 0; step < NumOfCopySteps; ++step) { + void *Dst = reinterpret_cast(reinterpret_cast(Ptr) + + step * PatternSize); + ZE2UR_CALL(zeCommandListAppendMemoryCopy, + (ZeCommandList, Dst, Src, PatternSize, ZeEvent, + WaitList.Length, WaitList.ZeEventList)); + } + + urPrint("calling zeCommandListAppendMemoryCopy() with\n" + " ZeEvent %#" PRIxPTR "\n", + ur_cast(ZeEvent)); + printZeEventList(WaitList); + + // Execute command list synchronously. 
+ UR_CALL(Queue->executeCommandList(CommandList, true, OkToBatch)); + } return UR_RESULT_SUCCESS; } From 08f4c75f25785c8411c9b8aa0f93ed079c613b70 Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Fri, 8 Mar 2024 13:32:07 +0000 Subject: [PATCH 5/6] Added condition with isPowerOf2 to opencl Fill --- source/adapters/level_zero/memory.cpp | 8 ++++---- source/adapters/opencl/usm.cpp | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index 359161ad41..3392e0359e 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -242,10 +242,10 @@ static ur_result_t enqueueMemFillHelper(ur_command_t CommandType, (ZeCommandList, Ptr, Pattern, PatternSize, Size, ZeEvent, WaitList.Length, WaitList.ZeEventList)); - logger::debug("calling zeCommandListAppendMemoryFill() with" - " ZeEvent {}", - ur_cast(ZeEvent)); - printZeEventList(WaitList); + logger::debug("calling zeCommandListAppendMemoryFill() with" + " ZeEvent {}", + ur_cast(ZeEvent)); + printZeEventList(WaitList); // Execute command list asynchronously, as the event will be used // to track down its completion. 
diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 0d64f23d13..3f4382fc0d 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -8,6 +8,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "common.hpp" inline cl_mem_alloc_flags_intel @@ -239,7 +241,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( return mapCLErrorToUR(CLErr); } - if (patternSize <= 128) { + if (patternSize <= 128 && isPowerOf2(patternSize)) { clEnqueueMemFillINTEL_fn EnqueueMemFill = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( From 2727e8afe2257313f94847549a426e97e958d7d8 Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Mon, 29 Apr 2024 16:32:26 +0100 Subject: [PATCH 6/6] Adjusted urPrint to logger::debug according to newest changes --- source/adapters/level_zero/memory.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index 3392e0359e..4757a0563d 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -263,9 +263,9 @@ static ur_result_t enqueueMemFillHelper(ur_command_t CommandType, WaitList.Length, WaitList.ZeEventList)); } - urPrint("calling zeCommandListAppendMemoryCopy() with\n" - " ZeEvent %#" PRIxPTR "\n", - ur_cast(ZeEvent)); + logger::debug("calling zeCommandListAppendMemoryCopy() with" + " ZeEvent {}", + ur_cast(ZeEvent)); printZeEventList(WaitList); // Execute command list synchronously.