From f6b23852728b59ac258bbad287b8782dc2d5ba56 Mon Sep 17 00:00:00 2001 From: "Aji, Ashwin" Date: Thu, 28 Oct 2021 01:18:15 +0000 Subject: [PATCH 1/2] ATMI Release for ROCm v4.5 --- src/runtime/core/data.cpp | 73 +++++++++++++++----------- src/runtime/core/system.cpp | 100 ++++++++++++------------------------ 2 files changed, 77 insertions(+), 96 deletions(-) diff --git a/src/runtime/core/data.cpp b/src/runtime/core/data.cpp index 005b3f21..a9cdae36 100644 --- a/src/runtime/core/data.cpp +++ b/src/runtime/core/data.cpp @@ -505,10 +505,10 @@ atmi_status_t DataTaskImpl::dispatch() { hsa_amd_pointer_info_t dest_ptr_info; src_ptr_info.size = sizeof(hsa_amd_pointer_info_t); dest_ptr_info.size = sizeof(hsa_amd_pointer_info_t); - err = hsa_amd_pointer_info(reinterpret_cast(src), &src_ptr_info, - NULL, /* alloc fn ptr */ - NULL, /* num_agents_accessible */ - NULL); /* accessible agents */ + err = hsa_amd_pointer_info(reinterpret_cast(const_cast(src)), + &src_ptr_info, NULL, /* alloc fn ptr */ + NULL, /* num_agents_accessible */ + NULL); /* accessible agents */ ErrorCheck(Checking src pointer info, err); err = hsa_amd_pointer_info(reinterpret_cast(dest), &dest_ptr_info, NULL, /* alloc fn ptr */ @@ -541,13 +541,17 @@ atmi_status_t DataTaskImpl::dispatch() { } else if (src_data && !dest_data) { type = Direction::ATMI_D2H; src_agent = get_mem_agent(src_data->place()); - dest_agent = src_agent; + dest_agent = cpu_agent; + // TODO(ashwin): can the two agents be the GPU agent itself? ROCr team: no + // dest_agent = src_agent; src_ptr = src; dest_ptr = dest; } else if (!src_data && dest_data) { type = Direction::ATMI_H2D; dest_agent = get_mem_agent(dest_data->place()); - src_agent = dest_agent; + src_agent = cpu_agent; + // TODO(ashwin): can the two agents be the GPU agent itself? ROCr team: no + // src_agent = dest_agent; src_ptr = src; dest_ptr = dest; } else { @@ -573,9 +577,10 @@ atmi_status_t DataTaskImpl::dispatch() { // signal count = 2 (one for actual host-device copy and another // for H2H copy to setup the device copy. std::thread( - [](void *dst, const void *src, size_t size, hsa_agent_t agent, - Direction type, atmi_mem_place_t cpu, hsa_signal_t signal, - std::vector dep_signals, TaskImpl *task) { + [](void *dst, const void *src, size_t size, hsa_agent_t src_agent, + hsa_agent_t dest_agent, Direction type, atmi_mem_place_t cpu, + hsa_signal_t signal, std::vector dep_signals, + TaskImpl *task) { atmi_status_t ret; hsa_status_t err; atl_dep_sync_t dep_sync_type = @@ -584,6 +589,7 @@ atmi_status_t DataTaskImpl::dispatch() { const void *src_ptr = src; void *dest_ptr = dst; ret = atmi_malloc(&temp_host_ptr, size, cpu); + assert(ret == ATMI_STATUS_SUCCESS && "temp atmi_malloc"); if (type == Direction::ATMI_H2D) { memcpy(temp_host_ptr, src, size); src_ptr = (const void *)temp_host_ptr; @@ -596,27 +602,30 @@ atmi_status_t DataTaskImpl::dispatch() { if (dep_sync_type == ATL_SYNC_BARRIER_PKT && !dep_signals.empty()) { DEBUG_PRINT("SDMA-host for %p (%lu) with %lu dependencies\n", task, task->id_, dep_signals.size()); - err = hsa_amd_memory_async_copy(dest_ptr, agent, src_ptr, agent, - size, dep_signals.size(), - &(dep_signals[0]), signal); + err = hsa_amd_memory_async_copy(dest_ptr, dest_agent, src_ptr, + src_agent, size, dep_signals.size(), + dep_signals.data(), signal); ErrorCheck(Copy async between memory pools, err); } else { DEBUG_PRINT("SDMA-host for %p (%lu)\n", task, task->id_); - err = hsa_amd_memory_async_copy(dest_ptr, agent, src_ptr, agent, - size, 0, NULL, signal); + err = hsa_amd_memory_async_copy(dest_ptr, dest_agent, src_ptr, + src_agent, size, 0, NULL, signal); ErrorCheck(Copy async between memory pools, err); } task->set_state(ATMI_DISPATCHED); hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_EQ, 1, UINT64_MAX, ATMI_WAIT_STATE); + // cleanup for D2H and H2D if (type == Direction::ATMI_D2H) { memcpy(dst, temp_host_ptr, size); } - atmi_free(temp_host_ptr); + ret = atmi_free(temp_host_ptr); + assert(ret == ATMI_STATUS_SUCCESS && "temp atmi_free"); hsa_signal_subtract_acq_rel(signal, 1); }, - dest, src, size, src_agent, type, cpu, signal_, dep_signals, this) + dest, src, size, src_agent, dest_agent, type, cpu, signal_, dep_signals, + this) .detach(); } else { if (groupable_ == ATMI_TRUE) { @@ -635,7 +644,7 @@ atmi_status_t DataTaskImpl::dispatch() { dep_signals.size()); err = hsa_amd_memory_async_copy(dest_ptr, dest_agent, src_ptr, src_agent, size, dep_signals.size(), - &(dep_signals[0]), signal_); + dep_signals.data(), signal_); ErrorCheck(Copy async between memory pools, err); } else { DEBUG_PRINT("SDMA for %p (%lu)\n", this, id_); @@ -648,7 +657,7 @@ atmi_status_t DataTaskImpl::dispatch() { } atmi_status_t Runtime::Memcpy(void *dest, const void *src, size_t size) { - atmi_status_t ret; + atmi_status_t ret = ATMI_STATUS_SUCCESS; hsa_status_t err; #ifndef USE_ROCR_PTR_INFO @@ -659,10 +668,10 @@ atmi_status_t Runtime::Memcpy(void *dest, const void *src, size_t size) { hsa_amd_pointer_info_t dest_ptr_info; src_ptr_info.size = sizeof(hsa_amd_pointer_info_t); dest_ptr_info.size = sizeof(hsa_amd_pointer_info_t); - err = hsa_amd_pointer_info(reinterpret_cast(src), &src_ptr_info, - NULL, /* alloc fn ptr */ - NULL, /* num_agents_accessible */ - NULL); /* accessible agents */ + err = hsa_amd_pointer_info(reinterpret_cast(const_cast(src)), + &src_ptr_info, NULL, /* alloc fn ptr */ + NULL, /* num_agents_accessible */ + NULL); /* accessible agents */ ErrorCheck(Checking src pointer info, err); err = hsa_amd_pointer_info(reinterpret_cast(dest), &dest_ptr_info, NULL, /* alloc fn ptr */ @@ -685,10 +694,11 @@ atmi_status_t Runtime::Memcpy(void *dest, const void *src, size_t size) { if (src_data && !dest_data) { type = Direction::ATMI_D2H; src_agent = get_mem_agent(src_data->place()); - dest_agent = src_agent; - // dest_agent = cpu_agent; // FIXME: can the two agents be the GPU agent - // itself? + dest_agent = cpu_agent; + // TODO(ashwin): can the two agents be the GPU agent itself? ROCr team: no + // dest_agent = src_agent; ret = atmi_malloc(&temp_host_ptr, size, cpu); + assert(ret == ATMI_STATUS_SUCCESS && "temp atmi_malloc"); // err = hsa_amd_agents_allow_access(1, &src_agent, NULL, temp_host_ptr); // ErrorCheck(Allow access to ptr, err); src_ptr = src; @@ -696,10 +706,11 @@ atmi_status_t Runtime::Memcpy(void *dest, const void *src, size_t size) { } else if (!src_data && dest_data) { type = Direction::ATMI_H2D; dest_agent = get_mem_agent(dest_data->place()); - // src_agent = cpu_agent; // FIXME: can the two agents be the GPU agent - // itself? - src_agent = dest_agent; + src_agent = cpu_agent; + // TODO(ashwin): can the two agents be the GPU agent itself? ROCr team: no + // src_agent = dest_agent; ret = atmi_malloc(&temp_host_ptr, size, cpu); + assert(ret == ATMI_STATUS_SUCCESS && "temp atmi_malloc"); memcpy(temp_host_ptr, src, size); // FIXME: ideally lock would be the better approach, but we need to try to // understand why the h2d copy segfaults if we dont have the below lines @@ -722,20 +733,22 @@ atmi_status_t Runtime::Memcpy(void *dest, const void *src, size_t size) { } DEBUG_PRINT("Memcpy source agent: %lu\n", src_agent.handle); DEBUG_PRINT("Memcpy dest agent: %lu\n", dest_agent.handle); - hsa_signal_store_release(IdentityCopySignal, 1); + hsa_signal_store_screlease(IdentityCopySignal, 1); // hsa_signal_add_acq_rel(IdentityCopySignal, 1); err = hsa_amd_memory_async_copy(dest_ptr, dest_agent, src_ptr, src_agent, size, 0, NULL, IdentityCopySignal); ErrorCheck(Copy async between memory pools, err); - hsa_signal_wait_acquire(IdentityCopySignal, HSA_SIGNAL_CONDITION_EQ, 0, + hsa_signal_wait_relaxed(IdentityCopySignal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, ATMI_WAIT_STATE); // cleanup for D2H and H2D if (type == Direction::ATMI_D2H) { memcpy(dest, temp_host_ptr, size); ret = atmi_free(temp_host_ptr); + assert(ret == ATMI_STATUS_SUCCESS && "temp atmi_free"); } else if (type == Direction::ATMI_H2D) { ret = atmi_free(temp_host_ptr); + assert(ret == ATMI_STATUS_SUCCESS && "temp atmi_free"); } if (err != HSA_STATUS_SUCCESS || ret != ATMI_STATUS_SUCCESS) ret = ATMI_STATUS_ERROR; diff --git a/src/runtime/core/system.cpp b/src/runtime/core/system.cpp index 0f1b7e6c..96aa4af8 100644 --- a/src/runtime/core/system.cpp +++ b/src/runtime/core/system.cpp @@ -291,7 +291,7 @@ void allow_access_to_all_gpu_agents(void *ptr) { for (int i = 0; i < gpu_procs.size(); i++) { agents.push_back(gpu_procs[i].agent()); } - err = hsa_amd_agents_allow_access(agents.size(), &agents[0], NULL, ptr); + err = hsa_amd_agents_allow_access(agents.size(), agents.data(), NULL, ptr); ErrorCheck(Allow agents ptr access, err); } @@ -960,42 +960,6 @@ bool isImplicit(KernelArgMD::ValueKind value_kind) { } } -hsa_status_t validate_code_object(hsa_code_object_t code_object, - hsa_code_symbol_t symbol, void *data) { - hsa_status_t retVal = HSA_STATUS_SUCCESS; - std::set *SymbolSet = static_cast *>(data); - hsa_symbol_kind_t type; - - uint32_t name_length; - hsa_status_t err; - err = hsa_code_symbol_get_info(symbol, HSA_CODE_SYMBOL_INFO_TYPE, &type); - ErrorCheck(Symbol info extraction, err); - DEBUG_PRINT("Exec Symbol type: %d\n", type); - - if (type == HSA_SYMBOL_KIND_VARIABLE) { - err = hsa_code_symbol_get_info(symbol, HSA_CODE_SYMBOL_INFO_NAME_LENGTH, - &name_length); - ErrorCheck(Symbol info extraction, err); - char *name = reinterpret_cast(malloc(name_length + 1)); - err = hsa_code_symbol_get_info(symbol, HSA_CODE_SYMBOL_INFO_NAME, name); - ErrorCheck(Symbol info extraction, err); - name[name_length] = 0; - - if (SymbolSet->find(std::string(name)) != SymbolSet->end()) { - // Symbol already found. Return Error - DEBUG_PRINT("Symbol %s already found!\n", name); - retVal = HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED; - } else { - SymbolSet->insert(std::string(name)); - } - - free(name); - } else { - DEBUG_PRINT("Symbol is an indirect function\n"); - } - return retVal; -} - static amd_comgr_status_t getMetaBuf(const amd_comgr_metadata_node_t meta, std::string *str) { size_t size = 0; @@ -1613,7 +1577,7 @@ hsa_status_t get_code_object_custom_metadata(atmi_platform_type_t platform, HSA_STATUS_ERROR_INVALID_CODE_OBJECT); } -hsa_status_t populate_InfoTables(hsa_executable_t executable, +hsa_status_t populate_InfoTables(hsa_executable_t executable, hsa_agent_t agent, hsa_executable_symbol_t symbol, void *data) { int gpu = *static_cast(data); hsa_symbol_kind_t type; @@ -1685,6 +1649,14 @@ hsa_status_t populate_InfoTables(hsa_executable_t executable, ErrorCheck(Symbol info extraction, err); name[name_length] = 0; + if (SymbolInfoTable[gpu].find(std::string(name)) != + SymbolInfoTable[gpu].end()) { + // Symbol already found. Return Error + DEBUG_PRINT("Symbol %s already found!\n", name); + ErrorCheck(Symbol variable already defined check, + HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED); + } + atl_symbol_info_t info; err = hsa_executable_symbol_get_info( @@ -1735,13 +1707,11 @@ atmi_status_t Runtime::RegisterModuleFromMemory(void **modules, // GCN with base profile agent_profile = HSA_PROFILE_FULL; /* Create the empty executable. */ - err = hsa_executable_create(agent_profile, HSA_EXECUTABLE_STATE_UNFROZEN, "", - &executable); + err = hsa_executable_create_alt(agent_profile, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, + &executable); ErrorCheck(Create the executable, err); - // initially empty symbol set for every executable - std::set SymbolSet; - bool module_load_success = false; for (int i = 0; i < num_modules; i++) { void *module_bytes = modules[i]; @@ -1756,24 +1726,19 @@ atmi_status_t Runtime::RegisterModuleFromMemory(void **modules, ErrorCheckAndContinue(Getting custom code object metadata, err); free(tmp_module); - // Deserialize code object. - hsa_code_object_t code_object = {0}; - err = hsa_code_object_deserialize(module_bytes, module_size, NULL, - &code_object); - ErrorCheckAndContinue(Code Object Deserialization, err); - assert(0 != code_object.handle); - - err = hsa_code_object_iterate_symbols(code_object, validate_code_object, - static_cast(&SymbolSet)); - ErrorCheckAndContinue(Iterating over symbols for execuatable, err); + // Read code object. + hsa_code_object_reader_t code_obj_reader = {0}; + err = hsa_code_object_reader_create_from_memory(module_bytes, module_size, + &code_obj_reader); + ErrorCheck(Create the code object reader, err); + assert(0 != code_obj_reader.handle); /* Load the code object. */ - err = - hsa_executable_load_code_object(executable, agent, code_object, NULL); + err = hsa_executable_load_agent_code_object(executable, agent, + code_obj_reader, NULL, NULL); ErrorCheckAndContinue(Loading the code object, err); // cannot iterate over symbols until executable is frozen - } else { ErrorCheckAndContinue(Loading non - AMDGCN code object, HSA_STATUS_ERROR_INVALID_CODE_OBJECT); @@ -1783,20 +1748,23 @@ atmi_status_t Runtime::RegisterModuleFromMemory(void **modules, DEBUG_PRINT("Modules loaded successful? %d\n", module_load_success); if (module_load_success) { /* Freeze the executable; it can now be queried for symbols. */ - err = hsa_executable_freeze(executable, ""); + err = hsa_executable_freeze(executable, NULL); ErrorCheck(Freeze the executable, err); - err = hsa_executable_iterate_symbols(executable, populate_InfoTables, - static_cast(&gpu)); - ErrorCheck(Iterating over symbols for execuatable, err); + // DEPRECATED API + // err = hsa_executable_iterate_symbols(executable, populate_InfoTables, + // static_cast(&gpu)); + // ErrorCheck(Iterating over symbols for execuatable, err); - // err = hsa_executable_iterate_program_symbols(executable, - // iterate_program_symbols, &gpu); - // ErrorCheckAndContinue(Iterating over symbols for execuatable, err); + // TODO(ashwin): find out the difference between the below two iterator + // APIs. err = hsa_executable_iterate_program_symbols(executable, + // populate_InfoTables, + // static_cast(&gpu)); + // ErrorCheck(Iterating over symbols for execuatable, err); - // err = hsa_executable_iterate_agent_symbols(executable, - // iterate_agent_symbols, &gpu); - // ErrorCheckAndContinue(Iterating over symbols for execuatable, err); + err = hsa_executable_iterate_agent_symbols( + executable, agent, populate_InfoTables, static_cast(&gpu)); + ErrorCheck(Iterating over symbols for execuatable, err); // save the executable and destroy during finalize g_executables.push_back(executable); From 6d8c4a11b23a9a0dbcc52b82c3116640c0bb4304 Mon Sep 17 00:00:00 2001 From: "Aji, Ashwin" Date: Fri, 21 Jan 2022 17:26:05 +0000 Subject: [PATCH 2/2] ATMI Release for ROCm v5.0 --- src/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 76290609..c224fab7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -98,6 +98,8 @@ set ( CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERS set ( CPACK_PACKAGE_CONTACT "ATMI Support " ) set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "Asynchronous Task and Memory Interface" ) set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/../LICENSE.txt" ) +install( FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION share/doc/atmi ) + if ( NOT DEFINED CPACK_PACKAGING_INSTALL_PREFIX ) set ( CPACK_PACKAGING_INSTALL_PREFIX /opt/rocm/atmi ) endif() @@ -151,6 +153,7 @@ set ( CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr-dev" ) set ( CPACK_RPM_PRE_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" ) set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun" ) set ( CPACK_RPM_COMPONENT_INSTALL ON) +set ( CPACK_RPM_PACKAGE_LICENSE "MIT" ) ## Set components set ( CPACK_COMPONENTS_ALL runtime cplugin device_runtime )