Skip to content

Commit

Permalink
Merge pull request #122 from RadeonOpenCompute/rocm-4.3.x
Browse files Browse the repository at this point in the history
ROCm 4.3.0 updates
  • Loading branch information
skeelyamd authored Jul 30, 2021
2 parents ea47a79 + c5f95f9 commit f114e4d
Show file tree
Hide file tree
Showing 39 changed files with 1,283 additions and 530 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,4 @@ The information contained herein is for informational purposes only, and is subj

AMD, the AMD Arrow logo, and combinations thereof are trademarks of Advanced Micro Devices, Inc. Other product names used in this publication are for identification purposes only and may be trademarks of their respective companies.

Copyright (c) 2014-2017 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2014-2021 Advanced Micro Devices, Inc. All rights reserved.
16 changes: 10 additions & 6 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,6 @@ set ( SRCS core/util/lnx/os_linux.cpp
core/common/shared.cpp
core/common/hsa_table_interface.cpp
loader/executable.cpp
loader/loaders.cpp
libamdhsacode/amd_elf_image.cpp
libamdhsacode/amd_hsa_code_util.cpp
libamdhsacode/amd_hsa_locks.cpp
Expand Down Expand Up @@ -386,6 +385,7 @@ endif()

## Packaging directives
set ( CPACK_GENERATOR "DEB;RPM" CACHE STRING "Package types to build")
set ( ENABLE_LDCONFIG ON CACHE BOOL "Set library links and caches using ldconfig.")

## Only pack the "binary" and "dev" components, post install script will add the directory link.
set (CPACK_DEB_COMPONENT_INSTALL ON)
Expand All @@ -399,9 +399,9 @@ set ( CPACK_PACKAGE_CONTACT "TODO Advanced Micro Devices, Inc." )
set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "AMD Heterogeneous System Architecture HSA - Linux HSA Runtime for Boltzmann (ROCm) platforms" )
set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md" )

## Process the install scripts to update the CPACK variables
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/post_install DEBIAN/postinst @ONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/pre_remove DEBIAN/prerm @ONLY)
## Process the Debian install/remove scripts to update the CPACK variables
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst.in DEBIAN/postinst @ONLY )
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm.in DEBIAN/prerm @ONLY )

if ( DEFINED ENV{ROCM_LIBPATCH_VERSION} )
set ( CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION}.$ENV{ROCM_LIBPATCH_VERSION}" )
Expand Down Expand Up @@ -452,8 +452,12 @@ set ( CPACK_RPM_PACKAGE_PROVIDES "hsa-ext-rocr-dev" )
set ( CPACK_RPM_PACKAGE_OBSOLETES "hsa-ext-rocr-dev" )
set ( CPACK_RPM_PACKAGE_CONFLICTS "hsa-ext-rocr-dev" )

set ( CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" )
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun" )
## Process the RPM install/remove scripts to substitute the CPACK variables
configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/post.in" RPM/post @ONLY )
configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/postun.in" RPM/postun @ONLY )

set ( CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/post" )
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/postun" )

## Include packaging
include ( CPack )
10 changes: 7 additions & 3 deletions src/DEBIAN/post_install → src/DEBIAN/postinst.in
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,16 @@

set -e

# left-hand term originates from @ENABLE_LDCONFIG@ = ON/OFF at package build
do_ldconfig() {
echo @CPACK_PACKAGING_INSTALL_PREFIX@/hsa/lib > /etc/ld.so.conf.d/hsa-rocr-dev.conf && ldconfig
# @ENABLE_LDCONFIG@ is replaced with ON/OFF when the package is built.
# Compare with the POSIX "=" operator: "==" is a bash extension that makes
# `[` fail under a strict /bin/sh such as dash, which would silently skip
# the ldconfig step. "=" behaves identically under bash.
# NOTE(review): the interpreter line of this script is not visible here.
if [ "@ENABLE_LDCONFIG@" = "ON" ]; then
  # Publish the HSA library directory to the dynamic linker and refresh its cache.
  echo @CPACK_PACKAGING_INSTALL_PREFIX@/hsa/lib > /etc/ld.so.conf.d/hsa-rocr-dev.conf
  ldconfig
fi
}

case "$1" in
configure)
( configure )
do_ldconfig
# Workaround for CPACK directory symlink handling error.
mkdir -p @CPACK_PACKAGING_INSTALL_PREFIX@/hsa/include
Expand All @@ -58,7 +62,7 @@ case "$1" in
abort-upgrade|abort-remove|abort-deconfigure)
echo "$1"
;;
*)
( * )
exit 0
;;
esac
13 changes: 8 additions & 5 deletions src/DEBIAN/pre_remove → src/DEBIAN/prerm.in
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,23 @@

set -e

# left-hand term originates from @ENABLE_LDCONFIG@ = ON/OFF at package build
rm_ldconfig() {
rm -f /etc/ld.so.conf.d/hsa-rocr-dev.conf && ldconfig
# @ENABLE_LDCONFIG@ is replaced with ON/OFF when the package is built.
# Compare with the POSIX "=" operator: "==" is a bash extension that makes
# `[` fail under a strict /bin/sh such as dash, which would silently skip
# the cleanup. "=" behaves identically under bash.
# NOTE(review): the interpreter line of this script is not visible here.
if [ "@ENABLE_LDCONFIG@" = "ON" ]; then
  # Drop the linker search-path entry written at install time, then refresh the cache.
  rm -f /etc/ld.so.conf.d/hsa-rocr-dev.conf
  ldconfig
fi
}

case "$1" in
remove)
( remove )
rm_ldconfig
# Workaround for CPACK directory symlink handling error.
rm -rf @CPACK_PACKAGING_INSTALL_PREFIX@/hsa
;;
purge)
( purge )
;;
*)
( * )
exit 0
;;
esac

6 changes: 5 additions & 1 deletion src/RPM/rpm_post → src/RPM/post.in
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,8 @@
##
################################################################################

echo /opt/rocm/hsa/lib > /etc/ld.so.conf.d/hsa-rocr-dev.conf && ldconfig
# @ENABLE_LDCONFIG@ is replaced with ON/OFF when the package is built.
# The comparison uses the POSIX "=" operator rather than the bash-only "==",
# so the scriptlet also works when /bin/sh is not bash.
if [ "@ENABLE_LDCONFIG@" = "ON" ]; then
  # Publish the HSA library directory to the dynamic linker and refresh its cache.
  echo @CPACK_PACKAGING_INSTALL_PREFIX@/hsa/lib > /etc/ld.so.conf.d/hsa-rocr-dev.conf
  ldconfig
fi
6 changes: 4 additions & 2 deletions src/RPM/rpm_postun → src/RPM/postun.in
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
##
################################################################################

if [ $1 -eq 0 ]; then
rm -f /etc/ld.so.conf.d/hsa-rocr-dev.conf && ldconfig
# @ENABLE_LDCONFIG@ is replaced with ON/OFF when the package is built.
# Only clean up when the first scriptlet argument is 0.
# NOTE(review): by RPM convention $1 is the number of package instances left
# after this transaction, so 0 means a full erase rather than an upgrade — confirm.
# "$1" is quoted so an empty argument cannot change how `[` parses the test, and
# the string comparison uses POSIX "=" instead of the bash-only "==".
if [ "$1" -eq 0 ] && [ "@ENABLE_LDCONFIG@" = "ON" ]; then
  # Drop the linker search-path entry written at install time, then refresh the cache.
  rm -f /etc/ld.so.conf.d/hsa-rocr-dev.conf
  ldconfig
fi
21 changes: 21 additions & 0 deletions src/core/common/hsa_table_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1151,6 +1151,27 @@ hsa_status_t HSA_API hsa_amd_signal_value_pointer(hsa_signal_t signal,
return amdExtTable->hsa_amd_signal_value_pointer_fn(signal, value_ptr);
}

// API-level entry point for hsa_amd_svm_attributes_set (mirrors the AMD extension API).
// Fetches the matching slot from the AMD extension dispatch table (amdExtTable) and
// forwards every argument to it unchanged; the slot's status is returned as-is.
hsa_status_t HSA_API hsa_amd_svm_attributes_set(void* ptr, size_t size,
                                                hsa_amd_svm_attribute_pair_t* attribute_list,
                                                size_t attribute_count) {
  const auto impl = amdExtTable->hsa_amd_svm_attributes_set_fn;
  return impl(ptr, size, attribute_list, attribute_count);
}

// API-level entry point for hsa_amd_svm_attributes_get (mirrors the AMD extension API).
// Fetches the matching slot from the AMD extension dispatch table (amdExtTable) and
// forwards every argument to it unchanged; the slot's status is returned as-is.
hsa_status_t HSA_API hsa_amd_svm_attributes_get(void* ptr, size_t size,
                                                hsa_amd_svm_attribute_pair_t* attribute_list,
                                                size_t attribute_count) {
  const auto impl = amdExtTable->hsa_amd_svm_attributes_get_fn;
  return impl(ptr, size, attribute_list, attribute_count);
}

// API-level entry point for hsa_amd_svm_prefetch_async (mirrors the AMD extension API).
// Fetches the matching slot from the AMD extension dispatch table (amdExtTable) and
// forwards every argument to it unchanged; the slot's status is returned as-is.
hsa_status_t HSA_API hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agent_t agent,
                                                uint32_t num_dep_signals,
                                                const hsa_signal_t* dep_signals,
                                                hsa_signal_t completion_signal) {
  const auto impl = amdExtTable->hsa_amd_svm_prefetch_async_fn;
  return impl(ptr, size, agent, num_dep_signals, dep_signals, completion_signal);
}

// Tools only table interfaces.
namespace rocr {

Expand Down
3 changes: 2 additions & 1 deletion src/core/inc/amd_gpu_agent.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,8 @@ class GpuAgent : public GpuAgentInt {
// @param [in] node Node id. Each CPU in different socket will get distinct
// id.
// @param [in] node_props Node property.
GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props);
// @param [in] xnack_mode XNACK mode of device.
GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xnack_mode);

// @brief GPU agent destructor.
~GpuAgent();
Expand Down
26 changes: 26 additions & 0 deletions src/core/inc/amd_gpu_shaders.h
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,32 @@ static const unsigned int kCodeTrapHandler9[] = {
0x001f8000, 0xb96ef807, 0x86fe7e7e, 0x86ea6a6a, 0xb978f802, 0xbe801f6c,
};

// Table of 32-bit words named as the trap-handler code for the "90a" target.
// NOTE(review): presumably pre-assembled GPU machine code that must be kept in sync
// with its assembly source; do not edit individual words by hand — confirm origin.
static const unsigned int kCodeTrapHandler90a[] = {
0x8973ff73, 0x3e000000, 0x92eeff78, 0x0001000d, 0x8e6e9d6e, 0x87736e73,
0x92eeff6d, 0x00080010, 0xbf850041, 0xb8eef803, 0x866fff6e, 0x00000900,
0xbf850031, 0xbeee007e, 0xbeef007f, 0xbefe00ff, 0x80000000, 0xbf90000a,
0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0xbeff006f, 0x866fff7e, 0x00000fff,
0xbefe006e, 0xbeef1a97, 0xbeee007c, 0xbefc006f, 0xbf800000, 0xbf900001,
0xbefc006e, 0xbf0d9f73, 0xbf85000f, 0x866fff6f, 0x000003ff, 0x8e6f836f,
0xc0051bbd, 0x0000006f, 0xbf8cc07f, 0xc0031bb7, 0x00000008, 0xbf8cc07f,
0x80ee6e72, 0x8f6e866e, 0x8973ff73, 0x01ffffff, 0x87736e73, 0xbef31a9f,
0xbef2006c, 0x866dff6d, 0x0000ffff, 0x8e6d876d, 0x8977ff77, 0x007fff80,
0x87776d77, 0xbeec1c00, 0x806cff6c, 0x00000010, 0x826d806d, 0xbf820044,
0xbf920002, 0xbf82fffe, 0x866fff6e, 0x10000100, 0xbf06ff6f, 0x00000100,
0xbeef00ff, 0x20000000, 0xbf850011, 0x866fff6e, 0x00000800, 0xbeef00f4,
0xbf85000d, 0xbf820036, 0x83ef8f6e, 0x8e6f996f, 0x87736f73, 0xbf09836e,
0xbf85ffbe, 0xbf06826e, 0xbeef00ff, 0x80000000, 0xbf850003, 0x806c846c,
0x826d806d, 0xbf82002c, 0xbef0006f, 0xbeee007e, 0xbeef007f, 0xbefe00ff,
0x80000000, 0xbf90000a, 0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0xbeff006f,
0x867eff7e, 0x000003ff, 0x8e6f837e, 0xbefe006e, 0xc0051bbd, 0x0000006f,
0xbf8cc07f, 0xc0071bb7, 0x000000c0, 0xbf8cc07f, 0xbef10080, 0xc2831c37,
0x00000008, 0xbf8cc07f, 0x87707170, 0xbf85000e, 0xc0071c37, 0x00000010,
0xbf8cc07f, 0x86f07070, 0xbf840009, 0xc0031bb7, 0x00000018, 0xbf8cc07f,
0xc0431bb8, 0x00000000, 0xbf8cc07f, 0xbefc0080, 0xbf800000, 0xbf900001,
0xbef00080, 0xbef10080, 0xbef31a9e, 0xbef81a8d, 0x8f6e8b77, 0x866eff6e,
0x001f8000, 0xb96ef807, 0x86fe7e7e, 0x86ea6a6a, 0xb978f802, 0xbe801f6c,
};

static const unsigned int kCodeCopyAligned8[] = {
0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xC00A0300, 0x00000020,
0xC00A0400, 0x00000030, 0xC00A0500, 0x00000040, 0xC0020600, 0x00000050,
Expand Down
2 changes: 1 addition & 1 deletion src/core/inc/amd_memory_region.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ class MemoryRegion : public core::MemoryRegion {
/// @brief Unpin memory.
static void MakeKfdMemoryUnresident(const void* ptr);

MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owner,
MemoryRegion(bool fine_grain, bool kernarg, bool full_profile, core::Agent* owner,
const HsaMemoryProperties& mem_props);

~MemoryRegion();
Expand Down
15 changes: 15 additions & 0 deletions src/core/inc/hsa_ext_amd_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,21 @@ hsa_status_t hsa_amd_deregister_deallocation_callback(
hsa_status_t hsa_amd_signal_value_pointer(hsa_signal_t signal,
volatile hsa_signal_value_t** value_ptr);

// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_svm_attributes_set(void* ptr, size_t size,
hsa_amd_svm_attribute_pair_t* attribute_list,
size_t attribute_count);

// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_svm_attributes_get(void* ptr, size_t size,
hsa_amd_svm_attribute_pair_t* attribute_list,
size_t attribute_count);

// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agent_t agent,
uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
hsa_signal_t completion_signal);

} // namespace amd
} // namespace rocr

Expand Down
6 changes: 6 additions & 0 deletions src/core/inc/hsa_ven_amd_loader_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ namespace rocr {
size_t size,
hsa_code_object_reader_t *code_object_reader);

hsa_status_t
hsa_ven_amd_loader_iterate_executables(
hsa_status_t (*callback)(
hsa_executable_t executable,
void *data),
void *data);
} // namespace rocr

#endif
7 changes: 5 additions & 2 deletions src/core/inc/memory_region.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ class Agent;

class MemoryRegion : public Checked<0x9C961F19EE175BB3> {
public:
MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owner)
: fine_grain_(fine_grain), full_profile_(full_profile), owner_(owner) {
MemoryRegion(bool fine_grain, bool kernarg, bool full_profile, core::Agent* owner)
: fine_grain_(fine_grain), kernarg_(kernarg), full_profile_(full_profile), owner_(owner) {
assert(owner_ != NULL);
}

Expand Down Expand Up @@ -112,12 +112,15 @@ class MemoryRegion : public Checked<0x9C961F19EE175BB3> {

__forceinline bool fine_grain() const { return fine_grain_; }

// Returns the kernarg flag this region was constructed with.
__forceinline bool kernarg() const { return kernarg_; }

__forceinline bool full_profile() const { return full_profile_; }

__forceinline core::Agent* owner() const { return owner_; }

private:
const bool fine_grain_;
const bool kernarg_;
const bool full_profile_;

core::Agent* owner_;
Expand Down
38 changes: 38 additions & 0 deletions src/core/inc/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,15 @@ class Runtime {

hsa_status_t IPCDetach(void* ptr);

hsa_status_t SetSvmAttrib(void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
size_t attribute_count);

hsa_status_t GetSvmAttrib(void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
size_t attribute_count);

hsa_status_t SvmPrefetch(void* ptr, size_t size, hsa_agent_t agent, uint32_t num_dep_signals,
const hsa_signal_t* dep_signals, hsa_signal_t completion_signal);

const std::vector<Agent*>& cpu_agents() { return cpu_agents_; }

const std::vector<Agent*>& gpu_agents() { return gpu_agents_; }
Expand Down Expand Up @@ -395,6 +404,28 @@ class Runtime {
std::vector<void*> arg_;
};

struct PrefetchRange;
typedef std::map<uintptr_t, PrefetchRange> prefetch_map_t;

// Bookkeeping record for a prefetch operation; PrefetchRange::op points at one of these.
// NOTE(review): the per-field notes below are inferred from names and types only —
// confirm against the code that fills this struct in.
struct PrefetchOp {
// presumably the start address of the range being prefetched
void* base;
// presumably the length of that range in bytes
size_t size;
// presumably the id of the node the range is prefetched to
uint32_t node_id;
// presumably the number of entries in dep_signals that have not completed yet
int remaining_deps;
// presumably the signal updated once the prefetch has finished
hsa_signal_t completion;
// signals associated with this operation (presumably ones it must wait on)
std::vector<hsa_signal_t> dep_signals;
// iterator to an entry of the prefetch map (a std::map<uintptr_t, PrefetchRange>)
prefetch_map_t::iterator prefetch_map_entry;
};

// Value type stored in prefetch_map_t (a std::map keyed by uintptr_t).
// Holds a byte count, a pointer to the PrefetchOp it is associated with, and two
// iterators into the same map type.
// NOTE(review): prev/next presumably chain the ranges that belong to one PrefetchOp —
// confirm against the code that maintains the map.
struct PrefetchRange {
  // Initialise the scalar members explicitly so that a default-constructed entry
  // (for example one created by std::map::operator[]) never exposes indeterminate
  // values for `bytes` or `op`. The iterators are default-constructed.
  PrefetchRange() : bytes(0), op(nullptr) {}

  // @param Bytes Byte count recorded for this range.
  // @param Op    Operation this range is associated with; stored as-is, not owned here.
  PrefetchRange(size_t Bytes, PrefetchOp* Op) : bytes(Bytes), op(Op) {}

  size_t bytes;
  PrefetchOp* op;
  prefetch_map_t::iterator prev;
  prefetch_map_t::iterator next;
};

// Will be created before any user could call hsa_init but also could be
// destroyed before incorrectly written programs call hsa_shutdown.
static KernelMutex bootstrap_lock_;
Expand Down Expand Up @@ -444,6 +475,9 @@ class Runtime {
/// @retval Index in ::link_matrix_.
uint32_t GetIndexLinkInfo(uint32_t node_id_from, uint32_t node_id_to);

/// @brief Get most recently issued SVM prefetch agent for the range in question.
Agent* GetSVMPrefetchAgent(void* ptr, size_t size);

// Mutex object to protect multithreaded access to ::allocation_map_,
// KFD map/unmap, register/unregister, and access to hsaKmtQueryPointerInfo
// registered & mapped arrays.
Expand Down Expand Up @@ -485,6 +519,10 @@ class Runtime {
// Contains the region, address, and size of previously allocated memory.
std::map<const void*, AllocationRegion> allocation_map_;

// Pending prefetch containers.
KernelMutex prefetch_lock_;
prefetch_map_t prefetch_map_;

// Allocator using ::system_region_
std::function<void*(size_t size, size_t align, MemoryRegion::AllocateFlags flags)> system_allocator_;

Expand Down
12 changes: 11 additions & 1 deletion src/core/inc/scratch_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ class ScratchCache {
ScratchCache& operator=(const ScratchCache& rhs) = delete;
ScratchCache& operator=(ScratchCache&& rhs) = delete;

ScratchCache(deallocator_t deallocator) : dealloc(deallocator) {}
ScratchCache(deallocator_t deallocator) : dealloc(deallocator), available_bytes(0) {}

~ScratchCache() { assert(map.empty() && "ScratchCache not empty at shutdown."); }

Expand All @@ -122,6 +122,7 @@ class ScratchCache {
it->second.alloc();
info.queue_base = it->second.base;
info.scratch_node = it;
available_bytes -= it->first;
return true;
}
it++;
Expand All @@ -136,6 +137,7 @@ class ScratchCache {
info.queue_base = it->second.base;
info.size = it->first;
info.scratch_node = it;
available_bytes -= it->first;
return true;
}
it++;
Expand All @@ -152,13 +154,16 @@ class ScratchCache {
return;
}
it->second.free();
available_bytes += it->first;
assert(it->first == info.size && "Scratch cache size mismatch.");
}

bool trim(bool trim_nodes_in_use) {
bool ret = !map.empty();
auto it = map.begin();
while (it != map.end()) {
if (it->second.isFree()) {
available_bytes -= it->first;
dealloc(it->second.base, it->first, it->second.large);
auto temp = it;
it++;
Expand All @@ -181,9 +186,14 @@ class ScratchCache {
info.scratch_node = it;
}

// Reports the running available_bytes counter. In the visible code that counter is
// reduced by a node's size when the node is handed out or trimmed, and increased by
// the node's size when the node is returned to the cache.
size_t free_bytes() const { return available_bytes; }

private:
map_t map;
deallocator_t dealloc;
size_t available_bytes;
};

} // namespace AMD
Expand Down
Loading

0 comments on commit f114e4d

Please sign in to comment.