Skip to content

Commit

Permalink
ROCm 4.3.0 updates
Browse files Browse the repository at this point in the history
  • Loading branch information
skeelyamd committed Jul 30, 2021
1 parent ea47a79 commit 71ef148
Show file tree
Hide file tree
Showing 38 changed files with 1,282 additions and 529 deletions.
16 changes: 10 additions & 6 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,6 @@ set ( SRCS core/util/lnx/os_linux.cpp
core/common/shared.cpp
core/common/hsa_table_interface.cpp
loader/executable.cpp
loader/loaders.cpp
libamdhsacode/amd_elf_image.cpp
libamdhsacode/amd_hsa_code_util.cpp
libamdhsacode/amd_hsa_locks.cpp
Expand Down Expand Up @@ -386,6 +385,7 @@ endif()

## Packaging directives
set ( CPACK_GENERATOR "DEB;RPM" CACHE STRING "Package types to build")
set ( ENABLE_LDCONFIG ON CACHE BOOL "Set library links and caches using ldconfig.")
## The DEB/RPM script templates compare the substituted value against the
## literal string "ON". Collapse every CMake boolean spelling (1, YES, TRUE,
## 0, NO, FALSE, ...) to exactly ON or OFF so the comparison cannot silently
## fail. The normal variable shadows the cache entry for configure_file.
if ( ENABLE_LDCONFIG )
  set ( ENABLE_LDCONFIG ON )
else()
  set ( ENABLE_LDCONFIG OFF )
endif()

## Only pack the "binary" and "dev" components, post install script will add the directory link.
set (CPACK_DEB_COMPONENT_INSTALL ON)
Expand All @@ -399,9 +399,9 @@ set ( CPACK_PACKAGE_CONTACT "TODO Advanced Micro Devices, Inc." )
set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "AMD Heterogeneous System Architecture HSA - Linux HSA Runtime for Boltzmann (ROCm) platforms" )
set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md" )

## Process the install scripts to update the CPACK variables
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/post_install DEBIAN/postinst @ONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/pre_remove DEBIAN/prerm @ONLY)
## Configure the Debian install/remove script templates, substituting the @VAR@ placeholders with the current CMake/CPack values
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst.in DEBIAN/postinst @ONLY )
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm.in DEBIAN/prerm @ONLY )

if ( DEFINED ENV{ROCM_LIBPATCH_VERSION} )
set ( CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION}.$ENV{ROCM_LIBPATCH_VERSION}" )
Expand Down Expand Up @@ -452,8 +452,12 @@ set ( CPACK_RPM_PACKAGE_PROVIDES "hsa-ext-rocr-dev" )
set ( CPACK_RPM_PACKAGE_OBSOLETES "hsa-ext-rocr-dev" )
set ( CPACK_RPM_PACKAGE_CONFLICTS "hsa-ext-rocr-dev" )

set ( CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" )
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun" )
## Configure the RPM install/remove script templates, substituting the @VAR@ placeholders with the current CMake/CPack values
configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/post.in" RPM/post @ONLY )
configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/postun.in" RPM/postun @ONLY )

set ( CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/post" )
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/postun" )

## Include packaging
include ( CPack )
10 changes: 7 additions & 3 deletions src/DEBIAN/post_install → src/DEBIAN/postinst.in
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,16 @@

set -e

# left-hand term originates from @ENABLE_LDCONFIG@ = ON/OFF at package build
do_ldconfig() {
echo @CPACK_PACKAGING_INSTALL_PREFIX@/hsa/lib > /etc/ld.so.conf.d/hsa-rocr-dev.conf && ldconfig
# Register the HSA library directory with the dynamic linker, but only when
# the package was built with ldconfig handling enabled.
# POSIX test(1) defines "=" for string equality; "==" is a bash extension
# that errors out under dash, which would make this branch never run.
if [ "@ENABLE_LDCONFIG@" = "ON" ]; then
  echo "@CPACK_PACKAGING_INSTALL_PREFIX@/hsa/lib" > /etc/ld.so.conf.d/hsa-rocr-dev.conf
  ldconfig
fi
}

case "$1" in
configure)
( configure )
do_ldconfig
# Workaround for CPACK directory symlink handling error.
mkdir -p @CPACK_PACKAGING_INSTALL_PREFIX@/hsa/include
Expand All @@ -58,7 +62,7 @@ case "$1" in
abort-upgrade|abort-remove|abort-deconfigure)
echo "$1"
;;
*)
( * )
exit 0
;;
esac
13 changes: 8 additions & 5 deletions src/DEBIAN/pre_remove → src/DEBIAN/prerm.in
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,23 @@

set -e

# left-hand term originates from @ENABLE_LDCONFIG@ = ON/OFF at package build
rm_ldconfig() {
rm -f /etc/ld.so.conf.d/hsa-rocr-dev.conf && ldconfig
# Drop the linker configuration entry and refresh the cache, but only when
# the package was built with ldconfig handling enabled.
# POSIX test(1) defines "=" for string equality; "==" is a bash extension
# that errors out under dash, which would make this branch never run.
if [ "@ENABLE_LDCONFIG@" = "ON" ]; then
  rm -f /etc/ld.so.conf.d/hsa-rocr-dev.conf
  ldconfig
fi
}

case "$1" in
remove)
( remove )
rm_ldconfig
# Workaround for CPACK directory symlink handling error.
rm -rf @CPACK_PACKAGING_INSTALL_PREFIX@/hsa
;;
purge)
( purge )
;;
*)
( * )
exit 0
;;
esac

6 changes: 5 additions & 1 deletion src/RPM/rpm_post → src/RPM/post.in
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,8 @@
##
################################################################################

echo /opt/rocm/hsa/lib > /etc/ld.so.conf.d/hsa-rocr-dev.conf && ldconfig
# The quoted value compared below is the ENABLE_LDCONFIG build option
# (ON or OFF), substituted into this script when the package is built.
# "=" is used instead of "==" because only "=" is defined by POSIX test(1).
if [ "@ENABLE_LDCONFIG@" = "ON" ]; then
  echo "@CPACK_PACKAGING_INSTALL_PREFIX@/hsa/lib" > /etc/ld.so.conf.d/hsa-rocr-dev.conf
  ldconfig
fi
6 changes: 4 additions & 2 deletions src/RPM/rpm_postun → src/RPM/postun.in
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
##
################################################################################

if [ $1 -eq 0 ]; then
rm -f /etc/ld.so.conf.d/hsa-rocr-dev.conf && ldconfig
# The quoted value compared below is the ENABLE_LDCONFIG build option
# (ON or OFF), substituted into this script when the package is built.
# Only act when the first scriptlet argument is 0. "$1" is quoted so an empty
# argument cannot change how the test expression is parsed, and "=" is used
# instead of "==" because only "=" is defined by POSIX test(1).
if [ "$1" -eq 0 ] && [ "@ENABLE_LDCONFIG@" = "ON" ]; then
  rm -f /etc/ld.so.conf.d/hsa-rocr-dev.conf
  ldconfig
fi
21 changes: 21 additions & 0 deletions src/core/common/hsa_table_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1151,6 +1151,27 @@ hsa_status_t HSA_API hsa_amd_signal_value_pointer(hsa_signal_t signal,
return amdExtTable->hsa_amd_signal_value_pointer_fn(signal, value_ptr);
}

// Mirrors Amd Extension Apis
// Passes every argument through, unchanged, to the
// hsa_amd_svm_attributes_set_fn entry of amdExtTable and returns the status
// produced by that entry.
hsa_status_t HSA_API hsa_amd_svm_attributes_set(
    void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
    size_t attribute_count) {
  return amdExtTable->hsa_amd_svm_attributes_set_fn(ptr, size, attribute_list,
                                                    attribute_count);
}

// Mirrors Amd Extension Apis
// Passes every argument through, unchanged, to the
// hsa_amd_svm_attributes_get_fn entry of amdExtTable and returns the status
// produced by that entry.
hsa_status_t HSA_API hsa_amd_svm_attributes_get(
    void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
    size_t attribute_count) {
  return amdExtTable->hsa_amd_svm_attributes_get_fn(ptr, size, attribute_list,
                                                    attribute_count);
}

// Mirrors Amd Extension Apis
// Passes every argument through, unchanged, to the
// hsa_amd_svm_prefetch_async_fn entry of amdExtTable and returns the status
// produced by that entry.
hsa_status_t HSA_API hsa_amd_svm_prefetch_async(
    void* ptr, size_t size, hsa_agent_t agent, uint32_t num_dep_signals,
    const hsa_signal_t* dep_signals, hsa_signal_t completion_signal) {
  return amdExtTable->hsa_amd_svm_prefetch_async_fn(
      ptr, size, agent, num_dep_signals, dep_signals, completion_signal);
}

// Tools only table interfaces.
namespace rocr {

Expand Down
3 changes: 2 additions & 1 deletion src/core/inc/amd_gpu_agent.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,8 @@ class GpuAgent : public GpuAgentInt {
// @param [in] node Node id. Each CPU in different socket will get distinct
// id.
// @param [in] node_props Node property.
GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props);
// @param [in] xnack_mode XNACK mode of device.
GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xnack_mode);

// @brief GPU agent destructor.
~GpuAgent();
Expand Down
26 changes: 26 additions & 0 deletions src/core/inc/amd_gpu_shaders.h
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,32 @@ static const unsigned int kCodeTrapHandler9[] = {
0x001f8000, 0xb96ef807, 0x86fe7e7e, 0x86ea6a6a, 0xb978f802, 0xbe801f6c,
};

// Table of raw 32-bit words, stored six per line.
// NOTE(review): judging by the name this is presumably a pre-assembled trap
// handler binary for the "90a" GPU variant - confirm against the shader
// source it was generated from. The words are opaque here; do not edit by hand.
// The final six words are identical to the final six words of
// kCodeTrapHandler9 above.
static const unsigned int kCodeTrapHandler90a[] = {
0x8973ff73, 0x3e000000, 0x92eeff78, 0x0001000d, 0x8e6e9d6e, 0x87736e73,
0x92eeff6d, 0x00080010, 0xbf850041, 0xb8eef803, 0x866fff6e, 0x00000900,
0xbf850031, 0xbeee007e, 0xbeef007f, 0xbefe00ff, 0x80000000, 0xbf90000a,
0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0xbeff006f, 0x866fff7e, 0x00000fff,
0xbefe006e, 0xbeef1a97, 0xbeee007c, 0xbefc006f, 0xbf800000, 0xbf900001,
0xbefc006e, 0xbf0d9f73, 0xbf85000f, 0x866fff6f, 0x000003ff, 0x8e6f836f,
0xc0051bbd, 0x0000006f, 0xbf8cc07f, 0xc0031bb7, 0x00000008, 0xbf8cc07f,
0x80ee6e72, 0x8f6e866e, 0x8973ff73, 0x01ffffff, 0x87736e73, 0xbef31a9f,
0xbef2006c, 0x866dff6d, 0x0000ffff, 0x8e6d876d, 0x8977ff77, 0x007fff80,
0x87776d77, 0xbeec1c00, 0x806cff6c, 0x00000010, 0x826d806d, 0xbf820044,
0xbf920002, 0xbf82fffe, 0x866fff6e, 0x10000100, 0xbf06ff6f, 0x00000100,
0xbeef00ff, 0x20000000, 0xbf850011, 0x866fff6e, 0x00000800, 0xbeef00f4,
0xbf85000d, 0xbf820036, 0x83ef8f6e, 0x8e6f996f, 0x87736f73, 0xbf09836e,
0xbf85ffbe, 0xbf06826e, 0xbeef00ff, 0x80000000, 0xbf850003, 0x806c846c,
0x826d806d, 0xbf82002c, 0xbef0006f, 0xbeee007e, 0xbeef007f, 0xbefe00ff,
0x80000000, 0xbf90000a, 0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0xbeff006f,
0x867eff7e, 0x000003ff, 0x8e6f837e, 0xbefe006e, 0xc0051bbd, 0x0000006f,
0xbf8cc07f, 0xc0071bb7, 0x000000c0, 0xbf8cc07f, 0xbef10080, 0xc2831c37,
0x00000008, 0xbf8cc07f, 0x87707170, 0xbf85000e, 0xc0071c37, 0x00000010,
0xbf8cc07f, 0x86f07070, 0xbf840009, 0xc0031bb7, 0x00000018, 0xbf8cc07f,
0xc0431bb8, 0x00000000, 0xbf8cc07f, 0xbefc0080, 0xbf800000, 0xbf900001,
0xbef00080, 0xbef10080, 0xbef31a9e, 0xbef81a8d, 0x8f6e8b77, 0x866eff6e,
0x001f8000, 0xb96ef807, 0x86fe7e7e, 0x86ea6a6a, 0xb978f802, 0xbe801f6c,
};

static const unsigned int kCodeCopyAligned8[] = {
0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xC00A0300, 0x00000020,
0xC00A0400, 0x00000030, 0xC00A0500, 0x00000040, 0xC0020600, 0x00000050,
Expand Down
2 changes: 1 addition & 1 deletion src/core/inc/amd_memory_region.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ class MemoryRegion : public core::MemoryRegion {
/// @brief Unpin memory.
static void MakeKfdMemoryUnresident(const void* ptr);

MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owner,
MemoryRegion(bool fine_grain, bool kernarg, bool full_profile, core::Agent* owner,
const HsaMemoryProperties& mem_props);

~MemoryRegion();
Expand Down
15 changes: 15 additions & 0 deletions src/core/inc/hsa_ext_amd_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,21 @@ hsa_status_t hsa_amd_deregister_deallocation_callback(
hsa_status_t hsa_amd_signal_value_pointer(hsa_signal_t signal,
volatile hsa_signal_value_t** value_ptr);

// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_svm_attributes_set(void* ptr, size_t size,
hsa_amd_svm_attribute_pair_t* attribute_list,
size_t attribute_count);

// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_svm_attributes_get(void* ptr, size_t size,
hsa_amd_svm_attribute_pair_t* attribute_list,
size_t attribute_count);

// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agent_t agent,
uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
hsa_signal_t completion_signal);

} // namespace amd
} // namespace rocr

Expand Down
6 changes: 6 additions & 0 deletions src/core/inc/hsa_ven_amd_loader_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ namespace rocr {
size_t size,
hsa_code_object_reader_t *code_object_reader);

hsa_status_t
hsa_ven_amd_loader_iterate_executables(
hsa_status_t (*callback)(
hsa_executable_t executable,
void *data),
void *data);
} // namespace rocr

#endif
7 changes: 5 additions & 2 deletions src/core/inc/memory_region.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ class Agent;

class MemoryRegion : public Checked<0x9C961F19EE175BB3> {
public:
MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owner)
: fine_grain_(fine_grain), full_profile_(full_profile), owner_(owner) {
MemoryRegion(bool fine_grain, bool kernarg, bool full_profile, core::Agent* owner)
: fine_grain_(fine_grain), kernarg_(kernarg), full_profile_(full_profile), owner_(owner) {
assert(owner_ != NULL);
}

Expand Down Expand Up @@ -112,12 +112,15 @@ class MemoryRegion : public Checked<0x9C961F19EE175BB3> {

__forceinline bool fine_grain() const { return fine_grain_; }

__forceinline bool kernarg() const { return kernarg_; }

__forceinline bool full_profile() const { return full_profile_; }

__forceinline core::Agent* owner() const { return owner_; }

private:
const bool fine_grain_;
const bool kernarg_;
const bool full_profile_;

core::Agent* owner_;
Expand Down
38 changes: 38 additions & 0 deletions src/core/inc/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,15 @@ class Runtime {

hsa_status_t IPCDetach(void* ptr);

hsa_status_t SetSvmAttrib(void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
size_t attribute_count);

hsa_status_t GetSvmAttrib(void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
size_t attribute_count);

hsa_status_t SvmPrefetch(void* ptr, size_t size, hsa_agent_t agent, uint32_t num_dep_signals,
const hsa_signal_t* dep_signals, hsa_signal_t completion_signal);

const std::vector<Agent*>& cpu_agents() { return cpu_agents_; }

const std::vector<Agent*>& gpu_agents() { return gpu_agents_; }
Expand Down Expand Up @@ -395,6 +404,28 @@ class Runtime {
std::vector<void*> arg_;
};

struct PrefetchRange;
typedef std::map<uintptr_t, PrefetchRange> prefetch_map_t;

// Record describing one prefetch request. PrefetchRange entries point back at
// their owning record through PrefetchRange::op.
// NOTE(review): the per-field meanings below are inferred from the names and
// types only - confirm against the implementation before relying on them.
struct PrefetchOp {
// presumably the start address of the range being prefetched
void* base;
// presumably the length of the range in bytes
size_t size;
// presumably the node the range is prefetched to
uint32_t node_id;
// presumably the number of dep_signals still outstanding
int remaining_deps;
// presumably signalled once the prefetch has finished
hsa_signal_t completion;
// signals associated with this request (see remaining_deps)
std::vector<hsa_signal_t> dep_signals;
// iterator into a prefetch_map_t container for this request
prefetch_map_t::iterator prefetch_map_entry;
};

// Value type stored in prefetch_map_t (a std::map keyed by uintptr_t).
// Each entry carries a byte count, a pointer to the PrefetchOp it belongs to,
// and two iterators (prev/next) into the same map type.
struct PrefetchRange {
  // Default construction is required by std::map (e.g. operator[]). The scalar
  // members are zeroed so an implicitly created entry never exposes
  // indeterminate values; previously bytes and op were left uninitialized.
  PrefetchRange() : bytes(0), op(nullptr) {}

  // @param [in] Bytes Byte count recorded for this entry.
  // @param [in] Op Operation record this entry belongs to.
  PrefetchRange(size_t Bytes, PrefetchOp* Op) : bytes(Bytes), op(Op) {}

  size_t bytes;
  PrefetchOp* op;
  prefetch_map_t::iterator prev;
  prefetch_map_t::iterator next;
};

// Will be created before any user could call hsa_init but also could be
// destroyed before incorrectly written programs call hsa_shutdown.
static KernelMutex bootstrap_lock_;
Expand Down Expand Up @@ -444,6 +475,9 @@ class Runtime {
/// @retval Index in ::link_matrix_.
uint32_t GetIndexLinkInfo(uint32_t node_id_from, uint32_t node_id_to);

/// @brief Get most recently issued SVM prefetch agent for the range in question.
Agent* GetSVMPrefetchAgent(void* ptr, size_t size);

// Mutex object to protect multithreaded access to ::allocation_map_,
// KFD map/unmap, register/unregister, and access to hsaKmtQueryPointerInfo
// registered & mapped arrays.
Expand Down Expand Up @@ -485,6 +519,10 @@ class Runtime {
// Contains the region, address, and size of previously allocated memory.
std::map<const void*, AllocationRegion> allocation_map_;

// Pending prefetch containers.
KernelMutex prefetch_lock_;
prefetch_map_t prefetch_map_;

// Allocator using ::system_region_
std::function<void*(size_t size, size_t align, MemoryRegion::AllocateFlags flags)> system_allocator_;

Expand Down
12 changes: 11 additions & 1 deletion src/core/inc/scratch_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ class ScratchCache {
ScratchCache& operator=(const ScratchCache& rhs) = delete;
ScratchCache& operator=(ScratchCache&& rhs) = delete;

ScratchCache(deallocator_t deallocator) : dealloc(deallocator) {}
ScratchCache(deallocator_t deallocator) : dealloc(deallocator), available_bytes(0) {}

~ScratchCache() { assert(map.empty() && "ScratchCache not empty at shutdown."); }

Expand All @@ -122,6 +122,7 @@ class ScratchCache {
it->second.alloc();
info.queue_base = it->second.base;
info.scratch_node = it;
available_bytes -= it->first;
return true;
}
it++;
Expand All @@ -136,6 +137,7 @@ class ScratchCache {
info.queue_base = it->second.base;
info.size = it->first;
info.scratch_node = it;
available_bytes -= it->first;
return true;
}
it++;
Expand All @@ -152,13 +154,16 @@ class ScratchCache {
return;
}
it->second.free();
available_bytes += it->first;
assert(it->first == info.size && "Scratch cache size mismatch.");
}

bool trim(bool trim_nodes_in_use) {
bool ret = !map.empty();
auto it = map.begin();
while (it != map.end()) {
if (it->second.isFree()) {
available_bytes -= it->first;
dealloc(it->second.base, it->first, it->second.large);
auto temp = it;
it++;
Expand All @@ -181,9 +186,14 @@ class ScratchCache {
info.scratch_node = it;
}

// Reports the running available_bytes counter, which the cache adjusts when
// cached nodes are handed out, released, or trimmed.
size_t free_bytes() const { return available_bytes; }

private:
map_t map;
deallocator_t dealloc;
size_t available_bytes;
};

} // namespace AMD
Expand Down
Loading

0 comments on commit 71ef148

Please sign in to comment.