Skip to content

Commit

Permalink
ROCm 3.9.0 updates
Browse files: browse the repository at this point in the history
  • Loading branch information
skeelyamd committed Oct 27, 2020
1 parent 471b17c commit 68b0743
Show file tree
Hide file tree
Showing 15 changed files with 101 additions and 10 deletions.
6 changes: 4 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -393,9 +393,9 @@ set (CPACK_COMPONENTS_ALL_IN_ONE_PACKAGE 1)
set (CPACK_COMPONENTS_ALL binary dev)

set ( CPACK_PACKAGE_NAME "hsa-rocr-dev" )
set ( CPACK_PACKAGE_VENDOR "AMD" )
set ( CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc." )
set ( CPACK_PACKAGE_VERSION ${PACKAGE_VERSION_STRING} )
set ( CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc." )
set ( CPACK_PACKAGE_CONTACT "Advanced Micro Devices, Inc." )
set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "AMD Heterogeneous System Architecture HSA - Linux HSA Runtime for Boltzmann (ROCm) platforms" )
set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md" )

Expand All @@ -404,6 +404,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/post_install DEBIAN/postinst @
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/pre_remove DEBIAN/prerm @ONLY)

# Debian package specific variables
set ( CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT" )
set ( CPACK_DEBIAN_PACKAGE_DEPENDS "hsakmt-roct" )
set ( CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/RadeonOpenCompute/ROCR-Runtime" )
set ( CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "DEBIAN/postinst;DEBIAN/prerm" )
Expand All @@ -415,6 +416,7 @@ set ( CPACK_DEBIAN_PACKAGE_REPLACES "hsa-ext-rocr-dev" )
set ( CPACK_DEBIAN_PACKAGE_CONFLICTS "hsa-ext-rocr-dev" )

## RPM package specific variables
set ( CPACK_RPM_FILE_NAME "RPM-DEFAULT" )
set ( CPACK_RPM_PACKAGE_DEPENDS "hsakmt-roct" )

# Declare that this package will replace functionality provided by hsa-ext-rocr-dev package
Expand Down
3 changes: 2 additions & 1 deletion src/core/inc/amd_gpu_agent.h
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,8 @@ class GpuAgent : public GpuAgentInt {
lazy_ptr<core::Blit>& GetPcieBlit(const core::Agent& dst_agent, const core::Agent& src_agent);

// Bind the Blit object that will drive the copy operation
lazy_ptr<core::Blit>& GetBlitObject(const core::Agent& dst_agent, const core::Agent& src_agent);
lazy_ptr<core::Blit>& GetBlitObject(const core::Agent& dst_agent, const core::Agent& src_agent,
const size_t size);
// @brief Alternative aperture base address. Only on KV.
uintptr_t ape1_base_;

Expand Down
11 changes: 8 additions & 3 deletions src/core/runtime/amd_gpu_agent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ hsa_status_t GpuAgent::DmaCopy(void* dst, core::Agent& dst_agent,
std::vector<core::Signal*>& dep_signals,
core::Signal& out_signal) {
// Bind the Blit object that will drive this copy operation
lazy_ptr<core::Blit>& blit = GetBlitObject(dst_agent, src_agent);
lazy_ptr<core::Blit>& blit = GetBlitObject(dst_agent, src_agent, size);

if (profiling_enabled()) {
// Track the agent so we could translate the resulting timestamp to system
Expand Down Expand Up @@ -1188,7 +1188,7 @@ uint64_t GpuAgent::TranslateTime(uint64_t tick) {
uint64_t system_tick = 0;
double ratio = double(t1_.SystemClockCounter - t0_.SystemClockCounter) /
double(t1_.GPUClockCounter - t0_.GPUClockCounter);
system_tick = uint64_t(ratio * double(int64_t(tick - t1_.GPUClockCounter))) + t1_.SystemClockCounter;
system_tick = uint64_t(int64_t(ratio * double(int64_t(tick - t1_.GPUClockCounter)))) + t1_.SystemClockCounter;

// tick predates HSA startup - extrapolate with fixed clock ratio
if (tick < t0_.GPUClockCounter) {
Expand Down Expand Up @@ -1354,7 +1354,7 @@ lazy_ptr<core::Blit>& GpuAgent::GetPcieBlit(const core::Agent& dst_agent,
}

lazy_ptr<core::Blit>& GpuAgent::GetBlitObject(const core::Agent& dst_agent,
const core::Agent& src_agent) {
const core::Agent& src_agent, const size_t size) {
// At this point it is guaranteed that one of
// the two devices is a GPU, potentially both
assert(((src_agent.device_type() == core::Agent::kAmdGpuDevice) ||
Expand All @@ -1363,6 +1363,11 @@ lazy_ptr<core::Blit>& GpuAgent::GetBlitObject(const core::Agent& dst_agent,

// Determine if Src and Dst devices are same
if ((src_agent.public_handle().handle) == (dst_agent.public_handle().handle)) {
// If the copy is very small then cache flush overheads can dominate.
// Choose a (potentially) SDMA enabled engine to avoid cache flushing.
if (size < core::Runtime::runtime_singleton_->flag().force_sdma_size()) {
return blits_[BlitDevToHost];
}
return blits_[BlitDevToDev];
}

Expand Down
4 changes: 4 additions & 0 deletions src/core/runtime/hsa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1888,6 +1888,10 @@ static std::string ConvertOldTargetNameToNew(
NewName = "amdgcn-amd-amdhsa--gfx1011";
else if (OldName == "AMD:AMDGPU:10:1:2")
NewName = "amdgcn-amd-amdhsa--gfx1012";
else if (OldName == "AMD:AMDGPU:10:3:0")
NewName = "amdgcn-amd-amdhsa--gfx1030";
else if (OldName == "AMD:AMDGPU:10:3:1")
NewName = "amdgcn-amd-amdhsa--gfx1031";
else
assert(false && "Unhandled target");

Expand Down
5 changes: 4 additions & 1 deletion src/core/runtime/isa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,10 @@ const IsaRegistry::IsaMap IsaRegistry::GetSupportedIsas() {
ISAREG_ENTRY_GEN(10, 1, 1, true, false)
ISAREG_ENTRY_GEN(10, 1, 2, false, false)
ISAREG_ENTRY_GEN(10, 1, 2, true, false)

ISAREG_ENTRY_GEN(10, 3, 0, false, false)
ISAREG_ENTRY_GEN(10, 3, 0, true, false)
ISAREG_ENTRY_GEN(10, 3, 1, false, false)
ISAREG_ENTRY_GEN(10, 3, 1, true, false)
return supported_isas;
}

Expand Down
7 changes: 7 additions & 0 deletions src/core/util/flag.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ class Flag {

var = os::GetEnvVar("HSA_LOADER_ENABLE_MMAP_URI");
loader_enable_mmap_uri_ = (var == "1") ? true : false;

var = os::GetEnvVar("HSA_FORCE_SDMA_SIZE");
force_sdma_size_ = var.empty() ? 1024 * 1024 : atoi(var.c_str());
}

bool check_flat_scratch() const { return check_flat_scratch_; }
Expand Down Expand Up @@ -165,6 +168,8 @@ class Flag {

bool loader_enable_mmap_uri() const { return loader_enable_mmap_uri_; }

size_t force_sdma_size() const { return force_sdma_size_; }

private:
bool check_flat_scratch_;
bool enable_vm_fault_message_;
Expand Down Expand Up @@ -193,6 +198,8 @@ class Flag {

std::string tools_lib_names_;

size_t force_sdma_size_;

DISALLOW_COPY_AND_ASSIGN(Flag);
};

Expand Down
8 changes: 8 additions & 0 deletions src/image/blit_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ extern uint8_t ocl_blit_object_gfx908[];
extern uint8_t ocl_blit_object_gfx1010[];
extern uint8_t ocl_blit_object_gfx1011[];
extern uint8_t ocl_blit_object_gfx1012[];
extern uint8_t ocl_blit_object_gfx1030[];
extern uint8_t ocl_blit_object_gfx1031[];

// Arguments inserted by OCL compiler, all zero here.
struct OCLHiddenArgs {
Expand Down Expand Up @@ -1005,6 +1007,12 @@ hsa_status_t BlitKernel::GetPatchedBlitObject(const char* agent_name,
case 1012:
*blit_code_object = ocl_blit_object_gfx1012;
break;
case 1030:
*blit_code_object = ocl_blit_object_gfx1030;
break;
case 1031:
*blit_code_object = ocl_blit_object_gfx1031;
break;
default:
return HSA_STATUS_ERROR_INVALID_ISA_NAME;
}
Expand Down
2 changes: 1 addition & 1 deletion src/image/blit_src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ set( XNACK_DEVS ${XNACK_DEVS} CACHE STRING "XNACK targets" FORCE )

# Determine the target devices if not specified
if (NOT DEFINED TARGET_DEVICES)
set (TARGET_DEVICES "gfx700;gfx701;gfx702;gfx801;gfx802;gfx803;gfx900;gfx902;gfx904;gfx906;gfx908;gfx1010;gfx1011;gfx1012")
set (TARGET_DEVICES "gfx700;gfx701;gfx702;gfx801;gfx802;gfx803;gfx900;gfx902;gfx904;gfx906;gfx908;gfx1010;gfx1011;gfx1012;gfx1030;gfx1031")
endif()
set( TARGET_DEVICES ${TARGET_DEVICES} CACHE STRING "Build targets" FORCE )

Expand Down
1 change: 1 addition & 0 deletions src/image/device_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ uint32_t DevIDToAddrLibFamily(uint32_t dev_id) {
switch (minor_ver) {
case 0:
case 1: // Navi
case 3:
switch (step) {
case 0:
case 1:
Expand Down
Loading

0 comments on commit 68b0743

Please sign in to comment.