Skip to content

Commit

Permalink
ROCm 6.2.0 updates
Browse files Browse the repository at this point in the history
  • Loading branch information
dayatsin-amd committed Aug 1, 2024
1 parent 397fa1d commit 1d1b845
Show file tree
Hide file tree
Showing 114 changed files with 11,551 additions and 2,424 deletions.
63 changes: 51 additions & 12 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ include ( GNUInstallDirs )
## Default to a shared-library build when BUILD_SHARED_LIBS has not been
## defined before this point.
if ( NOT DEFINED BUILD_SHARED_LIBS )
set ( BUILD_SHARED_LIBS ON )
endif()

## Record the current value as a BOOL cache entry with a help string.
set ( BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS} CACHE BOOL "Build shared library (.so) or not.")

## Adjust target name for static builds
Expand All @@ -85,7 +86,7 @@ if (ROCM_CCACHE_BUILD)
endif() # if (ROCM_CCACHE_BUILD)

## Get version strings
get_version ( "1.13.0" )
get_version ( "1.14.0" )
if ( ${ROCM_PATCH_VERSION} )
set ( VERSION_PATCH ${ROCM_PATCH_VERSION})
endif()
Expand Down Expand Up @@ -158,7 +159,8 @@ set_property(TARGET ${CORE_RUNTIME_TARGET} PROPERTY LINK_FLAGS ${HSA_SHARED_LINK
## ------------------------- End Compiler and Linker options ----------------------------

## Source files.
set ( SRCS core/util/lnx/os_linux.cpp
set ( SRCS core/driver/driver.cpp
core/util/lnx/os_linux.cpp
core/util/small_heap.cpp
core/util/timer.cpp
core/util/flag.cpp
Expand Down Expand Up @@ -208,6 +210,16 @@ add_dependencies( ${CORE_RUNTIME_TARGET} amd_trap_handler_v2 )
add_subdirectory( ${CMAKE_CURRENT_SOURCE_DIR}/core/runtime/blit_shaders )
add_dependencies( ${CORE_RUNTIME_TARGET} amd_blit_shaders_v2)

## Optional PC sampling support, enabled by default.
## When ON, the runtime target is compiled with HSA_PC_SAMPLING_SUPPORT defined
## and the PC sampling sources are added to it.
option(PC_SAMPLING_SUPPORT "Enable PC Sampling Support" ON)

## Test the variable by name rather than through ${...}: if() dereferences the
## name itself, so the expanded value is never re-interpreted as another
## variable name or as a bare if() keyword.
if (PC_SAMPLING_SUPPORT)
  target_compile_definitions(${CORE_RUNTIME_TARGET} PRIVATE HSA_PC_SAMPLING_SUPPORT)

  ## PCS_SRCS is kept as a variable so the source list stays visible to any
  ## later code in this file that reads it.
  set( PCS_SRCS pcs/hsa_ven_amd_pc_sampling.cpp pcs/pcs_runtime.cpp )

  target_sources( ${CORE_RUNTIME_TARGET} PRIVATE ${PCS_SRCS} )
endif()

## Turn image support on by default when the caller has not set IMAGE_SUPPORT
## and the target processor name matches one of the listed x86 / amd64 /
## loongarch64 spellings. An explicit IMAGE_SUPPORT value is left untouched.
if ( NOT DEFINED IMAGE_SUPPORT AND CMAKE_SYSTEM_PROCESSOR MATCHES "i?86|x86_64|amd64|AMD64|loongarch64" )
set ( IMAGE_SUPPORT ON )
endif()
Expand All @@ -228,6 +240,7 @@ if(${IMAGE_SUPPORT})
image/addrlib/src/core/addrlib.cpp
image/addrlib/src/core/addrlib1.cpp
image/addrlib/src/core/addrlib2.cpp
image/addrlib/src/core/addrlib3.cpp
image/addrlib/src/core/addrobject.cpp
image/addrlib/src/core/addrelemlib.cpp
image/addrlib/src/r800/ciaddrlib.cpp
Expand All @@ -236,6 +249,7 @@ if(${IMAGE_SUPPORT})
image/addrlib/src/gfx9/gfx9addrlib.cpp
image/addrlib/src/gfx10/gfx10addrlib.cpp
image/addrlib/src/gfx11/gfx11addrlib.cpp
image/addrlib/src/gfx12/gfx12addrlib.cpp
image/device_info.cpp
image/hsa_ext_image.cpp
image/image_runtime.cpp
Expand All @@ -244,6 +258,7 @@ if(${IMAGE_SUPPORT})
image/image_manager_ai.cpp
image/image_manager_nv.cpp
image/image_manager_gfx11.cpp
image/image_manager_gfx12.cpp
image/image_lut_kv.cpp
image/image_lut_gfx11.cpp
image/blit_object_gfx7xx.cpp
Expand All @@ -265,10 +280,12 @@ if(${IMAGE_SUPPORT})
${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/gfx9
${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/gfx10
${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/gfx11
${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/gfx12
${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/chip/r800
${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/chip/gfx9
${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/chip/gfx10
${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/chip/gfx11 )
${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/chip/gfx11
${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/chip/gfx12 )

target_sources( ${CORE_RUNTIME_TARGET} PRIVATE ${IMAGE_SRCS} )

Expand All @@ -278,21 +295,27 @@ if(${IMAGE_SUPPORT})

endif()

## Link dependencies.
target_link_libraries ( ${CORE_RUNTIME_TARGET} PRIVATE hsakmt::hsakmt PkgConfig::drm)
target_link_libraries ( ${CORE_RUNTIME_TARGET} PRIVATE elf::elf dl pthread rt )

find_package(rocprofiler-register)
if(rocprofiler-register_FOUND)
# The static package does not require the rocprofiler-register dependency.
# Link to hsakmt target for shared library builds
# Link to hsakmt-staticdrm target for static library builds
if( BUILD_SHARED_LIBS )
target_link_libraries ( ${CORE_RUNTIME_TARGET} PRIVATE hsakmt::hsakmt PkgConfig::drm)
find_package(rocprofiler-register)
if(rocprofiler-register_FOUND)
target_compile_definitions(${CORE_RUNTIME_TARGET} PRIVATE HSA_ROCPROFILER_REGISTER=1
HSA_VERSION_MAJOR=${VERSION_MAJOR}
HSA_VERSION_MINOR=${VERSION_MINOR}
HSA_VERSION_PATCH=${VERSION_PATCH})
target_link_libraries(${CORE_RUNTIME_TARGET} PRIVATE rocprofiler-register::rocprofiler-register)
set(HSA_DEP_ROCPROFILER_REGISTER ON)
else()
else()
set(HSA_DEP_ROCPROFILER_REGISTER OFF)
endif()
endif() # end rocprofiler-register_FOUND
else()
include_directories(${drm_INCLUDE_DIRS})
target_link_libraries ( ${CORE_RUNTIME_TARGET} PRIVATE hsakmt-staticdrm::hsakmt-staticdrm)
endif()#end BUILD_SHARED_LIBS

## Set the VERSION and SOVERSION values
set_property ( TARGET ${CORE_RUNTIME_TARGET} PROPERTY VERSION "${SO_VERSION_STRING}" )
Expand All @@ -312,7 +335,7 @@ if( NOT ${BUILD_SHARED_LIBS} )
add_dependencies( ${CORE_RUNTIME_NAME} ${CORE_RUNTIME_TARGET} )

## Add external link requirements.
target_link_libraries ( ${CORE_RUNTIME_NAME} INTERFACE hsakmt::hsakmt )
target_link_libraries ( ${CORE_RUNTIME_NAME} INTERFACE hsakmt-staticdrm::hsakmt-staticdrm )
target_link_libraries ( ${CORE_RUNTIME_NAME} INTERFACE elf::elf dl pthread rt )

install ( TARGETS ${CORE_RUNTIME_NAME} EXPORT ${CORE_RUNTIME_NAME}Targets )
Expand Down Expand Up @@ -417,7 +440,6 @@ endif()
## Packaging directives
set ( CPACK_GENERATOR "DEB;RPM" CACHE STRING "Package types to build")
set ( ENABLE_LDCONFIG ON CACHE BOOL "Set library links and caches using ldconfig.")

## Only pack the "binary" and "dev" components, post install script will add the directory link.
set ( CPACK_COMPONENTS_ALL binary dev )
# ASAN Package will have libraries and license file
Expand Down Expand Up @@ -518,5 +540,22 @@ endif()
set ( CPACK_RPM_PACKAGE_PROVIDES "hsa-ext-rocr-dev hsa-rocr-dev" )
set ( CPACK_RPM_PACKAGE_OBSOLETES "hsa-ext-rocr-dev" )

if( NOT BUILD_SHARED_LIBS )
  ## Static builds are shipped as a separate "static" package. Describe it
  ## once, then give the per-format package name and dependency list.
  set ( CPACK_COMPONENT_STATIC_DESCRIPTION "HSA (Heterogenous System Architecture) core runtime - Linux static libraries" )

  ## RPM: same requirements as the binary package plus the hsakmt devel package.
  set ( CPACK_RPM_STATIC_PACKAGE_NAME "hsa-rocr-static-devel" )
  set ( CPACK_RPM_STATIC_PACKAGE_REQUIRES "${CPACK_RPM_BINARY_PACKAGE_REQUIRES} hsakmt-roct-devel" )

  ## DEB: same dependencies as the binary package plus the hsakmt dev package.
  ## NOTE(review): this presumes CPACK_DEBIAN_BINARY_PACKAGE_DEPENDS is
  ## non-empty here; otherwise the list starts with a comma - confirm.
  set ( CPACK_DEBIAN_STATIC_PACKAGE_NAME "hsa-rocr-static-dev" )
  set ( CPACK_DEBIAN_STATIC_PACKAGE_DEPENDS "${CPACK_DEBIAN_BINARY_PACKAGE_DEPENDS}, hsakmt-roct-dev" )
endif()
## Include packaging
include ( CPack )
# Static package generation:
# group the binary and dev components into a single package.
if( NOT BUILD_SHARED_LIBS )
  ## Declare the "static" group, then attach both packaged components to it.
  cpack_add_component_group("static")
  foreach( _hsa_static_component IN ITEMS binary dev )
    cpack_add_component( ${_hsa_static_component} GROUP static )
  endforeach()
endif()
10 changes: 10 additions & 0 deletions src/core/common/hsa_table_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1238,6 +1238,11 @@ hsa_status_t HSA_API hsa_amd_vmem_address_reserve(void** ptr, size_t size, uint6
return amdExtTable->hsa_amd_vmem_address_reserve_fn(ptr, size, address, flags);
}

// Public entry point for hsa_amd_vmem_address_reserve_align.
// Forwards ptr, size, address, alignment and flags unchanged to the
// hsa_amd_vmem_address_reserve_align_fn entry of amdExtTable and returns the
// status that call produces.
hsa_status_t HSA_API hsa_amd_vmem_address_reserve_align(void** ptr, size_t size, uint64_t address,
uint64_t alignment, uint64_t flags) {
return amdExtTable->hsa_amd_vmem_address_reserve_align_fn(ptr, size, address, alignment, flags);
}

// Public entry point for hsa_amd_vmem_address_free.
// Forwards ptr and size unchanged to the hsa_amd_vmem_address_free_fn entry
// of amdExtTable and returns the status that call produces.
hsa_status_t HSA_API hsa_amd_vmem_address_free(void* ptr, size_t size) {
return amdExtTable->hsa_amd_vmem_address_free_fn(ptr, size);
}
Expand Down Expand Up @@ -1298,6 +1303,11 @@ hsa_status_t HSA_API hsa_amd_agent_set_async_scratch_limit(hsa_agent_t agent, si
return amdExtTable->hsa_amd_agent_set_async_scratch_limit_fn(agent, threshold);
}

// Public entry point for hsa_amd_queue_get_info.
// Forwards queue, attribute and value unchanged to the
// hsa_amd_queue_get_info_fn entry of amdExtTable and returns the status that
// call produces.
hsa_status_t HSA_API hsa_amd_queue_get_info(hsa_queue_t* queue,
hsa_queue_info_attribute_t attribute, void* value) {
return amdExtTable->hsa_amd_queue_get_info_fn(queue, attribute, value);
}

// Tools only table interfaces.
namespace rocr {

Expand Down
79 changes: 79 additions & 0 deletions src/core/driver/driver.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/driver.h"

#include <fcntl.h>
#include <unistd.h>

#include "inc/hsa.h"

namespace rocr {
namespace core {

// Stores the device node name and the agent device type for later use.
// The constructor body is empty: no device node is opened here (see Open()).
// NOTE(review): fd_ is not initialized in this list; presumably it has a
// default member initializer in the class declaration - confirm.
Driver::Driver(const std::string devnode_name, Agent::DeviceType agent_device_type)
: agent_device_type_(agent_device_type), devnode_name_(devnode_name) { }

// Opens the device node named by devnode_name_ for reading and writing, with
// the close-on-exec flag set, and stores the resulting descriptor in fd_.
//
// Returns HSA_STATUS_SUCCESS when open() yields a non-negative descriptor and
// HSA_STATUS_ERROR_OUT_OF_RESOURCES for any open() failure.
hsa_status_t Driver::Open() {
  fd_ = open(devnode_name_.c_str(), O_RDWR | O_CLOEXEC);
  return (fd_ >= 0) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}

// Closes the device node descriptor held in fd_, if there is one.
//
// fd_ is reset to -1 after the close attempt whether or not close() succeeds,
// so calling Close() again is a no-op that reports success.
//
// Returns HSA_STATUS_SUCCESS when no descriptor was open or close() succeeded,
// and HSA_STATUS_ERROR when close() reported a failure.
hsa_status_t Driver::Close() {
  // A negative fd_ means there is nothing to release.
  // NOTE(review): assumes fd_ holds a negative sentinel before the first
  // Open(); its initializer is not visible in this file - confirm.
  if (fd_ < 0) {
    return HSA_STATUS_SUCCESS;
  }

  // 0 is a valid descriptor (Open() only rejects negative values), so it must
  // be closed as well; testing "fd_ > 0" would leak it.
  const int ret = close(fd_);
  fd_ = -1;

  return ret ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
}

} // namespace core
} // namespace rocr
12 changes: 10 additions & 2 deletions src/core/inc/amd_aql_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,11 @@ class AqlQueue : public core::Queue, private core::LocalSignal, public core::Doo
/// @return hsa_status_t
hsa_status_t GetCUMasking(uint32_t num_cu_mask_count, uint32_t* cu_mask) override;

/// @brief Submits a block of PM4 and waits until it has been executed.
void ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b) override;
/// @brief Submits a block of PM4 and waits until it has been executed.
void ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b,
hsa_fence_scope_t acquireFence = HSA_FENCE_SCOPE_NONE,
hsa_fence_scope_t releaseFence = HSA_FENCE_SCOPE_NONE,
hsa_signal_t* signal = NULL) override;

/// @brief Enables/Disables profiling overrides SetProfiling from core::Queue
void SetProfiling(bool enabled) override;
Expand All @@ -208,6 +211,9 @@ class AqlQueue : public core::Queue, private core::LocalSignal, public core::Doo
/// @brief Update signal value using Release semantics
void StoreRelease(hsa_signal_value_t value) override;

/// @brief Provide information about the queue
hsa_status_t GetInfo(hsa_queue_info_attribute_t attribute, void* value) override;

/// @brief Enable use of GWS from this queue.
hsa_status_t EnableGWS(int gws_slot_count);

Expand Down Expand Up @@ -246,9 +252,11 @@ class AqlQueue : public core::Queue, private core::LocalSignal, public core::Doo
void FillBufRsrcWord3();
void FillBufRsrcWord3_Gfx10();
void FillBufRsrcWord3_Gfx11();
void FillBufRsrcWord3_Gfx12();
void FillComputeTmpRingSize();
void FillAltComputeTmpRingSize();
void FillComputeTmpRingSize_Gfx11();
void FillComputeTmpRingSize_Gfx12();

void FreeMainScratchSpace();
void FreeAltScratchSpace();
Expand Down
Loading

0 comments on commit 1d1b845

Please sign in to comment.