Skip to content

Commit 8d56c60

Browse files
committed
ROCm 1.6.1 updates.
1 parent 9f1f9f8 commit 8d56c60

File tree

7 files changed

+32
-43
lines changed

7 files changed

+32
-43
lines changed

src/core/inc/queue.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,15 +64,13 @@ struct AqlPacket {
6464
hsa_agent_dispatch_packet_t agent;
6565
};
6666

67-
uint8_t type() {
67+
uint8_t type() const {
6868
return ((dispatch.header >> HSA_PACKET_HEADER_TYPE) &
6969
((1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1));
7070
}
7171

72-
bool IsValid() {
73-
const uint8_t packet_type = dispatch.header >> HSA_PACKET_HEADER_TYPE;
74-
return (packet_type > HSA_PACKET_TYPE_INVALID &&
75-
packet_type <= HSA_PACKET_TYPE_BARRIER_OR);
72+
bool IsValid() const {
73+
return (type() <= HSA_PACKET_TYPE_BARRIER_OR) & (type() != HSA_PACKET_TYPE_INVALID);
7674
}
7775

7876
std::string string() const {

src/core/runtime/amd_aql_queue.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -775,17 +775,20 @@ void AqlQueue::ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b) {
775775
// pm4_ib_buf_ is a shared resource, so mutually exclude here.
776776
ScopedAcquire<KernelMutex> lock(&pm4_ib_mutex_);
777777

778+
// Obtain reference to any container queue.
779+
core::Queue* queue = core::Queue::Convert(public_handle());
780+
778781
// Obtain a queue slot for a single AQL packet.
779-
uint64_t write_idx = AddWriteIndexAcqRel(1);
782+
uint64_t write_idx = queue->AddWriteIndexAcqRel(1);
780783

781-
while ((write_idx - LoadReadIndexRelaxed()) > public_handle()->size) {
784+
while ((write_idx - queue->LoadReadIndexRelaxed()) > queue->amd_queue_.hsa_queue.size) {
782785
os::YieldThread();
783786
}
784787

785-
uint32_t slot_idx = uint32_t(write_idx % public_handle()->size);
788+
uint32_t slot_idx = uint32_t(write_idx % queue->amd_queue_.hsa_queue.size);
786789
constexpr uint32_t slot_size_b = 0x40;
787790
uint32_t* queue_slot =
788-
(uint32_t*)(uintptr_t(public_handle()->base_address) + (slot_idx * slot_size_b));
791+
(uint32_t*)(uintptr_t(queue->amd_queue_.hsa_queue.base_address) + (slot_idx * slot_size_b));
789792

790793
// Copy client PM4 command into IB.
791794
assert(cmd_size_b < pm4_ib_size_b_ && "PM4 exceeds IB size");
@@ -877,12 +880,13 @@ void AqlQueue::ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b) {
877880
atomic::Store(&queue_slot[0], slot_data[0], std::memory_order_release);
878881

879882
// Submit the packet slot.
880-
core::Signal* doorbell =
881-
core::Signal::Convert(public_handle()->doorbell_signal);
883+
core::Signal* doorbell = core::Signal::Convert(queue->amd_queue_.hsa_queue.doorbell_signal);
882884
doorbell->StoreRelease(write_idx);
883885

884886
// Wait for the packet to be consumed.
885-
while (LoadReadIndexRelaxed() <= write_idx) {
887+
// Should be switched to a signal wait when aql_pm4_ib can be used on all
888+
// supported platforms.
889+
while (queue->LoadReadIndexRelaxed() <= write_idx) {
886890
os::YieldThread();
887891
}
888892
}

src/core/runtime/amd_gpu_agent.cpp

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -512,18 +512,10 @@ hsa_status_t GpuAgent::VisitRegion(
512512
}
513513

514514
core::Queue* GpuAgent::CreateInterceptibleQueue() {
515-
// Until tools runtime is merged in we need to use HSA API
516-
// rather than GpuAgent::QueueCreate to allow interception.
517-
hsa_queue_t* queue_handle;
518-
hsa_status_t status =
519-
HSA::hsa_queue_create(public_handle(), minAqlSize_, HSA_QUEUE_TYPE_MULTI,
520-
NULL, NULL, 0, 0, &queue_handle);
521-
522-
if (status != HSA_STATUS_SUCCESS) {
523-
return NULL;
524-
}
525-
526-
return core::Queue::Convert(queue_handle);
515+
// Disabled intercept of internal queues pending tools updates.
516+
core::Queue* queue = nullptr;
517+
QueueCreate(minAqlSize_, HSA_QUEUE_TYPE_MULTI, NULL, NULL, 0, 0, &queue);
518+
return queue;
527519
}
528520

529521
core::Blit* GpuAgent::CreateBlitSdma() {
@@ -643,15 +635,14 @@ hsa_status_t GpuAgent::DmaCopy(void* dst, core::Agent& dst_agent,
643635
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
644636
}
645637

646-
hsa_status_t stat =
647-
blit->SubmitLinearCopyCommand(dst, src, size, dep_signals, out_signal);
648-
649-
if (profiling_enabled() && HSA_STATUS_SUCCESS == stat) {
638+
if (profiling_enabled()) {
650639
// Track the agent so we can translate the resulting timestamp to system
651640
// domain correctly.
652641
out_signal.async_copy_agent(this);
653642
}
654643

644+
hsa_status_t stat = blit->SubmitLinearCopyCommand(dst, src, size, dep_signals, out_signal);
645+
655646
return stat;
656647
}
657648

src/core/runtime/runtime.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ hsa_status_t Runtime::Acquire() {
113113
hsa_status_t status = runtime_singleton_->Load();
114114

115115
if (status != HSA_STATUS_SUCCESS) {
116+
runtime_singleton_->ref_count_--;
116117
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
117118
}
118119
}
@@ -683,7 +684,7 @@ hsa_status_t Runtime::PtrInfo(void* ptr, hsa_amd_pointer_info_t* info, void* (*a
683684
hsa_amd_pointer_info_t retInfo;
684685

685686
// check output struct is at least as large as the first info revision.
686-
if (info->size < sizeof(struct hsa_amd_pointer_info_v1_s)) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
687+
if (info->size < 40) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
687688

688689
bool returnListData =
689690
((alloc != nullptr) && (num_agents_accessible != nullptr) && (accessible != nullptr));

src/core/util/lnx/os_linux.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -322,11 +322,14 @@ int ResetOsEvent(EventHandle event) {
322322
return ret_code;
323323
}
324324

325+
static double invPeriod = 0.0;
326+
325327
uint64_t ReadAccurateClock() {
328+
if (invPeriod == 0.0) AccurateClockFrequency();
326329
timespec time;
327330
int err = clock_gettime(CLOCK_MONOTONIC_RAW, &time);
328331
assert(err == 0 && "clock_gettime(CLOCK_MONOTONIC_RAW,...) failed");
329-
return uint64_t(time.tv_sec) * 1000000000ull + uint64_t(time.tv_nsec);
332+
return (uint64_t(time.tv_sec) * 1000000000ull + uint64_t(time.tv_nsec)) * invPeriod;
330333
}
331334

332335
uint64_t AccurateClockFrequency() {
@@ -339,7 +342,8 @@ uint64_t AccurateClockFrequency() {
339342
assert(time.tv_nsec < 0xFFFFFFFF &&
340343
"clock_getres(CLOCK_MONOTONIC_RAW,...) returned very low frequency "
341344
"(<1Hz).");
342-
return uint64_t(time.tv_nsec) * 1000000000ull;
345+
if (invPeriod == 0.0) invPeriod = 1.0 / double(time.tv_nsec);
346+
return 1000000000ull / uint64_t(time.tv_nsec);
343347
}
344348
}
345349

src/core/util/timer.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@ fast_clock::init::init() {
9393

9494
fast_clock::freq = double(min) / duration_in_seconds(elapsed);
9595
fast_clock::period_ps = 1e12 / fast_clock::freq;
96+
// printf("Timer setup took %f ms\n", duration_in_seconds(elapsed)*1000.0f);
97+
// printf("Fast clock frequency: %f MHz\n", double(fast_clock::freq)/1e6);
9698
}
9799

98100
double accurate_clock::period_ns;

src/inc/hsa_ext_amd.h

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1320,7 +1320,7 @@ typedef enum {
13201320
* @brief Describes a memory allocation known to ROCr.
13211321
* Within a ROCr major version this structure can only grow.
13221322
*/
1323-
typedef struct hsa_amd_pointer_info_v1_s {
1323+
typedef struct hsa_amd_pointer_info_s {
13241324
/*
13251325
Size in bytes of this structure. Used for version control within a major ROCr
13261326
revision. Set to sizeof(hsa_amd_pointer_info_t) prior to calling
@@ -1349,17 +1349,6 @@ typedef struct hsa_amd_pointer_info_v1_s {
13491349
Application provided value.
13501350
*/
13511351
void* userData;
1352-
} hsa_amd_pointer_info_v1_t;
1353-
1354-
/**
1355-
* @brief Minor version updates to pointer info.
1356-
*/
1357-
#ifdef __cplusplus
1358-
typedef struct hsa_amd_pointer_info_v2_s : hsa_amd_pointer_info_v1_t {
1359-
#else
1360-
typedef struct hsa_amd_pointer_info_v2_t {
1361-
struct hsa_amd_pointer_info_v1_t;
1362-
#endif
13631352
/*
13641353
Reports an agent which "owns" (i.e. has preferred access to) the pool in which the allocation was
13651354
made. When multiple agents share equal access to a pool (e.g. multiple CPU agents, or multi-die

0 commit comments

Comments
 (0)