Skip to content

Commit

Permalink
ROCm 1.8.0 updates
Browse files Browse the repository at this point in the history
  • Loading branch information
jedwards-AMD committed May 9, 2018
1 parent 0e8046e commit 36f9c49
Show file tree
Hide file tree
Showing 35 changed files with 1,517 additions and 993 deletions.
163 changes: 11 additions & 152 deletions src/core/inc/amd_aql_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ namespace amd {
/// @brief Encapsulates HW Aql Command Processor functionality. It
/// provides the interface for things such as Doorbell register, read,
/// write pointers and a buffer.
class AqlQueue : public core::Queue, private core::LocalSignal, public core::Signal {
class AqlQueue : public core::Queue, private core::LocalSignal, public core::DoorbellSignal {
public:
static __forceinline bool IsType(core::Signal* signal) {
return signal->IsType(&rtti_id_);
Expand Down Expand Up @@ -183,164 +183,15 @@ class AqlQueue : public core::Queue, private core::LocalSignal, public core::Sig
/// @brief Submits a block of PM4 and waits until it has been executed.
void ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b) override;

/// @brief This operation is illegal
hsa_signal_value_t LoadRelaxed() override {
assert(false);
return 0;
}

/// @brief This operation is illegal
hsa_signal_value_t LoadAcquire() override {
assert(false);
return 0;
}

/// @brief Update signal value using Relaxed semantics
void StoreRelaxed(hsa_signal_value_t value) override;

/// @brief Update signal value using Release semantics
void StoreRelease(hsa_signal_value_t value) override;

/// @brief This operation is illegal
hsa_signal_value_t WaitRelaxed(hsa_signal_condition_t condition, hsa_signal_value_t compare_value,
uint64_t timeout, hsa_wait_state_t wait_hint) override {
assert(false);
return 0;
}

/// @brief This operation is illegal
hsa_signal_value_t WaitAcquire(hsa_signal_condition_t condition, hsa_signal_value_t compare_value,
uint64_t timeout, hsa_wait_state_t wait_hint) override {
assert(false);
return 0;
}

/// @brief This operation is illegal
void AndRelaxed(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void AndAcquire(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void AndRelease(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void AndAcqRel(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void OrRelaxed(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void OrAcquire(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void OrRelease(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void OrAcqRel(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void XorRelaxed(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void XorAcquire(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void XorRelease(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void XorAcqRel(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void AddRelaxed(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void AddAcquire(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void AddRelease(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void AddAcqRel(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void SubRelaxed(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void SubAcquire(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void SubRelease(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
void SubAcqRel(hsa_signal_value_t value) override { assert(false); }

/// @brief This operation is illegal
hsa_signal_value_t ExchRelaxed(hsa_signal_value_t value) override {
assert(false);
return 0;
}

/// @brief This operation is illegal
hsa_signal_value_t ExchAcquire(hsa_signal_value_t value) override {
assert(false);
return 0;
}

/// @brief This operation is illegal
hsa_signal_value_t ExchRelease(hsa_signal_value_t value) override {
assert(false);
return 0;
}

/// @brief This operation is illegal
hsa_signal_value_t ExchAcqRel(hsa_signal_value_t value) override {
assert(false);
return 0;
}

/// @brief This operation is illegal
hsa_signal_value_t CasRelaxed(hsa_signal_value_t expected, hsa_signal_value_t value) override {
assert(false);
return 0;
}

/// @brief This operation is illegal
hsa_signal_value_t CasAcquire(hsa_signal_value_t expected, hsa_signal_value_t value) override {
assert(false);
return 0;
}

/// @brief This operation is illegal
hsa_signal_value_t CasRelease(hsa_signal_value_t expected, hsa_signal_value_t value) override {
assert(false);
return 0;
}

/// @brief This operation is illegal
hsa_signal_value_t CasAcqRel(hsa_signal_value_t expected, hsa_signal_value_t value) override {
assert(false);
return 0;
}

/// @brief This operation is illegal
hsa_signal_value_t* ValueLocation() const override {
assert(false);
return NULL;
}

/// @brief This operation is illegal
HsaEvent* EopEvent() override {
assert(false);
return NULL;
}

protected:
/// @brief Runtime type check: returns true only when @p id is the address of
/// this class's rtti_id_ member.
bool _IsA(Queue::rtti_t id) const override { return id == &rtti_id_; }

/// @brief Disallow destroying doorbell apart from its queue.
/// The body only trips assert(false); when assertions are compiled out
/// (NDEBUG) the call does nothing.
void doDestroySignal() override { assert(false); }

private:
uint32_t ComputeRingBufferMinPkts();
uint32_t ComputeRingBufferMaxPkts();
Expand All @@ -353,12 +204,16 @@ class AqlQueue : public core::Queue, private core::LocalSignal, public core::Sig
void CloseRingBufferFD(const char* ring_buf_shm_path, int fd) const;
int CreateRingBufferFD(const char* ring_buf_shm_path, uint32_t ring_buf_phys_size_bytes) const;

static bool DynamicScratchHandler(hsa_signal_value_t error_code, void* arg);

/// @brief Define the Scratch Buffer Descriptor and related parameters
/// that enable kernel access to scratch memory
void InitScratchSRD();

/// @brief Halt the queue without destroying it or fencing memory.
void Suspend();

/// @brief Handler for hardware queue events.
static bool DynamicScratchHandler(hsa_signal_value_t error_code, void* arg);

// AQL packet ring buffer
void* ring_buf_;

Expand Down Expand Up @@ -395,6 +250,10 @@ class AqlQueue : public core::Queue, private core::LocalSignal, public core::Sig
uint32_t pm4_ib_size_b_;
KernelMutex pm4_ib_mutex_;

// Error handler control variable.
// NOTE(review): the enumerators below are distinct single-bit values
// (1, 2, 4), so they are presumably combined as a bit mask in
// dynamicScratchState -- confirm against DynamicScratchHandler.
std::atomic<uint32_t> dynamicScratchState;
enum { ERROR_HANDLER_DONE = 1, ERROR_HANDLER_TERMINATE = 2, ERROR_HANDLER_SCRATCH_RETRY = 4 };

// Shared event used for queue errors
static HsaEvent* queue_event_;

Expand Down
12 changes: 5 additions & 7 deletions src/core/inc/amd_blit_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,24 +76,26 @@ class BlitKernel : public core::Blit {
/// @brief Submit an AQL packet to perform vector copy. The call is blocking
/// until the command execution is finished.
///
/// @param p2p true if it is a peer-to-peer copy
/// @param dst Memory address of the copy destination.
/// @param src Memory address of the copy source.
/// @param size Size of the data to be copied.
virtual hsa_status_t SubmitLinearCopyCommand(void* dst, const void* src,
size_t size) override;
virtual hsa_status_t SubmitLinearCopyCommand(bool p2p, void* dst,
const void* src, size_t size) override;

/// @brief Submit a linear copy command to the underlying compute device's
/// control block. The call is non blocking. The memory transfer will start
/// after all dependent signals are satisfied. After the transfer is
/// completed, the out signal will be decremented.
///
/// @param p2p true if it is a peer-to-peer copy
/// @param dst Memory address of the copy destination.
/// @param src Memory address of the copy source.
/// @param size Size of the data to be copied.
/// @param dep_signals Array of dependent signals.
/// @param out_signal Output signal.
virtual hsa_status_t SubmitLinearCopyCommand(
void* dst, const void* src, size_t size,
bool p2p, void* dst, const void* src, size_t size,
std::vector<core::Signal*>& dep_signals,
core::Signal& out_signal) override;

Expand Down Expand Up @@ -152,10 +154,6 @@ class BlitKernel : public core::Blit {
/// packet processor doesn't get invalid packet.
void ReleaseWriteIndex(uint64_t write_index, uint32_t num_packet);

/// Wait until all packets are finished.
hsa_status_t FenceRelease(uint64_t write_index, uint32_t num_copy_packet,
hsa_fence_scope_t fence);

void PopulateQueue(uint64_t index, uint64_t code_handle, void* args,
uint32_t grid_size_x, hsa_signal_t completion_signal);

Expand Down
59 changes: 36 additions & 23 deletions src/core/inc/amd_blit_sdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class BlitSdmaBase : public core::Blit {
static const size_t kCopyPacketSize;
static const size_t kMaxSingleCopySize;
static const size_t kMaxSingleFillSize;
/// @brief Identifies this blit object as SDMA-based; always returns true.
virtual bool isSDMA() const override { return true; }
};

// RingIndexTy: 32/64-bit monotonic ring index, counting in bytes.
Expand All @@ -69,7 +70,7 @@ class BlitSdmaBase : public core::Blit {
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset>
class BlitSdma : public BlitSdmaBase {
public:
explicit BlitSdma();
explicit BlitSdma(bool copy_direction);

virtual ~BlitSdma() override;

Expand All @@ -94,24 +95,26 @@ class BlitSdma : public BlitSdmaBase {

/// @brief Submit a linear copy command to the queue buffer.
///
/// @param p2p true if it is a peer-to-peer copy
/// @param dst Memory address of the copy destination.
/// @param src Memory address of the copy source.
/// @param size Size of the data to be copied.
virtual hsa_status_t SubmitLinearCopyCommand(void* dst, const void* src,
size_t size) override;
virtual hsa_status_t SubmitLinearCopyCommand(bool p2p, void* dst,
const void* src, size_t size) override;

/// @brief Submit a linear copy command to the underlying compute device's
/// control block. The call is non blocking. The memory transfer will start
/// after all dependent signals are satisfied. After the transfer is
/// completed, the out signal will be decremented.
///
/// @param p2p true if it is a peer-to-peer copy
/// @param dst Memory address of the copy destination.
/// @param src Memory address of the copy source.
/// @param size Size of the data to be copied.
/// @param dep_signals Array of dependent signals.
/// @param out_signal Output signal.
virtual hsa_status_t SubmitLinearCopyCommand(
void* dst, const void* src, size_t size,
bool p2p, void* dst, const void* src, size_t size,
std::vector<core::Signal*>& dep_signals,
core::Signal& out_signal) override;

Expand All @@ -125,7 +128,7 @@ class BlitSdma : public BlitSdmaBase {

virtual hsa_status_t EnableProfiling(bool enable) override;

protected:
private:
/// @brief Acquires the address into queue buffer where a new command
/// packet of specified size could be written. The address that is
/// returned is guaranteed to be unique even in a multi-threaded access
Expand Down Expand Up @@ -170,6 +173,9 @@ class BlitSdma : public BlitSdmaBase {
void BuildFenceCommand(char* fence_command_addr, uint32_t* fence,
uint32_t fence_value);

/// @brief Build Hdp Flush command
void BuildHdpFlushCommand(char* cmd_addr);

uint32_t* ObtainFenceObject();

void WaitFence(uint32_t* fence, uint32_t fence_value);
Expand Down Expand Up @@ -204,19 +210,25 @@ class BlitSdma : public BlitSdmaBase {
RingIndexTy cached_reserve_index_;
RingIndexTy cached_commit_index_;

uint32_t linear_copy_command_size_;
static const uint32_t linear_copy_command_size_;

static const uint32_t fill_command_size_;

static const uint32_t fence_command_size_;

uint32_t fill_command_size_;
static const uint32_t poll_command_size_;

uint32_t fence_command_size_;
static const uint32_t flush_command_size_;

uint32_t poll_command_size_;
static const uint32_t atomic_command_size_;

uint32_t atomic_command_size_;
static const uint32_t timestamp_command_size_;

uint32_t timestamp_command_size_;
static const uint32_t trap_command_size_;

uint32_t trap_command_size_;
// Flag to indicate if sDMA queue is used for H2D copy operations
// true if used for H2D operations, false otherwise
const bool sdma_h2d_;

// Max copy size of a single linear copy command packet.
size_t max_single_linear_copy_size_;
Expand All @@ -232,19 +244,20 @@ class BlitSdma : public BlitSdmaBase {

/// True if platform atomic is supported.
bool platform_atomic_support_;

/// True if sDMA supports HDP flush
bool hdp_flush_support_;
};

class BlitSdmaV2V3
// Ring indices are 32-bit.
// HW ring indices are not monotonic (wrap at end of ring).
// Count fields of SDMA commands are 0-based.
: public BlitSdma<uint32_t, false, 0> {};

class BlitSdmaV4
// Ring indices are 64-bit.
// HW ring indices are monotonic (do not wrap at end of ring).
// Count fields of SDMA commands are 1-based.
: public BlitSdma<uint64_t, true, -1> {};
// Ring indices are 32-bit.
// HW ring indices are not monotonic (wrap at end of ring).
// Count fields of SDMA commands are 0-based.
typedef BlitSdma<uint32_t, false, 0> BlitSdmaV2V3;

// Ring indices are 64-bit.
// HW ring indices are monotonic (do not wrap at end of ring).
// Count fields of SDMA commands are 1-based.
typedef BlitSdma<uint64_t, true, -1> BlitSdmaV4;

} // namespace amd

Expand Down
1 change: 1 addition & 0 deletions src/core/inc/amd_elf_image.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ namespace amd {

virtual uint16_t Machine() = 0;
virtual uint16_t Type() = 0;
virtual uint32_t EFlags() = 0;

std::string output() { return out.str(); }

Expand Down
Loading

0 comments on commit 36f9c49

Please sign in to comment.