Skip to content

Commit

Permalink
command buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
makslevental committed Oct 11, 2024
1 parent f5a6459 commit 996a7bb
Show file tree
Hide file tree
Showing 9 changed files with 141 additions and 122 deletions.
29 changes: 10 additions & 19 deletions runtime/src/iree-amd-aie/driver/xrt-lite/allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -125,26 +125,9 @@ struct iree_hal_xrt_lite_allocator {
this->query_buffer_compatibility(&compat_params, &allocation_size);
if (!iree_all_bits_set(compatibility,
IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE)) {
// TODO(benvanik): make a helper for this.
#if IREE_STATUS_MODE
iree_bitfield_string_temp_t temp0, temp1, temp2;
iree_string_view_t memory_type_str =
iree_hal_memory_type_format(params->type, &temp0);
iree_string_view_t usage_str =
iree_hal_buffer_usage_format(params->usage, &temp1);
iree_string_view_t compatibility_str =
iree_hal_buffer_compatibility_format(compatibility, &temp2);
return iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"allocator cannot allocate a buffer with the given parameters; "
"memory_type=%.*s, usage=%.*s, compatibility=%.*s",
(int)memory_type_str.size, memory_type_str.data, (int)usage_str.size,
usage_str.data, (int)compatibility_str.size, compatibility_str.data);
#else
return iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"allocator cannot allocate a buffer with the given parameters");
#endif // IREE_STATUS_MODE
}

// TODO(null): allocate the underlying device memory.
Expand All @@ -156,10 +139,18 @@ struct iree_hal_xrt_lite_allocator {
// just wrapping those device pointers in the HAL buffer type. Other
// implementations that require more tracking can provide their own buffer
// types that do such tracking for them.
(void)this;

uint32_t flags = XCL_BO_FLAGS_HOST_ONLY;
// if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_HOST_CACHED)) {
// flags = XCL_BO_FLAGS_CACHEABLE;
// } else if (iree_all_bits_set(params->type,
// IREE_HAL_MEMORY_TYPE_OPTIMAL_FOR_DEVICE)) {
// // TODO(max): the test here isn't specific enough
// flags = XCL_BO_FLAGS_EXECBUF;
// }

std::unique_ptr<shim_xdna::bo> bo =
shim_device->alloc_bo(allocation_size, XCL_BO_FLAGS_HOST_ONLY);
shim_device->alloc_bo(allocation_size, flags);
iree_hal_buffer_t* buffer = nullptr;
iree_status_t status = iree_hal_xrt_lite_buffer_wrap(
std::move(bo), reinterpret_cast<iree_hal_allocator_t*>(this),
Expand Down
160 changes: 81 additions & 79 deletions runtime/src/iree-amd-aie/driver/xrt-lite/buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,92 +9,87 @@
#include "iree-amd-aie/driver/xrt-lite/shim/linux/kmq/bo.h"
#include "iree-amd-aie/driver/xrt-lite/util.h"

struct iree_hal_xrt_lite_buffer {
iree_hal_buffer_t base;
std::unique_ptr<shim_xdna::bo> bo;
iree_hal_buffer_release_callback_t release_callback;

iree_status_t map_range(iree_hal_mapping_mode_t mapping_mode,
iree_hal_memory_access_t memory_access,
iree_device_size_t local_byte_offset,
iree_device_size_t local_byte_length,
iree_hal_buffer_mapping_t* mapping) {
IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_memory_type(
iree_hal_buffer_memory_type(
reinterpret_cast<const iree_hal_buffer_t*>(this)),
IREE_HAL_MEMORY_TYPE_HOST_VISIBLE));
IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_usage(
iree_hal_buffer_allowed_usage(
reinterpret_cast<const iree_hal_buffer_t*>(this)),
mapping_mode == IREE_HAL_MAPPING_MODE_PERSISTENT
? IREE_HAL_BUFFER_USAGE_MAPPING_PERSISTENT
: IREE_HAL_BUFFER_USAGE_MAPPING_SCOPED));

// TODO(null): perform mapping as described. Note that local-to-buffer range
// adjustment may be required. The resulting mapping is populated with
// standard information such as contents indicating the host addressable
// memory range of the mapped buffer and implementation-specific information
// if additional resources are required. iree_hal_buffer_emulated_map_range
// can be used by implementations that have no way of providing host
// pointers at a large cost (alloc + device->host transfer on map and
// host->device transfer + dealloc on umap). Try not to use that.
void* host_ptr = this->bo->map();
IREE_ASSERT(host_ptr !=
nullptr); // Should be guaranteed by previous checks.
uint8_t* data_ptr = (uint8_t*)host_ptr + local_byte_offset;
iree_status_t status =
this->invalidate_range(local_byte_offset, local_byte_length);
// If we mapped for discard scribble over the bytes. This is not a mandated
// behavior but it will make debugging issues easier. Alternatively for heap
// buffers we could reallocate them such that ASAN yells, but that would
// only work if the entire buffer was discarded.
iree_status_t iree_hal_xrt_lite_buffer::map_range(
iree_hal_mapping_mode_t mapping_mode,
iree_hal_memory_access_t memory_access,
iree_device_size_t local_byte_offset, iree_device_size_t local_byte_length,
iree_hal_buffer_mapping_t* mapping) {
IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_memory_type(
iree_hal_buffer_memory_type(
reinterpret_cast<const iree_hal_buffer_t*>(this)),
IREE_HAL_MEMORY_TYPE_HOST_VISIBLE));
IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_usage(
iree_hal_buffer_allowed_usage(
reinterpret_cast<const iree_hal_buffer_t*>(this)),
mapping_mode == IREE_HAL_MAPPING_MODE_PERSISTENT
? IREE_HAL_BUFFER_USAGE_MAPPING_PERSISTENT
: IREE_HAL_BUFFER_USAGE_MAPPING_SCOPED));

// TODO(null): perform mapping as described. Note that local-to-buffer range
// adjustment may be required. The resulting mapping is populated with
// standard information such as contents indicating the host addressable
// memory range of the mapped buffer and implementation-specific information
// if additional resources are required. iree_hal_buffer_emulated_map_range
// can be used by implementations that have no way of providing host
// pointers at a large cost (alloc + device->host transfer on map and
// host->device transfer + dealloc on umap). Try not to use that.
void* host_ptr = this->bo->map();
IREE_ASSERT(host_ptr != nullptr); // Should be guaranteed by previous checks.
uint8_t* data_ptr = (uint8_t*)host_ptr + local_byte_offset;
iree_status_t status =
this->invalidate_range(local_byte_offset, local_byte_length);
// If we mapped for discard scribble over the bytes. This is not a mandated
// behavior but it will make debugging issues easier. Alternatively for heap
// buffers we could reallocate them such that ASAN yells, but that would
// only work if the entire buffer was discarded.
#ifndef NDEBUG
if (iree_any_bit_set(memory_access, IREE_HAL_MEMORY_ACCESS_DISCARD)) {
memset(data_ptr, 0xCD, local_byte_length);
}
#endif // !NDEBUG
mapping->contents = iree_make_byte_span(data_ptr, local_byte_length);
return status;
if (iree_any_bit_set(memory_access, IREE_HAL_MEMORY_ACCESS_DISCARD)) {
memset(data_ptr, 0xCD, local_byte_length);
}
#endif // !NDEBUG
mapping->contents = iree_make_byte_span(data_ptr, local_byte_length);
return status;
}

iree_status_t unmap_range(iree_device_size_t local_byte_offset,
iree_device_size_t local_byte_length,
iree_hal_buffer_mapping_t* mapping) {
// TODO(null): reverse of map_range. Note that cache invalidation is
// explicit via invalidate_range and need not be performed here. If using
// emulated mapping this must call iree_hal_buffer_emulated_unmap_range to
// release the transient resources.
return this->flush_range(local_byte_offset, local_byte_length);
}
iree_status_t iree_hal_xrt_lite_buffer::unmap_range(
iree_device_size_t local_byte_offset, iree_device_size_t local_byte_length,
iree_hal_buffer_mapping_t* mapping) {
// TODO(null): reverse of map_range. Note that cache invalidation is
// explicit via invalidate_range and need not be performed here. If using
// emulated mapping this must call iree_hal_buffer_emulated_unmap_range to
// release the transient resources.
return this->flush_range(local_byte_offset, local_byte_length);
}

iree_status_t invalidate_range(iree_device_size_t local_byte_offset,
iree_device_size_t local_byte_length) {
// TODO(null): invalidate the range if required by the buffer. Writes on the
// device are expected to be visible to the host after this returns.
if (IREE_UNLIKELY(!this->bo)) {
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"buffer does not have device memory attached and cannot be mapped");
}
this->bo->sync(shim_xdna::direction::device2host, local_byte_length,
local_byte_offset);
return iree_ok_status();
iree_status_t iree_hal_xrt_lite_buffer::invalidate_range(
iree_device_size_t local_byte_offset,
iree_device_size_t local_byte_length) {
// TODO(null): invalidate the range if required by the buffer. Writes on the
// device are expected to be visible to the host after this returns.
if (IREE_UNLIKELY(!this->bo)) {
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"buffer does not have device memory attached and cannot be mapped");
}
this->bo->sync(shim_xdna::direction::device2host, local_byte_length,
local_byte_offset);
return iree_ok_status();
}

iree_status_t flush_range(iree_device_size_t local_byte_offset,
iree_device_size_t local_byte_length) {
// TODO(null): flush the range if required by the buffer. Writes on the
// host are expected to be visible to the device after this returns.
if (IREE_UNLIKELY(!this->bo)) {
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"buffer does not have device memory attached and cannot be mapped");
}
this->bo->sync(shim_xdna::direction::host2device, local_byte_length,
local_byte_offset);
return iree_ok_status();
iree_status_t iree_hal_xrt_lite_buffer::flush_range(
iree_device_size_t local_byte_offset,
iree_device_size_t local_byte_length) {
// TODO(null): flush the range if required by the buffer. Writes on the
// host are expected to be visible to the device after this returns.
if (IREE_UNLIKELY(!this->bo)) {
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"buffer does not have device memory attached and cannot be mapped");
}
};
this->bo->sync(shim_xdna::direction::host2device, local_byte_length,
local_byte_offset);
return iree_ok_status();
}

namespace {
extern const iree_hal_buffer_vtable_t iree_hal_xrt_lite_buffer_vtable;
Expand Down Expand Up @@ -151,6 +146,13 @@ static void iree_hal_xrt_lite_buffer_destroy(iree_hal_buffer_t* base_buffer) {
IREE_TRACE_ZONE_END(z0);
}

std::unique_ptr<shim_xdna::bo> iree_hal_xrt_lite_buffer_unwrap(
iree_hal_buffer_t* base_buffer) {
iree_hal_xrt_lite_buffer* buffer =
reinterpret_cast<iree_hal_xrt_lite_buffer*>(base_buffer);
return std::move(buffer->bo);
}

#define BUFFER_MEMBER_STATUS(member) \
MEMBER_WRAPPER_STATUS(iree_hal_buffer_t, iree_hal_xrt_lite_buffer, member)

Expand Down
26 changes: 25 additions & 1 deletion runtime/src/iree-amd-aie/driver/xrt-lite/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,28 @@
#include "iree/base/api.h"
#include "iree/hal/api.h"

// Wraps an allocation in an iree_hal_buffer_t.
struct iree_hal_xrt_lite_buffer {
iree_hal_buffer_t base;
std::unique_ptr<shim_xdna::bo> bo;
iree_hal_buffer_release_callback_t release_callback;

iree_status_t map_range(iree_hal_mapping_mode_t mapping_mode,
iree_hal_memory_access_t memory_access,
iree_device_size_t local_byte_offset,
iree_device_size_t local_byte_length,
iree_hal_buffer_mapping_t* mapping);

iree_status_t unmap_range(iree_device_size_t local_byte_offset,
iree_device_size_t local_byte_length,
iree_hal_buffer_mapping_t* mapping);

iree_status_t invalidate_range(iree_device_size_t local_byte_offset,
iree_device_size_t local_byte_length);

iree_status_t flush_range(iree_device_size_t local_byte_offset,
iree_device_size_t local_byte_length);
};

iree_status_t iree_hal_xrt_lite_buffer_wrap(
std::unique_ptr<shim_xdna::bo> bo, iree_hal_allocator_t* allocator,
iree_hal_memory_type_t memory_type, iree_hal_memory_access_t allowed_access,
Expand All @@ -20,4 +41,7 @@ iree_status_t iree_hal_xrt_lite_buffer_wrap(
iree_hal_buffer_release_callback_t release_callback,
iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer);

std::unique_ptr<shim_xdna::bo> iree_hal_xrt_lite_buffer_unwrap(
iree_hal_buffer_t* base_buffer);

#endif // IREE_HAL_DRIVERS_XRT_LITE_BUFFER_H_
13 changes: 10 additions & 3 deletions runtime/src/iree-amd-aie/driver/xrt-lite/command_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@

#include "iree-amd-aie/driver/xrt-lite/command_buffer.h"

#include "buffer.h"
#include "iree-amd-aie/driver/xrt-lite/util.h"
#include "shim/linux/kmq/bo.h"

#define MAX_EXEC_BO_SIZE (4096)

namespace {
extern const iree_hal_command_buffer_vtable_t
Expand All @@ -16,6 +20,7 @@ extern const iree_hal_command_buffer_vtable_t
struct iree_hal_xrt_lite_command_buffer {
iree_hal_command_buffer_t base;
iree_allocator_t host_allocator;
iree_hal_buffer_t* exec_buffer;

iree_status_t begin() {
// TODO(null): if the implementation needs to route the begin to the
Expand Down Expand Up @@ -252,8 +257,10 @@ iree_status_t iree_hal_xrt_lite_command_buffer_create(
// iree_arena_t/block pools. Implementations should also retain any resources
// used during the recording and can use iree_hal_resource_set_t* to make that
// easier.
iree_status_t status = iree_make_status(
IREE_STATUS_UNIMPLEMENTED, "command buffers not yet implemented");
iree_hal_buffer_params_t params;
params.type = IREE_HAL_MEMORY_TYPE_OPTIMAL_FOR_DEVICE;
iree_status_t status = iree_hal_allocator_allocate_buffer(
device_allocator, params, MAX_EXEC_BO_SIZE, &command_buffer->exec_buffer);

if (iree_status_is_ok(status)) {
*out_command_buffer = &command_buffer->base;
Expand All @@ -273,7 +280,7 @@ static void iree_hal_xrt_lite_command_buffer_destroy(

// TODO(null): release any implementation resources and
// iree_hal_resource_set_t.

iree_hal_buffer_destroy(command_buffer->exec_buffer);
iree_allocator_free(host_allocator, command_buffer);

IREE_TRACE_ZONE_END(z0);
Expand Down
23 changes: 8 additions & 15 deletions runtime/src/iree-amd-aie/driver/xrt-lite/executable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
#include "iree-amd-aie/schemas/xrt_executable_def_verifier.h"
#include "iree/base/api.h"

#define MAX_EXEC_BO_SIZE (4096)

struct iree_hal_xrt_lite_native_executable_t {
// Abstract resource used for injecting reference counting and vtable; must be
// at offset 0.
Expand All @@ -33,7 +31,7 @@ extern const iree_hal_executable_vtable_t
static iree_hal_xrt_lite_native_executable_t*
iree_hal_xrt_lite_native_executable_cast(iree_hal_executable_t* base_value) {
IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_xrt_lite_native_executable_vtable);
return (iree_hal_xrt_lite_native_executable_t*)base_value;
return reinterpret_cast<iree_hal_xrt_lite_native_executable_t*>(base_value);
}

// Verifies the structure of the flatbuffer so that we can avoid doing so during
Expand Down Expand Up @@ -151,12 +149,11 @@ iree_status_t iree_hal_xrt_lite_native_executable_create(
entry_point_count * sizeof(executable->entry_points[0]) +
total_entry_point_name_chars;
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0,
iree_allocator_malloc(host_allocator, total_size, (void**)&executable));
IREE_TRACE(
char* string_table_buffer =
(char*)((char*)executable + sizeof(*executable) +
entry_point_count * sizeof(executable->entry_points[0])));
z0, iree_allocator_malloc(host_allocator, total_size,
reinterpret_cast<void**>(&executable)));
IREE_TRACE(char* string_table_buffer = reinterpret_cast<char*>(
reinterpret_cast<char*>(executable) + sizeof(*executable) +
entry_point_count * sizeof(executable->entry_points[0])));

iree_hal_resource_initialize(&iree_hal_xrt_lite_native_executable_vtable,
&executable->resource);
Expand All @@ -180,8 +177,7 @@ iree_status_t iree_hal_xrt_lite_native_executable_create(
std::vector<char> xclbinVector(
xclbin_fb, xclbin_fb + flatbuffers_string_len(xclbin_fb));
xrt::xclbin xclbin = xrt::xclbin(xclbinVector);
std::unique_ptr<shim_xdna::hw_ctx> hw_ctx =
shim_device->create_hw_context(xclbin);
params->context = shim_device->create_hw_context(xclbin);

uint32_t asm_instr_index =
flatbuffers_uint32_vec_at(asm_instr_indices_vec, entry_ordinal);
Expand All @@ -194,9 +190,6 @@ iree_status_t iree_hal_xrt_lite_native_executable_create(
size_t ctrl_code_size = num_instr * sizeof(uint32_t);
params->bo_ctrl_code =
shim_device->alloc_bo(ctrl_code_size, XCL_BO_FLAGS_CACHEABLE);
params->bo_exec_buf =
shim_device->alloc_bo(MAX_EXEC_BO_SIZE, XCL_BO_FLAGS_EXECBUF);

uint32_t* instr_buffer =
static_cast<uint32_t*>(params->bo_ctrl_code->map());
memcpy(instr_buffer, asm_inst, ctrl_code_size);
Expand Down Expand Up @@ -231,7 +224,7 @@ iree_status_t iree_hal_xrt_lite_native_executable_create(
});
}

*out_executable = (iree_hal_executable_t*)executable;
*out_executable = reinterpret_cast<iree_hal_executable_t*>(executable);
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
Expand Down
3 changes: 1 addition & 2 deletions runtime/src/iree-amd-aie/driver/xrt-lite/executable.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ extern "C" {
struct iree_hal_xrt_lite_kernel_params_t {
std::unique_ptr<shim_xdna::hw_ctx> context;
std::unique_ptr<shim_xdna::bo> bo_ctrl_code;
std::unique_ptr<shim_xdna::bo> bo_exec_buf;
// Number of assembly instructions argument to the kernel
uint32_t num_instr; // number of instructions
IREE_TRACE(iree_string_view_t kernel_name;)
Expand All @@ -45,7 +44,7 @@ iree_status_t iree_hal_xrt_lite_native_executable_entry_point_kernel_params(
iree_hal_xrt_lite_kernel_params_t* out_params);

#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif // __cplusplus

#endif // IREE_AMD_AIE_DRIVER_XRT_LITE_NATIVE_EXECUTABLE_H_
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ iree_status_t iree_hal_xrt_lite_nop_executable_cache_create(
iree_hal_executable_cache_t** out_executable_cache);

#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif // __cplusplus

#endif // IREE_AMD_AIE_DRIVER_XRT_LITE_NOP_EXECUTABLE_CACHE_H_
Loading

0 comments on commit 996a7bb

Please sign in to comment.