Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add resize kernel for c++ for opencl #260

Merged
merged 6 commits into from
Nov 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 1 addition & 10 deletions include/nmtools/array/eval/kernel_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "nmtools/array/view/flatten.hpp"
#include "nmtools/array/view/mutable_flatten.hpp"
#include "nmtools/array/view/reshape.hpp"
#include "nmtools/utility/unwrap.hpp"

#ifdef NMTOOLS_KERNEL_MAX_DIM
#define NMTOOLS_KERNEL_MAX_DIM_ NMTOOLS_KERNEL_MAX_DIM
Expand All @@ -25,16 +26,6 @@ namespace nmtools::array
{
struct create_vector_t {};

template <typename T>
constexpr auto unwrap(const T& t)
{
if constexpr (meta::is_maybe_v<T>) {
return *t;
} else {
return t;
}
}

template <auto DIM=0, typename size_type=nm_index_t, typename type>
nmtools_func_attribute
auto create_vector(const type* data_ptr, size_type dim)
Expand Down
19 changes: 16 additions & 3 deletions include/nmtools/array/eval/opencl/kernel_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ namespace nmtools::array::opencl
auto assign_vector(mutable_vector_t& lhs, const vector_t& rhs)
{
auto size = nmtools::size(lhs);
for (nm_cl_index_t i=0; i<(nm_cl_index_t)size; i++) {
for (nm_cl_size_t i=0; i<(nm_cl_size_t)size; i++) {
at(lhs,i) = at(rhs,i);
}
}
Expand All @@ -48,11 +48,24 @@ namespace nmtools::array::opencl
template <typename mutable_array_t, typename array_t>
auto assign_array(mutable_array_t& output, const array_t& input)
{
auto valid = [&](){
if constexpr (meta::is_maybe_v<array_t>) {
return static_cast<bool>(input);
} else {
return true;
}
}();
auto size = nmtools::size(output);
auto idx = get_global_id(0);
if ((nm_cl_size_t)idx < (nm_cl_size_t)size) {
if (((nm_cl_size_t)idx < (nm_cl_size_t)size) && valid) {
auto flat_lhs = view::mutable_flatten(output);
auto flat_rhs = view::flatten(input);
auto flat_rhs = [&](){
if constexpr (meta::is_maybe_v<array_t>) {
return view::flatten(*input);
} else {
return view::flatten(input);
}
}();
flat_lhs((nm_cl_index_t)idx) = flat_rhs((nm_cl_index_t)idx);
}
}
Expand Down
18 changes: 14 additions & 4 deletions include/nmtools/array/eval/opencl/kernels/repeat.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ kernel void nmtools_cl_kernel_name(out_type,inp_type) \
, const nm_cl_size_t out_dim \
, const nm_cl_size_t inp_dim \
, const nm_cl_size_t repeats_size \
, const nm_cl_size_t axis \
, const nm_cl_index_t axis \
) \
{ \
auto repeats = na::create_vector(repeats_ptr,repeats_size); \
Expand Down Expand Up @@ -93,8 +93,8 @@ namespace nmtools::array::opencl
auto inp_buffer = context->create_buffer(inp_array);
auto out_buffer = context->create_buffer<out_t>(nmtools::size(output));

uint32_t repeats_size = nmtools::len(view.repeats);
uint32_t axis = view.axis;
nm_cl_size_t repeats_size = nmtools::len(view.repeats);
nm_cl_size_t axis = view.axis;

auto kernel_name = this->kernel_name<inp_t,out_t>();

Expand Down Expand Up @@ -122,7 +122,17 @@ namespace nmtools::array::opencl
auto local_size = nmtools_array{kernel_info->preferred_work_group_size_multiple};
auto global_size = nmtools_array{size_t(std::ceil(float(out_size) / local_size[0])) * local_size[0]};

auto default_args = nmtools_tuple{out_buffer,inp_buffer,out_shape_buffer,inp_shape_buffer,repeats_buffer,index::cast<nm_cl_index_t>(out_dim),index::cast<nm_cl_index_t>(inp_dim),index::cast<nm_cl_index_t>(repeats_size),index::cast<nm_cl_index_t>(axis)};
auto default_args = nmtools_tuple{
out_buffer
, inp_buffer
, out_shape_buffer
, inp_shape_buffer
, repeats_buffer
, index::cast<nm_cl_size_t>(out_dim)
, index::cast<nm_cl_size_t>(inp_dim)
, index::cast<nm_cl_size_t>(repeats_size)
, index::cast<nm_cl_index_t>(axis)
};

context->set_args(kernel,default_args);
context->run(kernel,out_buffer,output,global_size,local_size);
Expand Down
140 changes: 140 additions & 0 deletions include/nmtools/array/eval/opencl/kernels/resize.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#ifndef NMTOOLS_ARRAY_EVAL_OPENCL_KERNELS_RESIZE_HPP
#define NMTOOLS_ARRAY_EVAL_OPENCL_KERNELS_RESIZE_HPP

#include "nmtools/array/ndarray.hpp"
#include "nmtools/array/view/resize.hpp"
#include "nmtools/array/eval/opencl/kernel_helper.hpp"
#include "nmtools/array/index/cast.hpp"

#define nmtools_cl_kernel_name(out_type,inp_type) resize##_##out_type##_##inp_type
#define nmtools_cl_kernel_name_str(out_type,inp_type) nm_stringify(resize##_##out_type##_##inp_type)

#ifdef NMTOOLS_OPENCL_BUILD_KERNELS

namespace nm = nmtools;
namespace na = nmtools::array;
namespace ix = nmtools::index;
namespace view = nmtools::view;
namespace meta = nmtools::meta;
namespace opencl = nmtools::array::opencl;
namespace detail = nmtools::view::detail;

#define nmtools_cl_kernel(out_type,inp_type) \
kernel void nmtools_cl_kernel_name(out_type,inp_type) \
( global out_type* out_ptr \
, global const inp_type* inp_ptr \
, global const nm_cl_index_t* out_shape_ptr \
, global const nm_cl_index_t* inp_shape_ptr \
, global const nm_cl_index_t* dst_shape_ptr \
, const nm_cl_size_t out_dim \
, const nm_cl_size_t inp_dim \
, const nm_cl_size_t dst_size \
) \
{ \
auto dst_shape = na::create_vector(dst_shape_ptr,dst_size); \
auto input = na::create_array(inp_ptr,inp_shape_ptr,inp_dim); \
auto output = na::create_mutable_array(out_ptr,out_shape_ptr,out_dim); \
auto resized = view::resize(input,dst_shape); \
opencl::assign_array(output,resized); \
}

nmtools_cl_kernel(float,float)
nmtools_cl_kernel(double,double)

#else // NMTOOLS_OPENCL_BUILD_KERNELS

#include "nmtools/array/eval/opencl/context.hpp"
#include <cstring> // memcpy

extern unsigned char nm_cl_resize_spv[];
extern unsigned int nm_cl_resize_spv_len;

namespace nmtools::array::opencl
{
template <typename...args_t>
struct kernel_t<
view::decorator_t<view::resize_t,args_t...>
> {
using view_t = view::decorator_t<view::resize_t,args_t...>;

view_t view;
std::shared_ptr<context_t> context;

static auto get_spirv()
{
using vector = nmtools_list<unsigned char>;
auto spirv = vector();
spirv.resize(nm_cl_resize_spv_len);
memcpy(spirv.data(),nm_cl_resize_spv,sizeof(unsigned char) * nm_cl_resize_spv_len);
return spirv;
}

template <typename inp_t, typename out_t=inp_t>
static auto kernel_name()
{
if constexpr (meta::is_same_v<inp_t,float> && meta::is_same_v<out_t,float>) {
return nmtools_cl_kernel_name_str(float,float);
} else if constexpr (meta::is_same_v<inp_t,double> && meta::is_same_v<out_t,double>) {
return nmtools_cl_kernel_name_str(double,double);
}
}

template <typename output_t>
auto eval(output_t& output)
{
using out_t = meta::get_element_type_t<output_t>;

const auto& inp_array = *get_array(view);

using inp_t = meta::get_element_type_t<meta::remove_cvref_pointer_t<decltype(inp_array)>>;

auto inp_buffer = context->create_buffer(inp_array);
auto out_buffer = context->create_buffer<out_t>(nmtools::size(output));

auto kernel_name = this->kernel_name<inp_t,out_t>();

if (!context->has_kernel(kernel_name)) {
context->create_kernel(get_spirv(),kernel_name);
}

auto kernel = context->get_kernel(kernel_name);

auto out_size = nmtools::size(output);
[[maybe_unused]] auto inp_size = nmtools::size(inp_array);

auto out_shape = nmtools::shape(output);
auto inp_shape = nmtools::shape(inp_array);
auto dst_shape = view.dst_shape;

auto out_shape_buffer = context->create_buffer(index::cast<nm_cl_index_t>(out_shape));
auto inp_shape_buffer = context->create_buffer(index::cast<nm_cl_index_t>(inp_shape));
auto dst_shape_buffer = context->create_buffer(index::cast<nm_cl_index_t>(dst_shape));

auto out_dim = nmtools::len(out_shape);
auto inp_dim = nmtools::len(inp_shape);
auto dst_len = nmtools::len(dst_shape);

auto kernel_info = kernel.kernel_info_;
auto local_size = nmtools_array{kernel_info->preferred_work_group_size_multiple};
auto global_size = nmtools_array{size_t(std::ceil(float(out_size) / local_size[0])) * local_size[0]};

auto default_args = nmtools_tuple{
out_buffer
, inp_buffer
, out_shape_buffer
, inp_shape_buffer
, dst_shape_buffer
, (nm_cl_size_t)out_dim
, (nm_cl_size_t)inp_dim
, (nm_cl_size_t)dst_len
};

context->set_args(kernel,default_args);
context->run(kernel,out_buffer,output,global_size,local_size);
}
};
}

#endif // NMTOOLS_OPENCL_BUILD_KERNELS

#endif // NMTOOLS_ARRAY_EVAL_OPENCL_KERNELS_RESIZE_HPP
Loading