Skip to content

Commit

Permalink
Added skeleton of batch based GPU assignment
Browse files Browse the repository at this point in the history
  • Loading branch information
spectre-ns committed Jan 4, 2025
1 parent ae52796 commit 1904331
Show file tree
Hide file tree
Showing 9 changed files with 220 additions and 16 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ set(XTENSOR_HEADERS
${XTENSOR_INCLUDE_DIR}/xtensor/xcomplex.hpp
${XTENSOR_INCLUDE_DIR}/xtensor/xcontainer.hpp
${XTENSOR_INCLUDE_DIR}/xtensor/xcsv.hpp
${XTENSOR_INCLUDE_DIR}/xtensor/xdevice.hpp
${XTENSOR_INCLUDE_DIR}/xtensor/xdynamic_view.hpp
${XTENSOR_INCLUDE_DIR}/xtensor/xeval.hpp
${XTENSOR_INCLUDE_DIR}/xtensor/xexception.hpp
Expand Down
14 changes: 13 additions & 1 deletion include/xtensor/xassign.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,17 @@ namespace xt
static void run_impl(E1& e1, const E2& e2, std::false_type);
};

class device_assigner
{
public:

template <class E1, class E2>
static void run(E1& e1, const E2& e2)
{
e1.store_device(e2.load_device());
}
};

/*************************
* strided_loop_assigner *
*************************/
Expand Down Expand Up @@ -463,7 +474,8 @@ namespace xt
// in compilation error for expressions that do not provide a SIMD interface.
// simd_assign is true if simd_linear_assign() or simd_linear_assign(de1, de2)
// is true.
linear_assigner<simd_assign>::run(de1, de2);
//linear_assigner<simd_assign>::run(de1, de2);
device_assigner::run(de1, de2);
}
else
{
Expand Down
16 changes: 16 additions & 0 deletions include/xtensor/xcontainer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "xstrides.hpp"
#include "xtensor_config.hpp"
#include "xtensor_forward.hpp"
#include "xdevice.hpp"

namespace xt
{
Expand Down Expand Up @@ -112,6 +113,8 @@ namespace xt
using reverse_linear_iterator = typename iterable_base::reverse_linear_iterator;
using const_reverse_linear_iterator = typename iterable_base::const_reverse_linear_iterator;

using container_device_return_type_t = host_device_batch<value_type>;

static_assert(static_layout != layout_type::any, "Container layout can never be layout_type::any!");

size_type size() const noexcept;
Expand Down Expand Up @@ -187,6 +190,19 @@ namespace xt
container_simd_return_type_t<storage_type, value_type, requested_type>
/*simd_return_type<requested_type>*/ load_simd(size_type i) const;

        // Write a device batch back into this container's own storage
        // (device -> host download). `e` must expose store_host(T*), as
        // host_device_batch does.
        template<class device_batch>
        void store_device(device_batch&& e)
        {
            //check length matching
            // NOTE(review): no length check yet -- assumes e holds exactly
            // size() elements; verify before copying once the API settles.
            e.store_host(storage().data());
        }

        // Create a device batch holding a copy of this container's data
        // (host -> device upload). The batch covers all size() elements.
        container_device_return_type_t load_device() const
        {
            auto ptr = data();
            return container_device_return_type_t(ptr, size());
        }

linear_iterator linear_begin() noexcept;
linear_iterator linear_end() noexcept;

Expand Down
86 changes: 86 additions & 0 deletions include/xtensor/xdevice.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#ifndef XTENSOR_DEVICE_HPP
#define XTENSOR_DEVICE_HPP

#include <memory>
#include <algorithm>
#include <functional>
#include <vector>

namespace xt{
namespace detail{

}
/**
* Device implementation for the various operations. All device specific code goes in here disabled via macro
* for invalid syntax which might be needed for Sycl or CUDA.
*/
//#ifdef XTENSOR_DEVICE_ASSIGN
template<class T>
class host_device_batch
{
public:
host_device_batch(const T* ptr, std::size_t size)
{
//copy the data to the device
//CUDA Impl = Nearly identical
m_data.resize(size);
std::copy(ptr, ptr + size, std::begin(m_data));
}
template<class A>
host_device_batch& operator+(const host_device_batch<A>& rhs)
{
//CUDA impl = thrust::transform(m_data.begin(), m_data.end(), rhs.m_data().begin(), m_data.end(), thrust::plus<T>{});
std::transform(std::begin(m_data), std::end(m_data), std::begin(rhs.m_data), std::begin(m_data), std::plus<T>{});
return *this;
}
template<class A>
host_device_batch& operator-(const host_device_batch<A>& rhs)
{
std::transform(std::begin(m_data), std::end(m_data), std::begin(rhs.m_data), std::begin(m_data), std::minus<T>{});
return *this;
}
template<class A>
host_device_batch& operator*(const host_device_batch<A>& rhs)
{
std::transform(std::begin(m_data), std::end(m_data), std::begin(rhs.m_data), std::begin(m_data), std::multiplies<T>{});
return *this;
}
template<class A>
host_device_batch& operator/(const host_device_batch<A>& rhs)
{
std::transform(std::begin(m_data), std::end(m_data), std::begin(rhs.m_data), std::begin(m_data), std::divides<T>{});
return *this;
}
void store_host(T* dst)
{
std::copy(std::begin(m_data), std::end(m_data), dst);
}
private:
//CUDA impl = thrust::device_vector<T> m_data;
std::vector<T> m_data;
};
//#endif

// template<class T>
// class cuda_device_batch : public batch<host_device_batch<T>>
// {
// public:

// };

// template<class T>
// class intel_device_batch : public batch<host_device_batch<T>>
// {
// public:

// };

// template<class T>
// class opencl_device_batch : public batch<host_device_batch<T>>
// {
// public:

// };
}

#endif
20 changes: 20 additions & 0 deletions include/xtensor/xfunction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "xstrides.hpp"
#include "xtensor_simd.hpp"
#include "xutils.hpp"
#include "xdevice.hpp"

namespace xt
{
Expand Down Expand Up @@ -283,6 +284,7 @@ namespace xt
using const_iterator = typename iterable_base::const_iterator;
using reverse_iterator = typename iterable_base::reverse_iterator;
using const_reverse_iterator = typename iterable_base::const_reverse_iterator;
using device_return_type = host_device_batch<value_type>;

template <class Func, class... CTA, class U = std::enable_if_t<!std::is_base_of<std::decay_t<Func>, self_type>::value>>
xfunction(Func&& f, CTA&&... e) noexcept;
Expand Down Expand Up @@ -361,6 +363,8 @@ namespace xt
template <class align, class requested_type = value_type, std::size_t N = xt_simd::simd_traits<requested_type>::size>
simd_return_type<requested_type> load_simd(size_type i) const;

device_return_type load_device() const;

const tuple_type& arguments() const noexcept;

const functor_type& functor() const noexcept;
Expand All @@ -385,6 +389,9 @@ namespace xt
template <class align, class requested_type, std::size_t N, std::size_t... I>
auto load_simd_impl(std::index_sequence<I...>, size_type i) const;

template <std::size_t... I>
inline auto load_device_impl(std::index_sequence<I...>) const;

template <class Func, std::size_t... I>
const_stepper build_stepper(Func&& f, std::index_sequence<I...>) const noexcept;

Expand Down Expand Up @@ -844,6 +851,12 @@ namespace xt
return operator()();
}

    // Evaluate this whole function expression through the device batch
    // interface: expands the operand pack and dispatches to load_device_impl.
    template <class F, class... CT>
    inline auto xfunction<F, CT...>::load_device() const -> device_return_type
    {
        return load_device_impl(std::make_index_sequence<sizeof...(CT)>());
    }

template <class F, class... CT>
template <class align, class requested_type, std::size_t N>
inline auto xfunction<F, CT...>::load_simd(size_type i) const -> simd_return_type<requested_type>
Expand Down Expand Up @@ -912,6 +925,13 @@ namespace xt
return m_f.simd_apply((std::get<I>(m_e).template load_simd<align, requested_type>(i))...);
}

    // Load every operand as a device batch and apply the functor's
    // device implementation (the device_apply overloads generated in
    // xmath.hpp / xoperation.hpp).
    template <class F, class... CT>
    template <std::size_t... I>
    inline auto xfunction<F, CT...>::load_device_impl(std::index_sequence<I...>) const
    {
        return m_f.device_apply((std::get<I>(m_e).load_device())...);
    }

template <class F, class... CT>
template <class Func, std::size_t... I>
inline auto xfunction<F, CT...>::build_stepper(Func&& f, std::index_sequence<I...>) const noexcept
Expand Down
54 changes: 39 additions & 15 deletions include/xtensor/xmath.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,21 +81,27 @@ namespace xt
XTENSOR_INT_SPECIALIZATION_IMPL(FUNC_NAME, RETURN_VAL, unsigned long long);


#define XTENSOR_UNARY_MATH_FUNCTOR(NAME) \
struct NAME##_fun \
{ \
template <class T> \
constexpr auto operator()(const T& arg) const \
{ \
using math::NAME; \
return NAME(arg); \
} \
template <class B> \
constexpr auto simd_apply(const B& arg) const \
{ \
using math::NAME; \
return NAME(arg); \
} \
// Generates a unary math functor `NAME##_fun` exposing the scalar call
// operator plus SIMD (simd_apply) and device-batch (device_apply) overloads,
// so xfunction can dispatch an expression to either back end.
#define XTENSOR_UNARY_MATH_FUNCTOR(NAME)                                 \
    struct NAME##_fun                                                    \
    {                                                                    \
        template <class T>                                               \
        constexpr auto operator()(const T& arg) const                    \
        {                                                                \
            using math::NAME;                                            \
            return NAME(arg);                                            \
        }                                                                \
        template <class B>                                               \
        constexpr auto simd_apply(const B& arg) const                    \
        {                                                                \
            using math::NAME;                                            \
            return NAME(arg);                                            \
        }                                                                \
        template <class B>                                               \
        constexpr auto device_apply(const B& arg) const                  \
        {                                                                \
            using math::NAME;                                            \
            return NAME(arg);                                            \
        }                                                                \
    }

#define XTENSOR_UNARY_MATH_FUNCTOR_COMPLEX_REDUCING(NAME) \
Expand All @@ -113,6 +119,12 @@ namespace xt
using math::NAME; \
return NAME(arg); \
} \
template <class B> \
constexpr auto device_apply(const B& arg) const \
{ \
using math::NAME; \
return NAME(arg); \
} \
}

#define XTENSOR_BINARY_MATH_FUNCTOR(NAME) \
Expand All @@ -130,6 +142,12 @@ namespace xt
using math::NAME; \
return NAME(arg1, arg2); \
} \
template <class B> \
constexpr auto device_apply(const B& arg1, const B& arg2) const \
{ \
using math::NAME; \
return NAME(arg1, arg2); \
} \
}

#define XTENSOR_TERNARY_MATH_FUNCTOR(NAME) \
Expand All @@ -147,6 +165,12 @@ namespace xt
using math::NAME; \
return NAME(arg1, arg2, arg3); \
} \
template <class B> \
auto device_apply(const B& arg1, const B& arg2, const B& arg3) const \
{ \
using math::NAME; \
return NAME(arg1, arg2, arg3); \
} \
}

namespace math
Expand Down
5 changes: 5 additions & 0 deletions include/xtensor/xoperation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ namespace xt
{ \
return (arg1 OP arg2); \
} \
template <class B> \
constexpr auto device_apply(B&& arg1, const B&& arg2) const \
{ \
return (arg1 OP arg2); \
} \
}

namespace detail
Expand Down
1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ set(XTENSOR_TESTS
test_xcomplex.cpp
test_xcsv.cpp
test_xdatesupport.cpp
test_xdevice_assign.cpp
test_xdynamic_view.cpp
test_xfunctor_adaptor.cpp
test_xfixed.cpp
Expand Down
39 changes: 39 additions & 0 deletions test/test_xdevice_assign.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/***************************************************************************
* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht *
* Copyright (c) QuantStack *
* *
* Distributed under the terms of the BSD 3-Clause License. *
* *
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/
// Hand-written test for the batch-based device assignment skeleton
// (xdevice.hpp / device_assigner); not generated from a .cppy source.


#include <algorithm>

#include "xtensor/xarray.hpp"
#include "xtensor/xfixed.hpp"
#include "xtensor/xnoalias.hpp"
#include "xtensor/xstrided_view.hpp"
#include "xtensor/xtensor.hpp"
#include "xtensor/xview.hpp"

#include "test_common_macros.hpp"

namespace xt
{
TEST(test_xdevice, basic_xfunction)
{
std::vector<double> expectation = {2,3,4,5,6};

xt::xarray<float> a = {1., 2., 3., 4., 5.};
xt::xarray<float> b = xt::ones_like(a);
auto c = xt::xtensor<float, 1>::from_shape(a.shape());
c = a + b;
for(size_t i = 0; i < expectation.size(); i++)
{
ASSERT_EQ(c(i), expectation.at(i));
}
}
}

0 comments on commit 1904331

Please sign in to comment.