diff --git a/include/matx/core/tensor.h b/include/matx/core/tensor.h
index d2fa6191e..cc8514092 100644
--- a/include/matx/core/tensor.h
+++ b/include/matx/core/tensor.h
@@ -1428,8 +1428,6 @@ class tensor_t : public detail::tensor_impl_t<T, RANK, Desc> {
    * more dimensions of a tensor. This includes completely dropping an unwanted
    * dimension, or simply taking a piece of a wanted dimension. Slice() is very
    * similar to indexing operations in both Python and MATLAB.
-   *
-   * *NOTE* Users should not call Slice() directly anymore. Use the slice() operator instead.
    *
    * @param firsts
    *   List of starting index into each dimension. Indexing is 0-based
@@ -1452,10 +1450,10 @@
    * @returns Sliced view of tensor
    *
    */
-  template <int N = RANK, typename StrideType>
+  template <int N = RANK>
   __MATX_INLINE__ auto Slice([[maybe_unused]] const cuda::std::array<typename Desc::shape_type, RANK> &firsts,
-                             [[maybe_unused]] const cuda::std::array<typename Desc::shape_type, RANK> &ends,
-                             [[maybe_unused]] StrideType strides) const
+                             [[maybe_unused]] const cuda::std::array<typename Desc::shape_type, RANK> &ends,
+                             [[maybe_unused]] const cuda::std::array<typename Desc::stride_type, RANK> &strides) const
   {
     static_assert(N <= RANK && RANK > 0, "Must slice to a rank the same or less than current rank.");
 
@@ -1466,6 +1464,7 @@ class tensor_t : public detail::tensor_impl_t<T, RANK, Desc> {
     T *data = this->ldata_;
     int d = 0;
 
+    bool def_stride = (strides[0] == -1);
     [[maybe_unused]] int end_count = 0;
     for (int i = 0; i < RANK; i++) {
@@ -1487,14 +1486,9 @@
       MATX_ASSERT_STR(first < end, matxInvalidSize, "Slice must be at least one element long");
 
-      [[maybe_unused]] typename Desc::stride_type stride_mult;
-
-      if constexpr (std::is_same_v<StrideType, detail::NoStride>) {
-        stride_mult = 1;
-      }
-      else {
-        stride_mult = (strides[i] == matxKeepStride) ? 1 : strides[i];
-      }
+      [[maybe_unused]] typename Desc::stride_type stride_mult = (def_stride || strides[i] == matxKeepStride)
+                                                                    ? 1
+                                                                    : strides[i]; // custom stride
 
       MATX_ASSERT_STR(first < end, matxInvalidParameter,
                       "Starting slice must be less than end slice");
@@ -1531,10 +1525,10 @@ class tensor_t : public detail::tensor_impl_t<T, RANK, Desc> {
     return tensor_t<T, N, Storage, decltype(new_desc)>{storage_, std::move(new_desc), data};
   }
 
-  template <int N = RANK, typename StrideType>
+  template <int N = RANK>
   __MATX_INLINE__ auto Slice(const typename Desc::shape_type (&firsts)[RANK],
-                             const typename Desc::shape_type (&ends)[RANK],
-                             StrideType strides) const
+                             const typename Desc::shape_type (&ends)[RANK],
+                             const typename Desc::stride_type (&strides)[RANK]) const
   {
     return Slice<N>(detail::to_array(firsts), detail::to_array(ends), detail::to_array(strides));
   }
@@ -1565,13 +1559,15 @@
    */
   template <int N = RANK>
   __MATX_INLINE__ auto Slice(const cuda::std::array<typename Desc::shape_type, RANK> &firsts,
-                             const cuda::std::array<typename Desc::shape_type, RANK> &ends) const
+                             const cuda::std::array<typename Desc::shape_type, RANK> &ends) const
   {
     static_assert(N <= RANK && RANK > 0, "Must slice to a rank the same or less than current rank.");
 
     MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
 
-    return Slice<N>(firsts, ends, detail::NoStride{});
+    const cuda::std::array<typename Desc::stride_type, RANK> strides = {-1};
+
+    return Slice<N>(firsts, ends, strides);
   }
 
   template <int N = RANK>
diff --git a/include/matx/core/type_utils.h b/include/matx/core/type_utils.h
index 956de9297..945ba369b 100644
--- a/include/matx/core/type_utils.h
+++ b/include/matx/core/type_utils.h
@@ -66,7 +66,6 @@ enum class MemoryLayout {
 namespace detail {
 struct NoShape{};
 struct EmptyOp{};
-struct NoStride{};
 
 template <typename T>
 struct is_noshape : std::integral_constant<bool, std::is_same_v<NoShape, T>> {};
diff --git a/include/matx/operators/slice.h b/include/matx/operators/slice.h
index 74716c00d..ff84d7f6a 100644
--- a/include/matx/operators/slice.h
+++ b/include/matx/operators/slice.h
@@ -42,21 +42,20 @@ namespace matx
  * Slices elements from an operator/tensor.
  */
 namespace detail {
-
-  template <typename T, int DIM, typename StrideType>
-  class SliceOp : public BaseOp<SliceOp<T, DIM, StrideType>>
+  template <typename T, int DIM>
+  class SliceOp : public BaseOp<SliceOp<T, DIM>>
   {
     public:
       using value_type = typename T::value_type;
       using shape_type = index_t;
-      using self_type = SliceOp<T, DIM, StrideType>;
+      using self_type = SliceOp<T, DIM>;
 
     private:
       typename base_type<T>::type op_;
       cuda::std::array<shape_type, DIM> sizes_;
       cuda::std::array<int32_t, DIM> dims_;
       cuda::std::array<shape_type, T::Rank()> starts_;
-      StrideType strides_; // Add [[no_unique_address]] in c++20
+      cuda::std::array<shape_type, T::Rank()> strides_;
 
     public:
       using matxop = bool;
@@ -69,7 +68,7 @@
 
      __MATX_INLINE__ SliceOp(T op, const cuda::std::array<shape_type, T::Rank()> &starts,
                              const cuda::std::array<shape_type, T::Rank()> &ends,
-                             StrideType strides) : op_(op) {
+                             const cuda::std::array<shape_type, T::Rank()> &strides) : op_(op) {
        int32_t d = 0;
        for(int32_t i = 0; i < T::Rank(); i++) {
          shape_type start = starts[i] < 0 ? op.Size(i) + starts[i] : starts[i];
@@ -81,10 +80,7 @@
                          "Slice end index out of range of operator");
 
          starts_[i] = start;
-
-          if constexpr (!std::is_same_v<StrideType, detail::NoStride>) {
-            strides_[i] = strides[i];
-          }
+          strides_[i] = strides[i];
 
          // compute dims and sizes
          if(end != matxDropDim) {
@@ -99,10 +95,7 @@
          }
 
          //adjust size by stride
-          if constexpr (!std::is_same_v<StrideType, detail::NoStride>) {
-            sizes_[d] = (shape_type)std::ceil(static_cast<double>(sizes_[d])/ static_cast<double>(strides_[d]));
-          }
-
+          sizes_[d] = (shape_type)std::ceil(static_cast<double>(sizes_[d])/ static_cast<double>(strides_[d]));
          d++;
        }
      }
@@ -115,7 +108,7 @@
        static_assert(sizeof...(Is)==Rank());
        static_assert((std::is_convertible_v<Is, index_t> && ... ));
 
-#if 0
+        // convert variadic type to tuple so we can read/update
        cuda::std::array<index_t, Rank()> inds{indices...};
        cuda::std::array<index_t, T::Rank()> ind{indices...};
@@ -128,29 +121,6 @@
        for(int32_t i = 0; i < Rank(); i++) {
          ind[dims_[i]] += inds[i] * strides_[i];
        }
-#else
-        // convert variadic type to tuple so we can read/update
-        cuda::std::array<index_t, T::Rank()> ind;
-        cuda::std::array<index_t, Rank()> inds{indices...};
-
-        #pragma unroll
-        for (int32_t i = 0; i < T::Rank(); i++) {
-          #pragma unroll
-          for(int32_t j = 0; j < Rank(); j++) {
-            if(dims_[j] == i) {
-              if constexpr (!std::is_same_v<StrideType, detail::NoStride>) {
-                ind[i] = starts_[j] + inds[j] * strides_[i];
-              }
-              else {
-                ind[i] = starts_[j] + inds[j];
-              }
-            }
-            else {
-              ind[i] = starts_[i];
-            }
-          }
-        }
-#endif
 
        //return op_(ind);
        return cuda::std::apply(op_, ind);
@@ -162,42 +132,19 @@
        static_assert(sizeof...(Is)==Rank());
        static_assert((std::is_convertible_v<Is, index_t> && ... ));
 
-#if 0
-        cuda::std::array<index_t, Rank()> inds{indices...};
-        cuda::std::array<index_t, T::Rank()> ind{indices...};
+        // convert variadic type to tuple so we can read/update
+        cuda::std::array<index_t, Rank()> inds{indices...};
+        cuda::std::array<index_t, T::Rank()> ind{indices...};
 
 #pragma unroll
-        for(int32_t i = 0; i < T::Rank(); i++) {
+        for(int i = 0; i < T::Rank(); i++) {
          ind[i] = starts_[i];
        }
 
 #pragma unroll
-        for(int32_t i = 0; i < Rank(); i++) {
+        for(int i = 0; i < Rank(); i++) {
          ind[dims_[i]] += inds[i] * strides_[i];
        }
-#else
-        // convert variadic type to tuple so we can read/update
-        cuda::std::array<index_t, T::Rank()> ind;
-        cuda::std::array<index_t, Rank()> inds{indices...};
-
-        #pragma unroll
-        for (int32_t i = 0; i < T::Rank(); i++) {
-          #pragma unroll
-          for(int32_t j = 0; j < Rank(); j++) {
-            if(dims_[j] == i) {
-              if constexpr (!std::is_same_v<StrideType, detail::NoStride>) {
-                ind[i] = starts_[j] + inds[j] * strides_[i];
-              }
-              else {
-                ind[i] = starts_[j] + inds[j];
-              }
-            }
-            else {
-              ind[i] = starts_[i];
-            }
-          }
-        }
-#endif
 
        //return op_(ind);
        return cuda::std::apply(op_, ind);
@@ -269,23 +216,10 @@
    if constexpr (is_tensor_view_v<OpType>) {
      return op.Slice(starts, ends, strides);
    } else {
-      return detail::SliceOp<OpType, OpType::Rank(), cuda::std::array<index_t, OpType::Rank()>>(op, starts, ends, strides);
+      return detail::SliceOp<OpType, OpType::Rank()>(op, starts, ends, strides);
    }
  }
 
-  template <typename OpType>
-  __MATX_INLINE__ auto slice( const OpType &op,
-                              const cuda::std::array<index_t, OpType::Rank()> &starts,
-                              const cuda::std::array<index_t, OpType::Rank()> &ends,
-                              detail::NoStride strides)
-  {
-    if constexpr (is_tensor_view_v<OpType>) {
-      return op.Slice(starts, ends, strides);
-    } else {
-      return detail::SliceOp<OpType, OpType::Rank()>(op, starts, ends, detail::NoStride{});
-    }
-  }
-
  template <typename OpType>
  __MATX_INLINE__ auto slice( const OpType &op,
                              const index_t (&starts)[OpType::Rank()],
@@ -316,7 +250,10 @@
                              const cuda::std::array<index_t, OpType::Rank()> &starts,
                              const cuda::std::array<index_t, OpType::Rank()> &ends)
  {
-    return slice(op, starts, ends, detail::NoStride{});
+    cuda::std::array<index_t, OpType::Rank()> strides;
+    strides.fill(1);
+
+    return slice(op, starts, ends, strides);
  }
  template <typename OpType>
  __MATX_INLINE__ auto slice( const OpType &op,
@@ -354,24 +291,10 @@
    if constexpr (is_tensor_view_v<OpType>) {
      return op.template Slice<N>(starts, ends, strides);
    } else {
-      return detail::SliceOp<OpType, N, cuda::std::array<index_t, OpType::Rank()>>(op, starts, ends, strides);
-    }
-  }
-
-  template <int N, typename OpType>
-  __MATX_INLINE__ auto slice( const OpType op,
-                              const cuda::std::array<index_t, OpType::Rank()> &starts,
-                              const cuda::std::array<index_t, OpType::Rank()> &ends,
-                              detail::NoStride no_stride)
-  {
-    if constexpr (is_tensor_view_v<OpType>) {
-      return op.template Slice<N>(starts, ends);
-    } else {
-      return detail::SliceOp<OpType, N>(op, starts, ends, detail::NoStride{});
+      return detail::SliceOp<OpType, N>(op, starts, ends, strides);
    }
  }
 
-
  template <int N, typename OpType>
  __MATX_INLINE__ auto slice( const OpType op,
                              const index_t (&starts)[OpType::Rank()],
@@ -405,7 +328,9 @@
                              const cuda::std::array<index_t, OpType::Rank()> &starts,
                              const cuda::std::array<index_t, OpType::Rank()> &ends)
  {
-    return slice<N>(opIn, starts, ends, detail::NoStride{});
+    cuda::std::array<index_t, OpType::Rank()> strides;
+    strides.fill(1);
+    return slice<N>(opIn, starts, ends, strides);
  }
 
  template <int N, typename OpType>
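
Usage sketch (not part of the diff): with detail::NoStride removed, every slice call flows through the array-based stride path. The two-argument overloads synthesize the stride array themselves: the slice() operator fills it with 1s, while the tensor_t::Slice() wrapper passes {-1}, which Slice() detects via def_stride. The tensor shape and slice bounds below are illustrative assumptions; make_tensor(), slice(), matxEnd, and matxDropDim are the existing MatX API.

    #include "matx.h"

    int main() {
      auto t = matx::make_tensor<float>({10, 10});

      // Two-argument form: strides are implicitly 1 in every dimension.
      auto a = matx::slice(t, {2, 0}, {8, matx::matxEnd});            // 6 x 10 view

      // Explicit strides: every row in dim 0, every second column in dim 1.
      // Sizes are rounded up, so dim 1 becomes ceil(10 / 2) = 5.
      auto b = matx::slice(t, {2, 0}, {8, matx::matxEnd}, {1, 2});    // 6 x 5 view

      // Rank-reducing form: drop dimension 0, keeping the row at index 3.
      auto c = matx::slice<1>(t, {3, 0}, {matx::matxDropDim, matx::matxEnd});

      return 0;
    }

Note that the {-1} initializer only sets element 0 of the stride array (the remaining elements are value-initialized to zero), which is why Slice() keys def_stride off strides[0] alone rather than inspecting every dimension.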