diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index a7105cda6..9fde41af2 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -57,30 +57,6 @@ function _interleave(a::NTuple{N}, b::NTuple{N}) where {N} return (a[1], b[1], _interleave(tail(a), tail(b))...) end -# Low-overhead implementation of `copyto!` for specific case of `stride(B, 1) < stride(B, 2)` -# used in indexmanipulations: avoids the overhead of Strided.jl -function _copyto!(A::StridedView{<:Any, 1}, B::StridedView{<:Any, 2}) - length(A) == length(B) || throw(DimensionMismatch()) - - Adata = parent(A) - Astr = stride(A, 1) - IA = A.offset - - Bdata = parent(B) - Bstr = strides(B) - - IB_1 = B.offset - @inbounds for _ in axes(B, 2) - IB = IB_1 - for _ in axes(B, 1) - Adata[IA += Astr] = Bdata[IB += Bstr[1]] - end - IB_1 += Bstr[2] - end - - return A -end - @static if VERSION < v"1.11" # TODO: remove once support for v1.10 is dropped _allequal(f, xs) = allequal(Base.Generator(f, xs)) else diff --git a/src/tensors/indexmanipulations.jl b/src/tensors/indexmanipulations.jl index 49842f858..5ac378854 100644 --- a/src/tensors/indexmanipulations.jl +++ b/src/tensors/indexmanipulations.jl @@ -701,7 +701,8 @@ function _add_transform_multi!( buffer_src = StridedView(buffer2, (blocksize, cols), (1, blocksize), 0) for (i, struct_src) in enumerate(structs_src) subblock_src = sreshape(StridedView(tsrc.data, sz_src, struct_src...), matsize) - _copyto!(buffer_src[:, i], subblock_src) + bufblock_src = sreshape(buffer_src[:, i], matsize) + copy!(bufblock_src, subblock_src) end # Resummation into a second buffer using BLAS