diff --git a/src/arraymancer/tensor/private/p_accessors.nim b/src/arraymancer/tensor/private/p_accessors.nim index 1e453b0a..fc868e17 100644 --- a/src/arraymancer/tensor/private/p_accessors.nim +++ b/src/arraymancer/tensor/private/p_accessors.nim @@ -57,6 +57,53 @@ import ../backend/[global_config, memory_optimization_hints], # coord[k] = 0 # iter_pos -= backstrides[k] +type TensorForm = object + shape: Metadata + strides: Metadata + +proc rank(t: TensorForm): range[0 .. LASER_MAXRANK] {.inline.} = + t.shape.len + +func size(t: TensorForm): int {.inline.} = + result = 1 + for i in 0.. 1 + + let prev_s = t.strides[^2] + let last_s = t.strides[^1] + let rank = t.rank + let size = t.size + + assert iter_offset >= 0 + assert iter_size <= size - iter_offset + assert prev_d > 0 and last_d > 0 + assert size mod prev_d*last_d == 0 + + initStridedIteration(coord, backstrides, iter_pos, t, iter_offset, iter_size) + + let bp1 = + if iter_offset == 0: + 0 + else: + min(iter_offset + iter_size, ceil(iter_offset, last_d)) + let bp2 = + if iter_offset == 0: + 0 + else: + max(bp1, min(floor(iter_offset + iter_size, prev_d*last_d), ceil(iter_offset, prev_d*last_d))) + let bp3 = + if iter_size == size: + size + else: + max(bp2, floor(iter_offset + iter_size, prev_d*last_d)) + let bp4 = + if iter_size == size: + size + else: + max(bp3, floor(iter_offset + iter_size, last_d)) + + assert iter_offset <= bp1 and bp1 <= bp2 and bp2 <= bp3 and bp3 <= bp4 and bp4 <= iter_offset + iter_size + assert bp1 - iter_offset < last_d and (bp1 mod last_d == 0 or bp1 == iter_offset + iter_size) + assert bp2 == bp1 or (bp2 mod prev_d*last_d == 0 and bp2 - bp1 < prev_d*last_d) + assert bp3 == bp2 or bp3 mod prev_d*last_d == 0 + assert bp4 == bp3 or (bp4 mod last_d == 0 and bp4 - bp3 < prev_d*last_d) + assert iter_offset + iter_size - bp4 < last_d + + var i = iter_offset + + if bp1 > iter_offset: + coord[rank - 1] += bp1 - i - 1 + while i < bp1: + stridedIterationYield(strider, data, i, iter_pos) + iter_pos += last_s + i += 1 + iter_pos -= last_s + advanceStridedIteration(coord, backstrides, iter_pos, t, iter_offset, iter_size) + + if bp2 > bp1: + coord[rank - 2] += ((bp2 - i) div last_d) - 1 + coord[rank - 1] = last_d - 1 + while i < bp2: + for _ in 0.. bp3: + coord[rank - 2] += ((bp4 - i) div last_d) - 1 + coord[rank - 1] = last_d - 1 + while i < bp4: + for _ in 0..= 1 + if tf.rank == 1: + let s = tf.strides[^1] for i in iter_offset..<(iter_offset+iter_size): - stridedIterationYield(strider, data, i, i) + stridedIterationYield(strider, data, i, i*s) else: - initStridedIteration(coord, backstrides, iter_pos, t, iter_offset, iter_size) - for i in iter_offset..<(iter_offset+iter_size): - stridedIterationYield(strider, data, i, iter_pos) - advanceStridedIteration(coord, backstrides, iter_pos, t, iter_offset, iter_size) + let prev_d = tf.shape[^2] + let last_d = tf.shape[^1] + if prev_d == 2 and last_d == 2: + stridedIterationLoop(strider, data, tf, iter_offset, iter_size, 2, 2) + elif last_d == 2: + stridedIterationLoop(strider, data, tf, iter_offset, iter_size, prev_d, 2) + elif last_d == 3: + stridedIterationLoop(strider, data, tf, iter_offset, iter_size, prev_d, 3) + else: + stridedIterationLoop(strider, data, tf, iter_offset, iter_size, prev_d, last_d) template stridedCoordsIteration*(t, iter_offset, iter_size: typed): untyped = ## Iterate over a Tensor, displaying data as in C order, whatever the strides. (coords) diff --git a/src/arraymancer/tensor/private/p_shapeshifting.nim b/src/arraymancer/tensor/private/p_shapeshifting.nim index 2207b247..840b1c8e 100644 --- a/src/arraymancer/tensor/private/p_shapeshifting.nim +++ b/src/arraymancer/tensor/private/p_shapeshifting.nim @@ -30,15 +30,15 @@ proc contiguousImpl*[T](t: Tensor[T], layout: OrderType, result: var Tensor[T]) apply2_inline(result, t): y -proc reshape_with_copy*[T](t: Tensor[T], new_shape: varargs[int]|Metadata|seq[int], result: var Tensor[T]) = - result = newTensorUninit[T](new_shape) - result.apply2_inline(t,y) - proc reshape_no_copy*(t: AnyTensor, new_shape: varargs[int]|Metadata|seq[int], result: var AnyTensor, layout: OrderType) {.noSideEffect.}= result.shape.copyFrom(new_shape) shape_to_strides(result.shape, layout, result.strides) result.offset = t.offset +proc reshape_with_copy*[T](t: Tensor[T], new_shape: varargs[int]|Metadata|seq[int], result: var Tensor[T]) = + contiguousImpl(t, rowMajor, result) + reshape_no_copy(t, new_shape, result, rowMajor) + proc infer_shape*(t: Tensor, new_shape: varargs[int]): seq[int] {.noinit.} = ## Replace the single -1 value on `new_shape` with the value that ## makes the size the same as that of the input tensor