diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt index d1ca7c5f6..cdf96e0e1 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt @@ -3,6 +3,7 @@ package io.kinference.core.data.tensor import io.kinference.core.* import io.kinference.data.ONNXTensor import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.ManualAllocatorContext import io.kinference.ndarray.arrays.tiled.* import io.kinference.protobuf.FLOAT_TENSOR_TYPES import io.kinference.protobuf.message.TensorProto @@ -12,10 +13,11 @@ import io.kinference.types.ValueTypeInfo //TODO: support segments //TODO: support external data -class KITensor(name: String?, override val data: NDArrayCore, val info: ValueTypeInfo.TensorTypeInfo) : ONNXTensor(name, data) { +class KITensor(name: String?, override val data: NDArrayCore, val info: ValueTypeInfo.TensorTypeInfo, private var context: ManualAllocatorContext? = null) : ONNXTensor(name, data) { constructor(data: NDArrayCore, info: ValueInfo) : this(info.name, data, info.typeInfo as ValueTypeInfo.TensorTypeInfo) override suspend fun close() { + context?.returnNDArray(data) data.close() } @@ -41,7 +43,7 @@ class KITensor(name: String?, override val data: NDArrayCore, val info: ValueTyp override val backend = CoreBackend override fun rename(name: String): KITensor { - return KITensor(name, data, info) + return KITensor(name, data, info, context) } companion object { diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/TensorExtensions.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/TensorExtensions.kt index 618431c01..b83e75c2e 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/TensorExtensions.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/TensorExtensions.kt @@ -1,6 +1,7 @@ package io.kinference.core.data.tensor import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.ManualAllocatorContext import io.kinference.ndarray.extensions.concat import io.kinference.ndarray.extensions.splitWithAxis import io.kinference.primitives.types.DataType @@ -8,9 +9,9 @@ import io.kinference.protobuf.resolveProtoDataType import io.kinference.types.TensorShape import io.kinference.types.ValueTypeInfo -fun NDArrayCore.asTensor(name: String? = null) = KITensor(name, this, ValueTypeInfo.TensorTypeInfo(TensorShape(this.shape), type.resolveProtoDataType())) +fun NDArrayCore.asTensor(name: String? = null, context: ManualAllocatorContext? = null) = KITensor(name, this, ValueTypeInfo.TensorTypeInfo(TensorShape(this.shape), type.resolveProtoDataType()), context) -internal fun T.asTensor(name: String? = null) = (this as NDArrayCore).asTensor(name) +internal fun T.asTensor(name: String? = null, context: ManualAllocatorContext? = null) = (this as NDArrayCore).asTensor(name, context) internal fun Collection.asONNXTensors(names: List): List { return this.zip(names).map { (data, name) -> data.asTensor(name) } diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt index 5aecb3ce6..35d554631 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt @@ -24,7 +24,7 @@ class KIModel( @OptIn(ExperimentalCoroutinesApi::class) private val dispatcher: CoroutineDispatcher = Dispatchers.Default.limitedParallelism(parallelismLimit) - private val modelArrayStorage: ModelArrayStorage = ModelArrayStorage(MemoryLimiters.Default) + private val modelArrayStorage: ModelArrayStorage = ModelArrayStorage(memoryLimiter) override fun addProfilingContext(name: String): ProfilingContext = ProfilingContext(name).apply { profiles.add(this) } override fun analyzeProfilingResults(): ProfileAnalysisEntry = profiles.analyze("Model $name") @@ -44,20 +44,31 @@ class KIModel( coreReserved = true } - if (memoryLimiter == MemoryLimiters.NoAllocator) { - withContext(limiterContext) { - return@withContext graph.execute(input, contexts) + when (memoryLimiter) { + MemoryLimiters.NoAllocator -> { + withContext(limiterContext) { + return@withContext graph.execute(input, contexts) + } } - } else { - val allocatorContext = modelArrayStorage.createAllocatorContext() - val mixedContext = allocatorContext + limiterContext + MemoryLimiters.DefaultManualAllocator -> { + val allocatorContext = modelArrayStorage.createManualAllocatorContext() + val mixedContext = allocatorContext + limiterContext - withContext(mixedContext) { - val coroutineContext = coroutineContext[AllocatorContext.Key]!! - val execResult = graph.execute(input, contexts) - val copies = execResult.map { it.clone(it.name) }.toList() - coroutineContext.closeAllocated() - return@withContext copies + withContext(mixedContext) { + return@withContext graph.execute(input, contexts) + } + } + else -> { + val allocatorContext = modelArrayStorage.createAutoAllocatorContext() + val mixedContext = allocatorContext + limiterContext + + withContext(mixedContext) { + val coroutineContext = coroutineContext[AutoAllocatorContext.Key]!! + val execResult = graph.execute(input, contexts) + val copies = execResult.map { it.clone(it.name) }.toList() + coroutineContext.returnUsedArrays() + return@withContext copies + } } } } finally { diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt index 05b76119b..234639c96 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt @@ -7,17 +7,21 @@ import io.kinference.core.optimizer.rules.context.AttentionContextRule import io.kinference.data.ONNXData import io.kinference.graph.Contexts import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.ManualAllocatorContext import io.kinference.ndarray.arrays.pointers.accept import io.kinference.ndarray.arrays.pointers.map import io.kinference.ndarray.arrays.tiled.FloatTiledArray import io.kinference.ndarray.extensions.allocateNDArray import io.kinference.ndarray.extensions.dotTransposedWithAlpha +import io.kinference.ndarray.extensions.softmax.softmax import io.kinference.operator.* import io.kinference.optimizer.GraphOptimizer.Companion.isOpt +import io.kinference.primitives.types.DataType import io.kinference.protobuf.message.AttributeProto import io.kinference.protobuf.message.TensorProto import io.kinference.utils.launchWithLimitOrDefault import kotlinx.coroutines.coroutineScope +import kotlin.coroutines.coroutineContext import kotlin.math.min import kotlin.math.sqrt @@ -25,11 +29,12 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map { val headSize = hiddenSize / numHeads - val output = allocateNDArray(scores.type, Strides(intArrayOf(batchSize, numHeads, seqLen, headSize))) + val outputStrides = Strides(intArrayOf(batchSize, numHeads, seqLen, headSize)) + val output = context?.getNDArray(scores.type, outputStrides, fillZeros = true) ?: allocateNDArray(scores.type, outputStrides) coroutineScope { for (batchNum in 0 until batchSize) { @@ -46,6 +51,8 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map { val headSize = hiddenSize / numHeads val pastSeqLen = past?.shape?.get(3) ?: 0 val present = makePresent(past, k, v, batchSize, seqLen, numHeads, hiddenSize) - val scores = normalizedScores(unidir, q, mask, batchSize, seqLen, pastSeqLen, headSize, numHeads, present, maskFilterValue) - return attentionScore(scores, batchSize, seqLen, numHeads, hiddenSize, present) + val scores = normalizedScores(unidir, q, mask, batchSize, seqLen, pastSeqLen, headSize, numHeads, present, maskFilterValue, context) + return attentionScore(scores, batchSize, seqLen, numHeads, hiddenSize, present, context) } private suspend fun normalizedScores( unidir: Boolean, queries: NDArrayCore, maskIndices: IntNDArray?, batchSize: Int, - seqLen: Int, pastSeqLen: Int, headSize: Int, numHeads: Int, present: NDArrayCore, maskFilterValue: Float = -10_000f + seqLen: Int, pastSeqLen: Int, headSize: Int, numHeads: Int, present: NDArrayCore, maskFilterValue: Float = -10_000f, context: ManualAllocatorContext? = null ): NumberNDArrayCore { val allSeqLen = present.shape[3] - val scores = allocateNDArray(queries.type, Strides(intArrayOf(batchSize, numHeads, seqLen, allSeqLen))) as MutableNumberNDArrayCore + val scoresStrides = Strides(intArrayOf(batchSize, numHeads, seqLen, allSeqLen)) + val scores = (context?.getNDArray(queries.type, scoresStrides, fillZeros = true) ?: allocateNDArray(queries.type, scoresStrides)) as MutableNumberNDArrayCore - val maskData = maskIndices?.maskFromIndices(unidir, batchSize, seqLen, pastSeqLen, maskFilterValue) + val maskData = maskIndices?.maskFromIndices(unidir, batchSize, seqLen, pastSeqLen, maskFilterValue, context) val alpha = 1.0 / sqrt(headSize.toDouble()) @@ -148,27 +156,38 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map if (this != null) { //raw attention (no padding). only raw attention mask is 2-dimensional if (this.rank == 2) { - val maskPointer = mask.array.pointer(maskOffset * i) + val maskPointer = (mask as MutableFloatNDArray).array.pointer(maskOffset * i) val maskIndicesPointer = this.array.pointer(i * fullSeqLen) maskPointer.accept(maskIndicesPointer, fullSeqLen) { _, src -> if (src > 0) 0f else maskFilterValue } } else { //for left/right-side padding val maskIndicesPointer = this.array.pointer(i) - val maskPointer = mask.array.pointer(maskOffset * i + maskIndicesPointer.get()) + val maskPointer = (mask as MutableFloatNDArray).array.pointer(maskOffset * i + maskIndicesPointer.get()) maskPointer.map(fullSeqLen - maskIndicesPointer.get()) { maskFilterValue } if (this.rank == 1 && this.shape[0] == 2 * batchSize) { @@ -186,7 +205,7 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map>, input internal suspend fun initQueryKeyValue( input: NDArrayCore, weights: NDArrayCore, bias: NDArrayCore, - batchSize: Int, seqLen: Int, hiddenSize: Int, numHeads: Int + batchSize: Int, seqLen: Int, hiddenSize: Int, numHeads: Int, context: ManualAllocatorContext? = null ): Array { input as NumberNDArrayCore val headSize = hiddenSize / numHeads - val qkv = Array(3) { allocateNDArray(input.type, Strides(intArrayOf(batchSize, numHeads, seqLen, headSize))) } + val qkvStrides = Strides(intArrayOf(batchSize, numHeads, seqLen, headSize)) + val qkv = Array(3) { context?.getNDArray(input.type, qkvStrides, fillZeros = true) ?: allocateNDArray(input.type, qkvStrides) } coroutineScope { for (qkvIdx in 0 until 3) { @@ -269,6 +289,8 @@ class AttentionVer1(name: String, attributes: Map>, input private val maskFilterValue: Float by attribute("mask_filter_value") { it: Number -> it.toFloat() } override suspend fun > apply(contexts: Contexts, inputs: List): List { + val context = coroutineContext[ManualAllocatorContext.Key] + val input = inputs[0]!! val weights = inputs[1]!! @@ -286,10 +308,10 @@ class AttentionVer1(name: String, attributes: Map>, input input.data, preparedWeights.data, preparedBias.data, - batchSize, seqLen, hiddenSize, numHeads, + batchSize, seqLen, hiddenSize, numHeads, context ) - val (scores, present) = getScores(unidir, queries, keys, values, maskIndices, past, batchSize, seqLen, numHeads, hiddenSize, maskFilterValue) - return listOf(scores.asTensor(), present.asTensor()) + val (scores, present) = getScores(unidir, queries, keys, values, maskIndices, past, batchSize, seqLen, numHeads, hiddenSize, maskFilterValue, context) + return listOf(scores.asTensor(context = context), present.asTensor(context = context)) } } diff --git a/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt b/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt deleted file mode 100644 index c52d6ffd8..000000000 --- a/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt +++ /dev/null @@ -1,19 +0,0 @@ -package io.kinference.ndarray.arrays - -enum class ArrayTypes(val index: Int, val size: Int) { - ByteArrayType(0, Byte.SIZE_BYTES), - UByteArrayType(1, UByte.SIZE_BYTES), - ShortArrayType(2, Short.SIZE_BYTES), - UShortArrayType(3, UShort.SIZE_BYTES), - IntArrayType(4, Int.SIZE_BYTES), - UIntArrayType(5, UInt.SIZE_BYTES), - LongArrayType(6, Long.SIZE_BYTES), - ULongArrayType(7, ULong.SIZE_BYTES), - FloatArrayType(8, Float.SIZE_BYTES), - DoubleArrayType(9, Double.SIZE_BYTES), - BooleanArrayType(10, 1); - - companion object { - fun sizeInBytes(index: Int, arraySize: Int): Long = entries[index].size * arraySize.toLong() - } -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt deleted file mode 100644 index 7c5286a41..000000000 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt +++ /dev/null @@ -1,23 +0,0 @@ -package io.kinference.ndarray.arrays.memory - -import io.kinference.ndarray.arrays.* -import kotlin.coroutines.CoroutineContext - -data class AllocatorContext internal constructor( - private val unusedContainers: ArrayStorage, - private val limiter: MemoryLimiter, - private val returnStorageFn: (ArrayStorage) -> Unit -) : CoroutineContext.Element { - - companion object Key : CoroutineContext.Key - override val key: CoroutineContext.Key<*> get() = Key - - internal fun getArrayContainers(type: ArrayTypes, size: Int, count: Int): Array { - return Array(count) { unusedContainers.getArrayContainer(type, size) } - } - - fun closeAllocated() { - unusedContainers.moveUsedArrays() - returnStorageFn(unusedContainers) - } -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt index 782c121c2..dcf704673 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt @@ -1,22 +1,10 @@ package io.kinference.ndarray.arrays.memory -import io.kinference.ndarray.arrays.ArrayTypes +import io.kinference.primitives.types.DataType -internal class ArrayStorage(typeLength: Int, sizeLength: Int, private val limiter: MemoryLimiter) { +internal abstract class BaseArrayStorage(typeLength: Int, sizeLength: Int, storageCount: Int) { /** - * This is a storage for arrays which are available for retrieving - * - * Structure is as follows: - * 1. Array by predefined types (all types are known compiled time) - * 2. Array by size. Starting with 'INIT_SIZE_VALUE' element and grow it doubling (typically there are no more than 16 different sizes) - * 3. Queue of array containers (used as FIFO) - */ - private var storageUnused: Array>> = - Array(typeLength) { Array(sizeLength) { ArrayDeque() } } - - /** - * This is a storage for arrays which are currently in use. - * They should be moved back into unused storage when there is no need for them. + * This is a storage for arrays. * * Structure is as follows: * 1. Array by predefined types (all types are known compiled time) @@ -24,45 +12,19 @@ internal class ArrayStorage(typeLength: Int, sizeLength: Int, private val limite * Starting with 'INIT_SIZE_VALUE' element and grow it doubling (typically there are no more than 16 different sizes) * 3. Queue of array containers (used as FIFO) */ - private var storageUsed: Array>> = - Array(typeLength) { Array(sizeLength) { ArrayDeque() } } + protected var storage: Array>>> = + Array(storageCount) { Array(typeLength) { Array(sizeLength) { ArrayDeque() } } } private var sizeIndices: IntArray = IntArray(typeLength) private var sizes: Array = Array(typeLength) { IntArray(sizeLength) } - internal fun getArrayContainer(type: ArrayTypes, size: Int): Any { - return if (limiter.checkMemoryLimitAndAdd(ArrayTypes.sizeInBytes(type.index, size))) { - val tIndex = type.index - val sIndex = getSizeIndex(tIndex, size) - val array = storageUnused[tIndex][sIndex].removeFirstOrNull()?.also(::resetArray) - ?: create(type, size) - - storageUsed[tIndex][sIndex].addLast(array) - array - } else { - create(type, size) - } - } - - internal fun moveUsedArrays() { - storageUsed.forEachIndexed { typeIndex, arraysByType -> - arraysByType.forEachIndexed { sizeIndex, arrayDeque -> - arrayDeque.forEach { - storageUnused[typeIndex][sizeIndex].addLast(it) - } - arrayDeque.clear() - } - } - limiter.resetLimit() - } - - private fun getSizeIndex(tIndex: Int, size: Int): Int { + protected fun getSizeIndex(tIndex: Int, size: Int): Int { val sIndex = sizes[tIndex].indexOf(size) return if (sIndex != -1) { sIndex } else { - if (sizeIndices[tIndex] >= storageUnused[tIndex].size) + if (sizeIndices[tIndex] >= storage[0][tIndex].size) grow(tIndex) val idx = sizeIndices[tIndex]++ @@ -73,37 +35,40 @@ internal class ArrayStorage(typeLength: Int, sizeLength: Int, private val limite private fun grow(typeIndex: Int) { val newSize = sizes[typeIndex].size * 2 - val newStorageUnused: Array> = Array(newSize) { ArrayDeque() } - val newStorageUsed: Array> = Array(newSize) { ArrayDeque() } + for (i in storage.indices) { + val newStorage: Array> = Array(newSize) { ArrayDeque() } - for (i in storageUnused[typeIndex].indices) { - newStorageUnused[i] = storageUnused[typeIndex][i] - newStorageUsed[i] = storageUsed[typeIndex][i] + for (j in storage[i][typeIndex].indices) { + newStorage[j] = storage[i][typeIndex][j] + } + + storage[i][typeIndex] = newStorage } - storageUnused[typeIndex] = newStorageUnused - storageUsed[typeIndex] = newStorageUsed sizes[typeIndex] = sizes[typeIndex].copyOf(newSize) } - private fun create(type: ArrayTypes, size: Int): Any { + protected fun create(type: DataType, size: Int): Any { return when (type) { - ArrayTypes.ByteArrayType -> ByteArray(size) // 8-bit signed - ArrayTypes.UByteArrayType -> UByteArray(size) // 8-bit unsigned - ArrayTypes.ShortArrayType -> ShortArray(size) // 16-bit signed - ArrayTypes.UShortArrayType -> UShortArray(size) // 16-bit unsigned - ArrayTypes.IntArrayType -> IntArray(size) // 32-bit signed - ArrayTypes.UIntArrayType -> UIntArray(size) // 32-bit unsigned - ArrayTypes.LongArrayType -> LongArray(size) // 64-bit signed - ArrayTypes.ULongArrayType -> ULongArray(size) // 64-bit unsigned - ArrayTypes.FloatArrayType -> FloatArray(size) - ArrayTypes.DoubleArrayType -> DoubleArray(size) - ArrayTypes.BooleanArrayType -> BooleanArray(size) + DataType.BYTE -> ByteArray(size) // 8-bit signed + DataType.SHORT -> ShortArray(size) // 16-bit signed + DataType.INT -> IntArray(size) // 32-bit signed + DataType.LONG -> LongArray(size) // 64-bit signed + + DataType.UBYTE -> UByteArray(size) // 8-bit unsigned + DataType.USHORT -> UShortArray(size) // 16-bit unsigned + DataType.UINT -> UIntArray(size) // 32-bit unsigned + DataType.ULONG -> ULongArray(size) // 64-bit unsigned + + DataType.FLOAT -> FloatArray(size) + DataType.DOUBLE -> DoubleArray(size) + + DataType.BOOLEAN -> BooleanArray(size) else -> throw IllegalArgumentException("Unsupported array type") } } - private fun resetArray(array: Any): Unit = + protected fun resetArray(array: Any): Unit = when (array) { is ByteArray -> array.fill(0) // 8-bit signed is UByteArray -> array.fill(0u) // 8-bit unsigned @@ -116,6 +81,59 @@ internal class ArrayStorage(typeLength: Int, sizeLength: Int, private val limite is FloatArray -> array.fill(0.0f) is DoubleArray -> array.fill(0.0) is BooleanArray -> array.fill(false) - else -> throw IllegalArgumentException("Unsupported array type") + else -> error("Unsupported array type") } } + +internal class SingleArrayStorage(typeLength: Int, sizeLength: Int, private val limiter: MemoryLimiter) : BaseArrayStorage(typeLength, sizeLength, 1) { + internal fun getArray(type: DataType, size: Int, fillZeros: Boolean = true): Any { + return if (limiter.checkMemoryLimitAndAdd(type, size)) { + val tIndex = type.ordinal + val sIndex = getSizeIndex(tIndex, size) + storage[0][tIndex][sIndex].removeFirstOrNull()?.takeIf { fillZeros }?.apply(::resetArray) ?: create(type, size) + } else { + create(type, size) + } + } + + internal fun returnArrays(type: DataType, size: Int, arrays: Array) { + val tIndex = type.ordinal + val sIndex = getSizeIndex(tIndex, size) + val queue = storage[0][tIndex][sIndex] + + queue.addAll(arrays) + } + + internal fun clear() { + storage[0].forEach { arraysBySize -> + arraysBySize.forEach { arrayDeque -> + arrayDeque.clear() + } + } + limiter.resetLimit() + } +} + +internal class ArrayStorage(typeLength: Int, sizeLength: Int, private val limiter: MemoryLimiter) : BaseArrayStorage(typeLength, sizeLength, 2) { + internal fun getArray(type: DataType, size: Int, fillZeros: Boolean = true): Any { + return if (limiter.checkMemoryLimitAndAdd(type, size)) { + val tIndex = type.ordinal + val sIndex = getSizeIndex(tIndex, size) + val array = storage[0][tIndex][sIndex].removeFirstOrNull()?.takeIf { fillZeros }?.apply(::resetArray) ?: create(type, size) + storage[1][tIndex][sIndex].add(array) + array + } else { + create(type, size) + } + } + + internal fun moveArrays() { + storage[1].forEachIndexed { typeIndex, arraysByType -> + arraysByType.forEachIndexed { sizeIndex, arrayDeque -> + storage[0][typeIndex][sizeIndex].addAll(arrayDeque) + arrayDeque.clear() + } + } + limiter.resetLimit() + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AutoAllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AutoAllocatorContext.kt new file mode 100644 index 000000000..a9255dd93 --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AutoAllocatorContext.kt @@ -0,0 +1,23 @@ +package io.kinference.ndarray.arrays.memory + +import io.kinference.ndarray.arrays.* +import io.kinference.primitives.types.DataType +import kotlin.coroutines.CoroutineContext + +data class AutoAllocatorContext internal constructor( + private val storage: ArrayStorage, + private val returnStorageFn: (ArrayStorage) -> Unit +) : CoroutineContext.Element { + + companion object Key : CoroutineContext.Key + override val key: CoroutineContext.Key<*> get() = Key + + internal fun getArrays(type: DataType, size: Int, count: Int): Array { + return Array(count) { storage.getArray(type, size) } + } + + fun returnUsedArrays() { + storage.moveArrays() + returnStorageFn(storage) + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ManualAllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ManualAllocatorContext.kt new file mode 100644 index 000000000..788541e6f --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ManualAllocatorContext.kt @@ -0,0 +1,53 @@ +package io.kinference.ndarray.arrays.memory + +import io.kinference.ndarray.arrays.* +import io.kinference.primitives.types.DataType +import kotlin.coroutines.CoroutineContext + +data class ManualAllocatorContext internal constructor(private val storage: SingleArrayStorage) : CoroutineContext.Element { + + companion object Key : CoroutineContext.Key + override val key: CoroutineContext.Key<*> get() = Key + + fun getNDArray(dataType: DataType, strides: Strides, fillZeros: Boolean = false): MutableNDArrayCore { + return when(dataType) { + DataType.BYTE -> ByteArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + DataType.SHORT -> ShortArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + DataType.INT -> IntArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + DataType.LONG -> LongArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + + DataType.UBYTE -> UByteArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + DataType.USHORT -> UShortArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + DataType.UINT -> UIntArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + DataType.ULONG -> ULongArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + + DataType.FLOAT -> FloatArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + DataType.DOUBLE -> DoubleArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + + DataType.BOOLEAN -> BooleanArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + + else -> error("Unsupported array type") + } + } + + fun returnNDArray(ndArray: NDArrayCore) { + when(ndArray.type) { + DataType.BYTE -> ByteArrayStorageWrapper.returnNDArray(storage, ndArray as ByteNDArray) + DataType.SHORT -> ShortArrayStorageWrapper.returnNDArray(storage, ndArray as ShortNDArray) + DataType.INT -> IntArrayStorageWrapper.returnNDArray(storage, ndArray as IntNDArray) + DataType.LONG -> LongArrayStorageWrapper.returnNDArray(storage, ndArray as LongNDArray) + + DataType.UBYTE -> UByteArrayStorageWrapper.returnNDArray(storage, ndArray as UByteNDArray) + DataType.USHORT -> UShortArrayStorageWrapper.returnNDArray(storage, ndArray as UShortNDArray) + DataType.UINT -> UIntArrayStorageWrapper.returnNDArray(storage, ndArray as UIntNDArray) + DataType.ULONG -> ULongArrayStorageWrapper.returnNDArray(storage, ndArray as ULongNDArray) + + DataType.FLOAT -> FloatArrayStorageWrapper.returnNDArray(storage, ndArray as FloatNDArray) + DataType.DOUBLE -> DoubleArrayStorageWrapper.returnNDArray(storage, ndArray as DoubleNDArray) + + DataType.BOOLEAN -> BooleanArrayStorageWrapper.returnNDArray(storage, ndArray as BooleanNDArray) + + else -> error("Unsupported array type") + } + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryLimiter.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryLimiter.kt index ebb86fd8c..85ed03eb1 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryLimiter.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryLimiter.kt @@ -1,5 +1,6 @@ package io.kinference.ndarray.arrays.memory +import io.kinference.primitives.types.DataType import io.kinference.utils.PlatformUtils import kotlinx.atomicfu.* @@ -7,10 +8,11 @@ interface MemoryLimiter { /** * Checks if the memory limit allows adding the specified amount of memory and performs the addition * - * @param added the memory in bytes to add + * @param type is the DataType of underlying primitives in a checking array + * @param size is the checking array size * @return true if the memory was added successfully and false if adding the memory exceeds the memory limit */ - fun checkMemoryLimitAndAdd(added: Long): Boolean + fun checkMemoryLimitAndAdd(type: DataType, size: Int): Boolean /** * Resets the used memory into 0L @@ -21,8 +23,9 @@ interface MemoryLimiter { class BaseMemoryLimiter internal constructor(private val memoryLimit: Long) : MemoryLimiter { private var usedMemory: AtomicLong = atomic(0L) - override fun checkMemoryLimitAndAdd(added: Long): Boolean { + override fun checkMemoryLimitAndAdd(type: DataType, size: Int): Boolean { // Attempt to add memory and check the limit + val added = sizeInBytes(type.ordinal, size) val successful = usedMemory.getAndUpdate { current -> if (current + added > memoryLimit) current else current + added } != usedMemory.value // Check if the update was successful @@ -33,10 +36,34 @@ class BaseMemoryLimiter internal constructor(private val memoryLimit: Long) : Me override fun resetLimit() { usedMemory.value = 0L } + + companion object { + private val typeSizes: LongArray = LongArray(DataType.entries.size).apply { + this[DataType.BYTE.ordinal] = Byte.SIZE_BYTES.toLong() + this[DataType.SHORT.ordinal] = Short.SIZE_BYTES.toLong() + this[DataType.INT.ordinal] = Int.SIZE_BYTES.toLong() + this[DataType.LONG.ordinal] = Long.SIZE_BYTES.toLong() + + this[DataType.UBYTE.ordinal] = UByte.SIZE_BYTES.toLong() + this[DataType.USHORT.ordinal] = UShort.SIZE_BYTES.toLong() + this[DataType.UINT.ordinal] = UInt.SIZE_BYTES.toLong() + this[DataType.ULONG.ordinal] = ULong.SIZE_BYTES.toLong() + + this[DataType.FLOAT.ordinal] = Float.SIZE_BYTES.toLong() + this[DataType.DOUBLE.ordinal] = Double.SIZE_BYTES.toLong() + + this[DataType.BOOLEAN.ordinal] = 1.toLong() + } + + private fun sizeInBytes(typeIndex: Int, size: Int): Long { + return typeSizes[typeIndex] * size + } + } } object MemoryLimiters { - val Default: MemoryLimiter = BaseMemoryLimiter((PlatformUtils.maxHeap * 0.3).toLong()) + val DefaultAutoAllocator: MemoryLimiter = BaseMemoryLimiter((PlatformUtils.maxHeap * 0.3).toLong()) + val DefaultManualAllocator: MemoryLimiter = BaseMemoryLimiter(50 * 1024 * 1024) val NoAllocator: MemoryLimiter = BaseMemoryLimiter(0L) fun customLimiter(memoryLimit: Long): MemoryLimiter { diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ModelArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ModelArrayStorage.kt index 9c7f02aa5..0135921cb 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ModelArrayStorage.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ModelArrayStorage.kt @@ -1,34 +1,39 @@ package io.kinference.ndarray.arrays.memory -import io.kinference.ndarray.arrays.ArrayTypes +import io.kinference.primitives.types.DataType import io.kinference.utils.Closeable import java.util.concurrent.ConcurrentLinkedQueue class ModelArrayStorage(private val limiter: MemoryLimiter = MemoryLimiters.NoAllocator) : Closeable { - private val unusedArrays: ConcurrentLinkedQueue = ConcurrentLinkedQueue() + private val autoStorageQueue: ConcurrentLinkedQueue = ConcurrentLinkedQueue() companion object { private const val INIT_SIZE_VALUE: Int = 2 - private val typeSize: Int = ArrayTypes.entries.size + private val typeSize: Int = DataType.entries.size } - fun createAllocatorContext(): AllocatorContext { - return AllocatorContext(getStorage(), limiter, ::returnStorage) + fun createAutoAllocatorContext(): AutoAllocatorContext { + return AutoAllocatorContext(getStorage(autoStorageQueue), ::returnStorage) + } + + fun createManualAllocatorContext(): ManualAllocatorContext { + limiter.resetLimit() + return ManualAllocatorContext(SingleArrayStorage(typeSize, INIT_SIZE_VALUE, limiter)) } fun clearCache() { - unusedArrays.clear() + autoStorageQueue.clear() } override suspend fun close() { clearCache() } - private fun getStorage(): ArrayStorage { - return unusedArrays.poll() ?: ArrayStorage(typeSize, INIT_SIZE_VALUE, limiter) + private fun getStorage(queue: ConcurrentLinkedQueue): ArrayStorage { + return queue.poll() ?: ArrayStorage(typeSize, INIT_SIZE_VALUE, limiter) } private fun returnStorage(storage: ArrayStorage) { - unusedArrays.offer(storage) + autoStorageQueue.offer(storage) } } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayStorageWrapper.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayStorageWrapper.kt new file mode 100644 index 000000000..52921ced8 --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayStorageWrapper.kt @@ -0,0 +1,30 @@ +@file:GeneratePrimitives(DataType.ALL) +package io.kinference.ndarray.arrays.memory + +import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.PrimitiveNDArray +import io.kinference.ndarray.arrays.tiled.PrimitiveTiledArray +import io.kinference.ndarray.blockSizeByStrides +import io.kinference.primitives.annotations.* +import io.kinference.primitives.types.DataType +import io.kinference.primitives.types.PrimitiveArray + +@GenerateNameFromPrimitives +internal object PrimitiveArrayStorageWrapper { + private val type = DataType.CurrentPrimitive + + fun getNDArray(strides: Strides, storage: SingleArrayStorage, fillZeros: Boolean = false): MutablePrimitiveNDArray { + val blockSize = blockSizeByStrides(strides) + val blocksNum = strides.linearSize / blockSize + val blocks = Array(blocksNum) { storage.getArray(type, blockSize, fillZeros) } + val typedBlocks = blocks.map { it as PrimitiveArray }.toTypedArray() + val tiled = PrimitiveTiledArray(typedBlocks) + + return MutablePrimitiveNDArray(tiled, strides) + } + + fun returnNDArray(storage: SingleArrayStorage, ndarray: PrimitiveNDArray) { + val blockSize = ndarray.array.blockSize + storage.returnArrays(type, blockSize, ndarray.array.blocks as Array) + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt index 07cbf57f1..4469e9d4e 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt @@ -26,7 +26,7 @@ internal class PrimitiveTiledArray(val blocks: Array) { } companion object { - val type: ArrayTypes = ArrayTypes.valueOf(PrimitiveArray::class.simpleName!! + "Type") + val type: DataType = DataType.CurrentPrimitive suspend operator fun invoke(strides: Strides): PrimitiveTiledArray { val blockSize = blockSizeByStrides(strides) @@ -59,8 +59,8 @@ internal class PrimitiveTiledArray(val blocks: Array) { val blocksNum = if (blockSize == 0) 0 else size / blockSize - val coroutineContext = coroutineContext[AllocatorContext.Key] - val blocks = coroutineContext?.getArrayContainers(type, blockSize, blocksNum) ?: Array(blocksNum) { PrimitiveArray(blockSize) } + val coroutineContext = coroutineContext[AutoAllocatorContext.Key] + val blocks = coroutineContext?.getArrays(type, blockSize, blocksNum) ?: Array(blocksNum) { PrimitiveArray(blockSize) } return PrimitiveTiledArray(blocks.map { it as PrimitiveArray }.toTypedArray()) } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/BooleanConstants.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/BooleanConstants.kt new file mode 100644 index 000000000..00f4767fa --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/BooleanConstants.kt @@ -0,0 +1,6 @@ +package io.kinference.ndarray.extensions.constants + +object BooleanConstants { + const val ZERO = false + const val ONE = true +}