From 54ec016bf8794bf5145e8bcd522dcb3b96f71ac0 Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Mon, 5 Aug 2024 14:16:55 +0200 Subject: [PATCH 01/19] JBAI-4393 [core, ndarray] Edited an output allocation marking mechanism from special function referencing to simple output copying. --- .../jvmMain/kotlin/io/kinference.core/KIEngine.kt | 10 ---------- .../kotlin/io/kinference.core/data/map/KIONNXMap.kt | 6 +----- .../io/kinference.core/data/seq/KIONNXSequence.kt | 6 +----- .../io/kinference.core/data/tensor/KITensor.kt | 10 ++-------- .../kotlin/io/kinference.core/model/KIModel.kt | 7 +++---- .../operators/layer/attention/Attention.kt | 12 +----------- .../ndarray/arrays/ArrayDispatcherUtils.kt | 4 ---- .../io/kinference/ndarray/arrays/BooleanNDArray.kt | 6 +----- .../io/kinference/ndarray/arrays/PrimitiveNDArray.kt | 6 +----- .../ndarray/arrays/memory/AllocatorContext.kt | 2 +- .../ndarray/arrays/memory/ArrayContainer.kt | 7 ------- .../ndarray/arrays/tiled/PrimitiveTiledArray.kt | 6 ++---- .../extensions/gather/PrimitiveGatherByBlocks.kt | 6 +----- 13 files changed, 14 insertions(+), 74 deletions(-) diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/KIEngine.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/KIEngine.kt index 287094629..dba47b43d 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/KIEngine.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/KIEngine.kt @@ -24,16 +24,6 @@ import okio.Path.Companion.toPath typealias KIONNXData = ONNXData -// Define an interface for allocation control marking output -internal interface KIONNXDataArraysReleaser { - fun markOutput() -} - -internal fun KIONNXData.markOutput() { - if (this is KIONNXDataArraysReleaser) - this.markOutput() -} - object CoreBackend : BackendInfo(name = "KInference Core CPU Backend") /** diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/map/KIONNXMap.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/map/KIONNXMap.kt index f541c4c23..a1bbcf7eb 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/map/KIONNXMap.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/map/KIONNXMap.kt @@ -9,7 +9,7 @@ import io.kinference.protobuf.message.TensorProto import io.kinference.types.ValueInfo import io.kinference.types.ValueTypeInfo -class KIONNXMap(name: String?, data: Map>, val info: ValueTypeInfo.MapTypeInfo) : ONNXMap>, CoreBackend>(name, data), KIONNXDataArraysReleaser { +class KIONNXMap(name: String?, data: Map>, val info: ValueTypeInfo.MapTypeInfo) : ONNXMap>, CoreBackend>(name, data) { constructor(data: Map>, info: ValueInfo) : this(info.name, data, info.typeInfo as ValueTypeInfo.MapTypeInfo) override val backend = CoreBackend @@ -26,10 +26,6 @@ class KIONNXMap(name: String?, data: Map>, val info: ValueTyp override fun rename(name: String): KIONNXMap = KIONNXMap(name, data, info) - override fun markOutput() { - data.values.forEach { it.markOutput() } - } - override suspend fun clone(newName: String?): KIONNXMap { val newMap = HashMap>(data.size) for ((key, value) in data.entries) { diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/seq/KIONNXSequence.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/seq/KIONNXSequence.kt index 24b52085c..49383fca0 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/seq/KIONNXSequence.kt +++ 
b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/seq/KIONNXSequence.kt @@ -7,7 +7,7 @@ import io.kinference.data.ONNXSequence import io.kinference.protobuf.message.SequenceProto import io.kinference.types.* -class KIONNXSequence(name: String?, data: List>, val info: ValueTypeInfo.SequenceTypeInfo) : ONNXSequence>, CoreBackend>(name, data), KIONNXDataArraysReleaser { +class KIONNXSequence(name: String?, data: List>, val info: ValueTypeInfo.SequenceTypeInfo) : ONNXSequence>, CoreBackend>(name, data) { constructor(name: String?, info: ValueTypeInfo.SequenceTypeInfo, size: Int, init: (Int) -> KIONNXData<*>) : this(name, List(size, init), info) constructor(data: List>, info: ValueInfo) : this(info.name, data, info.typeInfo as ValueTypeInfo.SequenceTypeInfo) @@ -23,10 +23,6 @@ class KIONNXSequence(name: String?, data: List>, val info: ValueTy override fun rename(name: String): KIONNXSequence = KIONNXSequence(name, data, info) - override fun markOutput() { - data.forEach { it.markOutput() } - } - val length: Int = data.size companion object { diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt index 2c6de1a69..d1ca7c5f6 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt @@ -1,7 +1,6 @@ package io.kinference.core.data.tensor -import io.kinference.core.CoreBackend -import io.kinference.core.KIONNXDataArraysReleaser +import io.kinference.core.* import io.kinference.data.ONNXTensor import io.kinference.ndarray.arrays.* import io.kinference.ndarray.arrays.tiled.* @@ -13,7 +12,7 @@ import io.kinference.types.ValueTypeInfo //TODO: support segments //TODO: support external data -class KITensor(name: String?, override val data: NDArrayCore, val info: ValueTypeInfo.TensorTypeInfo) : ONNXTensor(name, data), KIONNXDataArraysReleaser { +class KITensor(name: String?, override val data: NDArrayCore, val info: ValueTypeInfo.TensorTypeInfo) : ONNXTensor(name, data) { constructor(data: NDArrayCore, info: ValueInfo) : this(info.name, data, info.typeInfo as ValueTypeInfo.TensorTypeInfo) override suspend fun close() { @@ -24,11 +23,6 @@ class KITensor(name: String?, override val data: NDArrayCore, val info: ValueTyp return KITensor(newName, data.clone(), info) } - override fun markOutput() { - if (this.data is MemoryControlledArray) - data.markOutput() - } - suspend operator fun minus(other: KITensor): KITensor { require(this.data is NumberNDArrayCore && other.data is NumberNDArrayCore) return (this.data - other.data).asTensor() diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt index 1e25b4ae1..2a1e50db7 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt @@ -1,8 +1,7 @@ package io.kinference.core.model -import io.kinference.core.KIONNXData +import io.kinference.core.* import io.kinference.core.graph.KIGraph -import io.kinference.core.markOutput import io.kinference.graph.Contexts import io.kinference.model.Model import io.kinference.ndarray.arrays.memory.* @@ -51,9 +50,9 @@ class KIModel( withContext(mixedContext) { val coroutineContext = 
coroutineContext[AllocatorContext.Key]!! val execResult = graph.execute(input, contexts) - execResult.forEach { it.markOutput() } + val copies = execResult.map { it.clone(it.name) }.toList() coroutineContext.closeAllocated() - execResult + copies } } finally { if (coreReserved) { diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt index 6487c46c1..05b76119b 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt @@ -61,21 +61,16 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map - val resultMarker: Array if (past == null || past.linearSize == 0) { resultBlocks = kBlocks.plus(vBlocks) - resultMarker = kMarker.plus(vMarker) } else { val pastSeqLen = past.shape[3] presentDims[3] += pastSeqLen val pastBlocks = past.array.blocks - val pastMarker = past.array.marker val blocksInRow = headSize / past.array.blockSize @@ -84,35 +79,30 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map(2 * batchSize * numHeads * presentDims[3] * blocksInRow) - val futureResMarker = arrayOfNulls(2 * batchSize * numHeads * presentDims[3] * blocksInRow) var resBlockIdx = 0 var pastBlocIdx = 0 repeat(2) { presentKeyValueIdx -> val kvBlocks = if (presentKeyValueIdx == 0) kBlocks else vBlocks - val kvMarker = if (presentKeyValueIdx == 0) kMarker else vMarker var kvBlockIdx = 0 repeat(rowsSize) { pastBlocks.copyInto(futureRes, resBlockIdx, pastBlocIdx, pastBlocIdx + pastRowBlocksCount) - pastMarker.copyInto(futureResMarker, resBlockIdx, pastBlocIdx, pastBlocIdx + pastRowBlocksCount) resBlockIdx += pastRowBlocksCount pastBlocIdx += pastRowBlocksCount kvBlocks.copyInto(futureRes, resBlockIdx, kvBlockIdx, kvBlockIdx + kvRowBlocksCount) - kvMarker.copyInto(futureResMarker, resBlockIdx, kvBlockIdx, kvBlockIdx + kvRowBlocksCount) resBlockIdx += kvRowBlocksCount kvBlockIdx += kvRowBlocksCount } } resultBlocks = futureRes as Array - resultMarker = futureResMarker as Array } - return FloatNDArray(FloatTiledArray(resultBlocks, resultMarker), Strides(presentDims)) + return FloatNDArray(FloatTiledArray(resultBlocks), Strides(presentDims)) } diff --git a/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt b/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt index bea90d149..2da712ca3 100644 --- a/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt +++ b/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt @@ -15,7 +15,3 @@ enum class ArrayTypes(val index: Int, val size: Int) { DoubleArray(9, Double.SIZE_BYTES), BooleanArray(10, 1); } - -interface MemoryControlledArray { - fun markOutput() -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/BooleanNDArray.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/BooleanNDArray.kt index 5a4e758dc..3037028d3 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/BooleanNDArray.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/BooleanNDArray.kt @@ -21,7 +21,7 @@ fun interface BooleanBinaryOperation { operator fun invoke(first: Boolean, 
second: Boolean): Boolean } -open class BooleanNDArray(var array: BooleanTiledArray, strides: Strides) : NDArrayCore, MemoryControlledArray { +open class BooleanNDArray(var array: BooleanTiledArray, strides: Strides) : NDArrayCore { override val type: DataType = DataType.BOOLEAN final override var strides: Strides = strides @@ -79,10 +79,6 @@ open class BooleanNDArray(var array: BooleanTiledArray, strides: Strides) : NDAr return array.blocks[0][0] } - override fun markOutput() { - array.marker.forEach { it.invoke() } - } - override suspend fun toMutable(): MutableBooleanNDArray { return MutableBooleanNDArray(array.copyOf(), strides) } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/PrimitiveNDArray.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/PrimitiveNDArray.kt index 0b391f275..f1bd91b44 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/PrimitiveNDArray.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/PrimitiveNDArray.kt @@ -30,7 +30,7 @@ import kotlin.math.* @GenerateNameFromPrimitives @MakePublic -internal open class PrimitiveNDArray(array: PrimitiveTiledArray, strides: Strides) : NumberNDArrayCore, MemoryControlledArray { +internal open class PrimitiveNDArray(array: PrimitiveTiledArray, strides: Strides) : NumberNDArrayCore { var array: PrimitiveTiledArray = array protected set @@ -85,10 +85,6 @@ internal open class PrimitiveNDArray(array: PrimitiveTiledArray, strides: Stride return array.blocks[0][0] } - override fun markOutput() { - array.marker.forEach { it.invoke() } - } - override suspend fun clone(): PrimitiveNDArray { return PrimitiveNDArray(array.copyOf(), Strides(shape)) } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt index 2ed73f878..f6fd4f008 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt @@ -25,7 +25,7 @@ data class AllocatorContext internal constructor( fun closeAllocated() { usedContainers.forEach { - if (!it.isOutput && limiter.checkMemoryLimitAndAdd(it.sizeBytes.toLong())) { + if (limiter.checkMemoryLimitAndAdd(it.sizeBytes.toLong())) { unusedContainers[it.arrayTypeIndex, it.arraySizeIndex].addLast(it) } } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt index d39ba62ba..8884fcfa1 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt @@ -7,13 +7,6 @@ sealed class ArrayContainer( val arraySizeIndex: Int, val sizeBytes: Int ) { - var isOutput: Boolean = false - private set - - val markAsOutput = { - isOutput = true - } - companion object { private const val EMPTY_INDEX = -1 diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt index eda58c092..a9863aadb 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt +++ 
b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt @@ -17,7 +17,7 @@ import kotlin.math.min @GenerateNameFromPrimitives @MakePublic -internal class PrimitiveTiledArray(val blocks: Array, val marker: Array = emptyMarker) { +internal class PrimitiveTiledArray(val blocks: Array) { val size: Int val blockSize: Int = if (blocks.isEmpty()) 0 else blocks.first().size val blocksNum: Int = blocks.size @@ -28,7 +28,6 @@ internal class PrimitiveTiledArray(val blocks: Array, val marker companion object { val type: ArrayTypes = ArrayTypes.valueOf(PrimitiveArray::class.simpleName!!) - private val emptyMarker: Array = arrayOf() suspend operator fun invoke(strides: Strides): PrimitiveTiledArray { val blockSize = blockSizeByStrides(strides) @@ -66,9 +65,8 @@ internal class PrimitiveTiledArray(val blocks: Array, val marker // With array dispatcher val containerArray = coroutineContext?.getArrayContainers(type, blockSize, blocksNum) ?: Array(blocksNum) { ArrayContainer(type, blockSize) } val blocks = Array(containerArray.size) { i -> (containerArray[i] as PrimitiveArrayContainer).array } - val marker = Array(containerArray.size) { i -> containerArray[i].markAsOutput } - return PrimitiveTiledArray(blocks, marker) + return PrimitiveTiledArray(blocks) } suspend operator fun invoke(size: Int, blockSize: Int, init: (InlineInt) -> PrimitiveType) : PrimitiveTiledArray { diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gather/PrimitiveGatherByBlocks.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gather/PrimitiveGatherByBlocks.kt index b40873787..9adb86a46 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gather/PrimitiveGatherByBlocks.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gather/PrimitiveGatherByBlocks.kt @@ -26,14 +26,11 @@ internal suspend fun gatherByBlocksPrimitive(array: PrimitiveNDArray, indices: N val dataToCopyBlocks = dataToCopySize / array.array.blockSize val dataBlocks = array.array.blocks - val dataMarkers = array.array.marker val destBatchBlocksOffset = indicesSize * dataToCopyBlocks val inputBatchBlockOffset = array.shape[actualAxis] * dataToCopyBlocks val destArray = arrayOfNulls(destBatchBlocksOffset * dataBatchSize) - val destMarkersArray = arrayOfNulls(destBatchBlocksOffset * dataBatchSize) - for (dataBatchNum in 0 until dataBatchSize) { val dataBlocksOffset = inputBatchBlockOffset * dataBatchNum @@ -46,12 +43,11 @@ internal suspend fun gatherByBlocksPrimitive(array: PrimitiveNDArray, indices: N for (blockIdx in 0 until dataToCopyBlocks) { destArray[destBlocksOffset + blockIdx] = dataBlocks[dataOffset + blockIdx] - destMarkersArray[destBlocksOffset + blockIdx] = dataMarkers[dataOffset + blockIdx] } destBlocksOffset += dataToCopyBlocks } } - return PrimitiveNDArray(PrimitiveTiledArray(destArray as Array, destMarkersArray as Array), Strides(destShape)) + return PrimitiveNDArray(PrimitiveTiledArray(destArray as Array), Strides(destShape)) } From 2d7c3108668abb208e09ce5d52291d3b3a65a6a6 Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Mon, 12 Aug 2024 11:45:01 +0200 Subject: [PATCH 02/19] JBAI-4393 [core, ndarray] Removed ArrayContainer and refactored memory management: use of primitive arrays directly. 
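Note for reviewers: the change below drops the ArrayContainer wrapper hierarchy and lets ArrayStorage track raw primitive arrays in two pools, one for arrays currently handed out and one for arrays ready for reuse, with moveUsedArrays() recycling everything at the end of a run. The following condensed sketch illustrates that two-pool idea for a single array type and block size; it is not the actual ArrayStorage code, and the class and method names here are illustrative only.

// Illustrative two-pool recycler (single type, single block size).
// The real ArrayStorage generalizes this over ArrayTypes and block sizes.
class SimpleFloatPool(private val blockSize: Int) {
    private val unused = ArrayDeque<FloatArray>()
    private val used = ArrayDeque<FloatArray>()

    // Borrow a zeroed block, reusing a previously returned one when possible.
    fun borrow(): FloatArray {
        val block = unused.removeFirstOrNull()?.also { it.fill(0f) } ?: FloatArray(blockSize)
        used.addLast(block)
        return block
    }

    // Counterpart of ArrayStorage.moveUsedArrays(): after an inference run,
    // every borrowed block becomes available again for the next run.
    fun recycleAll() {
        unused.addAll(used)
        used.clear()
    }
}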
--- .../ndarray/arrays/ArrayDispatcherUtils.kt | 2 - .../ndarray/arrays/memory/AllocatorContext.kt | 20 ++--- .../ndarray/arrays/memory/ArrayContainer.kt | 48 ----------- .../ndarray/arrays/memory/ArrayStorage.kt | 81 ++++++++++++++++--- .../arrays/memory/PrimitiveArrayContainer.kt | 17 ---- .../arrays/tiled/PrimitiveTiledArray.kt | 17 ++-- .../extensions/gelu/BiasGeluPrimitive.kt | 2 - .../extensions/gelu/FastGeluPrimitive.kt | 1 - .../runners/PerformanceRunner.kt | 1 + 9 files changed, 85 insertions(+), 104 deletions(-) delete mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt delete mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayContainer.kt diff --git a/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt b/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt index 2da712ca3..53f217c09 100644 --- a/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt +++ b/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt @@ -1,7 +1,5 @@ package io.kinference.ndarray.arrays -typealias StateMarker = () -> Unit - enum class ArrayTypes(val index: Int, val size: Int) { ByteArray(0, Byte.SIZE_BYTES), UByteArray(1, UByte.SIZE_BYTES), diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt index f6fd4f008..84d02017e 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt @@ -8,28 +8,24 @@ data class AllocatorContext internal constructor( private val limiter: MemoryLimiter, private val returnStorageFn: (ArrayStorage) -> Unit ) : CoroutineContext.Element { - private val usedContainers: ArrayDeque = ArrayDeque() companion object Key : CoroutineContext.Key override val key: CoroutineContext.Key<*> get() = Key - internal fun getArrayContainers(type: ArrayTypes, size: Int, count: Int): Array { + internal fun getArrayContainers(type: ArrayTypes, size: Int, count: Int): Array { return if (limiter !is NoAllocatorMemoryLimiter) { - val result = Array(count) { unusedContainers.getArrayContainer(type, size) } - usedContainers.addAll(result) - result + Array(count) { unusedContainers.getArrayContainer(type, size) } } else { - Array(count) { ArrayContainer(type, size) } + Array(count) { unusedContainers.create(type, size) } } } + fun closeOperator() { + unusedContainers.moveUsedArrays() + } + fun closeAllocated() { - usedContainers.forEach { - if (limiter.checkMemoryLimitAndAdd(it.sizeBytes.toLong())) { - unusedContainers[it.arrayTypeIndex, it.arraySizeIndex].addLast(it) - } - } - usedContainers.clear() + unusedContainers.moveUsedArrays() returnStorageFn(unusedContainers) } } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt deleted file mode 100644 index 8884fcfa1..000000000 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt +++ /dev/null @@ -1,48 +0,0 @@ -package io.kinference.ndarray.arrays.memory - -import io.kinference.ndarray.arrays.* - 
-sealed class ArrayContainer( - val arrayTypeIndex: Int, - val arraySizeIndex: Int, - val sizeBytes: Int -) { - companion object { - private const val EMPTY_INDEX = -1 - - operator fun invoke(type: ArrayTypes, size: Int, sizeIndex: Int = EMPTY_INDEX): ArrayContainer { - val sizeBytes: Int = type.size * size - return when (type) { - ArrayTypes.ByteArray -> ByteArrayContainer(type.index, sizeIndex, sizeBytes, ByteArray(size)) // 8-bit signed - ArrayTypes.UByteArray -> UByteArrayContainer(type.index, sizeIndex, sizeBytes, UByteArray(size)) // 8-bit unsigned - ArrayTypes.ShortArray -> ShortArrayContainer(type.index, sizeIndex, sizeBytes, ShortArray(size)) // 16-bit signed - ArrayTypes.UShortArray -> UShortArrayContainer(type.index, sizeIndex, sizeBytes, UShortArray(size)) // 16-bit unsigned - ArrayTypes.IntArray -> IntArrayContainer(type.index, sizeIndex, sizeBytes, IntArray(size)) // 32-bit signed - ArrayTypes.UIntArray -> UIntArrayContainer(type.index, sizeIndex, sizeBytes, UIntArray(size)) // 32-bit unsigned - ArrayTypes.LongArray -> LongArrayContainer(type.index, sizeIndex, sizeBytes, LongArray(size)) // 64-bit signed - ArrayTypes.ULongArray -> ULongArrayContainer(type.index, sizeIndex, sizeBytes, ULongArray(size)) // 64-bit unsigned - ArrayTypes.FloatArray -> FloatArrayContainer(type.index, sizeIndex, sizeBytes, FloatArray(size)) - ArrayTypes.DoubleArray -> DoubleArrayContainer(type.index, sizeIndex, sizeBytes, DoubleArray(size)) - ArrayTypes.BooleanArray -> BooleanArrayContainer(type.index, sizeIndex, sizeBytes, BooleanArray(size)) - else -> throw IllegalArgumentException("Unsupported array type") - } - } - - fun resetArray(arrayContainer: ArrayContainer) { - when (arrayContainer) { - is ByteArrayContainer -> arrayContainer.array.fill(0) // 8-bit signed - is UByteArrayContainer -> arrayContainer.array.fill(0u) // 8-bit unsigned - is ShortArrayContainer -> arrayContainer.array.fill(0) // 16-bit signed - is UShortArrayContainer -> arrayContainer.array.fill(0u) // 16-bit unsigned - is IntArrayContainer -> arrayContainer.array.fill(0) // 32-bit signed - is UIntArrayContainer -> arrayContainer.array.fill(0u) // 32-bit unsigned - is LongArrayContainer -> arrayContainer.array.fill(0L) // 64-bit signed - is ULongArrayContainer -> arrayContainer.array.fill(0U) // 64-bit unsigned - is FloatArrayContainer -> arrayContainer.array.fill(0.0f) - is DoubleArrayContainer -> arrayContainer.array.fill(0.0) - is BooleanArrayContainer -> arrayContainer.array.fill(false) - else -> throw IllegalArgumentException("Unsupported array type") - } - } - } -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt index 00a98c0cb..859ad2654 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt @@ -9,32 +9,36 @@ internal class ArrayStorage(typeLength: Int, sizeLength: Int, private val limite * 2. Array by size. Starting with 'INIT_SIZE_VALUE' element and grow it doubling (typically there are no more than 16 different sizes) * 3. 
Queue of array containers (used as FIFO) */ - private var storage: Array>> = + private var storageUnused: Array>> = + Array(typeLength) { Array(sizeLength) { ArrayDeque() } } + + private var storageUsed: Array>> = Array(typeLength) { Array(sizeLength) { ArrayDeque() } } private var sizeIndices: IntArray = IntArray(typeLength) private var sizes: Array = Array(typeLength) { IntArray(sizeLength) } - operator fun get(typeIndex: Int, sizeIndex: Int): ArrayDeque { - return storage[typeIndex][sizeIndex] + operator fun get(typeIndex: Int, sizeIndex: Int): ArrayDeque { + return storageUnused[typeIndex][sizeIndex] } - fun getArrayContainer(type: ArrayTypes, size: Int): ArrayContainer { + fun getArrayContainer(type: ArrayTypes, size: Int): Any { val tIndex = type.index val sIndex = sizes[tIndex].indexOf(size) // Checking that we have this array size in our storage for this type val idx = if (sIndex != -1) { - val array = storage[tIndex][sIndex].removeFirstOrNull() + val array = storageUnused[tIndex][sIndex].removeFirstOrNull() array?.let { - ArrayContainer.resetArray(it) - limiter.deductMemory(it.sizeBytes.toLong()) + resetArray(it) + limiter.deductMemory((type.size * size).toLong()) + storageUsed[tIndex][sIndex].addLast(it) return it } sIndex } else { - if (sizeIndices[tIndex] >= storage[tIndex].size) + if (sizeIndices[tIndex] >= storageUnused[tIndex].size) grow(tIndex) val idx = sizeIndices[tIndex]++ @@ -42,18 +46,69 @@ internal class ArrayStorage(typeLength: Int, sizeLength: Int, private val limite idx } - return ArrayContainer(type, size, idx) + val array = create(type, size) + storageUsed[tIndex][idx].addLast(array) + + return array + } + + fun moveUsedArrays() { + storageUsed.forEachIndexed { typeIndex, arraysByType -> + arraysByType.forEachIndexed { sizeIndex, arrayDeque -> + arrayDeque.forEach { + storageUnused[typeIndex][sizeIndex].addLast(it) + } + arrayDeque.clear() + } + } } private fun grow(typeIndex: Int) { val newSize = sizes[typeIndex].size * 2 - val newStorage: Array> = Array(newSize) { ArrayDeque() } + val newStorageUnused: Array> = Array(newSize) { ArrayDeque() } + val newStorageUsed: Array> = Array(newSize) { ArrayDeque() } - for (i in storage[typeIndex].indices) { - newStorage[i] = storage[typeIndex][i] + for (i in storageUnused[typeIndex].indices) { + newStorageUnused[i] = storageUnused[typeIndex][i] + newStorageUsed[i] = storageUsed[typeIndex][i] } - storage[typeIndex] = newStorage + storageUnused[typeIndex] = newStorageUnused + storageUsed[typeIndex] = newStorageUsed sizes[typeIndex] = sizes[typeIndex].copyOf(newSize) } + + fun create(type: ArrayTypes, size: Int): Any { + return when (type) { + ArrayTypes.ByteArray -> ByteArray(size) // 8-bit signed + ArrayTypes.UByteArray -> UByteArray(size) // 8-bit unsigned + ArrayTypes.ShortArray -> ShortArray(size) // 16-bit signed + ArrayTypes.UShortArray -> UShortArray(size) // 16-bit unsigned + ArrayTypes.IntArray -> IntArray(size) // 32-bit signed + ArrayTypes.UIntArray -> UIntArray(size) // 32-bit unsigned + ArrayTypes.LongArray -> LongArray(size) // 64-bit signed + ArrayTypes.ULongArray -> ULongArray(size) // 64-bit unsigned + ArrayTypes.FloatArray -> FloatArray(size) + ArrayTypes.DoubleArray -> DoubleArray(size) + ArrayTypes.BooleanArray -> BooleanArray(size) + else -> throw IllegalArgumentException("Unsupported array type") + } + } + + private fun resetArray(array: Any) { + when (array) { + is ByteArray -> array.fill(0) // 8-bit signed + is UByteArray -> array.fill(0u) // 8-bit unsigned + is ShortArray -> array.fill(0) // 16-bit 
signed + is UShortArray -> array.fill(0u) // 16-bit unsigned + is IntArray -> array.fill(0) // 32-bit signed + is UIntArray -> array.fill(0u) // 32-bit unsigned + is LongArray -> array.fill(0L) // 64-bit signed + is ULongArray -> array.fill(0U) // 64-bit unsigned + is FloatArray -> array.fill(0.0f) + is DoubleArray -> array.fill(0.0) + is BooleanArray -> array.fill(false) + else -> throw IllegalArgumentException("Unsupported array type") + } + } } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayContainer.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayContainer.kt deleted file mode 100644 index 8818345fe..000000000 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayContainer.kt +++ /dev/null @@ -1,17 +0,0 @@ -@file:GeneratePrimitives(DataType.ALL) -@file:Suppress("DuplicatedCode") - -package io.kinference.ndarray.arrays.memory - -import io.kinference.primitives.annotations.GenerateNameFromPrimitives -import io.kinference.primitives.annotations.GeneratePrimitives -import io.kinference.primitives.types.DataType -import io.kinference.primitives.types.PrimitiveArray - -@GenerateNameFromPrimitives -internal class PrimitiveArrayContainer( - arrayTypeIndex: Int, - arraySizeIndex: Int, - sizeBytes: Int, - val array: PrimitiveArray -) : ArrayContainer(arrayTypeIndex, arraySizeIndex, sizeBytes) diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt index a9863aadb..e6ad36001 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt @@ -5,7 +5,6 @@ package io.kinference.ndarray.arrays.tiled import io.kinference.ndarray.arrays.* import io.kinference.ndarray.arrays.memory.* -import io.kinference.ndarray.arrays.memory.PrimitiveArrayContainer import io.kinference.ndarray.arrays.pointers.PrimitivePointer import io.kinference.ndarray.arrays.pointers.accept import io.kinference.ndarray.blockSizeByStrides @@ -61,12 +60,9 @@ internal class PrimitiveTiledArray(val blocks: Array) { val blocksNum = if (blockSize == 0) 0 else size / blockSize val coroutineContext = coroutineContext[AllocatorContext.Key] + val blocks = coroutineContext?.getArrayContainers(type, blockSize, blocksNum) ?: Array(blocksNum) { PrimitiveArray(blockSize) } - // With array dispatcher - val containerArray = coroutineContext?.getArrayContainers(type, blockSize, blocksNum) ?: Array(blocksNum) { ArrayContainer(type, blockSize) } - val blocks = Array(containerArray.size) { i -> (containerArray[i] as PrimitiveArrayContainer).array } - - return PrimitiveTiledArray(blocks) + return PrimitiveTiledArray(blocks.map { it as PrimitiveArray }.toTypedArray()) } suspend operator fun invoke(size: Int, blockSize: Int, init: (InlineInt) -> PrimitiveType) : PrimitiveTiledArray { @@ -132,16 +128,19 @@ internal class PrimitiveTiledArray(val blocks: Array) { } suspend fun copyOf(): PrimitiveTiledArray { - val copyArray = PrimitiveTiledArray(size, blockSize) +// val copyArray = PrimitiveTiledArray(size, blockSize) + val copyBlocks = Array(blocksNum) { PrimitiveArray(blockSize) } for (blockNum in 0 until blocksNum) { val thisBlock = this.blocks[blockNum] - val destBlock = copyArray.blocks[blockNum] +// val 
destBlock = copyArray.blocks[blockNum] + val destBlock = copyBlocks[blockNum] thisBlock.copyInto(destBlock) } - return copyArray +// return copyArray + return PrimitiveTiledArray(copyBlocks) } fun copyInto(dest: PrimitiveTiledArray, destOffset: Int = 0, srcStart: Int = 0, srcEnd: Int = size) { diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt index 577c3ea02..bc14a927a 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt @@ -3,8 +3,6 @@ package io.kinference.ndarray.extensions.gelu import io.kinference.ndarray.* import io.kinference.ndarray.arrays.* - -import io.kinference.ndarray.arrays.memory.PrimitiveArrayContainer import io.kinference.ndarray.arrays.tiled.PrimitiveTiledArray import io.kinference.ndarray.extensions.constants.PrimitiveConstants import io.kinference.ndarray.stubs.absoluteValue diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/FastGeluPrimitive.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/FastGeluPrimitive.kt index ffb626267..32a1e6e02 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/FastGeluPrimitive.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/FastGeluPrimitive.kt @@ -6,7 +6,6 @@ package io.kinference.ndarray.extensions.gelu import io.kinference.ndarray.arrays.* import io.kinference.ndarray.arrays.MutablePrimitiveNDArray import io.kinference.ndarray.arrays.PrimitiveNDArray -import io.kinference.ndarray.arrays.memory.PrimitiveArrayContainer import io.kinference.ndarray.arrays.tiled.PrimitiveTiledArray import io.kinference.ndarray.countCoroutinesByData import io.kinference.ndarray.parallelizeByBlocks diff --git a/utils/utils-testing/src/commonMain/kotlin/io.kinference/runners/PerformanceRunner.kt b/utils/utils-testing/src/commonMain/kotlin/io.kinference/runners/PerformanceRunner.kt index 6d8054dc2..5b288eac5 100644 --- a/utils/utils-testing/src/commonMain/kotlin/io.kinference/runners/PerformanceRunner.kt +++ b/utils/utils-testing/src/commonMain/kotlin/io.kinference/runners/PerformanceRunner.kt @@ -120,6 +120,7 @@ class PerformanceRunner>(private val engine: TestEngine) { for (result in results.sortedBy { it.name }) { logger.info { "Test ${result.name}: avg ${result.avg}, min ${result.min}, max ${result.max}" } } + logger.info { "Average between inputs: avg ${results.map { it.avg }.average()}, min ${results.minOfOrNull { it.min }}, max ${results.maxOfOrNull { it.max }}" } } companion object { From 9caf75c01947c03b2afd58fbf9ebdebad148602a Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Thu, 15 Aug 2024 22:07:08 +0200 Subject: [PATCH 03/19] JBAI-4393 [core, ndarray] Refactored memory management and array handling: streamlined array type handling and improved memory limit checks within create and reset methods; KIModel predict improved for NoAllocator case. 
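Note for reviewers: the central behavioural change below is that the memory budget check now happens when an array is requested. If checkMemoryLimitAndAdd accepts the size, the array comes from (and goes back into) the pool; if the budget is exhausted, a plain array is allocated and left to the garbage collector, and resetLimit() clears the accounting after each run. The sketch below is a hedged, self-contained illustration of that gating logic only; the class and method names here are invented, and the real code lives in ArrayStorage and BaseMemoryLimiter and uses kotlinx-atomicfu rather than java.util.concurrent.

import java.util.concurrent.atomic.AtomicLong

// Illustration of limiter-gated pooling: arrays are only tracked and reused
// while the per-run byte budget allows it. Return-to-pool bookkeeping
// (the "used" list) is omitted here for brevity.
class BudgetedFloatPool(private val memoryLimit: Long, private val blockSize: Int) {
    private val usedMemory = AtomicLong(0L)
    private val pool = ArrayDeque<FloatArray>()

    private fun tryReserve(bytes: Long): Boolean {
        while (true) {
            val current = usedMemory.get()
            if (current + bytes > memoryLimit) return false
            if (usedMemory.compareAndSet(current, current + bytes)) return true
        }
    }

    fun request(): FloatArray {
        val bytes = blockSize.toLong() * Float.SIZE_BYTES
        return if (tryReserve(bytes)) {
            pool.removeFirstOrNull()?.also { it.fill(0f) } ?: FloatArray(blockSize)
        } else {
            FloatArray(blockSize) // over budget: not pooled, reclaimed by the GC
        }
    }

    // Counterpart of MemoryLimiter.resetLimit(): called once per inference run.
    fun resetRun() = usedMemory.set(0L)
}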
--- .../io/kinference.core/model/KIModel.kt | 26 +++-- .../ndarray/arrays/ArrayDispatcherUtils.kt | 26 +++-- .../ndarray/arrays/memory/AllocatorContext.kt | 10 +- .../ndarray/arrays/memory/ArrayStorage.kt | 109 ++++++++++-------- .../ndarray/arrays/memory/MemoryLimiter.kt | 40 +++---- .../arrays/tiled/PrimitiveTiledArray.kt | 7 +- 6 files changed, 106 insertions(+), 112 deletions(-) diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt index 2a1e50db7..5aecb3ce6 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt @@ -17,14 +17,14 @@ class KIModel( val name: String, val opSet: OperatorSetRegistry, val graph: KIGraph, - memoryLimiter: MemoryLimiter = MemoryLimiters.NoAllocator, + private val memoryLimiter: MemoryLimiter = MemoryLimiters.NoAllocator, parallelismLimit: Int = PlatformUtils.cores, ) : Model>, Profilable, Cacheable { private val profiles: MutableList = ArrayList() @OptIn(ExperimentalCoroutinesApi::class) private val dispatcher: CoroutineDispatcher = Dispatchers.Default.limitedParallelism(parallelismLimit) - private val modelArrayStorage: ModelArrayStorage = ModelArrayStorage(memoryLimiter) + private val modelArrayStorage: ModelArrayStorage = ModelArrayStorage(MemoryLimiters.Default) override fun addProfilingContext(name: String): ProfilingContext = ProfilingContext(name).apply { profiles.add(this) } override fun analyzeProfilingResults(): ProfileAnalysisEntry = profiles.analyze("Model $name") @@ -44,15 +44,21 @@ class KIModel( coreReserved = true } - val allocatorContext = modelArrayStorage.createAllocatorContext() - val mixedContext = allocatorContext + limiterContext + if (memoryLimiter == MemoryLimiters.NoAllocator) { + withContext(limiterContext) { + return@withContext graph.execute(input, contexts) + } + } else { + val allocatorContext = modelArrayStorage.createAllocatorContext() + val mixedContext = allocatorContext + limiterContext - withContext(mixedContext) { - val coroutineContext = coroutineContext[AllocatorContext.Key]!! - val execResult = graph.execute(input, contexts) - val copies = execResult.map { it.clone(it.name) }.toList() - coroutineContext.closeAllocated() - copies + withContext(mixedContext) { + val coroutineContext = coroutineContext[AllocatorContext.Key]!! 
+ val execResult = graph.execute(input, contexts) + val copies = execResult.map { it.clone(it.name) }.toList() + coroutineContext.closeAllocated() + return@withContext copies + } } } finally { if (coreReserved) { diff --git a/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt b/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt index 53f217c09..c52d6ffd8 100644 --- a/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt +++ b/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt @@ -1,15 +1,19 @@ package io.kinference.ndarray.arrays enum class ArrayTypes(val index: Int, val size: Int) { - ByteArray(0, Byte.SIZE_BYTES), - UByteArray(1, UByte.SIZE_BYTES), - ShortArray(2, Short.SIZE_BYTES), - UShortArray(3, UShort.SIZE_BYTES), - IntArray(4, Int.SIZE_BYTES), - UIntArray(5, UInt.SIZE_BYTES), - LongArray(6, Long.SIZE_BYTES), - ULongArray(7, ULong.SIZE_BYTES), - FloatArray(8, Float.SIZE_BYTES), - DoubleArray(9, Double.SIZE_BYTES), - BooleanArray(10, 1); + ByteArrayType(0, Byte.SIZE_BYTES), + UByteArrayType(1, UByte.SIZE_BYTES), + ShortArrayType(2, Short.SIZE_BYTES), + UShortArrayType(3, UShort.SIZE_BYTES), + IntArrayType(4, Int.SIZE_BYTES), + UIntArrayType(5, UInt.SIZE_BYTES), + LongArrayType(6, Long.SIZE_BYTES), + ULongArrayType(7, ULong.SIZE_BYTES), + FloatArrayType(8, Float.SIZE_BYTES), + DoubleArrayType(9, Double.SIZE_BYTES), + BooleanArrayType(10, 1); + + companion object { + fun sizeInBytes(index: Int, arraySize: Int): Long = entries[index].size * arraySize.toLong() + } } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt index 84d02017e..7c5286a41 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt @@ -13,15 +13,7 @@ data class AllocatorContext internal constructor( override val key: CoroutineContext.Key<*> get() = Key internal fun getArrayContainers(type: ArrayTypes, size: Int, count: Int): Array { - return if (limiter !is NoAllocatorMemoryLimiter) { - Array(count) { unusedContainers.getArrayContainer(type, size) } - } else { - Array(count) { unusedContainers.create(type, size) } - } - } - - fun closeOperator() { - unusedContainers.moveUsedArrays() + return Array(count) { unusedContainers.getArrayContainer(type, size) } } fun closeAllocated() { diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt index 859ad2654..782c121c2 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt @@ -4,6 +4,8 @@ import io.kinference.ndarray.arrays.ArrayTypes internal class ArrayStorage(typeLength: Int, sizeLength: Int, private val limiter: MemoryLimiter) { /** + * This is a storage for arrays which are available for retrieving + * * Structure is as follows: * 1. Array by predefined types (all types are known compiled time) * 2. Array by size. 
Starting with 'INIT_SIZE_VALUE' element and grow it doubling (typically there are no more than 16 different sizes) @@ -12,30 +14,52 @@ internal class ArrayStorage(typeLength: Int, sizeLength: Int, private val limite private var storageUnused: Array>> = Array(typeLength) { Array(sizeLength) { ArrayDeque() } } + /** + * This is a storage for arrays which are currently in use. + * They should be moved back into unused storage when there is no need for them. + * + * Structure is as follows: + * 1. Array by predefined types (all types are known compiled time) + * 2. Array by size. + * Starting with 'INIT_SIZE_VALUE' element and grow it doubling (typically there are no more than 16 different sizes) + * 3. Queue of array containers (used as FIFO) + */ private var storageUsed: Array>> = Array(typeLength) { Array(sizeLength) { ArrayDeque() } } private var sizeIndices: IntArray = IntArray(typeLength) private var sizes: Array = Array(typeLength) { IntArray(sizeLength) } + internal fun getArrayContainer(type: ArrayTypes, size: Int): Any { + return if (limiter.checkMemoryLimitAndAdd(ArrayTypes.sizeInBytes(type.index, size))) { + val tIndex = type.index + val sIndex = getSizeIndex(tIndex, size) + val array = storageUnused[tIndex][sIndex].removeFirstOrNull()?.also(::resetArray) + ?: create(type, size) - operator fun get(typeIndex: Int, sizeIndex: Int): ArrayDeque { - return storageUnused[typeIndex][sizeIndex] + storageUsed[tIndex][sIndex].addLast(array) + array + } else { + create(type, size) + } } - fun getArrayContainer(type: ArrayTypes, size: Int): Any { - val tIndex = type.index + internal fun moveUsedArrays() { + storageUsed.forEachIndexed { typeIndex, arraysByType -> + arraysByType.forEachIndexed { sizeIndex, arrayDeque -> + arrayDeque.forEach { + storageUnused[typeIndex][sizeIndex].addLast(it) + } + arrayDeque.clear() + } + } + limiter.resetLimit() + } + + private fun getSizeIndex(tIndex: Int, size: Int): Int { val sIndex = sizes[tIndex].indexOf(size) - // Checking that we have this array size in our storage for this type - val idx = if (sIndex != -1) { - val array = storageUnused[tIndex][sIndex].removeFirstOrNull() - array?.let { - resetArray(it) - limiter.deductMemory((type.size * size).toLong()) - storageUsed[tIndex][sIndex].addLast(it) - return it - } + return if (sIndex != -1) { sIndex } else { if (sizeIndices[tIndex] >= storageUnused[tIndex].size) @@ -45,22 +69,6 @@ internal class ArrayStorage(typeLength: Int, sizeLength: Int, private val limite sizes[tIndex][idx] = size idx } - - val array = create(type, size) - storageUsed[tIndex][idx].addLast(array) - - return array - } - - fun moveUsedArrays() { - storageUsed.forEachIndexed { typeIndex, arraysByType -> - arraysByType.forEachIndexed { sizeIndex, arrayDeque -> - arrayDeque.forEach { - storageUnused[typeIndex][sizeIndex].addLast(it) - } - arrayDeque.clear() - } - } } private fun grow(typeIndex: Int) { @@ -78,37 +86,36 @@ internal class ArrayStorage(typeLength: Int, sizeLength: Int, private val limite sizes[typeIndex] = sizes[typeIndex].copyOf(newSize) } - fun create(type: ArrayTypes, size: Int): Any { + private fun create(type: ArrayTypes, size: Int): Any { return when (type) { - ArrayTypes.ByteArray -> ByteArray(size) // 8-bit signed - ArrayTypes.UByteArray -> UByteArray(size) // 8-bit unsigned - ArrayTypes.ShortArray -> ShortArray(size) // 16-bit signed - ArrayTypes.UShortArray -> UShortArray(size) // 16-bit unsigned - ArrayTypes.IntArray -> IntArray(size) // 32-bit signed - ArrayTypes.UIntArray -> UIntArray(size) // 32-bit unsigned - 
ArrayTypes.LongArray -> LongArray(size) // 64-bit signed - ArrayTypes.ULongArray -> ULongArray(size) // 64-bit unsigned - ArrayTypes.FloatArray -> FloatArray(size) - ArrayTypes.DoubleArray -> DoubleArray(size) - ArrayTypes.BooleanArray -> BooleanArray(size) + ArrayTypes.ByteArrayType -> ByteArray(size) // 8-bit signed + ArrayTypes.UByteArrayType -> UByteArray(size) // 8-bit unsigned + ArrayTypes.ShortArrayType -> ShortArray(size) // 16-bit signed + ArrayTypes.UShortArrayType -> UShortArray(size) // 16-bit unsigned + ArrayTypes.IntArrayType -> IntArray(size) // 32-bit signed + ArrayTypes.UIntArrayType -> UIntArray(size) // 32-bit unsigned + ArrayTypes.LongArrayType -> LongArray(size) // 64-bit signed + ArrayTypes.ULongArrayType -> ULongArray(size) // 64-bit unsigned + ArrayTypes.FloatArrayType -> FloatArray(size) + ArrayTypes.DoubleArrayType -> DoubleArray(size) + ArrayTypes.BooleanArrayType -> BooleanArray(size) else -> throw IllegalArgumentException("Unsupported array type") } } - private fun resetArray(array: Any) { + private fun resetArray(array: Any): Unit = when (array) { - is ByteArray -> array.fill(0) // 8-bit signed - is UByteArray -> array.fill(0u) // 8-bit unsigned - is ShortArray -> array.fill(0) // 16-bit signed - is UShortArray -> array.fill(0u) // 16-bit unsigned - is IntArray -> array.fill(0) // 32-bit signed - is UIntArray -> array.fill(0u) // 32-bit unsigned - is LongArray -> array.fill(0L) // 64-bit signed - is ULongArray -> array.fill(0U) // 64-bit unsigned + is ByteArray -> array.fill(0) // 8-bit signed + is UByteArray -> array.fill(0u) // 8-bit unsigned + is ShortArray -> array.fill(0) // 16-bit signed + is UShortArray -> array.fill(0u) // 16-bit unsigned + is IntArray -> array.fill(0) // 32-bit signed + is UIntArray -> array.fill(0u) // 32-bit unsigned + is LongArray -> array.fill(0L) // 64-bit signed + is ULongArray -> array.fill(0U) // 64-bit unsigned is FloatArray -> array.fill(0.0f) is DoubleArray -> array.fill(0.0) is BooleanArray -> array.fill(false) else -> throw IllegalArgumentException("Unsupported array type") } - } } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryLimiter.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryLimiter.kt index 775c0a895..ebb86fd8c 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryLimiter.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryLimiter.kt @@ -1,12 +1,11 @@ package io.kinference.ndarray.arrays.memory import io.kinference.utils.PlatformUtils -import kotlinx.atomicfu.AtomicLong -import kotlinx.atomicfu.atomic +import kotlinx.atomicfu.* interface MemoryLimiter { /** - * Checks if the memory limit allows adding the specified amount of memory and performs the addition. + * Checks if the memory limit allows adding the specified amount of memory and performs the addition * * @param added the memory in bytes to add * @return true if the memory was added successfully and false if adding the memory exceeds the memory limit @@ -14,44 +13,33 @@ interface MemoryLimiter { fun checkMemoryLimitAndAdd(added: Long): Boolean /** - * Deducts the specified amount of memory from the memory limiter. 
- * - * @param deducted the memory in bytes to deduct from the memory limiter + * Resets the used memory into 0L */ - fun deductMemory(deducted: Long) + fun resetLimit() } -class BaseMemoryLimiter(private val memoryLimit: Long) : MemoryLimiter { +class BaseMemoryLimiter internal constructor(private val memoryLimit: Long) : MemoryLimiter { private var usedMemory: AtomicLong = atomic(0L) override fun checkMemoryLimitAndAdd(added: Long): Boolean { - val currentMemory = usedMemory.addAndGet(added) - return if (currentMemory > memoryLimit) { - usedMemory.addAndGet(-added) - false - } else true + // Attempt to add memory and check the limit + val successful = usedMemory.getAndUpdate { current -> + if (current + added > memoryLimit) current else current + added + } != usedMemory.value // Check if the update was successful + + return successful } - override fun deductMemory(deducted: Long) { - usedMemory.addAndGet(-deducted) + override fun resetLimit() { + usedMemory.value = 0L } } object MemoryLimiters { val Default: MemoryLimiter = BaseMemoryLimiter((PlatformUtils.maxHeap * 0.3).toLong()) - val NoAllocator: MemoryLimiter = NoAllocatorMemoryLimiter + val NoAllocator: MemoryLimiter = BaseMemoryLimiter(0L) fun customLimiter(memoryLimit: Long): MemoryLimiter { return BaseMemoryLimiter(memoryLimit) } } - -internal object NoAllocatorMemoryLimiter : MemoryLimiter { - override fun checkMemoryLimitAndAdd(added: Long): Boolean { - return false - } - - override fun deductMemory(deducted: Long) { - - } -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt index e6ad36001..07cbf57f1 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt @@ -26,7 +26,7 @@ internal class PrimitiveTiledArray(val blocks: Array) { } companion object { - val type: ArrayTypes = ArrayTypes.valueOf(PrimitiveArray::class.simpleName!!) + val type: ArrayTypes = ArrayTypes.valueOf(PrimitiveArray::class.simpleName!! + "Type") suspend operator fun invoke(strides: Strides): PrimitiveTiledArray { val blockSize = blockSizeByStrides(strides) @@ -127,19 +127,16 @@ internal class PrimitiveTiledArray(val blocks: Array) { blocks[blockIdx][blockOff] = value } - suspend fun copyOf(): PrimitiveTiledArray { -// val copyArray = PrimitiveTiledArray(size, blockSize) + fun copyOf(): PrimitiveTiledArray { val copyBlocks = Array(blocksNum) { PrimitiveArray(blockSize) } for (blockNum in 0 until blocksNum) { val thisBlock = this.blocks[blockNum] -// val destBlock = copyArray.blocks[blockNum] val destBlock = copyBlocks[blockNum] thisBlock.copyInto(destBlock) } -// return copyArray return PrimitiveTiledArray(copyBlocks) } From f1a929665a1af27ab7c465e6d072755fc2fab6d4 Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Mon, 19 Aug 2024 18:52:03 +0200 Subject: [PATCH 04/19] JBAI-4393 [core, ndarray] Refactored memory management and array handling: added new type for limiter which works with manually managed ndarrays, added manual ndarray handling in Attention and TensorExtensions, moved to use standard DataType enum instead of ArrayTypes. 
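Note for reviewers: the manual-allocation pattern added below works as follows. Operators ask an optional ManualAllocatorContext for a pre-sized NDArray via getNDArray(type, strides, fillZeros = true) and fall back to allocateNDArray when no context is installed; the resulting array is wrapped into a KITensor together with the context via asTensor(name, context), and KITensor.close() hands the array back through returnNDArray. A minimal usage sketch assuming exactly those calls from the diff follows; the helper name makePooledOutput and the inferred intermediate types are hypothetical.

import io.kinference.core.data.tensor.*
import io.kinference.ndarray.arrays.*
import io.kinference.ndarray.arrays.memory.ManualAllocatorContext
import io.kinference.ndarray.extensions.allocateNDArray
import io.kinference.primitives.types.DataType

// Borrow an output buffer from the manual allocator context when present,
// otherwise allocate it normally, and tie its lifetime to the produced tensor
// so that KITensor.close() can return the buffer via returnNDArray().
suspend fun makePooledOutput(
    context: ManualAllocatorContext?,   // null when manual allocation is disabled
    type: DataType,
    strides: Strides,
    name: String? = null
): KITensor {
    val output = context?.getNDArray(type, strides, fillZeros = true)
        ?: allocateNDArray(type, strides)
    return output.asTensor(name, context)
}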
--- .../kinference.core/data/tensor/KITensor.kt | 6 +- .../data/tensor/TensorExtensions.kt | 5 +- .../io/kinference.core/model/KIModel.kt | 37 +++-- .../operators/layer/attention/Attention.kt | 62 +++++--- .../ndarray/arrays/ArrayDispatcherUtils.kt | 19 --- .../ndarray/arrays/memory/AllocatorContext.kt | 23 --- .../ndarray/arrays/memory/ArrayStorage.kt | 150 ++++++++++-------- .../arrays/memory/AutoAllocatorContext.kt | 23 +++ .../arrays/memory/ManualAllocatorContext.kt | 53 +++++++ .../ndarray/arrays/memory/MemoryLimiter.kt | 35 +++- .../arrays/memory/ModelArrayStorage.kt | 23 +-- .../memory/PrimitiveArrayStorageWrapper.kt | 30 ++++ .../arrays/tiled/PrimitiveTiledArray.kt | 6 +- .../extensions/constants/BooleanConstants.kt | 6 + 14 files changed, 317 insertions(+), 161 deletions(-) delete mode 100644 ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt delete mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt create mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AutoAllocatorContext.kt create mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ManualAllocatorContext.kt create mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayStorageWrapper.kt create mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/BooleanConstants.kt diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt index d1ca7c5f6..cdf96e0e1 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt @@ -3,6 +3,7 @@ package io.kinference.core.data.tensor import io.kinference.core.* import io.kinference.data.ONNXTensor import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.ManualAllocatorContext import io.kinference.ndarray.arrays.tiled.* import io.kinference.protobuf.FLOAT_TENSOR_TYPES import io.kinference.protobuf.message.TensorProto @@ -12,10 +13,11 @@ import io.kinference.types.ValueTypeInfo //TODO: support segments //TODO: support external data -class KITensor(name: String?, override val data: NDArrayCore, val info: ValueTypeInfo.TensorTypeInfo) : ONNXTensor(name, data) { +class KITensor(name: String?, override val data: NDArrayCore, val info: ValueTypeInfo.TensorTypeInfo, private var context: ManualAllocatorContext? 
= null) : ONNXTensor(name, data) { constructor(data: NDArrayCore, info: ValueInfo) : this(info.name, data, info.typeInfo as ValueTypeInfo.TensorTypeInfo) override suspend fun close() { + context?.returnNDArray(data) data.close() } @@ -41,7 +43,7 @@ class KITensor(name: String?, override val data: NDArrayCore, val info: ValueTyp override val backend = CoreBackend override fun rename(name: String): KITensor { - return KITensor(name, data, info) + return KITensor(name, data, info, context) } companion object { diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/TensorExtensions.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/TensorExtensions.kt index 618431c01..b83e75c2e 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/TensorExtensions.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/TensorExtensions.kt @@ -1,6 +1,7 @@ package io.kinference.core.data.tensor import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.ManualAllocatorContext import io.kinference.ndarray.extensions.concat import io.kinference.ndarray.extensions.splitWithAxis import io.kinference.primitives.types.DataType @@ -8,9 +9,9 @@ import io.kinference.protobuf.resolveProtoDataType import io.kinference.types.TensorShape import io.kinference.types.ValueTypeInfo -fun NDArrayCore.asTensor(name: String? = null) = KITensor(name, this, ValueTypeInfo.TensorTypeInfo(TensorShape(this.shape), type.resolveProtoDataType())) +fun NDArrayCore.asTensor(name: String? = null, context: ManualAllocatorContext? = null) = KITensor(name, this, ValueTypeInfo.TensorTypeInfo(TensorShape(this.shape), type.resolveProtoDataType()), context) -internal fun T.asTensor(name: String? = null) = (this as NDArrayCore).asTensor(name) +internal fun T.asTensor(name: String? = null, context: ManualAllocatorContext? 
= null) = (this as NDArrayCore).asTensor(name, context) internal fun Collection.asONNXTensors(names: List): List { return this.zip(names).map { (data, name) -> data.asTensor(name) } diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt index 5aecb3ce6..35d554631 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt @@ -24,7 +24,7 @@ class KIModel( @OptIn(ExperimentalCoroutinesApi::class) private val dispatcher: CoroutineDispatcher = Dispatchers.Default.limitedParallelism(parallelismLimit) - private val modelArrayStorage: ModelArrayStorage = ModelArrayStorage(MemoryLimiters.Default) + private val modelArrayStorage: ModelArrayStorage = ModelArrayStorage(memoryLimiter) override fun addProfilingContext(name: String): ProfilingContext = ProfilingContext(name).apply { profiles.add(this) } override fun analyzeProfilingResults(): ProfileAnalysisEntry = profiles.analyze("Model $name") @@ -44,20 +44,31 @@ class KIModel( coreReserved = true } - if (memoryLimiter == MemoryLimiters.NoAllocator) { - withContext(limiterContext) { - return@withContext graph.execute(input, contexts) + when (memoryLimiter) { + MemoryLimiters.NoAllocator -> { + withContext(limiterContext) { + return@withContext graph.execute(input, contexts) + } } - } else { - val allocatorContext = modelArrayStorage.createAllocatorContext() - val mixedContext = allocatorContext + limiterContext + MemoryLimiters.DefaultManualAllocator -> { + val allocatorContext = modelArrayStorage.createManualAllocatorContext() + val mixedContext = allocatorContext + limiterContext - withContext(mixedContext) { - val coroutineContext = coroutineContext[AllocatorContext.Key]!! - val execResult = graph.execute(input, contexts) - val copies = execResult.map { it.clone(it.name) }.toList() - coroutineContext.closeAllocated() - return@withContext copies + withContext(mixedContext) { + return@withContext graph.execute(input, contexts) + } + } + else -> { + val allocatorContext = modelArrayStorage.createAutoAllocatorContext() + val mixedContext = allocatorContext + limiterContext + + withContext(mixedContext) { + val coroutineContext = coroutineContext[AutoAllocatorContext.Key]!! 
+ val execResult = graph.execute(input, contexts) + val copies = execResult.map { it.clone(it.name) }.toList() + coroutineContext.returnUsedArrays() + return@withContext copies + } } } } finally { diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt index 05b76119b..234639c96 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt @@ -7,17 +7,21 @@ import io.kinference.core.optimizer.rules.context.AttentionContextRule import io.kinference.data.ONNXData import io.kinference.graph.Contexts import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.ManualAllocatorContext import io.kinference.ndarray.arrays.pointers.accept import io.kinference.ndarray.arrays.pointers.map import io.kinference.ndarray.arrays.tiled.FloatTiledArray import io.kinference.ndarray.extensions.allocateNDArray import io.kinference.ndarray.extensions.dotTransposedWithAlpha +import io.kinference.ndarray.extensions.softmax.softmax import io.kinference.operator.* import io.kinference.optimizer.GraphOptimizer.Companion.isOpt +import io.kinference.primitives.types.DataType import io.kinference.protobuf.message.AttributeProto import io.kinference.protobuf.message.TensorProto import io.kinference.utils.launchWithLimitOrDefault import kotlinx.coroutines.coroutineScope +import kotlin.coroutines.coroutineContext import kotlin.math.min import kotlin.math.sqrt @@ -25,11 +29,12 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map { val headSize = hiddenSize / numHeads - val output = allocateNDArray(scores.type, Strides(intArrayOf(batchSize, numHeads, seqLen, headSize))) + val outputStrides = Strides(intArrayOf(batchSize, numHeads, seqLen, headSize)) + val output = context?.getNDArray(scores.type, outputStrides, fillZeros = true) ?: allocateNDArray(scores.type, outputStrides) coroutineScope { for (batchNum in 0 until batchSize) { @@ -46,6 +51,8 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map { val headSize = hiddenSize / numHeads val pastSeqLen = past?.shape?.get(3) ?: 0 val present = makePresent(past, k, v, batchSize, seqLen, numHeads, hiddenSize) - val scores = normalizedScores(unidir, q, mask, batchSize, seqLen, pastSeqLen, headSize, numHeads, present, maskFilterValue) - return attentionScore(scores, batchSize, seqLen, numHeads, hiddenSize, present) + val scores = normalizedScores(unidir, q, mask, batchSize, seqLen, pastSeqLen, headSize, numHeads, present, maskFilterValue, context) + return attentionScore(scores, batchSize, seqLen, numHeads, hiddenSize, present, context) } private suspend fun normalizedScores( unidir: Boolean, queries: NDArrayCore, maskIndices: IntNDArray?, batchSize: Int, - seqLen: Int, pastSeqLen: Int, headSize: Int, numHeads: Int, present: NDArrayCore, maskFilterValue: Float = -10_000f + seqLen: Int, pastSeqLen: Int, headSize: Int, numHeads: Int, present: NDArrayCore, maskFilterValue: Float = -10_000f, context: ManualAllocatorContext? 
= null ): NumberNDArrayCore { val allSeqLen = present.shape[3] - val scores = allocateNDArray(queries.type, Strides(intArrayOf(batchSize, numHeads, seqLen, allSeqLen))) as MutableNumberNDArrayCore + val scoresStrides = Strides(intArrayOf(batchSize, numHeads, seqLen, allSeqLen)) + val scores = (context?.getNDArray(queries.type, scoresStrides, fillZeros = true) ?: allocateNDArray(queries.type, scoresStrides)) as MutableNumberNDArrayCore - val maskData = maskIndices?.maskFromIndices(unidir, batchSize, seqLen, pastSeqLen, maskFilterValue) + val maskData = maskIndices?.maskFromIndices(unidir, batchSize, seqLen, pastSeqLen, maskFilterValue, context) val alpha = 1.0 / sqrt(headSize.toDouble()) @@ -148,27 +156,38 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map if (this != null) { //raw attention (no padding). only raw attention mask is 2-dimensional if (this.rank == 2) { - val maskPointer = mask.array.pointer(maskOffset * i) + val maskPointer = (mask as MutableFloatNDArray).array.pointer(maskOffset * i) val maskIndicesPointer = this.array.pointer(i * fullSeqLen) maskPointer.accept(maskIndicesPointer, fullSeqLen) { _, src -> if (src > 0) 0f else maskFilterValue } } else { //for left/right-side padding val maskIndicesPointer = this.array.pointer(i) - val maskPointer = mask.array.pointer(maskOffset * i + maskIndicesPointer.get()) + val maskPointer = (mask as MutableFloatNDArray).array.pointer(maskOffset * i + maskIndicesPointer.get()) maskPointer.map(fullSeqLen - maskIndicesPointer.get()) { maskFilterValue } if (this.rank == 1 && this.shape[0] == 2 * batchSize) { @@ -186,7 +205,7 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map>, input internal suspend fun initQueryKeyValue( input: NDArrayCore, weights: NDArrayCore, bias: NDArrayCore, - batchSize: Int, seqLen: Int, hiddenSize: Int, numHeads: Int + batchSize: Int, seqLen: Int, hiddenSize: Int, numHeads: Int, context: ManualAllocatorContext? = null ): Array { input as NumberNDArrayCore val headSize = hiddenSize / numHeads - val qkv = Array(3) { allocateNDArray(input.type, Strides(intArrayOf(batchSize, numHeads, seqLen, headSize))) } + val qkvStrides = Strides(intArrayOf(batchSize, numHeads, seqLen, headSize)) + val qkv = Array(3) { context?.getNDArray(input.type, qkvStrides, fillZeros = true) ?: allocateNDArray(input.type, qkvStrides) } coroutineScope { for (qkvIdx in 0 until 3) { @@ -269,6 +289,8 @@ class AttentionVer1(name: String, attributes: Map>, input private val maskFilterValue: Float by attribute("mask_filter_value") { it: Number -> it.toFloat() } override suspend fun > apply(contexts: Contexts, inputs: List): List { + val context = coroutineContext[ManualAllocatorContext.Key] + val input = inputs[0]!! val weights = inputs[1]!! 
@@ -286,10 +308,10 @@ class AttentionVer1(name: String, attributes: Map>, input input.data, preparedWeights.data, preparedBias.data, - batchSize, seqLen, hiddenSize, numHeads, + batchSize, seqLen, hiddenSize, numHeads, context ) - val (scores, present) = getScores(unidir, queries, keys, values, maskIndices, past, batchSize, seqLen, numHeads, hiddenSize, maskFilterValue) - return listOf(scores.asTensor(), present.asTensor()) + val (scores, present) = getScores(unidir, queries, keys, values, maskIndices, past, batchSize, seqLen, numHeads, hiddenSize, maskFilterValue, context) + return listOf(scores.asTensor(context = context), present.asTensor(context = context)) } } diff --git a/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt b/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt deleted file mode 100644 index c52d6ffd8..000000000 --- a/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt +++ /dev/null @@ -1,19 +0,0 @@ -package io.kinference.ndarray.arrays - -enum class ArrayTypes(val index: Int, val size: Int) { - ByteArrayType(0, Byte.SIZE_BYTES), - UByteArrayType(1, UByte.SIZE_BYTES), - ShortArrayType(2, Short.SIZE_BYTES), - UShortArrayType(3, UShort.SIZE_BYTES), - IntArrayType(4, Int.SIZE_BYTES), - UIntArrayType(5, UInt.SIZE_BYTES), - LongArrayType(6, Long.SIZE_BYTES), - ULongArrayType(7, ULong.SIZE_BYTES), - FloatArrayType(8, Float.SIZE_BYTES), - DoubleArrayType(9, Double.SIZE_BYTES), - BooleanArrayType(10, 1); - - companion object { - fun sizeInBytes(index: Int, arraySize: Int): Long = entries[index].size * arraySize.toLong() - } -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt deleted file mode 100644 index 7c5286a41..000000000 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt +++ /dev/null @@ -1,23 +0,0 @@ -package io.kinference.ndarray.arrays.memory - -import io.kinference.ndarray.arrays.* -import kotlin.coroutines.CoroutineContext - -data class AllocatorContext internal constructor( - private val unusedContainers: ArrayStorage, - private val limiter: MemoryLimiter, - private val returnStorageFn: (ArrayStorage) -> Unit -) : CoroutineContext.Element { - - companion object Key : CoroutineContext.Key - override val key: CoroutineContext.Key<*> get() = Key - - internal fun getArrayContainers(type: ArrayTypes, size: Int, count: Int): Array { - return Array(count) { unusedContainers.getArrayContainer(type, size) } - } - - fun closeAllocated() { - unusedContainers.moveUsedArrays() - returnStorageFn(unusedContainers) - } -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt index 782c121c2..dcf704673 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt @@ -1,22 +1,10 @@ package io.kinference.ndarray.arrays.memory -import io.kinference.ndarray.arrays.ArrayTypes +import io.kinference.primitives.types.DataType -internal class ArrayStorage(typeLength: Int, sizeLength: Int, private val limiter: MemoryLimiter) { +internal abstract class BaseArrayStorage(typeLength: 
Int, sizeLength: Int, storageCount: Int) { /** - * This is a storage for arrays which are available for retrieving - * - * Structure is as follows: - * 1. Array by predefined types (all types are known compiled time) - * 2. Array by size. Starting with 'INIT_SIZE_VALUE' element and grow it doubling (typically there are no more than 16 different sizes) - * 3. Queue of array containers (used as FIFO) - */ - private var storageUnused: Array>> = - Array(typeLength) { Array(sizeLength) { ArrayDeque() } } - - /** - * This is a storage for arrays which are currently in use. - * They should be moved back into unused storage when there is no need for them. + * This is a storage for arrays. * * Structure is as follows: * 1. Array by predefined types (all types are known compiled time) @@ -24,45 +12,19 @@ internal class ArrayStorage(typeLength: Int, sizeLength: Int, private val limite * Starting with 'INIT_SIZE_VALUE' element and grow it doubling (typically there are no more than 16 different sizes) * 3. Queue of array containers (used as FIFO) */ - private var storageUsed: Array>> = - Array(typeLength) { Array(sizeLength) { ArrayDeque() } } + protected var storage: Array>>> = + Array(storageCount) { Array(typeLength) { Array(sizeLength) { ArrayDeque() } } } private var sizeIndices: IntArray = IntArray(typeLength) private var sizes: Array = Array(typeLength) { IntArray(sizeLength) } - internal fun getArrayContainer(type: ArrayTypes, size: Int): Any { - return if (limiter.checkMemoryLimitAndAdd(ArrayTypes.sizeInBytes(type.index, size))) { - val tIndex = type.index - val sIndex = getSizeIndex(tIndex, size) - val array = storageUnused[tIndex][sIndex].removeFirstOrNull()?.also(::resetArray) - ?: create(type, size) - - storageUsed[tIndex][sIndex].addLast(array) - array - } else { - create(type, size) - } - } - - internal fun moveUsedArrays() { - storageUsed.forEachIndexed { typeIndex, arraysByType -> - arraysByType.forEachIndexed { sizeIndex, arrayDeque -> - arrayDeque.forEach { - storageUnused[typeIndex][sizeIndex].addLast(it) - } - arrayDeque.clear() - } - } - limiter.resetLimit() - } - - private fun getSizeIndex(tIndex: Int, size: Int): Int { + protected fun getSizeIndex(tIndex: Int, size: Int): Int { val sIndex = sizes[tIndex].indexOf(size) return if (sIndex != -1) { sIndex } else { - if (sizeIndices[tIndex] >= storageUnused[tIndex].size) + if (sizeIndices[tIndex] >= storage[0][tIndex].size) grow(tIndex) val idx = sizeIndices[tIndex]++ @@ -73,37 +35,40 @@ internal class ArrayStorage(typeLength: Int, sizeLength: Int, private val limite private fun grow(typeIndex: Int) { val newSize = sizes[typeIndex].size * 2 - val newStorageUnused: Array> = Array(newSize) { ArrayDeque() } - val newStorageUsed: Array> = Array(newSize) { ArrayDeque() } + for (i in storage.indices) { + val newStorage: Array> = Array(newSize) { ArrayDeque() } - for (i in storageUnused[typeIndex].indices) { - newStorageUnused[i] = storageUnused[typeIndex][i] - newStorageUsed[i] = storageUsed[typeIndex][i] + for (j in storage[i][typeIndex].indices) { + newStorage[j] = storage[i][typeIndex][j] + } + + storage[i][typeIndex] = newStorage } - storageUnused[typeIndex] = newStorageUnused - storageUsed[typeIndex] = newStorageUsed sizes[typeIndex] = sizes[typeIndex].copyOf(newSize) } - private fun create(type: ArrayTypes, size: Int): Any { + protected fun create(type: DataType, size: Int): Any { return when (type) { - ArrayTypes.ByteArrayType -> ByteArray(size) // 8-bit signed - ArrayTypes.UByteArrayType -> UByteArray(size) // 8-bit unsigned - 
ArrayTypes.ShortArrayType -> ShortArray(size) // 16-bit signed - ArrayTypes.UShortArrayType -> UShortArray(size) // 16-bit unsigned - ArrayTypes.IntArrayType -> IntArray(size) // 32-bit signed - ArrayTypes.UIntArrayType -> UIntArray(size) // 32-bit unsigned - ArrayTypes.LongArrayType -> LongArray(size) // 64-bit signed - ArrayTypes.ULongArrayType -> ULongArray(size) // 64-bit unsigned - ArrayTypes.FloatArrayType -> FloatArray(size) - ArrayTypes.DoubleArrayType -> DoubleArray(size) - ArrayTypes.BooleanArrayType -> BooleanArray(size) + DataType.BYTE -> ByteArray(size) // 8-bit signed + DataType.SHORT -> ShortArray(size) // 16-bit signed + DataType.INT -> IntArray(size) // 32-bit signed + DataType.LONG -> LongArray(size) // 64-bit signed + + DataType.UBYTE -> UByteArray(size) // 8-bit unsigned + DataType.USHORT -> UShortArray(size) // 16-bit unsigned + DataType.UINT -> UIntArray(size) // 32-bit unsigned + DataType.ULONG -> ULongArray(size) // 64-bit unsigned + + DataType.FLOAT -> FloatArray(size) + DataType.DOUBLE -> DoubleArray(size) + + DataType.BOOLEAN -> BooleanArray(size) else -> throw IllegalArgumentException("Unsupported array type") } } - private fun resetArray(array: Any): Unit = + protected fun resetArray(array: Any): Unit = when (array) { is ByteArray -> array.fill(0) // 8-bit signed is UByteArray -> array.fill(0u) // 8-bit unsigned @@ -116,6 +81,59 @@ internal class ArrayStorage(typeLength: Int, sizeLength: Int, private val limite is FloatArray -> array.fill(0.0f) is DoubleArray -> array.fill(0.0) is BooleanArray -> array.fill(false) - else -> throw IllegalArgumentException("Unsupported array type") + else -> error("Unsupported array type") } } + +internal class SingleArrayStorage(typeLength: Int, sizeLength: Int, private val limiter: MemoryLimiter) : BaseArrayStorage(typeLength, sizeLength, 1) { + internal fun getArray(type: DataType, size: Int, fillZeros: Boolean = true): Any { + return if (limiter.checkMemoryLimitAndAdd(type, size)) { + val tIndex = type.ordinal + val sIndex = getSizeIndex(tIndex, size) + storage[0][tIndex][sIndex].removeFirstOrNull()?.takeIf { fillZeros }?.apply(::resetArray) ?: create(type, size) + } else { + create(type, size) + } + } + + internal fun returnArrays(type: DataType, size: Int, arrays: Array) { + val tIndex = type.ordinal + val sIndex = getSizeIndex(tIndex, size) + val queue = storage[0][tIndex][sIndex] + + queue.addAll(arrays) + } + + internal fun clear() { + storage[0].forEach { arraysBySize -> + arraysBySize.forEach { arrayDeque -> + arrayDeque.clear() + } + } + limiter.resetLimit() + } +} + +internal class ArrayStorage(typeLength: Int, sizeLength: Int, private val limiter: MemoryLimiter) : BaseArrayStorage(typeLength, sizeLength, 2) { + internal fun getArray(type: DataType, size: Int, fillZeros: Boolean = true): Any { + return if (limiter.checkMemoryLimitAndAdd(type, size)) { + val tIndex = type.ordinal + val sIndex = getSizeIndex(tIndex, size) + val array = storage[0][tIndex][sIndex].removeFirstOrNull()?.takeIf { fillZeros }?.apply(::resetArray) ?: create(type, size) + storage[1][tIndex][sIndex].add(array) + array + } else { + create(type, size) + } + } + + internal fun moveArrays() { + storage[1].forEachIndexed { typeIndex, arraysByType -> + arraysByType.forEachIndexed { sizeIndex, arrayDeque -> + storage[0][typeIndex][sizeIndex].addAll(arrayDeque) + arrayDeque.clear() + } + } + limiter.resetLimit() + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AutoAllocatorContext.kt 
b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AutoAllocatorContext.kt new file mode 100644 index 000000000..a9255dd93 --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AutoAllocatorContext.kt @@ -0,0 +1,23 @@ +package io.kinference.ndarray.arrays.memory + +import io.kinference.ndarray.arrays.* +import io.kinference.primitives.types.DataType +import kotlin.coroutines.CoroutineContext + +data class AutoAllocatorContext internal constructor( + private val storage: ArrayStorage, + private val returnStorageFn: (ArrayStorage) -> Unit +) : CoroutineContext.Element { + + companion object Key : CoroutineContext.Key + override val key: CoroutineContext.Key<*> get() = Key + + internal fun getArrays(type: DataType, size: Int, count: Int): Array { + return Array(count) { storage.getArray(type, size) } + } + + fun returnUsedArrays() { + storage.moveArrays() + returnStorageFn(storage) + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ManualAllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ManualAllocatorContext.kt new file mode 100644 index 000000000..788541e6f --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ManualAllocatorContext.kt @@ -0,0 +1,53 @@ +package io.kinference.ndarray.arrays.memory + +import io.kinference.ndarray.arrays.* +import io.kinference.primitives.types.DataType +import kotlin.coroutines.CoroutineContext + +data class ManualAllocatorContext internal constructor(private val storage: SingleArrayStorage) : CoroutineContext.Element { + + companion object Key : CoroutineContext.Key + override val key: CoroutineContext.Key<*> get() = Key + + fun getNDArray(dataType: DataType, strides: Strides, fillZeros: Boolean = false): MutableNDArrayCore { + return when(dataType) { + DataType.BYTE -> ByteArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + DataType.SHORT -> ShortArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + DataType.INT -> IntArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + DataType.LONG -> LongArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + + DataType.UBYTE -> UByteArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + DataType.USHORT -> UShortArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + DataType.UINT -> UIntArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + DataType.ULONG -> ULongArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + + DataType.FLOAT -> FloatArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + DataType.DOUBLE -> DoubleArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + + DataType.BOOLEAN -> BooleanArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + + else -> error("Unsupported array type") + } + } + + fun returnNDArray(ndArray: NDArrayCore) { + when(ndArray.type) { + DataType.BYTE -> ByteArrayStorageWrapper.returnNDArray(storage, ndArray as ByteNDArray) + DataType.SHORT -> ShortArrayStorageWrapper.returnNDArray(storage, ndArray as ShortNDArray) + DataType.INT -> IntArrayStorageWrapper.returnNDArray(storage, ndArray as IntNDArray) + DataType.LONG -> LongArrayStorageWrapper.returnNDArray(storage, ndArray as LongNDArray) + + DataType.UBYTE -> UByteArrayStorageWrapper.returnNDArray(storage, ndArray as UByteNDArray) + DataType.USHORT -> UShortArrayStorageWrapper.returnNDArray(storage, ndArray as UShortNDArray) + DataType.UINT -> 
UIntArrayStorageWrapper.returnNDArray(storage, ndArray as UIntNDArray) + DataType.ULONG -> ULongArrayStorageWrapper.returnNDArray(storage, ndArray as ULongNDArray) + + DataType.FLOAT -> FloatArrayStorageWrapper.returnNDArray(storage, ndArray as FloatNDArray) + DataType.DOUBLE -> DoubleArrayStorageWrapper.returnNDArray(storage, ndArray as DoubleNDArray) + + DataType.BOOLEAN -> BooleanArrayStorageWrapper.returnNDArray(storage, ndArray as BooleanNDArray) + + else -> error("Unsupported array type") + } + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryLimiter.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryLimiter.kt index ebb86fd8c..85ed03eb1 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryLimiter.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryLimiter.kt @@ -1,5 +1,6 @@ package io.kinference.ndarray.arrays.memory +import io.kinference.primitives.types.DataType import io.kinference.utils.PlatformUtils import kotlinx.atomicfu.* @@ -7,10 +8,11 @@ interface MemoryLimiter { /** * Checks if the memory limit allows adding the specified amount of memory and performs the addition * - * @param added the memory in bytes to add + * @param type is the DataType of underlying primitives in a checking array + * @param size is the checking array size * @return true if the memory was added successfully and false if adding the memory exceeds the memory limit */ - fun checkMemoryLimitAndAdd(added: Long): Boolean + fun checkMemoryLimitAndAdd(type: DataType, size: Int): Boolean /** * Resets the used memory into 0L @@ -21,8 +23,9 @@ interface MemoryLimiter { class BaseMemoryLimiter internal constructor(private val memoryLimit: Long) : MemoryLimiter { private var usedMemory: AtomicLong = atomic(0L) - override fun checkMemoryLimitAndAdd(added: Long): Boolean { + override fun checkMemoryLimitAndAdd(type: DataType, size: Int): Boolean { // Attempt to add memory and check the limit + val added = sizeInBytes(type.ordinal, size) val successful = usedMemory.getAndUpdate { current -> if (current + added > memoryLimit) current else current + added } != usedMemory.value // Check if the update was successful @@ -33,10 +36,34 @@ class BaseMemoryLimiter internal constructor(private val memoryLimit: Long) : Me override fun resetLimit() { usedMemory.value = 0L } + + companion object { + private val typeSizes: LongArray = LongArray(DataType.entries.size).apply { + this[DataType.BYTE.ordinal] = Byte.SIZE_BYTES.toLong() + this[DataType.SHORT.ordinal] = Short.SIZE_BYTES.toLong() + this[DataType.INT.ordinal] = Int.SIZE_BYTES.toLong() + this[DataType.LONG.ordinal] = Long.SIZE_BYTES.toLong() + + this[DataType.UBYTE.ordinal] = UByte.SIZE_BYTES.toLong() + this[DataType.USHORT.ordinal] = UShort.SIZE_BYTES.toLong() + this[DataType.UINT.ordinal] = UInt.SIZE_BYTES.toLong() + this[DataType.ULONG.ordinal] = ULong.SIZE_BYTES.toLong() + + this[DataType.FLOAT.ordinal] = Float.SIZE_BYTES.toLong() + this[DataType.DOUBLE.ordinal] = Double.SIZE_BYTES.toLong() + + this[DataType.BOOLEAN.ordinal] = 1.toLong() + } + + private fun sizeInBytes(typeIndex: Int, size: Int): Long { + return typeSizes[typeIndex] * size + } + } } object MemoryLimiters { - val Default: MemoryLimiter = BaseMemoryLimiter((PlatformUtils.maxHeap * 0.3).toLong()) + val DefaultAutoAllocator: MemoryLimiter = BaseMemoryLimiter((PlatformUtils.maxHeap * 0.3).toLong()) + val DefaultManualAllocator: MemoryLimiter = 
BaseMemoryLimiter(50 * 1024 * 1024) val NoAllocator: MemoryLimiter = BaseMemoryLimiter(0L) fun customLimiter(memoryLimit: Long): MemoryLimiter { diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ModelArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ModelArrayStorage.kt index 9c7f02aa5..0135921cb 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ModelArrayStorage.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ModelArrayStorage.kt @@ -1,34 +1,39 @@ package io.kinference.ndarray.arrays.memory -import io.kinference.ndarray.arrays.ArrayTypes +import io.kinference.primitives.types.DataType import io.kinference.utils.Closeable import java.util.concurrent.ConcurrentLinkedQueue class ModelArrayStorage(private val limiter: MemoryLimiter = MemoryLimiters.NoAllocator) : Closeable { - private val unusedArrays: ConcurrentLinkedQueue = ConcurrentLinkedQueue() + private val autoStorageQueue: ConcurrentLinkedQueue = ConcurrentLinkedQueue() companion object { private const val INIT_SIZE_VALUE: Int = 2 - private val typeSize: Int = ArrayTypes.entries.size + private val typeSize: Int = DataType.entries.size } - fun createAllocatorContext(): AllocatorContext { - return AllocatorContext(getStorage(), limiter, ::returnStorage) + fun createAutoAllocatorContext(): AutoAllocatorContext { + return AutoAllocatorContext(getStorage(autoStorageQueue), ::returnStorage) + } + + fun createManualAllocatorContext(): ManualAllocatorContext { + limiter.resetLimit() + return ManualAllocatorContext(SingleArrayStorage(typeSize, INIT_SIZE_VALUE, limiter)) } fun clearCache() { - unusedArrays.clear() + autoStorageQueue.clear() } override suspend fun close() { clearCache() } - private fun getStorage(): ArrayStorage { - return unusedArrays.poll() ?: ArrayStorage(typeSize, INIT_SIZE_VALUE, limiter) + private fun getStorage(queue: ConcurrentLinkedQueue): ArrayStorage { + return queue.poll() ?: ArrayStorage(typeSize, INIT_SIZE_VALUE, limiter) } private fun returnStorage(storage: ArrayStorage) { - unusedArrays.offer(storage) + autoStorageQueue.offer(storage) } } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayStorageWrapper.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayStorageWrapper.kt new file mode 100644 index 000000000..52921ced8 --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayStorageWrapper.kt @@ -0,0 +1,30 @@ +@file:GeneratePrimitives(DataType.ALL) +package io.kinference.ndarray.arrays.memory + +import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.PrimitiveNDArray +import io.kinference.ndarray.arrays.tiled.PrimitiveTiledArray +import io.kinference.ndarray.blockSizeByStrides +import io.kinference.primitives.annotations.* +import io.kinference.primitives.types.DataType +import io.kinference.primitives.types.PrimitiveArray + +@GenerateNameFromPrimitives +internal object PrimitiveArrayStorageWrapper { + private val type = DataType.CurrentPrimitive + + fun getNDArray(strides: Strides, storage: SingleArrayStorage, fillZeros: Boolean = false): MutablePrimitiveNDArray { + val blockSize = blockSizeByStrides(strides) + val blocksNum = strides.linearSize / blockSize + val blocks = Array(blocksNum) { storage.getArray(type, blockSize, fillZeros) } + val typedBlocks = blocks.map { it as PrimitiveArray 
}.toTypedArray() + val tiled = PrimitiveTiledArray(typedBlocks) + + return MutablePrimitiveNDArray(tiled, strides) + } + + fun returnNDArray(storage: SingleArrayStorage, ndarray: PrimitiveNDArray) { + val blockSize = ndarray.array.blockSize + storage.returnArrays(type, blockSize, ndarray.array.blocks as Array) + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt index 07cbf57f1..4469e9d4e 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt @@ -26,7 +26,7 @@ internal class PrimitiveTiledArray(val blocks: Array) { } companion object { - val type: ArrayTypes = ArrayTypes.valueOf(PrimitiveArray::class.simpleName!! + "Type") + val type: DataType = DataType.CurrentPrimitive suspend operator fun invoke(strides: Strides): PrimitiveTiledArray { val blockSize = blockSizeByStrides(strides) @@ -59,8 +59,8 @@ internal class PrimitiveTiledArray(val blocks: Array) { val blocksNum = if (blockSize == 0) 0 else size / blockSize - val coroutineContext = coroutineContext[AllocatorContext.Key] - val blocks = coroutineContext?.getArrayContainers(type, blockSize, blocksNum) ?: Array(blocksNum) { PrimitiveArray(blockSize) } + val coroutineContext = coroutineContext[AutoAllocatorContext.Key] + val blocks = coroutineContext?.getArrays(type, blockSize, blocksNum) ?: Array(blocksNum) { PrimitiveArray(blockSize) } return PrimitiveTiledArray(blocks.map { it as PrimitiveArray }.toTypedArray()) } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/BooleanConstants.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/BooleanConstants.kt new file mode 100644 index 000000000..00f4767fa --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/BooleanConstants.kt @@ -0,0 +1,6 @@ +package io.kinference.ndarray.extensions.constants + +object BooleanConstants { + const val ZERO = false + const val ONE = true +} From f3346323dce2960e1ca722472a0f5bb50cf4a025 Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Wed, 21 Aug 2024 13:02:53 +0200 Subject: [PATCH 05/19] JBAI-4393 [core, ndarray] Refactored memory management and array handling Added manual NDArray handling, refactored existing operations to use standard DataType enum instead of ArrayTypes, and optimized memory allocations across multiple modules. 
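For orientation, the operator-side pattern this commit introduces can be condensed into the sketch below. The elementwiseAdd helper is illustrative only (it is not part of the patch) and assumes both inputs already share a shape; the context, allocation, and asTensor calls mirror the hunks that follow.

import io.kinference.core.data.tensor.KITensor
import io.kinference.core.data.tensor.asTensor
import io.kinference.ndarray.arrays.*
import io.kinference.ndarray.arrays.memory.ManualAllocatorContext
import io.kinference.ndarray.extensions.allocateNDArray
import kotlin.coroutines.coroutineContext

suspend fun elementwiseAdd(left: NumberNDArrayCore, right: NumberNDArrayCore): KITensor {
    // Pick up the manual allocator if the model installed one into the coroutine context;
    // without it, fall back to a plain allocation.
    val manualContext = coroutineContext[ManualAllocatorContext.Key]

    // Illustrative simplification: assumes left and right share a shape;
    // the real Add operator broadcasts the shapes first.
    val destStrides = left.strides
    val dest = (manualContext?.getNDArray(left.type, destStrides)
        ?: allocateNDArray(left.type, destStrides)) as MutableNumberNDArrayCore

    val result = left.plus(right, dest)

    // Tag the output with the same context so KITensor.close() can hand the buffer back to the pool.
    return result.asTensor("C", manualContext)
}

Because each borrowed buffer is tagged onto the tensor it produces, the array is returned in KITensor.close() rather than at a fixed point after graph execution, which is what lets the manual-allocator path skip the output copying that the automatic path still performs.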
--- .../io/kinference.core/model/KIModel.kt | 4 +- .../normalization/EmbedLayerNormalization.kt | 34 ++- .../normalization/SkipLayerNormalization.kt | 10 +- .../io/kinference.core/operators/math/Add.kt | 18 +- .../operators/math/BiasGelu.kt | 10 +- .../kinference.core/operators/math/MatMul.kt | 16 +- .../kinference.core/operators/tensor/Cast.kt | 285 +++++++++--------- .../io/kinference/models/bert/BERTTest.kt | 2 +- .../ndarray/extensions/PrimitiveExtensions.kt | 43 +-- .../ndarray/extensions/gelu/BiasGelu.kt | 14 + .../extensions/gelu/BiasGeluPrimitive.kt | 9 +- 11 files changed, 259 insertions(+), 186 deletions(-) diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt index 35d554631..837d222da 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt @@ -24,7 +24,7 @@ class KIModel( @OptIn(ExperimentalCoroutinesApi::class) private val dispatcher: CoroutineDispatcher = Dispatchers.Default.limitedParallelism(parallelismLimit) - private val modelArrayStorage: ModelArrayStorage = ModelArrayStorage(memoryLimiter) + private val modelArrayStorage: ModelArrayStorage = ModelArrayStorage(MemoryLimiters.DefaultManualAllocator) override fun addProfilingContext(name: String): ProfilingContext = ProfilingContext(name).apply { profiles.add(this) } override fun analyzeProfilingResults(): ProfileAnalysisEntry = profiles.analyze("Model $name") @@ -44,7 +44,7 @@ class KIModel( coreReserved = true } - when (memoryLimiter) { + when (MemoryLimiters.DefaultManualAllocator) { MemoryLimiters.NoAllocator -> { withContext(limiterContext) { return@withContext graph.execute(input, contexts) diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt index b1861b281..f2be9a212 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt @@ -1,15 +1,17 @@ package io.kinference.core.operators.layer.normalization import io.kinference.attribute.Attribute -import io.kinference.core.data.tensor.KITensor -import io.kinference.core.data.tensor.asONNXTensors +import io.kinference.core.data.tensor.* import io.kinference.data.ONNXData import io.kinference.graph.Contexts import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.ManualAllocatorContext import io.kinference.ndarray.arrays.pointers.* import io.kinference.operator.* +import io.kinference.primitives.types.DataType import io.kinference.protobuf.message.AttributeProto.AttributeType import io.kinference.protobuf.message.TensorProto +import kotlin.coroutines.coroutineContext import kotlin.math.sqrt sealed class EmbedLayerNormalization( @@ -73,9 +75,12 @@ class EmbedLayerNormalizationVer1( private data class NormalizeResult(val output: FloatNDArray, val embeddingSum: FloatNDArray) - internal suspend fun createMaskIndices(mask: IntNDArray?, batchSize: Int, seqLen: Int): NumberNDArrayCore { - val maskIndices = MutableIntNDArray(intArrayOf(batchSize)) - if (mask == null) return maskIndices + internal suspend fun 
createMaskIndices(mask: IntNDArray?, batchSize: Int, seqLen: Int, context: ManualAllocatorContext? = null): NumberNDArrayCore { + val strides = Strides(intArrayOf(batchSize)) + val maskIndices = (context?.getNDArray(DataType.INT, strides) ?: MutableIntNDArray(strides)) as MutableIntNDArray + + if (mask == null) + return maskIndices.also { it.fill(0) } val pointer = mask.array.pointer() val maskIndicesPointer = maskIndices.array.pointer() @@ -95,12 +100,15 @@ class EmbedLayerNormalizationVer1( private suspend fun normalize( epsilon: Float, inputIds: IntNDArray, segmentIds: IntNDArray?, wordEmbed: FloatNDArray, posEmbed: FloatNDArray, - segmentEmbed: FloatNDArray?, gamma: FloatNDArray, beta: FloatNDArray, positionIds: IntNDArray? + segmentEmbed: FloatNDArray?, gamma: FloatNDArray, beta: FloatNDArray, positionIds: IntNDArray?, context: ManualAllocatorContext? = null ): NormalizeResult { val (batchSize, seqLen) = inputIds.shape val (_, hiddenSize) = wordEmbed.shape - val output = MutableFloatNDArray(intArrayOf(batchSize, seqLen, hiddenSize)) - val embeddingSum = MutableFloatNDArray(intArrayOf(batchSize, seqLen, hiddenSize)) + + val outputStrides = Strides(intArrayOf(batchSize, seqLen, hiddenSize)) + + val output = (context?.getNDArray(DataType.FLOAT, outputStrides, fillZeros = false) ?: MutableFloatNDArray(outputStrides)) as MutableFloatNDArray + val embeddingSum = (context?.getNDArray(DataType.FLOAT, outputStrides, fillZeros = false) ?: MutableFloatNDArray(outputStrides)) as MutableFloatNDArray for (batch in 0 until batchSize) { val blockIdx = batch * seqLen @@ -167,6 +175,8 @@ class EmbedLayerNormalizationVer1( } override suspend fun > apply(contexts: Contexts, inputs: List): List { + val manualContext = coroutineContext[ManualAllocatorContext.Key] + val inputIds = inputs[0]!!.data as IntNDArray val segmentIds = inputs[1]?.data as IntNDArray? val wordEmbed = inputs[2]!!.data as FloatNDArray @@ -177,8 +187,12 @@ class EmbedLayerNormalizationVer1( val mask = inputs.getOrNull(7)?.data as IntNDArray? val positionIds = inputs.getOrNull(8)?.data as IntNDArray? 
- val (normalized, embedSum) = normalize(epsilon, inputIds, segmentIds, wordEmbed, posEmbed, segmentEmbed, gamma, beta, positionIds) + val (normalized, embedSum) = normalize(epsilon, inputIds, segmentIds, wordEmbed, posEmbed, segmentEmbed, gamma, beta, positionIds, manualContext) val maskIndices = createMaskIndices(mask, inputIds.shape[0], inputIds.shape[1]) - return listOf(normalized, maskIndices, embedSum).asONNXTensors(outputs) + return listOf( + normalized.asTensor(context = manualContext), + maskIndices.asTensor(context = manualContext), + embedSum.asTensor(context = manualContext) + ) } } diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt index 6b6243ba3..75320199f 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt @@ -7,10 +7,13 @@ import io.kinference.data.ONNXData import io.kinference.graph.Contexts import io.kinference.ndarray.arrays.FloatNDArray import io.kinference.ndarray.arrays.MutableFloatNDArray +import io.kinference.ndarray.arrays.memory.ManualAllocatorContext import io.kinference.ndarray.arrays.pointers.* import io.kinference.operator.* +import io.kinference.primitives.types.DataType import io.kinference.protobuf.message.AttributeProto import io.kinference.protobuf.message.TensorProto +import kotlin.coroutines.coroutineContext import kotlin.math.sqrt sealed class SkipLayerNormalization(name: String, info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { @@ -104,8 +107,10 @@ class SkipLayerNormalizationVer1(name: String, attributes: Map> apply(contexts: Contexts, inputs: List): List { + val manualContext = coroutineContext[ManualAllocatorContext.Key] + val input = inputs[0]!!.data as FloatNDArray - val output = MutableFloatNDArray(input.strides) + val output = (manualContext?.getNDArray(DataType.FLOAT, input.strides, fillZeros = false) ?: MutableFloatNDArray(input.strides)) as MutableFloatNDArray input.normalize( skip = inputs[1]!!.data as FloatNDArray, gamma = inputs[2]!!.data as FloatNDArray, @@ -114,6 +119,7 @@ class SkipLayerNormalizationVer1(name: String, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { companion object { @@ -52,7 +55,16 @@ class AddVer7(name: String, attributes: Map>, inputs: Lis } override suspend fun > apply(contexts: Contexts, inputs: List): List { - val result = (inputs[0]!!.data as NumberNDArrayCore) + (inputs[1]!!.data as NumberNDArrayCore) - return listOf(result.asTensor("C")) + val manualContext = coroutineContext[ManualAllocatorContext.Key] + + val left = inputs[0]!!.data as NumberNDArrayCore + val right = inputs[1]!!.data as NumberNDArrayCore + + val destShape = broadcastShape(listOf(left.shape, right.shape)) + val destStrides = Strides(destShape) + val dest = (manualContext?.getNDArray(left.type, destStrides) ?: allocateNDArray(left.type, destStrides)) as MutableNumberNDArrayCore + + val result = left.plus(right, dest) //(inputs[0]!!.data as NumberNDArrayCore) + (inputs[1]!!.data as NumberNDArrayCore) + return listOf(result.asTensor("C", manualContext)) } } diff --git 
a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt index 0be701b4e..f2d8d01b3 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt @@ -5,9 +5,13 @@ import io.kinference.core.data.tensor.KITensor import io.kinference.core.data.tensor.asTensor import io.kinference.data.ONNXData import io.kinference.graph.Contexts +import io.kinference.ndarray.arrays.MutableNumberNDArrayCore import io.kinference.ndarray.arrays.NumberNDArrayCore +import io.kinference.ndarray.arrays.memory.ManualAllocatorContext +import io.kinference.ndarray.extensions.allocateNDArray import io.kinference.ndarray.extensions.gelu.biasGelu import io.kinference.operator.* +import kotlin.coroutines.coroutineContext sealed class BiasGelu(name: String, info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { companion object { @@ -39,16 +43,20 @@ class BiasGeluVer1(name: String, attributes: Map> = empty } override suspend fun > apply(contexts: Contexts, inputs: List): List { + val manualContext = coroutineContext[ManualAllocatorContext.Key] + val input = inputs[0]!!.data as NumberNDArrayCore val bias = inputs[1]!!.data as NumberNDArrayCore require(input.shape.last() == bias.shape.last()) { "Last dimensions of input and bias tensors must be equal" } + val dest = (manualContext?.getNDArray(input.type, input.strides) ?: allocateNDArray(input.type, input.strides)) as MutableNumberNDArrayCore + // Uses ERF formula with fractional error less than x.xx * 10 ^ -4. // Algorithm 26.2.17 in Abromowitz and Stegun, Handbook of Mathematical. 
// Another possible ERF implementation (several ms faster): // https://github.com/apache/commons-numbers/blob/master/commons-numbers-gamma/src/main/java/org/apache/commons/numbers/gamma/BoostErf.java - return listOf(biasGelu(input, bias).asTensor("C")) + return listOf(biasGelu(input, bias, dest).asTensor("C", manualContext)) } } diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt index 8c1735ea3..e3baa2e4e 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt @@ -5,9 +5,13 @@ import io.kinference.core.data.tensor.KITensor import io.kinference.core.data.tensor.asTensor import io.kinference.data.ONNXData import io.kinference.graph.Contexts -import io.kinference.ndarray.arrays.NumberNDArrayCore +import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.ManualAllocatorContext +import io.kinference.ndarray.broadcasting.Broadcasting +import io.kinference.ndarray.extensions.allocateNDArray import io.kinference.operator.* import io.kinference.protobuf.message.TensorProto +import kotlin.coroutines.coroutineContext sealed class MatMul(name: String, info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { companion object { @@ -46,8 +50,16 @@ class MatMulVer1(name: String, attributes: Map>, inputs: } override suspend fun > apply(contexts: Contexts, inputs: List): List { + val manualContext = coroutineContext[ManualAllocatorContext.Key] + val first = inputs[0]!!.data as NumberNDArrayCore val second = inputs[1]!!.data as NumberNDArrayCore - return listOf((first.matmul(second)).asTensor("Y")) + + val destShape = Broadcasting.broadcastShapeForMatmul(first.shape, second.shape) + val destStrides = Strides(destShape) + + val dest = (manualContext?.getNDArray(first.type, destStrides, fillZeros = true) ?: allocateNDArray(first.type, destStrides)) as MutableNumberNDArrayCore + + return listOf((first.matmul(second, dest)).asTensor("Y", manualContext)) } } diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt index 5ce45c866..742fd7c2d 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt @@ -6,6 +6,7 @@ import io.kinference.core.data.tensor.asTensor import io.kinference.data.ONNXData import io.kinference.graph.Contexts import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.ManualAllocatorContext import io.kinference.ndarray.arrays.pointers.mapTo import io.kinference.ndarray.arrays.tiled.* import io.kinference.operator.* @@ -13,6 +14,7 @@ import io.kinference.primitives.types.DataType import io.kinference.protobuf.FLOAT_TENSOR_TYPES import io.kinference.protobuf.message.AttributeProto import io.kinference.protobuf.message.TensorProto +import kotlin.coroutines.coroutineContext sealed class Cast(name: String, info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { companion object { @@ -41,65 +43,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li internal val VERSION = 
VersionInfo(sinceVersion = 6) private val INFO = OperatorInfo("Cast", ATTRIBUTES_INFO, INPUTS_INFO, OUTPUTS_INFO, VERSION, OperatorInfo.DEFAULT_DOMAIN) - private suspend fun castByte(array: ByteNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castByte(array: ByteNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? = null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toFloat() } output } TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UBYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUByte() } output } TensorProto.DataType.INT8 -> array TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUShort() } output } TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toShort() } output } TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt() } output } TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong() } output } TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != (0).toByte() } output } TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toDouble() } output } TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUInt() } output } 
TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toULong() } output } @@ -108,65 +110,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - private suspend fun castShort(array: ShortNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castShort(array: ShortNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? = null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toFloat() } output } TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UBYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUByte() } output } TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toByte() } output } TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUShort() } output } TensorProto.DataType.INT16 -> array TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt() } output } TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong() } output } TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != (0).toShort() } output } TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toDouble() } output } TensorProto.DataType.UINT32 -> { - val output = 
UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUInt() } output } TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toULong() } output } @@ -175,66 +177,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - - private suspend fun castInt(array: IntNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castInt(array: IntNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? = null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toFloat() } output } TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UBYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUByte() } output } TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toByte() } output } TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUShort() } output } TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toShort() } output } TensorProto.DataType.INT32 -> array TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong() } output } TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) - array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != 0 } + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray + array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != (0).toInt() } output } TensorProto.DataType.DOUBLE -> { - val 
output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toDouble() } output } TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUInt() } output } TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toULong() } output } @@ -243,66 +244,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - - private suspend fun castLong(array: LongNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castLong(array: LongNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? = null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toFloat() } output } TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UBYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUByte() } output } TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toByte() } output } TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUShort() } output } TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toShort() } output } TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt() } output } TensorProto.DataType.INT64 -> array TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) + val output = 
(context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != 0L } output } TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toDouble() } output } TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUInt() } output } TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toULong() } output } @@ -311,66 +311,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - - private suspend fun castUByte(array: UByteNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castUByte(array: UByteNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? = null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toFloat() } output } TensorProto.DataType.UINT8 -> array TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toByte() } output } TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUShort() } output } TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toShort() } output } TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt() } output } TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: 
LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong() } output } TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != (0).toUByte() } output } TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toDouble() } output } TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUInt() } output } TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toULong() } output } @@ -379,66 +378,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - - private suspend fun castUShort(array: UShortNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castUShort(array: UShortNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? 
= null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toFloat() } output } TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UBYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUByte() } output } TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toByte() } output } TensorProto.DataType.UINT16 -> array TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toShort() } output } TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt() } output } TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong() } output } TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != (0).toUShort() } output } TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toDouble() } output } TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUInt() } output } TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toULong() } output } @@ -447,66 +445,65 @@ class 
CastVer6(name: String, attributes: Map>, inputs: Li } } - - private suspend fun castUInt(array: UIntNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castUInt(array: UIntNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? = null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toFloat() } output } TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUByte() } output } TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toByte() } output } TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUShort() } output } TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toShort() } output } TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt() } output } TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong() } output } TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != (0).toUInt() } output } TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toDouble() } output } TensorProto.DataType.UINT32 -> array TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), 
array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toULong() } output } @@ -515,65 +512,64 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - - private suspend fun castULong(array: ULongNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castULong(array: ULongNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? = null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toFloat() } output } TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUByte() } output } TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toByte() } output } TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUShort() } output } TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toShort() } output } TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt() } output } TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong() } output } TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != (0).toULong() } output } TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: 
DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toDouble() } output } TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUInt() } output } @@ -583,66 +579,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - - private suspend fun castFloat(array: FloatNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castFloat(array: FloatNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? = null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> array TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong().toUByte() } output } TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt().toByte() } output } TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong().toUShort() } output } TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt().toShort() } output } TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt() } output } TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong() } output } TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != 0f } output } TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), 
array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toDouble() } output } TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUInt() } output } TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toULong() } output } @@ -651,66 +646,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - - private suspend fun castDouble(array: DoubleNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castDouble(array: DoubleNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? = null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toFloat() } output } TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong().toUByte() } output } TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt().toByte() } output } TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong().toUShort() } output } TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt().toShort() } output } TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt() } output } TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), 
array.linearSize) { it.toLong() } output } TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != 0.0 } output } TensorProto.DataType.DOUBLE -> array TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUInt() } output } TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toULong() } output } @@ -719,66 +713,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - - private suspend fun castBoolean(array: BooleanNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castBoolean(array: BooleanNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? = null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) 1f else 0f } output } TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) (1).toUByte() else (0).toUByte() } output } TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) (1).toByte() else (0).toByte() } output } TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) (1).toUShort() else (0).toUShort() } output } TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) (1).toShort() else (0).toShort() } output } TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as IntNDArray 
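                // Allocation pattern repeated in every cast branch of this patch: request a pooled
                // MutableNDArrayCore from the optional ManualAllocatorContext for the target DataType
                // and strides, and fall back to constructing a fresh array when no context is present.
                // A minimal sketch of the pattern, using the names from this hunk:
                //     val output = (context?.getNDArray(DataType.INT, array.strides)
                //         ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as IntNDArray
                // The cast to the concrete array type is needed because getNDArray() is declared to
                // return MutableNDArrayCore for all element types.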
array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) 1 else 0 } output } TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) 1L else 0L } output } TensorProto.DataType.BOOL -> array TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) 1.0 else 0.0 } output } TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) (1).toUInt() else (0).toUInt() } output } TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) (1).toULong() else (0).toULong() } output } @@ -787,19 +780,19 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - internal suspend fun castTo(input: NDArrayCore, to: TensorProto.DataType): NDArrayCore { + internal suspend fun castTo(input: NDArrayCore, to: TensorProto.DataType, context: ManualAllocatorContext? 
= null): NDArrayCore { return when (input.type) { - DataType.BYTE -> castByte(input as ByteNDArray, to) - DataType.SHORT -> castShort(input as ShortNDArray, to) - DataType.INT -> castInt(input as IntNDArray, to) - DataType.LONG -> castLong(input as LongNDArray, to) - DataType.UBYTE -> castUByte(input as UByteNDArray, to) - DataType.USHORT -> castUShort(input as UShortNDArray, to) - DataType.UINT -> castUInt(input as UIntNDArray, to) - DataType.ULONG -> castULong(input as ULongNDArray, to) - DataType.FLOAT -> castFloat(input as FloatNDArray, to) - DataType.DOUBLE -> castDouble(input as DoubleNDArray, to) - DataType.BOOLEAN -> castBoolean(input as BooleanNDArray, to) + DataType.BYTE -> castByte(input as ByteNDArray, to, context) + DataType.SHORT -> castShort(input as ShortNDArray, to, context) + DataType.INT -> castInt(input as IntNDArray, to, context) + DataType.LONG -> castLong(input as LongNDArray, to, context) + DataType.UBYTE -> castUByte(input as UByteNDArray, to, context) + DataType.USHORT -> castUShort(input as UShortNDArray, to, context) + DataType.UINT -> castUInt(input as UIntNDArray, to, context) + DataType.ULONG -> castULong(input as ULongNDArray, to, context) + DataType.FLOAT -> castFloat(input as FloatNDArray, to, context) + DataType.DOUBLE -> castDouble(input as DoubleNDArray, to, context) + DataType.BOOLEAN -> castBoolean(input as BooleanNDArray, to, context) else -> throw IllegalStateException("Unsupported type ${input.type}") } } @@ -808,11 +801,13 @@ class CastVer6(name: String, attributes: Map>, inputs: Li private val toType: Int by attribute("to") { it: Number -> it.toInt() } override suspend fun > apply(contexts: Contexts, inputs: List): List { + val manualContext = coroutineContext[ManualAllocatorContext.Key] + val tensor = inputs.first()!! val to = TensorProto.DataType.fromValue(toType)!! 
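        // `manualContext` is looked up in the coroutine context via ManualAllocatorContext.Key and is
        // null whenever no manual allocator context is installed for this run. It is passed both to
        // castTo(), so the cast branches can draw their output blocks from the pooled storage, and to
        // asTensor() below, presumably so the output's storage can later be returned to that pool.
        // With a null context, castTo() falls back to plain allocation, matching the previous behaviour.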
- val casted = castTo(tensor.data, to) + val casted = castTo(tensor.data, to, manualContext) - return listOf(casted.asTensor("output")) + return listOf(casted.asTensor("output", manualContext)) } } diff --git a/inference/inference-core/src/jvmTest/kotlin/io/kinference/models/bert/BERTTest.kt b/inference/inference-core/src/jvmTest/kotlin/io/kinference/models/bert/BERTTest.kt index 30ff16b83..bed95862c 100644 --- a/inference/inference-core/src/jvmTest/kotlin/io/kinference/models/bert/BERTTest.kt +++ b/inference/inference-core/src/jvmTest/kotlin/io/kinference/models/bert/BERTTest.kt @@ -15,6 +15,6 @@ class BERTTest { @Test fun benchmark_test_vanilla_bert_performance() = TestRunner.runTest { - KIPerformanceRunner.runFromS3("bert:standard:en:v1", count = 3) + KIPerformanceRunner.runFromS3("bert:standard:en:v1", count = 20) } } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/PrimitiveExtensions.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/PrimitiveExtensions.kt index 21442ecda..570ca8520 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/PrimitiveExtensions.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/PrimitiveExtensions.kt @@ -8,6 +8,7 @@ import io.kinference.ndarray.arrays.pointers.accept import io.kinference.ndarray.arrays.pointers.acceptWithRecursive import io.kinference.ndarray.stubs.* import io.kinference.ndarray.arrays.tiled.* +import io.kinference.ndarray.extensions.constants.PrimitiveConstants import io.kinference.primitives.annotations.* import io.kinference.primitives.types.* import io.kinference.utils.launchWithLimitOrDefault @@ -127,21 +128,21 @@ internal suspend fun PrimitiveNDArray.dotTransposedWithAlpha(alpha: Double, othe other as PrimitiveNDArray; destination as MutablePrimitiveNDArray val alpha = alpha.toPrimitive() - val dBlocksInRow = destination.blocksInRow +// val dBlocksInRow = destination.blocksInRow val lrBlocksInRow = this.blocksInRow val n = this.shape[0] val t = this.shape[1] val m = other.shape[0] - val dBlockSize = destination.array.blockSize +// val dBlockSize = destination.array.blockSize val lrBlockSize = this.array.blockSize - val destBlocks = destination.array.blocks +// val destBlocks = destination.array.blocks val leftBlocks = this.array.blocks val rightBlocks = other.array.blocks val rowFlop = t * m - val zero = (0).toPrimitive() +// val zero = (0).toPrimitive() /* TODO: (dmitriyb) this is temporary commented. 
On GEC performance test we have large inputs that cause out of memory exceptions @@ -161,34 +162,40 @@ internal suspend fun PrimitiveNDArray.dotTransposedWithAlpha(alpha: Double, othe // TODO: (cupertank) Remove constants // TODO: (dmitriyb) Implement concurrent array retrieve with a separate structure from ArraysDispatcher parallelizeByRows(rowFlop, n, 262144) { nStart: Int, nEnd: Int, _ -> - val mSums = Array(m) { PrimitiveArray(lrBlockSize) } +// val mSums = Array(m) { PrimitiveArray(lrBlockSize) } + val tempSum = PrimitiveArray(lrBlockSize) + val destPointer = destination.array.pointer() for (i in nStart until nEnd) { val leftBlockOffset = i * lrBlocksInRow val rightBlockIter = rightBlocks.iterator() - val destBlockOffset = i * dBlocksInRow + destPointer.linearIndex = i * m +// val destBlockOffset = i * dBlocksInRow for (k in 0 until m) { - val tempArray = mSums[k] +// val tempArray = mSums[k] for (lrBlock in 0 until lrBlocksInRow) { val leftBlock = leftBlocks[leftBlockOffset + lrBlock] val rightBlock = rightBlockIter.next() - for (j in tempArray.indices) { - tempArray[j] += leftBlock[j] * rightBlock[j] + for (j in tempSum.indices) { + tempSum[j] += leftBlock[j] * rightBlock[j] } } - } - val mSumsIter = mSums.iterator() - for (destBlockNum in 0 until dBlocksInRow) { - val destBlock = destBlocks[destBlockOffset + destBlockNum] - for (j in destBlock.indices) { - val sumBlock = mSumsIter.next() - destBlock[j] = sumBlock.sum() * alpha - sumBlock.fill(zero) - } + destPointer.setAndIncrement(tempSum.sum() * alpha) + tempSum.fill(PrimitiveConstants.ZERO) } + +// val mSumsIter = mSums.iterator() +// for (destBlockNum in 0 until dBlocksInRow) { +// val destBlock = destBlocks[destBlockOffset + destBlockNum] +// for (j in destBlock.indices) { +// val sumBlock = mSumsIter.next() +// destBlock[j] = sumBlock.sum() * alpha +// sumBlock.fill(zero) +// } +// } } } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGelu.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGelu.kt index 0636cb824..8dc2a6705 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGelu.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGelu.kt @@ -16,3 +16,17 @@ suspend fun biasGelu(input: NumberNDArrayCore, bias: NumberNDArrayCore): Mutable else -> error("BiasGelu operation supported only for FLOAT and DOUBLE tensors, actual types is ${input.type}") } } + +suspend fun biasGelu(input: NumberNDArrayCore, bias: NumberNDArrayCore, dest: MutableNumberNDArrayCore): MutableNumberNDArrayCore { + require(input.type == bias.type) + { "Input and Bias types should be equal, actual input type is ${input.type}, actual bias type is ${bias.type}" } + + require(input.type == DataType.FLOAT || input.type == DataType.DOUBLE) + { "BiasGelu operation supported only for FLOAT and DOUBLE tensors, actual types is ${input.type}" } + + return when(input.type) { + DataType.FLOAT -> computeGeluFloat(input as FloatNDArray, bias as FloatNDArray, dest as MutableFloatNDArray) + DataType.DOUBLE -> computeGeluDouble(input as DoubleNDArray, bias as DoubleNDArray, dest as MutableDoubleNDArray) + else -> error("BiasGelu operation supported only for FLOAT and DOUBLE tensors, actual types is ${input.type}") + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt 
b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt index bc14a927a..4c5682899 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt @@ -14,8 +14,8 @@ import io.kinference.primitives.types.* import kotlin.math.* @GenerateNameFromPrimitives -internal suspend fun computeGeluPrimitive(input: PrimitiveNDArray, bias: PrimitiveNDArray): MutablePrimitiveNDArray { - val output = MutablePrimitiveNDArray(input.strides) +internal suspend fun computeGeluPrimitive(input: PrimitiveNDArray, bias: PrimitiveNDArray, output: MutablePrimitiveNDArray): MutablePrimitiveNDArray { +// val output = MutablePrimitiveNDArray(input.strides) val inputBlocks = input.array.blocks val biasBlocks = bias.array.blocks @@ -79,3 +79,8 @@ internal suspend fun computeGeluPrimitive(input: PrimitiveNDArray, bias: Primiti return output } + +@GenerateNameFromPrimitives +internal suspend fun computeGeluPrimitive(input: PrimitiveNDArray, bias: PrimitiveNDArray): MutablePrimitiveNDArray { + return computeGeluPrimitive(input, bias, MutablePrimitiveNDArray(input.strides)) +} From e900ca89d3519dd9066d2d927abdf84bd228f693 Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Thu, 22 Aug 2024 12:24:46 +0200 Subject: [PATCH 06/19] Refactor NDArray storage and retrieval methods. Replaced static method calls with instance method calls for NDArray storage wrappers. Introduced internal storage fields in `ManualAllocatorContext` to streamline access and management of different data types. --- .../arrays/memory/ManualAllocatorContext.kt | 101 ++++++++++++++---- .../memory/PrimitiveArrayStorageWrapper.kt | 34 +++++- 2 files changed, 112 insertions(+), 23 deletions(-) diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ManualAllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ManualAllocatorContext.kt index 788541e6f..27f261fef 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ManualAllocatorContext.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ManualAllocatorContext.kt @@ -6,25 +6,40 @@ import kotlin.coroutines.CoroutineContext data class ManualAllocatorContext internal constructor(private val storage: SingleArrayStorage) : CoroutineContext.Element { + internal val byteStorage = ByteArrayStorageWrapper() + internal val shortStorage = ShortArrayStorageWrapper() + internal val intStorage = IntArrayStorageWrapper() + internal val longStorage = LongArrayStorageWrapper() + + internal val ubyteStorage = UByteArrayStorageWrapper() + internal val ushortStorage = UShortArrayStorageWrapper() + internal val uintStorage = UIntArrayStorageWrapper() + internal val ulongStorage = ULongArrayStorageWrapper() + + internal val floatStorage = FloatArrayStorageWrapper() + internal val doubleStorage = DoubleArrayStorageWrapper() + + internal val booleanStorage = BooleanArrayStorageWrapper() + companion object Key : CoroutineContext.Key override val key: CoroutineContext.Key<*> get() = Key fun getNDArray(dataType: DataType, strides: Strides, fillZeros: Boolean = false): MutableNDArrayCore { return when(dataType) { - DataType.BYTE -> ByteArrayStorageWrapper.getNDArray(strides, storage, fillZeros) - DataType.SHORT -> ShortArrayStorageWrapper.getNDArray(strides, storage, fillZeros) - DataType.INT -> 
IntArrayStorageWrapper.getNDArray(strides, storage, fillZeros) - DataType.LONG -> LongArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + DataType.BYTE -> byteStorage.getNDArray(strides, fillZeros) + DataType.SHORT -> shortStorage.getNDArray(strides, fillZeros) + DataType.INT -> intStorage.getNDArray(strides, fillZeros) + DataType.LONG -> longStorage.getNDArray(strides, fillZeros) - DataType.UBYTE -> UByteArrayStorageWrapper.getNDArray(strides, storage, fillZeros) - DataType.USHORT -> UShortArrayStorageWrapper.getNDArray(strides, storage, fillZeros) - DataType.UINT -> UIntArrayStorageWrapper.getNDArray(strides, storage, fillZeros) - DataType.ULONG -> ULongArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + DataType.UBYTE -> ubyteStorage.getNDArray(strides, fillZeros) + DataType.USHORT -> ushortStorage.getNDArray(strides, fillZeros) + DataType.UINT -> uintStorage.getNDArray(strides, fillZeros) + DataType.ULONG -> ulongStorage.getNDArray(strides, fillZeros) - DataType.FLOAT -> FloatArrayStorageWrapper.getNDArray(strides, storage, fillZeros) - DataType.DOUBLE -> DoubleArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + DataType.FLOAT -> floatStorage.getNDArray(strides, fillZeros) + DataType.DOUBLE -> doubleStorage.getNDArray(strides, fillZeros) - DataType.BOOLEAN -> BooleanArrayStorageWrapper.getNDArray(strides, storage, fillZeros) + DataType.BOOLEAN -> booleanStorage.getNDArray(strides, fillZeros) else -> error("Unsupported array type") } @@ -32,22 +47,64 @@ data class ManualAllocatorContext internal constructor(private val storage: Sing fun returnNDArray(ndArray: NDArrayCore) { when(ndArray.type) { - DataType.BYTE -> ByteArrayStorageWrapper.returnNDArray(storage, ndArray as ByteNDArray) - DataType.SHORT -> ShortArrayStorageWrapper.returnNDArray(storage, ndArray as ShortNDArray) - DataType.INT -> IntArrayStorageWrapper.returnNDArray(storage, ndArray as IntNDArray) - DataType.LONG -> LongArrayStorageWrapper.returnNDArray(storage, ndArray as LongNDArray) + DataType.BYTE -> byteStorage.returnNDArray(ndArray as ByteNDArray) + DataType.SHORT -> shortStorage.returnNDArray(ndArray as ShortNDArray) + DataType.INT -> intStorage.returnNDArray(ndArray as IntNDArray) + DataType.LONG -> longStorage.returnNDArray(ndArray as LongNDArray) - DataType.UBYTE -> UByteArrayStorageWrapper.returnNDArray(storage, ndArray as UByteNDArray) - DataType.USHORT -> UShortArrayStorageWrapper.returnNDArray(storage, ndArray as UShortNDArray) - DataType.UINT -> UIntArrayStorageWrapper.returnNDArray(storage, ndArray as UIntNDArray) - DataType.ULONG -> ULongArrayStorageWrapper.returnNDArray(storage, ndArray as ULongNDArray) + DataType.UBYTE -> ubyteStorage.returnNDArray(ndArray as UByteNDArray) + DataType.USHORT -> ushortStorage.returnNDArray(ndArray as UShortNDArray) + DataType.UINT -> uintStorage.returnNDArray(ndArray as UIntNDArray) + DataType.ULONG -> ulongStorage.returnNDArray(ndArray as ULongNDArray) - DataType.FLOAT -> FloatArrayStorageWrapper.returnNDArray(storage, ndArray as FloatNDArray) - DataType.DOUBLE -> DoubleArrayStorageWrapper.returnNDArray(storage, ndArray as DoubleNDArray) + DataType.FLOAT -> floatStorage.returnNDArray(ndArray as FloatNDArray) + DataType.DOUBLE -> doubleStorage.returnNDArray(ndArray as DoubleNDArray) - DataType.BOOLEAN -> BooleanArrayStorageWrapper.returnNDArray(storage, ndArray as BooleanNDArray) + DataType.BOOLEAN -> booleanStorage.returnNDArray(ndArray as BooleanNDArray) else -> error("Unsupported array type") } } + +// fun getNDArray(dataType: DataType, 
strides: Strides, fillZeros: Boolean = false): MutableNDArrayCore { +// return when(dataType) { +// DataType.BYTE -> ByteArrayStorageWrapper.getNDArray(strides, storage, fillZeros) +// DataType.SHORT -> ShortArrayStorageWrapper.getNDArray(strides, storage, fillZeros) +// DataType.INT -> IntArrayStorageWrapper.getNDArray(strides, storage, fillZeros) +// DataType.LONG -> LongArrayStorageWrapper.getNDArray(strides, storage, fillZeros) +// +// DataType.UBYTE -> UByteArrayStorageWrapper.getNDArray(strides, storage, fillZeros) +// DataType.USHORT -> UShortArrayStorageWrapper.getNDArray(strides, storage, fillZeros) +// DataType.UINT -> UIntArrayStorageWrapper.getNDArray(strides, storage, fillZeros) +// DataType.ULONG -> ULongArrayStorageWrapper.getNDArray(strides, storage, fillZeros) +// +// DataType.FLOAT -> FloatArrayStorageWrapper.getNDArray(strides, storage, fillZeros) +// DataType.DOUBLE -> DoubleArrayStorageWrapper.getNDArray(strides, storage, fillZeros) +// +// DataType.BOOLEAN -> BooleanArrayStorageWrapper.getNDArray(strides, storage, fillZeros) +// +// else -> error("Unsupported array type") +// } +// } +// +// fun returnNDArray(ndArray: NDArrayCore) { +// when(ndArray.type) { +// DataType.BYTE -> ByteArrayStorageWrapper.returnNDArray(storage, ndArray as ByteNDArray) +// DataType.SHORT -> ShortArrayStorageWrapper.returnNDArray(storage, ndArray as ShortNDArray) +// DataType.INT -> IntArrayStorageWrapper.returnNDArray(storage, ndArray as IntNDArray) +// DataType.LONG -> LongArrayStorageWrapper.returnNDArray(storage, ndArray as LongNDArray) +// +// DataType.UBYTE -> UByteArrayStorageWrapper.returnNDArray(storage, ndArray as UByteNDArray) +// DataType.USHORT -> UShortArrayStorageWrapper.returnNDArray(storage, ndArray as UShortNDArray) +// DataType.UINT -> UIntArrayStorageWrapper.returnNDArray(storage, ndArray as UIntNDArray) +// DataType.ULONG -> ULongArrayStorageWrapper.returnNDArray(storage, ndArray as ULongNDArray) +// +// DataType.FLOAT -> FloatArrayStorageWrapper.returnNDArray(storage, ndArray as FloatNDArray) +// DataType.DOUBLE -> DoubleArrayStorageWrapper.returnNDArray(storage, ndArray as DoubleNDArray) +// +// DataType.BOOLEAN -> BooleanArrayStorageWrapper.returnNDArray(storage, ndArray as BooleanNDArray) +// +// else -> error("Unsupported array type") +// } +// } } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayStorageWrapper.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayStorageWrapper.kt index 52921ced8..e6c566938 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayStorageWrapper.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayStorageWrapper.kt @@ -5,14 +5,46 @@ import io.kinference.ndarray.arrays.* import io.kinference.ndarray.arrays.PrimitiveNDArray import io.kinference.ndarray.arrays.tiled.PrimitiveTiledArray import io.kinference.ndarray.blockSizeByStrides +import io.kinference.ndarray.extensions.constants.PrimitiveConstants import io.kinference.primitives.annotations.* import io.kinference.primitives.types.DataType import io.kinference.primitives.types.PrimitiveArray +import io.kinference.utils.inlines.InlineInt @GenerateNameFromPrimitives -internal object PrimitiveArrayStorageWrapper { +internal class PrimitiveArrayStorageWrapper { private val type = DataType.CurrentPrimitive + private val storage = HashMap>(2) + + fun getNDArray(strides: Strides, fillZeros: Boolean = 
false): MutablePrimitiveNDArray { + val blockSize = InlineInt(blockSizeByStrides(strides)) + val blocksNum = strides.linearSize / blockSize.value + + val queue = storage.getOrPut(blockSize) { ArrayDeque(blocksNum) } + + val blocks = Array(blocksNum) { + val block = queue.removeFirstOrNull() + if (fillZeros) { + block?.fill(PrimitiveConstants.ZERO) + } + block ?: PrimitiveArray(blockSize.value) + } + + val tiled = PrimitiveTiledArray(blocks) + + return MutablePrimitiveNDArray(tiled, strides) + } + + fun returnNDArray(ndarray: PrimitiveNDArray) { + val blockSize = InlineInt(ndarray.array.blockSize) + val blocksNum = ndarray.array.blocksNum + + val queue = storage.getOrPut(blockSize) { ArrayDeque(blocksNum) } + + queue.addAll(ndarray.array.blocks) + } + fun getNDArray(strides: Strides, storage: SingleArrayStorage, fillZeros: Boolean = false): MutablePrimitiveNDArray { val blockSize = blockSizeByStrides(strides) val blocksNum = strides.linearSize / blockSize From 954f6cc8b484217041577c8876c753993f6bdf57 Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Tue, 27 Aug 2024 17:09:15 +0200 Subject: [PATCH 07/19] JBAI-4393 [core, ndarray, utils] Major allocator refactoring Deleted obsolete memory storage and allocator context classes. Added new classes with improved manual and auto handling storage for arrays, enhancing memory management and performance. --- .../kotlin/io/kinference.core/KIEngine.kt | 31 ++-- .../kinference.core/data/tensor/KITensor.kt | 2 +- .../data/tensor/TensorExtensions.kt | 2 +- .../io/kinference.core/model/KIModel.kt | 54 ++----- .../operators/layer/attention/Attention.kt | 2 +- .../normalization/EmbedLayerNormalization.kt | 2 +- .../normalization/SkipLayerNormalization.kt | 2 +- .../io/kinference.core/operators/math/Add.kt | 2 +- .../operators/math/BiasGelu.kt | 2 +- .../kinference.core/operators/math/MatMul.kt | 2 +- .../kinference.core/operators/tensor/Cast.kt | 2 +- .../ndarray/arrays/memory/ArrayStorage.kt | 139 ------------------ .../arrays/memory/AutoAllocatorContext.kt | 23 --- .../arrays/memory/ManualAllocatorContext.kt | 110 -------------- .../{MemoryLimiter.kt => MemoryManager.kt} | 62 ++++---- .../arrays/memory/ModelArrayStorage.kt | 39 ----- .../memory/PredictionContextDispatcher.kt | 64 ++++++++ .../memory/PrimitiveArrayStorageWrapper.kt | 62 -------- .../memory/contexts/AutoAllocatorContext.kt | 17 +++ .../BaseAllocatorContextWithStorage.kt | 24 +++ .../memory/contexts/ManualAllocatorContext.kt | 23 +++ .../storage/AutoArrayHandlingStorage.kt | 34 +++++ .../storage/ManualArrayHandlingStorage.kt | 45 ++++++ .../PrimitiveAutoHandlingArrayStorage.kt | 47 ++++++ .../PrimitiveManualHandlingArrayStorage.kt | 56 +++++++ .../arrays/tiled/PrimitiveTiledArray.kt | 1 + .../io/kinference/utils/PredictionConfig.kt | 67 +++++++++ 27 files changed, 455 insertions(+), 461 deletions(-) delete mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt delete mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AutoAllocatorContext.kt delete mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ManualAllocatorContext.kt rename ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/{MemoryLimiter.kt => MemoryManager.kt} (60%) delete mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ModelArrayStorage.kt create mode 100644 
ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PredictionContextDispatcher.kt delete mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayStorageWrapper.kt create mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt create mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/BaseAllocatorContextWithStorage.kt create mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/ManualAllocatorContext.kt create mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt create mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/ManualArrayHandlingStorage.kt create mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt create mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt create mode 100644 utils/utils-common/src/commonMain/kotlin/io/kinference/utils/PredictionConfig.kt diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/KIEngine.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/KIEngine.kt index dba47b43d..674bbaed5 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/KIEngine.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/KIEngine.kt @@ -10,14 +10,13 @@ import io.kinference.core.optimizer.rules.OptimizerRuleSet import io.kinference.data.ONNXData import io.kinference.data.ONNXDataType import io.kinference.model.IrOptimizableEngine -import io.kinference.ndarray.arrays.memory.MemoryLimiter -import io.kinference.ndarray.arrays.memory.MemoryLimiters import io.kinference.optimizer.GraphOptimizer import io.kinference.optimizer.OptimizerRule import io.kinference.protobuf.* import io.kinference.protobuf.message.* import io.kinference.utils.CommonDataLoader -import io.kinference.utils.PlatformUtils +import io.kinference.utils.PredictionConfig +import io.kinference.utils.PredictionConfigs import okio.Buffer import okio.Path import okio.Path.Companion.toPath @@ -41,24 +40,24 @@ object KIEngine : IrOptimizableEngine> { fun protoReader(bytes: ByteArray) = ProtobufReader(Buffer().write(bytes), KI_READER_CONFIG) - suspend fun loadModel(bytes: ByteArray, optimize: Boolean, memoryLimiter: MemoryLimiter, parallelismLimit: Int): KIModel { + suspend fun loadModel(bytes: ByteArray, optimize: Boolean, predictionConfig: PredictionConfig): KIModel { val rules = if (optimize) OptimizerRuleSet.DEFAULT_OPT_RULES else emptyList() - return loadModel(bytes, rules, memoryLimiter, parallelismLimit) + return loadModel(bytes, rules, predictionConfig) } override suspend fun loadModel(bytes: ByteArray, optimize: Boolean): KIModel { - return loadModel(bytes, optimize, MemoryLimiters.NoAllocator, PlatformUtils.cores) + return loadModel(bytes, optimize, PredictionConfigs.NoAllocator) } - override suspend fun loadModel(bytes: ByteArray, rules: List>>): KIModel = loadModel(bytes, rules, MemoryLimiters.NoAllocator, PlatformUtils.cores) + override suspend fun loadModel(bytes: ByteArray, rules: List>>): KIModel = loadModel(bytes, rules, PredictionConfigs.NoAllocator) - suspend fun loadModel(bytes: ByteArray, rules: List>>, memoryLimiter: MemoryLimiter, 
parallelismLimit: Int): KIModel { + suspend fun loadModel(bytes: ByteArray, rules: List>>, predictionConfig: PredictionConfig): KIModel { val modelScheme = ModelProto.decode(protoReader(bytes)) - val model = KIModel(modelScheme, memoryLimiter) + val model = KIModel(modelScheme, predictionConfig) return if (rules.isNotEmpty()) { val newGraph = GraphOptimizer(model.graph).run(rules) as KIGraph - KIModel(model.id, model.name, model.opSet, newGraph, memoryLimiter, parallelismLimit) + KIModel(model.id, model.name, model.opSet, newGraph, predictionConfig) } else { model } @@ -66,12 +65,12 @@ object KIEngine : IrOptimizableEngine> { override suspend fun loadModel(bytes: ByteArray): KIModel = loadModel(bytes, optimize = true) - suspend fun loadModel(path: Path, optimize: Boolean, memoryLimiter: MemoryLimiter, parallelismLimit: Int): KIModel { - return loadModel(CommonDataLoader.bytes(path), optimize, memoryLimiter, parallelismLimit) + suspend fun loadModel(path: Path, optimize: Boolean, predictionConfig: PredictionConfig): KIModel { + return loadModel(CommonDataLoader.bytes(path), optimize, predictionConfig) } override suspend fun loadModel(path: Path, optimize: Boolean): KIModel { - return loadModel(path, optimize, MemoryLimiters.NoAllocator, PlatformUtils.cores) + return loadModel(path, optimize, PredictionConfigs.NoAllocator) } override suspend fun loadModel(path: Path): KIModel = loadModel(path, optimize = true) @@ -80,12 +79,12 @@ object KIEngine : IrOptimizableEngine> { return loadModel(CommonDataLoader.bytes(path), rules) } - suspend fun loadModel(path: String, optimize: Boolean, memoryLimiter: MemoryLimiter, parallelismLimit: Int): KIModel { - return loadModel(CommonDataLoader.bytes(path.toPath()), optimize, memoryLimiter, parallelismLimit) + suspend fun loadModel(path: String, optimize: Boolean, predictionConfig: PredictionConfig): KIModel { + return loadModel(CommonDataLoader.bytes(path.toPath()), optimize, predictionConfig) } override suspend fun loadModel(path: String, optimize: Boolean): KIModel { - return loadModel(path, optimize, MemoryLimiters.NoAllocator, PlatformUtils.cores) + return loadModel(path, optimize, PredictionConfigs.NoAllocator) } override suspend fun loadModel(path: String): KIModel = loadModel(path, optimize = true) diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt index cdf96e0e1..dba23754f 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt @@ -3,7 +3,7 @@ package io.kinference.core.data.tensor import io.kinference.core.* import io.kinference.data.ONNXTensor import io.kinference.ndarray.arrays.* -import io.kinference.ndarray.arrays.memory.ManualAllocatorContext +import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext import io.kinference.ndarray.arrays.tiled.* import io.kinference.protobuf.FLOAT_TENSOR_TYPES import io.kinference.protobuf.message.TensorProto diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/TensorExtensions.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/TensorExtensions.kt index b83e75c2e..f8e2daf19 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/TensorExtensions.kt +++ 
b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/TensorExtensions.kt @@ -1,7 +1,7 @@ package io.kinference.core.data.tensor import io.kinference.ndarray.arrays.* -import io.kinference.ndarray.arrays.memory.ManualAllocatorContext +import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext import io.kinference.ndarray.extensions.concat import io.kinference.ndarray.extensions.splitWithAxis import io.kinference.primitives.types.DataType diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt index 837d222da..6611fc1ce 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt @@ -5,6 +5,7 @@ import io.kinference.core.graph.KIGraph import io.kinference.graph.Contexts import io.kinference.model.Model import io.kinference.ndarray.arrays.memory.* +import io.kinference.ndarray.arrays.memory.contexts.finalizeAllocatorContext import io.kinference.operator.OperatorSetRegistry import io.kinference.profiler.* import io.kinference.protobuf.message.ModelProto @@ -17,14 +18,10 @@ class KIModel( val name: String, val opSet: OperatorSetRegistry, val graph: KIGraph, - private val memoryLimiter: MemoryLimiter = MemoryLimiters.NoAllocator, - parallelismLimit: Int = PlatformUtils.cores, + predictionConfig: PredictionConfig = PredictionConfigs.NoAllocator, ) : Model>, Profilable, Cacheable { private val profiles: MutableList = ArrayList() - - @OptIn(ExperimentalCoroutinesApi::class) - private val dispatcher: CoroutineDispatcher = Dispatchers.Default.limitedParallelism(parallelismLimit) - private val modelArrayStorage: ModelArrayStorage = ModelArrayStorage(MemoryLimiters.DefaultManualAllocator) + private val predictionContextDispatcher: PredictionContextDispatcher = PredictionContextDispatcher(predictionConfig) override fun addProfilingContext(name: String): ProfilingContext = ProfilingContext(name).apply { profiles.add(this) } override fun analyzeProfilingResults(): ProfileAnalysisEntry = profiles.analyze("Model $name") @@ -36,7 +33,6 @@ class KIModel( if (profile) addProfilingContext("Model $name") else null ) - val limiterContext = ParallelismLimiterContext(dispatcher) var coreReserved = false val results = try { withContext(NonCancellable) { @@ -44,33 +40,16 @@ class KIModel( coreReserved = true } - when (MemoryLimiters.DefaultManualAllocator) { - MemoryLimiters.NoAllocator -> { - withContext(limiterContext) { - return@withContext graph.execute(input, contexts) - } - } - MemoryLimiters.DefaultManualAllocator -> { - val allocatorContext = modelArrayStorage.createManualAllocatorContext() - val mixedContext = allocatorContext + limiterContext - - withContext(mixedContext) { - return@withContext graph.execute(input, contexts) - } - } - else -> { - val allocatorContext = modelArrayStorage.createAutoAllocatorContext() - val mixedContext = allocatorContext + limiterContext - - withContext(mixedContext) { - val coroutineContext = coroutineContext[AutoAllocatorContext.Key]!! 
- val execResult = graph.execute(input, contexts) - val copies = execResult.map { it.clone(it.name) }.toList() - coroutineContext.returnUsedArrays() - return@withContext copies - } - } + val predictionContext = predictionContextDispatcher.getPredictionContext() + val output = if (predictionContextDispatcher.allocationMode != AllocationMode.Auto) withContext(predictionContext) { + return@withContext graph.execute(input, contexts) + } else withContext(predictionContext) { + return@withContext graph.execute(input, contexts).map { it.clone(it.name) }.toList() } + + predictionContext.finalizeAllocatorContext() + predictionContextDispatcher.returnStorage(predictionContext) + output } finally { if (coreReserved) { ResourcesDispatcher.releaseCore() @@ -82,11 +61,11 @@ class KIModel( override suspend fun close() { graph.close() - modelArrayStorage.close() + predictionContextDispatcher.close() } override fun clearCache() { - modelArrayStorage.clearCache() + predictionContextDispatcher.clearCache() } companion object { @@ -96,14 +75,13 @@ class KIModel( suspend operator fun invoke( proto: ModelProto, - memoryLimiter: MemoryLimiter = MemoryLimiters.NoAllocator, - limiterParallelismCounter: Int = PlatformUtils.cores, + predictionConfig: PredictionConfig = PredictionConfigs.NoAllocator, ): KIModel { val name = "${proto.domain}:${proto.modelVersion}" val id = "$name:${generateModelId()}" val opSet = OperatorSetRegistry(proto.opSetImport) val graph = KIGraph(proto.graph!!, opSet) - return KIModel(id, name, opSet, graph, memoryLimiter, limiterParallelismCounter) + return KIModel(id, name, opSet, graph, predictionConfig) } } } diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt index 234639c96..a06b99080 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt @@ -7,7 +7,7 @@ import io.kinference.core.optimizer.rules.context.AttentionContextRule import io.kinference.data.ONNXData import io.kinference.graph.Contexts import io.kinference.ndarray.arrays.* -import io.kinference.ndarray.arrays.memory.ManualAllocatorContext +import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext import io.kinference.ndarray.arrays.pointers.accept import io.kinference.ndarray.arrays.pointers.map import io.kinference.ndarray.arrays.tiled.FloatTiledArray diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt index f2be9a212..33a01c6d3 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt @@ -5,7 +5,7 @@ import io.kinference.core.data.tensor.* import io.kinference.data.ONNXData import io.kinference.graph.Contexts import io.kinference.ndarray.arrays.* -import io.kinference.ndarray.arrays.memory.ManualAllocatorContext +import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext import io.kinference.ndarray.arrays.pointers.* import io.kinference.operator.* import 
io.kinference.primitives.types.DataType diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt index 75320199f..08b8e7f1a 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt @@ -7,7 +7,7 @@ import io.kinference.data.ONNXData import io.kinference.graph.Contexts import io.kinference.ndarray.arrays.FloatNDArray import io.kinference.ndarray.arrays.MutableFloatNDArray -import io.kinference.ndarray.arrays.memory.ManualAllocatorContext +import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext import io.kinference.ndarray.arrays.pointers.* import io.kinference.operator.* import io.kinference.primitives.types.DataType diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/Add.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/Add.kt index b7a64397d..46596f4e1 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/Add.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/Add.kt @@ -6,7 +6,7 @@ import io.kinference.core.data.tensor.asTensor import io.kinference.data.ONNXData import io.kinference.graph.Contexts import io.kinference.ndarray.arrays.* -import io.kinference.ndarray.arrays.memory.ManualAllocatorContext +import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext import io.kinference.ndarray.extensions.allocateNDArray import io.kinference.operator.* import io.kinference.protobuf.message.TensorProto diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt index f2d8d01b3..c6b21a778 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt @@ -7,7 +7,7 @@ import io.kinference.data.ONNXData import io.kinference.graph.Contexts import io.kinference.ndarray.arrays.MutableNumberNDArrayCore import io.kinference.ndarray.arrays.NumberNDArrayCore -import io.kinference.ndarray.arrays.memory.ManualAllocatorContext +import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext import io.kinference.ndarray.extensions.allocateNDArray import io.kinference.ndarray.extensions.gelu.biasGelu import io.kinference.operator.* diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt index e3baa2e4e..1d5608450 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt @@ -6,7 +6,7 @@ import io.kinference.core.data.tensor.asTensor import io.kinference.data.ONNXData import io.kinference.graph.Contexts import io.kinference.ndarray.arrays.* -import io.kinference.ndarray.arrays.memory.ManualAllocatorContext +import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext import 
io.kinference.ndarray.broadcasting.Broadcasting import io.kinference.ndarray.extensions.allocateNDArray import io.kinference.operator.* diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt index 742fd7c2d..d0bc9a56a 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt @@ -6,7 +6,7 @@ import io.kinference.core.data.tensor.asTensor import io.kinference.data.ONNXData import io.kinference.graph.Contexts import io.kinference.ndarray.arrays.* -import io.kinference.ndarray.arrays.memory.ManualAllocatorContext +import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext import io.kinference.ndarray.arrays.pointers.mapTo import io.kinference.ndarray.arrays.tiled.* import io.kinference.operator.* diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt deleted file mode 100644 index dcf704673..000000000 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt +++ /dev/null @@ -1,139 +0,0 @@ -package io.kinference.ndarray.arrays.memory - -import io.kinference.primitives.types.DataType - -internal abstract class BaseArrayStorage(typeLength: Int, sizeLength: Int, storageCount: Int) { - /** - * This is a storage for arrays. - * - * Structure is as follows: - * 1. Array by predefined types (all types are known compiled time) - * 2. Array by size. - * Starting with 'INIT_SIZE_VALUE' element and grow it doubling (typically there are no more than 16 different sizes) - * 3. 
Queue of array containers (used as FIFO) - */ - protected var storage: Array>>> = - Array(storageCount) { Array(typeLength) { Array(sizeLength) { ArrayDeque() } } } - - private var sizeIndices: IntArray = IntArray(typeLength) - private var sizes: Array = Array(typeLength) { IntArray(sizeLength) } - - protected fun getSizeIndex(tIndex: Int, size: Int): Int { - val sIndex = sizes[tIndex].indexOf(size) - - return if (sIndex != -1) { - sIndex - } else { - if (sizeIndices[tIndex] >= storage[0][tIndex].size) - grow(tIndex) - - val idx = sizeIndices[tIndex]++ - sizes[tIndex][idx] = size - idx - } - } - - private fun grow(typeIndex: Int) { - val newSize = sizes[typeIndex].size * 2 - for (i in storage.indices) { - val newStorage: Array> = Array(newSize) { ArrayDeque() } - - for (j in storage[i][typeIndex].indices) { - newStorage[j] = storage[i][typeIndex][j] - } - - storage[i][typeIndex] = newStorage - } - - sizes[typeIndex] = sizes[typeIndex].copyOf(newSize) - } - - protected fun create(type: DataType, size: Int): Any { - return when (type) { - DataType.BYTE -> ByteArray(size) // 8-bit signed - DataType.SHORT -> ShortArray(size) // 16-bit signed - DataType.INT -> IntArray(size) // 32-bit signed - DataType.LONG -> LongArray(size) // 64-bit signed - - DataType.UBYTE -> UByteArray(size) // 8-bit unsigned - DataType.USHORT -> UShortArray(size) // 16-bit unsigned - DataType.UINT -> UIntArray(size) // 32-bit unsigned - DataType.ULONG -> ULongArray(size) // 64-bit unsigned - - DataType.FLOAT -> FloatArray(size) - DataType.DOUBLE -> DoubleArray(size) - - DataType.BOOLEAN -> BooleanArray(size) - else -> throw IllegalArgumentException("Unsupported array type") - } - } - - protected fun resetArray(array: Any): Unit = - when (array) { - is ByteArray -> array.fill(0) // 8-bit signed - is UByteArray -> array.fill(0u) // 8-bit unsigned - is ShortArray -> array.fill(0) // 16-bit signed - is UShortArray -> array.fill(0u) // 16-bit unsigned - is IntArray -> array.fill(0) // 32-bit signed - is UIntArray -> array.fill(0u) // 32-bit unsigned - is LongArray -> array.fill(0L) // 64-bit signed - is ULongArray -> array.fill(0U) // 64-bit unsigned - is FloatArray -> array.fill(0.0f) - is DoubleArray -> array.fill(0.0) - is BooleanArray -> array.fill(false) - else -> error("Unsupported array type") - } -} - -internal class SingleArrayStorage(typeLength: Int, sizeLength: Int, private val limiter: MemoryLimiter) : BaseArrayStorage(typeLength, sizeLength, 1) { - internal fun getArray(type: DataType, size: Int, fillZeros: Boolean = true): Any { - return if (limiter.checkMemoryLimitAndAdd(type, size)) { - val tIndex = type.ordinal - val sIndex = getSizeIndex(tIndex, size) - storage[0][tIndex][sIndex].removeFirstOrNull()?.takeIf { fillZeros }?.apply(::resetArray) ?: create(type, size) - } else { - create(type, size) - } - } - - internal fun returnArrays(type: DataType, size: Int, arrays: Array) { - val tIndex = type.ordinal - val sIndex = getSizeIndex(tIndex, size) - val queue = storage[0][tIndex][sIndex] - - queue.addAll(arrays) - } - - internal fun clear() { - storage[0].forEach { arraysBySize -> - arraysBySize.forEach { arrayDeque -> - arrayDeque.clear() - } - } - limiter.resetLimit() - } -} - -internal class ArrayStorage(typeLength: Int, sizeLength: Int, private val limiter: MemoryLimiter) : BaseArrayStorage(typeLength, sizeLength, 2) { - internal fun getArray(type: DataType, size: Int, fillZeros: Boolean = true): Any { - return if (limiter.checkMemoryLimitAndAdd(type, size)) { - val tIndex = type.ordinal - val sIndex = 
getSizeIndex(tIndex, size) - val array = storage[0][tIndex][sIndex].removeFirstOrNull()?.takeIf { fillZeros }?.apply(::resetArray) ?: create(type, size) - storage[1][tIndex][sIndex].add(array) - array - } else { - create(type, size) - } - } - - internal fun moveArrays() { - storage[1].forEachIndexed { typeIndex, arraysByType -> - arraysByType.forEachIndexed { sizeIndex, arrayDeque -> - storage[0][typeIndex][sizeIndex].addAll(arrayDeque) - arrayDeque.clear() - } - } - limiter.resetLimit() - } -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AutoAllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AutoAllocatorContext.kt deleted file mode 100644 index a9255dd93..000000000 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AutoAllocatorContext.kt +++ /dev/null @@ -1,23 +0,0 @@ -package io.kinference.ndarray.arrays.memory - -import io.kinference.ndarray.arrays.* -import io.kinference.primitives.types.DataType -import kotlin.coroutines.CoroutineContext - -data class AutoAllocatorContext internal constructor( - private val storage: ArrayStorage, - private val returnStorageFn: (ArrayStorage) -> Unit -) : CoroutineContext.Element { - - companion object Key : CoroutineContext.Key - override val key: CoroutineContext.Key<*> get() = Key - - internal fun getArrays(type: DataType, size: Int, count: Int): Array { - return Array(count) { storage.getArray(type, size) } - } - - fun returnUsedArrays() { - storage.moveArrays() - returnStorageFn(storage) - } -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ManualAllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ManualAllocatorContext.kt deleted file mode 100644 index 27f261fef..000000000 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ManualAllocatorContext.kt +++ /dev/null @@ -1,110 +0,0 @@ -package io.kinference.ndarray.arrays.memory - -import io.kinference.ndarray.arrays.* -import io.kinference.primitives.types.DataType -import kotlin.coroutines.CoroutineContext - -data class ManualAllocatorContext internal constructor(private val storage: SingleArrayStorage) : CoroutineContext.Element { - - internal val byteStorage = ByteArrayStorageWrapper() - internal val shortStorage = ShortArrayStorageWrapper() - internal val intStorage = IntArrayStorageWrapper() - internal val longStorage = LongArrayStorageWrapper() - - internal val ubyteStorage = UByteArrayStorageWrapper() - internal val ushortStorage = UShortArrayStorageWrapper() - internal val uintStorage = UIntArrayStorageWrapper() - internal val ulongStorage = ULongArrayStorageWrapper() - - internal val floatStorage = FloatArrayStorageWrapper() - internal val doubleStorage = DoubleArrayStorageWrapper() - - internal val booleanStorage = BooleanArrayStorageWrapper() - - companion object Key : CoroutineContext.Key - override val key: CoroutineContext.Key<*> get() = Key - - fun getNDArray(dataType: DataType, strides: Strides, fillZeros: Boolean = false): MutableNDArrayCore { - return when(dataType) { - DataType.BYTE -> byteStorage.getNDArray(strides, fillZeros) - DataType.SHORT -> shortStorage.getNDArray(strides, fillZeros) - DataType.INT -> intStorage.getNDArray(strides, fillZeros) - DataType.LONG -> longStorage.getNDArray(strides, fillZeros) - - DataType.UBYTE -> ubyteStorage.getNDArray(strides, fillZeros) - DataType.USHORT -> ushortStorage.getNDArray(strides, fillZeros) - 
DataType.UINT -> uintStorage.getNDArray(strides, fillZeros) - DataType.ULONG -> ulongStorage.getNDArray(strides, fillZeros) - - DataType.FLOAT -> floatStorage.getNDArray(strides, fillZeros) - DataType.DOUBLE -> doubleStorage.getNDArray(strides, fillZeros) - - DataType.BOOLEAN -> booleanStorage.getNDArray(strides, fillZeros) - - else -> error("Unsupported array type") - } - } - - fun returnNDArray(ndArray: NDArrayCore) { - when(ndArray.type) { - DataType.BYTE -> byteStorage.returnNDArray(ndArray as ByteNDArray) - DataType.SHORT -> shortStorage.returnNDArray(ndArray as ShortNDArray) - DataType.INT -> intStorage.returnNDArray(ndArray as IntNDArray) - DataType.LONG -> longStorage.returnNDArray(ndArray as LongNDArray) - - DataType.UBYTE -> ubyteStorage.returnNDArray(ndArray as UByteNDArray) - DataType.USHORT -> ushortStorage.returnNDArray(ndArray as UShortNDArray) - DataType.UINT -> uintStorage.returnNDArray(ndArray as UIntNDArray) - DataType.ULONG -> ulongStorage.returnNDArray(ndArray as ULongNDArray) - - DataType.FLOAT -> floatStorage.returnNDArray(ndArray as FloatNDArray) - DataType.DOUBLE -> doubleStorage.returnNDArray(ndArray as DoubleNDArray) - - DataType.BOOLEAN -> booleanStorage.returnNDArray(ndArray as BooleanNDArray) - - else -> error("Unsupported array type") - } - } - -// fun getNDArray(dataType: DataType, strides: Strides, fillZeros: Boolean = false): MutableNDArrayCore { -// return when(dataType) { -// DataType.BYTE -> ByteArrayStorageWrapper.getNDArray(strides, storage, fillZeros) -// DataType.SHORT -> ShortArrayStorageWrapper.getNDArray(strides, storage, fillZeros) -// DataType.INT -> IntArrayStorageWrapper.getNDArray(strides, storage, fillZeros) -// DataType.LONG -> LongArrayStorageWrapper.getNDArray(strides, storage, fillZeros) -// -// DataType.UBYTE -> UByteArrayStorageWrapper.getNDArray(strides, storage, fillZeros) -// DataType.USHORT -> UShortArrayStorageWrapper.getNDArray(strides, storage, fillZeros) -// DataType.UINT -> UIntArrayStorageWrapper.getNDArray(strides, storage, fillZeros) -// DataType.ULONG -> ULongArrayStorageWrapper.getNDArray(strides, storage, fillZeros) -// -// DataType.FLOAT -> FloatArrayStorageWrapper.getNDArray(strides, storage, fillZeros) -// DataType.DOUBLE -> DoubleArrayStorageWrapper.getNDArray(strides, storage, fillZeros) -// -// DataType.BOOLEAN -> BooleanArrayStorageWrapper.getNDArray(strides, storage, fillZeros) -// -// else -> error("Unsupported array type") -// } -// } -// -// fun returnNDArray(ndArray: NDArrayCore) { -// when(ndArray.type) { -// DataType.BYTE -> ByteArrayStorageWrapper.returnNDArray(storage, ndArray as ByteNDArray) -// DataType.SHORT -> ShortArrayStorageWrapper.returnNDArray(storage, ndArray as ShortNDArray) -// DataType.INT -> IntArrayStorageWrapper.returnNDArray(storage, ndArray as IntNDArray) -// DataType.LONG -> LongArrayStorageWrapper.returnNDArray(storage, ndArray as LongNDArray) -// -// DataType.UBYTE -> UByteArrayStorageWrapper.returnNDArray(storage, ndArray as UByteNDArray) -// DataType.USHORT -> UShortArrayStorageWrapper.returnNDArray(storage, ndArray as UShortNDArray) -// DataType.UINT -> UIntArrayStorageWrapper.returnNDArray(storage, ndArray as UIntNDArray) -// DataType.ULONG -> ULongArrayStorageWrapper.returnNDArray(storage, ndArray as ULongNDArray) -// -// DataType.FLOAT -> FloatArrayStorageWrapper.returnNDArray(storage, ndArray as FloatNDArray) -// DataType.DOUBLE -> DoubleArrayStorageWrapper.returnNDArray(storage, ndArray as DoubleNDArray) -// -// DataType.BOOLEAN -> 
BooleanArrayStorageWrapper.returnNDArray(storage, ndArray as BooleanNDArray) -// -// else -> error("Unsupported array type") -// } -// } -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryLimiter.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryManager.kt similarity index 60% rename from ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryLimiter.kt rename to ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryManager.kt index 85ed03eb1..3dc575bf2 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryLimiter.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryManager.kt @@ -1,10 +1,15 @@ package io.kinference.ndarray.arrays.memory import io.kinference.primitives.types.DataType -import io.kinference.utils.PlatformUtils import kotlinx.atomicfu.* +import kotlinx.coroutines.* + +internal class MemoryManager internal constructor(private val memoryLimit: Long, private val cacheClearingInterval: Long, private val onCacheClear: () -> Unit) { + private var usedMemory: AtomicLong = atomic(0L) + private val lastAccessTime = atomic(System.currentTimeMillis()) + private val monitorJob: AtomicRef = atomic(initial = null) + private val isFinalized = atomic(initial = false) -interface MemoryLimiter { /** * Checks if the memory limit allows adding the specified amount of memory and performs the addition * @@ -12,18 +17,7 @@ interface MemoryLimiter { * @param size is the checking array size * @return true if the memory was added successfully and false if adding the memory exceeds the memory limit */ - fun checkMemoryLimitAndAdd(type: DataType, size: Int): Boolean - - /** - * Resets the used memory into 0L - */ - fun resetLimit() -} - -class BaseMemoryLimiter internal constructor(private val memoryLimit: Long) : MemoryLimiter { - private var usedMemory: AtomicLong = atomic(0L) - - override fun checkMemoryLimitAndAdd(type: DataType, size: Int): Boolean { + fun checkMemoryLimitAndAdd(type: DataType, size: Int): Boolean { // Attempt to add memory and check the limit val added = sizeInBytes(type.ordinal, size) val successful = usedMemory.getAndUpdate { current -> @@ -33,10 +27,38 @@ class BaseMemoryLimiter internal constructor(private val memoryLimit: Long) : Me return successful } - override fun resetLimit() { + /** + * Resets the used memory into 0L + */ + fun resetLimit() { usedMemory.value = 0L } + fun updateLastAccessTime() { + lastAccessTime.value = System.currentTimeMillis() + + // Start monitoring if not already started + if (monitorJob.compareAndSet(expect = null, update = null) && !isFinalized.value) { + val newJob = CoroutineScope(Dispatchers.Default).launch { + while (isActive) { + delay(cacheClearingInterval) + if (System.currentTimeMillis() - lastAccessTime.value > cacheClearingInterval) { + onCacheClear() + } + } + } + if (!monitorJob.compareAndSet(expect = null, newJob)) { + newJob.cancel() // Cancel if another thread set the job + } + } + } + + fun stopMonitoring() { + if (isFinalized.compareAndSet(expect = false, update = true)) { + monitorJob.getAndSet(value = null)?.cancel() + } + } + companion object { private val typeSizes: LongArray = LongArray(DataType.entries.size).apply { this[DataType.BYTE.ordinal] = Byte.SIZE_BYTES.toLong() @@ -60,13 +82,3 @@ class BaseMemoryLimiter internal constructor(private val memoryLimit: Long) : Me } } } - -object MemoryLimiters { - val 
DefaultAutoAllocator: MemoryLimiter = BaseMemoryLimiter((PlatformUtils.maxHeap * 0.3).toLong()) - val DefaultManualAllocator: MemoryLimiter = BaseMemoryLimiter(50 * 1024 * 1024) - val NoAllocator: MemoryLimiter = BaseMemoryLimiter(0L) - - fun customLimiter(memoryLimit: Long): MemoryLimiter { - return BaseMemoryLimiter(memoryLimit) - } -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ModelArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ModelArrayStorage.kt deleted file mode 100644 index 0135921cb..000000000 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ModelArrayStorage.kt +++ /dev/null @@ -1,39 +0,0 @@ -package io.kinference.ndarray.arrays.memory - -import io.kinference.primitives.types.DataType -import io.kinference.utils.Closeable -import java.util.concurrent.ConcurrentLinkedQueue - -class ModelArrayStorage(private val limiter: MemoryLimiter = MemoryLimiters.NoAllocator) : Closeable { - private val autoStorageQueue: ConcurrentLinkedQueue = ConcurrentLinkedQueue() - - companion object { - private const val INIT_SIZE_VALUE: Int = 2 - private val typeSize: Int = DataType.entries.size - } - - fun createAutoAllocatorContext(): AutoAllocatorContext { - return AutoAllocatorContext(getStorage(autoStorageQueue), ::returnStorage) - } - - fun createManualAllocatorContext(): ManualAllocatorContext { - limiter.resetLimit() - return ManualAllocatorContext(SingleArrayStorage(typeSize, INIT_SIZE_VALUE, limiter)) - } - - fun clearCache() { - autoStorageQueue.clear() - } - - override suspend fun close() { - clearCache() - } - - private fun getStorage(queue: ConcurrentLinkedQueue): ArrayStorage { - return queue.poll() ?: ArrayStorage(typeSize, INIT_SIZE_VALUE, limiter) - } - - private fun returnStorage(storage: ArrayStorage) { - autoStorageQueue.offer(storage) - } -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PredictionContextDispatcher.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PredictionContextDispatcher.kt new file mode 100644 index 000000000..10a2c4bc4 --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PredictionContextDispatcher.kt @@ -0,0 +1,64 @@ +package io.kinference.ndarray.arrays.memory + +import io.kinference.ndarray.arrays.memory.contexts.* +import io.kinference.ndarray.arrays.memory.storage.* +import io.kinference.utils.* +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.ExperimentalCoroutinesApi +import java.util.concurrent.ConcurrentLinkedQueue +import kotlin.coroutines.CoroutineContext + +interface ArrayStorage { + fun resetState() +} + +class PredictionContextDispatcher(private val predictionConfig: PredictionConfig) : Closeable { + private val limiter: MemoryManager = MemoryManager( + memoryLimit = predictionConfig.memoryThreshold, + cacheClearingInterval = predictionConfig.memoryClearingInterval, + onCacheClear = ::clearCache) + + private val contextQueue: ConcurrentLinkedQueue = ConcurrentLinkedQueue() + val allocationMode + get() = predictionConfig.allocationMode + + fun getPredictionContext(): CoroutineContext { + val allocatorContext = when (predictionConfig.allocationMode) { + AllocationMode.NoAllocation -> getNoAllocatorContext() + AllocationMode.Manual -> getManualAllocatorContext() + AllocationMode.Auto -> getAutoAllocatorContext() + } + return allocatorContext + } + + @OptIn(ExperimentalCoroutinesApi::class) + 
private fun getNoAllocatorContext(): CoroutineContext { + return contextQueue.poll() ?: (NoAllocatorContext() + ParallelismLimiterContext(Dispatchers.Default.limitedParallelism(predictionConfig.parallelismLimit))) + } + + @OptIn(ExperimentalCoroutinesApi::class) + private fun getAutoAllocatorContext(): CoroutineContext { + limiter.updateLastAccessTime() + return contextQueue.poll() ?: (AutoAllocatorContext(AutoArrayHandlingStorage(limiter)) + ParallelismLimiterContext(Dispatchers.Default.limitedParallelism(predictionConfig.parallelismLimit))) + } + + @OptIn(ExperimentalCoroutinesApi::class) + private fun getManualAllocatorContext(): CoroutineContext { + limiter.updateLastAccessTime() + return contextQueue.poll() ?: (ManualAllocatorContext(ManualArrayHandlingStorage(limiter)) + ParallelismLimiterContext(Dispatchers.Default.limitedParallelism(predictionConfig.parallelismLimit))) + } + + fun clearCache() { + limiter.stopMonitoring() + contextQueue.clear() + limiter.resetLimit() + } + + override suspend fun close() { + clearCache() + } + + fun returnStorage(context: CoroutineContext) { + contextQueue.offer(context) + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayStorageWrapper.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayStorageWrapper.kt deleted file mode 100644 index e6c566938..000000000 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayStorageWrapper.kt +++ /dev/null @@ -1,62 +0,0 @@ -@file:GeneratePrimitives(DataType.ALL) -package io.kinference.ndarray.arrays.memory - -import io.kinference.ndarray.arrays.* -import io.kinference.ndarray.arrays.PrimitiveNDArray -import io.kinference.ndarray.arrays.tiled.PrimitiveTiledArray -import io.kinference.ndarray.blockSizeByStrides -import io.kinference.ndarray.extensions.constants.PrimitiveConstants -import io.kinference.primitives.annotations.* -import io.kinference.primitives.types.DataType -import io.kinference.primitives.types.PrimitiveArray -import io.kinference.utils.inlines.InlineInt - -@GenerateNameFromPrimitives -internal class PrimitiveArrayStorageWrapper { - private val type = DataType.CurrentPrimitive - - private val storage = HashMap>(2) - - fun getNDArray(strides: Strides, fillZeros: Boolean = false): MutablePrimitiveNDArray { - val blockSize = InlineInt(blockSizeByStrides(strides)) - val blocksNum = strides.linearSize / blockSize.value - - val queue = storage.getOrPut(blockSize) { ArrayDeque(blocksNum) } - - val blocks = Array(blocksNum) { - val block = queue.removeFirstOrNull() - if (fillZeros) { - block?.fill(PrimitiveConstants.ZERO) - } - block ?: PrimitiveArray(blockSize.value) - } - - val tiled = PrimitiveTiledArray(blocks) - - return MutablePrimitiveNDArray(tiled, strides) - } - - fun returnNDArray(ndarray: PrimitiveNDArray) { - val blockSize = InlineInt(ndarray.array.blockSize) - val blocksNum = ndarray.array.blocksNum - - val queue = storage.getOrPut(blockSize) { ArrayDeque(blocksNum) } - - queue.addAll(ndarray.array.blocks) - } - - fun getNDArray(strides: Strides, storage: SingleArrayStorage, fillZeros: Boolean = false): MutablePrimitiveNDArray { - val blockSize = blockSizeByStrides(strides) - val blocksNum = strides.linearSize / blockSize - val blocks = Array(blocksNum) { storage.getArray(type, blockSize, fillZeros) } - val typedBlocks = blocks.map { it as PrimitiveArray }.toTypedArray() - val tiled = PrimitiveTiledArray(typedBlocks) - - return 
MutablePrimitiveNDArray(tiled, strides) - } - - fun returnNDArray(storage: SingleArrayStorage, ndarray: PrimitiveNDArray) { - val blockSize = ndarray.array.blockSize - storage.returnArrays(type, blockSize, ndarray.array.blocks as Array) - } -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt new file mode 100644 index 000000000..05f7063c0 --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt @@ -0,0 +1,17 @@ +package io.kinference.ndarray.arrays.memory.contexts + +import io.kinference.ndarray.arrays.memory.storage.AutoArrayHandlingStorage +import io.kinference.primitives.types.DataType +import kotlin.coroutines.* + +internal class AutoAllocatorContext internal constructor( + storage: AutoArrayHandlingStorage, +) : BaseAllocatorContextWithStorage(storage) { + + companion object Key : CoroutineContext.Key + override val key: CoroutineContext.Key<*> get() = Key + + internal fun getArrays(type: DataType, size: Int, count: Int): Array { + return storage.getArrays(type, size, count) + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/BaseAllocatorContextWithStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/BaseAllocatorContextWithStorage.kt new file mode 100644 index 000000000..f98e96649 --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/BaseAllocatorContextWithStorage.kt @@ -0,0 +1,24 @@ +package io.kinference.ndarray.arrays.memory.contexts + +import io.kinference.ndarray.arrays.memory.ArrayStorage +import kotlin.coroutines.CoroutineContext + +interface BaseAllocatorContext: CoroutineContext.Element + +abstract class BaseAllocatorContextWithStorage(protected val storage: T) : BaseAllocatorContext { + fun finalizeContext() { + storage.resetState() + } +} + +fun CoroutineContext.finalizeAllocatorContext() { + this.fold(Unit) { _, context -> + if (context is BaseAllocatorContextWithStorage<*>) + context.finalizeContext() + } +} + +class NoAllocatorContext : BaseAllocatorContext { + companion object Key : CoroutineContext.Key + override val key: CoroutineContext.Key<*> get() = Key +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/ManualAllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/ManualAllocatorContext.kt new file mode 100644 index 000000000..a713f31fe --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/ManualAllocatorContext.kt @@ -0,0 +1,23 @@ +package io.kinference.ndarray.arrays.memory.contexts + +import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.storage.ManualArrayHandlingStorage +import io.kinference.ndarray.arrays.memory.storage.ManualStorage +import io.kinference.primitives.types.DataType +import kotlin.coroutines.CoroutineContext + +class ManualAllocatorContext internal constructor( + storage: ManualArrayHandlingStorage, +) : BaseAllocatorContextWithStorage(storage) { + + companion object Key : CoroutineContext.Key + override val key: CoroutineContext.Key<*> get() = Key + + fun getNDArray(dataType: DataType, strides: Strides, fillZeros: Boolean = false): MutableNDArrayCore { + return 
storage.getNDArray(dataType, strides, fillZeros) + } + + fun returnNDArray(ndArray: NDArrayCore) { + storage.returnNDArray(ndArray) + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt new file mode 100644 index 000000000..030beac56 --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt @@ -0,0 +1,34 @@ +package io.kinference.ndarray.arrays.memory.storage + +import io.kinference.ndarray.arrays.memory.* +import io.kinference.primitives.types.DataType + +internal interface TypedAutoHandlingStorage { + fun getBlock(blocksNum: Int, blockSize: Int, limiter: MemoryManager): Array + fun moveBlocksIntoUnused() +} + +internal class AutoArrayHandlingStorage(private val limiter: MemoryManager) : ArrayStorage { + private val storage: List = listOf( + ByteAutoHandlingArrayStorage(), + ShortAutoHandlingArrayStorage(), + IntAutoHandlingArrayStorage(), + LongAutoHandlingArrayStorage(), + UByteAutoHandlingArrayStorage(), + UShortAutoHandlingArrayStorage(), + UIntAutoHandlingArrayStorage(), + ULongAutoHandlingArrayStorage(), + FloatAutoHandlingArrayStorage(), + DoubleAutoHandlingArrayStorage(), + BooleanAutoHandlingArrayStorage() + ) + + internal fun getArrays(type: DataType, size: Int, count: Int): Array { + return storage[type.ordinal].getBlock(blocksNum = count, blockSize = size, limiter = limiter) + } + + override fun resetState() { + storage.forEach { it.moveBlocksIntoUnused() } + limiter.resetLimit() + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/ManualArrayHandlingStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/ManualArrayHandlingStorage.kt new file mode 100644 index 000000000..0631056b3 --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/ManualArrayHandlingStorage.kt @@ -0,0 +1,45 @@ +package io.kinference.ndarray.arrays.memory.storage + +import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.* +import io.kinference.primitives.types.DataType + +internal interface TypedManualHandlingStorage { + fun getNDArray(strides: Strides, fillZeros: Boolean = false, limiter: MemoryManager): MutableNDArrayCore + fun returnNDArray(ndarray: NDArrayCore) + fun clear() +} + +interface ManualStorage : ArrayStorage { + fun getNDArray(dataType: DataType, strides: Strides, fillZeros: Boolean = false): MutableNDArrayCore + fun returnNDArray(ndArray: NDArrayCore) +} + +internal class ManualArrayHandlingStorage(private val memoryManager: MemoryManager) : ManualStorage { + private val storage: List = listOf( + ByteManualHandlingArrayStorage(), + ShortManualHandlingArrayStorage(), + IntManualHandlingArrayStorage(), + LongManualHandlingArrayStorage(), + UByteManualHandlingArrayStorage(), + UShortManualHandlingArrayStorage(), + UIntManualHandlingArrayStorage(), + ULongManualHandlingArrayStorage(), + FloatManualHandlingArrayStorage(), + DoubleManualHandlingArrayStorage(), + BooleanManualHandlingArrayStorage() + ) + + override fun getNDArray(dataType: DataType, strides: Strides, fillZeros: Boolean): MutableNDArrayCore { + return storage[dataType.ordinal].getNDArray(strides, fillZeros, memoryManager) + } + + override fun returnNDArray(ndArray: NDArrayCore) { + 
storage[ndArray.type.ordinal].returnNDArray(ndArray) + } + + override fun resetState() { + storage.forEach { it.clear() } + memoryManager.resetLimit() + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt new file mode 100644 index 000000000..0d9d4e7da --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt @@ -0,0 +1,47 @@ +@file:GeneratePrimitives(DataType.ALL) +package io.kinference.ndarray.arrays.memory.storage + +import io.kinference.ndarray.arrays.memory.MemoryManager +import io.kinference.ndarray.extensions.constants.PrimitiveConstants +import io.kinference.primitives.annotations.GenerateNameFromPrimitives +import io.kinference.primitives.annotations.GeneratePrimitives +import io.kinference.primitives.types.DataType +import io.kinference.primitives.types.PrimitiveArray + +@GenerateNameFromPrimitives +internal class PrimitiveAutoHandlingArrayStorage : TypedAutoHandlingStorage { + private val used = HashMap>(8) + private val unused = HashMap>(8) + + companion object { + private val type = DataType.CurrentPrimitive + } + + override fun getBlock(blocksNum: Int, blockSize: Int, limiter: MemoryManager): Array { + val unusedQueue = unused.getOrPut(blockSize) { ArrayDeque(blocksNum) } + val usedQueue = used.getOrPut(blockSize) { ArrayDeque(blocksNum) } + + val blocks = if (limiter.checkMemoryLimitAndAdd(type, blockSize * blocksNum)) { + Array(blocksNum) { + val block = unusedQueue.removeFirstOrNull() + block?.fill(PrimitiveConstants.ZERO) + block ?: PrimitiveArray(blockSize) + } + } else { + Array(blocksNum) { + PrimitiveArray(blockSize) + } + } + + usedQueue.addAll(blocks) + + return blocks as Array + } + + override fun moveBlocksIntoUnused() { + used.forEach { (blockSize, queue) -> + unused[blockSize]!!.addAll(queue) + queue.clear() + } + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt new file mode 100644 index 000000000..821438a97 --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt @@ -0,0 +1,56 @@ +@file:GeneratePrimitives(DataType.ALL) +package io.kinference.ndarray.arrays.memory.storage + +import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.PrimitiveNDArray +import io.kinference.ndarray.arrays.memory.MemoryManager +import io.kinference.ndarray.arrays.tiled.PrimitiveTiledArray +import io.kinference.ndarray.blockSizeByStrides +import io.kinference.ndarray.extensions.constants.PrimitiveConstants +import io.kinference.primitives.annotations.* +import io.kinference.primitives.types.DataType +import io.kinference.primitives.types.PrimitiveArray + +@GenerateNameFromPrimitives +internal class PrimitiveManualHandlingArrayStorage : TypedManualHandlingStorage { + private val storage = HashMap>(8) + + companion object { + private val type = DataType.CurrentPrimitive + } + + override fun getNDArray(strides: Strides, fillZeros: Boolean, limiter: MemoryManager): MutableNDArrayCore { + val blockSize = blockSizeByStrides(strides) + val blocksNum = strides.linearSize / blockSize + val 
blocks = if (limiter.checkMemoryLimitAndAdd(type, blockSize * blocksNum)) { + val queue = storage.getOrPut(blockSize) { ArrayDeque(blocksNum) } + Array(blocksNum) { + val block = queue.removeFirstOrNull() + if (fillZeros) { + block?.fill(PrimitiveConstants.ZERO) + } + block ?: PrimitiveArray(blockSize) + } + } else { + Array(blocksNum) { PrimitiveArray(blockSize) } + } + + val tiled = PrimitiveTiledArray(blocks) + + return MutablePrimitiveNDArray(tiled, strides) + } + + override fun returnNDArray(ndarray: NDArrayCore) { + require(ndarray is PrimitiveNDArray) + val blockSize = ndarray.array.blockSize + val blocksNum = ndarray.array.blocksNum + + val queue = storage.getOrPut(blockSize) { ArrayDeque(blocksNum) } + + queue.addAll(ndarray.array.blocks) + } + + override fun clear() { + storage.forEach { (_, queue) -> queue.clear() } + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt index 4469e9d4e..db442f977 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt @@ -5,6 +5,7 @@ package io.kinference.ndarray.arrays.tiled import io.kinference.ndarray.arrays.* import io.kinference.ndarray.arrays.memory.* +import io.kinference.ndarray.arrays.memory.contexts.AutoAllocatorContext import io.kinference.ndarray.arrays.pointers.PrimitivePointer import io.kinference.ndarray.arrays.pointers.accept import io.kinference.ndarray.blockSizeByStrides diff --git a/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/PredictionConfig.kt b/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/PredictionConfig.kt new file mode 100644 index 000000000..1828b36bd --- /dev/null +++ b/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/PredictionConfig.kt @@ -0,0 +1,67 @@ +package io.kinference.utils + +enum class AllocationMode { + NoAllocation, + Manual, + Auto; +} + +class PredictionConfig private constructor( + val parallelismLimit: Int, + val allocationMode: AllocationMode, + val memoryThreshold: Long, + val memoryClearingInterval: Long +) { + companion object { + operator fun invoke( + parallelismLimit: Int, + allocationMode: AllocationMode, + memoryThreshold: Long, + memoryClearingInterval: Long + ): PredictionConfig { + require(parallelismLimit in 1..PlatformUtils.cores) { + "Parallelism limit must be within the range of 1 to available CPU cores: ${PlatformUtils.cores}." + } + return if (allocationMode == AllocationMode.NoAllocation) { + PredictionConfig(parallelismLimit, allocationMode, 0L, Long.MAX_VALUE) + } else { + require(memoryThreshold > 0) { + "Memory threshold must be positive." + } + require(memoryClearingInterval > 0) { + "Memory clearing interval must be positive." 
+ } + + PredictionConfig(parallelismLimit, allocationMode, memoryThreshold, memoryClearingInterval) + } + } + } +} + +object PredictionConfigs { + val DefaultAutoAllocator: PredictionConfig = PredictionConfig( + parallelismLimit = PlatformUtils.cores, + allocationMode = AllocationMode.Auto, + memoryThreshold = (PlatformUtils.maxHeap * 0.3).toLong(), + memoryClearingInterval = 500 + ) + val DefaultManualAllocator: PredictionConfig = PredictionConfig( + parallelismLimit = PlatformUtils.cores, + allocationMode = AllocationMode.Manual, + memoryThreshold = 50 * 1024 * 1024, + memoryClearingInterval = 500 + ) + val NoAllocator: PredictionConfig = PredictionConfig( + parallelismLimit = PlatformUtils.cores, + allocationMode = AllocationMode.NoAllocation, + memoryThreshold = 0L, + memoryClearingInterval = Long.MAX_VALUE + ) + + fun customPredictionConfig(parallelismLimit: Int, + allocationMode: AllocationMode, + memoryThreshold: Long, + memoryClearingInterval: Long): PredictionConfig { + return PredictionConfig(parallelismLimit, allocationMode, memoryThreshold, memoryClearingInterval) + } +} From b83f7f8ad20df6f909caed4789b92c91c64e1d0b Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Thu, 29 Aug 2024 13:44:48 +0200 Subject: [PATCH 08/19] JBAI-4393 [core, ndarray] Added getPrimitiveBlock extension functions for better primitive types handling: this solution gives less double primitive array allocations when Array changes to actual type. --- .../memory/contexts/AutoAllocatorContext.kt | 5 +---- .../BaseAllocatorContextWithStorage.kt | 2 +- .../memory/storage/AutoArrayHandlingStorage.kt | 10 ++-------- .../PrimitiveAutoHandlingArrayStorage.kt | 10 +++++----- .../PrimitiveGetBlockFunctionsExtension.kt | 18 ++++++++++++++++++ .../arrays/tiled/PrimitiveTiledArray.kt | 8 +++----- 6 files changed, 30 insertions(+), 23 deletions(-) create mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveGetBlockFunctionsExtension.kt diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt index 05f7063c0..a4d36b555 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt @@ -2,6 +2,7 @@ package io.kinference.ndarray.arrays.memory.contexts import io.kinference.ndarray.arrays.memory.storage.AutoArrayHandlingStorage import io.kinference.primitives.types.DataType +import io.kinference.primitives.types.PrimitiveArray import kotlin.coroutines.* internal class AutoAllocatorContext internal constructor( @@ -10,8 +11,4 @@ internal class AutoAllocatorContext internal constructor( companion object Key : CoroutineContext.Key override val key: CoroutineContext.Key<*> get() = Key - - internal fun getArrays(type: DataType, size: Int, count: Int): Array { - return storage.getArrays(type, size, count) - } } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/BaseAllocatorContextWithStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/BaseAllocatorContextWithStorage.kt index f98e96649..e617c78de 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/BaseAllocatorContextWithStorage.kt +++ 
b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/BaseAllocatorContextWithStorage.kt @@ -5,7 +5,7 @@ import kotlin.coroutines.CoroutineContext interface BaseAllocatorContext: CoroutineContext.Element -abstract class BaseAllocatorContextWithStorage(protected val storage: T) : BaseAllocatorContext { +abstract class BaseAllocatorContextWithStorage(internal val storage: T) : BaseAllocatorContext { fun finalizeContext() { storage.resetState() } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt index 030beac56..803fe4416 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt @@ -1,15 +1,13 @@ package io.kinference.ndarray.arrays.memory.storage import io.kinference.ndarray.arrays.memory.* -import io.kinference.primitives.types.DataType internal interface TypedAutoHandlingStorage { - fun getBlock(blocksNum: Int, blockSize: Int, limiter: MemoryManager): Array fun moveBlocksIntoUnused() } -internal class AutoArrayHandlingStorage(private val limiter: MemoryManager) : ArrayStorage { - private val storage: List = listOf( +internal class AutoArrayHandlingStorage(internal val limiter: MemoryManager) : ArrayStorage { + internal val storage: List = listOf( ByteAutoHandlingArrayStorage(), ShortAutoHandlingArrayStorage(), IntAutoHandlingArrayStorage(), @@ -23,10 +21,6 @@ internal class AutoArrayHandlingStorage(private val limiter: MemoryManager) : Ar BooleanAutoHandlingArrayStorage() ) - internal fun getArrays(type: DataType, size: Int, count: Int): Array { - return storage[type.ordinal].getBlock(blocksNum = count, blockSize = size, limiter = limiter) - } - override fun resetState() { storage.forEach { it.moveBlocksIntoUnused() } limiter.resetLimit() diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt index 0d9d4e7da..71ad40341 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt @@ -17,15 +17,15 @@ internal class PrimitiveAutoHandlingArrayStorage : TypedAutoHandlingStorage { private val type = DataType.CurrentPrimitive } - override fun getBlock(blocksNum: Int, blockSize: Int, limiter: MemoryManager): Array { + fun getBlock(blocksNum: Int, blockSize: Int, limiter: MemoryManager): Array { val unusedQueue = unused.getOrPut(blockSize) { ArrayDeque(blocksNum) } val usedQueue = used.getOrPut(blockSize) { ArrayDeque(blocksNum) } val blocks = if (limiter.checkMemoryLimitAndAdd(type, blockSize * blocksNum)) { Array(blocksNum) { - val block = unusedQueue.removeFirstOrNull() - block?.fill(PrimitiveConstants.ZERO) - block ?: PrimitiveArray(blockSize) + unusedQueue.removeFirstOrNull()?.apply { + fill(PrimitiveConstants.ZERO) + } ?: PrimitiveArray(blockSize) } } else { Array(blocksNum) { @@ -35,7 +35,7 @@ internal class PrimitiveAutoHandlingArrayStorage : TypedAutoHandlingStorage { usedQueue.addAll(blocks) - 
return blocks as Array + return blocks } override fun moveBlocksIntoUnused() { diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveGetBlockFunctionsExtension.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveGetBlockFunctionsExtension.kt new file mode 100644 index 000000000..6bb61d9c8 --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveGetBlockFunctionsExtension.kt @@ -0,0 +1,18 @@ +@file:GeneratePrimitives(DataType.ALL) +@file:Suppress("DuplicatedCode") +package io.kinference.ndarray.arrays.memory.storage + +import io.kinference.ndarray.arrays.memory.contexts.AutoAllocatorContext +import io.kinference.primitives.annotations.GenerateNameFromPrimitives +import io.kinference.primitives.annotations.GeneratePrimitives +import io.kinference.primitives.types.* + +@GenerateNameFromPrimitives +internal fun AutoArrayHandlingStorage.getPrimitiveBlock(blocksNum: Int, blockSize: Int): Array { + return (storage[DataType.CurrentPrimitive.ordinal] as PrimitiveAutoHandlingArrayStorage).getBlock(blocksNum = blocksNum, blockSize = blockSize, limiter = limiter) +} + +@GenerateNameFromPrimitives +internal fun AutoAllocatorContext.getPrimitiveBlock(blocksNum: Int, blockSize: Int): Array { + return storage.getPrimitiveBlock(blocksNum = blocksNum, blockSize = blockSize) +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt index db442f977..600211e3b 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt @@ -4,8 +4,8 @@ package io.kinference.ndarray.arrays.tiled import io.kinference.ndarray.arrays.* -import io.kinference.ndarray.arrays.memory.* import io.kinference.ndarray.arrays.memory.contexts.AutoAllocatorContext +import io.kinference.ndarray.arrays.memory.storage.* import io.kinference.ndarray.arrays.pointers.PrimitivePointer import io.kinference.ndarray.arrays.pointers.accept import io.kinference.ndarray.blockSizeByStrides @@ -59,11 +59,9 @@ internal class PrimitiveTiledArray(val blocks: Array) { require(size % blockSize == 0) { "Size must divide blockSize" } val blocksNum = if (blockSize == 0) 0 else size / blockSize + val blocks = coroutineContext[AutoAllocatorContext.Key]?.getPrimitiveBlock(blocksNum, blockSize) ?: Array(blocksNum) { PrimitiveArray(blockSize) } - val coroutineContext = coroutineContext[AutoAllocatorContext.Key] - val blocks = coroutineContext?.getArrays(type, blockSize, blocksNum) ?: Array(blocksNum) { PrimitiveArray(blockSize) } - - return PrimitiveTiledArray(blocks.map { it as PrimitiveArray }.toTypedArray()) + return PrimitiveTiledArray(blocks) } suspend operator fun invoke(size: Int, blockSize: Int, init: (InlineInt) -> PrimitiveType) : PrimitiveTiledArray { From 450a39e712c2fb343e08ff6d8ac2888d04f0c18f Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Thu, 29 Aug 2024 16:44:28 +0200 Subject: [PATCH 09/19] JBAI-4393 [ndarray] Added Fastutil support for more efficient primitive handling in primitive array storage classes. 
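For readers unfamiliar with fastutil: this patch swaps the HashMap-based pools keyed by block size for it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap, whose primitive int keys avoid Integer boxing on every lookup of a block-size bucket. The sketch below only illustrates that idea under assumed names (FloatBlockPool, borrow, recycle are not part of the patch; the real storages are the generated Primitive*HandlingArrayStorage classes, which additionally route lookups through a project-local getOrPut helper):

    import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap

    // Illustrative block pool keyed by block size; int keys stay unboxed.
    class FloatBlockPool(expectedSizes: Int = 64) {
        private val pools = Int2ObjectOpenHashMap<ArrayDeque<FloatArray>>(expectedSizes)

        // Reuse a cached block of the requested size, or allocate a fresh one.
        fun borrow(blockSize: Int): FloatArray {
            val queue = pools.get(blockSize)
                ?: ArrayDeque<FloatArray>().also { pools.put(blockSize, it) }
            return queue.removeFirstOrNull() ?: FloatArray(blockSize)
        }

        // Return a block so later borrows of the same size can reuse it.
        fun recycle(block: FloatArray) {
            val queue = pools.get(block.size)
                ?: ArrayDeque<FloatArray>().also { pools.put(block.size, it) }
            queue.addLast(block)
        }
    }

The design point is that the hot path of the allocator touches these maps once per block batch, so removing per-lookup key boxing (and the resulting garbage) is what motivates the fastutil-core dependency added below.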
--- gradle/libs.versions.toml | 2 ++ ndarray/ndarray-core/build.gradle.kts | 1 + .../jvmMain/kotlin/io/kinference/ndarray/Utils.kt | 2 ++ .../storage/PrimitiveAutoHandlingArrayStorage.kt | 15 ++++++++------- .../PrimitiveManualHandlingArrayStorage.kt | 15 ++++++++------- .../kinference/ndarray/extensions/utils/Utils.kt | 13 +++++++++++++ 6 files changed, 34 insertions(+), 14 deletions(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 71df4deb4..bf86867b5 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -11,6 +11,7 @@ okio = "3.6.0" onnxruntime = "1.17.0.patched-1" slf4j = "2.0.9" wire = "4.9.3" +fastutil = "8.5.14" # JS Dependencies loglevel = "1.8.1" @@ -36,3 +37,4 @@ onnxruntime-gpu = { module = "com.microsoft.onnxruntime:onnxruntime_gpu", versio slf4j-api = { module = "org.slf4j:slf4j-api", version.ref = "slf4j" } slf4j-simple = { module = "org.slf4j:slf4j-simple", version.ref = "slf4j" } wire-runtime = { module = "com.squareup.wire:wire-runtime", version.ref = "wire" } +fastutil-core = { module = "it.unimi.dsi:fastutil-core", version.ref = "fastutil" } diff --git a/ndarray/ndarray-core/build.gradle.kts b/ndarray/ndarray-core/build.gradle.kts index f68fa0d08..96d59fb66 100644 --- a/ndarray/ndarray-core/build.gradle.kts +++ b/ndarray/ndarray-core/build.gradle.kts @@ -17,6 +17,7 @@ kotlin { implementation(libs.kotlinx.coroutines.core) implementation(libs.kotlinx.atomicfu) api(libs.apache.commons.math4.core) + api(libs.fastutil.core) } } } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/Utils.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/Utils.kt index c1af61364..3869f6162 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/Utils.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/Utils.kt @@ -73,6 +73,8 @@ const val ERF_COEF_3 = 1.421413741 const val ERF_COEF_4 = -1.453152027 const val ERF_COEF_5 = 1.061405429 +const val INIT_STORAGE_SIZE = 64 + internal fun IntArray.swap(leftIdx: Int, rightIdx: Int) { val temp = get(leftIdx) this[leftIdx] = this[rightIdx] diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt index 71ad40341..aca7fd13f 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt @@ -1,36 +1,37 @@ @file:GeneratePrimitives(DataType.ALL) package io.kinference.ndarray.arrays.memory.storage +import io.kinference.ndarray.INIT_STORAGE_SIZE import io.kinference.ndarray.arrays.memory.MemoryManager import io.kinference.ndarray.extensions.constants.PrimitiveConstants +import io.kinference.ndarray.extensions.utils.getOrPut import io.kinference.primitives.annotations.GenerateNameFromPrimitives import io.kinference.primitives.annotations.GeneratePrimitives import io.kinference.primitives.types.DataType import io.kinference.primitives.types.PrimitiveArray +import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap @GenerateNameFromPrimitives internal class PrimitiveAutoHandlingArrayStorage : TypedAutoHandlingStorage { - private val used = HashMap>(8) - private val unused = HashMap>(8) + private val used = Int2ObjectOpenHashMap>(INIT_STORAGE_SIZE) + private 
val unused = Int2ObjectOpenHashMap>(INIT_STORAGE_SIZE) companion object { private val type = DataType.CurrentPrimitive } - fun getBlock(blocksNum: Int, blockSize: Int, limiter: MemoryManager): Array { + internal fun getBlock(blocksNum: Int, blockSize: Int, limiter: MemoryManager): Array { val unusedQueue = unused.getOrPut(blockSize) { ArrayDeque(blocksNum) } val usedQueue = used.getOrPut(blockSize) { ArrayDeque(blocksNum) } - val blocks = if (limiter.checkMemoryLimitAndAdd(type, blockSize * blocksNum)) { + val blocks = if (limiter.checkMemoryLimitAndAdd(type, size = blockSize * blocksNum)) { Array(blocksNum) { unusedQueue.removeFirstOrNull()?.apply { fill(PrimitiveConstants.ZERO) } ?: PrimitiveArray(blockSize) } } else { - Array(blocksNum) { - PrimitiveArray(blockSize) - } + Array(blocksNum) { PrimitiveArray(blockSize) } } usedQueue.addAll(blocks) diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt index 821438a97..7a71e16c9 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt @@ -1,19 +1,22 @@ @file:GeneratePrimitives(DataType.ALL) package io.kinference.ndarray.arrays.memory.storage +import io.kinference.ndarray.INIT_STORAGE_SIZE import io.kinference.ndarray.arrays.* import io.kinference.ndarray.arrays.PrimitiveNDArray import io.kinference.ndarray.arrays.memory.MemoryManager import io.kinference.ndarray.arrays.tiled.PrimitiveTiledArray import io.kinference.ndarray.blockSizeByStrides import io.kinference.ndarray.extensions.constants.PrimitiveConstants +import io.kinference.ndarray.extensions.utils.getOrPut import io.kinference.primitives.annotations.* import io.kinference.primitives.types.DataType import io.kinference.primitives.types.PrimitiveArray +import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap @GenerateNameFromPrimitives internal class PrimitiveManualHandlingArrayStorage : TypedManualHandlingStorage { - private val storage = HashMap>(8) + private val storage = Int2ObjectOpenHashMap>(INIT_STORAGE_SIZE) companion object { private val type = DataType.CurrentPrimitive @@ -22,14 +25,12 @@ internal class PrimitiveManualHandlingArrayStorage : TypedManualHandlingStorage override fun getNDArray(strides: Strides, fillZeros: Boolean, limiter: MemoryManager): MutableNDArrayCore { val blockSize = blockSizeByStrides(strides) val blocksNum = strides.linearSize / blockSize - val blocks = if (limiter.checkMemoryLimitAndAdd(type, blockSize * blocksNum)) { + val blocks = if (limiter.checkMemoryLimitAndAdd(type, size = blockSize * blocksNum)) { val queue = storage.getOrPut(blockSize) { ArrayDeque(blocksNum) } Array(blocksNum) { - val block = queue.removeFirstOrNull() - if (fillZeros) { - block?.fill(PrimitiveConstants.ZERO) - } - block ?: PrimitiveArray(blockSize) + queue.removeFirstOrNull()?.apply { + fill(PrimitiveConstants.ZERO) + } ?: PrimitiveArray(blockSize) } } else { Array(blocksNum) { PrimitiveArray(blockSize) } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/utils/Utils.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/utils/Utils.kt index 8c4e18063..fec73c0f9 100644 --- 
a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/utils/Utils.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/utils/Utils.kt @@ -1,5 +1,7 @@ package io.kinference.ndarray.extensions.utils +import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap + /*** * Calculates the total size of the tensor with such shape. */ @@ -50,3 +52,14 @@ internal fun computeColumnMajorIndex( internal fun isInPadding(actual: Int, bound: Int) : Boolean { return actual < 0 || actual >= bound } + +inline fun Int2ObjectOpenHashMap.getOrPut(key: Int, defaultValue: () -> V): V { + val existingValue = this[key] + return if (existingValue != null) { + existingValue + } else { + val value = defaultValue() + put(key, value) + value + } +} From d25ecfff8b64b03e9ce835548a4f66b733c92f18 Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Thu, 29 Aug 2024 18:11:02 +0200 Subject: [PATCH 10/19] JBAI-4393 [buildSrc] Configured JVM benchmark tests to disable coroutines debug mode. --- buildSrc/src/main/kotlin/io/kinference/gradle/JVMTestTasks.kt | 1 + 1 file changed, 1 insertion(+) diff --git a/buildSrc/src/main/kotlin/io/kinference/gradle/JVMTestTasks.kt b/buildSrc/src/main/kotlin/io/kinference/gradle/JVMTestTasks.kt index 250843f1c..c9c51d46a 100644 --- a/buildSrc/src/main/kotlin/io/kinference/gradle/JVMTestTasks.kt +++ b/buildSrc/src/main/kotlin/io/kinference/gradle/JVMTestTasks.kt @@ -56,6 +56,7 @@ fun KotlinJvmTarget.configureBenchmarkTests() { group = "verification" maxHeapSize = "4G" + systemProperty("kotlinx.coroutines.debug", "off") useJUnitPlatform() From 8dfd6eb5750605513a3b4376f6bd4ea5ab6f3c6e Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Fri, 30 Aug 2024 13:47:25 +0200 Subject: [PATCH 11/19] Fixed broadcasting shape logic in matrix multiplication for 1D. --- .../kotlin/io/kinference/ndarray/broadcasting/Broadcasting.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/broadcasting/Broadcasting.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/broadcasting/Broadcasting.kt index bde7580d4..7e2e18551 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/broadcasting/Broadcasting.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/broadcasting/Broadcasting.kt @@ -17,7 +17,7 @@ fun unsqueezeFirst(shape: IntArray, newShapeSize: Int): IntArray { object Broadcasting { fun broadcastShapeForMatmul(leftShape: IntArray, rightShape: IntArray): IntArray { val actualLeftShape = if (leftShape.size == 1) intArrayOf(1, leftShape[0]) else leftShape - val actualRightShape = if (rightShape.size == 1) intArrayOf(1, rightShape[1]) else rightShape + val actualRightShape = if (rightShape.size == 1) intArrayOf(rightShape[0], 1) else rightShape val outputMatrixShape = intArrayOf(actualLeftShape[actualLeftShape.lastIndex - 1], actualRightShape.last()) val broadcastShape = broadcastShape(listOf(actualLeftShape.copyOfRange(0, actualLeftShape.size - 2), From a19fc9ce083c7831a6bf6ed5ad9134edb2d36ca6 Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Mon, 2 Sep 2024 10:26:05 +0200 Subject: [PATCH 12/19] JBAI-4393 [core, ndarray] Streamlined memory size calculations using constants, removed commented-out and redundant code. 
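Reviewer note: the memory limit is now tracked purely in bytes, with each primitive storage multiplying its element count by a compile-time SIZE_BYTES constant instead of consulting a per-DataType lookup table. Below is a minimal sketch of that byte-budget idea using a plain AtomicLong; `ByteBudget`, `tryReserve` and the numbers are illustrative and are not this patch's MemoryManager API (which is atomicfu-based and uses the generated PrimitiveConstants.SIZE_BYTES).

    import java.util.concurrent.atomic.AtomicLong

    // Illustrative limiter: every reservation is expressed directly in bytes,
    // so callers multiply element count by a per-type SIZE_BYTES constant
    // (e.g. Float.SIZE_BYTES) instead of going through a DataType lookup table.
    class ByteBudget(private val limitBytes: Long) {
        private val used = AtomicLong(0L)

        // Reserves the bytes and returns true if the budget allows it;
        // otherwise leaves the counter untouched and returns false.
        fun tryReserve(sizeInBytes: Long): Boolean {
            while (true) {
                val current = used.get()
                val next = current + sizeInBytes
                if (next > limitBytes) return false
                if (used.compareAndSet(current, next)) return true
            }
        }

        fun reset() = used.set(0L)
    }

    fun main() {
        val budget = ByteBudget(limitBytes = 1L shl 20) // 1 MiB
        val blockSize = 1024
        val blocksNum = 128
        // Float blocks: element count times Float.SIZE_BYTES, mirroring getPrimitiveArraySizeInBytes.
        val bytes = Float.SIZE_BYTES.toLong() * blockSize * blocksNum
        check(budget.tryReserve(bytes))      // 512 KiB fits into the 1 MiB budget
        check(!budget.tryReserve(bytes * 2)) // another 1 MiB does not
    }

The compare-and-set loop keeps the counter consistent when several operators reserve blocks concurrently, which is the same guarantee the atomic update in MemoryManager.checkMemoryLimitAndAdd provides.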
--- .../operators/layer/attention/Attention.kt | 14 +++--- .../ndarray/arrays/memory/MemoryManager.kt | 49 ++++++++----------- .../storage/AutoArrayHandlingStorage.kt | 2 +- .../storage/ManualArrayHandlingStorage.kt | 2 +- .../PrimitiveAutoHandlingArrayStorage.kt | 2 +- .../PrimitiveGetBlockFunctionsExtension.kt | 6 +++ .../PrimitiveManualHandlingArrayStorage.kt | 2 +- .../ndarray/extensions/PrimitiveExtensions.kt | 17 ------- .../extensions/constants/BooleanConstants.kt | 1 + .../constants/PrimitiveConstants.kt | 5 +- .../extensions/gelu/BiasGeluPrimitive.kt | 1 - 11 files changed, 40 insertions(+), 61 deletions(-) diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt index a06b99080..1add2b1b9 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt @@ -163,10 +163,8 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map if (this != null) { //raw attention (no padding). only raw attention mask is 2-dimensional if (this.rank == 2) { - val maskPointer = (mask as MutableFloatNDArray).array.pointer(maskOffset * i) + val maskPointer = mask.array.pointer(maskOffset * i) val maskIndicesPointer = this.array.pointer(i * fullSeqLen) maskPointer.accept(maskIndicesPointer, fullSeqLen) { _, src -> if (src > 0) 0f else maskFilterValue } } else { //for left/right-side padding val maskIndicesPointer = this.array.pointer(i) - val maskPointer = (mask as MutableFloatNDArray).array.pointer(maskOffset * i + maskIndicesPointer.get()) + val maskPointer = mask.array.pointer(maskOffset * i + maskIndicesPointer.get()) maskPointer.map(fullSeqLen - maskIndicesPointer.get()) { maskFilterValue } if (this.rank == 1 && this.shape[0] == 2 * batchSize) { @@ -205,7 +203,7 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map - if (current + added > memoryLimit) current else current + added + if (current + sizeInBytes > memoryLimit) current else current + sizeInBytes } != usedMemory.value // Check if the update was successful return successful @@ -34,6 +31,16 @@ internal class MemoryManager internal constructor(private val memoryLimit: Long, usedMemory.value = 0L } + /** + * Updates the last access time to the current system time and starts a monitoring coroutine if it isn't already running. + * + * This function sets the `lastAccessTime` to the current system time in milliseconds. + * It also initiates a monitoring coroutine to periodically check + * if the time since the last access exceeds a predefined `cacheClearingInterval`. + * If it does, the `onCacheClear` function is triggered to handle + * any necessary cache clearing. + * The coroutine will run only if it is not already running and `isFinalized` is false. + */ fun updateLastAccessTime() { lastAccessTime.value = System.currentTimeMillis() @@ -53,32 +60,16 @@ internal class MemoryManager internal constructor(private val memoryLimit: Long, } } + /** + * Stops the monitoring process by canceling the active monitoring coroutine. + * + * This function sets the `isFinalized` flag to true, indicating that the monitoring process has been + * concluded. + * If a monitoring coroutine is currently active, it will be canceled. 
+ */ fun stopMonitoring() { if (isFinalized.compareAndSet(expect = false, update = true)) { monitorJob.getAndSet(value = null)?.cancel() } } - - companion object { - private val typeSizes: LongArray = LongArray(DataType.entries.size).apply { - this[DataType.BYTE.ordinal] = Byte.SIZE_BYTES.toLong() - this[DataType.SHORT.ordinal] = Short.SIZE_BYTES.toLong() - this[DataType.INT.ordinal] = Int.SIZE_BYTES.toLong() - this[DataType.LONG.ordinal] = Long.SIZE_BYTES.toLong() - - this[DataType.UBYTE.ordinal] = UByte.SIZE_BYTES.toLong() - this[DataType.USHORT.ordinal] = UShort.SIZE_BYTES.toLong() - this[DataType.UINT.ordinal] = UInt.SIZE_BYTES.toLong() - this[DataType.ULONG.ordinal] = ULong.SIZE_BYTES.toLong() - - this[DataType.FLOAT.ordinal] = Float.SIZE_BYTES.toLong() - this[DataType.DOUBLE.ordinal] = Double.SIZE_BYTES.toLong() - - this[DataType.BOOLEAN.ordinal] = 1.toLong() - } - - private fun sizeInBytes(typeIndex: Int, size: Int): Long { - return typeSizes[typeIndex] * size - } - } } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt index 803fe4416..62364570b 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt @@ -7,7 +7,7 @@ internal interface TypedAutoHandlingStorage { } internal class AutoArrayHandlingStorage(internal val limiter: MemoryManager) : ArrayStorage { - internal val storage: List = listOf( + internal val storage: Array = arrayOf( ByteAutoHandlingArrayStorage(), ShortAutoHandlingArrayStorage(), IntAutoHandlingArrayStorage(), diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/ManualArrayHandlingStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/ManualArrayHandlingStorage.kt index 0631056b3..559334f8d 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/ManualArrayHandlingStorage.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/ManualArrayHandlingStorage.kt @@ -16,7 +16,7 @@ interface ManualStorage : ArrayStorage { } internal class ManualArrayHandlingStorage(private val memoryManager: MemoryManager) : ManualStorage { - private val storage: List = listOf( + private val storage: Array = arrayOf( ByteManualHandlingArrayStorage(), ShortManualHandlingArrayStorage(), IntManualHandlingArrayStorage(), diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt index aca7fd13f..c0b7d9866 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt @@ -24,7 +24,7 @@ internal class PrimitiveAutoHandlingArrayStorage : TypedAutoHandlingStorage { val unusedQueue = unused.getOrPut(blockSize) { ArrayDeque(blocksNum) } val usedQueue = used.getOrPut(blockSize) { ArrayDeque(blocksNum) } - val blocks = if (limiter.checkMemoryLimitAndAdd(type, size = blockSize * 
blocksNum)) { + val blocks = if (limiter.checkMemoryLimitAndAdd(type.getPrimitiveArraySizeInBytes(arraySize = blockSize * blocksNum))) { Array(blocksNum) { unusedQueue.removeFirstOrNull()?.apply { fill(PrimitiveConstants.ZERO) diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveGetBlockFunctionsExtension.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveGetBlockFunctionsExtension.kt index 6bb61d9c8..5da084dc3 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveGetBlockFunctionsExtension.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveGetBlockFunctionsExtension.kt @@ -3,6 +3,7 @@ package io.kinference.ndarray.arrays.memory.storage import io.kinference.ndarray.arrays.memory.contexts.AutoAllocatorContext +import io.kinference.ndarray.extensions.constants.PrimitiveConstants import io.kinference.primitives.annotations.GenerateNameFromPrimitives import io.kinference.primitives.annotations.GeneratePrimitives import io.kinference.primitives.types.* @@ -16,3 +17,8 @@ internal fun AutoArrayHandlingStorage.getPrimitiveBlock(blocksNum: Int, blockSiz internal fun AutoAllocatorContext.getPrimitiveBlock(blocksNum: Int, blockSize: Int): Array { return storage.getPrimitiveBlock(blocksNum = blocksNum, blockSize = blockSize) } + +@GenerateNameFromPrimitives +internal fun DataType.getPrimitiveArraySizeInBytes(arraySize: Int): Long { + return PrimitiveConstants.SIZE_BYTES * arraySize +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt index 7a71e16c9..29060279b 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt @@ -25,7 +25,7 @@ internal class PrimitiveManualHandlingArrayStorage : TypedManualHandlingStorage override fun getNDArray(strides: Strides, fillZeros: Boolean, limiter: MemoryManager): MutableNDArrayCore { val blockSize = blockSizeByStrides(strides) val blocksNum = strides.linearSize / blockSize - val blocks = if (limiter.checkMemoryLimitAndAdd(type, size = blockSize * blocksNum)) { + val blocks = if (limiter.checkMemoryLimitAndAdd(type.getPrimitiveArraySizeInBytes(arraySize = blockSize * blocksNum))) { val queue = storage.getOrPut(blockSize) { ArrayDeque(blocksNum) } Array(blocksNum) { queue.removeFirstOrNull()?.apply { diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/PrimitiveExtensions.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/PrimitiveExtensions.kt index 570ca8520..cbf651bc0 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/PrimitiveExtensions.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/PrimitiveExtensions.kt @@ -128,21 +128,17 @@ internal suspend fun PrimitiveNDArray.dotTransposedWithAlpha(alpha: Double, othe other as PrimitiveNDArray; destination as MutablePrimitiveNDArray val alpha = alpha.toPrimitive() -// val dBlocksInRow = destination.blocksInRow val lrBlocksInRow = this.blocksInRow val n = this.shape[0] val t = this.shape[1] 
val m = other.shape[0] -// val dBlockSize = destination.array.blockSize val lrBlockSize = this.array.blockSize -// val destBlocks = destination.array.blocks val leftBlocks = this.array.blocks val rightBlocks = other.array.blocks val rowFlop = t * m -// val zero = (0).toPrimitive() /* TODO: (dmitriyb) this is temporary commented. On GEC performance test we have large inputs that cause out of memory exceptions @@ -162,7 +158,6 @@ internal suspend fun PrimitiveNDArray.dotTransposedWithAlpha(alpha: Double, othe // TODO: (cupertank) Remove constants // TODO: (dmitriyb) Implement concurrent array retrieve with a separate structure from ArraysDispatcher parallelizeByRows(rowFlop, n, 262144) { nStart: Int, nEnd: Int, _ -> -// val mSums = Array(m) { PrimitiveArray(lrBlockSize) } val tempSum = PrimitiveArray(lrBlockSize) val destPointer = destination.array.pointer() for (i in nStart until nEnd) { @@ -170,10 +165,8 @@ internal suspend fun PrimitiveNDArray.dotTransposedWithAlpha(alpha: Double, othe val rightBlockIter = rightBlocks.iterator() destPointer.linearIndex = i * m -// val destBlockOffset = i * dBlocksInRow for (k in 0 until m) { -// val tempArray = mSums[k] for (lrBlock in 0 until lrBlocksInRow) { val leftBlock = leftBlocks[leftBlockOffset + lrBlock] val rightBlock = rightBlockIter.next() @@ -186,16 +179,6 @@ internal suspend fun PrimitiveNDArray.dotTransposedWithAlpha(alpha: Double, othe destPointer.setAndIncrement(tempSum.sum() * alpha) tempSum.fill(PrimitiveConstants.ZERO) } - -// val mSumsIter = mSums.iterator() -// for (destBlockNum in 0 until dBlocksInRow) { -// val destBlock = destBlocks[destBlockOffset + destBlockNum] -// for (j in destBlock.indices) { -// val sumBlock = mSumsIter.next() -// destBlock[j] = sumBlock.sum() * alpha -// sumBlock.fill(zero) -// } -// } } } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/BooleanConstants.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/BooleanConstants.kt index 00f4767fa..e3e369c6b 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/BooleanConstants.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/BooleanConstants.kt @@ -3,4 +3,5 @@ package io.kinference.ndarray.extensions.constants object BooleanConstants { const val ZERO = false const val ONE = true + const val SIZE_BYTES = 1.toLong() } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/PrimitiveConstants.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/PrimitiveConstants.kt index e1edbef10..09aec0c9e 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/PrimitiveConstants.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/PrimitiveConstants.kt @@ -2,10 +2,9 @@ package io.kinference.ndarray.extensions.constants import io.kinference.primitives.annotations.* -import io.kinference.primitives.types.DataType -import io.kinference.primitives.types.toPrimitive import io.kinference.ndarray.toUShort import io.kinference.ndarray.toUByte +import io.kinference.primitives.types.* @GenerateNameFromPrimitives @@ -29,5 +28,7 @@ internal object PrimitiveConstants { val INV_ERF_COEF_1 = (4.330746750799873).toPrimitive() val INV_ERF_COEF_2 = (6.802721088435375).toPrimitive() + + val SIZE_BYTES = PrimitiveType.SIZE_BYTES.toLong() } diff --git 
a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt index 4c5682899..9ba08ddb0 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt @@ -15,7 +15,6 @@ import kotlin.math.* @GenerateNameFromPrimitives internal suspend fun computeGeluPrimitive(input: PrimitiveNDArray, bias: PrimitiveNDArray, output: MutablePrimitiveNDArray): MutablePrimitiveNDArray { -// val output = MutablePrimitiveNDArray(input.strides) val inputBlocks = input.array.blocks val biasBlocks = bias.array.blocks From c942273ab31b07d037c02cbfab4e2d17dd84f825 Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Mon, 2 Sep 2024 16:08:20 +0200 Subject: [PATCH 13/19] JBAI-4393 [core, ndarray] Refactored coroutine contexts to be polymorphic, merge ParallelismLimiterContext and its thread limiter behavior into PredictionContext. --- .../io/kinference.core/model/KIModel.kt | 2 - .../operators/layer/attention/Attention.kt | 3 +- .../normalization/EmbedLayerNormalization.kt | 3 +- .../normalization/SkipLayerNormalization.kt | 5 ++- .../io/kinference.core/operators/math/Add.kt | 3 +- .../operators/math/BiasGelu.kt | 3 +- .../kinference.core/operators/math/MatMul.kt | 3 +- .../kinference.core/operators/tensor/Cast.kt | 3 +- .../memory/PredictionContextDispatcher.kt | 37 +++++++++---------- .../memory/contexts/AutoAllocatorContext.kt | 9 ++--- .../BaseAllocatorContextWithStorage.kt | 24 ------------ .../memory/contexts/ManualAllocatorContext.kt | 9 ++--- .../storage/AutoArrayHandlingStorage.kt | 1 + .../storage/ManualArrayHandlingStorage.kt | 1 + .../PrimitiveAutoHandlingArrayStorage.kt | 2 +- .../PrimitiveGetBlockFunctionsExtension.kt | 2 +- .../PrimitiveManualHandlingArrayStorage.kt | 2 +- .../arrays/tiled/PrimitiveTiledArray.kt | 4 +- .../kinference/utils/ResourcesDispatcher.kt | 28 ++++++++++++-- 19 files changed, 73 insertions(+), 71 deletions(-) delete mode 100644 ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/BaseAllocatorContextWithStorage.kt diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt index 6611fc1ce..3f78377d5 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt @@ -5,7 +5,6 @@ import io.kinference.core.graph.KIGraph import io.kinference.graph.Contexts import io.kinference.model.Model import io.kinference.ndarray.arrays.memory.* -import io.kinference.ndarray.arrays.memory.contexts.finalizeAllocatorContext import io.kinference.operator.OperatorSetRegistry import io.kinference.profiler.* import io.kinference.protobuf.message.ModelProto @@ -47,7 +46,6 @@ class KIModel( return@withContext graph.execute(input, contexts).map { it.clone(it.name) }.toList() } - predictionContext.finalizeAllocatorContext() predictionContextDispatcher.returnStorage(predictionContext) output } finally { diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt index 1add2b1b9..737328779 100644 --- 
a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt @@ -19,6 +19,7 @@ import io.kinference.optimizer.GraphOptimizer.Companion.isOpt import io.kinference.primitives.types.DataType import io.kinference.protobuf.message.AttributeProto import io.kinference.protobuf.message.TensorProto +import io.kinference.utils.PredictionContext import io.kinference.utils.launchWithLimitOrDefault import kotlinx.coroutines.coroutineScope import kotlin.coroutines.coroutineContext @@ -287,7 +288,7 @@ class AttentionVer1(name: String, attributes: Map>, input private val maskFilterValue: Float by attribute("mask_filter_value") { it: Number -> it.toFloat() } override suspend fun > apply(contexts: Contexts, inputs: List): List { - val context = coroutineContext[ManualAllocatorContext.Key] + val context = coroutineContext[PredictionContext.Key] as? ManualAllocatorContext val input = inputs[0]!! val weights = inputs[1]!! diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt index 33a01c6d3..098667725 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt @@ -11,6 +11,7 @@ import io.kinference.operator.* import io.kinference.primitives.types.DataType import io.kinference.protobuf.message.AttributeProto.AttributeType import io.kinference.protobuf.message.TensorProto +import io.kinference.utils.PredictionContext import kotlin.coroutines.coroutineContext import kotlin.math.sqrt @@ -175,7 +176,7 @@ class EmbedLayerNormalizationVer1( } override suspend fun > apply(contexts: Contexts, inputs: List): List { - val manualContext = coroutineContext[ManualAllocatorContext.Key] + val manualContext = coroutineContext[PredictionContext.Key] as? ManualAllocatorContext val inputIds = inputs[0]!!.data as IntNDArray val segmentIds = inputs[1]?.data as IntNDArray? diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt index 08b8e7f1a..aa246044f 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt @@ -13,6 +13,7 @@ import io.kinference.operator.* import io.kinference.primitives.types.DataType import io.kinference.protobuf.message.AttributeProto import io.kinference.protobuf.message.TensorProto +import io.kinference.utils.PredictionContext import kotlin.coroutines.coroutineContext import kotlin.math.sqrt @@ -107,7 +108,7 @@ class SkipLayerNormalizationVer1(name: String, attributes: Map> apply(contexts: Contexts, inputs: List): List { - val manualContext = coroutineContext[ManualAllocatorContext.Key] + val manualContext = coroutineContext[PredictionContext.Key] as? 
ManualAllocatorContext val input = inputs[0]!!.data as FloatNDArray val output = (manualContext?.getNDArray(DataType.FLOAT, input.strides, fillZeros = false) ?: MutableFloatNDArray(input.strides)) as MutableFloatNDArray @@ -119,7 +120,7 @@ class SkipLayerNormalizationVer1(name: String, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { @@ -55,7 +56,7 @@ class AddVer7(name: String, attributes: Map>, inputs: Lis } override suspend fun > apply(contexts: Contexts, inputs: List): List { - val manualContext = coroutineContext[ManualAllocatorContext.Key] + val manualContext = coroutineContext[PredictionContext.Key] as? ManualAllocatorContext val left = inputs[0]!!.data as NumberNDArrayCore val right = inputs[1]!!.data as NumberNDArrayCore diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt index c6b21a778..65b5089ec 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt @@ -11,6 +11,7 @@ import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext import io.kinference.ndarray.extensions.allocateNDArray import io.kinference.ndarray.extensions.gelu.biasGelu import io.kinference.operator.* +import io.kinference.utils.PredictionContext import kotlin.coroutines.coroutineContext sealed class BiasGelu(name: String, info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { @@ -43,7 +44,7 @@ class BiasGeluVer1(name: String, attributes: Map> = empty } override suspend fun > apply(contexts: Contexts, inputs: List): List { - val manualContext = coroutineContext[ManualAllocatorContext.Key] + val manualContext = coroutineContext[PredictionContext.Key] as? ManualAllocatorContext val input = inputs[0]!!.data as NumberNDArrayCore val bias = inputs[1]!!.data as NumberNDArrayCore diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt index 1d5608450..aabce734e 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt @@ -11,6 +11,7 @@ import io.kinference.ndarray.broadcasting.Broadcasting import io.kinference.ndarray.extensions.allocateNDArray import io.kinference.operator.* import io.kinference.protobuf.message.TensorProto +import io.kinference.utils.PredictionContext import kotlin.coroutines.coroutineContext sealed class MatMul(name: String, info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { @@ -50,7 +51,7 @@ class MatMulVer1(name: String, attributes: Map>, inputs: } override suspend fun > apply(contexts: Contexts, inputs: List): List { - val manualContext = coroutineContext[ManualAllocatorContext.Key] + val manualContext = coroutineContext[PredictionContext.Key] as? 
ManualAllocatorContext val first = inputs[0]!!.data as NumberNDArrayCore val second = inputs[1]!!.data as NumberNDArrayCore diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt index d0bc9a56a..acc9dfb94 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt @@ -14,6 +14,7 @@ import io.kinference.primitives.types.DataType import io.kinference.protobuf.FLOAT_TENSOR_TYPES import io.kinference.protobuf.message.AttributeProto import io.kinference.protobuf.message.TensorProto +import io.kinference.utils.PredictionContext import kotlin.coroutines.coroutineContext sealed class Cast(name: String, info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { @@ -801,7 +802,7 @@ class CastVer6(name: String, attributes: Map>, inputs: Li private val toType: Int by attribute("to") { it: Number -> it.toInt() } override suspend fun > apply(contexts: Contexts, inputs: List): List { - val manualContext = coroutineContext[ManualAllocatorContext.Key] + val manualContext = coroutineContext[PredictionContext.Key] as? ManualAllocatorContext val tensor = inputs.first()!! val to = TensorProto.DataType.fromValue(toType)!! diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PredictionContextDispatcher.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PredictionContextDispatcher.kt index 10a2c4bc4..801e5c66b 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PredictionContextDispatcher.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PredictionContextDispatcher.kt @@ -3,14 +3,8 @@ package io.kinference.ndarray.arrays.memory import io.kinference.ndarray.arrays.memory.contexts.* import io.kinference.ndarray.arrays.memory.storage.* import io.kinference.utils.* -import kotlinx.coroutines.Dispatchers -import kotlinx.coroutines.ExperimentalCoroutinesApi +import kotlinx.coroutines.* import java.util.concurrent.ConcurrentLinkedQueue -import kotlin.coroutines.CoroutineContext - -interface ArrayStorage { - fun resetState() -} class PredictionContextDispatcher(private val predictionConfig: PredictionConfig) : Closeable { private val limiter: MemoryManager = MemoryManager( @@ -18,11 +12,11 @@ class PredictionContextDispatcher(private val predictionConfig: PredictionConfig cacheClearingInterval = predictionConfig.memoryClearingInterval, onCacheClear = ::clearCache) - private val contextQueue: ConcurrentLinkedQueue = ConcurrentLinkedQueue() + private val contextQueue: ConcurrentLinkedQueue = ConcurrentLinkedQueue() val allocationMode get() = predictionConfig.allocationMode - fun getPredictionContext(): CoroutineContext { + fun getPredictionContext(): PredictionContext { val allocatorContext = when (predictionConfig.allocationMode) { AllocationMode.NoAllocation -> getNoAllocatorContext() AllocationMode.Manual -> getManualAllocatorContext() @@ -31,21 +25,23 @@ class PredictionContextDispatcher(private val predictionConfig: PredictionConfig return allocatorContext } - @OptIn(ExperimentalCoroutinesApi::class) - private fun getNoAllocatorContext(): CoroutineContext { - return contextQueue.poll() ?: (NoAllocatorContext() + 
ParallelismLimiterContext(Dispatchers.Default.limitedParallelism(predictionConfig.parallelismLimit))) + private fun getNoAllocatorContext(): PredictionContext { + return contextQueue.poll() ?: (NoAllocatorContext(getDispatcher())) } - @OptIn(ExperimentalCoroutinesApi::class) - private fun getAutoAllocatorContext(): CoroutineContext { + private fun getAutoAllocatorContext(): PredictionContext { limiter.updateLastAccessTime() - return contextQueue.poll() ?: (AutoAllocatorContext(AutoArrayHandlingStorage(limiter)) + ParallelismLimiterContext(Dispatchers.Default.limitedParallelism(predictionConfig.parallelismLimit))) + return contextQueue.poll() ?: (AutoAllocatorContext(getDispatcher(), AutoArrayHandlingStorage(limiter))) } - @OptIn(ExperimentalCoroutinesApi::class) - private fun getManualAllocatorContext(): CoroutineContext { + private fun getManualAllocatorContext(): PredictionContext { limiter.updateLastAccessTime() - return contextQueue.poll() ?: (ManualAllocatorContext(ManualArrayHandlingStorage(limiter)) + ParallelismLimiterContext(Dispatchers.Default.limitedParallelism(predictionConfig.parallelismLimit))) + return contextQueue.poll() ?: (ManualAllocatorContext(getDispatcher(), ManualArrayHandlingStorage(limiter))) + } + + @OptIn(ExperimentalCoroutinesApi::class) + private fun getDispatcher(): CoroutineDispatcher { + return Dispatchers.Default.limitedParallelism(predictionConfig.parallelismLimit) } fun clearCache() { @@ -58,7 +54,10 @@ class PredictionContextDispatcher(private val predictionConfig: PredictionConfig clearCache() } - fun returnStorage(context: CoroutineContext) { + fun returnStorage(context: PredictionContext) { + if (context is AllocatorContext<*>) { + context.finalizeContext() + } contextQueue.offer(context) } } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt index a4d36b555..e69367f55 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt @@ -3,12 +3,11 @@ package io.kinference.ndarray.arrays.memory.contexts import io.kinference.ndarray.arrays.memory.storage.AutoArrayHandlingStorage import io.kinference.primitives.types.DataType import io.kinference.primitives.types.PrimitiveArray +import io.kinference.utils.* +import kotlinx.coroutines.CoroutineDispatcher import kotlin.coroutines.* internal class AutoAllocatorContext internal constructor( + dispatcher: CoroutineDispatcher, storage: AutoArrayHandlingStorage, -) : BaseAllocatorContextWithStorage(storage) { - - companion object Key : CoroutineContext.Key - override val key: CoroutineContext.Key<*> get() = Key -} +) : AllocatorContext(dispatcher, storage) diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/BaseAllocatorContextWithStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/BaseAllocatorContextWithStorage.kt deleted file mode 100644 index e617c78de..000000000 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/BaseAllocatorContextWithStorage.kt +++ /dev/null @@ -1,24 +0,0 @@ -package io.kinference.ndarray.arrays.memory.contexts - -import io.kinference.ndarray.arrays.memory.ArrayStorage -import kotlin.coroutines.CoroutineContext 
- -interface BaseAllocatorContext: CoroutineContext.Element - -abstract class BaseAllocatorContextWithStorage(internal val storage: T) : BaseAllocatorContext { - fun finalizeContext() { - storage.resetState() - } -} - -fun CoroutineContext.finalizeAllocatorContext() { - this.fold(Unit) { _, context -> - if (context is BaseAllocatorContextWithStorage<*>) - context.finalizeContext() - } -} - -class NoAllocatorContext : BaseAllocatorContext { - companion object Key : CoroutineContext.Key - override val key: CoroutineContext.Key<*> get() = Key -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/ManualAllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/ManualAllocatorContext.kt index a713f31fe..9a6663c7f 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/ManualAllocatorContext.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/ManualAllocatorContext.kt @@ -4,14 +4,13 @@ import io.kinference.ndarray.arrays.* import io.kinference.ndarray.arrays.memory.storage.ManualArrayHandlingStorage import io.kinference.ndarray.arrays.memory.storage.ManualStorage import io.kinference.primitives.types.DataType -import kotlin.coroutines.CoroutineContext +import io.kinference.utils.AllocatorContext +import kotlinx.coroutines.CoroutineDispatcher class ManualAllocatorContext internal constructor( + dispatcher: CoroutineDispatcher, storage: ManualArrayHandlingStorage, -) : BaseAllocatorContextWithStorage(storage) { - - companion object Key : CoroutineContext.Key - override val key: CoroutineContext.Key<*> get() = Key +) : AllocatorContext(dispatcher, storage) { fun getNDArray(dataType: DataType, strides: Strides, fillZeros: Boolean = false): MutableNDArrayCore { return storage.getNDArray(dataType, strides, fillZeros) diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt index 62364570b..b0ffdbbb5 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt @@ -1,6 +1,7 @@ package io.kinference.ndarray.arrays.memory.storage import io.kinference.ndarray.arrays.memory.* +import io.kinference.utils.ArrayStorage internal interface TypedAutoHandlingStorage { fun moveBlocksIntoUnused() diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/ManualArrayHandlingStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/ManualArrayHandlingStorage.kt index 559334f8d..227d25136 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/ManualArrayHandlingStorage.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/ManualArrayHandlingStorage.kt @@ -3,6 +3,7 @@ package io.kinference.ndarray.arrays.memory.storage import io.kinference.ndarray.arrays.* import io.kinference.ndarray.arrays.memory.* import io.kinference.primitives.types.DataType +import io.kinference.utils.ArrayStorage internal interface TypedManualHandlingStorage { fun getNDArray(strides: Strides, fillZeros: Boolean = false, limiter: MemoryManager): 
MutableNDArrayCore diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt index c0b7d9866..4cd5bb663 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt @@ -24,7 +24,7 @@ internal class PrimitiveAutoHandlingArrayStorage : TypedAutoHandlingStorage { val unusedQueue = unused.getOrPut(blockSize) { ArrayDeque(blocksNum) } val usedQueue = used.getOrPut(blockSize) { ArrayDeque(blocksNum) } - val blocks = if (limiter.checkMemoryLimitAndAdd(type.getPrimitiveArraySizeInBytes(arraySize = blockSize * blocksNum))) { + val blocks = if (limiter.checkMemoryLimitAndAdd(getPrimitiveArraySizeInBytes(arraySize = blockSize * blocksNum))) { Array(blocksNum) { unusedQueue.removeFirstOrNull()?.apply { fill(PrimitiveConstants.ZERO) diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveGetBlockFunctionsExtension.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveGetBlockFunctionsExtension.kt index 5da084dc3..9280823d8 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveGetBlockFunctionsExtension.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveGetBlockFunctionsExtension.kt @@ -19,6 +19,6 @@ internal fun AutoAllocatorContext.getPrimitiveBlock(blocksNum: Int, blockSize: I } @GenerateNameFromPrimitives -internal fun DataType.getPrimitiveArraySizeInBytes(arraySize: Int): Long { +internal fun getPrimitiveArraySizeInBytes(arraySize: Int): Long { return PrimitiveConstants.SIZE_BYTES * arraySize } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt index 29060279b..1c264be01 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt @@ -25,7 +25,7 @@ internal class PrimitiveManualHandlingArrayStorage : TypedManualHandlingStorage override fun getNDArray(strides: Strides, fillZeros: Boolean, limiter: MemoryManager): MutableNDArrayCore { val blockSize = blockSizeByStrides(strides) val blocksNum = strides.linearSize / blockSize - val blocks = if (limiter.checkMemoryLimitAndAdd(type.getPrimitiveArraySizeInBytes(arraySize = blockSize * blocksNum))) { + val blocks = if (limiter.checkMemoryLimitAndAdd(getPrimitiveArraySizeInBytes(arraySize = blockSize * blocksNum))) { val queue = storage.getOrPut(blockSize) { ArrayDeque(blocksNum) } Array(blocksNum) { queue.removeFirstOrNull()?.apply { diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt index 600211e3b..339f2fb8d 100644 --- 
a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt @@ -11,6 +11,7 @@ import io.kinference.ndarray.arrays.pointers.accept import io.kinference.ndarray.blockSizeByStrides import io.kinference.primitives.annotations.* import io.kinference.primitives.types.* +import io.kinference.utils.PredictionContext import io.kinference.utils.inlines.InlineInt import kotlin.coroutines.coroutineContext import kotlin.math.min @@ -59,7 +60,8 @@ internal class PrimitiveTiledArray(val blocks: Array) { require(size % blockSize == 0) { "Size must divide blockSize" } val blocksNum = if (blockSize == 0) 0 else size / blockSize - val blocks = coroutineContext[AutoAllocatorContext.Key]?.getPrimitiveBlock(blocksNum, blockSize) ?: Array(blocksNum) { PrimitiveArray(blockSize) } + val blocks = (coroutineContext[PredictionContext.Key] as? AutoAllocatorContext)?.getPrimitiveBlock(blocksNum, blockSize) + ?: Array(blocksNum) { PrimitiveArray(blockSize) } return PrimitiveTiledArray(blocks) } diff --git a/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ResourcesDispatcher.kt b/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ResourcesDispatcher.kt index 66b5cea95..b17df1f79 100644 --- a/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ResourcesDispatcher.kt +++ b/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ResourcesDispatcher.kt @@ -2,6 +2,7 @@ package io.kinference.utils import kotlinx.coroutines.* import kotlinx.coroutines.channels.Channel +import kotlin.coroutines.AbstractCoroutineContextElement import kotlin.coroutines.CoroutineContext object ResourcesDispatcher { @@ -16,11 +17,30 @@ object ResourcesDispatcher { } } -class ParallelismLimiterContext(val dispatcher: CoroutineDispatcher) : CoroutineContext.Element { - companion object Key : CoroutineContext.Key - override val key: CoroutineContext.Key<*> get() = Key +interface PredictionKey : CoroutineContext.Key + +sealed class PredictionContext( + val dispatcher: CoroutineDispatcher +) : AbstractCoroutineContextElement(PredictionContext) { + companion object Key : PredictionKey +} + +interface ArrayStorage { + fun resetState() +} + +abstract class AllocatorContext( + dispatcher: CoroutineDispatcher, + val storage: T +) : PredictionContext(dispatcher) { + + fun finalizeContext() { + storage.resetState() + } } +class NoAllocatorContext(dispatcher: CoroutineDispatcher) : PredictionContext(dispatcher) + fun CoroutineScope.launchWithLimitOrDefault(block: suspend CoroutineScope.() -> Unit) { - this.launch(coroutineContext[ParallelismLimiterContext.Key]?.dispatcher ?: Dispatchers.Default, block = block) + this.launch(coroutineContext[PredictionContext]?.dispatcher ?: Dispatchers.Default, block = block) } From 3caa0bcf62b1514fe5a22e224769da54262be46f Mon Sep 17 00:00:00 2001 From: Ilia Vologin Date: Mon, 2 Sep 2024 17:20:12 +0200 Subject: [PATCH 14/19] JBAI-4393 [core] Rework context keys --- .../operators/layer/attention/Attention.kt | 2 +- .../normalization/EmbedLayerNormalization.kt | 2 +- .../normalization/SkipLayerNormalization.kt | 2 +- .../io/kinference.core/operators/math/Add.kt | 2 +- .../operators/math/BiasGelu.kt | 2 +- .../kinference.core/operators/math/MatMul.kt | 2 +- .../kinference.core/operators/tensor/Cast.kt | 2 +- .../memory/contexts/AutoAllocatorContext.kt | 7 ++++- .../memory/contexts/ManualAllocatorContext.kt | 6 +++++ 
.../arrays/tiled/PrimitiveTiledArray.kt | 2 +- .../kinference/utils/ResourcesDispatcher.kt | 26 ++++++++++++++----- 11 files changed, 40 insertions(+), 15 deletions(-) diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt index 737328779..ee7af6f18 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt @@ -288,7 +288,7 @@ class AttentionVer1(name: String, attributes: Map>, input private val maskFilterValue: Float by attribute("mask_filter_value") { it: Number -> it.toFloat() } override suspend fun > apply(contexts: Contexts, inputs: List): List { - val context = coroutineContext[PredictionContext.Key] as? ManualAllocatorContext + val context = coroutineContext[ManualAllocatorContext] val input = inputs[0]!! val weights = inputs[1]!! diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt index 098667725..21b64def4 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt @@ -176,7 +176,7 @@ class EmbedLayerNormalizationVer1( } override suspend fun > apply(contexts: Contexts, inputs: List): List { - val manualContext = coroutineContext[PredictionContext.Key] as? ManualAllocatorContext + val manualContext = coroutineContext[ManualAllocatorContext] val inputIds = inputs[0]!!.data as IntNDArray val segmentIds = inputs[1]?.data as IntNDArray? diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt index aa246044f..842eee3b1 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt @@ -108,7 +108,7 @@ class SkipLayerNormalizationVer1(name: String, attributes: Map> apply(contexts: Contexts, inputs: List): List { - val manualContext = coroutineContext[PredictionContext.Key] as? 
ManualAllocatorContext + val manualContext = coroutineContext[ManualAllocatorContext] val input = inputs[0]!!.data as FloatNDArray val output = (manualContext?.getNDArray(DataType.FLOAT, input.strides, fillZeros = false) ?: MutableFloatNDArray(input.strides)) as MutableFloatNDArray diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/Add.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/Add.kt index afb811664..131f91786 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/Add.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/Add.kt @@ -56,7 +56,7 @@ class AddVer7(name: String, attributes: Map>, inputs: Lis } override suspend fun > apply(contexts: Contexts, inputs: List): List { - val manualContext = coroutineContext[PredictionContext.Key] as? ManualAllocatorContext + val manualContext = coroutineContext[ManualAllocatorContext] val left = inputs[0]!!.data as NumberNDArrayCore val right = inputs[1]!!.data as NumberNDArrayCore diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt index 65b5089ec..02bbc6349 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt @@ -44,7 +44,7 @@ class BiasGeluVer1(name: String, attributes: Map> = empty } override suspend fun > apply(contexts: Contexts, inputs: List): List { - val manualContext = coroutineContext[PredictionContext.Key] as? ManualAllocatorContext + val manualContext = coroutineContext[ManualAllocatorContext] val input = inputs[0]!!.data as NumberNDArrayCore val bias = inputs[1]!!.data as NumberNDArrayCore diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt index aabce734e..deb54fa35 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt @@ -51,7 +51,7 @@ class MatMulVer1(name: String, attributes: Map>, inputs: } override suspend fun > apply(contexts: Contexts, inputs: List): List { - val manualContext = coroutineContext[PredictionContext.Key] as? ManualAllocatorContext + val manualContext = coroutineContext[ManualAllocatorContext] val first = inputs[0]!!.data as NumberNDArrayCore val second = inputs[1]!!.data as NumberNDArrayCore diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt index acc9dfb94..e858dd91f 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt @@ -802,7 +802,7 @@ class CastVer6(name: String, attributes: Map>, inputs: Li private val toType: Int by attribute("to") { it: Number -> it.toInt() } override suspend fun > apply(contexts: Contexts, inputs: List): List { - val manualContext = coroutineContext[PredictionContext.Key] as? ManualAllocatorContext + val manualContext = coroutineContext[ManualAllocatorContext] val tensor = inputs.first()!! 
val to = TensorProto.DataType.fromValue(toType)!! diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt index e69367f55..486738d91 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt @@ -7,7 +7,12 @@ import io.kinference.utils.* import kotlinx.coroutines.CoroutineDispatcher import kotlin.coroutines.* +@OptIn(ExperimentalStdlibApi::class) internal class AutoAllocatorContext internal constructor( dispatcher: CoroutineDispatcher, storage: AutoArrayHandlingStorage, -) : AllocatorContext(dispatcher, storage) +) : AllocatorContext(dispatcher, storage) { + companion object Key : AbstractCoroutineContextKey, AutoAllocatorContext>( + AllocatorContext.Key, { it as? AutoAllocatorContext } + ) +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/ManualAllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/ManualAllocatorContext.kt index 9a6663c7f..5a93917de 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/ManualAllocatorContext.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/ManualAllocatorContext.kt @@ -6,11 +6,17 @@ import io.kinference.ndarray.arrays.memory.storage.ManualStorage import io.kinference.primitives.types.DataType import io.kinference.utils.AllocatorContext import kotlinx.coroutines.CoroutineDispatcher +import kotlin.coroutines.AbstractCoroutineContextKey +@OptIn(ExperimentalStdlibApi::class) class ManualAllocatorContext internal constructor( dispatcher: CoroutineDispatcher, storage: ManualArrayHandlingStorage, ) : AllocatorContext(dispatcher, storage) { + companion object Key : AbstractCoroutineContextKey, ManualAllocatorContext>( + AllocatorContext.Key, { it as? ManualAllocatorContext } + ) + fun getNDArray(dataType: DataType, strides: Strides, fillZeros: Boolean = false): MutableNDArrayCore { return storage.getNDArray(dataType, strides, fillZeros) diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt index 339f2fb8d..2f791feb3 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt @@ -60,7 +60,7 @@ internal class PrimitiveTiledArray(val blocks: Array) { require(size % blockSize == 0) { "Size must divide blockSize" } val blocksNum = if (blockSize == 0) 0 else size / blockSize - val blocks = (coroutineContext[PredictionContext.Key] as? 
AutoAllocatorContext)?.getPrimitiveBlock(blocksNum, blockSize) + val blocks = coroutineContext[AutoAllocatorContext]?.getPrimitiveBlock(blocksNum, blockSize) ?: Array(blocksNum) { PrimitiveArray(blockSize) } return PrimitiveTiledArray(blocks) diff --git a/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ResourcesDispatcher.kt b/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ResourcesDispatcher.kt index b17df1f79..45727274a 100644 --- a/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ResourcesDispatcher.kt +++ b/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ResourcesDispatcher.kt @@ -1,9 +1,9 @@ +@file:OptIn(ExperimentalStdlibApi::class) package io.kinference.utils import kotlinx.coroutines.* import kotlinx.coroutines.channels.Channel -import kotlin.coroutines.AbstractCoroutineContextElement -import kotlin.coroutines.CoroutineContext +import kotlin.coroutines.* object ResourcesDispatcher { private val tokenChannel = Channel(capacity = PlatformUtils.cores) @@ -17,12 +17,17 @@ object ResourcesDispatcher { } } -interface PredictionKey : CoroutineContext.Key - sealed class PredictionContext( val dispatcher: CoroutineDispatcher ) : AbstractCoroutineContextElement(PredictionContext) { - companion object Key : PredictionKey + companion object Key : CoroutineContext.Key + + override val key + get() = Key + + override fun get(key: CoroutineContext.Key): E? = getPolymorphicElement(key) + + override fun minusKey(key: CoroutineContext.Key<*>): CoroutineContext = minusPolymorphicKey(key) } interface ArrayStorage { @@ -33,13 +38,22 @@ abstract class AllocatorContext( dispatcher: CoroutineDispatcher, val storage: T ) : PredictionContext(dispatcher) { + companion object Key : AbstractCoroutineContextKey>( + PredictionContext.Key, + { it as? AllocatorContext<*> } + ) fun finalizeContext() { storage.resetState() } } -class NoAllocatorContext(dispatcher: CoroutineDispatcher) : PredictionContext(dispatcher) +class NoAllocatorContext(dispatcher: CoroutineDispatcher) : PredictionContext(dispatcher) { + companion object Key : AbstractCoroutineContextKey( + PredictionContext.Key, + { it as? 
NoAllocatorContext } + ) +} fun CoroutineScope.launchWithLimitOrDefault(block: suspend CoroutineScope.() -> Unit) { this.launch(coroutineContext[PredictionContext]?.dispatcher ?: Dispatchers.Default, block = block) From 9d67670bb7a807c81907640ac35fbdb929c48aa5 Mon Sep 17 00:00:00 2001 From: Ilia Vologin Date: Mon, 2 Sep 2024 17:21:41 +0200 Subject: [PATCH 15/19] JBAI-4393 [core] Optimize imports --- .../io/kinference.core/operators/layer/attention/Attention.kt | 1 - .../operators/layer/normalization/EmbedLayerNormalization.kt | 1 - .../operators/layer/normalization/SkipLayerNormalization.kt | 1 - .../src/jvmMain/kotlin/io/kinference.core/operators/math/Add.kt | 1 - .../kotlin/io/kinference.core/operators/math/BiasGelu.kt | 1 - .../jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt | 1 - .../jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt | 1 - .../ndarray/arrays/memory/contexts/AutoAllocatorContext.kt | 2 -- .../io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt | 1 - .../kotlin/io/kinference/utils/ResourcesDispatcher.kt | 2 ++ 10 files changed, 2 insertions(+), 10 deletions(-) diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt index ee7af6f18..0a60d0278 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt @@ -19,7 +19,6 @@ import io.kinference.optimizer.GraphOptimizer.Companion.isOpt import io.kinference.primitives.types.DataType import io.kinference.protobuf.message.AttributeProto import io.kinference.protobuf.message.TensorProto -import io.kinference.utils.PredictionContext import io.kinference.utils.launchWithLimitOrDefault import kotlinx.coroutines.coroutineScope import kotlin.coroutines.coroutineContext diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt index 21b64def4..5fad8cd77 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt @@ -11,7 +11,6 @@ import io.kinference.operator.* import io.kinference.primitives.types.DataType import io.kinference.protobuf.message.AttributeProto.AttributeType import io.kinference.protobuf.message.TensorProto -import io.kinference.utils.PredictionContext import kotlin.coroutines.coroutineContext import kotlin.math.sqrt diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt index 842eee3b1..598a14c26 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt @@ -13,7 +13,6 @@ import io.kinference.operator.* import io.kinference.primitives.types.DataType import io.kinference.protobuf.message.AttributeProto 
import io.kinference.protobuf.message.TensorProto -import io.kinference.utils.PredictionContext import kotlin.coroutines.coroutineContext import kotlin.math.sqrt diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/Add.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/Add.kt index 131f91786..55d325668 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/Add.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/Add.kt @@ -10,7 +10,6 @@ import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext import io.kinference.ndarray.extensions.allocateNDArray import io.kinference.operator.* import io.kinference.protobuf.message.TensorProto -import io.kinference.utils.PredictionContext import kotlin.coroutines.coroutineContext sealed class Add(name: String, info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt index 02bbc6349..da93d0e8a 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt @@ -11,7 +11,6 @@ import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext import io.kinference.ndarray.extensions.allocateNDArray import io.kinference.ndarray.extensions.gelu.biasGelu import io.kinference.operator.* -import io.kinference.utils.PredictionContext import kotlin.coroutines.coroutineContext sealed class BiasGelu(name: String, info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt index deb54fa35..4165c554c 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt @@ -11,7 +11,6 @@ import io.kinference.ndarray.broadcasting.Broadcasting import io.kinference.ndarray.extensions.allocateNDArray import io.kinference.operator.* import io.kinference.protobuf.message.TensorProto -import io.kinference.utils.PredictionContext import kotlin.coroutines.coroutineContext sealed class MatMul(name: String, info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt index e858dd91f..1bfb35fee 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt @@ -14,7 +14,6 @@ import io.kinference.primitives.types.DataType import io.kinference.protobuf.FLOAT_TENSOR_TYPES import io.kinference.protobuf.message.AttributeProto import io.kinference.protobuf.message.TensorProto -import io.kinference.utils.PredictionContext import kotlin.coroutines.coroutineContext sealed class Cast(name: String, 
info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt index 486738d91..9af632e6b 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt @@ -1,8 +1,6 @@ package io.kinference.ndarray.arrays.memory.contexts import io.kinference.ndarray.arrays.memory.storage.AutoArrayHandlingStorage -import io.kinference.primitives.types.DataType -import io.kinference.primitives.types.PrimitiveArray import io.kinference.utils.* import kotlinx.coroutines.CoroutineDispatcher import kotlin.coroutines.* diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt index 2f791feb3..eee93692c 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt @@ -11,7 +11,6 @@ import io.kinference.ndarray.arrays.pointers.accept import io.kinference.ndarray.blockSizeByStrides import io.kinference.primitives.annotations.* import io.kinference.primitives.types.* -import io.kinference.utils.PredictionContext import io.kinference.utils.inlines.InlineInt import kotlin.coroutines.coroutineContext import kotlin.math.min diff --git a/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ResourcesDispatcher.kt b/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ResourcesDispatcher.kt index 45727274a..b2d5b40a9 100644 --- a/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ResourcesDispatcher.kt +++ b/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ResourcesDispatcher.kt @@ -25,8 +25,10 @@ sealed class PredictionContext( override val key get() = Key + @OptIn(ExperimentalStdlibApi::class) override fun get(key: CoroutineContext.Key): E? = getPolymorphicElement(key) + @OptIn(ExperimentalStdlibApi::class) override fun minusKey(key: CoroutineContext.Key<*>): CoroutineContext = minusPolymorphicKey(key) } From 61011f8b1665052d74bc698e1d57d302d0d646de Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Mon, 2 Sep 2024 17:59:37 +0200 Subject: [PATCH 16/19] JBAI-4393 [ndarray] Functional interface to streamline parallelization block handling (avoid Integer boxing operations). 
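Why the fun interface helps: a Kotlin function type such as (Int, Int, Int) -> Unit is generic (Function3), so every non-inlined call boxes its Int arguments into java.lang.Integer; a fun interface like ParallelizeBody keeps the parameters primitive while call sites still use SAM-converted lambda syntax. Below is a minimal, self-contained sketch of the same pattern — BlockBody and forEachBlock are hypothetical names for illustration only, not part of this patch.

```kotlin
// Sketch (hypothetical names): a fun interface whose SAM method takes primitive ints,
// so invoking it from a hot loop does not allocate boxed Integers the way a
// Function3<Integer, Integer, Integer, Unit> lambda would.
fun interface BlockBody {
    operator fun invoke(start: Int, end: Int, workerIndex: Int)
}

fun forEachBlock(totalSize: Int, blockSize: Int, body: BlockBody) {
    var start = 0
    var worker = 0
    while (start < totalSize) {
        val end = minOf(start + blockSize, totalSize)
        body(start, end, worker++)   // dispatched as invoke(III)V — no Integer allocation
        start = end
    }
}

fun main() {
    // SAM conversion keeps the call site as concise as with a plain lambda type.
    forEachBlock(totalSize = 10, blockSize = 4) { start, end, worker ->
        println("worker=$worker handles [$start, $end)")
    }
}
```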
--- .../src/jvmMain/kotlin/io/kinference/ndarray/Utils.kt | 9 +++++++-- .../ndarray/extensions/constants/BooleanConstants.kt | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/Utils.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/Utils.kt index 3869f6162..546857006 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/Utils.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/Utils.kt @@ -80,13 +80,18 @@ internal fun IntArray.swap(leftIdx: Int, rightIdx: Int) { this[leftIdx] = this[rightIdx] this[rightIdx] = temp } + +fun interface ParallelizeBody { + operator fun invoke(start: Int, end: Int, coroutineIndex: Int) +} + /* * Parallelize with batching by minDataPerLaunch */ suspend fun parallelizeByBlocks(blockSize: Int, countBlocks: Int, minDataPerLaunch: Int, - body: (blockStart: Int, blockEnd: Int, coroutineIndex: Int) -> Unit) { + body: ParallelizeBody) { val batchSize = batchSizeByData(blockSize, countBlocks, minDataPerLaunch) @@ -103,7 +108,7 @@ suspend fun parallelizeByBlocks(blockSize: Int, } } -suspend inline fun parallelizeByRows(rowSize: Int, countRows: Int, minDataPerLaunch: Int, noinline body: (rowStart: Int, rowEnd: Int, index: Int) -> Unit) = parallelizeByBlocks(rowSize, countRows, minDataPerLaunch, body) +suspend inline fun parallelizeByRows(rowSize: Int, countRows: Int, minDataPerLaunch: Int, body: ParallelizeBody) = parallelizeByBlocks(rowSize, countRows, minDataPerLaunch, body) internal fun countCoroutinesByData(rowSize: Int, countRows: Int, minDataPerLaunch: Int): Int { val batchSize = batchSizeByData(rowSize, countRows, minDataPerLaunch) diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/BooleanConstants.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/BooleanConstants.kt index e3e369c6b..0bac99911 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/BooleanConstants.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/BooleanConstants.kt @@ -3,5 +3,5 @@ package io.kinference.ndarray.extensions.constants object BooleanConstants { const val ZERO = false const val ONE = true - const val SIZE_BYTES = 1.toLong() + const val SIZE_BYTES = 1L } From 460f929f2f78d056c00332021efc23f236955c31 Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Mon, 23 Sep 2024 12:23:31 +0200 Subject: [PATCH 17/19] JBAI-6945 [ndarray] Fixed broadcasting logic for batch processing. 
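The underlying issue: in the scalar-broadcast fast paths the per-batch loop kept reading from and writing to the offsets of the first batch, so every batch iteration overwrote the same blocks. The fix recomputes batch-local offsets from the per-axis strides on each iteration. A simplified, hypothetical sketch of the corrected indexing follows (names and the `+` operation are illustrative; this is not the actual kernel):

```kotlin
// Simplified sketch: broadcasting a per-batch scalar (left side) over blocked rows (right side).
// The essential point of the fix is the batch-local offsets recomputed from the strides
// on every iteration, instead of reusing the offsets of batch 0.
fun broadcastScalarPerBatch(
    leftBlocks: Array<FloatArray>,    // scalar side: block [batchOffset][0] holds the scalar
    rightBlocks: Array<FloatArray>,   // dense side: blocksInRow blocks per batch
    destBlocks: Array<FloatArray>,
    batchSize: Int,
    blocksInRow: Int,
    leftStride: Int,                  // blocks to advance per batch on each side
    rightStride: Int,
    destStride: Int
) {
    for (batchIdx in 0 until batchSize) {
        val leftBatchOffset = leftStride * batchIdx
        val rightBatchOffset = rightStride * batchIdx
        val destBatchOffset = destStride * batchIdx

        val scalar = leftBlocks[leftBatchOffset][0]
        for (blockIdx in 0 until blocksInRow) {
            val dest = destBlocks[destBatchOffset + blockIdx]
            val right = rightBlocks[rightBatchOffset + blockIdx]
            for (i in dest.indices) dest[i] = scalar + right[i]
        }
    }
}
```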
--- .../BroadcastTwoArgumentsPrimitive.kt | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/BroadcastTwoArgumentsPrimitive.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/BroadcastTwoArgumentsPrimitive.kt index 90056a8bf..61fc1c076 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/BroadcastTwoArgumentsPrimitive.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/BroadcastTwoArgumentsPrimitive.kt @@ -50,11 +50,15 @@ internal fun broadcastTwoTensorsPrimitive( val batchSize = destBroadcastingShape[shapeIdx] for (batchIdx in 0 until batchSize) { - val leftScalar = leftBlocks[leftOffset.value][0] + val leftBatchOffset = leftOffset.value + leftOffsets[shapeIdx] * batchIdx + val rightBatchOffset = rightOffset.value + rightOffsets[shapeIdx] * batchIdx + val destBatchOffset = destOffset.value + destOffsets[shapeIdx] * batchIdx + + val leftScalar = leftBlocks[leftBatchOffset][0] for (blockIdx in 0 until destBlocksInRow) { - val destBlock = destBlocks[destOffset.value + blockIdx] - val rightBlock = rightBlocks[rightOffset.value + blockIdx] + val destBlock = destBlocks[destBatchOffset + blockIdx] + val rightBlock = rightBlocks[rightBatchOffset + blockIdx] for (idx in destBlock.indices) { destBlock[idx] = op(leftScalar, rightBlock[idx]) @@ -68,11 +72,15 @@ internal fun broadcastTwoTensorsPrimitive( val batchSize = destBroadcastingShape[shapeIdx] for (batchIdx in 0 until batchSize) { - val rightScalar = rightBlocks[rightOffset.value][0] + val leftBatchOffset = leftOffset.value + leftOffsets[shapeIdx] * batchIdx + val rightBatchOffset = rightOffset.value + rightOffsets[shapeIdx] * batchIdx + val destBatchOffset = destOffset.value + destOffsets[shapeIdx] * batchIdx + + val rightScalar = rightBlocks[rightBatchOffset][0] for (blockIdx in 0 until destBlocksInRow) { - val destBlock = destBlocks[destOffset.value + blockIdx] - val leftBlock = leftBlocks[leftOffset.value + blockIdx] + val destBlock = destBlocks[destBatchOffset + blockIdx] + val leftBlock = leftBlocks[leftBatchOffset + blockIdx] for (idx in destBlock.indices) { destBlock[idx] = op(leftBlock[idx], rightScalar) From 4bdb061d906bdfe6e5a63a81ffe9b4a7c407123b Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Tue, 24 Sep 2024 14:34:17 +0200 Subject: [PATCH 18/19] JBAI-6945 [ndarray] Introduced functional interface ScalarBroadcastFun instead of lambda, so InlineInt inside changed to regular Int without additional boxing operations. 
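Same motivation as the ParallelizeBody change above: the broadcasting body used to be a plain lambda taking InlineInt parameters, and both the generic function type and the value-class arguments force boxing on every call. Declaring the body as a fun interface with plain Int parameters keeps the hot-loop dispatch primitive. A tiny illustration of the boxing difference is sketched below; InlineIndex and IndexBody are hypothetical names, and InlineInt is presumed here to be a value class.

```kotlin
// A value class (as the library's InlineInt presumably is) gets boxed whenever it is
// used as a generic type argument — including inside function types such as (InlineIndex) -> Unit.
@JvmInline
value class InlineIndex(val value: Int)

// Boxed path: compiles to Function1<InlineIndex, Unit>; each call wraps the argument.
val boxedBody: (InlineIndex) -> Unit = { idx -> check(idx.value >= 0) }

// Primitive path: the fun interface's SAM method takes a plain int — no wrapper objects.
fun interface IndexBody {
    operator fun invoke(index: Int)
}

val primitiveBody = IndexBody { index -> check(index >= 0) }

fun main() {
    boxedBody(InlineIndex(1))  // boxes the value class and goes through Function1.invoke(Object)
    primitiveBody(2)           // direct invoke(I)V call
}
```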
--- .../BroadcastTwoArgumentsPrimitive.kt | 33 +++++++++---------- .../extensions/broadcasting/ReshapeView.kt | 6 ++-- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/BroadcastTwoArgumentsPrimitive.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/BroadcastTwoArgumentsPrimitive.kt index 61fc1c076..8fd770dd1 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/BroadcastTwoArgumentsPrimitive.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/BroadcastTwoArgumentsPrimitive.kt @@ -8,7 +8,6 @@ import io.kinference.primitives.annotations.GenerateNameFromPrimitives import io.kinference.primitives.annotations.GeneratePrimitives import io.kinference.primitives.types.DataType import io.kinference.primitives.types.PrimitiveType -import io.kinference.utils.inlines.InlineInt @GenerateNameFromPrimitives internal fun broadcastTwoTensorsPrimitive( @@ -45,14 +44,14 @@ internal fun broadcastTwoTensorsPrimitive( val rightBlocks = right.array.blocks val destBlocks = dest.array.blocks - val leftIsScalarFun = { leftOffset: InlineInt, rightOffset: InlineInt, destOffset: InlineInt, axisToBroadcastIdx: InlineInt -> - val shapeIdx = axisToBroadcastIdx.value * 2 + val leftIsScalarFun = ScalarBroadcastFun { leftOffset, rightOffset, destOffset, axisToBroadcastIdx -> + val shapeIdx = axisToBroadcastIdx * 2 val batchSize = destBroadcastingShape[shapeIdx] for (batchIdx in 0 until batchSize) { - val leftBatchOffset = leftOffset.value + leftOffsets[shapeIdx] * batchIdx - val rightBatchOffset = rightOffset.value + rightOffsets[shapeIdx] * batchIdx - val destBatchOffset = destOffset.value + destOffsets[shapeIdx] * batchIdx + val leftBatchOffset = leftOffset + leftOffsets[shapeIdx] * batchIdx + val rightBatchOffset = rightOffset + rightOffsets[shapeIdx] * batchIdx + val destBatchOffset = destOffset + destOffsets[shapeIdx] * batchIdx val leftScalar = leftBlocks[leftBatchOffset][0] @@ -67,14 +66,14 @@ internal fun broadcastTwoTensorsPrimitive( } } - val rightIsScalarFun = { leftOffset: InlineInt, rightOffset: InlineInt, destOffset: InlineInt, axisToBroadcastIdx: InlineInt -> - val shapeIdx = axisToBroadcastIdx.value * 2 + val rightIsScalarFun = ScalarBroadcastFun { leftOffset, rightOffset, destOffset, axisToBroadcastIdx -> + val shapeIdx = axisToBroadcastIdx * 2 val batchSize = destBroadcastingShape[shapeIdx] for (batchIdx in 0 until batchSize) { - val leftBatchOffset = leftOffset.value + leftOffsets[shapeIdx] * batchIdx - val rightBatchOffset = rightOffset.value + rightOffsets[shapeIdx] * batchIdx - val destBatchOffset = destOffset.value + destOffsets[shapeIdx] * batchIdx + val leftBatchOffset = leftOffset + leftOffsets[shapeIdx] * batchIdx + val rightBatchOffset = rightOffset + rightOffsets[shapeIdx] * batchIdx + val destBatchOffset = destOffset + destOffsets[shapeIdx] * batchIdx val rightScalar = rightBlocks[rightBatchOffset][0] @@ -89,11 +88,11 @@ internal fun broadcastTwoTensorsPrimitive( } } - val defaultFun = { leftOffset: InlineInt, rightOffset: InlineInt, destOffset: InlineInt, axisToBroadcastIdx: InlineInt -> + val defaultFun = ScalarBroadcastFun { leftOffset, rightOffset, destOffset, _ -> for (blockIdx in 0 until destBlocksInRow) { - val leftBlock = leftBlocks[leftOffset.value + blockIdx] - val rightBlock = rightBlocks[rightOffset.value + blockIdx] - val destBlock = 
destBlocks[destOffset.value + blockIdx] + val leftBlock = leftBlocks[leftOffset + blockIdx] + val rightBlock = rightBlocks[rightOffset + blockIdx] + val destBlock = destBlocks[destOffset + blockIdx] for (idx in destBlock.indices) { destBlock[idx] = op(leftBlock[idx], rightBlock[idx]) @@ -101,7 +100,7 @@ internal fun broadcastTwoTensorsPrimitive( } } - val broadcastingFun = when { + val broadcastingFun: ScalarBroadcastFun = when { leftIsScalar -> leftIsScalarFun rightIsScalar -> rightIsScalarFun else -> defaultFun @@ -109,7 +108,7 @@ internal fun broadcastTwoTensorsPrimitive( fun broadcast(leftOffset: Int, rightOffset: Int, destOffset: Int, axisToBroadcastIdx: Int) { if (axisToBroadcastIdx == totalAxesToBroadcast) { - broadcastingFun(InlineInt(leftOffset), InlineInt(rightOffset), InlineInt(destOffset), InlineInt(axisToBroadcastIdx)) + broadcastingFun(leftOffset, rightOffset, destOffset, axisToBroadcastIdx) } else { val shapeIdx = axisToBroadcastIdx * 2 diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/ReshapeView.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/ReshapeView.kt index 43c6f672a..b980f530e 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/ReshapeView.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/ReshapeView.kt @@ -3,6 +3,10 @@ package io.kinference.ndarray.extensions.broadcasting import io.kinference.ndarray.arrays.NDArrayCore import io.kinference.ndarray.extensions.utils.calculateBlock +internal fun interface ScalarBroadcastFun { + operator fun invoke(leftOffset: Int, rightOffset: Int, destOffset: Int, axisToBroadcastIdx: Int) +} + internal data class BroadcastingInfo( val broadcastingShapes: Array, val broadcastingDestShape: IntArray, @@ -89,8 +93,6 @@ internal data class BroadcastingInfo( } } - - internal fun makeOffsets(shape: IntArray, blocksInRow: Int): IntArray { val offsets = IntArray(shape.size) offsets[offsets.lastIndex - 1] = blocksInRow From 52c7687d9fd5b3119b5d6a4a71b67c65b9a0d04a Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Thu, 26 Sep 2024 11:39:50 +0200 Subject: [PATCH 19/19] [RELEASE] Update version to 0.2.23 --- README.md | 24 ++++++++++++------------ build.gradle.kts | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index b209fb9db..0d33e61aa 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ it is highly recommended to use KInference TensorFlow.js backend instead for mor KInference Core dependency coordinates: ```kotlin dependencies { - api("io.kinference", "inference-core", "0.2.22") + api("io.kinference", "inference-core", "0.2.23") } ``` @@ -67,7 +67,7 @@ This backend is recommended for JavaScript projects. 
TensorFlow.js backend dependency coordinates: ```kotlin dependencies { - api("io.kinference", "inference-tfjs", "0.2.22") + api("io.kinference", "inference-tfjs", "0.2.23") } ``` @@ -81,14 +81,14 @@ To check on the system requirements, visit the following [link](https://onnxrunt ONNXRuntime CPU backend dependency coordinates: ```kotlin dependencies { - api("io.kinference", "inference-ort", "0.2.22") + api("io.kinference", "inference-ort", "0.2.23") } ``` ONNXRuntime GPU backend dependency coordinates: ```kotlin dependencies { - api("io.kinference", "inference-ort-gpu", "0.2.22") + api("io.kinference", "inference-ort-gpu", "0.2.23") } ``` @@ -104,7 +104,7 @@ Array adapter for the [kmath](https://github.com/SciProgCentre/kmath) library th Dependency coordinates: ```kotlin dependencies { - api("io.kinference", "adapter-kmath-{backend_name}", "0.2.22") + api("io.kinference", "adapter-kmath-{backend_name}", "0.2.23") } ``` @@ -114,12 +114,12 @@ Array adapter for the [multik](https://github.com/Kotlin/multik) library that wo Dependency coordinates: ```kotlin dependencies { - api("io.kinference", "adapter-multik-{backend_name}", "0.2.22") + api("io.kinference", "adapter-multik-{backend_name}", "0.2.23") } ``` ## Getting started -Let us now walk through how to get started with KInference. The latest version of KInference is *0.2.22* +Let us now walk through how to get started with KInference. The latest version of KInference is *0.2.23* ### Setup dependencies repository @@ -142,7 +142,7 @@ To enable the backend, you can add the chosen KInference runtime as a dependency ```kotlin dependencies { - api("io.kinference", "inference-core", "0.2.22") + api("io.kinference", "inference-core", "0.2.23") } ``` @@ -160,20 +160,20 @@ kotlin { sourceSets { val commonMain by getting { dependencies { - api("io.kinference:inference-api:0.2.22") - api("io.kinference:ndarray-api:0.2.22") + api("io.kinference:inference-api:0.2.23") + api("io.kinference:ndarray-api:0.2.23") } } val jvmMain by getting { dependencies { - api("io.kinference:inference-core:0.2.22") + api("io.kinference:inference-core:0.2.23") } } val jsMain by getting { dependencies { - api("io.kinference:inference-tfjs:0.2.22") + api("io.kinference:inference-tfjs:0.2.23") } } } diff --git a/build.gradle.kts b/build.gradle.kts index 7c543737d..7e08e87a7 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -8,7 +8,7 @@ import org.jetbrains.kotlin.gradle.targets.js.yarn.YarnRootExtension import org.jetbrains.kotlin.gradle.tasks.KotlinCompilationTask group = "io.kinference" -version = "0.2.22" +version = "0.2.23" plugins { alias(libs.plugins.kotlin.multiplatform) apply false