diff --git a/inference/inference-core/src/commonMain/kotlin/io/kinference.core/model/KIModel.kt b/inference/inference-core/src/commonMain/kotlin/io/kinference.core/model/KIModel.kt index 86a309fb4..e49f7ecb6 100644 --- a/inference/inference-core/src/commonMain/kotlin/io/kinference.core/model/KIModel.kt +++ b/inference/inference-core/src/commonMain/kotlin/io/kinference.core/model/KIModel.kt @@ -5,11 +5,10 @@ import io.kinference.core.graph.KIGraph import io.kinference.core.markOutput import io.kinference.graph.Contexts import io.kinference.model.Model -import io.kinference.ndarray.arrays.memory.ArrayDispatcher import io.kinference.operator.OperatorSetRegistry import io.kinference.profiler.* import io.kinference.protobuf.message.ModelProto -import io.kinference.utils.ModelContext +import io.kinference.ndarray.arrays.memory.AllocatorContext import kotlinx.coroutines.withContext import kotlinx.atomicfu.atomic @@ -20,23 +19,21 @@ class KIModel(val id: String, val name: String, val opSet: OperatorSetRegistry, override fun analyzeProfilingResults(): ProfileAnalysisEntry = profiles.analyze("Model $name") override fun resetProfiles() = profiles.clear() - override suspend fun predict(input: List>, profile: Boolean): Map> = withContext(ModelContext(id, getInferenceCycleId().toString())) { - val contexts = Contexts>( - null, - if (profile) addProfilingContext("Model $name") else null - ) - val modelName = coroutineContext[ModelContext.Key]!!.modelName - val inferenceCycle = coroutineContext[ModelContext.Key]!!.cycleId - ArrayDispatcher.addInferenceContext(modelName, inferenceCycle) - val execResult = graph.execute(input, contexts) - execResult.forEach { it.markOutput() } - ArrayDispatcher.closeInferenceContext(modelName, inferenceCycle) - execResult.associateBy { it.name!! } - } + override suspend fun predict(input: List>, profile: Boolean): Map> = + withContext(AllocatorContext(id, getInferenceCycleId())) { + val contexts = Contexts>( + null, + if (profile) addProfilingContext("Model $name") else null + ) + val coroutineContext = coroutineContext[AllocatorContext.Key]!! + val execResult = graph.execute(input, contexts) + execResult.forEach { it.markOutput() } + coroutineContext.closeAllocated() + execResult.associateBy { it.name!! } + } override suspend fun close() { graph.close() - ArrayDispatcher.removeModelContext(id) } private fun getInferenceCycleId(): Long = inferenceCycleCounter.incrementAndGet() @@ -51,7 +48,6 @@ class KIModel(val id: String, val name: String, val opSet: OperatorSetRegistry, val id = "$name:${generateModelId()}" val opSet = OperatorSetRegistry(proto.opSetImport) val graph = KIGraph(proto.graph!!, opSet) - ArrayDispatcher.addModelContext(id) return KIModel(id, name, opSet, graph) } } diff --git a/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt b/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt index b58874007..208ed32e5 100644 --- a/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt +++ b/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt @@ -1,15 +1,6 @@ package io.kinference.ndarray.arrays -typealias StateMarker = (ArrayUsageMarker) -> Unit - -const val NO_MODEL_CONTEXT = "NoContext" -const val NO_INFERENCE_CONTEXT = "NoInferenceContext" - -enum class ArrayUsageMarker { - Unused, - Used, - Output, -} +typealias StateMarker = () -> Unit enum class ArrayTypes(val index: Int) { ByteArray(0), diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/BooleanNDArray.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/BooleanNDArray.kt index f5f392dfd..0fb962b38 100644 --- a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/BooleanNDArray.kt +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/BooleanNDArray.kt @@ -76,7 +76,7 @@ open class BooleanNDArray(var array: BooleanTiledArray, strides: Strides) : NDAr } override fun markOutput() { - array.marker.forEach { it.invoke(ArrayUsageMarker.Output) } + array.marker.forEach { it.invoke() } } override suspend fun toMutable(): MutableBooleanNDArray { diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/PrimitiveNDArray.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/PrimitiveNDArray.kt index fe444adef..ce7e80a74 100644 --- a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/PrimitiveNDArray.kt +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/PrimitiveNDArray.kt @@ -86,7 +86,7 @@ internal open class PrimitiveNDArray(array: PrimitiveTiledArray, strides: Stride } override fun markOutput() { - array.marker.forEach { it.invoke(ArrayUsageMarker.Output) } + array.marker.forEach { it.invoke() } } override suspend fun clone(): PrimitiveNDArray { @@ -540,7 +540,7 @@ internal open class PrimitiveNDArray(array: PrimitiveTiledArray, strides: Stride } override suspend fun reduceSum(axes: IntArray, keepDims: Boolean): PrimitiveNDArray = - reduceOperationPrimitive(axes, keepDims) { output: PrimitiveType, input: PrimitiveType -> (output + input).toPrimitive() } + reduceOperationPrimitive(axes, keepDims) { output: InlinePrimitive, input: InlinePrimitive -> (output + input) } override suspend fun topK(axis: Int, k: Int, largest: Boolean, sorted: Boolean): Pair { val actualAxis = indexAxis(axis) diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt new file mode 100644 index 000000000..4fff74816 --- /dev/null +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt @@ -0,0 +1,28 @@ +package io.kinference.ndarray.arrays.memory + +import io.kinference.ndarray.arrays.* +import kotlin.coroutines.CoroutineContext + +data class AllocatorContext(val modelName: String, val cycleId: Long) : CoroutineContext.Element { + private val usedContainers: ArrayDeque = ArrayDeque() + private val unusedContainers: ArrayStorage = ArrayDispatcher.getStorage() + + companion object Key : CoroutineContext.Key + override val key: CoroutineContext.Key<*> get() = Key + + internal fun getArrayContainers(type: ArrayTypes, size: Int, count: Int): Array { + val arrayContainers = Array(count) { unusedContainers.getArrayContainer(type, size) } + usedContainers.addAll(arrayContainers) + return arrayContainers + } + + + fun closeAllocated() { + usedContainers.forEach { + if (!it.isOutput) { + unusedContainers[it.arrayTypeIndex, it.arraySizeIndex].addLast(it) + } + } + ArrayDispatcher.returnStorage(unusedContainers) + } +} diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt index f4ada6fa7..0d4c6ca71 100644 --- a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt @@ -4,22 +4,20 @@ import io.kinference.ndarray.arrays.* internal sealed class ArrayContainer( val arrayTypeIndex: Int, - val arraySizeIndex: Int, - var marker: ArrayUsageMarker = ArrayUsageMarker.Used, + val arraySizeIndex: Int ) { - val markAsOutput: StateMarker = { - marker = it + var isOutput: Boolean = false + private set + + val markAsOutput = { + isOutput = true } var next: ArrayContainer? = null - private class EmptyArrayContainer : ArrayContainer(EMPTY_INDEX, EMPTY_INDEX) - companion object { private const val EMPTY_INDEX = -1 - fun emptyContainer(): ArrayContainer = EmptyArrayContainer() - operator fun invoke(type: ArrayTypes, size: Int, sizeIndex: Int = EMPTY_INDEX): ArrayContainer { return when (type) { ArrayTypes.ByteArray -> ByteArrayContainer(type.index, sizeIndex, ByteArray(size)) // 8-bit signed diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayDispatcher.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayDispatcher.kt index d233f1e47..f58ae04b6 100644 --- a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayDispatcher.kt +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayDispatcher.kt @@ -1,230 +1,19 @@ package io.kinference.ndarray.arrays.memory -import io.kinference.ndarray.arrays.* -import kotlinx.atomicfu.atomic -import kotlinx.coroutines.sync.Mutex -import kotlinx.coroutines.sync.withLock +import io.kinference.ndarray.arrays.ArrayTypes +import io.kinference.utils.ConcurrentQueue -object ArrayDispatcher { - private val modelDispatchers = mutableMapOf() - private val mutex = Mutex() +internal object ArrayDispatcher { + private const val INIT_SIZE_VALUE: Int = 2 + private val typeSize: Int = ArrayTypes.entries.size - suspend fun addModelContext(modelContext: String) { - mutex.withLock { - modelDispatchers[modelContext] = ModelArrayDispatcher() - } - } - - suspend fun removeModelContext(modelContext: String) { - val modelDispatcher = mutex.withLock { - modelDispatchers.remove(modelContext) - } - modelDispatcher?.close() - } - - suspend fun addInferenceContext(modelContext: String, inferenceContext: String) { - modelDispatchers[modelContext]!!.addInferenceContext(inferenceContext) - } - - suspend fun closeInferenceContext(modelContext: String, inferenceContext: String) { - modelDispatchers[modelContext]!!.closeInferenceContext(inferenceContext) - } - - internal suspend fun getArrayContainers( - type: ArrayTypes, - size: Int, - count: Int, - modelContext: String = NO_MODEL_CONTEXT, - inferenceContext: String = NO_INFERENCE_CONTEXT - ): Array { - if (modelContext == NO_MODEL_CONTEXT || inferenceContext == NO_MODEL_CONTEXT) - return Array(count) { ArrayContainer(type, size) } - - return modelDispatchers[modelContext]!!.getArrayContainers(inferenceContext, type, size, count) - } -} - -private class ModelArrayDispatcher { - companion object { - private const val INIT_SIZE_VALUE: Int = 2 - private val typeSize: Int = ArrayTypes.entries.size - } - - private val usedArrays: HashMap = hashMapOf() - private val unusedArrays: ArrayStorage = ArrayStorage(typeSize, INIT_SIZE_VALUE) - private val mutex = Mutex() - - class ConcurrentArrayContainerQueue { - // Initialize the head with the emptyContainer sentinel node - private var head: ArrayContainer? = ArrayContainer.emptyContainer() - private var tail: ArrayContainer? = head - private val isClosed = atomic(false) - private val lock = atomic(false) - - fun addLast(container: ArrayContainer) { - while (true) { - if (lock.compareAndSet(expect = false, update = true)) { - if (isClosed.value) { - lock.value = false - throw IllegalStateException("Cannot add to a closed queue.") - } - - container.next = null - tail?.next = container - tail = container - lock.value = false - return - } - } - } - - fun removeFirstOrNull(): ArrayContainer? { - while (true) { - if (lock.compareAndSet(expect = false, update = true)) { - if (isClosed.value) { - lock.value = false - throw IllegalStateException("Cannot remove from a closed queue.") - } - - val first = head?.next - if (first == null) { - lock.value = false - return null - } - - head?.next = first.next - if (first.next == null) { - tail = head - } - lock.value = false - return first - } - } - } - - fun close() { - while (true) { - if (lock.compareAndSet(expect = false, update = true)) { - isClosed.value = true - var current = head - while (current != null) { - val next = current.next - current.next = null - current = next - } - lock.value = false - return - } - } - } - } - - private class ArrayStorage(typeLength: Int, sizeLength: Int) { - /** - * Structure is as follows: - * 1. Array by predefined types (all types are known compiled time) - * 2. Array by size. Starting with 'INIT_SIZE_VALUE' element and grow it doubling (typically there are no more than 16 different sizes) - * 3. Queue of array containers (used as FIFO) - */ - private var storage: Array> = - Array(typeLength) { Array(sizeLength) { ConcurrentArrayContainerQueue() } } - - private var sizeIndices: IntArray = IntArray(typeLength) - private var sizes: Array = Array(typeLength) { IntArray(sizeLength) } - private val mutex = Mutex() - - operator fun get(typeIndex: Int, sizeIndex: Int): ConcurrentArrayContainerQueue { - return storage[typeIndex][sizeIndex] - } - - suspend fun getArrayContainer(type: ArrayTypes, size: Int): ArrayContainer { - val tIndex = type.index - val sIndex = sizes[tIndex].indexOf(size) - - // Checking that we have this array size in our storage for this type - val idx = if (sIndex != -1) { - val array = storage[tIndex][sIndex].removeFirstOrNull() - array?.let { - it.marker = ArrayUsageMarker.Used - ArrayContainer.resetArray(it) - return it - } - sIndex - } else { - mutex.withLock { - if (sizeIndices[tIndex] >= storage[tIndex].size) - grow(tIndex) - - val idx = sizeIndices[tIndex]++ - sizes[tIndex][idx] = size - idx - } - } - - return ArrayContainer(type, size, idx) - } - - fun grow(typeIndex: Int) { - val newSize = sizes[typeIndex].size * 2 - val newStorage: Array = Array(newSize) { ConcurrentArrayContainerQueue() } - - for (i in storage[typeIndex].indices) { - newStorage[i] = storage[typeIndex][i] - } - - storage[typeIndex] = newStorage - sizes[typeIndex] = sizes[typeIndex].copyOf(newSize) - } - - fun close() { - for (i in storage.indices) { - for (j in storage[i].indices) { - storage[i][j].close() - } - } - } - } - - suspend fun addInferenceContext(inferenceContext: String) { - mutex.withLock { - usedArrays[inferenceContext] = ConcurrentArrayContainerQueue() - } - } - - suspend fun getArrayContainers(inferenceContext: String, type: ArrayTypes, size: Int, count: Int): Array { - return Array(count) { getArrayContainer(inferenceContext, type, size) } - } - - suspend fun closeInferenceContext(inferenceContext: String) { - val usedArrays = mutex.withLock { - usedArrays.remove(inferenceContext)!! - } - var isProcessed = false - - while (!isProcessed) { - val container = usedArrays.removeFirstOrNull() - if (container != null) { - if (container.marker != ArrayUsageMarker.Output) { - container.marker = ArrayUsageMarker.Unused - unusedArrays[container.arrayTypeIndex, container.arraySizeIndex].addLast(container) - } - } else { - isProcessed = true - } - } - - usedArrays.close() - } + private val unusedArrays: ConcurrentQueue = ConcurrentQueue() - fun close() { - unusedArrays.close() - usedArrays.forEach { it.value.close() } - usedArrays.clear() + fun getStorage(): ArrayStorage { + return unusedArrays.removeFirstOrNull() ?: ArrayStorage(typeSize, INIT_SIZE_VALUE) } - private suspend fun getArrayContainer(inferenceContext: String, type: ArrayTypes, size: Int): ArrayContainer { - val newArray = unusedArrays.getArrayContainer(type, size) - usedArrays[inferenceContext]!!.addLast(newArray) - return newArray + fun returnStorage(storage: ArrayStorage) { + unusedArrays.addLast(storage) } } diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt new file mode 100644 index 000000000..2831f3a66 --- /dev/null +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt @@ -0,0 +1,58 @@ +package io.kinference.ndarray.arrays.memory + +import io.kinference.ndarray.arrays.ArrayTypes + +internal class ArrayStorage(typeLength: Int, sizeLength: Int) { + /** + * Structure is as follows: + * 1. Array by predefined types (all types are known compiled time) + * 2. Array by size. Starting with 'INIT_SIZE_VALUE' element and grow it doubling (typically there are no more than 16 different sizes) + * 3. Queue of array containers (used as FIFO) + */ + private var storage: Array>> = + Array(typeLength) { Array(sizeLength) { ArrayDeque() } } + + private var sizeIndices: IntArray = IntArray(typeLength) + private var sizes: Array = Array(typeLength) { IntArray(sizeLength) } + + + operator fun get(typeIndex: Int, sizeIndex: Int): ArrayDeque { + return storage[typeIndex][sizeIndex] + } + + fun getArrayContainer(type: ArrayTypes, size: Int): ArrayContainer { + val tIndex = type.index + val sIndex = sizes[tIndex].indexOf(size) + + // Checking that we have this array size in our storage for this type + val idx = if (sIndex != -1) { + val array = storage[tIndex][sIndex].removeFirstOrNull() + array?.let { + ArrayContainer.resetArray(it) + return it + } + sIndex + } else { + if (sizeIndices[tIndex] >= storage[tIndex].size) + grow(tIndex) + + val idx = sizeIndices[tIndex]++ + sizes[tIndex][idx] = size + idx + } + + return ArrayContainer(type, size, idx) + } + + private fun grow(typeIndex: Int) { + val newSize = sizes[typeIndex].size * 2 + val newStorage: Array> = Array(newSize) { ArrayDeque() } + + for (i in storage[typeIndex].indices) { + newStorage[i] = storage[typeIndex][i] + } + + storage[typeIndex] = newStorage + sizes[typeIndex] = sizes[typeIndex].copyOf(newSize) + } +} diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt index c8e59e2bc..eda58c092 100644 --- a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt @@ -4,14 +4,13 @@ package io.kinference.ndarray.arrays.tiled import io.kinference.ndarray.arrays.* -import io.kinference.ndarray.arrays.memory.ArrayDispatcher +import io.kinference.ndarray.arrays.memory.* import io.kinference.ndarray.arrays.memory.PrimitiveArrayContainer import io.kinference.ndarray.arrays.pointers.PrimitivePointer import io.kinference.ndarray.arrays.pointers.accept import io.kinference.ndarray.blockSizeByStrides import io.kinference.primitives.annotations.* import io.kinference.primitives.types.* -import io.kinference.utils.ModelContext import io.kinference.utils.inlines.InlineInt import kotlin.coroutines.coroutineContext import kotlin.math.min @@ -62,12 +61,10 @@ internal class PrimitiveTiledArray(val blocks: Array, val marker val blocksNum = if (blockSize == 0) 0 else size / blockSize - val coroutineContext = coroutineContext[ModelContext.Key] - val modelName = coroutineContext?.modelName ?: NO_MODEL_CONTEXT - val inferenceCycle = coroutineContext?.cycleId ?: NO_INFERENCE_CONTEXT + val coroutineContext = coroutineContext[AllocatorContext.Key] // With array dispatcher - val containerArray = ArrayDispatcher.getArrayContainers(type, blockSize, blocksNum, modelName, inferenceCycle) + val containerArray = coroutineContext?.getArrayContainers(type, blockSize, blocksNum) ?: Array(blocksNum) { ArrayContainer(type, blockSize) } val blocks = Array(containerArray.size) { i -> (containerArray[i] as PrimitiveArrayContainer).array } val marker = Array(containerArray.size) { i -> containerArray[i].markAsOutput } diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt index ce0f52b8d..555243f19 100644 --- a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt @@ -14,8 +14,6 @@ import io.kinference.primitives.annotations.GenerateNameFromPrimitives import io.kinference.primitives.annotations.GeneratePrimitives import io.kinference.primitives.types.* import kotlin.math.* -import io.kinference.utils.ModelContext -import kotlin.coroutines.coroutineContext @GenerateNameFromPrimitives internal suspend fun computeGeluPrimitive(input: PrimitiveNDArray, bias: PrimitiveNDArray): MutablePrimitiveNDArray { @@ -27,23 +25,12 @@ internal suspend fun computeGeluPrimitive(input: PrimitiveNDArray, bias: Primiti val blockSize = input.array.blockSize - // This approach when arrays acquired before parallelizeByBlocks() is faster - val coroutineContext = coroutineContext[ModelContext.Key]!! - val modelName = coroutineContext.modelName - val inferenceCycle = coroutineContext.cycleId - - val coroutineCount = countCoroutinesByData(blockSize, inputBlocks.size, 2048) - val containerTemporaryBlockArrays = ArrayDispatcher.getArrayContainers(PrimitiveTiledArray.type, blockSize, coroutineCount, modelName, inferenceCycle) - val containerTemporaryBlockAbsArrays = ArrayDispatcher.getArrayContainers(PrimitiveTiledArray.type, blockSize, coroutineCount, modelName, inferenceCycle) - val temporaryBlockArrays = Array(containerTemporaryBlockArrays.size) { i -> (containerTemporaryBlockArrays[i] as PrimitiveArrayContainer).array } - val temporaryBlockAbsArrays = Array(containerTemporaryBlockAbsArrays.size) { i -> (containerTemporaryBlockAbsArrays[i] as PrimitiveArrayContainer).array } - // Constant 2048 was precomputed on M1 Max processor // With this constant two launches work faster than single thread without launches // TODO: (cupertank) Remove constants parallelizeByBlocks(blockSize, inputBlocks.size, 2048) { blockStart, blockEnd, coroutineIndex -> - val temporaryBlock = temporaryBlockArrays[coroutineIndex] - val temporaryBlockAbs = temporaryBlockAbsArrays[coroutineIndex] + val temporaryBlock = PrimitiveArray(blockSize) + val temporaryBlockAbs = PrimitiveArray(blockSize) for (blockIdx in blockStart until blockEnd) { val outputBlock = outputBlocks[blockIdx] diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/gelu/FastGeluPrimitive.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/gelu/FastGeluPrimitive.kt index 9f132e6ed..c429b11ef 100644 --- a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/gelu/FastGeluPrimitive.kt +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/gelu/FastGeluPrimitive.kt @@ -29,15 +29,11 @@ internal suspend fun fastGeluPrimitive(input: PrimitiveNDArray, bias: PrimitiveN val blockSize = input.array.blockSize - val coroutineCount = countCoroutinesByData(blockSize, inputBlocks.size, 2048) - val containerArray = ArrayDispatcher.getArrayContainers(PrimitiveTiledArray.type, blockSize, coroutineCount) - val temporaryBlockExpArrays = Array(containerArray.size) { i -> (containerArray[i] as PrimitiveArrayContainer).array } - // Constant 2048 was precomputed on M1 Max processor // With this constant two launches work faster than single thread without launches // TODO: (cupertank) Remove constants - parallelizeByBlocks(blockSize, inputBlocks.size, 2048) { blockStart, blockEnd, coroutineIndex -> - val temporaryBlockExp = temporaryBlockExpArrays[coroutineIndex] + parallelizeByBlocks(blockSize, inputBlocks.size, 2048) { blockStart, blockEnd, _ -> + val temporaryBlockExp = PrimitiveArray(blockSize) for (blockIdx in blockStart until blockEnd) { val outputBlock = outputBlocks[blockIdx] val block = inputBlocks[blockIdx] diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/l1/ReduceL1Primitive.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/l1/ReduceL1Primitive.kt index fa627535f..0823b64e6 100644 --- a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/l1/ReduceL1Primitive.kt +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/l1/ReduceL1Primitive.kt @@ -16,8 +16,9 @@ import io.kinference.primitives.types.PrimitiveType import io.kinference.primitives.annotations.MakePublic import io.kinference.ndarray.stubs.abs import io.kinference.ndarray.extensions.abs +import io.kinference.utils.inlines.InlinePrimitive import kotlin.math.abs @MakePublic internal suspend fun PrimitiveNDArray.reduceL1(axes: IntArray, keepDims: Boolean) = - reduceOperationPrimitive(axes, keepDims) { out: PrimitiveType, inp: PrimitiveType -> out + abs(inp) } + reduceOperationPrimitive(axes, keepDims) { out: InlinePrimitive, inp: InlinePrimitive -> out + InlinePrimitive(abs(inp.value)) } diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/logSumExp/ReduceLogSumExpPrimitive.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/logSumExp/ReduceLogSumExpPrimitive.kt index 476f26601..72052405d 100644 --- a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/logSumExp/ReduceLogSumExpPrimitive.kt +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/logSumExp/ReduceLogSumExpPrimitive.kt @@ -20,12 +20,13 @@ import io.kinference.primitives.annotations.MakePublic import io.kinference.ndarray.extensions.ln import io.kinference.ndarray.math.FastMath import io.kinference.ndarray.math.exp +import io.kinference.utils.inlines.InlinePrimitive import kotlin.math.ln @MakePublic internal suspend fun PrimitiveNDArray.reduceLogSumExp(axes: IntArray, keepDims: Boolean): PrimitiveNDArray { val sumTensor = reduceOperationPrimitive(axes, keepDims) - { out: PrimitiveType, inp: PrimitiveType -> out + FastMath.exp(inp) } as MutablePrimitiveNDArray + { out: InlinePrimitive, inp: InlinePrimitive -> out + InlinePrimitive(FastMath.exp(inp.value)) } as MutablePrimitiveNDArray sumTensor.mapMutable(object : PrimitiveMap { override fun apply(value: PrimitiveType) = ln(value) diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/max/ReduceMaxPrimitive.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/max/ReduceMaxPrimitive.kt index c675b054d..769ec9ae9 100644 --- a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/max/ReduceMaxPrimitive.kt +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/max/ReduceMaxPrimitive.kt @@ -11,7 +11,8 @@ import io.kinference.primitives.types.PrimitiveType import io.kinference.primitives.annotations.MakePublic import kotlin.comparisons.maxOf import io.kinference.ndarray.extensions.* +import io.kinference.utils.inlines.InlinePrimitive @MakePublic internal suspend fun PrimitiveNDArray.reduceMax(axes: IntArray, keepDims: Boolean) = - reduceOperationPrimitive(axes, keepDims, initOutputValue = PrimitiveType.MIN_VALUE_FOR_MAX) { out: PrimitiveType, inp: PrimitiveType -> maxOf(out, inp) } + reduceOperationPrimitive(axes, keepDims, initOutputValue = PrimitiveType.MIN_VALUE_FOR_MAX) { out: InlinePrimitive, inp: InlinePrimitive -> InlinePrimitive(maxOf(out.value, inp.value)) } diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/min/ReduceMinPrimitive.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/min/ReduceMinPrimitive.kt index 02b7b8e45..4597bd30d 100644 --- a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/min/ReduceMinPrimitive.kt +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/min/ReduceMinPrimitive.kt @@ -10,8 +10,9 @@ import io.kinference.primitives.types.DataType import io.kinference.primitives.types.PrimitiveType import io.kinference.primitives.annotations.MakePublic import io.kinference.ndarray.extensions.MAX_VALUE_FOR_MIN +import io.kinference.utils.inlines.InlinePrimitive import kotlin.comparisons.minOf @MakePublic internal suspend fun PrimitiveNDArray.reduceMin(axes: IntArray, keepDims: Boolean) = - reduceOperationPrimitive(axes, keepDims, initOutputValue = PrimitiveType.MAX_VALUE_FOR_MIN) { out: PrimitiveType, inp: PrimitiveType -> minOf(out, inp) } + reduceOperationPrimitive(axes, keepDims, initOutputValue = PrimitiveType.MAX_VALUE_FOR_MIN) { out: InlinePrimitive, inp: InlinePrimitive -> InlinePrimitive(minOf(out.value, inp.value)) } diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/primitive/PrimitiveReduceOperation.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/primitive/PrimitiveReduceOperation.kt index 61907cd90..888a8af11 100644 --- a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/primitive/PrimitiveReduceOperation.kt +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/primitive/PrimitiveReduceOperation.kt @@ -8,13 +8,14 @@ import io.kinference.ndarray.arrays.pointers.forEach import io.kinference.primitives.annotations.GenerateNameFromPrimitives import io.kinference.primitives.annotations.GeneratePrimitives import io.kinference.primitives.types.* +import io.kinference.utils.inlines.InlinePrimitive @GenerateNameFromPrimitives internal suspend fun PrimitiveNDArray.reduceOperationPrimitive( axes: IntArray, keepDims: Boolean, initOutputValue: PrimitiveType? = null, - operation: (output: PrimitiveType, input: PrimitiveType) -> PrimitiveType + operation: (output: InlinePrimitive, input: InlinePrimitive) -> InlinePrimitive ): PrimitiveNDArray { if (axes.isEmpty()) return this @@ -40,7 +41,7 @@ internal suspend fun PrimitiveNDArray.reduceOperationPrimitive( val inputPointer = this.array.pointer(inputOffset) val outputPointer = outputArray.array.pointer(outputOffset) - outputPointer.accept(inputPointer, blockToApply) { dst: PrimitiveType, src: PrimitiveType -> operation(dst, src) } + outputPointer.accept(inputPointer, blockToApply) { dst: PrimitiveType, src: PrimitiveType -> operation(InlinePrimitive(dst), InlinePrimitive(src)).value } } this.shape.lastIndex -> { val dim = this.shape[axis] @@ -48,7 +49,7 @@ internal suspend fun PrimitiveNDArray.reduceOperationPrimitive( val outputPointer = outputArray.array.pointer(outputOffset) var accumulator = outputPointer.get() - inputPointer.forEach(dim) { accumulator = operation(accumulator, it) } + inputPointer.forEach(dim) { accumulator = operation(InlinePrimitive(accumulator), InlinePrimitive(it)).value } outputPointer.set(accumulator) } else -> { diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/prod/ReduceProdPrimitive.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/prod/ReduceProdPrimitive.kt index 4780e2b8d..d074cc2f7 100644 --- a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/prod/ReduceProdPrimitive.kt +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/prod/ReduceProdPrimitive.kt @@ -15,7 +15,8 @@ import io.kinference.primitives.annotations.GeneratePrimitives import io.kinference.primitives.types.DataType import io.kinference.primitives.types.PrimitiveType import io.kinference.primitives.annotations.MakePublic +import io.kinference.utils.inlines.InlinePrimitive @MakePublic internal suspend fun PrimitiveNDArray.reduceProd(axes: IntArray, keepDims: Boolean) = - reduceOperationPrimitive(axes, keepDims, initOutputValue = PrimitiveConstants.ONE) { out: PrimitiveType, inp: PrimitiveType -> out * inp } + reduceOperationPrimitive(axes, keepDims, initOutputValue = PrimitiveConstants.ONE) { out: InlinePrimitive, inp: InlinePrimitive -> out * inp } diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/sumSquare/ReduceSumSquarePrimitive.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/sumSquare/ReduceSumSquarePrimitive.kt index 15e609f74..b6821dd2a 100644 --- a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/sumSquare/ReduceSumSquarePrimitive.kt +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/reduce/sumSquare/ReduceSumSquarePrimitive.kt @@ -14,7 +14,8 @@ import io.kinference.primitives.annotations.GeneratePrimitives import io.kinference.primitives.types.DataType import io.kinference.primitives.types.PrimitiveType import io.kinference.primitives.annotations.MakePublic +import io.kinference.utils.inlines.InlinePrimitive @MakePublic internal suspend fun PrimitiveNDArray.reduceSumSquare(axes: IntArray, keepDims: Boolean) = - reduceOperationPrimitive(axes, keepDims) { out: PrimitiveType, inp: PrimitiveType -> out + inp * inp } + reduceOperationPrimitive(axes, keepDims) { out: InlinePrimitive, inp: InlinePrimitive -> out + inp * inp } diff --git a/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ConcurrentQueue.kt b/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ConcurrentQueue.kt new file mode 100644 index 000000000..fc7b9e875 --- /dev/null +++ b/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ConcurrentQueue.kt @@ -0,0 +1,6 @@ +package io.kinference.utils + +expect class ConcurrentQueue() { + fun removeFirstOrNull(): T? + fun addLast(element: T) +} diff --git a/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ModelContext.kt b/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ModelContext.kt deleted file mode 100644 index 6e0f50f73..000000000 --- a/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ModelContext.kt +++ /dev/null @@ -1,8 +0,0 @@ -package io.kinference.utils - -import kotlin.coroutines.CoroutineContext - -data class ModelContext(val modelName: String, val cycleId: String) : CoroutineContext.Element { - companion object Key : CoroutineContext.Key - override val key: CoroutineContext.Key<*> get() = Key -} diff --git a/utils/utils-common/src/jsMain/kotlin/io/kinference/utils/ConcurrentQueue.kt b/utils/utils-common/src/jsMain/kotlin/io/kinference/utils/ConcurrentQueue.kt new file mode 100644 index 000000000..310ba2ce1 --- /dev/null +++ b/utils/utils-common/src/jsMain/kotlin/io/kinference/utils/ConcurrentQueue.kt @@ -0,0 +1,13 @@ +package io.kinference.utils + +actual class ConcurrentQueue actual constructor() { + private val queue: ArrayDeque = ArrayDeque() + + actual fun removeFirstOrNull(): T? { + return queue.removeFirstOrNull() + } + + actual fun addLast(element: T) { + queue.addLast(element) + } +} diff --git a/utils/utils-common/src/jvmMain/kotlin/io/kinference/utils/ConcurrentQueue.kt b/utils/utils-common/src/jvmMain/kotlin/io/kinference/utils/ConcurrentQueue.kt new file mode 100644 index 000000000..ab0ed35dc --- /dev/null +++ b/utils/utils-common/src/jvmMain/kotlin/io/kinference/utils/ConcurrentQueue.kt @@ -0,0 +1,15 @@ +package io.kinference.utils + +import java.util.concurrent.ConcurrentLinkedQueue + +actual class ConcurrentQueue actual constructor() { + private val queue: ConcurrentLinkedQueue = ConcurrentLinkedQueue() + + actual fun removeFirstOrNull(): T? { + return queue.poll() + } + + actual fun addLast(element: T) { + queue.offer(element) + } +}