diff --git a/inference/inference-core/src/commonMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt b/inference/inference-core/src/commonMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt index d3a29f9f0..f6477867c 100644 --- a/inference/inference-core/src/commonMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt +++ b/inference/inference-core/src/commonMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt @@ -65,7 +65,7 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map - val resultMarker: Array<() -> Unit> + val resultMarker: Array if (past == null || past.linearSize == 0) { resultBlocks = kBlocks.plus(vBlocks) @@ -84,7 +84,7 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map(2 * batchSize * numHeads * presentDims[3] * blocksInRow) - val futureResMarker = arrayOfNulls<() -> Unit>(2 * batchSize * numHeads * presentDims[3] * blocksInRow) + val futureResMarker = arrayOfNulls(2 * batchSize * numHeads * presentDims[3] * blocksInRow) var resBlockIdx = 0 var pastBlocIdx = 0 @@ -109,7 +109,7 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map - resultMarker = futureResMarker as Array<() -> Unit> + resultMarker = futureResMarker as Array } return FloatNDArray(FloatTiledArray(resultBlocks, resultMarker), Strides(presentDims)) diff --git a/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt b/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt index af6751243..208ed32e5 100644 --- a/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt +++ b/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt @@ -1,5 +1,7 @@ package io.kinference.ndarray.arrays +typealias StateMarker = () -> Unit + enum class ArrayTypes(val index: Int) { ByteArray(0), UByteArray(1), diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayDispatcher.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayDispatcher.kt index def0b3b48..f58ae04b6 100644 --- a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayDispatcher.kt +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayDispatcher.kt @@ -1,13 +1,13 @@ package io.kinference.ndarray.arrays.memory import io.kinference.ndarray.arrays.ArrayTypes -import io.kinference.utils.PlatformQueue +import io.kinference.utils.ConcurrentQueue internal object ArrayDispatcher { private const val INIT_SIZE_VALUE: Int = 2 private val typeSize: Int = ArrayTypes.entries.size - private val unusedArrays: PlatformQueue = PlatformQueue() + private val unusedArrays: ConcurrentQueue = ConcurrentQueue() fun getStorage(): ArrayStorage { return unusedArrays.removeFirstOrNull() ?: ArrayStorage(typeSize, INIT_SIZE_VALUE) diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt index f42176cbe..eda58c092 100644 --- a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt @@ -17,7 +17,7 @@ import kotlin.math.min @GenerateNameFromPrimitives @MakePublic -internal class PrimitiveTiledArray(val blocks: Array, val marker: Array<()->Unit> = emptyMarker) { +internal class PrimitiveTiledArray(val blocks: Array, val marker: Array = emptyMarker) { val size: Int val blockSize: Int = if (blocks.isEmpty()) 0 else blocks.first().size val blocksNum: Int = blocks.size @@ -28,7 +28,7 @@ internal class PrimitiveTiledArray(val blocks: Array, val marker companion object { val type: ArrayTypes = ArrayTypes.valueOf(PrimitiveArray::class.simpleName!!) - private val emptyMarker: Array<()->Unit> = arrayOf() + private val emptyMarker: Array = arrayOf() suspend operator fun invoke(strides: Strides): PrimitiveTiledArray { val blockSize = blockSizeByStrides(strides) diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt index d3a45033b..555243f19 100644 --- a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt @@ -25,23 +25,10 @@ internal suspend fun computeGeluPrimitive(input: PrimitiveNDArray, bias: Primiti val blockSize = input.array.blockSize -// // This approach when arrays acquired before parallelizeByBlocks() is faster -// val coroutineContext = coroutineContext[ModelContext.Key]!! -// val modelName = coroutineContext.modelName -// val inferenceCycle = coroutineContext.cycleId -// -// val coroutineCount = countCoroutinesByData(blockSize, inputBlocks.size, 2048) -// val containerTemporaryBlockArrays = ArrayDispatcher.getArrayContainers(PrimitiveTiledArray.type, blockSize, coroutineCount) -// val containerTemporaryBlockAbsArrays = ArrayDispatcher.getArrayContainers(PrimitiveTiledArray.type, blockSize, coroutineCount) -// val temporaryBlockArrays = Array(containerTemporaryBlockArrays.size) { i -> (containerTemporaryBlockArrays[i] as PrimitiveArrayContainer).array } -// val temporaryBlockAbsArrays = Array(containerTemporaryBlockAbsArrays.size) { i -> (containerTemporaryBlockAbsArrays[i] as PrimitiveArrayContainer).array } - // Constant 2048 was precomputed on M1 Max processor // With this constant two launches work faster than single thread without launches // TODO: (cupertank) Remove constants parallelizeByBlocks(blockSize, inputBlocks.size, 2048) { blockStart, blockEnd, coroutineIndex -> -// val temporaryBlock = temporaryBlockArrays[coroutineIndex] -// val temporaryBlockAbs = temporaryBlockAbsArrays[coroutineIndex] val temporaryBlock = PrimitiveArray(blockSize) val temporaryBlockAbs = PrimitiveArray(blockSize) diff --git a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/gelu/FastGeluPrimitive.kt b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/gelu/FastGeluPrimitive.kt index 6215fc0d0..c429b11ef 100644 --- a/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/gelu/FastGeluPrimitive.kt +++ b/ndarray/ndarray-core/src/commonMain/kotlin/io/kinference/ndarray/extensions/gelu/FastGeluPrimitive.kt @@ -29,15 +29,10 @@ internal suspend fun fastGeluPrimitive(input: PrimitiveNDArray, bias: PrimitiveN val blockSize = input.array.blockSize -// val coroutineCount = countCoroutinesByData(blockSize, inputBlocks.size, 2048) -// val containerArray = ArrayDispatcher.getArrayContainers(PrimitiveTiledArray.type, blockSize, coroutineCount) -// val temporaryBlockExpArrays = Array(containerArray.size) { i -> (containerArray[i] as PrimitiveArrayContainer).array } - // Constant 2048 was precomputed on M1 Max processor // With this constant two launches work faster than single thread without launches // TODO: (cupertank) Remove constants - parallelizeByBlocks(blockSize, inputBlocks.size, 2048) { blockStart, blockEnd, coroutineIndex -> -// val temporaryBlockExp = temporaryBlockExpArrays[coroutineIndex] + parallelizeByBlocks(blockSize, inputBlocks.size, 2048) { blockStart, blockEnd, _ -> val temporaryBlockExp = PrimitiveArray(blockSize) for (blockIdx in blockStart until blockEnd) { val outputBlock = outputBlocks[blockIdx] diff --git a/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/PlatformQueue.kt b/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ConcurrentQueue.kt similarity index 71% rename from utils/utils-common/src/commonMain/kotlin/io/kinference/utils/PlatformQueue.kt rename to utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ConcurrentQueue.kt index 0aa0caa27..fc7b9e875 100644 --- a/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/PlatformQueue.kt +++ b/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ConcurrentQueue.kt @@ -1,6 +1,6 @@ package io.kinference.utils -expect class PlatformQueue() { +expect class ConcurrentQueue() { fun removeFirstOrNull(): T? fun addLast(element: T) } diff --git a/utils/utils-common/src/jsMain/kotlin/io/kinference/utils/PlatformQueue.kt b/utils/utils-common/src/jsMain/kotlin/io/kinference/utils/ConcurrentQueue.kt similarity index 81% rename from utils/utils-common/src/jsMain/kotlin/io/kinference/utils/PlatformQueue.kt rename to utils/utils-common/src/jsMain/kotlin/io/kinference/utils/ConcurrentQueue.kt index 73824e4eb..310ba2ce1 100644 --- a/utils/utils-common/src/jsMain/kotlin/io/kinference/utils/PlatformQueue.kt +++ b/utils/utils-common/src/jsMain/kotlin/io/kinference/utils/ConcurrentQueue.kt @@ -1,6 +1,6 @@ package io.kinference.utils -actual class PlatformQueue actual constructor() { +actual class ConcurrentQueue actual constructor() { private val queue: ArrayDeque = ArrayDeque() actual fun removeFirstOrNull(): T? { diff --git a/utils/utils-common/src/jvmMain/kotlin/io/kinference/utils/PlatformQueue.kt b/utils/utils-common/src/jvmMain/kotlin/io/kinference/utils/ConcurrentQueue.kt similarity index 84% rename from utils/utils-common/src/jvmMain/kotlin/io/kinference/utils/PlatformQueue.kt rename to utils/utils-common/src/jvmMain/kotlin/io/kinference/utils/ConcurrentQueue.kt index c8268c091..ab0ed35dc 100644 --- a/utils/utils-common/src/jvmMain/kotlin/io/kinference/utils/PlatformQueue.kt +++ b/utils/utils-common/src/jvmMain/kotlin/io/kinference/utils/ConcurrentQueue.kt @@ -2,7 +2,7 @@ package io.kinference.utils import java.util.concurrent.ConcurrentLinkedQueue -actual class PlatformQueue actual constructor() { +actual class ConcurrentQueue actual constructor() { private val queue: ConcurrentLinkedQueue = ConcurrentLinkedQueue() actual fun removeFirstOrNull(): T? {