diff --git a/README.md b/README.md index b209fb9db..0d33e61aa 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ it is highly recommended to use KInference TensorFlow.js backend instead for mor KInference Core dependency coordinates: ```kotlin dependencies { - api("io.kinference", "inference-core", "0.2.22") + api("io.kinference", "inference-core", "0.2.23") } ``` @@ -67,7 +67,7 @@ This backend is recommended for JavaScript projects. TensorFlow.js backend dependency coordinates: ```kotlin dependencies { - api("io.kinference", "inference-tfjs", "0.2.22") + api("io.kinference", "inference-tfjs", "0.2.23") } ``` @@ -81,14 +81,14 @@ To check on the system requirements, visit the following [link](https://onnxrunt ONNXRuntime CPU backend dependency coordinates: ```kotlin dependencies { - api("io.kinference", "inference-ort", "0.2.22") + api("io.kinference", "inference-ort", "0.2.23") } ``` ONNXRuntime GPU backend dependency coordinates: ```kotlin dependencies { - api("io.kinference", "inference-ort-gpu", "0.2.22") + api("io.kinference", "inference-ort-gpu", "0.2.23") } ``` @@ -104,7 +104,7 @@ Array adapter for the [kmath](https://github.com/SciProgCentre/kmath) library th Dependency coordinates: ```kotlin dependencies { - api("io.kinference", "adapter-kmath-{backend_name}", "0.2.22") + api("io.kinference", "adapter-kmath-{backend_name}", "0.2.23") } ``` @@ -114,12 +114,12 @@ Array adapter for the [multik](https://github.com/Kotlin/multik) library that wo Dependency coordinates: ```kotlin dependencies { - api("io.kinference", "adapter-multik-{backend_name}", "0.2.22") + api("io.kinference", "adapter-multik-{backend_name}", "0.2.23") } ``` ## Getting started -Let us now walk through how to get started with KInference. The latest version of KInference is *0.2.22* +Let us now walk through how to get started with KInference. 
The latest version of KInference is *0.2.23* ### Setup dependencies repository @@ -142,7 +142,7 @@ To enable the backend, you can add the chosen KInference runtime as a dependency ```kotlin dependencies { - api("io.kinference", "inference-core", "0.2.22") + api("io.kinference", "inference-core", "0.2.23") } ``` @@ -160,20 +160,20 @@ kotlin { sourceSets { val commonMain by getting { dependencies { - api("io.kinference:inference-api:0.2.22") - api("io.kinference:ndarray-api:0.2.22") + api("io.kinference:inference-api:0.2.23") + api("io.kinference:ndarray-api:0.2.23") } } val jvmMain by getting { dependencies { - api("io.kinference:inference-core:0.2.22") + api("io.kinference:inference-core:0.2.23") } } val jsMain by getting { dependencies { - api("io.kinference:inference-tfjs:0.2.22") + api("io.kinference:inference-tfjs:0.2.23") } } } diff --git a/build.gradle.kts b/build.gradle.kts index 371868781..085912ebb 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -8,7 +8,7 @@ import org.jetbrains.kotlin.gradle.targets.js.yarn.YarnRootExtension import org.jetbrains.kotlin.gradle.tasks.KotlinCompilationTask group = "io.kinference" -version = "0.2.22-kotlin18" +version = "0.2.23-kotlin18" plugins { alias(libs.plugins.kotlin.multiplatform) apply false diff --git a/buildSrc/src/main/kotlin/io/kinference/gradle/JVMTestTasks.kt b/buildSrc/src/main/kotlin/io/kinference/gradle/JVMTestTasks.kt index 250843f1c..c9c51d46a 100644 --- a/buildSrc/src/main/kotlin/io/kinference/gradle/JVMTestTasks.kt +++ b/buildSrc/src/main/kotlin/io/kinference/gradle/JVMTestTasks.kt @@ -56,6 +56,7 @@ fun KotlinJvmTarget.configureBenchmarkTests() { group = "verification" maxHeapSize = "4G" + systemProperty("kotlinx.coroutines.debug", "off") useJUnitPlatform() diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 5566bb795..e9edc52ed 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -11,6 +11,7 @@ okio = "3.4.0" onnxruntime = "1.17.0.patched-1" slf4j = 
"2.0.9" wire = "4.8.1" +fastutil = "8.5.14" primitives = "0.1.26-kotlin18" @@ -38,3 +39,4 @@ onnxruntime-gpu = { module = "com.microsoft.onnxruntime:onnxruntime_gpu", versio slf4j-api = { module = "org.slf4j:slf4j-api", version.ref = "slf4j" } slf4j-simple = { module = "org.slf4j:slf4j-simple", version.ref = "slf4j" } wire-runtime = { module = "com.squareup.wire:wire-runtime", version.ref = "wire" } +fastutil-core = { module = "it.unimi.dsi:fastutil-core", version.ref = "fastutil" } diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/KIEngine.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/KIEngine.kt index 287094629..674bbaed5 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/KIEngine.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/KIEngine.kt @@ -10,30 +10,19 @@ import io.kinference.core.optimizer.rules.OptimizerRuleSet import io.kinference.data.ONNXData import io.kinference.data.ONNXDataType import io.kinference.model.IrOptimizableEngine -import io.kinference.ndarray.arrays.memory.MemoryLimiter -import io.kinference.ndarray.arrays.memory.MemoryLimiters import io.kinference.optimizer.GraphOptimizer import io.kinference.optimizer.OptimizerRule import io.kinference.protobuf.* import io.kinference.protobuf.message.* import io.kinference.utils.CommonDataLoader -import io.kinference.utils.PlatformUtils +import io.kinference.utils.PredictionConfig +import io.kinference.utils.PredictionConfigs import okio.Buffer import okio.Path import okio.Path.Companion.toPath typealias KIONNXData = ONNXData -// Define an interface for allocation control marking output -internal interface KIONNXDataArraysReleaser { - fun markOutput() -} - -internal fun KIONNXData.markOutput() { - if (this is KIONNXDataArraysReleaser) - this.markOutput() -} - object CoreBackend : BackendInfo(name = "KInference Core CPU Backend") /** @@ -51,24 +40,24 @@ object KIEngine : IrOptimizableEngine> { fun 
protoReader(bytes: ByteArray) = ProtobufReader(Buffer().write(bytes), KI_READER_CONFIG) - suspend fun loadModel(bytes: ByteArray, optimize: Boolean, memoryLimiter: MemoryLimiter, parallelismLimit: Int): KIModel { + suspend fun loadModel(bytes: ByteArray, optimize: Boolean, predictionConfig: PredictionConfig): KIModel { val rules = if (optimize) OptimizerRuleSet.DEFAULT_OPT_RULES else emptyList() - return loadModel(bytes, rules, memoryLimiter, parallelismLimit) + return loadModel(bytes, rules, predictionConfig) } override suspend fun loadModel(bytes: ByteArray, optimize: Boolean): KIModel { - return loadModel(bytes, optimize, MemoryLimiters.NoAllocator, PlatformUtils.cores) + return loadModel(bytes, optimize, PredictionConfigs.NoAllocator) } - override suspend fun loadModel(bytes: ByteArray, rules: List>>): KIModel = loadModel(bytes, rules, MemoryLimiters.NoAllocator, PlatformUtils.cores) + override suspend fun loadModel(bytes: ByteArray, rules: List>>): KIModel = loadModel(bytes, rules, PredictionConfigs.NoAllocator) - suspend fun loadModel(bytes: ByteArray, rules: List>>, memoryLimiter: MemoryLimiter, parallelismLimit: Int): KIModel { + suspend fun loadModel(bytes: ByteArray, rules: List>>, predictionConfig: PredictionConfig): KIModel { val modelScheme = ModelProto.decode(protoReader(bytes)) - val model = KIModel(modelScheme, memoryLimiter) + val model = KIModel(modelScheme, predictionConfig) return if (rules.isNotEmpty()) { val newGraph = GraphOptimizer(model.graph).run(rules) as KIGraph - KIModel(model.id, model.name, model.opSet, newGraph, memoryLimiter, parallelismLimit) + KIModel(model.id, model.name, model.opSet, newGraph, predictionConfig) } else { model } @@ -76,12 +65,12 @@ object KIEngine : IrOptimizableEngine> { override suspend fun loadModel(bytes: ByteArray): KIModel = loadModel(bytes, optimize = true) - suspend fun loadModel(path: Path, optimize: Boolean, memoryLimiter: MemoryLimiter, parallelismLimit: Int): KIModel { - return 
loadModel(CommonDataLoader.bytes(path), optimize, memoryLimiter, parallelismLimit) + suspend fun loadModel(path: Path, optimize: Boolean, predictionConfig: PredictionConfig): KIModel { + return loadModel(CommonDataLoader.bytes(path), optimize, predictionConfig) } override suspend fun loadModel(path: Path, optimize: Boolean): KIModel { - return loadModel(path, optimize, MemoryLimiters.NoAllocator, PlatformUtils.cores) + return loadModel(path, optimize, PredictionConfigs.NoAllocator) } override suspend fun loadModel(path: Path): KIModel = loadModel(path, optimize = true) @@ -90,12 +79,12 @@ object KIEngine : IrOptimizableEngine> { return loadModel(CommonDataLoader.bytes(path), rules) } - suspend fun loadModel(path: String, optimize: Boolean, memoryLimiter: MemoryLimiter, parallelismLimit: Int): KIModel { - return loadModel(CommonDataLoader.bytes(path.toPath()), optimize, memoryLimiter, parallelismLimit) + suspend fun loadModel(path: String, optimize: Boolean, predictionConfig: PredictionConfig): KIModel { + return loadModel(CommonDataLoader.bytes(path.toPath()), optimize, predictionConfig) } override suspend fun loadModel(path: String, optimize: Boolean): KIModel { - return loadModel(path, optimize, MemoryLimiters.NoAllocator, PlatformUtils.cores) + return loadModel(path, optimize, PredictionConfigs.NoAllocator) } override suspend fun loadModel(path: String): KIModel = loadModel(path, optimize = true) diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/map/KIONNXMap.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/map/KIONNXMap.kt index f541c4c23..a1bbcf7eb 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/map/KIONNXMap.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/map/KIONNXMap.kt @@ -9,7 +9,7 @@ import io.kinference.protobuf.message.TensorProto import io.kinference.types.ValueInfo import io.kinference.types.ValueTypeInfo -class KIONNXMap(name: String?, 
data: Map>, val info: ValueTypeInfo.MapTypeInfo) : ONNXMap>, CoreBackend>(name, data), KIONNXDataArraysReleaser { +class KIONNXMap(name: String?, data: Map>, val info: ValueTypeInfo.MapTypeInfo) : ONNXMap>, CoreBackend>(name, data) { constructor(data: Map>, info: ValueInfo) : this(info.name, data, info.typeInfo as ValueTypeInfo.MapTypeInfo) override val backend = CoreBackend @@ -26,10 +26,6 @@ class KIONNXMap(name: String?, data: Map>, val info: ValueTyp override fun rename(name: String): KIONNXMap = KIONNXMap(name, data, info) - override fun markOutput() { - data.values.forEach { it.markOutput() } - } - override suspend fun clone(newName: String?): KIONNXMap { val newMap = HashMap>(data.size) for ((key, value) in data.entries) { diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/seq/KIONNXSequence.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/seq/KIONNXSequence.kt index 24b52085c..49383fca0 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/seq/KIONNXSequence.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/seq/KIONNXSequence.kt @@ -7,7 +7,7 @@ import io.kinference.data.ONNXSequence import io.kinference.protobuf.message.SequenceProto import io.kinference.types.* -class KIONNXSequence(name: String?, data: List>, val info: ValueTypeInfo.SequenceTypeInfo) : ONNXSequence>, CoreBackend>(name, data), KIONNXDataArraysReleaser { +class KIONNXSequence(name: String?, data: List>, val info: ValueTypeInfo.SequenceTypeInfo) : ONNXSequence>, CoreBackend>(name, data) { constructor(name: String?, info: ValueTypeInfo.SequenceTypeInfo, size: Int, init: (Int) -> KIONNXData<*>) : this(name, List(size, init), info) constructor(data: List>, info: ValueInfo) : this(info.name, data, info.typeInfo as ValueTypeInfo.SequenceTypeInfo) @@ -23,10 +23,6 @@ class KIONNXSequence(name: String?, data: List>, val info: ValueTy override fun rename(name: String): KIONNXSequence = 
KIONNXSequence(name, data, info) - override fun markOutput() { - data.forEach { it.markOutput() } - } - val length: Int = data.size companion object { diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt index 2c6de1a69..dba23754f 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt @@ -1,9 +1,9 @@ package io.kinference.core.data.tensor -import io.kinference.core.CoreBackend -import io.kinference.core.KIONNXDataArraysReleaser +import io.kinference.core.* import io.kinference.data.ONNXTensor import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext import io.kinference.ndarray.arrays.tiled.* import io.kinference.protobuf.FLOAT_TENSOR_TYPES import io.kinference.protobuf.message.TensorProto @@ -13,10 +13,11 @@ import io.kinference.types.ValueTypeInfo //TODO: support segments //TODO: support external data -class KITensor(name: String?, override val data: NDArrayCore, val info: ValueTypeInfo.TensorTypeInfo) : ONNXTensor(name, data), KIONNXDataArraysReleaser { +class KITensor(name: String?, override val data: NDArrayCore, val info: ValueTypeInfo.TensorTypeInfo, private var context: ManualAllocatorContext? 
= null) : ONNXTensor(name, data) { constructor(data: NDArrayCore, info: ValueInfo) : this(info.name, data, info.typeInfo as ValueTypeInfo.TensorTypeInfo) override suspend fun close() { + context?.returnNDArray(data) data.close() } @@ -24,11 +25,6 @@ class KITensor(name: String?, override val data: NDArrayCore, val info: ValueTyp return KITensor(newName, data.clone(), info) } - override fun markOutput() { - if (this.data is MemoryControlledArray) - data.markOutput() - } - suspend operator fun minus(other: KITensor): KITensor { require(this.data is NumberNDArrayCore && other.data is NumberNDArrayCore) return (this.data - other.data).asTensor() @@ -47,7 +43,7 @@ class KITensor(name: String?, override val data: NDArrayCore, val info: ValueTyp override val backend = CoreBackend override fun rename(name: String): KITensor { - return KITensor(name, data, info) + return KITensor(name, data, info, context) } companion object { diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/TensorExtensions.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/TensorExtensions.kt index 618431c01..f8e2daf19 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/TensorExtensions.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/TensorExtensions.kt @@ -1,6 +1,7 @@ package io.kinference.core.data.tensor import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext import io.kinference.ndarray.extensions.concat import io.kinference.ndarray.extensions.splitWithAxis import io.kinference.primitives.types.DataType @@ -8,9 +9,9 @@ import io.kinference.protobuf.resolveProtoDataType import io.kinference.types.TensorShape import io.kinference.types.ValueTypeInfo -fun NDArrayCore.asTensor(name: String? 
= null) = KITensor(name, this, ValueTypeInfo.TensorTypeInfo(TensorShape(this.shape), type.resolveProtoDataType())) +fun NDArrayCore.asTensor(name: String? = null, context: ManualAllocatorContext? = null) = KITensor(name, this, ValueTypeInfo.TensorTypeInfo(TensorShape(this.shape), type.resolveProtoDataType()), context) -internal fun T.asTensor(name: String? = null) = (this as NDArrayCore).asTensor(name) +internal fun T.asTensor(name: String? = null, context: ManualAllocatorContext? = null) = (this as NDArrayCore).asTensor(name, context) internal fun Collection.asONNXTensors(names: List): List { return this.zip(names).map { (data, name) -> data.asTensor(name) } diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt index 1e25b4ae1..3f78377d5 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt @@ -1,8 +1,7 @@ package io.kinference.core.model -import io.kinference.core.KIONNXData +import io.kinference.core.* import io.kinference.core.graph.KIGraph -import io.kinference.core.markOutput import io.kinference.graph.Contexts import io.kinference.model.Model import io.kinference.ndarray.arrays.memory.* @@ -18,14 +17,10 @@ class KIModel( val name: String, val opSet: OperatorSetRegistry, val graph: KIGraph, - memoryLimiter: MemoryLimiter = MemoryLimiters.NoAllocator, - parallelismLimit: Int = PlatformUtils.cores, + predictionConfig: PredictionConfig = PredictionConfigs.NoAllocator, ) : Model>, Profilable, Cacheable { private val profiles: MutableList = ArrayList() - - @OptIn(ExperimentalCoroutinesApi::class) - private val dispatcher: CoroutineDispatcher = Dispatchers.Default.limitedParallelism(parallelismLimit) - private val modelArrayStorage: ModelArrayStorage = ModelArrayStorage(memoryLimiter) + private val 
predictionContextDispatcher: PredictionContextDispatcher = PredictionContextDispatcher(predictionConfig) override fun addProfilingContext(name: String): ProfilingContext = ProfilingContext(name).apply { profiles.add(this) } override fun analyzeProfilingResults(): ProfileAnalysisEntry = profiles.analyze("Model $name") @@ -37,7 +32,6 @@ class KIModel( if (profile) addProfilingContext("Model $name") else null ) - val limiterContext = ParallelismLimiterContext(dispatcher) var coreReserved = false val results = try { withContext(NonCancellable) { @@ -45,16 +39,15 @@ class KIModel( coreReserved = true } - val allocatorContext = modelArrayStorage.createAllocatorContext() - val mixedContext = allocatorContext + limiterContext - - withContext(mixedContext) { - val coroutineContext = coroutineContext[AllocatorContext.Key]!! - val execResult = graph.execute(input, contexts) - execResult.forEach { it.markOutput() } - coroutineContext.closeAllocated() - execResult + val predictionContext = predictionContextDispatcher.getPredictionContext() + val output = if (predictionContextDispatcher.allocationMode != AllocationMode.Auto) withContext(predictionContext) { + return@withContext graph.execute(input, contexts) + } else withContext(predictionContext) { + return@withContext graph.execute(input, contexts).map { it.clone(it.name) }.toList() } + + predictionContextDispatcher.returnStorage(predictionContext) + output } finally { if (coreReserved) { ResourcesDispatcher.releaseCore() @@ -66,11 +59,11 @@ class KIModel( override suspend fun close() { graph.close() - modelArrayStorage.close() + predictionContextDispatcher.close() } override fun clearCache() { - modelArrayStorage.clearCache() + predictionContextDispatcher.clearCache() } companion object { @@ -80,14 +73,13 @@ class KIModel( suspend operator fun invoke( proto: ModelProto, - memoryLimiter: MemoryLimiter = MemoryLimiters.NoAllocator, - limiterParallelismCounter: Int = PlatformUtils.cores, + predictionConfig: PredictionConfig = 
PredictionConfigs.NoAllocator, ): KIModel { val name = "${proto.domain}:${proto.modelVersion}" val id = "$name:${generateModelId()}" val opSet = OperatorSetRegistry(proto.opSetImport) val graph = KIGraph(proto.graph!!, opSet) - return KIModel(id, name, opSet, graph, memoryLimiter, limiterParallelismCounter) + return KIModel(id, name, opSet, graph, predictionConfig) } } } diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt index 6487c46c1..0a60d0278 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt @@ -7,17 +7,21 @@ import io.kinference.core.optimizer.rules.context.AttentionContextRule import io.kinference.data.ONNXData import io.kinference.graph.Contexts import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext import io.kinference.ndarray.arrays.pointers.accept import io.kinference.ndarray.arrays.pointers.map import io.kinference.ndarray.arrays.tiled.FloatTiledArray import io.kinference.ndarray.extensions.allocateNDArray import io.kinference.ndarray.extensions.dotTransposedWithAlpha +import io.kinference.ndarray.extensions.softmax.softmax import io.kinference.operator.* import io.kinference.optimizer.GraphOptimizer.Companion.isOpt +import io.kinference.primitives.types.DataType import io.kinference.protobuf.message.AttributeProto import io.kinference.protobuf.message.TensorProto import io.kinference.utils.launchWithLimitOrDefault import kotlinx.coroutines.coroutineScope +import kotlin.coroutines.coroutineContext import kotlin.math.min import kotlin.math.sqrt @@ -25,11 +29,12 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map { val headSize = 
hiddenSize / numHeads - val output = allocateNDArray(scores.type, Strides(intArrayOf(batchSize, numHeads, seqLen, headSize))) + val outputStrides = Strides(intArrayOf(batchSize, numHeads, seqLen, headSize)) + val output = context?.getNDArray(scores.type, outputStrides, fillZeros = true) ?: allocateNDArray(scores.type, outputStrides) coroutineScope { for (batchNum in 0 until batchSize) { @@ -46,6 +51,8 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map - val resultMarker: Array if (past == null || past.linearSize == 0) { resultBlocks = kBlocks.plus(vBlocks) - resultMarker = kMarker.plus(vMarker) } else { val pastSeqLen = past.shape[3] presentDims[3] += pastSeqLen val pastBlocks = past.array.blocks - val pastMarker = past.array.marker val blocksInRow = headSize / past.array.blockSize @@ -84,60 +86,56 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map(2 * batchSize * numHeads * presentDims[3] * blocksInRow) - val futureResMarker = arrayOfNulls(2 * batchSize * numHeads * presentDims[3] * blocksInRow) var resBlockIdx = 0 var pastBlocIdx = 0 repeat(2) { presentKeyValueIdx -> val kvBlocks = if (presentKeyValueIdx == 0) kBlocks else vBlocks - val kvMarker = if (presentKeyValueIdx == 0) kMarker else vMarker var kvBlockIdx = 0 repeat(rowsSize) { pastBlocks.copyInto(futureRes, resBlockIdx, pastBlocIdx, pastBlocIdx + pastRowBlocksCount) - pastMarker.copyInto(futureResMarker, resBlockIdx, pastBlocIdx, pastBlocIdx + pastRowBlocksCount) resBlockIdx += pastRowBlocksCount pastBlocIdx += pastRowBlocksCount kvBlocks.copyInto(futureRes, resBlockIdx, kvBlockIdx, kvBlockIdx + kvRowBlocksCount) - kvMarker.copyInto(futureResMarker, resBlockIdx, kvBlockIdx, kvBlockIdx + kvRowBlocksCount) resBlockIdx += kvRowBlocksCount kvBlockIdx += kvRowBlocksCount } } resultBlocks = futureRes as Array - resultMarker = futureResMarker as Array } - return FloatNDArray(FloatTiledArray(resultBlocks, resultMarker), Strides(presentDims)) + return 
FloatNDArray(FloatTiledArray(resultBlocks), Strides(presentDims)) } internal suspend fun getScores( unidir: Boolean, q: NDArrayCore, k: NDArrayCore, v: NDArrayCore, mask: IntNDArray?, - past: NDArrayCore?, batchSize: Int, seqLen: Int, numHeads: Int, hiddenSize: Int, maskFilterValue: Float = -10_000f + past: NDArrayCore?, batchSize: Int, seqLen: Int, numHeads: Int, hiddenSize: Int, maskFilterValue: Float = -10_000f, context: ManualAllocatorContext? = null ): Pair { val headSize = hiddenSize / numHeads val pastSeqLen = past?.shape?.get(3) ?: 0 val present = makePresent(past, k, v, batchSize, seqLen, numHeads, hiddenSize) - val scores = normalizedScores(unidir, q, mask, batchSize, seqLen, pastSeqLen, headSize, numHeads, present, maskFilterValue) - return attentionScore(scores, batchSize, seqLen, numHeads, hiddenSize, present) + val scores = normalizedScores(unidir, q, mask, batchSize, seqLen, pastSeqLen, headSize, numHeads, present, maskFilterValue, context) + return attentionScore(scores, batchSize, seqLen, numHeads, hiddenSize, present, context) } private suspend fun normalizedScores( unidir: Boolean, queries: NDArrayCore, maskIndices: IntNDArray?, batchSize: Int, - seqLen: Int, pastSeqLen: Int, headSize: Int, numHeads: Int, present: NDArrayCore, maskFilterValue: Float = -10_000f + seqLen: Int, pastSeqLen: Int, headSize: Int, numHeads: Int, present: NDArrayCore, maskFilterValue: Float = -10_000f, context: ManualAllocatorContext? 
= null ): NumberNDArrayCore { val allSeqLen = present.shape[3] - val scores = allocateNDArray(queries.type, Strides(intArrayOf(batchSize, numHeads, seqLen, allSeqLen))) as MutableNumberNDArrayCore + val scoresStrides = Strides(intArrayOf(batchSize, numHeads, seqLen, allSeqLen)) + val scores = (context?.getNDArray(queries.type, scoresStrides, fillZeros = true) ?: allocateNDArray(queries.type, scoresStrides)) as MutableNumberNDArrayCore - val maskData = maskIndices?.maskFromIndices(unidir, batchSize, seqLen, pastSeqLen, maskFilterValue) + val maskData = maskIndices?.maskFromIndices(unidir, batchSize, seqLen, pastSeqLen, maskFilterValue, context) val alpha = 1.0 / sqrt(headSize.toDouble()) @@ -158,14 +156,23 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map if (this != null) { @@ -245,12 +252,13 @@ class AttentionVer1(name: String, attributes: Map>, input internal suspend fun initQueryKeyValue( input: NDArrayCore, weights: NDArrayCore, bias: NDArrayCore, - batchSize: Int, seqLen: Int, hiddenSize: Int, numHeads: Int + batchSize: Int, seqLen: Int, hiddenSize: Int, numHeads: Int, context: ManualAllocatorContext? = null ): Array { input as NumberNDArrayCore val headSize = hiddenSize / numHeads - val qkv = Array(3) { allocateNDArray(input.type, Strides(intArrayOf(batchSize, numHeads, seqLen, headSize))) } + val qkvStrides = Strides(intArrayOf(batchSize, numHeads, seqLen, headSize)) + val qkv = Array(3) { context?.getNDArray(input.type, qkvStrides, fillZeros = true) ?: allocateNDArray(input.type, qkvStrides) } coroutineScope { for (qkvIdx in 0 until 3) { @@ -279,6 +287,8 @@ class AttentionVer1(name: String, attributes: Map>, input private val maskFilterValue: Float by attribute("mask_filter_value") { it: Number -> it.toFloat() } override suspend fun > apply(contexts: Contexts, inputs: List): List { + val context = coroutineContext[ManualAllocatorContext] + val input = inputs[0]!! val weights = inputs[1]!! 
@@ -296,10 +306,10 @@ class AttentionVer1(name: String, attributes: Map>, input input.data, preparedWeights.data, preparedBias.data, - batchSize, seqLen, hiddenSize, numHeads, + batchSize, seqLen, hiddenSize, numHeads, context ) - val (scores, present) = getScores(unidir, queries, keys, values, maskIndices, past, batchSize, seqLen, numHeads, hiddenSize, maskFilterValue) - return listOf(scores.asTensor(), present.asTensor()) + val (scores, present) = getScores(unidir, queries, keys, values, maskIndices, past, batchSize, seqLen, numHeads, hiddenSize, maskFilterValue, context) + return listOf(scores.asTensor(context = context), present.asTensor(context = context)) } } diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt index b1861b281..5fad8cd77 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/EmbedLayerNormalization.kt @@ -1,15 +1,17 @@ package io.kinference.core.operators.layer.normalization import io.kinference.attribute.Attribute -import io.kinference.core.data.tensor.KITensor -import io.kinference.core.data.tensor.asONNXTensors +import io.kinference.core.data.tensor.* import io.kinference.data.ONNXData import io.kinference.graph.Contexts import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext import io.kinference.ndarray.arrays.pointers.* import io.kinference.operator.* +import io.kinference.primitives.types.DataType import io.kinference.protobuf.message.AttributeProto.AttributeType import io.kinference.protobuf.message.TensorProto +import kotlin.coroutines.coroutineContext import kotlin.math.sqrt sealed class 
EmbedLayerNormalization( @@ -73,9 +75,12 @@ class EmbedLayerNormalizationVer1( private data class NormalizeResult(val output: FloatNDArray, val embeddingSum: FloatNDArray) - internal suspend fun createMaskIndices(mask: IntNDArray?, batchSize: Int, seqLen: Int): NumberNDArrayCore { - val maskIndices = MutableIntNDArray(intArrayOf(batchSize)) - if (mask == null) return maskIndices + internal suspend fun createMaskIndices(mask: IntNDArray?, batchSize: Int, seqLen: Int, context: ManualAllocatorContext? = null): NumberNDArrayCore { + val strides = Strides(intArrayOf(batchSize)) + val maskIndices = (context?.getNDArray(DataType.INT, strides) ?: MutableIntNDArray(strides)) as MutableIntNDArray + + if (mask == null) + return maskIndices.also { it.fill(0) } val pointer = mask.array.pointer() val maskIndicesPointer = maskIndices.array.pointer() @@ -95,12 +100,15 @@ class EmbedLayerNormalizationVer1( private suspend fun normalize( epsilon: Float, inputIds: IntNDArray, segmentIds: IntNDArray?, wordEmbed: FloatNDArray, posEmbed: FloatNDArray, - segmentEmbed: FloatNDArray?, gamma: FloatNDArray, beta: FloatNDArray, positionIds: IntNDArray? + segmentEmbed: FloatNDArray?, gamma: FloatNDArray, beta: FloatNDArray, positionIds: IntNDArray?, context: ManualAllocatorContext? 
= null ): NormalizeResult { val (batchSize, seqLen) = inputIds.shape val (_, hiddenSize) = wordEmbed.shape - val output = MutableFloatNDArray(intArrayOf(batchSize, seqLen, hiddenSize)) - val embeddingSum = MutableFloatNDArray(intArrayOf(batchSize, seqLen, hiddenSize)) + + val outputStrides = Strides(intArrayOf(batchSize, seqLen, hiddenSize)) + + val output = (context?.getNDArray(DataType.FLOAT, outputStrides, fillZeros = false) ?: MutableFloatNDArray(outputStrides)) as MutableFloatNDArray + val embeddingSum = (context?.getNDArray(DataType.FLOAT, outputStrides, fillZeros = false) ?: MutableFloatNDArray(outputStrides)) as MutableFloatNDArray for (batch in 0 until batchSize) { val blockIdx = batch * seqLen @@ -167,6 +175,8 @@ class EmbedLayerNormalizationVer1( } override suspend fun > apply(contexts: Contexts, inputs: List): List { + val manualContext = coroutineContext[ManualAllocatorContext] + val inputIds = inputs[0]!!.data as IntNDArray val segmentIds = inputs[1]?.data as IntNDArray? val wordEmbed = inputs[2]!!.data as FloatNDArray @@ -177,8 +187,12 @@ class EmbedLayerNormalizationVer1( val mask = inputs.getOrNull(7)?.data as IntNDArray? val positionIds = inputs.getOrNull(8)?.data as IntNDArray? 
- val (normalized, embedSum) = normalize(epsilon, inputIds, segmentIds, wordEmbed, posEmbed, segmentEmbed, gamma, beta, positionIds) + val (normalized, embedSum) = normalize(epsilon, inputIds, segmentIds, wordEmbed, posEmbed, segmentEmbed, gamma, beta, positionIds, manualContext) val maskIndices = createMaskIndices(mask, inputIds.shape[0], inputIds.shape[1]) - return listOf(normalized, maskIndices, embedSum).asONNXTensors(outputs) + return listOf( + normalized.asTensor(context = manualContext), + maskIndices.asTensor(context = manualContext), + embedSum.asTensor(context = manualContext) + ) } } diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt index 6b6243ba3..598a14c26 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/normalization/SkipLayerNormalization.kt @@ -7,10 +7,13 @@ import io.kinference.data.ONNXData import io.kinference.graph.Contexts import io.kinference.ndarray.arrays.FloatNDArray import io.kinference.ndarray.arrays.MutableFloatNDArray +import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext import io.kinference.ndarray.arrays.pointers.* import io.kinference.operator.* +import io.kinference.primitives.types.DataType import io.kinference.protobuf.message.AttributeProto import io.kinference.protobuf.message.TensorProto +import kotlin.coroutines.coroutineContext import kotlin.math.sqrt sealed class SkipLayerNormalization(name: String, info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { @@ -104,8 +107,10 @@ class SkipLayerNormalizationVer1(name: String, attributes: Map> apply(contexts: Contexts, inputs: 
List): List { + val manualContext = coroutineContext[ManualAllocatorContext] + val input = inputs[0]!!.data as FloatNDArray - val output = MutableFloatNDArray(input.strides) + val output = (manualContext?.getNDArray(DataType.FLOAT, input.strides, fillZeros = false) ?: MutableFloatNDArray(input.strides)) as MutableFloatNDArray input.normalize( skip = inputs[1]!!.data as FloatNDArray, gamma = inputs[2]!!.data as FloatNDArray, @@ -114,6 +119,7 @@ class SkipLayerNormalizationVer1(name: String, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { companion object { @@ -52,7 +55,16 @@ class AddVer7(name: String, attributes: Map>, inputs: Lis } override suspend fun > apply(contexts: Contexts, inputs: List): List { - val result = (inputs[0]!!.data as NumberNDArrayCore) + (inputs[1]!!.data as NumberNDArrayCore) - return listOf(result.asTensor("C")) + val manualContext = coroutineContext[ManualAllocatorContext] + + val left = inputs[0]!!.data as NumberNDArrayCore + val right = inputs[1]!!.data as NumberNDArrayCore + + val destShape = broadcastShape(listOf(left.shape, right.shape)) + val destStrides = Strides(destShape) + val dest = (manualContext?.getNDArray(left.type, destStrides) ?: allocateNDArray(left.type, destStrides)) as MutableNumberNDArrayCore + + val result = left.plus(right, dest) //(inputs[0]!!.data as NumberNDArrayCore) + (inputs[1]!!.data as NumberNDArrayCore) + return listOf(result.asTensor("C", manualContext)) } } diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt index 0be701b4e..da93d0e8a 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/BiasGelu.kt @@ -5,9 +5,13 @@ import io.kinference.core.data.tensor.KITensor import 
io.kinference.core.data.tensor.asTensor import io.kinference.data.ONNXData import io.kinference.graph.Contexts +import io.kinference.ndarray.arrays.MutableNumberNDArrayCore import io.kinference.ndarray.arrays.NumberNDArrayCore +import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext +import io.kinference.ndarray.extensions.allocateNDArray import io.kinference.ndarray.extensions.gelu.biasGelu import io.kinference.operator.* +import kotlin.coroutines.coroutineContext sealed class BiasGelu(name: String, info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { companion object { @@ -39,16 +43,20 @@ class BiasGeluVer1(name: String, attributes: Map> = empty } override suspend fun > apply(contexts: Contexts, inputs: List): List { + val manualContext = coroutineContext[ManualAllocatorContext] + val input = inputs[0]!!.data as NumberNDArrayCore val bias = inputs[1]!!.data as NumberNDArrayCore require(input.shape.last() == bias.shape.last()) { "Last dimensions of input and bias tensors must be equal" } + val dest = (manualContext?.getNDArray(input.type, input.strides) ?: allocateNDArray(input.type, input.strides)) as MutableNumberNDArrayCore + // Uses ERF formula with fractional error less than x.xx * 10 ^ -4. // Algorithm 26.2.17 in Abromowitz and Stegun, Handbook of Mathematical. 
// Another possible ERF implementation (several ms faster): // https://github.com/apache/commons-numbers/blob/master/commons-numbers-gamma/src/main/java/org/apache/commons/numbers/gamma/BoostErf.java - return listOf(biasGelu(input, bias).asTensor("C")) + return listOf(biasGelu(input, bias, dest).asTensor("C", manualContext)) } } diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt index 8c1735ea3..4165c554c 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/MatMul.kt @@ -5,9 +5,13 @@ import io.kinference.core.data.tensor.KITensor import io.kinference.core.data.tensor.asTensor import io.kinference.data.ONNXData import io.kinference.graph.Contexts -import io.kinference.ndarray.arrays.NumberNDArrayCore +import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext +import io.kinference.ndarray.broadcasting.Broadcasting +import io.kinference.ndarray.extensions.allocateNDArray import io.kinference.operator.* import io.kinference.protobuf.message.TensorProto +import kotlin.coroutines.coroutineContext sealed class MatMul(name: String, info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { companion object { @@ -46,8 +50,16 @@ class MatMulVer1(name: String, attributes: Map>, inputs: } override suspend fun > apply(contexts: Contexts, inputs: List): List { + val manualContext = coroutineContext[ManualAllocatorContext] + val first = inputs[0]!!.data as NumberNDArrayCore val second = inputs[1]!!.data as NumberNDArrayCore - return listOf((first.matmul(second)).asTensor("Y")) + + val destShape = Broadcasting.broadcastShapeForMatmul(first.shape, second.shape) + val destStrides = Strides(destShape) 
+ + val dest = (manualContext?.getNDArray(first.type, destStrides, fillZeros = true) ?: allocateNDArray(first.type, destStrides)) as MutableNumberNDArrayCore + + return listOf((first.matmul(second, dest)).asTensor("Y", manualContext)) } } diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt index 5ce45c866..1bfb35fee 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Cast.kt @@ -6,6 +6,7 @@ import io.kinference.core.data.tensor.asTensor import io.kinference.data.ONNXData import io.kinference.graph.Contexts import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.contexts.ManualAllocatorContext import io.kinference.ndarray.arrays.pointers.mapTo import io.kinference.ndarray.arrays.tiled.* import io.kinference.operator.* @@ -13,6 +14,7 @@ import io.kinference.primitives.types.DataType import io.kinference.protobuf.FLOAT_TENSOR_TYPES import io.kinference.protobuf.message.AttributeProto import io.kinference.protobuf.message.TensorProto +import kotlin.coroutines.coroutineContext sealed class Cast(name: String, info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { companion object { @@ -41,65 +43,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li internal val VERSION = VersionInfo(sinceVersion = 6) private val INFO = OperatorInfo("Cast", ATTRIBUTES_INFO, INPUTS_INFO, OUTPUTS_INFO, VERSION, OperatorInfo.DEFAULT_DOMAIN) - private suspend fun castByte(array: ByteNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castByte(array: ByteNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? 
= null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toFloat() } output } TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UBYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUByte() } output } TensorProto.DataType.INT8 -> array TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUShort() } output } TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toShort() } output } TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt() } output } TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: 
LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong() } output } TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != (0).toByte() } output } TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toDouble() } output } TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUInt() } output } TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toULong() } output } @@ -108,65 +110,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - private suspend fun castShort(array: ShortNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castShort(array: ShortNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? 
= null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toFloat() } output } TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UBYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUByte() } output } TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toByte() } output } TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUShort() } output } TensorProto.DataType.INT16 -> array TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt() } output } TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: 
LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong() } output } TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != (0).toShort() } output } TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toDouble() } output } TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUInt() } output } TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toULong() } output } @@ -175,66 +177,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - - private suspend fun castInt(array: IntNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castInt(array: IntNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? 
= null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toFloat() } output } TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UBYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUByte() } output } TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toByte() } output } TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUShort() } output } TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toShort() } output } TensorProto.DataType.INT32 -> array TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: 
LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong() } output } TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) - array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != 0 } + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray + array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != (0).toInt() } output } TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toDouble() } output } TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUInt() } output } TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toULong() } output } @@ -243,66 +244,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - - private suspend fun castLong(array: LongNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castLong(array: LongNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? 
= null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toFloat() } output } TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UBYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUByte() } output } TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toByte() } output } TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUShort() } output } TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toShort() } output } TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as 
IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt() } output } TensorProto.DataType.INT64 -> array TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != 0L } output } TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toDouble() } output } TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUInt() } output } TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toULong() } output } @@ -311,66 +311,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - - private suspend fun castUByte(array: UByteNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castUByte(array: UByteNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? 
= null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toFloat() } output } TensorProto.DataType.UINT8 -> array TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toByte() } output } TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUShort() } output } TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toShort() } output } TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt() } output } TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: 
LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong() } output } TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != (0).toUByte() } output } TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toDouble() } output } TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUInt() } output } TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toULong() } output } @@ -379,66 +378,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - - private suspend fun castUShort(array: UShortNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castUShort(array: UShortNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? 
= null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toFloat() } output } TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UBYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUByte() } output } TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toByte() } output } TensorProto.DataType.UINT16 -> array TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toShort() } output } TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt() } output } TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: LongNDArray(LongTiledArray(array.shape), 
array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong() } output } TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != (0).toUShort() } output } TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toDouble() } output } TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUInt() } output } TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toULong() } output } @@ -447,66 +445,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - - private suspend fun castUInt(array: UIntNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castUInt(array: UIntNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? 
= null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toFloat() } output } TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UBYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUByte() } output } TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toByte() } output } TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUShort() } output } TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toShort() } output } TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as 
IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt() } output } TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong() } output } TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != (0).toUInt() } output } TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toDouble() } output } TensorProto.DataType.UINT32 -> array TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toULong() } output } @@ -515,65 +512,64 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - - private suspend fun castULong(array: ULongNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castULong(array: ULongNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? 
= null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toFloat() } output } TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUByte() } output } TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toByte() } output } TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUShort() } output } TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toShort() } output } TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as 
IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt() } output } TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong() } output } TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != (0).toULong() } output } TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toDouble() } output } TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUInt() } output } @@ -583,66 +579,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - - private suspend fun castFloat(array: FloatNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castFloat(array: FloatNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? 
= null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> array TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong().toUByte() } output } TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt().toByte() } output } TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong().toUShort() } output } TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt().toShort() } output } TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt() } output } TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: 
LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong() } output } TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != 0f } output } TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toDouble() } output } TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUInt() } output } TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toULong() } output } @@ -651,66 +646,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - - private suspend fun castDouble(array: DoubleNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castDouble(array: DoubleNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? 
= null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toFloat() } output } TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong().toUByte() } output } TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt().toByte() } output } TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong().toUShort() } output } TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt().toShort() } output } TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.INT, array.strides) ?: 
IntNDArray(IntTiledArray(array.shape), array.strides)) as IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toInt() } output } TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toLong() } output } TensorProto.DataType.BOOL -> { - val output = BooleanNDArray(BooleanTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BOOLEAN, array.strides) ?: BooleanNDArray(BooleanTiledArray(array.shape), array.strides)) as BooleanNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it != 0.0 } output } TensorProto.DataType.DOUBLE -> array TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toUInt() } output } TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { it.toULong() } output } @@ -719,66 +713,65 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - - private suspend fun castBoolean(array: BooleanNDArray, to: TensorProto.DataType): NDArrayCore { + private suspend fun castBoolean(array: BooleanNDArray, to: TensorProto.DataType, context: ManualAllocatorContext? 
= null): NDArrayCore { return when (to) { in FLOAT_TENSOR_TYPES -> { - val output = FloatNDArray(FloatTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.FLOAT, array.strides) ?: FloatNDArray(FloatTiledArray(array.shape), array.strides)) as FloatNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) 1f else 0f } output } TensorProto.DataType.UINT8 -> { - val output = UByteNDArray(UByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: UByteNDArray(UByteTiledArray(array.shape), array.strides)) as UByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) (1).toUByte() else (0).toUByte() } output } TensorProto.DataType.INT8 -> { - val output = ByteNDArray(ByteTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.BYTE, array.strides) ?: ByteNDArray(ByteTiledArray(array.shape), array.strides)) as ByteNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) (1).toByte() else (0).toByte() } output } TensorProto.DataType.UINT16 -> { - val output = UShortNDArray(UShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.USHORT, array.strides) ?: UShortNDArray(UShortTiledArray(array.shape), array.strides)) as UShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) (1).toUShort() else (0).toUShort() } output } TensorProto.DataType.INT16 -> { - val output = ShortNDArray(ShortTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.SHORT, array.strides) ?: ShortNDArray(ShortTiledArray(array.shape), array.strides)) as ShortNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) (1).toShort() else (0).toShort() } output } TensorProto.DataType.INT32 -> { - val output = IntNDArray(IntTiledArray(array.shape), array.strides) + val output = 
(context?.getNDArray(DataType.INT, array.strides) ?: IntNDArray(IntTiledArray(array.shape), array.strides)) as IntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) 1 else 0 } output } TensorProto.DataType.INT64 -> { - val output = LongNDArray(LongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.LONG, array.strides) ?: LongNDArray(LongTiledArray(array.shape), array.strides)) as LongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) 1L else 0L } output } TensorProto.DataType.BOOL -> array TensorProto.DataType.DOUBLE -> { - val output = DoubleNDArray(DoubleTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.DOUBLE, array.strides) ?: DoubleNDArray(DoubleTiledArray(array.shape), array.strides)) as DoubleNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) 1.0 else 0.0 } output } TensorProto.DataType.UINT32 -> { - val output = UIntNDArray(UIntTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.UINT, array.strides) ?: UIntNDArray(UIntTiledArray(array.shape), array.strides)) as UIntNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) (1).toUInt() else (0).toUInt() } output } TensorProto.DataType.UINT64 -> { - val output = ULongNDArray(ULongTiledArray(array.shape), array.strides) + val output = (context?.getNDArray(DataType.ULONG, array.strides) ?: ULongNDArray(ULongTiledArray(array.shape), array.strides)) as ULongNDArray array.array.pointer().mapTo(output.array.pointer(), array.linearSize) { if (it) (1).toULong() else (0).toULong() } output } @@ -787,19 +780,19 @@ class CastVer6(name: String, attributes: Map>, inputs: Li } } - internal suspend fun castTo(input: NDArrayCore, to: TensorProto.DataType): NDArrayCore { + internal suspend fun castTo(input: NDArrayCore, to: TensorProto.DataType, context: ManualAllocatorContext? 
= null): NDArrayCore { return when (input.type) { - DataType.BYTE -> castByte(input as ByteNDArray, to) - DataType.SHORT -> castShort(input as ShortNDArray, to) - DataType.INT -> castInt(input as IntNDArray, to) - DataType.LONG -> castLong(input as LongNDArray, to) - DataType.UBYTE -> castUByte(input as UByteNDArray, to) - DataType.USHORT -> castUShort(input as UShortNDArray, to) - DataType.UINT -> castUInt(input as UIntNDArray, to) - DataType.ULONG -> castULong(input as ULongNDArray, to) - DataType.FLOAT -> castFloat(input as FloatNDArray, to) - DataType.DOUBLE -> castDouble(input as DoubleNDArray, to) - DataType.BOOLEAN -> castBoolean(input as BooleanNDArray, to) + DataType.BYTE -> castByte(input as ByteNDArray, to, context) + DataType.SHORT -> castShort(input as ShortNDArray, to, context) + DataType.INT -> castInt(input as IntNDArray, to, context) + DataType.LONG -> castLong(input as LongNDArray, to, context) + DataType.UBYTE -> castUByte(input as UByteNDArray, to, context) + DataType.USHORT -> castUShort(input as UShortNDArray, to, context) + DataType.UINT -> castUInt(input as UIntNDArray, to, context) + DataType.ULONG -> castULong(input as ULongNDArray, to, context) + DataType.FLOAT -> castFloat(input as FloatNDArray, to, context) + DataType.DOUBLE -> castDouble(input as DoubleNDArray, to, context) + DataType.BOOLEAN -> castBoolean(input as BooleanNDArray, to, context) else -> throw IllegalStateException("Unsupported type ${input.type}") } } @@ -808,11 +801,13 @@ class CastVer6(name: String, attributes: Map>, inputs: Li private val toType: Int by attribute("to") { it: Number -> it.toInt() } override suspend fun > apply(contexts: Contexts, inputs: List): List { + val manualContext = coroutineContext[ManualAllocatorContext] + val tensor = inputs.first()!! val to = TensorProto.DataType.fromValue(toType)!! 
- val casted = castTo(tensor.data, to) + val casted = castTo(tensor.data, to, manualContext) - return listOf(casted.asTensor("output")) + return listOf(casted.asTensor("output", manualContext)) } } diff --git a/inference/inference-core/src/jvmTest/kotlin/io/kinference/models/bert/BERTTest.kt b/inference/inference-core/src/jvmTest/kotlin/io/kinference/models/bert/BERTTest.kt index 30ff16b83..bed95862c 100644 --- a/inference/inference-core/src/jvmTest/kotlin/io/kinference/models/bert/BERTTest.kt +++ b/inference/inference-core/src/jvmTest/kotlin/io/kinference/models/bert/BERTTest.kt @@ -15,6 +15,6 @@ class BERTTest { @Test fun benchmark_test_vanilla_bert_performance() = TestRunner.runTest { - KIPerformanceRunner.runFromS3("bert:standard:en:v1", count = 3) + KIPerformanceRunner.runFromS3("bert:standard:en:v1", count = 20) } } diff --git a/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt b/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt deleted file mode 100644 index bea90d149..000000000 --- a/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt +++ /dev/null @@ -1,21 +0,0 @@ -package io.kinference.ndarray.arrays - -typealias StateMarker = () -> Unit - -enum class ArrayTypes(val index: Int, val size: Int) { - ByteArray(0, Byte.SIZE_BYTES), - UByteArray(1, UByte.SIZE_BYTES), - ShortArray(2, Short.SIZE_BYTES), - UShortArray(3, UShort.SIZE_BYTES), - IntArray(4, Int.SIZE_BYTES), - UIntArray(5, UInt.SIZE_BYTES), - LongArray(6, Long.SIZE_BYTES), - ULongArray(7, ULong.SIZE_BYTES), - FloatArray(8, Float.SIZE_BYTES), - DoubleArray(9, Double.SIZE_BYTES), - BooleanArray(10, 1); -} - -interface MemoryControlledArray { - fun markOutput() -} diff --git a/ndarray/ndarray-core/build.gradle.kts b/ndarray/ndarray-core/build.gradle.kts index 4b88ac6ed..8aa7ddf85 100644 --- a/ndarray/ndarray-core/build.gradle.kts +++ b/ndarray/ndarray-core/build.gradle.kts 
@@ -19,6 +19,7 @@ kotlin { implementation(libs.kotlinx.coroutines.core) implementation(libs.kotlinx.atomicfu) api(libs.apache.commons.math4.core) + api(libs.fastutil.core) } } } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/Utils.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/Utils.kt index c1af61364..546857006 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/Utils.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/Utils.kt @@ -73,18 +73,25 @@ const val ERF_COEF_3 = 1.421413741 const val ERF_COEF_4 = -1.453152027 const val ERF_COEF_5 = 1.061405429 +const val INIT_STORAGE_SIZE = 64 + internal fun IntArray.swap(leftIdx: Int, rightIdx: Int) { val temp = get(leftIdx) this[leftIdx] = this[rightIdx] this[rightIdx] = temp } + +fun interface ParallelizeBody { + operator fun invoke(start: Int, end: Int, coroutineIndex: Int) +} + /* * Parallelize with batching by minDataPerLaunch */ suspend fun parallelizeByBlocks(blockSize: Int, countBlocks: Int, minDataPerLaunch: Int, - body: (blockStart: Int, blockEnd: Int, coroutineIndex: Int) -> Unit) { + body: ParallelizeBody) { val batchSize = batchSizeByData(blockSize, countBlocks, minDataPerLaunch) @@ -101,7 +108,7 @@ suspend fun parallelizeByBlocks(blockSize: Int, } } -suspend inline fun parallelizeByRows(rowSize: Int, countRows: Int, minDataPerLaunch: Int, noinline body: (rowStart: Int, rowEnd: Int, index: Int) -> Unit) = parallelizeByBlocks(rowSize, countRows, minDataPerLaunch, body) +suspend inline fun parallelizeByRows(rowSize: Int, countRows: Int, minDataPerLaunch: Int, body: ParallelizeBody) = parallelizeByBlocks(rowSize, countRows, minDataPerLaunch, body) internal fun countCoroutinesByData(rowSize: Int, countRows: Int, minDataPerLaunch: Int): Int { val batchSize = batchSizeByData(rowSize, countRows, minDataPerLaunch) diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/BooleanNDArray.kt 
b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/BooleanNDArray.kt index 5a4e758dc..3037028d3 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/BooleanNDArray.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/BooleanNDArray.kt @@ -21,7 +21,7 @@ fun interface BooleanBinaryOperation { operator fun invoke(first: Boolean, second: Boolean): Boolean } -open class BooleanNDArray(var array: BooleanTiledArray, strides: Strides) : NDArrayCore, MemoryControlledArray { +open class BooleanNDArray(var array: BooleanTiledArray, strides: Strides) : NDArrayCore { override val type: DataType = DataType.BOOLEAN final override var strides: Strides = strides @@ -79,10 +79,6 @@ open class BooleanNDArray(var array: BooleanTiledArray, strides: Strides) : NDAr return array.blocks[0][0] } - override fun markOutput() { - array.marker.forEach { it.invoke() } - } - override suspend fun toMutable(): MutableBooleanNDArray { return MutableBooleanNDArray(array.copyOf(), strides) } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/PrimitiveNDArray.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/PrimitiveNDArray.kt index 0b391f275..f1bd91b44 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/PrimitiveNDArray.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/PrimitiveNDArray.kt @@ -30,7 +30,7 @@ import kotlin.math.* @GenerateNameFromPrimitives @MakePublic -internal open class PrimitiveNDArray(array: PrimitiveTiledArray, strides: Strides) : NumberNDArrayCore, MemoryControlledArray { +internal open class PrimitiveNDArray(array: PrimitiveTiledArray, strides: Strides) : NumberNDArrayCore { var array: PrimitiveTiledArray = array protected set @@ -85,10 +85,6 @@ internal open class PrimitiveNDArray(array: PrimitiveTiledArray, strides: Stride return array.blocks[0][0] } - override fun markOutput() { - 
array.marker.forEach { it.invoke() } - } - override suspend fun clone(): PrimitiveNDArray { return PrimitiveNDArray(array.copyOf(), Strides(shape)) } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt deleted file mode 100644 index 2ed73f878..000000000 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt +++ /dev/null @@ -1,35 +0,0 @@ -package io.kinference.ndarray.arrays.memory - -import io.kinference.ndarray.arrays.* -import kotlin.coroutines.CoroutineContext - -data class AllocatorContext internal constructor( - private val unusedContainers: ArrayStorage, - private val limiter: MemoryLimiter, - private val returnStorageFn: (ArrayStorage) -> Unit -) : CoroutineContext.Element { - private val usedContainers: ArrayDeque = ArrayDeque() - - companion object Key : CoroutineContext.Key - override val key: CoroutineContext.Key<*> get() = Key - - internal fun getArrayContainers(type: ArrayTypes, size: Int, count: Int): Array { - return if (limiter !is NoAllocatorMemoryLimiter) { - val result = Array(count) { unusedContainers.getArrayContainer(type, size) } - usedContainers.addAll(result) - result - } else { - Array(count) { ArrayContainer(type, size) } - } - } - - fun closeAllocated() { - usedContainers.forEach { - if (!it.isOutput && limiter.checkMemoryLimitAndAdd(it.sizeBytes.toLong())) { - unusedContainers[it.arrayTypeIndex, it.arraySizeIndex].addLast(it) - } - } - usedContainers.clear() - returnStorageFn(unusedContainers) - } -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt deleted file mode 100644 index d39ba62ba..000000000 --- 
a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt +++ /dev/null @@ -1,55 +0,0 @@ -package io.kinference.ndarray.arrays.memory - -import io.kinference.ndarray.arrays.* - -sealed class ArrayContainer( - val arrayTypeIndex: Int, - val arraySizeIndex: Int, - val sizeBytes: Int -) { - var isOutput: Boolean = false - private set - - val markAsOutput = { - isOutput = true - } - - companion object { - private const val EMPTY_INDEX = -1 - - operator fun invoke(type: ArrayTypes, size: Int, sizeIndex: Int = EMPTY_INDEX): ArrayContainer { - val sizeBytes: Int = type.size * size - return when (type) { - ArrayTypes.ByteArray -> ByteArrayContainer(type.index, sizeIndex, sizeBytes, ByteArray(size)) // 8-bit signed - ArrayTypes.UByteArray -> UByteArrayContainer(type.index, sizeIndex, sizeBytes, UByteArray(size)) // 8-bit unsigned - ArrayTypes.ShortArray -> ShortArrayContainer(type.index, sizeIndex, sizeBytes, ShortArray(size)) // 16-bit signed - ArrayTypes.UShortArray -> UShortArrayContainer(type.index, sizeIndex, sizeBytes, UShortArray(size)) // 16-bit unsigned - ArrayTypes.IntArray -> IntArrayContainer(type.index, sizeIndex, sizeBytes, IntArray(size)) // 32-bit signed - ArrayTypes.UIntArray -> UIntArrayContainer(type.index, sizeIndex, sizeBytes, UIntArray(size)) // 32-bit unsigned - ArrayTypes.LongArray -> LongArrayContainer(type.index, sizeIndex, sizeBytes, LongArray(size)) // 64-bit signed - ArrayTypes.ULongArray -> ULongArrayContainer(type.index, sizeIndex, sizeBytes, ULongArray(size)) // 64-bit unsigned - ArrayTypes.FloatArray -> FloatArrayContainer(type.index, sizeIndex, sizeBytes, FloatArray(size)) - ArrayTypes.DoubleArray -> DoubleArrayContainer(type.index, sizeIndex, sizeBytes, DoubleArray(size)) - ArrayTypes.BooleanArray -> BooleanArrayContainer(type.index, sizeIndex, sizeBytes, BooleanArray(size)) - else -> throw IllegalArgumentException("Unsupported array type") - } - } - - fun resetArray(arrayContainer: ArrayContainer) { - 
when (arrayContainer) { - is ByteArrayContainer -> arrayContainer.array.fill(0) // 8-bit signed - is UByteArrayContainer -> arrayContainer.array.fill(0u) // 8-bit unsigned - is ShortArrayContainer -> arrayContainer.array.fill(0) // 16-bit signed - is UShortArrayContainer -> arrayContainer.array.fill(0u) // 16-bit unsigned - is IntArrayContainer -> arrayContainer.array.fill(0) // 32-bit signed - is UIntArrayContainer -> arrayContainer.array.fill(0u) // 32-bit unsigned - is LongArrayContainer -> arrayContainer.array.fill(0L) // 64-bit signed - is ULongArrayContainer -> arrayContainer.array.fill(0U) // 64-bit unsigned - is FloatArrayContainer -> arrayContainer.array.fill(0.0f) - is DoubleArrayContainer -> arrayContainer.array.fill(0.0) - is BooleanArrayContainer -> arrayContainer.array.fill(false) - else -> throw IllegalArgumentException("Unsupported array type") - } - } - } -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt deleted file mode 100644 index 00a98c0cb..000000000 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayStorage.kt +++ /dev/null @@ -1,59 +0,0 @@ -package io.kinference.ndarray.arrays.memory - -import io.kinference.ndarray.arrays.ArrayTypes - -internal class ArrayStorage(typeLength: Int, sizeLength: Int, private val limiter: MemoryLimiter) { - /** - * Structure is as follows: - * 1. Array by predefined types (all types are known compiled time) - * 2. Array by size. Starting with 'INIT_SIZE_VALUE' element and grow it doubling (typically there are no more than 16 different sizes) - * 3. 
Queue of array containers (used as FIFO) - */ - private var storage: Array>> = - Array(typeLength) { Array(sizeLength) { ArrayDeque() } } - - private var sizeIndices: IntArray = IntArray(typeLength) - private var sizes: Array = Array(typeLength) { IntArray(sizeLength) } - - - operator fun get(typeIndex: Int, sizeIndex: Int): ArrayDeque { - return storage[typeIndex][sizeIndex] - } - - fun getArrayContainer(type: ArrayTypes, size: Int): ArrayContainer { - val tIndex = type.index - val sIndex = sizes[tIndex].indexOf(size) - - // Checking that we have this array size in our storage for this type - val idx = if (sIndex != -1) { - val array = storage[tIndex][sIndex].removeFirstOrNull() - array?.let { - ArrayContainer.resetArray(it) - limiter.deductMemory(it.sizeBytes.toLong()) - return it - } - sIndex - } else { - if (sizeIndices[tIndex] >= storage[tIndex].size) - grow(tIndex) - - val idx = sizeIndices[tIndex]++ - sizes[tIndex][idx] = size - idx - } - - return ArrayContainer(type, size, idx) - } - - private fun grow(typeIndex: Int) { - val newSize = sizes[typeIndex].size * 2 - val newStorage: Array> = Array(newSize) { ArrayDeque() } - - for (i in storage[typeIndex].indices) { - newStorage[i] = storage[typeIndex][i] - } - - storage[typeIndex] = newStorage - sizes[typeIndex] = sizes[typeIndex].copyOf(newSize) - } -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryLimiter.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryLimiter.kt deleted file mode 100644 index 775c0a895..000000000 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryLimiter.kt +++ /dev/null @@ -1,57 +0,0 @@ -package io.kinference.ndarray.arrays.memory - -import io.kinference.utils.PlatformUtils -import kotlinx.atomicfu.AtomicLong -import kotlinx.atomicfu.atomic - -interface MemoryLimiter { - /** - * Checks if the memory limit allows adding the specified amount of memory and performs the 
addition. - * - * @param added the memory in bytes to add - * @return true if the memory was added successfully and false if adding the memory exceeds the memory limit - */ - fun checkMemoryLimitAndAdd(added: Long): Boolean - - /** - * Deducts the specified amount of memory from the memory limiter. - * - * @param deducted the memory in bytes to deduct from the memory limiter - */ - fun deductMemory(deducted: Long) -} - -class BaseMemoryLimiter(private val memoryLimit: Long) : MemoryLimiter { - private var usedMemory: AtomicLong = atomic(0L) - - override fun checkMemoryLimitAndAdd(added: Long): Boolean { - val currentMemory = usedMemory.addAndGet(added) - return if (currentMemory > memoryLimit) { - usedMemory.addAndGet(-added) - false - } else true - } - - override fun deductMemory(deducted: Long) { - usedMemory.addAndGet(-deducted) - } -} - -object MemoryLimiters { - val Default: MemoryLimiter = BaseMemoryLimiter((PlatformUtils.maxHeap * 0.3).toLong()) - val NoAllocator: MemoryLimiter = NoAllocatorMemoryLimiter - - fun customLimiter(memoryLimit: Long): MemoryLimiter { - return BaseMemoryLimiter(memoryLimit) - } -} - -internal object NoAllocatorMemoryLimiter : MemoryLimiter { - override fun checkMemoryLimitAndAdd(added: Long): Boolean { - return false - } - - override fun deductMemory(deducted: Long) { - - } -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryManager.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryManager.kt new file mode 100644 index 000000000..cef20b3af --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/MemoryManager.kt @@ -0,0 +1,75 @@ +package io.kinference.ndarray.arrays.memory + +import kotlinx.atomicfu.* +import kotlinx.coroutines.* + +internal class MemoryManager internal constructor(private val memoryLimit: Long, private val cacheClearingInterval: Long, private val onCacheClear: () -> Unit) { + private var 
usedMemory: AtomicLong = atomic(0L) + private val lastAccessTime = atomic(System.currentTimeMillis()) + private val monitorJob: AtomicRef = atomic(initial = null) + private val isFinalized = atomic(initial = false) + + /** + * Checks if the memory limit allows adding the specified amount of memory and performs the addition + * + * @param sizeInBytes is the checking size of an array in bytes + * @return true if the memory was added successfully and false if adding the memory exceeds the memory limit + */ + fun checkMemoryLimitAndAdd(sizeInBytes: Long): Boolean { + // Attempt to add memory and check the limit + val successful = usedMemory.getAndUpdate { current -> + if (current + sizeInBytes > memoryLimit) current else current + sizeInBytes + } != usedMemory.value // Check if the update was successful + + return successful + } + + /** + * Resets the used memory into 0L + */ + fun resetLimit() { + usedMemory.value = 0L + } + + /** + * Updates the last access time to the current system time and starts a monitoring coroutine if it isn't already running. + * + * This function sets the `lastAccessTime` to the current system time in milliseconds. + * It also initiates a monitoring coroutine to periodically check + * if the time since the last access exceeds a predefined `cacheClearingInterval`. + * If it does, the `onCacheClear` function is triggered to handle + * any necessary cache clearing. + * The coroutine will run only if it is not already running and `isFinalized` is false. 
+ */ + fun updateLastAccessTime() { + lastAccessTime.value = System.currentTimeMillis() + + // Start monitoring if not already started + if (monitorJob.compareAndSet(expect = null, update = null) && !isFinalized.value) { + val newJob = CoroutineScope(Dispatchers.Default).launch { + while (isActive) { + delay(cacheClearingInterval) + if (System.currentTimeMillis() - lastAccessTime.value > cacheClearingInterval) { + onCacheClear() + } + } + } + if (!monitorJob.compareAndSet(expect = null, newJob)) { + newJob.cancel() // Cancel if another thread set the job + } + } + } + + /** + * Stops the monitoring process by canceling the active monitoring coroutine. + * + * This function sets the `isFinalized` flag to true, indicating that the monitoring process has been + * concluded. + * If a monitoring coroutine is currently active, it will be canceled. + */ + fun stopMonitoring() { + if (isFinalized.compareAndSet(expect = false, update = true)) { + monitorJob.getAndSet(value = null)?.cancel() + } + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ModelArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ModelArrayStorage.kt deleted file mode 100644 index c9247d060..000000000 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ModelArrayStorage.kt +++ /dev/null @@ -1,34 +0,0 @@ -package io.kinference.ndarray.arrays.memory - -import io.kinference.ndarray.arrays.ArrayTypes -import io.kinference.utils.Closeable -import java.util.concurrent.ConcurrentLinkedQueue - -class ModelArrayStorage(private val limiter: MemoryLimiter = MemoryLimiters.NoAllocator) : Closeable { - private val unusedArrays: ConcurrentLinkedQueue = ConcurrentLinkedQueue() - - companion object { - private const val INIT_SIZE_VALUE: Int = 2 - private val typeSize: Int = ArrayTypes.values().size - } - - fun createAllocatorContext(): AllocatorContext { - return AllocatorContext(getStorage(), limiter, 
::returnStorage) - } - - fun clearCache() { - unusedArrays.clear() - } - - override suspend fun close() { - clearCache() - } - - private fun getStorage(): ArrayStorage { - return unusedArrays.poll() ?: ArrayStorage(typeSize, INIT_SIZE_VALUE, limiter) - } - - private fun returnStorage(storage: ArrayStorage) { - unusedArrays.offer(storage) - } -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PredictionContextDispatcher.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PredictionContextDispatcher.kt new file mode 100644 index 000000000..801e5c66b --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PredictionContextDispatcher.kt @@ -0,0 +1,63 @@ +package io.kinference.ndarray.arrays.memory + +import io.kinference.ndarray.arrays.memory.contexts.* +import io.kinference.ndarray.arrays.memory.storage.* +import io.kinference.utils.* +import kotlinx.coroutines.* +import java.util.concurrent.ConcurrentLinkedQueue + +class PredictionContextDispatcher(private val predictionConfig: PredictionConfig) : Closeable { + private val limiter: MemoryManager = MemoryManager( + memoryLimit = predictionConfig.memoryThreshold, + cacheClearingInterval = predictionConfig.memoryClearingInterval, + onCacheClear = ::clearCache) + + private val contextQueue: ConcurrentLinkedQueue = ConcurrentLinkedQueue() + val allocationMode + get() = predictionConfig.allocationMode + + fun getPredictionContext(): PredictionContext { + val allocatorContext = when (predictionConfig.allocationMode) { + AllocationMode.NoAllocation -> getNoAllocatorContext() + AllocationMode.Manual -> getManualAllocatorContext() + AllocationMode.Auto -> getAutoAllocatorContext() + } + return allocatorContext + } + + private fun getNoAllocatorContext(): PredictionContext { + return contextQueue.poll() ?: (NoAllocatorContext(getDispatcher())) + } + + private fun getAutoAllocatorContext(): PredictionContext { + 
limiter.updateLastAccessTime() + return contextQueue.poll() ?: (AutoAllocatorContext(getDispatcher(), AutoArrayHandlingStorage(limiter))) + } + + private fun getManualAllocatorContext(): PredictionContext { + limiter.updateLastAccessTime() + return contextQueue.poll() ?: (ManualAllocatorContext(getDispatcher(), ManualArrayHandlingStorage(limiter))) + } + + @OptIn(ExperimentalCoroutinesApi::class) + private fun getDispatcher(): CoroutineDispatcher { + return Dispatchers.Default.limitedParallelism(predictionConfig.parallelismLimit) + } + + fun clearCache() { + limiter.stopMonitoring() + contextQueue.clear() + limiter.resetLimit() + } + + override suspend fun close() { + clearCache() + } + + fun returnStorage(context: PredictionContext) { + if (context is AllocatorContext<*>) { + context.finalizeContext() + } + contextQueue.offer(context) + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayContainer.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayContainer.kt deleted file mode 100644 index 8818345fe..000000000 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/PrimitiveArrayContainer.kt +++ /dev/null @@ -1,17 +0,0 @@ -@file:GeneratePrimitives(DataType.ALL) -@file:Suppress("DuplicatedCode") - -package io.kinference.ndarray.arrays.memory - -import io.kinference.primitives.annotations.GenerateNameFromPrimitives -import io.kinference.primitives.annotations.GeneratePrimitives -import io.kinference.primitives.types.DataType -import io.kinference.primitives.types.PrimitiveArray - -@GenerateNameFromPrimitives -internal class PrimitiveArrayContainer( - arrayTypeIndex: Int, - arraySizeIndex: Int, - sizeBytes: Int, - val array: PrimitiveArray -) : ArrayContainer(arrayTypeIndex, arraySizeIndex, sizeBytes) diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt 
b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt new file mode 100644 index 000000000..9af632e6b --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/AutoAllocatorContext.kt @@ -0,0 +1,16 @@ +package io.kinference.ndarray.arrays.memory.contexts + +import io.kinference.ndarray.arrays.memory.storage.AutoArrayHandlingStorage +import io.kinference.utils.* +import kotlinx.coroutines.CoroutineDispatcher +import kotlin.coroutines.* + +@OptIn(ExperimentalStdlibApi::class) +internal class AutoAllocatorContext internal constructor( + dispatcher: CoroutineDispatcher, + storage: AutoArrayHandlingStorage, +) : AllocatorContext(dispatcher, storage) { + companion object Key : AbstractCoroutineContextKey, AutoAllocatorContext>( + AllocatorContext.Key, { it as? AutoAllocatorContext } + ) +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/ManualAllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/ManualAllocatorContext.kt new file mode 100644 index 000000000..5a93917de --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/contexts/ManualAllocatorContext.kt @@ -0,0 +1,28 @@ +package io.kinference.ndarray.arrays.memory.contexts + +import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.storage.ManualArrayHandlingStorage +import io.kinference.ndarray.arrays.memory.storage.ManualStorage +import io.kinference.primitives.types.DataType +import io.kinference.utils.AllocatorContext +import kotlinx.coroutines.CoroutineDispatcher +import kotlin.coroutines.AbstractCoroutineContextKey + +@OptIn(ExperimentalStdlibApi::class) +class ManualAllocatorContext internal constructor( + dispatcher: CoroutineDispatcher, + storage: ManualArrayHandlingStorage, +) : AllocatorContext(dispatcher, storage) { + companion object Key 
: AbstractCoroutineContextKey, ManualAllocatorContext>( + AllocatorContext.Key, { it as? ManualAllocatorContext } + ) + + + fun getNDArray(dataType: DataType, strides: Strides, fillZeros: Boolean = false): MutableNDArrayCore { + return storage.getNDArray(dataType, strides, fillZeros) + } + + fun returnNDArray(ndArray: NDArrayCore) { + storage.returnNDArray(ndArray) + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt new file mode 100644 index 000000000..b0ffdbbb5 --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/AutoArrayHandlingStorage.kt @@ -0,0 +1,29 @@ +package io.kinference.ndarray.arrays.memory.storage + +import io.kinference.ndarray.arrays.memory.* +import io.kinference.utils.ArrayStorage + +internal interface TypedAutoHandlingStorage { + fun moveBlocksIntoUnused() +} + +internal class AutoArrayHandlingStorage(internal val limiter: MemoryManager) : ArrayStorage { + internal val storage: Array = arrayOf( + ByteAutoHandlingArrayStorage(), + ShortAutoHandlingArrayStorage(), + IntAutoHandlingArrayStorage(), + LongAutoHandlingArrayStorage(), + UByteAutoHandlingArrayStorage(), + UShortAutoHandlingArrayStorage(), + UIntAutoHandlingArrayStorage(), + ULongAutoHandlingArrayStorage(), + FloatAutoHandlingArrayStorage(), + DoubleAutoHandlingArrayStorage(), + BooleanAutoHandlingArrayStorage() + ) + + override fun resetState() { + storage.forEach { it.moveBlocksIntoUnused() } + limiter.resetLimit() + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/ManualArrayHandlingStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/ManualArrayHandlingStorage.kt new file mode 100644 index 000000000..227d25136 --- /dev/null +++ 
b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/ManualArrayHandlingStorage.kt @@ -0,0 +1,46 @@ +package io.kinference.ndarray.arrays.memory.storage + +import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.memory.* +import io.kinference.primitives.types.DataType +import io.kinference.utils.ArrayStorage + +internal interface TypedManualHandlingStorage { + fun getNDArray(strides: Strides, fillZeros: Boolean = false, limiter: MemoryManager): MutableNDArrayCore + fun returnNDArray(ndarray: NDArrayCore) + fun clear() +} + +interface ManualStorage : ArrayStorage { + fun getNDArray(dataType: DataType, strides: Strides, fillZeros: Boolean = false): MutableNDArrayCore + fun returnNDArray(ndArray: NDArrayCore) +} + +internal class ManualArrayHandlingStorage(private val memoryManager: MemoryManager) : ManualStorage { + private val storage: Array = arrayOf( + ByteManualHandlingArrayStorage(), + ShortManualHandlingArrayStorage(), + IntManualHandlingArrayStorage(), + LongManualHandlingArrayStorage(), + UByteManualHandlingArrayStorage(), + UShortManualHandlingArrayStorage(), + UIntManualHandlingArrayStorage(), + ULongManualHandlingArrayStorage(), + FloatManualHandlingArrayStorage(), + DoubleManualHandlingArrayStorage(), + BooleanManualHandlingArrayStorage() + ) + + override fun getNDArray(dataType: DataType, strides: Strides, fillZeros: Boolean): MutableNDArrayCore { + return storage[dataType.ordinal].getNDArray(strides, fillZeros, memoryManager) + } + + override fun returnNDArray(ndArray: NDArrayCore) { + storage[ndArray.type.ordinal].returnNDArray(ndArray) + } + + override fun resetState() { + storage.forEach { it.clear() } + memoryManager.resetLimit() + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt new file 
mode 100644 index 000000000..4cd5bb663 --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveAutoHandlingArrayStorage.kt @@ -0,0 +1,48 @@ +@file:GeneratePrimitives(DataType.ALL) +package io.kinference.ndarray.arrays.memory.storage + +import io.kinference.ndarray.INIT_STORAGE_SIZE +import io.kinference.ndarray.arrays.memory.MemoryManager +import io.kinference.ndarray.extensions.constants.PrimitiveConstants +import io.kinference.ndarray.extensions.utils.getOrPut +import io.kinference.primitives.annotations.GenerateNameFromPrimitives +import io.kinference.primitives.annotations.GeneratePrimitives +import io.kinference.primitives.types.DataType +import io.kinference.primitives.types.PrimitiveArray +import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap + +@GenerateNameFromPrimitives +internal class PrimitiveAutoHandlingArrayStorage : TypedAutoHandlingStorage { + private val used = Int2ObjectOpenHashMap>(INIT_STORAGE_SIZE) + private val unused = Int2ObjectOpenHashMap>(INIT_STORAGE_SIZE) + + companion object { + private val type = DataType.CurrentPrimitive + } + + internal fun getBlock(blocksNum: Int, blockSize: Int, limiter: MemoryManager): Array { + val unusedQueue = unused.getOrPut(blockSize) { ArrayDeque(blocksNum) } + val usedQueue = used.getOrPut(blockSize) { ArrayDeque(blocksNum) } + + val blocks = if (limiter.checkMemoryLimitAndAdd(getPrimitiveArraySizeInBytes(arraySize = blockSize * blocksNum))) { + Array(blocksNum) { + unusedQueue.removeFirstOrNull()?.apply { + fill(PrimitiveConstants.ZERO) + } ?: PrimitiveArray(blockSize) + } + } else { + Array(blocksNum) { PrimitiveArray(blockSize) } + } + + usedQueue.addAll(blocks) + + return blocks + } + + override fun moveBlocksIntoUnused() { + used.forEach { (blockSize, queue) -> + unused[blockSize]!!.addAll(queue) + queue.clear() + } + } +} diff --git 
a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveGetBlockFunctionsExtension.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveGetBlockFunctionsExtension.kt new file mode 100644 index 000000000..9280823d8 --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveGetBlockFunctionsExtension.kt @@ -0,0 +1,24 @@ +@file:GeneratePrimitives(DataType.ALL) +@file:Suppress("DuplicatedCode") +package io.kinference.ndarray.arrays.memory.storage + +import io.kinference.ndarray.arrays.memory.contexts.AutoAllocatorContext +import io.kinference.ndarray.extensions.constants.PrimitiveConstants +import io.kinference.primitives.annotations.GenerateNameFromPrimitives +import io.kinference.primitives.annotations.GeneratePrimitives +import io.kinference.primitives.types.* + +@GenerateNameFromPrimitives +internal fun AutoArrayHandlingStorage.getPrimitiveBlock(blocksNum: Int, blockSize: Int): Array { + return (storage[DataType.CurrentPrimitive.ordinal] as PrimitiveAutoHandlingArrayStorage).getBlock(blocksNum = blocksNum, blockSize = blockSize, limiter = limiter) +} + +@GenerateNameFromPrimitives +internal fun AutoAllocatorContext.getPrimitiveBlock(blocksNum: Int, blockSize: Int): Array { + return storage.getPrimitiveBlock(blocksNum = blocksNum, blockSize = blockSize) +} + +@GenerateNameFromPrimitives +internal fun getPrimitiveArraySizeInBytes(arraySize: Int): Long { + return PrimitiveConstants.SIZE_BYTES * arraySize +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt new file mode 100644 index 000000000..1c264be01 --- /dev/null +++ 
b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/storage/PrimitiveManualHandlingArrayStorage.kt @@ -0,0 +1,57 @@ +@file:GeneratePrimitives(DataType.ALL) +package io.kinference.ndarray.arrays.memory.storage + +import io.kinference.ndarray.INIT_STORAGE_SIZE +import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.PrimitiveNDArray +import io.kinference.ndarray.arrays.memory.MemoryManager +import io.kinference.ndarray.arrays.tiled.PrimitiveTiledArray +import io.kinference.ndarray.blockSizeByStrides +import io.kinference.ndarray.extensions.constants.PrimitiveConstants +import io.kinference.ndarray.extensions.utils.getOrPut +import io.kinference.primitives.annotations.* +import io.kinference.primitives.types.DataType +import io.kinference.primitives.types.PrimitiveArray +import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap + +@GenerateNameFromPrimitives +internal class PrimitiveManualHandlingArrayStorage : TypedManualHandlingStorage { + private val storage = Int2ObjectOpenHashMap>(INIT_STORAGE_SIZE) + + companion object { + private val type = DataType.CurrentPrimitive + } + + override fun getNDArray(strides: Strides, fillZeros: Boolean, limiter: MemoryManager): MutableNDArrayCore { + val blockSize = blockSizeByStrides(strides) + val blocksNum = strides.linearSize / blockSize + val blocks = if (limiter.checkMemoryLimitAndAdd(getPrimitiveArraySizeInBytes(arraySize = blockSize * blocksNum))) { + val queue = storage.getOrPut(blockSize) { ArrayDeque(blocksNum) } + Array(blocksNum) { + queue.removeFirstOrNull()?.apply { + fill(PrimitiveConstants.ZERO) + } ?: PrimitiveArray(blockSize) + } + } else { + Array(blocksNum) { PrimitiveArray(blockSize) } + } + + val tiled = PrimitiveTiledArray(blocks) + + return MutablePrimitiveNDArray(tiled, strides) + } + + override fun returnNDArray(ndarray: NDArrayCore) { + require(ndarray is PrimitiveNDArray) + val blockSize = ndarray.array.blockSize + val blocksNum = ndarray.array.blocksNum + + 
val queue = storage.getOrPut(blockSize) { ArrayDeque(blocksNum) } + + queue.addAll(ndarray.array.blocks) + } + + override fun clear() { + storage.forEach { (_, queue) -> queue.clear() } + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt index eda58c092..eee93692c 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt @@ -4,8 +4,8 @@ package io.kinference.ndarray.arrays.tiled import io.kinference.ndarray.arrays.* -import io.kinference.ndarray.arrays.memory.* -import io.kinference.ndarray.arrays.memory.PrimitiveArrayContainer +import io.kinference.ndarray.arrays.memory.contexts.AutoAllocatorContext +import io.kinference.ndarray.arrays.memory.storage.* import io.kinference.ndarray.arrays.pointers.PrimitivePointer import io.kinference.ndarray.arrays.pointers.accept import io.kinference.ndarray.blockSizeByStrides @@ -17,7 +17,7 @@ import kotlin.math.min @GenerateNameFromPrimitives @MakePublic -internal class PrimitiveTiledArray(val blocks: Array, val marker: Array = emptyMarker) { +internal class PrimitiveTiledArray(val blocks: Array) { val size: Int val blockSize: Int = if (blocks.isEmpty()) 0 else blocks.first().size val blocksNum: Int = blocks.size @@ -27,8 +27,7 @@ internal class PrimitiveTiledArray(val blocks: Array, val marker } companion object { - val type: ArrayTypes = ArrayTypes.valueOf(PrimitiveArray::class.simpleName!!) 
- private val emptyMarker: Array = arrayOf() + val type: DataType = DataType.CurrentPrimitive suspend operator fun invoke(strides: Strides): PrimitiveTiledArray { val blockSize = blockSizeByStrides(strides) @@ -60,15 +59,10 @@ internal class PrimitiveTiledArray(val blocks: Array, val marker require(size % blockSize == 0) { "Size must divide blockSize" } val blocksNum = if (blockSize == 0) 0 else size / blockSize + val blocks = coroutineContext[AutoAllocatorContext]?.getPrimitiveBlock(blocksNum, blockSize) + ?: Array(blocksNum) { PrimitiveArray(blockSize) } - val coroutineContext = coroutineContext[AllocatorContext.Key] - - // With array dispatcher - val containerArray = coroutineContext?.getArrayContainers(type, blockSize, blocksNum) ?: Array(blocksNum) { ArrayContainer(type, blockSize) } - val blocks = Array(containerArray.size) { i -> (containerArray[i] as PrimitiveArrayContainer).array } - val marker = Array(containerArray.size) { i -> containerArray[i].markAsOutput } - - return PrimitiveTiledArray(blocks, marker) + return PrimitiveTiledArray(blocks) } suspend operator fun invoke(size: Int, blockSize: Int, init: (InlineInt) -> PrimitiveType) : PrimitiveTiledArray { @@ -133,17 +127,17 @@ internal class PrimitiveTiledArray(val blocks: Array, val marker blocks[blockIdx][blockOff] = value } - suspend fun copyOf(): PrimitiveTiledArray { - val copyArray = PrimitiveTiledArray(size, blockSize) + fun copyOf(): PrimitiveTiledArray { + val copyBlocks = Array(blocksNum) { PrimitiveArray(blockSize) } for (blockNum in 0 until blocksNum) { val thisBlock = this.blocks[blockNum] - val destBlock = copyArray.blocks[blockNum] + val destBlock = copyBlocks[blockNum] thisBlock.copyInto(destBlock) } - return copyArray + return PrimitiveTiledArray(copyBlocks) } fun copyInto(dest: PrimitiveTiledArray, destOffset: Int = 0, srcStart: Int = 0, srcEnd: Int = size) { diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/broadcasting/Broadcasting.kt 
b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/broadcasting/Broadcasting.kt index bde7580d4..7e2e18551 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/broadcasting/Broadcasting.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/broadcasting/Broadcasting.kt @@ -17,7 +17,7 @@ fun unsqueezeFirst(shape: IntArray, newShapeSize: Int): IntArray { object Broadcasting { fun broadcastShapeForMatmul(leftShape: IntArray, rightShape: IntArray): IntArray { val actualLeftShape = if (leftShape.size == 1) intArrayOf(1, leftShape[0]) else leftShape - val actualRightShape = if (rightShape.size == 1) intArrayOf(1, rightShape[1]) else rightShape + val actualRightShape = if (rightShape.size == 1) intArrayOf(rightShape[0], 1) else rightShape val outputMatrixShape = intArrayOf(actualLeftShape[actualLeftShape.lastIndex - 1], actualRightShape.last()) val broadcastShape = broadcastShape(listOf(actualLeftShape.copyOfRange(0, actualLeftShape.size - 2), diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/PrimitiveExtensions.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/PrimitiveExtensions.kt index 21442ecda..cbf651bc0 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/PrimitiveExtensions.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/PrimitiveExtensions.kt @@ -8,6 +8,7 @@ import io.kinference.ndarray.arrays.pointers.accept import io.kinference.ndarray.arrays.pointers.acceptWithRecursive import io.kinference.ndarray.stubs.* import io.kinference.ndarray.arrays.tiled.* +import io.kinference.ndarray.extensions.constants.PrimitiveConstants import io.kinference.primitives.annotations.* import io.kinference.primitives.types.* import io.kinference.utils.launchWithLimitOrDefault @@ -127,21 +128,17 @@ internal suspend fun PrimitiveNDArray.dotTransposedWithAlpha(alpha: Double, othe other as PrimitiveNDArray; 
destination as MutablePrimitiveNDArray val alpha = alpha.toPrimitive() - val dBlocksInRow = destination.blocksInRow val lrBlocksInRow = this.blocksInRow val n = this.shape[0] val t = this.shape[1] val m = other.shape[0] - val dBlockSize = destination.array.blockSize val lrBlockSize = this.array.blockSize - val destBlocks = destination.array.blocks val leftBlocks = this.array.blocks val rightBlocks = other.array.blocks val rowFlop = t * m - val zero = (0).toPrimitive() /* TODO: (dmitriyb) this is temporary commented. On GEC performance test we have large inputs that cause out of memory exceptions @@ -161,33 +158,26 @@ internal suspend fun PrimitiveNDArray.dotTransposedWithAlpha(alpha: Double, othe // TODO: (cupertank) Remove constants // TODO: (dmitriyb) Implement concurrent array retrieve with a separate structure from ArraysDispatcher parallelizeByRows(rowFlop, n, 262144) { nStart: Int, nEnd: Int, _ -> - val mSums = Array(m) { PrimitiveArray(lrBlockSize) } + val tempSum = PrimitiveArray(lrBlockSize) + val destPointer = destination.array.pointer() for (i in nStart until nEnd) { val leftBlockOffset = i * lrBlocksInRow val rightBlockIter = rightBlocks.iterator() - val destBlockOffset = i * dBlocksInRow + destPointer.linearIndex = i * m for (k in 0 until m) { - val tempArray = mSums[k] for (lrBlock in 0 until lrBlocksInRow) { val leftBlock = leftBlocks[leftBlockOffset + lrBlock] val rightBlock = rightBlockIter.next() - for (j in tempArray.indices) { - tempArray[j] += leftBlock[j] * rightBlock[j] + for (j in tempSum.indices) { + tempSum[j] += leftBlock[j] * rightBlock[j] } } - } - val mSumsIter = mSums.iterator() - for (destBlockNum in 0 until dBlocksInRow) { - val destBlock = destBlocks[destBlockOffset + destBlockNum] - for (j in destBlock.indices) { - val sumBlock = mSumsIter.next() - destBlock[j] = sumBlock.sum() * alpha - sumBlock.fill(zero) - } + destPointer.setAndIncrement(tempSum.sum() * alpha) + tempSum.fill(PrimitiveConstants.ZERO) } } } diff --git 
a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/BroadcastTwoArgumentsPrimitive.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/BroadcastTwoArgumentsPrimitive.kt index 90056a8bf..8fd770dd1 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/BroadcastTwoArgumentsPrimitive.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/BroadcastTwoArgumentsPrimitive.kt @@ -8,7 +8,6 @@ import io.kinference.primitives.annotations.GenerateNameFromPrimitives import io.kinference.primitives.annotations.GeneratePrimitives import io.kinference.primitives.types.DataType import io.kinference.primitives.types.PrimitiveType -import io.kinference.utils.inlines.InlineInt @GenerateNameFromPrimitives internal fun broadcastTwoTensorsPrimitive( @@ -45,16 +44,20 @@ internal fun broadcastTwoTensorsPrimitive( val rightBlocks = right.array.blocks val destBlocks = dest.array.blocks - val leftIsScalarFun = { leftOffset: InlineInt, rightOffset: InlineInt, destOffset: InlineInt, axisToBroadcastIdx: InlineInt -> - val shapeIdx = axisToBroadcastIdx.value * 2 + val leftIsScalarFun = ScalarBroadcastFun { leftOffset, rightOffset, destOffset, axisToBroadcastIdx -> + val shapeIdx = axisToBroadcastIdx * 2 val batchSize = destBroadcastingShape[shapeIdx] for (batchIdx in 0 until batchSize) { - val leftScalar = leftBlocks[leftOffset.value][0] + val leftBatchOffset = leftOffset + leftOffsets[shapeIdx] * batchIdx + val rightBatchOffset = rightOffset + rightOffsets[shapeIdx] * batchIdx + val destBatchOffset = destOffset + destOffsets[shapeIdx] * batchIdx + + val leftScalar = leftBlocks[leftBatchOffset][0] for (blockIdx in 0 until destBlocksInRow) { - val destBlock = destBlocks[destOffset.value + blockIdx] - val rightBlock = rightBlocks[rightOffset.value + blockIdx] + val destBlock = destBlocks[destBatchOffset + blockIdx] + val rightBlock = 
rightBlocks[rightBatchOffset + blockIdx] for (idx in destBlock.indices) { destBlock[idx] = op(leftScalar, rightBlock[idx]) @@ -63,16 +66,20 @@ internal fun broadcastTwoTensorsPrimitive( } } - val rightIsScalarFun = { leftOffset: InlineInt, rightOffset: InlineInt, destOffset: InlineInt, axisToBroadcastIdx: InlineInt -> - val shapeIdx = axisToBroadcastIdx.value * 2 + val rightIsScalarFun = ScalarBroadcastFun { leftOffset, rightOffset, destOffset, axisToBroadcastIdx -> + val shapeIdx = axisToBroadcastIdx * 2 val batchSize = destBroadcastingShape[shapeIdx] for (batchIdx in 0 until batchSize) { - val rightScalar = rightBlocks[rightOffset.value][0] + val leftBatchOffset = leftOffset + leftOffsets[shapeIdx] * batchIdx + val rightBatchOffset = rightOffset + rightOffsets[shapeIdx] * batchIdx + val destBatchOffset = destOffset + destOffsets[shapeIdx] * batchIdx + + val rightScalar = rightBlocks[rightBatchOffset][0] for (blockIdx in 0 until destBlocksInRow) { - val destBlock = destBlocks[destOffset.value + blockIdx] - val leftBlock = leftBlocks[leftOffset.value + blockIdx] + val destBlock = destBlocks[destBatchOffset + blockIdx] + val leftBlock = leftBlocks[leftBatchOffset + blockIdx] for (idx in destBlock.indices) { destBlock[idx] = op(leftBlock[idx], rightScalar) @@ -81,11 +88,11 @@ internal fun broadcastTwoTensorsPrimitive( } } - val defaultFun = { leftOffset: InlineInt, rightOffset: InlineInt, destOffset: InlineInt, axisToBroadcastIdx: InlineInt -> + val defaultFun = ScalarBroadcastFun { leftOffset, rightOffset, destOffset, _ -> for (blockIdx in 0 until destBlocksInRow) { - val leftBlock = leftBlocks[leftOffset.value + blockIdx] - val rightBlock = rightBlocks[rightOffset.value + blockIdx] - val destBlock = destBlocks[destOffset.value + blockIdx] + val leftBlock = leftBlocks[leftOffset + blockIdx] + val rightBlock = rightBlocks[rightOffset + blockIdx] + val destBlock = destBlocks[destOffset + blockIdx] for (idx in destBlock.indices) { destBlock[idx] = op(leftBlock[idx], 
rightBlock[idx]) @@ -93,7 +100,7 @@ internal fun broadcastTwoTensorsPrimitive( } } - val broadcastingFun = when { + val broadcastingFun: ScalarBroadcastFun = when { leftIsScalar -> leftIsScalarFun rightIsScalar -> rightIsScalarFun else -> defaultFun @@ -101,7 +108,7 @@ internal fun broadcastTwoTensorsPrimitive( fun broadcast(leftOffset: Int, rightOffset: Int, destOffset: Int, axisToBroadcastIdx: Int) { if (axisToBroadcastIdx == totalAxesToBroadcast) { - broadcastingFun(InlineInt(leftOffset), InlineInt(rightOffset), InlineInt(destOffset), InlineInt(axisToBroadcastIdx)) + broadcastingFun(leftOffset, rightOffset, destOffset, axisToBroadcastIdx) } else { val shapeIdx = axisToBroadcastIdx * 2 diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/ReshapeView.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/ReshapeView.kt index 43c6f672a..b980f530e 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/ReshapeView.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/broadcasting/ReshapeView.kt @@ -3,6 +3,10 @@ package io.kinference.ndarray.extensions.broadcasting import io.kinference.ndarray.arrays.NDArrayCore import io.kinference.ndarray.extensions.utils.calculateBlock +internal fun interface ScalarBroadcastFun { + operator fun invoke(leftOffset: Int, rightOffset: Int, destOffset: Int, axisToBroadcastIdx: Int) +} + internal data class BroadcastingInfo( val broadcastingShapes: Array, val broadcastingDestShape: IntArray, @@ -89,8 +93,6 @@ internal data class BroadcastingInfo( } } - - internal fun makeOffsets(shape: IntArray, blocksInRow: Int): IntArray { val offsets = IntArray(shape.size) offsets[offsets.lastIndex - 1] = blocksInRow diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/BooleanConstants.kt 
b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/BooleanConstants.kt new file mode 100644 index 000000000..0bac99911 --- /dev/null +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/BooleanConstants.kt @@ -0,0 +1,7 @@ +package io.kinference.ndarray.extensions.constants + +object BooleanConstants { + const val ZERO = false + const val ONE = true + const val SIZE_BYTES = 1L +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/PrimitiveConstants.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/PrimitiveConstants.kt index e1edbef10..09aec0c9e 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/PrimitiveConstants.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/constants/PrimitiveConstants.kt @@ -2,10 +2,9 @@ package io.kinference.ndarray.extensions.constants import io.kinference.primitives.annotations.* -import io.kinference.primitives.types.DataType -import io.kinference.primitives.types.toPrimitive import io.kinference.ndarray.toUShort import io.kinference.ndarray.toUByte +import io.kinference.primitives.types.* @GenerateNameFromPrimitives @@ -29,5 +28,7 @@ internal object PrimitiveConstants { val INV_ERF_COEF_1 = (4.330746750799873).toPrimitive() val INV_ERF_COEF_2 = (6.802721088435375).toPrimitive() + + val SIZE_BYTES = PrimitiveType.SIZE_BYTES.toLong() } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gather/PrimitiveGatherByBlocks.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gather/PrimitiveGatherByBlocks.kt index b40873787..9adb86a46 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gather/PrimitiveGatherByBlocks.kt +++ 
b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gather/PrimitiveGatherByBlocks.kt @@ -26,14 +26,11 @@ internal suspend fun gatherByBlocksPrimitive(array: PrimitiveNDArray, indices: N val dataToCopyBlocks = dataToCopySize / array.array.blockSize val dataBlocks = array.array.blocks - val dataMarkers = array.array.marker val destBatchBlocksOffset = indicesSize * dataToCopyBlocks val inputBatchBlockOffset = array.shape[actualAxis] * dataToCopyBlocks val destArray = arrayOfNulls(destBatchBlocksOffset * dataBatchSize) - val destMarkersArray = arrayOfNulls(destBatchBlocksOffset * dataBatchSize) - for (dataBatchNum in 0 until dataBatchSize) { val dataBlocksOffset = inputBatchBlockOffset * dataBatchNum @@ -46,12 +43,11 @@ internal suspend fun gatherByBlocksPrimitive(array: PrimitiveNDArray, indices: N for (blockIdx in 0 until dataToCopyBlocks) { destArray[destBlocksOffset + blockIdx] = dataBlocks[dataOffset + blockIdx] - destMarkersArray[destBlocksOffset + blockIdx] = dataMarkers[dataOffset + blockIdx] } destBlocksOffset += dataToCopyBlocks } } - return PrimitiveNDArray(PrimitiveTiledArray(destArray as Array, destMarkersArray as Array), Strides(destShape)) + return PrimitiveNDArray(PrimitiveTiledArray(destArray as Array), Strides(destShape)) } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGelu.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGelu.kt index 0636cb824..8dc2a6705 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGelu.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGelu.kt @@ -16,3 +16,17 @@ suspend fun biasGelu(input: NumberNDArrayCore, bias: NumberNDArrayCore): Mutable else -> error("BiasGelu operation supported only for FLOAT and DOUBLE tensors, actual types is ${input.type}") } } + +suspend fun biasGelu(input: NumberNDArrayCore, bias: NumberNDArrayCore, dest: 
MutableNumberNDArrayCore): MutableNumberNDArrayCore { + require(input.type == bias.type) + { "Input and Bias types should be equal, actual input type is ${input.type}, actual bias type is ${bias.type}" } + + require(input.type == DataType.FLOAT || input.type == DataType.DOUBLE) + { "BiasGelu operation supported only for FLOAT and DOUBLE tensors, actual types is ${input.type}" } + + return when(input.type) { + DataType.FLOAT -> computeGeluFloat(input as FloatNDArray, bias as FloatNDArray, dest as MutableFloatNDArray) + DataType.DOUBLE -> computeGeluDouble(input as DoubleNDArray, bias as DoubleNDArray, dest as MutableDoubleNDArray) + else -> error("BiasGelu operation supported only for FLOAT and DOUBLE tensors, actual types is ${input.type}") + } +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt index 577c3ea02..9ba08ddb0 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/BiasGeluPrimitive.kt @@ -3,8 +3,6 @@ package io.kinference.ndarray.extensions.gelu import io.kinference.ndarray.* import io.kinference.ndarray.arrays.* - -import io.kinference.ndarray.arrays.memory.PrimitiveArrayContainer import io.kinference.ndarray.arrays.tiled.PrimitiveTiledArray import io.kinference.ndarray.extensions.constants.PrimitiveConstants import io.kinference.ndarray.stubs.absoluteValue @@ -16,8 +14,7 @@ import io.kinference.primitives.types.* import kotlin.math.* @GenerateNameFromPrimitives -internal suspend fun computeGeluPrimitive(input: PrimitiveNDArray, bias: PrimitiveNDArray): MutablePrimitiveNDArray { - val output = MutablePrimitiveNDArray(input.strides) +internal suspend fun computeGeluPrimitive(input: PrimitiveNDArray, bias: PrimitiveNDArray, output: 
MutablePrimitiveNDArray): MutablePrimitiveNDArray { val inputBlocks = input.array.blocks val biasBlocks = bias.array.blocks @@ -81,3 +78,8 @@ internal suspend fun computeGeluPrimitive(input: PrimitiveNDArray, bias: Primiti return output } + +@GenerateNameFromPrimitives +internal suspend fun computeGeluPrimitive(input: PrimitiveNDArray, bias: PrimitiveNDArray): MutablePrimitiveNDArray { + return computeGeluPrimitive(input, bias, MutablePrimitiveNDArray(input.strides)) +} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/FastGeluPrimitive.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/FastGeluPrimitive.kt index ffb626267..32a1e6e02 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/FastGeluPrimitive.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gelu/FastGeluPrimitive.kt @@ -6,7 +6,6 @@ package io.kinference.ndarray.extensions.gelu import io.kinference.ndarray.arrays.* import io.kinference.ndarray.arrays.MutablePrimitiveNDArray import io.kinference.ndarray.arrays.PrimitiveNDArray -import io.kinference.ndarray.arrays.memory.PrimitiveArrayContainer import io.kinference.ndarray.arrays.tiled.PrimitiveTiledArray import io.kinference.ndarray.countCoroutinesByData import io.kinference.ndarray.parallelizeByBlocks diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/utils/Utils.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/utils/Utils.kt index 8c4e18063..fec73c0f9 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/utils/Utils.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/utils/Utils.kt @@ -1,5 +1,7 @@ package io.kinference.ndarray.extensions.utils +import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap + /*** * Calculates the total size of the tensor with such shape. 
*/ @@ -50,3 +52,14 @@ internal fun computeColumnMajorIndex( internal fun isInPadding(actual: Int, bound: Int) : Boolean { return actual < 0 || actual >= bound } + +inline fun <V> Int2ObjectOpenHashMap<V>.getOrPut(key: Int, defaultValue: () -> V): V { + val existingValue = this[key] + return if (existingValue != null) { + existingValue + } else { + val value = defaultValue() + put(key, value) + value + } +} diff --git a/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/PredictionConfig.kt b/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/PredictionConfig.kt new file mode 100644 index 000000000..1828b36bd --- /dev/null +++ b/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/PredictionConfig.kt @@ -0,0 +1,67 @@ +package io.kinference.utils + +enum class AllocationMode { + NoAllocation, + Manual, + Auto; +} + +class PredictionConfig private constructor( + val parallelismLimit: Int, + val allocationMode: AllocationMode, + val memoryThreshold: Long, + val memoryClearingInterval: Long ) { + companion object { + operator fun invoke( + parallelismLimit: Int, + allocationMode: AllocationMode, + memoryThreshold: Long, + memoryClearingInterval: Long + ): PredictionConfig { + require(parallelismLimit in 1..PlatformUtils.cores) { + "Parallelism limit must be within the range of 1 to available CPU cores: ${PlatformUtils.cores}." + } + return if (allocationMode == AllocationMode.NoAllocation) { + PredictionConfig(parallelismLimit, allocationMode, 0L, Long.MAX_VALUE) + } else { + require(memoryThreshold > 0) { + "Memory threshold must be positive." + } + require(memoryClearingInterval > 0) { + "Memory clearing interval must be positive."
+ } + + PredictionConfig(parallelismLimit, allocationMode, memoryThreshold, memoryClearingInterval) + } + } +} + +object PredictionConfigs { + val DefaultAutoAllocator: PredictionConfig = PredictionConfig( + parallelismLimit = PlatformUtils.cores, + allocationMode = AllocationMode.Auto, + memoryThreshold = (PlatformUtils.maxHeap * 0.3).toLong(), + memoryClearingInterval = 500 + ) + val DefaultManualAllocator: PredictionConfig = PredictionConfig( + parallelismLimit = PlatformUtils.cores, + allocationMode = AllocationMode.Manual, + memoryThreshold = 50 * 1024 * 1024, + memoryClearingInterval = 500 + ) + val NoAllocator: PredictionConfig = PredictionConfig( + parallelismLimit = PlatformUtils.cores, + allocationMode = AllocationMode.NoAllocation, + memoryThreshold = 0L, + memoryClearingInterval = Long.MAX_VALUE + ) + + fun customPredictionConfig(parallelismLimit: Int, + allocationMode: AllocationMode, + memoryThreshold: Long, + memoryClearingInterval: Long): PredictionConfig { + return PredictionConfig(parallelismLimit, allocationMode, memoryThreshold, memoryClearingInterval) + } +} diff --git a/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ResourcesDispatcher.kt b/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ResourcesDispatcher.kt index 66b5cea95..b2d5b40a9 100644 --- a/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ResourcesDispatcher.kt +++ b/utils/utils-common/src/commonMain/kotlin/io/kinference/utils/ResourcesDispatcher.kt @@ -1,8 +1,9 @@ +@file:OptIn(ExperimentalStdlibApi::class) package io.kinference.utils import kotlinx.coroutines.* import kotlinx.coroutines.channels.Channel -import kotlin.coroutines.CoroutineContext +import kotlin.coroutines.* object ResourcesDispatcher { private val tokenChannel = Channel<Unit>(capacity = PlatformUtils.cores) @@ -16,11 +17,46 @@ object ResourcesDispatcher { } } -class ParallelismLimiterContext(val dispatcher: CoroutineDispatcher) : CoroutineContext.Element { - companion object Key : 
CoroutineContext.Key<ParallelismLimiterContext> - override val key: CoroutineContext.Key<*> get() = Key +sealed class PredictionContext( + val dispatcher: CoroutineDispatcher +) : AbstractCoroutineContextElement(PredictionContext) { + companion object Key : CoroutineContext.Key<PredictionContext> + + override val key + get() = Key + + @OptIn(ExperimentalStdlibApi::class) + override fun <E : CoroutineContext.Element> get(key: CoroutineContext.Key<E>): E? = getPolymorphicElement(key) + + @OptIn(ExperimentalStdlibApi::class) + override fun minusKey(key: CoroutineContext.Key<*>): CoroutineContext = minusPolymorphicKey(key) +} + +interface ArrayStorage { + fun resetState() +} + +abstract class AllocatorContext<T : ArrayStorage>( + dispatcher: CoroutineDispatcher, + val storage: T +) : PredictionContext(dispatcher) { + companion object Key : AbstractCoroutineContextKey<PredictionContext, AllocatorContext<*>>( + PredictionContext.Key, + { it as? AllocatorContext<*> } + ) + + fun finalizeContext() { + storage.resetState() + } +} + +class NoAllocatorContext(dispatcher: CoroutineDispatcher) : PredictionContext(dispatcher) { + companion object Key : AbstractCoroutineContextKey<PredictionContext, NoAllocatorContext>( + PredictionContext.Key, + { it as? 
NoAllocatorContext } + ) } fun CoroutineScope.launchWithLimitOrDefault(block: suspend CoroutineScope.() -> Unit) { - this.launch(coroutineContext[ParallelismLimiterContext.Key]?.dispatcher ?: Dispatchers.Default, block = block) + this.launch(coroutineContext[PredictionContext]?.dispatcher ?: Dispatchers.Default, block = block) } diff --git a/utils/utils-testing/src/commonMain/kotlin/io.kinference/runners/PerformanceRunner.kt b/utils/utils-testing/src/commonMain/kotlin/io.kinference/runners/PerformanceRunner.kt index 6d8054dc2..5b288eac5 100644 --- a/utils/utils-testing/src/commonMain/kotlin/io.kinference/runners/PerformanceRunner.kt +++ b/utils/utils-testing/src/commonMain/kotlin/io.kinference/runners/PerformanceRunner.kt @@ -120,6 +120,7 @@ class PerformanceRunner<T : ONNXData<*, *>>(private val engine: TestEngine<T>) { for (result in results.sortedBy { it.name }) { logger.info { "Test ${result.name}: avg ${result.avg}, min ${result.min}, max ${result.max}" } } + logger.info { "Average between inputs: avg ${results.map { it.avg }.average()}, min ${results.minOfOrNull { it.min }}, max ${results.maxOfOrNull { it.max }}" } } companion object {