diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/KIEngine.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/KIEngine.kt index 287094629..dba47b43d 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/KIEngine.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/KIEngine.kt @@ -24,16 +24,6 @@ import okio.Path.Companion.toPath typealias KIONNXData = ONNXData -// Define an interface for allocation control marking output -internal interface KIONNXDataArraysReleaser { - fun markOutput() -} - -internal fun KIONNXData.markOutput() { - if (this is KIONNXDataArraysReleaser) - this.markOutput() -} - object CoreBackend : BackendInfo(name = "KInference Core CPU Backend") /** diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/map/KIONNXMap.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/map/KIONNXMap.kt index f541c4c23..a1bbcf7eb 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/map/KIONNXMap.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/map/KIONNXMap.kt @@ -9,7 +9,7 @@ import io.kinference.protobuf.message.TensorProto import io.kinference.types.ValueInfo import io.kinference.types.ValueTypeInfo -class KIONNXMap(name: String?, data: Map>, val info: ValueTypeInfo.MapTypeInfo) : ONNXMap>, CoreBackend>(name, data), KIONNXDataArraysReleaser { +class KIONNXMap(name: String?, data: Map>, val info: ValueTypeInfo.MapTypeInfo) : ONNXMap>, CoreBackend>(name, data) { constructor(data: Map>, info: ValueInfo) : this(info.name, data, info.typeInfo as ValueTypeInfo.MapTypeInfo) override val backend = CoreBackend @@ -26,10 +26,6 @@ class KIONNXMap(name: String?, data: Map>, val info: ValueTyp override fun rename(name: String): KIONNXMap = KIONNXMap(name, data, info) - override fun markOutput() { - data.values.forEach { it.markOutput() } - } - override suspend fun clone(newName: String?): KIONNXMap { val newMap = HashMap>(data.size) for ((key, value) in data.entries) { diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/seq/KIONNXSequence.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/seq/KIONNXSequence.kt index 24b52085c..49383fca0 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/seq/KIONNXSequence.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/seq/KIONNXSequence.kt @@ -7,7 +7,7 @@ import io.kinference.data.ONNXSequence import io.kinference.protobuf.message.SequenceProto import io.kinference.types.* -class KIONNXSequence(name: String?, data: List>, val info: ValueTypeInfo.SequenceTypeInfo) : ONNXSequence>, CoreBackend>(name, data), KIONNXDataArraysReleaser { +class KIONNXSequence(name: String?, data: List>, val info: ValueTypeInfo.SequenceTypeInfo) : ONNXSequence>, CoreBackend>(name, data) { constructor(name: String?, info: ValueTypeInfo.SequenceTypeInfo, size: Int, init: (Int) -> KIONNXData<*>) : this(name, List(size, init), info) constructor(data: List>, info: ValueInfo) : this(info.name, data, info.typeInfo as ValueTypeInfo.SequenceTypeInfo) @@ -23,10 +23,6 @@ class KIONNXSequence(name: String?, data: List>, val info: ValueTy override fun rename(name: String): KIONNXSequence = KIONNXSequence(name, data, info) - override fun markOutput() { - data.forEach { it.markOutput() } - } - val length: Int = data.size companion object { diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt index 2c6de1a69..d1ca7c5f6 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/data/tensor/KITensor.kt @@ -1,7 +1,6 @@ package io.kinference.core.data.tensor -import io.kinference.core.CoreBackend -import io.kinference.core.KIONNXDataArraysReleaser +import io.kinference.core.* import io.kinference.data.ONNXTensor import io.kinference.ndarray.arrays.* import io.kinference.ndarray.arrays.tiled.* @@ -13,7 +12,7 @@ import io.kinference.types.ValueTypeInfo //TODO: support segments //TODO: support external data -class KITensor(name: String?, override val data: NDArrayCore, val info: ValueTypeInfo.TensorTypeInfo) : ONNXTensor(name, data), KIONNXDataArraysReleaser { +class KITensor(name: String?, override val data: NDArrayCore, val info: ValueTypeInfo.TensorTypeInfo) : ONNXTensor(name, data) { constructor(data: NDArrayCore, info: ValueInfo) : this(info.name, data, info.typeInfo as ValueTypeInfo.TensorTypeInfo) override suspend fun close() { @@ -24,11 +23,6 @@ class KITensor(name: String?, override val data: NDArrayCore, val info: ValueTyp return KITensor(newName, data.clone(), info) } - override fun markOutput() { - if (this.data is MemoryControlledArray) - data.markOutput() - } - suspend operator fun minus(other: KITensor): KITensor { require(this.data is NumberNDArrayCore && other.data is NumberNDArrayCore) return (this.data - other.data).asTensor() diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt index 1e25b4ae1..2a1e50db7 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/model/KIModel.kt @@ -1,8 +1,7 @@ package io.kinference.core.model -import io.kinference.core.KIONNXData +import io.kinference.core.* import io.kinference.core.graph.KIGraph -import io.kinference.core.markOutput import io.kinference.graph.Contexts import io.kinference.model.Model import io.kinference.ndarray.arrays.memory.* @@ -51,9 +50,9 @@ class KIModel( withContext(mixedContext) { val coroutineContext = coroutineContext[AllocatorContext.Key]!! val execResult = graph.execute(input, contexts) - execResult.forEach { it.markOutput() } + val copies = execResult.map { it.clone(it.name) }.toList() coroutineContext.closeAllocated() - execResult + copies } } finally { if (coreReserved) { diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt index 6487c46c1..05b76119b 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/layer/attention/Attention.kt @@ -61,21 +61,16 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map - val resultMarker: Array if (past == null || past.linearSize == 0) { resultBlocks = kBlocks.plus(vBlocks) - resultMarker = kMarker.plus(vMarker) } else { val pastSeqLen = past.shape[3] presentDims[3] += pastSeqLen val pastBlocks = past.array.blocks - val pastMarker = past.array.marker val blocksInRow = headSize / past.array.blockSize @@ -84,35 +79,30 @@ sealed class Attention(name: String, info: OperatorInfo, attributes: Map(2 * batchSize * numHeads * presentDims[3] * blocksInRow) - val futureResMarker = arrayOfNulls(2 * batchSize * numHeads * presentDims[3] * blocksInRow) var resBlockIdx = 0 var pastBlocIdx = 0 repeat(2) { presentKeyValueIdx -> val kvBlocks = if (presentKeyValueIdx == 0) kBlocks else vBlocks - val kvMarker = if (presentKeyValueIdx == 0) kMarker else vMarker var kvBlockIdx = 0 repeat(rowsSize) { pastBlocks.copyInto(futureRes, resBlockIdx, pastBlocIdx, pastBlocIdx + pastRowBlocksCount) - pastMarker.copyInto(futureResMarker, resBlockIdx, pastBlocIdx, pastBlocIdx + pastRowBlocksCount) resBlockIdx += pastRowBlocksCount pastBlocIdx += pastRowBlocksCount kvBlocks.copyInto(futureRes, resBlockIdx, kvBlockIdx, kvBlockIdx + kvRowBlocksCount) - kvMarker.copyInto(futureResMarker, resBlockIdx, kvBlockIdx, kvBlockIdx + kvRowBlocksCount) resBlockIdx += kvRowBlocksCount kvBlockIdx += kvRowBlocksCount } } resultBlocks = futureRes as Array - resultMarker = futureResMarker as Array } - return FloatNDArray(FloatTiledArray(resultBlocks, resultMarker), Strides(presentDims)) + return FloatNDArray(FloatTiledArray(resultBlocks), Strides(presentDims)) } diff --git a/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt b/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt index bea90d149..2da712ca3 100644 --- a/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt +++ b/ndarray/ndarray-api/src/commonMain/kotlin/io/kinference/ndarray/arrays/ArrayDispatcherUtils.kt @@ -15,7 +15,3 @@ enum class ArrayTypes(val index: Int, val size: Int) { DoubleArray(9, Double.SIZE_BYTES), BooleanArray(10, 1); } - -interface MemoryControlledArray { - fun markOutput() -} diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/BooleanNDArray.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/BooleanNDArray.kt index 5a4e758dc..3037028d3 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/BooleanNDArray.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/BooleanNDArray.kt @@ -21,7 +21,7 @@ fun interface BooleanBinaryOperation { operator fun invoke(first: Boolean, second: Boolean): Boolean } -open class BooleanNDArray(var array: BooleanTiledArray, strides: Strides) : NDArrayCore, MemoryControlledArray { +open class BooleanNDArray(var array: BooleanTiledArray, strides: Strides) : NDArrayCore { override val type: DataType = DataType.BOOLEAN final override var strides: Strides = strides @@ -79,10 +79,6 @@ open class BooleanNDArray(var array: BooleanTiledArray, strides: Strides) : NDAr return array.blocks[0][0] } - override fun markOutput() { - array.marker.forEach { it.invoke() } - } - override suspend fun toMutable(): MutableBooleanNDArray { return MutableBooleanNDArray(array.copyOf(), strides) } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/PrimitiveNDArray.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/PrimitiveNDArray.kt index 0b391f275..f1bd91b44 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/PrimitiveNDArray.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/PrimitiveNDArray.kt @@ -30,7 +30,7 @@ import kotlin.math.* @GenerateNameFromPrimitives @MakePublic -internal open class PrimitiveNDArray(array: PrimitiveTiledArray, strides: Strides) : NumberNDArrayCore, MemoryControlledArray { +internal open class PrimitiveNDArray(array: PrimitiveTiledArray, strides: Strides) : NumberNDArrayCore { var array: PrimitiveTiledArray = array protected set @@ -85,10 +85,6 @@ internal open class PrimitiveNDArray(array: PrimitiveTiledArray, strides: Stride return array.blocks[0][0] } - override fun markOutput() { - array.marker.forEach { it.invoke() } - } - override suspend fun clone(): PrimitiveNDArray { return PrimitiveNDArray(array.copyOf(), Strides(shape)) } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt index 2ed73f878..f6fd4f008 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/AllocatorContext.kt @@ -25,7 +25,7 @@ data class AllocatorContext internal constructor( fun closeAllocated() { usedContainers.forEach { - if (!it.isOutput && limiter.checkMemoryLimitAndAdd(it.sizeBytes.toLong())) { + if (limiter.checkMemoryLimitAndAdd(it.sizeBytes.toLong())) { unusedContainers[it.arrayTypeIndex, it.arraySizeIndex].addLast(it) } } diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt index d39ba62ba..8884fcfa1 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/memory/ArrayContainer.kt @@ -7,13 +7,6 @@ sealed class ArrayContainer( val arraySizeIndex: Int, val sizeBytes: Int ) { - var isOutput: Boolean = false - private set - - val markAsOutput = { - isOutput = true - } - companion object { private const val EMPTY_INDEX = -1 diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt index eda58c092..a9863aadb 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/arrays/tiled/PrimitiveTiledArray.kt @@ -17,7 +17,7 @@ import kotlin.math.min @GenerateNameFromPrimitives @MakePublic -internal class PrimitiveTiledArray(val blocks: Array, val marker: Array = emptyMarker) { +internal class PrimitiveTiledArray(val blocks: Array) { val size: Int val blockSize: Int = if (blocks.isEmpty()) 0 else blocks.first().size val blocksNum: Int = blocks.size @@ -28,7 +28,6 @@ internal class PrimitiveTiledArray(val blocks: Array, val marker companion object { val type: ArrayTypes = ArrayTypes.valueOf(PrimitiveArray::class.simpleName!!) - private val emptyMarker: Array = arrayOf() suspend operator fun invoke(strides: Strides): PrimitiveTiledArray { val blockSize = blockSizeByStrides(strides) @@ -66,9 +65,8 @@ internal class PrimitiveTiledArray(val blocks: Array, val marker // With array dispatcher val containerArray = coroutineContext?.getArrayContainers(type, blockSize, blocksNum) ?: Array(blocksNum) { ArrayContainer(type, blockSize) } val blocks = Array(containerArray.size) { i -> (containerArray[i] as PrimitiveArrayContainer).array } - val marker = Array(containerArray.size) { i -> containerArray[i].markAsOutput } - return PrimitiveTiledArray(blocks, marker) + return PrimitiveTiledArray(blocks) } suspend operator fun invoke(size: Int, blockSize: Int, init: (InlineInt) -> PrimitiveType) : PrimitiveTiledArray { diff --git a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gather/PrimitiveGatherByBlocks.kt b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gather/PrimitiveGatherByBlocks.kt index b40873787..9adb86a46 100644 --- a/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gather/PrimitiveGatherByBlocks.kt +++ b/ndarray/ndarray-core/src/jvmMain/kotlin/io/kinference/ndarray/extensions/gather/PrimitiveGatherByBlocks.kt @@ -26,14 +26,11 @@ internal suspend fun gatherByBlocksPrimitive(array: PrimitiveNDArray, indices: N val dataToCopyBlocks = dataToCopySize / array.array.blockSize val dataBlocks = array.array.blocks - val dataMarkers = array.array.marker val destBatchBlocksOffset = indicesSize * dataToCopyBlocks val inputBatchBlockOffset = array.shape[actualAxis] * dataToCopyBlocks val destArray = arrayOfNulls(destBatchBlocksOffset * dataBatchSize) - val destMarkersArray = arrayOfNulls(destBatchBlocksOffset * dataBatchSize) - for (dataBatchNum in 0 until dataBatchSize) { val dataBlocksOffset = inputBatchBlockOffset * dataBatchNum @@ -46,12 +43,11 @@ internal suspend fun gatherByBlocksPrimitive(array: PrimitiveNDArray, indices: N for (blockIdx in 0 until dataToCopyBlocks) { destArray[destBlocksOffset + blockIdx] = dataBlocks[dataOffset + blockIdx] - destMarkersArray[destBlocksOffset + blockIdx] = dataMarkers[dataOffset + blockIdx] } destBlocksOffset += dataToCopyBlocks } } - return PrimitiveNDArray(PrimitiveTiledArray(destArray as Array, destMarkersArray as Array), Strides(destShape)) + return PrimitiveNDArray(PrimitiveTiledArray(destArray as Array), Strides(destShape)) }