From 0b2bab7e458f6f82107f18fa6be52e953b67ca6c Mon Sep 17 00:00:00 2001
From: dmitriyb
Date: Tue, 17 Sep 2024 14:25:54 +0200
Subject: [PATCH 01/11] JBAI-5829 [core, utils] Added support for Squeeze
 operator version 13; also widened LRN to v1 and Dropout to v12 (the newer
 versions contained only type changes in the specification).

---
 .../io/kinference.core/operators/math/LRN.kt  |  8 ++---
 .../operators/tensor/Dropout.kt               |  8 ++---
 .../operators/tensor/Squeeze.kt               | 30 +++++++++++++++++-
 .../operators/operations/SqueezeTest.kt       | 18 +++++++++--
 .../{ => v1}/test_squeeze/descriptor.txt      |  0
 .../squeeze/{ => v1}/test_squeeze/model.onnx  | Bin
 .../test_squeeze/test_data_set_0/input_0.pb   |  0
 .../test_squeeze/test_data_set_0/output_0.pb  |  0
 .../test_squeeze_negative_axes/descriptor.txt |  0
 .../test_squeeze_negative_axes/model.onnx     |  0
 .../test_data_set_0/input_0.pb                |  0
 .../test_data_set_0/output_0.pb               |  0
 .../squeeze/v13/test_squeeze/descriptor.txt   |  3 ++
 .../squeeze/v13/test_squeeze/model.onnx       | 19 +++++++++++
 .../test_squeeze/test_data_set_0/input_0.pb   |  1 +
 .../test_squeeze/test_data_set_0/input_1.pb   | Bin 0 -> 20 bytes
 .../test_squeeze/test_data_set_0/output_0.pb  |  1 +
 .../test_squeeze_negative_axes/descriptor.txt |  3 ++
 .../v13/test_squeeze_negative_axes/model.onnx | 19 +++++++++++
 .../test_data_set_0/input_0.pb                |  1 +
 .../test_data_set_0/input_1.pb                |  1 +
 .../test_data_set_0/output_0.pb               |  1 +
 22 files changed, 102 insertions(+), 11 deletions(-)
 rename utils/utils-testing/src/commonMain/resources/squeeze/{ => v1}/test_squeeze/descriptor.txt (100%)
 rename utils/utils-testing/src/commonMain/resources/squeeze/{ => v1}/test_squeeze/model.onnx (100%)
 rename utils/utils-testing/src/commonMain/resources/squeeze/{ => v1}/test_squeeze/test_data_set_0/input_0.pb (100%)
 rename utils/utils-testing/src/commonMain/resources/squeeze/{ => v1}/test_squeeze/test_data_set_0/output_0.pb (100%)
 rename utils/utils-testing/src/commonMain/resources/squeeze/{ => v1}/test_squeeze_negative_axes/descriptor.txt (100%)
 rename utils/utils-testing/src/commonMain/resources/squeeze/{ => v1}/test_squeeze_negative_axes/model.onnx (100%)
 rename utils/utils-testing/src/commonMain/resources/squeeze/{ => v1}/test_squeeze_negative_axes/test_data_set_0/input_0.pb (100%)
 rename utils/utils-testing/src/commonMain/resources/squeeze/{ => v1}/test_squeeze_negative_axes/test_data_set_0/output_0.pb (100%)
 create mode 100644 utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/descriptor.txt
 create mode 100644 utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/model.onnx
 create mode 100644 utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/test_data_set_0/input_0.pb
 create mode 100644 utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/test_data_set_0/input_1.pb
 create mode 100644 utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/test_data_set_0/output_0.pb
 create mode 100644 utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/descriptor.txt
 create mode 100644 utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/model.onnx
 create mode 100644 utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/test_data_set_0/input_0.pb
 create mode 100644 utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/test_data_set_0/input_1.pb
 create mode 100644 utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/test_data_set_0/output_0.pb

diff --git
a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/LRN.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/LRN.kt index 0ff15f088..1bae19370 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/LRN.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/LRN.kt @@ -16,17 +16,17 @@ import io.kinference.protobuf.message.TensorProto sealed class LRN(name: String, info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { companion object { - private val DEFAULT_VERSION = VersionInfo(sinceVersion = 13) // last version. Other versions: 1. + private val DEFAULT_VERSION = VersionInfo(sinceVersion = 1) operator fun invoke(name: String, version: Int?, attributes: Map>, inputs: List, outputs: List) = when (version ?: DEFAULT_VERSION.sinceVersion) { - in LRN13.VERSION.asRange() -> LRN13(name, attributes, inputs, outputs) + in LRN1.VERSION.asRange() -> LRN1(name, attributes, inputs, outputs) else -> error("Unsupported version of LRN operator: $version") } } } -class LRN13(name: String, attributes: Map>, inputs: List, outputs: List) : +class LRN1(name: String, attributes: Map>, inputs: List, outputs: List) : LRN(name, INFO, attributes, inputs, outputs) { companion object { private val TYPE_CONSTRAINTS = setOf( @@ -51,7 +51,7 @@ class LRN13(name: String, attributes: Map>, inputs: List< IOInfo(0, TYPE_CONSTRAINTS, "Y", optional = false, differentiable = true) ) - internal val VERSION = VersionInfo(sinceVersion = 13) + internal val VERSION = VersionInfo(sinceVersion = 1) private val INFO = OperatorInfo("LRN", ATTRIBUTES_INFO, INPUTS_INFO, OUTPUTS_INFO, VERSION, OperatorInfo.DEFAULT_DOMAIN) } diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Dropout.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Dropout.kt index 904fb01be..4b3c13ddf 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Dropout.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Dropout.kt @@ -14,17 +14,17 @@ import io.kinference.utils.inlines.InlineInt sealed class Dropout(name: String, info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { companion object { - private val DEFAULT_VERSION = VersionInfo(sinceVersion = 13) // last version. Other versions: 1, 6, 7, 10, 12. + private val DEFAULT_VERSION = VersionInfo(sinceVersion = 12) // last version. Other versions: 1, 6, 7, 10. 
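+        // Opset 13 changed only the type constraints of Dropout, so the v12
+        // implementation also covers models exported with the newer opset.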
operator fun invoke(name: String, version: Int?, attributes: Map>, inputs: List, outputs: List) = when (version ?: DEFAULT_VERSION.sinceVersion) { - in Dropout13.VERSION.asRange() -> Dropout13(name, attributes, inputs, outputs) + in Dropout12.VERSION.asRange() -> Dropout12(name, attributes, inputs, outputs) else -> error("Unsupported version of Dropout operator: $version") } } } -class Dropout13(name: String, attributes: Map>, inputs: List, outputs: List) : +class Dropout12(name: String, attributes: Map>, inputs: List, outputs: List) : Dropout(name, INFO, attributes, inputs, outputs) { companion object { private val TYPE_CONSTRAINTS_T = setOf( @@ -59,7 +59,7 @@ class Dropout13(name: String, attributes: Map>, inputs: L IOInfo(1, TYPE_CONSTRAINTS_T2, "mask", optional = true, differentiable = false) ) - internal val VERSION = VersionInfo(sinceVersion = 13) + internal val VERSION = VersionInfo(sinceVersion = 12) private val INFO = OperatorInfo("Dropout", ATTRIBUTES_INFO, INPUTS_INFO, OUTPUTS_INFO, VERSION, OperatorInfo.DEFAULT_DOMAIN) } diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Squeeze.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Squeeze.kt index 38d521038..e329b239f 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Squeeze.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/tensor/Squeeze.kt @@ -5,6 +5,7 @@ import io.kinference.core.data.tensor.KITensor import io.kinference.core.data.tensor.asTensor import io.kinference.data.ONNXData import io.kinference.graph.Contexts +import io.kinference.ndarray.arrays.LongNDArray import io.kinference.operator.* import io.kinference.protobuf.message.AttributeProto import io.kinference.utils.toIntArray @@ -15,7 +16,8 @@ sealed class Squeeze(name: String, info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) = when (version ?: DEFAULT_VERSION.sinceVersion) { in SqueezeVer1.VERSION.asRange() -> SqueezeVer1(name, attributes, inputs, outputs) - else -> error("Unsupported version of Constant operator: $version") + in SqueezeVer13.VERSION.asRange() -> SqueezeVer13(name, attributes, inputs, outputs) + else -> error("Unsupported version of Squeeze operator: $version") } } } @@ -44,3 +46,29 @@ class SqueezeVer1(name: String, attributes: Map>, inputs: return listOf(inputs.first()!!.data.toMutable().squeeze(*squeezeAxes).asTensor()) } } + +class SqueezeVer13(name: String, attributes: Map>, inputs: List, outputs: List) : Squeeze(name, INFO, attributes, inputs, outputs) { + companion object { + private val TYPE_CONSTRAINTS = ALL_DATA_TYPES + + private val ATTRIBUTES_INFO = listOf() + + private val INPUTS_INFO = listOf( + IOInfo(0, TYPE_CONSTRAINTS, "data", optional = false), + IOInfo(1, INT_DATA_TYPES, "axes", optional = true) + ) + + private val OUTPUTS_INFO = listOf(IOInfo(0, TYPE_CONSTRAINTS, "squeezed", optional = false)) + + internal val VERSION = VersionInfo(sinceVersion = 13, untilVersion = 21) + private val INFO = OperatorInfo("Squeeze", ATTRIBUTES_INFO, INPUTS_INFO, OUTPUTS_INFO, VERSION, OperatorInfo.DEFAULT_DOMAIN) + } + + override suspend fun > apply(contexts: Contexts, inputs: List): List { + val axes = inputs.getOrNull(1)?.data as? LongNDArray? 
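+        // In opset 13 `axes` moved from an operator attribute to an optional second
+        // input tensor; when it is absent, every dimension of size 1 is squeezed.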
+        if (axes != null && axes.rank != 1) error("Axes input must be a 1D tensor")
+
+        val squeezeAxes = axes?.array?.toArray()?.toIntArray() ?: IntArray(0)
+        return listOf(inputs.first()!!.data.toMutable().squeeze(*squeezeAxes).asTensor())
+    }
+}
diff --git a/inference/inference-core/src/jvmTest/kotlin/io/kinference/operators/operations/SqueezeTest.kt b/inference/inference-core/src/jvmTest/kotlin/io/kinference/operators/operations/SqueezeTest.kt
index b0856866d..f4d9981ea 100644
--- a/inference/inference-core/src/jvmTest/kotlin/io/kinference/operators/operations/SqueezeTest.kt
+++ b/inference/inference-core/src/jvmTest/kotlin/io/kinference/operators/operations/SqueezeTest.kt
@@ -4,8 +4,22 @@ import io.kinference.KITestEngine.KIAccuracyRunner
 import io.kinference.utils.TestRunner
 import kotlin.test.Test
 
-class SqueezeTest {
-    private fun getTargetPath(dirName: String) = "squeeze/$dirName/"
+class SqueezeVer1Test {
+    private fun getTargetPath(dirName: String) = "squeeze/v1/$dirName/"
+
+    @Test
+    fun test_squeeze() = TestRunner.runTest {
+        KIAccuracyRunner.runFromResources(getTargetPath("test_squeeze"))
+    }
+
+    @Test
+    fun test_squeeze_with_negative_axes() = TestRunner.runTest {
+        KIAccuracyRunner.runFromResources(getTargetPath("test_squeeze_negative_axes"))
+    }
+}
+
+class SqueezeVer13Test {
+    private fun getTargetPath(dirName: String) = "squeeze/v13/$dirName/"
 
     @Test
     fun test_squeeze() = TestRunner.runTest {
         KIAccuracyRunner.runFromResources(getTargetPath("test_squeeze"))
     }
diff --git a/utils/utils-testing/src/commonMain/resources/squeeze/test_squeeze/descriptor.txt b/utils/utils-testing/src/commonMain/resources/squeeze/v1/test_squeeze/descriptor.txt
similarity index 100%
rename from utils/utils-testing/src/commonMain/resources/squeeze/test_squeeze/descriptor.txt
rename to utils/utils-testing/src/commonMain/resources/squeeze/v1/test_squeeze/descriptor.txt
diff --git a/utils/utils-testing/src/commonMain/resources/squeeze/test_squeeze/model.onnx b/utils/utils-testing/src/commonMain/resources/squeeze/v1/test_squeeze/model.onnx
similarity index 100%
rename from utils/utils-testing/src/commonMain/resources/squeeze/test_squeeze/model.onnx
rename to utils/utils-testing/src/commonMain/resources/squeeze/v1/test_squeeze/model.onnx
diff --git a/utils/utils-testing/src/commonMain/resources/squeeze/test_squeeze/test_data_set_0/input_0.pb b/utils/utils-testing/src/commonMain/resources/squeeze/v1/test_squeeze/test_data_set_0/input_0.pb
similarity index 100%
rename from utils/utils-testing/src/commonMain/resources/squeeze/test_squeeze/test_data_set_0/input_0.pb
rename to utils/utils-testing/src/commonMain/resources/squeeze/v1/test_squeeze/test_data_set_0/input_0.pb
diff --git a/utils/utils-testing/src/commonMain/resources/squeeze/test_squeeze/test_data_set_0/output_0.pb b/utils/utils-testing/src/commonMain/resources/squeeze/v1/test_squeeze/test_data_set_0/output_0.pb
similarity index 100%
rename from utils/utils-testing/src/commonMain/resources/squeeze/test_squeeze/test_data_set_0/output_0.pb
rename to utils/utils-testing/src/commonMain/resources/squeeze/v1/test_squeeze/test_data_set_0/output_0.pb
diff --git a/utils/utils-testing/src/commonMain/resources/squeeze/test_squeeze_negative_axes/descriptor.txt b/utils/utils-testing/src/commonMain/resources/squeeze/v1/test_squeeze_negative_axes/descriptor.txt
similarity index 100%
rename from utils/utils-testing/src/commonMain/resources/squeeze/test_squeeze_negative_axes/descriptor.txt
rename to utils/utils-testing/src/commonMain/resources/squeeze/v1/test_squeeze_negative_axes/descriptor.txt
diff --git a/utils/utils-testing/src/commonMain/resources/squeeze/test_squeeze_negative_axes/model.onnx b/utils/utils-testing/src/commonMain/resources/squeeze/v1/test_squeeze_negative_axes/model.onnx
similarity index 100%
rename from utils/utils-testing/src/commonMain/resources/squeeze/test_squeeze_negative_axes/model.onnx
rename to utils/utils-testing/src/commonMain/resources/squeeze/v1/test_squeeze_negative_axes/model.onnx
diff --git a/utils/utils-testing/src/commonMain/resources/squeeze/test_squeeze_negative_axes/test_data_set_0/input_0.pb b/utils/utils-testing/src/commonMain/resources/squeeze/v1/test_squeeze_negative_axes/test_data_set_0/input_0.pb
similarity index 100%
rename from utils/utils-testing/src/commonMain/resources/squeeze/test_squeeze_negative_axes/test_data_set_0/input_0.pb
rename to utils/utils-testing/src/commonMain/resources/squeeze/v1/test_squeeze_negative_axes/test_data_set_0/input_0.pb
diff --git a/utils/utils-testing/src/commonMain/resources/squeeze/test_squeeze_negative_axes/test_data_set_0/output_0.pb b/utils/utils-testing/src/commonMain/resources/squeeze/v1/test_squeeze_negative_axes/test_data_set_0/output_0.pb
similarity index 100%
rename from utils/utils-testing/src/commonMain/resources/squeeze/test_squeeze_negative_axes/test_data_set_0/output_0.pb
rename to utils/utils-testing/src/commonMain/resources/squeeze/v1/test_squeeze_negative_axes/test_data_set_0/output_0.pb
diff --git a/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/descriptor.txt b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/descriptor.txt
new file mode 100644
index 000000000..d0d4ef393
--- /dev/null
+++ b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/descriptor.txt
@@ -0,0 +1,3 @@
+test_data_set_0/input_0.pb
+test_data_set_0/input_1.pb
+test_data_set_0/output_0.pb
diff --git a/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/model.onnx b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/model.onnx
new file mode 100644
index 000000000..26798f82e
--- /dev/null
+++ b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/model.onnx
@@ -0,0 +1,19 @@
+[binary ONNX model payload omitted]
\ No newline at end of file
diff --git a/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/test_data_set_0/input_0.pb b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/test_data_set_0/input_0.pb
new file mode 100644
index 000000000..09d5a14a4
--- /dev/null
+++ b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/test_data_set_0/input_0.pb
@@ -0,0 +1 @@
+[binary tensor payload omitted]
\ No newline at end of file
diff --git a/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/test_data_set_0/input_1.pb b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/test_data_set_0/input_1.pb
new file mode 100644
index 0000000000000000000000000000000000000000..ec9874a7b23bdc41efa2b3dcca6d1c221235b1a3
GIT binary patch
literal 20
[binary literal omitted]

literal 0
HcmV?d00001

diff --git a/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/test_data_set_0/output_0.pb b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/test_data_set_0/output_0.pb
new file mode 100644
index 000000000..ad67ee7f4
--- /dev/null
+++ b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze/test_data_set_0/output_0.pb
@@ -0,0 +1 @@
+[binary tensor payload omitted]
\ No newline at end of file
diff --git a/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/descriptor.txt b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/descriptor.txt
new file mode 100644
index 000000000..d0d4ef393
--- /dev/null
+++ b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/descriptor.txt
@@ -0,0 +1,3 @@
+test_data_set_0/input_0.pb
+test_data_set_0/input_1.pb
+test_data_set_0/output_0.pb
diff --git a/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/model.onnx b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/model.onnx
new file mode 100644
index 000000000..e148d48ca
--- /dev/null
+++ b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/model.onnx
@@ -0,0 +1,19 @@
+[binary ONNX model payload omitted]
\ No newline at end of file
diff --git a/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/test_data_set_0/input_0.pb b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/test_data_set_0/input_0.pb
new file mode 100644
index 000000000..55c2bdcf0
--- /dev/null
+++ b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/test_data_set_0/input_0.pb
@@ -0,0 +1 @@
+[binary tensor payload omitted]
\ No newline at end of file
diff --git a/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/test_data_set_0/input_1.pb b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/test_data_set_0/input_1.pb
new file mode 100644
index 000000000..2f4bbd39a
--- /dev/null
+++ b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/test_data_set_0/input_1.pb
@@ -0,0 +1 @@
+[binary tensor payload omitted]
\ No newline at end of file
diff --git a/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/test_data_set_0/output_0.pb b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/test_data_set_0/output_0.pb
new file mode 100644
index 000000000..ed4b2d0f6
--- /dev/null
+++ b/utils/utils-testing/src/commonMain/resources/squeeze/v13/test_squeeze_negative_axes/test_data_set_0/output_0.pb
@@ -0,0 +1 @@
+[binary tensor payload omitted]
\ No newline at end of file

From 61ff5749bad151d4d1fa3562ac3c16e90e185b82 Mon Sep 17 00:00:00 2001
From: dmitriyb
Date: Tue, 17 Sep 2024 14:28:04 +0200 Subject: [PATCH 02/11] JBAI-5829 [examples] Added a new `examples` module with sample project for image classification. The classification example demonstrates a pipeline for dogs vs. cats recognition using a pre-trained CaffeNet model. --- examples/build.gradle.kts | 32 ++++ .../examples/classification/Main.kt | 158 ++++++++++++++++++ settings.gradle.kts | 2 + 3 files changed, 192 insertions(+) create mode 100644 examples/build.gradle.kts create mode 100644 examples/src/jvmMain/kotlin/io/kinference/examples/classification/Main.kt diff --git a/examples/build.gradle.kts b/examples/build.gradle.kts new file mode 100644 index 000000000..e4da20e20 --- /dev/null +++ b/examples/build.gradle.kts @@ -0,0 +1,32 @@ +group = rootProject.group +version = rootProject.version + +kotlin { + jvm() + + sourceSets { + jvmMain { + dependencies { + api(project(":inference:inference-api")) + api(project(":inference:inference-core")) + api(project(":serialization:serializer-protobuf")) + api(project(":utils:utils-common")) + + api(project(":ndarray:ndarray-api")) + api(project(":ndarray:ndarray-core")) + + api(libs.wire.runtime) + implementation("org.jetbrains.kotlinx:kotlin-deeplearning-api:0.5.2") + implementation("org.jetbrains.kotlinx:kotlin-deeplearning-dataset:0.5.2") // Dataset support + + implementation("io.ktor:ktor-client-core:2.3.12") + implementation("io.ktor:ktor-client-cio:2.3.12") // JVM Engine + + api("org.slf4j:slf4j-api:2.0.9") + api("org.slf4j:slf4j-simple:2.0.9") + + implementation("com.knuddels:jtokkit:1.1.0") + } + } + } +} diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/classification/Main.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/classification/Main.kt new file mode 100644 index 000000000..4107eca46 --- /dev/null +++ b/examples/src/jvmMain/kotlin/io/kinference/examples/classification/Main.kt @@ -0,0 +1,158 @@ +package io.kinference.examples.classification + +import io.kinference.core.KIEngine +import io.kinference.core.data.tensor.KITensor +import io.kinference.core.data.tensor.asTensor +import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.FloatNDArray.Companion.invoke +import io.kinference.utils.CommonDataLoader +import io.kinference.utils.PredictionConfigs +import io.kinference.utils.inlines.InlineInt +import io.ktor.client.HttpClient +import io.ktor.client.plugins.HttpTimeout +import io.ktor.client.request.prepareRequest +import io.ktor.client.statement.bodyAsChannel +import io.ktor.util.cio.writeChannel +import io.ktor.utils.io.copyAndClose +import okio.Path.Companion.toPath +import org.jetbrains.kotlinx.dl.api.preprocessing.pipeline +import org.jetbrains.kotlinx.dl.dataset.OnFlyImageDataset +import org.jetbrains.kotlinx.dl.dataset.embedded.dogsCatsSmallDatasetPath +import org.jetbrains.kotlinx.dl.dataset.generator.FromFolders +import org.jetbrains.kotlinx.dl.impl.inference.imagerecognition.InputType +import org.jetbrains.kotlinx.dl.impl.preprocessing.* +import org.jetbrains.kotlinx.dl.impl.preprocessing.image.* +import java.awt.image.BufferedImage +import java.io.File +import kotlin.collections.mutableMapOf + +// Constants for input and output tensor names used in the CaffeNet model +private const val INPUT_TENSOR_NAME = "data_0" +private const val OUTPUT_TENSOR_NAME = "prob_1" + +// Preprocessing pipeline for input images using KotlinDL +private val preprocessing = pipeline() + .resize { + outputWidth = 224 + outputHeight = 224 + interpolation = InterpolationType.BILINEAR + } + .convert { 
colorMode = ColorMode.BGR } + .toFloatArray { } + .call(InputType.CAFFE.preprocessing()) + +// Path to the small dataset of dogs vs cats images (100 images) +private val dogsVsCatsDatasetPath = dogsCatsSmallDatasetPath() + +/** + * Downloads a file from the specified URL and saves it to the given output path. + * If the file already exists at the output path, the download is skipped. + * + * @param url The URL from which the file will be downloaded. + * @param outputPath The path to which the downloaded file will be saved. + */ +private suspend fun downloadFile(url: String, outputPath: String) { + // Check if the file already exists + val file = File(outputPath) + if (file.exists()) { + println("File already exists at $outputPath. Skipping download.") + return // Exit the function if the file exists + } + + // Create an instance of HttpClient with custom timeout settings + val client = HttpClient { + install(HttpTimeout) { + requestTimeoutMillis = 600_000 // Set timeout to 10 minutes (600,000 milliseconds) + } + } + + // Download the file and write to the specified output path + client.prepareRequest(url).execute { response -> + response.bodyAsChannel().copyAndClose(File(outputPath).writeChannel()) + } + + client.close() +} + +/** + * Creates a Map of input tensors categorized by their respective classes (e.g., "cat" and "dog"). + * + * This function reads images from the dataset, preprocesses them, + * transposes the tensors to the required format, and groups them + * based on their class label. + * + * @return A Map where the keys are the class labels (e.g., "cat" and "dog"), + * and the values are lists of KITensor objects representing the input tensors + * for each class. + */ +private suspend fun createInputs(): Map> { + val dataset = OnFlyImageDataset.create( + File(dogsVsCatsDatasetPath), + FromFolders(mapping = mapOf("cat" to 0, "dog" to 1)), + preprocessing + ).shuffle() + + + val tensorShape = intArrayOf(1, 224, 224, 3) // Original tensor shape is [batch, width, height, channel] + val permuteAxis = intArrayOf(0, 3, 1, 2) // Permutations for shape [batch, channel, width, height] + val inputTensors = mutableMapOf>() + + for (i in 0 until dataset.xSize()) { + val inputData = dataset.getX(i) + val inputClass = if (dataset.getY(i).toInt() == 0) "cat" else "dog" + val floatNDArray = FloatNDArray(tensorShape) { index: InlineInt -> inputData[index.value]} // Create an NDArray from the image data + val inputTensor = floatNDArray.transpose(permuteAxis).asTensor(INPUT_TENSOR_NAME) // Transpose and create a tensor from the NDArray + inputTensors.putIfAbsent(inputClass, mutableListOf()) + inputTensors[inputClass]!!.add(inputTensor) + } + + return inputTensors +} + +/** + * Displays the top 5 predictions with their corresponding labels and scores. + * + * @param predictions The predicted scores in a multidimensional array format. + * @param classLabels The list of class labels corresponding to the predictions. + * @param originalClass The actual class label of the instance being predicted. 
+ */ +private fun displayTopPredictions(predictions: FloatNDArray, classLabels: List, originalClass: String) { + val predictionArray = predictions.array.blocks.first() + val indexedScores = predictionArray.withIndex().sortedByDescending { it.value }.take(5) + + println("\nOriginal class: $originalClass") + println("Top 5 predictions:") + for ((index, score) in indexedScores) { + val predictedClassLabel = if (index in classLabels.indices) classLabels[index] else "Unknown" + println("${predictedClassLabel}: ${"%.2f".format(score * 100)}%") + } +} + +suspend fun main() { + val resourcesPath = System.getProperty("user.dir") + "/cache/" + val modelUrl = "https://github.com/onnx/models/raw/main/validated/vision/classification/caffenet/model/caffenet-12.onnx" + val synsetUrl = "https://s3.amazonaws.com/onnx-model-zoo/synset.txt" + + println("Current working directory: $resourcesPath") + println("Downloading model from: $modelUrl") + downloadFile(modelUrl, "$resourcesPath/model.onnx") + println("Downloading synset from: $synsetUrl") + downloadFile(synsetUrl, "$resourcesPath/synset.txt") + + val modelBytes = CommonDataLoader.bytes("$resourcesPath/model.onnx".toPath()) + val classLabels = File("$resourcesPath/synset.txt").readLines() + + println("Loading model...") + val model = KIEngine.loadModel(modelBytes, optimize = true, predictionConfig = PredictionConfigs.DefaultAutoAllocator) + println("Creating inputs...") + val inputTensors = createInputs() + + println("Starting inference...") + inputTensors.forEach { dataClass -> + dataClass.value.forEach { tensor -> + val actualOutputs = model.predict(listOf(tensor)) + val predictions = actualOutputs[OUTPUT_TENSOR_NAME]?.data as FloatNDArray + displayTopPredictions(predictions, classLabels, dataClass.key) + } + } +} diff --git a/settings.gradle.kts b/settings.gradle.kts index a0fda8249..2b93d5119 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -28,6 +28,8 @@ include(":adapters:kmath:adapter-kmath-core") include(":adapters:kmath:adapter-kmath-ort") include(":adapters:kmath:adapter-kmath-ort-gpu") +include(":examples") + pluginManagement { repositories { From 5710839e4e4d5ac0fffadb087da8c867d949d68c Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Thu, 26 Sep 2024 11:32:09 +0200 Subject: [PATCH 03/11] JBAI-5829 [examples] Added gpt-2 example. 
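
A side note on the classification example above: the `permuteAxis = intArrayOf(0, 3, 1, 2)` transpose converts KotlinDL's channels-last image layout into the channels-first layout that CaffeNet expects. Below is a minimal sketch of the same index mapping in plain Kotlin; the helper name `nhwcToNchw` is illustrative and not part of the patch, and it assumes the standard [batch, height, width, channel] ordering for the source buffer:

fun nhwcToNchw(data: FloatArray, n: Int, h: Int, w: Int, c: Int): FloatArray {
    val out = FloatArray(data.size)
    for (b in 0 until n)
        for (y in 0 until h)
            for (x in 0 until w)
                for (ch in 0 until c) {
                    val src = ((b * h + y) * w + x) * c + ch // offset in NHWC layout
                    val dst = ((b * c + ch) * h + y) * w + x // offset in NCHW layout
                    out[dst] = data[src]
                }
    return out
}

For a 1x224x224x3 buffer this should produce the same values that `floatNDArray.transpose(permuteAxis)` yields for that shape.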
--- examples/build.gradle.kts | 5 +- .../kotlin/io/kinference/examples/Utils.kt | 41 +++++ .../examples/classification/Main.kt | 45 +---- .../kotlin/io/kinference/examples/lm/Main.kt | 169 ++++++++++++++++++ 4 files changed, 218 insertions(+), 42 deletions(-) create mode 100644 examples/src/jvmMain/kotlin/io/kinference/examples/Utils.kt create mode 100644 examples/src/jvmMain/kotlin/io/kinference/examples/lm/Main.kt diff --git a/examples/build.gradle.kts b/examples/build.gradle.kts index e4da20e20..69891a917 100644 --- a/examples/build.gradle.kts +++ b/examples/build.gradle.kts @@ -9,13 +9,13 @@ kotlin { dependencies { api(project(":inference:inference-api")) api(project(":inference:inference-core")) + api(project(":inference:inference-ort")) api(project(":serialization:serializer-protobuf")) api(project(":utils:utils-common")) api(project(":ndarray:ndarray-api")) api(project(":ndarray:ndarray-core")) - api(libs.wire.runtime) implementation("org.jetbrains.kotlinx:kotlin-deeplearning-api:0.5.2") implementation("org.jetbrains.kotlinx:kotlin-deeplearning-dataset:0.5.2") // Dataset support @@ -25,7 +25,8 @@ kotlin { api("org.slf4j:slf4j-api:2.0.9") api("org.slf4j:slf4j-simple:2.0.9") - implementation("com.knuddels:jtokkit:1.1.0") + implementation("ai.djl:api:0.28.0") + implementation("ai.djl.huggingface:tokenizers:0.28.0") } } } diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/Utils.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/Utils.kt new file mode 100644 index 000000000..4aede5dcd --- /dev/null +++ b/examples/src/jvmMain/kotlin/io/kinference/examples/Utils.kt @@ -0,0 +1,41 @@ +package io.kinference.examples + +import io.ktor.client.HttpClient +import io.ktor.client.plugins.HttpTimeout +import io.ktor.client.request.prepareRequest +import io.ktor.client.statement.bodyAsChannel +import io.ktor.util.cio.writeChannel +import io.ktor.utils.io.copyAndClose +import java.io.File + +val resourcesPath = System.getProperty("user.dir") + "/cache/" + +/** + * Downloads a file from the specified URL and saves it to the given output path. + * If the file already exists at the output path, the download is skipped. + * + * @param url The URL from which the file will be downloaded. + * @param outputPath The path to which the downloaded file will be saved. + */ +suspend fun downloadFile(url: String, outputPath: String) { + // Check if the file already exists + val file = File(outputPath) + if (file.exists()) { + println("File already exists at $outputPath. 
Skipping download.") + return // Exit the function if the file exists + } + + // Create an instance of HttpClient with custom timeout settings + val client = HttpClient { + install(HttpTimeout) { + requestTimeoutMillis = 600_000 // Set timeout to 10 minutes (600,000 milliseconds) + } + } + + // Download the file and write to the specified output path + client.prepareRequest(url).execute { response -> + response.bodyAsChannel().copyAndClose(File(outputPath).writeChannel()) + } + + client.close() +} diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/classification/Main.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/classification/Main.kt index 4107eca46..260638218 100644 --- a/examples/src/jvmMain/kotlin/io/kinference/examples/classification/Main.kt +++ b/examples/src/jvmMain/kotlin/io/kinference/examples/classification/Main.kt @@ -3,17 +3,13 @@ package io.kinference.examples.classification import io.kinference.core.KIEngine import io.kinference.core.data.tensor.KITensor import io.kinference.core.data.tensor.asTensor +import io.kinference.examples.downloadFile +import io.kinference.examples.resourcesPath import io.kinference.ndarray.arrays.* import io.kinference.ndarray.arrays.FloatNDArray.Companion.invoke import io.kinference.utils.CommonDataLoader import io.kinference.utils.PredictionConfigs import io.kinference.utils.inlines.InlineInt -import io.ktor.client.HttpClient -import io.ktor.client.plugins.HttpTimeout -import io.ktor.client.request.prepareRequest -import io.ktor.client.statement.bodyAsChannel -import io.ktor.util.cio.writeChannel -import io.ktor.utils.io.copyAndClose import okio.Path.Companion.toPath import org.jetbrains.kotlinx.dl.api.preprocessing.pipeline import org.jetbrains.kotlinx.dl.dataset.OnFlyImageDataset @@ -44,36 +40,6 @@ private val preprocessing = pipeline() // Path to the small dataset of dogs vs cats images (100 images) private val dogsVsCatsDatasetPath = dogsCatsSmallDatasetPath() -/** - * Downloads a file from the specified URL and saves it to the given output path. - * If the file already exists at the output path, the download is skipped. - * - * @param url The URL from which the file will be downloaded. - * @param outputPath The path to which the downloaded file will be saved. - */ -private suspend fun downloadFile(url: String, outputPath: String) { - // Check if the file already exists - val file = File(outputPath) - if (file.exists()) { - println("File already exists at $outputPath. Skipping download.") - return // Exit the function if the file exists - } - - // Create an instance of HttpClient with custom timeout settings - val client = HttpClient { - install(HttpTimeout) { - requestTimeoutMillis = 600_000 // Set timeout to 10 minutes (600,000 milliseconds) - } - } - - // Download the file and write to the specified output path - client.prepareRequest(url).execute { response -> - response.bodyAsChannel().copyAndClose(File(outputPath).writeChannel()) - } - - client.close() -} - /** * Creates a Map of input tensors categorized by their respective classes (e.g., "cat" and "dog"). 
* @@ -129,17 +95,16 @@ private fun displayTopPredictions(predictions: FloatNDArray, classLabels: List { + val indexedProbs = probs.mapIndexed { index, prob -> index to prob } + val sortedProbs = indexedProbs.sortedByDescending { it.second }.take(k) + val topProbs = sortedProbs.map { it.second }.toFloatArray() + val topIndices = sortedProbs.map { it.first }.toIntArray() + return Pair(topProbs, topIndices) +} + +fun transformToFloatArray2D(original: FloatArray, n: Int): Array { + // Calculate how many sub-arrays (rows) we will have + val rowCount = original.size / n + + // Create a new 2D array to store the result + val result = Array(rowCount) { FloatArray(n) } + + // Fill the new 2D array with sub-arrays from the original array + for (i in 0 until rowCount) { + // Copy the next n elements into the current row + result[i] = original.sliceArray(i * n until (i + 1) * n) + } + + return result +} + +suspend fun mainONNXRuntimeValidation() { + val modelBytes = CommonDataLoader.bytes("$resourcesPath/gpt2-lm-head-10.onnx".toPath()) + val model = ORTEngine.loadModel(modelBytes) + + val inputTestTensor = ORTEngine.loadData("$resourcesPath/test_data_set_0/input_0.pb".toPath(), ONNXDataType.ONNX_TENSOR) + val realOutput = model.predict(listOf(inputTestTensor)) + println(realOutput) + val output = realOutput["output1"]!!.data as OnnxTensor + val logits = output.value as Array>> + val lastTokenLogits = logits[0][0][7] // shape: [50257] + val lastTokenProbs = softmax(lastTokenLogits) + val topK = topK(lastTokenProbs, 5) + val topKIndices = topK.second + println(topKIndices.joinToString(", ")) +} + +suspend fun mainKIValidation() { + val modelBytes = CommonDataLoader.bytes("$resourcesPath/gpt2-lm-head-10.onnx".toPath()) + val model = KIEngine.loadModel(modelBytes, optimize = true, predictionConfig = PredictionConfigs.NoAllocator) + + val tokenizer = HuggingFaceTokenizer.newInstance("gpt2") + + val inputTestTensor = KIEngine.loadData("$resourcesPath/test_data_set_0/input_0.pb".toPath(), ONNXDataType.ONNX_TENSOR) + val realOutput = model.predict(listOf(inputTestTensor)) + println(realOutput) + + val farray = ((realOutput["output1"]!! as KITensor).data as FloatNDArray).array.toArray() + val farray2d = transformToFloatArray2D(farray, 50257) + println(farray2d) + + val slicedReal = (realOutput["output1"]!! as KITensor).data.slice( + starts = intArrayOf(0, 0, 8 - 1, 0), + ends = intArrayOf(1, 1, 8, 50257), + steps = intArrayOf(1, 1, 1, 1) + ) as NumberNDArrayCore + val fslice = (slicedReal as FloatNDArray).array.toArray() + println(fslice) + val softmaxReal = slicedReal.softmax(axis = -1) + val topKReal = softmaxReal.topK( + axis = -1, + k = 5, + largest = true, + sorted = true + ) + + val tokenIdReal = (topKReal.second as LongNDArray)[intArrayOf(0,0,0,0)].toInt() + val decodeReal = tokenizer.decode(longArrayOf(tokenIdReal.toLong())) + println(decodeReal) +} + +// Constants for input and output tensor names used in the GPT-2 model +private const val INPUT_TENSOR_NAME = "input1" +private const val OUTPUT_TENSOR_NAME = "output1" // We use only logits tensor + +suspend fun extractTopToken(output: Map>, tokensSize: Int): Long { + val logits = output[OUTPUT_TENSOR_NAME]!! 
as KITensor + val sliced = logits.data.slice( + starts = intArrayOf(0, 0, tokensSize - 1, 0), // First batch, first element in the second dimension, last token, first vocab entry + ends = intArrayOf(1, 1, tokensSize, 50257), // Same batch, same second dimension, one token step, whole vocab (50257) + steps = intArrayOf(1, 1, 1, 1) // Step of 1 for each dimension + ) as NumberNDArrayCore + val softmax = sliced.softmax(axis = -1) + val topK = softmax.topK( + axis = -1, // Apply top-k along the last dimension (vocabulary size) + k = 1, // Retrieve the top 1 element + largest = true, // We want the largest probabilities (most probable tokens) + sorted = false // Sorting is unnecessary since we are only retrieving the top 1 + ) + val tokenId = (topK.second as LongNDArray)[intArrayOf(0, 0, 0, 0)] + + return tokenId +} + +suspend fun main() { + val modelUrl = "https://github.com/onnx/models/raw/main/validated/text/machine_comprehension/gpt-2/model/gpt2-lm-head-10.onnx" + val modelName = "gpt2-lm-head-10" + + println("Downloading model from: $modelUrl") + downloadFile(modelUrl, "$resourcesPath/$modelName.onnx") + + val modelBytes = CommonDataLoader.bytes("${resourcesPath}/$modelName.onnx".toPath()) + + println("Loading model...") + val model = KIEngine.loadModel(modelBytes, optimize = true, predictionConfig = PredictionConfigs.DefaultAutoAllocator) + + val tokenizer = HuggingFaceTokenizer.newInstance("gpt2", mapOf("modelMaxLength" to "1024")) + val testString = "Neurogenesis is most active during embryonic development and is responsible for producing " + + "all the various types of neurons of the organism, but it continues throughout adult life " + + "in a variety of organisms. Once born, neurons do not divide (see mitosis), and many will " + + "live the lifespan of the animal, except under extraordinary and usually pathogenic circumstances." + val encoded = tokenizer.encode(testString) + val tokens = encoded.ids + val tokensSize = tokens.size + + val predictionLength = 34 + val outputTokens = LongArray(predictionLength) { 0 } + + val input = LongNDArray(1, tokensSize) { idx: InlineInt -> tokens[idx.value] }.unsqueeze(0) + var currentContext = input.clone() + + print("Here goes the test text for generation:\n$testString") + + for (idx in 0 until predictionLength) { + val inputTensor = listOf((currentContext as NDArrayCore).asTensor(INPUT_TENSOR_NAME)) + val output = model.predict(inputTensor) + + outputTokens[idx] = extractTopToken(output, tokensSize + idx) + + val newTokenArray = LongNDArray(1, 1) { _: InlineInt -> outputTokens[idx] } + currentContext = currentContext.concat(listOf(newTokenArray.unsqueeze(0)), axis = -1) + print(tokenizer.decode(longArrayOf(outputTokens[idx]))) + } + println("\n\nDone") +} From a38c5559403fed97ca8e2991c3f8bda286ac1a06 Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Fri, 27 Sep 2024 13:50:47 +0200 Subject: [PATCH 04/11] JBAI-5829 [examples] Added GPT-2 example using ORTEngine for text generation. 
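
A note on the `softmax` helper defined in the example above (and removed again by this patch in favor of the NDArray `softmax` call): subtracting the maximum logit before exponentiating leaves the result unchanged, because the common factor cancels in the ratio, while keeping `exp` from overflowing for large logits. In formula form:

    softmax(x)_i = e^{x_i - m} / \sum_j e^{x_j - m},  where  m = \max_j x_j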
--- .../kotlin/io/kinference/examples/Utils.kt | 23 +++ .../io/kinference/examples/lm/KIMain.kt | 60 +++++++ .../kotlin/io/kinference/examples/lm/Main.kt | 169 ------------------ .../io/kinference/examples/lm/ORTMain.kt | 74 ++++++++ 4 files changed, 157 insertions(+), 169 deletions(-) create mode 100644 examples/src/jvmMain/kotlin/io/kinference/examples/lm/KIMain.kt delete mode 100644 examples/src/jvmMain/kotlin/io/kinference/examples/lm/Main.kt create mode 100644 examples/src/jvmMain/kotlin/io/kinference/examples/lm/ORTMain.kt diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/Utils.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/Utils.kt index 4aede5dcd..3b3acdc30 100644 --- a/examples/src/jvmMain/kotlin/io/kinference/examples/Utils.kt +++ b/examples/src/jvmMain/kotlin/io/kinference/examples/Utils.kt @@ -1,5 +1,9 @@ package io.kinference.examples +import io.kinference.core.KIONNXData +import io.kinference.core.data.tensor.KITensor +import io.kinference.ndarray.arrays.LongNDArray +import io.kinference.ndarray.arrays.NumberNDArrayCore import io.ktor.client.HttpClient import io.ktor.client.plugins.HttpTimeout import io.ktor.client.request.prepareRequest @@ -39,3 +43,22 @@ suspend fun downloadFile(url: String, outputPath: String) { client.close() } + +suspend fun extractTopToken(output: Map>, tokensSize: Int, outputName: String): Long { + val logits = output[outputName]!! as KITensor + val sliced = logits.data.slice( + starts = intArrayOf(0, 0, tokensSize - 1, 0), // First batch, first element in the second dimension, last token, first vocab entry + ends = intArrayOf(1, 1, tokensSize, 50257), // Same batch, same second dimension, one token step, whole vocab (50257) + steps = intArrayOf(1, 1, 1, 1) // Step of 1 for each dimension + ) as NumberNDArrayCore + val softmax = sliced.softmax(axis = -1) + val topK = softmax.topK( + axis = -1, // Apply top-k along the last dimension (vocabulary size) + k = 1, // Retrieve the top 1 element + largest = true, // We want the largest probabilities (most probable tokens) + sorted = false // Sorting is unnecessary since we are only retrieving the top 1 + ) + val tokenId = (topK.second as LongNDArray)[intArrayOf(0, 0, 0, 0)] + + return tokenId +} diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/lm/KIMain.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/lm/KIMain.kt new file mode 100644 index 000000000..6a1bc484c --- /dev/null +++ b/examples/src/jvmMain/kotlin/io/kinference/examples/lm/KIMain.kt @@ -0,0 +1,60 @@ +package io.kinference.examples.lm + +import ai.djl.huggingface.tokenizers.HuggingFaceTokenizer +import io.kinference.core.KIEngine +import io.kinference.core.data.tensor.asTensor +import io.kinference.examples.downloadFile +import io.kinference.examples.extractTopToken +import io.kinference.examples.resourcesPath +import io.kinference.ndarray.arrays.LongNDArray +import io.kinference.ndarray.arrays.NDArrayCore +import io.kinference.utils.CommonDataLoader +import io.kinference.utils.PredictionConfigs +import io.kinference.utils.inlines.InlineInt +import okio.Path.Companion.toPath + +// Constants for input and output tensor names used in the GPT-2 model +private const val INPUT_TENSOR_NAME = "input1" +private const val OUTPUT_TENSOR_NAME = "output1" // We use only logits tensor + +suspend fun main() { + val modelUrl = "https://github.com/onnx/models/raw/main/validated/text/machine_comprehension/gpt-2/model/gpt2-lm-head-10.onnx" + val modelName = "gpt2-lm-head-10" + + println("Downloading model from: $modelUrl") 
+ downloadFile(modelUrl, "$resourcesPath/$modelName.onnx") + + val modelBytes = CommonDataLoader.bytes("${resourcesPath}/$modelName.onnx".toPath()) + + println("Loading model...") + val model = KIEngine.loadModel(modelBytes, optimize = true, predictionConfig = PredictionConfigs.DefaultAutoAllocator) + + val tokenizer = HuggingFaceTokenizer.newInstance("gpt2", mapOf("modelMaxLength" to "1024")) + val testString = "Neurogenesis is most active during embryonic development and is responsible for producing " + + "all the various types of neurons of the organism, but it continues throughout adult life " + + "in a variety of organisms. Once born, neurons do not divide (see mitosis), and many will " + + "live the lifespan of the animal, except under extraordinary and usually pathogenic circumstances." + val encoded = tokenizer.encode(testString) + val tokens = encoded.ids + val tokensSize = tokens.size + + val predictionLength = 34 + val outputTokens = LongArray(predictionLength) { 0 } + + val input = LongNDArray(1, tokensSize) { idx: InlineInt -> tokens[idx.value] }.unsqueeze(0) + var currentContext = input.clone() + + print("Here goes the test text for generation:\n$testString") + + for (idx in 0 until predictionLength) { + val inputTensor = listOf((currentContext as NDArrayCore).asTensor(INPUT_TENSOR_NAME)) + val output = model.predict(inputTensor) + + outputTokens[idx] = extractTopToken(output, tokensSize + idx, OUTPUT_TENSOR_NAME) + + val newTokenArray = LongNDArray(1, 1) { _: InlineInt -> outputTokens[idx] } + currentContext = currentContext.concat(listOf(newTokenArray.unsqueeze(0)), axis = -1) + print(tokenizer.decode(longArrayOf(outputTokens[idx]))) + } + println("\n\nDone") +} diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/lm/Main.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/lm/Main.kt deleted file mode 100644 index 01de33932..000000000 --- a/examples/src/jvmMain/kotlin/io/kinference/examples/lm/Main.kt +++ /dev/null @@ -1,169 +0,0 @@ -package io.kinference.examples.lm - -import ai.djl.huggingface.tokenizers.HuggingFaceTokenizer -import ai.onnxruntime.OnnxTensor -import io.kinference.core.KIEngine -import io.kinference.core.KIONNXData -import io.kinference.core.data.tensor.KITensor -import io.kinference.core.data.tensor.asTensor -import io.kinference.data.ONNXDataType -import io.kinference.examples.downloadFile -import io.kinference.examples.resourcesPath -import io.kinference.ndarray.arrays.FloatNDArray -import io.kinference.ndarray.arrays.LongNDArray -import io.kinference.ndarray.arrays.NDArrayCore -import io.kinference.ndarray.arrays.NumberNDArrayCore -import io.kinference.ort.ORTEngine -import io.kinference.utils.CommonDataLoader -import io.kinference.utils.PredictionConfigs -import io.kinference.utils.inlines.InlineInt -import io.kinference.utils.toIntArray -import okio.Path.Companion.toPath - -// Softmax function -fun softmax(logits: FloatArray): FloatArray { - val maxLogit = logits.maxOrNull() ?: 0.0f - val expLogits = logits.map { Math.exp((it - maxLogit).toDouble()).toFloat() }.toFloatArray() - val sumExp = expLogits.sum() - return expLogits.map { it / sumExp }.toFloatArray() // Normalize -} - -// Top-K function to get the top K probabilities and their indices -fun topK(probs: FloatArray, k: Int): Pair { - val indexedProbs = probs.mapIndexed { index, prob -> index to prob } - val sortedProbs = indexedProbs.sortedByDescending { it.second }.take(k) - val topProbs = sortedProbs.map { it.second }.toFloatArray() - val topIndices = sortedProbs.map { it.first 
}.toIntArray() - return Pair(topProbs, topIndices) -} - -fun transformToFloatArray2D(original: FloatArray, n: Int): Array { - // Calculate how many sub-arrays (rows) we will have - val rowCount = original.size / n - - // Create a new 2D array to store the result - val result = Array(rowCount) { FloatArray(n) } - - // Fill the new 2D array with sub-arrays from the original array - for (i in 0 until rowCount) { - // Copy the next n elements into the current row - result[i] = original.sliceArray(i * n until (i + 1) * n) - } - - return result -} - -suspend fun mainONNXRuntimeValidation() { - val modelBytes = CommonDataLoader.bytes("$resourcesPath/gpt2-lm-head-10.onnx".toPath()) - val model = ORTEngine.loadModel(modelBytes) - - val inputTestTensor = ORTEngine.loadData("$resourcesPath/test_data_set_0/input_0.pb".toPath(), ONNXDataType.ONNX_TENSOR) - val realOutput = model.predict(listOf(inputTestTensor)) - println(realOutput) - val output = realOutput["output1"]!!.data as OnnxTensor - val logits = output.value as Array>> - val lastTokenLogits = logits[0][0][7] // shape: [50257] - val lastTokenProbs = softmax(lastTokenLogits) - val topK = topK(lastTokenProbs, 5) - val topKIndices = topK.second - println(topKIndices.joinToString(", ")) -} - -suspend fun mainKIValidation() { - val modelBytes = CommonDataLoader.bytes("$resourcesPath/gpt2-lm-head-10.onnx".toPath()) - val model = KIEngine.loadModel(modelBytes, optimize = true, predictionConfig = PredictionConfigs.NoAllocator) - - val tokenizer = HuggingFaceTokenizer.newInstance("gpt2") - - val inputTestTensor = KIEngine.loadData("$resourcesPath/test_data_set_0/input_0.pb".toPath(), ONNXDataType.ONNX_TENSOR) - val realOutput = model.predict(listOf(inputTestTensor)) - println(realOutput) - - val farray = ((realOutput["output1"]!! as KITensor).data as FloatNDArray).array.toArray() - val farray2d = transformToFloatArray2D(farray, 50257) - println(farray2d) - - val slicedReal = (realOutput["output1"]!! as KITensor).data.slice( - starts = intArrayOf(0, 0, 8 - 1, 0), - ends = intArrayOf(1, 1, 8, 50257), - steps = intArrayOf(1, 1, 1, 1) - ) as NumberNDArrayCore - val fslice = (slicedReal as FloatNDArray).array.toArray() - println(fslice) - val softmaxReal = slicedReal.softmax(axis = -1) - val topKReal = softmaxReal.topK( - axis = -1, - k = 5, - largest = true, - sorted = true - ) - - val tokenIdReal = (topKReal.second as LongNDArray)[intArrayOf(0,0,0,0)].toInt() - val decodeReal = tokenizer.decode(longArrayOf(tokenIdReal.toLong())) - println(decodeReal) -} - -// Constants for input and output tensor names used in the GPT-2 model -private const val INPUT_TENSOR_NAME = "input1" -private const val OUTPUT_TENSOR_NAME = "output1" // We use only logits tensor - -suspend fun extractTopToken(output: Map>, tokensSize: Int): Long { - val logits = output[OUTPUT_TENSOR_NAME]!! 
as KITensor - val sliced = logits.data.slice( - starts = intArrayOf(0, 0, tokensSize - 1, 0), // First batch, first element in the second dimension, last token, first vocab entry - ends = intArrayOf(1, 1, tokensSize, 50257), // Same batch, same second dimension, one token step, whole vocab (50257) - steps = intArrayOf(1, 1, 1, 1) // Step of 1 for each dimension - ) as NumberNDArrayCore - val softmax = sliced.softmax(axis = -1) - val topK = softmax.topK( - axis = -1, // Apply top-k along the last dimension (vocabulary size) - k = 1, // Retrieve the top 1 element - largest = true, // We want the largest probabilities (most probable tokens) - sorted = false // Sorting is unnecessary since we are only retrieving the top 1 - ) - val tokenId = (topK.second as LongNDArray)[intArrayOf(0, 0, 0, 0)] - - return tokenId -} - -suspend fun main() { - val modelUrl = "https://github.com/onnx/models/raw/main/validated/text/machine_comprehension/gpt-2/model/gpt2-lm-head-10.onnx" - val modelName = "gpt2-lm-head-10" - - println("Downloading model from: $modelUrl") - downloadFile(modelUrl, "$resourcesPath/$modelName.onnx") - - val modelBytes = CommonDataLoader.bytes("${resourcesPath}/$modelName.onnx".toPath()) - - println("Loading model...") - val model = KIEngine.loadModel(modelBytes, optimize = true, predictionConfig = PredictionConfigs.DefaultAutoAllocator) - - val tokenizer = HuggingFaceTokenizer.newInstance("gpt2", mapOf("modelMaxLength" to "1024")) - val testString = "Neurogenesis is most active during embryonic development and is responsible for producing " + - "all the various types of neurons of the organism, but it continues throughout adult life " + - "in a variety of organisms. Once born, neurons do not divide (see mitosis), and many will " + - "live the lifespan of the animal, except under extraordinary and usually pathogenic circumstances." 
- val encoded = tokenizer.encode(testString) - val tokens = encoded.ids - val tokensSize = tokens.size - - val predictionLength = 34 - val outputTokens = LongArray(predictionLength) { 0 } - - val input = LongNDArray(1, tokensSize) { idx: InlineInt -> tokens[idx.value] }.unsqueeze(0) - var currentContext = input.clone() - - print("Here goes the test text for generation:\n$testString") - - for (idx in 0 until predictionLength) { - val inputTensor = listOf((currentContext as NDArrayCore).asTensor(INPUT_TENSOR_NAME)) - val output = model.predict(inputTensor) - - outputTokens[idx] = extractTopToken(output, tokensSize + idx) - - val newTokenArray = LongNDArray(1, 1) { _: InlineInt -> outputTokens[idx] } - currentContext = currentContext.concat(listOf(newTokenArray.unsqueeze(0)), axis = -1) - print(tokenizer.decode(longArrayOf(outputTokens[idx]))) - } - println("\n\nDone") -} diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/lm/ORTMain.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/lm/ORTMain.kt new file mode 100644 index 000000000..f1956fb91 --- /dev/null +++ b/examples/src/jvmMain/kotlin/io/kinference/examples/lm/ORTMain.kt @@ -0,0 +1,74 @@ +package io.kinference.examples.lm + +import ai.djl.huggingface.tokenizers.HuggingFaceTokenizer +import io.kinference.core.data.tensor.KITensor +import io.kinference.core.data.tensor.asTensor +import io.kinference.examples.downloadFile +import io.kinference.examples.extractTopToken +import io.kinference.examples.resourcesPath +import io.kinference.ndarray.arrays.FloatNDArray +import io.kinference.ndarray.arrays.FloatNDArray.Companion.invoke +import io.kinference.ort.ORTData +import io.kinference.ort.ORTEngine +import io.kinference.ort.data.tensor.ORTTensor +import io.kinference.utils.CommonDataLoader +import io.kinference.utils.inlines.InlineInt +import io.kinference.utils.toIntArray +import okio.Path.Companion.toPath + +// Constants for input and output tensor names used in the GPT-2 model +private const val INPUT_TENSOR_NAME = "input1" +private const val OUTPUT_TENSOR_NAME = "output1" // We use only logits tensor + +suspend fun main() { + val modelUrl = "https://github.com/onnx/models/raw/main/validated/text/machine_comprehension/gpt-2/model/gpt2-lm-head-10.onnx" + val modelName = "gpt2-lm-head-10" + + println("Downloading model from: $modelUrl") + downloadFile(modelUrl, "$resourcesPath/$modelName.onnx") + + val modelBytes = CommonDataLoader.bytes("${resourcesPath}/$modelName.onnx".toPath()) + + println("Loading model...") + val model = ORTEngine.loadModel(modelBytes) + + val tokenizer = HuggingFaceTokenizer.newInstance("gpt2", mapOf("modelMaxLength" to "1024")) + val testString = "Neurogenesis is most active during embryonic development and is responsible for producing " + + "all the various types of neurons of the organism, but it continues throughout adult life " + + "in a variety of organisms. Once born, neurons do not divide (see mitosis), and many will " + + "live the lifespan of the animal, except under extraordinary and usually pathogenic circumstances." 
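+    // Encode the prompt into GPT-2 BPE token ids; the model consumes them below
+    // as an int64 ORTTensor of shape [1, 1, seqLen].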
+ val encoded = tokenizer.encode(testString) + val tokens = encoded.ids + val tokensSize = tokens.size + + val predictionLength = 34 + val outputTokens = LongArray(predictionLength) { 0 } + + val input = ORTTensor(tokens, longArrayOf(1, 1, tokensSize.toLong())) + var currentContext = input.clone(INPUT_TENSOR_NAME) + + print("Here goes the test text for generation:\n$testString") + + for (idx in 0 until predictionLength) { + val inputTensor = listOf(currentContext) + val output = model.predict(inputTensor) + + outputTokens[idx] = extractTopToken(convertToKITensorMap(output), tokensSize + idx, OUTPUT_TENSOR_NAME) + + val newTokenArray = tokens + outputTokens.slice(IntRange(0, idx)) + currentContext = ORTTensor(newTokenArray, longArrayOf(1, 1, tokensSize + idx + 1L), INPUT_TENSOR_NAME) + print(tokenizer.decode(longArrayOf(outputTokens[idx]))) + } + println("\n\nDone") +} + +private suspend fun convertToKITensorMap(outputs: Map>): Map { + return outputs.map { (key, value) -> + val ortTensor = value as ORTTensor + val data = ortTensor.toFloatArray() + val shape = ortTensor.shape.toIntArray() + val ndArray = FloatNDArray(shape) { idx: InlineInt -> data[idx.value] } + val tensor = ndArray.asTensor(key) + return@map key to tensor + }.toMap() +} From 8fc7f7e8af7d06b6c0220864e9b483af9b767ebf Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Fri, 27 Sep 2024 15:44:01 +0200 Subject: [PATCH 05/11] JBAI-5829 [examples] Added support for GemmVer9 and refactor existing Gemm logic. --- .../io/kinference.core/operators/math/Gemm.kt | 121 ++++++++++++------ 1 file changed, 84 insertions(+), 37 deletions(-) diff --git a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/Gemm.kt b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/Gemm.kt index ed2646f2c..5a9c7558f 100644 --- a/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/Gemm.kt +++ b/inference/inference-core/src/jvmMain/kotlin/io/kinference.core/operators/math/Gemm.kt @@ -15,24 +15,65 @@ import io.kinference.protobuf.message.AttributeProto import io.kinference.protobuf.message.TensorProto sealed class Gemm(name: String, info: OperatorInfo, attributes: Map>, inputs: List, outputs: List) : Operator(name, info, attributes, inputs, outputs) { + private val alpha: Double by attribute { it: Number -> it.toDouble() } + private val beta: Double by attribute { it: Number -> it.toDouble() } + + private val transA: Boolean by attribute { it: Number -> it.toInt() != 0 } + private val transB: Boolean by attribute { it: Number -> it.toInt() != 0 } + companion object { private val DEFAULT_VERSION = VersionInfo(sinceVersion = 11) operator fun invoke(name: String, version: Int?, attributes: Map>, inputs: List, outputs: List) = when (version ?: DEFAULT_VERSION.sinceVersion) { + in GemmVer9.VERSION.asRange() -> GemmVer9(name, attributes, inputs, outputs) in GemmVer11.VERSION.asRange() -> GemmVer11(name, attributes, inputs, outputs) else -> error("Unsupported version of Gemm operator: $version") } } -} + protected suspend fun getDest(array: NDArrayCore, type: DataType, targetShape: IntArray): MutableNDArrayCore { + if (array.shape.contentEquals(targetShape)) return array.toMutable() -class GemmVer11(name: String, attributes: Map>, inputs: List, outputs: List) : Gemm(name, INFO, attributes, inputs, outputs) { - private val alpha: Double by attribute { it: Number -> it.toDouble() } - private val beta: Double by attribute { it: Number -> it.toDouble() } + val dstArray = allocateNDArray(type, 
Strides(targetShape)) as MutableNumberNDArrayCore + val unsqueezedShape = unsqueezeFirst(array.shape, targetShape.size) - private val transA: Boolean by attribute { it: Number -> it.toInt() != 0 } - private val transB: Boolean by attribute { it: Number -> it.toInt() != 0 } + if (targetShape[1] != unsqueezedShape[1] && unsqueezedShape[1] == 1) { + val targetBlockSize = targetShape[1] + for (i in 0 until unsqueezedShape[0]) { + val dstOffsetBase = i * targetBlockSize + dstArray.fillByArrayValue(array, i, dstOffsetBase, dstOffsetBase + targetBlockSize) + } + } else { + dstArray.copyFrom(0, array) + } + + for (i in 1 until targetShape[0]) dstArray.copyFrom(i * targetShape[1], dstArray, 0, targetShape[1]) + return dstArray + } + + protected suspend fun > apply(inputs: List, optionalBias: Boolean): List { + val a = inputs[0]!!.data as NumberNDArrayCore + val b = inputs[1]!!.data as NumberNDArrayCore + val m = if (!transA) a.shape[0] else a.shape[1] + val n = if (!transB) b.shape[1] else b.shape[0] + val k = if (!transA) a.shape[1] else a.shape[0] + + val targetShape = intArrayOf(m, n) + val bias = if (optionalBias) { + inputs.getOrNull(2)?.data ?: allocateNDArray(a.type, targetShape) + } else { + inputs[2]!!.data + } as NumberNDArrayCore + + val c = getDest(bias, a.type, intArrayOf(m, n)) + gemm(m, n, k, alpha, a, b, beta, c, transposeA = transA, transposeB = transB) + + return listOf(c.asTensor()) + } +} + +class GemmVer9(name: String, attributes: Map>, inputs: List, outputs: List) : Gemm(name, INFO, attributes, inputs, outputs) { companion object { private val TYPE_CONSTRAINTS = setOf( TensorProto.DataType.FLOAT16, @@ -55,47 +96,53 @@ class GemmVer11(name: String, attributes: Map>, inputs: L private val INPUTS_INFO = listOf( IOInfo(0, TYPE_CONSTRAINTS, "A", optional = false), IOInfo(1, TYPE_CONSTRAINTS, "B", optional = false), - IOInfo(2, TYPE_CONSTRAINTS, "C", optional = true) + IOInfo(2, TYPE_CONSTRAINTS, "C", optional = false) ) private val OUTPUTS_INFO = listOf(IOInfo(0, TYPE_CONSTRAINTS, "Y", optional = false)) - internal val VERSION = VersionInfo(sinceVersion = 11) + internal val VERSION = VersionInfo(sinceVersion = 9, untilVersion = 11) private val INFO = OperatorInfo("Gemm", ATTRIBUTES_INFO, INPUTS_INFO, OUTPUTS_INFO, VERSION, OperatorInfo.DEFAULT_DOMAIN) - - private suspend fun getDest(array: NDArrayCore?, type: DataType, targetShape: IntArray): MutableNDArrayCore { - if (array == null) return allocateNDArray(type, Strides(targetShape)) - if (array.shape.contentEquals(targetShape)) return array.toMutable() - - val dstArray = allocateNDArray(type, Strides(targetShape)) as MutableNumberNDArrayCore - val unsqueezedShape = unsqueezeFirst(array.shape, targetShape.size) - - if (targetShape[1] != unsqueezedShape[1] && unsqueezedShape[1] == 1) { - val targetBlockSize = targetShape[1] - for (i in 0 until unsqueezedShape[0]) { - val dstOffsetBase = i * targetBlockSize - dstArray.fillByArrayValue(array, i, dstOffsetBase, dstOffsetBase + targetBlockSize) - } - } else { - dstArray.copyFrom(0, array) - } - - for (i in 1 until targetShape[0]) dstArray.copyFrom(i * targetShape[1], dstArray, 0, targetShape[1]) - return dstArray - } } override suspend fun > apply(contexts: Contexts, inputs: List): List { - val a = inputs[0]!!.data as NumberNDArrayCore - val b = inputs[1]!!.data as NumberNDArrayCore + return apply>(inputs, INPUTS_INFO[2].optional) + } +} - val m = if (!transA) a.shape[0] else a.shape[1] - val n = if (!transB) b.shape[1] else b.shape[0] - val k = if (!transA) a.shape[1] else a.shape[0] 
+class GemmVer11(name: String, attributes: Map>, inputs: List, outputs: List) : Gemm(name, INFO, attributes, inputs, outputs) { + companion object { + private val TYPE_CONSTRAINTS = setOf( + TensorProto.DataType.FLOAT16, + TensorProto.DataType.FLOAT, + TensorProto.DataType.DOUBLE, + TensorProto.DataType.UINT32, + TensorProto.DataType.UINT64, + TensorProto.DataType.INT32, + TensorProto.DataType.INT64, + TensorProto.DataType.BFLOAT16 + ) - val c = getDest(inputs.getOrNull(2)?.data, a.type, intArrayOf(m, n)) - gemm(m, n, k, alpha, a, b, beta, c, transposeA = transA, transposeB = transB) + private val ATTRIBUTES_INFO = listOf( + AttributeInfo("alpha", setOf(AttributeProto.AttributeType.FLOAT), false, 1.0), + AttributeInfo("beta", setOf(AttributeProto.AttributeType.FLOAT), false, 1.0), + AttributeInfo("transA", setOf(AttributeProto.AttributeType.INT), false, 0), + AttributeInfo("transB", setOf(AttributeProto.AttributeType.INT), false, 0) + ) - return listOf(c.asTensor()) + private val INPUTS_INFO = listOf( + IOInfo(0, TYPE_CONSTRAINTS, "A", optional = false), + IOInfo(1, TYPE_CONSTRAINTS, "B", optional = false), + IOInfo(2, TYPE_CONSTRAINTS, "C", optional = true) + ) + + private val OUTPUTS_INFO = listOf(IOInfo(0, TYPE_CONSTRAINTS, "Y", optional = false)) + + internal val VERSION = VersionInfo(sinceVersion = 11) + private val INFO = OperatorInfo("Gemm", ATTRIBUTES_INFO, INPUTS_INFO, OUTPUTS_INFO, VERSION, OperatorInfo.DEFAULT_DOMAIN) + } + + override suspend fun > apply(contexts: Contexts, inputs: List): List { + return apply>(inputs, INPUTS_INFO[2].optional) } } From 905d24cb1ebbf5dcb0d468bfd68651af16d62b1f Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Fri, 27 Sep 2024 17:58:05 +0200 Subject: [PATCH 06/11] JBAI-5829 [examples] Added ORT-based classification example for image recognition. 
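The tricky part of the new example is the tensor layout: KotlinDL's preprocessing yields NHWC float arrays, while CaffeNet expects NCHW input under the name `data_0`. Below is a minimal sketch of the conversion the example performs, assuming the KInference 0.2.x NDArray and ORT adapter APIs used elsewhere in this patch; `toCaffeNetInput` and `pixels` are illustrative names, not part of the example itself:

```kotlin
import io.kinference.ndarray.arrays.FloatNDArray
import io.kinference.ndarray.arrays.FloatNDArray.Companion.invoke
import io.kinference.ort.data.tensor.ORTTensor
import io.kinference.utils.inlines.InlineInt
import io.kinference.utils.toLongArray

// Sketch: wrap one preprocessed 224x224 BGR image into the tensor CaffeNet expects
suspend fun toCaffeNetInput(pixels: FloatArray): ORTTensor {
    // KotlinDL produces [batch, width, height, channel]...
    val nhwc = FloatNDArray(intArrayOf(1, 224, 224, 3)) { i: InlineInt -> pixels[i.value] }
    // ...while the model expects [batch, channel, width, height]
    val nchw = nhwc.transpose(intArrayOf(0, 3, 1, 2))
    return ORTTensor(nchw.array.toArray(), nchw.shape.toLongArray(), "data_0")
}
```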
--- .../{Main.kt => KIClassificationMain.kt} | 4 +- .../classification/ORTClassificationMain.kt | 122 ++++++++++++++++++ .../examples/lm/{KIMain.kt => KIGPT2Main.kt} | 0 .../lm/{ORTMain.kt => ORTGPT2Main.kt} | 0 4 files changed, 124 insertions(+), 2 deletions(-) rename examples/src/jvmMain/kotlin/io/kinference/examples/classification/{Main.kt => KIClassificationMain.kt} (97%) create mode 100644 examples/src/jvmMain/kotlin/io/kinference/examples/classification/ORTClassificationMain.kt rename examples/src/jvmMain/kotlin/io/kinference/examples/lm/{KIMain.kt => KIGPT2Main.kt} (100%) rename examples/src/jvmMain/kotlin/io/kinference/examples/lm/{ORTMain.kt => ORTGPT2Main.kt} (100%) diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/classification/Main.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/classification/KIClassificationMain.kt similarity index 97% rename from examples/src/jvmMain/kotlin/io/kinference/examples/classification/Main.kt rename to examples/src/jvmMain/kotlin/io/kinference/examples/classification/KIClassificationMain.kt index 260638218..af40e1bd9 100644 --- a/examples/src/jvmMain/kotlin/io/kinference/examples/classification/Main.kt +++ b/examples/src/jvmMain/kotlin/io/kinference/examples/classification/KIClassificationMain.kt @@ -66,7 +66,7 @@ private suspend fun createInputs(): Map> { for (i in 0 until dataset.xSize()) { val inputData = dataset.getX(i) val inputClass = if (dataset.getY(i).toInt() == 0) "cat" else "dog" - val floatNDArray = FloatNDArray(tensorShape) { index: InlineInt -> inputData[index.value]} // Create an NDArray from the image data + val floatNDArray = FloatNDArray(tensorShape) { index: InlineInt -> inputData[index.value] } // Create an NDArray from the image data val inputTensor = floatNDArray.transpose(permuteAxis).asTensor(INPUT_TENSOR_NAME) // Transpose and create a tensor from the NDArray inputTensors.putIfAbsent(inputClass, mutableListOf()) inputTensors[inputClass]!!.add(inputTensor) @@ -83,7 +83,7 @@ private suspend fun createInputs(): Map> { * @param originalClass The actual class label of the instance being predicted. 
*/ private fun displayTopPredictions(predictions: FloatNDArray, classLabels: List, originalClass: String) { - val predictionArray = predictions.array.blocks.first() + val predictionArray = predictions.array.toArray() val indexedScores = predictionArray.withIndex().sortedByDescending { it.value }.take(5) println("\nOriginal class: $originalClass") diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/classification/ORTClassificationMain.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/classification/ORTClassificationMain.kt new file mode 100644 index 000000000..df21fe33b --- /dev/null +++ b/examples/src/jvmMain/kotlin/io/kinference/examples/classification/ORTClassificationMain.kt @@ -0,0 +1,122 @@ +package io.kinference.examples.classification + +import io.kinference.examples.downloadFile +import io.kinference.examples.resourcesPath +import io.kinference.ndarray.arrays.* +import io.kinference.ndarray.arrays.FloatNDArray.Companion.invoke +import io.kinference.ort.ORTEngine +import io.kinference.ort.data.tensor.ORTTensor +import io.kinference.utils.CommonDataLoader +import io.kinference.utils.inlines.InlineInt +import io.kinference.utils.toLongArray +import okio.Path.Companion.toPath +import org.jetbrains.kotlinx.dl.api.preprocessing.pipeline +import org.jetbrains.kotlinx.dl.dataset.OnFlyImageDataset +import org.jetbrains.kotlinx.dl.dataset.embedded.dogsCatsSmallDatasetPath +import org.jetbrains.kotlinx.dl.dataset.generator.FromFolders +import org.jetbrains.kotlinx.dl.impl.inference.imagerecognition.InputType +import org.jetbrains.kotlinx.dl.impl.preprocessing.* +import org.jetbrains.kotlinx.dl.impl.preprocessing.image.* +import java.awt.image.BufferedImage +import java.io.File +import kotlin.collections.mutableMapOf + +// Constants for input and output tensor names used in the CaffeNet model +private const val INPUT_TENSOR_NAME = "data_0" +private const val OUTPUT_TENSOR_NAME = "prob_1" + +// Preprocessing pipeline for input images using KotlinDL +private val preprocessing = pipeline() + .resize { + outputWidth = 224 + outputHeight = 224 + interpolation = InterpolationType.BILINEAR + } + .convert { colorMode = ColorMode.BGR } + .toFloatArray { } + .call(InputType.CAFFE.preprocessing()) + +// Path to the small dataset of dogs vs cats images (100 images) +private val dogsVsCatsDatasetPath = dogsCatsSmallDatasetPath() + +/** + * Creates a Map of input tensors categorized by their respective classes (e.g., "cat" and "dog"). + * + * This function reads images from the dataset, preprocesses them, + * transposes the tensors to the required format, and groups them + * based on their class label. + * + * @return A Map where the keys are the class labels (e.g., "cat" and "dog"), + * and the values are lists of ORTTensor objects representing the input tensors + * for each class.
+ */ +private suspend fun createInputs(): Map> { + val dataset = OnFlyImageDataset.create( + File(dogsVsCatsDatasetPath), + FromFolders(mapping = mapOf("cat" to 0, "dog" to 1)), + preprocessing + ).shuffle() + + + val tensorShape = intArrayOf(1, 224, 224, 3) // Original tensor shape is [batch, width, height, channel] + val permuteAxis = intArrayOf(0, 3, 1, 2) // Permutations for shape [batch, channel, width, height] + val inputTensors = mutableMapOf>() + + for (i in 0 until dataset.xSize()) { + val inputData = dataset.getX(i) + val inputClass = if (dataset.getY(i).toInt() == 0) "cat" else "dog" + val floatNDArray = FloatNDArray(tensorShape) { index: InlineInt -> inputData[index.value] }.transpose(permuteAxis) // Create an NDArray from the image data and transpose it to the target layout + val inputTensor = ORTTensor(floatNDArray.array.toArray(), floatNDArray.shape.toLongArray(), INPUT_TENSOR_NAME) // Create an ORT tensor from the transposed NDArray + inputTensors.putIfAbsent(inputClass, mutableListOf()) + inputTensors[inputClass]!!.add(inputTensor) + } + + return inputTensors +} + +/** + * Displays the top 5 predictions with their corresponding labels and scores. + * + * @param predictions The predicted scores in a multidimensional array format. + * @param classLabels The list of class labels corresponding to the predictions. + * @param originalClass The actual class label of the instance being predicted. + */ +private fun displayTopPredictions(predictions: ORTTensor, classLabels: List, originalClass: String) { + val predictionArray = predictions.toFloatArray() + val indexedScores = predictionArray.withIndex().sortedByDescending { it.value }.take(5) + + println("\nOriginal class: $originalClass") + println("Top 5 predictions:") + for ((index, score) in indexedScores) { + val predictedClassLabel = if (index in classLabels.indices) classLabels[index] else "Unknown" + println("${predictedClassLabel}: ${"%.2f".format(score * 100)}%") + } +} + +suspend fun main() { + val modelUrl = "https://github.com/onnx/models/raw/main/validated/vision/classification/caffenet/model/caffenet-12.onnx" + val synsetUrl = "https://s3.amazonaws.com/onnx-model-zoo/synset.txt" + val modelName = "CaffeNet" + + println("Downloading model from: $modelUrl") + downloadFile(modelUrl, "$resourcesPath/$modelName.onnx") + println("Downloading synset from: $synsetUrl") + downloadFile(synsetUrl, "$resourcesPath/synset.txt") + + val modelBytes = CommonDataLoader.bytes("$resourcesPath/$modelName.onnx".toPath()) + val classLabels = File("$resourcesPath/synset.txt").readLines() + + println("Loading model...") + val model = ORTEngine.loadModel(modelBytes) + println("Creating inputs...") + val inputTensors = createInputs() + + println("Starting inference...") + inputTensors.forEach { dataClass -> + dataClass.value.forEach { tensor -> + val actualOutputs = model.predict(listOf(tensor)) + val predictions = actualOutputs[OUTPUT_TENSOR_NAME]!!
as ORTTensor + displayTopPredictions(predictions, classLabels, dataClass.key) + } + } +} diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/lm/KIMain.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/lm/KIGPT2Main.kt similarity index 100% rename from examples/src/jvmMain/kotlin/io/kinference/examples/lm/KIMain.kt rename to examples/src/jvmMain/kotlin/io/kinference/examples/lm/KIGPT2Main.kt diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/lm/ORTMain.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/lm/ORTGPT2Main.kt similarity index 100% rename from examples/src/jvmMain/kotlin/io/kinference/examples/lm/ORTMain.kt rename to examples/src/jvmMain/kotlin/io/kinference/examples/lm/ORTGPT2Main.kt From ac047368d90892fb0db69d82f7e462f366a3b1f8 Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Mon, 30 Sep 2024 12:59:16 +0200 Subject: [PATCH 07/11] JBAI-5829 [examples] Disabled Maven publication for `:examples` module. --- build.gradle.kts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/build.gradle.kts b/build.gradle.kts index 7c543737d..a6bde9f5b 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -83,3 +83,9 @@ subprojects { targetCompatibility = jvmTargetVersion.toString() } } + +project(":examples") { + tasks.withType().configureEach { + onlyIf { false } + } +} From 5b2e75512f93487726c16a4069aa86efe2214f90 Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Mon, 30 Sep 2024 13:44:46 +0200 Subject: [PATCH 08/11] JBAI-5829 [examples] Updated examples section in README.md --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b209fb9db..b4f196305 100644 --- a/README.md +++ b/README.md @@ -181,8 +181,13 @@ kotlin { ``` ## Examples -You can find several KInference usage examples in [this repository](https://github.com/JetBrains-Research/kinference-examples). -The repository has examples of multi-backend project configuration and sharing KInference-related code between the modules. +The [examples module](https://github.com/JetBrains-Research/kinference/tree/master/examples) contains examples of solving classification tasks +(cats vs dogs) and text generation. +The examples demonstrate the use of different KInference backends. +Models for the examples were selected from the [ONNX Model Zoo](https://github.com/onnx/models). +Running the examples does not require converting the models to different opsets. +However, if you need to run a model with operator versions not supported by KInference, +you can refer to the [opset conversion guide](https://github.com/OpenPPL/ppl.nn/blob/master/docs/en/onnx-model-opset-convert-guide.md). ## Want to know more? KInference API itself is widely documented, so you can explore its code and interfaces to get to know KInference better. From 740da0f2f632ba78bf12bf01e089163654a7a742 Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Mon, 30 Sep 2024 14:49:54 +0200 Subject: [PATCH 09/11] JBAI-5829 [examples] Refactored resource path usage to cache directory.
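After this change the helper resolves everything against a shared cache directory, so callers pass only a file name. A minimal usage sketch of the new API (the URL and file name mirror the CaffeNet example from this patch series; the larger timeout value is purely illustrative):

```kotlin
import io.kinference.examples.cacheDirectory
import io.kinference.examples.downloadFile
import io.kinference.utils.CommonDataLoader
import okio.Path.Companion.toPath

suspend fun main() {
    // Creates the cache directory on demand and skips the download
    // if the file is already cached
    downloadFile(
        url = "https://github.com/onnx/models/raw/main/validated/vision/classification/caffenet/model/caffenet-12.onnx",
        fileName = "CaffeNet.onnx",
        timeout = 1_200_000 // 20 minutes, e.g. for slower connections
    )
    val modelBytes = CommonDataLoader.bytes("$cacheDirectory/CaffeNet.onnx".toPath())
    println("Cached model size: ${modelBytes.size} bytes")
}
```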
--- .../kotlin/io/kinference/examples/Utils.kt | 52 +++++++++++++++---- .../classification/KIClassificationMain.kt | 10 ++-- .../classification/ORTClassificationMain.kt | 10 ++-- .../io/kinference/examples/lm/KIGPT2Main.kt | 6 +-- .../io/kinference/examples/lm/ORTGPT2Main.kt | 6 +-- 5 files changed, 58 insertions(+), 26 deletions(-) diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/Utils.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/Utils.kt index 3b3acdc30..30fb56439 100644 --- a/examples/src/jvmMain/kotlin/io/kinference/examples/Utils.kt +++ b/examples/src/jvmMain/kotlin/io/kinference/examples/Utils.kt @@ -12,38 +12,70 @@ import io.ktor.util.cio.writeChannel import io.ktor.utils.io.copyAndClose import java.io.File -val resourcesPath = System.getProperty("user.dir") + "/cache/" +/** + * Directory used to store cached files. + * + * This variable combines the user's current working directory + * with a "cache" subdirectory to create the path for storing cache files. + * It is used in various functions to check for existing files or directories, + * create new ones if they do not exist, and manage the caching of downloaded files. + */ +val cacheDirectory = System.getProperty("user.dir") + "/cache/" /** - * Downloads a file from the specified URL and saves it to the given output path. - * If the file already exists at the output path, the download is skipped. + * Downloads a file from the given URL and saves it with the specified file name. * - * @param url The URL from which the file will be downloaded. - * @param outputPath The path to which the downloaded file will be saved. + * Checks if the directory specified by `cacheDirectory` exists. + * If not, it creates the directory. If the file already exists, + * the download is skipped. Otherwise, the file is downloaded + * using an HTTP client with a 10-minute timeout setting. + * + * @param url The URL from which to download the file. + * @param fileName The name to use for the downloaded file. + * @param timeout Optional timeout duration for the download request, in milliseconds. + * Defaults to 600,000 milliseconds (10 minutes). + * Increase the timeout if you are not sure that the download of the particular model will fit into the default timeout. */ -suspend fun downloadFile(url: String, outputPath: String) { +suspend fun downloadFile(url: String, fileName: String, timeout: Long = 600_000) { + // Ensure the predefined path is treated as a directory + val directory = File(cacheDirectory) + + // Check if the directory exists; if not, create it + if (!directory.exists()) { + println("Predefined directory doesn't exist. Creating directory at $cacheDirectory.") + directory.mkdirs() // Create the directory if it doesn't exist + } + // Check if the file already exists - val file = File(outputPath) + val file = File(directory, fileName) if (file.exists()) { - println("File already exists at $outputPath. Skipping download.") + println("File already exists at ${file.absolutePath}.
Skipping download.") return // Exit the function if the file exists } // Create an instance of HttpClient with custom timeout settings val client = HttpClient { install(HttpTimeout) { - requestTimeoutMillis = 600_000 // Set timeout to 10 minutes (600,000 milliseconds) + requestTimeoutMillis = timeout } } // Download the file and write to the specified output path client.prepareRequest(url).execute { response -> - response.bodyAsChannel().copyAndClose(File(outputPath).writeChannel()) + response.bodyAsChannel().copyAndClose(file.writeChannel()) } client.close() } +/** + * Extracts the token ID with the highest probability from the output tensor. + * + * @param output A map containing the output tensors identified by their names. + * @param tokensSize The number of tokens in the sequence. + * @param outputName The name of the tensor containing the logits. + * @return The ID of the top token. + */ suspend fun extractTopToken(output: Map>, tokensSize: Int, outputName: String): Long { val logits = output[outputName]!! as KITensor val sliced = logits.data.slice( diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/classification/KIClassificationMain.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/classification/KIClassificationMain.kt index af40e1bd9..b72032979 100644 --- a/examples/src/jvmMain/kotlin/io/kinference/examples/classification/KIClassificationMain.kt +++ b/examples/src/jvmMain/kotlin/io/kinference/examples/classification/KIClassificationMain.kt @@ -4,7 +4,7 @@ import io.kinference.core.KIEngine import io.kinference.core.data.tensor.KITensor import io.kinference.core.data.tensor.asTensor import io.kinference.examples.downloadFile -import io.kinference.examples.resourcesPath +import io.kinference.examples.cacheDirectory import io.kinference.ndarray.arrays.* import io.kinference.ndarray.arrays.FloatNDArray.Companion.invoke import io.kinference.utils.CommonDataLoader @@ -100,12 +100,12 @@ suspend fun main() { val modelName = "CaffeNet" println("Downloading model from: $modelUrl") - downloadFile(modelUrl, "$resourcesPath/$modelName.onnx") + downloadFile(modelUrl, "$modelName.onnx") println("Downloading synset from: $synsetUrl") - downloadFile(synsetUrl, "$resourcesPath/synset.txt") + downloadFile(synsetUrl, "synset.txt") - val modelBytes = CommonDataLoader.bytes("$resourcesPath/$modelName.onnx".toPath()) - val classLabels = File("$resourcesPath/synset.txt").readLines() + val modelBytes = CommonDataLoader.bytes("$cacheDirectory/$modelName.onnx".toPath()) + val classLabels = File("$cacheDirectory/synset.txt").readLines() println("Loading model...") val model = KIEngine.loadModel(modelBytes, optimize = true, predictionConfig = PredictionConfigs.DefaultAutoAllocator) diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/classification/ORTClassificationMain.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/classification/ORTClassificationMain.kt index df21fe33b..2ccf757f4 100644 --- a/examples/src/jvmMain/kotlin/io/kinference/examples/classification/ORTClassificationMain.kt +++ b/examples/src/jvmMain/kotlin/io/kinference/examples/classification/ORTClassificationMain.kt @@ -1,7 +1,7 @@ package io.kinference.examples.classification import io.kinference.examples.downloadFile -import io.kinference.examples.resourcesPath +import io.kinference.examples.cacheDirectory import io.kinference.ndarray.arrays.* import io.kinference.ndarray.arrays.FloatNDArray.Companion.invoke import io.kinference.ort.ORTEngine @@ -99,12 +99,12 @@ suspend fun main() { val modelName = 
"CaffeNet" println("Downloading model from: $modelUrl") - downloadFile(modelUrl, "$resourcesPath/$modelName.onnx") + downloadFile(modelUrl, "$modelName.onnx") println("Downloading synset from: $synsetUrl") - downloadFile(synsetUrl, "$resourcesPath/synset.txt") + downloadFile(synsetUrl, "synset.txt") - val modelBytes = CommonDataLoader.bytes("$resourcesPath/$modelName.onnx".toPath()) - val classLabels = File("$resourcesPath/synset.txt").readLines() + val modelBytes = CommonDataLoader.bytes("$cacheDirectory/$modelName.onnx".toPath()) + val classLabels = File("$cacheDirectory/synset.txt").readLines() println("Loading model...") val model = ORTEngine.loadModel(modelBytes) diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/lm/KIGPT2Main.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/lm/KIGPT2Main.kt index 6a1bc484c..e142fc576 100644 --- a/examples/src/jvmMain/kotlin/io/kinference/examples/lm/KIGPT2Main.kt +++ b/examples/src/jvmMain/kotlin/io/kinference/examples/lm/KIGPT2Main.kt @@ -5,7 +5,7 @@ import io.kinference.core.KIEngine import io.kinference.core.data.tensor.asTensor import io.kinference.examples.downloadFile import io.kinference.examples.extractTopToken -import io.kinference.examples.resourcesPath +import io.kinference.examples.cacheDirectory import io.kinference.ndarray.arrays.LongNDArray import io.kinference.ndarray.arrays.NDArrayCore import io.kinference.utils.CommonDataLoader @@ -22,9 +22,9 @@ suspend fun main() { val modelName = "gpt2-lm-head-10" println("Downloading model from: $modelUrl") - downloadFile(modelUrl, "$resourcesPath/$modelName.onnx") + downloadFile(modelUrl, "$modelName.onnx") //GPT-2 from model zoo is around 650 Mb, adjust your timeout if needed - val modelBytes = CommonDataLoader.bytes("${resourcesPath}/$modelName.onnx".toPath()) + val modelBytes = CommonDataLoader.bytes("${cacheDirectory}/$modelName.onnx".toPath()) println("Loading model...") val model = KIEngine.loadModel(modelBytes, optimize = true, predictionConfig = PredictionConfigs.DefaultAutoAllocator) diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/lm/ORTGPT2Main.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/lm/ORTGPT2Main.kt index f1956fb91..c1b9c7d3b 100644 --- a/examples/src/jvmMain/kotlin/io/kinference/examples/lm/ORTGPT2Main.kt +++ b/examples/src/jvmMain/kotlin/io/kinference/examples/lm/ORTGPT2Main.kt @@ -5,7 +5,7 @@ import io.kinference.core.data.tensor.KITensor import io.kinference.core.data.tensor.asTensor import io.kinference.examples.downloadFile import io.kinference.examples.extractTopToken -import io.kinference.examples.resourcesPath +import io.kinference.examples.cacheDirectory import io.kinference.ndarray.arrays.FloatNDArray import io.kinference.ndarray.arrays.FloatNDArray.Companion.invoke import io.kinference.ort.ORTData @@ -25,9 +25,9 @@ suspend fun main() { val modelName = "gpt2-lm-head-10" println("Downloading model from: $modelUrl") - downloadFile(modelUrl, "$resourcesPath/$modelName.onnx") + downloadFile(modelUrl, "$modelName.onnx") //GPT-2 from model zoo is around 650 Mb, adjust your timeout if needed - val modelBytes = CommonDataLoader.bytes("${resourcesPath}/$modelName.onnx".toPath()) + val modelBytes = CommonDataLoader.bytes("${cacheDirectory}/$modelName.onnx".toPath()) println("Loading model...") val model = ORTEngine.loadModel(modelBytes) From 6d67891cedb0d5b5832ee22ebc18beb1df508fa9 Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Tue, 1 Oct 2024 11:24:20 +0200 Subject: [PATCH 10/11] JBAI-5829 [examples] Replaced loading models using 
bytes with direct file paths in ORT and KI example modules. Updated cache directory usage and adjusted build script for conditional plugin application. --- build.gradle.kts | 29 +++++++++---------- .../kotlin/io/kinference/examples/Utils.kt | 2 +- .../classification/KIClassificationMain.kt | 3 +- .../classification/ORTClassificationMain.kt | 3 +- .../io/kinference/examples/lm/KIGPT2Main.kt | 4 +-- .../io/kinference/examples/lm/ORTGPT2Main.kt | 12 ++++---- 6 files changed, 22 insertions(+), 31 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index a6bde9f5b..3caf12538 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -6,6 +6,7 @@ import org.jetbrains.kotlin.gradle.targets.js.yarn.YarnLockMismatchReport import org.jetbrains.kotlin.gradle.targets.js.yarn.YarnPlugin import org.jetbrains.kotlin.gradle.targets.js.yarn.YarnRootExtension import org.jetbrains.kotlin.gradle.tasks.KotlinCompilationTask +import org.jetbrains.kotlin.utils.addToStdlib.applyIf group = "io.kinference" version = "0.2.22" @@ -35,21 +36,23 @@ subprojects { apply { plugin("org.jetbrains.kotlin.multiplatform") - - plugin("maven-publish") plugin("idea") } - publishing { - repositories { - maven { - name = "SpacePackages" - url = uri("https://packages.jetbrains.team/maven/p/ki/maven") + applyIf(path != ":examples") { + apply(plugin = "maven-publish") + + publishing { + repositories { + maven { + name = "SpacePackages" + url = uri("https://packages.jetbrains.team/maven/p/ki/maven") - credentials { - username = System.getenv("JB_SPACE_CLIENT_ID") - password = System.getenv("JB_SPACE_CLIENT_SECRET") + credentials { + username = System.getenv("JB_SPACE_CLIENT_ID") + password = System.getenv("JB_SPACE_CLIENT_SECRET") + } } } } @@ -83,9 +86,3 @@ subprojects { targetCompatibility = jvmTargetVersion.toString() } } - -project(":examples") { - tasks.withType().configureEach { - onlyIf { false } - } -} diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/Utils.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/Utils.kt index 30fb56439..eb9cbcd99 100644 --- a/examples/src/jvmMain/kotlin/io/kinference/examples/Utils.kt +++ b/examples/src/jvmMain/kotlin/io/kinference/examples/Utils.kt @@ -20,7 +20,7 @@ import java.io.File * It is used in various functions to check for existing files or directories, * create new ones if they do not exist, and manage the caching of downloaded files. */ -val cacheDirectory = System.getProperty("user.dir") + "/cache/" +val cacheDirectory = System.getProperty("user.dir") + "/.cache/" /** * Downloads a file from the given URL and saves it with the specified file name. 
diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/classification/KIClassificationMain.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/classification/KIClassificationMain.kt index b72032979..00ace87bc 100644 --- a/examples/src/jvmMain/kotlin/io/kinference/examples/classification/KIClassificationMain.kt +++ b/examples/src/jvmMain/kotlin/io/kinference/examples/classification/KIClassificationMain.kt @@ -104,11 +104,10 @@ suspend fun main() { println("Downloading synset from: $synsetUrl") downloadFile(synsetUrl, "synset.txt") - val modelBytes = CommonDataLoader.bytes("$cacheDirectory/$modelName.onnx".toPath()) val classLabels = File("$cacheDirectory/synset.txt").readLines() println("Loading model...") - val model = KIEngine.loadModel(modelBytes, optimize = true, predictionConfig = PredictionConfigs.DefaultAutoAllocator) + val model = KIEngine.loadModel("$cacheDirectory/$modelName.onnx".toPath(), optimize = true, predictionConfig = PredictionConfigs.DefaultAutoAllocator) println("Creating inputs...") val inputTensors = createInputs() diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/classification/ORTClassificationMain.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/classification/ORTClassificationMain.kt index 2ccf757f4..c8f3b0d5c 100644 --- a/examples/src/jvmMain/kotlin/io/kinference/examples/classification/ORTClassificationMain.kt +++ b/examples/src/jvmMain/kotlin/io/kinference/examples/classification/ORTClassificationMain.kt @@ -103,11 +103,10 @@ suspend fun main() { println("Downloading synset from: $synsetUrl") downloadFile(synsetUrl, "synset.txt") - val modelBytes = CommonDataLoader.bytes("$cacheDirectory/$modelName.onnx".toPath()) val classLabels = File("$cacheDirectory/synset.txt").readLines() println("Loading model...") - val model = ORTEngine.loadModel(modelBytes) + val model = ORTEngine.loadModel("$cacheDirectory/$modelName.onnx".toPath()) println("Creating inputs...") val inputTensors = createInputs() diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/lm/KIGPT2Main.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/lm/KIGPT2Main.kt index e142fc576..81e106ee2 100644 --- a/examples/src/jvmMain/kotlin/io/kinference/examples/lm/KIGPT2Main.kt +++ b/examples/src/jvmMain/kotlin/io/kinference/examples/lm/KIGPT2Main.kt @@ -24,10 +24,8 @@ suspend fun main() { println("Downloading model from: $modelUrl") downloadFile(modelUrl, "$modelName.onnx") //GPT-2 from model zoo is around 650 Mb, adjust your timeout if needed - val modelBytes = CommonDataLoader.bytes("${cacheDirectory}/$modelName.onnx".toPath()) - println("Loading model...") - val model = KIEngine.loadModel(modelBytes, optimize = true, predictionConfig = PredictionConfigs.DefaultAutoAllocator) + val model = KIEngine.loadModel("$cacheDirectory/$modelName.onnx".toPath(), optimize = true, predictionConfig = PredictionConfigs.DefaultAutoAllocator) val tokenizer = HuggingFaceTokenizer.newInstance("gpt2", mapOf("modelMaxLength" to "1024")) val testString = "Neurogenesis is most active during embryonic development and is responsible for producing " + diff --git a/examples/src/jvmMain/kotlin/io/kinference/examples/lm/ORTGPT2Main.kt b/examples/src/jvmMain/kotlin/io/kinference/examples/lm/ORTGPT2Main.kt index c1b9c7d3b..dd0634131 100644 --- a/examples/src/jvmMain/kotlin/io/kinference/examples/lm/ORTGPT2Main.kt +++ b/examples/src/jvmMain/kotlin/io/kinference/examples/lm/ORTGPT2Main.kt @@ -27,10 +27,8 @@ suspend fun main() { println("Downloading model from: $modelUrl") 
downloadFile(modelUrl, "$modelName.onnx") //GPT-2 from model zoo is around 650 Mb, adjust your timeout if needed - val modelBytes = CommonDataLoader.bytes("${cacheDirectory}/$modelName.onnx".toPath()) - println("Loading model...") - val model = ORTEngine.loadModel(modelBytes) + val model = ORTEngine.loadModel("$cacheDirectory/$modelName.onnx".toPath()) val tokenizer = HuggingFaceTokenizer.newInstance("gpt2", mapOf("modelMaxLength" to "1024")) val testString = "Neurogenesis is most active during embryonic development and is responsible for producing " + @@ -63,12 +61,12 @@ suspend fun main() { } private suspend fun convertToKITensorMap(outputs: Map>): Map { - return outputs.map { (key, value) -> - val ortTensor = value as ORTTensor + return outputs.map { (name, output) -> + val ortTensor = output as ORTTensor val data = ortTensor.toFloatArray() val shape = ortTensor.shape.toIntArray() val ndArray = FloatNDArray(shape) { idx: InlineInt -> data[idx.value] } - val tensor = ndArray.asTensor(key) - return@map key to tensor + val kiTensor = ndArray.asTensor(name) + return@map name to kiTensor }.toMap() } From e6bda3aa544e7ceee21f45e916bc5a0eef45a325 Mon Sep 17 00:00:00 2001 From: dmitriyb Date: Mon, 7 Oct 2024 16:31:58 +0200 Subject: [PATCH 11/11] [RELEASE] Update version to 0.2.24 --- README.md | 24 ++++++++++++------------ build.gradle.kts | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index fc009857a..ad4f89082 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ it is highly recommended to use KInference TensorFlow.js backend instead for mor KInference Core dependency coordinates: ```kotlin dependencies { - api("io.kinference", "inference-core", "0.2.23") + api("io.kinference", "inference-core", "0.2.24") } ``` @@ -67,7 +67,7 @@ This backend is recommended for JavaScript projects. TensorFlow.js backend dependency coordinates: ```kotlin dependencies { - api("io.kinference", "inference-tfjs", "0.2.23") + api("io.kinference", "inference-tfjs", "0.2.24") } ``` @@ -81,14 +81,14 @@ To check on the system requirements, visit the following [link](https://onnxrunt ONNXRuntime CPU backend dependency coordinates: ```kotlin dependencies { - api("io.kinference", "inference-ort", "0.2.23") + api("io.kinference", "inference-ort", "0.2.24") } ``` ONNXRuntime GPU backend dependency coordinates: ```kotlin dependencies { - api("io.kinference", "inference-ort-gpu", "0.2.23") + api("io.kinference", "inference-ort-gpu", "0.2.24") } ``` @@ -104,7 +104,7 @@ Array adapter for the [kmath](https://github.com/SciProgCentre/kmath) library th Dependency coordinates: ```kotlin dependencies { - api("io.kinference", "adapter-kmath-{backend_name}", "0.2.23") + api("io.kinference", "adapter-kmath-{backend_name}", "0.2.24") } ``` @@ -114,12 +114,12 @@ Array adapter for the [multik](https://github.com/Kotlin/multik) library that wo Dependency coordinates: ```kotlin dependencies { - api("io.kinference", "adapter-multik-{backend_name}", "0.2.23") + api("io.kinference", "adapter-multik-{backend_name}", "0.2.24") } ``` ## Getting started -Let us now walk through how to get started with KInference. The latest version of KInference is *0.2.23* +Let us now walk through how to get started with KInference.
The latest version of KInference is *0.2.24* ### Setup dependencies repository @@ -142,7 +142,7 @@ To enable the backend, you can add the chosen KInference runtime as a dependency ```kotlin dependencies { - api("io.kinference", "inference-core", "0.2.23") + api("io.kinference", "inference-core", "0.2.24") } ``` @@ -160,20 +160,20 @@ kotlin { sourceSets { val commonMain by getting { dependencies { - api("io.kinference:inference-api:0.2.23") - api("io.kinference:ndarray-api:0.2.23") + api("io.kinference:inference-api:0.2.24") + api("io.kinference:ndarray-api:0.2.24") } } val jvmMain by getting { dependencies { - api("io.kinference:inference-core:0.2.23") + api("io.kinference:inference-core:0.2.24") } } val jsMain by getting { dependencies { - api("io.kinference:inference-tfjs:0.2.23") + api("io.kinference:inference-tfjs:0.2.24") } } } diff --git a/build.gradle.kts b/build.gradle.kts index 1711b8a32..cc3570c34 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -9,7 +9,7 @@ import org.jetbrains.kotlin.gradle.tasks.KotlinCompilationTask import org.jetbrains.kotlin.utils.addToStdlib.applyIf group = "io.kinference" -version = "0.2.23" +version = "0.2.24" plugins { alias(libs.plugins.kotlin.multiplatform) apply false