From 55554c3af68f2eabeba664ad85b4dc91a1472428 Mon Sep 17 00:00:00 2001
From: Stephen von Takach <steve@place.technology>
Date: Sun, 3 Sep 2023 22:38:25 +1000
Subject: [PATCH 1/8] feat: add support for GPU delegates

---
 bindings/bindings_generator.cr            |   2 +
 build_tensorflowlite.sh                   |   6 +-
 spec/tensorflow_lite_spec.cr              |  41 +++++++
 src/tensorflow_lite.cr                    |   1 +
 src/tensorflow_lite/bindings_generator.cr |  17 +++
 src/tensorflow_lite/delegate_gpu.cr       |  21 ++++
 src/tensorflow_lite/lib_tensorflowlite.cr | 124 +++++++++++++++++++---
 tensorflow.patch                          |  36 +++++++
 8 files changed, 230 insertions(+), 18 deletions(-)
 mode change 100755 => 100644 build_tensorflowlite.sh
 create mode 100644 src/tensorflow_lite/bindings_generator.cr
 create mode 100644 src/tensorflow_lite/delegate_gpu.cr
 create mode 100644 tensorflow.patch

diff --git a/bindings/bindings_generator.cr b/bindings/bindings_generator.cr
index 8e493bf..b7d675e 100644
--- a/bindings/bindings_generator.cr
+++ b/bindings/bindings_generator.cr
@@ -3,6 +3,8 @@
   "tensorflow/lite/core/c/c_api_types.h",
   "tensorflow/lite/core/c/c_api.h",
   "tensorflow/lite/core/c/c_api_experimental.h",
+  "tensorflow/lite/delegates/gpu/delegate_options.h",
+  "tensorflow/lite/delegates/gpu/delegate.h",
   flags: "
     -I/{tensorflow_dir}/tensorflow/
     -I/{tensorflow_dir}/tensorflow/bazel-genfiles
diff --git a/build_tensorflowlite.sh b/build_tensorflowlite.sh
old mode 100755
new mode 100644
index 8f473fa..ad275c7
--- a/build_tensorflowlite.sh
+++ b/build_tensorflowlite.sh
@@ -15,6 +15,9 @@ echo "--"
 
 # clone the required repositories
 git clone --depth 1 https://github.com/tensorflow/tensorflow
+cd tensorflow
+git apply ../tensorflow.patch
+cd ..
 
 echo "--"
 echo "configuring..."
@@ -22,7 +25,8 @@ echo "--"
 
 mkdir tflite_build
 cd tflite_build
-cmake ../tensorflow/tensorflow/lite/c -DTFLITE_ENABLE_GPU=ON
+cmake ../tensorflow/tensorflow/lite/c \
+  -DTFLITE_ENABLE_GPU=ON
 
 echo "--"
 echo "building..."
diff --git a/spec/tensorflow_lite_spec.cr b/spec/tensorflow_lite_spec.cr
index 46c1b1c..595f28a 100644
--- a/spec/tensorflow_lite_spec.cr
+++ b/spec/tensorflow_lite_spec.cr
@@ -111,5 +111,46 @@ module TensorflowLite
       client.outputs.size.should eq 4
       client.labels.as(Array(String)).size.should eq 90
     end
+
+    it "can add a GPU delegate to the interpreter options" do
+      # we have to skip this test if there is no hardware installed
+      # however at least we know it compiles
+      file_io = File.new(model_path)
+      file_data = Bytes.new(file_io.size)
+      file_io.read_fully(file_data)
+      file_io.close
+
+      {Model.new(model_path), Model.new(file_data)}.each do |model|
+        opts = InterpreterOptions.new
+        opts.add_delegate DelegateGPU.new
+        opts.on_error do |error_msg|
+          puts "error was #{error_msg}"
+        end
+        interpreter = Interpreter.new(model, opts)
+
+        xor_test.each do |test|
+          inputs = test[:input]
+          expected = test[:result]
+
+          # configure inputs
+          input_tensor = interpreter.input_tensor(0)
+          input_tensor.raw_data.bytesize.should eq input_tensor.bytesize
+          input_tensor.size.should eq 2
+
+          floats = input_tensor.as_f32
+          floats[0], floats[1] = inputs
+
+          # run through NN
+          interpreter.invoke!
+
+          # check results
+          output_tensor = interpreter.output_tensor(0)
+          floats = output_tensor.as_f32
+          result = (floats[0] + 0.5_f32).to_i
+
+          result.should eq expected
+        end
+      end
+    end
   end
 end
diff --git a/src/tensorflow_lite.cr b/src/tensorflow_lite.cr
index 3f2e2fb..552da38 100644
--- a/src/tensorflow_lite.cr
+++ b/src/tensorflow_lite.cr
@@ -29,4 +29,5 @@ require "./tensorflow_lite/interpreter_options"
 require "./tensorflow_lite/tensor"
 require "./tensorflow_lite/interpreter"
 require "./tensorflow_lite/client"
+require "./tensorflow_lite/delegate_gpu"
 require "./tensorflow_lite/utilities/*"
diff --git a/src/tensorflow_lite/bindings_generator.cr b/src/tensorflow_lite/bindings_generator.cr
new file mode 100644
index 0000000..b7d675e
--- /dev/null
+++ b/src/tensorflow_lite/bindings_generator.cr
@@ -0,0 +1,17 @@
+@[Include(
+  "tensorflow/lite/builtin_ops.h",
+  "tensorflow/lite/core/c/c_api_types.h",
+  "tensorflow/lite/core/c/c_api.h",
+  "tensorflow/lite/core/c/c_api_experimental.h",
+  "tensorflow/lite/delegates/gpu/delegate_options.h",
+  "tensorflow/lite/delegates/gpu/delegate.h",
+  flags: "
+    -I/{tensorflow_dir}/tensorflow/
+    -I/{tensorflow_dir}/tensorflow/bazel-genfiles
+    -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include/c++/v1
+  ",
+  prefix: %w(TFL_ TfLite kTfLite)
+)]
+@[Link("tensorflowlite_c", ldflags: "-L#{__DIR__}/../../ext/ -Wl,-rpath='$ORIGIN'")]
+lib LibTensorflowLite
+end
diff --git a/src/tensorflow_lite/delegate_gpu.cr b/src/tensorflow_lite/delegate_gpu.cr
new file mode 100644
index 0000000..1f1d55d
--- /dev/null
+++ b/src/tensorflow_lite/delegate_gpu.cr
@@ -0,0 +1,21 @@
+require "./delegate"
+
+class TensorflowLite::DelegateGPU < TensorflowLite::Delegate
+  def initialize
+    @options = opts = LibTensorflowLite.gpu_delegate_options_v2_default
+    options_ptr = pointerof(opts)
+    @delegate = LibTensorflowLite.gpu_delegate_v2_create(options_ptr)
+    @to_unsafe = pointerof(@delegate)
+  end
+
+  @options : LibTensorflowLite::GpuDelegateOptionsV2
+  @delegate : LibTensorflowLite::Delegate
+
+  # :nodoc:
+  def finalize
+    LibTensorflowLite.gpu_delegate_v2_delete(@delegate)
+  end
+
+  # :nodoc:
+  getter to_unsafe : Pointer(LibTensorflowLite::Delegate)
+end
diff --git a/src/tensorflow_lite/lib_tensorflowlite.cr b/src/tensorflow_lite/lib_tensorflowlite.cr
index 1eb6da1..d0aceec 100644
--- a/src/tensorflow_lite/lib_tensorflowlite.cr
+++ b/src/tensorflow_lite/lib_tensorflowlite.cr
@@ -163,6 +163,47 @@ lib LibTensorflowLite
   BuiltinBitcast                      = 159_i64
   BuiltinBitwiseXor                   = 160_i64
   BuiltinRightShift                   = 161_i64
+  BuiltinStablehloLogistic            = 162_i64
+  BuiltinStablehloAdd                 = 163_i64
+  BuiltinStablehloDivide              = 164_i64
+  BuiltinStablehloMultiply            = 165_i64
+  BuiltinStablehloMaximum             = 166_i64
+  BuiltinStablehloReshape             = 167_i64
+  BuiltinStablehloClamp               = 168_i64
+  BuiltinStablehloConcatenate         = 169_i64
+  BuiltinStablehloBroadcastInDim      = 170_i64
+  BuiltinStablehloConvolution         = 171_i64
+  BuiltinStablehloSlice               = 172_i64
+  BuiltinStablehloCustomCall          = 173_i64
+  BuiltinStablehloReduce              = 174_i64
+  BuiltinStablehloAbs                 = 175_i64
+  BuiltinStablehloAnd                 = 176_i64
+  BuiltinStablehloCosine              = 177_i64
+  BuiltinStablehloExponential         = 178_i64
+  BuiltinStablehloFloor               = 179_i64
+  BuiltinStablehloLog                 = 180_i64
+  BuiltinStablehloMinimum             = 181_i64
+  BuiltinStablehloNegate              = 182_i64
+  BuiltinStablehloOr                  = 183_i64
+  BuiltinStablehloPower               = 184_i64
+  BuiltinStablehloRemainder           = 185_i64
+  BuiltinStablehloRsqrt               = 186_i64
+  BuiltinStablehloSelect              = 187_i64
+  BuiltinStablehloSubtract            = 188_i64
+  BuiltinStablehloTanh                = 189_i64
+  BuiltinStablehloScatter             = 190_i64
+  BuiltinStablehloCompare             = 191_i64
+  BuiltinStablehloConvert             = 192_i64
+  BuiltinStablehloDynamicSlice        = 193_i64
+  BuiltinStablehloDynamicUpdateSlice  = 194_i64
+  BuiltinStablehloPad                 = 195_i64
+  BuiltinStablehloIota                = 196_i64
+  BuiltinStablehloDotGeneral          = 197_i64
+  BuiltinStablehloReduceWindow        = 198_i64
+  BuiltinStablehloSort                = 199_i64
+  BuiltinStablehloWhile               = 200_i64
+  BuiltinStablehloGather              = 201_i64
+  BuiltinStablehloTranspose           = 202_i64
   NoType                              =   0_i64
   Float32                             =   1_i64
   Int32                               =   2_i64
@@ -208,8 +249,7 @@ lib LibTensorflowLite
   fun interpreter_options_add_delegate = TfLiteInterpreterOptionsAddDelegate(options : InterpreterOptions, delegate : OpaqueDelegate*)
   alias OpaqueDelegate = Delegate
   fun interpreter_options_set_error_reporter = TfLiteInterpreterOptionsSetErrorReporter(options : InterpreterOptions, reporter : (Void*, LibC::Char*, VaList -> Void), user_data : Void*)
-  fun interpreter_options_add_registration_external = TfLiteInterpreterOptionsAddRegistrationExternal(options : InterpreterOptions, registration : RegistrationExternal)
-  type RegistrationExternal = Void*
+  fun interpreter_options_add_registration_external = TfLiteInterpreterOptionsAddRegistrationExternal(options : InterpreterOptions, registration : LibC::Int*)
   fun interpreter_options_enable_cancellation = TfLiteInterpreterOptionsEnableCancellation(options : InterpreterOptions, enable : LibC::Int) : Status
   enum Status
     Ok                     = 0
@@ -267,7 +307,8 @@ lib LibTensorflowLite
   fun tensor_quantization_params = TfLiteTensorQuantizationParams(tensor : Tensor) : QuantizationParams
   fun tensor_copy_from_buffer = TfLiteTensorCopyFromBuffer(tensor : Tensor, input_data : Void*, input_data_size : LibC::SizeT) : Status
   fun tensor_copy_to_buffer = TfLiteTensorCopyToBuffer(output_tensor : Tensor, output_data : Void*, output_data_size : LibC::SizeT) : Status
-  fun registration_external_create = TfLiteRegistrationExternalCreate(builtin_code : BuiltinOperator, custom_name : LibC::Char*, version : LibC::Int) : RegistrationExternal
+  fun interpreter_reset_variable_tensors = TfLiteInterpreterResetVariableTensors(interpreter : Interpreter) : Status
+  fun interpreter_options_add_builtin_op = TfLiteInterpreterOptionsAddBuiltinOp(options : InterpreterOptions, op : BuiltinOperator, registration : LibC::Int*, min_version : Int32T, max_version : Int32T)
   enum BuiltinOperator
     BuiltinAdd                          =   0
     BuiltinAveragePool2d                =   1
@@ -431,23 +472,53 @@ lib LibTensorflowLite
     BuiltinBitcast                      = 159
     BuiltinBitwiseXor                   = 160
     BuiltinRightShift                   = 161
+    BuiltinStablehloLogistic            = 162
+    BuiltinStablehloAdd                 = 163
+    BuiltinStablehloDivide              = 164
+    BuiltinStablehloMultiply            = 165
+    BuiltinStablehloMaximum             = 166
+    BuiltinStablehloReshape             = 167
+    BuiltinStablehloClamp               = 168
+    BuiltinStablehloConcatenate         = 169
+    BuiltinStablehloBroadcastInDim      = 170
+    BuiltinStablehloConvolution         = 171
+    BuiltinStablehloSlice               = 172
+    BuiltinStablehloCustomCall          = 173
+    BuiltinStablehloReduce              = 174
+    BuiltinStablehloAbs                 = 175
+    BuiltinStablehloAnd                 = 176
+    BuiltinStablehloCosine              = 177
+    BuiltinStablehloExponential         = 178
+    BuiltinStablehloFloor               = 179
+    BuiltinStablehloLog                 = 180
+    BuiltinStablehloMinimum             = 181
+    BuiltinStablehloNegate              = 182
+    BuiltinStablehloOr                  = 183
+    BuiltinStablehloPower               = 184
+    BuiltinStablehloRemainder           = 185
+    BuiltinStablehloRsqrt               = 186
+    BuiltinStablehloSelect              = 187
+    BuiltinStablehloSubtract            = 188
+    BuiltinStablehloTanh                = 189
+    BuiltinStablehloScatter             = 190
+    BuiltinStablehloCompare             = 191
+    BuiltinStablehloConvert             = 192
+    BuiltinStablehloDynamicSlice        = 193
+    BuiltinStablehloDynamicUpdateSlice  = 194
+    BuiltinStablehloPad                 = 195
+    BuiltinStablehloIota                = 196
+    BuiltinStablehloDotGeneral          = 197
+    BuiltinStablehloReduceWindow        = 198
+    BuiltinStablehloSort                = 199
+    BuiltinStablehloWhile               = 200
+    BuiltinStablehloGather              = 201
+    BuiltinStablehloTranspose           = 202
   end
-  fun registration_external_get_built_in_code = TfLiteRegistrationExternalGetBuiltInCode(registration : RegistrationExternal) : BuiltinOperator
-  fun registration_external_get_version = TfLiteRegistrationExternalGetVersion(registration : RegistrationExternal) : LibC::Int
-  fun registration_external_get_custom_name = TfLiteRegistrationExternalGetCustomName(registration : RegistrationExternal) : LibC::Char*
-  fun registration_external_delete = TfLiteRegistrationExternalDelete(registration : RegistrationExternal)
-  fun registration_external_set_init = TfLiteRegistrationExternalSetInit(registration : RegistrationExternal, init : (OpaqueContext, LibC::Char*, LibC::SizeT -> Void*))
-  type OpaqueContext = Void*
-  fun registration_external_set_free = TfLiteRegistrationExternalSetFree(registration : RegistrationExternal, free : (OpaqueContext, Void* -> Void))
-  fun registration_external_set_prepare = TfLiteRegistrationExternalSetPrepare(registration : RegistrationExternal, prepare : (OpaqueContext, OpaqueNode -> Status))
-  type OpaqueNode = Void*
-  fun registration_external_set_invoke = TfLiteRegistrationExternalSetInvoke(registration : RegistrationExternal, invoke : (OpaqueContext, OpaqueNode -> Status))
-  fun registration_external_set_async_kernel = TfLiteRegistrationExternalSetAsyncKernel(registration : RegistrationExternal, async_kernel : (OpaqueContext, OpaqueNode -> LibC::Int*))
-  fun interpreter_reset_variable_tensors = TfLiteInterpreterResetVariableTensors(interpreter : Interpreter) : Status
-  fun interpreter_options_add_builtin_op = TfLiteInterpreterOptionsAddBuiltinOp(options : InterpreterOptions, op : BuiltinOperator, registration : LibC::Int*, min_version : Int32T, max_version : Int32T)
   fun interpreter_options_add_custom_op = TfLiteInterpreterOptionsAddCustomOp(options : InterpreterOptions, name : LibC::Char*, registration : LibC::Int*, min_version : Int32T, max_version : Int32T)
-  fun interpreter_options_set_op_resolver_external = TfLiteInterpreterOptionsSetOpResolverExternal(options : InterpreterOptions, find_builtin_op : (Void*, LibC::Int, LibC::Int -> RegistrationExternal), find_custom_op : (Void*, LibC::Char*, LibC::Int -> RegistrationExternal), op_resolver_user_data : Void*)
+  fun interpreter_options_set_op_resolver_external = TfLiteInterpreterOptionsSetOpResolverExternal(options : InterpreterOptions, find_builtin_op : (Void*, LibC::Int, LibC::Int -> LibC::Int*), find_custom_op : (Void*, LibC::Char*, LibC::Int -> LibC::Int*), op_resolver_user_data : Void*)
+  fun interpreter_options_set_op_resolver_external_with_fallback = TfLiteInterpreterOptionsSetOpResolverExternalWithFallback(options : InterpreterOptions, find_builtin_op_external : (Void*, LibC::Int, LibC::Int -> LibC::Int*), find_custom_op_external : (Void*, LibC::Char*, LibC::Int -> LibC::Int*), find_builtin_op : (Void*, BuiltinOperator, LibC::Int -> LibC::Int*), find_custom_op : (Void*, LibC::Char*, LibC::Int -> LibC::Int*), op_resolver_user_data : Void*)
   fun interpreter_options_set_op_resolver = TfLiteInterpreterOptionsSetOpResolver(options : InterpreterOptions, find_builtin_op : (Void*, BuiltinOperator, LibC::Int -> LibC::Int*), find_custom_op : (Void*, LibC::Char*, LibC::Int -> LibC::Int*), op_resolver_user_data : Void*)
+  fun interpreter_options_set_op_resolver_v3 = TfLiteInterpreterOptionsSetOpResolverV3(options : InterpreterOptions, find_builtin_op_v3 : (Void*, BuiltinOperator, LibC::Int -> LibC::Int*), find_custom_op_v3 : (Void*, LibC::Char*, LibC::Int -> LibC::Int*), op_resolver_user_data : Void*)
   fun interpreter_options_set_op_resolver_v2 = TfLiteInterpreterOptionsSetOpResolverV2(options : InterpreterOptions, find_builtin_op_v2 : (Void*, BuiltinOperator, LibC::Int -> LibC::Int*), find_custom_op_v2 : (Void*, LibC::Char*, LibC::Int -> LibC::Int*), op_resolver_user_data : Void*)
   fun interpreter_options_set_op_resolver_v1 = TfLiteInterpreterOptionsSetOpResolverV1(options : InterpreterOptions, find_builtin_op_v1 : (Void*, BuiltinOperator, LibC::Int -> LibC::Int*), find_custom_op_v1 : (Void*, LibC::Char*, LibC::Int -> LibC::Int*), op_resolver_user_data : Void*)
   fun interpreter_create_with_selected_ops = TfLiteInterpreterCreateWithSelectedOps(model : Model, options : InterpreterOptions) : Interpreter
@@ -473,4 +544,23 @@ lib LibTensorflowLite
   fun signature_runner_get_output_tensor = TfLiteSignatureRunnerGetOutputTensor(signature_runner : SignatureRunner, output_name : LibC::Char*) : Tensor
   fun signature_runner_cancel = TfLiteSignatureRunnerCancel(signature_runner : SignatureRunner) : Status
   fun signature_runner_delete = TfLiteSignatureRunnerDelete(signature_runner : SignatureRunner)
+  # fun interpreter_options_set_telemetry_profiler = TfLiteInterpreterOptionsSetTelemetryProfiler(options : InterpreterOptions, profiler : TelemetryProfilerStruct*)
+  fun gpu_delegate_options_v2_default = TfLiteGpuDelegateOptionsV2Default : GpuDelegateOptionsV2
+
+  struct GpuDelegateOptionsV2
+    is_precision_loss_allowed : Int32T
+    inference_preference : Int32T
+    inference_priority1 : Int32T
+    inference_priority2 : Int32T
+    inference_priority3 : Int32T
+    experimental_flags : Int64T
+    max_delegated_partitions : Int32T
+    serialization_dir : LibC::Char*
+    model_token : LibC::Char*
+  end
+
+  alias X__Int64T = LibC::Long
+  alias Int64T = X__Int64T
+  fun gpu_delegate_v2_create = TfLiteGpuDelegateV2Create(options : GpuDelegateOptionsV2*) : Delegate
+  fun gpu_delegate_v2_delete = TfLiteGpuDelegateV2Delete(delegate : Delegate)
 end
diff --git a/tensorflow.patch b/tensorflow.patch
new file mode 100644
index 0000000..3b680e1
--- /dev/null
+++ b/tensorflow.patch
@@ -0,0 +1,36 @@
+diff --git a/tensorflow/lite/CMakeLists.txt b/tensorflow/lite/CMakeLists.txt
+index 0924ca9d..812f8c55 100644
+--- a/tensorflow/lite/CMakeLists.txt
++++ b/tensorflow/lite/CMakeLists.txt
+@@ -564,14 +564,17 @@ endif()
+ set(_ALL_TFLITE_SRCS
+   ${TFLITE_CORE_ACCELERATION_SRCS}
+   ${TFLITE_CORE_API_SRCS}
+-  ${TFLITE_CORE_C_SRCS}
++
++  ${TFLITE_SOURCE_DIR}/c/common_internal.cc
++  ${TFLITE_SOURCE_DIR}/c/c_api_opaque_internal.cc
++  ${TFLITE_SOURCE_DIR}/core/c/c_api_opaque.cc
++
+   ${TFLITE_CORE_EXPERIMENTAL_SRCS}
+   ${TFLITE_CORE_KERNELS_SRCS}
+   ${TFLITE_CORE_SRCS}
+   ${TFLITE_CORE_ASYNC_SRCS}
+   ${TFLITE_CORE_ASYNC_C_SRCS}
+   ${TFLITE_CORE_TOOLS_SRCS}
+-  ${TFLITE_C_SRCS}
+   ${TFLITE_DELEGATES_FLEX_SRCS}
+   ${TFLITE_DELEGATES_GPU_SRCS}
+   ${TFLITE_DELEGATES_NNAPI_SRCS}
+diff --git a/tensorflow/lite/c/CMakeLists.txt b/tensorflow/lite/c/CMakeLists.txt
+index 3fb086ec..f91a0fe4 100644
+--- a/tensorflow/lite/c/CMakeLists.txt
++++ b/tensorflow/lite/c/CMakeLists.txt
+@@ -86,5 +86,7 @@ if (TFLITE_C_BUILD_SHARED_LIBS)
+ endif()
+ 
+ target_link_libraries(tensorflowlite_c
++  -Wl,--whole-archive
+   tensorflow-lite
++  -Wl,--no-whole-archive
+ )

From 259b41d99f259b52897372cfeba1de4fe93005fb Mon Sep 17 00:00:00 2001
From: Stephen von Takach <steve@place.technology>
Date: Wed, 13 Sep 2023 17:05:51 +1000
Subject: [PATCH 2/8] fix(build_tensorflowlite.sh): select version that builds
 with the patch

---
 build_tensorflowlite.sh | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/build_tensorflowlite.sh b/build_tensorflowlite.sh
index ad275c7..2cb988d 100644
--- a/build_tensorflowlite.sh
+++ b/build_tensorflowlite.sh
@@ -16,6 +16,9 @@ echo "--"
 # clone the required repositories
 git clone --depth 1 https://github.com/tensorflow/tensorflow
 cd tensorflow
+git fetch origin refs/tags/v2.13.0:refs/tags/v2.13.0
+git checkout v2.13.0
+
 git apply ../tensorflow.patch
 cd ..
 
@@ -32,7 +35,7 @@ echo "--"
 echo "building..."
 echo "--"
 
-cmake --build . -j2 || true
+cmake --build . -j3 || true
 
 FILE=./libtensorflowlite_c.so
 if test -f "$FILE"; then
@@ -79,8 +82,8 @@ fi
 
 cd ..
 
-rm -rf ./tensorflow
-rm -rf ./tflite_build
+# rm -rf ./tensorflow
+# rm -rf ./tflite_build
 
 echo "--"
 echo "Done"

From e1d0582f833fb342748d6fbc869633a9ecfe3acd Mon Sep 17 00:00:00 2001
From: Stephen von Takach <steve@place.technology>
Date: Wed, 22 May 2024 10:13:21 +1000
Subject: [PATCH 3/8] feat cleanup code and add specs

---
 spec/tensorflow_lite_spec.cr                  | 94 +++++++++++++------
 src/tensorflow_lite/client.cr                 | 11 ++-
 src/tensorflow_lite/delegate_gpu.cr           | 28 +-----
 .../delegate_gpu/lib_delegate_gpu.cr          | 26 +++++
 src/tensorflow_lite/interpreter.cr            |  6 +-
 5 files changed, 101 insertions(+), 64 deletions(-)
 create mode 100644 src/tensorflow_lite/delegate_gpu/lib_delegate_gpu.cr

diff --git a/spec/tensorflow_lite_spec.cr b/spec/tensorflow_lite_spec.cr
index f99874b..5edbeb6 100644
--- a/spec/tensorflow_lite_spec.cr
+++ b/spec/tensorflow_lite_spec.cr
@@ -112,43 +112,77 @@ module TensorflowLite
       client.labels.as(Array(String)).size.should eq 90
     end
 
-    it "can add a GPU delegate to the interpreter options" do
-      # we have to skip this test if there is no hardware installed
-      # however at least we know it compiles
-      file_io = File.new(model_path)
-      file_data = Bytes.new(file_io.size)
-      file_io.read_fully(file_data)
-      file_io.close
-
-      {Model.new(model_path), Model.new(file_data)}.each do |model|
-        opts = InterpreterOptions.new
-        opts.add_delegate DelegateGPU.new
-        opts.on_error do |error_msg|
-          puts "error was #{error_msg}"
+    describe TensorflowLite do
+      it "can add a GPU delegate" do
+        # this test will fall back to the CPU if there is no GPU hardware installed;
+        # however, at least we know it compiles
+        file_io = File.new(model_path)
+        file_data = Bytes.new(file_io.size)
+        file_io.read_fully(file_data)
+        file_io.close
+
+        {Model.new(model_path), Model.new(file_data)}.each do |model|
+          opts = InterpreterOptions.new
+          opts.on_error do |error_msg|
+            puts "error was #{error_msg}"
+          end
+          interpreter = Interpreter.new(model, opts)
+
+          gpu = DelegateGPU.new
+          interpreter.modify_graph_with_delegate gpu
+
+          xor_test.each do |test|
+            inputs = test[:input]
+            expected = test[:result]
+
+            # configure inputs
+            input_tensor = interpreter.input_tensor(0)
+            input_tensor.raw_data.bytesize.should eq input_tensor.bytesize
+            input_tensor.size.should eq 2
+
+            floats = input_tensor.as_f32
+            floats[0], floats[1] = inputs
+
+            # run through NN
+            interpreter.invoke!
+
+            # check results
+            output_tensor = interpreter.output_tensor(0)
+            floats = output_tensor.as_f32
+            result = (floats[0] + 0.5_f32).to_i
+
+            result.should eq expected
+          end
         end
-        interpreter = Interpreter.new(model, opts)
+      end
 
-        xor_test.each do |test|
-          inputs = test[:input]
-          expected = test[:result]
+      it "can add a GPU delegate to the client" do
+        # this test will fall back to the CPU if there is no GPU hardware installed
+        file_io = File.new(model_path)
+        file_data = Bytes.new(file_io.size)
+        file_io.read_fully(file_data)
+        file_io.close
 
-          # configure inputs
-          input_tensor = interpreter.input_tensor(0)
-          input_tensor.raw_data.bytesize.should eq input_tensor.bytesize
-          input_tensor.size.should eq 2
+        {Model.new(model_path), Model.new(file_data)}.each do |model|
+          client = TensorflowLite::Client.new(model_path, delegate: DelegateGPU.new)
 
-          floats = input_tensor.as_f32
-          floats[0], floats[1] = inputs
+          xor_test.each do |test|
+            inputs = test[:input]
+            expected = test[:result]
 
-          # run through NN
-          interpreter.invoke!
+            # configure inputs
+            floats = client[0].as_f32
+            floats[0], floats[1] = inputs
 
-          # check results
-          output_tensor = interpreter.output_tensor(0)
-          floats = output_tensor.as_f32
-          result = (floats[0] + 0.5_f32).to_i
+            # run through NN
+            client.invoke!
 
-          result.should eq expected
+            # check results
+            floats = client.output.as_f32
+            result = (floats[0] + 0.5_f32).to_i
+
+            result.should eq expected
+          end
         end
       end
     end
diff --git a/src/tensorflow_lite/client.cr b/src/tensorflow_lite/client.cr
index fdd5f26..64d29f5 100644
--- a/src/tensorflow_lite/client.cr
+++ b/src/tensorflow_lite/client.cr
@@ -47,10 +47,15 @@ class TensorflowLite::Client
     if threads
       @options.num_threads(threads)
     end
-    if delegate
-      @options.add_delegate delegate
+
+    case delegate
+    when DelegateGPU
+      @interpreter = Interpreter.new(@model, @options)
+      @interpreter.modify_graph_with_delegate delegate
+    else
+      @options.add_delegate(delegate) if delegate
+      @interpreter = Interpreter.new(@model, @options)
     end
-    @interpreter = Interpreter.new(@model, @options)
   end
 
   getter model : Model
diff --git a/src/tensorflow_lite/delegate_gpu.cr b/src/tensorflow_lite/delegate_gpu.cr
index 383505f..837ee13 100644
--- a/src/tensorflow_lite/delegate_gpu.cr
+++ b/src/tensorflow_lite/delegate_gpu.cr
@@ -1,31 +1,5 @@
 require "./delegate"
-
-# :nodoc:
-@[Link("tensorflowlite_gpu_delegate", ldflags: "-L#{__DIR__}/../../ext/ -Wl,-rpath='$ORIGIN'")]
-lib LibDelegateGPU
-  alias X__Int32T = LibC::Int
-  alias Int32T = X__Int32T
-
-  alias X__Int64T = LibC::Long
-  alias Int64T = X__Int64T
-
-  fun gpu_delegate_options_v2_default = TfLiteGpuDelegateOptionsV2Default : GpuDelegateOptionsV2
-
-  struct GpuDelegateOptionsV2
-    is_precision_loss_allowed : Int32T
-    inference_preference : Int32T
-    inference_priority1 : Int32T
-    inference_priority2 : Int32T
-    inference_priority3 : Int32T
-    experimental_flags : Int64T
-    max_delegated_partitions : Int32T
-    serialization_dir : LibC::Char*
-    model_token : LibC::Char*
-  end
-
-  fun gpu_delegate_v2_create = TfLiteGpuDelegateV2Create(options : GpuDelegateOptionsV2*) : LibTensorflowLite::OpaqueDelegate
-  fun gpu_delegate_v2_delete = TfLiteGpuDelegateV2Delete(delegate : LibTensorflowLite::OpaqueDelegate)
-end
+require "./delegate_gpu/*"
 
 class TensorflowLite::DelegateGPU < TensorflowLite::Delegate
   def initialize
diff --git a/src/tensorflow_lite/delegate_gpu/lib_delegate_gpu.cr b/src/tensorflow_lite/delegate_gpu/lib_delegate_gpu.cr
new file mode 100644
index 0000000..0523bb3
--- /dev/null
+++ b/src/tensorflow_lite/delegate_gpu/lib_delegate_gpu.cr
@@ -0,0 +1,26 @@
+# :nodoc:
+@[Link("tensorflowlite_gpu_delegate", ldflags: "-L#{__DIR__}/../../ext/ -Wl,-rpath='$ORIGIN'")]
+lib LibDelegateGPU
+  alias X__Int32T = LibC::Int
+  alias Int32T = X__Int32T
+
+  alias X__Int64T = LibC::Long
+  alias Int64T = X__Int64T
+
+  fun gpu_delegate_options_v2_default = TfLiteGpuDelegateOptionsV2Default : GpuDelegateOptionsV2
+
+  struct GpuDelegateOptionsV2
+    is_precision_loss_allowed : Int32T
+    inference_preference : Int32T
+    inference_priority1 : Int32T
+    inference_priority2 : Int32T
+    inference_priority3 : Int32T
+    experimental_flags : Int64T
+    max_delegated_partitions : Int32T
+    serialization_dir : LibC::Char*
+    model_token : LibC::Char*
+  end
+
+  fun gpu_delegate_v2_create = TfLiteGpuDelegateV2Create(options : GpuDelegateOptionsV2*) : LibTensorflowLite::OpaqueDelegate
+  fun gpu_delegate_v2_delete = TfLiteGpuDelegateV2Delete(delegate : LibTensorflowLite::OpaqueDelegate)
+end
diff --git a/src/tensorflow_lite/interpreter.cr b/src/tensorflow_lite/interpreter.cr
index 11c3b04..a6afa74 100644
--- a/src/tensorflow_lite/interpreter.cr
+++ b/src/tensorflow_lite/interpreter.cr
@@ -1,4 +1,5 @@
 require "./model"
+require "./delegate"
 require "./interpreter_options"
 
 # The Interpreter takes a model, loads it, and allows you to run (or "interpret") the model, i.e., to use it to make predictions based on input data.
@@ -64,14 +65,11 @@ class TensorflowLite::Interpreter
 
   alias Status = LibTensorflowLite::Status
 
-  # :nodoc:
-  alias Delegate = LibTensorflowLite::Delegate
-
   # :nodoc:
   # provides a method to add a delegate after initialization.
   # Recommended that delegates are configured via `InterpreterOptions`
   def modify_graph_with_delegate(delegate : Delegate) : Status
-    LibTensorflowLite.interpreter_modify_graph_with_delegate(self, delegate)
+    LibTensorflowLite.interpreter_modify_graph_with_delegate(self, delegate.to_unsafe.value)
   end
 
   # runs the model and returns the result status

From 9a25e0d1d4e1062649bb297ad07791ae0f66acf6 Mon Sep 17 00:00:00 2001
From: Stephen von Takach <steve@place.tech>
Date: Wed, 22 May 2024 14:11:17 +1000
Subject: [PATCH 4/8] feat: add docker build and installation

---
 Dockerfile                | 123 ++++++++++++++++++++++++++++++++++++++
 README.md                 |  29 +++++++++
 install_tensorflowlite.sh |  55 +++++++++++++++++
 shard.yml                 |   2 +-
 4 files changed, 208 insertions(+), 1 deletion(-)
 create mode 100644 Dockerfile
 create mode 100755 install_tensorflowlite.sh

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..0686542
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,123 @@
+# Use an image supported by https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html
+FROM ubuntu:22.04 as build
+
+# Install necessary packages
+RUN apt-get update && \
+    apt-get install -y \
+    build-essential \
+    cmake \
+    git \
+    wget \
+    unzip \
+    libtool \
+    zlib1g-dev \
+    vim-common \
+    curl \
+    unzip \
+    zlib1g \
+    python3 \
+    python3-pip \
+    python3-dev \
+    libopenblas-dev \
+    opencl-headers \
+    clinfo \
+    ocl-icd-opencl-dev \
+    clang \
+    libclang-dev \
+    libc++-dev \
+    linux-headers-generic \
+    software-properties-common \
+    libabsl-dev \
+    libusb-1.0-0-dev \
+    gnupg2 && \
+    apt-get clean
+
+# Install Bazelisk for building TensorFlow
+ARG TARGETARCH
+RUN wget -O /usr/local/bin/bazel https://github.com/bazelbuild/bazelisk/releases/download/v1.10.1/bazelisk-linux-$TARGETARCH && \
+    chmod +x /usr/local/bin/bazel
+
+ENV TMP=/tmp
+
+# Clone TensorFlow repository
+# https://www.tensorflow.org/install/source#gpu (lib compatibility list)
+RUN git clone --depth 1 --branch "v2.16.1" https://github.com/tensorflow/tensorflow
+
+# =======================
+# build edge TPU delegate
+# =======================
+
+WORKDIR /tensorflow
+RUN git clone https://github.com/google-coral/libedgetpu
+WORKDIR /tensorflow/libedgetpu
+
+# Build the Edge TPU library (libedgetpu-direct make target)
+RUN make libedgetpu-direct
+
+# Copy the built shared libraries to /usr/local/lib
+RUN mkdir -p /usr/local/lib && \
+    cp /tensorflow/libedgetpu/out/direct/k8/libedgetpu.so.1.0 /usr/local/lib/libedgetpu.so
+
+
+# ==================================
+# Build tensorflow lite GPU delegate
+# ==================================
+
+WORKDIR /tensorflow
+
+# Configure TensorFlow build (excluding Android)
+RUN ./configure <<EOF
+
+
+
+
+
+
+
+
+EOF
+
+# Build TensorFlow Lite GPU delegate (excluding Android, linux only)
+RUN bazel build //tensorflow/lite/delegates/gpu:libtensorflowlite_gpu_delegate.so \
+    --config=opt \
+    --config=monolithic \
+    --copt=-g \
+    --cxxopt=-std=c++17 \
+    --copt=-DMESA_EGL_NO_X11_HEADERS \
+    --copt=-DEGL_NO_X11 \
+    --copt=-DCL_DELEGATE_NO_GL \
+    --define=with_xla_support=false \
+    --define=with_flex_support=false \
+    --define=no_tensorflow_py_deps=true \
+    --config=noaws \
+    --config=nogcp \
+    --config=nohdfs \
+    --verbose_failures
+
+# Copy the built shared libraries to /usr/local/lib
+RUN mkdir -p /usr/local/lib && \
+    cp bazel-bin/tensorflow/lite/delegates/gpu/libtensorflowlite_gpu_delegate.so /usr/local/lib/
+
+# =================================
+# Build tensorflow lite using cmake
+# =================================
+
+RUN mkdir tflite_build
+WORKDIR /tensorflow/tflite_build
+RUN cmake /tensorflow/tensorflow/lite/c -DTFLITE_ENABLE_GPU=ON
+RUN cmake --build . -j4 || true
+RUN echo "---------- WE ARE BUILDING AGAIN!! ----------"
+RUN cmake --build . -j1
+
+# copy the shared lib into place
+RUN cp ./libtensorflowlite_c.so /usr/local/lib/
+
+# ======================
+# Set up the final stage
+# ======================
+FROM scratch
+
+# Copy the built libraries from the build stage
+COPY --from=build /usr/local/lib/libedgetpu.so /usr/local/lib/libedgetpu.so
+COPY --from=build /usr/local/lib/libtensorflowlite_c.so /usr/local/lib/libtensorflowlite_c.so
+COPY --from=build /usr/local/lib/libtensorflowlite_gpu_delegate.so /usr/local/lib/libtensorflowlite_gpu_delegate.so
diff --git a/README.md b/README.md
index 7b6690f..1f8a6cf 100644
--- a/README.md
+++ b/README.md
@@ -78,6 +78,35 @@ To update tensorflow lite bindings `./generate_bindings.sh`
 
 ### lib installation
 
+#### Dockerfile
+
+The Dockerfile builds the TensorFlow Lite shared libraries (C API, GPU delegate and libedgetpu) for the target platforms.
+A pre-built image is available via `docker pull stakach/tensorflowlite:latest`
+
+To build an image run:
+
+```shell
+docker buildx build --progress=plain --platform linux/arm64,linux/amd64 -t stakach/tensorflowlite:latest --push .
+```
+
+To extract the libraries from the image:
+
+```shell
+mkdir -p ./ext
+docker pull stakach/tensorflowlite:latest
+docker create --name tflite_tmp stakach/tensorflowlite:latest true
+
+docker cp tflite_tmp:/usr/local/lib/libedgetpu.so ./ext/libedgetpu.so
+docker cp tflite_tmp:/usr/local/lib/libtensorflowlite_c.so ./ext/libtensorflowlite_c.so
+docker cp tflite_tmp:/usr/local/lib/libtensorflowlite_gpu_delegate.so ./ext/libtensorflowlite_gpu_delegate.so
+
+docker rm tflite_tmp
+```
+
+This extraction is performed automatically by the shard's post-install script (`install_tensorflowlite.sh`).
+
+#### Old method
+
 Requires [libtensorflow](https://www.tensorflow.org/install/lang_c) to be installed, this is handled automatically by `./build_tensorflowlite.sh`
 
 * there is a [guide to building it](https://www.tensorflow.org/lite/guide/build_cmake)
diff --git a/install_tensorflowlite.sh b/install_tensorflowlite.sh
new file mode 100755
index 0000000..a4cd142
--- /dev/null
+++ b/install_tensorflowlite.sh
@@ -0,0 +1,55 @@
+#!/bin/sh
+# Fetches the prebuilt tensorflow lite libraries from a docker image into ./ext
+SHARDS_INSTALL=IS_LIB
+IS_LOCAL=./ext/libtensorflowlite_c.so
+if test -f "$IS_LOCAL"; then
+  echo "--"
+  echo "tensorflow lite library installed, skipping installation"
+  echo "--"
+  exit 0
+fi
+
+echo "--"
+echo "downloading images... (requires docker)"
+echo "--"
+
+mkdir -p ./ext
+docker pull stakach/tensorflowlite:latest
+docker create --name tflite_tmp stakach/tensorflowlite:latest true
+
+echo "--"
+echo "copying library into place.."
+echo "--"
+
+docker cp tflite_tmp:/usr/local/lib/libedgetpu.so ./ext/libedgetpu.so
+docker cp tflite_tmp:/usr/local/lib/libtensorflowlite_c.so ./ext/libtensorflowlite_c.so
+docker cp tflite_tmp:/usr/local/lib/libtensorflowlite_gpu_delegate.so ./ext/libtensorflowlite_gpu_delegate.so
+docker rm tflite_tmp
+
+# the libs are also placed in a few other locations so they are found when using crystal normally
+
+# Temp location crystal runs applications from
+mkdir -p ~/.cache/crystal/
+cp ./ext/libedgetpu.so ~/.cache/crystal/
+cp ./ext/libtensorflowlite_c.so ~/.cache/crystal/
+cp ./ext/libtensorflowlite_gpu_delegate.so ~/.cache/crystal/
+
+# when installed as a lib (first argument is IS_LIB), link the libs two directories up and into its bin/
+# link targets are absolute: a relative target is resolved from the link's own directory and would dangle
+if [ "$1" = "$SHARDS_INSTALL" ]; then
+  echo "copying into parent directory.."
+  mkdir -p ../../bin
+  ln -sf "$PWD/ext/libedgetpu.so" ../../bin/libedgetpu.so
+  ln -sf "$PWD/ext/libedgetpu.so" ../../libedgetpu.so
+
+  ln -sf "$PWD/ext/libtensorflowlite_c.so" ../../bin/libtensorflowlite_c.so
+  ln -sf "$PWD/ext/libtensorflowlite_c.so" ../../libtensorflowlite_c.so
+
+  ln -sf "$PWD/ext/libtensorflowlite_gpu_delegate.so" ../../bin/libtensorflowlite_gpu_delegate.so
+  ln -sf "$PWD/ext/libtensorflowlite_gpu_delegate.so" ../../libtensorflowlite_gpu_delegate.so
+else
+  echo "run manually, assuming library development"
+fi
+
+echo "--"
+echo "Done"
diff --git a/shard.yml b/shard.yml
index 6f16c8d..51817d3 100644
--- a/shard.yml
+++ b/shard.yml
@@ -11,4 +11,4 @@ authors:
 license: MIT
 
 scripts:
-  postinstall: /bin/sh ./build_tensorflowlite.sh IS_LIB
+  postinstall: /bin/sh ./install_tensorflowlite.sh IS_LIB

From 16a6648a7dafbc73adddf77db2df0a793f062f91 Mon Sep 17 00:00:00 2001
From: Stephen von Takach <steve@place.technology>
Date: Wed, 22 May 2024 14:13:12 +1000
Subject: [PATCH 5/8] chore: remove tensorflow.patch

---
 tensorflow.patch | 36 ------------------------------------
 1 file changed, 36 deletions(-)
 delete mode 100644 tensorflow.patch

diff --git a/tensorflow.patch b/tensorflow.patch
deleted file mode 100644
index 3b680e1..0000000
--- a/tensorflow.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-diff --git a/tensorflow/lite/CMakeLists.txt b/tensorflow/lite/CMakeLists.txt
-index 0924ca9d..812f8c55 100644
---- a/tensorflow/lite/CMakeLists.txt
-+++ b/tensorflow/lite/CMakeLists.txt
-@@ -564,14 +564,17 @@ endif()
- set(_ALL_TFLITE_SRCS
-   ${TFLITE_CORE_ACCELERATION_SRCS}
-   ${TFLITE_CORE_API_SRCS}
--  ${TFLITE_CORE_C_SRCS}
-+
-+  ${TFLITE_SOURCE_DIR}/c/common_internal.cc
-+  ${TFLITE_SOURCE_DIR}/c/c_api_opaque_internal.cc
-+  ${TFLITE_SOURCE_DIR}/core/c/c_api_opaque.cc
-+
-   ${TFLITE_CORE_EXPERIMENTAL_SRCS}
-   ${TFLITE_CORE_KERNELS_SRCS}
-   ${TFLITE_CORE_SRCS}
-   ${TFLITE_CORE_ASYNC_SRCS}
-   ${TFLITE_CORE_ASYNC_C_SRCS}
-   ${TFLITE_CORE_TOOLS_SRCS}
--  ${TFLITE_C_SRCS}
-   ${TFLITE_DELEGATES_FLEX_SRCS}
-   ${TFLITE_DELEGATES_GPU_SRCS}
-   ${TFLITE_DELEGATES_NNAPI_SRCS}
-diff --git a/tensorflow/lite/c/CMakeLists.txt b/tensorflow/lite/c/CMakeLists.txt
-index 3fb086ec..f91a0fe4 100644
---- a/tensorflow/lite/c/CMakeLists.txt
-+++ b/tensorflow/lite/c/CMakeLists.txt
-@@ -86,5 +86,7 @@ if (TFLITE_C_BUILD_SHARED_LIBS)
- endif()
- 
- target_link_libraries(tensorflowlite_c
-+  -Wl,--whole-archive
-   tensorflow-lite
-+  -Wl,--no-whole-archive
- )

From fbf276159afd63fe747d7282f0e60b8193205992 Mon Sep 17 00:00:00 2001
From: Stephen von Takach <steve@place.technology>
Date: Wed, 22 May 2024 14:15:19 +1000
Subject: [PATCH 6/8] fix(spec): use model in client spec

---
 spec/tensorflow_lite_spec.cr | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spec/tensorflow_lite_spec.cr b/spec/tensorflow_lite_spec.cr
index 5edbeb6..1aacbf8 100644
--- a/spec/tensorflow_lite_spec.cr
+++ b/spec/tensorflow_lite_spec.cr
@@ -164,7 +164,7 @@ module TensorflowLite
         file_io.close
 
         {Model.new(model_path), Model.new(file_data)}.each do |model|
-          client = TensorflowLite::Client.new(model_path, delegate: DelegateGPU.new)
+          client = TensorflowLite::Client.new(model, delegate: DelegateGPU.new)
 
           xor_test.each do |test|
             inputs = test[:input]

From fd46b4d1f962c10d42209add3ea6e5c5fe04be27 Mon Sep 17 00:00:00 2001
From: Stephen von Takach <steve@place.technology>
Date: Wed, 22 May 2024 18:07:08 +1000
Subject: [PATCH 7/8] fix: libedgetpu on arm64

---
 Dockerfile | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 0686542..008b10f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -52,12 +52,19 @@ RUN git clone https://github.com/google-coral/libedgetpu
 WORKDIR /tensorflow/libedgetpu
 
 # Build TensorFlow Lite GPU delegate (excluding Android, linux only)
-RUN make libedgetpu-direct
-
-# Copy the built shared libraries to /usr/local/lib
-RUN mkdir -p /usr/local/lib && \
-    cp /tensorflow/libedgetpu/out/direct/k8/libedgetpu.so.1.0 /usr/local/lib/libedgetpu.so
-
+ARG TARGETPLATFORM
+RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
+        make libedgetpu-direct CPU=k8 && \
+        mkdir -p /usr/local/lib && \
+        cp /tensorflow/libedgetpu/out/direct/k8/libedgetpu.so.1.0 /usr/local/lib/libedgetpu.so; \
+    elif [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
+        make libedgetpu-direct CPU=aarch64 && \
+        mkdir -p /usr/local/lib && \
+        cp /tensorflow/libedgetpu/out/direct/aarch64/libedgetpu.so.1.0 /usr/local/lib/libedgetpu.so; \
+    else \
+        echo "Unknown platform"; \
+        exit 1; \
+    fi
 
 # ==================================
 # Build tensorflow lite GPU delegate

From ea8529f94681bbd0b5f32c14c3be0bf40620e91d Mon Sep 17 00:00:00 2001
From: Stephen von Takach <steve@place.tech>
Date: Thu, 23 May 2024 08:39:37 +1000
Subject: [PATCH 8/8] fix build and specs

---
 build_tensorflowlite.sh      | 4 ++--
 spec/tensorflow_lite_spec.cr | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)
 mode change 100644 => 100755 build_tensorflowlite.sh

diff --git a/build_tensorflowlite.sh b/build_tensorflowlite.sh
old mode 100644
new mode 100755
index d6b510b..936fd03
--- a/build_tensorflowlite.sh
+++ b/build_tensorflowlite.sh
@@ -76,8 +76,8 @@ fi
 
 cd ..
 
-# rm -rf ./tensorflow
-# rm -rf ./tflite_build
+rm -rf ./tensorflow
+rm -rf ./tflite_build
 
 echo "--"
 echo "Done"
diff --git a/spec/tensorflow_lite_spec.cr b/spec/tensorflow_lite_spec.cr
index 1aacbf8..1ca13fa 100644
--- a/spec/tensorflow_lite_spec.cr
+++ b/spec/tensorflow_lite_spec.cr
@@ -188,7 +188,7 @@ module TensorflowLite
     end
 
     it "works with quantized models" do
-      model_path = Path.new File.join(__DIR__, "./test_data/xor_model_quantized.tflite")
+      quant_path = Path.new File.join(__DIR__, "./test_data/xor_model_quantized.tflite")
       quantized_test = {
         {input: {-128_i8, -128_i8}, result: 0},
         {input: {127_i8, -128_i8}, result: 1},
@@ -196,7 +196,7 @@ module TensorflowLite
         {input: {127_i8, 127_i8}, result: 0},
       }
 
-      client = TensorflowLite::Client.new(model_path)
+      client = TensorflowLite::Client.new(quant_path)
 
       quantized_test.each do |test|
         inputs = test[:input]