From 55554c3af68f2eabeba664ad85b4dc91a1472428 Mon Sep 17 00:00:00 2001 From: Stephen von Takach Date: Sun, 3 Sep 2023 22:38:25 +1000 Subject: [PATCH 1/8] feat: add support for GPU delegates --- bindings/bindings_generator.cr | 2 + build_tensorflowlite.sh | 6 +- spec/tensorflow_lite_spec.cr | 41 +++++++ src/tensorflow_lite.cr | 1 + src/tensorflow_lite/bindings_generator.cr | 17 +++ src/tensorflow_lite/delegate_gpu.cr | 21 ++++ src/tensorflow_lite/lib_tensorflowlite.cr | 124 +++++++++++++++++++--- tensorflow.patch | 36 +++++++ 8 files changed, 230 insertions(+), 18 deletions(-) mode change 100755 => 100644 build_tensorflowlite.sh create mode 100644 src/tensorflow_lite/bindings_generator.cr create mode 100644 src/tensorflow_lite/delegate_gpu.cr create mode 100644 tensorflow.patch diff --git a/bindings/bindings_generator.cr b/bindings/bindings_generator.cr index 8e493bf..b7d675e 100644 --- a/bindings/bindings_generator.cr +++ b/bindings/bindings_generator.cr @@ -3,6 +3,8 @@ "tensorflow/lite/core/c/c_api_types.h", "tensorflow/lite/core/c/c_api.h", "tensorflow/lite/core/c/c_api_experimental.h", + "tensorflow/lite/delegates/gpu/delegate_options.h", + "tensorflow/lite/delegates/gpu/delegate.h", flags: " -I/{tensorflow_dir}/tensorflow/ -I/{tensorflow_dir}/tensorflow/bazel-genfiles diff --git a/build_tensorflowlite.sh b/build_tensorflowlite.sh old mode 100755 new mode 100644 index 8f473fa..ad275c7 --- a/build_tensorflowlite.sh +++ b/build_tensorflowlite.sh @@ -15,6 +15,9 @@ echo "--" # clone the required repositories git clone --depth 1 https://github.com/tensorflow/tensorflow +cd tensorflow +git apply ../tensorflow.patch +cd .. echo "--" echo "configuring..." @@ -22,7 +25,8 @@ echo "--" mkdir tflite_build cd tflite_build -cmake ../tensorflow/tensorflow/lite/c -DTFLITE_ENABLE_GPU=ON +cmake ../tensorflow/tensorflow/lite/c \ + -DTFLITE_ENABLE_GPU=ON echo "--" echo "building..." 
diff --git a/spec/tensorflow_lite_spec.cr b/spec/tensorflow_lite_spec.cr index 46c1b1c..595f28a 100644 --- a/spec/tensorflow_lite_spec.cr +++ b/spec/tensorflow_lite_spec.cr @@ -111,5 +111,46 @@ module TensorflowLite client.outputs.size.should eq 4 client.labels.as(Array(String)).size.should eq 90 end + + it "can add a GPU delegate to the interpreter options" do + # we have to skip this test if there is no hardware installed + # however at least we know it compiles + file_io = File.new(model_path) + file_data = Bytes.new(file_io.size) + file_io.read_fully(file_data) + file_io.close + + {Model.new(model_path), Model.new(file_data)}.each do |model| + opts = InterpreterOptions.new + opts.add_delegate DelegateGPU.new + opts.on_error do |error_msg| + puts "error was #{error_msg}" + end + interpreter = Interpreter.new(model, opts) + + xor_test.each do |test| + inputs = test[:input] + expected = test[:result] + + # configure inputs + input_tensor = interpreter.input_tensor(0) + input_tensor.raw_data.bytesize.should eq input_tensor.bytesize + input_tensor.size.should eq 2 + + floats = input_tensor.as_f32 + floats[0], floats[1] = inputs + + # run through NN + interpreter.invoke! 
+ + # check results + output_tensor = interpreter.output_tensor(0) + floats = output_tensor.as_f32 + result = (floats[0] + 0.5_f32).to_i + + result.should eq expected + end + end + end end end diff --git a/src/tensorflow_lite.cr b/src/tensorflow_lite.cr index 3f2e2fb..552da38 100644 --- a/src/tensorflow_lite.cr +++ b/src/tensorflow_lite.cr @@ -29,4 +29,5 @@ require "./tensorflow_lite/interpreter_options" require "./tensorflow_lite/tensor" require "./tensorflow_lite/interpreter" require "./tensorflow_lite/client" +require "./tensorflow_lite/delegate_gpu" require "./tensorflow_lite/utilities/*" diff --git a/src/tensorflow_lite/bindings_generator.cr b/src/tensorflow_lite/bindings_generator.cr new file mode 100644 index 0000000..b7d675e --- /dev/null +++ b/src/tensorflow_lite/bindings_generator.cr @@ -0,0 +1,17 @@ +@[Include( + "tensorflow/lite/builtin_ops.h", + "tensorflow/lite/core/c/c_api_types.h", + "tensorflow/lite/core/c/c_api.h", + "tensorflow/lite/core/c/c_api_experimental.h", + "tensorflow/lite/delegates/gpu/delegate_options.h", + "tensorflow/lite/delegates/gpu/delegate.h", + flags: " + -I/{tensorflow_dir}/tensorflow/ + -I/{tensorflow_dir}/tensorflow/bazel-genfiles + -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include/c++/v1 + ", + prefix: %w(TFL_ TfLite kTfLite) +)] +@[Link("tensorflowlite_c", ldflags: "-L#{__DIR__}/../../ext/ -Wl,-rpath='$ORIGIN'")] +lib LibTensorflowLite +end diff --git a/src/tensorflow_lite/delegate_gpu.cr b/src/tensorflow_lite/delegate_gpu.cr new file mode 100644 index 0000000..1f1d55d --- /dev/null +++ b/src/tensorflow_lite/delegate_gpu.cr @@ -0,0 +1,21 @@ +require "./delegate" + +class TensorflowLite::DelegateGPU < TensorflowLite::Delegate + def initialize + @options = opts = LibTensorflowLite.gpu_delegate_options_v2_default + options_ptr = pointerof(opts) + @delegate = LibTensorflowLite.gpu_delegate_v2_create(options_ptr) + @to_unsafe = pointerof(@delegate) + end + + @options : 
LibTensorflowLite::GpuDelegateOptionsV2 + @delegate : LibTensorflowLite::Delegate + + # :nodoc: + def finalize + LibTensorflowLite.gpu_delegate_v2_delete(@delegate) + end + + # :nodoc: + getter to_unsafe : Pointer(LibTensorflowLite::Delegate) +end diff --git a/src/tensorflow_lite/lib_tensorflowlite.cr b/src/tensorflow_lite/lib_tensorflowlite.cr index 1eb6da1..d0aceec 100644 --- a/src/tensorflow_lite/lib_tensorflowlite.cr +++ b/src/tensorflow_lite/lib_tensorflowlite.cr @@ -163,6 +163,47 @@ lib LibTensorflowLite BuiltinBitcast = 159_i64 BuiltinBitwiseXor = 160_i64 BuiltinRightShift = 161_i64 + BuiltinStablehloLogistic = 162_i64 + BuiltinStablehloAdd = 163_i64 + BuiltinStablehloDivide = 164_i64 + BuiltinStablehloMultiply = 165_i64 + BuiltinStablehloMaximum = 166_i64 + BuiltinStablehloReshape = 167_i64 + BuiltinStablehloClamp = 168_i64 + BuiltinStablehloConcatenate = 169_i64 + BuiltinStablehloBroadcastInDim = 170_i64 + BuiltinStablehloConvolution = 171_i64 + BuiltinStablehloSlice = 172_i64 + BuiltinStablehloCustomCall = 173_i64 + BuiltinStablehloReduce = 174_i64 + BuiltinStablehloAbs = 175_i64 + BuiltinStablehloAnd = 176_i64 + BuiltinStablehloCosine = 177_i64 + BuiltinStablehloExponential = 178_i64 + BuiltinStablehloFloor = 179_i64 + BuiltinStablehloLog = 180_i64 + BuiltinStablehloMinimum = 181_i64 + BuiltinStablehloNegate = 182_i64 + BuiltinStablehloOr = 183_i64 + BuiltinStablehloPower = 184_i64 + BuiltinStablehloRemainder = 185_i64 + BuiltinStablehloRsqrt = 186_i64 + BuiltinStablehloSelect = 187_i64 + BuiltinStablehloSubtract = 188_i64 + BuiltinStablehloTanh = 189_i64 + BuiltinStablehloScatter = 190_i64 + BuiltinStablehloCompare = 191_i64 + BuiltinStablehloConvert = 192_i64 + BuiltinStablehloDynamicSlice = 193_i64 + BuiltinStablehloDynamicUpdateSlice = 194_i64 + BuiltinStablehloPad = 195_i64 + BuiltinStablehloIota = 196_i64 + BuiltinStablehloDotGeneral = 197_i64 + BuiltinStablehloReduceWindow = 198_i64 + BuiltinStablehloSort = 199_i64 + BuiltinStablehloWhile = 
200_i64 + BuiltinStablehloGather = 201_i64 + BuiltinStablehloTranspose = 202_i64 NoType = 0_i64 Float32 = 1_i64 Int32 = 2_i64 @@ -208,8 +249,7 @@ lib LibTensorflowLite fun interpreter_options_add_delegate = TfLiteInterpreterOptionsAddDelegate(options : InterpreterOptions, delegate : OpaqueDelegate*) alias OpaqueDelegate = Delegate fun interpreter_options_set_error_reporter = TfLiteInterpreterOptionsSetErrorReporter(options : InterpreterOptions, reporter : (Void*, LibC::Char*, VaList -> Void), user_data : Void*) - fun interpreter_options_add_registration_external = TfLiteInterpreterOptionsAddRegistrationExternal(options : InterpreterOptions, registration : RegistrationExternal) - type RegistrationExternal = Void* + fun interpreter_options_add_registration_external = TfLiteInterpreterOptionsAddRegistrationExternal(options : InterpreterOptions, registration : LibC::Int*) fun interpreter_options_enable_cancellation = TfLiteInterpreterOptionsEnableCancellation(options : InterpreterOptions, enable : LibC::Int) : Status enum Status Ok = 0 @@ -267,7 +307,8 @@ lib LibTensorflowLite fun tensor_quantization_params = TfLiteTensorQuantizationParams(tensor : Tensor) : QuantizationParams fun tensor_copy_from_buffer = TfLiteTensorCopyFromBuffer(tensor : Tensor, input_data : Void*, input_data_size : LibC::SizeT) : Status fun tensor_copy_to_buffer = TfLiteTensorCopyToBuffer(output_tensor : Tensor, output_data : Void*, output_data_size : LibC::SizeT) : Status - fun registration_external_create = TfLiteRegistrationExternalCreate(builtin_code : BuiltinOperator, custom_name : LibC::Char*, version : LibC::Int) : RegistrationExternal + fun interpreter_reset_variable_tensors = TfLiteInterpreterResetVariableTensors(interpreter : Interpreter) : Status + fun interpreter_options_add_builtin_op = TfLiteInterpreterOptionsAddBuiltinOp(options : InterpreterOptions, op : BuiltinOperator, registration : LibC::Int*, min_version : Int32T, max_version : Int32T) enum BuiltinOperator BuiltinAdd = 0 
BuiltinAveragePool2d = 1 @@ -431,23 +472,53 @@ lib LibTensorflowLite BuiltinBitcast = 159 BuiltinBitwiseXor = 160 BuiltinRightShift = 161 + BuiltinStablehloLogistic = 162 + BuiltinStablehloAdd = 163 + BuiltinStablehloDivide = 164 + BuiltinStablehloMultiply = 165 + BuiltinStablehloMaximum = 166 + BuiltinStablehloReshape = 167 + BuiltinStablehloClamp = 168 + BuiltinStablehloConcatenate = 169 + BuiltinStablehloBroadcastInDim = 170 + BuiltinStablehloConvolution = 171 + BuiltinStablehloSlice = 172 + BuiltinStablehloCustomCall = 173 + BuiltinStablehloReduce = 174 + BuiltinStablehloAbs = 175 + BuiltinStablehloAnd = 176 + BuiltinStablehloCosine = 177 + BuiltinStablehloExponential = 178 + BuiltinStablehloFloor = 179 + BuiltinStablehloLog = 180 + BuiltinStablehloMinimum = 181 + BuiltinStablehloNegate = 182 + BuiltinStablehloOr = 183 + BuiltinStablehloPower = 184 + BuiltinStablehloRemainder = 185 + BuiltinStablehloRsqrt = 186 + BuiltinStablehloSelect = 187 + BuiltinStablehloSubtract = 188 + BuiltinStablehloTanh = 189 + BuiltinStablehloScatter = 190 + BuiltinStablehloCompare = 191 + BuiltinStablehloConvert = 192 + BuiltinStablehloDynamicSlice = 193 + BuiltinStablehloDynamicUpdateSlice = 194 + BuiltinStablehloPad = 195 + BuiltinStablehloIota = 196 + BuiltinStablehloDotGeneral = 197 + BuiltinStablehloReduceWindow = 198 + BuiltinStablehloSort = 199 + BuiltinStablehloWhile = 200 + BuiltinStablehloGather = 201 + BuiltinStablehloTranspose = 202 end - fun registration_external_get_built_in_code = TfLiteRegistrationExternalGetBuiltInCode(registration : RegistrationExternal) : BuiltinOperator - fun registration_external_get_version = TfLiteRegistrationExternalGetVersion(registration : RegistrationExternal) : LibC::Int - fun registration_external_get_custom_name = TfLiteRegistrationExternalGetCustomName(registration : RegistrationExternal) : LibC::Char* - fun registration_external_delete = TfLiteRegistrationExternalDelete(registration : RegistrationExternal) - fun 
registration_external_set_init = TfLiteRegistrationExternalSetInit(registration : RegistrationExternal, init : (OpaqueContext, LibC::Char*, LibC::SizeT -> Void*)) - type OpaqueContext = Void* - fun registration_external_set_free = TfLiteRegistrationExternalSetFree(registration : RegistrationExternal, free : (OpaqueContext, Void* -> Void)) - fun registration_external_set_prepare = TfLiteRegistrationExternalSetPrepare(registration : RegistrationExternal, prepare : (OpaqueContext, OpaqueNode -> Status)) - type OpaqueNode = Void* - fun registration_external_set_invoke = TfLiteRegistrationExternalSetInvoke(registration : RegistrationExternal, invoke : (OpaqueContext, OpaqueNode -> Status)) - fun registration_external_set_async_kernel = TfLiteRegistrationExternalSetAsyncKernel(registration : RegistrationExternal, async_kernel : (OpaqueContext, OpaqueNode -> LibC::Int*)) - fun interpreter_reset_variable_tensors = TfLiteInterpreterResetVariableTensors(interpreter : Interpreter) : Status - fun interpreter_options_add_builtin_op = TfLiteInterpreterOptionsAddBuiltinOp(options : InterpreterOptions, op : BuiltinOperator, registration : LibC::Int*, min_version : Int32T, max_version : Int32T) fun interpreter_options_add_custom_op = TfLiteInterpreterOptionsAddCustomOp(options : InterpreterOptions, name : LibC::Char*, registration : LibC::Int*, min_version : Int32T, max_version : Int32T) - fun interpreter_options_set_op_resolver_external = TfLiteInterpreterOptionsSetOpResolverExternal(options : InterpreterOptions, find_builtin_op : (Void*, LibC::Int, LibC::Int -> RegistrationExternal), find_custom_op : (Void*, LibC::Char*, LibC::Int -> RegistrationExternal), op_resolver_user_data : Void*) + fun interpreter_options_set_op_resolver_external = TfLiteInterpreterOptionsSetOpResolverExternal(options : InterpreterOptions, find_builtin_op : (Void*, LibC::Int, LibC::Int -> LibC::Int*), find_custom_op : (Void*, LibC::Char*, LibC::Int -> LibC::Int*), op_resolver_user_data : Void*) + fun 
interpreter_options_set_op_resolver_external_with_fallback = TfLiteInterpreterOptionsSetOpResolverExternalWithFallback(options : InterpreterOptions, find_builtin_op_external : (Void*, LibC::Int, LibC::Int -> LibC::Int*), find_custom_op_external : (Void*, LibC::Char*, LibC::Int -> LibC::Int*), find_builtin_op : (Void*, BuiltinOperator, LibC::Int -> LibC::Int*), find_custom_op : (Void*, LibC::Char*, LibC::Int -> LibC::Int*), op_resolver_user_data : Void*) fun interpreter_options_set_op_resolver = TfLiteInterpreterOptionsSetOpResolver(options : InterpreterOptions, find_builtin_op : (Void*, BuiltinOperator, LibC::Int -> LibC::Int*), find_custom_op : (Void*, LibC::Char*, LibC::Int -> LibC::Int*), op_resolver_user_data : Void*) + fun interpreter_options_set_op_resolver_v3 = TfLiteInterpreterOptionsSetOpResolverV3(options : InterpreterOptions, find_builtin_op_v3 : (Void*, BuiltinOperator, LibC::Int -> LibC::Int*), find_custom_op_v3 : (Void*, LibC::Char*, LibC::Int -> LibC::Int*), op_resolver_user_data : Void*) fun interpreter_options_set_op_resolver_v2 = TfLiteInterpreterOptionsSetOpResolverV2(options : InterpreterOptions, find_builtin_op_v2 : (Void*, BuiltinOperator, LibC::Int -> LibC::Int*), find_custom_op_v2 : (Void*, LibC::Char*, LibC::Int -> LibC::Int*), op_resolver_user_data : Void*) fun interpreter_options_set_op_resolver_v1 = TfLiteInterpreterOptionsSetOpResolverV1(options : InterpreterOptions, find_builtin_op_v1 : (Void*, BuiltinOperator, LibC::Int -> LibC::Int*), find_custom_op_v1 : (Void*, LibC::Char*, LibC::Int -> LibC::Int*), op_resolver_user_data : Void*) fun interpreter_create_with_selected_ops = TfLiteInterpreterCreateWithSelectedOps(model : Model, options : InterpreterOptions) : Interpreter @@ -473,4 +544,23 @@ lib LibTensorflowLite fun signature_runner_get_output_tensor = TfLiteSignatureRunnerGetOutputTensor(signature_runner : SignatureRunner, output_name : LibC::Char*) : Tensor fun signature_runner_cancel = TfLiteSignatureRunnerCancel(signature_runner : 
SignatureRunner) : Status fun signature_runner_delete = TfLiteSignatureRunnerDelete(signature_runner : SignatureRunner) + # fun interpreter_options_set_telemetry_profiler = TfLiteInterpreterOptionsSetTelemetryProfiler(options : InterpreterOptions, profiler : TelemetryProfilerStruct*) + fun gpu_delegate_options_v2_default = TfLiteGpuDelegateOptionsV2Default : GpuDelegateOptionsV2 + + struct GpuDelegateOptionsV2 + is_precision_loss_allowed : Int32T + inference_preference : Int32T + inference_priority1 : Int32T + inference_priority2 : Int32T + inference_priority3 : Int32T + experimental_flags : Int64T + max_delegated_partitions : Int32T + serialization_dir : LibC::Char* + model_token : LibC::Char* + end + + alias X__Int64T = LibC::Long + alias Int64T = X__Int64T + fun gpu_delegate_v2_create = TfLiteGpuDelegateV2Create(options : GpuDelegateOptionsV2*) : Delegate + fun gpu_delegate_v2_delete = TfLiteGpuDelegateV2Delete(delegate : Delegate) end diff --git a/tensorflow.patch b/tensorflow.patch new file mode 100644 index 0000000..3b680e1 --- /dev/null +++ b/tensorflow.patch @@ -0,0 +1,36 @@ +diff --git a/tensorflow/lite/CMakeLists.txt b/tensorflow/lite/CMakeLists.txt +index 0924ca9d..812f8c55 100644 +--- a/tensorflow/lite/CMakeLists.txt ++++ b/tensorflow/lite/CMakeLists.txt +@@ -564,14 +564,17 @@ endif() + set(_ALL_TFLITE_SRCS + ${TFLITE_CORE_ACCELERATION_SRCS} + ${TFLITE_CORE_API_SRCS} +- ${TFLITE_CORE_C_SRCS} ++ ++ ${TFLITE_SOURCE_DIR}/c/common_internal.cc ++ ${TFLITE_SOURCE_DIR}/c/c_api_opaque_internal.cc ++ ${TFLITE_SOURCE_DIR}/core/c/c_api_opaque.cc ++ + ${TFLITE_CORE_EXPERIMENTAL_SRCS} + ${TFLITE_CORE_KERNELS_SRCS} + ${TFLITE_CORE_SRCS} + ${TFLITE_CORE_ASYNC_SRCS} + ${TFLITE_CORE_ASYNC_C_SRCS} + ${TFLITE_CORE_TOOLS_SRCS} +- ${TFLITE_C_SRCS} + ${TFLITE_DELEGATES_FLEX_SRCS} + ${TFLITE_DELEGATES_GPU_SRCS} + ${TFLITE_DELEGATES_NNAPI_SRCS} +diff --git a/tensorflow/lite/c/CMakeLists.txt b/tensorflow/lite/c/CMakeLists.txt +index 3fb086ec..f91a0fe4 100644 +--- 
a/tensorflow/lite/c/CMakeLists.txt ++++ b/tensorflow/lite/c/CMakeLists.txt +@@ -86,5 +86,7 @@ if (TFLITE_C_BUILD_SHARED_LIBS) + endif() + + target_link_libraries(tensorflowlite_c ++ -Wl,--whole-archive + tensorflow-lite ++ -Wl,--no-whole-archive + ) From 259b41d99f259b52897372cfeba1de4fe93005fb Mon Sep 17 00:00:00 2001 From: Stephen von Takach Date: Wed, 13 Sep 2023 17:05:51 +1000 Subject: [PATCH 2/8] fix(build_tensorflowlite.sh): select version that builds with the patch --- build_tensorflowlite.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/build_tensorflowlite.sh b/build_tensorflowlite.sh index ad275c7..2cb988d 100644 --- a/build_tensorflowlite.sh +++ b/build_tensorflowlite.sh @@ -16,6 +16,9 @@ echo "--" # clone the required repositories git clone --depth 1 https://github.com/tensorflow/tensorflow cd tensorflow +git fetch origin refs/tags/v2.13.0:refs/tags/v2.13.0 +git checkout v2.13.0 + git apply ../tensorflow.patch cd .. @@ -32,7 +35,7 @@ echo "--" echo "building..." echo "--" -cmake --build . -j2 || true +cmake --build . -j3 || true FILE=./libtensorflowlite_c.so if test -f "$FILE"; then @@ -79,8 +82,8 @@ fi cd .. 
-rm -rf ./tensorflow -rm -rf ./tflite_build +# rm -rf ./tensorflow +# rm -rf ./tflite_build echo "--" echo "Done" From e1d0582f833fb342748d6fbc869633a9ecfe3acd Mon Sep 17 00:00:00 2001 From: Stephen von Takach Date: Wed, 22 May 2024 10:13:21 +1000 Subject: [PATCH 3/8] feat cleanup code and add specs --- spec/tensorflow_lite_spec.cr | 94 +++++++++++++------ src/tensorflow_lite/client.cr | 11 ++- src/tensorflow_lite/delegate_gpu.cr | 28 +----- .../delegate_gpu/lib_delegate_gpu.cr | 26 +++++ src/tensorflow_lite/interpreter.cr | 6 +- 5 files changed, 101 insertions(+), 64 deletions(-) create mode 100644 src/tensorflow_lite/delegate_gpu/lib_delegate_gpu.cr diff --git a/spec/tensorflow_lite_spec.cr b/spec/tensorflow_lite_spec.cr index f99874b..5edbeb6 100644 --- a/spec/tensorflow_lite_spec.cr +++ b/spec/tensorflow_lite_spec.cr @@ -112,43 +112,77 @@ module TensorflowLite client.labels.as(Array(String)).size.should eq 90 end - it "can add a GPU delegate to the interpreter options" do - # we have to skip this test if there is no hardware installed - # however at least we know it compiles - file_io = File.new(model_path) - file_data = Bytes.new(file_io.size) - file_io.read_fully(file_data) - file_io.close - - {Model.new(model_path), Model.new(file_data)}.each do |model| - opts = InterpreterOptions.new - opts.add_delegate DelegateGPU.new - opts.on_error do |error_msg| - puts "error was #{error_msg}" + describe TensorflowLite do + it "can add a GPU delegate" do + # it will fall back to CPU for this test if there is no hardware installed + # however at least we know it compiles + file_io = File.new(model_path) + file_data = Bytes.new(file_io.size) + file_io.read_fully(file_data) + file_io.close + + {Model.new(model_path), Model.new(file_data)}.each do |model| + opts = InterpreterOptions.new + opts.on_error do |error_msg| + puts "error was #{error_msg}" + end + interpreter = Interpreter.new(model, opts) + + gpu = DelegateGPU.new + interpreter.modify_graph_with_delegate gpu + +
xor_test.each do |test| + inputs = test[:input] + expected = test[:result] + + # configure inputs + input_tensor = interpreter.input_tensor(0) + input_tensor.raw_data.bytesize.should eq input_tensor.bytesize + input_tensor.size.should eq 2 + + floats = input_tensor.as_f32 + floats[0], floats[1] = inputs + + # run through NN + interpreter.invoke! + + # check results + output_tensor = interpreter.output_tensor(0) + floats = output_tensor.as_f32 + result = (floats[0] + 0.5_f32).to_i + + result.should eq expected + end end - interpreter = Interpreter.new(model, opts) + end - xor_test.each do |test| - inputs = test[:input] - expected = test[:result] + it "can add a GPU delegate to the client" do + # it will fall back to CPU for this test if there is no hardware installed + file_io = File.new(model_path) + file_data = Bytes.new(file_io.size) + file_io.read_fully(file_data) + file_io.close - # configure inputs - input_tensor = interpreter.input_tensor(0) - input_tensor.raw_data.bytesize.should eq input_tensor.bytesize - input_tensor.size.should eq 2 + {Model.new(model_path), Model.new(file_data)}.each do |model| + client = TensorflowLite::Client.new(model_path, delegate: DelegateGPU.new) - floats = input_tensor.as_f32 - floats[0], floats[1] = inputs + xor_test.each do |test| + inputs = test[:input] + expected = test[:result] - # run through NN - interpreter.invoke! + # configure inputs + floats = client[0].as_f32 + floats[0], floats[1] = inputs - # check results - output_tensor = interpreter.output_tensor(0) - floats = output_tensor.as_f32 - result = (floats[0] + 0.5_f32).to_i + # run through NN + client.invoke!
- result.should eq expected + # check results + floats = client.output.as_f32 + result = (floats[0] + 0.5_f32).to_i + + result.should eq expected + end end end end diff --git a/src/tensorflow_lite/client.cr b/src/tensorflow_lite/client.cr index fdd5f26..64d29f5 100644 --- a/src/tensorflow_lite/client.cr +++ b/src/tensorflow_lite/client.cr @@ -47,10 +47,15 @@ class TensorflowLite::Client if threads @options.num_threads(threads) end - if delegate - @options.add_delegate delegate + + case delegate + when DelegateGPU + @interpreter = Interpreter.new(@model, @options) + @interpreter.modify_graph_with_delegate delegate + else + @options.add_delegate(delegate) if delegate + @interpreter = Interpreter.new(@model, @options) end - @interpreter = Interpreter.new(@model, @options) end getter model : Model diff --git a/src/tensorflow_lite/delegate_gpu.cr b/src/tensorflow_lite/delegate_gpu.cr index 383505f..837ee13 100644 --- a/src/tensorflow_lite/delegate_gpu.cr +++ b/src/tensorflow_lite/delegate_gpu.cr @@ -1,31 +1,5 @@ require "./delegate" - -# :nodoc: -@[Link("tensorflowlite_gpu_delegate", ldflags: "-L#{__DIR__}/../../ext/ -Wl,-rpath='$ORIGIN'")] -lib LibDelegateGPU - alias X__Int32T = LibC::Int - alias Int32T = X__Int32T - - alias X__Int64T = LibC::Long - alias Int64T = X__Int64T - - fun gpu_delegate_options_v2_default = TfLiteGpuDelegateOptionsV2Default : GpuDelegateOptionsV2 - - struct GpuDelegateOptionsV2 - is_precision_loss_allowed : Int32T - inference_preference : Int32T - inference_priority1 : Int32T - inference_priority2 : Int32T - inference_priority3 : Int32T - experimental_flags : Int64T - max_delegated_partitions : Int32T - serialization_dir : LibC::Char* - model_token : LibC::Char* - end - - fun gpu_delegate_v2_create = TfLiteGpuDelegateV2Create(options : GpuDelegateOptionsV2*) : LibTensorflowLite::OpaqueDelegate - fun gpu_delegate_v2_delete = TfLiteGpuDelegateV2Delete(delegate : LibTensorflowLite::OpaqueDelegate) -end +require "./delegate_gpu/*" class 
TensorflowLite::DelegateGPU < TensorflowLite::Delegate def initialize diff --git a/src/tensorflow_lite/delegate_gpu/lib_delegate_gpu.cr b/src/tensorflow_lite/delegate_gpu/lib_delegate_gpu.cr new file mode 100644 index 0000000..0523bb3 --- /dev/null +++ b/src/tensorflow_lite/delegate_gpu/lib_delegate_gpu.cr @@ -0,0 +1,26 @@ +# :nodoc: +@[Link("tensorflowlite_gpu_delegate", ldflags: "-L#{__DIR__}/../../ext/ -Wl,-rpath='$ORIGIN'")] +lib LibDelegateGPU + alias X__Int32T = LibC::Int + alias Int32T = X__Int32T + + alias X__Int64T = LibC::Long + alias Int64T = X__Int64T + + fun gpu_delegate_options_v2_default = TfLiteGpuDelegateOptionsV2Default : GpuDelegateOptionsV2 + + struct GpuDelegateOptionsV2 + is_precision_loss_allowed : Int32T + inference_preference : Int32T + inference_priority1 : Int32T + inference_priority2 : Int32T + inference_priority3 : Int32T + experimental_flags : Int64T + max_delegated_partitions : Int32T + serialization_dir : LibC::Char* + model_token : LibC::Char* + end + + fun gpu_delegate_v2_create = TfLiteGpuDelegateV2Create(options : GpuDelegateOptionsV2*) : LibTensorflowLite::OpaqueDelegate + fun gpu_delegate_v2_delete = TfLiteGpuDelegateV2Delete(delegate : LibTensorflowLite::OpaqueDelegate) +end diff --git a/src/tensorflow_lite/interpreter.cr b/src/tensorflow_lite/interpreter.cr index 11c3b04..a6afa74 100644 --- a/src/tensorflow_lite/interpreter.cr +++ b/src/tensorflow_lite/interpreter.cr @@ -1,4 +1,5 @@ require "./model" +require "./delegate" require "./interpreter_options" # The Interpreter takes a model, loads it, and allows you to run (or "interpret") the model, i.e., to use it to make predictions based on input data. @@ -64,14 +65,11 @@ class TensorflowLite::Interpreter alias Status = LibTensorflowLite::Status - # :nodoc: - alias Delegate = LibTensorflowLite::Delegate - # :nodoc: # provides a method to add a delegate after initialization. 
# Recommended that delegates are configured via `InterpreterOptions` def modify_graph_with_delegate(delegate : Delegate) : Status - LibTensorflowLite.interpreter_modify_graph_with_delegate(self, delegate) + LibTensorflowLite.interpreter_modify_graph_with_delegate(self, delegate.to_unsafe.value) end # runs the model and returns the result status From 9a25e0d1d4e1062649bb297ad07791ae0f66acf6 Mon Sep 17 00:00:00 2001 From: Stephen von Takach Date: Wed, 22 May 2024 14:11:17 +1000 Subject: [PATCH 4/8] feat: add docker build and installation --- Dockerfile | 123 ++++++++++++++++++++++++++++++++++++++ README.md | 29 +++++++++ install_tensorflowlite.sh | 55 +++++++++++++++++ shard.yml | 2 +- 4 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 Dockerfile create mode 100755 install_tensorflowlite.sh diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..0686542 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,123 @@ +# Use an image supported by https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html +FROM ubuntu:22.04 as build + +# Install necessary packages +RUN apt-get update && \ + apt-get install -y \ + build-essential \ + cmake \ + git \ + wget \ + unzip \ + libtool \ + zlib1g-dev \ + vim-common \ + curl \ + unzip \ + zlib1g \ + python3 \ + python3-pip \ + python3-dev \ + libopenblas-dev \ + opencl-headers \ + clinfo \ + ocl-icd-opencl-dev \ + clang \ + libclang-dev \ + libc++-dev \ + linux-headers-generic \ + software-properties-common \ + libabsl-dev \ + libusb-1.0-0-dev \ + gnupg2 && \ + apt-get clean + +# Install Bazelisk for building TensorFlow +ARG TARGETARCH +RUN wget -O /usr/local/bin/bazel https://github.com/bazelbuild/bazelisk/releases/download/v1.10.1/bazelisk-linux-$TARGETARCH && \ + chmod +x /usr/local/bin/bazel + +ENV TMP=/tmp + +# Clone TensorFlow repository +# https://www.tensorflow.org/install/source#gpu (lib compatibility list) +RUN git clone --depth 1 --branch "v2.16.1" 
https://github.com/tensorflow/tensorflow + +# ======================= +# build edge TPU delegate +# ======================= + +WORKDIR /tensorflow +RUN git clone https://github.com/google-coral/libedgetpu +WORKDIR /tensorflow/libedgetpu + +# Build TensorFlow Lite GPU delegate (excluding Android, linux only) +RUN make libedgetpu-direct + +# Copy the built shared libraries to /usr/local/lib +RUN mkdir -p /usr/local/lib && \ + cp /tensorflow/libedgetpu/out/direct/k8/libedgetpu.so.1.0 /usr/local/lib/libedgetpu.so + + +# ================================== +# Build tensorflow lite GPU delegate +# ================================== + +WORKDIR /tensorflow + +# Configure TensorFlow build (excluding Android) +RUN ./configure < Date: Wed, 22 May 2024 14:13:12 +1000 Subject: [PATCH 5/8] chore: remove tensorflow.patch --- tensorflow.patch | 36 ------------------------------------ 1 file changed, 36 deletions(-) delete mode 100644 tensorflow.patch diff --git a/tensorflow.patch b/tensorflow.patch deleted file mode 100644 index 3b680e1..0000000 --- a/tensorflow.patch +++ /dev/null @@ -1,36 +0,0 @@ -diff --git a/tensorflow/lite/CMakeLists.txt b/tensorflow/lite/CMakeLists.txt -index 0924ca9d..812f8c55 100644 ---- a/tensorflow/lite/CMakeLists.txt -+++ b/tensorflow/lite/CMakeLists.txt -@@ -564,14 +564,17 @@ endif() - set(_ALL_TFLITE_SRCS - ${TFLITE_CORE_ACCELERATION_SRCS} - ${TFLITE_CORE_API_SRCS} -- ${TFLITE_CORE_C_SRCS} -+ -+ ${TFLITE_SOURCE_DIR}/c/common_internal.cc -+ ${TFLITE_SOURCE_DIR}/c/c_api_opaque_internal.cc -+ ${TFLITE_SOURCE_DIR}/core/c/c_api_opaque.cc -+ - ${TFLITE_CORE_EXPERIMENTAL_SRCS} - ${TFLITE_CORE_KERNELS_SRCS} - ${TFLITE_CORE_SRCS} - ${TFLITE_CORE_ASYNC_SRCS} - ${TFLITE_CORE_ASYNC_C_SRCS} - ${TFLITE_CORE_TOOLS_SRCS} -- ${TFLITE_C_SRCS} - ${TFLITE_DELEGATES_FLEX_SRCS} - ${TFLITE_DELEGATES_GPU_SRCS} - ${TFLITE_DELEGATES_NNAPI_SRCS} -diff --git a/tensorflow/lite/c/CMakeLists.txt b/tensorflow/lite/c/CMakeLists.txt -index 3fb086ec..f91a0fe4 100644 ---- 
a/tensorflow/lite/c/CMakeLists.txt -+++ b/tensorflow/lite/c/CMakeLists.txt -@@ -86,5 +86,7 @@ if (TFLITE_C_BUILD_SHARED_LIBS) - endif() - - target_link_libraries(tensorflowlite_c -+ -Wl,--whole-archive - tensorflow-lite -+ -Wl,--no-whole-archive - ) From fbf276159afd63fe747d7282f0e60b8193205992 Mon Sep 17 00:00:00 2001 From: Stephen von Takach Date: Wed, 22 May 2024 14:15:19 +1000 Subject: [PATCH 6/8] fix(spec): use model in client spec --- spec/tensorflow_lite_spec.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/tensorflow_lite_spec.cr b/spec/tensorflow_lite_spec.cr index 5edbeb6..1aacbf8 100644 --- a/spec/tensorflow_lite_spec.cr +++ b/spec/tensorflow_lite_spec.cr @@ -164,7 +164,7 @@ module TensorflowLite file_io.close {Model.new(model_path), Model.new(file_data)}.each do |model| - client = TensorflowLite::Client.new(model_path, delegate: DelegateGPU.new) + client = TensorflowLite::Client.new(model, delegate: DelegateGPU.new) xor_test.each do |test| inputs = test[:input] From fd46b4d1f962c10d42209add3ea6e5c5fe04be27 Mon Sep 17 00:00:00 2001 From: Stephen von Takach Date: Wed, 22 May 2024 18:07:08 +1000 Subject: [PATCH 7/8] fix: libedgetpu on arm64 --- Dockerfile | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0686542..008b10f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -52,12 +52,19 @@ RUN git clone https://github.com/google-coral/libedgetpu WORKDIR /tensorflow/libedgetpu # Build TensorFlow Lite GPU delegate (excluding Android, linux only) -RUN make libedgetpu-direct - -# Copy the built shared libraries to /usr/local/lib -RUN mkdir -p /usr/local/lib && \ - cp /tensorflow/libedgetpu/out/direct/k8/libedgetpu.so.1.0 /usr/local/lib/libedgetpu.so - +ARG TARGETPLATFORM +RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \ + make libedgetpu-direct CPU=k8 && \ + mkdir -p /usr/local/lib && \ + cp /tensorflow/libedgetpu/out/direct/k8/libedgetpu.so.1.0 /usr/local/lib/libedgetpu.so; 
\ + elif [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ + make libedgetpu-direct CPU=aarch64 && \ + mkdir -p /usr/local/lib && \ + cp /tensorflow/libedgetpu/out/direct/aarch64/libedgetpu.so.1.0 /usr/local/lib/libedgetpu.so; \ + else \ + echo "Unknown platform"; \ + exit 1; \ + fi # ================================== # Build tensorflow lite GPU delegate From ea8529f94681bbd0b5f32c14c3be0bf40620e91d Mon Sep 17 00:00:00 2001 From: Stephen von Takach Date: Thu, 23 May 2024 08:39:37 +1000 Subject: [PATCH 8/8] fix build and specs --- build_tensorflowlite.sh | 4 ++-- spec/tensorflow_lite_spec.cr | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) mode change 100644 => 100755 build_tensorflowlite.sh diff --git a/build_tensorflowlite.sh b/build_tensorflowlite.sh old mode 100644 new mode 100755 index d6b510b..936fd03 --- a/build_tensorflowlite.sh +++ b/build_tensorflowlite.sh @@ -76,8 +76,8 @@ fi cd .. -# rm -rf ./tensorflow -# rm -rf ./tflite_build +rm -rf ./tensorflow +rm -rf ./tflite_build echo "--" echo "Done" diff --git a/spec/tensorflow_lite_spec.cr b/spec/tensorflow_lite_spec.cr index 1aacbf8..1ca13fa 100644 --- a/spec/tensorflow_lite_spec.cr +++ b/spec/tensorflow_lite_spec.cr @@ -188,7 +188,7 @@ module TensorflowLite end it "works with quantized models" do - model_path = Path.new File.join(__DIR__, "./test_data/xor_model_quantized.tflite") + quant_path = Path.new File.join(__DIR__, "./test_data/xor_model_quantized.tflite") quantized_test = { {input: {-128_i8, -128_i8}, result: 0}, {input: {127_i8, -128_i8}, result: 1}, @@ -196,7 +196,7 @@ module TensorflowLite {input: {127_i8, 127_i8}, result: 0}, } - client = TensorflowLite::Client.new(model_path) + client = TensorflowLite::Client.new(quant_path) quantized_test.each do |test| inputs = test[:input]