#include <algorithm>
#include <cstddef>
#include <iterator>
#include <string>
#include <vector>

#include <catch2/benchmark/catch_benchmark.hpp>
#include <catch2/catch_message.hpp>
#include <catch2/catch_test_macros.hpp>

#include "edgerunner/edgerunner.hpp"
#include "edgerunner/model.hpp"
#include "edgerunner/tensor.hpp"
#include "utils.hpp"

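/* End-to-end check of a prebuilt QNN context binary: run a reference
 * inference on the default CPU delegate, switch the same model to the NPU,
 * rerun with an identical input, and require both outputs to agree within
 * an MSE threshold while benchmarking NPU execution */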
TEST_CASE("QNN context binary NPU runtime", "[qnn][context][npu]") {
    const std::string modelPath = "models/qnn/mobilenet_v3_small.bin";

    auto model = edge::createModel(modelPath);
    REQUIRE(model != nullptr);
    REQUIRE(std::string {"mobilenet_v3_small"} == model->name());
    REQUIRE(model->getDelegate() == edge::DELEGATE::CPU);

    /* ensure CPU and NPU inference have the same inputs */
    auto cpuInputData = model->getInput(0)->getTensorAs<float>();
    std::fill(cpuInputData.begin(), cpuInputData.end(), 0.0F);

    auto executionStatus = model->execute();
    CHECK(executionStatus == edge::STATUS::SUCCESS);

    const auto cpuOutput = model->getOutput(0)->getTensorAs<float>();

    /* applying a new delegate releases memory, so the CPU output must be
     * copied now to compare against the NPU output later */
    std::vector<float> cpuResult;
    cpuResult.reserve(cpuOutput.size());
    std::copy(
        cpuOutput.cbegin(), cpuOutput.cend(), std::back_inserter(cpuResult));

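    /* switch this model instance from CPU to NPU in place; per the comment
     * above, this releases the old tensor views, so input/output handles are
     * re-fetched below */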
    const auto delegateStatus = model->applyDelegate(edge::DELEGATE::NPU);
    REQUIRE(delegateStatus == edge::STATUS::SUCCESS);
    REQUIRE(model->getDelegate() == edge::DELEGATE::NPU);

    const auto numInputs = model->getNumInputs();
    REQUIRE(numInputs == 1);

    const auto numOutputs = model->getNumOutputs();
    REQUIRE(numOutputs == 1);

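    /* re-fetch the input handle after the delegate change and sanity-check
     * its metadata against the expected MobileNetV3 input shape */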
    auto input = model->getInput(0);
    REQUIRE(input->getName() == "image_tensor");
    REQUIRE(input->getDimensions() == std::vector<size_t> {1, 224, 224, 3});
    REQUIRE(input->getType() == edge::TensorType::FLOAT32);

    auto inputData = input->getTensorAs<float>();
    REQUIRE(inputData.size() == input->getSize());

    /* ensure CPU and NPU inference have the same inputs */
    std::fill(inputData.begin(), inputData.end(), 0.0F);

    executionStatus = model->execute();
    REQUIRE(executionStatus == edge::STATUS::SUCCESS);

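    /* the successful execute() above doubles as a warm-up run; the Catch2
     * benchmark below then measures repeated NPU inference latency */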
    BENCHMARK("execution") {
        return model->execute();
    };

    auto output = model->getOutput(0);
    REQUIRE(output->getName() == "class_logits");
    REQUIRE(output->getDimensions() == std::vector<size_t> {1, 1000});
    REQUIRE(output->getType() == edge::TensorType::FLOAT32);

    auto outputData = output->getTensorAs<float>();
    REQUIRE(outputData.size() == output->getSize());

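    /* compare the NPU output against the saved CPU baseline; meanSquaredError
     * and MseThreshold are assumed to be provided by utils.hpp */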
    const auto mse = meanSquaredError(cpuResult, outputData);
    CAPTURE(mse);
    REQUIRE(mse < MseThreshold);
}