/**
 * Copyright 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H
#define COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H

#include "tensorflow/lite/delegates/gpu/delegate.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/model.h"

#include <unistd.h>

#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <vector>

struct InferenceOutput {
    uint8_t* ptr;
    size_t size;
};

// Inputs and expected outputs for inference
struct InferenceInOut {
    // Input can either be directly specified as a pointer or indirectly with
    // the createInput callback. This is needed for large datasets where
    // allocating memory for all inputs at once is not feasible.
    uint8_t* input;
    size_t input_size;

    std::vector<InferenceOutput> outputs;
    std::function<bool(uint8_t*, size_t)> createInput;
};

// Inputs and expected outputs for an inference sequence.
using InferenceInOutSequence = std::vector<InferenceInOut>;

// Result of a single inference
struct InferenceResult {
    float computeTimeSec;
    // MSE for each output
    std::vector<float> meanSquareErrors;
    // Max single error for each output
    std::vector<float> maxSingleErrors;
    // Outputs
    std::vector<std::vector<uint8_t>> inferenceOutputs;
    int inputOutputSequenceIndex;
    int inputOutputIndex;
};

struct CompilationBenchmarkResult {
    std::vector<float> compileWithoutCacheTimeSec;
    // The following optional fields have no value if compilation caching is
    // not supported.
    std::optional<std::vector<float>> saveToCacheTimeSec;
    std::optional<std::vector<float>> prepareFromCacheTimeSec;
    // The total size of cache files. It is zero if compilation caching is not
    // supported.
    int cacheSizeBytes = 0;
};

/** Discard inference output in inference results. */
const int FLAG_DISCARD_INFERENCE_OUTPUT = 1 << 0;
/** Do not expect golden outputs for inference inputs. */
const int FLAG_IGNORE_GOLDEN_OUTPUT = 1 << 1;
/** Collect only one benchmark result for every INFERENCE_OUT_SAMPLE_RATE inferences. */
const int FLAG_SAMPLE_BENCHMARK_RESULTS = 1 << 2;

const int INFERENCE_OUT_SAMPLE_RATE = 10;

enum class CompilationBenchmarkType {
    // Benchmark without cache
    WITHOUT_CACHE,
    // Benchmark cache miss
    SAVE_TO_CACHE,
    // Benchmark cache hit
    PREPARE_FROM_CACHE,
};
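
// Illustrative sketch (not part of the benchmark API) of how an
// InferenceInOutSequence declared above might be assembled when inputs are
// produced lazily through the createInput callback rather than a preallocated
// buffer, and how the FLAG_* bits can be combined. The helper
// `fillRandomInput` is hypothetical.
//
//   InferenceInOut entry = {
//       /* input */ nullptr,   // no preallocated input buffer,
//       /* input_size */ 0,    // so the callback below is used instead
//       /* outputs */ {},      // no golden outputs available
//       /* createInput */ [](uint8_t* buffer, size_t length) {
//           return fillRandomInput(buffer, length);  // hypothetical helper
//       }};
//   InferenceInOutSequence sequence = {entry};
//
//   // Without golden outputs, callers would typically also pass:
//   int flags = FLAG_IGNORE_GOLDEN_OUTPUT | FLAG_DISCARD_INFERENCE_OUTPUT;
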
/** TFLite backend. */
constexpr int TFLITE_CPU = 0;
constexpr int TFLITE_NNAPI = 1;
constexpr int TFLITE_GPU = 2;

class BenchmarkModel {
   public:
    ~BenchmarkModel();

    static BenchmarkModel* create(const char* modelfile, int tfliteBackend,
                                  bool enable_intermediate_tensors_dump,
                                  int* nnapiErrno, const char* nnapi_device_name,
                                  bool mmapModel, const char* nnapi_cache_dir);

    bool resizeInputTensors(std::vector<int> shape);
    bool setInput(const uint8_t* dataPtr, size_t length);
    bool runInference();
    // Resets TFLite states (RNN/LSTM states etc).
    bool resetStates();

    bool benchmark(const std::vector<InferenceInOutSequence>& inOutData,
                   int seqInferencesMaxCount, float timeout, int flags,
                   std::vector<InferenceResult>* result);

    bool benchmarkCompilation(int maxNumIterations, float warmupTimeout,
                              float runTimeout,
                              CompilationBenchmarkResult* result);

    bool dumpAllLayers(const char* path,
                       const std::vector<InferenceInOutSequence>& inOutData);

   private:
    BenchmarkModel() = default;
    bool init(const char* modelfile, int tfliteBackend,
              bool enable_intermediate_tensors_dump, int* nnapiErrno,
              const char* nnapi_device_name,
              /* flag to choose between memory mapping the model and
                 initializing the model from the program's memory */
              bool mmapModel, const char* nnapi_cache_dir);

    void getOutputError(const uint8_t* dataPtr, size_t length,
                        InferenceResult* result, int output_index);
    void saveInferenceOutput(InferenceResult* result, int output_index);

    bool runCompilation(const char* cacheDir);
    bool benchmarkSingleTypeOfCompilation(CompilationBenchmarkType type,
                                          int maxNumIterations, float timeout,
                                          std::vector<float>* results);
    bool benchmarkSingleTypeOfCompilationWithWarmup(CompilationBenchmarkType type,
                                                    int maxNumIterations,
                                                    float warmupTimeout,
                                                    float runTimeout,
                                                    std::vector<float>* results);
    bool getCompilationCacheSize(int* cacheSizeBytes);

    std::string mModelBuffer;
    std::unique_ptr<tflite::FlatBufferModel> mTfliteModel;
    std::unique_ptr<tflite::Interpreter> mTfliteInterpreter;
    std::unique_ptr<tflite::StatefulNnApiDelegate> mTfliteNnapiDelegate;
    // Store indices of output tensors, used to dump intermediate tensors
    std::vector<int> outputs;

    // Parameters for compilation
    std::string mModelFile;
    std::optional<std::string> mCacheDir;
    std::string mNnApiDeviceName;
#if defined(NN_BENCHMARK_ENABLE_GPU)
    TfLiteDelegate* mGpuDelegate;
#endif  // defined(NN_BENCHMARK_ENABLE_GPU)
    int mTfliteBackend;
};

#endif  // COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H
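
// A minimal usage sketch of the BenchmarkModel API above, kept in a comment
// since this is a header. The model path, device name, and parameter values
// are illustrative assumptions, and `sequences` stands for an
// std::vector<InferenceInOutSequence> prepared by the caller.
//
//   int nnapiErrno = 0;
//   std::unique_ptr<BenchmarkModel> model(BenchmarkModel::create(
//       "/data/local/tmp/mobilenet.tflite", TFLITE_NNAPI,
//       /*enable_intermediate_tensors_dump=*/false, &nnapiErrno,
//       /*nnapi_device_name=*/nullptr, /*mmapModel=*/true,
//       /*nnapi_cache_dir=*/nullptr));
//   if (model == nullptr) {
//       // Creation failed; nnapiErrno may hold the NNAPI error code.
//   }
//
//   std::vector<InferenceResult> results;
//   model->benchmark(sequences, /*seqInferencesMaxCount=*/100,
//                    /*timeout=*/10.0f, FLAG_DISCARD_INFERENCE_OUTPUT,
//                    &results);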