/**
 * Copyright 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H
#define COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H

#include "tensorflow/lite/delegates/gpu/delegate.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/model.h"

#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <vector>

struct InferenceOutput {
  uint8_t* ptr;
  size_t size;
};

// Inputs and expected outputs for inference
struct InferenceInOut {
  // Input can either be directly specified as a pointer or indirectly with
  // the createInput callback. This is needed for large datasets where
  // allocating memory for all inputs at once is not feasible.
  uint8_t* input;
  size_t input_size;

  std::vector<InferenceOutput> outputs;
  std::function<bool(uint8_t*, size_t)> createInput;
};

// Inputs and expected outputs for an inference sequence.
using InferenceInOutSequence = std::vector<InferenceInOut>;

// Result of a single inference
struct InferenceResult {
  float computeTimeSec;
  // MSE for each output
  std::vector<float> meanSquareErrors;
  // Max single error for each output
  std::vector<float> maxSingleErrors;
  // Outputs
  std::vector<std::vector<uint8_t>> inferenceOutputs;
  int inputOutputSequenceIndex;
  int inputOutputIndex;
};

struct CompilationBenchmarkResult {
  std::vector<float> compileWithoutCacheTimeSec;
  // The following optional fields have no value if compilation caching is not
  // supported.
  std::optional<std::vector<float>> saveToCacheTimeSec;
  std::optional<std::vector<float>> prepareFromCacheTimeSec;
  // The total size of cache files. It is zero if compilation caching is not
  // supported.
  int cacheSizeBytes = 0;
};

/** Discard inference output in inference results. */
const int FLAG_DISCARD_INFERENCE_OUTPUT = 1 << 0;
/** Do not expect golden output for inference inputs. */
const int FLAG_IGNORE_GOLDEN_OUTPUT = 1 << 1;
/** Collect only 1 benchmark result every INFERENCE_OUT_SAMPLE_RATE. */
const int FLAG_SAMPLE_BENCHMARK_RESULTS = 1 << 2;

const int INFERENCE_OUT_SAMPLE_RATE = 10;

enum class CompilationBenchmarkType {
  // Benchmark without cache
  WITHOUT_CACHE,
  // Benchmark cache miss
  SAVE_TO_CACHE,
  // Benchmark cache hit
  PREPARE_FROM_CACHE,
};

/** TFLite backend. */
constexpr int TFLITE_CPU = 0;
constexpr int TFLITE_NNAPI = 1;
constexpr int TFLITE_GPU = 2;

class BenchmarkModel {
 public:
  ~BenchmarkModel();

  static BenchmarkModel* create(const char* modelfile, int tfliteBackend,
                                bool enable_intermediate_tensors_dump,
                                int* nnapiErrno, const char* nnapi_device_name,
                                bool mmapModel, const char* nnapi_cache_dir);

  bool resizeInputTensors(std::vector<int> shape);
  bool setInput(const uint8_t* dataPtr, size_t length);
  bool runInference();
  // Resets TFLite states (RNN/LSTM states etc).
  bool resetStates();

  bool benchmark(const std::vector<InferenceInOutSequence>& inOutData,
                 int seqInferencesMaxCount, float timeout, int flags,
                 std::vector<InferenceResult>* result);

  bool benchmarkCompilation(int maxNumIterations, float warmupTimeout,
                            float runTimeout,
                            CompilationBenchmarkResult* result);

  bool dumpAllLayers(const char* path,
                     const std::vector<InferenceInOutSequence>& inOutData);

 private:
  BenchmarkModel() = default;

  bool init(const char* modelfile, int tfliteBackend,
            bool enable_intermediate_tensors_dump, int* nnapiErrno,
            const char* nnapi_device_name,
            /* flag to choose between memory mapping the model and
               initializing the model from the program's memory */
            bool mmapModel, const char* nnapi_cache_dir);

  void getOutputError(const uint8_t* dataPtr, size_t length,
                      InferenceResult* result, int output_index);
  void saveInferenceOutput(InferenceResult* result, int output_index);

  bool runCompilation(const char* cacheDir);
  bool benchmarkSingleTypeOfCompilation(CompilationBenchmarkType type,
                                        int maxNumIterations, float timeout,
                                        std::vector<float>* results);
  bool benchmarkSingleTypeOfCompilationWithWarmup(CompilationBenchmarkType type,
                                                  int maxNumIterations,
                                                  float warmupTimeout,
                                                  float runTimeout,
                                                  std::vector<float>* results);
  bool getCompilationCacheSize(int* cacheSizeBytes);

  std::string mModelBuffer;
  std::unique_ptr<tflite::FlatBufferModel> mTfliteModel;
  std::unique_ptr<tflite::Interpreter> mTfliteInterpreter;
  std::unique_ptr<tflite::StatefulNnApiDelegate> mTfliteNnapiDelegate;
  // Store indices of output tensors, used to dump intermediate tensors
  std::vector<int> outputs;

  // Parameters for compilation
  std::string mModelFile;
  std::optional<std::string> mCacheDir;
  std::string mNnApiDeviceName;
#if defined(NN_BENCHMARK_ENABLE_GPU)
  TfLiteDelegate* mGpuDelegate;
#endif  // defined(NN_BENCHMARK_ENABLE_GPU)
  int mTfliteBackend;
};

#endif  // COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H
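
// ---------------------------------------------------------------------------
// A minimal usage sketch (not part of the original header), showing how the
// BenchmarkModel API declared above fits together. The model path, iteration
// count, timeout, and the loadTestData() helper are hypothetical placeholders
// used only for illustration.
//
//   int nnapiErrno = 0;
//   BenchmarkModel* model = BenchmarkModel::create(
//       "/data/local/tmp/mobilenet.tflite", TFLITE_NNAPI,
//       /*enable_intermediate_tensors_dump=*/false, &nnapiErrno,
//       /*nnapi_device_name=*/nullptr, /*mmapModel=*/true,
//       /*nnapi_cache_dir=*/nullptr);
//
//   // Run the inference benchmark over input/golden-output sequences and
//   // collect per-inference timing and error statistics.
//   std::vector<InferenceInOutSequence> data = loadTestData();  // hypothetical
//   std::vector<InferenceResult> results;
//   model->benchmark(data, /*seqInferencesMaxCount=*/100, /*timeout=*/10.0f,
//                    FLAG_DISCARD_INFERENCE_OUTPUT, &results);
//   delete model;
// ---------------------------------------------------------------------------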