1 /**
2  * Copyright 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H
18 #define COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H
19 
#include "tensorflow/lite/delegates/gpu/delegate.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/model.h"

#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <unistd.h>
#include <vector>
28 
// Describes a raw buffer by its start pointer and its length.
// The struct declares no destructor, so it never frees `ptr` itself.
// Members carry default initializers so a default-constructed instance is
// always in a well-defined (null/empty) state; brace initialization such as
// InferenceOutput{ptr, size} keeps working.
struct InferenceOutput {
  // Start of the buffer; null until assigned.
  uint8_t* ptr = nullptr;
  // Length of the buffer (presumably in bytes, given the uint8_t pointer).
  size_t size = 0;
};
33 
34 // Inputs and expected outputs for inference
35 struct InferenceInOut {
36   // Input can either be directly specified as a pointer or indirectly with
37   // the createInput callback. This is needed for large datasets where
38   // allocating memory for all inputs at once is not feasible.
39   uint8_t* input;
40   size_t input_size;
41 
42   std::vector<InferenceOutput> outputs;
43   std::function<bool(uint8_t*, size_t)> createInput;
44 };
45 
// Inputs and expected outputs for an inference sequence: an ordered
// std::vector of InferenceInOut entries, one entry per inference.
using InferenceInOutSequence = std::vector<InferenceInOut>;
48 
// Result of a single inference.
// Scalar members carry default initializers so a default-constructed result
// never exposes indeterminate values; brace initialization of all members
// keeps working.
struct InferenceResult {
  // Compute time of the inference, in seconds.
  float computeTimeSec = 0.0f;
  // Mean square error for each output.
  std::vector<float> meanSquareErrors;
  // Largest single error for each output.
  std::vector<float> maxSingleErrors;
  // Raw bytes of each output.
  std::vector<std::vector<uint8_t>> inferenceOutputs;
  // NOTE(review): presumably the position of the input/output sequence this
  // result belongs to, and the position within that sequence -- confirm
  // against BenchmarkModel::benchmark.
  int inputOutputSequenceIndex = 0;
  int inputOutputIndex = 0;
};
61 
// Measurements collected by a compilation benchmark.
struct CompilationBenchmarkResult {
  // Times, in seconds, of compilations performed without a cache.
  std::vector<float> compileWithoutCacheTimeSec;

  // The two optionals below hold no value when compilation caching is not
  // supported.
  std::optional<std::vector<float>> saveToCacheTimeSec = std::nullopt;
  std::optional<std::vector<float>> prepareFromCacheTimeSec = std::nullopt;

  // Combined size of the cache files; stays zero when compilation caching is
  // not supported.
  int cacheSizeBytes{0};
};
70 
// Benchmark option bits. Each flag occupies its own bit so they can be OR-ed
// together (presumably into the `flags` argument of
// BenchmarkModel::benchmark).

/** Drop the inference output from the inference results. */
constexpr int FLAG_DISCARD_INFERENCE_OUTPUT = 0x1;
/** Inference inputs come without golden output; do not expect any. */
constexpr int FLAG_IGNORE_GOLDEN_OUTPUT = 0x2;
/** Keep only one benchmark result out of every INFERENCE_OUT_SAMPLE_RATE. */
constexpr int FLAG_SAMPLE_BENCHMARK_RESULTS = 0x4;

/** Sampling interval referred to by FLAG_SAMPLE_BENCHMARK_RESULTS. */
constexpr int INFERENCE_OUT_SAMPLE_RATE = 10;
79 
// The kinds of compilation that can be benchmarked.
enum class CompilationBenchmarkType {
  WITHOUT_CACHE = 0,       // benchmark without cache
  SAVE_TO_CACHE = 1,       // benchmark a cache miss
  PREPARE_FROM_CACHE = 2,  // benchmark a cache hit
};
88 
/**
 * TFLite backend identifiers (see the `tfliteBackend` parameter of
 * BenchmarkModel::create).
 */
constexpr int TFLITE_CPU{0};
constexpr int TFLITE_NNAPI{1};
constexpr int TFLITE_GPU{2};
93 
94 class BenchmarkModel {
95  public:
96   ~BenchmarkModel();
97 
98   static BenchmarkModel* create(const char* modelfile, int tfliteBackend,
99                                 bool enable_intermediate_tensors_dump,
100                                 int* nnapiErrno, const char* nnapi_device_name,
101                                 bool mmapModel, const char* nnapi_cache_dir);
102 
103   bool resizeInputTensors(std::vector<int> shape);
104   bool setInput(const uint8_t* dataPtr, size_t length);
105   bool runInference();
106   // Resets TFLite states (RNN/LSTM states etc).
107   bool resetStates();
108 
109   bool benchmark(const std::vector<InferenceInOutSequence>& inOutData,
110                  int seqInferencesMaxCount, float timeout, int flags,
111                  std::vector<InferenceResult>* result);
112 
113   bool benchmarkCompilation(int maxNumIterations, float warmupTimeout, float runTimeout,
114                             CompilationBenchmarkResult* result);
115 
116   bool dumpAllLayers(const char* path,
117                      const std::vector<InferenceInOutSequence>& inOutData);
118 
119  private:
120   BenchmarkModel() = default;
121   bool init(const char* modelfile, int tfliteBackend,
122             bool enable_intermediate_tensors_dump,
123             int* nnapiErrno, const char* nnapi_device_name,
124             /* flag to choose between memory mapping the model and initializing
125                 the model from programs memory*/
126             bool mmapModel,
127             const char* nnapi_cache_dir);
128 
129   void getOutputError(const uint8_t* dataPtr, size_t length,
130                       InferenceResult* result, int output_index);
131   void saveInferenceOutput(InferenceResult* result, int output_index);
132 
133   bool runCompilation(const char* cacheDir);
134   bool benchmarkSingleTypeOfCompilation(CompilationBenchmarkType type, int maxNumIterations,
135                                         float timeout, std::vector<float>* results);
136   bool benchmarkSingleTypeOfCompilationWithWarmup(CompilationBenchmarkType type,
137                                                   int maxNumIterations, float warmupTimeout,
138                                                   float runTimeout, std::vector<float>* results);
139   bool getCompilationCacheSize(int* cacheSizeBytes);
140 
141   std::string mModelBuffer;
142   std::unique_ptr<tflite::FlatBufferModel> mTfliteModel;
143   std::unique_ptr<tflite::Interpreter> mTfliteInterpreter;
144   std::unique_ptr<tflite::StatefulNnApiDelegate> mTfliteNnapiDelegate;
145   // Store indices of output tensors, used to dump intermediate tensors
146   std::vector<int> outputs;
147 
148   // Parameters for compilation
149   std::string mModelFile;
150   std::optional<std::string> mCacheDir;
151   std::string mNnApiDeviceName;
152 #if defined(NN_BENCHMARK_ENABLE_GPU)
153   TfLiteDelegate* mGpuDelegate;
154 #endif  // defined(NN_BENCHMARK_ENABLE_GPU)
155   int mTfliteBackend;
156 };
157 
158 #endif  // COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H
159