/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h"

#include <cstdarg>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <unordered_set>
#include <vector>

#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/op_resolver.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/tools/benchmark/logging.h"

#ifdef GEMMLOWP_PROFILING
#include "gemmlowp/profiling/profiler.h"
#endif

#ifdef TFLITE_CUSTOM_OPS_HEADER
void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);
#endif

namespace tflite {
namespace benchmark {

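// Attaches the benchmark's op-level profiler to the interpreter so that
// per-operator timing events are recorded during runs.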
void ProfilingListener::SetInterpreter(tflite::Interpreter* interpreter) {
  TFLITE_BENCHMARK_CHECK(interpreter);
  interpreter_ = interpreter;
  interpreter_->SetProfiler(&profiler_);
}

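// Profiling is only captured for regular benchmark runs; runs of any other
// type (e.g. warmup) are excluded from the op profile summary.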
void ProfilingListener::OnSingleRunStart(RunType run_type) {
  if (run_type == REGULAR) {
    profiler_.Reset();
    profiler_.StartProfiling();
  }
}

void ProfilingListener::OnBenchmarkEnd(const BenchmarkResults& results) {
  if (has_profiles_) {
    TFLITE_LOG(INFO) << summarizer_.GetOutputString();
  }
}

void ProfilingListener::OnSingleRunEnd() {
  profiler_.StopProfiling();
  auto profile_events = profiler_.GetProfileEvents();
  has_profiles_ = !profile_events.empty();
  summarizer_.ProcessProfiles(profile_events, *interpreter_);
}

void GemmlowpProfilingListener::OnBenchmarkStart(
    const BenchmarkParams& params) {
#ifdef GEMMLOWP_PROFILING
  gemmlowp::RegisterCurrentThreadForProfiling();
  gemmlowp::StartProfiling();
#endif
}

void GemmlowpProfilingListener::OnBenchmarkEnd(
    const BenchmarkResults& results) {
#ifdef GEMMLOWP_PROFILING
  gemmlowp::FinishProfiling();
#endif
}

namespace {

std::vector<std::string> Split(const std::string& str, const char delim) {
  std::istringstream input(str);
  std::vector<std::string> results;
  std::string item;
  while (std::getline(input, item, delim)) {
    results.push_back(item);
  }
  return results;
}

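// Parses a delimiter-separated list (e.g. "1,224,224,3" with delim ',') into
// typed values appended to *values. Returns false if the list is malformed.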
template <typename T>
bool SplitAndParse(const std::string& str, char delim, std::vector<T>* values) {
  std::istringstream input(str);
  bool first = true;
  while (!input.eof()) {
    if (!first) {
      char c;
      input >> c;
      if (c != delim) {
        return false;
      }
    } else {
      first = false;
    }
    T val;
    input >> val;
    if (!input.eof() && !input.good()) {
      return false;
    }
    values->push_back(val);
  }
  return true;
}

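// Fills num_elements values starting at ptr by repeatedly invoking
// random_func.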
template <typename T>
void FillRandomValue(T* ptr, int num_elements,
                     const std::function<T()>& random_func) {
  for (int i = 0; i < num_elements; ++i) {
    *ptr++ = random_func();
  }
}

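// Appends one randomly generated string per tensor element (the product of
// the dimensions in `sizes`) to the dynamic string buffer.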
void FillRandomString(tflite::DynamicBuffer* buffer,
                      const std::vector<int>& sizes,
                      const std::function<string()>& random_func) {
  int num_elements = 1;
  for (int dim : sizes) {
    num_elements *= dim;
  }
  for (int i = 0; i < num_elements; ++i) {
    auto str = random_func();
    buffer->AddString(str.data(), str.length());
  }
}

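// Builds InputLayerInfo entries from the --input_layer and
// --input_layer_shape flag strings, e.g. names_string="input1,input2" and
// shapes_string="1,224,224,4:1,20". Both lists must have the same length.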
bool PopulateInputLayerInfo(
    const string& names_string, const string& shapes_string,
    std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
  std::vector<std::string> names = Split(names_string, ',');
  std::vector<std::string> shapes = Split(shapes_string, ':');

  if (names.size() != shapes.size()) {
    TFLITE_LOG(ERROR) << "The number of items in"
                      << " --input_layer_shape (" << shapes_string << ", with "
                      << shapes.size() << " items)"
                      << " must match the number of items in"
                      << " --input_layer (" << names_string << ", with "
                      << names.size() << " items)."
                      << " For example --input_layer=input1,input2"
                      << " --input_layer_shape=1,224,224,4:1,20";
    return false;
  }

  for (int i = 0; i < names.size(); ++i) {
    info->push_back(BenchmarkTfLiteModel::InputLayerInfo());
    BenchmarkTfLiteModel::InputLayerInfo& input = info->back();

    input.name = names[i];

    TFLITE_BENCHMARK_CHECK(SplitAndParse(shapes[i], ',', &input.shape))
        << "Incorrect size string specified: " << shapes[i];
    for (int dim : input.shape) {
      if (dim == -1) {
        TFLITE_LOG(ERROR)
            << "Any unknown sizes in the shapes (-1's) must be replaced"
            << " with the size you want to benchmark with.";
        return false;
      }
    }
  }

  return true;
}

std::vector<int> TfLiteIntArrayToVector(const TfLiteIntArray* int_array) {
  std::vector<int> values;
  values.reserve(int_array->size);
  for (size_t i = 0; i < int_array->size; i++) {
    values.push_back(int_array->data[i]);
  }
  return values;
}

}  // namespace

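// Example invocation of the benchmark binary using the flags declared below
// (binary path and flag values are illustrative only):
//
//   benchmark_model --graph=/path/to/model.tflite \
//     --input_layer=input --input_layer_shape=1,224,224,3 \
//     --num_threads=4 --use_nnapi=false --allow_fp16=false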
BenchmarkParams BenchmarkTfLiteModel::DefaultParams() {
  BenchmarkParams default_params = BenchmarkModel::DefaultParams();
  default_params.AddParam("graph", BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer_shape",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("use_nnapi", BenchmarkParam::Create<bool>(false));
  default_params.AddParam("allow_fp16", BenchmarkParam::Create<bool>(false));
  return default_params;
}

BenchmarkTfLiteModel::BenchmarkTfLiteModel()
    : BenchmarkTfLiteModel(DefaultParams()) {}

BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params)
    : BenchmarkModel(std::move(params)) {
  AddListener(&profiling_listener_);
  AddListener(&gemmlowp_profiling_listener_);
}

void BenchmarkTfLiteModel::CleanUp() {
  if (inputs_data_.empty()) {
    return;
  }
  // Free up any tensor data that was pre-allocated in PrepareInputData.
  for (int i = 0; i < inputs_data_.size(); ++i) {
    delete[] inputs_data_[i].data.raw;
  }
  inputs_data_.clear();
}

BenchmarkTfLiteModel::~BenchmarkTfLiteModel() { CleanUp(); }

std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
  std::vector<Flag> flags = BenchmarkTfLiteModel::BenchmarkModel::GetFlags();
  std::vector<Flag> specific_flags = {
      CreateFlag<std::string>("graph", &params_, "graph file name"),
      CreateFlag<std::string>("input_layer", &params_, "input layer names"),
      CreateFlag<std::string>("input_layer_shape", &params_,
                              "input layer shape"),
      CreateFlag<bool>("use_nnapi", &params_, "use nnapi api"),
      CreateFlag<bool>("allow_fp16", &params_, "allow fp16")};

  flags.insert(flags.end(), specific_flags.begin(), specific_flags.end());
  return flags;
}

void BenchmarkTfLiteModel::LogParams() {
  BenchmarkModel::LogParams();
  TFLITE_LOG(INFO) << "Graph: [" << params_.Get<std::string>("graph") << "]";
  TFLITE_LOG(INFO) << "Input layers: ["
                   << params_.Get<std::string>("input_layer") << "]";
  TFLITE_LOG(INFO) << "Input shapes: ["
                   << params_.Get<std::string>("input_layer_shape") << "]";
  TFLITE_LOG(INFO) << "Use nnapi : [" << params_.Get<bool>("use_nnapi") << "]";
  TFLITE_LOG(INFO) << "Allow fp16 : [" << params_.Get<bool>("allow_fp16")
                   << "]";
}

bool BenchmarkTfLiteModel::ValidateParams() {
  if (params_.Get<std::string>("graph").empty()) {
    TFLITE_LOG(ERROR)
        << "Please specify the name of your TF Lite input file with --graph";
    return false;
  }
  return PopulateInputLayerInfo(params_.Get<std::string>("input_layer"),
                                params_.Get<std::string>("input_layer_shape"),
                                &inputs);
}

uint64_t BenchmarkTfLiteModel::ComputeInputBytes() {
  TFLITE_BENCHMARK_CHECK(interpreter);
  uint64_t total_input_bytes = 0;
  for (int input : interpreter->inputs()) {
    auto* t = interpreter->tensor(input);
    total_input_bytes += t->bytes;
  }
  return total_input_bytes;
}

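// Pre-generates one buffer of random data per input tensor, matching the
// tensor's type and element count. String tensors are not cached here; they
// are filled later in ResetInputsAndOutputs.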
void BenchmarkTfLiteModel::PrepareInputData() {
  auto interpreter_inputs = interpreter->inputs();
  const size_t input_size = interpreter_inputs.size();
  CleanUp();

  for (int j = 0; j < input_size; ++j) {
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter->tensor(i);
    std::vector<int> sizes = TfLiteIntArrayToVector(t->dims);
    int num_elements = 1;
    // TODO(haoliang): Ignore the 0-th dimension (number of batches).
    for (int i = 1; i < sizes.size(); ++i) {
      num_elements *= sizes[i];
    }
    InputTensorData t_data;
    if (t->type == kTfLiteFloat32) {
      t_data.bytes = sizeof(float) * num_elements;
      t_data.data.raw = new char[t_data.bytes];
      FillRandomValue<float>(t_data.data.f, num_elements, []() {
        return static_cast<float>(rand()) / RAND_MAX - 0.5f;
      });
    } else if (t->type == kTfLiteInt32) {
      // TODO(yunluli): This is currently only used for handling embedding
      // input for speech models. Generalize if necessary.
      t_data.bytes = sizeof(int32_t) * num_elements;
      t_data.data.raw = new char[t_data.bytes];
      FillRandomValue<int32_t>(t_data.data.i32, num_elements, []() {
        return static_cast<int32_t>(rand()) % 100;
      });
    } else if (t->type == kTfLiteUInt8) {
      t_data.bytes = sizeof(uint8_t) * num_elements;
      t_data.data.raw = new char[t_data.bytes];
      FillRandomValue<uint8_t>(t_data.data.uint8, num_elements, []() {
        return static_cast<uint8_t>(rand()) % 255;
      });
    } else if (t->type == kTfLiteInt8) {
      t_data.bytes = sizeof(int8_t) * num_elements;
      t_data.data.raw = new char[t_data.bytes];
      FillRandomValue<int8_t>(t_data.data.int8, num_elements, []() {
        return static_cast<int8_t>(rand()) % 255 - 127;
      });
    } else if (t->type == kTfLiteString) {
      // TODO(haoliang): No need to cache string tensors right now.
    } else {
      TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t->name
                        << " of type " << t->type;
    }
    inputs_data_.push_back(t_data);
  }
}

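// Copies the cached random buffers into the interpreter's input tensors;
// string tensors are regenerated on the fly instead of being copied.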
void BenchmarkTfLiteModel::ResetInputsAndOutputs() {
  auto interpreter_inputs = interpreter->inputs();
  // Set the values of the input tensors from inputs_data_.
  for (int j = 0; j < interpreter_inputs.size(); ++j) {
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter->tensor(i);
    if (t->type == kTfLiteFloat32) {
      std::memcpy(interpreter->typed_tensor<float>(i), inputs_data_[j].data.f,
                  inputs_data_[j].bytes);
    } else if (t->type == kTfLiteInt32) {
      std::memcpy(interpreter->typed_tensor<int32_t>(i),
                  inputs_data_[j].data.i32, inputs_data_[j].bytes);
    } else if (t->type == kTfLiteUInt8) {
      std::memcpy(interpreter->typed_tensor<uint8_t>(i),
                  inputs_data_[j].data.uint8, inputs_data_[j].bytes);
    } else if (t->type == kTfLiteInt8) {
      std::memcpy(interpreter->typed_tensor<int8_t>(i),
                  inputs_data_[j].data.int8, inputs_data_[j].bytes);
    } else if (t->type == kTfLiteString) {
      tflite::DynamicBuffer buffer;
      std::vector<int> sizes = TfLiteIntArrayToVector(t->dims);
      FillRandomString(&buffer, sizes, []() {
        return "we're have some friends over saturday to hang out in the yard";
      });
      buffer.WriteToTensor(interpreter->tensor(i), /*new_shape=*/nullptr);
    } else {
      TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t->name
                        << " of type " << t->type;
    }
  }
}

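// Loads the FlatBuffer model, builds the interpreter, configures NNAPI and
// fp16 settings, applies any delegates, resizes the input tensors to the
// requested shapes, and allocates tensors.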
void BenchmarkTfLiteModel::Init() {
  std::string graph = params_.Get<std::string>("graph");
  model = tflite::FlatBufferModel::BuildFromFile(graph.c_str());
  if (!model) {
    TFLITE_LOG(FATAL) << "Failed to mmap model " << graph;
  }
  TFLITE_LOG(INFO) << "Loaded model " << graph;
  model->error_reporter();
  TFLITE_LOG(INFO) << "resolved reporter";

#ifdef TFLITE_CUSTOM_OPS_HEADER
  tflite::MutableOpResolver resolver;
  RegisterSelectedOps(&resolver);
#else
  tflite::ops::builtin::BuiltinOpResolver resolver;
#endif

  const int32_t num_threads = params_.Get<int32_t>("num_threads");
  tflite::InterpreterBuilder(*model, resolver)(&interpreter, num_threads);
  if (!interpreter) {
    TFLITE_LOG(FATAL) << "Failed to construct interpreter";
  }
  profiling_listener_.SetInterpreter(interpreter.get());

  bool use_nnapi = params_.Get<bool>("use_nnapi");

  interpreter->UseNNAPI(use_nnapi);
  ApplyDelegates();

  bool allow_fp16 = params_.Get<bool>("allow_fp16");

  interpreter->SetAllowFp16PrecisionForFp32(allow_fp16);

  auto interpreter_inputs = interpreter->inputs();

  if (!inputs.empty()) {
    TFLITE_BENCHMARK_CHECK_EQ(inputs.size(), interpreter_inputs.size())
        << "Inputs mismatch: Model inputs #:" << interpreter_inputs.size()
        << " expected: " << inputs.size();
  }

  // Check if the tensor names match, and log a warning if they don't.
  // TODO(ycling): Consider making this an error again when the new converter
  // creates tensors with consistent naming.
  for (int j = 0; j < inputs.size(); ++j) {
    const InputLayerInfo& input = inputs[j];
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter->tensor(i);
    if (input.name != t->name) {
      TFLITE_LOG(WARN) << "Tensor # " << i << " is named " << t->name
                       << " but flags call it " << input.name;
    }
  }

  // Resize all non-string tensors.
  for (int j = 0; j < inputs.size(); ++j) {
    const InputLayerInfo& input = inputs[j];
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter->tensor(i);
    if (t->type != kTfLiteString) {
      interpreter->ResizeInputTensor(i, input.shape);
    }
  }

  // Don't allocate tensors if we have delegates.
  if (delegates_.empty() && interpreter->AllocateTensors() != kTfLiteOk) {
    TFLITE_LOG(FATAL) << "Failed to allocate tensors!";
  }
}

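// Applies every registered delegate to the interpreter graph; failure to
// apply any delegate is fatal.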
void BenchmarkTfLiteModel::ApplyDelegates() {
  for (int i = 0; i < delegates_.size(); ++i) {
    if (interpreter->ModifyGraphWithDelegate(delegates_[i].get()) !=
        kTfLiteOk) {
      TFLITE_LOG(FATAL) << "Failed to apply delegate # " << i;
    } else {
      TFLITE_LOG(INFO) << "Applied Delegate # " << i;
    }
  }
}

void BenchmarkTfLiteModel::RunImpl() {
  if (interpreter->Invoke() != kTfLiteOk) {
    TFLITE_LOG(FATAL) << "Failed to invoke!";
  }
}

}  // namespace benchmark
}  // namespace tflite