/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h"

#include <cstdarg>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <unordered_set>
#include <vector>

#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/op_resolver.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/tools/benchmark/logging.h"

#ifdef GEMMLOWP_PROFILING
#include "gemmlowp/profiling/profiler.h"
#endif

#ifdef TFLITE_CUSTOM_OPS_HEADER
void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);
#endif

namespace tflite {
namespace benchmark {

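// Registers the benchmark's profiler with |interpreter| so that op-level
// timing events can be collected during profiled runs.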
void ProfilingListener::SetInterpreter(tflite::Interpreter* interpreter) {
  TFLITE_BENCHMARK_CHECK(interpreter);
  interpreter_ = interpreter;
  interpreter_->SetProfiler(&profiler_);
}

void ProfilingListener::OnSingleRunStart(RunType run_type) {
  if (run_type == REGULAR) {
    profiler_.Reset();
    profiler_.StartProfiling();
  }
}

void ProfilingListener::OnBenchmarkEnd(const BenchmarkResults& results) {
  if (has_profiles_) {
    TFLITE_LOG(INFO) << summarizer_.GetOutputString();
  }
}

void ProfilingListener::OnSingleRunEnd() {
  profiler_.StopProfiling();
  auto profile_events = profiler_.GetProfileEvents();
  has_profiles_ = !profile_events.empty();
  summarizer_.ProcessProfiles(profile_events, *interpreter_);
}

void GemmlowpProfilingListener::OnBenchmarkStart(
    const BenchmarkParams& params) {
#ifdef GEMMLOWP_PROFILING
  gemmlowp::RegisterCurrentThreadForProfiling();
  gemmlowp::StartProfiling();
#endif
}

void GemmlowpProfilingListener::OnBenchmarkEnd(
    const BenchmarkResults& results) {
#ifdef GEMMLOWP_PROFILING
  gemmlowp::FinishProfiling();
#endif
}

namespace {

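// Splits |str| on |delim| and returns the resulting pieces, e.g.
// Split("1,2,3", ',') -> {"1", "2", "3"}.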
std::vector<std::string> Split(const std::string& str, const char delim) {
  std::istringstream input(str);
  std::vector<std::string> results;
  std::string item;
  while (std::getline(input, item, delim)) {
    results.push_back(item);
  }
  return results;
}

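// Parses a |delim|-separated list of values (e.g. "1,224,224,3") into
// |values|. Returns false if the string is malformed.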
template <typename T>
bool SplitAndParse(const std::string& str, char delim, std::vector<T>* values) {
  std::istringstream input(str);
  bool first = true;
  while (!input.eof()) {
    if (!first) {
      char c;
      input >> c;
      if (c != delim) {
        return false;
      }
    } else {
      first = false;
    }
    T val;
    input >> val;
    if (!input.eof() && !input.good()) {
      return false;
    }
    values->push_back(val);
  }
  return true;
}

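// Fills |num_elements| values starting at |ptr| with values produced by
// |random_func|.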
template <typename T>
void FillRandomValue(T* ptr, int num_elements,
                     const std::function<T()>& random_func) {
  for (int i = 0; i < num_elements; ++i) {
    *ptr++ = random_func();
  }
}

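// Appends one string produced by |random_func| to |buffer| for every element
// of the tensor shape given by |sizes|.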
void FillRandomString(tflite::DynamicBuffer* buffer,
                      const std::vector<int>& sizes,
                      const std::function<string()>& random_func) {
  int num_elements = 1;
  for (int dim : sizes) {
    num_elements *= dim;
  }
  for (int i = 0; i < num_elements; ++i) {
    auto str = random_func();
    buffer->AddString(str.data(), str.length());
  }
}

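// Builds the list of input layers to benchmark from the comma-separated
// --input_layer names and the colon-separated --input_layer_shape shapes.
// Returns false if the two lists are inconsistent or a shape contains -1.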
bool PopulateInputLayerInfo(
    const string& names_string, const string& shapes_string,
    std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
  std::vector<std::string> names = Split(names_string, ',');
  std::vector<std::string> shapes = Split(shapes_string, ':');

  if (names.size() != shapes.size()) {
    TFLITE_LOG(ERROR) << "The number of items in"
                      << " --input_layer_shape (" << shapes_string << ", with "
                      << shapes.size() << " items)"
                      << " must match the number of items in"
                      << " --input_layer (" << names_string << ", with "
                      << names.size() << " items)."
                      << " For example --input_layer=input1,input2"
                      << " --input_layer_shape=1,224,224,4:1,20";
    return false;
  }

  for (int i = 0; i < names.size(); ++i) {
    info->push_back(BenchmarkTfLiteModel::InputLayerInfo());
    BenchmarkTfLiteModel::InputLayerInfo& input = info->back();

    input.name = names[i];

    TFLITE_BENCHMARK_CHECK(SplitAndParse(shapes[i], ',', &input.shape))
        << "Incorrect size string specified: " << shapes[i];
    for (int dim : input.shape) {
      if (dim == -1) {
        TFLITE_LOG(ERROR)
            << "Any unknown sizes in the shapes (-1's) must be replaced"
            << " with the size you want to benchmark with.";
        return false;
      }
    }
  }

  return true;
}

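// Copies the dimensions in |int_array| into a std::vector<int>.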
std::vector<int> TfLiteIntArrayToVector(const TfLiteIntArray* int_array) {
  std::vector<int> values;
  values.reserve(int_array->size);
  for (size_t i = 0; i < int_array->size; i++) {
    values.push_back(int_array->data[i]);
  }
  return values;
}

}  // namespace

BenchmarkParams BenchmarkTfLiteModel::DefaultParams() {
  BenchmarkParams default_params = BenchmarkModel::DefaultParams();
  default_params.AddParam("graph", BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer_shape",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("use_nnapi", BenchmarkParam::Create<bool>(false));
  default_params.AddParam("allow_fp16", BenchmarkParam::Create<bool>(false));
  return default_params;
}

BenchmarkTfLiteModel::BenchmarkTfLiteModel()
    : BenchmarkTfLiteModel(DefaultParams()) {}

BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params)
    : BenchmarkModel(std::move(params)) {
  AddListener(&profiling_listener_);
  AddListener(&gemmlowp_profiling_listener_);
}

void BenchmarkTfLiteModel::CleanUp() {
  if (inputs_data_.empty()) {
    return;
  }
  // Free any tensor data that was pre-allocated in PrepareInputData.
  for (int i = 0; i < inputs_data_.size(); ++i) {
    delete[] inputs_data_[i].data.raw;
  }
  inputs_data_.clear();
}

BenchmarkTfLiteModel::~BenchmarkTfLiteModel() { CleanUp(); }

std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
  std::vector<Flag> flags = BenchmarkTfLiteModel::BenchmarkModel::GetFlags();
  std::vector<Flag> specific_flags = {
      CreateFlag<std::string>("graph", &params_, "graph file name"),
      CreateFlag<std::string>("input_layer", &params_, "input layer names"),
      CreateFlag<std::string>("input_layer_shape", &params_,
                              "input layer shape"),
      CreateFlag<bool>("use_nnapi", &params_, "use nnapi api"),
      CreateFlag<bool>("allow_fp16", &params_, "allow fp16")};

  flags.insert(flags.end(), specific_flags.begin(), specific_flags.end());
  return flags;
}

void BenchmarkTfLiteModel::LogParams() {
  BenchmarkModel::LogParams();
  TFLITE_LOG(INFO) << "Graph: [" << params_.Get<std::string>("graph") << "]";
  TFLITE_LOG(INFO) << "Input layers: ["
                   << params_.Get<std::string>("input_layer") << "]";
  TFLITE_LOG(INFO) << "Input shapes: ["
                   << params_.Get<std::string>("input_layer_shape") << "]";
  TFLITE_LOG(INFO) << "Use nnapi : [" << params_.Get<bool>("use_nnapi") << "]";
  TFLITE_LOG(INFO) << "Allow fp16 : [" << params_.Get<bool>("allow_fp16")
                   << "]";
}

bool BenchmarkTfLiteModel::ValidateParams() {
  if (params_.Get<std::string>("graph").empty()) {
    TFLITE_LOG(ERROR)
        << "Please specify the name of your TF Lite input file with --graph";
    return false;
  }
  return PopulateInputLayerInfo(params_.Get<std::string>("input_layer"),
                                params_.Get<std::string>("input_layer_shape"),
                                &inputs);
}

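// Returns the total byte size of the interpreter's input tensors.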
uint64_t BenchmarkTfLiteModel::ComputeInputBytes() {
  TFLITE_BENCHMARK_CHECK(interpreter);
  uint64_t total_input_bytes = 0;
  for (int input : interpreter->inputs()) {
    auto* t = interpreter->tensor(input);
    total_input_bytes += t->bytes;
  }
  return total_input_bytes;
}

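// Pre-allocates and fills a buffer of random data for every non-string input
// tensor; the cached buffers are copied into the interpreter's input tensors
// before each run by ResetInputsAndOutputs.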
void BenchmarkTfLiteModel::PrepareInputData() {
  auto interpreter_inputs = interpreter->inputs();
  const size_t input_size = interpreter_inputs.size();
  CleanUp();

  for (int j = 0; j < input_size; ++j) {
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter->tensor(i);
    std::vector<int> sizes = TfLiteIntArrayToVector(t->dims);
    int num_elements = 1;
    // TODO(haoliang): Ignore the 0-th dimension (number of batches).
    for (int i = 1; i < sizes.size(); ++i) {
      num_elements *= sizes[i];
    }
    InputTensorData t_data;
    if (t->type == kTfLiteFloat32) {
      t_data.bytes = sizeof(float) * num_elements;
      t_data.data.raw = new char[t_data.bytes];
      FillRandomValue<float>(t_data.data.f, num_elements, []() {
        return static_cast<float>(rand()) / RAND_MAX - 0.5f;
      });
    } else if (t->type == kTfLiteInt32) {
      // TODO(yunluli): This is currently only used for handling embedding
      // input for speech models. Generalize if necessary.
      t_data.bytes = sizeof(int32_t) * num_elements;
      t_data.data.raw = new char[t_data.bytes];
      FillRandomValue<int32_t>(t_data.data.i32, num_elements, []() {
        return static_cast<int32_t>(rand()) % 100;
      });
    } else if (t->type == kTfLiteUInt8) {
      t_data.bytes = sizeof(uint8_t) * num_elements;
      t_data.data.raw = new char[t_data.bytes];
      FillRandomValue<uint8_t>(t_data.data.uint8, num_elements, []() {
        return static_cast<uint8_t>(rand()) % 255;
      });
    } else if (t->type == kTfLiteInt8) {
      t_data.bytes = sizeof(int8_t) * num_elements;
      t_data.data.raw = new char[t_data.bytes];
      FillRandomValue<int8_t>(t_data.data.int8, num_elements, []() {
        return static_cast<int8_t>(rand() % 255 - 127);
      });
    } else if (t->type == kTfLiteString) {
      // TODO(haoliang): No need to cache string tensors right now.
    } else {
      TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t->name
                        << " of type " << t->type;
    }
    inputs_data_.push_back(t_data);
  }
}

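// Copies the cached random data from inputs_data_ back into the interpreter's
// input tensors before each run; string tensors are re-filled with generated
// strings instead of being copied.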
void BenchmarkTfLiteModel::ResetInputsAndOutputs() {
  auto interpreter_inputs = interpreter->inputs();
  // Set the values of the input tensors from inputs_data_.
  for (int j = 0; j < interpreter_inputs.size(); ++j) {
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter->tensor(i);
    if (t->type == kTfLiteFloat32) {
      std::memcpy(interpreter->typed_tensor<float>(i), inputs_data_[j].data.f,
                  inputs_data_[j].bytes);
    } else if (t->type == kTfLiteInt32) {
      std::memcpy(interpreter->typed_tensor<int32_t>(i),
                  inputs_data_[j].data.i32, inputs_data_[j].bytes);
    } else if (t->type == kTfLiteUInt8) {
      std::memcpy(interpreter->typed_tensor<uint8_t>(i),
                  inputs_data_[j].data.uint8, inputs_data_[j].bytes);
    } else if (t->type == kTfLiteInt8) {
      std::memcpy(interpreter->typed_tensor<int8_t>(i),
                  inputs_data_[j].data.int8, inputs_data_[j].bytes);
    } else if (t->type == kTfLiteString) {
      tflite::DynamicBuffer buffer;
      std::vector<int> sizes = TfLiteIntArrayToVector(t->dims);
      FillRandomString(&buffer, sizes, []() {
        return "we're have some friends over saturday to hang out in the yard";
      });
      buffer.WriteToTensor(interpreter->tensor(i), /*new_shape=*/nullptr);
    } else {
      TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t->name
                        << " of type " << t->type;
    }
  }
}

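// Loads the FlatBuffer model named by --graph, builds the interpreter with the
// selected op resolver, applies NNAPI/fp16 settings and any delegates, and
// resizes the input tensors to the shapes given on the command line.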
void BenchmarkTfLiteModel::Init() {
  std::string graph = params_.Get<std::string>("graph");
  model = tflite::FlatBufferModel::BuildFromFile(graph.c_str());
  if (!model) {
    TFLITE_LOG(FATAL) << "Failed to mmap model " << graph;
  }
  TFLITE_LOG(INFO) << "Loaded model " << graph;
  model->error_reporter();
  TFLITE_LOG(INFO) << "resolved reporter";

#ifdef TFLITE_CUSTOM_OPS_HEADER
  tflite::MutableOpResolver resolver;
  RegisterSelectedOps(&resolver);
#else
  tflite::ops::builtin::BuiltinOpResolver resolver;
#endif

  const int32_t num_threads = params_.Get<int32_t>("num_threads");
  tflite::InterpreterBuilder(*model, resolver)(&interpreter, num_threads);
  if (!interpreter) {
    TFLITE_LOG(FATAL) << "Failed to construct interpreter";
  }
  profiling_listener_.SetInterpreter(interpreter.get());

  bool use_nnapi = params_.Get<bool>("use_nnapi");

  interpreter->UseNNAPI(use_nnapi);
  ApplyDelegates();

  bool allow_fp16 = params_.Get<bool>("allow_fp16");

  interpreter->SetAllowFp16PrecisionForFp32(allow_fp16);

  auto interpreter_inputs = interpreter->inputs();

  if (!inputs.empty()) {
    TFLITE_BENCHMARK_CHECK_EQ(inputs.size(), interpreter_inputs.size())
        << "Inputs mismatch: Model inputs #:" << interpreter_inputs.size()
        << " expected: " << inputs.size();
  }

  // Check if the tensor names match, and log a warning if they don't.
  // TODO(ycling): Consider making this an error again once the new converter
  // creates tensors with consistent naming.
  for (int j = 0; j < inputs.size(); ++j) {
    const InputLayerInfo& input = inputs[j];
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter->tensor(i);
    if (input.name != t->name) {
      TFLITE_LOG(WARN) << "Tensor # " << i << " is named " << t->name
                       << " but flags call it " << input.name;
    }
  }

  // Resize all non-string tensors.
  for (int j = 0; j < inputs.size(); ++j) {
    const InputLayerInfo& input = inputs[j];
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter->tensor(i);
    if (t->type != kTfLiteString) {
      interpreter->ResizeInputTensor(i, input.shape);
    }
  }

  // Don't allocate tensors if we have delegates.
  if (delegates_.empty() && interpreter->AllocateTensors() != kTfLiteOk) {
    TFLITE_LOG(FATAL) << "Failed to allocate tensors!";
  }
}

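// Applies each registered TfLiteDelegate to the interpreter graph; failing to
// apply any delegate is fatal.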
void BenchmarkTfLiteModel::ApplyDelegates() {
  for (int i = 0; i < delegates_.size(); ++i) {
    if (interpreter->ModifyGraphWithDelegate(delegates_[i].get()) !=
        kTfLiteOk) {
      TFLITE_LOG(FATAL) << "Failed to apply delegate # " << i;
    } else {
      TFLITE_LOG(INFO) << "Applied Delegate # " << i;
    }
  }
}

void BenchmarkTfLiteModel::RunImpl() {
  if (interpreter->Invoke() != kTfLiteOk) {
    TFLITE_LOG(FATAL) << "Failed to invoke!";
  }
}

}  // namespace benchmark
}  // namespace tflite