/**
 * Copyright 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "run_tflite.h"

#include <android/log.h>
#include <dirent.h>
#include <dlfcn.h>
#include <fcntl.h>
#include <ftw.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>

#include <cassert>
#include <cstdio>
#include <cstring>
#include <fstream>

#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"

#include "tensorflow/lite/kernels/register.h"

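// Logging helpers. FATAL logs at Android's FATAL priority and then asserts;
// note that __android_log_print() itself does not abort, and assert() is
// compiled out when NDEBUG is defined, so in release builds execution
// continues past a FATAL log.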
#define LOG_TAG "NN_BENCHMARK"

#define FATAL(fmt, ...)                                                  \
  do {                                                                   \
    __android_log_print(ANDROID_LOG_FATAL, LOG_TAG, fmt, ##__VA_ARGS__); \
    assert(false);                                                       \
  } while (0)

namespace {

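// Wall-clock timestamp in microseconds. gettimeofday() reports wall-clock
// time rather than a monotonic clock, so measured durations can be skewed if
// the system clock is adjusted while the benchmark is running.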
long long currentTimeInUsec() {
  timeval tv;
  gettimeofday(&tv, NULL);
  // Use 64-bit arithmetic so the multiplication cannot overflow on 32-bit ABIs.
  return (tv.tv_sec * 1000000LL) + tv.tv_usec;
}

// Workaround for build systems that make it difficult to pick the correct NDK
// API level: the NDK tracing methods are loaded dynamically from libandroid.so.
typedef void* (*fp_ATrace_beginSection)(const char* sectionName);
typedef void* (*fp_ATrace_endSection)();
struct TraceFunc {
  fp_ATrace_beginSection ATrace_beginSection;
  fp_ATrace_endSection ATrace_endSection;
};
TraceFunc setupTraceFunc() {
  void* lib = dlopen("libandroid.so", RTLD_NOW | RTLD_LOCAL);
  if (lib == nullptr) {
    FATAL("unable to open libandroid.so");
  }
  return {
      reinterpret_cast<fp_ATrace_beginSection>(
          dlsym(lib, "ATrace_beginSection")),
      reinterpret_cast<fp_ATrace_endSection>(dlsym(lib, "ATrace_endSection"))};
}
static TraceFunc kTraceFunc{setupTraceFunc()};

}  // namespace

BenchmarkModel* BenchmarkModel::create(const char* modelfile, int tfliteBackend,
                                       bool enable_intermediate_tensors_dump, int* nnapiErrno,
                                       const char* nnapi_device_name, bool mmapModel,
                                       const char* nnapi_cache_dir) {
  BenchmarkModel* model = new BenchmarkModel();
  if (!model->init(modelfile, tfliteBackend, enable_intermediate_tensors_dump, nnapiErrno,
                   nnapi_device_name, mmapModel, nnapi_cache_dir)) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to init model %s", modelfile);
    delete model;
    return nullptr;
  }
  return model;
}

bool BenchmarkModel::init(const char* modelfile, int tfliteBackend,
                          bool enable_intermediate_tensors_dump, int* nnapiErrno,
                          const char* nnapi_device_name, bool mmapModel,
                          const char* nnapi_cache_dir) {
  __android_log_print(ANDROID_LOG_INFO, LOG_TAG, "BenchmarkModel %s",
                      modelfile);
  mModelFile = modelfile;
  if (nnapi_cache_dir) {
    mCacheDir = nnapi_cache_dir;
  }
  if (nnapi_device_name) {
    mNnApiDeviceName = nnapi_device_name;
  }

  if (mmapModel) {
    // Memory-map the model. NOTE: the mapping must outlive the interpreter
    // that is built from it.
    mTfliteModel = tflite::FlatBufferModel::BuildFromFile(modelfile);
  } else {
    std::ifstream t(modelfile);
    mModelBuffer = std::string((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
    mTfliteModel = tflite::FlatBufferModel::BuildFromBuffer(mModelBuffer.c_str(), mModelBuffer.size());
  }
  if (!mTfliteModel) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to load model %s",
                        modelfile);
    return false;
  }

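  // BuiltinOpResolver registers all of TFLite's builtin operators; the
  // interpreter is constructed directly from the flatbuffer model loaded above.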
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder(*mTfliteModel, resolver)(&mTfliteInterpreter);
  if (!mTfliteInterpreter) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to create TFlite interpreter");
    return false;
  }

  if (enable_intermediate_tensors_dump) {
    // Make the output of every op a model output. This way we will be able to
    // fetch each intermediate tensor when running with delegates.
    outputs.clear();
    for (size_t node = 0; node < mTfliteInterpreter->nodes_size(); ++node) {
      auto node_outputs =
          mTfliteInterpreter->node_and_registration(node)->first.outputs;
      outputs.insert(outputs.end(), node_outputs->data,
                     node_outputs->data + node_outputs->size);
    }
    mTfliteInterpreter->SetOutputs(outputs);
  }

  // Allow Fp16 precision for all models
  mTfliteInterpreter->SetAllowFp16PrecisionForFp32(true);

  mTfliteBackend = tfliteBackend;
  switch (mTfliteBackend) {
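    // TFLITE_NNAPI: route supported ops to NNAPI through the stateful NNAPI
    // delegate. A null accelerator_name leaves device selection to NNAPI;
    // otherwise compilation is pinned to the named accelerator.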
    case TFLITE_NNAPI: {
      tflite::StatefulNnApiDelegate::Options nnapi_options;
      nnapi_options.accelerator_name = nnapi_device_name;
      mTfliteNnapiDelegate = std::make_unique<tflite::StatefulNnApiDelegate>(nnapi_options);
      int delegationStatus = mTfliteInterpreter->ModifyGraphWithDelegate(mTfliteNnapiDelegate.get());
      *nnapiErrno = mTfliteNnapiDelegate->GetNnApiErrno();
      if (delegationStatus != kTfLiteOk ||
          *nnapiErrno != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(
            ANDROID_LOG_ERROR, LOG_TAG,
            "Failed to initialize NNAPI Delegate for model %s, nnapi_errno is %d",
            modelfile, *nnapiErrno);
        return false;
      }
    } break;
    case TFLITE_GPU: {
#if defined(NN_BENCHMARK_ENABLE_GPU)
      mGpuDelegate = TfLiteGpuDelegateV2Create(/*default options=*/nullptr);
      if (mTfliteInterpreter->ModifyGraphWithDelegate(mGpuDelegate) !=
          kTfLiteOk) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "Failed to initialize GPU Delegate");
        return false;
      }
#else  // !defined(NN_BENCHMARK_ENABLE_GPU)
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                          "GPU delegate requested but not enabled with "
                          "NN_BENCHMARK_ENABLE_GPU");
      return false;
#endif  // defined(NN_BENCHMARK_ENABLE_GPU)
    } break;
    default:
      break;
  }
  return true;
}

BenchmarkModel::~BenchmarkModel() {
  switch (mTfliteBackend) {
    case TFLITE_GPU: {
#if defined(NN_BENCHMARK_ENABLE_GPU)
      TfLiteGpuDelegateV2Delete(mGpuDelegate);
#endif  // defined(NN_BENCHMARK_ENABLE_GPU)
    } break;
    default:
      break;
  }
}

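// Copies `length` bytes of raw input data into the model's first (and only
// expected) input tensor. The caller is responsible for ensuring the buffer
// size matches the tensor's byte size; no bounds check is performed here.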
bool BenchmarkModel::setInput(const uint8_t* dataPtr, size_t length) {
  int input = mTfliteInterpreter->inputs()[0];
  auto* input_tensor = mTfliteInterpreter->tensor(input);

  switch (input_tensor->type) {
    case kTfLiteFloat32:
    case kTfLiteUInt8: {
      void* raw = input_tensor->data.raw;
      memcpy(raw, dataPtr, length);
      break;
    }
    default:
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                          "Input tensor type not supported");
      return false;
  }
  return true;
}

void BenchmarkModel::saveInferenceOutput(InferenceResult* result,
                                         int output_index) {
  int output = mTfliteInterpreter->outputs()[output_index];
  auto* output_tensor = mTfliteInterpreter->tensor(output);
  auto& sink = result->inferenceOutputs[output_index];
  sink.insert(sink.end(), output_tensor->data.uint8,
              output_tensor->data.uint8 + output_tensor->bytes);
}

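// Compares one output tensor against the golden data, recording the mean
// square error over all elements and the largest single element error for
// this output in `result`.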
void BenchmarkModel::getOutputError(const uint8_t* expected_data, size_t length,
                                    InferenceResult* result, int output_index) {
  int output = mTfliteInterpreter->outputs()[output_index];
  auto* output_tensor = mTfliteInterpreter->tensor(output);
  if (output_tensor->bytes != length) {
    FATAL("Output tensor size mismatch: tensor has %zu bytes, golden data has %zu bytes",
          output_tensor->bytes, length);
  }

  size_t elements_count = 0;
  float err_sum = 0.0;
  float max_error = 0.0;
  switch (output_tensor->type) {
    case kTfLiteUInt8: {
      uint8_t* output_raw = mTfliteInterpreter->typed_tensor<uint8_t>(output);
      elements_count = output_tensor->bytes;
      for (size_t i = 0; i < output_tensor->bytes; ++i) {
        float err = ((float)output_raw[i]) - ((float)expected_data[i]);
        if (err > max_error) max_error = err;
        err_sum += err * err;
      }
      break;
    }
    case kTfLiteFloat32: {
      const float* expected = reinterpret_cast<const float*>(expected_data);
      float* output_raw = mTfliteInterpreter->typed_tensor<float>(output);
      elements_count = output_tensor->bytes / sizeof(float);
      for (size_t i = 0; i < output_tensor->bytes / sizeof(float); ++i) {
        float err = output_raw[i] - expected[i];
        if (err > max_error) max_error = err;
        err_sum += err * err;
      }
      break;
    }
    default:
      FATAL("Output tensor type %d not supported", output_tensor->type);
  }
  result->meanSquareErrors[output_index] = err_sum / elements_count;
  result->maxSingleErrors[output_index] = max_error;
}

bool BenchmarkModel::resizeInputTensors(std::vector<int> shape) {
  // The benchmark only expects a single input tensor, hardcoded as index 0.
  int input = mTfliteInterpreter->inputs()[0];
  mTfliteInterpreter->ResizeInputTensor(input, shape);
  if (mTfliteInterpreter->AllocateTensors() != kTfLiteOk) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to allocate tensors!");
    return false;
  }
  return true;
}

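// Runs a single Invoke(). The NNAPI errno captured by the delegate is checked
// in addition to the TFLite status, since an NNAPI-level failure is reported
// separately from the interpreter's return code.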
bool BenchmarkModel::runInference() {
  auto status = mTfliteInterpreter->Invoke();
  auto nnapi_errno = mTfliteNnapiDelegate
                         ? mTfliteNnapiDelegate->GetNnApiErrno()
                         : ANEURALNETWORKS_NO_ERROR;
  if (status != kTfLiteOk || nnapi_errno != ANEURALNETWORKS_NO_ERROR) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to invoke, tflite status: %d, nnapi errno: %d!",
                        (int)status, nnapi_errno);
    return false;
  }
  return true;
}

bool BenchmarkModel::resetStates() {
  auto status = mTfliteInterpreter->ResetVariableTensors();
  if (status != kTfLiteOk) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to reset variable tensors: %d!", (int)status);
    return false;
  }
  return true;
}

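// Runs up to seqInferencesMaxCount input/output sequences, cycling through
// inOutData, and stops once the accumulated inference time exceeds `timeout`
// seconds (checked after each sequence). FLAG_IGNORE_GOLDEN_OUTPUT skips the
// error computation, FLAG_DISCARD_INFERENCE_OUTPUT skips saving raw outputs,
// and FLAG_SAMPLE_BENCHMARK_RESULTS records results only for every
// INFERENCE_OUT_SAMPLE_RATE-th sequence.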
bool BenchmarkModel::benchmark(
    const std::vector<InferenceInOutSequence>& inOutData,
    int seqInferencesMaxCount, float timeout, int flags,
    std::vector<InferenceResult>* results) {
  if (inOutData.empty()) {
    __android_log_print(ANDROID_LOG_WARN, LOG_TAG,
                        "Input/output vector is empty");
    return true;
  }

  float inferenceTotal = 0.0;
  for (int seqInferenceIndex = 0; seqInferenceIndex < seqInferencesMaxCount;
       ++seqInferenceIndex) {
    resetStates();

    const int inputOutputSequenceIndex = seqInferenceIndex % inOutData.size();
    const InferenceInOutSequence& seq = inOutData[inputOutputSequenceIndex];
    const bool sampleResults = (flags & FLAG_SAMPLE_BENCHMARK_RESULTS) != 0;
    for (int i = 0; i < seq.size(); ++i) {
      const InferenceInOut& data = seq[i];

      // For NNAPI systrace usage documentation, see
      // frameworks/ml/nn/common/include/Tracing.h.
      kTraceFunc.ATrace_beginSection("[NN_LA_PE]BenchmarkModel::benchmark");
      kTraceFunc.ATrace_beginSection("[NN_LA_PIO]BenchmarkModel::input");
      if (data.input) {
        setInput(data.input, data.input_size);
      } else {
        int input = mTfliteInterpreter->inputs()[0];
        auto* input_tensor = mTfliteInterpreter->tensor(input);
        if (!data.createInput((uint8_t*)input_tensor->data.raw,
                              input_tensor->bytes)) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                              "Input creation %d failed", i);
          return false;
        }
      }
      kTraceFunc.ATrace_endSection();
      long long startTime = currentTimeInUsec();
      const bool success = runInference();
      kTraceFunc.ATrace_endSection();
      long long endTime = currentTimeInUsec();
      if (!success) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Inference %d failed",
                            i);
        return false;
      }

      float inferenceTime =
          static_cast<float>(endTime - startTime) / 1000000.0f;
      size_t outputsCount = mTfliteInterpreter->outputs().size();
      InferenceResult result{
          inferenceTime, {}, {}, {}, inputOutputSequenceIndex, i};
      result.meanSquareErrors.resize(outputsCount);
      result.maxSingleErrors.resize(outputsCount);
      result.inferenceOutputs.resize(outputsCount);

      if ((flags & FLAG_IGNORE_GOLDEN_OUTPUT) == 0) {
        if (outputsCount != data.outputs.size()) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                              "Golden/actual outputs (%zu/%zu) count mismatch",
                              data.outputs.size(), outputsCount);
          return false;
        }
        for (int j = 0; j < outputsCount; ++j) {
          getOutputError(data.outputs[j].ptr, data.outputs[j].size, &result, j);
        }
      }

      if ((flags & FLAG_DISCARD_INFERENCE_OUTPUT) == 0) {
        for (int j = 0; j < outputsCount; ++j) {
          saveInferenceOutput(&result, j);
        }
      }

      if (!sampleResults || (seqInferenceIndex % INFERENCE_OUT_SAMPLE_RATE) == 0) {
        results->push_back(result);
      }
      inferenceTotal += inferenceTime;
    }

    // Timeout?
    if (timeout > 0.001 && inferenceTotal > timeout) {
      return true;
    }
  }
  return true;
}

// If cacheDir is not nullptr, compilation caching will be used with NNAPI.
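// A fresh interpreter is built here so that each call measures a full
// compilation. With caching enabled, cache_dir and model_token are passed to
// the NNAPI delegate; the model file path is used as the token identifying
// this model within the cache directory.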
bool BenchmarkModel::runCompilation(const char* cacheDir) {
  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder(*mTfliteModel, resolver)(&interpreter);
  if (!interpreter) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to create TFlite interpreter");
    return false;
  }

  // Allow Fp16 precision for all models
  interpreter->SetAllowFp16PrecisionForFp32(true);

  if (mTfliteBackend == TFLITE_NNAPI) {
    tflite::StatefulNnApiDelegate::Options nnapi_options;
    nnapi_options.accelerator_name = mNnApiDeviceName.empty() ? nullptr : mNnApiDeviceName.c_str();
    if (cacheDir) {
      nnapi_options.cache_dir = cacheDir;
      nnapi_options.model_token = mModelFile.c_str();
    }
    tflite::StatefulNnApiDelegate delegate(nnapi_options);
    int delegationStatus = interpreter->ModifyGraphWithDelegate(&delegate);
    auto nnapiErrno = delegate.GetNnApiErrno();
    if (delegationStatus != kTfLiteOk || nnapiErrno != ANEURALNETWORKS_NO_ERROR) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                          "Failed to initialize NNAPI Delegate for model %s, nnapi_errno is %d",
                          mModelFile.c_str(), nnapiErrno);
      return false;
    }
  }
  return true;
}

// A helper class to manage the lifetime of a temporary cache directory.
class ScopedTempDirectory {
 public:
  ScopedTempDirectory(std::string base) : mBase(std::move(base)) {}
  ~ScopedTempDirectory() { cleanup(); }

  // Create a new temp directory, remove the old one if needed.
  void recreate() {
    cleanup();
    mTempDir = mBase + "/XXXXXX";
    mkdtemp(&mTempDir[0]);
  }

  // Get the path to the temp directory.
  const char* get() const { return mTempDir.empty() ? nullptr : mTempDir.c_str(); }

 private:
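  // Recursively removes the temp directory. FTW_DEPTH makes nftw() do a
  // post-order walk (children before their parent directory) so remove()
  // succeeds on non-empty directories; FTW_PHYS avoids following symlinks.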
  void cleanup() {
    if (mTempDir.empty()) {
      return;
    }
    auto callback = [](const char* entry, const struct stat*, int, struct FTW*) {
      return remove(entry);
    };
    nftw(mTempDir.c_str(), callback, 128, FTW_DEPTH | FTW_MOUNT | FTW_PHYS);
    mTempDir.clear();
  }

  std::string mBase;
  std::string mTempDir;
};

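// Measures the size of the NNAPI compilation cache: compiles once with
// caching enabled into a fresh temp directory and sums the sizes of the
// regular files written there. A result of 0 means the driver produced no
// cache files, i.e. compilation caching is effectively unsupported.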
bool BenchmarkModel::getCompilationCacheSize(int* cacheSizeBytes) {
  if (cacheSizeBytes == nullptr) return false;

  // Create cache files.
  ScopedTempDirectory tempDir(mCacheDir.value());
  tempDir.recreate();
  const bool success = runCompilation(tempDir.get());
  if (!success) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Save to cache failed");
    return false;
  }

  // Compute total size of cache files.
  int totalSize = 0;
  DIR* dir = opendir(tempDir.get());
  if (dir == nullptr) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to open cache directory");
    return false;
  }
  struct dirent* dp = nullptr;
  while ((dp = readdir(dir)) != nullptr) {
    char fullPath[1024];
    snprintf(fullPath, 1024, "%s/%s", tempDir.get(), dp->d_name);
    struct stat st;
    int err = stat(fullPath, &st);
    if (err != 0) {
      closedir(dir);
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to stat %s", fullPath);
      return false;
    }
    // Only accumulate sizes of regular files. This will exclude '.' and '..'.
    if (S_ISREG(st.st_mode)) {
      totalSize += st.st_size;
    }
  }
  closedir(dir);
  *cacheSizeBytes = totalSize;
  return true;
}

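// Benchmarks one compilation mode for up to maxNumIterations:
//  - WITHOUT_CACHE: compile with caching disabled.
//  - SAVE_TO_CACHE: recreate an empty cache directory before each iteration,
//    so every compilation is a cache miss that also writes the cache.
//  - PREPARE_FROM_CACHE: populate the cache once up front, so every iteration
//    measures compilation from a warm cache.
// Stops early once the accumulated compilation time exceeds `timeout` seconds.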
bool BenchmarkModel::benchmarkSingleTypeOfCompilation(CompilationBenchmarkType type,
                                                      int maxNumIterations, float timeout,
                                                      std::vector<float>* results) {
  if (results != nullptr) {
    results->clear();
  }
  ScopedTempDirectory tempDir(mCacheDir.value());

  // Initialize cache files to benchmark cache hit.
  if (type == CompilationBenchmarkType::PREPARE_FROM_CACHE) {
    tempDir.recreate();
    const bool success = runCompilation(tempDir.get());
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Save to cache failed");
      return false;
    }
  }

  float compilationTotal = 0.0;
  for (int i = 0; i < maxNumIterations; i++) {
    const char* cacheDir = nullptr;
    switch (type) {
      case CompilationBenchmarkType::WITHOUT_CACHE:
        cacheDir = nullptr;
        break;
      case CompilationBenchmarkType::SAVE_TO_CACHE:
        // Remove the cache files from the last iteration to benchmark cache miss.
        tempDir.recreate();
        [[fallthrough]];
      case CompilationBenchmarkType::PREPARE_FROM_CACHE:
        cacheDir = tempDir.get();
        break;
      default:
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Unknown CompilationBenchmarkType: %d",
                            static_cast<int>(type));
        return false;
    }

    kTraceFunc.ATrace_beginSection("[NN_LA_PC]BenchmarkModel::benchmarkCompilation");
    const long long startTime = currentTimeInUsec();
    const bool success = runCompilation(cacheDir);
    const long long endTime = currentTimeInUsec();
    kTraceFunc.ATrace_endSection();
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Compilation %d failed", i);
      return false;
    }

    const float compilationTime = static_cast<float>(endTime - startTime) / 1000000.0f;
    if (results != nullptr) {
      results->push_back(compilationTime);
    }

    // Timeout?
    compilationTotal += compilationTime;
    if (timeout > 0.001 && compilationTotal > timeout) {
      return true;
    }
  }
  return true;
}

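// Runs the compilation benchmark twice: first as a warmup whose timings are
// discarded (bounded by warmupTimeout), then for real (bounded by runTimeout),
// so that one-time driver initialization is less likely to skew the recorded
// numbers.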
bool BenchmarkModel::benchmarkSingleTypeOfCompilationWithWarmup(CompilationBenchmarkType type,
                                                                int maxNumIterations,
                                                                float warmupTimeout,
                                                                float runTimeout,
                                                                std::vector<float>* results) {
  kTraceFunc.ATrace_beginSection(
          "[NN_LA_PWM]BenchmarkModel::benchmarkSingleTypeOfCompilationWithWarmup");
  bool success = benchmarkSingleTypeOfCompilation(type, maxNumIterations, warmupTimeout, nullptr);
  kTraceFunc.ATrace_endSection();
  if (!success) return false;

  kTraceFunc.ATrace_beginSection(
          "[NN_LA_PBM]BenchmarkModel::benchmarkSingleTypeOfCompilationWithWarmup");
  success = benchmarkSingleTypeOfCompilation(type, maxNumIterations, runTimeout, results);
  kTraceFunc.ATrace_endSection();
  return success;
}

bool BenchmarkModel::benchmarkCompilation(int maxNumIterations, float warmupTimeout,
                                          float runTimeout, CompilationBenchmarkResult* result) {
  if (result == nullptr) return false;

  // Benchmark compile without cache.
  bool success = benchmarkSingleTypeOfCompilationWithWarmup(
          CompilationBenchmarkType::WITHOUT_CACHE, maxNumIterations, warmupTimeout, runTimeout,
          &result->compileWithoutCacheTimeSec);
  if (!success) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to benchmark compilation without cache");
    return false;
  }

  // Get compilation cache size.
  success = getCompilationCacheSize(&result->cacheSizeBytes);
  if (!success) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to retrieve compilation cache size");
    return false;
  }

  // Benchmark saving to cache and preparing from cache only if supported.
  if (result->cacheSizeBytes > 0) {
    // Benchmark saving to cache.
    auto& saveToCacheTimeSec = result->saveToCacheTimeSec.emplace();
    success = benchmarkSingleTypeOfCompilationWithWarmup(
            CompilationBenchmarkType::SAVE_TO_CACHE, maxNumIterations, warmupTimeout, runTimeout,
            &saveToCacheTimeSec);
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to benchmark saving to cache");
      return false;
    }

    // Benchmark preparing from cache.
    auto& prepareFromCacheTimeSec = result->prepareFromCacheTimeSec.emplace();
    success = benchmarkSingleTypeOfCompilationWithWarmup(
            CompilationBenchmarkType::PREPARE_FROM_CACHE, maxNumIterations, warmupTimeout,
            runTimeout, &prepareFromCacheTimeSec);
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to benchmark preparing from cache");
      return false;
    }
  }
  return true;
}

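// Dumps the contents of every intermediate tensor to files under `path`.
// This relies on init() having been called with enable_intermediate_tensors_dump,
// which records every node's output tensor indices in `outputs`. Each tensor's
// raw bytes are written to a file named
// dump_<seq>_seq_<inference>_order_<order>_tensor_<tensor index>.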
bool BenchmarkModel::dumpAllLayers(
    const char* path, const std::vector<InferenceInOutSequence>& inOutData) {
  if (inOutData.empty()) {
    FATAL("Input/output vector is empty");
  }

  for (int seqInferenceIndex = 0; seqInferenceIndex < inOutData.size();
       ++seqInferenceIndex) {
    resetStates();

    const InferenceInOutSequence& seq = inOutData[seqInferenceIndex];
    for (int i = 0; i < seq.size(); ++i) {
      const InferenceInOut& data = seq[i];
      setInput(data.input, data.input_size);
      const bool success = runInference();
      if (!success) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Inference %d failed",
                            i);
        return false;
      }

      // The tensors are visited in recording order, which is not necessarily
      // sorted by tensor index.
      for (int tensor_order = 0; tensor_order < outputs.size(); ++tensor_order) {
        int tensor_index = outputs[tensor_order];
        auto* output_tensor = mTfliteInterpreter->tensor(tensor_index);
        if (output_tensor->data.raw == nullptr) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                      "output_tensor->data.raw == nullptr at index %d ", tensor_index);
          continue;
        }
        char fullpath[1024];
        snprintf(fullpath, 1024, "%s/dump_%.3d_seq_%.3d_order_%.3d_tensor_%.3d", path,
                 seqInferenceIndex, i, tensor_order, tensor_index);
        FILE* f = fopen(fullpath, "wb");
        if (f == nullptr) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to open %s for writing",
                              fullpath);
          continue;
        }
        fwrite(output_tensor->data.raw, output_tensor->bytes, 1, f);
        fclose(f);
      }
    }
  }
  return true;
}